예제 #1
0
파일: adaboost_test.py 프로젝트: z8g/common
 def test_loadDataSet(self):
     """Train on ``train.txt``, classify ``test.txt`` and print the error count.

     The loaded training data and labels are printed first.  The last line
     printed is the number of test samples whose prediction differs from
     the label in ``test.txt``.
     """
     dataArr, labelArr = adaboost.loadDataSet('train.txt')
     # Single-argument print(...) parses on both Python 2 and Python 3; the
     # 1-tuple keeps '%s' safe even if the loader ever returns a tuple.
     print("[dataArr] %s" % (dataArr,))
     print("[labelArr] %s" % (labelArr,))
     classifierArray = adaboost.adaBoostTrainDS(dataArr, labelArr, 9)
     testArr, testLabelArr = adaboost.loadDataSet('test.txt')
     prediction10 = adaboost.adaClassify(testArr, classifierArray)
     # One row per test sample, so the boolean mask below always matches the
     # shape of errArr (the size used to be hard-coded to 67).
     errArr = mat(ones((len(testLabelArr), 1)))
     print(errArr[prediction10 != mat(testLabelArr).T].sum())
예제 #2
0
def main():
    """Run stump building, boosting and classification on the simple data set.

    Nothing is returned or printed by this function itself; the results of
    the individual calls are only bound to local names.
    """
    import adaboost
    from numpy import mat, ones

    samples, labels = adaboost.loadSimpleData()
    # Uniform weights: a 5x1 column in which every entry is 1/5.
    weights = mat(ones((5, 1)) / 5)
    stump_result = adaboost.buildStump(samples, labels, weights)
    best_stump, min_error, best_estimate = stump_result
    training_result = adaboost.adaBoostTrainDS(samples, labels, 9)
    classifiers, agg_estimate = training_result
    query_points = [[5, 5], [0, 0]]
    adaboost.adaClassify(query_points, classifiers)
예제 #3
0
def main():
    """Train on the horse-colic training file and print the test error rate.

    The error rate is the number of test samples whose prediction differs
    from the label, divided by the number of test samples.
    """
    # Single-argument print(...) parses on both Python 2 and Python 3.
    print('---------------------training--------------------')
    datArr, labelArr = loadDataSet('horseColicTraining2.txt')
    # the last argument is the number of classifiers
    classifierArray, aggClassEst = ab.adaBoostTrainDS(datArr, labelArr, 50)

    print('---------------------testing---------------------')
    testArr, testLabelArr = loadDataSet('horseColicTest2.txt')
    prediction10 = ab.adaClassify(testArr, classifierArray)

    # Take the sample count from the data instead of hard-coding 67, so the
    # mask shape and the divisor stay correct for any test file.
    numTest = len(testLabelArr)
    errArr = mat(ones((numTest, 1)))
    errRate = errArr[prediction10 != mat(testLabelArr).T].sum() / numTest
    print('error rate: %s' % errRate)
예제 #4
0
def main():
    """Train on the horse-colic training file and print the test error rate.

    The error rate is the number of test samples whose prediction differs
    from the label, divided by the number of test samples.
    """
    # Single-argument print(...) parses on both Python 2 and Python 3.
    print('---------------------training--------------------')
    datArr, labelArr = loadDataSet('horseColicTraining2.txt')
    # the last argument is the number of classifiers
    classifierArray, aggClassEst = ab.adaBoostTrainDS(datArr, labelArr, 50)

    print('---------------------testing---------------------')
    testArr, testLabelArr = loadDataSet('horseColicTest2.txt')
    prediction10 = ab.adaClassify(testArr, classifierArray)

    # Take the sample count from the data instead of hard-coding 67, so the
    # mask shape and the divisor stay correct for any test file.
    numTest = len(testLabelArr)
    errArr = mat(ones((numTest, 1)))
    errRate = errArr[prediction10 != mat(testLabelArr).T].sum() / numTest
    print('error rate: %s' % errRate)
예제 #5
0
def test():
    """Exercise stump building, boosting and classification, printing each result."""
    samples, labels = adaboost.loadSimpleData()
    print("dataMat: [%s] classLabels: [%s]" % (samples, labels))
    # adaboost.plt(samples, labels)
    # Uniform weights: a 5x1 column in which every entry is 1/5.
    weights = mat(ones((5, 1)) / 5)
    stump, stump_error, stump_estimate = adaboost.buildStump(samples, labels, weights)
    print("bestStump: ", stump, " minError:", stump_error, " bestClasEst:", stump_estimate)

    classifiers, _agg_estimate = adaboost.adaBoostTrainDS(samples, labels, 9)
    print("classifierArray:", classifiers)

    # Classify a single point first, then a batch of two points.
    for query in ([0, 0], [[5, 5], [0, 0]]):
        print(adaboost.adaClassify(query, classifiers))
예제 #6
0
def testHolic():
    """Train on the horse-colic data, report test errors and plot the ROC curve.

    Prints the raw predictions, then the number of misclassified test
    samples and that number divided by the test-set size.  Finally hands the
    training-set estimates and training labels to ``adaboost.plotROC``.
    """
    datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
    classifierArray, classifierEst = adaboost.adaBoostTrainDS(datArr, labelArr, 10)

    testArr, testLabelArr = adaboost.loadDataSet('horseColicTest2.txt')
    prediction10 = adaboost.adaClassify(testArr, classifierArray)
    print("prediction:", prediction10)

    # Size the mask target and the divisor from the loaded labels rather
    # than the previously hard-coded 67.
    numTest = len(testLabelArr)
    errArr = mat(ones((numTest, 1)))
    errCnt = errArr[prediction10 != mat(testLabelArr).T].sum()
    print("err count:%d error rate:%.2f" % (errCnt, float(errCnt) / numTest))

    adaboost.plotROC(classifierEst.T, labelArr)
예제 #7
0
import adaboost
from numpy import *
# Alternative input, kept for reference:
# datMat, classLabels = adaboost.loadSimpleData()
datMat, classLabels = adaboost.loadDataSet("horseColicTraining2.txt")
# Train with 10 as the third argument, then plot the ROC curve from the
# transposed aggregate estimate.
classifierArray, aggClassEst = adaboost.adaBoostTrainDS(datMat, classLabels, 10)
adaboost.plotROC(aggClassEst.T, classLabels)
예제 #8
0
        labelArr[i] = -1
        # AdaBoost can only distinguish labels -1 and 1

# dataArr=dataMat
label = labelArr
# Split into 10 stratified folds; each iteration yields index arrays for the
# training part and the test part.
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataArr, labelArr):
    print("%s %s" % (train, test))
    # NOTE(review): the value returned by tolist() is discarded, so this line
    # appears to have no effect -- confirm before removing.
    train.tolist()
    train_in = dataArr[train]
    test_in = dataArr[test]
    train_out = label[train]
    test_out = label[test]
    train_in, train_out = RandomOverSampler().fit_sample(
        train_in, train_out)  # oversample the training set to balance the classes
    classifierArray, aggClassEst = adaboost.adaBoostTrainDS(
        train_in, train_out, 200)

    prediction_train, prob_train = adaboost.adaClassify(
        train_in, classifierArray)
    # predictions on the training set
    prediction_test, prob_test = adaboost.adaClassify(test_in, classifierArray)
    # predictions on the test set

    tmp_train, fp_train_tmp = adaboost.evaluatemodel(train_out,
                                                     prediction_train,
                                                     prob_train)
    #evaluate_train=np.array(evaluate_train);
    evaluate_train.extend(tmp_train)
    # evaluation results for the training set
    fp_train.extend(fp_train_tmp)
예제 #9
0
import adaboost
datmat, classlabel=adaboost.loadSimpData()

from numpy import *
d = mat(ones((5,1))/5)
#print(adaboost.buildStump(datmat,classlabel,d))

#classifier,aggClassEst = adaboost.adaBoostTrainDS(datmat,classlabel,9)
#print(classifier)
#print(aggClassEst)

#print(adaboost.adaClassify([[0,0],[1,1]],classifier))

# Train on the horse-colic training file, count the misclassified test
# samples, then plot the ROC curve of the training estimates.
datarr, labelarr = adaboost.loadDataSet('horseColicTraining2.txt')
classifier, aggClassEst = adaboost.adaBoostTrainDS(datarr, labelarr, 40)
testarr, testlabelarr = adaboost.loadDataSet('horseColicTest2.txt')
prediction = adaboost.adaClassify(testarr, classifier)
# One row per test sample (previously hard-coded to 67), so the boolean mask
# below matches errarr for any test file.
errarr = mat(ones((len(testlabelarr), 1)))
print(errarr[prediction != mat(testlabelarr).T].sum())
adaboost.plotROC(aggClassEst.T, labelarr)
예제 #10
0
import adaboost
from numpy import *
'''
datMat, classLabels= adaboost.loadSimpData()
D = mat(ones((5,1))/5)
adaboost.draw(datMat,classLabels)
weakClassArr = classifyierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
#print('bestStump = ',bestStump)
#print('minError = ',minError)
#print('bestClasEst = ',bestClasEst)
print('weakClassArr = ',weakClassArr)
datToClass = [[5,5],[0,0]]
adaboost.adaClassify(datToClass,weakClassArr)
'''
# --- training ---
dataMat, labelMat = adaboost.loadDataSet('horseColicTraining2.txt')
weakClassArr, aggClassEst = adaboost.adaBoostTrainDS(dataMat, labelMat, 10)

# --- evaluation on the held-out file ---
testMat, testlabelMat = adaboost.loadDataSet('horseColicTest2.txt')
prediction = adaboost.adaClassify(testMat, weakClassArr)
print('prediction= ', prediction)
errorRate = adaboost.errorRate(testlabelMat, prediction)
print('errorRate= ', errorRate)

# --- ROC curve from the training estimates ---
adaboost.plotROC(aggClassEst.T, labelMat)
import adaboost
from numpy import *

datArr, labelArr = adaboost.loadDataSet("horseColicTraining2.txt")

# Earlier experiment (error count on the test file), kept for reference:
# classifierArray = adaboost.adaBoostTrainDS(datArr, labelArr, 10)
# testArr, testLabelArr = adaboost.loadDataSet('horseColicTest2.txt')
# prediction10 = adaboost.adaClassify(testArr, classifierArray)
# errArr = mat(ones((67, 1)))
# print errArr[prediction10!=mat(testLabelArr).T].sum()

# Current run: train, then plot the ROC curve of the training estimates.
classifierArray, aggClassEst = adaboost.adaBoostTrainDS(datArr, labelArr, 10)
adaboost.plotROC(aggClassEst.T, labelArr)
예제 #12
0
파일: adaboost_test.py 프로젝트: z8g/common
 def test_adaboost_train_ds(self):
     """Call adaBoostTrainDS on the simple data set; no result is checked."""
     samples, labels = adaboost.loadSimpleData()
     # The trained classifiers are not inspected; the call only has to succeed.
     adaboost.adaBoostTrainDS(samples, labels, 9)
예제 #13
0
import adaboost
from numpy import *

# Build one stump with uniform weights and print it, train and print the
# classifier list, then train again and classify two queries.
# Every print uses the single-argument call form, which produces the same
# output as the old print statement on Python 2 and also parses on Python 3.
datMat, classLabels = adaboost.loadSimpData()
D = mat(ones((5, 1)) / 5)
print(adaboost.buildStump(datMat, classLabels, D))
classifierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
print(classifierArray)

datArr, labelArr = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datArr, labelArr, 30)
print(adaboost.adaClassify([0, 0], classifierArr))
print(adaboost.adaClassify([[5, 5], [0, 0]], classifierArr))
예제 #14
0
Created on Thu Jun 11 12:57:27 2015

@author: LiuLongpo
"""
import optunity
import adaboost
import matplotlib.pyplot as plt
from numpy import *
dataMat, classLabels = adaboost.loadSimpData()

# plt.scatter(dataMat[:,0],dataMat[:,1])
# D is the sample-weight matrix
D = mat(ones((5, 1)) / 5)
# adaboost.buildStump(dataMat,classLabels,D)
# Single-argument print(...) parses on both Python 2 and Python 3.
print('data train...')
classifierArr = adaboost.adaBoostTrainDS(dataMat, classLabels, 30)
# '%s' with a 1-tuple reproduces the old "print a, b" output exactly.
print('getClassifier: %s' % (classifierArr,))
print('data predict...')
# Three classifiers are learned.  At prediction time they are applied one
# after another and their estimates are accumulated: aggClassEst moves
# further from 0 (more positive or more negative), i.e. the classification
# becomes more confident.
adaboost.adaClassify([[1, 0.8], [1.8, 2]], classifierArr)
# 0,lt,1.3   1,lt,1.0   0,lt,0.9
plt.figure()
# Flatten the labels to a 1-D array first: comparing a plain list with 0
# does not give an element-wise mask, and a 1xN matrix would yield row
# indices (all 0) instead of sample positions.
labelVec = ravel(classLabels)
I = nonzero(labelVec > 0)[0]
plt.scatter(dataMat[I, 0], dataMat[I, 1], s=60, c=u'r', marker=u'o')
I = nonzero(labelVec < 0)[0]
plt.scatter(dataMat[I, 0], dataMat[I, 1], s=60, c=u'b', marker=u'o')


plt.plot([1.32, 1.32], [0.5, 2.5])
plt.plot([0.5, 2.5], [1.42, 1.42])
예제 #15
0
import adaboost
import numpy as np
#dataMat ,classlabel = adaboost.loadSimpData()
#print(dataMat)
#D = np.mat(np.ones((5,1)))/5
#print(adaboost.buildStump(dataMat,classlabel,D))
#classifierArray,aggest = adaboost.adaBoostTrainDS(dataMat,classlabel,9)
#print(aggest)

# Parse data.txt: every whitespace-separated field except the last four is a
# float feature, and the last field is the integer class label.
datalist = []
classlabel = []
# The context manager closes the file even if a row fails to parse.
with open('data.txt', 'r') as file:
    for line in file:
        fields = line.split()  # split once and reuse for features and label
        if not fields:
            # A blank line has no label column; skip it instead of raising
            # IndexError on fields[-1].
            continue
        data = fields[:-4]
        label = int(fields[-1])
        datalist.append(list(map(float, data)))
        classlabel.append(label)
dataMat = np.mat(datalist)
classlabels = np.mat(classlabel)
# NOTE: the plain list `classlabel` (not the matrix `classlabels`) is what is
# passed to training and plotting below.
classifierArray, aggest = adaboost.adaBoostTrainDS(dataMat, classlabel, 40,
                                                   100)
print(classifierArray)

adaboost.plotROC(aggest.T, classlabel)
예제 #16
0
import adaboost

# Train on the simple data set with 30 as the third argument.
datArr, labelArr = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datArr, labelArr, 30)

# Classify a single point, then a batch of two points; the return values
# are not used.
for _query in ([0, 0], [[5, 5], [0, 0]]):
    adaboost.adaClassify(_query, classifierArr)
예제 #17
0
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""
#@author:Benny.Chen
#@file: main.py
#@time: 2020/6/6 16:23
#@email:[email protected]
"""
import adaboost as ada
if __name__ == '__main__':
    # Load the sample data and run training with 10 as the third argument.
    # The value returned by adaBoostTrainDS is not kept.
    dataArr,classLabel = ada.loadSimpData()
    ada.adaBoostTrainDS(dataArr,classLabel,10)
예제 #18
0
파일: main.py 프로젝트: thatwaylw/pycl
@author: laiwei
date: 2017年3月4日
'''

import adaboost
from numpy import *

# Alternative input, kept for reference:
# datMat, classLabels = adaboost.loadSimpData()
# adaboost.plotData(datMat, classLabels)
datMat, classLabels = adaboost.loadDataSet("horseColicTraining2.txt")

# Single-stump experiment, kept for reference:
# D = mat(ones((5, 1))/5)
# bestStump,minError,bestClasEst = adaboost.buildStump(datMat, classLabels, D)
# print(bestStump);print(minError);print(bestClasEst)

weakClassArr, aggClassEst = adaboost.adaBoostTrainDS(datMat, classLabels, 37)
# aggClassEst[0,0] = -0.2
# classLabels[0] = -1
# print(weakClassArr);print(aggClassEst)
# print(adaboost.adaClassify([[0,0],[5,5]], weakClassArr))

# When the predictions are sorted by value, the curve bends downward wherever
# the true labels are not "all -1 first, then all +1" but are interleaved.
# adaboost.plotROC(mat(classLabels), classLabels)
adaboost.plotROC(aggClassEst.T, classLabels)

# Count the test samples whose prediction differs from the label.
testdatMat, testclassLabels = adaboost.loadDataSet("horseColicTest2.txt")
testResult = adaboost.adaClassify(testdatMat, weakClassArr)
errArr = mat(ones((len(testclassLabels), 1)))
print(
    errArr[testResult != mat(testclassLabels).T].sum(),
    "of total",
    shape(testclassLabels),
)
예제 #19
0
from numpy import *
import numpy as np
import adaboost
# Train on the simple data set and print the classification of two queries.
# The print calls use the single-argument form, which gives the same output
# as the old print statement on Python 2 and also parses on Python 3.
D = mat(ones((5, 1)) / 5)
datMat, classLabels = adaboost.loadSimpData()
# print(adaboost.buildStump(datMat, classLabels, D))
classifierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
# print(classifierArray)
print(adaboost.adaClassify([0, 0], classifierArray))
print(adaboost.adaClassify([[5, 5], [0, 0]], classifierArray))

# Horse-colic experiment, kept for reference:
# datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
# classifierArray = adaboost.adaBoostTrainDS(datArr,labelArr,10)
# print(classifierArray)
# testArr,testlabelArr = adaboost.loadDataSet('horseColicTest2.txt')
# prediction10 = adaboost.adaClassify(testArr,classifierArray)
# errArr = mat(ones((len(testlabelArr),1)))
# e=errArr[prediction10!=mat(testlabelArr).T].sum()
예제 #20
0
파일: boostTest.py 프로젝트: anty-zhang/dm
# -*- coding:utf-8 -*-
import adaboost
from numpy import *

myData, myLabels = adaboost.loadSimpData()
# The string literal below is a disabled single-stump experiment; it is an
# expression statement with no effect.
'''
print ('myData is ' , myData)
print ('myLabels is' , myLabels)

D = mat(ones((5,1))/5)
print ('D is', D)

myBStump,myMError,myBCE = adaboost.buildStump(myData, myLabels, D)
print ('myBStump is', myBStump)
print ('myMError is', myMError)
print ('myBCE is', myBCE)
'''
# Train with 30 as the third argument, then classify two points.
classiFierArray, classEst = adaboost.adaBoostTrainDS(myData, myLabels, 30)
print('classiFierArray is ', classiFierArray)
aggClassEst = adaboost.adaClassify([[5, 5], [0, 0]], classiFierArray)
print('aggClassEst is ', aggClassEst)
import adaboost
from numpy import *

# Build one stump with uniform weights and print it, train and print the
# classifier list, then train again and classify two queries.
# Every print uses the single-argument call form, which produces the same
# output as the old print statement on Python 2 and also parses on Python 3.
datMat, classLabels = adaboost.loadSimpData()
D = mat(ones((5, 1)) / 5)
print(adaboost.buildStump(datMat, classLabels, D))
classifierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
print(classifierArray)

datArr, labelArr = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datArr, labelArr, 30)
print(adaboost.adaClassify([0, 0], classifierArr))
print(adaboost.adaClassify([[5, 5], [0, 0]], classifierArr))
import os

# homedir= os.getcwd()+'/machinelearninginaction/ch07/'  # absolute path
homedir = ''  # relative path

# 7.3 Building a weak classifier from a single-level decision tree (stump)
datMat, classLabels = adaboost.loadSimpData()
D = mat(ones((5, 1)) / 5)
print "datMat:", datMat
print "classLabels:", classLabels
print "D:", D
# The trailing comma suppresses the newline.  The value returned by
# buildStump on the next line is not printed by this script.
print ":",
adaboost.buildStump(datMat, classLabels, D)

# 7.4 Implementing the complete AdaBoost algorithm
classifierArr = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
print "classifierArr:", classifierArr

# 7.5 Testing the algorithm: classification with AdaBoost
datMat, classLabels = adaboost.loadSimpData()
classifierArr = adaboost.adaBoostTrainDS(datMat, classLabels, 30)
print "分类1:", adaboost.adaClassify([0, 0], classifierArr)
print "分类2:", adaboost.adaClassify([[5, 5], [0, 0]], classifierArr)

# 7.6 Example: applying AdaBoost to a difficult data set
datArr, labelArr = adaboost.loadDataSet(homedir + 'horseColicTraining2.txt')
print "datArr:", datArr
print "labelArr:", labelArr
classifierArray = adaboost.adaBoostTrainDS(datArr, labelArr, 500)
testArr, testLabelArr = adaboost.loadDataSet(homedir + 'horseColicTest2.txt')
prediction10 = adaboost.adaClassify(testArr, classifierArray)
예제 #23
0
파일: adaboost_test.py 프로젝트: z8g/common
 def test_ada_classify(self):
     # Prints the test name, loads the simple data set and trains with 9 as
     # the third argument.
     # NOTE(review): classifierArr is not used in the lines visible here; the
     # method may continue beyond this excerpt -- confirm against the full file.
     print "test_ada_classify"
     dataMat, classLabels = adaboost.loadSimpleData()
     classifierArr = adaboost.adaBoostTrainDS(dataMat, classLabels, 9)
Created on Thu Jun 11 12:57:27 2015

@author: LiuLongpo
"""
import optunity
import adaboost
import matplotlib.pyplot as plt
from numpy import *
dataMat, classLabels = adaboost.loadSimpData()

# plt.scatter(dataMat[:,0],dataMat[:,1])
# D is the sample-weight matrix
D = mat(ones((5, 1)) / 5)
# adaboost.buildStump(dataMat,classLabels,D)
# Single-argument print(...) parses on both Python 2 and Python 3.
print('data train...')
classifierArr = adaboost.adaBoostTrainDS(dataMat, classLabels, 30)
# '%s' with a 1-tuple reproduces the old "print a, b" output exactly.
print('getClassifier: %s' % (classifierArr,))
print('data predict...')
# Three classifiers are learned.  At prediction time they are applied one
# after another and their estimates are accumulated: aggClassEst moves
# further from 0 (more positive or more negative), i.e. the classification
# becomes more confident.
adaboost.adaClassify([[1, 0.8], [1.8, 2]], classifierArr)
# 0,lt,1.3   1,lt,1.0   0,lt,0.9
plt.figure()
# Flatten the labels to a 1-D array first: comparing a plain list with 0
# does not give an element-wise mask, and a 1xN matrix would yield row
# indices (all 0) instead of sample positions.
labelVec = ravel(classLabels)
I = nonzero(labelVec > 0)[0]
plt.scatter(dataMat[I, 0], dataMat[I, 1], s=60, c=u'r', marker=u'o')
I = nonzero(labelVec < 0)[0]
plt.scatter(dataMat[I, 0], dataMat[I, 1], s=60, c=u'b', marker=u'o')

plt.plot([1.32, 1.32], [0.5, 2.5])
plt.plot([0.5, 2.5], [1.42, 1.42])
plt.plot([0.97, 0.97], [0.5, 2.5])
# print(bestStump)
# print(minError)
# print(bestClasEst)

# classifierArr = adaboost.adaBoostTrainDS(dataMat, labelMat, 40)

# print(classifierArr)

# result = adaboost.adaClassify([[1, 5], [2, 4]], classifierArr)

# print(result)

# Train on the horse-colic training file with 50 as the third argument.
dataMat, labelMat = adaboost.loadDataSet('horseColicTraining2.txt')
classifierArr, aggClassEst = adaboost.adaBoostTrainDS(dataMat, labelMat, 50)

# Show the trained classifiers and the shape of the transposed estimates.
print(classifierArr)
print(aggClassEst.T.shape)

# ROC curve from the training estimates.
adaboost.plotROC(aggClassEst.T, labelMat)

# dataMat, labelMat = adaboost.loadDataSet("horseColicTest2.txt")
# pred = adaboost.adaClassify(dataMat, classifierArr)

# print(np.mat(labelMat).shape)
# print(np.mat(labelMat).T.shape)
# print(len(dataMat))
# errorMat = np.mat(np.ones((len(dataMat), 1)))
# rate = (errorMat[pred != np.mat(labelMat).T].sum() / len(dataMat))
예제 #26
0
def main():
    """Train on the horse-colic training file and plot the resulting ROC curve."""
    training_samples, training_labels = hc.loadDataSet('horseColicTraining2.txt')
    training_result = ab.adaBoostTrainDS(training_samples, training_labels, 10)
    # Only the aggregate estimate is needed for plotting; the classifier list
    # is unpacked but not used.
    _classifiers, agg_estimate = training_result
    plotRoc(agg_estimate.T, training_labels)
예제 #27
0
    lines = list(fr.readlines())
    linesLen = len(lines)
    print(linesLen)
    numFeat = len(lines[0].strip().split('\t'))
    dataMat = []
    labelMat = []
    for i in range(linesLen):
        lineArr = []
        curLine = lines[i].strip().split('\t')
        for j in range(numFeat - 1):
            lineArr.append(float(curLine[j]))
        dataMat.append(lineArr)
        labelMat.append(float(curLine[-1]))
    return dataMat, labelMat


if __name__ == '__main__':
    import adaboost
    import adaboostDemo
    # Train on the horse-colic training file with 10 as the third argument.
    datMat, labelMat = adaboostDemo.loadDataSet(
        'C:/Users/v_wangdehong/PycharmProjects/MachineLearning_V/6.AdaBoost/input_data/horseColicTraining2.txt'
    )
    classifierArray = adaboost.adaBoostTrainDS(datMat, labelMat, 10)
    testArr, testLabelArr = adaboostDemo.loadDataSet(
        'C:/Users/v_wangdehong/PycharmProjects/MachineLearning_V/6.AdaBoost/input_data/horseColicTest2.txt'
    )
    prediction10 = adaboost.adaClassify(testArr, classifierArray)
    # One row per test sample (previously hard-coded to 67), so the boolean
    # mask below matches errArr for any test file.
    errArr = mat(ones((len(testLabelArr), 1)))
    print(shape(mat(testLabelArr)), shape(prediction10))
    # Number of test samples whose prediction differs from the label.
    print(errArr[prediction10 != mat(testLabelArr).T].sum())
예제 #28
0
#coding:utf-8

import adaboost
from numpy import *

datMat, classLabels = adaboost.loadSimpData()

# print(datMat, classLabels)

# Uniform weights: a 5x1 column in which every entry is 1/5.
D = mat(ones((5, 1)) / 5)
# print(adaboost.buildStump(datMat, classLabels, D))

classifierArray = adaboost.adaBoostTrainDS(datMat, classLabels, 9)
# Single-argument print(...) gives the same output as the old print statement
# on Python 2 and also parses on Python 3.
print(classifierArray)
예제 #29
0
import adaboost

# Train on the horse-colic training file with 10 as the third argument, then
# plot the ROC curve from the transposed aggregate estimate.
datArr, labelArr = adaboost.loadDataSet("horseColicTraining2.txt")
classifierArray, aggClassEst = adaboost.adaBoostTrainDS(
    datArr, labelArr, 10
)
adaboost.plotROC(aggClassEst.T, labelArr)
예제 #30
0
# -*- coding: utf-8 -*-

import adaboost
from numpy import *
# First run: train, classify the test file and count the mismatches.
da, la = adaboost.loadDataSet('horseColicTraining.txt')
ca = adaboost.adaBoostTrainDS(da, la, 10)
tda, tla = adaboost.loadDataSet('horseColicTest.txt')
prediction10 = adaboost.adaClassify(tda, ca)
# One row per test sample (previously hard-coded to 67), so the boolean mask
# below matches errArr for any test file.
errArr = mat(ones((len(tla), 1)))
# The sum is evaluated but not stored or printed.
errArr[prediction10 != mat(tla).T].sum()

# Second run: reload the module, then train again expecting two return
# values, and plot the ROC curve.
# NOTE(review): the builtin reload() exists only on Python 2.
reload(adaboost)
da, la = adaboost.loadDataSet('horseColicTraining.txt')
ca, ace = adaboost.adaBoostTrainDS(da, la, 40)
adaboost.plotROC(ace.T, la)
예제 #31
0
# -*- coding: utf-8 -*-
import adaboost

# dataMat,classLabels=stumpTree.loadData()
# stumpTree.adaBoostTrainDS(dataMat,classLabels)

# Train on the horse-colic training file with 50 as the third argument.
dataMat, classLabels = adaboost.file2Matrix(
    "/home/lvsolo/python/adaBoosting/horseColicTraining2.txt"
)
weakClassify = adaboost.adaBoostTrainDS(dataMat, classLabels, 50)

# Run the module's own test routine on the held-out file.
dataTest, testLabels = adaboost.file2Matrix(
    "/home/lvsolo/python/adaBoosting/horseColicTest2.txt"
)
adaboost.adaBoostTest(dataTest, testLabels, weakClassify)
예제 #32
0
#!/usr/bin/env python
# encoding=utf-8

import logging
import numpy as np
import matplotlib.pyplot as plt

import adaboost

# Root logger at DEBUG; each record shows level, module and line number.
logging.basicConfig(
    level=logging.DEBUG,
    # level=logging.INFO,
    format='[%(levelname)s %(module)s line:%(lineno)d] %(message)s',
)
# Numeric level one step below DEBUG.
TRACE = logging.DEBUG - 1

datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
weekClassArr, aggClassEst = adaboost.adaBoostTrainDS(datArr, labelArr, 20)

testDatArr, testLabelArr = adaboost.loadDataSet('horseColicTest2.txt')
# NOTE(review): the name is spelled `addClassify` here -- confirm that the
# adaboost module really exposes it under this spelling.
prediction = adaboost.addClassify(testDatArr, weekClassArr)

# One row per test sample (previously hard-coded to 67), so the boolean mask
# and the divisor below stay correct for any test file.
errArr = np.mat(np.ones((len(testLabelArr), 1)))
# Both expressions below are evaluated but their values are not stored.
errArr[prediction != np.mat(testLabelArr).T]
errArr[prediction != np.mat(testLabelArr).T].sum() / len(testLabelArr)
예제 #33
0
def AdaFeature(train_in, train_out, test_in):
    """Train on ``(train_in, train_out)`` and return the first classify result for ``test_in``.

    Training is called with 200 as its third argument.  ``adaClassify`` is
    expected to return a pair; only its first element is returned.
    """
    training_result = adaboost.adaBoostTrainDS(train_in, train_out, 200)
    weak_classifiers, _agg_estimate = training_result
    # Classify the test inputs; the second element of the pair is not used.
    classified = adaboost.adaClassify(test_in, weak_classifiers)
    predicted, _probabilities = classified
    return predicted
예제 #34
0
            label_1 = train_set.ix[:int(0.3 * len(train_set)), :]
            label_0 = train_set.ix[-int(0.3 * len(train_set)):, :]
            label_1['train_ret'] = 1
            label_0['train_ret'] = 0
            feature_one = pd.concat([label_1,
                                     label_0]).iloc[:, 2:-2].values.tolist()
            for i in range(len(feature_one)):
                features.append(feature_one[i])
            label_one = pd.concat([label_1,
                                   label_0])['train_ret'].values.tolist()
            for i in range(len(label_one)):
                labels.append(label_one[i])

        # train classifier

        classifierArr = adaboost.adaBoostTrainDS(mat(features), labels, 30)

        old_trading_day = trading_date_open[
            trading_date_open['calendarDate'] <
            trading_day]['calendarDate'].values[-1]

        predict_data = pd.read_csv('data/factor_old' + old_trading_day +
                                   '.csv')
        predict_data = predict_data.dropna()
        predict_data.iloc[:, 3:] = predict_data.iloc[:, 3:].rank(
            method='first').apply(lambda x: x / len(predict_data))
        x = predict_data.iloc[:, 3:].values.tolist()

        # predict
        y = adaboost.adaClassify(x, classifierArr)
        predict_label = predict_data.loc[:, ['secID', 'tradeDate']]
            label_1=train_set.ix[:int(0.3*len(train_set)),:]
            label_0=train_set.ix[-int(0.3*len(train_set)):,:]
            label_1['train_ret']=1
            label_0['train_ret']=0
            feature_one=pd.concat([label_1,label_0]).iloc[:,2:-2].values.tolist()
            for i in range(len(feature_one)):
                features.append(feature_one[i])
            label_one=pd.concat([label_1,label_0])['train_ret'].values.tolist()
            for i in range(len(label_one)):
                labels.append(label_one[i])



        # train classifier
        
        classifierArr = adaboost.adaBoostTrainDS(mat(features), labels, 30)

        old_trading_day=trading_date_open[trading_date_open['calendarDate'] < trading_day]['calendarDate'].values[-1]        

        predict_data = pd.read_csv('data/factor_old'+old_trading_day+'.csv')
        predict_data=predict_data.dropna()
        predict_data.iloc[:,3:]=predict_data.iloc[:,3:].rank(method='first').apply(lambda x : x/len(predict_data))
        x=predict_data.iloc[:,3:].values.tolist()
        
        # predict
        y=adaboost.adaClassify(x,classifierArr)
        predict_label=predict_data.loc[:,['secID','tradeDate']]
        predict_label['pro']=y
        predict_label['label']=sign(y)
        buy=predict_label[predict_label['label'] == 1]
        buy=buy.sort(columns=['pro'],ascending=False)[:45]
예제 #36
0
# -*- coding:utf-8 -*-
import adaboost
from numpy import *
import time


# Time the whole train / classify / count-errors run.
time_start = time.time()
datArr, labelArr = adaboost.loadDataSet("horseColicTraining2.txt")
classifierArr, aggBestEst = adaboost.adaBoostTrainDS(datArr, labelArr, 30)

testArr, testLabelArr = adaboost.loadDataSet("horseColicTest2.txt")
prediction10 = adaboost.adaClassify(testArr, classifierArr)
# m is the number of test rows; errArr holds a 1 for each of them.
m = shape(testArr)[0]
errArr = mat(ones((m, 1)))
# Summing the ones selected by the mismatch mask gives the error count.
errCount = errArr[prediction10 != mat(testLabelArr).T].sum()
print('errCount is ', errCount, 'error rate is ', errCount / m)
time_end = time.time()
print('the program spend  %d s' % (time_end - time_start))
예제 #37
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'7.6'

__author__ = 'lxp'

import adaboost
import numpy as np

# Train on the horse-colic training file, classify the test file and print
# the number of test samples whose prediction differs from the label.
datArr, labelArr = adaboost.loadDataSet('horseColicTraining2.txt')
classifierArray = adaboost.adaBoostTrainDS(datArr, labelArr, 10)
testArr, testLabelArr = adaboost.loadDataSet('horseColicTest2.txt')
prediction10 = adaboost.adaClassify(testArr, classifierArray)
# One row per test sample (previously hard-coded to 67), so the boolean mask
# below matches errArr for any test file.
errArr = np.mat(np.ones((len(testLabelArr), 1)))
print(errArr[prediction10 != np.mat(testLabelArr).T].sum())
예제 #38
0
def main():
    """Train on the horse-colic training file and plot the resulting ROC curve."""
    training_samples, training_labels = hc.loadDataSet('horseColicTraining2.txt')
    training_result = ab.adaBoostTrainDS(training_samples, training_labels, 10)
    # Only the aggregate estimate is needed for plotting; the classifier list
    # is unpacked but not used.
    _classifiers, agg_estimate = training_result
    plotRoc(agg_estimate.T, training_labels)