def colicTest():
    """Train logistic regression on horseColicTraining.txt and report
    the error rate on horseColicTest.txt.

    Each line of both files holds 21 tab-separated feature values
    followed by one class label.

    Returns:
        float: misclassification rate on the test set.
    """
    trainingSet = []
    trainingLabels = []
    # 'with' closes the file even on parse errors — the original leaked
    # both file handles.
    with open('horseColicTraining.txt') as frTrain:
        for line in frTrain:
            currLine = line.strip().split('\t')
            # Columns 0-20 are features, column 21 is the label.
            trainingSet.append([float(currLine[i]) for i in range(21)])
            trainingLabels.append(float(currLine[21]))
    # Improved stochastic gradient ascent, 500 passes over the data.
    trainWeights = logRegres.stocGradAscent1(
        np.array(trainingSet), trainingLabels, 500)
    errorCount = 0
    numTestVec = 0.0
    with open('horseColicTest.txt') as frTest:
        for line in frTest:
            numTestVec += 1.0
            currLine = line.strip().split('\t')
            lineArr = [float(currLine[i]) for i in range(21)]
            if int(logRegres.classifyVector(np.array(lineArr),
                                            trainWeights)) != int(currLine[21]):
                errorCount += 1
    errorRate = float(errorCount) / numTestVec
    # Single-argument print() is valid in both Python 2 and 3; the
    # original Python 2 print statement was a SyntaxError under Python 3.
    print('the error rate of this test is: %f' % errorRate)
    return errorRate
def colicTest():
    """Train a logistic-regression model on horseColicTraining.txt and
    evaluate it on horseColicTest.txt.

    Each line of both files holds 21 tab-separated feature values
    followed by one class label.

    Returns:
        float: misclassification rate on the test set.
    """
    trainingSet = []
    trainingLabels = []
    # Build the training matrix. 'with' guarantees the file is closed
    # (the original leaked the handle), and the per-row debug prints of
    # currLine/lineArr — which flooded stdout on every training line —
    # have been removed.
    with open('horseColicTraining.txt') as frTrain:
        for line in frTrain:
            currLine = line.strip().split('\t')
            # Columns 0-20 are features, column 21 is the label.
            trainingSet.append([float(currLine[i]) for i in range(21)])
            trainingLabels.append(float(currLine[21]))
    # Improved stochastic gradient ascent, 1000 passes over the data.
    trainWeights = logRegres.stocGradAscent1(
        array(trainingSet), trainingLabels, 1000)
    errorCount = 0
    numTestVec = 0.0
    # Evaluate the fitted weights on the held-out test file.
    with open('horseColicTest.txt') as frTest:
        for line in frTest:
            numTestVec += 1.0
            currLine = line.strip().split('\t')
            lineArr = [float(currLine[i]) for i in range(21)]
            if int(logRegres.classifyVector(array(lineArr),
                                            trainWeights)) != int(currLine[21]):
                errorCount += 1
    errorRate = float(errorCount) / numTestVec
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
def colicTest():
    """Train on horseColicTraining.txt and test on horseColicTest.txt.

    Loads both datasets via dln.loadDataSet, fits the weights with the
    improved stochastic gradient ascent (1000 passes), and reports the
    misclassification rate on the test set.

    Returns:
        float: misclassification rate on the test set.
    """
    # Load the training data and fit the model parameters.
    trainingSet, trainingLabels = dln.loadDataSet('horseColicTraining.txt')
    trainWeights = lgr.stocGradAscent1(array(trainingSet), trainingLabels, 1000)
    # Load the test data (the original comment mislabeled this as
    # loading the *training* data).
    testSet, testLabels = dln.loadDataSet('horseColicTest.txt')
    errorCount = 0
    numTestVec = 0.0
    m, n = shape(testSet)
    # Classify each test sample and tally the mistakes.
    for i in range(m):
        numTestVec += 1.0
        if int(lgr.classifyVector(array(testSet[i]),
                                  trainWeights)) != int(testLabels[i]):
            errorCount += 1
    errorRate = float(errorCount) / numTestVec
    # Single-argument print() is valid in both Python 2 and 3; the
    # original Python 2 print statement was a SyntaxError under Python 3.
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
def LRFeature(train_in, train_out, test_in):
    """Oversample the training set, train logistic regression, and
    predict labels for the test set.

    Parameters:
        train_in: 2-D array of training features (one row per sample).
        train_out: 1-D array of training labels.
        test_in: 2-D array of test features.

    Returns:
        np.ndarray: predicted class for each row of test_in.
    """
    n_train = np.shape(train_in)[0]
    n_test = np.shape(test_in)[0]
    # LR-specific preprocessing: prepend a bias column of ones so the
    # intercept is learned as the first weight.
    train_in = np.c_[np.ones((n_train, 1)), train_in]
    test_in = np.c_[np.ones((n_test, 1)), test_in]
    # Balance the classes by random oversampling before training.
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)
    # 500 passes of stochastic gradient ascent.
    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)
    # Classify one sample at a time (dot product of features and
    # weights). The bias column does not change the row count, so
    # n_test is reused instead of recomputing np.shape(test_in)[0].
    test_predict = np.array(
        [LR.classifyVector(test_in[i, :], trainWeights)
         for i in range(n_test)])
    return test_predict
# for train,test in skf.split(dataMat,labelMat): #============================================================================== print("%s %s" % (train, test)) train_in = dataMat[train] test_in = dataMat[test] train_out = labelMat[train] test_out = labelMat[test] train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out) trainWeights = LR.stocGradAscent1(train_in, train_out, 500) len_train = np.shape(train_in)[0] len_test = np.shape(test_in)[0] test_predict = [] proba_test = [] for i in range(len_test): test_predict_tmp = LR.classifyVector(test_in[i, :], trainWeights) test_predict.append(test_predict_tmp) proba_test_tmp = LR.classifyProb(test_in[i, :], trainWeights) proba_test.append(proba_test_tmp) train_predict = [] proba_train = [] for i in range(len_train): train_predict_tmp = LR.classifyVector(train_in[i, :], trainWeights) train_predict.append(train_predict_tmp) proba_train_tmp = LR.classifyProb(train_in[i, :], trainWeights) proba_train.append(proba_train_tmp) test1, test2 = ann.evaluatemodel(train_out, train_predict, proba_train) #test model with trainset evaluate_train.extend(test1)