def colicTest(): frTrain = open('horseColicTraining.txt') frTest = open('horseColicTest.txt') trainingSet = [] trainingLabels = [] for line in frTrain.readlines(): currLine = line.strip().split('\t') lineArr = [] for i in range(21): lineArr.append(float(currLine[i])) trainingSet.append(lineArr) trainingLabels.append(float(currLine[21])) trainWeights = logRegres.stocGradAscent1(array(trainingSet), trainingLabels, 500) errorCount = 0 numTestVec = 0.0 for line in frTest.readlines(): numTestVec += 1.0 currLine = line.strip().split('\t') lineArr = [] for i in range(21): lineArr.append(float(currLine[i])) if int(classifyVector(array(lineArr), trainWeights)) != int( currLine[21]): errorCount += 1 errorRate = (float(errorCount) / numTestVec) print "the error rate of this test is : %f" % errorRate return errorRate
def colicTest(): #create training data frTrain = open('horseColicTraining.txt') frTest = open('horseColicTest.txt') trainingSet = [] trainingLabels = [] for line in frTrain.readlines(): currentLine = line.strip().split('\t') lineArr = [] for i in range(21): lineArr.append(float(currentLine[i])) trainingSet.append(lineArr) trainingLabels.append(float(currentLine[21])) #training trainWeights = lr.stocGradAscent1(array(trainingSet), trainingLabels, 500) #test errorCount = 0 numTestVec = 0.0 for line in frTest.readlines(): numTestVec += 1.0 currLine = line.strip().split('\t') lineArr = [] for i in range(21): lineArr.append(float(currLine[i])) if int(classifyVector(array(lineArr), trainWeights)) != int(currLine[21]): errorCount += 1 errorRate = float(errorCount)/numTestVec print "the error rate of this set is: %f " %errorRate return errorRate
def test_best_stoc_grade_plot(self):
    """Fit weights with the improved stochastic gradient ascent and plot the fit."""
    features, targets = logRegres.loadDataSet()
    print("\n data_set == %s" % (features))
    print("\n label_mat == %s" % (targets))
    # 200 passes of stocGradAscent1 over the sample data.
    fitted = logRegres.stocGradAscent1(array(features), targets, 200)
    print("\n weights == %s" % (fitted))
    # Draw the fitted separating line over the data scatter.
    logRegres.plotBestFit(fitted)
def colicTest(): trainingSet,trainingLabels = dln.loadDataSet('horseColicTraining.txt') #载入训练数据 trainWeights = lgr.stocGradAscent1(array(trainingSet), trainingLabels, 1000) #训练得到模型参数:改进的随机梯度上升算法 testSet,testLabels = dln.loadDataSet('horseColicTest.txt') #载入训练数据 errorCount = 0; numTestVec = 0.0 m,n = shape(testSet) for i in range(m): numTestVec += 1.0 if int(lgr.classifyVector(array(testSet[i]), trainWeights))!= int(testLabels[i]): errorCount += 1 errorRate = (float(errorCount)/numTestVec) print "the error rate of this test is: %f" % errorRate return errorRate
def colicTest():
    """Fit weights on the training file and return the test-file error rate."""
    trainingSet, trainingLabels = loadData('horseColicTraining.txt')
    testSet, testLabels = loadData('horseColicTest.txt')
    # Improved stochastic gradient ascent, 500 passes.
    trainWeights = logRegres.stocGradAscent1(np.array(trainingSet), trainingLabels, 500)
    numTestVec = len(testLabels)
    # Count misclassified samples with a generator instead of a manual counter.
    errorCount = sum(
        1 for i in range(numTestVec)
        if int(classifyVector(testSet[i], trainWeights)) != int(testLabels[i]))
    errorRate = float(errorCount) / numTestVec
    print('the error rate of this test is: %f' % errorRate)
    return errorRate
def colicTest():
    """Train, predict on the test split, and return (and print) the error rate."""
    trainData = loadDataset('horseColicTraining.txt')
    testData = loadDataset('horseColicTest.txt')
    # Last column is the label; everything before it is the feature matrix.
    X_train, y_train = trainData[:, :-1], trainData[:, -1]
    X_test, y_test = testData[:, :-1], testData[:, -1]
    weights, costs = logRegres.stocGradAscent1(X_train, y_train)
    y_pred = logRegres.predict(X_test, weights)
    # Count disagreements between predictions and ground truth.
    errorCount = sum(1 for predicted, actual in zip(y_pred, y_test) if predicted != actual)
    errorRate = errorCount / y_test.shape[0]
    print('Error rate is', errorRate)
    return errorRate
def costTest(intNum=100):
    """Train logistic-regression weights with intNum stochastic passes.

    Args:
        intNum: number of iterations passed to lr.stocGradAscent1.

    NOTE(review): this function looks unfinished -- errorCount, numTestVec and
    frTest are set up but never used; presumably a test loop was meant to
    follow. The large block of commented-out file-parsing code (superseded by
    loadTrainData) has been removed.
    """
    trainingSet, traingLabels = loadTrainData()
    trainWeights = lr.stocGradAscent1(array(trainingSet), traingLabels, intNum)
    errorCount = 0
    numTestVec = 0.0
    frTest = open('horseColicTest.txt')
def LRFeature(train_in, train_out, test_in):
    """Logistic-regression pipeline: add a bias column, oversample the training
    set, fit weights, and return hard predictions for test_in as an ndarray."""
    n_train = np.shape(train_in)[0]
    n_test = np.shape(test_in)[0]
    # LR-specific preprocessing: prepend a column of ones (intercept term).
    train_in = np.c_[np.ones((n_train, 1)), train_in]
    test_in = np.c_[np.ones((n_test, 1)), test_in]
    # Random oversampling to balance the classes before training.
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)
    # Improved stochastic gradient ascent, 500 training passes.
    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)
    # Classify each test sample: dot its features with the learned weights.
    len_test = np.shape(test_in)[0]
    test_predict = np.array(
        [LR.classifyVector(test_in[i, :], trainWeights) for i in range(len_test)])
    return test_predict
def colicTest(numIter):
    """Train with numIter stochastic passes and return the test error rate.

    Args:
        numIter: iteration count forwarded to logRegres.stocGradAscent1.

    Returns:
        float: misclassification rate on horseColicTest.txt.
    """
    trainingMat = []
    trainingLabel = []
    # 'with' closes the handle (the original leaked both files).
    with open('horseColicTraining.txt', encoding='ISO-8859-1') as frTrain:
        for line in frTrain.readlines():
            # 21 features, trailing column is the label.
            lineArr = [float(i) for i in line.strip().split('\t')]
            trainingMat.append(lineArr[:-1])
            trainingLabel.append(int(lineArr[-1]))
    trainWeight = logRegres.stocGradAscent1(dataMat=np.array(trainingMat), labelMat=trainingLabel, numIter=numIter)
    errorCount = 0.0
    numTestVec = 0.0
    with open('horseColicTest.txt', encoding='ISO-8859-1') as frTest:
        for line in frTest.readlines():
            numTestVec += 1
            lineArr = [float(i) for i in line.strip().split('\t')]
            if int(classifyVector(np.array(lineArr[:-1]), weight=trainWeight)) != int(lineArr[-1]):
                errorCount += 1
    errorRate = (errorCount) / (numTestVec)
    return errorRate
#!/usr/bin/python
# Fit weights with the improved stochastic gradient ascent, then plot the fit.
import numpy as np
import logRegres

samples, labels = logRegres.loadDataSet()
fitted = logRegres.stocGradAscent1(np.array(samples), labels)
logRegres.plotBestFit(fitted)
def stocGradAscent1(numIter): dataArr, labelMat = logRegres.loadDataSet() weights = logRegres.stocGradAscent1(array(dataArr), labelMat, numIter); print weights logRegres.plotBestFit(weights);
import logRegres
from numpy import *

if __name__ == '__main__':
    # Compare the three trainers on the same sample data.
    samples, labels = logRegres.loadDataSet()
    # Batch gradient ascent; .getA() converts its matrix result for plotting.
    batch_weights = logRegres.gradAscent(samples, labels)
    logRegres.plotBestFit(batch_weights.getA())
    # Plain stochastic gradient ascent.
    sga_weights = logRegres.stocGradAscent0(array(samples), labels)
    logRegres.plotBestFit(sga_weights)
    # Improved stochastic gradient ascent.
    improved_weights = logRegres.stocGradAscent1(array(samples), labels)
    logRegres.plotBestFit(improved_weights)
from numpy import *

import logRegres

# Exercise all three trainers and print the resulting coefficients.
samples, labels = logRegres.loadDataSet()

coeffs = logRegres.gradAscent(samples, labels)
print(coeffs)
print(coeffs.getA())
#logRegres.plotBestFit(coeffs.getA())

coeffs = logRegres.stocGradAscent0(array(samples), labels)
print(coeffs)
#logRegres.plotBestFit(coeffs)

coeffs = logRegres.stocGradAscent1(array(samples), labels)
print(coeffs)
#logRegres.plotBestFit(coeffs)

# Horse-colic mortality prediction demo.
logRegres.multiTest()
# NOTE(review): this chunk appears to start mid-session -- dataArr/labelMat and
# the logRegres import are assumed to exist from statements not shown here.
reload(logRegres)
weights = logRegres.gradAscent(dataArr, labelMat)
logRegres.plotBestFit(weights.getA())
# Test stochastic gradient ascent
from numpy import *
from importlib import reload
reload(logRegres)
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent0(array(dataArr), labelMat)
logRegres.plotBestFit(weights)
# Test the improved stochastic gradient ascent
from numpy import *
from importlib import reload
reload(logRegres)
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(array(dataArr), labelMat)
logRegres.plotBestFit(weights)
# Same trainer with 500 explicit passes.
weights = logRegres.stocGradAscent1(array(dataArr), labelMat, 500)
logRegres.plotBestFit(weights)
# Test horse-colic mortality prediction
from importlib import reload
reload(logRegres)
logRegres.multiTest()
# NOTE(review): chunk starts mid-script -- dataMat/labelMat, StratifiedKFold,
# RandomOverSampler, LR and np are assumed defined earlier, and the final
# training-fold loop looks truncated at the end of this view.
prenum_train = []
prenum_test = []
# 10-fold stratified cross-validation over the full data set.
skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataMat, labelMat):
    #==============================================================================
    #     skf=StratifiedShuffleSplit(n_splits=10)
    #     for train,test in skf.split(dataMat,labelMat):
    #==============================================================================
    print("%s %s" % (train, test))
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]
    test_out = labelMat[test]
    # Oversample the training fold only, to balance the classes.
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)
    # Improved stochastic gradient ascent, 500 passes.
    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)
    len_train = np.shape(train_in)[0]
    len_test = np.shape(test_in)[0]
    # Hard labels and probabilities for the held-out fold.
    test_predict = []
    proba_test = []
    for i in range(len_test):
        test_predict_tmp = LR.classifyVector(test_in[i, :], trainWeights)
        test_predict.append(test_predict_tmp)
        proba_test_tmp = LR.classifyProb(test_in[i, :], trainWeights)
        proba_test.append(proba_test_tmp)
    # Same predictions on the training fold (body continues beyond this chunk).
    train_predict = []
    proba_train = []
    for i in range(len_train):
        train_predict_tmp = LR.classifyVector(train_in[i, :], trainWeights)
def run(): dataMat, labelMat = lr.loadDataSet() weights = lr.stocGradAscent1(dataMat, labelMat) print weights lr.plotBestFit(weights)
#!usr/bin/python
#coding:utf8
import logRegres
from numpy import *

# Fit weights with the improved stochastic gradient ascent and draw the
# decision boundary over the data (this plotBestFit takes data, labels, weights).
samples, targets = logRegres.loadDataSet()
fitted = logRegres.stocGradAscent1(array(samples), targets)
logRegres.plotBestFit(samples, targets, fitted)
import logRegres
import numpy as np

# Demo of the improved stochastic gradient ascent (stocGradAscent1).
# Loads the data once (the original re-loaded it redundantly) and drops the
# block of commented-out gradAscent/stocGradAscent0 experiments.
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(np.array(dataArr), labelMat)
# Print the coefficients from a longer run (500 passes) for comparison.
print(logRegres.stocGradAscent1(np.array(dataArr), labelMat, 500))
logRegres.plotBestFit(weights)
# End-to-end horse-colic prediction test.
logRegres.multiTest()
__author__ = 'sunbeansoft'

import logRegres as lr
from numpy import *

# Run every trainer in turn and plot each fitted boundary.
samples, labels = lr.loadDataSet()

# Batch gradient ascent; getA() converts its matrix result for the plotter.
w = lr.gradAscent(samples, labels)
lr.plotBestFit(w.getA())

# Plain stochastic gradient ascent.
w = lr.stocGradAscent0(array(samples), labels)
lr.plotBestFit(w)

# Improved stochastic gradient ascent.
w = lr.stocGradAscent1(array(samples), labels)
lr.plotBestFit(w)

# Horse-colic mortality prediction.
lr.multiTest()
# coding=utf-8 import numpy import logRegres dataMat, labelMat = logRegres.loadDataSet() weights = logRegres.gradAscent(dataMat, labelMat) print weights #logRegres.plotBestFit(weights.getA()) weights = logRegres.stocGradAscent0(dataMat, labelMat) print '--随机梯度' #logRegres.plotBestFit(weights) weights = logRegres.stocGradAscent1(dataMat, labelMat) print '--改进的随机梯度' logRegres.plotBestFit(weights) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) x = numpy.arange(-8.0, 8.0, 0.2) print logRegres.sigmoid(-0.3) # 设置偏移量 不是所有数据都是x=0划分 y = [logRegres.sigmoid(xi - 0.1) for xi in x] ax.plot(x, y) plt.xlabel('X1') plt.ylabel('X2') #plt.show() print '--预测病马' logRegres.multiTest()
def stocGradAscent1(numIter): dataArr, labelMat = logRegres.loadDataSet() weights = logRegres.stocGradAscent1(array(dataArr), labelMat, numIter) print weights logRegres.plotBestFit(weights)
#其他:程序提供的例子有两个属性(不含常数x0=1),则绘制出的是切分二位空间的直线x1+x2=0,如果是三个属性,则绘制出的是切分三维空间的平面x1+x2+x3=0。 #同理可知,四个属性绘制出的是切分思维空间的立方体(思维图形是想象不出来的,但是其投影是可以想象出来的三维,所以可推测x1+x2+x3+x4=0是思维空间表达式下的三维空间方程)。 #一维是直线,二位是平面,三维是显示空间,平面可能存在交集,但蚂蚁不能从一个平面到另一个平面,甚至不能抬头看见另一个平面。同理现实空间与现实空间也会存在交集,但是去不了,甚至不能抬头看见另一个空间。 #时间:2018年6月8日(星期五) 下午开始,6月12日(星期二)上午完成备注分析 import logRegres from numpy import * #从文件夹中提取数据 dataArr, labelMat = logRegres.loadDataSet() #加载数据,存放在列表中 print "\n数据列表是:\n", dataArr #打印数据,测试读取是否异常 print "\n类列表是:\n", labelMat #用数据和标签 利用梯度上升算法计算 权重 weights = logRegres.gradAscent(dataArr, labelMat) #梯度上升算法计算最佳参数值 stocWeights = logRegres.stocGradAscent1(array(dataArr), labelMat, 500) #随机梯度上升算法计算最佳参数值 print "\n权重w0,w1,w2的值是:\n", weights ####################################### 第一个图:梯度上升算法的例子 ################################# #利用权重绘制直线 利用数据绘制点 print "\n第一个图:梯度上升算法的例子" #梯度上升算法:批量处理方法(一次性处理所有数) logRegres.plotBestFit( weights.getA()) # .getA()将矩阵转换成数组 因为数组可以很方便的任意读取其中的元素,矩阵不行 ####################################### 第二个图:随机梯度上升算法的例子 ################################# #随机梯度上升:在线学习方法(新样本来到时,对分类器进行增量式更新) print "第二个图:随机梯度上升算法的例子" # logRegres.plotBestFit(stocWeights) ######################################## 第三个例子:预测病马的死亡率 #################################
import logRegres
import numpy as np

# Quick sanity check: multiply the data by an all-ones weight column.
dataArr, labelMat = logRegres.loadDataSet()
weight = np.ones((3, 1))
value = np.mat(dataArr) * weight
labelMatrix = np.mat(labelMat)

# BUG FIX: train on the inputs the trainer expects -- an ndarray of samples and
# the plain label list. The old code passed the raw list plus np.mat labels and
# then called .getA() on the returned ndarray; .getA() is an np.matrix method,
# so the plot call raised AttributeError. Every sibling driver in this file
# passes array(dataArr) and plots the result directly.
weight = logRegres.stocGradAscent1(np.array(dataArr), labelMat)
print(weight)
logRegres.plotBestFit(weight)
# -*- coding: utf-8 -*-
from numpy import *

import logRegres

# Batch gradient ascent first.
samples, classes = logRegres.loadDataSet()
batch_w = logRegres.gradAscent(samples, classes)
logRegres.plotBestFit(batch_w)

# Reload the module, then try both stochastic variants.
reload(logRegres)
sga_w = logRegres.stocGradAscent0(array(samples), classes)
logRegres.plotBestFit(sga_w)
improved_w = logRegres.stocGradAscent1(array(samples), classes)
logRegres.plotBestFit(improved_w)

# Horse-colic prediction demo (repeat import kept from the original).
import logRegres
logRegres.multiTest()
# autor: zhumenger
import logRegres
from numpy import *

# Print the batch-gradient-ascent coefficients.
dataArr, labelMat = logRegres.loadDataSet()
print(logRegres.gradAscent(dataArr, labelMat))

# Improved stochastic gradient ascent + decision-boundary plot.
# Fixed the 'weigths' typo and stopped print()-ing plotBestFit's return value
# (it is a plotting routine; presumably it returns nothing useful -- the old
# code just printed None after showing the figure).
weights = logRegres.stocGradAscent1(array(dataArr), labelMat)
logRegres.plotBestFit(weights)
import logRegres
from numpy import *

# Print the coefficients fitted by each of the three trainers.
samples, labels = logRegres.loadDataSet()
# Batch gradient ascent coefficients.
print(logRegres.gradAscent(samples, labels))
# Coefficients fitted by plain stochastic gradient ascent.
print(logRegres.stocGradAscent0(array(samples), labels))
# Coefficients fitted by the improved stochastic gradient ascent.
print(logRegres.stocGradAscent1(array(samples), labels))