def test_get():
    """Smoke-test loadDataSet/gradAscent: load the sample data, echo it, and print the fitted weights.

    Fix: the original used Python 2 `print` statements, inconsistent with the
    Python 3 `print()` calls used elsewhere in this file.
    """
    dataSet, labels = logRegres.loadDataSet()
    print(dataSet)
    print(labels)
    # plotUtil.plot2DScatterWith2Type(dataSet, labels, 1, 0)
    print(logRegres.gradAscent(dataSet, labels))
def test_grade_plot(self):
    """Run batch gradient ascent on the sample data set and plot the fitted boundary."""
    samples, targets = logRegres.loadDataSet()
    print("\n data_set == %s" % (samples))
    print("\n label_mat == %s" % (targets))
    fitted = logRegres.gradAscent(samples, targets)
    print("\n weights == %s" % (fitted))
    # getA() converts the numpy matrix into a plain Python-accessible array
    logRegres.plotBestFit(fitted.getA())
def test_debug_logregres(self):
    """Debug gradAscent on a tiny hand-crafted data set.

    The separating line passes through (0, 1) and (3, 4), i.e. y = x + 1.
    Each row is [x, y, label]; label == 1 means above the line, 0 below.
    Because these coordinates are comparatively large, the step size inside
    gradAscent must be enlarged accordingly, or the fitted boundary is off.

    Fix: the manual `for ... append` loop flattening the label column is
    replaced by the idiomatic `list(...)` over the same iterator.
    """
    data_set = [
        [1, 3, 1],
        [2, 4, 1],
        [2, 5, 1],
        # [-1, 2, 1],
        # [-2, 4, 1],
        # [-3, 3, 1],
        [1, 1, 0],
        [2, 1, 0],
        [3, 3, 0]
        # [0, 0, 0],
        # [-2, -3, 0],
        # [-4, -5, 0]
    ]
    data_set = mat(data_set)
    m, n = shape(data_set)
    print("\n data_set == %s" % (data_set))
    # sub-matrix without the label column
    data_in = data_set[:, :n - 1]
    # prepend the constant feature x0 = 1
    one = ones((m, 1))
    print("\n one == %s" % (one))
    data_in = column_stack((one, data_in))
    classmat = data_set[:, n - 1:]
    # flatten the label column into a plain list (was an explicit append loop)
    classLabel = list(classmat.flat)
    print("\n\n data_in == %s" % (data_in))
    print("\n classLabel == %s" % (classLabel))
    weights = logRegres.gradAscent(data_in, classLabel)
    print("\n weights == %s" % (weights))
    # getA() converts the numpy matrix into a plain array
    logRegres.plotBestFit1(weights.getA(), data_in, classLabel)
def test_plotBestFit():
    """Fit weights with batch gradient ascent and render the decision boundary."""
    samples, labels = logRegres.loadDataSet()
    fitted = logRegres.gradAscent(samples, labels)
    # getA() turns the weight matrix into a plain ndarray for plotting
    logRegres.plotBestFit(fitted.getA())
# Minimal driver: load the sample data set and print the batch-gradient-ascent weights.
import logRegres

data_points, class_labels = logRegres.loadDataSet()
print(logRegres.gradAscent(data_points, class_labels))
'''
Created on May 27, 2014

Logistic Regression Main Study

@author: Guodong Jin
'''
import logRegres
from numpy import *

# Load the chapter's sample data set.
# Fix: the original used Python 2 `print` statements; converted to print().
dataArr, labelMat = logRegres.loadDataSet()
print(len(dataArr))
print(labelMat)

# Batch gradient ascent over the full data set.
res_w = logRegres.gradAscent(dataArr, labelMat)
print(res_w)
# logRegres.plotBestFit(res_w.getA())

# Stochastic gradient ascent variant that also returns per-weight histories.
res_w, l_w0, l_w1, l_w2 = logRegres.stocGradAscent1_0(array(dataArr), labelMat, 100)
logRegres.plotBestFit(res_w)

# import matplotlib.pyplot as plt
# x = range(len(l_w0))
# fig = plt.figure()
# ax = fig.add_subplot(111)
# print(res_w)
# ax.plot(x, array(l_w1))
# plt.show()
def gradAscent():
    """Fit weights via logRegres.gradAscent, print them, and plot the boundary.

    Fix: the original used a Python 2 `print` statement; converted to print().
    """
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.gradAscent(dataArr, labelMat)
    print(weights)
    # getA() converts the numpy matrix into a plain ndarray for plotting
    logRegres.plotBestFit(weights.getA())
from numpy import *
import logRegres

# Batch gradient ascent: returns a numpy weight matrix.
samples, labels = logRegres.loadDataSet()
w = logRegres.gradAscent(samples, labels)
print(w)
print(w.getA())
# logRegres.plotBestFit(w.getA())

# Plain stochastic gradient ascent.
w = logRegres.stocGradAscent0(array(samples), labels)
print(w)
# logRegres.plotBestFit(w)

# Improved stochastic gradient ascent.
w = logRegres.stocGradAscent1(array(samples), labels)
print(w)
# logRegres.plotBestFit(w)

# Run the module's end-to-end classification test.
logRegres.multiTest()
import logRegres
import sys

# Fit regression weights once, then score tab-separated coordinate pairs
# read from stdin, echoing "input<TAB>score" per line.
# Fix: the original used a Python 2 `print` statement; converted to print().
dataArr, labelMat = logRegres.loadDataSet()
matrix = logRegres.gradAscent(dataArr, labelMat)
# print(matrix[1])
for line in sys.stdin:
    line = line.strip()
    lines = line.split('\t')
    # linear score: w0 + w1 * field0 + w2 * field1
    result = float(matrix[0]) + float(lines[0]) * float(matrix[1]) + float(
        lines[1]) * float(matrix[2])
    print("%s\t%f" % (line, result))
__author__ = 'sunbeansoft'

import logRegres as lr
from numpy import *

# Exercise all three training variants on the sample data and plot each fit.
features, labels = lr.loadDataSet()

batch_w = lr.gradAscent(features, labels)
lr.plotBestFit(batch_w.getA())  # getA(): matrix -> plain ndarray

sgd_w = lr.stocGradAscent0(array(features), labels)
lr.plotBestFit(sgd_w)

sgd1_w = lr.stocGradAscent1(array(features), labels)
lr.plotBestFit(sgd1_w)

lr.multiTest()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author:yiluzhang

import logRegres

if __name__ == "__main__":
    # Exercise batch gradient ascent and plot the fitted boundary.
    features, labels = logRegres.loadDataSet()
    fitted = logRegres.gradAscent(features, labels)
    logRegres.plotBestFit(fitted)
# -*- coding:utf-8 -*-
import logRegres

# Load the sample data and print the batch-gradient-ascent weights.
# Fix: the original used a Python 2 `print` statement; converted to print().
dataArr, labelMat = logRegres.loadDataSet()
print(logRegres.gradAscent(dataArr, labelMat))
import logRegres
from numpy import *

# Load the sample data set.
features, labels = logRegres.loadDataSet()
# print(a1)
# print(a2)

# Batch gradient ascent; getA() converts the weight matrix to an ndarray.
fitted = logRegres.gradAscent(features, labels)
print(fitted.getA())
logRegres.plotBestFit(fitted.getA())  ###perfect

# c1 = logRegres.stocGradAscent1(array(a1),a2)
# logRegres.plotBestFit(c1)
#__author__: dongj
#date: 2018/7/1
import sys
import os
sys.path.append(os.path.abspath(os.path.dirname(os.getcwd())))
import logRegres
import figure_plot

## step 1: load data
dataArr, labelMat = logRegres.loadDataSet()
test_x = dataArr
test_y = labelMat

## step 2: training...
print("step 2: training...")
# Fix: the original ran logRegres.gradAscent twice — once just to print the
# result and once to keep it — repeating the whole optimization. Train once
# and reuse the weights.
optimalWeights = logRegres.gradAscent(dataArr, labelMat)
print(optimalWeights)

## step 3: testing
print("step 3: testing...")
accuracy = logRegres.testLogRegres(optimalWeights, test_x, test_y)

## step 4: show the result
print("step 4: show the result...")
print('The classify accuracy is: %.3f%%' % (accuracy * 100))
figure_plot.plotBestFit(optimalWeights.getA())
def main():
    """Entry point: fit weights on the sample data and plot the decision boundary."""
    features, labels = logRegres.loadDataSet()
    fitted = logRegres.gradAscent(features, labels)
    logRegres.plotBestFit(fitted)
#!/usr/bin/python
# -*- coding:utf-8 -*-
import logRegres
from numpy import *

# Fix: the original used Python 2 `print` statements; converted to print().
# Runtime output strings are kept byte-identical.

# 5.2.2 Training: find the best parameters with batch gradient ascent.
dataArr, labelMat = logRegres.loadDataSet()
print("回归系数:", logRegres.gradAscent(dataArr, labelMat))

# 5.2.3 Analysis: draw the decision boundary.
weights = logRegres.gradAscent(dataArr, labelMat)
logRegres.plotBestFit(weights.getA())

# 5.2.4 Training: stochastic gradient ascent (plain and improved variants).
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent0(array(dataArr), labelMat)
logRegres.plotBestFit(weights)
weights = logRegres.stocGradAscent1(array(dataArr), labelMat)
logRegres.plotBestFit(weights)

# 5.3.2 Testing: classify with logistic regression.
logRegres.multiTest()
# -*- coding: utf-8 -*-
from numpy import *
import logRegres

# Batch gradient ascent on the sample data, then plot the fit.
points, flags = logRegres.loadDataSet()
batch_weights = logRegres.gradAscent(points, flags)
logRegres.plotBestFit(batch_weights)

# Pick up any edits to the module before running the stochastic variants.
# NOTE(review): bare reload() is a Python 2 builtin — Python 3 needs
# importlib.reload; confirm which interpreter this script targets.
reload(logRegres)
stoc_weights = logRegres.stocGradAscent0(array(points), flags)
logRegres.plotBestFit(stoc_weights)
stoc1_weights = logRegres.stocGradAscent1(array(points), flags)
logRegres.plotBestFit(stoc1_weights)

import logRegres
logRegres.multiTest()
import logRegres
from numpy import *

if __name__ == '__main__':
    # Batch gradient ascent; getA() converts the weight matrix to an ndarray.
    samples, labels = logRegres.loadDataSet()
    batch_w = logRegres.gradAscent(samples, labels)
    logRegres.plotBestFit(batch_w.getA())

    # Plain stochastic gradient ascent.
    sgd_w = logRegres.stocGradAscent0(array(samples), labels)
    logRegres.plotBestFit(sgd_w)

    # Improved stochastic gradient ascent.
    sgd1_w = logRegres.stocGradAscent1(array(samples), labels)
    logRegres.plotBestFit(sgd1_w)
# Question: why does every sample get an extra constant attribute (x0 = 1)?
# Note: with the provided two attributes (excluding the constant x0 = 1) the plot
# is a line x1 + x2 = 0 splitting 2-D space; with three attributes it would be a
# plane x1 + x2 + x3 = 0 splitting 3-D space; with four, x1 + x2 + x3 + x4 = 0 —
# a 3-D hyperplane inside 4-D space that we can only picture via projections.
# (Analogy from the original author: just as an ant on one plane cannot reach or
# even see another intersecting plane, 3-D spaces may intersect yet be mutually
# unreachable.)
# Dates: started 2018-06-08 (Fri) afternoon; commentary finished 2018-06-12 (Tue) morning.
# Fix: the original used Python 2 `print` statements; converted to print().
# Runtime (Chinese) output strings are kept byte-identical.
import logRegres
from numpy import *

# Load the data from file into plain lists.
dataArr, labelMat = logRegres.loadDataSet()
print("\n数据列表是:\n", dataArr)   # sanity-check that reading succeeded
print("\n类列表是:\n", labelMat)

# Compute the weights with both batch and stochastic gradient ascent.
weights = logRegres.gradAscent(dataArr, labelMat)          # batch: best parameters
stocWeights = logRegres.stocGradAscent1(array(dataArr), labelMat, 500)  # stochastic
print("\n权重w0,w1,w2的值是:\n", weights)

####################################### Figure 1: batch gradient ascent #################################
# Draw the line from the weights and the points from the data.
print("\n第一个图:梯度上升算法的例子")
# Batch method: every update processes the whole data set at once.
# .getA() converts the matrix into an array, whose elements are freely indexable.
logRegres.plotBestFit(weights.getA())

####################################### Figure 2: stochastic gradient ascent #################################
# Stochastic method: online learning — incremental update as each new sample arrives.
print("第二个图:随机梯度上升算法的例子")
# logRegres.plotBestFit(stocWeights)
from numpy import *
import logRegres
import logRegresGo

# Fit weights with batch gradient ascent and plot the decision boundary.
features, labels = logRegres.loadDataSet()
fitted = logRegres.gradAscent(features, labels)
logRegres.plotBestFit(fitted)
def test_grade(self):
    """Load the sample data, echo it, and print the batch-gradient-ascent weights."""
    samples, targets = logRegres.loadDataSet()
    print("\n data_set == %s" % (samples))
    print("\n label_mat == %s" % (targets))
    fitted = logRegres.gradAscent(samples, targets)
    print("\n ascent == %s" % (fitted))