예제 #1
0
 def test_stoc_grade_plot(self):
     """Fit weights with ``logRegres.stocGradAscent0`` and plot them.

     Loads the data set, echoes the data and the labels, runs
     ``stocGradAscent0`` on the data converted with ``array`` and hands the
     resulting weights to ``logRegres.plotBestFit``.
     """
     features, targets = logRegres.loadDataSet()
     for template, value in (("\n data_set == %s", features),
                             ("\n label_mat == %s", targets)):
         print(template % value)
     fitted = logRegres.stocGradAscent0(array(features), targets)
     print("\n weights == %s" % fitted)
     logRegres.plotBestFit(fitted)
예제 #2
0
def plotSDError(grad_type):
    """Plot each of the three columns of a stochastic-gradient history.

    The history is produced by ``stocGradAscent0`` (``grad_type == 0``) or
    ``stocGradAscent1`` (``grad_type == 1``) on the data returned by
    ``logRegres.loadDataSet``. Columns 0, 1 and 2 of the result are drawn
    in three stacked subplots labelled X0, X1 and X2.
    NOTE(review): the result is presumably one row of weights per update
    step -- confirm against the stocGradAscent* definitions.

    Args:
        grad_type: 0 to use ``stocGradAscent0``, 1 to use ``stocGradAscent1``.

    Raises:
        ValueError: if ``grad_type`` is neither 0 nor 1.
    """
    # Validate first: previously any other value left the history unbound
    # and only failed later, at the first plot call, with a confusing error.
    if grad_type not in (0, 1):
        raise ValueError(
            "grad_type must be 0 (stocGradAscent0) or 1 (stocGradAscent1), "
            "got %r" % (grad_type,))

    dataMat, labelMat = logRegres.loadDataSet()
    dataArr = array(dataMat)
    if grad_type == 0:
        myHist = stocGradAscent0(dataArr, labelMat)
    else:
        myHist = stocGradAscent1(dataArr, labelMat)

    fig = plt.figure(figsize=(7, 7))
    for column, label in enumerate(('X0', 'X1', 'X2')):
        # 3 rows x 1 column grid; subplot indices are 1-based.
        ax = fig.add_subplot(3, 1, column + 1)
        ax.plot(myHist[:, column])
        ax.set_ylabel(label)
    # Only the bottom subplot carries the shared x-axis label.
    ax.set_xlabel('iteration')
    plt.show()
예제 #3
0
def plotBestFit(weights):
    """Scatter the data set by class and overlay the line given by *weights*.

    Points whose label converts to 1 are drawn as red squares using columns
    1 and 2 of the data; all other points are drawn in green. The line
    ``x2 = (-w0 - w1 * x1) / w2`` is drawn for x1 in [-3.0, 3.0).

    Args:
        weights: the coefficients (w0, w1, w2). May be a flat sequence, a
            column array or a ``numpy.matrix``; it is flattened to 1-D
            before use, so callers no longer need to call ``.getA()`` first.
    """
    import matplotlib.pyplot as plt

    # Flatten up front. With a 3x1 numpy.matrix, ``weights[1] * x`` is a
    # matrix product and yields a (1, len(x)) result that ax.plot rejects.
    coef = np.asarray(weights).ravel()
    w0, w1, w2 = coef[0], coef[1], coef[2]

    dataMat, labelMat = loadDataSet()
    dataArr = np.array(dataMat)

    pos_x, pos_y = [], []
    neg_x, neg_y = [], []
    for row, label in zip(dataArr, labelMat):
        if int(label) == 1:
            pos_x.append(row[1])
            pos_y.append(row[2])
        else:
            neg_x.append(row[1])
            neg_y.append(row[2])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(pos_x, pos_y, s=30, c='red', marker='s')
    ax.scatter(neg_x, neg_y, s=30, c='green')

    x = np.arange(-3.0, 3.0, 0.1)
    y = (-w0 - w1 * x) / w2
    print(w0)
    print(w1)
    print(w2)
    ax.plot(x, y)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()
예제 #4
0
def test_get():
    """Load the data set, echo it, and print the ``gradAscent`` result."""
    dataSet, labels = logRegres.loadDataSet()

    # Parenthesised single-argument print behaves the same on Python 2 and
    # Python 3; the bare print statement is a SyntaxError on Python 3.
    print(dataSet)
    print(labels)
    # Optional visual check (disabled):
    # plotUtil.plot2DScatterWith2Type(dataSet, labels, 1, 0)

    print(logRegres.gradAscent(dataSet, labels))
예제 #5
0
 def test_grade_plot(self):
     """Fit weights with ``logRegres.gradAscent`` and plot them.

     Loads the data set, echoes the data and the labels, runs ``gradAscent``
     and passes the result, converted with ``getA()``, to
     ``logRegres.plotBestFit``.
     """
     features, targets = logRegres.loadDataSet()
     for template, value in (("\n data_set == %s", features),
                             ("\n label_mat == %s", targets)):
         print(template % value)
     fitted = logRegres.gradAscent(features, targets)
     print("\n weights == %s" % fitted)
     # getA() converts the numpy matrix into a plain array before plotting.
     as_array = fitted.getA()
     logRegres.plotBestFit(as_array)
예제 #6
0
 def test_best_stoc_grade_plot(self):
     """Fit weights with ``logRegres.stocGradAscent1`` and plot them.

     Loads the data set, echoes the data and the labels, runs
     ``stocGradAscent1`` on the data converted with ``array`` and hands the
     resulting weights straight to ``logRegres.plotBestFit``.
     """
     features, targets = logRegres.loadDataSet()
     for template, value in (("\n data_set == %s", features),
                             ("\n label_mat == %s", targets)):
         print(template % value)
     # 200 is passed as the third argument -- presumably the number of
     # passes over the data (TODO confirm against stocGradAscent1).
     fitted = logRegres.stocGradAscent1(array(features), targets, 200)
     print("\n weights == %s" % fitted)
     # The result is plotted as returned; no getA() conversion is applied.
     logRegres.plotBestFit(fitted)
예제 #7
0
# -*- coding: utf-8 -*-

from numpy import *
import logRegres
# Walk through the three fitting routines in logRegres, plotting each result,
# then run logRegres.multiTest().
data, ls = logRegres.loadDataSet()
wei1 = logRegres.gradAscent(data, ls)
# NOTE(review): other callers in this file pass gradAscent's result through
# .getA() before plotting -- confirm plotBestFit accepts it unconverted.
logRegres.plotBestFit(wei1)

# NOTE(review): reload() is a builtin only on Python 2; on Python 3 it lives
# in importlib, so this line raises NameError there.
reload(logRegres)
wei2 = logRegres.stocGradAscent0(array(data), ls)
logRegres.plotBestFit(wei2)

wei3 = logRegres.stocGradAscent1(array(data), ls)
logRegres.plotBestFit(wei3)

# Re-importing an already imported module just rebinds the same name.
import logRegres
logRegres.multiTest()
예제 #8
0
#!usr/bin/python
#coding:utf8

import logRegres
from numpy import *

# Fit weights with stocGradAscent1 and plot them over the data set.
dataMat, Lables = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(array(dataMat), Lables)

# NOTE(review): plotBestFit is called here with (data, labels, weights);
# the other callers in this file pass only the weights -- confirm that this
# logRegres version takes three arguments.
logRegres.plotBestFit(dataMat, Lables, weights)

# x = arange(-3.0, 3.0, 0.1)
# print x

# logRegres.muliTest()
예제 #9
0
def main():
    """Fit weights with ``logRegres.gradAscent`` and plot the result.

    NOTE(review): other callers in this file convert the ``gradAscent``
    result with ``.getA()`` before plotting; here it is passed unchanged --
    confirm ``plotBestFit`` accepts it.
    """
    samples, targets = logRegres.loadDataSet()
    logRegres.plotBestFit(logRegres.gradAscent(samples, targets))
예제 #10
0
def run():
    """Fit weights with ``lr.stocGradAscent1``, print them and plot them.

    The data is passed to ``stocGradAscent1`` exactly as returned by
    ``lr.loadDataSet`` (no ``array`` conversion is applied here).
    """
    dataMat, labelMat = lr.loadDataSet()
    weights = lr.stocGradAscent1(dataMat, labelMat)
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(weights)
    lr.plotBestFit(weights)
예제 #11
0
    for j in range(40):
        dataIndex = range(m)
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01
            randIndex = int(random.uniform(0, len(dataIndex)))
            h = logRegres.sigmoid(sum(dataMatrix[randIndex] * weights))
            error = classLabels[randIndex] - h
            #print error
            weights = weights + alpha * error * dataMatrix[randIndex]
            weightsHistory[j * m + i, :] = weights
            del (dataIndex[randIndex])
    print(weights)
    return weightsHistory


# Run stocGradAscent1 on the loaded data and start plotting its result.
# NOTE(review): stocGradAscent1 is presumably the function whose tail
# precedes this script (it returns weightsHistory) -- confirm.
dataMat, labelMat = logRegres.loadDataSet()
dataArr = array(dataMat)
myHist = stocGradAscent1(dataArr, labelMat)

n = shape(dataArr)[0]  # number of rows in the data array
# NOTE(review): n and the six lists below are not used in the visible part
# of this script; they may be used further down or be copy-paste leftovers.
xcord1 = []
ycord1 = []
xcord2 = []
ycord2 = []

markers = []
colors = []

fig = plt.figure()
# First of three stacked subplots: column 0 of the returned history.
ax = fig.add_subplot(311)
type1 = ax.plot(myHist[:, 0])
예제 #12
0
def stocGradAscent1(numIter):
    """Fit weights with ``logRegres.stocGradAscent1``, print and plot them.

    Args:
        numIter: forwarded unchanged as the third argument of
            ``logRegres.stocGradAscent1``.
    """
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.stocGradAscent1(array(dataArr), labelMat, numIter)
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(weights)

    logRegres.plotBestFit(weights)
예제 #13
0
'''
Created on Oct 6, 2010

@author: Peter
'''
from numpy import *
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import logRegres

# Fit weights with stocGradAscent0, then split the samples into two
# coordinate lists by label.
dataArr, labelArr = logRegres.loadDataSet()
dataArray = array(dataArr)
weights = logRegres.stocGradAscent0(dataArray, labelArr)

n = shape(dataArray)[0]  # number of rows (samples) in the data array
# Columns 1 and 2 of samples whose label converts to 1 ...
xcord1 = []
ycord1 = []
# ... and of all remaining samples.
xcord2 = []
ycord2 = []

# NOTE(review): markers and colors are not used in the visible part of
# this script.
markers = []
colors = []
for i in range(n):
    if int(labelArr[i]) == 1:
        xcord1.append(dataArray[i, 1])
        ycord1.append(dataArray[i, 2])
    else:
        xcord2.append(dataArray[i, 1])
        ycord2.append(dataArray[i, 2])
예제 #14
0
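# 3. Initial weights: the choice is also arbitrary, usually 1; after many iterations they converge to the optimal weights (provided there is a unique optimum, i.e. the likelihood function has a unique extremum).
# 4. Number of iterations: try a few values and look at the plot to get a rough idea of what to use; in theory more iterations are better, but in practice the later iterations have little effect.
# 5. In the third example X0 is already present in the data file, so the program does not need to add it. Both the training and the test samples have 22 columns (21 attribute columns + 1 label column holding 0 or 1).
# 5. # Program contents:

# Open question: 1. Why does every sample need a constant attribute added?
# Other notes: the bundled example has two attributes (not counting the constant x0 = 1), so the plot is a straight line x1 + x2 = 0 that splits 2-D space; with three attributes it would be a plane x1 + x2 + x3 = 0 that splits 3-D space.
# By the same reasoning, four attributes give a three-dimensional solid that splits 4-D space (a 4-D figure cannot be visualised, but its 3-D projection can, so x1 + x2 + x3 + x4 = 0 is presumably the equation of a 3-D space expressed in 4-D coordinates).
# One dimension is a line, two dimensions a plane, three dimensions physical space. Planes may intersect, but an ant cannot move from one plane to another, nor even look up and see the other plane. Likewise physical spaces may intersect, yet one cannot travel to, or even look up and see, another space.
# Dates: started on the afternoon of Friday 8 June 2018; annotations finished on the morning of Tuesday 12 June.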

import logRegres
from numpy import *

#从文件夹中提取数据
# Load the data set. (The original note says the values are held in lists.)
dataArr, labelMat = logRegres.loadDataSet()
# Echo the data to check that it was read correctly. Concatenation is used
# instead of a two-argument print so the output is the same on Python 2
# and Python 3 (no separator is inserted after the trailing newline).
print("\n数据列表是:\n" + str(dataArr))
print("\n类列表是:\n" + str(labelMat))

# Compute the weights from the data and the labels.
weights = logRegres.gradAscent(dataArr, labelMat)  # batch gradient ascent
stocWeights = logRegres.stocGradAscent1(array(dataArr), labelMat,
                                        500)  # stochastic gradient ascent
print("\n权重w0,w1,w2的值是:\n" + str(weights))

# ---------- Figure 1: gradient-ascent example ----------
# Draw the line from the weights and the points from the data.
print("\n第一个图:梯度上升算法的例子")
# Gradient ascent here is the batch method (all samples handled at once).
# .getA() converts the matrix to an array before plotting.
logRegres.plotBestFit(
    weights.getA())
예제 #15
0
def gradAscent():
    """Fit weights with ``logRegres.gradAscent``, print them and plot them.

    The result is converted with ``getA()`` before being passed to
    ``logRegres.plotBestFit``.
    """
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.gradAscent(dataArr, labelMat)
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(weights)

    logRegres.plotBestFit(weights.getA())
예제 #16
0
def stocGradAscent1(numIter):
    """Fit weights with ``logRegres.stocGradAscent1``, print and plot them.

    Args:
        numIter: forwarded unchanged as the third argument of
            ``logRegres.stocGradAscent1``.
    """
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.stocGradAscent1(array(dataArr), labelMat, numIter)
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(weights)

    logRegres.plotBestFit(weights)
예제 #17
0
__author__ = 'sunbeansoft'

import logRegres as lr
from numpy import *

# Fit and plot with each of the three routines in turn, then run multiTest().
dataArr, labelMat = lr.loadDataSet()
weight = lr.gradAscent(dataArr, labelMat)
# The gradAscent result is converted with getA() before plotting; the
# stocGradAscent* results below are plotted as returned.
lr.plotBestFit(weight.getA())
weight = lr.stocGradAscent0(array(dataArr), labelMat)
lr.plotBestFit(weight)
weight = lr.stocGradAscent1(array(dataArr), labelMat)
lr.plotBestFit(weight)

lr.multiTest()
예제 #18
0
from numpy import *
import logRegres

# Print the weights produced by each of the three fitting routines (the
# plotting calls are disabled), then run multiTest().
dataarr, labelmat = logRegres.loadDataSet()
weights = logRegres.gradAscent(dataarr, labelmat)
print(weights)

# Same weights after conversion with getA().
print(weights.getA())
#logRegres.plotBestFit(weights.getA())

weights = logRegres.stocGradAscent0(array(dataarr), labelmat)
print(weights)
#logRegres.plotBestFit(weights)

weights = logRegres.stocGradAscent1(array(dataarr), labelmat)
print(weights)
#logRegres.plotBestFit(weights)

logRegres.multiTest()
예제 #19
0
#!/usr/bin/python
# encoding: utf-8

'''
Created on Nov 28, 2015

@author: yanruibo
'''
import logRegres
import numpy as np
if __name__ == '__main__':
    # Fit weights with stocGradAscent0, print them and plot them.
    dataArr,labelMat = logRegres.loadDataSet()
    # Alternative (disabled): weights = logRegres.gradAscent(dataArr, labelMat)
    weights = logRegres.stocGradAscent0(np.array(dataArr), labelMat)
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(weights)
    # For the gradAscent alternative above, the matching call would be
    # logRegres.plotBestFit(weights.getA()).
    logRegres.plotBestFit(weights)
예제 #20
0
    for j in range(40):
        dataIndex = range(m)
        for i in range(m):
            alpha = 4/(1.0+j+i)+0.01
            randIndex = int(random.uniform(0,len(dataIndex)))
            h = logRegres.sigmoid(sum(dataMatrix[randIndex]*weights))
            error = classLabels[randIndex] - h
            #print error
            weights = weights + alpha * error * dataMatrix[randIndex]
            weightsHistory[j*m + i,:] = weights
            del(dataIndex[randIndex])
    print weights
    return weightsHistory
    

# Run stocGradAscent1 on the loaded data and start plotting its result.
# NOTE(review): stocGradAscent1 is presumably the function whose tail
# precedes this script (it returns weightsHistory) -- confirm.
dataMat,labelMat=logRegres.loadDataSet()
dataArr = array(dataMat)
myHist = stocGradAscent1(dataArr,labelMat)


n = shape(dataArr)[0] # number of rows in the data array
# NOTE(review): n and the six lists below are not used in the visible part
# of this script; they may be used further down or be copy-paste leftovers.
xcord1 = []; ycord1 = []
xcord2 = []; ycord2 = []

markers =[]
colors =[]


fig = plt.figure()
# First of three stacked subplots: column 0 of the returned history.
ax = fig.add_subplot(311)
type1 = ax.plot(myHist[:,0])
예제 #21
0
import logRegres
from numpy import *
# Fit weights with gradAscent, print them as an array and plot them.
a1, a2 = logRegres.loadDataSet()
#print(a1)
#print(a2)
b1 = logRegres.gradAscent(a1, a2)
# getA() is applied to the gradAscent result before printing and plotting.
print(b1.getA())
logRegres.plotBestFit(b1.getA())  # author's note: "perfect"

# Disabled alternative using stocGradAscent1:
#c1 = logRegres.stocGradAscent1(array(a1),a2)
#logRegres.plotBestFit(c1)
예제 #22
0
def gradAscent():
    """Fit weights with ``logRegres.gradAscent``, print them and plot them.

    The result is converted with ``getA()`` before being passed to
    ``logRegres.plotBestFit``.
    """
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.gradAscent(dataArr, labelMat)
    # Parenthesised so the line is valid on both Python 2 and Python 3.
    print(weights)

    logRegres.plotBestFit(weights.getA())
예제 #23
0
def test_plotBestFit():
    """Fit weights with ``logRegres.gradAscent`` and plot them.

    The fitted weights are converted with ``getA()`` before being handed to
    ``logRegres.plotBestFit``.
    """
    features, targets = logRegres.loadDataSet()
    fitted = logRegres.gradAscent(features, targets)
    as_array = fitted.getA()
    logRegres.plotBestFit(as_array)
예제 #24
0
import logRegres

# Train on data/train-set, then classify data/test-set and print the
# original labels next to the classified ones.
trainData, trainLabels = logRegres.loadDataSet('data/train-set', 13)
weights = logRegres.kgradAscent(trainData, trainLabels)
# Uncomment to inspect the fitted weights:
# print('weights:\n')
# print(weights)
testData, testLabels = logRegres.loadDataSet('data/test-set', 13)
# Parenthesised single-argument prints behave the same on Python 2 and
# Python 3; the bare print statement is a SyntaxError on Python 3.
print('Original:')
print(testLabels)
correct, classified = logRegres.ktest(testData, testLabels, weights)
print('Classified:')
print(classified)
예제 #25
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author:yiluzhang


import logRegres
if __name__ == "__main__":
    # Try out batch gradient ascent.
    data_arr, label_arr = logRegres.loadDataSet()
    weights = logRegres.gradAscent(data_arr, label_arr)
    # NOTE(review): other callers in this file pass gradAscent's result
    # through .getA() before plotting -- confirm plotBestFit accepts it
    # unconverted.
    logRegres.plotBestFit(weights)
예제 #26
0
from numpy import *
import logRegres
import logRegresGo

# Fit weights with gradAscent and plot them.
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.gradAscent(dataArr, labelMat)
# NOTE(review): other callers in this file pass gradAscent's result through
# .getA() before plotting -- confirm plotBestFit accepts it unconverted.
logRegres.plotBestFit(weights)
예제 #27
0
 def test_grade(self):
     """Run ``logRegres.gradAscent`` on the data set and print the result.

     The loaded data and labels are echoed first; nothing is plotted.
     """
     features, targets = logRegres.loadDataSet()
     for template, value in (("\n data_set == %s", features),
                             ("\n label_mat == %s", targets)):
         print(template % value)
     fitted = logRegres.gradAscent(features, targets)
     print("\n ascent == %s" % fitted)