def colicTest():
    """Train logistic regression on the horse-colic training file and
    report the misclassification rate on the test file.

    Returns:
        float: fraction of misclassified test vectors.
    """
    trainingSet = []
    trainingLabels = []
    # Each row has 21 tab-separated feature columns; column 21 is the label.
    with open('horseColicTraining.txt') as frTrain:
        for line in frTrain:
            currLine = line.strip().split('\t')
            trainingSet.append([float(currLine[i]) for i in range(21)])
            trainingLabels.append(float(currLine[21]))
    # Improved stochastic gradient ascent, 500 iterations.
    trainWeights = logRegres.stocGradAscent1(array(trainingSet),
                                             trainingLabels, 500)
    errorCount = 0
    numTestVec = 0.0
    with open('horseColicTest.txt') as frTest:
        for line in frTest:
            numTestVec += 1.0
            currLine = line.strip().split('\t')
            lineArr = [float(currLine[i]) for i in range(21)]
            if int(classifyVector(array(lineArr), trainWeights)) != int(
                    currLine[21]):
                errorCount += 1
    errorRate = (float(errorCount) / numTestVec)
    print("the error rate of this test is : %f" % errorRate)
    return errorRate
def colicTest():
    """Train logistic regression on the horse-colic data and return the
    error rate measured on the held-out test file."""
    # --- create training data: 21 feature columns, label in column 21 ---
    trainingSet = []
    trainingLabels = []
    with open('horseColicTraining.txt') as frTrain:
        for line in frTrain:
            currentLine = line.strip().split('\t')
            trainingSet.append([float(currentLine[i]) for i in range(21)])
            trainingLabels.append(float(currentLine[21]))

    # --- training: improved stochastic gradient ascent, 500 iterations ---
    trainWeights = lr.stocGradAscent1(array(trainingSet), trainingLabels, 500)

    # --- test ---
    errorCount = 0
    numTestVec = 0.0
    with open('horseColicTest.txt') as frTest:
        for line in frTest:
            numTestVec += 1.0
            currLine = line.strip().split('\t')
            lineArr = [float(currLine[i]) for i in range(21)]
            if int(classifyVector(array(lineArr), trainWeights)) != int(currLine[21]):
                errorCount += 1
    errorRate = float(errorCount) / numTestVec
    print("the error rate of this set is: %f " % errorRate)
    return errorRate
Example #3
0
 def test_best_stoc_grade_plot(self):
     """Smoke-test stocGradAscent1 on the demo data set and plot the
     fitted decision boundary."""
     data_set, label_mat = logRegres.loadDataSet()
     print("\n data_set == %s" % (data_set))
     print("\n label_mat == %s" % (label_mat))
     # Run the improved stochastic gradient ascent for 200 iterations.
     weights = logRegres.stocGradAscent1(array(data_set), label_mat, 200)
     print("\n weights == %s" % (weights))
     # NOTE(review): the original comment mentioned getA() (numpy matrix ->
     # array conversion), but no getA() call appears here — presumably
     # stocGradAscent1 already returns an array; confirm against logRegres.
     logRegres.plotBestFit(weights)
Example #4
0
def colicTest():
    """Train logistic regression on the horse-colic data set and return
    the error rate on the test set."""
    # Load training data (features + labels).
    trainingSet, trainingLabels = dln.loadDataSet('horseColicTraining.txt')
    # Fit model weights: improved stochastic gradient ascent, 1000 passes.
    trainWeights = lgr.stocGradAscent1(array(trainingSet), trainingLabels, 1000)
    # Load test data.
    testSet, testLabels = dln.loadDataSet('horseColicTest.txt')
    errorCount = 0
    numTestVec = 0.0
    m, n = shape(testSet)
    for i in range(m):
        numTestVec += 1.0
        if int(lgr.classifyVector(array(testSet[i]), trainWeights)) != int(testLabels[i]):
            errorCount += 1
    errorRate = (float(errorCount) / numTestVec)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
Example #5
0
def colicTest():
    """Train logistic regression on the colic training file and return
    the misclassification rate measured on the test file."""
    trainingSet, trainingLabels = loadData('horseColicTraining.txt')
    testSet, testLabels = loadData('horseColicTest.txt')
    # Improved stochastic gradient ascent, 500 iterations.
    trainWeights = logRegres.stocGradAscent1(np.array(trainingSet),
                                             trainingLabels, 500)
    numTestVec = len(testLabels)
    # Count the test vectors whose predicted class differs from the label.
    errorCount = sum(
        1 for i in range(numTestVec)
        if int(classifyVector(testSet[i], trainWeights)) != int(testLabels[i]))
    errorRate = float(errorCount) / numTestVec
    print('the error rate of this test is: %f' % errorRate)
    return errorRate
Example #6
0
def colicTest():
    """Evaluate logistic regression on the horse-colic data set and
    return the error rate on the test file."""
    trainData = loadDataset('horseColicTraining.txt')
    testData = loadDataset('horseColicTest.txt')
    # Last column is the label; the remaining columns are features.
    X_train, y_train = trainData[:, :-1], trainData[:, -1]
    X_test, y_test = testData[:, :-1], testData[:, -1]

    weights, costs = logRegres.stocGradAscent1(X_train, y_train)
    y_pred = logRegres.predict(X_test, weights)

    # Count mismatches between predictions and ground truth.
    errorCount = 0
    for actual, predicted in zip(y_test, y_pred):
        if predicted != actual:
            errorCount += 1
    print('Error rate is', errorCount / y_test.shape[0])
    return errorCount / y_test.shape[0]
def costTest(intNum=100):
    # Train weights on the pre-loaded training data for intNum iterations,
    # then (apparently) evaluate on the test file.
    # NOTE(review): this snippet appears truncated by the scrape — it opens
    # frTest at the end but the evaluation loop is missing.
    trainingSet, traingLabels = loadTrainData()
    # trainingSet = []
    # traingLabels = []
    # for line in frTrain.readlines():
    #     currLine = line.strip().split('\t')
    #     lineArr = []
    #     for i in range(21):
    #         lineArr.append(float(currLine[i]))
    #     trainingSet.append(lineArr)
    #     traingLabels.append(float(currLine[21]))
    trainWeights = lr.stocGradAscent1(array(trainingSet), traingLabels, intNum)
    # print "trainWeights: %f" % trainWeights

    errorCount = 0
    numTestVec = 0.0

    frTest = open('horseColicTest.txt')
Example #8
0
def LRFeature(train_in, train_out, test_in):
    """Train logistic regression on oversampled training data and return
    per-sample class predictions for the test set as a numpy array."""
    n_train = np.shape(train_in)[0]
    n_test = np.shape(test_in)[0]

    # --------- LR-specific preprocessing
    addones_train = np.ones((n_train, 1))
    train_in = np.c_[addones_train, train_in]  # prepend a bias column of 1s to the training set

    addones_test = np.ones((n_test, 1))
    test_in = np.c_[addones_test, test_in]  # prepend a bias column of 1s to the test set

    # Oversample the training data to balance the classes.
    # NOTE(review): fit_sample was renamed fit_resample in newer
    # imbalanced-learn releases — confirm the pinned version.
    train_in, train_out = RandomOverSampler().fit_sample(train_in,
                                                         train_out)

    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)  # stochastic gradient training, 500 iterations
    len_test = np.shape(test_in)[0]  # number of test samples
    test_predict = []
    for i in range(len_test):
        test_predict_tmp = LR.classifyVector(
            test_in[i, :], trainWeights)  # classify one sample at a time (features dotted with weights)
        test_predict.append(test_predict_tmp)
    test_predict = np.array(test_predict)  # final prediction vector
    return test_predict
Example #9
0
def colicTest(numIter):
    """Train logistic regression for ``numIter`` iterations on the
    horse-colic training file and return the fraction of misclassified
    rows in the test file."""
    frTrain = open('horseColicTraining.txt', encoding='ISO-8859-1')
    frTest = open('horseColicTest.txt', encoding='ISO-8859-1')
    trainingMat, trainingLabel = [], []
    # Last column of each row is the class label; the rest are features.
    for line in frTrain.readlines():
        values = [float(tok) for tok in line.strip().split('\t')]
        trainingMat.append(values[:-1])
        trainingLabel.append(int(values[-1]))
    trainWeight = logRegres.stocGradAscent1(dataMat=np.array(trainingMat),
                                            labelMat=trainingLabel,
                                            numIter=numIter)
    errorCount = 0.0
    numTestVec = 0.0
    for line in frTest.readlines():
        numTestVec += 1
        values = [float(tok) for tok in line.strip().split('\t')]
        predicted = int(classifyVector(np.array(values[:-1]),
                                       weight=trainWeight))
        if predicted != int(values[-1]):
            errorCount += 1
    return (errorCount) / (numTestVec)
Example #10
0
#!/usr/bin/python

import numpy as np
import logRegres

# Demo: fit weights with the improved stochastic gradient ascent (default
# iteration count) and plot the resulting decision boundary.
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(np.array(dataArr), labelMat)
logRegres.plotBestFit(weights)
def stocGradAscent1(numIter):
    """Run logRegres.stocGradAscent1 for ``numIter`` iterations on the
    demo data set, print the fitted weights and plot the boundary."""
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.stocGradAscent1(array(dataArr), labelMat, numIter)
    print(weights)

    logRegres.plotBestFit(weights)
Example #12
0
import logRegres
from numpy import *

if __name__ == '__main__':
    # Compare the three optimizers on the demo data set, plotting the
    # decision boundary fitted by each.
    dataArr, labelMat = logRegres.loadDataSet()
    # Batch gradient ascent returns a numpy matrix, hence getA().
    weights = logRegres.gradAscent(dataArr, labelMat)
    logRegres.plotBestFit(weights.getA())
    # Plain stochastic gradient ascent.
    weights1 = logRegres.stocGradAscent0(array(dataArr), labelMat)
    logRegres.plotBestFit(weights1)
    # Improved stochastic gradient ascent.
    weights2 = logRegres.stocGradAscent1(array(dataArr), labelMat)
    logRegres.plotBestFit(weights2)
Example #13
0
from numpy import *
import logRegres

# Print the coefficients fitted by each optimizer on the demo data set,
# then run the horse-colic multi-trial test.
dataarr, labelmat = logRegres.loadDataSet()
# Batch gradient ascent (returns a numpy matrix; getA() yields an array).
weights = logRegres.gradAscent(dataarr, labelmat)
print(weights)

print(weights.getA())
#logRegres.plotBestFit(weights.getA())

# Plain stochastic gradient ascent.
weights = logRegres.stocGradAscent0(array(dataarr), labelmat)
print(weights)
#logRegres.plotBestFit(weights)

# Improved stochastic gradient ascent.
weights = logRegres.stocGradAscent1(array(dataarr), labelmat)
print(weights)
#logRegres.plotBestFit(weights)

logRegres.multiTest()
Example #14
0
# NOTE(review): reload is used here before `from importlib import reload`
# appears below — this line only works if reload is already in scope
# (e.g. an interactive session or Python 2); verify the intended runtime.
reload(logRegres)
weights = logRegres.gradAscent(dataArr, labelMat)
logRegres.plotBestFit(weights.getA())

# Test stochastic gradient ascent.
from numpy import *
from importlib import reload

reload(logRegres)
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent0(array(dataArr), labelMat)
logRegres.plotBestFit(weights)

# Test the improved stochastic gradient ascent.
from numpy import *
from importlib import reload

reload(logRegres)
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(array(dataArr), labelMat)
logRegres.plotBestFit(weights)

# Again with an explicit 500-iteration budget.
weights = logRegres.stocGradAscent1(array(dataArr), labelMat, 500)
logRegres.plotBestFit(weights)

# Test the horse-colic mortality prediction.
from importlib import reload

reload(logRegres)
logRegres.multiTest()
Example #15
0
# 10-fold stratified cross-validation of the LR classifier, with
# oversampling of each training fold before fitting.
# NOTE(review): the final loop appears truncated by the scrape — the
# train_predict append/accumulation is missing.
prenum_train = []
prenum_test = []

skf = StratifiedKFold(n_splits=10)
for train, test in skf.split(dataMat, labelMat):
    #==============================================================================
    # skf=StratifiedShuffleSplit(n_splits=10)
    # for train,test in skf.split(dataMat,labelMat):
    #==============================================================================
    print("%s %s" % (train, test))
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]
    test_out = labelMat[test]
    # Balance the classes of the training fold before fitting.
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)
    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)

    len_train = np.shape(train_in)[0]
    len_test = np.shape(test_in)[0]
    # Per-sample class predictions and probabilities on the test fold.
    test_predict = []
    proba_test = []
    for i in range(len_test):
        test_predict_tmp = LR.classifyVector(test_in[i, :], trainWeights)
        test_predict.append(test_predict_tmp)
        proba_test_tmp = LR.classifyProb(test_in[i, :], trainWeights)
        proba_test.append(proba_test_tmp)

    # Predictions on the (oversampled) training fold.
    train_predict = []
    proba_train = []
    for i in range(len_train):
        train_predict_tmp = LR.classifyVector(train_in[i, :], trainWeights)
Example #16
0
def run():
    """Fit weights on the demo data set with the improved stochastic
    gradient ascent, print them, and plot the decision boundary."""
    dataMat, labelMat = lr.loadDataSet()
    weights = lr.stocGradAscent1(dataMat, labelMat)
    print(weights)
    lr.plotBestFit(weights)
Example #17
0
#!usr/bin/python
#coding:utf8

import logRegres
from numpy import *

# Fit weights with the improved stochastic gradient ascent and plot the
# decision boundary over the labelled data points.
dataMat, Lables = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(array(dataMat), Lables)

logRegres.plotBestFit(dataMat, Lables, weights)

# x = arange(-3.0, 3.0, 0.1)
# print x

# logRegres.muliTest()
Example #18
0
import logRegres
import numpy as np

dataArr, labelMat = logRegres.loadDataSet()

# print(logRegres.gradAscent(dataArr, labelMat))
# weights = logRegres.gradAscent(dataArr, labelMat)
# logRegres.plotBestFit(weights.getA())

# weights = logRegres.stocGradAscent0(dataArr, labelMat)
# print(logRegres.stocGradAscent0(dataArr, labelMat))
# logRegres.plotBestFit(weights)

# Improved stochastic gradient ascent: plot the default-iteration fit and
# print a 500-iteration fit, then run the horse-colic multi-trial test.
dataArr, labelMat = logRegres.loadDataSet()
weights = logRegres.stocGradAscent1(np.array(dataArr), labelMat)
print(logRegres.stocGradAscent1(np.array(dataArr), labelMat, 500))
logRegres.plotBestFit(weights)

logRegres.multiTest()
Example #19
0
__author__ = 'sunbeansoft'

import logRegres as lr
from numpy import *

# Plot the boundary fitted by each of the three optimizers, then run the
# horse-colic multi-trial test.
dataArr, labelMat = lr.loadDataSet()
# Batch gradient ascent returns a numpy matrix, hence getA().
weight = lr.gradAscent(dataArr, labelMat)
lr.plotBestFit(weight.getA())
weight = lr.stocGradAscent0(array(dataArr), labelMat)
lr.plotBestFit(weight)
weight = lr.stocGradAscent1(array(dataArr), labelMat)
lr.plotBestFit(weight)

lr.multiTest()
Example #20
0
# coding=utf-8
import numpy
import logRegres

# Compare the three optimizers, plot a shifted sigmoid, then predict
# horse-colic mortality.
dataMat, labelMat = logRegres.loadDataSet()
weights = logRegres.gradAscent(dataMat, labelMat)
print(weights)
#logRegres.plotBestFit(weights.getA())
weights = logRegres.stocGradAscent0(dataMat, labelMat)
print('--随机梯度')
#logRegres.plotBestFit(weights)

weights = logRegres.stocGradAscent1(dataMat, labelMat)
print('--改进的随机梯度')
logRegres.plotBestFit(weights)

import matplotlib.pyplot as plt

fig = plt.figure()
ax = fig.add_subplot(111)

x = numpy.arange(-8.0, 8.0, 0.2)
print(logRegres.sigmoid(-0.3))
# Apply a 0.1 offset: not all data is split at x = 0.
y = [logRegres.sigmoid(xi - 0.1) for xi in x]
ax.plot(x, y)
plt.xlabel('X1')
plt.ylabel('X2')
#plt.show()
print('--预测病马')
logRegres.multiTest()
Example #21
0
def stocGradAscent1(numIter):
    """Fit weights for ``numIter`` iterations on the demo data set,
    print them, and plot the fitted decision boundary."""
    dataArr, labelMat = logRegres.loadDataSet()
    weights = logRegres.stocGradAscent1(array(dataArr), labelMat, numIter)
    print(weights)

    logRegres.plotBestFit(weights)
Example #22
0
# Note: with two features (plus the constant x0 = 1) the fitted boundary
# x1 + x2 = 0 is a line splitting 2-D space; with three features it is a
# plane in 3-D, and higher-dimensional boundaries can only be imagined
# through their lower-dimensional projections.

import logRegres
from numpy import *

# Load the demo data set from file into Python lists.
dataArr, labelMat = logRegres.loadDataSet()  # features and labels as lists
print("\n数据列表是:\n", dataArr)  # sanity-check that parsing worked
print("\n类列表是:\n", labelMat)

# Compute weights with batch gradient ascent and with the improved
# stochastic gradient ascent (500 iterations).
weights = logRegres.gradAscent(dataArr, labelMat)
stocWeights = logRegres.stocGradAscent1(array(dataArr), labelMat,
                                        500)
print("\n权重w0,w1,w2的值是:\n", weights)

#######################################       Figure 1: batch gradient ascent          #################################
print("\n第一个图:梯度上升算法的例子")
# Batch processing: all samples used in every update. gradAscent returns a
# numpy matrix; getA() converts it to an array for element-wise indexing.
logRegres.plotBestFit(
    weights.getA())

#######################################       Figure 2: stochastic gradient ascent          #################################
# Online learning: the classifier is updated incrementally per sample.
print("第二个图:随机梯度上升算法的例子")
logRegres.plotBestFit(stocWeights)

########################################       Example 3: predicting horse-colic mortality          #################################
Example #23
0
import logRegres
import numpy as np
#print(logRegres.loadDataSet())
dataArr,labelMat = logRegres.loadDataSet()

#print(np.mat(labelMat))
#print(np.mat(dataArr))
# Hand-rolled check: multiply the data matrix by all-ones weights.
weight = np.ones((3,1))
value = np.mat(dataArr)*weight
labelMat = np.mat(labelMat)
#print(labelMat)
#print(value-labelMat)
#print(np.mat(dataArr)*weight)
#weight = logRegres.gradAscent(dataArr,labelMat)
weight = logRegres.stocGradAscent1(dataArr,labelMat)
print(weight)
# NOTE(review): getA() exists on numpy matrices; if stocGradAscent1
# returns a plain ndarray this line raises AttributeError — confirm the
# return type in logRegres.
logRegres.plotBestFit(weight.getA())
Example #24
0
# -*- coding: utf-8 -*-

# Plot boundaries fitted by each optimizer, then run the multi-trial test.
# NOTE(review): bare reload() is a builtin only in Python 2; on Python 3
# it needs `from importlib import reload` — confirm the intended runtime.
from numpy import *
import logRegres
data, ls = logRegres.loadDataSet()
wei1 = logRegres.gradAscent(data, ls)
logRegres.plotBestFit(wei1)

reload(logRegres)
wei2 = logRegres.stocGradAscent0(array(data), ls)
logRegres.plotBestFit(wei2)

wei3 = logRegres.stocGradAscent1(array(data), ls)
logRegres.plotBestFit(wei3)

import logRegres
logRegres.multiTest()
# author: zhumenger
import logRegres
from numpy import *
# Print batch-gradient-ascent coefficients, then fit with the improved
# stochastic gradient ascent and plot its decision boundary.
dataArr, labelMat = logRegres.loadDataSet()
print(logRegres.gradAscent(dataArr, labelMat))
weigths = logRegres.stocGradAscent1(array(dataArr), labelMat)
print(logRegres.plotBestFit(weigths))
Example #26
0
import logRegres
from numpy import *

dataArr, labelMat = logRegres.loadDataSet()
print(logRegres.gradAscent(dataArr, labelMat))  # print the regression coefficients

# Print the coefficients fitted by plain stochastic gradient ascent.
print(logRegres.stocGradAscent0(array(dataArr), labelMat))

# Print the coefficients fitted by the improved stochastic gradient ascent.
print(logRegres.stocGradAscent1(array(dataArr), labelMat))