Example #1
def colicTest():
    frTrain = open('horseColicTraining.txt')
    frTest = open('horseColicTest.txt')
    trainingSet = []
    trainingLabels = []
    for line in frTrain.readlines():
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        trainingSet.append(lineArr)
        trainingLabels.append(float(currLine[21]))
    trainWeights = logRegres.stocGradAscent1(np.array(trainingSet),
                                             trainingLabels, 500)
    errorCount = 0
    numTestVec = 0.0
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        if int(logRegres.classifyVector(np.array(lineArr),
                                        trainWeights)) != int(currLine[21]):
            errorCount += 1
    errorRate = float(errorCount) / numTestVec
    print('the error rate of this test is: %f' % errorRate)
    return errorRate
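Every example on this page calls stocGradAscent1 and classifyVector from the logRegres module of Machine Learning in Action. For readers who do not have that module at hand, the sketch below shows how those helpers are commonly implemented (sigmoid-based logistic regression with a shrinking step size and random sample selection); individual projects may differ in the details.

import numpy as np

def sigmoid(inX):
    # logistic function: maps a weighted sum onto a probability in (0, 1)
    return 1.0 / (1.0 + np.exp(-inX))

def stocGradAscent1(dataMatrix, classLabels, numIter=150):
    # improved stochastic gradient ascent: visits samples in random order
    # and decays the step size alpha as training progresses
    m, n = np.shape(dataMatrix)
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))
        for i in range(m):
            alpha = 4 / (1.0 + j + i) + 0.01  # never reaches 0, so late updates still matter
            randIndex = int(np.random.uniform(0, len(dataIndex)))
            h = sigmoid(np.sum(dataMatrix[dataIndex[randIndex]] * weights))
            error = classLabels[dataIndex[randIndex]] - h
            weights = weights + alpha * error * dataMatrix[dataIndex[randIndex]]
            del dataIndex[randIndex]
    return weights

def classifyVector(inX, weights):
    # hard 0/1 decision by thresholding the predicted probability at 0.5
    prob = sigmoid(np.sum(inX * weights))
    return 1.0 if prob > 0.5 else 0.0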
Example #2
def colicTest():
    frTrain = open('horseColicTraining.txt')
    frTest = open('horseColicTest.txt')
    trainingSet = []
    trainingLabels = []
    # Train the regression model
    for line in frTrain.readlines():
        currLine = line.strip().split('\t')  # split each line on tabs
        print('currLine:')
        print(currLine)  # data read from this line
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
            print('lineArr:')
            print(lineArr)
        trainingSet.append(lineArr)
        trainingLabels.append(float(currLine[21]))
    trainWeights = logRegres.stocGradAscent1(array(trainingSet),
                                             trainingLabels, 1000)
    errorCount = 0
    numTestVec = 0.0
    # Test the regression model
    for line in frTest.readlines():
        numTestVec += 1.0
        currLine = line.strip().split('\t')
        lineArr = []
        for i in range(21):
            lineArr.append(float(currLine[i]))
        if int(logRegres.classifyVector(array(lineArr), trainWeights)) != int(
                currLine[21]):
            errorCount += 1
    errorRate = (float(errorCount) / numTestVec)
    print("the error rate of this test is: %f" % errorRate)
    return errorRate
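Because stocGradAscent1 selects training samples at random, two runs of colicTest rarely report the same error rate. A small driver in the spirit of Machine Learning in Action's multiTest (sketched here for illustration, not taken from the examples above) averages several runs:

def multiTest(numTests=10):
    # run colicTest repeatedly and report the mean error rate
    errorSum = 0.0
    for k in range(numTests):
        errorSum += colicTest()
    print("after %d iterations the average error rate is: %f"
          % (numTests, errorSum / float(numTests)))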
Example #3
def colicTest():
    trainingSet,trainingLabels = dln.loadDataSet('horseColicTraining.txt')  # load the training data
    trainWeights = lgr.stocGradAscent1(array(trainingSet), trainingLabels, 1000)  # fit the weights with the improved stochastic gradient ascent
    testSet,testLabels = dln.loadDataSet('horseColicTest.txt')  # load the test data
    errorCount = 0; numTestVec = 0.0
    m,n = shape(testSet)
    for i in range(m):
        numTestVec += 1.0
        if int(lgr.classifyVector(array(testSet[i]), trainWeights))!= int(testLabels[i]):
            errorCount += 1
    errorRate = (float(errorCount)/numTestVec)
    print "the error rate of this test is: %f" % errorRate
    return errorRate
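Example #3 factors the file parsing of Examples #1 and #2 out into dln.loadDataSet. That module is local to the original project, so the version below is only an assumption of what it likely does: read a tab-separated file with 21 features per row and the class label in the last column.

def loadDataSet(fileName):
    # hypothetical loader: 21 tab-separated features per line, label in column 22
    dataSet, labels = [], []
    with open(fileName) as fr:
        for line in fr:
            currLine = line.strip().split('\t')
            dataSet.append([float(currLine[i]) for i in range(21)])
            labels.append(float(currLine[21]))
    return dataSet, labels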
Example #4
def LRFeature(train_in, train_out, test_in):
    n_train = np.shape(train_in)[0]
    n_test = np.shape(test_in)[0]

    # --------- LR-specific preprocessing
    addones_train = np.ones((n_train, 1))
    train_in = np.c_[addones_train, train_in]  # prepend a column of ones (intercept term) to the training set

    addones_test = np.ones((n_test, 1))
    test_in = np.c_[addones_test, test_in]  # prepend a column of ones to the test set

    train_in, train_out = RandomOverSampler().fit_sample(train_in,
                                                         train_out)  # oversample the minority class

    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)  # improved stochastic gradient ascent, 500 iterations
    len_test = np.shape(test_in)[0]  # number of test samples
    test_predict = []
    for i in range(len_test):
        test_predict_tmp = LR.classifyVector(
            test_in[i, :], trainWeights)  # score one sample at a time: weighted feature sum through the sigmoid
        test_predict.append(test_predict_tmp)
    test_predict = np.array(test_predict)  # final predictions as an array
    return test_predict
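The np.c_ calls in LRFeature prepend an intercept column of ones, so the first weight learned by stocGradAscent1 acts as a bias term. A tiny self-contained illustration with made-up data (note also that recent imbalanced-learn releases rename fit_sample to fit_resample):

import numpy as np

X = np.array([[2.0, 3.0],
              [4.0, 5.0]])
X_with_bias = np.c_[np.ones((X.shape[0], 1)), X]  # shape (2, 3)
print(X_with_bias)
# [[1. 2. 3.]
#  [1. 4. 5.]]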
Example #5
    # for train, test in skf.split(dataMat, labelMat):
    # (the lines below are the body of this StratifiedKFold split loop)
    print("%s %s" % (train, test))
    train_in = dataMat[train]
    test_in = dataMat[test]
    train_out = labelMat[train]
    test_out = labelMat[test]
    train_in, train_out = RandomOverSampler().fit_sample(train_in, train_out)
    trainWeights = LR.stocGradAscent1(train_in, train_out, 500)

    len_train = np.shape(train_in)[0]
    len_test = np.shape(test_in)[0]
    test_predict = []
    proba_test = []
    for i in range(len_test):
        test_predict_tmp = LR.classifyVector(test_in[i, :], trainWeights)
        test_predict.append(test_predict_tmp)
        proba_test_tmp = LR.classifyProb(test_in[i, :], trainWeights)
        proba_test.append(proba_test_tmp)

    train_predict = []
    proba_train = []
    for i in range(len_train):
        train_predict_tmp = LR.classifyVector(train_in[i, :], trainWeights)
        train_predict.append(train_predict_tmp)
        proba_train_tmp = LR.classifyProb(train_in[i, :], trainWeights)
        proba_train.append(proba_train_tmp)

    test1, test2 = ann.evaluatemodel(train_out, train_predict,
                                     proba_train)  # evaluate the model on the training set
    evaluate_train.extend(test1)
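Example #5 also collects LR.classifyProb outputs, presumably the raw sigmoid probabilities that ann.evaluatemodel needs for threshold-independent metrics. classifyProb is not part of the standard logRegres module, so the sketch below is only a guess at its behaviour:

import numpy as np

def classifyProb(inX, weights):
    # hypothetical: return the sigmoid probability rather than a hard 0/1 label
    return 1.0 / (1.0 + np.exp(-np.sum(inX * weights)))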