feature, value = ChooseBestFeature(dataArray, resultList, maxDataCount,
                                       minErrorReduce)
    if feature == None:
        #Leaf
        node = Node(0, value, None, None)
    else:
        subDataArray1, subResultList1, subDataArray2, subResultList2 = SplitDataset(
            dataArray, resultList, feature, value)
        leftTree = CreateModelTree(subDataArray1, subResultList1, maxDataCount,
                                   minErrorReduce)
        rightTree = CreateModelTree(subDataArray2, subResultList2,
                                    maxDataCount, minErrorReduce)
        node = Node(feature, value, leftTree, rightTree)
    return node


def ForecastByModelTree(tree, inData):
    """Predict the regression target for inData by walking a model tree.

    tree: Node — leaves carry a regression weight vector in .value and
          have left == right == None; internal nodes carry the split
          (feature index, threshold value) plus child subtrees.
    inData: feature vector (list/row) indexed by tree.feature.
    Returns the scalar prediction produced by the reached leaf's linear
    model.
    """
    # Leaf: evaluate the leaf's linear model on the input row.
    # NOTE(review): assumes inData already includes any intercept term the
    # leaf weights expect — confirm against how CreateModelTree fits leaves.
    if tree.left is None and tree.right is None:
        return (numpy.mat(inData) * tree.value).tolist()[0][0]
    # Internal node: samples above the threshold descend left, else right
    # (mirrors the split convention used when the tree was built).
    if inData[tree.feature] > tree.value:
        return ForecastByModelTree(tree.left, inData)
    return ForecastByModelTree(tree.right, inData)


if __name__ == '__main__':
    # Fit a model tree on Dataset2.txt and forecast two query points.
    samples, targets = LoadDataAndLabel("Dataset2.txt")
    model = CreateModelTree(samples, targets, 4, 0.1)
    #ShowTree(model)  # optional visualization
    for query in ([0.530897], [0.993349]):
        print(ForecastByModelTree(model, query))
                        #Caluclate the b in two conditions
                        b1 = b - Ei - labelMatrix[i] * (
                            newAlphaI - alphas[i]) * dataMatrix[
                                i, :] * dataMatrix[i, :].T - labelMatrix[j] * (
                                    newAlphaJ - alphas[j]
                                ) * dataMatrix[i, :] * dataMatrix[j, :].T
                        b2 = b - Ej - labelMatrix[i] * (
                            newAlphaI - alphas[i]) * dataMatrix[
                                i, :] * dataMatrix[j, :].T - labelMatrix[j] * (
                                    newAlphaJ - alphas[j]
                                ) * dataMatrix[j, :] * dataMatrix[j, :].T

                        #Update the alpha[i], alpha[j] and b
                        alphas[i] = newAlphaI
                        alphas[j] = newAlphaJ
                        if (0 < alphas[i] and alphas[i] < border):
                            b = b1
                        elif (0 < alphas[j] and alphas[j] < border):
                            b = b2
                        else:
                            b = (b1 + b2) / 2.0
        iter = iter + 1
    return alphas, b


if __name__ == '__main__':
    # Train a simplified-SMO SVM on Dataset.txt and report its parameters.
    samples, labels = LoadDataAndLabel("Dataset.txt")
    lagrangeMultipliers, bias = smoSimple(samples, labels, 0.6, 0.001, 100)
    print(lagrangeMultipliers)
    print(bias)
Beispiel #3
0
            errorCount += 1
    print("The error rate:%f" % (errorCount / float(numToTest)))

#Plot the dating data, encoding the class label in the markers.
def Plot(dataMatrix, labelList):
    """Scatter-plot columns 1 and 2 of dataMatrix.

    Marker size and color both scale with the numeric class label
    (15 * label) so the categories are visually distinguishable.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111)
    # BUG FIX: the original issued a plain scatter of the same points
    # first, which this colored scatter immediately overplotted — the
    # redundant first call has been removed.
    ax.scatter(dataMatrix[:, 1], dataMatrix[:, 2],
               15.0 * array(labelList), 15.0 * array(labelList))
    plt.show()

def ClassifyPerson(normMatrix, ranges, minValues, labelList):
    """Interactively classify a person entered by the user.

    Prompts for the three dating features, normalizes the query with the
    same (x - min) / range scaling applied to the training data, runs
    k-nearest-neighbors (k=3) via Classify0, and prints the predicted
    preference category.
    """
    resultList = ['Not in all','In small doses','In large doses']
    percentTats = float(input("Percentage of time spent playing video game?"))
    ffMiles = float(input("Frequent flier miles earned per year?"))
    iceCream = float(input("Liters of ice cream consumed per year?"))
    inArray = [ffMiles,percentTats,iceCream]
    # BUG FIX: the query must be shifted by minValues before scaling so it
    # matches the (x - min) / range normalization of normMatrix; the
    # original divided by ranges only and never used minValues at all.
    classifierResult = Classify0((array(inArray) - minValues) / ranges,
                                 normMatrix, labelList, 3)
    print("You will probably like this person:",resultList[int(classifierResult)-1])

if __name__ == '__main__':
    # kNN dating demo: load, normalize, measure error rate, classify one
    # interactive query, then visualize the raw data.
    rawData, labels = LoadDataAndLabel('DatingTestSet2.txt')
    dataMatrix = array(rawData)
    normMatrix, ranges, minValues = AutoNorm(dataMatrix)
    DataClassTest(normMatrix, ranges, minValues, labels)
    ClassifyPerson(normMatrix, ranges, minValues, labels)
    Plot(dataMatrix, labels)



Beispiel #4
0
    x1 = min(X)
    y1 = w[0] + x1 * w[1]
    x2 = max(X)
    y2 = w[0] + x2 * w[1]
    ax.plot([x1, x2], [y1, y2], 'r-')

    sortedXIndex = numpy.argsort(X)
    for index in range(len(X) - 1):
        ax.plot([X[sortedXIndex[index]], X[sortedXIndex[index + 1]]], [
            LWLRPredY[sortedXIndex[index]], LWLRPredY[sortedXIndex[index + 1]]
        ], 'g--')
    matplotlib.pyplot.show()


if __name__ == '__main__':
    # Compare ordinary least squares, locally weighted, and ridge
    # regression on Dataset1.txt, plotting the first two fits.
    features, targets = LoadDataAndLabel('Dataset1.txt')

    # OLS fit and its per-sample predictions.
    w = StandRegres(features, targets)
    StandPredY = [numpy.mat(sample) * w for sample in features]
    # Locally weighted regression evaluated at every training point.
    LWLRPredY = [
        LocalWeightLinearRegress(sample, features, targets, 0.01).tolist()[0][0]
        for sample in features
    ]
    # Ridge fit: w1[0] acts as the intercept, w1[1] as the slope.
    w1 = RidgeRegres(features, targets)
    RidgePredY = [(w1[0] + sample[1] * w1[1]).tolist()[0][0]
                  for sample in features]
    xCoords = [sample[1] for sample in features]
    Plot(xCoords, targets, w.flatten().A[0], LWLRPredY)

    print(RegError(targets, StandPredY))
    print(RegError(targets, RidgePredY))