Example #1
0
def createKnnLearner(naFeatures, lKnn=30, leafsize=10, method='mean'):
    '''
    @summary: Builds a kdt.kdtknn learner and feeds it the supplied data.
    @param naFeatures: Numpy array handed unchanged to the learner's
                       addEvidence(); no separate target array is passed
                       (presumably the targets travel inside naFeatures -
                       confirm against kdtknn.addEvidence).
    @param lKnn: Forwarded to kdtknn as its k argument.
    @param leafsize: Forwarded to kdtknn as its leafsize argument.
    @param method: Forwarded to kdtknn as its method argument.
    @return: The kdtknn instance, after addEvidence() has been called on it.
    '''
    knn = kdt.kdtknn(leafsize=leafsize, method=method, k=lKnn)
    knn.addEvidence(naFeatures)
    return knn
def createKnnLearner(naFeatures, lKnn=30, leafsize=10, method='mean'):
    '''
    @summary: Convenience constructor: creates a kdt.kdtknn learner and
              loads the given data into it in one call.
    @param naFeatures: Numpy array passed as the only argument to the
                       learner's addEvidence() (presumably it also carries
                       the target values - confirm against kdtknn).
    @param lKnn: Value used for the learner's k keyword.
    @param leafsize: Value used for the learner's leafsize keyword.
    @param method: Value used for the learner's method keyword.
    @return: The newly created learner, with the evidence already added.
    '''
    settings = {'k': lKnn, 'method': method, 'leafsize': leafsize}
    learner = kdt.kdtknn(**settings)
    learner.addEvidence(naFeatures)
    return learner
Example #3
0
def main():
    '''
    @summary: Demo: trains a kdt.kdtknn learner on a three-column CSV
              (X1, X2, Y), plots the training data and the learned model,
              then compares the estimates with ground truth.
    Reads 'data-ripple-prob.csv' (first row skipped), writes
    "scatterdata3D.png" and "scatterdata.png", and prints the split sizes
    followed by corrcoef(Ytruth, Ytest).
    @return: None
    '''
    #
    # read in and slice up the data
    # (alternative data set: 'data-classification-prob.csv')
    #
    data = np.loadtxt('data-ripple-prob.csv', delimiter=',', skiprows=1)
    X1 = data[:, 0]
    X2 = data[:, 1]
    Y = data[:, 2]
    colors = findcolors(Y)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    plt.clf()
    fig = plt.figure()
    # add_subplot makes the new axes current, so the plt.* calls target it
    fig.add_subplot(221)
    plt.scatter(X1, X2, c=colors, edgecolors='none')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title('Training Data 2D View', fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(222, projection='3d')
    ax.scatter(X1, X2, Y, c=colors, edgecolors='none')
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_zlabel('Y')
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title('Training Data 3D View', fontsize=12)

    ##########
    # OK, now create and train a learner
    #
    learner = kdt.kdtknn(k=30, method='mean')
    numpoints = X1.shape[0]
    dataX = np.zeros([numpoints, 2])
    dataX[:, 0] = X1
    dataX[:, 1] = X2

    # floor() may return a float; slice bounds have to be integers.
    trainsize = int(floor(dataX.shape[0] * .6))
    learner.addEvidence(dataX[0:trainsize], dataY=Y[0:trainsize])

    # Query grid: steps x steps cell centres covering [-1, 1] x [-1, 1].
    # steps is an int because it is used as an array shape; the centres are
    # derived from integer indices so the row count is always steps * steps
    # (a float-step arange can be off by one through rounding).
    steps = 50
    stepsize = 2.0 / steps
    centers = -1 + stepsize * (np.arange(steps) + 0.5)
    Xtest = np.zeros([steps * steps, 2])
    Xtest[:, 0] = np.repeat(centers, steps)  # X1 varies slowest
    Xtest[:, 1] = np.tile(centers, steps)    # X2 varies fastest
    Ytest = learner.query(Xtest)  # to check every point

    #
    # Choose colors
    #
    colors = findcolors(Ytest)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    fig.add_subplot(223)
    plt.scatter(Xtest[:, 0], Xtest[:, 1], c=colors, edgecolors='none')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title('Learned Model 2D', fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(224, projection='3d')
    ax.scatter(Xtest[:, 0], Xtest[:, 1], Ytest, c=colors, edgecolors='none')
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_zlabel('Y')
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title('Learned Model 3D', fontsize=12)
    savefig("scatterdata3D.png", format='png')
    plt.close()

    #
    # Compare to ground truth
    #
    # Evaluate on the rows that were NOT used for training; slicing with
    # [-trainsize:] would overlap the training rows [0:trainsize].
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('trainsize ' + str(trainsize))
    Ytruth = Y[trainsize:]
    print('Ytruth.shape ' + str(Ytruth.shape))
    Xtest = dataX[trainsize:, :]
    print('Xtest.shape ' + str(Xtest.shape))
    Ytest = learner.query(Xtest)  # to check every point
    print('Ytest.shape ' + str(Ytest.shape))

    plt.clf()
    plt.scatter(Ytruth, Ytest, edgecolors='none')
    plt.xlim(-1.2, 1.2)  # set x scale
    plt.ylim(-1.2, 1.2)  # set y scale
    plt.xlabel('Ground Truth')
    plt.ylabel('Estimated')
    savefig("scatterdata.png", format='png')

    print(corrcoef(Ytruth, Ytest))
def main():
    '''
    @summary: Demo: trains a kdt.kdtknn learner on a three-column CSV
              (X1, X2, Y), plots the training data and the learned model,
              then compares the estimates with ground truth.
    Reads "data-ripple-prob.csv" (first row skipped), writes
    "scatterdata3D.png" and "scatterdata.png", and prints the split sizes
    followed by corrcoef(Ytruth, Ytest).
    @return: None
    '''
    #
    # read in and slice up the data
    # (alternative data set: "data-classification-prob.csv")
    #
    data = np.loadtxt("data-ripple-prob.csv", delimiter=",", skiprows=1)
    X1 = data[:, 0]
    X2 = data[:, 1]
    Y = data[:, 2]
    colors = findcolors(Y)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    plt.clf()
    fig = plt.figure()
    # add_subplot makes the new axes current, so the plt.* calls target it
    fig.add_subplot(221)
    plt.scatter(X1, X2, c=colors, edgecolors="none")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title("Training Data 2D View", fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(222, projection="3d")
    ax.scatter(X1, X2, Y, c=colors, edgecolors="none")
    ax.set_xlabel("X1")
    ax.set_ylabel("X2")
    ax.set_zlabel("Y")
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title("Training Data 3D View", fontsize=12)

    ##########
    # OK, now create and train a learner
    #
    learner = kdt.kdtknn(k=30, method="mean")
    numpoints = X1.shape[0]
    dataX = np.zeros([numpoints, 2])
    dataX[:, 0] = X1
    dataX[:, 1] = X2

    # floor() may return a float; slice bounds have to be integers.
    trainsize = int(floor(dataX.shape[0] * 0.6))
    learner.addEvidence(dataX[0:trainsize], dataY=Y[0:trainsize])

    # Query grid: steps x steps cell centres covering [-1, 1] x [-1, 1].
    # steps is an int because it is used as an array shape; the centres are
    # derived from integer indices so the row count is always steps * steps
    # (a float-step arange can be off by one through rounding).
    steps = 50
    stepsize = 2.0 / steps
    centers = -1 + stepsize * (np.arange(steps) + 0.5)
    Xtest = np.zeros([steps * steps, 2])
    Xtest[:, 0] = np.repeat(centers, steps)  # X1 varies slowest
    Xtest[:, 1] = np.tile(centers, steps)  # X2 varies fastest
    Ytest = learner.query(Xtest)  # to check every point

    #
    # Choose colors
    #
    colors = findcolors(Ytest)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    fig.add_subplot(223)
    plt.scatter(Xtest[:, 0], Xtest[:, 1], c=colors, edgecolors="none")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title("Learned Model 2D", fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(224, projection="3d")
    ax.scatter(Xtest[:, 0], Xtest[:, 1], Ytest, c=colors, edgecolors="none")
    ax.set_xlabel("X1")
    ax.set_ylabel("X2")
    ax.set_zlabel("Y")
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title("Learned Model 3D", fontsize=12)
    savefig("scatterdata3D.png", format="png")
    plt.close()

    #
    # Compare to ground truth
    #
    # Evaluate on the rows that were NOT used for training; slicing with
    # [-trainsize:] would overlap the training rows [0:trainsize].
    print("trainsize " + str(trainsize))
    Ytruth = Y[trainsize:]
    print("Ytruth.shape " + str(Ytruth.shape))
    Xtest = dataX[trainsize:, :]
    print("Xtest.shape " + str(Xtest.shape))
    Ytest = learner.query(Xtest)  # to check every point
    print("Ytest.shape " + str(Ytest.shape))

    plt.clf()
    plt.scatter(Ytruth, Ytest, edgecolors="none")
    plt.xlim(-1.2, 1.2)  # set x scale
    plt.ylim(-1.2, 1.2)  # set y scale
    plt.xlabel("Ground Truth")
    plt.ylabel("Estimated")
    savefig("scatterdata.png", format="png")

    print(corrcoef(Ytruth, Ytest))