def createKnnLearner(naFeatures, lKnn=30, leafsize=10, method='mean'):
    '''
    @summary: Creates a quick KNN learner (kdt.kdtknn) and feeds it the
              supplied data through addEvidence().
    @param naFeatures: Numpy array of features. It is passed to addEvidence()
                       as the only argument, i.e. without a separate target
                       array (NOTE(review): presumably the last column holds
                       the value to predict - confirm against
                       kdtknn.addEvidence).
    @param lKnn: Number of neighbors; forwarded to kdtknn as k.
    @param leafsize: Forwarded unchanged to kdtknn as leafsize.
    @param method: Forwarded unchanged to kdtknn as method.
    @return: The kdtknn learner after addEvidence() has been called on it.
    '''
    cLearner = kdt.kdtknn(k=lKnn, method=method, leafsize=leafsize)
    cLearner.addEvidence(naFeatures)
    return cLearner
def createKnnLearner(naFeatures, lKnn=30, leafsize=10, method='mean'):
    '''
    @summary: Builds a kdt.kdtknn learner and loads the given data into it
              with addEvidence().
    @param naFeatures: Numpy array of features, handed to addEvidence() on
                       its own with no separate target array
                       (NOTE(review): the target is presumably the last
                       column - verify in kdtknn.addEvidence).
    @param lKnn: Neighbor count, passed to kdtknn as its k argument.
    @param leafsize: Passed through to kdtknn as leafsize.
    @param method: Passed through to kdtknn as method.
    @return: The learner object, after addEvidence() has run.
    '''
    cLearner = kdt.kdtknn(k=lKnn, method=method, leafsize=leafsize)
    cLearner.addEvidence(naFeatures)
    return cLearner
def main():
    '''
    @summary: Demo driver. Loads 'data-ripple-prob.csv', plots the raw data,
              fits a kdtknn learner on the first 60% of the rows, plots the
              learner's predictions over a regular grid on [-1, 1] x [-1, 1]
              and finally scatters estimates against ground truth.
              Writes 'scatterdata3D.png' and 'scatterdata.png' and prints the
              array shapes plus the correlation coefficient.
    @return: None
    '''
    #
    # read in and slice up the data
    # (alternative data set: 'data-classification-prob.csv')
    #
    data = np.loadtxt('data-ripple-prob.csv', delimiter=',', skiprows=1)
    X1 = data[:, 0]
    X2 = data[:, 1]
    Y = data[:, 2]
    colors = findcolors(Y)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(221)  # becomes the current axes for the plt.* calls below
    plt.scatter(X1, X2, c=colors, edgecolors='none')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title('Training Data 2D View', fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(222, projection='3d')
    ax.scatter(X1, X2, Y, c=colors, edgecolors='none')
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_zlabel('Y')
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title('Training Data 3D View', fontsize=12)

    ##########
    # OK, now create and train a learner
    #
    learner = kdt.kdtknn(k=30, method='mean')
    numpoints = X1.shape[0]
    dataX = np.zeros([numpoints, 2])
    dataX[:, 0] = X1
    dataX[:, 1] = X2

    # Slice bounds must be integers; floor() may hand back a float.
    trainsize = int(floor(dataX.shape[0] * .6))
    learner.addEvidence(dataX[0:trainsize], dataY=Y[0:trainsize])

    # Query grid: the centre of every cell of a steps x steps grid.
    # steps stays an int so it never ends up as a float array dimension.
    steps = 50
    stepsize = 2.0 / steps
    centers = np.arange(-1, 1, stepsize) + stepsize / 2
    # X1 varies slowest, X2 fastest (same row order as a nested i/j loop).
    Xtest = np.array([[x1, x2] for x1 in centers for x2 in centers])

    Ytest = learner.query(Xtest)  # to check every point

    #
    # Choose colors
    #
    colors = findcolors(Ytest)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    fig.add_subplot(223)
    plt.scatter(Xtest[:, 0], Xtest[:, 1], c=colors, edgecolors='none')
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title('Learned Model 2D', fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(224, projection='3d')
    ax.scatter(Xtest[:, 0], Xtest[:, 1], Ytest, c=colors, edgecolors='none')
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    ax.set_zlabel('Y')
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title('Learned Model 3D', fontsize=12)
    savefig("scatterdata3D.png", format='png')
    plt.close()

    #
    # Compare to ground truth
    #
    # NOTE(review): the slices below take the LAST trainsize rows, while the
    # learner was given the FIRST trainsize rows (60% of the data), so the two
    # ranges overlap. If a pure hold-out comparison is intended this should
    # probably be [trainsize:] - left unchanged pending confirmation.
    print('trainsize ' + str(trainsize))
    Ytruth = Y[-trainsize:]
    print('Ytruth.shape ' + str(Ytruth.shape))
    Xtest = dataX[-trainsize:, :]
    print('Xtest.shape ' + str(Xtest.shape))
    Ytest = learner.query(Xtest)  # to check every point
    print('Ytest.shape ' + str(Ytest.shape))

    plt.clf()
    plt.scatter(Ytruth, Ytest, edgecolors='none')
    plt.xlim(-1.2, 1.2)  # set x scale
    plt.ylim(-1.2, 1.2)  # set y scale
    plt.xlabel('Ground Truth')
    plt.ylabel('Estimated')
    savefig("scatterdata.png", format='png')

    print(corrcoef(Ytruth, Ytest))
def main():
    """
    @summary: Demo driver. Reads "data-ripple-prob.csv", plots the raw data,
              gives the first 60% of the rows to a kdtknn learner, plots the
              learner's output over a regular grid on [-1, 1] x [-1, 1], then
              scatters estimated values against ground truth.
              Saves "scatterdata3D.png" and "scatterdata.png" and prints the
              array shapes and the correlation coefficient.
    @return: None
    """
    #
    # read in and slice up the data
    # (alternative data set: "data-classification-prob.csv")
    #
    data = np.loadtxt("data-ripple-prob.csv", delimiter=",", skiprows=1)
    X1 = data[:, 0]
    X2 = data[:, 1]
    Y = data[:, 2]
    colors = findcolors(Y)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    plt.clf()
    fig = plt.figure()
    fig.add_subplot(221)  # makes this subplot current for the plt.* calls
    plt.scatter(X1, X2, c=colors, edgecolors="none")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title("Training Data 2D View", fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(222, projection="3d")
    ax.scatter(X1, X2, Y, c=colors, edgecolors="none")
    ax.set_xlabel("X1")
    ax.set_ylabel("X2")
    ax.set_zlabel("Y")
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title("Training Data 3D View", fontsize=12)

    ##########
    # OK, now create and train a learner
    #
    learner = kdt.kdtknn(k=30, method="mean")
    numpoints = X1.shape[0]
    dataX = np.zeros([numpoints, 2])
    dataX[:, 0] = X1
    dataX[:, 1] = X2

    # floor() can return a float, which is not a valid slice bound.
    trainsize = int(floor(dataX.shape[0] * 0.6))
    learner.addEvidence(dataX[0:trainsize], dataY=Y[0:trainsize])

    # Query points are the cell centres of a steps x steps grid.
    # steps is an int because it determines an array dimension.
    steps = 50
    stepsize = 2.0 / steps
    centers = np.arange(-1, 1, stepsize) + stepsize / 2
    # Row order: X1 is the outer (slow) coordinate, X2 the inner (fast) one.
    Xtest = np.array([[x1, x2] for x1 in centers for x2 in centers])

    Ytest = learner.query(Xtest)  # to check every point

    #
    # Choose colors
    #
    colors = findcolors(Ytest)

    #
    # scatter plot X1 vs X2 and colors are Y
    #
    fig.add_subplot(223)
    plt.scatter(Xtest[:, 0], Xtest[:, 1], c=colors, edgecolors="none")
    plt.xlabel("X1")
    plt.ylabel("X2")
    plt.xlim(-1, 1)  # set x scale
    plt.ylim(-1, 1)  # set y scale
    plt.title("Learned Model 2D", fontsize=12)

    # plot the 3d view
    ax = fig.add_subplot(224, projection="3d")
    ax.scatter(Xtest[:, 0], Xtest[:, 1], Ytest, c=colors, edgecolors="none")
    ax.set_xlabel("X1")
    ax.set_ylabel("X2")
    ax.set_zlabel("Y")
    ax.set_xlim3d(-1, 1)
    ax.set_ylim3d(-1, 1)
    ax.set_zlim3d(-1, 1)
    plt.title("Learned Model 3D", fontsize=12)
    savefig("scatterdata3D.png", format="png")
    plt.close()

    #
    # Compare to ground truth
    #
    # NOTE(review): these slices use the LAST trainsize rows, whereas the
    # learner received the FIRST trainsize rows (60% of the data), so part of
    # this "test" range was also used as evidence. A strict hold-out set would
    # be [trainsize:] - kept as-is until the intent is confirmed.
    print("trainsize " + str(trainsize))
    Ytruth = Y[-trainsize:]
    print("Ytruth.shape " + str(Ytruth.shape))
    Xtest = dataX[-trainsize:, :]
    print("Xtest.shape " + str(Xtest.shape))
    Ytest = learner.query(Xtest)  # to check every point
    print("Ytest.shape " + str(Ytest.shape))

    plt.clf()
    plt.scatter(Ytruth, Ytest, edgecolors="none")
    plt.xlim(-1.2, 1.2)  # set x scale
    plt.ylim(-1.2, 1.2)  # set y scale
    plt.xlabel("Ground Truth")
    plt.ylabel("Estimated")
    savefig("scatterdata.png", format="png")

    print(corrcoef(Ytruth, Ytest))