def createKnnLearner(naFeatures, lKnn=30, leafsize=10, method='mean'):
    '''
    @summary: Creates a quick KNN learner and trains it on the supplied data.
    @param naFeatures: Numpy array of training data, handed unchanged to the
        learner's addEvidence(). No separate target array is passed, so
        presumably the last column holds the classification - TODO confirm
        against kdtknn.addEvidence.
    @param lKnn: Forwarded to kdt.kdtknn as k (presumably the number of
        neighbours used per query).
    @param leafsize: Forwarded to kdt.kdtknn as leafsize.
    @param method: Forwarded to kdt.kdtknn as method; defaults to 'mean'.
    @return: The kdt.kdtknn instance, after addEvidence() has been called on it.
    '''
    cLearner = kdt.kdtknn(k=lKnn, method=method, leafsize=leafsize)
    cLearner.addEvidence(naFeatures)
    return cLearner
# Plot the 3d view of the training data in the second cell of the 2x2 figure.
ax = fig.add_subplot(222, projection='3d')
ax.scatter(X1, X2, Y, c=colors, edgecolors='none')
ax.set_xlabel('X1')
ax.set_ylabel('X2')
ax.set_zlabel('Y')
ax.set_xlim3d(-1, 1)
ax.set_ylim3d(-1, 1)
ax.set_zlim3d(-1, 1)
plt.title('Training Data 3D View', fontsize=12)

##########
# OK, now create and train a learner
#
learner = kdt.kdtknn(k=30, method='mean')
numpoints = X1.shape[0]
dataX = np.zeros([numpoints, 2])
dataX[:, 0] = X1
dataX[:, 1] = X2

# Train on the first 60% of the points. floor() may hand back a float
# (depending on which floor is in scope), and slice bounds must be integers,
# so convert explicitly.
trainsize = int(floor(dataX.shape[0] * .6))
learner.addEvidence(dataX[0:trainsize], dataY=Y[0:trainsize])

# Build a steps x steps grid of query points over [-1, 1) x [-1, 1).
steps = 50.0
stepsize = 2.0 / steps

# Array dimensions must be integers; steps is kept as a float for the
# stepsize division above, so cast only the row count.
Xtest = np.zeros([int(steps * steps), 2])
count = 0
# NOTE(review): the loop body looks cut off here - only column 0 is filled
# and count is never advanced in the visible code. Confirm against the
# original script before relying on Xtest.
for i in np.arange(-1, 1, stepsize):
    for j in np.arange(-1, 1, stepsize):
        Xtest[count, 0] = i + stepsize / 2
def main():
    '''
    @summary: Runs a greedy forward feature selection for a KNN learner over
    a hard-coded list of 30 symbols. Features are computed for 2007-01-01
    through 2010-06-30; 2007-2009 is stacked as training data and the first
    half of 2010 as test data. In each round every feature not yet selected is
    tried together with the features chosen so far, and the one giving the
    highest correlation between the learner's estimates and the test
    classification column is kept. The search stops as soon as a round fails
    to beat the best correlation seen so far. Progress and the final result
    are printed and also written to output.txt.
    @return: None
    '''
    symbols = [
        "AA", "AXP", "BA", "BAC", "CAT", "CSCO", "CVX", "DD", "DIS", "GE",
        "HD", "HPQ", "IBM", "INTC", "JNJ", "JPM", "KFT", "KO", "MCD", "MMM",
        "MRK", "MSFT", "PFE", "PG", "T", "TRV", "UTX", "VZ", "WMT", "XOM",
    ]

    # This is the start and end dates for the entire train and test data combined
    alldatastartday = dt.datetime(2007, 1, 1)
    alldataendday = dt.datetime(2010, 6, 30)
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(alldatastartday, alldataendday, timeofday)

    dataobj = da.DataAccess("Norgate")

    # Fill gaps forward first, then backward for anything still missing at the
    # start of a series. Older pandas releases treated a bare fillna() as a
    # forward fill; current releases reject a call with neither value nor
    # method, so the forward fill is spelled out explicitly.
    voldata = dataobj.get_data(timestamps, symbols, "volume", verbose=True)
    voldata = voldata.ffill().bfill()
    close = dataobj.get_data(timestamps, symbols, "close", verbose=True)
    close = close.ffill().bfill()

    # featureList and featureListArgs are parallel: entry i of the second is
    # the keyword-argument dict for entry i of the first.
    featureList = [
        featMA,
        featMA,
        featRSI,
        featRSI,
        featDrawDown,
        featRunUp,
        featVolumeDelta,
        featVolumeDelta,
        featAroon,
        classFutRet,
    ]
    featureListArgs = [
        {"lLookback": 10, "bRel": True},
        {"lLookback": 20},
        {"lLookback": 10},
        {"lLookback": 20},
        {},
        {},
        {"lLookback": 10},
        {"lLookback": 20},
        {"bDown": False},
        {"lLookforward": 5},
    ]

    # Apply the features.
    #
    # John Cornwell's featuretest.py was consulted for figuring out the syntax
    # of ftu.applyFeatures() methods and ftu.stackSyms() methods
    #
    allfeatureValues = ftu.applyFeatures(close, voldata, featureList, featureListArgs)

    # Stack the symbols for the training period and normalise the result; the
    # returned scale/shift values are reused for the test data below.
    trainstartday = dt.datetime(2007, 1, 1)
    trainendday = dt.datetime(2009, 12, 31)
    traintimestamps = du.getNYSEdays(trainstartday, trainendday, timeofday)
    trainingData = ftu.stackSyms(allfeatureValues, traintimestamps[0], traintimestamps[-1])
    scaleshiftvalues = ftu.normFeatures(trainingData, -1.0, 1.0, False)

    # Stack the symbols for the test period and normalise every column except
    # the last with the training scale/shift values. The return value is not
    # used, so normQuery presumably works in place - TODO confirm.
    teststartday = dt.datetime(2010, 1, 1)
    testendday = dt.datetime(2010, 6, 30)
    testtimestamps = du.getNYSEdays(teststartday, testendday, timeofday)
    testData = ftu.stackSyms(allfeatureValues, testtimestamps[0], testtimestamps[-1])
    ftu.normQuery(testData[:, :-1], scaleshiftvalues)

    NUMFEATURES = 9
    # Column 9 is always selected last and used as the target Y; presumably it
    # is the output of classFutRet, the final featureList entry.
    classIndex = NUMFEATURES
    bestFeatureIndices = []
    bestCorrelation = 0.0

    # The context manager closes output.txt even if a learner call raises.
    with open("output.txt", "w") as fid:

        def report(message):
            # Every status line goes both to the log file and to stdout.
            fid.write(message + "\n")
            print(message)

        for _ in range(NUMFEATURES):
            nextFeatureIndexToAdd = -1
            for featureIndex in range(NUMFEATURES):
                if featureIndex in bestFeatureIndices:
                    continue

                # Candidate set = features kept so far plus the one on trial.
                candidateIndices = bestFeatureIndices + [featureIndex]
                report("testing feature set " + str(candidateIndices))

                # The target column goes last so [:, :-1] / [:, -1] split X from Y.
                columns = candidateIndices + [classIndex]
                curTrainingData = trainingData[:, columns]
                curTestData = testData[:, columns]

                kdtlearner = knn.kdtknn(5, "mean", leafsize=100)
                kdtlearner.addEvidence(curTrainingData[:, :-1], curTrainingData[:, -1])
                testEstimatedValues = kdtlearner.query(curTestData[:, :-1])
                testcorrelation = np.corrcoef(testEstimatedValues.T, curTestData[:, -1].T)
                curCorrelation = testcorrelation[0, 1]
                report("corr coef = %.4f" % curCorrelation)

                # bestCorrelation carries over between rounds, so a feature is
                # only kept when it improves on the best result so far.
                if curCorrelation > bestCorrelation:
                    nextFeatureIndexToAdd = featureIndex
                    bestCorrelation = curCorrelation

            if nextFeatureIndexToAdd < 0:
                # No remaining feature improved the correlation: stop searching.
                break
            bestFeatureIndices.append(nextFeatureIndexToAdd)

        report("best feature set is " + str(bestFeatureIndices))
        report("corr coef = %.4f" % bestCorrelation)
# Plot the 3d view of the training data in the second cell of the 2x2 figure.
ax = fig.add_subplot(222,projection='3d')
ax.scatter(X1,X2,Y,c=colors,edgecolors='none')
ax.set_xlabel('X1')
ax.set_ylabel('X2')
ax.set_zlabel('Y')
ax.set_xlim3d(-1,1)
ax.set_ylim3d(-1,1)
ax.set_zlim3d(-1,1)
plt.title('Training Data 3D View',fontsize=12)

##########
# OK, now create and train a learner
#
learner = kdt.kdtknn(k=30,method='mean')
numpoints = X1.shape[0]
dataX = np.zeros([numpoints,2])
dataX[:,0] = X1
dataX[:,1] = X2

# Train on the first 60% of the points. floor() may hand back a float
# (depending on which floor is in scope), and slice bounds must be integers,
# so convert explicitly.
trainsize = int(floor(dataX.shape[0] * .6))
learner.addEvidence(dataX[0:trainsize],dataY=Y[0:trainsize])

# Build a steps x steps grid of query points over [-1, 1) x [-1, 1).
steps = 50.0
stepsize = 2.0/steps

# Array dimensions must be integers; steps is kept as a float for the
# stepsize division above, so cast only the row count.
Xtest = np.zeros([int(steps*steps),2])
count = 0
# NOTE(review): the loop body looks cut off here - only column 0 is filled
# and count is never advanced in the visible code. Confirm against the
# original script before relying on Xtest.
for i in np.arange(-1,1,stepsize):
    for j in np.arange(-1,1,stepsize):
        Xtest[count,0] = i + stepsize/2