# Plot each feature next to the XOM price series, one figure per feature:
# price in the upper panel (red), feature values in the lower panel (green).
# The last entry of lfcFeatures is skipped here -- presumably it is the
# classification/label feature rather than a plottable input; confirm against
# the place where lfcFeatures is built.
for i, fcFunc in enumerate(lfcFeatures[:-1]):
    plt.clf()
    plt.subplot(211)
    plt.title(fcFunc.__name__)
    plt.plot(dfPrice.index, dfPrice['XOM'].values, 'r-')
    plt.subplot(212)
    plt.plot(dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-')
    plt.show()

# Pick training and test ranges: timestamps up to and including the 70% split
# index are used for training, everything after it for testing.
lSplit = int(len(ldtTimestamps) * 0.7)
dtStartTrain = ldtTimestamps[0]
dtEndTrain = ldtTimestamps[lSplit]
dtStartTest = ldtTimestamps[lSplit+1]
dtEndTest = ldtTimestamps[-1]

# Stack all information into one NumPy array per date range.
naFeatTrain = ftu.stackSyms(ldfFeatures, dtStartTrain, dtEndTrain)
naFeatTest = ftu.stackSyms(ldfFeatures, dtStartTest, dtEndTest)

# Normalize the training features and keep the resulting factors so the test
# data can be scaled the same way.
ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False)

# Normalize the test query columns (every column except the last) with the
# weights derived from the TRAINING data above, not from the test data.
ftu.normQuery(naFeatTest[:,:-1], ltWeights)

learnerTest(naFeatTrain, naFeatTest)
{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {},\ {},\ {'i_lookforward':5} ] ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data ''' ldfFeaturesTrain = ftu.applyFeatures(dDataTrain, lfcFeatures, ldArgs, '$SPX') ldfFeaturesTest = ftu.applyFeatures(dDataTest, lfcFeatures, ldArgs, '$SPX') ''' Pick Test and Training Points ''' dtStartTrain = dt.datetime(2008, 01, 01) dtEndTrain = dt.datetime(2009, 12, 31) dtStartTest = dt.datetime(2010, 01, 01) dtEndTest = dt.datetime(2010, 12, 31) ''' Stack all information into one Numpy array ''' naFeatTrain = ftu.stackSyms(ldfFeaturesTrain, dtStartTrain, dtEndTrain) naFeatTest = ftu.stackSyms(ldfFeaturesTest, dtStartTest, dtEndTest) ''' Normalize features, use same normalization factors for testing data as training data ''' ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False) ''' Normalize query points with same weights that come from test data ''' ftu.normQuery(naFeatTest[:, :-1], ltWeights) lFeatures = range(0, len(lfcFeatures) - 1) classLabelIndex = len(lfcFeatures) - 1 funccall = sys.argv[ 1] + '(naFeatTrain,naFeatTest,lFeatures,classLabelIndex)' timestart = time.time() clockstart = time.clock() eval(funccall)
def _subsetCorrelation(trainingData, testData, featureIndices, labelIndex):
    """Score one feature subset with a kd-tree KNN learner.

    Selects the columns ``featureIndices + [labelIndex]`` from both arrays,
    trains ``knn.kdtknn(5, "mean", leafsize=100)`` on the training columns
    (all but the last selected column as inputs, the last as target), queries
    it with the test inputs and returns element ``[0, 1]`` of
    ``np.corrcoef(estimates, test targets)``.
    """
    columns = list(featureIndices) + [labelIndex]
    curTrainingData = trainingData[:, columns]
    curTestData = testData[:, columns]

    kdtlearner = knn.kdtknn(5, "mean", leafsize=100)
    kdtlearner.addEvidence(curTrainingData[:, :-1], curTrainingData[:, -1])
    testEstimatedValues = kdtlearner.query(curTestData[:, :-1])

    testcorrelation = np.corrcoef(testEstimatedValues.T, curTestData[:, -1].T)
    return testcorrelation[0, 1]


def _logLine(fid, text):
    """Write ``text`` as one line to the open file ``fid`` and echo it to stdout."""
    fid.write(text + "\n")
    print(text)


def main():
    """Greedy forward feature selection for a KNN learner on 30 symbols.

    Loads "volume" and "close" data from the "Norgate" data source for
    2007-01-01 .. 2010-06-30, applies nine candidate features plus the
    ``classFutRet`` label feature, stacks and normalizes a training range
    (2007-2009) and a test range (first half of 2010), and then repeatedly
    adds the single feature that most improves the learner's correlation on
    the test range. Progress and the final feature set are printed and also
    written to ``output.txt`` in the current directory. Returns None.
    """
    symbols = [
        "AA", "AXP", "BA", "BAC", "CAT", "CSCO", "CVX", "DD", "DIS", "GE",
        "HD", "HPQ", "IBM", "INTC", "JNJ", "JPM", "KFT", "KO", "MCD", "MMM",
        "MRK", "MSFT", "PFE", "PG", "T", "TRV", "UTX", "VZ", "WMT", "XOM",
    ]

    # Start and end dates for the entire train and test data combined.
    alldatastartday = dt.datetime(2007, 1, 1)
    alldataendday = dt.datetime(2010, 6, 30)
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(alldatastartday, alldataendday, timeofday)

    dataobj = da.DataAccess("Norgate")
    # Fill gaps forward first, then backward for anything still missing at the
    # start. The forward fill is requested explicitly: the original bare
    # fillna() relied on old pandas releases defaulting to a forward fill,
    # and newer releases reject a call with neither a value nor a method.
    voldata = dataobj.get_data(timestamps, symbols, "volume", verbose=True)
    voldata = voldata.fillna(method="pad").fillna(method="backfill")
    close = dataobj.get_data(timestamps, symbols, "close", verbose=True)
    close = close.fillna(method="pad").fillna(method="backfill")

    # Candidate features followed by the label feature (classFutRet) in the
    # last position; featureListArgs holds the keyword arguments for each.
    featureList = [
        featMA,
        featMA,
        featRSI,
        featRSI,
        featDrawDown,
        featRunUp,
        featVolumeDelta,
        featVolumeDelta,
        featAroon,
        classFutRet,
    ]
    featureListArgs = [
        {"lLookback": 10, "bRel": True},
        {"lLookback": 20},
        {"lLookback": 10},
        {"lLookback": 20},
        {},
        {},
        {"lLookback": 10},
        {"lLookback": 20},
        {"bDown": False},
        {"lLookforward": 5},
    ]

    # John Cornwell's featuretest.py was consulted for figuring out the syntax
    # of ftu.applyFeatures() methods and ftu.stackSyms() methods.
    allfeatureValues = ftu.applyFeatures(close, voldata, featureList, featureListArgs)

    trainstartday = dt.datetime(2007, 1, 1)
    trainendday = dt.datetime(2009, 12, 31)
    traintimestamps = du.getNYSEdays(trainstartday, trainendday, timeofday)
    trainingData = ftu.stackSyms(allfeatureValues, traintimestamps[0], traintimestamps[-1])
    # The scale/shift values computed from the training data are reused below
    # so the test data is normalized in exactly the same way.
    scaleshiftvalues = ftu.normFeatures(trainingData, -1.0, 1.0, False)

    teststartday = dt.datetime(2010, 1, 1)
    testendday = dt.datetime(2010, 6, 30)
    testtimestamps = du.getNYSEdays(teststartday, testendday, timeofday)
    testData = ftu.stackSyms(allfeatureValues, testtimestamps[0], testtimestamps[-1])
    ftu.normQuery(testData[:, :-1], scaleshiftvalues)

    # The label is the last feature; every position before it is a candidate.
    labelIndex = len(featureList) - 1
    numFeatures = labelIndex

    bestFeatureIndices = []
    bestCorrelation = 0.0

    # The context manager guarantees output.txt is closed even if the learner
    # or the data pipeline raises part-way through the search.
    with open("output.txt", "w") as fid:
        for _ in range(numFeatures):
            nextFeatureIndexToAdd = -1
            for featureIndex in range(numFeatures):
                if featureIndex in bestFeatureIndices:
                    continue

                # Build the candidate set as a fresh list instead of
                # temporarily mutating bestFeatureIndices.
                candidateIndices = bestFeatureIndices + [featureIndex]
                _logLine(fid, "testing feature set " + str(candidateIndices))

                curCorrelation = _subsetCorrelation(
                    trainingData, testData, candidateIndices, labelIndex
                )
                _logLine(fid, "corr coef = %.4f" % (curCorrelation))

                if curCorrelation > bestCorrelation:
                    nextFeatureIndexToAdd = featureIndex
                    bestCorrelation = curCorrelation

            # Stop as soon as no remaining feature improves the correlation.
            if nextFeatureIndexToAdd < 0:
                break
            bestFeatureIndices.append(nextFeatureIndexToAdd)

        _logLine(fid, "best feature set is " + str(bestFeatureIndices))
        _logLine(fid, "corr coef = %.4f" % (bestCorrelation))
def _score_feature_subset(indices):
    """Return the learner correlation for the features at ``indices``.

    Builds the feature/argument lists from the module-level ``copyfeatures``
    and ``copyargs`` catalogues (always followed by entry 9, the label
    feature), applies them to ``dfPrice``/``dfVolume``, stacks the training
    and test date ranges, normalizes both with the training weights, and
    returns element ``[0][1]`` of whatever ``learnerTest`` returns.
    """
    chosen = list(indices) + [9]
    # Index-based construction keeps each feature paired with its own
    # arguments even when the catalogue contains the same function or an
    # equal argument dict more than once.
    features = [copyfeatures[j] for j in chosen]
    args = [copyargs[j] for j in chosen]

    ldfFeatures_new = ftu.applyFeatures(dfPrice, dfVolume, features, args)
    naFeatTrain = ftu.stackSyms(ldfFeatures_new, dtStartTrain, dtEndTrain)
    naFeatTest = ftu.stackSyms(ldfFeatures_new, dtStartTest, dtEndTest)
    # Called before normalization, in the same position as the original code.
    store_train_and_test(naFeatTrain, naFeatTest)

    ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False)
    ftu.normQuery(naFeatTest[:, :-1], ltWeights)
    return learnerTest(naFeatTrain, naFeatTest)[0][1]


def find_best_feature(features):
    """Greedy forward selection over the nine candidate features.

    First finds the single feature (index 0-8) with the highest correlation
    above 0, then keeps adding the one remaining feature that raises the best
    correlation so far, stopping when no candidate improves it. Progress and
    the final set are printed; nothing is returned.

    ``features`` is accepted for interface compatibility only: the original
    implementation overwrote it immediately, and the candidates are always
    taken from the module-level ``copyfeatures``/``copyargs``.

    Each print is a single pre-formatted string so the output is the same
    under Python 2 and Python 3.
    """
    hmax = 0
    allbest = []

    # First pass: best single feature.
    best = None
    for i in range(0, 9):
        print('testing feature set[ %s ,9]' % (i,))
        corr = _score_feature_subset([i])
        print('corr coef =  %s' % (corr,))
        if corr > hmax:
            hmax = corr
            best = i

    # Only continue when some feature beat the initial threshold; previously
    # this case crashed on an unbound `best`.
    if best is not None:
        allbest.append(best)

        # Remaining passes: try each unused feature on top of the current set.
        for k in range(0, 8):
            best = None
            for i in range(0, 9):
                if i in allbest:
                    continue
                print('testing feature set [ %s , %s ,9]' % (allbest, i))
                # Evaluate exactly the printed set; candidates rejected
                # earlier are not carried along.
                corr = _score_feature_subset(allbest + [i])
                if corr > hmax:
                    hmax = corr
                    best = i
                    print('corr coef = %s' % (hmax,))
                else:
                    print('corr coef =  %s' % (corr,))

            if best is None:
                break
            allbest.append(best)

    print('best feature set [ %s ,9]' % (allbest,))
    print('corr coef =  %s' % (hmax,))
    return