{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {'lLookback':5,'bDown':True},{'lLookback':10,'bDown':True},{'lLookback':20,'bDown':True},{'lLookback':5,'bDown':False},{'lLookback':10,'bDown':False},{'lLookback':20,'bDown':False},{'lLookback':5,'bDown':True,'MR':True},{'lLookback':10,'bDown':True,'MR':True},{'lLookback':20,'bDown':True,'MR':True},{'lLookback':5,'bDown':False,'MR':True},{'lLookback':10,'bDown':False,'MR':True},{'lLookback':20,'bDown':False,'MR':True},\ #{'lLookback':5,'bFast':True},{'lLookback':10,'bFast':True},{'lLookback':20,'bFast':True},{'lLookback':5,'bFast':False},{'lLookback':10,'bFast':False},{'lLookback':20,'bFast':False},{'lLookback':5,'bFast':True,'MR':True},{'lLookback':10,'bFast':True,'MR':True},{'lLookback':20,'bFast':True,'MR':True},{'lLookback':5,'bFast':False,'MR':True},{'lLookback':10,'bFast':False,'MR':True},{'lLookback':20,'bFast':False,'MR':True},\ {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\ {},\ {},\ {'i_lookforward':5} ] ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data ''' ldfFeaturesTrain = ftu.applyFeatures( dDataTrain, lfcFeatures, ldArgs, '$SPX') ldfFeaturesTest = ftu.applyFeatures( dDataTest, lfcFeatures, ldArgs, '$SPX') ''' Pick Test and Training Points ''' dtStartTrain = 
dt.datetime(2008,01,01) dtEndTrain = dt.datetime(2009,12,31) dtStartTest = dt.datetime(2010,01,01) dtEndTest = dt.datetime(2010,12,31) ''' Stack all information into one Numpy array ''' naFeatTrain = ftu.stackSyms( ldfFeaturesTrain, dtStartTrain, dtEndTrain ) naFeatTest = ftu.stackSyms( ldfFeaturesTest, dtStartTest, dtEndTest ) ''' Normalize features, use same normalization factors for testing data as training data ''' ltWeights = ftu.normFeatures( naFeatTrain, -1.0, 1.0, False ) ''' Normalize query points with same weights that come from test data '''
# Pull close-price and volume frames for every symbol over the shared NYSE
# timestamp index (norObj, ldtTimestamps and lsSym are defined earlier in
# this file).
dfPrice = norObj.get_data( ldtTimestamps, lsSym, 'close' )
dfVolume = norObj.get_data( ldtTimestamps, lsSym, 'volume' )

''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
lfcFeatures = [ featMA, featRSI, classFutRet ]

''' Default Arguments '''
#ldArgs = [{}] * len(lfcFeatures)

''' Custom Arguments '''
# One kwargs dict per feature function, positionally aligned with lfcFeatures:
# featMA gets a 30-day relative lookback; featRSI and classFutRet use defaults.
ldArgs = [ {'lLookback':30, 'bRel':True},\
           {},\
           {}]

''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
ldfFeatures = ftu.applyFeatures( dfPrice, dfVolume, lfcFeatures, ldArgs )

# Diagnostic plotting is disabled by default; flip to True to inspect features.
bPlot = False
if bPlot:
    ''' Plot feature for XOM '''
    # Skip the last entry (the classification target); draw price on top and
    # the derived feature underneath for a single symbol.
    for i, fcFunc in enumerate(lfcFeatures[:-1]):
        plt.clf()
        plt.subplot(211)
        plt.title( fcFunc.__name__ )
        plt.plot( dfPrice.index, dfPrice['XOM'].values, 'r-' )
        plt.subplot(212)
        plt.plot( dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-' )
        # NOTE(review): show() placed inside the loop (one figure per feature)
        # as inferred from the flattened source -- confirm against the
        # original layout.
        plt.show()

''' Pick Test and Training Points '''
''' Custom Arguments: one kwargs dict per entry in lfcFeatures (defined above).
Each group of six is lLookback 5/10/20 plus the same three with MR=True. '''
ldArgs = [ {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           # Down/up variants: bDown True/False at 5/10/20, plain and MR.
           {'lLookback':5,'bDown':True},{'lLookback':10,'bDown':True},{'lLookback':20,'bDown':True},{'lLookback':5,'bDown':False},{'lLookback':10,'bDown':False},{'lLookback':20,'bDown':False},{'lLookback':5,'bDown':True,'MR':True},{'lLookback':10,'bDown':True,'MR':True},{'lLookback':20,'bDown':True,'MR':True},{'lLookback':5,'bDown':False,'MR':True},{'lLookback':10,'bDown':False,'MR':True},{'lLookback':20,'bDown':False,'MR':True},
           # Disabled bFast variants kept for reference:
           #{'lLookback':5,'bFast':True},{'lLookback':10,'bFast':True},{'lLookback':20,'bFast':True},{'lLookback':5,'bFast':False},{'lLookback':10,'bFast':False},{'lLookback':20,'bFast':False},{'lLookback':5,'bFast':True,'MR':True},{'lLookback':10,'bFast':True,'MR':True},{'lLookback':20,'bFast':True,'MR':True},{'lLookback':5,'bFast':False,'MR':True},{'lLookback':10,'bFast':False,'MR':True},{'lLookback':20,'bFast':False,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},
           {},
           {},
           {'i_lookforward':5} ]

''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
ldfFeaturesTrain = ftu.applyFeatures( dDataTrain, lfcFeatures, ldArgs, '$SPX')
ldfFeaturesTest = ftu.applyFeatures( dDataTest, lfcFeatures, ldArgs, '$SPX')

''' Pick Test and Training Points '''
# The original used leading-zero literals (e.g. 01), which are octal-style
# syntax errors in Python 3 and deprecated in Python 2; plain integers are
# value-identical in both.
dtStartTrain = dt.datetime(2008, 1, 1)
dtEndTrain = dt.datetime(2009, 12, 31)
dtStartTest = dt.datetime(2010, 1, 1)
dtEndTest = dt.datetime(2010, 12, 31)

''' Stack all information into one Numpy array '''
naFeatTrain = ftu.stackSyms( ldfFeaturesTrain, dtStartTrain, dtEndTrain )
naFeatTest = ftu.stackSyms( ldfFeaturesTest, dtStartTest, dtEndTest )

''' Normalize features, use same normalization factors for testing data as training data '''
ltWeights = ftu.normFeatures( naFeatTrain, -1.0, 1.0, False )

''' Normalize query points with same weights that come from test data '''
ftu.normQuery( naFeatTest[:, :-1], ltWeights )

# All feature columns except the final classification label.
lFeatures = range(0, len(lfcFeatures) - 1)
def main():
    """Greedy forward feature selection over Dow-30 technical features.

    Builds feature/label matrices from Norgate price data, then repeatedly
    adds whichever remaining candidate feature most improves the
    out-of-sample correlation of a 5-NN regressor, stopping when no feature
    helps.  Progress and the final feature set are echoed to stdout and
    written to output.txt.
    """
    # symbols = np.loadtxt('./Examples/Features/symbols.txt',dtype='S10',comments='#')
    symbols = [
        "AA", "AXP", "BA", "BAC", "CAT", "CSCO", "CVX", "DD", "DIS", "GE",
        "HD", "HPQ", "IBM", "INTC", "JNJ", "JPM", "KFT", "KO", "MCD", "MMM",
        "MRK", "MSFT", "PFE", "PG", "T", "TRV", "UTX", "VZ", "WMT", "XOM",
    ]
    # symbols = ['XOM']

    # This is the start and end dates for the entire train and test data combined
    alldatastartday = dt.datetime(2007, 1, 1)
    alldataendday = dt.datetime(2010, 6, 30)
    timeofday = dt.timedelta(hours=16)  # NYSE close timestamps
    timestamps = du.getNYSEdays(alldatastartday, alldataendday, timeofday)

    dataobj = da.DataAccess("Norgate")
    # Forward-fill then back-fill NaN gaps.  The original called fillna()
    # with no arguments, which is a TypeError in pandas; the paired backfill
    # makes the forward-fill intent evident.
    voldata = dataobj.get_data(timestamps, symbols, "volume", verbose=True)
    voldata = voldata.fillna(method="ffill").fillna(method="backfill")
    close = dataobj.get_data(timestamps, symbols, "close", verbose=True)
    close = close.fillna(method="ffill").fillna(method="backfill")

    # Nine candidate features; the final entry (classFutRet) is the label.
    featureList = [
        featMA, featMA, featRSI, featRSI, featDrawDown, featRunUp,
        featVolumeDelta, featVolumeDelta, featAroon, classFutRet,
    ]
    # NOTE(review): elsewhere in this file the classFutRet kwarg is spelled
    # 'i_lookforward' -- confirm 'lLookforward' matches the qstkfeat version
    # in use.
    featureListArgs = [
        {"lLookback": 10, "bRel": True},
        {"lLookback": 20},
        {"lLookback": 10},
        {"lLookback": 20},
        {},
        {},
        {"lLookback": 10},
        {"lLookback": 20},
        {"bDown": False},
        {"lLookforward": 5},
    ]

    # John Cornwell's featuretest.py was consulted for the syntax of
    # ftu.applyFeatures() and ftu.stackSyms().
    allfeatureValues = ftu.applyFeatures(close, voldata, featureList, featureListArgs)

    # Training window: 2007-2009.
    trainstartday = dt.datetime(2007, 1, 1)
    trainendday = dt.datetime(2009, 12, 31)
    traintimestamps = du.getNYSEdays(trainstartday, trainendday, timeofday)
    trainingData = ftu.stackSyms(allfeatureValues, traintimestamps[0], traintimestamps[-1])
    scaleshiftvalues = ftu.normFeatures(trainingData, -1.0, 1.0, False)

    # Test window: first half of 2010, normalized with the TRAINING weights
    # (label column excluded).
    teststartday = dt.datetime(2010, 1, 1)
    testendday = dt.datetime(2010, 6, 30)
    testtimestamps = du.getNYSEdays(teststartday, testendday, timeofday)
    testData = ftu.stackSyms(allfeatureValues, testtimestamps[0], testtimestamps[-1])
    ftu.normQuery(testData[:, :-1], scaleshiftvalues)

    NUMFEATURES = 9  # candidate feature columns; column 9 is the label
    bestFeatureIndices = []
    bestCorrelation = 0.0
    # with-block guarantees output.txt is closed even if a learner call
    # raises (the original leaked the handle on error paths).
    with open("output.txt", "w") as fid:
        for iteration in range(NUMFEATURES):
            nextFeatureIndexToAdd = -1
            for featureIndex in range(NUMFEATURES):
                if featureIndex not in bestFeatureIndices:
                    # Trial set = current best set + this candidate; column 9
                    # is temporarily appended so the label rides along in the
                    # slice, then removed again.
                    bestFeatureIndices.append(featureIndex)
                    fid.write("testing feature set " + str(bestFeatureIndices) + "\n")
                    print("testing feature set " + str(bestFeatureIndices))
                    bestFeatureIndices.append(9)
                    curTrainingData = trainingData[:, bestFeatureIndices]
                    curTestData = testData[:, bestFeatureIndices]
                    bestFeatureIndices.remove(9)

                    # Fresh 5-NN learner per trial so evidence never leaks
                    # between candidate sets.
                    kdtlearner = knn.kdtknn(5, "mean", leafsize=100)
                    kdtlearner.addEvidence(curTrainingData[:, :-1], curTrainingData[:, -1])
                    testEstimatedValues = kdtlearner.query(curTestData[:, :-1])
                    testcorrelation = np.corrcoef(testEstimatedValues.T, curTestData[:, -1].T)
                    curCorrelation = testcorrelation[0, 1]
                    fid.write("corr coef = %.4f\n" % (curCorrelation))
                    print("corr coef = %.4f" % (curCorrelation))

                    # Track the candidate only if it beats the best score seen
                    # across ALL iterations so far.
                    if curCorrelation > bestCorrelation:
                        nextFeatureIndexToAdd = featureIndex
                        bestCorrelation = curCorrelation
                    bestFeatureIndices.remove(featureIndex)
            if nextFeatureIndexToAdd >= 0:
                bestFeatureIndices.append(nextFeatureIndexToAdd)
            else:
                break  # no remaining feature improves the correlation
        fid.write("best feature set is " + str(bestFeatureIndices) + "\n")
        print("best feature set is " + str(bestFeatureIndices))
        fid.write("corr coef = %.4f" % (bestCorrelation) + "\n")
        print("corr coef = %.4f" % (bestCorrelation))
# Fetch all requested keys in one call and zip them into a dict keyed by the
# entries of lsKeys (norObj, ldtTimestamps, lsSym, lsKeys defined earlier).
ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys )
dData = dict(zip(lsKeys, ldfData))

''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
lfcFeatures = [ featMA, featRSI, class_fut_ret ]

''' Default Arguments '''
#ldArgs = [{}] * len(lfcFeatures)

''' Custom Arguments '''
# One kwargs dict per feature function, positionally aligned with lfcFeatures.
ldArgs = [ {'lLookback':30, 'bRel':True},\
           {},\
           {}]

''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
ldfFeatures = ftu.applyFeatures( dData, lfcFeatures, ldArgs )

# Diagnostic plotting, off by default.
bPlot = False
if bPlot:
    ''' Plot feature for XOM '''
    # NOTE(review): dfPrice is not assigned anywhere in this fragment; if
    # bPlot is enabled this branch will raise NameError unless dfPrice is
    # defined elsewhere in the file -- verify before turning plotting on.
    for i, fcFunc in enumerate(lfcFeatures[:-1]):
        plt.clf()
        plt.subplot(211)
        plt.title( fcFunc.__name__ )
        plt.plot( dfPrice.index, dfPrice['XOM'].values, 'r-' )
        plt.subplot(212)
        plt.plot( dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-' )
        plt.show()

''' Pick Test and Training Points '''
def find_best_feature(features):
    """Greedy forward selection of features 0-8 against label column 9.

    Phase 1 scores each single feature paired with the label (index 9) and
    keeps the best; phase 2 repeatedly tries adding each unused feature to
    the running best set, keeping an addition only if it raises the test
    correlation, and stops at the first round with no improvement.

    NOTE(review): the `features` parameter is immediately shadowed by a
    local list and never used; relies on module globals copyfeatures,
    copyargs, dfPrice, dfVolume, dtStartTrain/dtEndTrain,
    dtStartTest/dtEndTest, store_train_and_test and learnerTest.
    Indentation reconstructed from a whitespace-flattened source --
    statement nesting inferred, confirm against the original layout.
    """
    #first for loop gets the first best feature.
    hmax = 0
    for i in range(0,9):
        print 'testing feature set[',i,',9]'
        args = []
        features = []
        # NOTE(review): these globals are declared but not assigned here;
        # presumably store_train_and_test() touches them -- verify.
        global train_data
        global test_data
        # Candidate feature plus the classification column (index 9).
        features.append(copyfeatures[i])
        features.append(copyfeatures[9])
        args.append(copyargs[i])
        args.append(copyargs[9])
        ldfFeatures_new = ftu.applyFeatures( dfPrice, dfVolume, features, args )
        naFeatTrain = ftu.stackSyms( ldfFeatures_new, dtStartTrain, dtEndTrain)
        naFeatTest = ftu.stackSyms( ldfFeatures_new, dtStartTest, dtEndTest )
        store_train_and_test(naFeatTrain,naFeatTest)
        #print naFeatTrain,"NEXT"
        #print naFeatTest
        # Normalize training features, then scale the test queries with the
        # same weights (label column excluded).
        ltWeights = ftu.normFeatures( naFeatTrain, -1.0, 1.0, False )
        ftu.normQuery( naFeatTest[:,:-1], ltWeights )
        corr_of_this = learnerTest(naFeatTrain,naFeatTest)
        print 'corr coef = ',corr_of_this[0][1]
        if(corr_of_this[0][1] > hmax):
            hmax = corr_of_this[0][1]
            feature = copyfeatures[i]
            best = i
    #print "best corr coef",hmax,'for',best
    #now all combinations are checked for all other 8 possible ones.
    allbest = []
    args = []
    features = []
    # Seed the running set with the phase-1 winner.
    features.append(copyfeatures[best])
    args.append(copyargs[best])
    h = 1    # count of features currently held in the running set
    allbest.append(best)
    for k in range(0,8):
        found = 0
        for i in range(0,9):
            if(i not in allbest):
                global train_data
                global test_data
                # Tentatively add candidate i plus the label column.
                features.append(copyfeatures[i])
                h = h+1
                features.append(copyfeatures[9])
                args.append(copyargs[i])
                args.append(copyargs[9])
                print 'testing feature set [',allbest,',',i,',9]'
                ldfFeatures_new = ftu.applyFeatures( dfPrice, dfVolume, features, args )
                naFeatTrain = ftu.stackSyms( ldfFeatures_new, dtStartTrain, dtEndTrain)
                naFeatTest = ftu.stackSyms( ldfFeatures_new, dtStartTest, dtEndTest )
                store_train_and_test(naFeatTrain,naFeatTest)
                #print naFeatTrain,"NEXT"
                #print naFeatTest
                ltWeights = ftu.normFeatures( naFeatTrain, -1.0, 1.0, False )
                ftu.normQuery( naFeatTest[:,:-1], ltWeights )
                corr_of_this = learnerTest(naFeatTrain,naFeatTest)
                if(corr_of_this[0][1] > hmax):
                    # New best: remember it, then strip both the candidate and
                    # the label so the next trial starts from the base set
                    # (the winner is re-appended after the inner loop).
                    found = 1
                    hmax = corr_of_this[0][1]
                    argument = copyargs[i]
                    feature = copyfeatures[i]
                    args.remove(copyargs[i])
                    features.remove(copyfeatures[i])
                    args.remove(copyargs[9])
                    features.remove(copyfeatures[9])
                    best = i
                    #allbest.append(best)
                    print 'corr coef =', hmax
                    #best = i + best
                else:
                    h = h - 1
                    print 'corr coef = ',corr_of_this[0][1]
                    # NOTE(review): only the label entry is removed here; the
                    # commented-out lines suggest candidate i's entries were
                    # meant to be removed too, so rejected candidates appear
                    # to remain in features/args -- verify intended behavior.
                    #args.remove(copyargs[i])
                    args.remove(copyargs[9])
                    #features.remove(copyfeatures[i])
                    features.remove(copyfeatures[9])
        if found==1:
            # Commit this round's winner to the running set.
            allbest.append(best)
            args.append(argument)
            features.append(feature)
        else:
            break
    #for k in range(0,h):
        #print 'best is',features[k]
    print 'best feature set [',allbest,',9]'
    print "corr coef = ",hmax
    return
# Open the Norgate data source and build the NYSE trading-day index (16:00
# timestamps) between dtStart and dtEnd, which are defined earlier in the file.
norObj = da.DataAccess('Norgate')
ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours=16))
lsKeys = ['open', 'high', 'low', 'close', 'volume']
# Fetch all five keys at once and zip into a dict keyed by lsKeys.
ldfData = norObj.get_data(ldtTimestamps, lsSym, lsKeys)
dData = dict(zip(lsKeys, ldfData))

''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
lfcFeatures = [featMA, featRSI, class_fut_ret]

''' Default Arguments '''
#ldArgs = [{}] * len(lfcFeatures)

''' Custom Arguments '''
# One kwargs dict per feature function, positionally aligned with lfcFeatures.
ldArgs = [ {'lLookback':30, 'bRel':True},\
           {},\
           {}]

''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
ldfFeatures = ftu.applyFeatures(dData, lfcFeatures, ldArgs)

# Diagnostic plotting, off by default.
bPlot = False
if bPlot:
    ''' Plot feature for XOM '''
    # NOTE(review): dfPrice is not assigned in this fragment (only dData is
    # built); enabling bPlot would NameError unless dfPrice exists elsewhere.
    for i, fcFunc in enumerate(lfcFeatures[:-1]):
        plt.clf()
        plt.subplot(211)
        plt.title(fcFunc.__name__)
        plt.plot(dfPrice.index, dfPrice['XOM'].values, 'r-')
        plt.subplot(212)
        plt.plot(dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-')
        plt.show()

''' Pick Test and Training Points '''
# 70/30 chronological split of the timestamp index; training starts at the
# first trading day.
lSplit = int(len(ldtTimestamps) * 0.7)
dtStartTrain = ldtTimestamps[0]
# Open the Norgate data source and build the NYSE trading-day index (16:00
# timestamps) between dtStart and dtEnd, which are defined earlier in the file.
norObj = da.DataAccess('Norgate')
ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours=16))
dfPrice = norObj.get_data(ldtTimestamps, lsSym, 'close')
dfVolume = norObj.get_data(ldtTimestamps, lsSym, 'volume')

''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
lfcFeatures = [featMA, featRSI, classFutRet]

''' Default Arguments '''
#ldArgs = [{}] * len(lfcFeatures)

''' Custom Arguments '''
# One kwargs dict per feature function, positionally aligned with lfcFeatures:
# featMA gets a 30-day relative lookback; the other two use defaults.
ldArgs = [ {'lLookback':30, 'bRel':True},\
           {},\
           {}]

''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
ldfFeatures = ftu.applyFeatures(dfPrice, dfVolume, lfcFeatures, ldArgs)

# Diagnostic plotting, off by default.
bPlot = False
if bPlot:
    ''' Plot feature for XOM '''
    # Skip the last entry (the classification target); price on top, feature
    # underneath, one figure per feature.
    for i, fcFunc in enumerate(lfcFeatures[:-1]):
        plt.clf()
        plt.subplot(211)
        plt.title(fcFunc.__name__)
        plt.plot(dfPrice.index, dfPrice['XOM'].values, 'r-')
        plt.subplot(212)
        plt.plot(dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-')
        plt.show()

''' Pick Test and Training Points '''
# 70/30 chronological split of the timestamp index; training starts at the
# first trading day.
lSplit = int(len(ldtTimestamps) * 0.7)
dtStartTrain = ldtTimestamps[0]
symbols = ['SINE_FAST'] dataobj = da.DataAccess('Yahoo') dtStart = dt.datetime(2008,1,1) dtEnd = dt.datetime(2008,12,31) timestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours = 16) ) Keys = ['open', 'high', 'low', 'close', 'volume'] lldata = dataobj.get_data( timestamps, symbols, Keys ) dData = dict(zip(Keys, lldata)) Features = [featMA, featBollinger, featMomentum, class_fut_ret] ldArgs = [ {'lLookback':20, 'bRel':True},\ {},\ {},\ {'i_lookforward':5}] llfeatures = featu.applyFeatures( dData, Features, ldArgs ) rLearner = RandomForestLearner.RandomForestLearner(_k = 15) kLearner = KNNLearner.KNNLearner(3) LinRegLearner = LinRegLearner.LinRegLearner() Xtrain = np.zeros( ( np.size(llfeatures[1].values) - 20, 3) ) Ytrain = np.zeros( np.size(llfeatures[1].values) - 20) def runKNNExperiment(ma,data=""): for k in k_axis: learner_k = knnl.KNNLearner(k, method = "mean") learner_k.addEvidence(xtrain, ytrain) Yret_k = learner_k.query(xtest)