''' Plot feature for XOM '''
        for i, fcFunc in enumerate(lfcFeatures[:-1]):
            plt.clf()
            plt.subplot(211)
            plt.title( fcFunc.__name__ )
            plt.plot( dfPrice.index, dfPrice['XOM'].values, 'r-' )
            plt.subplot(212)
            plt.plot( dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-' )
            plt.show()
     
    ''' Pick Test and Training Points '''
    lSplit = int(len(ldtTimestamps) * 0.7)
    dtStartTrain = ldtTimestamps[0]
    dtEndTrain = ldtTimestamps[lSplit]
    dtStartTest = ldtTimestamps[lSplit+1]
    dtEndTest = ldtTimestamps[-1]
     
    ''' Stack all information into one Numpy array ''' 
    naFeatTrain = ftu.stackSyms( ldfFeatures, dtStartTrain, dtEndTrain )
    naFeatTest = ftu.stackSyms( ldfFeatures, dtStartTest, dtEndTest )
    
    ''' Normalize features, use same normalization factors for testing data as training data '''
    ltWeights = ftu.normFeatures( naFeatTrain, -1.0, 1.0, False )
    ''' Normalize the test (query) points with the same weights that were computed from the training data '''
    ftu.normQuery( naFeatTest[:,:-1], ltWeights )

    learnerTest( naFeatTrain, naFeatTest )
    
    
    
Exemple #2
0
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    ldfFeaturesTrain = ftu.applyFeatures(dDataTrain, lfcFeatures, ldArgs,
                                         '$SPX')
    ldfFeaturesTest = ftu.applyFeatures(dDataTest, lfcFeatures, ldArgs, '$SPX')
    ''' Pick Test and Training Points '''
    dtStartTrain = dt.datetime(2008, 01, 01)
    dtEndTrain = dt.datetime(2009, 12, 31)
    dtStartTest = dt.datetime(2010, 01, 01)
    dtEndTest = dt.datetime(2010, 12, 31)
    ''' Stack all information into one Numpy array '''
    naFeatTrain = ftu.stackSyms(ldfFeaturesTrain, dtStartTrain, dtEndTrain)
    naFeatTest = ftu.stackSyms(ldfFeaturesTest, dtStartTest, dtEndTest)
    ''' Normalize features, use same normalization factors for testing data as training data '''
    ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False)
    ''' Normalize the test (query) points with the same weights that were computed from the training data '''
    ftu.normQuery(naFeatTest[:, :-1], ltWeights)

    lFeatures = range(0, len(lfcFeatures) - 1)
    classLabelIndex = len(lfcFeatures) - 1

    funccall = sys.argv[
        1] + '(naFeatTrain,naFeatTest,lFeatures,classLabelIndex)'

    timestart = time.time()
    clockstart = time.clock()
    eval(funccall)
    clockend = time.clock()
    timeend = time.time()

    sys.stdout.write('\n\nclock diff: ' + str(clockend - clockstart) + 'sec\n')
    sys.stdout.write('time diff: ' + str(timeend - timestart) + 'sec\n')
def main():
    """Run greedy forward feature selection and log the results.

    Loads volume and close data for a fixed list of symbols from the
    "Norgate" data source, computes the features listed in ``featureList``
    (the last entry, ``classFutRet``, supplies the column used as the label),
    and splits the stacked data into a 2007-2009 training set and a
    first-half-2010 test set.

    Starting from an empty feature set, each pass tries adding every unused
    feature, trains a ``knn.kdtknn`` learner on the training columns, and
    keeps the feature whose test-set correlation (estimated vs. actual label)
    is the highest seen so far.  Selection stops as soon as a pass fails to
    improve the best correlation.

    Side effects: progress and results are printed to stdout and written to
    ``output.txt`` in the current working directory.
    """
    # symbols = np.loadtxt('./Examples/Features/symbols.txt',dtype='S10',comments='#')
    symbols = [
        "AA",
        "AXP",
        "BA",
        "BAC",
        "CAT",
        "CSCO",
        "CVX",
        "DD",
        "DIS",
        "GE",
        "HD",
        "HPQ",
        "IBM",
        "INTC",
        "JNJ",
        "JPM",
        "KFT",
        "KO",
        "MCD",
        "MMM",
        "MRK",
        "MSFT",
        "PFE",
        "PG",
        "T",
        "TRV",
        "UTX",
        "VZ",
        "WMT",
        "XOM",
    ]
    # symbols = ['XOM']
    # This is the start and end dates for the entire train and test data combined
    alldatastartday = dt.datetime(2007, 1, 1)
    alldataendday = dt.datetime(2010, 6, 30)
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(alldatastartday, alldataendday, timeofday)
    dataobj = da.DataAccess("Norgate")

    # Forward-fill gaps first, then back-fill whatever is still missing at the
    # start of each series.  The forward fill is requested explicitly because a
    # bare fillna() raises on pandas versions that have no default fill method.
    voldata = dataobj.get_data(timestamps, symbols, "volume", verbose=True)
    voldata = voldata.fillna(method="ffill").fillna(method="backfill")
    close = dataobj.get_data(timestamps, symbols, "close", verbose=True)
    close = close.fillna(method="ffill").fillna(method="backfill")

    featureList = [
        featMA,
        featMA,
        featRSI,
        featRSI,
        featDrawDown,
        featRunUp,
        featVolumeDelta,
        featVolumeDelta,
        featAroon,
        classFutRet,
    ]
    featureListArgs = [
        {"lLookback": 10, "bRel": True},
        {"lLookback": 20},
        {"lLookback": 10},
        {"lLookback": 20},
        {},
        {},
        {"lLookback": 10},
        {"lLookback": 20},
        {"bDown": False},
        {"lLookforward": 5},
    ]

    # print 'Applying Features'
    #
    # John Cornwell's featuretest.py was consulted for figuring out the syntax of ftu.applyFeatures() methods and ftu.stackSyms() methods
    #
    allfeatureValues = ftu.applyFeatures(close, voldata, featureList, featureListArgs)

    trainstartday = dt.datetime(2007, 1, 1)
    trainendday = dt.datetime(2009, 12, 31)
    traintimestamps = du.getNYSEdays(trainstartday, trainendday, timeofday)
    # print 'Stack Syms for Training'
    trainingData = ftu.stackSyms(allfeatureValues, traintimestamps[0], traintimestamps[-1])
    # print 'Norm Features for Training'
    scaleshiftvalues = ftu.normFeatures(trainingData, -1.0, 1.0, False)

    teststartday = dt.datetime(2010, 1, 1)
    testendday = dt.datetime(2010, 6, 30)
    testtimestamps = du.getNYSEdays(teststartday, testendday, timeofday)
    # print 'Stack Syms for Test'
    testData = ftu.stackSyms(allfeatureValues, testtimestamps[0], testtimestamps[-1])
    # print 'Norm Features for Test'
    # Only the feature columns are normalized; the last column is left as is.
    ftu.normQuery(testData[:, :-1], scaleshiftvalues)

    # The last entry of featureList produces the label column; every entry
    # before it is a candidate feature.
    labelIndex = len(featureList) - 1
    NUMFEATURES = labelIndex
    bestFeatureIndices = []
    bestCorrelation = 0.0

    # The context manager guarantees the log is flushed and closed even if the
    # learner or the data utilities raise part-way through the search.
    with open("output.txt", "w") as fid:
        for _ in range(NUMFEATURES):
            nextFeatureIndexToAdd = -1

            for featureIndex in range(NUMFEATURES):
                if featureIndex in bestFeatureIndices:
                    continue

                # Candidate set = features chosen so far + the one under test.
                candidateIndices = bestFeatureIndices + [featureIndex]

                fid.write("testing feature set " + str(candidateIndices) + "\n")
                print("testing feature set " + str(candidateIndices))

                # Select the candidate columns with the label column last.
                columns = candidateIndices + [labelIndex]
                curTrainingData = trainingData[:, columns]
                curTestData = testData[:, columns]

                kdtlearner = knn.kdtknn(5, "mean", leafsize=100)
                kdtlearner.addEvidence(curTrainingData[:, :-1], curTrainingData[:, -1])
                testEstimatedValues = kdtlearner.query(curTestData[:, :-1])
                testcorrelation = np.corrcoef(testEstimatedValues.T, curTestData[:, -1].T)
                curCorrelation = testcorrelation[0, 1]

                fid.write("corr coef = %.4f\n" % (curCorrelation))
                print("corr coef = %.4f" % (curCorrelation))

                if curCorrelation > bestCorrelation:
                    nextFeatureIndexToAdd = featureIndex
                    bestCorrelation = curCorrelation

            # Stop once a full pass fails to beat the best correlation so far.
            if nextFeatureIndexToAdd < 0:
                break
            bestFeatureIndices.append(nextFeatureIndexToAdd)

        fid.write("best feature set is " + str(bestFeatureIndices) + "\n")
        print("best feature set is " + str(bestFeatureIndices))
        fid.write("corr coef = %.4f" % (bestCorrelation) + "\n")
        print("corr coef = %.4f" % (bestCorrelation))
Exemple #4
0
def _evaluate_feature_subset(indices):
    """Build, normalize and score the data set for one subset of features.

    ``indices`` are positions into the module-level ``copyfeatures`` /
    ``copyargs`` lists; the label entry is expected to be last.  Features and
    their arguments are looked up by index so that each feature function is
    always paired with its own argument dict.

    Returns element ``[0][1]`` of whatever ``learnerTest`` returns
    (presumably the off-diagonal entry of a correlation matrix -- confirm
    against ``learnerTest``).
    """
    subset_features = [copyfeatures[j] for j in indices]
    subset_args = [copyargs[j] for j in indices]

    ldfFeatures_new = ftu.applyFeatures(dfPrice, dfVolume, subset_features, subset_args)
    naFeatTrain = ftu.stackSyms(ldfFeatures_new, dtStartTrain, dtEndTrain)
    naFeatTest = ftu.stackSyms(ldfFeatures_new, dtStartTest, dtEndTest)
    # Same call order as before: the arrays are handed to
    # store_train_and_test before the normalization calls below.
    store_train_and_test(naFeatTrain, naFeatTest)
    ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False)
    ftu.normQuery(naFeatTest[:, :-1], ltWeights)

    return learnerTest(naFeatTrain, naFeatTest)[0][1]


def find_best_feature(features):
    """Greedy forward selection over the first nine entries of ``copyfeatures``.

    The first pass scores every single feature on its own (paired with the
    label entry at index 9) and keeps the best one.  Each following pass tries
    adding one more unused feature to the selected set and keeps the addition
    that raises the best correlation seen so far; the search stops when a pass
    brings no improvement.

    Progress and the final result are printed to stdout; nothing is returned.

    Parameters:
        features: unused.  Kept only so existing callers keep working; the
            candidates are always read from the module-level ``copyfeatures``
            and ``copyargs``.

    Fixes relative to the previous implementation:
      * a rejected candidate is no longer left in the working feature list, so
        the set that is evaluated is exactly the set that is printed;
      * features and arguments are selected by index instead of
        ``list.remove``, which could misalign the two lists when the same
        feature function occurs twice;
      * if no single feature reaches a correlation above 0 the function
        reports an empty set instead of failing with a NameError.
    """
    label_index = 9       # position of the label entry in copyfeatures
    candidate_count = 9   # entries 0..8 are the candidate features

    hmax = 0
    selected = []

    # Pass 1: find the best single feature.
    best = None
    for i in range(candidate_count):
        print('testing feature set[ %s ,%s]' % (i, label_index))
        corr = _evaluate_feature_subset([i, label_index])
        print('corr coef =  %s' % (corr,))
        if corr > hmax:
            hmax = corr
            best = i

    if best is not None:
        selected.append(best)

        # Later passes: try to extend the selected set by one feature each.
        for _ in range(candidate_count - 1):
            best = None
            for i in range(candidate_count):
                if i in selected:
                    continue

                print('testing feature set [ %s , %s ,%s]' % (selected, i, label_index))
                corr = _evaluate_feature_subset(selected + [i, label_index])

                if corr > hmax:
                    hmax = corr
                    best = i
                    print('corr coef = %s' % (hmax,))
                else:
                    print('corr coef =  %s' % (corr,))

            if best is None:
                # No candidate improved on the best correlation: stop here.
                break
            selected.append(best)

    print('best feature set [ %s ,%s]' % (selected, label_index))
    print('corr coef =  %s' % (hmax,))
    return