Example #1
0
				{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
				{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
				{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
				{'lLookback':5,'bDown':True},{'lLookback':10,'bDown':True},{'lLookback':20,'bDown':True},{'lLookback':5,'bDown':False},{'lLookback':10,'bDown':False},{'lLookback':20,'bDown':False},{'lLookback':5,'bDown':True,'MR':True},{'lLookback':10,'bDown':True,'MR':True},{'lLookback':20,'bDown':True,'MR':True},{'lLookback':5,'bDown':False,'MR':True},{'lLookback':10,'bDown':False,'MR':True},{'lLookback':20,'bDown':False,'MR':True},\
				#{'lLookback':5,'bFast':True},{'lLookback':10,'bFast':True},{'lLookback':20,'bFast':True},{'lLookback':5,'bFast':False},{'lLookback':10,'bFast':False},{'lLookback':20,'bFast':False},{'lLookback':5,'bFast':True,'MR':True},{'lLookback':10,'bFast':True,'MR':True},{'lLookback':20,'bFast':True,'MR':True},{'lLookback':5,'bFast':False,'MR':True},{'lLookback':10,'bFast':False,'MR':True},{'lLookback':20,'bFast':False,'MR':True},\
				{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
				{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
				{'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
				{},\
				{},\
				{'i_lookforward':5}
				]
	
	
	''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
	ldfFeaturesTrain = ftu.applyFeatures( dDataTrain, lfcFeatures, ldArgs, '$SPX')
	ldfFeaturesTest = ftu.applyFeatures( dDataTest, lfcFeatures, ldArgs, '$SPX')

	''' Pick Test and Training Points '''		
	dtStartTrain = dt.datetime(2008,01,01)
	dtEndTrain = dt.datetime(2009,12,31)
	dtStartTest = dt.datetime(2010,01,01)
	dtEndTest = dt.datetime(2010,12,31)
	
	''' Stack all information into one Numpy array ''' 
	naFeatTrain = ftu.stackSyms( ldfFeaturesTrain, dtStartTrain, dtEndTrain )
	naFeatTest = ftu.stackSyms( ldfFeaturesTest, dtStartTest, dtEndTest )
	
	''' Normalize features, use same normalization factors for testing data as training data '''
	ltWeights = ftu.normFeatures( naFeatTrain, -1.0, 1.0, False )
	''' Normalize query points with same weights that come from test data '''
    dfPrice = norObj.get_data( ldtTimestamps, lsSym, 'close' )
    dfVolume = norObj.get_data( ldtTimestamps, lsSym, 'volume' )
    
    ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures = [ featMA, featRSI, classFutRet ]

    ''' Default Arguments '''
    #ldArgs = [{}] * len(lfcFeatures) 
    
    ''' Custom Arguments '''
    ldArgs = [ {'lLookback':30, 'bRel':True},\
               {},\
               {}]                    
    
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    ldfFeatures = ftu.applyFeatures( dfPrice, dfVolume, lfcFeatures, ldArgs )
    
    
    bPlot = False
    if bPlot:
        ''' Plot feature for XOM '''
        for i, fcFunc in enumerate(lfcFeatures[:-1]):
            plt.clf()
            plt.subplot(211)
            plt.title( fcFunc.__name__ )
            plt.plot( dfPrice.index, dfPrice['XOM'].values, 'r-' )
            plt.subplot(212)
            plt.plot( dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-' )
            plt.show()
     
    ''' Pick Test and Training Points '''
Example #3
0
    ldArgs = [  {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5,'bDown':True},{'lLookback':10,'bDown':True},{'lLookback':20,'bDown':True},{'lLookback':5,'bDown':False},{'lLookback':10,'bDown':False},{'lLookback':20,'bDown':False},{'lLookback':5,'bDown':True,'MR':True},{'lLookback':10,'bDown':True,'MR':True},{'lLookback':20,'bDown':True,'MR':True},{'lLookback':5,'bDown':False,'MR':True},{'lLookback':10,'bDown':False,'MR':True},{'lLookback':20,'bDown':False,'MR':True},\
				#{'lLookback':5,'bFast':True},{'lLookback':10,'bFast':True},{'lLookback':20,'bFast':True},{'lLookback':5,'bFast':False},{'lLookback':10,'bFast':False},{'lLookback':20,'bFast':False},{'lLookback':5,'bFast':True,'MR':True},{'lLookback':10,'bFast':True,'MR':True},{'lLookback':20,'bFast':True,'MR':True},{'lLookback':5,'bFast':False,'MR':True},{'lLookback':10,'bFast':False,'MR':True},{'lLookback':20,'bFast':False,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {'lLookback':5},{'lLookback':10},{'lLookback':20}, {'lLookback':5,'MR':True},{'lLookback':10,'MR':True},{'lLookback':20,'MR':True},\
       {},\
       {},\
       {'i_lookforward':5}
       ]
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    ldfFeaturesTrain = ftu.applyFeatures(dDataTrain, lfcFeatures, ldArgs,
                                         '$SPX')
    ldfFeaturesTest = ftu.applyFeatures(dDataTest, lfcFeatures, ldArgs, '$SPX')
    ''' Pick Test and Training Points '''
    dtStartTrain = dt.datetime(2008, 01, 01)
    dtEndTrain = dt.datetime(2009, 12, 31)
    dtStartTest = dt.datetime(2010, 01, 01)
    dtEndTest = dt.datetime(2010, 12, 31)
    ''' Stack all information into one Numpy array '''
    naFeatTrain = ftu.stackSyms(ldfFeaturesTrain, dtStartTrain, dtEndTrain)
    naFeatTest = ftu.stackSyms(ldfFeaturesTest, dtStartTest, dtEndTest)
    ''' Normalize features, use same normalization factors for testing data as training data '''
    ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False)
    ''' Normalize query points with same weights that come from test data '''
    ftu.normQuery(naFeatTest[:, :-1], ltWeights)

    lFeatures = range(0, len(lfcFeatures) - 1)
def main():
    """Run greedy forward feature selection over technical features of the Dow 30.

    Loads close/volume data, builds nine candidate features plus a future-return
    class label (column 9), trains a kdt-KNN regressor on 2007-2009 and scores
    candidate feature subsets by correlation of predictions vs. actuals on the
    first half of 2010. Results are echoed to stdout and logged to output.txt.
    """
    # symbols = np.loadtxt('./Examples/Features/symbols.txt',dtype='S10',comments='#')
    # Dow 30 constituents (circa this example's vintage) as the stock universe.
    symbols = [
        "AA",
        "AXP",
        "BA",
        "BAC",
        "CAT",
        "CSCO",
        "CVX",
        "DD",
        "DIS",
        "GE",
        "HD",
        "HPQ",
        "IBM",
        "INTC",
        "JNJ",
        "JPM",
        "KFT",
        "KO",
        "MCD",
        "MMM",
        "MRK",
        "MSFT",
        "PFE",
        "PG",
        "T",
        "TRV",
        "UTX",
        "VZ",
        "WMT",
        "XOM",
    ]
    # symbols = ['XOM']
    # This is the start and end dates for the entire train and test data combined
    alldatastartday = dt.datetime(2007, 1, 1)
    alldataendday = dt.datetime(2010, 6, 30)
    timeofday = dt.timedelta(hours=16)
    timestamps = du.getNYSEdays(alldatastartday, alldataendday, timeofday)
    dataobj = da.DataAccess("Norgate")
    # Forward-fill first, then back-fill the leading NaNs.
    # (fillna() with no arguments is invalid in modern pandas; the original
    # relied on the old default of method='pad'.)
    voldata = dataobj.get_data(timestamps, symbols, "volume", verbose=True)
    voldata = (voldata.fillna(method="ffill")).fillna(method="backfill")
    close = dataobj.get_data(timestamps, symbols, "close", verbose=True)
    close = (close.fillna(method="ffill")).fillna(method="backfill")

    # Nine candidate features; classFutRet (last) is the regression target.
    featureList = [
        featMA,
        featMA,
        featRSI,
        featRSI,
        featDrawDown,
        featRunUp,
        featVolumeDelta,
        featVolumeDelta,
        featAroon,
        classFutRet,
    ]
    featureListArgs = [
        {"lLookback": 10, "bRel": True},
        {"lLookback": 20},
        {"lLookback": 10},
        {"lLookback": 20},
        {},
        {},
        {"lLookback": 10},
        {"lLookback": 20},
        {"bDown": False},
        {"lLookforward": 5},
    ]

    # print 'Applying Features'
    #
    # John Cornwell's featuretest.py was consulted for figuring out the syntax of ftu.applyFeatures() methods and ftu.stackSyms() methods
    #
    allfeatureValues = ftu.applyFeatures(close, voldata, featureList, featureListArgs)

    trainstartday = dt.datetime(2007, 1, 1)
    trainendday = dt.datetime(2009, 12, 31)
    traintimestamps = du.getNYSEdays(trainstartday, trainendday, timeofday)
    # print 'Stack Syms for Training'
    trainingData = ftu.stackSyms(allfeatureValues, traintimestamps[0], traintimestamps[-1])
    # print 'Norm Features for Training'
    scaleshiftvalues = ftu.normFeatures(trainingData, -1.0, 1.0, False)

    teststartday = dt.datetime(2010, 1, 1)
    testendday = dt.datetime(2010, 6, 30)
    testtimestamps = du.getNYSEdays(teststartday, testendday, timeofday)
    # print 'Stack Syms for Test'
    testData = ftu.stackSyms(allfeatureValues, testtimestamps[0], testtimestamps[-1])
    # print 'Norm Features for Test'
    ftu.normQuery(testData[:, :-1], scaleshiftvalues)

    NUMFEATURES = 9  # candidate feature columns 0..8; column 9 is the class label
    LABELCOL = 9
    bestFeatureIndices = []
    bestCorrelation = 0.0

    # 'with' guarantees the log file is closed even if a learner call raises.
    with open("output.txt", "w") as fid:
        for iteration in range(NUMFEATURES):
            nextFeatureIndexToAdd = -1

            for featureIndex in range(NUMFEATURES):

                if featureIndex not in bestFeatureIndices:

                    # Candidate set = current best set plus the trial feature.
                    candidate = bestFeatureIndices + [featureIndex]

                    fid.write("testing feature set " + str(candidate) + "\n")
                    print("testing feature set " + str(candidate))

                    # Append the label column for slicing; building fresh lists
                    # avoids the original append/remove mutation dance.
                    cols = candidate + [LABELCOL]
                    curTrainingData = trainingData[:, cols]
                    curTestData = testData[:, cols]

                    kdtlearner = knn.kdtknn(5, "mean", leafsize=100)
                    kdtlearner.addEvidence(curTrainingData[:, :-1], curTrainingData[:, -1])
                    testEstimatedValues = kdtlearner.query(curTestData[:, :-1])
                    testcorrelation = np.corrcoef(testEstimatedValues.T, curTestData[:, -1].T)
                    curCorrelation = testcorrelation[0, 1]

                    fid.write("corr coef = %.4f\n" % (curCorrelation))
                    print("corr coef = %.4f" % (curCorrelation))

                    if curCorrelation > bestCorrelation:
                        nextFeatureIndexToAdd = featureIndex
                        bestCorrelation = curCorrelation

            # Stop as soon as no remaining feature improves the correlation.
            if nextFeatureIndexToAdd >= 0:
                bestFeatureIndices.append(nextFeatureIndexToAdd)
            else:
                break

        fid.write("best feature set is " + str(bestFeatureIndices) + "\n")
        print("best feature set is " + str(bestFeatureIndices))
        fid.write("corr coef = %.4f" % (bestCorrelation) + "\n")
        print("corr coef = %.4f" % (bestCorrelation))
Example #5
0
    ldfData = norObj.get_data( ldtTimestamps, lsSym, lsKeys )
    dData = dict(zip(lsKeys, ldfData))
    
    ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures = [ featMA, featRSI, class_fut_ret ]

    ''' Default Arguments '''
    #ldArgs = [{}] * len(lfcFeatures) 
    
    ''' Custom Arguments '''
    ldArgs = [ {'lLookback':30, 'bRel':True},\
               {},\
               {}]                    
    
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    ldfFeatures = ftu.applyFeatures( dData, lfcFeatures, ldArgs )
    
    
    bPlot = False
    if bPlot:
        ''' Plot feature for XOM '''
        for i, fcFunc in enumerate(lfcFeatures[:-1]):
            plt.clf()
            plt.subplot(211)
            plt.title( fcFunc.__name__ )
            plt.plot( dfPrice.index, dfPrice['XOM'].values, 'r-' )
            plt.subplot(212)
            plt.plot( dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-' )
            plt.show()
     
    ''' Pick Test and Training Points '''
Example #6
0
def find_best_feature(features):
    """Greedy forward selection over candidate features 0..8 (column 9 = label).

    Pass 1 scores each single feature paired with the class label; pass 2
    repeatedly adds whichever remaining feature most improves the test-set
    correlation, stopping when no feature improves it.

    Relies on module-level globals: copyfeatures / copyargs (feature functions
    and their kwargs, index 9 being the classification label), dfPrice,
    dfVolume, dtStartTrain/dtEndTrain/dtStartTest/dtEndTest, and the helpers
    store_train_and_test and learnerTest.

    NOTE(review): the `features` parameter is never read (it was immediately
    overwritten in the original); kept only for interface compatibility.

    The original body was whitespace-mangled (mixed tabs/spaces, would not
    parse) and had two defects fixed here:
      * a rejected candidate was never removed from the working feature/arg
        lists (the removals were commented out), so later trials silently
        included stale features;
      * `args.remove(<dict>)` removed by equality and could delete the wrong
        entry when several features share identical args (e.g. {}).
    Both are avoided by rebuilding the trial lists per candidate.
    """

    def _score(index_set):
        # Build features/args for the candidate set plus the label column,
        # recompute train/test matrices, normalize, and return the learner's
        # correlation matrix.
        trial_features = [copyfeatures[j] for j in index_set] + [copyfeatures[9]]
        trial_args = [copyargs[j] for j in index_set] + [copyargs[9]]
        ldfFeatures_new = ftu.applyFeatures(dfPrice, dfVolume, trial_features, trial_args)
        naFeatTrain = ftu.stackSyms(ldfFeatures_new, dtStartTrain, dtEndTrain)
        naFeatTest = ftu.stackSyms(ldfFeatures_new, dtStartTest, dtEndTest)
        store_train_and_test(naFeatTrain, naFeatTest)
        ltWeights = ftu.normFeatures(naFeatTrain, -1.0, 1.0, False)
        ftu.normQuery(naFeatTest[:, :-1], ltWeights)
        return learnerTest(naFeatTrain, naFeatTest)

    # --- Pass 1: best single feature paired with the label (column 9). ---
    hmax = 0
    best = -1
    for i in range(0, 9):
        print('testing feature set[', i, ',9]')
        corr_of_this = _score([i])
        print('corr coef = ', corr_of_this[0][1])
        if corr_of_this[0][1] > hmax:
            hmax = corr_of_this[0][1]
            best = i

    # Guard: if no feature produced positive correlation the original code
    # crashed (or silently indexed copyfeatures[-1]); report and stop instead.
    if best < 0:
        print('no feature achieved positive correlation')
        return

    # --- Pass 2: grow the set while correlation keeps improving. ---
    allbest = [best]
    for k in range(0, 8):
        found = 0
        for i in range(0, 9):
            if i not in allbest:
                print('testing feature set [', allbest, ',', i, ',9]')
                corr_of_this = _score(allbest + [i])
                if corr_of_this[0][1] > hmax:
                    found = 1
                    hmax = corr_of_this[0][1]
                    best = i
                    print('corr coef =', hmax)
                else:
                    print('corr coef = ', corr_of_this[0][1])
        if found == 1:
            allbest.append(best)
        else:
            break

    print('best feature set [', allbest, ',9]')
    print("corr coef = ", hmax)
    return
Example #7
0
    norObj = da.DataAccess('Norgate')
    ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours=16))

    lsKeys = ['open', 'high', 'low', 'close', 'volume']
    ldfData = norObj.get_data(ldtTimestamps, lsSym, lsKeys)
    dData = dict(zip(lsKeys, ldfData))
    ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures = [featMA, featRSI, class_fut_ret]
    ''' Default Arguments '''
    #ldArgs = [{}] * len(lfcFeatures)
    ''' Custom Arguments '''
    ldArgs = [ {'lLookback':30, 'bRel':True},\
               {},\
               {}]
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    ldfFeatures = ftu.applyFeatures(dData, lfcFeatures, ldArgs)

    bPlot = False
    if bPlot:
        ''' Plot feature for XOM '''
        for i, fcFunc in enumerate(lfcFeatures[:-1]):
            plt.clf()
            plt.subplot(211)
            plt.title(fcFunc.__name__)
            plt.plot(dfPrice.index, dfPrice['XOM'].values, 'r-')
            plt.subplot(212)
            plt.plot(dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-')
            plt.show()
    ''' Pick Test and Training Points '''
    lSplit = int(len(ldtTimestamps) * 0.7)
    dtStartTrain = ldtTimestamps[0]
    norObj = da.DataAccess('Norgate')
    ldtTimestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours=16))

    dfPrice = norObj.get_data(ldtTimestamps, lsSym, 'close')
    dfVolume = norObj.get_data(ldtTimestamps, lsSym, 'volume')
    ''' Imported functions from qstkfeat.features, NOTE: last function is classification '''
    lfcFeatures = [featMA, featRSI, classFutRet]
    ''' Default Arguments '''
    #ldArgs = [{}] * len(lfcFeatures)
    ''' Custom Arguments '''
    ldArgs = [ {'lLookback':30, 'bRel':True},\
               {},\
               {}]
    ''' Generate a list of DataFrames, one for each feature, with the same index/column structure as price data '''
    ldfFeatures = ftu.applyFeatures(dfPrice, dfVolume, lfcFeatures, ldArgs)

    bPlot = False
    if bPlot:
        ''' Plot feature for XOM '''
        for i, fcFunc in enumerate(lfcFeatures[:-1]):
            plt.clf()
            plt.subplot(211)
            plt.title(fcFunc.__name__)
            plt.plot(dfPrice.index, dfPrice['XOM'].values, 'r-')
            plt.subplot(212)
            plt.plot(dfPrice.index, ldfFeatures[i]['XOM'].values, 'g-')
            plt.show()
    ''' Pick Test and Training Points '''
    lSplit = int(len(ldtTimestamps) * 0.7)
    dtStartTrain = ldtTimestamps[0]
# Script fragment: build features for a synthetic sine series ('SINE_FAST')
# and pre-allocate arrays for the learners constructed below.
symbols = ['SINE_FAST']
dataobj = da.DataAccess('Yahoo')
dtStart = dt.datetime(2008,1,1)
dtEnd = dt.datetime(2008,12,31)
timestamps = du.getNYSEdays(dtStart, dtEnd, dt.timedelta(hours = 16) )

# Pull OHLCV frames and key them by field name for applyFeatures.
Keys = ['open', 'high', 'low', 'close', 'volume']
lldata = dataobj.get_data( timestamps, symbols, Keys )
dData = dict(zip(Keys, lldata))
# Three features plus the classification target (future return) as the last entry.
Features = [featMA, featBollinger, featMomentum, class_fut_ret]
ldArgs = [ {'lLookback':20, 'bRel':True},\
{},\
{},\
{'i_lookforward':5}] 
llfeatures = featu.applyFeatures( dData, Features, ldArgs )
rLearner = RandomForestLearner.RandomForestLearner(_k = 15)
kLearner = KNNLearner.KNNLearner(3)
# NOTE(review): this rebinds the module name LinRegLearner to an instance,
# shadowing the imported module — any later LinRegLearner.LinRegLearner()
# call would fail. Consider renaming the variable (left as-is: later uses
# of this name are outside the visible chunk).
LinRegLearner = LinRegLearner.LinRegLearner()
# 20 rows trimmed — presumably the lLookback=20 warm-up period; TODO confirm.
Xtrain = np.zeros( ( np.size(llfeatures[1].values) - 20, 3) )
Ytrain = np.zeros( np.size(llfeatures[1].values) - 20)




def runKNNExperiment(ma,data=""):		
	for k in k_axis:

		learner_k = knnl.KNNLearner(k, method = "mean")
		learner_k.addEvidence(xtrain, ytrain)
		Yret_k = learner_k.query(xtest)