def CompareModels(modelOneName: str, modelOneFunction, modelTwoName: str, modelTwoFunction, ticker: str, startDate: str, durationInYears: int, portfolioSize: int = 10000):
    """Compare two models to measure the difference in returns.

    Both models are executed through RunModel with the same ticker, start
    date, duration and portfolio size, with history saving and verbose output
    switched off.  When both runs return at least one row, the two results are
    joined (overlapping columns get a '_<model name>' suffix) and drawn on a
    single chart.  Nothing is returned.
    """
    results = []
    for name, function in ((modelOneName, modelOneFunction), (modelTwoName, modelTwoFunction)):
        results.append(RunModel(name, function, ticker, startDate, durationInYears, portfolioSize, saveHistoryToFile=False, returndailyValues=True, verbose=False))
    firstValues, secondValues = results
    if firstValues.shape[0] <= 0 or secondValues.shape[0] <= 0:
        return  # At least one model produced no rows, so there is nothing to compare
    combined = firstValues.join(secondValues, lsuffix='_' + modelOneName, rsuffix='_' + modelTwoName)
    PlotHelper().PlotDataFrame(combined, ticker + ' Model Comparison', 'Date', 'Value')
def SampleCNN(ticker: str):
    """Print sample CNN graphs of ticker.

    The CNN treats price data as a picture and anticipates the next picture,
    so this plots ten consecutive "sample" windows of the normalized 'Average'
    price, each paired with the "target" window that follows it.  Images are
    written under dataFolder + 'samples/'.  Nothing is returned; if the price
    history cannot be loaded the function does nothing beyond the initial
    'Loading' message.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory():
        return
    prices.NormalizePrices()
    window_size = 80        # business days shown in each sample image
    target_size = 10        # business days shown in each target image
    daysInTraining = 800    # business days spanned by all the sample windows together
    sampleCount = daysInTraining // window_size  # 10 windows, same count the loop always used
    sampleData = prices.GetPriceHistory()
    endDate = sampleData.index.max()
    cuttoffDate = endDate - BDay(window_size)
    startDate = cuttoffDate - BDay(daysInTraining)
    # Forward slash instead of the original 'samples\C...': '\C' is an invalid
    # escape sequence, and every other path in this function already uses '/'.
    print(dataFolder + 'samples/CNNsampleLearning', startDate, cuttoffDate, endDate)
    for i in range(sampleCount):
        d1 = startDate + BDay(i * window_size)  # date handed to the plot helper for the sample window
        d2 = d1 + BDay(target_size)             # date handed to the plot helper for the target window
        print(d1, d2, window_size, target_size)
        plot.PlotDataFrameDateRange(sampleData[['Average']], d1, window_size, 'Sample image ' + str(i), 'Date', 'Price', dataFolder + 'samples/CNN' + str(i) + 'Sample')
        plot.PlotDataFrameDateRange(sampleData[['Average']], d2, target_size, 'Target image ' + str(i), 'Date', 'Price', dataFolder + 'samples/CNN' + str(i) + 'Target')
def CreateAdditionalGraph():
    """Re-plot the last 500 and 1000 days of every prediction CSV under dataFolder.

    Walks dataFolder recursively, loads each '.csv' file with its first column
    as a parsed date index, and plots the 'Average' and 'Average_Predicted'
    columns.  Graphs are written to dataFolder using the CSV's base name plus
    a '_last<N>Days' suffix.  CSV files that do not contain both columns are
    reported and skipped instead of aborting the whole walk with a KeyError.
    """
    plot = PlotHelper()
    plotColumns = ['Average', 'Average_Predicted']
    for root, _dirs, files in os.walk(dataFolder):
        for f in files:
            if not f.endswith('.csv'):
                continue
            predDF = pandas.read_csv(os.path.join(root, f), index_col=0, parse_dates=True, na_values=['nan'])
            modelDescription = os.path.splitext(f)[0]
            missing = [c for c in plotColumns if c not in predDF.columns]
            if missing:
                # Other procedures write CSVs with different column names to the same folder
                print('Skipping ' + f + ', missing columns: ' + ', '.join(missing))
                continue
            print(modelDescription)
            for days in (500, 1000):
                suffix = '_last' + str(days) + 'Days'
                plot.PlotDataFrameDateRange(predDF[plotColumns], None, days, modelDescription + suffix, 'Date', 'Price', dataFolder + modelDescription + suffix)
def SampleGraphs(ticker: str, daysInGraph: int):
    """Plot a series of sample Open/High/Low/Close windows for ticker.

    Loads and normalizes the price history, then steps back from the most
    recent date in 10-business-day increments (offsets 0, 10, ... 190) and
    plots a daysInGraph-sized window for each offset.  Images are written to
    dataFolder + 'samples/sample<offset>_<daysInGraph>'.  Nothing is returned.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return
    prices.NormalizePrices()
    history = prices.GetPriceHistory()
    lastDate = history.index[-1]
    ohlcColumns = ['Open', 'High', 'Low', 'Close']
    chartTitle = 'Sample window ' + str(daysInGraph)
    for offset in range(0, 200, 10):
        windowDate = lastDate - BDay(offset)  # business day this window is anchored on
        plot.PlotDataFrameDateRange(history[ohlcColumns], windowDate, daysInGraph, chartTitle, 'Date', 'Price', dataFolder + 'samples/sample' + str(offset) + '_' + str(daysInGraph))
def PlotModeldailyValue(ticker: str, startDate: str, durationInYears: int, modelName: str, modelFunction):
    """Run a model function directly and chart the daily values it returns.

    modelFunction is called with (ticker, startDate, durationInYears, 10000)
    plus verbose=False, saveHistoryToFile=True and returndailyValues=True.
    The fourth positional argument is presumably the starting portfolio size
    (confirm against the model functions).  The result is plotted only when it
    has at least one row.  Nothing is returned.
    """
    dailyValues = modelFunction(ticker, startDate, durationInYears, 10000, verbose=False, saveHistoryToFile=True, returndailyValues=True)
    if dailyValues.shape[0] <= 0:
        return  # Empty result, nothing to draw
    PlotHelper().PlotDataFrame(dailyValues, modelName + ' Daily Value (' + ticker + ')', 'Date', 'Value')
def PlotModeldailyValue(modelName: str, modelFunction, ticker: str, startDate: str, durationInYears: int):
    """Run a model through RunModel and chart the daily values it returns.

    The model is run with a portfolio size of 10000, history saving enabled
    and verbose output disabled.  The result is plotted only when it has at
    least one row.  Nothing is returned.
    """
    runOptions = dict(saveHistoryToFile=True, returndailyValues=True, verbose=False)
    dailyValues = RunModel(modelName, modelFunction, ticker, startDate, durationInYears, 10000, **runOptions)
    if dailyValues.shape[0] <= 0:
        return  # Empty result, nothing to draw
    PlotHelper().PlotDataFrame(dailyValues, modelName + ' Daily Value (' + ticker + ')', 'Date', 'Value')
def SampleLSTM(ticker: str):
    """Plot one "learning" series and the "target" series that follows it.

    Loads and normalizes the price history, then plots the 'Average' price
    twice: 200 business days anchored on the cutoff date (what the model
    learns from) and 15 business days anchored on the last available date
    (what it should predict).  Images are written under
    dataFolder + 'samples/', which is created first.  Nothing is returned.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    CreateFolder(dataFolder + 'samples')
    if not prices.LoadHistory(True):
        return
    prices.NormalizePrices()
    targetDays = 15
    trainingDays = 200
    history = prices.GetPriceHistory()
    lastDate = history.index.max()
    cutoff = lastDate - BDay(targetDays)
    firstDate = cutoff - BDay(trainingDays)
    print(dataFolder + 'samples/LSTMsampleLearning', firstDate, cutoff, lastDate)
    charts = (
        (cutoff, trainingDays, 'Learn from this series of days', 'samples/LSTMLearning'),
        (lastDate, targetDays, 'Predict what happens after this series of days', 'samples/LSTMTarget'),
    )
    for anchorDate, days, title, fileName in charts:
        plot.PlotDataFrameDateRange(history[['Average']], anchorDate, days, title, 'Date', 'Price', dataFolder + fileName)
def PlotAnnualPerformance(ticker: str = '^SPX'):
    """Chart yearly and monthly 'Average' prices and print the mean annual change.

    Takes the first 'Average' value of each calendar year and of each
    (year, month), plots both series along with the year-over-year percentage
    change, then prints the mean of that change as a percentage.  Nothing is
    returned; if the history cannot be loaded only the heading is printed.
    """
    print('Annual performance rate for ' + ticker)
    prices = PricingData(ticker)
    if not prices.LoadHistory(True):
        return
    averages = prices.GetPriceHistory(['Average'])
    firstOfYear = averages.groupby([(averages.index.year)]).first()
    annualChange = firstOfYear.pct_change(1)
    firstOfMonth = averages.groupby([(averages.index.year), (averages.index.month)]).first()
    plot = PlotHelper()
    charts = ((firstOfYear, 'Yearly'), (firstOfMonth, 'Monthly'), (annualChange, 'Yearly Percentage Change'))
    for frame, chartTitle in charts:
        plot.PlotDataFrame(frame, title=chartTitle, adjustScale=False)
    print('Average annual change from ', prices.historyStartDate, ' to ', prices.historyEndDate, ': ', annualChange.mean().values * 100, '%')
def TestPredictionModels(ticker: str = '^SPX', numberOfLearningPasses: int = 300):
    """Simple procedure to test different prediction methods 4, 20 and 60 days in the future.

    For every combination of horizon (4, 20, 60 days) and prediction method
    (0 through 4) this runs prices.PredictPrices on history trimmed to
    1/1/2000 - 3/1/2018, joins the predictions to the price history, prints
    the mean percentage deviation over the last quarter of the rows, and
    writes a CSV plus three graphs (full range, last 160 days, last 500 days)
    to dataFolder.  Nothing is returned.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)  # Announce before loading, like the other loaders in this file
    if not prices.LoadHistory():
        return
    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    for daysForward in [4, 20, 60]:
        for predictionMethod in range(0, 5):
            modelDescription = ticker + '_method' + str(predictionMethod) + '_epochs' + str(numberOfLearningPasses) + '_daysforward' + str(daysForward)
            print('Predicting ' + str(daysForward) + ' days using method ' + modelDescription)
            prices.PredictPrices(predictionMethod, daysForward, numberOfLearningPasses)
            predDF = prices.pricePredictions.copy()
            predDF = predDF.join(prices.GetPriceHistory())
            predDF['PercentageDeviation'] = abs((predDF['Average'] - predDF['estAverage']) / predDF['Average'])
            # Average of the last 25% to account for training.
            averageDeviation = predDF['PercentageDeviation'].tail(round(predDF.shape[0] / 4)).mean()
            print('Average deviation: ', averageDeviation * 100, '%')
            predDF.to_csv(dataFolder + modelDescription + '.csv')
            plot.PlotDataFrame(predDF[['estAverage', 'Average']], modelDescription, 'Date', 'Price', True, dataFolder + modelDescription)
            for days in (160, 500):
                # Title and file name share one suffix; the 160-day title used to read '_last160ays'
                suffix = '_last' + str(days) + 'Days'
                plot.PlotDataFrameDateRange(predDF[['Average', 'estAverage']], None, days, modelDescription + suffix, 'Date', 'Price', dataFolder + modelDescription + suffix)
def CompareModels(ticker: str, startDate: str, durationInYears: int, modelOneName: str, modelTwoName: str, modelOne, modelTwo):
    """Run two model functions directly and chart their daily values together.

    Each model is called with (ticker, startDate, durationInYears, 10000) plus
    verbose=False, saveHistoryToFile=False and returndailyValues=True.  When
    both results have at least one row they are joined (overlapping columns
    get a '_<model name>' suffix) and plotted on one chart.  Nothing is
    returned.
    """
    results = []
    for model in (modelOne, modelTwo):
        results.append(model(ticker, startDate, durationInYears, 10000, verbose=False, saveHistoryToFile=False, returndailyValues=True))
    firstValues, secondValues = results
    if firstValues.shape[0] <= 0 or secondValues.shape[0] <= 0:
        return  # At least one model produced no rows, so there is nothing to compare
    combined = firstValues.join(secondValues, lsuffix='_' + modelOneName, rsuffix='_' + modelTwoName)
    PlotHelper().PlotDataFrame(combined, ticker + ' Model Comparison', 'Date', 'Value')
def PredictPrices(prices: PricingData, predictionMethod: int = 0, daysForward: int = 4, numberOfLearningPasses: int = 500):
    """Simple procedure to test different prediction methods.

    predictionMethod selects the model: 0 = linear projection, 1 = LSTM,
    2 = CNN (anything else fails the assertion).  The result frame, holding
    actual and predicted 'Average' prices, is saved as a CSV in dataFolder and
    plotted three times (full range, last 160 days, last 1000 days).  The mean
    percentage deviation over the last quarter of the rows is printed.
    Nothing is returned.
    """
    assert(0 <= predictionMethod <= 2)
    plot = PlotHelper()
    if predictionMethod == 0:  # Linear projection
        print('Running Linear Projection model predicting ' + str(daysForward) + ' days...')
        modelDescription = prices.stockTicker + '_Linear_daysforward' + str(daysForward)
        predDF = prices.GetPriceHistory()
        predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] + predDF['Close'])/4
        d = predDF.index[-1]
        for i in range(0, daysForward):  # Add new days to the end for crystal ball predictions
            predDF.loc[d + BDay(i+1), 'Average_Predicted'] = 0
        # Project forward using the ratio between the averages daysForward and 2*daysForward rows back
        predDF['PastSlope'] = predDF['Average'].shift(daysForward) / predDF['Average'].shift(daysForward*2)
        predDF['Average_Predicted'] = predDF['Average'].shift(daysForward) * predDF['PastSlope']
        predDF['PercentageDeviation'] = abs((predDF['Average']-predDF['Average_Predicted'])/predDF['Average'])
    else:
        learningModule = StockPredictionNN()
        SourceFieldList = ['High', 'Low', 'Open', 'Close']
        if predictionMethod == 1:  # LSTM learning
            print('Running LSTM model predicting ' + str(daysForward) + ' days...')
            window_size = 1
            modelDescription = prices.stockTicker + '_LSTM' + '_epochs' + str(numberOfLearningPasses) + '_histwin' + str(window_size) + '_daysforward' + str(daysForward)
            learningModule.LoadData(prices.GetPriceHistory(), window_size=window_size, prediction_target_days=daysForward, UseLSTM=True, SourceFieldList=SourceFieldList, batch_size=10, train_test_split=.93)
            learningModule.TrainLSTM(epochs=numberOfLearningPasses, learning_rate=2e-5, dropout_rate=0.8, gradient_clip_margin=4)
        elif predictionMethod == 2:  # CNN Learning
            print('Running CNN model predicting ' + str(daysForward) + ' days...')
            window_size = 16 * daysForward
            modelDescription = prices.stockTicker + '_CNN' + '_epochs' + str(numberOfLearningPasses) + '_histwin' + str(window_size) + '_daysforward' + str(daysForward)
            learningModule.LoadData(prices.GetPriceHistory(), window_size=window_size, prediction_target_days=daysForward, UseLSTM=False, SourceFieldList=SourceFieldList, batch_size=32, train_test_split=.93)
            learningModule.TrainCNN(epochs=numberOfLearningPasses)
        predDF = learningModule.GetTrainingResults(True, True)
        if 'PercentageDeviation' not in predDF.columns:
            # The statistics below need this column; derive it here if the
            # training results did not already supply it.
            predDF['PercentageDeviation'] = abs((predDF['Average']-predDF['Average_Predicted'])/predDF['Average'])
    # Average of the last 25% to account for training.
    averageDeviation = predDF['PercentageDeviation'].tail(round(predDF.shape[0]/4)).mean()
    print('Average deviation: ', averageDeviation * 100, '%')
    predDF = predDF.reindex(sorted(predDF.columns), axis=1)  # Sort columns alphabetical
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']], modelDescription, 'Date', 'Price', True, 'experiment/' + modelDescription)
    for days in (160, 1000):
        # Title and file name share one suffix; the titles used to read '_last160ays' / '_last1000ays'
        suffix = '_last' + str(days) + 'Days'
        plot.PlotDataFrameDateRange(predDF[['Average', 'Average_Predicted']], None, days, modelDescription + suffix, 'Date', 'Price', dataFolder + modelDescription + suffix)
def PredictPrices(prices: PricingData, predictionMethod: int = 0, daysForward: int = 5, numberOfLearningPasses: int = 500):
    """Execute a given prediction method: linear projection, LSTM or CNN.

    predictionMethod selects the model: 0 = linear projection, 1 = LSTM,
    2 = CNN (anything else fails the assertion).  Results are exported as a
    CSV file containing actual and predicted data, plus three graphs (full
    range, last 160 days, last 1000 days).  The full-range graph goes to
    'experiment/'; the CSV and the range graphs go to dataFolder.  The mean
    percentage deviation over the last quarter of the rows is printed.
    Nothing is returned.
    """
    assert (0 <= predictionMethod <= 2)
    plot = PlotHelper()
    if predictionMethod == 0:  # Linear projection
        print('Running Linear Projection model predicting ' + str(daysForward) + ' days...')
        modelDescription = prices.stockTicker + '_Linear_daysforward' + str(daysForward)
        predDF = prices.GetPriceHistory()
        predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] + predDF['Close']) / 4
        d = predDF.index[-1]
        for i in range(0, daysForward):  # Add new days to the end for crystal ball predictions
            predDF.loc[d + BDay(i + 1), 'Average_Predicted'] = 0
        # Project forward using the ratio between the averages daysForward and 2*daysForward rows back
        predDF['PastSlope'] = predDF['Average'].shift(daysForward) / predDF['Average'].shift(daysForward * 2)
        predDF['Average_Predicted'] = predDF['Average'].shift(daysForward) * predDF['PastSlope']
        predDF['PercentageDeviation'] = abs((predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    else:
        SourceFieldList = ['High', 'Low', 'Open', 'Close']
        if predictionMethod == 1:  # LSTM learning
            print('Running LSTM model predicting ' + str(daysForward) + ' days...')
            SourceFieldList = None
            UseLSTM = True
            window_size = 10
            modelDescription = prices.stockTicker + '_LSTM' + '_epochs' + str(numberOfLearningPasses) + '_histwin' + str(window_size) + '_daysforward' + str(daysForward)
        elif predictionMethod == 2:  # CNN Learning
            print('Running CNN model predicting ' + str(daysForward) + ' days...')
            UseLSTM = False
            window_size = 16 * daysForward
            modelDescription = prices.stockTicker + '_CNN' + '_epochs' + str(numberOfLearningPasses) + '_histwin' + str(window_size) + '_daysforward' + str(daysForward)
        learningModule = StockPredictionNN(baseModelName=prices.stockTicker, UseLSTM=UseLSTM)
        learningModule.LoadSource(prices.GetPriceHistory(), FieldList=SourceFieldList, window_size=window_size)
        learningModule.LoadTarget(targetDF=None, prediction_target_days=daysForward)
        learningModule.MakeBatches(batch_size=32, train_test_split=.93)
        learningModule.Train(epochs=numberOfLearningPasses)
        learningModule.Predict(True)
        predDF = learningModule.GetTrainingResults(True, True)
        predDF['PercentageDeviation'] = abs((predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    # Average of the last 25% to account for training.
    averageDeviation = predDF['PercentageDeviation'].tail(round(predDF.shape[0] / 4)).mean()
    print('Average deviation: ', averageDeviation * 100, '%')
    predDF = predDF.reindex(sorted(predDF.columns), axis=1)  # Sort columns alphabetical
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']], modelDescription, 'Date', 'Price', True, 'experiment/' + modelDescription)
    for days in (160, 1000):
        # Title and file name share one suffix; the titles used to read '_last160ays' / '_last1000ays'
        suffix = '_last' + str(days) + 'Days'
        plot.PlotDataFrameDateRange(predDF[['Average', 'Average_Predicted']], None, days, modelDescription + suffix, 'Date', 'Price', dataFolder + modelDescription + suffix)
def TrainTickerRaw(ticker: str = '^SPX', UseLSTM: bool = True, prediction_target_days: int = 5, epochs: int = 500, usePercentages: bool = False, hidden_layer_size: int = 512, dropout: bool = True, dropout_rate: float = 0.01, learning_rate: float = 2e-5):
    """Train an LSTM or CNN on a ticker's price history and export the results.

    History is trimmed to 1/1/2000 - 3/1/2018 and either converted to
    percentages (usePercentages=True) or normalized.  The model is then built,
    trained, run and saved.  The LSTM uses a 1-day window on 'Average' with
    batch size 128; the CNN uses a window of 16 * prediction_target_days on
    High/Low/Open/Close with batch size 64.

    With usePercentages the predictions are unraveled back into prices and
    written to dataFolder as a CSV plus three graphs; otherwise the model's
    own PredictionResultsSave / PredictionResultsPlot are used.  Nothing is
    returned.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return
    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    if usePercentages:
        # Percentages don't work well I suspect because small errors have a huge impact
        # when you revert back to the original prices and they roll forward
        prices.ConvertToPercentages()
    else:
        prices.NormalizePrices()
    prices.CalculateStats()
    model = StockPredictionNN(baseModelName=ticker, UseLSTM=UseLSTM)

    # Only these four settings differ between the two network types; the
    # training pipeline below is shared.
    if UseLSTM:
        window_size = 1
        modelTag = '_LSTM'
        FieldList = ['Average']
        batch_size = 128
    else:  # CNN
        window_size = 16 * prediction_target_days
        modelTag = '_CNN'
        FieldList = ['High', 'Low', 'Open', 'Close']
        batch_size = 64
    modelDescription = ticker + modelTag + '_epochs' + str(epochs) + '_histwin' + str(window_size) + '_daysforward' + str(prediction_target_days)
    if usePercentages:
        modelDescription += '_percentages'

    model.LoadSource(sourceDF=prices.GetPriceHistory(), FieldList=FieldList, window_size=window_size)
    model.LoadTarget(targetDF=None, prediction_target_days=prediction_target_days)
    model.MakeBatches(batch_size=batch_size, train_test_split=.93)
    model.BuildModel(layer_count=1, hidden_layer_size=hidden_layer_size, dropout=dropout, dropout_rate=dropout_rate, learning_rate=learning_rate)
    model.DisplayModel()
    model.Train(epochs=epochs)
    model.Predict(True)
    model.Save()

    if not usePercentages:
        model.PredictionResultsSave(modelDescription, True, True)
        model.PredictionResultsPlot(modelDescription, True, False)
        return

    predDF = model.GetTrainingResults(True, True)
    predDF = predDF.loc[:, ['Average', 'Average_Predicted']]
    print('Unraveling percentages..')
    # Assign the filled column back: fillna(inplace=True) on a column selection
    # is chained assignment and does not update predDF under Copy-on-Write.
    predDF['Average_Predicted'] = predDF['Average_Predicted'].fillna(0)
    # Seed the first row with the stored conversion factor, then roll each
    # percentage change forward from the previous row's value.
    predDF.iloc[0] = prices.CTPFactor['Average']
    for i in range(1, predDF.shape[0]):
        predDF.iloc[i] = (1 + predDF.iloc[i]) * predDF.iloc[i - 1]
    print(predDF)
    predDF['PercentageDeviation'] = abs((predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    predDF.to_csv(dataFolder + modelDescription + '.csv')
    plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']], modelDescription, 'Date', 'Price', True, dataFolder + modelDescription)
    for days in (160, 500):
        # Title and file name share one suffix; the 160-day title used to read '_last160ays'
        suffix = '_last' + str(days) + 'Days'
        plot.PlotDataFrameDateRange(predDF[['Average', 'Average_Predicted']], None, days, modelDescription + suffix, 'Date', 'Price', dataFolder + modelDescription + suffix)