def SampleLSTM(ticker: str):
    """Save two example charts illustrating an LSTM learning/target split.

    Loads and normalizes the price history for *ticker*, then plots the
    'Average' column twice into ``dataFolder + 'samples'``:

    * 'LSTMLearning' - called with the cut-off date and the training length
    * 'LSTMTarget'   - called with the latest date and the target length

    The date/length pair is presumably the chart's end date and look-back
    window; confirm against PlotHelper.PlotDataFrameDateRange.
    Nothing is plotted when the history cannot be loaded.
    """
    plotter = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    CreateFolder(dataFolder + 'samples')
    if not prices.LoadHistory():
        return

    prices.NormalizePrices()
    targetDays = 15
    trainingDays = 200
    history = prices.GetPriceHistory()
    lastDate = history.index.max()
    # Step back in business days: the target window first, then the training window.
    cutoff = lastDate - BDay(targetDays)
    firstDate = cutoff - BDay(trainingDays)
    print(dataFolder + 'samples/LSTMsampleLearning', firstDate, cutoff, lastDate)

    # (date argument, day count, chart title, output file) for each chart, in order.
    charts = (
        (cutoff, trainingDays, 'Learn from this series of days', 'samples/LSTMLearning'),
        (lastDate, targetDays, 'Predict what happens after this series of days', 'samples/LSTMTarget'),
    )
    for chartDate, chartDays, chartTitle, chartFile in charts:
        plotter.PlotDataFrameDateRange(
            history[['Average']], chartDate, chartDays, chartTitle,
            'Date', 'Price', dataFolder + chartFile)
def SampleGraphs(ticker: str, daysInGraph: int):
    """Save a series of sample OHLC charts for *ticker*.

    Loads and normalizes the price history, then produces 20 charts of the
    Open/High/Low/Close columns. Chart ``i`` (i = 0, 10, ..., 190) is
    requested for the date ``i`` business days before the latest date in the
    history, with ``daysInGraph`` passed as the day count. Files are written
    to ``dataFolder + 'samples/sample<i>_<daysInGraph>'``.

    Args:
        ticker: Symbol whose history is loaded through PricingData.
        daysInGraph: Day count handed to PlotDataFrameDateRange for every chart
            (presumably the look-back window; confirm against PlotHelper).

    Nothing is plotted when the history cannot be loaded.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    # Ensure the output folder exists, as SampleLSTM does, so this function
    # does not depend on another sample routine having run first.
    CreateFolder(dataFolder + 'samples')
    if not prices.LoadHistory():
        return

    prices.NormalizePrices()
    sampleData = prices.GetPriceHistory()
    lastDate = sampleData.index[-1]
    windowTitle = 'Sample window ' + str(daysInGraph)
    for daysBack in range(0, 200, 10):
        # Walk backwards from the latest date in steps of 10 business days.
        sampleDate = lastDate - BDay(daysBack)
        plot.PlotDataFrameDateRange(
            sampleData[['Open', 'High', 'Low', 'Close']], sampleDate,
            daysInGraph, windowTitle, 'Date', 'Price',
            dataFolder + 'samples/sample' + str(daysBack) + '_' + str(daysInGraph))
def SampleCNN(ticker: str):
    """Save ten example (sample, target) chart pairs illustrating CNN inputs.

    Loads and normalizes the price history for *ticker* and, for i in 0..9,
    plots the 'Average' column twice:

    * 'samples/CNN<i>Sample' - requested for date d1 with an 80-day count
    * 'samples/CNN<i>Target' - requested for date d2 with a 10-day count

    where d1 starts 880 business days before the latest date and advances 80
    business days per pair, and d2 is 10 business days after d1.
    The date/count pair is presumably the chart's end date and look-back
    window; confirm against PlotHelper.PlotDataFrameDateRange.
    Nothing is plotted when the history cannot be loaded.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    # Ensure the output folder exists, as SampleLSTM does, so this function
    # does not depend on another sample routine having run first.
    CreateFolder(dataFolder + 'samples')
    if not prices.LoadHistory(True):
        return

    prices.NormalizePrices()
    window_size = 80
    target_size = 10
    daysInTraining = 800
    sampleData = prices.GetPriceHistory()
    endDate = sampleData.index.max()
    cuttoffDate = endDate - BDay(window_size)
    startDate = cuttoffDate - BDay(daysInTraining)
    # Diagnostic output; previously mislabelled as the LSTM sample.
    print(dataFolder + 'samples/CNN', startDate, cuttoffDate, endDate)
    for i in range(10):
        d1 = startDate + BDay(i * window_size)  # date passed for the sample chart
        d2 = d1 + BDay(target_size)             # date passed for the target chart
        print(d1, d2, window_size, target_size)
        plot.PlotDataFrameDateRange(
            sampleData[['Average']], d1, window_size, 'Sample image ' + str(i),
            'Date', 'Price', dataFolder + 'samples/CNN' + str(i) + 'Sample')
        plot.PlotDataFrameDateRange(
            sampleData[['Average']], d2, target_size, 'Target image ' + str(i),
            'Date', 'Price', dataFolder + 'samples/CNN' + str(i) + 'Target')
def PredictPrices(prices: PricingData, predictionMethod: int = 0, daysForward: int = 5, numberOfLearningPasses: int = 500):
    """Run one prediction method on a price history and export the results.

    Methods:
        0 - Linear projection: Average_Predicted is the Average from
            ``daysForward`` rows ago scaled by the ratio of that value to the
            Average from ``2 * daysForward`` rows ago. ``daysForward`` future
            business-day rows are appended so the projection extends past the
            last known date.
        1 - LSTM via StockPredictionNN (window of 10, default field list).
        2 - CNN via StockPredictionNN (window of 16 * daysForward, OHLC fields).

    Output: prints the mean PercentageDeviation over the last quarter of the
    rows, writes ``dataFolder + modelDescription + '.csv'`` and saves three
    charts of Average vs Average_Predicted (full range, 160-day and 1000-day
    requests).

    Args:
        prices: Pricing data to predict from; ``stockTicker`` names the outputs.
        predictionMethod: 0, 1 or 2 as described above.
        daysForward: Number of business days ahead to predict.
        numberOfLearningPasses: Training epochs for methods 1 and 2; unused by
            the linear projection.

    Raises:
        AssertionError: if predictionMethod is outside 0..2.
    """
    assert (0 <= predictionMethod <= 2), 'predictionMethod must be 0 (linear), 1 (LSTM) or 2 (CNN)'
    plot = PlotHelper()
    if predictionMethod == 0:  #Linear projection
        print('Running Linear Projection model predicting ' + str(daysForward) + ' days...')
        modelDescription = prices.stockTicker + '_Linear_daysforward' + str(daysForward)
        # NOTE(review): columns/rows are added to the frame returned by
        # GetPriceHistory(); confirm whether that is a copy or the live history.
        predDF = prices.GetPriceHistory()
        predDF['Average'] = (predDF['Open'] + predDF['High'] + predDF['Low'] + predDF['Close']) / 4
        lastDate = predDF.index[-1]
        for i in range(daysForward):  #Add new days to the end for crystal ball predictions
            predDF.loc[lastDate + BDay(i + 1), 'Average_Predicted'] = 0
        predDF['PastSlope'] = predDF['Average'].shift(daysForward) / predDF['Average'].shift(daysForward * 2)
        predDF['Average_Predicted'] = predDF['Average'].shift(daysForward) * predDF['PastSlope']
    else:
        SourceFieldList = ['High', 'Low', 'Open', 'Close']
        if predictionMethod == 1:  #LSTM learning
            print('Running LSTM model predicting ' + str(daysForward) + ' days...')
            SourceFieldList = None
            UseLSTM = True
            window_size = 10
            modelDescription = (prices.stockTicker + '_LSTM' + '_epochs' + str(numberOfLearningPasses)
                                + '_histwin' + str(window_size) + '_daysforward' + str(daysForward))
        elif predictionMethod == 2:  #CNN Learning
            print('Running CNN model predicting ' + str(daysForward) + ' days...')
            UseLSTM = False
            window_size = 16 * daysForward
            modelDescription = (prices.stockTicker + '_CNN' + '_epochs' + str(numberOfLearningPasses)
                                + '_histwin' + str(window_size) + '_daysforward' + str(daysForward))
        learningModule = StockPredictionNN(baseModelName=prices.stockTicker, UseLSTM=UseLSTM)
        learningModule.LoadSource(prices.GetPriceHistory(), FieldList=SourceFieldList, window_size=window_size)
        learningModule.LoadTarget(targetDF=None, prediction_target_days=daysForward)
        learningModule.MakeBatches(batch_size=32, train_test_split=.93)
        learningModule.Train(epochs=numberOfLearningPasses)
        learningModule.Predict(True)
        predDF = learningModule.GetTrainingResults(True, True)

    # Relative error of the prediction, computed once for whichever method ran.
    predDF['PercentageDeviation'] = abs((predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    averageDeviation = predDF['PercentageDeviation'].tail(round(predDF.shape[0] / 4)).mean()  #Average of the last 25% to account for training.
    print('Average deviation: ', averageDeviation * 100, '%')
    predDF = predDF.reindex(sorted(predDF.columns), axis=1)  #Sort columns alphabetical
    predDF.to_csv(dataFolder + modelDescription + '.csv')

    comparison = ['Average', 'Average_Predicted']
    # NOTE(review): this chart path is hard-coded to 'experiment/' while every
    # other output uses dataFolder; confirm the two refer to the same folder.
    plot.PlotDataFrame(predDF[comparison], modelDescription, 'Date', 'Price', True, 'experiment/' + modelDescription)
    for historyDays in (160, 1000):
        suffix = '_last' + str(historyDays) + 'Days'
        plot.PlotDataFrameDateRange(
            predDF[comparison], None, historyDays, modelDescription + suffix,
            'Date', 'Price', dataFolder + modelDescription + suffix)
def TrainTickerRaw(ticker: str = '^SPX', UseLSTM: bool = True, prediction_target_days: int = 5, epochs: int = 500, usePercentages: bool = False, hidden_layer_size: int = 512, dropout: bool = True, dropout_rate: float = 0.01, learning_rate: float = 2e-5):
    """Train an LSTM or CNN model on one ticker's history and export predictions.

    The history is trimmed to 1/1/2000 - 3/1/2018, then either converted to
    percentages or normalized. A StockPredictionNN is built, trained,
    run and saved. When ``usePercentages`` is set, the predicted and actual
    percentage changes are compounded back from ``prices.CTPFactor['Average']``
    and written to CSV and charts here; otherwise the model's own
    PredictionResultsSave / PredictionResultsPlot are used.

    Args:
        ticker: Symbol to load; also the model's base name.
        UseLSTM: True for LSTM (window 1, 'Average' field, batch 128);
            False for CNN (window 16 * prediction_target_days, OHLC fields,
            batch 64).
        prediction_target_days: Number of days ahead to predict.
        epochs: Training epochs.
        usePercentages: Train on percentage changes instead of normalized prices.
        hidden_layer_size, dropout, dropout_rate, learning_rate: Forwarded to
            StockPredictionNN.BuildModel.

    Does nothing beyond printing when the history cannot be loaded.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return

    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    if usePercentages:
        # Percentages don't work well I suspect because small errors have a huge
        # impact when you revert back to the original prices and they roll forward
        prices.ConvertToPercentages()
    else:
        prices.NormalizePrices()
    prices.CalculateStats()
    model = StockPredictionNN(baseModelName=ticker, UseLSTM=UseLSTM)

    # The two model types share one pipeline and differ only in these settings.
    if UseLSTM:
        modelKind = '_LSTM'
        window_size = 1
        FieldList = ['Average']
        batch_size = 128
    else:  #CNN
        modelKind = '_CNN'
        window_size = 16 * prediction_target_days
        FieldList = ['High', 'Low', 'Open', 'Close']
        batch_size = 64

    modelDescription = (ticker + modelKind + '_epochs' + str(epochs) + '_histwin' + str(window_size)
                        + '_daysforward' + str(prediction_target_days))
    if usePercentages:
        modelDescription += '_percentages'

    model.LoadSource(sourceDF=prices.GetPriceHistory(), FieldList=FieldList, window_size=window_size)
    model.LoadTarget(targetDF=None, prediction_target_days=prediction_target_days)
    model.MakeBatches(batch_size=batch_size, train_test_split=.93)
    model.BuildModel(layer_count=1, hidden_layer_size=hidden_layer_size, dropout=dropout,
                     dropout_rate=dropout_rate, learning_rate=learning_rate)
    model.DisplayModel()
    model.Train(epochs=epochs)
    model.Predict(True)
    model.Save()

    if not usePercentages:
        model.PredictionResultsSave(modelDescription, True, True)
        model.PredictionResultsPlot(modelDescription, True, False)
        return

    predDF = model.GetTrainingResults(True, True)
    predDF = predDF.loc[:, ['Average', 'Average_Predicted']]
    print('Unraveling percentages..')
    # Assign the filled column back: a chained fillna(..., inplace=True) on a
    # column selection is deprecated and is a no-op under pandas Copy-on-Write,
    # which would leave NaNs that propagate through the compounding below.
    predDF['Average_Predicted'] = predDF['Average_Predicted'].fillna(0)
    # Seed the first row with the stored conversion factor, then compound each
    # row's percentage change onto the previous row's reconstructed price.
    predDF.iloc[0] = prices.CTPFactor['Average']
    for i in range(1, predDF.shape[0]):
        predDF.iloc[i] = (1 + predDF.iloc[i]) * predDF.iloc[i - 1]
    print(predDF)
    predDF['PercentageDeviation'] = abs((predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    predDF.to_csv(dataFolder + modelDescription + '.csv')

    comparison = ['Average', 'Average_Predicted']
    plot.PlotDataFrame(predDF[comparison], modelDescription, 'Date', 'Price', True, dataFolder + modelDescription)
    for historyDays in (160, 500):
        suffix = '_last' + str(historyDays) + 'Days'
        plot.PlotDataFrameDateRange(
            predDF[comparison], None, historyDays, modelDescription + suffix,
            'Date', 'Price', dataFolder + modelDescription + suffix)