def DownloadAndGraphStocks(tickerList: list):
    """Load each ticker's price history and save graphs over several windows.

    For every ticker whose history loads (requested up to today's date) the
    prices are normalized, stats are calculated, PredictPrices(2, 15) is run,
    and one graph per look-back window is saved to file.  Tickers whose
    history does not load are skipped.

    Args:
        tickerList: Ticker symbols to process.
    """
    graphWindows = [90, 180, 365, 2190, 4380]
    for symbol in tickerList:
        pricing = PricingData(symbol)
        print('Loading ' + symbol)
        if not pricing.LoadHistory(requestedEndDate=GetTodaysDate()):
            continue

        print('Calcualting stats ' + symbol)
        pricing.NormalizePrices()
        pricing.CalculateStats()
        pricing.PredictPrices(2, 15)
        # NOTE(review): second NormalizePrices call presumably toggles the
        # normalization back off -- confirm against PricingData.
        pricing.NormalizePrices()

        snapshot = pricing.GetCurrentPriceSnapshot()
        deviations = (str(round(snapshot.fiveDayDeviation * 100, 2)) + '/' +
                      str(round(snapshot.fifteenDayDeviation * 100, 2)))
        priceLevels = '/'.join(
            [str(snapshot.low), str(snapshot.nextDayTarget), str(snapshot.high)])
        snapshotDay = str(snapshot.snapShotDate)[:10]  # keep the YYYY-MM-DD part only
        titleStatistics = ' 5/15 dev: ' + deviations + '% ' + priceLevels + ' ' + snapshotDay

        print('Graphing ' + symbol + ' ' + snapshotDay)
        for days in graphWindows:
            pricing.GraphData(
                endDate=None,
                daysToGraph=days,
                graphTitle=symbol + '_days' + str(days) + ' ' + titleStatistics,
                # Predictions are only drawn on the shorter windows.
                includePredictions=(days < 1000),
                saveToFile=True,
                fileNameSuffix=str(days).zfill(4) + 'd',
                trimHistoricalPredictions=False)
def DownloadAndGraphStocks(tickerList: list):
    """Load, analyse and graph every ticker in ``tickerList``.

    Each ticker whose history loads is normalized, has its stats calculated,
    runs PredictPrices(2, 15), has its stats saved to file, and is then
    graphed once per look-back window (each graph is saved to file).
    Tickers whose history does not load are skipped.

    Args:
        tickerList: Ticker symbols to process.
    """
    for symbol in tickerList:
        pricing = PricingData(symbol)
        print('Loading ' + symbol)
        if not pricing.LoadHistory(True):
            continue

        print('Calcualting stats ' + symbol)
        pricing.NormalizePrices()
        pricing.CalculateStats()
        pricing.PredictPrices(2, 15)
        # NOTE(review): second NormalizePrices call presumably toggles the
        # normalization back off -- confirm against PricingData.
        pricing.NormalizePrices()
        pricing.SaveStatsToFile(True)

        snapshot = pricing.GetCurrentPriceSnapshot()
        fiveDayPct = str(round(snapshot.fiveDayDeviation * 100, 2))
        fifteenDayPct = str(round(snapshot.fifteenDayDeviation * 100, 2))
        lowText = str(snapshot.low)
        targetText = str(snapshot.nextDayTarget)
        highText = str(snapshot.high)
        snapshotDay = str(snapshot.snapshotDate)[:10]  # date part only
        titleStatistics = (' 5/15 dev: ' + fiveDayPct + '/' + fifteenDayPct + '% ' +
                           lowText + '/' + targetText + '/' + highText + ' ' + snapshotDay)

        print('Graphing ' + symbol + ' ' + snapshotDay)
        for days in (90, 180, 365, 2190, 4380):
            graphTitle = symbol + '_days' + str(days) + ' ' + titleStatistics
            fileSuffix = str(days).zfill(4) + 'd'
            showPredictions = days < 1000  # only on the shorter windows
            pricing.GraphData(None, days, graphTitle, showPredictions, True,
                              fileSuffix, trimHistoricalPredictions=False)
def PlotPrediction(ticker: str = '^SPX',
                   predictionMethod: int = 0,
                   daysToGraph: int = 60,
                   daysForward: int = 5,
                   learnhingEpochs: int = 500):
    """Predict prices for one ticker, graph the result and save the stats.

    Does nothing beyond the initial status message when the ticker's history
    cannot be loaded.

    Args:
        ticker: Ticker symbol to load.
        predictionMethod: Passed through as the first argument of
            ``PricingData.PredictPrices``.
        daysToGraph: Number of days passed to ``GraphData``; also used to
            build the graph title and file-name suffix.
        daysForward: Passed through as the second argument of
            ``PredictPrices``.
        learnhingEpochs: Passed through as the third argument of
            ``PredictPrices``.
    """
    print('Plotting predictions for ' + ticker)
    pricing = PricingData(ticker)
    if not pricing.LoadHistory(True):
        return

    pricing.NormalizePrices()
    pricing.PredictPrices(predictionMethod, daysForward, learnhingEpochs)
    # NOTE(review): second NormalizePrices call presumably toggles the
    # normalization back off -- confirm against PricingData.
    pricing.NormalizePrices()

    windowLabel = str(daysToGraph) + 'days'
    pricing.GraphData(None, daysToGraph, ticker + ' ' + windowLabel, True, True, windowLabel)
    pricing.SaveStatsToFile(True)
def CalculatePriceCorrelation(tickerList: list):
    """Correlate the last 365 days of 'Average' prices across tickers.

    Writes the full correlation matrix to ``data/_priceCorrelations.csv`` and,
    for each ticker, the ten rows with the smallest correlation to it to
    ``data/_priceCorrelationTop10.txt`` (each summary is also printed).

    Tickers whose history cannot be loaded are left out of the matrix and
    skipped in the summary instead of raising ``KeyError``.

    Args:
        tickerList: Ticker symbols to correlate.
    """
    datafileName = 'data/_priceCorrelations.csv'
    summaryfileName = 'data/_priceCorrelationTop10.txt'
    result = pandas.DataFrame()
    today = datetime.datetime.now().date()
    startDate = str(today + datetime.timedelta(days=-365))
    endDate = str(today)

    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if prices.LoadHistory(True):
            prices.TrimToDateRange(startDate, endDate)
            prices.NormalizePrices()
            result[ticker] = prices.GetPriceHistory(['Average'])

    result = result.corr()
    result.to_csv(datafileName)

    # The context manager closes the summary file even if a write fails.
    with open(summaryfileName, 'w') as f:
        for ticker in tickerList:
            if ticker not in result.columns:
                # History never loaded for this ticker, so it has no column.
                print('No correlation data for ' + ticker + ', skipping')
                continue
            topTen = result.nsmallest(10, ticker)
            print(topTen[ticker])
            f.write(ticker + '\n')
            f.write(topTen[ticker].to_string(header=True, index=True) + '\n')
            f.write('\n')
def SampleCNN(ticker: str):
    """Plot ten sample/target image pairs illustrating CNN training data.

    Print sample CNN graphs of ticker: the CNN will treat price data as a
    picture and anticipate the next picture.  Each pair is saved under
    ``dataFolder + 'samples/'`` as ``CNN<i>Sample`` and ``CNN<i>Target``.
    Nothing is plotted when the ticker's history cannot be loaded.

    Args:
        ticker: Ticker symbol to load.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory():
        return

    prices.NormalizePrices()
    window_size = 80
    target_size = 10
    daysInTraining = 800
    sampleData = prices.GetPriceHistory()
    endDate = sampleData.index.max()
    cuttoffDate = endDate - BDay(window_size)
    startDate = cuttoffDate - BDay(daysInTraining)
    # Forward slash replaces the original 'samples\C...', whose '\C' is an
    # invalid escape sequence and differed from every other path used here.
    print(dataFolder + 'samples/CNNsampleLearning', startDate, cuttoffDate, endDate)

    for i in range(0, 10):
        # Each sample is anchored one window further along from startDate.
        d1 = startDate + BDay(i * window_size)
        d2 = d1 + BDay(target_size)
        print(d1, d2, window_size, target_size)
        plot.PlotDataFrameDateRange(
            sampleData[['Average']], d1, window_size,
            'Sample image ' + str(i), 'Date', 'Price',
            dataFolder + 'samples/CNN' + str(i) + 'Sample')
        plot.PlotDataFrameDateRange(
            sampleData[['Average']], d2, target_size,
            'Target image ' + str(i), 'Date', 'Price',
            dataFolder + 'samples/CNN' + str(i) + 'Target')
def CalculatePriceCorrelation(tickerList: list):
    """Correlate the last 365 days of 'Average' prices across tickers.

    Each ticker's 'Average' column is renamed to the ticker symbol and
    outer-joined into one frame.  The correlation matrix is written to
    ``data/_priceCorrelations.csv``; for each ticker the ten rows with the
    smallest correlation to it are printed and written to
    ``data/_priceCorrelationTop10.txt``.

    Tickers whose history cannot be loaded are skipped.  If no ticker loads
    at all the function reports that and returns without writing any file.

    Args:
        tickerList: Ticker symbols to correlate.
    """
    datafileName = 'data/_priceCorrelations.csv'
    summaryfileName = 'data/_priceCorrelationTop10.txt'
    result = None
    today = GetTodaysDate()
    startDate = str(AddDays(today, -365))
    endDate = str(today)

    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if prices.LoadHistory(requestedEndDate=today):
            prices.TrimToDateRange(startDate, endDate)
            prices.NormalizePrices()
            # Non-inplace rename: never mutates a frame owned by PricingData.
            x = prices.GetPriceHistory(['Average']).rename(
                index=str, columns={"Average": ticker})
            result = x if result is None else result.join(x, how='outer')

    if result is None:
        # Previously this fell through to None.corr() and raised AttributeError.
        print('No price history could be loaded, nothing to correlate.')
        return

    result = result.corr()
    result.to_csv(datafileName)

    # The context manager closes the summary file even if a write fails.
    with open(summaryfileName, 'w') as f:
        for ticker in tickerList:
            if ticker not in result.columns:
                # History never loaded for this ticker, so it has no column.
                print('No correlation data for ' + ticker + ', skipping')
                continue
            topTen = result.nsmallest(10, ticker)
            print(topTen[ticker])
            f.write(ticker + '\n')
            f.write(topTen[ticker].to_string(header=True, index=True) + '\n')
            f.write('\n')

    print(
        'Intended to create stability, in practice, this is a great way to pair well performing stocks with poor performing or volatile stocks.'
    )
def CalculatePriceCorrelation(tickerList: list):
    """Correlate the last 365 days of 'Average' prices across tickers.

    Each ticker's 'Average' column is renamed to the ticker symbol and
    outer-joined into one frame.  The correlation matrix is written to
    ``data/_priceCorrelations.csv``; for each ticker the ten rows with the
    smallest correlation to it are printed and written to
    ``data/_priceCorrelationTop10.txt``.

    Tickers whose history cannot be loaded are skipped.  If no ticker loads
    at all the function reports that and returns without writing any file.

    Args:
        tickerList: Ticker symbols to correlate.
    """
    datafileName = 'data/_priceCorrelations.csv'
    summaryfileName = 'data/_priceCorrelationTop10.txt'
    result = None
    today = datetime.datetime.now().date()
    startDate = str(today + datetime.timedelta(days=-365))
    endDate = str(today)

    for ticker in tickerList:
        prices = PricingData(ticker)
        print('Loading ' + ticker)
        if prices.LoadHistory():
            prices.TrimToDateRange(startDate, endDate)
            prices.NormalizePrices()
            # Non-inplace rename: never mutates a frame owned by PricingData.
            x = prices.GetPriceHistory(['Average']).rename(
                index=str, columns={"Average": ticker})
            result = x if result is None else result.join(x, how='outer')

    if result is None:
        # Previously this fell through to None.corr() and raised AttributeError.
        print('No price history could be loaded, nothing to correlate.')
        return

    result = result.corr()
    result.to_csv(datafileName)

    # The context manager closes the summary file even if a write fails.
    with open(summaryfileName, 'w') as f:
        for ticker in tickerList:
            if ticker not in result.columns:
                # History never loaded for this ticker, so it has no column.
                print('No correlation data for ' + ticker + ', skipping')
                continue
            topTen = result.nsmallest(10, ticker)
            print(topTen[ticker])
            f.write(ticker + '\n')
            f.write(topTen[ticker].to_string(header=True, index=True) + '\n')
            f.write('\n')
def RunPredictions(ticker: str = '^SPX', numberOfLearningPasses: int = 750):
    """Run three prediction models against three forecast horizons.

    Runs three prediction models (Linear, LSTM, CNN) predicting a target
    price 4, 20, and 60 days in the future.  History is trimmed to
    1/1/1950 - 3/1/2018 and normalized first.  Nothing is predicted when the
    ticker's history cannot be loaded.

    Args:
        ticker: Ticker symbol to load.
        numberOfLearningPasses: Forwarded unchanged to ``PredictPrices``.
    """
    pricing = PricingData(ticker)
    print('Loading ' + ticker)
    if not pricing.LoadHistory():
        return

    pricing.TrimToDateRange('1/1/1950', '3/1/2018')
    pricing.NormalizePrices()
    for horizon in (4, 20, 60):
        for method in range(3):
            PredictPrices(pricing, method, horizon, numberOfLearningPasses)
def RunPredictions(ticker: str = '^SPX', numberOfLearningPasses: int = 750):
    """Call ``PredictPrices`` for every method/horizon combination.

    Ensures ``dataFolder`` exists, loads the ticker's history, trims it to
    1/1/1950 - 3/1/2018, normalizes it, and then calls ``PredictPrices`` with
    method indexes 0, 1 and 2 for each of the values 4, 20 and 60.
    NOTE(review): the 4/20/60 values are presumably days-forward horizons --
    confirm against ``PredictPrices``.

    Args:
        ticker: Ticker symbol to load.
        numberOfLearningPasses: Forwarded unchanged to ``PredictPrices``.
    """
    pricing = PricingData(ticker)
    CreateFolder(dataFolder)
    print('Loading ' + ticker)
    if not pricing.LoadHistory():
        return

    pricing.TrimToDateRange('1/1/1950', '3/1/2018')
    pricing.NormalizePrices()
    for horizon in (4, 20, 60):
        for method in range(3):
            PredictPrices(pricing, method, horizon, numberOfLearningPasses)
def SampleGraphs(ticker: str, daysInGraph: int):
    """Save twenty sample Open/High/Low/Close plots for one ticker.

    Starting from the last date in the price history, steps back ten
    business days at a time (offsets 0, 10, ..., 190) and plots a window of
    ``daysInGraph`` days for each anchor date.  Files are written under
    ``dataFolder + 'samples/'``.  Nothing is plotted when the ticker's
    history cannot be loaded.

    Args:
        ticker: Ticker symbol to load.
        daysInGraph: Window length passed to ``PlotDataFrameDateRange``.
    """
    plotter = PlotHelper()
    pricing = PricingData(ticker)
    print('Loading ' + ticker)
    if not pricing.LoadHistory(True):
        return

    pricing.NormalizePrices()
    history = pricing.GetPriceHistory()
    lastDate = history.index[-1]
    windowTitle = 'Sample window ' + str(daysInGraph)
    for offset in range(0, 200, 10):
        anchorDate = lastDate - BDay(offset)  # business-day anchor for this plot
        outputFile = dataFolder + 'samples/sample' + str(offset) + '_' + str(daysInGraph)
        plotter.PlotDataFrameDateRange(
            history[['Open', 'High', 'Low', 'Close']], anchorDate, daysInGraph,
            windowTitle, 'Date', 'Price', outputFile)
def SampleLSTM(ticker: str):
    """Save two plots illustrating LSTM training data for one ticker.

    One plot covers the series to learn from (200 days, anchored at the
    cutoff date) and one covers the series to predict (15 days, anchored at
    the last date in history).  Both are saved under
    ``dataFolder + 'samples/'``, which is created first.  Nothing is plotted
    when the ticker's history cannot be loaded.

    Args:
        ticker: Ticker symbol to load.
    """
    plotter = PlotHelper()
    pricing = PricingData(ticker)
    print('Loading ' + ticker)
    CreateFolder(dataFolder + 'samples')
    if not pricing.LoadHistory(True):
        return

    pricing.NormalizePrices()
    daysInTarget = 15
    daysInTraining = 200
    history = pricing.GetPriceHistory()
    lastDate = history.index.max()
    cutoff = lastDate - BDay(daysInTarget)
    firstDate = cutoff - BDay(daysInTraining)
    print(dataFolder + 'samples/LSTMsampleLearning', firstDate, cutoff, lastDate)

    plotter.PlotDataFrameDateRange(
        history[['Average']], cutoff, daysInTraining,
        'Learn from this series of days', 'Date', 'Price',
        dataFolder + 'samples/LSTMLearning')
    plotter.PlotDataFrameDateRange(
        history[['Average']], lastDate, daysInTarget,
        'Predict what happens after this series of days', 'Date', 'Price',
        dataFolder + 'samples/LSTMTarget')
def TrainTickerRaw(ticker: str = '^SPX',
                   UseLSTM: bool = True,
                   prediction_target_days: int = 5,
                   epochs: int = 500,
                   usePercentages: bool = False,
                   hidden_layer_size: int = 512,
                   dropout: bool = True,
                   dropout_rate: float = 0.01,
                   learning_rate: float = 2e-5):
    """Train a StockPredictionNN on one ticker's history and save the results.

    History is trimmed to 1/1/2000 - 3/1/2018 and either converted to
    percentages or normalized.  The model is then loaded, batched, built,
    trained, used to predict and saved.  With ``usePercentages`` the predicted
    percentages are rolled forward into prices, written to CSV and plotted;
    otherwise the model's own save/plot routines are used.  Nothing happens
    beyond the status message when the history cannot be loaded.

    Args:
        ticker: Ticker symbol to load; also the model's base name.
        UseLSTM: True selects the LSTM configuration (window of 1, 'Average'
            field, batch size 128); False selects the CNN configuration
            (window of 16 * prediction_target_days, High/Low/Open/Close
            fields, batch size 64).
        prediction_target_days: Forwarded to ``model.LoadTarget``.
        epochs: Forwarded to ``model.Train``.
        usePercentages: Train on percentage changes instead of normalized
            prices.
        hidden_layer_size: Forwarded to ``model.BuildModel``.
        dropout: Forwarded to ``model.BuildModel``.
        dropout_rate: Forwarded to ``model.BuildModel``.
        learning_rate: Forwarded to ``model.BuildModel``.
    """
    plot = PlotHelper()
    prices = PricingData(ticker)
    print('Loading ' + ticker)
    if not prices.LoadHistory(True):
        return

    prices.TrimToDateRange('1/1/2000', '3/1/2018')
    if usePercentages:
        # Percentages don't work well, I suspect because small errors have a
        # huge impact when you revert back to the original prices and they
        # roll forward.
        prices.ConvertToPercentages()
    else:
        prices.NormalizePrices()
    # NOTE(review): the source this was restored from had lost its
    # indentation; CalculateStats is assumed to run for both branches.
    prices.CalculateStats()
    model = StockPredictionNN(baseModelName=ticker, UseLSTM=UseLSTM)

    # The two model types differ only in these four settings.
    if UseLSTM:
        window_size = 1
        modelTag = '_LSTM'
        FieldList = ['Average']
        batch_size = 128
    else:  # CNN
        window_size = 16 * prediction_target_days
        modelTag = '_CNN'
        FieldList = ['High', 'Low', 'Open', 'Close']
        batch_size = 64

    modelDescription = (ticker + modelTag + '_epochs' + str(epochs) +
                        '_histwin' + str(window_size) +
                        '_daysforward' + str(prediction_target_days))
    if usePercentages:
        modelDescription += '_percentages'

    model.LoadSource(sourceDF=prices.GetPriceHistory(),
                     FieldList=FieldList,
                     window_size=window_size)
    model.LoadTarget(targetDF=None,
                     prediction_target_days=prediction_target_days)
    model.MakeBatches(batch_size=batch_size, train_test_split=.93)
    model.BuildModel(layer_count=1,
                     hidden_layer_size=hidden_layer_size,
                     dropout=dropout,
                     dropout_rate=dropout_rate,
                     learning_rate=learning_rate)
    model.DisplayModel()
    model.Train(epochs=epochs)
    model.Predict(True)
    model.Save()

    if not usePercentages:
        model.PredictionResultsSave(modelDescription, True, True)
        model.PredictionResultsPlot(modelDescription, True, False)
        return

    predDF = model.GetTrainingResults(True, True)
    predDF = predDF.loc[:, ['Average', 'Average_Predicted']]
    print('Unraveling percentages..')
    # Assign the filled column back: a chained ``fillna(..., inplace=True)``
    # on a column selection does not update the frame under copy-on-write.
    predDF['Average_Predicted'] = predDF['Average_Predicted'].fillna(0)
    # Seed the first row with the conversion factor, then compound each
    # row's percentage onto the previous row's price.
    predDF.iloc[0] = prices.CTPFactor['Average']
    for i in range(1, predDF.shape[0]):
        predDF.iloc[i] = (1 + predDF.iloc[i]) * predDF.iloc[i - 1]
    print(predDF)
    predDF['PercentageDeviation'] = abs(
        (predDF['Average'] - predDF['Average_Predicted']) / predDF['Average'])
    predDF.to_csv(dataFolder + modelDescription + '.csv')

    plot.PlotDataFrame(predDF[['Average', 'Average_Predicted']],
                       modelDescription, 'Date', 'Price', True,
                       dataFolder + modelDescription)
    plot.PlotDataFrameDateRange(
        predDF[['Average', 'Average_Predicted']], None, 160,
        modelDescription + '_last160Days', 'Date', 'Price',
        dataFolder + modelDescription + '_last160Days')
    plot.PlotDataFrameDateRange(
        predDF[['Average', 'Average_Predicted']], None, 500,
        modelDescription + '_last500Days', 'Date', 'Price',
        dataFolder + modelDescription + '_last500Days')