def train():
    """Train an AttentionModel on a single output, save it, and evaluate it.

    Steps, in order:
      1. Load the training CSV for the output selected by ``outputToPredict``.
      2. Build the model with the hyperparameters below and train it.
      3. Plot the training curve and save the model.
      4. Load the test CSV with the same label settings and evaluate on it.
      5. Print one sample prediction and the number of test examples.

    Whether the labels are binary or percentages is controlled by the
    module-level ``BINARY_PREDICTION`` constant.
    """
    # We train different models to predict different outputs.
    # 0 = 15th percentile, 1 = 25th percentile, 2 = 35th percentile,
    # 3 = 50th percentile, 4 = 65th percentile, 5 = 75th percentile,
    # 6 = 85th percentile
    outputToPredict = 0

    # This represents if we are predicting a binary output (e.g. tomorrow's
    # 50th percentile > today's mean) or if we are predicting a percentage
    # (e.g. tomorrow's 50th percentile is 97% of today's mean).
    binary = BINARY_PREDICTION

    print("Loading dataset...")
    datasetLoader = DatasetLoader()

    # Only the label selected by outputToPredict is loaded.
    # useOnlyBestIndicators is switched off for this run.
    data, labels = datasetLoader.load(
        shuffle=False,
        path="../../data_set/final-train-dataset.csv",
        onlyLabelToUse=outputToPredict,
        useOnlyBestIndicators=False,
        binary=binary)

    # Hyperparameters!
    learningRate = 0.0002
    epochs = 200
    batchSize = 64
    dropout = 0.01

    model = AttentionModel(tryUsingGPU=True, binary=binary)
    model.setup(Hyperparameters(learningRate, epochs, dropout, batchSize))

    # createModel is called without an output-channel count because only one
    # percentile is predicted here.
    model.createModel(generateGraph=False)

    # 0.15 is presumably the validation split -- TODO confirm against
    # trainModel. The returned epochs get their own name so the hyperparameter
    # above is not silently overwritten.
    trainedEpochs, hist = model.trainModel(data, labels, 0.15)
    model.plotCurve(trainedEpochs, hist)
    model.save()

    # Test the model on test data. The labels must be produced in the same
    # mode (binary vs. percentage) as the training labels, so `binary` is
    # passed here as well instead of relying on the loader's default.
    testData, testLabels = datasetLoader.load(
        shuffle=False,
        path="../../data_set/final-test-dataset.csv",
        onlyLabelToUse=outputToPredict,
        useOnlyBestIndicators=False,
        binary=binary)
    model.evaluate(testData, testLabels)

    # Sample prediction for the single test example at index 50; the 50:51
    # slice keeps the leading axis so the input stays three-dimensional.
    print(model.predict(testData[50:51, :, :]))
    print(testData.shape[0])
def testPerformance():
    """Load the saved CnnRnnMlpModel and print its price prediction for BTCUSDT.

    The reference time is pinned to 2021-03-03, truncated to midnight in the
    "Etc/GMT-0" zone. Historical data covering the 193 days before that moment
    is read from ``../data_downloads/``; download it first with
    data_downloads/DownloadBinanceData.py. The mean "Close" over the rows from
    the timestamp 15 days earlier up to, but not including, the reference
    midnight is passed to the model together with the last prepared input.

    Raises:
        AssertionError: if either boundary timestamp is missing from the
            historical dataframe.
    """
    # Pinned reference time; swap in datetime.now() for a live run.
    pinnedTime = datetime(year=2021, month=3, day=3, hour=23, minute=35)
    zone = pytz.timezone("Etc/GMT-0")
    endDate = zone.localize(pinnedTime).replace(
        hour=0, minute=0, second=0, microsecond=0)
    beginDate = endDate - timedelta(days=193)

    symbols = ["BTCUSDT"]
    obtainer = HistoricalDataObtainer(beginDate, endDate, "../data_downloads/")
    print("Reading historical stock data...")
    obtainer.trackStocks(symbols)

    model = CnnRnnMlpModel(tryUsingGPU=False)
    # Hyperparameters, in the positional order Hyperparameters is called with.
    settings = Hyperparameters(
        0.003,  # learning rate
        500,    # epochs
        0.1,    # dropout
        40,     # batch size
        decayRate=0.005,
        decayStep=1.0)
    model.setup(settings)
    model.createModel()
    model.load()

    # Only the last prepared sample is fed to the model, transposed.
    prepared = prepareData(symbols[0], obtainer, beginDate, endDate)
    latestInput = np.array(prepared[-1]).T

    frame = obtainer.getHistoricalDataAsDataframe(symbols[0])

    def firstIndexAt(timestamp):
        # First index label whose "Timestamp" equals `timestamp`.
        matches = frame.index[frame["Timestamp"] == timestamp].tolist()
        assert len(matches) != 0
        return matches[0]

    windowStart = firstIndexAt(endDate - timedelta(days=15))
    windowEnd = firstIndexAt(endDate)

    # NOTE(review): index labels are used as iloc positions, which assumes
    # the dataframe has a default positional index -- confirm.
    averageClose = frame["Close"].iloc[windowStart:windowEnd].mean()

    forecast = model.makePricePredictionForTommorrow(latestInput, averageClose)
    print("Predictions: [min, 25th percentile, median, 75th percentile, max]")
    print(forecast)
def testPerformance():
    """Report the saved CnnRnnMlpModel's weak spots and its test-set score.

    Loads the train and test CSVs unshuffled, restores the saved model,
    reports the training examples whose predictions are poor (using an
    ``errorForPoor`` of 0.01), and then reports performance on the test set.
    """
    loader = DatasetLoader()
    trainPath = "../data_set/final-train-dataset.csv"
    testPath = "../data_set/final-test-dataset.csv"
    trainData, trainLabels = loader.load(path=trainPath, shuffle=False)
    testData, testLabels = loader.load(path=testPath, shuffle=False)

    model = CnnRnnMlpModel(tryUsingGPU=True)
    # Hyperparameters, in the positional order Hyperparameters is called with.
    settings = Hyperparameters(
        0.003,  # learning rate
        500,    # epochs
        0.1,    # dropout
        40,     # batch size
        decayRate=0.005,
        decayStep=1.0)
    model.setup(settings)
    model.createModel()
    model.load()

    # Poor predictions are inspected on the training data.
    trainPredictions = model.predict(trainData, concatenate=True)
    reporter = PerformanceReporter()
    reporter.reportOnPoorPredictions(
        trainPredictions, trainLabels, errorForPoor=0.01)

    print("The model's performance on the test set.")
    reporter.reportPerformanceOnDataset(model, testData, testLabels)