def training(self, trainingEnv, trainingParameters=[], verbose=False,
             rendering=False, plotTraining=False, showPerformance=False):
    """
    GOAL: Train the trading strategy on a known trading environment
          (called training set) in order to tune the trading strategy
          parameters. However, there is no training required for a
          simple Sell and Hold strategy because the strategy does not
          involve any tunable parameter.

    INPUTS: - trainingEnv: Known trading environment (training set).
            - trainingParameters: Additional parameters associated
                                  with the training phase. None for
                                  the Sell and Hold strategy.
            - verbose: Enable the printing of a training feedback.
                       None for the Sell and Hold strategy.
            - rendering: Enable the trading environment rendering.
            - plotTraining: Enable the plotting of the training results.
                            None for the Sell and Hold strategy.
            - showPerformance: Enable the printing of a table summarizing
                               the trading strategy performance.

    OUTPUTS: - trainingEnv: Trading environment backtested.
    """

    # Execution of the trading strategy on the trading environment
    trainingEnv.reset()
    done = 0
    while done == 0:
        _, _, done, _ = trainingEnv.step(self.chooseAction(trainingEnv.state))

    # If required, print a feedback about the training
    if verbose:
        print("No training is required as the simple Sell and Hold trading strategy does not involve any tunable parameters.")

    # If required, render the trading environment backtested
    if rendering:
        trainingEnv.render()

    # If required, plot the training results
    if plotTraining:
        print("No training results are available as the simple Sell and Hold trading strategy does not involve any tunable parameters.")

    # If required, print the strategy performance in a table
    if showPerformance:
        analyser = PerformanceEstimator(trainingEnv.data)
        analyser.displayPerformance('S&H')

    # Return the trading environment backtested (training set)
    return trainingEnv
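# For context, a minimal sketch of the chooseAction method the backtesting
# loop above relies on. It assumes action 0 encodes the short/sell position
# (the repository's actual encoding may differ), which is precisely why no
# training is needed: the Sell and Hold decision is constant.
def chooseAction(self, state):
    """
    GOAL: Always output the sell action, whatever the RL state (Sell and Hold).

    INPUTS: - state: RL state returned by the trading environment.

    OUTPUTS: - action: Trading action (0 <=> sell/short, by assumption).
    """
    return 0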
def testing(self, trainingEnv, testingEnv, rendering=False, showPerformance=False):
    """
    GOAL: Test the RL agent trading policy on a new trading environment
          in order to assess the trading strategy performance.

    INPUTS: - trainingEnv: Training RL environment (known).
            - testingEnv: Unknown trading RL environment.
            - rendering: Enable the trading environment rendering.
            - showPerformance: Enable the printing of a table summarizing
                               the trading strategy performance.

    OUTPUTS: - testingEnv: Trading environment backtested.
    """

    # Apply data augmentation techniques to process the testing set
    dataAugmentation = DataAugmentation()
    testingEnvSmoothed = dataAugmentation.lowPassFilter(testingEnv, filterOrder)
    trainingEnv = dataAugmentation.lowPassFilter(trainingEnv, filterOrder)

    # Initialization of some RL variables
    coefficients = self.getNormalizationCoefficients(trainingEnv)
    state = self.processState(testingEnvSmoothed.reset(), coefficients)
    testingEnv.reset()
    QValues0 = []
    QValues1 = []
    done = 0

    # Interact with the environment until the episode termination
    while done == 0:

        # Choose an action according to the RL policy and the current RL state
        action, _, QValues = self.chooseAction(state)

        # Interact with the environment with the chosen action
        nextState, _, done, _ = testingEnvSmoothed.step(action)
        testingEnv.step(action)

        # Update the new state
        state = self.processState(nextState, coefficients)

        # Storing of the Q values
        QValues0.append(QValues[0])
        QValues1.append(QValues[1])

    # If required, show the rendering of the trading environment
    if rendering:
        testingEnv.render()
        self.plotQValues(QValues0, QValues1, testingEnv.marketSymbol)

    # If required, print the strategy performance in a table
    if showPerformance:
        analyser = PerformanceEstimator(testingEnv.data)
        analyser.displayPerformance('TDQN')

    return testingEnv
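# Hedged sketch of the kind of low-pass filtering assumed above: smoothing
# the environment's market data with a simple rolling mean of window size
# filterOrder. The actual DataAugmentation.lowPassFilter may differ (e.g. in
# the set of columns smoothed or the treatment of the first samples).
import copy

def lowPassFilterSketch(tradingEnv, filterOrder=5):
    newEnv = copy.deepcopy(tradingEnv)
    for column in ['Close', 'Low', 'High', 'Volume']:
        # A rolling mean attenuates the high-frequency noise in the signal
        newEnv.data[column] = newEnv.data[column].rolling(window=filterOrder).mean()
        # Backfill the first (filterOrder - 1) undefined samples
        newEnv.data[column] = newEnv.data[column].bfill()
    return newEnv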
def testing(self, trainingEnv, testingEnv, rendering=False, showPerformance=False):
    """
    GOAL: Test the trading strategy on another unknown trading environment
          (called testing set) in order to evaluate the trading strategy
          performance.

    INPUTS: - testingEnv: Unknown trading environment (testing set).
            - trainingEnv: Known trading environment (training set).
            - rendering: Enable the trading environment rendering.
            - showPerformance: Enable the printing of a table summarizing
                               the trading strategy performance.

    OUTPUTS: - testingEnv: Trading environment backtested.
    """

    # Execution of the trading strategy on the trading environment
    testingEnv.reset()
    done = 0
    while done == 0:
        _, _, done, _ = testingEnv.step(self.chooseAction(testingEnv.state))

    # If required, render the trading environment backtested
    if rendering:
        testingEnv.render()

    # If required, print the strategy performance in a table
    if showPerformance:
        analyser = PerformanceEstimator(testingEnv.data)
        analyser.displayPerformance('MAMR')

    # Return the trading environment backtested (testing set)
    return testingEnv
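# Hedged sketch of a chooseAction consistent with a Moving Averages Mean
# Reversion (MAMR) strategy: go short (0) when the short-term moving average
# rises above the long-term one (expecting the price to revert towards its
# trend), long (1) otherwise. It assumes self.parameters = [shortWindow,
# longWindow] (as set by setParameters in the training method below) and
# that the state exposes the recent closing prices as its first component;
# the real implementation may differ on both counts.
import numpy as np

def chooseAction(self, state):
    prices = np.asarray(state[0])                       # recent closing prices (assumption)
    shortAverage = np.mean(prices[-self.parameters[0]:])  # short-term moving average
    longAverage = np.mean(prices[-self.parameters[1]:])   # long-term moving average
    return 0 if shortAverage > longAverage else 1          # mean reversion decision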
def evaluateStock(self, stockName,
                  startingDate=startingDate, endingDate=endingDate, splitingDate=splitingDate,
                  observationSpace=observationSpace, actionSpace=actionSpace,
                  money=money, stateLength=stateLength, transactionCosts=transactionCosts,
                  bounds=bounds, step=step, numberOfEpisodes=numberOfEpisodes,
                  verbose=False, plotTraining=False, rendering=False, showPerformance=False,
                  saveStrategy=False):
    """
    GOAL: Simulate and compare the performance achieved by all the supported
          trading strategies on a certain stock of the testbench.

    INPUTS: - stockName: Name of the stock (in the testbench).
            - startingDate: Beginning of the trading horizon.
            - endingDate: Ending of the trading horizon.
            - splitingDate: Splitting date between the training dataset
                            and the testing dataset.
            - observationSpace: RL observation space.
            - actionSpace: RL action space.
            - money: Initial capital at the disposal of the agent.
            - stateLength: Length of the trading agent state.
            - transactionCosts: Additional costs incurred while trading
                                (e.g. 0.01 <=> 1% of transaction costs).
            - bounds: Bounds of the parameter search space (training).
            - step: Step of the parameter search space (training).
            - numberOfEpisodes: Number of episodes of the RL training phase.
            - verbose: Enable the printing of a simulation feedback.
            - plotTraining: Enable the plotting of the training results.
            - rendering: Enable the rendering of the trading environment.
            - showPerformance: Enable the printing of a table summarizing
                               the trading strategy performance.
            - saveStrategy: Enable the saving of the trading strategy.

    OUTPUTS: - performanceTable: Table summarizing the performance of
                                 a trading strategy.
    """

    # Initialization of some variables
    performanceTable = [["Profit & Loss (P&L)"], ["Annualized Return"], ["Annualized Volatility"],
                        ["Sharpe Ratio"], ["Sortino Ratio"], ["Maximum DrawDown"],
                        ["Maximum DrawDown Duration"], ["Profitability"],
                        ["Ratio Average Profit/Loss"], ["Skewness"]]
    headers = ["Performance Indicator"]

    # Loop through all the trading strategies supported (progress bar)
    print("Trading strategies evaluation progression:")
    for strategy in tqdm(itertools.chain(strategies, strategiesAI)):

        # Simulation of the current trading strategy on the stock
        try:
            # Simulate an already existing trading strategy on the stock
            _, _, testingEnv = self.simulateExistingStrategy(strategy, stockName,
                                                             startingDate, endingDate, splitingDate,
                                                             observationSpace, actionSpace, money,
                                                             stateLength, transactionCosts,
                                                             rendering, showPerformance)
        except SystemError:
            # Simulate a new trading strategy on the stock
            _, _, testingEnv = self.simulateNewStrategy(strategy, stockName,
                                                        startingDate, endingDate, splitingDate,
                                                        observationSpace, actionSpace, money,
                                                        stateLength, transactionCosts, bounds, step,
                                                        numberOfEpisodes, verbose, plotTraining,
                                                        rendering, showPerformance, saveStrategy)

        # Retrieve the trading performance associated with the trading strategy
        analyser = PerformanceEstimator(testingEnv.data)
        performance = analyser.computePerformance()

        # Get the required format for the display of the performance table
        headers.append(strategy)
        for i in range(len(performanceTable)):
            performanceTable[i].append(performance[i][1])

    # Display the performance table
    tabulation = tabulate(performanceTable, headers, tablefmt="fancy_grid", stralign="center")
    print(tabulation)

    return performanceTable
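# Hedged usage example of evaluateStock: comparing all supported strategies
# on one stock of the testbench. It assumes the enclosing class is a
# simulator exposing evaluateStock as above (named TradingSimulator here by
# assumption) and that 'Apple' is a valid testbench entry; both names are
# illustrative only.
if __name__ == '__main__':
    simulator = TradingSimulator()
    performanceTable = simulator.evaluateStock('Apple', verbose=True, showPerformance=True)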
def plotExpectedPerformance(self, trainingEnv, trainingParameters=[], iterations=10):
    """
    GOAL: Plot the expected performance of the intelligent DRL trading agent.

    INPUTS: - trainingEnv: Training RL environment (known).
            - trainingParameters: Additional parameters associated
                                  with the training phase (e.g. the number
                                  of episodes).
            - iterations: Number of training/testing iterations to compute
                          the expected performance.

    OUTPUTS: - trainingEnv: Training RL environment.
    """

    # Preprocessing of the training set
    dataAugmentation = DataAugmentation()
    trainingEnvList = dataAugmentation.generate(trainingEnv)

    # Save the initial Deep Neural Network weights
    initialWeights = copy.deepcopy(self.policyNetwork.state_dict())

    # Initialization of some variables tracking both training and testing performances
    performanceTrain = np.zeros((trainingParameters[0], iterations))
    performanceTest = np.zeros((trainingParameters[0], iterations))

    # Initialization of the testing trading environment
    marketSymbol = trainingEnv.marketSymbol
    startingDate = trainingEnv.endingDate
    endingDate = '2020-1-1'
    money = trainingEnv.data['Money'][0]
    stateLength = trainingEnv.stateLength
    transactionCosts = trainingEnv.transactionCosts
    testingEnv = TradingEnv(marketSymbol, startingDate, endingDate, money, stateLength, transactionCosts)

    # Print the hardware selected for the training of the Deep Neural Network (either CPU or GPU)
    print("Hardware selected for training: " + str(self.device))

    try:

        # Apply the training/testing procedure for the number of iterations specified
        for iteration in range(iterations):

            # Print the progression
            print(''.join(["Expected performance evaluation progression: ", str(iteration+1), "/", str(iterations)]))

            # Training phase for the number of episodes specified as parameter
            for episode in tqdm(range(trainingParameters[0])):

                # For each episode, train on the entire set of training environments
                for i in range(len(trainingEnvList)):

                    # Set the initial RL variables
                    coefficients = self.getNormalizationCoefficients(trainingEnvList[i])
                    trainingEnvList[i].reset()
                    startingPoint = random.randrange(len(trainingEnvList[i].data.index))
                    trainingEnvList[i].setStartingPoint(startingPoint)
                    state = self.processState(trainingEnvList[i].state, coefficients)
                    previousAction = 0
                    done = 0
                    stepsCounter = 0

                    # Interact with the training environment until termination
                    while done == 0:

                        # Choose an action according to the RL policy and the current RL state
                        action, _, _ = self.chooseActionEpsilonGreedy(state, previousAction)

                        # Interact with the environment with the chosen action
                        nextState, reward, done, info = trainingEnvList[i].step(action)

                        # Process the RL variables retrieved and insert this new experience into the Experience Replay memory
                        reward = self.processReward(reward)
                        nextState = self.processState(nextState, coefficients)
                        self.replayMemory.push(state, action, reward, nextState, done)

                        # Trick for better exploration
                        otherAction = int(not bool(action))
                        otherReward = self.processReward(info['Reward'])
                        otherDone = info['Done']
                        otherNextState = self.processState(info['State'], coefficients)
                        self.replayMemory.push(state, otherAction, otherReward, otherNextState, otherDone)

                        # Execute the DQN learning procedure
                        stepsCounter += 1
                        if stepsCounter == learningUpdatePeriod:
                            self.learning()
                            stepsCounter = 0

                        # Update the RL state
                        state = nextState
                        previousAction = action

                # Compute both training and testing current performances
                trainingEnv = self.testing(trainingEnv, trainingEnv)
                analyser = PerformanceEstimator(trainingEnv.data)
                performanceTrain[episode][iteration] = analyser.computeSharpeRatio()
                self.writer.add_scalar('Training performance (Sharpe Ratio)', performanceTrain[episode][iteration], episode)
                testingEnv = self.testing(trainingEnv, testingEnv)
                analyser = PerformanceEstimator(testingEnv.data)
                performanceTest[episode][iteration] = analyser.computeSharpeRatio()
                self.writer.add_scalar('Testing performance (Sharpe Ratio)', performanceTest[episode][iteration], episode)

            # Restore the initial state of the intelligent RL agent
            if iteration < (iterations-1):
                trainingEnv.reset()
                testingEnv.reset()
                self.policyNetwork.load_state_dict(initialWeights)
                self.targetNetwork.load_state_dict(initialWeights)
                self.optimizer = optim.Adam(self.policyNetwork.parameters(), lr=learningRate, weight_decay=L2Factor)
                self.replayMemory.reset()
                self.iterations = 0
                stepsCounter = 0

            iteration += 1

    except KeyboardInterrupt:
        print()
        print("WARNING: Expected performance evaluation prematurely interrupted...")
        print()
        self.policyNetwork.eval()

    # Compute the expected performance of the intelligent DRL trading agent
    expectedPerformanceTrain = []
    expectedPerformanceTest = []
    stdPerformanceTrain = []
    stdPerformanceTest = []
    for episode in range(trainingParameters[0]):
        expectedPerformanceTrain.append(np.mean(performanceTrain[episode][:iteration]))
        expectedPerformanceTest.append(np.mean(performanceTest[episode][:iteration]))
        stdPerformanceTrain.append(np.std(performanceTrain[episode][:iteration]))
        stdPerformanceTest.append(np.std(performanceTest[episode][:iteration]))
    expectedPerformanceTrain = np.array(expectedPerformanceTrain)
    expectedPerformanceTest = np.array(expectedPerformanceTest)
    stdPerformanceTrain = np.array(stdPerformanceTrain)
    stdPerformanceTest = np.array(stdPerformanceTest)

    # Plot each training/testing iteration performance of the intelligent DRL trading agent
    for i in range(iteration):
        fig = plt.figure()
        ax = fig.add_subplot(111, ylabel='Performance (Sharpe Ratio)', xlabel='Episode')
        ax.plot([performanceTrain[e][i] for e in range(trainingParameters[0])])
        ax.plot([performanceTest[e][i] for e in range(trainingParameters[0])])
        ax.legend(["Training", "Testing"])
        plt.savefig(''.join(['Figures/', str(marketSymbol), '_TrainingTestingPerformance', str(i+1), '.png']))
        #plt.show()

    # Plot the expected performance of the intelligent DRL trading agent
    fig = plt.figure()
    ax = fig.add_subplot(111, ylabel='Performance (Sharpe Ratio)', xlabel='Episode')
    ax.plot(expectedPerformanceTrain)
    ax.plot(expectedPerformanceTest)
    ax.fill_between(range(len(expectedPerformanceTrain)), expectedPerformanceTrain-stdPerformanceTrain, expectedPerformanceTrain+stdPerformanceTrain, alpha=0.25)
    ax.fill_between(range(len(expectedPerformanceTest)), expectedPerformanceTest-stdPerformanceTest, expectedPerformanceTest+stdPerformanceTest, alpha=0.25)
    ax.legend(["Training", "Testing"])
    plt.savefig(''.join(['Figures/', str(marketSymbol), '_TrainingTestingExpectedPerformance', '.png']))
    #plt.show()

    # Closing of the tensorboard writer
    self.writer.close()

    return trainingEnv
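# For reference, a hedged sketch of the Sharpe ratio computation invoked
# above via PerformanceEstimator.computeSharpeRatio: annualized mean daily
# return over annualized volatility, assuming roughly 252 trading days per
# year and a zero risk-free rate by default. The actual implementation may
# handle edge cases and annualization differently.
import numpy as np

def computeSharpeRatioSketch(capitalSeries, riskFreeRate=0):
    capital = np.asarray(capitalSeries, dtype=float)
    returns = np.diff(capital) / capital[:-1]       # daily returns
    volatility = np.std(returns) * np.sqrt(252)     # annualized volatility
    if volatility == 0:
        return 0.0
    expectedReturn = np.mean(returns) * 252         # annualized return
    return (expectedReturn - riskFreeRate) / volatility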
def training(self, trainingEnv, trainingParameters=[], verbose=False,
             rendering=False, plotTraining=False, showPerformance=False):
    """
    GOAL: Train the RL trading agent by interacting with its trading environment.

    INPUTS: - trainingEnv: Training RL environment (known).
            - trainingParameters: Additional parameters associated
                                  with the training phase (e.g. the number
                                  of episodes).
            - verbose: Enable the printing of a training feedback.
            - rendering: Enable the training environment rendering.
            - plotTraining: Enable the plotting of the training results.
            - showPerformance: Enable the printing of a table summarizing
                               the trading strategy performance.

    OUTPUTS: - trainingEnv: Training RL environment.
    """

    """
    # Compute and plot the expected performance of the trading policy
    trainingEnv = self.plotExpectedPerformance(trainingEnv, trainingParameters, iterations=50)
    return trainingEnv
    """

    # Apply data augmentation techniques to improve the training set
    dataAugmentation = DataAugmentation()
    trainingEnvList = dataAugmentation.generate(trainingEnv)

    # Initialization of some variables tracking the training and testing performances
    if plotTraining:
        # Training performance
        performanceTrain = []
        score = np.zeros((len(trainingEnvList), trainingParameters[0]))
        # Testing performance
        marketSymbol = trainingEnv.marketSymbol
        startingDate = trainingEnv.endingDate
        endingDate = '2020-1-1'
        money = trainingEnv.data['Money'][0]
        stateLength = trainingEnv.stateLength
        transactionCosts = trainingEnv.transactionCosts
        testingEnv = TradingEnv(marketSymbol, startingDate, endingDate, money, stateLength, transactionCosts)
        performanceTest = []

    try:
        # If required, print the training progression
        if verbose:
            print("Training progression (hardware selected => " + str(self.device) + "):")

        # Training phase for the number of episodes specified as parameter
        for episode in tqdm(range(trainingParameters[0]), disable=not(verbose)):

            # For each episode, train on the entire set of training environments
            for i in range(len(trainingEnvList)):

                # Set the initial RL variables
                coefficients = self.getNormalizationCoefficients(trainingEnvList[i])
                trainingEnvList[i].reset()
                startingPoint = random.randrange(len(trainingEnvList[i].data.index))
                trainingEnvList[i].setStartingPoint(startingPoint)
                state = self.processState(trainingEnvList[i].state, coefficients)
                previousAction = 0
                done = 0
                stepsCounter = 0

                # Set the performance tracking variables
                if plotTraining:
                    totalReward = 0

                # Interact with the training environment until termination
                while done == 0:

                    # Choose an action according to the RL policy and the current RL state
                    action, _, _ = self.chooseActionEpsilonGreedy(state, previousAction)

                    # Interact with the environment with the chosen action
                    nextState, reward, done, info = trainingEnvList[i].step(action)

                    # Process the RL variables retrieved and insert this new experience into the Experience Replay memory
                    reward = self.processReward(reward)
                    nextState = self.processState(nextState, coefficients)
                    self.replayMemory.push(state, action, reward, nextState, done)

                    # Trick for better exploration
                    otherAction = int(not bool(action))
                    otherReward = self.processReward(info['Reward'])
                    otherNextState = self.processState(info['State'], coefficients)
                    otherDone = info['Done']
                    self.replayMemory.push(state, otherAction, otherReward, otherNextState, otherDone)

                    # Execute the DQN learning procedure
                    stepsCounter += 1
                    if stepsCounter == learningUpdatePeriod:
                        self.learning()
                        stepsCounter = 0

                    # Update the RL state
                    state = nextState
                    previousAction = action

                    # Continuous tracking of the training performance
                    if plotTraining:
                        totalReward += reward

                # Store the current training results
                if plotTraining:
                    score[i][episode] = totalReward

            # Compute the current performance on both the training and testing sets
            if plotTraining:
                # Training set performance
                trainingEnv = self.testing(trainingEnv, trainingEnv)
                analyser = PerformanceEstimator(trainingEnv.data)
                performance = analyser.computeSharpeRatio()
                performanceTrain.append(performance)
                self.writer.add_scalar('Training performance (Sharpe Ratio)', performance, episode)
                trainingEnv.reset()
                # Testing set performance
                testingEnv = self.testing(trainingEnv, testingEnv)
                analyser = PerformanceEstimator(testingEnv.data)
                performance = analyser.computeSharpeRatio()
                performanceTest.append(performance)
                self.writer.add_scalar('Testing performance (Sharpe Ratio)', performance, episode)
                testingEnv.reset()

    except KeyboardInterrupt:
        print()
        print("WARNING: Training prematurely interrupted...")
        print()
        self.policyNetwork.eval()

    # Assess the algorithm performance on the training trading environment
    trainingEnv = self.testing(trainingEnv, trainingEnv)

    # If required, show the rendering of the trading environment
    if rendering:
        trainingEnv.render()

    # If required, plot the training results
    if plotTraining:
        fig = plt.figure()
        ax = fig.add_subplot(111, ylabel='Performance (Sharpe Ratio)', xlabel='Episode')
        ax.plot(performanceTrain)
        ax.plot(performanceTest)
        ax.legend(["Training", "Testing"])
        plt.savefig(''.join(['Figures/', str(marketSymbol), '_TrainingTestingPerformance', '.png']))
        #plt.show()
        for i in range(len(trainingEnvList)):
            self.plotTraining(score[i][:episode], marketSymbol)

    # If required, print the strategy performance in a table
    if showPerformance:
        analyser = PerformanceEstimator(trainingEnv.data)
        analyser.displayPerformance('TDQN')

    # Closing of the tensorboard writer
    self.writer.close()

    return trainingEnv
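# Hedged sketch of the Experience Replay memory the training loop above
# pushes to, including the "trick for better exploration": for each step,
# both the chosen transition and the counterfactual transition of the
# opposite action are stored. A fixed-capacity deque with uniform random
# sampling is the standard DQN construction; the repository's ReplayMemory
# may differ in details (capacity, data layout).
import random
from collections import deque

class ReplayMemorySketch:
    def __init__(self, capacity=100000):
        self.memory = deque(maxlen=capacity)   # oldest experiences are discarded first

    def push(self, state, action, reward, nextState, done):
        # Store one transition (chosen or counterfactual alike)
        self.memory.append((state, action, reward, nextState, done))

    def sample(self, batchSize):
        # Uniform random sampling breaks the temporal correlation of experiences
        return random.sample(list(self.memory), batchSize)

    def reset(self):
        self.memory.clear()

    def __len__(self):
        return len(self.memory)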
def training(self, trainingEnv, trainingParameters=[], verbose=False,
             rendering=False, plotTraining=False, showPerformance=False):
    """
    GOAL: Train the trading strategy on a known trading environment
          (called training set) in order to tune the trading strategy
          parameters, by simulating many combinations of parameters.

    INPUTS: - trainingEnv: Known trading environment (training set).
            - trainingParameters: Additional parameters associated
                                  with the training phase simulations.
            - verbose: Enable the printing of a training feedback.
            - rendering: Enable the trading environment rendering.
            - plotTraining: Enable the plotting of the training results.
            - showPerformance: Enable the printing of a table summarizing
                               the trading strategy performance.

    OUTPUTS: - trainingEnv: Trading environment associated with the best
                            trading strategy parameters backtested.
    """

    # Compute the dimension of the parameter search space
    bounds = trainingParameters[0]
    step = trainingParameters[1]
    dimension = math.ceil((bounds[1] - bounds[0]) / step)

    # Initialize some variables required for the simulations
    trainingEnv.reset()
    results = np.zeros((dimension, dimension))
    bestShort = 0
    bestLong = 0
    bestPerformance = -100
    i = 0
    j = 0
    count = 1

    # If required, compute the number of simulation iterations
    if verbose:
        iterations = dimension - 1
        length = 0
        while iterations > 0:
            length += iterations
            iterations -= 1

    # Loop through all the parameters combinations included in the parameter search space
    for shorter in range(bounds[0], bounds[1], step):
        for longer in range(bounds[0], bounds[1], step):

            # Obvious restriction on the parameters
            if (shorter < longer):

                # If required, print the progression of the training
                if (verbose):
                    print("".join(["Training progression: ", str(count), "/", str(length)]), end='\r', flush=True)

                # Apply the trading strategy with the current combination of parameters
                self.setParameters([shorter, longer])
                done = 0
                while done == 0:
                    _, _, done, _ = trainingEnv.step(self.chooseAction(trainingEnv.state))

                # Retrieve the performance associated with this simulation (Sharpe Ratio)
                performanceAnalysis = PerformanceEstimator(trainingEnv.data)
                performance = performanceAnalysis.computeSharpeRatio()
                results[i][j] = performance

                # Track the best performance and parameters
                if (performance > bestPerformance):
                    bestShort = shorter
                    bestLong = longer
                    bestPerformance = performance

                # Reset of the trading environment
                trainingEnv.reset()
                count += 1

            j += 1
        i += 1
        j = 0

    # Execute once again the strategy associated with the best parameters simulated
    trainingEnv.reset()
    self.setParameters([bestShort, bestLong])
    done = 0
    while done == 0:
        _, _, done, _ = trainingEnv.step(self.chooseAction(trainingEnv.state))

    # If required, render the trading environment backtested
    if rendering:
        trainingEnv.render()

    # If required, plot the training results
    if plotTraining:
        self.plotTraining(results, bounds, step, trainingEnv.marketSymbol)

    # If required, print the strategy performance in a table
    if showPerformance:
        analyser = PerformanceEstimator(trainingEnv.data)
        analyser.displayPerformance('MAMR')

    # Return the trading environment backtested (training set)
    return trainingEnv
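# Hedged usage example of the grid-search training above. It assumes the
# enclosing class is named MovingAveragesMR (hypothetical) and uses
# illustrative bounds: moving-average windows from 1 to 50 days, explored in
# steps of 1, i.e. the trainingParameters format [bounds, step] expected by
# the method. This explores dimension^2 = 49^2 cells, of which
# 49*48/2 = 1176 satisfy the shorter < longer restriction.
strategy = MovingAveragesMR()
trainingEnv = strategy.training(trainingEnv, trainingParameters=[(1, 50), 1],
                                verbose=True, plotTraining=True, showPerformance=True)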