# This script retrains the trader from scratch using training data from
# Period.TRAINING; test data from Period.TESTING is loaded alongside it.
EPISODES = 5

if __name__ == "__main__":
    # Create the training data and testing data.
    # Hint: You can crop the training data with
    # training_data.deepcopy_first_n_items(n)
    training_data = StockMarketData([Company.A, Company.B], [Period.TRAINING])
    testing_data = StockMarketData([Company.A, Company.B], [Period.TESTING])
    #training_data = training_data.deepcopy_first_n_items(6000) # 12588
    #print(f'training_data[Company.A].get_row_count(): {training_data[Company.A].get_row_count()}')

    # Create the stock exchange and one trader to train the net.
    # The exchange instance gets its own name: rebinding `stock_exchange`
    # would shadow the module and make any later
    # `stock_exchange.StockExchange(...)` call fail with AttributeError.
    exchange = stock_exchange.StockExchange(10000.0)
    # NOTE(review): the two positional flags (False, True) are passed through
    # unchanged; presumably they select training behaviour - confirm against
    # DeepQLearningTrader's signature.
    training_trader = DeepQLearningTrader(
        ObscureExpert(Company.A), ObscureExpert(Company.B), False, True
    )

    # Save the final portfolio values per episode
    final_values_training, final_values_test = [], []

    for i in range(EPISODES):
        logger.info(f"DQL Trader: Starting training episode {i}")

        # Train the net on the training data, persist the model, and record
        # the portfolio value the trader ended this episode with.
        exchange.run(training_data, [training_trader])
        training_trader.save_trained_model()
        final_values_training.append(
            exchange.get_final_portfolio_value(training_trader)
        )
# Per-phase containers, keyed by 'train' / 'test'. The helpers below index the
# two result series as if each run contributed EPISODES consecutive entries.
final_portfolio_values = {"train": [], "test": []}
return_pct_per_day = {"train": [], "test": []}
traders = {"train": [], "test": []}


def get_last_portfolio_value(phase: str, run: int) -> float:
    """Return the final portfolio value recorded for the last episode of *run*.

    The index is clamped to the last available entry, so a run that has not
    finished yet yields the most recent value recorded for *phase*.
    """
    series = final_portfolio_values[phase]
    last_episode_of_run = (run + 1) * EPISODES - 1
    return series[min(last_episode_of_run, len(series) - 1)]


def get_last_v_score(phase: str, run: int) -> float:
    """Return the return-per-day entry recorded for the last episode of *run*.

    Uses the same clamped indexing as ``get_last_portfolio_value``.
    """
    series = return_pct_per_day[phase]
    last_episode_of_run = (run + 1) * EPISODES - 1
    return series[min(last_episode_of_run, len(series) - 1)]


for run in range(TRAINING_RUNS):
    # Create one stock exchange per phase and one trader to train the net
    starting_cash = {"train": 10000.0, "test": 2000.0}
    stock_exchanges = {
        phase_name: stock_exchange.StockExchange(cash)
        for phase_name, cash in starting_cash.items()
    }
    traders['train'].append(
        DeepQLearningTrader(
            ObscureExpert(Company.A), ObscureExpert(Company.B), False, True
        )
    )

    for episode in range(EPISODES):
        # logger.info(f"DQL Trader: Starting training episode {episode}")
        for phase in ('train', 'test'):
            if phase == 'test':
                # A fresh testing trader is built for every episode.
                testing_trader = DeepQLearningTrader(
                    ObscureExpert(Company.A), ObscureExpert(Company.B), True, False
                )
                if episode > 0:
                    # replace testing trader from previous episode
                    traders[phase][-1] = testing_trader
                else:
                    traders[phase].append(testing_trader)

            # The most recently stored trader of this phase trades this episode.
            trader = traders[phase][-1]
            stock_exchanges[phase].run(data[phase], [trader])