import time

import numpy as np
import torch

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def predict_trade(test_data, capital, window_size=10):
    """Replay a trained Trader over test_data and plot its buy/sell decisions."""
    total_profit = 0
    closes = []
    buys = []
    sells = []
    done = False
    trader = Trader(window_size, True)  # True -> evaluation mode, no exploration
    trader.portfolio = []
    num_steps = len(test_data) - 1
    state = state_normalize(test_data, 0, window_size + 1)
    for t in range(num_steps):
        action = trader.act(state)
        closes.append(test_data[t])
        next_state = state_normalize(test_data, t + 1, window_size + 1)
        reward = 0
        if action == 1:  # buy
            if capital > test_data[t]:
                trader.portfolio.append(test_data[t])
                buys.append(test_data[t])
                sells.append(None)
                capital -= test_data[t]
            else:  # cannot afford a share, treat as hold
                buys.append(None)
                sells.append(None)
        elif action == 2:  # sell
            if len(trader.portfolio) > 0:
                bought_price = trader.portfolio.pop(0)  # FIFO: close the oldest position
                reward = max(test_data[t] - bought_price, 0)
                total_profit += test_data[t] - bought_price
                buys.append(None)
                sells.append(test_data[t])
                capital += test_data[t]
            else:  # nothing to sell, treat as hold
                buys.append(None)
                sells.append(None)
        else:  # hold
            buys.append(None)
            sells.append(None)
        if t == num_steps - 1:
            done = True
        trader.history.push(state, action, next_state, reward)
        state = next_state
    plot_actions(closes, buys, sells, total_profit)
    return total_profit
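# The helpers used above (Trader, state_normalize, plot_actions) are defined
# elsewhere in this project. As a point of reference only, a minimal sketch of
# a function matching the state_normalize(data, t, n) signature is given below.
# Its behavior here (a sigmoid-squashed window of consecutive price deltas,
# left-padded at the start of the series) is an assumption, not the project's
# actual implementation.
def state_normalize_sketch(data, t, n):
    """Hypothetical: return the n-1 sigmoid-scaled price deltas ending at t."""
    start = t - n + 1
    if start >= 0:
        window = list(data[start:t + 1])
    else:
        # Pad on the left with the first price when the window extends before t=0.
        window = [data[0]] * (-start) + list(data[0:t + 1])
    deltas = [window[i + 1] - window[i] for i in range(n - 1)]
    return np.array([1 / (1 + np.exp(-d)) for d in deltas])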
def q_learning(training_data, capital, num_episode=100, window_size=10):
    """Train a DQN Trader on training_data and compare it with a random baseline."""
    # Build a random-action baseline: sample ten random runs, take their mean
    # profit, then keep resampling until a run does not beat that mean, so the
    # comparison series is not an unusually lucky one.
    profits = []
    for i in range(10):
        profit, series = random_action(training_data, capital)
        profits.append(profit)
    mean_profit = np.mean(profits)
    while profit > mean_profit:
        profit, series = random_action(training_data, capital)

    trader = Trader(window_size)
    num_step = len(training_data) - 1
    start_time = time.time()
    for e in range(num_episode + 1):
        cumulative_reward = []
        done = False
        state = state_normalize(training_data, 0, window_size + 1)
        total_profit = 0
        trader.portfolio = []
        for t in range(num_step):
            action = trader.act(state)
            next_state = state_normalize(training_data, t + 1, window_size + 1)
            reward = 0
            if action == 1:  # buy
                if capital > training_data[t]:
                    trader.portfolio.append(training_data[t])
                    capital -= training_data[t]
            elif action == 2:  # sell
                if len(trader.portfolio) > 0:
                    bought_price = trader.portfolio.pop(0)  # FIFO
                    reward = max(training_data[t] - bought_price, 0)
                    total_profit += training_data[t] - bought_price
                    capital += training_data[t]
            if t == num_step - 1:
                done = True
            trader.history.push(state, action, next_state, reward)
            state = next_state
            cumulative_reward.append(total_profit)
        trader.optimize()
        if total_profit == 0:
            # Unprofitable episode: roll back to the last profitable checkpoint
            # instead of continuing from a degenerate policy.
            try:
                trader.predict_net = torch.load('predict_model_backup', map_location=device)
                trader.target_net = torch.load('target_model_backup', map_location=device)
            except FileNotFoundError:  # no backup has been saved yet
                return 0
        else:
            torch.save(trader.predict_net, 'predict_model_backup')
            torch.save(trader.target_net, 'target_model_backup')
        if e % 10 == 0:
            # Periodically sync the target network with the prediction network
            # and checkpoint both.
            trader.target_net.load_state_dict(trader.predict_net.state_dict())
            torch.save(trader.predict_net, 'predict_model')
            torch.save(trader.target_net, 'target_model')
    plot_comparison(cumulative_reward, series)
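# A minimal usage sketch, assuming closing prices are loaded into a plain list
# of floats. The CSV path and column name below are placeholders (assumptions),
# not part of this project.
if __name__ == '__main__':
    import pandas as pd

    prices = pd.read_csv('prices.csv')['Close'].tolist()  # hypothetical data file
    split = int(len(prices) * 0.8)  # 80/20 train/test split
    q_learning(prices[:split], capital=10000, num_episode=100, window_size=10)
    profit = predict_trade(prices[split:], capital=10000, window_size=10)
    print(f'Total test profit: {profit:.2f}')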