import numpy as np
import pandas as pd

# Dqn, plot_barchart and the constants INPUT_CSV_TEMPLATE, USE_EXISTING_MODEL,
# RANDOM_ACTION_MIN are expected to be provided by the project's other modules.

def run_dqn(name_asset, n_features, n_neurons, n_episodes, batch_size,
            random_action_decay, future_reward_importance):
    # Load the asset's closing prices as a list of floats.
    # Data source: https://www.kaggle.com/camnugent/sandp500
    df = pd.read_csv(INPUT_CSV_TEMPLATE % name_asset)
    data = df['Close'].astype(float).tolist()
    l = len(data) - 1
    print(f'Running {n_episodes} episodes, on {name_asset} (has {l} rows), features={n_features}, '
          f'batch={batch_size}, random_action_decay={random_action_decay}')

    dqn = Dqn()
    profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, eps = \
        dqn.learn(data, n_episodes, n_features, batch_size, USE_EXISTING_MODEL,
                  RANDOM_ACTION_MIN, random_action_decay, n_neurons,
                  future_reward_importance)

    print(f'Learning completed. Backtest the model {model_name} on any stock')
    print('python backtest.py ')

    print(f'see plot of profit_vs_episode = {profit_vs_episode[:10]}')
    plot_barchart(profit_vs_episode, "episode vs profit", "episode vs profit",
                  "total profit", "episode", 'green')

    print(f'see plot of trades_vs_episode = {trades_vs_episode[:10]}')
    plot_barchart(trades_vs_episode, "episode vs trades", "episode vs trades",
                  "total trades", "episode", 'blue')

    # Summary line; also passed as the x-axis label of the epsilon chart so the
    # run's settings appear under the plot.
    text = f'{name_asset} ({l}), features={n_features}, nn={n_neurons}, batch={batch_size}, ' \
           f'epi={n_episodes}({num_trains}), eps={np.round(eps, 1)}({np.round(random_action_decay, 5)})'
    print(f'see plot of epsilon_vs_episode = {epsilon_vs_episode[:10]}')
    plot_barchart(epsilon_vs_episode, "episode vs epsilon", "episode vs epsilon",
                  "epsilon(probability of random action)", text, 'red')
    print(text)
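# --- Example invocation (illustrative sketch, not part of the original source).
# The constant values and all hyperparameters below are assumptions chosen to
# show the call shape; the real project defines them elsewhere, possibly with
# different values.
if __name__ == '__main__':
    INPUT_CSV_TEMPLATE = 'data/%s.csv'  # hypothetical CSV path template
    USE_EXISTING_MODEL = False
    RANDOM_ACTION_MIN = 0.0
    run_dqn(name_asset='AAPL', n_features=10, n_neurons=64, n_episodes=50,
            batch_size=32, random_action_decay=0.995,
            future_reward_importance=0.95)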
# stock_name, episodes, num_features, num_neurons, batch_size,
# random_action_decay and start_time are assumed to be defined earlier in
# this script (e.g., parsed from the command line).

future_reward_importance = 0.9500  # (float) 0-1, aka decay or discount rate; determines the importance of
                                   # future rewards. If 0, the agent only learns from immediate rewards;
                                   # if 1, it strives for long-term reward.

# do not touch these params
random_action_min = 0.0     # (float) 0-1, do not touch this
use_existing_model = False  # (bool) do not touch this

data = getStockDataVec(stock_name)  # https://www.kaggle.com/camnugent/sandp500
l = len(data) - 1
print(f'Running {episodes} episodes, on {stock_name} (has {l} rows), features={num_features}, '
      f'batch={batch_size}, random_action_decay={random_action_decay}')

dqn = Dqn()
profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, num_trains, eps = \
    dqn.learn(data, episodes, num_features, batch_size, use_existing_model,
              random_action_min, random_action_decay, num_neurons,
              future_reward_importance)

print(f'Learning completed. You can now backtest the model {model_name} on any stock')
print('python backtest.py ')

minutes = np.round((time.time() - start_time) / 60, 1)  # elapsed run time in minutes
text = f'{stock_name} ({l}), t={minutes}, features={num_features}, nn={num_neurons}, batch={batch_size}, ' \
       f'epi={episodes}({num_trains}), eps={np.round(eps, 1)}({np.round(random_action_decay, 5)})'

print(f'see plot of profit_vs_episode = {profit_vs_episode[:10]}')
plot_barchart(profit_vs_episode, "episode vs profit", "episode vs profit",
              "total profit", "episode", 'green')

print(f'see plot of trades_vs_episode = {trades_vs_episode[:10]}')
plot_barchart(trades_vs_episode, "episode vs trades", "episode vs trades",
              "total trades", "episode", 'blue')
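# Sketch of where future_reward_importance (the discount factor, usually called
# gamma) enters a standard Q-learning target. The Dqn internals are not shown
# in this file, so this reflects the usual DQN update, not necessarily the
# project's exact code; the function name is hypothetical.
def q_learning_target(reward, next_q_values, gamma=0.95, done=False):
    # Terminal steps have no future to discount; otherwise the best next-state
    # Q-value is scaled by gamma, so gamma=0 ignores future rewards entirely
    # and gamma=1 weights them equally with the immediate reward.
    if done:
        return reward
    return reward + gamma * float(np.max(next_q_values))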
        # Inner per-step loop body: record the transition, advance the state,
        # and accumulate this episode's reward.
        state.next_state = State(next_state)
        if learn:
            nnet.addToMemory(state)  # memorize the step transition values
        state = state.next_state
        episode_rewards += reward
        if done:
            RESULT_TOTALS.append(episode_rewards)
            break

    # After each episode: train on the memorized transitions, or save the
    # model once when running in evaluation mode.
    if learn:
        nnet.learn(np.average(RESULT_TOTALS))
    else:
        if LEARN_SAVE:
            save(nnet)
            LEARN_SAVE = False

# After all episodes: plot the total reward per episode.
plt.plot(RESULT_TOTALS)
plt.grid(True, which='major', axis='y', color='r', linestyle='-', linewidth=.5)
plt.show()
print(RESULT_TOTALS)
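# Minimal replay-memory sketch illustrating what nnet.addToMemory/learn imply:
# transitions are stored and later sampled as a random minibatch, which breaks
# the temporal correlation between consecutive steps. The class and method
# names here are hypothetical; the project's real memory lives inside nnet.
import random
from collections import deque

class ReplayMemory:
    def __init__(self, capacity=10_000):
        self.buffer = deque(maxlen=capacity)  # oldest transitions fall off the front

    def add(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        # Uniform random sample, capped at the number of stored transitions.
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))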