])

env = default.create(
    portfolio=portfolio,
    action_scheme=default.actions.ManagedRiskOrders(
        stop=[0.02, 0.05, 0.1],
        take=[0.02, 0.05, 0.1, 0.125, 0.15, 0.2]),
    reward_scheme=default.rewards.RiskAdjustedReturns(window_size=40),
    feed=feed,
    renderer_feed=renderer_feed,
    renderer=default.renderers.ScreenLogger(),
    window_size=40)

# %%
env.observer.feed.next()

# %%
agent = DQNAgent(env)
agent.train(n_steps=200, n_episodes=10)

# %%
portfolio.ledger.as_frame()

# %%
portfolio.performance[0]

# %%
portfolio.performance[200]

# %%
portfolio.net_worth

# %%
plotly_renderer = default.renderers.PlotlyTradingChart()

# %%
plotly_renderer.render(env)
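# %%
# A minimal plotting sketch (an addition, not part of the original cells):
# assuming `portfolio.performance` is a step-indexed dict, as the indexing
# cells above suggest, pandas can turn it into a frame for a quick
# net-worth curve.
import pandas as pd

performance = pd.DataFrame.from_dict(portfolio.performance, orient="index")
performance["net_worth"].plot(title="Net worth per step")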
    # which is running on the main thread.
    #
    # If you see that the processes don't finish at the same time, reduce n_envs;
    # finished processes will wait for the slower ones, which can slow training down.
    # If your CPU is strong enough to handle more than one env per core, then you
    # can try increasing n_envs.
    #
    # More processes don't always lead to dramatically better/faster results,
    # so you should treat n_envs as another hyperparameter.
    reward, test_env = agent.train(n_envs=mp.cpu_count() - 1,
                                   n_steps=500,
                                   n_episodes=10,
                                   save_path=save_path,
                                   batch_size=1000,
                                   test_model=True,
                                   memory_capacity=5000)

    test_env.portfolio.performance.net_worth.plot()
    plt.show()
else:
    from tensortrade.agents import DQNAgent

    env = create_env()
    agent = DQNAgent(env)
    reward = agent.train(n_steps=200, n_episodes=25, save_path=save_path)

    env.portfolio.performance.net_worth.plot()
    plt.show()
    print("P/L: {}".format(env.portfolio.profit_loss))
    print("mean_reward: {}".format(reward))
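# A hypothetical follow-up sketch for the parallel branch above: treat n_envs
# as a hyperparameter and sweep it, reusing this example's train(...) call
# signature. `create_env` comes from the surrounding script, ParallelDQNAgent
# is assumed to be imported in the parallel branch (its import is outside this
# excerpt), and the candidate values below are illustrative only.
for n_envs in (2, 4, mp.cpu_count() - 1):
    sweep_agent = ParallelDQNAgent(create_env)
    sweep_reward, _ = sweep_agent.train(n_envs=n_envs,
                                        n_steps=500,
                                        n_episodes=10,
                                        batch_size=1000,
                                        test_model=True,
                                        memory_capacity=5000)
    print("n_envs={} -> mean reward: {}".format(n_envs, sweep_reward))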
    height=800,            # affects both displayed and saved file height; None for 100% height
    save_format="html",    # save the chart to an HTML file
    auto_open_html=False,  # open the saved HTML chart in a new browser tab
)

file_logger = FileLogger(
    filename="example.log",  # omit or None for an automatic file name
    path="training_logs",    # a new directory is created if it doesn't exist; None for no directory
)

renderer_feed = DataFeed([
    Stream.source(price_history[c].tolist(), dtype="float").rename(c)
    for c in price_history
])

env = default.create(
    portfolio=portfolio,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    feed=feed,
    window_size=20,
    renderer_feed=renderer_feed,
    renderer=[chart_renderer, file_logger, "screen-log"],
)

agent = DQNAgent(env)

# Set render_interval to None to render at episode ends only.
agent.train(n_episodes=2, n_steps=200, render_interval=10)
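# Equivalent shorthand sketch (an aside, not from the original tutorial): the
# renderer list above mixes configured instances with the "screen-log" string
# identifier. When no customization is needed, registered identifiers alone
# should also work; "plotly" and "file-log" are assumed to be the registered
# names for PlotlyTradingChart and FileLogger.
env_by_identifier = default.create(
    portfolio=portfolio,
    action_scheme="managed-risk",
    reward_scheme="risk-adjusted",
    feed=feed,
    window_size=20,
    renderer_feed=renderer_feed,
    renderer=["plotly", "file-log", "screen-log"],
)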
action_scheme="managed-risk", reward_scheme="risk-adjusted", feed=feed, renderer_feed=renderer_feed, renderer="screen-log", window_size=20) # %% env.observer.feed.next() # %% [markdown] # ## Setup and Train DQN Agent # %% from tensortrade.agents import DQNAgent agent = DQNAgent(env) reward = agent.train(n_steps=100, save_path="agents/", n_episodes=100) # %% # DQN-Model from stable_baselines.deepq.policies import MlpPolicy agent = DQN(MlpPolicy, env, verbose=1, tensorboard_log=os.path.join(currentdir, "tf_board_log", "DQN")) agent.learn(total_timesteps=25000) agent.save( save_path=os.path.join(currentdir, "agents", "DQN_MlpPolicy_02.zip")) # %% # PPO2-Model
    nodes.append(Stream(list(data[name]), name))

data_feed = DataFeed(nodes)
data_feed.next()

exchange = Exchange("sim-exchange", service=execute_order)(
    Stream(list(data['BTC-USD_close']), "USD-BTC"),
    Stream(list(data['ETH-USD_close']), "USD-ETH"),
    Stream(list(data['LTC-USD_close']), "USD-LTC")
)

portfolio = Portfolio(base_instrument=USD,
                      wallets=[
                          Wallet(exchange, 100000 * USD),
                          Wallet(exchange, 0 * BTC),
                          Wallet(exchange, 0 * LTC),
                          Wallet(exchange, 0 * ETH)
                      ])

env = TradingEnvironment(feed=data_feed,
                         portfolio=portfolio,
                         action_scheme='managed-risk',
                         reward_scheme='risk-adjusted',
                         window_size=20)

agent = DQNAgent(env)
agent.train(n_steps=300, n_episodes=500)

portfolio.performance.net_worth.plot()
portfolio.performance.plot()
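# Optional post-run check (an addition, assuming the Portfolio/Wallet API used
# above exposes `portfolio.wallets` and a `balance` per wallet): print what
# each wallet holds after training.
for wallet in portfolio.wallets:
    print(wallet.balance)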