# Okay, now it's time to learn something! We visualize the training here for show, but this
# slows down training quite a lot. You can always safely abort the training prematurely using
# Ctrl + C.
agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_start_steps=0)

# After training is done, we save the final weights.
agent.save_weights('ddpg_{}_weights.h5f'.format("test"), overwrite=True)

# Roll out one full episode greedily and plot the portfolio trajectory at the end.
observation = env.reset()  # Obtain an initial observation of the environment
while True:
    # FIX: were Python 2 `print x` statements; converted to print() calls.
    print(observation)
    # select_action returns per-action scores; pick the highest-scoring discrete action.
    action = agent.select_action([observation])
    print(action)
    action = action.argmax()
    observation, reward, done, info = env.step(action)
    # env.graphing() returns the series accumulated so far:
    # stock value, holdings, liquid asset, and buy-and-hold (static) asset value.
    graph_stk, graph_holding, graph_liquidasset, graph_staticasset = env.graphing()
    if done:
        fig, axarr = plt.subplots(3, 1)
        fig.suptitle("DDPG Agent", fontsize=10)
        axarr[0].plot(graph_holding)
        axarr[0].set_title('Stocks held')
        axarr[1].plot(graph_stk)
        axarr[1].set_title('Stocks value')
        axarr[2].plot(graph_liquidasset, color='red')
        axarr[2].plot(graph_staticasset, color='blue')
        axarr[2].set_title('Comparision of stagnant and RL-bot asset value')
        # NOTE(review): without this break the loop keeps stepping a finished
        # env forever — confirm the env does not auto-reset on done.
        break
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=100000, window_length=1)
# Ornstein-Uhlenbeck exploration noise, one dimension per action.
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=100,
                  random_process=random_process, gamma=.99,
                  target_model_update=1e-3)
# NOTE(review): `lr=` is the legacy Keras optimizer kwarg (newer Keras uses
# `learning_rate=`) — kept as-is to match the project's Keras version.
agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])

# Evaluation: load previously trained weights and roll out one greedy episode.
agent.load_weights('ddpg_test_weights.h5f')

observation = env.reset()  # Obtain an initial observation of the environment
while True:
    # Per-action scores from the actor; act greedily on the argmax.
    action = agent.select_action([observation]).argmax()
    observation, reward, done, info = env.step(action)
    # Series accumulated so far: stock value, holdings, liquid asset,
    # and buy-and-hold (static) asset value.
    graph_stk, graph_holding, graph_liquidasset, graph_staticasset = env.graphing()
    if done:
        fig, axarr = plt.subplots(3, 1)
        fig.suptitle("DDPG Agent", fontsize=10)
        axarr[0].plot(graph_holding)
        axarr[0].set_title('Stocks held')
        axarr[1].plot(graph_stk)
        axarr[1].set_title('Stocks value')
        axarr[2].plot(graph_liquidasset, color='red')
        axarr[2].plot(graph_staticasset, color='blue')
        axarr[2].set_title('Comparision of stagnant and RL-bot asset value')
        fig.tight_layout()
        fig.subplots_adjust(top=0.88)
        plt.savefig("training/BOT_4/train.png")
        # NOTE(review): without this break the loop keeps stepping a finished
        # env forever — confirm the env does not auto-reset on done.
        break