# agent.load_weights('sq_{}_weights.h5f'.format(ENV_NAME))

# Normally it would be time to learn something here, but training is skipped in favor of
# loading pretrained DDPG weights; uncomment the fit/save lines below to retrain. You can
# always safely abort training prematurely using Ctrl + C.
agent.load_weights('ddpg_fixed_weights_{}_weights.h5f'.format(ENV_NAME))
# env.is_train = True
# agent.fit(env, nb_steps=50000, visualize=False, verbose=1, nb_max_episode_steps=20)

# After training is done, we save the final weights.
# agent.save_weights('ddpg_fixed_weights_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for 5 episodes, laid out in a 1 x 5 plot grid.
env.is_train = False
env.plot_row = 1
env.plot_col = 5

# Sweep the target critic over every (state, action) pair: for each candidate action,
# build a batch that pairs all states with that action and append the predicted
# Q-values to the table as a new column.
q_values = pd.DataFrame()
st = status.reshape([-1, 1])
for action in actions:
    # Assemble the critic's two inputs: the state batch and a matching batch
    # filled with the current candidate action.
    state1_batch_with_action = [
        st, np.ones(st.shape).reshape(-1, 1, 1) * action
    ]
    q_values = pd.concat([
        q_values,
        pd.DataFrame(
            agent.target_critic.predict_on_batch(state1_batch_with_action))
    ], axis=1)
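# A minimal sketch of how the Q-value table collected above might be inspected.
# It reuses `q_values`, `actions`, and `status` from the script, assumes the critic
# emits a single Q-value per (state, action) pair, and assumes matplotlib is
# available; the labels and plot styling are illustrative, not part of the original.
import matplotlib.pyplot as plt

q_values.columns = [float(a) for a in actions]  # one column per candidate action
q_values.index = status.ravel()                 # one row per state

# Plot Q(s, a) as one curve per action; the argmax over columns is the greedy policy.
q_values.plot()
plt.xlabel('state')
plt.ylabel('Q(s, a)')
plt.legend(title='action')
plt.show()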
# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory,
               nb_steps_warmup=20, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Training runs without visualization
# (visualize=False), since rendering slows training down quite a lot. You can always
# safely abort the training prematurely using Ctrl + C.
env.is_train = True
dqn.load_weights('dqn_{}_weights.h5f'.format(ENV_NAME))
dqn.fit(env, nb_steps=100000, visualize=False, verbose=2)

# After training is done, we save the final weights.
dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Dump the learned policy: for every observation, record the action the trained
# agent selects.
with open('dqn_action.json', 'w') as fw:
    observation = status.tolist()
    action = [
        float(actions[dqn.forward(np.array([obs]))]) for obs in observation
    ]
    json.dump({'observation': observation, 'action': action}, fw)

# Reshape the states into (batch, window_length, obs_dim) for batch prediction.
state_batch = status.reshape([-1, 1, 1])
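# A minimal sketch of how `state_batch` might be used next: score every state with
# the trained Q-network and recover the greedy action per state. `dqn.model` is the
# online Keras model that keras-rl's DQNAgent wraps; everything else reuses names
# defined above, and the printout is illustrative only.
q = dqn.model.predict_on_batch(state_batch)   # shape: (n_states, nb_actions)
greedy = q.argmax(axis=1)                     # index of the best action per state
greedy_actions = [float(actions[i]) for i in greedy]

# Compare against the action list written to dqn_action.json above; occasional
# mismatches are expected because forward() samples from the Boltzmann policy
# rather than acting greedily.
print(list(zip(status.tolist(), greedy_actions))[:5])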