# Assemble the critic from the action/observation inputs built above.
critic = Model(inputs=[action_input, observation_input], outputs=x)

# Replay buffer and Ornstein-Uhlenbeck exploration noise for DDPG.
memory = SequentialMemory(limit=1000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.3)

# DDPG agent wiring: short warmups, high discount, soft target updates.
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=300,
                  random_process=random_process, gamma=.995,
                  target_model_update=2e-3)
agent.compile(Adam(lr=.005, clipnorm=1.), metrics=['mae'])

# Train briefly, persist the learned weights, then run a single
# visualized evaluation episode and dump the environment's plots.
agent.fit(env, nb_steps=555, visualize=False, verbose=0, nb_max_episode_steps=95)
agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
agent.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=95)
env.graph("plots/test_plot_")
env=env, memory=(memory, episode_mem), critic_goal_input=goal_input, delta_clip=1.0, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000, random_process=random_process, gamma=0.98, target_model_update=0.001, val_goal=args.val_goal, vectorized=args.vectorized) agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae']) agent.fit(env, nb_steps=200000, folder_to_save=folder_to_save, visualize=False, verbose=1, reward_type=reward_type, nb_max_episode_steps=50, max_tau=12) agent.save_weights(os.path.join( folder_to_save, 'ddpg_{}_weights_{}.h5f'.format(ENV_NAME, reward_type)), overwrite=True) sys.exit() # Finally, evaluate our algorithm for 5 episodes. agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=50)