Example 1
from keras.models import Model
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# The actor network and the critic's input layers (action_input,
# observation_input) and output tensor x are defined before this snippet.
critic = Model(inputs=[action_input, observation_input], outputs=x)

# Replay buffer; window_length=1 means each state is a single observation.
memory = SequentialMemory(limit=1000, window_length=1)
# Ornstein-Uhlenbeck process: temporally correlated exploration noise.
random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                          theta=.15,
                                          mu=0.,
                                          sigma=.3)
# DDPG agent; target_model_update < 1 enables soft target updates.
agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=300,
                  random_process=random_process,
                  gamma=.995,
                  target_model_update=2e-3)
agent.compile(Adam(lr=.005, clipnorm=1.), metrics=['mae'])

# Train for 555 steps, capping episodes at 95 steps each.
agent.fit(env,
          nb_steps=555,
          visualize=False,
          verbose=0,
          nb_max_episode_steps=95)

agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"),
                   overwrite=True)

# Evaluate one rendered episode, then plot the results
# (env.graph is specific to this stormwater environment, not part of Gym).
agent.test(env, nb_episodes=1, visualize=True, nb_max_episode_steps=95)

env.graph("plots/test_plot_")
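
This example assumes that actor, action_input, observation_input, and the critic output tensor x were defined before the snippet begins. Below is a minimal sketch of how those pieces are typically built, following the pattern of keras-rl's own DDPG examples; the layer sizes and activations are illustrative assumptions, not taken from the source:

from keras.layers import Concatenate, Dense, Flatten, Input
from keras.models import Model, Sequential

nb_actions = env.action_space.shape[0]

# Actor: maps an observation window to a continuous action.
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + env.observation_space.shape))
actor.add(Dense(32, activation='relu'))
actor.add(Dense(32, activation='relu'))
actor.add(Dense(nb_actions, activation='tanh'))

# Critic: maps an (action, observation) pair to a scalar Q-value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + env.observation_space.shape,
                          name='observation_input')
x = Concatenate()([action_input, Flatten()(observation_input)])
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dense(1, activation='linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

The leading (1,) in the input shapes matches window_length=1 in SequentialMemory, since keras-rl stacks that many past observations into each state.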
Example 2
    # The beginning of this constructor call (actor, critic, and their input
    # layers) is truncated in the source; agent = DDPGAgent( is inferred from
    # the calls below. This custom DDPGAgent variant extends keras-rl with
    # goal-conditioned inputs and a dual (step, episode) memory.
    agent = DDPGAgent(  # ...earlier keyword arguments elided in the source...
                      env=env,
                      memory=(memory, episode_mem),
                      critic_goal_input=goal_input,
                      delta_clip=1.0,
                      nb_steps_warmup_critic=1000,
                      nb_steps_warmup_actor=1000,
                      random_process=random_process,
                      gamma=0.98,
                      target_model_update=0.001,
                      val_goal=args.val_goal,
                      vectorized=args.vectorized)

    # A pair of optimizers: in keras-rl the first is used for the actor,
    # the second for the critic.
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

    # Train; folder_to_save, reward_type, and max_tau are extensions of this
    # fork and are not accepted by stock keras-rl's fit().
    agent.fit(env,
              nb_steps=200000,
              folder_to_save=folder_to_save,
              visualize=False,
              verbose=1,
              reward_type=reward_type,
              nb_max_episode_steps=50,
              max_tau=12)

    agent.save_weights(os.path.join(
        folder_to_save, 'ddpg_{}_weights_{}.h5f'.format(ENV_NAME,
                                                        reward_type)),
                       overwrite=True)
    sys.exit()  # NOTE: exits here, so the evaluation below never runs
    # Finally, evaluate our algorithm for 5 episodes.
    agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=50)
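
Since sys.exit() runs before the test call, evaluation would have to happen in a separate run that reloads the saved weights. Below is a minimal sketch using stock keras-rl's load_weights, assuming the agent has been reconstructed exactly as during training and that folder_to_save, ENV_NAME, and reward_type match the values used when the weights were written:

import os

# Rebuild agent with the same networks and settings as in training,
# then restore the saved weights and evaluate.
weights_path = os.path.join(
    folder_to_save, 'ddpg_{}_weights_{}.h5f'.format(ENV_NAME, reward_type))
agent.load_weights(weights_path)
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=50)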