# Imports assumed by this listing (keras-rl with the Keras 2 / TensorFlow 1.x API).
from keras.models import Sequential, Model
from keras.layers import Dense, Activation, Flatten, Input, Concatenate
from keras.optimizers import Adam
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess

# Assumes `env` (a gym-style environment with an 11-dimensional observation and
# continuous actions) and `nb_actions = env.action_space.shape[0]` are defined earlier.

# Actor network. The listing begins mid-definition; the Sequential container and
# input layer are reconstructed from the critic's observation shape below
# (window_length=1, 11 state variables).
actor = Sequential()
actor.add(Flatten(input_shape=(1,) + (11,)))
actor.add(Dense(8))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))  # actions bounded to [0, 1]

# Critic network: concatenates the action with the flattened observation and
# outputs a scalar Q(s, a).
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + (11,), name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

# Replay buffer and Ornstein-Uhlenbeck exploration noise for continuous control.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)

agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic,
                  critic_action_input=action_input, memory=memory,
                  nb_steps_warmup_critic=100, nb_steps_warmup_actor=10,
                  random_process=random_process, gamma=.995, target_model_update=1e-3)
agent.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])

agent.fit(env, nb_steps=10000, visualize=False, verbose=0, nb_max_episode_steps=95)
# agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
# The original call also passed a non-standard plt="" keyword, which stock
# keras-rl's Agent.test() rejects; it is dropped here.
agent.test(env, nb_episodes=15, visualize=False, nb_max_episode_steps=95)
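# For reference, a minimal gym-style stand-in for the `env` assumed above. This
# sketch is hypothetical (class name, dynamics, and reward are illustrative, not
# from the source); it only matches the shapes the networks expect.
import numpy as np
import gym
from gym import spaces

class StormwaterEnvStub(gym.Env):
    """Placeholder environment: 11 state variables, actions in [0, 1]."""
    def __init__(self, nb_actions=2):
        self.observation_space = spaces.Box(low=0., high=1., shape=(11,), dtype=np.float32)
        self.action_space = spaces.Box(low=0., high=1., shape=(nb_actions,), dtype=np.float32)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        obs = self.observation_space.sample()   # placeholder dynamics
        reward = -float(np.sum(action))         # placeholder reward
        return obs, reward, False, {}

# env = StormwaterEnvStub(); nb_actions = env.action_space.shape[0]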
# Goal-conditioned variant (requires `import os, sys`; see the sketch of the
# assumed setup below). The constructor call is truncated in the source: the
# opening `agent = DDPGAgent(` and its leading arguments (actor, critic, ...)
# are elided, and keywords such as critic_goal_input, val_goal, vectorized,
# the (memory, episode_mem) pair, and fit()'s folder_to_save/reward_type/max_tau
# come from a customized keras-rl fork, not the stock library.
agent = DDPGAgent(  # leading arguments elided in the source
    env=env, memory=(memory, episode_mem), critic_goal_input=goal_input,
    delta_clip=1.0, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
    random_process=random_process, gamma=0.98, target_model_update=0.001,
    val_goal=args.val_goal, vectorized=args.vectorized)

# Separate optimizers for the actor (1e-4) and the critic (1e-3).
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
agent.fit(env, nb_steps=200000, folder_to_save=folder_to_save, visualize=False,
          verbose=1, reward_type=reward_type, nb_max_episode_steps=50, max_tau=12)
agent.save_weights(os.path.join(
    folder_to_save, 'ddpg_{}_weights_{}.h5f'.format(ENV_NAME, reward_type)),
    overwrite=True)
# sys.exit()  # in the source this early exit made the evaluation below unreachable

# Finally, evaluate our algorithm for 5 episodes.
agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=50)
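# The fragment above references names defined earlier in its script. A
# hypothetical sketch of that setup follows; every identifier and value here
# (GOAL_DIM, the episode-level buffer, the argparse flags, the folder layout)
# is an assumption for illustration, not confirmed by the source.
import os
import argparse
from keras.layers import Input
from rl.memory import SequentialMemory

ENV_NAME = 'stormwater'        # assumed name used in the weight filenames
reward_type = 'dense'          # assumed reward label
folder_to_save = os.path.join('results', ENV_NAME)

parser = argparse.ArgumentParser()
parser.add_argument('--val_goal', type=float, default=0.0)
parser.add_argument('--vectorized', action='store_true')
args = parser.parse_args()

GOAL_DIM = 2                                                  # assumed goal dimensionality
goal_input = Input(shape=(GOAL_DIM,), name='goal_input')      # extra critic input for the goal
memory = SequentialMemory(limit=100000, window_length=1)      # step-level replay buffer
episode_mem = SequentialMemory(limit=1000, window_length=1)   # episode-level buffer (assumed)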