def build_agent(OD, OD_tensor, args):
    """Build and compile a DDPG agent for the OD (origin-destination) task.

    Parameters
    ----------
    OD : array-like
        Origin-destination data; ``OD.shape[-1]`` is taken as the number
        of regions.
    OD_tensor :
        Tensor form of the OD data, forwarded to the network build function.
    args :
        Hyperparameter namespace (argparse-style). Fields read here include
        action_mode, memory_limit, action_noise, param_noise, warmup_steps,
        batch_size, train_interval, decay, update, delta_clip, optimizer,
        lr, and the imitation/noise schedule settings.

    Returns
    -------
    DDPGAgent
        A compiled agent ready for training.
    """
    build_func = get_build_func(OD_tensor, args)
    nb_regions = OD.shape[-1]
    nb_actions = get_nb_actions(args.action_mode, nb_regions)

    def get_prob_imitation(steps):
        # Sigmoid-shaped anneal of the imitation probability: starts near
        # base_prob_imitation and decays toward 0 over prob_imitation_steps,
        # floored at min_prob_imitation.
        if steps < args.prob_imitation_steps:
            p = (1 - 1 / (1 + np.exp(
                (-steps / args.prob_imitation_steps + 0.5) * 10))
            ) * args.base_prob_imitation
        else:
            p = 0
        return max(p, args.min_prob_imitation)

    def get_std_adapt():
        # Return a schedule for the parameter-noise std, or None when
        # adaptation is disabled (std_adapt_steps <= 0).
        if args.std_adapt_steps <= 0:
            return None

        def std_adapt(steps):
            # Same sigmoid anneal shape as get_prob_imitation, ~1 down to 0.
            if steps < args.std_adapt_steps:
                return 1 - 1 / (1 + np.exp(
                    (-steps / args.std_adapt_steps + 0.5) * 10))
            return 0

        return std_adapt

    memory = SequentialMemory(limit=args.memory_limit, window_length=1)

    # Action-space exploration noise (Ornstein-Uhlenbeck), if enabled.
    # (Was `== True`; plain truthiness is the idiomatic form, PEP 8 E712.)
    if args.action_noise:
        random_process = OrnsteinUhlenbeckProcess(
            size=nb_actions, theta=args.rd_theta, mu=0,
            sigma=args.rd_sigma, dt=args.rd_dt)
    else:
        random_process = None

    # Adaptive parameter-space noise, if enabled.
    if args.param_noise:
        param_noise = AdaptiveParamNoiseSpec(
            initial_stddev=args.init_std,
            desired_action_stddev=args.action_std,
            adoption_coefficient=args.adapt,
            min_action_std=args.min_action_std,
            std_adapt=get_std_adapt())
    else:
        param_noise = None

    agent = DDPGAgent(
        nb_actions=nb_actions, build_func=build_func,
        nb_regions=nb_regions, start_step=args.start_step,
        memory=memory,
        nb_steps_warmup_critic=args.warmup_steps,
        nb_steps_warmup_actor=args.warmup_steps,
        exp_policy=get_exp_policy(OD, args),
        batch_size=args.batch_size,
        param_noise=param_noise,
        get_prob_imitation=get_prob_imitation,
        train_interval=args.train_interval,
        random_process=random_process,
        gamma=args.decay,
        target_model_update=args.update,
        delta_clip=args.delta_clip)

    # SECURITY NOTE: eval() executes arbitrary code -- args.optimizer must
    # come from a trusted, fixed set of optimizer class names (e.g. an
    # argparse `choices` list), never from untrusted input.
    agent.compile(eval(args.optimizer)(lr=args.lr, clipnorm=1.),
                  metrics=['mae'])
    return agent
# Finish the actor network: one small hidden layer, then a sigmoid output
# with one unit per action.
actor.add(Dense(8))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))

# Critic network: maps (action, observation) to a scalar Q-value.
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + (11,), name='observation_input')
flattened_observation = Flatten()(observation_input)
h = Concatenate()([action_input, flattened_observation])
# Three identical 32-unit relu layers.
for _ in range(3):
    h = Dense(32)(h)
    h = Activation('relu')(h)
h = Dense(1)(h)
h = Activation('linear')(h)
critic = Model(inputs=[action_input, observation_input], outputs=h)

# Replay buffer and Ornstein-Uhlenbeck exploration noise.
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15,
                                          mu=0., sigma=.1)

agent = DDPGAgent(nb_actions=nb_actions,
                  actor=actor,
                  critic=critic,
                  critic_action_input=action_input,
                  memory=memory,
                  nb_steps_warmup_critic=100,
                  nb_steps_warmup_actor=10,
                  random_process=random_process,
                  gamma=.995,
                  target_model_update=1e-3)
agent.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])

# Train, then evaluate; weight saving is currently disabled.
agent.fit(env, nb_steps=10000, visualize=False, verbose=0,
          nb_max_episode_steps=95)
#agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
agent.test(env, nb_episodes=15, visualize=False,
           nb_max_episode_steps=95, plt="")
critic=critic,
critic_action_input=action_input,
# NOTE(review): tau_input/goal_input suggest a goal-conditioned,
# horizon-aware variant (TDM-style) -- confirm against the agent class.
tau_input=tau_input,
env=env,
# Two memories: a step-level replay buffer plus an episode-level one --
# presumably for episode-wise relabelling; TODO confirm.
memory=(memory, episode_mem),
critic_goal_input=goal_input,
delta_clip=1.0,  # Huber-loss clipping threshold
nb_steps_warmup_critic=1000,
nb_steps_warmup_actor=1000,
random_process=random_process,
gamma=0.98,  # discount factor
target_model_update=0.001,  # soft target-network update rate
val_goal=args.val_goal,
vectorized=args.vectorized)
# Separate optimizers: actor at lr=1e-4, critic at lr=1e-3.
agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])
agent.fit(env, nb_steps=200000, folder_to_save=folder_to_save,
          visualize=False, verbose=1, reward_type=reward_type,
          nb_max_episode_steps=50, max_tau=12)
# Persist trained weights, named by environment and reward type.
agent.save_weights(os.path.join(
    folder_to_save,
    'ddpg_{}_weights_{}.h5f'.format(ENV_NAME, reward_type)),
    overwrite=True)
sys.exit()