Example 1
def build_agent(OD, OD_tensor, args):
    '''Build and compile the DDPG agent.'''
    build_func = get_build_func(OD_tensor, args)
    nb_regions = OD.shape[-1]
    nb_actions = get_nb_actions(args.action_mode, nb_regions)

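    # Probability of imitating the expert policy: decays along a logistic curve
    # over args.prob_imitation_steps and is floored at args.min_prob_imitation.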
    def get_prob_imitation(steps):
        if steps < args.prob_imitation_steps:
            p = (1 - 1 / (1 + np.exp(
                (-steps / args.prob_imitation_steps + 0.5) * 10))
                 ) * args.base_prob_imitation
        else:
            p = 0

        return max(p, args.min_prob_imitation)

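    # Optional schedule that shrinks the parameter-noise std from ~1 towards 0
    # over args.std_adapt_steps; disabled when std_adapt_steps <= 0.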
    def get_std_adapt():
        if args.std_adapt_steps <= 0:
            return None

        def std_adapt(steps):
            if steps < args.std_adapt_steps:
                return 1 - 1 / (1 + np.exp(
                    (-steps / args.std_adapt_steps + 0.5) * 10))
            else:
                return 0

        return std_adapt

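    # Experience replay buffer.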
    memory = SequentialMemory(limit=args.memory_limit, window_length=1)

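    # Ornstein-Uhlenbeck noise added to the actions for exploration.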
    if args.action_noise:
        random_process = OrnsteinUhlenbeckProcess(size=nb_actions,
                                                  theta=args.rd_theta,
                                                  mu=0,
                                                  sigma=args.rd_sigma,
                                                  dt=args.rd_dt)
    else:
        random_process = None

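    # Adaptive parameter-space noise applied to the actor's weights.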
    if args.param_noise:
        param_noise = AdaptiveParamNoiseSpec(
            initial_stddev=args.init_std,
            desired_action_stddev=args.action_std,
            adoption_coefficient=args.adapt,
            min_action_std=args.min_action_std,
            std_adapt=get_std_adapt())
    else:
        param_noise = None

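    # Assemble the DDPG agent from the components above.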
    agent = DDPGAgent(nb_actions=nb_actions,
                      build_func=build_func,
                      nb_regions=nb_regions,
                      start_step=args.start_step,
                      memory=memory,
                      nb_steps_warmup_critic=args.warmup_steps,
                      nb_steps_warmup_actor=args.warmup_steps,
                      exp_policy=get_exp_policy(OD, args),
                      batch_size=args.batch_size,
                      param_noise=param_noise,
                      get_prob_imitation=get_prob_imitation,
                      train_interval=args.train_interval,
                      random_process=random_process,
                      gamma=args.decay,
                      target_model_update=args.update,
                      delta_clip=args.delta_clip)

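    # args.optimizer names a Keras optimizer class (e.g. 'Adam'); eval() resolves
    # the name to the class before instantiating it.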
    agent.compile(eval(args.optimizer)(lr=args.lr, clipnorm=1.),
                  metrics=['mae'])

    return agent
Example 2
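# Actor: output layers appended to a Sequential model (`actor`) built earlier in
# the source; the sigmoid bounds each of the nb_actions outputs to [0, 1].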
actor.add(Dense(8))
actor.add(Activation('relu'))
actor.add(Dense(nb_actions))
actor.add(Activation('sigmoid'))


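# Critic: concatenates the action with the flattened observation and predicts a
# scalar Q-value through three hidden layers.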
action_input = Input(shape=(nb_actions,), name='action_input')
observation_input = Input(shape=(1,) + (11,), name='observation_input')
flattened_observation = Flatten()(observation_input)
x = Concatenate()([action_input, flattened_observation])
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(32)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
x = Activation('linear')(x)
critic = Model(inputs=[action_input, observation_input], outputs=x)

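# Replay buffer and Ornstein-Uhlenbeck exploration noise.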
memory = SequentialMemory(limit=100000, window_length=1)
random_process = OrnsteinUhlenbeckProcess(size=nb_actions, theta=.15, mu=0., sigma=.1)

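# DDPG agent wiring the actor and critic together (keras-rl-style API).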
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input,
                  memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=10,
                  random_process=random_process, gamma=.995, target_model_update=1e-3)

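# Compile with Adam, train for 10,000 steps, and evaluate over 15 episodes.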
agent.compile(Adam(lr=.0005, clipnorm=1.), metrics=['mae'])
agent.fit(env, nb_steps=10000, visualize=False, verbose=0, nb_max_episode_steps=95)   
#agent.save_weights('weights/ddpg_{}_weights.h5f'.format("stormwater"), overwrite=True)
agent.test(env, nb_episodes=15, visualize=False, nb_max_episode_steps=95, plt="") 
Example 3
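    # Continuation of an `agent = DDPGAgent(...)` call; the leading arguments are
    # omitted in the source snippet.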
                      critic=critic,
                      critic_action_input=action_input,
                      tau_input=tau_input,
                      env=env,
                      memory=(memory, episode_mem),
                      critic_goal_input=goal_input,
                      delta_clip=1.0,
                      nb_steps_warmup_critic=1000,
                      nb_steps_warmup_actor=1000,
                      random_process=random_process,
                      gamma=0.98,
                      target_model_update=0.001,
                      val_goal=args.val_goal,
                      vectorized=args.vectorized)

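    # Two optimizers: presumably the actor's (1e-4) first and the critic's (1e-3)
    # second, following keras-rl's DDPGAgent.compile convention.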
    agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae'])

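    # Train for 200,000 environment steps, capping each episode at 50 steps.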
    agent.fit(env,
              nb_steps=200000,
              folder_to_save=folder_to_save,
              visualize=False,
              verbose=1,
              reward_type=reward_type,
              nb_max_episode_steps=50,
              max_tau=12)

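    # Save the trained weights, then exit.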
    agent.save_weights(os.path.join(
        folder_to_save, 'ddpg_{}_weights_{}.h5f'.format(ENV_NAME,
                                                        reward_type)),
                       overwrite=True)
    sys.exit()