Example #1
def single_wrappers(single_venv, scheduler, our_idx, normalize, rew_shape,
                    rew_shape_params, victim_index, victim_path, victim_type,
                    debug, env_name, load_policy, lookback_params,
                    transparent_params, log_callbacks, save_callbacks):
    if rew_shape:
        rew_shape_venv = apply_reward_wrapper(single_env=single_venv,
                                              scheduler=scheduler,
                                              shaping_params=rew_shape_params,
                                              agent_idx=our_idx)
        log_callbacks.append(
            lambda logger, locals, globals: rew_shape_venv.log_callback(logger))
        single_venv = rew_shape_venv

        for anneal_type in ['noise', 'rew_shape']:
            if scheduler.is_conditional(anneal_type):
                scheduler.set_annealer_get_logs(anneal_type,
                                                rew_shape_venv.get_logs)

    if lookback_params['lb_num'] > 0:
        lookback_venv = LookbackRewardVecWrapper(single_venv, env_name, debug,
                                                 victim_index, victim_path,
                                                 victim_type,
                                                 transparent_params,
                                                 **lookback_params)
        single_venv = lookback_venv

    if normalize:
        normalized_venv = VecNormalize(single_venv)

        if load_policy['path'] is not None:
            if load_policy['type'] == 'zoo':
                raise ValueError(
                    "Trying to normalize twice. Bansal et al's Zoo agents normalize "
                    "implicitly. Please set normalize=False to disable VecNormalize."
                )
            # Non-Zoo policies ship saved normalization statistics; restore them.
            normalized_venv.load_running_average(load_policy['path'])

        save_callbacks.append(
            lambda root_dir: normalized_venv.save_running_average(root_dir))
        single_venv = normalized_venv

    return single_venv
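
Every variant of this function relies on the same stable-baselines VecNormalize persistence: the save_callbacks entry writes the running averages out, and load_policy['path'] restores them. A minimal sketch of that round trip, assuming an older stable-baselines release that still exposes save_running_average / load_running_average (the environment name and directory below are placeholders, not taken from the examples):

import os

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

venv = DummyVecEnv([lambda: gym.make("CartPole-v1")])  # placeholder environment
venv = VecNormalize(venv)                              # rebind to the outermost wrapper
os.makedirs("normalizer_stats", exist_ok=True)
venv.save_running_average("normalizer_stats")          # what the save_callbacks entry does
venv.load_running_average("normalizer_stats")          # what load_policy['path'] restores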
Example #2
def single_wrappers(single_venv, scheduler, our_idx, normalize, load_policy,
                    rew_shape, rew_shape_params, log_callbacks, save_callbacks):
    if rew_shape:
        rew_shape_venv = apply_reward_wrapper(single_env=single_venv, scheduler=scheduler,
                                              shaping_params=rew_shape_params, agent_idx=our_idx)
        log_callbacks.append(lambda logger, locals, globals: rew_shape_venv.log_callback(logger))
        single_venv = rew_shape_venv

        for anneal_type in ['noise', 'rew_shape']:
            if scheduler.is_conditional(anneal_type):
                scheduler.set_annealer_get_logs(anneal_type, rew_shape_venv.get_logs)

    if normalize:
        if load_policy['type'] == 'zoo':
            raise ValueError("Trying to normalize twice. Bansal et al's Zoo agents normalize "
                             "implicitly. Please set normalize=False to disable VecNormalize.")

        normalized_venv = VecNormalize(single_venv)
        save_callbacks.append(lambda root_dir: normalized_venv.save_running_average(root_dir))
        single_venv = normalized_venv

    return single_venv
Example #3
def single_wrappers(
    single_venv,
    scheduler,
    our_idx,
    normalize,
    normalize_observations,
    rew_shape,
    rew_shape_params,
    embed_index,
    embed_paths,
    embed_types,
    debug,
    env_name,
    load_policy,
    lookback_params,
    transparent_params,
    log_callbacks,
    save_callbacks,
):
    if rew_shape:
        rew_shape_venv = apply_reward_wrapper(
            single_env=single_venv,
            scheduler=scheduler,
            shaping_params=rew_shape_params,
            agent_idx=our_idx,
        )
        log_callbacks.append(lambda logger, locals, globals: rew_shape_venv.log_callback(logger))
        single_venv = rew_shape_venv

        for anneal_type in ["noise", "rew_shape"]:
            if scheduler.is_conditional(anneal_type):
                scheduler.set_annealer_get_logs(anneal_type, rew_shape_venv.get_logs)

    if lookback_params["lb_num"] > 0:
        if len(embed_types) > 1:
            raise ValueError("Lookback is not supported with multiple embedded agents")
        embed_path = embed_paths[0]
        embed_type = embed_types[0]
        lookback_venv = LookbackRewardVecWrapper(
            single_venv,
            env_name,
            debug,
            embed_index,
            embed_path,
            embed_type,
            transparent_params,
            **lookback_params,
        )
        single_venv = lookback_venv

    if normalize:
        if normalize_observations:
            if load_policy["path"] is not None:
                if load_policy["type"] == "zoo":
                    raise ValueError(
                        "Trying to normalize twice. Bansal et al's Zoo agents normalize "
                        "implicitly. Please set normalize=False to disable VecNormalize."
                    )
            normalized_venv = VecNormalize(single_venv)
        else:
            normalized_venv = VecNormalize(single_venv, norm_obs=False)

        if load_policy["path"] is not None and load_policy["type"] != "zoo":
            normalized_venv.load_running_average(load_policy["path"])

        save_callbacks.append(lambda root_dir: normalized_venv.save_running_average(root_dir))
        single_venv = normalized_venv

    return single_venv
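
single_wrappers only populates the log_callbacks and save_callbacks lists; a hypothetical sketch of how a training loop might drain them (the helper name and arguments here are illustrative, not part of the original project):

def run_callbacks(log_callbacks, save_callbacks, logger, checkpoint_dir, locals_, globals_):
    # Called on each logging step, e.g. to emit rew_shape_venv.log_callback(logger).
    for log_fn in log_callbacks:
        log_fn(logger, locals_, globals_)
    # Called when writing a checkpoint, e.g. normalized_venv.save_running_average(...).
    for save_fn in save_callbacks:
        save_fn(checkpoint_dir)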
Example #4
    # NOTE: this excerpt starts mid-call. The constructor head below is reconstructed;
    # PPO2 and 'MlpPolicy' are assumptions inferred from the stable-baselines-style
    # keyword arguments. vec_norm_env, env_id, log_str, stop_by_keyboard and
    # original_sigint_handler are defined earlier in the original script.
    model = PPO2('MlpPolicy',
                 env=vec_norm_env,
                 learning_rate=lambda f: 1e-4 * f,
                 ent_coef=1e-2,
                 n_steps=256,
                 nminibatches=8,
                 noptepochs=8,
                 cliprange=0.2,
                 cliprange_vf=0.2,
                 tensorboard_log='logs',
                 verbose=1)

    model.learn(total_timesteps=int(1e7),
                seed=0,
                log_interval=100,
                tb_log_name=log_str,
                callback=stop_by_keyboard)
    signal.signal(signal.SIGINT, original_sigint_handler)

    model.save("logs/{}_1/trained_model".format(log_str))
    vec_norm_env.save_running_average("logs/{}_1".format(log_str))

    # Render the trained agent: rebuild the env, re-apply VecNormalize, and restore the
    # saved running averages so observations are scaled as they were during training.
    env = VecNormalize(DummyVecEnv([lambda: gym.make(env_id)]),
                       norm_reward=True)
    env.load_running_average("logs/{}_1".format(log_str))

    obs = env.reset()
    while True:
        action, _states = model.predict(obs, deterministic=True)
        obs, rewards, dones, info = env.step(action)
        env.render()