Beispiel #1
0
def init_adv(adv_env_id, disable_adv=False, env_kwargs=None):
    """Create a normalized vectorized env and a protagonist PPO agent.

    A second (adversary) PPO agent sharing the same environment and bridge
    is also created unless ``disable_adv`` is true. Both agents are
    registered with the bridge via ``bridge.link_agents``; only the
    protagonist is returned.

    NOTE(review): this function reads the free names ``render``, ``seed``,
    ``verbose`` and ``ts``, which are not parameters; they are presumably
    module-level settings defined elsewhere in the file -- confirm.

    Args:
        adv_env_id: Environment id passed to ``make_vec_env``. If it
            contains ``'CartPole'`` the render keyword is spelled
            ``'renders'``, otherwise ``'render'``.
        disable_adv: When true, no adversary agent is built and the bridge
            is linked with ``None`` in its place.
        env_kwargs: Optional extra keyword arguments for the environment.
            The dict is not modified; the render entry always overrides a
            caller-supplied value for the same key.

    Returns:
        A ``(prot_agent, env)`` tuple, where ``env`` is the
        ``VecNormalize``-wrapped vectorized environment.
    """
    bridge = Bridge()
    render_key = 'renders' if 'CartPole' in adv_env_id else 'render'
    # Build a fresh dict instead of calling ``update`` on the argument so the
    # caller's dict is never mutated. The render entry comes last so it still
    # takes precedence over any caller-supplied value, exactly as before.
    caller_kwargs = {} if env_kwargs is None else env_kwargs
    merged_kwargs = {**caller_kwargs, render_key: render}

    env = make_vec_env(adv_env_id, env_kwargs=merged_kwargs, seed=seed)
    env = VecNormalize(env)

    # Settings shared by the protagonist and the adversary.
    agent_kwargs = {
        'verbose': verbose,
        'seed': seed,
        'n_steps': ts,
        'bridge': bridge,
    }
    prot_agent = PPO('MlpPolicy', env, is_protagonist=True, **agent_kwargs)
    if disable_adv:
        adv_agent = None
    else:
        adv_agent = PPO('MlpPolicy', env, is_protagonist=False,
                        **agent_kwargs)
    bridge.link_agents(prot_agent, adv_agent)
    return prot_agent, env
Beispiel #2
0
def setup(args):
    """Create the environment and the protagonist/adversary agents.

    In evaluation mode (``args.evaluate`` true) the normalization statistics
    and the agents are loaded from disk; otherwise a fresh ``VecNormalize``
    wrapper and new PPO agents are constructed. In both modes the agents are
    registered with a new ``Bridge`` via ``link_agents``.

    Args:
        args: Namespace-like object. Always read: ``env``, ``render``,
            ``adv_force``, ``mass_percentage``, ``friction_percentage``,
            ``simple_reward``, ``seed``, ``monitor_dir``, ``evaluate`` and
            ``adversarial``. Evaluation mode additionally reads ``pickle``,
            ``envname``, ``prot_name`` and (if adversarial) ``adv_pickle``.
            Training mode additionally reads ``logs``, ``verbose``,
            ``N_steps``, ``prot_name`` and (if adversarial) ``adv_name``.

    Returns:
        A ``(prot_agent, adv_agent, env)`` tuple; ``adv_agent`` is ``None``
        when ``args.adversarial`` is false.
    """
    bridge = Bridge()

    # The render keyword is spelled differently for CartPole environments.
    render_key = "renders" if 'CartPole' in args.env else "render"
    env_kwargs = {
        render_key: args.render,
        "adv_force": args.adv_force,
        "mass_percentage": args.mass_percentage,
        "friction_percentage": args.friction_percentage,
        "simple_reward": args.simple_reward,
    }

    env = make_vec_env(args.env,
                       env_kwargs=env_kwargs,
                       seed=args.seed,
                       monitor_dir=args.monitor_dir)

    adv_agent = None
    if args.evaluate:
        env = VecNormalize.load(f'{args.pickle}-{args.envname}', env)
        # NOTE(review): loaded agents are not given ``bridge`` or ``env``
        # here; presumably ``link_agents`` below wires them up -- confirm.
        prot_agent = PPO.load(f'{args.pickle}-{args.prot_name}', device='cpu')
        if prot_agent.seed != args.seed:
            # Emitted at WARNING level: with the default logging
            # configuration an INFO record would be silently dropped.
            logging.warning(
                f'warning: {prot_agent.seed=} does not match { args.seed=}')

        if args.adversarial:
            adv_agent = PPO.load(args.adv_pickle, device='cpu')
            if adv_agent.seed != args.seed:
                logging.warning(
                    f'warning: {adv_agent.seed=} does not match { args.seed=}')
    else:
        env = VecNormalize(env)
        # Settings shared by the protagonist and the adversary.
        agent_kwargs = {
            'verbose': args.verbose,
            'seed': args.seed,
            'n_steps': args.N_steps,
            'bridge': bridge,
            'device': 'cpu',
        }

        # TensorBoard logging is enabled only when a log prefix was given.
        prot_logname = f'{args.logs}-{args.prot_name}' if args.logs else None
        prot_agent = PPO("MlpPolicy",
                         env,
                         tensorboard_log=prot_logname,
                         is_protagonist=True,
                         **agent_kwargs)

        if args.adversarial:
            adv_logname = f'{args.logs}-{args.adv_name}' if args.logs else None
            adv_agent = PPO("MlpPolicy",
                            env,
                            tensorboard_log=adv_logname,
                            is_protagonist=False,
                            **agent_kwargs)

    bridge.link_agents(prot_agent, adv_agent)

    return prot_agent, adv_agent, env