def init_adv(adv_env_id, disable_adv=False, env_kwargs=None):
    """Create a normalized vectorized environment and the agent(s) acting in it.

    The names ``render``, ``seed``, ``verbose`` and ``ts`` are not parameters;
    they are resolved from the scope surrounding this function.

    Args:
        adv_env_id: Environment id handed to ``make_vec_env``. When it
            contains ``'CartPole'`` the render flag is passed under the key
            ``'renders'``, otherwise under ``'render'``.
        disable_adv: When true, no adversary is built and the protagonist is
            linked with ``None`` in the adversary slot.
        env_kwargs: Optional extra keyword arguments for the environment.
            The mapping is copied, never modified. Its render entry (if any)
            is overridden by the surrounding ``render`` value.

    Returns:
        A ``(prot_agent, env)`` tuple: the protagonist ``PPO`` agent and the
        ``VecNormalize``-wrapped environment.
    """
    bridge = Bridge()

    # Work on a private copy: updating the argument in place would leak the
    # forced render entry into the dict owned by the caller.
    merged_env_kwargs = {} if env_kwargs is None else dict(env_kwargs)
    render_key = 'renders' if 'CartPole' in adv_env_id else 'render'
    merged_env_kwargs[render_key] = render

    env = make_vec_env(adv_env_id, env_kwargs=merged_env_kwargs, seed=seed)
    env = VecNormalize(env)

    prot_agent = PPO('MlpPolicy', env, verbose=verbose, seed=seed,
                     n_steps=ts, bridge=bridge, is_protagonist=True)
    if disable_adv:
        adv_agent = None
    else:
        adv_agent = PPO('MlpPolicy', env, verbose=verbose, seed=seed,
                        n_steps=ts, bridge=bridge, is_protagonist=False)

    # The bridge is linked in both cases; the adversary slot is None when
    # the adversary is disabled.
    bridge.link_agents(prot_agent, adv_agent)
    return prot_agent, env
def setup(args):
    """Build the environment and the protagonist/adversary agents from ``args``.

    The environment is created with ``make_vec_env`` using the render flag
    (keyed ``"renders"`` for ids containing ``'CartPole'``, ``"render"``
    otherwise) together with ``adv_force``, ``mass_percentage``,
    ``friction_percentage`` and ``simple_reward`` taken from ``args``.

    When ``args.evaluate`` is truthy, the ``VecNormalize`` wrapper and the
    agents are loaded from disk, and a message is logged if a loaded agent's
    seed differs from ``args.seed``. Otherwise a fresh ``VecNormalize`` and
    new ``PPO`` agents are constructed. The adversary is only loaded or
    built when ``args.adversarial`` is truthy; otherwise it is ``None``.

    Args:
        args: Object exposing the option attributes read below (``env``,
            ``render``, ``seed``, ``evaluate``, ``adversarial``, ...).

    Returns:
        A ``(prot_agent, adv_agent, env)`` tuple.
    """
    bridge = Bridge()

    if 'CartPole' in args.env:
        render_key = "renders"
    else:
        render_key = "render"
    env_kwargs = {render_key: args.render}
    env_kwargs.update(
        adv_force=args.adv_force,
        mass_percentage=args.mass_percentage,
        friction_percentage=args.friction_percentage,
        simple_reward=args.simple_reward,
    )
    env = make_vec_env(
        args.env,
        env_kwargs=env_kwargs,
        seed=args.seed,
        monitor_dir=args.monitor_dir,
    )

    # Stays None unless an adversary is requested on either path below.
    adv_agent = None

    if not args.evaluate:
        env = VecNormalize(env)
        # Keyword arguments common to both freshly built agents.
        shared = dict(
            verbose=args.verbose,
            seed=args.seed,
            n_steps=args.N_steps,
            bridge=bridge,
            device='cpu',
        )

        prot_logname = None
        if args.logs:
            prot_logname = f'{args.logs}-{args.prot_name}'
        prot_agent = PPO("MlpPolicy", env, tensorboard_log=prot_logname,
                         is_protagonist=True, **shared)

        if args.adversarial:
            adv_logname = None
            if args.logs:
                adv_logname = f'{args.logs}-{args.adv_name}'
            adv_agent = PPO("MlpPolicy", env, tensorboard_log=adv_logname,
                            is_protagonist=False, **shared)
    else:
        env = VecNormalize.load(f'{args.pickle}-{args.envname}', env)

        prot_agent = PPO.load(f'{args.pickle}-{args.prot_name}', device='cpu')
        if prot_agent.seed != args.seed:
            logging.info(
                f'warning: {prot_agent.seed=} does not match { args.seed=}')

        if args.adversarial:
            adv_agent = PPO.load(args.adv_pickle, device='cpu')
            if adv_agent.seed != args.seed:
                logging.info(
                    f'warning: {adv_agent.seed=} does not match { args.seed=}')

    # NOTE(review): linking runs after both the load path and the build
    # path — confirm this matches the intended layout.
    bridge.link_agents(prot_agent, adv_agent)
    return prot_agent, adv_agent, env