def __init__(self, venv, env_name, use_debug, victim_index, victim_path, victim_type,
             transparent_params, lb_mul, lb_num, lb_path, lb_type):
    """Wraps `venv` so episode rollouts can be compared against rollouts in which a
    lookback base policy acted instead (see _create_lb_tuples below)."""
    super().__init__(venv)
    self.lb_num = lb_num
    self.lb_mul = lb_mul
    if transparent_params is None:
        raise ValueError("LookbackRewardVecWrapper assumes transparent policies and venvs.")
    self.transparent_params = transparent_params
    self.victim_index = victim_index
    # lookback base policy, controlling the agent opposite the victim
    self._policy = load_policy(lb_type, lb_path, self.venv.unwrapped, env_name,
                               1 - victim_index, transparent_params=None)
    self._action = None
    self._obs = None
    self._state = None
    self._new_lb_state = None
    self._dones = [False] * self.num_envs
    self.ep_lens = np.zeros(self.num_envs).astype(int)
    self.lb_tuples = self._create_lb_tuples(env_name, use_debug, victim_index,
                                            victim_path, victim_type)
    self.use_debug = use_debug
    if self.use_debug:
        # create a debug file for this venv and one for every lookback venv, in index order
        self.debug_files = [open(f'debug{i}.pkl', 'wb') for i in range(self.lb_num + 1)]
        self.get_debug_venv().set_debug_file(self.debug_files[0])
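
# Illustrative usage sketch (an assumption, not code from this module): constructing the
# wrapper around an already-built single-agent vec env. The victim/lookback paths and
# policy types below are hypothetical placeholders; transparent_params must be non-None,
# per the check in __init__ above.
#
#     lb_venv = LookbackRewardVecWrapper(
#         venv, env_name, use_debug=False,
#         victim_index=0, victim_path='1', victim_type='zoo',
#         transparent_params=transparent_params,
#         lb_mul=0.05, lb_num=2, lb_path='1', lb_type='zoo')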
def _create_lb_tuples(self, env_name, use_debug, victim_index, victim_path, victim_type):
    """Create lookback data structures which are used to compare our episode rollouts
    against those of an environment where a lookback base policy acted instead.

    Params victim_index, victim_path and victim_type are the same as in
    policy_loader.load_policy.
    :param use_debug (bool): use DummyVecEnv instead of SubprocVecEnv.
    :return: (list<LookbackTuple>) lb_tuples
    """
    def env_fn(i):
        return make_env(env_name, 0, i, out_dir='data/lookbacks/',
                        pre_wrappers=[GymCompeteToOurs, OldMujocoResettableWrapper])

    lb_tuples = []
    for _ in range(self.lb_num):
        make_vec_env = make_dummy_vec_multi_env if use_debug else make_subproc_vec_multi_env
        # bind i at definition time: a bare `lambda: env_fn(i)` would late-bind and give
        # every worker the same index
        multi_venv = make_vec_env([lambda i=i: env_fn(i) for i in range(self.num_envs)])
        if use_debug:
            multi_venv = DebugVenv(multi_venv)
        victim = load_policy(policy_path=victim_path, policy_type=victim_type,
                             env=multi_venv, env_name=env_name, index=victim_index,
                             transparent_params=self.transparent_params)
        multi_venv = EmbedVictimWrapper(multi_env=multi_venv, victim=victim,
                                        victim_index=victim_index, transparent=True,
                                        deterministic=True)
        single_venv = FlattenSingletonVecEnv(multi_venv)
        data_dict = {'state': None, 'action': None, 'info': defaultdict(dict)}
        lb_tuples.append(LookbackTuple(venv=single_venv, data=data_dict))
    return lb_tuples
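
# For reference, a minimal sketch of the LookbackTuple container consistent with how it is
# used above (a lookback vec env paired with per-step bookkeeping). This is an assumption
# about its shape, not the definition from this repo:
#
#     from collections import namedtuple
#     LookbackTuple = namedtuple('LookbackTuple', ['venv', 'data'])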
def wrap_adv_noise_ball(env_name, our_idx, multi_venv, adv_noise_params, victim_path,
                        victim_type, deterministic):
    adv_noise_agent_val = adv_noise_params['noise_val']
    base_policy_path = adv_noise_params['base_path']
    base_policy_type = adv_noise_params['base_type']
    base_policy = load_policy(policy_path=base_policy_path, policy_type=base_policy_type,
                              env=multi_venv, env_name=env_name, index=our_idx)

    base_action_space = multi_venv.action_space.spaces[our_idx]
    adv_noise_action_space = Box(low=adv_noise_agent_val * base_action_space.low,
                                 high=adv_noise_agent_val * base_action_space.high)
    multi_venv = MergeAgentVecEnv(venv=multi_venv, policy=base_policy,
                                  replace_action_space=adv_noise_action_space,
                                  merge_agent_idx=our_idx, deterministic=deterministic)
    return multi_venv
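
# Worked example (illustrative only, with made-up numbers): with noise_val = 0.1 and a base
# action space of Box(low=-1, high=1, shape=(8,)), the adversary's replacement action space
# becomes Box(low=-0.1, high=0.1, shape=(8,)), i.e. a small "noise ball" of perturbations
# trained on top of the base zoo policy (per the comment in maybe_embed_victim below).
def _example_noise_ball_space():
    base_action_space = Box(low=-1.0, high=1.0, shape=(8,))
    noise_val = 0.1  # hypothetical adv_noise_params['noise_val']
    return Box(low=noise_val * base_action_space.low,
               high=noise_val * base_action_space.high)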
def maybe_embed_victim(multi_venv, our_idx, scheduler, log_callbacks, env_name, victim_type,
                       victim_path, victim_index, victim_noise, victim_noise_params,
                       adv_noise_params, transparent_params, lookback_params):
    if victim_type != 'none':
        deterministic = lookback_params is not None
        # If we are actually training an epsilon-ball noise agent on top of a zoo agent
        if adv_noise_params['noise_val'] is not None:
            multi_venv = wrap_adv_noise_ball(env_name, our_idx, multi_venv, adv_noise_params,
                                             victim_path, victim_type,
                                             deterministic=deterministic)

        # Load the victim and then wrap it if appropriate.
        victim = load_policy(policy_path=victim_path, policy_type=victim_type,
                             env=multi_venv, env_name=env_name, index=victim_index,
                             transparent_params=transparent_params)
        if victim_noise:
            victim = apply_victim_wrapper(victim=victim, noise_params=victim_noise_params,
                                          scheduler=scheduler)
            log_callbacks.append(lambda logger, locals, globals: victim.log_callback(logger))

        # Curry the victim
        transparent = transparent_params is not None
        multi_venv = EmbedVictimWrapper(multi_env=multi_venv, victim=victim,
                                        victim_index=victim_index, transparent=transparent,
                                        deterministic=deterministic)

    return multi_venv
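
# Illustrative sketch (an assumption, not code from this module): how maybe_embed_victim
# slots into an env-building pipeline when training against a fixed zoo victim with no
# victim noise, no epsilon-ball noise agent and no lookback. All concrete values are
# hypothetical placeholders.
def _example_embed_plain_zoo_victim(multi_venv, scheduler, log_callbacks, env_name):
    return maybe_embed_victim(
        multi_venv, our_idx=1, scheduler=scheduler, log_callbacks=log_callbacks,
        env_name=env_name, victim_type='zoo', victim_path='1', victim_index=0,
        victim_noise=False, victim_noise_params=None,
        adv_noise_params={'noise_val': None},  # skip wrap_adv_noise_ball
        transparent_params=None, lookback_params=None)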
def score_agent(_run, _seed, env_name, agent_a_path, agent_b_path, agent_a_type, agent_b_type,
                record_traj, record_traj_params, transparent_params, num_env, videos,
                video_params, mask_agent_index, noisy_agent_index, noisy_agent_magnitude,
                mask_agent_noise):
    save_dir = video_params['save_dir']
    if videos:
        if save_dir is None:
            score_ex_logger.info("No directory provided for saving videos; using a tmpdir "
                                 "instead, but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory()
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]

    pre_wrappers = [GymCompeteToOurs] if 'multicomp' in env_name else []

    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs['noise_magnitude'] = mask_agent_noise
        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index,
                                                  **mask_agent_kwargs)

    video_params = utils.sacred_copy(video_params)  # Sacred issue #499

    def env_fn(i):
        env = make_env(env_name, _seed, i, None, pre_wrappers=pre_wrappers,
                       agent_wrappers=agent_wrappers)
        if videos:
            if video_params['annotated']:
                if 'multicomp' in env_name:
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(env, env_name, agent_a_type, agent_a_path,
                                              agent_b_type, agent_b_path, mask_agent_index,
                                              **video_params['annotation_params'])
                else:
                    warnings.warn(f"Annotated videos not supported for environment "
                                  f"'{env_name}'")
            env = VideoWrapper(env, video_dirs[i], video_params['single_file'])
        return env

    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]
    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)

    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params['agent_indices'])

    if venv.num_agents == 1 and agent_b_path != 'none':
        raise ValueError("Set agent_b_path to 'none' if environment only uses one agent.")

    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    zipped = list(zip(agent_types, agent_paths))
    agents = [load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
              for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])]

    if noisy_agent_index is not None:
        agents[noisy_agent_index] = NoisyAgentWrapper(
            agents[noisy_agent_index], noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params['save_dir'])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            try:
                for file_path in os.listdir(env_video_dir):
                    _save_video_or_metadata(env_video_dir, file_path)
            except FileNotFoundError:
                warnings.warn("Can't find path {}; no videos from that path added as artifacts"
                              .format(env_video_dir))

        if tmp_dir is not None:
            tmp_dir.cleanup()

    for observer in score_ex.observers:
        if hasattr(observer, 'dir'):
            _clean_video_directory_structure(observer)

    return score
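
# Illustrative sketch (assumptions: score_agent is the main function of the score_ex Sacred
# experiment, so its parameters are config keys, and any keys omitted here have defaults in
# that experiment's config). All values are hypothetical placeholders.
def _example_run_score_ex():
    run = score_ex.run(config_updates={
        'env_name': 'multicomp/SumoAnts-v0',
        'agent_a_type': 'zoo', 'agent_a_path': '1',
        'agent_b_type': 'ppo2', 'agent_b_path': 'path/to/model.pkl',
        'num_env': 1, 'videos': False, 'record_traj': False,
    })
    return run.result  # the score dict returned by score_agent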