def __init__(self, venv, env_name, use_debug, victim_index, victim_path,
                 victim_type, transparent_params, lb_mul, lb_num, lb_path,
                 lb_type):
        super().__init__(venv)
        self.lb_num = lb_num
        self.lb_mul = lb_mul
        if transparent_params is None:
            raise ValueError(
                "LookbackRewardVecWrapper assumes transparent policies and venvs."
            )
        self.transparent_params = transparent_params
        self.victim_index = victim_index

        self._policy = load_policy(lb_type,
                                   lb_path,
                                   self.venv.unwrapped,
                                   env_name,
                                   1 - victim_index,
                                   transparent_params=None)
        self._action = None
        self._obs = None
        self._state = None
        self._new_lb_state = None
        self._dones = [False] * self.num_envs
        self.ep_lens = np.zeros(self.num_envs, dtype=int)
        self.lb_tuples = self._create_lb_tuples(env_name, use_debug,
                                                victim_index, victim_path,
                                                victim_type)
        self.use_debug = use_debug
        if self.use_debug:
            # create a debug file for this venv and also every lookback venv ordinally
            self.debug_files = [
                open(f'debug{i}.pkl', 'wb') for i in range(self.lb_num + 1)
            ]
            self.get_debug_venv().set_debug_file(self.debug_files[0])

    def _create_lb_tuples(self, env_name, use_debug, victim_index, victim_path,
                          victim_type):
        """Create lookback data structures which are used to compare our episode rollouts against
        those of an environment where a lookback base policy acted instead.

        params victim_index, victim_path, victim_type are the same as in policy_loader.load_policy
        :param use_debug (bool): Use DummyVecEnv instead of SubprocVecEnv
        :return: (list<LookbackTuple>) lb_tuples
        """
        def env_fn(i):
            return make_env(
                env_name,
                0,
                i,
                out_dir='data/lookbacks/',
                pre_wrappers=[GymCompeteToOurs, OldMujocoResettableWrapper])

        lb_tuples = []
        for _ in range(self.lb_num):
            make_vec_env = make_dummy_vec_multi_env if use_debug else make_subproc_vec_multi_env
            multi_venv = make_vec_env(
                # Bind i eagerly: a bare `lambda: env_fn(i)` would capture the loop variable
                # by reference, so every venv would be built with the final index.
                [lambda i=i: env_fn(i) for i in range(self.num_envs)])
            if use_debug:
                multi_venv = DebugVenv(multi_venv)

            victim = load_policy(policy_path=victim_path,
                                 policy_type=victim_type,
                                 env=multi_venv,
                                 env_name=env_name,
                                 index=victim_index,
                                 transparent_params=self.transparent_params)

            multi_venv = EmbedVictimWrapper(multi_env=multi_venv,
                                            victim=victim,
                                            victim_index=victim_index,
                                            transparent=True,
                                            deterministic=True)

            single_venv = FlattenSingletonVecEnv(multi_venv)
            data_dict = {
                'state': None,
                'action': None,
                'info': defaultdict(dict)
            }
            lb_tuples.append(LookbackTuple(venv=single_venv, data=data_dict))
        return lb_tuples
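A minimal standalone sketch (plain Python, no project dependencies) of why the env index above is bound with a default argument: a bare `lambda: env_fn(i)` would capture the comprehension variable by reference, so every thunk would build its venv with the last index.

def _late_binding_demo(num_envs=3):
    late = [lambda: i for i in range(num_envs)]       # every call returns num_envs - 1
    bound = [lambda i=i: i for i in range(num_envs)]  # each call returns its own index
    return [f() for f in late], [f() for f in bound]

# _late_binding_demo() -> ([2, 2, 2], [0, 1, 2])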
Example 3
def wrap_adv_noise_ball(env_name, our_idx, multi_venv, adv_noise_params,
                        victim_path, victim_type, deterministic):
    adv_noise_agent_val = adv_noise_params['noise_val']
    base_policy_path = adv_noise_params['base_path']
    base_policy_type = adv_noise_params['base_type']
    base_policy = load_policy(policy_path=base_policy_path,
                              policy_type=base_policy_type,
                              env=multi_venv,
                              env_name=env_name,
                              index=our_idx)

    base_action_space = multi_venv.action_space.spaces[our_idx]
    adv_noise_action_space = Box(
        low=adv_noise_agent_val * base_action_space.low,
        high=adv_noise_agent_val * base_action_space.high)
    multi_venv = MergeAgentVecEnv(venv=multi_venv,
                                  policy=base_policy,
                                  replace_action_space=adv_noise_action_space,
                                  merge_agent_idx=our_idx,
                                  deterministic=deterministic)
    return multi_venv
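A hedged, self-contained sketch of the action-space scaling used above, with made-up bounds: the adversary's noise-ball space is simply the base agent's Box scaled by noise_val, so the learned perturbation stays within that fraction of the base action range.

import numpy as np
from gym.spaces import Box

base_action_space = Box(low=-np.ones(3, dtype=np.float32), high=np.ones(3, dtype=np.float32))
noise_val = 0.1  # hypothetical adv_noise_params['noise_val']
noise_ball_space = Box(low=noise_val * base_action_space.low,
                       high=noise_val * base_action_space.high)
# noise_ball_space.low == [-0.1, -0.1, -0.1]; noise_ball_space.high == [0.1, 0.1, 0.1]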
Example 4
def maybe_embed_victim(multi_venv, our_idx, scheduler, log_callbacks, env_name,
                       victim_type, victim_path, victim_index, victim_noise,
                       victim_noise_params, adv_noise_params,
                       transparent_params, lookback_params):
    if victim_type != 'none':
        deterministic = lookback_params is not None
        # If we are actually training an epsilon-ball noise agent on top of a zoo agent
        if adv_noise_params['noise_val'] is not None:
            multi_venv = wrap_adv_noise_ball(env_name,
                                             our_idx,
                                             multi_venv,
                                             adv_noise_params=adv_noise_params,
                                             victim_path=victim_path,
                                             victim_type=victim_type,
                                             deterministic=deterministic)

        # Load the victim and then wrap it if appropriate.
        victim = load_policy(policy_path=victim_path,
                             policy_type=victim_type,
                             env=multi_venv,
                             env_name=env_name,
                             index=victim_index,
                             transparent_params=transparent_params)

        if victim_noise:
            victim = apply_victim_wrapper(victim=victim,
                                          noise_params=victim_noise_params,
                                          scheduler=scheduler)
            log_callbacks.append(
                lambda logger, locals, globals: victim.log_callback(logger))

        # Curry the victim
        transparent = transparent_params is not None
        multi_venv = EmbedVictimWrapper(multi_env=multi_venv,
                                        victim=victim,
                                        victim_index=victim_index,
                                        transparent=transparent,
                                        deterministic=deterministic)

    return multi_venv
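For reference, a hedged sketch of the adv_noise_params dict that this function and wrap_adv_noise_ball read; only the keys shown here appear in the code above, and the example values are hypothetical placeholders rather than defaults from the project config.

example_adv_noise_params = {
    'noise_val': 0.1,    # scale of the adversarial noise ball; None disables the wrapper
    'base_path': '1',    # hypothetical path/identifier of the base policy to perturb
    'base_type': 'zoo',  # hypothetical policy type understood by load_policy
}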
Example 5
def score_agent(_run, _seed, env_name, agent_a_path, agent_b_path, agent_a_type, agent_b_type,
                record_traj, record_traj_params, transparent_params, num_env,
                videos, video_params, mask_agent_index, noisy_agent_index,
                noisy_agent_magnitude, mask_agent_noise):
    save_dir = video_params['save_dir']
    if videos:
        if save_dir is None:
            score_ex_logger.info("No directory provided for saving videos; using a tmpdir instead, "
                                 "but videos will be saved to the Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory()
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]
    pre_wrappers = [GymCompeteToOurs] if 'multicomp' in env_name else []

    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs['noise_magnitude'] = mask_agent_noise

        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index, **mask_agent_kwargs)

    video_params = utils.sacred_copy(video_params)  # Sacred issue #499

    def env_fn(i):
        env = make_env(env_name, _seed, i, None,
                       pre_wrappers=pre_wrappers,
                       agent_wrappers=agent_wrappers)
        if videos:
            if video_params['annotated']:
                if 'multicomp' in env_name:
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(env, env_name, agent_a_type, agent_a_path,
                                              agent_b_type, agent_b_path, mask_agent_index,
                                              **video_params['annotation_params'])
                else:
                    warnings.warn(f"Annotated videos not supported for environment '{env_name}'")
            env = VideoWrapper(env, video_dirs[i], video_params['single_file'])
        return env
    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]

    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)

    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params['agent_indices'])

    if venv.num_agents == 1 and agent_b_path != 'none':
        raise ValueError("Set agent_b_path to 'none' if environment only uses one agent.")

    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    zipped = list(zip(agent_types, agent_paths))
    agents = [load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
              for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])]

    if noisy_agent_index is not None:
        agents[noisy_agent_index] = NoisyAgentWrapper(agents[noisy_agent_index],
                                                      noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params['save_dir'])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            try:
                for file_path in os.listdir(env_video_dir):
                    _save_video_or_metadata(env_video_dir, file_path)

            except FileNotFoundError:
                warnings.warn("Can't find path {}; no videos from that path added as artifacts"
                              .format(env_video_dir))

        if tmp_dir is not None:
            tmp_dir.cleanup()

    for observer in score_ex.observers:
        if hasattr(observer, 'dir'):
            _clean_video_directory_structure(observer)

    return score
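A small stdlib-only sketch of the temporary video-directory pattern used in score_agent: when no save_dir is configured, write into a TemporaryDirectory, collect the per-env files (a stand-in for the add_artifact calls), and clean up explicitly once they are no longer needed.

import os
import os.path as osp
import tempfile


def _tmp_video_dir_demo(save_dir=None, num_env=2):
    tmp_dir = None
    if save_dir is None:
        tmp_dir = tempfile.TemporaryDirectory()
        save_dir = tmp_dir.name
    video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]
    for d in video_dirs:
        os.makedirs(d, exist_ok=True)
    collected = [sorted(os.listdir(d)) for d in video_dirs]  # stand-in for artifact upload
    if tmp_dir is not None:
        tmp_dir.cleanup()
    return collected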