Example #1
def test_vec_env(spec):
    """Test that our {Dummy,Subproc}VecMultiEnv gives the same results as
       each other."""
    # Bind i now; a plain closure would give every env the final loop index.
    env_fns = [lambda i=i: make_env(spec, i) for i in range(4)]
    venv1 = multi_agent.make_dummy_vec_multi_env(env_fns)
    venv2 = multi_agent.make_subproc_vec_multi_env(env_fns)
    is_multicomp = spec.id.startswith("multicomp/")
    # Can't easily compare info dicts returned by multicomp/ environments, so just skip that check
    assert_envs_equal(venv1, venv2, 100, check_info=not is_multicomp)
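
A minimal sketch of how this test might be collected, assuming a pytest setup in which spec is parametrized over registered multicomp environment specs (the spec filter, test name, and pre-0.26 gym registry API below are assumptions, not part of the original module):

import gym
import pytest

MULTI_AGENT_SPECS = [
    spec for spec in gym.envs.registry.all()
    if spec.id.startswith("multicomp/")  # assumption: multicomp IDs are the multi-agent envs
]

@pytest.mark.parametrize("spec", MULTI_AGENT_SPECS, ids=lambda spec: spec.id)
def test_vec_env_parametrized(spec):
    # Delegate to the example above for the dummy-vs-subproc equivalence check.
    test_vec_env(spec)
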
Example #2
def create_multi_agent_curried_policy_wrapper(mon_dir,
                                              env_name,
                                              num_envs,
                                              embed_index,
                                              max_steps,
                                              state_shape=None,
                                              add_zoo=False,
                                              num_zoo=5):
    def episode_limit(env):
        return time_limit.TimeLimit(env, max_episode_steps=max_steps)

    def env_fn(i):
        return make_env(env_name,
                        seed=42,
                        i=i,
                        out_dir=mon_dir,
                        pre_wrappers=[episode_limit])

    # Bind i now so each constructor keeps its own environment index.
    vec_env = make_dummy_vec_multi_env(
        [lambda i=i: env_fn(i) for i in range(num_envs)])

    zoo = load_policy(
        policy_path="1",
        policy_type="zoo",
        env=vec_env,
        env_name=env_name,
        index=1 - embed_index,
        transparent_params=None,
    )
    half_env = FakeSingleSpacesVec(vec_env, agent_id=embed_index)
    policies = [
        _get_constant_policy(half_env,
                             constant_value=half_env.action_space.sample(),
                             state_shape=state_shape) for _ in range(10)
    ]
    if add_zoo:
        policies += [zoo] * num_zoo

    policy_wrapper = MultiPolicyWrapper(policies=policies, num_envs=num_envs)

    vec_env = CurryVecEnv(venv=vec_env,
                          policy=policy_wrapper,
                          agent_idx=embed_index,
                          deterministic=False)
    vec_env = FlattenSingletonVecEnv(vec_env)

    yield vec_env, policy_wrapper, zoo
    policy_wrapper.close()
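
Because this function yields its result once and then cleans up, it behaves like a generator-based fixture; below is a minimal sketch of consuming it through contextlib, assuming the function is importable and that multicomp/SumoAnts-v0 is an available environment ID (in a test suite the same generator would more likely be wrapped with pytest.fixture):

import contextlib
import tempfile

# Wrapping the generator gives a context manager whose exit resumes the generator,
# which is what triggers policy_wrapper.close() above.
curried_env = contextlib.contextmanager(create_multi_agent_curried_policy_wrapper)

with tempfile.TemporaryDirectory() as mon_dir:
    with curried_env(mon_dir=mon_dir,
                     env_name="multicomp/SumoAnts-v0",  # assumed environment ID
                     num_envs=2,
                     embed_index=0,
                     max_steps=100) as (vec_env, policy_wrapper, zoo):
        observations = vec_env.reset()
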
Example #3
def score_agent(
    _run,
    _seed,
    env_name,
    agent_a_path,
    agent_b_path,
    agent_a_type,
    agent_b_type,
    record_traj,
    record_traj_params,
    transparent_params,
    num_env,
    videos,
    video_params,
    mask_agent_index,
    noisy_agent_index,
    noisy_agent_magnitude,
    mask_agent_noise,
):
    save_dir = video_params["save_dir"]
    if videos:
        if save_dir is None:
            score_ex_logger.info(
                "No directory provided for saving videos; using a tmpdir instead,"
                " but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory(prefix="score-videos")
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]

    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs["noise_magnitude"] = mask_agent_noise

        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index,
                                                  **mask_agent_kwargs)

    video_params = utils.sacred_copy(video_params)  # Sacred issue #499

    def env_fn(i):
        env = make_env(env_name, _seed, i, None, agent_wrappers=agent_wrappers)
        if videos:
            if video_params["annotated"]:
                if "multicomp" in env_name:
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(
                        env,
                        env_name,
                        agent_a_type,
                        agent_a_path,
                        agent_b_type,
                        agent_b_path,
                        mask_agent_index,
                        **video_params["annotation_params"],
                    )
                else:
                    warnings.warn(
                        f"Annotated videos not supported for environment '{env_name}'"
                    )
            env = VideoWrapper(env, video_dirs[i], video_params["single_file"])
        return env

    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]

    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)

    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params["agent_indices"])

    if venv.num_agents == 1 and agent_b_path != "none":
        raise ValueError(
            "Set agent_b_path to 'none' if environment only uses one agent.")

    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    zipped = list(zip(agent_types, agent_paths))
    agents = [
        load_policy(policy_type, policy_path, venv, env_name, i,
                    transparent_params)
        for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])
    ]

    if noisy_agent_index is not None:
        agents[noisy_agent_index] = NoisyAgentWrapper(
            agents[noisy_agent_index],
            noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params["save_dir"])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            added = False
            for file_path in os.listdir(env_video_dir):
                added |= _save_video_or_metadata(env_video_dir, file_path)
            if not added:
                raise FileNotFoundError(
                    f"No video artifacts found in path {env_video_dir}.")

        if tmp_dir is not None:
            tmp_dir.cleanup()

    for observer in score_ex.observers:
        if hasattr(observer, "dir"):
            _clean_video_directory_structure(observer)

    return score
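
The captured _run and _seed arguments suggest this is the main command of a Sacred experiment, presumably the score_ex object referenced in the body; a minimal sketch of driving it programmatically, under that assumption (the environment ID and agent paths are placeholders):

# Hypothetical invocation; the config keys mirror the captured parameter names above.
run = score_ex.run(config_updates={
    "env_name": "multicomp/SumoAnts-v0",  # assumed environment ID
    "agent_a_type": "zoo", "agent_a_path": "1",
    "agent_b_type": "zoo", "agent_b_path": "2",
    "num_env": 1,
    "videos": False,
    "record_traj": False,
})
print(run.result)  # the score returned by score_agent
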
Example #4
def score_agent(_run, _seed, env_name, agent_a_path, agent_b_path,
                agent_a_type, agent_b_type, record_traj, record_traj_params,
                transparent_params, num_env, videos, video_params,
                mask_agent_index, noisy_agent_index, noisy_agent_magnitude,
                mask_agent_noise):
    save_dir = video_params['save_dir']
    if videos:
        if save_dir is None:
            score_ex_logger.info(
                "No directory provided for saving videos; using a tmpdir instead,"
                "but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory()
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]
    pre_wrappers = [GymCompeteToOurs] if 'multicomp' in env_name else []

    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs['noise_magnitude'] = mask_agent_noise

        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index,
                                                  **mask_agent_kwargs)

    def env_fn(i):
        env = make_env(env_name,
                       _seed,
                       i,
                       None,
                       pre_wrappers=pre_wrappers,
                       agent_wrappers=agent_wrappers)
        if videos:
            if video_params['annotated']:
                if 'multicomp' in env_name:
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(
                        env, env_name, agent_a_type, agent_a_path,
                        agent_b_type, agent_b_path, mask_agent_index,
                        **video_params['annotation_params'])
                else:
                    warnings.warn(
                        f"Annotated videos not supported for environment '{env_name}'"
                    )
            env = VideoWrapper(env, video_dirs[i], video_params['single_file'])
        return env

    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]

    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)

    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params['agent_indices'])

    if venv.num_agents == 1 and agent_b_path != 'none':
        raise ValueError(
            "Set agent_b_path to 'none' if environment only uses one agent.")

    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    zipped = list(zip(agent_types, agent_paths))
    agents = [
        load_policy(policy_type, policy_path, venv, env_name, i,
                    transparent_params)
        for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])
    ]

    if noisy_agent_index is not None:
        agents[noisy_agent_index] = NoisyAgentWrapper(
            agents[noisy_agent_index],
            noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params['save_dir'])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            try:
                for file_path in os.listdir(env_video_dir):
                    _save_video_or_metadata(env_video_dir, file_path)

            except FileNotFoundError:
                warnings.warn(
                    "Can't find path {}; no videos from that path added as artifacts"
                    .format(env_video_dir))

        if tmp_dir is not None:
            tmp_dir.cleanup()

    for observer in score_ex.observers:
        if hasattr(observer, 'dir'):
            _clean_video_directory_structure(observer)

    return score