def test_vec_env(spec):
    """Test that our {Dummy,Subproc}VecMultiEnv gives the same results as each other."""
    # Bind i as a default argument: a bare `lambda: make_env(spec, i)` late-binds i,
    # so every factory would build the env with the loop's final value i == 3 instead
    # of four distinct indices.
    env_fns = [lambda i=i: make_env(spec, i) for i in range(4)]
    venv1 = multi_agent.make_dummy_vec_multi_env(env_fns)
    venv2 = multi_agent.make_subproc_vec_multi_env(env_fns)
    is_multicomp = spec.id.startswith("multicomp/")
    # Can't easily compare info dicts returned by multicomp/ environments, so just skip that check
    assert_envs_equal(venv1, venv2, 100, check_info=not is_multicomp)
def create_multi_agent_curried_policy_wrapper(mon_dir, env_name, num_envs, embed_index,
                                              max_steps, state_shape=None, add_zoo=False,
                                              num_zoo=5):
    """Yield a (vec_env, policy_wrapper, zoo) triple for testing curried multi-policies.

    Builds a dummy vec multi-env of `num_envs` copies of `env_name` (each capped at
    `max_steps` steps), curries agent `embed_index` with a MultiPolicyWrapper of ten
    constant policies (plus `num_zoo` copies of a loaded zoo policy when `add_zoo`),
    and flattens the result to a single-agent vec env. The policy wrapper is closed
    on teardown.
    """
    def episode_limit(env):
        return time_limit.TimeLimit(env, max_episode_steps=max_steps)

    def env_fn(i):
        return make_env(env_name, seed=42, i=i, out_dir=mon_dir,
                        pre_wrappers=[episode_limit])

    # Bind i as a default argument: a bare `lambda: env_fn(i)` late-binds i, so every
    # factory would construct the env with the final index num_envs - 1.
    vec_env = make_dummy_vec_multi_env([lambda i=i: env_fn(i) for i in range(num_envs)])
    # Opponent policy for the *other* agent slot (1 - embed_index).
    zoo = load_policy(
        policy_path="1",
        policy_type="zoo",
        env=vec_env,
        env_name=env_name,
        index=1 - embed_index,
        transparent_params=None,
    )
    half_env = FakeSingleSpacesVec(vec_env, agent_id=embed_index)
    policies = [
        _get_constant_policy(half_env,
                             constant_value=half_env.action_space.sample(),
                             state_shape=state_shape)
        for _ in range(10)
    ]
    if add_zoo:
        policies += [zoo] * num_zoo
    policy_wrapper = MultiPolicyWrapper(policies=policies, num_envs=num_envs)
    vec_env = CurryVecEnv(venv=vec_env, policy=policy_wrapper,
                          agent_idx=embed_index, deterministic=False)
    vec_env = FlattenSingletonVecEnv(vec_env)
    # try/finally ensures the wrapper is closed even if the consumer of this
    # generator raises while the fixture is live.
    try:
        yield vec_env, policy_wrapper, zoo
    finally:
        policy_wrapper.close()
def score_agent(
    _run,
    _seed,
    env_name,
    agent_a_path,
    agent_b_path,
    agent_a_type,
    agent_b_type,
    record_traj,
    record_traj_params,
    transparent_params,
    num_env,
    videos,
    video_params,
    mask_agent_index,
    noisy_agent_index,
    noisy_agent_magnitude,
    mask_agent_noise,
):
    """Pit two agents against each other in `env_name` and return their empirical score.

    Builds `num_env` (sub)process vec environments, loads agents A and B from the
    given paths/types, optionally records trajectories and/or videos as Sacred
    artifacts, and tears everything down before returning the result of
    `get_empirical_score`.

    NOTE(review): parameter semantics (Sacred-injected `_run`/`_seed`, the structure
    of `record_traj_params`/`video_params`, etc.) are defined by the experiment
    config outside this chunk — confirm against the config declaration.
    """
    save_dir = video_params["save_dir"]
    if videos:
        if save_dir is None:
            # Fall back to a tmpdir; videos are still persisted via Sacred artifacts below.
            score_ex_logger.info(
                "No directory provided for saving videos; using a tmpdir instead,"
                " but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory(prefix="score-videos")
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        # One video subdirectory per parallel environment.
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]
    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs["noise_magnitude"] = mask_agent_noise
        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index,
                                                  **mask_agent_kwargs)
    video_params = utils.sacred_copy(video_params)  # Sacred issue #499

    def env_fn(i):
        # Factory for the i-th environment; closes over video_dirs/agent_wrappers.
        env = make_env(env_name, _seed, i, None, agent_wrappers=agent_wrappers)
        if videos:
            if video_params["annotated"]:
                if "multicomp" in env_name:
                    # Annotated rendering only supports a single environment.
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(
                        env,
                        env_name,
                        agent_a_type,
                        agent_a_path,
                        agent_b_type,
                        agent_b_path,
                        mask_agent_index,
                        **video_params["annotation_params"],
                    )
                else:
                    warnings.warn(
                        f"Annotated videos not supported for environment '{env_name}'"
                    )
            env = VideoWrapper(env, video_dirs[i], video_params["single_file"])
        return env

    # functools.partial binds i eagerly (unlike a bare lambda in a loop).
    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]
    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)
    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params["agent_indices"])
    if venv.num_agents == 1 and agent_b_path != "none":
        raise ValueError(
            "Set agent_b_path to 'none' if environment only uses one agent.")
    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    # Only load as many agents as the environment actually hosts.
    zipped = list(zip(agent_types, agent_paths))
    agents = [
        load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
        for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])
    ]
    if noisy_agent_index is not None:
        # Constant annealer: noise magnitude stays fixed for the whole evaluation.
        agents[noisy_agent_index] = NoisyAgentWrapper(
            agents[noisy_agent_index],
            noise_annealer=lambda: noisy_agent_magnitude)
    score = get_empirical_score(venv, agents)
    # Release TF sessions before closing the venv.
    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()
    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params["save_dir"])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")
    venv.close()
    if videos:
        for env_video_dir in video_dirs:
            added = False
            for file_path in os.listdir(env_video_dir):
                added |= _save_video_or_metadata(env_video_dir, file_path)
            # Strict variant: a run that produced no video artifacts is an error here.
            if not added:
                raise FileNotFoundError(
                    f"No video artifacts found in path {env_video_dir}.")
        if tmp_dir is not None:
            tmp_dir.cleanup()
        for observer in score_ex.observers:
            if hasattr(observer, "dir"):
                _clean_video_directory_structure(observer)
    return score
def score_agent(_run, _seed, env_name, agent_a_path, agent_b_path, agent_a_type,
                agent_b_type, record_traj, record_traj_params, transparent_params,
                num_env, videos, video_params, mask_agent_index, noisy_agent_index,
                noisy_agent_magnitude, mask_agent_noise):
    """Pit two agents against each other in `env_name` and return their empirical score.

    Builds `num_env` (sub)process vec environments, loads agents A and B from the
    given paths/types, optionally records trajectories and/or videos as Sacred
    artifacts, and tears everything down before returning the result of
    `get_empirical_score`.

    NOTE(review): parameter semantics (Sacred-injected `_run`/`_seed`, the structure
    of `record_traj_params`/`video_params`, etc.) are defined by the experiment
    config outside this chunk — confirm against the config declaration.
    """
    save_dir = video_params['save_dir']
    if videos:
        if save_dir is None:
            # Fix: the two implicitly-concatenated fragments previously rendered as
            # "...a tmpdir instead,but videos..." — a space was missing at the join.
            score_ex_logger.info(
                "No directory provided for saving videos; using a tmpdir instead, "
                "but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory()
            save_dir = tmp_dir.name
        else:
            tmp_dir = None
        # One video subdirectory per parallel environment.
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]

    pre_wrappers = [GymCompeteToOurs] if 'multicomp' in env_name else []
    agent_wrappers = {}
    if mask_agent_index is not None:
        mask_agent_kwargs = {}
        if mask_agent_noise is not None:
            mask_agent_kwargs['noise_magnitude'] = mask_agent_noise
        agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index,
                                                  **mask_agent_kwargs)

    def env_fn(i):
        # Factory for the i-th environment; closes over video_dirs/agent_wrappers.
        env = make_env(env_name, _seed, i, None, pre_wrappers=pre_wrappers,
                       agent_wrappers=agent_wrappers)
        if videos:
            if video_params['annotated']:
                if 'multicomp' in env_name:
                    # Annotated rendering only supports a single environment.
                    assert num_env == 1, "pretty videos requires num_env=1"
                    env = AnnotatedGymCompete(
                        env, env_name, agent_a_type, agent_a_path, agent_b_type,
                        agent_b_path, mask_agent_index,
                        **video_params['annotation_params'])
                else:
                    warnings.warn(
                        f"Annotated videos not supported for environment '{env_name}'"
                    )
            env = VideoWrapper(env, video_dirs[i], video_params['single_file'])
        return env

    # functools.partial binds i eagerly (unlike a bare lambda in a loop).
    env_fns = [functools.partial(env_fn, i) for i in range(num_env)]
    if num_env > 1:
        venv = make_subproc_vec_multi_env(env_fns)
    else:
        venv = make_dummy_vec_multi_env(env_fns)
    if record_traj:
        venv = TrajectoryRecorder(venv, record_traj_params['agent_indices'])

    if venv.num_agents == 1 and agent_b_path != 'none':
        raise ValueError(
            "Set agent_b_path to 'none' if environment only uses one agent.")

    agent_paths = [agent_a_path, agent_b_path]
    agent_types = [agent_a_type, agent_b_type]
    # Only load as many agents as the environment actually hosts.
    zipped = list(zip(agent_types, agent_paths))
    agents = [
        load_policy(policy_type, policy_path, venv, env_name, i, transparent_params)
        for i, (policy_type, policy_path) in enumerate(zipped[:venv.num_agents])
    ]
    if noisy_agent_index is not None:
        # Constant annealer: noise magnitude stays fixed for the whole evaluation.
        agents[noisy_agent_index] = NoisyAgentWrapper(
            agents[noisy_agent_index],
            noise_annealer=lambda: noisy_agent_magnitude)

    score = get_empirical_score(venv, agents)

    # Release TF sessions before closing the venv.
    for agent in agents:
        if agent.sess is not None:
            agent.sess.close()

    if record_traj:
        save_paths = venv.save(save_dir=record_traj_params['save_dir'])
        for save_path in save_paths:
            score_ex.add_artifact(save_path, name="victim_activations.npz")

    venv.close()

    if videos:
        for env_video_dir in video_dirs:
            # Lenient variant: a missing video directory is a warning, not an error.
            try:
                for file_path in os.listdir(env_video_dir):
                    _save_video_or_metadata(env_video_dir, file_path)
            except FileNotFoundError:
                warnings.warn(
                    "Can't find path {}; no videos from that path added as artifacts"
                    .format(env_video_dir))
        if tmp_dir is not None:
            tmp_dir.cleanup()
        for observer in score_ex.observers:
            if hasattr(observer, 'dir'):
                _clean_video_directory_structure(observer)

    return score