def run(
    base_config: Dict[str, Any],
    ray_server: str,
    init_kwargs: Dict[str, Any],
    exp_name: str,
    spec: Dict[str, Any],
) -> ray.tune.ExperimentAnalysis:
    """Run a Ray Tune experiment described by `spec`.

    Registers `worker_fn` (partially applied to `base_config`) as a Tune
    trainable under a config-dependent name and launches it with `tune.run`.
    `worker_fn`, `worker_name` and `ex` are defined outside this function.
    Ray is always shut down before this function exits, whether the run
    succeeded or any step after (or including) `ray.init` raised.

    Args:
        base_config: configuration bound as the first positional argument of
            `worker_fn`; also hashed to name the trainable.
        ray_server: forwarded to `ray.init` as `address`.
        init_kwargs: extra keyword arguments forwarded to `ray.init`.
        exp_name: experiment name; part of the trainable hash and of `exp_id`.
        spec: must contain "config" and "run_kwargs" (forwarded to `tune.run`);
            may contain "sync_config" (keyword arguments for `tune.SyncConfig`).

    Returns:
        The pair `(result, exp_id)`: the value returned by `tune.run` and the
        name the experiment was run under.
        NOTE(review): the return annotation names only the first element of
        this pair; widening it needs `typing.Tuple`, which may not be imported
        by this file -- confirm and fix alongside the imports.
    """
    # ReadOnlyDict's aren't serializable: see sacred issue #499.
    # One copy serves both the hash below and the partial application.
    base_config = utils.sacred_copy(base_config)
    spec = utils.sacred_copy(spec)

    # We have to register the function we're going to call with Ray.
    # We partially apply worker_fn, so it's different for each experiment.
    # Compute a hash based on the config to make sure it has a unique name!
    # Note Ray does let you pass a worker_fn directly without registering, but then
    # it registers using the function name (which may not be unique).
    cfg_str = json.dumps({"base_config": base_config, "exp_name": exp_name})
    # MD5 is fine: the digest only disambiguates names, it is not security-relevant.
    cfg_hash = hashlib.md5(cfg_str.encode("utf8")).hexdigest()
    trainable_name = f"{worker_name}-{cfg_hash}"
    trainable_fn = functools.partial(worker_fn, base_config)

    exp_id = f"{ex.path}/{exp_name}/{utils.make_timestamp()}-{uuid.uuid4().hex}"

    try:
        # ray.init lives inside the try so that a failure in any later step
        # (registration, SyncConfig construction, the run itself) still
        # reaches ray.shutdown().
        ray.init(address=ray_server, **init_kwargs)
        tune.register_trainable(trainable_name, trainable_fn)

        # Disable TensorBoard logger: fails due to the spec containing string variables.
        tune_loggers = [tune.logger.JsonLogger, tune.logger.CSVLogger]
        sync_config = None
        if "sync_config" in spec:
            sync_config = tune.SyncConfig(**spec["sync_config"])

        result = tune.run(
            trainable_name,
            name=exp_id,
            config=spec["config"],
            sync_config=sync_config,
            loggers=tune_loggers,
            **spec["run_kwargs"],
        )
    finally:
        ray.shutdown()

    return result, exp_id
# Example 2
# 0
    def run(
        base_config: Dict[str, Any],
        ray_server: str,
        init_kwargs: Dict[str, Any],
        exp_name: str,
        spec: Dict[str, Any],
    ) -> ray.tune.ExperimentAnalysis:
        """Run a Ray Tune experiment described by `spec`.

        Registers `worker_fn` (partially applied to `base_config`) as a Tune
        trainable under a config-dependent name and launches it with `tune.run`.
        `worker_fn`, `worker_name` and `ex` are defined outside this function.
        Ray is always shut down before this function exits, whether the run
        succeeded or any step after (or including) `ray.init` raised.

        Args:
            base_config: configuration bound as the first positional argument
                of `worker_fn`; also hashed to name the trainable.
            ray_server: forwarded to `ray.init` as `redis_address`.
                NOTE(review): `redis_address` is the keyword of older Ray
                releases (newer ones take `address`) -- confirm against the
                Ray version this code is pinned to.
            init_kwargs: extra keyword arguments forwarded to `ray.init`.
            exp_name: experiment name; part of the trainable hash and of `exp_id`.
            spec: must contain "config" and "run_kwargs", both forwarded to
                `tune.run`.

        Returns:
            The pair `(result, exp_id)`: the value returned by `tune.run` and
            the name the experiment was run under.
            NOTE(review): the return annotation names only the first element
            of this pair; widening it needs `typing.Tuple`, which may not be
            imported by this file -- confirm and fix alongside the imports.
        """
        # ReadOnlyDict's aren't serializable: see sacred issue #499.
        # One copy serves both the hash below and the partial application.
        base_config = utils.sacred_copy(base_config)
        spec = utils.sacred_copy(spec)

        # We have to register the function we're going to call with Ray.
        # We partially apply worker_fn, so it's different for each experiment.
        # Compute a hash based on the config to make sure it has a unique name!
        # Note Ray does let you pass a worker_fn directly without registering, but then
        # it registers using the function name (which may not be unique).
        cfg_str = json.dumps({"base_config": base_config, "exp_name": exp_name})
        # MD5 is fine: the digest only disambiguates names, it is not security-relevant.
        cfg_hash = hashlib.md5(cfg_str.encode("utf8")).hexdigest()
        trainable_name = f"{worker_name}-{cfg_hash}"
        trainable_fn = functools.partial(worker_fn, base_config)

        exp_id = f"{ex.path}/{exp_name}/{utils.make_timestamp()}-{uuid.uuid4().hex}"

        try:
            # ray.init lives inside the try so that a failure while registering
            # the trainable still reaches ray.shutdown().
            ray.init(redis_address=ray_server, **init_kwargs)
            tune.register_trainable(trainable_name, trainable_fn)

            result = tune.run(
                trainable_name,
                name=exp_id,
                config=spec["config"],
                # TODO(adam): delete next line when ray #6126 merged
                checkpoint_freq=10000000,
                **spec["run_kwargs"],
            )
        finally:
            ray.shutdown()

        return result, exp_id
# Example 3
# 0
def fit_model(
    _run,
    ray_server: str,
    init_kwargs: Dict[str, Any],
    activation_glob: str,
    output_root: str,
    max_timesteps: int,
    data_type,
    model_class,
    model_kwargs,
    train_opponent,
    train_percentage,
):
    """Fit one density model per activation group, running in parallel via Ray.

    Paths matching `activation_glob` are grouped by the part of their basename
    that precedes `_opponent_` (the "stem", e.g. 'SumoHumans-v0_victim_zoo_1')
    and, within a group, keyed by opponent type (e.g. 'ppo2_1'). For every stem
    a `density_fitter` remote task is launched with that group's paths and an
    output directory `<output_root>/<stem>`; the function blocks until all
    tasks finish and then registers `output_root` via `utils.add_artifacts`.

    Args:
        _run: run object handed to `utils.add_artifacts`.
        ray_server: forwarded to `ray.init` as `address`.
        init_kwargs: extra keyword arguments forwarded to `ray.init`.
        activation_glob: glob pattern selecting the activation files.
        output_root: directory under which a `gen_exp_name(...)` subdirectory
            is used for output. May be None (despite the `str` annotation), in
            which case a temporary directory is used and removed on exit.
        max_timesteps, data_type, model_class, model_kwargs, train_opponent,
        train_percentage: passed through to `density_fitter`.

    Paths whose basename cannot be parsed into a stem and an opponent type are
    skipped with a debug log message.
    """
    # Bound before the try so the finally clause can always inspect it, even
    # when ray.init() or the directory scan raises.
    tmp_dir = None
    try:
        ray.init(address=ray_server, **init_kwargs)

        # Find activation paths for each environment & victim-path tuple
        stem_pattern = re.compile(r"(.*)_opponent_.*\.npz")
        # A single `+` captures the same text as the nested `(...+)+` form but
        # cannot backtrack exponentially on names that fail to match.
        opponent_pattern = re.compile(r".*_opponent_([^\s]+)\.npz")
        # activation_paths is indexed by [env_victim][opponent_type] where env_victim is
        # e.g. 'SumoHumans-v0_victim_zoo_1' and opponent_type is e.g. 'ppo2_1'.
        activation_paths = {}

        for activation_path in glob.glob(activation_glob):
            basename = os.path.basename(activation_path)
            stem_match = stem_pattern.match(basename)
            if stem_match is None:
                logger.debug(f"Skipping {activation_path}")
                continue

            # The stem pattern is looser than the opponent pattern (it accepts
            # an empty or whitespace-containing opponent part), so this match
            # can fail even though the one above succeeded.
            opponent_match = opponent_pattern.match(basename)
            if opponent_match is None:
                logger.debug(f"Skipping {activation_path}: cannot parse opponent type")
                continue

            stem = stem_match.group(1)
            opponent_type = opponent_match.group(1)
            activation_paths.setdefault(stem, {})[opponent_type] = activation_path

        # Create temporary output directory (if needed)
        if output_root is None:
            tmp_dir = tempfile.TemporaryDirectory()
            output_root = tmp_dir.name
        else:
            exp_name = gen_exp_name(model_class, model_kwargs)
            output_root = os.path.join(output_root, exp_name)

        # Fit density model and save weights
        results = []
        for stem, paths in activation_paths.items():
            output_dir = osp.join(output_root, stem)
            os.makedirs(output_dir)
            future = density_fitter.remote(
                paths,
                output_dir,
                model_class,
                utils.sacred_copy(model_kwargs),
                max_timesteps,
                data_type,
                train_opponent,
                train_percentage,
            )
            results.append(future)

        ray.get(results)  # block until all jobs have finished
        utils.add_artifacts(_run, output_root, ingredient=fit_model_ex)
    finally:
        try:
            # Clean up temporary directory (if needed)
            if tmp_dir is not None:
                tmp_dir.cleanup()
        finally:
            # Shut Ray down even if removing the temporary directory failed.
            ray.shutdown()
def score_agent(
    _run,
    _seed,
    env_name,
    agent_a_path,
    agent_b_path,
    agent_a_type,
    agent_b_type,
    record_traj,
    record_traj_params,
    transparent_params,
    num_env,
    videos,
    video_params,
    mask_agent_index,
    noisy_agent_index,
    noisy_agent_magnitude,
    mask_agent_noise,
):
    """Play the given agents in `env_name` and return their empirical score.

    Builds `num_env` environments (subprocess-vectorised when `num_env > 1`),
    loads one policy per agent in the environment, optionally wraps one agent
    with noise, and evaluates them with `get_empirical_score`. Optionally
    records trajectories and videos as side effects.

    Args:
        _run: not used by this function body.
        _seed: seed passed to `make_env`.
        env_name: environment identifier.
        agent_a_path, agent_b_path: policy paths; `agent_b_path` must be "none"
            when the environment has a single agent.
        agent_a_type, agent_b_type: policy types passed to `load_policy`.
        record_traj: if true, wrap the environment in `TrajectoryRecorder` and
            add the saved files as "victim_activations.npz" artifacts.
        record_traj_params: reads "agent_indices" and "save_dir".
        transparent_params: passed to `load_policy`.
        num_env: number of parallel environments.
        videos: if true, wrap each environment in `VideoWrapper` and collect
            the files it produced.
        video_params: reads "save_dir" (None means use a temporary directory),
            "annotated", "annotation_params" and "single_file".
        mask_agent_index: if not None, agent wrappers from
            `make_mask_agent_wrappers` are applied to the environments.
        noisy_agent_index: if not None, index of the agent to wrap in
            `NoisyAgentWrapper`.
        noisy_agent_magnitude: constant noise level for that wrapper.
        mask_agent_noise: if not None, passed as `noise_magnitude` to
            `make_mask_agent_wrappers`.

    Returns:
        The value returned by `get_empirical_score`.

    Raises:
        ValueError: single-agent environment but `agent_b_path` is not "none".
        FileNotFoundError: `videos` is set and nothing was saved from one of
            the per-environment video directories.

    The vectorised environment and the temporary video directory (if any) are
    released even when evaluation or artifact collection fails.
    """
    save_dir = video_params["save_dir"]
    tmp_dir = None
    video_dirs = []
    if videos:
        if save_dir is None:
            score_ex_logger.info(
                "No directory provided for saving videos; using a tmpdir instead,"
                " but videos will be saved to Sacred run directory")
            tmp_dir = tempfile.TemporaryDirectory(prefix="score-videos")
            save_dir = tmp_dir.name
        video_dirs = [osp.join(save_dir, str(i)) for i in range(num_env)]

    try:
        agent_wrappers = {}
        if mask_agent_index is not None:
            mask_agent_kwargs = {}
            if mask_agent_noise is not None:
                mask_agent_kwargs["noise_magnitude"] = mask_agent_noise

            agent_wrappers = make_mask_agent_wrappers(env_name, mask_agent_index,
                                                      **mask_agent_kwargs)

        video_params = utils.sacred_copy(video_params)  # Sacred issue #499

        def env_fn(i):
            env = make_env(env_name, _seed, i, None, agent_wrappers=agent_wrappers)
            if videos:
                if video_params["annotated"]:
                    if "multicomp" in env_name:
                        assert num_env == 1, "pretty videos requires num_env=1"
                        env = AnnotatedGymCompete(
                            env,
                            env_name,
                            agent_a_type,
                            agent_a_path,
                            agent_b_type,
                            agent_b_path,
                            mask_agent_index,
                            **video_params["annotation_params"],
                        )
                    else:
                        warnings.warn(
                            f"Annotated videos not supported for environment '{env_name}'"
                        )
                env = VideoWrapper(env, video_dirs[i], video_params["single_file"])
            return env

        env_fns = [functools.partial(env_fn, i) for i in range(num_env)]

        if num_env > 1:
            venv = make_subproc_vec_multi_env(env_fns)
        else:
            venv = make_dummy_vec_multi_env(env_fns)

        # From here on the environments exist (possibly as subprocesses), so
        # every exit path has to go through venv.close().
        try:
            if record_traj:
                venv = TrajectoryRecorder(venv, record_traj_params["agent_indices"])

            if venv.num_agents == 1 and agent_b_path != "none":
                raise ValueError(
                    "Set agent_b_path to 'none' if environment only uses one agent.")

            agent_paths = [agent_a_path, agent_b_path]
            agent_types = [agent_a_type, agent_b_type]
            zipped = list(zip(agent_types, agent_paths))
            agents = [
                load_policy(policy_type, policy_path, venv, env_name, i,
                            transparent_params)
                for i, (policy_type,
                        policy_path) in enumerate(zipped[:venv.num_agents])
            ]

            if noisy_agent_index is not None:
                agents[noisy_agent_index] = NoisyAgentWrapper(
                    agents[noisy_agent_index],
                    noise_annealer=lambda: noisy_agent_magnitude)

            score = get_empirical_score(venv, agents)

            for agent in agents:
                if agent.sess is not None:
                    agent.sess.close()

            if record_traj:
                save_paths = venv.save(save_dir=record_traj_params["save_dir"])
                for save_path in save_paths:
                    score_ex.add_artifact(save_path, name="victim_activations.npz")
        finally:
            venv.close()

        # Video files are gathered only after the environments are closed,
        # matching the original ordering.
        if videos:
            for env_video_dir in video_dirs:
                added = False
                for file_path in os.listdir(env_video_dir):
                    added |= _save_video_or_metadata(env_video_dir, file_path)
                if not added:
                    raise FileNotFoundError(
                        f"No video artifacts found in path {env_video_dir}.")
    finally:
        # Remove the temporary video directory on every path, including the
        # FileNotFoundError raised just above.
        if tmp_dir is not None:
            tmp_dir.cleanup()

    for observer in score_ex.observers:
        if hasattr(observer, "dir"):
            _clean_video_directory_structure(observer)

    return score