Example #1
0
def gather_tb_directories(
    source_dir: str,
    run_name: Optional[str],
    env_name: Optional[str],
    skip_failed_runs: bool,
) -> dict:
    """Gather Tensorboard directories from a `parallel_ex` run.

    The directories are symlinked (not copied) into a freshly created, unique
    directory inside `/tmp/analysis_tb/`. The resulting layout is
    `{gather_dir}/{"tb" or "sb_tb"}/{basename of the run directory}`, where
    the run directory is the grandparent of each Sacred directory
    (presumably the Ray Tune trial directory).

    Undocumented arguments are the same as in `analyze_imitation()`; they are
    forwarded unchanged to `_get_sacred_dicts()` to select which runs are
    gathered.

    Args:
      source_dir: A local_dir for Ray. For example, `~/ray_results/`.

    Returns:
      A dict with two keys. "gather_dir" (str) is a path to a /tmp/
      directory containing symlinks to all the TensorBoard runs filtered from
      `source_dir`. "n_tb_dirs" (int) is the number of TensorBoard
      directories that were symlinked.

    Raises:
      AssertionError: If a run directory contains more than one "tb" or more
        than one "sb_tb" subdirectory.
    """
    sacred_dicts = _get_sacred_dicts(source_dir, run_name, env_name,
                                     skip_failed_runs)
    os.makedirs("/tmp/analysis_tb", exist_ok=True)
    tmp_dir = tempfile.mkdtemp(dir="/tmp/analysis_tb/")

    tb_dirs_count = 0
    # Several Sacred records can live under one run directory (e.g.
    # ".../sacred/1" and ".../sacred/2"). Each run directory must only be
    # linked once, otherwise the second `os.symlink` to the same destination
    # raises FileExistsError.
    seen_run_dirs = set()
    for sd in sacred_dicts:
        # Expecting a path like "~/ray_results/{run_name}/sacred/1".
        # Want to search for all Tensorboard dirs inside
        # "~/ray_results/{run_name}".
        sacred_dir = sd.sacred_dir.rstrip("/")
        run_dir = osp.dirname(osp.dirname(sacred_dir))
        if run_dir in seen_run_dirs:
            continue
        seen_run_dirs.add(run_dir)

        # Deliberately a new local name: rebinding the `run_name` parameter
        # here would hide the filter value that the caller passed in.
        link_name = osp.basename(run_dir)

        # "tb" is TensorBoard directory built by our codebase. "sb_tb" is Stable
        # Baselines TensorBoard directory. There should be at most one of each
        # directory.
        for basename in ["tb", "sb_tb"]:
            tb_src_dirs = tuple(
                sacred_util.filter_subdirs(
                    run_dir, lambda path: osp.basename(path) == basename))
            if not tb_src_dirs:
                continue
            assert len(
                tb_src_dirs) == 1, "expect at most one TB dir of each type"
            tb_src_dir = tb_src_dirs[0]

            # One subdirectory per TensorBoard flavour, shared by all runs.
            symlinks_dir = osp.join(tmp_dir, basename)
            os.makedirs(symlinks_dir, exist_ok=True)

            tb_symlink = osp.join(symlinks_dir, link_name)
            os.symlink(tb_src_dir, tb_symlink)
            tb_dirs_count += 1

    tf.logging.info(
        f"Symlinked {tb_dirs_count} TensorBoard dirs to {tmp_dir}.")
    tf.logging.info(
        f"Start Tensorboard with `tensorboard --logdir {tmp_dir}`.")
    return {"n_tb_dirs": tb_dirs_count, "gather_dir": tmp_dir}
Example #2
0
def _get_sacred_dicts(source_dir: str, run_name: Optional[str],
                      env_name: Optional[str],
                      skip_failed_runs: bool) -> List[sacred_util.SacredDicts]:
    """Load Sacred records found under `source_dir`, optionally filtered.

    Every directory yielded by `sacred_util.filter_subdirs(source_dir)` is
    loaded with `sacred_util.SacredDicts.load_from_dir`, then the filters
    below are applied in order.

    Args:
      source_dir: Root directory that is searched for Sacred directories.
      run_name: If not None, keep only records whose `run` dict has
        "experiment.name" equal to this value.
      env_name: If not None, keep only records whose `config` dict has
        "env_name" equal to this value.
      skip_failed_runs: If True, drop records whose `run` dict has "status"
        equal to "FAILED".

    Returns:
      A list of the `SacredDicts` that passed every active filter.
    """
    sacred_dicts = [
        sacred_util.SacredDicts.load_from_dir(sacred_dir)
        for sacred_dir in sacred_util.filter_subdirs(source_dir)
    ]

    if run_name is not None:
        sacred_dicts = [
            sd for sd in sacred_dicts
            if get(sd.run, "experiment.name") == run_name
        ]

    if env_name is not None:
        sacred_dicts = [
            sd for sd in sacred_dicts
            if get(sd.config, "env_name") == env_name
        ]

    if skip_failed_runs:
        sacred_dicts = [
            sd for sd in sacred_dicts if get(sd.run, "status") != "FAILED"
        ]

    return sacred_dicts