Example #1
def plot_pm_reward(
    styles: Iterable[str],
    env_name: str,
    discount: float,
    models: Sequence[Tuple[str, str, str]],
    data_root: str,
    # Mesh parameters
    pos_lim: float,
    pos_density: int,
    vel_lim: float,
    act_lim: float,
    density: int,
    # Figure parameters
    ncols: int,
    cbar_kwargs: Mapping[str, Any],
    log_dir: str,
    fmt: str,
) -> xr.DataArray:
    """Entry-point into script to visualize a reward model for point mass."""
    with stylesheets.setup_styles(styles):
        env = gym.make(env_name)
        venv = vec_env.DummyVecEnv([lambda: env])
        goal = np.array([0.0])

        rewards = {}
        with networks.make_session():
            for model_name, reward_type, reward_path in models:
                reward_path = os.path.join(data_root, reward_path)
                model = serialize.load_reward(reward_type, reward_path, venv,
                                              discount)
                reward = point_mass_analysis.evaluate_reward_model(
                    env,
                    model,
                    goal=goal,
                    pos_lim=pos_lim,
                    pos_density=pos_density,
                    vel_lim=vel_lim,
                    act_lim=act_lim,
                    density=density,
                )
                rewards[model_name] = reward

        if len(rewards) == 1:
            reward = next(iter(rewards.values()))
            kwargs = {"col_wrap": ncols}
        else:
            reward = xr.Dataset(rewards).to_array("model")
            kwargs = {"row": "Model"}

        fig = point_mass_analysis.plot_reward(reward,
                                              cbar_kwargs=cbar_kwargs,
                                              **kwargs)
        save_path = os.path.join(log_dir, "reward")
        visualize.save_fig(save_path, fig, fmt=fmt)

        return reward
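
A minimal invocation sketch of the entry point above. Every argument value here is hypothetical (including the environment ID and reward type), and it assumes the `evaluating_rewards` package providing `stylesheets`, `serialize`, `point_mass_analysis`, and `visualize` is importable:

reward = plot_pm_reward(
    styles=["paper"],  # hypothetical stylesheet name
    env_name="evaluating_rewards/PointMassLine-v0",  # hypothetical env ID
    discount=0.99,
    models=[("learned", "evaluating_rewards/RewardModel-v0", "checkpoints/model")],
    data_root="data",
    pos_lim=1.0,
    pos_density=21,
    vel_lim=1.0,
    act_lim=1.0,
    density=21,
    ncols=3,
    cbar_kwargs={},
    log_dir="output/pm_reward",
    fmt="pdf",
)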
Example #2
def distance_over_time(
    vals_filtered: ValsFiltered,
    pretty_algorithms: Mapping[str, common_config.RewardCfg],
    pretty_models: Mapping[str, common_config.RewardCfg],
    log_dir: str,
    styles: Iterable[str],
    prefix: str = "bootstrap",
) -> None:
    """
    Plots timeseries of distances.

    Only works with certain configs, like `point_maze_checkpoints`.
    """
    _timeseries_distances_curried = functools.partial(
        _timeseries_distances,
        pretty_algorithms=pretty_algorithms,
        pretty_models=pretty_models)
    lower = _timeseries_distances_curried(vals_filtered[f"{prefix}_lower"])
    mid = _timeseries_distances_curried(vals_filtered[f"{prefix}_middle"])
    upper = _timeseries_distances_curried(vals_filtered[f"{prefix}_upper"])

    algo_categories = list(mid["Algorithm"].dtype.categories) + ["RL *"]
    for algorithm in algo_categories:
        custom_styles = []
        if algorithm == "RL *":
            custom_styles = [style + "-tall-legend" for style in styles]
            custom_styles = [
                style for style in custom_styles if style in stylesheets.STYLES
            ]

        with stylesheets.setup_styles(list(styles) + custom_styles):
            fig = _make_distance_over_time_plot(mid, lower, upper, "Algorithm",
                                                algorithm, "Reward")
            visualize.save_fig(
                os.path.join(log_dir, "timeseries", f"algorithm_{algorithm}"),
                fig)

    for reward in mid["Reward"].dtype.categories:
        with stylesheets.setup_styles(styles):
            fig = _make_distance_over_time_plot(mid, lower, upper, "Reward",
                                                reward, "Algorithm")
            visualize.save_fig(
                os.path.join(log_dir, "timeseries", f"reward_{reward}"), fig)
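
A hypothetical call of the function above. It assumes `vals_filtered` was computed upstream with `bootstrap_lower`/`bootstrap_middle`/`bootstrap_upper` keys (matching the default `prefix`), and that each `RewardCfg` is a `(reward_type, reward_path)` tuple; the mapping entries are placeholders:

distance_over_time(
    vals_filtered=vals_filtered,  # assumed computed upstream, e.g. by compute_vals
    pretty_algorithms={"Regress": ("regress", "dummy")},  # hypothetical entry
    pretty_models={"Zero": ("evaluating_rewards/Zero-v0", "dummy")},
    log_dir="output/timeseries",
    styles=["paper"],  # hypothetical stylesheet name
)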
Example #3
def save_artifacts(vals: Mapping[str, pd.Series], styles: Iterable[str],
                   log_dir: str, heatmap_kwargs, save_kwargs) -> None:
    """Plot a figure for each entry in `vals`, and save figures as well as pickled raw values."""
    os.makedirs(log_dir, exist_ok=True)

    logging.info("Saving raw values")
    with open(os.path.join(log_dir, "vals.pkl"), "wb") as f:
        pickle.dump(vals, f)

    logging.info("Plotting figures")
    with stylesheets.setup_styles(styles):
        # Create the figures before the try block so `figs` is always bound in
        # the finally clause; close the figure objects, not their string keys.
        figs = multi_heatmaps(vals, **heatmap_kwargs)
        try:
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)
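
A usage sketch with toy values. The dictionary keys and Series contents are hypothetical; in practice `multi_heatmaps` must know how to plot them:

import pandas as pd

vals = {
    "npec": pd.Series([0.1, 0.2]),  # hypothetical dissimilarity values
    "epic": pd.Series([0.3, 0.4]),
}
save_artifacts(vals, styles=["paper"], log_dir="output/artifacts",
               heatmap_kwargs={}, save_kwargs={})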
Example #4
def plot_gridworld_heatmap(
    normalize: bool,
    styles: Iterable[str],
    reward_subset: Optional[Iterable[str]],
    heatmap_kwargs: Dict[str, Any],
    kind: str,
    discount: float,
    log_dir: str,
    save_kwargs: Mapping[str, Any],
) -> None:
    """Entry-point into script to produce divergence heatmaps.

    Args:
        normalize: whether to divide by distance from Zero.
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        reward_subset: if specified, subset of keys to plot.
        discount: discount rate of MDP.
        log_dir: directory to write figures and other logging to.
        save_kwargs: passed through to `analysis.save_figs`.
    """
    with stylesheets.setup_styles(styles):
        rewards = gridworld_rewards.REWARDS
        if reward_subset is not None:
            rewards = {k: rewards[k] for k in reward_subset}
        # Compute the divergence over the (possibly subsetted) rewards; this must
        # happen outside the `if`, otherwise `divergence` is unbound when
        # `reward_subset` is None.
        divergence = compute_divergence(rewards, discount, kind)

        if normalize:
            divergence = normalize_dissimilarity(divergence)

        figs = heatmaps.compact_heatmaps(dissimilarity=divergence,
                                         **heatmap_kwargs)
        try:
            # Tick labels are reward names rather than emoji for gridworlds,
            # so rotate them to save space.
            plt.xticks(rotation=45)
            plt.yticks(rotation=45)
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)
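
A hypothetical invocation. The reward keys and the divergence `kind` are placeholders that would need to match `gridworld_rewards.REWARDS` and whatever `compute_divergence` supports:

plot_gridworld_heatmap(
    normalize=True,
    styles=["paper"],  # hypothetical stylesheet name
    reward_subset=["sparse_goal", "dense_goal"],  # hypothetical reward keys
    heatmap_kwargs={},
    kind="npec",  # hypothetical divergence kind
    discount=0.99,
    log_dir="output/gridworld_heatmap",
    save_kwargs={},
)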
Example #5
def _plot_heatmap(
    fn: Callable[[], Mapping[str, plt.Figure]],
    vals_path: str,
    styles: Iterable[str],
    styles_for_env: Iterable[str],
    log_dir: str,
    timestamp: str,
    save_kwargs: Mapping[str, Any],
) -> None:
    """Plots a figure for each entry loaded from `vals_path`.

    Args:
        vals_path: path to pickle file containing aggregated values.
            Produced by `evaluating_rewards.scripts.distances.*`.
        data_root: the root with respect to canonicalize reward configurations.
        x_reward_cfgs: tuples of reward_type and reward_path for x-axis.
        y_reward_cfgs: tuples of reward_type and reward_path for y-axis.
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        styles_for_env: extra styles to apply, concatenated with above.
        log_dir: directory to save data to.
        timestamp: timestamp + unique identifier, usually a component of `log_dir`.
        heatmap_kwargs: passed through to `heatmaps.compact_heatmaps`.
        save_kwargs: passed through to `analysis.save_figs`.
    """
    logging.info("Plotting figures")
    vals_dir = os.path.dirname(vals_path)
    plots_sym_dir = os.path.join(vals_dir, "plots")
    os.makedirs(plots_sym_dir, exist_ok=True)
    plots_sym_path = os.path.join(plots_sym_dir, timestamp)
    os.symlink(log_dir, plots_sym_path)

    styles = list(styles) + list(styles_for_env)
    with stylesheets.setup_styles(styles):
        # Call `fn` before the try block so `figs` is always bound in the
        # finally clause; close the figure objects, not their string keys.
        figs = fn()
        try:
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)
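
A sketch of driving `_plot_heatmap`. `make_figs` is a hypothetical stand-in for a real figure producer, and the paths are placeholders (the function symlinks `log_dir` into a `plots/` directory next to `vals_path`, so that directory must exist):

import matplotlib.pyplot as plt

def make_figs():
    # Hypothetical stand-in returning a mapping from figure names to figures.
    fig, ax = plt.subplots()
    ax.plot([0, 1], [0, 1])
    return {"demo": fig}

timestamp = "20230101_000000"  # hypothetical run identifier
_plot_heatmap(
    fn=make_figs,
    vals_path="data/aggregated/vals.pkl",
    styles=["paper"],
    styles_for_env=["pointmass-env"],  # hypothetical extra style
    log_dir=os.path.join("output/plots", timestamp),
    timestamp=timestamp,
    save_kwargs={},
)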
Example #6
def plot_gridworld_reward(
    discount: float,
    styles: Iterable[str],
    rewards: Iterable[Tuple[str, str]],
    ncols: int,
    log_dir: str,
    fmt: str,
    vmin: Optional[float],
    vmax: Optional[float],
) -> None:
    """Plots a heatmap of a reward for the gridworld.

    Args:
        - state_reward: a dict containing the name of the reward and a 2D array.
        - potential: a dict containing the name of the potential and a 2D array.
        - styles: styles defined in `stylesheets` to apply.
        - log_dir: the directory to save the figure in.
        - fmt: the format to save the figure in.

    Returns:
        The generated figure.
    """
    reward_arrays = {}
    for pretty_name, reward_key in rewards:
        cfg = gridworld_rewards.REWARDS[reward_key]
        rew = gridworld_reward_heatmap.shape(
            _normalize(cfg["state_reward"]), _normalize(cfg["potential"]), discount
        )
        reward_arrays[pretty_name] = rew

    with stylesheets.setup_styles(styles):
        # Create the figure before the try block so `fig` is always bound in
        # the finally clause.
        fig = gridworld_reward_heatmap.plot_gridworld_rewards(
            reward_arrays, ncols=ncols, discount=discount, vmin=vmin, vmax=vmax
        )
        try:
            visualize.save_fig(os.path.join(log_dir, "fig"), fig, fmt, transparent=False)
        finally:
            plt.close(fig)
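
A hypothetical call; each `(pretty_name, reward_key)` pair must index into `gridworld_rewards.REWARDS`:

plot_gridworld_reward(
    discount=0.99,
    styles=["paper"],  # hypothetical stylesheet name
    rewards=[("Sparse goal", "sparse_goal")],  # hypothetical pair
    ncols=2,
    log_dir="output/gridworld_reward",
    fmt="pdf",
    vmin=None,  # let the plotting code infer color limits
    vmax=None,
)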
Example #7
def compute_vals(
    x_reward_cfgs: Iterable[cli_common.RewardCfg],
    y_reward_cfgs: Iterable[cli_common.RewardCfg],
    normalize: bool,
    styles: Iterable[str],
    data_root: str,
    data_subdir: Optional[str],
    search: Mapping[str, Any],
    aggregate_fns: Mapping[str, cli_common.AggregateFn],
    log_dir: str,
    save_kwargs: Mapping[str, Any],
) -> Mapping[str, pd.Series]:
    """Computes values for dissimilarity heatmaps.

    Args:
        x_reward_cfgs: tuples of reward_type and reward_path for x-axis (target).
        y_reward_cfgs: tuples of reward_type and reward_path for y-axis (source).
        normalize: whether to divide by distance from Zero. Distances should then all be
            between 0 and 1 (although may exceed it due to optimisation error).
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        data_root: where to load data from.
        data_subdir: subdirectory to load data from.
        search: mapping which Sacred configs must match to be included in results.
        aggregate_fns: Mapping from strings to aggregators to be applied on sequences of floats.
        log_dir: directory to write figures and other logging to.
        save_kwargs: passed through to `analysis.save_figs`.

    Returns:
        A mapping of keywords to Series.
    """
    # Sacred turns our tuples into lists; convert back to tuples.
    x_reward_cfgs = [
        cli_common.canonicalize_reward_cfg(cfg, data_root)
        for cfg in x_reward_cfgs
    ]
    y_reward_cfgs = [
        cli_common.canonicalize_reward_cfg(cfg, data_root)
        for cfg in y_reward_cfgs
    ]
    y_reward_cfgs.append(("evaluating_rewards/Zero-v0", "dummy"))

    data_dir = data_root
    if data_subdir is not None:
        data_dir = os.path.join(data_dir, data_subdir)
    # Work around key names reserved by Sacred by stripping the "escape/" prefix.
    search = dict(search)
    for k, v in search.items():
        if isinstance(v, dict):
            search[k] = {
                inner_k.replace("escape/", ""): inner_v
                for inner_k, inner_v in v.items()
            }

    def cfg_filter(cfg):
        matches_search = all(cfg.get(k) == v for k, v in search.items())
        source_cfg = (cfg.get("source_reward_type"),
                      cfg.get("source_reward_path"))
        matches_source = cli_common.canonicalize_reward_cfg(
            source_cfg, data_root) in y_reward_cfgs
        target_cfg = (cfg.get("target_reward_type"),
                      cfg.get("target_reward_path"))
        matches_target = cli_common.canonicalize_reward_cfg(
            target_cfg, data_root) in x_reward_cfgs
        return matches_search and matches_source and matches_target

    keys = (
        "source_reward_type",
        "source_reward_path",
        "target_reward_type",
        "target_reward_path",
        "seed",
    )
    stats = results.load_multiple_stats(data_dir, keys, cfg_filter=cfg_filter)
    res = results.pipeline(stats)
    loss = res["loss"]["loss"]

    with stylesheets.setup_styles(styles):
        figs = {}
        figs["loss"] = loss_heatmap(loss, res["loss"]["unwrapped_loss"])
        figs["affine"] = affine_heatmap(res["affine"]["scales"],
                                        res["affine"]["constants"])
        visualize.save_figs(log_dir, figs.items(), **save_kwargs)

    if normalize:
        loss = heatmaps.normalize_dissimilarity(loss)

    vals = {}
    for name, aggregate_fn in aggregate_fns.items():
        logger.info(f"Aggregating {name}")

        aggregated = loss.groupby(list(keys[:-1])).apply(aggregate_fn)
        vals.update({
            f"{name}_{k}": aggregated.loc[(slice(None), slice(None),
                                           slice(None), slice(None), k)]
            for k in aggregated.index.levels[-1]
        })

    return vals
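
A sketch of a call that would yield the `bootstrap_lower`/`bootstrap_middle`/`bootstrap_upper` keys consumed by `distance_over_time` in Example #2. The reward configs, directory layout, and quantile aggregator are hypothetical; note the aggregator must return a `pd.Series`, since its index becomes the final level of `aggregated.index`:

import pandas as pd

def bootstrap(values: pd.Series) -> pd.Series:
    # Hypothetical aggregator: empirical 5%/50%/95% quantiles.
    return pd.Series({
        "lower": values.quantile(0.05),
        "middle": values.quantile(0.5),
        "upper": values.quantile(0.95),
    })

vals = compute_vals(
    x_reward_cfgs=[("evaluating_rewards/PointMassDense-v0", "dummy")],  # hypothetical
    y_reward_cfgs=[("evaluating_rewards/PointMassSparse-v0", "dummy")],  # hypothetical
    normalize=True,
    styles=["paper"],  # hypothetical stylesheet name
    data_root="data",
    data_subdir="comparison",  # hypothetical subdirectory
    search={},
    aggregate_fns={"bootstrap": bootstrap},
    log_dir="output/compute_vals",
    save_kwargs={},
)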