def plot_pm_reward(
    styles: Iterable[str],
    env_name: str,
    discount: float,
    models: Sequence[Tuple[str, str, str]],
    data_root: str,
    # Mesh parameters
    pos_lim: float,
    pos_density: int,
    vel_lim: float,
    act_lim: float,
    density: int,
    # Figure parameters
    ncols: int,
    cbar_kwargs: Mapping[str, Any],
    log_dir: str,
    fmt: str,
) -> xr.DataArray:
    """Entry-point into script to visualize a reward model for point mass."""
    with stylesheets.setup_styles(styles):
        env = gym.make(env_name)
        venv = vec_env.DummyVecEnv([lambda: env])
        goal = np.array([0.0])
        rewards = {}

        with networks.make_session():
            for model_name, reward_type, reward_path in models:
                reward_path = os.path.join(data_root, reward_path)
                model = serialize.load_reward(reward_type, reward_path, venv, discount)
                reward = point_mass_analysis.evaluate_reward_model(
                    env,
                    model,
                    goal=goal,
                    pos_lim=pos_lim,
                    pos_density=pos_density,
                    vel_lim=vel_lim,
                    act_lim=act_lim,
                    density=density,
                )
                rewards[model_name] = reward

        if len(rewards) == 1:
            reward = next(iter(rewards.values()))
            kwargs = {"col_wrap": ncols}
        else:
            reward = xr.Dataset(rewards).to_array("model")
            kwargs = {"row": "Model"}

        fig = point_mass_analysis.plot_reward(reward, cbar_kwargs=cbar_kwargs, **kwargs)
        save_path = os.path.join(log_dir, "reward")
        visualize.save_fig(save_path, fig, fmt=fmt)

        return reward
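# Usage sketch (not part of the original script): how `plot_pm_reward` might be
# invoked directly, bypassing Sacred. The style name, environment id, model
# tuple, paths, and mesh parameters below are illustrative placeholders.
def _example_plot_pm_reward() -> xr.DataArray:
    return plot_pm_reward(
        styles=["paper"],
        env_name="evaluating_rewards/PointMassLine-v0",
        discount=0.99,
        # (model_name, reward_type, reward_path); the path is a placeholder
        models=[("Ground Truth", "evaluating_rewards/PointMassGroundTruth-v0", "dummy")],
        data_root="data",
        pos_lim=1.0,
        pos_density=9,
        vel_lim=1.0,
        act_lim=1.0,
        density=21,
        ncols=2,
        cbar_kwargs={},
        log_dir="/tmp/pm_reward",
        fmt="pdf",
    )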
def distance_over_time(
    vals_filtered: ValsFiltered,
    pretty_algorithms: Mapping[str, common_config.RewardCfg],
    pretty_models: Mapping[str, common_config.RewardCfg],
    log_dir: str,
    styles: Iterable[str],
    prefix: str = "bootstrap",
) -> None:
    """Plots timeseries of distances.

    Only works with certain configs, like `point_maze_checkpoints`.
    """
    _timeseries_distances_curried = functools.partial(
        _timeseries_distances,
        pretty_algorithms=pretty_algorithms,
        pretty_models=pretty_models,
    )
    lower = _timeseries_distances_curried(vals_filtered[f"{prefix}_lower"])
    mid = _timeseries_distances_curried(vals_filtered[f"{prefix}_middle"])
    upper = _timeseries_distances_curried(vals_filtered[f"{prefix}_upper"])

    algo_categories = list(mid["Algorithm"].dtype.categories) + ["RL *"]
    for algorithm in algo_categories:
        custom_styles = []
        if algorithm == "RL *":
            custom_styles = [style + "-tall-legend" for style in styles]
            custom_styles = [style for style in custom_styles if style in stylesheets.STYLES]
        with stylesheets.setup_styles(list(styles) + custom_styles):
            fig = _make_distance_over_time_plot(mid, lower, upper, "Algorithm", algorithm, "Reward")
            visualize.save_fig(os.path.join(log_dir, "timeseries", f"algorithm_{algorithm}"), fig)

    for reward in mid["Reward"].dtype.categories:
        with stylesheets.setup_styles(styles):
            fig = _make_distance_over_time_plot(mid, lower, upper, "Reward", reward, "Algorithm")
            visualize.save_fig(os.path.join(log_dir, "timeseries", f"reward_{reward}"), fig)
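# Note on the expected input shape (inferred from the lookups above, not from
# project documentation): `vals_filtered` must contain one entry per bootstrap
# statistic under the given prefix, e.g. with the default `prefix="bootstrap"`:
# {"bootstrap_lower": ..., "bootstrap_middle": ..., "bootstrap_upper": ...}.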
def save_artifacts(
    vals: Mapping[str, pd.Series],
    styles: Iterable[str],
    log_dir: str,
    heatmap_kwargs: Mapping[str, Any],
    save_kwargs: Mapping[str, Any],
) -> None:
    """Plots a figure for each entry in `vals`, and saves figures as well as pickled raw values."""
    os.makedirs(log_dir, exist_ok=True)

    logging.info("Saving raw values")
    with open(os.path.join(log_dir, "vals.pkl"), "wb") as f:
        pickle.dump(vals, f)

    logging.info("Plotting figures")
    figs = {}  # ensure `figs` is bound even if `multi_heatmaps` raises
    with stylesheets.setup_styles(styles):
        try:
            figs = multi_heatmaps(vals, **heatmap_kwargs)
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            # Close by Figure object: iterating the mapping directly yields
            # string keys, which `plt.close` would treat as figure labels.
            for fig in figs.values():
                plt.close(fig)
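# Round-trip sketch (helper added for illustration, not in the original module):
# the raw values pickled by `save_artifacts` can be reloaded for ad-hoc analysis.
def _load_artifacts(log_dir: str) -> Mapping[str, pd.Series]:
    """Loads the `vals.pkl` written by `save_artifacts`."""
    with open(os.path.join(log_dir, "vals.pkl"), "rb") as f:
        return pickle.load(f)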
def plot_gridworld_heatmap(
    normalize: bool,
    styles: Iterable[str],
    reward_subset: Optional[Iterable[str]],
    heatmap_kwargs: Dict[str, Any],
    kind: str,
    discount: float,
    log_dir: str,
    save_kwargs: Mapping[str, Any],
) -> None:
    """Entry-point into script to produce divergence heatmaps.

    Args:
        normalize: whether to divide by distance from Zero.
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        reward_subset: if specified, subset of keys to plot.
        heatmap_kwargs: passed through to `heatmaps.compact_heatmaps`.
        kind: the kind of divergence to compute, passed to `compute_divergence`.
        discount: discount rate of MDP.
        log_dir: directory to write figures and other logging to.
        save_kwargs: passed through to `analysis.save_figs`.
    """
    with stylesheets.setup_styles(styles):
        rewards = gridworld_rewards.REWARDS
        if reward_subset is not None:
            rewards = {k: rewards[k] for k in reward_subset}
        divergence = compute_divergence(rewards, discount, kind)
        if normalize:
            divergence = normalize_dissimilarity(divergence)

        figs = heatmaps.compact_heatmaps(dissimilarity=divergence, **heatmap_kwargs)
        try:
            # Since tick labels are names not emojis for gridworlds, rotate to save space
            plt.xticks(rotation=45)
            plt.yticks(rotation=45)
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)
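# Usage sketch (hypothetical values): plot normalized divergence heatmaps for a
# subset of the built-in gridworld rewards. The style name, reward keys, and
# `kind` below are placeholders; valid reward keys are those of
# `gridworld_rewards.REWARDS`.
def _example_gridworld_heatmap() -> None:
    plot_gridworld_heatmap(
        normalize=True,
        styles=["paper"],
        reward_subset=["sparse_goal", "dense_goal"],
        heatmap_kwargs={},
        kind="npec",
        discount=0.99,
        log_dir="/tmp/gridworld_heatmap",
        save_kwargs={},
    )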
def _plot_heatmap(
    fn: Callable[[], Mapping[str, plt.Figure]],
    vals_path: str,
    styles: Iterable[str],
    styles_for_env: Iterable[str],
    log_dir: str,
    timestamp: str,
    save_kwargs: Mapping[str, Any],
) -> None:
    """Plots a figure for each entry loaded from `vals_path`.

    Args:
        fn: a zero-argument callable returning a mapping from names to figures.
        vals_path: path to pickle file containing aggregated values. Produced by
            `evaluating_rewards.scripts.distances.*`.
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        styles_for_env: extra styles to apply, concatenated with above.
        log_dir: directory to save data to.
        timestamp: timestamp + unique identifier, usually a component of `log_dir`.
        save_kwargs: passed through to `analysis.save_figs`.
    """
    logging.info("Plotting figures")

    vals_dir = os.path.dirname(vals_path)
    plots_sym_dir = os.path.join(vals_dir, "plots")
    os.makedirs(plots_sym_dir, exist_ok=True)
    plots_sym_path = os.path.join(plots_sym_dir, timestamp)
    os.symlink(log_dir, plots_sym_path)

    styles = list(styles) + list(styles_for_env)
    figs = {}  # ensure `figs` is bound even if `fn()` raises
    with stylesheets.setup_styles(styles):
        try:
            figs = fn()
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)
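# Usage sketch: `_plot_heatmap` is generic over the plotting callable, so a
# zero-argument `fn` is typically built by currying, e.g. (names illustrative):
#
#   fn = functools.partial(heatmaps.compact_heatmaps, dissimilarity=dissimilarity)
#   _plot_heatmap(fn, vals_path, styles, styles_for_env, log_dir, timestamp, save_kwargs)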
def plot_gridworld_reward(
    discount: float,
    styles: Iterable[str],
    rewards: Iterable[Tuple[str, str]],
    ncols: int,
    log_dir: str,
    fmt: str,
    vmin: Optional[float],
    vmax: Optional[float],
) -> None:
    """Plots a heatmap of a reward for the gridworld.

    Args:
        discount: discount rate of MDP, used when shaping with the potential.
        styles: styles defined in `stylesheets` to apply.
        rewards: tuples of (pretty name, key into `gridworld_rewards.REWARDS`)
            specifying the rewards to plot.
        ncols: number of columns in the figure.
        log_dir: the directory to save the figure in.
        fmt: the format to save the figure in.
        vmin: minimum value of the color scale.
        vmax: maximum value of the color scale.
    """
    reward_arrays = {}
    for pretty_name, reward_key in rewards:
        cfg = gridworld_rewards.REWARDS[reward_key]
        rew = gridworld_reward_heatmap.shape(
            _normalize(cfg["state_reward"]), _normalize(cfg["potential"]), discount
        )
        reward_arrays[pretty_name] = rew

    with stylesheets.setup_styles(styles):
        # Create the figure before the try block so `plt.close(fig)` in the
        # finally clause cannot raise NameError if plotting fails.
        fig = gridworld_reward_heatmap.plot_gridworld_rewards(
            reward_arrays, ncols=ncols, discount=discount, vmin=vmin, vmax=vmax
        )
        try:
            visualize.save_fig(os.path.join(log_dir, "fig"), fig, fmt, transparent=False)
        finally:
            plt.close(fig)
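# Background sketch: `gridworld_reward_heatmap.shape` applies potential shaping
# in the sense of Ng et al. (1999). A minimal reference implementation over
# flattened grids follows (an illustration only, not the project's
# implementation; the convention that the state reward is earned at the
# successor state is an assumption):
def _shape_sketch(state_reward: np.ndarray, potential: np.ndarray, discount: float) -> np.ndarray:
    """Returns shaped[s, s'] = r(s') + discount * phi(s') - phi(s)."""
    r = state_reward.flatten()
    phi = potential.flatten()
    return r[np.newaxis, :] + discount * phi[np.newaxis, :] - phi[:, np.newaxis]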
def compute_vals(
    x_reward_cfgs: Iterable[cli_common.RewardCfg],
    y_reward_cfgs: Iterable[cli_common.RewardCfg],
    normalize: bool,
    styles: Iterable[str],
    data_root: str,
    data_subdir: Optional[str],
    search: Mapping[str, Any],
    aggregate_fns: Mapping[str, cli_common.AggregateFn],
    log_dir: str,
    save_kwargs: Mapping[str, Any],
) -> Mapping[str, pd.Series]:
    """Computes values for dissimilarity heatmaps.

    Args:
        x_reward_cfgs: tuples of reward_type and reward_path for x-axis (target).
        y_reward_cfgs: tuples of reward_type and reward_path for y-axis (source).
        normalize: whether to divide by distance from Zero. Distances should then
            all be between 0 and 1 (although may exceed it due to optimisation error).
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        data_root: where to load data from.
        data_subdir: subdirectory to load data from.
        search: mapping which Sacred configs must match to be included in results.
        aggregate_fns: Mapping from strings to aggregators to be applied on
            sequences of floats.
        log_dir: directory to write figures and other logging to.
        save_kwargs: passed through to `analysis.save_figs`.

    Returns:
        A mapping of keywords to Series.
    """
    # Sacred turns our tuples into lists :(, undo
    x_reward_cfgs = [cli_common.canonicalize_reward_cfg(cfg, data_root) for cfg in x_reward_cfgs]
    y_reward_cfgs = [cli_common.canonicalize_reward_cfg(cfg, data_root) for cfg in y_reward_cfgs]
    y_reward_cfgs.append(("evaluating_rewards/Zero-v0", "dummy"))

    data_dir = data_root
    if data_subdir is not None:
        data_dir = os.path.join(data_dir, data_subdir)

    # Workaround tags reserved by Sacred
    search = dict(search)
    for k, v in search.items():
        if isinstance(v, dict):
            search[k] = {
                inner_k.replace("escape/", ""): inner_v for inner_k, inner_v in v.items()
            }

    def cfg_filter(cfg):
        matches_search = all(cfg.get(k) == v for k, v in search.items())
        source_cfg = cfg.get("source_reward_type"), cfg.get("source_reward_path")
        matches_source = cli_common.canonicalize_reward_cfg(source_cfg, data_root) in y_reward_cfgs
        target_cfg = cfg.get("target_reward_type"), cfg.get("target_reward_path")
        matches_target = cli_common.canonicalize_reward_cfg(target_cfg, data_root) in x_reward_cfgs
        return matches_search and matches_source and matches_target

    keys = (
        "source_reward_type",
        "source_reward_path",
        "target_reward_type",
        "target_reward_path",
        "seed",
    )
    stats = results.load_multiple_stats(data_dir, keys, cfg_filter=cfg_filter)
    res = results.pipeline(stats)
    loss = res["loss"]["loss"]

    with stylesheets.setup_styles(styles):
        figs = {}
        figs["loss"] = loss_heatmap(loss, res["loss"]["unwrapped_loss"])
        figs["affine"] = affine_heatmap(res["affine"]["scales"], res["affine"]["constants"])
        visualize.save_figs(log_dir, figs.items(), **save_kwargs)

    if normalize:
        loss = heatmaps.normalize_dissimilarity(loss)

    vals = {}
    for name, aggregate_fn in aggregate_fns.items():
        logger.info(f"Aggregating {name}")
        aggregated = loss.groupby(list(keys[:-1])).apply(aggregate_fn)
        vals.update(
            {
                f"{name}_{k}": aggregated.loc[
                    (slice(None), slice(None), slice(None), slice(None), k)
                ]
                for k in aggregated.index.levels[-1]
            }
        )
    return vals
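# Sketch of an `aggregate_fn` compatible with the aggregation loop above,
# inferred from how `aggregated.index.levels[-1]` is consumed: the aggregator
# maps a Series of per-seed losses to a Series keyed by statistic name, so the
# group-by result gains one extra index level ("lower"/"middle"/"upper" match
# the keys `distance_over_time` expects). Illustrative only, not the project's
# implementation.
def _bootstrap_aggregate(losses: pd.Series, n_boot: int = 1000) -> pd.Series:
    """Bootstraps the mean loss, returning a 95% interval plus the median."""
    samples = np.random.choice(losses.values, size=(n_boot, len(losses)), replace=True)
    means = samples.mean(axis=1)
    return pd.Series(
        {
            "lower": np.percentile(means, 2.5),
            "middle": np.percentile(means, 50),
            "upper": np.percentile(means, 97.5),
        }
    )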