            scope and third key the experiment kind, like with config_updates.
            Values at the leaf are tuples of named configs. The dicts across
            namespaces are recursively merged using `recursive_dict_merge`.
        output_fn: Function to call to generate saved output.
    """
    # Merge named_configs. We have a faux top-level layer to work around Sacred being
    # unable to have named configs build on top of each other's definitions in a
    # particular order.
    named_configs = [copy.deepcopy(cfg) for cfg in named_configs.values()]
    named_configs = functools.reduce(script_utils.recursive_dict_merge, named_configs)

    _input_validation(named_configs=named_configs)  # pylint:disable=no-value-for-parameter

    if vals_paths:
        vals = load_vals(vals_paths)
    else:
        vals = compute_vals(named_configs=named_configs)  # pylint:disable=no-value-for-parameter

    with open(os.path.join(log_dir, "vals.pkl"), "wb") as f:
        pickle.dump(vals, f)

    # TODO(adam): How to get generator reward? That might be easiest as a side-channel,
    # or a separate script, which you could potentially combine here.
    vals_filtered = filter_values(vals)  # pylint:disable=no-value-for-parameter
    output_fn(vals_filtered)


if __name__ == "__main__":
    script_utils.experiment_main(combined_distances_ex, "combined_distances")
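# For context, a minimal sketch of the merge semantics the reduce above relies on; this is
# an assumption about `script_utils.recursive_dict_merge` (later values override earlier
# ones, recursing into keys whose values are both dicts), not its actual implementation.
import functools


def recursive_dict_merge_sketch(lhs: dict, rhs: dict) -> dict:
    """Merge `rhs` into a copy of `lhs`, recursing where both values are dicts."""
    merged = dict(lhs)
    for key, rhs_val in rhs.items():
        lhs_val = merged.get(key)
        if isinstance(lhs_val, dict) and isinstance(rhs_val, dict):
            merged[key] = recursive_dict_merge_sketch(lhs_val, rhs_val)
        else:
            merged[key] = rhs_val
    return merged


configs = [{"point_maze": {"fast": ("test",)}}, {"point_maze": {"slow": ("long",)}}]
print(functools.reduce(recursive_dict_merge_sketch, configs))
# {'point_maze': {'fast': ('test',), 'slow': ('long',)}}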
        styles: styles to apply from `evaluating_rewards.analysis.stylesheets`.
        reward_subset: if specified, subset of keys to plot.
        discount: discount rate of MDP.
        log_dir: directory to write figures and other logging to.
        save_kwargs: passed through to `analysis.save_figs`.
    """
    with stylesheets.setup_styles(styles):
        rewards = gridworld_rewards.REWARDS
        if reward_subset is not None:
            rewards = {k: rewards[k] for k in reward_subset}
        divergence = compute_divergence(rewards, discount, kind)
        if normalize:
            divergence = normalize_dissimilarity(divergence)
        figs = heatmaps.compact_heatmaps(dissimilarity=divergence, **heatmap_kwargs)
        try:
            # Since tick labels are names, not emojis, for gridworlds, rotate them to save space.
            plt.xticks(rotation=45)
            plt.yticks(rotation=45)
            visualize.save_figs(log_dir, figs.items(), **save_kwargs)
        finally:
            for fig in figs.values():
                plt.close(fig)


if __name__ == "__main__":
    script_utils.experiment_main(plot_gridworld_heatmap_ex, "plot_gridworld_heatmap")
        trajectory_factory_kwargs: arguments to pass to the factory.
        n_episodes: the number of episodes to compute correlation over.
        log_dir: directory to save data to.

    Returns:
        Nested dictionary of aggregated distance values.
    """
    models, _, sess = common.load_models_create_sess(
        env_name, discount, itertools.chain(x_reward_cfgs, y_reward_cfgs)
    )

    logger.info("Sampling trajectories")
    with trajectory_factory(**trajectory_factory_kwargs) as trajectory_callable:
        with sess.as_default():
            returns = batch_compute_returns(trajectory_callable, models, discount, n_episodes)

    logger.info("Saving episode returns")
    with open(os.path.join(log_dir, "returns.pkl"), "wb") as f:
        pickle.dump(returns, f)

    aggregated = correlation_distance(  # pylint:disable=no-value-for-parameter
        returns, x_reward_cfgs, y_reward_cfgs
    )
    return aggregated


common.make_main(erc_distance_ex, compute_vals)

if __name__ == "__main__":
    script_utils.experiment_main(erc_distance_ex, "erc_distance")
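# A hypothetical sketch of the kind of statistic `correlation_distance` aggregates: mapping
# the Pearson correlation between two models' per-episode returns into a distance in [0, 1].
# The exact transform used by the real function is an assumption here.
import numpy as np


def pearson_distance_sketch(returns_x: np.ndarray, returns_y: np.ndarray) -> float:
    """Distance of 0 for perfectly correlated returns, 1 for anti-correlated returns."""
    rho = np.corrcoef(returns_x, returns_y)[0, 1]
    return (1.0 - rho) / 2.0


rng = np.random.default_rng(0)
ret = rng.normal(size=100)
print(pearson_distance_sketch(ret, 2 * ret + 1))  # ~0.0: invariant to positive affine rescaling
print(pearson_distance_sketch(ret, -ret))  # 1.0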
"""Thin wrapper around imitation.scripts.train_adversarial.""" import os from imitation.scripts import train_adversarial from evaluating_rewards import serialize from evaluating_rewards.scripts import script_utils @train_adversarial.train_ex.named_config def point_maze(): """IRL config for PointMaze environment.""" env_name = "imitation/PointMazeLeftVel-v0" rollout_path = os.path.join( serialize.get_output_dir(), "train_experts/ground_truth/20201203_105631_297835/imitation_PointMazeLeftVel-v0", "evaluating_rewards_PointMazeGroundTruthWithCtrl-v0/best/rollouts/final.pkl", ) total_timesteps = 1e6 _ = locals() del _ if __name__ == "__main__": script_utils.add_logging_config(train_adversarial.train_ex, "train_adversarial") script_utils.experiment_main(train_adversarial.train_ex, "train_adversarial", sacred_symlink=False)
# Copyright 2019 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Thin wrapper around imitation.scripts.eval_policy."""

from imitation.scripts import eval_policy

from evaluating_rewards.scripts import script_utils

if __name__ == "__main__":
    script_utils.add_logging_config(eval_policy.eval_policy_ex, "eval_policy")
    script_utils.experiment_main(eval_policy.eval_policy_ex, "eval_policy", sacred_symlink=False)
        # Specify in terms of total_timesteps so a longer trajectory_length
        # does not give the model more data.
        total_comparisons = total_timesteps // trajectory_length
        return trainer.fit_synthetic(
            venv,
            policy=policy,
            target=target,
            trajectory_length=trajectory_length,
            total_comparisons=total_comparisons,
            callback=callback,
        )

    return regress_utils.regress(
        seed=_seed,
        env_name=env_name,
        discount=discount,
        make_source=make_source,
        source_init=True,
        make_trainer=make_trainer,
        do_training=do_training,
        target_reward_type=target_reward_type,
        target_reward_path=target_reward_path,
        log_dir=log_dir,
        checkpoint_interval=checkpoint_interval,
    )


if __name__ == "__main__":
    script_utils.experiment_main(train_preferences_ex, "train_preferences")
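# To make the budgeting arithmetic above concrete: with a fixed timestep budget, doubling
# the trajectory length halves the number of comparisons, so comparisons * length (the
# total data seen, up to flooring and the two trajectories per comparison) stays constant.
for trajectory_length in (10, 100, 1000):
    total_comparisons = 1_000_000 // trajectory_length
    print(trajectory_length, total_comparisons, total_comparisons * trajectory_length)
# 10 100000 1000000
# 100 10000 1000000
# 1000 1000 1000000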
        model_reward_type)

    def make_trainer(model, model_scope, target):
        del model_scope
        return comparisons.RegressModel(model, target, learning_rate=learning_rate)

    def do_training(target, trainer):
        del target
        return trainer.fit(dataset_generator, total_timesteps=total_timesteps, batch_size=batch_size)

    return regress_utils.regress(
        seed=_seed,
        env_name=env_name,
        discount=discount,
        make_source=make_source,
        source_init=True,
        make_trainer=make_trainer,
        do_training=do_training,
        target_reward_type=target_reward_type,
        target_reward_path=target_reward_path,
        log_dir=log_dir,
    )


if __name__ == "__main__":
    script_utils.experiment_main(train_regress_ex, "train_regress")
    Args:
        state_reward: a dict containing the name of the reward and a 2D array.
        potential: a dict containing the name of the potential and a 2D array.
        styles: styles defined in `stylesheets` to apply.
        log_dir: the directory to save the figure in.
        fmt: the format to save the figure in.

    Returns:
        The generated figure.
    """
    reward_arrays = {}
    for pretty_name, reward_key in rewards:
        cfg = gridworld_rewards.REWARDS[reward_key]
        rew = gridworld_reward_heatmap.shape(
            _normalize(cfg["state_reward"]), _normalize(cfg["potential"]), discount
        )
        reward_arrays[pretty_name] = rew

    with stylesheets.setup_styles(styles):
        fig = gridworld_reward_heatmap.plot_gridworld_rewards(
            reward_arrays, ncols=ncols, discount=discount, vmin=vmin, vmax=vmax
        )
        try:
            visualize.save_fig(os.path.join(log_dir, "fig"), fig, fmt, transparent=False)
        finally:
            plt.close(fig)


if __name__ == "__main__":
    script_utils.experiment_main(plot_gridworld_reward_ex, "plot_gridworld_reward")
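# A minimal sketch of potential-based shaping, assuming `gridworld_reward_heatmap.shape`
# follows Ng et al.'s r'(s, s') = r(s) + discount * phi(s') - phi(s); the real function's
# output layout may differ.
import numpy as np


def shape_sketch(state_reward: np.ndarray, potential: np.ndarray, discount: float) -> np.ndarray:
    """Shaped reward for every (state, successor) pair over a flattened gridworld."""
    rew = state_reward.flatten()
    phi = potential.flatten()
    # Rows index the current state s, columns the successor state s'.
    return rew[:, None] + discount * phi[None, :] - phi[:, None]


print(shape_sketch(np.zeros((2, 2)), np.arange(4.0).reshape(2, 2), discount=0.99))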
# Copyright 2019 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Thin wrapper around imitation.scripts.expert_demos."""

from imitation.scripts import expert_demos

from evaluating_rewards.scripts import script_utils

if __name__ == "__main__":
    script_utils.add_logging_config(expert_demos.expert_demos_ex, "expert_demos")
    script_utils.experiment_main(expert_demos.expert_demos_ex, "expert_demos", sacred_symlink=False)
loss = res["loss"]["loss"] with stylesheets.setup_styles(styles): figs = {} figs["loss"] = loss_heatmap(loss, res["loss"]["unwrapped_loss"]) figs["affine"] = affine_heatmap(res["affine"]["scales"], res["affine"]["constants"]) visualize.save_figs(log_dir, figs.items(), **save_kwargs) if normalize: loss = heatmaps.normalize_dissimilarity(loss) vals = {} for name, aggregate_fn in aggregate_fns.items(): logger.info(f"Aggregating {name}") aggregated = loss.groupby(list(keys[:-1])).apply(aggregate_fn) vals.update({ f"{name}_{k}": aggregated.loc[(slice(None), slice(None), slice(None), slice(None), k)] for k in aggregated.index.levels[-1] }) return vals cli_common.make_main(plot_npec_heatmap_ex, compute_vals) if __name__ == "__main__": script_utils.experiment_main(plot_npec_heatmap_ex, "plot_npec_heatmap")
        ray.shutdown()

    stats = {}
    for k, v in zip(keys, values):
        stats.setdefault(k, []).append(v)

    logger.info("Saving raw statistics")
    with open(os.path.join(log_dir, "stats.pkl"), "wb") as f:
        pickle.dump(stats, f)

    dissimilarities = {k: [v["loss"][-1]["singleton"] for v in s] for k, s in stats.items()}
    if normalize:
        mean = {k: np.mean(v) for k, v in dissimilarities.items()}
        normalized = {}
        for k, v in dissimilarities.items():
            target, source = k
            if source == ZERO_CFG:
                continue
            zero_mean = mean[(target, ZERO_CFG)]
            normalized[k] = [x / zero_mean for x in v]
        dissimilarities = normalized

    return common.aggregate_seeds(aggregate_fns, dissimilarities)


common.make_main(npec_distance_ex, compute_vals)

if __name__ == "__main__":
    script_utils.experiment_main(npec_distance_ex, "npec_distance")
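# A toy run of the normalization above: each (target, source) value is divided by the mean
# distance from the same target to the Zero reward, so distances become multiples of the
# "distance to an uninformative reward" baseline. Config names here are illustrative.
import numpy as np

ZERO_CFG = ("evaluating_rewards/Zero-v0", "dummy")  # illustrative stand-in
dissimilarities = {
    ("gt", "learned"): [0.4, 0.6],
    ("gt", ZERO_CFG): [2.0, 2.0],
}
mean = {k: np.mean(v) for k, v in dissimilarities.items()}
normalized = {
    (target, source): [x / mean[(target, ZERO_CFG)] for x in v]
    for (target, source), v in dissimilarities.items()
    if source != ZERO_CFG
}
print(normalized)  # {('gt', 'learned'): [0.2, 0.3]}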
    def make_trainer(model, model_scope, target):
        del model_scope
        return comparison_class(model, target, **comparison_kwargs)

    def do_training(target, trainer):
        del target
        return trainer.fit(
            dataset_generator,
            total_timesteps=total_timesteps,
            batch_size=batch_size,
            affine_size=affine_size,
            **fit_kwargs,
        )

    return regress_utils.regress(
        seed=_seed,
        env_name=env_name,
        discount=discount,
        make_source=make_source,
        source_init=False,
        make_trainer=make_trainer,
        do_training=do_training,
        target_reward_type=target_reward_type,
        target_reward_path=target_reward_path,
        log_dir=log_dir,
    )


if __name__ == "__main__":
    script_utils.experiment_main(model_comparison_ex, "model_comparison")
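# For intuition, `regress_utils.regress` is driven entirely by the three callbacks built
# above (their signatures match the code, but this orchestration is a hypothetical
# skeleton; the real function also handles seeding, graph/session setup, and logging):
def regress_skeleton(venv, target, make_source, make_trainer, do_training, source_init):
    model = make_source(venv)
    if source_init:
        pass  # e.g. randomly initialize the source model's variables
    trainer = make_trainer(model, model_scope="model", target=target)
    stats = do_training(target, trainer)
    return model, stats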
"""Entry-point into script to train expert policies specified by config. Args: ray_kwargs: arguments passed to `ray.init`. num_cpus_fudge_factor: factor by which to scale `num_vec` to compute CPU requirements. global_configs: configuration to apply to all environment-reward pairs. configs: configuration for each environment-reward pair. log_dir: the root directory to log experiments to. Returns: Statistics `stats` for all policies, where `stats[(env_name, (reward_type, reward_path))][i]` are the statistics for seed `i` of the given environment and reward pair. """ ray.init(**ray_kwargs) try: stats = rl_common.parallel_training(global_configs, configs, num_cpus_fudge_factor, log_dir) select_best(stats, log_dir) finally: ray.shutdown() print(tabulate_stats(stats)) return stats if __name__ == "__main__": script_utils.experiment_main(experts_ex, "train_experts")
        reward = point_mass_analysis.evaluate_reward_model(
            env,
            model,
            goal=goal,
            pos_lim=pos_lim,
            pos_density=pos_density,
            vel_lim=vel_lim,
            act_lim=act_lim,
            density=density,
        )
        rewards[model_name] = reward

    if len(rewards) == 1:
        reward = next(iter(rewards.values()))
        kwargs = {"col_wrap": ncols}
    else:
        reward = xr.Dataset(rewards).to_array("model")
        kwargs = {"row": "Model"}

    fig = point_mass_analysis.plot_reward(reward, cbar_kwargs=cbar_kwargs, **kwargs)
    save_path = os.path.join(log_dir, "reward")
    visualize.save_fig(save_path, fig, fmt=fmt)

    return reward


if __name__ == "__main__":
    script_utils.experiment_main(plot_pm_reward_ex, "plot_pm_reward")
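# A toy demonstration of the stacking branch above: `xr.Dataset(...).to_array("model")`
# (a real xarray API) combines per-model DataArrays into one array with a new "model"
# dimension, which the plotting call can then facet row-wise.
import numpy as np
import xarray as xr

rewards = {
    "gt": xr.DataArray(np.zeros((2, 2)), dims=("x", "y")),
    "learned": xr.DataArray(np.ones((2, 2)), dims=("x", "y")),
}
stacked = xr.Dataset(rewards).to_array("model")
print(stacked.dims)  # ('model', 'x', 'y')
print(stacked.sel(model="learned").values)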