def main():
    """Plot final goal distance on the gym Pusher3D task for each algorithm.

    Loads the trials of five algorithms from one experiment directory,
    smooths each trial's curve with ``sliding_mean`` (window of 10),
    truncates the curves of an algorithm to the shortest one, and draws the
    mean curve with a +/- one standard deviation band.  The figure is saved
    to ``results/iclr2018/gym-pusher-3d.jpg`` and then shown.

    Raises:
        ValueError: if no trials match the criteria of some algorithm.
        KeyError: if a trial has no column for the plotted metric.
    """
    exp_dir = "/home/vitchyr/git/railrl/data/doodads3/01-03-final-gym-pusher3d/"
    # (legend label, exp_id, algorithm), listed in legend order.
    runs = [
        ('TDMs', '16', 'DDPG-TDM'),
        ('DDPG', '7', 'DDPG'),
        ('DDPG-Sparse', '8', 'DDPG-Sparse'),
        ('HER', '10', 'HER-Andrychowicz'),
        ('Model-Based', '2', 'Model-Based-Dagger'),
    ]
    # Load everything up front so a bad path fails before any plotting.
    trials_by_name = [
        (
            name,
            get_trials(
                exp_dir,
                criteria={'exp_id': exp_id, 'algorithm': algorithm},
            ),
        )
        for name, exp_id, algorithm in runs
    ]

    max_iters = 1000000
    # Column names use underscores where the logged label had spaces.
    key = 'Multitask Final L2 distance to goal Mean'.replace(" ", "_")

    plt.figure()
    for name, trials in trials_by_name:
        all_values = []
        for trial in trials:
            try:
                values_ts = trial.data[key]
            except (KeyError, ValueError) as err:
                # NOTE(review): trial.data is presumably a dict-like or a
                # numpy structured array; the former raises KeyError and the
                # latter ValueError for a missing column.  Fail loudly rather
                # than continuing with an undefined/stale `values_ts`.
                raise KeyError(
                    "{} trial has no column {!r}".format(name, key)
                ) from err
            all_values.append(sliding_mean(values_ts, window=10))
        if not all_values:
            raise ValueError("No trials found for {}".format(name))

        # Trials may have run for different lengths; compare the common prefix.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :max_iters]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/gym-pusher-3d.jpg')
    plt.show()
def main():
    """Plot final goal distance on the ant max-distance-6 task per algorithm.

    Loads the trials of five algorithms, smooths each trial's curve with
    ``sliding_mean`` (window of 10), right-pads shorter curves with NaN up to
    the longest one, and draws the NaN-aware mean with a +/- one standard
    deviation band.  The figure is saved to
    ``results/iclr2018/ant-max-distance-6.jpg`` and then shown.

    Raises:
        ValueError: if no trials match the criteria of some algorithm.
        KeyError: if a trial has no column for the plotted metric.
    """
    main_dir = (
        "/home/vitchyr/git/railrl/data/doodads3/01-03-final-ant-max-distance-6/"
    )
    ddpg_dir = (
        "/home/vitchyr/git/railrl/data/doodads3/"
        "01-04-ddpg-ant-max-d-6-post-sweep/"
    )
    # (legend label, experiment directory, algorithm, exp_id), in legend order.
    runs = [
        ('TDM', main_dir, 'DDPG-TDM', '4'),
        ('DDPG', ddpg_dir, 'DDPG', '7'),
        ('HER', main_dir, 'HER-Andrychowicz', '13'),
        ('DDPG-Sparse', main_dir, 'DDPG-Sparse', '0'),
        ('Model-Based', main_dir, 'Model-Based-Dagger', '2'),
    ]
    # Load everything up front so a bad path fails before any plotting.
    trials_by_name = [
        (
            name,
            get_trials(
                exp_dir,
                criteria={'algorithm': algorithm, 'exp_id': exp_id},
            ),
        )
        for name, exp_dir, algorithm, exp_id in runs
    ]

    max_iters = 1000000
    # Column names use underscores where the logged label had spaces.
    key = 'Final Distance to goal Mean'.replace(" ", "_")

    plt.figure()
    for name, trials in trials_by_name:
        all_values = []
        for trial in trials:
            try:
                values_ts = trial.data[key]
            except (KeyError, ValueError) as err:
                # NOTE(review): trial.data is presumably a dict-like or a
                # numpy structured array; the former raises KeyError and the
                # latter ValueError for a missing column.  Fail loudly rather
                # than continuing with an undefined/stale `values_ts`.
                raise KeyError(
                    "{} trial has no column {!r}".format(name, key)
                ) from err
            all_values.append(sliding_mean(values_ts, window=10))
        if not all_values:
            raise ValueError("No trials found for {}".format(name))

        # Keep every sample: pad short runs with NaN so that the tail of the
        # curve is averaged only over the trials that reached it.
        max_len = max(map(len, all_values))
        costs = np.vstack([
            np.pad(
                values,
                (0, max_len - len(values)),
                'constant',
                constant_values=np.nan,
            )
            for values in all_values
        ])
        costs = costs[:, :max_iters]
        mean = np.nanmean(costs, axis=0)
        std = np.nanstd(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/ant-max-distance-6.jpg')
    plt.show()
# Copy the directories of the trials matching `criteria` into `base_dir`.
import os
from subprocess import call

from railrl.misc.data_processing import get_trials

base_dir = '/home/vitchyr/git/railrl/data/papers/nips2018/for-ashvin' \
           '/reacher-main-results-ours'

trials = get_trials(
    '/home/vitchyr/git/railrl/data/papers/nips2018/reacher-abalation-resample-strategy/',
    criteria={
        'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
        'replay_kwargs.fraction_goals_are_rollout_goals': 0.2,
        'algo_kwargs.num_updates_per_env_step': 5,
    }
)

output_dir = base_dir
print("Making dir", output_dir)
os.makedirs(output_dir, exist_ok=True)

for trial in trials:
    # The third element of a trial is used as its directory on disk.
    # Named `trial_dir` so the builtin `dir` is not shadowed.
    trial_dir = trial[2]
    print("cp -r {} {}".format(trial_dir, output_dir))
    # Argument list (no shell), so paths with spaces are passed through as-is.
    exit_code = call(["cp", "-r", trial_dir, output_dir])
    if exit_code != 0:
        # Keep going (best effort), but do not hide a failed copy.
        print("WARNING: cp exited with code {} for {}".format(
            exit_code, trial_dir))
import matplotlib.pyplot as plt from railrl.misc.data_processing import get_trials from railrl.visualization.plot_util import plot_trials, padded_ma_filter plt.style.use("ggplot") vae_trials = get_trials( # '/home/vitchyr/git/railrl/data/doodads3/05-12-sawyer-reach-vae-rl-log-prob-rewards-2', '/home/vitchyr/git/railrl/data/doodads3/05-14-paper-sawyer-reach-vae-rl-lprob-rewards-min-var-after-fact/', criteria={ 'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5, 'replay_kwargs.fraction_goals_are_rollout_goals': 0.2, 'reward_params.min_variance': 1, 'vae_wrapped_env_kwargs.sample_from_true_prior': False, }) state_her_td3 = get_trials( '/home/vitchyr/git/railrl/data/doodads3/05-13-full-state-sawyer-reach-2/', criteria={ 'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': 0.5, 'replay_buffer_kwargs.fraction_goals_are_rollout_goals': 0.2, 'exploration_type': 'ou', }) state_tdm_ddpg = get_trials( '/home/vitchyr/git/railrl/data/doodads3/05-14-tdm-ddpg-reach-sweep-2/', criteria={ 'env_class.$class': 'railrl.envs.mujoco.sawyer_gripper_env.SawyerXYEnv', 'algo_kwargs.base_kwargs.num_updates_per_env_step': 10, 'algo_kwargs.tdm_kwargs.max_tau': 5, },
def main():
    """Plot final goal distance on the 2D pusher task for each algorithm.

    Loads the trials of five algorithms (each from its own experiment
    directory and with its own metric column), truncates the curves of an
    algorithm to the shortest one and to at most 100 iterations, and draws
    the mean curve with a +/- one standard deviation band.  The HER curves
    are additionally passed through ``sliding_mean`` with a window of 20.
    The figure is saved to ``results/iclr2018/pusher.jpg`` and then shown.

    Raises:
        ValueError: if no trials match the criteria of some algorithm.
    """
    her_andry_trials = get_trials(
        "/home/vitchyr/git/railrl/data/doodads3/12-24-her-andrychowicz-pusher-rebutal/",
        criteria={
            'exp_id': '11',
        },
    )
    ddpg_path = "/mnt/data-backup-12-02-2017/doodads3/10-25-ddpg-pusher-again-baseline-with-reward-bonus/"
    ddpg_criteria = {
        'algo_params.num_updates_per_env_step': 5,
        'algo_params.scale_reward': 1,
        'algo_params.tau': 0.01,
    }
    mb_path = "/mnt/data-backup-12-02-2017/doodads3/10-25-abhishek-mb-baseline-pusher-again-shaped/"
    mb_criteria = None
    our_path = "/mnt/data-backup-12-02-2017/doodads3/11-02-get-results-handxyxy-small-sweep"
    our_criteria = {
        'algo_params.num_updates_per_env_step': 5,
        'epoch_discount_schedule_params.value': 5,
        'algo_params.tau': 0.001,
    }
    ddpg_trials = get_trials(ddpg_path, criteria=ddpg_criteria)
    mb_trials = get_trials(mb_path, criteria=mb_criteria)
    our_trials = get_trials(our_path, criteria=our_criteria)
    ddpg_sparse_trials = get_trials(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-sparse-sweep-4/",
        criteria={
            'env_class.$class':
                'railrl.envs.multitask.pusher2d.CylinderXYPusher2DEnv',
            'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': 1,
        }
    )

    max_iters = 100
    base_key = 'Final_Euclidean_distance_to_goal_Mean'
    object_key = "Final Distance object to goal Mean"
    # (trials, legend label, metric column); the experiments logged the
    # metric under different names.
    curves = [
        (our_trials, 'TDM', 'test_' + base_key),
        (ddpg_trials, 'DDPG', base_key),
        (her_andry_trials, 'HER', object_key),
        (ddpg_sparse_trials, 'DDPG-Sparse', object_key),
        (mb_trials, 'Model Based', base_key),
    ]

    plt.figure()
    for trials, name, key in curves:
        # Column names use underscores where the logged label had spaces.
        key = key.replace(" ", "_")
        all_values = [trial.data[key] for trial in trials]
        if not all_values:
            # Fail with a readable message instead of an opaque stacking
            # error (or an interactive debugger prompt).
            raise ValueError("No trials found for {}".format(name))

        # Trials may have run for different lengths; compare the common prefix.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :max_iters]
        if name == 'HER':
            costs = sliding_mean(costs, 20)
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)

    plt.xlabel("Environment Samples (x1000)")
    plt.ylabel("Final Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/pusher.jpg')
    plt.show()
# Load the four groups of trials for the sawyer push-and-reach comparison:
# two from the state-based experiment and two from the VAE-RL experiment.
import matplotlib.pyplot as plt

from railrl.misc.data_processing import get_trials
from railrl.visualization.plot_util import plot_trials

plt.style.use("ggplot")

_STATE_EXP_DIR = (
    '/home/vitchyr/git/railrl/data/doodads3/05-12-sawyer-push-and-reach-easy/'
)
_VAE_EXP_DIR = (
    '/home/vitchyr/git/railrl/data/doodads3/'
    '05-12-sawyer-push-and-reach-easy-vae-rl'
)

# State-based runs: goals resampled from both env goals and rollout goals.
_state_criteria = {
    'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
    'replay_buffer_kwargs.fraction_goals_are_rollout_goals': 0.2,
    'exploration_type': 'ou',
}
state_trials = get_trials(_STATE_EXP_DIR, criteria=_state_criteria)

# State-based runs where every goal is the rollout goal.
_td3_criteria = {
    'replay_buffer_kwargs.fraction_goals_are_rollout_goals': 1.0,
    'exploration_type': 'ou',
}
td3_trials = get_trials(_STATE_EXP_DIR, criteria=_td3_criteria)

# VAE-RL runs: goals resampled from both env goals and rollout goals.
_my_criteria = {
    'replay_kwargs.fraction_resampled_goals_are_env_goals': 0.5,
    'replay_kwargs.fraction_goals_are_rollout_goals': 0.2,
}
my_trials = get_trials(_VAE_EXP_DIR, criteria=_my_criteria)

# VAE-RL runs where every goal is the rollout goal.
_vae_td3_criteria = {
    'replay_kwargs.fraction_goals_are_rollout_goals': 1.0,
}
vae_td3_trials = get_trials(_VAE_EXP_DIR, criteria=_vae_td3_criteria)
from railrl.misc.data_processing import get_trials import matplotlib.pyplot as plt import numpy as np her_andry_trials = get_trials( '/home/vitchyr/git/railrl/data/doodads3/12-24-her-andrychowicz-reacher-rebutal/', criteria={ 'exp_id': '7', }, ) mb_path = "/home/vitchyr/git/railrl/data/doodads3/12-24-model-based-reacher-multitask-fixed-2/" ddpg_indicator_trials = get_trials( "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-sparse-sweep-4/", criteria={ 'env_class.$class': "railrl.envs.multitask.reacher_7dof.Reacher7DofXyzGoalState", 'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': 1, }, ) tdm_trials = get_trials( "/mnt/data-backup-12-02-2017/doodads3/10-27-sdql-reacher-get-long-results", criteria={ 'epoch_discount_schedule_params.value': 15, 'eval_with_oc_policy': False, 'algo_params.num_updates_per_env_step': 25, }) ddpg_trials = get_trials( "/mnt/data-backup-12-02-2017/doodads3/10-25-ddpg-reacher-pusher-baseline/",
# Copy the directories of the trials matching `criteria` into `base_dir`.
import os
import shutil
from subprocess import call

from railrl.misc.data_processing import get_trials

base_dir = '/home/vitchyr/git/railrl/data/papers/nips2018/for-ashvin' \
           '/reacher-baseline-oracle'

trials = get_trials(
    '/home/vitchyr/git/railrl/data/doodads3/05-16-paper-reacher-results-full-state-oracle-ish/',
    criteria={
        'replay_buffer_kwargs.fraction_resampled_goals_are_env_goals': 1.0,
        'algo_kwargs.num_updates_per_env_step': 5,
        'exploration_type': 'epsilon',
    })

output_dir = base_dir
print("Making dir", output_dir)
os.makedirs(output_dir, exist_ok=True)

for trial in trials:
    # The third element of a trial is used as its directory on disk.
    # Named `trial_dir` so the builtin `dir` is not shadowed.
    trial_dir = trial[2]
    print("cp -r {} {}".format(trial_dir, output_dir))
    # Argument list (no shell), so paths with spaces are passed through as-is.
    exit_code = call(["cp", "-r", trial_dir, output_dir])
    if exit_code != 0:
        # Keep going (best effort), but do not hide a failed copy.
        print("WARNING: cp exited with code {} for {}".format(
            exit_code, trial_dir))
vitchyr_base_dir, format_func, configure_matplotlib, ) import matplotlib.pyplot as plt from railrl.visualization import plot_util as plot from railrl.misc import data_processing as dp configure_matplotlib(matplotlib) pusher_dir = vitchyr_base_dir + 'papers/nips2018/pusher-online-vae' online_pusher = dp.get_trials(pusher_dir, criteria={ 'rdim': 250, 'algo_kwargs.should_train_vae.$function': 'railrl.torch.vae.vae_schedules.every_three', }) offline_pusher = dp.get_trials( pusher_dir, criteria={ 'rdim': 250, 'algo_kwargs.should_train_vae.$function': 'railrl.torch.vae.vae_schedules.never_train', }) plt.figure(figsize=(6, 5)) plot.plot_trials( OrderedDict([ ("Online", online_pusher),
def get_fanova_info(
        base_dir,
        params_to_ignore=('seed', 'exp_id', 'unique_id', 'exp_name'),
        ylabel='AverageReturn',
):
    """Build a fANOVA model over the hyperparameters of the trials in a dir.

    The response ``Y`` of each experiment is the last logged value of
    ``ylabel``; the features ``X`` are the hyperparameters that remain after
    ``remove_keys_with_nonunique_values`` has filtered the variants.

    :param base_dir: Directory passed to ``get_trials``.
    :param params_to_ignore: Variant keys forwarded to
        ``remove_keys_with_nonunique_values`` as parameters to ignore.
    :param ylabel: Name of the logged quantity to analyse.  Spaces are
        replaced by underscores and dashes are removed before the lookup.
    :return: ``FanovaInfo`` holding the fANOVA object, the config space,
        ``X``, ``Y``, the categorical remapping and the variants of the
        experiments that were kept.
    :raises ValueError: if no trials are found or ``ylabel`` is not one of
        the logged columns.
    """
    data_and_variants = list(get_trials(base_dir))
    if not data_and_variants:
        # Unpacking an empty zip would raise an opaque ValueError anyway.
        raise ValueError("No trials found in: {}".format(base_dir))
    experiment_data_list, variants_list = zip(*data_and_variants)

    ylabel = ylabel.replace(' ', '_')
    ylabel = ylabel.replace('-', '')
    if ylabel not in experiment_data_list[0].dtype.names:
        print("Possible ylabels:")
        for name in experiment_data_list[0].dtype.names:
            print(" - {}".format(name))
        raise ValueError("Invalid ylabel: {}".format(ylabel))

    # Keep data and variant paired while filtering, so that the two lists
    # cannot drift apart and no per-element index lookup is needed.
    kept = [
        (exp, variant)
        for exp, variant in zip(experiment_data_list, variants_list)
        if exp[ylabel].size >= 1
    ]
    if len(kept) != len(experiment_data_list):
        print("WARNING: Skipping some experiments. Probably because they only "
              "have one data point.")
    valid_experiment_data_list = [exp for exp, _ in kept]
    variants_list = [variant for _, variant in kept]

    # Last logged value per experiment; a size-1 entry is converted through
    # float() instead of being indexed.
    Y = np.array([
        exp[ylabel][-1]
        if exp[ylabel].size > 1
        else np.array(float(exp[ylabel]), dtype=np.double)
        for exp in valid_experiment_data_list
    ])

    filtered_variants_list = remove_keys_with_nonunique_values(
        variants_list, params_to_ignore=params_to_ignore)
    filtered_variants_to_values = get_dict_key_to_values(
        filtered_variants_list)
    names = list(filtered_variants_list[0].keys())
    X_raw = _extract_features(filtered_variants_list, names)
    config_space, X, categorical_remapping = (
        _get_config_space_and_new_features(
            X_raw,
            names,
            filtered_variants_to_values,
        ))

    # config_space does not keep the hyperparameters in the order of `names`
    # (reason unknown), so reorder the feature rows to match its order.
    new_name_order = [
        config_space.get_hyperparameter_by_idx(i) for i in range(len(names))
    ]
    new_order = [names.index(name) for name in new_name_order]
    X = [X[i] for i in new_order]
    # X is [feature_dim x batch_size], but fANOVA expects the transpose.
    X = np.array(X, dtype=object).T
    return FanovaInfo(
        fANOVA(X, Y, config_space=config_space),
        config_space,
        X,
        Y,
        categorical_remapping,
        variants_list,
    )
from visualization.grill.config import ( output_dir, vitchyr_base_dir, format_func, configure_matplotlib, ) import matplotlib.pyplot as plt from railrl.visualization import plot_util as plot from railrl.misc import data_processing as dp configure_matplotlib(matplotlib) reacher_dir = vitchyr_base_dir + 'papers/nips2018/reacher_online_vae' online_reacher = dp.get_trials( reacher_dir, criteria={ 'algo_kwargs.should_train_vae.$function': 'railrl.torch.vae.vae_schedules.always_train', }) offline_reacher = dp.get_trials( reacher_dir, criteria={ 'algo_kwargs.should_train_vae.$function': 'railrl.torch.vae.vae_schedules.never_train', }) plt.figure(figsize=(6, 5)) plot.plot_trials( OrderedDict([ ("Online", online_reacher), ("Offline", offline_reacher), ]),
def main():
    """Plot the final value of a logged quantity against each numeric
    hyperparameter that varies across the trials of an experiment directory.

    Command line arguments:
        expdir: experiment directory passed to ``get_trials``.
        --ylabel: logged quantity to plot (default ``AverageReturn``).

    One error-bar subplot (mean +/- standard deviation of the last logged
    value) is drawn per varying numeric hyperparameter.  The parameters that
    do not vary and the three best hyperparameter settings are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("expdir", help="experiment dir, e.g., /tmp/experiments")
    parser.add_argument("--ylabel", default='AverageReturn')
    args = parser.parse_args()
    y_label = args.ylabel

    # Load data
    trials = get_trials(args.expdir)
    if len(trials) == 0:
        print("No trials found in: {}".format(args.expdir))
        return
    first_data = trials[0][0]
    if y_label not in first_data.dtype.names:
        print("Invalid ylabel. Valid ylabels:")
        for name in sorted(first_data.dtype.names):
            print(name)
        return

    # Get the unique parameters
    _, all_variants = zip(*trials)
    unique_param_to_values = get_unique_param_to_values(all_variants)
    unique_numeric_param_to_values = {
        k: unique_param_to_values[k]
        for k in unique_param_to_values
        if is_numeric(list(unique_param_to_values[k])[0])
    }
    # TODO(vitchyr): Use bar plot if xlabel is not numeric, rather than just
    # ignoring it
    value_to_unique_params = defaultdict(dict)

    # Plot results
    num_params = len(unique_numeric_param_to_values)
    if num_params == 0:
        # plt.subplots(0) would raise; there is nothing to put on an x-axis.
        print("No numeric hyperparameter varies across the trials; "
              "nothing to plot.")
        return
    fig, axes = plt.subplots(num_params)
    if num_params == 1:
        # With a single subplot `axes` is not a sequence; make it indexable.
        axes = [axes]
    for i, x_label in enumerate(unique_numeric_param_to_values):
        x_value_to_y_values = defaultdict(list)
        for trial_data, variant in trials:
            if len(trial_data[y_label]) == 0:
                # Nothing was logged for this trial: warn and skip it rather
                # than indexing the last element of an empty array.
                print(
                    "WARNING. data is missing this label: {}".format(y_label))
                continue
            x_value_to_y_values[variant[x_label]].append(
                trial_data[y_label][-1])
        y_means = []
        y_stds = []
        x_values = []
        for x_value, y_values in x_value_to_y_values.items():
            x_values.append(x_value)
            y_means.append(np.mean(y_values))
            y_stds.append(np.std(y_values))
            value_to_unique_params[np.mean(y_values)][x_label] = x_value

        x_values, y_means, y_stds = sort_by_first(x_values, y_means, y_stds)

        axes[i].errorbar(x_values, y_means, yerr=y_stds)
        axes[i].set_ylabel(y_label)
        axes[i].set_xlabel(x_label)

    # Display information about the best parameters
    value_and_unique_params = sorted(
        value_to_unique_params.items(),
        key=lambda v_and_params: -v_and_params[0],
    )
    unique_params = list(unique_numeric_param_to_values.keys())
    # Read the shared parameters from the last trial's variant explicitly
    # instead of relying on a variable leaked from the plotting loop.
    reference_variant = all_variants[-1]
    default_params = {
        k: reference_variant[k]
        for k in reference_variant
        if k not in unique_params
    }
    print("Default Param", default_params)
    print("Top 3 params")
    for value, params in value_and_unique_params[:3]:
        for k, v in params.items():
            print("\t{}: {}".format(k, v))
        print("Value", value)

    plt.show()