def main():
    """Plot final distance-to-goal curves for the ant NUPO sweep.

    Produces one figure per experiment variant (relabeled vs. non-relabeled
    goals), with one smoothed curve per ``num_updates_per_env_step``
    setting, and saves each figure under ``results/iclr2018/``.
    """
    exp_dir = (
        "/home/vitchyr/git/railrl/data/doodads3/"
        "01-02-ddpg-tdm-ant-nupo-sweep/"
    )
    relabel_exp = Experiment(
        exp_dir,
        criteria={
            'relabel': True,
            'ddpg_tdm_kwargs.base_kwargs.reward_scale': 1,
        })
    no_relabel_exp = Experiment(
        exp_dir,
        criteria={
            'relabel': False,
            'ddpg_tdm_kwargs.base_kwargs.reward_scale': 1,
        })
    MAX_ITERS = 100  # cap on the number of epochs shown
    plot_key = 'Final Distance to goal Mean'.replace(' ', '_')
    for exp, name in [
        (relabel_exp, 'Relabel'),
        (no_relabel_exp, 'No Relabel'),
    ]:
        plt.figure()
        for nupo in [1, 5, 10, 20, 30]:
            trials = exp.get_trials(
                {'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': nupo})
            all_values = []
            for trial in trials:
                # Fail loudly with a clear message (instead of dropping into
                # an ipdb debugger) when a trial lacks the plotted statistic.
                try:
                    values_ts = trial.data[plot_key]
                except KeyError:
                    raise KeyError(
                        "Trial is missing key {!r}".format(plot_key))
                values_ts = sliding_mean(values_ts, window=10)
                all_values.append(values_ts)
            # Trials may have run for different numbers of epochs; truncate
            # everything to the shortest run so the curves stack cleanly.
            min_len = min(map(len, all_values))
            costs = np.vstack([values[:min_len] for values in all_values])
            costs = costs[:, :min(costs.shape[1], MAX_ITERS)]
            mean = np.mean(costs, axis=0)
            std = np.std(costs, axis=0)
            epochs = np.arange(0, len(costs[0]))
            plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
            plt.plot(epochs, mean, label="{} updates per step".format(nupo))
        plt.xlabel("Environment Samples (x1,000)")
        plt.ylabel("Final Distance to Goal Position")
        plt.legend()
        plt.savefig(
            'results/iclr2018/ant-nupo-sweep-{}.jpg'.format(
                name.lower().replace(' ', '-')),
            bbox_inches='tight',
            pad_inches=0,
        )
        plt.show()
def main():
    """Compare TDMs, DDPG, and model-based baselines on the ant
    distance-3-to-5 task and save the learning-curve figure.
    """
    tdm_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ddpg-tdm-ant-nupo-sweep/",
        criteria={
            'exp_id': '27',  # 23 for NUPO = 20, 27 for NUPO = 10
        }
    ).get_trials()
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ant-distance-3-to-5/",
        criteria={
            'exp_id': '0',
            'algorithm': 'Model-Based-Dagger',
        }
    ).get_trials()
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/01-02-ant-distance-3-to-5/",
        criteria={
            'exp_id': '3',
            'algorithm': 'DDPG',
        }
    ).get_trials()
    MAX_ITERS = 1000000  # effectively no cap on the epochs plotted
    plt.figure()
    base_key = 'Final Distance to goal Mean'
    for trials, name, key in [
        (tdm_trials, 'TDMs', base_key),
        (ddpg_trials, 'DDPG', base_key),
        (mb_trials, 'Model-Based', base_key),
    ]:
        key = key.replace(" ", "_")
        all_values = []
        for trial in trials:
            # Fail with a clear error (instead of dropping into an ipdb
            # debugger) when a trial lacks the plotted statistic.
            try:
                values_ts = trial.data[key]
            except KeyError:
                raise KeyError("Trial is missing key {!r}".format(key))
            values_ts = sliding_mean(values_ts, window=10)
            all_values.append(values_ts)
        # Truncate every trial to the shortest run so they stack cleanly.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :min(costs.shape[1], MAX_ITERS)]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)
    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/ant-distance-3-to-5.jpg')
    plt.show()
def main():
    """Compare TDMs, model-based, DDPG, and TRPO on the walker
    goal-position task and save the learning-curve figure.
    """
    tdm_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-tdm-ddpg-walker-position-long/"
    ).get_trials({
        'exp_id': '1',
    })
    trpo_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-trpo-walker-position-long/"
    ).get_trials({
        'exp_id': '4',
    })
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-mb-dagger-walker-position-long-take2/"
    ).get_trials({
        'exp_id': '2',
    })
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-25-ddpg-walker-position-long-take2/"
    ).get_trials({
        'exp_id': '1',
    })
    MAX_ITERS = 10001  # cap on the number of epochs shown
    plt.figure()
    base_key = 'Final xpos errors Mean'
    for trials, name, key in [
        (tdm_trials, 'TDMs', base_key),
        (mb_trials, 'Model-Based', base_key),
        (ddpg_trials, 'DDPG', base_key),
        (trpo_trials, 'TRPO', base_key),
    ]:
        key = key.replace(" ", "_")
        all_values = []
        for trial in trials:
            # Fail with a clear error (instead of dropping into an ipdb
            # debugger) when a trial lacks the plotted statistic.
            try:
                values_ts = trial.data[key]
            except KeyError:
                raise KeyError("Trial is missing key {!r}".format(key))
            all_values.append(values_ts)
        # Truncate every trial to the shortest run so they stack cleanly.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :min(costs.shape[1], MAX_ITERS)]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)
    plt.xlabel("Environment Samples (x10,000)")
    plt.ylabel("Final Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/walker.jpg')
    plt.show()
def main():
    """Compare DDPG against the model-based baseline on the 3D pusher
    task and save the learning-curve figure.
    """
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-29-find-pusher3d-mismatch-2",
        criteria={
            'env_kwargs.reward_coefs': [1, 0, 0],
            'exp_id': '1',
        }).get_trials()
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-30-mb-dagger-pusher3d-fixed-2/",
    ).get_trials()
    MAX_ITERS = 10000  # cap on the number of epochs shown
    plt.figure()
    base_key = 'Final Distance to goal Mean'
    for trials, name, key in [
        (ddpg_trials, 'DDPG', base_key),
        (mb_trials, 'Model-Based', base_key),
    ]:
        key = key.replace(" ", "_")
        all_values = []
        for trial in trials:
            # Fail with a clear error (instead of dropping into an ipdb
            # debugger) when a trial lacks the plotted statistic.
            try:
                values_ts = trial.data[key]
            except KeyError:
                raise KeyError("Trial is missing key {!r}".format(key))
            all_values.append(values_ts)
        # Truncate every trial to the shortest run so they stack cleanly.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :min(costs.shape[1], MAX_ITERS)]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)
    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/pusher3d.jpg')
    plt.show()
# Script (truncated chunk): compares the Model-Based-Dagger and DDPG runs on
# the cheetah x-position task with max_distance=40.
# NOTE(review): this fragment ends mid-loop ("min_len = np.inf" is the last
# visible statement) -- the per-trial aggregation/plotting body is not
# visible here, so the code is left unchanged.
from railrl.misc.data_processing import Experiment
import matplotlib.pyplot as plt
import numpy as np
from railrl.misc.visualization_util import sliding_mean

mb_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-30-cheetah-xpos-increase-distance/",
    criteria={
        'algorithm': 'Model-Based-Dagger',
        'env_kwargs.max_distance': 40,
    },
).get_trials()
ddpg_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-30-cheetah-xpos-increase-distance/",
    criteria={
        'algorithm': 'DDPG',
        'env_kwargs.max_distance': 40,
        'exp_id': '10',
    },
).get_trials()
MAX_ITERS = 10000  # cap on the number of epochs plotted
plt.figure()
base_key = 'Final_Distance_to_goal_Mean'
for trials, name, key in [
    (mb_trials, 'Model Based', base_key),
    (ddpg_trials, 'DDPG', base_key),
]:
    all_values = []
    min_len = np.inf
def main():
    """Bar-chart ablation of memory-state handling on the watermaze task.

    Draws one group of bars per subtrajectory length and one bar per
    ablation variant; error bars show the spread of final returns across
    trials.  Saves the figure to ``test.png``.
    """
    base_dir = (
        "/home/vitchyr/git/rllab-rail/railrl/data/papers/icml2017"
        "/watermaze/ablation2"
    )
    experiment = Experiment(base_dir)
    subtraj_lengths = [1, 5, 10, 15, 20, 25]
    variants = [
        (False, False, 'Our Method'),
        (True, False, 'No Memory State Loaded'),
        (False, True, 'No Memory State for Critic'),
        (True, True, 'No Memory State (Truncated BPTT)'),
    ]
    # Map each ablation variant to a list of final-score arrays, one array
    # per subtrajectory length (one entry per matching trial).
    scores_by_version = OrderedDict()
    for skip_load, skip_critic_memory, label in variants:
        per_length_scores = []
        for length in subtraj_lengths:
            matching = experiment.get_trials({
                'algo_params.do_not_load_initial_memories': skip_load,
                'qf_params.ignore_memory': skip_critic_memory,
                'algo_params.subtraj_length': length,
            })
            per_length_scores.append(
                np.array([t.data['AverageReturn'][-1] for t in matching]))
        scores_by_version[label] = per_length_scores
    # Distinct color + hatch pattern per variant.
    cmap = matplotlib.cm.get_cmap('plasma')
    styles = [
        (cmap(0), ''),
        (cmap(0.33), '/'),
        (cmap(0.66), '.'),
        (cmap(1.), 'x'),
    ]
    group_positions = np.arange(len(subtraj_lengths))
    width = 0.2
    fig, ax = plt.subplots(figsize=(32.0, 20.0))
    legend_rects = []
    legend_names = []
    for i, (label, per_length_scores) in enumerate(scores_by_version.items()):
        color, hatch = styles[i]
        means = [np.mean(scores) for scores in per_length_scores]
        stds = [np.std(scores) for scores in per_length_scores]
        assert len(means) == len(stds) == len(subtraj_lengths)
        bars = ax.bar(
            group_positions + width * i,
            means,
            width,
            color=color,
            yerr=stds,
            hatch=hatch,
            ecolor='red',
            capsize=10,
            linewidth=10,
        )
        legend_rects.append(bars[0])
        legend_names.append(label)
    fontsize = 50
    ax.set_xticks(group_positions + width / 2)
    ax.set_xticklabels(subtraj_lengths)
    ax.legend(
        legend_rects,
        legend_names,
        prop={'size': 30},
        bbox_to_anchor=(0., 1.02, 1., .102),
        loc=3,
        ncol=2,
        mode="expand",
        borderaxespad=0.,
    )
    plt.xlabel("Subtrajectory Length", fontsize=fontsize)
    plt.ylabel("Average Return", fontsize=fontsize)
    plt.xticks(fontsize=fontsize)
    plt.yticks(fontsize=fontsize)
    legend_texts = plt.gca().get_legend().get_texts()
    plt.setp(legend_texts[0], fontsize=fontsize)
    plt.savefig("test.png", bbox_inches='tight')
    plt.show()
# Script (truncated chunk): compares the scalar (HorizonFed) and vectorized
# SDQL variants on the 7-DoF reacher full-goal-state task.
# NOTE(review): this fragment is cut off at "for trial in trials:" with no
# loop body visible, so it cannot run as-is and is left unchanged.
from railrl.misc.data_processing import Experiment, get_trials
import matplotlib.pyplot as plt
import numpy as np

path = "/mnt/data-backup-12-02-2017/doodads3/10-21-sdql-compare-vectorized-delta-normal-big-sweep/"
exp = Experiment(path)
base_criteria = {
    'env_class.$class': "railrl.envs.multitask.reacher_7dof.Reacher7DofFullGoalState"
}
algos = [
    'railrl.algos.state_distance.state_distance_q_learning.HorizonFedStateDistanceQLearning',
    'railrl.algos.state_distance.vectorized_sdql.VectorizedTauSdql',
]
# Fetch the trial set for each algorithm class.
algo_to_trials = {}
for algo in algos:
    criteria = base_criteria.copy()
    criteria['algo_class.$class'] = algo
    algo_to_trials[algo] = exp.get_trials(criteria)
key = 'Final_Euclidean_distance_to_goal_Mean'
MAX_ITERS = 50  # cap on the number of epochs plotted
for algo, trials in algo_to_trials.items():
    # Human-readable label for the plot legend.
    if algo == 'railrl.algos.state_distance.state_distance_q_learning' \
               '.HorizonFedStateDistanceQLearning':
        name = 'Scalar'
    else:
        name = 'Vector'
    all_values = []
    min_len = np.inf
    for trial in trials:
def main():
    """Three-panel learning-curve comparison on the watermaze memory task.

    Top panel: our method across subtrajectory lengths.  Middle panel:
    DDPG-family baselines.  Bottom panel: TRPO-family baselines.  Saves the
    combined figure as ``comparison.{png,eps,svg}``.
    """
    fontsize = 12
    plt.rc('legend', fontsize=fontsize)
    plt.rc('xtick', labelsize=fontsize)
    plt.rc('ytick', labelsize=fontsize)
    base_dir = "/home/vitchyr/git/rllab-rail/railrl/data/papers/icml2017" \
               "/watermaze/watermaze-memory"
    experiment = Experiment(base_dir)
    fig, axes = plt.subplots(3, 1, figsize=(10.0, 7.5))

    linestyles = ['-', '--', ':', '-.']

    def _load_curves(criteria, max_len):
        # Collect per-trial AverageReturn curves truncated to max_len,
        # skipping trials that crashed before producing max_len epochs.
        trials = experiment.get_trials(criteria, ignore_missing_keys=True)
        return [
            t.data['AverageReturn'][:max_len]
            for t in trials
            if len(t.data['AverageReturn']) >= max_len
        ]

    def _plot_panel(ax, method_to_data, xlabel, markerscale):
        # Draw one tsplot curve per method with a distinct color/linestyle,
        # then label the axes and attach a legend above the panel.
        cmap = matplotlib.cm.get_cmap('plasma')
        num_values = len(method_to_data)
        method_names = []
        for i, (method, data) in enumerate(method_to_data.items()):
            method_names.append(method)
            sns.tsplot(
                data=np.vstack(data),
                color=cmap(i / num_values),
                linestyle=linestyles[i],
                condition=method,
                ax=ax,
            )
        ax.set_ylabel("Average Return", fontsize=fontsize)
        ax.set_xlabel(xlabel, fontsize=fontsize)
        ax.legend(method_names, bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=3, ncol=2, mode="expand", borderaxespad=0.,
                  markerscale=markerscale)

    # Panel 1: our method at several subtrajectory lengths.
    method_to_our_data = OrderedDict()
    for subtraj_length in [1, 10, 20, 25]:
        name = "Our Method, Subtrajectory Length = {}".format(subtraj_length)
        method_to_our_data[name] = _load_curves(
            {
                'algo_params.subtraj_length': subtraj_length,
                'version': "Our Method",
            },
            50,
        )
    _plot_panel(axes[0], method_to_our_data,
                "Environment samples (x100)", 10)

    # Panel 2: DDPG-family baselines.
    method_to_ddpg_data = OrderedDict()
    for name in ['DDPG', 'Memory States + DDPG', 'Recurrent DPG']:
        method_to_ddpg_data[name] = _load_curves(
            {
                'algo_params.subtraj_length': 25,
                'version': name,
            },
            50,
        )
    # markerscale=100 reproduces the original figure exactly -- possibly a
    # typo for 10, but kept so the rendered legend is unchanged.
    _plot_panel(axes[1], method_to_ddpg_data,
                "Environment samples (x100)", 100)

    # Panel 3: TRPO-family baselines (longer runs, coarser x-axis units).
    method_to_trpo_data = OrderedDict()
    for name in ['TRPO', 'Memory States + TRPO', 'Recurrent TRPO']:
        method_to_trpo_data[name] = _load_curves(
            {
                'algo_params.subtraj_length': 25,
                'version': name,
            },
            100,
        )
    _plot_panel(axes[2], method_to_trpo_data,
                "Environment samples (x1000)", 10)

    fig.subplots_adjust(hspace=1)
    plt.savefig("comparison.png", bbox_inches='tight', dpi=1000)
    plt.savefig("comparison.eps", bbox_inches='tight')
    plt.savefig("comparison.svg", bbox_inches='tight')
    plt.show()
# Script (truncated chunk): loads trial sets for a half-cheetah x-velocity
# comparison (model-based, DDPG, TDM, sparse-reward DDPG, and HER).
# NOTE(review): this fragment is cut off mid-expression -- the final HER
# "criteria={" dict is never closed -- so it is not valid Python on its own
# and the code is left unchanged.
from railrl.misc.data_processing import Experiment
import matplotlib.pyplot as plt
import numpy as np

mb_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-24-dagger-mb-ant-cheetah-pos-and-vel/",
    criteria={
        'exp_id': '0',
    },
).get_trials()
ddpg_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-12-tdm-half-cheetah-short-epoch-nupo-sweep/",
    criteria={
        'exp_id': '5',
        'algorithm': 'DDPG',
    },
).get_trials()
tdm_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-12-tdm-half-cheetah-short-epoch-nupo-sweep/",
    criteria={
        'exp_id': '8',
        'algorithm': 'DDPG-TDM',
    }).get_trials()
ddpg_indicator_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-24-ddpg-sparse-no-relabel-cheetah-xvel/",
    criteria={
        'exp_id': '7',
    }).get_trials()
her_andry_trials = Experiment(
    "/home/vitchyr/git/railrl/data/doodads3/12-24-her-andrychowicz-cheetah-xvel-rebutal/",
    criteria={
# Script (truncated chunk): gathers final hand-XY distance curves for several
# values of the TDM horizon tau (epoch_discount_schedule_params.value).
# NOTE(review): the fragment ends inside the per-tau loop; the aggregation
# and plotting code that presumably follows is not visible here, so the code
# is left unchanged.
from railrl.misc.data_processing import Experiment
import matplotlib.pyplot as plt
import numpy as np

path = "/mnt/data-backup-12-02-2017/doodads3/10-27-get-results-handxyxy-best-hp-no-oc-sampling-nspe1000/"
exp = Experiment(path)
base_criteria = {
    'algo_params.num_updates_per_env_step': 25,
}
# Build one criteria dict per tau value.
tau_to_criteria = {}
taus = [1, 5, 15, 50]
for tau in taus:
    criteria = base_criteria.copy()
    criteria['epoch_discount_schedule_params.value'] = tau
    tau_to_criteria[tau] = criteria
tau_to_trials = {}
for tau in taus:
    tau_to_trials[tau] = exp.get_trials(tau_to_criteria[tau])
# key = 'Final_Euclidean_distance_to_goal_Mean'
key = 'test_Final_Euclidean_distance_to_goal_Mean'
MAX_ITERS = 35  # cap on the number of epochs plotted
for tau in taus:
    trials = tau_to_trials[tau]
    all_values = []
    min_len = np.inf
    # Track the shortest trial so the curves can later be truncated to a
    # common length.
    for trial in trials:
        values_ts = trial.data[key]
        min_len = min(min_len, len(values_ts))
        all_values.append(values_ts)
def main():
    """Compare TDM against DDPG, HER, sparse-reward DDPG, and a model-based
    baseline on the ant goal-reaching task; saves the learning-curve figure.
    """
    ddpg_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-nupo-sweep-ant/",
        criteria={
            'exp_id': '16',
        },
    ).get_trials()
    her_andrychowicz_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-her-andrychowicz-ant-rebutal/",
        criteria={
            'exp_id': '14',
        },
    ).get_trials()
    # Ant results with batch size of 128.
    tdm_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-24-ddpg-nupo-sweep-ant/",
        criteria={
            'exp_id': '16',
        }
    ).get_trials()
    ddpg_indicator_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-23-ddpg-sparse-sweep-4/",
        criteria={
            'env_class.$class': 'railrl.envs.multitask.ant_env.GoalXYPosAnt',
            'ddpg_tdm_kwargs.base_kwargs.num_updates_per_env_step': 1,
        },
    ).get_trials()
    mb_trials = Experiment(
        "/home/vitchyr/git/railrl/data/doodads3/12-24-dagger-mb-ant-cheetah-pos-and-vel/",
        criteria={
            'exp_id': '1',
        },
    ).get_trials()
    MAX_ITERS = 200  # cap on the number of epochs shown
    plt.figure()
    base_key = 'Final Distance to goal Mean'
    for trials, name, key in [
        (tdm_trials, 'TDM', base_key),
        (mb_trials, 'Model-Based', base_key),
        (ddpg_trials, 'DDPG', base_key),
        (her_andrychowicz_trials, 'HER', base_key),
        (ddpg_indicator_trials, 'DDPG-Sparse', base_key),
    ]:
        key = key.replace(" ", "_")
        all_values = []
        for trial in trials:
            # Fail with a clear error (instead of dropping into an ipdb
            # debugger) when a trial lacks the plotted statistic.
            try:
                values_ts = trial.data[key]
            except KeyError:
                raise KeyError("Trial is missing key {!r}".format(key))
            all_values.append(values_ts)
        # Truncate every trial to the shortest run so they stack cleanly.
        min_len = min(map(len, all_values))
        costs = np.vstack([values[:min_len] for values in all_values])
        costs = costs[:, :min(costs.shape[1], MAX_ITERS)]
        mean = np.mean(costs, axis=0)
        std = np.std(costs, axis=0)
        epochs = np.arange(0, len(costs[0]))
        plt.fill_between(epochs, mean - std, mean + std, alpha=0.1)
        plt.plot(epochs, mean, label=name)
    plt.xlabel("Environment Samples (x1,000)")
    plt.ylabel("Final Euclidean Distance to Goal Position")
    plt.legend()
    plt.savefig('results/iclr2018/ant.jpg')