[arr[:shortest_len] for arr in all_running_mins]) rs_mean = np.mean(stacked_running_mins, axis=0) rs_min = np.min(stacked_running_mins, axis=0) rs_max = np.max(stacked_running_mins, axis=0) # ================================================================== # Load the vanilla ES results # ================================================================== base_dir = 'saves/many_hparams/es/val_sum_loss' metric_key = 'perf/unroll_obj' all_es_running_mins = [] all_es_iterations = [] for exp_dir in os.listdir(base_dir): try: log = plot_utils.load_log(os.path.join(base_dir, exp_dir), 'iteration.csv') running_min = np.minimum.accumulate(log[metric_key]) all_es_running_mins.append(running_min) all_es_iterations.append(log['perf/total_inner_iterations']) except: pass shortest_len = min([len(arr) for arr in all_es_running_mins]) es_iterations = all_es_iterations[0][:shortest_len] es_stacked_running_mins = np.stack( [arr[:shortest_len] for arr in all_es_running_mins]) es_mean = np.mean(es_stacked_running_mins, axis=0) es_min = np.min(es_stacked_running_mins, axis=0) es_max = np.max(es_stacked_running_mins, axis=0) # ==================================================================
# Draw the heatmap from the saved grid result, reading the 'unroll_obj' key.
# Axes are labelled as log LR decay (x) and log initial LR (y); smoothing and
# contour overlays are enabled through the keyword arguments below.
plot_utils.plot_heatmap(
    'saves/mnist_grid_sgdm/grid_mnist_mlp_lr:inverse-time-decay_sgdm_T_5000_N_40/seed_3/result.pkl',
    key='unroll_obj',
    xlabel='Log LR Decay',
    ylabel='Log Init LR',
    cmap=plt.cm.Purples_r,
    levels=30,
    sigma=1.0,
    use_smoothing=True,
    show_contours=True,
    contour_alpha=0.2,
    figsize=(8, 6),
)

# Load the 'frequent.csv' logs of the ES and PES runs. The run directory names
# differ only in the method prefix (es- / pes-) and the K value (K:10 / K:100).
es_K10 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/es-mnist-mlp-obj:train_sum_loss-tune:lr:inverse-time-decay-T:5000-K:10-nc:1-npc:1000-sigma:0.1-olr:0.01-ob1:0.9-ob2:0.999-ic:-1-oc:-1-seed:3',
    fname='frequent.csv')
pes_K10 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/pes-mnist-mlp-obj:train_sum_loss-tune:lr:inverse-time-decay-T:5000-K:10-nc:1-npc:1000-sigma:0.1-olr:0.01-ob1:0.9-ob2:0.999-ic:-1-oc:-1-seed:3',
    fname='frequent.csv')
es_K100 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/es-mnist-mlp-obj:train_sum_loss-tune:lr:inverse-time-decay-T:5000-K:100-nc:1-npc:1000-sigma:0.1-olr:0.01-ob1:0.9-ob2:0.999-ic:-1-oc:-1-seed:3',
    fname='frequent.csv')
pes_K100 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/pes-mnist-mlp-obj:train_sum_loss-tune:lr:inverse-time-decay-T:5000-K:100-nc:1-npc:1000-sigma:0.1-olr:0.01-ob1:0.9-ob2:0.999-ic:-1-oc:-1-seed:3',
    fname='frequent.csv')

# Plot the first 2000 logged points of the PES K=10 run: x from 'lr_1', y from
# 'lr_0', each passed through exp and then log10.
# NOTE(review): presumably the logged values are natural-log parameters being
# converted to log10 to match the heatmap axes -- confirm against the logger.
plt.plot(np.log10(np.exp(pes_K10['lr_1']))[:2000],
         np.log10(np.exp(pes_K10['lr_0']))[:2000],
         color=colors[1],
# Draw the heatmap from the saved grid result, reading the 'train_sum_loss'
# key, with 10 levels and the x/y axes labelled 'Decay' and 'Initial LR'.
plot_utils.plot_heatmap(
    'saves/mnist_grid/grid_mnist_mlp_lr:inverse-time-decay_sgdm_T_5000_N_100/seed_3/result.pkl',
    key='train_sum_loss',
    xlabel='Decay',
    ylabel='Initial LR',
    cmap=plt.cm.Purples_r,
    levels=10,
    figsize=(8, 6),
)
# Fixed integer tick positions on both axes, drawn at font size 18.
plt.xticks([-2, -1, 0, 1, 2], fontsize=18)
plt.yticks([-3, -2, -1, 0, 1], fontsize=18)

# Load the 'frequent.csv' logs of the ES runs for K = 100, 10 and 1.
# Note that the `olr` value in the directory name differs per run
# (0.1, 0.01, 0.001 respectively).
es_K100 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/es-mnist-mlp-obj:train_sum_loss-tune:lr:itd-T:5000-K:100-N:100-sigma:0.1-olr:0.1-seed:3',
    fname='frequent.csv')
es_K10 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/es-mnist-mlp-obj:train_sum_loss-tune:lr:itd-T:5000-K:10-N:100-sigma:0.1-olr:0.01-seed:3',
    fname='frequent.csv')
es_K1 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/es-mnist-mlp-obj:train_sum_loss-tune:lr:itd-T:5000-K:1-N:100-sigma:0.1-olr:0.001-seed:3',
    fname='frequent.csv')

# Load the matching PES runs; their `olr` values also differ per run
# (0.03 for K:100, 0.001 for K:10).
pes_K100 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/pes-mnist-mlp-obj:train_sum_loss-tune:lr:itd-T:5000-K:100-N:100-sigma:0.1-olr:0.03-seed:3',
    fname='frequent.csv')
pes_K10 = plot_utils.load_log(
    'saves/mnist_lr_decay/train_sum_loss/pes-mnist-mlp-obj:train_sum_loss-tune:lr:itd-T:5000-K:10-N:100-sigma:0.1-olr:0.001-seed:3',
    fname='frequent.csv')
pes_K1 = plot_utils.load_log(
), ('ES K=100', 'saves/control/es-K100/es-Swimmer-v2-lr:0.1-sigma:0.1-N:10-T:1000-K:100-c:0-d:0' ), ('PES K=100', 'saves/control/pes-K100/pes-Swimmer-v2-lr:0.1-sigma:0.3-N:10-T:1000-K:100-c:0-d:0' ), ] for (label, exp_dir) in exps: steps = [] rewards = [] min_num_rewards = 1e9 for seed_dir in os.listdir(exp_dir): log = plot_utils.load_log(os.path.join(exp_dir, seed_dir), fname='iteration.csv') steps.append(log['total_steps']) rewards.append(log['reward_mean']) if len(log['reward_mean']) < min_num_rewards: min_num_rewards = len(log['reward_mean']) for list_idx in range(len(rewards)): rewards[list_idx] = rewards[list_idx][:min_num_rewards] means = np.array(rewards).mean(axis=0) stds = np.array(rewards).std(axis=0) steps = steps[0][:min_num_rewards] plt.plot(steps, means, linewidth=2, label=label) plt.fill_between(steps, means - stds, means + stds, linewidth=2, alpha=0.3)