def plot_env_cg():
    """Experiments with varying Gopt.

    Calls ``plot_rwds`` once, for the first entry of ``envs`` only, using the
    ``cg_labels`` models and the title ``'Gradient Optimizers'``.
    """
    first_env = envs[0]
    results_dir = fdir + first_env + '-v1/'
    plot_rwds(
        cg_labels,
        cdict=color_dict,
        fdir=results_dir,
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='Gradient Optimizers',
    )
def plot_env_varnoise(env, fnoise_dir='../new_final_results/Swimmer-v1/NOISE/', noise_vals=np.arange(0.0, 1.0, 0.1)):
    """Experiments with observation noise.

    For every noise level in ``noise_vals`` one ``plot_rwds`` call is issued
    comparing three result files stored under ``fnoise_dir``:
    ``joint_<noise>``, ``obs_<noise>`` and ``AR_<noise>``.

    Args:
        env: environment name; only used to look up ``MAX_LEN(env)``.
        fnoise_dir: directory passed to ``plot_rwds`` as ``fdir``.
        noise_vals: sequence of noise levels; each is formatted with
            ``'%.1f'`` to build the model keys and the relative pickle paths.
    """
    n_levels = len(noise_vals)
    # (key prefix, pickle path below the noise folder, colormap) per method.
    # A tuple keeps the joint/obs/AR order of the keys handed to plot_rwds.
    families = (
        ('joint', 'lite-cont/lite-cont.pkl', plt.cm.winter),
        ('obs', 'obsVR/obsVR.pkl', plt.cm.autumn),
        ('AR', 'arVR/arVR.pkl', plt.cm.cool),
    )
    palettes = {prefix: cmap(np.linspace(0, 1, n_levels)) for prefix, _, cmap in families}

    noise_paths = {}
    ncolor_dict = {}
    nkeys = []
    for noise in noise_vals:
        level = '%.1f' % noise
        level_keys = []
        for prefix, pkl_path, _ in families:
            key = '%s_%s' % (prefix, level)
            noise_paths[key] = '%s/%s' % (level, pkl_path)
            # Every key of a family gets the LAST colour of its colormap.
            # NOTE(review): the palettes have one entry per noise level, so a
            # per-level colour may have been intended -- confirm before changing.
            ncolor_dict[key] = palettes[prefix][-1]
            level_keys.append(key)
        nkeys.append(level_keys)

    def cn_dict(key):
        """Return the colour registered for ``key`` (``None`` if unknown)."""
        return ncolor_dict.get(key)

    def n_paths(key):
        """Return the relative pickle path for ``key`` (``None`` if unknown)."""
        return noise_paths.get(key)

    for level_keys, noise in zip(nkeys, noise_vals):
        plot_rwds(level_keys, cdict=cn_dict, fdir=fnoise_dir, model_paths=n_paths, BEST=BEST,
                  MAX_LEN=MAX_LEN(env), STEP=STEP, MAX_TRIALS=MAX_TRIALS,
                  title='Noise_performance%.1f' % noise)
def plot_env_opt():
    """Experiments for RPSP opts.

    Calls ``plot_rwds`` once, for the first entry of ``envs`` only, using the
    ``opt_labels`` models and the title ``'RPSP optimizers'``.
    """
    first_env = envs[0]
    results_dir = fdir + first_env + '-v1/'
    plot_rwds(
        opt_labels,
        cdict=color_dict,
        fdir=results_dir,
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='RPSP optimizers',
    )
def plot_envs_vrpggru():
    """Experiments for the VRPG vs. GRU comparison.

    Issues one ``plot_rwds`` call per entry of ``envs`` with the
    ``VRPGgru_labels`` models; ``step`` is taken from ``max_lims[env][1]``.
    """
    for env_name in envs:
        plot_rwds(
            VRPGgru_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' VRPGvsGRU comparison',
            step=max_lims[env_name][1],
            shape_dict=shape_dict2,
        )
def plot_env_init():
    """Experiments with random initialization.

    Calls ``plot_rwds`` once, for the first entry of ``envs`` only, using the
    ``init_labels`` models and the title ``'RPSP Initialization'``.
    """
    first_env = envs[0]
    results_dir = fdir + first_env + '-v1/'
    plot_rwds(
        init_labels,
        cdict=color_dict,
        fdir=results_dir,
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='RPSP Initialization',
    )
def plot_envs_ar():
    """Experiments with varying window.

    Issues one ``plot_rwds`` call per entry of ``envs`` with the ``AR_labels``
    models. ``step`` is ``max_lims[env][1]`` and ``kfold`` is that same value
    multiplied by ``MAX_TRIALS``.
    """
    for env_name in envs:
        step_size = max_lims[env_name][1]
        plot_rwds(
            AR_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' FM $R_{iter}$ ',
            step=step_size,
            shape_dict=shape_dict,
            kfold=step_size * MAX_TRIALS,
        )
def plot_envs_rpsp():
    """Experiments for RPSP variants.

    Issues one ``plot_rwds`` call per entry of ``envs`` with the
    ``RPSP_labels`` models. ``step`` is ``max_lims[env][1]`` and ``kfold`` is
    that same value multiplied by ``MAX_TRIALS``.
    """
    for env_name in envs:
        step_size = max_lims[env_name][1]
        plot_rwds(
            RPSP_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' RPSP variants $R_{iter}$ ',
            step=step_size,
            shape_dict=shape_dict,
            kfold=step_size * MAX_TRIALS,
        )
def plot_envs_gru():
    """Plot for GRUs.

    Issues one ``plot_rwds`` call per entry of ``envs`` with the ``gru_labels``
    models. ``step`` is ``max_lims[env][1]`` and ``kfold`` is that same value
    multiplied by ``MAX_TRIALS``.
    """
    for env_name in envs:
        step_size = max_lims[env_name][1]
        plot_rwds(
            gru_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + r' GRU $R_{iter}$ ',
            step=step_size,
            shape_dict=shape_dict,
            kfold=step_size * MAX_TRIALS,
        )
def plot_envs_reactive():
    """Experiments for each environment with the ``reactive_labels`` models.

    Issues one ``plot_rwds`` call per entry of ``envs``, laid out with
    ``ncol=3`` and ``kfold = max_lims[env][1] * MAX_TRIALS``.
    """
    for env_name in envs:
        plot_rwds(
            reactive_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            # No separator between the environment name and the text, as in the original.
            title=env_name + 'Prediction as regularization $R_{iter}$ ',
            shape_dict=shape_dict3,
            ncol=3,
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
def plot_envs_rpspgru():
    """Experiments for RPSP/gru minimal comparison.

    Issues one ``plot_rwds`` call per entry of ``envs`` with the
    ``RPSPgru_labels`` models. Colours, model paths and marker shapes come
    from the per-environment helpers ``best_colors``, ``best_paths`` and
    ``best_shape``.
    """
    for env_name in envs:
        step_size = max_lims[env_name][1]
        plot_rwds(
            RPSPgru_labels,
            cdict=best_colors(env_name),
            fdir=fdir + env_name + '-v1/',
            model_paths=best_paths(env_name),
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' GRU Filter comparison $R_{iter}$ ',
            step=step_size,
            shape_dict=best_shape(env_name),
            kfold=step_size * MAX_TRIALS,
        )
def plot_envs_overall():
    """Plot overall environment performance.

    For every entry of ``envs`` this calls ``plot_rwds`` with the
    ``envs_labels`` models, stores the first returned value, summarises the
    third returned value row-wise as (mean, standard error) pairs and prints
    pairwise t-tests through ``print_ttest``.

    @return: tuple ``(R_iter, AUC)`` where ``R_iter`` holds the first value
        returned by ``plot_rwds`` for each environment and ``AUC`` is a flat
        list of ``(mean, std / sqrt(n_columns))`` tuples accumulated over all
        environments.
    """
    R_iter = []
    AUC = []
    for env_name in envs:
        print(env_name)
        step_size = max_lims[env_name][1]
        bestmodels, _, aucurve = plot_rwds(
            envs_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' $R_{iter}$ ',
            step=step_size,
            shape_dict=shape_dict,
            kfold=step_size * MAX_TRIALS,
        )
        print('done print')
        R_iter.append(bestmodels)

        R = np.asarray(aucurve, dtype=float)
        row_means = np.mean(R, axis=1)
        row_sems = np.std(R, axis=1) / np.sqrt(R.shape[1])
        AUC.extend(zip(row_means, row_sems))
        print('done. Computing AUC')

        # Compare every model except the last against models 1 and 2.
        for reference in (1, 2):
            for candidate in range(len(envs_labels) - 1):
                print_ttest(envs_labels, R, candidate, reference)
        print('\n')

    # Previously recorded output:
    # Swimmer
    # t-test ( FM RPSP-VRPG )= [ -0.676586869332 , 0.507653821772 ]
    # t-test ( GRU RPSP-VRPG )= [ -11.4782215848 , 9.9484555701e-07 ]
    # t-test ( RPSP-Alt RPSP-VRPG )= [ -3.10454361915 , 0.00933872173406 ]
    # t-test ( FM RPSP-Alt )= [ 1.73508240067 , 0.111134237192 ]
    # t-test ( GRU RPSP-Alt )= [ -20.2985459977 , 2.4045809148e-09 ]
    # Hopper
    # t-test ( FM RPSP-VRPG )= [ -1.97496384693 , 0.0638921448438 ]
    # t-test ( GRU RPSP-VRPG )= [ -2.98367426585 , 0.0121047687853 ]
    # t-test ( RPSP-Alt RPSP-VRPG )= [ 2.25405603636 , 0.0382107798284 ]
    # t-test ( FM RPSP-Alt )= [ -3.92498523554 , 0.00123896806466 ]
    # t-test ( GRU RPSP-Alt )= [ -4.909161505 , 0.000577310309778 ]
    # Walker2d
    # t-test ( FM RPSP-VRPG )= [ -0.0793340586349 , 0.937667000948 ]
    # t-test ( GRU RPSP-VRPG )= [ -2.9657440937 , 0.00870691843556 ]
    # t-test ( RPSP-Alt RPSP-VRPG )= [ 2.23688724131 , 0.0412540309747 ]
    # t-test ( FM RPSP-Alt )= [ -2.05494189388 , 0.0601025184967 ]
    # t-test ( GRU RPSP-Alt )= [ -6.51606587482 , 5.3677973935e-06 ]
    # CartPole
    # t-test ( FM RPSP-VRPG )= [ -4.49013758712 , 0.00103201257316 ]
    # t-test ( GRU RPSP-VRPG )= [ 1.23597901022 , 0.232353991956 ]
    # t-test ( RPSP-Alt RPSP-VRPG )= [ -0.358340067592 , 0.726097854222 ]
    # t-test ( FM RPSP-Alt )= [ -8.06880999537 , 6.44596048562e-07 ]
    # t-test ( GRU RPSP-Alt )= [ 1.95090137725 , 0.0740240842398 ]
    # print (np.round(np.asarray(AUC)[[0, 1, 2, 3, 5, 9], :] / 1000.0, decimals=1))
    # print (reg_labels, filter_labels)
    return R_iter, AUC
def plot_envs_reg():
    """Experiments on filter regularization (``reg_labels`` models).

    For every entry of ``envs`` this calls ``plot_rwds``, summarises the third
    returned value row-wise and prints t-tests of model 2 against models 0, 1
    and 3 through ``print_ttest``.

    Returns:
        Flat list of ``(mean, std / sqrt(n_columns))`` tuples accumulated over
        all environments.
    """
    auc_filter = []
    for env_name in envs:
        limits = max_lims[env_name]
        _, _, aucurve = plot_rwds(
            reg_labels,
            cdict=best_colors(env_name),
            fdir=fdir + env_name + '-v1/',
            model_paths=best_paths(env_name),
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' Filter regularization $R_{iter}$ ',
            step=limits[1],
            shape_dict=best_shape(env_name),
            ylim=[-10, limits[0]],
            kfold=limits[1] * MAX_TRIALS,
        )
        Rfilter = np.asarray(aucurve, dtype=float)
        row_means = np.mean(Rfilter, axis=1)
        row_sems = np.std(Rfilter, axis=1) / np.sqrt(Rfilter.shape[1])
        auc_filter.extend(zip(row_means, row_sems))

        for other in (0, 1, 3):
            print_ttest(reg_labels, Rfilter, 2, other)
    return auc_filter
def plot_envs_filter():
    """Experiments comparing PSR and GRU filtering (``filter_labels`` models).

    For every entry of ``envs`` this calls ``plot_rwds``, prints a t-test of
    model 1 against model 0 through ``print_ttest`` and summarises the third
    returned value row-wise.

    Returns:
        Flat list of ``(mean, std / sqrt(n_columns))`` tuples accumulated over
        all environments.
    """
    gru_auc = []
    for env_name in envs:
        limits = max_lims[env_name]
        _, _, aucurve = plot_rwds(
            filter_labels,
            cdict=best_colors(env_name),
            fdir=fdir + env_name + '-v1/',
            model_paths=best_paths(env_name),
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' PSR vs. GRU filtering $R_{iter}$ ',
            step=limits[1],
            shape_dict=best_shape(env_name),
            ylim=[-10, limits[0]],
            kfold=limits[1] * MAX_TRIALS,
            ncol=5,
        )
        Rgru = np.asarray(aucurve, dtype=float)
        print_ttest(filter_labels, Rgru, 1, 0)

        row_means = np.mean(Rgru, axis=1)
        row_sems = np.std(Rgru, axis=1) / np.sqrt(Rgru.shape[1])
        gru_auc.extend(zip(row_means, row_sems))
    return gru_auc
def plot_envs_noise():
    """Plot the ``noise_labels`` models for every noise level, one figure per environment.

    For each entry of ``envs`` a single-row figure with one subplot per entry
    of ``noise_levels`` is created. ``plot_rwds`` draws into each subplot using
    the model paths returned by ``noise_paths(noise)``, and the figure is saved
    as a PNG inside ``fdir + env + '-v1/'``.
    """
    for env_name in envs:
        env_dir = fdir + env_name + '-v1/'
        # Second value returned by plot_rwds for every noise level; only the
        # disabled plot_table call below consumes it.
        R_cumulative = []
        # squeeze=False always yields a 2-D axes array, so indexing also works
        # when there is a single noise level.
        f, axes_grid = plt.subplots(nrows=1, ncols=len(noise_levels), figsize=(15, 6),
                                    sharey=True, squeeze=False)
        ax = axes_grid[0]
        ax[0].set_ylabel(env_name + ' $R_{iter}$ ')
        for j, noise in enumerate(noise_levels):
            _, R_cum, _ = plot_rwds(
                noise_labels,
                cdict=color_dict,
                fdir=env_dir,
                model_paths=noise_paths(noise),
                BEST=BEST,
                MAX_LEN=MAX_LEN(env_name),
                STEP=STEP,
                MAX_TRIALS=MAX_TRIALS,
                shape_dict=shape_dict3,
                ylim=[-10, 800],  # max_lims[envs[i]][0]],
                f=f,
                ax=ax[j],
                legend=(j == 1),  # the legend is requested for the second subplot only
                kfold=max_lims[env_name][1] * MAX_TRIALS,
            )  # title=envs[i]+' Observation noise ',
            R_cumulative.extend(R_cum)
            plt.subplots_adjust(wspace=0.1, top=0.9, bottom=0.2, left=0.08, right=0.99)
            # Raw string: '\s' is not a valid escape sequence in a normal literal.
            ax[j].set_title(r"noise $\sigma$=" + str(noise), position=(0.5, 0.92), fontsize=20)
        f.savefig(env_dir + env_name + 'noiseobs_rwdperiter_best%d_step%d.png' % (BEST, STEP), dpi=300)
        # plot_table(R_cumulative, noise_labels, noise_levels, envs[i]+' Obstacle noise AUC', 'AUC (cumulative Return $10^3$)', fdir+env+'-v1/', incr=1000)
def plot_env_ar():
    """Experiments with varying window.

    Calls ``plot_rwds`` once, for the first entry of ``envs`` only, using the
    ``AR_labels`` models and the title ``'AR window'``.
    """
    first_env = envs[0]
    results_dir = fdir + first_env + '-v1/'
    plot_rwds(
        AR_labels,
        cdict=color_dict,
        fdir=results_dir,
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='AR window',
    )