Beispiel #1
0
def plot_env_cg():
    """Plot reward curves for the 'Gradient Optimizers' experiment.

    Only the first entry of ``envs`` is used; the ``cg_labels`` model set is
    handed to ``plot_rwds`` together with the module-level plot settings.
    """
    first_env = envs[0]
    plot_rwds(
        cg_labels,
        cdict=color_dict,
        fdir=fdir + first_env + '-v1/',
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='Gradient Optimizers',
    )
Beispiel #2
0
def plot_env_varnoise(env, fnoise_dir='../new_final_results/Swimmer-v1/NOISE/', noise_vals=np.arange(0.0, 1.0, 0.1)):
    """Plot one reward figure per observation-noise level.

    For every value in ``noise_vals`` three result files are registered
    (keys ``joint_*``, ``obs_*`` and ``AR_*``, each formatted with ``%.1f``)
    and the three keys of one level are plotted together by ``plot_rwds``.

    @param env: environment name; only used to compute ``MAX_LEN(env)``.
    @param fnoise_dir: directory passed to ``plot_rwds`` as ``fdir``.
    @param noise_vals: noise levels to plot (the default array is created
        once, when the function is defined).
    """
    # Relative result-file path for every (model family, noise level) pair.
    noise_paths = {}
    for noise in noise_vals:
        noise_paths['joint_%.1f' % noise] = '%.1f/lite-cont/lite-cont.pkl' % noise
        noise_paths['obs_%.1f' % noise] = '%.1f/obsVR/obsVR.pkl' % noise
        noise_paths['AR_%.1f' % noise] = '%.1f/arVR/arVR.pkl' % noise

    n_levels = len(noise_vals)
    family_ramps = (
        ('join', plt.cm.winter(np.linspace(0, 1, n_levels))),
        ('obs', plt.cm.autumn(np.linspace(0, 1, n_levels))),
        ('AR', plt.cm.cool(np.linspace(0, 1, n_levels))),
    )

    # Every key of a family gets the LAST colour of that family's ramp.
    # NOTE(review): a ramp with one colour per level is built but only its
    # final entry is used - possibly per-level colours were intended; confirm.
    ncolor_dict = {}
    for prefix, ramp in family_ramps:
        for key in noise_paths:
            if key.startswith(prefix):
                ncolor_dict[key] = ramp[-1]

    def cn_dict(key):
        return ncolor_dict.get(key)

    def n_paths(key):
        return noise_paths.get(key)

    for noise in noise_vals:
        level_keys = ['joint_%.1f' % noise, 'obs_%.1f' % noise, 'AR_%.1f' % noise]
        plot_rwds(
            level_keys,
            cdict=cn_dict,
            fdir=fnoise_dir,
            model_paths=n_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title='Noise_performance%.1f' % noise,
        )
Beispiel #3
0
def plot_env_opt():
    """Plot reward curves for the 'RPSP optimizers' experiment.

    Only the first entry of ``envs`` is used; the ``opt_labels`` model set is
    forwarded to ``plot_rwds``.
    """
    first_env = envs[0]
    plot_rwds(
        opt_labels,
        cdict=color_dict,
        fdir=fdir + first_env + '-v1/',
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='RPSP optimizers',
    )
Beispiel #4
0
def plot_envs_vrpggru():
    """Plot the ``VRPGgru_labels`` model set once for every entry of ``envs``.

    Each figure is titled '<env> VRPGvsGRU comparison'; ``step`` is the second
    entry of ``max_lims[env]`` and markers come from ``shape_dict2``.
    """
    for env_name in envs:
        plot_rwds(
            VRPGgru_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' VRPGvsGRU comparison',
            step=max_lims[env_name][1],
            shape_dict=shape_dict2,
        )
Beispiel #5
0
def plot_env_init():
    """Plot reward curves for the 'RPSP Initialization' experiment.

    Only the first entry of ``envs`` is used; the ``init_labels`` model set is
    forwarded to ``plot_rwds``.
    """
    first_env = envs[0]
    plot_rwds(
        init_labels,
        cdict=color_dict,
        fdir=fdir + first_env + '-v1/',
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='RPSP Initialization',
    )
Beispiel #6
0
def plot_envs_ar():
    """Plot the ``AR_labels`` model set once for every entry of ``envs``.

    ``step`` is the second entry of ``max_lims[env]`` and ``kfold`` is that
    same value multiplied by ``MAX_TRIALS``.
    """
    for env_name in envs:
        plot_rwds(
            AR_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' FM $R_{iter}$ ',
            step=max_lims[env_name][1],
            shape_dict=shape_dict,
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
Beispiel #7
0
def plot_envs_rpsp():
    """Plot the ``RPSP_labels`` model set once for every entry of ``envs``.

    ``step`` is the second entry of ``max_lims[env]`` and ``kfold`` is that
    same value multiplied by ``MAX_TRIALS``.
    """
    for env_name in envs:
        plot_rwds(
            RPSP_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' RPSP variants $R_{iter}$ ',
            step=max_lims[env_name][1],
            shape_dict=shape_dict,
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
Beispiel #8
0
def plot_envs_gru():
    """Plot the GRU model set (``gru_labels``) once for every entry of ``envs``.

    Each figure is titled '<env> GRU $R_{iter}$ '. ``step`` is the second
    entry of ``max_lims[env]`` and ``kfold`` is that same value multiplied by
    ``MAX_TRIALS``; the actual drawing is delegated to ``plot_rwds``.
    """
    # The description used to be a bare string inside the loop body, which
    # left the function without a docstring; it now lives at the top.
    for env_name in envs:
        plot_rwds(gru_labels, cdict=color_dict, fdir=fdir + env_name + '-v1/',
                  model_paths=model_paths, BEST=BEST, MAX_LEN=MAX_LEN(env_name), STEP=STEP, MAX_TRIALS=MAX_TRIALS,
                  title=env_name + r' GRU $R_{iter}$ ', step=max_lims[env_name][1], shape_dict=shape_dict,
                  kfold=max_lims[env_name][1] * MAX_TRIALS)
    return
Beispiel #9
0
def plot_envs_reactive():
    """Plot the ``reactive_labels`` model set once for every entry of ``envs``.

    Uses ``shape_dict3`` and a three-column legend (``ncol=3``); ``kfold`` is
    the second entry of ``max_lims[env]`` multiplied by ``MAX_TRIALS``.
    """
    for env_name in envs:
        # NOTE(review): unlike the sibling plots, this title has no space
        # between the environment name and the text - confirm if intended.
        plot_rwds(
            reactive_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + 'Prediction as regularization $R_{iter}$ ',
            shape_dict=shape_dict3,
            ncol=3,
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
Beispiel #10
0
def plot_envs_rpspgru():
    """Plot the ``RPSPgru_labels`` model set once for every entry of ``envs``.

    Colours, model paths and marker shapes are looked up per environment via
    ``best_colors``, ``best_paths`` and ``best_shape``.
    """
    for env_name in envs:
        plot_rwds(
            RPSPgru_labels,
            cdict=best_colors(env_name),
            fdir=fdir + env_name + '-v1/',
            model_paths=best_paths(env_name),
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' GRU Filter comparison $R_{iter}$ ',
            step=max_lims[env_name][1],
            shape_dict=best_shape(env_name),
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
Beispiel #11
0
def plot_envs_overall():
    """
    Plot the ``envs_labels`` model set for every environment and collect statistics.

    For each entry of ``envs`` the curves are drawn by ``plot_rwds``. The third
    value it returns is converted to a float array whose row-wise mean and
    standard error (std / sqrt(number of columns)) are appended to the AUC
    list, and ``print_ttest`` is called for j in (1, 2) with every
    m < len(envs_labels) - 1.

    @return: tuple ``(R_iter, AUC)``; ``R_iter`` collects the first value
        returned by ``plot_rwds`` per environment, ``AUC`` is a flat list of
        (mean, standard error) pairs.
    """
    best_per_env = []
    auc_stats = []
    for env_name in envs:
        print(env_name)
        best_models, _cum_rwd, auc_curves = plot_rwds(
            envs_labels,
            cdict=color_dict,
            fdir=fdir + env_name + '-v1/',
            model_paths=model_paths,
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' $R_{iter}$ ',
            step=max_lims[env_name][1],
            shape_dict=shape_dict,
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
        print('done print')
        best_per_env.append(best_models)

        auc_matrix = np.asarray(auc_curves, dtype=float)
        row_mean = np.mean(auc_matrix, axis=1)
        row_sem = np.std(auc_matrix, axis=1) / np.sqrt(auc_matrix.shape[1])
        auc_stats.extend(zip(row_mean, row_sem))
        print('done. Computing AUC')

        for ref_idx in [1, 2]:
            for model_idx in range(len(envs_labels) - 1):
                print_ttest(envs_labels, auc_matrix, model_idx, ref_idx)
        print('\n')

        # Reference values kept from the original source (they look like
        # earlier print_ttest output per environment - not regenerated here).
        # Swimmer
        # t-test ( FM RPSP-VRPG )= [ -0.676586869332 ,  0.507653821772 ]
        # t-test ( GRU RPSP-VRPG )= [ -11.4782215848 ,  9.9484555701e-07 ]
        # t-test ( RPSP-Alt RPSP-VRPG )= [ -3.10454361915 ,  0.00933872173406 ]
        # t-test ( FM RPSP-Alt )= [ 1.73508240067 ,  0.111134237192 ]
        # t-test ( GRU RPSP-Alt )= [ -20.2985459977 ,  2.4045809148e-09 ]
        # Hopper
        # t-test ( FM RPSP-VRPG )= [ -1.97496384693 ,  0.0638921448438 ]
        # t-test ( GRU RPSP-VRPG )= [ -2.98367426585 ,  0.0121047687853 ]
        # t-test ( RPSP-Alt RPSP-VRPG )= [ 2.25405603636 ,  0.0382107798284 ]
        # t-test ( FM RPSP-Alt )= [ -3.92498523554 ,  0.00123896806466 ]
        # t-test ( GRU RPSP-Alt )= [ -4.909161505 ,  0.000577310309778 ]
        # Walker2d
        # t-test ( FM RPSP-VRPG )= [ -0.0793340586349 ,  0.937667000948 ]
        # t-test ( GRU RPSP-VRPG )= [ -2.9657440937 ,  0.00870691843556 ]
        # t-test ( RPSP-Alt RPSP-VRPG )= [ 2.23688724131 ,  0.0412540309747 ]
        # t-test ( FM RPSP-Alt )= [ -2.05494189388 ,  0.0601025184967 ]
        # t-test ( GRU RPSP-Alt )= [ -6.51606587482 ,  5.3677973935e-06 ]
        # CartPole
        # t-test ( FM RPSP-VRPG )= [ -4.49013758712 ,  0.00103201257316 ]
        # t-test ( GRU RPSP-VRPG )= [ 1.23597901022 ,  0.232353991956 ]
        # t-test ( RPSP-Alt RPSP-VRPG )= [ -0.358340067592 ,  0.726097854222 ]
        # t-test ( FM RPSP-Alt )= [ -8.06880999537 ,  6.44596048562e-07 ]
        # t-test ( GRU RPSP-Alt )= [ 1.95090137725 ,  0.0740240842398 ]
        # print (np.round(np.asarray(AUC)[[0, 1, 2, 3, 5, 9], :] / 1000.0, decimals=1))
        # print (reg_labels, filter_labels)
    return best_per_env, auc_stats
Beispiel #12
0
def plot_envs_reg():
    """Plot the ``reg_labels`` model set per environment and summarise the AUC.

    For each entry of ``envs`` the third value returned by ``plot_rwds`` is
    turned into a float array; its row-wise mean and standard error are
    collected, and ``print_ttest`` compares index 2 against indices 0, 1 and 3.

    @return: flat list of (mean, standard error) pairs over all environments.
    """
    summary = []
    for env_name in envs:
        _, _, auc_curves = plot_rwds(
            reg_labels,
            cdict=best_colors(env_name),
            fdir=fdir + env_name + '-v1/',
            model_paths=best_paths(env_name),
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' Filter regularization $R_{iter}$ ',
            step=max_lims[env_name][1],
            shape_dict=best_shape(env_name),
            ylim=[-10, max_lims[env_name][0]],
            kfold=max_lims[env_name][1] * MAX_TRIALS,
        )
        auc_matrix = np.asarray(auc_curves, dtype=float)
        row_mean = np.mean(auc_matrix, axis=1)
        row_sem = np.std(auc_matrix, axis=1) / np.sqrt(auc_matrix.shape[1])
        summary.extend(zip(row_mean, row_sem))
        for other_idx in (0, 1, 3):
            print_ttest(reg_labels, auc_matrix, 2, other_idx)
    return summary
Beispiel #13
0
def plot_envs_filter():
    """Plot the ``filter_labels`` model set per environment and summarise the AUC.

    Figures are titled '<env> PSR vs. GRU filtering $R_{iter}$ '. For each
    environment ``print_ttest`` is called with indices (1, 0) and the row-wise
    mean and standard error of the AUC array are collected.

    @return: flat list of (mean, standard error) pairs over all environments.
    """
    summary = []
    for env_name in envs:
        _, _, auc_curves = plot_rwds(
            filter_labels,
            cdict=best_colors(env_name),
            fdir=fdir + env_name + '-v1/',
            model_paths=best_paths(env_name),
            BEST=BEST,
            MAX_LEN=MAX_LEN(env_name),
            STEP=STEP,
            MAX_TRIALS=MAX_TRIALS,
            title=env_name + ' PSR vs. GRU filtering $R_{iter}$ ',
            step=max_lims[env_name][1],
            shape_dict=best_shape(env_name),
            ylim=[-10, max_lims[env_name][0]],
            kfold=max_lims[env_name][1] * MAX_TRIALS,
            ncol=5,
        )
        auc_matrix = np.asarray(auc_curves, dtype=float)
        print_ttest(filter_labels, auc_matrix, 1, 0)
        row_mean = np.mean(auc_matrix, axis=1)
        row_sem = np.std(auc_matrix, axis=1) / np.sqrt(auc_matrix.shape[1])
        summary.extend(zip(row_mean, row_sem))
    return summary
Beispiel #14
0
def plot_envs_noise():
    """Save, per environment, one figure with a reward plot for each noise level.

    For every entry of ``envs`` a single-row figure with one column per entry
    of ``noise_levels`` is created. ``plot_rwds`` draws the ``noise_labels``
    models into each column using ``noise_paths(noise)``, and the figure is
    written to ``<fdir><env>-v1/<env>noiseobs_rwdperiter_best<BEST>_step<STEP>.png``.

    The values returned by ``plot_rwds`` are not used here.
    """
    n_levels = len(noise_levels)
    # The legend is requested on the second column; with a single noise level
    # fall back to the only column so the figure still gets one.
    legend_col = min(1, n_levels - 1)
    for env_name in envs:
        env_dir = fdir + env_name + '-v1/'
        # squeeze=False always returns a 2-D grid of axes, so indexing also
        # works when there is only one noise level (a bare Axes otherwise).
        fig, axes_grid = plt.subplots(nrows=1, ncols=n_levels, figsize=(15, 6), sharey=True,
                                      squeeze=False)
        axes = axes_grid[0]
        axes[0].set_ylabel(env_name + ' $R_{iter}$ ')
        for col, noise in enumerate(noise_levels):
            # NOTE: the upper y-limit is fixed at 800 here, whereas e.g.
            # plot_envs_reg uses max_lims[env][0].
            plot_rwds(noise_labels, cdict=color_dict, fdir=env_dir,
                      model_paths=noise_paths(noise), BEST=BEST, MAX_LEN=MAX_LEN(env_name),
                      STEP=STEP, MAX_TRIALS=MAX_TRIALS,
                      shape_dict=shape_dict3, ylim=[-10, 800],
                      f=fig, ax=axes[col], legend=(col == legend_col),
                      kfold=max_lims[env_name][1] * MAX_TRIALS)
            # Kept inside the loop: pyplot applies this to its current figure,
            # and plot_rwds is not visible here to rule out that it changes it.
            plt.subplots_adjust(wspace=0.1, top=0.9, bottom=0.2, left=0.08, right=0.99, )
            # Raw string: "\s" is not a valid escape sequence in a normal literal.
            axes[col].set_title(r"noise $\sigma$=" + str(noise), position=(0.5, 0.92), fontsize=20)
        fig.savefig(env_dir + env_name + 'noiseobs_rwdperiter_best%d_step%d.png' % (BEST, STEP),
                    dpi=300)
    return
Beispiel #15
0
def plot_env_ar():
    """Plot reward curves for the 'AR window' experiment.

    Only the first entry of ``envs`` is used; the ``AR_labels`` model set is
    forwarded to ``plot_rwds``.
    """
    first_env = envs[0]
    plot_rwds(
        AR_labels,
        cdict=color_dict,
        fdir=fdir + first_env + '-v1/',
        model_paths=model_paths,
        BEST=BEST,
        MAX_LEN=MAX_LEN(first_env),
        STEP=STEP,
        MAX_TRIALS=MAX_TRIALS,
        title='AR window',
    )