def main():
    """Plot the averaged 'ppo' runs of one environment and save them as a PDF.

    Command-line options added on top of whatever ``arg_parser()`` provides:
        --env:    environment ID (default ``Swimmer-v2``).
        --dir:    experiment folder below ``~/Desktop/ppo_test/``
                  (default ``EXP_ON_fix_norm``).
        --thesis: parsed, but not used anywhere in this function.

    Results are loaded from ``~/Desktop/ppo_test/<dir>/<env>``, narrowed with
    ``filt(results, 'ppo')`` and the figure is written to
    ``/Users/zsbjltwjj/Desktop/ppo_test/<dir>-ppo.pdf``.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='Swimmer-v2')
    parser.add_argument('--dir', type=str, default='EXP_ON_fix_norm')
    parser.add_argument('--thesis', type=str, default='Online_V0')
    args = parser.parse_args()

    log_root = '~/Desktop/ppo_test/' + args.dir + '/' + args.env
    results = pu.load_results(log_root)

    # One entry per figure; the key becomes part of the output file name.
    curves = {'ppo': filt(results, 'ppo')}

    for label, runs in curves.items():
        pu.plot_results(
            runs,
            xy_fn=pu.progress_default_xy_fn,
            average_group=True,
            split_fn=lambda _: '',
            shaded_err=True,
            shaded_std=False,
        )
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel('Average Return [-]')
        plt.tight_layout()

        figure = plt.gcf()
        figure.set_size_inches(9, 7.5)
        pdf_path = "/Users/zsbjltwjj/Desktop/ppo_test/" + args.dir + '-' + label + '.pdf'
        figure.savefig(pdf_path, format="pdf")
def main():
    """Launch the offline (no-SIL) training runs for one environment, then plot them.

    For every seed in ``[st_seed, st_seed + seeds)`` and every algorithm of the
    sweep, ``../algos/<alg>/run.py`` is started through ``os.system`` with its
    logs under ``~/Desktop/logs/EXP2_nosil/<env>/<legend>-<seed>``.  Afterwards
    all logs of that environment are loaded, plotted with group averaging, and
    the figure is saved as ``<env>_<filename>`` in the working directory.

    Command-line options added on top of whatever ``arg_parser()`` provides:
        --env, --st_seed, --seeds, --num_timesteps, --filename.

    Raises:
        ValueError: if ``--env`` is not one of the environments for which
            extra run arguments (``mbl_args``) are defined below.
    """
    parser = arg_parser()
    parser.add_argument('--env',
                        help='environment ID',
                        type=str,
                        default='HalfCheetah-v2')
    parser.add_argument('--st_seed',
                        help='start number of seeds',
                        type=int,
                        default=0)
    parser.add_argument('--seeds', help='number of seeds', type=int, default=1)
    parser.add_argument('--num_timesteps', type=str, default="3e4")
    parser.add_argument('--filename',
                        type=str,
                        default='_Offline_Evaluation_nosil.png')
    args = parser.parse_args()

    # The two environment families differ only in the planning horizon.
    if args.env == 'Swimmer-v2' or args.env == 'HalfCheetah-v2':
        mbl_args = '--num_samples=1500 --num_elites=10 --horizon=10 --eval_freq=10 --mbl_train_freq=10'
    elif args.env == 'Reacher-v2' or args.env == 'Ant-v2':
        mbl_args = '--num_samples=1500 --num_elites=10 --horizon=5 --eval_freq=10 --mbl_train_freq=10'
    else:
        # Fail here with a clear message instead of hitting an unbound
        # ``mbl_args`` further down.
        raise ValueError('no run arguments defined for environment %r' %
                         args.env)

    algo_names = [
        "mbl_ppo2", "ppo2_offline", "mbl_copos", "copos_offline", "mbl_trpo",
        "trpo_offline"
    ]
    # legend_names[i] is the log-folder prefix used for algo_names[i].
    legend_names = [
        "mbl+ppo2", "ppo2", "mbl+copos", "copos", "mbl+trpo", "trpo"
    ]

    for seed in range(args.st_seed, args.st_seed + args.seeds):
        for algo, legend in zip(algo_names, legend_names):
            # Every algorithm receives the same extra arguments.
            os.system("python ../algos/" + algo + "/run.py --alg=" + algo +
                      " --num_timestep=" + args.num_timesteps +
                      " --seed=" + str(seed) + " --env=" + args.env +
                      " --log_path=~/Desktop/logs/EXP2_nosil/" + args.env +
                      "/" + legend + "-" + str(seed) + ' ' + mbl_args)

    results = pu.load_results('~/Desktop/logs/EXP2_nosil/' + args.env)

    pu.plot_results(results,
                    xy_fn=pu.progress_itermbl_xy_fn,
                    average_group=True,
                    split_fn=lambda _: '')
    plt.xlabel('Evaluation Epochs [-]')
    plt.ylabel('Average Return [-]')
    fig = plt.gcf()
    fig.set_size_inches(9.5, 7.5)
    fig.savefig(args.env + "_" + args.filename)
Example #3
0
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task, resample=100):
    """Load results from ``dirs`` and plot one curve per monitor entry, averaged per group.

    Every loaded result is expanded into one deep-copied result per element
    of its ``monitor`` attribute (``monitor`` is iterated, so it is assumed to
    be a collection of frames -- TODO confirm against ``load_results``).  The
    expanded list is plotted with ``split_fn`` used as the grouping function.

    ``num_timesteps`` and ``title`` are accepted but not used here.
    """
    def monitor_xy(result):
        return ts2xy(result.monitor, xaxis, yaxis)

    expanded = [
        copy.deepcopy(loaded)._replace(monitor=frame)
        for loaded in plot_util.load_results(dirs)
        for frame in loaded.monitor
    ]
    plot_util.plot_results(
        expanded,
        xy_fn=monitor_xy,
        group_fn=split_fn,
        average_group=True,
        resample=resample,
    )
def main():
    """Plot averaged return curves for several result selections of one environment.

    Results are loaded from
    ``~/Desktop/carla_sample_efficient/data/bk/bkup_EXP1_FINAL/<extra_dir><env>``
    and narrowed with ``filt`` into four selections ('copos', 'trpo', 'ppo'
    and 'sil_slight').  Each selection is saved as ``<name>.pdf`` below the
    ONLINE plot folder of the environment; 'sil_slight' additionally gets an
    entropy figure saved as ``<name>_entropy.pdf``.
    """
    parser = arg_parser()
    parser.add_argument('--env', help='environment ID', type=str, default='HalfCheetah-v2')
    parser.add_argument('--extra_dir', type=str, default='')
    args = parser.parse_args()

    data_root = '~/Desktop/carla_sample_efficient/data/bk/bkup_EXP1_FINAL/'
    plot_root = "/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/ONLINE/"

    results = pu.load_results(data_root + args.extra_dir + args.env)

    # Key -> runs to draw; the key doubles as the output file stem.
    selections = {
        'copos': filt(results, 'copos'),
        'trpo': filt(results, 'trpo'),
        'ppo': filt(results, 'ppo'),
        'sil_slight': filt(results, 'sil_n2_l0.001'),
    }

    def draw_and_save(runs, xy_fn, ylabel, file_name, **extra):
        # Shared plot -> label -> layout -> resize -> save sequence.
        pu.plot_results(runs,
                        xy_fn=xy_fn,
                        average_group=True,
                        split_fn=lambda _: '',
                        shaded_err=True,
                        shaded_std=False,
                        **extra)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel(ylabel)
        plt.tight_layout()
        figure = plt.gcf()
        figure.set_size_inches(9, 7.5)
        figure.savefig(plot_root + args.env + '/' + file_name, format='pdf')

    for label, runs in selections.items():
        draw_and_save(runs, pu.progress_default_xy_fn,
                      'Average Return [-]', label + '.pdf')
        if label == 'sil_slight':
            draw_and_save(runs, pu.progress_default_entropy_xy_fn,
                          'Entropy [-]', label + '_entropy.pdf',
                          legend_entropy=1)
Example #5
0
def plot_results(dirs,
                 num_timesteps=10e6,
                 xaxis=X_TIMESTEPS,
                 yaxis=Y_REWARD,
                 title='',
                 split_fn=split_by_task):
    """Load the results found under ``dirs`` and plot them averaged per group.

    Only ``dirs`` and ``split_fn`` influence the plot; ``num_timesteps``,
    ``xaxis``, ``yaxis`` and ``title`` are accepted but not used here.
    """
    loaded = plot_util.load_results(dirs)
    plot_options = {
        'split_fn': split_fn,
        'average_group': True,
        'resample': int(1e6),
    }
    plot_util.plot_results(loaded, **plot_options)
Example #6
0
def test_plot_util():
    """Smoke-test ``pu.load_results`` / ``pu.plot_results`` on four short ppo2 runs.

    Checks the number of loaded results and the shape of the returned axes
    for a single result, for each tiling mode, and for a constant split.
    """
    nruns = 4
    logdirs = []
    for _ in range(nruns):
        logdirs.append(
            smoketest('--alg=ppo2 --env=CartPole-v0 --num_timesteps=10000'))

    data = pu.load_results(logdirs)
    assert len(data) == nruns

    # A single result yields a single axis.
    _, axes = pu.plot_results(data[:1])
    assert len(axes) == 1

    # Tiling mode -> expected grid of axes for the four results.
    expected_shapes = (
        ('vertical', (4, 1)),
        ('horizontal', (1, 4)),
        ('symmetric', (2, 2)),
    )
    for tiling, shape in expected_shapes:
        _, axes = pu.plot_results(data, tiling=tiling)
        assert axes.shape == shape

    # A constant split function puts everything on one axis.
    _, axes = pu.plot_results(data, split_fn=lambda _: '')
    assert len(axes) == 1
def main():
    """Load one fixed log folder, plot it averaged on a single axis, then break into the debugger.

    The trailing ``set_trace()`` leaves ``results`` and ``r`` available for
    interactive inspection.
    """

    # results = pu.load_results('data_the_best')
    results = pu.load_results('data_Test_obstacle_origin/log_data')
    # First loaded run; not used by the plot call below.
    r = results[0]
    # plt.plot(np.cumsum(r.monitor.l), r.monitor.r)
    # plt.plot(np.cumsum(r.monitor.l), pu.smooth(r.monitor.r, radius=10))
    #### plt.plot(r.progress.total_timesteps, r.progress.eprewmean)

    # print('keys:', r.progress.keys())
    # plt.plot(r.progress['epoch'], r.progress['test/success_rate'])
    # plt.plot(r.progress['epoch'], pu.smooth(r.progress['test/success_rate'], radius=5))

    # pu.plot_results(results)
    # The constant split function puts every run on the same axis.
    pu.plot_results(results, average_group=True, split_fn=lambda _: '')
    set_trace()
Example #8
0
def plot_accuracy(root_dir):
    """Plot the 'errors' column of the 'Sphere' runs under ``root_dir``.

    Runs are kept when their name contains 'Sphere' and does not contain
    'CMA'.  Run names are split on '-': the first field selects the subplot,
    the second field selects the curve label.  The figure is saved as
    ``blackbox_accuracy.pdf``.

    Raises:
        ValueError: from the grouping function when the second name field is
            not one of the known algorithm tags.
    """
    # Algorithm tag (second name field) -> legend label.
    group_labels = {
        'SGES': 'SGES',
        'CMA': 'CMA-ES',
        'GES': 'Guided ES',
        'ES': 'Vanilla ES',
        'ASEBO': 'ASEBO',
    }

    def xy_fn(r):
        return np.cumsum(r.progress['xs']), r.progress['errors']

    def split_fn(r):
        return r.name.split('-')[0]

    def group_fn(r):
        alg_name = r.name.split('-')[1]
        if alg_name not in group_labels:
            raise ValueError('%s not supported' % alg_name)
        return group_labels[alg_name]

    all_results = [
        result for result in load_results(root_dir, verbose=True)
        if 'Sphere' in result.name and 'CMA' not in result.name
    ]

    plt.figure(dpi=300)
    fig, axarr = pu.plot_results(
        all_results,
        xy_fn=xy_fn,
        split_fn=split_fn,
        group_fn=group_fn,
        shaded_std=True,
        shaded_err=False,
        average_group=True,
        tiling='horizontal',
        xlabel='Evaluations',
        ylabel='Cosine Similarity',
    )
    plt.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.2, left=0.08, top=0.95)

    # Same tick positions and labels on every subplot of the first row.
    tick_positions = np.arange(0, 12.5e4, 2.5e4)
    tick_labels = ['0', '25k', '50k', '75k', '100k']
    for ax in axarr[0]:
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels)

    plt.savefig('blackbox_accuracy.pdf', bbox_inches='tight')
def plot_data(exp, savefig, ttype):
    """Plot the results stored under ``exp`` and write the figure to ``savefig``.

    Args:
        exp: location passed to ``pu.load_results``.
        savefig: output path; made absolute before use.  An existing file at
            that path is removed before saving.
        ttype: ``'train'`` selects ``ep_distance_ratio_train`` as the xy
            function; any other value selects ``ep_distance_ratio_test``.

    Any exception raised while loading, plotting or saving is caught and
    reported on stdout; the function then returns normally.
    """
    savefig = abspath(savefig)
    try:
        results = organize_results(pu.load_results(exp))
        if ttype == 'train':
            xy_fn = ep_distance_ratio_train
        else:
            xy_fn = ep_distance_ratio_test
        pu.plot_results(
            results,
            average_group=True,
            split_fn=lambda _: '',
            xy_fn=xy_fn,
            shaded_std=False,
            shaded_err=True,
        )
        if os.path.isfile(savefig):
            os.remove(savefig)
        plt.savefig(savefig)
        plt.clf()
    except Exception as err:
        print("Plotting failed for {}".format(savefig))
        print("Reason: {}".format(str(err)))
Example #10
0
def plot_save_results(xy_fn, file="logs/time_rewards.png"):
    """Plot the module-level ``results`` with ``xy_fn`` and save the figure to ``file``.

    All runs share one axis (constant split function) and are averaged per
    group, with the error shading switched off.
    """
    plot_kwargs = dict(
        xy_fn=xy_fn,
        split_fn=lambda _: "",
        average_group=True,
        shaded_err=False,
    )
    figure, _axes = pu.plot_results(results, **plot_kwargs)
    figure.savefig(file)
Example #11
0
    def plot_results(self):
        """Plot the averaged runs of this environment, save the figure and show it.

        Logs are read from ``<log_dir>/<env_name up to the first '-'>/`` and
        the figure is written to ``<plot_dir>/plot_<env_name>``.  ``plot_dir``
        is created when it does not exist yet.
        """
        os.makedirs(self.plot_dir, exist_ok=True)

        env_prefix = self.env_name.split('-')[0]
        # The trailing '' makes os.path.join end the path with a separator.
        run_dir = os.path.join(self.log_dir, env_prefix, '')
        results = pu.load_results(run_dir)

        pu.plot_results(
            results,
            average_group=True,
            split_fn=lambda _: '',
            shaded_std=False,
        )
        plt.xlabel('Timestep')
        plt.ylabel('Reward')

        figure = plt.gcf()
        target = os.path.join(self.plot_dir, 'plot_' + self.env_name)
        figure.savefig(target, bbox_inches='tight')

        plt.show()
def main():
    """Plot the best-return curve of all 'trpo' runs of one offline experiment.

    Results are loaded from
    ``~/Desktop/carla_sample_efficient/data/bk/bkup_EXP2_FINAL/<extra_dir><env>``.
    Note that the parsed ``--extra_dir`` value is overwritten with
    ``'EXP2_IAS_5M_TRPO_OFF/'`` before it is used.

    Several selections are computed with ``filt`` / ``filt_or`` /
    ``filt_or_or`` but only the 'trpo' selection is plotted; its figure is
    saved as ``trpo_research.pdf`` below the OFFLINE plot folder of the
    environment.  Selections named 'mbl_nosil' or 'mbl_sil' would also get an
    entropy figure.
    """
    parser = arg_parser()
    parser.add_argument('--env',
                        help='environment ID',
                        type=str,
                        default='HalfCheetah-v2')
    parser.add_argument('--extra_dir', type=str, default='')
    args = parser.parse_args()

    # The command-line value is replaced unconditionally.
    args.extra_dir = 'EXP2_IAS_5M_TRPO_OFF/'

    data_root = '~/Desktop/carla_sample_efficient/data/bk/bkup_EXP2_FINAL/'
    plot_root = "/Users/zsbjltwjj/Desktop/carla_sample_efficient/plot_f/OFFLINE/"
    results = pu.load_results(data_root + args.extra_dir + args.env)

    # Selections that are not plotted by the active dictionary below.
    r_copos_nosil = filt(results, 'copos-')
    r_trpo_nosil = filt(results, 'trpo-')
    r_ppo_nosil = filt(results, 'ppo-')
    r_copos_sil = filt(results, 'copos+sil-')
    r_trpo_sil = filt(results, 'trpo+sil-')
    r_ppo_sil = filt(results, 'ppo+sil-')
    r_mbl_sil = filt(results, 'mbl+', 'sil-')
    r_mbl_nosil = filt_or_or(results, 'mbl+copos-', 'mbl+trpo-', 'mbl+ppo-')
    r_copos_comp = filt_or(results, 'mbl+copos', 'copos+sil')
    r_trpo_comp = filt_or(results, 'mbl+trpo', 'trpo+sil')
    r_ppo_comp = filt_or(results, 'mbl+ppo', 'ppo+sil')

    selections = {'trpo_research': filt(results, 'trpo')}

    def draw_and_save(label, runs, xy_fn, ylabel, suffix, **extra):
        # Shared plot -> label -> layout -> resize -> save sequence.
        pu.plot_results(runs,
                        xy_fn=xy_fn,
                        average_group=True,
                        name=label,
                        split_fn=lambda _: '',
                        shaded_err=True,
                        shaded_std=False,
                        **extra)
        plt.xlabel('Number of Timesteps [M]')
        plt.ylabel(ylabel)
        plt.tight_layout()
        figure = plt.gcf()
        figure.set_size_inches(9, 7.5)
        figure.savefig(plot_root + args.env + '/' + label + suffix,
                       format="pdf")

    for label, runs in selections.items():
        draw_and_save(label, runs, pu.progress_mbl_vbest_xy_fn,
                      'Best Average Return [-]', '.pdf')
        if label in ('mbl_nosil', 'mbl_sil'):
            draw_and_save(label, runs, pu.progress_default_entropy_xy_fn,
                          'Entropy [-]', '_entropy.pdf', legend_entropy=1)
Example #13
0
def plot_k(root_dir):
    """Plot the 'ys' column of every run under ``root_dir``, one curve per algorithm tag.

    Run names are split on '-': the first field selects the subplot, the
    second field selects the curve label.  Tags without a fixed label are
    labelled ``SGES(k=<last two characters of the tag>)``.  The figure is
    saved as ``blackbox_k.pdf``.
    """
    # Algorithm tag (second name field) -> legend label.
    fixed_labels = {
        'ES': 'Vanilla ES',
        'SGES100': 'SGES(k=100)',
        'SGES1': 'SGES(k=1)',
        'SGES5': 'SGES(k=5)',
    }

    def xy_fn(r):
        return np.cumsum(r.progress['xs']), r.progress['ys']

    def split_fn(r):
        return r.name.split('-')[0]

    def group_fn(r):
        alg_name = r.name.split('-')[1]
        if alg_name in fixed_labels:
            return fixed_labels[alg_name]
        return 'SGES(k=%s)' % alg_name[-2:]

    all_results = load_results(root_dir, verbose=True)

    plt.figure(dpi=300)
    fig, axarr = pu.plot_results(
        all_results,
        xy_fn=xy_fn,
        split_fn=split_fn,
        group_fn=group_fn,
        shaded_std=True,
        shaded_err=False,
        average_group=True,
        tiling='horizontal',
        xlabel='Evaluations',
        ylabel='Loss',
    )
    plt.subplots_adjust(hspace=0.2, wspace=0.2, bottom=0.2, left=0.08, top=0.95)

    # Same tick positions and labels on every subplot of the first row.
    tick_positions = np.arange(0, 12.5e4, 2.5e4)
    tick_labels = ['0', '25k', '50k', '75k', '100k']
    for ax in axarr[0]:
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(tick_labels)

    plt.savefig('blackbox_k.pdf', bbox_inches='tight')
Example #14
0
def plot_monitors():
    """Load the Humanoid-v2 logs and plot them averaged per group on one axis.

    To average several seeds, the log folder must contain sub-folders named
    ``<experiment>-<seed>`` (for example ``exp0-0``, ``exp0-1``, ``exp1-0``,
    ``exp1-1``): the part before the dash names the experiment being
    compared, the part after it is the seed.
    """
    log_root = '/home/deep3/logs/Humanoid-v2/'
    # Alternative log folders:
    #   '/home/deep3/logs/Hopper-v2/'
    #   'time_limit_logs/reacher'   (shows the effect of the time limits flag)

    results = pu.load_results(log_root, running_agents=3)

    plot_options = dict(
        average_group=True,
        split_fn=lambda _: '',
        shaded_std=False,
    )
    fig, ax = pu.plot_results(results, **plot_options)
Example #15
0
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

# Runs are read from <dataDir>/<envName>.
dataDir = 'baselinesData'
envName = 'HalfCheetah-v2'
alg = 'ppo2'  # not referenced below

results = pu.load_results('/'.join((dataDir, envName)))

# Draw with the default plot settings and open the window.
pu.plot_results(results)
plt.show()
Example #16
0
def plot_results(dirs, num_timesteps=10e6, xaxis=X_TIMESTEPS, yaxis=Y_REWARD, title='', split_fn=split_by_task):
    """Load the results under ``dirs`` and plot their monitor data, averaged per group.

    Args:
        dirs: location(s) passed to ``plot_util.load_results``.
        xaxis, yaxis: forwarded to ``ts2xy`` to pick the plotted quantities.
        split_fn: forwarded to ``plot_util.plot_results`` as ``split_fn``.
        num_timesteps, title: accepted but not used here.
    """
    def monitor_xy(r):
        # Results that expose ``monitor`` as an attribute (e.g. a namedtuple)
        # fail with TypeError on ``r['monitor']``; try the attribute first and
        # keep key access as a fallback for mapping-style results.
        try:
            monitor = r.monitor
        except AttributeError:
            monitor = r['monitor']
        return ts2xy(monitor, xaxis, yaxis)

    results = plot_util.load_results(dirs)
    plot_util.plot_results(results, xy_fn=monitor_xy, split_fn=split_fn, average_group=True, resample=int(1e6))
Example #17
0
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt

results = pu.load_results('path/to/your/experiment01')

# All runs on one axis, averaged per group, with the standard-error band only.
plot_options = dict(
    average_group=True,
    split_fn=lambda _: '',
    shaded_std=False,
    shaded_err=True,
    figsize=(10, 6),
    smooth_step=10.0,
)
pu.plot_results(results, **plot_options)

plt.title('Experiment01', fontsize=30)

plt.tight_layout()
plt.show()
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import argparse
import os

"""Create a plot of training data for a given environment using baselines plotting utility
Command line arguments:
--env: environment name (ex: --env=RoboschoolHalfCheetah-v1)
--dir: directory where training data is logged default is ./data/
"""

if __name__ == "__main__":
    # Parse command line arguments
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # --env is mandatory: without it the path concatenations below would fail
    # on ``str + None`` instead of reporting a usage error.
    parser.add_argument('--env', help='Environment name', required=True)
    parser.add_argument('--dir', help='Data directory', default='./data')
    args = parser.parse_args()
    envName = args.env
    dataDir = './' + args.dir
    # Load results with baselines plot utility
    results = pu.load_results(dataDir + '/' + envName)
    # Plot results: all runs on one axis, averaged per group
    pu.plot_results(results, average_group=True, split_fn=lambda _: '', shaded_std=False)
    # Save plot as a pdf in a subdirectory of the data directory called plots;
    # exist_ok avoids the separate existence check.
    plotsDir = dataDir + '/plots'
    os.makedirs(plotsDir, exist_ok=True)
    plt.savefig(plotsDir + '/' + envName + '.pdf')

Example #19
0
def main():
    """Plot, or print a summary of, the runs found in the folders of ``args.files``.

    Reads the module-level ``args`` (``files``, ``success``, ``length``,
    ``print``, ``max_step``, ``smooth_step``, ``legend``).  ``args.files`` is
    sorted in place.  The environment name is taken from the first path: its
    second-to-last '/'-separated component, or the third-to-last one when the
    path ends with '/'.

    With ``args.print`` set, the mean and standard deviation of column 1 of
    the last ten monitor rows are printed per ``check_last_name`` key and the
    function returns without plotting.  Otherwise the averaged curves are
    saved as ``<env>.png``.
    """
    args.files = sorted(args.files)
    first_parts = args.files[0].split('/')
    # A trailing '/' leaves an empty last component and shifts the wanted one.
    env = first_parts[-3] if first_parts[-1] == '' else first_parts[-2]

    results = []
    for path in args.files:
        print(path)
        results.extend(
            pu.load_results(path, success=args.success, length=args.length))

    # Summary mode: print statistics only, no figure.
    if args.print:
        allrecords = dict()
        for run in results:
            key = check_last_name(run)
            tail = np.array(run.monitor)[-10:, 1]
            allrecords.setdefault(key, []).append(tail)
        for key, chunks in allrecords.items():
            merged = np.concatenate(chunks)
            print('{} {} {}'.format(key, merged.mean(), merged.std()))
        return None

    pu.plot_results(results,
                    average_group=True,
                    shaded_err=False,
                    shaded_std=True,
                    max_step=args.max_step,
                    smooth_step=args.smooth_step,
                    group_fn=lambda run: check_last_name(run),
                    split_fn=lambda _: '',
                    figsize=(10, 10))

    if args.length:
        y_label = 'Episode length'
    else:
        y_label = 'Reward'
    plt.xlabel('Number of steps', fontsize=20)
    plt.ylabel(y_label, fontsize=20)
    plt.title(env, fontsize=24)

    if args.legend != []:
        plt.legend(args.legend)

    plt.savefig('{}.png'.format(env), bbox_inches='tight')
    print("saved ", env)
Example #20
0
def _read_rewards(paths):
    """Return every non-empty line of every file in ``paths`` parsed as a float."""
    rewards = []
    for path in paths:
        with open(path, 'r') as fi:
            rewards.extend(
                float(line) for line in fi.read().split('\n') if line != '')
    return rewards


def main():
    """Plot the runs in ``args.files`` with optional BC and random-policy reference lines.

    Reads the module-level ``args`` (``files``, ``bcpath``, ``max_steps``,
    ``yscale``, ``legend``).  The environment name is the last '/'-separated
    component of the first path (the one before it when the path ends with
    '/').

    Files below ``args.bcpath`` whose name contains 'BC_' and '.txt' provide
    the BC rewards (drawn as a mean line with a one-standard-deviation band);
    files whose name contains 'random_' and '.txt' provide the random-policy
    rewards (drawn as a dashed mean line).  The figure is saved as
    ``<env>.png``.
    """
    first_parts = args.files[0].split('/')
    env = first_parts[-2] if first_parts[-1] == '' else first_parts[-1]

    results = pu.load_results(args.files, )
    pu.plot_results(results,
                    average_group=True,
                    shaded_err=False,
                    shaded_std=True,
                    group_fn=lambda run: check_last_name(run),
                    split_fn=lambda _: '',
                    figsize=(10, 10))

    # Collect the reward files for behaviour cloning and the random agent.
    allfiles = []
    allrandomfiles = []
    for root, _dirs, files in os.walk(args.bcpath):
        print(files)
        allfiles.extend(
            os.path.join(root, name) for name in files
            if 'BC_' in name and '.txt' in name)
        allrandomfiles.extend(
            os.path.join(root, name) for name in files
            if 'random_' in name and '.txt' in name)

    ## Show all files for BC and plot
    print(allfiles)
    if allfiles != []:
        bcreward = _read_rewards(allfiles)
        mean = np.mean(bcreward)
        std = np.std(bcreward)
        idxcolor = 10
        plt.plot([0, args.max_steps], [mean, mean],
                 label='BC',
                 color=COLORS[idxcolor])
        plt.fill_between([0, args.max_steps], [mean - std, mean - std],
                         [mean + std, mean + std],
                         alpha=0.2,
                         color=COLORS[idxcolor])

    ## Get random policy
    if allrandomfiles != []:
        mean = np.mean(_read_rewards(allrandomfiles))
        plt.plot([0, args.max_steps], [mean, mean],
                 label='random',
                 color='gray',
                 linestyle='dashed')

    envnamehere = 'ant'
    is_legend_env = env.lower().startswith(envnamehere)

    plt.xlabel('# environment interactions', fontsize=20)
    if is_legend_env:
        plt.ylim(ymin=-5000, ymax=5000)
    # The y label was guarded by ``startswith('')``, which is always true, so
    # it is set unconditionally.
    plt.ylabel('Reward', fontsize=30)
    plt.yscale(args.yscale)
    plt.title(env.replace('BC', '').replace('GAIL', '').replace('no', '').replace('alph', ''),
              fontsize=50)

    if is_legend_env:
        if args.legend != []:
            # Work on a copy so the parsed arguments are not modified.
            legend_labels = list(args.legend)
            if allfiles != []:
                legend_labels.append('BC')
            # 'bottom right' is not a valid matplotlib location string;
            # 'lower right' is the supported spelling.
            plt.legend(legend_labels, fontsize=30, loc='lower right')
    else:
        plt.legend().set_visible(False)

    plt.savefig(
        '{}.png'.format(env),
        bbox_inches='tight',
    )
    print("saved ", env)
from baselines.common import plot_util as pu

LOG_DIRS = 'logs/coinrun_500_level/'
smooth_step = 50.0

results = pu.load_results(LOG_DIRS)

# All runs on one axis, averaged per group, without the standard-deviation band.
plot_options = dict(
    average_group=True,
    split_fn=lambda _: '',
    shaded_std=False,
    smooth_step=smooth_step,
)
fig = pu.plot_results(results, **plot_options)

# Saved through the pyplot module that plot_util itself exposes.
pu.plt.savefig('coinrun_500_level')
Example #22
0
from baselines.common import plot_util as pu
import matplotlib.pyplot as plt
import numpy as np

results = pu.load_results('~/logs/NewHopperCmp/')

# Report how many runs were found before plotting them.
run_count = len(results)
print(run_count)

# All runs on one axis, averaged per group.
plot_options = dict(average_group=True, split_fn=lambda _: '')
pu.plot_results(results, **plot_options)
Example #23
0
def run_cartpole_dqn(num_batches=1000,
                     batch_size=32,
                     log_dir="./logs/dqn",
                     seed=0):
    """Train a ``CartPoleAgent`` on a monitored ``CartPoleEnv`` and return both.

    Args:
        num_batches: forwarded to ``train``.
        batch_size: forwarded to ``train``.
        log_dir: folder handed to the baselines monitor wrapper; created when
            missing.
        seed: used to seed both the environment and torch.

    Returns:
        ``(agent, env)`` where ``env`` is the monitor-wrapped environment.
    """
    os.makedirs(log_dir, exist_ok=True)

    raw_env = CartPoleEnv()
    raw_env.seed(seed)
    torch.manual_seed(seed)
    agent = CartPoleAgent(raw_env.observation_space, raw_env.action_space)

    # Imported only at this point, after the environment and agent exist.
    from baselines.bench import Monitor as BenchMonitor

    monitored_env = BenchMonitor(raw_env, log_dir, allow_early_resets=True)
    train(agent,
          monitored_env,
          num_batches=num_batches,
          batch_size=batch_size)
    return agent, monitored_env


if __name__ == "__main__":
    # Train with the default settings first.
    agent, env = run_cartpole_dqn()

    from baselines.common import plot_util as pu
    from matplotlib import pyplot as plt

    # Plot everything logged below "logs" and keep the figure on disk.
    results = pu.load_results("logs")
    f, ax = pu.plot_results(results)
    f.savefig("logs/dqn_cartpole.png")

    def record_every_episode(episode_id):
        return True

    # Wrap the trained environment so that every episode is recorded to ./vid.
    env = Monitor(env,
                  "./vid",
                  video_callable=record_every_episode,
                  force=True)
    visualize_it(env, agent)