Ejemplo n.º 1
0
def main():
    """Draw a bar chart of the last 'data_used' value for several DART partitions.

    Command-line arguments are parsed into a parameter dict, fixed training
    settings (``arch``, ``lr``, ``epochs``) are added, and for each partition
    the 'data_used' means and standard errors are loaded through
    ``utils.extract_data``. One bar is drawn per partition whose results
    could be loaded; the last element of the means is the bar height and the
    last element of the standard errors is the error bar.

    With ``--save`` the figure is written to ``images/`` as PDF and SVG,
    otherwise it is shown interactively.
    """

    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # The two flags only control plotting; they are removed so that they are
    # not part of the dict handed to utils.extract_data.
    should_save = params.pop('save')
    params.pop('normalize')

    plt.style.use('ggplot')

    iters = params['iters']

    title = 'test_dart'
    ptype = 'data_used'

    # Labels are collected together with the data so that a partition whose
    # results file is missing does not leave plt.bar with more labels than
    # bar heights.
    labels = []
    data = []
    errs = []
    for part in [450, 50, 10, 5]:
        params_dart = params.copy()
        params_dart['partition'] = part
        try:
            means, sems = utils.extract_data(params_dart, iters, title, sub_dir, ptype)
        except IOError:
            # No results for this partition: leave it out of the chart.
            continue
        labels.append('DART ' + str(part))
        data.append(means[-1])
        errs.append(sems[-1])

    plt.bar(labels, data, yerr=errs)
    # Drops the last three characters of the environment name
    # (presumably the "-vN" version suffix).
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_data.pdf")
        plt.savefig(save_path + "svg_" + str(params['envname']) + "_data.svg")
    else:
        plt.show()
Ejemplo n.º 2
0
def main():
    """Plot 'sup_loss' and 'surr_loss' curves for random-noise runs and DART.

    For every trace value a 'test_rand' run is plotted, followed by a single
    'test_dart' run. Each run contributes a dashed 'sup_loss' line and a
    solid, labelled 'surr_loss' line with a shaded +/- standard-error band,
    both in the same color. Colors are taken from ``color``, an iterator
    that is expected to exist outside this function.

    With ``--save`` the figure is written to ``images/`` as a PDF, otherwise
    it is shown interactively.
    """

    # Must match the sub-directory used within results.
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--iters', required=True, type=int, nargs='+')
    parser.add_argument('--update', required=True, nargs='+', type=int)
    parser.add_argument('--save', action='store_true', default=False)

    params = vars(parser.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # --save only controls output; keep it out of the extraction parameters.
    should_save = params.pop('save')

    plt.style.use('ggplot')

    iters = params['iters']

    def plot_loss_pair(run_params, run_title, line_color, label):
        """Plot dashed 'sup_loss' and solid, banded 'surr_loss' for one run."""
        sup_means, _ = utils.extract_data(run_params, iters, run_title,
                                          sub_dir, 'sup_loss')
        plt.plot(iters, sup_means, color=line_color, linestyle='--')

        surr_means, surr_sems = utils.extract_data(run_params, iters,
                                                   run_title, sub_dir,
                                                   'surr_loss')
        plt.plot(iters, surr_means, label=label, color=line_color)
        plt.fill_between(iters, (surr_means - surr_sems),
                         (surr_means + surr_sems),
                         alpha=.3,
                         color=line_color)
        # A 'sim_err' curve (linestyle ':') is intentionally not plotted.

    # Humanoid uses a larger top trace value than the other environments.
    if params['envname'] == 'Humanoid-v1':
        traces = [0.005, 0.5, 10.0]
    else:
        traces = [0.005, 0.5, 5.0]

    # Rand: one pair of curves per trace value.
    for trace in traces:
        params_rand = params.copy()
        params_rand['trace'] = trace  # You may adjust the trace to whatever you chose.
        del params_rand['update']
        plot_loss_pair(params_rand, 'test_rand', next(color),
                       'Rand Loss, p = ' + str(trace))

    # DART: keeps 'update' in its parameters, unlike the Rand runs.
    params_dart = params.copy()
    plot_loss_pair(params_dart, 'test_dart', next(color), 'DART')

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(iters)
    plt.legend(loc='upper right')

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss_rand.pdf")
    else:
        plt.show()
Ejemplo n.º 3
0
def main():
    """Plot reward curves for behavior cloning, DAgger, isotropic noise and DART.

    Parameters come from the command line and are passed through
    ``load_config``. The x axis is ``utils.compute_snapshot_ranges(params)``.
    Each curve is loaded with ``utils.extract_data``; a curve whose data
    raises ``IOError`` is skipped after logging "Not found.". With
    ``--normalize`` every curve (means and standard errors) is divided by the
    'sup_reward' means of the 'test_bc' run.

    With ``--save`` the figure is written to ``images/`` as PDF and SVG
    (without a legend); otherwise a legend is added and the figure is shown.
    """

    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--save', action='store_true', default=False)
    parser.add_argument('--normalize', action='store_true', default=False)
    parser.add_argument('--num_evals', required=True,
                        type=int)  # number of evaluations
    parser.add_argument('--max_data', required=True,
                        type=int)  # maximum amount of data
    parser.add_argument('--config', required=True, type=str)

    params = vars(parser.parse_args())
    params = load_config(params)

    # Plot-only flags are taken out of the parameter dict before it is used.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    # Update periods to plot; Humanoid uses its own set.
    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]

    plt.style.use('ggplot')

    # Best supervisor reward: loaded unconditionally (no IOError handling)
    # because it is the normalization reference. It is not plotted itself.
    sup_means, _ = utils.extract_data(params.copy(), 'test_bc', sub_dir,
                                      'sup_reward')

    def normalize(means, sems):
        """Divide by the supervisor means when --normalize was given."""
        if not should_normalize:
            return means, sems
        return means / sup_means, sems / sup_means

    def plot_curve(run_params, run_title, ptype, label):
        """Plot one labelled curve with its +/- sem band, if its data exists."""
        try:
            means, sems = utils.extract_data(run_params, run_title, sub_dir,
                                             ptype)
            means, sems = normalize(means, sems)
            line = plt.plot(snapshot_ranges, means, label=label)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3,
                             color=line[0].get_color())
        except IOError:
            log("Not found.")

    # A "DART Noisy Supervisor" curve (green, dashed) is intentionally
    # not plotted in this figure.

    # BC
    # NOTE(review): this reads 'sup_reward', not 'reward', for the curve
    # labelled "Behavior Cloning" - confirm that is intended.
    params_bc = params.copy()
    plot_curve(params_bc, 'test_bc', 'sup_reward', 'Behavior Cloning')

    # DAgger: the same dict is reused, only 'update_period' changes per curve.
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods_dagger:
        params_dagger['update_period'] = update_period
        plot_curve(params_dagger, 'test_dagger', 'reward',
                   'DAgger ' + str(update_period))

    # Isotropic noise (other scales that may be tried: 1.0, 10.0, 20.0)
    for scale in [1.0]:
        params_iso = params.copy()
        params_iso['scale'] = scale
        plot_curve(params_iso, 'test_iso', 'sup_reward', 'Iso ' + str(scale))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods_dart:
        params_dart['update_period'] = update_period
        plot_curve(params_dart, 'test_dart', 'sup_reward',
                   'DART part: ' + str(partition) + ", per: " +
                   str(update_period))

    # No title is set on this figure.
    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
        plt.savefig(save_path + str(params['envname']) + "_reward.svg")
    else:
        plt.legend()
        plt.show()
Ejemplo n.º 4
0
def main():
    """Draw a bar chart comparing 'data_used' for DAgger-B and DART.

    The DAgger-B bar is the sum, over all beta values whose results could be
    loaded, of the last 'data_used' mean; its error bar is the square root of
    the sum of the squared last standard errors. Each DART partition whose
    results could be loaded gets its own bar (last mean, last standard
    error). Runs whose data raises ``IOError`` are left out, and the
    DAgger-B bar is omitted entirely when no beta value could be loaded.

    With ``--save`` the figure is written to ``images/`` as PDF and SVG,
    otherwise it is shown interactively.
    """

    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # The two flags only control plotting; they are removed so that they are
    # not part of the dict handed to utils.extract_data.
    should_save = params.pop('save')
    params.pop('normalize')

    plt.style.use('ggplot')

    iters = params['iters']
    ptype = 'data_used'

    # DAgger B: collect the last mean / sem for every beta that has results.
    dagger_b_last_means = []
    dagger_b_last_sems = []
    for beta in [.1, .3, .5, .7, .9]:
        params_dagger_b = params.copy()
        params_dagger_b['beta'] = beta  # You may adjust the prior to whatever you chose.
        try:
            means, sems = utils.extract_data(params_dagger_b, iters,
                                             'test_dagger_b', sub_dir, ptype)
        except IOError:
            continue
        dagger_b_last_means.append(means[-1])
        dagger_b_last_sems.append(sems[-1])

    labels = []
    data = []
    errs = []

    # Only draw the aggregate bar when at least one beta was loaded; indexing
    # an empty array here previously raised IndexError.
    if dagger_b_last_means:
        labels.append('Dagger-B')
        data.append(np.sum(dagger_b_last_means))
        # Standard errors of the summed runs are combined in quadrature.
        errs.append(np.sqrt(np.sum(np.array(dagger_b_last_sems)**2.0)))

    # DART: labels are appended together with the data so that a missing
    # results file cannot leave plt.bar with more labels than bar heights.
    for part in [10]:
        params_dart = params.copy()
        params_dart['partition'] = part
        try:
            means, sems = utils.extract_data(params_dart, iters, 'test_dart',
                                             sub_dir, ptype)
        except IOError:
            continue
        labels.append('DART ' + str(part))
        data.append(means[-1])
        errs.append(sems[-1])

    plt.bar(labels, data, yerr=errs)
    # Drops the last three characters of the environment name
    # (presumably the "-vN" version suffix).
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_data2.pdf")
        plt.savefig(save_path + "svg_" + str(params['envname']) + "_data2.svg")
    else:
        plt.show()
def main():
    """Plot learner bias and variance curves for BC, DAgger, Switch and DART.

    Each method contributes a dashed 'biases_learner' line and a solid,
    labelled 'variances_learner' line with a shaded +/- standard-error band,
    both in the same color. Colors are taken from ``color``, an iterator
    that is expected to exist outside this function. Only the DART run
    tolerates missing data (``IOError`` is ignored for it); the other runs
    let the error propagate.

    With ``--save`` the figure is written to ``images/`` as a PDF, otherwise
    it is shown interactively.
    """

    # Must match the sub-directory used within results.
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--iters', required=True, type=int, nargs='+')
    parser.add_argument('--update', required=True, nargs='+', type=int)
    parser.add_argument('--save', action='store_true', default=False)

    params = vars(parser.parse_args())
    params['arch'] = [64]
    params['lr'] = .01
    params['epochs'] = 100

    # --save only controls output; keep it out of the extraction parameters.
    should_save = params.pop('save')

    plt.style.use('ggplot')

    iters = params['iters']

    def plot_bias_variance(run_params, run_title, line_color, label):
        """Plot dashed bias and solid, banded variance curves for one run."""
        bias_means, _ = utils.extract_data(run_params, iters, run_title,
                                           sub_dir, 'biases_learner')
        plt.plot(iters, bias_means, color=line_color, linestyle='--')

        var_means, var_sems = utils.extract_data(run_params, iters, run_title,
                                                 sub_dir, 'variances_learner')
        plt.plot(iters, var_means, label=label, color=line_color)
        plt.fill_between(iters, (var_means - var_sems),
                         (var_means + var_sems),
                         alpha=.3,
                         color=line_color)

    # 'mode' is written into the shared params before each copy, so every
    # per-method dict carries the mode of its own method.

    # Behavior Cloning
    params['mode'] = 'bc'
    params_bc = params.copy()
    # NOTE(review): 'update' is removed for this run; presumably the BC
    # results were stored without it - confirm.
    del params_bc['update']
    plot_bias_variance(params_bc, 'test_bc', next(color), 'Behavior Cloning')

    # DAgger
    beta = .5
    params['mode'] = 'dagger'
    params_dagger = params.copy()
    params_dagger['beta'] = beta  # You may adjust the prior to whatever you chose.
    del params_dagger['update']
    plot_bias_variance(params_dagger, 'test_dagger', next(color), 'DAgger')

    # Bias Variance Thresh
    params['mode'] = 'bias_variance_switch'
    params_bias_variance_switch = params.copy()
    del params_bias_variance_switch['update']
    plot_bias_variance(params_bias_variance_switch,
                       'test_bias_variance_switch', next(color), 'Switch')

    # Variants that are deliberately not plotted in this figure:
    #   - DAgger-B ('test_dagger_b', beta .5, sup_loss / surr_loss)
    #   - Isotropic noise ('test_iso', mode 'iso', scale 1.0 / 0.5 / 2.0)
    #   - DART min-var ('test_dart_min_var', reg_penalty 0.3 / 0.7)
    #   - MIXED ('test_mixed', mode 'mixed', dagger_mixed 1 / 0)

    # DART: keeps 'update' in its parameters and is skipped when its data
    # cannot be read.
    partition = 450
    params['mode'] = 'dart'
    params_dart = params.copy()
    params_dart['partition'] = partition
    dart_color = next(color)
    try:
        plot_bias_variance(params_dart, 'test_dart', dart_color,
                           'DART ' + str(partition))
    except IOError:
        pass

    plt.title("Learner Bias/Variance on " + str(params['envname']))
    plt.legend()
    plt.xticks(iters)
    plt.legend(loc='upper right')

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) +
                    "_bias_variance_learner.pdf")
    else:
        plt.show()
Ejemplo n.º 6
0
def main():
    """Plot reward curves for polynomial behavior cloning, DAgger and DART.

    Parameters come from the command line and are passed through
    ``load_config``. The x axis is ``utils.compute_snapshot_ranges(params)``.
    The 'sup_reward' of the 'test_bc' run is the supervisor reference: it is
    plotted directly when ``--normalize`` is off and used as the divisor for
    every other curve when it is on. Curves whose data raises ``IOError``
    are skipped after logging "Not found.".

    With ``--save`` the figure is written to ``images/`` as a PDF, otherwise
    it is shown interactively.
    """

    color = itertools.cycle(("#FCB716", "#2D3956", "#A0B2D8", "#988ED5", "#F68B20"))

    # Must match the sub-directory used within results.
    sub_dir = 'experts'

    parser = argparse.ArgumentParser()
    parser.add_argument('--envname', required=True)
    parser.add_argument('--t', required=True, type=int)
    parser.add_argument('--save', action='store_true', default=False)
    parser.add_argument('--normalize', action='store_true', default=False)
    parser.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    parser.add_argument('--max_data', required=True, type=int)  # maximum amount of data
    parser.add_argument('--config', required=True, type=str)

    params = vars(parser.parse_args())
    params = load_config(params)

    # Plot-only flags are taken out of the parameter dict before it is used.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    # Best supervisor reward: loaded unconditionally (no IOError handling)
    # because it is the normalization reference.
    sup_means, _ = utils.extract_data(params.copy(), 'test_bc', sub_dir,
                                      'sup_reward')
    if not should_normalize:
        plt.plot(snapshot_ranges, sup_means, label='Supervisor', color='green')

    def normalize(means, sems):
        """Divide by the supervisor means when --normalize was given."""
        if not should_normalize:
            return means, sems
        return means / sup_means, sems / sup_means

    def plot_curve(run_params, run_title, label):
        """Plot one labelled 'reward' curve with its +/- sem band, if present."""
        try:
            means, sems = utils.extract_data(run_params, run_title, sub_dir,
                                             'reward')
            means, sems = normalize(means, sems)
            line = plt.plot(snapshot_ranges, means, label=label)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=line[0].get_color())
        except IOError:
            log("Not found.")

    # Noisy supervisor reward using DART (line only, no error band)
    params_dart = params.copy()
    try:
        means, sems = utils.extract_data(params_dart, 'test_dart', sub_dir,
                                         'sup_reward')
        means, sems = normalize(means, sems)
        plt.plot(snapshot_ranges, means, label='DART Noisy Supervisor',
                 color='green', linestyle='--')
    except IOError:
        log("Not found.")

    # BC: one curve per polynomial degree, each with its 'poly<d>' config.
    params_bc = params.copy()
    for degree in [2, 3, 5, 6, 7]:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        plot_curve(params_bc, 'test_bc',
                   'Behavior Cloning deg: ' + str(degree))

    # DAgger
    update_periods = [2, 4, 8]
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods:
        params_dagger['update_period'] = update_period
        plot_curve(params_dagger, 'test_dagger',
                   'DAgger ' + str(update_period))

    # Isotropic noise ('test_iso', scales 1.0 / 10.0 / 20.0) is intentionally
    # not plotted in this figure.

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods:
        params_dart['update_period'] = update_period
        plot_curve(params_dart, 'test_dart',
                   'DART part: ' + str(partition) + ", per: " + str(update_period))

    plt.title("Reward on " + str(params['envname']))
    plt.legend()
    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])
    # This second call replaces the title set above.
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
    else:
        plt.legend()
        plt.show()
Ejemplo n.º 7
0
def main():
    """Plot 'sup_loss' and 'surr_loss' curves for polynomial BC, DAgger and DART.

    Parameters come from the command line and are passed through
    ``load_config``. The x axis is ``utils.compute_snapshot_ranges(params)``.
    Every run contributes a dashed 'sup_loss' line and a solid, labelled
    'surr_loss' line with a shaded +/- standard-error band in the same
    color. Runs whose data raises ``IOError`` are skipped after logging
    "Not found.". The y axis runs from 0 to the largest plotted 'surr_loss'
    mean, capped at 20.

    With ``--save`` the figure is written to ``images/`` as a PDF, otherwise
    it is shown interactively.
    """

    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True,
                    type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True,
                    type=int)  # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # --save only controls output; keep it out of the extraction parameters.
    should_save = params.pop('save')
    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    def plot_loss_pair(run_params, run_title, label):
        """Plot both loss curves for one run; return the max 'surr_loss' mean.

        The two plot types are passed as literals on every call. Keeping
        them in a shared variable that was overwritten inside the loop made
        every iteration after the first load 'surr_loss' for the dashed
        line as well. Returns 0 when the run's data cannot be read.
        """
        try:
            means, sems = utils.extract_data(run_params, run_title, sub_dir,
                                             'sup_loss')
            p = plt.plot(snapshot_ranges, means, linestyle='--')

            means, sems = utils.extract_data(run_params, run_title, sub_dir,
                                             'surr_loss')
            plt.plot(snapshot_ranges,
                     means,
                     label=label,
                     color=p[0].get_color())
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3,
                             color=p[0].get_color())
            return np.max(means)
        except IOError:
            log("Not found.")
            return 0

    upper_bound = 0

    # Behavior Cloning loss on sup distr: one run per polynomial degree.
    params_bc = params.copy()
    for degree in [2, 3, 5, 6, 7]:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        upper_bound = max(
            plot_loss_pair(params_bc, 'test_bc',
                           'Behavior Cloning degree: ' + str(degree)),
            upper_bound)

    # DAgger
    update_periods = [2, 4, 8]
    beta = .5

    params_dagger = params.copy()
    params_dagger['beta'] = beta
    for update_period in update_periods:
        params_dagger['update_period'] = update_period
        upper_bound = max(
            plot_loss_pair(params_dagger, 'test_dagger',
                           'DAgger per: ' + str(update_period)),
            upper_bound)

    # Isotropic noise ('test_iso', scales 1.0 / 10.0 / 20.0) is intentionally
    # not plotted in this figure.

    # DART
    partition = .1

    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods:
        params_dart['update_period'] = update_period
        upper_bound = max(
            plot_loss_pair(params_dart, 'test_dart',
                           'DART part: ' + str(partition) + ", per: " +
                           str(update_period)),
            upper_bound)

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(snapshot_ranges)
    plt.legend(loc='upper right')

    # Cap the y axis at 20; when nothing was plotted leave the limits alone
    # instead of requesting the degenerate range (0, 0).
    if upper_bound > 0:
        plt.ylim(0, min(20, upper_bound))

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss.pdf")
    else:
        plt.show()
Ejemplo n.º 8
0
def main():
    """Plot the reward of each learning method against the iteration count.

    Command-line flags: ``--envname``, ``--t``, ``--iters`` and ``--update``
    are required.  ``--save`` writes ``images/<envname>_reward.pdf`` instead
    of opening a window.  ``--normalize`` divides every curve except the plain
    'Supervisor' line by the supervisor's mean reward and clamps the y-axis
    to [0, 1.05].

    Statistics come from ``utils.extract_data``, whose result is unpacked as a
    ``(means, sems)`` pair for each method.  Nothing is caught here, so an
    error raised while loading any method aborts the whole plot.
    """
    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 50

    # The display flags are removed so they are not part of the parameter
    # dict handed to utils.extract_data.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    plt.style.use('ggplot')

    iters = params['iters']

    def without_update(base):
        """Return a copy of *base* with the 'update' entry removed."""
        trimmed = base.copy()
        del trimmed['update']
        return trimmed

    # Best supervisor reward (looked up from the behavior cloning runs,
    # without the 'update' entry).
    sup_means, _ = utils.extract_data(without_update(params), iters,
                                      'test_bc', sub_dir, 'sup_reward')
    # NOTE(review): with --normalize this line stays in raw reward units while
    # the y-axis is clamped to [0, 1.05]; confirm whether it should be
    # normalized or hidden in that mode.
    plt.plot(iters, sup_means, label='Supervisor', color='green')

    def normalize(means, sems):
        """Scale both arrays by the supervisor means when --normalize is set."""
        if should_normalize:
            return means / sup_means, sems / sup_means
        return means, sems

    def plot_reward(title, method_params, label):
        """Draw one method's mean reward curve with a +/- sems band."""
        c = next(color)
        means, sems = utils.extract_data(method_params, iters, title, sub_dir,
                                         'reward')
        means, sems = normalize(means, sems)
        plt.plot(iters, means, label=label, color=c)
        plt.fill_between(iters, (means - sems), (means + sems),
                         alpha=.3,
                         color=c)

    # Noisy supervisor reward using DART (dashed, no error band)
    means, sems = utils.extract_data(params.copy(), iters, 'test_dart',
                                     sub_dir, 'sup_reward')
    means, sems = normalize(means, sems)
    plt.plot(iters,
             means,
             label='DART Noisy Supervisor',
             color='green',
             linestyle='--')

    # BC
    plot_reward('test_bc', without_update(params), 'Behavior Cloning')

    # DAgger
    params_dagger = without_update(params)
    params_dagger['beta'] = .5
    plot_reward('test_dagger', params_dagger, 'DAgger')

    # DAgger B (keeps the 'update' entry)
    params_dagger_b = params.copy()
    params_dagger_b['beta'] = .5  # You may adjust the prior to whatever you chose.
    plot_reward('test_dagger_b', params_dagger_b, 'DAgger-B')

    # Isotropic noise
    params_iso = params.copy()
    params_iso['scale'] = 1.0
    del params_iso['update']
    plot_reward('test_iso', params_iso, 'Isotropic')

    # DART
    plot_reward('test_dart', params.copy(), 'DART')

    plt.title("Reward on " + str(params['envname']))
    plt.xticks(iters)
    plt.legend()
    if should_normalize:
        plt.ylim(0, 1.05)

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
    else:
        plt.show()
Ejemplo n.º 9
0
def main():
    """Draw a bar chart of the 'total_time' statistic for each learning method.

    Command-line flags: ``--envname``, ``--t``, ``--num_evals``, ``--max_data``
    and ``--config`` are required.  ``--save`` writes
    ``images/<envname>_time.pdf`` and ``.svg`` instead of opening a window.
    ``--normalize`` is accepted but has no effect on this chart.

    The parsed arguments are passed through ``load_config``.  For every method
    the first entry of the ``means`` array returned by ``utils.extract_data``
    becomes one bar.  A method whose results raise ``IOError`` is logged and
    skipped.

    Bars are laid out left to right as: DAgger (one per update period),
    isotropic noise (one per scale), behavior cloning, DART (one per update
    period).  Each bar's position is fixed when it is loaded, so a missing
    result leaves a gap instead of shifting the remaining bars or indexing
    past the end of the collected values.
    """
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)             # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)              # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The display flags are removed so they are not part of the parameter
    # dict handed to utils.extract_data.
    should_save = params.pop('save')
    params.pop('normalize')

    # The result is not needed for the bar chart; the call is kept because any
    # side effects of the helper are not visible from here -- TODO confirm and
    # remove.
    utils.compute_snapshot_ranges(params)

    update_periods_dart = [300]
    update_periods_dagger = [50, 300]
    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]

    scales = [1.0]
    partition = .1

    plt.style.use('ggplot')

    # x positions: DAgger bars first, then isotropic, then BC, then DART.
    first_iso_slot = len(update_periods_dagger)
    bc_slot = first_iso_slot + len(scales)
    first_dart_slot = bc_slot + 1

    bars = []  # (x position, value, legend label), kept in load order

    def add_bar(title, method_params, slot, label):
        """Load one method's 'total_time' and queue its bar; skip if missing."""
        try:
            means, _ = utils.extract_data(method_params, title, sub_dir,
                                          'total_time')
        except IOError:
            log("Not found.")
            return
        bars.append((slot, means[0], label))

    # BC
    add_bar('test_bc', params.copy(), bc_slot, 'Behavior Cloning')

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for offset, update_period in enumerate(update_periods_dagger):
        params_dagger['update_period'] = update_period
        add_bar('test_dagger', params_dagger, offset,
                'DAgger ' + str(update_period))

    # Isotropic noise
    for offset, scale in enumerate(scales):
        params_iso = params.copy()
        params_iso['scale'] = scale
        add_bar('test_iso', params_iso, first_iso_slot + offset,
                'Iso ' + str(scale))

    # DART
    params_dart = params.copy()
    params_dart['partition'] = partition
    for offset, update_period in enumerate(update_periods_dart):
        params_dart['update_period'] = update_period
        add_bar('test_dart', params_dart, first_dart_slot + offset,
                'DART part: ' + str(partition) + ", per: " +
                str(update_period))

    # Drawn in load order so each bar takes the next colour of the style cycle.
    for slot, value, label in bars:
        plt.bar([slot], [value], label=label)

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    plt.xticks([])
    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_time.pdf")
        plt.savefig(save_path + str(params['envname']) + "_time.svg")
    else:
        # The legend is only drawn in the interactive view.
        plt.legend()
        plt.show()
Ejemplo n.º 10
0
def main():
    """Plot supervisor-distribution and surrogate loss for each method.

    Command-line flags: ``--envname``, ``--t``, ``--iters`` and ``--update``
    are required.  ``--save`` writes ``images/<envname>_loss.pdf`` instead of
    opening a window.

    For every method two statistics are read through ``utils.extract_data``:
    'sup_loss' is drawn as a dashed line and 'surr_loss' as a solid, labelled
    line with a +/- sems band, both in the same colour.  A method whose
    results raise ``IOError`` is reported on stderr and left out entirely;
    the remaining methods are still plotted.
    """
    import sys

    # In the event that you change the sub_directory within results, change this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--iters', required=True, type=int, nargs='+')
    ap.add_argument('--update', required=True, nargs='+', type=int)
    ap.add_argument('--save', action='store_true', default=False)

    params = vars(ap.parse_args())
    params['arch'] = [64, 64]
    params['lr'] = .01
    params['epochs'] = 100

    # The display flag is removed so it is not part of the parameter dict
    # handed to utils.extract_data.
    should_save = params.pop('save')

    plt.style.use('ggplot')

    iters = params['iters']

    def plot_losses(title, method_params, label):
        """Draw both loss curves of one method, or nothing if data is missing."""
        # The colour is taken before loading so every method keeps its place
        # in the colour sequence even when an earlier one is skipped.
        c = next(color)
        try:
            # Both statistics are loaded before anything is drawn, so a
            # partially available method does not leave a stray dashed line.
            sup_means, _ = utils.extract_data(method_params, iters, title,
                                              sub_dir, 'sup_loss')
            means, sems = utils.extract_data(method_params, iters, title,
                                             sub_dir, 'surr_loss')
        except IOError:
            sys.stderr.write("No results found for " + title +
                             "; skipping.\n")
            return

        plt.plot(iters, sup_means, color=c, linestyle='--')
        plt.plot(iters, means, label=label, color=c)
        plt.fill_between(iters, (means - sems), (means + sems),
                         alpha=.3,
                         color=c)

    # Behavior Cloning loss on sup distr
    params_bc = params.copy()
    del params_bc['update']
    plot_losses('test_bc', params_bc, 'Behavior Cloning')

    # DAgger
    beta = .5  # You may adjust the prior to whatever you chose.
    params_dagger = params.copy()
    params_dagger['beta'] = beta
    del params_dagger['update']
    plot_losses('test_dagger', params_dagger, 'DAgger')

    # DAgger B (keeps the 'update' entry)
    params_dagger_b = params.copy()
    params_dagger_b['beta'] = beta
    plot_losses('test_dagger_b', params_dagger_b, 'DAgger-B')

    # Isotropic noise
    params_iso = params.copy()
    params_iso['scale'] = 1.0
    del params_iso['update']
    plot_losses('test_iso', params_iso, 'Isotropic Noise')

    # DART
    partition = 450
    params_dart = params.copy()
    params_dart['partition'] = partition
    plot_losses('test_dart', params_dart, 'DART ' + str(partition))

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(iters)
    plt.legend(loc='upper right')

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss.pdf")
    else:
        plt.show()