예제 #1
0
def main():
    """Launch the ``test_bc`` run.

    Parses the command line with the parser returned by
    ``framework.get_args()``, passes the resulting dict through
    ``framework.load_config`` and hands it, together with the run title and
    the ``Test`` class, to ``framework.startup``.
    """
    parser = framework.get_args()
    options = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_bc', options, Test)
예제 #2
0
def main():
    """Launch the ``test_iso`` run.

    Extends the parser from ``framework.get_args()`` with a required float
    ``--scale`` flag, then passes the parsed options through
    ``framework.load_config`` and on to ``framework.startup``.
    """
    parser = framework.get_args()
    # --scale: amount to scale the identity matrix
    parser.add_argument('--scale', type=float, required=True)

    options = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_iso', options, Test)
예제 #3
0
def main():
    """Launch the ``test_rand`` run.

    Extends the parser from ``framework.get_args()`` with a required float
    ``--trace`` flag, then passes the parsed options through
    ``framework.load_config`` and on to ``framework.startup``.
    """
    parser = framework.get_args()
    # --trace: trace on the amount of error one expects in the learner
    parser.add_argument('--trace', type=float, required=True)

    options = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_rand', options, Test)
예제 #4
0
def main():
    """Launch the ``test_dagger`` run.

    Extends the parser from ``framework.get_args()`` with two required flags
    (``--update_period`` as int, ``--beta`` as float), then passes the parsed
    options through ``framework.load_config`` and on to ``framework.startup``.
    """
    parser = framework.get_args()
    # --update_period: period between updates to the policy
    parser.add_argument('--update_period', type=int, required=True)
    # --beta: beta term, see Ross et al.
    parser.add_argument('--beta', type=float, required=True)

    options = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_dagger', options, Test)
예제 #5
0
def main():
    """Launch the ``test_dart`` run.

    Extends the parser from ``framework.get_args()`` with two required flags
    (``--update_period`` as int, ``--partition`` as float), passes the parsed
    options through ``framework.load_config``, validates ``partition`` and
    then calls ``framework.startup``.

    Raises:
        ValueError: if ``partition`` is not strictly between 0 and 1.
    """
    title = 'test_dart'

    ap = framework.get_args()
    ap.add_argument('--update_period', required=True,
                    type=int)  # period between updates to the policy
    ap.add_argument('--partition', required=True,
                    type=float)  # fraction strictly between 0 and 1
    args = vars(ap.parse_args())
    args = framework.load_config(args)

    # Validate explicitly rather than with `assert`, which is stripped when
    # Python runs with -O. The check runs after load_config, so it covers
    # the value actually handed to startup.
    partition = args['partition']
    if not 0.0 < partition < 1.0:
        raise ValueError(
            '--partition must be strictly between 0 and 1, got %r'
            % (partition,))

    framework.startup(title, args, Test)
예제 #6
0
def main():
    """Plot reward curves for BC, DAgger, isotropic noise and DART.

    Required flags: ``--envname``, ``--t``, ``--num_evals``, ``--max_data``
    and ``--config``. Optional flags:

    * ``--save``: write ``images/<envname>_reward.pdf`` and ``.svg`` instead
      of showing the figure (no legend is drawn on the saved figure).
    * ``--normalize``: divide both arrays of every curve by the first array
      returned for ``test_bc`` / ``sup_reward``, and clamp the y axis to
      ``[0, 1.05]``.

    Runs whose data raises ``IOError`` are logged and skipped. The initial
    supervisor load is not guarded, so a missing file there propagates.
    """
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True,
                    type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True,
                    type=int)  # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The two plotting flags are removed from params before params is passed
    # to any utils helper.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]

    plt.style.use('ggplot')

    # Supervisor reward: only used as the normalisation reference.
    sup_means, _ = utils.extract_data(params.copy(), 'test_bc', sub_dir,
                                      'sup_reward')

    def normalize(means, sems):
        """Divide both arrays by the supervisor means when --normalize is set."""
        if should_normalize:
            return means / sup_means, sems / sup_means
        return means, sems

    def plot_curve(run_params, title, ptype, label):
        """Plot one labelled curve with a shaded band; skip it if data is missing."""
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir,
                                             ptype)
        except IOError:
            log("Not found.")
            return
        means, sems = normalize(means, sems)
        line = plt.plot(snapshot_ranges, means, label=label)
        # Shade the band in the same colour as the curve it belongs to.
        plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                         alpha=.3,
                         color=line[0].get_color())

    # NOTE(review): BC, Iso and DART read 'sup_reward' while DAgger reads
    # 'reward'. The BC curve uses the same title/ptype as the normalisation
    # reference above -- confirm this is intended.

    # BC
    plot_curve(params.copy(), 'test_bc', 'sup_reward', 'Behavior Cloning')

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods_dagger:
        params_dagger['update_period'] = update_period
        plot_curve(params_dagger, 'test_dagger', 'reward',
                   'DAgger ' + str(update_period))

    # Isotropic noise
    scales = [1.0]
    for scale in scales:
        params_iso = params.copy()
        params_iso['scale'] = scale
        plot_curve(params_iso, 'test_iso', 'sup_reward', 'Iso ' + str(scale))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods_dart:
        params_dart['update_period'] = update_period
        plot_curve(params_dart, 'test_dart', 'sup_reward',
                   'DART part: ' + str(partition) + ", per: " +
                   str(update_period))

    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])

    if should_save:
        # Only create the output directory when something is written to it.
        save_path = 'images/'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
        plt.savefig(save_path + str(params['envname']) + "_reward.svg")
    else:
        plt.legend()
        plt.show()
예제 #7
0
def main():
    """Plot reward curves for polynomial BC, DAgger and DART.

    Required flags: ``--envname``, ``--t``, ``--num_evals``, ``--max_data``
    and ``--config``. Optional flags:

    * ``--save``: write ``images/<envname>_reward.pdf`` instead of showing
      the figure.
    * ``--normalize``: divide both arrays of every curve by the first array
      returned for ``test_bc`` / ``sup_reward``, and clamp the y axis to
      ``[0, 1.05]``. Without it, that supervisor curve is drawn in green.

    Runs whose data raises ``IOError`` are logged and skipped. The initial
    supervisor load is not guarded, so a missing file there propagates.
    """
    # In the event that you change the sub_directory within results, change
    # this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)   # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The two plotting flags are removed from params before params is passed
    # to any utils helper.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    # Best supervisor reward
    sup_means, _ = utils.extract_data(params.copy(), 'test_bc', sub_dir,
                                      'sup_reward')
    if not should_normalize:
        plt.plot(snapshot_ranges, sup_means, label='Supervisor', color='green')

    def normalize(means, sems):
        """Divide both arrays by the supervisor means when --normalize is set."""
        if should_normalize:
            return means / sup_means, sems / sup_means
        return means, sems

    def plot_curve(run_params, title, label):
        """Plot one labelled 'reward' curve with a shaded band; skip if data is missing."""
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir,
                                             'reward')
        except IOError:
            log("Not found.")
            return
        means, sems = normalize(means, sems)
        line = plt.plot(snapshot_ranges, means, label=label)
        # Shade the band in the same colour as the curve it belongs to.
        plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                         alpha=.3, color=line[0].get_color())

    # Noisy supervisor reward using DART
    # NOTE(review): no 'partition' / 'update_period' is set for this lookup,
    # unlike the DART curves below -- confirm against utils.extract_data.
    try:
        means, sems = utils.extract_data(params.copy(), 'test_dart', sub_dir,
                                         'sup_reward')
    except IOError:
        log("Not found.")
    else:
        means, _ = normalize(means, sems)
        plt.plot(snapshot_ranges, means, label='DART Noisy Supervisor',
                 color='green', linestyle='--')

    # BC, one curve per polynomial degree
    params_bc = params.copy()
    for degree in [2, 3, 5, 6, 7]:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        plot_curve(params_bc, 'test_bc',
                   'Behavior Cloning deg: ' + str(degree))

    update_periods = [2, 4, 8]

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods:
        params_dagger['update_period'] = update_period
        plot_curve(params_dagger, 'test_dagger',
                   'DAgger ' + str(update_period))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods:
        params_dart['update_period'] = update_period
        plot_curve(params_dart, 'test_dart',
                   'DART part: ' + str(partition) + ", per: " +
                   str(update_period))

    plt.legend()
    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])
    # Title is envname without its last three characters (e.g. a "-v1"
    # suffix). The earlier "Reward on ..." title was always overwritten by
    # this one, so it is no longer set.
    plt.title(params['envname'][:-3])

    if should_save:
        # Only create the output directory when something is written to it.
        save_path = 'images/'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
    else:
        plt.show()
예제 #8
0
def main():
    """Plot supervisor-distribution loss and surrogate loss per method.

    For each polynomial BC degree, DAgger update period and DART update
    period, two lines are drawn in the same colour: ``sup_loss`` dashed and
    unlabelled, and ``surr_loss`` solid, labelled and with a shaded band.

    Required flags: ``--envname``, ``--t``, ``--num_evals``, ``--max_data``
    and ``--config``. ``--save`` writes ``images/<envname>_loss.pdf``
    instead of showing the figure.

    Runs whose data raises ``IOError`` are logged and skipped. The y axis is
    capped at the largest surrogate mean seen, but never above 20.
    """
    # In the event that you change the sub_directory within results, change
    # this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True,
                    type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True,
                    type=int)  # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The plotting flag is removed from params before params is passed to
    # any utils helper.
    should_save = params.pop('save')
    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    def plot_losses(run_params, title, label):
        """Draw the sup_loss / surr_loss pair for one run.

        Returns the maximum of the surrogate means, or 0 when the data is
        missing (which leaves the running upper bound unchanged).

        The loss type is passed as a literal on each call. Previously a
        shared ``ptype`` variable was overwritten with 'surr_loss' inside
        the loop, so every iteration after the first drew 'surr_loss' on the
        dashed line too. Both series are loaded before anything is drawn, so
        a missing surrogate file no longer leaves a stray dashed line.
        """
        try:
            sup_means, _ = utils.extract_data(run_params, title, sub_dir,
                                              'sup_loss')
            surr_means, surr_sems = utils.extract_data(run_params, title,
                                                       sub_dir, 'surr_loss')
        except IOError:
            log("Not found.")
            return 0

        dashed = plt.plot(snapshot_ranges, sup_means, linestyle='--')
        colour = dashed[0].get_color()
        plt.plot(snapshot_ranges, surr_means, label=label, color=colour)
        plt.fill_between(snapshot_ranges,
                         (surr_means - surr_sems), (surr_means + surr_sems),
                         alpha=.3,
                         color=colour)
        return np.max(surr_means)

    upper_bound = 0

    # Behavior Cloning loss on sup distr, one pair per polynomial degree
    params_bc = params.copy()
    for degree in [2, 3, 5, 6, 7]:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        upper_bound = max(
            plot_losses(params_bc, 'test_bc',
                        'Behavior Cloning degree: ' + str(degree)),
            upper_bound)

    update_periods = [2, 4, 8]

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods:
        params_dagger['update_period'] = update_period
        upper_bound = max(
            plot_losses(params_dagger, 'test_dagger',
                        'DAgger per: ' + str(update_period)),
            upper_bound)

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods:
        params_dart['update_period'] = update_period
        upper_bound = max(
            plot_losses(params_dart, 'test_dart',
                        'DART part: ' + str(partition) + ", per: " +
                        str(update_period)),
            upper_bound)

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(snapshot_ranges)
    plt.legend(loc='upper right')

    # Leave the automatic limits alone when nothing raised the bound above
    # zero; ylim(0, 0) would request a zero-height axis.
    upper_bound = min(20, upper_bound)
    if upper_bound > 0:
        plt.ylim(0, upper_bound)

    if should_save:
        # Only create the output directory when something is written to it.
        save_path = 'images/'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        plt.savefig(save_path + str(params['envname']) + "_loss.pdf")
    else:
        plt.show()
예제 #9
0
def main():
    """Draw a bar chart of ``total_time`` for BC, DAgger, Iso and DART.

    For every run, the first element of the first array returned by
    ``utils.extract_data(..., 'total_time')`` becomes one bar. Bars are laid
    out left to right as DAgger (one per update period), isotropic noise
    (one per scale), BC, then DART.

    Required flags: ``--envname``, ``--t``, ``--num_evals``, ``--max_data``
    and ``--config``. ``--save`` writes ``images/<envname>_time.pdf`` and
    ``.svg`` instead of showing the figure. ``--normalize`` is accepted but
    has no effect on this chart.

    Runs whose data raises ``IOError`` are logged and skipped. Each bar's
    slot is fixed by its method, so a missing run leaves a gap rather than
    shifting the other bars or raising ``IndexError``.
    """
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)   # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The two flags are removed from params before params is passed to any
    # utils helper.
    should_save = params.pop('save')
    params.pop('normalize')

    # Result is unused by the bar chart. The call is kept in case
    # compute_snapshot_ranges validates params -- TODO(review): confirm and
    # remove.
    utils.compute_snapshot_ranges(params)

    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]
    scales = [1.0]

    plt.style.use('ggplot')

    # Fixed x slots per method. With the default lists this gives
    # DAgger -> 0, 1; Iso -> 2; BC -> 3; DART -> 4.
    iso_offset = len(update_periods_dagger)
    bc_position = iso_offset + len(scales)
    dart_offset = bc_position + 1

    bars = []  # (x position, height, legend label), in drawing order

    def collect(run_params, title, position, label):
        """Record one bar for a run, or log and skip it when data is missing."""
        try:
            means, _ = utils.extract_data(run_params, title, sub_dir,
                                          'total_time')
        except IOError:
            log("Not found.")
            return
        bars.append((position, means[0], label))

    # BC is collected first (although drawn at slot 3) so the bars are drawn
    # in the order BC, DAgger, Iso, DART.
    collect(params.copy(), 'test_bc', bc_position, 'Behavior Cloning')

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for offset, update_period in enumerate(update_periods_dagger):
        params_dagger['update_period'] = update_period
        collect(params_dagger, 'test_dagger', offset,
                'DAgger ' + str(update_period))

    # Isotropic noise
    for offset, scale in enumerate(scales):
        params_iso = params.copy()
        params_iso['scale'] = scale
        collect(params_iso, 'test_iso', iso_offset + offset,
                'Iso ' + str(scale))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for offset, update_period in enumerate(update_periods_dart):
        params_dart['update_period'] = update_period
        collect(params_dart, 'test_dart', dart_offset + offset,
                'DART part: ' + str(partition) + ", per: " +
                str(update_period))

    for position, height, label in bars:
        # Labels only matter for the legend drawn on the show path below.
        plt.bar([position], [height], label=label)

    plt.xticks([])
    if should_save:
        # Only create the output directory when something is written to it.
        save_path = 'images/'
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        plt.savefig(save_path + str(params['envname']) + "_time.pdf")
        plt.savefig(save_path + str(params['envname']) + "_time.svg")
    else:
        if bars:
            plt.legend()
        plt.show()