def main():
    """Entry point for the 'test_bc' experiment.

    Builds the shared argument parser from ``framework.get_args()``, turns the
    parsed namespace into a dict, lets ``framework.load_config`` process that
    dict, and hands the result to ``framework.startup`` together with the
    experiment title and the ``Test`` class.
    """
    parser = framework.get_args()
    cli_options = vars(parser.parse_args())
    run_config = framework.load_config(cli_options)
    framework.startup('test_bc', run_config, Test)
def main():
    """Entry point for the 'test_iso' experiment.

    Extends the shared parser from ``framework.get_args()`` with the required
    ``--scale`` option, then passes the parsed and config-processed arguments
    to ``framework.startup`` along with the ``Test`` class.
    """
    parser = framework.get_args()
    # Amount to scale the identity matrix.
    parser.add_argument('--scale', required=True, type=float)

    run_config = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_iso', run_config, Test)
def main():
    """Entry point for the 'test_rand' experiment.

    Extends the shared parser from ``framework.get_args()`` with the required
    ``--trace`` option, then passes the parsed and config-processed arguments
    to ``framework.startup`` along with the ``Test`` class.
    """
    parser = framework.get_args()
    # Trace on the amount of error one expects in the learner.
    parser.add_argument('--trace', required=True, type=float)

    run_config = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_rand', run_config, Test)
def main():
    """Entry point for the 'test_dagger' experiment.

    Extends the shared parser from ``framework.get_args()`` with the required
    ``--update_period`` and ``--beta`` options, then passes the parsed and
    config-processed arguments to ``framework.startup`` along with ``Test``.
    """
    parser = framework.get_args()
    # Period between updates to the policy.
    parser.add_argument('--update_period', required=True, type=int)
    # Beta term, see Ross et al.
    parser.add_argument('--beta', required=True, type=float)

    run_config = framework.load_config(vars(parser.parse_args()))
    framework.startup('test_dagger', run_config, Test)
def main():
    """Entry point for the 'test_dart' experiment.

    Extends the shared parser from ``framework.get_args()`` with the required
    ``--update_period`` and ``--partition`` options, validates ``partition``
    after ``framework.load_config`` has processed the arguments, and then
    calls ``framework.startup`` with the title, the arguments and ``Test``.

    Raises:
        ValueError: if ``partition`` is not strictly between 0 and 1.
    """
    title = 'test_dart'
    ap = framework.get_args()
    # Period between updates to the policy.
    ap.add_argument('--update_period', required=True, type=int)
    # Fraction strictly between 0 and 1 (enforced below).  The previous
    # comment described an integer in (1, 450), which contradicted both the
    # float type and the range check.
    ap.add_argument('--partition', required=True, type=float)
    args = vars(ap.parse_args())
    args = framework.load_config(args)

    # Explicit check instead of `assert`: asserts are stripped under
    # `python -O`, which would silently let an invalid partition through.
    partition = args['partition']
    if not 0.0 < partition < 1.0:
        raise ValueError(
            "--partition must be strictly between 0 and 1, got %r" % (partition,))

    framework.startup(title, args, Test)
def main():
    """Plot reward curves for each method on one environment.

    Parses the command line, passes the options through ``load_config``, and
    reads a ``(means, sems)`` pair per method from ``utils.extract_data``
    (``sems`` is presumably the standard error of the mean -- confirm against
    ``utils``).  Each curve is drawn against ``snapshot_ranges`` with a shaded
    ``means +/- sems`` band.

    With ``--normalize`` every curve is divided by the 'test_bc'
    ``sup_reward`` means and the y axis is fixed to [0, 1.05].  With
    ``--save`` the figure is written to ``images/<envname>_reward.pdf`` and
    ``.svg``; otherwise a legend is added and the figure is shown.

    The baseline 'test_bc' lookup is not guarded, so any error it raises
    propagates.  For the individual curves an ``IOError`` is logged as
    "Not found." and that curve is skipped.
    """
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)   # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The two display flags are removed from params before params is handed
    # to the utils helpers, exactly as the original did with `del`.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]

    plt.style.use('ggplot')

    # Supervisor reward from the 'test_bc' runs; used as the normalizer.
    sup_means, _ = utils.extract_data(params.copy(), 'test_bc', sub_dir, 'sup_reward')

    def normalize(means, sems):
        """Divide both series by the supervisor means when normalizing."""
        if should_normalize:
            return means / sup_means, sems / sup_means
        return means, sems

    def plot_curve(run_params, title, ptype, label):
        """Draw one labelled curve plus its error band; skip it if data is missing."""
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir, ptype)
            means, sems = normalize(means, sems)
            line = plt.plot(snapshot_ranges, means, label=label)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=line[0].get_color())
        except IOError:
            log("Not found.")

    # The order of the calls below fixes which style-cycle colour each method
    # gets, so it is kept as in the original: BC, DAgger, Iso, DART.

    # BC
    plot_curve(params.copy(), 'test_bc', 'sup_reward', 'Behavior Cloning')

    # DAgger
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods_dagger:
        params_dagger['update_period'] = update_period
        plot_curve(params_dagger, 'test_dagger', 'reward',
                   'DAgger ' + str(update_period))

    # Isotropic noise (other scales previously tried: 10.0, 20.0)
    scales = [1.0]
    for scale in scales:
        params_iso = params.copy()
        params_iso['scale'] = scale
        plot_curve(params_iso, 'test_iso', 'sup_reward', 'Iso ' + str(scale))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods_dart:
        params_dart['update_period'] = update_period
        plot_curve(params_dart, 'test_dart', 'sup_reward',
                   'DART part: ' + str(partition) + ", per: " + str(update_period))

    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        # Saved figures intentionally carry no legend or title.
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
        plt.savefig(save_path + str(params['envname']) + "_reward.svg")
    else:
        plt.legend()
        plt.show()
def main():
    """Plot reward curves for a sweep over polynomial degrees and update periods.

    Parses the command line, passes the options through ``load_config``, and
    reads a ``(means, sems)`` pair per run from ``utils.extract_data``.  It
    draws, in this order: the supervisor reward (only when not normalizing),
    the DART noisy-supervisor reward, Behavior Cloning for each polynomial
    degree, DAgger for each update period and DART for each update period.

    With ``--normalize`` every curve is divided by the 'test_bc'
    ``sup_reward`` means and the y axis is fixed to [0, 1.05].  With
    ``--save`` the figure is written to ``images/<envname>_reward.pdf``;
    otherwise it is shown.

    The baseline 'test_bc' lookup is not guarded, so any error it raises
    propagates.  For the individual curves an ``IOError`` is logged as
    "Not found." and that curve is skipped.
    """
    # In the event that you change the sub_directory within results, change
    # this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)   # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The two display flags are removed from params before params is handed
    # to the utils helpers, exactly as the original did with `del`.
    should_save = params.pop('save')
    should_normalize = params.pop('normalize')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    # Best supervisor reward; also the normalizer for every other curve.
    sup_means, _ = utils.extract_data(params.copy(), 'test_bc', sub_dir, 'sup_reward')
    if not should_normalize:
        plt.plot(snapshot_ranges, sup_means, label='Supervisor', color='green')

    def normalize(means, sems):
        """Divide both series by the supervisor means when normalizing."""
        if should_normalize:
            return means / sup_means, sems / sup_means
        return means, sems

    def plot_curve(run_params, title, ptype, label):
        """Draw one labelled curve plus its error band; skip it if data is missing."""
        try:
            means, sems = utils.extract_data(run_params, title, sub_dir, ptype)
            means, sems = normalize(means, sems)
            line = plt.plot(snapshot_ranges, means, label=label)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=line[0].get_color())
        except IOError:
            log("Not found.")

    # Noisy supervisor reward using DART: dashed green line, no error band.
    try:
        means, sems = utils.extract_data(params.copy(), 'test_dart', sub_dir, 'sup_reward')
        means, sems = normalize(means, sems)
        plt.plot(snapshot_ranges, means, label='DART Noisy Supervisor',
                 color='green', linestyle='--')
    except IOError:
        log("Not found.")

    # BC, one curve per polynomial degree (config name is 'poly<degree>').
    degrees = [2, 3, 5, 6, 7]
    params_bc = params.copy()
    for degree in degrees:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        plot_curve(params_bc, 'test_bc', 'reward',
                   'Behavior Cloning deg: ' + str(degree))

    # DAgger
    update_periods = [2, 4, 8]
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods:
        params_dagger['update_period'] = update_period
        plot_curve(params_dagger, 'test_dagger', 'reward',
                   'DAgger ' + str(update_period))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods:
        params_dart['update_period'] = update_period
        plot_curve(params_dart, 'test_dart', 'reward',
                   'DART part: ' + str(partition) + ", per: " + str(update_period))

    plt.title("Reward on " + str(params['envname']))
    plt.legend()
    plt.xticks(snapshot_ranges)
    if should_normalize:
        plt.ylim(0, 1.05)
        plt.yticks([0.0, 0.25, 0.5, 0.75, 1.0])

    # NOTE(review): the source this was recovered from had lost its
    # indentation, so it is unclear whether this title override belonged
    # inside the `if should_normalize:` block above.  It is applied
    # unconditionally here (replacing the "Reward on ..." title) -- confirm.
    plt.title(params['envname'][:-3])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_reward.pdf")
    else:
        # The legend was already added above; a second plt.legend() call here
        # was redundant and has been dropped.
        plt.show()
def main():
    """Plot 'sup_loss' and 'surr_loss' curves for each method on one environment.

    Parses the command line, passes the options through ``load_config``, and
    for every run draws two curves in the same colour: the ``sup_loss`` means
    as a dashed, unlabelled line and the ``surr_loss`` means as a solid,
    labelled line with a shaded ``means +/- sems`` band.  The y axis runs from
    0 to the largest ``surr_loss`` mean seen, capped at 20.

    With ``--save`` the figure is written to ``images/<envname>_loss.pdf``;
    otherwise it is shown.  If a run's data raises ``IOError`` the error is
    logged as "Not found." and the rest of that run is skipped.
    """
    # In the event that you change the sub_directory within results, change
    # this to match it.
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)   # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    # The display flag is removed from params before params is handed to the
    # utils helpers, exactly as the original did with `del`.
    should_save = params.pop('save')

    snapshot_ranges = utils.compute_snapshot_ranges(params)

    plt.style.use('ggplot')

    def plot_losses(run_params, title, label):
        """Draw both loss curves for one run.

        Returns the maximum of the 'surr_loss' means, or None when the data
        could not be read.
        """
        try:
            # Fix: the original kept the ptype in one variable that it
            # overwrote with 'surr_loss' inside the loop and never reset, so
            # from the second run onward the dashed curve silently showed
            # 'surr_loss' too.  Each lookup now names its ptype explicitly.
            means, sems = utils.extract_data(run_params, title, sub_dir, 'sup_loss')
            line = plt.plot(snapshot_ranges, means, linestyle='--')
            colour = line[0].get_color()

            means, sems = utils.extract_data(run_params, title, sub_dir, 'surr_loss')
            plt.plot(snapshot_ranges, means, label=label, color=colour)
            plt.fill_between(snapshot_ranges, (means - sems), (means + sems),
                             alpha=.3, color=colour)
            return np.max(means)
        except IOError:
            log("Not found.")
            return None

    upper_bound = 0

    def track(peak):
        """Fold one run's peak into the running upper bound."""
        if peak is None:
            return upper_bound
        return max(peak, upper_bound)

    # Behavior Cloning, one run per polynomial degree (config 'poly<degree>').
    degrees = [2, 3, 5, 6, 7]
    params_bc = params.copy()
    for degree in degrees:
        params_bc['config'] = 'poly' + str(degree)
        params_bc['degree'] = degree
        upper_bound = track(plot_losses(
            params_bc, 'test_bc', 'Behavior Cloning degree: ' + str(degree)))

    # DAgger
    update_periods = [2, 4, 8]
    params_dagger = params.copy()
    params_dagger['beta'] = .5
    for update_period in update_periods:
        params_dagger['update_period'] = update_period
        upper_bound = track(plot_losses(
            params_dagger, 'test_dagger', 'DAgger per: ' + str(update_period)))

    # DART
    partition = .1
    params_dart = params.copy()
    params_dart['partition'] = partition
    for update_period in update_periods:
        params_dart['update_period'] = update_period
        upper_bound = track(plot_losses(
            params_dart, 'test_dart',
            'DART part: ' + str(partition) + ", per: " + str(update_period)))

    plt.title("Loss on " + str(params['envname']))
    plt.xticks(snapshot_ranges)
    # A bare plt.legend() used to precede this call; it was immediately
    # replaced by this one, so only the positioned legend is kept.
    plt.legend(loc='upper right')

    # Cap the axis so a single diverging run does not flatten the others.
    upper_bound = min(20, upper_bound)
    plt.ylim(0, upper_bound)

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_loss.pdf")
    else:
        plt.show()
def main():
    """Draw a bar chart of the first 'total_time' mean for each method.

    Parses the command line, passes the options through ``load_config``, and
    for each run (BC, DAgger per update period, isotropic noise, DART) reads
    ``means`` from ``utils.extract_data(..., 'total_time')`` and draws
    ``means[0]`` as one bar.  Runs are drawn in that order, but placed on the
    x axis at positions 3, 0, 1, 2, 4 respectively.

    With ``--save`` the figure is written to ``images/<envname>_time.pdf`` and
    ``.svg``; otherwise it is shown.  If a run's data raises ``IOError`` the
    error is logged as "Not found." and that bar is left out.
    """
    sub_dir = 'experts'

    ap = argparse.ArgumentParser()
    ap.add_argument('--envname', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--save', action='store_true', default=False)
    ap.add_argument('--normalize', action='store_true', default=False)
    ap.add_argument('--num_evals', required=True, type=int)  # number of evaluations
    ap.add_argument('--max_data', required=True, type=int)   # maximum amount of data
    ap.add_argument('--config', required=True, type=str)

    params = vars(ap.parse_args())
    params = load_config(params)

    should_save = params.pop('save')
    # --normalize is still accepted, but times are plotted unnormalized; the
    # key is only removed so it does not reach the utils helpers.
    params.pop('normalize')

    # NOTE(review): the result is not used by the bar chart; the call is kept
    # in case compute_snapshot_ranges validates params -- confirm and drop.
    snapshot_ranges = utils.compute_snapshot_ranges(params)

    if params['envname'] == 'Humanoid-v1':
        update_periods_dart = [1000]
        update_periods_dagger = [200, 1000]
    else:
        update_periods_dart = [300]
        update_periods_dagger = [50, 300]

    plt.style.use('ggplot')

    # Collect every run as (title, params) in drawing order.
    runs = [('test_bc', params.copy())]

    for update_period in update_periods_dagger:
        params_dagger = params.copy()
        params_dagger['beta'] = .5
        params_dagger['update_period'] = update_period
        runs.append(('test_dagger', params_dagger))

    # Isotropic noise (other scales previously tried: 10.0, 20.0)
    scales = [1.0]
    for scale in scales:
        params_iso = params.copy()
        params_iso['scale'] = scale
        runs.append(('test_iso', params_iso))

    partition = .1
    for update_period in update_periods_dart:
        params_dart = params.copy()
        params_dart['partition'] = partition
        params_dart['update_period'] = update_period
        runs.append(('test_dart', params_dart))

    # x position for the k-th run; runs beyond the table keep their own index.
    bar_slots = [3, 0, 1, 2, 4]

    # Fix: the original collected only the successful results and then wrote
    # inds[0]..inds[4] unconditionally, which raised IndexError as soon as one
    # run was missing.  The slot is now tied to the run itself, so a missing
    # run leaves a gap and the remaining bars keep their positions.
    for attempt, (title, run_params) in enumerate(runs):
        slot = bar_slots[attempt] if attempt < len(bar_slots) else attempt
        try:
            means, _ = utils.extract_data(run_params, title, sub_dir, 'total_time')
        except IOError:
            log("Not found.")
            continue
        plt.bar([slot], [means[0]])

    save_path = 'images/'
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    plt.xticks([])

    if should_save:
        plt.savefig(save_path + str(params['envname']) + "_time.pdf")
        plt.savefig(save_path + str(params['envname']) + "_time.svg")
    else:
        plt.legend()
        plt.show()