def main():
    """Run every trial and save the trial-averaged error bar chart.

    Parses the command line into an ``Options`` object, attaches the gym
    environment, the policy loaded from the experts pickle, a TensorFlow
    session and the supervisor to it, creates the plot/data directories,
    calls ``run_trial(opt, t)`` once per trial, and finally plots the
    per-timestep train/valid/test errors averaged over trials.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--arch', required=True, nargs='+', type=int)
    # The learning rate is fractional (default .01). Declaring it as int made
    # argparse reject values such as "--lr 0.001", so parse it as float.
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)

    opt = Options()
    opt.load_args(ap.parse_args())

    # opt.env initially holds the environment *name*; keep it under envname
    # before the attribute is replaced by the environment object itself.
    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    for directory in (plot_dir, data_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    # One row per trial, one column per timestep.
    train_errs = np.zeros((opt.trials, opt.t))
    valid_errs = np.zeros((opt.trials, opt.t))
    test_errs = np.zeros((opt.trials, opt.t))

    for t in range(opt.trials):
        train_errs[t, :], valid_errs[t, :], test_errs[t, :] = run_trial(opt, t)

    # Average over trials (axis 0) to get one value per timestep.
    train_err = np.mean(train_errs, axis=0)
    valid_err = np.mean(valid_errs, axis=0)
    test_err = np.mean(test_errs, axis=0)

    errs = [train_err, valid_err, test_err]
    labels = ['train', 'valid', 'test']

    width = .2
    index = np.arange(opt.t)
    for i, (err, label) in enumerate(zip(errs, labels)):
        # Shift each series by one bar width so the groups sit side by side.
        plt.bar(index + i * width, err, width, label=label)
    plt.legend()
    plt.ylim(0, .75)
    plt.savefig('/Users/JonathanLee/Desktop/bar_original_avg.png')
    utils.clear()
def plot(datas, labels, opt, title):
    """Draw one mean curve with a shaded +/- ``ste`` band per data set.

    The x axis covers ``datas[0].shape[1]`` points. The figure is written to
    ``opt.plot_dir + title + "_plot.png"`` and then cleared via
    ``utils.clear()``.
    """
    plt.style.use('ggplot')

    n_points = datas[0].shape[1]
    xs = list(range(n_points))

    for series, name in zip(datas, labels):
        center = statistics.mean(series)
        spread = statistics.ste(series)
        plt.plot(xs, center, label=name)
        # Shaded band around the mean curve.
        plt.fill_between(xs, center - spread, center + spread, alpha=.3)

    out_path = opt.plot_dir + title + "_plot.png"
    plt.savefig(out_path)
    utils.clear()
def make_bar_graphs(ocs, trajs_train, trajs_valid, trajs_test, opt, filename=None):
    """Score ``ocs[t]`` on the states at timestep ``t`` and plot the errors.

    For each of the ``opt.t`` timesteps, the element at index ``t`` of every
    trajectory in the train, validation and test sets is collected and passed
    to ``eval_oc`` together with ``ocs[t]``. The three resulting error series
    are drawn as a grouped bar chart, saved to ``filename`` (or to the
    hard-coded Desktop path when ``filename`` is None), and returned as
    ``[train_errs, valid_errs, test_errs]``.
    """
    horizon = opt.t
    train_errs = np.zeros(horizon)
    valid_errs = np.zeros(horizon)
    test_errs = np.zeros(horizon)
    splits = (trajs_train, trajs_valid, trajs_test)

    for step in range(horizon):
        detector = ocs[step]
        # Gather the step-th state of every trajectory, one list per split,
        # before any evaluation happens.
        states_per_split = [[traj[step] for traj in trajs] for trajs in splits]
        step_errs = [eval_oc(detector, states) for states in states_per_split]
        train_errs[step], valid_errs[step], test_errs[step] = step_errs

    plt.style.use('ggplot')

    errs = [train_errs, valid_errs, test_errs]
    bar_width = .2
    positions = np.arange(horizon)
    for offset, name in enumerate(('Training', 'Validation', 'Test')):
        plt.bar(positions + offset * bar_width, errs[offset], bar_width,
                label=name)

    plt.legend()
    plt.ylim(0, .75)

    if filename is None:
        target = '/Users/JonathanLee/Desktop/bar_new.png'
    else:
        target = filename
    plt.savefig(target)
    utils.clear()

    return errs
# plt.errorbar(x, mean, std, ecolor='black', capthick=1, elinewidth=1, color='#328ABD', capsize=5, errorevery=15) plt.fill_between(x, mean - std, mean + std, color='#328ABD', alpha=.5) plt.legend(loc='lower right', fontsize=20) plt.style.use('ggplot') # for i, (score, cutoff) in enumerate(zip(rec_scores[:50], rec_cutoffs[:50])): # plt.plot(score[:els]/cutoff[:els], color='#988ED5', alpha=.75, linewidth=1.5, label='DF Value' if not i else '_nolegend') # # plt.plot(cutoff[:els], color='black', linestyle='dashed', alpha=.5, label='Cutoff' if not i else '_nolegend_') # plt.plot(np.ones(els), color='black', linestyle='dashed') plt.ylim(.4, 1.1) plt.savefig('tmp_plots/opt.pdf') # plt.show() utils.clear() exit() # for i, (score, cutoff) in enumerate(zip(rec_mo_scores[:100], rec_mo_cutoffs[:100])): # plt.plot(score[:els]/cutoff[:els], color='blue', alpha=.25, linewidth=1.5, label='DF Value' if not i else '_nolegend') # # plt.plot(cutoff[:els], color='black', linestyle='dashed', alpha=.5, label='Cutoff' if not i else '_nolegend_') # plt.plot(np.ones(els), color='red', linestyle='dashed') # plt.ylim(0, 1.2) # plt.show() van_tallies = [] rand_tallies = [] rand_mo_tallies = []
def main():
    """Run repeated trials, checkpointing CSVs and plots after each one.

    Parses the command line into an ``Options`` object, attaches the gym
    environment, the policy loaded from the experts pickle, a TensorFlow
    session and the supervisor to it, and creates the output directories.
    After every completed trial the results gathered so far (rows
    ``0..t``) are written to CSV files under ``opt.data_dir`` and plotted,
    so an interrupted run still leaves usable output on disk.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--arch', required=True, nargs='+', type=int)
    # The learning rate is fractional (default .01). Declaring it as int made
    # argparse reject values such as "--lr 0.001", so parse it as float.
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    opt = Options()
    opt.load_args(ap.parse_args())

    # opt.env initially holds the environment *name*; keep it under envname
    # before the attribute is replaced by the environment object itself.
    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)
    opt.misc = Options()
    opt.misc.num_evaluations = 10

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    for directory in (plot_dir, data_dir,
                      plot_dir + '/scores', plot_dir + '/mags'):
        if not os.path.exists(directory):
            os.makedirs(directory)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    n_trials = opt.trials
    n_evals = opt.misc.num_evaluations

    # Rewards and correction frequency: (trial, evaluation).
    sup_rewards = np.zeros((n_trials, n_evals))
    lnr_rewards = np.zeros((n_trials, n_evals))
    rob_rewards = np.zeros((n_trials, n_evals))
    freq = np.zeros((n_trials, n_evals))

    # Errors: (trial, evaluation, timestep).
    train_err = np.zeros((n_trials, n_evals, opt.t))
    valid_err = np.zeros((n_trials, n_evals, opt.t))
    test_err = np.zeros((n_trials, n_evals, opt.t))
    robust_err = np.zeros((n_trials, n_evals, opt.t))

    # Bar-chart errors: (trial, timestep).
    train_bar_errs = np.zeros((n_trials, opt.t))
    valid_bar_errs = np.zeros((n_trials, opt.t))
    test_bar_errs = np.zeros((n_trials, opt.t))

    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and is also valid Python 3.
    print("Running Trials:\n\n")

    try:
        for t in range(n_trials):
            start_time = timer.time()
            results = run_trial(opt)

            sup_rewards[t, :] = results['sup_reward']
            lnr_rewards[t, :] = results['lnr_reward']
            rob_rewards[t, :] = results['rob_reward']
            train_err[t, :, :] = results['train_err']
            valid_err[t, :, :] = results['valid_err']
            test_err[t, :, :] = results['test_err']
            robust_err[t, :, :] = results['robust_err']
            freq[t, :] = results['correction_freq']
            train_bar_errs[t], valid_bar_errs[t], test_bar_errs[t] = results[
                'bar_errs']

            # Only rows 0..t hold real data; later rows are still zero.
            done = t + 1
            sup_rewards_save = sup_rewards[:done, :]
            lnr_rewards_save = lnr_rewards[:done, :]
            rob_rewards_save = rob_rewards[:done, :]
            train_err_save = train_err[:done, :, :]
            valid_err_save = valid_err[:done, :, :]
            test_err_save = test_err[:done, :, :]
            robust_err_save = robust_err[:done, :, :]
            freq_save = freq[:done, :]

            pd.DataFrame(sup_rewards_save).to_csv(
                opt.data_dir + 'sup_rewards.csv', index=False)
            pd.DataFrame(lnr_rewards_save).to_csv(
                opt.data_dir + 'lnr_rewards.csv', index=False)
            pd.DataFrame(rob_rewards_save).to_csv(
                opt.data_dir + 'rob_rewards.csv', index=False)

            # One CSV per timestep, since a DataFrame is two-dimensional.
            for tau in range(opt.t):
                pd.DataFrame(train_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'train_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(valid_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'valid_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(test_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'test_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(robust_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'robust_err_t' + str(tau) + '.csv',
                    index=False)

            pd.DataFrame(freq_save).to_csv(opt.data_dir + 'freq.csv',
                                           index=False)

            # Collapse the timestep axis for the line plots.
            train_err_avg = np.mean(train_err_save, axis=2)
            valid_err_avg = np.mean(valid_err_save, axis=2)
            test_err_avg = np.mean(test_err_save, axis=2)
            robust_err_avg = np.mean(robust_err_save, axis=2)

            utils.plot([sup_rewards_save, lnr_rewards_save, rob_rewards_save],
                       ['Supervisor', 'Learner', 'Robust Learner'],
                       opt, "Reward", colors=['red', 'blue', 'green'])
            utils.plot(
                [train_err_avg, valid_err_avg, test_err_avg, robust_err_avg],
                ['Training', 'Validation', 'Learner', 'Robust Learner'],
                opt, "Error", colors=['red', 'orange', 'blue', 'green'])
            utils.plot([freq_save], ['Frequency'], opt,
                       'Correction Frequency', colors=['green'])

            # Average only over the trials completed so far. Averaging the
            # full arrays would mix in the all-zero rows of trials that have
            # not run yet and understate every intermediate bar chart.
            bar_errs = [
                np.mean(train_bar_errs[:done], axis=0),
                np.mean(valid_bar_errs[:done], axis=0),
                np.mean(test_bar_errs[:done], axis=0)
            ]
            labels = ['train', 'valid', 'test']

            width = .2
            index = np.arange(opt.t)
            for i, (err, label) in enumerate(zip(bar_errs, labels)):
                plt.bar(index + i * width, err, width, label=label)
            plt.legend()
            plt.ylim(0, .75)
            plt.savefig('/Users/JonathanLee/Desktop/bar_new_avg.png')
            utils.clear()

            end_time = timer.time()
            print("Trial time: " + str(end_time - start_time))

    except KeyboardInterrupt:
        # Deliberate: Ctrl-C stops the run early. Everything from completed
        # trials was already written inside the loop.
        pass
def main():
    """Run recovery trials and save per-key bar charts and pickled results.

    Parses the command line, builds the environments, loads the network
    weights from ``meta/test/``, and calls ``run_trial(opt)`` once per trial.
    Each trial returns a dict with ``'rec'`` and ``'lnr'`` sub-dicts; their
    values are collected per key, divided by ``opt.misc.samples``, pickled
    to ``data_dir`` and plotted as mean bars with standard-error whiskers.
    Ctrl-C stops the trial loop early and the results gathered so far are
    still summarised.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)
    # These three flags are required, so argparse never uses their defaults.
    ap.add_argument('--weights', required=True, nargs='+', type=float,
                    default=[1.0, .1, .5])
    ap.add_argument('--ufact', required=True, default=4.0, type=float)
    ap.add_argument('--id', required=True, default=4.0, type=int)
    ap.add_argument('--nu', required=True, type=float)
    ap.add_argument('--gamma', required=True, type=float)

    opt = Options()
    args = ap.parse_args()
    opt.load_args(args)
    args = vars(args)

    # opt.env initially holds the environment *name*; keep it under envname
    # before the attribute is replaced by the environment object itself.
    opt.envname = opt.env
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env

    exp_id = args['id']
    opt.env.my_weights = args['weights']
    opt.env.ufact = args['ufact']

    opt.pi = net.Network([64, 64], .01, 300)
    suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
    prefix = 'meta/' + 'test' + '/' + opt.envname + '_' + str(exp_id)
    weights_path = prefix + '_weights' + suffix + '.txt'
    stats_path = prefix + '_stats' + suffix + '.txt'
    opt.pi.load_weights(weights_path, stats_path)
    opt.sup = Supervisor(opt.pi)

    opt.misc = Options()
    opt.misc.num_evaluations = 1
    opt.misc.samples = 75

    rec_results = {}
    lnr_results = {}

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    for directory in (plot_dir, data_dir,
                      plot_dir + '/scores', plot_dir + '/mags'):
        if not os.path.exists(directory):
            os.makedirs(directory)

    try:
        for _ in range(opt.trials):
            results = run_trial(opt)
            # Accumulate one entry per trial under each result key.
            for key, value in results['rec'].items():
                rec_results.setdefault(key, []).append(value)
            for key, value in results['lnr'].items():
                lnr_results.setdefault(key, []).append(value)
    except KeyboardInterrupt:
        # Deliberate: Ctrl-C ends data collection early; whatever has been
        # gathered is still summarised below.
        pass

    # Normalise each dict over its own keys so that a key present in only
    # one of them neither raises KeyError nor is left unnormalised.
    samples = float(opt.misc.samples)
    for collected in (rec_results, lnr_results):
        for key in list(collected):
            collected[key] = np.array(collected[key]) / samples

    rec_labels = sorted(rec_results)
    lnr_labels = sorted(lnr_results)
    rec_means = [np.mean(rec_results[key]) for key in rec_labels]
    rec_sems = [scipy.stats.sem(rec_results[key]) for key in rec_labels]
    lnr_means = [np.mean(lnr_results[key]) for key in lnr_labels]
    lnr_sems = [scipy.stats.sem(lnr_results[key]) for key in lnr_labels]

    # Pickle streams are binary; use 'wb' and close the handles promptly.
    with open(data_dir + 'rec_results.pkl', 'wb') as rec_file:
        pickle.dump(rec_results, rec_file)
    with open(data_dir + 'lnr_results.pkl', 'wb') as lnr_file:
        pickle.dump(lnr_results, lnr_file)

    width = .4

    plt.style.use('ggplot')
    rec_index = np.arange(len(rec_means))
    plt.bar(rec_index + 0 * width, rec_means, width, label='recovery',
            yerr=rec_sems)
    plt.legend()
    plt.xticks(rec_index, rec_labels)
    plt.ylim(0, 1)
    plt.savefig(plot_dir + "rec_bar_graph.png")
    utils.clear()

    plt.style.use('ggplot')
    lnr_index = np.arange(len(lnr_means))
    plt.bar(lnr_index + 0 * width, lnr_means, width, label='no recovery',
            yerr=lnr_sems)
    plt.legend()
    plt.xticks(lnr_index, lnr_labels)
    plt.ylim(0, 1)
    plt.savefig(plot_dir + "lnr_bar_graph.png")
    utils.clear()
def run_trial(opt):
    """Fit a one-class SVM on supervisor states and report per-step errors.

    Collects ``opt.iters`` supervisor trajectories (also used as learner
    training data), trains the learner, then collects ``opt.samples``
    (set to 100 here, on ``opt`` itself) supervisor and learner trajectories.
    The SVM is fitted on all training states; for every timestep the
    train, validation, test and Gaussian-sampled states are scored with
    ``eval_oc``, printed, and the train/validation errors are saved as a
    bar chart.

    Returns a dict of result entries. Note that ``train_err``,
    ``valid_err`` and ``test_err`` are rebound to scalar per-step errors
    inside the timestep loop, so whenever ``opt.t > 0`` the returned values
    are those of the last timestep rather than the initial zero arrays.
    """
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 100

    n_iters = opt.iters
    sup_reward = np.zeros(n_iters)
    lnr_reward = np.zeros(n_iters)
    rob_reward = np.zeros(n_iters)
    train_err = np.zeros(n_iters)
    valid_err = np.zeros(n_iters)
    test_err = np.zeros(n_iters)
    robust_err = np.zeros(n_iters)
    correction_freq = np.zeros(n_iters)

    # Supervisor demonstrations: training data for both learner and SVM.
    trajs_train = []
    for i in range(n_iters):
        print("\nIteration: " + str(i))
        states, int_actions, _taken, _reward = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        lnr.add_data(states, int_actions)
        trajs_train.append(states)

    lnr.train()

    print("\nCollecting validation samples...")
    trajs_valid = []
    trajs_test = []
    for _ in range(opt.samples):
        # Supervisor rollout first, learner rollout second.
        states_valid, _ia, _ta, _r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, _ia, _ta, _r = statistics.collect_traj(
            opt.env, lnr, opt.t, False, early_stop=False)
        trajs_valid.append(states_valid)
        trajs_test.append(states_test)
    print("Done collecting samples")

    # Flatten every training trajectory into one list of states.
    all_train_states = []
    for traj in trajs_train:
        all_train_states.extend(traj)
    oc.fit(all_train_states)

    horizon = opt.t
    train_errs = np.zeros(horizon)
    valid_errs = np.zeros(horizon)
    test_errs = np.zeros(horizon)
    adver_errs = np.zeros(horizon)

    for t in range(horizon):
        step_train = [traj[t] for traj in trajs_train]
        step_valid = [traj[t] for traj in trajs_valid]
        step_test = [traj[t] for traj in trajs_test]

        # Sample synthetic states from a Gaussian fitted to this step's
        # training states.
        step_train = np.array(step_train)
        cov = np.cov(step_train.T)
        mean = np.mean(step_train, axis=0)
        step_adver = np.random.multivariate_normal(mean, cov, opt.samples)

        # These rebind the zero arrays created above to scalars.
        train_err = eval_oc(oc, step_train)
        valid_err = eval_oc(oc, step_valid)
        test_err = eval_oc(oc, step_test)
        adver_err = eval_oc(oc, step_adver)

        print("Train Error: " + str(train_err))
        print("Valid Error: " + str(valid_err))
        print("Test Error: " + str(test_err))
        print("Adver Error: " + str(adver_err))
        print("Support vectors: " + str(oc.support_vectors_.shape))
        print("\n")

        train_errs[t] = train_err
        valid_errs[t] = valid_err
        test_errs[t] = test_err
        adver_errs[t] = adver_err

    plt.style.use('ggplot')
    # Only training and validation are charted; test is left out.
    # errs = [train_errs, valid_errs, test_errs]
    # labels = ['Training', 'Validation', 'Test']
    charted = [(train_errs, 'Training'), (valid_errs, 'Validation')]

    bar_width = .2
    positions = np.arange(opt.t)
    for offset, (series, name) in enumerate(charted):
        plt.bar(positions + offset * bar_width, series, bar_width, label=name)
    plt.legend()
    plt.ylim(0, .75)
    plt.savefig('/Users/JonathanLee/Desktop/bar_single.png')
    utils.clear()

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }