# Shared imports for the run_trial variants below. The numpy/pandas/scipy/
# matplotlib/sklearn imports are standard; knet, learner, statistics, stats,
# and utils are assumed to be project-local modules (their names are taken
# from the calls in the code).
import os

import IPython
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
from sklearn import svm

import knet
import learner
import statistics
import stats
import utils


def run_trial(opt):
    # One one-class SVM per timestep, modeling the supervisor's state
    # distribution at each point along the trajectory.
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.5, nu=.01) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 10

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs_train = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)
        lnr.train()

        # Refit each per-timestep SVM on the states observed at that timestep.
        trajs_train_array = np.array(trajs_train)
        for t in range(opt.t):
            X = trajs_train_array[:, t, :]
            ocs[t].fit(X)

        # Every fifth iteration, compare per-timestep outlier rates on the
        # training trajectories against freshly collected supervisor rollouts.
        if i % 5 == 0:
            trajs_valid = []
            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False)
                trajs_valid.append(states_valid)

            train_oc_errs = eval_ocs(ocs, trajs_train, opt)
            valid_oc_errs = eval_ocs(ocs, trajs_valid, opt)
            print "Train errs: " + str(train_oc_errs)
            print "Valid errs: " + str(valid_oc_errs)
            print "Max train err: " + str(np.amax(train_oc_errs))
            print "Max valid err: " + str(np.amax(valid_oc_errs))

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
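# `eval_ocs` is called above and in later variants but is not defined in this
# section. A minimal sketch of what it plausibly computes, based on the
# per-timestep fitting loop above and on how its return value is used (one
# outlier rate per timestep, compared against np.amax); treat this as an
# assumed reconstruction rather than the project's actual implementation. The
# optional `opt` argument exists only because the first variant passes it.
def eval_ocs(ocs, trajs, opt=None):
    """Fraction of states flagged as outliers (-1) by each per-timestep SVM."""
    trajs = np.array(trajs)  # shape: (num_trajs, T, state_dim)
    errs = np.zeros(len(ocs))
    for t in range(len(ocs)):
        preds = ocs[t].predict(trajs[:, t, :])  # +1 inlier, -1 outlier
        errs[t] = np.mean(preds == -1)
    return errs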
def run_trial(opt):
    # Note: this one-class SVM is constructed but never used in this variant.
    oc = svm.OneClassSVM(kernel='rbf', nu=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))
    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        lnr.train()
        print "\t" + str(lnr.acc())

        # Evaluate the learner on fresh rollouts after each round of training.
        for j in range(opt.samples):
            _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
            lnr_rewards[j, i] = r
            lnr_perf[j, i] = opt.env.metric()

    print "Average success: " + str(sup_rewards)
    print "Learner success: \n" + str(lnr_rewards)

    pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')
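# `plot` is used above but not defined in this section. A plausible sketch,
# assuming it plots the per-iteration mean of each (samples, iters) array and
# saves the figure under opt.plot_dir; the file name, axis labels, and the
# averaging over rollouts are assumptions.
def plot(curves, labels, opt, title):
    plt.figure()
    for curve, label in zip(curves, labels):
        plt.plot(np.mean(curve, axis=0), label=label)
    plt.xlabel('Iteration')
    plt.ylabel(title)
    plt.legend()
    plt.savefig(opt.plot_dir + title + '.png')
    plt.close()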
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        lnr.add_data(states, int_actions)

        # Fit the one-class SVM on all supervisor states collected so far and
        # report the fraction flagged as outliers (training error).
        oc.fit(lnr.X)
        preds = oc.predict(lnr.X)
        train_err = len(preds[preds == -1]) / float(len(preds))
        print "\nTraining error: " + str(train_err)

        lnr.train()

    # Final evaluation: supervisor (validation) and learner (test) rollouts.
    sup_rewards = np.zeros(20)
    lnr_rewards = np.zeros(20)
    X_valid = []
    X_test = []
    for k in range(20):
        states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions_test, _, r_test = statistics.collect_traj(
            opt.env, lnr, opt.t, False)
        sup_rewards[k] = r_valid
        lnr_rewards[k] = r_test
        X_valid += states_valid
        X_test += states_test

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation error: " + str(valid_err)

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))
    print "Test error: " + str(test_err)

    print "\n\n"
    print "Average sup reward: " + str(np.mean(sup_rewards)) + " +/- " + str(
        scipy.stats.sem(sup_rewards))
    print "Average lnr reward: " + str(np.mean(lnr_rewards)) + " +/- " + str(
        scipy.stats.sem(lnr_rewards))
    print "\n\n"

    def dec(u):
        # Decision score of the state reached by taking action u from the
        # current state, restoring the environment afterwards.
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    # Robust rollouts: take a recovery step whenever the decision score drops
    # below the threshold.
    rewards = np.zeros(20)
    rec_counts = np.zeros(20)
    X_robust = []
    for k in range(20):
        s = opt.env.reset()
        states = [s]
        for t in range(opt.t):
            score = oc.decision_function([s])[0, 0]
            # print "Decision score: " + str(score)
            if score < .1:
                # Recovery: step along the finite-difference gradient of the
                # decision function to move back toward inlier states.
                alpha = .1
                a = alpha * utils.finite_diff1(
                    np.zeros(opt.env.action_space.shape), dec)
                # print "Recovering: " + str(a)
                rec_counts[k] += 1.0
                s, r, done, _ = opt.env.step(a)
            else:
                a = lnr.intended_action(s)
                s, r, done, _ = opt.env.step(a)
            rewards[k] += r
            states.append(s)
            # if done == True:
            #     break
        X_robust += states

    robust_preds = oc.predict(X_robust)
    robust_err = len(robust_preds[robust_preds == -1]) / float(
        len(robust_preds))
    print "Robust error: " + str(robust_err)

    rec_freq = np.mean(rec_counts / float(opt.t))
    print "Recovery frequency: " + str(rec_freq)
    print "Robust rewards: " + str(np.mean(rewards)) + " +/- " + str(
        scipy.stats.sem(rewards))
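# `utils.finite_diff1(u0, f)` is used above to pick the recovery direction but
# is not shown here. A minimal sketch consistent with that call site: a
# one-sided finite-difference estimate of the gradient of the scalar function
# f at u0. The step size and the project's actual implementation are
# assumptions.
def finite_diff1(u0, f, eps=1e-2):
    u0 = np.array(u0, dtype=float)
    grad = np.zeros(len(u0))
    f0 = f(u0)
    for d in range(len(u0)):
        u = np.array(u0)
        u[d] += eps
        grad[d] = (f(u) - f0) / eps
    return grad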
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 20

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        # Evaluate every (opt.iters / opt.misc.num_evaluations) iterations.
        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            lnr.train()
            fit_all(ocs, trajs_train)

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = stats.collect_robust_traj_multiple(
                    opt.env, opt.sim, lnr, ocs, opt.t, opt, False, False)

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust
                freqs[j] = freq

                if j == 0:
                    utils.plot([np.array([lnr_score]), np.array([rob_score])],
                               ['Learner', 'Robust Learner'], opt,
                               "scores/DecisionScores" + str(i),
                               colors=['blue', 'green'])
                    utils.plot([np.array([mags])], ['Robust Learner'], opt,
                               "mags/RecoveryMagnitudes" + str(i),
                               colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)
            correction_freq[index] = np.mean(freqs)

            if index == (opt.misc.num_evaluations - 1):
                bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid,
                                           trajs_test, opt)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        "bar_errs": bar_errs
    }
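# `fit_all` is called above but not shown in this section. Based on the
# inline per-timestep fitting loop in the first variant, it plausibly refits
# each one-class SVM on the states observed at its timestep; this is an
# assumed reconstruction, not the project's actual helper.
def fit_all(ocs, trajs):
    trajs = np.array(trajs)  # shape: (num_trajs, T, state_dim)
    for t in range(len(ocs)):
        ocs[t].fit(trajs[:, t, :])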
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))
    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()
        lnr.add_data(states, int_actions)

        # lnr.train()
        # print "\t" + str(lnr.acc())
        # for j in range(opt.samples):
        #     _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
        #     lnr_rewards[j, i] = r
        #     lnr_perf[j, i] = opt.env.metric()

    # Fit the one-class SVM on all supervisor states and report the fraction
    # flagged as outliers.
    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "Training error: " + str(train_err)

    X_valid = []
    for k in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        X_valid += states

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation error: " + str(valid_err)

    lnr.train()

    X_test = []
    for k in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, lnr, opt.t)
        X_test += states

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))
    print "Test error: " + str(test_err)

    # Single rendered rollout of the learner, tracking the decision score at
    # each step (the recovery branch is left commented out in this variant).
    s = opt.env.reset()
    reward = 0.0
    x = opt.env.get_x()

    def dec(u):
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    states_visited = []
    for t in range(opt.t):
        opt.env.render()
        score = oc.decision_function([s])
        print "\tDecision score: " + str(score)
        # if score < .2 and False:
        #     alpha = 1.0
        #     a = alpha * utils.finite_diff1(np.zeros(opt.env.action_space.shape[0]), dec)
        #     print "\t\tRecovering: " + str(a)
        #     s, r, done, _ = opt.env.step(a)
        #     x = opt.env.get_x()
        # else:
        a = lnr.intended_action(s)
        s, r, done, _ = opt.env.step(a)
        x = opt.env.get_x()
        states_visited.append(s)
        if done:
            break

    preds = oc.predict(states_visited)
    err = len(preds[preds == -1]) / float(len(preds))
    print "Error: " + str(err)
    print "\nDone after " + str(t + 1) + " steps"

    # print "Average success: " + str(sup_rewards)
    # print "Learner success: \n" + str(lnr_rewards)

    # pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    # pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    # pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    # pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    # plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    # plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')

    IPython.embed()
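# `statistics.collect_traj` is the basic rollout helper used by every variant
# in this section but is not shown. A minimal sketch consistent with its call
# sites (env, agent, horizon, optional visualize / early_stop flags) and its
# four return values. The default flag values, and the simplification that
# the taken action equals the intended one, are assumptions; in the project
# the two may differ (e.g. injected noise on the supervisor).
def collect_traj(env, agent, T, visualize=False, early_stop=True):
    s = env.reset()
    states, int_actions, taken_actions = [], [], []
    reward = 0.0
    for t in range(T):
        if visualize:
            env.render()
        a_int = agent.intended_action(s)
        a = a_int  # assumed: taken action coincides with the intended action
        states.append(s)
        int_actions.append(a_int)
        taken_actions.append(a)
        s, r, done, _ = env.step(a)
        reward += r
        if early_stop and done:
            break
    return states, int_actions, taken_actions, reward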
def run_trial(opt):
    # oc = TrajSV(nu=.01, gamma=.1)
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.1)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 10

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        trajs.append(states)
        lnr.add_data(states, int_actions)

        oc.fit(lnr.X)
        # oc.fit(trajs)
        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []

        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            # states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(opt.env, lnr, oc, opt.t, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            # X_test += states_test
            # X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            # lnr_iter_rewards[j] = r_test
            # rob_iter_rewards[j] = r_robust
            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score])], ['Learner'], opt, "DecisionScore" + str(i), colors=['blue'])
            #     utils.plot([np.array([rob_score])], ['Robust Learner'], opt, "RobustDecisionScore" + str(i), colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        # test_err[i] = eval_oc(oc, X_test)
        # robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        # rob_reward[i] = np.mean(rob_iter_rewards)
        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

        # Fraction of training trajectories containing at least one state
        # flagged as an outlier by the one-class SVM.
        n = len(trajs)
        flags = np.zeros(n)
        for k, traj in enumerate(trajs):
            for state in traj:
                pred = oc.predict([state])[0]
                if pred == -1:
                    flags[k] = 1.0
                    break
        print "Fraction of partial trajectories: " + str(np.mean(flags))

        # Fraction of training trajectories flagged as outliers at each timestep.
        counts = np.zeros(opt.t)
        for t in range(opt.t):
            for traj in trajs:
                state = traj[t]
                pred = oc.predict([state])[0]
                if pred == -1:
                    counts[t] += 1.0
        counts = counts / float(n)
        print "Max time fraction: " + str(np.amax(counts))
        print "Time fractions:\n" + str(counts)
        print "\n"

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
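# `eval_oc` is used by several variants but not defined in this section. The
# inline error computations elsewhere in this section
# (len(preds[preds == -1]) / float(len(preds))) suggest the following; treat
# it as an assumed reconstruction of the helper.
def eval_oc(oc, X):
    """Fraction of states in X flagged as outliers (-1) by the one-class SVM."""
    preds = oc.predict(X)
    return len(preds[preds == -1]) / float(len(preds))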
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 100

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            lnr.train()
            fit_all(ocs, trajs_train)

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                print "\t\tSample: " + str(j) + " rolling out..."
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                print "\t\tDone rolling out"

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust
                freqs[j] = freq

            index = i / (opt.iters / opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)
            correction_freq[index] = np.mean(freqs)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
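# `statistics.collect_score_traj_multiple`, used by the per-timestep variants
# above, is not shown in this section. A sketch assuming it simply rolls out
# the learner while recording the decision score of the timestep-t SVM at each
# step. The return ordering mirrors the call sites above; the fifth value
# (ignored there) is assumed to be a correction frequency of 0 since no
# recovery is applied.
def collect_score_traj_multiple(env, lnr, ocs, T, visualize=False,
                                early_stop=False):
    s = env.reset()
    states, actions, scores = [], [], []
    reward = 0.0
    for t in range(T):
        if visualize:
            env.render()
        scores.append(ocs[t].decision_function([s])[0, 0])
        a = lnr.intended_action(s)
        states.append(s)
        actions.append(a)
        s, r, done, _ = env.step(a)
        reward += r
    return states, actions, actions, reward, 0.0, scores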
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 5

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)
        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []

        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(
                opt.env, lnr, oc, opt.t, False)
            states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(
                opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            X_test += states_test
            X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            lnr_iter_rewards[j] = r_test
            rob_iter_rewards[j] = r_robust
            freqs[j] = freq

            if j == 0:
                utils.plot([np.array([lnr_score])], ['Learner'], opt,
                           "DecisionScore" + str(i), colors=['blue'])
                utils.plot([np.array([rob_score])], ['Robust Learner'], opt,
                           "RobustDecisionScore" + str(i), colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        test_err[i] = eval_oc(oc, X_test)
        robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        rob_reward[i] = np.mean(rob_iter_rewards)
        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
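# `statistics.collect_robust_traj` is not shown in this section. A sketch of
# what it plausibly does, modeled on the explicit recovery loop in the third
# variant above (decision-score threshold .1, step size .1, finite-difference
# recovery direction). The return ordering matches the call site; the exact
# thresholds and score bookkeeping are assumptions.
def collect_robust_traj(env, lnr, oc, T, visualize=False):
    s = env.reset()
    states, int_actions, taken_actions, scores = [], [], [], []
    reward, corrections = 0.0, 0.0

    def dec(u):
        # Score of the state reached by u from the current state, restored afterwards.
        x = env.get_x()
        s_next, _, _, _ = env.step(u)
        env.set_x(x)
        return oc.decision_function([s_next])[0, 0]

    for t in range(T):
        if visualize:
            env.render()
        score = oc.decision_function([s])[0, 0]
        scores.append(score)
        intended = lnr.intended_action(s)
        if score < .1:
            # Recovery: step along the finite-difference gradient of the score.
            a = .1 * utils.finite_diff1(np.zeros(env.action_space.shape), dec)
            corrections += 1.0
        else:
            a = intended
        states.append(s)
        int_actions.append(intended)
        taken_actions.append(a)
        s, r, done, _ = env.step(a)
        reward += r

    freq = corrections / float(T)
    return states, int_actions, taken_actions, reward, freq, scores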
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 100

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs_train = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        lnr.add_data(states, int_actions)
        trajs_train.append(states)
        lnr.train()

    print "\nCollecting validation samples..."
    trajs_valid = []
    trajs_test = []
    for j in range(opt.samples):
        states_valid, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, lnr, opt.t, False, early_stop=False)
        trajs_valid.append(states_valid)
        trajs_test.append(states_test)
    print "Done collecting samples"

    # Fit a single one-class SVM on all training states pooled across timesteps.
    X_train = []
    for traj in trajs_train:
        X_train += traj
    oc.fit(X_train)

    train_errs = np.zeros(opt.t)
    valid_errs = np.zeros(opt.t)
    test_errs = np.zeros(opt.t)
    adver_errs = np.zeros(opt.t)

    for t in range(opt.t):
        X_train = []
        for traj in trajs_train:
            X_train.append(traj[t])
        X_valid = []
        for traj in trajs_valid:
            X_valid.append(traj[t])
        X_test = []
        for traj in trajs_test:
            X_test.append(traj[t])

        # "Adversarial" samples drawn from a Gaussian fit to the training
        # states at this timestep.
        X_train = np.array(X_train)
        cov = np.cov(X_train.T)
        mean = np.mean(X_train, axis=0)
        X_adver = np.random.multivariate_normal(mean, cov, opt.samples)

        train_err = eval_oc(oc, X_train)
        valid_err = eval_oc(oc, X_valid)
        test_err = eval_oc(oc, X_test)
        adver_err = eval_oc(oc, X_adver)

        print "Train Error: " + str(train_err)
        print "Valid Error: " + str(valid_err)
        print "Test Error: " + str(test_err)
        print "Adver Error: " + str(adver_err)
        print "Support vectors: " + str(oc.support_vectors_.shape)
        print "\n"

        train_errs[t] = train_err
        valid_errs[t] = valid_err
        test_errs[t] = test_err
        adver_errs[t] = adver_err

    plt.style.use('ggplot')
    # errs = [train_errs, valid_errs, test_errs]
    # labels = ['Training', 'Validation', 'Test']
    errs = [train_errs, valid_errs]
    labels = ['Training', 'Validation']
    width = .2
    index = np.arange(opt.t)
    for k, (err, label) in enumerate(zip(errs, labels)):
        plt.bar(index + k * width, err, width, label=label)
    plt.legend()
    plt.ylim(0, .75)
    plt.savefig('/Users/JonathanLee/Desktop/bar_single.png')
    utils.clear()

    # Note: train_err, valid_err, and test_err were rebound to scalars in the
    # per-timestep loop above, so the returned values reflect only the final
    # timestep; robust_err and the reward arrays are still the unfilled arrays
    # allocated at the top of the function.
    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
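# A hypothetical harness for the variants above: run a trial several times and
# average the returned curves. Only the dictionary keys come from the code
# above; the trial count and the choice to average with np.mean are
# assumptions, included purely as a usage sketch.
def average_trials(opt, num_trials=5):
    results = [run_trial(opt) for _ in range(num_trials)]
    return {
        key: np.mean([res[key] for res in results], axis=0)
        for key in results[0]
    }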