def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.5, nu=.01) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 10

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)
        lnr.train()

        # Fit one one-class SVM per timestep on the states seen at that timestep.
        trajs_train_array = np.array(trajs_train)
        for t in range(opt.t):
            X = trajs_train_array[:, t, :]
            ocs[t].fit(X)

        if i % 5 == 0:
            trajs_valid = []
            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False)
                trajs_valid.append(states_valid)

            train_oc_errs = eval_ocs(ocs, trajs_train, opt)
            valid_oc_errs = eval_ocs(ocs, trajs_valid, opt)
            print "Train errs: " + str(train_oc_errs)
            print "Valid errs: " + str(valid_oc_errs)
            print "Max train err: " + str(np.amax(train_oc_errs))
            print "Max valid err: " + str(np.amax(valid_oc_errs))

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
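# --- Hypothetical helper sketch (not defined in this file) ------------------
# eval_ocs is called above but lives elsewhere in the repo. A minimal sketch of
# what it appears to compute, assuming each trajectory has opt.t states and
# "error" means the fraction of states the t-th one-class SVM flags as
# outliers (predict == -1). The name eval_ocs_sketch and the exact signature
# are assumptions, not the repo's actual helper; it relies on this file's
# existing numpy-as-np import.
def eval_ocs_sketch(ocs, trajs):
    trajs = np.array(trajs)                     # (num_trajs, T, state_dim)
    errs = np.zeros(trajs.shape[1])
    for t in range(trajs.shape[1]):
        preds = ocs[t].predict(trajs[:, t, :])  # +1 = inlier, -1 = outlier
        errs[t] = np.mean(preds == -1)
    return errs
# -----------------------------------------------------------------------------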
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir
    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))
    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()
        lnr.add_data(states, int_actions)
        lnr.train()
        print "\t" + str(lnr.acc())

        for j in range(opt.samples):
            _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
            lnr_rewards[j, i] = r
            lnr_perf[j, i] = opt.env.metric()

    print "Average success: " + str(sup_rewards)
    print "Learner success: \n" + str(lnr_rewards)

    pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)

            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq

            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    # Estimate per-transition sensitivity constants from the training data:
    # here L bounds how far the state moves per unit action and K bounds how
    # much the one-class SVM decision score changes per unit action.
    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1
    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            L = state_norm / action_norm
            K = func_diff / action_norm
            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            if L > max_L:
                max_L = L
                max_Lt = t
            if K > max_K:
                max_K = K
                max_Kt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    IPython.embed()
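# --- Hypothetical helper sketch (not defined in this file) ------------------
# fit_all is called above but defined elsewhere in the repo. A minimal sketch,
# assuming it fits the t-th one-class SVM on the states observed at timestep t
# across all collected trajectories (the name fit_all_sketch and the exact
# signature are assumptions):
def fit_all_sketch(ocs, trajs):
    trajs = np.array(trajs)          # (num_trajs, T, state_dim)
    for t in range(len(ocs)):
        ocs[t].fit(trajs[:, t, :])
# -----------------------------------------------------------------------------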
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)

            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq

            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1
    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            K = state_norm / action_norm
            L = func_diff / action_norm
            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            if K > max_K:
                max_K = K
                max_Kt = t
            if L > max_L:
                max_L = L
                max_Lt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_L, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))
    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
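# --- Hypothetical helper sketch (not defined in this file) ------------------
# check_predictions is used above to tally alarm outcomes per rollout. Its
# exact contract lives elsewhere in the repo; one plausible reading, assuming
# info records the timestep of the first alarm and of the first failure (-1
# when the event never happened; both key names are assumptions), is:
def check_predictions_sketch(info):
    alarmed = info['first_alarm'] > -1     # assumed key
    failed = info['first_failure'] > -1    # assumed key
    fn = int(failed and not alarmed)       # failure with no warning
    fp = int(alarmed and not failed)       # warning with no failure
    tp = int(alarmed and failed)
    tn = int((not alarmed) and (not failed))
    return fn, fp, tp, tn
# -----------------------------------------------------------------------------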
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)

            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq

            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    oc.fit(lnr.X)
    print "Done fitting"

    # Per-trajectory, per-timestep sensitivity estimates: Ls bounds state
    # motion per unit action, KLs bounds decision-score motion per unit action.
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(oc.decision_function([state])[0, 0] -
                               oc.decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0
    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, oc, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']
        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
        "completed": completed,
        "comp_before_fail": comp_before_fail,
        "comp_before_alarm": comp_before_alarm
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, oc, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']
        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))
    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0,
        "completed": completed,
        "comp_before_alarm": comp_before_alarm,
        "comp_before_fail": comp_before_fail
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 20

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            lnr.train()
            fit_all(ocs, trajs_train)

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = stats.collect_robust_traj_multiple(
                    opt.env, opt.sim, lnr, ocs, opt.t, opt, False, False)

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust
                freqs[j] = freq

                if j == 0:
                    utils.plot([np.array([lnr_score]), np.array([rob_score])],
                               ['Learner', 'Robust Learner'], opt,
                               "scores/DecisionScores" + str(i),
                               colors=['blue', 'green'])
                    utils.plot([np.array([mags])], ['Robust Learner'], opt,
                               "mags/RecoveryMagnitudes" + str(i),
                               colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)
            correction_freq[index] = np.mean(freqs)

            if index == (opt.misc.num_evaluations - 1):
                bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid,
                                           trajs_test, opt)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        "bar_errs": bar_errs
    }
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 5

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)
        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []

        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(
                opt.env, lnr, oc, opt.t, False)
            states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(
                opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            X_test += states_test
            X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            lnr_iter_rewards[j] = r_test
            rob_iter_rewards[j] = r_robust
            freqs[j] = freq

            if j == 0:
                utils.plot([np.array([lnr_score])], ['Learner'], opt,
                           "DecisionScore" + str(i), colors=['blue'])
                utils.plot([np.array([rob_score])], ['Robust Learner'], opt,
                           "RobustDecisionScore" + str(i), colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        test_err[i] = eval_oc(oc, X_test)
        robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        rob_reward[i] = np.mean(rob_iter_rewards)
        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
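# --- Hypothetical helper sketch (not defined in this file) ------------------
# eval_oc is the single-SVM analogue of eval_ocs: the fraction of states the
# fitted one-class SVM labels as outliers. The name eval_oc_sketch is an
# assumption; the real helper lives elsewhere in the repo.
def eval_oc_sketch(oc, X):
    preds = oc.predict(np.array(X))
    return np.mean(preds == -1)
# -----------------------------------------------------------------------------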
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir
    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        lnr.add_data(states, int_actions)

    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "\nTraining error: " + str(train_err)

    lnr.train()

    sup_rewards = np.zeros((20))
    lnr_rewards = np.zeros((20))
    X_valid = []
    X_test = []
    for i in range(20):
        states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions_test, _, r_test = statistics.collect_traj(
            opt.env, lnr, opt.t, False)
        sup_rewards[i] = r_valid
        lnr_rewards[i] = r_test
        X_valid += states_valid
        X_test += states_test

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation error: " + str(valid_err)

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))
    print "Test error: " + str(test_err)

    print "\n\n"
    print "Average sup reward: " + str(np.mean(sup_rewards)) + " +/- " + str(
        scipy.stats.sem(sup_rewards))
    print "Average lnr reward: " + str(np.mean(lnr_rewards)) + " +/- " + str(
        scipy.stats.sem(lnr_rewards))
    print "\n\n"

    def dec(u):
        # Score the state reached by taking control u from the current state,
        # then restore the environment to where it was.
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    rewards = np.zeros((20))
    rec_counts = np.zeros((20))
    X_robust = []
    for i in range(20):
        s = opt.env.reset()
        states = [s]
        for t in range(opt.t):
            score = oc.decision_function([s])[0, 0]
            # print "Decision score: " + str(score)
            if score < .1:
                alpha = .1
                a = alpha * utils.finite_diff1(
                    np.zeros(opt.env.action_space.shape), dec)
                # print "Recovering: " + str(a)
                rec_counts[i] += 1.0
                s, r, done, _ = opt.env.step(a)
            else:
                a = lnr.intended_action(s)
                s, r, done, _ = opt.env.step(a)
            rewards[i] += r
            states.append(s)
            # if done == True:
            #     break
        X_robust += states

    robust_preds = oc.predict(X_robust)
    robust_err = len(robust_preds[robust_preds == -1]) / float(len(robust_preds))
    print "Robust error: " + str(robust_err)

    rec_freq = np.mean(rec_counts / float(opt.t))
    print "Recovery frequency: " + str(rec_freq)
    print "Robust rewards: " + str(np.mean(rewards)) + " +/- " + str(
        scipy.stats.sem(rewards))
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 2

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # for i in range(opt.iters):
    #     print "\nIteration: " + str(i)
    #     states, int_actions, taken_actions, r = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #     trajs_train.append(states)
    #     lnr.add_data(states, int_actions)

    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         lnr.train()
    #         fit_all(ocs, trajs_train)

    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         for j in range(opt.samples):
    #             states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(opt.env, lnr, ocs, opt.t, False, False)
    #             states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             trajs_valid.append(states_valid)
    #             trajs_test.append(states_test)
    #             trajs_robust.append(states_robust)

    #             sup_iters_rewards[j] = r_valid
    #             lnr_iters_rewards[j] = r_test
    #             rob_iters_rewards[j] = r_robust
    #             freqs[j] = freq

    #             if j == 0:
    #                 utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #                 utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         sup_reward[index] = np.mean(sup_iters_rewards)
    #         lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         rob_reward[index] = np.mean(rob_iters_rewards)
    #         correction_freq[index] = np.mean(freqs)

    #         if index == (opt.misc.num_evaluations - 1):
    #             bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid, trajs_test, opt)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # print "Loading data..."

    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs = pickle.load(open('data/trajs_train.pkl', 'r'))
    print "Done loading data"

    trajs_train, trajs_test = trajs[:-50], trajs[-50:]
    lnr.train()
    fit_all(ocs, trajs_train)
    print eval_ocs(ocs, trajs_train)
    print eval_ocs(ocs, trajs_test)

    s = opt.env.reset()
    env = opt.env
    for k in range(10):
        score = ocs[k].decision_function([s])[0, 0]
        print "Score: " + str(score)
        s, _, _, _ = env.step(lnr.intended_action(s))
        score = ocs[k + 1].decision_function([s])[0, 0]
        print "Score: " + str(score)

    IPython.embed()

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        # "bar_errs" is only assigned in the commented-out evaluation block
        # above; returning it here would raise a NameError.
        # "bar_errs": bar_errs
    }
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    opt.plot_dir = plot_dir
    opt.data_dir = data_dir
    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))
    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()
        lnr.add_data(states, int_actions)

        # lnr.train()
        # print "\t" + str(lnr.acc())
        # for j in range(opt.samples):
        #     _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
        #     lnr_rewards[j, i] = r
        #     lnr_perf[j, i] = opt.env.metric()

    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "Training error: " + str(train_err)

    X_valid = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        X_valid += states

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation error: " + str(valid_err)

    lnr.train()
    X_test = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, lnr, opt.t)
        X_test += states

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))
    print "Test error: " + str(test_err)

    s = opt.env.reset()
    reward = 0.0
    x = opt.env.get_x()

    def dec(u):
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    states_visited = []
    for t in range(opt.t):
        opt.env.render()
        score = oc.decision_function([s])
        print "\tDecision score: " + str(score)
        # if score < .2 and False:
        #     alpha = 1.0
        #     a = alpha * utils.finite_diff1(np.zeros(opt.env.action_space.shape[0]), dec)
        #     print "\t\tRecovering: " + str(a)
        #     s, r, done, _ = opt.env.step(a)
        #     x = opt.env.get_x()
        # else:
        a = lnr.intended_action(s)
        s, r, done, _ = opt.env.step(a)
        x = opt.env.get_x()
        states_visited.append(s)
        if done == True:
            break

    preds = oc.predict(states_visited)
    err = len(preds[preds == -1]) / float(len(preds))
    print "Error: " + str(err)
    print "\nDone after " + str(t + 1) + " steps"

    # print "Average success: " + str(sup_rewards)
    # print "Learner success: \n" + str(lnr_rewards)

    # pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    # pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    # pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    # pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    # plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    # plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')

    IPython.embed()
def run_trial(opt):
    # oc = TrajSV(nu=.01, gamma=.1)
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.1)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 10

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        trajs.append(states)
        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)
        # oc.fit(trajs)
        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []

        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            # states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(opt.env, lnr, oc, opt.t, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            # X_test += states_test
            # X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            # lnr_iter_rewards[j] = r_test
            # rob_iter_rewards[j] = r_robust
            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score])], ['Learner'], opt, "DecisionScore" + str(i), colors=['blue'])
            #     utils.plot([np.array([rob_score])], ['Robust Learner'], opt, "RobustDecisionScore" + str(i), colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        # test_err[i] = eval_oc(oc, X_test)
        # robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        # rob_reward[i] = np.mean(rob_iter_rewards)
        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

        n = len(trajs)
        flags = np.zeros(n)
        for k, traj in enumerate(trajs):
            for state in traj:
                pred = oc.predict([state])[0]
                if pred == -1:
                    flags[k] = 1.0
                    break
        print "Fraction of partial trajectories: " + str(np.mean(flags))

        counts = np.zeros(opt.t)
        for t in range(opt.t):
            for traj in trajs:
                state = traj[t]
                pred = oc.predict([state])[0]
                if pred == -1:
                    counts[t] += 1.0
        counts = counts / float(n)
        print "Max time fraction: " + str(np.amax(counts))
        print "Time fractions:\n" + str(counts)
        print "\n"

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)

            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq

            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    opt.misc.samples = 300

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, opt, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "rec_failed": rec_failed,
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))
    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results
    print "\n\n\n"

    # print "\n\nSupervisor\n\n"
    # sup_failures = 0
    # for j in range(num_samples):
    #     print "Iteration: " + str(j)
    #     reject = True
    #     while reject:
    #         results = statistics.collect_score_traj_multiple_rejection(opt.env, opt.sup, ocs, opt.t, False, False)
    #         reject = results[-1]
    #         failed = results[-2]
    #         if reject:
    #             print "\tRejecting " + str(j) + " and restarting..."
    #     if failed:
    #         sup_failures += 1
    #         print "\t" + str(j) + " failed..."
    # print "fraction of failures: " + str(sup_failures / float(num_samples))

    IPython.embed()

    return {
        "rec_results": rec_results,
        "lnr_results": lnr_results,
    }
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)

            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq

            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)
    max_rec = 1000

    opt.env.reset()
    init_state = opt.env.get_pos_vel()

    print "\n\nRandom Controls\n\n"
    rand_scores = np.zeros((opt.misc.samples, max_rec + 1))
    rand_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        triggered = False
        k = 0
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_random(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True,
                early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
            if k >= 20:
                print "Had to pick new initial state"
                opt.env.reset()
                init_state = opt.env.get_pos_vel()
                k = 0
            else:
                k += 1
        rand_scores[i, :] = results[-3]['rec_scores']
        rand_cutoffs[i, :] = results[-3]['rec_cutoffs']

    print "\n\nApprox Grad Controls\n\n"
    approx_grad_scores = np.zeros((opt.misc.samples, max_rec + 1))
    approx_grad_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        triggered = False
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_approx_grad(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True,
                early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
        approx_grad_scores[i, :] = results[-3]['rec_scores']
        approx_grad_cutoffs[i, :] = results[-3]['rec_cutoffs']

    return {
        'rand_scores': rand_scores,
        'rand_cutoffs': rand_cutoffs,
        'approx_grad_scores': approx_grad_scores,
        'approx_grad_cutoffs': approx_grad_cutoffs
    }
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 100

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            lnr.train()
            fit_all(ocs, trajs_train)

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                print "\t\tSample: " + str(j) + " rolling out..."
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                print "\t\tDone rolling out"

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust
                freqs[j] = freq

            index = i / (opt.iters / opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)
            correction_freq[index] = np.mean(freqs)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    trajs_train = []
    actions_train = []
    trial_data = {}

    for iteration in range(opt.iters):
        print "Iteration: " + str(iteration)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (iteration + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            key = iteration + 1
            trial_data[key] = []

            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            trajs = trajs_train
            fit_all(ocs, trajs)
            print "Training net..."
            lnr.train()
            print "Fitting svms..."
            print "Done fitting"

            Ls = np.zeros((len(trajs_train), opt.t))
            KLs = np.zeros((len(trajs_train), opt.t))
            state_diffs = np.zeros((len(trajs_train), opt.t))
            func_diffs = np.zeros((len(trajs_train), opt.t))
            action_norms = np.zeros((len(trajs_train), opt.t))
            actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

            for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
                zipped = zip(traj_states, traj_actions)
                for t, (state, action) in enumerate(zipped[:-1]):
                    state_next, action_next = zipped[t + 1]
                    state_diff = np.linalg.norm(state_next - state)
                    func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                                       ocs[t].decision_function([state_next])[0, 0])
                    action_norm = np.linalg.norm(action)
                    Ls[i, t] = state_diff / action_norm
                    KLs[i, t] = func_diff / action_norm
                    state_diffs[i, t] = state_diff
                    func_diffs[i, t] = func_diff
                    action_norms[i, t] = action_norm
                    actions[i, t, :] = action

            max_Ls = np.max(Ls, axis=0)
            max_KLs = np.max(KLs, axis=0)
            max_rec = 500

            for k in range(60):
                reject = True
                while reject:
                    print "Determining whether to reject initial state..."
                    s = opt.env.reset()
                    reject = ocs[0].predict([s])[0] == -1
                init_state = opt.env.get_pos_vel()

                print "\n\nVanilla Learner\n\n"
                van_completed = 0
                van_failed = 0
                van_failed_in_support = 0
                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_traj(
                        opt.env, lnr, ocs, opt.t, visualize=False,
                        early_stop=False, init_state=init_state)
                    van_completed += int(results[-3]['completed'])
                    van_failed += int(results[-3]['failed'])
                    van_failed_in_support += int(results[-3]['failed_in_support'])
                van_completed = van_completed / float(opt.misc.samples)
                van_failed = van_failed / float(opt.misc.samples)
                van_failed_in_support = van_failed_in_support / float(opt.misc.samples)

                print "\n\nRand Control Recovery Strategy\n\n"
                rand_completed = 0
                rand_failed = 0
                rand_failed_in_support = 0
                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(
                        rec_statistics.random_sample_loop, opt.env, opt.sim,
                        lnr, ocs, opt.t, opt, max_KLs, visualize=False,
                        early_stop=False, init_state=init_state, max_rec=max_rec)
                    rand_completed += int(results[-3]['completed'])
                    rand_failed += int(results[-3]['failed'])
                    rand_failed_in_support += int(results[-3]['failed_in_support'])
                rand_completed = rand_completed / float(opt.misc.samples)
                rand_failed = rand_failed / float(opt.misc.samples)
                rand_failed_in_support = rand_failed_in_support / float(opt.misc.samples)

                print "\n\nEarly Stopping Strategy\n\n"
                es_completed = 0
                es_failed = 0
                es_failed_in_support = 0
                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(
                        rec_statistics.no_rec_loop, opt.env, opt.sim, lnr,
                        ocs, opt.t, opt, max_KLs, visualize=False,
                        early_stop=False, init_state=init_state, max_rec=max_rec)
                    es_completed += int(results[-3]['completed'])
                    es_failed += int(results[-3]['failed'])
                    es_failed_in_support += int(results[-3]['failed_in_support'])
                es_completed = es_completed / float(opt.misc.samples)
                es_failed = es_failed / float(opt.misc.samples)
                es_failed_in_support = es_failed_in_support / float(opt.misc.samples)

                results = {
                    'van_tallies': [van_completed, van_failed, van_failed_in_support],
                    'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
                    'es_tallies': [es_completed, es_failed, es_failed_in_support],
                }
                trial_data[key].append(results)

    print "Saving to: " + opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl'
    pickle.dump(trial_data, open(
        opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl', 'w'))
    return trial_data
# Load pretrained supervisor weights and set up the experiment options.
# net.load_weights('meta/weights.txt', 'meta/stats.txt')
suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
weights_path = 'meta/' + 'test' + '/' + envname + '_' + str(exp_id) + '_weights' + suffix + '.txt'
stats_path = 'meta/' + 'test' + '/' + envname + '_' + str(exp_id) + '_stats' + suffix + '.txt'
net.load_weights(weights_path, stats_path)
net_sup = Supervisor(net)

opt = Options
opt.env = env
opt.sup = net_sup
opt.t = 100

est = knet.Network([64, 64], learning_rate=.01, epochs=100)
lnr = learner.Learner(est)
oc = OneClassSVM(kernel='rbf', gamma=.01, nu=.01)

ITERATIONS = 500

print "\n\nSup rollouts\n\n"
sup_failures = 0
initial_states = []
for i in range(ITERATIONS):
    print "iteration: " + str(i)
    violation = True
    while violation:
        states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
            opt.env, opt.sup, opt.t, False, False)
        if violation:
            print "\tViolation, restarting"
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=.1, nu=.03) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # The DAgger-style collection/evaluation loop that originally filled these
    # arrays is commented out in this variant (see the preceding run_trial for
    # the live version); training data is loaded from pickles instead.

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    print "Fitting svms..."
    # Hold out the last 200 trajectories to check OC-SVM generalization,
    # then refit on the full set.
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    fit_all(ocs, trajs_train)
    print eval_ocs(ocs, trajs_train)
    print eval_ocs(ocs, trajs_test)
    fit_all(ocs, trajs)
    print "Done fitting"

    print "Training net..."
    lnr.train()

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    total_failures = 0
    total_failures_in_support = 0
    samples_failed_in_support = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            # results = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        info = results[-3]
        total_failures += info['count_failures']
        total_failures_in_support += info['count_fail_in_support']
        if info['count_fail_in_support'] > 0:
            print "Failed in support"
            samples_failed_in_support += 1

    if total_failures > 0:
        print "Fails in support: " + str(float(total_failures_in_support) / total_failures)
    print total_failures_in_support
    print total_failures
    print str(samples_failed_in_support) + " failed in support"
    print "Fraction failed in support: " + str(float(samples_failed_in_support) / opt.misc.samples)
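# The Ls/KLs loop above computes empirical, per-timestep Lipschitz-style
# constants: how far the state (Ls) and the OC-SVM decision score (KLs) move
# per unit of action norm. Taking the max over trajectories yields the
# per-timestep bounds (max_KLs) that the recovery strategies consume. A compact
# equivalent of that loop, written as a hypothetical helper:
def max_score_sensitivity(ocs, trajs, actions_list):
    T = len(ocs)
    KLs = np.zeros((len(trajs), T))
    for i, (traj, acts) in enumerate(zip(trajs, actions_list)):
        for t in range(min(len(traj), T) - 1):
            f_t = ocs[t].decision_function([traj[t]])[0, 0]
            f_next = ocs[t].decision_function([traj[t + 1]])[0, 0]
            # score change per unit of control effort at timestep t
            KLs[i, t] = abs(f_next - f_t) / np.linalg.norm(acts[t])
    return np.amax(KLs, axis=0)  # per-timestep max over all trajectories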
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    # ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # As in the previous variant, the DAgger-style collection/evaluation loop
    # is commented out here and the training data is loaded from pickles.

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)

    print "Training net..."
    lnr.train()

    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    max_rec = 500
    trials_data = []

    # Outer loop variable renamed from i to k so the eval loops below do not
    # shadow it.
    for k in range(100):
        # Rejection-sample an initial state accepted by the t=0 OC-SVM.
        reject = True
        while reject:
            print "Determining whether to reject initial state..."
            s = opt.env.reset()
            reject = ocs[0].predict([s])[0] == -1
        init_state = opt.env.get_pos_vel()

        print "\n\nVanilla Learner\n\n"
        van_completed = 0
        van_failed = 0
        van_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_traj(
                opt.env, lnr, ocs, opt.t,
                visualize=True, early_stop=False, init_state=init_state)
            van_completed += int(results[-3]['completed'])
            van_failed += int(results[-3]['failed'])
            van_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nRand Control Recovery Strategy\n\n"
        rand_completed = 0
        rand_failed = 0
        rand_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs,
                opt.t, opt, max_KLs, visualize=True, early_stop=False,
                init_state=init_state, max_rec=max_rec)
            rand_completed += int(results[-3]['completed'])
            rand_failed += int(results[-3]['failed'])
            rand_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nApprox Grad Recovery Strategy\n\n"
        ag_completed = 0
        ag_failed = 0
        ag_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.approx_grad_loop, opt.env, opt.sim, lnr, ocs,
                opt.t, opt, max_KLs, visualize=True, early_stop=False,
                init_state=init_state, max_rec=max_rec)
            ag_completed += int(results[-3]['completed'])
            ag_failed += int(results[-3]['failed'])
            ag_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nFinite Diff Recovery Strategy\n\n"
        fd_completed = 0
        fd_failed = 0
        fd_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.finite_diff_loop, opt.env, opt.sim, lnr, ocs,
                opt.t, opt, max_KLs, visualize=True, early_stop=False,
                init_state=init_state, max_rec=max_rec)
            fd_completed += int(results[-3]['completed'])
            fd_failed += int(results[-3]['failed'])
            fd_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nEarly Stopping Strategy\n\n"
        es_completed = 0
        es_failed = 0
        es_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs,
                opt.t, opt, max_KLs, visualize=True, early_stop=False,
                init_state=init_state, max_rec=max_rec)
            es_completed += int(results[-3]['completed'])
            es_failed += int(results[-3]['failed'])
            es_failed_in_support += int(results[-3]['failed_in_support'])

        # Note: unlike the variant above, these tallies are raw counts, not
        # rates normalized by opt.misc.samples.
        results = {
            'van_tallies': [van_completed, van_failed, van_failed_in_support],
            'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
            'es_tallies': [es_completed, es_failed, es_failed_in_support],
            'ag_tallies': [ag_completed, ag_failed, ag_failed_in_support],
            'fd_tallies': [fd_completed, fd_failed, fd_failed_in_support],
        }
        trials_data.append(results)

    pickle.dump(trials_data, open(opt.data_dir + 'full/trials_data.pkl', 'w'))
    return trials_data
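# Since this variant saves raw counts over opt.misc.samples rollouts rather
# than rates, a small hypothetical helper for post-processing a saved
# trials_data.pkl into rates:
def tallies_to_rates(tallies, samples):
    # tallies: [completed, failed, failed_in_support] counts
    return [count / float(samples) for count in tallies]

# e.g. tallies_to_rates(results['van_tallies'], opt.misc.samples)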
def run_trial(opt):
    # Single OC-SVM fit on states pooled across all timesteps.
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 100

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        lnr.add_data(states, int_actions)
        trajs_train.append(states)
        lnr.train()

    print "\nCollecting validation samples..."
    trajs_valid = []
    trajs_test = []
    for j in range(opt.samples):
        states_valid, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, lnr, opt.t, False, early_stop=False)
        trajs_valid.append(states_valid)
        trajs_test.append(states_test)
    print "Done collecting samples"

    X_train = []
    for traj in trajs_train:
        X_train += traj
    oc.fit(X_train)

    train_errs = np.zeros(opt.t)
    valid_errs = np.zeros(opt.t)
    test_errs = np.zeros(opt.t)
    adver_errs = np.zeros(opt.t)

    for t in range(opt.t):
        X_train = []
        for traj in trajs_train:
            X_train.append(traj[t])
        X_valid = []
        for traj in trajs_valid:
            X_valid.append(traj[t])
        X_test = []
        for traj in trajs_test:
            X_test.append(traj[t])

        # Adversarial baseline: Gaussian samples matching the empirical mean
        # and covariance of the training states at timestep t.
        X_train = np.array(X_train)
        cov = np.cov(X_train.T)
        mean = np.mean(X_train, axis=0)
        X_adver = np.random.multivariate_normal(mean, cov, opt.samples)

        # Per-timestep local names so the preallocated arrays above are not
        # clobbered (the original assigned scalars to train_err etc., which
        # overwrote the arrays returned at the end).
        train_err_t = eval_oc(oc, X_train)
        valid_err_t = eval_oc(oc, X_valid)
        test_err_t = eval_oc(oc, X_test)
        adver_err_t = eval_oc(oc, X_adver)

        print "Train Error: " + str(train_err_t)
        print "Valid Error: " + str(valid_err_t)
        print "Test Error: " + str(test_err_t)
        print "Adver Error: " + str(adver_err_t)
        print "Support vectors: " + str(oc.support_vectors_.shape)
        print "\n"

        train_errs[t] = train_err_t
        valid_errs[t] = valid_err_t
        test_errs[t] = test_err_t
        adver_errs[t] = adver_err_t

    plt.style.use('ggplot')
    # errs = [train_errs, valid_errs, test_errs]
    # labels = ['Training', 'Validation', 'Test']
    errs = [train_errs, valid_errs]
    labels = ['Training', 'Validation']

    width = .2
    index = np.arange(opt.t)
    for i, (err, label) in enumerate(zip(errs, labels)):
        plt.bar(index + i * width, err, width, label=label)
    plt.legend()
    plt.ylim(0, .75)
    plt.savefig('/Users/JonathanLee/Desktop/bar_single.png')
    utils.clear()

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
    }
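# eval_oc is also undefined in these snippets. By analogy with eval_ocs above,
# it presumably reports the outlier rate of a single OC-SVM on a batch of
# states; this one-liner is an assumed sketch, not the repo's implementation.
def eval_oc(oc, X):
    # fraction of states flagged as outside the estimated support
    return np.mean(oc.predict(np.array(X)) == -1)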