# Assumed imports for these scripts; the exact module paths in the original
# repository may differ.
import pickle

import numpy as np
import IPython
from sklearn.svm import OneClassSVM as OCSVM

import knet
import learner
import statistics
import rec_statistics
import utils


# Variant 1: load previously collected supervisor data, train the learner and
# the per-timestep one-class SVMs, then compare the vanilla learner against
# the random-control, approximate-gradient, finite-difference, and
# early-stopping recovery strategies from a shared initial state.
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):
    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"
    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)
    #
    #     if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         print "\t\tTraining learner..."
    #         # lnr.train()
    #         print "\t\tFitting oc svms..."
    #         # fit_all(ocs, trajs_train)
    #         print "\t\tDone fitting"
    #
    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []
    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)
    #
    #         # for j in range(opt.samples):
    #         #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #         #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #         #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
    #         #     trajs_valid.append(states_valid)
    #         #     trajs_test.append(states_test)
    #         #     trajs_robust.append(states_robust)
    #         #     sup_iters_rewards[j] = r_valid
    #         #     lnr_iters_rewards[j] = r_test
    #         #     rob_iters_rewards[j] = r_robust
    #         #     freqs[j] = freq
    #         #     if j == 0:
    #         #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #         #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])
    #
    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)
    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    # Estimate, per timestep, how sensitive the dynamics (Ls) and the one-class
    # SVM decision score (KLs) are to the magnitude of the applied action.
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    max_rec = 500

    # Sample an initial state that lies inside the support of the t = 0 SVM.
    reject = True
    while reject:
        print "Determining whether to reject initial state..."
        s = opt.env.reset()
        reject = ocs[0].predict([s])[0] == -1
    init_state = opt.env.get_pos_vel()

    print "\n\nVanilla Learner\n\n"
    van_completed = 0
    van_failed = 0
    van_failed_in_support = 0
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_traj(opt.env, lnr, ocs, opt.t, visualize=True, early_stop=False, init_state=init_state)
        van_completed += int(results[-3]['completed'])
        van_failed += int(results[-3]['failed'])
        van_failed_in_support += int(results[-3]['failed_in_support'])

    print "\n\nRand Control Recovery Strategy\n\n"
    rand_completed = 0
    rand_failed = 0
    rand_failed_in_support = 0
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        rand_completed += int(results[-3]['completed'])
        rand_failed += int(results[-3]['failed'])
        rand_failed_in_support += int(results[-3]['failed_in_support'])

    print "\n\nApprox Grad Recovery Strategy\n\n"
    ag_completed = 0
    ag_failed = 0
    ag_failed_in_support = 0
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.approx_grad_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        ag_completed += int(results[-3]['completed'])
        ag_failed += int(results[-3]['failed'])
        ag_failed_in_support += int(results[-3]['failed_in_support'])

    print "\n\nFinite Diff Recovery Strategy\n\n"
    fd_completed = 0
    fd_failed = 0
    fd_failed_in_support = 0
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.finite_diff_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        fd_completed += int(results[-3]['completed'])
        fd_failed += int(results[-3]['failed'])
        fd_failed_in_support += int(results[-3]['failed_in_support'])

    print "\n\nEarly Stopping Strategy\n\n"
    es_completed = 0
    es_failed = 0
    es_failed_in_support = 0
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        es_completed += int(results[-3]['completed'])
        es_failed += int(results[-3]['failed'])
        es_failed_in_support += int(results[-3]['failed_in_support'])

    results = {
        'van_tallies': [van_completed, van_failed, van_failed_in_support],
        'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
        'es_tallies': [es_completed, es_failed, es_failed_in_support],
        'ag_tallies': [ag_completed, ag_failed, ag_failed_in_support],
        'fd_tallies': [fd_completed, fd_failed, fd_failed_in_support],
    }

    return results
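# ---------------------------------------------------------------------------
# The trials above rely on a fit_all(ocs, trajs) helper defined elsewhere in
# the repository. The function below is only an illustrative sketch of what
# such a helper could look like, assuming ocs is the list of per-timestep
# One-Class SVMs and each trajectory is a sequence of state vectors indexed
# by timestep; the repository's actual implementation may differ.


def fit_all_sketch(ocs, trajs):
    """Fit the t-th one-class SVM on the states observed at timestep t."""
    for t, oc in enumerate(ocs):
        # Stack the t-th state of every training trajectory into one matrix.
        states_t = np.array([traj[t] for traj in trajs])
        oc.fit(states_t)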
# Variant 2: same data loading and model fitting as above, but evaluates the
# adaptive recovery policy against the plain learner under rejection
# sampling, tallying false alarms, caught failures, and task completions.
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):
    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"
    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)
    #
    #     if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         print "\t\tTraining learner..."
    #         # lnr.train()
    #         print "\t\tFitting oc svms..."
    #         # fit_all(ocs, trajs_train)
    #         print "\t\tDone fitting"
    #
    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []
    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)
    #
    #         # for j in range(opt.samples):
    #         #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #         #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #         #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
    #         #     trajs_valid.append(states_valid)
    #         #     trajs_test.append(states_test)
    #         #     trajs_robust.append(states_robust)
    #         #     sup_iters_rewards[j] = r_valid
    #         #     lnr_iters_rewards[j] = r_test
    #         #     rob_iters_rewards[j] = r_robust
    #         #     freqs[j] = freq
    #         #     if j == 0:
    #         #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #         #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])
    #
    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)
    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    # Estimate per-timestep sensitivity of the dynamics (Ls) and of the
    # one-class SVM decision score (KLs) to the applied action magnitude.
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    # Drop into an interactive shell for manual inspection before evaluation.
    IPython.embed()

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0
    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        k = 0
        while reject and k < 30:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(opt.env, lnr, ocs, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            k += 1
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "Failed": rec_failures,
        "Failed in support": false_negatives,
        "False alarm": false_positives,
        "Caught failure": true_positives,
        "No violations": true_negatives,
        "Caused_fail": rec_failed,
        "Completed": completed,
        "Comp before fail": comp_before_fail,
        "Comp before alarm": comp_before_alarm,
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        k = 0
        while reject and k < 30:
            results = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            k += 1
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "Failed": lnr_failures,
        "Failed in support": false_negatives,
        "False alarm": false_positives,
        "Caught failure": true_positives,
        "No violations": true_negatives,
        "Caused_fail": 0,
        "Completed": completed,
        "Comp before fail": comp_before_fail,
        "Comp before alarm": comp_before_alarm,
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
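# ---------------------------------------------------------------------------
# eval_ocs(ocs, trajs) is also defined elsewhere in the repository (it is
# called in the next variant and in the commented-out evaluation code above).
# The sketch below is a hypothetical reconstruction that reports, per
# timestep, the fraction of states the corresponding one-class SVM flags as
# outliers; the actual helper may compute a different error measure.


def eval_ocs_sketch(ocs, trajs):
    """Per-timestep fraction of states predicted as outliers (-1)."""
    errs = np.zeros(len(ocs))
    for t, oc in enumerate(ocs):
        states_t = np.array([traj[t] for traj in trajs])
        errs[t] = np.mean(oc.predict(states_t) == -1)
    return errs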
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=.1, nu=.03) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):
    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"
    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)
    #
    #     if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
    #         # print "\tEvaluating..."
    #         # print "\t\tTraining learner..."
    #         # # lnr.train()
    #         # print "\t\tFitting oc svms..."
    #         # # fit_all(ocs, trajs_train)
    #         # print "\t\tDone fitting"
    #
    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []
    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)
    #
    #         # for j in range(opt.samples):
    #         #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #         #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #         #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
    #         #     trajs_valid.append(states_valid)
    #         #     trajs_test.append(states_test)
    #         #     trajs_robust.append(states_robust)
    #         #     sup_iters_rewards[j] = r_valid
    #         #     lnr_iters_rewards[j] = r_test
    #         #     rob_iters_rewards[j] = r_robust
    #         #     freqs[j] = freq
    #         #     if j == 0:
    #         #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #         #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])
    #
    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)
    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train
    print "Fitting svms..."
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    fit_all(ocs, trajs_train)
    print eval_ocs(ocs, trajs_train)
    print eval_ocs(ocs, trajs_test)
    fit_all(ocs, trajs)
    print "Done fitting"

    print "Training net..."
    lnr.train()

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    total_failures = 0
    total_failures_in_support = 0
    samples_failed_in_support = 0
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            # results = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(opt.env, lnr, ocs, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        info = results[-3]
        total_failures += info['count_failures']
        total_failures_in_support += info['count_fail_in_support']
        if info['count_fail_in_support'] > 0:
            print "Failed in support"
            samples_failed_in_support += 1

    if total_failures > 0:
        print "Fails in support: " + str(float(total_failures_in_support) / total_failures)
    print total_failures_in_support
    print total_failures
    print str(samples_failed_in_support) + " failed in support"
    print "Fraction failed in support: " + str(float(samples_failed_in_support) / opt.misc.samples)
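# ---------------------------------------------------------------------------
# The max_KLs vector computed in each variant is handed to the adaptive
# collectors in statistics / rec_statistics. A plausible (hypothetical) use,
# consistent with how KLs is estimated from the training trajectories, is as
# a per-timestep novelty trigger: flag a rollout when the one-class SVM score
# drops faster per unit of applied control than anything seen in training.
# The real trigger logic lives in the collection code and may differ.


def should_trigger_recovery_sketch(ocs, max_KLs, state, next_state, action, t):
    """Return True if the decision-score drop at step t exceeds the
    training-set bound max_KLs[t] scaled by the action magnitude."""
    score_now = ocs[t].decision_function([state])[0, 0]
    score_next = ocs[t].decision_function([next_state])[0, 0]
    drop = np.abs(score_now - score_next)
    allowed = max_KLs[t] * np.linalg.norm(action)
    return drop > allowed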
# Variant 4: collect fresh supervisor demonstrations (instead of loading
# pickled data), then record one-class SVM decision scores and cutoffs while
# running random-control and approximate-gradient recovery rollouts.
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # Collect supervisor demonstrations, rejecting rollouts with violations.
    trajs_train = []
    actions_train = []
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    # Estimate per-timestep sensitivity of the dynamics (Ls) and of the
    # one-class SVM decision score (KLs) to the applied action magnitude.
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    max_rec = 1000

    opt.env.reset()
    init_state = opt.env.get_pos_vel()

    print "\n\nRandom Controls\n\n"
    rand_scores = np.zeros((opt.misc.samples, max_rec + 1))
    rand_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        triggered = False
        k = 0
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_random(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
            if k >= 20:
                # If recovery never triggers, pick a fresh initial state.
                print "Had to pick new initial state"
                opt.env.reset()
                init_state = opt.env.get_pos_vel()
                k = 0
            else:
                k += 1

        rand_scores[i, :] = results[-3]['rec_scores']
        rand_cutoffs[i, :] = results[-3]['rec_cutoffs']

    print "\n\nApprox Grad Controls\n\n"
    approx_grad_scores = np.zeros((opt.misc.samples, max_rec + 1))
    approx_grad_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        triggered = False
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_approx_grad(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']

        approx_grad_scores[i, :] = results[-3]['rec_scores']
        approx_grad_cutoffs[i, :] = results[-3]['rec_cutoffs']

    return {
        'rand_scores': rand_scores,
        'rand_cutoffs': rand_cutoffs,
        'approx_grad_scores': approx_grad_scores,
        'approx_grad_cutoffs': approx_grad_cutoffs,
    }
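# ---------------------------------------------------------------------------
# Example (hypothetical) of consuming the dictionary returned by the last
# run_trial variant: plot the mean one-class SVM decision score of the
# random-control and approximate-gradient recovery strategies against the
# mean cutoff over recovery steps. Assumes matplotlib is available; the
# helper name and output file name are illustrative only.


def plot_recovery_scores(results, filename='recovery_scores.png'):
    import matplotlib.pyplot as plt

    steps = np.arange(results['rand_scores'].shape[1])
    plt.plot(steps, results['rand_scores'].mean(axis=0), label='Random controls')
    plt.plot(steps, results['approx_grad_scores'].mean(axis=0), label='Approx. gradient')
    plt.plot(steps, results['rand_cutoffs'].mean(axis=0), '--', label='Cutoff')
    plt.xlabel('Recovery step')
    plt.ylabel('Mean OCSVM decision score')
    plt.legend()
    plt.savefig(filename)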