opt.t = 100
est = knet.Network([64, 64], learning_rate=.01, epochs=100)
lnr = learner.Learner(est)
oc = OneClassSVM(kernel='rbf', gamma=.01, nu=.01)

ITERATIONS = 500

print "\n\nSup rollouts\n\n"
sup_failures = 0
initial_states = []
for i in range(ITERATIONS):
    print "iteration: " + str(i)
    violation = True
    while violation:
        states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
            opt.env, opt.sup, opt.t, False, False)
        if violation:
            print "Violation at iteration " + str(i) + ", restarting"
            sup_failures += 1
    initial_states.append(states[0])
    lnr.add_data(states, int_actions)

print "Sup failures: " + str(sup_failures / float(ITERATIONS))

# oc.fit(initial_states)
# print "\n\nValidation rollouts\n\n"
# pred_y = []
# actual_y = []
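# NOTE: statistics.collect_traj_rejection is defined outside this listing.
# Its assumed contract, inferred from the call sites here: roll out the given
# policy (opt.sup) in opt.env for up to opt.t steps and return the tuple
# (states, intended_actions, taken_actions, reward, violation), where
# violation=True marks a constraint breach that triggers a restart.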
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    trajs_train = []
    actions_train = []
    trial_data = {}

    for iteration in range(opt.iters):
        print "Iteration: " + str(iteration)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (iteration + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            key = iteration + 1
            trial_data[key] = []
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            trajs = trajs_train
            fit_all(ocs, trajs)
            print "Training net..."
            lnr.train()
            print "Fitting svms..."
            print "Done fitting"

            Ls = np.zeros((len(trajs_train), opt.t))
            KLs = np.zeros((len(trajs_train), opt.t))
            state_diffs = np.zeros((len(trajs_train), opt.t))
            func_diffs = np.zeros((len(trajs_train), opt.t))
            action_norms = np.zeros((len(trajs_train), opt.t))
            actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

            for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
                zipped = zip(traj_states, traj_actions)
                for t, (state, action) in enumerate(zipped[:-1]):
                    state_next, action_next = zipped[t + 1]
                    state_diff = np.linalg.norm(state_next - state)
                    func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                                       - ocs[t].decision_function([state_next])[0, 0])
                    action_norm = np.linalg.norm(action)
                    Ls[i, t] = state_diff / action_norm
                    KLs[i, t] = func_diff / action_norm
                    state_diffs[i, t] = state_diff
                    func_diffs[i, t] = func_diff
                    action_norms[i, t] = action_norm
                    actions[i, t, :] = action

            max_Ls = np.max(Ls, axis=0)
            max_KLs = np.max(KLs, axis=0)
            max_rec = 500

            for k in range(60):
                reject = True
                while reject:
                    print "Determining whether to reject initial state..."
                    s = opt.env.reset()
                    reject = ocs[0].predict([s])[0] == -1
                init_state = opt.env.get_pos_vel()

                print "\n\nVanilla Learner\n\n"
                van_completed = 0
                van_failed = 0
                van_failed_in_support = 0
                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_traj(
                        opt.env, lnr, ocs, opt.t,
                        visualize=False, early_stop=False, init_state=init_state)
                    van_completed += int(results[-3]['completed'])
                    van_failed += int(results[-3]['failed'])
                    van_failed_in_support += int(results[-3]['failed_in_support'])
                van_completed = van_completed / float(opt.misc.samples)
                van_failed = van_failed / float(opt.misc.samples)
                van_failed_in_support = van_failed_in_support / float(opt.misc.samples)

                print "\n\nRand Control Recovery Strategy\n\n"
                rand_completed = 0
                rand_failed = 0
                rand_failed_in_support = 0
                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(
                        rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs,
                        opt.t, opt, max_KLs, visualize=False, early_stop=False,
                        init_state=init_state, max_rec=max_rec)
                    rand_completed += int(results[-3]['completed'])
                    rand_failed += int(results[-3]['failed'])
                    rand_failed_in_support += int(results[-3]['failed_in_support'])
                rand_completed = rand_completed / float(opt.misc.samples)
                rand_failed = rand_failed / float(opt.misc.samples)
                rand_failed_in_support = rand_failed_in_support / float(opt.misc.samples)

                print "\n\nEarly Stopping Strategy\n\n"
                es_completed = 0
                es_failed = 0
                es_failed_in_support = 0
                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(
                        rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs,
                        opt.t, opt, max_KLs, visualize=False, early_stop=False,
                        init_state=init_state, max_rec=max_rec)
                    es_completed += int(results[-3]['completed'])
                    es_failed += int(results[-3]['failed'])
                    es_failed_in_support += int(results[-3]['failed_in_support'])
                es_completed = es_completed / float(opt.misc.samples)
                es_failed = es_failed / float(opt.misc.samples)
                es_failed_in_support = es_failed_in_support / float(opt.misc.samples)

                results = {
                    'van_tallies': [van_completed, van_failed, van_failed_in_support],
                    'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
                    'es_tallies': [es_completed, es_failed, es_failed_in_support],
                }
                trial_data[key].append(results)

                print "Saving to: " + opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl'
                pickle.dump(trial_data, open(opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl', 'w'))

    return trial_data
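# NOTE: fit_all is called above but not defined in this listing. A minimal
# sketch of the assumed behavior -- one OC-SVM per timestep, each fit on the
# states observed at that timestep across the training trajectories:
import numpy as np

def fit_all(ocs, trajs):
    # ocs[t] models the support of the state distribution at timestep t.
    for t in range(len(ocs)):
        states_t = np.array([traj[t] for traj in trajs if len(traj) > t])
        if len(states_t) > 0:
            ocs[t].fit(states_t)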
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.max(Ls, axis=0)
    max_KLs = np.max(KLs, axis=0)
    max_rec = 500

    trial_data = []
    rec_scores = []
    rec_cutoffs = []

    for k in range(20):
        reject = True
        while reject:
            print "Determining whether to reject initial state..."
            s = opt.env.reset()
            reject = ocs[0].predict([s])[0] == -1
        init_state = opt.env.get_pos_vel()

        print "\n\nVanilla Learner\n\n"
        van_completed = 0
        van_failed = 0
        van_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_traj(
                opt.env, lnr, ocs, opt.t,
                visualize=False, early_stop=False, init_state=init_state)
            van_completed += int(results[-3]['completed'])
            van_failed += int(results[-3]['failed'])
            van_failed_in_support += int(results[-3]['failed_in_support'])
        van_completed = van_completed / float(opt.misc.samples)
        van_failed = van_failed / float(opt.misc.samples)
        van_failed_in_support = van_failed_in_support / float(opt.misc.samples)

        print "\n\nRand Control Recovery Strategy\n\n"
        rand_completed = 0
        rand_failed = 0
        rand_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs,
                opt.t, opt, max_KLs, visualize=False, early_stop=False,
                init_state=init_state, max_rec=max_rec)
            rand_completed += int(results[-3]['completed'])
            rand_failed += int(results[-3]['failed'])
            rand_failed_in_support += int(results[-3]['failed_in_support'])
        rand_completed = rand_completed / float(opt.misc.samples)
        rand_failed = rand_failed / float(opt.misc.samples)
        rand_failed_in_support = rand_failed_in_support / float(opt.misc.samples)
        rec_scores += results[-3]['rec_scores']
        rec_cutoffs += results[-3]['rec_cutoffs']

        print "\n\nEarly Stopping Strategy\n\n"
        es_completed = 0
        es_failed = 0
        es_failed_in_support = 0
        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs,
                opt.t, opt, max_KLs, visualize=False, early_stop=False,
                init_state=init_state, max_rec=max_rec)
            es_completed += int(results[-3]['completed'])
            es_failed += int(results[-3]['failed'])
            es_failed_in_support += int(results[-3]['failed_in_support'])
        es_completed = es_completed / float(opt.misc.samples)
        es_failed = es_failed / float(opt.misc.samples)
        es_failed_in_support = es_failed_in_support / float(opt.misc.samples)

        results = {
            'van_tallies': [van_completed, van_failed, van_failed_in_support],
            'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
            'es_tallies': [es_completed, es_failed, es_failed_in_support],
        }
        trial_data.append(results)

        print "Saving to: " + opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl'
        pickle.dump(trial_data, open(opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl', 'w'))
        pickle.dump(rec_scores, open(opt.data_dir + 'multiple_trials/rec_scores' + str(opt.t_value) + '.pkl', 'w'))
        pickle.dump(rec_cutoffs, open(opt.data_dir + 'multiple_trials/rec_cutoffs' + str(opt.t_value) + '.pkl', 'w'))

    info = {'rec_scores': rec_scores, 'rec_cutoffs': rec_cutoffs}
    return trial_data, info
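# NOTE: eval_ocs appears (commented out) in the evaluation blocks above but is
# not defined here. A plausible sketch, assuming it returns, per timestep, the
# fraction of states the corresponding OC-SVM flags as outliers (label -1):
import numpy as np

def eval_ocs(ocs, trajs):
    errs = np.zeros(len(ocs))
    for t, oc in enumerate(ocs):
        states_t = np.array([traj[t] for traj in trajs if len(traj) > t])
        if len(states_t) > 0:
            errs[t] = np.mean(oc.predict(states_t) == -1)
    return errs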
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1

    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            L = state_norm / action_norm
            K = func_diff / action_norm
            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            if L > max_L:
                max_L = L
                max_Lt = t
            if K > max_K:
                max_K = K
                max_Kt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    IPython.embed()
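# The loop above computes empirical per-transition sensitivity constants from
# the training data: one ratio, ||s_{t+1} - s_t|| / ||a_t||, measures how far
# the state moves per unit of action, and the other,
# |f_t(s_{t+1}) - f_t(s_t)| / ||a_t||, measures how much the timestep-t
# OC-SVM decision score can change per unit of action. The printed max/mean
# values are Lipschitz-style estimates; the per-timestep maxima are
# presumably what the recovery strategies in the other variants consume
# (max_Ls, max_KLs).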
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1

    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            K = state_norm / action_norm
            L = func_diff / action_norm
            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            if K > max_K:
                max_K = K
                max_Kt = t
            if L > max_L:
                max_L = L
                max_Lt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_L, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))
    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0,
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
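# NOTE: check_predictions is not defined in this listing. A hedged sketch of
# the assumed contract: inspect a rollout's info dict and return 0/1
# increments (false_neg, false_pos, true_pos, true_neg), treating "alarm
# fired" as the detector's prediction and "rollout failed" as ground truth.
# The 'alarm' and 'failed' keys below are hypothetical placeholders.
def check_predictions(info):
    alarm = bool(info.get('alarm', False))    # hypothetical key
    failed = bool(info.get('failed', False))  # hypothetical key
    return (int(failed and not alarm),      # false negative
            int(alarm and not failed),      # false positive
            int(alarm and failed),          # true positive
            int(not alarm and not failed))  # true negative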
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    oc.fit(lnr.X)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(oc.decision_function([state])[0, 0]
                               - oc.decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0
    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, oc, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']
        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
        "completed": completed,
        "comp_before_fail": comp_before_fail,
        "comp_before_alarm": comp_before_alarm
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, oc, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']
        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))
    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0,
        "completed": completed,
        "comp_before_alarm": comp_before_alarm,
        "comp_before_fail": comp_before_fail
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
def run_trial(opt):
    ocs = [svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    opt.misc.samples = 300

    print "\n\nRecovery\n\n"
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, opt, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "rec_failed": rec_failed,
    }
    print "fraction of failures: " + str(rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]
        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(lnr_failures / float(opt.misc.samples))
    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
    }

    print "\n\n\nrec_results"
    print rec_results
    print "lnr_results"
    print lnr_results
    print "\n\n\n"

    # print "\n\nSupervisor\n\n"
    # sup_failures = 0
    # for j in range(num_samples):
    #     print "Iteration: " + str(j)
    #     reject = True
    #     while reject:
    #         results = statistics.collect_score_traj_multiple_rejection(opt.env, opt.sup, ocs, opt.t, False, False)
    #         reject = results[-1]
    #         failed = results[-2]
    #         if reject:
    #             print "\tRejecting " + str(j) + " and restarting..."
    #     if failed:
    #         sup_failures += 1
    #         print "\t" + str(j) + " failed..."
    # print "fraction of failures: " + str(sup_failures / float(num_samples))

    IPython.embed()

    return {
        "rec_results": rec_results,
        "lnr_results": lnr_results,
    }
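# The TP/FP/TN/FN tallies collected above map directly onto standard detector
# metrics. A small helper (not in the original) that converts either results
# dict into precision and recall for the failure detector:
def detector_metrics(res):
    tp = res['true_positives']
    fp = res['false_positives']
    fn = res['false_negatives']
    precision = tp / float(tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / float(tp + fn) if (tp + fn) > 0 else 0.0
    return precision, recall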
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"
        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            #     states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            #     states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            #     states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)
            #     trajs_valid.append(states_valid)
            #     trajs_test.append(states_test)
            #     trajs_robust.append(states_robust)
            #     sup_iters_rewards[j] = r_valid
            #     lnr_iters_rewards[j] = r_test
            #     rob_iters_rewards[j] = r_robust
            #     freqs[j] = freq
            #     if j == 0:
            #         utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #         utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)
            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)
            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."

    trajs = trajs_train
    fit_all(ocs, trajs)
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0]
                               - ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)
    max_rec = 1000

    opt.env.reset()
    init_state = opt.env.get_pos_vel()

    print "\n\nRandom Controls\n\n"
    rand_scores = np.zeros((opt.misc.samples, max_rec + 1))
    rand_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        triggered = False
        k = 0
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_random(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True,
                early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
            if k >= 20:
                print "Had to pick new initial state"
                opt.env.reset()
                init_state = opt.env.get_pos_vel()
                k = 0
            else:
                k += 1
        rand_scores[i, :] = results[-3]['rec_scores']
        rand_cutoffs[i, :] = results[-3]['rec_cutoffs']

    print "\n\nApprox Grad Controls\n\n"
    approx_grad_scores = np.zeros((opt.misc.samples, max_rec + 1))
    approx_grad_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        triggered = False
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_approx_grad(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True,
                early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
        approx_grad_scores[i, :] = results[-3]['rec_scores']
        approx_grad_cutoffs[i, :] = results[-3]['rec_cutoffs']

    return {
        'rand_scores': rand_scores,
        'rand_cutoffs': rand_cutoffs,
        'approx_grad_scores': approx_grad_scores,
        'approx_grad_cutoffs': approx_grad_cutoffs
    }
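# Minimal usage sketch (not in the original): compare the mean recovery score
# against the mean cutoff per recovery step, assuming `out` is the dict
# returned by the run_trial variant above.
import matplotlib.pyplot as plt

def plot_recovery_scores(out, path='recovery_scores.png'):
    plt.plot(out['rand_scores'].mean(axis=0), label='random score')
    plt.plot(out['rand_cutoffs'].mean(axis=0), '--', label='random cutoff')
    plt.plot(out['approx_grad_scores'].mean(axis=0), label='approx-grad score')
    plt.plot(out['approx_grad_cutoffs'].mean(axis=0), '--', label='approx-grad cutoff')
    plt.xlabel('recovery step')
    plt.ylabel('decision score')
    plt.legend()
    plt.savefig(path)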