def run_trial(opt):
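    # ocs holds one one-class SVM per timestep; after fit_all they act as
    # per-timestep in-support / out-of-support detectors for visited states.
    # lnr wraps the neural-network policy trained on the collected
    # (state, action) data.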
    ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):

    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"

    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)


    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         print "\t\tTraining learner..."
    #         # lnr.train()
    #         print "\t\tFitting oc svms..."
    #         # fit_all(ocs, trajs_train)
    #         print "\t\tDone fitting"

    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         # for j in range(opt.samples):
    #             # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #             # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             # trajs_valid.append(states_valid)
    #             # trajs_test.append(states_test)
    #             # trajs_robust.append(states_robust)

    #             # sup_iters_rewards[j] = r_valid
    #             # lnr_iters_rewards[j] = r_test
    #             # rob_iters_rewards[j] = r_robust

    #             # freqs[j] = freq

    #             # if j == 0:
    #             #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #             #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])


    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)

    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))


    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."


    trajs = trajs_train

    fit_all(ocs, trajs)
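    # NOTE: fit_all is defined elsewhere in this project; the sketch below is
    # only an assumption based on its usage here (one one-class SVM per
    # timestep, fit over all training trajectories), not the actual
    # implementation:
    #
    #     def fit_all(ocs, trajs):
    #         for t, oc in enumerate(ocs):
    #             oc.fit(np.array([traj[t] for traj in trajs]))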


    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t+1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
            action_norm = np.linalg.norm(action)
            
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    max_rec = 500
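    # Per-rollout recovery budget passed as max_rec to the rec_statistics
    # calls below.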


    reject = True
    while reject:
        print "Determing whether to reject initial state..."
        s = opt.env.reset()
        reject = ocs[0].predict([s])[0] == -1
    init_state = opt.env.get_pos_vel()
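    # init_state is now an in-support starting configuration (accepted by
    # ocs[0]) that is reused by every strategy below.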


    print "\n\nVanilla Learner\n\n"

    van_completed = 0
    van_failed = 0
    van_failed_in_support = 0
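    # Each strategy below is rolled out opt.misc.samples times from the same
    # init_state; results[-3] is the info dict whose 'completed', 'failed',
    # and 'failed_in_support' counts are tallied per strategy.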

    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_traj(opt.env, lnr, ocs, opt.t, visualize=True, early_stop=False, init_state = init_state)
        van_completed += int(results[-3]['completed'])
        van_failed += int(results[-3]['failed'])
        van_failed_in_support += int(results[-3]['failed_in_support'])


    print "\n\nRand Control Recovery Strategy\n\n"

    rand_completed = 0
    rand_failed = 0
    rand_failed_in_support = 0

    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        rand_completed += int(results[-3]['completed'])
        rand_failed += int(results[-3]['failed'])
        rand_failed_in_support += int(results[-3]['failed_in_support'])


    print "\n\nApprox Grad Recovery Strategy\n\n"
    ag_completed = 0
    ag_failed = 0
    ag_failed_in_support = 0

    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.approx_grad_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        ag_completed += int(results[-3]['completed'])
        ag_failed += int(results[-3]['failed'])
        ag_failed_in_support += int(results[-3]['failed_in_support'])


    print "\n\Finite Diff Recovery Strategy\n\n"
    fd_completed = 0
    fd_failed = 0
    fd_failed_in_support = 0

    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.finite_diff_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        fd_completed += int(results[-3]['completed'])
        fd_failed += int(results[-3]['failed'])
        fd_failed_in_support += int(results[-3]['failed_in_support'])


    print "\n\nEarly Stopping Strategy\n\n"
    es_completed = 0
    es_failed = 0
    es_failed_in_support = 0

    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i)
        results = rec_statistics.collect_rec(rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
        es_completed += int(results[-3]['completed'])
        es_failed += int(results[-3]['failed'])
        es_failed_in_support += int(results[-3]['failed_in_support'])

    results = {
        'van_tallies': [van_completed, van_failed, van_failed_in_support],
        'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
        'es_tallies': [es_completed, es_failed, es_failed_in_support],
        'ag_tallies': [ag_completed, ag_failed, ag_failed_in_support],
        'fd_tallies': [fd_completed, fd_failed, fd_failed_in_support]
    }

    return results
def run_trial(opt):
    ocs = [
        OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):

    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"

    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)

    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         print "\t\tTraining learner..."
    #         # lnr.train()
    #         print "\t\tFitting oc svms..."
    #         # fit_all(ocs, trajs_train)
    #         print "\t\tDone fitting"

    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         # for j in range(opt.samples):
    #             # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #             # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             # trajs_valid.append(states_valid)
    #             # trajs_test.append(states_test)
    #             # trajs_robust.append(states_robust)

    #             # sup_iters_rewards[j] = r_valid
    #             # lnr_iters_rewards[j] = r_test
    #             # rob_iters_rewards[j] = r_robust

    #             # freqs[j] = freq

    #             # if j == 0:
    #             #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #             #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)

    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    fit_all(ocs, trajs)

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros(
        (len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states,
            traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)
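    # Interactive breakpoint: inspect the estimated constants (max_Ls,
    # max_KLs) before the evaluation rollouts below.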

    IPython.embed()

    print "\n\nRecovery\n\n"

    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0
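    # Evaluate the adaptive recovery policy: each rollout rejection-samples
    # its start (up to 30 tries); check_predictions returns
    # (false_negatives, false_positives, true_positives, true_negatives) and
    # check_completed reports completions before any failure / alarm.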

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        k = 0
        while reject and k < 30:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            k += 1
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "Failed": rec_failures,
        "Failed in support": false_negatives,
        "False alarm": false_positives,
        "Caught failure": true_positives,
        "No violations": true_negatives,
        "Caused_fail": rec_failed,
        "Completed": completed,
        "Comp before fail": comp_before_fail,
        "Comp before alarm": comp_before_alarm
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0
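    # Baseline: the same evaluation with the plain learner rollout
    # (collect_score_traj_multiple_rejection), i.e. no recovery controller.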

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        k = 0
        while reject and k < 30:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            k += 1
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "Failed": lnr_failures,
        "Failed in support": false_negatives,
        "False alarm": false_positives,
        "Caught failure": true_positives,
        "No violations": true_negatives,
        "Caused_fail": 0,
        "Completed": completed,
        "Comp before fail": comp_before_fail,
        "Comp before alarm": comp_before_alarm
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=.1, nu=.03) for t in range(opt.t)]
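    # This variant hard-codes the one-class SVM hyperparameters (gamma=.1,
    # nu=.03) rather than taking them from opt.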
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):

    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"

    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)

    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         # print "\tEvaluating..."
    #         # print "\t\tTraining learner..."
    #         # # lnr.train()
    #         # print "\t\tFitting oc svms..."
    #         # # fit_all(ocs, trajs_train)
    #         # print "\t\tDone fitting"

    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         # for j in range(opt.samples):
    #             # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #             # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             # trajs_valid.append(states_valid)
    #             # trajs_test.append(states_test)
    #             # trajs_robust.append(states_robust)

    #             # sup_iters_rewards[j] = r_valid
    #             # lnr_iters_rewards[j] = r_test
    #             # rob_iters_rewards[j] = r_robust

    #             # freqs[j] = freq

    #             # if j == 0:
    #             #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #             #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)

    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    print "Fitting svms..."
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    fit_all(ocs, trajs_train)
    print eval_ocs(ocs, trajs_train)
    print eval_ocs(ocs, trajs_test)
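    # NOTE: eval_ocs is defined elsewhere; this sketch is only an assumption
    # based on its usage (its result is printed per split and, in the
    # commented code above, stored as a length-opt.t row), e.g. the
    # per-timestep fraction of states flagged as outliers:
    #
    #     def eval_ocs(ocs, trajs):
    #         return np.array([
    #             np.mean(oc.predict(np.array([traj[t] for traj in trajs])) == -1)
    #             for t, oc in enumerate(ocs)])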
    fit_all(ocs, trajs)
    print "Done fitting"
    print "Training net..."
    lnr.train()

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros(
        (len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states,
            traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    total_failures = 0
    total_failures_in_support = 0
    samples_failed_in_support = 0
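    # Roll out the adaptive recovery policy opt.misc.samples times and count
    # how many failures occur, and how many of those happen while the state
    # is still inside the estimated support.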

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            #results = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        total_failures += info['count_failures']
        total_failures_in_support += info['count_fail_in_support']

        if info['count_fail_in_support'] > 0:
            print "Failed in support"
            samples_failed_in_support += 1

    if total_failures > 0:
        print "Fails in support: " + str(
            float(total_failures_in_support) / total_failures)
    print total_failures_in_support
    print total_failures

    print str(samples_failed_in_support) + " failed in support"
    print "Fraction failed in support: " + str(
        float(samples_failed_in_support) / opt.misc.samples)
def run_trial(opt):
    ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
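    # Unlike the variants above, this version collects fresh supervisor
    # demonstrations: each training trajectory is re-sampled until it
    # completes without a constraint violation.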
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)


        if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
                # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
                # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
                # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

                # trajs_valid.append(states_valid)
                # trajs_test.append(states_test)
                # trajs_robust.append(states_robust)

                # sup_iters_rewards[j] = r_valid
                # lnr_iters_rewards[j] = r_test
                # rob_iters_rewards[j] = r_robust

                # freqs[j] = freq

                # if j == 0:
                #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
                #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])


            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))


    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."


    trajs = trajs_train

    fit_all(ocs, trajs)


    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t+1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
            action_norm = np.linalg.norm(action)
            
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    max_rec = 1000
    opt.env.reset()
    init_state = opt.env.get_pos_vel()
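    # Shared starting state for all evaluations below; note it comes from a
    # single reset and is not rejection-sampled against ocs[0] here.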

    print "\n\nRandom Controls\n\n"

    rand_scores = np.zeros((opt.misc.samples, max_rec + 1))
    rand_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
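    # Random recovery controls: rerun from init_state until the recovery
    # phase actually triggers (results[-3]['triggered']); after ~20
    # untriggered tries a fresh initial state is drawn. Per-step recovery
    # scores and cutoff thresholds are recorded for comparison.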
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i) + ""
        triggered = False
        k = 0
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_random(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
            if k >= 20:
                print "Had to pick new initial state"
                opt.env.reset()
                init_state = opt.env.get_pos_vel()
                k = 0
            else:
                k += 1
        rand_scores[i, :] = results[-3]['rec_scores']
        rand_cutoffs[i, :] = results[-3]['rec_cutoffs']


    print "\n\nApprox Grad Controls\n\n"

    approx_grad_scores = np.zeros((opt.misc.samples, max_rec + 1))
    approx_grad_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
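    # Approximate-gradient recovery controls, recorded the same way (this
    # loop has no fallback to a new initial state).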
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i) + ""
        triggered = False
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_approx_grad(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
        approx_grad_scores[i, :] = results[-3]['rec_scores']
        approx_grad_cutoffs[i, :] = results[-3]['rec_cutoffs']


    return {
        'rand_scores': rand_scores,
        'rand_cutoffs': rand_cutoffs,
        'approx_grad_scores': approx_grad_scores,
        'approx_grad_cutoffs': approx_grad_cutoffs
    }