# ---- Beispiel (Example) #1 ----
opt.t = 100

est = knet.Network([64, 64], learning_rate=.01, epochs=100)
lnr = learner.Learner(est)

oc = OneClassSVM(kernel='rbf', gamma=.01, nu=.01)

ITERATIONS = 500
print "\n\nSup rollouts\n\n"
sup_failures = 0
initial_states = []
for i in range(ITERATIONS):
    print "iteration: " + str(i)
    violation = True
    while violation:
        states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
            opt.env, opt.sup, opt.t, False, False)
        if violation:
            print "Violation at iteration " + str(i) + ", restarting"
            sup_failures += 1

    initial_states.append(states[0])
    lnr.add_data(states, int_actions)

print "Sup failures: " + str(sup_failures / float(ITERATIONS))

# oc.fit(initial_states)

# print "\n\nValidation rollouts\n\n"

# pred_y = []
# actual_y = []
# ---- Beispiel (Example) #2 ----
def run_trial(opt):
    """Run one training/evaluation trial keyed by evaluation checkpoint.

    Collects violation-free supervisor trajectories, trains a learner and one
    one-class SVM per timestep on them, then at evenly spaced checkpoints
    evaluates three rollout strategies (vanilla learner, random-sample
    recovery, early stopping) from shared initial states, pickling the running
    tallies after each initial state.

    opt is a project options object; it must provide env/sup/sim handles,
    architecture and SVM hyperparameters, iteration counts, and output paths.

    Returns a dict mapping checkpoint iteration -> list of per-initial-state
    tally dicts.
    """
    # One one-class SVM per timestep t; together they model the distribution
    # of supervisor states seen at each step.
    ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    trajs_train = []
    actions_train = []

    trial_data = {}

    for iteration in range(opt.iters):

        print "Iteration: " + str(iteration)
        # Rejection sampling: keep rolling out the supervisor until a
        # trajectory completes without a constraint violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)


        # Evaluate at opt.misc.num_evaluations evenly spaced checkpoints
        # (Python 2 integer division gives the checkpoint spacing).
        if (iteration + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            key = iteration + 1
            trial_data[key] = []

            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            # Allocated but unused below (left over from an earlier variant of
            # this experiment that sampled validation/test/robust rollouts).
            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)


            trajs = trajs_train
            fit_all(ocs, trajs)


            print "Training net..."
            lnr.train()
            print "Fitting svms..."
            print "Done fitting"

            # Per-(trajectory, timestep) statistics over the training data:
            # Ls   - state change per unit action norm (Lipschitz-like ratio)
            # KLs  - SVM decision-function change per unit action norm
            Ls = np.zeros((len(trajs_train), opt.t))
            KLs = np.zeros((len(trajs_train), opt.t))
            state_diffs = np.zeros((len(trajs_train), opt.t))
            func_diffs = np.zeros((len(trajs_train), opt.t))
            action_norms = np.zeros((len(trajs_train), opt.t))
            actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

            for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
                zipped = zip(traj_states, traj_actions)
                # Consecutive state pairs; last step has no successor.
                for t, (state, action) in enumerate(zipped[:-1]):
                    state_next, action_next = zipped[t+1]
                    state_diff = np.linalg.norm(state_next - state)
                    func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
                    action_norm = np.linalg.norm(action)

                    # NOTE(review): divides by action_norm with no zero guard —
                    # assumes actions are never exactly zero; confirm.
                    Ls[i, t] = state_diff / action_norm
                    KLs[i, t] = func_diff / action_norm
                    state_diffs[i, t] = state_diff
                    func_diffs[i, t] = func_diff
                    action_norms[i, t] = action_norm
                    actions[i, t, :] = action

            # Worst case over trajectories, per timestep; max_KLs parameterizes
            # the recovery strategies below.
            max_Ls = np.max(Ls, axis=0)
            max_KLs = np.max(KLs, axis=0)

            max_rec = 500

            # Evaluate from 60 shared initial states so the three strategies
            # are compared on the same starting conditions.
            for k in range(60):
                # Reject initial states that the t=0 SVM labels as outliers
                # (predict == -1), i.e. outside the supervisor's support.
                reject = True
                while reject:
                    print "Determing whether to reject initial state..."
                    s = opt.env.reset()
                    reject = ocs[0].predict([s])[0] == -1
                init_state = opt.env.get_pos_vel()

                print "\n\nVanilla Learner\n\n"

                van_completed = 0
                van_failed = 0
                van_failed_in_support = 0

                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    # results[-3] is the per-rollout stats dict.
                    results = rec_statistics.collect_traj(opt.env, lnr, ocs, opt.t, visualize=False, early_stop=False, init_state = init_state)
                    van_completed += int(results[-3]['completed'])
                    van_failed += int(results[-3]['failed'])
                    van_failed_in_support += int(results[-3]['failed_in_support'])

                # Convert tallies to fractions of the evaluation samples.
                van_completed = van_completed / float(opt.misc.samples)
                van_failed = van_failed / float(opt.misc.samples)
                van_failed_in_support = van_failed_in_support / float(opt.misc.samples)


                print "\n\nRand Control Recovery Strategy\n\n"

                rand_completed = 0
                rand_failed = 0
                rand_failed_in_support = 0

                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=False, early_stop=False, init_state=init_state, max_rec=max_rec)
                    rand_completed += int(results[-3]['completed'])
                    rand_failed += int(results[-3]['failed'])
                    rand_failed_in_support += int(results[-3]['failed_in_support'])

                rand_completed = rand_completed / float(opt.misc.samples)
                rand_failed = rand_failed / float(opt.misc.samples)
                rand_failed_in_support = rand_failed_in_support / float(opt.misc.samples)


                print "\n\nEarly Stopping Strategy\n\n"
                es_completed = 0
                es_failed = 0
                es_failed_in_support = 0

                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=False, early_stop=False, init_state=init_state, max_rec=max_rec)
                    es_completed += int(results[-3]['completed'])
                    es_failed += int(results[-3]['failed'])
                    es_failed_in_support += int(results[-3]['failed_in_support'])

                es_completed = es_completed / float(opt.misc.samples)
                es_failed = es_failed / float(opt.misc.samples)
                es_failed_in_support = es_failed_in_support / float(opt.misc.samples)


                results = {
                    'van_tallies': [van_completed, van_failed, van_failed_in_support],
                    'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
                    'es_tallies': [es_completed, es_failed, es_failed_in_support],
                }

                # Persist after every initial state so a crash loses little work.
                trial_data[key].append(results)
                print "Saving to: " + opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl'
                pickle.dump(trial_data, open(opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl', 'w'))


    return trial_data
# ---- Beispiel (Example) #3 ----
def run_trial(opt):
    """Run one training trial, then a final 20-initial-state evaluation.

    Collects violation-free supervisor trajectories and trains a learner plus
    one one-class SVM per timestep. After the data-collection loop, evaluates
    three rollout strategies (vanilla learner, random-sample recovery, early
    stopping) from 20 shared initial states, pickling tallies and recovery
    score/cutoff traces as it goes.

    opt is a project options object; it must provide env/sup/sim handles,
    architecture and SVM hyperparameters, iteration counts, and output paths.

    Returns (trial_data, info) where trial_data is the list of per-initial-
    state tally dicts and info holds the accumulated rec_scores/rec_cutoffs.
    """
    # One one-class SVM per timestep t; together they model the distribution
    # of supervisor states seen at each step.
    ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    # Allocated but only written by the commented-out evaluation code below.
    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        # Rejection sampling: keep rolling out the supervisor until a
        # trajectory completes without a constraint violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)


        # Checkpoint branch retained from an earlier experiment variant; the
        # actual training/fitting here is commented out.
        if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
                # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
                # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
                # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

                # trajs_valid.append(states_valid)
                # trajs_test.append(states_test)
                # trajs_robust.append(states_robust)

                # sup_iters_rewards[j] = r_valid
                # lnr_iters_rewards[j] = r_test
                # rob_iters_rewards[j] = r_robust

                # freqs[j] = freq

                # if j == 0:
                #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
                #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])


            # Checkpoint index (Python 2 integer division); only consumed by
            # the commented-out error/reward bookkeeping below.
            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # Persist the collected dataset (Python 2 text-mode pickle).
    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))


    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."


    trajs = trajs_train
    fit_all(ocs, trajs)


    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    # Per-(trajectory, timestep) statistics over the training data:
    # Ls  - state change per unit action norm (Lipschitz-like ratio)
    # KLs - SVM decision-function change per unit action norm
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        # Consecutive state pairs; last step has no successor.
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t+1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
            action_norm = np.linalg.norm(action)

            # NOTE(review): divides by action_norm with no zero guard —
            # assumes actions are never exactly zero; confirm.
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    # Worst case over trajectories, per timestep; max_KLs parameterizes the
    # recovery strategies below.
    max_Ls = np.max(Ls, axis=0)
    max_KLs = np.max(KLs, axis=0)

    max_rec = 500

    trial_data = []
    rec_scores = []
    rec_cutoffs = []
    # Evaluate from 20 shared initial states so the three strategies are
    # compared on the same starting conditions.
    for k in range(20):
        # Reject initial states that the t=0 SVM labels as outliers
        # (predict == -1), i.e. outside the supervisor's support.
        reject = True
        while reject:
            print "Determing whether to reject initial state..."
            s = opt.env.reset()
            reject = ocs[0].predict([s])[0] == -1
        init_state = opt.env.get_pos_vel()

        print "\n\nVanilla Learner\n\n"

        van_completed = 0
        van_failed = 0
        van_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            # results[-3] is the per-rollout stats dict.
            results = rec_statistics.collect_traj(opt.env, lnr, ocs, opt.t, visualize=False, early_stop=False, init_state = init_state)
            van_completed += int(results[-3]['completed'])
            van_failed += int(results[-3]['failed'])
            van_failed_in_support += int(results[-3]['failed_in_support'])

        # Convert tallies to fractions of the evaluation samples.
        van_completed = van_completed / float(opt.misc.samples)
        van_failed = van_failed / float(opt.misc.samples)
        van_failed_in_support = van_failed_in_support / float(opt.misc.samples)


        print "\n\nRand Control Recovery Strategy\n\n"

        rand_completed = 0
        rand_failed = 0
        rand_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=False, early_stop=False, init_state=init_state, max_rec=max_rec)
            rand_completed += int(results[-3]['completed'])
            rand_failed += int(results[-3]['failed'])
            rand_failed_in_support += int(results[-3]['failed_in_support'])

        rand_completed = rand_completed / float(opt.misc.samples)
        rand_failed = rand_failed / float(opt.misc.samples)
        rand_failed_in_support = rand_failed_in_support / float(opt.misc.samples)
        # NOTE(review): only the LAST rand rollout's scores/cutoffs are kept
        # here (results is the last loop value) — confirm this is intended.
        rec_scores += results[-3]['rec_scores']
        rec_cutoffs += results[-3]['rec_cutoffs']

        print "\n\nEarly Stopping Strategy\n\n"
        es_completed = 0
        es_failed = 0
        es_failed_in_support = 0


        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=False, early_stop=False, init_state=init_state, max_rec=max_rec)
            es_completed += int(results[-3]['completed'])
            es_failed += int(results[-3]['failed'])
            es_failed_in_support += int(results[-3]['failed_in_support'])

        es_completed = es_completed / float(opt.misc.samples)
        es_failed = es_failed / float(opt.misc.samples)
        es_failed_in_support = es_failed_in_support / float(opt.misc.samples)

        results = {
            'van_tallies':  [van_completed, van_failed, van_failed_in_support],
            'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
            'es_tallies':   [es_completed, es_failed, es_failed_in_support],
        }

        # Persist after every initial state so a crash loses little work.
        trial_data.append(results)
        print "Saving to: " + opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl'
        pickle.dump(trial_data, open(opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl', 'w'))
        pickle.dump(rec_scores, open(opt.data_dir + 'multiple_trials/rec_scores' + str(opt.t_value) + '.pkl', 'w'))
        pickle.dump(rec_cutoffs, open(opt.data_dir + 'multiple_trials/rec_cutoffs' + str(opt.t_value) + '.pkl', 'w'))

    info = {'rec_scores': rec_scores, 'rec_cutoffs': rec_cutoffs}
    return trial_data, info
def run_trial(opt):
    """Collect supervisor data, fit learner + per-timestep SVMs, print K/L stats.

    Variant of run_trial that uses fixed SVM hyperparameters (gamma=.05,
    nu=.05), computes running max/average of two Lipschitz-like ratios over
    the training trajectories, prints them, and drops into an interactive
    IPython shell. Implicitly returns None.

    opt is a project options object; it must provide env/sup handles,
    architecture hyperparameters, and iteration counts.
    """
    # One one-class SVM per timestep t; together they model the distribution
    # of supervisor states seen at each step.
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    # Allocated but only written by the commented-out evaluation code below.
    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        # Rejection sampling: keep rolling out the supervisor until a
        # trajectory completes without a constraint violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        # Checkpoint branch retained from an earlier experiment variant; the
        # actual training/fitting here is commented out.
        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            # Checkpoint index (Python 2 integer division); only consumed by
            # the commented-out error/reward bookkeeping below.
            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # Persist the collected dataset (Python 2 text-mode pickle).
    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    # Running statistics over all (trajectory, timestep) pairs:
    # L - state change per unit action norm (Lipschitz-like ratio)
    # K - SVM decision-function change per unit action norm
    # NOTE(review): the later variant of run_trial in this file swaps the
    # K/L labels for these same two ratios — confirm which naming is intended.
    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1
    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        # Consecutive state pairs; last step has no successor.
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            # NOTE(review): divides by action_norm with no zero guard —
            # assumes actions are never exactly zero; confirm.
            L = state_norm / action_norm
            K = func_diff / action_norm

            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            # Track the maxima and the timesteps at which they occur.
            if L > max_L:
                max_L = L
                max_Lt = t
            if K > max_K:
                max_K = K
                max_Kt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    # Drop into an interactive shell for manual inspection of the trial state.
    IPython.embed()
# ---- Beispiel (Example) #5 ----
def run_trial(opt):
    """Train on supervisor data, then compare recovery vs plain learner rollouts.

    Collects violation-free supervisor trajectories, trains a learner and one
    one-class SVM per timestep, computes worst-case K/L ratios over the
    training data, then evaluates (a) an adaptive recovery strategy
    parameterized by max_L and (b) the plain learner, tallying failures and
    confusion-matrix counts from check_predictions.

    opt is a project options object; it must provide env/sup handles,
    architecture hyperparameters, iteration counts, and sample counts.

    Returns {"rec": rec_results, "lnr": lnr_results} where each value is a
    dict of failure and false/true positive/negative tallies.
    """
    # One one-class SVM per timestep t; together they model the distribution
    # of supervisor states seen at each step.
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    # Allocated but only written by the commented-out evaluation code below.
    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        # Rejection sampling: keep rolling out the supervisor until a
        # trajectory completes without a constraint violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        # Retrain the learner and refit the SVMs at evenly spaced checkpoints
        # (Python 2 integer division gives the spacing). Unlike the other
        # variants in this file, this one actually trains here.
        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            # Checkpoint index (Python 2 integer division); only consumed by
            # the commented-out error/reward bookkeeping below.
            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    # Final training pass on the full dataset.
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    # Running statistics over all (trajectory, timestep) pairs:
    # K - state change per unit action norm
    # L - SVM decision-function change per unit action norm
    # NOTE(review): these K/L labels are swapped relative to the earlier
    # variant of run_trial in this file — confirm which naming is intended.
    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1
    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        # Consecutive state pairs; last step has no successor.
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            # NOTE(review): divides by action_norm with no zero guard —
            # assumes actions are never exactly zero; confirm.
            K = state_norm / action_norm
            L = func_diff / action_norm

            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            # Track the maxima and the timesteps at which they occur.
            if K > max_K:
                max_K = K
                max_Kt = t
            if L > max_L:
                max_L = L
                max_Lt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    print "\n\nRecovery\n\n"

    # Tallies over opt.misc.samples recovery rollouts; the confusion counts
    # come from check_predictions on each rollout's info dict.
    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        # Re-roll until the adaptive collection accepts the rollout
        # (results[-1] is the reject flag, results[-2] the failure flag,
        # results[-3] the per-rollout info dict).
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_L, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        # (false_neg, false_pos, true_pos, true_neg) per rollout.
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        # rec_failed counts rollouts where the recovery itself caused failure
        # (info['rec_failed'] holds a step index, -1 when it never fired).
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    # Same tallies for the plain learner (no recovery strategy).
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."
    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
def run_trial(opt):
    """Run one imitation-learning trial using a SINGLE one-class SVM.

    Pipeline:
      1. Collect ``opt.iters`` supervisor trajectories, rejecting any
         rollout that reports a constraint violation.
      2. Pickle the collected data to ``data/``, then immediately reload
         it (round-trips through disk).
      3. Train the learner net and fit one OneClassSVM on all training
         states.
      4. Estimate per-timestep sensitivity constants (``max_Ls``,
         ``max_KLs``) from finite differences along training trajectories.
      5. Evaluate (a) an adaptive recovery policy and (b) the plain
         learner over ``opt.misc.samples`` rejection-sampled rollouts,
         tallying failure counts and alarm-prediction confusion counts.

    Mutates ``opt`` in place (``opt.samples = 1``) and stops at an
    interactive ``IPython.embed()`` before the evaluation phase.

    Returns a dict with keys "rec" (recovery-policy stats) and
    "lnr" (plain-learner stats).
    """
    # Single novelty detector with fixed RBF hyperparameters, unlike the
    # sibling run_trial variants that build one OCSVM per timestep.
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    # Pre-allocated result buffers; the code that fills most of them is
    # currently commented out below, so they stay zero.
    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        # Rejection sampling: retry the supervisor rollout until it
        # completes without a reported violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        # Periodic evaluation checkpoint, opt.misc.num_evaluations times
        # over the run. The actual training/fitting/eval calls are
        # commented out here (only log lines remain).
        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            # Integer division (Python 2): maps iteration i onto its
            # evaluation slot index.
            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # Checkpoint the collected data, then reload it straight back. The
    # reload is redundant with the in-memory values but preserved as a
    # disk round-trip (the 'data/' directory must exist).
    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # Hold out the last 200 trajectories as a test split.
    # NOTE(review): trajs_test is never used after this point — confirm
    # whether the split is still intended.
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    # Fit the single OCSVM on ALL learner training states (not just the
    # trajs_train split).
    oc.fit(lnr.X)
    print "Done fitting"

    # Per-(trajectory, timestep) finite-difference statistics:
    #   Ls  — state displacement per unit action norm (Lipschitz-like),
    #   KLs — decision-score displacement per unit action norm.
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros(
        (len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states,
            traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            # NOTE(review): the [0, 0] indexing assumes decision_function
            # returns a 2-D (n_samples, 1) array, as in older
            # scikit-learn releases — newer versions return 1-D; confirm
            # the pinned sklearn version.
            func_diff = np.abs(
                oc.decision_function([state])[0, 0] -
                oc.decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    # Worst-case (max over trajectories) constants per timestep; max_KLs
    # parameterizes the adaptive recovery rollout below.
    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    # Interactive breakpoint: execution halts here until the embedded
    # IPython shell is exited.
    IPython.embed()

    print "\n\nRecovery\n\n"

    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0

    # Evaluate the adaptive recovery policy; rollouts are rejection
    # sampled until one is accepted, and only the accepted rollout's
    # stats are tallied.
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, oc, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        # Confusion counts for the alarm predictions of this rollout.
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        # first_complete == -1 means the task was never completed.
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        # rec_failed == -1 means the recovery itself never failed.
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
        "completed": completed,
        "comp_before_fail": comp_before_fail,
        "comp_before_alarm": comp_before_alarm
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    # Same evaluation loop for the plain learner (no recovery policy);
    # counters are reset and reused.
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, oc, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        # The plain learner has no recovery controller, so it can never
        # cause a recovery-induced failure.
        "caused_fail": 0,
        "completed": completed,
        "comp_before_alarm": comp_before_alarm,
        "comp_before_fail": comp_before_fail
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
    # NOTE(review): unreachable dead code — the function always returns
    # via the dict above; consider deleting this line.
    return None
Beispiel #7
0
def run_trial(opt):
    """Run one imitation-learning trial with one OneClassSVM PER timestep.

    Pipeline:
      1. Collect ``opt.iters`` supervisor trajectories (rejection
         sampling on constraint violations), periodically retraining the
         learner and refitting the per-timestep OCSVMs.
      2. Do a final train/fit, then force ``opt.misc.samples = 300``.
      3. Evaluate (a) the (non-adaptive) recovery policy and (b) the
         plain learner over rejection-sampled rollouts, tallying
         failures and alarm-prediction confusion counts.

    Mutates ``opt`` in place (``opt.samples``, ``opt.misc.samples``) and
    stops at an interactive ``IPython.embed()`` before returning.

    Returns a dict with keys "rec_results" and "lnr_results".
    """
    # One novelty detector per timestep, all with fixed hyperparameters.
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    # Pre-allocated result buffers; the evaluation code that would fill
    # them is commented out below, so they stay zero.
    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        # Rejection sampling: retry the supervisor rollout until it
        # completes without a reported violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        # Periodic checkpoint: unlike the sibling variants, this one
        # actually retrains the learner and refits the OCSVMs here.
        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            # Integer division (Python 2): evaluation slot index.
            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    # Final train/fit on everything collected.
    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"
    # Overrides whatever sample count was configured for evaluation.
    opt.misc.samples = 300

    print "\n\nRecovery\n\n"

    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    # Evaluate the recovery policy; rollouts are rejection sampled until
    # accepted, and only the accepted rollout's stats are tallied.
    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, opt, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        # Confusion counts for the alarm predictions of this rollout.
        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        # rec_failed == -1 means the recovery itself never failed.
        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "rec_failed": rec_failed,
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    # Same evaluation loop for the plain learner; counters reset/reused.
    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."
    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    print "\n\n\n"
    # print "\n\nSupervisor\n\n"

    # sup_failures = 0
    # for j in range(num_samples):
    #     print "Iteration: " + str(j)
    #     reject = True
    #     while reject:
    #         results = statistics.collect_score_traj_multiple_rejection(opt.env, opt.sup, ocs, opt.t, False, False)
    #         reject = results[-1]
    #         failed = results[-2]
    #         if reject:
    #             print "\tRejecting " + str(j) + " and restarting..."
    #     if failed:
    #         sup_failures += 1
    #         print "\t" + str(j) + " failed..."
    # print "fraction of failures: " + str(sup_failures / float(num_samples))

    # Interactive breakpoint: execution halts here until the embedded
    # IPython shell is exited.
    IPython.embed()

    return {
        "rec_results": rec_results,
        "lnr_results": lnr_results,
    }
def run_trial(opt):
    """Run one trial comparing random vs. approximate-gradient recovery
    controls, using one OCSVM per timestep with configurable
    hyperparameters (``opt.gamma``, ``opt.nu``).

    Pipeline:
      1. Collect ``opt.iters`` supervisor trajectories (rejection
         sampling on constraint violations).
      2. Fit the per-timestep OCSVMs and train the learner net.
      3. Estimate per-timestep sensitivity constants (``max_Ls``,
         ``max_KLs``) from finite differences along the training
         trajectories.
      4. From a shared initial state, repeatedly roll out (a) random
         recovery controls and (b) approximate-gradient recovery
         controls until the detector triggers, recording decision
         scores and cutoffs over up to ``max_rec`` recovery steps.

    Mutates ``opt`` in place (``opt.samples = 1``) and resets
    ``opt.env`` while picking initial states.

    Returns a dict of score/cutoff arrays, each of shape
    (opt.misc.samples, max_rec + 1).
    """
    # One novelty detector per timestep, hyperparameters from opt.
    ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    # Pre-allocated result buffers; the evaluation code that would fill
    # them is commented out below, so they stay zero.
    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        # Rejection sampling: retry the supervisor rollout until it
        # completes without a reported violation.
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)


        # Periodic evaluation checkpoint; the actual training/fitting
        # calls are commented out here (only log lines remain).
        if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
                # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
                # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
                # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

                # trajs_valid.append(states_valid)
                # trajs_test.append(states_test)
                # trajs_robust.append(states_robust)

                # sup_iters_rewards[j] = r_valid
                # lnr_iters_rewards[j] = r_test
                # rob_iters_rewards[j] = r_robust

                # freqs[j] = freq

                # if j == 0:
                #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
                #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])


            # Integer division (Python 2): evaluation slot index.
            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))


    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."


    trajs = trajs_train

    # Fit every per-timestep OCSVM on the full set of trajectories.
    fit_all(ocs, trajs)


    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    # Per-(trajectory, timestep) finite-difference statistics:
    #   Ls  — state displacement per unit action norm (Lipschitz-like),
    #   KLs — decision-score displacement per unit action norm, using
    #         the timestep-t detector ocs[t].
    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t+1]
            state_diff = np.linalg.norm(state_next - state)
            # NOTE(review): the [0,0] indexing assumes decision_function
            # returns a 2-D (n_samples, 1) array, as in older
            # scikit-learn releases — confirm the pinned sklearn version.
            func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    # Worst-case (max over trajectories) constants per timestep;
    # max_KLs parameterizes the recovery rollouts below.
    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    # Cap on the number of recovery steps per rollout; all rollouts
    # start from the same sampled initial state.
    max_rec = 1000
    opt.env.reset()
    init_state = opt.env.get_pos_vel()

    print "\n\nRandom Controls\n\n"

    # Score/cutoff traces for rollouts driven by RANDOM recovery
    # controls; retry until the detector actually triggers.
    rand_scores = np.zeros((opt.misc.samples, max_rec + 1))
    rand_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i) + ""
        triggered = False
        k = 0
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_random(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
            # After 20 untriggered attempts, resample the shared initial
            # state and keep trying.
            if k >= 20:
                print "Had to pick new initial state"
                opt.env.reset()
                init_state = opt.env.get_pos_vel()
                k = 0
            else:
                k += 1
        rand_scores[i, :] = results[-3]['rec_scores']
        rand_cutoffs[i, :] = results[-3]['rec_cutoffs']


    print "\n\nApprox Grad Controls\n\n"

    # Same traces for APPROXIMATE-GRADIENT recovery controls.
    # NOTE(review): unlike the random loop above, this one has no retry
    # cap / initial-state resampling — confirm it cannot loop forever.
    approx_grad_scores = np.zeros((opt.misc.samples, max_rec + 1))
    approx_grad_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i) + ""
        triggered = False
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_approx_grad(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
        approx_grad_scores[i, :] = results[-3]['rec_scores']
        approx_grad_cutoffs[i, :] = results[-3]['rec_cutoffs']


    return {
        'rand_scores': rand_scores,
        'rand_cutoffs': rand_cutoffs,
        'approx_grad_scores': approx_grad_scores,
        'approx_grad_cutoffs': approx_grad_cutoffs
    }