Example #1
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.5, nu=.01) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 10

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs_train = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        trajs_train.append(states)

        lnr.add_data(states, int_actions)
        lnr.train()

        trajs_train_array = np.array(trajs_train)
        for t in range(opt.t):
            X = trajs_train_array[:, t, :]
            ocs[t].fit(X)

        if i % 5 == 0:
            trajs_valid = []
            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False)
                trajs_valid.append(states_valid)

            train_oc_errs = eval_ocs(ocs, trajs_train, opt)
            valid_oc_errs = eval_ocs(ocs, trajs_valid, opt)

            print "Train errs: " + str(train_oc_errs)
            print "Valid errs: " + str(valid_oc_errs)
            print "Max train err: " + str(np.amax(train_oc_errs))
            print "Max valid err: " + str(np.amax(valid_oc_errs))

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
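
These trials rely on several helpers defined elsewhere in the repository (eval_ocs, fit_all, eval_oc, the statistics.collect_* rollout functions) whose implementations are not shown, so the sketches added after some examples below are reconstructions from how they are called, not the repository's actual code. For eval_ocs, the usage above suggests a per-timestep outlier rate over the fitted one-class SVMs; a minimal sketch, assuming every trajectory has exactly opt.t states:

import numpy as np

def eval_ocs(ocs, trajs, opt=None):
    """Sketch: per-timestep outlier rate under the fitted one-class SVMs.

    Assumes trajs is a list of trajectories, each a length-T sequence of
    state vectors, and ocs[t] is a fitted sklearn OneClassSVM for step t.
    Some examples pass opt as a third argument; it is unused here.
    """
    trajs = np.asarray(trajs)                # (num_trajs, T, state_dim)
    T = trajs.shape[1]
    errs = np.zeros(T)
    for t in range(T):
        preds = ocs[t].predict(trajs[:, t, :])   # +1 = inlier, -1 = outlier
        errs[t] = np.mean(preds == -1)
    return errs

Under this reading, np.amax(train_oc_errs) in the loop above reports the worst per-timestep training error.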
Example #2
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))

    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        lnr.train()

        print "\t" + str(lnr.acc())
        for j in range(opt.samples):
            _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
            lnr_rewards[j, i] = r
            lnr_perf[j, i] = opt.env.metric()

    print "Average success: " + str(sup_rewards)
    print "Learner success: \n" + str(lnr_rewards)

    pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')
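
Example 2 writes its reward and performance curves through a plot helper that is not shown. A minimal sketch, assuming each series is a (runs x iterations) array, that figures go to opt.plot_dir, and that a mean curve with standard-error bands is the intended summary (all assumptions):

import numpy as np
import matplotlib.pyplot as plt

def plot(data_list, labels, opt, title):
    """Sketch: plot mean +/- standard error per iteration for each series."""
    plt.figure()
    for data, label in zip(data_list, labels):
        data = np.asarray(data)                       # (runs, iters)
        mean = data.mean(axis=0)
        sem = data.std(axis=0) / np.sqrt(data.shape[0])
        xs = np.arange(data.shape[1])
        plt.plot(xs, mean, label=label)
        plt.fill_between(xs, mean - sem, mean + sem, alpha=0.3)
    plt.xlabel('Iteration')
    plt.ylabel(title)
    plt.legend()
    plt.savefig(opt.plot_dir + title + '.png')
    plt.close()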
Example #3
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1
    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            L = state_norm / action_norm
            K = func_diff / action_norm

            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            if L > max_L:
                max_L = L
                max_Lt = t
            if K > max_K:
                max_K = K
                max_Kt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    IPython.embed()
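
Examples 3 through 6 fit one OneClassSVM per timestep through fit_all. A minimal sketch, assuming all training trajectories have exactly len(ocs) states so the t-th SVM is fit on the t-th state of every trajectory; this is the per-timestep analogue of the single oc.fit(lnr.X) call in the later examples:

import numpy as np

def fit_all(ocs, trajs):
    """Sketch: fit ocs[t] on the t-th state of every training trajectory."""
    trajs = np.asarray(trajs)                 # (num_trajs, T, state_dim)
    for t in range(len(ocs)):
        ocs[t].fit(trajs[:, t, :])
    return ocs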
Example #4
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"

    Ks = []
    ts = []
    Ls = []
    max_K = 0
    max_L = 0
    max_Kt = -1
    max_Lt = -1
    for traj_states, traj_actions in zip(trajs_train, actions_train):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_norm = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            K = state_norm / action_norm
            L = func_diff / action_norm

            Ls.append(L)
            Ks.append(K)
            ts.append(t)
            if K > max_K:
                max_K = K
                max_Kt = t
            if L > max_L:
                max_L = L
                max_Lt = t

    print "Max train L: " + str(max_L)
    print "Avg train L: " + str(np.mean(Ls))
    print "Max train K: " + str(max_K)
    print "Avg train K: " + str(np.mean(Ks))

    print "\n\nRecovery\n\n"

    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_L, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."
    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
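
The evaluation loops above unpack four confusion-matrix counts from check_predictions(info). Neither the helper nor the layout of info is shown in the source; the sketch below is one plausible reading, with hypothetical keys alarms and failed, returning the counts in the order they are unpacked above (false negatives, false positives, true positives, true negatives):

import numpy as np

def check_predictions(info):
    """Sketch: confusion counts for the novelty-detector alarms on one rollout.

    Hypothetical keys (not confirmed by the source):
      info['alarms'] - boolean array, True where the detector raised an alarm
      info['failed'] - True if the rollout ended in a failure
    """
    alarms = np.asarray(info['alarms'], dtype=bool)
    failed = bool(info['failed'])
    raised = alarms.any()
    false_negative = int(failed and not raised)
    false_positive = int((not failed) and raised)
    true_positive = int(failed and raised)
    true_negative = int((not failed) and not raised)
    return false_negative, false_positive, true_positive, true_negative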
Example #5
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    oc.fit(lnr.X)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros(
        (len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states,
            traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(
                oc.decision_function([state])[0, 0] -
                oc.decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    print "\n\nRecovery\n\n"

    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, oc, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": rec_failed,
        "completed": completed,
        "comp_before_fail": comp_before_fail,
        "comp_before_alarm": comp_before_alarm
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    completed = 0
    comp_before_alarm = 0
    comp_before_fail = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, oc, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        completed_results = check_completed(info)
        if info['first_complete'] > -1:
            completed += 1
        comp_before_fail += completed_results['comp_before_fail']
        comp_before_alarm += completed_results['comp_before_alarm']

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."

    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "caused_fail": 0,
        "completed": completed,
        "comp_before_alarm": comp_before_alarm,
        "comp_before_fail": comp_before_fail
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    return {
        "rec": rec_results,
        "lnr": lnr_results,
    }
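
Example 5 also tallies completion statistics through check_completed. Again the info keys are not shown; the sketch assumes hypothetical per-rollout event indices (first_complete, first_fail, first_alarm, each -1 if the event never happened), matching how the counts are accumulated above:

def check_completed(info):
    """Sketch: did the task complete before the first failure / first alarm?

    Hypothetical keys: 'first_complete', 'first_fail', 'first_alarm',
    each a timestep index, or -1 if the event never occurred.
    """
    first_complete = info.get('first_complete', -1)
    first_fail = info.get('first_fail', -1)
    first_alarm = info.get('first_alarm', -1)

    def before(a, b):
        # event a happened, and either b never happened or a came first
        return a > -1 and (b == -1 or a < b)

    return {
        'comp_before_fail': int(before(first_complete, first_fail)),
        'comp_before_alarm': int(before(first_complete, first_alarm)),
    }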
Example #6
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 20

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            lnr.train()
            fit_all(ocs, trajs_train)
            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                    opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                    opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = stats.collect_robust_traj_multiple(
                    opt.env, opt.sim, lnr, ocs, opt.t, opt, False, False)

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust

                freqs[j] = freq

                if j == 0:
                    utils.plot([np.array([lnr_score]),
                                np.array([rob_score])],
                               ['Learner', 'Robust Learner'],
                               opt,
                               "scores/DecisionScores" + str(i),
                               colors=['blue', 'green'])
                    utils.plot([np.array([mags])], ['Robust Learner'],
                               opt,
                               "mags/RecoveryMagnitudes" + str(i),
                               colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)

            correction_freq[index] = np.mean(freqs)

            if index == (opt.misc.num_evaluations - 1):
                bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid,
                                           trajs_test, opt)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        "bar_errs": bar_errs
    }
Example #7
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 5

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    correction_freq = np.zeros(opt.iters)

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)

        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)

        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []
        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(
                opt.env, lnr, oc, opt.t, False)
            states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(
                opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            X_test += states_test
            X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            lnr_iter_rewards[j] = r_test
            rob_iter_rewards[j] = r_robust

            freqs[j] = freq

            if j == 0:
                utils.plot([np.array([lnr_score])], ['Learner'],
                           opt,
                           "DecisionScore" + str(i),
                           colors=['blue'])
                utils.plot([np.array([rob_score])], ['Robust Learner'],
                           opt,
                           "RobustDecisionScore" + str(i),
                           colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        test_err[i] = eval_oc(oc, X_test)
        robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        rob_reward[i] = np.mean(rob_iter_rewards)

        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
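
Examples 7 and 11 score a single one-class SVM over a pooled list of states with eval_oc. Given that the result is printed as an error rate, a minimal sketch is simply the fraction of states predicted as outliers:

import numpy as np

def eval_oc(oc, X):
    """Sketch: fraction of states in X flagged as outliers by the one-class SVM."""
    preds = oc.predict(np.asarray(X))         # +1 = inlier, -1 = outlier
    return float(np.mean(preds == -1))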
Example #8
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)

        lnr.add_data(states, int_actions)

    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "\nTraining error: " + str(train_err)

    lnr.train()

    sup_rewards = np.zeros((20))
    lnr_rewards = np.zeros((20))

    X_valid = []
    X_test = []
    for i in range(20):
        states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions_test, _, r_test = statistics.collect_traj(
            opt.env, lnr, opt.t, False)

        sup_rewards[i] = r_valid
        lnr_rewards[i] = r_test

        X_valid += states_valid
        X_test += states_test

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation erorr: " + str(valid_err)

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))
    print "Test erorr: " + str(test_err)

    print "\n\n"

    print "Average sup reward: " + str(np.mean(sup_rewards)) + " +/- " + str(
        scipy.stats.sem(sup_rewards))
    print "Average lnr reward: " + str(np.mean(lnr_rewards)) + " +/- " + str(
        scipy.stats.sem(lnr_rewards))

    print "\n\n"

    def dec(u):
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    rewards = np.zeros((20))
    rec_counts = np.zeros((20))
    X_robust = []
    for i in range(20):

        s = opt.env.reset()
        states = [s]

        for t in range(opt.t):
            score = oc.decision_function([s])[0, 0]
            # print "Decision score: " + str(score)
            if score < .1:
                alpha = .1
                a = alpha * utils.finite_diff1(
                    np.zeros(opt.env.action_space.shape), dec)
                # print "Recovering: " + str(a)
                rec_counts[i] += 1.0
                s, r, done, _ = opt.env.step(a)
            else:
                a = lnr.intended_action(s)
                s, r, done, _ = opt.env.step(a)

            rewards[i] += r
            states.append(s)

            # if done == True:
            #     break

        X_robust += states

    robust_preds = oc.predict(X_robust)
    robust_err = len(robust_preds[robust_preds == -1]) / float(
        len(robust_preds))
    print "Robust erorr: " + str(robust_err)

    rec_freq = np.mean(rec_counts / float(opt.t))
    print "Recovery frequency: " + str(rec_freq)

    print "Robust rewards: " + str(np.mean(rewards)) + " +/- " + str(
        scipy.stats.sem(rewards))
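
Example 8 recovers by stepping along a numerical gradient of the one-class SVM decision score with respect to the action, computed by utils.finite_diff1. A sketch of such a forward-difference helper (the step size and exact convention are assumptions):

import numpy as np

def finite_diff1(u0, f, eps=1e-4):
    """Sketch: forward-difference gradient of the scalar function f at u0."""
    u0 = np.asarray(u0, dtype=float)
    f0 = f(u0)
    grad = np.zeros_like(u0)
    for k in range(u0.shape[0]):
        u = u0.copy()
        u[k] += eps
        grad[k] = (f(u) - f0) / eps
    return grad

With the dec closure defined above, alpha * finite_diff1(np.zeros(opt.env.action_space.shape), dec) nudges the action in the direction that locally increases the decision score, i.e. back toward the support of the training data.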
Example #9
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 2

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # for i in range(opt.iters):
    #     print "\nIteration: " + str(i)
    #     states, int_actions, taken_actions, r = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #     trajs_train.append(states)
    #     lnr.add_data(states, int_actions)

    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         lnr.train()
    #         fit_all(ocs, trajs_train)
    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         for j in range(opt.samples):
    #             states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(opt.env, lnr, ocs, opt.t, False, False)
    #             states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             trajs_valid.append(states_valid)
    #             trajs_test.append(states_test)
    #             trajs_robust.append(states_robust)

    #             sup_iters_rewards[j] = r_valid
    #             lnr_iters_rewards[j] = r_test
    #             rob_iters_rewards[j] = r_robust

    #             freqs[j] = freq

    #             if j == 0:
    #                 utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #                 utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         sup_reward[index] = np.mean(sup_iters_rewards)
    #         lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         rob_reward[index] = np.mean(rob_iters_rewards)

    #         correction_freq[index] = np.mean(freqs)

    #         if index == (opt.misc.num_evaluations - 1):
    #             bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid, trajs_test, opt)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs = pickle.load(open('data/trajs_train.pkl', 'r'))
    print "Done loading data"
    trajs_train, trajs_test = trajs[:-50], trajs[-50:]
    lnr.train()
    fit_all(ocs, trajs_train)
    print eval_ocs(ocs, trajs_train)
    print eval_ocs(ocs, trajs_test)

    s = opt.env.reset()
    env = opt.env

    for k in range(10):
        score = ocs[k].decision_function([s])[0, 0]
        print "Score: " + str(score)
        s, _, _, _ = env.step(lnr.intended_action(s))
    score = ocs[k + 1].decision_function([s])[0, 0]
    print "Score: " + str(score)

    IPython.embed()

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        "bar_errs": bar_errs
    }
Example #10
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))

    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        # lnr.train()

        # print "\t" + str(lnr.acc())
        # for j in range(opt.samples):
        #     _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
        #     lnr_rewards[j, i] = r
        #     lnr_perf[j, i] = opt.env.metric()

    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "Training error: " + str(train_err)

    X_valid = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        X_valid += states

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation erorr: " + str(valid_err)

    lnr.train()
    X_test = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, lnr, opt.t)
        X_test += states

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))

    print "Test erorr: " + str(test_err)

    s = opt.env.reset()
    reward = 0.0
    x = opt.env.get_x()

    def dec(u):
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    states_visited = []
    for t in range(opt.t):
        opt.env.render()
        score = oc.decision_function([s])
        print "\tDecision score: " + str(score)

        # if score < .2 and False:
        #     alpha = 1.0
        #     a = alpha * utils.finite_diff1(np.zeros(opt.env.action_space.shape[0]), dec)
        #     print "\t\tRecovering: " + str(a)
        #     s, r, done, _ = opt.env.step(a)
        #     x = opt.env.get_x()
        # else:
        a = lnr.intended_action(s)
        s, r, done, _ = opt.env.step(a)
        x = opt.env.get_x()

        states_visited.append(s)

        if done == True:
            break

    preds = oc.predict(states_visited)
    err = len(preds[preds == -1]) / float(len(preds))
    print "Error: " + str(err)

    print "\nDone after " + str(t + 1) + " steps"

    # print "Average success: " + str(sup_rewards)
    # print "Learner success: \n" + str(lnr_rewards)

    # pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    # pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    # pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    # pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    # plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    # plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')

    IPython.embed()
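
Every trial rolls out a policy with statistics.collect_traj(env, policy, T, ...), which returns the visited states, the policy's intended actions, the actions actually taken, and the episode reward. A rough gym-style sketch, assuming both supervisor and learner expose intended_action(state) and that trajectories are kept at the full length T so the per-timestep SVMs line up (the trailing boolean flags in the real signature appear to control rendering and are ignored here):

def collect_traj(env, agent, T, *flags):
    """Sketch: roll out agent in env for exactly T steps.

    Returns (states, intended_actions, taken_actions, total_reward).
    Intended and taken actions coincide here; they differ only when a
    supervisor or recovery policy overrides the agent.
    """
    s = env.reset()
    states, int_actions, taken_actions = [], [], []
    total_reward = 0.0
    for _ in range(T):
        a = agent.intended_action(s)
        states.append(s)
        int_actions.append(a)
        taken_actions.append(a)
        s, r, done, _ = env.step(a)
        total_reward += r
    return states, int_actions, taken_actions, total_reward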
Example #11
def run_trial(opt):
    # oc = TrajSV(nu = .01, gamma = .1)
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.1)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    opt.samples = 10

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    correction_freq = np.zeros(opt.iters)

    trajs = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        trajs.append(states)

        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)
        # oc.fit(trajs)

        lnr.train()

        X_valid = []
        X_test = []
        X_robust = []
        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            # states_test, int_actions_test, _, r_test, freq, lnr_score = statistics.collect_score_traj(opt.env, lnr, oc, opt.t, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj(opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            # X_test += states_test
            # X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            # lnr_iter_rewards[j] = r_test
            # rob_iter_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score])], ['Learner'], opt, "DecisionScore" + str(i), colors=['blue'])
            #     utils.plot([np.array([rob_score])], ['Robust Learner'], opt, "RobustDecisionScore" + str(i), colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        # test_err[i] = eval_oc(oc, X_test)
        # robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        # rob_reward[i] = np.mean(rob_iter_rewards)

        correction_freq[i] = np.mean(freqs)

        print "One class train error: " + str(train_err[i])
        print "One class valid error: " + str(valid_err[i])

        n = len(trajs)
        flags = np.zeros(n)
        for k, traj in enumerate(trajs):
            for state in traj:
                pred = oc.predict([state])[0]
                if pred == -1:
                    flags[k] = 1.0
                    break
        print "Fraction of partial trajectories: " + str(np.mean(flags))

        counts = np.zeros(opt.t)
        for t in range(opt.t):
            for traj in trajs:
                state = traj[t]
                pred = oc.predict([state])[0]
                if pred == -1:
                    counts[t] += 1.0
        counts = counts / float(n)
        print "Max time fraction: " + str(np.amax(counts))
        print "Time fractions:\n" + str(counts)
        print "\n"

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
Example #12
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters / opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            lnr.train()
            print "\t\tFitting oc svms..."
            fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
            # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
            # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

            # trajs_valid.append(states_valid)
            # trajs_test.append(states_test)
            # trajs_robust.append(states_robust)

            # sup_iters_rewards[j] = r_valid
            # lnr_iters_rewards[j] = r_test
            # rob_iters_rewards[j] = r_robust

            # freqs[j] = freq

            # if j == 0:
            #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
            #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))

    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # print "Done loading data."

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    fit_all(ocs, trajs_train)
    print "Done fitting"
    opt.misc.samples = 300

    print "\n\nRecovery\n\n"

    rec_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0
    rec_failed = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_robust_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, opt, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            rec_failures += 1
            print "\t" + str(j) + " failed..."

        if info['rec_failed'] > -1:
            rec_failed += 1
            print "\t rec failed"

    rec_results = {
        "failures": rec_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
        "rec_failed": rec_failed,
    }
    print "fraction of failures: " + str(
        rec_failures / float(opt.misc.samples))

    print "\n\nLearner\n\n"

    lnr_failures = 0
    false_negatives = 0
    false_positives = 0
    true_positives = 0
    true_negatives = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            results = statistics.collect_score_traj_multiple_rejection(
                opt.env, lnr, ocs, opt.t, False, False)
            reject = results[-1]
            failed = results[-2]
            info = results[-3]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."

        tup = check_predictions(info)
        false_negatives += tup[0]
        false_positives += tup[1]
        true_positives += tup[2]
        true_negatives += tup[3]

        if failed:
            lnr_failures += 1
            print "\t" + str(j) + " failed..."
    print "fraction of failures: " + str(
        lnr_failures / float(opt.misc.samples))

    lnr_results = {
        "failures": lnr_failures,
        "false_negatives": false_negatives,
        "false_positives": false_positives,
        "true_positives": true_positives,
        "true_negatives": true_negatives,
    }

    print "\n\n\nrec_results"
    print rec_results

    print "lnr_results"
    print lnr_results

    print "\n\n\n"
    # print "\n\nSupervisor\n\n"

    # sup_failures = 0
    # for j in range(num_samples):
    #     print "Iteration: " + str(j)
    #     reject = True
    #     while reject:
    #         results = statistics.collect_score_traj_multiple_rejection(opt.env, opt.sup, ocs, opt.t, False, False)
    #         reject = results[-1]
    #         failed = results[-2]
    #         if reject:
    #             print "\tRejecting " + str(j) + " and restarting..."
    #     if failed:
    #         sup_failures += 1
    #         print "\t" + str(j) + " failed..."
    # print "fraction of failures: " + str(sup_failures / float(num_samples))

    IPython.embed()

    return {
        "rec_results": rec_results,
        "lnr_results": lnr_results,
    }
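
The later examples use the rejection variant statistics.collect_traj_rejection, which additionally reports whether the rollout violated a constraint so the caller can discard it and resample. A sketch of the assumed interface; the violation test itself is a placeholder, since the real environments define their own:

def collect_traj_rejection(env, agent, T, *flags):
    """Sketch: like collect_traj, but also return a constraint-violation flag.

    Returns (states, intended_actions, taken_actions, total_reward, violation);
    the callers above loop until violation is False.
    """
    s = env.reset()
    states, int_actions, taken_actions = [], [], []
    total_reward = 0.0
    violation = False
    for _ in range(T):
        a = agent.intended_action(s)
        states.append(s)
        int_actions.append(a)
        taken_actions.append(a)
        s, r, done, info = env.step(a)
        total_reward += r
        # Hypothetical: the environment marks constraint violations in info.
        violation = violation or bool(info.get('violation', False))
    return states, int_actions, taken_actions, total_reward, violation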
Example #13
def run_trial(opt):
    ocs = [
        OCSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu) for t in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    actions_train = []
    for i in range(opt.iters):

        print "Iteration: " + str(i)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
                opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)


        if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            # for j in range(opt.samples):
                # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
                # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
                # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

                # trajs_valid.append(states_valid)
                # trajs_test.append(states_test)
                # trajs_robust.append(states_robust)

                # sup_iters_rewards[j] = r_valid
                # lnr_iters_rewards[j] = r_test
                # rob_iters_rewards[j] = r_robust

                # freqs[j] = freq

                # if j == 0:
                #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
                #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])


            index = i / (opt.iters / opt.misc.num_evaluations)
            # train_err[index, :] = eval_ocs(ocs, trajs_train)
            # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            # test_err[index, :] = eval_ocs(ocs, trajs_test)
            # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            # sup_reward[index] = np.mean(sup_iters_rewards)
            # lnr_reward[index] = np.mean(lnr_iters_rewards)
            # rob_reward[index] = np.mean(rob_iters_rewards)

            # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))


    # print "Loading data..."
    # lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    # lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    # lnr.X = lnr.X
    # lnr.y = lnr.y
    # trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    # actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    # print "Done loading data."


    trajs = trajs_train

    fit_all(ocs, trajs)


    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t+1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
            action_norm = np.linalg.norm(action)
            
            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)
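    # Worst-case (maximum over training trajectories) sensitivities per timestep. max_KLs is
    # handed to the recovery routines below, presumably as a per-timestep scale/threshold on
    # how fast the decision score may change per unit of control.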

    max_rec = 1000
    opt.env.reset()
    init_state = opt.env.get_pos_vel()

    print "\n\nRandom Controls\n\n"

    rand_scores = np.zeros((opt.misc.samples, max_rec + 1))
    rand_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i) + ""
        triggered = False
        k = 0
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_random(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
            if k >= 20:
                print "Had to pick new initial state"
                opt.env.reset()
                init_state = opt.env.get_pos_vel()
                k = 0
            else:
                k += 1
        rand_scores[i, :] = results[-3]['rec_scores']
        rand_cutoffs[i, :] = results[-3]['rec_cutoffs']


    print "\n\nApprox Grad Controls\n\n"

    approx_grad_scores = np.zeros((opt.misc.samples, max_rec + 1))
    approx_grad_cutoffs = np.zeros((opt.misc.samples, max_rec + 1))
    for i in range(opt.misc.samples):
        print "Eval Iteration: " + str(i) + ""
        triggered = False
        while not triggered:
            print "\t\tNot yet triggered"
            results = rec_statistics.collect_rec_approx_grad(opt.env, lnr, ocs, opt.t, opt, max_KLs, visualize=True, early_stop=False, init_state=init_state, max_rec=max_rec)
            triggered = results[-3]['triggered']
        approx_grad_scores[i, :] = results[-3]['rec_scores']
        approx_grad_cutoffs[i, :] = results[-3]['rec_cutoffs']


    return {
        'rand_scores': rand_scores,
        'rand_cutoffs': rand_cutoffs,
        'approx_grad_scores': approx_grad_scores,
        'approx_grad_cutoffs': approx_grad_cutoffs
    }
Example No. 14
def run_trial(opt):
    ocs = [ svm.OneClassSVM(kernel='rbf', gamma = .05, nu = .05) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 100

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    trajs_train = []
    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            print "\tEvaluating..."
            lnr.train()
            fit_all(ocs, trajs_train)
            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)

            for j in range(opt.samples):
                print "\t\tSample: " + str(j) + " rolling out..."
                states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
                states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(opt.env, lnr, ocs, opt.t, False, False)
                states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, False, False)
                print "\t\tDone rolling out"

                trajs_valid.append(states_valid)
                trajs_test.append(states_test)
                trajs_robust.append(states_robust)

                sup_iters_rewards[j] = r_valid
                lnr_iters_rewards[j] = r_test
                rob_iters_rewards[j] = r_robust

                freqs[j] = freq

            index = i / (opt.iters / opt.misc.num_evaluations)
            train_err[index, :] = eval_ocs(ocs, trajs_train)
            valid_err[index, :] = eval_ocs(ocs, trajs_valid)
            test_err[index, :] = eval_ocs(ocs, trajs_test)
            robust_err[index, :] = eval_ocs(ocs, trajs_robust)

            sup_reward[index] = np.mean(sup_iters_rewards)
            lnr_reward[index] = np.mean(lnr_iters_rewards)
            rob_reward[index] = np.mean(rob_iters_rewards)

            correction_freq[index] = np.mean(freqs)




    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,

        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,

        "correction_freq": correction_freq
    }
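
# fit_all and eval_ocs are helpers assumed by these examples but not shown here. A minimal
# sketch consistent with how they are called (two arguments; one OneClassSVM per timestep;
# eval_ocs indexed as a length-opt.t array of per-timestep outlier rates). It assumes numpy
# as np and equal-length trajectories, as elsewhere in these examples; this is a sketch of
# an assumed interface, not the original implementation.
def fit_all(ocs, trajs):
    trajs = np.array(trajs)            # shape: (num_trajs, T, state_dim)
    for t in range(len(ocs)):
        ocs[t].fit(trajs[:, t, :])     # fit the timestep-t SVM on timestep-t states

def eval_ocs(ocs, trajs):
    trajs = np.array(trajs)
    errs = np.zeros(len(ocs))
    for t in range(len(ocs)):
        preds = ocs[t].predict(trajs[:, t, :])
        errs[t] = np.mean(preds == -1)  # fraction of states flagged as outliers
    return errs
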
Example No. 15
def run_trial(opt):
    ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate = opt.lr, epochs = opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    trajs_train = []
    actions_train = []

    trial_data = {}

    for iteration in range(opt.iters):

        print "Iteration: " + str(iteration)
        violation = True
        while violation:
            states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
            if violation:
                print "\tViolation, restarting"

        trajs_train.append(states)
        actions_train.append(int_actions)
        lnr.add_data(states, int_actions)


        if (iteration + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
            key = iteration + 1
            trial_data[key] = []

            print "\tEvaluating..."
            print "\t\tTraining learner..."
            # lnr.train()
            print "\t\tFitting oc svms..."
            # fit_all(ocs, trajs_train)
            print "\t\tDone fitting"

            trajs_valid = []
            trajs_test = []
            trajs_robust = []

            sup_iters_rewards = np.zeros(opt.samples)
            lnr_iters_rewards = np.zeros(opt.samples)
            rob_iters_rewards = np.zeros(opt.samples)
            freqs = np.zeros(opt.samples)


            trajs = trajs_train
            fit_all(ocs, trajs)


            print "Training net..."
            lnr.train()
            print "Fitting svms..."
            print "Done fitting"

            Ls = np.zeros((len(trajs_train), opt.t))
            KLs = np.zeros((len(trajs_train), opt.t))
            state_diffs = np.zeros((len(trajs_train), opt.t))
            func_diffs = np.zeros((len(trajs_train), opt.t))
            action_norms = np.zeros((len(trajs_train), opt.t))
            actions = np.zeros((len(trajs_train), opt.t, opt.env.action_space.shape[0]))

            for i, (traj_states, traj_actions) in enumerate(zip(trajs_train, actions_train)):
                zipped = zip(traj_states, traj_actions)
                for t, (state, action) in enumerate(zipped[:-1]):
                    state_next, action_next = zipped[t+1]
                    state_diff = np.linalg.norm(state_next - state)
                    func_diff = np.abs(ocs[t].decision_function([state])[0,0] - ocs[t].decision_function([state_next])[0,0])
                    action_norm = np.linalg.norm(action)
                    
                    Ls[i, t] = state_diff / action_norm
                    KLs[i, t] = func_diff / action_norm
                    state_diffs[i, t] = state_diff
                    func_diffs[i, t] = func_diff
                    action_norms[i, t] = action_norm
                    actions[i, t, :] = action

            max_Ls = np.max(Ls, axis=0)
            max_KLs = np.max(KLs, axis=0)

            max_rec = 500

            for k in range(60):
                reject = True
                while reject:
                    print "Determing whether to reject initial state..."
                    s = opt.env.reset()
                    reject = ocs[0].predict([s])[0] == -1
                init_state = opt.env.get_pos_vel()
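                # For this fixed initial state, compare three policies over opt.misc.samples
                # rollouts each: the vanilla learner, a random-control recovery strategy, and
                # an early-stopping (no-recovery) baseline, tallying completed / failed /
                # failed-while-still-in-support fractions.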

                print "\n\nVanilla Learner\n\n"

                van_completed = 0
                van_failed = 0
                van_failed_in_support = 0

                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_traj(opt.env, lnr, ocs, opt.t, visualize=False, early_stop=False, init_state = init_state)
                    van_completed += int(results[-3]['completed'])
                    van_failed += int(results[-3]['failed'])
                    van_failed_in_support += int(results[-3]['failed_in_support'])

                van_completed = van_completed / float(opt.misc.samples)
                van_failed = van_failed / float(opt.misc.samples)
                van_failed_in_support = van_failed_in_support / float(opt.misc.samples)


                print "\n\nRand Control Recovery Strategy\n\n"

                rand_completed = 0
                rand_failed = 0
                rand_failed_in_support = 0

                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(rec_statistics.random_sample_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=False, early_stop=False, init_state=init_state, max_rec=max_rec)
                    rand_completed += int(results[-3]['completed'])
                    rand_failed += int(results[-3]['failed'])
                    rand_failed_in_support += int(results[-3]['failed_in_support'])

                rand_completed = rand_completed / float(opt.misc.samples)
                rand_failed = rand_failed / float(opt.misc.samples)
                rand_failed_in_support = rand_failed_in_support / float(opt.misc.samples)


                print "\n\nEarly Stopping Strategy\n\n"
                es_completed = 0
                es_failed = 0
                es_failed_in_support = 0

                for i in range(opt.misc.samples):
                    print "Eval Iteration: " + str(i)
                    results = rec_statistics.collect_rec(rec_statistics.no_rec_loop, opt.env, opt.sim, lnr, ocs, opt.t, opt, max_KLs, visualize=False, early_stop=False, init_state=init_state, max_rec=max_rec)
                    es_completed += int(results[-3]['completed'])
                    es_failed += int(results[-3]['failed'])
                    es_failed_in_support += int(results[-3]['failed_in_support'])

                es_completed = es_completed / float(opt.misc.samples)
                es_failed = es_failed / float(opt.misc.samples)
                es_failed_in_support = es_failed_in_support / float(opt.misc.samples)


                results = {
                    'van_tallies': [van_completed, van_failed, van_failed_in_support],
                    'rand_tallies': [rand_completed, rand_failed, rand_failed_in_support],
                    'es_tallies': [es_completed, es_failed, es_failed_in_support],
                }

                trial_data[key].append(results)
                print "Saving to: " + opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl'
                pickle.dump(trial_data, open(opt.data_dir + 'multiple_trials/trial_data' + str(opt.t_value) + '.pkl', 'w'))


    return trial_data
Example No. 16
#net.load_weights('meta/weights.txt', 'meta/stats.txt')
suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
weights_path = 'meta/' + 'test' + '/' + envname + '_' + str(
    exp_id) + '_weights' + suffix + '.txt'
stats_path = 'meta/' + 'test' + '/' + envname + '_' + str(
    exp_id) + '_stats' + suffix + '.txt'
net.load_weights(weights_path, stats_path)

net_sup = Supervisor(net)

opt = Options
opt.env = env
opt.sup = net_sup
opt.t = 100

est = knet.Network([64, 64], learning_rate=.01, epochs=100)
lnr = learner.Learner(est)

oc = OneClassSVM(kernel='rbf', gamma=.01, nu=.01)

ITERATIONS = 500
print "\n\nSup rollouts\n\n"
sup_failures = 0
initial_states = []
for i in range(ITERATIONS):
    print "iteration: " + str(i)
    violation = True
    while violation:
        states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(
            opt.env, opt.sup, opt.t, False, False)
        if violation:
            print "\tViolation, restarting"
Example No. 17
def run_trial(opt):
    ocs = [OCSVM(kernel='rbf', gamma=.1, nu=.03) for t in range(opt.t)]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):

    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"

    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)

    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         # print "\tEvaluating..."
    #         # print "\t\tTraining learner..."
    #         # # lnr.train()
    #         # print "\t\tFitting oc svms..."
    #         # # fit_all(ocs, trajs_train)
    #         # print "\t\tDone fitting"

    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         # for j in range(opt.samples):
    #             # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #             # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             # trajs_valid.append(states_valid)
    #             # trajs_test.append(states_test)
    #             # trajs_robust.append(states_robust)

    #             # sup_iters_rewards[j] = r_valid
    #             # lnr_iters_rewards[j] = r_test
    #             # rob_iters_rewards[j] = r_robust

    #             # freqs[j] = freq

    #             # if j == 0:
    #             #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #             #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)

    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    print "Fitting svms..."
    trajs_train = trajs[:-200]
    trajs_test = trajs[-200:]
    fit_all(ocs, trajs_train)
    print eval_ocs(ocs, trajs_train)
    print eval_ocs(ocs, trajs_test)
    fit_all(ocs, trajs)
    print "Done fitting"
    print "Training net..."
    lnr.train()

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros(
        (len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states,
            traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    IPython.embed()

    total_failures = 0
    total_failures_in_support = 0
    samples_failed_in_support = 0

    for j in range(opt.misc.samples):
        print "Iteration: " + str(j)
        reject = True
        while reject:
            #results = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
            results = statistics.collect_robust_traj_multiple_rejection_adaptive(
                opt.env, lnr, ocs, opt.t, opt, max_KLs, False, False)
            reject = results[-1]
            failed = results[-2]
            if reject:
                print "\tRejecting " + str(j) + " and restarting..."
        info = results[-3]

        total_failures += info['count_failures']
        total_failures_in_support += info['count_fail_in_support']

        if info['count_fail_in_support'] > 0:
            print "Failed in support"
            samples_failed_in_support += 1

    if total_failures > 0:
        print "Fails in support: " + str(
            float(total_failures_in_support) / total_failures)
    print total_failures_in_support
    print total_failures

    print str(samples_failed_in_support) + " failed in support"
    print "Fraction failed in support: " + str(
        float(samples_failed_in_support) / opt.misc.samples)
def run_trial(opt):
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=opt.gamma, nu=opt.nu)
        for t in range(opt.t)
    ]
    # ocs = [ OCSVM(kernel='rbf', gamma = opt.gamma, nu = opt.nu) for t in range(opt.t) ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 1

    sup_reward = np.zeros(opt.misc.num_evaluations)
    lnr_reward = np.zeros(opt.misc.num_evaluations)
    rob_reward = np.zeros(opt.misc.num_evaluations)

    train_err = np.zeros((opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.misc.num_evaluations, opt.t))
    correction_freq = np.zeros(opt.misc.num_evaluations)

    # trajs_train = []
    # actions_train = []
    # for i in range(opt.iters):

    #     print "Iteration: " + str(i)
    #     violation = True
    #     while violation:
    #         states, int_actions, taken_actions, r, violation = statistics.collect_traj_rejection(opt.env, opt.sup, opt.t, False, False)
    #         if violation:
    #             print "\tViolation, restarting"

    #     trajs_train.append(states)
    #     actions_train.append(int_actions)
    #     lnr.add_data(states, int_actions)

    #     if (i + 1) % (opt.iters/opt.misc.num_evaluations) == 0:
    #         print "\tEvaluating..."
    #         print "\t\tTraining learner..."
    #         # lnr.train()
    #         print "\t\tFitting oc svms..."
    #         # fit_all(ocs, trajs_train)
    #         print "\t\tDone fitting"

    #         trajs_valid = []
    #         trajs_test = []
    #         trajs_robust = []

    #         sup_iters_rewards = np.zeros(opt.samples)
    #         lnr_iters_rewards = np.zeros(opt.samples)
    #         rob_iters_rewards = np.zeros(opt.samples)
    #         freqs = np.zeros(opt.samples)

    #         # for j in range(opt.samples):
    #             # states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(opt.env, opt.sup, opt.t, False, False)
    #             # states_test, int_actions_test, _, r_test, _, lnr_score, violation = statistics.collect_score_traj_multiple_rejection(opt.env, lnr, ocs, opt.t, False, False)
    #             # states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = statistics.collect_robust_traj_multiple(opt.env, lnr, ocs, opt.t, opt, False, False)

    #             # trajs_valid.append(states_valid)
    #             # trajs_test.append(states_test)
    #             # trajs_robust.append(states_robust)

    #             # sup_iters_rewards[j] = r_valid
    #             # lnr_iters_rewards[j] = r_test
    #             # rob_iters_rewards[j] = r_robust

    #             # freqs[j] = freq

    #             # if j == 0:
    #             #     utils.plot([np.array([lnr_score]), np.array([rob_score])], ['Learner', 'Robust Learner'], opt, "scores/DecisionScores" + str(i), colors=['blue', 'green'])
    #             #     utils.plot([np.array([mags])], ['Robust Learner'], opt, "mags/RecoveryMagnitudes" + str(i), colors=['green'])

    #         index = i / (opt.iters / opt.misc.num_evaluations)
    #         # train_err[index, :] = eval_ocs(ocs, trajs_train)
    #         # valid_err[index, :] = eval_ocs(ocs, trajs_valid)
    #         # test_err[index, :] = eval_ocs(ocs, trajs_test)
    #         # robust_err[index, :] = eval_ocs(ocs, trajs_robust)

    #         # sup_reward[index] = np.mean(sup_iters_rewards)
    #         # lnr_reward[index] = np.mean(lnr_iters_rewards)
    #         # rob_reward[index] = np.mean(rob_iters_rewards)

    #         # correction_freq[index] = np.mean(freqs)

    # pickle.dump(lnr.X, open('data/lnrX.pkl', 'w'))
    # pickle.dump(lnr.y, open('data/lnry.pkl', 'w'))
    # pickle.dump(trajs_train, open('data/trajs_train.pkl', 'w'))
    # pickle.dump(actions_train, open('data/actions_train.pkl', 'w'))

    print "Loading data..."
    lnr.X = pickle.load(open('data/lnrX.pkl', 'r'))
    lnr.y = pickle.load(open('data/lnry.pkl', 'r'))
    trajs_train = pickle.load(open('data/trajs_train.pkl', 'r'))
    actions_train = pickle.load(open('data/actions_train.pkl', 'r'))
    print "Done loading data."

    trajs = trajs_train

    fit_all(ocs, trajs)

    print "Training net..."
    lnr.train()
    print "Fitting svms..."
    # trajs_train = trajs[:-200]
    # trajs_test = trajs[-200:]
    # fit_all(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_train)
    # print eval_ocs(ocs, trajs_test)
    print "Done fitting"

    Ls = np.zeros((len(trajs_train), opt.t))
    KLs = np.zeros((len(trajs_train), opt.t))
    state_diffs = np.zeros((len(trajs_train), opt.t))
    func_diffs = np.zeros((len(trajs_train), opt.t))
    action_norms = np.zeros((len(trajs_train), opt.t))
    actions = np.zeros(
        (len(trajs_train), opt.t, opt.env.action_space.shape[0]))

    for i, (traj_states,
            traj_actions) in enumerate(zip(trajs_train, actions_train)):
        zipped = zip(traj_states, traj_actions)
        for t, (state, action) in enumerate(zipped[:-1]):
            state_next, action_next = zipped[t + 1]
            state_diff = np.linalg.norm(state_next - state)
            func_diff = np.abs(ocs[t].decision_function([state])[0, 0] -
                               ocs[t].decision_function([state_next])[0, 0])
            action_norm = np.linalg.norm(action)

            Ls[i, t] = state_diff / action_norm
            KLs[i, t] = func_diff / action_norm
            state_diffs[i, t] = state_diff
            func_diffs[i, t] = func_diff
            action_norms[i, t] = action_norm
            actions[i, t, :] = action

    max_Ls = np.amax(Ls, axis=0)
    max_KLs = np.amax(KLs, axis=0)

    max_rec = 500

    trials_data = []
    for trial in range(100):
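        # Rejection-sample a start state: keep resetting the environment until the t=0
        # one-class SVM scores the state as in-distribution (predict == +1), then compare the
        # vanilla learner against the random-sampling, approximate-gradient, finite-difference,
        # and early-stopping recovery strategies from that same initial state.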
        reject = True
        while reject:
            print "Determing whether to reject initial state..."
            s = opt.env.reset()
            reject = ocs[0].predict([s])[0] == -1
        init_state = opt.env.get_pos_vel()

        print "\n\nVanilla Learner\n\n"

        van_completed = 0
        van_failed = 0
        van_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_traj(opt.env,
                                                  lnr,
                                                  ocs,
                                                  opt.t,
                                                  visualize=True,
                                                  early_stop=False,
                                                  init_state=init_state)
            van_completed += int(results[-3]['completed'])
            van_failed += int(results[-3]['failed'])
            van_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nRand Control Recovery Strategy\n\n"

        rand_completed = 0
        rand_failed = 0
        rand_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.random_sample_loop,
                opt.env,
                opt.sim,
                lnr,
                ocs,
                opt.t,
                opt,
                max_KLs,
                visualize=True,
                early_stop=False,
                init_state=init_state,
                max_rec=max_rec)
            rand_completed += int(results[-3]['completed'])
            rand_failed += int(results[-3]['failed'])
            rand_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nApprox Grad Recovery Strategy\n\n"
        ag_completed = 0
        ag_failed = 0
        ag_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.approx_grad_loop,
                opt.env,
                opt.sim,
                lnr,
                ocs,
                opt.t,
                opt,
                max_KLs,
                visualize=True,
                early_stop=False,
                init_state=init_state,
                max_rec=max_rec)
            ag_completed += int(results[-3]['completed'])
            ag_failed += int(results[-3]['failed'])
            ag_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\Finite Diff Recovery Strategy\n\n"
        fd_completed = 0
        fd_failed = 0
        fd_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(
                rec_statistics.finite_diff_loop,
                opt.env,
                opt.sim,
                lnr,
                ocs,
                opt.t,
                opt,
                max_KLs,
                visualize=True,
                early_stop=False,
                init_state=init_state,
                max_rec=max_rec)
            fd_completed += int(results[-3]['completed'])
            fd_failed += int(results[-3]['failed'])
            fd_failed_in_support += int(results[-3]['failed_in_support'])

        print "\n\nEarly Stopping Strategy\n\n"
        es_completed = 0
        es_failed = 0
        es_failed_in_support = 0

        for i in range(opt.misc.samples):
            print "Eval Iteration: " + str(i)
            results = rec_statistics.collect_rec(rec_statistics.no_rec_loop,
                                                 opt.env,
                                                 opt.sim,
                                                 lnr,
                                                 ocs,
                                                 opt.t,
                                                 opt,
                                                 max_KLs,
                                                 visualize=True,
                                                 early_stop=False,
                                                 init_state=init_state,
                                                 max_rec=max_rec)
            es_completed += int(results[-3]['completed'])
            es_failed += int(results[-3]['failed'])
            es_failed_in_support += int(results[-3]['failed_in_support'])

        results = {
            'van_tallies': [van_completed, van_failed, van_failed_in_support],
            'rand_tallies':
            [rand_completed, rand_failed, rand_failed_in_support],
            'es_tallies': [es_completed, es_failed, es_failed_in_support],
            'ag_tallies': [ag_completed, ag_failed, ag_failed_in_support],
            'fd_tallies': [fd_completed, fd_failed, fd_failed_in_support]
        }

        trials_data.append(results)
        pickle.dump(trials_data,
                    open(opt.data_dir + 'full/trials_data.pkl', 'w'))

    return trials_data
Example No. 19
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 100

    sup_reward = np.zeros(opt.iters)
    lnr_reward = np.zeros(opt.iters)
    rob_reward = np.zeros(opt.iters)

    train_err = np.zeros(opt.iters)
    valid_err = np.zeros(opt.iters)
    test_err = np.zeros(opt.iters)
    robust_err = np.zeros(opt.iters)
    correction_freq = np.zeros(opt.iters)

    trajs_train = []

    for i in range(opt.iters):
        print "\nIteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        lnr.add_data(states, int_actions)
        trajs_train.append(states)

    lnr.train()
    print "\nCollecting validation samples..."
    trajs_valid = []
    trajs_test = []
    for j in range(opt.samples):
        states_valid, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, lnr, opt.t, False, early_stop=False)
        trajs_valid.append(states_valid)
        trajs_test.append(states_test)
    print "Done collecting samples"

    X_train = []
    for traj in trajs_train:
        X_train += traj

    oc.fit(X_train)

    train_errs = np.zeros(opt.t)
    valid_errs = np.zeros(opt.t)
    test_errs = np.zeros(opt.t)
    adver_errs = np.zeros(opt.t)

    for t in range(opt.t):
        X_train = []
        for traj in trajs_train:
            X_train.append(traj[t])

        X_valid = []
        for traj in trajs_valid:
            X_valid.append(traj[t])

        X_test = []
        for traj in trajs_test:
            X_test.append(traj[t])

        X_train = np.array(X_train)
        cov = np.cov(X_train.T)
        mean = np.mean(X_train, axis=0)
        X_adver = np.random.multivariate_normal(mean, cov, opt.samples)
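        # X_adver: synthetic samples drawn from a Gaussian fit to the timestep-t training
        # states; they probe how readily the one-class SVM accepts near-distribution points.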

        # Use distinct names for the per-timestep scalars so they do not shadow the
        # arrays initialized at the top of the trial.
        train_err_t = eval_oc(oc, X_train)
        valid_err_t = eval_oc(oc, X_valid)
        test_err_t = eval_oc(oc, X_test)
        adver_err_t = eval_oc(oc, X_adver)
        print "Train Error: " + str(train_err_t)
        print "Valid Error: " + str(valid_err_t)
        print "Test Error: " + str(test_err_t)
        print "Adver Error: " + str(adver_err_t)
        print "Support vectors: " + str(oc.support_vectors_.shape)
        print "\n"

        train_errs[t] = train_err_t
        valid_errs[t] = valid_err_t
        test_errs[t] = test_err_t
        adver_errs[t] = adver_err_t

    plt.style.use('ggplot')
    #errs = [train_errs, valid_errs, test_errs]
    #labels = ['Training', 'Validation', 'Test']
    errs = [train_errs, valid_errs]
    labels = ['Training', 'Validation']

    width = .2
    index = np.arange(opt.t)

    for i, (err, label) in enumerate(zip(errs, labels)):
        plt.bar(index + i * width, err, width, label=label)
    plt.legend()
    plt.ylim(0, .75)
    plt.savefig('/Users/JonathanLee/Desktop/bar_single.png')
    utils.clear()

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
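
# eval_oc is an assumed single-SVM counterpart of eval_ocs: the fraction of the given states
# that the one-class SVM labels as outliers. A minimal sketch under that assumption:
def eval_oc(oc, X):
    preds = oc.predict(np.array(X))
    return np.mean(preds == -1)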