Beispiel #1
0
def run_trial(opt):
    """Fit one one-class SVM per time step on supervisor demonstrations.

    Every iteration rolls out ``opt.sup`` in ``opt.env`` for ``opt.t`` steps,
    adds the rollout to the learner, retrains the learner, and refits each
    per-step ``OneClassSVM`` on the states observed at that step so far.
    On every fifth iteration ``opt.samples`` fresh supervisor rollouts are
    collected and ``eval_ocs`` is printed for the training and fresh sets.

    Side effects:
        Overwrites ``opt.samples`` with 10 and prints progress to stdout.

    Returns:
        dict mapping metric names to NumPy arrays of length ``opt.iters``.
        This variant never writes into them, so every array is all zeros.
    """
    detectors = [
        svm.OneClassSVM(kernel='rbf', gamma=.5, nu=.01)
        for _ in range(opt.t)
    ]
    network = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(network)
    opt.samples = 10

    # One zero-filled array per reported metric; nothing below updates them.
    metric_names = (
        "sup_reward", "lnr_reward", "rob_reward",
        "train_err", "valid_err", "test_err", "robust_err",
        "correction_freq",
    )
    results = {name: np.zeros(opt.iters) for name in metric_names}

    trajs_train = []

    for i in range(opt.iters):
        print("\nIteration: " + str(i))
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        trajs_train.append(states)

        lnr.add_data(states, int_actions)
        lnr.train()

        # Stack as (trajectory, step, feature) so one slice yields every
        # state seen at a given step; refit that step's detector on it.
        stacked = np.array(trajs_train)
        for step, detector in enumerate(detectors):
            detector.fit(stacked[:, step, :])

        if i % 5 != 0:
            continue

        # Periodic check against fresh supervisor rollouts.
        trajs_valid = []
        for _ in range(opt.samples):
            states_valid, _, _, _ = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            trajs_valid.append(states_valid)

        train_oc_errs = eval_ocs(detectors, trajs_train, opt)
        valid_oc_errs = eval_ocs(detectors, trajs_valid, opt)

        print("Train errs: " + str(train_oc_errs))
        print("Valid errs: " + str(valid_oc_errs))
        print("Max train err: " + str(np.amax(train_oc_errs)))
        print("Max valid err: " + str(np.amax(valid_oc_errs)))

    return results
Beispiel #2
0
def run_trial(opt):
    """Train the learner incrementally and log supervisor/learner results.

    For each of ``opt.iters`` iterations: roll out the supervisor, record its
    reward and ``opt.env.metric()``, add the rollout to the learner, retrain,
    then roll out the learner ``opt.samples`` times and record the same two
    quantities per rollout.

    Side effects:
        * Creates the plot and data directories if they do not exist and
          stores them on ``opt.plot_dir`` / ``opt.data_dir``.
        * Sets ``opt.num_valid_trajs`` and overwrites ``opt.samples`` with 10.
        * Writes four CSV files under ``opt.data_dir`` and calls ``plot``
          twice.

    Returns:
        None.
    """
    oc = svm.OneClassSVM(kernel='rbf', nu=.01)  # built but unused here
    network = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(network)

    settings = vars(opt)
    plot_dir = utils.generate_plot_dir('initial', 'experts', settings)
    data_dir = utils.generate_data_dir('initial', 'experts', settings)
    for directory in (plot_dir, data_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    # Unused in this variant.
    train_trajs = []
    valid_trajs = []

    # Row 0 holds the single supervisor rollout per iteration; the learner
    # arrays hold one row per evaluation rollout.
    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))

    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print("Iteration: " + str(i))
        states, int_actions, _, sup_r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = sup_r
        # metric() is read straight after the rollout it describes.
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        lnr.train()

        print("\t" + str(lnr.acc()))
        for j in range(opt.samples):
            _, _, _, lnr_r = statistics.collect_traj(opt.env, lnr, opt.t)
            lnr_rewards[j, i] = lnr_r
            lnr_perf[j, i] = opt.env.metric()

    print("Average success: " + str(sup_rewards))
    print("Learner success: \n" + str(lnr_rewards))

    outputs = (
        (sup_rewards, 'sup_rewards.csv'),
        (lnr_rewards, 'lnr_rewards.csv'),
        (sup_perf, 'sup_perf.csv'),
        (lnr_perf, 'lnr_perf.csv'),
    )
    for table, filename in outputs:
        pd.DataFrame(table).to_csv(opt.data_dir + filename)

    plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')
Beispiel #3
0
def run_trial(opt):
    """Compare plain and recovery-augmented learner rollouts with one SVM.

    Stages:
        1. Collect ``opt.iters`` supervisor rollouts, fit a single
           ``OneClassSVM`` on ``lnr.X``, then train the learner.
        2. Collect 20 supervisor and 20 learner rollouts and print the
           fraction of their states the SVM labels ``-1``.
        3. Run 20 "robust" episodes of ``opt.t`` steps: when the decision
           score of the current state is below 0.1, take a step along a
           finite-difference estimate of the score's gradient with respect
           to the action instead of the learner's action.

    Side effects:
        Creates plot/data directories, sets ``opt.plot_dir``,
        ``opt.data_dir``, ``opt.num_valid_trajs`` and ``opt.samples``, and
        prints all results to stdout.

    Returns:
        None.
    """
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    network = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(network)

    settings = vars(opt)
    plot_dir = utils.generate_plot_dir('initial', 'experts', settings)
    data_dir = utils.generate_data_dir('initial', 'experts', settings)
    for directory in (plot_dir, data_dir):
        if not os.path.exists(directory):
            os.makedirs(directory)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    # Unused in this variant.
    train_trajs = []
    valid_trajs = []

    def outlier_fraction(samples):
        # Share of samples that the one-class SVM labels as -1.
        labels = oc.predict(samples)
        return len(labels[labels == -1]) / float(len(labels))

    def dec(u):
        # Score the state reached by applying action u, then put the
        # environment back to the state it had before the probe step.
        saved = opt.env.get_x()
        probed, _, _, _ = opt.env.step(u)
        opt.env.set_x(saved)
        return oc.decision_function([probed])[0, 0]

    # Stage 1: gather demonstrations, fit the detector, train the learner.
    for i in range(opt.iters):
        print("Iteration: " + str(i))
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        lnr.add_data(states, int_actions)

    oc.fit(lnr.X)
    train_err = outlier_fraction(lnr.X)
    print("\nTraining error: " + str(train_err))

    lnr.train()

    # Stage 2: supervisor ("valid") versus learner ("test") rollouts.
    sup_rewards = np.zeros(20)
    lnr_rewards = np.zeros(20)

    X_valid = []
    X_test = []
    for i in range(20):
        states_valid, _, _, r_valid = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, _, _, r_test = statistics.collect_traj(
            opt.env, lnr, opt.t, False)

        sup_rewards[i] = r_valid
        lnr_rewards[i] = r_test

        X_valid += states_valid
        X_test += states_test

    valid_err = outlier_fraction(X_valid)
    print("Validation erorr: " + str(valid_err))

    test_err = outlier_fraction(X_test)
    print("Test erorr: " + str(test_err))

    print("\n\n")

    print("Average sup reward: " + str(np.mean(sup_rewards)) + " +/- " +
          str(scipy.stats.sem(sup_rewards)))
    print("Average lnr reward: " + str(np.mean(lnr_rewards)) + " +/- " +
          str(scipy.stats.sem(lnr_rewards)))

    print("\n\n")

    # Stage 3: learner rollouts with recovery steps on low decision scores.
    rewards = np.zeros(20)
    rec_counts = np.zeros(20)
    X_robust = []
    for i in range(20):
        s = opt.env.reset()
        visited = [s]

        for t in range(opt.t):
            score = oc.decision_function([s])[0, 0]
            if score < .1:
                alpha = .1
                a = alpha * utils.finite_diff1(
                    np.zeros(opt.env.action_space.shape), dec)
                rec_counts[i] += 1.0
            else:
                a = lnr.intended_action(s)

            # The episode always runs the full horizon; `done` is ignored.
            s, r, done, _ = opt.env.step(a)

            rewards[i] += r
            visited.append(s)

        X_robust += visited

    robust_err = outlier_fraction(X_robust)
    print("Robust erorr: " + str(robust_err))

    rec_freq = np.mean(rec_counts / float(opt.t))
    print("Recovery frequency: " + str(rec_freq))

    print("Robust rewards: " + str(np.mean(rewards)) + " +/- " +
          str(scipy.stats.sem(rewards)))
Beispiel #4
0
def run_trial(opt):
    """Run one trial with per-step one-class SVMs and periodic evaluation.

    Supervisor rollouts are collected for ``opt.iters`` iterations.  Every
    ``opt.iters // opt.misc.num_evaluations`` iterations (a "checkpoint") the
    learner is trained, all per-step SVMs are refit via ``fit_all``, and
    ``opt.samples`` rollouts each of the supervisor, the learner and the
    robust learner are collected and scored.  Decision-score and
    recovery-magnitude plots are produced for the first sample of every
    checkpoint, and ``make_bar_graphs`` is called at the last checkpoint.

    Fixes relative to the earlier version:
        * When ``opt.iters`` is not a multiple of the checkpoint interval's
          natural grid (e.g. 8 iterations, 3 evaluations), surplus
          checkpoints produced an index past the end of the result arrays
          and raised ``IndexError`` after the rollouts had been done.
          Surplus checkpoints are now skipped before any work is spent.
        * ``bar_errs`` is initialised to ``None`` so the function returns
          normally when the final checkpoint is never reached (for example
          ``opt.iters == 0``) instead of raising ``UnboundLocalError``.
        * Floor division is written as ``//`` so the checkpoint index is an
          integer regardless of the division mode in effect.

    Side effects:
        Overwrites ``opt.samples`` with 20, prints progress, writes plots
        through ``utils.plot`` / ``make_bar_graphs``.

    Returns:
        dict with reward arrays of length ``num_evaluations``, error arrays
        of shape ``(num_evaluations, opt.t)``, ``correction_freq`` and
        ``bar_errs`` (``None`` if the final checkpoint was not reached).

    Raises:
        ZeroDivisionError: if ``opt.iters`` is positive but smaller than
            ``opt.misc.num_evaluations`` (the interval floors to zero), or
            if ``opt.misc.num_evaluations`` is zero.
    """
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for _ in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 20

    num_evals = opt.misc.num_evaluations

    sup_reward = np.zeros(num_evals)
    lnr_reward = np.zeros(num_evals)
    rob_reward = np.zeros(num_evals)

    train_err = np.zeros((num_evals, opt.t))
    valid_err = np.zeros((num_evals, opt.t))
    test_err = np.zeros((num_evals, opt.t))
    robust_err = np.zeros((num_evals, opt.t))
    correction_freq = np.zeros(num_evals)

    # Only assigned at the final checkpoint; must exist for the return below.
    bar_errs = None

    trajs_train = []
    for i in range(opt.iters):
        print("\nIteration: " + str(i))
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        eval_every = opt.iters // num_evals
        if (i + 1) % eval_every != 0:
            continue

        index = i // eval_every
        if index >= num_evals:
            # Surplus checkpoint: there is no result slot for it.
            continue

        print("\tEvaluating...")
        lnr.train()
        fit_all(ocs, trajs_train)
        trajs_valid = []
        trajs_test = []
        trajs_robust = []

        sup_iters_rewards = np.zeros(opt.samples)
        lnr_iters_rewards = np.zeros(opt.samples)
        rob_iters_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False, False)
            states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                opt.env, lnr, ocs, opt.t, False, False)
            # NOTE(review): this call goes through `stats`, not `statistics`
            # like the two above -- confirm that is the intended module.
            states_robust, int_actions_robust, _, r_robust, freq, rob_score, mags = stats.collect_robust_traj_multiple(
                opt.env, opt.sim, lnr, ocs, opt.t, opt, False, False)

            trajs_valid.append(states_valid)
            trajs_test.append(states_test)
            trajs_robust.append(states_robust)

            sup_iters_rewards[j] = r_valid
            lnr_iters_rewards[j] = r_test
            rob_iters_rewards[j] = r_robust

            freqs[j] = freq

            # Plot only the first sample of each checkpoint.
            if j == 0:
                utils.plot([np.array([lnr_score]),
                            np.array([rob_score])],
                           ['Learner', 'Robust Learner'],
                           opt,
                           "scores/DecisionScores" + str(i),
                           colors=['blue', 'green'])
                utils.plot([np.array([mags])], ['Robust Learner'],
                           opt,
                           "mags/RecoveryMagnitudes" + str(i),
                           colors=['green'])

        train_err[index, :] = eval_ocs(ocs, trajs_train)
        valid_err[index, :] = eval_ocs(ocs, trajs_valid)
        test_err[index, :] = eval_ocs(ocs, trajs_test)
        robust_err[index, :] = eval_ocs(ocs, trajs_robust)

        sup_reward[index] = np.mean(sup_iters_rewards)
        lnr_reward[index] = np.mean(lnr_iters_rewards)
        rob_reward[index] = np.mean(rob_iters_rewards)

        correction_freq[index] = np.mean(freqs)

        if index == (num_evals - 1):
            bar_errs = make_bar_graphs(ocs, trajs_train, trajs_valid,
                                       trajs_test, opt)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq,
        "bar_errs": bar_errs
    }
Beispiel #5
0
def run_trial(opt):
    """Interactive debugging trial: fit one SVM, report errors, render a rollout.

    Collects ``opt.iters`` supervisor rollouts, fits a single one-class SVM on
    ``lnr.X``, prints the fraction of training, supervisor ("validation") and
    learner ("test") states that the SVM labels ``-1``, then renders one
    learner episode while printing the decision score at every step.  The
    function finishes by opening an IPython shell via ``IPython.embed()``.

    Side effects:
        Creates the plot/data directories, sets ``opt.plot_dir``,
        ``opt.data_dir``, ``opt.num_valid_trajs`` and ``opt.samples``, prints
        to stdout, renders the environment, and blocks in an interactive
        shell.

    Returns:
        None.
    """
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    # Not used anywhere below in this variant.
    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    # lnr_rewards / lnr_perf are only referenced by the disabled code below.
    lnr_rewards = np.zeros((opt.samples, opt.iters))

    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    # Gather supervisor demonstrations; the learner is not trained per
    # iteration here (that code is commented out), only fed data.
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        # lnr.train()

        # print "\t" + str(lnr.acc())
        # for j in range(opt.samples):
        #     _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
        #     lnr_rewards[j, i] = r
        #     lnr_perf[j, i] = opt.env.metric()

    # Fit the detector on all collected data and report the share of
    # training samples it labels -1.
    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "Training error: " + str(train_err)

    # Same measurement on 20 fresh supervisor rollouts.
    X_valid = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        X_valid += states

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation erorr: " + str(valid_err)

    # Train the learner once, then measure on 20 learner rollouts.
    lnr.train()
    X_test = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, lnr, opt.t)
        X_test += states

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))

    print "Test erorr: " + str(test_err)

    s = opt.env.reset()
    # `reward` and `x` are assigned but never read in the active code.
    reward = 0.0
    x = opt.env.get_x()

    def dec(u):
        # Score the state reached by applying action u, then restore the
        # environment with the value saved from get_x().  Only referenced by
        # the disabled recovery branch below.
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    # Rendered learner episode; stops early when the environment says done.
    states_visited = []
    for t in range(opt.t):
        opt.env.render()
        score = oc.decision_function([s])
        print "\tDecision score: " + str(score)

        # if score < .2 and False:
        #     alpha = 1.0
        #     a = alpha * utils.finite_diff1(np.zeros(opt.env.action_space.shape[0]), dec)
        #     print "\t\tRecovering: " + str(a)
        #     s, r, done, _ = opt.env.step(a)
        #     x = opt.env.get_x()
        # else:
        a = lnr.intended_action(s)
        s, r, done, _ = opt.env.step(a)
        x = opt.env.get_x()

        states_visited.append(s)

        if done == True:
            break

    preds = oc.predict(states_visited)
    err = len(preds[preds == -1]) / float(len(preds))
    print "Error: " + str(err)

    # NOTE(review): `t` is unbound here if opt.t == 0 (the loop never runs).
    print "\nDone after " + str(t + 1) + " steps"

    # print "Average success: " + str(sup_rewards)
    # print "Learner success: \n" + str(lnr_rewards)

    # pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    # pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    # pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    # pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    # plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    # plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')

    # Drops into an interactive shell with this function's locals in scope.
    IPython.embed()
Beispiel #6
0
def run_trial(opt):
    """Track how a single one-class SVM flags supervisor data over time.

    Each iteration adds one supervisor rollout, refits the SVM on ``lnr.X``,
    retrains the learner, and collects ``opt.samples`` fresh supervisor
    rollouts.  It then records the train/validation error from ``eval_oc``
    and prints two diagnostics over all training trajectories: the fraction
    containing at least one state labelled ``-1``, and the per-time-step
    fraction of states labelled ``-1``.

    Learner and robust-learner rollouts are disabled in this variant, so
    ``lnr_reward``, ``rob_reward``, ``test_err``, ``robust_err`` and
    ``correction_freq`` stay at zero.

    Side effects:
        Overwrites ``opt.samples`` with 10 and prints to stdout.

    Returns:
        dict mapping metric names to arrays of length ``opt.iters``.
    """
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.1)
    network = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(network)

    opt.samples = 10

    n_iters = opt.iters
    train_err = np.zeros(n_iters)
    valid_err = np.zeros(n_iters)
    test_err = np.zeros(n_iters)
    robust_err = np.zeros(n_iters)

    sup_reward = np.zeros(n_iters)
    lnr_reward = np.zeros(n_iters)
    rob_reward = np.zeros(n_iters)

    correction_freq = np.zeros(n_iters)

    trajs = []

    for i in range(n_iters):
        print("\nIteration: " + str(i))
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        trajs.append(states)

        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)

        lnr.train()

        X_valid = []
        # Placeholders for the disabled learner / robust rollouts.
        X_test = []
        X_robust = []
        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, _, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            X_valid += states_valid
            sup_iter_rewards[j] = r_valid

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)

        sup_reward[i] = np.mean(sup_iter_rewards)
        # Mean of an all-zero array: no learner rollouts were collected.
        lnr_reward[i] = np.mean(lnr_iter_rewards)

        correction_freq[i] = np.mean(freqs)

        print("One class train error: " + str(train_err[i]))
        print("One class valid error: " + str(valid_err[i]))

        # Flag every training trajectory that has any state labelled -1;
        # scanning stops at the first such state.
        n = len(trajs)
        flags = np.zeros(n)
        for k, traj in enumerate(trajs):
            if any(oc.predict([state])[0] == -1 for state in traj):
                flags[k] = 1.0
        print("Fraction of partial trajectories: " + str(np.mean(flags)))

        # Per time step, the share of training trajectories labelled -1.
        counts = np.zeros(opt.t)
        for t in range(opt.t):
            counts[t] = sum(
                1.0 for traj in trajs if oc.predict([traj[t]])[0] == -1)
        counts = counts / float(n)
        print("Max time fraction: " + str(np.amax(counts)))
        print("Time fractions:\n" + str(counts))
        print("\n")

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
Beispiel #7
0
def run_trial(opt):
    """Run one trial with per-step one-class SVMs and periodic evaluation.

    Supervisor rollouts are collected for ``opt.iters`` iterations.  Every
    ``opt.iters // opt.misc.num_evaluations`` iterations (a "checkpoint") the
    learner is trained, all per-step SVMs are refit via ``fit_all``, and
    ``opt.samples`` rollouts each of the supervisor, the learner and the
    robust learner are collected and scored with ``eval_ocs``.

    Fixes relative to the earlier version:
        * When ``opt.iters`` does not line up with the checkpoint interval
          (e.g. 8 iterations, 3 evaluations), surplus checkpoints produced an
          index past the end of the result arrays and raised ``IndexError``
          after all rollouts had been done.  Surplus checkpoints are now
          skipped before any work is spent.
        * Floor division is written as ``//`` so the checkpoint index is an
          integer regardless of the division mode in effect.

    Side effects:
        Overwrites ``opt.samples`` with 100 and prints progress to stdout.

    Returns:
        dict with reward arrays of length ``num_evaluations``, error arrays
        of shape ``(num_evaluations, opt.t)`` and ``correction_freq``.

    Raises:
        ZeroDivisionError: if ``opt.iters`` is positive but smaller than
            ``opt.misc.num_evaluations`` (the interval floors to zero), or
            if ``opt.misc.num_evaluations`` is zero.
    """
    ocs = [
        svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05) for _ in range(opt.t)
    ]
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)
    opt.samples = 100

    num_evals = opt.misc.num_evaluations

    sup_reward = np.zeros(num_evals)
    lnr_reward = np.zeros(num_evals)
    rob_reward = np.zeros(num_evals)

    train_err = np.zeros((num_evals, opt.t))
    valid_err = np.zeros((num_evals, opt.t))
    test_err = np.zeros((num_evals, opt.t))
    robust_err = np.zeros((num_evals, opt.t))
    correction_freq = np.zeros(num_evals)

    trajs_train = []
    for i in range(opt.iters):
        print("\nIteration: " + str(i))
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False, False)
        trajs_train.append(states)
        lnr.add_data(states, int_actions)

        eval_every = opt.iters // num_evals
        if (i + 1) % eval_every != 0:
            continue

        index = i // eval_every
        if index >= num_evals:
            # Surplus checkpoint: there is no result slot for it.
            continue

        print("\tEvaluating...")
        lnr.train()
        fit_all(ocs, trajs_train)
        trajs_valid = []
        trajs_test = []
        trajs_robust = []

        sup_iters_rewards = np.zeros(opt.samples)
        lnr_iters_rewards = np.zeros(opt.samples)
        rob_iters_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            print("\t\tSample: " + str(j) + " rolling out...")
            states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False, False)
            states_test, int_actions_test, _, r_test, _, lnr_score = statistics.collect_score_traj_multiple(
                opt.env, lnr, ocs, opt.t, False, False)
            states_robust, int_actions_robust, _, r_robust, freq, rob_score = statistics.collect_robust_traj_multiple(
                opt.env, lnr, ocs, opt.t, False, False)
            print("\t\tDone rolling out")

            trajs_valid.append(states_valid)
            trajs_test.append(states_test)
            trajs_robust.append(states_robust)

            sup_iters_rewards[j] = r_valid
            lnr_iters_rewards[j] = r_test
            rob_iters_rewards[j] = r_robust

            freqs[j] = freq

        train_err[index, :] = eval_ocs(ocs, trajs_train)
        valid_err[index, :] = eval_ocs(ocs, trajs_valid)
        test_err[index, :] = eval_ocs(ocs, trajs_test)
        robust_err[index, :] = eval_ocs(ocs, trajs_robust)

        sup_reward[index] = np.mean(sup_iters_rewards)
        lnr_reward[index] = np.mean(lnr_iters_rewards)
        rob_reward[index] = np.mean(rob_iters_rewards)

        correction_freq[index] = np.mean(freqs)

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,

        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,

        "correction_freq": correction_freq
    }
Beispiel #8
0
def run_trial(opt):
    """Evaluate supervisor, learner and robust learner after every rollout.

    Each iteration adds one supervisor rollout, refits a single one-class
    SVM on ``lnr.X``, retrains the learner, and then collects ``opt.samples``
    rollouts of each of the three policies.  ``eval_oc`` errors and mean
    rewards are stored per iteration, and the decision scores of the first
    sample are plotted with ``utils.plot``.

    Side effects:
        Overwrites ``opt.samples`` with 5, prints to stdout, writes plots.

    Returns:
        dict mapping metric names to arrays of length ``opt.iters``.
    """
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    network = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(network)

    opt.samples = 5

    n_iters = opt.iters
    train_err = np.zeros(n_iters)
    valid_err = np.zeros(n_iters)
    test_err = np.zeros(n_iters)
    robust_err = np.zeros(n_iters)

    sup_reward = np.zeros(n_iters)
    lnr_reward = np.zeros(n_iters)
    rob_reward = np.zeros(n_iters)

    correction_freq = np.zeros(n_iters)

    for i in range(n_iters):
        print("\nIteration: " + str(i))
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)

        lnr.add_data(states, int_actions)
        oc.fit(lnr.X)

        lnr.train()

        X_valid, X_test, X_robust = [], [], []
        sup_iter_rewards = np.zeros(opt.samples)
        lnr_iter_rewards = np.zeros(opt.samples)
        rob_iter_rewards = np.zeros(opt.samples)
        freqs = np.zeros(opt.samples)

        for j in range(opt.samples):
            states_valid, _, _, r_valid = statistics.collect_traj(
                opt.env, opt.sup, opt.t, False)
            states_test, _, _, r_test, freq, lnr_score = statistics.collect_score_traj(
                opt.env, lnr, oc, opt.t, False)
            # `freq` from the robust rollout replaces the one returned above.
            states_robust, _, _, r_robust, freq, rob_score = statistics.collect_robust_traj(
                opt.env, lnr, oc, opt.t, False)

            X_valid += states_valid
            X_test += states_test
            X_robust += states_robust

            sup_iter_rewards[j] = r_valid
            lnr_iter_rewards[j] = r_test
            rob_iter_rewards[j] = r_robust

            freqs[j] = freq

            if j != 0:
                continue

            # Plot decision scores for the first sample of the iteration.
            utils.plot([np.array([lnr_score])], ['Learner'],
                       opt,
                       "DecisionScore" + str(i),
                       colors=['blue'])
            utils.plot([np.array([rob_score])], ['Robust Learner'],
                       opt,
                       "RobustDecisionScore" + str(i),
                       colors=['green'])

        train_err[i] = eval_oc(oc, lnr.X)
        valid_err[i] = eval_oc(oc, X_valid)
        test_err[i] = eval_oc(oc, X_test)
        robust_err[i] = eval_oc(oc, X_robust)

        sup_reward[i] = np.mean(sup_iter_rewards)
        lnr_reward[i] = np.mean(lnr_iter_rewards)
        rob_reward[i] = np.mean(rob_iter_rewards)

        correction_freq[i] = np.mean(freqs)

        print("One class train error: " + str(train_err[i]))
        print("One class valid error: " + str(valid_err[i]))

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }
Beispiel #9
0
def run_trial(opt):
    """Fit one SVM on all training states and chart its error per time step.

    Collects ``opt.iters`` supervisor rollouts, trains the learner, collects
    ``opt.samples`` supervisor and learner rollouts, and fits a single
    one-class SVM on every training state.  For each time step it evaluates
    ``eval_oc`` on the training, supervisor, learner and Gaussian-sampled
    ("adver") states of that step, then saves a bar chart of the training
    and validation errors.

    Side effects:
        Overwrites ``opt.samples`` with 100, prints to stdout, switches the
        matplotlib style to 'ggplot', writes the chart to a fixed path and
        calls ``utils.clear()``.

    Returns:
        dict of metrics.  See the NOTE(review) inside the per-step loop about
        the values stored under "train_err", "valid_err" and "test_err".
    """
    oc = svm.OneClassSVM(kernel='rbf', gamma=.05, nu=.05)
    network = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(network)
    opt.samples = 100

    n_iters = opt.iters
    sup_reward = np.zeros(n_iters)
    lnr_reward = np.zeros(n_iters)
    rob_reward = np.zeros(n_iters)

    train_err = np.zeros(n_iters)
    valid_err = np.zeros(n_iters)
    test_err = np.zeros(n_iters)
    robust_err = np.zeros(n_iters)
    correction_freq = np.zeros(n_iters)

    trajs_train = []

    for i in range(n_iters):
        print("\nIteration: " + str(i))
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        lnr.add_data(states, int_actions)
        trajs_train.append(states)

    lnr.train()
    print("\nCollecting validation samples...")
    trajs_valid = []
    trajs_test = []
    for j in range(opt.samples):
        states_valid, _, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, _, _, _ = statistics.collect_traj(
            opt.env, lnr, opt.t, False, early_stop=False)
        trajs_valid.append(states_valid)
        trajs_test.append(states_test)
    print("Done collecting samples")

    # One detector for all time steps: flatten every training trajectory.
    oc.fit([state for traj in trajs_train for state in traj])

    horizon = opt.t
    train_errs = np.zeros(horizon)
    valid_errs = np.zeros(horizon)
    test_errs = np.zeros(horizon)
    adver_errs = np.zeros(horizon)

    for t in range(horizon):
        X_train = np.array([traj[t] for traj in trajs_train])
        X_valid = [traj[t] for traj in trajs_valid]
        X_test = [traj[t] for traj in trajs_test]

        # Draw samples from a Gaussian matched to this step's training
        # states (same mean and covariance).
        cov = np.cov(X_train.T)
        mean = np.mean(X_train, axis=0)
        X_adver = np.random.multivariate_normal(mean, cov, opt.samples)

        # NOTE(review): these three assignments rebind the arrays allocated
        # at the top, so whenever opt.t > 0 the returned "train_err",
        # "valid_err" and "test_err" are the last step's values rather than
        # length-opt.iters arrays -- looks unintended; confirm with callers.
        train_err = eval_oc(oc, X_train)
        valid_err = eval_oc(oc, X_valid)
        test_err = eval_oc(oc, X_test)
        adver_err = eval_oc(oc, X_adver)
        print("Train Error: " + str(train_err))
        print("Valid Error: " + str(valid_err))
        print("Test Error: " + str(test_err))
        print("Adver Error: " + str(adver_err))
        print("Support vectors: " + str(oc.support_vectors_.shape))
        print("\n")

        train_errs[t] = train_err
        valid_errs[t] = valid_err
        test_errs[t] = test_err
        adver_errs[t] = adver_err

    # Grouped bar chart: one group per time step, one bar per data set.
    plt.style.use('ggplot')
    series = [(train_errs, 'Training'), (valid_errs, 'Validation')]

    width = .2
    index = np.arange(horizon)

    for offset, (err, label) in enumerate(series):
        plt.bar(index + offset * width, err, width, label=label)
    plt.legend()
    plt.ylim(0, .75)
    # NOTE(review): machine-specific absolute path; fails on other hosts.
    plt.savefig('/Users/JonathanLee/Desktop/bar_single.png')
    utils.clear()

    return {
        "sup_reward": sup_reward,
        "lnr_reward": lnr_reward,
        "rob_reward": rob_reward,
        "train_err": train_err,
        "valid_err": valid_err,
        "test_err": test_err,
        "robust_err": robust_err,
        "correction_freq": correction_freq
    }