Example #1
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
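    # Example invocation (hypothetical script name and env):
    #   python main.py --arch 64 64 --iters 10 --trials 5 --env Hopper-v1 --t 100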

    opt = Options()
    opt.load_args(ap.parse_args())

    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    train_errs = np.zeros((opt.trials, opt.t))
    valid_errs = np.zeros((opt.trials, opt.t))
    test_errs = np.zeros((opt.trials, opt.t))


    for t in range(opt.trials):
        train_errs[t, :], valid_errs[t, :], test_errs[t, :] = run_trial(opt, t)

    train_err = np.mean(train_errs, axis=0)
    valid_err = np.mean(valid_errs, axis=0)
    test_err = np.mean(test_errs, axis=0)

    errs = [train_err, valid_err, test_err]
    labels = ['train', 'valid', 'test']

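    # Grouped bar chart: offsetting each series by i * width puts the
    # train/valid/test bars side by side at each timestep.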
    width = .2
    index = np.arange(opt.t)

    for i, (err, label) in enumerate(zip(errs, labels)):
        plt.bar(index + i * width, err, width, label=label)
    plt.legend()
    plt.ylim(0, .75)
    plt.savefig('/Users/JonathanLee/Desktop/bar_original_avg.png')
    utils.clear()
Example #2
def run_trial(opt):
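    # One-class SVM over visited states as a novelty detector; nu upper-bounds
    # the fraction of training points treated as outliers.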
    oc = svm.OneClassSVM(kernel='rbf', nu=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))

    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

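    # Each iteration: roll out the supervisor, record its reward and the
    # environment metric, aggregate the demonstration into the learner's
    # dataset, retrain, and evaluate the learner over opt.samples rollouts.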
    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        lnr.train()

        print "\t" + str(lnr.acc())
        for j in range(opt.samples):
            _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
            lnr_rewards[j, i] = r
            lnr_perf[j, i] = opt.env.metric()

    print "Average success: " + str(sup_rewards)
    print "Learner success: \n" + str(lnr_rewards)

    pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    ap.add_argument('--weights', required=True, nargs='+', type=float)
    ap.add_argument('--ufact', required=True, type=float)
    ap.add_argument('--id', required=True, type=int)

    ap.add_argument('--nu', required=True, type=float)
    ap.add_argument('--gamma', required=True, type=float)
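
    # Note: --nu and --gamma are parsed but unused; run_trial above constructs
    # its OneClassSVM with a hardcoded nu=.01 (and never fits it in this
    # variant). Presumably they were meant to be passed through.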

    opt = Options()
    args = ap.parse_args()
    opt.load_args(args)
    args = vars(args)

    opt.envname = opt.env
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    exp_id = args['id']
    opt.env.my_weights = args['weights']
    opt.env.ufact = args['ufact']
    opt.pi = net.Network([64, 64], .01, 300)
    suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
    weights_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(
        exp_id) + '_weights' + suffix + '.txt'
    stats_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(
        exp_id) + '_stats' + suffix + '.txt'
    opt.pi.load_weights(weights_path, stats_path)
    opt.sup = Supervisor(opt.pi)
    opt.misc = Options()
    opt.misc.num_evaluations = 1

    opt.misc.samples = 5
    rec_results = {}
    lnr_results = {}

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(data_dir + '/full'):
        os.makedirs(data_dir + '/full')
    if not os.path.exists(plot_dir + '/full'):
        os.makedirs(plot_dir + '/full')

    opt.data_dir = data_dir
    opt.plot_dir = plot_dir

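    # Note: run_trial above never returns a value, so trials_data is always
    # None here; presumably a results object was meant to be returned.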
    trials_data = run_trial(opt)
Example #4
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)

        lnr.add_data(states, int_actions)

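    # Fit the novelty detector on every state the supervisor visited; a
    # prediction of -1 marks an outlier, so train_err is the fraction of
    # training states the detector itself rejects.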
    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "\nTraining error: " + str(train_err)

    lnr.train()

    sup_rewards = np.zeros(20)
    lnr_rewards = np.zeros(20)

    X_valid = []
    X_test = []
    for i in range(20):
        states_valid, int_actions_valid, _, r_valid = statistics.collect_traj(
            opt.env, opt.sup, opt.t, False)
        states_test, int_actions_test, _, r_test = statistics.collect_traj(
            opt.env, lnr, opt.t, False)

        sup_rewards[i] = r_valid
        lnr_rewards[i] = r_test

        X_valid += states_valid
        X_test += states_test

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation erorr: " + str(valid_err)

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))
    print "Test erorr: " + str(test_err)

    print "\n\n"

    print "Average sup reward: " + str(np.mean(sup_rewards)) + " +/- " + str(
        scipy.stats.sem(sup_rewards))
    print "Average lnr reward: " + str(np.mean(lnr_rewards)) + " +/- " + str(
        scipy.stats.sem(lnr_rewards))

    print "\n\n"

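    # dec(u) scores the state reached by taking action u from the current
    # state without advancing the episode: the simulator state is saved with
    # get_x() and restored with set_x() after the probe step.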
    def dec(u):
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

    rewards = np.zeros(20)
    rec_counts = np.zeros(20)
    X_robust = []
    for i in range(20):

        s = opt.env.reset()
        states = [s]

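        # Recovery policy: when the novelty score falls below the cutoff,
        # replace the learner's action with an approximate gradient-ascent
        # step on the decision function (estimated via utils.finite_diff1),
        # steering back toward familiar states.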
        for t in range(opt.t):
            score = oc.decision_function([s])[0, 0]
            # print "Decision score: " + str(score)
            if score < .1:
                alpha = .1
                a = alpha * utils.finite_diff1(
                    np.zeros(opt.env.action_space.shape), dec)
                # print "Recovering: " + str(a)
                rec_counts[i] += 1.0
                s, r, done, _ = opt.env.step(a)
            else:
                a = lnr.intended_action(s)
                s, r, done, _ = opt.env.step(a)

            rewards[i] += r
            states.append(s)

            # if done == True:
            #     break

        X_robust += states

    robust_preds = oc.predict(X_robust)
    robust_err = len(robust_preds[robust_preds == -1]) / float(
        len(robust_preds))
    print "Robust erorr: " + str(robust_err)

    rec_freq = np.mean(rec_counts / float(opt.t))
    print "Recovery frequency: " + str(rec_freq)

    print "Robust rewards: " + str(np.mean(rewards)) + " +/- " + str(
        scipy.stats.sem(rewards))
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    ap.add_argument('--weights', required=True, nargs='+', type=float)
    ap.add_argument('--ufact', required=True, type=float)
    ap.add_argument('--id', required=True, type=int)

    ap.add_argument('--nu', required=True, type=float)
    ap.add_argument('--gamma', required=True, type=float)

    opt = Options()
    args = ap.parse_args()
    opt.load_args(args)
    args = vars(args)

    opt.envname = opt.env
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    exp_id = args['id']
    opt.env.my_weights = args['weights']
    opt.env.ufact = args['ufact']
    opt.pi = net.Network([64, 64], .01, 300)
    suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
    weights_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(exp_id) + '_weights' + suffix + '.txt'
    stats_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(exp_id) + '_stats' + suffix + '.txt'
    opt.pi.load_weights(weights_path, stats_path)
    opt.sup = Supervisor(opt.pi)
    opt.misc = Options()
    opt.misc.num_evaluations = 1


    opt.misc.samples = 5
    rec_results = {}
    lnr_results = {}

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(data_dir + '/opt'):
        os.makedirs(data_dir + '/opt')
    if not os.path.exists(plot_dir + '/opt'):
        os.makedirs(plot_dir + '/opt')

    trials_data = []
    try:
        for t in range(opt.trials):
            print "Trial: " + str(t)

            start_time = timer.time()
            results = run_trial(opt)

            trials_data.append(results)
            pickle.dump(trials_data, open(data_dir + 'opt/trials_data.pkl', 'w'))
    except KeyboardInterrupt:
        pass
Example #6
ap.add_argument('--gamma', required=True, type=float)


opt = Options()
args = ap.parse_args()
opt.load_args(args)
opt.envname = opt.env
args = vars(args)

print "\n"
print "nu:    " + str(args['nu'])
print "gamma: " + str(args['gamma'])



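# Re-derive the run's plot/data directories from the same options so the
# pickles written by the training run can be located.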
plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))

all_trials = pickle.load(open(data_dir + 'multiple_trials/trials_data.pkl', 'r'))
rec_scores = pickle.load(open(data_dir + 'multiple_trials/rec_scores.pkl', 'r'))
rec_cutoffs = pickle.load(open(data_dir + 'multiple_trials/rec_cutoffs.pkl', 'r'))
rec_mo_scores = pickle.load(open(data_dir + 'multiple_trials/rec_mo_scores.pkl', 'r'))
rec_mo_cutoffs = pickle.load(open(data_dir + 'multiple_trials/rec_mo_cutoffs.pkl', 'r'))

print "Averaging over " + str(len(all_trials)) + " trials"

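# The full slices below are no-op copies; given `els = 150` afterward, they
# were presumably meant to truncate each list, e.g. rec_scores[:els].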
rec_scores = rec_scores[:]
rec_cutoffs = rec_cutoffs[:]
rec_mo_scores = rec_mo_scores[:]
rec_mo_cutoffs = rec_mo_cutoffs[:]
els = 150
Example #7
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    ap.add_argument('--weights', required=True, nargs='+', type=float)
    ap.add_argument('--ufact', required=True, type=float)
    ap.add_argument('--id', required=True, type=int)

    ap.add_argument('--nu', required=True, type=float)
    ap.add_argument('--gamma', required=True, type=float)

    opt = Options()
    args = ap.parse_args()
    opt.load_args(args)
    args = vars(args)

    opt.envname = opt.env
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    exp_id = args['id']
    opt.env.my_weights = args['weights']
    opt.env.ufact = args['ufact']
    opt.pi = net.Network([64, 64], .01, opt.epochs)
    suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
    weights_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(exp_id) + '_weights' + suffix + '.txt'
    stats_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(exp_id) + '_stats' + suffix + '.txt'
    opt.pi.load_weights(weights_path, stats_path)
    opt.sup = Supervisor(opt.pi)
    opt.misc = Options()
    opt.misc.num_evaluations = 1


    opt.misc.samples = 1
    rec_results = {}
    lnr_results = {}

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(data_dir + '/multiple_trials'):
        os.makedirs(data_dir + '/multiple_trials')
    if not os.path.exists(plot_dir + '/multiple_trials'):
        os.makedirs(plot_dir + '/multiple_trials')

    opt.data_dir = data_dir
    opt.plot_dir = plot_dir


    trials_data = []
    rec_scores = []
    rec_cutoffs = []
    for t in range(opt.trials):
        print "\n\nTrial: " + str(t) + "\n\n"
        opt.t_value = t
        trial_data, info = run_trial(opt)

        trials_data.append(trial_data)
        rec_scores += info['rec_scores']
        rec_cutoffs += info['rec_cutoffs']


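        # Overwrite the pickles after every trial so partial results survive
        # an interrupted run.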
        print "Saving to: "  + str(opt.data_dir + 'multiple_trials/trials_data.pkl')
        pickle.dump(trials_data, open(opt.data_dir + 'multiple_trials/trials_data.pkl', 'w'))
        pickle.dump(rec_scores, open(opt.data_dir + 'multiple_trials/rec_scores.pkl', 'w'))
        pickle.dump(rec_cutoffs, open(opt.data_dir + 'multiple_trials/rec_cutoffs.pkl', 'w'))
Example #8
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    opt = Options()
    opt.load_args(ap.parse_args())

    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)
    opt.misc = Options()
    opt.misc.num_evaluations = 10

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(plot_dir + '/scores'):
        os.makedirs(plot_dir + '/scores')
    if not os.path.exists(plot_dir + '/mags'):
        os.makedirs(plot_dir + '/mags')

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    sup_rewards = np.zeros((opt.trials, opt.misc.num_evaluations))
    lnr_rewards = np.zeros((opt.trials, opt.misc.num_evaluations))
    rob_rewards = np.zeros((opt.trials, opt.misc.num_evaluations))

    train_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))
    valid_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))
    test_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))
    robust_err = np.zeros((opt.trials, opt.misc.num_evaluations, opt.t))

    freq = np.zeros((opt.trials, opt.misc.num_evaluations))

    train_bar_errs = np.zeros((opt.trials, opt.t))
    valid_bar_errs = np.zeros((opt.trials, opt.t))
    test_bar_errs = np.zeros((opt.trials, opt.t))

    print "Running Trials:\n\n"

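    # Checkpoint to CSV and re-plot after every trial so a KeyboardInterrupt
    # still leaves usable results for the completed trials.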
    try:
        for t in range(opt.trials):
            start_time = timer.time()
            results = run_trial(opt)
            sup_rewards[t, :] = results['sup_reward']
            lnr_rewards[t, :] = results['lnr_reward']
            rob_rewards[t, :] = results['rob_reward']

            train_err[t, :, :] = results['train_err']
            valid_err[t, :, :] = results['valid_err']
            test_err[t, :, :] = results['test_err']
            robust_err[t, :, :] = results['robust_err']

            freq[t, :] = results['correction_freq']
            train_bar_errs[t], valid_bar_errs[t], test_bar_errs[t] = results[
                'bar_errs']

            sup_rewards_save = sup_rewards[:t + 1, :]
            lnr_rewards_save = lnr_rewards[:t + 1, :]
            rob_rewards_save = rob_rewards[:t + 1, :]
            train_err_save = train_err[:t + 1, :, :]
            valid_err_save = valid_err[:t + 1, :, :]
            test_err_save = test_err[:t + 1, :, :]
            robust_err_save = robust_err[:t + 1, :, :]
            freq_save = freq[:t + 1, :]

            pd.DataFrame(sup_rewards_save).to_csv(opt.data_dir +
                                                  'sup_rewards.csv',
                                                  index=False)
            pd.DataFrame(lnr_rewards_save).to_csv(opt.data_dir +
                                                  'lnr_rewards.csv',
                                                  index=False)
            pd.DataFrame(rob_rewards_save).to_csv(opt.data_dir +
                                                  'rob_rewards.csv',
                                                  index=False)

            for tau in range(opt.t):
                pd.DataFrame(train_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'train_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(valid_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'valid_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(test_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'test_err_t' + str(tau) + '.csv',
                    index=False)
                pd.DataFrame(robust_err_save[:, :, tau]).to_csv(
                    opt.data_dir + 'robust_err_t' + str(tau) + '.csv',
                    index=False)

            pd.DataFrame(freq_save).to_csv(opt.data_dir + 'freq.csv',
                                           index=False)

            train_err_avg = np.mean(train_err_save, axis=2)
            valid_err_avg = np.mean(valid_err_save, axis=2)
            test_err_avg = np.mean(test_err_save, axis=2)
            robust_err_avg = np.mean(robust_err_save, axis=2)

            utils.plot([sup_rewards_save, lnr_rewards_save, rob_rewards_save],
                       ['Supervisor', 'Learner', 'Robust Learner'],
                       opt,
                       "Reward",
                       colors=['red', 'blue', 'green'])
            utils.plot(
                [train_err_avg, valid_err_avg, test_err_avg, robust_err_avg],
                ['Training', 'Validation', 'Learner', 'Robust Learner'],
                opt,
                "Error",
                colors=['red', 'orange', 'blue', 'green'])
            utils.plot([freq_save], ['Frequency'],
                       opt,
                       'Correction Frequency',
                       colors=['green'])

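            # Average the per-timestep bar errors across trials (note: rows
            # for trials not yet run are still zero, which biases the average
            # until the run completes).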
            bar_errs = [
                np.mean(train_bar_errs, axis=0),
                np.mean(valid_bar_errs, axis=0),
                np.mean(test_bar_errs, axis=0)
            ]
            labels = ['train', 'valid', 'test']
            width = .2
            index = np.arange(opt.t)
            for i, (err, label) in enumerate(zip(bar_errs, labels)):
                plt.bar(index + i * width, err, width, label=label)
            plt.legend()
            plt.ylim(0, .75)
            plt.savefig('/Users/JonathanLee/Desktop/bar_new_avg.png')
            utils.clear()

            end_time = timer.time()
            print "Trial time: " + str(end_time - start_time)

    except KeyboardInterrupt:
        pass
Example #9
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)

    opt = Options()
    opt.load_args(ap.parse_args())

    opt.envname = opt.env
    opt.filename = '/Users/JonathanLee/experts/' + opt.envname + '.pkl'
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    opt.pi = load_policy.load_policy(opt.filename)
    opt.sess = tf.Session()
    opt.sup = NetSupervisor(opt.pi, opt.sess)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    sup_rewards = np.zeros((opt.trials, opt.iters))
    lnr_rewards = np.zeros((opt.trials, opt.iters))
    rob_rewards = np.zeros((opt.trials, opt.iters))

    train_err = np.zeros((opt.trials, opt.iters))
    valid_err = np.zeros((opt.trials, opt.iters))
    test_err = np.zeros((opt.trials, opt.iters))
    robust_err = np.zeros((opt.trials, opt.iters))

    freq = np.zeros((opt.trials, opt.iters))

    for t in range(opt.trials):
        results = run_trial(opt)
        sup_rewards[t, :] = results['sup_reward']
        lnr_rewards[t, :] = results['lnr_reward']
        rob_rewards[t, :] = results['rob_reward']

        train_err[t, :] = results['train_err']
        valid_err[t, :] = results['valid_err']
        test_err[t, :] = results['test_err']
        robust_err[t, :] = results['robust_err']

        freq[t, :] = results['correction_freq']

    pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv',
                                     index=False)
    pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv',
                                     index=False)
    pd.DataFrame(rob_rewards).to_csv(opt.data_dir + 'rob_rewards.csv',
                                     index=False)

    pd.DataFrame(train_err).to_csv(opt.data_dir + 'train_err.csv', index=False)
    pd.DataFrame(valid_err).to_csv(opt.data_dir + 'valid_err.csv', index=False)
    pd.DataFrame(test_err).to_csv(opt.data_dir + 'test_err.csv', index=False)
    pd.DataFrame(robust_err).to_csv(opt.data_dir + 'robust_err.csv',
                                    index=False)

    pd.DataFrame(freq).to_csv(opt.data_dir + 'freq.csv', index=False)

    utils.plot([sup_rewards, lnr_rewards, rob_rewards],
               ['Supervisor', 'Learner', 'Robust Learner'],
               opt,
               "Reward",
               colors=['red', 'blue', 'green'])
    utils.plot([train_err, valid_err, test_err, robust_err],
               ['Training', 'Validation', 'Learner', 'Robust Learner'],
               opt,
               "Error",
               colors=['red', 'orange', 'blue', 'green'])
    utils.plot([freq], ['Frequency'],
               opt,
               'Correction Frequency',
               colors=['green'])
Example #10
def run_trial(opt):
    oc = svm.OneClassSVM(kernel='rbf', nu=.01, gamma=.01)
    est = knet.Network(opt.arch, learning_rate=opt.lr, epochs=opt.epochs)
    lnr = learner.Learner(est)

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    opt.plot_dir = plot_dir
    opt.data_dir = data_dir

    opt.num_valid_trajs = max(1, int(.25 * opt.iters))
    opt.samples = 10

    train_trajs = []
    valid_trajs = []

    sup_rewards = np.zeros((1, opt.iters))
    lnr_rewards = np.zeros((opt.samples, opt.iters))

    sup_perf = np.zeros((1, opt.iters))
    lnr_perf = np.zeros((opt.samples, opt.iters))

    for i in range(opt.iters):
        print "Iteration: " + str(i)
        states, int_actions, taken_actions, r = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        sup_rewards[0, i] = r
        sup_perf[0, i] = opt.env.metric()

        lnr.add_data(states, int_actions)
        # lnr.train()

        # print "\t" + str(lnr.acc())
        # for j in range(opt.samples):
        #     _, _, _, r = statistics.collect_traj(opt.env, lnr, opt.t)
        #     lnr_rewards[j, i] = r
        #     lnr_perf[j, i] = opt.env.metric()

    oc.fit(lnr.X)
    preds = oc.predict(lnr.X)
    train_err = len(preds[preds == -1]) / float(len(preds))
    print "Training error: " + str(train_err)

    X_valid = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, opt.sup, opt.t)
        X_valid += states

    valid_preds = oc.predict(X_valid)
    valid_err = len(valid_preds[valid_preds == -1]) / float(len(valid_preds))
    print "Validation erorr: " + str(valid_err)

    lnr.train()
    X_test = []
    for i in range(20):
        states, int_actions, _, _ = statistics.collect_traj(
            opt.env, lnr, opt.t)
        X_test += states

    test_preds = oc.predict(X_test)
    test_err = len(test_preds[test_preds == -1]) / float(len(test_preds))

    print "Test erorr: " + str(test_err)

    s = opt.env.reset()
    reward = 0.0
    x = opt.env.get_x()

    def dec(u):
        x = opt.env.get_x()
        s, _, _, _ = opt.env.step(u)
        opt.env.set_x(x)
        return oc.decision_function([s])[0, 0]

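    # Single rendered rollout of the learner, logging the novelty score at
    # each step; the recovery branch is disabled (commented out) in this
    # variant.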
    states_visited = []
    for t in range(opt.t):
        opt.env.render()
        score = oc.decision_function([s])
        print "\tDecision score: " + str(score)

        # if score < .2 and False:
        #     alpha = 1.0
        #     a = alpha * utils.finite_diff1(np.zeros(opt.env.action_space.shape[0]), dec)
        #     print "\t\tRecovering: " + str(a)
        #     s, r, done, _ = opt.env.step(a)
        #     x = opt.env.get_x()
        # else:
        a = lnr.intended_action(s)
        s, r, done, _ = opt.env.step(a)
        x = opt.env.get_x()

        states_visited.append(s)

        if done:
            break

    preds = oc.predict(states_visited)
    err = len(preds[preds == -1]) / float(len(preds))
    print "Error: " + str(err)

    print "\nDone after " + str(t + 1) + " steps"

    # print "Average success: " + str(sup_rewards)
    # print "Learner success: \n" + str(lnr_rewards)

    # pd.DataFrame(sup_rewards).to_csv(opt.data_dir + 'sup_rewards.csv')
    # pd.DataFrame(lnr_rewards).to_csv(opt.data_dir + 'lnr_rewards.csv')
    # pd.DataFrame(sup_perf).to_csv(opt.data_dir + 'sup_perf.csv')
    # pd.DataFrame(lnr_perf).to_csv(opt.data_dir + 'lnr_perf.csv')

    # plot([sup_rewards, lnr_rewards], ['sup', 'lnr'], opt, 'Reward')
    # plot([sup_perf, lnr_perf], ['sup', 'lnr'], opt, 'Performance')

    IPython.embed()
def main():
    ap = argparse.ArgumentParser()

    ap.add_argument('--arch', required=True, nargs='+', type=int)
    ap.add_argument('--lr', required=False, type=float, default=.01)
    ap.add_argument('--epochs', required=False, type=int, default=100)
    ap.add_argument('--iters', required=True, type=int)
    ap.add_argument('--trials', required=True, type=int)
    ap.add_argument('--env', required=True)
    ap.add_argument('--t', required=True, type=int)
    ap.add_argument('--grads', required=True, type=int)

    ap.add_argument('--weights', required=True, nargs='+', type=float)
    ap.add_argument('--ufact', required=True, type=float)
    ap.add_argument('--id', required=True, type=int)

    ap.add_argument('--nu', required=True, type=float)
    ap.add_argument('--gamma', required=True, type=float)

    opt = Options()
    args = ap.parse_args()
    opt.load_args(args)
    args = vars(args)

    opt.envname = opt.env
    opt.env = gym.envs.make(opt.envname).env
    opt.sim = gym.envs.make(opt.envname).env
    exp_id = args['id']
    opt.env.my_weights = args['weights']
    opt.env.ufact = args['ufact']
    opt.pi = net.Network([64, 64], .01, 300)
    suffix = '_' + utils.stringify(args['weights']) + '_' + str(args['ufact'])
    weights_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(
        exp_id) + '_weights' + suffix + '.txt'
    stats_path = 'meta/' + 'test' + '/' + opt.envname + '_' + str(
        exp_id) + '_stats' + suffix + '.txt'
    opt.pi.load_weights(weights_path, stats_path)
    opt.sup = Supervisor(opt.pi)
    opt.misc = Options()
    opt.misc.num_evaluations = 1

    opt.misc.samples = 200
    rec_results = {}
    lnr_results = {}

    plot_dir = utils.generate_plot_dir('initial', 'experts', vars(opt))
    data_dir = utils.generate_data_dir('initial', 'experts', vars(opt))
    if not os.path.exists(plot_dir):
        os.makedirs(plot_dir)
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    if not os.path.exists(plot_dir + '/scores'):
        os.makedirs(plot_dir + '/scores')
    if not os.path.exists(plot_dir + '/mags'):
        os.makedirs(plot_dir + '/mags')

    try:
        for t in range(opt.trials):
            start_time = timer.time()
            results = run_trial(opt)

            for key in results['rec'].keys():
                if key in rec_results:
                    rec_results[key].append(results['rec'][key])
                else:
                    rec_results[key] = [results['rec'][key]]

            for key in results['lnr'].keys():
                if key in lnr_results:
                    lnr_results[key].append(results['lnr'][key])
                else:
                    lnr_results[key] = [results['lnr'][key]]

    except KeyboardInterrupt:
        pass

    labels = sorted(list(rec_results.keys()))
    for key in rec_results.keys():
        rec_results[key] = np.array(rec_results[key]) / float(opt.misc.samples)
        lnr_results[key] = np.array(lnr_results[key]) / float(opt.misc.samples)

    rec_means = [
        np.mean(rec_results[key]) for key in sorted(rec_results.keys())
    ]
    rec_sems = [
        scipy.stats.sem(rec_results[key]) for key in sorted(rec_results.keys())
    ]
    lnr_means = [
        np.mean(lnr_results[key]) for key in sorted(lnr_results.keys())
    ]
    lnr_sems = [
        scipy.stats.sem(lnr_results[key]) for key in sorted(lnr_results.keys())
    ]