for _ in range(ITER):
    print " Iteration: " + str(_)
    print " Retraining with " + str(len(dagger.net.data)) + " examples"
    dagger.retrain()
    acc[_] = dagger.svm.acc()
    iteration_states = []
    dagger.record = True
    for i in range(SAMP):
        if i >= LIMIT_DATA:
            dagger.record = False
        dagger.rollout()
        iteration_states += dagger.get_recent_rollout_states().tolist()
        r[_] = r[_] + dagger.get_reward() / SAMP

    if _ == ITER - 1 and t == 0:
        dagger_analysis.count_states(np.array(iteration_states))
        dagger_analysis.save_states("comparisons/boost_dt_comparisons/boost_dt_dagger_final.png")
        dagger_analysis.show_states()

if t == 0:
    dagger_analysis.reset_density()
    dagger_analysis.count_states(dagger.get_states())
    dagger_analysis.save_states("comparisons/boost_dt_comparisons/boost_dt_dagger.png")
    dagger_analysis.show_states()
    plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks,
            filename='comparisons/boost_dt_comparisons/boost_dt_dagger_state_action.png')

dagger_data[t,:] = r
dagger_acc[t,:] = acc

# print value_iter_data
# print classic_il_data
# print dagger_data
for _ in range(ITER):
    print " Iteration: " + str(_)
    print " Retraining with " + str(len(dagger.net.data)) + " examples"
    dagger.retrain()
    acc[_] = dagger.svm.acc()
    iteration_states = []
    dagger.record = True
    for i in range(SAMP):
        if i >= LIMIT_DATA:
            dagger.record = False
        dagger.rollout()
        iteration_states += dagger.get_recent_rollout_states().tolist()
        r[_] = r[_] + dagger.get_reward() / SAMP

    if _ == ITER - 1 and t == 0:
        dagger_analysis.count_states(np.array(iteration_states))
        dagger_analysis.save_states(comparisons_directory + "boost_svm_rollouts_dagger_final.png")
        dagger_analysis.show_states()

if t == 0:
    dagger_analysis.reset_density()
    dagger_analysis.count_states(dagger.get_states())
    dagger_analysis.save_states(comparisons_directory + "boost_svm_rollouts_dagger.png")
    dagger_analysis.show_states()
    plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks,
            filename=comparisons_directory + 'boost_svm_rollouts_dagger_state_action.png')

dagger_data[t,:] = r
dagger_acc[t,:] = acc

# print value_iter_data
# print classic_il_data
# print dagger_data
for t in range(TRIALS):
    mdp.load_policy()
    sup = SVMSupervise(grid, mdp)
    sup.sample_policy()
    value_iter_analysis = Analysis(W, H, ITER, rewards=rewards, sinks=sinks,
            desc='Value iter policy')

    # Estimate the value-iteration supervisor's average reward over ITER * SAMP rollouts.
    r = 0.0
    for _ in range(ITER * SAMP):
        sup.rollout()
        r = r + sup.get_reward() / (ITER * SAMP)
    print "Value iter reward: " + str(r)

    if t == 0:
        value_iter_analysis.count_states(sup.get_states())
        value_iter_analysis.save_states("comparisons/svm_comparisons/value_iter.png")
        value_iter_analysis.show_states()

    # Train the classifier once on the collected demonstrations; the baseline is
    # constant across iterations, so broadcast it over the iteration axis.
    sup.train()
    classic_il_acc[t,:] = np.zeros(ITER) + sup.svm.acc()
    value_iter_data[t,:] = np.zeros(ITER) + r

    # Evaluate the learned (classic IL) policy on fresh rollouts.
    r = 0.0
    sup.net.clear_data()
    sup.sample_policy()
    il_analysis = Analysis(H, W, ITER, rewards=rewards, sinks=sinks, desc="IL's policy")
    for _ in range(SAMP * ITER):
        sup.animate = False
        sup.rollout()
        r = r + sup.get_reward() / (SAMP * ITER)
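# One idiom in the block above worth flagging: np.zeros(ITER) + sup.svm.acc()
# simply broadcasts a single per-trial scalar across the iteration axis, so the
# one-shot supervised baseline lines up against per-iteration DAgger curves when
# plotted. A tiny sketch (the accuracy value is a made-up stand-in):
#
#     import numpy as np
#     ITER = 5
#     acc_value = 0.83                     # made-up stand-in for sup.svm.acc()
#     row = np.zeros(ITER) + acc_value     # one scalar broadcast across all iterations
#     print row                            # -> [ 0.83  0.83  0.83  0.83  0.83]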
def run(ne, lr):
    plotter = plot_class.Plotter()
    comparisons_directory, data_directory = make_name(ne, lr)

    if not os.path.exists(comparisons_directory):
        os.makedirs(comparisons_directory)
    if not os.path.exists(data_directory):
        os.makedirs(data_directory)

    #ITER = 25
    #TRIALS = 10
    #SAMP = 20
    ITER = 10
    TRIALS = 3
    SAMP = 10
    LIMIT_DATA = 1
    DEPTH = 6

    H = 15
    W = 15

    grid = BasicGrid(H, W)
    rewards = scenarios.scenario3['rewards']
    sinks = scenarios.scenario3['sinks']
    grid.reward_states = rewards
    grid.sink_states = sinks

    mdp = ClassicMDP(ClassicPolicy(grid), grid)
    #mdp.value_iteration()
    #mdp.save_policy('scen4.p')
    mdp.load_policy('scen4.p')

    value_iter_pi = mdp.pi
    plotter.plot_state_actions(value_iter_pi, rewards=grid.reward_states, sinks=grid.sink_states,
            filename=comparisons_directory + 'value_iter_state_action.png')

    value_iter_data = np.zeros([TRIALS, ITER])
    classic_il_data = np.zeros([TRIALS, ITER])
    classic_il_acc = np.zeros([TRIALS, ITER])
    classic_il_loss = np.zeros([TRIALS, ITER])

    # Classic IL: train the boosted linear SVM on supervisor demonstrations only.
    for t in range(TRIALS):
        print "\nIL Trial: " + str(t)
        mdp.load_policy('scen4.p')

        boost = SVC(kernel='linear')
        boost = AdaBoostClassifier(base_estimator=boost, algorithm='SAMME',
                n_estimators=ne, learning_rate=lr)
        sup = ScikitSupervise(grid, mdp, Classifier=boost)
        sup.sample_policy()

        value_iter_analysis = Analysis(W, H, ITER, rewards=rewards, sinks=sinks,
                desc='Value iter policy')
        value_iter_r = np.zeros(ITER)
        classic_il_r = np.zeros(ITER)
        acc = np.zeros(ITER)
        loss = np.zeros(ITER)

        sup.record = True
        #for _ in range(4):
        #    sup.rollout()

        for i in range(ITER):
            print " Iteration: " + str(i)
            mdp.pi = value_iter_pi
            sup.record = True
            # Roll out the supervisor; only the first LIMIT_DATA rollouts are recorded as data.
            for _ in range(SAMP):
                if _ >= LIMIT_DATA:
                    sup.record = False
                sup.rollout()
                value_iter_r[i] += sup.get_reward() / (SAMP)

            sup.record = False
            print " Training on " + str(len(sup.net.data)) + " examples"
            sup.train()
            acc[i] = sup.svm.acc()

            # Evaluate the learned policy on fresh rollouts.
            for _ in range(SAMP):
                sup.record = False
                sup.rollout()
                loss[i] += sup.get_loss() / float(SAMP)
                classic_il_r[i] += sup.get_reward() / SAMP

        #print acc
        if t == 0:
            plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks,
                    filename=comparisons_directory + 'svm_classic_il_state_action.png')

        classic_il_data[t,:] = classic_il_r
        value_iter_data[t,:] = value_iter_r
        classic_il_acc[t,:] = acc
        classic_il_loss[t,:] = loss

    # DAGGER
    dagger_data = np.zeros((TRIALS, ITER))
    dagger_analysis = Analysis(H, W, ITER, rewards=grid.reward_states, sinks=grid.sink_states,
            desc="Dagger's policy progression")
    dagger_acc = np.zeros((TRIALS, ITER))
    dagger_loss = np.zeros((TRIALS, ITER))

    for t in range(TRIALS):
        print "DAgger Trial: " + str(t)
        mdp.load_policy('scen4.p')
        dagger = SVMDagger(grid, mdp, depth=DEPTH)
        dagger.svm.nonlinear = False
        dagger.record = True
        dagger.rollout()
        #for _ in range(5):
        #    dagger.rollout()

        r = np.zeros(ITER)
        acc = np.zeros(ITER)
        loss = np.zeros(ITER)
        for _ in range(ITER):
            print " Iteration: " + str(_)
            print " Retraining with " + str(len(dagger.net.data)) + " examples"
            dagger.retrain()
            acc[_] = dagger.svm.acc()
            iteration_states = []
            dagger.record = True
            # Roll out the current DAgger policy; record only the first LIMIT_DATA rollouts.
            for i in range(SAMP):
                if i >= LIMIT_DATA:
                    dagger.record = False
                dagger.rollout()
                loss[_] += dagger.get_loss() / float(SAMP)
                iteration_states += dagger.get_recent_rollout_states().tolist()
                r[_] = r[_] + dagger.get_reward() / SAMP

            #if _ == ITER - 1 and t == 0:
            if _ == 0 and t == 0:
                dagger_analysis.count_states(np.array(iteration_states))
                dagger_analysis.save_states(comparisons_directory + "svm_dagger_final.png")
                dagger_analysis.show_states()

        if t == 0:
            dagger_analysis.reset_density()
            dagger_analysis.count_states(dagger.get_states())
            dagger_analysis.save_states(comparisons_directory + "svm_dagger.png")
            dagger_analysis.show_states()
            plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks,
                    filename=comparisons_directory + 'svm_dagger_state_action.png')

        dagger_data[t,:] = r
        dagger_acc[t,:] = acc
        dagger_loss[t,:] = loss

    # print value_iter_data
    # print classic_il_data
    # print dagger_data
    print classic_il_loss
    print dagger_loss

    np.save(data_directory + 'svm_sup_data.npy', value_iter_data)
    np.save(data_directory + 'svm_classic_il_data.npy', classic_il_data)
    np.save(data_directory + 'svm_dagger_data.npy', dagger_data)
    np.save(data_directory + 'svm_dagger_acc.npy', dagger_acc)
    np.save(data_directory + 'svm_classic_il_acc.npy', classic_il_acc)

    analysis = Analysis(H, W, ITER, rewards=rewards, sinks=sinks, desc="General comparison")
    analysis.get_perf(value_iter_data)
    analysis.get_perf(classic_il_data)
    analysis.get_perf(dagger_data)

    #analysis.plot(names=['Value iteration', 'Adaboost IL'],
    #        filename=comparisons_directory + 'svm_reward_comparison.png', ylims=[-60, 100])
    analysis.plot(names=['Value iteration', 'LSVM Boosted IL', 'LSVM DAgger'],
            filename=comparisons_directory + 'svm_reward_comparison.png', ylims=[-60, 100])
    print "Saving analysis to: " + comparisons_directory + 'svm_reward_comparison.png'

    acc_analysis = Analysis(H, W, ITER, rewards=grid.reward_states, sinks=grid.sink_states,
            desc="Accuracy comparison")
    acc_analysis.get_perf(classic_il_acc)
    acc_analysis.get_perf(dagger_acc)
    acc_analysis.plot(names=['LSVM Boosted Acc.', 'LSVM DAgger Acc.'], label='Accuracy',
            filename=comparisons_directory + 'svm_acc_comparison.png', ylims=[0, 1])
    #acc_analysis.plot(names=['Adaboost IL Acc.'], label='Accuracy',
    #        filename=comparisons_directory + 'svm_acc_comparison.png', ylims=[0, 1])

    loss_analysis = Analysis(H, W, ITER, rewards=rewards, sinks=sinks, desc="Loss plot")
    loss_analysis.get_perf(classic_il_loss)
    loss_analysis.get_perf(dagger_loss)
    loss_analysis.plot(names=['LSVM Boosted IL loss', 'LSVM DAgger loss'],
            filename=comparisons_directory + 'loss_plot.png', ylims=[0, 1])
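# Aside: run() wraps its learner as AdaBoost over a linear SVC with algorithm='SAMME'.
# The hedged standalone sketch below shows why that setting is needed with the older
# scikit-learn API this script uses (base_estimator, SAMME): SVC without probability
# estimates has no predict_proba, which the default SAMME.R variant would require.
# The synthetic two-blob data is an illustrative assumption, not the gridworld features.
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier

rng = np.random.RandomState(0)
X = np.vstack([rng.normal(-1.0, 1.0, size=(100, 2)),
               rng.normal(1.0, 1.0, size=(100, 2))])
y = np.array([0] * 100 + [1] * 100)

# algorithm='SAMME' because SVC (without probability=True) exposes no predict_proba.
base = SVC(kernel='linear')
boost = AdaBoostClassifier(base_estimator=base, algorithm='SAMME',
                           n_estimators=10, learning_rate=1.0)
boost.fit(X, y)
print "Boosted LSVM training accuracy: " + str(boost.score(X, y))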
classic_il_data = np.zeros([TRIALS, ITER])
for t in range(TRIALS):
    mdp.load_policy()
    sup = Supervise(grid, mdp)
    sup.sample_policy()
    supervisor_analysis = Analysis(H, W, ITER, rewards=grid.reward_states, sinks=grid.sink_states,
            desc="Supervisor's policy")

    r = 0.0
    for _ in range(ITER * SAMP):
        sup.rollout()
        r = r + sup.get_reward() / (ITER * SAMP)
    print "Value iter reward: " + str(r)

    if t == 0:
        supervisor_analysis.count_states(sup.get_states())
        supervisor_analysis.save_states("comparisons/comparisons/value_iter.png")
        supervisor_analysis.show_states()

    sup.train()
    classic_train, classic_test = sup.net.return_stats()
    classic_train = np.zeros((TRIALS, ITER)) + classic_train
    classic_test = np.zeros((TRIALS, ITER)) + classic_test
    sup_data[t,:] = np.zeros(ITER) + r

    r = 0.0
    sup.net.clear_data()
    sup.sample_policy()
    il_analysis = Analysis(H, W, ITER, rewards=grid.reward_states, sinks=grid.sink_states,
            desc="IL's policy")
    print sup.get_states()
    for _ in range(SAMP * ITER):
for _ in range(ITER):
    print " Iteration: " + str(_)
    print " Retraining with " + str(len(dagger.net.data)) + " examples"
    dagger.retrain()
    acc[_] = dagger.svm.acc()
    iteration_states = []
    dagger.record = True
    for i in range(SAMP):
        if i >= LIMIT_DATA:
            dagger.record = False
        dagger.rollout()
        iteration_states += dagger.get_recent_rollout_states().tolist()
        r[_] = r[_] + dagger.get_reward() / SAMP

    if _ == ITER - 1 and t == 0:
        dagger_analysis.count_states(np.array(iteration_states))
        dagger_analysis.save_states("comparisons/percept_comparisons/percept_dagger_final.png")
        dagger_analysis.show_states()

if t == 0:
    dagger_analysis.reset_density()
    dagger_analysis.count_states(dagger.get_states())
    dagger_analysis.save_states("comparisons/percept_comparisons/percept_dagger.png")
    dagger_analysis.show_states()
    plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks,
            filename='comparisons/percept_comparisons/percept_dagger_state_action.png')

dagger_data[t,:] = r
dagger_acc[t,:] = acc

# print value_iter_data
# print classic_il_data
# print dagger_data
print " Retraining with " + str(len(dagger.net.data)) + " examples" dagger.retrain() acc[_] = dagger.svm.acc() iteration_states = [] dagger.record = True for i in range(SAMP): if i >= LIMIT_DATA: dagger.record = False dagger.rollout() loss[_] += dagger.get_loss() / float(SAMP) iteration_states += dagger.get_recent_rollout_states().tolist() r[_] = r[_] + dagger.get_reward() / SAMP #if _ == ITER - 1 and t == 0: if _ == 0 and t ==0: dagger_analysis.count_states(np.array(iteration_states)) dagger_analysis.save_states(comparisons_directory + "svm_dagger_final.png") dagger_analysis.show_states() if t == 0: dagger_analysis.reset_density() dagger_analysis.count_states(dagger.get_states()) dagger_analysis.save_states(comparisons_directory + "svm_dagger.png") dagger_analysis.show_states() plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks, filename=comparisons_directory + 'svm_dagger_state_action.png') dagger_data[t,:] = r dagger_acc[t,:] = acc dagger_loss[t,:] = loss # print value_iter_data # print classic_il_data
for _ in range(ITER):
    print " Iteration: " + str(_)
    print " Retraining with " + str(len(dagger.net.data)) + " examples"
    dagger.retrain()
    acc[_] = dagger.svm.acc()
    iteration_states = []
    dagger.record = True
    for i in range(SAMP):
        if i >= LIMIT_DATA:
            dagger.record = False
        dagger.rollout()
        iteration_states += dagger.get_recent_rollout_states().tolist()
        r[_] = r[_] + dagger.get_reward() / SAMP

    if _ == ITER - 1 and t == 0:
        dagger_analysis.count_states(np.array(iteration_states))
        dagger_analysis.save_states(comparisons_directory + "boost_dt_sup_dagger_final.png")
        dagger_analysis.show_states()

if t == 0:
    dagger_analysis.reset_density()
    dagger_analysis.count_states(dagger.get_states())
    dagger_analysis.save_states(comparisons_directory + "boost_dt_sup_dagger.png")
    dagger_analysis.show_states()
    plotter.plot_state_actions(mdp.pi, rewards=rewards, sinks=sinks,
            filename=comparisons_directory + 'boost_dt_sup_dagger_state_action.png')

dagger_data[t,:] = r
dagger_acc[t,:] = acc

# print value_iter_data
# print classic_il_data
# print dagger_data
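# Reference sketch: the per-script loops above all follow the same DAgger recipe --
# roll out the current learner, have the supervisor relabel the visited states,
# aggregate them into the growing dataset, and retrain. The self-contained toy
# version below restates that recipe; the supervisor function, the random "rollouts",
# and the SVC learner are illustrative stand-ins, not the repo's grid/SVMDagger/net
# interfaces.
import numpy as np
from sklearn.svm import SVC

def supervisor(state):
    # Toy "expert": action 1 if the first coordinate is negative, else 0.
    return int(state[0] < 0)

rng = np.random.RandomState(0)
states = rng.uniform(-1, 1, size=(20, 2))              # initial demonstrations
actions = np.array([supervisor(s) for s in states])
learner = SVC(kernel='linear').fit(states, actions)

for iteration in range(5):
    # "Roll out" the learner (here: sample states it would visit), then relabel
    # those states with the supervisor and aggregate them into the dataset.
    visited = rng.uniform(-1, 1, size=(20, 2))
    labels = np.array([supervisor(s) for s in visited])
    states = np.vstack([states, visited])
    actions = np.concatenate([actions, labels])
    learner.fit(states, actions)                       # retrain on the aggregate
    print "Iteration %d: training accuracy %.3f" % (iteration, learner.score(states, actions))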