import numpy as np

# Project-local dependencies (DiscModel, Model, learn_from_failure,
# toy_problem_simple, and the fn helper module) are assumed to be
# imported elsewhere in this module.


def experiment_data_size(
    expert_feature=toy_problem_simple,
    apprentice_feature=toy_problem_simple,
    name="simple_feature",
    iterations_per_run=50,
    steps=15,
    runs=6,
):
    """Measure apprentice performance as a function of demonstration-set size,
    comparing L1-failure, no-failure, and slow-failure learning."""
    direc = "results/aamas"
    # initial_states = [disc.quantityToState([0,0,1,2,2]),disc.quantityToState([0,0,3,4,1]),disc.quantityToState([0,1,2,2,2]),disc.quantityToState([0,0,3,2,1])]
    # test_states = [disc.quantityToState([0,0,2,2,1]),disc.quantityToState([0,0,2,4,2]),disc.quantityToState([0,0,3,1,3]),disc.quantityToState([0,0,3,2,1])]
    fn.make_dir(direc + "/" + name)
    results_array = []
    disc = DiscModel(target=[4, 4], boundaries=[4, 4], feature=expert_feature)
    disc_a = DiscModel(target=[4, 4], boundaries=[4, 4], feature=apprentice_feature)
    training_sizes = [2, 5, 25, 50, 100]
    # One row per training size, one column per run; each entry is the gap
    # between the expert's value and the apprentice's final value.
    fail = np.zeros([len(training_sizes), runs])
    normal = np.zeros([len(training_sizes), runs])
    slow = np.zeros([len(training_sizes), runs])
    if expert_feature != apprentice_feature:
        # Build the experts on the expert feature space, then transplant
        # their reward functions onto models over the apprentice features.
        expert_2_test = Model(disc, "obstacle2", load_saved=False)
        expert_1_test = Model(disc, "avoid_reach", load_saved=True)
        expert2 = Model(disc_a, "obstacle2", load_saved=False)
        expert2.reward_f = expert_2_test.reward_f
        expert1 = Model(disc_a, "avoid_reach", load_saved=True)
        expert1.reward_f = expert_1_test.reward_f
    else:
        expert2 = Model(disc, "obstacle2", load_saved=False)
        expert1 = Model(disc, "avoid_reach", load_saved=True)
    test_states = np.random.randint(0, disc.tot_states, 10)
    bad_states = np.random.randint(0, disc.tot_states, 5)
    for enn, size in enumerate(training_sizes):
        print "SIZE=", size
        print "============================================================================"
        for i in range(runs):
            print "RUN", i
            apprentice = Model(disc_a, "dual_reward", load_saved=True)
            # initial_states = np.random.randint(0,disc.tot_states,5)
            initial_states = np.random.randint(0, disc.tot_states, size)
            results_failure = learn_from_failure(
                expert1,
                expert2,
                apprentice,
                iterations_per_run,
                steps,
                initial_states,
                test_states,
                failure="L1",
                initial_bad_states=bad_states,
            )
            fail[enn, i] = results_failure.e_on_e - results_failure.a_o_e[-1]
            apprentice = Model(disc_a, "uniform", load_saved=True)
            results_normal = learn_from_failure(
                expert1,
                expert2,
                apprentice,
                iterations_per_run,
                steps,
                initial_states,
                test_states,
                failure="false",
                initial_bad_states=bad_states,
            )
            normal[enn, i] = results_normal.e_on_e - results_normal.a_o_e[-1]
            apprentice = Model(disc_a, "dual_reward", load_saved=True)
            results_slow = learn_from_failure(
                expert1,
                expert2,
                apprentice,
                iterations_per_run,
                steps,
                initial_states,
                test_states,
                failure="slow",
                initial_bad_states=bad_states,
            )
            slow[enn, i] = results_slow.e_on_e - results_slow.a_o_e[-1]
            results_array.append([results_failure, results_normal, results_slow])
    fn.pickle_saver((results_array, fail, normal, slow), direc + "/" + name + ".pkl")
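# Hedged example (not part of the original experiments): a minimal sketch for
# loading and summarising the pickle written by experiment_data_size above.
# It assumes fn.pickle_saver wraps the standard pickle module; the helper
# name summarise_data_size_results and the default path are hypothetical.
def summarise_data_size_results(path="results/aamas/simple_feature.pkl"):
    import pickle
    with open(path, "rb") as f:
        results_array, fail, normal, slow = pickle.load(f)
    training_sizes = [2, 5, 25, 50, 100]  # mirrors experiment_data_size
    # Mean/std of the expert-apprentice value gap across runs, per size.
    for label, arr in [("L1", fail), ("false", normal), ("slow", slow)]:
        print label, zip(training_sizes, arr.mean(axis=1).tolist(), arr.std(axis=1).tolist())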
def experiment_cvx_contrasting(
    expert_feature=toy_problem_simple,
    apprentice_feature=toy_problem_simple,
    name="simple_feature",
    iterations_per_run=60,
    steps=15,
    runs=20,
):
    """Contrast L1-failure learning, plain learning, and the cvx variant,
    visualising each learned reward on the first run."""
    direc = "results/aamas"
    # initial_states = [disc.quantityToState([0,0,1,2,2]),disc.quantityToState([0,0,3,4,1]),disc.quantityToState([0,1,2,2,2]),disc.quantityToState([0,0,3,2,1])]
    # test_states = [disc.quantityToState([0,0,2,2,1]),disc.quantityToState([0,0,2,4,2]),disc.quantityToState([0,0,3,1,3]),disc.quantityToState([0,0,3,2,1])]
    fn.make_dir(direc + "/" + name)
    results_array = []
    disc = DiscModel(target=[4, 4], boundaries=[4, 4], feature=expert_feature)
    disc_a = DiscModel(target=[4, 4], boundaries=[4, 4], feature=apprentice_feature)
    expert2 = Model(disc, "obstacle2", load_saved=False)
    expert1 = Model(disc, "avoid_reach", load_saved=True)
    test_states = np.random.randint(0, disc.tot_states, 100)
    bad_states = np.random.randint(0, disc.tot_states, 5)
    for i in range(runs):
        apprentice = Model(disc_a, "dual_reward", load_saved=True)
        initial_states = np.random.randint(0, disc.tot_states, 10)
        results_failure = learn_from_failure(
            expert1,
            expert2,
            apprentice,
            iterations_per_run,
            steps,
            initial_states,
            test_states,
            failure="L1",
            initial_bad_states=bad_states,
        )
        if i == 0:
            apprentice.visualise_reward()
        apprentice = Model(disc_a, "uniform", load_saved=True)
        results_normal = learn_from_failure(
            expert1,
            expert2,
            apprentice,
            iterations_per_run,
            steps,
            initial_states,
            test_states,
            failure="false",
            initial_bad_states=bad_states,
        )
        if i == 0:
            apprentice.visualise_reward()
        apprentice = Model(disc_a, "dual_reward", load_saved=True)
        # Despite the name, results_slow holds the "cvx" condition here.
        results_slow = learn_from_failure(
            expert1,
            expert2,
            apprentice,
            iterations_per_run,
            steps,
            initial_states,
            test_states,
            failure="cvx",
            initial_bad_states=bad_states,
        )
        if i == 0:
            apprentice.visualise_reward()
        results_array.append([results_failure, results_normal, results_slow])
    fn.pickle_saver(results_array, direc + "/" + name + ".pkl")
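# Hedged example (not part of the original experiments): a sketch for
# averaging and plotting the per-iteration apprentice curves saved by
# experiment_cvx_contrasting above. Each result object is assumed to expose
# a_o_e (apprentice value on the expert task per iteration), as indexed in
# experiment_data_size; matplotlib and the helper name are illustrative only.
def plot_cvx_contrast(path="results/aamas/simple_feature.pkl"):
    import pickle
    import matplotlib.pyplot as plt
    with open(path, "rb") as f:
        results_array = pickle.load(f)
    # Condition order matches results_array.append(...) in the loop above.
    for idx, label in enumerate(["L1", "false", "cvx"]):
        curves = np.array([np.asarray(run[idx].a_o_e) for run in results_array])
        plt.plot(curves.mean(axis=0), label=label)
    plt.xlabel("iteration")
    plt.ylabel("apprentice value on expert task")
    plt.legend()
    plt.show()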