	def experiment_overlapping(expert_feature=toy_problem_simple, apprentice_feature=toy_problem_simple, name="simple_feature", iterations_per_run=60, steps=15, runs=20):
		# Compare apprentices trained with failure data ("L1"), without failure
		# data ("false") and with the "slow" variant, over `runs` random restarts.
		direc = "results/aamas"
		#initial_states = [disc.quantityToState([0,0,1,2,2]),disc.quantityToState([0,0,3,4,1]),disc.quantityToState([0,1,2,2,2]),disc.quantityToState([0,0,3,2,1])]
		#test_states =[disc.quantityToState([0,0,2,2,1]),disc.quantityToState([0,0,2,4,2]),disc.quantityToState([0,0,3,1,3]),disc.quantityToState([0,0,3,2,1])]
		fn.make_dir(direc+"/"+name)
		results_array = []
		disc = DiscModel(target = [4,4],boundaries = [4,4],feature = expert_feature)
		disc_a = DiscModel(target = [4,4],boundaries = [4,4],feature = apprentice_feature)

		expert2 = Model(disc,"obstacle2_reach", load_saved = False)
		expert1 = Model(disc,"avoid_reach", load_saved = True)
		test_states = np.random.randint(0,disc.tot_states,10)
		bad_states = np.random.randint(0,disc.tot_states,5)	
		for i in range(runs):
			# Condition 1: learn with failure data (failure signal "L1").
			apprentice = Model(disc_a,"dual_reward", load_saved = True)
			initial_states = np.random.randint(0,disc.tot_states,5)
			results_failure = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "L1",initial_bad_states = bad_states)
			if i == 0:
				apprentice.visualise_reward()
			# Condition 2: baseline without failure data.
			apprentice = Model(disc_a,"dual_reward", load_saved = True)
			results_normal = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "false",initial_bad_states = bad_states)
			if i == 0:
				apprentice.visualise_reward()
			# Condition 3: the "slow" failure variant.
			apprentice = Model(disc_a,"dual_reward", load_saved = True)
			results_slow = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "slow",initial_bad_states = bad_states)
			if i == 0:
				apprentice.visualise_reward()
			results_array.append([results_failure,results_normal,results_slow])
		fn.pickle_saver(results_array,direc+"/"+name+".pkl")
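The experiments above persist their results through the project's fn helpers. A minimal sketch of what fn.make_dir, fn.pickle_saver and fn.pickle_loader are assumed to do (the real fn module may differ):

import os
import pickle

def make_dir(path):
	# Create the results directory if it does not already exist.
	if not os.path.exists(path):
		os.makedirs(path)

def pickle_saver(obj, path):
	# Serialise an arbitrary results object to disk.
	with open(path, "wb") as f:
		pickle.dump(obj, f)

def pickle_loader(path):
	# Load a previously cached object, e.g. a feature or transition function.
	with open(path, "rb") as f:
		return pickle.load(f)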
	def experiment_contrasting(expert_feature=toy_problem_simple, apprentice_feature=toy_problem_simple, name="simple_feature", iterations_per_run=60, steps=15, runs=20):
		# As experiment_overlapping, but with the "obstacle2" expert, a larger test
		# set, and a uniformly initialised apprentice for the no-failure baseline.
		direc = "results/aamas"
		#initial_states = [disc.quantityToState([0,0,1,2,2]),disc.quantityToState([0,0,3,4,1]),disc.quantityToState([0,1,2,2,2]),disc.quantityToState([0,0,3,2,1])]
		#test_states =[disc.quantityToState([0,0,2,2,1]),disc.quantityToState([0,0,2,4,2]),disc.quantityToState([0,0,3,1,3]),disc.quantityToState([0,0,3,2,1])]
		fn.make_dir(direc+"/"+name)
		results_array = []
		disc = DiscModel(target = [4,4],boundaries = [4,4],feature = expert_feature)
		disc_a = DiscModel(target = [4,4],boundaries = [4,4],feature = apprentice_feature)

		expert2 = Model(disc,"obstacle2", load_saved = False)
		expert1 = Model(disc,"avoid_reach", load_saved = True)
		test_states = np.random.randint(0,disc.tot_states,100)
		bad_states = np.random.randint(0,disc.tot_states,5)	
		for i in range(runs):
			# Condition 1: learn with failure data (failure signal "L1").
			apprentice = Model(disc_a,"dual_reward", load_saved = True)
			initial_states = np.random.randint(0,disc.tot_states,10)
			results_failure = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "L1",initial_bad_states = bad_states)
			if i == 0:
				apprentice.visualise_reward()
			# Condition 2: baseline without failure data, uniform reward initialisation.
			apprentice = Model(disc_a,"uniform", load_saved = True)
			results_normal = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "false",initial_bad_states = bad_states)
			if i == 0:
				apprentice.visualise_reward()
			# Condition 3: the "slow" failure variant.
			apprentice = Model(disc_a,"dual_reward", load_saved = True)
			results_slow = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "slow",initial_bad_states = bad_states)
			if i == 0:
				apprentice.visualise_reward()
			results_array.append([results_failure,results_normal,results_slow])
		fn.pickle_saver(results_array,direc+"/"+name+".pkl")
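Each run appends a [results_failure, results_normal, results_slow] triple to results_array before it is pickled. A hedged sketch of post-processing the saved file, assuming each result object exposes the e_on_e and a_o_e attributes used in experiment_data_size below:

import pickle
import numpy as np

def summarise_runs(path, labels=("failure", "no_failure", "slow")):
	# Load the pickled list of [failure, normal, slow] result triples.
	with open(path, "rb") as f:
		results_array = pickle.load(f)
	summary = {}
	for idx, label in enumerate(labels):
		# Final gap e_on_e - a_o_e[-1] for this condition, aggregated over runs.
		gaps = [r[idx].e_on_e - r[idx].a_o_e[-1] for r in results_array]
		summary[label] = (np.mean(gaps), np.std(gaps))
	return summary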
	def __init__(self, discretisation, reward_type, load_saved=False):
		self.disc = discretisation  # discretisation model
		if load_saved:
			# Reload the cached feature and transition functions from disk.
			self.feature_f = fn.pickle_loader("saved/feature_f")
			self.transition = fn.pickle_loader("saved/transition_f")
		else:
			# Build them from scratch and cache them for subsequent runs.
			self.buildTransitionFunction(1)
			self.buildFeatureFunction()
			fn.pickle_saver(self.feature_f, "saved/feature_f")
			fn.pickle_saver(self.transition, "saved/transition_f")
		self.choose_reward_function(reward_type)
		self.reward_f_initial = self.buildRewardFunction()
		self.reward_f = self.buildRewardFunction()
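The load_saved flag gives Model a simple build-once, reload-later cache: the first instance built with load_saved=False writes saved/feature_f and saved/transition_f, and later instances reuse them. A hedged usage sketch with names taken from the experiments above:

disc = DiscModel(target=[4, 4], boundaries=[4, 4], feature=toy_problem_simple)

# First construction: builds the feature and transition functions and caches them.
expert = Model(disc, "avoid_reach", load_saved=False)

# Later constructions skip the expensive build and reload the cached files.
apprentice = Model(disc, "dual_reward", load_saved=True)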
	def experiment_data_size(expert_feature=toy_problem_simple, apprentice_feature=toy_problem_simple, name="simple_feature", iterations_per_run=50, steps=15, runs=6):
		# Sweep the number of initial training states and record, for each
		# condition, the final gap e_on_e - a_o_e[-1] between expert and apprentice.
		direc = "results/aamas"
		#initial_states = [disc.quantityToState([0,0,1,2,2]),disc.quantityToState([0,0,3,4,1]),disc.quantityToState([0,1,2,2,2]),disc.quantityToState([0,0,3,2,1])]
		#test_states =[disc.quantityToState([0,0,2,2,1]),disc.quantityToState([0,0,2,4,2]),disc.quantityToState([0,0,3,1,3]),disc.quantityToState([0,0,3,2,1])]
		fn.make_dir(direc+"/"+name)
		results_array = []
		disc = DiscModel(target = [4,4],boundaries = [4,4],feature = expert_feature)
		disc_a = DiscModel(target = [4,4],boundaries = [4,4],feature = apprentice_feature)
		training_sizes = [2,5,25,50,100]
		fail = np.zeros([len(training_sizes), runs])
		normal = np.zeros([len(training_sizes), runs])
		slow = np.zeros([len(training_sizes), runs])
		if expert_feature != apprentice_feature:
			expert_2_test = Model(disc,"obstacle2", load_saved = False)
			expert_1_test = Model(disc,"avoid_reach", load_saved = True)
			expert2 = Model(disc_a,"obstacle2", load_saved = False)
			expert2.reward_f = expert_2_test.reward_f
			expert1 = Model(disc_a,"avoid_reach", load_saved = True)
			expert1.reward_f = expert_1_test.reward_f
		else:
			expert2 = Model(disc,"obstacle2", load_saved = False)
			expert1 = Model(disc,"avoid_reach", load_saved = True)
		test_states = np.random.randint(0,disc.tot_states,10)
		bad_states = np.random.randint(0,disc.tot_states,5)	
		for enn,size in enumerate(training_sizes):
			print "SIZE=",size
			print "============================================================================"
			for n,i in enumerate(range(runs)):
				print "RUN",i
				apprentice = Model(disc_a,"dual_reward", load_saved = True)
				#initial_states = np.random.randint(0,disc.tot_states,5)
				initial_states = np.random.randint(0,disc.tot_states,size)
				results_failure = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "L1",initial_bad_states = bad_states)
				fail[enn,i] = results_failure.e_on_e - results_failure.a_o_e[-1]
				apprentice = Model(disc_a,"uniform", load_saved = True)
				results_normal = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "false",initial_bad_states = bad_states)
				normal[enn,i] = results_normal.e_on_e - results_normal.a_o_e[-1]
				apprentice = Model(disc_a,"dual_reward", load_saved = True)
				results_slow = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "slow",initial_bad_states = bad_states)
				slow[enn,i] = results_slow.e_on_e - results_slow.a_o_e[-1]
				results_array.append([results_failure,results_normal,results_slow])
		fn.pickle_saver((results_array,fail,normal,slow),direc+"/"+name+".pkl")
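The fail, normal and slow arrays have shape (len(training_sizes), runs), so the sweep can be reduced to one learning curve per condition. A minimal sketch of that post-processing, assuming the tuple layout saved above:

import pickle

def load_data_size_results(path):
	# The experiment saves (results_array, fail, normal, slow) as a single tuple.
	with open(path, "rb") as f:
		results_array, fail, normal, slow = pickle.load(f)
	# Mean final gap over runs, one value per training-set size.
	return fail.mean(axis=1), normal.mean(axis=1), slow.mean(axis=1)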
	def experiment_data_size(expert_feature=toy_problem_simple, apprentice_feature=toy_problem_simple, name="simple_feature", iterations_per_run=50, steps=15, runs=6):
		# Variant of the data-size sweep above that uses the "sign" failure signal
		# for the first condition and "L1" for the third.
		direc = "results/aamas"
		#initial_states = [disc.quantityToState([0,0,1,2,2]),disc.quantityToState([0,0,3,4,1]),disc.quantityToState([0,1,2,2,2]),disc.quantityToState([0,0,3,2,1])]
		#test_states =[disc.quantityToState([0,0,2,2,1]),disc.quantityToState([0,0,2,4,2]),disc.quantityToState([0,0,3,1,3]),disc.quantityToState([0,0,3,2,1])]
		fn.make_dir(direc+"/"+name)
		results_array = []
		disc = DiscModel(target = [4,4],boundaries = [4,4],feature = expert_feature)
		disc_a = DiscModel(target = [4,4],boundaries = [4,4],feature = apprentice_feature)
		training_sizes = [2,5,25,50,100]
		fail = np.zeros([len(training_sizes), runs])
		normal = np.zeros([len(training_sizes), runs])
		slow = np.zeros([len(training_sizes), runs])
		if expert_feature != apprentice_feature:
			expert_2_test = Model(disc,"obstacle2", load_saved = False)
			expert_1_test = Model(disc,"avoid_reach", load_saved = True)
			expert2 = Model(disc_a,"obstacle2", load_saved = False)
			expert2.reward_f = expert_2_test.reward_f
			expert1 = Model(disc_a,"avoid_reach", load_saved = True)
			expert1.reward_f = expert_1_test.reward_f
		else:
			expert2 = Model(disc,"obstacle2", load_saved = False)
			expert1 = Model(disc,"avoid_reach", load_saved = True)
		test_states = np.random.randint(0,disc.tot_states,10)
		bad_states = np.random.randint(0,disc.tot_states,5)	
		for enn,size in enumerate(training_sizes):
			print "SIZE=",size
			print "============================================================================"
			for n,i in enumerate(range(runs)):
				print "RUN",i
				apprentice = Model(disc_a,"dual_reward", load_saved = True)
				#initial_states = np.random.randint(0,disc.tot_states,5)
				initial_states = np.random.randint(0,disc.tot_states,size)
				results_failure = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "sign",initial_bad_states = bad_states)
				fail[enn,i] = results_failure.e_on_e - results_failure.a_o_e[-1]
				apprentice = Model(disc_a,"uniform", load_saved = True)
				results_normal = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "false",initial_bad_states = bad_states)
				normal[enn,i] = results_normal.e_on_e - results_normal.a_o_e[-1]
				apprentice = Model(disc_a,"dual_reward", load_saved = True)
				results_slow = learn_from_failure(expert1,expert2,apprentice,iterations_per_run,steps,initial_states,test_states,failure = "L1",initial_bad_states = bad_states)
				slow[enn,i] = results_slow.e_on_e - results_slow.a_o_e[-1]
				results_array.append([results_failure,results_normal,results_slow])
		fn.pickle_saver((results_array,fail,normal,slow),direc+"/"+name+".pkl")
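A hedged plotting sketch for the data-size sweep, assuming matplotlib is available; the condition labels should match whichever failure signals were passed to learn_from_failure:

import matplotlib.pyplot as plt

def plot_data_size_sweep(training_sizes, fail, normal, slow):
	# One curve per condition: mean final gap versus number of initial training states.
	plt.plot(training_sizes, fail.mean(axis=1), marker="o", label="with failure data")
	plt.plot(training_sizes, normal.mean(axis=1), marker="s", label="no failure data")
	plt.plot(training_sizes, slow.mean(axis=1), marker="^", label="third condition")
	plt.xlabel("number of initial training states")
	plt.ylabel("e_on_e - a_o_e[-1]")
	plt.legend()
	plt.show()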