Example #1
0
def evaluate_pong_performance(test_brain, visualization_mode):
	"""Evaluate `test_brain` on Pong-ram-v0 and return a percentage score.

	Runs a fixed number of trials; each trial deep-copies the brain so the
	caller's instance is never mutated.  Per environment step the brain is
	advanced `brain_speed` times on the same observation and each output
	bit is decided by majority vote across those samples.  The episode
	score counts survived frames (not reward), and the result is the
	accumulated score as a percentage of desired_score * trials.
	"""
	desired_score = 2000.0
	brain_speed = 5  # brain advances per environment step
	trials = 100
	output_count = 3
	accumulated = 0

	for _trial in range(trials):
		env = gym.make('Pong-ram-v0')
		candidate = copy.deepcopy(test_brain)
		observations = env.reset()
		episode_score = 0
		while True:
			if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
				env.render()

			# Widen the global input bounds used by threshold mutations.
			# NOTE(review): assumes Mutation_params() shares state across
			# instances -- confirm, otherwise these updates are lost.
			for idx, value in enumerate(observations):
				brain.Mutation_params().upper_input_bounds[idx] = max(brain.Mutation_params().upper_input_bounds[idx], value)
				brain.Mutation_params().lower_input_bounds[idx] = min(brain.Mutation_params().lower_input_bounds[idx], value)

			# Sample the brain brain_speed times on the same observation.
			samples = []
			for _ in range(brain_speed):
				samples.append(candidate.advance(observations, 3))
				if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
					visualization.visualize_brain(brain.print_brain_to_json(candidate))

			# Majority vote per output bit across the samples.
			bits = [int(sum(sample[j] for sample in samples) > int(brain_speed / 2)) for j in range(output_count)]

			action = min(utils.binary_array_to_decimal(bits), 5)

			if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
				print('ACTION: ' + str(action))

			observations, reward, done, info = env.step(action)
			episode_score += 1  # frames survived, not env reward
			if done:
				accumulated += episode_score
				env.close()
				break

	return ((accumulated) / (desired_score * trials)) * 100
    def test_mutation_params_default_1(self):
        """set_mutation_to_default_1() must restore every tunable field to
        its documented default, even after a field has been clobbered."""
        param_instance = brain.Mutation_params()
        param_instance.swap_prob = 999  # deliberately corrupt one field first
        param_instance.set_mutation_to_default_1()
        # NOTE(review): this asserts on a *fresh* instance rather than
        # param_instance -- only meaningful if Mutation_params shares state
        # across instances; confirm intent.
        self.assertEqual(brain.Mutation_params().neuron_start_count, 1)
        self.assertEqual(param_instance.swap_prob, .1)
        self.assertEqual(param_instance.neuron_count_prob, .5)
        self.assertEqual(param_instance.neuron_count_bias, .5)
        self.assertEqual(param_instance.target_limit, 5)
        self.assertEqual(param_instance.target_count_prob, .25)
        self.assertEqual(param_instance.target_count_bias, .5)
        self.assertEqual(param_instance.retarget_prob, .25)
        self.assertEqual(param_instance.potential_prob, .1)
        self.assertEqual(param_instance.potential_strength, .1)
        self.assertEqual(param_instance.threshold_prob, .1)
        self.assertEqual(param_instance.threshold_strength, .1)
        self.assertEqual(param_instance.reflex_pair_prob, 0)
        self.assertEqual(param_instance.input_count, 10)
        self.assertEqual(param_instance.output_count, 10)

        self.assertEqual(param_instance.sensory_prob, .25)
        self.assertEqual(param_instance.actuating_prob, .25)
        self.assertEqual(param_instance.hidden_prob, .1)

        self.assertEqual(param_instance.mutation_cycles, 1)
        # Bug fix: `len(...) is 0` compared object identity with a literal
        # (SyntaxWarning on Python >= 3.8); compare values via assertEqual.
        self.assertEqual(len(param_instance.upper_input_bounds), 0)
        self.assertEqual(len(param_instance.lower_input_bounds), 0)
Example #3
0
    def advance_agents(self, visualization_mode):
        """Advance every agent by one tick in a random order.

        For each agent: charge one energy unit, sense its surroundings,
        run its brain, decode the binary output to an integer and hand it
        to the agent's action generator.  Iteration order is shuffled so
        no grid position gets a systematic first-mover advantage.
        """
        ## get randomly ordered list of agents, sense, run, harvest actuation and publish to action_queue
        agent_keys = list(self.agents.keys())
        shuffle(agent_keys)
        for key in agent_keys:  #useful to note here that each 'key' is a tuple containing agent location in (x,y) format
            agent = self.agents[key]
            # Sanity check: the grid cell at (x, y) must actually hold an
            # agent (grid is indexed [y][x]).
            assert (self.grid[key[1]][key[0]] == Object_type.AGENT)
            agent.energy -= 1  # living costs one energy per tick
            #sense
            observations = self.sense(key, agent.direction)
            output = []  # NOTE(review): never used below -- dead local?
            for i in range(
                    len(observations)
            ):  ## setting our bounds appropriately for threshold mutations
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i],
                    observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i],
                    observations[i])

            # NOTE(review): output_count is read off the *class* here
            # (no parentheses), unlike the instance access elsewhere --
            # works only if it is a class attribute; confirm.
            result = agent.brain.advance_n_with_mode(
                observations, brain.Mutation_params.output_count, 10,
                visualization_mode)

            # Binary output vector -> single integer action code.
            numerical_result = utils.binary_array_to_decimal(result)
            #	print(result)
            #	print(numerical_result)
            if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                print(result)
                print(numerical_result)
            agent.generate_action(numerical_result, self, key)
Example #4
0
def population_learn(existing_brain, eval_function):
	"""Evolve a population of brains until one scores at least 100.

	existing_brain -- seed brain cloned across the whole population, or
	                  None to start from fresh brain.Brain() instances
	eval_function  -- callable(brain, visualization_flag) -> numeric score

	Returns the best-scoring brain found.  (Bug fix: the original fell off
	the end of the loop and implicitly returned None, unlike its siblings
	learn()/impatient_learn() which return the champion.)
	"""
	population_size = brain.Mutation_params().population_size
	input_size = brain.Mutation_params().input_count
	output_size = brain.Mutation_params().output_count

	# Each entry is a mutable [score, brain] pair.
	population = []
	if existing_brain is None:
		for i in range(population_size):
			population.append([0, brain.Brain()])
	else:
		for i in range(population_size):
			population.append([0, copy.deepcopy(existing_brain)])

	best_score = float("-inf")
	best_brain = None

	while best_score < 100:

		# Score everyone; evaluate a deep copy so the stored genome is
		# never mutated by the evaluation itself.
		for i in range(len(population)):
			test_instance = copy.deepcopy(population[i][1])
			population[i][0] = eval_function(test_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
			if population[i][0] >= best_score:
				best_score = population[i][0]
				best_brain = population[i][1]

		# Keep only the fittest population_size individuals.
		population = sorted(population, key=lambda x: -x[0])
		population = population[:population_size]

		print('\nSCORES: ')
		for p in population:
			print(str(p[0]) + ',')

		# Breed population_size offspring from two *distinct* random
		# parents each; offspring join unscored and are evaluated on the
		# next generation pass.
		for i in range(population_size):
			new_pair = [randrange(population_size), randrange(population_size)]
			while new_pair[0] == new_pair[1]:
				new_pair[1] = randrange(population_size)

			new_offspring = brain.cross_over(population[new_pair[0]][1], population[new_pair[1]][1])
			new_offspring.verify_network_consistency()
			new_offspring.default_mutation(input_size, output_size)
			population.append([0, new_offspring])

	# Bug fix: return the champion instead of falling through with None.
	return best_brain
Example #5
0
def evaluate_chopper_performance(test_brain, visualization_mode):
    """Score `test_brain` on ChopperCommand-ram-v0.

    Runs `trials` episodes capped at 1000 steps each and returns the
    accumulated reward as a percentage of desired_score * trials.

    NOTE(review): unlike the Pong evaluator, this advances `test_brain`
    itself rather than a per-trial deep copy, so internal brain state
    carries across trials -- confirm that is intended.
    """
    best_score = 0.0
    desired_score = 1000
    trials = 5
    output_count = 5
    brain_speed = 5  # brain advances per environment step
    for trial in range(trials):
        env = gym.make('ChopperCommand-ram-v0')
        observations = env.reset()
        score = 0
        for step in range(1000):
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()

            output = [0] * output_count  # was a hard-coded 5

            # Widen the global input bounds used by threshold mutations.
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i],
                    observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i],
                    observations[i])

            # Sample the brain brain_speed times on the same observation.
            raw_output = []
            for sample in range(brain_speed):
                raw_output.append(test_brain.advance(observations, output_count))
                if visualization_mode == Learning_flags.VISUALIZATION_ON:
                    visualization.visualize_brain(
                        brain.print_brain_to_json(test_brain))

            # Majority vote per output bit.  Bug fix: the vote threshold
            # is half the number of *samples* (brain_speed), not half the
            # number of outputs; both constants happen to be 5 here, so
            # observable behavior is unchanged.
            for i in range(output_count):
                for c in range(brain_speed):
                    output[i] += raw_output[c][i]
                output[i] = int(output[i] > int(brain_speed / 2))

            action = min(utils.binary_array_to_decimal(output), 17)

            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))

            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                break
        best_score += score
        env.close()
    return (best_score / (desired_score * trials)) * 100
Example #6
0
def learn(existing_brain, eval_function):
	"""Simple (1+1) hill climber: repeatedly mutate the current best brain
	and keep the mutant whenever it scores at least as well.

	existing_brain -- starting brain, or None to begin from a fresh Brain
	eval_function  -- callable(brain, visualization_flag) -> numeric score
	Returns the champion brain once its score reaches 100.
	"""
	input_size = brain.Mutation_params().input_count
	output_size = brain.Mutation_params().output_count

	best_brain = existing_brain if existing_brain is not None else brain.Brain()

	# Score the starting point so mutants have a baseline to beat; use a
	# copy so evaluation cannot mutate the champion.
	benchmark_instance = copy.deepcopy(best_brain)
	best_score = eval_function(benchmark_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
	print('NEW BEST SCORE: ' + str(best_score))

	iteration = 0
	running_total = 0

	while best_score < 100:
		iteration += 1

		mutant = copy.deepcopy(best_brain)
		mutant.default_mutation(input_size, output_size)

		candidate = copy.deepcopy(mutant)
		score = eval_function(candidate, visualization.Visualization_flags.VISUALIZATION_OFF)

		# Rolling 100-iteration average, purely for progress logging.
		running_total += score
		if iteration % 100 == 0:
			print('LAST 100 AVERAGE: ' + str(running_total / 100))
			running_total = 0

		# >= (not >) so equal-scoring mutants still replace the champion,
		# letting the search drift across plateaus.
		if score >= best_score:
			print('NEW BEST SCORE: ' + str(score))
			brain.print_brain_to_file(mutant)
			best_score = score
			best_brain = copy.deepcopy(mutant)

	return best_brain
Example #7
0
def evalute_pendulum_cart_performance(test_brain, visualization_mode):
	"""Score `test_brain` on CartPole-v0 over `trials` episodes.

	Each step the brain is advanced 5 times on the same observation and
	the single output bit is majority-voted into a binary action.
	Returns accumulated reward as a percentage of trials * desired_score.
	(The misspelled name "evalute" is kept -- it is the public interface.)
	"""
	total_score = 0.0
	desired_score = 200
	trials = 100
	for c in range(trials):
		env = gym.make('CartPole-v0')
		observations = env.reset()
		test_instance = copy.deepcopy(test_brain)  # never mutate the caller's brain
		score = 0
		while True:
			if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
				env.render()

			# Widen the global input bounds used by threshold mutations.
			for i in range(len(observations)):
				brain.Mutation_params().upper_input_bounds[i] = max(brain.Mutation_params().upper_input_bounds[i], observations[i])
				brain.Mutation_params().lower_input_bounds[i] = min(brain.Mutation_params().lower_input_bounds[i], observations[i])

			# Majority vote over 5 single-bit advances.  Renamed from
			# `sum`, which shadowed the builtin; also removed an unused
			# `output = [0] * 3` local.
			activation_total = 0
			for i in range(5):
				activation_total += test_instance.advance(observations, 1)[0]
				if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
					visualization.visualize_brain(brain.print_brain_to_json(test_instance))
			action = int(activation_total >= 3)

			if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
				print('ACTION: ' + str(action))

			observations, reward, done, info = env.step(action)
			score += reward
			if done:
				total_score += score
				break
		env.close()
	return (total_score / (trials * desired_score)) * 100
Example #8
0
def evaluate_space_invaders_performance(test_brain, visualization_mode):
    """Score `test_brain` on SpaceInvaders-ram-v0 over `trials` episodes.

    Returns accumulated reward as a percentage of desired_score * trials.

    NOTE(review): this advances `test_brain` itself (no per-trial deep
    copy, unlike the Pong/CartPole evaluators), so internal brain state
    persists across trials -- confirm that is intended.
    """
    best_score = 0.0
    desired_score = 500
    trials = 100
    for trial in range(trials):
        env = gym.make('SpaceInvaders-ram-v0')
        observations = env.reset()
        score = 0
        while True:
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()

            # Widen the global input bounds used by threshold mutations.
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i],
                    observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i],
                    observations[i])

            # Single advance per environment step.  (The original wrapped
            # this in `for i in range(1)` -- a pointless one-iteration
            # loop -- and pre-initialized `output` only to overwrite it.)
            output = test_brain.advance(observations, 3)

            action = min(utils.binary_array_to_decimal(output), 5)

            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))

            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                best_score += score
                env.close()
                break

    return (best_score / (desired_score * trials)) * 100
Example #9
0
def learn(eval_function):
    """(1+1) hill climber starting from a freshly built single-seed brain.

    eval_function -- callable(brain, visualization_flag) -> numeric score
    Returns the champion brain once its score reaches 100.
    """
    input_size = brain.Mutation_params().input_count
    output_size = brain.Mutation_params().output_count

    champion = brain.Brain(1)
    champion_score = 0

    attempts = 0
    window_total = 0

    while champion_score < 100:
        attempts += 1

        # Mutate a copy of the champion, then evaluate a copy of the
        # mutant so neither genome is touched by the evaluation.
        challenger = copy.deepcopy(champion)
        challenger.default_mutation(input_size, output_size)

        trial_copy = copy.deepcopy(challenger)
        trial_score = eval_function(trial_copy, Learning_flags.VISUALIZATION_OFF)

        # Rolling 100-attempt average, for progress logging only.
        window_total += trial_score
        if attempts % 100 == 0:
            print('LAST 100 AVERAGE: ' + str(window_total / 100))
            window_total = 0

        # >= lets equal-scoring challengers replace the champion so the
        # search can drift across score plateaus.
        if trial_score >= champion_score:
            print('NEW BEST SCORE: ' + str(trial_score))
            brain.print_brain_to_file(challenger)
            champion_score = trial_score
            champion = copy.deepcopy(challenger)

    return champion
Example #10
0
def impatient_learn(existing_brain, eval_function):
	"""Hill climber with annealed 'chaos': the longer the search goes
	without a new best score, the more mutation cycles are applied per
	candidate (up to chaos_ceiling), resetting to 1 on every improvement.

	existing_brain -- starting brain, or None to begin from a fresh Brain
	eval_function  -- callable(brain, visualization_flag) -> numeric score
	Returns the champion brain once its score reaches 100.
	"""
	input_size = brain.Mutation_params().input_count
	output_size = brain.Mutation_params().output_count

	if existing_brain != None:
		best_brain = existing_brain
	else:
		best_brain = brain.Brain()
	# Evaluate a copy so the champion itself is never mutated.
	benchmark_instance = copy.deepcopy(best_brain)

	best_score = eval_function(benchmark_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
	print('NEW BEST SCORE: ' + str(best_score))
	
	counter = 0
	average = 0	

	# chaos grows by .01 per failed attempt and is reset on improvement;
	# it scales the number of mutation cycles applied below.
	chaos = 1.0
	chaos_ceiling = 5.0
	
	while best_score < 100:

		counter += 1
		score = 0
		
		mutant = copy.deepcopy(best_brain) 


		for i in range(1):
			mutant.default_mutation(input_size,output_size)

		test_instance = copy.deepcopy(mutant)

		score = eval_function(test_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
		
		# Rolling 100-iteration average, for progress logging only.
		average += score
		if ((counter % 100) == 0):
			print ('LAST 100 AVERAGE: ' + str(average/100))
			print('CHAOS: ' + str(chaos))		
			#if uniform(0,1) > .5:
		#		print('SUPRESSING MUTATION')
		#		brain.Mutation_params().supress_mutation()
		#	else:	
		#		print('AMPLIFYING MUTATION')
		#		brain.Mutation_params().amplify_mutation()
			average = 0
		if score >= best_score:
			# Improvement: reset chaos and adopt the mutant.
			chaos = 1
			print('NEW BEST SCORE: ' + str(score))
			brain.print_brain_to_file(mutant)

			best_score = score

			best_brain = copy.deepcopy(mutant)
		elif chaos < chaos_ceiling:
			chaos += .01
		# randrange(int(chaos)) is 0 while chaos < 2, so mutation_cycles
		# stays 1 until the search has stagnated for a while.
		# NOTE(review): assumes Mutation_params() shares state across
		# instances, otherwise this assignment has no effect -- confirm.
		brain.Mutation_params().mutation_cycles = randrange(int(chaos)) + 1
	

	return best_brain
Example #11
0
def evaluate_biped_performance(test_brain, visualization_mode):
	"""Score `test_brain` on BipedalWalker-v2 over `trials` episodes.

	The brain emits output_count (8) binary outputs per advance; each of
	the 4 continuous joint actions is formed by averaging the difference
	between a paired positive bit (i) and negative bit (i + 4) across
	brain_speed samples, yielding values in roughly [-1, 1].
	Returns accumulated reward as a percentage of desired_score * trials.
	"""
	#top_indices = [87, 79, 80, 77, 112, 1, 8, 72, 6, 28, 3, 110, 82, 85, 78, 9, 81, 90, 106, 74]



	best_score = 0
	desired_score = 1000
	trials = 100
	output_count = 8  # brain output bits: 4 positive/negative pairs
	brain_speed = 5   # brain advances per environment step
	for i in range(trials):
		

		env = gym.make('BipedalWalker-v2')
		# Evaluate a copy so the caller's brain is never mutated.
		test_instance = copy.deepcopy(test_brain)
		observations = env.reset()
		score = 0
		counter = 0
		# Hard cap of 1000 steps per episode.
		while counter < 1000:
			counter += 1
			#score += 1
			if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
				env.render()



			output = [0] * 4
			#inputs = utils.extract_observations(top_indices, observations)
		

			# Widen the global input bounds used by threshold mutations.
			for i in range(len(observations)):
				brain.Mutation_params().upper_input_bounds[i] = max(brain.Mutation_params().upper_input_bounds[i],observations[i])
				brain.Mutation_params().lower_input_bounds[i] = min(brain.Mutation_params().lower_input_bounds[i],observations[i])

			

			# Sample the brain brain_speed times on the same observation.
			raw_output = []
			for i in range(brain_speed):
				raw_output.append ( test_instance.advance(observations, output_count))
				#if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON: 
				#	visualization.visualize_brain(brain.print_brain_to_json(test_instance))	

			# action[i] = mean over samples of (bit i) - (bit i + 4).
			for i in range(int(output_count/2)):
				for c in range(brain_speed):	
					output[i] += raw_output[c][i]
					output[i] -= raw_output[c][i + int(output_count/2)]

				output[i] = (output[i]/float(brain_speed))

			action = output
			
	
			



			if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
				print(counter)
				print('ACTION: ' + str(action))
		
			observations,reward,done,info = env.step(action)
			#if reward > 0:			
			score += reward
			if done:
				break
		best_score += score
		env.close()

	return ((best_score/(desired_score* trials)) * 100)		
    def test_mutation_params_constructor(self):
        """A freshly constructed Mutation_params must carry the documented
        default value for every tunable field."""
        params = brain.Mutation_params()
        self.assertEqual(params.neuron_start_count, 1)
        self.assertEqual(params.swap_prob, .1)
        self.assertEqual(params.neuron_count_prob, .5)
        self.assertEqual(params.neuron_count_bias, .5)
        self.assertEqual(params.target_limit, 5)
        self.assertEqual(params.target_count_prob, .25)
        self.assertEqual(params.target_count_bias, .5)
        self.assertEqual(params.retarget_prob, .25)
        self.assertEqual(params.potential_prob, .1)
        self.assertEqual(params.potential_strength, .1)
        self.assertEqual(params.threshold_prob, .1)
        self.assertEqual(params.threshold_strength, .1)
        self.assertEqual(params.reflex_pair_prob, .1)
        self.assertEqual(params.input_count, 10)
        self.assertEqual(params.output_count, 10)

        self.assertEqual(params.sensory_prob, .25)
        self.assertEqual(params.actuating_prob, .25)
        self.assertEqual(params.hidden_prob, .1)

        self.assertEqual(params.mutation_cycles, 1)
        # Bug fix: `len(...) is 0` compared object identity with a literal
        # (SyntaxWarning on Python >= 3.8); compare values via assertEqual.
        self.assertEqual(len(params.upper_input_bounds), 0)
        self.assertEqual(len(params.lower_input_bounds), 0)