Example #1
def test_binary_array_to_decimal(self):
    # Bits are read most-significant-first: [1, 0, 1, 0, 0, 1] -> 0b101001 -> 41.
    bits = [1, 0, 1, 0, 0, 1]
    self.assertEqual(utils.binary_array_to_decimal(bits), 41)
    bits = [1, 0, 1, 0, 1, 0]
    self.assertEqual(utils.binary_array_to_decimal(bits), 42)
    bits = [1, 1, 0, 0, 1, 1]
    self.assertEqual(utils.binary_array_to_decimal(bits), 51)
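The expected values pin down the bit order: [1, 0, 1, 0, 0, 1] maps to 41 (0b101001), so the first array element is the most significant bit. A minimal sketch of an implementation consistent with these tests (the real utils.binary_array_to_decimal may differ):

# Minimal sketch consistent with the tests above; the real implementation
# in utils may differ. Bits are read most-significant-first.
def binary_array_to_decimal(bits):
    value = 0
    for bit in bits:
        value = (value << 1) | bit
    return value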
Example #2
def evaluate_pong_performance(test_brain, visualization_mode):
    best_score = 0
    desired_score = 2000.0
    brain_speed = 5  # brain ticks per environment step
    trials = 100
    output_count = 3
    for _ in range(trials):
        env = gym.make('Pong-ram-v0')
        test_instance = copy.deepcopy(test_brain)  # fresh copy for each trial
        observations = env.reset()
        score = 0
        while True:
            if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                env.render()

            # Widen the recorded input bounds so threshold mutations stay
            # within the values actually observed.
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])

            # Tick the brain brain_speed times, collecting one binary output
            # vector per tick.
            raw_output = []
            for _ in range(brain_speed):
                raw_output.append(test_instance.advance(observations, output_count))
                if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                    visualization.visualize_brain(brain.print_brain_to_json(test_instance))

            # Majority-vote each output bit across the brain_speed ticks.
            output = [0] * output_count
            for i in range(output_count):
                for tick in range(brain_speed):
                    output[i] += raw_output[tick][i]
                output[i] = int(output[i] > brain_speed // 2)

            # Decode the bits to an action index, clamped to Pong's 6-action space.
            action = min(utils.binary_array_to_decimal(output), 5)

            if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))

            observations, reward, done, info = env.step(action)
            score += 1  # fitness counts frames survived; raw reward is ignored
            if done:
                best_score += score
                env.close()
                break

    return (best_score / (desired_score * trials)) * 100
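The aggregation step above is a per-bit majority vote over the brain_speed ticks. Pulled out as a standalone helper (hypothetical; not a function in the original module), the decode looks like this:

# Hypothetical helper, not part of the original module: majority-vote each
# output bit across brain_speed ticks, then decode the bits to an action
# index clamped to the environment's action space.
def majority_vote_action(raw_output, output_count, brain_speed, max_action):
    output = [0] * output_count
    for i in range(output_count):
        votes = sum(raw_output[tick][i] for tick in range(brain_speed))
        output[i] = int(votes > brain_speed // 2)  # 1 only on a strict majority
    return min(utils.binary_array_to_decimal(output), max_action)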
Example #3
    def advance_agents(self, visualization_mode):
        # Visit agents in random order: sense, run the brain, then publish
        # the resulting action.
        agent_keys = list(self.agents.keys())
        shuffle(agent_keys)
        for key in agent_keys:  # each key is the agent's (x, y) grid location
            agent = self.agents[key]
            assert self.grid[key[1]][key[0]] == Object_type.AGENT
            agent.energy -= 1  # living costs one energy per tick

            # Sense, then widen the recorded input bounds so threshold
            # mutations stay within the values actually observed.
            observations = self.sense(key, agent.direction)
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])

            # Run the brain for 10 ticks and decode its binary output vector
            # into a single action number.
            result = agent.brain.advance_n_with_mode(
                observations, brain.Mutation_params.output_count, 10,
                visualization_mode)
            numerical_result = utils.binary_array_to_decimal(result)

            if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                print(result)
                print(numerical_result)
            agent.generate_action(numerical_result, self, key)
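The same bounds-update loop appears in every example. As a hypothetical refactor, assuming upper_input_bounds and lower_input_bounds really are shared, class-level lists on brain.Mutation_params (which is what the repeated Mutation_params() calls imply), it could be factored out:

# Hypothetical refactor of the repeated bounds-update loop; assumes the
# bound lists are class-level state shared by every Mutation_params instance.
def track_input_bounds(observations):
    params = brain.Mutation_params()
    for i, value in enumerate(observations):
        params.upper_input_bounds[i] = max(params.upper_input_bounds[i], value)
        params.lower_input_bounds[i] = min(params.lower_input_bounds[i], value)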
Example #4
def evaluate_chopper_performance(test_brain, visualization_mode):
    best_score = 0.0
    desired_score = 1000
    trials = 5
    output_count = 5
    brain_speed = 5  # brain ticks per environment step
    for _ in range(trials):
        env = gym.make('ChopperCommand-ram-v0')
        observations = env.reset()
        score = 0
        for _ in range(1000):  # cap each trial at 1000 environment steps
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()

            # Widen the recorded input bounds so threshold mutations stay
            # within the values actually observed.
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])

            # Tick the brain brain_speed times, collecting one binary output
            # vector per tick.
            raw_output = []
            for _ in range(brain_speed):
                raw_output.append(test_brain.advance(observations, output_count))
                if visualization_mode == Learning_flags.VISUALIZATION_ON:
                    visualization.visualize_brain(
                        brain.print_brain_to_json(test_brain))

            # Majority-vote each output bit across the brain_speed ticks.
            output = [0] * output_count
            for i in range(output_count):
                for tick in range(brain_speed):
                    output[i] += raw_output[tick][i]
                output[i] = int(output[i] > brain_speed // 2)

            # Decode the bits to an action index, clamped to the 18-action space.
            action = min(utils.binary_array_to_decimal(output), 17)

            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))

            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                break
        best_score += score
        env.close()
    return (best_score / (desired_score * trials)) * 100
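The return value expresses the accumulated reward as a percentage of the target: for example, a total reward of 250 over 5 trials with desired_score = 1000 yields (250 / 5000) * 100 = 5.0. A hypothetical call site (my_brain and a complementary VISUALIZATION_OFF flag are assumed names, not taken from the original):

# Hypothetical usage; my_brain and Learning_flags.VISUALIZATION_OFF are
# assumed names, not taken from the original module.
fitness = evaluate_chopper_performance(my_brain, Learning_flags.VISUALIZATION_OFF)
print('Chopper fitness: %.2f%%' % fitness)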
Example #5
def evaluate_space_invaders_performance(test_brain, visualization_mode):
    best_score = 0.0
    desired_score = 500
    trials = 100
    for _ in range(trials):
        env = gym.make('SpaceInvaders-ram-v0')
        observations = env.reset()
        score = 0
        while True:
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()

            # Widen the recorded input bounds so threshold mutations stay
            # within the values actually observed.
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])

            # Single brain tick per environment step; no majority vote here.
            output = test_brain.advance(observations, 3)

            # Decode the 3 output bits to an action index, clamped to the
            # 6-action space.
            action = min(utils.binary_array_to_decimal(output), 5)

            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))

            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                best_score += score
                env.close()
                break

    return (best_score / (desired_score * trials)) * 100
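All three evaluators share the same episode skeleton. A generalized sketch, assuming the pre-0.26 gym API used above (reset() returns only observations, step() returns a 4-tuple); env_name, decode_action, and max_steps are illustrative parameters, not names from the original module:

# Hypothetical generalization of the evaluators' shared episode loop.
import gym

def run_episode(env_name, test_brain, decode_action, max_steps=10000):
    env = gym.make(env_name)
    observations = env.reset()  # old gym API: reset() -> observations
    score = 0
    for _ in range(max_steps):
        action = decode_action(test_brain, observations)
        observations, reward, done, info = env.step(action)
        score += reward
        if done:
            break
    env.close()
    return score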