def test_binary_array_to_decimal(self):
    # binary_array_to_decimal reads the array most-significant-bit first
    bits = [1, 0, 1, 0, 0, 1]
    expected_output = 41
    self.assertEqual(utils.binary_array_to_decimal(bits), expected_output)
    bits = [1, 0, 1, 0, 1, 0]
    expected_output = 42
    self.assertEqual(utils.binary_array_to_decimal(bits), expected_output)
    bits = [1, 1, 0, 0, 1, 1]
    expected_output = 51
    self.assertEqual(utils.binary_array_to_decimal(bits), expected_output)
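
# For reference, a minimal sketch of an implementation consistent with the
# test vectors above (the tests imply the array is read MSB-first; the repo's
# actual utils.binary_array_to_decimal may differ in details):
def _binary_array_to_decimal_sketch(bits):
    value = 0
    for bit in bits:
        value = (value << 1) | bit   # shift left, then append the next bit
    return value

assert _binary_array_to_decimal_sketch([1, 0, 1, 0, 0, 1]) == 41  # 0b101001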
def evaluate_pong_performance(test_brain, visualization_mode):
    # top_indices = [87, 79, 80, 77, 112, 1, 8, 72, 6, 28, 3, 110, 82, 85, 78, 9, 81, 90, 106, 74]
    best_score = 0
    desired_score = 2000.0
    brain_speed = 5   # samples per environment step, combined by majority vote
    trials = 100
    output_count = 3
    for _ in range(trials):
        env = gym.make('Pong-ram-v0')
        test_instance = copy.deepcopy(test_brain)
        observations = env.reset()
        score = 0
        while True:
            if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                env.render()
            output = [0] * output_count
            # widen the input bounds used by threshold mutations to cover
            # every observation seen so far
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])
            # sample the brain brain_speed times on the same observations
            raw_output = []
            for _ in range(brain_speed):
                raw_output.append(test_instance.advance(observations, output_count))
                if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                    visualization.visualize_brain(brain.print_brain_to_json(test_instance))
            # majority vote per output bit across the brain_speed samples
            for bit in range(output_count):
                for sample in range(brain_speed):
                    output[bit] += raw_output[sample][bit]
                output[bit] = int(output[bit] > int(brain_speed / 2))
            action = min(utils.binary_array_to_decimal(output), 5)
            if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            score += 1  # fitness counts frames survived rather than reward
            if done:
                best_score += score
                env.close()
                break
    return (best_score / (desired_score * trials)) * 100
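
# The per-bit vote inside evaluate_pong_performance can be factored into a
# helper. A minimal sketch (majority_vote is a hypothetical name, not an
# existing function in utils): an output bit fires only when more than half
# of the samples set it.
def majority_vote(samples):
    n_samples = len(samples)
    n_bits = len(samples[0])
    return [int(sum(s[bit] for s in samples) > n_samples // 2)
            for bit in range(n_bits)]

# e.g. three samples of three bits each:
assert majority_vote([[1, 0, 1], [1, 1, 0], [0, 0, 1]]) == [1, 0, 1]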
def advance_agents(self, visualization_mode):
    ## get a randomly ordered list of agents; for each: sense, run the brain,
    ## then harvest the actuation and publish it to the action queue
    agent_keys = list(self.agents.keys())
    shuffle(agent_keys)
    for key in agent_keys:
        # each 'key' is a tuple holding the agent's location in (x, y) format
        agent = self.agents[key]
        assert self.grid[key[1]][key[0]] == Object_type.AGENT
        agent.energy -= 1
        # sense
        observations = self.sense(key, agent.direction)
        # setting our bounds appropriately for threshold mutations
        for i in range(len(observations)):
            brain.Mutation_params().upper_input_bounds[i] = max(
                brain.Mutation_params().upper_input_bounds[i], observations[i])
            brain.Mutation_params().lower_input_bounds[i] = min(
                brain.Mutation_params().lower_input_bounds[i], observations[i])
        result = agent.brain.advance_n_with_mode(
            observations, brain.Mutation_params.output_count, 10, visualization_mode)
        numerical_result = utils.binary_array_to_decimal(result)
        if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
            print(result)
            print(numerical_result)
        agent.generate_action(numerical_result, self, key)
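
# The bounds updates above are only effective if brain.Mutation_params()
# shares its state across instances; a fresh object per call would be mutated
# and thrown away. A minimal sketch of one way that can hold, using
# class-level attributes (an assumption for illustration, not a copy of
# brain.py; the sizes are hypothetical):
class Mutation_params_sketch:
    input_count = 128
    output_count = 4
    # start inverted so the first observation initializes both bounds
    upper_input_bounds = [float('-inf')] * input_count
    lower_input_bounds = [float('inf')] * input_count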
def evaluate_chopper_performance(test_brain, visualization_mode):
    # top_indices = [87, 79, 80, 77, 112, 1, 8, 72, 6, 28, 3, 110, 82, 85, 78, 9, 81, 90, 106, 74]
    best_score = 0.0
    desired_score = 1000
    trials = 5
    output_count = 5
    brain_speed = 5   # samples per environment step, combined by majority vote
    for _ in range(trials):
        env = gym.make('ChopperCommand-ram-v0')
        observations = env.reset()
        score = 0
        for _ in range(1000):   # cap each trial at 1000 steps
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()
            output = [0] * output_count
            # widen the input bounds used by threshold mutations
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])
            # sample the brain brain_speed times on the same observations
            raw_output = []
            for _ in range(brain_speed):
                raw_output.append(test_brain.advance(observations, output_count))
                if visualization_mode == Learning_flags.VISUALIZATION_ON:
                    visualization.visualize_brain(
                        brain.print_brain_to_json(test_brain))
            # majority vote per output bit: a bit fires only if more than
            # half of the brain_speed samples set it
            for bit in range(output_count):
                for sample in range(brain_speed):
                    output[bit] += raw_output[sample][bit]
                output[bit] = int(output[bit] > int(brain_speed / 2))
            action = min(utils.binary_array_to_decimal(output), 17)
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                break
        best_score += score
        env.close()
    return (best_score / (desired_score * trials)) * 100
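
# With output_count = 5 the decoded output spans 0..31, while the clamp to 17
# suggests ChopperCommand-ram-v0 exposes the full 18-action Atari set (0..17),
# so values 18..31 all fold onto action 17. A quick arithmetic check of the
# clamp (plain Python, no gym needed):
assert min(0b11111, 17) == 17   # all-ones output [1,1,1,1,1] = 31 -> 17
assert min(0b01001, 17) == 9    # [0,1,0,0,1] = 9 passes through unchanged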
def evaluate_space_invaders_performance(test_brain, visualization_mode):
    # top_indices = [87, 79, 80, 77, 112, 1, 8, 72, 6, 28, 3, 110, 82, 85, 78, 9, 81, 90, 106, 74]
    best_score = 0.0
    desired_score = 500
    trials = 100
    output_count = 3
    for _ in range(trials):
        env = gym.make('SpaceInvaders-ram-v0')
        observations = env.reset()
        score = 0
        while True:
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()
            # widen the input bounds used by threshold mutations
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])
            # a single brain sample per step (no majority vote here)
            output = test_brain.advance(observations, output_count)
            action = min(utils.binary_array_to_decimal(output), 5)
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                best_score += score
                env.close()
                break
    return (best_score / (desired_score * trials)) * 100
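
# All three evaluators share one loop: make the env, reset, run the brain,
# decode a discrete action, step, and accumulate a score. A consolidated
# sketch under that assumption (evaluate_gym_performance is a hypothetical
# name; it reuses the repo's gym / utils / Learning_flags imports and the
# same pre-0.26 gym API as the code above):
def evaluate_gym_performance(test_brain, env_name, output_count, max_action,
                             desired_score, trials, visualization_mode):
    best_score = 0.0
    for _ in range(trials):
        env = gym.make(env_name)
        observations = env.reset()
        score = 0.0
        done = False
        while not done:
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()
            output = test_brain.advance(observations, output_count)
            action = min(utils.binary_array_to_decimal(output), max_action)
            observations, reward, done, info = env.step(action)
            score += reward
        best_score += score
        env.close()
    return (best_score / (desired_score * trials)) * 100

# e.g. evaluate_gym_performance(my_brain, 'SpaceInvaders-ram-v0', 3, 5,
#                               500, 100, Learning_flags.VISUALIZATION_ON)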