def evaluate_pong_performance(test_brain, visualization_mode):
    """Evaluate a brain on Pong-ram-v0 and return a percentage score.

    Each env step the brain is advanced ``brain_speed`` times and the three
    binary outputs are combined by majority vote into an action index
    (clamped to 5).  Scoring counts frames survived — the env reward is
    deliberately ignored — normalised by ``desired_score`` per trial.
    """
    accumulated_frames = 0
    desired_score = 2000.0   # per-trial normalisation target
    brain_speed = 5          # brain ticks per env step (majority vote)
    trials = 100
    output_count = 3
    show = visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON
    for _ in range(trials):
        env = gym.make('Pong-ram-v0')
        brain_copy = copy.deepcopy(test_brain)
        observations = env.reset()
        frames = 0
        while True:
            if show:
                env.render()
            # Widen the recorded input bounds used by threshold mutations.
            # NOTE(review): assumes brain.Mutation_params() behaves like a
            # shared singleton; otherwise these writes land on throwaway
            # instances — confirm.
            for idx in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[idx] = max(
                    brain.Mutation_params().upper_input_bounds[idx], observations[idx])
                brain.Mutation_params().lower_input_bounds[idx] = min(
                    brain.Mutation_params().lower_input_bounds[idx], observations[idx])
            samples = []
            for _ in range(brain_speed):
                samples.append(brain_copy.advance(observations, 3))
            if show:
                visualization.visualize_brain(brain.print_brain_to_json(brain_copy))
            votes = [0] * output_count
            for bit in range(output_count):
                for run in range(brain_speed):
                    votes[bit] += samples[run][bit]
                votes[bit] = int(votes[bit] > brain_speed // 2)
            action = min(utils.binary_array_to_decimal(votes), 5)
            if show:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            frames += 1  # reward intentionally unused: survival-time scoring
            if done:
                accumulated_frames += frames  # original also noted "+ 21" as an idea
                env.close()
                break
    return (accumulated_frames / (desired_score * trials)) * 100
def test_mutation_params_default_1(self):
    """set_mutation_to_default_1() must restore every documented default,
    even after a field has been clobbered."""
    param_instance = brain.Mutation_params()
    param_instance.swap_prob = 999  # deliberately clobbered; the reset must undo it
    param_instance.set_mutation_to_default_1()
    # NOTE(review): this assertion checks a *fresh* instance rather than
    # param_instance — presumably relying on shared/singleton state; confirm.
    self.assertEqual(brain.Mutation_params().neuron_start_count, 1)
    self.assertEqual(param_instance.swap_prob, .1)
    self.assertEqual(param_instance.neuron_count_prob, .5)
    self.assertEqual(param_instance.neuron_count_bias, .5)
    self.assertEqual(param_instance.target_limit, 5)
    self.assertEqual(param_instance.target_count_prob, .25)
    self.assertEqual(param_instance.target_count_bias, .5)
    self.assertEqual(param_instance.retarget_prob, .25)
    self.assertEqual(param_instance.potential_prob, .1)
    self.assertEqual(param_instance.potential_strength, .1)
    self.assertEqual(param_instance.threshold_prob, .1)
    self.assertEqual(param_instance.threshold_strength, .1)
    self.assertEqual(param_instance.reflex_pair_prob, 0)
    self.assertEqual(param_instance.input_count, 10)
    self.assertEqual(param_instance.output_count, 10)
    self.assertEqual(param_instance.sensory_prob, .25)
    self.assertEqual(param_instance.actuating_prob, .25)
    self.assertEqual(param_instance.hidden_prob, .1)
    self.assertEqual(param_instance.mutation_cycles, 1)
    # Fix: `len(...) is 0` compares identity, not value, and emits a
    # SyntaxWarning on CPython >= 3.8; assert equality instead.
    self.assertEqual(len(param_instance.upper_input_bounds), 0)
    self.assertEqual(len(param_instance.lower_input_bounds), 0)
def advance_agents(self, visualization_mode):
    """Advance every agent one simulation tick: sense, run the brain, act.

    Agents are processed in shuffled order so no grid position gets a
    systematic first-mover advantage.  Each tick costs one energy.
    """
    ## get randomly ordered list of agents, sense, run, harvest actuation and publish to action_queue
    agent_keys = list(self.agents.keys())
    shuffle(agent_keys)
    for key in agent_keys:
        #useful to note here that each 'key' is a tuple containing agent location in (x,y) format
        agent = self.agents[key]
        # Grid is indexed [row][col], i.e. [y][x].
        assert (self.grid[key[1]][key[0]] == Object_type.AGENT)
        agent.energy -= 1  # flat metabolic cost per tick
        #sense
        observations = self.sense(key, agent.direction)
        output = []  # NOTE(review): never used below — candidate for removal
        for i in range( len(observations) ):
            ## setting our bounds appropriately for threshold mutations
            # NOTE(review): assumes brain.Mutation_params() returns a shared
            # (singleton-like) object; otherwise these writes are discarded.
            brain.Mutation_params().upper_input_bounds[i] = max( brain.Mutation_params().upper_input_bounds[i], observations[i])
            brain.Mutation_params().lower_input_bounds[i] = min( brain.Mutation_params().lower_input_bounds[i], observations[i])
        # NOTE(review): output_count is read off the class here (no call
        # parentheses), unlike the instance access used elsewhere in this
        # codebase — confirm it is really a class attribute.
        result = agent.brain.advance_n_with_mode( observations, brain.Mutation_params.output_count, 10, visualization_mode)
        numerical_result = utils.binary_array_to_decimal(result)
        # print(result)
        # print(numerical_result)
        if visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON:
            print(result)
            print(numerical_result)
        agent.generate_action(numerical_result, self, key)
def population_learn(existing_brain, eval_function):
    """Evolve a population of brains until one scores at least 100.

    The population is seeded either with fresh brains or with deep copies of
    ``existing_brain``.  Each generation: evaluate all members, keep the top
    ``population_size`` by score, then append ``population_size`` mutated
    cross-over offspring of random distinct parent pairs.

    Returns the best-scoring brain found.  (Bug fix: the original fell off
    the end of the function and implicitly returned None.)
    """
    population_size = brain.Mutation_params().population_size
    input_size = brain.Mutation_params().input_count
    output_size = brain.Mutation_params().output_count
    population = []  # list of mutable [score, brain] pairs
    if existing_brain is None:
        for _ in range(population_size):
            population.append([0, brain.Brain()])
    else:
        for _ in range(population_size):
            population.append([0, copy.deepcopy(existing_brain)])
    best_score = float("-inf")
    best_brain = None
    while best_score < 100:
        # Evaluate every member on a throwaway deep copy so evaluation
        # cannot mutate the genome being scored.
        for i in range(len(population)):
            test_instance = copy.deepcopy(population[i][1])
            population[i][0] = eval_function(
                test_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
            if population[i][0] >= best_score:
                best_score = population[i][0]
                best_brain = population[i][1]
        # Survivor selection: keep the top population_size by score.
        population = sorted(population, key=lambda x: -x[0])
        population = population[:population_size]
        print('\nSCORES: ')
        for p in population:
            print(str(p[0]) + ',')
        # Breed population_size offspring from random distinct parent pairs.
        for _ in range(population_size):
            new_pair = [randrange(population_size), randrange(population_size)]
            while new_pair[0] == new_pair[1]:
                new_pair[1] = randrange(population_size)
            new_offspring = brain.cross_over(population[new_pair[0]][1],
                                             population[new_pair[1]][1])
            new_offspring.verify_network_consistency()
            new_offspring.default_mutation(input_size, output_size)
            population.append([0, new_offspring])
    # Bug fix: previously the evolved winner was dropped on loop exit.
    return best_brain
def evaluate_chopper_performance(test_brain, visualization_mode):
    """Evaluate a brain on ChopperCommand-ram-v0; returns reward as a percentage.

    Runs ``trials`` episodes capped at 1000 steps each.  Per step the brain is
    advanced ``brain_speed`` times and each of the ``output_count`` binary
    outputs is decided by majority vote, then decoded to an action index
    (clamped to 17, the last Atari action).

    NOTE(review): unlike the other evaluators, test_brain is advanced directly
    rather than on a deepcopy, so its internal state persists across trials —
    confirm this is intentional.
    """
    best_score = 0.0
    desired_score = 1000
    trials = 5
    output_count = 5
    brain_speed = 5
    for trial in range(trials):
        env = gym.make('ChopperCommand-ram-v0')
        observations = env.reset()
        score = 0
        for step in range(1000):  # hard cap on episode length
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()
            output = [0] * output_count  # fix: was a hard-coded 5
            # Widen the recorded input bounds used by threshold mutations.
            for i in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[i] = max(
                    brain.Mutation_params().upper_input_bounds[i], observations[i])
                brain.Mutation_params().lower_input_bounds[i] = min(
                    brain.Mutation_params().lower_input_bounds[i], observations[i])
            raw_output = []
            for _ in range(brain_speed):
                raw_output.append(test_brain.advance(observations, 5))
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                visualization.visualize_brain(
                    brain.print_brain_to_json(test_brain))
            for i in range(output_count):
                for sample in range(brain_speed):  # fix: no longer shadows the step counter `c`
                    output[i] += raw_output[sample][i]
                # Fix: the majority vote is over the brain_speed samples, not
                # the output width (both constants are 5 here, so reported
                # scores are unchanged, but the intent is brain_speed).
                output[i] = int(output[i] > int(brain_speed / 2))
            action = min(utils.binary_array_to_decimal(output), 17)
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            score += reward
            if done:
                break
        best_score += score
        env.close()
    return (best_score / (desired_score * trials)) * 100
def learn(existing_brain, eval_function):
    """Hill-climb from ``existing_brain`` (or a fresh Brain) until the
    evaluator reports a score of 100.

    Every iteration mutates a copy of the current champion, evaluates the
    mutant on a throwaway deep copy, and keeps it when it scores at least as
    well.  Prints a rolling 100-iteration score average and dumps each new
    champion to disk via brain.print_brain_to_file.
    """
    in_count = brain.Mutation_params().input_count
    out_count = brain.Mutation_params().output_count
    champion = existing_brain if existing_brain is not None else brain.Brain()
    champion_score = eval_function(copy.deepcopy(champion),
                                   visualization.Visualization_flags.VISUALIZATION_OFF)
    print('NEW BEST SCORE: ' + str(champion_score))
    iteration = 0
    rolling_total = 0
    while champion_score < 100:
        iteration += 1
        candidate = copy.deepcopy(champion)
        candidate.default_mutation(in_count, out_count)
        score = eval_function(copy.deepcopy(candidate),
                              visualization.Visualization_flags.VISUALIZATION_OFF)
        rolling_total += score
        if iteration % 100 == 0:
            print('LAST 100 AVERAGE: ' + str(rolling_total / 100))
            rolling_total = 0
        if score >= champion_score:  # >= lets equal scorers drift the champion
            print('NEW BEST SCORE: ' + str(score))
            brain.print_brain_to_file(candidate)
            champion_score = score
            champion = copy.deepcopy(candidate)
    return champion
def evalute_pendulum_cart_performance(test_brain, visualization_mode):
    """Evaluate a brain on CartPole-v0; returns reward as a percentage.

    The single binary output is sampled five times per env step and the
    action is 1 when at least three samples fire.  (The function name keeps
    its historical misspelling so existing callers are unaffected.)
    """
    total_score = 0.0
    desired_score = 200   # CartPole-v0's max episode reward
    trials = 100
    show = visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON
    for _ in range(trials):
        env = gym.make('CartPole-v0')
        observations = env.reset()
        pilot = copy.deepcopy(test_brain)
        episode_score = 0
        while True:
            if show:
                env.render()
            # Widen the recorded input bounds used by threshold mutations.
            for idx in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[idx] = max(
                    brain.Mutation_params().upper_input_bounds[idx], observations[idx])
                brain.Mutation_params().lower_input_bounds[idx] = min(
                    brain.Mutation_params().lower_input_bounds[idx], observations[idx])
            fire_count = 0  # renamed from `sum`, which shadowed the builtin
            for _ in range(5):
                fire_count += pilot.advance(observations, 1)[0]
            if show:
                visualization.visualize_brain(brain.print_brain_to_json(pilot))
            action = int(fire_count >= 3)
            if show:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            episode_score += reward
            if done:
                total_score += episode_score
                break
        env.close()
    return (total_score / (trials * desired_score)) * 100
def evaluate_space_invaders_performance(test_brain, visualization_mode):
    """Evaluate a brain on SpaceInvaders-ram-v0; returns reward as a percentage.

    One brain advance per env step (no majority voting here); the 3-bit
    output is decoded to an action index clamped to 5.
    """
    total_reward = 0.0
    desired_score = 500
    trials = 100
    for _ in range(trials):
        env = gym.make('SpaceInvaders-ram-v0')
        observations = env.reset()
        episode_reward = 0
        while True:
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                env.render()
            # Widen the recorded input bounds used by threshold mutations.
            for idx in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[idx] = max(
                    brain.Mutation_params().upper_input_bounds[idx], observations[idx])
                brain.Mutation_params().lower_input_bounds[idx] = min(
                    brain.Mutation_params().lower_input_bounds[idx], observations[idx])
            output = test_brain.advance(observations, 3)
            action = min(utils.binary_array_to_decimal(output), 5)
            if visualization_mode == Learning_flags.VISUALIZATION_ON:
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            episode_reward += reward
            if done:
                total_reward += episode_reward
                env.close()
                break
    return (total_reward / (desired_score * trials)) * 100
def learn(eval_function):
    """Hill-climb from a fresh Brain(1) until the evaluator reports 100.

    Keeps the best-scoring mutant seen so far (ties included, so the champion
    can drift) and logs a rolling 100-iteration score average.
    """
    in_count = brain.Mutation_params().input_count
    out_count = brain.Mutation_params().output_count
    champion = brain.Brain(1)
    champion_score = 0
    iteration = 0
    rolling_total = 0
    while champion_score < 100:
        iteration += 1
        candidate = copy.deepcopy(champion)
        candidate.default_mutation(in_count, out_count)
        score = eval_function(copy.deepcopy(candidate),
                              Learning_flags.VISUALIZATION_OFF)
        rolling_total += score
        if iteration % 100 == 0:
            print('LAST 100 AVERAGE: ' + str(rolling_total / 100))
            rolling_total = 0
        if score >= champion_score:  # >= lets equal scorers drift the champion
            print('NEW BEST SCORE: ' + str(score))
            brain.print_brain_to_file(candidate)
            champion_score = score
            champion = copy.deepcopy(candidate)
    return champion
def impatient_learn(existing_brain, eval_function):
    """Hill-climb like learn(), but escalate mutation intensity ("chaos")
    the longer the search goes without improvement.

    chaos grows by .01 per non-improving iteration up to chaos_ceiling and
    resets to 1 on any improvement; each iteration the number of mutation
    cycles is drawn uniformly from [1, int(chaos)].
    """
    input_size = brain.Mutation_params().input_count
    output_size = brain.Mutation_params().output_count
    if existing_brain != None:
        best_brain = existing_brain
    else:
        best_brain = brain.Brain()
    # Baseline score of the starting brain, evaluated on a copy so the
    # evaluation cannot mutate the original.
    benchmark_instance = copy.deepcopy(best_brain)
    best_score = eval_function(benchmark_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
    print('NEW BEST SCORE: ' + str(best_score))
    counter = 0
    average = 0
    chaos = 1.0          # current mutation-intensity multiplier
    chaos_ceiling = 5.0  # cap on chaos growth
    while best_score < 100:  # stop once the evaluator reports 100%
        counter += 1
        score = 0
        mutant = copy.deepcopy(best_brain)
        for i in range(1):
            mutant.default_mutation(input_size,output_size)
        test_instance = copy.deepcopy(mutant)
        score = eval_function(test_instance, visualization.Visualization_flags.VISUALIZATION_OFF)
        average += score
        if ((counter % 100) == 0):
            # Periodic progress report over the last 100 candidates.
            print ('LAST 100 AVERAGE: ' + str(average/100))
            print('CHAOS: ' + str(chaos))
            #if uniform(0,1) > .5:
            #    print('SUPRESSING MUTATION')
            #    brain.Mutation_params().supress_mutation()
            # else:
            #    print('AMPLIFYING MUTATION')
            #    brain.Mutation_params().amplify_mutation()
            average = 0
        if score >= best_score:
            chaos = 1  # improvement found: cool the search back down
            print('NEW BEST SCORE: ' + str(score))
            brain.print_brain_to_file(mutant)
            best_score = score
            best_brain = copy.deepcopy(mutant)
        elif chaos < chaos_ceiling:
            chaos += .01
        # NOTE(review): assumes brain.Mutation_params() returns a shared
        # (singleton-like) object; otherwise this setting is written to a
        # throwaway instance and has no effect — confirm.
        brain.Mutation_params().mutation_cycles = randrange(int(chaos)) + 1
    return best_brain
def evaluate_biped_performance(test_brain, visualization_mode):
    """Evaluate a brain on BipedalWalker-v2; returns reward as a percentage.

    The brain emits ``output_count`` binary outputs per run.  Each of the
    four continuous action channels is the mean, over ``brain_speed`` runs,
    of one output minus its paired output in the second half of the vector,
    yielding values in [-1, 1].
    """
    total_reward = 0
    desired_score = 1000
    trials = 100
    output_count = 8
    brain_speed = 5
    half = output_count // 2  # action channels = paired halves of the output
    show = visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON
    for _ in range(trials):
        env = gym.make('BipedalWalker-v2')
        walker = copy.deepcopy(test_brain)
        observations = env.reset()
        episode_reward = 0
        step = 0
        while step < 1000:  # hard cap on episode length
            step += 1
            if show:
                env.render()
            # Widen the recorded input bounds used by threshold mutations.
            for idx in range(len(observations)):
                brain.Mutation_params().upper_input_bounds[idx] = max(
                    brain.Mutation_params().upper_input_bounds[idx], observations[idx])
                brain.Mutation_params().lower_input_bounds[idx] = min(
                    brain.Mutation_params().lower_input_bounds[idx], observations[idx])
            samples = []
            for _ in range(brain_speed):
                samples.append(walker.advance(observations, output_count))
            # Brain visualization intentionally disabled here (kept from the
            # original):
            #if show:
            #    visualization.visualize_brain(brain.print_brain_to_json(walker))
            action = [0] * half
            for ch in range(half):
                for run in range(brain_speed):
                    action[ch] += samples[run][ch]
                    action[ch] -= samples[run][ch + half]
                action[ch] = action[ch] / float(brain_speed)
            if show:
                print(step)
                print('ACTION: ' + str(action))
            observations, reward, done, info = env.step(action)
            episode_reward += reward
            if done:
                break
        total_reward += episode_reward
        env.close()
    return (total_reward / (desired_score * trials)) * 100
def test_mutation_params_constructor(self):
    """A freshly constructed Mutation_params must carry every documented default."""
    # One instance suffices: the constructor is what's under test.
    params = brain.Mutation_params()
    self.assertEqual(params.neuron_start_count, 1)
    self.assertEqual(params.swap_prob, .1)
    self.assertEqual(params.neuron_count_prob, .5)
    self.assertEqual(params.neuron_count_bias, .5)
    self.assertEqual(params.target_limit, 5)
    self.assertEqual(params.target_count_prob, .25)
    self.assertEqual(params.target_count_bias, .5)
    self.assertEqual(params.retarget_prob, .25)
    self.assertEqual(params.potential_prob, .1)
    self.assertEqual(params.potential_strength, .1)
    self.assertEqual(params.threshold_prob, .1)
    self.assertEqual(params.threshold_strength, .1)
    self.assertEqual(params.reflex_pair_prob, .1)
    self.assertEqual(params.input_count, 10)
    self.assertEqual(params.output_count, 10)
    self.assertEqual(params.sensory_prob, .25)
    self.assertEqual(params.actuating_prob, .25)
    self.assertEqual(params.hidden_prob, .1)
    self.assertEqual(params.mutation_cycles, 1)
    # Fix: `len(...) is 0` relied on CPython small-int identity and emits a
    # SyntaxWarning on 3.8+; assert equality instead.
    self.assertEqual(len(params.upper_input_bounds), 0)
    self.assertEqual(len(params.lower_input_bounds), 0)