def evaluate_pong_performance(test_brain, visualization_mode):
    """Score a brain on the Gym 'Pong-ram-v0' environment.

    For every trial a fresh environment is created and a deep copy of
    ``test_brain`` plays one episode, so ``test_brain`` itself is never
    advanced.  On each frame the brain is advanced ``brain_speed`` times on
    the current observation; every output bit is decided by a strict majority
    over those ticks, and the resulting bit list is turned into an action
    index with ``utils.binary_array_to_decimal`` (capped at ``max_action``).

    The per-input bounds stored on ``brain.Mutation_params()`` are widened
    with every observation seen.

    Args:
        test_brain: Brain to evaluate; must be deep-copyable and expose
            ``advance(observations, output_count)``.
        visualization_mode: When equal to
            ``visualization.Visualization_flags.VISUALIZATION_ON`` the
            environment is rendered, the brain is visualised and the chosen
            action is printed.

    Returns:
        Percentage score: total frames survived over all trials divided by
        ``desired_score * trials``, times 100.
    """
    desired_score = 2000.0
    brain_speed = 5
    trials = 100
    output_count = 3
    max_action = 5
    visualize = (visualization_mode
                 == visualization.Visualization_flags.VISUALIZATION_ON)

    total_score = 0
    for _ in range(trials):
        env = gym.make('Pong-ram-v0')
        try:
            test_instance = copy.deepcopy(test_brain)
            observations = env.reset()
            score = 0
            done = False
            while not done:
                if visualize:
                    env.render()

                # Widen the recorded input range with this observation.
                for idx, value in enumerate(observations):
                    upper = brain.Mutation_params().upper_input_bounds
                    upper[idx] = max(upper[idx], value)
                    lower = brain.Mutation_params().lower_input_bounds
                    lower[idx] = min(lower[idx], value)

                # NOTE(review): the source indentation was lost; the brain
                # visualisation is assumed to run after every tick - confirm.
                raw_output = []
                for _ in range(brain_speed):
                    raw_output.append(
                        test_instance.advance(observations, output_count))
                    if visualize:
                        visualization.visualize_brain(
                            brain.print_brain_to_json(test_instance))

                # Majority vote per output bit across the ticks.
                output = [
                    int(sum(tick[bit] for tick in raw_output)
                        > int(brain_speed / 2))
                    for bit in range(output_count)
                ]
                action = min(utils.binary_array_to_decimal(output), max_action)
                if visualize:
                    print('ACTION: ' + str(action))

                observations, _reward, done, _info = env.step(action)
                # The score is the number of frames survived, not the reward.
                score += 1
            total_score += score
        finally:
            # Release the environment even if the episode raised.
            env.close()

    return (total_score / (desired_score * trials)) * 100
def evaluate_chopper_performance(test_brain, visualization_mode):
    """Score a brain on the Gym 'ChopperCommand-ram-v0' environment.

    For every trial a fresh environment is created and a deep copy of
    ``test_brain`` plays for at most ``max_steps`` frames, matching the other
    evaluators in this file, so ``test_brain`` itself is never advanced and
    every trial starts from the same brain state.  On each frame the brain is
    advanced ``brain_speed`` times; every output bit is decided by a strict
    majority over those ticks, and the bit list is turned into an action
    index with ``utils.binary_array_to_decimal`` (capped at ``max_action``).

    The per-input bounds stored on ``brain.Mutation_params()`` are widened
    with every observation seen.

    Args:
        test_brain: Brain to evaluate; must be deep-copyable and expose
            ``advance(observations, output_count)``.
        visualization_mode: When equal to
            ``visualization.Visualization_flags.VISUALIZATION_ON`` (the same
            flag the sibling evaluators check) the environment is rendered,
            the brain is visualised and the chosen action is printed.

    Returns:
        Percentage score: summed episode reward over all trials divided by
        ``desired_score * trials``, times 100.
    """
    import copy  # local import keeps this block self-contained

    desired_score = 1000
    trials = 5
    output_count = 5
    brain_speed = 5
    max_steps = 1000
    max_action = 17
    visualize = (visualization_mode
                 == visualization.Visualization_flags.VISUALIZATION_ON)

    total_score = 0.0
    for _ in range(trials):
        env = gym.make('ChopperCommand-ram-v0')
        try:
            test_instance = copy.deepcopy(test_brain)
            observations = env.reset()
            score = 0
            for _ in range(max_steps):
                if visualize:
                    env.render()

                # Widen the recorded input range with this observation.
                for idx, value in enumerate(observations):
                    upper = brain.Mutation_params().upper_input_bounds
                    upper[idx] = max(upper[idx], value)
                    lower = brain.Mutation_params().lower_input_bounds
                    lower[idx] = min(lower[idx], value)

                # NOTE(review): the source indentation was lost; the brain
                # visualisation is assumed to run after every tick - confirm.
                raw_output = []
                for _ in range(brain_speed):
                    raw_output.append(
                        test_instance.advance(observations, output_count))
                    if visualize:
                        visualization.visualize_brain(
                            brain.print_brain_to_json(test_instance))

                # Majority vote per output bit; the threshold is half the
                # number of ticks, not half the number of outputs.
                output = [
                    int(sum(tick[bit] for tick in raw_output)
                        > int(brain_speed / 2))
                    for bit in range(output_count)
                ]
                action = min(utils.binary_array_to_decimal(output), max_action)
                if visualize:
                    print('ACTION: ' + str(action))

                observations, reward, done, _info = env.step(action)
                score += reward
                if done:
                    break
            total_score += score
        finally:
            # Release the environment even if the episode raised.
            env.close()

    return (total_score / (desired_score * trials)) * 100
def evalute_pendulum_cart_performance(test_brain, visualization_mode):
    """Score a brain on the Gym 'CartPole-v0' environment.

    The (misspelled) function name is kept so existing callers keep working.

    For every trial a fresh environment is created and a deep copy of
    ``test_brain`` plays one episode until the environment reports ``done``.
    On each frame the brain is advanced ``brain_speed`` times with a single
    output; the action is 1 when at least ``votes_needed`` of those ticks
    summed up are reached, otherwise 0.

    The per-input bounds stored on ``brain.Mutation_params()`` are widened
    with every observation seen.

    Args:
        test_brain: Brain to evaluate; must be deep-copyable and expose
            ``advance(observations, 1)``.
        visualization_mode: When equal to
            ``visualization.Visualization_flags.VISUALIZATION_ON`` the
            environment is rendered, the brain is visualised and the chosen
            action is printed.

    Returns:
        Percentage score: summed episode reward over all trials divided by
        ``trials * desired_score``, times 100.
    """
    desired_score = 200
    trials = 100
    brain_speed = 5
    votes_needed = 3
    visualize = (visualization_mode
                 == visualization.Visualization_flags.VISUALIZATION_ON)

    total_score = 0.0
    for _ in range(trials):
        env = gym.make('CartPole-v0')
        try:
            observations = env.reset()
            test_instance = copy.deepcopy(test_brain)
            score = 0
            done = False
            while not done:
                if visualize:
                    env.render()

                # Widen the recorded input range with this observation.
                for idx, value in enumerate(observations):
                    upper = brain.Mutation_params().upper_input_bounds
                    upper[idx] = max(upper[idx], value)
                    lower = brain.Mutation_params().lower_input_bounds
                    lower[idx] = min(lower[idx], value)

                # NOTE(review): the source indentation was lost; the brain
                # visualisation is assumed to run after every tick - confirm.
                votes = 0
                for _ in range(brain_speed):
                    votes += test_instance.advance(observations, 1)[0]
                    if visualize:
                        visualization.visualize_brain(
                            brain.print_brain_to_json(test_instance))

                action = int(votes >= votes_needed)
                if visualize:
                    print('ACTION: ' + str(action))

                observations, reward, done, _info = env.step(action)
                score += reward
            total_score += score
        finally:
            # Release the environment even if the episode raised.
            env.close()

    return (total_score / (trials * desired_score)) * 100
def evaluate_potion_store_performance(test_brain, visualization_mode):
    """Score a brain on the "potion store" task.

    Every way of choosing ``health_potion_count`` health potions out of
    ``potion_count`` potions is one scenario.  In each scenario a deep copy of
    ``test_brain`` is repeatedly offered a random potion.  The brain input is
    two feedback bits (last drink was correct / incorrect) followed by a
    one-hot encoding of the offered potion.  The brain is advanced
    ``brain_speed`` times per offer and drinks when a strict majority of the
    ticks fire.  A scenario ends once ``correct_limit`` health potions or
    ``incorrect_limit`` other potions were drunk, or after ``time_limit``
    offers.

    Offers are drawn with ``randrange``, so the result is not deterministic.

    Args:
        test_brain: Brain to evaluate; must be deep-copyable and expose
            ``advance(inputs, 1)``.
        visualization_mode: When equal to
            ``visualization.Visualization_flags.VISUALIZATION_ON`` the brain
            is visualised and every decision is printed.

    Returns:
        Percentage score: health potions drunk over all scenarios divided by
        ``correct_limit`` times the number of scenarios, times 100.
    """
    time_limit = 200
    potion_count = 6
    health_potion_count = 2
    incorrect_limit = 5
    correct_limit = 100.0
    brain_speed = 5
    feedback_bits = 2

    # Checked up front: with an invalid configuration there are no
    # combinations at all, so a check inside the loop could never fire.
    assert health_potion_count <= potion_count

    visualize = (visualization_mode
                 == visualization.Visualization_flags.VISUALIZATION_ON)

    # Materialised so the score can be normalised by the real scenario count.
    scenarios = list(combinations(range(potion_count), health_potion_count))

    total_score = 0
    for health_potion_indices in scenarios:
        test_instance = copy.deepcopy(test_brain)
        correct_potions = 0.0
        incorrect_potions = 0.0
        correct_bit = 0
        incorrect_bit = 0
        elapsed = 0
        while (correct_potions < correct_limit
               and incorrect_potions < incorrect_limit
               and elapsed < time_limit):
            potion_offer = randrange(potion_count)
            offer_bits = [0] * potion_count
            offer_bits[potion_offer] = 1
            brain_input = [correct_bit, incorrect_bit] + offer_bits
            assert len(brain_input) == potion_count + feedback_bits

            # NOTE(review): the source indentation was lost; the brain
            # visualisation is assumed to run after every tick - confirm.
            votes = 0
            for _ in range(brain_speed):
                votes += test_instance.advance(brain_input, 1)[0]
                if visualize:
                    visualization.visualize_brain(
                        brain.print_brain_to_json(test_instance))
            drank = votes > int(brain_speed / 2)

            if not drank:
                correct_bit = 0
                incorrect_bit = 0
                if visualize:
                    print('DECLINED OFFER FOR ' + str(potion_offer))
            elif potion_offer not in health_potion_indices:
                incorrect_potions += 1
                incorrect_bit = 1
                correct_bit = 0
                if visualize:
                    print('DRANK ' + str(potion_offer) + ': POISION')
            else:
                correct_potions += 1
                correct_bit = 1
                incorrect_bit = 0
                if visualize:
                    print('DRANK ' + str(potion_offer) + ': HEALTH POTION')
            elapsed += 1

        total_score += correct_potions
        if visualize:
            print("NUMBER CORRECT: " + str(correct_potions))

    return float(total_score / (correct_limit * len(scenarios))) * 100.0