예제 #1
0
def evaluate_pong_performance(test_brain, visualization_mode):
	"""Score a brain on the 'Pong-ram-v0' gym environment.

	Runs a fixed number of episodes, each on a fresh deep copy of
	``test_brain`` so the caller's brain is never advanced. An episode's
	score is the number of environment steps taken before ``done``; the
	reward returned by the environment is deliberately ignored.

	Args:
		test_brain: object exposing ``advance(observations, output_count)``
			that returns an indexable sequence of per-output values.
		visualization_mode: compared against
			``visualization.Visualization_flags.VISUALIZATION_ON``; when equal
			the environment is rendered and the brain/action are printed.

	Returns:
		Total steps over all episodes as a percentage of
		``desired_score * trials``.
	"""
	desired_score = 2000.0
	brain_speed = 5    # brain ticks per environment step
	trials = 100
	output_count = 3   # output bits decoded into one action
	max_action = 5     # decoded action is capped here; presumably the highest valid Pong action - confirm
	show = visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON

	total_steps = 0
	for _trial in range(trials):
		env = gym.make('Pong-ram-v0')
		# try/finally so the environment is released even if the brain or
		# the environment raises in the middle of an episode.
		try:
			test_instance = copy.deepcopy(test_brain)
			observations = env.reset()
			episode_steps = 0
			while True:
				if show:
					env.render()

				# Track the running max/min of every observed input.
				# NOTE(review): Mutation_params() is called on every access, so
				# this only has an effect if the bounds are shared state - confirm.
				for idx, value in enumerate(observations):
					brain.Mutation_params().upper_input_bounds[idx] = max(
						brain.Mutation_params().upper_input_bounds[idx], value)
					brain.Mutation_params().lower_input_bounds[idx] = min(
						brain.Mutation_params().lower_input_bounds[idx], value)

				# Tick the brain several times on the same observation.
				raw_output = []
				for _tick in range(brain_speed):
					raw_output.append(test_instance.advance(observations, output_count))
					if show:
						visualization.visualize_brain(brain.print_brain_to_json(test_instance))

				# Majority vote per output bit across the ticks.
				output = []
				for bit in range(output_count):
					votes = sum(tick_output[bit] for tick_output in raw_output)
					output.append(int(votes > brain_speed // 2))

				action = min(utils.binary_array_to_decimal(output), max_action)
				if show:
					print('ACTION: ' + str(action))

				observations, _reward, done, _info = env.step(action)
				episode_steps += 1
				if done:
					total_steps += episode_steps
					break
		finally:
			env.close()

	return (total_steps / (desired_score * trials)) * 100
예제 #2
0
파일: learning.py 프로젝트: mivanit/mltests
def evaluate_chopper_performance(test_brain, visualization_mode):
    """Score a brain on the 'ChopperCommand-ram-v0' gym environment.

    Runs a fixed number of episodes, each capped at ``max_steps``
    environment steps, and sums the rewards returned by the environment.

    NOTE(review): ``test_brain`` is advanced directly (no copy is made), so
    its internal state carries over between episodes and back to the
    caller - confirm this is intended.

    Args:
        test_brain: object exposing ``advance(observations, output_count)``
            that returns an indexable sequence of per-output values.
        visualization_mode: compared against
            ``Learning_flags.VISUALIZATION_ON``; when equal the environment
            is rendered and the brain/action are printed.

    Returns:
        Total reward over all episodes as a percentage of
        ``desired_score * trials``.
    """
    desired_score = 1000
    trials = 5
    output_count = 5   # output bits decoded into one action
    brain_speed = 5    # brain ticks per environment step
    max_steps = 1000   # hard cap on steps per episode
    max_action = 17    # decoded action is capped here; presumably the highest valid action - confirm
    show = visualization_mode == Learning_flags.VISUALIZATION_ON

    total_score = 0.0
    for _trial in range(trials):
        env = gym.make('ChopperCommand-ram-v0')
        # try/finally so the environment is released even if the brain or
        # the environment raises in the middle of an episode.
        try:
            observations = env.reset()
            episode_score = 0
            for _step in range(max_steps):
                if show:
                    env.render()

                # Track the running max/min of every observed input.
                # NOTE(review): Mutation_params() is called on every access,
                # so this only has an effect if the bounds are shared state.
                for idx, value in enumerate(observations):
                    brain.Mutation_params().upper_input_bounds[idx] = max(
                        brain.Mutation_params().upper_input_bounds[idx],
                        value)
                    brain.Mutation_params().lower_input_bounds[idx] = min(
                        brain.Mutation_params().lower_input_bounds[idx],
                        value)

                # Tick the brain several times on the same observation.
                raw_output = []
                for _tick in range(brain_speed):
                    raw_output.append(
                        test_brain.advance(observations, output_count))
                    if show:
                        visualization.visualize_brain(
                            brain.print_brain_to_json(test_brain))

                # Majority vote per output bit. The threshold is half the
                # number of votes (brain_speed), not the number of outputs.
                output = []
                for bit in range(output_count):
                    votes = sum(tick_output[bit] for tick_output in raw_output)
                    output.append(int(votes > brain_speed // 2))

                action = min(utils.binary_array_to_decimal(output), max_action)
                if show:
                    print('ACTION: ' + str(action))

                observations, reward, done, _info = env.step(action)
                episode_score += reward
                if done:
                    break
            total_score += episode_score
        finally:
            env.close()

    return (total_score / (desired_score * trials)) * 100
예제 #3
0
def evalute_pendulum_cart_performance(test_brain, visualization_mode):
	"""Score a brain on the 'CartPole-v0' gym environment.

	Runs a fixed number of episodes, each on a fresh deep copy of
	``test_brain``, and sums the rewards returned by the environment until
	it reports ``done``. The function name (including its spelling) is kept
	as-is so existing callers keep working.

	Args:
		test_brain: object exposing ``advance(observations, 1)`` that returns
			an indexable sequence whose first element is the output value.
		visualization_mode: compared against
			``visualization.Visualization_flags.VISUALIZATION_ON``; when equal
			the environment is rendered and the brain/action are printed.

	Returns:
		Total reward over all episodes as a percentage of
		``trials * desired_score``.
	"""
	desired_score = 200
	trials = 100
	brain_speed = 5      # brain ticks per environment step
	vote_threshold = 3   # summed output needed to choose action 1
	show = visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON

	total_score = 0.0
	for _trial in range(trials):
		env = gym.make('CartPole-v0')
		# try/finally so the environment is released even if the brain or
		# the environment raises in the middle of an episode.
		try:
			observations = env.reset()
			test_instance = copy.deepcopy(test_brain)
			episode_score = 0
			while True:
				if show:
					env.render()

				# Track the running max/min of every observed input.
				# NOTE(review): Mutation_params() is called on every access, so
				# this only has an effect if the bounds are shared state - confirm.
				for idx, value in enumerate(observations):
					brain.Mutation_params().upper_input_bounds[idx] = max(
						brain.Mutation_params().upper_input_bounds[idx], value)
					brain.Mutation_params().lower_input_bounds[idx] = min(
						brain.Mutation_params().lower_input_bounds[idx], value)

				# Tick the brain several times and accumulate its single output.
				votes = 0
				for _tick in range(brain_speed):
					votes += test_instance.advance(observations, 1)[0]
					if show:
						visualization.visualize_brain(brain.print_brain_to_json(test_instance))
				action = int(votes >= vote_threshold)

				if show:
					print('ACTION: ' + str(action))

				observations, reward, done, _info = env.step(action)
				episode_score += reward
				if done:
					total_score += episode_score
					break
		finally:
			env.close()

	return (total_score / (trials * desired_score)) * 100
예제 #4
0
def evaluate_potion_store_performance(test_brain, visualization_mode):
	"""Score a brain on the potion-store task.

	One trial is run for every way of choosing which ``health_potion_count``
	of the ``potion_count`` potions are health potions. In each trial the
	brain (a fresh deep copy of ``test_brain``) is repeatedly offered a
	random potion and decides whether to drink it. A trial ends once the
	correct-drink limit, the incorrect-drink limit, or the time limit is
	reached.

	The brain input is ``[correct_bit, incorrect_bit]`` followed by a
	one-hot encoding of the offered potion, i.e. ``potion_count + 2``
	values (8 with the current settings). The two feedback bits describe
	the outcome of the previous offer.

	Args:
		test_brain: object exposing ``advance(inputs, 1)`` that returns an
			indexable sequence whose first element is the output value.
		visualization_mode: compared against
			``visualization.Visualization_flags.VISUALIZATION_ON``; when equal
			the brain and every decision are printed.

	Returns:
		Correctly drunk potions over all trials as a percentage of
		``correct_limit * number_of_trials``. Offers are drawn with
		``randrange``, so the result varies between calls.
	"""
	time_limit = 200
	potion_count = 6
	health_potion_count = 2
	incorrect_limit = 5
	correct_limit = 100.0
	brain_speed = 5   # brain ticks per potion offer
	show = visualization_mode == visualization.Visualization_flags.VISUALIZATION_ON

	assert health_potion_count <= potion_count
	# Materialised so the trial count can be used for normalisation instead
	# of a hard-coded 15 (which is only right for 6-choose-2).
	trials = list(combinations(range(potion_count), health_potion_count))

	total_score = 0
	for health_potion_indices in trials:
		test_instance = copy.deepcopy(test_brain)

		correct_potions = 0.0
		incorrect_potions = 0.0
		correct_bit = 0
		incorrect_bit = 0
		elapsed = 0

		while (correct_potions < correct_limit
				and incorrect_potions < incorrect_limit
				and elapsed < time_limit):
			potion_offer = randrange(potion_count)
			offer_bits = [0] * potion_count
			offer_bits[potion_offer] = 1
			brain_input = [correct_bit, incorrect_bit] + offer_bits

			# Tick the brain several times and take a majority vote.
			votes = 0
			for _tick in range(brain_speed):
				votes += test_instance.advance(brain_input, 1)[0]
				if show:
					visualization.visualize_brain(brain.print_brain_to_json(test_instance))
			drink = votes > brain_speed // 2

			if not drink:
				correct_bit = 0
				incorrect_bit = 0
				if show:
					print('DECLINED OFFER FOR ' + str(potion_offer))
			elif potion_offer in health_potion_indices:
				correct_potions += 1
				correct_bit = 1
				incorrect_bit = 0
				if show:
					print('DRANK ' + str(potion_offer) + ': HEALTH POTION')
			else:
				incorrect_potions += 1
				incorrect_bit = 1
				correct_bit = 0
				if show:
					print('DRANK ' + str(potion_offer) + ': POISION')

			elapsed += 1

		total_score += correct_potions
		if show:
			print("NUMBER CORRECT: " + str(correct_potions))

	return float(total_score / (correct_limit * len(trials))) * 100.0