def __init__(self, enemies):
        """Create a fresh experiment directory, the environment and the EA operators.

        Args:
            enemies: sequence of enemy identifiers. It is stored on the
                instance, used in the experiment directory name and passed
                to the evaluator; only ``enemies[0]`` is handed to the
                environment at construction time.
        """
        self.enemies = enemies

        # Claim the first unused numbered directory. The directory is created
        # directly and the number is bumped on FileExistsError, instead of
        # testing os.path.exists() first, so two runs started at the same
        # time cannot both settle on the same experiment number.
        experiment_num = 0
        while True:
            self.experiment_name = 'task2_generalist3_enemies_{}_{}'.format(
                enemies, experiment_num)
            try:
                os.makedirs(self.experiment_name)
            except FileExistsError:
                experiment_num += 1
            else:
                break

        self.env = Environment(
            experiment_name=self.experiment_name,
            level=2,
            player_controller=player_controller(N_HIDDEN_NEURONS),
            enemies=[enemies[0]],
            speed="fastest")

        # Genome length: (sensors + 1) * hidden + (hidden + 1) * 5.
        # NOTE(review): presumably weights plus biases of a one-hidden-layer
        # controller with 5 outputs -- confirm against player_controller.
        self.n_vars = (self.env.get_num_sensors() +
                       1) * N_HIDDEN_NEURONS + (N_HIDDEN_NEURONS + 1) * 5
        # One rotation angle per unordered pair of genome positions.
        self.rot_size = int((self.n_vars * (self.n_vars - 1)) / 2)
        # Per-individual mutation step sizes and rotation angles.
        self.dev = np.random.uniform(0, INIT_SD, (NPOP, self.n_vars))
        self.rot = np.random.uniform(-np.pi, np.pi, (NPOP, self.rot_size))
        # One weight per entry of `enemies`, all starting at 1.
        self.saw = np.ones(np.shape(enemies))

        self.init = Initialization(DOM_L, DOM_U)
        self.evaluator = Evaluation(self.env, enemies, SHARE_SIZE)
        self.selector = Selection()
        self.logger = Logger(self.experiment_name)
        self.recombinator = Recombination()
        self.mutator = Mutation(MIN_DEV, ROTATION_MUTATION, STANDARD_DEVIATION,
                                DOM_L, DOM_U)
class Generalist2:
    """Generalist evolutionary algorithm with fitness sharing.

    Each generation is evaluated, logged, fitness-shared and then replaced by
    blend-recombined, mutated children plus the best survivors. After the
    last generation the best individual seen is played against enemies 1-8
    and compared with the stored "ultimate champion".
    """

    def __init__(self, enemies):
        """Create the experiment directory, the environment and the EA operators.

        Args:
            enemies: sequence of enemy identifiers. It is used in the
                experiment directory name and passed to the evaluator; only
                ``enemies[0]`` is given to the environment here.
        """
        self.enemies = enemies
        self.experiment_name = 'task2_generalist2_enemies_{}'.format(enemies)

        # exist_ok replaces the separate os.path.exists() test, which could
        # race with another process creating the same directory.
        os.makedirs(self.experiment_name, exist_ok=True)

        self.env = Environment(experiment_name=self.experiment_name, level=2,
                               player_controller=player_controller(N_HIDDEN_NEURONS),
                               enemies=[enemies[0]],
                               speed="fastest")

        # Genome length: (sensors + 1) * hidden + (hidden + 1) * 5.
        self.n_vars = (self.env.get_num_sensors() + 1) * N_HIDDEN_NEURONS + (N_HIDDEN_NEURONS + 1) * 5
        # One rotation angle per unordered pair of genome positions.
        self.rot_size = int((self.n_vars * (self.n_vars - 1)) / 2)
        self.dev = np.random.uniform(0, INIT_SD, (NPOP, self.n_vars))
        self.rot = np.random.uniform(-np.pi, np.pi, (NPOP, self.rot_size))

        self.init = Initialization(DOM_L, DOM_U)
        self.evaluator = Evaluation(self.env, enemies, SHARE_SIZE)
        self.selector = Selection()
        self.logger = Logger(self.experiment_name)
        self.recombinator = Recombination()
        self.mutator = Mutation(MIN_DEV, ROTATION_MUTATION, STANDARD_DEVIATION, DOM_L, DOM_U)

    def __compare_to_ultimate__(self, individual_gain, wins, champion):
        """Replace the stored ultimate champion if this one is better.

        The candidate is better when it has more wins, or the same number of
        wins and a greater individual gain. The stored record is read from
        and written to files under ``Logs/Task1/UltimateChampion``.

        Args:
            individual_gain: gain achieved by the candidate.
            wins: number of wins achieved by the candidate.
            champion: numpy array describing the candidate; written with
                ``np.array_str``.
        """
        performance_path = "Logs/Task1/UltimateChampion/UltimatePerformance.txt"
        champion_path = "Logs/Task1/UltimateChampion/UltimateChampion.txt"

        # Context managers guarantee both files are flushed and closed, even
        # if parsing or writing raises.
        with open(performance_path, "r+") as performance_file:
            # NOTE(review): eval() executes whatever the file contains. It is
            # kept because the file is written by this method as
            # "<gain>, <wins>"; do not point it at untrusted data.
            ultimate_performance, ultimate_wins = eval(performance_file.read())

            # declare new ultimate champion if either has more wins or same amount of wins and greater performance
            if wins >= ultimate_wins and ((wins > ultimate_wins) or (individual_gain > ultimate_performance)):
                with open(champion_path, "w") as champion_file:
                    champion_file.write(np.array_str(champion))

                # Overwrite the record in place.
                performance_file.seek(0)
                performance_file.truncate()
                performance_file.write(f"{individual_gain!s}, {wins!s}")

    def __run_best_against_all__(self):
        """Play the stored best individual once against each of enemies 1-8.

        Returns:
            Tuple ``(gain, wins)``: summed player life minus summed enemy
            life, and the number of games in which enemy life was 0.
        """
        champion = np.array(self.best_individual[0])
        player_total = 0
        enemy_total = 0
        wins = 0
        for enemy in range(1, 9):
            self.env.update_parameter('enemies', [enemy])
            _, player_life, enemy_life, _ = self.env.play(pcont=champion)
            player_total += player_life
            enemy_total += enemy_life
            if enemy_life == 0:
                wins += 1
        return (player_total - enemy_total), wins

    def __share_of__(self, ind1, ind2):
        """Return the sharing weight of two individuals.

        The weight is 0 when their Euclidean distance exceeds SHARE_SIZE and
        falls linearly from 1 (distance 0) to 0 (distance SHARE_SIZE).
        """
        dist = np.linalg.norm(ind1 - ind2)
        if dist > SHARE_SIZE:
            return 0
        return 1 - dist / SHARE_SIZE

    def __share_fitness__(self, pop, fitness):
        """Divide each fitness by the individual's summed sharing weights.

        Each individual is compared with every member of ``pop`` including
        itself, so for a positive SHARE_SIZE the divisor is at least 1.

        Returns:
            numpy array of shared fitness values, in population order.
        """
        shared = []
        for i, individual in enumerate(pop):
            niche_count = sum(self.__share_of__(individual, other) for other in pop)
            shared.append(fitness[i] / niche_count)
        return np.array(shared)

    def store_best_champion(self, pop, fit, gen):
        """Remember the best individual of ``pop`` if it beats the record so far."""
        generation_best = fit.max()
        if generation_best > self.highest_fitness:
            self.best_individual = self.selector.select_best_n(pop, fit, 1)
            self.best_gen = gen
            self.highest_fitness = generation_best

    def run(self):
        """Evolve for NGEN generations, then score and log the best individual."""
        # Number of full enemy sweeps averaged for the final score.
        n_champion_runs = 5

        population = self.init.uniform_initialization(NPOP, self.n_vars)

        self.best_gen = 0
        self.highest_fitness = -100000
        self.best_individual = None

        for generation in range(1, NGEN + 1):
            print("EVALUATION GENERATION %d OF %d \n" %(generation, NGEN))

            fitness_list = self.evaluator.sharing_generalist_eval(population)

            # Log and track the champion using the raw fitness values.
            self.logger.log_results(fitness_list, population)
            self.store_best_champion(population, fitness_list, generation)

            # Shift fitness so the minimum is 0 before dividing it by the
            # sharing weights.
            min_fitness = np.amin(fitness_list)
            if min_fitness < 0:
                fitness_list = [x - min_fitness for x in fitness_list]
            fitness_list = self.__share_fitness__(population, fitness_list)

            # The last generation is only evaluated, not bred.
            if generation == NGEN:
                continue

            parents = self.selector.tournament_percentage(population, fitness_list)
            survivors = self.selector.select_best_percentage(population, fitness_list, BEST_SURVIVOR_PERCENTAGE)

            # Children fill the slots not taken by survivors.
            children = self.recombinator.blend(parents, NPOP - len(survivors))
            children, self.dev = self.mutator.uncorrelated_mutation_n_step_size(children, self.dev)

            population = np.append(children, survivors, axis=0)

        # Run the best individual of all generations
        print("The best fitness was in generation %d and had a fitness of %.3f" %(self.best_gen, self.highest_fitness))

        total_individual_gain = 0
        total_wins = 0
        for _ in range(n_champion_runs):
            individual_gain, wins = self.__run_best_against_all__()
            total_individual_gain += individual_gain
            total_wins += wins

        average_ig = total_individual_gain / n_champion_runs
        average_wins = total_wins / n_champion_runs
        self.__compare_to_ultimate__(average_ig, average_wins, self.best_individual[0])
        self.logger.log_individual(average_ig)
# Make sure the experiment output directory exists. exist_ok replaces the
# separate os.path.exists() test, which could race with another process
# creating the same directory.
os.makedirs(experiment_name, exist_ok=True)

env = Environment(experiment_name=experiment_name,
                  level=2,
                  player_controller=player_controller(N_HIDDEN_NEURONS),
                  enemies=ENEMY,
                  speed="fastest")

# Genome length: (sensors + 1) * hidden + (hidden + 1) * 5.
n_vars = (env.get_num_sensors() + 1) * N_HIDDEN_NEURONS + (N_HIDDEN_NEURONS +
                                                           1) * 5
# One rotation angle per unordered pair of genome positions.
rot_size = int((n_vars * (n_vars - 1)) / 2)
# Per-individual mutation step sizes and rotation angles.
dev = np.random.uniform(0, INIT_SD, (NPOP, n_vars))
rot = np.random.uniform(-np.pi, np.pi, (NPOP, rot_size))

# Evolutionary operators used by the rest of the script.
init = Initialization(DOM_L, DOM_U)
evaluator = Evaluation(env)
selector = Selection()
logger = Logger(experiment_name)
recombinator = Recombination()
mutator = Mutation(MIN_DEV, ROTATION_MUTATION, STANDARD_DEVIATION, DOM_L,
                   DOM_U)
'''
Changes with regard to specialist1:

* Specialist 2 uses tournament selection (instead of selecting the best)
* Specialist 2 uses blend recombination (instead of simple)
* Specialist 2 uses correlated mutation (instead of nonuniform)

'''
from task1_logger import Logger
from demo_controller import player_controller

from environment import Environment

experiment_name = 'task1_specialist1_enemy_{}'.format(ENEMY)
# exist_ok replaces the separate os.path.exists() test, which could race
# with another process creating the same directory.
os.makedirs(experiment_name, exist_ok=True)


env = Environment(experiment_name=experiment_name,level=2, enemies=ENEMY,
                  player_controller=player_controller(N_HIDDEN_NEURONS),
                  speed="fastest")
# Genome length: (sensors + 1) * hidden + (hidden + 1) * 5.
N_VARS = (env.get_num_sensors() + 1) * N_HIDDEN_NEURONS + (N_HIDDEN_NEURONS + 1) * 5

# Evolutionary operators used by the generation loop below.
init = Initialization(DOM_L, DOM_U)
evaluator = Evaluation(env)
selector = Selection()
logger = Logger(experiment_name)
recombinator = Recombination()
mutator = Mutation(MIN_DEV, ROTATION_MUTATION, STANDARD_DEVIATION, DOM_L, DOM_U)

population = init.uniform_initialization(NPOP, N_VARS)
# No best individual is known before the first generation is evaluated.
best = None
for i in range(NGEN):
    print(i)
    fitness_list = evaluator.simple_eval(population)
    logger.log_results(fitness_list)
    parents = selector.select_best_percentage(population, fitness_list)
    ind = np.argmax(fitness_list)
    if best is None:
class Generalist3:
    """Generalist evolutionary algorithm with fitness sharing and SAW.

    Each generation is evaluated with per-enemy weights (``self.saw``),
    logged, fitness-shared and bred. After every generation the weights are
    adapted stepwise (SAW) from the best individual's per-enemy fitness, and
    every tenth generation the best individual is compared with the stored
    "ultimate champion". ``run`` loops until it is interrupted.
    """

    def __init__(self, enemies):
        """Create a fresh experiment directory, the environment and the EA operators.

        Args:
            enemies: sequence of enemy identifiers. It is stored on the
                instance, used in the experiment directory name and passed
                to the evaluator; only ``enemies[0]`` is handed to the
                environment at construction time.
        """
        self.enemies = enemies

        # Claim the first unused numbered directory. The directory is created
        # directly and the number is bumped on FileExistsError, instead of
        # testing os.path.exists() first, so two runs started at the same
        # time cannot both settle on the same experiment number.
        experiment_num = 0
        while True:
            self.experiment_name = 'task2_generalist3_enemies_{}_{}'.format(
                enemies, experiment_num)
            try:
                os.makedirs(self.experiment_name)
            except FileExistsError:
                experiment_num += 1
            else:
                break

        self.env = Environment(
            experiment_name=self.experiment_name,
            level=2,
            player_controller=player_controller(N_HIDDEN_NEURONS),
            enemies=[enemies[0]],
            speed="fastest")

        # Genome length: (sensors + 1) * hidden + (hidden + 1) * 5.
        self.n_vars = (self.env.get_num_sensors() +
                       1) * N_HIDDEN_NEURONS + (N_HIDDEN_NEURONS + 1) * 5
        # One rotation angle per unordered pair of genome positions.
        self.rot_size = int((self.n_vars * (self.n_vars - 1)) / 2)
        self.dev = np.random.uniform(0, INIT_SD, (NPOP, self.n_vars))
        self.rot = np.random.uniform(-np.pi, np.pi, (NPOP, self.rot_size))
        # One weight per entry of `enemies`, all starting at 1.
        self.saw = np.ones(np.shape(enemies))

        self.init = Initialization(DOM_L, DOM_U)
        self.evaluator = Evaluation(self.env, enemies, SHARE_SIZE)
        self.selector = Selection()
        self.logger = Logger(self.experiment_name)
        self.recombinator = Recombination()
        self.mutator = Mutation(MIN_DEV, ROTATION_MUTATION, STANDARD_DEVIATION,
                                DOM_L, DOM_U)

    def __compare_to_ultimate__(self, individual_gain, wins, champion):
        """Replace the stored ultimate champion if this one is better.

        The candidate is better when it has more wins, or the same number of
        wins and a greater individual gain. The stored record is read from
        and written to files under ``Logs/Task1/UltimateChampion``.

        Args:
            individual_gain: gain achieved by the candidate.
            wins: number of wins achieved by the candidate.
            champion: numpy array describing the candidate; written with
                ``np.array_str``.
        """
        performance_path = "Logs/Task1/UltimateChampion/UltimatePerformance.txt"
        champion_path = "Logs/Task1/UltimateChampion/UltimateChampion.txt"

        # Context managers guarantee both files are flushed and closed, even
        # if parsing or writing raises.
        with open(performance_path, "r+") as performance_file:
            # NOTE(review): eval() executes whatever the file contains. It is
            # kept because the file is written by this method as
            # "<gain>, <wins>"; do not point it at untrusted data.
            ultimate_performance, ultimate_wins = eval(
                performance_file.read())

            #declare new ultimate champion if either has more wins or same amount of wins and greater performance
            if wins >= ultimate_wins and (
                (wins > ultimate_wins) or
                (individual_gain > ultimate_performance)):
                with open(champion_path, "w") as champion_file:
                    champion_file.write(np.array_str(champion))

                # Overwrite the record in place.
                performance_file.seek(0)
                performance_file.truncate()
                performance_file.write(f"{individual_gain!s}, {wins!s}")

    def __run_best_against_all__(self):
        """Play the stored best individual once against each of enemies 1-8.

        Returns:
            Tuple ``(gain, wins)``: summed player life minus summed enemy
            life, and the number of games in which enemy life was 0.
        """
        champion = np.array(self.best_individual[0])
        player_total = 0
        enemy_total = 0
        wins = 0
        for enemy in range(1, 9):
            self.env.update_parameter('enemies', [enemy])
            _, player_life, enemy_life, _ = self.env.play(pcont=champion)
            player_total += player_life
            enemy_total += enemy_life
            if enemy_life == 0:
                wins += 1
        return (player_total - enemy_total), wins

    def __stepwise_adaption_of_weights__(self, saw):
        """Move 0.1 of weight from the best-handled enemy to the worst-handled one.

        The best individual is played against every enemy in
        ``self.enemies``. Enemies are ranked by the resulting fitness; the
        highest-ranked enemy whose weight is still above the lower bound
        loses 0.1 and the lowest-ranked enemy whose weight is still below
        the upper bound gains 0.1. At most one such transfer happens per call.

        The enemies played are those in ``self.enemies`` (not a fixed 1..8)
        so that rank positions index ``saw``, which holds one weight per
        training enemy. For ``enemies == [1, ..., 8]`` this is the same sweep
        as before.
        NOTE(review): assumes the evaluator applies ``saw[k]`` to
        ``self.enemies[k]`` -- confirm against Evaluation.

        Args:
            saw: numpy array of weights, one per entry of ``self.enemies``.
                It is modified in place.

        Returns:
            The same ``saw`` array.
        """
        step = 0.1
        # Bounds sit slightly inside 0.1 / 1.9 to absorb float round-off from
        # repeated +/- 0.1 steps.
        lower_bound = 0.11
        upper_bound = 1.89

        n_enemies = len(self.enemies)
        if n_enemies < 2:
            # With fewer than two enemies there is nothing to shift between.
            return saw

        champion = np.array(self.best_individual[0])
        fitness_array = np.zeros(n_enemies)
        for slot, enemy in enumerate(self.enemies):
            self.env.update_parameter('enemies', [enemy])
            fitness, _, _, _ = self.env.play(pcont=champion)
            fitness_array[slot] = fitness

        # Ascending order: position 0 is the worst-handled enemy.
        fitness_indices = np.argsort(fitness_array)
        max_index = n_enemies - 1
        min_index = 0
        while max_index != min_index:
            if saw[fitness_indices[max_index]] <= lower_bound:
                max_index -= 1
            elif saw[fitness_indices[min_index]] >= upper_bound:
                min_index += 1
            else:
                saw[fitness_indices[max_index]] -= step
                saw[fitness_indices[min_index]] += step
                break
        return saw

    def __share_of__(self, ind1, ind2):
        """Return the sharing weight of two individuals.

        The weight is 0 when their Euclidean distance exceeds SHARE_SIZE and
        falls linearly from 1 (distance 0) to 0 (distance SHARE_SIZE).
        """
        dist = np.linalg.norm(ind1 - ind2)
        if dist > SHARE_SIZE:
            return 0
        return 1 - dist / SHARE_SIZE

    def __share_fitness__(self, pop, fitness):
        """Divide each fitness by the individual's summed sharing weights.

        Each individual is compared with every member of ``pop`` including
        itself, so for a positive SHARE_SIZE the divisor is at least 1.

        Returns:
            numpy array of shared fitness values, in population order.
        """
        shared = []
        for i, individual in enumerate(pop):
            niche_count = sum(
                self.__share_of__(individual, other) for other in pop)
            shared.append(fitness[i] / niche_count)
        return np.array(shared)

    def store_best_champion(self, pop, fit, gen):
        """Remember the best individual of ``pop`` if it beats the record so far."""
        generation_best = fit.max()
        if generation_best > self.highest_fitness:
            self.best_individual = self.selector.select_best_n(pop, fit, 1)
            self.best_gen = gen
            self.highest_fitness = generation_best

    def run(self):
        """Evolve indefinitely, adapting SAW weights after every generation.

        The loop has no termination condition; it runs until the process is
        interrupted.
        """
        # Every `check_interval` generations the champion is scored over
        # `n_champion_runs` full enemy sweeps.
        check_interval = 10
        n_champion_runs = 5

        population = self.init.uniform_initialization(NPOP, self.n_vars)

        self.best_gen = 0
        self.highest_fitness = -100000
        self.best_individual = None

        for generation in itertools.count(start=1):
            print("\nEVALUATION GENERATION %d \n" % generation)

            fitness_list = self.evaluator.sharing_generalist_eval(population,
                                                                  saw=self.saw)
            # Log and track the champion using the raw fitness values.
            self.logger.log_results(fitness_list, population)
            self.store_best_champion(population, fitness_list, generation)

            # Shift fitness so the minimum is 0 before dividing it by the
            # sharing weights.
            min_fitness = np.amin(fitness_list)
            print("Fitness before normalization:\n" + str(fitness_list))
            if min_fitness < 0:
                fitness_list = [x - min_fitness for x in fitness_list]
                print("Fitness after normalization:\n" + str(fitness_list))
            fitness_list = self.__share_fitness__(population, fitness_list)
            print("Fitness after sharing:\n" + str(fitness_list))

            # Recalculate the SAW weights from the current best individual.
            self.saw = self.__stepwise_adaption_of_weights__(self.saw)
            print("\nSAW: ", self.saw, "\n")

            # Periodically check the champion against the ultimate champion.
            if generation % check_interval == 0:
                print(
                    "The best fitness was in generation %d and had a fitness of %.3f"
                    % (self.best_gen, self.highest_fitness))

                total_individual_gain = 0
                total_wins = 0
                for i in range(n_champion_runs):
                    print("CHAMPION OF GENERATION %d, RUN %d OF %d \n" %
                          (generation, (i + 1), n_champion_runs))
                    individual_gain, wins = self.__run_best_against_all__()
                    total_individual_gain += individual_gain
                    total_wins += wins

                average_ig = total_individual_gain / n_champion_runs
                average_wins = total_wins / n_champion_runs
                print("CHAMPION OF GENERATION %d, IG: %d\n" %
                      (generation, average_ig))
                self.__compare_to_ultimate__(average_ig, average_wins,
                                             self.best_individual[0])
                self.logger.log_individual(average_ig)

            # Create the next generation.
            parents = self.selector.tournament_percentage(
                population, fitness_list)
            survivors = self.selector.select_best_percentage(
                population, fitness_list, BEST_SURVIVOR_PERCENTAGE)
            # Children fill the slots not taken by survivors.
            children = self.recombinator.blend(parents, NPOP - len(survivors))
            children, self.dev = self.mutator.uncorrelated_mutation_n_step_size(
                children, self.dev)
            population = np.append(children, survivors, axis=0)
# Esempio n. 6 (Example no. 6)
# 0
                        rotations_added += 1
                    else:  #there is not a correlation
                        covariance_matrix[j, k] = 0

            zeros_array = np.zeros(n_vars)
            mutation_amounts = np.random.multivariate_normal(
                mean=zeros_array, cov=covariance_matrix)
            new_pop[i] = np.clip(pop[i] + mutation_amounts, self.dom_l,
                                 self.dom_u)

        return new_pop, new_dev, new_rot


if __name__ == "__main__":
    # Manual demo: apply correlated mutation three times to a small random
    # population and print the state before and after each step.
    m = Mutation(MIN_DEV, ROTATION_MUTATION, STANDARD_DEVIATION, DOM_L, DOM_U)
    # NOTE(review): elsewhere Initialization is constructed as
    # Initialization(DOM_L, DOM_U); confirm the no-argument form is valid.
    i = Initialization()

    size = 10
    # One rotation angle per unordered pair of the `size` variables.
    rot_size = int((size * (size - 1)) / 2)
    # `size` individuals with `size` variables each.
    pop = i.uniform_initialization(size, size)
    dev = np.random.uniform(0, 0.0001, (size, size))
    rot = np.random.uniform(-np.pi, np.pi, (size, rot_size))

    # The loop variable rebinds `i`, which held the Initialization instance;
    # that instance is not used again in the visible code.
    for i in range(3):
        print(pop)
        print(dev)
        print(rot)
        print()
        pop, dev, rot = m.correlated_mutation(pop, dev, rot)
        print(pop)
        print(dev)