Esempio n. 1
0
    def __init__(self, x_training_data, y_training_data, config,
                 fitness_threshold):
        """
        Store the run settings, build the starting population and speciate it.

        :param x_training_data: Training inputs; ``shape[1]`` is passed on as the number of features
        :param y_training_data: Training labels, stored unchanged
        :param config: Object holding the run parameters; ``population_size`` is read from it here
        :param fitness_threshold: Fitness value at which the algorithm should stop (if set)
        """
        # Everything handed in by the caller is kept as-is
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.config = config
        self.fitness_threshold = fitness_threshold

        # Nothing has been evaluated yet, so there is no best genome so far
        self.best_all_time_genome = None

        # Reproduction handler. Note that the Stagnation class itself (not an instance) is handed over
        self.reproduction = Reproduce(stagnation=Stagnation, config=config)

        # One statistics tracker is shared between this object and the species container
        tracker = GenerationStatistics()
        self.generation_tracker = tracker
        self.species_set = SpeciesSet(config=config,
                                      generation_tracker=tracker)

        # Generation zero: create the genomes first ...
        self.population = self.reproduction.create_new_population(
            population_size=config.population_size,
            num_features=x_training_data.shape[1])

        # ... then split them into species using a fixed threshold of 3
        self.species_set.speciate(population=self.population,
                                  compatibility_threshold=3,
                                  generation=0)
    def __init__(self, key, size, elitism=1, state=None):
        '''
        Class for populations.

        key     -- population key
        size    -- population size
        elitism -- number of members that must be passed from previous gen to next gen
        state   -- optional (population, reproduction) pair to start from; when it is
                   None a new population is created and speciated instead
        '''
        self.key = key
        self.size = size
        self.best_genome = None
        self.max_complex_genome = None
        self.min_complex_genome = None
        self.avg_complexity = None
        self.max_dict = {}
        self.last_best = 0
        self.current_gen = 0
        self.elitism = elitism
        self.reproduction = Reproduction()
        self.species = SpeciesSet(3.5)

        # Identity check: None is a singleton, and "==" could be intercepted by
        # a custom __eq__ on whatever object is passed in as state
        if state is None:
            # Create new population
            self.population = self.reproduction.create_new_population(
                self.size)
            self.species.speciate(self.population, 0)
        else:
            # Assign values from state (this replaces the Reproduction object
            # created above)
            self.population, self.reproduction = state
Esempio n. 3
0
    def __init__(self, seed_genome):
        """
        Build the initial state of the population from a single seed genome.

        :param seed_genome: Genome that is speciated on its own before generation 0 is produced
        """
        # Nothing has been evaluated yet; -1 marks "before generation 0"
        self.generation = -1
        self.best_species = None
        self.best_genome = None
        self.species_set = SpeciesSet()

        # The seed is placed into the species set by itself first
        self.species_set.speciate([seed_genome])

        # Generation 0 comes out of self.reproduce (the seed genome is copied) and is then speciated as well
        generation_zero = self.reproduce(0)
        self.species_set.speciate(generation_zero)
Esempio n. 4
0
    def __init__(self,
                 x_training_data,
                 y_training_data,
                 x_test_data,
                 y_test_data,
                 config,
                 fitness_threshold,
                 f1_score_threshold,
                 algorithm_running=None):
        """
        Store the run settings, build the starting population and speciate it.

        :param x_training_data: Training inputs; ``shape[1]`` is passed on as the number of features
        :param y_training_data: Training labels
        :param x_test_data: Test inputs, stored unchanged
        :param y_test_data: Test labels, stored unchanged
        :param config: Object holding the run parameters; ``population_size`` is read from it here
        :param fitness_threshold: Fitness value at which the algorithm should stop (if set)
        :param f1_score_threshold: F1 score threshold, stored unchanged
        :param algorithm_running: Optional name of the experiment being run (e.g. xor with 5000 examples or xor
        with 200 examples and noise)
        """
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = Reproduce(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        self.f1_score_threshold = f1_score_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(
            config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.x_test = x_test_data
        self.y_test = y_test_data

        # Keep track of best genome through generations
        self.best_genome_history = {}

        # Keeps information of population complexity for each generation
        self.population_complexity_tracker = {}

        # Defines which of the algorithms is being currently tested. The attribute is always assigned (possibly to
        # None) so that reading it later can never fail with an AttributeError when no name was given
        self.algorithm_running = algorithm_running

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(
            population_size=self.config.population_size,
            num_features=x_training_data.shape[1])

        # Speciate the initial population
        self.species_set.speciate(population=self.population,
                                  compatibility_threshold=3,
                                  generation=0)
Esempio n. 5
0
class NEAT:
    """
    Drives a NEAT run: every generation the genomes are evaluated, the population is reproduced and the new
    population is split into species again.
    """

    def __init__(self, x_training_data, y_training_data, config,
                 fitness_threshold):
        """
        Store the run settings, build the starting population and speciate it.

        :param x_training_data: Training inputs; ``shape[1]`` is passed on as the number of features
        :param y_training_data: Training labels
        :param config: Object holding the run parameters (``population_size`` and ``compatibility_threshold`` are
        read by this class)
        :param fitness_threshold: Fitness above which ``run`` stops early; ``None`` disables the check
        """
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = Reproduce(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(
            config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(
            population_size=self.config.population_size,
            num_features=x_training_data.shape[1])

        # Speciate the initial population
        # NOTE(review): the threshold is hard-coded to 3 here while run() uses config.compatibility_threshold
        self.species_set.speciate(population=self.population,
                                  compatibility_threshold=3,
                                  generation=0)

    def evaluate_population(self, use_backprop, generation):
        """
        Calculates the fitness value for each individual genome in the population
        :type use_backprop: True or false on whether you're calculating the fitness using backprop or not
        :param generation: Which generation number it currently is
        :return: The best genome of the population (None when the population is empty)
        """

        # Should return the best genome
        current_best_genome = None

        for genome in self.population.values():

            genome_nn = GenomeNeuralNetwork(
                genome=genome,
                x_train=self.x_train,
                y_train=self.y_train,
                create_weights_bias_from_genome=True,
                activation_type='sigmoid',
                learning_rate=0.1,
                num_epochs=1000,
                batch_size=64)

            # Optimise the neural_network_first. However, the generation should allow for one pass so that we are not
            #  just optimising all the same topologies
            if use_backprop and generation > 1:
                print('\n')
                print('OPTIMISING GENOME')
                # NOTE: Genome fitness can be none due to crossover because fitness value not carried over
                print('Genome Fitness Before: {}'.format(genome.fitness))
                genome_nn.optimise(print_epoch=False)

            # We use genome_nn.x_train instead of self.x_train because the genome_nn might have deleted a row if there
            # is no connection to one of the sources
            cost = genome_nn.run_one_pass(input_data=genome_nn.x_train,
                                          labels=self.y_train,
                                          return_cost_only=True)

            # The fitness is the negative of the cost. Because less cost = greater fitness
            genome.fitness = -cost

            # Only print genome fitness after is back prop is used since back prop takes a long time so this can be a
            #  way of tracking progress in the meantime
            if use_backprop and generation > 1:
                print('Genome Fitness After: {}'.format(genome.fitness))

            if current_best_genome is None or genome.fitness > current_best_genome.fitness:
                current_best_genome = genome

        return current_best_genome

    def update_population_toplogy_info(self):
        """
        Store the population's mean node counts, mean connection counts and mean fitness on the generation
        tracker.
        """
        num_nodes_overall = []
        num_nodes_enabled = []
        num_connections_overall = []
        num_connections_enabled = []
        all_fitnesses = []
        for genome in self.population.values():
            num_nodes_overall.append(len(genome.nodes))
            num_nodes_enabled.append(len(genome.get_active_nodes()))
            num_connections_overall.append(len(genome.connections))
            num_connections_enabled.append(
                genome.check_connection_enabled_amount())
            # A fitness of None means the genome has no fitness yet (the value is not carried over by crossover).
            # A fitness of exactly 0 is a real value and has to be counted, so compare against None instead of
            # relying on truthiness
            if genome.fitness is not None:
                all_fitnesses.append(genome.fitness)

        self.generation_tracker.mean_number_connections_enabled = np.mean(
            num_connections_enabled)
        self.generation_tracker.mean_number_connections_overall = np.mean(
            num_connections_overall)

        self.generation_tracker.mean_number_nodes_enabled = np.mean(
            num_nodes_enabled)
        self.generation_tracker.mean_number_nodes_overall = np.mean(
            num_nodes_overall)

        self.generation_tracker.average_population_fitness = np.mean(
            all_fitnesses)

    def run(self, max_num_generations, use_backprop,
            print_generation_information):
        """
        Run the algorithm
        :param max_num_generations: Upper bound on the number of generations that are evaluated
        :param use_backprop: Whether genomes are optimised with backprop (only from the second generation on)
        :param print_generation_information: Whether the tracker prints its information every generation
        :return: The best genome seen over all generations (None if no generation was run)
        :raises CompleteExtinctionException: If no species are left after reproduction
        """

        current_gen = 0
        while current_gen < max_num_generations:
            # Every generation increment
            current_gen += 1

            self.generation_tracker.population_size = len(self.population)

            start_evaluate_time = time.time()
            # Evaluate the current generation and get the best genome in the current generation
            best_current_genome = self.evaluate_population(
                use_backprop=use_backprop, generation=current_gen)
            end_evaluate_time = time.time()
            self.generation_tracker.evaluate_execute_time = end_evaluate_time - start_evaluate_time

            # Keep track of the best genome across generations
            if self.best_all_time_genome is None or best_current_genome.fitness > self.best_all_time_genome.fitness:
                self.best_all_time_genome = best_current_genome

            self.generation_tracker.best_all_time_genome_fitness = self.best_all_time_genome.fitness

            # If the fitness threshold is met, stop the algorithm. The threshold is optional, so only compare when
            # one was actually given (comparing a number with None raises a TypeError)
            if (self.fitness_threshold is not None
                    and self.best_all_time_genome.fitness > self.fitness_threshold):
                break

            start_reproduce_time = time.time()

            # Reset attributes for the current generation
            self.generation_tracker.reset_tracker_attributes()

            # Reproduce and get the next generation
            self.population = self.reproduction.reproduce(
                species_set=self.species_set,
                population_size=self.config.population_size,
                generation=current_gen,
                generation_tracker=self.generation_tracker,
                # current_gen should be greater than one to use
                # backprop_mutation because we let the first generation
                # mutate just as if it was the normal genetic algorithm,
                # so that we're not optimising all of the same structure
                backprop_mutation=(use_backprop and current_gen > 1))
            end_reproduce_time = time.time()
            self.generation_tracker.reproduce_execute_time = end_reproduce_time - start_reproduce_time

            # TODO: Uncomment this if it causes an issue
            # # Allow for some leeway in population size (+- 5)
            # range_value = 5
            # range_of_population_sizes = set(range(self.config.population_size - range_value,
            #                                       self.config.population_size + range_value + 1))
            # if len(self.population) not in range_of_population_sizes:
            #     raise Exception('There is an incorrect number of genomes in the population')

            # Check to ensure no genes share the same connection gene addresses. (This problem has been fixed but is
            # here just in case now).
            self.ensure_no_duplicate_genes()

            # Check if there are any species, if not raise an exception. TODO: Let user reset population if extinction
            if not self.species_set.species:
                raise CompleteExtinctionException()

            start_specify_time = time.time()
            # Speciate the current generation
            self.species_set.speciate(
                population=self.population,
                generation=current_gen,
                compatibility_threshold=self.config.compatibility_threshold,
                generation_tracker=self.generation_tracker)
            end_specify_time = time.time()
            self.generation_tracker.species_execute_time = end_specify_time - start_specify_time

            self.update_population_toplogy_info()

            self.generation_tracker.update_generation_information(
                generation=current_gen)
            if print_generation_information:
                self.generation_tracker.print_generation_information(
                    generation_interval_for_graph=150)

            # Gives distribution of the weights in the population connections
            self.reproduction.show_population_weight_distribution(
                population=self.population)

        return self.best_all_time_genome

    def ensure_no_duplicate_genes(self):
        """
        Sanity check that no connection gene object appears more than once across the whole population.

        :raises Exception: As soon as a connection gene is encountered a second time
        """
        seen_connections = set()
        for genome in self.population.values():
            for connection in genome.connections.values():
                if connection in seen_connections:
                    raise Exception('You have duplicated a connection gene')
                seen_connections.add(connection)
class Population():
    '''
    A population of genomes that is evolved generation by generation.
    '''

    def __init__(self, key, size, elitism=1, state=None):
        '''
        Class for populations.

        key     -- population key
        size    -- population size
        elitism -- number of members that must be passed from previous gen to next gen
        state   -- optional (population, reproduction) pair to start from; when it is
                   None a new population is created and speciated instead
        '''
        self.key = key
        self.size = size
        self.best_genome = None
        self.max_complex_genome = None
        self.min_complex_genome = None
        self.avg_complexity = None
        self.max_dict = {}
        self.last_best = 0
        self.current_gen = 0
        self.elitism = elitism
        self.reproduction = Reproduction()
        self.species = SpeciesSet(3.5)

        # Identity check: None is a singleton, and "==" could be intercepted by
        # a custom __eq__ on whatever object is passed in as state
        if state is None:
            # Create new population
            self.population = self.reproduction.create_new_population(
                self.size)
            self.species.speciate(self.population, 0)
        else:
            # Assign values from state (this replaces the Reproduction object
            # created above)
            self.population, self.reproduction = state

    def run(self, task, goal, generations=None):
        '''
        Run evolution on a given task for a number of generations or until
        a goal is reached.

        task -- the task to be solved; called with a list of (key, genome)
                pairs and expected to set each genome's fitness
        goal -- the goal to reach for the given task that defines a solution
        generations -- the max number of generations to run evolution for;
                       None means no limit (run until the goal is reached)

        Returns the best genome found.
        '''
        self.current_gen = 0
        reached_goal = False
        # Plot data
        best_fitnesses = []
        max_complexity = []
        min_complexity = []
        avg_complexity = []
        # "generations is None" is checked first so that the default value is
        # never compared with an int
        while (generations is None
               or self.current_gen < generations) and not reached_goal:
            # Assess fitness of current population
            task(list(iteritems(self.population)))
            # Find best genome in current generation and update avg fitness
            curr_best = None
            curr_max_complex = None
            curr_min_complex = None
            avg_complexities = 0
            for genome in itervalues(self.population):
                avg_complexities += genome.complexity()
                # Update generation's most fit
                if curr_best is None or genome.fitness > curr_best.fitness:
                    curr_best = genome
                # Update generation's most complex
                if curr_max_complex is None or genome.complexity(
                ) > curr_max_complex.complexity():
                    curr_max_complex = genome
                # Update generation's least complex
                if curr_min_complex is None or genome.complexity(
                ) < curr_min_complex.complexity():
                    curr_min_complex = genome

            # Update global best genome if possible
            if self.best_genome is None or curr_best.fitness > self.best_genome.fitness:
                self.best_genome = curr_best

            # Update global most and least complex genomes
            if self.max_complex_genome is None or curr_max_complex.complexity(
            ) > self.max_complex_genome.complexity():
                self.max_complex_genome = curr_max_complex
            if self.min_complex_genome is None or curr_min_complex.complexity(
            ) < self.min_complex_genome.complexity():
                self.min_complex_genome = curr_min_complex

            self.max_dict[self.current_gen] = self.max_complex_genome

            # Reporters
            report_fitness(self)
            report_species(self.species, self.current_gen)
            report_output(self)
            best_fitnesses.append(self.best_genome.fitness)
            max_complexity.append(self.max_complex_genome.complexity())
            min_complexity.append(self.min_complex_genome.complexity())
            mean_complexity = (avg_complexities + 0.0) / len(self.population)
            avg_complexity.append(mean_complexity)
            # "avg_complex" is the name that was written here historically and
            # is kept for existing readers; "avg_complexity" is the attribute
            # that __init__ declares, so it is kept up to date as well
            self.avg_complex = mean_complexity
            self.avg_complexity = mean_complexity

            # Reached fitness goal, we can stop
            if self.best_genome.fitness >= goal:
                reached_goal = True

            # Create new unspeciated population based on current population's fitness
            self.population = self.reproduction.reproduce_with_species(
                self.species, self.size, self.current_gen)
            # Check for species extinction (species did not perform well)
            if not self.species.species:
                print("!!! Species went extinct !!!")
                self.population = self.reproduction.create_new_population(
                    self.size)

            # Speciate new population
            self.species.speciate(self.population, self.current_gen)
            self.current_gen += 1

        # One x value per generation that was actually run
        generation_axis = range(self.current_gen)
        plot_fitness(generation_axis, best_fitnesses)
        return self.best_genome
class NEATMultiClass:

    def __init__(self, x_training_data, y_training_data, x_test_data, y_test_data, config, fitness_threshold,
                 f1_score_threshold, algorithm_running=None):
        """
        Store the run settings, build the starting population and speciate it.

        :param x_training_data: Training inputs; ``shape[1]`` is passed on as the number of features
        :param y_training_data: Training labels; ``shape[1]`` is passed on as the number of classes
        :param x_test_data: Test inputs, stored unchanged
        :param y_test_data: Test labels, stored unchanged
        :param config: Object holding the run parameters; ``population_size`` is read from it here
        :param fitness_threshold: Fitness value at which the algorithm should stop (if set)
        :param f1_score_threshold: F1 score at which the algorithm should stop (if set)
        :param algorithm_running: Optional name of the experiment being run (e.g. xor with 5000 examples or xor
        with 200 examples and noise)
        """
        # Where all the parameters are saved
        self.config = config
        # Takes care of reproduction of populations
        self.reproduction = ReproduceMultiClass(stagnation=Stagnation, config=config)
        self.generation_tracker = GenerationStatistics()
        # Track the best genome across generations
        self.best_all_time_genome = None
        # If the fitness threshold is met it will stop the algorithm (if set)
        self.fitness_threshold = fitness_threshold
        self.f1_score_threshold = f1_score_threshold
        # A class containing the different species within the population
        self.species_set = SpeciesSet(config=config, generation_tracker=self.generation_tracker)
        self.x_train = x_training_data
        self.y_train = y_training_data
        self.x_test = x_test_data
        self.y_test = y_test_data

        # Keep track of best genome through generations
        self.best_genome_history = {}

        # Keeps information of population complexity for each generation
        self.population_complexity_tracker = {}

        # Defines which of the algorithms is being currently tested. The attribute is always assigned (possibly to
        # None): evaluate_population and save_run_information read it unconditionally, so leaving it undefined when
        # no name is given would end in an AttributeError there
        self.algorithm_running = algorithm_running

        # Initialise the starting population
        self.population = self.reproduction.create_new_population(population_size=self.config.population_size,
                                                                  num_features=x_training_data.shape[1],
                                                                  num_classes=y_training_data.shape[1])

        # Speciate the initial population
        self.species_set.speciate(population=self.population, compatibility_threshold=3, generation=0)

    @staticmethod
    def create_genome_nn(genome, x_data, y_data, algorithm_running=None):
        """
        Build the neural network for a genome using the training settings of the given experiment.

        :param genome: The genome the network is created from
        :param x_data: Input data handed to the network as ``x_train``
        :param y_data: Labels handed to the network as ``y_train``
        :param algorithm_running: Name of the experiment; unknown names (and None) fall back to the defaults
        :return: A ``GenomeNeuralNetworkMultiClass`` instance
        """
        # TODO: I encountered a bug where I trained a genome on a relu activation function, but when I recreated using this function I had problems because I forgot that everything defined inside here uses sigmoid. Should improve implementation of this
        # TODO: The x_data, y_data isn't always used, particularly if we only create the network to get a prediction. This implementation should be improved for clarity

        # Every experiment currently shares the learning rate and the activation function; only the number of
        # epochs and the batch size differ
        learning_rate = 0.1
        activation_type = 'sigmoid'

        # (experiment name, (num_epochs, batch_size))
        presets = (
            ('xor_full', (1000, 64)),
            ('xor_small_noise', (5000, 10)),
            ('circle_data', (5000, 50)),
            ('shm_two_class', (5000, 50)),
            ('shm_multi_class', (250, 64)),
        )

        # TODO: Choose more suitable default
        num_epochs, batch_size = 500, 64
        for experiment_name, settings in presets:
            if algorithm_running == experiment_name:
                num_epochs, batch_size = settings
                break

        return GenomeNeuralNetworkMultiClass(genome=genome, x_train=x_data, y_train=y_data,
                                             create_weights_bias_from_genome=True, activation_type=activation_type,
                                             learning_rate=learning_rate, num_epochs=num_epochs, batch_size=batch_size)

    def evaluate_population(self, use_backprop, generation):
        """
        Calculates the fitness value for each individual genome in the population
        :type use_backprop: True or false on whether you're calculating the fitness using backprop or not
        :param generation: Which generation number it currently is
        :return: The best genome of the population
        """

        # Should return the best genome
        current_best_genome = None
        current_worst_genome = None

        for genome in self.population.values():

            genome_nn = self.create_genome_nn(genome=genome, x_data=self.x_train, y_data=self.y_train,
                                              algorithm_running=self.algorithm_running)

            # Optimise the neural_network_first. However, the generation should allow for one pass so that we are not
            #  just optimising all the same topologies
            genome_fitness_before = genome.fitness
            if use_backprop and generation > 1:
                print('\n')
                print('OPTIMISING GENOME')
                genome_nn.optimise(print_epoch=False)

            # We use genome_nn.x_train instead of self.x_train because the genome_nn might have deleted a row if there
            # is no connection to one of the sources
            cost = genome_nn.run_one_pass(input_data=genome_nn.x_train, labels=self.y_train, return_cost_only=True)

            # The fitness is the negative of the cost. Because less cost = greater fitness
            genome.fitness = -cost

            # Only print genome fitness after is back prop is used since back prop takes a long time so this can be a
            #  way of tracking progress in the meantime
            if use_backprop and generation > 1:
                # NOTE: Genome fitness can be none due to crossover because fitness value not carried over
                print('Genome Fitness Before: {}'.format(genome_fitness_before))
                print('Genome Fitness After: {}'.format(genome.fitness))

            if current_best_genome is None or genome.fitness > current_best_genome.fitness:
                current_best_genome = genome
            if current_worst_genome is None or genome.fitness < current_worst_genome.fitness:
                current_worst_genome = genome

        return current_best_genome, current_worst_genome

    def update_population_toplogy_info(self, current_gen):
        num_nodes_overall = []
        num_nodes_enabled = []
        num_connections_overall = []
        num_connections_enabled = []
        all_fitnesses = []
        for genome in self.population.values():
            num_nodes_overall.append(len(genome.nodes))
            num_nodes_enabled.append(len(genome.get_active_nodes()))
            num_connections_overall.append(len(genome.connections))
            num_connections_enabled.append(genome.check_connection_enabled_amount())
            if genome.fitness:
                all_fitnesses.append(genome.fitness)

        avg_num_connections_enabled = np.mean(num_connections_enabled)
        avg_num_connections_overall = np.mean(num_connections_overall)
        avg_num_nodes_enabled = np.mean(num_nodes_enabled)
        avg_num_nodes_overall = np.mean(num_nodes_overall)

        complexity_tracker = {'num_connections_enabled': avg_num_connections_enabled,
                              'num_connections_overall': avg_num_connections_overall,
                              'num_nodes_enabled': avg_num_nodes_enabled, 'num_nodes_overall': avg_num_nodes_overall}
        self.population_complexity_tracker[current_gen] = complexity_tracker

        self.generation_tracker.mean_number_connections_enabled = avg_num_connections_enabled
        self.generation_tracker.mean_number_connections_overall = avg_num_connections_overall
        self.generation_tracker.mean_number_nodes_enabled = avg_num_nodes_enabled
        self.generation_tracker.mean_number_nodes_overall = avg_num_nodes_overall

        self.generation_tracker.average_population_fitness = np.mean(all_fitnesses)

    def add_successful_genome_for_test(self, current_gen, use_this_genome):
        """
        This function adds a pre programmed genome which is known to converge for the XOR dataset.
        :param current_gen:
        :param use_this_genome: Whether this genome should be added to the population or not
        :return:
        """
        # Wait for current_gen > 1 because if using backprop the first gen skips using backprop.
        if current_gen > 1 and use_this_genome:
            node_list = [
                NodeGene(node_id=0, node_type='source'),
                NodeGene(node_id=1, node_type='source'),
                NodeGene(node_id=2, node_type='output', bias=0.5),
                NodeGene(node_id=3, node_type='hidden', bias=1),
                NodeGene(node_id=4, node_type='hidden', bias=1),
                NodeGene(node_id=5, node_type='hidden', bias=1),
                NodeGene(node_id=6, node_type='hidden', bias=1),
            ]

            connection_list = [ConnectionGene(input_node=0, output_node=3, innovation_number=1, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=3, innovation_number=2, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=0, output_node=4, innovation_number=3, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=1, output_node=4, innovation_number=4, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=5, innovation_number=5, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=5, innovation_number=6, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=3, output_node=6, innovation_number=7, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=4, output_node=6, innovation_number=8, enabled=True,
                                              weight=np.random.randn()),
                               ConnectionGene(input_node=5, output_node=2, innovation_number=9, enabled=True,
                                              weight=np.random.rand()),
                               ConnectionGene(input_node=6, output_node=2, innovation_number=10, enabled=True,
                                              weight=np.random.randn())
                               ]

            test_genome = Genome(connections=connection_list, nodes=node_list, key=1)
            test_genome.fitness = -99999999999
            self.population[32131231] = test_genome

    @staticmethod
    def calculate_f_statistic(genome, x_test_data, y_test_data):
        """
        Score a genome's network on the given data with the sample-averaged F1 score.

        :param genome: Genome whose network is evaluated
        :param x_test_data: Inputs the network is run on
        :param y_test_data: Expected labels; its first two dimensions give the shape of the prediction matrix
        :return: The value returned by ``sklearn.metrics.f1_score`` with ``average='samples'``
        """
        network = NEATMultiClass.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        raw_outputs = network.run_one_pass(input_data=x_test_data, return_prediction_only=True)

        # Turn the raw outputs into hard predictions: a 1.0 at the position of the largest output of every row
        hard_predictions = np.zeros((y_test_data.shape[0], y_test_data.shape[1]))
        winners = [np.argmax(raw_outputs[row, :]) for row in range(raw_outputs.shape[0])]
        for row, winner in enumerate(winners):
            hard_predictions[row, winner] = 1.0

        return sklearn.metrics.f1_score(y_test_data, hard_predictions, average='samples')

    @staticmethod
    def calculate_accuracy(genome, x_test_data, y_test_data):
        """
        Calculate the percentage of rows a genome's network predicts exactly right.

        :param genome: Genome whose network is evaluated
        :param x_test_data: Inputs the network is run on
        :param y_test_data: Expected labels; a row counts as correct when the hard prediction equals it exactly
        :return: Percentage (0 to 100) of correctly predicted rows
        """
        network = NEATMultiClass.create_genome_nn(genome=genome, x_data=x_test_data, y_data=y_test_data)
        raw_outputs = network.run_one_pass(input_data=x_test_data, return_prediction_only=True)

        # Turn the raw outputs into hard predictions: a 1.0 at the position of the largest output of every row
        hard_predictions = np.zeros((y_test_data.shape[0], y_test_data.shape[1]))
        winners = [np.argmax(raw_outputs[row, :]) for row in range(raw_outputs.shape[0])]
        for row, winner in enumerate(winners):
            hard_predictions[row, winner] = 1.0

        # Count the rows where prediction and label agree in every position
        num_correct = sum(
            1 for row in range(y_test_data.shape[0])
            if np.array_equal(hard_predictions[row, :], y_test_data[row, :]))

        return (num_correct / y_test_data.shape[0]) * 100

    def save_run_information(self, current_gen):
        """
        Save the results of this run into a new ``run_<n>`` folder below
        ``algorithm_runs_multi/<algorithm_running>``.

        Written are: the best genome (pickle), the tracker's graphs, the generation tracker (pickle) and this
        whole instance (pickle).
        :param current_gen: Generation number handed to the tracker's ``plot_graphs``
        """
        base_filepath = 'algorithm_runs_multi'
        algorithm_filepath = '{}/{}'.format(base_filepath, self.algorithm_running)

        # Creates the base directory and the per-algorithm sub directory if either is missing. The sub directory
        # has to exist before it can be listed below, otherwise the very first run of an algorithm fails
        os.makedirs(algorithm_filepath, exist_ok=True)

        # Folders + 1 because it will be the next folder in the sub directory
        run_number = len(os.listdir(algorithm_filepath)) + 1
        file_path_for_run = '{}/run_{}'.format(algorithm_filepath, run_number)
        # The entry count can point at a name that is already taken (e.g. after an older run folder was removed),
        # so move on to the next free number instead of failing
        while os.path.exists(file_path_for_run):
            run_number += 1
            file_path_for_run = '{}/run_{}'.format(algorithm_filepath, run_number)

        # Make the directory before saving all other files
        os.makedirs(file_path_for_run)

        # Save best genome in pickle
        with open('{}/best_genome_pickle'.format(file_path_for_run), 'wb') as outfile:
            pickle.dump(self.best_all_time_genome, outfile)

        # Save graph information
        self.generation_tracker.plot_graphs(current_gen=current_gen, save_plots=True,
                                            file_path=file_path_for_run)

        # Save generation tracker in pickle
        with open('{}/generation_tracker'.format(file_path_for_run), 'wb') as outfile:
            pickle.dump(self.generation_tracker, outfile)

        # Save NEAT class instance so we can access the population again later
        with open('{}/NEAT_instance'.format(file_path_for_run), 'wb') as outfile:
            pickle.dump(self, outfile)

    def check_algorithm_break_point(self, current_gen, f1_score_of_best_all_time_genome, max_num_generations):
        """
        Decide whether the run should stop and, if so, save the run information.

        The run stops when any of the following holds:

        * ``self.fitness_threshold`` is truthy and the best all-time genome's
          fitness is greater than it
        * ``self.f1_score_threshold`` is truthy and the given F1 score is
          greater than it
        * ``current_gen`` is greater than ``max_num_generations``

        :return: True when a stop condition was met (after calling
                 ``save_run_information``), otherwise False
        """
        fitness_reached = bool(
            self.fitness_threshold and self.best_all_time_genome.fitness > self.fitness_threshold)
        f1_score_reached = bool(
            self.f1_score_threshold and f1_score_of_best_all_time_genome > self.f1_score_threshold)
        generations_exhausted = current_gen > max_num_generations

        if not (fitness_reached or f1_score_reached or generations_exhausted):
            return False

        self.save_run_information(current_gen=current_gen)
        return True

    def run(self, max_num_generations, use_backprop, print_generation_information, show_population_weight_distribution):
        """
        Run the algorithm generation by generation until a break point is reached.

        Each generation evaluates the population, updates the best all-time
        genome, reproduces, re-speciates and scores the best all-time genome on
        ``self.x_test`` / ``self.y_test`` (F1 score and accuracy). The loop ends
        when ``check_algorithm_break_point`` returns True.

        :param max_num_generations: forwarded to ``check_algorithm_break_point``
        :param use_backprop: forwarded to ``evaluate_population``; also enables
                             ``backprop_mutation`` during reproduction from the
                             second generation onwards
        :param print_generation_information: if truthy, the generation tracker
                                             prints its information every generation
        :param show_population_weight_distribution: if truthy, the weight
                                                    distribution of the population
                                                    is shown every generation
        :return: the best genome found across all generations
        :raises CompleteExtinctionException: if no species are left after reproduction
        """

        current_gen = 0
        # The break condition lives in check_algorithm_break_point (called at the end of the loop body)
        while True:
            # Generations are counted from 1
            current_gen += 1

            self.add_successful_genome_for_test(current_gen=current_gen, use_this_genome=False)

            self.generation_tracker.population_size = len(self.population)

            start_evaluate_time = time.time()
            # Evaluate the current generation and get the best and worst genome in the current generation
            best_current_genome, worst_current_genome = self.evaluate_population(use_backprop=use_backprop,
                                                                                 generation=current_gen)
            print('WORST CURRENT GENOME FITNESS: {}'.format(worst_current_genome.fitness))
            end_evaluate_time = time.time()
            self.update_population_toplogy_info(current_gen=current_gen)
            self.generation_tracker.evaluate_execute_time = end_evaluate_time - start_evaluate_time

            # Keep track of the best genome across generations
            if self.best_all_time_genome is None or best_current_genome.fitness > self.best_all_time_genome.fitness:
                # Record every generation in which a new best genome appeared
                self.best_genome_history[current_gen] = best_current_genome
                self.best_all_time_genome = best_current_genome

            self.generation_tracker.best_all_time_genome_fitness = self.best_all_time_genome.fitness

            start_reproduce_time = time.time()

            # Reset attributes for the current generation
            # NOTE(review): this runs after evaluate_execute_time and best_all_time_genome_fitness were stored on
            # the tracker; presumably the reset does not clear those -- confirm.
            self.generation_tracker.reset_tracker_attributes()

            # Reproduce and get the next generation
            self.population = self.reproduction.reproduce(species_set=self.species_set,
                                                          population_size=self.config.population_size,
                                                          generation=current_gen,
                                                          generation_tracker=self.generation_tracker,
                                                          # current_gen should be greater than one to use
                                                          # backprop_mutation because we let the first generation
                                                          # mutate just as if it was the normal genetic algorithm,
                                                          # so that we're not optimising all of the same structure
                                                          backprop_mutation=(use_backprop and current_gen > 1))
            end_reproduce_time = time.time()
            self.generation_tracker.reproduce_execute_time = end_reproduce_time - start_reproduce_time

            # Check to ensure no genomes share the same connection gene objects. (This problem has been fixed but
            # the check is kept here just in case.)
            self.ensure_no_duplicate_genes()

            # Check if there are any species, if not raise an exception. TODO: Let user reset population if extinction
            if not self.species_set.species:
                raise CompleteExtinctionException()

            start_specify_time = time.time()
            # Speciate the newly created generation
            self.species_set.speciate(population=self.population, generation=current_gen,
                                      compatibility_threshold=self.config.compatibility_threshold,
                                      generation_tracker=self.generation_tracker)
            end_specify_time = time.time()
            self.generation_tracker.species_execute_time = end_specify_time - start_specify_time

            # Score the best all-time genome on the test data
            f1_score_of_best_all_time_genome = self.calculate_f_statistic(
                self.best_all_time_genome, self.x_test, self.y_test)

            best_all_time_genome_accuracy = self.calculate_accuracy(genome=self.best_all_time_genome,
                                                                    x_test_data=self.x_test, y_test_data=self.y_test)

            self.generation_tracker.best_all_time_genome_f1_score = f1_score_of_best_all_time_genome
            self.generation_tracker.best_all_time_genome_accuracy = best_all_time_genome_accuracy
            self.generation_tracker.update_generation_information(generation=current_gen)

            if print_generation_information:
                self.generation_tracker.print_generation_information(generation_interval_for_graph=1,
                                                                     plot_graphs_every_gen=False)

            # Stop (the run information is saved by the check itself) once a threshold or the generation limit is hit
            if self.check_algorithm_break_point(f1_score_of_best_all_time_genome=f1_score_of_best_all_time_genome,
                                                current_gen=current_gen, max_num_generations=max_num_generations):
                break

            # Gives distribution of the weights in the population connections
            if show_population_weight_distribution:
                self.reproduction.show_population_weight_distribution(population=self.population)

        print('f1 score for best genome after optimising is: {}'.format(f1_score_of_best_all_time_genome))

        return self.best_all_time_genome

    def ensure_no_duplicate_genes(self):
        """
        Raise if any connection gene occurs more than once in the population.

        Every value of every genome's ``connections`` dict is counted (using the
        connection's own hashing/equality); a count above one anywhere in
        ``self.population`` is treated as an error.

        :raises Exception: if a connection gene was counted more than once
        """
        occurrences = {}
        for genome in self.population.values():
            for connection in genome.connections.values():
                occurrences[connection] = occurrences.get(connection, 0) + 1

        if any(count > 1 for count in occurrences.values()):
            raise Exception('You have duplicated a connection gene')
Esempio n. 8
0
class Population(object):

    def __init__(self, seed_genome):
        """
        Build the initial population from a single seed genome.

        seed_genome -- genome that is speciated first and then reproduced to
                       obtain the members of generation 0
        """
        # Nothing has been evaluated yet; run() increments this to 0 first
        self.generation = -1
        self.best_genome = None
        self.best_species = None

        self.species_set = SpeciesSet()
        self.species_set.speciate([seed_genome])

        # copy the seed genome for generation 0 (no elites are reserved)
        self.species_set.speciate(self.reproduce(0))

    def reproduce(self, num_elites):
        """
        Create the offspring for the next generation.

        The ``constants.POPULATION_SIZE - num_elites`` available slots are
        shared between the species in proportion to each species' total
        adjusted fitness. If the total adjusted fitness is zero, the slots are
        shared equally between the species.

        num_elites -- number of population slots reserved for elites, which
                      are not produced here

        Returns a list with the offspring of all species.
        """
        # The reason for doing things this way is to avoid
        # a gap where the full population size isn't used.
        # (See "The Extra Pixel Problem" or "The Thin White Stripe")

        species = self.species_set.species
        slots = constants.POPULATION_SIZE - num_elites

        # Running totals of adjusted fitness; each species is queried once.
        boundaries = []
        population_fitness = 0
        for s in species:
            population_fitness += s.total_adjusted_fitness()
            boundaries.append(population_fitness)

        if population_fitness == 0:
            # No fitness signal to share by: give every species an equal
            # weight instead of dividing by zero.
            boundaries = list(range(1, len(species) + 1))
            population_fitness = len(species)

        offspring = []
        top = 0

        # Each species reproduces in proportion to the
        # total adjusted fitness of that species.
        # The cut-off is derived from the running total divided by the grand
        # total, so the last fraction is exactly 1.0 and every slot is used.
        # Summing the per-species fractions instead can end just below 1.0
        # and lose a slot to int() truncation.
        for s, boundary in zip(species, boundaries):
            next_top = int(slots * (boundary / population_fitness))
            offspring.extend(s.reproduce(next_top - top))
            top = next_top

        return offspring

    def run(self, fitness_function, generations, iter, train):
        """
        Evolve the population and log the best fitness of every generation.

        A CSV file named ``"<iter>-<train>.csv"`` is written to the current
        working directory. It holds one row with the best genome's fitness
        per completed generation, followed by a final row with the
        hyper-parameters of the run.

        fitness_function -- callable forwarded to ``calculate_fitnesses``
        generations      -- maximum number of generations to run
        iter, train      -- only used to build the name of the CSV file
        """
        filename = str(iter) + "-" + str(train) + ".csv"

        # The csv module expects files given to csv.writer to be opened with
        # newline=''; otherwise an extra "\r" is added to each row on
        # platforms that write "\r\n" line endings.
        with open(filename, 'w', newline='') as csvfile:
            spamwriter = csv.writer(csvfile, delimiter=' ',
                                    quotechar='|', quoting=csv.QUOTE_MINIMAL)

            for _ in range(generations):
                self.generation += 1

                self.calculate_fitnesses(fitness_function)

                self.report()

                # Stop as soon as the target fitness is exceeded; no fitness
                # row is written for this last generation.
                if self.best_genome.fitness > constants.MAX_FITNESS:
                    break

                spamwriter.writerow([self.best_genome.fitness])

                # Kill off inferior organisms
                self.species_set.cull()

                elites = self.elites()

                # Create the next generation from the current generation.
                offspring = self.reproduce(len(elites))

                # Add in elites
                offspring.extend(elites)

                # Clear out the old population
                self.species_set.clear_species()

                # Update species age.
                for s in self.species_set.species:
                    s.age += 1

                # Divide the new population into species.
                self.species_set.speciate(offspring)

            # Final row: the hyper-parameters this run was executed with
            spamwriter.writerow([constants.SURVIVAL_THRESHOLD, K2GraphGenome.P_ADD_NODE, K2GraphGenome.P_ADD_LINK, K2GraphGenome.P_NODE, K2GraphGenome.P_LINK, K2GraphGenome.MUTATE_STD ])

    def calculate_fitnesses(self, fitness_function):

        lowest = None
        highest = None
        hg = None
        hs = None


        
        for s in self.species_set.species:
            pool = Pool(24) 

            results = pool.map(fitness_function, s.members)

            pool.close()
            pool.join()

            count = 0

            for o in s.members:

                fitness = results[count]
                o.fitness = fitness

                if highest is None or fitness > highest:
                    highest = fitness
                    hg = o
                    hs = s

                if lowest is None or fitness < lowest:
                    lowest = fitness

                count += 1


        # the reason for adding lowest fitness to o.fitness is to handle
        # the case when fitnesses are negative
        for s in self.species_set.species:
            for o in s.members:
                o.adjusted_fitness = (o.fitness + abs(lowest)) / len(s.members)

        if self.best_genome is None or hg.fitness >= self.best_genome.fitness:
            self.best_genome = hg
            self.best_species = hs

    def elites(self):
        """
        Collect the elite genomes of every species.

        The first ``constants.ELITISM`` members of each species are taken, in
        species order, and returned as one flat list.
        """
        return [
            genome
            for s in self.species_set.species
            for genome in s.members[:constants.ELITISM]
        ]

    def report(self):
        """
        Print a summary of the current generation to stdout.

        Printed are the generation number, the number of species with the id
        and size of each, the best organism (species id, fitness and adjusted
        fitness) and finally the hyper-parameter values, one per line.
        """
        all_species = self.species_set.species

        print("___GENERATION", self.generation, "___")
        print("Number of species: ", len(all_species))
        for current in all_species:
            print("species id:", current.id_, " size: ", len(current.members))

        champion = self.best_genome
        print("  Best Organism: ")
        print("    species ", self.best_species.id_)
        print("    fitness", champion.fitness)
        print("    adjusted fitness", champion.adjusted_fitness)

        # Disabled: phenotype visualisation
        #two_dimensional = False
        #symmetric = True
        #visualize(self.best_genome.get_phenotype(), two_dimensional, symmetric)

        # Disabled: dump the best phenotype as a PNG every 100 generations
        #if self.generation % 100 == 0:
        #    pheno = self.best_genome.get_phenotype()
        #    pheno = imresize(pheno, (32, 32), interp='nearest')
        #    name = str(self.generation) + '.png'
        #    imsave( name, pheno)

        # Disabled: node and edge listing of the best genome's network
        #print("printing nodes")
        #for node in self.best_genome.network.nodes(data=True):
        #    print(node)

        #print("printing edges")
        #for edge in self.best_genome.network.edges(data=True, keys=True):
        #    print(edge)

        # Hyper-parameters of the run, one value per line
        hyper_parameters = (
            constants.SURVIVAL_THRESHOLD,
            K2GraphGenome.P_ADD_NODE,
            K2GraphGenome.P_ADD_LINK,
            K2GraphGenome.P_NODE,
            K2GraphGenome.P_LINK,
            K2GraphGenome.MUTATE_STD,
        )
        for value in hyper_parameters:
            print(value)