def get_offspring(self, offspring_key, genome1: Genome, genome2: Genome):
    assert isinstance(genome1.fitness, (int, float))
    assert isinstance(genome2.fitness, (int, float))
    if genome1.fitness > genome2.fitness:
        parent1, parent2 = genome1, genome2
    else:
        parent1, parent2 = genome2, genome1

    offspring = Genome(key=offspring_key)

    # Inherit connection genes
    for key, cg1 in parent1.connection_genes.items():
        cg2 = parent2.connection_genes.get(key)
        if cg2 is None:
            # Excess or disjoint gene: copy from the fittest parent.
            logger.debug('Use connection copy')
            offspring.connection_genes[key] = cg1.copy()
        else:
            # Homologous gene: combine genes from both parents.
            offspring.connection_genes[key] = self._get_connection_crossover(cg1, cg2)

    # Inherit node genes
    for key, ng1 in parent1.node_genes.items():
        ng2 = parent2.node_genes.get(key)
        assert key not in offspring.node_genes
        if ng2 is None:
            # Excess or disjoint gene: copy from the fittest parent.
            logger.debug('Use node copy')
            offspring.node_genes[key] = ng1.copy()
        else:
            # Homologous gene: combine genes from both parents.
            offspring.node_genes[key] = self._get_node_crossover(ng1, ng2)
    return offspring
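# _get_connection_crossover is referenced above but not shown. A minimal
# sketch (an assumption, not this repo's implementation) following the
# standard NEAT rule: a homologous gene inherits each attribute at random
# from one of the two parents. The `weight` attribute is illustrative only.
import random

def _get_connection_crossover_sketch(cg1, cg2):
    child = cg1.copy()
    if random.random() < 0.5:
        child.weight = cg2.weight  # hypothetical attribute, for illustration
    return child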
def _is_next_layer_input(layer_node_keys):
    '''
    Given all the keys in a layer, returns True if all the keys are negative
    (i.e. the layer consists only of input nodes).
    '''
    logger.debug(layer_node_keys)
    return all(key < 0 for key in layer_node_keys)
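# Example, assuming the NEAT convention that input node keys are negative
# and hidden/output node keys are non-negative:
#   _is_next_layer_input([-1, -2])  -> True  (all keys are input nodes)
#   _is_next_layer_input([-1, 3])   -> False (node 3 is hidden/output)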
def reproduce(self, species, pop_size, generation):
    """
    Disclaimer: this is taken from NEAT-Python.

    Handles creation of genomes, either from scratch or by sexual or
    asexual reproduction from parents.
    """
    all_fitnesses = []
    remaining_species = []
    for stag_sid, stag_s, stagnant in self.stagnation_engine.get_stagnant_species(species, generation):
        if stagnant:
            logger.debug(f'Stagnant species: {stag_sid} - {stag_s}')
        else:
            all_fitnesses.extend(m.fitness for m in stag_s.members.values())
            remaining_species.append(stag_s)

    # No species left.
    if not remaining_species:
        species.species = {}
        raise ValueError('No species left. Reproduction failed...')

    # Find minimum/maximum fitness across the entire population, for use in
    # species adjusted fitness computation.
    min_fitness = min(all_fitnesses)
    max_fitness = max(all_fitnesses)
    # Do not allow the fitness range to be zero, as we divide by it below.
    # TODO: the ``1.0`` below is rather arbitrary, and should be configurable.
    fitness_range = max(1.0, max_fitness - min_fitness)
    for afs in remaining_species:
        # Compute adjusted fitness.
        msf = np.mean([m.fitness for m in afs.members.values()])
        af = (msf - min_fitness) / fitness_range
        afs.adjusted_fitness = af

    adjusted_fitnesses = [s.adjusted_fitness for s in remaining_species]
    avg_adjusted_fitness = np.mean(adjusted_fitnesses)  # type: float
    # TODO: log average adjusted fitness, e.g.
    # "Average adjusted fitness: {:.3f}".format(avg_adjusted_fitness)

    # Compute the number of new members for each species in the new generation.
    previous_sizes = [len(s.members) for s in remaining_species]
    min_species_size = self.min_species_size
    # Isn't the effective min_species_size going to be max(min_species_size,
    # self.reproduction_config.elitism)? That would probably produce more
    # accurate tracking of population sizes and relative fitnesses.
    # TODO: document.
    min_species_size = max(min_species_size, self.elitism)
    spawn_amounts = self.compute_spawn(adjusted_fitnesses, previous_sizes,
                                       pop_size, min_species_size)

    new_population = self._create_new_population(remaining_species, spawn_amounts)
    return new_population
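# compute_spawn is referenced above but not shown. Since the docstring credits
# NEAT-Python, here is a hedged sketch in the spirit of that library's
# allocation: each species gets offspring proportional to its adjusted
# fitness, subject to a minimum size, then the totals are normalized to the
# requested population size. This is an illustration, not the exact code.
def compute_spawn_sketch(adjusted_fitnesses, previous_sizes, pop_size, min_species_size):
    af_sum = sum(adjusted_fitnesses)
    spawn_amounts = []
    for af, prev_size in zip(adjusted_fitnesses, previous_sizes):
        # Proportional share of the population, clipped at the minimum size.
        if af_sum > 0:
            size = max(min_species_size, af / af_sum * pop_size)
        else:
            size = min_species_size
        # Move halfway from the previous size towards the target to damp swings.
        spawn_amounts.append(prev_size + round((size - prev_size) * 0.5))
    # Normalize so the next generation roughly matches pop_size.
    norm = pop_size / max(1, sum(spawn_amounts))
    return [max(min_species_size, int(round(n * norm))) for n in spawn_amounts]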
def create(self):
    # layer_node_keys = _filter_nodes_without_input_connection(
    #     node_keys=self.layer_node_keys, connections=self.connections)
    layer_node_keys = self.nodes_per_layer[self.layer_counter]
    layer_node_keys.sort()
    n_output = len(layer_node_keys)

    layer_connections = dict()
    input_node_keys = set(self.nodes_per_layer[self.layer_counter + 1])
    for connection_key, connection in self.connections.items():
        if isinstance(connection_key, str):
            connection_key_corrected = Genome._get_connection_key_from_key_str(connection_key)
        else:
            connection_key_corrected = connection_key
        input_node_key, output_node_key = connection_key_corrected
        if output_node_key in layer_node_keys:
            input_node_keys = input_node_keys.union({input_node_key})
            layer_connections[connection_key_corrected] = self.connections[connection_key]

    input_node_keys = list(input_node_keys)
    n_input = len(input_node_keys)
    self.layer = Layer(key=self.layer_counter, n_input=n_input, n_output=n_output)

    # sorted input keys
    logger.debug(f'Layer: {self.layer_counter}')
    original_input_keys = self.nodes_per_layer[self.layer_counter + 1]
    original_input_keys.sort()
    external_input_keys = self._get_external_input_keys(input_node_keys, original_input_keys)
    input_node_keys = original_input_keys + external_input_keys
    logger.debug(f'    Input Keys: {input_node_keys}')
    self.layer.input_keys = input_node_keys
    self.layer.original_input_keys = original_input_keys
    self.layer.external_input_keys = external_input_keys
    self.layer.output_keys = layer_node_keys

    # set parameters
    self.layer.weight_mean, self.layer.weight_log_var = \
        self._build_weight_tensors(layer_connections=layer_connections,
                                   input_node_keys=input_node_keys,
                                   layer_node_keys=layer_node_keys,
                                   n_input=n_input,
                                   n_output=n_output)
    self.layer.bias_mean, self.layer.bias_log_var = \
        self._build_bias_tensors(layer_node_keys, self.nodes)
    self.layer.validate()
    return self
def _remove_connection_that_introduces_cycles(genome: Genome,
                                              possible_connection_set: set) -> set:
    connections_to_remove = []
    for connection in possible_connection_set:
        connections = list(genome.connection_genes.keys()) + [connection]
        if exist_cycle(connections=connections):
            connections_to_remove.append(connection)
    logger.debug(f'Connections that introduce cycles: {connections_to_remove}')
    possible_connection_set -= set(connections_to_remove)
    return possible_connection_set
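# exist_cycle is referenced above but not shown here. A minimal compatible
# sketch, assuming `connections` is a list of (in_node_key, out_node_key)
# tuples: build an adjacency map and run a DFS with a recursion stack, so a
# back edge (an edge into a node currently on the stack) signals a cycle.
def exist_cycle_sketch(connections: list) -> bool:
    graph = {}
    for in_key, out_key in connections:
        graph.setdefault(in_key, []).append(out_key)

    visited, in_stack = set(), set()

    def visit(node):
        if node in in_stack:
            return True  # back edge found: cycle
        if node in visited:
            return False
        visited.add(node)
        in_stack.add(node)
        for neighbour in graph.get(node, []):
            if visit(neighbour):
                return True
        in_stack.remove(node)
        return False

    return any(visit(node) for node in list(graph))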
def create(self):
    # layer_node_keys = _filter_nodes_without_input_connection(
    #     node_keys=self.layer_node_keys, connections=self.connections)
    layer_node_keys = self.nodes_per_layer[self.layer_counter]
    layer_node_keys.sort()
    n_output = len(layer_node_keys)

    layer_connections = dict()
    input_node_keys = set(self.nodes_per_layer[self.layer_counter + 1])
    for connection_key, connection in self.connections.items():
        input_node_key, output_node_key = connection_key
        if output_node_key in layer_node_keys:
            input_node_keys = input_node_keys.union({input_node_key})
            if connection.enabled:
                layer_connections[connection_key] = self.connections[connection_key]

    input_node_keys = list(input_node_keys)
    n_input = len(input_node_keys)
    self.layer = Layer(key=self.layer_counter, n_input=n_input, n_output=n_output)

    # sorted input keys
    logger.debug(f'Layer: {self.layer_counter}')
    original_input_keys = self.nodes_per_layer[self.layer_counter + 1]
    # TODO: why is this sort needed?
    original_input_keys.sort()
    external_input_keys = self._get_external_input_keys(input_node_keys, original_input_keys)
    input_node_keys = original_input_keys + external_input_keys
    logger.debug(f'    Input Keys: {input_node_keys}')
    self.layer.input_keys = input_node_keys
    self.layer.original_input_keys = original_input_keys
    self.layer.external_input_keys = external_input_keys
    self.layer.output_keys = layer_node_keys

    # set parameters
    self.layer.weight_mean, self.layer.weight_log_var = \
        self._build_weight_tensors(layer_connections=layer_connections,
                                   input_node_keys=input_node_keys,
                                   layer_node_keys=layer_node_keys,
                                   n_input=n_input,
                                   n_output=n_output)
    self.layer.bias_mean, self.layer.bias_log_var = \
        self._build_bias_tensors(layer_node_keys, self.nodes)
    self.layer.validate()
    return self
def _update_best(self, generation_report, population):
    if self.best_individual is None or \
            self.best_individual['fitness'] < generation_report.best_individual_fitness:
        self.best_individual = population.get(generation_report.best_individual_key)
        logger.info(f'New best individual ({self.best_individual["key"]}) found '
                    f'with fitness {round(self.best_individual["fitness"], 3)}')
        logger.debug(f'Best individual has {len(self.best_individual["nodes"])} nodes '
                     f'and {len(self.best_individual["connections"])} connections')
        return True
    return False
def _update_best(self, potential_best_individual):
    # NaN never compares equal to itself, so ``fitness == np.nan`` is always
    # False; use np.isnan instead.
    if np.isnan(potential_best_individual.fitness):
        return False
    if self.best_individual is None or \
            np.isnan(self.best_individual.fitness) or \
            self.best_individual.fitness < potential_best_individual.fitness:
        self.best_individual = potential_best_individual
        logger.info(f'New best individual ({self.best_individual.key}) found '
                    f'with fitness {round(self.best_individual.fitness, 3)}')
        logger.debug(f'Best individual has {len(self.best_individual.node_genes)} nodes '
                     f'and {len(self.best_individual.connection_genes)} connections')
        return True
    return False
def speciate(self, population: dict, generation: int):
    """
    Disclaimer: code copied from NEAT-Python:
    https://neat-python.readthedocs.io/en/latest/

    Place genomes into species by genetic similarity.

    Note that this method assumes the current representatives of the species are
    from the old generation, and that after speciation has been performed, the
    old representatives should be dropped and replaced with representatives from
    the new generation. If you violate this assumption, you should make sure
    other necessary parts of the code are updated to reflect the new behavior.
    """
    if len(self.species) == 0:
        self.species = self._generate_initial_species(population)
        if len(self.species) == 0:
            raise ValueError('All species have died')
        return

    unspeciated_genomes = list(population.keys())
    distances = DistanceCalculation()
    new_representatives = {}
    new_members = {}

    # Using the last generation's species, compute the distance between each
    # genome in the new population and each species' representative.
    self._define_new_representatives(distances, new_members, new_representatives,
                                     population, unspeciated_genomes)

    # TODO: fill species if some have died (see initialization). No new species
    # are created here yet when len(self.species) < self.n_species; the original
    # placeholder was an empty ``while`` loop that would never terminate.

    # Partition population into species based on genetic similarity.
    new_representatives, new_members = \
        self._assign_genome_to_specie(distances, new_members, new_representatives,
                                      population, unspeciated_genomes)

    # Update species collection based on new speciation.
    self.species = self._members_to_species(self.species, new_members,
                                            new_representatives, population)

    self.gdmean = distances.get_mean_distance()
    self.gdstdev = distances.get_std_distance()
    logger.debug(f'Number of species: {len(self.species)}')
def evaluate(self, population: dict):
    '''
    population: a Dict[int, Genome]
    '''
    # TODO: make n_samples increase with generation number
    n_samples = self.config.n_samples
    if self.parallel_evaluation:
        tasks = []
        for genome in population.values():
            logger.debug(f'Genome {genome.key}: {genome.get_graph()}')
            x = (genome, get_loss(problem_type=self.config.problem_type),
                 self.config.beta_type, self.config.problem_type,
                 IS_TESTING, self.batch_size, n_samples, self.is_gpu)
            tasks.append(x)

        # TODO: fix logging when using multiprocessing. An easy fix is to disable it.
        # Guard the chunksize so it is at least 1: integer division can yield 0
        # when the population is smaller than the number of processes, and
        # Pool.imap rejects chunksize < 1.
        chunksize = max(1, len(population) // self.n_processes)
        fitnesses = list(self.pool.imap(evaluate_genome_task, tasks, chunksize=chunksize))

        for i, genome in enumerate(population.values()):
            genome.fitness = fitnesses[i]
    else:
        self.dataset = self._get_dataset()
        self.loss = self._get_loss()
        for genome in population.values():
            logger.debug(f'Genome {genome.key}: {genome.get_graph()}')
            genome.fitness = -evaluate_genome(genome=genome,
                                              problem_type=self.config.problem_type,
                                              dataset=self.dataset,
                                              loss=self.loss,
                                              is_testing=IS_TESTING,
                                              beta_type=self.config.beta_type,
                                              batch_size=self.batch_size,
                                              n_samples=n_samples,
                                              is_gpu=self.is_gpu)
    return population
def calculate_nodes_per_layer(links, output_node_keys: list, input_node_keys: list):
    '''
    ``links`` is a list or tuple of (in_node_key, out_node_key) pairs.
    Nodes must be connected; otherwise they won't be included.
    '''
    nodes_per_layer = {}
    nodes_per_layer[0] = output_node_keys
    layer_counter = 1
    layer_keys = output_node_keys
    is_not_done = True
    while is_not_done:
        previous_layer_keys = []
        for node_key in layer_keys:
            for in_node_key, out_node_key in links:
                if node_key == out_node_key:
                    previous_layer_keys.append(in_node_key)
        nodes_per_layer[layer_counter] = previous_layer_keys

        if _is_next_layer_input(previous_layer_keys):
            is_not_done = False
            nodes_per_layer[layer_counter] = input_node_keys
        else:
            layer_counter += 1
            layer_keys = previous_layer_keys

    # Walk the layers from inputs towards outputs, removing nodes that already
    # appear in a deeper layer so that each node belongs to exactly one layer.
    layers_indices = list(nodes_per_layer.keys())
    layers_indices.sort(reverse=True)
    node_keys = set(input_node_keys)
    for layer_index in layers_indices[1:]:
        logger.debug(f'Layer index: {layer_index}')
        repeated_nodes = set(nodes_per_layer[layer_index]).intersection(node_keys)
        node_keys = node_keys.union(set(nodes_per_layer[layer_index]))
        logger.debug(f'Repeated nodes: {repeated_nodes}')
        # remove repeated nodes from the layer
        nodes_per_layer[layer_index] = list(set(nodes_per_layer[layer_index]) - repeated_nodes)
    return nodes_per_layer
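# A worked example (not from the source): a network with inputs -1 and -2,
# one hidden node 1, output 0, and a skip connection -1 -> 0. Layer 0 is the
# output layer and the highest index is the input layer; the deduplication
# pass keeps node -1 only in the input layer despite the skip connection.
example_links = [(-1, 1), (-2, 1), (1, 0), (-1, 0)]
example_layers = calculate_nodes_per_layer(links=example_links,
                                           output_node_keys=[0],
                                           input_node_keys=[-1, -2])
# example_layers == {0: [0], 1: [1], 2: [-1, -2]}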
def speciate(self, population: dict, generation: int):
    """
    Disclaimer: code copied from NEAT-Python:
    https://neat-python.readthedocs.io/en/latest/

    Place genomes into species by genetic similarity.

    Note that this method assumes the current representatives of the species are
    from the old generation, and that after speciation has been performed, the
    old representatives should be dropped and replaced with representatives from
    the new generation. If you violate this assumption, you should make sure
    other necessary parts of the code are updated to reflect the new behavior.
    """
    if generation > 0 and len(self.species) == 0:
        raise ValueError('All species have died')

    unspeciated_genomes = list(population.keys())
    distances = DistanceCalculation()
    new_representatives = {}
    new_members = {}

    # Using the last generation's species, compute the distance between each
    # genome in the new population and each species' representative.
    for species_key, specie in self.species.items():
        candidates = []
        for genome_key in unspeciated_genomes:
            genome = population[genome_key]
            d = distances.get_distance(genome_0=specie.representative, genome_1=genome)
            candidates.append((d, genome))

        if self._enough_candidates(candidates):
            # The new representative is the genome closest to the current representative.
            _, new_rep = min(candidates, key=lambda x: x[0])
            new_rid = new_rep.key
            new_representatives[species_key] = new_rid
            new_members[species_key] = [new_rid]
            unspeciated_genomes.remove(new_rid)

    # Partition population into species based on genetic similarity.
    while unspeciated_genomes:
        gid = unspeciated_genomes.pop()
        g = population[gid]

        # Find the species with the most similar representative.
        candidates = []
        for sid, rid in new_representatives.items():
            rep = population[rid]
            d = distances.get_distance(genome_0=rep, genome_1=g)
            if d < self.compatibility_threshold:
                candidates.append((d, sid))

        if candidates:
            ignored_sdist, sid = min(candidates, key=lambda x: x[0])
            new_members[sid].append(gid)
        else:
            # No species is similar enough; create a new species with
            # this genome as its representative.
            sid = next(self.indexer)
            new_representatives[sid] = gid
            new_members[sid] = [gid]

    # Update species collection based on new speciation.
    self.genome_to_species = {}
    for sid, rid in new_representatives.items():
        s = self.species.get(sid)
        if s is None:
            logger.debug('New species')
            s = Specie(sid, generation)
            self.species[sid] = s

        members = new_members[sid]
        for gid in members:
            self.genome_to_species[gid] = sid

        member_dict = dict((gid, population[gid]) for gid in members)
        s.update(population[rid], member_dict)

    self.gdmean = distances.get_mean_distance()
    self.gdstdev = distances.get_std_distance()
    logger.debug(f'Number of species: {len(self.species)}')
def _get_external_input_keys(self, input_node_keys, original_input_keys):
    external_input_keys = list(set(input_node_keys) - set(original_input_keys))
    external_input_keys.sort()
    logger.debug(f'    External Input Keys: {external_input_keys}')
    return external_input_keys
def transform_genome_to_layers(genome: Genome) -> dict:
    layers = dict()
    nodes = genome.node_genes
    connections = genome.connection_genes

    nodes_per_layer = calculate_nodes_per_layer(
        links=list(connections.keys()),
        input_node_keys=genome.get_input_nodes_keys(),
        output_node_keys=genome.get_output_nodes_keys())

    layer_indices = list(nodes_per_layer.keys())
    layer_indices.sort()
    for layer_index in layer_indices[:-1]:
        original_nodes_in_layer = nodes_per_layer[layer_index]
        layer = LayerBuilder(nodes=nodes,
                             connections=connections,
                             layer_node_keys=original_nodes_in_layer,
                             nodes_per_layer=nodes_per_layer,
                             layer_counter=layer_index) \
            .create() \
            .get_layer()
        layers[layer_index] = layer

    # enrich layers
    for layer_counter, layer in layers.items():
        # add needed indices
        for node_key in layer.external_input_keys:
            index = None
            for layer_2 in layers.values():
                if node_key in layer_2.original_input_keys:
                    index = (layer_2.key, layer_2.input_keys.index(node_key))
                    break
            assert index is not None
            layer.indices_of_needed_nodes.append(index)
            layer.needed_nodes[node_key] = index

        # add indices to cache
        for node_key in layer.original_input_keys:
            for layer_2 in layers.values():
                if node_key in layer_2.external_input_keys:
                    index = layer.input_keys.index(node_key)
                    # add if not already in the list
                    if index not in layer.indices_of_nodes_to_cache:
                        layer.indices_of_nodes_to_cache.append(index)

        if len(layer.indices_of_needed_nodes) > 1:
            needed_node_keys = list(layer.needed_nodes.keys())
            needed_node_keys.sort()
            sorted_indices_of_needed_nodes = []
            for node_key in needed_node_keys:
                sorted_indices_of_needed_nodes.append(layer.needed_nodes[node_key])
            assert len(sorted_indices_of_needed_nodes) == len(layer.indices_of_needed_nodes)
            layer.indices_of_needed_nodes = sorted_indices_of_needed_nodes

        logger.debug(f'Indices to cache: {layer.indices_of_nodes_to_cache}')
        logger.debug(f'Indices needed from cache: {layer.indices_of_needed_nodes}')
    return layers
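# A hedged usage sketch (not from the source): inspect the layer decomposition
# produced above. Layer indices run from the output layer (0) up to the input
# layer; `external_input_keys` are activations a layer needs from a
# non-adjacent layer, which is what the cache indices computed above serve.
def describe_layers(genome: Genome):
    layers = transform_genome_to_layers(genome)
    for index in sorted(layers):
        layer = layers[index]
        logger.info(f'Layer {index}: inputs={layer.input_keys}, '
                    f'outputs={layer.output_keys}, '
                    f'external={layer.external_input_keys}')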