def calculate_metrics_by_dispersion_quantile(self, log=False):
    """Compute metrics on progressively filtered subsets of ``self.results_enriched``.

    For every threshold ``i`` in ``range(0, len(results_enriched) - EXTREME_QUANTILES)``
    the rows whose ``'order_std'`` value is greater than ``i`` are kept, and every
    metric registered for the configured problem type is evaluated on the
    ``'y_true'`` / ``'y_pred'`` columns of that subset. The result is stored in
    ``self.metrics_by_quantile`` as a DataFrame with one row per threshold and the
    columns ``'order_std'`` followed by the metric names.

    Args:
        log: When True, log the mean of the ``'std'`` column and the number of
            remaining rows for each threshold.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        ValueError: If ``self.config.problem_type`` is neither
            ``'classification'`` nor ``'regression'``.
    """
    problem_type = self.config.problem_type
    if problem_type == 'classification':
        metrics = CLASSFICATION_METRICS_DICT
    elif problem_type == 'regression':
        metrics = REGRESSION_METRICS_DICT
    else:
        # Fail early with a clear message instead of hitting an unbound
        # local variable further down.
        raise ValueError(f'Unsupported problem type: {problem_type!r}')

    results = self.results_enriched
    metrics_by_quantile_list = []
    for i in range(0, len(results) - EXTREME_QUANTILES):
        results_filtered = results.loc[results['order_std'] > i]
        if log:
            # The mean dispersion is only needed for the log line.
            mean_std = np.mean(results_filtered['std'].values)
            logger.info(
                f'Mean dispersion: {mean_std} for {len(results_filtered)} points'
            )

        y_true = results_filtered['y_true']
        y_pred = results_filtered['y_pred']
        row = [i]
        for metric in metrics.values():
            if metric is f1_score:
                # f1_score is the only metric called with an explicit averaging mode.
                metric_value = metric(y_true, y_pred, average='weighted')
            else:
                metric_value = metric(y_true, y_pred)
            row.append(metric_value)
        metrics_by_quantile_list.append(row)

    columns = ['order_std'] + list(metrics)
    self.metrics_by_quantile = pd.DataFrame(metrics_by_quantile_list,
                                            columns=columns)
    return self
def _prepare_population_report(self):
    """Gather fitness and species statistics for the current generation and log them.

    Walks ``self.population`` to find the genome with the highest fitness
    (stored in ``self.best_individual_key`` / ``self.best_individual_fitness``),
    then fills ``self.generation_data`` with:

    * the best individual's fitness and key,
    * the raw fitness list plus its min / max / mean / std rounded to 3 decimals,
    * the number of species and, per species, the representative's fitness,
      the species fitness (``None`` when not set) and the member count.

    Finally a summary is written to the module logger.
    """
    self.best_individual_key = -1
    # Start from -inf rather than a finite magic number so that a best genome
    # is still selected when every fitness is extremely negative.
    self.best_individual_fitness = float('-inf')
    fitness_all = []
    all_n_parameters = []
    for genome in self.population.values():
        fitness_all.append(genome.fitness)
        all_n_parameters.append(genome.calculate_number_of_parameters())
        if genome.fitness > self.best_individual_fitness:
            self.best_individual_fitness = genome.fitness
            self.best_individual_key = genome.key

    max_fitness = round(max(fitness_all), 3)
    mean_fitness = round(np.mean(fitness_all), 3)

    self.generation_data['best_individual_fitness'] = self.best_individual_fitness
    self.generation_data['best_individual_key'] = self.best_individual_key
    self.generation_data['all_fitness'] = fitness_all
    self.generation_data['min_fitness'] = round(min(fitness_all), 3)
    self.generation_data['max_fitness'] = max_fitness
    self.generation_data['mean_fitness'] = mean_fitness
    self.generation_data['std_fitness'] = round(np.std(fitness_all), 3)

    # species data
    species = list(self.species.values())
    n_species = len(self.species)
    representative_fitnesses_by_specie = [
        round(specie.representative.fitness, 3) for specie in species
    ]
    # Compare against None explicitly: a fitness of exactly 0 is a valid value
    # and must not be reported as missing.
    best_fitnesses_by_specie = [
        round(specie.fitness, 3) if specie.fitness is not None else None
        for specie in species
    ]
    n_genomes_by_specie = [len(specie.members) for specie in species]

    self.generation_data['n_species'] = n_species
    self.generation_data['representative_fitnesses_by_specie'] = \
        representative_fitnesses_by_specie
    self.generation_data['best_fitnesses_by_specie'] = best_fitnesses_by_specie
    self.generation_data['n_genomes_by_specie'] = n_genomes_by_specie

    # NOTE(review): the loop above uses the genome's own
    # calculate_number_of_parameters() method while this uses the free
    # function of the same name - confirm both give the same count.
    best_n_parameters = calculate_number_of_parameters(
        self.population.get(self.best_individual_key))

    logger.info(
        f'Generation {self.generation}. Best fitness: {max_fitness}. '
        f'N-Parameters Best: {best_n_parameters}')
    logger.info(
        f' Mean fitness: {mean_fitness}. '
        f'Mean N-Parameters: {round(np.mean(all_n_parameters), 3)}')
    logger.info(f' N-Species: {n_species}.')
    logger.info(f' N-Genomes by Specie: {n_genomes_by_specie}')
    logger.info(f' Best Fitness by Specie: {best_fitnesses_by_specie}')
def _run_generation(self, generation):
    """Evaluate the stored population of one generation and report on it.

    The population is loaded with ``self._read_population_dict``, scored by
    ``self.evaluation_engine``, the fitness values are written back through
    ``self._write_fitness`` and the generation is handed to ``self.report``
    (with ``species=None``).

    Args:
        generation: Generation identifier forwarded to the read / write /
            report helpers.
    """
    # NOTE: the misspelling in the message below is kept as-is on purpose.
    logger.info(f'Genaration {generation}')

    # read (an older variant used self._read_population(generation=generation))
    loaded = self._read_population_dict(generation=generation)

    evaluated = self.evaluation_engine.evaluate(population=loaded)
    self._write_fitness(population=evaluated, generation=generation)

    # report
    self.report.report_new_generation(
        generation=generation,
        population=evaluated,
        species=None,
    )
def _update_best(self, generation_report, population):
    """Replace the stored best individual when the generation produced a fitter one.

    Args:
        generation_report: Object exposing ``best_individual_fitness`` and
            ``best_individual_key`` for the generation just evaluated.
        population: Mapping from which the new best individual is fetched by key.

    Returns:
        True when ``self.best_individual`` was replaced, False otherwise.
    """
    incumbent = self.best_individual
    challenger_fitness = generation_report.best_individual_fitness

    # Guard clause: keep the incumbent unless it is missing or strictly worse.
    if incumbent is not None and not (incumbent['fitness'] < challenger_fitness):
        return False

    self.best_individual = population.get(generation_report.best_individual_key)
    best = self.best_individual

    logger.info(
        f" New best individual ({best['key']}) found "
        f"with fitness {round(best['fitness'], 3)}"
    )
    logger.debug(
        f" best individual has {len(best['nodes'])} Nodes "
        f"and {len(best['connections'])} Connections"
    )
    return True
def _update_best(self, potential_best_individual):
    """Adopt ``potential_best_individual`` as the best one if it is fitter.

    A candidate whose fitness is NaN is always rejected. Otherwise the
    candidate replaces ``self.best_individual`` when there is no incumbent,
    when the incumbent's fitness is NaN, or when the incumbent's fitness is
    strictly lower than the candidate's.

    Args:
        potential_best_individual: Genome-like object exposing ``fitness``,
            ``key``, ``node_genes`` and ``connection_genes``.

    Returns:
        True when ``self.best_individual`` was replaced, False otherwise.
    """
    candidate_fitness = potential_best_individual.fitness
    # NaN never compares equal to anything (not even itself), so an
    # `== np.nan` test is always False; np.isnan is the reliable check.
    if np.isnan(candidate_fitness):
        return False

    incumbent = self.best_individual
    if (incumbent is not None
            and not np.isnan(incumbent.fitness)
            and not (incumbent.fitness < candidate_fitness)):
        return False

    self.best_individual = potential_best_individual
    logger.info(
        f' New best individual ({self.best_individual.key}) found '
        f'with fitness {round(self.best_individual.fitness, 3)}')
    logger.debug(
        f' best individual has {len(self.best_individual.node_genes)} Nodes '
        f'and {len(self.best_individual.connection_genes)} Connections')
    return True
def evaluate(self, population: dict):
    """Compute and store the fitness of every genome in ``population``.

    When ``self.parallel_evaluation`` is set, one task tuple per genome is
    dispatched to ``self.pool`` and the returned values are stored as each
    genome's ``'fitness'``. Otherwise the dataset and loss are (re)loaded onto
    ``self`` and each genome is evaluated in-process, storing the negated
    result of ``evaluate_genome_jupyneat`` as its ``'fitness'``.

    Args:
        population: Mapping of genome key to genome. Each genome must support
            item assignment, because the fitness is written to
            ``genome['fitness']``.

    Returns:
        The same ``population`` mapping, with fitness values filled in.
    """
    logger.info(f'Population size is {len(population)}')
    # TODO: make n_samples increase with generation number
    n_samples = self.config.n_samples

    if self.parallel_evaluation:
        tasks = [
            (genome, get_loss(problem_type=self.config.problem_type),
             self.config.beta_type, self.config.problem_type,
             self.config.n_input, self.config.n_output,
             self.config.node_activation, self.batch_size, n_samples,
             self.is_gpu)
            for genome in population.values()
        ]

        # The integer division yields 0 whenever the population is smaller
        # than the number of processes, and imap rejects a chunksize below 1
        # (assuming self.pool is a multiprocessing pool), so clamp it.
        chunksize = max(1, len(population) // self.n_processes)

        # TODO: fix logging when using multiprocessing. Easy fix is to disable
        fitnesses = list(
            self.pool.imap(evaluate_genome_task_jupyneat, tasks,
                           chunksize=chunksize))

        # NOTE(review): the serial branch below negates the evaluation result
        # while this branch stores the task result unchanged - confirm that
        # evaluate_genome_task_jupyneat already returns a fitness.
        for genome, fitness in zip(population.values(), fitnesses):
            genome['fitness'] = fitness
    else:
        self.dataset = self._get_dataset()
        self.loss = self._get_loss()
        for genome in population.values():
            genome['fitness'] = -evaluate_genome_jupyneat(
                genome=genome,
                problem_type=self.config.problem_type,
                n_input=self.config.n_input,
                n_output=self.config.n_output,
                activation_type=self.config.node_activation,
                dataset=self.dataset,
                loss=self.loss,
                beta_type=self.config.beta_type,
                batch_size=self.batch_size,
                n_samples=n_samples,
                is_gpu=self.is_gpu)

    return population
def run(self):
    """Run the whole evolutionary process and produce the final report.

    Steps, in order:

    1. Initialise, speciate and evaluate the generation-0 population and
       report it.
    2. Run generations ``1 .. self.n_generations`` through
       ``self._run_generation``, stopping early with end condition
       ``'timeout'`` once the time elapsed since ``self.start_time`` exceeds
       ``TIMEOUT_SECONDS``.
    3. Optionally fine-tune the species (when
       ``self.evolution_configuration.is_fine_tuning`` is set) and report the
       resulting best genomes.
    4. Close the evaluation engine, persist the final report and logs, and
       send the final message.

    The evaluation engine is closed even when one of the first three steps
    raises; the exception itself is not swallowed.
    """
    logger.info('Started evolutionary process')
    end_condition = 'normal'

    try:
        # initialize population
        self.population = self.population_engine.initialize_population()
        self.speciation_engine.speciate(self.population, generation=0)
        self.population = self.evaluation_engine.evaluate(
            population=self.population)

        # report
        self.report.report_new_generation(
            generation=0,
            population=self.population,
            species=self.speciation_engine.species)

        for generation in range(1, self.n_generations + 1):
            self._run_generation(generation)
            elapsed = time.perf_counter() - self.start_time
            if elapsed > TIMEOUT_SECONDS:
                end_condition = 'timeout'
                break

        if self.evolution_configuration.is_fine_tuning:
            fine_tuner = FineTuner(species=self.speciation_engine.species,
                                   config=self.evolution_configuration,
                                   is_cuda=self.is_cuda,
                                   only_best=False)
            fine_tuner.run()
            best_genomes = fine_tuner.species_best_genome
            self.report.report_fine_tuning(best_genomes)
    finally:
        # Release the evaluation engine on failures too, not only on success.
        self.evaluation_engine.close()

    self.report.generate_final_report(end_condition=end_condition)\
        .persist_report()
    self.report.persist_logs()
    self._send_final_message()
    logger.info('Finished evolutionary process')
def run(self):
    """Launch the evolutionary service and evaluate every generation.

    Calls ``self._launch_evolutionary_service()`` (the Julia evolutionary
    service, per the original inline note), then runs
    ``self._run_generation`` for generations
    ``0 .. self.configuration.n_generations`` inclusive. The evaluation engine
    is closed afterwards, including when a generation raises; the exception
    itself still propagates to the caller.
    """
    logger.info('Started evolutionary process')
    try:
        # launch Julia Evolutionary service
        self._launch_evolutionary_service()

        # launch Python Evaluation service
        for generation in range(self.configuration.n_generations + 1):
            self._run_generation(generation)
    finally:
        # Release the evaluation engine on failures too, not only on success.
        self.evaluation_engine.close()

    # TODO: exception handling and final reporting are currently disabled.
    # The previous draft logged and notified on exceptions
    # (logger.exception / self.notifier.send), then called
    # self.report.generate_final_report(end_condition=...).persist_report(),
    # self.report.persist_logs() and notified the best individual.
    logger.info('Finished evolutionary process')
def _prepare_population_report(self):
    """Gather fitness statistics for the current generation and log them.

    Walks ``self.population`` (genomes are accessed like dicts, via
    ``genome['fitness']`` and ``genome['key']``) to find the genome with the
    highest fitness, stored in ``self.best_individual_key`` /
    ``self.best_individual_fitness``. ``self.generation_data`` is then filled
    with the best individual's fitness and key, the raw fitness list and its
    min / max / mean / std rounded to 3 decimals. Finally a summary, including
    parameter counts and the number of species, is written to the module
    logger.
    """
    self.best_individual_key = -1
    # Start from -inf rather than a finite magic number so that a best genome
    # is still selected when every fitness is extremely negative.
    self.best_individual_fitness = float('-inf')
    fitness_all = []
    all_n_parameters = []
    for genome in self.population.values():
        fitness = genome['fitness']
        fitness_all.append(fitness)
        all_n_parameters.append(calculate_number_of_parameters(genome))
        if fitness > self.best_individual_fitness:
            self.best_individual_key = genome['key']
            self.best_individual_fitness = fitness

    max_fitness = round(max(fitness_all), 3)
    mean_fitness = round(np.mean(fitness_all), 3)

    self.generation_data['best_individual_fitness'] = self.best_individual_fitness
    self.generation_data['best_individual_key'] = self.best_individual_key
    self.generation_data['all_fitness'] = fitness_all
    self.generation_data['min_fitness'] = round(min(fitness_all), 3)
    self.generation_data['max_fitness'] = max_fitness
    self.generation_data['mean_fitness'] = mean_fitness
    self.generation_data['std_fitness'] = round(np.std(fitness_all), 3)

    best_n_parameters = calculate_number_of_parameters(
        self.population.get(self.best_individual_key))

    logger.info(
        f'Generation {self.generation}. Best fitness: {max_fitness}. '
        f'N-Parameters Best: {best_n_parameters}')
    logger.info(
        f' Mean fitness: {mean_fitness}. '
        f'Mean N-Parameters: {round(np.mean(all_n_parameters), 3)}')
    logger.info(f' N-Species: {len(self.species)}')
def _run_generation(self, generation):
    """Advance the evolution by one generation.

    Reproduces a new population from the current species, evaluates it,
    re-speciates, reports the generation and, when ``generation`` matches
    ``evolution_configuration.generation_fix_architecture``, switches the
    configuration's ``fix_architecture`` flag on.

    Args:
        generation: Number of the generation being produced.
    """
    # Step 1 - breed the population of the new generation.
    self.population = self.population_engine.reproduce(
        species=self.speciation_engine.species,
        pop_size=self.population_engine.pop_size,
        generation=generation,
    )

    # Step 2 - score it.
    scored = self.evaluation_engine.evaluate(population=self.population)
    self.population = scored

    # Step 3 - rebuild the species from the scored population.
    self.speciation_engine.speciate(self.population, generation=generation)

    # Step 4 - generation report.
    report_kwargs = dict(
        generation=generation,
        population=self.population,
        species=self.speciation_engine.species,
    )
    self.report.report_new_generation(**report_kwargs)

    # Step 5 - scheduled parameter changes.
    settings = self.evolution_configuration
    if generation == settings.generation_fix_architecture:
        logger.info('Fixing Architecture')
        settings.fix_architecture = True
def _start_julia_service(self):
    """Start ``launch_julia_neat`` in a background daemon thread.

    The thread receives the directory returned by
    ``self._get_configuration_directory()`` as its only argument. The thread
    object is not kept and not joined.
    """
    logger.info("Launching Julia Thread")
    julia_thread = threading.Thread(
        target=launch_julia_neat,
        args=(self._get_configuration_directory(), ),
        daemon=True,
    )
    julia_thread.start()