Beispiel #1
0
    def calculate_metrics_by_dispersion_quantile(self, log=False):
        """Evaluate the configured metrics on progressively filtered results.

        For every threshold ``i`` in
        ``range(len(self.results_enriched) - EXTREME_QUANTILES)`` the rows of
        ``self.results_enriched`` whose ``order_std`` is strictly greater than
        ``i`` are kept, and each metric of the problem type is computed from
        their ``y_true`` and ``y_pred`` columns. The resulting table (one row
        per threshold) is stored in ``self.metrics_by_quantile``.

        Args:
            log: When true, log the mean of the ``std`` column and the number
                of rows kept for every threshold.

        Returns:
            ``self``, so that calls can be chained.

        Raises:
            ValueError: If ``self.config.problem_type`` is neither
                ``'classification'`` nor ``'regression'``.
        """
        problem_type = self.config.problem_type
        if problem_type == 'classification':
            metrics = CLASSFICATION_METRICS_DICT
        elif problem_type == 'regression':
            metrics = REGRESSION_METRICS_DICT
        else:
            # Fail early with a clear message instead of hitting an unbound
            # `metrics` name further down.
            raise ValueError(
                f"Unsupported problem type: {problem_type!r}; "
                "expected 'classification' or 'regression'")

        metrics_by_quantile_list = []
        for i in range(0, len(self.results_enriched) - EXTREME_QUANTILES):
            results_filtered = self.results_enriched.loc[
                self.results_enriched['order_std'] > i]

            if log:
                # The mean dispersion is only needed for the log line.
                mean_std = np.mean(results_filtered['std'].values)
                logger.info(
                    f'Mean dispersion: {mean_std} for {len(results_filtered)} points'
                )

            y_true = results_filtered['y_true']
            y_pred = results_filtered['y_pred']

            row = [i]
            for metric in metrics.values():
                if metric == f1_score:
                    # f1_score is the only metric called with an explicit
                    # averaging mode.
                    row.append(metric(y_true, y_pred, average='weighted'))
                else:
                    row.append(metric(y_true, y_pred))

            metrics_by_quantile_list.append(row)

        columns = ['order_std'] + list(metrics)
        self.metrics_by_quantile = pd.DataFrame(metrics_by_quantile_list,
                                                columns=columns)
        return self
Beispiel #2
0
    def _prepare_population_report(self):
        """Collect fitness and species statistics of the current generation.

        Walks ``self.population`` to find the genome with the highest fitness
        (stored in ``self.best_individual_key`` and
        ``self.best_individual_fitness``), fills ``self.generation_data`` with
        population-level fitness statistics and per-species figures, and logs
        a short summary.
        """
        self.best_individual_key = -1
        # NOTE(review): if no genome has a fitness above this sentinel the key
        # stays -1 and the population lookup below returns None - confirm that
        # fitness values can never be this low.
        self.best_individual_fitness = -1000000
        fitness_all = []
        all_n_parameters = []
        for genome in self.population.values():
            fitness_all.append(genome.fitness)
            all_n_parameters.append(genome.calculate_number_of_parameters())
            if genome.fitness > self.best_individual_fitness:
                self.best_individual_fitness = genome.fitness
                self.best_individual_key = genome.key

        max_fitness = round(max(fitness_all), 3)
        mean_fitness = round(np.mean(fitness_all), 3)

        data = self.generation_data
        data['best_individual_fitness'] = self.best_individual_fitness
        data['best_individual_key'] = self.best_individual_key
        data['all_fitness'] = fitness_all
        data['min_fitness'] = round(min(fitness_all), 3)
        data['max_fitness'] = max_fitness
        data['mean_fitness'] = mean_fitness
        data['std_fitness'] = round(np.std(fitness_all), 3)

        # species data
        species = list(self.species.values())
        n_species = len(self.species)
        representative_fitnesses_by_specie = [
            round(specie.representative.fitness, 3) for specie in species
        ]
        # Compare against None explicitly: a fitness of exactly 0 is a valid
        # value and must not be reported as missing.
        best_fitnesses_by_specie = [
            round(specie.fitness, 3) if specie.fitness is not None else None
            for specie in species
        ]
        n_genomes_by_specie = [len(specie.members) for specie in species]

        data['n_species'] = n_species
        data['representative_fitnesses_by_specie'] = \
            representative_fitnesses_by_specie
        data['best_fitnesses_by_specie'] = best_fitnesses_by_specie
        data['n_genomes_by_specie'] = n_genomes_by_specie

        best_n_parameters = calculate_number_of_parameters(
            self.population.get(self.best_individual_key))

        logger.info(
            f'Generation {self.generation}. Best fitness: {max_fitness}. '
            f'N-Parameters Best: {best_n_parameters}')
        logger.info(
            f'                         Mean fitness: {mean_fitness}. '
            f'Mean N-Parameters: {round(np.mean(all_n_parameters), 3)}')
        logger.info(f'                         N-Species: {n_species}.')
        logger.info(
            f'                                N-Genomes by Specie: {n_genomes_by_specie}'
        )
        logger.info(
            f'                                Best Fitness by Specie: {best_fitnesses_by_specie}'
        )
Beispiel #3
0
    def _run_generation(self, generation):
        """Evaluate and report the stored population of one generation.

        The population is loaded with ``_read_population_dict``, scored by the
        evaluation engine, its fitness is written back with
        ``_write_fitness`` and the generation is reported without species
        information.
        """
        logger.info(f'Genaration {generation}')

        genomes = self._read_population_dict(generation=generation)
        evaluated = self.evaluation_engine.evaluate(population=genomes)
        self._write_fitness(population=evaluated, generation=generation)

        self.report.report_new_generation(
            generation=generation,
            population=evaluated,
            species=None,
        )
Beispiel #4
0
 def _update_best(self, generation_report, population):
     """Replace the stored best individual when the generation beats it.

     The individual identified by ``generation_report.best_individual_key``
     is taken from ``population`` when no best individual is stored yet or
     when the stored one has a lower ``'fitness'`` than
     ``generation_report.best_individual_fitness``.

     Returns:
         True when ``self.best_individual`` was replaced, False otherwise.
     """
     incumbent = self.best_individual
     if incumbent is not None and not (
             incumbent['fitness'] < generation_report.best_individual_fitness):
         # The stored individual is still at least as good: nothing to do.
         return False

     self.best_individual = population.get(
         generation_report.best_individual_key)

     summary = (
         f'''    New best individual ({self.best_individual['key']}) found '''
         f'''with fitness {round(self.best_individual['fitness'], 3)}'''
     )
     logger.info(summary)

     details = (
         f'''         best individual has {len(self.best_individual['nodes'])} Nodes '''
         f'''and {len(self.best_individual['connections'])} Connections'''
     )
     logger.debug(details)
     return True
Beispiel #5
0
 def _update_best(self, potential_best_individual):
     """Store the candidate as best individual when it improves on it.

     A candidate whose fitness is NaN is always rejected. Otherwise the
     candidate replaces the stored individual when none is stored yet, when
     the stored one has a NaN fitness, or when the stored fitness is lower
     than the candidate's.

     Args:
         potential_best_individual: Object exposing ``fitness``, ``key``,
             ``node_genes`` and ``connection_genes``.

     Returns:
         True when ``self.best_individual`` was replaced, False otherwise.
     """
     candidate_fitness = potential_best_individual.fitness
     # NaN never compares equal to anything (itself included), so an
     # `== np.nan` test is always False; np.isnan is the reliable check.
     if np.isnan(candidate_fitness):
         return False

     incumbent = self.best_individual
     is_improvement = (incumbent is None
                       or np.isnan(incumbent.fitness)
                       or incumbent.fitness < candidate_fitness)
     if not is_improvement:
         return False

     self.best_individual = potential_best_individual
     logger.info(
         f'    New best individual ({self.best_individual.key}) found '
         f'with fitness {round(self.best_individual.fitness, 3)}')
     logger.debug(
         f'         best individual has {len(self.best_individual.node_genes)} Nodes '
         f'and {len(self.best_individual.connection_genes)} Connections'
     )
     return True
Beispiel #6
0
    def evaluate(self, population: dict):
        '''
        Evaluate every genome and store the result under its 'fitness' key.

        population: is a Dict{Int, Genome}; each genome is written to with
        ``genome['fitness'] = ...``.

        When ``self.parallel_evaluation`` is set the genomes are dispatched to
        ``self.pool``; otherwise they are evaluated one after another in this
        process using ``self._get_dataset()`` and ``self._get_loss()``.

        Returns the same ``population`` mapping, updated in place.
        '''
        logger.info(f'Population size is {len(population)}')
        # TODO: make n_samples increase with generation number
        n_samples = self.config.n_samples
        if self.parallel_evaluation:
            tasks = [
                (genome, get_loss(problem_type=self.config.problem_type),
                 self.config.beta_type, self.config.problem_type,
                 self.config.n_input, self.config.n_output,
                 self.config.node_activation, self.batch_size, n_samples,
                 self.is_gpu)
                for genome in population.values()
            ]

            # Pool.imap rejects a chunksize below 1, which the integer
            # division yields whenever the population is smaller than the
            # number of processes.
            chunksize = max(1, len(population) // self.n_processes)

            # TODO: fix logging when using multiprocessing. Easy fix is to disable
            fitnesses = list(
                self.pool.imap(evaluate_genome_task_jupyneat,
                               tasks,
                               chunksize=chunksize))

            # imap preserves task order, so results line up with the genomes.
            # NOTE(review): the task result is stored as-is whereas the
            # sequential branch negates the evaluation result - confirm that
            # evaluate_genome_task_jupyneat already returns a fitness.
            for genome, fitness in zip(population.values(), fitnesses):
                genome['fitness'] = fitness

        else:
            self.dataset = self._get_dataset()
            self.loss = self._get_loss()
            for genome in population.values():
                genome['fitness'] = -evaluate_genome_jupyneat(
                    genome=genome,
                    problem_type=self.config.problem_type,
                    n_input=self.config.n_input,
                    n_output=self.config.n_output,
                    activation_type=self.config.node_activation,
                    dataset=self.dataset,
                    loss=self.loss,
                    beta_type=self.config.beta_type,
                    batch_size=self.batch_size,
                    n_samples=n_samples,
                    is_gpu=self.is_gpu)

        return population
Beispiel #7
0
    def run(self):
        """Run the evolutionary process and persist its final report.

        Generation 0 is initialised, speciated, evaluated and reported here;
        the following generations are delegated to ``_run_generation`` until
        ``self.n_generations`` is reached or more than ``TIMEOUT_SECONDS``
        have elapsed since ``self.start_time``. Optional fine tuning, engine
        shutdown, report persistence and the final message follow.
        """
        logger.info('Started evolutionary process')
        end_condition = 'normal'

        # Generation 0: build, speciate, evaluate and report the population.
        self.population = self.population_engine.initialize_population()
        self.speciation_engine.speciate(self.population, generation=0)
        self.population = self.evaluation_engine.evaluate(
            population=self.population)
        self.report.report_new_generation(
            generation=0,
            population=self.population,
            species=self.speciation_engine.species)

        last_generation = self.n_generations
        for generation in range(1, last_generation + 1):
            self._run_generation(generation)

            # Stop early once the time budget has been used up.
            if time.perf_counter() - self.start_time > TIMEOUT_SECONDS:
                end_condition = 'timeout'
                break

        if self.evolution_configuration.is_fine_tuning:
            tuner = FineTuner(species=self.speciation_engine.species,
                              config=self.evolution_configuration,
                              is_cuda=self.is_cuda,
                              only_best=False)
            tuner.run()
            self.report.report_fine_tuning(tuner.species_best_genome)

        self.evaluation_engine.close()

        final_report = self.report.generate_final_report(
            end_condition=end_condition)
        final_report.persist_report()
        self.report.persist_logs()
        self._send_final_message()
        logger.info('Finished evolutionary process')
Beispiel #8
0
 def run(self):
     """Launch the evolutionary service and run every generation.

     Generations ``0`` to ``self.configuration.n_generations`` (inclusive)
     are processed with ``_run_generation``; the evaluation engine is closed
     afterwards. Final reporting and notification are not performed here.
     """
     logger.info('Started evolutionary process')

     # Launch the Julia evolutionary service.
     self._launch_evolutionary_service()

     # Run the Python evaluation side, one generation at a time.
     n_generations = self.configuration.n_generations
     for generation in range(n_generations + 1):
         self._run_generation(generation)

     self.evaluation_engine.close()
     logger.info('Finished evolutionary process')
Beispiel #9
0
    def _prepare_population_report(self):
        """Gather fitness statistics of the current population and log them.

        Finds the genome with the highest ``'fitness'`` (recorded in
        ``self.best_individual_key`` / ``self.best_individual_fitness``),
        stores population-level fitness statistics in
        ``self.generation_data`` and logs a short summary.
        """
        self.best_individual_key = -1
        # NOTE(review): when no genome exceeds this sentinel the key stays -1
        # and the population lookup below returns None - confirm fitness can
        # never be this low.
        self.best_individual_fitness = -1000000

        fitness_all = []
        all_n_parameters = []
        for genome in self.population.values():
            fitness = genome['fitness']
            fitness_all.append(fitness)
            all_n_parameters.append(calculate_number_of_parameters(genome))
            if fitness > self.best_individual_fitness:
                self.best_individual_key = genome['key']
                self.best_individual_fitness = fitness

        stats = self.generation_data
        stats['best_individual_fitness'] = self.best_individual_fitness
        stats['best_individual_key'] = self.best_individual_key
        stats['all_fitness'] = fitness_all
        stats['min_fitness'] = round(min(fitness_all), 3)
        max_fitness = round(max(fitness_all), 3)
        stats['max_fitness'] = max_fitness
        mean_fitness = round(np.mean(fitness_all), 3)
        stats['mean_fitness'] = mean_fitness
        stats['std_fitness'] = round(np.std(fitness_all), 3)

        best_genome = self.population.get(self.best_individual_key)
        best_n_parameters = calculate_number_of_parameters(best_genome)
        mean_n_parameters = round(np.mean(all_n_parameters), 3)

        logger.info(
            f'Generation {self.generation}. Best fitness: {max_fitness}. '
            f'N-Parameters Best: {best_n_parameters}')
        logger.info(
            f'                         Mean fitness: {mean_fitness}. '
            f'Mean N-Parameters: {mean_n_parameters}')
        logger.info(f'                         N-Species: {len(self.species)}')
Beispiel #10
0
    def _run_generation(self, generation):
        """Produce, evaluate, speciate and report one new generation.

        After reporting, ``fix_architecture`` is switched on in the evolution
        configuration when ``generation`` equals its
        ``generation_fix_architecture`` setting.
        """
        breeder = self.population_engine
        speciator = self.speciation_engine

        # Offspring are bred from the species of the previous generation.
        self.population = breeder.reproduce(species=speciator.species,
                                            pop_size=breeder.pop_size,
                                            generation=generation)

        # Score the new population, then regroup it into species.
        self.population = self.evaluation_engine.evaluate(
            population=self.population)
        speciator.speciate(self.population, generation=generation)

        self.report.report_new_generation(generation=generation,
                                          population=self.population,
                                          species=speciator.species)

        # Scheduled parameter change.
        settings = self.evolution_configuration
        if generation == settings.generation_fix_architecture:
            logger.info('Fixing Architecture')
            settings.fix_architecture = True
Beispiel #11
0
 def _start_julia_service(self):
     """Start ``launch_julia_neat`` in a background daemon thread.

     The thread receives the result of ``_get_configuration_directory()`` as
     its only argument; it is started but not joined here.
     """
     logger.info("Launching Julia Thread")
     julia_thread = threading.Thread(
         target=launch_julia_neat,
         args=(self._get_configuration_directory(), ),
         daemon=True,
     )
     julia_thread.start()