Code Example #1
def generate_history(generations_quantity, pop_size):
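    # Test helper: builds a ComposingHistory containing `generations_quantity`
    # generations of `pop_size` freshly generated chains; no fitness is assigned here.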
    history = ComposingHistory()
    for gen in range(generations_quantity):
        new_pop = []
        for idx in range(pop_size):
            chain = create_chain()
            new_pop.append(chain)
        history.add_to_history(new_pop)
    return history
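
A minimal usage sketch for the helper above, assuming ComposingHistory and create_chain are importable from the surrounding test module:

# Hypothetical call: two generations of ten chains each; no fitness values are assigned.
history = generate_history(generations_quantity=2, pop_size=10)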
Code Example #2
def generate_history(generations, pop_size):
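    # Test helper: like the one above, but each chain also receives a synthetic fitness.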
    history = ComposingHistory()
    for gen in range(generations):
        new_pop = []
        for idx in range(pop_size):
            chain = chain_first()
            chain.fitness = 1 / (gen * idx + 1)
            new_pop.append(chain)
        history.add_to_history(new_pop)
    return history
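
Unlike the first helper, this variant assigns each chain a synthetic fitness of 1 / (gen * idx + 1): the values are deterministic, strictly positive, equal to 1.0 whenever gen or idx is zero, and shrink as gen * idx grows, which gives tests a history with distinguishable fitness values without running a real objective function.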
Code Example #3
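    # The optimiser's __init__ method, shown here without its surrounding class;
    # the same constructor appears in full context (class GPChainOptimiser) in Code Example #5.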
    def __init__(self,
                 initial_chain,
                 requirements,
                 chain_generation_params,
                 metrics: List[MetricsEnum],
                 parameters: Optional[GPChainOptimiserParameters] = None,
                 log: Log = None,
                 archive_type=None):
        self.chain_generation_params = chain_generation_params
        self.primary_node_func = self.chain_generation_params.primary_node_func
        self.secondary_node_func = self.chain_generation_params.secondary_node_func
        self.chain_class = self.chain_generation_params.chain_class
        self.requirements = requirements
        self.archive = archive_type
        self.parameters = GPChainOptimiserParameters(
        ) if parameters is None else parameters
        self.max_depth = self.requirements.start_depth \
            if self.parameters.with_auto_depth_configuration and self.requirements.start_depth \
            else self.requirements.max_depth
        self.generation_num = 0
        self.num_of_gens_without_improvements = 0
        if not log:
            self.log = default_log(__name__)
        else:
            self.log = log

        generation_depth = self.max_depth if self.requirements.start_depth is None else self.requirements.start_depth

        self.chain_generation_function = partial(
            random_chain,
            chain_generation_params=self.chain_generation_params,
            requirements=self.requirements,
            max_depth=generation_depth)

        necessary_attrs = ['add_node', 'root_node', 'update_node']
        if not all(
            [hasattr(self.chain_class, attr) for attr in necessary_attrs]):
            ex = f'Object chain_class has no required attributes for gp_optimizer'
            self.log.error(ex)
            raise AttributeError(ex)

        if not self.requirements.pop_size:
            self.requirements.pop_size = 10

        if initial_chain and type(initial_chain) != list:
            self.population = [
                deepcopy(initial_chain)
                for _ in range(self.requirements.pop_size)
            ]
        else:
            self.population = initial_chain

        self.history = ComposingHistory(metrics)
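
The attribute check above only requires that chain_generation_params.chain_class expose add_node, root_node, and update_node. A minimal hedged stub illustrating that contract (the class name and method bodies are assumptions for illustration, not part of the project):

class MinimalChain:
    # Hypothetical stand-in for chain_generation_params.chain_class; it only needs
    # the three attributes checked in the constructor above to pass validation.
    root_node = None

    def add_node(self, node):
        raise NotImplementedError

    def update_node(self, old_node, new_node):
        raise NotImplementedError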
Code Example #4
File: gp_optimiser.py    Project: STATAN/FEDOT
class GPChainOptimiser:
    """
    Base class of evolutionary chain optimiser

    :param initial_chain: chain which was initialized outside the optimiser
    :param requirements: composer requirements
    :param chain_generation_params: parameters for new chain generation
    :param parameters: parameters of chain optimiser
    :param log: optional parameter for log object
    """
    def __init__(self,
                 initial_chain,
                 requirements,
                 chain_generation_params,
                 parameters: Optional[GPChainOptimiserParameters] = None,
                 log: Log = None):
        self.chain_generation_params = chain_generation_params
        self.primary_node_func = self.chain_generation_params.primary_node_func
        self.secondary_node_func = self.chain_generation_params.secondary_node_func
        self.chain_class = self.chain_generation_params.chain_class
        self.requirements = requirements
        self.parameters = GPChainOptimiserParameters(
        ) if parameters is None else parameters
        self.max_depth = self.parameters.start_depth if self.parameters.with_auto_depth_configuration else \
            self.requirements.max_depth

        self.generation_num = 0
        if not log:
            self.log = default_log(__name__)
        else:
            self.log = log

        self.chain_generation_function = partial(
            random_chain,
            chain_generation_params=self.chain_generation_params,
            requirements=self.requirements,
            max_depth=self.max_depth)

        necessary_attrs = [
            'add_node', 'root_node', 'replace_node_with_parents',
            'update_node', 'node_childs'
        ]
        if not all(
            [hasattr(self.chain_class, attr) for attr in necessary_attrs]):
            ex = f'Object chain_class has no required attributes for gp_optimizer'
            self.log.error(ex)
            raise AttributeError(ex)

        if not self.requirements.pop_size:
            self.requirements.pop_size = 10

        if initial_chain and type(initial_chain) != list:
            self.population = [
                deepcopy(initial_chain)
                for _ in range(self.requirements.pop_size)
            ]
        else:
            self.population = initial_chain

        self.history = ComposingHistory()

    def optimise(self,
                 objective_function,
                 offspring_rate: float = 0.5,
                 on_next_iteration_callback: Optional[Callable] = None):
        if on_next_iteration_callback is None:
            on_next_iteration_callback = self.default_on_next_iteration_callback

        if self.population is None:
            self.population = self._make_population(self.requirements.pop_size)

        num_of_new_individuals = self.offspring_size(offspring_rate)

        with CompositionTimer() as t:

            if self.requirements.add_single_model_chains:
                best_single_model, self.requirements.primary = \
                    self._best_single_models(objective_function)

            for ind in self.population:
                ind.fitness = objective_function(ind)

            on_next_iteration_callback(self.population)

            self.log.info(f'Best metric is {self.best_individual.fitness}')

            for self.generation_num in range(
                    self.requirements.num_of_generations - 1):
                self.log.info(f'Generation num: {self.generation_num}')
                self.num_of_gens_without_improvements = self.update_stagnation_counter(
                )
                self.log.info(
                    f'max_depth: {self.max_depth}, no improvements: {self.num_of_gens_without_improvements}'
                )

                if self.parameters.with_auto_depth_configuration and self.generation_num != 0:
                    self.max_depth_recount()

                individuals_to_select = regularized_population(
                    reg_type=self.parameters.regularization_type,
                    population=self.population,
                    objective_function=objective_function,
                    chain_class=self.chain_class)

                num_of_parents = num_of_parents_in_crossover(
                    num_of_new_individuals)

                selected_individuals = selection(
                    types=self.parameters.selection_types,
                    population=individuals_to_select,
                    pop_size=num_of_parents)

                new_population = []

                for parent_num in range(0, len(selected_individuals), 2):
                    new_population += self.reproduce(
                        selected_individuals[parent_num],
                        selected_individuals[parent_num + 1])

                    new_population[parent_num].fitness = objective_function(
                        new_population[parent_num])
                    new_population[parent_num +
                                   1].fitness = objective_function(
                                       new_population[parent_num + 1])

                self.prev_best = deepcopy(self.best_individual)

                self.population = inheritance(
                    self.parameters.genetic_scheme_type,
                    self.parameters.selection_types, self.population,
                    new_population, self.num_of_inds_in_next_pop)

                if self.with_elitism:
                    self.population.append(self.prev_best)

                on_next_iteration_callback(self.population)
                self.log.info(
                    f'spent time: {round(t.minutes_from_start, 1)} min')
                self.log.info(f'Best metric is {self.best_individual.fitness}')

                if t.is_time_limit_reached(self.requirements.max_lead_time,
                                           self.generation_num):
                    break

            best = self.best_individual

            if self.requirements.add_single_model_chains and \
                    (best_single_model.fitness <= best.fitness):
                best = best_single_model
        return best

    @property
    def best_individual(self) -> Any:
        return self.get_best_individual(self.population)

    @property
    def with_elitism(self) -> bool:
        return self.requirements.pop_size > 1

    @property
    def num_of_inds_in_next_pop(self):
        return self.requirements.pop_size - 1 if self.with_elitism else self.requirements.pop_size

    def update_stagnation_counter(self) -> int:
        value = 0
        if self.generation_num != 0:
            if self.is_equal_fitness(self.prev_best.fitness,
                                     self.best_individual.fitness):
                value = self.num_of_gens_without_improvements + 1

        return value

    def max_depth_recount(self):
        if self.num_of_gens_without_improvements == self.parameters.depth_increase_step and \
                self.max_depth + 1 <= self.requirements.max_depth:
            self.max_depth += 1

    def get_best_individual(self,
                            individuals: List[Any],
                            equivalents_from_current_pop=True) -> Any:
        best_ind = min(individuals, key=lambda ind: ind.fitness)
        if equivalents_from_current_pop:
            equivalents = self.simpler_equivalents_of_best_ind(best_ind)
        else:
            equivalents = self.simpler_equivalents_of_best_ind(
                best_ind, individuals)

        if equivalents:
            best_candidate_id = min(equivalents, key=equivalents.get)
            best_ind = individuals[best_candidate_id]
        return best_ind

    def simpler_equivalents_of_best_ind(self,
                                        best_ind: Any,
                                        inds: List[Any] = None) -> dict:
        individuals = self.population if inds is None else inds

        sort_inds = np.argsort([ind.fitness for ind in individuals])[1:]
        simpler_equivalents = {}
        for i in sort_inds:
            is_fitness_equals_to_best = self.is_equal_fitness(
                best_ind.fitness, individuals[i].fitness)
            has_less_num_of_models_than_best = len(individuals[i].nodes) < len(
                best_ind.nodes)
            if is_fitness_equals_to_best and has_less_num_of_models_than_best:
                simpler_equivalents[i] = len(individuals[i].nodes)
        return simpler_equivalents

    def reproduce(self,
                  selected_individual_first,
                  selected_individual_second=None) -> Tuple[Any]:
        if selected_individual_second:
            new_inds = crossover(
                self.parameters.crossover_types,
                selected_individual_first,
                selected_individual_second,
                crossover_prob=self.requirements.crossover_prob,
                max_depth=self.requirements.max_depth)
        else:
            new_inds = [selected_individual_first]

        new_inds = tuple([
            mutation(types=self.parameters.mutation_types,
                     chain_generation_params=self.chain_generation_params,
                     chain=new_ind,
                     requirements=self.requirements,
                     max_depth=self.max_depth) for new_ind in new_inds
        ])

        return new_inds

    def _make_population(self, pop_size: int) -> List[Any]:
        model_chains = []
        while len(model_chains) < pop_size:
            chain = self.chain_generation_function()
            if constraint_function(chain):
                model_chains.append(chain)
        return model_chains

    def _best_single_models(self,
                            objective_function: Callable,
                            num_best: int = 7):
        single_models_inds = []
        for model in self.requirements.primary:
            single_models_ind = self.chain_class(
                [self.primary_node_func(model)])
            single_models_ind.fitness = objective_function(single_models_ind)
            single_models_inds.append(single_models_ind)
        best_inds = sorted(single_models_inds, key=lambda ind: ind.fitness)
        return best_inds[0], [i.nodes[0].model.model_type
                              for i in best_inds][:num_best]

    def offspring_size(self, offspring_rate: float = None):
        default_offspring_rate = 0.5 if not offspring_rate else offspring_rate
        if self.parameters.genetic_scheme_type == GeneticSchemeTypesEnum.steady_state:
            num_of_new_individuals = math.ceil(self.requirements.pop_size *
                                               default_offspring_rate)
        else:
            num_of_new_individuals = self.requirements.pop_size - 1
        return num_of_new_individuals

    def is_equal_fitness(self,
                         first_fitness,
                         second_fitness,
                         atol=1e-10,
                         rtol=1e-10):
        return np.isclose(first_fitness, second_fitness, atol=atol, rtol=rtol)

    def default_on_next_iteration_callback(self, individuals):
        self.history.add_to_history(individuals)
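
Based only on the signatures shown in this example, a minimal usage sketch might look as follows. Here requirements, chain_generation_params, and quality_metric are placeholders for objects assumed to be built elsewhere (composer requirements with pop_size, num_of_generations, and max_lead_time; chain generation parameters; and an objective function returning a scalar fitness); this is a sketch, not the project's documented setup code:

# Sketch under the assumptions above; requirements, chain_generation_params and
# quality_metric are placeholders supplied by the caller.
optimiser = GPChainOptimiser(
    initial_chain=None,                      # population is then generated internally
    requirements=requirements,               # placeholder: composer requirements
    chain_generation_params=chain_generation_params,  # placeholder
    parameters=GPChainOptimiserParameters()) # defaults, as in __init__

def print_progress(individuals):
    # Matches the callback signature optimise() uses in this example:
    # it receives the evaluated population once per generation.
    best = min(ind.fitness for ind in individuals)
    print(f'population of {len(individuals)} chains, best fitness: {best}')

best_chain = optimiser.optimise(objective_function=quality_metric,
                                on_next_iteration_callback=print_progress)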
Code Example #5
class GPChainOptimiser:
    """
    Base class of evolutionary chain optimiser

    :param initial_chain: chain which was initialized outside the optimiser
    :param requirements: composer requirements
    :param chain_generation_params: parameters for new chain generation
    :param metrics: quality metrics
    :param parameters: parameters of chain optimiser
    :param log: optional parameter for log object
    :param archive_type: type of archive with best individuals
    """
    def __init__(self,
                 initial_chain,
                 requirements,
                 chain_generation_params,
                 metrics: List[MetricsEnum],
                 parameters: Optional[GPChainOptimiserParameters] = None,
                 log: Log = None,
                 archive_type=None):
        self.chain_generation_params = chain_generation_params
        self.primary_node_func = self.chain_generation_params.primary_node_func
        self.secondary_node_func = self.chain_generation_params.secondary_node_func
        self.chain_class = self.chain_generation_params.chain_class
        self.requirements = requirements
        self.archive = archive_type
        self.parameters = GPChainOptimiserParameters(
        ) if parameters is None else parameters
        self.max_depth = self.requirements.start_depth \
            if self.parameters.with_auto_depth_configuration and self.requirements.start_depth \
            else self.requirements.max_depth
        self.generation_num = 0
        self.num_of_gens_without_improvements = 0
        if not log:
            self.log = default_log(__name__)
        else:
            self.log = log

        generation_depth = self.max_depth if self.requirements.start_depth is None else self.requirements.start_depth

        self.chain_generation_function = partial(
            random_chain,
            chain_generation_params=self.chain_generation_params,
            requirements=self.requirements,
            max_depth=generation_depth)

        necessary_attrs = ['add_node', 'root_node', 'update_node']
        if not all(
            [hasattr(self.chain_class, attr) for attr in necessary_attrs]):
            ex = f'Object chain_class has no required attributes for gp_optimizer'
            self.log.error(ex)
            raise AttributeError(ex)

        if not self.requirements.pop_size:
            self.requirements.pop_size = 10

        if initial_chain and type(initial_chain) != list:
            self.population = [
                deepcopy(initial_chain)
                for _ in range(self.requirements.pop_size)
            ]
        else:
            self.population = initial_chain

        self.history = ComposingHistory(metrics)

    def optimise(self,
                 objective_function,
                 offspring_rate: float = 0.5,
                 on_next_iteration_callback: Optional[Callable] = None):
        if on_next_iteration_callback is None:
            on_next_iteration_callback = self.default_on_next_iteration_callback

        if self.population is None:
            self.population = self._make_population(self.requirements.pop_size)

        num_of_new_individuals = self.offspring_size(offspring_rate)

        with CompositionTimer(
                log=self.log,
                max_lead_time=self.requirements.max_lead_time) as t:

            if self.requirements.allow_single_operations:
                self.best_single_operation, self.requirements.primary = \
                    self._best_single_operations(objective_function, timer=t)

            self._evaluate_individuals(self.population,
                                       objective_function,
                                       timer=t)

            if self.archive is not None:
                self.archive.update(self.population)

            on_next_iteration_callback(self.population, self.archive)

            self.log_info_about_best()

            while t.is_time_limit_reached(self.generation_num) is False \
                    and self.generation_num != self.requirements.num_of_generations - 1:

                self.log.info(f'Generation num: {self.generation_num}')

                self.num_of_gens_without_improvements = self.update_stagnation_counter(
                )
                self.log.info(
                    f'max_depth: {self.max_depth}, no improvements: {self.num_of_gens_without_improvements}'
                )

                if self.parameters.with_auto_depth_configuration and self.generation_num != 0:
                    self.max_depth_recount()

                individuals_to_select = regularized_population(
                    reg_type=self.parameters.regularization_type,
                    population=self.population,
                    objective_function=objective_function,
                    chain_class=self.chain_class,
                    timer=t)

                if self.parameters.multi_objective:
                    filtered_archive_items = duplicates_filtration(
                        archive=self.archive, population=individuals_to_select)
                    individuals_to_select = deepcopy(
                        individuals_to_select) + filtered_archive_items

                num_of_parents = num_of_parents_in_crossover(
                    num_of_new_individuals)

                selected_individuals = selection(
                    types=self.parameters.selection_types,
                    population=individuals_to_select,
                    pop_size=num_of_parents)

                new_population = []

                for parent_num in range(0, len(selected_individuals), 2):
                    new_population += self.reproduce(
                        selected_individuals[parent_num],
                        selected_individuals[parent_num + 1])

                self._evaluate_individuals(new_population,
                                           objective_function,
                                           timer=t)

                self.prev_best = deepcopy(self.best_individual)

                self.population = inheritance(
                    self.parameters.genetic_scheme_type,
                    self.parameters.selection_types, self.population,
                    new_population, self.num_of_inds_in_next_pop)

                if not self.parameters.multi_objective and self.with_elitism:
                    self.population.append(self.prev_best)

                if self.archive is not None:
                    self.archive.update(self.population)

                on_next_iteration_callback(self.population, self.archive)
                self.log.info(
                    f'spent time: {round(t.minutes_from_start, 1)} min')
                self.log_info_about_best()

                self.generation_num += 1

            best = self.result_individual()
            self.log.info('Result:')
            self.log_info_about_best()

        return best

    @property
    def best_individual(self) -> Any:
        if self.parameters.multi_objective:
            return self.archive
        else:
            return self.get_best_individual(self.population)

    @property
    def with_elitism(self) -> bool:
        if self.parameters.multi_objective:
            return False
        else:
            return self.requirements.pop_size > MIN_POPULATION_SIZE_WITH_ELITISM

    @property
    def num_of_inds_in_next_pop(self):
        return self.requirements.pop_size - 1 if self.with_elitism and not self.parameters.multi_objective \
            else self.requirements.pop_size

    def update_stagnation_counter(self) -> int:
        value = 0
        if self.generation_num != 0:
            if self.parameters.multi_objective:
                equal_best = is_equal_archive(self.prev_best, self.archive)
            else:
                equal_best = is_equal_fitness(self.prev_best.fitness,
                                              self.best_individual.fitness)
            if equal_best:
                value = self.num_of_gens_without_improvements + 1

        return value

    def log_info_about_best(self):
        if self.parameters.multi_objective:
            self.log.info(
                f'Pareto Frontier: '
                f'{[item.fitness.values for item in self.archive.items if item.fitness is not None]}'
            )
        else:
            self.log.info(f'Best metric is {self.best_individual.fitness}')

    def max_depth_recount(self):
        if self.num_of_gens_without_improvements == self.parameters.depth_increase_step and \
                self.max_depth + 1 <= self.requirements.max_depth:
            self.max_depth += 1

    def get_best_individual(self,
                            individuals: List[Any],
                            equivalents_from_current_pop=True) -> Any:
        best_ind = min(individuals, key=lambda ind: ind.fitness)
        if equivalents_from_current_pop:
            equivalents = self.simpler_equivalents_of_best_ind(best_ind)
        else:
            equivalents = self.simpler_equivalents_of_best_ind(
                best_ind, individuals)

        if equivalents:
            best_candidate_id = min(equivalents, key=equivalents.get)
            best_ind = individuals[best_candidate_id]
        return best_ind

    def simpler_equivalents_of_best_ind(self,
                                        best_ind: Any,
                                        inds: List[Any] = None) -> dict:
        individuals = self.population if inds is None else inds

        sort_inds = np.argsort([ind.fitness for ind in individuals])[1:]
        simpler_equivalents = {}
        for i in sort_inds:
            is_fitness_equals_to_best = is_equal_fitness(
                best_ind.fitness, individuals[i].fitness)
            has_less_num_of_operations_than_best = len(
                individuals[i].nodes) < len(best_ind.nodes)
            if is_fitness_equals_to_best and has_less_num_of_operations_than_best:
                simpler_equivalents[i] = len(individuals[i].nodes)
        return simpler_equivalents

    def reproduce(self,
                  selected_individual_first,
                  selected_individual_second=None) -> Tuple[Any]:
        if selected_individual_second:
            new_inds = crossover(
                self.parameters.crossover_types,
                selected_individual_first,
                selected_individual_second,
                crossover_prob=self.requirements.crossover_prob,
                max_depth=self.max_depth,
                log=self.log)
        else:
            new_inds = [selected_individual_first]

        new_inds = tuple([
            mutation(types=self.parameters.mutation_types,
                     chain_generation_params=self.chain_generation_params,
                     chain=new_ind,
                     requirements=self.requirements,
                     max_depth=self.max_depth,
                     log=self.log) for new_ind in new_inds
        ])
        for ind in new_inds:
            ind.fitness = None
        return new_inds

    def _make_population(self, pop_size: int) -> List[Any]:
        operation_chains = []
        iter_number = 0
        while len(operation_chains) < pop_size:
            iter_number += 1
            chain = self.chain_generation_function()
            if constraint_function(chain):
                operation_chains.append(chain)

            if iter_number > MAX_NUM_OF_GENERATED_INDS:
                self.log.debug(
                    f'More than {MAX_NUM_OF_GENERATED_INDS} chains were generated '
                    f'in the population-making function. Process is stopped')
                break

        return operation_chains

    def _best_single_operations(self,
                                objective_function: Callable,
                                num_best: int = 7,
                                timer=None):
        is_process_skipped = False
        single_operations_inds = []
        for operation in self.requirements.primary:
            single_operations_ind = self.chain_class(
                [self.primary_node_func(operation)])
            single_operations_ind.fitness = calculate_objective(
                single_operations_ind, objective_function,
                self.parameters.multi_objective)
            if single_operations_ind.fitness is not None:
                single_operations_inds.append(single_operations_ind)

            if single_operations_inds:
                if timer is not None:
                    if timer.is_time_limit_reached():
                        break

        best_inds = sorted(single_operations_inds, key=lambda ind: ind.fitness)
        if is_process_skipped:
            self.population = [best_inds[0]]

        if timer is not None:
            single_operations_eval_time = timer.minutes_from_start
            self.log.info(
                f'Single operations evaluation time: {single_operations_eval_time}'
            )
            timer.set_init_time(single_operations_eval_time)
        return best_inds[0], [
            i.nodes[0].operation.operation_type for i in best_inds
        ][:num_best]

    def offspring_size(self, offspring_rate: float = None):
        default_offspring_rate = 0.5 if not offspring_rate else offspring_rate
        if self.parameters.genetic_scheme_type == GeneticSchemeTypesEnum.steady_state:
            num_of_new_individuals = math.ceil(self.requirements.pop_size *
                                               default_offspring_rate)
        else:
            num_of_new_individuals = self.requirements.pop_size
        return num_of_new_individuals

    def is_equal_fitness(self,
                         first_fitness,
                         second_fitness,
                         atol=1e-10,
                         rtol=1e-10):
        return np.isclose(first_fitness, second_fitness, atol=atol, rtol=rtol)

    def default_on_next_iteration_callback(self, individuals, archive):
        self.history.add_to_history(individuals)
        archive = deepcopy(archive)
        if archive is not None:
            self.history.add_to_archive_history(archive.items)

    def result_individual(self) -> Union[Any, List[Any]]:
        if not self.parameters.multi_objective:
            best = self.best_individual

            if self.requirements.allow_single_operations and \
                    (self.best_single_operation.fitness <= best.fitness):
                best = self.best_single_operation
        else:
            best = self.archive.items
        return best

    def _evaluate_individuals(self,
                              individuals_set,
                              objective_function,
                              timer=None):
        evaluate_individuals(
            individuals_set=individuals_set,
            objective_function=objective_function,
            timer=timer,
            is_multi_objective=self.parameters.multi_objective)
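
Compared with Code Example #4, this revision adds multi-objective support: the constructor also takes metrics and an optional archive_type, individuals are evaluated through _evaluate_individuals() with a shared CompositionTimer, the main loop stops on either the generation limit or the time limit, and stagnation is detected with is_equal_archive() when a Pareto archive is used. The per-generation callback now receives two arguments, as in default_on_next_iteration_callback(individuals, archive), so a custom callback passed to optimise() must accept both. A hedged sketch of such a callback (the function name is an assumption):

def track_progress(individuals, archive):
    # Hypothetical callback for this version: optimise() invokes it as
    # on_next_iteration_callback(population, archive), so it must accept both arguments.
    evaluated = [ind for ind in individuals if ind.fitness is not None]
    archive_size = 0 if archive is None else len(archive.items)
    print(f'{len(evaluated)} evaluated chains, archive items: {archive_size}')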