Example 1
def create_individual():
    first = OptNode(content='logit')
    second = OptNode(content='lda')
    final = OptNode(content='knn', nodes_from=[first, second])

    individual = Individual(graph=OptGraph(final))
    individual.fitness = 1
    return individual
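A minimal usage sketch (no imports beyond those already used by the helper itself): the factory can be called repeatedly to seed a small population whose fitness is already assigned.

population = [create_individual() for _ in range(5)]
assert all(ind.fitness == 1 for ind in population)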
Example 2
def test_evaluate_individuals():
    project_root_path = str(fedot_project_root())
    full_path_train = os.path.join(project_root_path,
                                   'test/data/simple_classification.csv')

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)

    composer = builder.build()

    pipelines_to_evaluate = [
        pipeline_first(),
        pipeline_second(),
        pipeline_third(),
        pipeline_fourth()
    ]

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ratio_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)
    adapter = PipelineAdapter()
    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=0.001)
    params = GraphGenerationParams(adapter=PipelineAdapter(),
                                   advisor=PipelineChangeAdvisor())
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=5)
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    assert all([ind.fitness is not None for ind in population])
Example 3
def generate_history(generations, pop_size):
    history = OptHistory()
    converter = GraphGenerationParams().adapter
    for gen in range(generations):
        new_pop = []
        for idx in range(pop_size):
            pipeline = pipeline_first()
            ind = Individual(converter.adapt(pipeline))
            ind.fitness = 1 / (gen * idx + 1)
            new_pop.append(ind)
        history.add_to_history(new_pop)
    return history
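A minimal usage sketch, assuming the OptHistory object exposes a per-generation `individuals` list in this FEDOT version (an assumption of this sketch, not shown above):

history = generate_history(generations=2, pop_size=3)
assert len(history.individuals) == 2     # one entry per generation (assumed attribute)
assert len(history.individuals[0]) == 3  # pop_size individuals per generation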
Example 4
def test_drop_mutation_for_linear_graph():
    """
    Tests single_drop mutation can remove node
    """

    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))

    linear_one_node = OptGraph(OptNode('logit'))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=DirectAdapter(), rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_drop = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_drop],
                                        params=graph_params,
                                        ind=Individual(linear_two_nodes),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if not successful_mutation_drop:
            successful_mutation_drop = \
                graph_after_mutation.root_node.descriptive_id == linear_one_node.root_node.descriptive_id
        else:
            break
    assert successful_mutation_drop
Example 5
def test_edge_mutation_for_graph():
    """
    Tests edge mutation can add edge between nodes
    """
    graph_without_edge = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    primary = OptNode('scaling')
    graph_with_edge = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [primary]), primary]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling', 'one_hot_encoding'],
        secondary=['logit', 'scaling'],
        mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=DirectAdapter(), rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_edge = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_edge],
                                        params=graph_params,
                                        ind=Individual(graph_without_edge),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=graph_with_edge.depth).graph
        if not successful_mutation_edge:
            successful_mutation_edge = \
                graph_after_mutation.root_node.descriptive_id == graph_with_edge.root_node.descriptive_id
        else:
            break
    assert successful_mutation_edge
Example 6
def test_intermediate_add_mutation_for_linear_graph():
    """
    Tests single_add mutation can add node between two existing nodes
    """

    linear_two_nodes = OptGraph(OptNode('logit', [OptNode('scaling')]))
    linear_three_nodes_inner = \
        OptGraph(OptNode('logit', [OptNode('one_hot_encoding', [OptNode('scaling')])]))

    composer_requirements = GPComposerRequirements(
        primary=['scaling'], secondary=['one_hot_encoding'], mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=DirectAdapter(), rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_inner = False

    for _ in range(100):
        graph_after_mutation = mutation(types=[MutationTypesEnum.single_add],
                                        params=graph_params,
                                        ind=Individual(linear_two_nodes),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=3).graph
        if not successful_mutation_inner:
            successful_mutation_inner = \
                graph_after_mutation.root_node.descriptive_id == linear_three_nodes_inner.root_node.descriptive_id
        else:
            break

    assert successful_mutation_inner
Example 7
def test_filter_duplicates():
    archive = tools.ParetoFront()
    archive_items = [pipeline_first(), pipeline_second(), pipeline_third()]
    adapter = PipelineAdapter()

    population = [
        Individual(adapter.adapt(c)) for c in [
            pipeline_first(),
            pipeline_second(),
            pipeline_third(),
            pipeline_fourth()
        ]
    ]
    archive_items_fitness = ((-0.80001, 0.25), (-0.7, 0.1), (-0.9, 0.7))
    population_fitness = ((-0.8, 0.25), (-0.59, 0.25), (-0.9, 0.7), (-0.7, 0.1))
    weights = tuple([-1 for _ in range(len(population_fitness[0]))])
    for ind_num in range(len(archive_items)):
        archive_items[ind_num].fitness = MultiObjFitness(
            values=archive_items_fitness[ind_num], weights=weights)
    for ind_num in range(len(population)):
        population[ind_num].fitness = MultiObjFitness(
            values=population_fitness[ind_num], weights=weights)
    archive.update(archive_items)
    filtered_archive = filter_duplicates(archive, population)
    assert len(filtered_archive) == 1
    assert filtered_archive[0].fitness.values[0] == -0.80001
    assert filtered_archive[0].fitness.values[1] == 0.25
Example 8
def crossover(types: List[Union[CrossoverTypesEnum, Callable]],
              ind_first: Individual, ind_second: Individual,
              max_depth: int, log: Log,
              crossover_prob: float = 0.8, params: 'GraphGenerationParams' = None) -> Any:
    crossover_type = choice(types)
    is_custom_crossover = isinstance(crossover_type, Callable)
    try:
        if will_crossover_be_applied(ind_first.graph, ind_second.graph, crossover_prob, crossover_type):
            if crossover_type in crossover_by_type.keys() or is_custom_crossover:
                for _ in range(MAX_NUM_OF_ATTEMPTS):
                    if is_custom_crossover:
                        crossover_func = crossover_type
                    else:
                        crossover_func = crossover_by_type[crossover_type]
                    new_inds = []

                    is_custom_operator = isinstance(ind_first, OptGraph)
                    # operate on copies so the parent individuals stay unchanged
                    input_obj_first = deepcopy(ind_first.graph)
                    input_obj_second = deepcopy(ind_second.graph)
                    if is_custom_operator:
                        input_obj_first = params.adapter.restore(input_obj_first)
                        input_obj_second = params.adapter.restore(input_obj_second)

                    new_graphs = crossover_func(input_obj_first,
                                                input_obj_second, max_depth)

                    if is_custom_operator:
                        for graph_id, graph in enumerate(new_graphs):
                            new_graphs[graph_id] = params.adapter.adapt(graph)

                    are_correct = \
                        all([constraint_function(new_graph, params)
                             for new_graph in new_graphs])

                    if are_correct:
                        for graph in new_graphs:
                            new_ind = Individual(graph)
                            new_ind.parent_operators.append(
                                ParentOperator(operator_type='crossover',
                                               operator_name=str(crossover_type),
                                               parent_objects=[
                                                   params.adapter.restore_as_template(ind_first.graph),
                                                   params.adapter.restore_as_template(ind_second.graph)
                                               ]))
                            new_inds.append(new_ind)
                        return new_inds
            else:
                raise ValueError(f'Required crossover type not found: {crossover_type}')

            log.debug('Number of crossover attempts exceeded. '
                      'Please check composer requirements for correctness.')
    except Exception as ex:
        log.error(f'Crossover ex: {ex}')

    graph_first_copy = deepcopy(ind_first)
    graph_second_copy = deepcopy(ind_second)
    return graph_first_copy, graph_second_copy
Example 9
def test_mutation():
    adapter = PipelineAdapter()
    ind = Individual(adapter.adapt(pipeline_first()))
    mutation_types = [MutationTypesEnum.none]
    log = default_log(__name__)
    graph_gener_params = GraphGenerationParams()
    task = Task(TaskTypesEnum.classification)
    primary_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)
    secondary_model_types = ['xgboost', 'knn', 'lda', 'qda']
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=1)
    new_ind = mutation(mutation_types,
                       graph_gener_params,
                       ind,
                       composer_requirements,
                       log=log,
                       max_depth=3)
    assert new_ind.graph == ind.graph
    mutation_types = [MutationTypesEnum.growth]
    composer_requirements = GPComposerRequirements(
        primary=primary_model_types,
        secondary=secondary_model_types,
        mutation_prob=0)
    new_ind = mutation(mutation_types,
                       graph_gener_params,
                       ind,
                       composer_requirements,
                       log=log,
                       max_depth=3)
    assert new_ind.graph == ind.graph
    ind = Individual(adapter.adapt(pipeline_fifth()))
    new_ind = mutation(mutation_types,
                       graph_gener_params,
                       ind,
                       composer_requirements,
                       log=log,
                       max_depth=3)
    assert new_ind.graph == ind.graph
Example 10
def test_crossover():
    adapter = PipelineAdapter()
    graph_example_first = adapter.adapt(pipeline_first())
    graph_example_second = adapter.adapt(pipeline_second())
    log = default_log(__name__)
    crossover_types = [CrossoverTypesEnum.none]
    new_graphs = crossover(crossover_types,
                           Individual(graph_example_first),
                           Individual(graph_example_second),
                           max_depth=3,
                           log=log,
                           crossover_prob=1)
    assert new_graphs[0].graph == graph_example_first
    assert new_graphs[1].graph == graph_example_second
    crossover_types = [CrossoverTypesEnum.subtree]
    new_graphs = crossover(crossover_types,
                           Individual(graph_example_first),
                           Individual(graph_example_second),
                           max_depth=3,
                           log=log,
                           crossover_prob=0)
    assert new_graphs[0].graph == graph_example_first
    assert new_graphs[1].graph == graph_example_second
Example 11
    def _init_population(self):
        if self.initial_graph:
            if not isinstance(self.initial_graph, list):
                initial_graph = self.graph_generation_params.adapter.adapt(
                    self.initial_graph)
                self.population = self._create_randomized_pop_from_inital_pipeline(
                    initial_graph)
            else:
                self.population = \
                    [Individual(graph=self.graph_generation_params.adapter.adapt(o))
                     for o in self.initial_graph]

        if self.population is None:
            self.population = self._make_population(self.requirements.pop_size)
        return self.population
Example 12
    def _make_population(self, pop_size: int) -> List[Any]:
        pop = []
        iter_number = 0
        while len(pop) < pop_size:
            iter_number += 1
            graph = self.graph_generation_function()
            if constraint_function(graph, self.graph_generation_params):
                pop.append(Individual(graph))

            if iter_number > MAX_NUM_OF_GENERATED_INDS:
                self.log.debug(
                    f'More than {MAX_NUM_OF_GENERATED_INDS} generated in population making function. '
                    f'Process is stopped')
                break

        return pop
Example 13
def rand_population_gener_and_eval(pop_size=4):
    models_set = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=models_set,
                                          secondary=models_set,
                                          max_depth=1)
    pipeline_gener_params = GraphGenerationParams(
        advisor=PipelineChangeAdvisor(), adapter=PipelineAdapter())
    random_pipeline_function = partial(random_graph,
                                       params=pipeline_gener_params,
                                       requirements=requirements)
    population = [
        Individual(random_pipeline_function()) for _ in range(pop_size)
    ]
    # evaluation
    for ind in population:
        ind.fitness = obj_function()
    return population
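A minimal usage sketch for the helper above; `obj_function` is assumed to be defined elsewhere in the same test module, so only the shape of the returned population is checked:

population = rand_population_gener_and_eval(pop_size=3)
assert len(population) == 3
assert all(ind.fitness is not None for ind in population)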
Example 14
def test_boosting_mutation_for_linear_graph():
    """
    Tests boosting mutation can add correct boosting cascade
    """

    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])

    boosting_graph = \
        OptGraph(
            OptNode('logit',
                    [model_node, OptNode('linear',
                                         [OptNode('class_decompose',
                                                  [model_node, init_node])])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_boosting = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if not successful_mutation_boosting:
            successful_mutation_boosting = \
                graph_after_mutation.root_node.descriptive_id == boosting_graph.root_node.descriptive_id
        else:
            break
    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None
Example 15
    def _create_randomized_pop_from_inital_pipeline(
            self, initial_pipeline) -> List[Individual]:
        """
        Fill first population with mutated variants of the initial_pipeline
        :param initial_pipeline: Initial assumption for first population
        :return: list of individuals
        """
        initial_req = deepcopy(self.requirements)
        initial_req.mutation_prob = 1
        randomized_pop = [
            mutation(types=self.parameters.mutation_types,
                     params=self.graph_generation_params,
                     ind=Individual(deepcopy(initial_pipeline)),
                     requirements=initial_req,
                     max_depth=self.max_depth,
                     log=self.log,
                     add_to_history=False)
            for _ in range(self.requirements.pop_size)
        ]
        return randomized_pop