Beispiel #1
0
    def build(self) -> Composer:
        optimiser_type = GPGraphOptimiser
        if self.optimiser_parameters.genetic_scheme_type == GeneticSchemeTypesEnum.parameter_free:
            optimiser_type = GPGraphParameterFreeOptimiser

        graph_generation_params = GraphGenerationParams(
            adapter=PipelineAdapter(self._composer.log),
            advisor=PipelineChangeAdvisor())

        archive_type = None
        if len(self._composer.metrics) > 1:
            archive_type = tools.ParetoFront()
            # TODO add possibility of using regularization in MO alg
            self.optimiser_parameters.regularization_type = RegularizationTypesEnum.none
            self.optimiser_parameters.multi_objective = True

        if self.optimiser_parameters.mutation_types is None:
            self.optimiser_parameters.mutation_types = [
                boosting_mutation, parameter_change_mutation,
                single_edge_mutation, single_change_mutation,
                single_drop_mutation, single_add_mutation
            ]

        optimiser = optimiser_type(
            initial_graph=self._composer.initial_pipeline,
            requirements=self._composer.composer_requirements,
            graph_generation_params=graph_generation_params,
            parameters=self.optimiser_parameters,
            log=self._composer.log,
            archive_type=archive_type,
            metrics=self._composer.metrics)

        self._composer.optimiser = optimiser

        return self._composer
Beispiel #2
0
class ComposerRequirements:
    """
    This dataclass is for defining the requirements of composition process

    :attribute primary: List of operation types (str) for Primary Nodes
    :attribute secondary: List of operation types (str) for Secondary Nodes
    :attribute timeout: max time in minutes available for composition process
    :attribute max_depth: max depth of the result pipeline
    :attribute max_pipeline_fit_time: time constraint for operation fitting (minutes)
    :attribute max_arity: maximal number of parent for node
    :attribute min_arity: minimal number of parent for node
    :attribute cv_folds: integer or None to use cross validation
    """
    primary: List[str]
    secondary: List[str]
    timeout: Optional[datetime.timedelta] = datetime.timedelta(minutes=5)
    max_pipeline_fit_time: Optional[datetime.timedelta] = None
    max_depth: int = 3
    max_arity: int = 2
    min_arity: int = 2
    cv_folds: Optional[int] = None
    advisor: Optional[PipelineChangeAdvisor] = PipelineChangeAdvisor()

    def __post_init__(self):
        if self.max_depth < 0:
            raise ValueError(f'invalid max_depth value')
        if self.max_arity < 0:
            raise ValueError(f'invalid max_arity value')
        if self.min_arity < 0:
            raise ValueError(f'invalid min_arity value')
        if self.cv_folds is not None and self.cv_folds <= 1:
            raise ValueError(f'Number of folds for KFold cross validation must be 2 or more.')
Beispiel #3
0
def test_evaluate_individuals():
    project_root_path = str(fedot_project_root())
    file_path_train = os.path.join(project_root_path,
                                   'test/data/simple_classification.csv')
    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)

    task = Task(TaskTypesEnum.classification)
    dataset_to_compose = InputData.from_csv(full_path_train, task=task)
    available_model_types, _ = OperationTypesRepository().suitable_operation(
        task_type=task.task_type)

    metric_function = ClassificationMetricsEnum.ROCAUC_penalty
    composer_requirements = GPComposerRequirements(
        primary=available_model_types, secondary=available_model_types)

    builder = GPComposerBuilder(task=task).with_requirements(composer_requirements). \
        with_metrics(metric_function)

    composer = builder.build()

    pipelines_to_evaluate = [
        pipeline_first(),
        pipeline_second(),
        pipeline_third(),
        pipeline_fourth()
    ]

    train_data, test_data = train_test_data_setup(
        dataset_to_compose,
        sample_split_ratio_for_tasks[dataset_to_compose.task.task_type])
    metric_function_for_nodes = partial(composer.composer_metric,
                                        composer.metrics, train_data,
                                        test_data)
    adapter = PipelineAdapter()
    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=0.001)
    params = GraphGenerationParams(adapter=PipelineAdapter(),
                                   advisor=PipelineChangeAdvisor())
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 1
    assert population[0].fitness is not None

    population = [Individual(adapter.adapt(c)) for c in pipelines_to_evaluate]
    timeout = datetime.timedelta(minutes=5)
    with OptimisationTimer(timeout=timeout) as t:
        evaluate_individuals(individuals_set=population,
                             objective_function=metric_function_for_nodes,
                             graph_generation_params=params,
                             is_multi_objective=False,
                             timer=t)
    assert len(population) == 4
    assert all([ind.fitness is not None for ind in population])
Beispiel #4
0
def test_selection():
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())

    selected_individuals = selection(types=[SelectionTypesEnum.tournament],
                                     population=population,
                                     pop_size=num_of_inds,
                                     params=graph_params)
    assert (all([ind in population for ind in selected_individuals])
            and len(selected_individuals) == num_of_inds)
Beispiel #5
0
def test_individuals_selection_random_individuals():
    num_of_inds = 2
    population = rand_population_gener_and_eval(pop_size=4)
    types = [SelectionTypesEnum.tournament]
    graph_params = GraphGenerationParams(advisor=PipelineChangeAdvisor(),
                                         adapter=PipelineAdapter())
    selected_individuals = individuals_selection(types=types,
                                                 individuals=population,
                                                 pop_size=num_of_inds,
                                                 graph_params=graph_params)
    selected_individuals_ref = [str(ind) for ind in selected_individuals]
    assert (len(set(selected_individuals_ref)) == len(selected_individuals)
            and len(selected_individuals) == num_of_inds)
Beispiel #6
0
def rand_population_gener_and_eval(pop_size=4):
    models_set = ['knn', 'logit', 'rf']
    requirements = GPComposerRequirements(primary=models_set,
                                          secondary=models_set,
                                          max_depth=1)
    pipeline_gener_params = GraphGenerationParams(
        advisor=PipelineChangeAdvisor(), adapter=PipelineAdapter())
    random_pipeline_function = partial(random_graph,
                                       params=pipeline_gener_params,
                                       requirements=requirements)
    population = [
        Individual(random_pipeline_function()) for _ in range(pop_size)
    ]
    # evaluation
    for ind in population:
        ind.fitness = obj_function()
    return population
Beispiel #7
0
def test_boosting_mutation_for_linear_graph():
    """
    Tests boosting mutation can add correct boosting cascade
    """

    linear_one_node = OptGraph(OptNode('knn', [OptNode('scaling')]))

    init_node = OptNode('scaling')
    model_node = OptNode('knn', [init_node])

    boosting_graph = \
        OptGraph(
            OptNode('logit',
                    [model_node, OptNode('linear',
                                         [OptNode('class_decompose',
                                                  [model_node, init_node])])]))

    composer_requirements = GPComposerRequirements(primary=['scaling'],
                                                   secondary=['logit'],
                                                   mutation_prob=1)

    graph_params = GraphGenerationParams(
        adapter=PipelineAdapter(),
        advisor=PipelineChangeAdvisor(task=Task(TaskTypesEnum.classification)),
        rules_for_constraint=DEFAULT_DAG_RULES)
    successful_mutation_boosting = False
    for _ in range(100):
        graph_after_mutation = mutation(types=[boosting_mutation],
                                        params=graph_params,
                                        ind=Individual(linear_one_node),
                                        requirements=composer_requirements,
                                        log=default_log(__name__),
                                        max_depth=2).graph
        if not successful_mutation_boosting:
            successful_mutation_boosting = \
                graph_after_mutation.root_node.descriptive_id == boosting_graph.root_node.descriptive_id
        else:
            break
    assert successful_mutation_boosting

    # check that obtained pipeline can be fitted
    pipeline = PipelineAdapter().restore(graph_after_mutation)
    data = file_data()
    pipeline.fit(data)
    result = pipeline.predict(data)
    assert result is not None