Code example #1
File: test_composer.py Project: timur9831/FEDOT
def test_random_composer(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    available_model_types, _ = ModelTypesRepository().suitable_model(
        task_type=TaskTypesEnum.classification)

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    random_composer = RandomSearchComposer(iter_num=1)
    req = ComposerRequirements(primary=available_model_types,
                               secondary=available_model_types)
    chain_random_composed = random_composer.compose_chain(
        data=dataset_to_compose,
        initial_chain=None,
        composer_requirements=req,
        metrics=metric_function)
    chain_random_composed.fit_from_scratch(input_data=dataset_to_compose)

    predicted_random_composed = chain_random_composed.predict(
        dataset_to_validate)

    roc_on_valid_random_composed = roc_auc(
        y_true=dataset_to_validate.target,
        y_score=predicted_random_composed.predict)

    assert roc_on_valid_random_composed > 0.6
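
Code examples #1 and #6 receive data_fixture and request as arguments, which is the standard pytest pattern for resolving a fixture by name at run time through request.getfixturevalue. A minimal sketch of how such a test can be wired up is shown below; the fixture name sample_data and the CSV path are hypothetical, InputData.from_csv follows the usage in code example #9, and the InputData import is assumed to be available as in the project's own tests.

import pytest


@pytest.fixture()
def sample_data():
    # hypothetical fixture: load a small classification dataset for the test
    return InputData.from_csv('test/data/simple_classification.csv')


@pytest.mark.parametrize('data_fixture', ['sample_data'])
def test_fixture_resolves(data_fixture, request):
    # request.getfixturevalue looks up the fixture whose name was passed as the parameter
    data = request.getfixturevalue(data_fixture)
    assert data is not None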
Code example #2
File: test_composer.py Project: timur9831/FEDOT
def test_dummy_composer_flat_chain_build_correct():
    composer = DummyComposer(DummyChainTypeEnum.flat)
    empty_data = InputData(idx=np.zeros(1),
                           features=np.zeros(1),
                           target=np.zeros(1),
                           task=Task(TaskTypesEnum.classification),
                           data_type=DataTypesEnum.table)

    primary = ['logit']
    secondary = ['logit', 'xgboost']

    composer_requirements = ComposerRequirements(primary=primary,
                                                 secondary=secondary)
    new_chain = composer.compose_chain(
        data=empty_data,
        initial_chain=None,
        composer_requirements=composer_requirements,
        metrics=None)

    assert len(new_chain.nodes) == 3
    assert isinstance(new_chain.nodes[0], PrimaryNode)
    assert isinstance(new_chain.nodes[1], SecondaryNode)
    assert isinstance(new_chain.nodes[2], SecondaryNode)
    assert new_chain.nodes[1].nodes_from[0] is new_chain.nodes[0]
    assert new_chain.nodes[2].nodes_from[0] is new_chain.nodes[1]
    assert new_chain.nodes[0].nodes_from is None
Code example #3
def compose_chain(data: InputData) -> Chain:
    dummy_composer = DummyComposer(DummyChainTypeEnum.hierarchical)
    composer_requirements = ComposerRequirements(primary=[ModelTypesIdsEnum.lasso, ModelTypesIdsEnum.ridge],
                                                 secondary=[ModelTypesIdsEnum.linear])

    metric_function = MetricsRepository().metric_by_id(RegressionMetricsEnum.RMSE)

    chain = dummy_composer.compose_chain(data=data,
                                         initial_chain=None,
                                         composer_requirements=composer_requirements,
                                         metrics=metric_function, is_visualise=False)
    return chain
Code example #4
def compose_chain(data: InputData) -> Chain:
    dummy_composer = DummyComposer(DummyChainTypeEnum.hierarchical)
    composer_requirements = ComposerRequirements(primary=[ModelTypesIdsEnum.kmeans, ModelTypesIdsEnum.kmeans],
                                                 secondary=[ModelTypesIdsEnum.logit])

    metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC)

    chain = dummy_composer.compose_chain(data=data,
                                         initial_chain=None,
                                         composer_requirements=composer_requirements,
                                         metrics=metric_function, is_visualise=False)
    return chain
Code example #5
def test_dummy_composer_hierarchical_chain_build_correct():
    composer = DummyComposer(DummyChainTypeEnum.hierarchical)
    empty_data = InputData(idx=np.zeros(1),
                           features=np.zeros(1),
                           target=np.zeros(1),
                           task_type=MachineLearningTasksEnum.classification)
    primary = [ModelTypesIdsEnum.logit, ModelTypesIdsEnum.xgboost]
    secondary = [ModelTypesIdsEnum.logit]
    composer_requirements = ComposerRequirements(primary=primary,
                                                 secondary=secondary)
    new_chain = composer.compose_chain(
        data=empty_data,
        initial_chain=None,
        composer_requirements=composer_requirements,
        metrics=None)

    assert len(new_chain.nodes) == 3
    assert isinstance(new_chain.nodes[0], PrimaryNode)
    assert isinstance(new_chain.nodes[1], PrimaryNode)
    assert isinstance(new_chain.nodes[2], SecondaryNode)
    assert new_chain.nodes[2].nodes_from[0] is new_chain.nodes[0]
    assert new_chain.nodes[2].nodes_from[1] is new_chain.nodes[1]
    assert new_chain.nodes[1].nodes_from is None
Code example #6
def test_random_composer(data_fixture, request):
    random.seed(1)
    np.random.seed(1)
    data = request.getfixturevalue(data_fixture)
    dataset_to_compose = data
    dataset_to_validate = data

    models_repo = ModelTypesRepository()
    available_model_types, _ = models_repo.search_models(
        desired_metainfo=ModelMetaInfoTemplate(
            input_type=NumericalDataTypesEnum.table,
            output_type=CategoricalDataTypesEnum.vector,
            task_type=MachineLearningTasksEnum.classification,
            can_be_initial=True,
            can_be_secondary=True))

    metric_function = MetricsRepository().metric_by_id(
        ClassificationMetricsEnum.ROCAUC)

    random_composer = RandomSearchComposer(iter_num=1)
    req = ComposerRequirements(primary=available_model_types,
                               secondary=available_model_types)
    chain_random_composed = random_composer.compose_chain(
        data=dataset_to_compose,
        initial_chain=None,
        composer_requirements=req,
        metrics=metric_function)
    chain_random_composed.fit_from_scratch(input_data=dataset_to_compose)

    predicted_random_composed = chain_random_composed.predict(
        dataset_to_validate)

    roc_on_valid_random_composed = roc_auc(
        y_true=dataset_to_validate.target,
        y_score=predicted_random_composed.predict)

    assert roc_on_valid_random_composed > 0.6
Code example #7
# the search of the models provided by the framework that can be used as nodes in a chain for the selected task
models_repo = ModelTypesRepository()
available_model_types, _ = models_repo.search_models(
    desired_metainfo=ModelMetaInfoTemplate(
        input_type=NumericalDataTypesEnum.table,
        output_type=CategoricalDataTypesEnum.vector,
        task_type=problem_class,
        can_be_initial=True,
        can_be_secondary=True))

# the choice of the metric for the chain quality assessment during composition
metric_function = MetricsRepository().metric_by_id(RegressionMetricsEnum.RMSE)

# the choice and initialisation of the composer

single_composer_requirements = ComposerRequirements(
    primary=[ModelTypesIdsEnum.ar], secondary=[])
chain_single = DummyComposer(DummyChainTypeEnum.flat).compose_chain(
    data=dataset_to_compose,
    initial_chain=None,
    composer_requirements=single_composer_requirements,
    metrics=metric_function)
train_prediction = chain_single.fit(input_data=dataset_to_compose,
                                    verbose=True)
print("Composition finished")

compare_plot(train_prediction, dataset_to_compose)

# the quality assessment for the obtained composite models
rmse_on_valid_single = calculate_validation_metric(chain_single,
                                                   dataset_to_validate)
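
Code example #7 relies on a calculate_validation_metric helper that is not reproduced on this page. A minimal sketch of what such a helper could look like for the RMSE case, assuming the chain exposes the same predict interface used in code example #1 and that scikit-learn is available, is:

from math import sqrt

from sklearn.metrics import mean_squared_error


def calculate_validation_metric(chain, dataset_to_validate):
    # hypothetical helper: predict on the hold-out data and score the chain with RMSE
    predicted = chain.predict(dataset_to_validate)
    return sqrt(mean_squared_error(y_true=dataset_to_validate.target,
                                   y_pred=predicted.predict))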
Code example #8
# the choice and initialisation of the composer

composer_requirements = GPComposerRequirements(
    primary=available_model_types,
    secondary=available_model_types,
    max_arity=2,
    max_depth=2,
    pop_size=10,
    num_of_generations=10,
    crossover_prob=0.8,
    mutation_prob=0.8,
    max_lead_time=datetime.timedelta(minutes=3))

single_composer_requirements = ComposerRequirements(
    primary=[ModelTypesIdsEnum.lasso, ModelTypesIdsEnum.ridge],
    secondary=[ModelTypesIdsEnum.linear])
chain_static = DummyComposer(DummyChainTypeEnum.hierarchical).compose_chain(
    data=dataset_to_compose,
    initial_chain=None,
    composer_requirements=single_composer_requirements,
    metrics=metric_function)
chain_static.fit(input_data=dataset_to_compose, verbose=False)

# Create GP-based composer
composer = GPComposer()

# the optimal chain generation by composition - the most time-consuming task
chain_evo_composed = composer.compose_chain(
    data=dataset_to_compose,
    initial_chain=None,
    # the snippet is truncated here in the source; the remaining arguments
    # presumably mirror the full call shown in code example #9
    composer_requirements=composer_requirements,
    metrics=metric_function)
Code example #9
def run_credit_scoring_problem(train_file_path, test_file_path,
                               max_lead_time: datetime.timedelta = datetime.timedelta(minutes=20),
                               gp_optimiser_params: Optional[GPChainOptimiserParameters] = None):
    dataset_to_compose = InputData.from_csv(train_file_path)
    dataset_to_validate = InputData.from_csv(test_file_path)
    # the search of the models provided by the framework that can be used as nodes in a chain for the selected task
    models_repo = ModelTypesRepository()
    available_model_types, _ = models_repo.search_models(
        desired_metainfo=ModelMetaInfoTemplate(input_type=NumericalDataTypesEnum.table,
                                               output_type=CategoricalDataTypesEnum.vector,
                                               task_type=[MachineLearningTasksEnum.classification,
                                                          MachineLearningTasksEnum.clustering],
                                               can_be_initial=True,
                                               can_be_secondary=True))

    # the choice of the metric for the chain quality assessment during composition
    metric_function = MetricsRepository().metric_by_id(ClassificationMetricsEnum.ROCAUC)

    if gp_optimiser_params:
        optimiser_parameters = gp_optimiser_params
    else:
        optimiser_parameters = GPChainOptimiserParameters(selection_types=[SelectionTypesEnum.tournament],
                                                          crossover_types=[CrossoverTypesEnum.subtree],
                                                          mutation_types=[MutationTypesEnum.growth],
                                                          regularization_type=RegularizationTypesEnum.decremental,
                                                          chain_generation_function=random_ml_chain,
                                                          crossover_types_dict=crossover_by_type,
                                                          mutation_types_dict=mutation_by_type)
    composer_requirements = GPComposerRequirements(
        primary=available_model_types,
        secondary=available_model_types, max_arity=4,
        max_depth=3, pop_size=5, num_of_generations=5,
        crossover_prob=0.8, mutation_prob=0.8, max_lead_time=max_lead_time)

    # Create GP-based composer
    composer = GPComposer()

    # the optimal chain generation by composition - the most time-consuming task
    chain_evo_composed = composer.compose_chain(data=dataset_to_compose,
                                                initial_chain=None,
                                                composer_requirements=composer_requirements,
                                                metrics=metric_function, optimiser_parameters=optimiser_parameters,
                                                is_visualise=False)
    chain_evo_composed.fit(input_data=dataset_to_compose, verbose=True)

    # the choice and initialisation of the dummy_composer
    dummy_composer = DummyComposer(DummyChainTypeEnum.hierarchical)

    chain_static = dummy_composer.compose_chain(data=dataset_to_compose,
                                                initial_chain=None,
                                                composer_requirements=composer_requirements,
                                                metrics=metric_function, is_visualise=True)
    chain_static.fit(input_data=dataset_to_compose, verbose=True)
    # the single-model variant of optimal chain
    single_composer_requirements = ComposerRequirements(primary=[ModelTypesIdsEnum.xgboost],
                                                        secondary=[])
    chain_single = DummyComposer(DummyChainTypeEnum.flat).compose_chain(data=dataset_to_compose,
                                                                        initial_chain=None,
                                                                        composer_requirements=single_composer_requirements,
                                                                        metrics=metric_function)
    chain_single.fit(input_data=dataset_to_compose, verbose=True)
    print("Composition finished")

    ComposerVisualiser.visualise(chain_static)
    ComposerVisualiser.visualise(chain_evo_composed)

    # the quality assessment for the obtained composite models
    roc_on_valid_static = calculate_validation_metric(chain_static, dataset_to_validate)
    roc_on_valid_single = calculate_validation_metric(chain_single, dataset_to_validate)
    roc_on_valid_evo_composed = calculate_validation_metric(chain_evo_composed, dataset_to_validate)

    print(f'Composed ROC AUC is {round(roc_on_valid_evo_composed, 3)}')
    print(f'Static ROC AUC is {round(roc_on_valid_static, 3)}')
    print(f'Single-model ROC AUC is {round(roc_on_valid_single, 3)}')

    return (roc_on_valid_evo_composed, chain_evo_composed), (chain_static, roc_on_valid_static), (
        chain_single, roc_on_valid_single)
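
The credit scoring example above also delegates scoring to calculate_validation_metric, which is not shown here. A minimal sketch, assuming it mirrors the ROC AUC computation from code example #1 and that roc_auc is scikit-learn's roc_auc_score, might be:

from sklearn.metrics import roc_auc_score as roc_auc


def calculate_validation_metric(chain, dataset_to_validate):
    # hypothetical helper: predict on the hold-out data and score the chain with ROC AUC
    predicted = chain.predict(dataset_to_validate)
    return roc_auc(y_true=dataset_to_validate.target,
                   y_score=predicted.predict)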