Example #1
# imports assumed by this snippet
from fedot.api.main import Fedot
from fedot.core.utils import project_root


def test_classification_quality_improvement():
    # input data initialization
    train_data_path = f'{project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{project_root()}/cases/data/scoring/scoring_test.csv'

    problem = 'classification'

    # baseline: a single predefined XGBoost model, no composing
    baseline_model = Fedot(problem=problem)
    baseline_model.fit(features=train_data_path, target='target', predefined_model='xgboost')
    expected_baseline_quality = 0.823

    baseline_model.predict_proba(features=test_data_path)
    baseline_metrics = baseline_model.get_metrics()

    # define parameters for composing
    composer_params = {'max_depth': 3,
                       'max_arity': 3,
                       'pop_size': 20,
                       'num_of_generations': 20,
                       'learning_time': 10,
                       'with_tuning': True}

    # pipeline obtained via evolutionary composing
    auto_model = Fedot(problem=problem, composer_params=composer_params, seed=42, verbose_level=4)
    auto_model.fit(features=train_data_path, target='target')
    auto_model.predict_proba(features=test_data_path)
    auto_metrics = auto_model.get_metrics()
    print(auto_metrics['roc_auc'])

    # the composed model must beat the baseline, and the baseline must
    # reach the expected ROC AUC level
    assert auto_metrics['roc_auc'] > baseline_metrics['roc_auc'] >= expected_baseline_quality
Example #2
# imports assumed by this snippet; get_dataset is a test helper defined
# elsewhere in the test suite (see the sketch after this example)
from fedot.api.main import Fedot


def test_baseline_with_api():
    train_data, test_data, threshold = get_dataset('classification')

    # task selection, initialisation of the framework
    baseline_model = Fedot(problem='classification')

    # fit model without optimisation - a single XGBoost node is used
    baseline_model.fit(features=train_data, target='target', predefined_model='xgboost')

    # evaluate the prediction with test data
    prediction = baseline_model.predict_proba(features=test_data)

    assert len(prediction) == len(test_data.target)

    # evaluate quality metric for the test sample
    baseline_metrics = baseline_model.get_metrics(metric_names='f1')

    assert baseline_metrics['f1'] > 0
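
The get_dataset helper used above is defined elsewhere in the test suite and is not shown here. A minimal sketch of what such a helper could look like, assuming it builds FEDOT InputData from a synthetic scikit-learn dataset and splits it; the sample size and the 0.5 threshold are illustrative assumptions:

import numpy as np
from sklearn.datasets import make_classification

from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum


def get_dataset(task_type='classification'):
    # hypothetical helper: generate a synthetic binary classification table
    features, target = make_classification(n_samples=500, n_features=10, random_state=42)
    data = InputData(idx=np.arange(len(target)),
                     features=features,
                     target=target,
                     task=Task(TaskTypesEnum.classification),
                     data_type=DataTypesEnum.table)
    train_data, test_data = train_test_data_setup(data)
    threshold = 0.5  # illustrative probability threshold
    return train_data, test_data, threshold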
Example #3
# imports assumed by this snippet
from fedot.api.main import Fedot
from fedot.core.utils import fedot_project_root


def run_classification_example(timeout=None):
    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'

    problem = 'classification'

    # baseline: a single predefined XGBoost model, no composing
    baseline_model = Fedot(problem=problem, timeout=timeout)
    baseline_model.fit(features=train_data_path,
                       target='target',
                       predefined_model='xgboost')

    baseline_model.predict(features=test_data_path)
    print(baseline_model.get_metrics())

    # automatically composed pipeline for the same problem
    auto_model = Fedot(problem=problem, seed=42, timeout=timeout)
    auto_model.fit(features=train_data_path, target='target')
    prediction = auto_model.predict_proba(features=test_data_path)
    print(auto_model.get_metrics())

    return prediction
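
A possible entry point for running this example; the timeout value (in minutes) is an illustrative assumption, not taken from the snippet above:

if __name__ == '__main__':
    run_classification_example(timeout=10)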
Example #4
# imports assumed by this snippet
import pandas as pd

from fedot.api.main import Fedot
from fedot.core.utils import project_root


def run_classification_multiobj_example(with_plot=True):
    train_data = pd.read_csv(
        f'{project_root()}/examples/data/Hill_Valley_with_noise_Training.data')
    test_data = pd.read_csv(
        f'{project_root()}/examples/data/Hill_Valley_with_noise_Testing.data')
    target = test_data['class']
    del test_data['class']
    problem = 'classification'

    # multi-objective composing: optimise F1 and the number of pipeline nodes at once
    auto_model = Fedot(problem=problem,
                       learning_time=2,
                       preset='light',
                       composer_params={'metric': ['f1', 'node_num']},
                       seed=42)
    auto_model.fit(features=train_data, target='class')
    prediction = auto_model.predict_proba(features=test_data)
    print(auto_model.get_metrics(target))

    if with_plot:
        # visualise the Pareto front of the obtained pipelines
        auto_model.best_models.show()

    return prediction
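
As before, a possible entry point for this example:

if __name__ == '__main__':
    run_classification_multiobj_example(with_plot=True)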
Example #5
# imports assumed by this snippet
from copy import deepcopy

import pandas as pd

from fedot.api.main import Fedot
from fedot.core.operations.atomized_model import AtomizedModel
from fedot.core.pipelines.node import PrimaryNode, SecondaryNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.utils import fedot_project_root


def run_additional_learning_example():
    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'

    train_data = pd.read_csv(train_data_path)
    test_data = pd.read_csv(test_data_path)
    test_data_target = test_data['target']
    del test_data['target']

    problem = 'classification'

    # initial model: compose a pipeline on a small subsample,
    # starting from a scaling -> logit pipeline
    auto_model = Fedot(problem=problem,
                       seed=42,
                       preset='light',
                       timeout=5,
                       composer_params={
                           'initial_pipeline':
                           Pipeline(
                               SecondaryNode(
                                   'logit',
                                   nodes_from=[PrimaryNode('scaling')]))
                       })

    auto_model.fit(features=deepcopy(train_data.head(1000)), target='target')
    auto_model.predict_proba(features=deepcopy(test_data))
    print('auto_model',
          auto_model.get_metrics(target=deepcopy(test_data_target)))

    prev_model = auto_model.current_pipeline
    prev_model.show()

    # continue learning on a larger subsample: either wrap the previous pipeline
    # into an AtomizedModel node or reuse it directly as the initial pipeline
    prev_model.unfit()
    atomized_model = Pipeline(
        SecondaryNode(operation_type=AtomizedModel(prev_model),
                      nodes_from=[PrimaryNode('scaling')]))
    non_atomized_model = deepcopy(prev_model)

    train_data = train_data.head(5000)
    timeout = 1

    auto_model_from_atomized = Fedot(
        problem=problem,
        seed=42,
        preset='light',
        timeout=timeout,
        composer_params={'initial_pipeline': atomized_model},
        verbose_level=2)
    auto_model_from_atomized.fit(features=deepcopy(train_data),
                                 target='target')
    auto_model_from_atomized.predict_proba(features=deepcopy(test_data))
    auto_model_from_atomized.current_pipeline.show()
    print('auto_model_from_atomized',
          auto_model_from_atomized.get_metrics(deepcopy(test_data_target)))

    auto_model_from_pipeline = Fedot(
        problem=problem,
        seed=42,
        preset='light',
        timeout=timeout,
        composer_params={'initial_pipeline': non_atomized_model},
        verbose_level=2)
    auto_model_from_pipeline.fit(features=deepcopy(train_data),
                                 target='target')
    auto_model_from_pipeline.predict_proba(features=deepcopy(test_data))
    auto_model_from_pipeline.current_pipeline.show()
    print('auto_model_from_pipeline',
          auto_model_from_pipeline.get_metrics(deepcopy(test_data_target)))
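
And a possible entry point for the additional-learning example:

if __name__ == '__main__':
    run_additional_learning_example()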