Beispiel #1
0
def test_classification_models_fit_correct(data_fixture, request):
    """Fit every suitable 'ml' classification model and check its test ROC AUC.

    The dataset is obtained from the pytest fixture named by ``data_fixture``
    and split with ``train_test_data_setup``. Each model returned by the
    operations repository is fitted on the train part and evaluated on the
    test part.

    :param data_fixture: name of the fixture that provides the input data
    :param request: pytest ``request`` object used to resolve the fixture
    """
    data = request.getfixturevalue(data_fixture)
    train_data, test_data = train_test_data_setup(data=data)
    roc_threshold = 0.95
    # 'bernb' and 'multinb' (presumably the naive Bayes models - confirm)
    # are only required to be no worse than random guessing.
    relaxed_models = ('bernb', 'multinb')
    relaxed_roc_threshold = 0.5
    logger = default_log('default_test_logger')

    with OperationTypesRepository() as repo:
        model_names, _ = repo.suitable_operation(
            task_type=TaskTypesEnum.classification,
            data_type=data.data_type,
            tags=['ml'])

    for model_name in model_names:
        logger.info(f"Test classification model: {model_name}.")
        model = Model(operation_type=model_name)
        # The fitted operation is needed for prediction, so it gets a real
        # name; the train prediction itself is not used by this test.
        fitted_operation, _ = model.fit(data=train_data)
        test_pred = model.predict(fitted_operation=fitted_operation,
                                  data=test_data,
                                  is_fit_pipeline_stage=False)
        roc_on_test = get_roc_auc(valid_data=test_data,
                                  predicted_data=test_pred)

        if model_name in relaxed_models:
            expected_roc = relaxed_roc_threshold
        else:
            expected_roc = roc_threshold
        # Name the model in the message so a failure is attributable.
        assert roc_on_test >= expected_roc, \
            f'{model_name}: ROC AUC {roc_on_test} is below {expected_roc}'
Beispiel #2
0
def test_log_clustering_fit_correct(data_fixture, request):
    """Fit 'kmeans' on normalized train data and check the predicted labels.

    The test passes when the set of distinct labels in the train prediction
    is exactly ``[0, 1]``.

    :param data_fixture: name of the fixture that provides the input data
    :param request: pytest ``request`` object used to resolve the fixture
    """
    data = request.getfixturevalue(data_fixture)
    train_data, _ = train_test_data_setup(data=data)

    # Scaling pipeline. Fit predict it
    scaling_pipeline = Pipeline(PrimaryNode('normalization'))
    scaling_pipeline.fit(train_data)
    scaled_data = scaling_pipeline.predict(train_data)

    kmeans = Model(operation_type='kmeans')
    _, train_predicted = kmeans.fit(data=scaled_data)

    found_labels = np.unique(train_predicted.predict)
    # Compare the arrays as a whole: an element-wise `==` against [0, 1] does
    # not produce a per-element result when the number of labels differs, so
    # wrapping it in all() would error out instead of failing the assertion.
    assert np.array_equal(found_labels, [0, 1]), \
        f'Expected cluster labels [0, 1], got {found_labels}'
Beispiel #3
0
def test_svc_fit_correct(data_fixture, request):
    """Fit an 'svc' model on normalized train data and check its train ROC AUC.

    :param data_fixture: name of the fixture that provides the input data
    :param request: pytest ``request`` object used to resolve the fixture
    """
    dataset = request.getfixturevalue(data_fixture)
    train_part, _ = train_test_data_setup(data=dataset)

    # Normalize the train sample with a single-node pipeline
    normalizer = Pipeline(PrimaryNode('normalization'))
    normalizer.fit(train_part)
    normalized_train = normalizer.predict(train_part)

    _, fit_prediction = Model(operation_type='svc').fit(data=normalized_train)

    min_roc_auc = 0.95
    train_roc_auc = get_roc_auc(valid_data=train_part,
                                predicted_data=fit_prediction)
    assert train_roc_auc >= min_roc_auc
Beispiel #4
0
def test_random_forest_fit_correct(data_fixture, request):
    """Fit an 'rf' model on normalized train data and check its train ROC AUC.

    :param data_fixture: name of the fixture that provides the input data
    :param request: pytest ``request`` object used to resolve the fixture
    """
    dataset = request.getfixturevalue(data_fixture)
    train_part, _ = train_test_data_setup(data=dataset)

    # Normalize the train sample with a single-node chain
    normalizer = Chain(PrimaryNode('normalization'))
    normalizer.fit(train_part)
    normalized_train = normalizer.predict(train_part)

    _, fit_prediction = Model(operation_type='rf').fit(data=normalized_train)

    min_roc_auc = 0.95
    train_roc_auc = get_roc_auc(valid_data=train_part,
                                predicted_data=fit_prediction)
    assert train_roc_auc >= min_roc_auc
Beispiel #5
0
def test_log_regression_fit_correct(classification_dataset):
    """Fit a 'logit' model on normalized train data and check its train ROC AUC.

    :param classification_dataset: input data supplied by the fixture of the
        same name
    """
    train_part, _ = train_test_data_setup(data=classification_dataset)

    # Normalize the train sample with a single-node chain
    normalizer = Chain(PrimaryNode('normalization'))
    normalizer.fit(train_part)
    normalized_train = normalizer.predict(train_part)

    _, fit_prediction = Model(operation_type='logit').fit(data=normalized_train)

    min_roc_auc = 0.95
    train_roc_auc = get_roc_auc(valid_data=train_part,
                                predicted_data=fit_prediction)
    assert train_roc_auc >= min_roc_auc
Beispiel #6
0
def test_node_factory_log_reg_correct(data_setup):
    """Check that a 'logit' PrimaryNode wraps an operation of the Model class.

    :param data_setup: fixture requested by the test; its value is not used
        in the body
    """
    operation_name = 'logit'
    primary = PrimaryNode(operation_type=operation_name)

    reference = Model(operation_type=operation_name)
    model_cls = reference.__class__
    node_operation_cls = primary.operation.__class__

    # The node must be exactly a PrimaryNode, and its operation must have
    # exactly the same class as a directly constructed Model.
    assert primary.__class__ == PrimaryNode
    assert model_cls == node_operation_cls
Beispiel #7
0
def test_ts_models_fit_correct():
    """Fit every suitable 'time_series' forecasting model and check its MAE.

    Data comes from ``get_ts_data(forecast_length=5)``. For each model the
    mean absolute error of the first predicted row against the test target
    must be below twice the variance of the test target.
    """
    train_data, test_data = get_ts_data(forecast_length=5)
    logger = default_log('default_test_logger')

    with OperationTypesRepository() as repo:
        model_names, _ = repo.suitable_operation(
            task_type=TaskTypesEnum.ts_forecasting, tags=['time_series'])

    for model_name in model_names:
        logger.info(f"Test time series model: {model_name}.")
        model = Model(operation_type=model_name)
        # Each model is fitted on its own copy of the train data.
        # The fitted operation is needed for prediction, so it gets a real
        # name; the train prediction itself is not used by this test.
        fitted_operation, _ = model.fit(data=deepcopy(train_data))
        test_pred = model.predict(fitted_operation=fitted_operation,
                                  data=test_data,
                                  is_fit_pipeline_stage=False)
        mae_value_test = mean_absolute_error(y_true=test_data.target,
                                             y_pred=test_pred.predict[0])

        mae_threshold = np.var(test_data.target) * 2
        # Name the model in the message so a failure is attributable.
        assert mae_value_test < mae_threshold, \
            f'{model_name}: MAE {mae_value_test} is not below {mae_threshold}'
Beispiel #8
0
def test_regression_models_fit_correct():
    """Fit every suitable 'ml' regression model and check its test error.

    Synthetic data (1000 samples, ``random_state=42``) is split with
    ``train_test_data_setup``. For each model the value returned by
    ``mean_squared_error`` on the test part must be below the squared
    standard deviation (i.e. the variance) of the test target.
    """
    data = get_synthetic_regression_data(n_samples=1000, random_state=42)
    train_data, test_data = train_test_data_setup(data)
    logger = default_log('default_test_logger')

    with OperationTypesRepository() as repo:
        model_names, _ = repo.suitable_operation(
            task_type=TaskTypesEnum.regression, tags=['ml'])

    for model_name in model_names:
        logger.info(f"Test regression model: {model_name}.")
        model = Model(operation_type=model_name)
        # The fitted operation is needed for prediction, so it gets a real
        # name; the train prediction itself is not used by this test.
        fitted_operation, _ = model.fit(data=train_data)
        test_pred = model.predict(fitted_operation=fitted_operation,
                                  data=test_data,
                                  is_fit_pipeline_stage=False)
        # The metric is called with default arguments and compared against a
        # variance, so the names say "mse" rather than "rmse".
        mse_value_test = mean_squared_error(y_true=test_data.target,
                                            y_pred=test_pred.predict)

        mse_threshold = np.std(test_data.target) ** 2
        # Name the model in the message so a failure is attributable.
        assert mse_value_test < mse_threshold, \
            f'{model_name}: MSE {mse_value_test} is not below {mse_threshold}'
Beispiel #9
0
def test_logger_write_logs_correctly():
    """Check that a failing model fit writes the expected message to the log.

    A 'knnreg' model is fitted on classification data with a file-backed
    logger. Any exception from the fit is swallowed on purpose: the test
    only inspects the first line of the log file afterwards.
    """
    test_file_path = str(os.path.dirname(__file__))
    test_log_file = os.path.join(test_file_path, 'test_log.log')
    test_log = default_log('test_log', log_file=test_log_file)

    # Stays empty when the log file is missing, so the checks below fail
    # with a clear assertion instead of a NameError.
    content = []
    try:
        # Model data preparation
        data_path = os.path.join('../data', 'advanced_classification.csv')
        data = InputData.from_csv(os.path.join(test_file_path, data_path))
        train_data, _ = train_test_data_setup(data=data)

        try:
            knn = Model(operation_type='knnreg', log=test_log)
            knn.fit(train_data, is_fit_chain_stage=True)
        except Exception:
            # Deliberate best-effort: the fit is expected to fail and the
            # failure is verified through the log contents.
            print('Captured error')

        if os.path.exists(test_log_file):
            with open(test_log_file, 'r') as log_stream:
                content = log_stream.readlines()
    finally:
        # Release the logger even when data preparation raises.
        release_log(logger=test_log, log_file=test_log_file)

    assert content, f'Log file {test_log_file} is missing or empty'
    assert 'Can not find evaluation strategy' in content[0]
Beispiel #10
0
    def get_operation(self) -> Operation:
        """
        Factory method: build the operation object selected by
        ``self.operation_type``.

        :return: a ``Model`` when the type is 'model', a ``DataOperation``
            when it is 'data_operation'; both are created with
            ``operation_type=self.operation_name``
        :raises ValueError: if ``self.operation_type`` has any other value
        """

        kind = self.operation_type

        if kind == 'model':
            return Model(operation_type=self.operation_name)

        if kind == 'data_operation':
            return DataOperation(operation_type=self.operation_name)

        raise ValueError(f'Operation type {self.operation_type} is not supported')