def test_invalid_target_data_check_valid_labels_for_nonnegative_objectives(objective):
    """A multiclass target made only of positive labels yields an empty result."""
    features = pd.DataFrame({'column_one': [100, 100, 200, 300, 100, 200, 100] * 25})
    target = pd.Series([2, 2, 3, 3, 1, 1, 1] * 25)

    # Parameters are keyed by the data check's class name.
    check_params = {
        "InvalidTargetDataCheck": {
            "problem_type": "multiclass",
            "objective": objective,
        }
    }
    collection = DataChecks([InvalidTargetDataCheck], check_params)
    result = collection.validate(features, target)

    assert result == {"warnings": [], "errors": [], "actions": []}
def test_invalid_target_data_check_invalid_labels_for_nonnegative_objective_names(objective):
    """Targets holding values <= 0 produce a TARGET_INCOMPATIBLE_OBJECTIVE error.

    The scenario is exercised twice: through a ``DataChecks`` collection with
    a multiclass target containing -1, and through ``InvalidTargetDataCheck``
    directly with a regression target containing 0.
    """
    def expected_for(target):
        # One error whose details count every target value that is <= 0.
        offending = sum(value <= 0 for value in target.values.flatten())
        error = DataCheckError(
            message=f"Target has non-positive values which is not supported for {objective}",
            data_check_name=invalid_targets_data_check_name,
            message_code=DataCheckMessageCode.TARGET_INCOMPATIBLE_OBJECTIVE,
            details={"Count of offending values": offending})
        return {"warnings": [], "errors": [error.to_dict()], "actions": []}

    # Multiclass target with negative labels, run through the collection.
    multiclass_X = pd.DataFrame({'column_one': [100, 200, 100, 200, 200, 100, 200, 100] * 25})
    multiclass_y = pd.Series([2, 2, 3, 3, -1, -1, 1, 1] * 25)
    check_params = {"InvalidTargetDataCheck": {"problem_type": "multiclass",
                                               "objective": objective}}
    collection = DataChecks([InvalidTargetDataCheck], check_params)
    collection_result = collection.validate(multiclass_X, multiclass_y)
    assert collection_result == expected_for(multiclass_y)

    # Regression target containing a zero, run through the check itself.
    regression_X = pd.DataFrame({'column_one': [100, 200, 100, 200, 100]})
    regression_y = pd.Series([2, 3, 0, 1, 1])
    single_check = InvalidTargetDataCheck(problem_type="regression", objective=objective)
    single_result = single_check.validate(regression_X, regression_y)
    assert single_result == expected_for(regression_y)
def test_invalid_target_data_check_invalid_labels_for_objectives(
        time_series_core_objectives):
    """Objectives that are not positive-only raise nothing for non-positive targets.

    Checked through a ``DataChecks`` collection (multiclass target containing
    -1) and through ``InvalidTargetDataCheck`` directly (regression target
    containing 0). Objectives with ``positive_only`` set are skipped.
    """
    # A clean result carries all three keys, including an empty "actions"
    # list, which is the shape the other tests in this file compare against.
    no_messages = {"warnings": [], "errors": [], "actions": []}

    X = pd.DataFrame(
        {'column_one': [100, 200, 100, 200, 200, 100, 200, 100] * 25})
    y = pd.Series([2, 2, 3, 3, -1, -1, 1, 1] * 25)

    for objective in time_series_core_objectives:
        if not objective.positive_only:
            data_checks = DataChecks(
                [InvalidTargetDataCheck], {
                    "InvalidTargetDataCheck": {
                        "problem_type": "multiclass",
                        "objective": objective
                    }
                })
            assert data_checks.validate(X, y) == no_messages

    X = pd.DataFrame({'column_one': [100, 200, 100, 200, 100]})
    y = pd.Series([2, 3, 0, 1, 1])

    for objective in time_series_core_objectives:
        if not objective.positive_only:
            invalid_targets_check = InvalidTargetDataCheck(
                problem_type="regression", objective=objective)
            assert invalid_targets_check.validate(X, y) == no_messages
# Example #4
# 0
def test_data_checks_do_not_duplicate_actions(X_y_binary):
    """An action reported by two data checks appears once in the combined result."""
    X, y = X_y_binary

    def drop_col_action():
        # Build a fresh but equal dict on every call, so the collection has
        # to de-duplicate by equality rather than by object identity.
        return DataCheckAction(DataCheckActionCode.DROP_COL,
                               metadata={
                                   "column": 'col_to_drop'
                               }).to_dict()

    class MockDataCheck(DataCheck):
        def validate(self, X, y):
            return {
                "warnings": [],
                "errors": [],
                "actions": [drop_col_action()]
            }

    class MockDataCheckWithSameAction(DataCheck):
        # Reports the very same action as MockDataCheck. If this check
        # returned no actions, the assertion below would pass without ever
        # exercising de-duplication.
        def validate(self, X, y):
            return {
                "warnings": [],
                "errors": [],
                "actions": [drop_col_action()]
            }

    data_checks_list = [MockDataCheck, MockDataCheckWithSameAction]
    data_checks = DataChecks(data_checks=data_checks_list)

    # Check duplicate actions are returned once
    assert data_checks.validate(X, y) == {
        "warnings": [],
        "errors": [],
        "actions": [drop_col_action()]
    }
# Example #5
# 0
def test_data_checks(X_y_binary):
    """Warnings and errors from every check are collected in check order."""
    features, target = X_y_binary

    # Reports nothing at all.
    class MockDataCheck(DataCheck):
        def validate(self, X, y):
            return {"warnings": [], "errors": [], "actions": []}

    # Reports a single warning.
    class MockDataCheckWarning(DataCheck):
        def validate(self, X, y):
            warning = DataCheckWarning(message="warning one",
                                       data_check_name=self.name,
                                       message_code=None)
            return {"warnings": [warning.to_dict()], "errors": [], "actions": []}

    # Reports a single error.
    class MockDataCheckError(DataCheck):
        def validate(self, X, y):
            error = DataCheckError(message="error one",
                                   data_check_name=self.name,
                                   message_code=None)
            return {"warnings": [], "errors": [error.to_dict()], "actions": []}

    # Reports one warning and one error.
    class MockDataCheckErrorAndWarning(DataCheck):
        def validate(self, X, y):
            warning = DataCheckWarning(message="warning two",
                                       data_check_name=self.name,
                                       message_code=None)
            error = DataCheckError(message="error two",
                                   data_check_name=self.name,
                                   message_code=None)
            return {"warnings": [warning.to_dict()],
                    "errors": [error.to_dict()],
                    "actions": []}

    collection = DataChecks(data_checks=[
        MockDataCheck,
        MockDataCheckWarning,
        MockDataCheckError,
        MockDataCheckErrorAndWarning,
    ])
    result = collection.validate(features, target)

    expected_warnings = [
        DataCheckWarning(message="warning one", data_check_name="MockDataCheckWarning").to_dict(),
        DataCheckWarning(message="warning two", data_check_name="MockDataCheckErrorAndWarning").to_dict(),
    ]
    expected_errors = [
        DataCheckError(message="error one", data_check_name="MockDataCheckError").to_dict(),
        DataCheckError(message="error two", data_check_name="MockDataCheckErrorAndWarning").to_dict(),
    ]
    assert result == {
        "warnings": expected_warnings,
        "errors": expected_errors,
        "actions": [],
    }
# Example #6
# 0
def test_data_checks_drop_index(X_y_binary):
    """The DataTable's index column is absent from the X handed to each check."""
    X, y = X_y_binary
    X = pd.DataFrame(X)
    X['index_col'] = pd.Series(range(len(X)))
    X = ww.DataTable(X)
    X = X.set_index('index_col')

    class MockDataCheck(DataCheck):
        def validate(self, X, y):
            return {"warnings": [], "errors": [], "actions": []}

    # Sanity check: the unpatched mock returns a non-empty (truthy) result.
    assert MockDataCheck().validate(X, y)

    # Patch validate on the class so the arguments of every call are recorded.
    MockDataCheck.validate = MagicMock()
    check_classes = [MockDataCheck, MockDataCheck, MockDataCheck]
    checks = DataChecks(check_classes)
    checks.validate(X, y)

    validate_args = MockDataCheck.validate.call_args_list
    # Without this guard the loop below would pass vacuously if validate
    # were never invoked.
    assert len(validate_args) == len(check_classes)
    for arg in validate_args:
        # arg[0] holds the positional arguments; arg[0][0] is the X passed in.
        assert 'index_col' not in arg[0][0].columns