def test_validate_with_test_data_classification_target_filter_validations(
            self, target_filter_type):
        """Check validation of target-style filters on classification data.

        ``target_filter_type`` is the cohort filter column under test
        (presumably supplied through parametrization outside this method).

        Two rejections are expected when validating against the binary
        classification toy dataset with ``is_classification=True``:

        * an ``excludes`` filter fails with a message stating that only the
          ``includes`` method can be configured for classification;
        * an ``includes`` filter with ``arg=['Z']`` fails with a message
          stating that a class in ``arg`` is not present in the test data.
        """
        dataset = get_toy_binary_classification_dataset()

        def run_validation(candidate):
            # A fresh ``categorical_features`` list is passed on every call.
            candidate._validate_with_test_data(
                test_data=dataset,
                target_column="target",
                categorical_features=[],
                is_classification=True)

        wrong_method_message = (
            "{0} can only be configured with "
            "filter {1} for classification").format(
                target_filter_type,
                CohortFilterMethods.METHOD_INCLUDES)
        with pytest.raises(UserConfigValidationException,
                           match=wrong_method_message):
            excludes_filter = CohortFilter(
                method=CohortFilterMethods.METHOD_EXCLUDES,
                arg=['X'],
                column=target_filter_type)
            run_validation(excludes_filter)

        unknown_class_message = (
            "Found a class in arg which is not present in "
            "test data")
        with pytest.raises(UserConfigValidationException,
                           match=unknown_class_message):
            includes_filter = CohortFilter(
                method=CohortFilterMethods.METHOD_INCLUDES,
                arg=['Z'],
                column=target_filter_type)
            run_validation(includes_filter)
    def test_validate_with_test_data_with_dataset_validations(
            self):
        """Check validation of a filter on a categorical dataset column.

        A small three-row frame with columns ``age``, ``type`` and
        ``target`` is validated with ``type`` declared as categorical.
        Two rejections are expected:

        * an ``excludes`` filter on ``type`` fails with a message stating
          that a categorical feature should only be configured with the
          ``includes`` cohort filter;
        * an ``includes`` filter whose ``arg`` contains ``'mid'`` fails with
          a message stating that the category is not present in the test
          data column.
        """
        # Each row is (age, type, target); every row must carry all three
        # fields so that the 'type' column only holds 'new' / 'old'.
        test_data = pd.DataFrame(
            data=[[23, 'new', 'A'], [25, 'new', 'B'], [25, 'old', 'B']],
            columns=["age", 'type', "target"])

        with pytest.raises(
                UserConfigValidationException,
                match="{0} is a categorical feature and should be only "
                      "configured with {1} cohort filter.".format(
                          "type",
                          CohortFilterMethods.METHOD_INCLUDES)):
            cohort_filter = CohortFilter(
                method=CohortFilterMethods.METHOD_EXCLUDES,
                arg=['new'],
                column='type')
            cohort_filter._validate_with_test_data(
                test_data=test_data,
                target_column="target",
                categorical_features=['type'],
                is_classification=True)

        with pytest.raises(
                UserConfigValidationException,
                match="Found a category {0} in arg which is not present "
                      "in test data column {1}.".format('mid', 'type')):
            cohort_filter = CohortFilter(
                method=CohortFilterMethods.METHOD_INCLUDES,
                arg=['mid'],
                column='type')
            cohort_filter._validate_with_test_data(
                test_data=test_data,
                target_column="target",
                categorical_features=['type'],
                is_classification=True)
    def test_validate_with_test_data_index_filter_validations(self):
        """Check validation of filters on the ``CohortFilter.INDEX`` column.

        Against the binary classification toy dataset, two rejections are
        expected:

        * an ``excludes`` filter fails with a message stating that excludes
          is not supported with index based selection;
        * a ``greater`` filter with ``arg=[65.0]`` fails with a message
          stating that all entries in ``arg`` should be of type int.
        """
        dataset = get_toy_binary_classification_dataset()

        def expect_rejection(candidate, message):
            with pytest.raises(UserConfigValidationException,
                               match=message):
                candidate._validate_with_test_data(
                    test_data=dataset,
                    target_column="target",
                    categorical_features=[])

        excludes_on_index = CohortFilter(
            method=CohortFilterMethods.METHOD_EXCLUDES,
            arg=[65],
            column=CohortFilter.INDEX)
        expect_rejection(
            excludes_on_index,
            "excludes filter is not supported with Index based "
            "selection.")

        float_arg_on_index = CohortFilter(
            method=CohortFilterMethods.METHOD_GREATER,
            arg=[65.0],
            column=CohortFilter.INDEX)
        expect_rejection(
            float_arg_on_index,
            "All entries in arg should be of type int.")
    def test_validate_with_test_data_high_level_validations(self):
        """Check that a filter on an unknown column is rejected.

        A ``less`` filter on ``'fake_column'`` is validated against the
        binary classification toy dataset; the expected error message names
        the unknown column.
        """
        dataset = get_toy_binary_classification_dataset()
        unknown_column_filter = CohortFilter(
            method=CohortFilterMethods.METHOD_LESS,
            arg=[65],
            column='fake_column')
        expected_message = \
            "Unknown column fake_column specified in cohort filter"

        with pytest.raises(UserConfigValidationException,
                           match=expected_message):
            unknown_column_filter._validate_with_test_data(
                test_data=dataset,
                target_column="target",
                categorical_features=[])
    def test_validate_with_test_data_regression_error_filter_validations(
            self):
        """Check validation of filters on ``CohortFilter.REGRESSION_ERROR``.

        One filter object is reused and mutated between cases. Against the
        regression toy dataset the expected rejections are, in order:

        * validating with ``is_classification=True``;
        * using the ``includes`` method, then the ``excludes`` method;
        * using the ``greater`` method with string entries in ``arg``.
        """
        dataset = get_toy_regression_dataset()
        error_filter = CohortFilter(
            method=CohortFilterMethods.METHOD_LESS,
            arg=[2.5],
            column=CohortFilter.REGRESSION_ERROR)

        def run_validation(is_classification):
            error_filter._validate_with_test_data(
                test_data=dataset,
                target_column="target",
                categorical_features=[],
                is_classification=is_classification)

        classification_message = (
            "Error cannot be configured for classification"
            " scenarios.")
        with pytest.raises(UserConfigValidationException,
                           match=classification_message):
            run_validation(True)

        # Both set-membership methods are expected to fail identically.
        membership_message = (
            "Error cannot be configured with either includes"
            " or excludes.")
        for membership_method in (CohortFilterMethods.METHOD_INCLUDES,
                                  CohortFilterMethods.METHOD_EXCLUDES):
            with pytest.raises(UserConfigValidationException,
                               match=membership_message):
                error_filter.method = membership_method
                run_validation(False)

        arg_type_message = (
            "All entries in arg should be of type int or float"
            " for Error cohort.")
        with pytest.raises(UserConfigValidationException,
                           match=arg_type_message):
            error_filter.method = CohortFilterMethods.METHOD_GREATER
            error_filter.arg = ['val1', 'val2']
            run_validation(False)
    def test_validate_with_test_data_regression_target_filter_validations(
            self, target_filter_type, method):
        """Check that a target-style filter method is rejected for regression.

        ``target_filter_type`` is the filter column and ``method`` the filter
        method under test (both presumably supplied through parametrization
        outside this method). Validation against the regression toy dataset
        with ``is_classification=False`` is expected to fail with a message
        naming both values.
        """
        dataset = get_toy_regression_dataset()
        expected_message = (
            "{0} cannot be configured with "
            "filter {1} for regression.").format(target_filter_type, method)

        with pytest.raises(UserConfigValidationException,
                           match=expected_message):
            target_filter = CohortFilter(
                method=method,
                arg=[2.5],
                column=target_filter_type)
            target_filter._validate_with_test_data(
                test_data=dataset,
                target_column="target",
                categorical_features=[],
                is_classification=False)
    def test_validate_with_test_data_classification_error_filter_validations(
            self):
        """Check validation of ``CohortFilter.CLASSIFICATION_OUTCOME`` filters.

        Expected rejections, in order:

        * an ``excludes`` filter validated on the multiclass toy dataset
          with ``is_classification=True``, and on the binary toy dataset
          with ``is_classification=False`` (same message for both);
        * the same ``excludes`` filter on the binary toy dataset with
          ``is_classification=True`` (only ``includes`` is accepted);
        * an ``includes`` filter with ``arg=["random"]`` on the binary toy
          dataset (argument is not one of the allowed outcome values).
        """
        multiclass_data = get_toy_multiclass_classification_dataset()
        binary_data = get_toy_binary_classification_dataset()

        excludes_filter = CohortFilter(
            method=CohortFilterMethods.METHOD_EXCLUDES,
            arg=[ClassificationOutcomes.FALSE_NEGATIVE],
            column=CohortFilter.CLASSIFICATION_OUTCOME)
        invalid_arg_filter = CohortFilter(
            method=CohortFilterMethods.METHOD_INCLUDES,
            arg=["random"],
            column=CohortFilter.CLASSIFICATION_OUTCOME)

        def expect_rejection(candidate, message, data, is_classification):
            with pytest.raises(UserConfigValidationException,
                               match=message):
                candidate._validate_with_test_data(
                    test_data=data,
                    target_column="target",
                    categorical_features=[],
                    is_classification=is_classification)

        unsupported_scenario_message = (
            "Classification outcome cannot be "
            "configured for multi-class classification"
            " and regression scenarios.")
        unsupported_scenarios = (
            (multiclass_data, True),
            (binary_data, False),
        )
        for data, is_classification in unsupported_scenarios:
            expect_rejection(
                excludes_filter, unsupported_scenario_message,
                data, is_classification)

        expect_rejection(
            excludes_filter,
            "Classification outcome can only be configured with "
            "cohort filter includes.",
            binary_data, True)

        expect_rejection(
            invalid_arg_filter,
            "Classification outcome can only take argument values "
            "from False negative or False positive or True "
            "negative or True positive.",
            binary_data, True)