Esempio n. 1
0
    def test_responsibleai_adult_with_ill_defined_cohorts(
            self, create_rai_insights_object_classification):
        """Reject malformed ``cohort_list`` values passed to the dashboard.

        Builds one cohort from two filters, then checks that
        ``ResponsibleAIDashboard`` raises ``UserConfigValidationException``
        when ``cohort_list`` is a dict instead of a list, and when the list
        contains an entry that is not a ``Cohort``.

        NOTE(review): ``create_rai_insights_object_classification`` is
        presumably a pytest fixture defined outside this excerpt -- confirm.
        """
        rai_insights = create_rai_insights_object_classification

        age_filter = CohortFilter(
            column='Age',
            method=CohortFilterMethods.METHOD_LESS,
            arg=[65])
        hours_filter = CohortFilter(
            column='Hours per week',
            method=CohortFilterMethods.METHOD_GREATER,
            arg=[40])

        cohort = Cohort(name='Cohort Continuous')
        for cohort_filter in (age_filter, hours_filter):
            cohort.add_cohort_filter(cohort_filter)

        # Case 1: a dict is supplied where a list is required.
        not_a_list_msg = "cohort_list parameter should be a list."
        with pytest.raises(UserConfigValidationException,
                           match=not_a_list_msg):
            ResponsibleAIDashboard(rai_insights, cohort_list={})

        # Case 2: the list holds an entry that is not a Cohort.
        bad_entry_msg = (
            "All entries in cohort_list should be of type Cohort.")
        with pytest.raises(UserConfigValidationException,
                           match=bad_entry_msg):
            ResponsibleAIDashboard(rai_insights, cohort_list=[cohort, {}])
    def test_cohort_serialization_deserialization_include_exclude_methods(
            self, method):
        """Round-trip a cohort through ``to_json`` and ``from_json``.

        The check runs once with string arg values and once with integer arg
        values. Each time the JSON text must contain the method, every arg
        value and the column name, and the deserialized cohort must compare
        equal to the cohort that was serialized.

        NOTE(review): ``method`` is presumably injected by a parametrize
        decorator that is not visible in this excerpt -- confirm.
        """
        scenarios = (
            ("Cohort New Str", ('val1', 'val2', 'val3')),
            ("Cohort New Int", (1, 2, 3)),
        )
        for cohort_name, arg_values in scenarios:
            cohort_filter = CohortFilter(method=method,
                                         arg=list(arg_values),
                                         column='age')
            cohort = Cohort(name=cohort_name)
            cohort.add_cohort_filter(cohort_filter)

            serialized = cohort.to_json()
            assert method in serialized
            for value in arg_values:
                # Integers are searched for by their decimal text ('1', ...).
                assert str(value) in serialized
            assert 'age' in serialized

            restored = Cohort.from_json(serialized)
            assert cohort == restored
    def test_cohort_filter_validate_arg(self):
        """Check that ``CohortFilter`` rejects invalid ``arg`` values.

        Two inputs are tried: an ``int`` where a list is expected, and an
        empty list. Both must raise ``UserConfigValidationException``.
        """
        wrong_type_msg = ("Got unexpected type <class 'int'> for arg. "
                          "Expected list type.")
        with pytest.raises(UserConfigValidationException,
                           match=wrong_type_msg):
            CohortFilter(column="age", arg=1,
                         method=CohortFilterMethods.METHOD_GREATER)

        empty_list_msg = "Empty list supplied for arg."
        with pytest.raises(UserConfigValidationException,
                           match=empty_list_msg):
            CohortFilter(column="age", arg=[],
                         method=CohortFilterMethods.METHOD_GREATER)
    def test_validate_with_test_data_high_level_validations(self):
        """Expect an "Unknown column" error for a filter on ``fake_column``.

        The filter is validated against the toy binary classification
        dataset and must raise ``UserConfigValidationException``.
        """
        toy_data = get_toy_binary_classification_dataset()

        unknown_column_filter = CohortFilter(
            column='fake_column',
            method=CohortFilterMethods.METHOD_LESS,
            arg=[65])

        expected_msg = "Unknown column fake_column specified in cohort filter"
        with pytest.raises(UserConfigValidationException,
                           match=expected_msg):
            unknown_column_filter._validate_with_test_data(
                test_data=toy_data,
                target_column="target",
                categorical_features=[])
    def test_validate_with_test_data_with_dataset_validations(
            self):
        """Check categorical-feature validation against a small DataFrame.

        With ``type`` declared as a categorical feature, two misuses must
        raise ``UserConfigValidationException``:

        * an ``excludes`` filter on the categorical column, and
        * an ``includes`` filter naming a category (``'mid'``) that is
          passed in ``arg`` but does not occur in the ``type`` column.
        """
        # Three complete rows of (age, type, target). Each row must list all
        # three values so that no column is silently padded with missing data.
        test_data = pd.DataFrame(
            data=[[23, 'new', 'A'], [25, 'new', 'B'], [25, 'old', 'B']],
            columns=["age", 'type', "target"])

        wrong_method_msg = (
            "{0} is a categorical feature and should be only "
            "configured with {1} cohort filter.".format(
                "type",
                CohortFilterMethods.METHOD_INCLUDES))
        with pytest.raises(UserConfigValidationException,
                           match=wrong_method_msg):
            cohort_filter = CohortFilter(
                method=CohortFilterMethods.METHOD_EXCLUDES,
                arg=['new'],
                column='type')
            cohort_filter._validate_with_test_data(
                test_data=test_data,
                target_column="target",
                categorical_features=['type'],
                is_classification=True)

        unknown_category_msg = (
            "Found a category {0} in arg which is not present "
            "in test data column {1}.".format('mid', 'type'))
        with pytest.raises(UserConfigValidationException,
                           match=unknown_category_msg):
            cohort_filter = CohortFilter(
                method=CohortFilterMethods.METHOD_INCLUDES,
                arg=['mid'],
                column='type')
            cohort_filter._validate_with_test_data(
                test_data=test_data,
                target_column="target",
                categorical_features=['type'],
                is_classification=True)
    def test_validate_with_test_data_classification_target_filter_validations(
            self, target_filter_type):
        """Check validation of filters on a classification target column.

        For the column named by ``target_filter_type`` two failures are
        expected: an ``excludes`` filter, and an ``includes`` filter whose
        arg is ``['Z']``. Both must raise ``UserConfigValidationException``.

        NOTE(review): ``target_filter_type`` is presumably injected by a
        parametrize decorator that is not visible in this excerpt -- confirm.
        """
        classification_data = get_toy_binary_classification_dataset()

        def build_and_validate(method, arg):
            # Construction stays inside the caller's ``pytest.raises`` block,
            # so an exception from either step is what gets matched.
            candidate = CohortFilter(
                method=method,
                arg=arg,
                column=target_filter_type)
            candidate._validate_with_test_data(
                test_data=classification_data,
                target_column="target",
                categorical_features=[],
                is_classification=True)

        wrong_method_msg = (
            "{0} can only be configured with "
            "filter {1} for classification".format(
                target_filter_type,
                CohortFilterMethods.METHOD_INCLUDES))
        with pytest.raises(UserConfigValidationException,
                           match=wrong_method_msg):
            build_and_validate(CohortFilterMethods.METHOD_EXCLUDES, ['X'])

        unknown_class_msg = ("Found a class in arg which is not present in "
                             "test data")
        with pytest.raises(UserConfigValidationException,
                           match=unknown_class_msg):
            build_and_validate(CohortFilterMethods.METHOD_INCLUDES, ['Z'])
    def test_validate_with_test_data_index_filter_validations(self):
        """Check validation of filters on the ``CohortFilter.INDEX`` column.

        An ``excludes`` filter and a filter whose arg is a float must each
        raise ``UserConfigValidationException`` when validated.
        """
        toy_data = get_toy_binary_classification_dataset()

        excludes_on_index = CohortFilter(
            column=CohortFilter.INDEX,
            method=CohortFilterMethods.METHOD_EXCLUDES,
            arg=[65])
        excludes_msg = ("excludes filter is not supported with Index based "
                        "selection.")
        with pytest.raises(UserConfigValidationException,
                           match=excludes_msg):
            excludes_on_index._validate_with_test_data(
                test_data=toy_data,
                target_column="target",
                categorical_features=[])

        float_arg_on_index = CohortFilter(
            column=CohortFilter.INDEX,
            method=CohortFilterMethods.METHOD_GREATER,
            arg=[65.0])
        int_only_msg = "All entries in arg should be of type int."
        with pytest.raises(UserConfigValidationException,
                           match=int_only_msg):
            float_arg_on_index._validate_with_test_data(
                test_data=toy_data,
                target_column="target",
                categorical_features=[])
Esempio n. 8
0
    def test_responsibleai_housing_with_pre_defined_cohorts(
            self, create_rai_insights_object_regression):
        """Build a dashboard from four user-defined cohorts and validate it.

        The cohorts filter on two feature columns ('HouseAge', 'AveRooms'),
        on 'Index', on 'Predicted Y' and on 'True Y'. The resulting widget
        is handed to ``self.validate_rai_dashboard_data``.

        NOTE(review): ``create_rai_insights_object_regression`` is presumably
        a pytest fixture defined outside this excerpt -- confirm.
        """
        rai_insights = create_rai_insights_object_regression

        def make_cohort(name, *filter_specs):
            # Filters are created first, then the cohort, then attached in
            # the order given.
            filters = [
                CohortFilter(method=method, arg=arg, column=column)
                for method, arg, column in filter_specs
            ]
            cohort = Cohort(name=name)
            for cohort_filter in filters:
                cohort.add_cohort_filter(cohort_filter)
            return cohort

        less = CohortFilterMethods.METHOD_LESS
        greater = CohortFilterMethods.METHOD_GREATER

        continuous_cohort = make_cohort(
            'Cohort Continuous',
            (less, [30.5], 'HouseAge'),
            (greater, [3.0], 'AveRooms'))
        index_cohort = make_cohort(
            'Cohort Index', (less, [20], 'Index'))
        predicted_y_cohort = make_cohort(
            'Cohort Predicted Y', (less, [5.0], 'Predicted Y'))
        true_y_cohort = make_cohort(
            'Cohort True Y', (greater, [1.0], 'True Y'))

        all_cohorts = [continuous_cohort,
                       index_cohort,
                       predicted_y_cohort,
                       true_y_cohort]
        widget = ResponsibleAIDashboard(rai_insights,
                                        cohort_list=all_cohorts)

        self.validate_rai_dashboard_data(widget)
    def test_cohort_filter_validate_in_range_methods_type_arg_entries(
            self):
        """Check that a range filter rejects non-numeric ``arg`` entries.

        A string is placed in the second and then in the first position of
        the two-element arg. Both must raise
        ``UserConfigValidationException``.
        """
        expected_msg = ("Expected int or float type for arg "
                        "with cohort method in the range of.")

        for bad_arg in ([1, 'val'], ['val', 2]):
            with pytest.raises(UserConfigValidationException,
                               match=expected_msg):
                CohortFilter(column="age", arg=bad_arg,
                             method=CohortFilterMethods.METHOD_RANGE)
 def test_cohort_filter_validate_column(self):
     """Check that ``CohortFilter`` rejects an ``int`` passed as ``column``."""
     expected_msg = ("Got unexpected type <class 'int'> for column. "
                     "Expected string type.")
     with pytest.raises(UserConfigValidationException,
                        match=expected_msg):
         CohortFilter(column=1, arg=[],
                      method=CohortFilterMethods.METHOD_GREATER)
    def test_cohort_filter_validate_method(self):
        """Check that ``CohortFilter`` rejects invalid ``method`` values.

        An ``int`` method and the unknown string ``"random"`` must each
        raise ``UserConfigValidationException``.
        """
        wrong_type_msg = ("Got unexpected type <class 'int'> for method. "
                          "Expected string type.")
        with pytest.raises(UserConfigValidationException,
                           match=wrong_type_msg):
            CohortFilter(column=1, arg=[], method=1)

        unknown_value_msg = ("Got unexpected value random for method. "
                             "Expected either of greater or "
                             "greater and equal or "
                             "less or less and equal or "
                             "equal or includes or "
                             "excludes or in the range of.")
        with pytest.raises(UserConfigValidationException,
                           match=unknown_value_msg):
            CohortFilter(column="random", arg=[], method="random")
 def test_cohort_filter_validate_single_value_methods_num_arg_entries(
         self, method):
     """Check that a two-entry ``arg`` is rejected for ``method``.

     NOTE(review): ``method`` is presumably injected by a parametrize
     decorator that is not visible in this excerpt -- confirm.
     """
     expected_msg = ("Expected a single value in arg "
                     "for cohort methods greater and "
                     "equal or greater or less and equal or less or equal.")
     with pytest.raises(UserConfigValidationException,
                        match=expected_msg):
         CohortFilter(column="age", arg=[1, 9], method=method)
 def test_cohort_filter_validate_single_value_methods_type_arg_entries(
         self, method):
     """Check that a string ``arg`` entry is rejected for ``method``.

     NOTE(review): ``method`` is presumably injected by a parametrize
     decorator that is not visible in this excerpt -- confirm.
     """
     expected_msg = ("Expected int or float type for arg "
                     "with cohort methods greater and "
                     "equal or greater or less and equal or less or equal.")
     with pytest.raises(UserConfigValidationException,
                        match=expected_msg):
         CohortFilter(column="age", arg=["val"], method=method)
 def test_cohort_filter_validate_in_range_methods_num_arg_entries(
         self):
     """Check that a range filter with a one-entry ``arg`` is rejected."""
     expected_msg = ("Expected two entries in arg for "
                     "cohort method in the range of.")
     with pytest.raises(UserConfigValidationException,
                        match=expected_msg):
         CohortFilter(column="age", arg=[1],
                      method=CohortFilterMethods.METHOD_RANGE)
 def test_cohort_filter_serialization_single_value_methods(self, method):
     """Serialize a one-value filter and check the JSON text.

     The output of ``json.dumps`` with ``cohort_filter_json_converter`` must
     contain the method, the text ``[65.0]`` and the column name.

     NOTE(review): ``method`` is presumably injected by a parametrize
     decorator that is not visible in this excerpt -- confirm.
     """
     age_filter = CohortFilter(column='age', arg=[65.0], method=method)
     serialized = json.dumps(age_filter,
                             default=cohort_filter_json_converter)
     for fragment in (method, '[65.0]', 'age'):
         assert fragment in serialized
    def test_cohort_validate_with_test_data(self):
        """Check argument validation in ``Cohort._validate_with_test_data``.

        A one-filter cohort is validated six times, each time with one bad
        argument (``test_data``, ``target_column`` or
        ``categorical_features``). Every call must raise
        ``UserConfigValidationException`` with the paired message.
        """
        age_filter = CohortFilter(
            column='age',
            method=CohortFilterMethods.METHOD_LESS,
            arg=[65])
        cohort = Cohort(name="Cohort New")
        cohort.add_cohort_filter(age_filter)
        toy_data = get_toy_binary_classification_dataset()

        # (expected message, keyword arguments), in the order they are tried.
        failing_calls = [
            ("The test_data should be a pandas DataFrame",
             dict(test_data=[],
                  target_column='target',
                  categorical_features=[])),
            ("The target_column should be string.",
             dict(test_data=toy_data,
                  target_column=1,
                  categorical_features=[])),
            ("The target_column fake_target "
             "was not found in test_data.",
             dict(test_data=toy_data,
                  target_column="fake_target",
                  categorical_features=[])),
            ("Expected a list type for "
             "categorical columns.",
             dict(test_data=toy_data,
                  target_column="target",
                  categorical_features={})),
            ("Feature 1 in categorical_features need to be of "
             "string type.",
             dict(test_data=toy_data,
                  target_column="target",
                  categorical_features=[1, 2])),
            ("Found categorical feature hours-per-week which is not"
             " present in test data.",
             dict(test_data=toy_data,
                  target_column="target",
                  categorical_features=["hours-per-week"])),
        ]

        for expected_msg, call_kwargs in failing_calls:
            with pytest.raises(UserConfigValidationException,
                               match=expected_msg):
                cohort._validate_with_test_data(**call_kwargs)
    def test_validate_with_test_data_regression_target_filter_validations(
            self, target_filter_type, method):
        """Check that ``method`` is rejected on a regression target column.

        A filter on ``target_filter_type`` using ``method`` is built and
        validated with ``is_classification=False``. It must raise
        ``UserConfigValidationException``.

        NOTE(review): both parameters are presumably injected by parametrize
        decorators that are not visible in this excerpt -- confirm.
        """
        regression_data = get_toy_regression_dataset()

        expected_msg = (
            "{0} cannot be configured with "
            "filter {1} for regression.".format(target_filter_type,
                                                method))
        with pytest.raises(UserConfigValidationException,
                           match=expected_msg):
            target_filter = CohortFilter(
                column=target_filter_type,
                method=method,
                arg=[2.5])
            target_filter._validate_with_test_data(
                test_data=regression_data,
                target_column="target",
                categorical_features=[],
                is_classification=False)
 def test_cohort_filter_serialization_in_range_method(self):
     """Serialize a range filter and check the JSON text.

     The output must contain the range method name, both bounds and the
     column name.
     """
     range_filter = CohortFilter(
         column='age',
         method=CohortFilterMethods.METHOD_RANGE,
         arg=[65.0, 70.0])
     serialized = json.dumps(range_filter,
                             default=cohort_filter_json_converter)
     expected_fragments = (CohortFilterMethods.METHOD_RANGE,
                           '65.0', '70.0', 'age')
     for fragment in expected_fragments:
         assert fragment in serialized
    def test_cohort_serialization_single_value_method(self, method):
        """Serialize a one-filter cohort and check the JSON text.

        ``Cohort.to_json`` output must contain the cohort name, the method,
        the text ``[65]`` and the column name.

        NOTE(review): ``method`` is presumably injected by a parametrize
        decorator that is not visible in this excerpt -- confirm.
        """
        age_filter = CohortFilter(column='age', arg=[65], method=method)
        cohort = Cohort(name="Cohort New")
        cohort.add_cohort_filter(age_filter)
        serialized = cohort.to_json()

        for fragment in ('Cohort New', method, '[65]', 'age'):
            assert fragment in serialized
Esempio n. 20
0
    def test_responsibleai_adult_with_pre_defined_cohorts(
            self, create_rai_insights_object_classification):
        """Build a dashboard from three user-defined cohorts and validate it.

        The cohorts filter on 'Age' and 'Hours per week', on
        'Marital Status' (an ``includes`` filter) and on 'Index'. The
        resulting widget is handed to ``self.validate_rai_dashboard_data``.

        NOTE(review): ``create_rai_insights_object_classification`` is
        presumably a pytest fixture defined outside this excerpt -- confirm.
        """
        rai_insights = create_rai_insights_object_classification

        def make_cohort(name, *filter_specs):
            # Filters are created first, then the cohort, then attached in
            # the order given.
            filters = [
                CohortFilter(method=method, arg=arg, column=column)
                for method, arg, column in filter_specs
            ]
            cohort = Cohort(name=name)
            for cohort_filter in filters:
                cohort.add_cohort_filter(cohort_filter)
            return cohort

        continuous_cohort = make_cohort(
            'Cohort Continuous',
            (CohortFilterMethods.METHOD_LESS, [65], 'Age'),
            (CohortFilterMethods.METHOD_GREATER, [40], 'Hours per week'))
        categorical_cohort = make_cohort(
            'Cohort Categorical',
            (CohortFilterMethods.METHOD_INCLUDES, [2, 6, 4],
             'Marital Status'))
        index_cohort = make_cohort(
            'Cohort Index',
            (CohortFilterMethods.METHOD_LESS, [20], 'Index'))

        all_cohorts = [continuous_cohort,
                       categorical_cohort,
                       index_cohort]
        widget = ResponsibleAIDashboard(rai_insights,
                                        cohort_list=all_cohorts)

        self.validate_rai_dashboard_data(widget)
    def test_responsibleai_adult_duplicate_cohort_names(
            self, create_rai_insights_object_classification):
        """Check that the dashboard rejects cohorts sharing a name.

        The same cohort object is listed twice in ``cohort_list``, and
        ``ResponsibleAIDashboard`` must raise
        ``UserConfigValidationException``.

        NOTE(review): ``create_rai_insights_object_classification`` is
        presumably a pytest fixture defined outside this excerpt -- confirm.
        """
        rai_insights = create_rai_insights_object_classification

        age_filter = CohortFilter(
            column='Age',
            method=CohortFilterMethods.METHOD_LESS,
            arg=[65])
        hours_filter = CohortFilter(
            column='Hours per week',
            method=CohortFilterMethods.METHOD_GREATER,
            arg=[40])

        cohort = Cohort(name='Cohort Continuous')
        for cohort_filter in (age_filter, hours_filter):
            cohort.add_cohort_filter(cohort_filter)

        duplicate_msg = (
            "Found cohorts with duplicate names. "
            "All pre-defined cohorts need to have distinct names.")
        with pytest.raises(UserConfigValidationException,
                           match=duplicate_msg):
            ResponsibleAIDashboard(rai_insights,
                                   cohort_list=[cohort, cohort])
    def test_cohort_filter_serialization_include_exclude_methods(self, method):
        """Serialize multi-value filters and check the JSON text.

        The check runs once with string arg values and once with integer arg
        values. Each time the ``json.dumps`` output must contain the method,
        every arg value and the column name.

        NOTE(review): ``method`` is presumably injected by a parametrize
        decorator that is not visible in this excerpt -- confirm.
        """
        for arg_values in (('val1', 'val2', 'val3'), (1, 2, 3)):
            cohort_filter = CohortFilter(method=method,
                                         arg=list(arg_values),
                                         column='age')
            serialized = json.dumps(cohort_filter,
                                    default=cohort_filter_json_converter)

            assert method in serialized
            for value in arg_values:
                # Integers are searched for by their decimal text ('1', ...).
                assert str(value) in serialized
            assert 'age' in serialized
    def test_cohort_list_serialization(self):
        """Serialize a list of two cohorts and check the JSON text.

        Both cohorts share one filter object. The ``json.dumps`` output must
        contain both cohort names, the method, the text ``[65]`` and the
        column name.
        """
        shared_filter = CohortFilter(
            column='age',
            method=CohortFilterMethods.METHOD_LESS,
            arg=[65])

        new_cohort = Cohort(name="Cohort New")
        new_cohort.add_cohort_filter(shared_filter)

        old_cohort = Cohort(name="Cohort Old")
        old_cohort.add_cohort_filter(shared_filter)

        serialized = json.dumps([new_cohort, old_cohort],
                                default=cohort_filter_json_converter)

        expected_fragments = ('Cohort Old',
                              'Cohort New',
                              CohortFilterMethods.METHOD_LESS,
                              '[65]',
                              'age')
        for fragment in expected_fragments:
            assert fragment in serialized
    def test_cohort_serialization_deserialization_in_range_method(self):
        """Round-trip a range-filter cohort through JSON.

        The JSON text must contain the cohort name, the range method, both
        bounds and the column name. The restored cohort must have the same
        name, the same number of filters and the same method on its first
        filter.
        """
        range_filter = CohortFilter(
            column='age',
            method=CohortFilterMethods.METHOD_RANGE,
            arg=[65.0, 70.0])
        original = Cohort(name="Cohort New")
        original.add_cohort_filter(range_filter)

        serialized = original.to_json()
        expected_fragments = ('Cohort New',
                              CohortFilterMethods.METHOD_RANGE,
                              '65.0',
                              '70.0',
                              'age')
        for fragment in expected_fragments:
            assert fragment in serialized

        restored = Cohort.from_json(serialized)
        assert restored.name == original.name

        restored_filters = restored.cohort_filter_list
        original_filters = original.cohort_filter_list
        assert len(restored_filters) == len(original_filters)
        assert restored_filters[0].method == original_filters[0].method
    def test_validate_with_test_data_classification_error_filter_validations(
            self):
        """Check validation of ``CohortFilter.CLASSIFICATION_OUTCOME`` filters.

        Four failing combinations of filter, dataset and
        ``is_classification`` are validated in turn. Each must raise
        ``UserConfigValidationException`` with the paired message.
        """
        multiclass_data = get_toy_multiclass_classification_dataset()

        binary_data = get_toy_binary_classification_dataset()

        excludes_filter = CohortFilter(
            column=CohortFilter.CLASSIFICATION_OUTCOME,
            method=CohortFilterMethods.METHOD_EXCLUDES,
            arg=[ClassificationOutcomes.FALSE_NEGATIVE])

        includes_filter = CohortFilter(
            column=CohortFilter.CLASSIFICATION_OUTCOME,
            method=CohortFilterMethods.METHOD_INCLUDES,
            arg=["random"])

        unsupported_scenario_msg = (
            "Classification outcome cannot be "
            "configured for multi-class classification"
            " and regression scenarios.")
        includes_only_msg = (
            "Classification outcome can only be configured with "
            "cohort filter includes.")
        allowed_values_msg = (
            "Classification outcome can only take argument values "
            "from False negative or False positive or True "
            "negative or True positive.")

        # (filter, test data, is_classification, expected message)
        failing_cases = (
            (excludes_filter, multiclass_data, True,
             unsupported_scenario_msg),
            (excludes_filter, binary_data, False,
             unsupported_scenario_msg),
            (excludes_filter, binary_data, True,
             includes_only_msg),
            (includes_filter, binary_data, True,
             allowed_values_msg),
        )

        for outcome_filter, data, classification, expected_msg in \
                failing_cases:
            with pytest.raises(UserConfigValidationException,
                               match=expected_msg):
                outcome_filter._validate_with_test_data(
                    test_data=data,
                    target_column="target",
                    categorical_features=[],
                    is_classification=classification)
    def test_validate_with_test_data_regression_error_filter_validations(
            self):
        """Check validation of ``CohortFilter.REGRESSION_ERROR`` filters.

        One filter object is reused and mutated between checks. It is
        validated as classification, then with ``includes`` and ``excludes``
        methods, then with string arg values. Every validation must raise
        ``UserConfigValidationException``.
        """
        regression_data = get_toy_regression_dataset()

        error_filter = CohortFilter(
            column=CohortFilter.REGRESSION_ERROR,
            method=CohortFilterMethods.METHOD_LESS,
            arg=[2.5])

        def run_validation(is_classification):
            error_filter._validate_with_test_data(
                test_data=regression_data,
                target_column="target",
                categorical_features=[],
                is_classification=is_classification)

        classification_msg = ("Error cannot be configured for classification"
                              " scenarios.")
        with pytest.raises(UserConfigValidationException,
                           match=classification_msg):
            run_validation(True)

        include_exclude_msg = (
            "Error cannot be configured with either includes"
            " or excludes.")
        # The attribute assignments stay inside the ``pytest.raises`` blocks,
        # as in the sequence this test has always exercised.
        with pytest.raises(UserConfigValidationException,
                           match=include_exclude_msg):
            error_filter.method = CohortFilterMethods.METHOD_INCLUDES
            run_validation(False)

        with pytest.raises(UserConfigValidationException,
                           match=include_exclude_msg):
            error_filter.method = CohortFilterMethods.METHOD_EXCLUDES
            run_validation(False)

        numeric_only_msg = (
            "All entries in arg should be of type int or float"
            " for Error cohort.")
        with pytest.raises(UserConfigValidationException,
                           match=numeric_only_msg):
            error_filter.method = CohortFilterMethods.METHOD_GREATER
            error_filter.arg = ['val1', 'val2']
            run_validation(False)