Example #1
def test_explain_instance_regression(caplog):
    """
    Tests the :meth:`fatf.transparency.lime.Lime.explain_instance` method.

    These tests are for a regression task.
    """
    # Check logging
    assert len(caplog.records) == 0

    # Regression with a non-probabilistic model
    lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                    mode='regression',
                    model=CLF_NON_PROBA,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE)
    assert _is_explanation_equal({'a': explained}, {'a': REGRESSION_RESULTS})

    # Check logging
    assert len(caplog.records) == 1
    assert caplog.records[0].levelname == 'WARNING'
    assert caplog.records[0].getMessage() == LOG_WARNING

    # Regression with a probabilistic model
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    mode='regression',
                    model=CLF,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE_STRUCT)
    assert _is_explanation_equal({'a': explained}, {'a': REGRESSION_RESULTS})

    # Regression with a model and function
    with pytest.warns(UserWarning) as warning:
        lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                        mode='regression',
                        model=CLF,
                        predict_fn=CLF_NON_PROBA.predict,
                        class_names=CLASS_NAMES,
                        feature_names=FEATURE_NAMES)
    assert len(warning) == 1
    assert str(warning[0].message) == USER_WARNING_MODEL_PRED
    explained = lime.explain_instance(SAMPLE_STRUCT)
    assert _is_explanation_equal({'a': explained}, {'a': REGRESSION_RESULTS})

    # Regression without a model
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    mode='regression',
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE, predict_fn=CLF_NON_PROBA.predict)
    assert _is_explanation_equal({'a': explained}, {'a': REGRESSION_RESULTS})

    # Check logging
    assert len(caplog.records) == 1
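
The module-level fixtures referenced above (NUMERICAL_NP_ARRAY, NUMERICAL_STRUCT_ARRAY, SAMPLE, SAMPLE_STRUCT, CLF, CLF_NON_PROBA, and the name lists) are defined elsewhere in the test file, as is the _is_explanation_equal comparison helper. A minimal sketch of plausible stand-ins, assuming scikit-learn estimators; the arrays, labels and dtypes here are illustrative, not the test module's real values:

import numpy as np
from sklearn.linear_model import LinearRegression, LogisticRegression

# Illustrative 2-D numerical training data, one row per instance.
NUMERICAL_NP_ARRAY = np.array([[0, 0, 0.08, 0.69],
                               [1, 0, 0.03, 0.29],
                               [0, 1, 0.99, 0.82],
                               [2, 1, 0.73, 0.48],
                               [1, 0, 0.36, 0.89],
                               [0, 1, 0.07, 0.21]])
# The same data as a structured array with named fields.
NUMERICAL_STRUCT_ARRAY = np.array(
    [(0, 0, 0.08, 0.69), (1, 0, 0.03, 0.29), (0, 1, 0.99, 0.82),
     (2, 1, 0.73, 0.48), (1, 0, 0.36, 0.89), (0, 1, 0.07, 0.21)],
    dtype=[('a', 'i4'), ('b', 'i4'), ('c', 'f4'), ('d', 'f4')])
LABELS = np.array([2, 0, 1, 1, 0, 2])

# Single instances to be explained, one per data layout.
SAMPLE = np.array([0, 1, 0.03, 0.5])
SAMPLE_STRUCT = np.array([(0, 1, 0.03, 0.5)],
                         dtype=NUMERICAL_STRUCT_ARRAY.dtype)[0]

FEATURE_NAMES = ['feat0', 'feat1', 'feat2', 'feat3']
CLASS_NAMES = ['class0', 'class1', 'class2']

# A probabilistic model (exposes predict_proba) and a non-probabilistic one.
CLF = LogisticRegression().fit(NUMERICAL_NP_ARRAY, LABELS)
CLF_NON_PROBA = LinearRegression().fit(NUMERICAL_NP_ARRAY, LABELS)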
Example #2
def test_lime_init():
    """
    Tests :class:`fatf.transparency.lime.Lime` object initialisation.

    This only looks into cases where the initialisation would fail.
    """
    attribute_error = 'The following named parameters are not valid: {}.'
    shape_error_data = ('The data parameter must be a 2-dimensional numpy '
                        'array.')
    value_error_cat = 'LIME does not support non-numerical data arrays.'
    value_error = ("The mode must be either 'classification' or 'regression'. "
                   "'{}' given.")
    incompatible_model_error = ('LIME requires a model object to have a fit '
                                'method and optionally a predict_proba '
                                'method.')
    type_error_predictor = ('The predict_fn parameter is not callable -- it '
                            'has to be a function.')
    type_error_struct_indices = ('The categorical_features parameter either '
                                 'has to be a list, a numpy array or None.')
    incorrect_shape_struct_indices = ('categorical_features array/list is not '
                                      '1-dimensional.')
    value_error_struct_indices = ('Since categorical_features is an array of '
                                  'indices for a structured array, all of its '
                                  'elements should be strings.')
    value_error_struct_incorrect_indices = (
        'Indices given in the categorical_features parameter are not valid '
        'for the input data array.')
    #
    attribute_error_explain = ('The following named parameters are not valid: '
                               '{}.')
    incorrect_shape_error_explain = ('The instance to be explained should be '
                                     '1-dimensional.')
    value_error_explain = ('The instance to be explained should be purely '
                           'numerical -- LIME does not support categorical '
                           'features.')

    # Wrong named parameter
    with pytest.raises(AttributeError) as exin:
        ftl.Lime(NUMERICAL_NP_ARRAY, model=CLF, lorem='ipsum')
    assert str(exin.value) == attribute_error.format("{'lorem'}")

    # Not a 2-dimensional array
    with pytest.raises(IncorrectShapeError) as exin:
        ftl.Lime(np.ones((6, 4, 4)))
    assert str(exin.value) == shape_error_data

    # Not a numerical array
    with pytest.raises(ValueError) as exin:
        ftl.Lime(np.ones((6, 4), dtype='U1'))
    assert str(exin.value) == value_error_cat

    # A structured data array with weird categorical indices type
    with pytest.raises(TypeError) as exin:
        ftl.Lime(NUMERICAL_STRUCT_ARRAY, categorical_features='')
    assert str(exin.value) == type_error_struct_indices

    # A structured data array with weird categorical indices shape
    with pytest.raises(IncorrectShapeError) as exin:
        ftl.Lime(NUMERICAL_STRUCT_ARRAY, categorical_features=[['a']])
    assert str(exin.value) == incorrect_shape_struct_indices

    # A structured data array with non-textual categorical indices
    with pytest.raises(ValueError) as exin:
        ftl.Lime(NUMERICAL_STRUCT_ARRAY, categorical_features=np.array([3, 2]))
    assert str(exin.value) == value_error_struct_indices

    # A structured data array with incorrect categorical indices
    with pytest.raises(ValueError) as exin:
        ftl.Lime(NUMERICAL_STRUCT_ARRAY, categorical_features=['a', 'e', 'b'])
    assert str(exin.value) == value_error_struct_incorrect_indices

    # Wrong operation mode
    with pytest.raises(ValueError) as exin:
        ftl.Lime(NUMERICAL_NP_ARRAY, mode='c')
    assert str(exin.value) == value_error.format('c')

    # Invalid model
    invalid_model = InvalidModel()
    with pytest.raises(IncompatibleModelError) as exin:
        ftl.Lime(NUMERICAL_NP_ARRAY,
                 model=invalid_model,
                 mode='classification')
    assert str(exin.value) == incompatible_model_error
    with pytest.raises(IncompatibleModelError) as exin:
        ftl.Lime(NUMERICAL_NP_ARRAY, model='a', mode='classification')
    assert str(exin.value) == incompatible_model_error

    # Invalid predictive function
    with pytest.raises(TypeError) as exin:
        ftl.Lime(NUMERICAL_NP_ARRAY, predict_fn='a', mode='regression')
    assert str(exin.value) == type_error_predictor

    ###########################################################################
    # Test explain_instance for exceptions and errors
    lime = ftl.Lime(NUMERICAL_NP_ARRAY)

    # Incorrect parameter
    with pytest.raises(AttributeError) as exin:
        lime.explain_instance(SAMPLE, weird_named_argument='yes')
    assert str(exin.value) == attribute_error_explain.format(
        "{'weird_named_argument'}")

    # Incorrect shape
    with pytest.raises(IncorrectShapeError) as exin:
        lime.explain_instance(NUMERICAL_STRUCT_ARRAY)
    assert str(exin.value) == incorrect_shape_error_explain

    # Not numerical
    with pytest.raises(ValueError) as exin:
        lime.explain_instance(np.ones((5, ), dtype='U1'))
    assert str(exin.value) == value_error_explain
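
Each branch above pins an exact error message to an exception type. A minimal sketch of how one such validation failure surfaces to a caller, assuming the custom exception classes are importable from fatf.exceptions:

import numpy as np

import fatf.transparency.lime as ftl
from fatf.exceptions import IncorrectShapeError

try:
    ftl.Lime(np.ones((6, 4, 4)))  # 3-D data is rejected at construction time
except IncorrectShapeError as exc:
    # Prints: The data parameter must be a 2-dimensional numpy array.
    print(exc)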
Example #3
def test_explain_instance_classification(caplog):
    """
    Tests the :meth:`fatf.transparency.lime.Lime.explain_instance` method.

    These tests are for a classification task.
    """
    runtime_error_no_predictor = 'A predictive function is not available.'
    runtime_error_non_prob = ('The predictive model is not probabilistic. '
                              'Please specify a predictive function instead.')

    # Check logging
    assert len(caplog.records) == 0

    # Non-probabilistic model -- function -- probabilistic function
    with pytest.warns(UserWarning) as warning:
        lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                        model=CLF_NON_PROBA,
                        predict_fn=CLF.predict_proba,
                        class_names=CLASS_NAMES,
                        feature_names=FEATURE_NAMES)
    assert len(warning) == 1
    assert str(warning[0].message) == USER_WARNING_MODEL_PRED
    explained = lime.explain_instance(SAMPLE, predict_fn=CLF.predict_proba)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    # Non-probabilistic model -- function -- no function
    with pytest.warns(UserWarning) as warning:
        lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                        model=CLF_NON_PROBA,
                        predict_fn=CLF.predict_proba,
                        class_names=CLASS_NAMES,
                        feature_names=FEATURE_NAMES)
    assert len(warning) == 1
    assert str(warning[0].message) == USER_WARNING_MODEL_PRED
    explained = lime.explain_instance(SAMPLE)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    # Non-probabilistic model -- no function -- probabilistic function
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    model=CLF_NON_PROBA,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE, predict_fn=CLF.predict_proba)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    # Non-probabilistic model -- no function -- no function
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    model=CLF_NON_PROBA,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    with pytest.raises(RuntimeError) as exin:
        lime.explain_instance(SAMPLE_STRUCT)
    assert str(exin.value) == runtime_error_non_prob

    # Check logging
    assert len(caplog.records) == 4
    for i in range(4):
        assert caplog.records[i].levelname == 'WARNING'
        assert caplog.records[i].getMessage() == LOG_WARNING

    # No model -- function -- probabilistic function
    lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                    predict_fn=CLF.predict_proba,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE, predict_fn=CLF.predict_proba)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    # No model -- function -- no function
    lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                    predict_fn=CLF.predict_proba,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    # No model -- no function -- probabilistic function
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE, predict_fn=CLF.predict_proba)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    # No model -- no function -- no function
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    with pytest.raises(RuntimeError) as exin:
        lime.explain_instance(SAMPLE)
    assert str(exin.value) == runtime_error_no_predictor

    # Check logging
    assert len(caplog.records) == 4

    # Probabilistic model -- probabilistic function -- empty call
    with pytest.warns(UserWarning) as warning:
        lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                        model=CLF,
                        predict_fn=CLF.predict_proba,
                        class_names=CLASS_NAMES,
                        feature_names=FEATURE_NAMES)
    assert len(warning) == 1
    assert str(warning[0].message) == USER_WARNING_MODEL_PRED
    explained = lime.explain_instance(SAMPLE_STRUCT)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    #
    # Probabilistic model -- probabilistic function -- non-empty call
    with pytest.warns(UserWarning) as warning:
        lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                        model=CLF,
                        predict_fn=CLF.predict_proba,
                        class_names=CLASS_NAMES,
                        feature_names=FEATURE_NAMES)
    assert len(warning) == 1
    assert str(warning[0].message) == USER_WARNING_MODEL_PRED
    explained = lime.explain_instance(SAMPLE, predict_fn=CLF.predict_proba)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    #
    # Probabilistic model -- no function -- empty call
    lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                    model=CLF,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)
    #
    # Probabilistic model -- no function -- non-empty call
    lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                    model=CLF,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES)
    explained = lime.explain_instance(SAMPLE_STRUCT,
                                      predict_fn=CLF.predict_proba)
    assert _is_explanation_equal(explained, NUMERICAL_RESULTS)

    # Check logging
    assert len(caplog.records) == 4

    ###########################################################################
    # Test with categorical features: feat0 and feat1

    cat_feat = [0, 1]
    lime = ftl.Lime(NUMERICAL_NP_ARRAY,
                    model=CLF,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES,
                    categorical_features=cat_feat)
    explained = lime.explain_instance(SAMPLE_STRUCT)
    assert _is_explanation_equal(CATEGORICAL_RESULTS, explained)

    cat_feat = ['a', 'b']
    lime = ftl.Lime(NUMERICAL_STRUCT_ARRAY,
                    model=CLF,
                    class_names=CLASS_NAMES,
                    feature_names=FEATURE_NAMES,
                    categorical_features=cat_feat)
    explained = lime.explain_instance(SAMPLE)
    assert _is_explanation_equal(CATEGORICAL_RESULTS, explained)

    # Check logging
    assert len(caplog.records) == 4
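
Taken together, the branches above imply a fixed order in which a predictor is chosen. A hypothetical helper sketching that resolution logic -- not fatf's actual implementation, only what the asserted behaviours suggest; the error strings are the ones pinned by the test:

def resolve_predictor(call_fn, init_fn, model, mode='classification'):
    # A predict_fn passed to explain_instance wins over the one given at
    # construction time, which wins over the model's own methods.
    if call_fn is not None:
        return call_fn
    if init_fn is not None:
        return init_fn
    if model is not None:
        if mode == 'classification':
            if hasattr(model, 'predict_proba'):
                return model.predict_proba
            raise RuntimeError('The predictive model is not probabilistic. '
                               'Please specify a predictive function instead.')
        return model.predict
    raise RuntimeError('A predictive function is not available.')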
Example #4
# Imports matching the aliases used below; module paths as in FAT Forensics.
from pprint import pprint

import fatf.utils.data.datasets as fatf_datasets
import fatf.utils.models as fatf_models
import fatf.transparency.lime as fatf_lime
import fatf.vis.lime as fatf_vis_lime

print(__doc__)

# Load data
iris_data_dict = fatf_datasets.load_iris()
iris_X = iris_data_dict['data']
iris_y = iris_data_dict['target']
iris_feature_names = iris_data_dict['feature_names']
iris_class_names = iris_data_dict['target_names']

# Train a model
clf = fatf_models.KNN()
clf.fit(iris_X, iris_y)

# Create a LIME explainer
lime = fatf_lime.Lime(iris_X,
                      model=clf,
                      feature_names=iris_feature_names,
                      class_names=iris_class_names)

# Choose an index of the instance to be explained
index_to_explain = 42

# Explain an instance
lime_explanation = lime.explain_instance(iris_X[index_to_explain, :],
                                         num_samples=500)

# Display the textual explanation
pprint(lime_explanation)

# Plot the explanation
fatf_vis_lime.plot_lime(lime_explanation)
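
Beyond plotting, the explanation can be inspected programmatically. A small follow-up, assuming (as the classification tests above suggest) that the explanation is a dictionary mapping each class name to a list of (feature condition, weight) pairs:

# Report the most influential feature condition per class; the dictionary
# structure is an assumption based on fatf's LIME wrapper, not verified here.
for class_name, feature_weights in lime_explanation.items():
    condition, weight = max(feature_weights, key=lambda fw: abs(fw[1]))
    print('{}: {!r} contributes {:+.3f}'.format(class_name, condition, weight))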