def test_osvar_seed(caplog):
    """
    Tests random number generator seeding with a system environment variable.
    """
    seed_int = 42
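    # Expected value of random.getstate()[1][0] after seeding Python's RNG
    # with 42 (asserted against random.getstate() below).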
    seed_int_random = 2147483648
    seed_str = '{}'.format(seed_int)
    message = 'Seeding RNGs with {}.'.format(seed_str)

    # Memorise the current state of the system variable
    fatf_seed = os.environ.get('FATF_SEED', None)

    os.environ['FATF_SEED'] = seed_str
    fatf.setup_random_seed()

    # Check logging
    # Check that only one message was logged
    assert len(caplog.records) == 1
    # Check this message's log level
    assert caplog.records[0].levelname == 'INFO'
    # Check that the message matches
    assert caplog.records[0].getMessage() == message

    # Pseudo-check the actual seed
    assert random.getstate()[1][0] == seed_int_random
    assert np.random.get_state()[1][0] == seed_int

    # Restore the system variable
    if fatf_seed is None:
        del os.environ['FATF_SEED']  # pragma: nocover
    else:
        os.environ['FATF_SEED'] = fatf_seed  # pragma: nocover
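
These test snippets are excerpted from their modules, so module-level imports and fixture constants (DATA, LABELS, NUMERICAL_NP_ARRAY, SEGMENTS, etc.) are not shown. A minimal import header they collectively appear to assume is sketched below; the aliases are inferred from the docstrings and call sites rather than copied from the original files, and other aliases used later (fuav, fumm, fumt, fummet, futs, ftms) follow the same fatf.* pattern even though their exact module paths are not shown in these excerpts.

import os
import random

import numpy as np
import pytest

import fatf
import fatf.utils.data.instance_augmentation as fudi
import fatf.utils.data.feature_choice.sklearn as fudfs
import fatf.utils.data.occlusion as fudo
import fatf.transparency.sklearn.linear_model as ftsl
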
    def test_linear_regressors(self):
        """
        Tests ``SKLearnLinearModelExplainer`` with linear regressors.
        """
        fatf.setup_random_seed()

        for i, clf in enumerate(LINEAR_REGRESSORS):
            name = clf.__name__
            kwargs = get_kwargs(name)
            clf_instance = clf(**kwargs)
            clf_instance.fit(DATA, LABELS)

            ske = ftsl.SKLearnLinearModelExplainer(
                clf_instance, feature_names=self.feature_names)
            #
            assert ske.clf == clf_instance
            assert ske.is_classifier is False
            assert ske.feature_names == self.feature_names
            assert ske.class_names is None
            assert ske.features_number == 4
            assert ske.classes_array is None

            coef = ske.feature_importance()
            if name == 'SGDRegressor':
                assert np.allclose(coef / 1e+10, LINEAR_REG_COEF[i], atol=1e-3)
            else:
                assert np.allclose(coef, LINEAR_REG_COEF[i], atol=1e-3)
    def test_linear_classifiers(self):
        """
        Tests ``SKLearnLinearModelExplainer`` with linear classifiers.
        """
        fatf.setup_random_seed()

        for i, clf in enumerate(LINEAR_CLASSIFIERS):
            name = clf.__name__
            kwargs = get_kwargs(name)
            clf_instance = clf(**kwargs)
            clf_instance.fit(DATA, LABELS)

            ske = ftsl.SKLearnLinearModelExplainer(clf_instance,
                                                   self.feature_names,
                                                   self.class_names)
            #
            assert ske.clf == clf_instance
            assert ske.is_classifier is True
            assert ske.feature_names == self.feature_names
            assert ske.class_names == self.class_names
            assert ske.features_number == 4
            assert np.array_equal(ske.classes_array, [0, 1])

            coef = ske.feature_importance()
            assert np.allclose(coef, LINEAR_CLF_COEF[i], atol=1e-3)
def test_random_seed(caplog):
    """
    Tests random number generator seeding when the seed is random.
    """
    fatf_seed = os.environ.get('FATF_SEED', None)
    if fatf_seed is not None:
        del os.environ['FATF_SEED']  # pragma: nocover
    assert 'FATF_SEED' not in os.environ

    fatf.setup_random_seed()
    seed = np.random.get_state()[1][0]
    message = 'Seeding RNGs with {}.'.format(seed)

    # Check logging
    # Check that only one message was logged
    assert len(caplog.records) == 1
    # Check this message's log level
    assert caplog.records[0].levelname == 'INFO'
    # Check that the message matches
    assert caplog.records[0].getMessage() == message

    # Check Python random state
    python_random_seed = random.getstate()
    random.seed(seed)
    assert random.getstate() == python_random_seed
    assert id(random.getstate()) != id(python_random_seed)

    if fatf_seed is not None:
        os.environ['FATF_SEED'] = fatf_seed  # pragma: nocover
        assert 'FATF_SEED' in os.environ  # pragma: nocover
def test_random_binary_sampler():
    """
    Tests :func:`fatf.utils.data.instance_augmentation.random_binary_sampler`.
    """
    err_msg = 'The number of elements must be an integer.'
    with pytest.raises(TypeError) as exin:
        fudi.random_binary_sampler('int')
    assert str(exin.value) == err_msg
    with pytest.raises(TypeError) as exin:
        fudi.random_binary_sampler(1.0)
    assert str(exin.value) == err_msg

    err_msg = 'The number of elements must be greater than 0.'
    with pytest.raises(ValueError) as exin:
        fudi.random_binary_sampler(0)
    assert str(exin.value) == err_msg
    with pytest.raises(ValueError) as exin:
        fudi.random_binary_sampler(-42)
    assert str(exin.value) == err_msg

    err_msg = 'The number of samples must be an integer.'
    with pytest.raises(TypeError) as exin:
        fudi.random_binary_sampler(4, 'int')
    assert str(exin.value) == err_msg
    with pytest.raises(TypeError) as exin:
        fudi.random_binary_sampler(4, 4.2)
    assert str(exin.value) == err_msg

    err_msg = 'The number of samples must be greater than 0.'
    with pytest.raises(ValueError) as exin:
        fudi.random_binary_sampler(4, 0)
    assert str(exin.value) == err_msg
    with pytest.raises(ValueError) as exin:
        fudi.random_binary_sampler(4, -42)
    assert str(exin.value) == err_msg

    fatf.setup_random_seed()
    sample = fudi.random_binary_sampler(4, 10)
    sample_ = np.array([[0, 1, 0, 0], [0, 1, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0],
                        [1, 1, 1, 0], [1, 0, 1, 1], [1, 1, 1, 1], [1, 1, 0, 0],
                        [1, 1, 1, 0], [1, 0, 0, 0]])
    assert np.array_equal(sample, sample_)
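
Distilled from the assertions above, a minimal usage sketch of the sampler; the shape and the 0/1 values are the only properties relied on here, nothing beyond what the test already exercises:

binary_draws = fudi.random_binary_sampler(4, 10)
assert binary_draws.shape == (10, 4)        # 10 samples of 4 binary elements
assert np.isin(binary_draws, [0, 1]).all()  # every entry is a 0/1 indicator
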
def test_random_seed(caplog):
    """
    Tests random number generator seeding when the seed is random.
    """
    fatf.setup_random_seed()
    seed = np.random.get_state()[1][0]
    message = 'Seeding RNGs with {}.'.format(seed)

    # Check logging
    # Check that only one message was logged
    assert len(caplog.records) == 1
    # Check this message's log level
    assert caplog.records[0].levelname == 'INFO'
    # Check that the message matches
    assert caplog.records[0].getMessage() == message

    # Check Python random state
    python_random_seed = random.getstate()
    random.seed(seed)
    assert random.getstate() == python_random_seed
    assert id(random.getstate()) != id(python_random_seed)
    def test_randomise_patch(self):
        """
        Tests :func:`fatf.utils.data.occlusion.Occlusion._randomise_patch`.
        """
        fatf.setup_random_seed()
        mask_ = np.array([[1, 0], [0, 1]], dtype=bool)

        # Colour
        occlusion = fudo.Occlusion(ARRAY_IMAGE_3D, SEGMENTS)
        assert np.array_equal(
            occlusion._randomise_patch(mask_),
            np.array([[125, 114, 71], [52, 44, 216]], dtype=np.uint8))
        # ..check the default
        assert np.array_equal(
            occlusion._colouring_strategy(ONES),
            occlusion._generate_colouring_strategy('mean')(ONES))

        # Grayscale
        occlusion = fudo.Occlusion(ARRAY_IMAGE_2D, SEGMENTS)
        assert np.array_equal(
            occlusion._randomise_patch(mask_),
            np.array([119, 13], dtype=np.uint8))
        # ..check the default
        assert np.array_equal(
            occlusion._colouring_strategy(ONES),
            occlusion._generate_colouring_strategy('mean')(ONES))

        # Black-and-white
        occlusion = fudo.Occlusion(
            np.array([[0, 255], [255, 0]], dtype=np.uint8), SEGMENTS)
        assert np.array_equal(
            occlusion._randomise_patch(mask_),
            np.array([0, 255], dtype=np.uint8))
        # ..check the default
        assert np.array_equal(
            occlusion._colouring_strategy(ONES),
            occlusion._generate_colouring_strategy('black')(ONES))
import matplotlib.pyplot as plt
import numpy as np

import fatf
import fatf.utils.models as fatf_models
import fatf.transparency.predictions.surrogate_image_explainers as fatf_exp
import fatf.vis.lime as fatf_vis_lime

print(__doc__)

# Fix random seed
fatf.setup_random_seed(42)

# Create a simple data set
r, g, b, k = [255, 0, 0], [0, 255, 0], [0, 0, 255], [0, 0, 0]
X = np.array([[[r, g], [b, k]], [[r, b], [g, k]], [[r, k], [b, g]],
              [[k, g], [b, r]], [[k, b], [g, r]], [[g, k], [b, r]]],
             dtype=np.uint8)
y = np.array([0, 0, 0, 1, 1, 1])

feature_names = {
    'Segment #1': 'top-left',
    'Segment #2': 'top-right',
    'Segment #3': 'bottom-left',
    'Segment #4': 'bottom-right'
}
class_names = {0: 'top-left-red', 1: 'bottom-right-red'}
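
The example script is truncated at this point; the original continues beyond what is shown. Purely as a hedged sketch (not the original continuation), the imported fatf_models module could be used to fit a simple classifier on the flattened images before building the surrogate image explainer from fatf_exp:

# Sketch only -- flattening the 2x2 RGB images and the choice of k are
# illustrative assumptions, not taken from the original example.
clf = fatf_models.KNN(k=3)
clf.fit(X.reshape((X.shape[0], -1)), y)
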
def test_linear_classifier_coefficients():
    """
    Tests linear scikit-learn classifier coefficient extraction.

    Tests :func:`fatf.transparency.sklearn.linear_model.\
linear_classifier_coefficients` function.
    """
    fatf.setup_random_seed()

    type_error = ('This functionality is designated for linear-like '
                  'scikit-learn predictor instances only. Instead got: {}.')
    unfit_error = ("This {} instance is not fitted yet. Call 'fit' with "
                   'appropriate arguments before using this method.')

    for clf in NON_LINEAR_MODELS:
        clf_instance = clf()
        clf_instance.fit(DATA, LABELS)

        with pytest.raises(TypeError) as excinfo:
            ftsl.linear_classifier_coefficients(clf_instance)
        name = str(clf).strip("<>' ")[7:]
        assert str(excinfo.value) == type_error.format(name)

    for i, clf in enumerate(LINEAR_REGRESSORS):
        name = clf.__name__
        kwargs = get_kwargs(name)
        clf_instance = clf(**kwargs)

        with pytest.raises(sklearn.exceptions.NotFittedError) as excinfo:
            ftsl.linear_classifier_coefficients(clf_instance)
        msg = unfit_error.format(clf_instance.__class__.__name__)
        assert str(excinfo.value) == msg

        clf_instance.fit(DATA, LABELS)

        coef = ftsl.linear_classifier_coefficients(clf_instance)
        if name == 'SGDRegressor':
            assert np.allclose(coef / 1e+10, LINEAR_REG_COEF[i], atol=1e-3)
        else:
            assert np.allclose(coef, LINEAR_REG_COEF[i], atol=1e-3)

    for i, clf in enumerate(LINEAR_CLASSIFIERS):
        name = clf.__name__
        kwargs = get_kwargs(name)
        clf_instance = clf(**kwargs)

        with pytest.raises(sklearn.exceptions.NotFittedError) as excinfo:
            ftsl.linear_classifier_coefficients(clf_instance)
        msg = unfit_error.format(clf_instance.__class__.__name__)
        assert str(excinfo.value) == msg

        clf_instance.fit(DATA, LABELS)

        coef = ftsl.linear_classifier_coefficients(clf_instance)
        assert np.allclose(coef, LINEAR_CLF_COEF[i], atol=1e-3)

    for i, clf in enumerate(LINEAR_MULTITASK_REGRESSORS):
        name = clf.__name__
        kwargs = get_kwargs(name)
        clf_instance = clf(**kwargs)

        with pytest.raises(sklearn.exceptions.NotFittedError) as excinfo:
            ftsl.linear_classifier_coefficients(clf_instance)
        msg = unfit_error.format(clf_instance.__class__.__name__)
        assert str(excinfo.value) == msg

        clf_instance.fit(DATA, LABELS_MULTITASK)

        coef = ftsl.linear_classifier_coefficients(clf_instance)
        assert np.allclose(coef, LINEAR_MUL_REG_COEF[i], atol=1e-3)
def test_local_fidelity_score():
    """
    Tests the ``local_fidelity_score`` function.

    This function tests the
    :func:`fatf.utils.transparency.surrogate_evaluation.local_fidelity_score`
    function.
    """
    accuracy_warning = ('Some of the given labels are not present in either '
                        'of the input arrays: {}.')
    fatf.setup_random_seed()

    def accuracy(global_predictions, local_predictions):
        global_predictions[global_predictions >= 0.5] = 1
        global_predictions[global_predictions < 0.5] = 0

        local_predictions[local_predictions >= 0.5] = 1
        local_predictions[local_predictions < 0.5] = 0

        confusion_matrix = fumt.get_confusion_matrix(global_predictions,
                                                     local_predictions,
                                                     labels=[0, 1])
        accuracy = fummet.accuracy(confusion_matrix)

        return accuracy

    def accuracy_prob(global_predictions,
                      local_predictions,
                      global_proba=True,
                      local_proba=True):
        if global_proba:
            global_predictions = np.argmax(global_predictions, axis=1)
        if local_proba:
            local_predictions = np.argmax(local_predictions, axis=1)

        confusion_matrix = fumt.get_confusion_matrix(global_predictions,
                                                     local_predictions,
                                                     labels=[0, 1, 2])
        accuracy = fummet.accuracy(confusion_matrix)

        return accuracy

    def accuracy_proba_np(global_predictions, local_predictions):
        return accuracy_prob(global_predictions,
                             local_predictions,
                             global_proba=False,
                             local_proba=True)

    def accuracy_proba_nn(global_predictions, local_predictions):
        return accuracy_prob(global_predictions,
                             local_predictions,
                             global_proba=False,
                             local_proba=False)

    def reg_dist(global_predictions, local_predictions):
        return (global_predictions - local_predictions).sum()

    predictor = fumm.KNN(k=3)
    predictor.fit(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET)

    regressor = fumm.KNN(k=3, mode='regressor')
    regressor.fit(NUMERICAL_NP_ARRAY_LOCAL, NUMERICAL_NP_ARRAY_LOCAL_TARGET)

    regressor_23 = fumm.KNN(k=3, mode='regressor')
    regressor_23.fit(NUMERICAL_NP_ARRAY_LOCAL[:, [2, 3]],
                     NUMERICAL_NP_ARRAY_LOCAL_TARGET)

    # Structured array
    predictor_struct = fumm.KNN(k=3)
    predictor_struct.fit(NUMERICAL_STRUCT_ARRAY, NUMERICAL_NP_ARRAY_TARGET)
    #
    regressor_struct_cd = fumm.KNN(k=3, mode='regressor')
    regressor_struct_cd.fit(NUMERICAL_STRUCT_ARRAY_LOCAL[['c', 'd']],
                            NUMERICAL_NP_ARRAY_LOCAL_TARGET)

    # Global: probabilistic...
    # ...local: regressor
    comparison = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                           NUMERICAL_NP_ARRAY[0],
                                           predictor.predict_proba,
                                           regressor.predict, accuracy, 2)
    assert np.isclose(comparison, 0.26)
    # ...local: classifier
    comparison = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                           NUMERICAL_NP_ARRAY[0],
                                           predictor.predict_proba,
                                           predictor.predict, accuracy, 2)
    assert np.isclose(comparison, 1.0)
    # ...local: probabilistic
    with pytest.warns(UserWarning) as w:
        comparison = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                               NUMERICAL_NP_ARRAY[0],
                                               predictor.predict_proba,
                                               predictor.predict_proba,
                                               accuracy_prob)
    assert len(w) == 1
    assert str(w[0].message) == accuracy_warning.format(set([1]))
    assert np.isclose(comparison, 1.0)

    # Global: classifier...
    # ...local: probabilistic
    with pytest.warns(UserWarning) as w:
        comparison = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                               NUMERICAL_NP_ARRAY[0],
                                               predictor.predict,
                                               predictor.predict_proba,
                                               accuracy_proba_np)
    assert len(w) == 1
    assert str(w[0].message) == accuracy_warning.format(set([1]))
    assert np.isclose(comparison, 1.0)
    # ...local: classifier
    with pytest.warns(UserWarning) as w:
        comparison = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                               NUMERICAL_NP_ARRAY[0],
                                               predictor.predict,
                                               predictor.predict,
                                               accuracy_proba_nn)
    assert len(w) == 1
    assert str(w[0].message) == accuracy_warning.format(set([1]))
    assert np.isclose(comparison, 1.0)

    # Global: regressor...
    # ...local: regressor
    comparison = futs.local_fidelity_score(NUMERICAL_NP_ARRAY,
                                           NUMERICAL_NP_ARRAY[0],
                                           regressor.predict,
                                           regressor_23.predict,
                                           reg_dist,
                                           explained_feature_indices=[2, 3])
    assert np.isclose(comparison, 0)

    # Structured array
    # Global: probabilistic...
    # ...local: regressor
    comparison = futs.local_fidelity_score(
        NUMERICAL_STRUCT_ARRAY,
        NUMERICAL_STRUCT_ARRAY[0],
        predictor_struct.predict_proba,
        regressor_struct_cd.predict,
        accuracy,
        0,
        explained_feature_indices=['c', 'd'])
    assert np.isclose(comparison, 0.94)
def test_submodular_pick():
    """Tests :func:`fatf.transparency.models.submodular_pick`."""
    fatf.setup_random_seed()

    explanations, explanation_ind = ftms.submodular_pick(
        NUMERICAL_NP_ARRAY, explain_instance_a, explanations_number=2)
    assert explanation_ind == [0, 2]
    assert explanations == [EXPLAINERS[0], EXPLAINERS[2]]

    explanations, explanation_ind = ftms.submodular_pick(
        NUMERICAL_NP_ARRAY, explain_instance_b, explanations_number=2)
    assert explanation_ind == [0, 1]
    assert explanations == [EXPLAINERS[3], EXPLAINERS[2]]

    msg = ('sample_size is larger than the number of samples in the data set. '
           'The whole dataset will be used.')
    with pytest.warns(UserWarning) as warning:
        explanations, explanation_ind = ftms.submodular_pick(
            NUMERICAL_NP_ARRAY,
            explain_instance_a,
            sample_size=100,
            explanations_number=1)
    assert len(warning) == 1
    assert str(warning[0].message) == msg
    assert explanation_ind == [0]
    assert explanations == [EXPLAINERS[0]]

    explanations, explanation_ind = ftms.submodular_pick(
        NUMERICAL_NP_ARRAY,
        explain_instance_a,
        sample_size=1,
        explanations_number=1)
    assert explanation_ind == [1]
    assert explanations == [EXPLAINERS[1]]

    explanations, explanation_ind = ftms.submodular_pick(
        NUMERICAL_NP_ARRAY,
        explain_instance_a,
        sample_size=0,
        explanations_number=0)
    assert explanation_ind == [0, 2, 1, 3]
    assert explanations == [
        EXPLAINERS[0], EXPLAINERS[2], EXPLAINERS[1], EXPLAINERS[3]
    ]

    explanations, explanation_ind = ftms.submodular_pick(
        NUMERICAL_NP_ARRAY,
        explain_instance_a,
        sample_size=2,
        explanations_number=0)
    assert explanation_ind == [3, 1]
    assert explanations == [EXPLAINERS[3], EXPLAINERS[1]]

    msg = ('The number of explanations cannot be larger than '
           'the number of instances (rows) in the data set.')
    with pytest.warns(UserWarning) as warning:
        explanations, explanation_ind = ftms.submodular_pick(
            NUMERICAL_NP_ARRAY, explain_instance_a, 0, 222)
    assert len(warning) == 1
    assert str(warning[0].message) == msg
    assert explanation_ind == [0, 2, 1, 3]
    assert explanations == [
        EXPLAINERS[0], EXPLAINERS[2], EXPLAINERS[1], EXPLAINERS[3]
    ]
    def test_sample(self):
        """
        Tests :func:`~fatf.utils.data.augmentation.Mixup.sample` method.
        """
        user_warning_gt = (
            'This Mixup class has not been initialised with a ground truth '
            'vector. The value of the data_row_target parameter will be '
            'ignored, therefore target values samples will not be returned.')
        user_warning_strat = (
            'Since the ground truth vector was not provided while '
            'initialising the Mixup class it is not possible to get a '
            'stratified sample of data points. Instead, Mixup will choose '
            'data points at random, which is equivalent to assuming that the '
            'class distribution is balanced.')
        fatf.setup_random_seed()

        # Mixed array with ground truth and probabilities
        samples = self.mixed_augmentor_i2f.sample(MIXED_ARRAY[0],
                                                  0,
                                                  5,
                                                  return_probabilities=True)
        assert len(samples) == 2
        answer_sample = np.array(
            [(0.000, 'a', 0.332, 'a'),
             (0.000, 'a', 0.080, 'a'),
             (0.780, 'a', 0.587, 'a'),
             (0.992, 'a', 0.725, 'a'),
             (0.734, 'a', 0.073, 'a')],
            dtype=[('a', '<f4'), ('b', '<U1'),
                   ('c', '<f4'), ('d', '<U2')])  # yapf: disable
        answer_sample_gt = np.array([[1, 0], [1, 0], [1, 0], [1, 0],
                                     [0.266, 0.734]])
        assert np.allclose(samples[1], answer_sample_gt, atol=1e-3)
        for i in ['a', 'c']:
            assert np.allclose(samples[0][i], answer_sample[i], atol=1e-3)
        for i in ['b', 'd']:
            assert np.array_equal(samples[0][i], answer_sample[i])

        # Mixed array with ground truth and probabilities
        samples = self.mixed_augmentor.sample(MIXED_ARRAY[0],
                                              1,
                                              5,
                                              return_probabilities=True)
        assert len(samples) == 2
        answer_sample = np.array(
            [(0, 'a', 0.829, 'a'),
             (0, 'a', 0.601, 'a'),
             (0, 'a', 0.255, 'a'),
             (0, 'a', 0.377, 'a'),
             (0, 'a', 0.071, 'a')],
            dtype=[('a', '<i4'), ('b', '<U1'),
                   ('c', '<f4'), ('d', '<U2')])  # yapf: disable
        answer_sample_gt = np.array([[0.823, 0.177], [0.802, 0.198],
                                     [0.624, 0.376], [0.457, 0.543], [0, 1]])
        assert np.allclose(samples[1], answer_sample_gt, atol=1e-3)
        for i in ['a', 'c']:
            assert np.allclose(samples[0][i], answer_sample[i], atol=1e-3)
        for i in ['b', 'd']:
            assert np.array_equal(samples[0][i], answer_sample[i])

        # Numpy array without ground truth -- categorical
        with pytest.warns(UserWarning) as warning:
            samples = self.categorical_np_augmentor.sample(
                CATEGORICAL_NP_ARRAY[0], samples_number=5)
        assert len(warning) == 1
        assert str(warning[0].message) == user_warning_strat
        #
        answer_sample = np.array([['a', 'b', 'c'], ['a', 'b', 'c'],
                                  ['a', 'b', 'c'], ['a', 'b', 'c'],
                                  ['a', 'b', 'c']])
        assert np.array_equal(samples, answer_sample)

        # Numpy array without ground truth -- numerical -- test for warning
        with pytest.warns(UserWarning) as warning:
            samples = self.numerical_np_augmentor.sample(NUMERICAL_NP_ARRAY[0],
                                                         data_row_target=1,
                                                         samples_number=5)
        assert len(warning) == 2
        assert str(warning[0].message) == user_warning_gt
        assert str(warning[1].message) == user_warning_strat
        #
        answer_sample = np.array([[0.792, 0.000, 0.040, 0.373],
                                  [0.000, 0.000, 0.080, 0.690],
                                  [1.220, 0.610, 0.476, 0.562],
                                  [0.000, 0.000, 0.080, 0.690],
                                  [1.389, 0.694, 0.531, 0.544]])
        assert np.allclose(samples, answer_sample, atol=1e-3)

        # Structured array with ground truth -- numerical -- no probabilities
        samples = self.numerical_struct_augmentor.sample(
            NUMERICAL_STRUCT_ARRAY[0], samples_number=5, data_row_target='b')
        assert len(samples) == 2
        answer_sample = np.array(
            [(0, 0, 0.039, 0.358),
             (1, 0, 0.544, 0.540),
             (1, 0, 0.419, 0.580),
             (0, 0, 0.080, 0.690),
             (0, 0, 0.080, 0.690)],
            dtype=[('a', '<i4'), ('b', '<i4'),
                   ('c', '<f4'), ('d', '<f4')])  # yapf: disable
        answer_sample_gt = np.array(['a', 'a', 'a', 'b', 'b'])
        assert np.array_equal(samples[1], answer_sample_gt)
        for index in ['a', 'b', 'c', 'd']:
            assert np.allclose(samples[0][index],
                               answer_sample[index],
                               atol=1e-3)
def test_highest_weights(caplog):
    """
    Tests :func:`fatf.utils.data.feature_choice.sklearn.highest_weights`.
    """
    assert len(caplog.records) == 0
    fatf.setup_random_seed()
    assert len(caplog.records) == 2
    assert caplog.records[0].levelname == 'INFO'
    assert caplog.records[0].getMessage().startswith('Seeding RNGs ')
    assert caplog.records[1].levelname == 'INFO'
    assert caplog.records[1].getMessage() == 'Seeding RNGs with 42.'

    # Weights and no-weights
    weights = np.ones((NUMERICAL_NP_ARRAY.shape[0], ))
    # Classic array -- weights
    features = fudfs.highest_weights(NUMERICAL_NP_ARRAY,
                                     NUMERICAL_NP_ARRAY_TARGET, weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([1, 2]))
    # Structured array -- no-weights
    features = fudfs.highest_weights(
        NUMERICAL_STRUCT_ARRAY, NUMERICAL_NP_ARRAY_TARGET, features_number=2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array(['b', 'c']))
    #
    # Selecting exactly 4 features -- no need for Lasso
    features = fudfs.highest_weights(NUMERICAL_NP_ARRAY,
                                     NUMERICAL_NP_ARRAY_TARGET, weights, 4)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 1, 2, 3]))
    # Selecting more than 4 features
    with pytest.warns(UserWarning) as warning:
        features = fudfs.highest_weights(NUMERICAL_STRUCT_ARRAY,
                                         NUMERICAL_NP_ARRAY_TARGET, weights, 5)
    assert len(warning) == 1
    assert str(warning[0].message) == FEATURE_INDICES_WARNING
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array(['a', 'b', 'c', 'd']))
    #
    # No features number -- just percentage
    features = fudfs.highest_weights(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET, features_percentage=50)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([1, 2]))
    # No features number -- just percentage -- too small, so no features selected
    assert len(caplog.records) == 2
    features = fudfs.highest_weights(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET, features_percentage=24)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([2]))
    assert len(caplog.records) == 3
    assert caplog.records[2].levelname == 'WARNING'
    assert caplog.records[2].getMessage() == FEATURE_PERCENTAGE_LOG

    # Small weights
    weights = np.array([1, 1, 100, 1, 1, 1]) * 1e-20
    features = fudfs.highest_weights(NUMERICAL_NP_ARRAY,
                                     NUMERICAL_NP_ARRAY_TARGET, weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 1]))

    # Another selection
    weights = np.array([100, 1, 1, 1, 1, 1])
    features = fudfs.highest_weights(NUMERICAL_NP_ARRAY,
                                     NUMERICAL_NP_ARRAY_TARGET, weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([2, 3]))
    features = fudfs.highest_weights(NUMERICAL_STRUCT_ARRAY,
                                     NUMERICAL_NP_ARRAY_TARGET, weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array(['c', 'd']))

    # Custom data
    features = fudfs.highest_weights(
        np.array([[1, 2, 3], [2, 2, 3], [3, 2, 3], [4, 2, 3]]),
        np.array([1, 2, 3, 4]),
        features_number=2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 2]))
    assert len(caplog.records) == 3
def test_lasso_path(caplog):
    """
    Tests :func:`fatf.utils.data.feature_choice.sklearn.lasso_path` function.
    """
    no_lasso_log = ('The lasso path feature selection could not pick any '
                    'feature subset. All of the features were selected.')
    less_lasso_log = ('The lasso path feature selection could not pick {} '
                      'features. Only {} were selected.')

    assert len(caplog.records) == 0
    fatf.setup_random_seed()
    assert len(caplog.records) == 2
    assert caplog.records[0].levelname == 'INFO'
    assert caplog.records[0].getMessage().startswith('Seeding RNGs ')
    assert caplog.records[1].levelname == 'INFO'
    assert caplog.records[1].getMessage() == 'Seeding RNGs with 42.'

    # Weights and no-weights
    weights = np.ones((NUMERICAL_NP_ARRAY.shape[0], ))
    # Classic array -- weights
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 1]))
    # Structured array -- no-weights
    features = fudfs.lasso_path(
        NUMERICAL_STRUCT_ARRAY, NUMERICAL_NP_ARRAY_TARGET, features_number=2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array(['a', 'b']))
    #
    # Selecting exactly 4 features -- no need for Lasso
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 4)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 1, 2, 3]))
    # Selecting more than 4 features
    with pytest.warns(UserWarning) as warning:
        features = fudfs.lasso_path(NUMERICAL_STRUCT_ARRAY,
                                    NUMERICAL_NP_ARRAY_TARGET, weights, 5)
    assert len(warning) == 1
    assert str(warning[0].message) == FEATURE_INDICES_WARNING
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array(['a', 'b', 'c', 'd']))
    #
    # No features number -- just percentage
    features = fudfs.lasso_path(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET, features_percentage=50)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 1]))
    # No features number -- just percentage -- too small, so no features selected
    assert len(caplog.records) == 2
    features = fudfs.lasso_path(
        NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET, features_percentage=24)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0]))
    assert len(caplog.records) == 3
    assert caplog.records[2].levelname == 'WARNING'
    assert caplog.records[2].getMessage() == FEATURE_PERCENTAGE_LOG

    # Weights too small so no path is found -- returns all features
    weights = np.array([1, 1, 100, 1, 1, 1]) * 1e-20
    assert len(caplog.records) == 3
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 1, 2, 3]))
    assert len(caplog.records) == 4
    assert caplog.records[3].levelname == 'WARNING'
    assert caplog.records[3].getMessage() == no_lasso_log

    # Another selection
    weights = np.array([1, 1, 100, 1, 1, 1])
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0, 2]))
    features = fudfs.lasso_path(NUMERICAL_STRUCT_ARRAY,
                                NUMERICAL_NP_ARRAY_TARGET, weights, 2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array(['a', 'c']))

    # Lasso with no possibility of reducing the number of features
    assert len(caplog.records) == 4
    features = fudfs.lasso_path(
        np.array([[1, 2, 3], [2, 2, 3], [3, 2, 3], [4, 2, 3]]),
        np.array([1, 2, 3, 4]),
        features_number=2)
    assert fuav.is_1d_array(features)
    assert np.array_equal(features, np.array([0]))
    assert len(caplog.records) == 5
    assert caplog.records[4].levelname == 'WARNING'
    assert caplog.records[4].getMessage() == less_lasso_log.format(2, 1)
    def test_generate_colouring_strategy(self):
        """
        Tests :func:`fatf.utils.data.occlusion.Occlusion.\
_generate_colouring_strategy`.
        """
        occlusion = fudo.Occlusion(ARRAY_IMAGE_3D, SEGMENTS)

        # Errors
        msg = ('The colour can either be a string specifier; or '
               'an RGB thriplet for RGB images and an integer '
               'for or grayscale and black-and-white images.')
        with pytest.raises(TypeError) as exin:
            occlusion._generate_colouring_strategy(['list'])
        assert str(exin.value) == msg

        # int for colour
        with pytest.raises(TypeError) as exin:
            occlusion._generate_colouring_strategy(33)
        assert str(exin.value) == msg

        # tuple for grayscale/black-and-white
        occlusion = fudo.Occlusion(ARRAY_IMAGE_2D, SEGMENTS)
        with pytest.raises(TypeError) as exin:
            occlusion._generate_colouring_strategy((4, 2, 0))
        assert str(exin.value) == msg
        with pytest.raises(TypeError) as exin:
            occlusion._generate_colouring_strategy(2.0)
        assert str(exin.value) == msg

        # Colour
        occlusion = fudo.Occlusion(ARRAY_IMAGE_3D, SEGMENTS)

        # string
        msg = ('Unknown colouring strategy name: colour.\n'
               "Choose one of the following: ['black', 'blue', 'green', "
               "'mean', 'pink', 'random', 'random-patch', 'randomise', "
               "'randomise-patch', 'red', 'white'].")
        with pytest.raises(ValueError) as exin:
            occlusion._generate_colouring_strategy('colour')
        assert str(exin.value) == msg
        # functional -- mean
        clr = occlusion._generate_colouring_strategy(None)(ONES)
        assert np.array_equal(clr, np.ones(shape=(2, 2, 2, 3), dtype=np.uint8))
        clr = occlusion._generate_colouring_strategy('mean')(ONES)
        assert np.array_equal(clr, np.ones(shape=(2, 2, 2, 3), dtype=np.uint8))

        one_ = np.zeros(shape=(2, 2), dtype=bool)
        one_[1, 1] = True
        fatf.setup_random_seed()
        # functional -- random
        clr = occlusion._generate_colouring_strategy('random')(ONES)
        assert np.array_equal(clr, (57, 12, 140))
        # functional -- random-patch
        clr = occlusion._generate_colouring_strategy('random-patch')(one_)
        assert np.array_equal(clr, np.array([[16, 15, 47]], dtype=np.uint8))
        # functional -- randomise
        clr = occlusion._generate_colouring_strategy('randomise')(one_)
        assert np.array_equal(clr, (101, 214, 112))
        # functional -- randomise-patch
        clr = occlusion._generate_colouring_strategy('randomise-patch')(one_)
        assert np.array_equal(clr, np.array([[81, 216, 174]], dtype=np.uint8))
        # functional -- black
        clr = occlusion._generate_colouring_strategy('black')(one_)
        assert np.array_equal(clr, (0, 0, 0))
        # functional -- white
        clr = occlusion._generate_colouring_strategy('white')(one_)
        assert np.array_equal(clr, (255, 255, 255))
        # functional -- red
        clr = occlusion._generate_colouring_strategy('red')(one_)
        assert np.array_equal(clr, (255, 0, 0))
        # functional -- green
        clr = occlusion._generate_colouring_strategy('green')(one_)
        assert np.array_equal(clr, (0, 255, 0))
        # functional -- blue
        clr = occlusion._generate_colouring_strategy('blue')(one_)
        assert np.array_equal(clr, (0, 0, 255))
        # functional -- pink
        clr = occlusion._generate_colouring_strategy('pink')(one_)
        assert np.array_equal(clr, (255, 192, 203))

        # tuple
        clr = occlusion._generate_colouring_strategy((42, 24, 242))(one_)
        assert np.array_equal(clr, (42, 24, 242))

        # Grayscale
        occlusion = fudo.Occlusion(ARRAY_IMAGE_2D, SEGMENTS)
        # int
        msg = ('Unknown colouring strategy name: colour.\n'
               "Choose one of the following: ['black', 'mean', 'random', "
               "'random-patch', 'randomise', 'randomise-patch', 'white'].")
        with pytest.raises(ValueError) as exin:
            occlusion._generate_colouring_strategy('colour')
        assert str(exin.value) == msg

        msg = ('The colour should be an integer between '
               '0 and 255 for grayscale images.')
        with pytest.raises(ValueError) as exin:
            occlusion._generate_colouring_strategy(-1)
        assert str(exin.value) == msg
        with pytest.raises(ValueError) as exin:
            occlusion._generate_colouring_strategy(256)
        assert str(exin.value) == msg

        clr = occlusion._generate_colouring_strategy(42)(one_)
        assert clr == 42

        # string
        clr = occlusion._generate_colouring_strategy(None)(ONES)
        assert np.array_equal(
            clr,
            np.array([[[85, 2], [85, 2]], [[85, 2], [85, 2]]], dtype=np.uint8))
        clr = occlusion._generate_colouring_strategy('mean')(ONES)
        assert np.array_equal(
            clr,
            np.array([[[85, 2], [85, 2]], [[85, 2], [85, 2]]], dtype=np.uint8))

        fatf.setup_random_seed()
        # functional -- random
        clr = occlusion._generate_colouring_strategy('random')(ONES)
        assert clr == 57
        # functional -- random-patch
        clr = occlusion._generate_colouring_strategy('random-patch')(one_)
        assert np.array_equal(clr, np.array([125], dtype=np.uint8))
        # functional -- randomise
        clr = occlusion._generate_colouring_strategy('randomise')(one_)
        assert clr == 71
        # functional -- randomise-patch
        clr = occlusion._generate_colouring_strategy('randomise-patch')(one_)
        assert np.array_equal(clr, np.array([44], dtype=np.uint8))
        # functional -- black
        clr = occlusion._generate_colouring_strategy('black')(one_)
        assert clr == 0
        # functional -- white
        clr = occlusion._generate_colouring_strategy('white')(one_)
        assert clr == 255

        # Black-and-white
        occlusion = fudo.Occlusion(
            np.array([[0, 255], [0, 255]], dtype=np.uint8), SEGMENTS)

        # int
        msg = ('The colour should be 0 for black, or 1 or 255 for '
               'white for black-and-white images.')
        with pytest.raises(ValueError) as exin:
            occlusion._generate_colouring_strategy(42)
        assert str(exin.value) == msg

        clr = occlusion._generate_colouring_strategy(0)(one_)
        assert clr == 0
        clr = occlusion._generate_colouring_strategy(1)(one_)
        assert clr == 255
        clr = occlusion._generate_colouring_strategy(255)(one_)
        assert clr == 255

        # string
        msg = 'Mean occlusion is not supported for black-and-white images.'
        with pytest.raises(RuntimeError) as exin:
            occlusion._generate_colouring_strategy(None)
        assert str(exin.value) == msg
        with pytest.raises(RuntimeError) as exin:
            occlusion._generate_colouring_strategy('mean')
        assert str(exin.value) == msg

        fatf.setup_random_seed()
        # functional -- random
        clr = occlusion._generate_colouring_strategy('random')(ONES)
        assert clr == 0
        # functional -- random-patch
        clr = occlusion._generate_colouring_strategy('random-patch')(one_)
        assert np.array_equal(clr, np.array([0], dtype=np.uint8))
        # functional -- randomise
        clr = occlusion._generate_colouring_strategy('randomise')(one_)
        assert clr == 0
        # functional -- randomise-patch
        clr = occlusion._generate_colouring_strategy('randomise-patch')(one_)
        assert np.array_equal(clr, np.array([0], dtype=np.uint8))
        # functional -- black
        clr = occlusion._generate_colouring_strategy('black')(one_)
        assert clr == 0
        # functional -- white
        clr = occlusion._generate_colouring_strategy('white')(one_)
        assert clr == 255
def test_lasso_path(caplog):
    """
    Tests :func:`fatf.utils.data.feature_choice.sklearn.lasso_path` function.
    """
    feature_indices_warning = ('The selected number of features is larger '
                               'than the total number of features in the '
                               'dataset array. All of the features are being '
                               'selected.')
    feature_percentage_log = (
        'Since the number of features to be extracted was not given 24% of '
        'features will be used. This percentage translates to 0 features, '
        'therefore the number of features to be used is overwritten to 1. To '
        'prevent this from happening, you should either explicitly set the '
        'number of features via the features_number parameter or increase the '
        'value of the features_percentage parameter.')
    no_lasso_log = ('The lasso path feature selection could not pick any '
                    'feature subset. All of the features were selected.')
    less_lasso_log = ('The lasso path feature selection could not pick {} '
                      'features. Only {} were selected.')

    assert len(caplog.records) == 0
    fatf.setup_random_seed()
    assert len(caplog.records) == 2
    assert caplog.records[0].levelname == 'INFO'
    assert caplog.records[0].getMessage().startswith('Seeding RNGs ')
    assert caplog.records[1].levelname == 'INFO'
    assert caplog.records[1].getMessage() == 'Seeding RNGs with 42.'

    # Weights and no-weights
    weights = np.ones((NUMERICAL_NP_ARRAY.shape[0], ))
    # Classic array -- weights
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 2)
    assert np.array_equal(features, np.array([0, 1]))
    # Structured array -- no-weights
    features = fudfs.lasso_path(NUMERICAL_STRUCT_ARRAY,
                                NUMERICAL_NP_ARRAY_TARGET,
                                features_number=2)
    assert np.array_equal(features, np.array(['a', 'b']))
    #
    # Selecting exactly 4 features -- no need for Lasso
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 4)
    assert np.array_equal(features, np.array([0, 1, 2, 3]))
    # Selecting more than 4 features
    with pytest.warns(UserWarning) as warning:
        features = fudfs.lasso_path(NUMERICAL_STRUCT_ARRAY,
                                    NUMERICAL_NP_ARRAY_TARGET, weights, 5)
    assert len(warning) == 1
    assert str(warning[0].message) == feature_indices_warning
    assert np.array_equal(features, np.array(['a', 'b', 'c', 'd']))
    #
    # No features number -- just percentage
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY,
                                NUMERICAL_NP_ARRAY_TARGET,
                                features_percentage=50)
    assert np.array_equal(features, np.array([0, 1]))
    # No features number -- just percentage -- too small, so no features selected
    assert len(caplog.records) == 2
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY,
                                NUMERICAL_NP_ARRAY_TARGET,
                                features_percentage=24)
    assert np.array_equal(features, np.array([0]))
    assert len(caplog.records) == 3
    assert caplog.records[2].levelname == 'WARNING'
    assert caplog.records[2].getMessage() == feature_percentage_log

    # Weights too small so no path is found -- returns all features
    weights = np.array([1, 1, 100, 1, 1, 1]) * 1e-20
    assert len(caplog.records) == 3
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 2)
    assert np.array_equal(features, np.array([0, 1, 2, 3]))
    assert len(caplog.records) == 4
    assert caplog.records[3].levelname == 'WARNING'
    assert caplog.records[3].getMessage() == no_lasso_log

    # Another selection
    weights = np.array([1, 1, 100, 1, 1, 1])
    features = fudfs.lasso_path(NUMERICAL_NP_ARRAY, NUMERICAL_NP_ARRAY_TARGET,
                                weights, 2)
    assert np.array_equal(features, np.array([0, 2]))
    features = fudfs.lasso_path(NUMERICAL_STRUCT_ARRAY,
                                NUMERICAL_NP_ARRAY_TARGET, weights, 2)
    assert np.array_equal(features, np.array(['a', 'c']))

    # Lasso with no possibility of reducing the number of features
    assert len(caplog.records) == 4
    features = fudfs.lasso_path(np.array([[1, 2, 3], [2, 2, 3], [3, 2, 3],
                                          [4, 2, 3]]),
                                np.array([1, 2, 3, 4]),
                                features_number=2)
    assert len(caplog.records) == 5
    assert caplog.records[4].levelname == 'WARNING'
    assert caplog.records[4].getMessage() == less_lasso_log.format(2, 1)
def test_binary_sampler():
    """
    Tests :func:`fatf.utils.data.instance_augmentation.binary_sampler`.
    """
    fatf.setup_random_seed()

    binary_msg = 'The data_row is not binary.'
    proportions = [0.5, 0., 0.5, 0.5]

    numerical_binary_array = np.array([1, 0, 1, 1])
    numerical_binary_array_sampled = np.array([
        [0, 0, 0, 0],
        [1, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 1, 0]
    ])  # yapf: disable

    struct_dtype = [('a', 'i'), ('b', 'i'), ('c', 'f'), ('d', bool)]
    numerical_binary_struct_array = np.array([(1, 0, 1., True)],
                                             dtype=struct_dtype)
    numerical_binary_struct_array = numerical_binary_struct_array[0]
    numerical_binary_struct_array_sampled = np.array(
        [(1, 0, 0., False),
         (0, 0, 0., True),
         (1, 0, 0., True),
         (1, 0, 1., True),
         (1, 0, 0., False)],
        dtype=struct_dtype)  # yapf: disable

    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(np.array([0, 1, 2, 3]))
    assert str(exin.value) == binary_msg
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(np.array([0., 0.5, 0.5, 0.2]))
    assert str(exin.value) == binary_msg
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(CATEGORICAL_STRUCT_ARRAY[0])
    assert str(exin.value) == binary_msg
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(MIXED_ARRAY[0])
    assert str(exin.value) == binary_msg

    #

    samples = fudi.binary_sampler(numerical_binary_array, samples_number=5)
    assert np.array_equal(samples, numerical_binary_array_sampled)

    samples = fudi.binary_sampler(numerical_binary_array, samples_number=1000)
    assert np.allclose(
        samples.sum(axis=0) / samples.shape[0], proportions, atol=1e-1)

    samples = fudi.binary_sampler(
        numerical_binary_struct_array, samples_number=5)
    assert np.array_equal(samples, numerical_binary_struct_array_sampled)
    assert fuav.are_similar_dtype_arrays(
        np.asarray(numerical_binary_struct_array), samples, True)

    samples = fudi.binary_sampler(
        numerical_binary_struct_array, samples_number=1000)
    for i, name in enumerate(numerical_binary_struct_array.dtype.names):
        assert np.allclose(
            samples[name].sum() / samples[name].shape[0],
            proportions[i],
            atol=1e-1)
    assert fuav.are_similar_dtype_arrays(
        np.asarray(numerical_binary_struct_array), samples, True)
    def test_sample(self):
        """
        Tests :func:`~fatf.utils.data.augmentation.NormalSampling.sample`.
        """
        fatf.setup_random_seed()

        # Pure numerical sampling of a data point
        # ...numpy array results
        samples = self.numerical_np_augmentor.sample(NUMERICAL_NP_ARRAY[0, :],
                                                     samples_number=3)
        assert np.allclose(samples, NUMERICAL_NP_RESULTS, atol=1e-3)

        # ...structured array results
        samples_struct = self.numerical_struct_augmentor.sample(
            NUMERICAL_STRUCT_ARRAY[0], samples_number=3)
        for i in samples_struct.dtype.names:
            assert np.allclose(samples_struct[i],
                               NUMERICAL_STRUCT_RESULTS[i],
                               atol=1e-3)

        # ...numpy array results mean
        samples = self.numerical_np_augmentor.sample(NUMERICAL_NP_ARRAY[0, :],
                                                     samples_number=1000)
        assert np.allclose(samples.mean(axis=0),
                           NUMERICAL_NP_ARRAY[0, :],
                           atol=1e-1)
        assert np.allclose(samples.std(axis=0),
                           NUMERICAL_NP_ARRAY.std(axis=0),
                           atol=1e-1)

        # ...structured array results mean
        samples_struct = self.numerical_struct_augmentor.sample(
            NUMERICAL_STRUCT_ARRAY[0], samples_number=1000)
        for i in samples_struct.dtype.names:
            assert np.allclose(np.mean(samples_struct[i]),
                               NUMERICAL_STRUCT_ARRAY[0][i],
                               atol=1e-1)
            assert np.allclose(np.std(samples_struct[i]),
                               np.std(NUMERICAL_STRUCT_ARRAY[i]),
                               atol=1e-1)

        # Pure numerical sampling of the mean of the data
        # ...numpy array mean
        samples = self.numerical_np_augmentor.sample(samples_number=1000)
        assert np.allclose(samples.mean(axis=0),
                           NUMERICAL_NP_ARRAY.mean(axis=0),
                           atol=1e-1)
        assert np.allclose(samples.std(axis=0),
                           NUMERICAL_NP_ARRAY.std(axis=0),
                           atol=1e-1)

        # ...structured array mean
        samples_struct = self.numerical_struct_augmentor.sample(
            samples_number=1000)
        for i in samples_struct.dtype.names:
            assert np.allclose(np.mean(samples_struct[i]),
                               np.mean(NUMERICAL_STRUCT_ARRAY[i]),
                               atol=1e-1)
            assert np.allclose(np.std(samples_struct[i]),
                               np.std(NUMERICAL_STRUCT_ARRAY[i]),
                               atol=1e-1)

        #######################################################################

        # Numerical sampling with one categorical index defined
        # ...numpy array results
        samples = self.numerical_np_0_augmentor.sample(
            NUMERICAL_NP_ARRAY[0, :], samples_number=3)
        assert np.allclose(samples, NUMERICAL_NP_CAT_RESULTS, atol=1e-3)

        # ...structured array results
        samples_struct = self.numerical_struct_a_augmentor.sample(
            NUMERICAL_STRUCT_ARRAY[0], samples_number=3)
        for i in samples_struct.dtype.names:
            assert np.allclose(samples_struct[i],
                               NUMERICAL_STRUCT_CAT_RESULTS[i],
                               atol=1e-3)

        # ...numpy array results mean
        samples = self.numerical_np_0_augmentor.sample(
            NUMERICAL_NP_ARRAY[0, :], samples_number=100)
        # ......numerical
        assert np.allclose(samples.mean(axis=0)[1:],
                           NUMERICAL_NP_ARRAY[0, 1:],
                           atol=1e-1)
        assert np.allclose(samples.std(axis=0)[1:],
                           NUMERICAL_NP_ARRAY.std(axis=0)[1:],
                           atol=1e-1)
        # ......categorical
        val, freq = np.unique(samples[:, 0], return_counts=True)
        freq = freq / freq.sum()
        assert np.array_equal(val, NUMERICAL_NP_0_CAT_VAL)
        assert np.allclose(freq, NUMERICAL_NP_0_CAT_FREQ, atol=1e-1)

        # ...structured array results mean
        samples_struct = self.numerical_struct_a_augmentor.sample(
            NUMERICAL_STRUCT_ARRAY[0], samples_number=100)
        # ......numerical
        for i in samples_struct.dtype.names[1:]:
            assert np.allclose(np.mean(samples_struct[i]),
                               NUMERICAL_STRUCT_ARRAY[0][i],
                               atol=1e-1)
            assert np.allclose(np.std(samples_struct[i]),
                               np.std(NUMERICAL_STRUCT_ARRAY[i]),
                               atol=1e-1)
        # ......categorical
        val_struct, freq_struct = np.unique(samples_struct['a'],
                                            return_counts=True)
        freq_struct = freq_struct / freq_struct.sum()
        assert np.array_equal(val_struct, NUMERICAL_NP_0_CAT_VAL)
        assert np.allclose(freq_struct, NUMERICAL_NP_0_CAT_FREQ, atol=1e-1)

        # ...numpy array mean
        samples = self.numerical_np_0_augmentor.sample(samples_number=1000)
        # ......numerical
        assert np.allclose(samples.mean(axis=0)[1:],
                           NUMERICAL_NP_ARRAY.mean(axis=0)[1:],
                           atol=1e-1)
        # ......categorical
        val, freq = np.unique(samples[:, 0], return_counts=True)
        freq = freq / freq.sum()
        assert np.array_equal(val, NUMERICAL_NP_0_CAT_VAL)
        assert np.allclose(freq, NUMERICAL_NP_0_CAT_FREQ, atol=1e-1)

        # ...structured array mean
        samples_struct = self.numerical_struct_a_augmentor.sample(
            samples_number=1000)
        # ......numerical
        for i in samples_struct.dtype.names[1:]:
            assert np.allclose(np.mean(samples_struct[i]),
                               np.mean(NUMERICAL_STRUCT_ARRAY[i]),
                               atol=1e-1)
            assert np.allclose(np.std(samples_struct[i]),
                               np.std(NUMERICAL_STRUCT_ARRAY[i]),
                               atol=1e-1)
        # ......categorical
        val_struct, freq_struct = np.unique(samples_struct['a'],
                                            return_counts=True)
        freq_struct = freq_struct / freq_struct.sum()
        assert np.array_equal(val_struct, NUMERICAL_NP_0_CAT_VAL)
        assert np.allclose(freq_struct, NUMERICAL_NP_0_CAT_FREQ, atol=1e-1)

        #######################################################################
        #######################################################################

        # Pure categorical sampling
        # ...numpy array
        samples = self.categorical_np_012_augmentor.sample(
            CATEGORICAL_NP_ARRAY[0], samples_number=3)
        assert np.array_equal(samples, CATEGORICAL_NP_RESULTS)

        # ...structured array
        samples_struct = self.categorical_struct_abc_augmentor.sample(
            CATEGORICAL_STRUCT_ARRAY[0], samples_number=3)
        assert np.array_equal(samples_struct, CATEGORICAL_STRUCT_RESULTS)

        vals = [['a', 'b'], ['b', 'c', 'f'], ['c', 'g']]
        # ...numpy array proportions and values
        samples = self.categorical_np_012_augmentor.sample(
            CATEGORICAL_NP_ARRAY[0], samples_number=100)
        #
        proportions = [
            np.array([0.62, 0.38]),
            np.array([0.31, 0.17, 0.52]),
            np.array([0.63, 0.37])
        ]
        for i, index in enumerate([0, 1, 2]):
            val, freq = np.unique(samples[:, index], return_counts=True)
            freq = freq / freq.sum()
            assert np.array_equal(val, vals[i])
            assert np.allclose(freq, proportions[i], atol=1e-2)

        # ...structured array proportions and values
        samples_struct = self.categorical_struct_abc_augmentor.sample(
            CATEGORICAL_STRUCT_ARRAY[0], samples_number=100)
        #
        proportions = [
            np.array([0.74, 0.26]),
            np.array([0.38, 0.12, 0.50]),
            np.array([0.63, 0.37])
        ]
        for i, index in enumerate(['a', 'b', 'c']):
            val, freq = np.unique(samples_struct[index], return_counts=True)
            freq = freq / freq.sum()
            assert np.array_equal(val, vals[i])
            assert np.allclose(freq, proportions[i], atol=1e-2)

        # No need to check for mean of dataset since categorical features are
        # sampled from the distribution of the entire dataset and not centered
        # on the data_row.

        #######################################################################
        #######################################################################

        # Mixed array with categorical indices auto-discovered
        vals = [['a', 'c', 'f'], ['a', 'aa', 'b', 'bb']]
        proportions = [
            np.array([0.33, 0.33, 0.33]),
            np.array([0.33, 0.16, 0.16, 0.33])
        ]
        # Instance
        samples = self.mixed_augmentor.sample(MIXED_ARRAY[0], samples_number=3)
        # ...categorical
        assert np.array_equal(samples[['b', 'd']], MIXED_RESULTS[['b', 'd']])
        # ...numerical
        for i in ['a', 'c']:
            assert np.allclose(samples[i], MIXED_RESULTS[i], atol=1e-3)

        # Instance mean
        samples = self.mixed_augmentor.sample(MIXED_ARRAY[0],
                                              samples_number=1000)
        # ...numerical
        for i in ['a', 'c']:
            assert np.allclose(np.mean(samples[i]),
                               MIXED_ARRAY[0][i],
                               atol=1e-1)
            assert np.allclose(np.std(samples[i]),
                               np.std(MIXED_ARRAY[i]),
                               atol=1e-1)
        # ...categorical
        for i, index in enumerate(['b', 'd']):
            val, freq = np.unique(samples[index], return_counts=True)
            freq = freq / freq.sum()
            assert np.array_equal(val, vals[i])
            assert np.allclose(freq, proportions[i], atol=1e-1)

        # Dataset mean
        samples = self.mixed_augmentor.sample(samples_number=1000)
        # ...numerical
        for i in ['a', 'c']:
            assert np.allclose(np.mean(samples[i]),
                               np.mean(MIXED_ARRAY[i]),
                               atol=1e-1)
            assert np.allclose(np.std(samples[i]),
                               np.std(MIXED_ARRAY[i]),
                               atol=1e-1)
        # ...categorical
        for i, index in enumerate(['b', 'd']):
            val, freq = np.unique(samples[index], return_counts=True)
            freq = freq / freq.sum()
            assert np.array_equal(val, vals[i])
            assert np.allclose(freq, proportions[i], atol=1e-1)

        #######################################################################

        # Sample without float cast
        samples = self.numerical_struct_augmentor_f.sample(samples_number=5)
        samples_answer = np.array(
            [(-1, 0, 0.172, 0.624),
             (1, 1, 0.343, 0.480),
             (0, 0, 0.649, 0.374),
             (0, 0, 0.256, 0.429),
             (0, 0, 0.457, 0.743)],
            dtype=NUMERICAL_STRUCT_ARRAY.dtype)  # yapf: disable
        for i in ['a', 'b', 'c', 'd']:
            assert np.allclose(samples[i], samples_answer[i], atol=1e-3)

        # Cast to float is on in this comparison (this output was generated
        # with self.numerical_struct_augmentor)
        samples = self.numerical_struct_augmentor_f.sample(samples_number=5)
        samples_answer = np.array(
            [(1.250, 0.264, 0.381, 0.479),
             (-0.181, 1.600, 0.602, 0.345),
             (0.472, 0.609, -0.001, 1.026),
             (0.105, 1.091, 0.384, 0.263),
             (1.263, -0.007, 0.762, 0.603)],
            dtype=NUMERICAL_STRUCT_ARRAY.dtype)  # yapf: disable
        for i in ['a', 'b', 'c', 'd']:
            assert np.allclose(samples[i], samples_answer[i], atol=1e-3)