Beispiel #1
0
def _choose_structured_to_unstructured() -> bool:
    """
    Decides which implementation of ``structured_to_unstructured`` to use.

    See :func:`fatf.utils.tools.structured_to_unstructured` function
    description for details on how the choice is made.

    Returns
    -------
    use_local_implementation : boolean
        ``True`` if local implementation
        (:func:`fatf.utils.tools.fatf_structured_to_unstructured`) is to be
        used. ``False`` if numpy's implementation
        (:func:`numpy.lib.recfunctions.structured_to_unstructured`) is to be
        used.
    """
    # Only the major and minor components take part in the 1.16 check.
    # Dropping the remaining ones keeps non-numeric suffixes of pre-release
    # builds (e.g. the ``0rc1`` in ``1.17.0rc1``) away from ``int``.
    np_ver = [int(i) for i in np.version.version.split('.')[:2]]
    # Use builtin numpy if it is implemented therein
    use_local_implementation = not fut.at_least_verion([1, 16], np_ver)
    if use_local_implementation:
        logger.info("Using fatf's fatf.utils.array.tools."
                    'fatf_structured_to_unstructured as fatf.utils.'
                    'array.tools.structured_to_unstructured and fatf.utils.'
                    'array.tools.fatf_structured_to_unstructured_row as '
                    'fatf.utils.array.tools.structured_to_unstructured_row.')
    else:
        logger.info("Using numpy's numpy.lib.recfunctions."
                    'structured_to_unstructured as fatf.utils.array.tools.'
                    'structured_to_unstructured and fatf.utils.array.tools.'
                    'structured_to_unstructured_row.')
    return use_local_implementation
# License: new BSD

import csv
import os

from typing import Dict, List, Tuple, Union

import numpy as np

import fatf.utils.tools as fut
import fatf.utils.array.validation as fuav

# Names exported by a star-import of this module.
__all__ = ['load_data', 'load_health_records', 'load_iris']

# Installed numpy version as a list of integers, one per dot-separated
# component of ``np.version.version``.
# NOTE(review): ``int`` raises ``ValueError`` for a non-numeric component
# (e.g. the ``0rc1`` of a release candidate) -- confirm whether such numpy
# builds need to be supported.
_NUMPY_VERSION = [int(i) for i in np.version.version.split('.')]
# Result of checking the installed numpy against the 1.14 minimum requirement.
_NUMPY_1_14 = fut.at_least_verion([1, 14], _NUMPY_VERSION)

# Path of the ``datasets`` directory located next to this module.
_DATA_PATH = os.path.join(os.path.dirname(__file__), 'datasets')


def _validate_data_header(X: np.ndarray, y: np.ndarray, n_samples: int,
                          n_features: int, y_names: np.ndarray) -> bool:
    """
    Checks if reading in data is consistent by ... the csv header.

    For details on valid header formatting see the
    :func:`fatf.utils.datasets.load_data` documentation.

    Parameters
    ----------
    X : numpy.ndarray
def test_at_least_verion():
    """
    Tests :func:`fatf.utils.tools.at_least_verion` function.

    The function is always called as
    ``at_least_verion(minimum_requirement, package_version)``; the lists
    defined below are fed to both positions to exercise every expected error
    message as well as the actual version comparison.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    # Expected error messages
    min_type_error = 'minimum_requirement parameter has to be a list.'
    min_element_type_error = ('{} element ({}) of the minimum_requirement '
                              'list is not an integer.')
    curr_element_type_error = ('{} element ({}) of the package_version list '
                               'is not an integer.')
    current_type_error = 'package_version parameter has to be a list.'
    min_value_error = 'Minimum version for a package is not specified.'
    current_value_error = 'Current version for a package is not specified.'
    length_value_error = ('The minimum requirement should not be more precise '
                          '(longer) than the current version.')
    # Inputs: not a list at all
    wrong_outer_types = [None, 0, '0', {}, range(5)]
    # Inputs: lists whose first element is not an integer
    wrong_inner_types = [[None, None], ['0', '0'], [1., '0'], [2., 2.],
                         [1. + 0j, 6 + 1j, 2.]]
    partially_wrong_inner_type_1 = [[None, 0], ['0', 0]]
    # Inputs: lists whose second element is the first non-integer one
    partially_wrong_inner_type_2 = [[0, '0'], [0, None]]
    # Inputs: well-formed and empty version lists
    correct_inner_types = [[0], [0, 0]]
    empty_inner_type = [[]]

    def assert_raises(error_type, message, minimum_requirement,
                      package_version):
        """
        Checks that the call fails with ``error_type`` and ``message``.
        """
        with pytest.raises(error_type) as exin:
            assert fut.at_least_verion(minimum_requirement, package_version)
        assert str(exin.value) == message

    def min_element(index, version):
        """
        Builds the minimum requirement error for ``version[index]``.
        """
        return min_element_type_error.format(index, version[index])

    def curr_element(index, version):
        """
        Builds the package version error for ``version[index]``.
        """
        return curr_element_type_error.format(index, version[index])

    well_formed = correct_inner_types + empty_inner_type

    # A minimum requirement that is not a list is expected to be reported
    # whatever is passed as the package version. Every pairing is also tried
    # with the two arguments swapped.
    for i in wrong_outer_types:
        for j in wrong_outer_types:
            assert_raises(TypeError, min_type_error, i, j)

        # Swapped: first element of the minimum requirement is malformed
        for j in wrong_inner_types + partially_wrong_inner_type_1:
            assert_raises(TypeError, min_type_error, i, j)
            assert_raises(TypeError, min_element(0, j), j, i)

        # Swapped: second element of the minimum requirement is malformed
        for j in partially_wrong_inner_type_2:
            assert_raises(TypeError, min_type_error, i, j)
            assert_raises(TypeError, min_element(1, j), j, i)

        # Swapped: the package version is not a list
        for j in well_formed:
            assert_raises(TypeError, min_type_error, i, j)
            assert_raises(TypeError, current_type_error, j, i)

    # Lists with a non-integer first element
    for i in wrong_inner_types:
        for j in wrong_inner_types:
            assert_raises(TypeError, min_element(0, i), i, j)

        for j in partially_wrong_inner_type_1:
            assert_raises(TypeError, min_element(0, i), i, j)
            assert_raises(TypeError, min_element(0, j), j, i)

        for j in partially_wrong_inner_type_2:
            assert_raises(TypeError, min_element(0, i), i, j)
            assert_raises(TypeError, min_element(1, j), j, i)

        for j in well_formed:
            assert_raises(TypeError, min_element(0, i), i, j)
            assert_raises(TypeError, curr_element(0, i), j, i)

    for i in partially_wrong_inner_type_1:
        for j in partially_wrong_inner_type_1:
            assert_raises(TypeError, min_element(0, i), i, j)

        for j in partially_wrong_inner_type_2:
            assert_raises(TypeError, min_element(0, i), i, j)
            assert_raises(TypeError, min_element(1, j), j, i)

        for j in well_formed:
            assert_raises(TypeError, min_element(0, i), i, j)
            assert_raises(TypeError, curr_element(0, i), j, i)

    # Lists with a non-integer second element
    for i in partially_wrong_inner_type_2:
        for j in partially_wrong_inner_type_2:
            assert_raises(TypeError, min_element(1, i), i, j)

        for j in well_formed:
            assert_raises(TypeError, min_element(1, i), i, j)
            assert_raises(TypeError, curr_element(1, i), j, i)

    # Well-formed and empty version lists
    assert len(correct_inner_types) == 2, \
        'Testing correct_inner_types just for 2 elements.'
    assert fut.at_least_verion(correct_inner_types[0], correct_inner_types[1])
    assert_raises(ValueError, length_value_error, correct_inner_types[1],
                  correct_inner_types[0])
    for i in correct_inner_types:
        assert fut.at_least_verion(i, i)

        for j in empty_inner_type:
            assert_raises(ValueError, current_value_error, i, j)
            assert_raises(ValueError, min_value_error, j, i)

    for i in empty_inner_type:
        for j in empty_inner_type:
            assert_raises(ValueError, min_value_error, i, j)

    # Correct outer, correct inner, different lengths
    # Minimum requirement shorter than the package version and satisfied
    assert fut.at_least_verion([1], [1, 4, 2])
    assert fut.at_least_verion([1, 6], [1, 6, 0])
    # Minimum requirement shorter than the package version and not satisfied
    assert not fut.at_least_verion([1], [0, 4, 2])
    # Minimum requirement longer than the package version: always rejected,
    # both when the version would otherwise satisfy it...
    assert_raises(ValueError, length_value_error, [1, 0, 0], [1, 0])
    # ...and when it would not
    assert_raises(ValueError, length_value_error, [1, 4, 2], [0, 5])

    # Correct outer, correct inner, same lengths
    # Correct
    assert fut.at_least_verion([1, 4, 2], [1, 4, 2])
    assert fut.at_least_verion([1, 6, 2], [1, 7, 1])
    assert fut.at_least_verion([1, 6, 2], [2, 0, 0])
    # Incorrect
    assert not fut.at_least_verion([2, 0, 0], [1, 9, 9])
Beispiel #4
0
# License: new BSD

from typing import Union

import sklearn.linear_model
import sklearn.utils.validation

import numpy as np

import fatf.transparency.sklearn.tools as ftst
import fatf.utils.tools as fut

# Names exported by a star-import of this module.
__all__ = ['linear_classifier_coefficients', 'SKLearnLinearModelExplainer']

# First two dot-separated components (major, minor) of the installed
# scikit-learn version, as integers.
_SKLEARN_VERSION = [int(i) for i in sklearn.__version__.split('.')[:2]]
# Result of checking the installed scikit-learn against the 0.22 minimum
# requirement.
_SKLEARN_0_22 = fut.at_least_verion([0, 22], _SKLEARN_VERSION)

if _SKLEARN_0_22:  # pragma: nocover
    # pylint: disable=invalid-name,protected-access,no-member
    _linear_base = sklearn.linear_model._base
    _linear_coordinate_descent = sklearn.linear_model._coordinate_descent
    _lienar_stochastic_gradient = sklearn.linear_model._stochastic_gradient
    _linear_bayes = sklearn.linear_model._bayes
    _linear_theil = sklearn.linear_model._theil_sen
    _linear_omp = sklearn.linear_model._omp
    _linear_ridge = sklearn.linear_model._ridge
    _linear_angles = sklearn.linear_model._least_angle
else:  # pragma: nocover
    _linear_base = sklearn.linear_model.base  # pylint: disable=invalid-name
    # pylint: disable=invalid-name
    _linear_coordinate_descent = sklearn.linear_model.coordinate_descent
"""
# Author: Kacper Sokol <*****@*****.**>
#         Rafael Poyiadzi <*****@*****.**>
# License: new BSD

import pytest

import numpy as np

import fatf.transparency.data.describe_functions as ftddf
import fatf.utils.tools as fut

from fatf.exceptions import IncorrectShapeError

# Installed numpy version as a list of integers, one per dot-separated
# component of ``np.version.version``.
# NOTE(review): ``int`` raises ``ValueError`` for a non-numeric component
# (e.g. the ``0rc1`` of a release candidate) -- confirm whether such numpy
# builds need to be supported.
_NUMPY_VERSION = [int(i) for i in np.version.version.split('.')]
# Results of checking the installed numpy against the minimum requirement
# encoded in each flag's name.
_NUMPY_1_16 = fut.at_least_verion([1, 16], _NUMPY_VERSION)
_NUMPY_1_14_4 = fut.at_least_verion([1, 14, 4], _NUMPY_VERSION)
_NUMPY_1_11 = fut.at_least_verion([1, 11], _NUMPY_VERSION)
_NUMPY_1_10 = fut.at_least_verion([1, 10], _NUMPY_VERSION)

# Key names for numerical descriptions.
# NOTE(review): presumably the keys of the dictionaries returned by the
# describe functions under test -- their use is not visible here, confirm.
NUMERICAL_KEYS = [
    'count', 'mean', 'std', 'max', 'min', '25%', '50%', '75%', 'nan_count'
]
# Key names for categorical descriptions (same caveat as above).
CATEGORICAL_KEYS = [
    'count', 'unique', 'unique_counts', 'top', 'freq', 'is_top_unique'
]


def test_describe_numerical_array():
    """
    Tests :func:`fatf.transparency.data.describe.describe_numerical_array`.
Beispiel #6
0
import sklearn.exceptions
import sklearn.linear_model
import sklearn.naive_bayes
import sklearn.neighbors
import sklearn.svm
import sklearn.tree

import numpy as np

import fatf

import fatf.transparency.sklearn.linear_model as ftsl
import fatf.utils.tools as fut

# First two dot-separated components (major, minor) of the installed
# scikit-learn version, as integers.
_SKLEARN_VERSION = [int(i) for i in sklearn.__version__.split('.')[:2]]
# Results of checking the installed scikit-learn against the minimum
# requirement encoded in each flag's name.
_SKLEARN_0_20 = fut.at_least_verion([0, 20], _SKLEARN_VERSION)
_SKLEARN_0_22 = fut.at_least_verion([0, 22], _SKLEARN_VERSION)
_SKLEARN_0_23 = fut.at_least_verion([0, 23], _SKLEARN_VERSION)

# yapf: disable
# scikit-learn linear classifier classes (the classes themselves, not
# instances).
# NOTE(review): ``sklearn.discriminant_analysis`` is referenced below but its
# import is not among the import lines visible here -- confirm that the
# submodule is imported.
LINEAR_CLASSIFIERS = [
    sklearn.svm.LinearSVC,
    sklearn.linear_model.RidgeClassifier,
    sklearn.linear_model.RidgeClassifierCV,
    sklearn.discriminant_analysis.LinearDiscriminantAnalysis,
    #
    sklearn.linear_model.LogisticRegression,
    sklearn.linear_model.LogisticRegressionCV
]
# These three linear models get different results in different Python versions,
# hence will not be tested for parameters.
Beispiel #7
0
           'is_textual_dtype',
           'is_base_dtype',
           'is_flat_dtype',
           'are_similar_dtypes',
           'are_similar_dtype_arrays',
           'is_numerical_array',
           'is_textual_array',
           'is_base_array',
           'is_1d_array',
           'is_2d_array',
           'is_structured_row',
           'is_1d_like',
           'is_structured_array']  # yapf: disable

# Installed numpy version as a list of integers, one per dot-separated
# component of ``np.version.version``.
# NOTE(review): ``int`` raises ``ValueError`` for a non-numeric component
# (e.g. the ``0rc1`` of a release candidate) -- confirm whether such numpy
# builds need to be supported.
_NUMPY_VERSION = [int(i) for i in np.version.version.split('.')]
# Result of checking the installed numpy against the 1.13 minimum requirement.
_NUMPY_1_13 = fut.at_least_verion([1, 13], _NUMPY_VERSION)

# Each set below holds one-character codes, one set element per character of
# the string literal.
# Unsigned byte, Boolean, (signed) byte -- Boolean, unsigned integer,
# (signed) integer, floating-point and complex-floating point.
_NUMPY_NUMERICAL_KINDS = set('B?buifc')
# Unicode string
_NUMPY_TEXTUAL_KINDS = set('U')
# Zero-terminated bytes
_NUMPY_TEXTUAL_KINDS_UNSUPPORTED = set('Sa')
# Union of the numerical, textual and unsupported textual codes above.
# O, M, m and V are considered complex objects
_NUMPY_BASE_KINDS = set('?buifcBSaU')


def is_numerical_dtype(dtype: np.dtype) -> bool:
    """
    Determines whether a numpy dtype object is of numerical type.