Exemple #1
0
def test_get_required_parameters_number():
    """
    Tests :func:`fatf.utils.validation.get_required_parameters_number`.

    A non-callable argument has to raise a ``TypeError`` with a fixed
    message; for callables the expected count covers only the parameters
    without a default value (a ``**kwargs`` catch-all is not counted).
    """
    expected_message = ('The callable_object should be Python callable, '
                        'e.g., a function or a method.')

    with pytest.raises(TypeError) as exception_info:
        fuv.get_required_parameters_number('callable')
    assert str(exception_info.value) == expected_message

    def no_parameters():
        pass  # pragma: no cover

    def one_required(x):
        pass  # pragma: no cover

    def two_required(x, y):
        pass  # pragma: no cover

    def one_required_one_optional(x, y=3):
        pass  # pragma: no cover

    def two_optional(x=3, y=3):
        pass  # pragma: no cover

    def one_required_and_kwargs(x, **kwargs):
        pass  # pragma: no cover

    # Each callable is paired with the number of required parameters that
    # the validated function is expected to report for it.
    expected_counts = [
        (no_parameters, 0),
        (one_required, 1),
        (two_required, 2),
        (one_required_one_optional, 1),
        (two_optional, 0),
        (one_required_and_kwargs, 1),
    ]
    for function, required_number in expected_counts:
        assert fuv.get_required_parameters_number(function) == required_number
Exemple #2
0
def check_distance_functionality(distance_function: Callable[..., np.ndarray],
                                 suppress_warning: bool = False) -> bool:
    """
    Checks whether a distance function takes exactly 2 required parameters.

    .. versionadded:: 0.0.2

    The distance function to be checked should calculate a distance matrix
    (2-dimensional numpy array) between all of the rows of the two
    2-dimensional numpy arrays passed as input to the ``distance_function``.
    Only the number of required parameters is inspected; the function is
    never called.

    Parameters
    ----------
    distance_function : Callable[[numpy.ndarray, numpy.ndarray, ...], \
numpy.ndarray]
        A function that calculates a distance matrix between all pairs of rows
        of the two input arrays.
    suppress_warning : boolean, optional (default=False)
        A boolean parameter that indicates whether the function should suppress
        its warning message. Defaults to False.

    Warns
    -----
    UserWarning
        Warns about the details of the required functionality that the distance
        function lacks.

    Raises
    ------
    TypeError
        The ``distance_function`` parameter is not a Python callable or the
        ``suppress_warning`` parameter is not a boolean.

    Returns
    -------
    is_functional : boolean
        A boolean variable that indicates whether the distance function is
        valid.
    """
    if not callable(distance_function):
        raise TypeError('The distance_function parameter should be a Python '
                        'callable.')
    if not isinstance(suppress_warning, bool):
        raise TypeError('The suppress_warning parameter should be a boolean.')

    required_param_n = fuv.get_required_parameters_number(distance_function)
    is_functional = required_param_n == 2

    if not is_functional and not suppress_warning:
        # Not every callable has a ``__name__`` attribute, e.g.,
        # ``functools.partial`` objects and instances of classes that define
        # ``__call__``; fall back to the object's representation so that
        # building the warning message never raises an AttributeError.
        function_name = getattr(distance_function, '__name__',
                                repr(distance_function))
        message = ("The '{}' distance function has incorrect number "
                   '({}) of the required parameters. It needs to have '
                   'exactly 2 required parameters. Try using optional '
                   'parameters if you require more functionality.').format(
                       function_name, required_param_n)
        warnings.warn(message, category=UserWarning)

    return is_functional
Exemple #3
0
def check_kernel_functionality(kernel_function: Callable[..., np.ndarray],
                               suppress_warning: bool = False) -> bool:
    """
    Checks whether a kernel function has exactly one required parameter.

    .. versionadded:: 0.0.2

    Only the number of required parameters is inspected; the kernel function
    is never called.

    Parameters
    ----------
    kernel_function : Callable[[numpy.ndarray, ...], numpy.ndarray]
        A Python callable, e.g., a function or a method, which represents a
        kernel function.
    suppress_warning : boolean, optional (default=False)
        A boolean parameter that indicates whether the function should suppress
        its warning message. Defaults to False.

    Warns
    -----
    UserWarning
        Warns about the details of the required functionality that the kernel
        function lacks.

    Raises
    ------
    TypeError
        The ``kernel_function`` parameter is not a Python callable or the
        ``suppress_warning`` parameter is not a boolean.

    Returns
    -------
    is_functional : boolean
        A boolean variable that indicates whether the kernel function is valid.
    """
    if not callable(kernel_function):
        raise TypeError('The kernel_function parameter should be a Python '
                        'callable.')
    if not isinstance(suppress_warning, bool):
        raise TypeError('The suppress_warning parameter should be a boolean.')

    required_param_n = fuv.get_required_parameters_number(kernel_function)
    is_functional = required_param_n == 1

    if not is_functional and not suppress_warning:
        # Not every callable has a ``__name__`` attribute, e.g.,
        # ``functools.partial`` objects and instances of classes that define
        # ``__call__``; fall back to the object's representation so that
        # building the warning message never raises an AttributeError.
        function_name = getattr(kernel_function, '__name__',
                                repr(kernel_function))
        message = ("The '{}' kernel function has incorrect number ({}) of the "
                   'required parameters. It needs to have exactly 1 required '
                   'parameter. Try using optional parameters if you require '
                   'more functionality.').format(function_name,
                                                 required_param_n)
        warnings.warn(message, category=UserWarning)

    return is_functional
def _validate_input(dataset: np.ndarray, explain_instance: Callable,
                    sample_size: int, explanations_number: int) -> bool:
    """
    Checks the validity of the submodular pick input parameters.

    The input parameters, warnings and exceptions are described in the
    documentation of the :func:`fatf.transparency.models.submodular_pick`
    function.

    Returns
    -------
    is_valid : boolean
        ``True`` when all of the checks pass; any invalid input results in an
        exception being raised, so no other value is ever returned.
    """
    if not fuav.is_2d_array(dataset):
        raise IncorrectShapeError(
            'The input data set must be a 2-dimensional array.')
    if not fuav.is_base_array(dataset):
        raise ValueError('The input data set must only contain base types '
                         '(strings and numbers).')

    # Both counts share the same rules: the type is checked first and the
    # sign second. sample_size is fully validated before explanations_number.
    integer_parameters = (('sample_size', sample_size),
                          ('explanations_number', explanations_number))
    for parameter_name, parameter_value in integer_parameters:
        if not isinstance(parameter_value, int):
            raise TypeError(
                '{} must be an integer.'.format(parameter_name))
        if parameter_value < 0:
            raise ValueError(
                '{} must be a non-negative integer.'.format(parameter_name))

    # The comparison is skipped whenever either of the two counts is 0.
    if sample_size and explanations_number:
        if sample_size < explanations_number:
            raise ValueError('The number of explanations cannot be larger '
                             'than the number of samples.')

    if not callable(explain_instance):
        raise TypeError('The explain_instance should be a Python callable '
                        '(function or method).')
    if fuv.get_required_parameters_number(explain_instance) != 1:
        raise RuntimeError('The explain_instance callable must accept '
                           'exactly one required parameter.')

    return True
def batch_data(data: np.ndarray,
               batch_size: int = 50,
               transformation_fn: Callable = None) -> np.ndarray:
    """
    Slices ``data`` into batches and returns them sequentially.

    .. versionadded:: 0.1.1

    Some data may be too large to fit into memory as a whole, therefore
    this function slices them row-wise into batches and yields the batches
    one after another. When a ``transformation_fn`` is given, every batch is
    passed through it before being yielded.

    The input is validated as soon as this function is called; the batches
    themselves are only produced when the returned generator is iterated.

    Parameters
    ----------
    data : numpy.ndarray
        A two dimensional numpy array (either classic or structured) to be
        sliced into batches.
    batch_size : integer, optional (default=50)
        The size (number of rows) of each batch.
    transformation_fn : callable, optional (default=None)
        A callable object to apply to each batch before returning it.
        It must have exactly one required parameter.

    Raises
    ------
    IncorrectShapeError
        The ``data`` array is not 2-dimensional.
    RuntimeError
        The transformation function does not have exactly one required
        parameter.
    TypeError
        The ``batch_size`` is not an integer or the ``transformation_fn`` is
        not a callable object.
    ValueError
        The ``batch_size`` is smaller than 1.

    Yields
    ------
    slice : numpy.ndarray
        A slice of data.
    """
    if not fuav.is_2d_array(data):
        raise IncorrectShapeError('The data array must be 2-dimensional.')
    # Structured arrays are sliced along rows only, whereas classic arrays
    # additionally get an explicit "all columns" slice.
    is_structured = fuav.is_structured_array(data)

    if not isinstance(batch_size, int):
        raise TypeError('The batch size must be an integer.')
    if batch_size < 1:
        raise ValueError('The batch size must be larger than 0.')

    if transformation_fn is not None:
        if not callable(transformation_fn):
            raise TypeError(
                'The transformation function must be a callable object.')
        required_n = fuv.get_required_parameters_number(transformation_fn)
        if required_n != 1:
            raise RuntimeError(
                'The transformation function must have only one required '
                'parameter; now it has {}.'.format(required_n))

    rows_number = data.shape[0]

    def _batch_data():
        for start in np.arange(0, rows_number, batch_size):
            # The last batch may be shorter than ``batch_size``.
            stop = min(start + batch_size, rows_number)
            if is_structured:
                batch = data[start:stop]
            else:
                batch = data[start:stop, :]
            # Without a transformation the raw slice is yielded as is.
            if transformation_fn is not None:
                batch = transformation_fn(batch)
            yield batch

    return _batch_data()
Exemple #6
0
def _validate_input_local_fidelity(
        dataset: np.ndarray, data_row: Union[np.ndarray, np.void],
        global_predictive_function: PredictiveFunctionType,
        local_predictive_function: PredictiveFunctionType,
        metric_function: Callable[[np.ndarray, np.ndarray], float],
        explained_class_index: Union[int, None],
        explained_feature_indices: Union[List[IndexType], None],
        fidelity_radius_percentage: int, samples_number: int) -> bool:
    """
    Checks the input parameters of the ``local_fidelity_score`` function.

    The parameters validated here belong to the
    :func:`fatf.utils.transparency.surrogate_evaluation.local_fidelity_score`
    function; their description, as well as the description of the errors
    and exceptions, can be found in its documentation.

    Note that the global predictive function is called once, on the first
    row of the ``dataset``, to determine the dimensionality of its output.

    Returns
    -------
    is_input_ok : boolean
        ``True`` when all of the checks pass; any invalid input results in an
        exception being raised, so no other value is ever returned.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-statements
    # Data set and data row.
    if not fuav.is_2d_array(dataset):
        raise IncorrectShapeError('The input dataset must be a '
                                  '2-dimensional numpy array.')
    if not fuav.is_base_array(dataset):
        raise TypeError('The input dataset must be of a base type -- numbers '
                        'and/or strings.')

    if not fuav.is_1d_like(data_row):
        raise IncorrectShapeError('The data_row must either be a '
                                  '1-dimensional numpy array or a numpy '
                                  'void object for structured data rows.')

    if not fuav.are_similar_dtype_arrays(dataset, np.array([data_row])):
        raise TypeError('The dtype of the data_row is too different from '
                        'the dtype of the dataset array.')

    # A feature count mismatch for structured arrays is already caught by
    # the dtype comparison above; classic numpy arrays need an explicit
    # comparison of their shapes.
    is_classic = not fuav.is_structured_array(dataset)
    if is_classic and dataset.shape[1] != data_row.shape[0]:
        raise IncorrectShapeError('The data_row must contain the same '
                                  'number of features as the dataset.')

    # Global predictive function.
    if not callable(global_predictive_function):
        raise TypeError('The global_predictive_function should be a Python '
                        'callable, e.g., a Python function.')
    if fuv.get_required_parameters_number(global_predictive_function) != 1:
        raise IncompatibleModelError(
            'The global predictive function must have exactly *one* '
            'required parameter to work with this metric.')

    # Local predictive function.
    if not callable(local_predictive_function):
        raise TypeError('The local_predictive_function should be a Python '
                        'callable, e.g., a Python function.')
    if fuv.get_required_parameters_number(local_predictive_function) != 1:
        raise IncompatibleModelError(
            'The local predictive function must have exactly *one* '
            'required parameter to work with this metric.')

    # Metric function.
    if not callable(metric_function):
        raise TypeError('The metric_function should be a Python callable, '
                        'e.g., a Python function.')
    if fuv.get_required_parameters_number(metric_function) != 2:
        raise TypeError('The metric_function must take exactly *two* '
                        'required parameters.')

    # Explained class index -- its validity depends on the shape of the
    # global model's output, which is probed with a single data point.
    sample_prediction = global_predictive_function(dataset[:1])
    assert not fuav.is_structured_array(sample_prediction), 'Must be plain.'
    assert sample_prediction.shape[0] == 1, 'Just 1 data point was predicted.'
    if fuav.is_2d_array(sample_prediction):  # A probabilistic model.
        if explained_class_index is not None:
            if not isinstance(explained_class_index, int):
                raise TypeError('For probabilistic global models, i.e., '
                                'global predictive functions, the '
                                'explained_class_index parameter has to be an '
                                'integer or None.')
            classes_number = sample_prediction.shape[1]
            if not 0 <= explained_class_index < classes_number:
                raise ValueError('The explained_class_index parameter is '
                                 'negative or larger than the number of '
                                 'classes output by the global '
                                 'probabilistic model.')
    elif fuav.is_1d_array(sample_prediction):
        if explained_class_index is not None:
            warnings.warn(
                'The explained_class_index parameter is not None and will be '
                'ignored since the global model is not probabilistic.',
                UserWarning)
    else:
        assert False, ('Global predictor must output a 1- or 2-dimensional '
                       'numpy array.')  # pragma: nocover

    # Explained feature indices.
    if explained_feature_indices is not None:
        if not isinstance(explained_feature_indices, list):
            raise TypeError('The explained_feature_indices parameter must be '
                            'a Python list or None.')
        invalid_indices = fuat.get_invalid_indices(
            dataset, np.asarray(explained_feature_indices))
        if invalid_indices.size:
            raise IndexError(
                'The following column indices are invalid for the input '
                'dataset: {}.'.format(invalid_indices))

    # Fidelity radius percentage.
    if not isinstance(fidelity_radius_percentage, int):
        raise TypeError('The fidelity_radius_percentage must be an integer '
                        'between 1 and 100.')
    if not 0 < fidelity_radius_percentage <= 100:
        raise ValueError('The fidelity_radius_percentage must be an '
                         'integer between 1 and 100.')

    # Number of samples.
    if not isinstance(samples_number, int):
        raise TypeError('The samples_number must be an integer.')
    if samples_number < 1:
        raise ValueError('The samples_number must be a positive integer.')

    return True