Python are_similar_dtype_arraysの例、fatf.utils.array.validation.are_similar_dtype_arrays Pythonの例

コード例 #1

0

ファイルを表示

    def _validate_data_point(self, data_point: DataRow, clip: bool) -> bool:
        """
        Checks the arguments passed to the ``score_data_point`` method.

        Parameters
        ----------
        data_point : Union[numpy.ndarray, numpy.void]
            A single data row: a 1-dimensional numpy array for classic numpy
            arrays or a numpy void for structured numpy arrays.
        clip : boolean
            Must be a boolean; only its type is checked here.

        Raises
        ------
        IncorrectShapeError
            The data point is not 1-dimensional (numpy ndarray for classic
            arrays, numpy void for structured arrays), or -- for classic
            arrays -- its number of columns (features) differs from the one
            of the data set held by this object.
        TypeError
            The data point is not of a base type (strings and/or numbers), its
            dtype is too different from the dtype of the data set held by this
            object, or ``clip`` is not a boolean.

        Returns
        -------
        is_valid : boolean
            ``True`` when every check passes; a failed check raises instead.
        """
        if not fuav.is_1d_like(data_point):
            raise IncorrectShapeError(
                'The data point has to be 1-dimensional '
                'numpy array or numpy void (for '
                'structured arrays).')

        # Wrap the row in a one-element array for the array-level validators.
        row_as_array = np.asarray([data_point])

        if not fuav.is_base_array(row_as_array):
            raise TypeError(
                'The data point has to be of a base type (strings '
                'and/or numbers).')

        if not fuav.are_similar_dtype_arrays(self.data_set, row_as_array):
            raise TypeError(
                'The dtypes of the data set used to initialise '
                'this class and the provided data point are too '
                'different.')

        # The column count is only compared for unstructured data; for
        # structured arrays a mismatch is already picked up by the dtype
        # comparison above.
        if (not self._is_structured
                and self.data_set.shape[1] != row_as_array.shape[1]):
            raise IncorrectShapeError(
                'The data point has different '
                'number of columns (features) than '
                'the data set used to initialise '
                'this class.')

        if not isinstance(clip, bool):
            raise TypeError('The clip parameter has to be a boolean.')

        return True

コード例 #2

0

ファイルを表示

ファイル: transformation.py プロジェクト: alexhepburn/fat-forensics-broken

def _validate_input_drm(
        dataset: np.ndarray,
        data_row: Union[np.ndarray, np.void]) -> bool:
    """
    Validates :func:`fatf.utils.data.transformation.dataset_row_masking` input.

    The ``dataset`` has to be a 2-dimensional array of a base type and the
    ``data_row`` a 1-dimensional array (or a numpy void) whose dtype is
    compatible with the ``dataset`` and whose length equals the number of
    columns in the ``dataset``.

    The input parameters, warnings and exceptions are described in the
    documentation of the
    :func:`fatf.utils.data.transformation.dataset_row_masking` function.

    Returns
    -------
    is_valid : boolean
        ``True`` when every check passes; a failed check raises instead.
    """
    if not fuav.is_2d_array(dataset):
        raise IncorrectShapeError(
            'The input dataset must be a 2-dimensional '
            'numpy array.')

    if not fuav.is_base_array(dataset):
        raise TypeError(
            'The input dataset must be of a base type -- text, '
            'numbers or mixture of the two.')

    if not fuav.is_1d_like(data_row):
        raise IncorrectShapeError(
            'The data row must either be a '
            '1-dimensional numpy array or a numpy void '
            'object for structured rows.')

    # For structured arrays this dtype comparison also covers the number of
    # columns.
    row_as_array = np.array([data_row])
    dtypes_agree = fuav.are_similar_dtype_arrays(
        dataset, row_as_array, strict_comparison=False)
    if not dtypes_agree:
        raise TypeError(
            'The dtype of the data row is too different from the '
            'dtype of the dataset provided.')

    # With agreeing dtypes the row and the data set are either both structured
    # or both plain, so only the plain case needs an explicit length check.
    if (not fuav.is_structured_array(dataset)
            and dataset.shape[1] != data_row.shape[0]):
        raise IncorrectShapeError(
            'The data row must contain the same '
            'number of elements as the number of '
            'columns in the provided dataset.')

    return True

コード例 #3

0

ファイルを表示

def test_binary_sampler():
    """
    Tests :func:`fatf.utils.data.instance_augmentation.binary_sampler`.

    The test first checks that non-binary rows are rejected with a
    ``ValueError`` and then samples around a binary row given as a classic
    and as a structured array, comparing small samples with fixed expected
    arrays and large samples with the expected per-feature proportions.
    """
    # The exact-match expectations below presumably rely on this seed and on
    # the order of the sampler calls -- do not reorder the statements.
    fatf.setup_random_seed()

    binary_msg = 'The data_row is not binary.'
    # Expected per-feature mean of the sampled values (checked with atol=0.1).
    proportions = [0.5, 0., 0.5, 0.5]

    # Classic (unstructured) binary row and its expected 5-row sample.
    numerical_binary_array = np.array([1, 0, 1, 1])
    numerical_binary_array_sampled = np.array([
        [0, 0, 0, 0],
        [1, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 0, 1],
        [0, 0, 1, 0]
    ])  # yapf: disable

    # Structured binary row (taken as a single numpy void) and its expected
    # 5-row sample.
    struct_dtype = [('a', 'i'), ('b', 'i'), ('c', 'f'), ('d', bool)]
    numerical_binary_struct_array = np.array([(1, 0, 1., True)],
                                             dtype=struct_dtype)
    numerical_binary_struct_array = numerical_binary_struct_array[0]
    numerical_binary_struct_array_sampled = np.array(
        [(1, 0, 0., False),
         (0, 0, 0., True),
         (1, 0, 0., True),
         (1, 0, 1., True),
         (1, 0, 0., False)],
        dtype=struct_dtype)  # yapf: disable

    # Non-binary inputs must be rejected with the same error message.
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(np.array([0, 1, 2, 3]))
    assert str(exin.value) == binary_msg
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(np.array([0., 0.5, 0.5, 0.2]))
    assert str(exin.value) == binary_msg
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(CATEGORICAL_STRUCT_ARRAY[0])
    assert str(exin.value) == binary_msg
    with pytest.raises(ValueError) as exin:
        fudi.binary_sampler(MIXED_ARRAY[0])
    assert str(exin.value) == binary_msg

    # Valid inputs: classic array first, structured array second.

    samples = fudi.binary_sampler(numerical_binary_array, samples_number=5)
    assert np.array_equal(samples, numerical_binary_array_sampled)

    # A large sample is only compared against the expected proportions.
    samples = fudi.binary_sampler(numerical_binary_array, samples_number=1000)
    assert np.allclose(
        samples.sum(axis=0) / samples.shape[0], proportions, atol=1e-1)

    samples = fudi.binary_sampler(
        numerical_binary_struct_array, samples_number=5)
    assert np.array_equal(samples, numerical_binary_struct_array_sampled)
    # The sample must keep the dtype of the input row (strict comparison).
    assert fuav.are_similar_dtype_arrays(
        np.asarray(numerical_binary_struct_array), samples, True)

    samples = fudi.binary_sampler(
        numerical_binary_struct_array, samples_number=1000)
    # Structured samples are checked field by field.
    for i, name in enumerate(numerical_binary_struct_array.dtype.names):
        assert np.allclose(
            samples[name].sum() / samples[name].shape[0],
            proportions[i],
            atol=1e-1)
    assert fuav.are_similar_dtype_arrays(
        np.asarray(numerical_binary_struct_array), samples, True)

コード例 #4

0

ファイルを表示

ファイル: augmentation.py プロジェクト: enrsr/fat-forensics-1

    def _validate_sample_input(
            self,
            data_row: Union[None, np.ndarray, np.void],
            samples_number: int) -> bool:
        """
        Validates input parameters of the ``sample`` method.

        The ``data_row`` is only inspected when it is not ``None``; the
        ``samples_number`` is always inspected.

        Raises
        ------
        IncorrectShapeError
            The ``data_row`` is not a 1-dimensional numpy array-like object,
            or it has a different number of features (columns) than the data
            array held by this object.
        TypeError
            The dtype of the ``data_row`` differs from the dtype of the data
            array held by this object, or ``samples_number`` is not an
            integer.
        ValueError
            The ``samples_number`` parameter is smaller than 1.

        Returns
        -------
        is_valid : boolean
            ``True`` when every check passes; a failed check raises instead.
        """
        if data_row is not None:
            if not fuav.is_1d_like(data_row):
                raise IncorrectShapeError(
                    'The data_row must either be a '
                    '1-dimensional numpy array or numpy '
                    'void object for structured rows.')

            row_as_array = np.array([data_row])
            dtypes_agree = fuav.are_similar_dtype_arrays(
                self.dataset, row_as_array, strict_comparison=True)
            if not dtypes_agree:
                raise TypeError(
                    'The dtype of the data_row is different to '
                    'the dtype of the data array used to '
                    'initialise this class.')

            # A structured row with a wrong number of features is already
            # rejected by the dtype check; classic numpy arrays need an
            # explicit comparison of the column count.
            if (not self.is_structured
                    and data_row.shape[0] != self.dataset.shape[1]):
                raise IncorrectShapeError(
                    'The data_row must contain the '
                    'same number of features as the '
                    'dataset used to initialise '
                    'this class.')

        if not isinstance(samples_number, int):
            raise TypeError('The samples_number parameter must be an integer.')
        if samples_number < 1:
            raise ValueError(
                'The samples_number parameter must be a '
                'positive integer.')

        return True

コード例 #5

0

ファイルを表示

ファイル: models.py プロジェクト: mattclifford1/fat-forensics-1

    def predict_proba(self, X: np.ndarray) -> np.ndarray:
        """
        Calculates label probabilities for new instances with the fitted model.

        Parameters
        ----------
        X : numpy.ndarray
            The data for which labels probabilities will be predicted.

        Raises
        ------
        IncorrectShapeError
            X is not a 2-dimensional array, it has 0 rows or it has a different
            number of columns than the training data.
        UnfittedModelError
            Raised when trying to predict data when the model has not been
            fitted yet. Try using the ``fit`` method to fit the model first.
        RuntimeError
            Raised when trying to use this method when the predictor is
            initialised as a regressor.
        ValueError
            X has a different dtype than the data used to fit the model.

        Returns
        -------
        probabilities : numpy.ndarray
            One row per instance in ``X`` and one column per label stored in
            ``self._unique_y`` (the original documentation describes this
            order as lexicographic).
        """
        # pylint: disable=invalid-name
        if not self._is_classifier:
            raise RuntimeError('This functionality is not available for a '
                               'regressor.')

        if not self._is_fitted:
            raise UnfittedModelError('This model has not been fitted yet.')
        if not fuav.is_2d_array(X):
            raise IncorrectShapeError('X must be a 2-dimensional array. If '
                                      'you want to predict a single data '
                                      'point please format it as a single row '
                                      'in a 2-dimensional array.')
        if not fuav.are_similar_dtype_arrays(X, self._X):
            raise ValueError('X must have the same dtype as the training '
                             'data.')
        if not X.shape[0]:
            raise IncorrectShapeError('X must have at least one row.')
        # No need to check for columns in a structured array -> this is handled
        # by the dtype checker.
        if not fuav.is_structured_array(X):
            if X.shape[1] != self._X.shape[1]:
                raise IncorrectShapeError(('X must have the same number of '
                                           'columns as the training data '
                                           '({}).').format(self._X.shape[1]))

        # NOTE: the original code pre-allocated ``probabilities`` with
        # ``np.empty`` here; both branches below replaced it, so the
        # allocation was dead and has been dropped.

        if self._k >= self._X_n:
            # k is not smaller than ``self._X_n`` (presumably the number of
            # training points), so no neighbour search is done and the
            # stored ``self._unique_y_probabilities`` are repeated for every
            # row of X.
            return np.tile(self._unique_y_probabilities, (X.shape[0], 1))

        distances = self._get_distances(X)
        # Each column of ``distances`` corresponds to one row of X; after the
        # partition the first k entries of a column index its k closest
        # training points (in no particular order).
        knn = np.argpartition(distances, self._k, axis=0)
        classes_number = self._unique_y.shape[0]

        probabilities = []
        for column in knn.T:
            close_labels = self._y[column[:self._k]]
            values, counts = np.unique(close_labels, return_counts=True)
            total_counts = np.sum(counts)
            probs = np.zeros((classes_number, ))
            for value, count in zip(values, counts):
                # Place the neighbourhood frequency of each label in the
                # column that this label occupies in ``self._unique_y``.
                ind = np.where(self._unique_y == value)[0]
                probs[ind] = count / total_counts
            probabilities.append(probs)

        return np.array(probabilities)

コード例 #6

0

ファイルを表示

ファイル: models.py プロジェクト: mattclifford1/fat-forensics-1

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predicts labels of new instances with the fitted model.

        Parameters
        ----------
        X : numpy.ndarray
            The data for which labels will be predicted.

        Raises
        ------
        IncorrectShapeError
            X is not a 2-dimensional array, it has 0 rows or it has a different
            number of columns than the training data.
        UnfittedModelError
            Raised when trying to predict data when the model has not been
            fitted yet. Try using the ``fit`` method to fit the model first.
        ValueError
            X has a different dtype than the data used to fit the model.

        Returns
        -------
        predictions : numpy.ndarray
            One prediction per row of ``X``: the winning label among the k
            nearest neighbours for a classifier, or the mean of their targets
            otherwise.
        """
        # pylint: disable=invalid-name,too-many-locals,too-many-branches
        if not self._is_fitted:
            raise UnfittedModelError('This model has not been fitted yet.')
        if not fuav.is_2d_array(X):
            raise IncorrectShapeError('X must be a 2-dimensional array. If '
                                      'you want to predict a single data '
                                      'point please format it as a single row '
                                      'in a 2-dimensional array.')
        if not fuav.are_similar_dtype_arrays(X, self._X):
            raise ValueError('X must have the same dtype as the training '
                             'data.')
        if not X.shape[0]:
            raise IncorrectShapeError('X must have at least one row.')
        # No need to check for columns in a structured array -> this is handled
        # by the dtype checker.
        if not fuav.is_structured_array(X):
            if X.shape[1] != self._X.shape[1]:
                raise IncorrectShapeError(('X must have the same number of '
                                           'columns as the training data '
                                           '({}).').format(self._X.shape[1]))

        # NOTE: the original code pre-allocated ``predictions`` with
        # ``np.empty`` here; both branches below replaced it, so the
        # allocation was dead and has been dropped.

        if self._k >= self._X_n:
            # k is not smaller than ``self._X_n`` (presumably the number of
            # training points), so no neighbour search is done and the stored
            # ``self._majority_label`` is returned for every row of X.
            return np.array(X.shape[0] * [self._majority_label])

        distances = self._get_distances(X)
        # If there are 3 nearest neighbours within distances 1, 2 and 2 and
        # k is set to 2, then argpartition will always take the first
        # within distance 2.
        knn = np.argpartition(distances, self._k, axis=0)

        predictions = []
        for column in knn.T:
            close_labels = self._y[column[:self._k]]

            if not self._is_classifier:
                predictions.append(close_labels.mean())
                continue

            values, counts = np.unique(close_labels, return_counts=True)
            top_labels = np.sort(values[counts == counts.max()])
            assert len(top_labels.shape) == 1, 'This should be a flat array.'

            if top_labels.shape[0] > 1:
                # There is a tie among the neighbours: resolve it with the
                # overall count of the tied labels in the training data.
                labels_filter = np.zeros(self._unique_y.shape[0], dtype=bool)
                for top_prediction in top_labels:
                    labels_filter |= self._unique_y == top_prediction
                g_top_label = self._unique_y[labels_filter]
                g_top_label_counts = self._unique_y_counts[labels_filter]

                # If the global counts are tied as well, the first label in
                # sorted order is taken.
                g_is_top = g_top_label_counts == np.max(g_top_label_counts)
                prediction = np.sort(g_top_label[g_is_top])[0]
            else:
                prediction = top_labels[0]

            predictions.append(prediction)

        return np.array(predictions)

コード例 #7

0

ファイルを表示

ファイル: models.py プロジェクト: mattclifford1/fat-forensics-1

    def _get_distances(self, X: np.ndarray) -> np.ndarray:
        """
        Gets distances for a mixture of numerical and categorical features.

        The columns listed in ``self._numerical_indices`` are compared with
        ``fud.euclidean_array_distance`` and the columns listed in
        ``self._categorical_indices`` with ``fud.binary_array_distance``; the
        two distance matrices are then added together.

        Parameters
        ----------
        X : numpy.ndarray
            A data array for which distances to the training data will be
            calculated.

        Raises
        ------
        AssertionError
            Raised when the model is not fitted, X is not a 2-dimensional
            array or X's dtype is different than training data's dtype. It is
            also raised when the two partial distance matrices differ in shape
            or when the resulting distances matrix is not 2-dimensional.

        Returns
        -------
        distances : numpy.ndarray
            An array of distances between X and the training data.
        """
        # pylint: disable=invalid-name
        assert self._is_fitted, 'Cannot calculate distances on unfitted model.'
        assert fuav.is_2d_array(X), 'X must be a 2-dimensional array.'
        assert fuav.are_similar_dtype_arrays(X, self._X), \
            'X must have the same dtype as the training data.'

        # Structured arrays are indexed with the indices directly, classic
        # arrays along their second axis.
        if self._is_structured:
            def select(array, indices):
                return array[indices]
        else:
            def select(array, indices):
                return array[:, indices]

        # Feature types without any columns contribute a matrix of zeros.
        pairwise_shape = (self._X.shape[0], X.shape[0])

        categorical_idx = self._categorical_indices
        if categorical_idx.size:
            categorical_part = fud.binary_array_distance(
                select(self._X, categorical_idx), select(X, categorical_idx))
        else:
            categorical_part = np.zeros(pairwise_shape)

        numerical_idx = self._numerical_indices
        if numerical_idx.size:
            numerical_part = fud.euclidean_array_distance(
                select(self._X, numerical_idx), select(X, numerical_idx))
        else:
            numerical_part = np.zeros(pairwise_shape)

        assert categorical_part.shape == numerical_part.shape, \
            'Different number of point-wise distances for these feature types.'
        distances = categorical_part + numerical_part
        assert fuav.is_2d_array(distances), 'Distances matrix must be 2D.'

        return distances

コード例 #8

0

ファイルを表示

def _validate_input_local_fidelity(
        dataset: np.ndarray,
        data_row: Union[np.ndarray, np.void],
        global_predictive_function: PredictiveFunctionType,
        local_predictive_function: PredictiveFunctionType,
        metric_function: Callable[[np.ndarray, np.ndarray], float],
        explained_class_index: Union[int, None],
        explained_feature_indices: Union[List[IndexType], None],
        fidelity_radius_percentage: int,
        samples_number: int) -> bool:
    """
    Validates the input parameters for the ``local_fidelity_score`` function.

    The parameters, errors and exceptions are described in the documentation
    of the
    :func:`fatf.utils.transparency.surrogate_evaluation.local_fidelity_score`
    function. Note that the global predictive function is called once, on the
    first row of the ``dataset``, to learn the shape of its output.

    Returns
    -------
    is_input_ok : boolean
        ``True`` when every check passes; a failed check raises instead.
    """
    # pylint: disable=too-many-arguments,too-many-branches,too-many-statements

    # Data set and data row.
    if not fuav.is_2d_array(dataset):
        raise IncorrectShapeError(
            'The input dataset must be a '
            '2-dimensional numpy array.')
    if not fuav.is_base_array(dataset):
        raise TypeError(
            'The input dataset must be of a base type -- numbers '
            'and/or strings.')

    if not fuav.is_1d_like(data_row):
        raise IncorrectShapeError(
            'The data_row must either be a '
            '1-dimensional numpy array or a numpy '
            'void object for structured data rows.')

    if not fuav.are_similar_dtype_arrays(dataset, np.array([data_row])):
        raise TypeError(
            'The dtype of the data_row is too different from '
            'the dtype of the dataset array.')

    # A structured row with a wrong number of features is already rejected by
    # the dtype check; classic numpy arrays need an explicit comparison.
    if (not fuav.is_structured_array(dataset)
            and dataset.shape[1] != data_row.shape[0]):
        raise IncorrectShapeError(
            'The data_row must contain the same '
            'number of features as the dataset.')

    # Predictive functions and the metric function.
    if not callable(global_predictive_function):
        raise TypeError(
            'The global_predictive_function should be a Python '
            'callable, e.g., a Python function.')
    if fuv.get_required_parameters_number(global_predictive_function) != 1:
        raise IncompatibleModelError(
            'The global predictive function must have exactly *one* '
            'required parameter to work with this metric.')

    if not callable(local_predictive_function):
        raise TypeError(
            'The local_predictive_function should be a Python '
            'callable, e.g., a Python function.')
    if fuv.get_required_parameters_number(local_predictive_function) != 1:
        raise IncompatibleModelError(
            'The local predictive function must have exactly *one* '
            'required parameter to work with this metric.')

    if not callable(metric_function):
        raise TypeError(
            'The metric_function should be a Python callable, '
            'e.g., a Python function.')
    if fuv.get_required_parameters_number(metric_function) != 2:
        raise TypeError(
            'The metric_function must take exactly *two* '
            'required parameters.')

    # Explained class index -- its meaning depends on whether the global
    # model outputs a 2-dimensional (probabilistic) or 1-dimensional array.
    global_prediction = global_predictive_function(dataset[:1])
    assert not fuav.is_structured_array(global_prediction), 'Must be plain.'
    assert global_prediction.shape[0] == 1, 'Just 1 data point was predicted.'

    class_index_given = explained_class_index is not None
    if fuav.is_2d_array(global_prediction):  # A probabilistic model.
        if class_index_given:
            if not isinstance(explained_class_index, int):
                raise TypeError(
                    'For probabilistic global models, i.e., '
                    'global predictive functions, the '
                    'explained_class_index parameter has to be an '
                    'integer or None.')
            classes_number = global_prediction.shape[1]
            if not 0 <= explained_class_index < classes_number:
                raise ValueError(
                    'The explained_class_index parameter is '
                    'negative or larger than the number of '
                    'classes output by the global '
                    'probabilistic model.')
    elif fuav.is_1d_array(global_prediction):
        if class_index_given:
            warnings.warn(
                'The explained_class_index parameter is not None and will be '
                'ignored since the global model is not probabilistic.',
                UserWarning)
    else:
        assert False, ('Global predictor must output a 1- or 2-dimensional '
                       'numpy array.')  # pragma: nocover

    # Explained feature indices.
    if explained_feature_indices is not None:
        if not isinstance(explained_feature_indices, list):
            raise TypeError(
                'The explained_feature_indices parameter must be '
                'a Python list or None.')
        invalid_indices = fuat.get_invalid_indices(
            dataset, np.asarray(explained_feature_indices))
        if invalid_indices.size:
            raise IndexError(
                'The following column indices are invalid for the input '
                'dataset: {}.'.format(invalid_indices))

    # Fidelity radius and number of samples.
    if not isinstance(fidelity_radius_percentage, int):
        raise TypeError(
            'The fidelity_radius_percentage must be an integer '
            'between 1 and 100.')
    if not 1 <= fidelity_radius_percentage <= 100:
        raise ValueError(
            'The fidelity_radius_percentage must be an '
            'integer between 1 and 100.')

    if not isinstance(samples_number, int):
        raise TypeError('The samples_number must be an integer.')
    if samples_number < 1:
        raise ValueError('The samples_number must be a positive integer.')

    return True

コード例 #9

0

ファイルを表示

ファイル: distances.py プロジェクト: enrsr/fat-forensics-1

def get_point_distance(
        data_array: np.ndarray,
        data_point: Union[np.ndarray, np.void],
        distance_function: Callable[[np.ndarray, np.ndarray], float]
) -> np.ndarray:
    """
    Computes the distance between a data point and an array of data.

    The ``distance_function`` is applied to every row of the ``data_array``
    paired with the ``data_point``; the row is always passed as the first
    argument and the ``data_point`` as the second.

    Parameters
    ----------
    data_array : numpy.ndarray
        A 2-dimensional numpy array to which rows distances will be computed.
    data_point : Union[numpy.ndarray, numpy.void]
        A 1-dimensional numpy array or numpy void (for structured data points)
        for which distances to every row of the ``data_array`` will be
        computed.
    distance_function : Callable[[numpy.ndarray, numpy.ndarray], number]
        A Python function that takes as an input two 1-dimensional numpy arrays
        of equal length and outputs a number representing a distance between
        them. **The distance function is assumed to return the same distance
        regardless of the order in which parameters are given.**

    Raises
    ------
    AttributeError
        The distance function does not require exactly two parameters.
    IncorrectShapeError
        The data array is not a 2-dimensional numpy array. The data point is
        not 1-dimensional. The number of columns in the data array is different
        to the number of elements in the data point.
    TypeError
        The data array or the data point is not of a base type (numbers and/or
        strings). The data point and the data array have incomparable dtypes.
        The distance function is not a Python callable (function).

    Returns
    -------
    distances : numpy.ndarray
        A 1-dimensional numerical numpy array with distances between
        ``data_point`` and every row of the ``data_array``.
    """
    # The data array and the distance function are checked by the validator.
    assert _validate_get_distance(data_array,
                                  distance_function), 'Invalid input.'

    is_structured = fuav.is_structured_array(data_array)

    if not fuav.is_1d_like(data_point):
        raise IncorrectShapeError(
            'The data point has to be 1-dimensional '
            'numpy array or numpy void (for structured '
            'arrays).')

    # Wrap the point in a one-element array for the array-level validators.
    point_as_array = np.asarray([data_point])
    if not fuav.is_base_array(point_as_array):
        raise TypeError(
            'The data point has to be of a base type (strings '
            'and/or numbers).')
    if not fuav.are_similar_dtype_arrays(data_array, point_as_array):
        raise TypeError(
            'The dtypes of the data set and the data point are '
            'too different.')

    if is_structured:
        # A different number of columns in a structured array is picked up by
        # the dtype comparison above, so the rows can be compared directly.
        distances = np.zeros((data_array.shape[0], ), dtype=np.float64)
        for position, row in enumerate(data_array):
            distances[position] = distance_function(row, data_point)
    else:
        if data_array.shape[1] != point_as_array.shape[1]:
            raise IncorrectShapeError(
                'The data point has different number of '
                'columns (features) than the data set.')
        distances = np.apply_along_axis(
            distance_function, 1, data_array, data_point)

    return distances

コード例 #10

0

ファイルを表示

ファイル: discretisation.py プロジェクト: alexhepburn/fat-forensics-broken

    def _validate_input_discretise(
            self,
            dataset: Union[np.ndarray, np.void]) -> bool:
        """
        Validates the input parameters of the ``discretise`` method.

        The ``dataset`` may be a single data point (1-D array or numpy void)
        or a data set (2-D array); its dtype is compared, non-strictly, with
        ``self.dataset_dtype``.

        Parameters
        ----------
        dataset : Union[numpy.ndarray, numpy.void]
            A data point (1-D array) or a data set (2-D array) to be
            discretised.

        Raises
        ------
        IncorrectShapeError
            The input ``dataset`` is neither 1- nor 2-dimensional numpy array,
            or -- for unstructured data -- its number of features (columns)
            differs from ``self.features_number``.
        TypeError
            The dtype of the input ``dataset`` is too different from
            ``self.dataset_dtype``.

        Returns
        -------
        is_valid : boolean
            ``True`` when every check passes; a failed check raises instead.
        """
        if not fuav.is_1d_like(dataset) and not fuav.is_2d_array(dataset):
            raise IncorrectShapeError(
                'The dataset must be either a '
                '1-dimensional (a plane numpy array or '
                'numpy void for structured '
                '1-dimensional arrays) or a '
                '2-dimensional array.')

        # An empty array carrying the stored dtype stands in for the data set
        # seen at initialisation; only the dtypes are compared.
        dtype_reference = np.empty((0, ), dtype=self.dataset_dtype)
        dtypes_agree = fuav.are_similar_dtype_arrays(
            dtype_reference, np.array(dataset), strict_comparison=False)
        if not dtypes_agree:
            raise TypeError(
                'The dtype of the input dataset is too different '
                'from the dtype of the dataset used to initialise '
                'this class.')

        # The dimensions of a structured array are automatically compared by
        # the dtype check above.
        if not self.is_structured:
            features_number = (
                dataset.shape[0]
                if fuav.is_1d_like(dataset) else dataset.shape[1])
            if features_number != self.features_number:
                raise IncorrectShapeError(
                    'The input dataset must contain the '
                    'same number of features as the '
                    'dataset used to initialise this '
                    'class.')

        return True