Ejemplo n.º 1
0
def _daal_assert_all_finite(X, allow_nan=False, msg_dtype=None):
    """Like assert_all_finite, but only for ndarray."""
    # validation is also imported in extmath
    from sklearn.utils.extmath import _safe_accumulator_op

    if _get_config()['assume_finite']:
        return

    is_df = is_DataFrame(X)
    num_of_types = get_number_of_types(X)

    # if X is heterogeneous pandas.DataFrame then
    # covert it to a list of arrays 
    if is_df and num_of_types > 1:
        lst = []
        for idx in X:
            arr = X[idx].to_numpy()
            lst.append(arr if arr.flags['C_CONTIGUOUS'] else np.ascontiguousarray(arr))
    else:
        X = np.asanyarray(X)
        is_df = False

    dt = np.dtype(get_dtype(X))
    is_float = dt.kind in 'fc'

    msg_err = "Input contains {} or a value too large for {!r}."
    type_err = 'infinity' if allow_nan else 'NaN, infinity'
    err = msg_err.format(type_err, msg_dtype if msg_dtype is not None else dt)

    if (X.ndim in [1, 2]
        and not np.any(np.equal(X.shape, 0))
        and dt in [np.float32, np.float64]
        ):
        if X.ndim == 1:
            X = X.reshape((-1, 1))

        x_for_daal = lst if is_df and num_of_types > 1 else X

        if dt == np.float64:
            if not d4p.daal_assert_all_finite(x_for_daal, allow_nan, 0):
                raise ValueError(err)
        elif dt == np.float32:
            if not d4p.daal_assert_all_finite(x_for_daal, allow_nan, 1):
                raise ValueError(err)
    # First try an O(n) time, O(1) space solution for the common case that
    # everything is finite; fall back to O(n) space np.isfinite to prevent
    # false positives from overflow in sum method. The sum is also calculated
    # safely to reduce dtype induced overflows.
    elif is_float and (np.isfinite(_safe_accumulator_op(np.sum, X))):
        pass
    elif is_float:
        if (allow_nan and np.isinf(X).any() or
                not allow_nan and not np.isfinite(X).all()):
            raise ValueError(err)
    # for object dtype data, we only check for NaNs (GH-13254)
    elif dt == np.dtype('object') and not allow_nan:
        if _object_dtype_isnan(X).any():
            raise ValueError("Input contains NaN")
Ejemplo n.º 2
0
def _assert_all_finite(X, allow_nan=False):
    """Like assert_all_finite, but only for ndarray."""
    if _get_config()['assume_finite']:
        return
    X = np.asanyarray(X)
    # First try an O(n) time, O(1) space solution for the common case that
    # everything is finite; fall back to O(n) space np.isfinite to prevent
    # false positives from overflow in sum method.
    is_float = X.dtype.kind in 'fc'
    if is_float and np.isfinite(X.sum()):
        pass
    elif is_float:
        msg_err = "Input contains {} or a value too large for {!r}."
        if (allow_nan and np.isinf(X).any()
                or not allow_nan and not np.isfinite(X).all()):
            type_err = 'infinity' if allow_nan else 'NaN, infinity'
            raise ValueError(msg_err.format(type_err, X.dtype))
Ejemplo n.º 3
0
    def _check_X(self, X):
        """
        Perform custom check_array:
        - convert list of strings to object dtype
        - check for missing values for object dtype data (check_array does
          not do that)

        """
        X_temp = check_array(X, dtype=None)
        if not hasattr(X, 'dtype') and np.issubdtype(X_temp.dtype, np.str_):
            X = check_array(X, dtype=np.object)
        else:
            X = X_temp

        if X.dtype == np.dtype('object'):
            if not _get_config()['assume_finite']:
                if _object_dtype_isnan(X).any():
                    raise ValueError("Input contains NaN")

        return X
Ejemplo n.º 4
0
def _daal_assert_all_finite(X, allow_nan=False, msg_dtype=None):
    """Like assert_all_finite, but only for ndarray."""
    # validation is also imported in extmath
    from sklearn.utils.extmath import _safe_accumulator_op

    if _get_config()['assume_finite']:
        return
    X = np.asanyarray(X)

    dt = X.dtype
    is_float = dt.kind in 'fc'

    msg_err = "Input contains {} or a value too large for {!r}."
    type_err = 'infinity' if allow_nan else 'NaN, infinity'
    err = msg_err.format(type_err,
                         msg_dtype if msg_dtype is not None else X.dtype)

    if (X.ndim in [1, 2] and not np.any(np.equal(X.shape, 0))
            and dt in [np.float32, np.float64]):
        if X.ndim == 1:
            X = X.reshape((-1, 1))
        if dt == np.float64:
            if not d4p.daal_assert_all_finite(X, allow_nan, 0):
                raise ValueError(err)
        elif dt == np.float32:
            if not d4p.daal_assert_all_finite(X, allow_nan, 1):
                raise ValueError(err)
    # First try an O(n) time, O(1) space solution for the common case that
    # everything is finite; fall back to O(n) space np.isfinite to prevent
    # false positives from overflow in sum method. The sum is also calculated
    # safely to reduce dtype induced overflows.
    elif is_float and (np.isfinite(_safe_accumulator_op(np.sum, X))):
        pass
    elif is_float:
        if (allow_nan and np.isinf(X).any()
                or not allow_nan and not np.isfinite(X).all()):
            raise ValueError(err)
    # for object dtype data, we only check for NaNs (GH-13254)
    elif X.dtype == np.dtype('object') and not allow_nan:
        if _object_dtype_isnan(X).any():
            raise ValueError("Input contains NaN")