def _daal_assert_all_finite(X, allow_nan=False, msg_dtype=None):
    """Raise ValueError if X contains non-finite values.

    Does nothing when the ``assume_finite`` configuration flag is set.
    A pandas.DataFrame holding more than one column type is handed to the
    DAAL checker as a list of C-contiguous column arrays; any other input
    is converted with ``np.asanyarray`` first.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        Data to validate.
    allow_nan : bool, default=False
        When true only infinities are rejected; otherwise NaN is rejected
        as well.
    msg_dtype : dtype or None, default=None
        Dtype reported in the error message; the detected dtype of X is
        used when this is None.

    Raises
    ------
    ValueError
        If a forbidden value is found.
    """
    # Imported here because validation is also imported in extmath.
    from sklearn.utils.extmath import _safe_accumulator_op

    if _get_config()['assume_finite']:
        return

    is_df = is_DataFrame(X)
    num_of_types = get_number_of_types(X)
    heterogeneous_df = is_df and num_of_types > 1

    if heterogeneous_df:
        # A heterogeneous DataFrame is turned into a list of per-column
        # arrays, each forced to be C-contiguous.
        columns = []
        for label in X:
            column = X[label].to_numpy()
            if not column.flags['C_CONTIGUOUS']:
                column = np.ascontiguousarray(column)
            columns.append(column)
    else:
        X = np.asanyarray(X)

    dt = np.dtype(get_dtype(X))
    is_float = dt.kind in 'fc'

    reported_dtype = dt if msg_dtype is None else msg_dtype
    forbidden = 'infinity' if allow_nan else 'NaN, infinity'
    err = "Input contains {} or a value too large for {!r}.".format(
        forbidden, reported_dtype)

    use_daal = (X.ndim in (1, 2)
                and not np.any(np.equal(X.shape, 0))
                and dt in (np.float32, np.float64))
    if use_daal:
        if X.ndim == 1:
            X = X.reshape((-1, 1))
        x_for_daal = columns if heterogeneous_df else X

        # The last argument tells the DAAL checker which precision is
        # being passed: 0 for float64, 1 for float32.
        if dt == np.float64:
            all_finite = d4p.daal_assert_all_finite(x_for_daal, allow_nan, 0)
        else:
            all_finite = d4p.daal_assert_all_finite(x_for_daal, allow_nan, 1)
        if not all_finite:
            raise ValueError(err)
        return

    if is_float:
        # First try an O(n) time, O(1) space solution for the common case
        # that everything is finite. The sum is calculated safely to
        # reduce dtype induced overflows.
        if np.isfinite(_safe_accumulator_op(np.sum, X)):
            return
        # Fall back to the O(n) space element-wise test to prevent false
        # positives from overflow in the sum.
        if allow_nan:
            has_forbidden = np.isinf(X).any()
        else:
            has_forbidden = not np.isfinite(X).all()
        if has_forbidden:
            raise ValueError(err)
        return

    # For object dtype data, we only check for NaNs (GH-13254).
    if dt == np.dtype('object') and not allow_nan:
        if _object_dtype_isnan(X).any():
            raise ValueError("Input contains NaN")
def _assert_all_finite(X, allow_nan=False):
    """Raise ValueError if a float or complex array holds non-finite values.

    Does nothing when the ``assume_finite`` configuration flag is set, or
    when the dtype of X (after ``np.asanyarray``) is neither float nor
    complex.

    Parameters
    ----------
    X : array-like
        Data to validate.
    allow_nan : bool, default=False
        When true only infinities are rejected; otherwise NaN is rejected
        as well.

    Raises
    ------
    ValueError
        If a forbidden value is found.
    """
    if _get_config()['assume_finite']:
        return

    X = np.asanyarray(X)
    if X.dtype.kind not in 'fc':
        return

    # Cheap path: O(n) time, O(1) space. A finite sum means every element
    # is finite, which is the common case.
    if np.isfinite(X.sum()):
        return

    # The sum may be non-finite purely because of overflow, so confirm with
    # the O(n) space element-wise test before raising.
    if allow_nan:
        has_forbidden = np.isinf(X).any()
    else:
        has_forbidden = not np.isfinite(X).all()

    if has_forbidden:
        forbidden = 'infinity' if allow_nan else 'NaN, infinity'
        raise ValueError(
            "Input contains {} or a value too large for {!r}.".format(
                forbidden, X.dtype))
def _check_X(self, X):
    """
    Perform custom check_array:
    - convert list of strings to object dtype
    - check for missing values for object dtype data (check_array does
      not do that)

    Parameters
    ----------
    X : array-like
        Input data to validate.

    Returns
    -------
    X : ndarray
        The array returned by ``check_array``; re-validated with object
        dtype when the input had no ``dtype`` attribute and was inferred
        as a string array.

    Raises
    ------
    ValueError
        If the resulting object-dtype array contains NaN and the
        ``assume_finite`` configuration flag is not set.
    """
    X_temp = check_array(X, dtype=None)
    if not hasattr(X, 'dtype') and np.issubdtype(X_temp.dtype, np.str_):
        # Use the builtin ``object``: the ``np.object`` alias was deprecated
        # in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        X = check_array(X, dtype=object)
    else:
        X = X_temp

    if X.dtype == np.dtype('object'):
        if not _get_config()['assume_finite']:
            if _object_dtype_isnan(X).any():
                raise ValueError("Input contains NaN")

    return X
def _daal_assert_all_finite(X, allow_nan=False, msg_dtype=None):
    """Raise ValueError if X contains non-finite values.

    Does nothing when the ``assume_finite`` configuration flag is set.
    Non-empty 1-D or 2-D float32/float64 input is checked through
    ``d4p.daal_assert_all_finite``; other float or complex input is checked
    with NumPy, and object input is checked for NaN only.

    Parameters
    ----------
    X : array-like
        Data to validate; converted with ``np.asanyarray``.
    allow_nan : bool, default=False
        When true only infinities are rejected; otherwise NaN is rejected
        as well.
    msg_dtype : dtype or None, default=None
        Dtype reported in the error message; the dtype of X is used when
        this is None.

    Raises
    ------
    ValueError
        If a forbidden value is found.
    """
    # Imported here because validation is also imported in extmath.
    from sklearn.utils.extmath import _safe_accumulator_op

    if _get_config()['assume_finite']:
        return

    X = np.asanyarray(X)
    dt = X.dtype
    is_float = dt.kind in 'fc'

    reported_dtype = dt if msg_dtype is None else msg_dtype
    forbidden = 'infinity' if allow_nan else 'NaN, infinity'
    err = "Input contains {} or a value too large for {!r}.".format(
        forbidden, reported_dtype)

    use_daal = (X.ndim in (1, 2)
                and not np.any(np.equal(X.shape, 0))
                and dt in (np.float32, np.float64))
    if use_daal:
        if X.ndim == 1:
            X = X.reshape((-1, 1))

        # The last argument tells the DAAL checker which precision is
        # being passed: 0 for float64, 1 for float32.
        if dt == np.float64:
            all_finite = d4p.daal_assert_all_finite(X, allow_nan, 0)
        else:
            all_finite = d4p.daal_assert_all_finite(X, allow_nan, 1)
        if not all_finite:
            raise ValueError(err)
        return

    if is_float:
        # First try an O(n) time, O(1) space solution for the common case
        # that everything is finite. The sum is calculated safely to
        # reduce dtype induced overflows.
        if np.isfinite(_safe_accumulator_op(np.sum, X)):
            return
        # Fall back to the O(n) space element-wise test to prevent false
        # positives from overflow in the sum.
        if allow_nan:
            has_forbidden = np.isinf(X).any()
        else:
            has_forbidden = not np.isfinite(X).all()
        if has_forbidden:
            raise ValueError(err)
        return

    # For object dtype data, we only check for NaNs (GH-13254).
    if dt == np.dtype('object') and not allow_nan:
        if _object_dtype_isnan(X).any():
            raise ValueError("Input contains NaN")