Example #1
0
def test_incremental_weighted_mean_and_variance(mean, var, weight_loc,
                                                weight_scale, rng):

    # Verify correctness and numerical stability of the incremental
    # weighted mean/variance against reference NumPy computations,
    # feeding the data in chunks of several different sizes.
    def _check(data, weights, ref_mean, ref_var):
        n_samples = data.shape[0]
        chunk_sizes = [1, n_samples // 10 + 1, n_samples // 4 + 1,
                       n_samples // 2 + 1, n_samples]
        for chunk_size in chunk_sizes:
            running_mean, running_var, weight_sum = 0, 0, 0
            for sl in gen_batches(n_samples, chunk_size):
                running_mean, running_var, weight_sum = \
                    _incremental_weighted_mean_and_var(data[sl],
                                                       weights[sl],
                                                       running_mean,
                                                       running_var,
                                                       weight_sum)
            assert_allclose(running_mean, ref_mean)
            assert_allclose(running_var, ref_var, atol=1e-6)

    size = (100, 20)
    weight = rng.normal(loc=weight_loc, scale=weight_scale, size=size[0])

    # Weighted case: reference values come from np.average.
    X = rng.normal(loc=mean, scale=var, size=size)
    expected_mean = _safe_accumulator_op(np.average, X, weights=weight, axis=0)
    expected_var = _safe_accumulator_op(
        np.average, (X - expected_mean) ** 2, weights=weight, axis=0)
    _check(X, weight, expected_mean, expected_var)

    # Unit weights must reproduce the unweighted np.mean / np.var.
    X = rng.normal(loc=mean, scale=var, size=size)
    ones_weight = np.ones(size[0])
    expected_mean = _safe_accumulator_op(np.mean, X, axis=0)
    expected_var = _safe_accumulator_op(np.var, X, axis=0)
    _check(X, ones_weight, expected_mean, expected_var)
Example #2
0
def _daal_assert_all_finite(X, allow_nan=False, msg_dtype=None):
    """Like assert_all_finite, but only for ndarray.

    Raises ValueError if ``X`` contains infinity, or NaN unless
    ``allow_nan`` is True. 1D/2D float32/float64 inputs (including
    pandas DataFrames) are checked with daal4py's native routine;
    everything else falls back to NumPy-based checks.

    Parameters
    ----------
    X : array-like or pandas.DataFrame
        Data to validate.
    allow_nan : bool
        When True, only infinities are rejected.
    msg_dtype : dtype or None
        Dtype to name in the error message instead of ``X``'s own.
    """
    # validation is also imported in extmath
    from sklearn.utils.extmath import _safe_accumulator_op

    # Global sklearn config can disable all finiteness checks.
    if _get_config()['assume_finite']:
        return

    is_df = is_DataFrame(X)
    num_of_types = get_number_of_types(X)

    # If X is a heterogeneous pandas.DataFrame (columns of mixed dtypes),
    # convert it to a list of C-contiguous per-column arrays so the daal4py
    # backend can consume it without a single-dtype copy of the whole frame.
    if is_df and num_of_types > 1:
        lst = []
        for idx in X:
            arr = X[idx].to_numpy()
            lst.append(arr if arr.flags['C_CONTIGUOUS'] else np.ascontiguousarray(arr))
    else:
        X = np.asanyarray(X)
        is_df = False

    dt = np.dtype(get_dtype(X))
    # 'f' = floating, 'c' = complex: the only kinds that can hold NaN/inf.
    is_float = dt.kind in 'fc'

    msg_err = "Input contains {} or a value too large for {!r}."
    type_err = 'infinity' if allow_nan else 'NaN, infinity'
    err = msg_err.format(type_err, msg_dtype if msg_dtype is not None else dt)

    # Fast path: non-empty 1D/2D float32/float64 data goes to daal4py.
    if (X.ndim in [1, 2]
        and not np.any(np.equal(X.shape, 0))
        and dt in [np.float32, np.float64]
        ):
        if X.ndim == 1:
            X = X.reshape((-1, 1))

        # Heterogeneous DataFrames are passed as the per-column list built
        # above; otherwise the (possibly reshaped) array itself.
        x_for_daal = lst if is_df and num_of_types > 1 else X

        # NOTE(review): the third argument presumably selects the native
        # precision (0 = float64, 1 = float32) — confirm against daal4py.
        if dt == np.float64:
            if not d4p.daal_assert_all_finite(x_for_daal, allow_nan, 0):
                raise ValueError(err)
        elif dt == np.float32:
            if not d4p.daal_assert_all_finite(x_for_daal, allow_nan, 1):
                raise ValueError(err)
    # First try an O(n) time, O(1) space solution for the common case that
    # everything is finite; fall back to O(n) space np.isfinite to prevent
    # false positives from overflow in sum method. The sum is also calculated
    # safely to reduce dtype induced overflows.
    elif is_float and (np.isfinite(_safe_accumulator_op(np.sum, X))):
        pass
    elif is_float:
        if (allow_nan and np.isinf(X).any() or
                not allow_nan and not np.isfinite(X).all()):
            raise ValueError(err)
    # for object dtype data, we only check for NaNs (GH-13254)
    elif dt == np.dtype('object') and not allow_nan:
        if _object_dtype_isnan(X).any():
            raise ValueError("Input contains NaN")
Example #3
0
def _daal_assert_all_finite(X, allow_nan=False, msg_dtype=None):
    """Like assert_all_finite, but only for ndarray."""
    # validation is also imported in extmath
    from sklearn.utils.extmath import _safe_accumulator_op

    # Global sklearn config can disable all finiteness checks.
    if _get_config()['assume_finite']:
        return
    X = np.asanyarray(X)

    dtype = X.dtype
    # 'f' = floating, 'c' = complex: the only kinds that can hold NaN/inf.
    has_float_kind = dtype.kind in 'fc'

    shown_dtype = X.dtype if msg_dtype is None else msg_dtype
    what = 'infinity' if allow_nan else 'NaN, infinity'
    err = ("Input contains {} or a value too large for {!r}."
           .format(what, shown_dtype))

    # Non-empty 1D/2D float32/float64 data is checked natively by daal4py.
    use_daal = (X.ndim in (1, 2)
                and not np.any(np.equal(X.shape, 0))
                and dtype in (np.float32, np.float64))
    if use_daal:
        if X.ndim == 1:
            X = X.reshape((-1, 1))
        if dtype == np.float64:
            if not d4p.daal_assert_all_finite(X, allow_nan, 0):
                raise ValueError(err)
        elif dtype == np.float32:
            if not d4p.daal_assert_all_finite(X, allow_nan, 1):
                raise ValueError(err)
    # First try an O(n) time, O(1) space solution for the common case that
    # everything is finite; fall back to O(n) space np.isfinite to prevent
    # false positives from overflow in sum method. The sum is also calculated
    # safely to reduce dtype induced overflows.
    elif has_float_kind and np.isfinite(_safe_accumulator_op(np.sum, X)):
        pass
    elif has_float_kind:
        bad = np.isinf(X).any() if allow_nan else not np.isfinite(X).all()
        if bad:
            raise ValueError(err)
    # for object dtype data, we only check for NaNs (GH-13254)
    elif X.dtype == np.dtype('object') and not allow_nan:
        if _object_dtype_isnan(X).any():
            raise ValueError("Input contains NaN")