def test_anova_kernel_sparse_subset(degree):
    """Compare SubfeatureRandomKernel with the exact ANOVA Gram matrix.

    The transformer is fitted on the module-level ``X_sp``/``Y_sp`` inputs
    with Rademacher-distributed weights. Besides the approximation error,
    the test checks that the fitted random weights hold exactly
    ``n_sub_features`` non-zeros per component.

    Parameters
    ----------
    degree : degree forwarded to both ``anova`` and the transformer.
    """
    n_sub_features = 25
    n_components = 2000 * 5

    # exact kernel
    gram = anova(X_sp, Y_sp, degree, True)

    # approximate kernel mapping
    rk_transform = SubfeatureRandomKernel(
        n_components=n_components,
        random_state=rng,
        kernel='anova',
        degree=degree,
        distribution="rademacher",
        n_sub_features=n_sub_features,
    )
    X_trans = rk_transform.fit_transform(X_sp)
    Y_trans = rk_transform.transform(Y_sp)
    assert_almost_equal(rk_transform.random_weights_.nnz,
                        n_components * n_sub_features)

    error = gram - safe_sparse_dot(X_trans, Y_trans.T, dense_output=True)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.001
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.005  # mean is fairly close

    # total number of stored weights, then the per-component count
    weights = rk_transform.random_weights_
    assert_almost_equal(n_sub_features * n_components, weights.nnz)
    abs_column_sums = np.array(abs(weights).sum(axis=0))[0]
    assert_allclose_dense_sparse(n_sub_features * np.ones(n_components),
                                 abs_column_sums)
def test_check_array_force_all_finite_valid(value, force_all_finite, retype):
    """Check that ``check_array`` returns an accepted input unchanged.

    Parameters
    ----------
    value : scalar written into ``X[0, 0]`` before validation.
    force_all_finite : forwarded to ``check_array``.
    retype : callable converting the dense 2x2 float array into the
        container under test.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` names the same dtype explicitly.
    X = retype(np.arange(4).reshape(2, 2).astype(np.float64))
    X[0, 0] = value
    X_checked = check_array(X,
                            force_all_finite=force_all_finite,
                            accept_sparse=True)
    assert_allclose_dense_sparse(X, X_checked)
def test_safe_indexing_1d_container_mask(array_type, indices_type):
    """Select two elements of a 1-D container through a boolean mask.

    The mask keeps positions 1 and 2 of the nine-element container, so the
    expected subset is ``[2, 3]`` converted to ``array_type``.
    """
    mask = [False, True, True] + [False] * 6
    values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    array = _convert_container(values, array_type)
    indices = _convert_container(mask, indices_type)
    subset = _safe_indexing(array, indices, axis=0)
    expected = _convert_container([2, 3], array_type)
    assert_allclose_dense_sparse(subset, expected)
Exemple #4
0
def check_pipeline_consistency(name, estimator_orig):
    """Check that ``make_pipeline(est)`` gives the same results as ``est``.

    A clone of the estimator and a single-step pipeline wrapping that clone
    are fitted on the same blobs data. The outputs of ``score`` and
    ``fit_transform`` (when the estimator has them) are then compared.

    Raises
    ------
    SkipTest
        If the estimator's tags mark it as non deterministic.
    """
    if estimator_orig._get_tags()['non_deterministic']:
        raise SkipTest(name + ' is non deterministic')

    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    for func_name in ("score", "fit_transform"):
        func = getattr(estimator, func_name, None)
        if func is None:
            # the estimator does not implement this method
            continue
        func_pipeline = getattr(pipeline, func_name)
        result = func(X, y)
        result_pipe = func_pipeline(X, y)
        assert_allclose_dense_sparse(result, result_pipe)
def test_signed_circulant_random_matrix_for_dot():
    """Compare SignedCirculantRandomMatrix features with the linear kernel.

    Also checks that dense and CSR inputs are transformed identically and
    that the transform matches an explicit circulant-matrix product.
    """
    n_components = X.shape[1]

    # exact kernel
    kernel = np.dot(X, Y.T)

    # approximate kernel mapping
    transformer = SignedCirculantRandomMatrix(n_components=n_components,
                                              random_fourier=False,
                                              random_state=0)
    X_trans = transformer.fit_transform(X)
    Y_trans = transformer.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.05  # mean is fairly close

    # for sparse matrix
    X_trans_sp = transformer.transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans, X_trans_sp)

    # comparing naive computation
    circ = circulant(ifft(transformer.random_weights_[0]).real)
    circ *= transformer.random_sign_.T
    X_trans_naive = np.dot(X, circ.T) / np.sqrt(n_components)
    assert_allclose(X_trans, X_trans_naive)
def test_function_sampler_func(X, y):
    """Check that FunctionSampler returns what the wrapped function returns.

    The wrapped function keeps the first ten samples and targets.
    """
    def _first_ten(X, y):
        return X[:10], y[:10]

    X_res, y_res = FunctionSampler(func=_first_ten).fit_resample(X, y)
    assert_allclose_dense_sparse(X_res, X[:10])
    assert_array_equal(y_res, y[:10])
Exemple #7
0
def test_function_sampler_func(X, y):
    """Check that FunctionSampler returns what the wrapped function returns.

    The wrapped function keeps the first ten samples and targets.
    """
    def func(X, y):
        return X[:10], y[:10]

    sampler = FunctionSampler(func=func)
    # ``fit_sample`` is the deprecated spelling of ``fit_resample``.
    X_res, y_res = sampler.fit_resample(X, y)
    assert_allclose_dense_sparse(X_res, X[:10])
    assert_array_equal(y_res, y[:10])
Exemple #8
0
def check_fit_idempotent(name, estimator_orig):
    """Check that ``est.fit(X)`` is the same as ``est.fit(X).fit(X)``.

    Comparing the learned attributes (e.g. ``coefs_``) generically is
    difficult and full of edge cases, so the outputs of ``predict``,
    ``transform``, ``decision_function`` and ``predict_proba`` (whichever
    the estimator exposes) are compared after a first and a second fit on
    the same training split.

    Raises
    ------
    SkipTest
        If the estimator's tags mark it as non deterministic.
    """
    check_methods = ["predict", "transform", "decision_function",
                     "predict_proba"]
    rng = np.random.RandomState(0)

    if estimator_orig._get_tags()['non_deterministic']:
        raise SkipTest(name + ' is non deterministic')

    estimator = clone(estimator_orig)
    set_random_state(estimator)
    if 'warm_start' in estimator.get_params():
        estimator.set_params(warm_start=False)

    # NOTE(review): y is always drawn with 100 entries, independently of the
    # number of rows in X -- confirm this matches the small dataset.
    n_samples = 100
    X, _ = _create_small_ts_dataset()
    X = X.reshape((X.shape[0], X.shape[1]))
    X = pairwise_estimator_convert_X(X, estimator)
    if is_regressor(estimator_orig):
        y = rng.normal(size=n_samples)
    else:
        y = rng.randint(low=0, high=2, size=n_samples)

    splitter = ShuffleSplit(test_size=.2, random_state=rng)
    train, test = next(splitter.split(X))
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    # Fit for the first time
    estimator.fit(X_train, y_train)

    result = {}
    for method in check_methods:
        if hasattr(estimator, method):
            result[method] = getattr(estimator, method)(X_test)

    # Fit again
    set_random_state(estimator)
    estimator.fit(X_train, y_train)

    float64_tol = 2 * np.finfo(np.float64).eps
    for method in check_methods:
        if not hasattr(estimator, method):
            continue
        new_result = getattr(estimator, method)(X_test)
        # floating outputs use their own machine epsilon, anything else
        # falls back to the float64 one
        if np.issubdtype(new_result.dtype, np.floating):
            tol = 2 * np.finfo(new_result.dtype).eps
        else:
            tol = float64_tol
        assert_allclose_dense_sparse(
            result[method], new_result,
            atol=max(tol, 1e-9), rtol=max(tol, 1e-7),
            err_msg="Idempotency check failed for method {}".format(method)
        )
def test_safe_indexing_1d_container(array_type, indices_type):
    """Select elements ``[2, 3]`` of a 1-D container by position.

    For the ``'slice'`` indices type the second index is incremented by one
    before the indices are converted.
    """
    positions = [1, 2]
    if indices_type == 'slice' and isinstance(positions[1], int):
        positions[1] += 1
    values = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    array = _convert_container(values, array_type)
    indices = _convert_container(positions, indices_type)
    subset = _safe_indexing(array, indices, axis=0)
    expected = _convert_container([2, 3], array_type)
    assert_allclose_dense_sparse(subset, expected)
Exemple #10
0
def test_tfidf_transformer_sparse():
    """Check TfidfTransformer gives the same result for CSC and CSR input.

    Both the values and the sparse format of the two outputs must agree.
    """
    X = sparse.rand(10, 20000, dtype=np.float64, random_state=42)

    X_trans_csc, X_trans_csr = [
        TfidfTransformer().fit_transform(to_format(X))
        for to_format in (sparse.csc_matrix, sparse.csr_matrix)
    ]
    assert_allclose_dense_sparse(X_trans_csc, X_trans_csr)
    assert X_trans_csc.format == X_trans_csr.format
def test_tfidf_transformer_sparse():
    """Fit TfidfTransformer on CSC and CSR copies of the same matrix.

    The two transformed matrices must hold the same values and come back
    in the same sparse format.
    """
    X = sparse.rand(10, 20000, dtype=np.float64, random_state=42)
    as_csc = sparse.csc_matrix(X)
    as_csr = sparse.csr_matrix(X)

    from_csc = TfidfTransformer().fit_transform(as_csc)
    from_csr = TfidfTransformer().fit_transform(as_csr)

    assert_allclose_dense_sparse(from_csc, from_csr)
    assert from_csc.format == from_csr.format
def test_safe_indexing_2d_mask(array_type, indices_type, axis,
                               expected_subset):
    """Index a 3x3 container along ``axis`` with a boolean mask.

    The mask ``[False, True, True]`` is converted to ``indices_type`` and
    the result is compared with ``expected_subset`` converted to
    ``array_type``.
    """
    columns_name = ['col_0', 'col_1', 'col_2']
    rows = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    array = _convert_container(rows, array_type, columns_name)
    indices = _convert_container([False, True, True], indices_type)

    subset = _safe_indexing(array, indices, axis=axis)
    expected = _convert_container(expected_subset, array_type)
    assert_allclose_dense_sparse(subset, expected)
Exemple #13
0
def test_function_sampler_func_kwargs(X, y):
    """Check that FunctionSampler forwards ``kw_args`` to the function.

    The wrapped function runs a RandomUnderSampler built from the forwarded
    keyword arguments. Its output must match a RandomUnderSampler created
    directly with ``random_state=0``.
    """

    # ``ratio`` and ``fit_sample`` are the deprecated spellings of
    # ``sampling_strategy`` and ``fit_resample``.
    def func(X, y, sampling_strategy, random_state):
        rus = RandomUnderSampler(sampling_strategy=sampling_strategy,
                                 random_state=random_state)
        return rus.fit_resample(X, y)

    sampler = FunctionSampler(func=func,
                              kw_args={'sampling_strategy': 'auto',
                                       'random_state': 0})
    X_res, y_res = sampler.fit_resample(X, y)
    X_res_2, y_res_2 = RandomUnderSampler(random_state=0).fit_resample(X, y)
    assert_allclose_dense_sparse(X_res, X_res_2)
    assert_array_equal(y_res, y_res_2)
def test_safe_indexing_2d_read_only_axis_1(array_read_only, indices_read_only,
                                           array_type, indices_type, axis,
                                           expected_array):
    """Check ``_safe_indexing`` with optionally read-only inputs.

    The 3x3 array and the ``[1, 2]`` index array are each made read-only on
    request before being converted to the requested container types.
    """
    raw_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    # freshly created arrays are writeable, so only the read-only case
    # changes the flag
    raw_array.setflags(write=not array_read_only)
    array = _convert_container(raw_array, array_type)

    raw_indices = np.array([1, 2])
    raw_indices.setflags(write=not indices_read_only)
    indices = _convert_container(raw_indices, indices_type)

    subset = _safe_indexing(array, indices, axis=axis)
    expected = _convert_container(expected_array, array_type)
    assert_allclose_dense_sparse(subset, expected)
Exemple #15
0
def test_column_transformer_sparse_array():
    """Check ColumnTransformer on a sparse input matrix.

    For several column specifications the output must stay sparse and must
    equal either the first column (``remainder='drop'``) or the full input
    (``remainder='passthrough'`` or both columns selected).
    """
    X_sparse = sparse.eye(3, 2).tocsr()

    # no distinction between 1D and 2D
    X_res_first = X_sparse[:, 0]
    X_res_both = X_sparse

    for col in [0, [0], slice(0, 1)]:
        for remainder, res in [('drop', X_res_first),
                               ('passthrough', X_res_both)]:
            ct = ColumnTransformer([('trans', Trans(), col)],
                                   remainder=remainder,
                                   sparse_threshold=0.8)
            # plain ``assert`` replaces the nose-style ``assert_true`` helper
            assert sparse.issparse(ct.fit_transform(X_sparse))
            assert_allclose_dense_sparse(ct.fit_transform(X_sparse), res)
            assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                         res)

    for col in [[0, 1], slice(0, 2)]:
        ct = ColumnTransformer([('trans', Trans(), col)],
                               sparse_threshold=0.8)
        assert sparse.issparse(ct.fit_transform(X_sparse))
        assert_allclose_dense_sparse(ct.fit_transform(X_sparse), X_res_both)
        assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                     X_res_both)
def test_column_transformer_sparse_array():
    """Check that ColumnTransformer keeps sparse input sparse.

    Selecting the first column with ``remainder='drop'`` must return that
    column. ``remainder='passthrough'`` or selecting both columns must
    return the whole input.
    """
    X_sparse = sparse.eye(3, 2).tocsr()

    # no distinction between 1D and 2D
    X_res_first = X_sparse[:, 0]
    X_res_both = X_sparse

    def _check_output(ct, expected):
        # output is sparse; fit_transform and fit().transform() agree
        assert sparse.issparse(ct.fit_transform(X_sparse))
        assert_allclose_dense_sparse(ct.fit_transform(X_sparse), expected)
        assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                     expected)

    remainder_cases = [('drop', X_res_first), ('passthrough', X_res_both)]
    for col in [0, [0], slice(0, 1)]:
        for remainder, res in remainder_cases:
            _check_output(
                ColumnTransformer([('trans', Trans(), col)],
                                  remainder=remainder,
                                  sparse_threshold=0.8),
                res)

    for col in [[0, 1], slice(0, 2)]:
        _check_output(
            ColumnTransformer([('trans', Trans(), col)],
                              sparse_threshold=0.8),
            X_res_both)
def test_safe_sparse_dot_dense_output(dense_output):
    """Check the ``dense_output`` switch of ``safe_sparse_dot``.

    The product of two random sparse matrices must be sparse exactly when
    ``dense_output`` is false, and must equal ``A.dot(B)`` either way.
    """
    rng = np.random.RandomState(0)

    A = sparse.random(30, 10, density=0.1, random_state=rng)
    B = sparse.random(10, 20, density=0.1, random_state=rng)

    product = A.dot(B)
    actual = safe_sparse_dot(A, B, dense_output=dense_output)

    assert sparse.issparse(actual) == (not dense_output)

    # compare like with like: densify the reference when a dense result
    # was requested
    expected = product.toarray() if dense_output else product
    assert_allclose_dense_sparse(actual, expected)
Exemple #18
0
def test_20news_normalization():
    """Check the ``normalize`` option of the vectorized 20 newsgroups data.

    On the first 100 rows, the data fetched with ``normalize=True`` must
    equal ``normalize`` applied to the data fetched with
    ``normalize=False``, and every row must have unit Euclidean norm.

    Raises
    ------
    SkipTest
        If fetching the dataset raises ``IOError``.
    """
    fetch = datasets.fetch_20newsgroups_vectorized
    try:
        raw_bunch = fetch(normalize=False, download_if_missing=False)
        normalized_bunch = fetch(normalize=True, download_if_missing=False)
    except IOError:
        raise SkipTest("Download 20 newsgroups to run this test")

    X_norm = normalized_bunch['data'][:100]
    X = raw_bunch['data'][:100]

    assert_allclose_dense_sparse(X_norm, normalize(X))
    row_norms = np.linalg.norm(X_norm.todense(), axis=1)
    assert np.allclose(row_norms, 1)
Exemple #19
0
def test_imputers_add_indicator_sparse(imputer, marker):
    """Check ``add_indicator=True`` on a sparse matrix.

    The last four output columns must equal the expected missing-value
    indicator, and the remaining columns must equal the output obtained
    with ``add_indicator=False``.
    """
    rows = [[marker, 1, 5, marker, 1],
            [2, marker, 1, marker, 2],
            [6, 3, marker, marker, 3],
            [1, 2, 9, marker, 4]]
    indicator_rows = [[1., 0., 0., 1.],
                      [0., 1., 0., 1.],
                      [0., 0., 1., 1.],
                      [0., 0., 0., 1.]]
    X = sparse.csr_matrix(rows)
    X_true_indicator = sparse.csr_matrix(indicator_rows)

    imputer.set_params(missing_values=marker, add_indicator=True)
    X_trans = imputer.fit_transform(X)
    assert_allclose_dense_sparse(X_trans[:, -4:], X_true_indicator)
    assert_array_equal(imputer.indicator_.features_, np.array([0, 1, 2, 3]))

    # refit without the indicator and compare the imputed part only
    imputer.set_params(add_indicator=False)
    X_trans_no_indicator = imputer.fit_transform(X)
    assert_allclose_dense_sparse(X_trans[:, :-4], X_trans_no_indicator)
Exemple #20
0
def test_incremental_pca_batch_rank():
    """Check IncrementalPCA components agree across batch sizes."""
    # Test sample size in each batch is always larger or equal to n_components
    rng = np.random.RandomState(1999)
    n_samples, n_features = 100, 20
    X = rng.randn(n_samples, n_features)

    all_components = [
        IncrementalPCA(n_components=20, batch_size=batch_size)
        .fit(X).components_
        for batch_size in np.arange(20, 90, 3)
    ]

    # compare each fit with the one for the next batch size
    for components_i, components_j in zip(all_components,
                                          all_components[1:]):
        assert_allclose_dense_sparse(components_i, components_j)
def test_incremental_pca_batch_rank():
    """Fit IncrementalPCA with many batch sizes and compare components.

    Consecutive fits (batch sizes 20, 23, ..., 89) must produce matching
    ``components_``.
    """
    # Test sample size in each batch is always larger or equal to n_components
    rng = np.random.RandomState(1999)
    n_features = 20
    n_samples = 100
    X = rng.randn(n_samples, n_features)

    previous = None
    for batch_size in np.arange(20, 90, 3):
        ipca = IncrementalPCA(n_components=20, batch_size=batch_size)
        current = ipca.fit(X).components_
        if previous is not None:
            assert_allclose_dense_sparse(previous, current)
        previous = current
Exemple #22
0
def test_mb(dense_output):
    """Compare the MB feature map with the exact ``intersection`` kernel.

    The mean absolute error must stay below ``50 / n_grids_``. Refitting
    on a CSR copy of ``X`` must give the same transformed output.
    """
    # compute exact kernel
    kernel = intersection(X, Y)

    # approximate kernel mapping
    mb_transform = MB(n_components=10000, dense_output=dense_output)
    X_trans = mb_transform.fit_transform(X)
    Y_trans = mb_transform.transform(Y)
    kernel_approx = safe_sparse_dot(X_trans, Y_trans.T,
                                    dense_output=dense_output)

    mean_abs_error = np.mean(np.abs(kernel - kernel_approx))
    assert mean_abs_error < 50 / mb_transform.n_grids_

    # for sparse matrix
    X_trans_sp = mb_transform.fit_transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans_sp, X_trans)
Exemple #23
0
def test_subfeature_random_maclaurin_polynomial(degree):
    """Compare SubfeatureRandomMaclaurin with the exact polynomial kernel.

    Also checks that transforming ``X_sp`` gives the same features as
    transforming ``X``.
    """
    # compute exact kernel
    kernel = polynomial(X, Y, degree)

    # approximate kernel mapping
    rm_transform = SubfeatureRandomMaclaurin(n_components=500,
                                             degree=degree,
                                             random_state=rng,
                                             kernel='poly')
    X_trans = rm_transform.fit_transform(X)
    Y_trans = rm_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.001
    assert np.max(error) < 0.01  # nothing too far off
    assert mean_error < 0.005  # mean is fairly close

    assert_allclose_dense_sparse(X_trans, rm_transform.transform(X_sp))
def test_subsampled_random_hadamard_for_dot():
    """Compare SubsampledRandomHadamard features with the linear kernel.

    Also checks that a CSR copy of ``X`` is transformed like ``X`` itself.
    """
    # compute exact kernel
    kernel = np.dot(X, Y.T)

    # approximate kernel mapping
    rf_transform = SubsampledRandomHadamard(n_components=30, random_state=0)
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.05  # mean is fairly close

    # for sparse matrix
    assert_allclose_dense_sparse(X_trans,
                                 rf_transform.transform(csr_matrix(X)))
Exemple #25
0
def test_imputation_constant_float(array_constructor):
    """Check constant-strategy imputation of NaNs in float data.

    Every ``np.nan`` in the input must be replaced by ``fill_value=-1``.
    Input and expected output are both passed through
    ``array_constructor``.
    """
    nan = np.nan
    X = array_constructor(np.array([[nan, 1.1, 0, nan],
                                    [1.2, nan, 1.3, nan],
                                    [0, 0, nan, nan],
                                    [1.4, 1.5, 0, nan]]))
    X_true = array_constructor(np.array([[-1, 1.1, 0, -1],
                                         [1.2, -1, 1.3, -1],
                                         [0, 0, -1, -1],
                                         [1.4, 1.5, 0, -1]]))

    imputer = SimpleImputer(strategy="constant", fill_value=-1)
    assert_allclose_dense_sparse(imputer.fit_transform(X), X_true)
Exemple #26
0
def test_orthogonal_random_feature(gamma, n_components, use_offset):
    """Compare OrthogonalRandomFeature with the exact RBF kernel.

    All three parameters are forwarded to the transformer; ``gamma`` is
    also used for the exact kernel. A CSR copy of ``X`` must be
    transformed like ``X`` itself.
    """
    # compute exact kernel
    kernel = rbf_kernel(X, Y, gamma)

    # approximate kernel mapping
    rf_transform = OrthogonalRandomFeature(
        n_components=n_components,
        gamma=gamma,
        use_offset=use_offset,
        random_state=0,
    )
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.05  # mean is fairly close

    # for sparse matrix
    assert_allclose_dense_sparse(X_trans,
                                 rf_transform.transform(csr_matrix(X)))
Exemple #27
0
def test_fastfood_for_dot():
    """Compare FastFood features (``random_fourier=False``) with ``X @ Y.T``.

    Also checks that a CSR copy of ``X`` is transformed like ``X`` itself.
    """
    # compute exact kernel
    kernel = np.dot(X, Y.T)

    # approximate kernel mapping
    rf_transform = FastFood(n_components=64, random_fourier=False,
                            random_state=0)
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.05  # mean is fairly close

    # for sparse matrix
    assert_allclose_dense_sparse(X_trans,
                                 rf_transform.transform(csr_matrix(X)))
Exemple #28
0
def test_safe_indexing_mask_axis_1(array_type):
    """Check column masking with a boolean list and a boolean array.

    Parameters
    ----------
    array_type : {'array', 'sparse', 'dataframe'}
        Container built from the 3x3 data before indexing.

    Raises
    ------
    ValueError
        If ``array_type`` is not one of the supported names.
    """
    # regression test for #14510
    # check that boolean array-like and boolean array lead to the same indexing
    # even in NumPy < 1.12
    if array_type == 'array':
        array_constructor = np.asarray
    elif array_type == 'sparse':
        array_constructor = sp.csr_matrix
    elif array_type == 'dataframe':
        pd = pytest.importorskip('pandas')
        array_constructor = pd.DataFrame
    else:
        # An unknown name used to fall through and fail later with an
        # UnboundLocalError on ``array_constructor``.
        raise ValueError(
            "array_type must be 'array', 'sparse' or 'dataframe'; "
            "got {!r}".format(array_type))

    X = array_constructor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    mask = [True, False, True]
    mask_array = np.array(mask)
    X_masked = safe_indexing(X, mask, axis=1)
    X_masked_array = safe_indexing(X, mask_array, axis=1)
    assert_allclose_dense_sparse(X_masked, X_masked_array)
Exemple #29
0
def test_tensor_sketching(degree):
    """Compare TensorSketch features with the exact polynomial kernel.

    Also checks that transforming ``X_sp`` gives the same features as
    transforming ``X``.
    """
    # compute exact kernel
    kernel = polynomial(X, Y, degree)

    # approximate kernel mapping
    ts_transform = TensorSketch(n_components=1000, degree=degree,
                                random_state=rng)
    X_trans = ts_transform.fit_transform(X)
    Y_trans = ts_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.001
    assert np.max(error) < 0.01  # nothing too far off
    assert mean_error < 0.005  # mean is fairly close

    assert_allclose_dense_sparse(X_trans, ts_transform.transform(X_sp))
def test_assert_allclose_dense_sparse():
    """Exercise ``assert_allclose_dense_sparse`` on good and bad inputs.

    Covers equal inputs, unequal values, a dense/sparse mix and two sparse
    matrices of different shapes.
    """
    dense = np.arange(9).reshape(3, 3)
    as_csc = sparse.csc_matrix(dense)
    msg = "Not equal to tolerance "

    for X in (dense, as_csc):
        # basic compare
        assert_raise_message(AssertionError, msg,
                             assert_allclose_dense_sparse, X, X * 2)
        assert_allclose_dense_sparse(X, X)

    # mixing a dense array with a sparse matrix is rejected
    assert_raise_message(ValueError, "Can only compare two sparse",
                         assert_allclose_dense_sparse, dense, as_csc)

    A = sparse.diags(np.ones(5), offsets=0).tocsr()
    B = sparse.csr_matrix(np.ones((1, 5)))

    assert_raise_message(AssertionError, "Arrays are not equal",
                         assert_allclose_dense_sparse, B, A)
Exemple #31
0
def test_assert_allclose_dense_sparse():
    """Check the pass and failure modes of ``assert_allclose_dense_sparse``."""
    x = np.arange(9).reshape(3, 3)
    y = sparse.csc_matrix(x)
    tolerance_msg = "Not equal to tolerance "

    def _expect_failure(exc_type, message, left, right):
        assert_raise_message(exc_type, message,
                             assert_allclose_dense_sparse, left, right)

    for X in [x, y]:
        # basic compare
        _expect_failure(AssertionError, tolerance_msg, X, X * 2)
        assert_allclose_dense_sparse(X, X)

    _expect_failure(ValueError, "Can only compare two sparse", x, y)

    A = sparse.diags(np.ones(5), offsets=0).tocsr()
    B = sparse.csr_matrix(np.ones((1, 5)))

    _expect_failure(AssertionError, "Arrays are not equal", B, A)
Exemple #32
0
def test_subfeature_random_maclaurin_polynomial_bias_h01(bias, degree):
    """Compare SubfeatureRandomMaclaurin (``h01=True``) with the exact kernel.

    ``bias`` and ``degree`` are forwarded both to ``polynomial`` and to the
    transformer. Transforming ``X_sp`` must give the same features as
    transforming ``X``.
    """
    # compute exact kernel
    kernel = polynomial(X, Y, degree, bias=bias)

    # approximate kernel mapping
    print('bias: {} degree: {}'.format(bias, degree))
    rm_transform = SubfeatureRandomMaclaurin(
        n_components=5000,
        degree=degree,
        n_sub_features=10,
        random_state=rng,
        kernel='poly',
        bias=bias,
        h01=True,
    )
    X_trans = rm_transform.fit_transform(X)
    Y_trans = rm_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.05  # mean is fairly close

    assert_allclose_dense_sparse(X_trans, rm_transform.transform(X_sp))
Exemple #33
0
def test_subfeature_random_maclaurin_exp():
    """Compare SubfeatureRandomMaclaurin (``kernel='exp'``) with ``exp_kernel``.

    Both use ``gamma=0.1``. Transforming ``X_sp`` must give the same
    features as transforming ``X``.
    """
    gamma = 0.1

    # compute exact kernel
    kernel = exp_kernel(X, Y, gamma)

    # approximate kernel mapping
    rm_transform = SubfeatureRandomMaclaurin(n_components=10000,
                                             n_sub_features=10,
                                             random_state=rng,
                                             kernel='exp',
                                             gamma=gamma)
    X_trans = rm_transform.fit_transform(X)
    Y_trans = rm_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert mean_error < 0.05  # mean is fairly close

    assert_allclose_dense_sparse(X_trans, rm_transform.transform(X_sp))
Exemple #34
0
def test_anova_kernel(degree):
    """Compare SignedCirculantRandomKernel with the exact ANOVA kernel.

    Also checks that transforming ``X_sp`` gives the same features as
    transforming ``X``.
    """
    # compute exact kernel
    kernel = anova(X, Y, degree)

    # approximate kernel mapping
    rk_transform = SignedCirculantRandomKernel(
        n_components=1000,
        random_state=rng,
        kernel='anova',
        degree=degree,
    )
    X_trans = rk_transform.fit_transform(X)
    Y_trans = rk_transform.transform(Y)

    error = kernel - np.dot(X_trans, Y_trans.T)
    mean_error = np.mean(error)
    assert np.abs(mean_error) < 0.0001
    assert np.max(error) < 0.001  # nothing too far off
    assert mean_error < 0.0005  # mean is fairly close

    assert_allclose_dense_sparse(X_trans, rk_transform.transform(X_sp))
def test_random_fourier(gamma, n_components, use_offset):
    """Compare RandomFourier features with the exact RBF kernel.

    Previously an inner loop over hard-coded ``gamma``/``n_components``
    pairs shadowed the arguments and ``use_offset`` was fixed to ``True``,
    so every parametrized case ran the same computation. The arguments are
    now used directly, as in ``test_orthogonal_random_feature``.

    Parameters
    ----------
    gamma : used for the exact kernel and forwarded to the transformer.
    n_components : forwarded to the transformer.
    use_offset : forwarded to the transformer.
    """
    # compute exact kernel
    kernel = rbf_kernel(X, Y, gamma)
    # approximate kernel mapping
    rf_transform = RandomFourier(n_components=n_components,
                                 gamma=gamma,
                                 use_offset=use_offset,
                                 random_state=0)
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)

    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close
    # for sparse matrix
    X_trans_sp = rf_transform.transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_check_inverse():
    """Check the ``check_inverse`` option of FunctionTransformer.

    For dense, CSR and CSC input: a func/inverse pair that does not round
    trip must emit a UserWarning on ``fit``, while ``expm1``/``log1p`` must
    fit silently and invert back to the input. When either function is
    missing, no warning may be raised.
    """
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))
    candidates = [X_dense,
                  sparse.csr_matrix(X_dense),
                  sparse.csc_matrix(X_dense)]

    def _checked_transformer(func, inverse_func, **extra):
        return FunctionTransformer(func=func, inverse_func=inverse_func,
                                   check_inverse=True, validate=True,
                                   **extra)

    for X in candidates:
        accept_sparse = True if sparse.issparse(X) else False

        # sqrt followed by rounding does not recover the input
        trans = _checked_transformer(np.sqrt, np.around,
                                     accept_sparse=accept_sparse)
        assert_warns_message(UserWarning,
                             "The provided functions are not strictly"
                             " inverse of each other. If you are sure you"
                             " want to proceed regardless, set"
                             " 'check_inverse=False'.",
                             trans.fit, X)

        trans = _checked_transformer(np.expm1, np.log1p,
                                     accept_sparse=accept_sparse)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is not
    # provided.
    for func, inverse_func in ((np.expm1, None), (None, np.expm1)):
        trans = _checked_transformer(func, inverse_func)
        assert_no_warnings(trans.fit, X_dense)
Exemple #37
0
def test_imputation_constant_float(array_constructor):
    """Impute NaNs in float data with the constant ``-1``.

    The input and the expected result are both converted with
    ``array_constructor`` before the comparison.
    """
    # Test imputation using the constant strategy on floats
    raw = np.array([[np.nan, 1.1, 0, np.nan],
                    [1.2, np.nan, 1.3, np.nan],
                    [0, 0, np.nan, np.nan],
                    [1.4, 1.5, 0, np.nan]])
    raw_expected = np.array([[-1, 1.1, 0, -1],
                             [1.2, -1, 1.3, -1],
                             [0, 0, -1, -1],
                             [1.4, 1.5, 0, -1]])

    X = array_constructor(raw)
    X_true = array_constructor(raw_expected)

    X_trans = SimpleImputer(strategy="constant",
                            fill_value=-1).fit_transform(X)

    assert_allclose_dense_sparse(X_trans, X_true)
def test_pipeline_consistency(estimator, build_dataset,
                              with_preprocessor):
  """Check that ``make_pipeline(est)`` behaves like ``est`` itself.

  Adapted from scikit learn. Quadruplets learners are left out because
  they don't take a ``y`` in ``fit``. For every other estimator the
  outputs of ``score``, ``predict`` and ``transform`` (when available)
  must match between the bare estimator and a single-step pipeline.
  """
  quadruplets_names = [e.__class__.__name__
                       for (e, _) in quadruplets_learners]
  if estimator.__class__.__name__ in quadruplets_names:
    return

  input_data, y, preprocessor, _ = build_dataset(with_preprocessor)

  def make_random_state(estimator, in_pipeline):
    # Fit keyword carrying a fixed random state; only built for estimators
    # whose class name ends with '_Supervised'.
    name_estimator = estimator.__class__.__name__
    if not name_estimator.endswith('_Supervised'):
      return {}
    name_param = 'random_state'
    if in_pipeline:
      # pipeline fit parameters are prefixed with the step name
      name_param = name_estimator.lower() + '__' + name_param
    return {name_param: check_random_state(0)}

  def fit_score_args():
    return remove_y_quadruplets(estimator, input_data, y)

  estimator = clone(estimator)
  estimator.set_params(preprocessor=preprocessor)
  pipeline = make_pipeline(estimator)
  estimator.fit(*fit_score_args(),
                **make_random_state(estimator, False))
  pipeline.fit(*fit_score_args(),
               **make_random_state(estimator, True))

  if hasattr(estimator, 'score'):
    result = estimator.score(*fit_score_args())
    result_pipe = pipeline.score(*fit_score_args())
    assert_allclose_dense_sparse(result, result_pipe)

  if hasattr(estimator, 'predict'):
    result = estimator.predict(input_data)
    result_pipe = pipeline.predict(input_data)
    assert_allclose_dense_sparse(result, result_pipe)

  is_transformer = issubclass(estimator.__class__, TransformerMixin)
  if is_transformer and hasattr(estimator, 'transform'):
    result = estimator.transform(input_data)
    result_pipe = pipeline.transform(input_data)
    assert_allclose_dense_sparse(result, result_pipe)
def test_function_sampler_identity(X, y):
    """Check that a default FunctionSampler returns its input unchanged."""
    sampler = FunctionSampler()
    # ``fit_sample`` is the deprecated spelling of ``fit_resample``.
    X_res, y_res = sampler.fit_resample(X, y)
    assert_allclose_dense_sparse(X_res, X)
    assert_array_equal(y_res, y)
def test_check_array_force_all_finite_valid(value, force_all_finite, retype):
    """Check that ``check_array`` returns an accepted input unchanged.

    Parameters
    ----------
    value : scalar written into ``X[0, 0]`` before validation.
    force_all_finite : forwarded to ``check_array``.
    retype : callable converting the dense 2x2 float array into the
        container under test.
    """
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; ``np.float64`` names the same dtype explicitly.
    X = retype(np.arange(4).reshape(2, 2).astype(np.float64))
    X[0, 0] = value
    X_checked = check_array(X, force_all_finite=force_all_finite,
                            accept_sparse=True)
    assert_allclose_dense_sparse(X, X_checked)
def test_as_float_array_nan(X):
    """Check ``as_float_array`` keeps NaNs with ``force_all_finite='allow-nan'``.

    Two entries of ``X`` are set to NaN in place before the conversion.
    """
    for row, col in ((5, 0), (6, 1)):
        X[row, col] = np.nan
    X_converted = as_float_array(X, force_all_finite='allow-nan')
    assert_allclose_dense_sparse(X_converted, X)