def test_anova_kernel_sparse_subset(degree):
    # compute exact kernel
    n_components = 2000 * 5
    n_sub_features = 25
    gram = anova(X_sp, Y_sp, degree, True)
    # approximate kernel mapping
    rk_transform = SubfeatureRandomKernel(n_components=n_components,
                                          random_state=rng, kernel='anova',
                                          degree=degree,
                                          distribution="rademacher",
                                          n_sub_features=n_sub_features)
    X_trans = rk_transform.fit_transform(X_sp)
    Y_trans = rk_transform.transform(Y_sp)
    assert_almost_equal(rk_transform.random_weights_.nnz,
                        n_components * n_sub_features)
    kernel_approx = safe_sparse_dot(X_trans, Y_trans.T, dense_output=True)
    error = gram - kernel_approx
    assert np.abs(np.mean(error)) < 0.001
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.005  # mean is fairly close
    assert_almost_equal(n_sub_features * n_components,
                        rk_transform.random_weights_.nnz)
    assert_allclose_dense_sparse(
        n_sub_features * np.ones(n_components),
        np.array(abs(rk_transform.random_weights_).sum(axis=0))[0])
def test_check_array_force_all_finite_valid(value, force_all_finite, retype):
    # use the builtin float dtype (np.float is a deprecated alias)
    X = retype(np.arange(4).reshape(2, 2).astype(float))
    X[0, 0] = value
    X_checked = check_array(X, force_all_finite=force_all_finite,
                            accept_sparse=True)
    assert_allclose_dense_sparse(X, X_checked)
def test_safe_indexing_1d_container_mask(array_type, indices_type):
    indices = [False] + [True] * 2 + [False] * 6
    array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
    indices = _convert_container(indices, indices_type)
    subset = _safe_indexing(array, indices, axis=0)
    assert_allclose_dense_sparse(subset,
                                 _convert_container([2, 3], array_type))
def check_pipeline_consistency(name, estimator_orig):
    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    # check that make_pipeline(est) gives same score as est
    X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0, n_features=2, cluster_std=0.1)
    X -= X.min()
    X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
    estimator = clone(estimator_orig)
    y = multioutput_estimator_convert_y_2d(estimator, y)
    set_random_state(estimator)
    pipeline = make_pipeline(estimator)
    estimator.fit(X, y)
    pipeline.fit(X, y)

    funcs = ["score", "fit_transform"]

    for func_name in funcs:
        func = getattr(estimator, func_name, None)
        if func is not None:
            func_pipeline = getattr(pipeline, func_name)
            result = func(X, y)
            result_pipe = func_pipeline(X, y)
            assert_allclose_dense_sparse(result, result_pipe)
def test_signed_circulant_random_matrix_for_dot():
    # compute exact kernel
    kernel = np.dot(X, Y.T)
    # approximate kernel mapping
    n_components = X.shape[1]
    transformer = SignedCirculantRandomMatrix(n_components=n_components,
                                              random_fourier=False,
                                              random_state=0)
    X_trans = transformer.fit_transform(X)
    Y_trans = transformer.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close

    # for sparse matrix
    X_trans_sp = transformer.transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans, X_trans_sp)

    # comparing naive computation
    circ = circulant(ifft(transformer.random_weights_[0]).real)
    circ *= transformer.random_sign_.T
    X_trans_naive = np.dot(X, circ.T) / np.sqrt(n_components)
    assert_allclose(X_trans, X_trans_naive)
def test_function_sampler_func(X, y):

    def func(X, y):
        return X[:10], y[:10]

    sampler = FunctionSampler(func=func)
    X_res, y_res = sampler.fit_resample(X, y)
    assert_allclose_dense_sparse(X_res, X[:10])
    assert_array_equal(y_res, y[:10])
def test_function_sampler_func(X, y):

    def func(X, y):
        return X[:10], y[:10]

    sampler = FunctionSampler(func=func)
    X_res, y_res = sampler.fit_sample(X, y)
    assert_allclose_dense_sparse(X_res, X[:10])
    assert_array_equal(y_res, y[:10])
def check_fit_idempotent(name, estimator_orig):
    # Check that est.fit(X) is the same as est.fit(X).fit(X). Ideally we would
    # check that the estimated parameters during training (e.g. coefs_) are
    # the same, but having a universal comparison function for those
    # attributes is difficult and full of edge cases. So instead we check that
    # predict(), predict_proba(), decision_function() and transform() return
    # the same results.

    check_methods = ["predict", "transform", "decision_function",
                     "predict_proba"]
    rng = np.random.RandomState(0)

    if estimator_orig._get_tags()['non_deterministic']:
        msg = name + ' is non deterministic'
        raise SkipTest(msg)

    estimator = clone(estimator_orig)
    set_random_state(estimator)
    if 'warm_start' in estimator.get_params().keys():
        estimator.set_params(warm_start=False)

    n_samples = 100
    X, _ = _create_small_ts_dataset()
    X = X.reshape((X.shape[0], X.shape[1]))
    X = pairwise_estimator_convert_X(X, estimator)
    if is_regressor(estimator_orig):
        y = rng.normal(size=n_samples)
    else:
        y = rng.randint(low=0, high=2, size=n_samples)

    train, test = next(ShuffleSplit(test_size=.2, random_state=rng).split(X))
    X_train, y_train = _safe_split(estimator, X, y, train)
    X_test, y_test = _safe_split(estimator, X, y, test, train)

    # Fit for the first time
    estimator.fit(X_train, y_train)

    result = {method: getattr(estimator, method)(X_test)
              for method in check_methods
              if hasattr(estimator, method)}

    # Fit again
    set_random_state(estimator)
    estimator.fit(X_train, y_train)

    for method in check_methods:
        if hasattr(estimator, method):
            new_result = getattr(estimator, method)(X_test)
            if np.issubdtype(new_result.dtype, np.floating):
                tol = 2 * np.finfo(new_result.dtype).eps
            else:
                tol = 2 * np.finfo(np.float64).eps
            assert_allclose_dense_sparse(
                result[method], new_result,
                atol=max(tol, 1e-9), rtol=max(tol, 1e-7),
                err_msg="Idempotency check failed for method {}".format(method)
            )
def test_safe_indexing_1d_container(array_type, indices_type):
    indices = [1, 2]
    if indices_type == 'slice' and isinstance(indices[1], int):
        indices[1] += 1
    array = _convert_container([1, 2, 3, 4, 5, 6, 7, 8, 9], array_type)
    indices = _convert_container(indices, indices_type)
    subset = _safe_indexing(array, indices, axis=0)
    assert_allclose_dense_sparse(subset,
                                 _convert_container([2, 3], array_type))
def test_tfidf_transformer_sparse():
    X = sparse.rand(10, 20000, dtype=np.float64, random_state=42)
    X_csc = sparse.csc_matrix(X)
    X_csr = sparse.csr_matrix(X)

    X_trans_csc = TfidfTransformer().fit_transform(X_csc)
    X_trans_csr = TfidfTransformer().fit_transform(X_csr)
    assert_allclose_dense_sparse(X_trans_csc, X_trans_csr)
    assert X_trans_csc.format == X_trans_csr.format
def test_safe_indexing_2d_mask(array_type, indices_type, axis,
                               expected_subset):
    columns_name = ['col_0', 'col_1', 'col_2']
    array = _convert_container([[1, 2, 3], [4, 5, 6], [7, 8, 9]], array_type,
                               columns_name)
    indices = [False, True, True]
    indices = _convert_container(indices, indices_type)

    subset = _safe_indexing(array, indices, axis=axis)
    assert_allclose_dense_sparse(
        subset, _convert_container(expected_subset, array_type))
def test_function_sampler_func_kwargs(X, y):

    def func(X, y, ratio, random_state):
        rus = RandomUnderSampler(ratio=ratio, random_state=random_state)
        return rus.fit_sample(X, y)

    sampler = FunctionSampler(func=func, kw_args={'ratio': 'auto',
                                                  'random_state': 0})
    X_res, y_res = sampler.fit_sample(X, y)
    X_res_2, y_res_2 = RandomUnderSampler(random_state=0).fit_sample(X, y)
    assert_allclose_dense_sparse(X_res, X_res_2)
    assert_array_equal(y_res, y_res_2)
def test_safe_indexing_2d_read_only_axis_1(array_read_only, indices_read_only,
                                           array_type, indices_type, axis,
                                           expected_array):
    array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    if array_read_only:
        array.setflags(write=False)
    array = _convert_container(array, array_type)
    indices = np.array([1, 2])
    if indices_read_only:
        indices.setflags(write=False)
    indices = _convert_container(indices, indices_type)
    subset = _safe_indexing(array, indices, axis=axis)
    assert_allclose_dense_sparse(
        subset, _convert_container(expected_array, array_type))
def test_column_transformer_sparse_array():
    X_sparse = sparse.eye(3, 2).tocsr()

    # no distinction between 1D and 2D
    X_res_first = X_sparse[:, 0]
    X_res_both = X_sparse

    for col in [0, [0], slice(0, 1)]:
        for remainder, res in [('drop', X_res_first),
                               ('passthrough', X_res_both)]:
            ct = ColumnTransformer([('trans', Trans(), col)],
                                   remainder=remainder,
                                   sparse_threshold=0.8)
            assert_true(sparse.issparse(ct.fit_transform(X_sparse)))
            assert_allclose_dense_sparse(ct.fit_transform(X_sparse), res)
            assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                         res)

    for col in [[0, 1], slice(0, 2)]:
        ct = ColumnTransformer([('trans', Trans(), col)],
                               sparse_threshold=0.8)
        assert_true(sparse.issparse(ct.fit_transform(X_sparse)))
        assert_allclose_dense_sparse(ct.fit_transform(X_sparse), X_res_both)
        assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                     X_res_both)
def test_column_transformer_sparse_array():
    X_sparse = sparse.eye(3, 2).tocsr()

    # no distinction between 1D and 2D
    X_res_first = X_sparse[:, 0]
    X_res_both = X_sparse

    for col in [0, [0], slice(0, 1)]:
        for remainder, res in [('drop', X_res_first),
                               ('passthrough', X_res_both)]:
            ct = ColumnTransformer([('trans', Trans(), col)],
                                   remainder=remainder,
                                   sparse_threshold=0.8)
            assert sparse.issparse(ct.fit_transform(X_sparse))
            assert_allclose_dense_sparse(ct.fit_transform(X_sparse), res)
            assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                         res)

    for col in [[0, 1], slice(0, 2)]:
        ct = ColumnTransformer([('trans', Trans(), col)],
                               sparse_threshold=0.8)
        assert sparse.issparse(ct.fit_transform(X_sparse))
        assert_allclose_dense_sparse(ct.fit_transform(X_sparse), X_res_both)
        assert_allclose_dense_sparse(ct.fit(X_sparse).transform(X_sparse),
                                     X_res_both)
def test_safe_sparse_dot_dense_output(dense_output):
    rng = np.random.RandomState(0)

    A = sparse.random(30, 10, density=0.1, random_state=rng)
    B = sparse.random(10, 20, density=0.1, random_state=rng)

    expected = A.dot(B)
    actual = safe_sparse_dot(A, B, dense_output=dense_output)

    assert sparse.issparse(actual) == (not dense_output)

    if dense_output:
        expected = expected.toarray()
    assert_allclose_dense_sparse(actual, expected)
def test_20news_normalization():
    try:
        X = datasets.fetch_20newsgroups_vectorized(normalize=False,
                                                   download_if_missing=False)
        X_ = datasets.fetch_20newsgroups_vectorized(normalize=True,
                                                    download_if_missing=False)
    except IOError:
        raise SkipTest("Download 20 newsgroups to run this test")

    X_norm = X_['data'][:100]
    X = X['data'][:100]

    assert_allclose_dense_sparse(X_norm, normalize(X))
    assert np.allclose(np.linalg.norm(X_norm.todense(), axis=1), 1)
def test_imputers_add_indicator_sparse(imputer, marker):
    X = sparse.csr_matrix([
        [marker, 1, 5, marker, 1],
        [2, marker, 1, marker, 2],
        [6, 3, marker, marker, 3],
        [1, 2, 9, marker, 4]
    ])
    X_true_indicator = sparse.csr_matrix([
        [1., 0., 0., 1.],
        [0., 1., 0., 1.],
        [0., 0., 1., 1.],
        [0., 0., 0., 1.]
    ])
    imputer.set_params(missing_values=marker, add_indicator=True)

    X_trans = imputer.fit_transform(X)
    assert_allclose_dense_sparse(X_trans[:, -4:], X_true_indicator)
    assert_array_equal(imputer.indicator_.features_, np.array([0, 1, 2, 3]))

    imputer.set_params(add_indicator=False)
    X_trans_no_indicator = imputer.fit_transform(X)
    assert_allclose_dense_sparse(X_trans[:, :-4], X_trans_no_indicator)
def test_incremental_pca_batch_rank():
    # Test sample size in each batch is always larger or equal to n_components
    rng = np.random.RandomState(1999)
    n_samples = 100
    n_features = 20
    X = rng.randn(n_samples, n_features)

    all_components = []
    batch_sizes = np.arange(20, 90, 3)
    for batch_size in batch_sizes:
        ipca = IncrementalPCA(n_components=20, batch_size=batch_size).fit(X)
        all_components.append(ipca.components_)

    for components_i, components_j in zip(all_components[:-1],
                                          all_components[1:]):
        assert_allclose_dense_sparse(components_i, components_j)
def test_mb(dense_output):
    # compute exact kernel
    kernel = intersection(X, Y)
    # approximate kernel mapping
    mb_transform = MB(n_components=10000, dense_output=dense_output)
    X_trans = mb_transform.fit_transform(X)
    Y_trans = mb_transform.transform(Y)
    kernel_approx = safe_sparse_dot(X_trans, Y_trans.T,
                                    dense_output=dense_output)
    error = kernel - kernel_approx
    assert np.mean(np.abs(error)) < 50 / mb_transform.n_grids_

    # for sparse matrix
    X_trans_sp = mb_transform.fit_transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans_sp, X_trans)
def test_subfeature_random_maclaurin_polynomial(degree):
    # compute exact kernel
    kernel = polynomial(X, Y, degree)
    # approximate kernel mapping
    rm_transform = SubfeatureRandomMaclaurin(n_components=500, degree=degree,
                                             random_state=rng, kernel='poly')
    X_trans = rm_transform.fit_transform(X)
    Y_trans = rm_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.001
    assert np.max(error) < 0.01  # nothing too far off
    assert np.mean(error) < 0.005  # mean is fairly close

    X_trans_sp = rm_transform.transform(X_sp)
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_subsampled_random_hadamard_for_dot():
    # compute exact kernel
    kernel = np.dot(X, Y.T)
    # approximate kernel mapping
    rf_transform = SubsampledRandomHadamard(n_components=30, random_state=0)
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close

    # for sparse matrix
    X_trans_sp = rf_transform.transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_imputation_constant_float(array_constructor):
    # Test imputation using the constant strategy on floats
    X = np.array([
        [np.nan, 1.1, 0, np.nan],
        [1.2, np.nan, 1.3, np.nan],
        [0, 0, np.nan, np.nan],
        [1.4, 1.5, 0, np.nan]
    ])

    X_true = np.array([
        [-1, 1.1, 0, -1],
        [1.2, -1, 1.3, -1],
        [0, 0, -1, -1],
        [1.4, 1.5, 0, -1]
    ])

    X = array_constructor(X)
    X_true = array_constructor(X_true)

    imputer = SimpleImputer(strategy="constant", fill_value=-1)
    X_trans = imputer.fit_transform(X)

    assert_allclose_dense_sparse(X_trans, X_true)
def test_orthogonal_random_feature(gamma, n_components, use_offset):
    # compute exact kernel
    kernel = rbf_kernel(X, Y, gamma)
    # approximate kernel mapping
    rf_transform = OrthogonalRandomFeature(n_components=n_components,
                                           gamma=gamma, use_offset=use_offset,
                                           random_state=0)
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close

    # for sparse matrix
    X_trans_sp = rf_transform.transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_fastfood_for_dot():
    # compute exact kernel
    kernel = np.dot(X, Y.T)
    # approximate kernel mapping
    rf_transform = FastFood(n_components=64, random_fourier=False,
                            random_state=0)
    X_trans = rf_transform.fit_transform(X)
    Y_trans = rf_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close

    # for sparse matrix
    X_trans_sp = rf_transform.transform(csr_matrix(X))
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_safe_indexing_mask_axis_1(array_type):
    # regression test for #14510
    # check that boolean array-like and boolean array lead to the same
    # indexing even in NumPy < 1.12
    if array_type == 'array':
        array_constructor = np.asarray
    elif array_type == 'sparse':
        array_constructor = sp.csr_matrix
    elif array_type == 'dataframe':
        pd = pytest.importorskip('pandas')
        array_constructor = pd.DataFrame

    X = array_constructor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    mask = [True, False, True]
    mask_array = np.array(mask)
    X_masked = safe_indexing(X, mask, axis=1)
    X_masked_array = safe_indexing(X, mask_array, axis=1)
    assert_allclose_dense_sparse(X_masked, X_masked_array)
def test_tensor_sketching(degree):
    # compute exact kernel
    kernel = polynomial(X, Y, degree)
    # approximate kernel mapping
    ts_transform = TensorSketch(n_components=1000, degree=degree,
                                random_state=rng)
    X_trans = ts_transform.fit_transform(X)
    Y_trans = ts_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.001
    assert np.max(error) < 0.01  # nothing too far off
    assert np.mean(error) < 0.005  # mean is fairly close

    X_trans_sp = ts_transform.transform(X_sp)
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_assert_allclose_dense_sparse():
    x = np.arange(9).reshape(3, 3)
    msg = "Not equal to tolerance "
    y = sparse.csc_matrix(x)
    for X in [x, y]:
        # basic compare
        assert_raise_message(AssertionError, msg,
                             assert_allclose_dense_sparse, X, X * 2)
        assert_allclose_dense_sparse(X, X)

    assert_raise_message(ValueError, "Can only compare two sparse",
                         assert_allclose_dense_sparse, x, y)

    A = sparse.diags(np.ones(5), offsets=0).tocsr()
    B = sparse.csr_matrix(np.ones((1, 5)))
    assert_raise_message(AssertionError, "Arrays are not equal",
                         assert_allclose_dense_sparse, B, A)
def test_subfeature_random_maclaurin_polynomial_bias_h01(bias, degree):
    # compute exact kernel
    kernel = polynomial(X, Y, degree, bias=bias)
    # approximate kernel mapping
    print('bias: {} degree: {}'.format(bias, degree))
    rm_transform = SubfeatureRandomMaclaurin(n_components=5000, degree=degree,
                                             n_sub_features=10,
                                             random_state=rng, kernel='poly',
                                             bias=bias, h01=True)
    X_trans = rm_transform.fit_transform(X)
    Y_trans = rm_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close

    X_trans_sp = rm_transform.transform(X_sp)
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_subfeature_random_maclaurin_exp():
    # compute exact kernel
    kernel = exp_kernel(X, Y, 0.1)
    # approximate kernel mapping
    rm_transform = SubfeatureRandomMaclaurin(n_components=10000,
                                             n_sub_features=10,
                                             random_state=rng, kernel='exp',
                                             gamma=0.1)
    X_trans = rm_transform.fit_transform(X)
    Y_trans = rm_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.01
    assert np.max(error) < 0.1  # nothing too far off
    assert np.mean(error) < 0.05  # mean is fairly close

    X_trans_sp = rm_transform.transform(X_sp)
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_anova_kernel(degree):
    # compute exact kernel
    kernel = anova(X, Y, degree)
    # approximate kernel mapping
    rk_transform = SignedCirculantRandomKernel(n_components=1000,
                                               random_state=rng,
                                               kernel='anova', degree=degree)
    X_trans = rk_transform.fit_transform(X)
    Y_trans = rk_transform.transform(Y)
    kernel_approx = np.dot(X_trans, Y_trans.T)
    error = kernel - kernel_approx
    assert np.abs(np.mean(error)) < 0.0001
    assert np.max(error) < 0.001  # nothing too far off
    assert np.mean(error) < 0.0005  # mean is fairly close

    X_trans_sp = rk_transform.transform(X_sp)
    assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_random_fourier(gamma, n_components, use_offset):
    for gamma, n_components in zip([10, 100], [2048, 4096]):
        # compute exact kernel
        kernel = rbf_kernel(X, Y, gamma)
        # approximate kernel mapping
        rf_transform = RandomFourier(n_components=n_components, gamma=gamma,
                                     use_offset=True, random_state=0)
        X_trans = rf_transform.fit_transform(X)
        Y_trans = rf_transform.transform(Y)
        kernel_approx = np.dot(X_trans, Y_trans.T)
        error = kernel - kernel_approx
        assert np.abs(np.mean(error)) < 0.01
        assert np.max(error) < 0.1  # nothing too far off
        assert np.mean(error) < 0.05  # mean is fairly close

        # for sparse matrix
        X_trans_sp = rf_transform.transform(csr_matrix(X))
        assert_allclose_dense_sparse(X_trans, X_trans_sp)
def test_check_inverse():
    X_dense = np.array([1, 4, 9, 16], dtype=np.float64).reshape((2, 2))

    X_list = [X_dense, sparse.csr_matrix(X_dense), sparse.csc_matrix(X_dense)]

    for X in X_list:
        if sparse.issparse(X):
            accept_sparse = True
        else:
            accept_sparse = False
        trans = FunctionTransformer(func=np.sqrt,
                                    inverse_func=np.around,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True,
                                    validate=True)
        assert_warns_message(UserWarning,
                             "The provided functions are not strictly"
                             " inverse of each other. If you are sure you"
                             " want to proceed regardless, set"
                             " 'check_inverse=False'.",
                             trans.fit, X)

        trans = FunctionTransformer(func=np.expm1,
                                    inverse_func=np.log1p,
                                    accept_sparse=accept_sparse,
                                    check_inverse=True,
                                    validate=True)
        Xt = assert_no_warnings(trans.fit_transform, X)
        assert_allclose_dense_sparse(X, trans.inverse_transform(Xt))

    # check that we don't check inverse when one of the func or inverse is
    # not provided.
    trans = FunctionTransformer(func=np.expm1, inverse_func=None,
                                check_inverse=True, validate=True)
    assert_no_warnings(trans.fit, X_dense)
    trans = FunctionTransformer(func=None, inverse_func=np.expm1,
                                check_inverse=True, validate=True)
    assert_no_warnings(trans.fit, X_dense)
def test_pipeline_consistency(estimator, build_dataset, with_preprocessor):
    # Adapted from scikit learn
    # check that make_pipeline(est) gives same score as est
    # we do this test on all except quadruplets (since they don't have a y
    # in fit):
    if estimator.__class__.__name__ not in [e.__class__.__name__
                                            for (e, _) in
                                            quadruplets_learners]:
        input_data, y, preprocessor, _ = build_dataset(with_preprocessor)

        def make_random_state(estimator, in_pipeline):
            rs = {}
            name_estimator = estimator.__class__.__name__
            if name_estimator[-11:] == '_Supervised':
                name_param = 'random_state'
                if in_pipeline:
                    name_param = name_estimator.lower() + '__' + name_param
                rs[name_param] = check_random_state(0)
            return rs

        estimator = clone(estimator)
        estimator.set_params(preprocessor=preprocessor)
        pipeline = make_pipeline(estimator)
        estimator.fit(*remove_y_quadruplets(estimator, input_data, y),
                      **make_random_state(estimator, False))
        pipeline.fit(*remove_y_quadruplets(estimator, input_data, y),
                     **make_random_state(estimator, True))

        if hasattr(estimator, 'score'):
            result = estimator.score(*remove_y_quadruplets(estimator,
                                                           input_data, y))
            result_pipe = pipeline.score(*remove_y_quadruplets(estimator,
                                                               input_data, y))
            assert_allclose_dense_sparse(result, result_pipe)

        if hasattr(estimator, 'predict'):
            result = estimator.predict(input_data)
            result_pipe = pipeline.predict(input_data)
            assert_allclose_dense_sparse(result, result_pipe)

        if issubclass(estimator.__class__, TransformerMixin):
            if hasattr(estimator, 'transform'):
                result = estimator.transform(input_data)
                result_pipe = pipeline.transform(input_data)
                assert_allclose_dense_sparse(result, result_pipe)
def test_function_sampler_identity(X, y):
    sampler = FunctionSampler()
    X_res, y_res = sampler.fit_sample(X, y)
    assert_allclose_dense_sparse(X_res, X)
    assert_array_equal(y_res, y)
def test_as_float_array_nan(X):
    X[5, 0] = np.nan
    X[6, 1] = np.nan
    X_converted = as_float_array(X, force_all_finite='allow-nan')
    assert_allclose_dense_sparse(X_converted, X)