Example #1
def test_one_hot_encoder_handle_unknown():
    X = np.array([[0, 2, 1], [1, 0, 3], [1, 0, 2]])
    X2 = np.array([[4, 1, 1]])

    # Test that the one-hot encoder raises an error for categories
    # unseen during fit but present during transform.
    oh = OneHotEncoder(handle_unknown='error')
    oh.fit(X)
    with pytest.raises(ValueError, match='Found unknown categories'):
        oh.transform(X2)

    # Test that the 'ignore' option encodes unknown categories as all zeros.
    oh = OneHotEncoder(handle_unknown='ignore')
    oh.fit(X)
    X2_passed = X2.copy()
    assert_array_equal(
        oh.transform(X2_passed).toarray(),
        np.array([[0.,  0.,  0.,  0.,  1.,  0.,  0.]]))
    # Ensure the array passed to transform was not modified in place.
    assert_allclose(X2, X2_passed)

    # Raise an error if handle_unknown is neither 'ignore' nor 'error'.
    oh = OneHotEncoder(handle_unknown='42')
    with pytest.raises(ValueError, match='handle_unknown should be either'):
        oh.fit(X)
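For reference, a minimal standalone sketch of the handle_unknown behaviour tested above (hypothetical toy data, a recent scikit-learn assumed):

import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Fit on the known categories only.
enc = OneHotEncoder(handle_unknown='ignore')
enc.fit(np.array([['a'], ['b']]))

# An unseen category ('c') is encoded as an all-zero row instead of
# raising a ValueError.
print(enc.transform(np.array([['c']])).toarray())  # [[0. 0.]]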
Example #2
def test_ridge_loo_cv_asym_scoring():
    # Check LOO cross-validation against GCV with an asymmetric scoring
    # function.
    scoring = 'explained_variance'
    n_samples, n_features = 10, 5
    n_targets = 1
    X, y = _make_sparse_offset_regression(n_samples=n_samples,
                                          n_features=n_features,
                                          n_targets=n_targets,
                                          random_state=0,
                                          shuffle=False,
                                          noise=1,
                                          n_informative=5)

    alphas = [1e-3, .1, 1., 10., 1e3]
    loo_ridge = RidgeCV(cv=n_samples,
                        fit_intercept=True,
                        alphas=alphas,
                        scoring=scoring,
                        normalize=True)

    gcv_ridge = RidgeCV(fit_intercept=True,
                        alphas=alphas,
                        scoring=scoring,
                        normalize=True)

    loo_ridge.fit(X, y)
    gcv_ridge.fit(X, y)

    assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
    assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)
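A minimal sketch of the equivalence exercised here, on hypothetical toy data: leaving cv=None makes RidgeCV take the efficient generalized cross-validation (GCV) path, while cv=n_samples forces explicit leave-one-out splitting; with matching scoring the two should agree.

import numpy as np
from sklearn.linear_model import RidgeCV

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = X @ np.array([1., 2., -1.]) + 0.1 * rng.randn(20)

alphas = [0.1, 1.0, 10.0]
# cv=None -> fast GCV path (equivalent to leave-one-out).
gcv = RidgeCV(alphas=alphas).fit(X, y)
# cv=n_samples -> explicit leave-one-out cross-validation.
loo = RidgeCV(alphas=alphas, cv=20,
              scoring='neg_mean_squared_error').fit(X, y)
print(gcv.alpha_, loo.alpha_)  # should match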
Example #3
def test_one_hot_encoder_pandas():
    pd = pytest.importorskip('pandas')

    X_df = pd.DataFrame({'A': ['a', 'b'], 'B': [1, 2]})

    Xtr = check_categorical_onehot(X_df)
    assert_allclose(Xtr, [[1, 0, 1, 0], [0, 1, 0, 1]])
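The helper check_categorical_onehot above comes from the test module; a minimal sketch with a plain OneHotEncoder shows the same result (each DataFrame column is treated as one categorical feature):

import pandas as pd
from sklearn.preprocessing import OneHotEncoder

X_df = pd.DataFrame({'A': ['a', 'b'], 'B': [1, 2]})
print(OneHotEncoder().fit_transform(X_df).toarray())
# [[1. 0. 1. 0.]
#  [0. 1. 0. 1.]]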
Example #4
def test_ridge_gcv_vs_ridge_loo_cv(gcv_mode, X_constructor, X_shape, y_shape,
                                   fit_intercept, normalize, noise):
    n_samples, n_features = X_shape
    n_targets = y_shape[-1] if len(y_shape) == 2 else 1
    X, y = _make_sparse_offset_regression(n_samples=n_samples,
                                          n_features=n_features,
                                          n_targets=n_targets,
                                          random_state=0,
                                          shuffle=False,
                                          noise=noise,
                                          n_informative=5)
    y = y.reshape(y_shape)

    alphas = [1e-3, .1, 1., 10., 1e3]
    loo_ridge = RidgeCV(cv=n_samples,
                        fit_intercept=fit_intercept,
                        alphas=alphas,
                        scoring='neg_mean_squared_error',
                        normalize=normalize)
    gcv_ridge = RidgeCV(gcv_mode=gcv_mode,
                        fit_intercept=fit_intercept,
                        alphas=alphas,
                        normalize=normalize)

    loo_ridge.fit(X, y)

    X_gcv = X_constructor(X)
    gcv_ridge.fit(X_gcv, y)

    assert gcv_ridge.alpha_ == pytest.approx(loo_ridge.alpha_)
    assert_allclose(gcv_ridge.coef_, loo_ridge.coef_, rtol=1e-3)
    assert_allclose(gcv_ridge.intercept_, loo_ridge.intercept_, rtol=1e-3)
Example #5
def test_solver_consistency(solver, proportion_nonzero, n_samples, dtype,
                            sparse_X, seed):
    alpha = 1.
    noise = 50. if proportion_nonzero > .9 else 500.
    X, y = _make_sparse_offset_regression(
        bias=10,
        n_features=30,
        proportion_nonzero=proportion_nonzero,
        noise=noise,
        random_state=seed,
        n_samples=n_samples)
    svd_ridge = Ridge(solver='svd', normalize=True, alpha=alpha).fit(X, y)
    X = X.astype(dtype, copy=False)
    y = y.astype(dtype, copy=False)
    if sparse_X:
        X = sp.csr_matrix(X)
    if solver == 'ridgecv':
        ridge = RidgeCV(alphas=[alpha], normalize=True)
    else:
        ridge = Ridge(solver=solver, tol=1e-10, normalize=True, alpha=alpha)
    ridge.fit(X, y)
    assert_allclose(ridge.coef_, svd_ridge.coef_, atol=1e-3, rtol=1e-3)
    assert_allclose(ridge.intercept_,
                    svd_ridge.intercept_,
                    atol=1e-3,
                    rtol=1e-3)
Example #6
def test_ridge_regression_dtype_stability(solver, seed):
    random_state = np.random.RandomState(seed)
    n_samples, n_features = 6, 5
    X = random_state.randn(n_samples, n_features)
    coef = random_state.randn(n_features)
    y = np.dot(X, coef) + 0.01 * random_state.randn(n_samples)
    alpha = 1.0
    results = dict()
    # XXX: Sparse CG seems to be far less numerically stable than the
    # others, maybe we should not enable float32 for this one.
    atol = 1e-3 if solver == "sparse_cg" else 1e-5
    for current_dtype in (np.float32, np.float64):
        results[current_dtype] = ridge_regression(X.astype(current_dtype),
                                                  y.astype(current_dtype),
                                                  alpha=alpha,
                                                  solver=solver,
                                                  random_state=random_state,
                                                  sample_weight=None,
                                                  max_iter=500,
                                                  tol=1e-10,
                                                  return_n_iter=False,
                                                  return_intercept=False)

    assert results[np.float32].dtype == np.float32
    assert results[np.float64].dtype == np.float64
    assert_allclose(results[np.float32], results[np.float64], atol=atol)
Example #7
def test_knn_imputer_zero_nan_imputes_the_same(na):
    # Test with an imputable matrix and compare results for different
    # missing_values markers (0 vs. na).
    X_zero = np.array([
        [1, 0, 1, 1, 1.],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 0],
        [6, 6, 0, 6, 6],
    ])

    X_nan = np.array([
        [1, na, 1, 1, 1.],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, na],
        [6, 6, na, 6, 6],
    ])

    X_imputed = np.array([
        [1, 2.5, 1, 1, 1.],
        [2, 2, 2, 2, 2],
        [3, 3, 3, 3, 1.5],
        [6, 6, 2.5, 6, 6],
    ])

    imputer_zero = KNNImputer(missing_values=0,
                              n_neighbors=2,
                              weights="uniform")

    imputer_nan = KNNImputer(missing_values=na,
                             n_neighbors=2,
                             weights="uniform")

    assert_allclose(imputer_zero.fit_transform(X_zero), X_imputed)
    assert_allclose(imputer_zero.fit_transform(X_zero),
                    imputer_nan.fit_transform(X_nan))
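A minimal usage sketch of KNNImputer on hypothetical data: each missing entry is replaced by the uniform mean of that feature over the k nearest neighbours (distances computed with the nan_euclidean metric).

import numpy as np
from sklearn.impute import KNNImputer

X = np.array([[1., 2., np.nan],
              [3., 4., 3.],
              [np.nan, 6., 5.],
              [8., 8., 7.]])
imputer = KNNImputer(n_neighbors=2, weights='uniform')
print(imputer.fit_transform(X))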
Example #8
def test_stacking_regressor_diabetes(cv, final_estimator, predict_params,
                                     passthrough):
    # Pre-scale the data to avoid convergence warnings; a pipeline is not
    # used so that the later assert on X_test still holds.
    X_train, X_test, y_train, _ = train_test_split(scale(X_diabetes),
                                                   y_diabetes,
                                                   random_state=42)
    estimators = [('lr', LinearRegression()), ('svr', LinearSVR())]
    reg = StackingRegressor(estimators=estimators,
                            final_estimator=final_estimator,
                            cv=cv,
                            passthrough=passthrough)
    reg.fit(X_train, y_train)
    result = reg.predict(X_test, **predict_params)
    expected_result_length = 2 if predict_params else 1
    if predict_params:
        assert len(result) == expected_result_length

    X_trans = reg.transform(X_test)
    expected_column_count = 12 if passthrough else 2
    assert X_trans.shape[1] == expected_column_count
    if passthrough:
        assert_allclose(X_test, X_trans[:, -10:])

    reg.set_params(lr='drop')
    reg.fit(X_train, y_train)
    reg.predict(X_test)

    X_trans = reg.transform(X_test)
    expected_column_count_drop = 11 if passthrough else 1
    assert X_trans.shape[1] == expected_column_count_drop
    if passthrough:
        assert_allclose(X_test, X_trans[:, -10:])
Example #9
def test_ovr_decision_function():
    # Test properties of the one-vs-rest decision function.

    predictions = np.array([[0, 1, 1], [0, 1, 0], [0, 1, 1], [0, 1, 1]])

    confidences = np.array([[-1e16, 0, -1e16], [1., 2., -3.], [-5., 2., 5.],
                            [-0.5, 0.2, 0.5]])

    n_classes = 3

    dec_values = _ovr_decision_function(predictions, confidences, n_classes)

    # check that the decision values are within 0.5 range of the votes
    votes = np.array([[1, 0, 2], [1, 1, 1], [1, 0, 2], [1, 0, 2]])

    assert_allclose(votes, dec_values, atol=0.5)

    # Check that the predictions are what we expect: the class with the
    # highest vote, or with the highest confidence in case of a tie.
    # For the second sample there is a tie (which should be won by class 1).
    expected_prediction = np.array([2, 1, 2, 2])
    assert_array_equal(np.argmax(dec_values, axis=1), expected_prediction)

    # The third and fourth samples have the same votes, but the third has
    # higher confidence; this should be reflected in the decision values.
    assert dec_values[2, 2] > dec_values[3, 2]

    # assert subset invariance.
    dec_values_one = [
        _ovr_decision_function(np.array([predictions[i]]),
                               np.array([confidences[i]]), n_classes)[0]
        for i in range(4)
    ]

    assert_allclose(dec_values, dec_values_one, atol=1e-6)
Example #10
def test_iterative_imputer_additive_matrix():
    rng = np.random.RandomState(0)
    n = 100
    d = 10
    A = rng.randn(n, d)
    B = rng.randn(n, d)
    X_filled = np.zeros(A.shape)
    for i in range(d):
        for j in range(d):
            X_filled[:, (i+j) % d] += (A[:, i] + B[:, j]) / 2
    # a quarter is randomly missing
    nan_mask = rng.rand(n, d) < 0.25
    X_missing = X_filled.copy()
    X_missing[nan_mask] = np.nan

    # split up data
    n = n // 2
    X_train = X_missing[:n]
    X_test_filled = X_filled[n:]
    X_test = X_missing[n:]

    imputer = IterativeImputer(max_iter=10,
                               verbose=1,
                               random_state=rng).fit(X_train)
    X_test_est = imputer.transform(X_test)
    assert_allclose(X_test_filled, X_test_est, rtol=1e-3, atol=0.01)
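Note that IterativeImputer is flagged experimental, so it has to be enabled explicitly before import; a minimal usage sketch on hypothetical data:

import numpy as np
# Experimental: this import enables IterativeImputer as a side effect.
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

X = np.array([[1., 2.], [3., 6.], [4., 8.], [np.nan, 3.], [7., np.nan]])
# Each feature with missing values is modelled as a regression on the
# other features, in round-robin fashion, for up to max_iter rounds.
print(IterativeImputer(max_iter=10, random_state=0).fit_transform(X))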
Example #11
def test_stacking_with_sample_weight(stacker, X, y):
    # Check that sample weights have an influence on the fit.
    # Note: ConvergenceWarnings are caught because convergence is not a
    # concern here.
    n_half_samples = len(y) // 2
    total_sample_weight = np.array([0.1] * n_half_samples + [0.9] *
                                   (len(y) - n_half_samples))
    X_train, X_test, y_train, _, sample_weight_train, _ = train_test_split(
        X, y, total_sample_weight, random_state=42)

    with ignore_warnings(category=ConvergenceWarning):
        stacker.fit(X_train, y_train)
    y_pred_no_weight = stacker.predict(X_test)

    with ignore_warnings(category=ConvergenceWarning):
        stacker.fit(X_train, y_train, sample_weight=np.ones(y_train.shape))
    y_pred_unit_weight = stacker.predict(X_test)

    assert_allclose(y_pred_no_weight, y_pred_unit_weight)

    with ignore_warnings(category=ConvergenceWarning):
        stacker.fit(X_train, y_train, sample_weight=sample_weight_train)
    y_pred_biased = stacker.predict(X_test)

    assert np.abs(y_pred_no_weight - y_pred_biased).sum() > 0
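A minimal sketch of the sample_weight plumbing being tested, with hypothetical base estimators: StackingRegressor.fit forwards sample_weight to every underlying estimator.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression, Ridge

X, y = make_regression(n_samples=50, n_features=4, random_state=0)
reg = StackingRegressor(
    estimators=[('lr', LinearRegression()), ('ridge', Ridge())],
    final_estimator=Ridge())
# sample_weight is passed through to each base estimator's fit.
reg.fit(X, y, sample_weight=np.linspace(0.1, 1.0, 50))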
Example #12
def test_iterative_imputer_early_stopping():
    rng = np.random.RandomState(0)
    n = 50
    d = 5
    A = rng.rand(n, 1)
    B = rng.rand(1, d)
    X = np.dot(A, B)
    nan_mask = rng.rand(n, d) < 0.5
    X_missing = X.copy()
    X_missing[nan_mask] = np.nan

    imputer = IterativeImputer(max_iter=100,
                               tol=1e-2,
                               sample_posterior=False,
                               verbose=1,
                               random_state=rng)
    X_filled_100 = imputer.fit_transform(X_missing)
    assert len(imputer.imputation_sequence_) == d * imputer.n_iter_

    imputer = IterativeImputer(max_iter=imputer.n_iter_,
                               sample_posterior=False,
                               verbose=1,
                               random_state=rng)
    X_filled_early = imputer.fit_transform(X_missing)
    assert_allclose(X_filled_100, X_filled_early, atol=1e-7)

    imputer = IterativeImputer(max_iter=100,
                               tol=0,
                               sample_posterior=False,
                               verbose=1,
                               random_state=rng)
    imputer.fit(X_missing)
    assert imputer.n_iter_ == imputer.max_iter
Example #13
def test_iterative_imputer_skip_non_missing(skip_complete):
    # Check the imputation strategy when missing data are present in the
    # test set only.
    # Taken from: https://github.com/scikit-learn/scikit-learn/issues/14383
    rng = np.random.RandomState(0)
    X_train = np.array([
        [5, 2, 2, 1],
        [10, 1, 2, 7],
        [3, 1, 1, 1],
        [8, 4, 2, 2]
    ])
    X_test = np.array([
        [np.nan, 2, 4, 5],
        [np.nan, 4, 1, 2],
        [np.nan, 1, 10, 1]
    ])
    imputer = IterativeImputer(
        initial_strategy='mean', skip_complete=skip_complete, random_state=rng
    )
    X_test_est = imputer.fit(X_train).transform(X_test)
    if skip_complete:
        # impute with the initial strategy: 'mean'
        assert_allclose(X_test_est[:, 0], np.mean(X_train[:, 0]))
    else:
        assert_allclose(X_test_est[:, 0], [11, 7, 12], rtol=1e-4)
Example #14
def test_check_sample_weight():
    # check array order
    sample_weight = np.ones(10)[::2]
    assert not sample_weight.flags["C_CONTIGUOUS"]
    sample_weight = _check_sample_weight(sample_weight, X=np.ones((5, 1)))
    assert sample_weight.flags["C_CONTIGUOUS"]

    # check None input
    sample_weight = _check_sample_weight(None, X=np.ones((5, 2)))
    assert_allclose(sample_weight, np.ones(5))

    # check numbers input
    sample_weight = _check_sample_weight(2.0, X=np.ones((5, 2)))
    assert_allclose(sample_weight, 2 * np.ones(5))

    # check wrong number of dimensions
    with pytest.raises(ValueError,
                       match="Sample weights must be 1D array or scalar"):
        _check_sample_weight(np.ones((2, 4)), X=np.ones((2, 2)))

    # check incorrect n_samples
    msg = r"sample_weight.shape == \(4,\), expected \(2,\)!"
    with pytest.raises(ValueError, match=msg):
        _check_sample_weight(np.ones(4), X=np.ones((2, 2)))

    # float32 dtype is preserved
    X = np.ones((5, 2))
    sample_weight = np.ones(5, dtype=np.float32)
    sample_weight = _check_sample_weight(sample_weight, X)
    assert sample_weight.dtype == np.float32

    # int dtype will be converted to float64 instead
    X = np.ones((5, 2), dtype=np.int64)
    sample_weight = _check_sample_weight(None, X, dtype=X.dtype)
    assert sample_weight.dtype == np.float64
Example #15
def test_dtype_match(solver):
    rng = np.random.RandomState(0)
    alpha = 1.0

    n_samples, n_features = 6, 5
    X_64 = rng.randn(n_samples, n_features)
    y_64 = rng.randn(n_samples)
    X_32 = X_64.astype(np.float32)
    y_32 = y_64.astype(np.float32)

    tol = 2 * np.finfo(np.float32).resolution
    # Check type consistency for 32-bit input.
    ridge_32 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=tol)
    ridge_32.fit(X_32, y_32)
    coef_32 = ridge_32.coef_

    # Check type consistency for 64-bit input.
    ridge_64 = Ridge(alpha=alpha, solver=solver, max_iter=500, tol=tol)
    ridge_64.fit(X_64, y_64)
    coef_64 = ridge_64.coef_

    # Do the actual checks at once for easier debugging.
    assert coef_32.dtype == X_32.dtype
    assert coef_64.dtype == X_64.dtype
    assert ridge_32.predict(X_32).dtype == X_32.dtype
    assert ridge_64.predict(X_64).dtype == X_64.dtype
    assert_allclose(ridge_32.coef_, ridge_64.coef_, rtol=1e-4, atol=5e-4)
Example #16
def test_inverse_transform(algo, X_sparse):
    # We need a lot of components for the reconstruction to be "almost
    # equal" in all positions. XXX Test means or sums instead?
    tsvd = TruncatedSVD(n_components=52, random_state=42, algorithm=algo)
    Xt = tsvd.fit_transform(X_sparse)
    Xinv = tsvd.inverse_transform(Xt)
    assert_allclose(Xinv, X_sparse.toarray(), rtol=1e-1, atol=2e-1)
Example #17
def test_threshold_and_max_features():
    X, y = datasets.make_classification(n_samples=1000,
                                        n_features=10,
                                        n_informative=3,
                                        n_redundant=0,
                                        n_repeated=0,
                                        shuffle=False,
                                        random_state=0)
    est = RandomForestClassifier(n_estimators=50, random_state=0)

    transformer1 = SelectFromModel(estimator=est,
                                   max_features=3,
                                   threshold=-np.inf)
    X_new1 = transformer1.fit_transform(X, y)

    transformer2 = SelectFromModel(estimator=est, threshold=0.04)
    X_new2 = transformer2.fit_transform(X, y)

    transformer3 = SelectFromModel(estimator=est,
                                   max_features=3,
                                   threshold=0.04)
    X_new3 = transformer3.fit_transform(X, y)
    assert X_new3.shape[1] == min(X_new1.shape[1], X_new2.shape[1])
    selected_indices = transformer3.transform(
        np.arange(X.shape[1])[np.newaxis, :])
    assert_allclose(X_new3, X[:, selected_indices[0]])
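A minimal sketch of combining the two constraints on hypothetical data: with both threshold and max_features set, a feature is kept only if it satisfies both, so the selection is the intersection checked above.

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

X, y = make_classification(n_samples=200, n_features=8, random_state=0)
sel = SelectFromModel(RandomForestClassifier(n_estimators=20, random_state=0),
                      max_features=3, threshold=0.05)
sel.fit(X, y)
print(sel.get_support())  # boolean mask of the retained features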
Example #18
def test_knn_imputer_distance_weighted_not_enough_neighbors(
        na, working_memory):
    X = np.array([[3, na], [2, na], [na, 4], [5, 6], [6, 8], [na, 5]])

    dist = pairwise_distances(X,
                              metric="nan_euclidean",
                              squared=False,
                              missing_values=na)

    X_01 = np.average(X[3:5, 1], weights=1 / dist[0, 3:5])
    X_11 = np.average(X[3:5, 1], weights=1 / dist[1, 3:5])
    X_20 = np.average(X[3:5, 0], weights=1 / dist[2, 3:5])
    X_50 = np.average(X[3:5, 0], weights=1 / dist[5, 3:5])

    X_expected = np.array([[3, X_01], [2, X_11], [X_20, 4], [5, 6], [6, 8],
                           [X_50, 5]])

    with config_context(working_memory=working_memory):
        knn_3 = KNNImputer(missing_values=na,
                           n_neighbors=3,
                           weights='distance')
        assert_allclose(knn_3.fit_transform(X), X_expected)

        knn_4 = KNNImputer(missing_values=na,
                           n_neighbors=4,
                           weights='distance')
        assert_allclose(knn_4.fit_transform(X), X_expected)
Example #19
def test_iterative_imputer_all_missing():
    n = 100
    d = 3
    X = np.zeros((n, d))
    imputer = IterativeImputer(missing_values=0, max_iter=1)
    X_imputed = imputer.fit_transform(X)
    assert_allclose(X_imputed, imputer.initial_imputer_.transform(X))
Example #20
def test_transform_target_regressor_multi_to_single():
    X = friedman[0]
    y = np.transpose([friedman[1], (friedman[1]**2 + 1)])

    def func(y):
        out = np.sqrt(y[:, 0]**2 + y[:, 1]**2)
        return out[:, np.newaxis]

    def inverse_func(y):
        return y

    tt = TransformedTargetRegressor(func=func,
                                    inverse_func=inverse_func,
                                    check_inverse=False)
    tt.fit(X, y)
    y_pred_2d_func = tt.predict(X)
    assert y_pred_2d_func.shape == (100, 1)

    # Force the function to return only a 1D array.
    def func(y):
        return np.sqrt(y[:, 0]**2 + y[:, 1]**2)

    tt = TransformedTargetRegressor(func=func,
                                    inverse_func=inverse_func,
                                    check_inverse=False)
    tt.fit(X, y)
    y_pred_1d_func = tt.predict(X)
    assert y_pred_1d_func.shape == (100, 1)

    assert_allclose(y_pred_1d_func, y_pred_2d_func)
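A minimal sketch of TransformedTargetRegressor with a conventional func/inverse_func pair (log/exp) on hypothetical data; the regressor is fitted on func(y) and predictions are mapped back through inverse_func:

import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X = rng.rand(100, 1)
y = np.exp(2 * X.ravel() + 0.1 * rng.randn(100))  # strictly positive target

tt = TransformedTargetRegressor(regressor=LinearRegression(),
                                func=np.log, inverse_func=np.exp)
tt.fit(X, y)
print(tt.predict(X[:3]))  # predictions on the original scale of y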
Example #21
def test_pca_svd_solver_auto(data, n_components, expected_solver):
    pca_auto = PCA(n_components=n_components, random_state=0)
    pca_test = PCA(n_components=n_components,
                   svd_solver=expected_solver,
                   random_state=0)
    pca_auto.fit(data)
    pca_test.fit(data)
    assert_allclose(pca_auto.components_, pca_test.components_)
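As a rough guide (for the scikit-learn versions these tests target), svd_solver='auto' uses the full eigensolver for small matrices and picks the randomized solver when the data are large and few components are requested; a hedged sketch:

import numpy as np
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA

X, _ = load_digits(return_X_y=True)  # 1797 x 64
# With many samples and few components, 'auto' should pick 'randomized'.
pca_auto = PCA(n_components=10, svd_solver='auto', random_state=0).fit(X)
pca_rand = PCA(n_components=10, svd_solver='randomized',
               random_state=0).fit(X)
print(np.allclose(pca_auto.components_, pca_rand.components_))  # True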
Example #22
def test_pca_check_projection_list(svd_solver):
    # Test that the projection of data is correct
    X = [[1.0, 0.0], [0.0, 1.0]]
    pca = PCA(n_components=1, svd_solver=svd_solver, random_state=0)
    X_trans = pca.fit_transform(X)
    assert X_trans.shape == (2, 1)
    assert_allclose(X_trans.mean(), 0.00, atol=1e-12)
    assert_allclose(X_trans.std(), 0.71, rtol=5e-3)
Example #23
def test_pca_explained_variance_empirical(X, svd_solver):
    pca = PCA(n_components=2, svd_solver=svd_solver, random_state=0)
    X_pca = pca.fit_transform(X)
    assert_allclose(pca.explained_variance_, np.var(X_pca, ddof=1, axis=0))

    expected_result = np.linalg.eig(np.cov(X, rowvar=False))[0]
    expected_result = sorted(expected_result, reverse=True)[:2]
    assert_allclose(pca.explained_variance_, expected_result, rtol=5e-3)
Example #24
def test_pca_score_consistency_solvers(svd_solver):
    # Check the consistency of score between solvers
    X, _ = datasets.load_digits(return_X_y=True)
    pca_full = PCA(n_components=30, svd_solver='full', random_state=0)
    pca_other = PCA(n_components=30, svd_solver=svd_solver, random_state=0)
    pca_full.fit(X)
    pca_other.fit(X)
    assert_allclose(pca_full.score(X), pca_other.score(X), rtol=5e-6)
Example #25
def test_knn_imputer_drops_all_nan_features(na):
    X1 = np.array([[na, 1], [na, 2]])
    knn = KNNImputer(missing_values=na, n_neighbors=1)
    X1_expected = np.array([[1], [2]])
    assert_allclose(knn.fit_transform(X1), X1_expected)

    X2 = np.array([[1, 2], [3, na]])
    X2_expected = np.array([[2], [1.5]])
    assert_allclose(knn.transform(X2), X2_expected)
Example #26
def test_knn_imputer_one_n_neighbors(na):

    X = np.array([[0, 0], [na, 2], [4, 3], [5, na], [7, 7], [na, 8], [14, 13]])

    X_imputed = np.array([[0, 0], [4, 2], [4, 3], [5, 3], [7, 7], [7, 8],
                          [14, 13]])

    imputer = KNNImputer(n_neighbors=1, missing_values=na)

    assert_allclose(imputer.fit_transform(X), X_imputed)
Example #27
def test_nrm2(dtype):
    nrm2 = _nrm2_memview[NUMPY_TO_CYTHON[dtype]]

    rng = np.random.RandomState(0)
    x = rng.random_sample(10).astype(dtype, copy=False)

    expected = np.linalg.norm(x)
    actual = nrm2(x)

    assert_allclose(actual, expected, rtol=RTOL[dtype])
Example #28
def test_asum(dtype):
    asum = _asum_memview[NUMPY_TO_CYTHON[dtype]]

    rng = np.random.RandomState(0)
    x = rng.random_sample(10).astype(dtype, copy=False)

    expected = np.abs(x).sum()
    actual = asum(x)

    assert_allclose(actual, expected, rtol=RTOL[dtype])
Example #29
def test_pca_deterministic_output(svd_solver):
    rng = np.random.RandomState(0)
    X = rng.rand(10, 10)

    transformed_X = np.zeros((20, 2))
    for i in range(20):
        pca = PCA(n_components=2, svd_solver=svd_solver, random_state=rng)
        transformed_X[i, :] = pca.fit_transform(X)[0]
    assert_allclose(transformed_X,
                    np.tile(transformed_X[0, :], 20).reshape(20, 2))
Example #30
def test_precomputed_dists():
    redX = X[::2]
    dists = pairwise_distances(redX, metric='euclidean')
    clust1 = OPTICS(min_samples=10, algorithm='brute',
                    metric='precomputed').fit(dists)
    clust2 = OPTICS(min_samples=10, algorithm='brute',
                    metric='euclidean').fit(redX)

    assert_allclose(clust1.reachability_, clust2.reachability_)
    assert_array_equal(clust1.labels_, clust2.labels_)
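A minimal standalone sketch of the precomputed-distance path on hypothetical data (the test above reuses a module-level X):

import numpy as np
from sklearn.cluster import OPTICS
from sklearn.metrics import pairwise_distances

rng = np.random.RandomState(0)
X = np.vstack([rng.randn(30, 2), rng.randn(30, 2) + 5])

D = pairwise_distances(X, metric='euclidean')
# Fitting on the precomputed matrix should match fitting on raw points.
clust = OPTICS(min_samples=10, algorithm='brute',
               metric='precomputed').fit(D)
print(clust.labels_[:5])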