Esempio n. 1
0
def test_group_pca_matches_sklearn_iris_results(n_components, exclude_groups):
    """Compare ``GroupPCA`` with plain ``PCA`` on three stacked copies of iris.

    The iris feature matrix is tiled three times column-wise and each copy is
    declared as its own group.  Every transformed group is expected to equal
    the ``PCA`` projection of a single copy, while any group named in
    ``exclude_groups`` is expected to come back as the untouched input
    columns.

    Parameters
    ----------
    n_components
        Forwarded unchanged to both ``PCA`` and ``GroupPCA``.
    exclude_groups
        ``None``, a single integer group index, or an iterable of group
        indices; forwarded unchanged to ``GroupPCA``.
    """
    n_copies = 3
    X = iris.data
    n_features = X.shape[1]

    # One contiguous block of column indices per stacked copy.
    X_2 = np.hstack([X] * n_copies)
    groups = [
        np.arange(copy * n_features, (copy + 1) * n_features)
        for copy in range(n_copies)
    ]

    pca = PCA(n_components=n_components)
    gpca = GroupPCA(
        n_components=n_components,
        groups=groups,
        exclude_groups=exclude_groups,
    )

    X_r = pca.fit_transform(X)
    X_r2 = gpca.fit_transform(X_2)

    # Normalise ``exclude_groups`` to a list of indices so that a single
    # integer and an iterable of integers share one code path below.
    if exclude_groups is None:
        excluded = []
    elif isinstance(exclude_groups, int):
        excluded = [exclude_groups]
    else:
        excluded = list(exclude_groups)

    # Start from "every group was projected", then swap the raw columns back
    # in for the groups that were excluded from the decomposition.
    expected_blocks = [X_r] * n_copies
    for idx in excluded:
        expected_blocks[idx] = X

    assert_allclose(X_r2, np.hstack(expected_blocks))
Esempio n. 2
0
def test_theta_equals_zero_equals_unsupervised(binarize):
    """Check that ``SupervisedGroupPCA(theta=0.0)`` matches ``GroupPCA``.

    The diabetes feature matrix is duplicated column-wise to form two groups,
    both estimators are fit with three components per group on the same
    data and target, and their transformed outputs are asserted to be
    numerically close.

    Parameters
    ----------
    binarize
        When truthy, the target is replaced by a 0/1 integer array marking
        the values strictly above the target's median.
    """
    n_components = 3

    dataset = load_diabetes()
    features, target = dataset.data, dataset.target
    if binarize:
        target = (target > np.median(target)).astype(int)

    # Capture the per-copy width before stacking; the group index ranges
    # below are expressed in terms of the original column count.
    n_features = features.shape[1]
    groups = [
        np.arange(0, n_features),
        np.arange(n_features, 2 * n_features),
    ]
    stacked = np.hstack([features, features])

    unsupervised = GroupPCA(n_components=n_components, groups=groups)
    supervised = SupervisedGroupPCA(
        n_components=n_components,
        groups=groups,
        theta=0.0,
    )

    unsupervised_out = unsupervised.fit_transform(stacked, target)
    supervised_out = supervised.fit_transform(stacked, target)

    assert_allclose(unsupervised_out, supervised_out)
Esempio n. 3
0
def test_group_pca_inverse_matches_input():
    """Exercise ``GroupPCA`` round-tripping, feature naming and validation.

    Three things are checked on two stacked copies of the iris features:

    * ``inverse_transform(transform(X))`` is numerically close to ``X`` when
      ``n_components`` is left at its default;
    * ``get_feature_names()`` returns ``"<group>_pc<i>"`` names, with the
      ``"one"`` group listed before the ``"two"`` group;
    * fitting with a string ``exclude_groups`` raises ``TypeError``.
    """
    X = iris.data
    n_features = X.shape[1]
    X_2 = np.hstack([X, X])
    groups = [
        np.arange(0, n_features),
        np.arange(n_features, 2 * n_features),
    ]

    estimator = GroupPCA(groups=groups, group_names=["one", "two"])
    gpca = estimator.fit(X_2)

    # Round trip: project, then map back to the input space.
    projected = gpca.transform(X_2)
    restored = gpca.inverse_transform(projected)
    assert_allclose(restored, X_2)

    # The output columns are split evenly between the two groups.
    n_per_group = projected.shape[1] // 2
    expected_names = []
    expected_names.extend(f"one_pc{idx}" for idx in range(n_per_group))
    expected_names.extend(f"two_pc{idx}" for idx in range(n_per_group))
    assert gpca.get_feature_names() == expected_names

    # Construction stays inside the context manager so that a TypeError
    # raised by either the constructor or ``fit`` satisfies the check.
    with pytest.raises(TypeError):
        GroupPCA(groups=groups, exclude_groups="error").fit(X_2)
Esempio n. 4
0
# Metadata that accompanies the feature matrix on the dataset object.
feature_names = afqdata.feature_names
group_names = afqdata.group_names
subjects = afqdata.subjects

# To reduce computation time, we take the first 10 principal components of
# each feature group and perform SGL logistic regression on those components.
# To train an SGL model without group PCA, set ``do_group_pca = False``.
# This will increase the number of features by an order of magnitude and
# slow down execution time.
do_group_pca = True

# Settings for the no-PCA case; both are overridden below when group PCA
# is enabled.
transformer = False
transformer_kwargs = None

if do_group_pca:
    n_components = 10

    transformer = GroupPCA
    transformer_kwargs = {"groups": groups, "n_components": n_components}

    # The imputer and GroupPCA instance below exist only to recover the
    # group structure produced by the group-wise PCA, which is stored in
    # ``groups_pca``.  They are not used for anything else.
    imputer = SimpleImputer(strategy="median")
    gpca = GroupPCA(n_components=n_components, groups=groups)
    groups_pca = gpca.fit(imputer.fit_transform(X)).groups_out_

pipe = make_afq_classifier_pipeline(
    imputer_kwargs={"strategy": "median"},  # Use median imputation
    use_cv_estimator=True,  # Automatically determine the best hyperparameters
    feature_transformer=transformer,  # See note above about group PCA
    feature_transformer_kwargs=transformer_kwargs,
    scaler="standard",  # Standard scale the features before regression
    groups=groups_pca if do_group_pca else