Example #1
def test_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3, method='lars', alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    if sys.platform == 'win32':  # fake parallelism for win32
        import sklearn.externals.joblib.parallel as joblib_par
        _mp = joblib_par.multiprocessing
        joblib_par.multiprocessing = None
        try:
            spca = SparsePCA(n_components=3, n_jobs=2, random_state=0,
                             alpha=alpha).fit(Y)
            U2 = spca.transform(Y)
        finally:
            joblib_par.multiprocessing = _mp
    else:  # we can efficiently use parallelism
        spca = SparsePCA(n_components=3, n_jobs=2, method='lars', alpha=alpha,
                         random_state=0).fit(Y)
        U2 = spca.transform(Y)
    assert_true(not np.all(spca_lars.components_ == 0))
    assert_array_almost_equal(U1, U2)
    # Test that CD gives similar results
    spca_lasso = SparsePCA(n_components=3, method='cd', random_state=0,
                           alpha=alpha)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
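Examples #1 through #5 and #27 all call a generate_toy_data helper from scikit-learn's sparse-PCA test module that this page never shows. A minimal compatible sketch follows; the blob placement and noise level are assumptions, not the exact upstream code:

import numpy as np
from sklearn.utils import check_random_state

def generate_toy_data(n_components, n_samples, image_size, random_state=None):
    # Low-rank data whose atoms are small localized blobs on an image grid
    n_features = image_size[0] * image_size[1]
    rng = check_random_state(random_state)
    U = rng.randn(n_samples, n_components)    # codes
    V = np.zeros((n_components, n_features))  # sparse, localized atoms
    centers = [(3, 3), (6, 7), (8, 1)][:n_components]
    for k, (x, y) in enumerate(centers):
        img = np.zeros(image_size)
        img[max(x - 1, 0):x + 2, max(y - 1, 0):y + 2] = 1.0  # 3x3 blob, clipped at edges
        V[k] = img.ravel()
    Y = np.dot(U, V) + 0.1 * rng.randn(n_samples, n_features)  # noisy mixture
    return Y, U, V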
Example #2
def test_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3,
                          method='lars',
                          alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    spca = SparsePCA(n_components=3,
                     n_jobs=2,
                     method='lars',
                     alpha=alpha,
                     random_state=0).fit(Y)
    U2 = spca.transform(Y)
    assert_true(not np.all(spca_lars.components_ == 0))
    assert_array_almost_equal(U1, U2)
    # Test that CD gives similar results
    spca_lasso = SparsePCA(n_components=3,
                           method='cd',
                           random_state=0,
                           alpha=alpha)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
Example #3
def test_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3,
                          method='lars',
                          alpha=alpha,
                          random_state=0)
    spca_lars.fit(Y)

    # Test that CD gives similar results
    spca_lasso = SparsePCA(n_components=3,
                           method='cd',
                           random_state=0,
                           alpha=alpha)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)

    # Test that deprecated ridge_alpha parameter throws warning
    warning_msg = "The ridge_alpha parameter on transform()"
    assert_warns_message(DeprecationWarning,
                         warning_msg,
                         spca_lars.transform,
                         Y,
                         ridge_alpha=0.01)
    assert_warns_message(DeprecationWarning,
                         warning_msg,
                         spca_lars.transform,
                         Y,
                         ridge_alpha=None)
Example #4
def test_fit_transform_tall():
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 65, (8, 8), random_state=rng)  # tall array
    spca_lars = SparsePCA(n_components=3, method='lars', random_state=rng)
    U1 = spca_lars.fit_transform(Y)
    spca_lasso = SparsePCA(n_components=3, method='cd', random_state=rng)
    U2 = spca_lasso.fit(Y).transform(Y)
    assert_array_almost_equal(U1, U2)
Example #5
def test_fit_transform():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=0)
    spca_lars.fit(Y)

    # Test that CD gives similar results
    spca_lasso = SparsePCA(n_components=3, method="cd", random_state=0, alpha=alpha)
    spca_lasso.fit(Y)
    assert_array_almost_equal(spca_lasso.components_, spca_lars.components_)
Example #6
def test_correct_shapes():
    rng = np.random.RandomState(0)
    X = rng.randn(12, 10)
    spca = SparsePCA(n_components=8, random_state=rng)
    U = spca.fit_transform(X)
    assert_equal(spca.components_.shape, (8, 10))
    assert_equal(U.shape, (12, 8))
    # test overcomplete decomposition
    spca = SparsePCA(n_components=13, random_state=rng)
    U = spca.fit_transform(X)
    assert_equal(spca.components_.shape, (13, 10))
    assert_equal(U.shape, (12, 13))
Example #7
def test_fit_transform_parallel():
    alpha = 1
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    spca_lars = SparsePCA(n_components=3, method="lars", alpha=alpha, random_state=0)
    spca_lars.fit(Y)
    U1 = spca_lars.transform(Y)
    # Test multiple CPUs
    spca = SparsePCA(
        n_components=3, n_jobs=2, method="lars", alpha=alpha, random_state=0
    ).fit(Y)
    U2 = spca.transform(Y)
    assert not np.all(spca_lars.components_ == 0)
    assert_array_almost_equal(U1, U2)
Example #8
def _explainedvar(X,
                  n_components=None,
                  onehot=False,
                  random_state=None,
                  n_jobs=-1,
                  verbose=3):
    # Create the model
    if sp.issparse(X):
        if verbose >= 3: print('[pca] >Fitting using Truncated SVD..')
        model = TruncatedSVD(n_components=n_components,
                             random_state=random_state)
    elif onehot:
        if verbose >= 3: print('[pca] >Fitting using Sparse PCA..')
        model = SparsePCA(n_components=n_components,
                          random_state=random_state,
                          n_jobs=n_jobs)
    else:
        if verbose >= 3: print('[pca] >Fitting using PCA..')
        model = PCA(n_components=n_components, random_state=random_state)

    # Fit model
    model.fit(X)
    # Do the reduction
    if verbose >= 3: print('[pca] >Computing loadings and PCs..')
    loadings = model.components_  # also known as the coefficients (coefs)
    PC = model.transform(X)
    if not onehot:
        # Compute explained variance, top 95% variance
        if verbose >= 3: print('[pca] >Computing explained variance..')
        percentExplVar = model.explained_variance_ratio_.cumsum()
    else:
        percentExplVar = None
    # Return
    return (model, PC, loadings, percentExplVar)
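A usage sketch for _explainedvar on a dense array (assumes numpy plus the imports the function already relies on: scipy.sparse as sp and the sklearn decomposition classes):

import numpy as np
X = np.random.RandomState(0).randn(100, 10)
model, PC, loadings, percentExplVar = _explainedvar(X, n_components=5)
print(PC.shape, percentExplVar[-1])  # (100, 5) and the variance kept by 5 PCs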
Example #9
def compress_l1(W, b, fac, alpha):  # This is the Sparse-Coreset setting
    W_s = W.reshape(W.shape[0], W.size // W.shape[0])
    b_s = b.reshape(b.shape[0], b.size // b.shape[0])

    X = np.concatenate([W_s, b_s], axis=1).transpose()

    if fac == -1:
        # Use 4/sqrt(3) * median singular value as the truncation value;
        # we have to do SVD on the full matrix to find the median singular value
        Ux, sx, Vx = np.linalg.svd(X)
        r = (4.0 / (3.0 ** 0.5)) * np.median(sx)
        n_comp = int(np.sum(sx >= r))
        print('Optimal Eigenvalue: %f, Number of components selected: %d/%d'
              % (r, n_comp, len(sx)))
    elif fac == 0:
        print('No compression, Number of components selected: %d/%d'
              % (W.shape[0], W.shape[0]))
        return (W, b)
    else:
        n_comp = int(W.shape[0] * fac)
        print('Predefined ratio, Number of components selected: %d/%d'
              % (n_comp, W.shape[0]))

    # perform truncation
    pca = SparsePCA(n_components=n_comp, alpha=alpha)
    Xs = pca.fit_transform(X)
    Xr = np.dot(Xs, pca.components_)
    # Approximate the original weights
    Wf = Xr.transpose()[:, :-1].reshape(W.shape)
    bf = Xr.transpose()[:, -1].reshape(b.shape)

    return Wf, bf
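The fac == -1 branch above keeps every singular value at or above 4/sqrt(3) times the median. A quick standalone check of how many components that rule retains on a pure-noise square matrix (the matrix size is an illustrative assumption):

import numpy as np
sx = np.linalg.svd(np.random.RandomState(0).randn(30, 30), compute_uv=False)
r = (4.0 / 3.0 ** 0.5) * np.median(sx)
print('%d of %d components kept' % (int(np.sum(sx >= r)), len(sx)))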
Example #10
    def fit(self, data):
        '''
        Fit the dimensionality-reduction model to the training data.
        '''
        # Start from n_comp components; if the retained variance falls short of
        # cum_std, refit to keep a cum_std fraction of the variance instead
        if self.method == 'pca':
            self.dr_model = PCA(n_components=self.n_comp)
            self.dr_model.fit(data)
            if self.dr_model.explained_variance_ratio_.cumsum(
            )[-1] < self.cum_std:
                self.dr_model = PCA(n_components=self.cum_std)
                self.dr_model.fit(data)

        elif self.method == 'kpca':
            self.dr_model = KernelPCA(n_components=self.n_comp, kernel="rbf")
            self.dr_model.fit(data)

        elif self.method == 'fa':
            self.dr_model = FactorAnalysis(n_components=self.n_comp)
            self.dr_model.fit(data)

        elif self.method == 'spca':
            self.dr_model = SparsePCA(n_components=self.n_comp)
            self.dr_model.fit(data)

        elif self.method == 'tsvd':
            self.dr_model = TruncatedSVD(n_components=self.n_comp)
            self.dr_model.fit(data)

        elif self.method == 'ipca':
            self.dr_model = IncrementalPCA(n_components=self.n_comp)
            self.dr_model.fit(data)

        self.data_col = data.columns
Example #11
def featureVect(X_train, y, compoents, feature_para):

    bigram_vectorizer = CountVectorizer(ngram_range=(1, 25),
                                        stop_words="english")
    X_2 = bigram_vectorizer.fit_transform(X_train).toarray()

    vectorizer = TfidfVectorizer(ngram_range=(1, 25), stop_words="english")
    X_2_DFIDF = vectorizer.fit_transform(X_train).toarray()

    X = np.multiply(X_2, X_2_DFIDF)

    # This dataset is way too high-dimensional. Better do PCA:
    # pca = PCA(n_components=400)
    pca = SparsePCA(n_components=compoents[0])

    # Build estimator from PCA and Univariate selection:
    # ,("dfr",selection_fdr),("fwe",selection_fwe),("fpr",selection_fpr), ("univ_select", selection)
    feature_list = [("pca", pca)]
    feature_list += feature_para

    combined_features = FeatureUnion(feature_list)

    # Use combined features to transform dataset:
    X_features = combined_features.fit(X, y).transform(X)

    select_chi = chi2(X_2, y)

    ind = np.argpartition(select_chi[0], -compoents[1])[-compoents[1]:]
    selection_chi2 = X_2[:, ind]

    X_features = np.concatenate((X_features, selection_chi2), axis=1)

    return [X_features, combined_features, bigram_vectorizer, vectorizer, ind]
Example #12
def factorization(method='TruncatedSVD', n_components=10):
    # print("Unsupervised feature selection: matrix factorization with", method, "(", n_components, "components )")

    sparse = {
        'LatentDirichletAllocation':
        LatentDirichletAllocation(n_components=n_components,
                                  n_jobs=-1,
                                  learning_method='online'),
        'TruncatedSVD':
        Pipeline([("selector", TruncatedSVD(n_components)),
                  ("normalizer", MinMaxScaler())]),
        'NMF':
        Pipeline([("selector", NMF(n_components, tol=0.01)),
                  ("normalizer", MinMaxScaler())]),
    }

    model = sparse.get(method, None)

    if model is not None:
        return model

    dense = {
        'PCA': PCA(n_components),
        'SparsePCA': SparsePCA(n_components),
        'FactorAnalysis': FactorAnalysis(n_components)
    }

    model = dense.get(method, None)

    if model is not None:
        return Pipeline([("densifier", Densifier()), ("selector", model),
                         ("normalizer", MinMaxScaler())])
    else:
        return Pipeline([("selector", TruncatedSVD(n_components)),
                         ("normalizer", MinMaxScaler())])
Example #13
def createSparsePCADecomposition(params):
    # params['method'] = {'lars', 'cd'}
    # params['alpha'] = {1}
    # params['ridge_alpha'] = {1}

    cls = SparsePCA()
    return cls
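createSparsePCADecomposition documents method, alpha, and ridge_alpha but never forwards them. A minimal sketch of actually wiring the params dict into the constructor, with hypothetical fallback defaults:

def createSparsePCADecomposition(params):
    # Hypothetical variant: forward the documented keys, falling back to
    # illustrative defaults when a key is absent
    cls = SparsePCA(method=params.get('method', 'lars'),
                    alpha=params.get('alpha', 1),
                    ridge_alpha=params.get('ridge_alpha', 0.01))
    return cls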
Example #14
 def _sparse_pca(self, x, y):
     """
     Computes the adaptive weights based on sparse principal component analysis.
     """
     # Compute sparse pca
     x_center = x - x.mean(axis=0)
     total_variance_in_x = np.sum(np.var(x, axis=0))
     spca = SparsePCA(n_components=np.min((x.shape[0], x.shape[1])),
                      alpha=self.spca_alpha,
                      ridge_alpha=self.spca_ridge_alpha)
     t = spca.fit_transform(x_center)
     p = spca.components_.T
     # Obtain explained variance using spca as explained in the original paper (based on QR decomposition)
     t_spca_qr_decomp = np.linalg.qr(t)
     # QR decomposition of modified PCs
     r_spca = t_spca_qr_decomp[1]
     t_spca_variance = np.diag(r_spca)**2 / x.shape[0]
     # compute variance_ratio
     fractions_of_explained_variance = np.cumsum(t_spca_variance /
                                                 total_variance_in_x)
     # Update variability_pct
     self.variability_pct = np.min(
         (self.variability_pct, np.max(fractions_of_explained_variance)))
     n_comp = np.argmax(
         fractions_of_explained_variance >= self.variability_pct) + 1
     unpenalized_model = ASGL(model=self.model,
                              penalization=None,
                              intercept=True,
                              tau=self.tau)
     unpenalized_model.fit(x=t[:, 0:n_comp], y=y)
     beta_qr = unpenalized_model.coef_[0][1:]
     # Recover an estimation of the beta parameters and use it as weight
     tmp_weight = np.abs(np.dot(p[:, 0:n_comp], beta_qr)).flatten()
     return tmp_weight
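The QR step above implements the adjusted explained variance for sparse PCA: the scores are correlated, so their raw variances overcount, and the squared diagonal of R gives each component's incremental contribution. A minimal standalone sketch of just that step (the function name and defaults are illustrative):

import numpy as np
from sklearn.decomposition import SparsePCA

def spca_adjusted_variance_ratio(x, n_components, alpha=1.0):
    # QR of the correlated SPCA scores; diag(R)^2 / n is the adjusted variance
    x_center = x - x.mean(axis=0)
    t = SparsePCA(n_components=n_components, alpha=alpha).fit_transform(x_center)
    r = np.linalg.qr(t)[1]
    adjusted = np.diag(r) ** 2 / x.shape[0]
    return adjusted / np.sum(np.var(x, axis=0))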
Example #15
    def reduce_dimension(self, X):
        """
        Perform dimensionality reduction.

        Inputs:
            X: (DataFrame) Independent variables.

        Returns:
            pd_new_X: (DataFrame) Reduced dimension independent variables.
            mode: (str) Dimensionality reduction used (PCA | tSNE)
        """
        if self.dimension_reduction_mode.lower() == 'pca':
            model = PCA(n_components=self.projection_dim)
            column_prefix = 'pc'
        elif self.dimension_reduction_mode.lower() == 'sparsepca':
            model = SparsePCA(n_components=self.projection_dim)
            column_prefix = 'pc'
        elif self.dimension_reduction_mode.lower() == 'tsne':
            model = TSNE(n_components=self.projection_dim)
            column_prefix = 'embedding'
        else:
            raise ValueError('Invalid mode: {}'.format(self.dimension_reduction_mode))

        pd_new_X = pd.DataFrame(
            model.fit_transform(X),
            index=X.index,
            columns=[column_prefix + str(i+1) for i in range(self.projection_dim)])

        return pd_new_X, self.dimension_reduction_mode
Example #16
def sparce_pca(df_train, df_test, n_components=30):

    print("sparce_pca")
    cols = [
        "card1", "card2", "card3", "card4", "card5", "card6", "addr1", "addr2",
        "id_19", "DeviceInfo"
    ]
    for col in cols:
        valid = pd.concat([df_train[[col]], df_test[[col]]])
        valid = valid[col].value_counts()
        valid = valid[valid > 75]
        valid = list(valid.index)

        df_train[col] = np.where(df_train[col].isin(valid), df_train[col],
                                 "others")
        df_test[col] = np.where(df_test[col].isin(valid), df_test[col],
                                "others")

    X_all = df_train.append(df_test)[cols]
    X_all = pd.get_dummies(X_all, columns=cols, sparse=True).astype(np.int8)
    print(X_all.shape)
    X_all = SparsePCA(n_components=n_components).fit_transform(X_all)

    col_names = ["cat_SpacePCA_{}".format(x) for x in range(n_components)]
    df_train = pd.DataFrame(X_all[:len(df_train)], columns=col_names)
    df_test = pd.DataFrame(X_all[len(df_train):], columns=col_names)

    return df_train, df_test
Example #17
def SPCA(X, reg, reg2):
    X = StandardScaler().fit_transform(X)
    transformer = SparsePCA(n_components=9, alpha=reg, ridge_alpha=reg2)
    transformer.fit(X)
    norm_comps = np.array(
        [i / np.linalg.norm(i) for i in transformer.components_])
    return norm_comps
Example #18
def get_dim_reds_scikit(pct_features):
    n_components = max(int(pct_features * num_features), 1)
    return [
        LinearDiscriminantAnalysis(n_components=n_components),
        TruncatedSVD(n_components=n_components),
        #SparseCoder(n_components=n_components),
        DictionaryLearning(n_components=n_components),
        FactorAnalysis(n_components=n_components),
        SparsePCA(n_components=n_components),
        NMF(n_components=n_components),
        PCA(n_components=n_components),
        RandomizedPCA(n_components=n_components),
        KernelPCA(kernel="linear", n_components=n_components),
        KernelPCA(kernel="poly", n_components=n_components),
        KernelPCA(kernel="rbf", n_components=n_components),
        KernelPCA(kernel="sigmoid", n_components=n_components),
        KernelPCA(kernel="cosine", n_components=n_components),
        Isomap(n_components=n_components),
        LocallyLinearEmbedding(n_components=n_components,
                               eigen_solver='auto',
                               method='standard'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto',
                               method='modified'),
        LocallyLinearEmbedding(n_neighbors=n_components,
                               n_components=n_components,
                               eigen_solver='auto',
                               method='ltsa'),
        SpectralEmbedding(n_components=n_components)
    ]
Example #19
def _explainedvar(X,
                  n_components=None,
                  sparse=False,
                  random_state=None,
                  verbose=3):

    # Create the model
    if sp.issparse(X):
        if verbose >= 3: print('[TruncatedSVD] Fit..')
        model = TruncatedSVD(n_components=n_components,
                             random_state=random_state)
    elif sparse:
        if verbose >= 3: print('[PCA] Fit sparse dataset..')
        model = SparsePCA(n_components=n_components, random_state=random_state)
    else:
        if verbose >= 3: print('[PCA] Fit..')
        model = PCA(n_components=n_components, random_state=random_state)

    # Fit model
    model.fit(X)
    # Do the reduction
    loadings = model.components_  # also known as the coefficients (coefs)
    PC = model.transform(X)
    # Compute explained variance, top 95% variance
    percentExplVar = model.explained_variance_ratio_.cumsum()
    # Return
    return (model, PC, loadings, percentExplVar)
Example #20
def pca_svm(filename):

    data = pd.read_csv('archive/' + filename, usecols=['label', 'tweet'])

    vectorizer = TfidfVectorizer()
    vectorized = vectorizer.fit_transform(data['tweet'])
    vectorized = vectorized.todense()

    X_tr, X_te, y_tr, y_te = train_test_split(vectorized, data['label'], test_size=0.2)

    pca = SparsePCA()
    X_tr = pca.fit_transform(X_tr)
    X_te = pca.transform(X_te)
    clf = SVC(kernel='rbf')
    clf.fit(X_tr, y_tr)
    y_pred = clf.predict(X_te)
    y_pred_tr = clf.predict(X_tr)

    accuracy = accuracy_score(y_te, y_pred)
    accuracy_train = accuracy_score(y_tr, y_pred_tr)

    plot_confusion_matrix(clf, X_te, y_te)
    plt.show()
Example #21
def train_reduc(data,
                reduc_type='pca',
                kernel='rbf',
                n_c=8,
                eps=0.01,
                random_state=2020):
    if reduc_type == 'pca':
        reduc = PCA(n_components=n_c)
    elif reduc_type == 'spca':
        reduc = SparsePCA(n_components=n_c)
    elif reduc_type == 'kpca':
        reduc = KernelPCA(n_components=n_c, kernel=kernel)
    elif reduc_type == 'ica':
        reduc = FastICA(n_components=n_c)
    elif reduc_type == 'grp':
        reduc = GaussianRandomProjection(n_components=n_c,
                                         eps=eps,
                                         random_state=random_state)
    elif reduc_type == 'srp':
        reduc = SparseRandomProjection(n_components=n_c,
                                       density='auto',
                                       eps=eps,
                                       dense_output=True,
                                       random_state=random_state)

    reduced = reduc.fit_transform(data)
    print('Reduc Complete')
    return reduced, reduc
Example #22
    def __init__(self,
                 num_components=10,
                 catalog_name='unknown',
                 alpha=0.1,
                 ridge_alpha=0.01,
                 max_iter=2000,
                 tol=1e-9,
                 n_jobs=1,
                 random_state=None):

        self._decomposition = 'Sparse PCA'
        self._num_components = num_components
        self._catalog_name = catalog_name
        self._alpha = alpha
        self._ridge_alpha = ridge_alpha
        self._n_jobs = n_jobs
        self._max_iter = max_iter
        self._tol = tol
        self._random_state = random_state

        self._SPCA = SparsePCA(n_components=self._num_components,
                               alpha=self._alpha,
                               ridge_alpha=self._ridge_alpha,
                               n_jobs=self._n_jobs,
                               max_iter=self._max_iter,
                               tol=self._tol,
                               random_state=self._random_state)
Example #23
File: amm.py Project: ljeagle/bolt
def _fitted_sparse_pca(X, d, unscaled_alpha, **kwargs):
    # this seems to work better than initializing with MiniBatchSparsePCA,
    # svd of cov mat, or basically anything else I tried
    U, _, Vt = randomized_svd(X, n_components=d, random_state=123)
    U = U[:, :d]
    V = Vt.T[:d]

    # SparsePCA (and all the sklearn dictionary learning stuff)
    # internally uses sum of squared errs for each sample, and L1 norm
    # of parameter matrix; to make alpha meaningful across datasets,
    # want to scale by number of examples (so it's effectively using MSE)
    # and divide by L1 norm (which grows linearly with size of parameter
    # matrix / vector); also scale by variance of data for similar reasons
    N, D = X.shape
    alpha = unscaled_alpha * np.var(X - X.mean(axis=0)) * N / D
    verbose = 1
    pca = SparsePCA(
        n_components=d,
        alpha=alpha,
        normalize_components=True,
        method='lars',
        U_init=U,
        V_init=V,
        max_iter=10,
        ridge_alpha=max(1,
                        len(X) * X.std() * 10),
        # ridge_alpha=1e8,
        verbose=verbose,
        random_state=123)
    if verbose > 0:
        print("fitting sparse pca...")
    return pca.fit(X)
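A quick usage sketch for the helper above with random data (shapes are illustrative; note that recent scikit-learn releases removed SparsePCA's normalize_components argument, so this assumes a version that still accepts it):

import numpy as np
X = np.random.RandomState(0).randn(500, 64)
pca = _fitted_sparse_pca(X, d=16, unscaled_alpha=0.5)
X_codes = pca.transform(X)  # (500, 16) codes over the sparse components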
Example #24
    def calc_principal_components(self, df, n_comp=20, method='PCA'):
        '''
        Run PCA or Sparse PCA on the feature table.
        :param df: feature table containing a 'Gene_Name' column and the target column self.cfg.Y
        :return: the fitted decomposition model and a DataFrame of principal components
        '''
        print(">> Running " + method + "...")
        if df.shape[1] <= n_comp:
            n_comp = df.shape[1] - 1

        tmp_drop_cols = ['Gene_Name', self.cfg.Y]
        X = df.drop(tmp_drop_cols, axis=1)
        pca_data = X.copy()

        pca = None
        if method == 'SparsePCA':
            pca = SparsePCA(n_components=n_comp)
        else:
            pca = PCA(n_components=n_comp)
        principal_components = pca.fit_transform(pca_data)

        columns = []
        for i in range(1, n_comp + 1):
            columns.append('PC' + str(i))

        pca_df = pd.DataFrame(data=principal_components, columns=columns)
        pca_df = pd.concat([pca_df, df[tmp_drop_cols]], axis=1)

        filepath = str(self.cfg.unsuperv_out / (method + ".table.tsv"))
        pca_df.to_csv(filepath, sep='\t', index=None)

        return pca, pca_df
Example #25
 def __init__(
     self, 
     *args,
     sparse=False,
     kernel=None,
     **kwargs
 ):
     super().__init__(*args, **kwargs)
     if kernel:
         self.model = KernelPCA(
             n_components = self.n_latent, 
             kernel=kernel,
             random_state = self.random_state,
             copy_X=False
             )
     elif sparse:
         self.model = SparsePCA(
             n_components = self.n_latent,
             random_state = self.random_state
             )
     else:
         self.model = PCA(
             n_components = self.n_latent, 
             random_state = self.random_state
             )
Example #26
def spca(components, train_matrix, test_matrix):
    """Sparse principal component analysis routine.

    Parameters
    ----------
    components : int
        The number of components to be returned.
    train_matrix : array
        The training features.
    test_matrix : array
        The test features.

    Returns
    -------
    new_train : array
        Extracted training features.
    new_test : array
        Extracted test features.
    """
    msg = 'The number of components must be a positive int greater than 0.'
    assert components > 0, msg

    pca = SparsePCA(n_components=components)
    model = pca.fit(X=train_matrix)
    new_train = model.transform(train_matrix)
    new_test = model.transform(test_matrix)

    return new_train, new_test
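A usage sketch for spca with random matrices (all shapes are illustrative assumptions):

import numpy as np
rng = np.random.RandomState(0)
new_train, new_test = spca(5, rng.randn(50, 20), rng.randn(10, 20))
print(new_train.shape, new_test.shape)  # (50, 5) (10, 5)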
Example #27
def test_transform_nan():
    # Test that SparsePCA won't return NaN when a feature is zero in all
    # samples.
    rng = np.random.RandomState(0)
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)  # wide array
    Y[:, 0] = 0
    estimator = SparsePCA(n_components=8)
    assert not np.any(np.isnan(estimator.fit_transform(Y)))
Example #28
def test_initialization():
    rng = np.random.RandomState(0)
    U_init = rng.randn(5, 3)
    V_init = rng.randn(3, 4)
    model = SparsePCA(n_components=3, U_init=U_init, V_init=V_init, max_iter=0,
                      random_state=rng)
    model.fit(rng.randn(5, 4))
    assert_array_equal(model.components_, V_init)
Example #29
def sccodedirect():
    "Get the RPCA result for the no-glasses data"
    nglassmodel = np.load('nglassline.npy').astype('f')
    from sklearn.decomposition import SparsePCA
    learning = SparsePCA(500, verbose=True)
    learning.fit(nglassmodel)
    import pickle
    with open('sparsepcadirect', 'wb') as f:
        pickle.dump(learning, f, -1)
Example #30
def sparse_pca(K, alpha, ridge_alpha):
    transformer = SparsePCA(n_components=1, alpha=alpha, ridge_alpha=ridge_alpha, normalize_components=False, random_state=0)
    transformer.fit(K)
    val = transformer.components_[0]
    print('#nnz: ', np.sum(np.abs(val) > 1.0e-10))
    #print(np.sum(val * val))
    #val = np.random.randn(K.shape[1])
    return val / np.linalg.norm(val)
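A usage sketch for the function above (K is an arbitrary symmetric toy matrix here; as with Example #23, normalize_components is gone from recent scikit-learn, so this assumes an older version):

import numpy as np
M = np.random.RandomState(0).randn(40, 40)
K = M @ M.T  # symmetric toy 'kernel' matrix
leading = sparse_pca(K, alpha=1.0, ridge_alpha=0.01)
print(leading.shape, np.linalg.norm(leading))  # (40,) and ~1.0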