def spca(data, num_components=None, alpha=1):
    """Run sparse PCA on ``data`` and rank the components by explained variance.

    Parameters
    ----------
    data : iterable
        Samples exposing ``.flatten()``. A sample is skipped when
        ``any(isnan(sample))`` is true; the remaining samples are flattened
        and stacked as the rows of the data matrix.
    num_components : int, optional
        Number of sparse components to fit. When ``None`` it is set to the
        number of rows of the centered data matrix.
    alpha : number, optional
        Forwarded unchanged to ``SparsePCA(alpha=...)``.

    Returns
    -------
    tuple
        ``(components, evars)``: the unit-length component rows reordered by
        decreasing explained variance, and the matching variances as a list.
    """
    # Assemble one flattened row per usable sample.
    rows = []
    for sample in data:
        if any(isnan(sample)):
            continue
        rows.append(sample.flatten())
    datamatrix = row_stack(rows)

    # Remove the per-column mean.
    cdata = datamatrix - mean(datamatrix, axis=0)

    if num_components is None:
        num_components = cdata.shape[0]

    model = SparsePCA(n_components=num_components, alpha=alpha)
    model.fit(cdata)

    # Scale every component to unit length in place; all-zero components are
    # left untouched so no division by zero happens.
    unit_rows = model.components_
    for component in unit_rows:
        length = numpy.apply_along_axis(numpy.linalg.norm, 0, component)
        if length != 0:
            component /= length
    model.components_ = unit_rows

    # Adjusted explained variance, following "Sparse Principal Component
    # Analysis" by Zou, Hastie, Tibshirani: regress the data onto the
    # normalized components and take a QR factorization of the result.
    nuz = ridge_regression(
        model.components_.T, cdata.T, 0.01, solver='dense_cholesky'
    ).T
    _, upper = qr(nuz.T)

    # Trace of the element-wise squared leading rows, for 1..num_components.
    upper_sq = upper * upper
    cumulative_var = [
        trace(upper_sq[0:count]) for count in range(1, num_components + 1)
    ]

    # Each component's share is the increment of the square-rooted running
    # totals.
    roots = [math.sqrt(cumulative_var[0])]
    roots.extend(
        math.sqrt(cumulative_var[idx]) for idx in range(1, num_components)
    )
    explained_var = [roots[0]]
    explained_var.extend(
        roots[idx] - roots[idx - 1] for idx in range(1, num_components)
    )

    # Largest explained variance first.
    ranking = numpy.argsort(explained_var)[::-1]
    ranked_components = numpy.take(unit_rows, ranking, axis=0)
    evars = numpy.take(explained_var, ranking).tolist()
    return ranked_components, evars
def spca(data, num_components=None, alpha=1):
    """Create a component matrix with sparse principal component analysis.

    Parameters
    ----------
    data : iterable
        Samples exposing ``.flatten()``. Samples for which
        ``any(isnan(d))`` is true are dropped; the others are flattened and
        stacked row-wise.
    num_components : int, optional
        Number of components passed to ``SparsePCA``. Defaults to the number
        of rows in the centered data matrix.
    alpha : number, optional
        Passed through to ``SparsePCA(alpha=...)``.

    Returns
    -------
    components
        Normalized component rows, sorted by decreasing explained variance.
    evars : list
        Explained variance of each returned component, in the same order.
    """
    # build matrix with all data
    data = [d.flatten() for d in data if not any(isnan(d))]
    datamatrix = row_stack(data)

    # center data
    cdata = datamatrix - mean(datamatrix, axis=0)

    if num_components is None:
        num_components = cdata.shape[0]

    # do spca on matrix
    model = SparsePCA(n_components=num_components, alpha=alpha)
    model.fit(cdata)

    # normalize components (in place); zero-norm components are skipped to
    # avoid dividing by zero
    components = model.components_.T
    for col in range(components.shape[1]):
        compnorm = numpy.linalg.norm(components[:, col])
        if compnorm != 0:
            components[:, col] /= compnorm
    components = components.T

    # calc adjusted explained variance from "Sparse Principal Component
    # Analysis" by Zou, Hastie, Tibshirani
    model.components_ = components
    # NOTE(review): 'dense_cholesky' looks like the legacy scikit-learn name
    # of the 'cholesky' solver -- confirm it is accepted by the pinned version.
    nuz = ridge_regression(
        model.components_.T, cdata.T, 0.01, solver='dense_cholesky'
    ).T
    _, r_factor = qr(nuz.T)

    # running trace of the element-wise squared leading rows of R
    cumulative_var = [
        trace(r_factor[0:i] * r_factor[0:i])
        for i in range(1, num_components + 1)
    ]

    # per-component share: difference of consecutive square-rooted totals
    explained_var = [math.sqrt(cumulative_var[0])]
    for i in range(1, num_components):
        explained_var.append(
            math.sqrt(cumulative_var[i]) - math.sqrt(cumulative_var[i - 1])
        )

    # sort components by explained variance, largest first
    order = numpy.argsort(explained_var)[::-1]
    components = numpy.take(components, order, axis=0)
    evars = numpy.take(explained_var, order).tolist()
    return components, evars
def test_fit_transform_variance():
    """Check SparsePCA's explained variance against PCA's for equal components.

    Both estimators are fitted on the same toy data. The sparse model's
    ``components_`` are then overwritten with the PCA components, and its
    ``explained_variance_`` is expected to match PCA's ``explained_variance_``.
    """
    alpha = 1
    rng = np.random.RandomState(0)
    # wide array
    Y, _, _ = generate_toy_data(3, 10, (8, 8), random_state=rng)

    # Reference model.
    pca = PCA(n_components=3, random_state=0)
    pca.fit(Y)
    expected_variance = pca.explained_variance_

    # Sparse model under test.
    # NOTE(review): the original comment here said "no need to fit spca for
    # this", yet fit() is called; presumably explained_variance_ is derived
    # from components_ on access -- confirm against the estimator.
    spca_lars = SparsePCA(
        n_components=3,
        method='lars',
        alpha=alpha,
        random_state=0,
        variance=True,
    )
    spca_lars.fit(Y)

    # Give the sparse model exactly the PCA components before reading its
    # explained variance.
    spca_lars.components_ = pca.components_
    sparse_variance = spca_lars.explained_variance_

    assert_array_almost_equal(expected_variance, sparse_variance)