def wpca_decomposition(data):
    """Project ``data`` onto its first weighted principal component.

    Non-finite entries of ``data`` get weight 0 and finite entries weight 1,
    so missing values do not influence the fit or the projection.

    Parameters
    ----------
    data : array-like, 2-D
        Input matrix. Presumably samples in rows and genes in columns,
        judging by the names of the returned values -- confirm with callers.

    Returns
    -------
    eigen_genes : ndarray
        First row of ``pca.components_`` (one loading per column of ``data``).
    eigen_samples : ndarray
        First column of the transformed data (one score per row of ``data``).
    """
    # Binary mask as floats: 1.0 where the value is finite, 0.0 otherwise.
    weights = 0. + np.isfinite(data)
    kwds = {'weights': weights}

    pca = WPCA(n_components=1).fit(data, **kwds)

    # The same weights must be given to transform(): without them the
    # projection treats every entry as observed, which is wrong (or rejected
    # outright) when ``data`` contains NaN. For fully finite input all weights
    # are 1, so this matches the unweighted call.
    eigen_samples = pca.transform(data, **kwds)[:, 0]
    eigen_genes = pca.components_[0, :]
    return eigen_genes, eigen_samples
def get_pca(input_: Array, learn_input: Array, learn_weight_vec: Opt[Array], n_comp_list: Iterable[int], err_printer: Callable[[Array, Array, str], None] = None, normalize_x: bool = True, normalize_z: bool = False) -> LinearAnalyzer:
    """
    Fit one ``ClassWPCA`` model per entry of ``n_comp_list`` on the learning
    data and wrap each fit in a ``LinearAnalyzer`` built around ``input_``.

    The last from ``n_comp_list`` would be returned.

    :param input_: data handed to every ``LinearAnalyzer`` as ``x``.
    :param learn_input: data the models are fitted on.
    :param learn_weight_vec: optional weights; when given they are expanded
        with ``weights_matrix`` before being passed to ``fit``.
    :param n_comp_list: component counts to try, in order.
    :param err_printer: optional callback called after each fit with
        ``(input_, an.x_rec, prefix)``.
    :param normalize_x: normalize the learning data before fitting.
    :param normalize_z: forwarded to the inverse-transform helper and to
        ``LinearAnalyzer``.
    :raises ValueError: if ``n_comp_list`` is empty.
    """
    component_counts = list(n_comp_list)

    # Identity normalization (mean 0, scale 1) unless normalization is requested.
    if normalize_x:
        x_normalized, μ_x, σ_x = get_x_normalized_μ_σ(learn_input, learn_weight_vec)
    else:
        x_normalized, μ_x, σ_x = learn_input, 0, 1

    if learn_weight_vec is None:
        weight_mat = None
    else:
        weight_mat = weights_matrix(learn_weight_vec, learn_input)

    # Checked only after the preprocessing above so that any error raised
    # there still takes precedence over the empty-list error.
    if not component_counts:
        raise ValueError('Empty n_comp_list')

    for n_comp in component_counts:
        pca = ClassWPCA(n_comp)
        pca.fit(x_normalized, weights=weight_mat)
        z = pca.transform(x_normalized)

        inverse_transform_matrix, μ_z, σ_z = get__inverse_transform_matrix__μ_z__σ_z(
            z, learn_weight_vec, normalize_z, x_normalized)

        an = LinearAnalyzer(
            n=pca.n_components,
            analyzer=pca,
            x=input_,
            μ_x=μ_x,
            σ_x=σ_x,
            μ_z=μ_z,
            σ_z=σ_z,
            inverse_transform_matrix=inverse_transform_matrix,
            normalize_x=normalize_x,
            normalize_z=normalize_z,
        )

        if err_printer is not None:
            # Total explained variance ratio, rounded to two decimals.
            explained = np.round(np.sum(pca.explained_variance_ratio_), 2)
            pref = f"Expl = {explained}, PC N = {pca.n_components}, "
            err_printer(input_, an.x_rec, pref)

    # Analyzer from the final iteration, i.e. the last component count.
    return an
def component_removal(data, n_comp):
    """Remove the leading ``n_comp`` weighted principal components from ``data``.

    Each row of ``data`` is centred on its own mean, a 30-component weighted
    PCA is fitted on the transposed matrix with non-finite entries given
    weight 0, and the data are rebuilt from components ``n_comp`` onwards
    only. The row means are added back before returning.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric table; its columns become the PCA rows (observations) after
        transposition.
    n_comp : int
        Number of leading components to drop from the reconstruction.

    Returns
    -------
    pandas.DataFrame
        Reconstructed table with the same index and columns as ``data``.
    """
    # Centre every row; the means are restored at the end.
    mean = data.mean(axis=1)
    data = data.sub(mean, axis=0)

    dataT = data.T.values
    # Binary mask: 1 where the value is finite, 0 where it is missing.
    weights = 0 + np.isfinite(dataT)
    kwds = {'weights': weights}

    pca = WPCA(n_components=30).fit(dataT, **kwds)  # Fit data to model

    # transform() needs the same weights as fit(); otherwise missing entries
    # are treated as observed (or rejected) when computing the scores. For
    # fully finite input all weights are 1, so this matches the unweighted call.
    scores = pca.transform(dataT, **kwds)

    # Rebuild using only the components that are kept (index n_comp and up).
    reconstruction = np.dot(scores[:, n_comp:], pca.components_[n_comp:, :])

    reconst_df = pd.DataFrame(data=reconstruction.T,
                              columns=data.columns,
                              index=data.index)
    reconst_df = reconst_df.add(mean, axis=0)
    return reconst_df
def weighted_PCA(df, n_pc=1, standardize=True):
    '''
    Run a weighted PCA (WPCA) on the columns of an expression table.

    df          - Dataframe with expression values; its columns are the
                  observations, its rows the features
    n_pc        - number of principal components requested
    standardize - if True, run the values through StandardScaler first

    Returns (out_df, expl, cont):
    out_df - scores, one row per column of df, one column per component
    expl   - explained variance ratios, NaN-padded up to n_pc entries
    cont   - squared component loadings per row of df, sorted by
             'PC1 contribution' in descending order
    '''
    observations = df.values.T  # rows = columns of df

    if standardize:
        prepared = StandardScaler().fit_transform(observations)
    else:
        prepared = observations
    # The model is given no NaN: missing values become 0 and are masked
    # out through the weights below.
    prepared = np.nan_to_num(prepared)

    # 1 where the original value is finite, 0 where it is missing.
    mask = 0 + np.isfinite(observations)

    # Cannot fit more components than there are rows in df.
    fitted = min(df.shape[0], n_pc)
    model = WPCA(n_components=fitted).fit(prepared, weights=mask)
    expl = model.explained_variance_ratio_
    scores = model.transform(prepared, weights=mask)

    # Columns are labelled with the 1-based component number (ints).
    out_df = pd.DataFrame(scores,
                          index=df.columns,
                          columns=list(range(1, fitted + 1)))

    cont = pd.DataFrame(index=df.index)
    for rank, loadings in enumerate(model.components_[:fitted], start=1):
        cont.loc[:, f'PC{rank} contribution'] = loadings ** 2
    cont = cont.sort_values(by='PC1 contribution', ascending=False)

    # Pad with NaN when fewer components than requested could be fitted.
    # NOTE(review): padded columns get *string* labels while fitted ones use
    # ints, so the column index ends up mixed-type -- confirm this is intended.
    padded = fitted
    while padded < n_pc:
        padded += 1
        expl = np.append(expl, float('NaN'))
        out_df.loc[:, str(padded)] = float('NaN')

    return out_df, expl, cont