import numpy as np
from wpca import WPCA

def wpca_decomposition(data):
    """Return the first weighted principal component of ``data``;
    non-finite entries get weight 0, finite entries weight 1."""
    weights = 0. + np.isfinite(data)  # boolean finiteness mask cast to float
    kwds = {'weights': weights}
    pca = WPCA(n_components=1).fit(data, **kwds)
    eigen_samples = pca.transform(data)[:, 0]  # score of each sample on PC1
    eigen_genes = pca.components_[0, :]        # loading of each feature on PC1
    return eigen_genes, eigen_samples
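A minimal usage sketch, assuming the ``wpca`` package is installed; the toy matrix, its shape and the variable names are illustrative only:

import numpy as np
rng = np.random.default_rng(0)
data = rng.normal(size=(20, 5))                # hypothetical 20 samples x 5 genes
eigen_genes, eigen_samples = wpca_decomposition(data)
print(eigen_genes.shape, eigen_samples.shape)  # (5,) (20,)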
# Relies on project-local names not defined in this snippet: Array, Opt,
# LinearAnalyzer, ClassWPCA, get_x_normalized_μ_σ, weights_matrix and
# get__inverse_transform_matrix__μ_z__σ_z.
def get_pca(input_: Array,
            learn_input: Array,
            learn_weight_vec: Opt[Array],
            n_comp_list: Iterable[int],
            err_printer: Opt[Callable[[Array, Array, str], None]] = None,
            normalize_x: bool = True,
            normalize_z: bool = False) -> LinearAnalyzer:
    """ Fits one weighted PCA per size in ``n_comp_list``; the analyzer
    fitted with the last value is returned. """
    def expl(pca_):
        return np.round(np.sum(pca_.explained_variance_ratio_), 2)

    n_comp_list = list(n_comp_list)

    x = x_normalized = learn_input  # (~6000, ~162)
    weight_vec = learn_weight_vec
    μ_x: Union[Array, int] = 0
    σ_x: Union[Array, int] = 1
    if normalize_x:
        x_normalized, μ_x, σ_x = get_x_normalized_μ_σ(x, weight_vec)
    weight_vec_as_mat = (weights_matrix(weight_vec, x)
                         if (weight_vec is not None) else None)

    for j, i in enumerate(n_comp_list):
        pca = ClassWPCA(i)
        pca.fit(x_normalized, weights=weight_vec_as_mat)
        z: Array = pca.transform(x_normalized)

        inverse_transform_matrix, μ_z, σ_z = get__inverse_transform_matrix__μ_z__σ_z(
            z, weight_vec, normalize_z, x_normalized)

        an = LinearAnalyzer(n=pca.n_components,
                            analyzer=pca,
                            x=input_,
                            μ_x=μ_x,
                            σ_x=σ_x,
                            μ_z=μ_z,
                            σ_z=σ_z,
                            inverse_transform_matrix=inverse_transform_matrix,
                            normalize_x=normalize_x,
                            normalize_z=normalize_z)

        if err_printer is not None:
            pref = f"Expl = {expl(pca)}, PC N = {pca.n_components}, "
            err_printer(input_, an.x_rec, pref)

        if (j + 1) == len(n_comp_list):
            break
    else:
        # Only reached when the loop body never ran, i.e. the list was empty.
        raise ValueError('Empty n_comp_list')
    return an
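A hedged call sketch, assuming the project-local helpers listed above are importable, that ``Array`` is a NumPy ndarray alias, and that the helpers accept ``None`` weights; the data shape mirrors the ``(~6000, ~162)`` comment, while ``print_rec_err`` and the component counts are made up for illustration:

import numpy as np

def print_rec_err(x, x_rec, prefix):
    # Illustrative err_printer: report the mean squared reconstruction error.
    print(prefix + f"MSE = {np.mean((x - x_rec) ** 2):.4f}")

X = np.random.randn(6000, 162)
analyzer = get_pca(input_=X, learn_input=X, learn_weight_vec=None,
                   n_comp_list=[2, 4, 8], err_printer=print_rec_err)
# `analyzer` is the LinearAnalyzer fitted with the last value (8 components).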
Example #3
import numpy as np
import pandas as pd
from wpca import WPCA

def component_removal(data, n_comp):
    """Remove the leading ``n_comp`` principal components from ``data``
    and return the reconstructed DataFrame (same rows and columns)."""
    mean = data.mean(axis=1)
    data = data.sub(mean, axis=0)  # center each row on its mean

    dataT = data.T.values  # transpose so rows are samples, columns are features

    weights = 0 + np.isfinite(dataT)  # weight 1 for finite entries, 0 for NaN
    kwds = {'weights': weights}

    pca = WPCA(n_components=30).fit(dataT, **kwds)  # fit data to the model

    # Reconstruct from components n_comp..29 only, i.e. with the
    # leading n_comp components removed.
    reconstruction = np.dot(
        pca.transform(dataT)[:, n_comp:], pca.components_[n_comp:, :])
    reconst_df = pd.DataFrame(data=reconstruction.T,
                              columns=data.columns,
                              index=data.index)
    reconst_df = reconst_df.add(mean, axis=0)  # add the row means back

    return reconst_df
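A usage sketch with a hypothetical expression DataFrame (genes as rows, samples as columns); since the function fits 30 components, the toy data must have at least 30 samples and 30 genes:

import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
df = pd.DataFrame(rng.normal(size=(40, 35)),
                  index=[f"gene{i}" for i in range(40)],
                  columns=[f"sample{j}" for j in range(35)])
cleaned = component_removal(df, n_comp=2)  # drop the 2 leading components
print(cleaned.shape)                       # (40, 35), same layout as the input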
Example #4
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from wpca import WPCA

def weighted_PCA(df, n_pc=1, standardize=True):
    '''
    Perform a weighted PCA (wpca.WPCA), using sklearn's StandardScaler
    for the optional standardization step.

    df - DataFrame with expression values
    n_pc - number of principal components to return
    standardize - center to mean and scale to unit variance before the PCA
    '''
    x = df.values.T  # transpose so rows are samples and columns are features
    if standardize:
        standardizer = StandardScaler()
        # Standardize the data (center to mean and scale to unit variance)
        x2 = standardizer.fit_transform(x)
    else:
        x2 = x

    # Replace NaN values with 0 so the array is accepted by the PCA function
    x2 = np.nan_to_num(x2)

    weights = 0 + np.isfinite(x)  # weight 1 for observed entries, 0 for missing
    kwds = {'weights': weights}
    n_pcs = min(df.shape[0], n_pc)
    pca = WPCA(n_components=n_pcs).fit(x2, **kwds)  # fit data to the model
    expl = pca.explained_variance_ratio_
    # Transform the data (apply the dimensionality reduction);
    # x3 holds the principal component scores
    x3 = pca.transform(x2, **kwds)
    # DataFrame of PCA scores, with the PC number as the column index
    out_df = pd.DataFrame(
        x3.T, index=list(range(1, n_pcs + 1)), columns=df.columns
    ).T

    # Squared loadings give each feature's contribution to each PC
    cont = pd.DataFrame(index=df.index)
    for i in range(n_pcs):
        cont.loc[:, f'PC{i+1} contribution'] = pca.components_[i]**2
    cont.sort_values(by='PC1 contribution', ascending=False, inplace=True)

    # Pad the output with NaN if fewer PCs were available than requested
    while n_pcs < n_pc:
        expl = np.append(expl, float('NaN'))
        n_pcs += 1
        out_df.loc[:, str(n_pcs)] = float('NaN')

    return out_df, expl, cont
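A usage sketch with a hypothetical expression DataFrame; the variable names, shapes and the injected missing value are illustrative only:

import numpy as np
import pandas as pd

rng = np.random.default_rng(2)
expr = pd.DataFrame(rng.normal(size=(50, 12)),
                    index=[f"gene{i}" for i in range(50)],
                    columns=[f"sample{j}" for j in range(12)])
expr.iloc[0, 3] = np.nan                      # a missing value gets zero weight
scores, expl_var, contributions = weighted_PCA(expr, n_pc=2)
print(scores.shape)          # (12, 2): one row per sample, one column per PC
print(expl_var)              # explained variance ratio per PC
print(contributions.head())  # features ranked by their contribution to PC1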