Example #1
0
def NMFreg(
    counts,
    coords,
    size,
    metacell_dict,
    gene_intersection,
    num_atlas_clusters,
    celltype_to_factor_dict,
    celltype_dict,
    plot_size_dict,
):
    """Project bead counts onto atlas factors with NNLS and label each bead.

    Steps performed:

    1. Select the ``gene_intersection`` columns of ``counts`` and index the
       rows by ``counts["barcode"]``.
    2. Divide every row by its row sum, then scale each column with
       ``StandardScaler(with_mean=False)``.
    3. For every bead, solve ``scipy.optimize.nnls(WaT, bead_profile)`` to
       obtain non-negative factor loadings.
    4. Scale the loadings per factor, take the arg-max factor per bead and
       translate it to an atlas cluster through ``celltype_to_factor_dict``.
    5. Apply ``deconv_factor_to_celltype`` row-wise to obtain per-bead
       cell-type values and the arg-max cell type.

    NOTE(review): ``WaT``, ``K``, ``factor_to_celltype_dict`` and
    ``deconv_factor_to_celltype`` are not parameters; they are resolved from
    the enclosing/module scope, which is not visible in this part of the
    file. Confirm they are defined before this function is called.

    Parameters
    ----------
    counts : pandas.DataFrame
        Must contain a ``"barcode"`` column and one column per entry of
        ``gene_intersection``.
    coords, size, metacell_dict, plot_size_dict
        Accepted for interface compatibility; not read by this function.
    gene_intersection : sequence of column labels
        Columns of ``counts`` to use as features.
    num_atlas_clusters : int
        Atlas cluster ids ``1..num_atlas_clusters`` are looked up in
        ``celltype_to_factor_dict``.
    celltype_to_factor_dict : mapping
        Maps an atlas cluster id to the factor index/indices assigned to it.
    celltype_dict : mapping
        Passed to ``DataFrame.rename(columns=...)`` to relabel the 1-based
        cell-type columns of the deconvolution table.

    Returns
    -------
    tuple
        ``(Hs, Hs_norm, puckcounts, bead_deconv_df, barcode_clusters,
        bead_maxct_df)`` where

        * ``Hs`` - raw NNLS loadings plus a ``"barcode"`` column,
        * ``Hs_norm`` - column-scaled loadings plus a ``"barcode"`` column,
        * ``puckcounts`` - the selected counts indexed by barcode,
        * ``bead_deconv_df`` - ``"barcode"`` plus the row-wise output of
          ``deconv_factor_to_celltype`` with renamed columns,
        * ``barcode_clusters`` - ``"barcode"``, ``"max_factor"`` and
          ``"atlas_cluster"``,
        * ``bead_maxct_df`` - ``"barcode"`` and 1-based ``"max_cell_type"``.
    """
    # Bead x gene matrix restricted to the shared genes, indexed by barcode.
    puckcounts = counts[list(gene_intersection)].set_index(counts["barcode"])
    barcodes = puckcounts.index.tolist()

    # Normalise each bead by its total count. Dividing by the raw values
    # avoids both index alignment and the removed ``Series[:, None]`` idiom.
    # NOTE(review): a bead whose total is zero yields NaN here, exactly as a
    # plain division would; filter such beads upstream if they can occur.
    cell_totalUMI = puckcounts.sum(axis=1)
    puckcounts_cellnorm = puckcounts.div(cell_totalUMI.values, axis=0)
    puckcounts_scaled = StandardScaler(
        with_mean=False).fit_transform(puckcounts_cellnorm)

    # One NNLS solve per bead; stacking once keeps the result 2-D even when
    # there is a single bead and avoids re-copying the array every iteration.
    Hs_hat = np.array([
        scipy.optimize.nnls(WaT, bead_profile)[0]
        for bead_profile in tqdm(puckcounts_scaled)
    ])

    Hs = pd.DataFrame(Hs_hat)
    Hs["barcode"] = barcodes

    # Scale each factor column (no centering) on the numeric matrix directly.
    loadings_scaled = StandardScaler(with_mean=False).fit_transform(Hs_hat)
    Hs_norm = pd.DataFrame(loadings_scaled)
    Hs_norm["barcode"] = barcodes

    # Dominant factor per bead.
    maxloc_s = loadings_scaled.argmax(axis=1)
    barcode_clusters = pd.DataFrame()
    barcode_clusters["barcode"] = Hs_norm["barcode"]
    barcode_clusters["max_factor"] = maxloc_s

    # Beads whose dominant factor matches no cluster keep their barcode as a
    # placeholder, so the column is held as ``object`` to allow mixed values.
    barcode_clusters["atlas_cluster"] = barcode_clusters["barcode"].astype(
        object)

    for c in range(1, num_atlas_clusters + 1):
        condition = np.isin(barcode_clusters["max_factor"],
                            celltype_to_factor_dict[c])
        # Single ``.loc`` write instead of chained assignment, which is a
        # silent no-op under pandas Copy-on-Write.
        barcode_clusters.loc[condition, "atlas_cluster"] = c

    bead_deconv_df = Hs_norm.apply(
        lambda x: deconv_factor_to_celltype(
            row=x,
            adict=factor_to_celltype_dict,
            K=K,
            num_atlas_clusters=num_atlas_clusters,
        ),
        axis=1,
    )
    bead_deconv_df.insert(0, "barcode", Hs_norm["barcode"])
    # Shift the cell-type column labels from 0-based to 1-based, then map
    # them through ``celltype_dict``.
    bead_deconv_df.columns = ["barcode"] + [
        label + 1 for label in bead_deconv_df.columns[1:]
    ]
    bead_deconv_df = bead_deconv_df.rename(columns=celltype_dict)

    # Dominant cell type per bead, reported 1-based.
    maxloc_ct = bead_deconv_df.drop("barcode",
                                    axis=1).values.argmax(axis=1) + 1
    bead_maxct_df = pd.DataFrame()
    bead_maxct_df["barcode"] = bead_deconv_df["barcode"]
    bead_maxct_df["max_cell_type"] = maxloc_ct

    return Hs, Hs_norm, puckcounts, bead_deconv_df, barcode_clusters, bead_maxct_df