def NMFreg(
    counts,
    coords,
    size,
    metacell_dict,
    gene_intersection,
    num_atlas_clusters,
    celltype_to_factor_dict,
    celltype_dict,
    plot_size_dict,
):
    """Project bead counts onto atlas factors with NNLS and label each bead.

    Each bead's counts are normalised by the bead total, scaled per gene to
    unit variance (no centering), and regressed onto ``WaT`` with
    non-negative least squares. The resulting loadings are scaled per factor,
    and each bead is then assigned its strongest factor, the atlas cluster
    that factor belongs to, and a per-cell-type deconvolution.

    Besides its parameters, this function reads the following names from the
    enclosing (module) scope: ``WaT``, ``K``, ``factor_to_celltype_dict`` and
    ``deconv_factor_to_celltype``.
    NOTE(review): ``WaT`` is presumably the transposed atlas basis matrix
    (genes x factors) and ``K`` the number of factors -- confirm against the
    code that defines them.

    Args:
        counts: DataFrame with a ``"barcode"`` column plus one column for
            every name in ``gene_intersection``.
        coords: Not used by this function.
        size: Not used by this function.
        metacell_dict: Not used by this function.
        gene_intersection: List of gene column names to select from
            ``counts``.
        num_atlas_clusters: Number of atlas clusters; clusters are numbered
            ``1..num_atlas_clusters``.
        celltype_to_factor_dict: Mapping from cluster number to the factor
            indices belonging to that cluster.
        celltype_dict: Mapping passed to ``DataFrame.rename(columns=...)`` to
            relabel the 1-based cell-type columns of the deconvolution table.
        plot_size_dict: Not used by this function.

    Returns:
        A tuple ``(Hs, Hs_norm, puckcounts, bead_deconv_df, barcode_clusters,
        bead_maxct_df)``:

        * ``Hs`` -- raw NNLS loadings, one row per bead, plus ``"barcode"``.
        * ``Hs_norm`` -- loadings scaled per factor, plus ``"barcode"``.
        * ``puckcounts`` -- the selected gene counts indexed by barcode.
        * ``bead_deconv_df`` -- output of ``deconv_factor_to_celltype`` per
          bead, with ``"barcode"`` as the first column.
        * ``barcode_clusters`` -- ``"barcode"``, ``"max_factor"`` and
          ``"atlas_cluster"`` per bead.
        * ``bead_maxct_df`` -- ``"barcode"`` and the 1-based position of the
          largest deconvolution column (``"max_cell_type"``).
    """
    # --- Select the shared genes and index the counts by barcode. ---------
    puckcounts = counts[["barcode"] + gene_intersection]
    puckcounts = puckcounts.set_index(counts["barcode"])
    puckcounts = puckcounts.drop("barcode", axis=1)

    # --- Per-bead normalisation followed by per-gene variance scaling. ----
    cell_total_umi = puckcounts.sum(axis=1)
    # Build the column vector from the NumPy array: indexing a pandas Series
    # with ``[:, None]`` is not supported by current pandas releases.
    # NOTE(review): a bead whose total is 0 is divided by zero here; that
    # case is not handled, same as before.
    puckcounts_cellnorm = np.divide(
        puckcounts, cell_total_umi.to_numpy()[:, None]
    )
    puckcounts_scaled = StandardScaler(with_mean=False).fit_transform(
        puckcounts_cellnorm
    )
    XsT = puckcounts_scaled.T  # genes x beads

    # --- Non-negative least squares, one bead (column of XsT) at a time. --
    # Rows are collected in a list and stacked once; this avoids re-copying
    # the growing matrix on every iteration and keeps the result 2-D
    # (beads x factors) even when there is only a single bead.
    loadings = [
        scipy.optimize.nnls(WaT, XsT[:, b])[0]
        for b in tqdm(range(XsT.shape[1]))
    ]
    Hs_hat = np.vstack(loadings)

    barcodes = puckcounts.index.tolist()

    Hs = pd.DataFrame(Hs_hat)
    Hs["barcode"] = barcodes

    Hs_norm = StandardScaler(with_mean=False).fit_transform(
        Hs.drop("barcode", axis=1)
    )
    Hs_norm = pd.DataFrame(Hs_norm)
    Hs_norm["barcode"] = barcodes

    # --- Strongest factor per bead and the atlas cluster it maps to. ------
    maxloc_s = Hs_norm.drop("barcode", axis=1).values.argmax(axis=1)

    barcode_clusters = pd.DataFrame()
    barcode_clusters["barcode"] = Hs_norm["barcode"]
    barcode_clusters["max_factor"] = maxloc_s

    # The cluster column starts out holding the barcode as a placeholder, so
    # a bead whose top factor belongs to no cluster keeps its barcode there.
    # The values are filled in a plain object array and attached once,
    # instead of chained ``df[col][mask] = value`` assignment, which is not
    # guaranteed to write through to the DataFrame.
    atlas_cluster = barcode_clusters["barcode"].to_numpy(dtype=object).copy()
    for c in range(1, num_atlas_clusters + 1):
        in_cluster = np.isin(maxloc_s, celltype_to_factor_dict[c])
        atlas_cluster[in_cluster] = c
    barcode_clusters["atlas_cluster"] = atlas_cluster

    # --- Deconvolve factor loadings into cell-type contributions. ---------
    # The helper receives the full row, i.e. including the "barcode" entry.
    bead_deconv_df = Hs_norm.apply(
        lambda x: deconv_factor_to_celltype(
            row=x,
            adict=factor_to_celltype_dict,
            K=K,
            num_atlas_clusters=num_atlas_clusters,
        ),
        axis=1,
    )
    bead_deconv_df.insert(0, "barcode", Hs_norm["barcode"])
    # Shift the cell-type column labels from 0-based to 1-based so they line
    # up with the cluster numbering before applying ``celltype_dict``.
    bead_deconv_df.columns = ["barcode"] + (
        bead_deconv_df.columns[1:] + 1
    ).tolist()
    bead_deconv_df = bead_deconv_df.rename(columns=celltype_dict)

    # --- Dominant cell type per bead (1-based column position). -----------
    maxloc_ct = (
        bead_deconv_df.drop("barcode", axis=1).values.argmax(axis=1) + 1
    )
    bead_maxct_df = pd.DataFrame()
    bead_maxct_df["barcode"] = bead_deconv_df["barcode"]
    bead_maxct_df["max_cell_type"] = maxloc_ct

    return (
        Hs,
        Hs_norm,
        puckcounts,
        bead_deconv_df,
        barcode_clusters,
        bead_maxct_df,
    )