scaled = counts.divide(counts.sum(axis=0), axis=1) * 10000 gene_means = scaled.mean(axis=1) valid_genes = gene_means.index[gene_means < highXMeanCutoff] hs_results = hs_results.loc[valid_genes & hs_results.index] if topN is None: hs_genes = hs_results.index[hs_results.FDR < fdrThresh] else: hs_genes = hs_results.sort_values('Z').tail(topN).index hs_genes = hs_genes & counts.index counts = counts.loc[hs_genes] c_counts = local_stats_pairs.create_centered_counts(counts.values, model, num_umi.values) # Run the process eg2s = np.array([ conditional_eg2(c_counts[i], hs.neighbors.values, hs.weights.values) for i in range(c_counts.shape[0]) ]) def _map_fun_parallel_pairs_centered(rowpair): global g_neighbors global g_weights global g_counts global g_eg2s return _compute_hs_pairs_inner_centered_cond_sym_shuff(
# Fit Hotspot (DANB null model) on the latent space and build the KNN graph
# used for the spatial autocorrelation statistics below.
hs = hotspot.Hotspot(counts, model='danb', latent=latent, umi_counts=num_umi)
hs.create_knn_graph(
    weighted_graph=weighted_graph,
    n_neighbors=n_neighbors,
    neighborhood_factor=3)

neighbors = hs.neighbors.values
weights = hs.weights.values

# Log counts-per-10k normalization of the expression matrix.
norm_counts = counts.values / num_umi.values.reshape((1, -1)) * 10000
norm_counts = np.log(norm_counts + 1)

from hotspot.local_stats_pairs import create_centered_counts

# NOTE(review): this immediately overwrites the log-CP10K matrix computed just
# above with model-centered counts, so the log-normalized values are never
# used — confirm whether the first computation is dead code.
norm_counts = create_centered_counts(counts.values, hs.model, num_umi.values)

# Compute Geary's C and Moran's I for each gene over the KNN graph.
geary_results = []
moran_results = []
for i in tqdm(range(norm_counts.shape[0])):
    exp_vals = norm_counts[i, :]

    g = geary(exp_vals, neighbors, weights)
    geary_results.append(g)

    m = moran(exp_vals, neighbors, weights)
    moran_results.append(m)

# Assemble per-gene results (statement continues past the visible chunk).
results = pd.DataFrame({
    'GearyC': geary_results,
# Pull expression matrices and metadata from the open loompy connection `ds`.
counts = ds[:, :]
scaled = ds.layers['scaled'][:, :]
gene_info = ds.ra['EnsID', 'Symbol']
num_umi = ds.ca['NumUmi'][:]

# Wrap the raw arrays in labeled pandas containers (genes x cells; `barcodes`
# is defined elsewhere in this file).
gene_info = pd.DataFrame(
    gene_info, columns=['EnsID', 'Symbol']).set_index('EnsID')
counts = pd.DataFrame(counts, index=gene_info.index, columns=barcodes)
scaled = pd.DataFrame(scaled, index=gene_info.index, columns=barcodes)
num_umi = pd.Series(num_umi, index=barcodes)

# Restrict both matrices to the Hotspot-selected genes.
counts_hs = counts.loc[hs_genes]
scaled_hs = scaled.loc[hs_genes]

from hotspot.local_stats_pairs import create_centered_counts

# Null model used to center the counts before dimensionality reduction.
model = 'bernoulli'

counts_hs_centered = create_centered_counts(
    counts_hs.values, model, num_umi.values)
counts_hs_centered = pd.DataFrame(
    counts_hs_centered,
    index=counts_hs.index,
    columns=counts_hs.columns)

# %% SVD

from sklearn.decomposition import TruncatedSVD

# NOTE(review): `model` is rebound here from the Hotspot model string to the
# SVD estimator — any later code expecting the string would break; consider a
# distinct name. Transposed so rows are cells for the decomposition.
model = TruncatedSVD(n_components=5)
results = model.fit_transform(counts_hs_centered.values.T)
results.shape

# %% TSNE

from sklearn.manifold import TSNE