# Load the latent-space coordinates for each barcode (rows = barcodes).
latent = pd.read_table(latent_file, index_col=0)

# Have to do this because data_slideseq makes it a numpy array
gene_info = pd.DataFrame(
    gene_info, columns=['EnsID', 'Symbol']).set_index('EnsID')
counts = pd.DataFrame(counts, index=gene_info.index, columns=barcodes)
num_umi = pd.Series(num_umi, index=barcodes)

# Align to latent space
counts = counts.loc[:, latent.index]
num_umi = num_umi[latent.index]
proj = proj.loc[latent.index]

# need counts, latent, and num_umi
# NOTE(review): was `hotspot.Hotspot(counts, latent, num_umi)` — positional
# arguments would bind `latent` into the `model` slot given the keyword
# signature used everywhere else in this project (model=..., latent=...,
# umi_counts=...). Pass keywords explicitly to match the other scripts.
hs = hotspot.Hotspot(counts, latent=latent, umi_counts=num_umi)

hs.create_knn_graph(
    weighted_graph=False, n_neighbors=n_neighbors, neighborhood_factor=3)

# %%
module = 5
module_genes = out_clusters.index[out_clusters == module]

scores = hotspot.modules.compute_scores(
    counts.loc[module_genes].values, model, num_umi.values,
    hs.neighbors.values, hs.weights.values)

# %% Plot scores
    # NOTE(review): tail of a function whose `def` precedes this chunk —
    # `tot` and `recovered` are accumulated in the unseen part above.
    return tot / recovered


# Build the full pairwise distance matrix between rows of `cm`:
# dist_mat[i, j] = dist_fun(row_i, row_j). O(n^2) calls — both (i, j)
# and (j, i) are computed, so dist_fun need not be symmetric.
dist_mat = np.zeros((cm.shape[0], cm.shape[0]))
cm_np = cm.values  # plain ndarray: faster row slicing inside the loop
for i in tqdm(range(cm.shape[0])):
    for j in range(cm.shape[0]):
        dd = dist_fun(cm_np[i, :], cm_np[j, :])
        dist_mat[i, j] = dd

# Re-attach the labels of `cm` on both axes
dist_mat = pd.DataFrame(dist_mat, index=cm.index, columns=cm.index)

# Run Hotspot from the precomputed distance matrix (no latent space here)
hs = hotspot.Hotspot(counts, model=model, distances=dist_mat,
                     umi_counts=num_umi)

hs.create_knn_graph(weighted_graph=weighted_graph, n_neighbors=n_neighbors,
                    neighborhood_factor=3)

results = hs.compute_hotspot(jobs=5)

# how='right' keeps exactly the genes present in `results`, annotated
# with the gene_info columns
results = gene_info.join(results, how='right')

results.to_csv(out_file, sep="\t")
# Have to do this because data_slideseq makes it a numpy array
gene_info = pd.DataFrame(
    gene_info, columns=['EnsID', 'Symbol']).set_index('EnsID')
counts = pd.DataFrame(counts, index=gene_info.index, columns=barcodes)
num_umi = pd.Series(num_umi, index=barcodes)

# Align to latent space
counts = counts.loc[:, latent.index]
num_umi = num_umi[latent.index]

# need counts, latent, and num_umi

# Drop genes detected in fewer than n_cells_min cells — too sparse to
# carry an autocorrelation signal
valid_genes = (counts > 0).sum(axis=1) >= n_cells_min
counts = counts.loc[valid_genes]

hs = hotspot.Hotspot(counts, model='danb', latent=latent, umi_counts=num_umi)

hs.create_knn_graph(weighted_graph=weighted_graph, n_neighbors=n_neighbors,
                    neighborhood_factor=3)

neighbors = hs.neighbors.values
weights = hs.weights.values

from hotspot.local_stats_pairs import create_centered_counts

# Center the counts under the model's null (DANB): this is the matrix
# used downstream. NOTE(review): a log-CPM normalization
# (counts / num_umi * 1e4, then log1p) was previously computed here and
# immediately overwritten by this call — that dead code has been removed.
norm_counts = create_centered_counts(counts.values, hs.model, num_umi.values)
num_umi = pd.Series(num_umi, index=barcodes) valid_cells = set() for x in t: if x.is_leaf(): valid_cells.add(x.name) valid_cells = pd.Index(valid_cells) # Align to latent space counts = counts.loc[:, valid_cells] num_umi = num_umi[valid_cells] # need counts, latent, and num_umi latent = pd.DataFrame(0, index=counts.columns, columns=range(10)) hs = hotspot.Hotspot(counts, latent=latent, umi_counts=num_umi) neighbors, weights = hotspot.knn.tree_neighbors_and_weights( t, n_neighbors, counts) weights = hotspot.knn.make_weights_non_redundant(neighbors.values, weights.values) weights = pd.DataFrame(weights, index=neighbors.index, columns=neighbors.columns) hs.weights = weights hs.neighbors = neighbors if highXMeanCutoff is not None: