Beispiel #1
0
# Load the latent-space table; the first column of the file becomes the index.
latent = pd.read_table(latent_file, index_col=0)

# data_slideseq hands these back as plain numpy arrays, so wrap them in
# labelled pandas objects (genes keyed by 'EnsID', columns keyed by barcodes).
gene_info = pd.DataFrame(gene_info, columns=['EnsID', 'Symbol'])
gene_info = gene_info.set_index('EnsID')
num_umi = pd.Series(num_umi, index=barcodes)
counts = pd.DataFrame(counts, index=gene_info.index, columns=barcodes)

# Restrict every per-barcode object to latent.index, in that order.
proj = proj.loc[latent.index]
num_umi = num_umi[latent.index]
counts = counts.loc[:, latent.index]

# Hotspot is built from counts, latent, and num_umi.
hs = hotspot.Hotspot(counts, latent, num_umi)
hs.create_knn_graph(
    weighted_graph=False,
    n_neighbors=n_neighbors,
    neighborhood_factor=3,
)

# %%

# Score one module: select the genes whose entry in out_clusters equals it.
module = 5
module_genes = out_clusters.index[out_clusters == module]

scores = hotspot.modules.compute_scores(
    counts.loc[module_genes].values,
    model,
    num_umi.values,
    hs.neighbors.values,
    hs.weights.values,
)

# %% Plot scores
Beispiel #2
0
    return tot / recovered


# All-pairs matrix: entry (i, j) holds dist_fun(row i of cm, row j of cm).
# Every ordered pair is evaluated, so no symmetry of dist_fun is assumed.
cm_np = cm.values
dist_mat = np.zeros((cm.shape[0], cm.shape[0]))

for i in tqdm(range(cm.shape[0])):
    for j in range(cm.shape[0]):
        dd = dist_mat[i, j] = dist_fun(cm_np[i, :], cm_np[j, :])

# Label both axes of the matrix with cm's index.
dist_mat = pd.DataFrame(dist_mat, index=cm.index, columns=cm.index)

hs = hotspot.Hotspot(
    counts, model=model, distances=dist_mat, umi_counts=num_umi,
)
hs.create_knn_graph(
    weighted_graph=weighted_graph,
    n_neighbors=n_neighbors,
    neighborhood_factor=3,
)

# Right join: every row of the Hotspot results is kept, and the gene_info
# columns are attached where the index matches.
results = gene_info.join(hs.compute_hotspot(jobs=5), how='right')
results.to_csv(out_file, sep="\t")
Beispiel #3
0
# data_slideseq hands these back as plain numpy arrays, so wrap them in
# labelled pandas objects before doing any label-based alignment.
gene_info = pd.DataFrame(gene_info, columns=['EnsID',
                                             'Symbol']).set_index('EnsID')
counts = pd.DataFrame(counts, index=gene_info.index, columns=barcodes)
num_umi = pd.Series(num_umi, index=barcodes)

# Align to latent space
counts = counts.loc[:, latent.index]
num_umi = num_umi[latent.index]

# Keep only genes with a nonzero count in at least n_cells_min columns.
valid_genes = (counts > 0).sum(axis=1) >= n_cells_min
counts = counts.loc[valid_genes]

hs = hotspot.Hotspot(counts, model='danb', latent=latent, umi_counts=num_umi)

hs.create_knn_graph(weighted_graph=weighted_graph,
                    n_neighbors=n_neighbors,
                    neighborhood_factor=3)

# Plain ndarray views of the graph built by create_knn_graph.
neighbors = hs.neighbors.values
weights = hs.weights.values

from hotspot.local_stats_pairs import create_centered_counts

# norm_counts holds the centered counts produced by Hotspot's own helper.
# An earlier log(counts / num_umi * 10000 + 1) normalisation that used to be
# computed here was removed: it was overwritten by this assignment before
# anything read it, so it only cost two extra passes over the full matrix.
norm_counts = create_centered_counts(counts.values, hs.model, num_umi.values)
Beispiel #4
0
num_umi = pd.Series(num_umi, index=barcodes)

# Names of the leaves of t, in the order t yields them, with repeats dropped.
# Building the Index from a list (not a set) keeps the column order of
# counts/num_umi identical from run to run; set iteration order over strings
# changes with hash randomisation.
# NOTE(review): assumes leaf names are labels present in counts.columns.
valid_cells = pd.Index([x.name for x in t if x.is_leaf()]).unique()

# Restrict counts and num_umi to the leaves of the tree
counts = counts.loc[:, valid_cells]
num_umi = num_umi[valid_cells]

# Hotspot is given an all-zero placeholder latent space here; the neighbors
# and weights it uses are assigned from the tree below.
latent = pd.DataFrame(0, index=counts.columns, columns=range(10))
hs = hotspot.Hotspot(counts, latent=latent, umi_counts=num_umi)

# Neighbors and weights computed from the tree t.
neighbors, weights = hotspot.knn.tree_neighbors_and_weights(
    t, n_neighbors, counts)

# make_weights_non_redundant works on raw arrays, so re-wrap its result with
# the same labels as the neighbors frame.
weights = hotspot.knn.make_weights_non_redundant(neighbors.values,
                                                 weights.values)
weights = pd.DataFrame(weights,
                       index=neighbors.index,
                       columns=neighbors.columns)

# Install the tree-based graph on the Hotspot object.
hs.weights = weights
hs.neighbors = neighbors

if highXMeanCutoff is not None: