Beispiel #1
0
def visualize_dictionary(ct, X_dimred, genes, cell_types,
                         namespace, cluster_method, verbose=True):
    """Write UMAP and covariance-heatmap PNGs for every cluster in ``ct``.

    A KNN graph and a UMAP embedding are computed from ``X_dimred``; the
    embedding is then reused for one cell-type plot (via scanorama's
    ``visualize``) and, per cluster, an intensity scatter, an intensity
    histogram and a thresholded membership scatter.  A second pass draws a
    clustered heatmap of each cluster's slice of ``ct.dictionary_``.

    Parameters
    ----------
    ct : object
        Must expose ``labels_`` (indexed as ``labels_[:, c]``) and
        ``dictionary_`` (indexed as ``dictionary_[:, :, c]``).  Neither
        attribute is modified by this function.
    X_dimred : array-like
        Matrix handed to ``AnnData(X=...)``; presumably cells x components
        -- TODO confirm against caller.
    genes : array-like
        Gene labels; indexed with a boolean mask whose length is the first
        dimension of ``ct.dictionary_``.
    cell_types : sequence
        One label per row of the embedding.
    namespace, cluster_method : str
        Interpolated into every output file name.
    verbose : bool, optional
        When true, progress messages are emitted through ``tprint``.

    Returns
    -------
    None
        All results are written as ``.png`` files (names starting with
        ``namespace``) or printed to stdout.
    """
    from anndata import AnnData
    from scanorama import visualize
    import scanpy as sc
    import seaborn as sns

    # KNN and UMAP.

    if verbose:
        tprint('Constructing KNN graph...')
    adata = AnnData(X=X_dimred)
    sc.pp.neighbors(adata, use_rep='X')

    if verbose:
        tprint('Visualizing with UMAP...')
    sc.tl.umap(adata, min_dist=0.5)
    # Clamp outlying coordinates to [-20, 20] so a few stray points do not
    # stretch the axes of every plot.
    embedding = np.clip(np.array(adata.obsm['X_umap']), -20, 20)

    # Visualize cell types.

    le = LabelEncoder().fit(cell_types)
    cell_types_int = le.transform(cell_types)
    visualize(
        None, cell_types_int,
        '{}_pan_umap_{}_type'.format(namespace, cluster_method),
        np.array(sorted(set(cell_types))),
        embedding=embedding,
        image_suffix='.png'
    )

    n_clusters = ct.labels_.shape[1]

    for c_idx in range(n_clusters):
        # Float *copy* of the column: normalising in place on a view would
        # silently rewrite ct.labels_ (and fail outright on integer dtypes).
        intensity = ct.labels_[:, c_idx].astype(float)
        peak = intensity.max()
        if peak != 0:
            # An all-zero column stays all-zero instead of becoming NaN.
            intensity /= peak

        print('\nCluster {}'.format(c_idx))

        print_cell_types(cell_types, intensity)

        # Visualize cluster in UMAP coordinates.

        fig = plt.figure()
        plt.title('Cluster {}'.format(c_idx))
        plt.scatter(embedding[:, 0], embedding[:, 1],
                    c=intensity, cmap='Blues', s=1)
        plt.savefig('{}_pan_umap_{}_cluster{}.png'
                    .format(namespace, cluster_method, c_idx), dpi=500)
        # Close explicitly; pyplot keeps every figure alive otherwise.
        plt.close(fig)

        # Distribution of the normalised intensities.

        fig = plt.figure()
        plt.title('Cluster {}'.format(c_idx))
        plt.hist(intensity.flatten(), bins=100)
        plt.savefig('{}_pan_umap_{}_intensehist{}.png'
                    .format(namespace, cluster_method, c_idx), dpi=500)
        plt.close(fig)

        # Binary membership: 1 where the normalised intensity exceeds 0.8.
        membership = (intensity > 0.8) * 1

        fig = plt.figure()
        plt.title('Cluster {}'.format(c_idx))
        plt.scatter(embedding[:, 0], embedding[:, 1],
                    c=membership, cmap='Blues', s=1)
        plt.savefig('{}_pan_umap_{}_member{}.png'
                    .format(namespace, cluster_method, c_idx), dpi=500)
        plt.close(fig)

    # Loop-invariant inputs for the heatmaps below.
    gene_labels = np.asarray(genes)
    heat_cmap = sns.diverging_palette(220, 10, as_cmap=True)

    for c_idx in range(n_clusters):

        # Visualize covariance matrix.

        # Copy the slice so zeroing NaNs does not modify ct.dictionary_.
        corr = np.array(ct.dictionary_[:, :, c_idx])
        corr[np.isnan(corr)] = 0

        # Keep only genes whose row has at least one non-zero entry.
        gene_idx = np.sum(np.abs(corr), axis=1) > 0
        if not gene_idx.any():
            continue
        corr = corr[gene_idx]
        corr = corr[:, gene_idx]

        # NOTE: this changes the process-wide matplotlib font size and is
        # not restored afterwards (same as before).
        plt.rcParams.update({'font.size': 5})
        # Symmetric colour limits so that zero maps to the palette centre.
        corr_max = np.abs(corr).max()
        # clustermap opens its own figure, which becomes the current one;
        # a figure created beforehand would stay blank and never be saved.
        sns.clustermap(corr, xticklabels=gene_labels[gene_idx],
                       yticklabels=gene_labels[gene_idx], cmap=heat_cmap,
                       vmin=-corr_max, vmax=corr_max)
        plt.gcf().suptitle('Cluster {}'.format(c_idx))
        # NOTE(review): these act on the *current* axes of the clustermap
        # figure -- confirm that this is the heatmap axes as intended.
        plt.xticks(rotation=90)
        plt.yticks(rotation=90)
        plt.savefig('{}_pan_cov_{}_cluster{}.png'
                    .format(namespace, cluster_method, c_idx), dpi=500)
        plt.close()
Beispiel #2
0
    save_datasets(datasets, genes, data_names)

    labels = []
    names = []
    curr_label = 0
    for i, a in enumerate(datasets):
        labels += list(np.zeros(a.shape[0]) + curr_label)
        names.append(data_names[i])
        curr_label += 1
    labels = np.array(labels, dtype=int)

    embedding = visualize(datasets_dimred,
                          labels,
                          NAMESPACE + '_ds',
                          names,
                          perplexity=600,
                          n_iter=400,
                          size=100)

    cell_labels = (open(
        'data/cell_labels/293t_jurkat_cluster.txt').read().rstrip().split())
    le = LabelEncoder().fit(cell_labels)
    labels = le.transform(cell_labels)
    cell_types = le.classes_

    visualize(None,
              labels,
              NAMESPACE + '_type',
              cell_types,
              perplexity=600,
Beispiel #3
0
    labels = np.array(labels, dtype=int)

    pbmc_genes = [
        'CD14', 'PTPRC', 'FCGR3A', 'ITGAX', 'ITGAM', 'CD19', 'HLA-DRB1',
        'FCGR2B', 'FCGR2A', 'CD3E', 'CD4', 'CD8A', 'CD8B', 'CD28', 'CD8',
        'TBX21', 'IKAROS', 'IL2RA', 'CD44', 'SELL', 'CCR7', 'MS4A1', 'CD68',
        'CD163', 'IL5RA', 'SIGLEC8', 'KLRD1', 'NCR1', 'CD22', 'IL3RA', 'CCR6',
        'IL7R', 'CD27', 'FOXP3', 'PTCRA', 'ID3', 'PF4', 'CCR10', 'SIGLEC7',
        'NKG7', 'S100A8', 'CXCR3', 'CCR5', 'CCR3', 'CCR4', 'PTGDR2', 'RORC'
    ]

    embedding = visualize(datasets_dimred,
                          labels,
                          NAMESPACE + '_ds',
                          names,
                          gene_names=pbmc_genes,
                          gene_expr=np.concatenate(datasets),
                          genes=genes,
                          perplexity=500,
                          n_iter=400)

    cell_labels = (
        open('data/cell_labels/pbmc_cluster.txt').read().rstrip().split())
    le = LabelEncoder().fit(cell_labels)
    cell_labels = le.transform(cell_labels)
    cell_types = le.classes_

    visualize(datasets_dimred,
              cell_labels,
              NAMESPACE + '_type',
              cell_types,
Beispiel #4
0
    names = []
    curr_label = 0
    for i, a in enumerate(datasets):
        labels += list(np.zeros(a.shape[0]) + curr_label)
        names.append(data_names[i])
        curr_label += 1
    labels = np.array(labels, dtype=int)

    hsc_genes = [
        'GATA2', 'APOE', 'SPHK1', 'CTSE', 'FOS'
    ]

    # Visualize with PCA.
    visualize(None, labels, NAMESPACE + '_ds', names,
              gene_names=hsc_genes, genes=genes,
              gene_expr=np.concatenate(datasets),
              embedding=np.concatenate(datasets_dimred),
              size=4)

    cell_labels = (
        open('data/cell_labels/hsc_cluster.txt')
        .read().rstrip().split()
    )
    le = LabelEncoder().fit(cell_labels)
    cell_labels = le.transform(cell_labels)
    cell_types = le.classes_

    visualize(None, cell_labels, NAMESPACE + '_type', cell_types,
              embedding=np.concatenate(datasets_dimred), size=4)

Beispiel #5
0
from scanorama import correct, visualize, process_data
from scanorama import dimensionality_reduce, merge_datasets

# Tag handed to visualize() as its third argument for this run.
NAMESPACE = 'different'

# Dataset path prefixes; each entry is loaded by load_names() below.
data_names = [
    'data/293t_jurkat/293t',
    'data/brain/neuron_9k',
    'data/hsc/hsc_mars',
    'data/macrophage/uninfected',
    'data/pancreas/pancreas_inDrop',
    'data/pbmc/10x/68k_pbmc',
]

if __name__ == '__main__':
    # Load, run scanorama's correct(), normalise along axis 1, then reduce
    # dimensionality.
    datasets, genes_list, n_cells = load_names(data_names)
    datasets, genes = correct(datasets, genes_list)
    datasets = [normalize(matrix, axis=1) for matrix in datasets]
    datasets_dimred = dimensionality_reduce(datasets)

    # One display name per dataset, in dataset order.
    names = [data_names[pos] for pos, _ in enumerate(datasets)]

    # One integer label per row: the index of the dataset the row came from.
    labels = np.array(
        [pos
         for pos, matrix in enumerate(datasets)
         for _ in range(matrix.shape[0])],
        dtype=int,
    )

    visualize(datasets_dimred, labels, NAMESPACE, data_names)