def subcluster_3_cluster_QC_DE(adata, res, tit):
    """Cluster ``adata``, draw QC plots and export differential-expression markers.

    Parameters
    ----------
    adata
        Annotated data object. It is modified in place by ``pg.leiden`` and
        ``pg.de_analysis``.
    res
        Leiden resolution; also used to build the cluster label
        ``"leiden_res_<res>"``.
    tit
        Title used for the figures and embedded in the output file names.

    Returns
    -------
    The result of ``filter_markers(markers, 0.75, False)``. The same object is
    pickled to ``FiltMarkers<tit>.pckl``; the unfiltered markers are written
    to ``DE_genes_<tit>.xlsx``.
    """
    # 1) clustering
    leiden_tit = "leiden_res_" + str(res)
    pg.leiden(adata, rep="pca_harmony", resolution=res, class_label=leiden_tit)

    # 2) QC plots
    sc.set_figure_params(figsize=(10, 10), fontsize=30)
    sc.pl.umap(adata, color=[leiden_tit, "method"], size=35, show=False)
    plt.suptitle(tit, fontsize=50)
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.show()

    sc.set_figure_params(figsize=(16, 12), fontsize=30)
    sc.pl.umap(adata, color=["patient", "sample"], size=95, show=False)
    plt.suptitle(tit, fontsize=50)
    plt.show()

    sc.pl.umap(adata, size=95, color=['n_genes', 'n_counts', 'percent_mito'], show=False)
    plt.suptitle(tit, fontsize=50)
    plt.show()
    composition_barplot(adata, leiden_tit, "sample", tit + " - Sample count per cluster")
    composition_barplot(adata, leiden_tit, "patient", tit + " - Patient count per cluster")

    # 3) DE genes
    pg.de_analysis(adata, leiden_tit)
    markers = pg.markers(adata)
    pg.write_results_to_excel(markers, "DE_genes_" + tit + ".xlsx")
    filt_markers = filter_markers(markers, 0.75, False)
    # Context manager guarantees the file is closed even if pickling fails.
    with open("FiltMarkers" + tit + ".pckl", "wb") as filehandler:
        pickle.dump(filt_markers, filehandler)
    return filt_markers
def viral_enrichment_umap(adata, enrichment_score, save_str="", title="", all_or_subclustering="all"):
    """Plot a UMAP coloured by an enrichment score with Viral+ cells overlaid in black.

    Parameters
    ----------
    adata
        Annotated data object. A copy is made, so the caller's object is not
        modified. Must provide ``obs[enrichment_score]`` and ``obs["Viral+"]``.
    enrichment_score
        Name of the ``adata.obs`` column used to colour the background UMAP.
    save_str
        If empty the figure is shown; otherwise it is saved to
        ``figures/<save_str>_viral_umap.pdf``.
    title
        Title prefix for the plot.
    all_or_subclustering
        ``"all"`` draws the overlaid Viral+ cells with size 23, any other
        value with size 50.
    """
    import os

    # setting colors
    adata = adata.copy()
    data = adata.obs[enrichment_score]
    num_levels = 20
    # Build a discrete coolwarm map whose neutral colour sits at score 0.
    # NOTE(review): np.interp expects increasing x-coordinates, so this
    # presumably assumes vmin < 0 < vmax -- confirm for one-sided scores.
    vmin, midpoint, vmax = data.min(), 0, data.max()
    levels = np.linspace(vmin, vmax, num_levels)
    midp = np.mean(np.c_[levels[:-1], levels[1:]], axis=1)
    vals = np.interp(midp, [vmin, midpoint, vmax], [0, 0.5, 1])
    colors = plt.cm.coolwarm(vals)
    cmap, norm = matplotlib.colors.from_levels_and_colors(levels, colors)
    adata.obs["Viral+"] = pd.Categorical(adata.obs["Viral+"])
    plt.clf()
    sc.set_figure_params(figsize=(5, 5))
    fig, ax = plt.subplots(1, 1)
    sc.pl.umap(adata, color=enrichment_score, color_map=cmap, title=title + " " + enrichment_score, show=False, ax=ax)
    size = 23 if all_or_subclustering == "all" else 50
    # Second pass on the same axes: only the Viral+ cells, drawn in black.
    sc.pl.umap(adata[adata.obs["Viral+"] == True, :], color="Viral+", palette=["black"], size=size, show=False, ax=ax, legend_loc=None, title=title)
    plt.tight_layout()
    if save_str == "":
        plt.show()
    else:
        out_path = "figures/" + save_str + "_viral_umap.pdf"
        # savefig does not create missing directories.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        plt.savefig(out_path)
Esempio n. 3
0
def filter_abundant_barcodes(adata,
                             filter_cells=False,
                             threshold=1000,
                             library_id='',
                             save_path='./figures/'):
    '''
    Plots a weighted histogram of transcripts per cell barcode for guiding the
    placement of a filtering threshold.

    Side effects: writes ``total_counts`` and ``n_genes_by_counts`` to
    ``adata.obs``, saves ``barcode_hist_<library_id>.png`` inside
    ``save_path`` (created if missing), shows the figure and prints a summary.

    Parameters
    ----------
    adata
        Annotated data object whose ``X`` holds counts.
    filter_cells
        If True, barcodes with fewer than ``threshold`` counts are removed
        in place via ``sc.pp.filter_cells`` and ``adata`` is returned.
    threshold
        Minimum total counts per barcode.
    library_id
        Label for the plot title and file name; when empty,
        ``adata.uns['library_id']`` is used if present.
    save_path
        Output directory for the figure.

    Returns
    -------
    The filtered ``adata`` when ``filter_cells`` is True, otherwise None.
    '''

    # if necessary, create the output directory
    os.makedirs(save_path, exist_ok=True)

    # use adata.uns['library_id'] if it exists
    if not library_id and 'library_id' in adata.uns:
        library_id = adata.uns['library_id']

    # Sum total UMI counts and genes for each cell-barcode, save to obs.
    # ravel() flattens the (n, 1) matrix that sparse row sums return.
    counts = np.asarray(adata.X.sum(axis=1)).ravel()
    genes = np.asarray(adata.X.astype(bool).sum(axis=1)).ravel()
    adata.obs['total_counts'] = counts
    adata.obs['n_genes_by_counts'] = genes
    ix = counts >= threshold
    retained_msg = str(np.sum(ix)) + '/' + str(counts.shape[0]) + ' cells retained'

    # Plot and format a weighted cell-barcode counts histogram
    sc.set_figure_params(dpi=100, figsize=[4, 4], fontsize=12)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hist(counts, bins=np.logspace(0, 6, 100), weights=counts / counts.sum())
    ax.set_xscale('log')
    ax.set_xlabel('Transcripts per cell barcode')
    ax.set_ylabel('Fraction of total transcripts')
    ax.set_title(library_id)
    ax.text(0.99,
            0.95,
            retained_msg,
            ha='right',
            va='center',
            transform=ax.transAxes)

    # Overlay the counts threshold as a vertical line
    ax.plot([threshold, threshold], ax.get_ylim())

    # Save figure to file; os.path.join works with or without a trailing
    # separator on save_path.
    fig.tight_layout()
    plt.savefig(os.path.join(save_path, 'barcode_hist_' + library_id + '.png'))
    plt.show()
    plt.close()

    # Print the number of cell barcodes that will be retained
    print('Barcode Filtering ' + library_id + ' (' + retained_msg + ')')
    print()

    # If requested, return a filtered version of adata
    if filter_cells:
        sc.pp.filter_cells(adata, min_counts=threshold, inplace=True)
        return adata
Esempio n. 4
0
def marker_analysis(adata,variables=['leiden','region'],markerpath='https://docs.google.com/spreadsheets/d/e/2PACX-1vTz5a6QncpOOO-f3FHW2Edomn7YM5mOJu4z_y07OE3Q4TzcRr14iZuVyXWHv8rQuejzhhPlEBBH1y0V/pub?gid=1154528422&single=true&output=tsv'):
    """Score and plot marker-gene groups read from a tab-separated marker table.

    The table is addressed by column position, not by name:
    column 0 is cast to ``str`` and matched against ``adata.var_names``;
    column 2 is split on ``,``; column 3 is split on ``;`` and each piece
    again on ``,``. (Presumably gene / class / subclass columns -- the
    column names are not visible here, confirm against the sheet.)

    Parameters
    ----------
    adata
        Annotated data object; ``sc.tl.score_genes`` adds one score per
        marker group and ``adata.uns['marker_groups']`` is set.
    variables
        ``adata.obs`` columns to group by for the summary CSVs. The list
        default is never mutated in this function.
    markerpath
        Path or URL of the marker table (passed through
        ``os.path.expanduser`` and ``pd.read_csv(sep="\\t")``).

    Returns
    -------
    The same ``adata`` object.
    """
    sc.set_figure_params(color_map="Purples")
    import random
    markerpath=os.path.expanduser(markerpath)
    markers=pd.read_csv(markerpath,sep="\t")
    print(markers)
    # Normalise the three positional columns into str / list / list-of-lists.
    markers[markers.keys()[0]]=[str(x) for x in markers[markers.keys()[0]]]
    markers[markers.keys()[2]]=[str(x).split(',') for x in markers[markers.keys()[2]]]
    markers[markers.keys()[3]]=[str(x).split(';') for x in markers[markers.keys()[3]]]
    markers[markers.keys()[3]]=[[str(x).split(',') for x in y] for y in markers[markers.keys()[3]]]
    # Missing cells become the string 'nan' after str(); they are filtered out here.
    uniqueClasses=set([y for x in markers[markers.keys()[2]] for y in x if y!='nan'])
    uniqueSubClasses=set([z for x in markers[markers.keys()[3]] for y in x for z in y if z!='nan'])
    comboClasses=[]
    print(markers)
    # For every row, pair the j-th entry of column 2 with each entry of the
    # j-th ';'-group of column 3, giving labels of the form "<col3> <col2>".
    # NOTE(review): column 3 needs at least as many ';'-groups as column 2
    # has entries, otherwise the [i][j] lookup raises IndexError.
    for i in range(markers.shape[0]):
        rowlist=[]
        for j in range(len(markers[markers.keys()[2]][i])):
            for k in markers[markers.keys()[3]][i][j]:
                rowlist.append(' '.join(filter(lambda x: x != 'nan',[k,markers[markers.keys()[2]][i][j]])))
        comboClasses.append(rowlist)
    markers['fullclass']=comboClasses
    markers.set_index(markers.keys()[0],inplace=True,drop=False)
    # Keep only rows whose column-0 value is present in adata.var_names.
    markers=markers.loc[ [x for x in markers[markers.keys()[0]] if x in adata.var_names],:]
    uniqueFullClasses=set([y for x in markers['fullclass'] for y in x if y!='nan'])
    from collections import defaultdict
    # fullclass label -> list of column-0 values carrying that label
    markerDict = defaultdict(list)

    for x in uniqueFullClasses:
        for y in markers[markers.keys()[0]]:
            if x in markers.loc[y,'fullclass']:
                markerDict[x].append(y)
    # column-2 value -> rows whose 'fullclass' list contains that exact string
    # (i.e. only combined labels that had no column-3 part).
    markerDictClass = defaultdict(list)
    for x in uniqueClasses:
        for y in markers[markers.keys()[0]]:
            if x in markers.loc[y,'fullclass']:
                markerDictClass[x].append(y)

    markerPlotGroups=[]
    # Only groups with more than one gene are scored and plotted.
    for k in markerDict.keys():
        if len(markerDict[k])>1:
            print(k)
            print(len(markerDict[k]))
            # gene_pool = the group's genes plus an unseeded random sample of
            # up to 4000 var names, so scores are not reproducible across runs.
            sc.tl.score_genes(adata,gene_list=markerDict[k],score_name=k,gene_pool= markerDict[k]+random.sample(adata.var.index.tolist(),min(4000,adata.var.index.shape[0])))
            markerPlotGroups.append(k)
    # Stores every group key, including groups that were not scored above.
    adata.uns['marker_groups']=list(markerDict.keys())
    # One summary-statistics CSV per grouping variable, written to scanpy's figdir.
    for tag in variables:
        pd.DataFrame(adata.obs.groupby(tag).describe()).to_csv(os.path.join(sc.settings.figdir, tag+"MarkerSumStats.csv"))

    if 'X_tsne' in adata.obsm.keys():
        sc.pl.tsne(adata, color=markerPlotGroups,save="_Marker_Group")
    sc.pl.umap(adata, color=markerPlotGroups,save="_Marker_Group")
    print(markerDict)
    #sc.pl.violin(adata, markerPlotGroups, groupby='leiden',save="_Marker_Group_violins")
    # Per-class plots of the individual marker genes.
    for i in markerDictClass:
        print(i)
        if 'X_tsne' in adata.obsm.keys():
            sc.pl.tsne(adata, color=sorted(markerDictClass[i]),save="_"+str(i)+"_Marker")
        sc.pl.umap(adata, color=sorted(markerDictClass[i]),save="_"+str(i)+"_Marker")
    return(adata)
Esempio n. 5
0
def test_violin():
    """Draw the multi-panel QC violin plot and compare it with the master image."""
    sc.pl.set_rcParams_defaults()
    sc.set_figure_params(dpi=50, color_map='viridis')

    dataset = sc.datasets.pbmc68k_reduced()
    qc_keys = ['n_genes', 'percent_mito', 'n_counts']
    plot_options = dict(stripplot=True, multi_panel=True, jitter=True, show=False)
    sc.pl.violin(dataset, qc_keys, **plot_options)
    save_and_compare_images('master_violin_multi_panel', tolerance=40)
def plot_zscore_signature(markers, data, pdf, pop_name):
    """Add one UMAP page, coloured by a signature z-score, to ``pdf``.

    Only the ``markers`` found in ``data.var_names`` are used. Their score
    (from ``signature_score_per_cell``) is stored in
    ``data.obs["zscore_<pop_name>"]`` and the gene names are annotated next
    to the plot, one per line.
    """
    sc.set_figure_params(figsize=(10, 8))
    score_key = "zscore_" + pop_name
    present_genes = list(filter(lambda gene: gene in data.var_names, markers))
    data.obs[score_key] = signature_score_per_cell(data, present_genes)
    # One gene per line, each followed by a newline.
    annotation_text = "".join(name + "\n" for name in present_genes)
    figure = sc.pl.umap(
        data,
        color=score_key,
        cmap="Purples",
        size=40,
        show=False,
        return_fig=True,
    )
    plt.suptitle(pop_name + " Z-score")
    plt.annotate(
        annotation_text,
        xy=(1.2, 0),
        xycoords=('axes fraction', 'axes fraction'),
    )
    pdf.savefig(figure, bbox_inches="tight")
Esempio n. 7
0
def set_figure_params(
    self,
    episcanpy: bool = True,
    dpi: int = 80,
    dpi_save: int = 150,
    frameon: bool = True,
    vector_friendly: bool = True,
    fontsize: int = 14,
    color_map: Optional[str] = None,
    format: Union[str, Iterable[str]] = "pdf",
    transparent: bool = False,
    ipython_format: str = "png2x",
    ):

    """\
    Set resolution/size, styling and format of figures.

    Thin wrapper that forwards every argument to ``sc.set_figure_params``;
    ``episcanpy`` is passed on as scanpy's ``scanpy`` flag.

    Parameters
    ----------
    episcanpy
        Init default values for :obj:`matplotlib.rcParams` suited for `scanpy` or `epiScanpy`.
    dpi
        Resolution of rendered figures - this influences the size of figures in notebooks.
    dpi_save
        Resolution of saved figures. This should typically be higher to achieve
        publication quality.
    frameon
        Add frames and axes labels to scatter plots.
    vector_friendly
        Plot scatter plots using `png` backend even when exporting as `pdf` or `svg`.
    fontsize
        Set the fontsize for several `rcParams` entries. Ignored if `episcanpy=False`.
    color_map
        Convenience method for setting the default color map. Ignored if `episcanpy=False`.
    format: {`'png'`, `'pdf'`, `'svg'`, etc.}, optional (default: `'pdf'`)
        This sets the default format for saving figures: `file_format_figs`.
    transparent
        Save figures with transparent back ground. Sets
        `rcParams['savefig.transparent']`.
    ipython_format
        Only concerns the notebook/IPython environment; see
        :func:`~IPython.display.set_matplotlib_formats` for details.
    """

    # `self` must not be forwarded: scanpy's first parameter is `scanpy`, so
    # a positional `self` would collide with the `scanpy=` keyword below and
    # raise TypeError.
    sc.set_figure_params(
        scanpy=episcanpy,
        dpi=dpi,
        dpi_save=dpi_save,
        frameon=frameon,
        vector_friendly=vector_friendly,
        fontsize=fontsize,
        color_map=color_map,
        format=format,
        transparent=transparent,
        ipython_format=ipython_format)
Esempio n. 8
0
def main():
    """Run CLI.

    Reads an AnnData ``.h5ad`` file and a marker table, keeps the genes whose
    ``adata.var['gene_symbols']`` value appears in the table's ``hgnc_symbol``
    column and saves a dot plot of them grouped by ``adata.obs['cluster']``.
    The plot is saved through scanpy with the suffix
    ``<markers basename without .csv>_dotplot.png`` in the current working
    directory.
    """
    parser = argparse.ArgumentParser(description="""
            Read AnnData object and markers from .csv file. Plot expression \
             of markers in each cluster.
            """)

    parser.add_argument('-h5',
                        '--h5_anndata',
                        action='store',
                        dest='h5',
                        required=True,
                        help='H5 AnnData file.')

    parser.add_argument(
        '--markers_csv',
        action='store',
        dest='markers_csv',
        default='none',
        help='Markers to plot. Must have the following column: hgnc_symbol.')

    options = parser.parse_args()

    # Scanpy settings
    sc.settings.figdir = os.getcwd()  # figure output directory to match base.
    # sc.settings.n_jobs = options.ncpu  # number CPUs
    # sc.settings.max_memory = 500  # in Gb
    sc.set_figure_params(dpi_save=300)

    # Get the out file base. str.rstrip('csv') strips a *set of characters*,
    # not a suffix, so it mangles names that do not end in ".csv"
    # (e.g. "x.tsv" -> "x.t"); remove the real suffix instead.
    out_file_base = os.path.basename(options.markers_csv)
    if out_file_base.endswith('.csv'):
        out_file_base = out_file_base[:-len('.csv')]

    # Read in the data
    adata = sc.read_h5ad(filename=options.h5)

    # Read in the marker database file.
    # NOTE(review): pd.read_table defaults to tab-delimited input although
    # the option is called --markers_csv -- confirm the expected format.
    df = pd.read_table(options.markers_csv)
    marker_symbols = df['hgnc_symbol']

    gene_symbols = adata.var['gene_symbols']
    marker_genes_found = gene_symbols[gene_symbols.isin(marker_symbols)]

    # Dotplots
    _ = sc.pl.dotplot(
        adata=adata,
        var_names=marker_genes_found,
        groupby='cluster',
        gene_symbols='gene_symbols',
        dendrogram=True,
        show=False,
        standard_scale='var',  # Scale color between 0 and 1
        use_raw=False,
        color_map='Blues',
        save='{}_dotplot.png'.format(out_file_base))
Esempio n. 9
0
def test_violin(image_comparer):
    """Compare three multi-panel violin plots (plain, grouped, layer-based) with master images."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=40)
    qc_keys = ['n_genes', 'percent_mito', 'n_counts']
    # Options common to every violin call in this test.
    common = dict(stripplot=True, multi_panel=True, jitter=True, show=False)

    with plt.rc_context():
        sc.pl.set_rcParams_defaults()
        sc.set_figure_params(dpi=50, color_map='viridis')

        pbmc = sc.datasets.pbmc68k_reduced()

        # Plain multi-panel plot of the QC metrics.
        sc.pl.violin(pbmc, list(qc_keys), **common)
        save_and_compare_images('master_violin_multi_panel')

        # Same metrics grouped by label, with custom y-labels.
        sc.pl.violin(
            pbmc,
            list(qc_keys),
            ylabel=["foo", "bar", "baz"],
            groupby='bulk_labels',
            rotation=90,
            **common,
        )
        save_and_compare_images('master_violin_multi_panel_with_groupby')

        # test use of layer
        pbmc.layers['negative'] = pbmc.X * -1
        sc.pl.violin(
            pbmc,
            'CST3',
            groupby='bulk_labels',
            layer='negative',
            use_raw=False,
            rotation=90,
            **common,
        )
        save_and_compare_images('master_violin_multi_panel_with_layer')
Esempio n. 10
0
def visualAnalyzeData(canvas, fig, analysisMethod, color, paths):
    """Load an expression matrix, run the selected analysis and draw it on ``fig``.

    Parameters
    ----------
    canvas
        Object whose ``draw()`` is called once plotting is done.
    fig
        Figure that is cleared and given a single subplot to draw on.
    analysisMethod
        One of ``"clustering"``, ``'trajectory inference'`` or
        ``'dimensionality reduction (pca)'``; any other value leaves the
        axes empty.
    color
        Key used to colour the plot; ``'cell label'`` is mapped to the
        ``'cell_name'`` column.
    paths
        Mapping with the keys ``'seq_expr'``, ``'seq_gnames'`` and
        ``'seq_cnames'`` pointing at text files readable by ``np.loadtxt``.
    """
    fig.clear()
    axes = fig.add_subplot(111)
    # sc setting
    sc.settings.verbosity = 3
    sc.set_figure_params(dpi=100, color_map='viridis_r')
    sc.logging.print_header()

    # load data, store in AnnData (the matrix is transposed on load)
    adata = sc.AnnData(X=np.loadtxt(paths['seq_expr']).T)
    adata.var_names = np.loadtxt(paths['seq_gnames'], dtype=str).tolist()
    # Parse the cell-name file once and reuse it for the index and the column.
    cell_names = np.loadtxt(paths['seq_cnames'], dtype=str).tolist()
    adata.obs_names = cell_names
    adata.obs['cell_name'] = cell_names

    if color == 'cell label':
        color = 'cell_name'

    # The method names are mutually exclusive, hence elif.
    if analysisMethod == "clustering":  # based on leiden
        sc.pp.neighbors(adata, knn=True)  # two params
        sc.tl.leiden(adata, key_added='clustering'
                     )  # by default, using Leiden graph clustering
        sc.tl.umap(adata)
        sc.pl.umap(adata, color=color, ax=axes, show=False)
    elif analysisMethod == 'trajectory inference':
        sc.pp.neighbors(adata, knn=True)  # two params
        sc.tl.louvain(adata, resolution=1.0, key_added='clustering')
        sc.tl.paga(adata, groups='clustering')
        sc.pl.paga(adata, color=color, ax=axes, show=False)
    elif analysisMethod == 'dimensionality reduction (pca)':
        sc.tl.pca(adata, svd_solver='arpack')
        # No clustering is computed in this branch, so colouring by it is refused.
        if color != 'clustering':
            sc.pl.pca(adata, color=color, ax=axes, show=False)
        else:
            tk.messagebox.showwarning(
                title='Warning',
                message=
                'dimensionality reduction visualization not support clustering!'
            )

    canvas.draw()
Esempio n. 11
0
def test_violin(image_comparer):
    """Compare the plain and the grouped multi-panel violin plots with master images."""
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=40)

    sc.pl.set_rcParams_defaults()
    sc.set_figure_params(dpi=50, color_map='viridis')

    pbmc = sc.datasets.pbmc68k_reduced()
    qc_keys = ['n_genes', 'percent_mito', 'n_counts']
    # Options shared by both violin calls.
    common = dict(stripplot=True, multi_panel=True, jitter=True, show=False)

    sc.pl.violin(pbmc, list(qc_keys), **common)
    save_and_compare_images('master_violin_multi_panel')

    sc.pl.violin(pbmc, list(qc_keys), groupby='bulk_labels', **common)
    save_and_compare_images('master_violin_multi_panel_with_groupby')
Esempio n. 12
0
import logging, matplotlib, os, sys
import scanpy as sc
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import pandas as pd
from glbase3 import genelist
# Plot settings.
# NOTE(review): sc.set_figure_params() below may reset rcParams such as
# 'figure.figsize' set on the next line -- confirm the effective figure size.
plt.rcParams['figure.figsize'] = (8, 8)
sc.settings.verbosity = 3
sc.set_figure_params(dpi=200, dpi_save=200)
# Set after set_figure_params so these values are the ones in effect.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['font.size'] = 10

# Figures saved by scanpy go to this directory.
sc.settings.figdir = 'diffexp'

adata = sc.read('./learned.h5ad')

# Rank genes per 'leiden_r0.5' group with the Wilcoxon method (3000 genes per
# group) and persist the result.
sc.tl.rank_genes_groups(adata, 'leiden_r0.5', method='wilcoxon', n_genes=3000)
adata.write('./de.h5ad')

# Re-read the file that was just written.
adata = sc.read('./de.h5ad')

# Top-25 genes per group with a shared y-axis.
sc.pl.rank_genes_groups(adata,
                        n_genes=25,
                        sharey=True,
                        show=False,
                        save='genes-top25.pdf')
# Same plot with scanpy's default number of genes.
sc.pl.rank_genes_groups(adata,
                        key='rank_genes_groups',
                        show=False,
                        save='genes.pdf')
Esempio n. 13
0
# NOTE(review): `setup` is not imported in the visible part of this file;
# presumably matplotlib.testing.setup, called before pyplot is imported --
# confirm.
setup()

from matplotlib.testing.compare import compare_images
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from anndata import AnnData

import scanpy as sc

# Directory containing this test module.
HERE: Path = Path(__file__).parent
# Directories handed to `image_comparer` by the tests below.
ROOT = HERE / '_images'
FIGS = HERE / 'figures'

# Module-level plotting defaults applied at import time.
sc.pl.set_rcParams_defaults()
sc.set_figure_params(dpi=40, color_map='viridis')

#####
# Test images are saved under the folder ./figures
# if test images need to be updated, simply copy them from
# the ./figures folder to ./_images/


def test_heatmap(image_comparer):
    save_and_compare_images = image_comparer(ROOT, FIGS, tol=15)

    adata = sc.datasets.krumsiek11()
    sc.pl.heatmap(adata,
                  adata.var_names,
                  'cell_type',
                  use_raw=False,
Esempio n. 14
0
def _strip_known_suffix(path, suffixes):
    """Return ``path`` without the first matching suffix in ``suffixes``."""
    for suffix in suffixes:
        if suffix and path.endswith(suffix):
            return path[:-len(suffix)]
    return path


def main():
    """Run CLI.

    Loads an AnnData ``.h5ad`` file (and optionally a TSV of principal
    components), computes one UMAP embedding for every combination of the
    requested ``n_neighbors`` / ``min_dist`` / ``spread`` values, stores each
    embedding under ``adata.obsm['X_umap__<label>']``, plots every requested
    colour variable through ``save_plot`` and writes the AnnData to disk.

    Raises
    ------
    Exception
        If ``--number_pcs`` exceeds the number of available PCs, or if no
        colour variable was given.
    """
    parser = argparse.ArgumentParser(
        description="""
            Read AnnData object and PCs file. Generates UMAP.
            """
    )

    parser.add_argument(
        '-v', '--version',
        action='version',
        version='%(prog)s {version}'.format(version=__version__)
    )

    parser.add_argument(
        '-h5', '--h5_anndata',
        action='store',
        dest='h5',
        required=True,
        help='H5 AnnData file.'
    )

    parser.add_argument(
        '-pc', '--tsv_pcs',
        action='store',
        dest='pc',
        default='',
        help='Tab-delimited file of PCs for each cell. First column is\
            cell_barcode. Subsequent columns are PCs. If "", uses pca\
            slot in AnnData.\
            (default: "")'
    )

    parser.add_argument(
        '-cq', '--colors_quantitative',
        action='store',
        dest='cq',
        default='',
        help='Comma seperated list of quantitative variable names for colors.\
            (default: "")'
    )

    parser.add_argument(
        '-cc', '--colors_categorical',
        action='store',
        dest='cc',
        default='',
        help='Comma seperated list of categorical variable names for colors.\
            (default: "")'
    )

    parser.add_argument(
        '-npc', '--number_pcs',
        action='store',
        dest='npc',
        default=0,
        type=int,
        help='Number of PCs to use.\
            (default: maximum number in tsv_pcs file)'
    )

    # The next three options are strings because each accepts a
    # comma-separated list of values to iterate over.
    parser.add_argument(
        '-nn', '--n_neighbors',
        action='store',
        dest='n_neighbors',
        default='15',
        type=str,
        help='Number of neighbors for sc.pp.neighbors call\
            (default: %(default)s)'
    )

    parser.add_argument(
        '-uinit', '--umap_init',
        action='store',
        dest='umap_init',
        default='X_pca',
        help='How to initialize the low dimensional embedding.\
            Valid options: any key for adata.obsm,\
            ’paga’: positions from paga(),\
            ’spectral’: use a spectral embedding of the graph,\
            ’random’: assign initial embedding positions at random.\
            (default: X_pca, the slot where tsv_pcs is stored if provided)'
    )

    parser.add_argument(
        '-umd', '--umap_min_dist',
        action='store',
        dest='umap_min_dist',
        default='0.5',
        type=str,
        help='The effective minimum distance between embedded points. Smaller\
            values will result in a more clustered/clumped embedding where\
            nearby points on the manifold are drawn closer together, while\
            larger values will result on a more even dispersal of points.\
            The value should be set relative to the spread value, which\
            determines the scale at which embedded points will be spread out.\
            (default: %(default)s)'
    )

    parser.add_argument(
        '-us', '--umap_spread',
        action='store',
        dest='umap_spread',
        default='1.0',
        type=str,
        help='The minimum distance apart that points are allowed to be in the\
            low dimensional representation (effective scale of embedded points\
            ). In combination with min_dist this determines how\
            clustered/clumped the embedded points are.\
            (default: %(default)s)'
    )

    parser.add_argument(
        '-dln', '--drop_legend_n',
        action='store',
        dest='drop_legend',
        default=-1,
        type=int,
        help='Drop the legend for categorical colors with >= drop_legend_n\
            categories. If drop_legend_n < 0, then no legend drops are\
            performed.\
            (default: %(default)s)'
    )

    parser.add_argument(
        '--force_recalculate_neighbors',
        action='store_true',
        dest='calculate_neighbors',
        default=False,
        help='Calculate neighbor graph even if it already exists in the\
            AnnData (which it my do so if you already ran BBKNN).\
            (default: %(default)s)'
    )

    parser.add_argument(
        '-ncpu', '--number_cpu',
        action='store',
        dest='ncpu',
        default=4,
        type=int,
        help='Number of CPUs to use.\
            (default: %(default)s)'
    )

    parser.add_argument(
        '-of', '--output_file',
        action='store',
        dest='of',
        default='',
        help='Basename of output files, assuming output in current working \
            directory.\
            (default: <h5_anndata>-<tsv_pcs>-umap)'
    )

    options = parser.parse_args()

    # Fixed settings.
    verbose = True

    # Scanpy settings
    sc.settings.figdir = os.getcwd()  # figure output directory to match base.
    sc.settings.n_jobs = options.ncpu  # number CPUs
    # sc.settings.max_memory = 500  # in Gb
    sc.set_figure_params(dpi_save=300)

    # Load the AnnData file.
    adata = sc.read_h5ad(filename=options.h5)

    # Load the PCs.
    if options.pc == '':
        df_pca = pd.DataFrame(
            data=adata.obsm['X_pca'],
            index=adata.obs.index,
            columns=[
                'PC{}'.format(x) for x in
                range(1, adata.obsm['X_pca'].shape[1]+1)
            ]
        )
    else:
        df_pca = pd.read_csv(options.pc, sep='\t', index_col='cell_barcode')

    # Check that nPCs is valid.
    n_pcs = options.npc
    if 'neighbors' in adata.uns and not options.calculate_neighbors:
        # If we are using the pre-calculated neighbors use the PCs from that
        # NOTE(review): assumes the stored neighbors params contain 'n_pcs'.
        n_pcs = adata.uns['neighbors']['params']['n_pcs']
    if n_pcs == 0:
        n_pcs = len(df_pca.columns)
    elif n_pcs > len(df_pca.columns):
        raise Exception(
            '--number_pcs ({}) is > than n_pcs in --tsv_pcs ({}).'.format(
                n_pcs,
                len(df_pca.columns)
            )
        )
    if verbose:
        print('Using {} PCs.'.format(n_pcs))
    # Subset number of PCs to be exactly nPCs - here we assume PCs are ordered.
    print('Subetting PCs - we assume they are ordered by column index.')
    df_pca = df_pca.iloc[:, range(0, n_pcs)]
    print('PC columns:\t{}'.format(np.array_str(df_pca.columns)))

    # Add the reduced dimensions to the AnnData object.
    # NOTE: We need to do this for BBKNN in the case were we init with X_pca
    adata.obsm['X_pca__umap'] = df_pca.loc[adata.obs.index, :].values.copy()

    # Get the init position for UMAP
    umap_init = options.umap_init
    if umap_init == 'X_pca':
        umap_init = 'X_pca__umap'

    # Get the out file base.
    out_file_base = options.of
    if out_file_base == '':
        # str.rstrip('tsv.gz') strips a *set of characters*, not a suffix
        # (e.g. "adata-pcs.tsv.gz" -> "adata-pc"); remove real suffixes.
        out_file_base = '{}-{}-umap'.format(
            os.path.basename(_strip_known_suffix(options.h5, ('.h5ad',))),
            os.path.basename(
                _strip_known_suffix(options.pc, ('.tsv.gz', '.tsv'))
            )
        )
    # Append the parameters to the output file.
    out_file_base = '{},number_pcs={}'.format(
        out_file_base,
        n_pcs
    )
    out_file_base = '{},umap_init={}'.format(
        out_file_base,
        options.umap_init
    )

    # Parse the color variables.
    colors_quantitative = []
    if options.cq != '':
        colors_quantitative = options.cq.split(',')

    colors_categorical = []
    if options.cc != '':
        colors_categorical = options.cc.split(',')

    if len(colors_quantitative) == 0 and len(colors_categorical) == 0:
        raise Exception('Specify a color value.')

    # Add colors_large_palette to adata.uns.
    # adata.uns["annotation_colors"] = COLORS_LARGE_PALLETE

    # Parse the neighbors iterations.
    list__n_neighbors = []
    if options.n_neighbors != '':
        list__n_neighbors = list(map(int, options.n_neighbors.split(',')))

    # Parse the min_dist iterations.
    list__min_dist = []
    if options.umap_min_dist != '':
        list__min_dist = list(map(float, options.umap_min_dist.split(',')))

    # Parse the spread iterations.
    list__spread = []
    if options.umap_spread != '':
        list__spread = list(map(float, options.umap_spread.split(',')))

    # Update the out base if only one of any iteration.
    if len(list__n_neighbors) == 1:
        out_file_base = '{},n_neighbors={}'.format(
            out_file_base,
            list__n_neighbors[0]
        )
    if len(list__min_dist) == 1:
        out_file_base = '{},umap_min_dist={}'.format(
            out_file_base,
            list__min_dist[0]
        )
    if len(list__spread) == 1:
        out_file_base = '{},umap_spread={}'.format(
            out_file_base,
            list__spread[0]
        )

    # Loop over all combinations of the different parameters we want to
    # analyse. Despite its name, list__umap_keys is a dict mapping each obsm
    # key to the parameters used to produce it.
    list__umap_keys = {}
    for i__n_neighbors, i__min_dist, i__spread in itertools.product(
        list__n_neighbors, list__min_dist, list__spread
    ):
        # Check input parameters
        if not (2 <= i__n_neighbors <= 100):
            # Recommended in parameter documentation:
            # https://umap-learn.readthedocs.io/en/latest/api.html
            warnings.warn(
                'WARNING: it is suggested to set n_neighbors to a {}'.format(
                    'value between 2-100.'
                )
            )
        if not (0.0 <= i__min_dist <= 1.0):
            # Recommended here: https://github.com/lmcinnes/umap/issues/249
            warnings.warn(
                'WARNING: it is suggested to set umap_min_dist to a {}'.format(
                    'value between 0-1.'
                )
            )
        if not (0.0 <= i__spread <= 3.0):
            # Recommendation based on single cell experience.
            warnings.warn(
                'WARNING: it is suggested to set umap_spread to a {}'.format(
                    'value between 0-3.'
                )
            )

        # Set the plot label.
        plt__label = 'n_neighbors={}'.format(i__n_neighbors)
        plt__label = '{},umap_min_dist={}'.format(
            plt__label,
            str(i__min_dist).replace('.', 'pt')
        )
        plt__label = '{},umap_spread={}'.format(
            plt__label,
            str(i__spread).replace('.', 'pt')
        )

        # Calculate neighbors for on the specified PCs.
        # By default saved to adata.uns['neighbors']
        #
        # First, however, check to see if adata.uns['neighbors'] already exists
        # ...and unless the user tells us not to, use that slot, not calculate
        # neighbors. This default behaviour is to accommodate the instance when
        # bbknn has been run on the data.
        # NOTE(review): adata.uns['neighbors'] is deleted at the end of every
        # iteration, so from the second parameter combination onwards this
        # condition is always true and neighbors are recomputed even when a
        # pre-calculated graph was supplied -- confirm this is intended.
        if 'neighbors' not in adata.uns or options.calculate_neighbors:
            sc.pp.neighbors(
                adata,
                use_rep='X_pca__umap',
                n_pcs=n_pcs,
                n_neighbors=i__n_neighbors,  # Scanpy default = 15
                copy=False,
                random_state=0
            )
        else:
            warnings.warn(
                'WARNING: found neighbors slot in adata.uns. {}'.format(
                    'Not calculating neighbors (ignoring n_neighbors).'
                )
            )
            # If we are using the pre-calculated neighbors drop npcs note.
            # if 'n_pcs' in adata.uns['neighbors']['params']:
            # n_pcs = adata.uns['neighbors']['params']['n_pcs']
            i__n_neighbors = adata.uns['neighbors']['params']['n_neighbors']

        # Save the parameters to a dict
        list__umap_keys['X_umap__{}'.format(plt__label)] = {
            'n_neighbors': i__n_neighbors,
            'umap_min_dist': i__min_dist,
            'umap_spread': i__spread
        }

        adata.uns['neighbors__{}'.format(plt__label)] = adata.uns['neighbors']

        # TODO: add paga
        # # If init with paga, plot paga first - NOTE we can only do this if
        # if options.umap_init == 'paga' and 'paga' not in adata.uns:
        #     print(
        #         'Trying to call sc.tl.paga.',
        #         'NOTE: requires one to have clustered the data.'
        #     )
        #     sc.tl.paga(
        #         adata,
        #         use_rna_velocity=False,
        #         copy=False
        #     )

        # UMAP
        # Saved to adata.uns['umap'] and adata.obsm['X_umap']
        # NOTE: If umap_init == X_pca, then X_umap will have an equal number
        #       of n_components to X_pca (n_components is overridden).
        sc.tl.umap(
            adata,
            n_components=2,
            min_dist=i__min_dist,  # Scanpy default = 0.05
            spread=i__spread,  # Scanpy default = 1.0
            init_pos=umap_init,  # Scanpy default = spectral
            # For some reason cannot access neighbors key slot, thus we
            # must keep uns['neighbors'] until we have run this.
            # neighbors_key='neighbors__{}'.format(plt__label),
            copy=False,
            random_state=0
        )

        if 'embedding_density' in colors_quantitative:
            sc.tl.embedding_density(
                adata,
                basis='umap'
            )
            # Rename density estimates
            adata.obs[
                'umap__{}__density'.format(plt__label)
            ] = adata.obs.pop('umap_density')
            adata.uns[
                'umap__{}__density_params'.format(plt__label)
            ] = adata.uns.pop('umap_density_params')

        # Rename UMAP
        adata.uns[
            'umap__{}__params'.format(plt__label)
        ] = adata.uns.pop('umap')
        adata.obsm[
            'X_umap__{}'.format(plt__label)
        ] = adata.obsm.pop('X_umap')

        # Delete key that we no longer need since already copied and we have
        # run umap.
        del adata.uns['neighbors']

    # NOTE: If the color var is a gene, you should color by ln(CPM+1).
    #       By default these sc.pl.umap uses the .raw attribute of AnnData
    #       if present which is assumed to be ln(CPM+1).

    # For each color to plot, loop over the different iterations.
    for color_var in colors_quantitative:
        save_plot(
            adata=adata,
            list__umap_keys=list__umap_keys,
            out_file_base=out_file_base,
            color_var=color_var,
            colors_quantitative=True,
            drop_legend=options.drop_legend
        )
    for color_var in colors_categorical:
        save_plot(
            adata=adata,
            list__umap_keys=list__umap_keys,
            out_file_base=out_file_base,
            color_var=color_var,
            colors_quantitative=False,
            drop_legend=options.drop_legend
        )

    # NOTE(review): the output name is hard-coded to 'test.h5ad' and ignores
    # out_file_base -- left unchanged in case downstream steps rely on it.
    adata.write(
        '{}.h5ad'.format('test'),
        compression='gzip'
    )
# %% [markdown]
# container rpy_v3.1

# %%
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib.pyplot as plt
import tables as tb
import scipy as scipy
import dotscore

#Specifying random seed
import random

sc.set_figure_params(color_map='viridis', dpi_save=350)
sc.settings.verbosity = 3

# %%
# %load_ext autoreload
# %autoreload 2

# %%
import matplotlib as mpl

mpl.rcParams['figure.figsize'] = (4, 4)
mpl.rcParams['pdf.fonttype'] = 42  #Ensures readable fonts in illustrator
mpl.rcParams['ps.fonttype'] = 42

# %%
# Setting up target directories
Esempio n. 16
0
# Sample identifier used to build every input/output path below.
sample1 = 'pbmc5k'
# Inputs produced by the upstream preprocessing and Leiden steps.
dirPre = '../preprocess_scanpy/write_' + sample1 + '/'
dirLeiden = '../leiden_scanpy/write_' + sample1 + '/'

# Output locations for written results.
dirOut1 = './write_' + sample1 + '/'
dirOut = './write_' + sample1 + '/CRclusters9/'

# Output location for figures.
dirFig = './figures_' + sample1 + '/CRclusters9/'

# Create the output directories (including parents) if they do not exist.
# os.makedirs avoids spawning a shell and building a command line by string
# concatenation, and works on platforms without `mkdir -p`.
os.makedirs(dirOut, exist_ok=True)
os.makedirs(dirFig, exist_ok=True)
#%%
# Log everything up to and including hints.
sc.settings.verbosity = 3  # verbosity: errors (0), warnings (1), info (2), hints (3)

# Full set_figure_params signature, kept for reference:
#scanpy.set_figure_params(scanpy=True, dpi=80, dpi_save=150, frameon=True, vector_friendly=True, fontsize=14, figsize=None, color_map=None, format='pdf', facecolor=None, transparent=False, ipython_format='png2x')
# Figure defaults for this script: dpi 150 on screen, dpi 300 when saved,
# font size 20 and 8 x 8 figures.
sc.set_figure_params(dpi=150, dpi_save=300, fontsize=20, figsize=[8, 8])
#%%
# Root directory under which every tool's count matrix is stored.
mtxPrefix = '/scratch/dobin/STAR/STARsoloPreprint/maia1/count/'

# One row per tool (short code as index) with its display name and the
# directory of its count matrix. Rows not filled in below stay NaN.
toolsIndex = ['CR', 'Sfu', 'Ssp', 'kb', 'Ask', 'Adf']  #, 'Ase', 'Apa', 'Afu']
tools = pd.DataFrame(index=toolsIndex, columns=['name', 'mtxDir'])

# Assign with .loc[row, column]: chained assignment such as
# `tools.name['CR'] = ...` writes through an intermediate Series and is not
# guaranteed to modify `tools` (it does not under pandas copy-on-write).
tools.loc['CR', 'name'] = 'CellRanger'
tools.loc['CR', 'mtxDir'] = (
    mtxPrefix
    + 'CellRanger_5.0.1/human_CR_3.0.0/standard/default/10X/3/pbmc_5k/20/b02/Run1/outs/filtered_feature_bc_matrix'
)

tools.loc['Sfu', 'name'] = 'STAR fullSA'
tools.loc['Sfu', 'mtxDir'] = (
    mtxPrefix
    + 'STAR_2.7.8a/human_CR_3.0.0/fullSA/10X_CR4_noSAM/10X/3/pbmc_5k/20/b02/Solo.out/Gene/raw/'
)

tools.loc['Ssp', 'name'] = 'STAR sparseSA'
tools.mtxDir[
Esempio n. 17
0
from typing import Mapping
import pytest

from anndata import AnnData
import scanpy as sc

import numpy as np
import pandas as pd

from squidpy import gr, pl
from tests.conftest import DPI, PlotTester, PlotTesterMeta

C_KEY = "leiden"

sc.pl.set_rcParams_defaults()
sc.set_figure_params(dpi=40, color_map="viridis")

# WARNING:
# 1. all classes must both subclass PlotTester and use metaclass=PlotTesterMeta
# 2. tests which produce a plot must be prefixed with `test_plot_`
# 3. if the tolerance needs to be changed, don't prefix the function with `test_plot_`, but with something else
#    the comp. function can be accessed as `self.compare(<your_filename>, tolerance=<your_tolerance>)`
#    ".png" is appended to <your_filename>, no need to set it


class TestGraph(PlotTester, metaclass=PlotTesterMeta):
    """Plot tests for the graph functions, run through ``PlotTester``."""

    def test_plot_interaction(self, adata: AnnData):
        """Compute the interaction matrix for ``C_KEY`` and plot it."""
        cluster_key = C_KEY

        # Compute: spatial neighbors first, then the interaction matrix.
        gr.spatial_neighbors(adata)
        gr.interaction_matrix(adata, cluster_key=cluster_key)

        # Plot the matrix computed above.
        pl.interaction_matrix(adata, cluster_key=cluster_key)
Esempio n. 18
0
import anndata as ad
import episcanpy.api as epi
import scanpy as sc
import numpy as np
import pandas as pd
import re
import pyranges as pr

# Figure defaults used for every plot in this script.
sc.set_figure_params(
    scanpy=True,
    dpi=80,
    dpi_save=250,
    frameon=True,
    vector_friendly=True,
    color_map="YlGnBu",
    format='pdf',
    transparent=False,
    ipython_format='png2x',
)

DATADIR = './EpiScanpy/'


def _load_sample(prefix, path):
    """Read one tab-separated matrix, prefix its var names and transpose it."""
    sample = epi.read_text(path, delimiter="\t", first_column_names="regions")
    # Tag the names with the sample prefix before transposing.
    sample.var_names = prefix + sample.var_names
    return sample.T


HBCx22 = _load_sample("HBCx22_", "Matrices/HBCx_22.tsv")
Jurkat = _load_sample("Jurkat_", "Matrices/Jurkat.tsv")
Ramos = _load_sample("Ramos_", "Matrices/Ramos.tsv")
MM468 = _load_sample("MM468_", "Matrices/MM468.tsv")

# Merge the four samples (inner join), keeping the prefixed names as they are.
adata = MM468.concatenate(
    HBCx22,
    Jurkat,
    Ramos,
    join="inner",
    index_unique=None,
)
Esempio n. 19
0
                                             out='z')

    # 2. cluster
    sc.pp.neighbors(adata, n_neighbors=30, use_rep='latent')
    if args.cluster_method == 'leiden':
        sc.tl.leiden(adata)
    elif args.cluster_method == 'kmeans':
        kmeans = KMeans(n_clusters=k, n_init=20, random_state=0)
        adata.obs['kmeans'] = kmeans.fit_predict(
            adata.obsm['latent']).astype(str)

#     if args.reference in adata.obs:
#         cluster_report(adata.obs[args.reference].cat.codes, adata.obs[args.cluster_method].astype(int))

    sc.settings.figdir = outdir
    sc.set_figure_params(dpi=80, figsize=(6, 6), fontsize=10)
    if args.embed == 'UMAP':
        sc.tl.umap(adata, min_dist=0.1)
        color = [
            c for c in ['celltype', args.cluster_method] if c in adata.obs
        ]
        sc.pl.umap(adata, color=color, save='.pdf', wspace=0.4, ncols=4)
    elif args.embed == 'tSNE':
        sc.tl.tsne(adata, use_rep='latent')
        color = [
            c for c in ['celltype', args.cluster_method] if c in adata.obs
        ]
        sc.pl.tsne(adata, color=color, save='.pdf', wspace=0.4, ncols=4)

    if args.impute:
        adata.obsm['impute'] = model.encodeBatch(testloader,
Esempio n. 20
0
### Set the directory used to read and write data.
# This should be the directory containing the .mtx data file and the .tsv barcodes and features files:
BaseDirectory = '/d2/studies/scanPy/VM_LHb_Stress/Ctrl_Stress_MergedScanPy/'
sampleName = 'ACWS_VM_NDB_Stress' #This is used for name result output files
os.chdir(BaseDirectory)
%logstart -o scanpy_log.txt

### Scanpy settings and result-file names:
# File that stores the full analysis results. This is a bare name, so it is
# resolved against the current working directory.
results_file='Results_File'
# Alternative full-path name, kept for reference:
#results_file = os.path.join(BaseDirectory, sampleName + '_scanpy_results.h5ad')  # the file that will store the analysis results
# File for partial results, named after the sample ('..._preHVGselection').
results_file_partial = os.path.join(BaseDirectory, sampleName + '_scanpy_adata_preHVGselection.h5ad')

sc.settings.verbosity=3  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.n_jobs=8 #use parallel processing when possible
sc.logging.print_header()
# Figure defaults: font size 14, dpi 80 on screen, dpi 300 when saved, SVG output.
sc.set_figure_params(fontsize=14, dpi=80, dpi_save=300, format='svg')
# Turn LaTeX text rendering off and use the STIX fonts for text and math.
matplotlib.rcParams.update({'text.usetex': False, 'font.family': 'stixgeneral', 'mathtext.fontset': 'stix',})
color_map='inferno' #see options for colormaps at https://matplotlib.org/3.1.1/gallery/color/colormap_reference.html

###LOAD DATA
if not new:
    try:
        sc.read(results_file)
    except FileNotFoundError:
        print("No full results file found. Attemping to open partial results file.")
        sc.read(results_file_partial)
elif new:
    if dataType=='.h5':
        fileNames = glob.glob(os.path.join(BaseDirectory, '*filtered.h5'))
        if len(fileNames)>1:
            raise NameError("Multiple files matched glob pattern, check files or use more specific pattern")
    sc.pp.neighbors(adata)
    sc.tl.leiden(adata)

    return adata.obs["leiden"]


###############################################################################
# Then, we calculate feature clusters using different features and compare them to gene clusters:
# Cluster each family of image features separately; every result gets its
# own column in adata.obs.
for _obs_key, _like in (
    ("features_summary_cluster", "summary"),
    ("features_histogram_cluster", "histogram"),
    ("features_texture_cluster", "texture"),
):
    adata.obs[_obs_key] = cluster_features(adata.obsm["features"], like=_like)

# Show the three feature clusterings next to the existing "cluster" column.
sc.set_figure_params(facecolor="white", figsize=(8, 8))
_cluster_columns = [
    "features_summary_cluster",
    "features_histogram_cluster",
    "features_texture_cluster",
    "cluster",
]
sc.pl.spatial(adata, color=_cluster_columns, ncols=3)

###############################################################################
# Like the gene-space clusters (bottom middle), the feature space clusters are also spatially coherent.
#
# The feature clusters of the different feature extractors are quite diverse, but all of them reflect
Esempio n. 22
0
import scanpy as sc
from sklearn.dummy import DummyClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.cluster import SpectralClustering, OPTICS, cluster_optics_dbscan, AgglomerativeClustering

# settings
# Fonts: 8 pt sans-serif. matplotlib's generic family is spelled
# 'sans-serif'; 'sans serif' is looked up as a concrete font name that does
# not exist and falls back to the default with a warning.
plt.rc('font', size=8)
plt.rc('font', family='sans-serif')
# Type 42 (TrueType) fonts in PDF/PS output.
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
plt.rcParams['text.usetex'] = True
plt.rcParams['legend.frameon'] = False
plt.rcParams['axes.grid'] = False
plt.rcParams['legend.markerscale'] = 0.5
sc.set_figure_params(dpi=300, dpi_save=600,
                     frameon=False,
                     fontsize=8)
plt.rcParams['savefig.dpi'] = 600
sc.settings.verbosity = 2
# Set n_jobs on the settings instance. Assigning to the ScanpyConfig class
# attribute replaces the class-level definition of `n_jobs` instead of
# setting the value through it.
sc.settings.n_jobs = -1



# reproducibility
# Seed NumPy's global random generator. np.random.seed() returns None, so
# `rs` is (and always was) None; the name is kept for code that references it.
np.random.seed(42)
rs = None

# utils
def mwu(X,Y,gene_names,correction=None,debug=False) :
    '''
    Benjamini-Hochberg correction implemented. Can change to Bonferonni
Esempio n. 23
0
    def plot_sca(self, adata, sca_params, figdir='./figures/'):
        '''
        Generate and save the standard set of analysis figures for `adata`.

        See the Scanpy visualization library for examples of the individual
        plot types. Every scanpy figure is produced with `show=False` and
        written through scanpy's `save=` option after `sc.settings.figdir`
        has been set to `figdir`.

        Parameters
        ----------
        adata
            Annotated data object to plot. A `jitter` column is added to
            `adata.obs` as a side effect.
        sca_params
            Parameter container. This method reads `adata_preQC`,
            `analysis_params` (`n_pcs`, `clustering_choice`, `do_tSNE`,
            `dpt`), `gene_lists`, `gene_dict`, `qc_params.doublet_detection`,
            `doublet_clf` and `cell_score_lists`, and writes `missing_genes`.
        figdir
            Output directory for the figures. The doublet-detection plots are
            saved to `figdir` joined directly with the file name, so `figdir`
            should end with a path separator.

        Returns
        -------
        The `adata` object that was passed in.
        '''
        print("Plotting")

        ## Create my custom palette for FeaturePlots and define a matplotlib colormap object
        if self.umap_feature_color == 'blue_orange':
            feature_colors = [(35, 35, 142), (255, 127, 0)]
            my_feature_cmap = self.make_cmap(feature_colors, bit=True)
        else:
            # 'yellow_blue' and any other value share the same
            # gray -> pale yellow -> blue palette.
            feature_colors = [(210, 210, 210), (210, 210, 210),
                              (245, 245, 200), (100, 200, 225), (0, 45, 125)]
            position = [0, 0.019999, 0.02, 0.55, 1]
            my_feature_cmap = self.make_cmap(feature_colors,
                                             bit=True,
                                             position=position)

        gray_cmap = self.make_cmap([(220, 220, 220), (220, 220, 220)],
                                   bit=True)

        ## Check to see if user specified a color palette for categorical umap plots, ie. leiden, obs_fields
        if self.umap_categorical_color == 'default':
            ## Custom color palette for cluster plots and observation plots
            colors = [(1, 0.5, 0), (0.5, 0.5, 0.85), (0, 1, 0), (1, 0, 0),
                      (0, 0, 0.9), (0, 1, 1),
                      (0.4, 0.4, 0.4), (0.5, 0.85, 0.5), (0.5, 0.15, 0.5),
                      (0.15, 0.5, 0.5), (0.5, 0.5, 0.15), (0.9, 0.9, 0),
                      (1, 0, 1), (0, 0.5, 1), (0.85, 0.5, 0.5), (0.5, 1, 0),
                      (0.5, 0, 1), (1, 0, 0.5), (0, 0.9, 0.6), (0.3, 0.6, 0),
                      (0, 0.3, 0.6), (0.6, 0.3, 0), (0.3, 0, 0.6),
                      (0, 0.6, 0.3), (0.6, 0, 0.3)]
        else:
            colors = self.umap_categorical_color

        ## General figure parameters and settings
        # One call only: set_figure_params applies its default for every
        # argument that is not passed, so a separate call with just
        # `fontsize` would reset dpi/dpi_save to the scanpy defaults.
        sc.set_figure_params(dpi_save=300, dpi=300,
                             fontsize=12)  #,vector_friendly=False)
        sc.settings.figdir = figdir
        size = self.size

        # Check to see if user wants publication quality figures
        if self.final_quality:
            # rcParams['figure.figsize'] = 4, 4
            rcParams['savefig.dpi'] = 1200
            file_type = '.pdf'
        else:
            file_type = '.png'

        ## Violin plots for filtering parameters pre and post
        sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],
                     jitter=0.4,
                     multi_panel=True,
                     save='_postFiltered_plot.png',
                     show=False)
        if sca_params.adata_preQC:
            sc.pl.violin(sca_params.adata_preQC,
                         ['n_genes', 'n_counts', 'percent_mito'],
                         jitter=0.4,
                         multi_panel=True,
                         save='_preFiltered_plot.png',
                         show=False)

        ## Draw the PCA elbow plot to determine which PCs to use
        sc.pl.pca_variance_ratio(adata,
                                 log=True,
                                 n_pcs=50,
                                 save='_elbowPlot.png',
                                 show=False)
        ## Ranks and displays most contributing genes for each principal component
        # Show the loadings of the components within +/- `components` of n_pcs.
        components = 4
        loadings_components = range(
            sca_params.analysis_params.n_pcs - components,
            sca_params.analysis_params.n_pcs + components + 1)
        sc.pl.pca_loadings(adata,
                           components=loadings_components,
                           save='_rank_genes.png',
                           show=False)

        ## Plot results of UMAP (and t-SNE) dimensional reduction and clustering
        for observation in self.umap_obs:
            # Label clusters directly on the plot only for the chosen clustering.
            legend = 'on data' if (observation == sca_params.analysis_params.
                                   clustering_choice) else 'right margin'
            sc.pl.umap(adata,
                       color=observation,
                       save=''.join(['_', observation, file_type]),
                       show=False,
                       legend_loc=legend,
                       edges=False,
                       size=size,
                       palette=colors,
                       alpha=0.75)

            if sca_params.analysis_params.do_tSNE:
                sc.pl.tsne(adata,
                           color=observation,
                           save=''.join(['_', observation, file_type]),
                           show=False,
                           legend_loc=legend,
                           edges=False,
                           size=size,
                           palette=colors,
                           alpha=0.75)
        # sc.external.pl.phate(adata,gene_symbols=['CAV1','LY6D','KRT4','TP63','CDH1'], use_raw=True, color_map=my_feature_cmap,
        #                    save='phate.png', size=size)
        ## Find marker genes based on cluster assignment
        # Rank genes for every requested grouping/comparison, then plot the
        # top `n_genes_rank` genes per group when 'all' was compared.
        n_genes_rank = 5
        for rank_grouping in self.rank_grouping:
            for comparison in self.clusters2_compare:
                if comparison == 'all':
                    # The 'all' comparison is passed on as None.
                    comparison = None
                self.__rank_genes(adata,
                                  rank_grouping,
                                  figdir=figdir,
                                  clusters2_compare=comparison)

            if 'all' in self.clusters2_compare:
                sc.pl.rank_genes_groups_heatmap(
                    adata,
                    n_genes=n_genes_rank,
                    use_raw=True,
                    show=False,
                    save=''.join(['_rank_heatmap_', rank_grouping, file_type]),
                    cmap=my_feature_cmap)
                sc.pl.rank_genes_groups_dotplot(
                    adata,
                    n_genes=n_genes_rank,
                    use_raw=True,
                    show=False,
                    save=''.join(['_rank_dotplot_', rank_grouping, file_type]),
                    color_map=my_feature_cmap)
                sc.pl.rank_genes_groups_stacked_violin(
                    adata,
                    n_genes=n_genes_rank,
                    use_raw=True,
                    show=False,
                    save=''.join(['_rank_violin_', rank_grouping, file_type]))

        ## Feature plots and dot plot analysis for each specified set of genes
        #sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, save='_markerPlots.png', show=False)
        # Both names are used after this section, so bind them even when no
        # gene lists are configured (otherwise the code below raises NameError).
        missing_genes = []
        genes_to_plot = []
        if sca_params.gene_lists:
            for gene_list in sca_params.gene_lists:
                gene_obj = sca_params.gene_dict[gene_list]
                # `missing_genes` is threaded through every call so it
                # accumulates across gene lists.
                genes_to_plot, missing_genes = self.__find_genes(
                    adata,
                    gene_obj.markers,
                    missing_genes=missing_genes)

                ## Do FeaturePlots for select genes
                print('Plotting standard marker genes: ', genes_to_plot, '\n')
                sc.pl.umap(adata,
                           color=genes_to_plot,
                           save=''.join(
                               ['_featureplots_', gene_list, file_type]),
                           show=False,
                           cmap=my_feature_cmap,
                           size=size,
                           use_raw=True,
                           vmin=0)

                if sca_params.analysis_params.do_tSNE:
                    sc.pl.tsne(adata,
                               color=genes_to_plot,
                               save=''.join(
                                   ['_featureplots_', gene_list, file_type]),
                               show=False,
                               cmap=my_feature_cmap,
                               size=size,
                               use_raw=True,
                               vmin=0)

                feature_positions = gene_obj.feature_positions  # Manually set and determined
                feature_groups = gene_obj.feature_groups
                groupby_positions = gene_obj.groupby_positions

                if len(gene_obj.markers) != 0:
                    for grouping in self.dot_grouping:
                        ## Dotplot analysis
                        # Circle color corresponds to expression level, and circle size corresponds to percentage of cells expressing gene

                        ## Reordering categories for dotplot or heatmap rows
                        # Work on a copy so the category order of `adata`
                        # itself is left untouched.
                        adata_plots = adata.copy()
                        dendrogram = False
                        if groupby_positions:
                            clustering_chosen = sca_params.analysis_params.clustering_choice
                            adata_plots.obs[clustering_chosen] = adata.obs[
                                clustering_chosen].cat.reorder_categories(
                                    groupby_positions, inplace=False)

                        sc.pl.dotplot(
                            adata_plots,
                            genes_to_plot,
                            groupby=grouping,
                            var_group_positions=feature_positions,
                            var_group_labels=feature_groups,
                            save=''.join([
                                '_markers_', gene_list, '_', grouping,
                                file_type
                            ]),
                            show=False,
                            color_map=my_feature_cmap,
                            use_raw=True,
                            dendrogram=dendrogram
                        )  #, figsize=(4,6))#, dot_max=0.4)#, dendrogram=True)
                        ## Heatmaps
                        # Each horizontal line represents expression of one cell
                        sc.pl.heatmap(adata_plots,
                                      genes_to_plot,
                                      groupby=grouping,
                                      var_group_positions=feature_positions,
                                      var_group_labels=feature_groups,
                                      save=''.join([
                                          '_markers_', gene_list, '_',
                                          grouping, file_type
                                      ]),
                                      show=False,
                                      cmap=my_feature_cmap,
                                      use_raw=True)

        # Genes that are not expressed or are invariable are plotted using a grayscale
        sca_params.missing_genes = missing_genes
        print('Plotting empty genes: ', missing_genes, '\n')
        # Split the missing genes into those present in adata.raw and those
        # that are not in the dataset at all.
        empty_genes = [
            gene for gene in missing_genes if (gene in adata.raw.var_names)
        ]
        genes_noseq = [
            gene for gene in missing_genes if (gene not in empty_genes)
        ]
        print('Zero genes: ', empty_genes, '\n')
        print('Gene not in dataset: ', genes_noseq, '\n')
        if empty_genes:
            sc.pl.umap(adata,
                       color=empty_genes,
                       save=''.join(['_featureplots_gray', file_type]),
                       show=False,
                       cmap=gray_cmap,
                       size=size,
                       use_raw=True)

            # tSNE counterpart of the gray UMAP plot. It colors by
            # `empty_genes` as well: `missing_genes` also holds genes that
            # are not in the dataset and therefore cannot be plotted.
            if sca_params.analysis_params.do_tSNE:
                sc.pl.tsne(adata,
                           color=empty_genes,
                           save=''.join(['_featureplots_gray', file_type]),
                           show=False,
                           cmap=gray_cmap,
                           size=size,
                           use_raw=True)

        if sca_params.qc_params.doublet_detection:
            sc.pl.umap(adata,
                       color='doublet_labels',
                       save='doublet_test.png',
                       show=False,
                       edges=False,
                       size=size)
            # These two plots are saved by doubletdetection itself, so the
            # output path is built by hand from `figdir`.
            f = doubletdetection.plot.convergence(
                sca_params.doublet_clf,
                save=''.join([figdir, 'convergence_test.pdf']),
                show=False,
                p_thresh=1e-16,
                voter_thresh=0.5)
            f3 = doubletdetection.plot.threshold(
                sca_params.doublet_clf,
                save=''.join([figdir, 'threshold_test.pdf']),
                show=False,
                p_step=6)

        # Generate a umap feature plot based on cell scoring
        if sca_params.cell_score_lists:
            max_list_len = max([
                len(self.vmax_list),
                len(self.vmin_list),
                len(sca_params.cell_score_lists)
            ])
            # Without user-supplied limits, use the global max/min over all
            # score columns so every score plot shares one color scale.
            if not self.vmax_list:
                vmax = [
                    adata.obs.loc[:, sca_params.cell_score_lists].values.max()
                ] * max_list_len
            else:
                vmax = self.vmax_list

            if not self.vmin_list:
                vmin = [
                    adata.obs.loc[:, sca_params.cell_score_lists].values.min()
                ] * max_list_len
            else:
                vmin = self.vmin_list

            for i, score_name in enumerate(sca_params.cell_score_lists):
                sc.pl.umap(adata,
                           color=score_name,
                           save=''.join(
                               ['_', score_name, '_cellType_score.png']),
                           show=False,
                           edges=False,
                           color_map=my_feature_cmap,
                           size=size,
                           vmin=vmin[i],
                           vmax=vmax[i])
                # Same plot with the lower color limit pinned to 0.
                sc.pl.umap(adata,
                           color=score_name,
                           save=''.join(
                               ['_', score_name, '_cellType_score_0min.png']),
                           show=False,
                           edges=False,
                           color_map=my_feature_cmap,
                           size=size,
                           vmin=0,
                           vmax=vmax[i])

            sc.pl.violin(adata,
                         sca_params.cell_score_lists,
                         groupby='sampleName',
                         jitter=0.4,
                         save='_cell_scores.png',
                         show=False,
                         multi_panel=True,
                         rotation=90)

        if sca_params.analysis_params.dpt:
            sc.pl.diffmap(adata,
                          color=[
                              'dpt_pseudotime',
                              sca_params.analysis_params.clustering_choice
                          ],
                          size=self.size,
                          show=False,
                          save=''.join(
                              [sca_params.analysis_params.dpt[0], '.png']))
            sc.pl.umap(adata,
                       color='dpt_pseudotime',
                       size=self.size,
                       show=False,
                       save=''.join([
                           '_', 'dpt', '_', sca_params.analysis_params.dpt[0],
                           '.png'
                       ]))
            # sc.pl.dpt_groups_pseudotime(adata, color_map=my_feature_cmap,
            #                           save=''.join([sca_params.analysis_params.dpt[0],sca_params.analysis_params.dpt[1],'.png']))
            # sc.pl.dpt_timeseries(adata, color_map=my_feature_cmap, show=False,
            #                    save=''.join([sca_params.analysis_params.dpt[0],sca_params.analysis_params.dpt[1],'.png']))

        # ## Violin plot for comparing gene expression among different groups/clusters
        # # Create observation field labeled using binary information
        # # Will have option integrated in pipeline in the future
        # adata.obs['CDH5_exp'] = ['CDH5+' if (cell!=0) else 'CDH5-' for cell in adata.raw[:,'CDH5'].X]

        # # Built in scanpy module
        # sc.pl.violin(adata, genes_to_plot+['CDH5'], groupby='CDH5_exp', jitter=True,
        #   save='_feature.png', show=False, scale='width',use_raw=True) #order = ['CDH5+','CDH5-'],

        # Custom violin plot module -- Not complete/in testing
        # NOTE: `df` is only consumed by the commented-out plotting code
        # below. `genes_to_plot` holds the genes of the last gene list
        # processed above (empty when no gene lists are configured).
        df = pd.DataFrame()
        # Add Gaussian y-jitter to better visualize zero expression in violin plots
        for gene in genes_to_plot:
            sigma = np.amax(adata.raw[:, gene].X) / 40
            gene_df = [
                cell if (cell != 0) else np.random.normal(loc=0, scale=sigma)
                for cell in adata.raw[:, gene].X
            ]
            df[gene] = gene_df

        # df['CDH5_exp']=adata.obs['CDH5_exp'].values
        # vplot, axes = plt.subplots(math.ceil(len(genes_to_plot)/4),4, figsize=(18,12))
        # plt.rcParams.update({'font.size':12})
        # plt.subplots_adjust(left=0.125, right=0.9, bottom=0.1, top=0.9, wspace=0.4, hspace=0.4)
        # for i,gene in enumerate(genes_to_plot):
        #   sns.violinplot(x='CDH5_exp', y=gene, data=df, inner=None, scale='width', ax=axes[math.floor(i/4),i%4])
        #   sns.stripplot(x='CDH5_exp', y=gene, data=df, jitter = True, color='black', size=0.4, ax=axes[math.floor(i/4),i%4])
        # vplot.savefig(''.join([figdir,'/violin_feature_jitter.png']))

        ## Scatter plots to identify clusters that are high in number of genes, UMI counts, and mito transcript fraction
        # Random x positions in [0, 10) spread the cells out horizontally.
        adata.obs['jitter'] = np.random.rand(len(adata.obs_names)) * 10
        sc.pl.scatter(adata,
                      x='jitter',
                      y='n_genes',
                      color=sca_params.analysis_params.clustering_choice,
                      save='_n_genes.png',
                      palette=colors,
                      show=False)
        sc.pl.scatter(adata,
                      x='jitter',
                      y='n_counts',
                      color=sca_params.analysis_params.clustering_choice,
                      save='_n_counts.png',
                      palette=colors,
                      show=False)
        sc.pl.scatter(adata,
                      x='jitter',
                      y='percent_mito',
                      color=sca_params.analysis_params.clustering_choice,
                      save='_percent_mito.png',
                      palette=colors,
                      show=False)

        sc.pl.umap(adata,
                   color=['n_genes', 'n_counts', 'percent_mito'],
                   color_map=my_feature_cmap,
                   save='_counts_check.png',
                   show=False)

        # Set the thresholds and scaling factors for drawing the paga map/plot
        node_size_scale = 1.25
        node_size_power = 0.9
        edge_width_scale = 1
        min_edge_width = 0.035
        max_edge_width = 2
        threshold = 0.08
        sc.pl.paga(adata,
                   layout='fr',
                   threshold=threshold,
                   node_size_scale=node_size_scale,
                   node_size_power=node_size_power,
                   edge_width_scale=edge_width_scale,
                   min_edge_width=min_edge_width,
                   max_edge_width=max_edge_width,
                   show=False,
                   save='_pagaPlot.png',
                   title='PAGA: Fruchterman Reingold',
                   frameon=False)

        return adata
Esempio n. 24
0
def plot_sca(adata,
             sca_dict,
             adata_preFiltering=None,
             figdir='./figures',
             annotation_dict=None,
             summary_dict=None,
             adata_postPCA=None,
             final_quality=False):
    '''
    Save the standard set of QC, clustering and marker-gene figures.

    See the Scanpy visualization library for examples of the individual plots.

    Parameters
    ----------
    adata
        Analysed data set. Its obs columns 'n_genes', 'n_counts',
        'percent_mito' and 'louvain' and its ``.raw`` data are read; a random
        'jitter' column is added to ``adata.obs``.
    sca_dict
        Nested settings dict. Keys read here: 'plot_params' ('size',
        'umap_obs', 'exp_grouping'), 'analysis_params' ('n_pcs'),
        'gene_lists', plus one entry per name listed in 'gene_lists' holding
        'markers', 'positions' and 'groups'.
    adata_preFiltering
        Data set before cell/gene filtering, used for the pre-filtering
        violin plot. The plot is skipped when this is None.
    figdir
        Directory assigned to ``sc.settings.figdir``.
    annotation_dict
        Forwarded unchanged to ``write_summary``.
    summary_dict
        Dict that is updated in place with 'final_cell_count' and
        'final_gene_count' and then forwarded to ``write_summary``. A new
        dict is created when None is passed.
    adata_postPCA
        Data set holding the PCA results, used for the elbow plot and the
        PCA loadings plot. Both plots are skipped when this is None.
    final_quality
        When True, figures use a 4x4 figsize, savefig.dpi of 1200 and the
        '.pdf' suffix; otherwise the '.png' suffix is used.

    Returns
    -------
    The ``adata`` object that was passed in.
    '''
    print("Plotting")

    ## Create my custom palette for FeaturePlots and define a matplotlib colormap object
    feature_colors = [(35, 35, 142), (255, 127, 0)]
    # NOTE: blue_orange_cmap is not referenced again in this function.
    blue_orange_cmap = make_cmap(feature_colors, bit=True)
    feature_colors = [(210, 210, 210), (210, 210, 210), (245, 245, 200),
                      (100, 200, 225), (0, 45, 125)]
    position = [0, 0.019999, 0.02, 0.55, 1]
    my_feature_cmap = make_cmap(feature_colors, bit=True, position=position)
    gray_cmap = make_cmap([(220, 220, 220), (220, 220, 220)], bit=True)

    ## Custom color palette for cluster plots and observation plots
    colors = [(0.3, 0.3, 0.3), (1, 0, 0), (0, 1, 0), (0, 0, 0.9), (1, 0, 1),
              (0, 1, 1), (0.9, 0.9, 0), (0.85, 0.5, 0.5), (0.5, 0.85, 0.5),
              (0.5, 0.5, 0.85), (0.15, 0.5, 0.5), (0.5, 0.15, 0.5),
              (0.5, 0.5, 0.15), (1, 0.5, 0), (0, 0.5, 1), (0.5, 1, 0),
              (0.5, 0, 1), (1, 0, 0.5), (0, 1, 0.5)]

    ## General figure parameters and settings
    # One call on purpose: sc.set_figure_params re-applies its defaults for
    # every argument that is not passed, so a separate fontsize-only call
    # would reset dpi and dpi_save again.
    sc.set_figure_params(dpi_save=300, dpi=300,
                         fontsize=12)  #,vector_friendly=False)
    sc.settings.figdir = figdir
    size = sca_dict['plot_params']['size']

    # Check to see if user wants publication quality figures
    if final_quality:
        rcParams['figure.figsize'] = 4, 4
        rcParams['savefig.dpi'] = 1200
        file_type = '.pdf'
    else:
        file_type = '.png'

    # The signature defaults summary_dict to None; start from an empty
    # summary in that case instead of failing on None.update().
    if summary_dict is None:
        summary_dict = {}
    summary_dict.update(final_cell_count=len(adata.obs_names),
                        final_gene_count=len(adata.var_names))
    ## Write a summary of the analysis to a text file including sample information and parameters
    write_summary(adata, sca_dict, annotation_dict, summary_dict)

    ## Violin plots for filtering parameters pre and post
    qc_metrics = ['n_genes', 'n_counts', 'percent_mito']
    if adata_preFiltering is not None:
        sc.pl.violin(adata_preFiltering,
                     qc_metrics,
                     jitter=0.4,
                     multi_panel=True,
                     save='_preFiltering_plot.png',
                     show=False)
    sc.pl.violin(adata,
                 qc_metrics,
                 jitter=0.4,
                 multi_panel=True,
                 save='_postFiltering_plot.png',
                 show=False)

    if adata_postPCA is not None:
        ## Draw the PCA elbow plot to determine which PCs to use
        sc.pl.pca_variance_ratio(adata_postPCA,
                                 log=True,
                                 n_pcs=100,
                                 save='_elbowPlot.png',
                                 show=False)
        ## Ranks and displays most contributing genes for each principal component
        # Show the components in a window of +/- 4 around the chosen n_pcs.
        components = 4
        loadings_components = range(
            sca_dict['analysis_params']['n_pcs'] - components,
            sca_dict['analysis_params']['n_pcs'] + components + 1)
        sc.pl.pca_loadings(adata_postPCA,
                           components=loadings_components,
                           save='_rank_genes.png',
                           show=False)

    ## Plot results of UMAP dimensional reduction and clustering
    for observation in sca_dict['plot_params']['umap_obs']:
        # Cluster labels are drawn on the data; other observations get a side legend.
        legend = 'on data' if (observation == 'louvain') else 'right margin'
        sc.pl.umap(adata,
                   color=observation,
                   save=''.join(['_', observation, file_type]),
                   show=False,
                   legend_loc=legend,
                   edges=False,
                   size=size,
                   palette=colors,
                   alpha=0.75)

    ## Find marker genes via Wilcoxon test based on Louvain cluster assignment
    # Create a simple plot to show the top 25 most significant markers for each cluster
    # Write most significant markers to a csv file
    # adata.obs['is_adult'] = ['Adult' if cell=='ND15989_Fresh_WT_Lung_Adult' else 'Fetal' for cell in adata.obs['sampleName']]
    # rank_grouping = 'age'
    # rank_genes(adata,rank_grouping,figdir=figdir)#,clusters2_compare=['1','4'])
    # sc.pl.rank_genes_groups_heatmap(adata, n_genes=100, use_raw=True, show=False,
    #         save=''.join(['_rank_heatmap_',rank_grouping,file_type]), cmap=my_feature_cmap)
    # sc.pl.rank_genes_groups_dotplot(adata, n_genes=5, use_raw=True, show=False,
    #         save=''.join(['_rank_dotplot_',rank_grouping,file_type]), color_map=my_feature_cmap)
    # sc.pl.rank_genes_groups_stacked_violin(adata, n_genes=5, use_raw=True,
    #         show=False, save=''.join(['_rank_violin_',rank_grouping,file_type]))

    rank_grouping = 'louvain'
    n_genes_rank = 25
    rank_genes(adata, rank_grouping,
               figdir=figdir)  #,clusters2_compare=['1','4'])
    sc.pl.rank_genes_groups_heatmap(
        adata,
        n_genes=n_genes_rank,
        use_raw=True,
        show=False,
        save=''.join(['_rank_heatmap_', rank_grouping, file_type]),
        cmap=my_feature_cmap)
    sc.pl.rank_genes_groups_dotplot(
        adata,
        n_genes=n_genes_rank,
        use_raw=True,
        show=False,
        save=''.join(['_rank_dotplot_', rank_grouping, file_type]),
        color_map=my_feature_cmap)
    sc.pl.rank_genes_groups_stacked_violin(
        adata,
        n_genes=n_genes_rank,
        use_raw=True,
        show=False,
        save=''.join(['_rank_violin_', rank_grouping, file_type]))

    ## Feature plots and dot plot analysis for each specified set of genes
    #sc.pl.rank_genes_groups(adata, n_genes=25, sharey=False, save='_markerPlots.png', show=False)
    # Both lists are read after this section, so they must exist even when no
    # gene lists are configured.
    missing_genes = []
    genes_to_plot = []
    if sca_dict['gene_lists']:
        for gene_list in sca_dict['gene_lists']:
            gene_dict = sca_dict[gene_list]
            # NOTE(review): `self` is not defined in this module-level
            # function, so this call raises NameError as written. The helper
            # it is meant to reach is not visible here -- presumably a
            # gene-lookup routine returning (found, missing); confirm and
            # replace with the correct callable.
            [genes_to_plot,
             missing_genes] = self.__find_genes(adata,
                                                gene_dict['markers'],
                                                missing_genes=missing_genes)

            ## Do FeaturePlots for select genes
            print('Plotting standard marker genes: ', genes_to_plot, '\n')
            sc.pl.umap(adata,
                       color=genes_to_plot,
                       save=''.join(['_featureplots_', gene_list, file_type]),
                       show=False,
                       cmap=my_feature_cmap,
                       size=size,
                       use_raw=True)

            # Group brackets are only drawn when both positions and labels are given.
            if gene_dict['positions'] and gene_dict['groups']:
                group_positions = gene_dict[
                    'positions']  # Manually set and determined
                group_labels = gene_dict['groups']
            else:
                group_positions = None
                group_labels = None

            if len(gene_dict['markers']) != 1:
                for grouping in sca_dict['plot_params']['exp_grouping']:
                    marker_plot_name = ''.join(
                        ['_markers_', gene_list, '_', grouping, file_type])

                    ## Dotplot analysis
                    # Circle color corresponds to expression level, and circle size corresponds to percentage of cells expressing gene

                    ## Reordering categories for dotplot or heatmap rows
                    #adata_temp = adata.copy()
                    #adata_temp.obs['louvain'] = adata.obs['louvain'].cat.reorder_categories(['3','5','0','4','2','1'],inplace = False)

                    sc.pl.dotplot(adata,
                                  genes_to_plot,
                                  groupby=grouping,
                                  var_group_positions=group_positions,
                                  var_group_labels=group_labels,
                                  save=marker_plot_name,
                                  show=False,
                                  color_map=my_feature_cmap,
                                  use_raw=True,
                                  dot_max=0.4)
                    ## Heatmaps
                    # Each horizontal line represents expression of one cell
                    sc.pl.heatmap(adata,
                                  genes_to_plot,
                                  groupby=grouping,
                                  var_group_positions=group_positions,
                                  var_group_labels=group_labels,
                                  save=marker_plot_name,
                                  show=False,
                                  cmap=my_feature_cmap,
                                  use_raw=True)

    # Genes that are not expressed or are invariable are plotted using a grayscale
    print('Plotting empty genes: ', missing_genes, '\n')
    if missing_genes:  # nothing to draw when every requested gene was found
        sc.pl.umap(adata,
                   color=missing_genes,
                   save=''.join(['_featureplots_gray', file_type]),
                   show=False,
                   cmap=gray_cmap,
                   size=size,
                   use_raw=True)

    ## tSNE Plots
    # sc.pl.tsne(adata, color='louvain', save = '_clusterIdentity.png', show = False,
    #             legend_loc = 'right margin', edges = False, size = 20,
    #             palette = colors, alpha = 0.75)
    # sc.pl.tsne(adata, color='sampleName', save = '_sample.png', show = False,
    #             legend_loc = 'right margin', edges = False, size = 20,
    #             palette = colors, alpha = 0.75)
    # sc.pl.tsne(adata, color=genes_to_plot, save = '_featureplots.png', show = False, cmap = my_feature_cmap, size = 25, use_raw = True)
    # sc.pl.tsne(adata, color=missing_genes, save='_featureplots_gray.png', show=False, cmap=gray_cmap, size=20, use_raw=True)

    # ## Violin plot for comparing gene expression among different groups/clusters
    # # Create observation field labeled using binary information
    # # Will have option integrated in pipeline in the future
    # adata.obs['CDH5_exp'] = ['CDH5+' if (cell!=0) else 'CDH5-' for cell in adata.raw[:,'CDH5'].X]

    # # Built in scanpy module
    # sc.pl.violin(adata, genes_to_plot+['CDH5'], groupby='CDH5_exp', jitter=True,
    #     save='_feature.png', show=False, scale='width',use_raw=True) #order = ['CDH5+','CDH5-'],

    # Custom violin plot module -- Not complete/in testing
    # NOTE: `df` is only consumed by the commented-out plotting code below.
    df = pd.DataFrame()
    # Add Gaussian y-jitter to better visualize zero expression in violin plots
    for gene in genes_to_plot:
        sigma = np.amax(adata.raw[:, gene].X) / 40
        gene_df = [
            cell if (cell != 0) else np.random.normal(loc=0, scale=sigma)
            for cell in adata.raw[:, gene].X
        ]
        df[gene] = gene_df

    # df['CDH5_exp']=adata.obs['CDH5_exp'].values
    # vplot, axes = plt.subplots(math.ceil(len(genes_to_plot)/4),4, figsize=(18,12))
    # plt.rcParams.update({'font.size':12})
    # plt.subplots_adjust(left=0.125, right=0.9, bottom=0.1, top=0.9, wspace=0.4, hspace=0.4)
    # for i,gene in enumerate(genes_to_plot):
    #     sns.violinplot(x='CDH5_exp', y=gene, data=df, inner=None, scale='width', ax=axes[math.floor(i/4),i%4])
    #     sns.stripplot(x='CDH5_exp', y=gene, data=df, jitter = True, color='black', size=0.4, ax=axes[math.floor(i/4),i%4])
    # vplot.savefig(''.join([figdir,'/violin_feature_jitter.png']))

    ## Scatter plots to identify clusters that are high in number of genes,
    ## UMI counts, and mito transcript fraction
    # A random x position spreads the cells out so the points do not overlap.
    adata.obs['jitter'] = np.random.rand(len(adata.obs_names)) * 10
    for metric in qc_metrics:
        sc.pl.scatter(adata,
                      x='jitter',
                      y=metric,
                      color='louvain',
                      save=''.join(['_', metric, '.png']),
                      palette=colors,
                      show=False)

    # Set the thresholds and scaling factors for drawing the paga map/plot
    node_size_scale = 1.25
    node_size_power = 0.9
    edge_width_scale = 1
    min_edge_width = 0.035
    max_edge_width = 2
    threshold = 0.08
    # Draw the actual plot
    sc.pl.paga(adata,
               layout='fr',
               threshold=threshold,
               node_size_scale=node_size_scale,
               node_size_power=node_size_power,
               edge_width_scale=edge_width_scale,
               min_edge_width=min_edge_width,
               max_edge_width=max_edge_width,
               show=False,
               save='_pagaPlot.png',
               title='PAGA: Fruchterman Reingold',
               frameon=False)

    print("\nAll done!\n")
    return adata
Esempio n. 25
0
Spyder Editor

This is a temporary script file.
"""
%reset
import numpy as np
import pandas as pd
import os
import scanpy as sc
import seaborn as sns
from plotnine import *
# Machine-specific data directory; all relative paths below are resolved
# against it after the chdir.
path = '/Users/kj22643/Documents/Documents/231_Classifier_Project/data'
#path = '/stor/scratch/Brock/231_10X_data/'
os.chdir(path)
# Scanpy output settings: figure directory, saved-figure resolution, and
# logging verbosity level 3.
sc.settings.figdir = 'EB_plots'
sc.set_figure_params(dpi_save=300)
sc.settings.verbosity = 3
#%% 

# Read in the 10X matrix directory to create the anndata object called "adata".
# cache=True asks scanpy to use a cache file for this read.
adata = sc.read_10x_mtx('agg_all/outs/filtered_feature_bc_matrix',
                        cache=True).copy()
#%%
# Add lineage and sample info for each cell.
# The lineage table is tab-separated; its 'clustered_lineage' column is
# renamed to 'lineage' and 'proper_cell_name' becomes the index so that the
# table can be joined onto adata.obs by cell name.
df = pd.read_csv('lineage_analysis/10x_231.lineage_assignment.clustered_lineage.singletons_only.tsv',sep='\t')
df = df.rename(columns={'clustered_lineage':'lineage'})
df.set_index('proper_cell_name',inplace=True)
adata.obs = adata.obs.join(df)
# The last character of each obs name selects the sample label
# (presumably the library suffix of the aggregated barcodes -- confirm).
dic = {'1':'Doxneg','2':'Doxpos','3':'107Aziz','4':'113Aziz'}
adata.obs['sample'] = adata.obs.index.str[-1].map(dic)
# Store the labels as a categorical with an explicit category order.
# NOTE(review): reorder_categories expects exactly these four categories to be
# present -- confirm every sample occurs in the data.
adata.obs['sample'] = adata.obs['sample'].astype('category').cat.reorder_categories(['Doxneg','Doxpos','107Aziz','113Aziz'])
Esempio n. 26
0
import numpy as np
import pandas as pd
import scipy
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
# from sklearn.metrics import silhouette_score

from peppy import Project
# from ngs_toolkit.analysis import Analysis

import scanpy as sc
# from dca.utils import plot_mean_dropout

from natsort import natsorted as sorted

# Save scanpy figures as SVG and keep text as text in the SVG output
# ('svg.fonttype' = 'none') instead of converting it to paths.
# NOTE(review): `plt`, `matplotlib`, `string` and `random` are used below but
# are not imported in the visible part of this script -- confirm they are
# imported elsewhere.
sc.set_figure_params(format="svg", dpi_save=300, vector_friendly=True)
sns.set_style("white")
plt.rcParams['svg.fonttype'] = 'none'

# random seed
# Each letter of "BOCKLAB" is encoded as its 0-based position in the uppercase
# alphabet and the resulting numbers are concatenated into one integer
# (B=1, O=14, C=2, K=10, L=11, A=0, B=1 -> 1142101101).
SEED = int("".join(LabelEncoder().fit(list(string.ascii_uppercase)).transform(
    list("BOCKLAB")).astype(str)))
# Seed both the stdlib and the NumPy global random generators.
random.seed(SEED)
np.random.seed(SEED)

# Set settings
pd.set_option("date_dayfirst", True)
# sns.set(...) is followed by set_palette, so the "colorblind" palette is the
# one left active; the rcParams line repeats the SVG text setting after
# seaborn has been configured.
sns.set(context="paper", style="white", palette="pastel", color_codes=True)
sns.set_palette(sns.color_palette("colorblind"))
matplotlib.rcParams["svg.fonttype"] = "none"
matplotlib.rc('text', usetex=False)
Esempio n. 27
0
def setFigureOpt(opt):
  """Apply figure options from ``opt`` to scanpy and matplotlib.

  ``opt`` is a mapping with the keys 'dpi', 'fontsize', 'vectorFriendly',
  'transparent', 'colorMap' and 'img'. The two yes/no options are enabled
  only by the exact string 'Yes'; 'img' becomes the default save format.
  """
  figure_kwargs = {
    'dpi_save': int(opt['dpi']),
    'fontsize': float(opt['fontsize']),
    'vector_friendly': opt['vectorFriendly'] == 'Yes',
    'transparent': opt['transparent'] == 'Yes',
    'color_map': opt['colorMap'],
  }
  sc.set_figure_params(**figure_kwargs)

  save_format = {'savefig.format': opt['img']}
  rcParams.update(save_format)
    )
except (ValueError, IndexError):
    # This tends to fail with a batch key... if it does re-do it without a batch key. 
    sc.pp.highly_variable_genes(
        adata_scran, flavor="cell_ranger", n_top_genes=5000, batch_key=None
    )

# Embed and cluster: PCA on the highly variable genes, neighbourhood graph,
# UMAP layout, then Leiden clustering.
sc.pp.pca(adata, n_comps=50, use_highly_variable=True, svd_solver="arpack")
sc.pp.neighbors(adata)
sc.tl.umap(adata)

sc.tl.leiden(adata)

# Candidate genes and annotation columns to colour by; the intersection keeps
# only those that actually exist in this dataset (as a gene or an obs column).
markers = {
    "CD8A",
    "CD4",
    "FOXP3",
    "phase",
    "leiden",
    "sample",
    "origin",
    "tissue",
    "condition",
} & (set(adata.var_names) | set(adata.obs.columns))

sc.set_figure_params(figsize=(5,5))

# `markers` is a set, whose iteration order is arbitrary; pass a sorted list
# so the UMAP panels appear in the same order on every run.
sc.pl.umap(adata, color=sorted(markers), ncols=3)

adata.write_h5ad(output_adata, compression="lzf")
Esempio n. 29
0
import numpy as np
import scanpy as sc
import torch
import anndata
import matplotlib.pyplot as plt
from typing import Union

from scarches.dataset.trvae._utils import label_encoder
from scarches.metrics.metrics import entropy_batch_mixing, knn_purity, asw, nmi
from scarches.models import trVAE, TRVAE
from scarches.trainers import trVAETrainer

# Global display settings for scanpy figures and for printing tensors/arrays.
# NOTE(review): the second set_figure_params call passes only dpi; it
# presumably re-applies the default for frameon and so undoes frameon=False
# from the first call -- confirm which setting is intended.
sc.settings.set_figure_params(dpi=200, frameon=False)
sc.set_figure_params(dpi=200)
# Compact printing: 3 decimals without scientific notation for torch,
# 2 decimals for numpy, 7 edge items shown for both.
torch.set_printoptions(precision=3, sci_mode=False, edgeitems=7)
np.set_printoptions(precision=2, edgeitems=7)


class TRVAE_EVAL:
    """Evaluation helper built around a trVAE / TRVAE model and an AnnData object.

    NOTE(review): only the start of ``__init__`` is visible here; nothing is
    stored on ``self`` in this portion, so the class appears to be truncated.
    """

    def __init__(
            self,
            model: Union[trVAE, TRVAE],
            adata: anndata.AnnData,
            trainer: trVAETrainer = None,
            condition_key: str = None,
            cell_type_key: str = None
    ):
        """Accept either a bare trVAE model or a TRVAE wrapper.

        Parameters
        ----------
        model
            A ``trVAE`` model, or a ``TRVAE`` object exposing ``.model`` and
            ``.trainer``.
        adata
            Annotated data matrix (not used in the visible part of the body).
        trainer
            Trainer for the model; replaced by ``model.trainer`` when a
            ``TRVAE`` object is passed.
        condition_key, cell_type_key
            Not used in the visible part of the body -- presumably obs column
            names; confirm against the full class.
        """
        # Exact type check: subclasses of TRVAE are not unwrapped.
        if type(model) is TRVAE:
            # Read the trainer first, before `model` is rebound to the inner model.
            trainer = model.trainer
            model = model.model
Esempio n. 30
0
from python_scripts.spatial_correlation import helper_functions

# Plotting packages
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# System specific
import os

# Calculation packages
import scanpy as sc
import numpy as np

# Figure params
# Use 'viridis' as scanpy's default colour map, then pull the shared plotting
# constants from the project helper. figure_params() is unpacked into eight
# values, in this order: figure size, title/axis-label/legend font sizes,
# file format, image key, xy tick setting and text font size.
sc.set_figure_params(color_map='viridis')
fig_size, title_fontsize, axis_label_fontsize, legend_fontsize, fileformat, img_key, xy_ticks, text_fontsize = \
    helper_functions.figure_params()


def plot_standalone_colorbar(tissuecomb_colors, labels, save_folder):
    """Plot a standalone vertical and horizontal Colorbar

    Parameters
    ----------
    tissuecomb_colors : matplotlib.colors.ListedColormap
    labels : list of str
    save_folder : str

    Returns
    -------