Example #1
0
def recipe_li(adata, n_top_genes=1000):
    """Filter, normalise and scale a count matrix for DESC clustering.

    Steps, in order:

    1. ``sc.pp.filter_cells(min_genes=200)`` and
       ``sc.pp.filter_genes(min_cells=3)`` are applied in place.
    2. ``obs['n_counts']`` (total counts per cell) and
       ``obs['percent_mito']`` (share of those counts coming from genes
       whose name starts with ``'MT-'``) are added.
    3. Cells with ``n_genes >= 2500`` or ``percent_mito >= 0.05`` are
       dropped.  ``percent_mito`` is a fraction in [0, 1], so the cut-off
       corresponds to 5 %.
    4. ``desc.normalize_per_cell(counts_per_cell_after=1e4)`` and
       ``desc.log1p`` are applied, and the result is stored in ``.raw``.
    5. The ``n_top_genes`` highly variable genes are kept and the matrix is
       passed through ``desc.scale(zero_center=True, max_value=3)``.

    Parameters
    ----------
    adata
        AnnData object holding counts in ``.X`` (sparse or dense).  Steps 1
        and 2 modify this object in place.
    n_top_genes
        Number of highly variable genes to keep.

    Returns
    -------
    A new AnnData object containing the retained cells and genes; the
    normalisation and scaling steps are not applied to the caller's object.
    """
    import desc

    sc.pp.filter_cells(adata, min_genes=200)  # also provides obs['n_genes']
    sc.pp.filter_genes(adata, min_cells=3)

    # ``np.asarray(...).ravel()`` flattens both the (n, 1) ``np.matrix`` that
    # scipy sparse matrices return and the plain 1-D array returned for dense
    # input, whereas ``.A1`` exists only on ``np.matrix``.
    mito_genes = adata.var_names.str.startswith('MT-')
    n_counts = np.asarray(adata.X.sum(axis=1)).ravel()
    mito_counts = np.asarray(adata[:, mito_genes].X.sum(axis=1)).ravel()
    adata.obs['percent_mito'] = mito_counts / n_counts
    adata.obs['n_counts'] = n_counts
    # sc.pl.violin(adata, ['n_genes', 'n_counts', 'percent_mito'],jitter=0.4, multi_panel=True,show=False)

    # Subsetting yields a view; take an explicit copy so the in-place steps
    # below work on real data instead of relying on implicit copy-on-write.
    keep = (adata.obs['n_genes'] < 2500) & (adata.obs['percent_mito'] < 0.05)
    adata = adata[keep, :].copy()

    desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
    desc.log1p(adata)
    adata.raw = adata

    # ``subset=True`` already restricts ``adata`` to the selected genes, so
    # no further slicing on ``var['highly_variable']`` is needed.
    sc.pp.highly_variable_genes(adata, n_top_genes=n_top_genes, subset=True)
    desc.scale(adata, zero_center=True, max_value=3)
    return adata
Example #2
0
# Preprocessing script: filter genes and cells, annotate per-cell QC values,
# normalise, log-transform, keep highly variable genes and scale.
# `adata`, `sc`, `np` and `desc` are expected to be defined earlier in the
# file (not visible in this excerpt).
adata.var_names_make_unique()
# Filtering: applied in place with min_cells=3 for genes and min_genes=200
# for cells.
sc.pp.filter_genes(adata, min_cells=3)
sc.pp.filter_cells(adata, min_genes=200)
# Share of each cell's counts that comes from genes whose name starts with
# 'MT-'. Despite the column name this is a fraction (0-1), not a percentage.
# NOTE(review): `.A1` exists only on np.matrix, so this presumably assumes
# adata.X is a scipy sparse matrix - confirm.
mito_genes = adata.var_names.str.startswith('MT-')
adata.obs['percent_mito'] = np.sum(adata[:, mito_genes].X, axis=1).A1 / np.sum(
    adata.X, axis=1).A1
# Add the total counts per cell as an observation annotation on adata.
adata.obs['n_counts'] = adata.X.sum(axis=1).A1

# Disabled alternative filter on the number of detected genes per cell:
#adata = adata[adata.obs['n_genes'] < 2500, :]
# Keep cells with fewer than 1500 total counts and a mitochondrial fraction
# below 0.5. Each subsetting step rebinds `adata` to the sliced object.
adata = adata[adata.obs['n_counts'] < 1500, :]
adata = adata[adata.obs['percent_mito'] < 0.5, :]
# Normalisation, then log1p; the result is stored in `.raw` before the gene
# selection and scaling below.
desc.normalize_per_cell(adata, counts_per_cell_after=1e4)
sc.pp.log1p(adata)
adata.raw = adata
# Selection of highly variable genes.
# NOTE(review): with subset=True the mask selection on the following line is
# presumably redundant - confirm before removing.

sc.pp.highly_variable_genes(adata,
                            min_mean=0.0125,
                            max_mean=3,
                            min_disp=0.5,
                            subset=True)
adata = adata[:, adata.var['highly_variable']]

desc.scale(
    adata, zero_center=True, max_value=3
)  # if the dataset has two or more batches you can use `adata=desc.scale(adata,groupby="BatchID")`
# Presumably the output directory used by later steps (not visible here).
save_dir = "h5_result"