Example #1
0
def kBET_single(matrix, batch, type_ = None, k0 = 10, knn=None, subsample=0.5, heuristic=True, verbose=False):
    """Run a single kBET batch-mixing test via the R `kBET` package.

    params:
        matrix: expression matrix (at the moment: a PCA matrix, so do.pca is set to FALSE
        batch: series or list of batch assignemnts
        type_: unused; kept for interface compatibility
        k0: neighbourhood size passed through to kBET
        knn: precomputed kNN graph passed through to kBET (may be None)
        subsample: fraction to be subsampled. No subsampling if `subsample=None`
            (NOTE(review): currently not forwarded to kBET — confirm intent)
        heuristic: unused; the R call hard-codes heuristic=FALSE
    returns:
        kBET average p-value, or np.nan if the R result is unavailable
    """
    anndata2ri.activate()
    ro.r("library(kBET)")

    if verbose:
        print("importing expression matrix")
    ro.globalenv['data_mtrx'] = matrix
    ro.globalenv['batch'] = batch

    if verbose:
        print("kBET estimation")

    ro.globalenv['knn_graph'] = knn
    ro.globalenv['k0'] = k0
    # Result is read back from the R global env below, so the Python-side
    # return value of this call is not needed.
    ro.r(f"batch.estimate <- kBET(data_mtrx, batch, knn=knn_graph, k0=k0, plot=FALSE, do.pca=FALSE, heuristic=FALSE, adapt=FALSE, verbose={str(verbose).upper()})")

    anndata2ri.deactivate()
    try:
        # Evaluate the R expression once (the original evaluated it twice:
        # once in the try body and again in the else branch).
        return ro.r("batch.estimate$average.pval")[0]
    except rpy2.rinterface_lib.embedded.RRuntimeError:
        return np.nan
Example #2
0
def identify_empty_droplets(data, min_cells=3, **kw):
    """Detect empty droplets using DropletUtils

    """
    import rpy2.robjects as robj
    from rpy2.robjects import default_converter
    from rpy2.robjects.packages import importr
    import anndata2ri
    from rpy2.robjects.conversion import localconverter
    importr("DropletUtils")

    adata = data.copy()

    # Per-gene total counts; densify if the matrix is sparse.
    gene_counts = adata.X.sum(0)
    if hasattr(gene_counts, 'A'):
        gene_counts = gene_counts.A.squeeze()
    adata = adata[:, gene_counts > min_cells]

    # Hand the filtered counts to R and run emptyDrops().
    anndata2ri.activate()
    robj.globalenv["X"] = adata
    res = robj.r('res <- emptyDrops(assay(X))')
    anndata2ri.deactivate()

    # Retain barcodes called as real cells at FDR < 1%.
    called = res.loc[res.FDR < 0.01, :]
    data = data[called.index, :]
    data.obs['empty_FDR'] = called['FDR']

    return data
Example #3
0
def saveSeurat(adata, path, batch, hvgs=None):
    """Convert an AnnData object to a Seurat object and save it as RDS.

    params:
        adata: AnnData object to convert (counts layer + X are used)
        path: output RDS file path
        batch: obs column name used to set the Seurat Idents
        hvgs: optional list of highly variable genes, saved alongside
            as `<path without .RDS>_hvg.RDS`
    """
    import re
    ro.r('library(Seurat)')
    ro.r('library(scater)')
    anndata2ri.activate()

    # anndata2ri requires sorted indices on sparse matrices for conversion.
    if sparse.issparse(adata.X):
        if not adata.X.has_sorted_indices:
            adata.X.sort_indices()

    for key in adata.layers:
        if sparse.issparse(adata.layers[key]):
            if not adata.layers[key].has_sorted_indices:
                adata.layers[key].sort_indices()

    ro.globalenv['adata'] = adata

    ro.r('sobj = as.Seurat(adata, counts="counts", data = "X")')

    # Fix error if levels are 0 and 1
    # ro.r(f'sobj$batch <- as.character(sobj${batch})')
    ro.r(f'Idents(sobj) = "{batch}"')
    ro.r(f'saveRDS(sobj, file="{path}")')
    if hvgs is not None:
        # Raw string: '\.RDS$' in a plain literal is an invalid escape
        # sequence (SyntaxWarning since Python 3.12). Match is case-sensitive,
        # so lowercase '.rds' paths keep their suffix — as before.
        hvg_out = re.sub(r'\.RDS$', '', path) + '_hvg.RDS'
        ro.globalenv['hvgs'] = hvgs
        # NOTE(review): result of unlist() is discarded; if a flat vector was
        # intended, this should probably be `hvgs <- unlist(hvgs)` — confirm.
        ro.r('unlist(hvgs)')
        ro.r(f'saveRDS(hvgs, file="{hvg_out}")')

    anndata2ri.deactivate()
Example #4
0
def test_py2rpy_activate(check, shape, dataset):
    """Round-trip a dataset into the R global env with anndata2ri active."""
    try:
        anndata2ri.activate()
        globalenv["adata"] = dataset()
    finally:
        anndata2ri.deactivate()
    converted = globalenv["adata"]
    # R reports dims transposed relative to AnnData, hence the [::-1].
    assert tuple(baseenv["dim"](converted)[::-1]) == shape
    check(converted)
Example #5
0
def test_convert_activate(check, shape, dataset):
    """Datasets converted with anndata2ri active come back as AnnData."""
    try:
        anndata2ri.activate()
        converted = dataset()
    finally:
        anndata2ri.deactivate()
    assert isinstance(converted, AnnData)
    assert converted.shape == shape
    check(converted)
def save_adata(adata: AnnData, transpose: bool = False):
    """Dump an AnnData's matrix and axis names to RDS files via rpy2."""
    anndata2ri.activate()

    if transpose:
        matrix, fname = adata.X.T, "adata_t.rds"
    else:
        matrix, fname = adata.X, "adata.rds"
    r.saveRDS(matrix, file=fname)
    r.saveRDS(adata.obs_names.values, file="obs_names.rds")
    r.saveRDS(adata.var_names.values, file="var_names.rds")

    anndata2ri.deactivate()
Example #7
0
def test_py2rpy2_numpy_pbmc68k():
    """This has some weird metadata"""
    from scanpy.datasets import pbmc68k_reduced

    try:
        anndata2ri.activate()
        with catch_warnings(record=True) as captured:  # type: List[WarningMessage]
            simplefilter("ignore", DeprecationWarning)
            globalenv["adata"] = pbmc68k_reduced()
        # Conversion should emit no warnings at all.
        assert len(captured) == 0, [w.message for w in captured]
    finally:
        anndata2ri.deactivate()
Example #8
0
def log_scran_pooling(adata):
    """Normalize data with scran via rpy2."""
    import anndata2ri
    import scIB.preprocessing

    scprep.run.install_bioconductor("scran")
    # scran pooling normalization (scIB clusters internally at res=0.5).
    scIB.preprocessing.normalize(adata)
    anndata2ri.deactivate()

    del adata.raw  # make lightweight
def save_stemnet_cluster_pop(size: int, col: int):
    """Save per-cluster terminal-state membership as an RDS data frame."""
    anndata2ri.activate()

    pickle_path = DATA_DIR / "benchmarking" / "runtime_analysis" / "gpcca.pickle"
    with open(pickle_path, "rb") as fin:
        data = pickle.load(fin)[size][str(col)]

    # old name: main_states
    annot = data["terminal_states"]
    membership = {c: annot.isin([c]) for c in annot.cat.categories}
    r.saveRDS(pd.DataFrame(membership), file="cluster_pop.rds")

    anndata2ri.deactivate()
Example #10
0
def test_py2rpy2_numpy_pbmc68k():
    """This has some weird metadata"""
    from scanpy.datasets import pbmc68k_reduced

    try:
        anndata2ri.activate()
        with catch_warnings(record=True) as captured:  # type: List[WarningMessage]
            simplefilter("ignore", DeprecationWarning)
            globalenv["adata"] = pbmc68k_reduced()
        # Exactly one NotConvertedWarning is expected: the sparse matrix.
        assert len(captured) == 1, [w.message for w in captured]
        assert captured[0].category is NotConvertedWarning
        assert "scipy.sparse.csr.csr_matrix" in str(captured[0].message)
    finally:
        anndata2ri.deactivate()
Example #11
0
def readSeurat(path):
    """Read a Seurat RDS file and return it as an AnnData object."""
    anndata2ri.activate()
    ro.r('library(Seurat)')
    ro.r('library(scater)')
    ro.r(f'sobj <- readRDS("{path}")')
    adata = ro.r('as.SingleCellExperiment(sobj)')
    anndata2ri.deactivate()

    # Normalise embedding key: rename X_EMB to X_emb if present.
    if 'X_EMB' in adata.obsm:
        if 'X_emb' in adata.obsm:
            print(
                'overwriting existing `adata.obsm["X_emb"] in the adata object'
            )
        adata.obsm['X_emb'] = adata.obsm['X_EMB']
        del adata.obsm['X_EMB']

    return adata
Example #12
0
def normalize(adata, min_mean=0.1, log=True, precluster=True, sparsify=True):
    """Normalize counts with scran size factors (via rpy2), in place.

    Stores raw counts in `adata.layers["counts"]`, writes per-cell size
    factors to `adata.obs['size_factors']`, divides `adata.X` by them,
    optionally log1p-transforms, and sets `adata.raw`.

    params:
        adata: AnnData object with raw counts in `.X`
        min_mean: `min.mean` passed to scran's computeSumFactors
        precluster: if True, run a preliminary Louvain clustering (res=0.5)
            and pass the groups to computeSumFactors
        log: if True, apply log1p after size-factor normalization
        sparsify: if True, convert a dense `.X` to CSR first
    raises:
        ValueError: if any cell or gene has zero total counts
    """

    checkAdata(adata)

    # Check for 0 count cells
    if np.any(adata.X.sum(axis=1) == 0):
        raise ValueError('found 0 count cells in the AnnData object.'
                         ' Please filter these from your dataset.')

    # Check for 0 count genes
    if np.any(adata.X.sum(axis=0) == 0):
        raise ValueError('found 0 count genes in the AnnData object.'
                         ' Please filter these from your dataset.')

    if sparsify:
        # massive speedup when working with sparse matrix
        if not sparse.issparse(
                adata.X):  # quick fix: HVG doesn't work on dense matrix
            adata.X = sparse.csr_matrix(adata.X)

    anndata2ri.activate()
    ro.r('library("scran")')

    # keep raw counts
    adata.layers["counts"] = adata.X.copy()

    is_sparse = False
    # scran expects genes x cells, hence the transpose.
    X = adata.X.T
    # convert to CSC if possible. See https://github.com/MarioniLab/scran/issues/70
    if sparse.issparse(X):
        is_sparse = True

        # CSC indices would overflow int32 beyond this nnz; fall back to COO.
        if X.nnz > 2**31 - 1:
            X = X.tocoo()
        else:
            X = X.tocsc()

    ro.globalenv['data_mat'] = X

    if precluster:
        # Preliminary clustering for differentiated normalisation
        adata_pp = adata.copy()
        sc.pp.normalize_per_cell(adata_pp, counts_per_cell_after=1e6)
        sc.pp.log1p(adata_pp)
        sc.pp.pca(adata_pp, n_comps=15, svd_solver='arpack')
        sc.pp.neighbors(adata_pp)
        sc.tl.louvain(adata_pp, key_added='groups', resolution=0.5)

        ro.globalenv['input_groups'] = adata_pp.obs['groups']
        size_factors = ro.r(
            'sizeFactors(computeSumFactors(SingleCellExperiment('
            'list(counts=data_mat)), clusters = input_groups,'
            f' min.mean = {min_mean}))')

        # Free the temporary clustering copy as early as possible.
        del adata_pp

    else:
        size_factors = ro.r(
            'sizeFactors(computeSumFactors(SingleCellExperiment('
            f'list(counts=data_mat)), min.mean = {min_mean}))')

    # modify adata
    adata.obs['size_factors'] = size_factors
    # Broadcast per-cell size factors over the gene axis.
    adata.X /= adata.obs['size_factors'].values[:, None]
    if log:
        print("Note! Performing log1p-transformation after normalization.")
        sc.pp.log1p(adata)
    else:
        print("No log-transformation performed after normalization.")

    if is_sparse:
        # convert to sparse, bc operation always converts to dense
        adata.X = sparse.csr_matrix(adata.X)

    adata.raw = adata  # Store the full data set in 'raw' as log-normalised data for statistical testing

    # Free memory in R
    ro.r('rm(list=ls())')
    # Re-attach lazily loaded packages so the detach below can unload them.
    ro.r(
        'lapply(names(sessionInfo()$loadedOnly), require, character.only = TRUE)'
    )
    ro.r(
        'invisible(lapply(paste0("package:", names(sessionInfo()$otherPkgs)), '
        'detach, character.only=TRUE, unload=TRUE))')
    ro.r('gc()')

    anndata2ri.deactivate()