def test_sandbag_min():
    """Check sandbag() on the minimal reference data in three run modes.

    The same keyword arguments are used for an unjitted run, a jitted run
    with ``settings.n_jobs = 1`` and a jitted run with ``settings.n_jobs = 4``.
    After each configuration change the result must match ``min_ref``
    according to ``utils.same_marker``; the call is then passed on to
    ``utils.benchmark_test``.
    """

    def announce(title):
        # A section title framed by one empty line above and below.
        print("")
        print(title)
        print("")

    def run_and_verify(call_kwargs):
        # One sandbag call, checked against the reference, then benchmarked.
        found = pairs.sandbag(**call_kwargs)
        assert utils.same_marker(found, min_ref)
        utils.benchmark_test(pairs.sandbag, call_kwargs)

    print("")
    announce("## Testing correctness of sandbag()")
    print("# Testing algorithm on minimal data unjitted")
    print("")

    settings.enable_jit = False
    params = dict(
        data=min_ref_mat.T,
        annotation=min_ref_cats,
        gene_names=min_ref_gene_names,
        sample_names=min_ref_sample_names,
    )
    run_and_verify(params)

    jitted_runs = (
        ("# Testing algorithm on minimal data jitted, single core", 1),
        ("# Testing algorithm on minimal data jitted, multi core", 4),
    )
    for title, job_count in jitted_runs:
        announce(title)
        settings.verbosity = 4
        settings.n_jobs = job_count
        settings.enable_jit = True
        run_and_verify(params)
def sandbag(
        adata,
        annotation,
        gene_names,
        sample_names,
        fraction=0.65,
        filter_genes=None,
        filter_samples=None):
    """Generate pairs of genes [Scialdone15]_ [Fechtner18]_.

    Calculates the pairs of genes serving as marker pairs for each phase,
    based on a matrix of gene counts and an annotation of known phases.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_.

    More information and bug reports `here
    <https://github.com/rfechtner/pypairs>`__.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix.
    annotation : `dict`
        Dictionary of lists, i.e. {phase: [sample, ...]},
        containing annotation of samples to their phase
    gene_names: `list`
        List of genes.
    sample_names: `list`
        List of samples.
    fraction : `float`, optional (default: 0.65)
        Fraction to be used as threshold.
    filter_genes : `list` or `None`, optional (default: `None`)
        Genes for sampling the reference set. Default is all genes.
    filter_samples : `list` or `None`, optional (default: `None`)
        Cells for sampling the reference set. Default is all samples.

    Returns
    -------
    `dict` of `list` of `tuple`, i.e.
    {phase: [(Gene1, Gene2), ...]},
    containing marker pairs per phase

    Raises
    ------
    ImportError
        If `pypairs` cannot be imported, or if the installed version is
        older than v3.0.9.
    """
    import re

    # Only the import itself lives inside the ``try``: the version error
    # raised further down must not be caught and replaced by the
    # "need to install" message.
    try:
        from pypairs import __version__ as pypairs_version
    except ImportError as err:
        raise ImportError(
            'You need to install the package `pypairs`.') from err

    # Compare the leading numeric release segment (an optional "v" prefix is
    # accepted; any suffix after the digits is ignored). This avoids
    # ``distutils``, which newer Python versions no longer ship.
    found = re.match(r'v?(\d+(?:\.\d+)*)', str(pypairs_version).strip())
    installed = tuple(int(p) for p in found.group(1).split('.')) if found else ()
    if installed < (3, 0, 9):
        raise ImportError('Please only use `pypairs` >= v3.0.9 ')

    # Aliased so the imported callable does not shadow this wrapper's name.
    from pypairs.pairs import sandbag as _pp_sandbag
    from . import settings
    from pypairs import settings as pp_settings

    # Mirror this package's settings onto pypairs before running.
    pp_settings.verbosity = settings.verbosity
    pp_settings.n_jobs = settings.n_jobs
    pp_settings.writedir = settings.writedir
    pp_settings.cachedir = settings.cachedir
    pp_settings.logfile = settings.logfile

    return _pp_sandbag(
        data=adata,
        annotation=annotation,
        gene_names=gene_names,
        sample_names=sample_names,
        fraction=fraction,
        filter_genes=filter_genes,
        filter_samples=filter_samples,
    )
def sandbag(
    adata: Union[AnnData],
    annotation: Optional[Mapping[str, Genes]] = None,
    *,
    fraction: float = 0.65,
    filter_genes: Optional[Genes] = None,
    filter_samples: Optional[Genes] = None,
) -> Dict[str, List[Tuple[str, str]]]:
    """\
    Calculate marker pairs of genes. [Scialdone15]_ [Fechtner18]_.

    Thin wrapper around ``pypairs.pairs.sandbag``: it copies the relevant
    settings of this package onto the ``pypairs`` settings object and then
    forwards all arguments unchanged.

    Marker pairs are determined per phase from a matrix of gene counts and
    an annotation of known phases, following the approach of [Scialdone15]_
    as implemented by [Fechtner18]_.

    More information and bug reports `here
    <https://github.com/rfechtner/pypairs>`__.

    Parameters
    ----------
    adata
        The annotated data matrix.
    annotation
        Mapping from category to its members, passed through to `pypairs`
        as-is. When `None`, `pypairs` picks its own default
        (NOTE(review): presumably a `'category'` annotation stored on
        `adata` -- confirm against the `pypairs` documentation).
    fraction
        Fraction of cells per category where marker criteria must be
        satisfied.
    filter_genes
        Genes for sampling the reference set. Defaults to all genes.
    filter_samples
        Cells for sampling the reference set. Defaults to all samples.

    Returns
    -------
    A dict mapping from category to lists of marker pairs, e.g.:
    `{'Category_1': [(Gene_1, Gene_2), ...], ...}`.

    Examples
    --------
    >>> from scanpy.external.tl import sandbag
    >>> from pypairs import datasets
    >>> adata = datasets.leng15()
    >>> marker_pairs = sandbag(adata, fraction=0.5)
    """
    _check_import()
    from pypairs.pairs import sandbag as run_sandbag
    from pypairs import settings as pp_settings

    # Keep pypairs in sync with the host package's configuration.
    for option in ("verbosity", "n_jobs", "writedir", "cachedir", "logfile"):
        setattr(pp_settings, option, getattr(settings, option))

    forwarded = dict(
        data=adata,
        annotation=annotation,
        fraction=fraction,
        filter_genes=filter_genes,
        filter_samples=filter_samples,
    )
    return run_sandbag(**forwarded)
def test_sandbag_inputs():
    """Check that sandbag() accepts each supported way of passing its input.

    The same training data is handed over in five forms, and every call must
    produce markers equal to ``ref_markers`` (per ``utils.same_marker``):

    1. an AnnData object on its own,
    2. the AnnData object plus an explicit annotation,
    3. a DataFrame carrying sample/gene names as index/columns, plus the
       annotation,
    4. the DataFrame with annotation, gene names and sample names passed
       separately,
    5. a bare ndarray with annotation, gene names and sample names passed
       separately.
    """
    print("")
    print("")
    print("## Testing different input types for sandbag()")

    settings.n_jobs = 4
    settings.verbosity = 4

    print("")
    print("# Testing AnnData obj, including annotation")
    print("")

    training_data = datasets.leng15(mode='sorted', gene_sub=list(range(0, 1000)))

    marker_pairs = pairs.sandbag(training_data)
    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing AnnData obj, with separate annotation")
    print("")

    # Category -> positions of the samples labelled with that category.
    annotation = {
        cat: [i for i, x in enumerate(training_data.obs['category']) if x == cat]
        for cat in ["G1", "S", "G2M"]
    }

    marker_pairs = pairs.sandbag(training_data, annotation=annotation)
    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing DataFrame obj, with separate annotation")
    print("")

    sample_names = list(training_data.obs_names)
    gene_names = list(training_data.var_names)
    # The row labels must go to the frame's ``index``. Assigning to a
    # capitalised ``Index`` attribute only attaches a stray attribute and
    # leaves the default row labels in place, so the frame would not carry
    # the sample names this case is meant to exercise.
    training_data_df = DataFrame(
        training_data.X, index=sample_names, columns=gene_names
    )

    marker_pairs = pairs.sandbag(training_data_df, annotation=annotation)
    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing DataFrame obj, with separate annotation and separate gene-/sample_names")
    print("")

    marker_pairs = pairs.sandbag(training_data_df, annotation, gene_names, sample_names)
    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing ndarray obj, with separate annotation and separate gene-/sample_names")
    print("")

    training_data_np = training_data_df.values

    marker_pairs = pairs.sandbag(training_data_np, annotation, gene_names, sample_names)
    assert utils.same_marker(marker_pairs, ref_markers)