Example #1
0
def test_sandbag_min():
    """Check ``pairs.sandbag`` against the minimal reference under three setups.

    For each setup the relevant ``settings`` attributes are assigned, the
    algorithm is run on the minimal reference matrix, the resulting marker
    pairs are compared with ``min_ref`` through ``utils.same_marker``, and
    ``utils.benchmark_test`` is invoked with the same arguments.

    The first setup only switches ``enable_jit`` off; ``verbosity`` and
    ``n_jobs`` keep whatever value they had on entry. The settings changed
    here are not restored afterwards.
    """
    # (section title, settings to assign in this order before running)
    scenarios = (
        (
            "# Testing algorithm on minimal data unjitted",
            (("enable_jit", False),),
        ),
        (
            "# Testing algorithm on minimal data jitted, single core",
            (("verbosity", 4), ("n_jobs", 1), ("enable_jit", True)),
        ),
        (
            "# Testing algorithm on minimal data jitted, multi core",
            (("verbosity", 4), ("n_jobs", 4), ("enable_jit", True)),
        ),
    )

    print("")
    print("")

    print("## Testing correctness of sandbag()")

    # Keyword arguments shared by every run below.
    sandbag_kwargs = dict(
        data=min_ref_mat.T,
        annotation=min_ref_cats,
        gene_names=min_ref_gene_names,
        sample_names=min_ref_sample_names,
    )

    for title, overrides in scenarios:
        print("")
        print(title)
        print("")

        for attr_name, attr_value in overrides:
            setattr(settings, attr_name, attr_value)

        result = pairs.sandbag(**sandbag_kwargs)

        assert utils.same_marker(result, min_ref)

        utils.benchmark_test(pairs.sandbag, sandbag_kwargs)
Example #2
0
def sandbag(
        adata,
        annotation,
        gene_names,
        sample_names,
        fraction=0.65,
        filter_genes=None,
        filter_samples=None):
    """Generate pairs of genes [Scialdone15]_ [Fechtner18]_.

    Calculates the pairs of genes serving as marker pairs for each phase,
    based on a matrix of gene counts and an annotation of known phases.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_.

    More information and bug reports `here
    <https://github.com/rfechtner/pypairs>`__.

    This function is a thin wrapper: it copies ``verbosity``, ``n_jobs``,
    ``writedir``, ``cachedir`` and ``logfile`` from this package's settings
    onto the `pypairs` settings and then forwards all arguments to
    ``pypairs.pairs.sandbag``.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix.
    annotation : `dict`
        Dictionary of lists, i.e. {phase: [sample, ...]},
        containing annotation of samples to their phase
    gene_names: `list`
        List of genes.
    sample_names: `list`
        List of samples.
    fraction : `float`, optional (default: 0.65)
        Fraction to be used as threshold.
    filter_genes : `list` or `None`, optional (default: `None`)
        Genes for sampling the reference set. Default is all genes.
    filter_samples : `list` or `None`, optional (default: `None`)
        Cells for sampling the reference set. Default is all samples.

    Returns
    -------
    `dict` of `list` of `tuple`, i.e.
    {phase: [(Gene1, Gene2), ...]},
    containing marker pairs per phase

    Raises
    ------
    ImportError
        If `pypairs` cannot be imported, or if the installed version is
        older than v3.0.9.
    """
    import re

    # Only the import itself is guarded, so that the "too old" error raised
    # below is not swallowed and re-reported as "not installed".
    try:
        from pypairs import __version__ as pypairsversion
    except ImportError as err:
        raise ImportError(
            'You need to install the package `pypairs`.'
        ) from err

    # Compare on the numeric components only. This tolerates an optional
    # leading "v" and avoids `distutils`, which no longer ships with
    # Python >= 3.12.
    installed = tuple(
        int(part) for part in re.findall(r'\d+', str(pypairsversion))
    )
    if installed < (3, 0, 9):
        raise ImportError('Please only use `pypairs` >= v3.0.9 ')

    from pypairs.pairs import sandbag
    from . import settings
    from pypairs import settings as pp_settings

    # Mirror the relevant settings onto pypairs before delegating.
    pp_settings.verbosity = settings.verbosity
    pp_settings.n_jobs = settings.n_jobs
    pp_settings.writedir = settings.writedir
    pp_settings.cachedir = settings.cachedir
    pp_settings.logfile = settings.logfile

    return sandbag(
        data=adata,
        annotation=annotation,
        gene_names=gene_names,
        sample_names=sample_names,
        fraction=fraction,
        filter_genes=filter_genes,
        filter_samples=filter_samples,
    )
Example #3
0
def sandbag(
    adata: Union[AnnData],
    annotation: Optional[Mapping[str, Genes]] = None,
    *,
    fraction: float = 0.65,
    filter_genes: Optional[Genes] = None,
    filter_samples: Optional[Genes] = None,
) -> Dict[str, List[Tuple[str, str]]]:
    """\
    Calculate marker pairs of genes. [Scialdone15]_ [Fechtner18]_.

    Determines, for every category (e.g. cell-cycle phase), the pairs of
    genes that act as marker pairs, given a matrix of gene counts and an
    annotation of known categories.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_; the computation itself is delegated to
    ``pypairs.pairs.sandbag`` after copying the verbosity, job count, write
    directory, cache directory and log file settings onto the `pypairs`
    settings.

    More information and bug reports `here
    <https://github.com/rfechtner/pypairs>`__.

    Parameters
    ----------
    adata
        The annotated data matrix.
    annotation
        Mapping from category to the entries belonging to it, passed through
        to `pypairs` unchanged. When omitted, `pypairs` is expected to read
        the categories from `adata` itself (NOTE(review): confirm which
        field is used).
    fraction
        Fraction of cells per category where marker criteria must be satisfied.
    filter_genes
        Genes for sampling the reference set. Defaults to all genes.
    filter_samples
        Cells for sampling the reference set. Defaults to all samples.

    Returns
    -------
    A dict mapping from category to lists of marker pairs, e.g.:
    `{'Category_1': [(Gene_1, Gene_2), ...], ...}`.

    Examples
    --------
    >>> from scanpy.external.tl import sandbag
    >>> from pypairs import datasets
    >>> adata = datasets.leng15()
    >>> marker_pairs = sandbag(adata, fraction=0.5)
    """
    _check_import()
    from pypairs.pairs import sandbag as run_sandbag
    from pypairs import settings as pp_settings

    # Mirror the shared settings onto pypairs, one attribute at a time.
    for option in ("verbosity", "n_jobs", "writedir", "cachedir", "logfile"):
        setattr(pp_settings, option, getattr(settings, option))

    forwarded = dict(
        data=adata,
        annotation=annotation,
        fraction=fraction,
        filter_genes=filter_genes,
        filter_samples=filter_samples,
    )
    return run_sandbag(**forwarded)
Example #4
0
def test_sandbag_inputs():
    """Check that ``pairs.sandbag`` accepts several input container types.

    The same training data is fed in as an AnnData object (with and without
    a separate annotation), as a DataFrame (with names taken from the frame
    or passed explicitly), and as a plain ndarray with explicit names. Every
    variant must yield marker pairs equal to ``ref_markers`` according to
    ``utils.same_marker``.

    The ``settings`` values assigned here are not restored afterwards.
    """
    print("")
    print("")

    print("## Testing different input types for sandbag()")

    settings.n_jobs = 4
    settings.verbosity = 4

    print("")
    print("# Testing AnnData obj, including annotation")
    print("")

    training_data = datasets.leng15(mode='sorted', gene_sub=list(range(0, 1000)))

    marker_pairs = pairs.sandbag(training_data)

    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing AnnData obj, with separate annotation")
    print("")

    # Annotation maps each category to the positional indices of its samples.
    annotation = {
        cat: [i for i, x in enumerate(training_data.obs['category']) if x == cat]
        for cat in ["G1", "S", "G2M"]
    }

    marker_pairs = pairs.sandbag(training_data, annotation=annotation)

    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing DataFrame obj, with separate annotation")
    print("")

    sample_names = list(training_data.obs_names)
    gene_names = list(training_data.var_names)

    # Attach the row/column labels through the constructor. The previous
    # `training_data_df.Index = ...` only created a stray attribute and left
    # the frame with its default integer row index.
    training_data_df = DataFrame(
        training_data.X,
        index=sample_names,
        columns=gene_names,
    )

    marker_pairs = pairs.sandbag(training_data_df, annotation=annotation)

    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing DataFrame obj, with separate annotation and separate gene-/sample_names")
    print("")

    marker_pairs = pairs.sandbag(training_data_df, annotation, gene_names, sample_names)

    assert utils.same_marker(marker_pairs, ref_markers)

    print("")
    print("# Testing ndarray obj, with separate annotation and separate gene-/sample_names")
    print("")

    training_data_np = training_data_df.values

    marker_pairs = pairs.sandbag(training_data_np, annotation, gene_names, sample_names)

    assert utils.same_marker(marker_pairs, ref_markers)