def sandbag(adata, phases, fraction=0.5, subset_genes=None,
            subset_samples=None, processes=1):
    """Generate pairs of genes [Scialdone15]_.

    Calculates the pairs of genes serving as marker pairs for each phase,
    based on a matrix of gene counts and an annotation of known phases.

    This reproduces the approach in Pairs [Scialdone15]_ and has been
    implemented for Scanpy by Ron Fechtner.

    Parameters
    ----------
    adata : :class:`~scanpy.api.AnnData`
        The annotated data matrix.
    phases : `dict`
        Dictionary of lists, i.e. {phase: [sample, ...]}, containing
        annotation of samples to their phase
    fraction : `float`, optional (default: 0.5)
        Fraction to be used as threshold. Forwarded to `pypairs.sandbag`.
    subset_genes : `list` or `None`, optional (default: `None`)
        Genes for sampling the reference set. Default is all genes.
    subset_samples : `list` or `None`, optional (default: `None`)
        Cells for sampling the reference set. Default is all samples.
    processes : `int`, optional (default: 1)
        Number of concurrent processes to be used. 0 = use all available
        cores.

    Returns
    -------
    `dict` of `list` of `tuple`, i.e. {phase: [(Gene1, Gene2), ...]},
    containing marker pairs per phase

    Raises
    ------
    ImportError
        If the optional dependency `pypairs` is not installed.

    Examples
    --------
    See this `notebook <https://github.com/theislab/scanpy_usage/tree/master/180209_cell_cycle>`_.
    """
    # `pypairs` is an optional dependency, so it is imported lazily.
    try:
        import pypairs
    except ImportError:
        raise ImportError('You need to install the package `pypairs`.')

    # NOTE(review): only `adata.X` is handed over; the row/column labels of
    # `adata` are not attached to the frame -- confirm that `pypairs` is
    # meant to work on positional indices here.
    x = pd.DataFrame(adata.X)

    # `fraction` was previously accepted but never passed on, so the
    # user-supplied threshold had no effect.
    return pypairs.sandbag(
        x=x,
        phases=phases,
        fraction=fraction,
        subset_genes=subset_genes,
        subset_samples=subset_samples,
        processes=processes,
    )
def sandbag(adata, phases, fraction=0.5, subset_genes=None,
            subset_samples=None, n_jobs=1):
    """Generate pairs of genes [Scialdone15]_ [Fechtner18]_.

    Calculates the pairs of genes serving as marker pairs for each phase,
    based on a matrix of gene counts and an annotation of known phases.

    This reproduces the approach of [Scialdone15]_ in the implementation of
    [Fechtner18]_.

    More information and bug reports `here
    <https://github.com/rfechtner/pypairs>`__.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix.
    phases : `dict`
        Dictionary of lists, i.e. {phase: [sample, ...]}, containing
        annotation of samples to their phase
    fraction : `float`, optional (default: 0.5)
        Fraction to be used as threshold. Forwarded to `pypairs.sandbag`.
    subset_genes : `list` or `None`, optional (default: `None`)
        Genes for sampling the reference set. Default is all genes.
    subset_samples : `list` or `None`, optional (default: `None`)
        Cells for sampling the reference set. Default is all samples.
    n_jobs : `int`, optional (default: 1)
        Number of concurrent processes to be used. 0 = use all available
        cores. Passed to `pypairs.sandbag` as `processes`.

    Returns
    -------
    `dict` of `list` of `tuple`, i.e. {phase: [(Gene1, Gene2), ...]},
    containing marker pairs per phase

    Raises
    ------
    ImportError
        If the optional dependency `pypairs` is not installed.
    """
    # `pypairs` is an optional dependency, so it is imported lazily.
    try:
        import pypairs
    except ImportError:
        raise ImportError('You need to install the package `pypairs`.')

    # NOTE(review): only `adata.X` is handed over; the row/column labels of
    # `adata` are not attached to the frame -- confirm that `pypairs` is
    # meant to work on positional indices here.
    x = pd.DataFrame(adata.X)

    # `fraction` was previously accepted but never passed on, so the
    # user-supplied threshold had no effect.
    return pypairs.sandbag(
        x=x,
        phases=phases,
        fraction=fraction,
        subset_genes=subset_genes,
        subset_samples=subset_samples,
        processes=n_jobs,
    )
def _read_cell_cycle_genes(relPath):
    """Return the unique union of the GO:0007049 and Cyclebase gene lists.

    Both files are looked up by appending their fixed names to `relPath`.
    """
    # One gene per line; strip the line terminators only.
    with open(relPath + "go_0007049_homoSapiens.csv", "r") as handle:
        go_0007049 = [
            line.replace("\n", "").replace("\r", "") for line in handle
        ]
    # Tab-separated; the first line is skipped and the first field is kept.
    with open(relPath + "cyclebase_top1000_genes.tsv", "r") as handle:
        cycle_base = [
            line.split("\t")[0] for i, line in enumerate(handle) if 0 < i
        ]
    return numpy.unique(numpy.concatenate((go_0007049, cycle_base), 0))


def load_ocope_marker(relPath, fraction=0.6, cc_only=True, weighted=False,
                      triplets=False):
    """Compute marker pairs from the GSE64016 (H1 / FUCCI) count matrix.

    Reads ``GSE64016_H1andFUCCI_normalized_EC_human.csv`` from `relPath`,
    keeps the columns whose name contains ``G1_``, ``S_`` or ``G2_``, builds
    a phase annotation from those column names and passes everything to
    ``pairs.sandbag``.

    Parameters
    ----------
    relPath : `str`
        Prefix that the fixed file names are appended to (plain string
        concatenation, so it must end with a path separator).
    fraction : `float`, optional (default: 0.6)
        Forwarded to ``pairs.sandbag`` as `fraction`.
    cc_only : `bool`, optional (default: `True`)
        If true, restrict the genes to the union of
        ``go_0007049_homoSapiens.csv`` and the first column of
        ``cyclebase_top1000_genes.tsv`` (both read from `relPath`) via
        `subset_genes`. If false, these two files are not read.
    weighted : `bool`, optional (default: `False`)
        Forwarded to ``pairs.sandbag`` as `weighted`.
    triplets : `bool`, optional (default: `False`)
        Forwarded to ``pairs.sandbag`` as `triplets`.

    Returns
    -------
    Whatever ``pairs.sandbag`` returns for the given input.
    """
    # Load matrix; the unnamed first CSV column becomes the index.
    gencounts_oscope = pandas.read_csv(
        Path(relPath + "GSE64016_H1andFUCCI_normalized_EC_human.csv"))
    gencounts_oscope.set_index("Unnamed: 0", inplace=True)

    # Keep only the columns that carry a phase tag, in file order.
    phase_sorted = [
        pos for pos, name in enumerate(gencounts_oscope.columns)
        if "G1_" in name or "G2_" in name or "S_" in name
    ]
    gencounts_oscope_sorted = gencounts_oscope.iloc[:, phase_sorted]

    # Phase annotation: positional column indices within the subset.
    sorted_columns = gencounts_oscope_sorted.columns
    annotation = {
        "G1": [i for i, name in enumerate(sorted_columns) if "G1_" in name],
        "S": [i for i, name in enumerate(sorted_columns) if "S_" in name],
        "G2M": [i for i, name in enumerate(sorted_columns) if "G2_" in name],
    }

    sandbag_kwargs = dict(
        phases=annotation,
        fraction=fraction,
        processes=10,
        verbose=True,
        weighted=weighted,
        triplets=triplets,
    )
    if cc_only:
        # The gene lists are only needed (and only read) for this case.
        sandbag_kwargs["subset_genes"] = list(_read_cell_cycle_genes(relPath))

    return pairs.sandbag(gencounts_oscope_sorted, **sandbag_kwargs)