def list_ranks_db(rank_file=None, debug=False):
    '''Return every taxonomic rank reported by `taxonkit filter --list-ranks`.

    Parameters
    ----------
    rank_file : str, default None
        Path to a rank definition and order file, forwarded to taxonkit as
        `--rank-file`; when omitted, taxonkit falls back to its own default
        ranks file
    debug : bool, default False
        Print debugging output, e.g., system calls to `taxonkit`

    Returns
    -------
    list
        The ranks, in the order in which taxonkit printed them.

    >>> import pytaxonkit
    >>> ranks = pytaxonkit.list_ranks_db()
    >>> ranks[:5]
    ['superkingdom', 'kingdom', 'subkingdom', 'superphylum', 'phylum']
    '''
    command = ['taxonkit', 'filter', '--list-ranks']
    if rank_file:  # pragma: no cover
        command += ['--rank-file', rank_file]
    if debug:
        log(*command)
    process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    # Nothing is fed on stdin; whatever taxonkit writes to stderr is read but not used.
    stdout, _stderr = process.communicate(input='')
    # One rank per output line, parsed as a single-column table.
    table = pandas.read_csv(StringIO(stdout), header=None, names=['Rank'], index_col=False)
    return pylist(table['Rank'])
def test_list_take(xs: PyList[int], x: int):
    '''Check `take` on a frozen list against plain list slicing.

    Taking `x` items from the frozen list built from `xs` must give `xs[:x]`.
    A `ValueError` is accepted only when `x` is larger than `len(xs)`.
    '''
    try:
        taken: FrozenList[int] = frozenlist.of_seq(xs).take(x)
        assert pylist(taken) == xs[:x]
    except ValueError:
        assert x > len(xs)
def test_list_slice(xs: PyList[int], x: int, y: int):
    '''Check that slicing a frozen list with `[x:y]` matches slicing `xs` itself.'''
    want = xs[x:y]
    sliced: FrozenList[int] = frozenlist.of_seq(xs)[x:y]
    got = pylist(sliced)
    assert got == want
def filter(ids, threads=None, equal_to=None, higher_than=None, lower_than=None,
           discard_norank=False, save_predictable=False, discard_root=False, root_taxid=None,
           blacklist=None, rank_file=None, debug=False):
    '''Keep or drop taxids according to their taxonomic rank

    Builds and runs a `taxonkit filter` command, feeding the taxids on
    standard input and reading the surviving taxids from standard output.

    Parameters
    ----------
    ids : list or iterable
        Taxids to filter (ints or strings are ok)
    threads : int
        Override the default taxonkit threads setting
    equal_to : str or list, default None
        Retain only taxa at the given rank(s); a single string, or a list or
        tuple of strings that is joined with commas
    higher_than : str, default None
        Retain only taxa ranked higher than this rank
    lower_than : str, default None
        Retain only taxa ranked lower than this rank
    discard_norank : bool, default False
        Drop generic ranks that have no explicit ranking order ("no rank" and
        "clade")
    save_predictable : bool, default False
        With `discard_norank=True`, keep some special unordered ranks when the
        rank of the closest higher node is still lower than the rank cutoff
    discard_root : bool, default False
        Drop the root taxon
    root_taxid : int or str
        Override the taxid of the root taxon
    blacklist : list of strs
        Ranks to exclude
    rank_file : str, default None
        Path to a rank definition and order file, forwarded to taxonkit as
        `--rank-file`; when omitted, taxonkit falls back to its own default
        ranks file
    debug : bool, default False
        Print debugging output, e.g., system calls to `taxonkit`

    Returns
    -------
    list
        The taxids that pass the requested filters.

    Raises
    ------
    ValueError
        If both `higher_than` and `lower_than` are given.

    >>> import pytaxonkit
    >>> taxids = [131567, 2, 1783257, 74201, 203494, 48461, 1647988, 239934, 239935, 349741]
    >>> pytaxonkit.filter(taxids, blacklist=['family', 'species'])
    [131567, 2, 1783257, 74201, 203494, 48461, 239934, 349741]
    >>> pytaxonkit.filter(taxids, lower_than='genus')
    [131567, 1783257, 239935, 349741]
    '''
    # The two range bounds are mutually exclusive; reject the combination up front.
    if not (higher_than is None or lower_than is None):
        raise ValueError('cannot specify "higher_than" and "lower_than" simultaneously')

    # taxonkit reads one taxid per line from standard input.
    stdin_text = '\n'.join(str(taxid) for taxid in ids)

    command = ['taxonkit', 'filter']
    if threads:
        command += ['--threads', validate_threads(threads)]
    if equal_to:
        ranks = ','.join(equal_to) if isinstance(equal_to, (pylist, tuple)) else equal_to
        command += ['--equal-to', ranks]
    if higher_than:
        command += ['--higher-than', higher_than]
    if lower_than:
        command += ['--lower-than', lower_than]
    # Plain on/off switches, appended in a fixed order.
    switches = (
        (discard_norank, '--discard-noranks'),
        (save_predictable, '--save-predictable-norank'),
        (discard_root, '--discard-root'),
    )
    for enabled, flag in switches:
        if enabled:
            command.append(flag)
    if blacklist:
        command += ['--black-list', ','.join(blacklist)]
    if root_taxid:  # pragma: no cover
        command += ['--root-taxid', str(root_taxid)]
    if rank_file:  # pragma: no cover
        command += ['--rank-file', rank_file]
    if debug:
        log(*command)

    process = Popen(command, stdin=PIPE, stdout=PIPE, stderr=PIPE, universal_newlines=True)
    # Whatever taxonkit writes to stderr is read but not used.
    stdout, _stderr = process.communicate(input=stdin_text)
    # One taxid per output line, parsed as a single-column table.
    table = pandas.read_csv(StringIO(stdout), header=None, names=['TaxID'], index_col=False)
    return pylist(table['TaxID'])
def test_list_skip_last(xs: PyList[int], x: int):
    '''Check that `skip_last(x)` on a frozen list built from `xs` equals `xs[:-x]`.'''
    want = xs[:-x]
    trimmed: FrozenList[int] = frozenlist.of_seq(xs).skip_last(x)
    got = pylist(trimmed)
    assert got == want
def test_list_take_last(xs: PyList[int], x: int):
    '''Check that `take_last(x)` on a frozen list built from `xs` equals `xs[-x:]`.'''
    want = xs[-x:]
    tail: FrozenList[int] = frozenlist.of_seq(xs).take_last(x)
    got = pylist(tail)
    assert got == want