def testCachedValidation(self):
    """Check that validating the same study twice is served from the cache.

    The study ``xy_10`` is validated twice with the same SHA. The second
    call must bump ``_cache_hits`` by exactly one and return a result equal
    to the first. The annotated NexSON is then dumped to stdout.

    The test silently does nothing when ``dogpile.cache`` cannot be
    imported (presumably the cache backend is optional -- confirm).
    """
    try:
        # noinspection PyPackageRequirements
        import dogpile.cache  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # Only a missing package should disable the test; any other failure
        # (including KeyboardInterrupt) must propagate instead of being hidden.
        return
    repos = pathmap.get_test_repos()
    p = Phylesystem(repos)
    nexson, sha = p.return_study('xy_10')
    first_result = p.add_validation_annotation(nexson, sha)
    hits_before = p._cache_hits
    second_result = p.add_validation_annotation(nexson, sha)
    self.assertEqual(1 + hits_before, p._cache_hits)
    self.assertEqual(first_result, second_result)
    write_as_json(nexson, sys.stdout)
def phylesystem_obj(self):
    """Return the phylesystem wrapper, building it on first use.

    The wrapper is stored in ``self._phylesystem_obj``. When that slot is
    still ``None`` a ``Phylesystem`` is created if ``self._src_code`` equals
    ``_GET_LOCAL``; otherwise a ``PhylesystemProxy`` is built from
    ``self.phylesystem_config``.
    """
    # Fast path: the wrapper has already been created.
    if self._phylesystem_obj is not None:
        return self._phylesystem_obj
    if self._src_code == _GET_LOCAL:
        self._phylesystem_obj = Phylesystem()
        return self._phylesystem_obj
    self._phylesystem_obj = PhylesystemProxy(self.phylesystem_config)
    return self._phylesystem_obj
def testCachedValidation(self):
    """Check that validating the same study twice is served from the cache.

    The study ``xy_10`` is validated twice with the same SHA. The second
    call must bump ``_cache_hits`` by exactly one and return a result equal
    to the first. The annotated NexSON is then dumped to stdout.

    The test silently does nothing when ``dogpile.cache`` cannot be
    imported (presumably the cache backend is optional -- confirm).
    """
    try:
        import dogpile.cache  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # Only a missing package should disable the test; any other failure
        # (including KeyboardInterrupt) must propagate instead of being hidden.
        return
    repos = pathmap.get_test_repos()
    p = Phylesystem(repos)
    nexson, sha = p.return_study('xy_10')
    first_result = p.add_validation_annotation(nexson, sha)
    hits_before = p._cache_hits
    second_result = p.add_validation_annotation(nexson, sha)
    self.assertEqual(1 + hits_before, p._cache_hits)
    self.assertEqual(first_result, second_result)
    # Local imports kept inside the function, as in the original.
    import sys
    from peyotl import write_as_json
    write_as_json(nexson, sys.stdout)
def testCachedValidation(self):
    """Check that validating the same study twice is served from the cache.

    The study ``xy_10`` is validated twice with the same SHA. The second
    call must bump ``_cache_hits`` by exactly one and return a result equal
    to the first. The annotated NexSON is then dumped to stdout.

    The test silently does nothing when ``dogpile.cache`` cannot be
    imported (presumably the cache backend is optional -- confirm).
    """
    try:
        import dogpile.cache  # noqa: F401 -- imported only to probe availability
    except ImportError:
        # Only a missing package should disable the test; any other failure
        # (including KeyboardInterrupt) must propagate instead of being hidden.
        return
    repos = pathmap.get_test_repos()
    p = Phylesystem(repos)
    nexson, sha = p.return_study('xy_10')
    first_result = p.add_validation_annotation(nexson, sha)
    hits_before = p._cache_hits
    second_result = p.add_validation_annotation(nexson, sha)
    self.assertEqual(1 + hits_before, p._cache_hits)
    self.assertEqual(first_result, second_result)
    # Local imports kept inside the function, as in the original.
    import sys
    from peyotl import write_as_json
    write_as_json(nexson, sys.stdout)
def phylesystem_obj(self):
    """Return the phylesystem wrapper, building it on first use.

    The wrapper is stored in ``self._phylesystem_obj``. When that slot is
    still ``None`` and ``self._src_code`` equals ``_GET_LOCAL``, a
    ``Phylesystem`` is created with ``repos_dict=self._locals_repo_dict``;
    otherwise a ``PhylesystemProxy`` is built from
    ``self.phylesystem_config``.
    """
    needs_init = self._phylesystem_obj is None
    if needs_init and self._src_code == _GET_LOCAL:
        self._phylesystem_obj = Phylesystem(
            repos_dict=self._locals_repo_dict)
    elif needs_init:
        self._phylesystem_obj = PhylesystemProxy(
            self.phylesystem_config)
    return self._phylesystem_obj
#!/usr/bin/env python
"""Print the filepath of every study that the peyotl library can find in
the phylesystem directories, one path per line (see README for discussion
of configuration).
"""
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem

phy = Phylesystem()
# iter_study_filepaths() yields (study_id, filepath) pairs; only the path
# is reported.
for id_and_path in phy.iter_study_filepaths():
    study_id, filepath = id_and_path
    print(filepath)
#!/usr/bin/env python
"""Examines the tags (ot:tag) study. Prints out a list
of each unique tag used in the studies """
from peyotl.manip import iter_trees
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import get_nexml_el
from collections import defaultdict
import codecs
import sys

phy = Phylesystem()
study_dict = defaultdict(int)  # tag -> occurrences among study-level "^ot:tag" values
tree_dict = defaultdict(int)   # tag -> occurrences among tree-level "^ot:tag" values
out = codecs.getwriter("utf-8")(sys.stdout)
for study_id, n in phy.iter_study_objs():
    nexml = get_nexml_el(n)
    # A study's "^ot:tag" may be a single value or a list of values.
    t = nexml.get("^ot:tag")
    if t:
        if isinstance(t, list):
            for tag in t:
                study_dict[tag] += 1
        else:
            study_dict[t] += 1
    for trees_group_id, tree_id, tree in iter_trees(n):
        t = tree.get("^ot:tag")
        if t:
            if isinstance(t, list):
                for tag in t:
                    # Tree-level tags belong in tree_dict; they were being
                    # added to study_dict, inflating the study counts and
                    # leaving tree_dict permanently empty.
                    tree_dict[tag] += 1
            # NOTE(review): a non-list tree tag is not tallied in the
            # visible code -- confirm whether an ``else`` branch mirroring
            # the study-level handling is intended.
mattype="fasta" print study_id ''' study_id = sys.argv[1] tree_id = sys.argv[2] seqaln = sys.argv[3] mattype = sys.argv[4] runname = sys.argv[5] #Fixed values E_VALUE_THRESH = 0.04 ott_ncbi = "../ott_ncbi" #TODO config file Entrez.email = "*****@*****.**" phy = Phylesystem() n = phy.return_study(study_id)[0] api_wrapper.study.get(study_id, tree=tree_id) ##This is a weird way to get the ingroup node, but I need the OTT ids anyhow. m = extract_tree(n, tree_id, PhyloSchema('newick', output_nexml2json='1.2.1', content="tree", tip_label="ot:ottId"), subtree_id="ingroup") otu_dict = gen_otu_dict(n) ottids = [] for oid, o in otu_dict.items(): try:
from peyotl.phylesystem.phylesystem_umbrella import Phylesystem
from peyotl.nexson_syntax import get_nexml_el
from peyotl.manip import iter_otus
from collections import defaultdict
import argparse
import codecs
import sys
import os

# Command line: a single positional argument naming the output file.
description = __doc__
prog = os.path.basename(sys.argv[0])
parser = argparse.ArgumentParser(prog=prog, description=description)
parser.add_argument('output')
args = parser.parse_args(sys.argv[1:])

# Refuse to overwrite an existing output file.
if os.path.exists(args.output):
    sys.exit('{} already exists! Exiting...\n'.format(args.output))

phy = Phylesystem()
with codecs.open(args.output, 'w', encoding='utf-8') as out:
    num_unmapped = 0
    for study_id, n in phy.iter_study_objs():
        for og, otu_id, otu in iter_otus(n):
            # OTUs without an OTT taxon name are only counted, not written.
            if '^ot:ottTaxonName' not in otu:
                num_unmapped += 1
                continue
            # One tab-separated record per mapped OTU:
            # study id, otu id, original label, OTT taxon name.
            out.write(u'{s}\t{o}\t{r}\t{m}\n'.format(
                s=study_id,
                o=otu_id,
                r=otu['^ot:originalLabel'],
                m=otu['^ot:ottTaxonName']))
    sys.stderr.write('{n:d} unmapped otus\n'.format(n=num_unmapped))
from peyotl.phylesystem.git_workflows import acquire_lock_raise, \ commit_and_try_merge2master, \ delete_study, \ GitWorkflowError, \ merge_from_master from peyotl.phylesystem.phylesystem_umbrella import Phylesystem from peyotl.utility.input_output import read_as_json import unittest import codecs import json import copy from peyotl.test.support import pathmap from peyotl.utility import get_logger _LOG = get_logger(__name__) phylesystem = Phylesystem(pathmap.get_test_repos()) _MINI_PHYL_SHA1 = '2d59ab892ddb3d09d4b18c91470b8c1c4cca86dc' _SID = 'xy_10' _AUTH = { 'name': 'test_name', 'email': '*****@*****.**', 'login': '******', } class TestPhylesystemDel(unittest.TestCase): def testDelStudy(self): ga = phylesystem.create_git_action(_SID) ga.acquire_lock() try:
#!/usr/bin/env python from peyotl.phylesystem.phylesystem_umbrella import Phylesystem from peyotl.nexson_syntax import extract_tree_nexson import sys try: phylsys = Phylesystem() except Exception as e: sys.stderr.write('count_trees.py: Exception: {}\n'.format(e.message)) sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem ' \ 'instance. Double check your configuration (see ' \ 'http://opentreeoflife.github.io/peyotl/configuration/ for info).') try: print_freq = 500 num_trees = 0 num_studies = 0 max_trees_per_study = 0 biggest_study = None studies_without_trees = [] sys.stderr.write('count_trees.py: beginning loop over studies...\n') for study_id, nexson in phylsys.iter_study_objs(): num_studies += 1 try: nt = len(extract_tree_nexson(nexson, tree_id=None)) except: sys.stderr.write( 'Problem extracting trees from study {}'.format(study_id)) raise if nt == 0: studies_without_trees.append(study_id) else: num_trees += nt
#!/usr/bin/env python from __future__ import print_function from peyotl.phylesystem.phylesystem_umbrella import Phylesystem from peyotl.nexson_syntax import extract_tree_nexson import sys try: phylsys = Phylesystem() except Exception as e: sys.stderr.write('count_trees.py: Exception: {}\n'.format(e.message)) sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem ' 'instance. Double check your configuration (see ' 'http://opentreeoflife.github.io/peyotl/configuration/ for info).') try: print_freq = 500 num_trees = 0 num_studies = 0 max_trees_per_study = 0 biggest_study = None studies_without_trees = [] sys.stderr.write('count_trees.py: beginning loop over studies...\n') for study_id, nexson in phylsys.iter_study_objs(): num_studies += 1 try: nt = len(extract_tree_nexson(nexson, tree_id=None)) except: sys.stderr.write('Problem extracting trees from study {}'.format(study_id)) raise if nt == 0: studies_without_trees.append(study_id) else: num_trees += nt
def write_tree_list(outpath):
    """Score every tree in the phylesystem and write the ranked list as CSV.

    For each study yielded by ``Phylesystem().iter_study_objs()`` and each
    tree returned by ``extract_tree_nexson``, a ``Row`` is filled with:

    * ``n_intended``  -- 0 if the study's ``^ot:notIntendedForSynthesis``
      equals True, otherwise 2 (per study);
    * ``n_preferred`` -- 2 if the tree is listed in the study's
      ``^ot:candidateTreeForSynthesis`` or is the study's only tree while no
      candidates are listed, 1 if no candidates are listed and the study has
      several trees, 0 if other trees are listed but not this one;
    * ``n_ctype`` / ``root_confirmed`` / ``n_ingroup`` -- 1 when
      ``^ot:curatedType`` / ``^ot:specifiedRoot`` / ``^ot:inGroupClade``
      is set, else 0;
    * ``n_synth`` -- 1 if ``study@tree`` is in the synthesis list;
    * tip / OTT / new counts from ``examine_tree``;
    * conflict / resolve counts from the conflict analyses (0 if absent).

    Rows are sorted by descending score, then by the tie-breakers given in
    the sort key, and written to ``outpath`` under a header row. Summary
    counts are printed to stdout, plus a progress line every 500 trees.

    :param outpath: path of the CSV file to write (opened in ``'w'`` mode
        with UTF-8 encoding, so an existing file is overwritten).
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        # Equality (not truthiness) is kept on purpose so that exactly the
        # same stored values as before mark a study as not intended.
        n_intended = 0 if not_intended == True else 2  # noqa: E712
        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)
        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()
            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id
            row.n_intended = n_intended  # per study
            if not candidates:
                # No selection(s) made
                if len(tid_tree_otug) == 1:
                    n_preferred = 2  # Only one tree; use it
                else:
                    n_preferred = 1  # More than one tree; decision required
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2  # This is a preferred tree; use it
            else:
                n_preferred = 0  # Not preferred, another is; do not use
            row.n_preferred = n_preferred
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0
            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0
            row.n_synth = 1 if long_id in trees_in_synthesis else 0
            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0
            (row.tip_count, row.ott_count, row.new_count) = \
                examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis)
            row.conflict_count = 0
            row.resolve_count = 0
            analysis = conflict_analyses.get(long_id)
            if analysis is not None:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])
            row.score = ((row.new_count + row.resolve_count)
                         - (row.conflict_count * 20)
                         + (row.n_ingroup * 10)
                         + (row.n_preferred * 50)
                         + (row.n_intended * 100))
            table.append(row)
            if tree_count % 500 == 0:
                # Single-argument print() emits the same text on Python 2
                # and 3; %-formatting (not str.format) keeps unicode ids
                # working on Python 2.
                print('%s %s %s' % (tree_count, long_id, ctype))
    table.sort(key=lambda r: (
        -r.score,
        r.n_intended == 0,   # whether intended for synthesis
        -r.n_preferred,      # whether preferred
        -r.n_ingroup,        # whether ingroup is designated
        r.conflict_count,    # number of synth tree conflicts
        -r.new_count,        # number of OTUs mapped to OTT
        -r.n_ctype,          # whether there's a 'curated type'
        -r.tip_count,        # total number of tips (for comparison)
    ))
    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['tree', 'intended', 'preferred', 'has ingroup',
                         'has method', 'root confirmed', 'in synth',
                         '#tips', '#mapped', '#new', '#resolved',
                         '#conflicts', 'score'])
        for row in table:
            writer.writerow([row.id, row.n_intended, row.n_preferred,
                             row.n_ingroup, row.n_ctype, row.root_confirmed,
                             row.n_synth, row.tip_count, row.ott_count,
                             row.new_count, row.resolve_count,
                             row.conflict_count, row.score])
    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)
def write_tree_list(outpath):
    """Score every tree in the phylesystem and write the ranked list as CSV.

    For each study yielded by ``Phylesystem().iter_study_objs()`` and each
    tree returned by ``extract_tree_nexson``, a ``Row`` is filled with:

    * ``n_intended``  -- 0 if the study's ``^ot:notIntendedForSynthesis``
      equals True, otherwise 2 (per study);
    * ``n_preferred`` -- 2 if the tree is listed in the study's
      ``^ot:candidateTreeForSynthesis`` or is the study's only tree while no
      candidates are listed, 1 if no candidates are listed and the study has
      several trees, 0 if other trees are listed but not this one;
    * ``n_ctype`` / ``root_confirmed`` / ``n_ingroup`` -- 1 when
      ``^ot:curatedType`` / ``^ot:specifiedRoot`` / ``^ot:inGroupClade``
      is set, else 0;
    * ``n_synth`` -- 1 if ``study@tree`` is in the synthesis list;
    * tip / OTT / new counts from ``examine_tree``;
    * conflict / resolve counts from the conflict analyses (0 if absent).

    Rows are sorted by descending score, then by the tie-breakers given in
    the sort key, and written to ``outpath`` under a header row. Summary
    counts are printed to stdout, plus a progress line every 500 trees.

    :param outpath: path of the CSV file to write (opened in ``'w'`` mode
        with UTF-8 encoding, so an existing file is overwritten).
    """
    conflict_analyses = read_conflict_analyses()
    trees_in_synthesis = read_synthesis_list()
    taxa_in_synthesis = read_synthesis_taxa()
    phylesystem = Phylesystem()
    study_count = 0
    tree_count = 0
    preferred_count = 0
    table = []
    for study_id, nexson in phylesystem.iter_study_objs():
        study_count += 1
        nexml_el = nexson[u'nexml']
        not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis')
        # Equality (not truthiness) is kept on purpose so that exactly the
        # same stored values as before mark a study as not intended.
        n_intended = 0 if not_intended == True else 2  # noqa: E712
        candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis')
        if candidates is None:
            candidates = []
        tid_tree_otug = extract_tree_nexson(nexson, tree_id=None)
        for (tree_id, tree, otu_group) in tid_tree_otug:
            tree_count += 1
            row = Row()
            long_id = '%s@%s' % (study_id, tree_id)
            row.id = long_id
            row.n_intended = n_intended  # per study
            if not candidates:
                # No selection(s) made
                if len(tid_tree_otug) == 1:
                    n_preferred = 2  # Only one tree; use it
                else:
                    n_preferred = 1  # More than one tree; decision required
            elif tree_id in candidates:
                preferred_count += 1
                n_preferred = 2  # This is a preferred tree; use it
            else:
                n_preferred = 0  # Not preferred, another is; do not use
            row.n_preferred = n_preferred
            ctype = tree.get('^ot:curatedType')
            row.n_ctype = 1 if (ctype is not None and ctype != '') else 0
            # whether a curator has confirmed the root
            root = tree.get('^ot:specifiedRoot')
            row.root_confirmed = 1 if (root is not None and root != '') else 0
            row.n_synth = 1 if long_id in trees_in_synthesis else 0
            ingroup_node_id = tree.get('^ot:inGroupClade')
            row.n_ingroup = 1 if ingroup_node_id is not None else 0
            (row.tip_count, row.ott_count, row.new_count) = \
                examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis)
            row.conflict_count = 0
            row.resolve_count = 0
            analysis = conflict_analyses.get(long_id)
            if analysis is not None:
                row.conflict_count = int(analysis[1])
                row.resolve_count = int(analysis[2])
            row.score = ((row.new_count + row.resolve_count)
                         - (row.conflict_count * 20)
                         + (row.n_ingroup * 10)
                         + (row.n_preferred * 50)
                         + (row.n_intended * 100))
            table.append(row)
            if tree_count % 500 == 0:
                # Single-argument print() emits the same text on Python 2
                # and 3; %-formatting (not str.format) keeps unicode ids
                # working on Python 2.
                print('%s %s %s' % (tree_count, long_id, ctype))
    table.sort(key=lambda r: (
        -r.score,
        r.n_intended == 0,   # whether intended for synthesis
        -r.n_preferred,      # whether preferred
        -r.n_ingroup,        # whether ingroup is designated
        r.conflict_count,    # number of synth tree conflicts
        -r.new_count,        # number of OTUs mapped to OTT
        -r.n_ctype,          # whether there's a 'curated type'
        -r.tip_count,        # total number of tips (for comparison)
    ))
    with codecs.open(outpath, 'w', encoding='utf-8') as outfile:
        writer = csv.writer(outfile)
        writer.writerow([
            'tree', 'intended', 'preferred', 'has ingroup', 'has method',
            'root confirmed', 'in synth', '#tips', '#mapped', '#new',
            '#resolved', '#conflicts', 'score'
        ])
        for row in table:
            writer.writerow([
                row.id, row.n_intended, row.n_preferred, row.n_ingroup,
                row.n_ctype, row.root_confirmed, row.n_synth, row.tip_count,
                row.ott_count, row.new_count, row.resolve_count,
                row.conflict_count, row.score
            ])
    print('studies: %s' % study_count)
    print('trees: %s' % tree_count)
    print('preferred: %s' % preferred_count)