def testTreesCulledNonmatcingConvViaPS(self):
    # Serializing one tree through PhyloSchema with cull_nonmatching='true'
    # is expected to leave only 'tree2' in the input object as well as in
    # the JSON text that comes back.
    def assert_only_tree2(blob):
        # Each extracted entry is indexable; element 0 is the tree id.
        found = extract_tree_nexson(blob, tree_id=None)
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0][0], 'tree2')

    source_blob = pathmap.nexson_obj('9/v1.2.json')
    # Sanity check: the fixture starts out with two trees.
    self.assertEqual(len(extract_tree_nexson(source_blob, tree_id=None)), 2)
    schema = PhyloSchema('nexson',
                         content='tree',
                         content_id='tree2',
                         version='1.2.1',
                         cull_nonmatching='true')
    serialized = schema.serialize(source_blob)
    # The object that was passed in must itself have been pruned.
    assert_only_tree2(source_blob)
    self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
    # The serialized text must round-trip to a blob holding only 'tree2'.
    assert_only_tree2(json.loads(serialized))
def testTreesCulledNonmatcingConvViaPS(self):
    # Checks that culling non-matching trees during serialization leaves
    # exactly 'tree2' behind, in the input blob and in the emitted JSON.
    nexson_blob = pathmap.nexson_obj('9/v1.2.json')
    trees_before = extract_tree_nexson(nexson_blob, tree_id=None)
    self.assertEqual(len(trees_before), 2)

    schema_kwargs = {
        'content': 'tree',
        'content_id': 'tree2',
        'version': '1.2.1',
        'cull_nonmatching': 'true',
    }
    json_text = PhyloSchema('nexson', **schema_kwargs).serialize(nexson_blob)

    # After serialization the input blob is expected to hold one tree only.
    trees_after = extract_tree_nexson(nexson_blob, tree_id=None)
    self.assertEqual(len(trees_after), 1)
    self.assertEqual(trees_after[0][0], 'tree2')

    self.assertTrue(json_text.startswith('{'))  # pylint: disable=E1103

    # The same must hold for the blob parsed back from the output text.
    reparsed = json.loads(json_text)
    trees_reparsed = extract_tree_nexson(reparsed, tree_id=None)
    self.assertEqual(len(trees_reparsed), 1)
    self.assertEqual(trees_reparsed[0][0], 'tree2')
def testTreesCulledNonmatcingConvViaPSV0(self):
    """Verify that culling does not break conversion to other forms of NexSON.

    The fixture is serialized as version '0.0.0' with cull_nonmatching
    enabled; only 'tree2' may remain in the input object and in the output.
    """
    def assert_only_tree2(blob):
        # Each extracted entry is indexable; element 0 is the tree id.
        found = extract_tree_nexson(blob, tree_id=None)
        self.assertEqual(len(found), 1)
        self.assertEqual(found[0][0], 'tree2')

    source_blob = pathmap.nexson_obj('9/v1.2.json')
    # Sanity check: the fixture starts out with two trees.
    self.assertEqual(len(extract_tree_nexson(source_blob, tree_id=None)), 2)
    schema = PhyloSchema('nexson',
                         content='tree',
                         content_id='tree2',
                         version='0.0.0',
                         cull_nonmatching='true')
    serialized = schema.serialize(source_blob)
    # The object that was passed in must itself have been pruned.
    assert_only_tree2(source_blob)
    self.assertTrue(serialized.startswith('{'))  # pylint: disable=E1103
    round_tripped = json.loads(serialized)
    # The output must be detected as the requested target version.
    self.assertEqual(detect_nexson_version(round_tripped), '0.0.0')
    assert_only_tree2(round_tripped)
def testTreesCulledNonmatcingConvViaPSV0(self):
    """Verify that culling does not break conversion to other forms of NexSON.

    Serializes the fixture as version '0.0.0' while culling every tree other
    than 'tree2', then inspects both the input blob and the parsed output.
    """
    nexson_blob = pathmap.nexson_obj('9/v1.2.json')
    trees_before = extract_tree_nexson(nexson_blob, tree_id=None)
    self.assertEqual(len(trees_before), 2)

    schema_kwargs = {
        'content': 'tree',
        'content_id': 'tree2',
        'version': '0.0.0',
        'cull_nonmatching': 'true',
    }
    json_text = PhyloSchema('nexson', **schema_kwargs).serialize(nexson_blob)

    # After serialization the input blob is expected to hold one tree only.
    trees_after = extract_tree_nexson(nexson_blob, tree_id=None)
    self.assertEqual(len(trees_after), 1)
    self.assertEqual(trees_after[0][0], 'tree2')

    self.assertTrue(json_text.startswith('{'))  # pylint: disable=E1103

    reparsed = json.loads(json_text)
    # The emitted document must be recognised as the requested version.
    self.assertEqual(detect_nexson_version(reparsed), '0.0.0')
    trees_reparsed = extract_tree_nexson(reparsed, tree_id=None)
    self.assertEqual(len(trees_reparsed), 1)
    self.assertEqual(trees_reparsed[0][0], 'tree2')
def find_tree_and_otus_in_nexson(nexson, tree_id):
    """Return the ``(tree, otus)`` pair for `tree_id` in `nexson`.

    The lookup is delegated to ``extract_tree_nexson``.  Unless it yields
    exactly one entry, ``(None, None)`` is returned instead.
    """
    matches = extract_tree_nexson(nexson, tree_id)
    if len(matches) == 1:
        # Each entry is a (tree id, tree, otus) triple; the id is not needed.
        _matched_id, tree, otus = matches[0]
        return tree, otus
    return None, None
def find_tree_and_otus_in_nexson(nexson, tree_id):
    """Look up one tree and its OTUs in a NexSON blob.

    Returns ``(tree, otus)`` when ``extract_tree_nexson`` finds exactly one
    entry for `tree_id`; returns ``(None, None)`` for zero or several entries.
    """
    hits = extract_tree_nexson(nexson, tree_id)
    found_exactly_one = len(hits) == 1
    if not found_exactly_one:
        return None, None
    # Unpack the single (tree id, tree, otus) triple and drop the id.
    _unused_id, tree, otus = hits[0]
    return tree, otus
sys.stderr.write('count_trees.py: Exception: {}\n'.format(e.message)) sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem ' \ 'instance. Double check your configuration (see ' \ 'http://opentreeoflife.github.io/peyotl/configuration/ for info).') try: print_freq = 500 num_trees = 0 num_studies = 0 max_trees_per_study = 0 biggest_study = None studies_without_trees = [] sys.stderr.write('count_trees.py: beginning loop over studies...\n') for study_id, nexson in phylsys.iter_study_objs(): num_studies += 1 try: nt = len(extract_tree_nexson(nexson, tree_id=None)) except: sys.stderr.write( 'Problem extracting trees from study {}'.format(study_id)) raise if nt == 0: studies_without_trees.append(study_id) else: num_trees += nt if nt > max_trees_per_study: biggest_study = study_id max_trees_per_study = nt if num_studies % print_freq == 0: sys.stderr.write(' ...{d} studies read. Still going...\n'.format( d=num_studies))
sys.stderr.write('count_trees.py: Exception: {}\n'.format(e.message)) sys.exit('count_trees.py: There was a problem creating a wrapper around your phylesystem ' 'instance. Double check your configuration (see ' 'http://opentreeoflife.github.io/peyotl/configuration/ for info).') try: print_freq = 500 num_trees = 0 num_studies = 0 max_trees_per_study = 0 biggest_study = None studies_without_trees = [] sys.stderr.write('count_trees.py: beginning loop over studies...\n') for study_id, nexson in phylsys.iter_study_objs(): num_studies += 1 try: nt = len(extract_tree_nexson(nexson, tree_id=None)) except: sys.stderr.write('Problem extracting trees from study {}'.format(study_id)) raise if nt == 0: studies_without_trees.append(study_id) else: num_trees += nt if nt > max_trees_per_study: biggest_study = study_id max_trees_per_study = nt if num_studies % print_freq == 0: sys.stderr.write(' ...{d} studies read. Still going...\n'.format(d=num_studies)) except: sys.exit('Unexpected error in iteration, please report this bug.')
def write_tree_list(outpath): conflict_analyses = read_conflict_analyses() trees_in_synthesis = read_synthesis_list() taxa_in_synthesis = read_synthesis_taxa() phylesystem = Phylesystem() study_count = 0 tree_count = 0 preferred_count = 0 table = [] for study_id, nexson in phylesystem.iter_study_objs(): study_count += 1 nexml_el = nexson[u'nexml'] n_intended = 1 not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis') if not_intended == True: n_intended = 0 else: n_intended = 2 candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis') if candidates == None: candidates = [] tid_tree_otug = extract_tree_nexson(nexson, tree_id=None) for (tree_id, tree, otu_group) in tid_tree_otug: tree_count += 1 row = Row() # otu_group = otu_groups[ogi]['otuById'] long_id = '%s@%s' % (study_id, tree_id) row.id = long_id row.n_intended = n_intended # per study if len(candidates) == 0: # No selection(s) made if len(tid_tree_otug) == 1: n_preferred = 2 # Only one tree; use it else: n_preferred = 1 # More than one tree; decision required else: if tree_id in candidates: preferred_count += 1 n_preferred = 2 # This is a preferred tree; use it else: n_preferred = 0 # Not preferred, another is; do not use row.n_preferred = n_preferred ctype = tree.get('^ot:curatedType') n_ctype = 0 if ctype != None and ctype != '': n_ctype = 1 row.n_ctype = n_ctype # whether a curator has confirmed the root root = tree.get('^ot:specifiedRoot') root_confirmed = 0 if root != None and root != '': root_confirmed = 1 row.root_confirmed = root_confirmed row.n_synth = 1 if long_id in trees_in_synthesis else 0 ingroup_node_id = tree.get('^ot:inGroupClade') row.n_ingroup = (1 if (ingroup_node_id != None) else 0) (row.tip_count, row.ott_count, row.new_count) = \ examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis) row.conflict_count = 0 row.resolve_count = 0 analysis = conflict_analyses.get(long_id) if analysis != None: row.conflict_count = int(analysis[1]) row.resolve_count = int(analysis[2]) 
row.score = ((row.new_count + row.resolve_count) - (row.conflict_count * 20) + (row.n_ingroup * 10) + (row.n_preferred * 50) + (row.n_intended * 100)) table.append(row) if tree_count % 500 == 0: print tree_count, long_id, ctype table.sort(key=lambda row:(-row.score, row.n_intended == 0, # whether intended for synthesis -row.n_preferred, # whether preferred -row.n_ingroup, # whether ingroup is designated row.conflict_count, # number of synth tree conflicts -row.new_count, # number of OTUs mapped to OTT -row.n_ctype, # whether there's a 'curated type' -row.tip_count, # total number of tips (for comparison) )) with codecs.open(outpath, 'w', encoding='utf-8') as outfile: writer = csv.writer(outfile) writer.writerow(['tree', 'intended', 'preferred', 'has ingroup', 'has method', 'root confirmed', 'in synth', '#tips', '#mapped', '#new', '#resolved', '#conflicts', 'score']) for row in table: writer.writerow([row.id, row.n_intended, row.n_preferred, row.n_ingroup, row.n_ctype, row.root_confirmed, row.n_synth, row.tip_count, row.ott_count, row.new_count, row.resolve_count, row.conflict_count, row.score]) print 'studies:', study_count print 'trees:', tree_count print 'preferred:', preferred_count
def write_tree_list(outpath): conflict_analyses = read_conflict_analyses() trees_in_synthesis = read_synthesis_list() taxa_in_synthesis = read_synthesis_taxa() phylesystem = Phylesystem() study_count = 0 tree_count = 0 preferred_count = 0 table = [] for study_id, nexson in phylesystem.iter_study_objs(): study_count += 1 nexml_el = nexson[u'nexml'] n_intended = 1 not_intended = nexml_el.get(u'^ot:notIntendedForSynthesis') if not_intended == True: n_intended = 0 else: n_intended = 2 candidates = nexml_el.get(u'^ot:candidateTreeForSynthesis') if candidates == None: candidates = [] tid_tree_otug = extract_tree_nexson(nexson, tree_id=None) for (tree_id, tree, otu_group) in tid_tree_otug: tree_count += 1 row = Row() # otu_group = otu_groups[ogi]['otuById'] long_id = '%s@%s' % (study_id, tree_id) row.id = long_id row.n_intended = n_intended # per study if len(candidates) == 0: # No selection(s) made if len(tid_tree_otug) == 1: n_preferred = 2 # Only one tree; use it else: n_preferred = 1 # More than one tree; decision required else: if tree_id in candidates: preferred_count += 1 n_preferred = 2 # This is a preferred tree; use it else: n_preferred = 0 # Not preferred, another is; do not use row.n_preferred = n_preferred ctype = tree.get('^ot:curatedType') n_ctype = 0 if ctype != None and ctype != '': n_ctype = 1 row.n_ctype = n_ctype # whether a curator has confirmed the root root = tree.get('^ot:specifiedRoot') root_confirmed = 0 if root != None and root != '': root_confirmed = 1 row.root_confirmed = root_confirmed row.n_synth = 1 if long_id in trees_in_synthesis else 0 ingroup_node_id = tree.get('^ot:inGroupClade') row.n_ingroup = (1 if (ingroup_node_id != None) else 0) (row.tip_count, row.ott_count, row.new_count) = \ examine_tree(tree, otu_group, ingroup_node_id, taxa_in_synthesis) row.conflict_count = 0 row.resolve_count = 0 analysis = conflict_analyses.get(long_id) if analysis != None: row.conflict_count = int(analysis[1]) row.resolve_count = int(analysis[2]) 
row.score = ((row.new_count + row.resolve_count) - (row.conflict_count * 20) + (row.n_ingroup * 10) + (row.n_preferred * 50) + (row.n_intended * 100)) table.append(row) if tree_count % 500 == 0: print tree_count, long_id, ctype table.sort(key=lambda row: ( -row.score, row.n_intended == 0, # whether intended for synthesis -row.n_preferred, # whether preferred -row.n_ingroup, # whether ingroup is designated row.conflict_count, # number of synth tree conflicts -row.new_count, # number of OTUs mapped to OTT -row.n_ctype, # whether there's a 'curated type' -row.tip_count, # total number of tips (for comparison) )) with codecs.open(outpath, 'w', encoding='utf-8') as outfile: writer = csv.writer(outfile) writer.writerow([ 'tree', 'intended', 'preferred', 'has ingroup', 'has method', 'root confirmed', 'in synth', '#tips', '#mapped', '#new', '#resolved', '#conflicts', 'score' ]) for row in table: writer.writerow([ row.id, row.n_intended, row.n_preferred, row.n_ingroup, row.n_ctype, row.root_confirmed, row.n_synth, row.tip_count, row.ott_count, row.new_count, row.resolve_count, row.conflict_count, row.score ]) print 'studies:', study_count print 'trees:', tree_count print 'preferred:', preferred_count
out = codecs.open(outfn, mode='w', encoding='utf-8') except: sys.exit('validate_ot_nexson: Could not open output filepath "{fn}"\n'.format(fn=outfn)) else: out = codecs.getwriter('utf-8')(sys.stdout) try: nexson = read_as_json(inp_filepath) except ValueError as vx: _LOG.error('Not valid JSON.') if args.verbose: raise vx else: sys.exit(1) except Exception as nx: _LOG.error(nx.value) sys.exit(1) convert_nexson_format(nexson, BY_ID_HONEY_BADGERFISH) trees = extract_tree_nexson(nexson, tree_id=args.tree_id) if len(trees) == 0: trees = extract_tree_nexson(nexson, tree_id=None) if trees: v = '", "'.join([i[0] for i in trees]) sys.exit('Tree ID {i} not found. Valid IDs for this file are "{l}"\n'.format(i=args.tree_id, l=v)) else: sys.exit('This NexSON has not trees.\n') ott = OTT() for tree_id, tree, otus in trees: tree_proxy = NexsonTreeProxy(tree=tree, tree_id=tree_id, otus=otus) evaluate_tree_rooting(nexson, ott, tree_proxy)
if len(study) == 1: study = '0' + study study2tree.setdefault('pg_' + study, []).append('tree' + tree) pa = PhylesystemAPI(get_from='local') raw_phylsys = pa.phylesystem_obj nexson_version = raw_phylsys.repo_nexml2json for study_id, tree_list in study2tree.items(): if verbose: sys.stderr.write('treelist={t} for study {s}.\n'.format(t=str(tree_list), s=study_id)) try: fp = raw_phylsys.get_filepath_for_study(study_id) blob = read_as_json(fp) nex = get_nexml_el(blob) prev = nex.setdefault('^ot:candidateTreeForSynthesis', []) for tree_id in tree_list: if tree_id not in prev: prev.append(tree_id) i_t_o_list = extract_tree_nexson(blob, tree_id, nexson_version) if not i_t_o_list: sys.stderr.write('tree {t} of study {s} not found !!!\n'.format(t=tree_id, s=study_id)) for tid, tree, otus_group in i_t_o_list: tree['^ot:unrootedTree'] = False tree['^ot:specifiedRoot'] = tree['^ot:rootNodeId'] if not dry_run: write_as_json(blob, fp) except KeyError: sys.stderr.write('study {} not found !!!\n'.format(study_id))