def test_synth_induced(self):
    """Check the tips and the type of the induced synth tree for ``synth_spp``."""
    tre = OT.synth_induced_tree(ott_ids=synth_spp).tree
    # Reuse the tree fetched above rather than issuing a second, identical request.
    leaves = [leaf.taxon.label for leaf in tre.leaf_nodes()]
    # list.sort() sorts in place and returns None, so comparing two .sort()
    # results always passes. sorted() compares the actual labels and leaves
    # the shared expected_tips list untouched.
    assert sorted(leaves) == sorted(expected_tips)
    assert isinstance(tre, dendropy.datamodel.treemodel.Tree)
def get_ott_ids_for_group(group_ott_id, write_file='children_ott_ids.txt', synth_only=False):
    """Return the ott ids of the taxa in the taxonomic subtree of a group.

    Args:
        group_ott_id: ott id of the taxon whose subtree is requested.
        write_file: not used by this function; kept so callers that pass it
            keep working.
        synth_only: if true, ids that ``OT.synth_node_info`` lists under
            ``'unknown'`` are left out of the result.

    Returns:
        A list of ott ids as strings, without the ``ott`` prefix.
    """
    sys.stdout.write('Gathering ott ids from group with ott id {}.\n'.format(group_ott_id))
    subtree = OT.taxon_subtree(ott_id=group_ott_id, label_format='name_and_id')
    # The last whitespace-separated token of each label is taken as the node id.
    nodes = [taxon.label.split()[-1] for taxon in subtree.tree.taxon_namespace]
    if synth_only:
        resp = OT.synth_node_info(node_ids=nodes)
        # Without an 'unknown' entry every requested node is kept. Previously
        # this path left the result unbound and raised UnboundLocalError.
        if 'unknown' in resp.response_dict:
            unknown = set(resp.response_dict['unknown'])
            nodes = [nodeid for nodeid in nodes if nodeid not in unknown]
    # Remove the literal 'ott' prefix. str.strip('ott') would instead strip
    # any run of the characters 'o' and 't' from both ends.
    return [nodeid[3:] if nodeid.startswith('ott') else nodeid for nodeid in nodes]
def get_ott_ids_for_rank(rank, taxonomy_file, synth_only=True):
    """Return the ott ids of all taxa of a given rank listed in a taxonomy file.

    The matching rows are also written to ``<rank>.tsv`` in the directory of
    ``taxonomy_file``.

    Args:
        rank: one of 'species', 'genus', 'family', 'order', 'class'.
        taxonomy_file: path to taxonomy.tsv. Rows are split on tabs; the id
            is read from the first field and the rank from the seventh.
        synth_only: if true, ids that ``OT.synth_node_info`` lists under
            ``'unknown'`` are dropped. (Does not assess whether taxa actually
            appear as monophyletic in synth, e.g. if taxa are broken.)

    Returns:
        A list of ott ids as strings.
    """
    assert rank in ['species', 'genus', 'family', 'order', 'class']
    assert os.path.exists(taxonomy_file)
    # os.path.join keeps the output next to the input even when taxonomy_file
    # has no directory part (a "{}/{}" format would then point at "/").
    output_path = os.path.join(os.path.dirname(taxonomy_file), "{}.tsv".format(rank))
    ott_ids = []
    # Filter in Python instead of a shell pipeline: no shell quoting issues
    # with the path, and splitting on tabs keeps names containing spaces
    # (e.g. binomial species names) in a single field.
    with open(taxonomy_file, "r") as inp, open(output_path, "w") as out:
        for lin in inp:
            lii = lin.rstrip('\r\n').split('\t')
            if len(lii) > 6 and lii[6] == rank:
                out.write(lin)
                ott_ids.append(lii[0])
    if synth_only:
        nodes = ['ott' + idn for idn in ott_ids]
        resp = OT.synth_node_info(node_ids=nodes)
        if 'unknown' in resp.response_dict:
            unknown = set(resp.response_dict['unknown'])
            # Filter the bare ids directly so no prefix has to be stripped back off.
            ott_ids = [idn for idn in ott_ids if 'ott' + idn not in unknown]
    return ott_ids
def test_get_tree(self):
    """Request the tree in NEXUS format and check the decoded text starts with ``#NEXUS``."""
    response = OT.get_tree(
        study_id=study_id,
        tree_id=tree_id,
        label_format='ot:otttaxonname',
        tree_format='nexus',
    )
    raw_content = response.response_dict['content']
    nexus_text = raw_content.decode("utf-8")
    assert nexus_text.startswith("#NEXUS")
def test_relabel(self):
    """Build a labelled induced synth tree from the tips of ot_809/tree1 and check its labels."""
    ## JUST PUT A LIST OF IDS TO SIMPLIFY
    jetz = OT.get_tree(study_id='ot_809',
                       tree_id='tree1',
                       tree_format="newick",
                       label_format="ot:ottId")
    jetz_tree = Tree.get(string=jetz.response_dict['content'].decode(),
                         schema='newick',
                         suppress_internal_node_taxa=True,
                         suppress_leaf_node_taxa=True)
    ott_ids = set()
    for tip in jetz_tree.leaf_node_iter():
        try:
            ott_ids.add(int(tip.label))
        except (TypeError, ValueError):
            # Skip tips whose label is missing or is not an integer. A bare
            # ``except`` here would also swallow KeyboardInterrupt and
            # genuine programming errors.
            continue
    ret = taxonomy_helpers.labelled_induced_synth(ott_ids=list(ott_ids),
                                                  label_format='name')
    tips = [
        tip.taxon.label
        for tip in ret['labelled_tree'].leaf_node_iter()
        if tip.taxon
    ]
    assert len(tips) == 6624
    ret = taxonomy_helpers.labelled_induced_synth(ott_ids=list(ott_ids),
                                                  label_format='name_and_id')
    nodes = [
        node.taxon.label for node in ret['labelled_tree'] if node.taxon
    ]
    assert 'MRCA of taxa in Amazona auropalliata_ott1118 Amazona oratrix_ott1119' in nodes, nodes
def test_get_tree_object(self):
    """Request the tree in 'object' format and check the ``'data'`` entry is a dict."""
    response = OT.get_tree(
        study_id=study_id,
        tree_id=tree_id,
        label_format='ot:ottid',
        tree_format='object',
    )
    payload = response.response_dict['data']
    assert isinstance(payload, dict)
def test_find_trees(self):
    """Search trees by ``ot:ottId`` for ``bg_tax`` and expect study ot_409 among the matches."""
    response = OT.find_trees(bg_tax, search_property='ot:ottId')
    matched_study_ids = []
    for study in response.response_dict['matched_studies']:
        matched_study_ids.append(study['ot:studyId'])
    assert 'ot_409' in matched_study_ids
def test_find_chrono(self):
    """Search trees whose branch length mode is ``ot:time`` and expect ot_1000@tree1 among them."""
    response = OT.find_trees(search_property="ot:branchLengthMode",
                             value="ot:time")
    # One "<study id>@<tree id>" entry per matched tree.
    chronograms = {
        '{}@{}'.format(study['ot:studyId'], tree['ot:treeId'])
        for study in response.response_dict["matched_studies"]
        for tree in study['matched_trees']
    }
    assert 'ot_1000@tree1' in chronograms
def test_taxon_flag(self):
    """Exercise the ``util`` helpers: flag-explanation URL, ott id parsing and link building."""
    expected_flag_url = 'https://github.com/OpenTreeOfLife/reference-taxonomy/wiki/Taxon-flags#flags-leading-to-taxa-being-unavailable-for-tnrs'
    util.get_suppressed_taxon_flag_expl_url()
    flag_url = util.get_suppressed_taxon_flag_expl_url()
    assert flag_url == expected_flag_url, flag_url

    assert util.ott_str_as_int('23') == 23

    expected_taxon_link = 'https://tree.opentreeoflife.org/taxonomy/browse?id=123'
    taxon_link = util._create_link_from_node_info_conf_key_value_pair('ott', 123)
    assert taxon_link == expected_taxon_link

    expected_study_link = 'https://tree.opentreeoflife.org/curator/study/view/ot_1979?tab=trees&tree=tree1&node=node5'
    study_link = util._create_link_from_node_info_conf_key_value_pair('ot_1979@tree1', 'node5')
    assert study_link == expected_study_link

    # Writing the links is only checked for running without raising.
    for node_id in ('mrcaott354607ott374748', 'mrcaott177ott29310'):
        node_blob = OT.synth_node_info(node_id=node_id).response_dict
        util.write_node_info_links_to_input_trees(node_blob)
def _gather_broken_taxa_info(broken_response, label_format):
    """Gather information about broken taxa in a synth tree call.

    Args:
        broken_response: induced_subtree_response['broken'], mapping each
            broken taxon id (e.g. 'ott123') to the node it is remapped to.
        label_format: one of ['name', 'id', 'name_and_id']; any value other
            than 'name' or 'name_and_id' labels taxa by their id.

    Returns:
        (relabel, relabel_ott_ids, broken_dict) where
        relabel: {nodeid: [label_broken_tax1, label_broken_tax2]}
        relabel_ott_ids: {nodeid: [ottid_broken_tax1, ottid_broken_tax2]}
        broken_dict: {ottid_broken_tax1: the response of OT.taxon_info
            (e.g. 'name') plus
            'tax_url': link to the taxon in the taxonomy browser,
            'synth_url': link to the taxon in the synth tree viewer,
            'MRCA_location_in_synth': nodeid the taxon is remapped to,
            'broken_taxa_mapping_to_same_node': the relabel[nodeid] list
                itself (shared, so it includes this taxon's own label and
                any labels appended later for the same node)}
    """
    broken_dict = {}
    relabel = {}
    relabel_ott_ids = {}
    for taxon in broken_response:
        remap = broken_response[taxon]  # Where on the tree is that taxon now?
        # Remove the literal 'ott' prefix. str.strip('ott') would instead
        # strip any run of the characters 'o' and 't' from both ends.
        ott_id = taxon[3:] if taxon.startswith('ott') else taxon
        tax_inf = OT.taxon_info(ott_id=ott_id).response_dict
        tax_inf['tax_url'] = "https://tree.opentreeoflife.org/taxonomy/browse?id={}".format(ott_id)
        tax_inf['synth_url'] = "https://tree.opentreeoflife.org/opentree/argus/ottol@{}".format(ott_id)
        tax_inf['MRCA_location_in_synth'] = remap
        taxon_name = tax_inf.get('name', taxon)
        if label_format == 'name':
            taxon_label = "{}".format(taxon_name)
        elif label_format == 'name_and_id':
            taxon_label = "{}_{}".format(taxon_name, taxon)
        else:
            taxon_label = taxon
        # Sometimes multiple taxa map to the same node or id.
        relabel.setdefault(remap, []).append("{}".format(taxon_label))
        relabel_ott_ids.setdefault(remap, []).append(ott_id)
        tax_inf['broken_taxa_mapping_to_same_node'] = relabel[remap]
        broken_dict[ott_id] = tax_inf
    return relabel, relabel_ott_ids, broken_dict
def _gather_unknown_taxa_info(unknown_ids):
    """Gather taxon info for unknown ids.

    Args:
        unknown_ids: a list of unknown ott ids (strings, with or without the
            'ott' prefix).

    Returns:
        unknown_dict: a dictionary keyed by the ids exactly as passed in;
        each value is the full response of a taxon_info call plus
        'url': the link to the taxon in the taxonomy browser.
    """
    unknown_dict = {}
    for unk in unknown_ids:
        # The taxonomy URL needs the bare number. Remove the literal 'ott'
        # prefix; str.strip('ott') would instead strip any run of the
        # characters 'o' and 't' from both ends.
        uid = unk[3:] if unk.startswith('ott') else unk
        tax_inf = OT.taxon_info(ott_id=uid).response_dict
        tax_inf['url'] = "https://tree.opentreeoflife.org/taxonomy/browse?id={}".format(uid)
        unknown_dict[unk] = tax_inf
    return unknown_dict
def test_get_citations(self):
    """Request citations for a study@tree id and a bare study id; only checks the call completes."""
    study_refs = ['ot_1000@tree1', 'ot_1984']
    cites = OT.get_citations(studies=study_refs)
def test_success_mixed_ids(self):
    """An induced tree request mixing ``node_ids`` and ``ott_ids`` should not raise."""
    prefixed_ids = ['ott770315']
    numeric_ids = [417950]
    OT.synth_induced_tree(node_ids=prefixed_ids, ott_ids=numeric_ids)
def test_success_ott_ids(self):
    """An induced tree request given only integer ``ott_ids`` should not raise."""
    numeric_ids = [417950, 770315]
    OT.synth_induced_tree(ott_ids=numeric_ids)
def test_taxon_mrca(self):
    """Check that the taxonomic MRCA of the two ids is named 'Boreoeutheria'."""
    # NOTE(review): `h**o` parses as the power expression `h ** o`; it looks
    # like a mangled identifier - confirm the intended name.
    mrca_info = OT.taxon_mrca(ott_ids=[bos, h**o]).response_dict
    mrca_name = mrca_info['mrca']['name']
    assert mrca_name == 'Boreoeutheria'
def test_taxon_subtree(self):
    """Fetch the taxonomic subtree for ``bos``; check the newick is a str and the wrapper attributes resolve."""
    response = OT.taxon_subtree(ott_id=bos)
    newick_text = response.response_dict['newick']
    assert isinstance(newick_text, str)
    # These two attributes are only accessed, not checked.
    curl_command = response.curl_call
    taxon = response.taxon
def test_properties(self):
    """Check that the studies_properties response lists expected study and tree properties."""
    properties = OT.studies_properties().response_dict
    # ``assert 'literal', <expr>`` only asserts that the literal is truthy and
    # uses <expr> as the failure message, so it can never fail. The
    # membership test has to be the asserted expression itself.
    assert 'ot:curatorName' in properties['study_properties']
    # NOTE(review): the vacuous original named 'ot:ottid'; 'ot:ottId' is the
    # spelling find_trees is given as a search_property - confirm against
    # the service's tree property list.
    assert 'ot:ottId' in properties['tree_properties']
def test_get_otus(self):
    """Fetch the study's OTUs and expect 45 entries under ``otus1``/``otuById``."""
    response = OT.get_otus(study_id=study_id)
    otus_by_id = response.response_dict['otus1']['otuById']
    otu_count = len(otus_by_id)
    assert otu_count == 45
def test_conflict(self):
    """Compare the study tree to synth and expect ``node5`` in the conflict response."""
    response = OT.conflict_info(
        study_id=study_id,
        tree_id=tree_id,
        compare_to='synth',
    )
    conflict_by_node = response.response_dict
    assert 'node5' in conflict_by_node
def test_matchdict(self):
    """Match a list of names; 'Bos' maps to its ott id and 'Meep' is reported as failed."""
    names = ['H**o', 'Bos', 'Meep']
    matched, unmatched = OT.get_matchdict_from_taxlist(names)
    expected_bos = 'ott{}'.format(bos)
    assert matched['Bos'] == expected_bos
    assert 'Meep' in unmatched
def test_find_studies(self):
    """Search studies by focal clade name 'Ilex' and expect ot_1984 among the matches."""
    response = OT.find_studies("Ilex", "ot:focalCladeOTTTaxonName")
    matched_study_ids = []
    for match in response.response_dict["matched_studies"]:
        matched_study_ids.append(match['ot:studyId'])
    assert "ot_1984" in matched_study_ids
def test_subproblems_fail(self):
    """Each subproblem endpoint should raise OTWebServicesError for ``bos``."""
    ## Bos is not in the synth tree
    # One assertRaises per call: inside a single ``with`` block the first
    # exception ends the block, so the later calls would never run.
    with self.assertRaises(OTWebServicesError):
        OT.get_subproblem_solution('opentree12.3', bos)
    with self.assertRaises(OTWebServicesError):
        OT.get_subproblem_trees('opentree12.3', bos)
    with self.assertRaises(OTWebServicesError):
        OT.get_reversed_subproblem_solution('opentree12.3', bos)
def test_taxon_info(self):
    """Fetch taxon info for ``bos`` and expect 'Taurus' among its synonyms."""
    taxon_blob = OT.taxon_info(ott_id=bos).response_dict
    synonyms = taxon_blob['synonyms']
    assert 'Taurus' in synonyms
def test_subproblems(self):
    """Call the three subproblem endpoints for synth version 'opentree12.3'; only checks they complete."""
    # NOTE(review): `h**o` parses as the power expression `h ** o`; it looks
    # like a mangled identifier - confirm the intended name.
    solution = OT.get_subproblem_solution('opentree12.3', h**o)
    input_trees = OT.get_subproblem_trees('opentree12.3', h**o)
    reversed_solution = OT.get_reversed_subproblem_solution('opentree12.3', h**o)
def test_int_ott_ids(self):
    """A non-numeric entry in ``ott_ids`` should raise ValueError."""
    bad_ids = ["hi"]
    with self.assertRaises(ValueError):
        OT.synth_induced_tree(ott_ids=bad_ids)
def test_about(self):
    """Check that ``OT.about()`` reports both the taxonomy and the synth tree sections."""
    ret = OT.about()
    # ``assert 'taxonomy_about', <expr>`` only asserts that the literal is
    # truthy and can never fail; each key gets its own membership assertion.
    assert 'taxonomy_about' in ret
    assert 'synth_tree_about' in ret
def test_success_node_ids(self):
    """An induced tree request given only prefixed ``node_ids`` should not raise."""
    prefixed_ids = ['ott417950', 'ott770315']
    OT.synth_induced_tree(node_ids=prefixed_ids)
def test_get_tree_newick(self):
    """Request the tree in newick format and check the decoded content is a str."""
    response = OT.get_tree(
        study_id=study_id,
        tree_id=tree_id,
        tree_format='newick',
    )
    raw_content = response.response_dict['content']
    newick_text = raw_content.decode("utf-8")
    assert isinstance(newick_text, str)
def test_demands_id_arg(self):
    """Calling ``synth_induced_tree`` with no id arguments should raise ValueError."""
    expect_value_error = self.assertRaises(ValueError)
    with expect_value_error:
        OT.synth_induced_tree()
def test_get_tree_bad_format(self):
    """An unrecognised ``tree_format`` value should raise ValueError."""
    with self.assertRaises(ValueError):
        OT.get_tree(
            study_id=study_id,
            tree_id=tree_id,
            tree_format='newrk',
        )