def test_01tree_annotation(self):
    """Check NCBI annotation of a small primate/mouse tree.

    The tree leaves are named by taxid (9598, 9606, 10090). After
    ``annotate_ncbi_taxa`` the root, the parent of leaf 9606 and the leaf
    itself must expose the expected ``sci_name``, ``taxid``, ``rank``,
    ``named_lineage`` and ``lineage`` attributes.
    """
    t = PhyloTree("((9598, 9606), 10090);",
                  sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, 'Euarchontoglires')

    # Lineage shared by the Homininae node and the human leaf (root -> Homininae).
    homininae_names = [
        u'root', u'cellular organisms', u'Eukaryota', u'Opisthokonta',
        u'Metazoa', u'Eumetazoa', u'Bilateria', u'Deuterostomia',
        u'Chordata', u'Craniata', u'Vertebrata', u'Gnathostomata',
        u'Teleostomi', u'Euteleostomi', u'Sarcopterygii',
        u'Dipnotetrapodomorpha', u'Tetrapoda', u'Amniota', u'Mammalia',
        u'Theria', u'Eutheria', u'Boreoeutheria', u'Euarchontoglires',
        u'Primates', u'Haplorrhini', u'Simiiformes', u'Catarrhini',
        u'Hominoidea', u'Hominidae', u'Homininae',
    ]
    homininae_taxids = [
        1, 131567, 2759, 33154, 33208, 6072, 33213, 33511, 7711, 89593,
        7742, 7776, 117570, 117571, 8287, 1338369, 32523, 32524, 40674,
        32525, 9347, 1437010, 314146, 9443, 376913, 314293, 9526, 314295,
        9604, 207598,
    ]

    homi = (t & '9606').up
    self.assertEqual(homi.sci_name, 'Homininae')
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, 'subfamily')
    self.assertEqual(homi.named_lineage, homininae_names)
    self.assertEqual(homi.lineage, homininae_taxids)

    # The human leaf extends the Homininae lineage by genus (9605) and
    # species (9606). The names were previously garbled as "H**o".
    human = t & '9606'
    self.assertEqual(human.sci_name, 'Homo sapiens')
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, 'species')
    self.assertEqual(human.named_lineage,
                     homininae_names + [u'Homo', u'Homo sapiens'])
    self.assertEqual(human.lineage, homininae_taxids + [9605, 9606])
def run(args):
    """Annotate each source tree and dump it with the annotated features.

    For every newick in ``args.src_tree_iterator`` a tree is loaded
    (``PhyloTree`` plus ``annotate_ncbi_taxa(args.taxid_attr)`` when
    ``args.ncbi`` is set, plain ``Tree`` otherwise). Each entry of
    ``args.feature`` is then applied, and the tree is passed to ``dump``
    together with the set of feature names collected so far.

    Each entry of ``args.feature`` is an iterable of ``key:value`` fields.
    Recognised keys are ``name``, ``source``, ``multiple`` and ``type``
    (one of str, int, float, set, list; defaults to str).

    The ``source`` file holds one ``nodenames<TAB>value`` record per line.
    ``nodenames`` is a comma-separated list; with several names the
    annotation targets their common ancestor. Blank lines and lines
    starting with ``#`` are skipped.

    Raises:
        ValueError: on a malformed or unknown feature option, an unknown
            type, or when ``name`` or ``source`` is missing.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant lookup table for the "type" option.
    type2cast = {"str": str, "int": int, "float": float,
                 "set": set, "list": list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage",
                             "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, acast = None, None, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except (AttributeError, TypeError, ValueError):
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Accepted for compatibility; the value is not used yet.
                    pass
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory. The original built this exception
            # without raising it and only triggered when both were missing.
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % annotation)

            features.add(aname)
            # Context manager so the annotation file is always closed.
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [col.strip()
                                             for col in line.split('\t')]
                    nodenames = [n.strip() for n in nodenames.split(',')]

                    # A leading '!' on the first name turns off relaxed
                    # grouping; the flag has no effect yet (see below).
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass  # do something
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): despite the message, an existing
                    # attribute is left untouched - confirm intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
def test_ncbi_compare(self):
    """Annotate a tree in which taxid 9606 appears on two leaves.

    The test only requires that construction and NCBI annotation finish
    without raising; it makes no assertions on the result.
    """
    newick = "((9606, (9598, 9606)), 10090);"

    def species_of(x):
        return x.name

    tree = PhyloTree(newick, sp_naming_function=species_of)
    tree.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
def test_01tree_annotation(self):
    """Check NCBI annotation of a small primate/mouse tree.

    The tree leaves are named by taxid (9598, 9606, 10090). After
    ``annotate_ncbi_taxa`` the root, the parent of leaf 9606 and the leaf
    itself must expose the expected ``sci_name``, ``taxid``, ``rank``,
    ``named_lineage`` and ``lineage`` attributes.
    """
    t = PhyloTree("((9598, 9606), 10090);",
                  sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, "Euarchontoglires")

    # Lineage shared by the Homininae node and the human leaf (root -> Homininae).
    homininae_names = [
        u"root",
        u"cellular organisms",
        u"Eukaryota",
        u"Opisthokonta",
        u"Metazoa",
        u"Eumetazoa",
        u"Bilateria",
        u"Deuterostomia",
        u"Chordata",
        u"Craniata",
        u"Vertebrata",
        u"Gnathostomata",
        u"Teleostomi",
        u"Euteleostomi",
        u"Sarcopterygii",
        u"Dipnotetrapodomorpha",
        u"Tetrapoda",
        u"Amniota",
        u"Mammalia",
        u"Theria",
        u"Eutheria",
        u"Boreoeutheria",
        u"Euarchontoglires",
        u"Primates",
        u"Haplorrhini",
        u"Simiiformes",
        u"Catarrhini",
        u"Hominoidea",
        u"Hominidae",
        u"Homininae",
    ]
    homininae_taxids = [
        1, 131567, 2759, 33154, 33208, 6072, 33213, 33511, 7711, 89593,
        7742, 7776, 117570, 117571, 8287, 1338369, 32523, 32524, 40674,
        32525, 9347, 1437010, 314146, 9443, 376913, 314293, 9526, 314295,
        9604, 207598,
    ]

    homi = (t & "9606").up
    self.assertEqual(homi.sci_name, "Homininae")
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, "subfamily")
    self.assertEqual(homi.named_lineage, homininae_names)
    self.assertEqual(homi.lineage, homininae_taxids)

    # The human leaf extends the Homininae lineage by genus (9605) and
    # species (9606). The names were previously garbled as "H**o".
    human = t & "9606"
    self.assertEqual(human.sci_name, "Homo sapiens")
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, "species")
    self.assertEqual(
        human.named_lineage,
        homininae_names + [u"Homo", u"Homo sapiens"],
    )
    self.assertEqual(human.lineage, homininae_taxids + [9605, 9606])
def run(args):
    """Annotate each source tree and dump it with the annotated features.

    For every newick in ``args.src_tree_iterator`` a tree is loaded
    (``PhyloTree`` plus ``annotate_ncbi_taxa(args.taxid_attr)`` when
    ``args.ncbi`` is set, plain ``Tree`` otherwise). Each entry of
    ``args.feature`` is then applied, and the tree is passed to ``dump``
    together with the set of feature names collected so far.

    Each entry of ``args.feature`` is an iterable of ``key:value`` fields.
    Recognised keys are ``name``, ``source``, ``multiple`` and ``type``
    (one of str, int, float, set, list; defaults to str).

    The ``source`` file holds one ``nodenames<TAB>value`` record per line.
    ``nodenames`` is a comma-separated list; with several names the
    annotation targets their common ancestor. Blank lines and lines
    starting with ``#`` are skipped.

    Raises:
        ValueError: on a malformed or unknown feature option, an unknown
            type, or when ``name`` or ``source`` is missing.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant lookup table for the "type" option.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, acast = None, None, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except (AttributeError, TypeError, ValueError):
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Accepted for compatibility; the value is not used yet.
                    pass
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory. The original built this exception
            # without raising it and only triggered when both were missing.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % annotation)

            features.add(aname)
            # Context manager so the annotation file is always closed.
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [
                        col.strip() for col in line.split('\t')
                    ]
                    nodenames = [n.strip() for n in nodenames.split(',')]

                    # A leading '!' on the first name turns off relaxed
                    # grouping; the flag has no effect yet (see below).
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass  # do something
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): despite the message, an existing
                    # attribute is left untouched - confirm intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)