Exemple #1
0
  def test_01tree_annotation(self):
    # Annotates a tree whose leaves are named by NCBI taxids and checks the
    # taxonomy attributes found on the root, on the parent of leaf 9606 and
    # on leaf 9606 itself.
    t = PhyloTree("((9598, 9606), 10090);", sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, 'Euarchontoglires')

    # Expected lineage (root first) of the parent of leaf 9606. The lineage of
    # the leaf itself is this same path extended by two more levels.
    homininae_names = [u'root', u'cellular organisms', u'Eukaryota', u'Opisthokonta', u'Metazoa', u'Eumetazoa', u'Bilateria', u'Deuterostomia', u'Chordata', u'Craniata', u'Vertebrata', u'Gnathostomata', u'Teleostomi', u'Euteleostomi', u'Sarcopterygii', u'Dipnotetrapodomorpha', u'Tetrapoda', u'Amniota', u'Mammalia', u'Theria', u'Eutheria', u'Boreoeutheria', u'Euarchontoglires', u'Primates', u'Haplorrhini', u'Simiiformes', u'Catarrhini', u'Hominoidea', u'Hominidae', u'Homininae']
    homininae_ids = [1, 131567, 2759, 33154, 33208, 6072, 33213, 33511, 7711, 89593, 7742, 7776, 117570, 117571, 8287, 1338369, 32523, 32524, 40674, 32525, 9347, 1437010, 314146, 9443, 376913, 314293, 9526, 314295, 9604, 207598]

    homi = (t&'9606').up
    self.assertEqual(homi.sci_name, 'Homininae')
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, 'subfamily')
    self.assertEqual(homi.named_lineage, homininae_names)
    self.assertEqual(homi.lineage, homininae_ids)

    # The genus/species names below had been mangled to 'H**o' by a text
    # filter; the scientific names for taxids 9605 and 9606 are 'Homo' and
    # 'Homo sapiens'.
    human = t&'9606'
    self.assertEqual(human.sci_name, 'Homo sapiens')
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, 'species')
    self.assertEqual(human.named_lineage, homininae_names + [u'Homo', u'Homo sapiens'])
    self.assertEqual(human.lineage, homininae_ids + [9605, 9606])
Exemple #2
0
def run(args):
    """Annotate every source tree and dump it with the collected features.

    For each newick string yielded by ``args.src_tree_iterator`` a tree is
    built: a ``PhyloTree`` annotated through
    ``annotate_ncbi_taxa(args.taxid_attr)`` when ``args.ncbi`` is true, a
    plain ``Tree`` otherwise. Every entry of ``args.feature`` is an iterable
    of ``key:value`` fields; the accepted keys are ``name``, ``source``,
    ``multiple`` and ``type`` (``str``, ``int``, ``float``, ``set`` or
    ``list``).

    The ``source`` file is read line by line. Blank lines and lines starting
    with ``#`` are skipped; every other line must hold two tab-separated
    columns: a comma-separated list of node names and the value to attach.
    With several node names the value goes to their common ancestor,
    otherwise to the single named node.

    Raises:
        ValueError: on a malformed or unknown feature option, an unknown
            feature type, a missing ``name`` or ``source``, or a source line
            that does not have exactly two tab-separated columns.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant lookup table, built once instead of once per tree.
    type2cast = {"str": str, "int": int, "float": float, "set": set, "list": list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage", "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    # str.strip() is used directly so the block does not depend
                    # on a module-level `strip` helper being in scope.
                    key, value = [part.strip() for part in field.split(":")]
                except (AttributeError, ValueError):
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Accepted for forward compatibility; the value is parsed
                    # but nothing below reads it yet.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both values are needed below (feature name and file to open).
            # The original built this exception without raising it and only
            # triggered when *both* values were missing.
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % (annotation,))

            features.add(aname)
            # The context manager closes the file even if a line fails to parse.
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    try:
                        nodenames, attr_value = [col.strip() for col in line.split('\t')]
                    except ValueError:
                        raise ValueError("Invalid annotation line [%s] in [%s]"
                                         % (line, asource))
                    # A real list is required: the first element is indexed
                    # and possibly rewritten below.
                    nodenames = [node_name.strip() for node_name in nodenames.split(',')]
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            # TODO: strict ('!'-prefixed) grouping is not
                            # implemented; it currently behaves like relaxed.
                            pass
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): the message says "Overwriting" but the
                    # existing attribute is kept and the new value is dropped.
                    # Behaviour left unchanged; confirm which one is intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % (nodenames,))
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
Exemple #3
0
 def test_ncbi_compare(self):
   # Builds a tree whose leaf names are taxids (9606 occurs twice) and
   # annotates it from the database at DATABASE_PATH; the species name of a
   # node is taken from its `name` attribute.
   newick = "((9606, (9598, 9606)), 10090);"
   t = PhyloTree(newick, sp_naming_function=lambda node: node.name)
   t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
Exemple #4
0
    def test_01tree_annotation(self):
        # Annotates a tree whose leaves are named by NCBI taxids and checks
        # the taxonomy attributes found on the root, on the parent of leaf
        # 9606 and on leaf 9606 itself.
        t = PhyloTree("((9598, 9606), 10090);", sp_naming_function=lambda name: name)
        t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
        self.assertEqual(t.sci_name, "Euarchontoglires")

        # Expected lineage (root first) of the parent of leaf 9606. The
        # lineage of the leaf itself is this path plus two more levels.
        homininae_names = [
            u"root",
            u"cellular organisms",
            u"Eukaryota",
            u"Opisthokonta",
            u"Metazoa",
            u"Eumetazoa",
            u"Bilateria",
            u"Deuterostomia",
            u"Chordata",
            u"Craniata",
            u"Vertebrata",
            u"Gnathostomata",
            u"Teleostomi",
            u"Euteleostomi",
            u"Sarcopterygii",
            u"Dipnotetrapodomorpha",
            u"Tetrapoda",
            u"Amniota",
            u"Mammalia",
            u"Theria",
            u"Eutheria",
            u"Boreoeutheria",
            u"Euarchontoglires",
            u"Primates",
            u"Haplorrhini",
            u"Simiiformes",
            u"Catarrhini",
            u"Hominoidea",
            u"Hominidae",
            u"Homininae",
        ]
        homininae_ids = [
            1,
            131567,
            2759,
            33154,
            33208,
            6072,
            33213,
            33511,
            7711,
            89593,
            7742,
            7776,
            117570,
            117571,
            8287,
            1338369,
            32523,
            32524,
            40674,
            32525,
            9347,
            1437010,
            314146,
            9443,
            376913,
            314293,
            9526,
            314295,
            9604,
            207598,
        ]

        homi = (t & "9606").up
        self.assertEqual(homi.sci_name, "Homininae")
        self.assertEqual(homi.taxid, 207598)
        self.assertEqual(homi.rank, "subfamily")
        self.assertEqual(homi.named_lineage, homininae_names)
        self.assertEqual(homi.lineage, homininae_ids)

        # The genus/species names below had been mangled to "H**o" by a text
        # filter; the scientific names for taxids 9605 and 9606 are "Homo"
        # and "Homo sapiens".
        human = t & "9606"
        self.assertEqual(human.sci_name, "Homo sapiens")
        self.assertEqual(human.taxid, 9606)
        self.assertEqual(human.rank, "species")
        self.assertEqual(
            human.named_lineage,
            homininae_names + [u"Homo", u"Homo sapiens"],
        )
        self.assertEqual(human.lineage, homininae_ids + [9605, 9606])
Exemple #5
0
def run(args):
    """Annotate every source tree and dump it with the collected features.

    For each newick string yielded by ``args.src_tree_iterator`` a tree is
    built: a ``PhyloTree`` annotated through
    ``annotate_ncbi_taxa(args.taxid_attr)`` when ``args.ncbi`` is true, a
    plain ``Tree`` otherwise. Every entry of ``args.feature`` is an iterable
    of ``key:value`` fields; the accepted keys are ``name``, ``source``,
    ``multiple`` and ``type`` (``str``, ``int``, ``float``, ``set`` or
    ``list``).

    The ``source`` file is read line by line. Blank lines and lines starting
    with ``#`` are skipped; every other line must hold two tab-separated
    columns: a comma-separated list of node names and the value to attach.
    With several node names the value goes to their common ancestor,
    otherwise to the single named node.

    Raises:
        ValueError: on a malformed or unknown feature option, an unknown
            feature type, a missing ``name`` or ``source``, or a source line
            that does not have exactly two tab-separated columns.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant lookup table, built once instead of once per tree.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    # str.strip() is used directly so the block does not
                    # depend on a module-level `strip` helper being in scope.
                    key, value = [part.strip() for part in field.split(":")]
                except (AttributeError, ValueError):
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Accepted for forward compatibility; the value is parsed
                    # but nothing below reads it yet.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both values are needed below (feature name and file to open).
            # The original built this exception without raising it and only
            # triggered when *both* values were missing.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % (annotation,))

            features.add(aname)
            # The context manager closes the file even if a line fails to
            # parse.
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    try:
                        nodenames, attr_value = [
                            col.strip() for col in line.split('\t')
                        ]
                    except ValueError:
                        raise ValueError(
                            "Invalid annotation line [%s] in [%s]" %
                            (line, asource))
                    # A real list is required: the first element is indexed
                    # and possibly rewritten below.
                    nodenames = [
                        node_name.strip()
                        for node_name in nodenames.split(',')
                    ]
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            # TODO: strict ('!'-prefixed) grouping is not
                            # implemented; it currently behaves like relaxed.
                            pass
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): the message says "Overwriting" but the
                    # existing attribute is kept and the new value is
                    # dropped. Behaviour left unchanged; confirm which one is
                    # intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    (nodenames,))
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)