コード例 #1
0
ファイル: test_ncbiquery.py プロジェクト: molsim/ete
  def test_01tree_annotation(self):
    """Check NCBI annotation of the root, the parent of leaf 9606 and leaf 9606.

    The tree is built from taxid-named leaves and annotated from the
    database at ``DATABASE_PATH``; the test then compares ``sci_name``,
    ``taxid``, ``rank``, ``named_lineage`` and ``lineage`` on those nodes.
    """
    t = PhyloTree("((9598, 9606), 10090);", sp_naming_function=lambda name: name)
    t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
    self.assertEqual(t.sci_name, 'Euarchontoglires')

    # Expected lineage of the internal node (Homininae).  The human leaf
    # shares this prefix, so its expectations are derived from it below.
    homininae_names = [
        u'root', u'cellular organisms', u'Eukaryota', u'Opisthokonta',
        u'Metazoa', u'Eumetazoa', u'Bilateria', u'Deuterostomia',
        u'Chordata', u'Craniata', u'Vertebrata', u'Gnathostomata',
        u'Teleostomi', u'Euteleostomi', u'Sarcopterygii',
        u'Dipnotetrapodomorpha', u'Tetrapoda', u'Amniota', u'Mammalia',
        u'Theria', u'Eutheria', u'Boreoeutheria', u'Euarchontoglires',
        u'Primates', u'Haplorrhini', u'Simiiformes', u'Catarrhini',
        u'Hominoidea', u'Hominidae', u'Homininae',
    ]
    homininae_taxids = [
        1, 131567, 2759, 33154, 33208, 6072, 33213, 33511, 7711, 89593,
        7742, 7776, 117570, 117571, 8287, 1338369, 32523, 32524, 40674,
        32525, 9347, 1437010, 314146, 9443, 376913, 314293, 9526, 314295,
        9604, 207598,
    ]

    homi = (t & '9606').up
    self.assertEqual(homi.sci_name, 'Homininae')
    self.assertEqual(homi.taxid, 207598)
    self.assertEqual(homi.rank, 'subfamily')
    self.assertEqual(homi.named_lineage, homininae_names)
    self.assertEqual(homi.lineage, homininae_taxids)

    human = t & '9606'
    self.assertEqual(human.sci_name, 'Homo sapiens')
    self.assertEqual(human.taxid, 9606)
    self.assertEqual(human.rank, 'species')
    # Genus (9605) and species (9606) extend the Homininae lineage.
    self.assertEqual(human.named_lineage,
                     homininae_names + [u'Homo', u'Homo sapiens'])
    self.assertEqual(human.lineage, homininae_taxids + [9605, 9606])
コード例 #2
0
ファイル: ete_annotate.py プロジェクト: a1an77/ete
def run(args):
    """Annotate every source tree and dump it with the annotated features.

    For each newick in ``args.src_tree_iterator`` a tree is loaded
    (``PhyloTree`` plus NCBI annotation through ``args.taxid_attr`` when
    ``args.ncbi`` is set, plain ``Tree`` otherwise).  Each entry of
    ``args.feature`` is a sequence of ``key:value`` options:

    * ``name``     -- feature name to add to the nodes (required)
    * ``source``   -- path of a tab-delimited file ``nodenames<TAB>value``
                      (required); ``nodenames`` may be a comma-separated
                      list, in which case the common ancestor is annotated
    * ``type``     -- one of ``str``, ``int``, ``float``, ``set``, ``list``
    * ``multiple`` -- accepted but currently has no effect

    Raises:
        ValueError: on a malformed/unknown option, an unknown type, or when
            ``name`` or ``source`` is missing from an annotation.
    """
    from ete2 import Tree, PhyloTree

    type2cast = {"str": str, "int": int, "float": float, "set": set, "list": list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage", "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, acast = None, None, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except (ValueError, AttributeError):
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Recognised so it is not rejected as unknown, but the
                    # append behaviour is not implemented yet.
                    pass
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are needed below (feature name and file to read).
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % annotation)

            features.add(aname)
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [col.strip() for col in line.split('\t')]
                    nodenames = [nodename.strip() for nodename in nodenames.split(',')]
                    # A leading '!' on the first name disables relaxed grouping.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            # Strict grouping is not implemented yet.
                            pass
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): the warning says "Overwriting" but the
                    # existing value is kept; confirm which one is intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
コード例 #3
0
ファイル: test_ncbiquery.py プロジェクト: molsim/ete
 def test_ncbi_compare(self):
   """Annotate a tree in which taxid 9606 appears on two leaves.

   The visible body makes no assertions; it only exercises tree
   construction and NCBI annotation against ``DATABASE_PATH``.
   """
   newick = "((9606, (9598, 9606)), 10090);"
   t = PhyloTree(newick, sp_naming_function=lambda node: node.name)
   t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
コード例 #4
0
ファイル: test_ncbiquery.py プロジェクト: jerryatmda/ete
    def test_01tree_annotation(self):
        """Check NCBI annotation of the root, the parent of leaf 9606 and leaf 9606.

        The tree is built from taxid-named leaves and annotated from the
        database at ``DATABASE_PATH``; the test then compares ``sci_name``,
        ``taxid``, ``rank``, ``named_lineage`` and ``lineage`` on those nodes.
        """
        t = PhyloTree("((9598, 9606), 10090);", sp_naming_function=lambda name: name)
        t.annotate_ncbi_taxa(dbfile=DATABASE_PATH)
        self.assertEqual(t.sci_name, "Euarchontoglires")

        # Expected lineage of the internal node (Homininae).  The human leaf
        # shares this prefix, so its expectations are derived from it below.
        homininae_names = [
            u"root",
            u"cellular organisms",
            u"Eukaryota",
            u"Opisthokonta",
            u"Metazoa",
            u"Eumetazoa",
            u"Bilateria",
            u"Deuterostomia",
            u"Chordata",
            u"Craniata",
            u"Vertebrata",
            u"Gnathostomata",
            u"Teleostomi",
            u"Euteleostomi",
            u"Sarcopterygii",
            u"Dipnotetrapodomorpha",
            u"Tetrapoda",
            u"Amniota",
            u"Mammalia",
            u"Theria",
            u"Eutheria",
            u"Boreoeutheria",
            u"Euarchontoglires",
            u"Primates",
            u"Haplorrhini",
            u"Simiiformes",
            u"Catarrhini",
            u"Hominoidea",
            u"Hominidae",
            u"Homininae",
        ]
        homininae_taxids = [
            1,
            131567,
            2759,
            33154,
            33208,
            6072,
            33213,
            33511,
            7711,
            89593,
            7742,
            7776,
            117570,
            117571,
            8287,
            1338369,
            32523,
            32524,
            40674,
            32525,
            9347,
            1437010,
            314146,
            9443,
            376913,
            314293,
            9526,
            314295,
            9604,
            207598,
        ]

        homi = (t & "9606").up
        self.assertEqual(homi.sci_name, "Homininae")
        self.assertEqual(homi.taxid, 207598)
        self.assertEqual(homi.rank, "subfamily")
        self.assertEqual(homi.named_lineage, homininae_names)
        self.assertEqual(homi.lineage, homininae_taxids)

        human = t & "9606"
        self.assertEqual(human.sci_name, "Homo sapiens")
        self.assertEqual(human.taxid, 9606)
        self.assertEqual(human.rank, "species")
        # Genus (9605) and species (9606) extend the Homininae lineage.
        self.assertEqual(
            human.named_lineage,
            homininae_names + [u"Homo", u"Homo sapiens"],
        )
        self.assertEqual(human.lineage, homininae_taxids + [9605, 9606])
コード例 #5
0
ファイル: ete_annotate.py プロジェクト: F4L2/devoirs
def run(args):
    """Annotate every source tree and dump it with the annotated features.

    For each newick in ``args.src_tree_iterator`` a tree is loaded
    (``PhyloTree`` plus NCBI annotation through ``args.taxid_attr`` when
    ``args.ncbi`` is set, plain ``Tree`` otherwise).  Each entry of
    ``args.feature`` is a sequence of ``key:value`` options:

    * ``name``     -- feature name to add to the nodes (required)
    * ``source``   -- path of a tab-delimited file ``nodenames<TAB>value``
                      (required); ``nodenames`` may be a comma-separated
                      list, in which case the common ancestor is annotated
    * ``type``     -- one of ``str``, ``int``, ``float``, ``set``, ``list``
    * ``multiple`` -- accepted but currently has no effect

    Raises:
        ValueError: on a malformed/unknown option, an unknown type, or when
            ``name`` or ``source`` is missing from an annotation.
    """
    from ete2 import Tree, PhyloTree

    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list,
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, acast = None, None, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except (ValueError, AttributeError):
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Recognised so it is not rejected as unknown, but the
                    # append behaviour is not implemented yet.
                    pass
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are needed below (feature name and file to read).
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % annotation)

            features.add(aname)
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [
                        col.strip() for col in line.split('\t')
                    ]
                    nodenames = [
                        nodename.strip() for nodename in nodenames.split(',')
                    ]
                    # A leading '!' on the first name disables relaxed grouping.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            # Strict grouping is not implemented yet.
                            pass
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): the warning says "Overwriting" but the
                    # existing value is kept; confirm which one is intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)