Exemple #1
0
        ##########################################################################################
        #           Get support value for the evolutionary events
        ##########################################################################################
        evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "r")
        #events_and_support = file('%s''/''%s''.para.events' %(events_dir,tree), "w")

        output_count = 0
        for row in evts:

            pair = row.split("<===>")
            left = "".join(pair[0].split())
            right = "".join(pair[1].split())
            if (len(left) > 0 and len(right) > 0):
                child_left = left.split(",")
                child_right = right.split(",")
                ancestor_1 = t.get_common_ancestor(child_left[0],
                                                   child_right[0])
                #print child_left[0] + "\t" + child_right[0]
                #print ancestor_1
                #a counter to score the number of monocots genes
                mono_left = 0
                mono_right = 0
                Cpent_left = 0
                Cpent_right = 0
                rosid_left = 0
                rosid_right = 0
                asterid_left = 0
                asterid_right = 0
                other_left = 0
                other_right = 0
                #para_list = list()
                rosid_list = list()
##########################################################################################        
#           Get support value for the evolutionary events 
##########################################################################################
        evts = file('%s''/''%s''.temp' %(events_dir,tree), "r")
        #events_and_support = file('%s''/''%s''.para.events' %(events_dir,tree), "w")
        
        output_count =0
        for row in evts:

            pair = row.split("<===>" )
            left = "".join(pair[0].split())
            right = "".join(pair[1].split())
            if (len(left) > 0 and len(right) > 0):
            	child_left = left.split(",")
            	child_right = right.split(",")
            	ancestor_1 = t.get_common_ancestor(child_left[0], child_right[0])
                #print child_left[0] + "\t" + child_right[0] 
                #print ancestor_1
                #a counter to score the number of monocots genes
                mono_left =0 
                mono_right = 0
                #para_left = 0
                #para_right= 0
                Cpent_left = 0
                Cpent_right =0
                Cpent_list = list()

                rosid_left = 0
                rosid_right=0
                asterid_left=0
                asterid_right =0 
Exemple #3
0
                evts.write(",".join(ev.out_seqs))
                evts.write("\n")
        evts.close()

        evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "r")

        output_count = 0
        for row in evts:

            pair = row.split("<===>")
            left = "".join(pair[0].split())
            right = "".join(pair[1].split())
            if (len(left) > 0 and len(right) > 0):
                child_left = left.split(",")
                child_right = right.split(",")
                ancestor_1 = t.get_common_ancestor(child_left[0],
                                                   child_right[0])
                #print child_left[0] + "\t" + child_right[0]
                #print ancestor_1
                #a counter to score the number of monocots genes
                mono_left = 0
                mono_right = 0
                para_left = 0
                para_right = 0
                rosid_left = 0
                rosid_right = 0
                asterid_left = 0
                asterid_right = 0
                other_left = 0
                other_right = 0
                para_list = list()
Exemple #4
0
def run(args):
    """Annotate every source tree with features read from tabular files.

    One tree is built per newick string yielded by
    ``args.src_tree_iterator``: a ``PhyloTree`` annotated through
    ``annotate_ncbi_taxa(args.taxid_attr)`` when ``args.ncbi`` is set, a
    plain ``Tree`` otherwise.  Each entry of ``args.feature`` is an iterable
    of ``key:value`` option strings; the accepted keys are ``name``,
    ``source``, ``multiple`` and ``type`` (one of ``str``, ``int``,
    ``float``, ``set``, ``list``).

    The ``source`` file is read line by line.  Blank lines and lines
    starting with ``#`` are ignored; every other line must hold exactly two
    tab-separated columns: a comma-separated list of node names and the
    value to attach.  With several node names the value is attached to their
    common ancestor, otherwise to the single named node.  Each tree is
    finally passed to ``dump`` together with the accumulated feature names.

    Raises:
        ValueError: if an option string is not ``key:value``, uses an
            unknown key or type, omits ``name`` or ``source``, or if a data
            line does not have exactly two tab-separated columns.
    """
    from ete2 import Tree, PhyloTree

    # The cast table never changes, so build it once instead of per tree.
    type2cast = {"str": str, "int": int, "float": float, "set": set, "list": list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage", "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                # Only the tuple unpacking can fail here, so catch just that
                # instead of masking unrelated errors.
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except ValueError:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Parsed for forward compatibility; nothing below acts
                    # on it yet.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory: without a name there is nothing to
            # attach, without a source there is nothing to read.
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % (annotation,))

            features.add(aname)
            # The context manager guarantees the file is closed even when a
            # malformed line aborts the loop.
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    try:
                        nodenames, attr_value = [col.strip() for col in line.split('\t')]
                    except ValueError:
                        raise ValueError("Invalid annotation line [%s] in file [%s]"
                                         % (line, asource))
                    nodenames = [name.strip() for name in nodenames.split(',')]
                    # A leading '!' on the first name switches off relaxed
                    # grouping; the flag is recorded but has no effect yet.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something
                    else:
                        # Single name: fetch the node through the tree's
                        # `&` operator.
                        target_node = tree & nodenames[0]

                    # NOTE(review): the message says "Overwriting" but the
                    # existing value is left untouched -- confirm which of
                    # the two is intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
Exemple #5
0
def run(args):
    """Annotate every source tree with features read from tabular files.

    One tree is built per newick string yielded by
    ``args.src_tree_iterator``: a ``PhyloTree`` annotated through
    ``annotate_ncbi_taxa(args.taxid_attr)`` when ``args.ncbi`` is set, a
    plain ``Tree`` otherwise.  Each entry of ``args.feature`` is an iterable
    of ``key:value`` option strings; the accepted keys are ``name``,
    ``source``, ``multiple`` and ``type`` (one of ``str``, ``int``,
    ``float``, ``set``, ``list``).

    The ``source`` file is read line by line.  Blank lines and lines
    starting with ``#`` are ignored; every other line must hold exactly two
    tab-separated columns: a comma-separated list of node names and the
    value to attach.  With several node names the value is attached to their
    common ancestor, otherwise to the single named node.  Each tree is
    finally passed to ``dump`` together with the accumulated feature names.

    Raises:
        ValueError: if an option string is not ``key:value``, uses an
            unknown key or type, omits ``name`` or ``source``, or if a data
            line does not have exactly two tab-separated columns.
    """
    from ete2 import Tree, PhyloTree

    # The cast table never changes, so build it once instead of per tree.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        for annotation in args.feature:
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                # Only the tuple unpacking can fail here, so catch just that
                # instead of masking unrelated errors.
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except ValueError:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # Parsed for forward compatibility; nothing below acts
                    # on it yet.
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory: without a name there is nothing to
            # attach, without a source there is nothing to read.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % (annotation, ))

            features.add(aname)
            # The context manager guarantees the file is closed even when a
            # malformed line aborts the loop.
            with open(asource, 'rU') as source_file:
                for line in source_file:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    try:
                        nodenames, attr_value = [
                            col.strip() for col in line.split('\t')
                        ]
                    except ValueError:
                        raise ValueError(
                            "Invalid annotation line [%s] in file [%s]" %
                            (line, asource))
                    nodenames = [name.strip() for name in nodenames.split(',')]
                    # A leading '!' on the first name switches off relaxed
                    # grouping; the flag is recorded but has no effect yet.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something
                    else:
                        # Single name: fetch the node through the tree's
                        # `&` operator.
                        target_node = tree & nodenames[0]

                    # NOTE(review): the message says "Overwriting" but the
                    # existing value is left untouched -- confirm which of
                    # the two is intended.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' %
                                    nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
# the phylogenetic tree.  Note that ages are only relative numbers to
# define which species are older, and that different species can
# belong to the same age.
species2age = {
  'Hsa': 1, # H**o sapiens (Hominids)
  'Ptr': 2, # P. troglodytes (primates)
  'Mmu': 2, # Macaca mulata (primates)
  'Mms': 3, # Mus musculus (mammals)
  'Cfa': 3, # Canis familiaris (mammals)
  'Dme': 4  # Drosophila melanogaster (metazoa)
}
# We can translate each number to its correspondig taxonomic number
age2name = {
  1:"hominids",
  2:"primates",
  3:"mammals",
  4:"metazoa"
}
event1= t.get_common_ancestor("Hsa_001", "Hsa_004")
event2=t.get_common_ancestor("Hsa_001", "Hsa_002")
print
print "The duplication event leading to the human sequences Hsa_001 and "+\
    "Hsa_004 is dated at: ", age2name[event1.get_age(species2age)]
print "The duplication event leading to the human sequences Hsa_001 and "+\
    "Hsa_002 is dated at: ", age2name[event2.get_age(species2age)]
# The duplication event leading to the human sequences Hsa_001 and Hsa_004
# is dated at:  primates
#
# The duplication event leading to the human sequences Hsa_001 and Hsa_002
# is dated at:  mammals
Exemple #7
0
                              " ASCII representation and all its evolutionary events"
                              " before orthoXML export"))
    
    
    args = parser.parse_args()
    newick = args.tree[0]

    SPECIES_NAME_POS = args.species_field
    SPECIES_NAME_DELIMITER = args.species_delimiter

    # load a phylomeDB Tree provided as a newick file in the command line
    t = PhyloTree(newick, sp_naming_function=extract_spname)

    if args.root:
        if len(args.root) > 1:
            outgroup = t.get_common_ancestor(args.root)
        else:
            outgroup = t & args.root[0]
        t.set_outgroup(outgroup)


    if not args.skip_ortholog_detection:
        # detect speciation and duplication events using the species overlap
        # algorithm used in phylomeDB
        t.get_descendant_evol_events()
        
    if args.ascii:
        print t.get_ascii(attributes=[args.evoltype_attr, "name"], show_internal=True)
        
    if args.newick:
        print t.write(features=[args.evoltype_attr], format_root_node=True)
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Otherwise, the species attribute
# would be overwritten.
# Newick string in which every leaf carries an NHX "species" attribute.
mynewick = """
(((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),
(Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),
(Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly]));
"""
# Load the tree with the species naming function set to None.
t = PhyloTree(mynewick, sp_naming_function=None)
print "Disabled mode (manual set):"
# Show the name and the species attribute of every leaf.
for n in t.get_leaves():
    print "node:", n.name, "Species name:", n.species
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print "These are the species under the common ancestor of Human & Mouse"
# One species name per output line.
print '\n'.join( human_mouse_ancestor.get_species() )
# Mouse
# Chimp
# Dog
# Human