##########################################################################################
# Get support value for the evolutionary events
##########################################################################################
# Re-read the temporary events file. Every row is expected to hold two
# comma-separated sequence lists joined by the "<===>" marker.
# `open` replaces the Python 2-only `file` builtin; the path is unchanged.
# NOTE(review): no close() is visible in this excerpt - confirm the handle is
# released after the loop.
evts = open('%s/%s.temp' % (events_dir, tree), "r")
#events_and_support = file('%s''/''%s''.para.events' %(events_dir,tree), "w")
output_count = 0
for row in evts:
    pair = row.split("<===>")
    # Remove every whitespace character (including the trailing newline).
    left = "".join(pair[0].split())
    right = "".join(pair[1].split())
    if left and right:
        child_left = left.split(",")
        child_right = right.split(",")
        # Node joining the first sequence of each side of the event.
        ancestor_1 = t.get_common_ancestor(child_left[0], child_right[0])
        #print child_left[0] + "\t" + child_right[0]
        #print ancestor_1
        # Per-side counters, reset for every event (the original comment
        # describes them as scoring the number of monocot genes; the other
        # names suggest further groups - confirm against the code that
        # increments them).
        mono_left = 0
        mono_right = 0
        Cpent_left = 0
        Cpent_right = 0
        rosid_left = 0
        rosid_right = 0
        asterid_left = 0
        asterid_right = 0
        other_left = 0
        other_right = 0
        #para_list = list()
        rosid_list = list()
##########################################################################################
# Get support value for the evolutionary events
##########################################################################################
# Re-read the temporary events file. Every row is expected to hold two
# comma-separated sequence lists joined by the "<===>" marker.
# `open` replaces the Python 2-only `file` builtin; the path is unchanged.
# NOTE(review): no close() is visible in this excerpt - confirm the handle is
# released after the loop.
evts = open('%s/%s.temp' % (events_dir, tree), "r")
#events_and_support = file('%s''/''%s''.para.events' %(events_dir,tree), "w")
output_count = 0
for row in evts:
    pair = row.split("<===>")
    # Remove every whitespace character (including the trailing newline).
    left = "".join(pair[0].split())
    right = "".join(pair[1].split())
    if left and right:
        child_left = left.split(",")
        child_right = right.split(",")
        # Node joining the first sequence of each side of the event.
        ancestor_1 = t.get_common_ancestor(child_left[0], child_right[0])
        #print child_left[0] + "\t" + child_right[0]
        #print ancestor_1
        # Per-side counters, reset for every event (the original comment
        # describes them as scoring the number of monocot genes; the other
        # names suggest further groups - confirm against the code that
        # increments them).
        mono_left = 0
        mono_right = 0
        #para_left = 0
        #para_right= 0
        Cpent_left = 0
        Cpent_right = 0
        Cpent_list = list()
        rosid_left = 0
        rosid_right = 0
        asterid_left = 0
        asterid_right = 0
# Finish writing the temporary events file: the comma-joined `out_seqs` of
# `ev`, then a newline, then close the handle.
# NOTE(review): `ev` is bound before this excerpt; the two writes presumably
# sit inside a loop over events - confirm their nesting against the full file.
evts.write(",".join(ev.out_seqs))
evts.write("\n")
evts.close()

# Re-open the same file for reading. Every row is split on the "<===>" marker
# into two comma-separated sequence lists.
evts = file('%s' '/' '%s' '.temp' % (events_dir, tree), "r")
output_count = 0
for row in evts:
    pair = row.split("<===>")
    # Remove every whitespace character (including the trailing newline).
    left = "".join(pair[0].split())
    right = "".join(pair[1].split())
    if (len(left) > 0 and len(right) > 0):
        child_left = left.split(",")
        child_right = right.split(",")
        # Node joining the first sequence of each side of the event.
        ancestor_1 = t.get_common_ancestor(child_left[0], child_right[0])
        #print child_left[0] + "\t" + child_right[0]
        #print ancestor_1
        #a counter to score the number of monocots genes
        # (all per-side counters below are reset for every event)
        mono_left = 0
        mono_right = 0
        para_left = 0
        para_right = 0
        rosid_left = 0
        rosid_right = 0
        asterid_left = 0
        asterid_right = 0
        other_left = 0
        other_right = 0
        para_list = list()
def run(args):
    """Annotate every source tree with user-defined features and dump it.

    For each newick yielded by ``args.src_tree_iterator`` a tree is loaded:
    a ``PhyloTree`` annotated through ``annotate_ncbi_taxa(args.taxid_attr)``
    when ``args.ncbi`` is set, a plain ``Tree`` otherwise. Every entry of
    ``args.feature`` is a list of ``key:value`` options:

    * ``name``     - attribute name to add to the target nodes (required)
    * ``source``   - path of a tab-separated file (required)
    * ``multiple`` - accepted and stored, but not used yet
    * ``type``     - cast applied to the value: str (default), int, float,
      set or list

    Each non-empty, non-``#`` line of the source file has the form
    ``node1[,node2,...]<TAB>value``. Several node names select their common
    ancestor; a single name selects that node. A node that already has the
    attribute is left untouched and a warning is logged.

    The set of feature names passed to ``dump`` accumulates across trees.

    Raises:
        ValueError: for a malformed or unknown option, an unknown ``type``,
            or when ``name`` or ``source`` is missing from an annotation.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant: casts accepted by the "type" option.
    type2cast = {"str": str, "int": int, "float": float,
                 "set": set, "list": list}

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update(["taxid", "name", "rank", "bgcolor", "sci_name",
                             "collapse_subspecies", "named_lineage", "lineage"])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        # Tolerate a missing feature option (None) instead of crashing.
        for annotation in args.feature or ():
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except ValueError:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # append mode: parsed but not implemented yet
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory. The exception used to be built but
            # never raised, and the guard only fired when both were missing.
            if not aname or not asource:
                raise ValueError('name and source are required when annotating a new feature [%s]'
                                 % annotation)

            features.add(aname)
            # 'rU' kept for Python 2 universal-newline reading; the context
            # manager releases the handle even if a line fails to parse.
            with open(asource, 'rU') as source:
                for line in source:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [col.strip() for col in line.split('\t')]
                    nodenames = [name.strip() for name in nodenames.split(',')]
                    # A leading '!' on the first name requests strict grouping.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something (strict grouping not implemented)
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): despite the message, an existing attribute
                    # is NOT overwritten - behavior kept as in the original.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
def run(args):
    """Annotate every source tree with user-defined features and dump it.

    For each newick yielded by ``args.src_tree_iterator`` a tree is loaded:
    a ``PhyloTree`` annotated through ``annotate_ncbi_taxa(args.taxid_attr)``
    when ``args.ncbi`` is set, a plain ``Tree`` otherwise. Every entry of
    ``args.feature`` is a list of ``key:value`` options:

    * ``name``     - attribute name to add to the target nodes (required)
    * ``source``   - path of a tab-separated file (required)
    * ``multiple`` - accepted and stored, but not used yet
    * ``type``     - cast applied to the value: str (default), int, float,
      set or list

    Each non-empty, non-``#`` line of the source file has the form
    ``node1[,node2,...]<TAB>value``. Several node names select their common
    ancestor; a single name selects that node. A node that already has the
    attribute is left untouched and a warning is logged.

    The set of feature names passed to ``dump`` accumulates across trees.

    Raises:
        ValueError: for a malformed or unknown option, an unknown ``type``,
            or when ``name`` or ``source`` is missing from an annotation.
    """
    from ete2 import Tree, PhyloTree

    # Loop-invariant: casts accepted by the "type" option.
    type2cast = {
        "str": str,
        "int": int,
        "float": float,
        "set": set,
        "list": list
    }

    features = set()
    for nw in args.src_tree_iterator:
        if args.ncbi:
            tree = PhyloTree(nw)
            features.update([
                "taxid", "name", "rank", "bgcolor", "sci_name",
                "collapse_subspecies", "named_lineage", "lineage"
            ])
            tree.annotate_ncbi_taxa(args.taxid_attr)
        else:
            tree = Tree(nw)

        # Tolerate a missing feature option (None) instead of crashing.
        for annotation in args.feature or ():
            aname, asource, amultiple, acast = None, None, False, str
            for field in annotation:
                try:
                    key, value = [part.strip() for part in field.split(":")]
                except ValueError:
                    raise ValueError("Invalid feature option [%s]" % field)

                if key == "name":
                    aname = value
                elif key == "source":
                    asource = value
                elif key == "multiple":
                    # append mode: parsed but not implemented yet
                    amultiple = value
                elif key == "type":
                    try:
                        acast = type2cast[value]
                    except KeyError:
                        raise ValueError("Invalid feature type [%s]" % field)
                else:
                    raise ValueError("Unknown feature option [%s]" % field)

            # Both options are mandatory. The exception used to be built but
            # never raised, and the guard only fired when both were missing.
            if not aname or not asource:
                raise ValueError(
                    'name and source are required when annotating a new feature [%s]'
                    % annotation)

            features.add(aname)
            # 'rU' kept for Python 2 universal-newline reading; the context
            # manager releases the handle even if a line fails to parse.
            with open(asource, 'rU') as source:
                for line in source:
                    line = line.strip()
                    if not line or line.startswith('#'):
                        continue
                    nodenames, attr_value = [col.strip() for col in line.split('\t')]
                    nodenames = [name.strip() for name in nodenames.split(',')]
                    # A leading '!' on the first name requests strict grouping.
                    relaxed_grouping = True
                    if nodenames[0].startswith('!'):
                        relaxed_grouping = False
                        nodenames[0] = nodenames[0][1:]

                    if len(nodenames) > 1:
                        target_node = tree.get_common_ancestor(nodenames)
                        if not relaxed_grouping:
                            pass
                            # do something (strict grouping not implemented)
                    else:
                        target_node = tree & nodenames[0]

                    # NOTE(review): despite the message, an existing attribute
                    # is NOT overwritten - behavior kept as in the original.
                    if hasattr(target_node, aname):
                        log.warning('Overwriting annotation for node" [%s]"' % nodenames)
                    else:
                        target_node.add_feature(aname, acast(attr_value))

        dump(tree, features=features)
# the phylogenetic tree. Note that ages are only relative numbers to
# define which species are older, and that different species can
# belong to the same age.
species2age = {
    'Hsa': 1,  # Homo sapiens (Hominids)
    'Ptr': 2,  # P. troglodytes (primates)
    'Mmu': 2,  # Macaca mulatta (primates)
    'Mms': 3,  # Mus musculus (mammals)
    'Cfa': 3,  # Canis familiaris (mammals)
    'Dme': 4   # Drosophila melanogaster (metazoa)
}

# We can translate each age number to its corresponding taxonomic name
age2name = {
    1: "hominids",
    2: "primates",
    3: "mammals",
    4: "metazoa"
}

# Nodes at which each pair of human sequences diverged.
event1 = t.get_common_ancestor("Hsa_001", "Hsa_004")
event2 = t.get_common_ancestor("Hsa_001", "Hsa_002")

# Single-argument print(...) calls emit the same text under Python 2 and 3.
# The two spaces after "dated at:" reproduce what the former
# `print text, value` statement wrote (its text already ended in a space).
print("")
print("The duplication event leading to the human sequences Hsa_001 and "
      "Hsa_004 is dated at:  %s" % age2name[event1.get_age(species2age)])
print("The duplication event leading to the human sequences Hsa_001 and "
      "Hsa_002 is dated at:  %s" % age2name[event2.get_age(species2age)])
# The duplication event leading to the human sequences Hsa_001 and Hsa_004
# is dated at: primates
#
# The duplication event leading to the human sequences Hsa_001 and Hsa_002
# is dated at: mammals
" ASCII representation and all its evolutionary events" " before orthoXML export"))
# (The line above closes an argument definition that starts before this
# excerpt.)

# Parse the command line; the first value of the `tree` option is the newick.
args = parser.parse_args()
newick = args.tree[0]

# NOTE(review): presumably read by `extract_spname` to pull the species code
# out of each leaf name - confirm against its definition.
SPECIES_NAME_POS = args.species_field
SPECIES_NAME_DELIMITER = args.species_delimiter

# load a phylomeDB Tree provided as a newick file in the command line
t = PhyloTree(newick, sp_naming_function=extract_spname)

# Optional rooting: several names select their common ancestor as outgroup,
# a single name selects that node directly.
if args.root:
    if len(args.root) > 1:
        outgroup = t.get_common_ancestor(args.root)
    else:
        outgroup = t & args.root[0]
    t.set_outgroup(outgroup)

if not args.skip_ortholog_detection:
    # detect speciation and duplication events using the species overlap
    # algorithm used in phylomeDB
    t.get_descendant_evol_events()

# Optional text outputs: an ASCII drawing and/or the extended newick, both
# carrying the attribute named by `args.evoltype_attr`.
if args.ascii:
    print t.get_ascii(attributes=[args.evoltype_attr, "name"], show_internal=True)

if args.newick:
    print t.write(features=[args.evoltype_attr], format_root_node=True)
# Of course, you can disable the automatic generation of species
# names. To do so, you can set the species naming function to
# None. This is useful to set the species names manually or for
# reading them from a newick file. Otherwise, the species attribute
# would be overwritten.
# The newick text is kept exactly as it appears in this file; adjacent string
# literals are concatenated into one value.
mynewick = (
    " (((Hsa_001[&&NHX:species=Human],Ptr_001[&&NHX:species=Chimp]),"
    " (Cfa_001[&&NHX:species=Dog],Mms_001[&&NHX:species=Mouse])),"
    " (Dme_001[&&NHX:species=Fly],Dme_002[&&NHX:species=Fly])); "
)
t = PhyloTree(mynewick, sp_naming_function=None)

# Single-argument print(...) calls emit the same text under Python 2 and 3.
print("Disabled mode (manual set):")
for n in t.get_leaves():
    print("node: %s Species name: %s" % (n.name, n.species))
# node: Dme_001 Species name: Fly
# node: Dme_002 Species name: Fly
# node: Hsa_001 Species name: Human
# node: Ptr_001 Species name: Chimp
# node: Cfa_001 Species name: Dog
# node: Mms_001 Species name: Mouse
#
# Of course, once this info is available you can query any internal
# node for species covered.
human_mouse_ancestor = t.get_common_ancestor("Hsa_001", "Mms_001")
print("These are the species under the common ancestor of Human & Mouse")
print('\n'.join(human_mouse_ancestor.get_species()))
# Mouse
# Chimp
# Dog
# Human