def get_evol_events_from_root(node, sos_thr):
    """Return all duplication and speciation events found in the tree.

    The analysis always starts at the root of the tree that ``node``
    belongs to (obtained with ``node.get_tree_root()``) and visits every
    internal node breadth-first, from root to leaves.  A node is called a
    duplication when the species overlap score between its two child
    lineages is greater than ``sos_thr``; otherwise it is a speciation.

    The method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon T.
    Genome Biol. 2007;8(6):R109.

    Parameters:
        node: any node of the tree to analyse.  It must provide
            ``get_tree_root()``; tree nodes must provide
            ``get_children()``, ``get_leaves()``, ``get_descendants()``,
            ``get_species()``, ``add_feature()``, ``del_feature()`` and
            the ``children`` and ``support`` attributes.  Leaves must
            carry ``name`` and ``species`` attributes.
        sos_thr: species overlap score threshold.  The score of a node is
            ``len(speciesA & speciesB) / len(speciesA | speciesB)``.

    Returns:
        A list with one ``EvolEvent`` per internal node, in breadth-first
        order.  For every event, side A is the first child and side B the
        second child of the node.

    Raises:
        TypeError: if the root does not have exactly two children
            ("Tree is not rooted") or if any other internal node does not
            have exactly two children.

    Side effects:
        The "evoltype" feature is deleted from every node of the tree and
        then set to "D" or "S" on every internal node.
    """
    # Function-scope import: the block stays self-contained and does not
    # depend on what the rest of the module imports.
    from collections import deque

    root = node.get_tree_root()

    # A rooted tree is expected to split into exactly two lineages.
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # The lineage with fewer distinct leaf names is stored as the tree
    # outgroup.  On a tie the first child is kept.
    first_names = set(leaf.name for leaf in outgroups[0].get_leaves())
    second_names = set(leaf.name for leaf in outgroups[1].get_leaves())
    if len(second_names) < len(first_names):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Family size: number of leaves in the whole tree.
    fam_size = len(list(root.get_leaves()))

    # Clean annotations left by previous analyses.
    for stale in root.get_descendants() + [root]:
        stale.del_feature("evoltype")

    all_events = []
    # FIFO queue -> breadth-first traversal; popleft() is O(1), unlike
    # list.pop(0).  Looping on the queue (not on the node) also means the
    # traversal does not depend on the truth value of node objects.
    pending = deque([root])
    while pending:
        current = pending.popleft()
        childs = current.get_children()
        n_childs = len(childs)

        if n_childs == 0:
            continue  # leaf: no event to report
        if n_childs != 2:
            # Single-child nodes used to fail later with an incidental
            # IndexError; report them with the same explicit error used
            # for multifurcations.
            raise TypeError("nodes are expected to have two childs.")
        pending.extend(childs)

        # Leaves at both sides of the event (computed once per side).
        side_a_leaves = childs[0].get_leaves()
        side_b_leaves = childs[1].get_leaves()
        side_a_names = set(leaf.name for leaf in side_a_leaves)
        side_b_names = set(leaf.name for leaf in side_b_leaves)
        side_a_spcs = set(leaf.species for leaf in side_a_leaves)
        side_b_spcs = set(leaf.species for leaf in side_b_leaves)

        # Species overlap score: shared species / all species.
        overlaped_spcs = side_a_spcs & side_b_spcs
        all_spcs = side_a_spcs | side_b_spcs
        score = float(len(overlaped_spcs)) / len(all_spcs)

        event = EvolEvent()
        event.fam_size = fam_size
        event.branch_supports = [
            current.support,
            current.children[0].support,
            current.children[1].support,
        ]
        # NOTE: the newick of the event is deliberately not stored
        # (high memory usage).
        event.sos = score
        event.outgroup_spcs = smaller_outg.get_species()
        # Every attribute gets its own set object so that callers can
        # modify one without affecting the others.
        event.in_seqs = set(side_a_names)
        event.out_seqs = set(side_b_names)
        event.inparalogs = set(side_a_names)
        event.node = current

        if score > sos_thr:
            # Species overlap above the threshold: duplication.
            event.etype = "D"
            event.outparalogs = set(side_b_names)
            event.orthologs = set()
            current.add_feature("evoltype", "D")
        else:
            # Otherwise: speciation.
            event.etype = "S"
            event.orthologs = set(side_b_names)
            event.outparalogs = set()
            current.add_feature("evoltype", "S")

        all_events.append(event)

    return all_events
def get_evol_events_from_root(node, sos_thr):
    """Return all duplication and speciation events found in the tree.

    The analysis always starts at the root of the tree that ``node``
    belongs to (obtained with ``node.get_tree_root()``) and visits every
    internal node breadth-first, from root to leaves.  A node is called a
    duplication when the species overlap score between its two child
    lineages is greater than ``sos_thr``; otherwise it is a speciation.

    The method is described in more detail in:

    "The Human Phylome." Huerta-Cepas J, Dopazo H, Dopazo J, Gabaldon T.
    Genome Biol. 2007;8(6):R109.

    Parameters:
        node: any node of the tree to analyse.  It must provide
            ``get_tree_root()``; tree nodes must provide
            ``get_children()``, ``get_leaves()``, ``get_descendants()``,
            ``get_species()``, ``add_feature()``, ``del_feature()`` and
            the ``children`` and ``support`` attributes.  Leaves must
            carry ``name`` and ``species`` attributes.
        sos_thr: species overlap score threshold.  The score of a node is
            ``len(speciesA & speciesB) / len(speciesA | speciesB)``.

    Returns:
        A list with one ``EvolEvent`` per internal node, in breadth-first
        order.  For every event, side A is the first child and side B the
        second child of the node.

    Raises:
        TypeError: if the root does not have exactly two children
            ("Tree is not rooted") or if any other internal node does not
            have exactly two children.

    Side effects:
        The "evoltype" feature is deleted from every node of the tree and
        then set to "D" or "S" on every internal node.
    """
    # Function-scope import: the block stays self-contained and does not
    # depend on what the rest of the module imports.
    from collections import deque

    root = node.get_tree_root()

    # A rooted tree is expected to split into exactly two lineages.
    outgroups = root.get_children()
    if len(outgroups) != 2:
        raise TypeError("Tree is not rooted")

    # The lineage with fewer distinct leaf names is stored as the tree
    # outgroup.  On a tie the first child is kept.
    first_names = set(leaf.name for leaf in outgroups[0].get_leaves())
    second_names = set(leaf.name for leaf in outgroups[1].get_leaves())
    if len(second_names) < len(first_names):
        smaller_outg = outgroups[1]
    else:
        smaller_outg = outgroups[0]

    # Family size: number of leaves in the whole tree.
    fam_size = len(list(root.get_leaves()))

    # Clean annotations left by previous analyses.
    for stale in root.get_descendants() + [root]:
        stale.del_feature("evoltype")

    all_events = []
    # FIFO queue -> breadth-first traversal; popleft() is O(1), unlike
    # list.pop(0).  Looping on the queue (not on the node) also means the
    # traversal does not depend on the truth value of node objects.
    pending = deque([root])
    while pending:
        current = pending.popleft()
        childs = current.get_children()
        n_childs = len(childs)

        if n_childs == 0:
            continue  # leaf: no event to report
        if n_childs != 2:
            # Single-child nodes used to fail later with an incidental
            # IndexError; report them with the same explicit error used
            # for multifurcations.
            raise TypeError("nodes are expected to have two childs.")
        pending.extend(childs)

        # Leaves at both sides of the event (computed once per side).
        side_a_leaves = childs[0].get_leaves()
        side_b_leaves = childs[1].get_leaves()
        side_a_names = set(leaf.name for leaf in side_a_leaves)
        side_b_names = set(leaf.name for leaf in side_b_leaves)
        side_a_spcs = set(leaf.species for leaf in side_a_leaves)
        side_b_spcs = set(leaf.species for leaf in side_b_leaves)

        # Species overlap score: shared species / all species.
        overlaped_spcs = side_a_spcs & side_b_spcs
        all_spcs = side_a_spcs | side_b_spcs
        score = float(len(overlaped_spcs)) / len(all_spcs)

        event = EvolEvent()
        event.fam_size = fam_size
        event.branch_supports = [
            current.support,
            current.children[0].support,
            current.children[1].support,
        ]
        # NOTE: the newick of the event is deliberately not stored
        # (high memory usage).
        event.sos = score
        event.outgroup_spcs = smaller_outg.get_species()
        # Every attribute gets its own set object so that callers can
        # modify one without affecting the others.
        event.in_seqs = set(side_a_names)
        event.out_seqs = set(side_b_names)
        event.inparalogs = set(side_a_names)
        event.node = current

        if score > sos_thr:
            # Species overlap above the threshold: duplication.
            event.etype = "D"
            event.outparalogs = set(side_b_names)
            event.orthologs = set()
            current.add_feature("evoltype", "D")
        else:
            # Otherwise: speciation.
            event.etype = "S"
            event.orthologs = set(side_b_names)
            event.outparalogs = set()
            current.add_feature("evoltype", "S")

        all_events.append(event)

    return all_events