def switch_hosts(self, t0, seed=None):
    """
    Select an extant pathogen lineage at random and reassign its host.

    A new node with a single child is created as the parent of the chosen
    lineage to annotate the host-switch event in the pathogen tree.  The new
    node replaces the chosen lineage in ``self.extant_p`` and the chosen
    lineage is appended to ``self.not_extant_p``.

    :param t0: time of the switch; stored as the ``height`` of the new node
        and used to set the branch length (``dist``) of the chosen lineage.
    :param seed: optional seed for the ``random`` module.  Any value other
        than ``None`` (including 0) reseeds the generator.
    :return: None
    """
    assert len(
        self.extant_h) > 1, "Error: attempted to switch between one host"

    # Compare against None so that a seed of 0 is honoured as well.
    if seed is not None:
        random.seed(seed)

    # select an extant pathogen lineage at random
    pick_p = random.choice(self.extant_p)

    # keep drawing until the new host differs from the current one
    pick_h = pick_p.host
    while pick_h == pick_p.host:
        pick_h = random.choice(self.extant_h)

    # add a node of degree size 2 to annotate host switch event in tree
    pick_p.dist = t0 - pick_p.height
    next_p = Tree(
        name=pick_p.name + '_m%s-%sm' % (pick_p.host.name, pick_h.name),
        dist=0)
    next_p.add_features(host=pick_h, height=t0)
    pick_p.up = next_p
    next_p.children = [pick_p]

    # the annotation node becomes the extant tip of this lineage
    self.extant_p.remove(pick_p)
    self.extant_p.append(next_p)
    self.not_extant_p.append(pick_p)
def evolveAlongTree(host, guest, reverseMap, rootSequence, hmmfile,
                    emissionProbs, transmat):
    """
    Evolves a root sequence along an entire host tree, taking into account
    the domain level events present in the guest tree (duplication, loss,
    speciation).

    The result is stored on the host tree: every host node receives a
    ``sequence`` feature.  Nothing is returned.

    While a host node is processed, the guest nodes mapped to it are
    temporarily detached from the rest of the guest tree and re-attached
    afterwards.  Guest nodes whose parent lies outside the current host node
    additionally receive a ``position`` feature copied from that parent.

    Args:
        host (Tree)       : The host tree (ete3 format) inside which the
                            guest tree evolved
        guest (Tree)      : The guest tree over which to evolve a sequence
        reverseMap (dict) : mapping from host node -> list of guest nodes
        rootSequence (str): Initial sequence to evolve. Should contain
                            sequence with ONE domain
        hmmfile (str)     : path to hmmfile used to identify domains
        emissionProbs     : matrix with dimensions (n x 20) where n is the
                            length of the domain. Each row contains the
                            probability of each aa appearing at that position
                            (in pfam hmm order)
        transmat          : passed through unchanged to evolveSequence and
                            domainOrder
    """
    for hostNode in host.traverse():
        hostNode.add_feature('sequence', "")

    # Relies on traverse() yielding a parent before its children, since each
    # node starts from the sequence already stored on its parent.
    for hostNode in host.traverse():
        tempSequence = rootSequence if hostNode == host else hostNode.up.sequence

        # No events occurred at this node
        if hostNode not in reverseMap:
            hostNode.sequence = evolveSequence(tempSequence, 0.05,
                                               hostNode.dist, emissionProbs,
                                               hmmfile, transmat)
            continue

        allGuestNodes = reverseMap[hostNode]
        allGuestNodesSet = set(allGuestNodes)
        # Original links of the nodes cut loose below, so they can be restored.
        upAncestors, leafChildren = {}, {}

        for guestNode in allGuestNodes:
            if guestNode.up not in allGuestNodesSet:
                upAncestors[guestNode] = guestNode.up
                # pass positional information on from the previous species
                if guestNode.up is not None:
                    guestNode.add_feature('position', guestNode.up.position)
                guestNode.up = None

            if guestNode.children != [] and \
                    guestNode.children[0] not in allGuestNodesSet:
                leafChildren[guestNode] = guestNode.children
                guestNode.children = []

        if hostNode != host:
            # Gather the detached subtree roots under one artificial root.
            t = Tree()
            t.dist = 0
            # ete3 only accepts a real list here; a dict view is rejected on
            # Python 3.
            t.children = list(upAncestors)
            for guestNode in upAncestors:
                guestNode.up = t
        else:
            t = guest

        # Actually do the work
        tempSequence = domainOrder(tempSequence, .75, hmmfile, emissionProbs,
                                   t, hostNode.name, transmat)
        hostNode.sequence = tempSequence

        # Reconnect all root and leaf nodes to the rest of the guest tree
        for guestNode, parent in upAncestors.items():
            guestNode.up = parent
        for guestNode, children in leafChildren.items():
            guestNode.children = children
def buildGuestTree(host, dupRateFunc, dupfunc, eventDist, branchFunc,
                   startSize):
    """
    Creates a guest tree topology inside of the host tree given parameters.

    Side effects: resets the module-level counters ``dupnodes``,
    ``dupevents`` and ``lossnodes`` to 0, and annotates host nodes with the
    attributes ``bl`` and ``leaves`` (the guest leaves at the bottom of that
    host node whose ``pos`` is not LOSS_CODE).

    Args:
        host (Tree): The host tree (ete3 format) to evolve inside
        dupRateFunc (func): A function that takes the current number of
                            domains as input and returns the rate at which
                            duplications occur
        dupfunc (func): A function which returns the size of a tandem
                        duplication. Must be of the form dupfunc(x,y), where
                        x and y are the min and max duplication sizes
                        respectively
        eventDist (func): function that takes no arguments and returns the
                          distance to the next evolutionary event
        branchFunc (func): takes eventDist as input, returns actual distance
                           between two events
        startSize (int): The number of leaves in the initial guest, prior to
                         any evolutionary event.

    Returns:
        guest (Tree): The complete guest tree topology
        nodemap (dict): host node -> list of guest nodes that evolved in it
    """
    global dupnodes
    global dupevents
    global lossnodes
    dupnodes = 0
    dupevents = 0
    lossnodes = 0

    guest = createTree(startSize)
    # presumably the host root name carries a one-character prefix that is
    # swapped for "g" here; verify against the host tree generator
    guest.name = "g" + host.name[1:] + "_0"
    guest.add_feature('pos', 0)

    nodemap = {}
    branchLength = 0

    # NOTE(review): iterating an ete3 tree directly appears to visit its
    # leaves only, so internal host nodes get no default entry here; confirm
    # this is intended.
    for node in host:
        node.add_feature('bl', node.dist)
        nodemap[node] = []

    for hostNode in host.traverse():
        if hostNode == host:
            # Root node
            branchLength = buildGuestNode(guest, dupRateFunc, dupfunc,
                                          hostNode.name, hostNode.dist,
                                          branchFunc, eventDist)
            hostNode.bl = branchLength
            nodemap[hostNode] = list(guest.traverse())
            hostNode.add_feature(
                'leaves', [leaf for leaf in guest if leaf.pos != LOSS_CODE])
            continue

        # Find last occurrence of domain nodes above me
        gleaves = hostNode.up.leaves
        hostNode.add_feature('leaves', [])
        if not gleaves:
            continue

        # create a subtree for each gleaf that was not lost, remembering
        # which parent leaf it continues
        continued = []
        for gleaf in gleaves:
            if gleaf.pos != LOSS_CODE:
                t = createNode()
                t.pos = gleaf.pos
                continued.append((gleaf, t))
        if not continued:
            continue
        newTrees = [t for _, t in continued]

        # Temporary common root so all lineages evolve together in this host
        supernode = Tree()
        supernode.children = newTrees
        branchLength = buildGuestNode(supernode, dupRateFunc, dupfunc,
                                      hostNode.name, hostNode.dist,
                                      branchFunc, eventDist)
        hostNode.bl = branchLength
        # Filter on the leaf's position code, as the root branch does;
        # comparing the node object itself to LOSS_CODE never excludes
        # anything.
        hostNode.leaves += [
            leaf for leaf in supernode if leaf.pos != LOSS_CODE
        ]
        # Drop the first traversed node (the temporary supernode) from the map
        nodemap[hostNode] = list(supernode.traverse())[1:]

        # Hang each new subtree below the parent-host leaf it continues
        for gleaf, t in continued:
            gleaf.add_child(t)

    #clean(host, guest) #Only add this back in when clean problems are solved
    return guest, nodemap
def _spr_replace_child(parent, old_child, new_child):
    """Put new_child into the slot of parent.children held by old_child."""
    # Binary-tree assumption: whatever is not in slot 0 is taken to be in
    # slot 1.
    if parent.children[0] == old_child:
        parent.children[0] = new_child
    else:
        parent.children[1] = new_child


def _spr_dissolve_parent(old_parent, moved_child):
    """Remove old_parent by hanging moved_child's sibling on the grandparent."""
    ancestor = old_parent.up
    if old_parent.children[0] == moved_child:
        other_child = old_parent.children[1]
    else:
        other_child = old_parent.children[0]
    other_child.up = ancestor
    _spr_replace_child(ancestor, old_parent, other_child)


def spr(tree, subtree, new_sibling):
    """
    Performs a subtree prune and regraft operation moving a subtree to
    a new location in the tree.

    The tree is modified in place and is assumed to be binary.  The root may
    change, so callers must use the returned tree.

    Arguments:
        tree (Tree): The full tree in which to perform an spr move
        subtree (Tree): The subtree that will be pruned and grafted
        new_sibling (Tree): The new sibling of the subtree being moved

    Returns:
        Tree: the root of the tree after the move

    Raises:
        ValueError: if the move is not possible: the subtree is the whole
            tree, the target is the subtree itself, its parent, its current
            sibling, or a node inside the subtree.

    >>> t = Tree('((A,D),(B,C));')
    >>> subtree = t&"A"
    >>> new_sibling = t&"B"
    >>> t = spr(t, subtree, new_sibling)
    >>> newtree = Tree('(D,(C,(B,A)));')
    >>> rf = newtree.robinson_foulds(t)[0]
    >>> assert(rf == 0)
    """
    if tree == subtree:
        raise ValueError("cannot prune the root of the tree")
    if subtree == new_sibling or subtree.up == new_sibling:
        raise ValueError(
            "new_sibling must not be the subtree itself or its parent")
    if subtree.up == new_sibling.up:
        raise ValueError("new_sibling is already the sibling of subtree")
    if tree.get_common_ancestor(subtree, new_sibling) == subtree:
        raise ValueError("new_sibling must not lie inside subtree")

    # CASE 1 (int -> int):
    if subtree.up != tree and new_sibling != tree:
        # Add node between new_sibling and its parent
        temp = Tree()
        temp.up = new_sibling.up
        temp.children = [subtree, new_sibling]
        new_sibling.up = temp
        _spr_replace_child(temp.up, new_sibling, temp)

        # Remove subtree from its current location; the new internal node
        # inherits the name of the parent that is about to disappear
        old_parent = subtree.up
        subtree.up = temp
        temp.name = old_parent.name

        # Remove old parent
        _spr_dissolve_parent(old_parent, subtree)

    # CASE 2 (root -> int): the old root node is reused as the new internal
    # node above subtree and new_sibling
    elif subtree.up == tree:
        old_root = tree
        if tree.children[0] == subtree:
            tree = tree.children[1]
        else:
            tree = tree.children[0]
        tree.up = None

        old_root.up = new_sibling.up
        old_root.children = [subtree, new_sibling]
        new_sibling.up = old_root
        _spr_replace_child(old_root.up, new_sibling, old_root)

    # CASE 3 (int -> root): a new root is placed above the old root and
    # the subtree
    else:
        temp = Tree()
        temp.up = None
        temp.children = [tree, subtree]
        tree.up = temp
        tree = temp

        temp.name = subtree.up.name
        old_parent = subtree.up
        subtree.up = tree

        # Remove old parent
        _spr_dissolve_parent(old_parent, subtree)

    return tree
def generateGuestPoints(self, pipeWidth=75):
    """
    Assign a drawing coordinate to every node of the guest tree.

    Works in four passes over ``self.guest``:

    1. Insert explicit loss nodes (named ``L_...``) into the guest tree and
       register them in ``self.nodemap`` (guest node -> host node).
    2. Give every guest node a ``level`` feature: its height within the run
       of consecutive guest nodes that map to the same host node.
    3. Count, per host node, how many guest nodes sit on each level.
    4. Store a ``coord`` feature ``(x, y)`` on every guest node, interpolated
       between the ``coord`` of its host node and that of the host node's
       parent.

    Args:
        pipeWidth (int): half-width of the horizontal band around the host
                         branch over which guest nodes of one level are
                         spread.
    """
    #Add in loss nodes
    for leaf in self.guest:
        node = leaf
        while node != self.guest:
            host_me = self.nodemap[node]
            host_parent = self.nodemap[node.up]

            # Exactly one child stayed in this host node: mark the branch to
            # the other child with a loss node that lives in this host node.
            if len(node.children) == 2:
                first_here = self.nodemap[node.children[0]] == host_me
                second_here = self.nodemap[node.children[1]] == host_me
                if first_here != second_here:
                    slot = 1 if first_here else 0
                    tofix = node.children[slot]
                    temp = Tree()
                    temp.name = "L_" + node.name
                    self.nodemap[temp] = host_me
                    temp.up = tofix.up
                    temp.children = [tofix]
                    tofix.up = temp
                    node.children[slot] = temp

            if host_me != host_parent and host_me.up != host_parent:
                #Add loss nodes in
                # The guest branch skips one or more host nodes: insert one
                # loss node per skipped host node, climbing the host tree.
                # Assumes host_parent is an ancestor of host_me.
                dist = host_parent.get_distance(host_me, topology_only=True)
                guest_parent = node.up
                curr = node
                for i in range(int(dist)):
                    temp = Tree()
                    temp.name = "L_" + str(i) + "_" + guest_parent.name
                    self.nodemap[temp] = self.nodemap[curr].up
                    temp.up = curr.up
                    temp.children = [curr]
                    curr.up = temp
                    if curr == guest_parent.children[0]:
                        guest_parent.children[0] = temp
                    else:
                        guest_parent.children[1] = temp
                    curr = temp
                # node is deliberately not advanced: the next pass re-checks
                # it against its new parent and then moves up.
            else:
                node = node.up

    #Add levels
    for node in self.guest.traverse():
        node.add_feature('level', -1)

    for leaf in self.guest:
        node = leaf
        node.level = 0
        currmap = self.nodemap[node]
        currlevel = 0
        node = node.up
        while node is not None:
            mymap = self.nodemap[node]
            if mymap == currmap:
                # same host node as the child: one level above it
                node.level = max(node.level, currlevel + 1)
            else:
                # entered a new host node: levels restart at 0
                node.level = max(node.level, 0)
            currlevel = node.level
            currmap = mymap
            node = node.up

    #How many points at each level of a node in the host tree?
    rmap = {}  #map of host -> guest
    for key in self.nodemap:
        rmap.setdefault(self.nodemap[key], []).append(key)

    hostlevels = {}  # hostnode -> levelcounts
    # same as hostlevels, but will count how many of each level have been
    # used so far
    usedlevels = {}
    for key in rmap:
        nodes = rmap[key]
        maxlevel = 0
        for node in nodes:
            maxlevel = max(maxlevel, node.level)
        levelsizes = [0 for _ in range(maxlevel + 1)]
        for node in nodes:
            levelsizes[node.level] += 1
        hostlevels[key] = levelsizes
        usedlevels[key] = [0 for _ in range(maxlevel + 1)]

    #Generate Points - this only works for generateSpeciesTree2
    for node in self.guest.traverse():
        hostnode, level = self.nodemap[node], node.level
        used = usedlevels[hostnode][level]
        maxlevel = len(usedlevels[hostnode])
        usedlevels[hostnode][level] += 1

        bottom = hostnode.coord
        if hostnode == self.host:
            # the host root has no parent: use a point 100 units above it
            top = list(self.host.coord)
            top[1] -= 100
        else:
            top = hostnode.up.coord

        # y: climb from the bottom of the host branch in proportion to level
        ydiff = bottom[1] - top[1]
        yused = ydiff * level / maxlevel
        y = bottom[1] - yused

        # x: centre of the host branch at that height, then spread the nodes
        # of this level evenly over a band of width 2 * pipeWidth
        xlow, xhigh = bottom[0], top[0]
        xmid = int(xlow + (xhigh - xlow) * (yused / float(ydiff)))
        xused = int(pipeWidth * 2 * (used + 1) /
                    (float(hostlevels[hostnode][level]) + 1))
        x = xmid + xused - pipeWidth

        node.add_feature('coord', (x, y))