Example #1
0
    def switch_hosts(self, t0, seed=None):
        """
        Select an extant pathogen lineage at random and reassign its host.

        The chosen lineage is closed off at time ``t0`` and a new node of
        degree two is placed above it to annotate the host switch event in
        the tree.  The new node replaces the chosen lineage in
        ``self.extant_p``; the chosen lineage is appended to
        ``self.not_extant_p``.

        :param t0: time of the switch; the chosen lineage's branch length is
            set to ``t0 - height`` and the new node's ``height`` is ``t0``
        :param seed: optional seed for the ``random`` module.  Any value
            other than None (including 0) reseeds the generator.
        :return: None
        """
        assert len(
            self.extant_h) > 1, "Error: attempted to switch between one host"
        # Compare against None so that a seed of 0 is honoured as well.
        if seed is not None:
            random.seed(seed)
        pick_p = random.choice(
            self.extant_p)  # select an extant pathogen lineage at random

        # Keep drawing until the destination differs from the current host.
        pick_h = pick_p.host
        while pick_h == pick_p.host:
            pick_h = random.choice(self.extant_h)

        # add a node of degree size 2 to annotate host switch event in tree
        pick_p.dist = t0 - pick_p.height
        next_p = Tree(name=pick_p.name + '_m%s-%sm' %
                      (pick_p.host.name, pick_h.name),
                      dist=0)
        next_p.add_features(host=pick_h, height=t0)
        pick_p.up = next_p
        next_p.children = [pick_p]

        # The annotated node is now the extant tip of this lineage.
        self.extant_p.remove(pick_p)
        self.extant_p.append(next_p)
        self.not_extant_p.append(pick_p)
Example #2
0
def evolveAlongTree(host, guest, reverseMap, rootSequence, hmmfile,
                    emissionProbs, transmat):
    """
    Evolves a root sequence along an entire host tree, taking into account the domain level
    events present in the guest tree (duplication, loss, speciation)

    The result is stored on the host tree itself: every host node receives a
    'sequence' feature. Nothing is returned.

    Args:
        host (Tree)       : The host tree (ete3 format) inside which the guest tree evolved
        guest (Tree)      : The guest tree over which to evolve a sequence
        reverseMap (dict) : mapping from host node -> collection of guest nodes
        rootSequence (str): Initial sequence to evolve. Should contain sequence with ONE domain
        hmmfile (str)     : path to hmmfile used to identify domains
        emissionProbs     : matrix with dimensions (n x 20) where n is the length of
                            the domain. Each row contains the probability of each
                            aa appearing at that position (in pfam hmm order)
        transmat          : passed through unchanged to evolveSequence and domainOrder
    """

    for node in host.traverse():
        node.add_feature('sequence', "")

    # NOTE(review): reading hostNode.up.sequence below assumes traverse()
    # visits a parent before its children -- confirm for the Tree class used.
    for hostNode in host.traverse():
        tempSequence = rootSequence if hostNode == host else hostNode.up.sequence

        # No events occurred at this node
        if hostNode not in reverseMap:
            hostNode.sequence = evolveSequence(tempSequence, 0.05,
                                               hostNode.dist, emissionProbs,
                                               hmmfile, transmat)
            continue

        allGuestNodes = reverseMap[hostNode]
        allGuestNodesSet = set(allGuestNodes)
        # Original links of the guest nodes that get cut out temporarily.
        upAncestors, leafChildren = {}, {}

        try:
            for guestNode in allGuestNodes:
                if guestNode.up not in allGuestNodesSet:
                    upAncestors[guestNode] = guestNode.up
                    # pass positional information on from the previous species
                    if guestNode.up is not None:
                        guestNode.add_feature('position',
                                              guestNode.up.position)
                    guestNode.up = None
                if guestNode.children != [] and guestNode.children[
                        0] not in allGuestNodesSet:
                    leafChildren[guestNode] = guestNode.children
                    guestNode.children = []

            if hostNode != host:
                # Gather the detached subtrees under one temporary root.
                t = Tree()
                t.dist = 0
                # A real list is required: on Python 3 dict.keys() is a view,
                # which the ete3 children setter rejects.
                t.children = list(upAncestors)
                for guestNode in upAncestors:
                    guestNode.up = t
            else:
                t = guest

            # Actually do the work
            hostNode.sequence = domainOrder(tempSequence, .75, hmmfile,
                                            emissionProbs, t, hostNode.name,
                                            transmat)
        finally:
            # Reconnect all root and leaf nodes to the rest of the guest tree,
            # even if the step above raised, so the guest tree is never left
            # cut apart.
            for node in upAncestors:
                node.up = upAncestors[node]
            for node in leafChildren:
                node.children = leafChildren[node]
Example #3
0
def buildGuestTree(host, dupRateFunc, dupfunc, eventDist, branchFunc,
                   startSize):
    """
    Creates a guest tree topology inside of the host tree given parameters

    Side effects: resets the module globals dupnodes, dupevents and lossnodes
    to 0, and sets the 'bl' and 'leaves' attributes on host nodes.

    Args:
        host (Tree): The host tree (ete3 format) to evolve inside
        dupRateFunc (func): A function that takes the current number of domains as input
                            and returns the rate at which duplications occur
        dupfunc (func): A function which returns the size of a tandem duplication.
                        Must be of the form dupfunc(x,y), where x and y are the
                        min and max duplication sizes respectively
        eventDist (func): function that takes no arguments and returns the distance to
                          the next evolutionary event
        branchFunc (func): takes eventDist as input, returns actual distance between two events
        startSize (int): The number of leaves in the initial guest, prior to any
                         evolutionary event.

    Returns:
        guest (Tree):   The complete guest tree topology
        nodemap (dict): host -> guest mapping of nodes
    """
    global dupnodes
    global dupevents
    global lossnodes

    dupnodes = 0
    dupevents = 0
    lossnodes = 0

    guest = createTree(startSize)
    guest.name = "g" + host.name[1:] + "_0"
    guest.add_feature('pos', 0)
    nodemap = {}

    # NOTE(review): if `host` is an ete3 Tree, iterating it directly yields
    # only its leaves, so internal host nodes get no initial 'bl' feature or
    # empty nodemap entry here -- confirm this is intended before changing it,
    # since callers may test `hostNode in nodemap`.
    for node in host:
        node.add_feature('bl', node.dist)
        nodemap[node] = []

    for hostNode in host.traverse():
        if hostNode == host:  # Root node
            hostNode.bl = buildGuestNode(guest, dupRateFunc, dupfunc,
                                         hostNode.name, hostNode.dist,
                                         branchFunc, eventDist)
            nodemap[hostNode] = list(guest.traverse())
            hostNode.add_feature(
                'leaves', [leaf for leaf in guest if leaf.pos != LOSS_CODE])
            continue

        # Find last occurrence of domain nodes above me
        gleaves = hostNode.up.leaves
        hostNode.add_feature('leaves', [])
        if not gleaves:
            continue

        # create a subtree for each surviving gleaf, remembering which gleaf
        # it hangs from so it can be attached afterwards
        newTrees = []
        treepositions = []
        for i, gleaf in enumerate(gleaves):
            if gleaf.pos != LOSS_CODE:
                t = createNode()
                t.pos = gleaf.pos
                newTrees.append(t)
                treepositions.append(i)
        if not treepositions:
            continue

        # Temporary common root so all subtrees evolve in a single call.
        supernode = Tree()
        supernode.children = newTrees

        hostNode.bl = buildGuestNode(supernode, dupRateFunc, dupfunc,
                                     hostNode.name, hostNode.dist,
                                     branchFunc, eventDist)

        # Filter on the leaf's position code, as done for the root above;
        # comparing the node object itself to LOSS_CODE never filters.
        hostNode.leaves += [
            leaf for leaf in supernode if leaf.pos != LOSS_CODE
        ]
        # Drop the first traversed node (the temporary supernode).
        nodemap[hostNode] = list(supernode.traverse())[1:]

        for position, subtree in zip(treepositions, newTrees):
            gleaves[position].add_child(subtree)

    #clean(host, guest) #Only add this back in when clean problems are solved
    return guest, nodemap
Example #4
0
def _spr_swap_child(parent, old_child, new_child):
    """Put new_child into the slot of parent's child list held by old_child."""
    # Binary-tree assumption shared with spr(): whatever is not the first
    # child is treated as the second.
    slot = 0 if parent.children[0] is old_child else 1
    parent.children[slot] = new_child


def _spr_bypass_parent(old_parent, subtree):
    """Connect subtree's former sibling directly to old_parent's parent."""
    if old_parent.children[0] is subtree:
        other_child = old_parent.children[1]
    else:
        other_child = old_parent.children[0]

    ancestor = old_parent.up
    other_child.up = ancestor
    _spr_swap_child(ancestor, old_parent, other_child)


def spr(tree, subtree, new_sibling):
    """
    Performs a subtree prune and regraft operation moving a subtree to
    a new location in the tree

    The tree is modified in place and assumed to be binary. The returned
    node is the root after the move, which may differ from `tree`.

    Arguments:
    tree (Tree): The full tree in which to perform an spr move
    subtree (Tree): The subtree that will be pruned and grafted
    new_sibling (Tree): The new sibling of the subtree being moved

    Returns:
    The root of the rearranged tree

    Raises:
    ValueError: if the requested move is not a valid spr move (subtree is
                the whole tree, the two nodes are identical, parent/child or
                already siblings, or new_sibling lies inside subtree)

    >>> t = Tree('((A,D),(B,C));')
    >>> subtree = t&"A"
    >>> new_sibling = t&"B"
    >>> t = spr(t, subtree, new_sibling)
    >>> newtree = Tree('(D,(C,(B,A)));')
    >>> rf = newtree.robinson_foulds(t)[0]
    >>> assert(rf == 0)
    """

    if tree is subtree:
        raise ValueError("cannot prune the whole tree")
    if subtree is new_sibling or subtree.up is new_sibling:
        raise ValueError(
            "new_sibling must differ from subtree and from its parent")
    if subtree.up is new_sibling.up:
        raise ValueError("subtree and new_sibling are already siblings")
    if tree.get_common_ancestor(subtree, new_sibling) is subtree:
        raise ValueError("new_sibling lies inside subtree")

    #CASE 1 (int -> int):
    if subtree.up is not tree and new_sibling is not tree:
        old_parent = subtree.up

        #Add node between new_sibling and its parent
        joint = Tree()
        joint.name = old_parent.name
        joint.up = new_sibling.up
        joint.children = [subtree, new_sibling]
        new_sibling.up = joint
        _spr_swap_child(joint.up, new_sibling, joint)

        #Move subtree under the new node, then remove its old parent
        subtree.up = joint
        _spr_bypass_parent(old_parent, subtree)

    #CASE 2 (cor -> int)
    elif subtree.up is tree:
        # The old root is reused as the node joining subtree and
        # new_sibling; its other child becomes the new root.
        old_root = tree
        if old_root.children[0] is subtree:
            tree = old_root.children[1]
        else:
            tree = old_root.children[0]
        tree.up = None

        old_root.up = new_sibling.up
        old_root.children = [subtree, new_sibling]
        new_sibling.up = old_root
        _spr_swap_child(old_root.up, new_sibling, old_root)

    #CASE 3 (int -> root)
    else:
        old_parent = subtree.up

        # A fresh root joins the previous root and the pruned subtree.
        new_root = Tree()
        new_root.up = None
        new_root.name = old_parent.name
        new_root.children = [tree, subtree]
        tree.up = new_root
        subtree.up = new_root

        #Remove old parent
        _spr_bypass_parent(old_parent, subtree)
        tree = new_root

    return tree
Example #5
0
    def generateGuestPoints(self, pipeWidth=75):
        """
        Assign a 'coord' feature (x, y) to every node of self.guest.

        Works in four passes: (1) insert extra "L_" nodes into the guest
        tree, registering each in self.nodemap; (2) give every guest node a
        'level' within its host node; (3) count how many guest nodes sit on
        each level of each host node; (4) compute the coordinates from the
        host nodes' 'coord' values.

        Reads self.guest, self.host and self.nodemap (guest node -> host
        node). Modifies self.guest and self.nodemap in place.

        :param pipeWidth: x offsets of the generated points range over
            [-pipeWidth, +pipeWidth] around the interpolated midpoint
        :return: None
        """

        #Add in loss nodes
        for leaf in self.guest:
            node = leaf
            while node != self.guest:
                host_me = self.nodemap[node]
                host_parent = self.nodemap[node.up]

                if len(node.children) == 2:
                    first, second = node.children
                    first_here = self.nodemap[first] == host_me
                    second_here = self.nodemap[second] == host_me
                    # Exactly one child shares this node's host: put a node
                    # mapped to this host above the other child.
                    if first_here != second_here:
                        tofix = second if first_here else first
                        temp = Tree()
                        temp.name = "L_" + node.name
                        self.nodemap[temp] = host_me
                        temp.up = tofix.up
                        temp.children = [tofix]
                        tofix.up = temp
                        if tofix == node.children[0]:
                            node.children[0] = temp
                        else:
                            node.children[1] = temp

                if host_me != host_parent and host_me.up != host_parent:
                    #Add loss nodes in
                    dist = host_parent.get_distance(host_me,
                                                    topology_only=True)
                    guest_parent = node.up
                    curr = node
                    for i in range(int(dist)):
                        temp = Tree()
                        temp.name = "L_" + str(i) + "_" + guest_parent.name
                        # Each new node maps one host node further up.
                        self.nodemap[temp] = self.nodemap[curr].up
                        temp.up = curr.up
                        temp.children = [curr]
                        curr.up = temp
                        if curr == guest_parent.children[0]:
                            guest_parent.children[0] = temp
                        else:
                            guest_parent.children[1] = temp
                        curr = temp
                    # `node` is deliberately not advanced here: the next
                    # iteration re-examines it against its new parent.
                else:
                    node = node.up

        #Add levels
        for node in self.guest.traverse():
            node.add_feature('level', -1)

        for leaf in self.guest:
            node = leaf
            node.level = 0
            currmap = self.nodemap[node]
            currlevel = 0
            node = node.up
            while node is not None:
                mymap = self.nodemap[node]
                if mymap == currmap:
                    # Same host as the node below: at least one level higher.
                    node.level = max(node.level, currlevel + 1)
                else:
                    node.level = max(node.level, 0)
                currlevel = node.level
                currmap = mymap
                node = node.up

        #How many points at each level of a node in the host tree?
        rmap = {}  #map of host -> guest
        for guestnode, hostnode in self.nodemap.items():
            rmap.setdefault(hostnode, []).append(guestnode)

        hostlevels = {}  # hostnode -> levelcounts
        # same as hostlevels, but will count how many of each level have
        # been used so far
        usedlevels = {}
        for hostnode, nodes in rmap.items():
            maxlevel = max([0] + [node.level for node in nodes])

            levelsizes = [0 for _ in range(maxlevel + 1)]
            for node in nodes:
                levelsizes[node.level] += 1

            hostlevels[hostnode] = levelsizes
            usedlevels[hostnode] = [0 for _ in range(maxlevel + 1)]

        #Generate Points - this only works for generateSpeciesTree2
        for node in self.guest.traverse():
            hostnode, level = self.nodemap[node], node.level
            used = usedlevels[hostnode][level]
            maxlevel = len(usedlevels[hostnode])
            usedlevels[hostnode][level] += 1

            bottom = hostnode.coord
            if hostnode == self.host:
                # The host root has no parent; use a point 100 units less
                # in y than the root itself as the top of its segment.
                top = list(self.host.coord)
                top[1] -= 100
            else:
                top = hostnode.up.coord

            ydiff = bottom[1] - top[1]
            yused = ydiff * level / maxlevel
            y = bottom[1] - yused

            # Interpolate x between bottom and top, then spread the nodes
            # sharing this level evenly across the pipe width.
            xlow, xhigh = bottom[0], top[0]
            xmid = int(xlow + (xhigh - xlow) * (yused / float(ydiff)))
            xused = int(pipeWidth * 2 * (used + 1) /
                        (float(hostlevels[hostnode][level]) + 1))
            x = xmid + xused - pipeWidth
            node.add_feature('coord', (x, y))