コード例 #1
0
ファイル: simplerecon.py プロジェクト: maggishaggy/dlcpar
    def __init__(self, tree, stree, gene2species,
                 dupcost=1, losscost=1, coalcost=1, implied=True,
                 init_locus_tree=None,
                 name_internal="n", log=sys.stdout):
        """Store the inputs of a reconciliation search and build its proposer.

        `tree` is first passed to common.rename_nodes together with
        `name_internal`, then stored as the coalescent tree.  The three cost
        values, the `implied` flag, and the `log` stream are stored unchanged.
        If `init_locus_tree` is not given (or is falsy), a copy of `tree` is
        used as the initial locus tree.
        """

        # rename the nodes of the input tree before anything keeps a reference
        common.rename_nodes(tree, name_internal)

        # trees and gene-to-species mapping
        self.coal_tree, self.stree = tree, stree
        self.gene2species = gene2species

        # event costs and reconciliation options
        self.dupcost, self.losscost, self.coalcost = dupcost, losscost, coalcost
        self.implied = implied

        # naming / logging
        self.name_internal = name_internal
        self.log_stream = log

        # starting locus tree: caller-supplied, otherwise a copy of the input
        if init_locus_tree:
            self.init_locus_tree = init_locus_tree
        else:
            self.init_locus_tree = tree.copy()

        self.proposer = DLCReconProposer(tree, stree, gene2species)
コード例 #2
0
ファイル: simplerecon.py プロジェクト: maggishaggy/dlcpar
    def recon(self, nsearch=1000, noimprov=None):
        """Perform reconciliation

        Runs at most `nsearch` iterations.  Each iteration scores the current
        proposal with eval_proposal, hands the cost and proposal to
        eval_search, and then asks the proposer for the next proposal.

        If `noimprov` is not None, the loop stops as soon as `noimprov`
        iterations have elapsed since the cost last strictly decreased.  The
        check happens before eval_search is called for that iteration.

        Returns self.maxrecon, after the nodes of its locus tree have been
        renamed with self.name_internal.
        """

        self.init_search()
        proposal = self.proposer.init_proposal()
        self.maxrecon = proposal.copy()

        best_cost = util.INF
        i_best_cost = -1    # iteration at which best_cost was last lowered
        for i in xrange(nsearch):
            # evaluate cost of proposal
            cost = self.eval_proposal(proposal)
            if cost < best_cost:
                best_cost = cost
                i_best_cost = i

            # stop when the cost has not improved for `noimprov` iterations
            if noimprov is not None and (i - i_best_cost) >= noimprov:
                break

            # let the search see this proposal, then make a new one
            self.eval_search(cost, proposal)
            proposal = self.proposer.next_proposal()

        # rename locus tree nodes
        common.rename_nodes(self.maxrecon.locus_tree, self.name_internal)

        return self.maxrecon
コード例 #3
0
ファイル: reconscape.py プロジェクト: bzhanghmc/dlcpar
    def __init__(self,
                 gtree,
                 stree,
                 gene2species,
                 gene2locus=None,
                 duprange=DEFAULT_RANGE,
                 lossrange=DEFAULT_RANGE,
                 max_loci=INF,
                 max_dups=INF,
                 max_losses=INF,
                 name_internal="n",
                 log=sys.stdout):
        """Store reconciliation inputs, cost ranges, and search limits.

        `gtree` is passed to common.rename_nodes with `name_internal` before
        being stored.  `duprange` and `lossrange` must each unpack into a
        (min, max) pair with 0 < min < max, and the three `max_*` limits must
        be positive; these conditions are checked with assertions.  `log` is
        wrapped in a util.Timer.
        """

        # rename gene tree nodes
        common.rename_nodes(gtree, name_internal)

        # trees and mappings
        self.gtree = gtree
        self.stree = stree
        self.gene2species = gene2species
        self.gene2locus = gene2locus

        # validate the cost ranges (duplication first, then loss)
        dup_lo, dup_hi = duprange
        loss_lo, loss_hi = lossrange
        for lo, hi in ((dup_lo, dup_hi), (loss_lo, loss_hi)):
            assert lo > 0 and hi > 0 and lo < hi
        self.duprange = duprange
        self.lossrange = lossrange

        # fixed options
        self.implied = True
        self.delay = False
        self.prescreen = False

        # validate the search limits
        assert all(limit > 0 for limit in (max_loci, max_dups, max_losses))
        self.max_loci = max_loci
        self.max_dups = max_dups
        self.max_losses = max_losses

        self.name_internal = name_internal
        self.log = util.Timer(log)
コード例 #4
0
ファイル: simplerecon.py プロジェクト: maggishaggy/dlcpar
    def next_proposal(self):
        """Propose a new locus tree and return the reconciliation built on it.

        When the current search tree has two or fewer leaves, no new proposal
        is made and the stored reconciliation is returned unchanged.
        """
        search = self._locus_search

        # nothing to rearrange in a tree with at most two leaves
        if len(search.get_tree().leaves()) <= 2:
            return self._recon

        # roll back the previous proposal unless it was accepted
        if not self._accept_locus:
            search.revert()

        # draw a new locus tree; it starts out as not accepted
        search.propose()
        self._accept_locus = False
        locus_tree = search.get_tree().copy()

        # TODO: make recon root optional
        phylo.recon_root(locus_tree, self._stree, self._gene2species,
                         newCopy=False)
        common.rename_nodes(locus_tree)

        # propose remaining parts of dlcoal recon
        recon = self._recon_lca(locus_tree)
        self._recon = recon
        return recon
コード例 #5
0
ファイル: simplerecon.py プロジェクト: wutron/dlcpar
    def recon(self, nsearch=1000, nconverge=None):
        """Perform reconciliation

        Runs at most `nsearch` iterations of evaluate / eval_search / propose.
        When `nconverge` is truthy, the loop ends early once `nconverge`
        consecutive iterations have failed to strictly lower the cost.

        Returns self.maxrecon, after the nodes of its locus tree have been
        renamed with self.name_internal.
        """

        # set up the search and the starting proposal
        self.init_search()
        proposal = self.proposer.init_proposal()
        self.maxrecon = proposal.copy()

        # convergence bookkeeping is only needed when nconverge is given
        check_converge = bool(nconverge)
        if check_converge:
            stale = 0               # iterations since the cost last dropped
            lowest = util.INF       # lowest cost seen so far

        for i in xrange(nsearch):
            # score the current proposal
            cost = self.eval_proposal(proposal)

            # update maxrecon based on accepting / rejecting proposal
            self.eval_search(cost, proposal)

            # Convergence is judged on cost rather than on acceptance because
            # the search can toggle between several optimal solutions that
            # share the same cost.
            if check_converge:
                if cost < lowest:
                    lowest, stale = cost, 0
                else:
                    stale += 1
                    if stale == nconverge:
                        break

            # move on to a new proposal
            proposal = self.proposer.next_proposal()

        # rename locus tree nodes
        common.rename_nodes(self.maxrecon.locus_tree, self.name_internal)

        return self.maxrecon
コード例 #6
0
    def recon(self):
        """Perform reconciliation

        Logs the input trees, infers the species map, restricts the species
        tree to the subtree rooted where the gene tree root maps, adds implied
        nodes to the gene tree, relabels events, and infers the locus map.

        Returns self.count_vectors.
        """

        self.log.start("Reconciling")

        # log input gene and species trees
        for title, tree in (("gene tree\n", self.gtree),
                            ("species tree\n", self.stree)):
            self.log.log(title)
            log_tree(tree, self.log, func=treelib.draw_tree_names)

        # infer species map
        self._infer_species_map()
        self.log.log("\n\n")

        # start the species tree at the node the gene tree root maps to,
        # and re-point the species map at the nodes of that subtree
        substree = treelib.subtree(self.stree, self.srecon[self.gtree.root])
        subrecon = util.mapdict(self.srecon,
                                val=lambda sn: substree.nodes[sn.name])

        # from here on the instance works with the subtree versions
        # NOTE(review): the original species tree / map are not restored
        # within this method -- confirm that callers do not rely on them
        self.stree = substree
        self.srecon = subrecon

        # add implied nodes (standard speciation, speciation from duplication,
        # delay nodes), then relabel events (so that factor_tree works)
        reconlib.add_implied_nodes(self.gtree, self.stree, self.srecon,
                                   self.sevents, delay=self.delay)
        self.sevents = phylo.label_events(self.gtree, self.srecon)
        common.rename_nodes(self.gtree, self.name_internal)

        # log gene tree (with species map)
        self.log.log("gene tree (with species map)\n")
        log_tree(self.gtree, self.log, func=draw_tree_srecon,
                 srecon=self.srecon)

        # infer locus map
        self._infer_locus_map()

        self.log.stop()

        return self.count_vectors
コード例 #7
0
ファイル: reconscape.py プロジェクト: bzhanghmc/dlcpar
    def recon(self):
        """Perform reconciliation

        Steps, in order: log the input trees, infer the species map, switch
        the instance to the species subtree rooted where the gene tree root
        maps, add implied nodes and relabel events, log the annotated gene
        tree, and infer the locus map.

        Returns self.count_vectors.
        """

        self.log.start("Reconciling")

        # log input gene and species trees
        self.log.log("gene tree\n")
        log_tree(self.gtree,
                 self.log,
                 func=treelib.draw_tree_names)
        self.log.log("species tree\n")
        log_tree(self.stree,
                 self.log,
                 func=treelib.draw_tree_names)

        # infer species map
        self._infer_species_map()
        self.log.log("\n\n")

        # add implied speciation nodes but first start the species tree
        # at the right root
        sroot = self.srecon[self.gtree.root]
        substree = treelib.subtree(self.stree, sroot)

        def to_subtree_node(snode):
            # look up the node of the same name in the species subtree
            return substree.nodes[snode.name]

        subrecon = util.mapdict(self.srecon, val=to_subtree_node)

        # replace internal storage with the subtree versions
        # NOTE(review): nothing in this method switches back to the input
        # species tree afterwards -- confirm this is intended
        self.stree = substree
        self.srecon = subrecon

        # add implied nodes (standard speciation, speciation from duplication,
        # delay nodes), then relabel events (so that factor_tree works)
        reconlib.add_implied_nodes(self.gtree,
                                   self.stree,
                                   self.srecon,
                                   self.sevents,
                                   delay=self.delay)
        self.sevents = phylo.label_events(self.gtree, self.srecon)
        common.rename_nodes(self.gtree, self.name_internal)

        # log gene tree (with species map)
        self.log.log("gene tree (with species map)\n")
        log_tree(self.gtree,
                 self.log,
                 func=draw_tree_srecon,
                 srecon=self.srecon)

        # infer locus map
        self._infer_locus_map()

        self.log.stop()

        return self.count_vectors
コード例 #8
0
    def __init__(self, gtree, stree, gene2species, gene2locus=None,
                 duprange=DEFAULT_RANGE, lossrange=DEFAULT_RANGE,
                 max_loci=INF, max_dups=INF, max_losses=INF,
                 name_internal="n", log=sys.stdout):
        """Record the trees, mappings, cost ranges, and limits for a run.

        The gene tree is handed to common.rename_nodes (with `name_internal`)
        before it is stored.  Both ranges must unpack into two positive values
        in strictly increasing order, and each `max_*` limit must be positive;
        violations trigger an AssertionError.  The `log` stream is wrapped in
        a util.Timer.
        """

        # rename gene tree nodes
        common.rename_nodes(gtree, name_internal)

        self.gtree, self.stree = gtree, stree
        self.gene2species, self.gene2locus = gene2species, gene2locus

        # unpack both ranges before checking either of them
        dup_min, dup_max = duprange
        loss_min, loss_max = lossrange
        assert dup_min > 0 and dup_max > 0 and dup_min < dup_max
        assert loss_min > 0 and loss_max > 0 and loss_min < loss_max
        self.duprange, self.lossrange = duprange, lossrange

        # options that are not configurable through the constructor
        self.implied = True
        self.delay = False
        self.prescreen = False

        # limits must all be positive
        assert max_loci > 0
        assert max_dups > 0
        assert max_losses > 0
        self.max_loci, self.max_dups, self.max_losses = \
            max_loci, max_dups, max_losses

        self.name_internal = name_internal
        self.log = util.Timer(log)
コード例 #9
0
ファイル: simplerecon.py プロジェクト: wutron/dlcpar
    def __init__(self,
                 tree,
                 stree,
                 gene2species,
                 dupcost=1,
                 losscost=1,
                 coalcost=1,
                 implied=True,
                 init_locus_tree=None,
                 name_internal="n",
                 log=sys.stdout):
        """Initialize the reconciliation search state.

        The nodes of `tree` are renamed through common.rename_nodes using
        `name_internal`; the tree is then kept as the coalescent tree.  Costs,
        the `implied` flag, and the log stream are stored as given.  A falsy
        `init_locus_tree` is replaced by a copy of `tree`.
        """

        # rename input tree nodes
        common.rename_nodes(tree, name_internal)

        # inputs
        self.coal_tree = tree
        self.stree = stree
        self.gene2species = gene2species

        # costs and options
        self.dupcost = dupcost
        self.losscost = losscost
        self.coalcost = coalcost
        self.implied = implied

        # naming and logging
        self.name_internal = name_internal
        self.log_stream = log

        # fall back to a copy of the input tree as the initial locus tree
        start_tree = init_locus_tree
        if not start_tree:
            start_tree = tree.copy()
        self.init_locus_tree = start_tree

        self.proposer = DLCReconProposer(tree, stree, gene2species)
コード例 #10
0
ファイル: simplerecon.py プロジェクト: wutron/dlcpar
    def next_proposal(self):
        """Return the next proposed reconciliation.

        A locus tree with more than two leaves gets a fresh proposal from the
        locus search (after reverting the previous one if it was never
        accepted); otherwise the current reconciliation is returned as is.
        """

        nleaves = len(self._locus_search.get_tree().leaves())
        if nleaves > 2:
            # if locus_tree has not yet been accepted, then revert it
            if not self._accept_locus:
                self._locus_search.revert()

            # propose new locus_tree (not accepted until told otherwise)
            self._locus_search.propose()
            self._accept_locus = False
            proposed = self._locus_search.get_tree()
            locus_tree = proposed.copy()

            # TODO: make recon root optional
            phylo.recon_root(locus_tree,
                             self._stree,
                             self._gene2species,
                             newCopy=False)
            common.rename_nodes(locus_tree)

            # propose remaining parts of dlcoal recon
            self._recon = self._recon_lca(locus_tree)

        return self._recon
コード例 #11
0
ファイル: reconlib.py プロジェクト: bzhanghmc/dlcpar
def labeledrecon_to_recon(gene_tree, labeled_recon, stree,
                          name_internal="n"):
    """Convert from DLCpar to DLCoal reconciliation model

    Args:
        gene_tree: gene tree; a copy of it becomes the coalescent tree, and
            the tree itself is factored along the species branches.
        labeled_recon: object providing the attributes `species_map`,
            `locus_map`, and `order`.
        stree: species tree.
        name_internal: passed to common.rename_nodes when the nodes of the
            newly built locus tree are renamed.

    Returns:
        A tuple (coal_tree, recon) where recon is a phyloDLC.Recon built from
        (coal_recon, locus_tree, locus_recon, locus_events, daughters).

    NOTE: This is non-reversible because it produces NON-dated coalescent and locus trees
    """

    locus_map = labeled_recon.locus_map
    species_map = labeled_recon.species_map
    order = labeled_recon.order

    # coalescent tree equals gene tree
    coal_tree = gene_tree.copy()

    # factor gene tree
    # (subtrees is indexed by species node; each entry is iterated below as
    # (root, rootchild, leaves) triples)
    events = phylo.label_events(gene_tree, species_map)
    subtrees = factor_tree(gene_tree, stree, species_map, events)

    # gene names
    # genenames[snode][locus] = name of the gene tree leaf with that species and locus
    genenames = {}
    for snode in stree:
        genenames[snode] = {}
    for leaf in gene_tree.leaves():
        genenames[species_map[leaf]][locus_map[leaf]] = leaf.name

    # 2D dict to keep track of locus tree nodes by hashing by speciation node and locus
    # key1 = snode, key2 = locus, value = list of nodes (sorted from oldest to most recent)
    locus_tree_map = {}
    for snode in stree:
        locus_tree_map[snode] = {}

    # initialize locus tree, coal/locus recon, and daughters
    locus_tree = treelib.Tree()
    coal_recon = {}         # coal tree node -> locus tree node
    locus_recon = {}        # locus tree node -> species tree node
    locus_events = {}       # locus tree node -> "spec" / "dup" / "gene"
    daughters = []          # locus tree nodes created as the new locus of a duplication

    # initialize root of locus tree
    root = treelib.TreeNode(locus_tree.new_name())
    locus_tree.add(root)
    locus_tree.root = root
    sroot = species_map[gene_tree.root]
    locus = locus_map[gene_tree.root]
    coal_recon[coal_tree.root] = root
    locus_recon[root] = sroot
    locus_tree_map[sroot][locus] = [root]

    # build locus tree along each species branch
    for snode in stree.preorder(sroot):
        subtrees_snode = subtrees[snode]

        # skip if no branches in this species branch
        if len(subtrees_snode) == 0:
            continue

        # build locus tree
        # 1) speciation
        if snode.parent:
            # note: this loop rebinds the local name `root`
            # (previously the locus tree root created above)
            for (root, rootchild, leaves) in subtrees_snode:
                if rootchild:
                    locus = locus_map[root]     # use root locus!

                    # create new locus tree node in this species branch
                    if locus not in locus_tree_map[snode]:
                        # attach below the most recent node of this locus
                        # in the parent species branch
                        old_node = locus_tree_map[snode.parent][locus][-1]

                        new_node = treelib.TreeNode(locus_tree.new_name())
                        locus_tree.add_child(old_node, new_node)
                        locus_recon[new_node] = snode

                        locus_events[old_node] = "spec"

                        locus_tree_map[snode][locus] = [new_node]

                    # update coal_recon
                    cnode = coal_tree.nodes[rootchild.name]
                    lnode = locus_tree_map[snode][locus][-1]
                    coal_recon[cnode] = lnode

        # 2) duplication
        if snode in order:
            # may have to reorder loci (in case of multiple duplications)
            # NOTE(review): a parent locus that never shows up in
            # locus_tree_map[snode] would be re-queued forever -- confirm
            # that valid inputs always make this loop terminate
            queue = collections.deque(order[snode].keys())
            while len(queue) > 0:
                plocus = queue.popleft()
                if plocus not in locus_tree_map[snode]:
                    # punt
                    # (parent locus not created yet in this species branch;
                    # retry after the other loci have been handled)
                    queue.append(plocus)
                    continue

                # handle this ordered list
                lst =  order[snode][plocus]
                for gnode in lst:
                    locus = locus_map[gnode]
                    cnode = coal_tree.nodes[gnode.name]

                    if locus != plocus:     # duplication
                        # update locus_tree, locus_recon, and daughters
                        old_node = locus_tree_map[snode][plocus][-1]

                        # first child continues the parent locus
                        new_node1 = treelib.TreeNode(locus_tree.new_name())
                        locus_tree.add_child(old_node, new_node1)
                        locus_recon[new_node1] = snode

                        # second child starts the new locus and is recorded as a daughter
                        new_node2 = treelib.TreeNode(locus_tree.new_name())
                        locus_tree.add_child(old_node, new_node2)
                        coal_recon[cnode] = new_node2
                        locus_recon[new_node2] = snode
                        daughters.append(new_node2)

                        locus_events[old_node] = "dup"

                        locus_tree_map[snode][plocus].append(new_node1)
                        locus_tree_map[snode][locus] = [new_node2]

                    else:                   # deep coalescence
                        lnode = locus_tree_map[snode][locus][-1]
                        coal_recon[cnode] = lnode

        # reconcile remaining coal tree nodes to locus tree
        # (no duplication so only a single locus tree node with the desired locus)
        for (root, rootchild, leaves) in subtrees_snode:
            if rootchild:
                # traverse from rootchild, stopping at the nodes listed in `leaves`
                for gnode in gene_tree.preorder(rootchild, is_leaf=lambda x: x in leaves):
                    cnode = coal_tree.nodes[gnode.name]
                    if cnode not in coal_recon:
                        locus = locus_map[gnode]
                        assert len(locus_tree_map[snode][locus]) == 1
                        lnode = locus_tree_map[snode][locus][-1]
                        coal_recon[cnode] = lnode

        # tidy up if at an extant species
        if snode.is_leaf():
            for locus, nodes in locus_tree_map[snode].iteritems():
                genename = genenames[snode][locus]
                lnode = nodes[-1]
                cnode = coal_tree.nodes[genename]

                # relabel genes in locus tree
                locus_tree.rename(lnode.name, genename)

                # relabel locus events
                locus_events[lnode] = "gene"

                # reconcile genes (genes in coal tree reconcile to genes in locus tree)
                # possible mismatch due to genes having an internal ordering even though all exist to present time
                # [could also do a new round of "speciation" at bottom of extant species branches,
                # but this introduces single children nodes that would just be removed anyway]
                coal_recon[cnode] = lnode

    # rename internal nodes
    common.rename_nodes(locus_tree, name_internal)

    # simplify coal_tree (and reconciliations)
    removed = treelib.remove_single_children(coal_tree)
    for cnode in removed:
        del coal_recon[cnode]

    # simplify locus_tree (and reconciliations + daughters)
    removed = treelib.remove_single_children(locus_tree)
    # (.items() rather than .iteritems(): coal_recon is assigned to inside the loop)
    for cnode, lnode in coal_recon.items():
        if lnode in removed:
            # reconciliation updates to first child that is not removed
            new_lnode = lnode
            while new_lnode in removed:
                new_lnode = new_lnode.children[0]
            coal_recon[cnode] = new_lnode
    for lnode in removed:
        del locus_recon[lnode]
        del locus_events[lnode]
    for ndx, lnode in enumerate(daughters):
        if lnode in removed:
            # daughter updates to first child that is not removed
            new_lnode = lnode
            while new_lnode in removed:
                new_lnode = new_lnode.children[0]
            daughters[ndx] = new_lnode

    # events were assigned while building the tree instead of being recomputed:
##    locus_events = phylo.label_events(locus_tree, locus_recon)
    # sanity check: every remaining locus tree node must have an event label
    assert all([lnode in locus_events for lnode in locus_tree])

    #========================================
    # put everything together

    return coal_tree, phyloDLC.Recon(coal_recon, locus_tree, locus_recon, locus_events, daughters)
コード例 #12
0
ファイル: reconlib.py プロジェクト: bzhanghmc/dlcpar
def recon_to_labeledrecon(coal_tree, recon, stree, gene2species,
                          name_internal="n", locus_mpr=True):
    """Convert from DLCoal to DLCpar reconciliation model

    If locus_mpr is set (default), use MPR from locus_tree to stree.

    Args:
        coal_tree: coalescent tree; a copy of it becomes the gene tree.
        recon: object providing `coal_recon`, `locus_tree`, `locus_recon`,
            and `daughters` attributes.
        stree: species tree.
        gene2species: passed to phylo.reconcile when locus_mpr is set.
        name_internal: passed to common.rename_nodes when the gene tree
            nodes are renamed after implied nodes have been added.
        locus_mpr: if false, use recon.locus_recon and recon.daughters
            as given instead of recomputing them.

    Returns:
        A tuple (gene_tree, labeled_recon) where labeled_recon is a
        LabeledRecon built from (species_map, locus_map, order).
    """

    gene_tree = coal_tree.copy()
    coal_recon = recon.coal_recon
    locus_tree = recon.locus_tree
    if not locus_mpr:
        locus_recon = recon.locus_recon
        daughters = recon.daughters
    else:
        # recompute the locus reconciliation and keep only those daughters
        # whose parent is labeled as a duplication under it
        locus_recon = phylo.reconcile(locus_tree, stree, gene2species)
        locus_events = phylo.label_events(locus_tree, locus_recon)
        # NOTE(review): `daughters` is tested with `in` repeatedly below, which
        # relies on filter() returning a list (Python 2 behavior)
        daughters = filter(lambda node: locus_events[node.parent] == "dup", recon.daughters)

    #========================================
    # find species map

    # find species tree subtree
    # (rooted at the species that the coal tree root maps to via the locus tree)
    substree = treelib.subtree(stree, locus_recon[coal_recon[coal_tree.root]])

    # find species map
    # (gene node -> coal node of same name -> locus node -> species node,
    # looked up by name in the species subtree)
    species_map = {}
    for node in gene_tree:
        cnode = coal_tree.nodes[node.name]
        lnode = coal_recon[cnode]
        snode = locus_recon[lnode]
        species_map[node] = substree[snode.name]

    # add implied speciation and delay nodes to gene tree
    # (the returned lists of added nodes are not used further in this function)
    events = phylo.label_events(gene_tree, species_map)
    added_spec, added_dup, added_delay = add_implied_nodes(gene_tree, substree, species_map, events)

    # rename internal nodes
    common.rename_nodes(gene_tree, name_internal)

    #========================================
    # helper functions

    def walk_up(node):
        # return the coal tree node for the nearest ancestor-or-self of `node`
        # whose name exists in the coal tree
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        return walk_up(node.parent)

    def walk_down(node):
        # return the coal tree node for the nearest descendant-or-self of `node`
        # whose name exists in the coal tree; nodes along the way must have
        # exactly one child
        if node.name in coal_tree.nodes:
            return coal_tree.nodes[node.name]
        assert len(node.children) == 1, (node.name, node.children)
        return walk_down(node.children[0])

    #========================================
    # find locus map

    # label loci in locus tree
    loci = {}
    # counter for locus labels (note: shadows the builtin `next` in this function)
    next = 1
    # keep track of duplication ages (measured as dist from leaf since root dist may differ in coal and locus trees)
    locus_times = treelib.get_tree_ages(locus_tree)
    dup_times = {}      # locus label -> age of the locus tree node where it was created
    dup_snodes = {}     # locus label -> species of the locus tree node where it was created
    for lnode in locus_tree.preorder():
        if not lnode.parent:            # root
            loci[lnode] = next
        elif lnode in daughters:        # duplication
            next += 1
            loci[lnode] = next
            dup_times[next] = locus_times[lnode.parent]
            dup_snodes[next] = locus_recon[lnode.parent]
        else:                           # regular node
            loci[lnode] = loci[lnode.parent]

    # label loci in gene tree
    locus_map = {}
    for node in gene_tree:
        if node.name in coal_tree.nodes:
            # node in coal tree
            cnode = coal_tree.nodes[node.name]
            lnode = coal_recon[cnode]
            locus_map[node] = loci[lnode]
        else:
            # node not in coal tree, so use either parent or child locus
            cnode_up = walk_up(node)
            lnode_up = coal_recon[cnode_up]
            loci_up = loci[lnode_up]

            cnode_down = walk_down(node)
            lnode_down = coal_recon[cnode_down]
            loci_down = loci[lnode_down]

            if loci_up == loci_down:
                # parent and child locus match
                locus_map[node] = loci_up
            else:
                # determine whether to use parent or child locus
                # (child locus if this node's species is the species where the
                # child locus was created, or one of its descendants)
                snode = species_map[node]
                dup_snode = dup_snodes[loci_down]
                if (snode.name == dup_snode.name) or (snode.name in dup_snode.descendant_names()):
                    locus_map[node] = loci_down
                else:
                    locus_map[node] = loci_up

    #========================================
    # find order

    # find loci that give rise to new loci in each sbranch
    # (set of (species node, parent locus) pairs)
    parent_loci = set()
    for node in gene_tree:
        if node.parent:
            locus = locus_map[node]
            plocus = locus_map[node.parent]

            if locus != plocus:
                snode = species_map[node]
                parent_loci.add((snode, plocus))

    # find order (locus tree and coal tree must use same timescale)
    # order[snode][plocus] = list of gene tree nodes in snode whose parent has locus plocus
    order = {}
    for node in gene_tree:
        if node.parent:
            snode = species_map[node]
            plocus = locus_map[node.parent]

            if (snode, plocus) in parent_loci:
                order.setdefault(snode, {})
                order[snode].setdefault(plocus, [])
                order[snode][plocus].append(node)

    # find coalescent/duplication times (= negative age) and depths
    coal_times = treelib.get_tree_ages(coal_tree)
    depths = get_tree_depths(gene_tree, distfunc=lambda node: 1)
    def get_time(node):
        # sort key: (negative age, depth); only called on nodes stored in
        # `order`, all of which have a parent
        if locus_map[node.parent] != locus_map[node]:
            # duplication
            return -dup_times[locus_map[node]], depths[node]
        else:
            # walk up to the nearest node in the coal tree
            # if the node was added (due to spec or dup), it has a single child
            # so it can be placed directly after its parent without affecting the extra lineage count
            if node.name in coal_tree.nodes:
                cnode = coal_tree.nodes[node.name]
            else:
                cnode = walk_up(node)
            return -coal_times[cnode], depths[node]

    # sort by node times
    # 1) larger age (smaller dist from root) are earlier in sort
    # 2) if equal dist, then smaller depths are earlier in sort
    for snode, d in order.iteritems():
        for plocus, lst in d.iteritems():
            lst.sort(key=get_time)

    #========================================
    # put everything together

    return gene_tree, LabeledRecon(species_map, locus_map, order)