Example #1
0
def TranslateNode(node, tree, terminals, options):
    if options.do_translate:
        return options.separator.join(sorted(TreeTools.GetLeaves(tree, node)))
    elif node in terminals:
        return tree.node(node).data.taxon
    else:
        return str(node)
Example #2
0
def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: trees2tree.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("counts", "min", "max", "sum", "mean", "median", "stddev", "non-redundant", "consensus",
                               "select-largest"),
                      help="aggregation function.")

    parser.add_option("-r", "--regex-id", dest="regex_id", type="string",
                      help="regex pattern to extract identifier from tree name for the selection functions.")

    parser.add_option("-w", "--write-values", dest="write_values", type="string",
                      help="if processing multiple trees, write values to file.")

    parser.add_option("-e", "--error-branchlength", dest="error_branchlength", type="float",
                      help="set branch length without counts to this value.")

    parser.set_defaults(
        method="mean",
        regex_id=None,
        filtered_branch_lengths=(-999.0, 999.0),
        write_values = None,
        error_branchlength = None,
        separator=":",
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.loglevel >= 2:
        options.stdlog.write("# reading trees from stdin.\n")
        options.stdlog.flush()

    nexus = TreeTools.Newick2Nexus(sys.stdin)
    if options.loglevel >= 1:
        options.stdlog.write(
            "# read %i trees from stdin.\n" % len(nexus.trees))

    nskipped = 0
    ninput = len(nexus.trees)
    noutput = 0
    nerrors = 0

    if options.method == "non-redundant":
        # compute non-redudant trees
        template_trees = []
        template_counts = []
        ntree = 0
        for tree in nexus.trees:

            for x in range(0, len(template_trees)):
                is_compatible, reason = TreeTools.IsCompatible(
                    tree, template_trees[x])
                if is_compatible:
                    template_counts[x] += 1
                    break
            else:
                template_counts.append(1)
                template_trees.append(tree)

            if options.loglevel >= 2:
                options.stdlog.write(
                    "# tree=%i, ntemplates=%i\n" % (ntree, len(template_trees)))

            ntree += 1

        for x in range(0, len(template_trees)):
            if options.loglevel >= 1:
                options.stdlog.write("# tree: %i, counts: %i, percent=%5.2f\n" %
                                     (x, template_counts[x], template_counts[x] * 100.0 / ntotal))
            options.stdout.write(
                TreeTools.Tree2Newick(template_trees[x]) + "\n")

    elif options.method in ("select-largest",):
        # select one of the trees with the same name.
        clusters = {}
        for x in range(0, len(nexus.trees)):
            n = nexus.trees[x].name

            if options.regex_id:
                n = re.search(options.regex_id, n).groups()[0]

            if n not in clusters:
                clusters[n] = []
            clusters[n].append(x)

        new_trees = []

        for name, cluster in clusters.items():
            new_trees.append(
                getBestTree([nexus.trees[x] for x in cluster], options.method))

        for x in range(0, len(new_trees)):
            options.stdout.write(">%s\n" % new_trees[x].name)
            options.stdout.write(TreeTools.Tree2Newick(new_trees[x],) + "\n")
            noutput += 1

        nskipped = ntotal - noutput

    elif options.method == "consensus":

        phylip = WrapperPhylip.Phylip()
        phylip.setLogLevel(options.loglevel - 2)
        phylip.setProgram("consense")
        phylip_options = []
        phylip_options.append("Y")

        phylip.setOptions(phylip_options)
        phylip.setTrees(nexus.trees)

        result = phylip.run()

        options.stdout.write(
            "# consensus tree built from %i trees\n" % (phylip.mNInputTrees))
        options.stdout.write(
            TreeTools.Tree2Newick(result.mNexus.trees[0]) + "\n")
        noutput = 1

    else:
        if options.method in ("min", "max", "sum", "mean", "counts"):

            xtree = nexus.trees[0]
            for n in xtree.chain.keys():
                if xtree.node(n).data.branchlength in options.filtered_branch_lengths:
                    xtree.node(n).data.branchlength = 0
                ntotals = [1] * len(xtree.chain.keys())

            if options.method == "min":
                f = min
            elif options.method == "max":
                f = max
            elif options.method == "sum":
                f = lambda x, y: x + y
            elif options.method == "mean":
                f = lambda x, y: x + y
            elif options.method == "counts":
                f = lambda x, y: x + 1
                for n in xtree.chain.keys():
                    if xtree.node(n).data.branchlength not in options.filtered_branch_lengths:
                        xtree.node(n).data.branchlength = 1
                    else:
                        xtree.node(n).data.branchlength = 0
            else:
                raise "unknown option %s" % options.method

            for tree in nexus.trees[1:]:

                for n in tree.chain.keys():
                    if tree.node(n).data.branchlength not in options.filtered_branch_lengths:
                        xtree.node(n).data.branchlength = f(
                            xtree.node(n).data.branchlength, tree.node(n).data.branchlength)
                        ntotals[n] += 1

            if options.method == "mean":
                for n in xtree.chain.keys():
                    if ntotals[n] > 0:
                        xtree.node(n).data.branchlength = float(
                            xtree.node(n).data.branchlength) / ntotals[n]
                    else:
                        if options.error_branchlength is not None:
                            xtree.node(
                                n).data.branchlength = options.error_branchlength
                            if options.loglevel >= 1:
                                options.stdlog.write(
                                    "# no counts for node %i - set to %f\n" % (n, options.error_branchlength))
                                nerrors += 1
                        else:
                            raise "no counts for node %i" % n

        else:
            # collect all values for trees
            values = [[] for x in range(TreeTools.GetSize(nexus.trees[0]))]

            for tree in nexus.trees:
                for n, node in tree.chain.items():
                    if node.data.branchlength not in options.filtered_branch_lengths:
                        values[n].append(node.data.branchlength)

            tree = nexus.trees[0]
            for n, node in tree.chain.items():
                if len(values[n]) > 0:
                    if options.method == "stddev":
                        node.data.branchlength = scipy.std(values[n])
                    elif options.method == "median":
                        node.data.branchlength = scipy.median(values[n])
                else:
                    if options.error_branchlength is not None:
                        node.data.branchlength = options.error_branchlength
                        if options.loglevel >= 1:
                            options.stdlog.write(
                                "# no counts for node %i - set to %f\n" % (n, options.error_branchlength))
                            nerrors += 1
                    else:
                        raise "no counts for node %i" % n

            if options.write_values:
                outfile = open(options.write_values, "w")
                for n, node in tree.chain.items():
                    values[n].sort()
                    id = options.separator.join(
                        sorted(TreeTools.GetLeaves(tree, n)))
                    outfile.write("%s\t%s\n" %
                                  (id, ";".join(map(str, values[n]))))
                outfile.close()

        del nexus.trees[1:]
        options.stdout.write(TreeTools.Nexus2Newick(nexus) + "\n")
        noutput = 1

    if options.loglevel >= 1:
        options.stdlog.write("# ntotal=%i, nskipped=%i, noutput=%i, nerrors=%i\n" % (
            ninput, nskipped, noutput, nerrors))

    E.Stop()