Example #1
0
def main(argv=None):
    """Aggregate the trees read from stdin according to ``--method``.

    Parses command line options in sys.argv, unless *argv* is given.

    The trees are read from stdin in Newick format and, depending on the
    method, handled in one of four ways:

    * ``non-redundant``: trees are grouped with ``TreeTools.IsCompatible``
      and the first tree of every group is written.
    * ``select-largest``: trees are grouped by name (optionally reduced with
      ``--regex-id``) and ``getBestTree`` picks one tree per group.
    * ``consensus``: the trees are handed to the phylip program ``consense``.
    * ``min``, ``max``, ``sum``, ``mean``, ``counts``, ``median``,
      ``stddev``: branch lengths are aggregated node by node into the first
      tree, which is then written.  Branch lengths equal to one of the
      values in ``filtered_branch_lengths`` are treated as missing.

    NOTE(review): *argv* is only defaulted below and is not forwarded to
    ``E.Start``; confirm how ``E.Start`` picks up the command line.

    Raises:
        ValueError: if ``--regex-id`` does not match a tree name, if there
            are no trees to aggregate branch lengths from, or if a node has
            no usable branch length and ``--error-branchlength`` is unset.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: trees2tree.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("counts", "min", "max", "sum", "mean", "median", "stddev", "non-redundant", "consensus",
                               "select-largest"),
                      help="aggregation function.")

    parser.add_option("-r", "--regex-id", dest="regex_id", type="string",
                      help="regex pattern to extract identifier from tree name for the selection functions.")

    parser.add_option("-w", "--write-values", dest="write_values", type="string",
                      help="if processing multiple trees, write values to file.")

    parser.add_option("-e", "--error-branchlength", dest="error_branchlength", type="float",
                      help="set branch length without counts to this value.")

    parser.set_defaults(
        method="mean",
        regex_id=None,
        filtered_branch_lengths=(-999.0, 999.0),
        write_values=None,
        error_branchlength=None,
        separator=":",
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.loglevel >= 2:
        options.stdlog.write("# reading trees from stdin.\n")
        options.stdlog.flush()

    nexus = TreeTools.Newick2Nexus(sys.stdin)
    if options.loglevel >= 1:
        options.stdlog.write(
            "# read %i trees from stdin.\n" % len(nexus.trees))

    nskipped = 0
    ninput = len(nexus.trees)
    noutput = 0
    nerrors = 0

    if options.method == "non-redundant":
        # compute non-redundant trees: every tree either increments the
        # counter of the first compatible template or becomes a new template.
        template_trees = []
        template_counts = []
        for ntree, tree in enumerate(nexus.trees):

            for x, template in enumerate(template_trees):
                is_compatible, reason = TreeTools.IsCompatible(tree, template)
                if is_compatible:
                    template_counts[x] += 1
                    break
            else:
                template_counts.append(1)
                template_trees.append(tree)

            if options.loglevel >= 2:
                options.stdlog.write(
                    "# tree=%i, ntemplates=%i\n" % (ntree, len(template_trees)))

        for x, template in enumerate(template_trees):
            if options.loglevel >= 1:
                # percentages are relative to the number of input trees
                options.stdlog.write("# tree: %i, counts: %i, percent=%5.2f\n" %
                                     (x, template_counts[x], template_counts[x] * 100.0 / ninput))
            options.stdout.write(TreeTools.Tree2Newick(template) + "\n")
            noutput += 1

    elif options.method in ("select-largest",):
        # select one of the trees with the same name.
        clusters = {}
        for x, tree in enumerate(nexus.trees):
            n = tree.name

            if options.regex_id:
                match = re.search(options.regex_id, n)
                if match is None:
                    raise ValueError(
                        "pattern '%s' does not match tree name '%s'" %
                        (options.regex_id, n))
                n = match.groups()[0]

            clusters.setdefault(n, []).append(x)

        new_trees = [
            getBestTree([nexus.trees[x] for x in cluster], options.method)
            for cluster in clusters.values()]

        for tree in new_trees:
            options.stdout.write(">%s\n" % tree.name)
            options.stdout.write(TreeTools.Tree2Newick(tree) + "\n")
            noutput += 1

        nskipped = ninput - noutput

    elif options.method == "consensus":

        phylip = WrapperPhylip.Phylip()
        phylip.setLogLevel(options.loglevel - 2)
        phylip.setProgram("consense")
        phylip.setOptions(["Y"])
        phylip.setTrees(nexus.trees)

        result = phylip.run()

        options.stdout.write(
            "# consensus tree built from %i trees\n" % (phylip.mNInputTrees))
        options.stdout.write(
            TreeTools.Tree2Newick(result.mNexus.trees[0]) + "\n")
        noutput = 1

    else:
        if not nexus.trees:
            raise ValueError("no trees read from stdin - nothing to aggregate")

        filtered = options.filtered_branch_lengths

        if options.method in ("min", "max", "sum", "mean", "counts"):

            # the first tree accumulates the result
            xtree = nexus.trees[0]

            # number of usable (unfiltered) branch lengths seen per node id.
            # Filtered branch lengths in the first tree are replaced by a 0
            # placeholder and do not count as an observation.
            ntotals = {}
            for n in xtree.chain.keys():
                data = xtree.node(n).data
                if data.branchlength in filtered:
                    data.branchlength = 0
                    ntotals[n] = 0
                else:
                    ntotals[n] = 1

            aggregators = {
                "min": min,
                "max": max,
                "sum": lambda x, y: x + y,
                # the mean is the sum divided by the counts further below
                "mean": lambda x, y: x + y,
                "counts": lambda x, y: x + 1,
            }
            f = aggregators[options.method]

            if options.method == "counts":
                # start counting from the first tree's own observation
                for n in xtree.chain.keys():
                    xtree.node(n).data.branchlength = ntotals[n]

            for tree in nexus.trees[1:]:

                for n in tree.chain.keys():
                    value = tree.node(n).data.branchlength
                    if value in filtered:
                        continue
                    data = xtree.node(n).data
                    if ntotals[n] == 0 and options.method in ("min", "max"):
                        # first usable value: replace the 0 placeholder
                        # instead of comparing against it
                        data.branchlength = value
                    else:
                        data.branchlength = f(data.branchlength, value)
                    ntotals[n] += 1

            if options.method == "mean":
                for n in xtree.chain.keys():
                    data = xtree.node(n).data
                    if ntotals[n] > 0:
                        data.branchlength = float(
                            data.branchlength) / ntotals[n]
                    elif options.error_branchlength is not None:
                        data.branchlength = options.error_branchlength
                        nerrors += 1
                        if options.loglevel >= 1:
                            options.stdlog.write(
                                "# no counts for node %i - set to %f\n" % (n, options.error_branchlength))
                    else:
                        raise ValueError("no counts for node %i" % n)

        else:
            # collect all values for trees
            values = [[] for x in range(TreeTools.GetSize(nexus.trees[0]))]

            for tree in nexus.trees:
                for n, node in tree.chain.items():
                    if node.data.branchlength not in filtered:
                        values[n].append(node.data.branchlength)

            tree = nexus.trees[0]
            for n, node in tree.chain.items():
                if len(values[n]) > 0:
                    if options.method == "stddev":
                        node.data.branchlength = scipy.std(values[n])
                    elif options.method == "median":
                        node.data.branchlength = scipy.median(values[n])
                elif options.error_branchlength is not None:
                    node.data.branchlength = options.error_branchlength
                    nerrors += 1
                    if options.loglevel >= 1:
                        options.stdlog.write(
                            "# no counts for node %i - set to %f\n" % (n, options.error_branchlength))
                else:
                    raise ValueError("no counts for node %i" % n)

            if options.write_values:
                # one line per node: joined leaf names, then the sorted values
                with open(options.write_values, "w") as outfile:
                    for n, node in tree.chain.items():
                        values[n].sort()
                        leaves = options.separator.join(
                            sorted(TreeTools.GetLeaves(tree, n)))
                        outfile.write("%s\t%s\n" %
                                      (leaves, ";".join(map(str, values[n]))))

        # only the first tree, which now carries the aggregate, is written
        del nexus.trees[1:]
        options.stdout.write(TreeTools.Nexus2Newick(nexus) + "\n")
        noutput = 1

    if options.loglevel >= 1:
        options.stdlog.write("# ntotal=%i, nskipped=%i, noutput=%i, nerrors=%i\n" % (
            ninput, nskipped, noutput, nerrors))

    E.Stop()
Example #2
0
def Process(lines, other_trees, options, map_old2new, ntree):
    """Apply ``options.methods`` to each tree in *lines* and write the trees.

    Arguments
    ---------
    lines : list of str
        Input in Newick format; a trailing newline is removed from each line.
    other_trees : list
        Trees used as divisor by ``divide-by-tree``.  With more than one
        tree, the tree at position *ntree* is used, otherwise the first one.
    options : object
        Parsed command line options.  ``options.parameters`` is consumed
        from the front by the methods that take a parameter and
        ``options.write_map`` is set by ``build-map``.
    map_old2new : dict
        Identifier map.  It is filled in place by ``rename`` (read from the
        file given as parameter) and by ``build-map``, so that the caller
        and later calls see the same map.
    ntree : int
        Running tree counter carried over from previous calls.

    Returns
    -------
    tuple
        ``(ntotal, nskipped, ntree)``: number of trees read, number of
        ``divide-by-tree`` applications skipped because the trees were not
        identical, and the updated tree counter.

    Raises
    ------
    ValueError
        If ``divide-by-tree`` is requested without any tree to divide by.
    """

    nexus = TreeTools.Newick2Nexus([line.rstrip("\n") for line in lines])

    if options.loglevel >= 1:
        options.stdlog.write("# read %i trees.\n" % len(nexus.trees))

    if map_old2new is None:
        map_old2new = {}

    nskipped = 0
    ntotal = len(nexus.trees)

    # lazily initialised from options.parameters on first use
    extract_pattern = None
    species2remove = None
    phylip = None

    for index, tree in enumerate(nexus.trees):

        if options.outgroup:
            tree.root_with_outgroup(options.outgroup)

        for method in options.methods:

            if options.loglevel >= 3:
                options.stdlog.write("# applying method %s to tree %i.\n" %
                                     (method, index))

            if method == "midpoint-root":
                tree.root_midpoint()

            elif method == "balanced-root":
                tree.root_balanced()

            elif method == "unroot":
                TreeTools.Unroot(tree)

            elif method == "phylip":
                if phylip is None:
                    # first parameter: executable, second: "@"-separated
                    # options for the executable
                    phylip_executable = options.parameters[0]
                    del options.parameters[0]
                    phylip_options = re.split("@", options.parameters[0])
                    del options.parameters[0]

                    phylip = WrapperPhylip.Phylip()
                    phylip.setProgram(phylip_executable)
                    phylip.setOptions(phylip_options)

                phylip.setTree(tree)

                result = phylip.run()

                # continue with the new tree so that subsequent methods
                # modify the tree that is actually written
                tree = result.mNexus.trees[0]
                nexus.trees[index] = tree

            elif method == "normalize":
                if options.value == 0:
                    # no value given: normalize by the largest branch length
                    v = 0
                    for n in tree.chain.keys():
                        v = max(v, tree.node(n).data.branchlength)
                else:
                    v = options.value

                if v == 0:
                    options.stdlog.write(
                        "# Warning: tree %i: largest branch length is 0 - not normalized\n"
                        % index)
                else:
                    for n in tree.chain.keys():
                        tree.node(n).data.branchlength /= float(v)

            elif method == "divide-by-tree":

                if not other_trees:
                    raise ValueError(
                        "method divide-by-tree requires a second tree "
                        "(--filename-tree2)")

                if len(other_trees) > 1:
                    other_tree = other_trees[ntree]
                else:
                    other_tree = other_trees[0]

                # the trees have to be exactly the same!!
                if options.loglevel >= 2:
                    # writes what a print statement would write, but is
                    # valid syntax on both Python 2 and Python 3
                    sys.stdout.write("%s\n" % (tree.display(),))
                    sys.stdout.write("%s\n" % (other_tree.display(),))

                if not tree.is_identical(other_tree):
                    # only this method is skipped; the remaining methods
                    # are still applied and the tree is still written
                    nskipped += 1
                    continue

                # even if the trees are the same (in topology), the node numbering might not be
                # the same. Thus build a map of node ids.
                map_a2b = TreeTools.GetNodeMap(tree, other_tree)

                for n in tree.chain.keys():
                    try:
                        tree.node(n).data.branchlength /= float(
                            other_tree.node(map_a2b[n]).data.branchlength)
                    except ZeroDivisionError:
                        options.stdlog.write(
                            "# Warning: branch for nodes %i and %i in tree-pair %i: divide by zero\n"
                            % (n, map_a2b[n], ntree))
                        continue

            elif method == "rename":
                if not map_old2new:

                    with open(options.parameters[0], "r") as infile:
                        loaded = IOTools.ReadMap(infile, columns=(0, 1))

                    if options.invert_map:
                        loaded = IOTools.getInvertedDictionary(
                            loaded, make_unique=True)

                    # fill the shared map in place: the parameter naming the
                    # file is consumed here and is not available again
                    map_old2new.update(loaded)

                    del options.parameters[0]

                unknown = []
                for _, node in tree.chain.items():
                    if node.data.taxon:
                        try:
                            node.data.taxon = map_old2new[node.data.taxon]
                        except KeyError:
                            unknown.append(node.data.taxon)

                # taxa without a new name are removed from the tree
                for taxon in unknown:
                    tree.prune(taxon)

            # reformat terminals
            elif method == "extract-with-pattern":

                if not extract_pattern:
                    extract_pattern = re.compile(options.parameters[0])
                    del options.parameters[0]

                for n in tree.get_terminals():
                    node = tree.node(n)
                    node.data.taxon = extract_pattern.search(
                        node.data.taxon).groups()[0]

            elif method == "set-uniform-branchlength":
                for n in tree.chain.keys():
                    tree.node(n).data.branchlength = options.value

            elif method == "build-map":
                # build a map of identifiers
                options.write_map = True
                for n in tree.get_terminals():
                    node = tree.node(n)
                    if node.data.taxon not in map_old2new:
                        new = options.template_identifier % (len(map_old2new) +
                                                             1)
                        map_old2new[node.data.taxon] = new
                    node.data.taxon = map_old2new[node.data.taxon]

            elif method == "remove-pattern":
                if species2remove is None:
                    species2remove = re.compile(options.parameters[0])
                    # consume only the pattern, not the whole parameter list
                    del options.parameters[0]

                # taxa that do not match the pattern are passed to PruneTree
                taxa = []
                for n in tree.get_terminals():
                    t = tree.node(n).data.taxon
                    if not species2remove.search(t):
                        taxa.append(t)
                TreeTools.PruneTree(tree, taxa)

            elif method == "add-node-names":

                inode = 0
                for _, node in tree.chain.items():
                    if not node.data.taxon:
                        node.data.taxon = "inode%i" % inode
                        inode += 1

            elif method == "newick2nhx":
                # convert names to species names
                for n in tree.get_terminals():
                    t = tree.node(n).data.taxon
                    d = t.split("|")
                    if len(d) >= 2:
                        tree.node(n).data.species = d[0]

        ntree += 1

    if options.output_format == "nh":
        # NOTE(review): write_all_taxa is always True here, although an
        # earlier comment in this function said that internal node names
        # are not written by default - confirm which behaviour is wanted.
        options.stdout.write(
            TreeTools.Nexus2Newick(
                nexus,
                write_all_taxa=True,
                with_branchlengths=options.with_branchlengths) + "\n")
    else:
        for tree in nexus.trees:
            tree.writeToFile(options.stdout, format=options.output_format)

    return ntotal, nskipped, ntree
Example #3
0
def _parse_phylip_matrix(matrix):
    """Split the rows of a phylip matrix (header line skipped) into ids and values."""
    ids = []
    rows = []
    for line in matrix[1:]:
        fields = re.split(r"\s+", line[:-1])
        ids.append(fields[0])
        rows.append([field.strip() for field in fields[1:]])
    return ids, rows


def _format_phylip_matrix(ids, rows):
    """Build phylip matrix lines (size header plus one line per row)."""
    lines = ["%i\n" % len(rows)]
    for identifier, row in zip(ids, rows):
        lines.append(identifier + "    " + "    ".join(row) + "\n")
    return lines


def _randomize_zero_distances(rows):
    """Replace off-diagonal "0" entries symmetrically by a small random value."""
    size = len(rows)
    for row in range(size - 1):
        for col in range(row + 1, size):
            if rows[row][col] == "0":
                v = str(random.random() / 10000.0)
                rows[row][col] = v
                rows[col][row] = v


def main(argv=None):
    """Build trees from the distance matrices read from stdin using phylip.

    Parses command line options in sys.argv, unless *argv* is given.

    Lines starting with ``#`` are ignored.  Lines starting with ``>``
    separate several matrices; each matrix is passed to the phylip program
    selected with ``--method`` and the resulting trees are written in Newick
    format, preceded by the separator line unless ``--skip-separators`` is
    given or the input contains no separator at all.

    NOTE(review): *argv* is only defaulted below and is not forwarded to
    ``E.Start``; confirm how ``E.Start`` picks up the command line.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: matrix2tree.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-i",
                      "--invert-map",
                      dest="invert_map",
                      action="store_true",
                      help="""invert map.""")

    parser.add_option("--input-format",
                      dest="input_format",
                      type="choice",
                      choices=("phylip", "full"),
                      help="""input format.""")

    parser.add_option("-t",
                      "--filename-tree",
                      dest="filename_tree",
                      type="string",
                      help="""filename with tree to fit.""")

    parser.add_option("-m",
                      "--method",
                      dest="method",
                      type="choice",
                      choices=("nj", "kitsch", "fitch"),
                      help="""algorithm to run.""")

    parser.add_option("-e",
                      "--replicates",
                      dest="replicates",
                      action="store_true",
                      help="replicates.")

    parser.add_option("-r",
                      "--root",
                      dest="root",
                      action="store_true",
                      help="midpoint root (if it is not rooted).")

    parser.add_option("-u",
                      "--unroot",
                      dest="unroot",
                      action="store_true",
                      help="unroot tree (if it is rooted).")

    parser.add_option("--skip-separators",
                      dest="write_separators",
                      action="store_false",
                      help="do not echo separators (starting with >)")

    #    parser.add_option("-i", "--iterations", dest="iterations", type="int",
    #                      help="number of iterations." )

    parser.add_option("-p",
                      "--power",
                      dest="power",
                      type="float",
                      help="power.")

    parser.add_option(
        "--prune-tree",
        dest="prune_tree",
        action="store_true",
        help=
        "prune tree such to include only taxa which are part of the input matrix."
    )

    parser.add_option(
        "--add-random",
        dest="add_random",
        action="store_true",
        help="add small random value to off-diagonal zero elements in matrix.")

    parser.add_option(
        "--pseudo-replicates",
        dest="pseudo_replicates",
        action="store_true",
        help=
        "add small random value to off-diagonal zero elements in matrix, even if they have no replicates."
    )

    parser.add_option("--debug",
                      dest="debug",
                      action="store_true",
                      help="dump debug information.")

    parser.set_defaults(
        value=0,
        method="nj",
        input_format="phylip",
        filename_tree=None,
        outgroup=None,
        replicates=False,
        root=False,
        unroot=False,
        power=0,
        write_separators=True,
        prune_tree=False,
        add_random=False,
        debug=False,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setPruneTree(options.prune_tree)

    # real lists are needed below (indexing, append), so do not rely on
    # filter() returning a list
    lines = [line for line in sys.stdin.readlines()
             if not line.startswith("#")]

    # indices of the separator lines
    chunks = [index for index, line in enumerate(lines)
              if line.startswith(">")]

    if not chunks:
        # no separators: the whole input is a single matrix
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    for chunk_index in range(len(chunks) - 1):

        separator_index = chunks[chunk_index]
        matrix = lines[separator_index + 1:chunks[chunk_index + 1]]

        # parse phylip matrix
        if options.add_random:
            ids, mm = _parse_phylip_matrix(matrix)

            d = len(mm)
            if options.replicates:
                # with replicates, every entry is a (value, replicates) pair
                for row in range(d - 1):
                    for col in range(row + 1, d):
                        cc = col * 2
                        rr = row * 2
                        if mm[row][cc] == "0" and mm[row][cc + 1] != "0":
                            mm[row][cc + 1] = "1"
                            mm[col][rr + 1] = "1"
                            v = str(random.random() / 10000.0)
                            mm[row][cc] = v
                            mm[col][rr] = v

            else:
                _randomize_zero_distances(mm)

            matrix = _format_phylip_matrix(ids, mm)

        # parse phylip matrix
        if options.pseudo_replicates:
            ids, mm = _parse_phylip_matrix(matrix)

            d = len(mm)
            if options.replicates:
                for row in range(d - 1):
                    for col in range(row + 1, d):
                        cc = col * 2
                        rr = row * 2
                        if mm[row][cc + 1] == "0":
                            mm[row][cc + 1] = "1"
                            mm[col][rr + 1] = "1"
                            v = str(random.random() / 10000.0)
                            mm[row][cc] = v
                            mm[col][rr] = v
                        else:
                            mm[row][cc + 1] = "100"
                            mm[col][rr + 1] = "100"
            else:
                _randomize_zero_distances(mm)

            matrix = _format_phylip_matrix(ids, mm)

        phylip.setMatrix(matrix)

        phylip_options = []

        if options.filename_tree:
            # the tree is read afresh for every matrix
            with open(options.filename_tree, "r") as infile:
                nexus = TreeTools.Newick2Nexus(infile)
            ref_tree = nexus.trees[0]
            phylip.setTree(ref_tree)
            phylip_options.append("U")
        else:
            ref_tree = None

        if options.method == "nj":
            phylip.setProgram("neighbor")

        elif options.method == "fitch":
            phylip.setProgram("fitch")

        elif options.method == "kitsch":
            phylip.setProgram("kitsch")

        if options.replicates:
            phylip_options.append("S")

        if options.power > 0:
            phylip_options.append("P")
            phylip_options.append("%f" % options.power)

        phylip_options.append("Y")

        phylip.setOptions(phylip_options)

        result = phylip.run()

        # the same list object as result.mNexus.trees, so assignments below
        # update the result in place
        if result.mNexus:
            trees = result.mNexus.trees
        else:
            trees = []

        # root with outgroup
        if options.root:
            if options.outgroup:
                # rooting with an outgroup is not implemented here
                pass
            # midpoint root
            else:
                for tree in trees:
                    tree.root_midpoint()

        # explicitly unroot
        elif options.unroot:
            # NOTE(review): the tree set here stays set on the wrapper for
            # the next matrix unless --filename-tree replaces it - confirm
            # that this is harmless for the tree building programs.
            phylip.setOptions(("Y", "W", "U", "Q"))
            phylip.setProgram("retree")
            # use a dedicated index: the chunk index is still needed below
            for tree_index in range(len(trees)):
                phylip.setTree(trees[tree_index])
                xresult = phylip.run()
                trees[tree_index] = xresult.mNexus.trees[0]

        if options.write_separators:
            options.stdout.write(lines[separator_index])

        if result.mNexus:
            options.stdout.write(TreeTools.Nexus2Newick(result.mNexus) + "\n")

        if options.loglevel >= 1:
            if ref_tree:
                nref = len(ref_tree.get_terminals())
            else:
                nref = 0
            for tree in trees:
                options.stdlog.write(
                    "# ninput=%i, nreference=%i, noutput=%i\n" %
                    (len(matrix) - 1, nref, len(tree.get_terminals())))

    E.Stop()
Example #4
0
def main(argv=None):
    """Filter or modify the trees read from stdin and write them to stdout.

    Parses command line options in sys.argv, unless *argv* is given.

    With ``--filter``, trees containing a branch length greater than or
    equal to the first parameter are dropped and the remaining trees are
    written.  Otherwise the comma-separated ``--method`` list is applied to
    every tree by ``Process``; input lines starting with ``>`` separate
    chunks that are processed one after the other, each preceded by its
    separator line in the output.

    NOTE(review): *argv* is only defaulted below and is not forwarded to
    ``E.Start``; confirm how ``E.Start`` picks up the command line.

    Raises:
        ValueError: if ``--filename-tree2`` names a file without trees.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version=
        "%prog version: $Id: tree2tree.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-d",
                      "--value",
                      dest="value",
                      type="float",
                      help="normalizing value.")
    parser.add_option(
        "-m",
        "--method",
        dest="methods",
        type="string",
        help=
        """methods to apply [normalize|divide-by-tree|rename|set-uniform-branchlength|extract-with-pattern|build-map|remove-pattern|unroot|midpoint-root|balanced-root|add-node-names|phylip|newick2nhx]"""
    )
    parser.add_option("-2",
                      "--filename-tree2",
                      dest="filename_tree2",
                      type="string",
                      help="filename with second tree.")
    parser.add_option("-o",
                      "--outgroup",
                      dest="outgroup",
                      type="string",
                      help="reroot with outgroup before processing.")
    parser.add_option("-p",
                      "--parameters",
                      dest="parameters",
                      type="string",
                      help="parameters for methods.")
    parser.add_option(
        "-e",
        "--template-identifier",
        dest="template_identifier",
        type="string",
        help="""template identifier [%default]. A %i is replaced by the position
                      of the sequence in the file.""")
    parser.add_option("-i",
                      "--invert-map",
                      dest="invert_map",
                      action="store_true",
                      help="""invert map.""")
    parser.add_option("-f",
                      "--filter",
                      dest="filter",
                      type="choice",
                      choices=("max-branch-length", ),
                      help="filter trees")
    parser.add_option("--output-format",
                      dest="output_format",
                      type="choice",
                      choices=("nh", "nhx"),
                      help=("output format for trees."))
    parser.add_option(
        "-b",
        "--no-branch-lengths",
        dest="with_branchlengths",
        action="store_false",
        help=
        """do not write branchlengths. Per default, 0 branch lengths are added."""
    )

    parser.set_defaults(
        value=0,
        methods="",
        filename_tree2=None,
        outgroup=None,
        parameters="",
        template_identifier="ID%06i",
        write_map=False,
        invert_map=False,
        filter=None,
        output_format="nh",
        with_branchlengths=True,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    options.methods = options.methods.split(",")
    options.parameters = options.parameters.split(",")

    other_trees = []
    # read other trees
    if options.filename_tree2:
        with open(options.filename_tree2, "r") as infile:
            other_nexus = TreeTools.Newick2Nexus(infile)
        other_trees = other_nexus.trees
        if not other_trees:
            raise ValueError("no trees found in %s" % options.filename_tree2)

    lines = sys.stdin.readlines()

    ntotal, nskipped, ntree = 0, 0, 0

    if options.filter:

        nexus = TreeTools.Newick2Nexus(lines)

        new_trees = []

        value = float(options.parameters[0])
        del options.parameters[0]

        # decision functions: return true, if tree
        # is to be skipped
        if options.filter == "max-branch-length":
            f = lambda x: x >= value

        for tree in nexus.trees:
            ntotal += 1

            for _, node in tree.chain.items():
                if f(node.data.branchlength):
                    nskipped += 1
                    break
            else:
                new_trees.append(tree)
                ntree += 1

        nexus.trees = new_trees

        options.stdout.write(
            TreeTools.Nexus2Newick(nexus, with_names=True) + "\n")

    else:

        # iterate over chunks; lines before the first separator are ignored
        # if there is at least one separator
        chunks = [index for index, line in enumerate(lines)
                  if line.startswith(">")]

        map_old2new = {}

        if chunks:
            ends = chunks[1:] + [len(lines)]
            for start, end in zip(chunks, ends):
                options.stdout.write(lines[start])
                # accumulate the counters of every chunk and carry the tree
                # counter over to the next chunk
                t, s, ntree = Process(lines[start + 1:end], other_trees,
                                      options, map_old2new, ntree)
                ntotal += t
                nskipped += s
        else:
            ntotal, nskipped, ntree = Process(lines, other_trees, options,
                                              map_old2new, ntree)

        if options.write_map:
            # the methods consume entries of options.parameters; if nothing
            # is left, the map is written to stdout
            remaining = getattr(options, "parameters", None)
            if remaining:
                p = remaining[0]
            else:
                p = ""

            if p:
                outfile = open(p, "w")
            else:
                outfile = options.stdout

            try:
                outfile.write("old\tnew\n")
                for old_id, new_id in map_old2new.items():
                    outfile.write("%s\t%s\n" % (old_id, new_id))
            finally:
                if p:
                    outfile.close()

    if options.loglevel >= 1:
        options.stdlog.write("# ntotal=%i, nskipped=%i\n" % (ntotal, nskipped))

    E.Stop()
Example #5
0
    notu = 0

    for tree in nexus.trees:
        if param_loglevel >= 2:
            tree.display()

        for nx in tree.get_terminals():
            t1 = tree.node(nx).get_data().taxon

            if param_create:
                if t1 not in keys:
                    keys[t1] = "otu%i" % notu
                    notu += 1

            if t1 in keys:
                tree.node(nx).get_data().taxon = keys[t1]

    print TreeTools.Nexus2Newick(nexus)

    if param_create:
        outfile = open(param_create, "w")
        for key in keys:
            outfile.write("%s\t%s\n" % (key, keys[key]))
        outfile.close()

    print E.GetFooter()


# Script entry point: call main() with the process arguments and use the
# value it returns as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv))