def main(argv=None):
    """Combine several trees read from stdin into one tree (or a subset).

    Parses command line options in sys.argv, unless *argv* is given.

    Trees are read in Newick format from stdin and processed according to
    ``--method``:

    non-redundant
        Each tree is compared with the templates collected so far using
        ``TreeTools.IsCompatible``; the first compatible template gets its
        count increased, otherwise the tree becomes a new template. All
        templates are written to stdout.
    select-largest
        Trees are grouped by name (optionally reduced to the first group of
        ``--regex-id``) and ``getBestTree`` picks one tree per group.
    consensus
        The trees are handed to PHYLIP's ``consense`` via ``WrapperPhylip``.
    min, max, sum, mean, counts
        Branch lengths are aggregated node by node onto the first tree.
        Values listed in ``options.filtered_branch_lengths`` are ignored.
    median, stddev
        All unfiltered branch lengths are collected per node and summarised
        onto the first tree; ``--write-values`` dumps the collected values.

    Raises:
        ValueError: if ``--regex-id`` does not match a tree name, or if a
            node has no usable branch length and ``--error-branchlength``
            was not given (mean, median, stddev).
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(version="%prog version: $Id: trees2tree.py 2782 2009-09-10 11:40:29Z andreas $",
                            usage=globals()["__doc__"])

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("counts", "min", "max", "sum", "mean", "median",
                               "stddev", "non-redundant", "consensus",
                               "select-largest"),
                      help="aggregation function.")

    parser.add_option("-r", "--regex-id", dest="regex_id", type="string",
                      help="regex pattern to extract identifier from tree name for the selection functions.")

    parser.add_option("-w", "--write-values", dest="write_values", type="string",
                      help="if processing multiple trees, write values to file.")

    parser.add_option("-e", "--error-branchlength", dest="error_branchlength", type="float",
                      help="set branch length without counts to this value.")

    parser.set_defaults(
        method="mean",
        regex_id=None,
        filtered_branch_lengths=(-999.0, 999.0),
        write_values=None,
        error_branchlength=None,
        separator=":",
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    if options.loglevel >= 2:
        options.stdlog.write("# reading trees from stdin.\n")
        options.stdlog.flush()

    nexus = TreeTools.Newick2Nexus(sys.stdin)
    trees = nexus.trees
    if options.loglevel >= 1:
        options.stdlog.write(
            "# read %i trees from stdin.\n" % len(trees))

    nskipped = 0
    ninput = len(trees)
    noutput = 0
    nerrors = 0

    if options.method == "non-redundant":
        # compute non-redundant trees
        template_trees = []
        template_counts = []

        for ntree, tree in enumerate(trees):
            for x, template in enumerate(template_trees):
                is_compatible, _reason = TreeTools.IsCompatible(
                    tree, template)
                if is_compatible:
                    template_counts[x] += 1
                    break
            else:
                # not compatible with any known template: start a new one
                template_counts.append(1)
                template_trees.append(tree)

            if options.loglevel >= 2:
                options.stdlog.write(
                    "# tree=%i, ntemplates=%i\n" % (ntree, len(template_trees)))

        for x, template in enumerate(template_trees):
            if options.loglevel >= 1:
                # percentage is relative to the number of input trees
                # (the original referenced an undefined name here).
                options.stdlog.write(
                    "# tree: %i, counts: %i, percent=%5.2f\n" %
                    (x, template_counts[x],
                     template_counts[x] * 100.0 / ninput))
            options.stdout.write(TreeTools.Tree2Newick(template) + "\n")
            noutput += 1

    elif options.method in ("select-largest",):
        # select one of the trees with the same name.
        clusters = {}
        for x, tree in enumerate(trees):
            name = tree.name

            if options.regex_id:
                match = re.search(options.regex_id, name)
                if match is None:
                    raise ValueError(
                        "pattern '%s' does not match tree name '%s'" %
                        (options.regex_id, name))
                name = match.groups()[0]

            clusters.setdefault(name, []).append(x)

        new_trees = [
            getBestTree([trees[x] for x in cluster], options.method)
            for cluster in clusters.values()]

        for tree in new_trees:
            options.stdout.write(">%s\n" % tree.name)
            options.stdout.write(TreeTools.Tree2Newick(tree) + "\n")
            noutput += 1

        nskipped = ninput - noutput

    elif options.method == "consensus":

        phylip = WrapperPhylip.Phylip()
        phylip.setLogLevel(options.loglevel - 2)
        phylip.setProgram("consense")
        phylip.setOptions(["Y"])
        phylip.setTrees(trees)

        result = phylip.run()

        options.stdout.write(
            "# consensus tree built from %i trees\n" % (phylip.mNInputTrees))
        options.stdout.write(
            TreeTools.Tree2Newick(result.mNexus.trees[0]) + "\n")
        noutput = 1

    else:
        filtered = options.filtered_branch_lengths

        if options.method in ("min", "max", "sum", "mean", "counts"):

            combine = {
                "min": min,
                "max": max,
                "sum": lambda x, y: x + y,
                "mean": lambda x, y: x + y,
                "counts": lambda x, y: x + 1,
            }[options.method]

            # the first tree serves as accumulator for all others
            xtree = trees[0]
            node_ids = list(xtree.chain.keys())

            # ntotals[n]: number of unfiltered values seen for node n.
            # Node ids are used as list indices, as in the rest of this
            # script.
            ntotals = [0] * len(node_ids)
            for n in node_ids:
                data = xtree.node(n).data
                # decide validity BEFORE overwriting the value; otherwise
                # the replacement 0 would look like a real observation.
                is_valid = data.branchlength not in filtered
                if is_valid:
                    ntotals[n] = 1
                if options.method == "counts":
                    data.branchlength = 1 if is_valid else 0
                elif not is_valid:
                    data.branchlength = 0

            for tree in trees[1:]:
                for n in tree.chain.keys():
                    value = tree.node(n).data.branchlength
                    if value in filtered:
                        continue
                    data = xtree.node(n).data
                    if ntotals[n] == 0 and options.method in ("min", "max"):
                        # first real value for this node: the placeholder 0
                        # must not take part in min()/max().
                        data.branchlength = value
                    else:
                        data.branchlength = combine(data.branchlength, value)
                    ntotals[n] += 1

            if options.method == "mean":
                for n in node_ids:
                    data = xtree.node(n).data
                    if ntotals[n] > 0:
                        data.branchlength = float(
                            data.branchlength) / ntotals[n]
                    elif options.error_branchlength is not None:
                        data.branchlength = options.error_branchlength
                        if options.loglevel >= 1:
                            options.stdlog.write(
                                "# no counts for node %i - set to %f\n" %
                                (n, options.error_branchlength))
                        nerrors += 1
                    else:
                        raise ValueError("no counts for node %i" % n)

        else:
            # numpy.std / numpy.median are what the deprecated scipy.std /
            # scipy.median aliases pointed to.
            import numpy

            # collect all values for trees
            values = [[] for x in range(TreeTools.GetSize(trees[0]))]

            for tree in trees:
                for n, node in tree.chain.items():
                    if node.data.branchlength not in filtered:
                        values[n].append(node.data.branchlength)

            tree = trees[0]
            for n, node in tree.chain.items():
                if values[n]:
                    if options.method == "stddev":
                        node.data.branchlength = numpy.std(values[n])
                    elif options.method == "median":
                        node.data.branchlength = numpy.median(values[n])
                elif options.error_branchlength is not None:
                    node.data.branchlength = options.error_branchlength
                    if options.loglevel >= 1:
                        options.stdlog.write(
                            "# no counts for node %i - set to %f\n" %
                            (n, options.error_branchlength))
                    nerrors += 1
                else:
                    raise ValueError("no counts for node %i" % n)

            if options.write_values:
                # one line per node: sorted leaf names, then sorted values
                with open(options.write_values, "w") as outfile:
                    for n, node in tree.chain.items():
                        values[n].sort()
                        leaves = options.separator.join(
                            sorted(TreeTools.GetLeaves(tree, n)))
                        outfile.write(
                            "%s\t%s\n" %
                            (leaves, ";".join(map(str, values[n]))))

        # only the first tree (now holding the aggregate) is written
        del nexus.trees[1:]
        options.stdout.write(TreeTools.Nexus2Newick(nexus) + "\n")
        noutput = 1

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ntotal=%i, nskipped=%i, noutput=%i, nerrors=%i\n" % (
                ninput, nskipped, noutput, nerrors))

    E.Stop()
def Process(lines, other_trees, options, map_old2new, ntree):
    """Apply ``options.methods`` to every tree in *lines* and write the result.

    Args:
        lines: lines with trees in Newick format; a trailing newline is
            removed from each line before parsing.
        other_trees: trees used by the ``divide-by-tree`` method. If there
            is more than one, the tree at position *ntree* is used for the
            current tree, otherwise the single tree is used for all.
        options: option object. Read here: ``methods``, ``parameters``,
            ``outgroup``, ``value``, ``invert_map``, ``template_identifier``,
            ``output_format``, ``with_branchlengths``, ``loglevel``,
            ``stdlog`` and ``stdout``. Methods that need a parameter consume
            it from the front of ``options.parameters``; ``build-map`` sets
            ``options.write_map``.
        map_old2new: dict mapping old taxon names to new ones. It is filled
            in place by ``rename`` (from a file) and ``build-map`` so that
            the caller sees the entries. ``None`` is accepted.
        ntree: running number of the first tree in *lines*.

    Returns:
        Tuple ``(ntotal, nskipped, ntree)``: number of trees read, number of
        ``divide-by-tree`` operations skipped because the trees were not
        identical, and the running tree number after processing.

    Raises:
        ValueError: if ``divide-by-tree`` is requested without other trees
            or ``extract-with-pattern`` does not match a taxon.
    """

    # rstrip instead of x[:-1]: a last line without trailing newline must
    # not lose its final character.
    nexus = TreeTools.Newick2Nexus([x.rstrip("\n") for x in lines])

    if options.loglevel >= 1:
        options.stdlog.write("# read %i trees.\n" % len(nexus.trees))

    nskipped = 0
    ntotal = len(nexus.trees)
    extract_pattern = None
    species2remove = None

    phylip_executable = None
    phylip_options = None

    index = 0

    for tree in nexus.trees:

        if options.outgroup:
            tree.root_with_outgroup(options.outgroup)

        for method in options.methods:

            if options.loglevel >= 3:
                options.stdlog.write(
                    "# applying method %s to tree %i.\n" % (method, index))

            if method == "midpoint-root":
                tree.root_midpoint()

            elif method == "balanced-root":
                tree.root_balanced()

            elif method == "unroot":
                TreeTools.Unroot(tree)

            elif method == "phylip":
                if not phylip_executable:
                    # first parameter: program, second: '@'-separated options
                    phylip_executable = options.parameters[0]
                    del options.parameters[0]
                    phylip_options = re.split("@", options.parameters[0])
                    del options.parameters[0]

                phylip = WrapperPhylip.Phylip()
                phylip.setProgram(phylip_executable)
                phylip.setOptions(phylip_options)
                phylip.setTree(tree)

                result = phylip.run()

                nexus.trees[index] = result.mNexus.trees[0]
                # subsequent methods must work on the tree that is written,
                # not on the one that was just replaced.
                tree = nexus.trees[index]

            elif method == "normalize":

                if options.value == 0:
                    # no explicit value: normalize by longest branch
                    v = 0
                    for n in tree.chain.keys():
                        v = max(v, tree.node(n).data.branchlength)
                else:
                    v = options.value

                if v == 0:
                    options.stdlog.write(
                        "# Warning: tree %i not normalized: normalizing value is zero\n" % ntree)
                else:
                    for n in tree.chain.keys():
                        tree.node(n).data.branchlength /= float(v)

            elif method == "divide-by-tree":

                if not other_trees:
                    raise ValueError(
                        "method divide-by-tree requires a second tree")

                if len(other_trees) > 1:
                    other_tree = other_trees[ntree]
                else:
                    other_tree = other_trees[0]

                # the trees have to be exactly the same!!
                if options.loglevel >= 2:
                    print(tree.display())
                    print(other_tree.display())

                if not tree.is_identical(other_tree):
                    nskipped += 1
                    # proceeds with the next method for this tree
                    continue

                # even if the trees are the same (in topology), the node
                # numbering might not be the same. Thus build a map of
                # node ids.
                map_a2b = TreeTools.GetNodeMap(tree, other_tree)

                for n in tree.chain.keys():
                    try:
                        tree.node(n).data.branchlength /= float(
                            other_tree.node(map_a2b[n]).data.branchlength)
                    except ZeroDivisionError:
                        options.stdlog.write(
                            "# Warning: branch for nodes %i and %i in tree-pair %i: divide by zero\n" % (n, map_a2b[n], ntree))
                        continue

            elif method == "rename":
                if not map_old2new:

                    with open(options.parameters[0], "r") as infile:
                        loaded = IOTools.ReadMap(infile, columns=(0, 1))

                    if options.invert_map:
                        loaded = IOTools.getInvertedDictionary(
                            loaded, make_unique=True)

                    del options.parameters[0]

                    # fill the caller's dict instead of rebinding the local
                    # name; otherwise the next call would try to read the
                    # map again from an already consumed parameter.
                    if map_old2new is None:
                        map_old2new = loaded
                    else:
                        map_old2new.update(loaded)

                unknown = []
                for n, node in tree.chain.items():
                    if node.data.taxon:
                        try:
                            node.data.taxon = map_old2new[node.data.taxon]
                        except KeyError:
                            unknown.append(node.data.taxon)

                # taxa without a new name are removed from the tree
                for taxon in unknown:
                    tree.prune(taxon)

            # reformat terminals
            elif method == "extract-with-pattern":

                if not extract_pattern:
                    extract_pattern = re.compile(options.parameters[0])
                    del options.parameters[0]

                for n in tree.get_terminals():
                    node = tree.node(n)
                    match = extract_pattern.search(node.data.taxon)
                    if match is None:
                        raise ValueError(
                            "pattern '%s' does not match taxon '%s'" %
                            (extract_pattern.pattern, node.data.taxon))
                    node.data.taxon = match.groups()[0]

            elif method == "set-uniform-branchlength":
                for n in tree.chain.keys():
                    tree.node(n).data.branchlength = options.value

            elif method == "build-map":
                # build a map of identifiers
                options.write_map = True
                for n in tree.get_terminals():
                    node = tree.node(n)
                    if node.data.taxon not in map_old2new:
                        new = options.template_identifier % (
                            len(map_old2new) + 1)
                        map_old2new[node.data.taxon] = new
                    node.data.taxon = map_old2new[node.data.taxon]

            elif method == "remove-pattern":
                if species2remove is None:
                    species2remove = re.compile(options.parameters[0])
                    # consume only this parameter (the original deleted
                    # the whole parameter list).
                    del options.parameters[0]

                # taxa NOT matching the pattern are passed to PruneTree
                taxa = []
                for n in tree.get_terminals():
                    t = tree.node(n).data.taxon
                    if not species2remove.search(t):
                        taxa.append(t)
                TreeTools.PruneTree(tree, taxa)

            elif method == "add-node-names":
                # give every unnamed node a name "inode<N>".
                # NOTE(review): the original tracked a write_all_taxa flag
                # here, but the output below always uses write_all_taxa=True.
                inode = 0
                for n, node in tree.chain.items():
                    if not node.data.taxon:
                        node.data.taxon = "inode%i" % inode
                        inode += 1

            elif method == "newick2nhx":
                # convert names to species names: the part before the
                # first "|" of a taxon is stored as species.
                for n in tree.get_terminals():
                    t = tree.node(n).data.taxon
                    d = t.split("|")
                    if len(d) >= 2:
                        tree.node(n).data.species = d[0]

        index += 1
        ntree += 1

    if options.output_format == "nh":
        options.stdout.write(
            TreeTools.Nexus2Newick(
                nexus, write_all_taxa=True,
                with_branchlengths=options.with_branchlengths) + "\n")
    else:
        for tree in nexus.trees:
            tree.writeToFile(options.stdout, format=options.output_format)

    return ntotal, nskipped, ntree
def _parse_matrix_rows(matrix):
    """Split the data rows of a phylip matrix into ids and value columns.

    The first entry of *matrix* (the dimension line) is skipped and the last
    character of every row (the newline) is dropped before splitting.
    """
    ids, rows = [], []
    for line in matrix[1:]:
        fields = re.split(r"\s+", line[:-1])
        ids.append(fields[0])
        rows.append([field.strip() for field in fields[1:]])
    return ids, rows


def _format_matrix_rows(ids, rows):
    """Build phylip matrix lines (dimension line first) from ids and rows."""
    matrix = ["%i\n" % len(rows)]
    for identifier, row in zip(ids, rows):
        matrix.append(identifier + " " + " ".join(row) + "\n")
    return matrix


def _randomize_zero_distances(rows):
    """Replace off-diagonal "0" entries by a small random value, in place."""
    d = len(rows)
    for row in range(d - 1):
        for col in range(row + 1, d):
            if rows[row][col] == "0":
                v = str(random.random() / 10000.0)
                rows[row][col] = v
                rows[col][row] = v


def main(argv=None):
    """Build trees from distance matrices read from stdin using PHYLIP.

    Parses command line options in sys.argv, unless *argv* is given.

    Lines starting with "#" are dropped. Lines starting with ">" separate
    several matrices; without such lines the whole input is one matrix and
    no separators are echoed. For each matrix the selected PHYLIP program
    (``neighbor``, ``fitch`` or ``kitsch``) is run through ``WrapperPhylip``
    and the resulting trees are written to stdout in Newick format.

    With ``--add-random`` / ``--pseudo-replicates`` zero distances are
    replaced by small random values before the matrix is handed to PHYLIP.
    With ``--replicates`` each matrix cell is read as a (value, replicates)
    pair of columns.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: matrix2tree.py 2782 2009-09-10 11:40:29Z andreas $"
    )

    parser.add_option("-i", "--invert-map", dest="invert_map",
                      action="store_true",
                      help="""invert map.""")

    parser.add_option("--input-format", dest="input_format", type="choice",
                      choices=("phylip", "full"),
                      help="""input format.""")

    parser.add_option("-t", "--filename-tree", dest="filename_tree",
                      type="string",
                      help="""filename with tree to fit.""")

    parser.add_option("-m", "--method", dest="method", type="choice",
                      choices=("nj", "kitsch", "fitch"),
                      help="""algorithm to run.""")

    parser.add_option("-e", "--replicates", dest="replicates",
                      action="store_true",
                      help="replicates.")

    parser.add_option("-r", "--root", dest="root", action="store_true",
                      help="midpoint root (if it is not rooted).")

    parser.add_option("-u", "--unroot", dest="unroot", action="store_true",
                      help="unroot tree (if it is rooted).")

    parser.add_option("--skip-separators", dest="write_separators",
                      action="store_false",
                      help="do not echo separators (starting with >)")

    parser.add_option("-p", "--power", dest="power", type="float",
                      help="power.")

    parser.add_option(
        "--prune-tree", dest="prune_tree", action="store_true",
        help="prune tree such to include only taxa which are part of the input matrix."
    )

    parser.add_option(
        "--add-random", dest="add_random", action="store_true",
        help="add small random value to off-diagonal zero elements in matrix.")

    parser.add_option(
        "--pseudo-replicates", dest="pseudo_replicates", action="store_true",
        help="add small random value to off-diagonal zero elements in matrix, even if they have no replicates."
    )

    parser.add_option("--debug", dest="debug", action="store_true",
                      help="dump debug information.")

    parser.set_defaults(
        value=0,
        method="nj",
        input_format="phylip",
        filename_tree=None,
        outgroup=None,
        replicates=False,
        root=False,
        unroot=False,
        power=0,
        write_separators=True,
        prune_tree=False,
        add_random=False,
        debug=False,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    phylip = WrapperPhylip.Phylip()

    if options.debug:
        phylip.setLogLevel(options.loglevel)

    phylip.setPruneTree(options.prune_tree)

    # real lists (not filter objects): both are indexed / appended to below
    lines = [line for line in sys.stdin.readlines() if line[0] != "#"]
    chunks = [i for i, line in enumerate(lines) if line[0] == ">"]

    if not chunks:
        # no separators: the whole input is a single matrix
        options.write_separators = False
        chunks = [-1]

    chunks.append(len(lines))

    for chunk_index in range(len(chunks) - 1):

        start, end = chunks[chunk_index], chunks[chunk_index + 1]
        matrix = lines[start + 1:end]

        if options.add_random:
            ids, mm = _parse_matrix_rows(matrix)
            d = len(mm)
            if options.replicates:
                # columns come in (value, replicates) pairs
                for row in range(d - 1):
                    for col in range(row + 1, d):
                        cc = col * 2
                        rr = row * 2
                        if mm[row][cc] == "0" and mm[row][cc + 1] != "0":
                            mm[row][cc + 1] = "1"
                            mm[col][rr + 1] = "1"
                            v = str(random.random() / 10000.0)
                            mm[row][cc] = v
                            mm[col][rr] = v
            else:
                _randomize_zero_distances(mm)

            matrix = _format_matrix_rows(ids, mm)

        if options.pseudo_replicates:
            ids, mm = _parse_matrix_rows(matrix)
            d = len(mm)
            if options.replicates:
                for row in range(d - 1):
                    for col in range(row + 1, d):
                        cc = col * 2
                        rr = row * 2
                        if mm[row][cc + 1] == "0":
                            # no replicates: small random value, 1 replicate
                            mm[row][cc + 1] = "1"
                            mm[col][rr + 1] = "1"
                            v = str(random.random() / 10000.0)
                            mm[row][cc] = v
                            mm[col][rr] = v
                        else:
                            mm[row][cc + 1] = "100"
                            mm[col][rr + 1] = "100"
            else:
                _randomize_zero_distances(mm)

            matrix = _format_matrix_rows(ids, mm)

        phylip.setMatrix(matrix)
        phylip_options = []

        if options.filename_tree:
            with open(options.filename_tree, "r") as infile:
                nexus = TreeTools.Newick2Nexus(infile)
            ref_tree = nexus.trees[0]
            phylip.setTree(ref_tree)
            phylip_options.append("U")
        else:
            ref_tree = None

        if options.method == "nj":
            phylip.setProgram("neighbor")

        elif options.method == "fitch":
            phylip.setProgram("fitch")

        elif options.method == "kitsch":
            phylip.setProgram("kitsch")

        if options.replicates:
            phylip_options.append("S")

        if options.power > 0:
            phylip_options.append("P")
            phylip_options.append("%f" % options.power)

        phylip_options.append("Y")

        phylip.setOptions(phylip_options)

        result = phylip.run()

        # the run may come back without trees; treat that as an empty list
        result_trees = result.mNexus.trees if result.mNexus else []

        if options.root:
            # NOTE: rooting with an outgroup is not implemented; with
            # options.outgroup set the trees are left as they are.
            if not options.outgroup:
                # midpoint root
                for tree in result_trees:
                    tree.root_midpoint()

        # explicitly unroot
        elif options.unroot:
            phylip.setOptions(("Y", "W", "U", "Q"))
            phylip.setProgram("retree")

            # use a separate index: reusing the chunk index here made the
            # separator below refer to the wrong input line.
            # NOTE(review): the tree set here stays set on the wrapper for
            # the next matrix unless --filename-tree is given - confirm
            # that WrapperPhylip resets it.
            for tree_index in range(len(result_trees)):
                phylip.setTree(result_trees[tree_index])
                xresult = phylip.run()
                result_trees[tree_index] = xresult.mNexus.trees[0]

        if options.write_separators:
            options.stdout.write(lines[start])

        if result.mNexus:
            options.stdout.write(TreeTools.Nexus2Newick(result.mNexus) + "\n")

        if options.loglevel >= 1:
            if ref_tree:
                nref = len(ref_tree.get_terminals())
            else:
                nref = 0
            for tree in result_trees:
                options.stdlog.write(
                    "# ninput=%i, nreference=%i, noutput=%i\n" %
                    (len(matrix) - 1, nref, len(tree.get_terminals())))

    E.Stop()
def main(argv=None):
    """Filter or transform trees read from stdin and write them to stdout.

    Parses command line options in sys.argv, unless *argv* is given.

    With ``--filter`` the input is parsed as one set of trees; trees with a
    branch length at or above the threshold given as first ``--parameters``
    entry are dropped. Otherwise the comma-separated ``--method`` list is
    applied by ``Process``. Input lines starting with ">" split the input
    into chunks; each such line is echoed and the trees following it are
    processed separately, sharing one identifier map and one running tree
    number. If a method requested it (``options.write_map``), the identifier
    map is written to the file named by the next parameter, or to stdout if
    that is empty.

    Raises:
        ValueError: if ``--filter`` is used without a numeric threshold in
            ``--parameters``.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id: tree2tree.py 2782 2009-09-10 11:40:29Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-d", "--value", dest="value", type="float",
                      help="normalizing value.")

    # method names as they are understood by Process()
    parser.add_option(
        "-m", "--method", dest="methods", type="string",
        help="""methods to apply [normalize|divide-by-tree|rename|set-uniform-branchlength|extract-with-pattern|build-map|remove-pattern|unroot|midpoint-root|balanced-root|add-node-names|phylip|newick2nhx]"""
    )

    parser.add_option("-2", "--filename-tree2", dest="filename_tree2",
                      type="string",
                      help="filename with second tree.")

    parser.add_option("-o", "--outgroup", dest="outgroup", type="string",
                      help="reroot with outgroup before processing.")

    parser.add_option("-p", "--parameters", dest="parameters", type="string",
                      help="parameters for methods.")

    parser.add_option(
        "-e", "--template-identifier", dest="template_identifier",
        type="string",
        help="""template identifier [%default]. A %i is replaced by the position of the sequence in the file.""")

    parser.add_option("-i", "--invert-map", dest="invert_map",
                      action="store_true",
                      help="""invert map.""")

    parser.add_option("-f", "--filter", dest="filter", type="choice",
                      choices=("max-branch-length", ),
                      help="filter trees")

    parser.add_option("--output-format", dest="output_format", type="choice",
                      choices=("nh", "nhx"),
                      help=("output format for trees."))

    parser.add_option(
        "-b", "--no-branch-lengths", dest="with_branchlengths",
        action="store_false",
        help="""do not write branchlengths. Per default, 0 branch lengths are added."""
    )

    parser.set_defaults(
        value=0,
        methods="",
        filename_tree2=None,
        outgroup=None,
        parameters="",
        template_identifier="ID%06i",
        write_map=False,
        invert_map=False,
        filter=None,
        output_format="nh",
        with_branchlengths=True,
    )

    (options, args) = E.Start(parser, add_pipe_options=True)

    options.methods = options.methods.split(",")
    options.parameters = options.parameters.split(",")

    other_trees = []
    # read other trees
    if options.filename_tree2:
        with open(options.filename_tree2, "r") as infile:
            other_nexus = TreeTools.Newick2Nexus(infile)
        other_trees = other_nexus.trees

    lines = sys.stdin.readlines()

    ntotal, nskipped, ntree = 0, 0, 0

    if options.filter:

        nexus = TreeTools.Newick2Nexus(lines)

        try:
            value = float(options.parameters[0])
        except (IndexError, ValueError):
            raise ValueError(
                "filter %s requires a numeric threshold in --parameters" %
                options.filter)
        del options.parameters[0]

        # decision functions: return true, if tree
        # is to be skipped
        if options.filter == "max-branch-length":
            def skip_tree(tree):
                return any(node.data.branchlength >= value
                           for _, node in tree.chain.items())

        new_trees = []
        for tree in nexus.trees:
            ntotal += 1
            if skip_tree(tree):
                nskipped += 1
            else:
                new_trees.append(tree)
                ntree += 1

        nexus.trees = new_trees

        options.stdout.write(
            TreeTools.Nexus2Newick(nexus, with_names=True) + "\n")

    else:

        # iterate over chunks; a real list is needed for len()/indexing
        chunks = [i for i, line in enumerate(lines) if line[0] == ">"]

        map_old2new = {}

        if chunks:
            boundaries = chunks + [len(lines)]
            for start, end in zip(boundaries[:-1], boundaries[1:]):
                options.stdout.write(lines[start])
                # accumulate the counts of EVERY chunk and carry the running
                # tree number forward (the original kept only the results
                # of the last chunk).
                t, s, ntree = Process(lines[start + 1:end], other_trees,
                                      options, map_old2new, ntree)
                ntotal += t
                nskipped += s
        else:
            ntotal, nskipped, ntree = Process(lines, other_trees, options,
                                              map_old2new, ntree)

        if options.write_map:

            # parameters may have been used up by the methods
            remaining = getattr(options, "parameters", None) or [""]
            p = remaining[0]

            def write_map(outfile):
                outfile.write("old\tnew\n")
                for old_id, new_id in map_old2new.items():
                    outfile.write("%s\t%s\n" % (old_id, new_id))

            if p:
                with open(p, "w") as outfile:
                    write_map(outfile)
            else:
                write_map(options.stdout)

    if options.loglevel >= 1:
        options.stdlog.write(
            "# ntotal=%i, nskipped=%i\n" % (ntotal, nskipped))

    E.Stop()
# NOTE(review): fragment - the statements up to E.GetFooter() use names
# (nexus, keys, param_loglevel, param_create) that are defined outside the
# visible source; presumably this is the tail of a function whose header is
# not shown here - confirm before relying on the layout.

# running number used to build new "otu<N>" names
notu = 0
for tree in nexus.trees:
    if param_loglevel >= 2:
        tree.display()
    for nx in tree.get_terminals():
        t1 = tree.node(nx).get_data().taxon
        # when a map is being created, every taxon not seen before gets
        # the next "otu<N>" name
        if param_create:
            if t1 not in keys:
                keys[t1] = "otu%i" % notu
                notu += 1
        # taxa without an entry in keys keep their name
        if t1 in keys:
            tree.node(nx).get_data().taxon = keys[t1]

print TreeTools.Nexus2Newick(nexus)

# param_create doubles as the name of the file the map is written to:
# one tab-separated "old<TAB>new" line per entry in keys
if param_create:
    outfile = open(param_create, "w")
    for key in keys:
        outfile.write("%s\t%s\n" % (key, keys[key]))
    outfile.close()

print E.GetFooter()

# script entry point
if __name__ == "__main__":
    sys.exit(main(sys.argv))