class Clades(PhyltrCommand):
    # Feeds every tree of the stream to a CladeProbabilities accumulator and,
    # once the stream is exhausted, has it write a clade report to
    # /dev/stdout.  process_tree() returns nothing and postprocess() returns
    # an empty list, so this command itself hands no trees onwards.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr clades")
    parser.add_option(
        '-a', '--ages',
        action="store_true", dest="age", default=False,
        help="Include age information in report.")
    parser.add_option(
        '-f', '--frequency',
        type="float", dest="frequency", default=0.0,
        help='Minimum clade frequency to report.')

    def __init__(self, frequency=0.0, ages=False):
        # Both settings are forwarded untouched to save_clade_report().
        self.cp = phyltr.utils.cladeprob.CladeProbabilities()
        self.ages = ages
        self.frequency = frequency

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        return Clades(frequency=options.frequency, ages=options.age)

    def process_tree(self, t):
        """Record the clades of ``t``; nothing is returned."""
        self.cp.add_tree(t)

    def postprocess(self):
        """Compute clade probabilities and write the report to /dev/stdout."""
        accumulator = self.cp
        accumulator.compute_probabilities()
        accumulator.save_clade_report("/dev/stdout", self.frequency, self.ages)
        return []
class Dedupe(PhyltrCommand):
    # Removes duplicated leaf names from each tree.  Duplicates which form a
    # monophyletic group are collapsed into their common ancestor; otherwise
    # all but one randomly chosen leaf carrying the name are pruned.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr dedupe")

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command; neither ``options`` nor ``files`` is used."""
        dedupe = Dedupe()
        return dedupe

    def process_tree(self, t):
        """Return ``t`` with every named leaf occurring at most once.

        The tree is modified in place.  Unnamed leaves are ignored.
        """
        # Local import: the module-level import block is not visible here.
        from collections import Counter

        leaf_names = [l.name for l in t.get_leaves() if l.name]
        # Count once up front instead of calling list.count() per leaf.
        counts = Counter(leaf_names)
        dupes = {n for n in leaf_names if counts[n] > 1}
        if not dupes:
            return t

        # Remove dupes one at a time
        victims = []
        for dupe in dupes:
            dupe_taxa = t.get_leaves_by_name(dupe)
            assert all(d.is_leaf() for d in dupe_taxa)
            # First try to collapse monophyletic dupes
            is_mono, _, _ = t.check_monophyly([dupe], "name")
            if is_mono:
                # Turn the common ancestor into a leaf carrying the name.
                mrca = t.get_common_ancestor(dupe_taxa)
                mrca.name = dupe
                for child in mrca.get_children():
                    child.detach()
            # If the dupe is non-monophyletic, kill at random
            else:
                victims.extend(random.sample(dupe_taxa, len(dupe_taxa) - 1))

        if victims:
            # Set membership keeps the survivor filter linear in tree size.
            doomed = set(victims)
            survivors = [l for l in t.get_leaves() if l not in doomed]
            t.prune(survivors, preserve_branch_length=True)
        return t
class Stat(PhyltrCommand):
    # Collects summary statistics over a tree stream (tree count, taxa count
    # of the first tree, number of distinct topologies, ultrametricity and
    # tree heights) and prints them in post_print().
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = NullSink

    parser = OptionParser(__doc__, prog="phyltr stat")

    def __init__(self):
        self.tree_count = 0
        self.taxa_count = 0
        self.ultrametric = True
        # One exemplar tree per distinct topology seen so far.
        self.topologically_unique_trees = []
        # Root-to-farthest-leaf distance of every tree, in stream order.
        self.tree_ages = []
        self.firsttree = True

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command; neither ``options`` nor ``files`` is used."""
        stat = Stat()
        return stat

    def process_tree(self, t):
        """Update the running statistics with ``t`` and return it unchanged."""
        # Stuff we do to every tree...
        self.tree_count += 1
        leaves = t.get_leaves()
        leaf_ages = [t.get_distance(leaf) for leaf in leaves]
        self.tree_ages.append(t.get_farthest_leaf()[1])
        # A tree counts as non-ultrametric when its root-to-leaf distances
        # spread by more than 0.1% of the largest one.
        oldest = max(leaf_ages)
        if abs(oldest - min(leaf_ages)) > oldest / 1000.0:
            self.ultrametric = False

        # Stuff we only do to the first tree...
        if self.firsttree:
            self.firsttree = False
            self.taxa_count = len(leaves)
            self.topologically_unique_trees.append(t)
        # Stuff we only do to trees *other* than the first...
        elif not any(are_same_topology(t, u)
                     for u in self.topologically_unique_trees):
            self.topologically_unique_trees.append(t)
        return t

    def postprocess(self):
        """Derive the final statistics; yields no trees.

        If the stream contained no trees the height statistics are set to
        NaN instead of failing on ``min([])`` or a division by zero.
        """
        self.topology_count = len(self.topologically_unique_trees)
        if self.tree_ages:
            self.min_tree_height = min(self.tree_ages)
            self.max_tree_height = max(self.tree_ages)
            self.mean_tree_height = sum(self.tree_ages) / self.tree_count
        else:
            undefined = float("nan")
            self.min_tree_height = undefined
            self.max_tree_height = undefined
            self.mean_tree_height = undefined
        return []

    def post_print(self):
        """Print the statistics computed by postprocess()."""
        print("Total taxa: %d" % self.taxa_count)
        print("Total trees: %d" % self.tree_count)
        print("Unique topologies: %d" % self.topology_count)
        print("Are trees ultrametric? %s" % str(self.ultrametric))
        print("Mean tree height: %f" % self.mean_tree_height)
        print("Min tree height: %f" % self.min_tree_height)
        print("Max tree height: %f" % self.max_tree_height)
class Prune(PhyltrCommand):
    # Prunes leaves from each tree, selected either by name (taxa given
    # directly or read one per line from a file) or by the value of a node
    # attribute.  With inverse=True the selection is what is kept.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr prune")
    parser.add_option('-a', '--attribute', default=None)
    parser.add_option('-f', '--file', dest="filename",
                      help='Specifies a file from which to read taxa')
    parser.add_option('-i', '--inverse', action="store_true",
                      default=False, dest="inverse")
    parser.add_option('-v', '--value', default=None)

    def __init__(self, taxa=None, filename=None, attribute=None, value=None,
                 inverse=False):
        """Configure the selection of leaves.

        The sources are tried in this order: ``taxa``, then ``filename``,
        then ``attribute`` together with ``value``.

        Raises:
            ValueError: if the taxa file contains no names, or if none of
                the three selection sources is usable.
        """
        self.attribute = attribute
        self.filename = filename
        self.inverse = inverse
        self.value = value
        self.by_attribute = False

        if taxa:
            self.taxa = taxa
        elif filename:
            with open(self.filename, "r") as fp:
                # Skip blank lines: they would otherwise add an empty-string
                # "taxon" and stop the empty-file check below from firing.
                stripped = (line.strip() for line in fp)
                self.taxa = {name for name in stripped if name}
            if not self.taxa:
                raise ValueError("Empty file!")
        elif self.attribute and self.value:
            # An empty taxa collection switches process_tree() to
            # attribute mode.
            self.taxa = []
        else:
            raise ValueError("Incompatible arguments")

    @classmethod
    def init_from_opts(cls, options, files=None):
        """Build the command from parsed options.

        If present, the first entry of ``files`` is read as a
        comma-separated list of taxon names.
        """
        taxa = set(files[0].split(",")) if files else []
        prune = cls(taxa, options.filename, options.attribute, options.value,
                    options.inverse)
        return prune

    def process_tree(self, t):
        """Prune ``t`` in place and return it."""
        leaves = t.get_leaves()
        # prune() retains the nodes it is given, so build the list of leaves
        # to *keep*.
        if self.taxa:
            # Pruning by a list of taxa
            if self.inverse:
                keepers = [l for l in leaves if l.name in self.taxa]
            else:
                keepers = [l for l in leaves if l.name not in self.taxa]
        else:
            # Pruning by an attribute value.  Leaves lacking the attribute
            # are never kept, whichever direction is requested.
            annotated = [l for l in leaves if hasattr(l, self.attribute)]
            if self.inverse:
                keepers = [l for l in annotated
                           if getattr(l, self.attribute) == self.value]
            else:
                keepers = [l for l in annotated
                           if getattr(l, self.attribute) != self.value]
        # Do the deed
        t.prune(keepers, preserve_branch_length=True)
        return t
class Length(PhyltrCommand):
    # Reports, for each tree, the sum of the branch lengths (``dist``) of
    # all of its nodes.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr length")

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command; neither ``options`` nor ``files`` is used."""
        return Length()

    def process_tree(self, t):
        """Return the total of ``dist`` over every node visited in ``t``."""
        total = 0
        for node in t.traverse():
            total = total + node.dist
        return total
class Height(PhyltrCommand):
    # Reports, for each tree, the distance component of what
    # get_farthest_leaf() returns for the root.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr height")

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command; neither ``options`` nor ``files`` is used."""
        return Height()

    def process_tree(self, t):
        """Return the second element of ``t.get_farthest_leaf()``."""
        farthest = t.get_farthest_leaf()
        distance = farthest[1]
        return distance
class Support(PhyltrCommand):
    # Buffers the whole tree stream, computes clade probabilities over it,
    # annotates every tree with them and then emits the trees, optionally
    # sorted from most to least probable.  A clade report is written as well
    # when an output filename was given.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr support")
    parser.add_option('-a', '--age', action="store_true", dest="age",
                      default=False,
                      help="Include age information in report.")
    parser.add_option('-f', '--frequency', type="float", dest="frequency",
                      default=0.0,
                      help='Minimum clade frequency to report.')
    parser.add_option("-o", "--output", action="store", dest="filename",
                      help="save clades to FILE", metavar="FILE")
    parser.add_option('-s', '--sort', action="store_true", dest="sort",
                      default=False)

    def __init__(self, frequency=0.0, ages=False, sort=False, filename=None):
        self.frequency = frequency
        self.ages = ages
        self.sort = sort
        self.filename = filename
        # Every tree is retained until the end of the stream.
        self.trees = []
        self.cp = phyltr.utils.cladeprob.CladeProbabilities()

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        support = Support(options.frequency, options.age, options.sort,
                          options.filename)
        return support

    def process_tree(self, t):
        """Store ``t`` and record its clades; nothing is emitted yet."""
        self.trees.append(t)
        self.cp.add_tree(t)
        return None

    def postprocess(self):
        """Annotate the buffered trees and yield them.

        With ``sort`` enabled the trees are yielded in order of decreasing
        tree probability; trees of equal probability keep their input order.
        """
        self.cp.compute_probabilities()

        # Save clade probabilities
        if self.filename:
            self.cp.save_clade_report(self.filename, self.frequency,
                                      self.ages)

        # Annotate trees
        for t in self.trees:
            self.cp.annotate_tree(t)

        # Sort.  A key function is used so that trees themselves are never
        # compared: sorting (probability, tree) tuples falls back to
        # comparing the trees whenever two probabilities are equal.
        if self.sort:
            self.trees = sorted(self.trees, key=self.cp.get_tree_prob,
                                reverse=True)

        # Output
        for t in self.trees:
            yield t
class Subtree(PhyltrCommand):
    # Replaces each tree by the subtree rooted at the common ancestor of a
    # selection of leaves.  Leaves are selected by name (given directly or
    # read from a file) or by the value of a node attribute.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr subtree")
    parser.add_option('-a', '--attribute', default=None)
    parser.add_option(
        '-f', '--file', dest="filename",
        help='Specifies a file from which to read taxa')
    parser.add_option('-v', '--value', default=None)

    def __init__(self, taxa=None, filename=None, attribute=None, value=None):
        """Configure the selection of leaves.

        Raises:
            ValueError: if the taxa file yields no lines, or if no usable
                selection source was supplied.
        """
        self.attribute = attribute
        self.filename = filename
        self.value = value
        self.by_attribute = False

        if taxa:
            self.taxa = taxa
            return
        if filename:
            with open(self.filename, "r") as fp:
                self.taxa = [line.strip() for line in fp]
            if not self.taxa:
                raise ValueError("Empty file!")
            return
        if not (self.attribute and self.value):
            raise ValueError("Incompatible arguments")
        # No names at all: process_tree() selects by attribute instead.
        self.taxa = []

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options.

        If present, the first entry of ``files`` is read as a
        comma-separated list of taxon names.
        """
        names = set(files[0].split(",")) if files else []
        return Subtree(names, options.filename, options.attribute,
                       options.value)

    def process_tree(self, t):
        """Return the common ancestor node of the selected leaves of ``t``."""
        if self.taxa:
            selected = [leaf for leaf in t.get_leaves()
                        if leaf.name in self.taxa]
        else:
            selected = [leaf for leaf in t.get_leaves()
                        if hasattr(leaf, self.attribute)
                        and getattr(leaf, self.attribute) == self.value]
        anchor = selected[0]
        others = selected[1:]
        return anchor.get_common_ancestor(others)
class Pretty(PhyltrCommand):
    # Renders each tree as ASCII art, after relabelling its nodes and
    # optionally collapsing well supported clades into a single tip.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr pretty")
    parser.add_option(
        '-c', '--compress', action="store_true", dest="compress",
        default=False)
    parser.add_option('-l', '--label', default="name")

    def __init__(self, label="name", compress=False):
        self.compress = compress
        self.label = label

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        return Pretty(label=options.label, compress=options.compress)

    def process_tree(self, t):
        """Relabel (and optionally compress) ``t``; return its ASCII drawing.

        The tree is modified in place.
        """
        # Leaves owning the requested attribute are named after it; every
        # other node is named after its support value.
        for node in t.traverse():
            use_label = node.is_leaf() and hasattr(node, self.label)
            if use_label:
                node.name = getattr(node, self.label)
            else:
                node.name = "%.2f" % node.support

        if self.compress:
            # Collapse clades in which every node has support >= 0.9.
            collapsed = []
            for node in t.traverse("preorder"):
                if node in collapsed or node.is_leaf():
                    continue
                members = node.get_descendants()
                members.append(node)
                if not all(m.support >= 0.9 for m in members):
                    continue
                collapsed.extend(members)
                tip_names = sorted(leaf.name for leaf in node.get_leaves())
                node.name = "(%.2f) %s" % (node.support, "+".join(tip_names))
                for child in node.get_children():
                    child.detach()

        return t.get_ascii()
class Uniq(PhyltrCommand):
    # Groups the trees of a stream by topology and emits one tree per
    # topology, whose branch lengths summarise (max, mean, median or min)
    # the corresponding branch lengths of all trees in the group.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr uniq")
    parser.add_option('-l', '--lengths', action="store", dest="lengths",
                      default="mean")

    # The accepted values of ``lengths``.
    _LENGTH_MODES = ("max", "mean", "median", "min")

    def __init__(self, lengths="mean"):
        """Create the command.

        Raises:
            ValueError: if ``lengths`` is not one of "max", "mean",
                "median" or "min".
        """
        # Reject unknown modes up front; otherwise the failure would only
        # surface at the very end of the stream.
        if lengths not in self._LENGTH_MODES:
            raise ValueError(
                "Unknown branch length summary %r, expected one of: %s"
                % (lengths, ", ".join(self._LENGTH_MODES)))
        self.lengths = lengths
        # Maps an exemplar tree to the list of trees sharing its topology;
        # the exemplar itself is the first entry of its list.
        self.topologies = {}

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        uniq = Uniq(options.lengths)
        return uniq

    def process_tree(self, t):
        """File ``t`` under its topology; nothing is emitted yet."""
        # Compare this tree to all topology exemplars.  If we find a match,
        # add it to the record and move on to the next tree.
        for exemplar in self.topologies:
            if are_same_topology(t, exemplar):
                self.topologies[exemplar].append(t)
                break
        else:
            self.topologies[t] = [t]
        return None

    def _summarise(self, dists):
        """Reduce a non-empty list of branch lengths to a single value."""
        if self.lengths == "max":
            return max(dists)
        if self.lengths == "min":
            return min(dists)
        if self.lengths == "mean":
            return sum(dists) / len(dists)
        if self.lengths == "median":
            ordered = sorted(dists)
            count = len(ordered)
            middle = count // 2
            if count % 2 == 0:
                return 0.5 * (ordered[middle] + ordered[middle - 1])
            return ordered[middle]
        raise ValueError(
            "Unknown branch length summary %r" % (self.lengths,))

    def postprocess(self):
        """Yield one summarised tree per distinct topology."""
        for equ_class in self.topologies.values():
            # Walk all trees of the class in lockstep.  The builtin zip is
            # used because itertools.izip does not exist on Python 3.
            for nodes in zip(*[t.traverse() for t in equ_class]):
                nodes[0].dist = self._summarise([n.dist for n in nodes])
            yield equ_class[0]
class Taxa(PhyltrCommand):
    # Emits the sorted node names of the first tree of the stream, then
    # signals the end of processing.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = ListPerLineFormatter

    parser = OptionParser(__doc__, prog="phyltr taxa")

    def __init__(self):
        # Becomes True once the first tree has been handled.
        self.done = False

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command; neither ``options`` nor ``files`` is used."""
        return Taxa()

    def process_tree(self, t):
        """Return the sorted non-empty node names of the first tree.

        Raises:
            StopIteration: for every tree after the first.
        """
        if self.done:
            raise StopIteration
        names = []
        for node in t.traverse():
            if node.name:
                names.append(node.name)
        self.done = True
        names.sort()
        return names
class Scale(PhyltrCommand):
    # Multiplies the branch length (``dist``) of every node of each tree by
    # a constant factor.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr scale")
    parser.add_option(
        '-s', '--scale', type="float", default=1.0,
        help='Scaling factor.')

    def __init__(self, scalefactor=1.0):
        self.scalefactor = scalefactor

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        return Scale(options.scale)

    def process_tree(self, t):
        """Scale all branch lengths of ``t`` in place and return ``t``."""
        factor = self.scalefactor
        for node in t.traverse():
            node.dist *= factor
        return t
class Consensus(PhyltrCommand):
    # Consumes a whole tree stream and emits a single consensus tree built
    # from the clades whose frequency reaches a threshold, annotated with
    # summaries of clade ages and of any recorded clade attributes.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr consensus")
    parser.add_option('-f', '--frequency', type="float", dest="frequency",
                      default=0.5,
                      help="Minimum clade support to include in tree.")

    def __init__(self, frequency=0.5):
        self.frequency = frequency
        self.cp = phyltr.utils.cladeprob.CladeProbabilities()

    @classmethod
    def init_from_opts(cls, options, files=None):
        """Build the command from parsed options; ``files`` is not used."""
        consensus = Consensus(options.frequency)
        return consensus

    def process_tree(self, t):
        """Record the clades of ``t``; nothing is emitted yet."""
        self.cp.add_tree(t)

    def postprocess(self):
        """Yield the consensus tree of everything seen so far."""
        self.cp.compute_probabilities()
        # Build consensus tree
        t = self.build_consensus_tree()
        yield t

    @staticmethod
    def _summarise(values, lower_q, upper_q):
        """Return ``(mean, lower, median, upper)`` of a non-empty list.

        ``lower`` and ``upper`` are the entries at the ``lower_q`` and
        ``upper_q`` fractions of the sorted list.  The mean is taken before
        ``values`` is sorted, and the sort happens in place.
        """
        mean = sum(values)/len(values)
        values.sort()
        lower, median, upper = [values[int(q*len(values))]
                                for q in (lower_q, 0.5, upper_q)]
        return mean, lower, median, upper

    def build_consensus_tree(self):
        """Build and annotate the consensus tree.

        Expects the most inclusive qualifying clade to contain all leaves
        and to have probability 1.0; this is checked with assertions.
        """
        # Build a list of all clades in the treestream with frequency above
        # the requested threshold, sorted first by size and then by
        # frequency.
        clades = []
        for clade, p in self.cp.clade_probs.items():
            if p >= self.frequency:
                clade = clade.split(",")
                clades.append((len(clade), p, set(clade)))
        clades.sort()

        # Pop the last (largest) clade, which *should* be the clade with
        # support 1.0 containing all leaves
        taxon_count, prob, all_leaves = clades.pop()
        assert prob == 1.0
        assert all((taxon_count > count for count, p, clade in clades))
        clades.reverse()

        # Start out with a tree in which all leaves are joined in one big
        # polytomy
        t = ete3.Tree()
        for l in all_leaves:
            t.add_child(name=l)

        # Now recursively resolve the polytomy by greedily grouping clades
        t = recursive_builder(t, clades)
        cache = t.get_cached_content()

        # Add age annotations
        for clade in t.traverse("postorder"):
            clade_key = ",".join(sorted([l.name for l in cache[clade]]))
            if not clade.is_leaf():
                # all leaves have age zero, so don't bother
                ages = self.cp.clade_ages[clade_key]
                mean, lower, median, upper = self._summarise(
                    ages, 0.05, 0.95)
                # Child branch lengths are set so that this clade sits at
                # its mean age above each child's farthest leaf.
                for c in clade.get_children():
                    leaf, age = c.get_farthest_leaf()
                    c.dist = mean - age
                clade.add_feature("age_mean", mean)
                clade.add_feature("age_median", median)
                clade.add_feature("age_HPD", "{%f-%f}" % (lower,upper))

            # NOTE(review): ages use the 5%/95% entries whereas attributes
            # use 2.5%/97.5% -- confirm the difference is intended.
            for f in self.cp.clade_attributes:
                values = self.cp.clade_attributes[f][clade_key]
                mean, lower, median, upper = self._summarise(
                    values, 0.025, 0.975)
                clade.add_feature("%s_mean" % f, mean)
                clade.add_feature("%s_median" % f, median)
                clade.add_feature("%s_HPD" % f, "{%f-%f}" % (lower,upper))
        return t
class Plot(PhyltrCommand):
    # Draws trees with ete3: either renders them to a file or shows them
    # interactively.  Unless "multiple" is set, only the first tree of the
    # stream is handled.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    sink = NullSink

    parser = OptionParser(__doc__, prog="phyltr plot")
    parser.add_option('-a', '--attribute', dest="attribute", default=None)
    parser.add_option('-d', '--dpi', type="int", default=None)
    parser.add_option('-H', '--height', type="int", dest="height",
                      default=None)
    parser.add_option('-l', '--label', default="name")
    parser.add_option('-m', '--multiple', default=False, action="store_true")
    parser.add_option('-o', '--output', default=None)
    parser.add_option('-u', '--units', default="px")
    parser.add_option('-w', '--width', type="int", dest="width", default=None)

    def __init__(self, label="name", attribute=None, output=None,
                 multiple=False, width=None, height=None, units="px",
                 dpi=300, dummy=False):
        """Configure plotting.

        With ``dummy`` set, no ete3 style or face objects are created and
        nothing is rendered or shown.
        """
        self.label = label
        self.attribute = attribute
        self.output = output
        self.multiple = multiple
        self.width = width
        self.height = height
        self.units = units
        self.dpi = dpi
        # Number of trees handled so far; used to number output files.
        self.n = 0
        self.dummy = dummy

        if not self.dummy:
            # Setup TreeStyle
            self.ts = ete3.TreeStyle()
            self.ts.show_scale = False
            self.ts.show_branch_support = True

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        plot = Plot(options.label, options.attribute, options.output,
                    options.multiple, options.width, options.height,
                    options.units, options.dpi)
        return plot

    def process_tree(self, t):
        """Decorate ``t`` and render or show it.

        Returns None when ``multiple`` is set.

        Raises:
            StopIteration: after the tree has been handled, when
                ``multiple`` is not set.
        """
        # Add faces
        if self.attribute:
            values = set([getattr(l, self.attribute) for l in t.get_leaves()])
            colours = get_colour_set(len(values))
            colour_map = dict(zip(values, colours))
            for l in t.iter_leaves():
                mycolour = colour_map[getattr(l, self.attribute)]
                if not self.dummy:
                    l.add_face(
                        ete3.CircleFace(radius=10, color=mycolour,
                                        style="sphere"), 0)

        # Apply labels
        if not self.dummy:
            for l in t.iter_leaves():
                l.add_face(ete3.TextFace(getattr(l, self.label)), 1)

        # Plot or save
        if self.output:
            kw = {}
            if self.height or self.width:
                kw["h"] = self.height
                kw["w"] = self.width
                kw["units"] = self.units
                # The command line leaves dpi as None unless --dpi is given.
                # Only forward a real value, so that render() falls back to
                # its own default instead of receiving None.
                if self.dpi is not None:
                    kw["dpi"] = self.dpi
            if self.multiple:
                # Number the output files: base_000001.ext, base_000002.ext
                base, ext = os.path.splitext(self.output)
                filename = base + ("_%06d" % (self.n + 1)) + ext
            else:
                filename = self.output
            if not self.dummy:
                t.render(filename, ultrametric, tree_style=self.ts, **kw)
        else:  # pragma: no cover
            if not self.dummy:
                t.show(ultrametric, tree_style=self.ts)

        self.n += 1

        if self.multiple:
            return None
        else:
            raise StopIteration
class Cat(PhyltrCommand):
    # Passes trees through, optionally discarding a leading percentage of
    # the stream as burn-in and/or keeping only every n-th tree.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    # NOTE(review): this assigns to the *base class* attribute, so defining
    # Cat switches the source parser of every PhyltrCommand subclass that
    # does not set its own "source".  Kept as is because other code may rely
    # on it -- confirm whether a plain "source = ComplexNewickParser" was
    # intended.
    PhyltrCommand.source = ComplexNewickParser

    parser = OptionParser(__doc__, prog="phyltr cat")
    parser.add_option('-b', '--burnin', action="store", dest="burnin",
                      type="int", default=0)
    parser.add_option('-s', '--subsample', action="store", dest="subsample",
                      type="int", default=1)
    parser.add_option('--no-annotations', action="store_true",
                      dest="no_annotations", default=False)

    def __init__(self, burnin=0, subsample=1, annotations=True):
        """Create the command.

        ``burnin`` is a percentage of the total number of trees;
        ``subsample`` keeps every ``subsample``-th tree.  ``annotations`` is
        stored but not otherwise used by this class.
        """
        self.burnin = burnin
        self.subsample = subsample
        self.annotations = annotations
        # Buffer used only when a burn-in is requested.
        self.trees = []
        # Number of trees seen so far in streaming (no burn-in) mode.
        self.n = 0

    @classmethod
    def init_from_opts(cls, options, files=None):
        """Build the command from parsed options; ``files`` is not used."""
        cat = Cat(options.burnin, options.subsample,
                  not options.no_annotations)
        return cat

    def process_tree(self, t):
        """Return ``t`` if it is to be emitted right away, otherwise None."""
        if self.burnin:
            # If we're discarding a fixed percentage as burn-in, we need to
            # know the total number of trees.  So for now, just dump 'em in
            # a list, consume ALL the memory...
            self.trees.append(t)
            return None
        # Otherwise, we can subsample as we go
        keep = self.n % self.subsample == 0
        self.n += 1
        return t if keep else None

    def postprocess(self):
        """Yield the buffered trees which survive burn-in and subsampling.

        Yields nothing when no burn-in was requested, since process_tree()
        has then already emitted everything.
        """
        if not self.burnin:
            # A plain return ends the generator; raising StopIteration here
            # would be turned into a RuntimeError under PEP 479.
            return
        # If there's a burn-in, we now have all trees sitting in a list,
        # so dump 'em all now
        burnin = int(round((self.burnin / 100.0) * len(self.trees)))
        self.trees = self.trees[burnin::self.subsample]
        for t in self.trees:
            yield t
class Collapse(PhyltrCommand):
    # Collapses monophyletic groups of leaves into a single named leaf.  The
    # groups come from a dictionary, from a translation file, or are formed
    # by the leaves sharing the value of a node attribute.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr collapse")
    parser.add_option('-a', '--attribute', dest="attribute", default=None)
    parser.add_option('-t', '--translate',
                      help='Specifies the translation file.', default=None)

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        collapse = Collapse({}, options.translate, options.attribute)
        return collapse

    def __init__(self, clades=None, filename=None, attribute=None):
        """Create the command.

        The sources are tried in this order: ``clades`` (a mapping from a
        clade name to its taxon names), then ``filename``, then
        ``attribute``.

        Raises:
            ValueError: if none of the three is given.
        """
        # Always define these, so that later code never hits a missing
        # attribute (e.g. after reading a clade file with no entries).
        self.filename = filename
        self.attribute = attribute
        if clades:
            self.trans = clades  # trans = translation
        elif filename:
            self.read_clade_file(self.filename)
        elif attribute:
            self.trans = {}
        else:
            raise ValueError(
                "Must provide a dictionary of clades, a filename or an attribute."
            )

    def process_tree(self, t):
        """Collapse clades of ``t`` in place and return ``t``.

        If there are neither clade definitions nor an attribute, the tree is
        returned unchanged.
        """
        if self.trans:
            self.collapse_by_dict(t)
        elif self.attribute:
            self.collapse_by_attribute(t)
        return t

    def read_clade_file(self, filename):
        """Read a file of names and clade definitions into ``self.trans``.

        Each non-blank line has the form ``name:taxon1,taxon2,...``.
        """
        self.trans = {}
        # "with" guarantees the file is closed even if a line is malformed.
        with open(filename, "r") as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. at the end of the file.
                    continue
                name, clade = line.split(":")
                clade = clade.strip().split(",")
                self.trans[name] = clade

    def collapse_by_dict(self, t):
        """Collapse every clade of ``self.trans`` which is present in ``t``."""
        cache = t.get_cached_content()
        tree_leaves = cache[t]
        for name, clade in self.trans.items():
            # Get a list of leaves in this tree
            clade_leaves = [l for l in tree_leaves if l.name in clade]
            if not clade_leaves:
                continue
            try:
                self.test_monophyly_and_collapse(t, cache, name, clade_leaves)
            except MonophylyFailure:
                # Clade is not monophyletic.  We can't collapse it.
                sys.stderr.write("Monophyly failure for clade: %s\n" % name)
                # NOTE(review): this abandons the remaining clades after the
                # first failure, unlike collapse_by_attribute(), and the
                # return value is ignored by process_tree() -- confirm that
                # this is intended.
                return 1

    def collapse_by_attribute(self, t):
        """Collapse each group of leaves sharing an attribute value."""
        cache = t.get_cached_content()
        tree_leaves = cache[t]
        # Build a dictionary mapping attribute values to lists of leaves
        values = {}
        for leaf in tree_leaves:
            if not hasattr(leaf, self.attribute):
                continue
            values.setdefault(getattr(leaf, self.attribute), []).append(leaf)
        # Do monophyly tests
        for value, clade_leaves in values.items():
            try:
                self.test_monophyly_and_collapse(t, cache, value, clade_leaves)
            except MonophylyFailure:
                # Clade is not monophyletic.  We can't collapse it.
                sys.stderr.write(
                    "Monophyly failure for attribute value: %s=%s\n" %
                    (self.attribute, value))

    def test_monophyly_and_collapse(self, t, cache, clade, clade_leaves):
        """Collapse ``clade_leaves`` into one leaf named ``clade``.

        Raises:
            MonophylyFailure: if the common ancestor of ``clade_leaves`` has
                leaves other than ``clade_leaves`` according to ``cache``.
        """
        # Check monophyly
        if len(clade_leaves) == 1:
            # .get_common_ancestor works oddly for singletons
            mrca = clade_leaves[0]
        else:
            mrca = t.get_common_ancestor(clade_leaves)
        mrca_leaves = cache[mrca]
        if set(mrca_leaves) != set(clade_leaves):
            raise MonophylyFailure

        # Clade is monophyletic, so rename and prune
        # But don't mess up distances: extend the branch by the distance to
        # the farthest leaf being removed.
        mrca.name = clade
        leaf, dist = mrca.get_farthest_leaf()
        mrca.dist += dist
        for child in mrca.get_children():
            child.detach()
class PhyltrCommand:
    # Base class of all commands.  A command turns a stream of trees into a
    # stream of results: "source" parses the raw input into trees, the
    # command's consume() transforms them, and "sink" writes the results.

    parser = OptionParser("Halp!")
    source = NewickParser
    sink = NewickFormatter

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a command instance from parsed options (subclass hook)."""
        raise NotImplementedError  # pragma: no cover

    @classmethod
    def run_as_script(cls):
        """Run the command on the process arguments.

        Returns 0 on success, or 1 if the command could not be built from
        the given arguments.
        """
        # Parse the arguments.
        # If there's an error, let optparse kill the process in its usual
        # fashion, as we should only be in run_as_script if we're genuinely
        # running from an interactive shell.
        options, files = cls.parser.parse_args(exit_on_error=True)

        # Attempt to instantiate command object
        try:
            obj = cls.init_from_opts(options, files)
        except ValueError as e:
            # Bad arguments (e.g. incompatible or incomplete)
            sys.stderr.write(str(e))
            return 1

        obj.pre_print()

        raw_source = fileinput.input(files)
        try:
            in_trees = cls.source().consume(raw_source)
            out_trees = obj.consume(in_trees)
            cls.sink(sys.stdout).consume(out_trees)
        finally:
            # Release the input even if processing fails part-way.
            raw_source.close()

        obj.post_print()

        return 0

    @classmethod
    def init_from_args(cls, string):
        """Build a command instance from a shell-like argument string."""
        args = shlex.split(string)

        # Parse the arguments.
        # If there is an error, do not kill the process!  Rather, raise a
        # ValueError with some helpful message and let it bubble up to the
        # caller.
        options, files = cls.parser.parse_args(args, exit_on_error=False)
        obj = cls.init_from_opts(options, files)
        return obj

    def pre_print(self):
        """Hook called by run_as_script() before any tree is processed."""
        pass  # pragma: no cover

    def post_print(self):
        """Hook called by run_as_script() after all output was written."""
        pass  # pragma: no cover

    # The conceptual heart of phyltr...
    def consume(self, stream):
        """Yield the results of processing every tree of ``stream``.

        process_tree() is called on each tree and every result other than
        None is yielded.  If it raises StopIteration, the rest of the stream
        is abandoned.  Finally, whatever postprocess() provides is yielded.
        """
        for tree in stream:
            try:
                res = self.process_tree(tree)
            except StopIteration:
                # The command has seen enough.  Close the upstream if it
                # supports it (generators do, plain lists do not).
                close = getattr(stream, "close", None)
                if close is not None:
                    close()
                break
            # Only None means "no output": falsy but legitimate results
            # such as a length or height of 0.0 must not be dropped.
            if res is not None:
                yield res
        for tree in self.postprocess():
            yield tree

    def process_tree(self, t):
        """Process one tree; return the result, or None for no output."""
        return t  # pragma: no cover

    def postprocess(self):
        """Return an iterable of results to emit after the last tree."""
        return []
class Annotate(PhyltrCommand):
    # Either attaches features read from a CSV file to the nodes of each
    # tree (matching one CSV column against node names), or -- in extract
    # mode -- writes the features already present on the nodes out as CSV.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr annotate")
    parser.add_option('-e', '--extract', default=False, action="store_true",
                      help="Extract data from annotated tree to file.")
    parser.add_option('-f', '--file', dest="filename",
                      help="File to read/write annotation data from/to.")
    parser.add_option('-k', '--key', dest="key",
                      help="Name of column in annotation file to match "
                           "against taxon names")
    parser.add_option('-m', '--multiple', default=False, action="store_true")

    def __init__(self, filename, key=None, extract=False, multiple=False):
        """Create the command.

        Unless ``extract`` is set, the annotation file is read immediately.

        Raises:
            ValueError: if annotations are to be read but no file was given,
                or the key column is missing from the file.
        """
        self.filename = filename
        self.key = key
        self.extract = extract
        self.multiple = multiple
        # Number of trees handled so far.
        self.n = 0

        if not self.extract:
            self.read_annotation_file()

    @classmethod
    def init_from_opts(cls, options, files=None):
        """Build the command from parsed options; ``files`` is not used."""
        annotate = Annotate(options.filename, options.key, options.extract,
                            options.multiple)
        if annotate.extract and (annotate.filename == "-"
                                 or not annotate.filename):
            # If we're writing an extracted CSV to stdout, we don't want to
            # also serialise the trees, so plumb to null
            cls.sink = NullSink
        return annotate

    def process_tree(self, t):
        """Annotate ``t``, or extract its annotations, and return it.

        Raises:
            StopIteration: in extract mode without ``multiple``, for every
                tree after the first.
        """
        if self.extract:
            # Break out of consume if we've done one
            if not self.multiple and self.n > 0:
                raise StopIteration
            self.extract_annotations(t)
        else:
            self.annotate_tree(t)
        self.n += 1
        return t

    def read_annotation_file(self):
        """Load the CSV file into ``self.annotations``.

        The result maps each value of the key column to a dictionary of the
        remaining columns of that row.

        Raises:
            ValueError: if no filename was given, or if the key column is
                not among the columns of the file.
        """
        if not self.filename:
            raise ValueError("Must supply a file to read annotations from!")
        self.annotations = {}
        with open(self.filename, "r") as fp:
            # Guess the CSV dialect from the start of the file.
            dialect = csv.Sniffer().sniff(fp.read(1024))
            fp.seek(0)
            dr = csv.DictReader(fp, dialect=dialect)
            # An explicit error rather than an assert, so that the check
            # survives "python -O".
            if self.key not in (dr.fieldnames or []):
                raise ValueError(
                    "Key column %r not found in annotation file %s"
                    % (self.key, self.filename))
            for row in dr:
                this_key = row.pop(self.key)
                self.annotations[this_key] = row

    def annotate_tree(self, t):
        """Add the loaded annotations to the matching nodes of ``t``."""
        for node in t.traverse():
            if node.name in self.annotations:
                for key, value in self.annotations[node.name].items():
                    node.add_feature(key, value)

    def extract_annotations(self, t):
        """Write the features of the nodes of ``t`` as CSV rows.

        Output goes to stdout when the filename is "-" or unset.  Otherwise
        the file is created for the first tree and appended to afterwards.
        """
        to_stdout = self.filename == "-" or not self.filename
        if to_stdout:
            fp = sys.stdout  # pragma: no cover
        else:
            fp = open(self.filename, "a" if self.n > 0 else "w")
        try:
            self._write_annotations(t, fp)
        finally:
            # Close what we opened, even if writing fails part-way.
            if not to_stdout:
                fp.close()

    def _write_annotations(self, t, fp):
        """Write the CSV rows (and header, for the first tree) to ``fp``."""
        # Every feature present anywhere in the tree becomes a column.
        found = set()
        for node in t.traverse():
            found.update(f for f in node.features
                         if f not in ("dist", "support", "name"))
        features = sorted(found)

        fieldnames = ["name"]
        if self.multiple:
            fieldnames.append("tree_number")
        fieldnames.extend(features)

        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        if self.n == 0:
            writer.writeheader()

        for node in t.traverse():
            # Only include the root node or nodes with names
            if not node.name and node.up:
                continue
            if not any(hasattr(node, f) for f in features):
                continue
            original_name = node.name
            if not original_name:
                # Temporarily give the node a name
                node.name = "root"
            rowdict = {f: getattr(node, f, "?") for f in fieldnames}
            if self.multiple:
                rowdict["tree_number"] = self.n
            writer.writerow(rowdict)
            # Put back exactly what was there before.
            node.name = original_name
class Rename(PhyltrCommand):
    # Renames the nodes of each tree according to a translation table, and
    # optionally removes the leaves which have no translation.
    #
    # (Documented with comments on purpose: a class docstring would rebind
    # __doc__ inside this class body and change what OptionParser receives.)

    parser = OptionParser(__doc__, prog="phyltr rename")
    parser.add_option('-f', '--file', dest="filename",
                      help='Specifies the translation file.')
    parser.add_option('-r', '--remove-missing', dest="remove",
                      action="store_true", default=False,
                      help='Remove untranslated taxa.')

    def __init__(self, rename=None, filename=None, remove=False):
        """Create the command.

        ``rename`` maps old names to new names; if it is empty or None, the
        table is read from ``filename`` instead.

        Raises:
            ValueError: if neither ``rename`` nor ``filename`` is given.
        """
        if rename:
            self.rename = rename
        elif filename:
            self.read_rename_file(filename)
        else:
            raise ValueError("Must supply renaming dictionary or filename!")
        self.remove = remove
        # True until the first tree has been processed.
        self.first = True

    @classmethod
    def init_from_opts(cls, options, files):
        """Build the command from parsed options; ``files`` is not used."""
        rename = Rename(filename=options.filename, remove=options.remove)
        return rename

    def read_rename_file(self, filename):
        """Read a file of names and their replacements into ``self.rename``.

        Each non-blank line has the form ``old:new``.
        """
        rename = {}
        with open(filename, "r") as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    # Tolerate blank lines, e.g. at the end of the file.
                    continue
                old, new = line.split(":")
                rename[old.strip()] = new.strip()
        self.rename = rename

    def process_tree(self, t):
        """Rename the nodes of ``t`` and return the resulting tree.

        When untranslated leaves are removed, the returned tree may be
        rooted at a descendant of the original root.
        """
        # Rename nodes.  With removal enabled, nodes without a translation
        # receive a marker name instead of keeping their own.
        for node in t.traverse():
            node.name = self.rename.get(
                node.name, "KILL-THIS-NODE" if self.remove else node.name)

        keepers = [l for l in t.get_leaves() if l.name != "KILL-THIS-NODE"]
        if self.first:
            # Whether pruning is needed is decided on the first tree only
            # and reused for all later trees.
            n_leaves = len(t.get_leaves())
            self.pruning_needed = len(keepers) < n_leaves
            self.first = False

        if self.pruning_needed:
            # Re-root on the common ancestor of the surviving leaves before
            # pruning.
            t = t.get_common_ancestor(keepers)
            t.prune(keepers, preserve_branch_length=True)
        return t