Example #1
0
class Clades(PhyltrCommand):
    # Feeds every tree of the stream into a CladeProbabilities accumulator
    # and, once the stream is exhausted, writes a clade report to
    # /dev/stdout.  (Described in a comment rather than a class docstring: a
    # docstring would shadow the module ``__doc__`` handed to the parser.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr clades")
    parser.add_option(
        '-a', '--ages',
        dest="age", action="store_true", default=False,
        help="Include age information in report.")
    parser.add_option(
        '-f', '--frequency',
        dest="frequency", type="float", default=0.0,
        help='Minimum clade frequency to report.')

    def __init__(self, frequency=0.0, ages=False):
        """Remember the report settings and create an empty accumulator.

        ``frequency`` and ``ages`` are passed unchanged to
        ``save_clade_report`` when the report is written.
        """
        self.cp = phyltr.utils.cladeprob.CladeProbabilities()
        self.ages = ages
        self.frequency = frequency

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Clades`` command from parsed options (``files`` unused)."""
        return Clades(options.frequency, options.age)

    def process_tree(self, t):
        """Add tree ``t`` to the accumulator; nothing is returned."""
        self.cp.add_tree(t)

    def postprocess(self):
        """Compute the probabilities, write the report and emit no trees."""
        accumulator = self.cp
        accumulator.compute_probabilities()
        accumulator.save_clade_report(
            "/dev/stdout", self.frequency, self.ages)
        return []
Example #2
0
class Dedupe(PhyltrCommand):
    # Removes duplicated leaf names from each tree.  Duplicates that form a
    # monophyletic group are collapsed into their common ancestor; otherwise
    # all but one randomly chosen copy are pruned.  (Described in a comment
    # rather than a class docstring: a docstring would shadow the module
    # ``__doc__`` handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr dedupe")

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Dedupe`` command; no options or files are consulted."""
        dedupe = Dedupe()
        return dedupe

    def process_tree(self, t):
        """Return ``t`` with every duplicated leaf name reduced to one leaf.

        Leaves with an empty name are ignored.  The tree is modified in
        place and also returned.
        """
        # Function-scope import keeps the block self-contained.
        from collections import Counter

        # Count each name once instead of calling list.count() per leaf,
        # which was quadratic in the number of leaves.
        name_counts = Counter(l.name for l in t.get_leaves() if l.name)
        dupes = set(n for n, count in name_counts.items() if count > 1)
        if not dupes:
            return t
        # Remove dupes one at a time
        victims = []
        for dupe in dupes:
            dupe_taxa = t.get_leaves_by_name(dupe)
            assert all([d.is_leaf() for d in dupe_taxa])
            # First try to collapse monophyletic dupes
            is_mono, _, _ = t.check_monophyly([dupe], "name")
            if is_mono:
                # The common ancestor takes over the name and loses its
                # children, i.e. it becomes the single remaining leaf.
                mrca = t.get_common_ancestor(dupe_taxa)
                mrca.name = dupe
                for child in mrca.get_children():
                    child.detach()
            # If the dupe is non-monophyletic, kill at random
            else:
                victims.extend(random.sample(dupe_taxa, len(dupe_taxa) - 1))
        if victims:
            # Set membership avoids rescanning the victim list per leaf.
            doomed = set(victims)
            t.prune([l for l in t.get_leaves() if l not in doomed],
                    preserve_branch_length=True)
        return t
Example #3
0
class Stat(PhyltrCommand):
    # Gathers summary statistics over a tree stream (tree and taxon counts,
    # number of distinct topologies, ultrametricity, tree heights) and
    # prints them in ``post_print``.  (Described in a comment rather than a
    # class docstring: a docstring would shadow the module ``__doc__``
    # handed to the parser.)

    sink = NullSink

    parser = OptionParser(__doc__, prog="phyltr stat")

    def __init__(self):
        """Initialise all counters and summary fields."""
        self.tree_count = 0
        self.taxa_count = 0
        self.ultrametric = True
        self.topologically_unique_trees = []
        self.tree_ages = []
        self.firsttree = True

        # Summary values filled in by postprocess().  They are initialised
        # here so that post_print() works even if no tree was ever seen.
        self.topology_count = 0
        self.min_tree_height = float("nan")
        self.max_tree_height = float("nan")
        self.mean_tree_height = float("nan")

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Stat`` command; no options or files are consulted."""
        stat = Stat()
        return stat

    def process_tree(self, t):
        """Update the running statistics with tree ``t`` and return it."""
        # Stuff we do to every tree...
        self.tree_count += 1
        leaves = t.get_leaves()
        leave_ages = [t.get_distance(l) for l in leaves]
        self.tree_ages.append(t.get_farthest_leaf()[1])
        # A tree counts as non-ultrametric when the spread of root-to-leaf
        # distances exceeds 0.1% of the largest such distance.
        if abs(max(leave_ages) - min(leave_ages)) > max(leave_ages) / 1000.0:
            self.ultrametric = False
        # Stuff we only do to the first tree...
        if self.firsttree:
            self.firsttree = False
            self.taxa_count = len(leaves)
            self.topologically_unique_trees.append(t)
        # Stuff we only do to trees *other* than the first...
        else:
            for u in self.topologically_unique_trees:
                if are_same_topology(t, u):
                    break
            else:
                # No stored exemplar matched: this is a new topology.
                self.topologically_unique_trees.append(t)
        return t

    def postprocess(self):
        """Derive the summary values from the collected data; emit no trees.

        With an empty stream the height statistics stay NaN instead of
        failing on ``min([])`` or a division by zero.
        """
        self.topology_count = len(self.topologically_unique_trees)
        if self.tree_ages:
            self.min_tree_height = min(self.tree_ages)
            self.max_tree_height = max(self.tree_ages)
            self.mean_tree_height = sum(self.tree_ages) / self.tree_count
        return []

    def post_print(self):
        """Print the summary report to standard output."""
        print("Total taxa: %d" % self.taxa_count)
        print("Total trees: %d" % self.tree_count)
        print("Unique topologies: %d" % self.topology_count)
        print("Are trees ultrametric? %s" % str(self.ultrametric))
        print("Mean tree height: %f" % self.mean_tree_height)
        print("Min tree height: %f" % self.min_tree_height)
        print("Max tree height: %f" % self.max_tree_height)
Example #4
0
class Prune(PhyltrCommand):
    # Prunes leaves from each tree, selected either by name (given directly
    # or read from a file) or by an attribute value.  (Described in a
    # comment rather than a class docstring: a docstring would shadow the
    # module ``__doc__`` handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr prune")
    parser.add_option('-a', '--attribute', default=None)
    parser.add_option('-f', '--file', dest="filename",
            help='Specifies a file from which to read taxa')
    parser.add_option('-i', '--inverse', action="store_true", default=False, dest="inverse")
    parser.add_option('-v', '--value', default=None)

    def __init__(self, taxa=None, filename=None, attribute=None, value=None, inverse=False):
        """Work out which selection mode applies.

        Precedence: an explicit ``taxa`` collection, then the lines of
        ``filename``, then the ``attribute``/``value`` pair.

        Raises:
            ValueError: if the file yields no taxa, or if none of the three
                selection modes is usable.
        """
        self.value = value
        self.inverse = inverse
        self.filename = filename
        self.attribute = attribute

        self.by_attribute = False

        if taxa:
            self.taxa = taxa
            return

        if filename:
            with open(self.filename, "r") as handle:
                self.taxa = set(line.strip() for line in handle.readlines())
            if not self.taxa:
                raise ValueError("Empty file!")
            return

        if not (self.attribute and self.value):
            raise ValueError("Incompatible arguments")
        # An empty taxon list makes process_tree select by attribute.
        self.taxa = []

    @classmethod 
    def init_from_opts(cls, options, files=[]):
        """Build a command from parsed options.

        The first positional argument, if any, is a comma-separated list of
        taxon names.
        """
        taxa = set(files[0].split(",")) if files else []
        return cls(taxa, options.filename, options.attribute, options.value, options.inverse)

    def process_tree(self, t):
        """Prune ``t`` in place down to the selected leaves and return it."""
        if self.taxa:
            # Selection by taxon name
            if self.inverse:
                def selected(leaf):
                    return leaf.name in self.taxa
            else:
                def selected(leaf):
                    return leaf.name not in self.taxa
        elif self.inverse:
            # Selection by attribute value: leaves lacking the attribute are
            # never selected, whichever way the comparison goes.
            def selected(leaf):
                return hasattr(leaf, self.attribute) and getattr(leaf, self.attribute) == self.value
        else:
            def selected(leaf):
                return hasattr(leaf, self.attribute) and getattr(leaf, self.attribute) != self.value

        survivors = [leaf for leaf in t.get_leaves() if selected(leaf)]
        # The selected leaves are the ones handed to prune()
        t.prune(survivors, preserve_branch_length=True)
        return t
Example #5
0
class Length(PhyltrCommand):
    # Emits, for every tree, the sum of the ``dist`` values of all its nodes.
    # (Described in a comment rather than a class docstring: a docstring
    # would shadow the module ``__doc__`` handed to the parser.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr length")

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Length`` command; no options or files are consulted."""
        return Length()

    def process_tree(self, t):
        """Return the total of ``dist`` over every node visited in ``t``."""
        total = 0
        for node in t.traverse():
            total += node.dist
        return total
Example #6
0
class Height(PhyltrCommand):
    # Emits, for every tree, the distance from the root to its farthest
    # leaf.  (Described in a comment rather than a class docstring: a
    # docstring would shadow the module ``__doc__`` handed to the parser.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr height")

    @classmethod 
    def init_from_opts(cls, options, files):
        """Build a ``Height`` command; no options or files are consulted."""
        return Height()

    def process_tree(self, t):
        """Return the distance component of ``t.get_farthest_leaf()``."""
        _leaf, distance = t.get_farthest_leaf()
        return distance
Example #7
0
class Support(PhyltrCommand):
    # Buffers the whole tree stream, computes clade probabilities over it,
    # annotates every tree with them and re-emits the trees, optionally
    # sorted by tree probability.  (Described in a comment rather than a
    # class docstring: a docstring would shadow the module ``__doc__``
    # handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr support")
    parser.add_option('-a', '--age', action="store_true", dest="age", default=False, help="Include age information in report.")
    parser.add_option('-f', '--frequency', type="float", dest="frequency",
            default=0.0, help='Minimum clade frequency to report.')
    parser.add_option("-o", "--output", action="store", dest="filename",
        help="save clades to FILE", metavar="FILE")
    parser.add_option('-s', '--sort', action="store_true", dest="sort", default=False)

    def __init__(self, frequency=0.0, ages=False, sort=False, filename=None):
        """Store the settings and create empty tree/clade accumulators.

        ``frequency`` and ``ages`` are only used for the optional clade
        report written to ``filename``; ``sort`` requests output ordered by
        decreasing tree probability.
        """
        self.frequency = frequency
        self.ages = ages
        self.sort = sort
        self.filename = filename
        self.trees = []
        self.cp = phyltr.utils.cladeprob.CladeProbabilities()

    @classmethod 
    def init_from_opts(cls, options, files):
        """Build a ``Support`` command from parsed options."""
        support = Support(options.frequency, options.age, options.sort, options.filename)
        return support

    def process_tree(self, t):
        """Buffer ``t`` and register it; output is deferred to postprocess."""
        self.trees.append(t)
        self.cp.add_tree(t)
        return None

    def postprocess(self):
        """Annotate the buffered trees and yield them one by one."""
        self.cp.compute_probabilities()

        # Save clade probabilities
        if self.filename:
            self.cp.save_clade_report(self.filename, self.frequency, self.ages)

        # Annotate trees
        for t in self.trees:
            self.cp.annotate_tree(t)

        # Sort by decreasing probability.  Sorting on the probability alone
        # (instead of on (probability, tree) tuples) means tree objects are
        # never compared with each other when two probabilities tie.
        if self.sort:
            self.trees = sorted(
                self.trees, key=self.cp.get_tree_prob, reverse=True)

        # Output
        for t in self.trees:
            yield t
Example #8
0
class Subtree(PhyltrCommand):
    # Replaces each tree by the subtree rooted at the common ancestor of a
    # selected set of leaves, chosen by name or by attribute value.
    # (Described in a comment rather than a class docstring: a docstring
    # would shadow the module ``__doc__`` handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr subtree")
    parser.add_option('-a', '--attribute', default=None)
    parser.add_option('-f', '--file', dest="filename",
            help='Specifies a file from which to read taxa')
    parser.add_option('-v', '--value', default=None)

    def __init__(self, taxa=None, filename=None, attribute=None, value=None):
        """Work out which selection mode applies.

        Precedence: an explicit ``taxa`` collection, then the lines of
        ``filename``, then the ``attribute``/``value`` pair.

        Raises:
            ValueError: if the file yields no taxa, or if none of the three
                selection modes is usable.
        """
        self.value = value
        self.filename = filename
        self.attribute = attribute

        self.by_attribute = False

        if taxa:
            self.taxa = taxa
            return

        if filename:
            with open(self.filename, "r") as handle:
                self.taxa = [line.strip() for line in handle.readlines()]
            if not self.taxa:
                raise ValueError("Empty file!")
            return

        if not (self.attribute and self.value):
            raise ValueError("Incompatible arguments")
        # An empty taxon list makes process_tree select by attribute.
        self.taxa = []

    @classmethod 
    def init_from_opts(cls, options, files):
        """Build a command from parsed options.

        The first positional argument, if any, is a comma-separated list of
        taxon names.
        """
        taxa = set(files[0].split(",")) if files else []
        return Subtree(taxa, options.filename, options.attribute, options.value)

    def process_tree(self, t):
        """Return the common ancestor node of the selected leaves of ``t``."""
        if self.taxa:
            members = [leaf for leaf in t.get_leaves() if leaf.name in self.taxa]
        else:
            members = [leaf for leaf in t.get_leaves() if hasattr(leaf, self.attribute) and getattr(leaf, self.attribute) == self.value]
        # The ancestor is looked up from the first selected leaf against the
        # remaining ones.
        anchor, others = members[0], members[1:]
        return anchor.get_common_ancestor(others)
Example #9
0
class Pretty(PhyltrCommand):
    # Renders each tree as ASCII art, labelling leaves with a chosen
    # attribute and other nodes with their support value; optionally
    # collapses well-supported clades first.  (Described in a comment rather
    # than a class docstring: a docstring would shadow the module
    # ``__doc__`` handed to the parser.)

    sink = StringFormatter

    parser = OptionParser(__doc__, prog="phyltr pretty")
    parser.add_option(
        '-c', '--compress',
        dest="compress", action="store_true", default=False)
    parser.add_option('-l', '--label', default="name")

    def __init__(self, label="name", compress=False):
        """Store the leaf-label attribute name and the compression flag."""
        self.compress = compress
        self.label = label

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Pretty`` command from parsed options."""
        return Pretty(label=options.label, compress=options.compress)

    def process_tree(self, t):
        """Relabel ``t`` in place and return its ASCII rendering."""
        # Leaves carrying the requested attribute show it; every other node
        # shows its support with two decimals.
        for node in t.traverse():
            use_label = node.is_leaf() and hasattr(node, self.label)
            if use_label:
                node.name = getattr(node, self.label)
            else:
                node.name = "%.2f" % node.support

        if self.compress:
            # Collapse clades in which every node has support >= 0.9
            collapsed = []
            for node in t.traverse("preorder"):
                if node in collapsed or node.is_leaf():
                    continue
                clade_nodes = node.get_descendants()
                clade_nodes.append(node)
                if not all([n.support >= 0.9 for n in clade_nodes]):
                    continue
                collapsed.extend(clade_nodes)
                leaf_names = sorted([l.name for l in node.get_leaves()])
                node.name = "(%.2f) %s" % (node.support, "+".join(leaf_names))
                for child in node.get_children():
                    child.detach()

        return t.get_ascii()
Example #10
0
class Uniq(PhyltrCommand):
    # Groups the trees of a stream by topology and emits one tree per
    # topology whose branch lengths summarise (max/mean/median/min) those of
    # the whole group.  (Described in a comment rather than a class
    # docstring: a docstring would shadow the module ``__doc__`` handed to
    # the parser.)

    parser = OptionParser(__doc__, prog="phyltr uniq")
    parser.add_option('-l', '--lengths', action="store", dest="lengths", default="mean")

    # Branch-length summaries understood by postprocess().
    _LENGTH_MODES = ("max", "mean", "median", "min")

    def __init__(self, lengths="mean"):
        """Store the summary mode and create the empty topology table.

        Raises:
            ValueError: if ``lengths`` is not one of "max", "mean",
                "median" or "min".  Previously such a value only failed
                later, with an unbound local in ``postprocess``.
        """
        if lengths not in self._LENGTH_MODES:
            raise ValueError(
                "Unknown branch length summary %r (expected one of: %s)" %
                (lengths, ", ".join(self._LENGTH_MODES)))
        self.lengths = lengths

        # Maps an exemplar tree to the list of all trees sharing its topology
        self.topologies = {}

    @classmethod 
    def init_from_opts(cls, options, files):
        """Build a ``Uniq`` command from parsed options."""
        uniq = Uniq(options.lengths)
        return uniq

    def process_tree(self, t):
        """File ``t`` under its topology; output is deferred to postprocess."""
        # Compare this tree to all topology exemplars.  If we find a match,
        # add it to the record and move on to the next tree.
        for exemplar in self.topologies:
            if are_same_topology(t, exemplar):
                self.topologies[exemplar].append(t)
                break
        else:
            self.topologies[t] = [t]

        return None

    def _summarise(self, dists):
        """Reduce a list of branch lengths according to ``self.lengths``."""
        if self.lengths == "max":
            return max(dists)
        if self.lengths == "min":
            return min(dists)
        if self.lengths == "mean":
            return sum(dists) / len(dists)
        # "median" -- the only remaining mode accepted by __init__
        ordered = sorted(dists)
        middle = len(ordered) // 2
        if len(ordered) % 2 == 0:
            return 0.5 * (ordered[middle] + ordered[middle - 1])
        return ordered[middle]

    def postprocess(self):
        """Yield one tree per topology with summarised branch lengths."""
        for equ_class in self.topologies.values():
            # Walk all trees of the class in lockstep.  The builtin zip is
            # used because itertools.izip does not exist on Python 3.
            # NOTE(review): this relies on trees of equal topology being
            # traversed in the same node order -- confirm.
            for nodes in zip(*[t.traverse() for t in equ_class]):
                nodes[0].dist = self._summarise([n.dist for n in nodes])
            yield equ_class[0]
Example #11
0
class Taxa(PhyltrCommand):
    # Emits the sorted node names of the first tree in the stream and then
    # stops the stream.  (Described in a comment rather than a class
    # docstring: a docstring would shadow the module ``__doc__`` handed to
    # the parser.)

    sink = ListPerLineFormatter

    parser = OptionParser(__doc__, prog="phyltr taxa")

    def __init__(self):
        """Start with no tree processed yet."""
        self.done = False

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Taxa`` command; no options or files are consulted."""
        return Taxa()

    def process_tree(self, t):
        """Return the sorted non-empty node names of the first tree.

        Raises:
            StopIteration: on every call after the first one.
        """
        if not self.done:
            found = sorted([node.name for node in t.traverse() if node.name])
            self.done = True
            return found
        raise StopIteration
Example #12
0
class Scale(PhyltrCommand):
    # Multiplies the ``dist`` of every node in every tree by a constant
    # factor.  (Described in a comment rather than a class docstring: a
    # docstring would shadow the module ``__doc__`` handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr scale")
    parser.add_option(
        '-s', '--scale',
        type="float", default=1.0,
        help='Scaling factor.')

    def __init__(self, scalefactor=1.0):
        """Store the multiplier applied to every branch length."""
        self.scalefactor = scalefactor

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Scale`` command from the parsed ``--scale`` option."""
        return Scale(options.scale)

    def process_tree(self, t):
        """Scale every node's ``dist`` in place and return ``t``."""
        factor = self.scalefactor
        for node in t.traverse():
            node.dist = node.dist * factor
        return t
Example #13
0
class Consensus(PhyltrCommand):
    # Consumes a whole tree stream, accumulating clade statistics, and emits
    # a single consensus tree built from the clades whose probability is at
    # least ``frequency``.  (Described in a comment rather than a class
    # docstring: a docstring would shadow the module ``__doc__`` handed to
    # the parser.)

    parser = OptionParser(__doc__, prog="phyltr consensus")
    parser.add_option('-f', '--frequency', type="float",dest="frequency", default=0.5, help="Minimum clade support to include in tree.")

    def __init__(self, frequency=0.5):
        """Store the support threshold and create an empty accumulator."""
        self.frequency = frequency
        self.cp = phyltr.utils.cladeprob.CladeProbabilities()

    @classmethod 
    def init_from_opts(cls, options, files=[]):
        """Build a ``Consensus`` command from parsed options."""
        consensus = Consensus(options.frequency)
        return consensus

    def process_tree(self, t):
        """Register ``t`` with the accumulator; nothing is returned."""
        self.cp.add_tree(t)

    def postprocess(self):
        """Compute clade probabilities and yield the one consensus tree."""
        self.cp.compute_probabilities()
        # Build consensus tree
        t = self.build_consensus_tree()
        yield t

    def build_consensus_tree(self):
        """Build and annotate the consensus tree from ``self.cp``.

        Reads ``clade_probs``, ``clade_ages`` and ``clade_attributes`` from
        the accumulator; clades are keyed by the comma-joined, sorted names
        of their leaves.  The lists held in ``clade_ages`` and
        ``clade_attributes`` are sorted in place as a side effect.
        """

        # Build a list of all clades in the treestream with frequency above the
        # requested threshold, sorted first by size and then by frequency.  Do not
        # include the trivial clade of all leaves.
        clades = []
        for clade, p in self.cp.clade_probs.items():
            if p >= self.frequency:
                clade = clade.split(",")
                clades.append((len(clade), p, set(clade)))
        # Ascending tuple sort, so the largest clade ends up last.
        clades.sort()
        # Pop the clade with highest probability, which *should* be the clade
        # with support 1.0 containing all leaves
        taxon_count, prob, all_leaves = clades.pop()
        assert prob == 1.0
        assert all((taxon_count > count for count, p, clade in clades))
        # Largest remaining clades first.
        clades.reverse()

        # Start out with a tree in which all leaves are joined in one big polytomy
        t = ete3.Tree()
        for l in all_leaves:
            t.add_child(name=l)

        # Now recursively resolve the polytomy by greedily grouping clades
        # NOTE(review): recursive_builder is defined outside this block; its
        # exact grouping rules cannot be confirmed from here.
        t = recursive_builder(t, clades)
        cache = t.get_cached_content()

        # Add age annotations
        # Postorder matters: a child's own subtree has been given its branch
        # lengths before the child's distance to its parent is derived.
        for clade in t.traverse("postorder"):
            clade_key = ",".join(sorted([l.name for l in cache[clade]]))
            if not clade.is_leaf(): # all leaves have age zero, so don't bother
                ages = self.cp.clade_ages[clade_key]
                # The mean is taken before the in-place sort below; the sort
                # is only needed for the percentile look-ups.
                mean = sum(ages)/len(ages)
                for c in clade.get_children():
                    # Branch length is the clade's mean age minus the child's
                    # distance to its farthest leaf.
                    leaf, age = c.get_farthest_leaf()
                    c.dist = mean - age
                ages.sort()
                # Values at the 5%, 50% and 95% positions of the sorted ages.
                lower, median, upper = [ages[int(x*len(ages))] for x in (0.05,0.5,0.95)]
                clade.add_feature("age_mean", mean)
                clade.add_feature("age_median", median)
                clade.add_feature("age_HPD", "{%f-%f}" % (lower,upper))

            # Attribute summaries are added for every node, leaves included.
            # NOTE(review): these use the 2.5%/97.5% positions while the ages
            # above use 5%/95% -- confirm the difference is intended.
            for f in self.cp.clade_attributes:
                values = self.cp.clade_attributes[f][clade_key]
                mean = sum(values)/len(values)
                values.sort()
                lower, median, upper = [values[int(x*len(values))] for x in (0.025,0.5,0.975)]
                clade.add_feature("%s_mean" % f, mean)
                clade.add_feature("%s_median" % f, median)
                clade.add_feature("%s_HPD" % f, "{%f-%f}" % (lower,upper))
        return t
Example #14
0
class Plot(PhyltrCommand):
    # Renders trees with ete3, either to image files or to an interactive
    # window, optionally colouring leaves by an attribute.  With
    # ``dummy=True`` no ete3 drawing call is made.  (Described in a comment
    # rather than a class docstring: a docstring would shadow the module
    # ``__doc__`` handed to the parser.)

    sink = NullSink

    parser = OptionParser(__doc__, prog="phyltr plot")
    parser.add_option('-a', '--attribute', dest="attribute", default=None)
    parser.add_option('-d', '--dpi', type="int", default=None)
    parser.add_option(
        '-H', '--height',
        type="int", dest="height", default=None)
    parser.add_option('-l', '--label', default="name")
    parser.add_option('-m', '--multiple', default=False, action="store_true")
    parser.add_option('-o', '--output', default=None)
    parser.add_option('-u', '--units', default="px")
    parser.add_option('-w', '--width', type="int", dest="width", default=None)

    def __init__(self,
                 label="name",
                 attribute=None,
                 output=None,
                 multiple=False,
                 width=None,
                 height=None,
                 units="px",
                 dpi=300,
                 dummy=False):
        """Store the plot settings and, unless ``dummy``, build a TreeStyle.

        ``label`` names the leaf attribute used as text label,
        ``attribute`` the one used for colouring; ``output`` is the target
        file name (no file means an interactive window); ``multiple``
        requests one numbered file per tree; ``width``, ``height``,
        ``units`` and ``dpi`` are forwarded to the renderer.
        """
        self.dummy = dummy
        self.n = 0  # number of trees handled so far

        self.label = label
        self.attribute = attribute
        self.output = output
        self.multiple = multiple
        self.width, self.height = width, height
        self.units = units
        self.dpi = dpi

        if self.dummy:
            return
        # Setup TreeStyle
        style = ete3.TreeStyle()
        style.show_scale = False
        style.show_branch_support = True
        self.ts = style

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Plot`` command from parsed options."""
        return Plot(label=options.label,
                    attribute=options.attribute,
                    output=options.output,
                    multiple=options.multiple,
                    width=options.width,
                    height=options.height,
                    units=options.units,
                    dpi=options.dpi)

    def process_tree(self, t):
        """Decorate and render ``t``.

        Returns ``None`` in multiple mode.

        Raises:
            StopIteration: after the first tree when not in multiple mode.
        """
        drawing = not self.dummy

        # Coloured circle per leaf, one colour per distinct attribute value
        if self.attribute:
            distinct = set([getattr(leaf, self.attribute) for leaf in t.get_leaves()])
            palette = get_colour_set(len(distinct))
            colour_of = dict(zip(distinct, palette))
            for leaf in t.iter_leaves():
                leaf_colour = colour_of[getattr(leaf, self.attribute)]
                if drawing:
                    circle = ete3.CircleFace(radius=10,
                                             color=leaf_colour,
                                             style="sphere")
                    leaf.add_face(circle, 0)

        # Text labels
        if drawing:
            for leaf in t.iter_leaves():
                leaf.add_face(ete3.TextFace(getattr(leaf, self.label)), 1)

        # Save to file, or show interactively when no output was requested
        if self.output:
            render_args = {}
            if self.height or self.width:
                render_args.update(h=self.height,
                                   w=self.width,
                                   units=self.units,
                                   dpi=self.dpi)
            if self.multiple:
                # Insert a 1-based, zero-padded tree number before the
                # file extension.
                stem, extension = os.path.splitext(self.output)
                target = "".join([stem, "_%06d" % (self.n + 1), extension])
            else:
                target = self.output
            if drawing:
                t.render(target, ultrametric, tree_style=self.ts, **render_args)
        else:  # pragma: no cover
            if drawing:
                t.show(ultrametric, tree_style=self.ts)

        self.n += 1

        if not self.multiple:
            raise StopIteration
        return None
Example #15
0
class Cat(PhyltrCommand):
    # Passes trees through, optionally discarding a burn-in percentage and
    # subsampling.  (Described in a comment rather than a class docstring: a
    # docstring would shadow the module ``__doc__`` handed to the parser.)

    # NOTE(review): this assigns to the *base class*, so every
    # PhyltrCommand subclass that does not define its own ``source`` is
    # affected once this class body has run.  Kept as-is because other
    # commands may rely on it -- confirm whether a plain
    # ``source = ComplexNewickParser`` was intended.
    PhyltrCommand.source = ComplexNewickParser

    parser = OptionParser(__doc__, prog="phyltr cat")
    parser.add_option('-b',
                      '--burnin',
                      action="store",
                      dest="burnin",
                      type="int",
                      default=0)
    parser.add_option('-s',
                      '--subsample',
                      action="store",
                      dest="subsample",
                      type="int",
                      default=1)
    parser.add_option('--no-annotations',
                      action="store_true",
                      dest="no_annotations",
                      default=False)

    def __init__(self, burnin=0, subsample=1, annotations=True):
        """Store the settings.

        ``burnin`` is a percentage of the stream to discard from its start;
        ``subsample`` keeps every n-th tree; ``annotations`` is stored but
        not used within this class.
        """
        self.burnin = burnin
        self.subsample = subsample
        self.annotations = annotations
        self.trees = []
        self.n = 0

    @classmethod
    def init_from_opts(cls, options, files=[]):
        """Build a ``Cat`` command from parsed options."""
        cat = Cat(options.burnin, options.subsample,
                  not options.no_annotations)
        return cat

    def process_tree(self, t):
        """Return ``t`` if it is kept right away, otherwise ``None``."""
        if self.burnin:
            # If we're discarding a fixed percentage as burn-in, we need to
            # know the total number of trees.  So for now, just dump 'em in
            # a list, consume ALL the memory...
            self.trees.append(t)
            return None
        # Otherwise, we can subsample as we go
        keep = self.n % self.subsample == 0
        self.n += 1
        return t if keep else None

    def postprocess(self):
        """Yield the buffered trees that survive burn-in and subsampling."""
        if not self.burnin:
            # If there's no burn-in, we've already done everything.  A plain
            # return ends the generator; raising StopIteration inside a
            # generator becomes a RuntimeError under PEP 479.
            return
        # If there's a burn-in, we now have all trees sitting in a list,
        # so dump 'em all now
        burnin = int(round((self.burnin / 100.0) * len(self.trees)))
        self.trees = self.trees[burnin::self.subsample]
        for t in self.trees:
            yield t
Example #16
0
class Collapse(PhyltrCommand):
    # Collapses monophyletic groups of leaves into a single named leaf.
    # Groups are defined by a name -> taxa dictionary, by a translation file
    # or by a shared attribute value.  (Described in a comment rather than a
    # class docstring: a docstring would shadow the module ``__doc__``
    # handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr collapse")
    parser.add_option('-a', '--attribute', dest="attribute", default=None)
    parser.add_option('-t',
                      '--translate',
                      help='Specifies the translation file.',
                      default=None)

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Collapse`` command from parsed options."""
        collapse = Collapse({}, options.translate, options.attribute)
        return collapse

    def __init__(self, clades=None, filename=None, attribute=None):
        """Select the clade definitions to collapse by.

        Precedence: a non-empty ``clades`` dictionary, then the translation
        file ``filename``, then ``attribute``.

        Raises:
            ValueError: if none of the three is given, or if the
                translation file defines no clades.
        """
        # Always define every attribute so later look-ups cannot fail with
        # an AttributeError, whichever mode was chosen.
        self.filename = filename
        self.attribute = attribute
        self.trans = {}  # trans = translation

        if clades:
            self.trans = clades
        elif filename:
            self.read_clade_file(self.filename)
            if not self.trans:
                # An empty table would silently switch process_tree into
                # attribute mode; fail early instead.
                raise ValueError(
                    "Translation file defines no clades: %s" % filename)
        elif not attribute:
            raise ValueError(
                "Must provide a dictionary of clades, a filename or an attribute."
            )

    def process_tree(self, t):
        """Collapse clades of ``t`` in place and return it."""
        if self.trans:
            self.collapse_by_dict(t)
        else:
            self.collapse_by_attribute(t)
        return t

    def read_clade_file(self, filename):
        """Read ``name: taxon,taxon,...`` lines into ``self.trans``.

        Blank lines are skipped.  Any other line must contain exactly one
        colon, otherwise a ValueError propagates from the unpacking.
        """
        self.trans = {}
        with open(filename, "r") as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                name, clade = line.split(":")
                self.trans[name] = clade.strip().split(",")

    def collapse_by_dict(self, t):
        """Collapse every clade listed in ``self.trans`` that occurs in ``t``.

        Returns 1 as soon as a clade turns out not to be monophyletic,
        otherwise ``None``.
        """
        cache = t.get_cached_content()
        tree_leaves = cache[t]
        for name, clade in self.trans.items():
            # Get a list of leaves in this tree
            clade_leaves = [l for l in tree_leaves if l.name in clade]
            if not clade_leaves:
                continue
            try:
                self.test_monophyly_and_collapse(t, cache, name, clade_leaves)
            except MonophylyFailure:
                # Clade is not monophyletic.  We can't collapse it.
                sys.stderr.write("Monophyly failure for clade: %s\n" % name)
                # NOTE(review): this return skips all remaining clades,
                # unlike collapse_by_attribute, which carries on -- confirm
                # which behaviour is intended.
                return 1

    def collapse_by_attribute(self, t):
        """Collapse every group of leaves sharing a ``self.attribute`` value.

        Leaves without the attribute are ignored; non-monophyletic groups
        are reported on stderr and left untouched.
        """
        cache = t.get_cached_content()
        tree_leaves = cache[t]
        # Build a dictionary mapping attribute values to lists of leaves
        values = {}
        for leaf in tree_leaves:
            if not hasattr(leaf, self.attribute):
                continue
            values.setdefault(getattr(leaf, self.attribute), []).append(leaf)
        # Do monophyly tests
        for value, clade_leaves in values.items():
            try:
                self.test_monophyly_and_collapse(t, cache, value, clade_leaves)
            except MonophylyFailure:
                # Clade is not monophyletic.  We can't collapse it.
                sys.stderr.write(
                    "Monophyly failure for attribute value: %s=%s\n" %
                    (self.attribute, value))

    def test_monophyly_and_collapse(self, t, cache, clade, clade_leaves):
        """Collapse ``clade_leaves`` into one node named ``clade``.

        Raises:
            MonophylyFailure: if the leaves below their common ancestor are
                not exactly ``clade_leaves``.
        """
        # Check monophyly
        if len(clade_leaves) == 1:
            # .get_common_ancestor works oddly for singletons
            mrca = clade_leaves[0]
        else:
            mrca = t.get_common_ancestor(clade_leaves)
        mrca_leaves = cache[mrca]
        if set(mrca_leaves) != set(clade_leaves):
            raise MonophylyFailure

        # Clade is monophyletic, so rename and prune
        # But don't mess up distances: the distance to the farthest leaf is
        # added onto the ancestor's own branch.
        mrca.name = clade
        leaf, dist = mrca.get_farthest_leaf()
        mrca.dist += dist
        for child in mrca.get_children():
            child.detach()
Example #17
0
class PhyltrCommand:
    """Base class of all commands.

    A command is fed a stream of trees via ``consume``, which calls
    ``process_tree`` for each tree and ``postprocess`` at the end.
    Subclasses override those two hooks plus ``init_from_opts`` and, where
    needed, the ``parser``, ``source`` and ``sink`` class attributes.
    """

    parser = OptionParser("Halp!")
    source = NewickParser
    sink = NewickFormatter

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a command instance from parsed options; must be overridden."""
        raise NotImplementedError  # pragma: no cover

    @classmethod
    def run_as_script(cls):
        """Run the command on the command line; return a process exit code.

        Returns 1 if the command cannot be built from the given arguments
        (``ValueError``), otherwise 0.
        """
        # Parse the arguments.
        # If there's an error, let optparse kill the process in its usual
        # fashion, as we should only be in run_as_script if we're genuinely
        # running from an interactive shell.
        options, files = cls.parser.parse_args(exit_on_error=True)

        # Attempt to instantiate command object
        try:
            obj = cls.init_from_opts(options, files)
        except ValueError as e:
            # Bad arguments (e.g. incompatible or incomplete)
            sys.stderr.write(str(e))
            return 1

        obj.pre_print()

        raw_source = fileinput.input(files)
        try:
            in_trees = cls.source().consume(raw_source)
            out_trees = obj.consume(in_trees)
            cls.sink(sys.stdout).consume(out_trees)
        finally:
            # Release the input even if a stage of the pipeline fails.
            raw_source.close()

        obj.post_print()

        return 0

    @classmethod
    def init_from_args(cls, string):
        """Build a command instance from a shell-style argument string."""
        args = shlex.split(string)
        # Parse the arguments.
        # If there is an error, do not kill the process!  Rather, raise a
        # ValueError with some helpful message and let it bubble up to the
        # caller.
        options, files = cls.parser.parse_args(args, exit_on_error=False)
        obj = cls.init_from_opts(options, files)
        return obj

    def pre_print(self):
        """Hook called by ``run_as_script`` before any tree is processed."""
        pass  # pragma: no cover

    def post_print(self):
        """Hook called by ``run_as_script`` after all output is written."""
        pass  # pragma: no cover

    # The conceptual heart of phyltr...

    def consume(self, stream):
        """Yield the results of processing every tree in ``stream``.

        ``process_tree`` returns ``None`` to emit nothing for a tree and
        raises ``StopIteration`` to end the stream early.  Whatever
        ``postprocess`` returns is yielded afterwards.
        """
        for tree in stream:
            try:
                res = self.process_tree(tree)
            except StopIteration:
                # Early exit requested.  Close the upstream source when it
                # supports that (generators do, plain lists do not).
                close = getattr(stream, "close", None)
                if close is not None:
                    close()
                break
            # Only None means "no output"; falsy results such as a length
            # of 0.0 or an empty list are legitimate and must be passed on.
            if res is not None:
                yield res
        for tree in self.postprocess():
            yield tree

    def process_tree(self, t):
        """Process one tree; the default passes it through unchanged."""
        return t  # pragma: no cover

    def postprocess(self):
        """Return an iterable of results to emit after the stream ends."""
        return []
Example #18
0
class Annotate(PhyltrCommand):
    # Either attaches the columns of a CSV file to tree nodes as features
    # (matching a key column against node names), or extracts the features
    # of tree nodes into a CSV file.  (Described in a comment rather than a
    # class docstring: a docstring would shadow the module ``__doc__``
    # handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr annotate")
    parser.add_option('-e', '--extract', default=False, action="store_true", help="Extract data from annotated tree to file.")
    parser.add_option('-f', '--file', dest="filename", help="File to read/write annotation data from/to.")
    parser.add_option('-k', '--key', dest="key", help="Name of column in annotation file to match against taxon names")
    parser.add_option('-m', '--multiple', default=False, action="store_true")

    def __init__(self, filename, key=None, extract=False, multiple=False):
        """Store the settings and, in annotate mode, load the CSV file.

        Raises:
            ValueError: in annotate mode, if no file name is given or the
                key column is not present in the file.
        """
        self.filename = filename
        self.key = key
        self.extract = extract
        self.multiple = multiple

        self.n = 0  # number of trees processed so far

        if not self.extract:
            self.read_annotation_file()

    @classmethod 
    def init_from_opts(cls, options, files=[]):
        """Build an ``Annotate`` command from parsed options."""
        annotate = Annotate(options.filename, options.key, options.extract, options.multiple)
        if annotate.extract and (annotate.filename == "-" or not annotate.filename):
            # If we're writing an extracted CSV to stdout, we don't want to
            # also serialise the trees, so plumb to null
            cls.sink = NullSink

        return annotate

    def process_tree(self, t):
        """Annotate ``t`` or extract its annotations, then return it.

        Raises:
            StopIteration: in extract mode without ``multiple``, once one
                tree has already been handled.
        """
        if self.extract:
            # Break out of consume if we've done one
            if not self.multiple:
                if self.n > 0:
                    raise StopIteration
            self.extract_annotations(t)
        else:
            self.annotate_tree(t)
        self.n += 1
        return t

    def read_annotation_file(self):
        """Load the CSV file into ``self.annotations``, keyed by ``self.key``.

        Raises:
            ValueError: if no file name was given or the key column is
                missing.  ValueError is what ``run_as_script`` reports as a
                bad-arguments error.
        """
        if not self.filename:
            raise ValueError("An annotation file must be specified.")
        self.annotations = {}
        with open(self.filename, "r") as fp:
            dialect = csv.Sniffer().sniff(fp.read(1024))
            fp.seek(0)
            dr = csv.DictReader(fp, dialect=dialect)
            if self.key not in (dr.fieldnames or []):
                raise ValueError(
                    "Key column %r not found in annotation file %s" %
                    (self.key, self.filename))
            for row in dr:
                this_key = row.pop(self.key)
                self.annotations[this_key] = row

    def annotate_tree(self, t):
        """Add the loaded annotations as features to matching nodes of ``t``."""
        for node in t.traverse():
            if node.name in self.annotations:
                for key, value in self.annotations[node.name].items():
                    node.add_feature(key, value)

    def extract_annotations(self, t):
        """Write the features of the nodes of ``t`` as CSV rows.

        Output goes to stdout when the file name is ``"-"`` or empty.
        Otherwise the file is created for the first tree and appended to
        for later ones.  The header row is written for the first tree only.
        """
        to_stdout = self.filename == "-" or not self.filename
        if to_stdout:
            fp = sys.stdout # pragma: no cover
        else:
            fp = open(self.filename, "a" if self.n > 0 else "w")
        try:
            # Collect every feature name other than the built-in ones
            features = []
            for node in t.traverse():
                for f in node.features:
                    if f not in ["dist", "support", "name"] and f not in features:
                        features.append(f)
            features.sort()
            fieldnames = ["name"]
            if self.multiple:
                fieldnames.append("tree_number")
            fieldnames.extend(features)
            writer = csv.DictWriter(fp, fieldnames=fieldnames)
            if self.n == 0:
                writer.writeheader()
            for node in t.traverse():
                # Only include the root node or nodes with names
                if not node.name and node.up:
                    continue
                if not any([hasattr(node, f) for f in features]):
                    continue
                original_name = node.name
                if not original_name:
                    # Temporarily give the (unnamed root) node a name
                    node.name = "root"
                rowdict = {f: getattr(node, f, "?") for f in fieldnames}
                if self.multiple:
                    rowdict["tree_number"] = self.n
                writer.writerow(rowdict)
                # Put back exactly the name the node had before.
                node.name = original_name
        finally:
            # Never close stdout; always close a file we opened ourselves.
            if not to_stdout:
                fp.close()
Example #19
0
class Rename(PhyltrCommand):
    # Renames tree nodes according to an old -> new mapping and optionally
    # removes the taxa that have no translation.  (Described in a comment
    # rather than a class docstring: a docstring would shadow the module
    # ``__doc__`` handed to the parser.)

    parser = OptionParser(__doc__, prog="phyltr rename")
    parser.add_option('-f',
                      '--file',
                      dest="filename",
                      help='Specifies the translation file.')
    parser.add_option('-r',
                      '--remove-missing',
                      dest="remove",
                      action="store_true",
                      default=False,
                      help='Remove untranslated taxa.')

    def __init__(self, rename=None, filename=None, remove=False):
        """Take the mapping from ``rename`` or load it from ``filename``.

        Raises:
            ValueError: if neither a non-empty mapping nor a file name is
                supplied.
        """
        if rename:
            self.rename = rename
        elif filename:
            self.read_rename_file(filename)
        else:
            raise ValueError("Must supply renaming dictionary or filename!")
        self.remove = remove

        # True until the first tree has been processed
        self.first = True

    @classmethod
    def init_from_opts(cls, options, files):
        """Build a ``Rename`` command from parsed options."""
        rename = Rename(filename=options.filename, remove=options.remove)
        return rename

    def read_rename_file(self, filename):
        """Read ``old:new`` lines from ``filename`` into ``self.rename``.

        Blank lines are skipped.  Any other line must contain exactly one
        colon, otherwise a ValueError propagates from the unpacking.
        """
        rename = {}
        with open(filename, "r") as fp:
            for line in fp:
                line = line.strip()
                if not line:
                    continue
                old, new = line.split(":")
                rename[old.strip()] = new.strip()
        self.rename = rename

    def process_tree(self, t):
        """Rename the nodes of ``t`` and return the resulting tree.

        When untranslated taxa are being removed, the returned tree may be
        a subtree of ``t``.
        """
        # Rename nodes; untranslated ones are marked when removal is on
        for node in t.traverse():
            node.name = self.rename.get(
                node.name, "KILL-THIS-NODE" if self.remove else node.name)

        keepers = [l for l in t.get_leaves() if l.name != "KILL-THIS-NODE"]
        if self.first:
            # Whether pruning is needed is decided once, on the first tree,
            # and reused for every later tree.
            n_leaves = len(t.get_leaves())
            self.pruning_needed = len(keepers) < n_leaves
            self.first = False

        if self.pruning_needed:
            mrca = t.get_common_ancestor(keepers)
            if t != mrca:
                t = mrca
            t.prune(keepers, preserve_branch_length=True)

        return t