Example #1
0
def test_collapse():
    """Collapse two explicitly named clades and check the leaf counts."""
    clades = {"left": ("A", "B", "C"), "right": ("D", "E", "F")}
    source = fileinput.input("tests/treefiles/basic.trees")
    parsed = list(NewickParser().consume(source))
    # Both groups are monophyletic in the first 5 of the 6 basic trees, so
    # those should end up with 2 leaves while the last one keeps all 6.
    for index, tree in enumerate(Collapse(clades).consume(parsed)):
        expected = 2 if index < 5 else 6
        assert len(tree.get_leaves()) == expected
Example #2
0
def test_length():
    """Check that every value yielded by ``Length`` is a non-negative float."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        for length in Length().consume(trees):
            # isinstance() instead of an exact type comparison, so float
            # subclasses are accepted too.
            assert isinstance(length, float)
            assert length >= 0.0
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #3
0
def test_file_collapse():
    """Collapse clades described by an argument file and check leaf counts."""
    source = fileinput.input("tests/treefiles/basic.trees")
    parsed = list(NewickParser().consume(source))
    collapser = Collapse(filename="tests/argfiles/collapse.txt")
    # The groups are monophyletic in the first 5 of the 6 basic trees, so
    # those should end up with 2 leaves while the last one keeps all 6.
    for index, tree in enumerate(collapser.consume(parsed)):
        expected = 2 if index < 5 else 6
        assert len(tree.get_leaves()) == expected
Example #4
0
def test_height():
    """Check that every value yielded by ``Height`` is a non-negative float."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        for height in Height().consume(trees):
            # isinstance() instead of an exact type comparison, so float
            # subclasses are accepted too.
            assert isinstance(height, float)
            assert height >= 0.0
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #5
0
def test_uniq():
    """Count how many of the basic trees ``Uniq`` lets through."""
    source = fileinput.input("tests/treefiles/basic.trees")
    parsed = list(NewickParser().consume(source))
    # The 6 basic trees comprise 5 unique topologies.
    # Admittedly a pretty weak check, but it is something.
    survivors = 0
    for _ in Uniq().consume(parsed):
        survivors += 1
    assert survivors == 5
Example #6
0
def test_file_prune():
    """Prune the taxa listed in an argument file; A, B and C must be gone."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        pruned = Prune(filename="tests/argfiles/taxa_abc.txt").consume(trees)
        for tree in pruned:
            leaves = tree.get_leaf_names()
            assert not any(x in leaves for x in ("A", "B", "C"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #7
0
def test_file_subtree():
    """Extract subtrees for the taxa in an argument file; check leaf counts."""
    expected_taxa = (3, 3, 3, 3, 3, 6)
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        subtrees = Subtree(filename="tests/argfiles/taxa_abc.txt").consume(trees)
        leaf_counts = [len(t.get_leaves()) for t in subtrees]
    finally:
        # Release the module-level fileinput state even if parsing raises;
        # otherwise the next fileinput.input() call would raise
        # "input() already active".
        lines.close()
    # Compare whole sequences rather than zip()-ing them: zip() stops at the
    # shorter input, so missing or extra trees would go unnoticed.
    assert leaf_counts == list(expected_taxa)
Example #8
0
def test_prune():
    """Prune taxon "A"; it must be gone while the other five taxa remain."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        for tree in Prune(["A"]).consume(trees):
            leaves = tree.get_leaf_names()
            assert "A" not in leaves
            assert all(x in leaves for x in ("B", "C", "D", "E", "F"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #9
0
def test_inverse_prune():
    """Prune with ``inverse=True``; only the listed taxa A and B must remain."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        for tree in Prune(["A", "B"], inverse=True).consume(trees):
            leaves = tree.get_leaf_names()
            assert all(x in leaves for x in ("A", "B"))
            assert not any(x in leaves for x in ("C", "D", "E", "F"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #10
0
def test_subtree():
    """Build ``Subtree`` from the argument string "A,B,C"; check leaf counts."""
    subtree = Subtree.init_from_args("A,B,C")
    expected_taxa = (3, 3, 3, 3, 3, 6)
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        leaf_counts = [len(t.get_leaves()) for t in subtree.consume(trees)]
    finally:
        # Release the module-level fileinput state even if parsing raises;
        # otherwise the next fileinput.input() call would raise
        # "input() already active".
        lines.close()
    # Compare whole sequences rather than zip()-ing them: zip() stops at the
    # shorter input, so missing or extra trees would go unnoticed.
    assert leaf_counts == list(expected_taxa)
Example #11
0
def test_identity():
    """Scaling by a factor of 1.0 must leave the written trees unchanged."""
    source = fileinput.input("tests/treefiles/basic.trees")
    originals = list(NewickParser().consume(source))
    rescaled = Scale(1.0).consume(originals)
    for before, after in zip(originals, rescaled):
        written_before = before.write()
        written_after = after.write()
        assert written_before == written_after
Example #12
0
def test_annotation_prune():
    """Annotate from CSV, prune on attribute ``f1`` == "0"; A, B, C must go."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        annotated = Annotate(
            filename="tests/argfiles/annotation.csv", key="taxon"
        ).consume(trees)
        pruned = Prune(attribute="f1", value="0").consume(annotated)
        for tree in pruned:
            leaves = tree.get_leaf_names()
            assert not any(x in leaves for x in ("A", "B", "C"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #13
0
def test_clades():
    """Check that ``Support`` gives every node a float support in [0, 1]."""
    # Local import keeps this block self-contained; os.devnull is the
    # portable spelling of "/dev/null" (identical on POSIX systems).
    import os

    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        supported = Support(filename=os.devnull).consume(trees)
        for tree in supported:
            for node in tree.traverse():
                assert hasattr(node, "support")
                # isinstance() instead of an exact type comparison, so
                # float subclasses are accepted too.
                assert isinstance(node.support, float)
                assert 0 <= node.support <= 1
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #14
0
def test_plot(dummy=False):
    """Run ``Plot`` over the basic trees, writing to a temporary file.

    There are no assertions; the test only requires that consuming the
    plot stream does not raise. ``dummy`` is passed straight to ``Plot``.
    """
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        with tempfile.NamedTemporaryFile() as fp:
            plot = Plot(dummy=dummy, output=fp.name, height=600, width=800)
            for _ in plot.consume(trees):
                pass
    finally:
        # Close in a finally block so a plotting error cannot leave the
        # module-level fileinput state active for later tests.
        lines.close()
Example #15
0
def test_categorical_annotation():
    """Run ``annotate | clades`` on categorical annotations without error."""
    # This is just to make sure the clade probability calculator doesn't
    # erroneously try to calculate means etc. of categorical annotations.
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        pipeline = build_pipeline(
            "annotate -f tests/argfiles/categorical_annotation.csv -k taxon | clades",
            trees)
        for _ in pipeline:
            pass
    finally:
        # Release the module-level fileinput state even if the pipeline
        # raises; otherwise the next fileinput.input() call would raise
        # "input() already active".
        lines.close()
Example #16
0
def test_annotation_subtree():
    """Annotate, then take subtrees by attribute ``f1`` == 0; check sizes."""
    expected_taxa = (3, 3, 3, 3, 3, 6)
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        subtrees = build_pipeline(
            "annotate -f tests/argfiles/annotation.csv -k taxon | subtree --attribute f1 --value 0",
            trees)
        leaf_counts = [len(t.get_leaves()) for t in subtrees]
    finally:
        # Release the module-level fileinput state even if the pipeline
        # raises; otherwise the next fileinput.input() call would raise
        # "input() already active".
        lines.close()
    # Compare whole sequences rather than zip()-ing them: zip() stops at the
    # shorter input, so missing or extra trees would go unnoticed.
    assert leaf_counts == list(expected_taxa)
Example #17
0
def test_scale():
    """Scaling by 0.42 must multiply each tree's height by that factor.

    "Height" here is the distance returned by ``get_farthest_leaf()``.
    """
    scale_factor = 0.42
    lines = fileinput.input("tests/treefiles/basic.trees")
    trees = list(NewickParser().consume(lines))
    # Heights are recorded before scaling is consumed, so they reflect the
    # trees as parsed.
    old_heights = [t.get_farthest_leaf()[1] for t in trees]
    scaled = Scale(scale_factor).consume(trees)
    new_heights = [t.get_farthest_leaf()[1] for t in scaled]
    for old, new in zip(old_heights, new_heights):
        expected = old * scale_factor
        # Compare with a relative tolerance: a height summed from scaled
        # branch lengths need not be bit-identical to the scaled sum.
        assert abs(new - expected) <= 1e-9 * max(1.0, abs(expected))
Example #18
0
def test_rename_from_file():
    """Rename via an argument file; "A" must become "X", others untouched."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        renamed = Rename(filename="tests/argfiles/rename.txt").consume(trees)
        for tree in renamed:
            leaves = tree.get_leaf_names()
            assert "A" not in leaves
            assert "X" in leaves
            assert all(x in leaves for x in ("B", "C", "D", "E", "F"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #19
0
def test_stat():
    """Drain ``Stat`` over the basic trees and check its counters."""
    stat = Stat()
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        # The stream must be consumed completely before the counters are
        # read; the yielded trees themselves are not needed.
        for _ in stat.consume(trees):
            pass
    finally:
        # Release the module-level fileinput state even if consumption
        # raises; otherwise the next fileinput.input() call would raise
        # "input() already active".
        lines.close()
    assert stat.tree_count == 6
    assert stat.taxa_count == 6
    assert stat.topology_count <= stat.tree_count
Example #20
0
def test_annotate(treefile, argfilepath):
    """Annotate the basic trees from CSV; each leaf must gain f1, f2 and f3."""
    parsed = NewickParser().consume(treefile('basic.trees'))
    annotator = Annotate(filename=argfilepath("annotation.csv"), key="taxon")
    for tree in annotator.consume(parsed):
        # The return value is discarded; the call only has to succeed.
        tree.write(features=[])
        for leaf in tree.get_leaves():
            for attribute in ("f1", "f2", "f3"):
                assert hasattr(leaf, attribute)
Example #21
0
def test_attribute_collapse():
    """Collapse on the annotated attribute ``f1`` and check the leaf counts."""
    source = fileinput.input("tests/treefiles/basic.trees")
    parsed = list(NewickParser().consume(source))
    with_attributes = Annotate(
        "tests/argfiles/annotation.csv", "taxon").consume(parsed)
    # f1 in these annotations corresponds to the same left/right split used
    # by the other collapse tests.
    collapser = Collapse(attribute="f1")
    # The groups are monophyletic in the first 5 of the 6 basic trees, so
    # those should end up with 2 leaves while the last one keeps all 6.
    for index, tree in enumerate(collapser.consume(with_attributes)):
        expected = 2 if index < 5 else 6
        assert len(tree.get_leaves()) == expected
Example #22
0
def test_plot_annotated(dummy=False):
    """Run ``Plot`` with ``attribute="f1"`` over annotated basic trees.

    There are no assertions; the test only requires that consuming the
    plot stream does not raise. ``dummy`` is passed straight to ``Plot``.
    """
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        # Use the short option "-f" like the other annotate pipelines in
        # these tests, instead of relying on "--f" being accepted as an
        # abbreviation of a longer option name.
        annotated_trees = build_pipeline(
            "annotate -f tests/argfiles/annotation.csv -k taxon", source=trees)
        with tempfile.NamedTemporaryFile() as fp:
            plot = Plot(output=fp.name, attribute="f1", dummy=dummy)
            for _ in plot.consume(annotated_trees):
                pass
    finally:
        # Close in a finally block so a plotting error cannot leave the
        # module-level fileinput state active for later tests.
        lines.close()
Example #23
0
def test_monophyletic_dedupe():
    """``Dedupe`` must leave exactly one leaf per taxon in each tree."""
    taxa = ("A", "B", "C", "E", "F")
    source = fileinput.input("tests/treefiles/monophyletic_dupe_taxa.trees")
    parsed = list(NewickParser().consume(source))
    # Precondition: in every input tree at least one taxon does not occur
    # exactly once.
    for tree in parsed:
        names = tree.get_leaf_names()
        assert any(names.count(taxon) != 1 for taxon in taxa)
    for tree in Dedupe().consume(parsed):
        names = tree.get_leaf_names()
        for taxon in taxa:
            assert names.count(taxon) == 1
Example #24
0
def test_annotate():
    """Annotate the basic trees from CSV; each leaf must gain f1, f2 and f3."""
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        annotated = Annotate("tests/argfiles/annotation.csv",
                             "taxon").consume(trees)
        for tree in annotated:
            # The return value is discarded; the call only has to succeed.
            tree.write(features=[])
            for leaf in tree.get_leaves():
                assert hasattr(leaf, "f1")
                assert hasattr(leaf, "f2")
                assert hasattr(leaf, "f3")
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #25
0
def test_rename_with_remove():
    """Rename A, B, C to X, Y, Z with ``remove=True``.

    Only the new names X, Y and Z may appear among the leaves; none of the
    six original taxon names may remain.
    """
    mapping = {
        "A": "X",
        "B": "Y",
        "C": "Z",
    }
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        for tree in Rename(mapping, remove=True).consume(trees):
            leaves = tree.get_leaf_names()
            assert all(x in leaves for x in ("X", "Y", "Z"))
            assert not any(
                x in leaves for x in ("A", "B", "C", "D", "E", "F"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #26
0
def test_dedupe():
    """``Dedupe`` must drop the duplicated "A" leaf from every tree."""
    singletons = ("B", "C", "E", "F")
    source = fileinput.input("tests/treefiles/duplicate_taxa.trees")
    parsed = list(NewickParser().consume(source))
    # Precondition: six leaves per tree, with "A" present twice.
    for tree in parsed:
        names_before = tree.get_leaf_names()
        assert len(names_before) == 6
        assert names_before.count("A") == 2
        for taxon in singletons:
            assert names_before.count(taxon) == 1
    # After deduplication: five leaves, each taxon exactly once.
    for tree in Dedupe().consume(parsed):
        names_after = tree.get_leaf_names()
        assert len(names_after) == 5
        for taxon in ("A", "B", "C", "E", "F"):
            assert names_after.count(taxon) == 1
Example #27
0
def test_min_med_max_uniq():
    """Lengths from ``Uniq`` in min/median/max mode must be ordered that way."""
    source = fileinput.input("tests/treefiles/basic.trees")
    parsed = list(NewickParser().consume(source))

    # Build one length stream per mode, in the order min, median, max.
    length_streams = []
    for mode in ("min", "median", "max"):
        unique_trees = Uniq(lengths=mode).consume(parsed)
        length_streams.append(Length().consume(unique_trees))

    for shortest, middle, longest in zip(*length_streams):
        assert shortest <= middle <= longest
Example #28
0
def test_pipeline():
    """Silly long pipeline to stress test build_pipeline.

    Every resulting tree must have unique leaf names, contain none of
    A, X or B, and still contain C, D, E and F.
    """
    lines = fileinput.input("tests/treefiles/basic.trees")
    try:
        trees = NewickParser().consume(lines)
        output = build_pipeline(
            "cat -s 2 | rename -f tests/argfiles/rename.txt | prune X,B | dedupe | uniq | support --sort | stat",
            source=trees)
        for tree in output:
            leaves = tree.get_leaf_names()
            # No leaf name may occur more than once.
            assert len(set(leaves)) == len(leaves)
            assert "A" not in leaves
            assert "X" not in leaves
            assert "B" not in leaves
            assert all(x in leaves for x in ("C", "D", "E", "F"))
    finally:
        # Release the module-level fileinput state even when an assertion
        # fails mid-stream; otherwise the next fileinput.input() call would
        # raise "input() already active".
        lines.close()
Example #29
0
def test_roundtrip():
    """Scaling by x and then by 1/x must change neither height nor length."""
    source = fileinput.input("tests/treefiles/basic.trees")
    parsed = list(NewickParser().consume(source))

    # Heights first, then lengths: each direct measurement is compared with
    # the same measurement taken after scaling by 2.0 and then by 0.5.
    checks = (
        (Height, "scale -s 2.0 | scale -s 0.5 | height"),
        (Length, "scale -s 2.0 | scale -s 0.5 | length"),
    )
    for measure, command in checks:
        direct = measure().consume(parsed)
        round_tripped = build_pipeline(command, parsed)
        for plain, scaled_back in zip(direct, round_tripped):
            assert plain == scaled_back
Example #30
0
def test_extract_annotations(treefile, argfilepath):
    """Annotate trees, extract the annotations to CSV and check the file."""
    parsed = list(NewickParser().consume(treefile('basic.trees')))
    with tempfile.NamedTemporaryFile(mode="r") as fp:
        command = "annotate -f {0} -k taxon | annotate --extract -f {1}".format(
            argfilepath('annotation.csv'), fp.name)
        # Drain the pipeline so that the extraction actually runs.
        list(build_pipeline(command, parsed))
        fp.seek(0)
        reader = csv.DictReader(fp)
        for field in ("f1", "f2", "f3"):
            assert field in reader.fieldnames
        assert "tree_number" not in reader.fieldnames
        for row in reader:
            if row["name"] != "A":
                continue
            assert row["f1"] == "0"
            assert row["f2"] == "1"
            assert row["f3"] == "1"