예제 #1
0
 def test_match_inputs_ignore_missing_samples_error(self):
     """Checks that a table sample missing from the metadata is an error.

     One of the table's sample IDs is renamed to "Whatever", after which
     match_inputs() is expected to raise a DataMatchingError.
     """
     tree = Tree.from_tree(self.tree)
     # Swap one of the table's sample IDs out for a junk ID
     junk_table = self.table.copy()
     junk_table.columns = ["Sample1", "Sample2", "Whatever", "Sample4"]
     expected_msg = (
         "The feature table contains samples that aren't present in the "
         "sample metadata."
     )
     with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
         tools.match_inputs(tree, junk_table, self.sample_metadata)
예제 #2
0
 def test_match_inputs_only_1_feature_in_table(self):
     """Checks that a table holding only the feature "a" is accepted.

     The table and sample metadata should come back unchanged, and the two
     remaining outputs should be None (no feature metadata is passed in).
     """
     tree = Tree.from_tree(self.tree)
     # A one-feature table is technically fine, so long as that single
     # feature is a tip in the tree
     one_feature_table = self.table.loc[["a"]]
     matched = tools.match_inputs(
         tree, one_feature_table, self.sample_metadata
     )
     out_table, out_sample_md, tip_md, int_md = matched
     assert_frame_equal(out_table, one_feature_table)
     assert_frame_equal(out_sample_md, self.sample_metadata)
     self.assertIsNone(tip_md)
     self.assertIsNone(int_md)
예제 #3
0
    def test_nonroot_missing_branchlengths(self):
        """Checks that non-root nodes lacking branch lengths are rejected.

        Every Newick string in bad_newicks leaves at least one non-root node
        without a length, so Tree.from_tree() is expected to raise a
        ValueError matching "must have lengths" for each of them. A tree in
        which only the root lacks a length should be accepted.
        """
        # Note about the fourth test tree here: the reason this triggers a
        # missing-branch-length error before a negative-branch-length error is
        # because the tree is checked in postorder. This sort of "precedence"
        # can be changed in the future if desired.
        bad_newicks = [
            '((b)a:1)root:1;', '((b:1)a)root:0;', '(b,c)a;',
            '((b)a:-1)root:3;', '((b:0,c)a:0)root:0;'
        ]
        for nwk in bad_newicks:
            # subTest() labels any failure with the offending Newick string
            # and lets the remaining trees still be checked afterwards
            with self.subTest(nwk=nwk):
                st = TreeNode.read([nwk])
                with self.assertRaisesRegex(ValueError, "must have lengths"):
                    Tree.from_tree(st)

        # Check that roots *with* missing branch lengths don't trigger an error
        # on tree creation
        ok_newicks = ['((b:0,c:1)a:0)root;']
        for nwk in ok_newicks:
            with self.subTest(nwk=nwk):
                st = TreeNode.read([nwk])
                Tree.from_tree(st)
예제 #4
0
 def test_match_inputs_filter_missing_features_error(self):
     """Checks that a table feature that isn't a tree tip is an error.

     The table's feature IDs are replaced so that one of them is an
     internal node ID; match_inputs() should raise a DataMatchingError.
     """
     tree = Tree.from_tree(self.tree)
     # Swap one tip ID in the table for the ID of an internal node, which
     # is not acceptable
     internal_id_table = self.table.copy()
     internal_id_table.index = ["a", "b", "e", "g"]
     expected_msg = (
         "The feature table contains features that aren't present as tips "
         "in the tree."
     )
     with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
         tools.match_inputs(tree, internal_id_table, self.sample_metadata)
예제 #5
0
 def test_missing_root_length_tree_rect_layout(self):
     """Checks the rectangular layout of a straight-line tree whose root
     has no assigned branch length.

     Mirrors test_straightline_tree_rect_layout, except that the root in
     the Newick string used here has no length.
     """
     tree = Tree.from_tree(TreeNode.read(['((b:2)a:1)root;']))
     tree.coords(100, 100)
     self.check_coords(
         tree, "xr", "yr", [(100, 0.0), (100 / 3.0, 0.0), (0.0, 0.0)]
     )
     # Every expected y-coordinate is 0, so each internal node's child
     # y-range should collapse to 0 as well
     for internal in tree.non_tips():
         self.assertEqual(internal.lowest_child_yr, 0)
         self.assertEqual(internal.highest_child_yr, 0)
     self.check_basic_tree_rect_layout(tree)
예제 #6
0
    def test_unrooted_layout(self):
        """Checks the x2 / y2 coordinates computed for self.tree.

        coords() is called with dimensions of 500 by 500, and the resulting
        coordinates are compared to precomputed values via check_coords().
        """
        tree = Tree.from_tree(self.tree)
        tree.coords(500, 500)

        precomputed = [
            (-10.222747306219219, 195.06163867407446),
            (118.00044943013512, 262.22444928198297),
            (36.73032180166217, 137.07942714215795),
            (184.76890317443747, 23.95196521134946),
            (40.6350638142365, 62.57251106991248),
            (-77.36538561589865, -199.6519382120705),
            (-290.23109682556253, -205.35762294073118),
            (-81.27012762847295, -125.14502213982503),
            (0.0, 0.0),
        ]
        self.check_coords(tree, "x2", "y2", precomputed)
예제 #7
0
 def test_straightline_tree_rect_layout(self):
     """Checks that every node is laid out as expected even when the tree
     has no "branches" (i.e. it is a single straight line of nodes).
     """
     # The root length is set to 100 to demonstrate / verify that the root
     # length is not taken into account. (If that behavior ever changes,
     # this test will need to be updated accordingly.)
     tree = Tree.from_tree(TreeNode.read(['((b:2)a:1)root:100;']))
     tree.coords(100, 100)
     self.check_coords(
         tree, "xr", "yr", [(100, 0.0), (100 / 3.0, 0.0), (0.0, 0.0)]
     )
     # Every expected y-coordinate is 0, so each internal node's child
     # y-range should collapse to 0 as well
     for internal in tree.non_tips():
         self.assertEqual(internal.lowest_child_yr, 0)
         self.assertEqual(internal.highest_child_yr, 0)
     self.check_basic_tree_rect_layout(tree)
예제 #8
0
    def test_match_inputs_ignore_missing_samples_override(self):
        """Checks that --p-ignore-missing-samples works as expected.

        With ignore_missing_samples=True, a table sample that is absent from
        the sample metadata should trigger a DataMatchingWarning and receive
        placeholder metadata, rather than causing an error.
        """
        # Same inputs as test_match_inputs_ignore_missing_samples_error: one
        # of the table's sample IDs is swapped out for a junk ID
        tree = Tree.from_tree(self.tree)
        junk_table = self.table.copy()
        junk_table.columns = ["Sample1", "Sample2", "Whatever", "Sample4"]
        warning_rgx = (
            r"1 sample\(s\) in the table were not present in the sample "
            r"metadata. These sample\(s\) have been assigned placeholder "
            "metadata."
        )
        with self.assertWarnsRegex(tools.DataMatchingWarning, warning_rgx):
            matched = tools.match_inputs(
                tree,
                junk_table,
                self.sample_metadata,
                ignore_missing_samples=True
            )
        out_table, out_sm, _tip_md, _int_md = matched

        # Both outputs should describe exactly the four samples in the table
        for sample_ids in (out_table.columns, out_sm.index):
            self.assertCountEqual(
                sample_ids,
                ["Sample1", "Sample2", "Whatever", "Sample4"]
            )
        # The table itself should not have been altered
        assert_frame_equal(out_table, junk_table)
        # The "Whatever" sample should have placeholder metadata in every
        # column
        whatever_md = out_sm.loc["Whatever"]
        self.assertTrue((whatever_md == "This sample has no metadata").all())
        # Aside from the new placeholder row, the sample metadata should be
        # consistent with the input. Column dtypes are allowed to differ,
        # because the placeholder value is a string and adding it can change
        # a column's dtype. (That shouldn't impact Empress, which -- at least
        # as of writing -- stores all sample metadata as strings.)
        untouched = ["Sample1", "Sample2", "Sample4"]
        assert_frame_equal(
            out_sm.loc[untouched],
            self.sample_metadata.loc[untouched],
            check_dtype=False
        )
예제 #9
0
 def test_match_inputs_feature_metadata_no_features_in_tree(self):
     """Tests that an error is raised when none of the feature metadata
     matches the tree.

     Feature names that correspond to neither tips nor internal nodes are
     filtered out of the feature metadata; if that filters out every
     feature, a DataMatchingError should be raised.
     """
     tree = Tree.from_tree(self.tree)
     # Re-index the feature metadata with consecutive integers
     unmatched_fm = self.feature_metadata.copy()
     unmatched_fm.index = range(len(self.feature_metadata.index))
     expected_msg = (
         "No features in the feature metadata are present in the tree, "
         "either as tips or as internal nodes."
     )
     with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
         tools.match_inputs(
             tree, self.table, self.sample_metadata, unmatched_fm
         )
예제 #10
0
    def test_circular_layout(self):
        """Checks that the circular layout computes what we expect it to.

        For each node, the circular layout computes the following things:
            (xc0, yc0) - the starting location for the node
            (xc1, yc1) - the ending location for the node

        In addition, every non-root internal node has an arc that connects
        the "starting points" of its children with the minimum and maximum
        angle:
            (arcx0, arcy0) - the starting location for the arc
            highest_child_clangle - the starting angle for the arc
            lowest_child_clangle - the ending angle for the arc
        """
        tree = Tree.from_tree(TreeNode.read(["((d:4,c:3)b:2,a:1)root:1;"]))
        tree.coords(100, 100)

        # Starting location of each node. Nodes 'a' and 'b' should share
        # their starting coordinates, since both start at the root.
        start_points = [
            (38.490018, 0.0),
            (-19.245009, 33.333333),
            (0.0, 0.0),
            (0.0, 0.0),
            (0.0, 0.0),
        ]
        self.check_coords(tree, "xc0", "yc0", start_points)

        # Ending location of each node
        end_points = [
            (115.470054, 0.0),
            (-48.112522, 83.333333),
            (19.245009, 33.333333),
            (-9.622504, -16.666667),
            (0.0, 0.0),
        ]
        self.check_coords(tree, "xc1", "yc1", end_points)

        # Starting location of b's arc
        b_node = tree.find("b")
        arc_x, arc_y = -19.245009, 33.333333
        self.assertAlmostEqual(b_node.arcx0, arc_x, places=5)
        self.assertAlmostEqual(b_node.arcy0, arc_y, places=5)

        # Angles spanned by b's arc
        highest_angle, lowest_angle = 2.0943951, 0.0
        self.assertAlmostEqual(b_node.highest_child_clangle, highest_angle)
        self.assertAlmostEqual(b_node.lowest_child_clangle, lowest_angle)
예제 #11
0
 def test_match_inputs_no_tips_in_table(self):
     """Checks that a table sharing no features with the tree's tips is
     an error, with or without filter_missing_features.
     """
     tree = Tree.from_tree(self.tree)
     # Re-index the table's features with consecutive integers
     unmatched_table = self.table.copy()
     unmatched_table.index = range(len(self.table.index))
     no_tips_msg = (
         "No features in the feature table are present as tips in the tree."
     )
     with self.assertRaisesRegex(tools.DataMatchingError, no_tips_msg):
         tools.match_inputs(tree, unmatched_table, self.sample_metadata)
     # --p-filter-missing-features still shouldn't be able to override
     # this error, since there are NO matching features at all
     with self.assertRaisesRegex(tools.DataMatchingError, no_tips_msg):
         tools.match_inputs(
             tree,
             unmatched_table,
             self.sample_metadata,
             filter_missing_features=True
         )
예제 #12
0
 def test_match_inputs_no_shared_samples(self):
     """Checks that sample metadata sharing no samples with the table is
     an error, with or without ignore_missing_samples.
     """
     tree = Tree.from_tree(self.tree)
     # Rename every sample in the metadata
     unmatched_sm = self.sample_metadata.copy()
     unmatched_sm.index = ["lol", "nothing", "here", "matches"]
     no_samples_msg = (
         "No samples in the feature table are present in the sample "
         "metadata."
     )
     with self.assertRaisesRegex(tools.DataMatchingError, no_samples_msg):
         tools.match_inputs(tree, self.table, unmatched_sm)
     # --p-ignore-missing-samples still shouldn't be able to override this
     # error, since there are NO matching samples at all
     with self.assertRaisesRegex(tools.DataMatchingError, no_samples_msg):
         tools.match_inputs(
             tree, self.table, unmatched_sm, ignore_missing_samples=True
         )
예제 #13
0
    def test_match_inputs_feature_metadata_root_metadata_allowed(self):
        """Tests that feature metadata for the root node is preserved."""
        # Slightly modified version of self.tree in which the root is
        # named (i)
        named_root_nwk = '(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2)i:1;'
        tree = Tree.from_tree(TreeNode.read([named_root_nwk]))
        feat_md = self.feature_metadata.copy()
        feat_md.index = ["a", "g", "i"]
        matched = tools.match_inputs(
            tree, self.table, self.sample_metadata, feat_md
        )
        out_table, out_sample_md, tip_md, int_md = matched
        # (Sanity check: the table / sample metadata matching should not
        # have been affected)
        assert_frame_equal(out_table, self.table)
        assert_frame_equal(out_sample_md, self.sample_metadata)

        expected_md = split_taxonomy(feat_md)
        # The main point of this test: all of the feature metadata should
        # have been kept, since a, g, and i are all in the tree. (i is the
        # important one to verify, since it is the root.)
        assert_frame_equal(tip_md, expected_md.loc[["a"]])
        assert_frame_equal(
            int_md, expected_md.loc[["g", "i"]], check_like=True
        )
예제 #14
0
    def test_match_inputs_feature_metadata_only_internal_node_metadata(self):
        """Tests that feature metadata only for internal nodes is allowed."""
        # Slightly modified version of self.tree in which the root is
        # named (i)
        named_root_nwk = '(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2)i:1;'
        tree = Tree.from_tree(TreeNode.read([named_root_nwk]))
        feat_md = self.feature_metadata.copy()
        feat_md.index = ["h", "g", "i"]
        matched = tools.match_inputs(
            tree, self.table, self.sample_metadata, feat_md
        )
        out_table, out_sample_md, tip_md, int_md = matched
        assert_frame_equal(out_table, self.table)
        assert_frame_equal(out_sample_md, self.sample_metadata)

        expected_md = split_taxonomy(feat_md)
        # 1) The tip metadata should have no rows
        self.assertEqual(len(tip_md.index), 0)
        # 2) The internal node metadata should have been preserved
        assert_frame_equal(
            int_md, expected_md.loc[feat_md.index], check_like=True
        )
        # 3) Both DataFrames should have the same (expected) columns
        for md_df in (tip_md, int_md):
            self.assertListEqual(list(md_df.columns), self.exp_split_fm_cols)
예제 #15
0
    def test_match_inputs_feature_metadata_nothing_dropped(self):
        """Tests that tip / internal node names are allowed as feature
        metadata entries.

        (self.feature_metadata describes three features: "e", "h", and "a".
        h is an internal node in self.tree, while e and a are tips.)
        """
        tree = Tree.from_tree(self.tree)
        matched = tools.match_inputs(
            tree, self.table, self.sample_metadata, self.feature_metadata
        )
        out_table, out_sample_md, tip_md, int_md = matched
        assert_frame_equal(out_table, self.table)
        assert_frame_equal(out_sample_md, self.sample_metadata)
        # No filtering should have been needed. The output feature metadata
        # should differ from the input only in that 1) it is split into two
        # DataFrames (tip metadata and internal node metadata), and 2) its
        # taxonomy column has been split up.
        expected_tip_md = self.split_tax_fm.loc[["e", "a"]]
        expected_int_md = self.split_tax_fm.loc[["h"]]
        assert_frame_equal(tip_md, expected_tip_md, check_like=True)
        assert_frame_equal(int_md, expected_int_md)
        # Tip and internal node metadata should have identical columns
        for md_df in (tip_md, int_md):
            self.assertListEqual(list(md_df.columns), self.exp_split_fm_cols)
예제 #16
0
    def test_circular_layout_scaling_factor(self):
        """Checks that the scaling factor applied at the end of the circular
        layout calculation preserves branch lengths.

        In other words, a node's length in the circular layout space should
        be proportional to its branch length.
        """
        tree = Tree.from_tree(TreeNode.read(["((d:4,c:3)b:2,a:1)root:1;"]))
        tree.coords(100, 100)

        # For every node except the root (which is drawn as a point), work
        # out the ratio between the node's length in the circular layout
        # space and its branch length.
        ratios = [
            sqrt((n.xc1 - n.xc0)**2 + (n.yc1 - n.yc0)**2) / n.length
            for n in tree.preorder(include_self=False)
        ]

        # All of these ratios should be roughly equal. For example, if d is
        # drawn 1.5x longer than its branch length in the above tree, then
        # every other node should be drawn roughly 1.5x longer than its
        # branch length as well.
        for other_ratio in ratios[1:]:
            self.assertAlmostEqual(ratios[0], other_ratio, places=5)
예제 #17
0
 def test_from_tree_node_starts_with_EmpressNode(self):
     """Checks that a node name starting with "EmpressNode" is rejected."""
     reserved_name_tree = TreeNode.read(
         ['((a:1,b:3)c:2,EmpressNode1:5)e:2;']
     )
     expected_msg = 'Node names can\'t start with "EmpressNode"'
     with self.assertRaisesRegex(ValueError, expected_msg):
         Tree.from_tree(reserved_name_tree)
예제 #18
0
 def test_from_tree_duplicate_tip_names(self):
     """Checks that a tree with two tips named "i" is rejected."""
     duplicate_tip_tree = TreeNode.read(['((i:1,a:3)b:2,i:5)r:2;'])
     expected_msg = "Tip names in the tree must be unique"
     with self.assertRaisesRegex(ValueError, expected_msg):
         Tree.from_tree(duplicate_tip_tree)
예제 #19
0
 def test_from_tree_singlenode(self):
     """Checks that a tree made up of a single node is rejected."""
     lone_node_tree = TreeNode.read(['i:1;'])
     expected_msg = "must contain at least 2 nodes"
     with self.assertRaisesRegex(ValueError, expected_msg):
         Tree.from_tree(lone_node_tree)
예제 #20
0
    def test_rectangular_layout(self):
        """Checks the rectangular layout (xr / yr) computed for self.tree.

        Also checks that internal nodes are given lowest_child_yr and
        highest_child_yr attributes, and that tips are not.
        """
        t = Tree.from_tree(self.tree)
        t.coords(500, 500)

        # How do such odd-looking coordinates come out of such a simple
        # tree? It happens in a few steps:
        #
        # 1. Initial y-coordinates: tips are assigned y=0, y=1, y=2, ... up
        #    to y=|tips|, and each internal node is placed at the average of
        #    its children's y-positions.
        #
        # 2. Initial x-coordinates: the root starts at x=0, and each child C
        #    with parent P gets x = P.x + C.branch_length. (This is just
        #    pseudocode; those aren't real attribute names.)
        #
        # 3. Scaling relative to the maximum width and height: this example
        #    tree has 5 tips, so the maximum height is 4 (heights are
        #    0-indexed), and the "farthest right" node is d (at x=5). So
        #    y-positions are scaled by 500 / 4 = 125 and x-positions by
        #    500 / 5 = 100. (The "500"s are simply the dimensions that were
        #    passed to coords().)
        #
        # 4. Tree.layout_rectangular() is done at this point, but coords()
        #    still has to make every coordinate relative to the root node's
        #    coordinates. So the root's x=0 is subtracted from every
        #    x-coordinate (which does nothing), and the root's
        #    y=(2.375*125)=296.875 is subtracted from every y-coordinate.
        #
        # TLDR: this is why a goes from (3, 0) on the first pass to
        # ((3 * 100) - 0, (0 * 125) - 296.875) = (300, -296.875) in the end.
        rect_coords = [
            (300, -296.875),  # a
            (400, -171.875),  # e
            (200, -234.375),  # f
            (300, -46.875),  # b
            (100, -140.625),  # g
            (300, 78.125),  # c
            (500, 203.125),  # d
            (200, 140.625),  # h
            (0.0, 0.0),  # i (root)
        ]
        self.check_coords(t, "xr", "yr", rect_coords)

        # Now check that lowest_child_yr and highest_child_yr were set
        # properly, by iterating over tree.non_tips() -- which (like
        # check_coords()) also uses a post-order traversal.
        # Each 2-tuple below is ordered as (lowest child y-coordinate,
        # highest child y-coordinate); these values can be worked out from
        # the coordinates listed above.
        # NOTE(review): if non_tips() does not yield the root, the final (i)
        # entry below is never compared -- confirm.
        child_y_ranges = [
            (-296.875, -171.875),  # f
            (-234.375, -46.875),  # g
            (78.125, 203.125),  # h
            (-140.625, 140.625),  # i
        ]
        child_y_attrs = ("lowest_child_yr", "highest_child_yr")
        for idx, internal in enumerate(t.non_tips()):
            lowest, highest = child_y_ranges[idx]
            for attr in child_y_attrs:
                self.assertTrue(hasattr(internal, attr))
            self.assertAlmostEqual(internal.lowest_child_yr, lowest, places=5)
            self.assertAlmostEqual(
                internal.highest_child_yr, highest, places=5
            )

        # Tips have no children, so they should *not* have these attributes
        for tip in t.tips():
            for attr in child_y_attrs:
                self.assertFalse(hasattr(tip, attr))