Ejemplo n.º 1
0
    def setUp(self):
        """Build the tree, table, metadata and ordination fixtures."""
        self.tree = self.mock_tree_from_nwk()
        self.bp_tree = from_skbio_treenode(self.tree)

        sample_ids = ['Sample1', 'Sample2', 'Sample3', 'Sample4']
        feature_ids = ['a', 'b', 'e', 'd']
        # One row per sample here; the array is transposed when it is
        # handed to biom.Table.
        counts_by_sample = np.array([
            [1, 2, 0, 4],
            [8, 7, 0, 5],
            [1, 0, 0, 0],
            [0, 0, 0, 0],
        ])
        self.table = biom.Table(counts_by_sample.T, feature_ids, sample_ids)

        sample_md_columns = {
            "Metadata1": [0, 0, 0, 1],
            "Metadata2": [0, 0, 0, 0],
            "Metadata3": [1, 2, 3, 4],
            "Metadata4": ["abc", "def", "ghi", "jkl"]
        }
        self.sample_metadata = pd.DataFrame(
            sample_md_columns, index=list(self.table.ids()))

        # Greengenes taxonomy annotations taken from the moving pictures
        # taxonomy.qza file; the confidence values are made up.
        tax_e = ("k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
                 "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
                 "s__")
        tax_h = ("k__Bacteria; p__Proteobacteria; "
                 "c__Gammaproteobacteria; o__Pasteurellales; "
                 "f__Pasteurellaceae; g__; s__")
        tax_a = ("k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
                 "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
                 "s__uniformis")
        self.feature_metadata = pd.DataFrame(
            {
                "Taxonomy": [tax_e, tax_h, tax_a],
                "Confidence": [0.95, 0.8, 0]
            },
            index=["e", "h", "a"])

        split_result = split_taxonomy(self.feature_metadata)
        self.split_tax_fm, self.taxcols = split_result
        # "a" and "e" are stored as tip metadata, "h" as internal node
        # metadata.
        self.tip_md = self.split_tax_fm.loc[["a", "e"]]
        self.int_md = self.split_tax_fm.loc[["h"]]

        # Designed to match the shearing that is done in the core test for
        # --p-shear-to-table.
        shorn_newick = (
            "(((a:1)EmpressNode0:1,b:2)g:1,(d:3)h:2)EmpressNode1:1;")
        self.shorn_tree = parse_newick(shorn_newick)

        # "Level 1" through "Level 7", followed by "Confidence".
        self.exp_split_fm_cols = (
            ["Level {}".format(rank) for rank in range(1, 8)]
            + ["Confidence"])

        axis_labels = ['PC1', 'PC2', 'PC3']
        eigvals = pd.Series([0.50, 0.25, 0.25], index=axis_labels)
        proportion_explained = pd.Series([15.5, 12.2, 8.8],
                                         index=axis_labels)
        coordinates = [
            [0.1, 0.2, 0.3],
            [0.2, 0.3, 0.4],
            [0.3, 0.4, 0.5],
            [0.4, 0.5, 0.6],
        ]
        samples_df = pd.DataFrame(
            coordinates, index=sample_ids, columns=axis_labels)
        self.ordination = OrdinationResults(
            'PCoA',
            'Principal Coordinate Analysis',
            eigvals,
            samples_df,
            proportion_explained=proportion_explained)
Ejemplo n.º 2
0
 def test_fm_filtering_post_shearing_with_moving_pictures_dataset(self):
     """Check that a tip removed by shearing is dropped from tip metadata.

     Regression test for https://github.com/biocore/empress/issues/248,
     using the moving pictures dataset returned by load_mp_data().
     """
     # This particular tip can be problematic (it was the reason #248 was
     # found), so we observe how it is handled when generating a
     # visualization of the moving pictures dataset.
     funky_tip = "8406abe6d9a72018bf32d189d1340472"
     tree, tbl, smd, fmd, pcoa = load_mp_data()
     # Convert artifacts / metadata objects to "normal" types that we can
     # pass to Empress
     bp_tree = from_skbio_treenode(tree.view(TreeNode))
     tbl_df = tbl.view(biom.Table)
     pcoa_skbio = pcoa.view(skbio.OrdinationResults)
     smd_df = smd.to_dataframe()
     fmd_df = fmd.to_dataframe()
     # Sanity check -- verify that the funky tip we're looking for is
     # actually present in the data. (We haven't actually done anything
     # specific to Empress yet. This just verifies the environment is ok.)
     # https://stackoverflow.com/a/23549599/10730311
     # assertIn (rather than assertTrue(x in y)) reports the missing member
     # and the container on failure instead of "False is not true".
     self.assertIn(funky_tip, fmd_df.index)
     # Generate an Empress visualization using this data
     viz = Empress(bp_tree,
                   tbl_df,
                   smd_df,
                   feature_metadata=fmd_df,
                   ordination=pcoa_skbio,
                   filter_extra_samples=True,
                   shear_to_table=True)
     # Check that tip 8406abe6d9a72018bf32d189d1340472 *isn't* in the tip
     # metadata. All of the samples this tip is present in are filtered out
     # when --p-filter-extra-samples is used with this particular PCoA, so
     # we verify that this tip is removed from the tip metadata.
     self.assertNotIn(funky_tip, viz.tip_md.index)
Ejemplo n.º 3
0
    def test_from_skbio_treenode(self):
        """Compare from_skbio_treenode(self.sktn) against self.bp."""
        expected = self.bp
        observed = from_skbio_treenode(self.sktn)

        # The B arrays must match as a whole ...
        npt.assert_equal(observed.B, expected.B)
        # ... and name/length must agree at every index of B.
        for position in range(len(expected.B)):
            self.assertEqual(expected.name(position),
                             observed.name(position))
            self.assertEqual(expected.length(position),
                             observed.length(position))
Ejemplo n.º 4
0
    def test_from_skbio_treenode(self):
        """Check the converted tree's B array, names and lengths."""
        converted = from_skbio_treenode(self.sktn)
        reference = self.bp

        npt.assert_equal(converted.B, reference.B)
        # Walk every index of the reference B array and compare the name
        # and length reported by both trees at that index.
        index_count = len(reference.B)
        for idx in range(index_count):
            self.assertEqual(reference.name(idx), converted.name(idx))
            self.assertEqual(reference.length(idx), converted.length(idx))
Ejemplo n.º 5
0
 def setUp(self):
     """Build the tree, table and metadata fixtures for the tests."""
     self.tree = self.mock_tree_from_nwk()
     self.bp_tree = from_skbio_treenode(self.tree)

     # Test table/metadata (mostly) adapted from Qurro:
     # https://github.com/biocore/qurro/blob/b9613534b2125c2e7ee22e79fdff311812f4fefe/qurro/tests/test_df_utils.py#L178
     counts_per_sample = {
         "Sample1": [1, 2, 0, 4],
         "Sample2": [8, 7, 0, 5],
         "Sample3": [1, 0, 0, 0],
         "Sample4": [0, 0, 0, 0]
     }
     self.table = pd.DataFrame(
         counts_per_sample, index=["a", "b", "e", "d"])

     # Sample metadata is indexed by the table's column labels.
     sample_md_columns = {
         "Metadata1": [0, 0, 0, 1],
         "Metadata2": [0, 0, 0, 0],
         "Metadata3": [1, 2, 3, 4],
         "Metadata4": ["abc", "def", "ghi", "jkl"]
     }
     self.sample_metadata = pd.DataFrame(
         sample_md_columns, index=list(self.table.columns))

     # Greengenes taxonomy annotations taken from the moving pictures
     # taxonomy.qza file; the confidence values are made up.
     tax_e = (
         "k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
         "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
         "s__"
     )
     tax_h = (
         "k__Bacteria; p__Proteobacteria; "
         "c__Gammaproteobacteria; o__Pasteurellales; "
         "f__Pasteurellaceae; g__; s__"
     )
     tax_a = (
         "k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
         "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
         "s__uniformis"
     )
     self.feature_metadata = pd.DataFrame(
         {
             "Taxonomy": [tax_e, tax_h, tax_a],
             "Confidence": [0.95, 0.8, 0]
         },
         index=["e", "h", "a"]
     )
     self.split_tax_fm = split_taxonomy(self.feature_metadata)

     # "Level 1" through "Level 7", followed by "Confidence".
     self.exp_split_fm_cols = (
         ["Level {}".format(rank) for rank in range(1, 8)]
         + ["Confidence"]
     )
Ejemplo n.º 6
0
    def from_tree(cls, tree, use_lengths=True):
        """Validate a skbio tree and build a Tree from it.

        The input is converted with from_skbio_treenode(), its node names
        and branch lengths are checked, and the converted tree is then
        wrapped in a Tree whose geometry is updated before it is returned.

        Parameters
        ----------
        tree : skbio.TreeNode
            Input skbio tree.
        use_lengths : bool
            Passed to update_geometry(). Specifies whether branch lengths
            should be incorporated into the geometry calculations for
            visualization.

        Returns
        -------
        Tree
            The Tree built from the converted input, after
            update_geometry(use_lengths) has been called on it.

        Raises
        ------
        ValueError
            If the tree has fewer than 2 nodes; if a node name starts with
            "EmpressNode"; if a visited node has a missing or negative
            length; if no visited node has a positive length; if tip names
            are not unique; or if a tip name is also used by an internal
            node.

        Warns
        -----
        TreeFormatWarning
            If internal node names are not unique.
        """
        bp_tree = from_skbio_treenode(tree)
        node_count = sum(bp_tree.B)
        if node_count <= 1:
            raise ValueError("Tree must contain at least 2 nodes.")

        # Collect the names of tips and internal nodes during the traversal.
        # Unnamed nodes are skipped here (the original comment says they are
        # named later by tools.fill_missing_node_names()).
        tip_names = []
        internal_node_names = []
        longest_branch = 0
        for postorder_pos in range(node_count):
            node_idx = bp_tree.postorderselect(postorder_pos)
            name = bp_tree.name(node_idx)
            length = bp_tree.length(node_idx)

            if name is not None:
                # NOTE: This should eventually be taken out when
                # fill_missing_node_names() is refactored. Until then it
                # stops users from naming nodes like our default names for
                # missing nodes.
                if name.startswith("EmpressNode"):
                    raise ValueError(
                        'Node names can\'t start with "EmpressNode".')
                target = (tip_names if isleaf(bp_tree, node_idx)
                          else internal_node_names)
                target.append(name)

            if length is None:
                raise ValueError(
                    "Non-root branches of the tree must have lengths.")
            if length < 0:
                raise ValueError(
                    "Non-root branches of the tree must have nonnegative "
                    "lengths.")

            longest_branch = max(length, longest_branch)

        # NOTE(review): the previous comment at this point said the root was
        # not visited by the traversal above and that its name is added to
        # internal_node_names here, but no such code exists in this method.
        # Confirm whether range(node_count) already covers the root.

        if longest_branch == 0:
            raise ValueError(
                "At least one non-root branch of the tree must have a "
                "positive length.")

        distinct_tips = set(tip_names)
        if len(distinct_tips) != len(tip_names):
            raise ValueError("Tip names in the tree must be unique.")

        distinct_internals = set(internal_node_names)
        if distinct_tips & distinct_internals:
            raise ValueError(
                "Tip names in the tree cannot overlap with internal node "
                "names.")

        # Duplicate internal node names only trigger a warning.
        if len(distinct_internals) != len(internal_node_names):
            warnings.warn("Internal node names in the tree are not unique.",
                          TreeFormatWarning)

        empress_tree = Tree(bp_tree)
        empress_tree.update_geometry(use_lengths)
        return empress_tree