def setUp(self):
    """Create the tree, table, metadata and ordination fixtures.

    Everything is stored on ``self`` so that the individual tests can
    reuse it: the mock tree and its ``bp`` counterpart, a 4-feature by
    4-sample ``biom.Table``, sample / feature metadata frames, the
    taxonomy-split feature metadata (plus its tip / internal-node
    subsets), a pre-sheared reference tree, and a 3-axis PCoA result.
    """
    sample_ids = ('Sample1', 'Sample2', 'Sample3', 'Sample4')
    axis_ids = ('PC1', 'PC2', 'PC3')

    self.tree = self.mock_tree_from_nwk()
    self.bp_tree = from_skbio_treenode(self.tree)

    # Each inner list below is written per sample; the array is transposed
    # before being handed to biom.Table along with the ids a, b, e, d.
    counts_by_sample = np.array([
        [1, 2, 0, 4],
        [8, 7, 0, 5],
        [1, 0, 0, 0],
        [0, 0, 0, 0],
    ])
    self.table = biom.Table(
        counts_by_sample.T, list('abed'), list(sample_ids))

    sample_md_columns = {
        "Metadata1": [0, 0, 0, 1],
        "Metadata2": [0, 0, 0, 0],
        "Metadata3": [1, 2, 3, 4],
        "Metadata4": ["abc", "def", "ghi", "jkl"]
    }
    self.sample_metadata = pd.DataFrame(
        sample_md_columns, index=list(self.table.ids()))

    # (These are some Greengenes taxonomy annotations taken from the
    # moving pictures taxonomy.qza file. The confidences are made up.)
    bacteroides_unnamed = (
        "k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
        "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
        "s__")
    pasteurellaceae = (
        "k__Bacteria; p__Proteobacteria; "
        "c__Gammaproteobacteria; o__Pasteurellales; "
        "f__Pasteurellaceae; g__; s__")
    bacteroides_uniformis = (
        "k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
        "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
        "s__uniformis")
    self.feature_metadata = pd.DataFrame(
        {
            "Taxonomy": [
                bacteroides_unnamed,
                pasteurellaceae,
                bacteroides_uniformis,
            ],
            "Confidence": [0.95, 0.8, 0]
        },
        index=["e", "h", "a"])

    split_result = split_taxonomy(self.feature_metadata)
    self.split_tax_fm, self.taxcols = split_result
    # "a" and "e" go into the tip metadata, "h" into the internal-node
    # metadata.
    self.tip_md = self.split_tax_fm.loc[["a", "e"]]
    self.int_md = self.split_tax_fm.loc[["h"]]

    # This is designed to match the shearing that's done in the core test
    # for --p-shear-to-table
    self.shorn_tree = parse_newick(
        "(((a:1)EmpressNode0:1,b:2)g:1,(d:3)h:2)EmpressNode1:1;")

    self.exp_split_fm_cols = [
        "Level 1", "Level 2", "Level 3", "Level 4",
        "Level 5", "Level 6", "Level 7", "Confidence"
    ]

    # Small hand-written PCoA: three axes, one coordinate row per sample.
    eigvals = pd.Series([0.50, 0.25, 0.25], index=list(axis_ids))
    samples = [
        [0.1, 0.2, 0.3],
        [0.2, 0.3, 0.4],
        [0.3, 0.4, 0.5],
        [0.4, 0.5, 0.6],
    ]
    proportion_explained = pd.Series(
        [15.5, 12.2, 8.8], index=list(axis_ids))
    samples_df = pd.DataFrame(
        samples, index=list(sample_ids), columns=list(axis_ids))
    self.ordination = OrdinationResults(
        'PCoA',
        'Principal Coordinate Analysis',
        eigvals,
        samples_df,
        proportion_explained=proportion_explained)
def test_fm_filtering_post_shearing_with_moving_pictures_dataset(self):
    # Regression test for https://github.com/biocore/empress/issues/248.
    # This particular tip is what originally exposed that issue, so we
    # watch how it is handled when a visualization of the moving pictures
    # dataset is generated, to verify that #248 does not recur.
    funky_tip = "8406abe6d9a72018bf32d189d1340472"

    tree, tbl, smd, fmd, pcoa = load_mp_data()

    # Convert artifacts / metadata objects to the "normal" types that
    # Empress accepts.
    skbio_tree = tree.view(TreeNode)
    bp_tree = from_skbio_treenode(skbio_tree)
    biom_tbl = tbl.view(biom.Table)
    pcoa_skbio = pcoa.view(skbio.OrdinationResults)
    smd_df = smd.to_dataframe()
    fmd_df = fmd.to_dataframe()

    # Sanity check -- the funky tip must actually be present in the input
    # feature metadata. (Nothing Empress-specific has happened yet; this
    # only verifies that the environment is ok.)
    # https://stackoverflow.com/a/23549599/10730311
    tip_in_input_md = funky_tip in fmd_df.index
    self.assertTrue(tip_in_input_md)

    # Generate an Empress visualization using this data
    viz_options = dict(
        feature_metadata=fmd_df,
        ordination=pcoa_skbio,
        filter_extra_samples=True,
        shear_to_table=True,
    )
    viz = Empress(bp_tree, biom_tbl, smd_df, **viz_options)

    # All of the samples this tip is present in are filtered out when
    # --p-filter-extra-samples is used with this particular PCoA, so the
    # tip must *not* appear in the resulting tip metadata.
    tip_in_viz_md = funky_tip in viz.tip_md.index
    self.assertFalse(tip_in_viz_md)
def test_from_skbio_treenode(self):
    # Converting the fixture's skbio tree must reproduce the expected bp
    # tree: same B array, and the same name / length at every index of B.
    exp_bp = self.bp
    obs_bp = from_skbio_treenode(self.sktn)

    npt.assert_equal(obs_bp.B, exp_bp.B)

    n_positions = len(exp_bp.B)
    for pos in range(n_positions):
        self.assertEqual(exp_bp.name(pos), obs_bp.name(pos))
        self.assertEqual(exp_bp.length(pos), obs_bp.length(pos))
def test_from_skbio_treenode(self):
    # The bp tree built from the fixture's skbio tree has to agree with the
    # expected bp tree in its B array and in the name / length reported
    # for each index of that array.
    expected = self.bp
    observed = from_skbio_treenode(self.sktn)

    npt.assert_equal(observed.B, expected.B)

    for idx in range(len(expected.B)):
        self.assertEqual(expected.name(idx), observed.name(idx))
        self.assertEqual(expected.length(idx), observed.length(idx))
def setUp(self):
    """Create the tree, table and metadata fixtures used by the tests.

    Stores on ``self``: the mock tree and its ``bp`` counterpart, a
    feature-by-sample count DataFrame, sample and feature metadata
    frames, the result of ``split_taxonomy`` on the feature metadata, and
    the column names expected after that split.
    """
    self.tree = self.mock_tree_from_nwk()
    self.bp_tree = from_skbio_treenode(self.tree)

    # Test table/metadata (mostly) adapted from Qurro:
    # https://github.com/biocore/qurro/blob/b9613534b2125c2e7ee22e79fdff311812f4fefe/qurro/tests/test_df_utils.py#L178
    counts_by_sample = {
        "Sample1": [1, 2, 0, 4],
        "Sample2": [8, 7, 0, 5],
        "Sample3": [1, 0, 0, 0],
        "Sample4": [0, 0, 0, 0]
    }
    self.table = pd.DataFrame(counts_by_sample, index=["a", "b", "e", "d"])

    sample_md_columns = {
        "Metadata1": [0, 0, 0, 1],
        "Metadata2": [0, 0, 0, 0],
        "Metadata3": [1, 2, 3, 4],
        "Metadata4": ["abc", "def", "ghi", "jkl"]
    }
    # The sample metadata is indexed by the table's column (sample) names.
    self.sample_metadata = pd.DataFrame(
        sample_md_columns, index=list(self.table.columns))

    # (These are some Greengenes taxonomy annotations taken from the
    # moving pictures taxonomy.qza file. The confidences are made up.)
    bacteroides_unnamed = (
        "k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
        "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
        "s__"
    )
    pasteurellaceae = (
        "k__Bacteria; p__Proteobacteria; "
        "c__Gammaproteobacteria; o__Pasteurellales; "
        "f__Pasteurellaceae; g__; s__"
    )
    bacteroides_uniformis = (
        "k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
        "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
        "s__uniformis"
    )
    self.feature_metadata = pd.DataFrame(
        {
            "Taxonomy": [
                bacteroides_unnamed,
                pasteurellaceae,
                bacteroides_uniformis,
            ],
            "Confidence": [0.95, 0.8, 0]
        },
        index=["e", "h", "a"]
    )

    self.split_tax_fm = split_taxonomy(self.feature_metadata)
    self.exp_split_fm_cols = [
        "Level 1", "Level 2", "Level 3", "Level 4",
        "Level 5", "Level 6", "Level 7", "Confidence"
    ]
def from_tree(cls, tree, use_lengths=True):
    """Create a validated Tree object from a skbio tree.

    The input is converted with ``from_skbio_treenode``, its node names
    and branch lengths are validated, and the result is wrapped in a
    ``Tree`` on which ``update_geometry`` is then called.

    Parameters
    ----------
    tree : skbio.TreeNode
        Input skbio tree
    use_lengths : bool
        Forwarded to ``update_geometry``. Specifies whether branch
        lengths should be incorporated into the geometry calculations
        for visualization.

    Returns
    -------
    Tree
        The object produced by ``Tree(bp_tree)`` after its geometry has
        been updated.

    Raises
    ------
    ValueError
        If the tree has fewer than 2 nodes; if a visited node's name
        starts with "EmpressNode"; if a visited node has a missing or
        negative length; if no visited node has a positive length; if tip
        names are not unique; or if a tip name is also used as an
        internal node name.

    Warns
    -----
    TreeFormatWarning
        If internal node names are not unique.
    """
    bp_tree = from_skbio_treenode(tree)

    node_count = sum(bp_tree.B)
    if node_count <= 1:
        raise ValueError("Tree must contain at least 2 nodes.")

    # While traversing the tree, record tip / internal node names
    # (Nodes without names are ignored, since those get assigned later
    # using tools.fill_missing_node_names())
    tip_names = []
    internal_node_names = []
    max_branch_length = 0
    for rank in range(node_count):
        node = bp_tree.postorderselect(rank)
        node_name = bp_tree.name(node)
        branch_length = bp_tree.length(node)

        if node_name is not None:
            # NOTE: This should eventually be taken out when
            # fill_missing_node_names() is refactored. However, for now,
            # this makes sure that users can't accidentally break things
            # by naming nodes identical to our default names for missing
            # nodes
            if node_name.startswith("EmpressNode"):
                raise ValueError(
                    'Node names can\'t start with "EmpressNode".')
            if isleaf(bp_tree, node):
                name_bucket = tip_names
            else:
                name_bucket = internal_node_names
            name_bucket.append(node_name)

        if branch_length is None:
            raise ValueError(
                "Non-root branches of the tree must have lengths.")
        if branch_length < 0:
            raise ValueError(
                "Non-root branches of the tree must have nonnegative "
                "lengths.")
        max_branch_length = max(branch_length, max_branch_length)

    # NOTE(review): an earlier comment at this point stated that the root
    # is skipped by the traversal above and that its name is then added to
    # internal_node_names. No such statement exists here -- confirm
    # whether the indices passed to postorderselect() cover the root, and
    # whether the root's name needs to be recorded explicitly.

    if max_branch_length == 0:
        raise ValueError(
            "At least one non-root branch of the tree must have a "
            "positive length.")

    distinct_tip_names = set(tip_names)
    if len(distinct_tip_names) != len(tip_names):
        raise ValueError("Tip names in the tree must be unique.")

    distinct_internal_names = set(internal_node_names)
    if distinct_tip_names & distinct_internal_names:
        raise ValueError(
            "Tip names in the tree cannot overlap with internal node "
            "names.")

    # Duplicate internal node names are tolerated, but flagged.
    if len(distinct_internal_names) != len(internal_node_names):
        warnings.warn("Internal node names in the tree are not unique.",
                      TreeFormatWarning)

    empress_tree = Tree(bp_tree)
    empress_tree.update_geometry(use_lengths)
    return empress_tree