def test_no_intersection_between_tree_and_table(self):
    # A table that shares no features with the tree's tips must be rejected
    # with a DataMatchingError.
    #
    # The second pass (shear_to_table=True) checks that --p-shear-to-table
    # doesn't override this: the data mismatch should be identified before
    # attempting shearing.
    expected_msg = (
        "No features in the feature table are present as tips in the tree."
    )
    for shear in (False, True):
        with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
            Empress(self.tree, self.unrelated_table, self.sample_metadata,
                    shear_to_table=shear)
def test_biplot_no_matching(self):
    # Relabel the feature metadata so its index is ['z', 'y'] and check that
    # the rendered visualization contains the text 'All elements'.
    # NOTE(review): presumably none of these IDs match the biplot features,
    # which is what triggers that text -- confirm against the fixtures.
    self.feature_metadata.index = ['z', 'y']
    optional_inputs = dict(
        feature_metadata=self.feature_metadata,
        ordination=self.biplot_no_matches,
        shear_to_table=True,
    )
    viz = Empress(self.biplot_tree, self.biplot_table, self.sample_metadata,
                  **optional_inputs)
    rendered = str(viz)
    self.assertIn('All elements', rendered)
def tree_plot(output_dir: str, tree: NewickFormat,
              feature_metadata: qiime2.Metadata = None) -> None:
    """Build an Empress visualization of a tree and save it to output_dir.

    Feature metadata is optional; when it is provided it is converted to a
    DataFrame before being passed to Empress.
    """
    fm_df = feature_metadata
    if fm_df is not None:
        fm_df = fm_df.to_dataframe()

    bp_tree = get_bp(tree)
    save_viz(Empress(tree=bp_tree, feature_metadata=fm_df), output_dir)
def test_init_tree_plot_fm_not_matching(self):
    # Even when only a tree and feature metadata are given (i.e. the
    # tree-plot use case), the tree nodes are still matched against the
    # feature metadata, so metadata with no matching IDs must be rejected.
    expected_msg = (
        "No features in the feature metadata are present in the tree, "
        "either as tips or as internal nodes."
    )
    mismatched_fm = self.feature_metadata.copy()
    mismatched_fm.index = ["idont", "match :O"]
    with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
        Empress(self.tree, feature_metadata=mismatched_fm)
def test_to_dict_with_feature_metadata(self):
    # to_dict() output should equal DICT_A plus the compressed tip/internal
    # node metadata and the feature metadata column names.
    viz = Empress(self.tree, self.table, self.sample_metadata,
                  self.feature_metadata, shear_to_table=False)
    observed = viz.to_dict()

    expected = copy.deepcopy(DICT_A)
    expected.update({
        "compressed_tip_metadata": {1: ["asdf", "qwer"]},
        "compressed_int_metadata": {8: ["ghjk", "tyui"]},
        "feature_metadata_columns": ["fmdcol1", "fmdcol2"],
    })
    self.assertEqual(observed, expected)
def test_biplot_partial_match_override(self):
    # With ignore_missing_samples=True, feature metadata whose index has
    # been relabeled to ['a', 'x'] is accepted, and the rendered
    # visualization contains the 'This element has no metadata' text.
    relabeled_fm = self.feature_metadata.copy()
    relabeled_fm.index = ['a', 'x']

    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        feature_metadata=relabeled_fm,
        ordination=self.biplot,
        ignore_missing_samples=True,
        shear_to_table=True,
    )
    self.assertIn('This element has no metadata', str(viz))
def test_copy_support_files_use_base(self):
    # When no target is passed, copy_support_files() should leave something
    # at the resource_path the visualization was built with.
    support_dir = './some-local-path/'
    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        resource_path=support_dir,
        shear_to_table=False,
    )
    self.assertEqual(viz.base_url, support_dir)

    viz.copy_support_files()
    self.assertTrue(exists(support_dir))

    # Register the directory in the test case's files_to_remove list.
    self.files_to_remove.append(support_dir)
def test_no_intersection_between_tree_and_table(self):
    # A table that shares no features with the tree's tips must be rejected
    # with a DataMatchingError.
    #
    # NOTE: an unused "bad_table" copy of self.unrelated_table (re-indexed
    # with integers) used to be built here; it was never passed to Empress,
    # so it has been removed. Both checks exercise self.unrelated_table.
    expected_msg = (
        "No features in the feature table are present as tips in the tree."
    )
    with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
        Empress(self.tree, self.unrelated_table, self.sample_metadata,
                filter_unobserved_features_from_phylogeny=False)

    # Check that --p-filter-unobserved-features-from-phylogeny doesn't
    # override this: the data mismatch should be identified before
    # attempting shearing
    with self.assertRaisesRegex(tools.DataMatchingError, expected_msg):
        Empress(self.tree, self.unrelated_table, self.sample_metadata,
                filter_unobserved_features_from_phylogeny=True)
def test_to_dict_tree_plot_with_feature_metadata(self):
    # to_dict() for a visualization built from only a tree and feature
    # metadata should equal DICT_A after self._clear_copied_dict_a() has
    # been applied to it, plus the feature metadata entries below.
    viz = Empress(self.tree, feature_metadata=self.feature_metadata)

    # Set up the expected dict
    expected = copy.deepcopy(DICT_A)
    self._clear_copied_dict_a(expected)
    # Same values as used in test_to_dict_with_feature_metadata()
    feature_metadata_entries = (
        ("compressed_tip_metadata", {1: ["asdf", "qwer"]}),
        ("compressed_int_metadata", {8: ["ghjk", "tyui"]}),
        ("feature_metadata_columns", ["fmdcol1", "fmdcol2"]),
    )
    for key, value in feature_metadata_entries:
        expected[key] = value

    self.assertEqual(viz.to_dict(), expected)
def tree_plot(
    tree: str,
    output_dir: str,
    feature_metadata: str,
    shear_to_feature_metadata: bool,
) -> None:
    """Build an Empress tree visualization and write it to output_dir.

    The tree and feature metadata arguments are passed through
    check_and_process_files() first. The output directory is created with
    os.makedirs() only after the visualization has been constructed, and
    os.makedirs() raises if the directory already exists.
    """
    processed = check_and_process_files(output_dir, tree, feature_metadata)
    tree_newick, fm = processed

    viz = Empress(
        tree_newick,
        feature_metadata=fm,
        shear_to_feature_metadata=shear_to_feature_metadata,
    )

    os.makedirs(output_dir)
    save_viz(viz, output_dir, q2=False)
def test_biplot_partial_match(self):
    # Without ignore_missing_samples, feature metadata whose index has been
    # relabeled to ['a', 'x'] must raise a KeyError naming the offending
    # feature.
    expected_msg = ('There are features not '
                    'included in the feature mapping file. '
                    'Override this error by using the '
                    '`ignore_missing_samples` argument. '
                    'Offending features: h')
    relabeled_fm = self.feature_metadata.copy()
    relabeled_fm.index = ['a', 'x']

    with self.assertRaisesRegex(KeyError, expected_msg):
        Empress(
            self.tree,
            self.table,
            self.sample_metadata,
            feature_metadata=relabeled_fm,
            ordination=self.biplot,
            shear_to_table=True,
        )
def test_shear_tree_to_fm_only_int(self):
    # Shearing the tree to feature metadata indexed only by "g" and "h"
    # must fail with a ValueError saying no tips are in the metadata.
    fm_columns = {
        "fmdcol1": ["vulpix", "ninetales"],
        "fmdcol2": ["growlithe", "arcanine"],
    }
    internal_only_fm = pd.DataFrame(fm_columns, index=["g", "h"])

    expected_msg = ("Cannot shear tree to feature metadata: no tips in "
                    "the tree are present in the feature metadata.")
    with self.assertRaisesRegex(ValueError, expected_msg):
        Empress(
            self.tree,
            feature_metadata=internal_only_fm,
            shear_to_table=False,
            shear_to_feature_metadata=True,
        )
def test_biplot(self):
    # Snapshot the feature metadata before building the visualization so
    # the comparison below is against the values as they were passed in.
    expected_fm = self.feature_metadata.copy()

    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        feature_metadata=self.feature_metadata,
        ordination=self.biplot,
        shear_to_table=True,
    )
    rendered = str(viz)

    # check that emperor didn't pad the metadata
    self.assertNotIn('All elements', rendered)

    # metadata should have been trickled down as expected
    assert_frame_equal(viz._emperor.feature_mf, expected_fm)
def test_init_tree_plot_extra_fm(self):
    # Checks that extra stuff in the feature metadata (which doesn't match
    # any node in the tree) is filtered out of the visualization, even if
    # tree-plot is used.
    extra_fm = pd.DataFrame(
        {
            "fmdcol1": ["zxcv", "bnm,"],
            "fmdcol2": ["zaq1", "xsw2"]
        },
        index=["weshould", "befiltered"]
    )
    # DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat() with default arguments stacks the rows the same way.
    smooshed_fm = pd.concat([self.feature_metadata, extra_fm])

    viz = Empress(self.tree, feature_metadata=smooshed_fm)
    self.assertFalse(viz.is_community_plot)

    # Only the rows for tip "a" and internal node "h" should remain.
    assert_frame_equal(viz.tip_md, self.feature_metadata.loc[["a"]])
    assert_frame_equal(viz.int_md, self.feature_metadata.loc[["h"]])
def test_copy_support_files_use_base(self):
    # When no target is passed, copy_support_files() should leave something
    # at the resource_path the visualization was built with.
    support_dir = './some-local-path/'
    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        resource_path=support_dir,
        filter_unobserved_features_from_phylogeny=False,
    )
    self.assertEqual(viz.base_url, support_dir)

    viz.copy_support_files()
    self.assertTrue(exists(support_dir))

    # Register the directory in the test case's files_to_remove list.
    self.files_to_remove.append(support_dir)
def test_to_dict(self):
    # to_dict() for the basic tree/table/sample-metadata inputs, with
    # shearing disabled, should equal DICT_A exactly.
    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        shear_to_table=False,
    )
    observed = viz.to_dict()
    expected = copy.deepcopy(DICT_A)

    # NOTE: Uncomment the following two lines of code to write the current
    # DICT_A to a file. Once it's written to a file, you can run
    # "black -l 79 dictcode.py" (while in the same directory as the file)
    # to format it so that it's consistent with how DICT_A is set up at the
    # bottom of this file.
    # with open("dictcode.py", "w") as f:
    #     f.write("DICT_A = {}".format(str(observed)))

    self.assertEqual(observed, expected)
def test_to_dict(self):
    # _to_dict() output should match DICT_A: 'tree_data' is compared with
    # the assert_almost_equal_tree_data() helper, everything else exactly.
    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        filter_unobserved_features_from_phylogeny=False,
    )
    observed = viz._to_dict()
    expected = copy.deepcopy(DICT_A)

    # Pull 'tree_data' out of both dicts so that it is excluded from the
    # exact comparison at the end.
    expected_tree_data = expected.pop('tree_data')
    observed_tree_data = observed.pop('tree_data')
    self.assert_almost_equal_tree_data(observed_tree_data,
                                       expected_tree_data)

    self.assertEqual(observed, expected)
def community_plot(output_dir: str,
                   tree: NewickFormat,
                   feature_table: biom.Table,
                   sample_metadata: qiime2.Metadata,
                   pcoa: OrdinationResults = None,
                   feature_metadata: qiime2.Metadata = None,
                   ignore_missing_samples: bool = False,
                   filter_extra_samples: bool = False,
                   filter_missing_features: bool = False,
                   number_of_features: int = 5,
                   shear_tree: bool = True) -> None:
    """Visualizes a tree alongside community-level data.

    This visualization offers a superset of what tree_plot() provides --
    including sample metadata coloring / barplots, animations, and Emperor
    integration support.

    If the ordination has biplot features, only the number_of_features
    rows with the largest Euclidean distance from the origin are kept, and
    pcoa.features is overwritten with that subset.
    """
    has_biplot_features = pcoa is not None and pcoa.features is not None
    if has_biplot_features:
        # Select the top N most important features based on the vector's
        # magnitude (copied from q2-emperor). In cases where the axes are
        # all zero there might be all-NA columns, hence the fillna().
        ranked = pcoa.features.copy().fillna(0)
        origin = np.zeros_like(ranked.columns)
        ranked['importance'] = ranked.apply(euclidean, axis=1,
                                            args=(origin, ))
        ranked = ranked.sort_values('importance', ascending=False)
        ranked = ranked.drop(['importance'], axis=1)
        pcoa.features = ranked[:number_of_features].copy()

    sample_md_df = sample_metadata.to_dataframe()
    feature_md_df = feature_metadata
    if feature_md_df is not None:
        feature_md_df = feature_md_df.to_dataframe()

    bp_tree = get_bp(tree)
    viz = Empress(
        tree=bp_tree,
        table=feature_table,
        sample_metadata=sample_md_df,
        feature_metadata=feature_md_df,
        ordination=pcoa,
        ignore_missing_samples=ignore_missing_samples,
        filter_extra_samples=filter_extra_samples,
        filter_missing_features=filter_missing_features,
        shear_tree=shear_tree,
    )
    save_viz(viz, output_dir)
def plot(output_dir: str,
         tree: NewickFormat,
         feature_table: pd.DataFrame,
         sample_metadata: qiime2.Metadata,
         pcoa: OrdinationResults = None,
         feature_metadata: qiime2.Metadata = None,
         ignore_missing_samples: bool = False,
         filter_missing_features: bool = False,
         number_of_features: int = 5,
         filter_unobserved_features_from_phylogeny: bool = True) -> None:
    """Build an Empress visualization and write it into output_dir.

    If the ordination has biplot features, only the number_of_features
    rows with the largest Euclidean distance from the origin are kept, and
    pcoa.features is overwritten with that subset.

    The tree is parsed from the first line of the Newick file. The output
    consists of 'empress.html', the support files copied by the
    visualization, and the rendered 'index.html' template.
    """
    has_biplot_features = pcoa is not None and pcoa.features is not None
    if has_biplot_features:
        # Select the top N most important features based on the vector's
        # magnitude (copied from q2-emperor).
        ranked = pcoa.features.copy()
        origin = np.zeros_like(ranked.columns)
        ranked['importance'] = ranked.apply(euclidean, axis=1,
                                            args=(origin, ))
        ranked = ranked.sort_values('importance', ascending=False)
        ranked = ranked.drop(['importance'], axis=1)
        pcoa.features = ranked[:number_of_features].copy()

    sample_md_df = sample_metadata.to_dataframe()
    feature_md_df = feature_metadata
    if feature_md_df is not None:
        feature_md_df = feature_md_df.to_dataframe()

    # str(tree) is the path to the actual newick file
    with open(str(tree)) as newick_file:
        parsed_tree = parse_newick(newick_file.readline())

    viz = Empress(
        tree=parsed_tree,
        table=feature_table,
        sample_metadata=sample_md_df,
        feature_metadata=feature_md_df,
        ordination=pcoa,
        ignore_missing_samples=ignore_missing_samples,
        filter_missing_features=filter_missing_features,
        filter_unobserved_features_from_phylogeny=(
            filter_unobserved_features_from_phylogeny
        ),
    )

    html_path = os.path.join(output_dir, 'empress.html')
    with open(html_path, 'w') as html_file:
        html_file.write(str(viz))

    viz.copy_support_files(output_dir)

    index_template = os.path.join(TEMPLATES, 'index.html')
    q2templates.render(index_template, output_dir)
def test_init_with_ordination_empty_samples_in_pcoa(self):
    # Zero out Sample2 and Sample4 in a copy of the table; building a
    # visualization with the ordination must then fail with a ValueError
    # that lists both sample IDs.
    emptied_ids = ['Sample2', 'Sample4']

    def zero_out(values, sample_id, _md):
        # Callback for the table's transform(): replace the values of the
        # targeted IDs with zeros and leave all others untouched.
        return np.zeros(len(values)) if sample_id in emptied_ids else values

    table_with_empties = self.table.copy()
    table_with_empties.transform(zero_out, inplace=True)

    expected_msg = (
        r"The ordination contains samples that are empty \(i.e. "
        r"all 0s\) in the table. Problematic sample IDs: Sample2, "
        "Sample4"
    )
    with self.assertRaisesRegex(ValueError, expected_msg):
        Empress(self.tree, table_with_empties, self.sample_metadata,
                ordination=self.pcoa, shear_to_table=False)
def test_to_dict_with_emperor(self):
    # Checks _to_dict() for a visualization that embeds Emperor: the
    # Emperor display settings, the non-Emperor entries (against DICT_A),
    # and general properties of the Emperor-generated snippets.
    viz = Empress(self.tree, self.table, self.sample_metadata,
                  ordination=self.pcoa,
                  filter_unobserved_features_from_phylogeny=False)
    obs = viz._to_dict()

    self.assertEqual(viz._emperor.width, '48vw')
    self.assertEqual(viz._emperor.height, '100vh; float: right')

    self.assertEqual(viz._emperor.settings['axes']['axesColor'], 'black')
    self.assertEqual(viz._emperor.settings['axes']['backgroundColor'],
                     'white')

    # we test key by key so we can do "general" checks on the emperor
    # values, this helps with tests not breaking if any character changes
    # in Emperor
    for key, value in obs.items():
        if key == 'tree_data':
            self.assert_almost_equal_tree_data(value, DICT_A['tree_data'])
        elif not key.startswith('emperor_'):
            self.assertEqual(value, DICT_A[key])

    # NOTE(review): the whitespace at the start of this literal is
    # significant for startswith() -- confirm it against Emperor's output.
    exp = " <div id='emperor-notebook"
    self.assertTrue(obs['emperor_div'].startswith(exp))

    exp = "// When running in the Jupyter"
    self.assertTrue(obs['emperor_require_logic'].startswith(exp))

    exp = "}); // END REQUIRE.JS block"
    self.assertTrue(obs['emperor_require_logic'].endswith(exp))

    exp = '<link id="emperor-css" rel="stylesheet"'
    self.assertTrue(obs['emperor_style'].startswith(exp))

    exp = "vendor/js/jquery-"
    self.assertEqual(obs['emperor_base_dependencies'].count(exp), 1)

    # This used to be assertTrue(value, 'combined-plot-container'), which
    # treats the second argument as the failure message and so only checked
    # that the value was truthy. Check the class name itself instead.
    self.assertIn('combined-plot-container', obs['emperor_classes'])
def test_to_dict_with_emperor(self):
    # Checks to_dict() for a visualization that embeds Emperor: the
    # Emperor display settings, the non-Emperor entries (against DICT_A
    # with is_empire_plot switched on), and general properties of the
    # Emperor-generated snippets.
    viz = Empress(self.tree, self.table, self.sample_metadata,
                  ordination=self.pcoa, shear_to_table=False,
                  filter_extra_samples=True)
    obs = viz.to_dict()

    self.assertEqual(viz._emperor.width, '50vw')
    self.assertEqual(viz._emperor.height, '100vh; float: right')

    self.assertEqual(viz._emperor.settings['axes']['axesColor'], 'black')
    self.assertEqual(viz._emperor.settings['axes']['backgroundColor'],
                     'white')

    # we test key by key so we can do "general" checks on the emperor
    # values, this helps with tests not breaking if any character changes
    # in Emperor
    dict_a_cp = copy.deepcopy(DICT_A)
    # set is_empire_plot flag to True since DICT_A sets it as False (all
    # other test use a False value)
    dict_a_cp["is_empire_plot"] = True

    for key, value in obs.items():
        if not key.startswith('emperor_'):
            self.assertEqual(value, dict_a_cp[key])

    exp = "<div id='emperor-in-empire'"
    self.assertTrue(obs['emperor_div'].startswith(exp))

    exp = "// When running in the Jupyter"
    self.assertTrue(obs['emperor_require_logic'].startswith(exp))

    exp = "}); // END REQUIRE.JS block"
    self.assertTrue(obs['emperor_require_logic'].endswith(exp))

    self.assertTrue('"#emperor-css"' in obs['emperor_style'])

    exp = "vendor/js/jquery-"
    self.assertEqual(obs['emperor_base_dependencies'].count(exp), 1)

    # This used to be assertTrue(value, 'combined-plot-container'), which
    # treats the second argument as the failure message and so only checked
    # that the value was truthy. Check the class name itself instead.
    self.assertIn('combined-plot-container', obs['emperor_classes'])
def test_shear_tree_to_fm_rmv_int_md(self):
    """
    Shear the tree to the feature metadata when the metadata also has an
    entry for an internal node that the shearing removes.
    """
    # default feature metadata works - internal node h filtered out
    viz = Empress(
        self.tree,
        feature_metadata=self.feature_metadata,
        shear_to_table=False,
        shear_to_feature_metadata=True,
    )

    # Expected node names in postorder (postorderselect() is 1-indexed).
    expected_names = ['a', None, 'g', None]
    for offset in range(len(viz.tree)):
        node = viz.tree.postorderselect(offset + 1)
        self.assertEqual(viz.tree.name(node), expected_names[offset])

    assert_frame_equal(viz.tip_md, self.feature_metadata.loc[["a"]])
    self.assertTrue(viz.int_md.empty)

    # feature metadata should be unchanged and be a different id instance
    assert_frame_equal(self.feature_metadata, viz.features)
    self.assertIsNot(self.feature_metadata, viz.features)

    self.assertIsNone(viz.ordination)
def test_shear_tree_to_fm_one_tip(self):
    # Shear the tree to feature metadata that describes only the tip "a".
    single_tip_fm = pd.DataFrame({"fmdcol1": ["mimikyu"]}, index=["a"])
    viz = Empress(
        self.tree,
        feature_metadata=single_tip_fm,
        shear_to_table=False,
        shear_to_feature_metadata=True,
    )

    # Expected node names in postorder (postorderselect() is 1-indexed).
    expected_names = ['a', None, 'g', None]
    for offset in range(len(viz.tree)):
        node = viz.tree.postorderselect(offset + 1)
        self.assertEqual(viz.tree.name(node), expected_names[offset])

    assert_frame_equal(viz.tip_md, single_tip_fm.loc[["a"]])
    self.assertTrue(viz.int_md.empty)

    # feature metadata should be unchanged and be a different id instance
    assert_frame_equal(single_tip_fm, viz.features)
    self.assertIsNot(single_tip_fm, viz.features)

    self.assertIsNone(viz.ordination)
def community_plot(
    tree: str,
    table: str,
    sample_metadata: str,
    output_dir: str,
    pcoa: str,
    feature_metadata: str,
    ignore_missing_samples: bool,
    filter_extra_samples: bool,
    filter_missing_features: bool,
    number_of_pcoa_features: int,
    shear_to_table: bool,
) -> None:
    """Build an Empress community plot from files and write it to output_dir.

    The tree and feature metadata arguments are passed through
    check_and_process_files(), the table is loaded with load_table(), and
    the sample metadata is read as a tab-separated file whose first column
    is the index. When pcoa is not None it is read as an OrdinationResults
    and passed through prepare_pcoa() with number_of_pcoa_features.

    The output directory is created with os.makedirs() only after the
    visualization has been constructed, and os.makedirs() raises if the
    directory already exists.
    """
    tree_newick, fm = check_and_process_files(
        output_dir, tree, feature_metadata
    )
    feature_table = load_table(table)
    sample_md_df = pd.read_csv(sample_metadata, sep="\t", index_col=0)

    ordination = pcoa
    if ordination is not None:
        ordination = prepare_pcoa(
            OrdinationResults.read(ordination), number_of_pcoa_features
        )

    viz = Empress(
        tree_newick,
        table=feature_table,
        sample_metadata=sample_md_df,
        feature_metadata=fm,
        ordination=ordination,
        ignore_missing_samples=ignore_missing_samples,
        filter_extra_samples=filter_extra_samples,
        filter_missing_features=filter_missing_features,
        shear_to_table=shear_to_table,
    )

    os.makedirs(output_dir)
    save_viz(viz, output_dir, q2=False)
def test_filter_unobserved_features_from_phylogeny(self):
    # Build a visualization from the filtered table with filtering of
    # unobserved features enabled, then check the resulting tree structure
    # and that the inputs were copied rather than modified.
    viz = Empress(
        self.tree,
        self.filtered_table,
        self.sample_metadata,
        filter_unobserved_features_from_phylogeny=True,
    )

    expected_bp = [1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0]
    self.assertEqual(viz._bp_tree, expected_bp)

    expected_names = ['a', 'EmpressNode0', 'b', 'g', 'd', 'h',
                      'EmpressNode1']
    for position, node in enumerate(viz.tree.postorder()):
        self.assertEqual(node.name, expected_names[position])

    # table should be unchanged and be a different id instance
    assert_frame_equal(self.filtered_table, viz.table.T)
    self.assertIsNot(self.filtered_table, viz.table)

    # sample metadata should be unchanged and be a different id instance
    assert_frame_equal(self.sample_metadata, viz.samples)
    self.assertIsNot(self.sample_metadata, viz.samples)

    self.assertIsNone(viz.features)
    self.assertIsNone(viz.ordination)
def test_shear_tree_to_table(self):
    # Shear the tree to the filtered table, then check the resulting tree
    # structure and that the inputs were copied rather than modified.
    viz = Empress(
        self.tree,
        self.filtered_table,
        self.filtered_sample_metadata,
        shear_to_table=True,
    )

    expected_bp = [1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0]
    self.assertEqual(list(viz.tree.B), expected_bp)

    # Expected node names in postorder (postorderselect() is 1-indexed).
    expected_names = ['a', None, 'b', 'g', 'd', 'h', None]
    for offset in range(len(viz.tree)):
        node = viz.tree.postorderselect(offset + 1)
        self.assertEqual(viz.tree.name(node), expected_names[offset])

    # table should be unchanged and be a different id instance
    self.assertEqual(self.filtered_table, viz.table)
    self.assertIsNot(self.filtered_table, viz.table)

    # sample metadata should be unchanged and be a different id instance
    assert_frame_equal(self.filtered_sample_metadata, viz.samples)
    self.assertIsNot(self.filtered_sample_metadata, viz.samples)

    self.assertIsNone(viz.features)
    self.assertIsNone(viz.ordination)
def test_ordination_integration_callbacks(self):
    # With an ordination supplied, the inputs should be copied unchanged,
    # an Emperor instance should be created, and make_empress() should
    # render the node-menu callbacks.
    viz = Empress(
        self.tree,
        self.table,
        self.sample_metadata,
        ordination=self.pcoa,
    )

    # table should be unchanged and be a different id instance
    self.assertEqual(self.table, viz.table)
    self.assertIsNot(self.table, viz.table)

    # sample metadata should be unchanged and be a different id instance
    assert_frame_equal(self.sample_metadata, viz.samples)
    self.assertIsNot(self.sample_metadata, viz.samples)

    self.assertIsNone(viz.features)
    assert_ordination_results_equal(viz.ordination, self.pcoa)

    # emperor is instantiated as needed but not yet setup
    self.assertIsInstance(viz._emperor, Emperor)

    # ensure the callbacks were rendered
    rendered = viz.make_empress()
    self.assertIn('setOnNodeMenuVisibleCallback', rendered)
    self.assertIn('setOnNodeMenuHiddenCallback', rendered)
def test_shear_tree_to_fm_simple(self):
    # Extend the feature metadata with rows for tips "b" and "d", then
    # shear the tree to it (removes e, same as in test_shear_tree).
    extended_fm = self.feature_metadata.copy()
    extended_fm.loc["b"] = ["pikachu", "raichu"]
    extended_fm.loc["d"] = ["mew", "mewtwo"]

    viz = Empress(
        self.tree,
        feature_metadata=extended_fm,
        shear_to_table=False,
        shear_to_feature_metadata=True,
    )

    expected_bp = [1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0]
    self.assertEqual(list(viz.tree.B), expected_bp)

    # Expected node names in postorder (postorderselect() is 1-indexed).
    expected_names = ['a', None, 'b', 'g', 'd', 'h', None]
    for offset in range(len(viz.tree)):
        node = viz.tree.postorderselect(offset + 1)
        self.assertEqual(viz.tree.name(node), expected_names[offset])

    assert_frame_equal(viz.tip_md, extended_fm.loc[["a", "b", "d"]])
    assert_frame_equal(viz.int_md, extended_fm.loc[["h"]])

    # feature metadata should be unchanged and be a different id instance
    assert_frame_equal(extended_fm, viz.features)
    self.assertIsNot(extended_fm, viz.features)

    self.assertIsNone(viz.ordination)
def test_fm_filtering_post_shearing_with_moving_pictures_dataset(self):
    # This particular tip can be problematic (it was the reason we found
    # out about https://github.com/biocore/empress/issues/248), so we
    # observe how it is handled in generating a visualization of the
    # moving pictures dataset to verify that #248 does not recur.
    funky_tip = "8406abe6d9a72018bf32d189d1340472"

    mp_data = load_mp_data(use_artifact_api=False)
    tree, tbl, smd_df, fmd_df, pcoa_skbio = mp_data
    bp_tree = from_skbio_treenode(tree)

    # Sanity check -- verify that the funky tip we're looking for is
    # actually present in the data. (We haven't actually done anything
    # specific to Empress yet. This just verifies the environment is ok.)
    # https://stackoverflow.com/a/23549599/10730311
    self.assertIn(funky_tip, fmd_df.index)

    # Generate an Empress visualization using this data
    viz = Empress(
        bp_tree,
        tbl,
        smd_df,
        feature_metadata=fmd_df,
        ordination=pcoa_skbio,
        filter_extra_samples=True,
        shear_to_table=True,
    )

    # Check that tip 8406abe6d9a72018bf32d189d1340472 *isn't* in the tip
    # metadata. All of the samples this tip is present in are filtered out
    # when --p-filter-extra-samples is used with this particular PCoA, so
    # we verify that this tip is removed from the tip metadata.
    self.assertNotIn(funky_tip, viz.tip_md.index)