def test_from_file_with_file_path(self): """Should identify the filepath correctly and parse from it.""" # should fail with the expected exception with self.assertRaises(DissimilarityMatrixFormatError): DistanceMatrix.from_file(self.bad_dm_fp) obs = DistanceMatrix.from_file(self.dm_3x3_fp) self.assertEqual(self.dm_3x3, obs) self.assertTrue(isinstance(obs, DistanceMatrix))
def test_from_file_with_file_path(self): """Should identify the filepath correctly and parse from it.""" # should fail with the expected exception with self.assertRaises(DissimilarityMatrixFormatError): DistanceMatrix.from_file(self.bad_dm_fp) obs = DistanceMatrix.from_file(self.dm_3x3_fp) self.assertEqual(self.dm_3x3, obs) self.assertTrue(isinstance(obs, DistanceMatrix))
def setup(self): with open(get_data_path('PCoA_sample_data_3'), 'U') as lines: dist_matrix = DistanceMatrix.from_file(lines) self.ordination = PCoA(dist_matrix) self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593', 'PC.355', 'PC.607', 'PC.634']
def setup(self): with open(get_data_path('PCoA_sample_data_3'), 'U') as lines: dist_matrix = DistanceMatrix.from_file(lines) self.ordination = PCoA(dist_matrix) self.ids = [ 'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593', 'PC.355', 'PC.607', 'PC.634' ]
def setUp(self): # The test dataset used here is a subset of the Lauber et al. 2009 # "88 Soils" dataset. It has been altered to exercise various aspects # of the code, including (but not limited to): # # - order of distance matrix IDs and IDs in data frame (metadata) are # not exactly the same # - data frame has an extra sample that is not in the distance matrix # - this extra sample has non-numeric and missing values in some of its # cells # # Additional variations of the distance matrix and data frame are used # to test different orderings of rows/columns, extra non-numeric data # frame columns, etc. # # This dataset is also useful because it is non-trivial in size (6 # samples, 11 environment variables) and it includes positive/negative # floats and integers in the data frame. self.dm = DistanceMatrix.from_file(get_data_path('dm.txt')) # Reordered rows and columns (i.e., different ID order). Still # conceptually the same distance matrix. self.dm_reordered = DistanceMatrix.from_file( get_data_path('dm_reordered.txt')) self.df = pd.read_csv(get_data_path('df.txt'), sep='\t', index_col=0) # Similar to the above data frame, except that it has an extra # non-numeric column, and some of the other rows and columns have been # reordered. self.df_extra_column = pd.read_csv( get_data_path('df_extra_column.txt'), sep='\t', index_col=0) # All columns in the original data frame (these are all numeric # columns). self.cols = self.df.columns.tolist() # This second dataset is derived from vegan::bioenv's example dataset # (varespec and varechem). The original dataset includes a site x # species table (e.g., OTU table) and a data frame of environmental # variables. Since the bioenv function defined here accepts a distance # matrix, we use a Bray-Curtis distance matrix that is derived from the # site x species table (this matches what is done by vegan::bioenv when # provided an OTU table, using their default distance measure). The # data frame only includes the numeric environmental variables we're # interested in for these tests: log(N), P, K, Ca, pH, Al self.dm_vegan = DistanceMatrix.from_file( get_data_path('bioenv_dm_vegan.txt')) self.df_vegan = pd.read_csv( get_data_path('bioenv_df_vegan.txt'), sep='\t', converters={0: str}) self.df_vegan.set_index('#SampleID', inplace=True) # Load expected results. self.exp_results = pd.read_csv(get_data_path('exp_results.txt'), sep='\t', index_col=0) self.exp_results_single_column = pd.read_csv( get_data_path('exp_results_single_column.txt'), sep='\t', index_col=0) self.exp_results_different_column_order = pd.read_csv( get_data_path('exp_results_different_column_order.txt'), sep='\t', index_col=0) self.exp_results_vegan = pd.read_csv( get_data_path('bioenv_exp_results_vegan.txt'), sep='\t', index_col=0)
def test_from_file_invalid_input(self): """Raises error on invalid distance matrix file.""" # Asymmetric. with self.assertRaises(DistanceMatrixError): DistanceMatrix.from_file(self.dm_2x2_asym_f)
def test_from_file_invalid_input(self): """Raises error on invalid distance matrix file.""" # Asymmetric. with self.assertRaises(DistanceMatrixError): DistanceMatrix.from_file(self.dm_2x2_asym_f)
def setUp(self): # The test dataset used here is a subset of the Lauber et al. 2009 # "88 Soils" dataset. It has been altered to exercise various aspects # of the code, including (but not limited to): # # - order of distance matrix IDs and IDs in data frame (metadata) are # not exactly the same # - data frame has an extra sample that is not in the distance matrix # - this extra sample has non-numeric and missing values in some of its # cells # # Additional variations of the distance matrix and data frame are used # to test different orderings of rows/columns, extra non-numeric data # frame columns, etc. # # This dataset is also useful because it is non-trivial in size (6 # samples, 11 environment variables) and it includes positive/negative # floats and integers in the data frame. self.dm = DistanceMatrix.from_file(get_data_path('dm.txt')) # Reordered rows and columns (i.e., different ID order). Still # conceptually the same distance matrix. self.dm_reordered = DistanceMatrix.from_file( get_data_path('dm_reordered.txt')) self.df = pd.read_csv(get_data_path('df.txt'), sep='\t', index_col=0) # Similar to the above data frame, except that it has an extra # non-numeric column, and some of the other rows and columns have been # reordered. self.df_extra_column = pd.read_csv( get_data_path('df_extra_column.txt'), sep='\t', index_col=0) # All columns in the original data frame (these are all numeric # columns). self.cols = self.df.columns.tolist() # This second dataset is derived from vegan::bioenv's example dataset # (varespec and varechem). The original dataset includes a site x # species table (e.g., OTU table) and a data frame of environmental # variables. Since the bioenv function defined here accepts a distance # matrix, we use a Bray-Curtis distance matrix that is derived from the # site x species table (this matches what is done by vegan::bioenv when # provided an OTU table, using their default distance measure). The # data frame only includes the numeric environmental variables we're # interested in for these tests: log(N), P, K, Ca, pH, Al self.dm_vegan = DistanceMatrix.from_file( get_data_path('bioenv_dm_vegan.txt')) self.df_vegan = pd.read_csv(get_data_path('bioenv_df_vegan.txt'), sep='\t', converters={0: str}) self.df_vegan.set_index('#SampleID', inplace=True) # Load expected results. self.exp_results = pd.read_csv(get_data_path('exp_results.txt'), sep='\t', index_col=0) self.exp_results_single_column = pd.read_csv( get_data_path('exp_results_single_column.txt'), sep='\t', index_col=0) self.exp_results_different_column_order = pd.read_csv( get_data_path('exp_results_different_column_order.txt'), sep='\t', index_col=0) self.exp_results_vegan = pd.read_csv( get_data_path('bioenv_exp_results_vegan.txt'), sep='\t', index_col=0)