def test_parse_shuffled_results_files(self):
    """Parsing nonexistent shuffled results files leaves results empty."""
    res = StatsResults()

    _parse_shuffled_results_files('/foobarbaz123', Adonis(), 'Treatment',
                                  res, 10)
    self.assertTrue(res.isEmpty())

    _parse_shuffled_results_files('/foobarbaz123', Anosim(), 'Treatment',
                                  res, 20, 88)
    self.assertTrue(res.isEmpty())
def test_parse_original_results_file(self):
    """Parsing a nonexistent original results file leaves results empty."""
    res = StatsResults()

    _parse_original_results_file('/foobarbaz123', Anosim(), 'Treatment',
                                 res)
    self.assertTrue(res.isEmpty())

    _parse_original_results_file('/foobarbaz123', Anosim(), 'Treatment',
                                 res, 42)
    self.assertTrue(res.isEmpty())
def setUp(self):
    """Define some sample data that will be used by the tests."""
    self.dm_f1 = dm_str1.split('\n')
    self.map_f1 = map_str1.split('\n')
    self.dm_f2 = dm_str2.split('\n')
    self.map_f2 = map_str2.split('\n')

    empty = StatsResults()
    nonempty = StatsResults()
    nonempty.addResult(0.1, 0.001)

    self.cat_res1 = {
        'original': empty,
        'shuffled': nonempty
    }

    self.cat_res2 = {
        'original': nonempty,
        'shuffled': empty
    }

    self.cat_res3 = {
        'original': nonempty,
        'shuffled': nonempty
    }

    # The prefix to use for temporary files/dirs. This prefix may be added
    # to, but all temp dirs and files created by the tests will have this
    # prefix at a minimum.
    self.prefix = 'microbiogeo_tests'

    self.start_dir = getcwd()
    self.dirs_to_remove = []
    self.files_to_remove = []

    self.tmp_dir = get_qiime_temp_dir()
    if not exists(self.tmp_dir):
        makedirs(self.tmp_dir)

        # If test creates the temp dir, also remove it.
        self.dirs_to_remove.append(self.tmp_dir)

    # Set up temporary directories to use with tests.
    self.input_dir = mkdtemp(dir=self.tmp_dir,
                             prefix='%s_input_dir_' % self.prefix)
    self.dirs_to_remove.append(self.input_dir)
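# This excerpt does not include the tearDown that pairs with the setUp
# above. A minimal sketch of what it could look like, using only the
# standard library (the actual suite may use QIIME/PyCogent cleanup
# helpers instead), is given below. It assumes only the bookkeeping
# attributes defined in setUp: start_dir, files_to_remove, dirs_to_remove.
def tearDown(self):
    """Remove temporary files/dirs created by the tests (sketch)."""
    from os import chdir, remove
    from shutil import rmtree

    # Some tests may change the working directory; change back first so
    # that relative paths in the removal lists resolve correctly.
    chdir(self.start_dir)

    for fp in self.files_to_remove:
        if exists(fp):
            remove(fp)
    for dp in self.dirs_to_remove:
        if exists(dp):
            rmtree(dp)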
def _collate_simulated_data_results(in_dir, workflow):
    """Returns a heavily-nested dictionary of parsed results.

    Structure:
        method
            study
                depth
                    category
                        trial
                            sample size
                                dissimilarity
                                    metric
                                        StatsResults instance
    """
    results = {}

    for study in workflow:
        study_dir = join(in_dir, study)

        for method in workflow[study]['methods']:
            if type(method) in (MantelCorrelogram, Best):
                continue

            if method.DirectoryName not in results:
                results[method.DirectoryName] = {}
            method_res = results[method.DirectoryName]

            if study not in method_res:
                method_res[study] = {}
            study_res = method_res[study]

            for depth, _ in workflow[study]['depths']:
                depth_dir = join(study_dir, '%d' % depth)

                if depth not in study_res:
                    study_res[depth] = {}
                depth_res = study_res[depth]

                data_type = 'simulated'
                data_type_dir = join(depth_dir, data_type)

                for category in workflow[study]['categories']:
                    category = category[0]
                    category_dir = join(data_type_dir, category)

                    if category not in depth_res:
                        depth_res[category] = {}
                    category_res = depth_res[category]

                    for trial_num in \
                            range(workflow[study]['num_sim_data_trials']):
                        trial_num_dir = join(category_dir, '%d' % trial_num)

                        if trial_num not in category_res:
                            category_res[trial_num] = {}
                        trial_num_res = category_res[trial_num]

                        for samp_size in workflow[study]['sample_sizes']:
                            samp_size_dir = join(trial_num_dir,
                                                 '%d' % samp_size)

                            if samp_size not in trial_num_res:
                                trial_num_res[samp_size] = {}
                            samp_size_res = trial_num_res[samp_size]

                            for d in workflow[study]['dissim']:
                                dissim_dir = join(samp_size_dir, repr(d))

                                if d not in samp_size_res:
                                    samp_size_res[d] = {}
                                dissim_res = samp_size_res[d]

                                for metric, _ in workflow[study]['metrics']:
                                    metric_dir = join(dissim_dir, metric)
                                    results_fp = join(metric_dir,
                                            method.DirectoryName,
                                            '%s_results.txt' %
                                            method.ResultsName)

                                    stats_results = StatsResults()
                                    if exists(results_fp):
                                        res_f = open(results_fp, 'U')
                                        es, p_val = method.parse(res_f)
                                        res_f.close()
                                        stats_results.addResult(es, p_val)

                                    if metric in dissim_res:
                                        raise ValueError("More than one set "
                                                "of results for a unique set "
                                                "of parameters. Check your "
                                                "workflow.")
                                    else:
                                        dissim_res[metric] = stats_results

    return results
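# Usage sketch (not part of the original module): because the returned dict
# is eight levels deep, a flattening helper makes it easier to consume. The
# helper name below is hypothetical; the traversal order follows the
# structure documented in the docstring above.
def _iter_simulated_results(results):
    """Yield (method, study, depth, category, trial, sample size,
    dissimilarity, metric, StatsResults) tuples from collated results."""
    for method, studies in results.items():
        for study, depths in studies.items():
            for depth, cats in depths.items():
                for cat, trials in cats.items():
                    for trial, sizes in trials.items():
                        for size, dissims in sizes.items():
                            for dissim, metrics in dissims.items():
                                for metric, stats_res in metrics.items():
                                    yield (method, study, depth, cat, trial,
                                           size, dissim, metric, stats_res)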
def setUp(self):
    """Define some sample data that will be used by the tests."""
    sr_orig1 = StatsResults()
    sr_orig1.addResult(0.27, 0.01)
    sr_orig1.addResult(0.27, 0.001)
    sr_shuff1 = StatsResults()
    sr_shuff1.addResult(0.02, 0.45)
    sr_shuff1.addResult(0.02, 0.476)

    sr_orig2 = StatsResults()
    sr_orig2.addResult(0.13, 0.02)
    sr_orig2.addResult(0.13, 0.002)
    sr_shuff2 = StatsResults()
    sr_shuff2.addResult(0.03, 0.40)
    sr_shuff2.addResult(0.03, 0.401)

    sr_orig3 = StatsResults()
    sr_orig3.addResult(0.59, 0.11)
    sr_orig3.addResult(0.59, 0.101)
    sr_shuff3 = StatsResults()
    sr_shuff3.addResult(0.32, 0.65)
    sr_shuff3.addResult(0.32, 0.776)

    sr_orig4 = StatsResults()
    sr_orig4.addResult(0.27, 0.01)
    sr_orig4.addResult(0.27, 0.001)
    sr_shuff4 = StatsResults()
    sr_shuff4.addResult(0.02, 0.45)
    sr_shuff4.addResult(0.02, 0.476)

    self.per_method_results1 = {
        'adonis': {
            'whole_body': {
                'BODY_SITE': {
                    'original': sr_orig1,
                    'shuffled': sr_shuff1
                }
            }
        },
        'anosim': {
            'whole_body': {
                'BODY_SITE': {}
            }
        }
    }

    self.per_method_results2 = {
        'adonis': self.per_method_results1['adonis'],
        'anosim': {
            'whole_body': {
                'SEX': {}
            }
        }
    }

    self.per_method_results3 = {
        'adonis': self.per_method_results1['adonis'],
        'anosim': {
            '88_soils': {
                'BODY_SITE': {}
            }
        }
    }

    self.real_results1 = {
        '5_percent': {
            'unweighted_unifrac': {
                'adonis': {
                    'whole_body': {
                        'BODY_SITE': {
                            'original': sr_orig1,
                            'shuffled': sr_shuff1
                        }
                    },
                    '88_soils': {
                        'ENV_BIOME': {
                            'original': sr_orig2,
                            'shuffled': sr_shuff2
                        }
                    },
                    'keyboard': {}
                },
                'anosim': {
                    'whole_body': {
                        'BODY_SITE': {
                            'original': sr_orig1,
                            'shuffled': sr_shuff1
                        }
                    },
                    'keyboard': {
                        'HOST_SUBJECT_ID': {
                            'original': sr_orig3,
                            'shuffled': sr_shuff3
                        }
                    },
                    '88_soils': {
                        'ENV_BIOME': {}
                    }
                }
            }
        }
    }

    # Invalid results (methods don't cover same studies).
    self.real_results2 = {
        '5_percent': {
            'unweighted_unifrac': {
                'adonis': {
                    'whole_body': {}
                },
                'anosim': {
                    '88_soils': {}
                }
            }
        }
    }

    self.sim_results1 = {
        'adonis': {
            'whole_body': {
                146: {
                    'BODY_SITE': {
                        1: {
                            10: {
                                0.02: {
                                    'unweighted_unifrac': sr_orig1
                                }
                            }
                        }
                    }
                }
            }
        },
        'anosim': {
            'whole_body': {
                146: {
                    'BODY_SITE': {
                        1: {
                            10: {
                                0.02: {
                                    'unweighted_unifrac': sr_orig1
                                }
                            }
                        }
                    }
                }
            }
        }
    }

    # Invalid results (wrong number of effect sizes).
    self.sim_results2 = {
        'adonis': {
            'whole_body': {
                146: {
                    'BODY_SITE': {
                        1: {
                            10: {
                                0.02: {
                                    'unweighted_unifrac': sr_orig1
                                }
                            }
                        }
                    }
                }
            }
        },
        'anosim': {
            'whole_body': {
                146: {
                    'BODY_SITE': {
                        1: {
                            10: {
                                0.02: {
                                    'unweighted_unifrac': sr_orig1,
                                    'weighted_unifrac': sr_orig2,
                                }
                            }
                        }
                    }
                }
            }
        }
    }
class StatsResultsTests(TestCase):
    """Tests for the util.StatsResults class."""

    def setUp(self):
        """Define some sample data that will be used by the tests."""
        self.sr1 = StatsResults()

    def test_addResult(self):
        """Adding effect size and p-value works correctly on valid input."""
        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.001)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01, 0.001])

    def test_addResult_invalid_input(self):
        """Adding invalid input raises error."""
        # Effect sizes don't match.
        self.sr1.addResult(0.5, 0.01)
        self.assertRaises(ValueError, self.sr1.addResult, 0.6, 0.001)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01])

        # Invalid p-value range.
        self.sr1 = StatsResults()
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, 1.1)
        self.assertTrue(self.sr1.effect_size is None)
        self.assertEqual(self.sr1.p_values, [])

        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.02)
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, 1.1)
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, -0.2)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01, 0.02])

    def test_isEmpty(self):
        """Test checking if results are empty or not."""
        self.assertTrue(self.sr1.isEmpty())

        self.sr1.addResult(0.5, 0.01)
        self.assertFalse(self.sr1.isEmpty())

    def test_str(self):
        """Test __str__ method."""
        # Empty results.
        obs = str(self.sr1)
        self.assertEqual(obs, 'Empty results')

        # Populated results.
        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.05)
        obs = str(self.sr1)
        self.assertEqual(obs, '0.50; ***, **')

    def test_check_p_value(self):
        """Raises error on invalid p-value."""
        self.sr1._check_p_value(0.0)
        self.sr1._check_p_value(0.5)
        self.sr1._check_p_value(1.0)

        self.assertRaises(ValueError, self.sr1._check_p_value, 1.5)
        self.assertRaises(ValueError, self.sr1._check_p_value, -1.5)

    def test_format_p_value_as_asterisk(self):
        """Test formatting a p-value to indicate statistical significance."""
        obs = self.sr1._format_p_value_as_asterisk(1.0)
        self.assertEqual(obs, 'x')

        obs = self.sr1._format_p_value_as_asterisk(0.09)
        self.assertEqual(obs, '*')

        obs = self.sr1._format_p_value_as_asterisk(0.045)
        self.assertEqual(obs, '**')

        obs = self.sr1._format_p_value_as_asterisk(0.01)
        self.assertEqual(obs, '***')

        obs = self.sr1._format_p_value_as_asterisk(0.0005)
        self.assertEqual(obs, '****')

    def test_format_p_value_as_asterisk_invalid_input(self):
        """Test supplying an invalid p-value results in error being thrown."""
        self.assertRaises(TypeError,
                          self.sr1._format_p_value_as_asterisk, 1)
        self.assertRaises(TypeError,
                          self.sr1._format_p_value_as_asterisk, "0.05")
        self.assertRaises(TypeError,
                          self.sr1._format_p_value_as_asterisk, [0.05])

        self.assertRaises(ValueError,
                          self.sr1._format_p_value_as_asterisk, 1.1)
        self.assertRaises(ValueError,
                          self.sr1._format_p_value_as_asterisk, -0.042)
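# A minimal sketch of a class satisfying the assertions in StatsResultsTests
# above. It is inferred purely from the tested behavior and is not the actual
# util.StatsResults implementation; in particular, the significance
# thresholds in _format_p_value_as_asterisk are a guess that is merely
# consistent with the tested inputs and outputs.
class StatsResultsSketch(object):
    def __init__(self):
        self.effect_size = None
        self.p_values = []

    def addResult(self, effect_size, p_value):
        # Validate the p-value before mutating any state so that a failed
        # add leaves the instance unchanged (as the tests require).
        self._check_p_value(p_value)
        if self.effect_size is not None and self.effect_size != effect_size:
            raise ValueError("Effect size %r does not match existing effect "
                             "size %r." % (effect_size, self.effect_size))
        self.effect_size = effect_size
        self.p_values.append(p_value)

    def isEmpty(self):
        return self.effect_size is None

    def __str__(self):
        if self.isEmpty():
            return 'Empty results'
        return '%.2f; %s' % (self.effect_size,
                             ', '.join(self._format_p_value_as_asterisk(p)
                                       for p in self.p_values))

    def _check_p_value(self, p_value):
        if p_value < 0 or p_value > 1:
            raise ValueError("Invalid p-value: %r" % p_value)

    def _format_p_value_as_asterisk(self, p_value):
        if not isinstance(p_value, float):
            raise TypeError("p-value must be a float.")
        self._check_p_value(p_value)
        # Thresholds chosen to match the tested examples: 0.0005 -> '****',
        # 0.01 -> '***', 0.045 -> '**', 0.09 -> '*', 1.0 -> 'x'.
        if p_value <= 0.001:
            return '****'
        elif p_value <= 0.01:
            return '***'
        elif p_value <= 0.05:
            return '**'
        elif p_value <= 0.1:
            return '*'
        else:
            return 'x'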