Example #1
    def test_parse_shuffled_results_files(self):
        res = StatsResults()
        _parse_shuffled_results_files('/foobarbaz123', Adonis(), 'Treatment',
                                      res, 10)
        self.assertTrue(res.isEmpty())

        _parse_shuffled_results_files('/foobarbaz123', Anosim(), 'Treatment',
                                      res, 20, 88)
        self.assertTrue(res.isEmpty())
Example #2
    def test_parse_original_results_file(self):
        res = StatsResults()
        _parse_original_results_file('/foobarbaz123', Anosim(), 'Treatment',
                                     res)
        self.assertTrue(res.isEmpty())

        _parse_original_results_file('/foobarbaz123', Anosim(), 'Treatment',
                                     res, 42)
        self.assertTrue(res.isEmpty())
Example #3
    def setUp(self):
        """Define some sample data that will be used by the tests."""
        self.dm_f1 = dm_str1.split('\n')
        self.map_f1 = map_str1.split('\n')

        self.dm_f2 = dm_str2.split('\n')
        self.map_f2 = map_str2.split('\n')

        empty = StatsResults()
        nonempty = StatsResults()
        nonempty.addResult(0.1, 0.001)

        self.cat_res1 = {
            'original': empty,
            'shuffled': nonempty
        }

        self.cat_res2 = {
            'original': nonempty,
            'shuffled': empty
        }

        self.cat_res3 = {
            'original': nonempty,
            'shuffled': nonempty
        }

        # The prefix to use for temporary files/dirs. Tests may append to
        # this prefix, but every temp dir/file they create will start
        # with it.
        self.prefix = 'microbiogeo_tests'

        self.start_dir = getcwd()
        self.dirs_to_remove = []
        self.files_to_remove = []

        self.tmp_dir = get_qiime_temp_dir()

        if not exists(self.tmp_dir):
            makedirs(self.tmp_dir)

            # If the tests create the temp dir, they should also remove it.
            self.dirs_to_remove.append(self.tmp_dir)

        # Set up temporary directories to use with tests.
        self.input_dir = mkdtemp(dir=self.tmp_dir,
                                 prefix='%s_input_dir_' % self.prefix)
        self.dirs_to_remove.append(self.input_dir)
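
A matching tearDown isn't shown in this example. A minimal sketch of one,
using only the standard library (the real suite may instead use QIIME or
PyCogent cleanup helpers), could look like this:

    def tearDown(self):
        """Remove temporary files/dirs created by the tests."""
        # Assumes: from os import chdir, remove
        #          from os.path import exists
        #          from shutil import rmtree
        chdir(self.start_dir)  # restore the cwd in case a test changed it

        for fp in self.files_to_remove:
            if exists(fp):
                remove(fp)

        for dp in self.dirs_to_remove:
            if exists(dp):
                rmtree(dp)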
Example #4
    def test_addResult_invalid_input(self):
        """Adding invalid input raises error."""
        # Effect sizes don't match.
        self.sr1.addResult(0.5, 0.01)
        self.assertRaises(ValueError, self.sr1.addResult, 0.6, 0.001)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01])

        # Invalid p-value range.
        self.sr1 = StatsResults()
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, 1.1)
        self.assertTrue(self.sr1.effect_size is None)
        self.assertEqual(self.sr1.p_values, [])

        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.02)
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, 1.1)
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, -0.2)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01, 0.02])
Example #5
def _collate_simulated_data_results(in_dir, workflow):
    """Returns a heavily-nested dictionary of parsed results.

    Structure:
        method
            study
                depth
                    category
                        trial
                            sample size
                                dissimilarity
                                    metric
                                        StatsResults instance
    """
    results = {}

    for study in workflow:
        study_dir = join(in_dir, study)

        for method in workflow[study]['methods']:
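            # Skip methods that don't yield a single effect size/p-value
            # pair (presumably why Mantel correlogram and BEST are
            # excluded by the type check below).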
            if type(method) in (MantelCorrelogram, Best):
                continue

            if method.DirectoryName not in results:
                results[method.DirectoryName] = {}
            method_res = results[method.DirectoryName]

            if study not in method_res:
                method_res[study] = {}
            study_res = method_res[study]

            for depth, _ in workflow[study]['depths']:
                depth_dir = join(study_dir, '%d' % depth)

                if depth not in study_res:
                    study_res[depth] = {}
                depth_res = study_res[depth]

                data_type = 'simulated'
                data_type_dir = join(depth_dir, data_type)

                for category in workflow[study]['categories']:
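                    # Each category entry is a sequence; its first
                    # element is the category name.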
                    category = category[0]
                    category_dir = join(data_type_dir, category)

                    if category not in depth_res:
                        depth_res[category] = {}
                    category_res = depth_res[category]

                    for trial_num in \
                            range(workflow[study]['num_sim_data_trials']):
                        trial_num_dir = join(category_dir, '%d' % trial_num)

                        if trial_num not in category_res:
                            category_res[trial_num] = {}
                        trial_num_res = category_res[trial_num]

                        for samp_size in workflow[study]['sample_sizes']:
                            samp_size_dir = join(trial_num_dir,
                                                 '%d' % samp_size)

                            if samp_size not in trial_num_res:
                                trial_num_res[samp_size] = {}
                            samp_size_res = trial_num_res[samp_size]

                            for d in workflow[study]['dissim']:
                                dissim_dir = join(samp_size_dir, repr(d))

                                if d not in samp_size_res:
                                    samp_size_res[d] = {}
                                dissim_res = samp_size_res[d]

                                for metric, _ in workflow[study]['metrics']:
                                    metric_dir = join(dissim_dir, metric)

                                    results_fp = join(metric_dir,
                                        method.DirectoryName,
                                        '%s_results.txt' % method.ResultsName)
                                    stats_results = StatsResults()

                                    if exists(results_fp):
                                        res_f = open(results_fp, 'U')
                                        es, p_val = method.parse(res_f)
                                        res_f.close()
                                        stats_results.addResult(es, p_val)

                                    if metric in dissim_res:
                                        raise ValueError("More than one set "
                                                "of results for a unique set "
                                                "of parameters. Check your "
                                                "workflow.")
                                    else:
                                        dissim_res[metric] = stats_results
    return results
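
The nesting order above matters when indexing the returned dictionary.
A small usage sketch, reusing the keys that appear in the sim_results1
fixture later in this listing (in_dir and workflow are placeholders):

    results = _collate_simulated_data_results(in_dir, workflow)
    sr = results['adonis']['whole_body'][146]['BODY_SITE'][1][10][0.02][
        'unweighted_unifrac']
    if not sr.isEmpty():
        print sr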
Example #6
    def setUp(self):
        """Define some sample data that will be used by the tests."""
        sr_orig1 = StatsResults()
        sr_orig1.addResult(0.27, 0.01)
        sr_orig1.addResult(0.27, 0.001)

        sr_shuff1 = StatsResults()
        sr_shuff1.addResult(0.02, 0.45)
        sr_shuff1.addResult(0.02, 0.476)

        sr_orig2 = StatsResults()
        sr_orig2.addResult(0.13, 0.02)
        sr_orig2.addResult(0.13, 0.002)

        sr_shuff2 = StatsResults()
        sr_shuff2.addResult(0.03, 0.40)
        sr_shuff2.addResult(0.03, 0.401)

        sr_orig3 = StatsResults()
        sr_orig3.addResult(0.59, 0.11)
        sr_orig3.addResult(0.59, 0.101)

        sr_shuff3 = StatsResults()
        sr_shuff3.addResult(0.32, 0.65)
        sr_shuff3.addResult(0.32, 0.776)

        sr_orig4 = StatsResults()
        sr_orig4.addResult(0.27, 0.01)
        sr_orig4.addResult(0.27, 0.001)

        sr_shuff4 = StatsResults()
        sr_shuff4.addResult(0.02, 0.45)
        sr_shuff4.addResult(0.02, 0.476)

        self.per_method_results1 = {
            'adonis': {
                'whole_body': {
                    'BODY_SITE': {
                        'original': sr_orig1,
                        'shuffled': sr_shuff1
                    }
                }
            },

            'anosim': {
                'whole_body': {
                    'BODY_SITE': {}
                }
            }
        }

        self.per_method_results2 = {
            'adonis': self.per_method_results1['adonis'],
            'anosim': {
                'whole_body': {
                    'SEX': {}
                }
            }
        }

        self.per_method_results3 = {
            'adonis': self.per_method_results1['adonis'],
            'anosim': {
                '88_soils': {
                    'BODY_SITE': {}
                }
            }
        }

        self.real_results1 = {
            '5_percent': {
                'unweighted_unifrac': {
                    'adonis': {
                        'whole_body': {
                            'BODY_SITE': {
                                'original': sr_orig1,
                                'shuffled': sr_shuff1
                            }
                        },

                        '88_soils': {
                            'ENV_BIOME': {
                                'original': sr_orig2,
                                'shuffled': sr_shuff2
                            }
                        },

                        'keyboard': {}
                    },

                    'anosim': {
                        'whole_body': {
                            'BODY_SITE': {
                                'original': sr_orig1,
                                'shuffled': sr_shuff1
                            }
                        },

                        'keyboard': {
                            'HOST_SUBJECT_ID': {
                                'original': sr_orig3,
                                'shuffled': sr_shuff3
                            }
                        },

                        '88_soils': {
                            'ENV_BIOME': {}
                        }
                    }
                }
            }
        }

        # Invalid results (methods don't cover same studies).
        self.real_results2 = {
            '5_percent': {
                'unweighted_unifrac': {
                    'adonis': {
                        'whole_body': {}
                    },

                    'anosim': {
                        '88_soils': {}
                    }
                }
            }
        }

        self.sim_results1 = {
            'adonis': {
                'whole_body': {
                    146: {
                        'BODY_SITE': {
                            1: {
                                10: {
                                    0.02: {
                                        'unweighted_unifrac': sr_orig1
                                    }
                                }
                            }
                        }
                    }
                }
            },

            'anosim': {
                'whole_body': {
                    146: {
                        'BODY_SITE': {
                            1: {
                                10: {
                                    0.02: {
                                        'unweighted_unifrac': sr_orig1
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        # Invalid results (wrong number of effect sizes).
        self.sim_results2 = {
            'adonis': {
                'whole_body': {
                    146: {
                        'BODY_SITE': {
                            1: {
                                10: {
                                    0.02: {
                                        'unweighted_unifrac': sr_orig1
                                    }
                                }
                            }
                        }
                    }
                }
            },

            'anosim': {
                'whole_body': {
                    146: {
                        'BODY_SITE': {
                            1: {
                                10: {
                                    0.02: {
                                        'unweighted_unifrac': sr_orig1,
                                        'weighted_unifrac': sr_orig2,
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
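
The per_method_results2 and per_method_results3 fixtures (and
real_results2) are invalid because their methods don't cover the same
studies/categories. A hypothetical check for the study-coverage property
(the actual validation code isn't part of this example) might look like:

    def _methods_cover_same_studies(per_method_results):
        """Return True if every method covers the same set of studies."""
        study_sets = set()
        for study_res in per_method_results.values():
            study_sets.add(frozenset(study_res))
        return len(study_sets) <= 1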
Example #7
    def setUp(self):
        """Define some sample data that will be used by the tests."""
        self.sr1 = StatsResults()
Example #8
class StatsResultsTests(TestCase):
    """Tests for the util.StatsResults class."""

    def setUp(self):
        """Define some sample data that will be used by the tests."""
        self.sr1 = StatsResults()

    def test_addResult(self):
        """Adding effect size and p-value works correctly on valid input."""
        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.001)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01, 0.001])

    def test_addResult_invalid_input(self):
        """Adding invalid input raises error."""
        # Effect sizes don't match.
        self.sr1.addResult(0.5, 0.01)
        self.assertRaises(ValueError, self.sr1.addResult, 0.6, 0.001)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01])

        # Invalid p-value range.
        self.sr1 = StatsResults()
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, 1.1)
        self.assertTrue(self.sr1.effect_size is None)
        self.assertEqual(self.sr1.p_values, [])

        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.02)
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, 1.1)
        self.assertRaises(ValueError, self.sr1.addResult, 0.5, -0.2)
        self.assertFloatEqual(self.sr1.effect_size, 0.5)
        self.assertFloatEqual(self.sr1.p_values, [0.01, 0.02])

    def test_isEmpty(self):
        """Test checking if results are empty or not."""
        self.assertTrue(self.sr1.isEmpty())

        self.sr1.addResult(0.5, 0.01)
        self.assertFalse(self.sr1.isEmpty())

    def test_str(self):
        """Test __str__ method."""
        # Empty results.
        obs = str(self.sr1)
        self.assertEqual(obs, 'Empty results')

        # Populated results.
        self.sr1.addResult(0.5, 0.01)
        self.sr1.addResult(0.5, 0.05)
        obs = str(self.sr1)
        self.assertEqual(obs, '0.50; ***, **')

    def test_check_p_value(self):
        """Raises error on invalid p-value."""
        self.sr1._check_p_value(0.0)
        self.sr1._check_p_value(0.5)
        self.sr1._check_p_value(1.0)

        self.assertRaises(ValueError, self.sr1._check_p_value, 1.5)
        self.assertRaises(ValueError, self.sr1._check_p_value, -1.5)

    def test_format_p_value_as_asterisk(self):
        """Test formatting a p-value to indicate statistical significance."""
        obs = self.sr1._format_p_value_as_asterisk(1.0)
        self.assertEqual(obs, 'x')

        obs = self.sr1._format_p_value_as_asterisk(0.09)
        self.assertEqual(obs, '*')

        obs = self.sr1._format_p_value_as_asterisk(0.045)
        self.assertEqual(obs, '**')

        obs = self.sr1._format_p_value_as_asterisk(0.01)
        self.assertEqual(obs, '***')

        obs = self.sr1._format_p_value_as_asterisk(0.0005)
        self.assertEqual(obs, '****')

    def test_format_p_value_as_asterisk_invalid_input(self):
        """Test supplying an invalid p-value results in error being thrown."""
        self.assertRaises(TypeError, self.sr1._format_p_value_as_asterisk, 1)
        self.assertRaises(TypeError, self.sr1._format_p_value_as_asterisk,
                          "0.05")
        self.assertRaises(TypeError, self.sr1._format_p_value_as_asterisk,
                          [0.05])

        self.assertRaises(ValueError, self.sr1._format_p_value_as_asterisk,
                          1.1)
        self.assertRaises(ValueError, self.sr1._format_p_value_as_asterisk,
                          -0.042)
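
For reference, a minimal StatsResults implementation consistent with the
behavior these tests pin down would look roughly like the sketch below.
It is inferred from the assertions above rather than copied from the
actual source; the error messages and the exact significance thresholds
are assumptions chosen to satisfy the asserted outputs:

class StatsResults(object):
    """Holds an effect size and its associated p-values (sketch)."""

    def __init__(self):
        self.effect_size = None
        self.p_values = []

    def addResult(self, effect_size, p_value):
        """Add an effect size/p-value pair after validating both."""
        self._check_p_value(p_value)

        if self.effect_size is None:
            self.effect_size = effect_size
        elif self.effect_size != effect_size:
            raise ValueError("Effect size %r doesn't match existing "
                             "effect size %r." %
                             (effect_size, self.effect_size))

        self.p_values.append(p_value)

    def isEmpty(self):
        """Return True if no results have been added."""
        return self.effect_size is None

    def __str__(self):
        if self.isEmpty():
            return 'Empty results'

        asterisks = []
        for p in self.p_values:
            asterisks.append(self._format_p_value_as_asterisk(p))
        return '%.2f; %s' % (self.effect_size, ', '.join(asterisks))

    def _check_p_value(self, p_value):
        if p_value < 0 or p_value > 1:
            raise ValueError("Invalid p-value: %r is not in [0, 1]." %
                             p_value)

    def _format_p_value_as_asterisk(self, p_value):
        """Map a p-value to asterisks; 'x' means not significant."""
        if not isinstance(p_value, float):
            raise TypeError("p-value must be a float.")
        self._check_p_value(p_value)

        # Thresholds are assumptions consistent with the tests above.
        if p_value <= 0.001:
            return '****'
        elif p_value <= 0.01:
            return '***'
        elif p_value <= 0.05:
            return '**'
        elif p_value <= 0.1:
            return '*'
        else:
            return 'x'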