def test_compare_alpha_diversities_parametric(self):
        """Parametric comparison of the 'TTD' category matches the fixture.

        ``compare_alpha_diversities`` is run at depth 10 twice: once without
        a permutation count and once with a count of 0. Both results are
        expected to equal ``self.compared_alpha_diversities_TTD``.
        """
        # The second argument set supplies num_permutations=0, which should
        # be ignored when test_type is parametric.
        for trailing_args in ((), (0,)):
            observed = compare_alpha_diversities(
                self.rarefaction_file, self.mapping_file, 'TTD', 10,
                'parametric', *trailing_args)
            self.assertFloatEqual(observed,
                                  self.compared_alpha_diversities_TTD)
    def test_compare_alpha_diversities_parametric(self):
        """Check the parametric alpha diversity comparison for 'TTD'.

        The comparison is run at depth 10 with and without an explicit
        permutation count; each result must match the stored fixture.
        """
        without_permutations = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, 'TTD', 10, 'parametric')
        self.assertFloatEqual(without_permutations,
                              self.compared_alpha_diversities_TTD)

        # num_permutations (here 0) should be ignored for parametric tests.
        with_zero_permutations = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, 'TTD', 10, 'parametric',
            0)
        self.assertFloatEqual(with_zero_permutations,
                              self.compared_alpha_diversities_TTD)
def main():
    """Compare alpha diversities and write a tab-separated result table.

    Command line options are parsed from ``script_info``. The rarefaction
    and mapping files are handed to ``compare_alpha_diversities``, the
    result is passed through ``_correct_compare_alpha_results`` with the
    requested correction method, and one ``Comparison / tval / pval`` row
    per comparison is written to ``opts.output_fp``.

    Exits through ``option_parser.error`` when fewer than 10 permutations
    are requested.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    category = opts.category
    depth = int(opts.depth)
    output_path = opts.output_fp

    # Context managers close both inputs even if the comparison raises.
    with open(opts.alpha_diversity_fp, 'U') as rarefaction_lines:
        with open(opts.mapping_fp, 'U') as mapping_lines:
            result = compare_alpha_diversities(
                rarefaction_lines, mapping_lines, category, depth,
                opts.test_type, opts.num_permutations)

    corrected_result = _correct_compare_alpha_results(result,
                                                      opts.correction_method)

    # write results
    lines = ['Comparison\ttval\tpval']
    for k, v in corrected_result.items():
        lines.append('\t'.join(map(str, [k, v[0], v[1]])))
    with open(output_path, 'w') as outfile:
        outfile.write('\n'.join(lines))
Exemple #4
0
def main():
    """Compare alpha diversities between groups and write a stats table.

    Parses the command line from ``script_info``, runs
    ``compare_alpha_diversities`` on the rarefaction and mapping files,
    passes the t-test results through ``_correct_compare_alpha_results``
    and writes one tab-separated row per group pair (group names, each
    group's two summary values, t statistic, p-value) to
    ``opts.output_fp``.

    Exits through ``option_parser.error`` when fewer than 10 permutations
    are requested.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    category = opts.category
    depth = opts.depth

    # Context managers close both inputs even if the comparison raises.
    with open(opts.alpha_diversity_fp, 'U') as rarefaction_lines:
        with open(opts.mapping_fp, 'U') as mapping_lines:
            ttest_result, alphadiv_avgs = compare_alpha_diversities(
                rarefaction_lines, mapping_lines, category, depth,
                opts.test_type, opts.num_permutations)

    corrected_result = _correct_compare_alpha_results(ttest_result,
                                                      opts.correction_method)

    # write results
    header = ('Group1\tGroup2\tGroup1 mean\tGroup1 std\tGroup2 mean\t'
              'Group2 std\tt stat\tp-value')
    lines = [header]
    for (t0, t1), v in corrected_result.items():
        row = [t0, t1,
               alphadiv_avgs[t0][0], alphadiv_avgs[t0][1],
               alphadiv_avgs[t1][0], alphadiv_avgs[t1][1],
               v[0], v[1]]
        lines.append('\t'.join(map(str, row)))
    with open(opts.output_fp, 'w') as outfile:
        outfile.write('\n'.join(lines) + '\n')
def main():
    """Run the alpha diversity comparison script end to end.

    Steps: parse options from ``script_info``; call
    ``compare_alpha_diversities`` with the open rarefaction and mapping
    files; apply ``_correct_compare_alpha_results`` using
    ``opts.correction_method``; write a tab-separated table with one row
    per pair of groups to ``opts.output_fp``.

    Exits through ``option_parser.error`` when ``opts.num_permutations``
    is below 10.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)
    if opts.num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    category = opts.category
    depth = opts.depth

    # ``with`` ensures the input handles are released even when the
    # comparison fails part-way through.
    with open(opts.alpha_diversity_fp, 'U') as rarefaction_lines:
        with open(opts.mapping_fp, 'U') as mapping_lines:
            ttest_result, alphadiv_avgs = compare_alpha_diversities(
                rarefaction_lines, mapping_lines, category, depth,
                opts.test_type, opts.num_permutations)

    corrected_result = _correct_compare_alpha_results(ttest_result,
                                                      opts.correction_method)

    # write results
    header = ('Group1\tGroup2\tGroup1 mean\tGroup1 std\tGroup2 mean\t'
              'Group2 std\tt stat\tp-value')
    lines = [header]
    for (t0, t1), v in corrected_result.items():
        fields = [t0, t1, alphadiv_avgs[t0][0], alphadiv_avgs[t0][1],
                  alphadiv_avgs[t1][0], alphadiv_avgs[t1][1], v[0], v[1]]
        lines.append('\t'.join(map(str, fields)))
    with open(opts.output_fp, 'w') as outfile:
        outfile.write('\n'.join(lines) + '\n')
 def test_compare_alpha_diversity(self):
     """Comparing the 'TTD' category at depth 10 reproduces the fixture."""
     observed = compare_alpha_diversities(self.rarefaction_file,
                                          self.mapping_file, 'TTD', 10)
     self.assertEqual(self.compared_alpha_diversities_TTD, observed)
def main():
    """Write comparison statistics and box plots for each requested category.

    Options are parsed from ``script_info``. ``opts.categories`` is a
    comma-separated list; for every category this writes
    ``<category>_stats.txt`` (tab-separated table built from
    ``compare_alpha_diversities`` and ``_correct_compare_alpha_results``)
    and ``<category>_boxplots.pdf`` (figure returned by
    ``generate_alpha_diversity_boxplots``) into ``opts.output_dir``.

    Exits through ``option_parser.error`` when fewer than 10 permutations
    are requested.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    alpha_diversity_fp = opts.alpha_diversity_fp
    categories = opts.categories.split(',')
    depth = opts.depth
    output_dir = opts.output_dir
    correction_method = opts.correction_method
    test_type = opts.test_type
    num_permutations = opts.num_permutations

    if num_permutations < 10:
        option_parser.error('Number of permutations must be greater than or '
                            'equal to 10.')

    create_dir(output_dir)

    # The header is the same for every category, so build it once.
    header = ('Group1\tGroup2\tGroup1 mean\tGroup1 std\tGroup2 mean\t'
              'Group2 std\tt stat\tp-value')

    for category in categories:
        stat_output_fp = join(output_dir, '%s_stats.txt' % category)
        boxplot_output_fp = join(output_dir, '%s_boxplots.pdf' % category)

        # Context managers close the inputs even if the comparison raises.
        with open(alpha_diversity_fp, 'U') as alpha_diversity_f:
            with open(mapping_fp, 'U') as mapping_f:
                ttest_result, alphadiv_avgs = compare_alpha_diversities(
                    alpha_diversity_f, mapping_f, category, depth,
                    test_type, num_permutations)

        corrected_result = _correct_compare_alpha_results(
            ttest_result, correction_method)

        # write stats results
        lines = [header]
        for (t0, t1), v in corrected_result.items():
            row = [t0, t1,
                   alphadiv_avgs[t0][0], alphadiv_avgs[t0][1],
                   alphadiv_avgs[t1][0], alphadiv_avgs[t1][1],
                   v[0], v[1]]
            lines.append('\t'.join(map(str, row)))
        with open(stat_output_fp, 'w') as stat_output_f:
            stat_output_f.write('\n'.join(lines) + '\n')

        # write box plots; the inputs are opened afresh because the handles
        # used for the statistics above are already closed.
        with open(alpha_diversity_fp, 'U') as alpha_diversity_f:
            with open(mapping_fp, 'U') as mapping_f:
                boxplot = generate_alpha_diversity_boxplots(
                    alpha_diversity_f, mapping_f, category, depth)
        boxplot.savefig(boxplot_output_fp)
Exemple #8
0
    def test_run_alpha_rarefaction_stderr_and_stddev(self):
        """run_alpha_rarefaction generates expected results

        Runs the workflow with ``plot_stderr_and_stddev=True`` and checks
        the sign of the feces vs. L_palm t statistic, plus that the stderr
        and stddev plot outputs and the log file are non-empty.
        """

        run_alpha_rarefaction(self.test_data['biom'][0],
                              self.test_data['map'][0],
                              self.test_out,
                              call_commands_serially,
                              self.params,
                              self.qiime_config,
                              tree_fp=self.test_data['tree'][0],
                              num_steps=5,
                              parallel=False,
                              min_rare_depth=3,
                              max_rare_depth=18,
                              status_update_callback=no_status_updates,
                              plot_stderr_and_stddev=True)

        html_fp_stderr = join(self.test_out, 'alpha_rarefaction_plots_stderr',
                              'rarefaction_plots.html')
        pd_averages_fp_stderr = join(self.test_out,
                                     'alpha_rarefaction_plots_stderr',
                                     'average_tables',
                                     'PD_whole_treeSampleType.txt')
        html_fp_stddev = join(self.test_out, 'alpha_rarefaction_plots_stddev',
                              'rarefaction_plots.html')
        pd_averages_fp_stddev = join(self.test_out,
                                     'alpha_rarefaction_plots_stddev',
                                     'average_tables',
                                     'PD_whole_treeSampleType.txt')
        pd_collated_fp = join(self.test_out, 'alpha_div_collated',
                              'PD_whole_tree.txt')

        # Confirm that palm and gut alpha diversities are different,
        # and suggestive of statistical significance (we only have a
        # few sequences, so we don't get significant results).
        # The inputs are opened in context managers so the handles are
        # closed even when the comparison raises.
        with open(pd_collated_fp) as pd_collated_f:
            with open(self.test_data['map'][0]) as mapping_f:
                ttest_res, alpha_avg = compare_alpha_diversities(
                    pd_collated_f,
                    mapping_f,
                    'SampleType',
                    18,
                    test_type='parametric')
        feces_palm_t = ttest_res[('feces', 'L_palm')][0]
        self.assertTrue(
            feces_palm_t < 0,
            "t-statistic too high: %1.3f, but should be less than 0"
            % feces_palm_t)

        # check that final output files have non-zero size
        self.assertTrue(getsize(html_fp_stderr) > 0)
        self.assertTrue(getsize(pd_averages_fp_stderr) > 0)
        self.assertTrue(getsize(html_fp_stddev) > 0)
        self.assertTrue(getsize(pd_averages_fp_stddev) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)
    def test_run_alpha_rarefaction_stderr_and_stddev(self):
        """run_alpha_rarefaction generates expected results

        Runs the workflow with ``plot_stderr_and_stddev=True`` and checks
        the sign of the feces vs. L_palm t statistic, plus that the stderr
        and stddev HTML plots and the log file are non-empty.
        """

        run_alpha_rarefaction(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            self.test_out,
            call_commands_serially,
            self.params,
            self.qiime_config,
            tree_fp=self.test_data['tree'][0],
            num_steps=5,
            parallel=False,
            min_rare_depth=3,
            max_rare_depth=18,
            status_update_callback=no_status_updates,
            plot_stderr_and_stddev=True)

        html_fp_stderr = join(self.test_out, 'alpha_rarefaction_plots_stderr',
                              'rarefaction_plots.html')
        html_fp_stddev = join(self.test_out, 'alpha_rarefaction_plots_stddev',
                              'rarefaction_plots.html')
        pd_collated_fp = join(self.test_out, 'alpha_div_collated',
                              'PD_whole_tree.txt')

        # Confirm that palm and gut alpha diversities are different,
        # and suggestive of statistical significance (we only have a
        # few sequences, so we don't get significant results).
        # The inputs are opened in context managers so the handles are
        # closed even when the comparison raises.
        with open(pd_collated_fp) as pd_collated_f:
            with open(self.test_data['map'][0]) as mapping_f:
                ttest_res, alpha_avg = compare_alpha_diversities(
                    pd_collated_f,
                    mapping_f,
                    'SampleType',
                    18,
                    test_type='parametric')
        feces_palm_t = ttest_res[('feces', 'L_palm')][0]
        self.assertTrue(feces_palm_t < 0,
                        "t-statistic too high: %1.3f, but should be less than 0"
                        % feces_palm_t)

        # check that final output files have non-zero size
        self.assertTrue(getsize(html_fp_stderr) > 0)
        self.assertTrue(getsize(html_fp_stddev) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)
    def test_compare_alpha_diversities_nonparametric(self):
        """Nonparametric comparison of 'TTD' yields sane t stats and p-values.

        The p-values are stochastic, so each is only required to be a
        probability; each t statistic must equal the one stored in the
        parametric fixture.
        """
        observed = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, 'TTD', 10,
            'nonparametric')

        for comparison, stats in observed['TTD'].items():
            t_stat, p_value = stats
            expected = self.compared_alpha_diversities_TTD['TTD'][comparison]
            self.assertFloatEqual(t_stat, expected[0])
            self.assertIsProb(float(p_value))
    def test_compare_alpha_diversities_nonparametric(self):
        """Check the nonparametric alpha diversity comparison for 'TTD'.

        Because Monte Carlo p-values vary between runs, only their validity
        as probabilities is asserted, while t statistics are compared with
        the parametric fixture values.
        """
        result = compare_alpha_diversities(self.rarefaction_file,
                                           self.mapping_file,
                                           'TTD',
                                           10,
                                           'nonparametric')

        for pair, t_and_p in result['TTD'].items():
            t_value, p_value = t_and_p
            fixture_entry = self.compared_alpha_diversities_TTD['TTD'][pair]
            self.assertFloatEqual(t_value, fixture_entry[0])
            self.assertIsProb(float(p_value))
def main():
    """Compare alpha diversities and write the result's string form to a file.

    Options are parsed from ``script_info``; the rarefaction and mapping
    files are passed to ``compare_alpha_diversities`` and ``str(result)``
    followed by a newline is written to ``opts.output_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    category = opts.category
    depth = int(opts.depth)
    output_path = opts.output_fp

    # Context managers close both inputs even if the comparison raises.
    with open(opts.alpha_diversity_fp, "U") as rarefaction_lines:
        with open(opts.mapping_fp, "U") as mapping_lines:
            result = compare_alpha_diversities(
                rarefaction_lines, mapping_lines, category, depth,
                opts.test_type, opts.num_permutations
            )

    with open(output_path, "w") as outfile:
        outfile.write(str(result))
        outfile.write("\n")
Exemple #13
0
def main():
    """Run the alpha diversity comparison and dump the result as text.

    After parsing options from ``script_info``, the rarefaction and
    mapping files are given to ``compare_alpha_diversities``. The string
    form of the returned object, plus a trailing newline, is written to
    ``opts.output_fp``.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    category = opts.category
    depth = int(opts.depth)
    output_path = opts.output_fp

    # ``with`` releases the input handles even when the comparison fails.
    with open(opts.alpha_diversity_fp, 'U') as rarefaction_lines:
        with open(opts.mapping_fp, 'U') as mapping_lines:
            result = compare_alpha_diversities(rarefaction_lines,
                                               mapping_lines,
                                               category, depth,
                                               opts.test_type,
                                               opts.num_permutations)

    with open(output_path, 'w') as outfile:
        outfile.write(str(result))
        outfile.write('\n')
Exemple #14
0
    def test_run_alpha_rarefaction_parallel(self):
        """run_alpha_rarefaction generates expected results when run in parallel

        Runs the workflow with ``parallel=True`` and checks the feces vs.
        L_palm p-value, plus that the HTML plot, the averages table and the
        log file are non-empty.
        """

        run_alpha_rarefaction(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            self.test_out,
            call_commands_serially,
            self.params,
            self.qiime_config,
            tree_fp=self.test_data['tree'][0],
            num_steps=5,
            parallel=True,
            min_rare_depth=3,
            max_rare_depth=18,
            status_update_callback=no_status_updates)

        html_fp = join(self.test_out, 'alpha_rarefaction_plots',
                       'rarefaction_plots.html')
        pd_averages_fp = join(self.test_out, 'alpha_rarefaction_plots',
                              'average_tables', 'PD_whole_treeSampleType.txt')
        pd_collated_fp = join(self.test_out, 'alpha_div_collated',
                              'PD_whole_tree.txt')

        # Confirm that palm and gut alpha diversities are different,
        # and suggestive of statistical significance (we only have a
        # few sequences, so we don't get significant results).
        # The inputs are opened in context managers so the handles are
        # closed even when the comparison raises.
        with open(pd_collated_fp) as pd_collated_f:
            with open(self.test_data['map'][0]) as mapping_f:
                a = compare_alpha_diversities(pd_collated_f,
                                              mapping_f,
                                              'SampleType',
                                              18,
                                              test_type='parametric')
        self.assertTrue(a['feces,L_palm'][1] < 0.15)

        # check that final output files have non-zero size
        self.assertTrue(getsize(html_fp) > 0)
        self.assertTrue(getsize(pd_averages_fp) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)
def main():
    """Compare alpha diversities and write the result's string form to a file.

    Options are parsed from ``script_info``; the rarefaction and mapping
    files are passed to ``compare_alpha_diversities`` together with the
    category and integer depth, and ``str(result)`` is written to
    ``opts.output_fp`` (no trailing newline is added).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    category = opts.category
    depth = int(opts.depth)
    output_path = opts.output_fp

    # Context managers close both inputs even if the comparison raises.
    with open(opts.alpha_diversity_fp, 'U') as rarefaction_lines:
        with open(opts.mapping_fp, 'U') as mapping_lines:
            result = compare_alpha_diversities(rarefaction_lines,
                                               mapping_lines,
                                               category,
                                               depth)

    with open(output_path, 'w') as outfile:
        outfile.write(str(result))
Exemple #16
0
    def test_compare_alpha_diversities(self):
        """Tests alpha diversities are correctly calculated.

        Covers the 'Dose' category at depth 480 (parametric and
        nonparametric), at depth 500 where one group yields NaN averages,
        and with no depth supplied.
        """
        # test 'Dose' at 480 inputs
        category = 'Dose'
        depth = 480
        test_type = 'parametric'
        obs_tcomps, obs_ad_avgs = compare_alpha_diversities(
            self.rarefaction_file,
            self.mapping_file,
            category=category,
            depth=depth,
            test_type=test_type)

        # hardcoded order of the terms in the keys otherwise would comps fail
        exp_tcomps = \
            {('Control', '2xDose'): (1.1746048668554037, 0.44899351189030801),
             ('1xDose', '2xDose'): (1.7650193854830403, 0.17574514418562981),
             ('Control', '1xDose'): (0.43618805086434992, 0.7052689260099092)}

        # test each key in expected results -- this won't catch if
        # obs_tcomps has extra entries, but test that via the next call
        for k in exp_tcomps:
            assert_almost_equal(exp_tcomps[k], obs_tcomps[k])
        self.assertEqual(set(exp_tcomps.keys()), set(obs_tcomps.keys()))

        # test that returned alpha diversity averages are correct
        # dose
        # 1xDose = ['Sam1','Sam2','Sam6'], 2xDose = ['Sam3','Sam4'],
        # Control = ['Sam5']
        exp_ad_avgs = {
            '1xDose': (3.2511951575216664, 0.18664627928763661),
            '2xDose': (2.7539647172550001, 0.30099438035250015),
            'Control': (3.3663303519925001, 0.0)
        }
        for k in exp_ad_avgs:
            assert_almost_equal(exp_ad_avgs[k], obs_ad_avgs[k])

        # test 'Dose' at 480 inputs with nonparametric test
        seed(0)  # set the seed to reproduce random MC pvals
        category = 'Dose'
        depth = 480
        test_type = 'nonparametric'
        num_permutations = 100
        obs_tcomps, obs_ad_avgs = compare_alpha_diversities(
            self.rarefaction_file,
            self.mapping_file,
            category=category,
            depth=depth,
            test_type=test_type,
            num_permutations=num_permutations)
        exp_tcomps = {
            ('1xDose', '2xDose'): (1.7650193854830403, 0.13),
            ('Control', '1xDose'): (0.43618805086434992, 0.83),
            ('Control', '2xDose'): (1.1746048668554037, 0.62)
        }
        # test each key in expected results -- this won't catch if
        # obs_tcomps has extra entries, but test that via the next call
        for k in exp_tcomps:
            assert_almost_equal(exp_tcomps[k], obs_tcomps[k])
        self.assertEqual(set(exp_tcomps.keys()), set(obs_tcomps.keys()))

        # test that returned alpha diversity averages are correct
        # dose
        # 1xDose = ['Sam1','Sam2','Sam6'], 2xDose = ['Sam3','Sam4'],
        # Control = ['Sam5']
        exp_ad_avgs = {
            'Control': (3.3663303519925001, 0.0),
            '1xDose': (3.2511951575216664, 0.18664627928763661),
            '2xDose': (2.7539647172550001, 0.30099438035250015)
        }

        for k in exp_ad_avgs:
            assert_almost_equal(exp_ad_avgs[k], obs_ad_avgs[k])

        # test it works with NA values
        # test 'Dose' at 500 inputs with parametric test
        category = 'Dose'
        depth = 500
        test_type = 'parametric'
        obs_tcomps, obs_ad_avgs = compare_alpha_diversities(
            self.rarefaction_file,
            self.mapping_file,
            category=category,
            depth=depth,
            test_type=test_type)
        exp_tcomps = \
            {('Control', '2xDose'): (-0.63668873339963239, 0.63906168713487699),
             ('1xDose', '2xDose'): (None, None),
             ('Control', '1xDose'): (None, None)}
        # Compare the comparison keys as sets so the check does not depend
        # on the iteration order of two independently built dicts and also
        # catches missing or extra comparisons.
        # NOTE(review): the expected t/p values are not asserted here --
        # confirm they are current before adding a value check.
        self.assertEqual(set(exp_tcomps.keys()), set(obs_tcomps.keys()))

        # test that it works with nonparametric test - this was erroring.
        seed(0)
        test_type = 'nonparametric'
        exp_tcomps = \
            {('Control', '2xDose'): (-0.63668873339963239, 0.672),
             ('1xDose', '2xDose'): (None, None),
             ('Control', '1xDose'): (None, None)}
        obs_tcomps, obs_ad_avgs = compare_alpha_diversities(
            self.rarefaction_file,
            self.mapping_file,
            category=category,
            depth=depth,
            test_type=test_type)
        # Key sets only; see the review note on the parametric case.
        self.assertEqual(set(exp_tcomps.keys()), set(obs_tcomps.keys()))

        # test that returned alpha diversity averages are correct
        # dose
        # 1xDose = ['Sam1','Sam2','Sam6'], 2xDose = ['Sam3','Sam4'],
        # Control = ['Sam5']
        # will fail on nan comparison so avoid this
        exp_ad_avgs = {
            '1xDose': (nan, nan),
            '2xDose': (3.1955144893699998, 0.84206819489000018),
            'Control': (2.2669008538500002, 0.0)
        }
        for k in exp_ad_avgs:
            if k == '1xDose':
                self.assertTrue(all(map(isnan, obs_ad_avgs[k])))
            else:
                assert_almost_equal(exp_ad_avgs[k], obs_ad_avgs[k])

        # test that it works when no depth is passed
        category = 'Dose'
        depth = None  # should return depth = 910
        test_type = 'parametric'
        obs_tcomps, obs_ad_avgs = compare_alpha_diversities(
            self.rarefaction_file,
            self.mapping_file,
            category=category,
            depth=depth,
            test_type=test_type)

        # hardcoded order of the terms in the keys otherwise would comps fail
        exp_tcomps = \
            {('Control', '2xDose'): (3.3159701868634883, 0.1864642327553255),
             ('1xDose', '2xDose'): (-0.48227871733885291, 0.66260803238173183),
             ('Control', '1xDose'): (0.83283756452373126, 0.49255115337550748)}
        # Key sets only; the expected t/p values are not asserted here.
        self.assertEqual(set(exp_tcomps.keys()), set(obs_tcomps.keys()))

        # test that returned alpha diversity averages are correct
        # dose
        # 1xDose = ['Sam1','Sam2','Sam6'], 2xDose = ['Sam3','Sam4'],
        # Control = ['Sam5']
        exp_ad_avgs = {
            '1xDose': (2.6763340901916668, 0.36025734786901326),
            '2xDose': (2.8358041871949999, 0.04611264137749993),
            'Control': (3.1006488615725001, 0.0)
        }
        for k in exp_ad_avgs:
            assert_almost_equal(exp_ad_avgs[k], obs_ad_avgs[k])
    def test_compare_alpha_diversities(self):
        """Exercise compare_alpha_diversities on the 'Dose' category.

        Scenarios, in order: depth 480 parametric, depth 480 nonparametric
        (seeded, 100 permutations), depth 500 parametric and nonparametric
        (one group has NaN averages), and no depth given.
        """
        # Sample membership used in the average checks below:
        # 1xDose = ['Sam1','Sam2','Sam6'], 2xDose = ['Sam3','Sam4'],
        # Control = ['Sam5']

        # ---- 'Dose' at depth 480, parametric ----
        t_comps, ad_avgs = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=480, test_type='parametric')

        # The order of the group names inside each key is hardcoded;
        # a different order would make the lookups fail.
        want_comps = {
            ('Control', '2xDose'): (1.1746048668554037, 0.44899351189030801),
            ('1xDose', '2xDose'): (1.7650193854830403, 0.17574514418562981),
            ('Control', '1xDose'): (0.43618805086434992, 0.7052689260099092),
        }

        # Per-key checks cannot detect extra observed entries, so the key
        # sets are compared right after.
        for pair in want_comps:
            self.assertFloatEqual(want_comps[pair], t_comps[pair])
        self.assertEqual(set(want_comps.keys()), set(t_comps.keys()))

        # Returned alpha diversity averages for each dose group.
        want_avgs = {
            '1xDose': (3.2511951575216664, 0.18664627928763661),
            '2xDose': (2.7539647172550001, 0.30099438035250015),
            'Control': (3.3663303519925001, 0.0),
        }
        for group in want_avgs:
            self.assertFloatEqual(want_avgs[group], ad_avgs[group])

        # ---- 'Dose' at depth 480, nonparametric ----
        seed(0)  # fixed seed so the Monte Carlo p-values are reproducible
        t_comps, ad_avgs = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=480, test_type='nonparametric', num_permutations=100)

        want_comps = {
            ('Control', '2xDose'): (1.1746048668554037, 0.63),
            ('1xDose', '2xDose'): (1.7650193854830403, 0.09),
            ('Control', '1xDose'): (0.43618805086434992, 0.76),
        }

        for pair in want_comps:
            self.assertFloatEqual(want_comps[pair], t_comps[pair])
        self.assertEqual(set(want_comps.keys()), set(t_comps.keys()))

        want_avgs = {
            '1xDose': (3.2511951575216664, 0.18664627928763661),
            '2xDose': (2.7539647172550001, 0.30099438035250015),
            'Control': (3.3663303519925001, 0.0),
        }
        for group in want_avgs:
            self.assertFloatEqual(want_avgs[group], ad_avgs[group])

        # ---- NA values: 'Dose' at depth 500, parametric ----
        t_comps, ad_avgs = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=500, test_type='parametric')
        want_comps = {
            ('Control', '2xDose'): (-0.63668873339963239, 0.63906168713487699),
            ('1xDose', '2xDose'): (None, None),
            ('Control', '1xDose'): (None, None),
        }
        self.assertFloatEqual(t_comps, want_comps)

        # ---- NA values: depth 500, nonparametric (this used to error) ----
        seed(0)
        want_comps = {
            ('Control', '2xDose'): (-0.63668873339963239, 0.675),
            ('1xDose', '2xDose'): (None, None),
            ('Control', '1xDose'): (None, None),
        }
        t_comps, ad_avgs = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=500, test_type='nonparametric')
        self.assertFloatEqual(t_comps, want_comps)

        # NaN never compares equal, so the 1xDose entry is checked with
        # isnan instead of a float comparison.
        want_avgs = {
            '1xDose': (nan, nan),
            '2xDose': (3.1955144893699998, 0.84206819489000018),
            'Control': (2.2669008538500002, 0.0),
        }
        for group in want_avgs:
            if group == '1xDose':
                self.assertTrue(all(map(isnan, ad_avgs[group])))
            else:
                self.assertFloatEqual(want_avgs[group], ad_avgs[group])

        # ---- no depth passed (should return depth = 910) ----
        t_comps, ad_avgs = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=None, test_type='parametric')

        want_comps = {
            ('Control', '2xDose'): (3.3159701868634883, 0.1864642327553255),
            ('1xDose', '2xDose'): (-0.48227871733885291, 0.66260803238173183),
            ('Control', '1xDose'): (0.83283756452373126, 0.49255115337550748),
        }
        self.assertFloatEqual(t_comps, want_comps)

        want_avgs = {
            '1xDose': (2.6763340901916668, 0.36025734786901326),
            '2xDose': (2.8358041871949999, 0.04611264137749993),
            'Control': (3.1006488615725001, 0.0),
        }
        for group in want_avgs:
            self.assertFloatEqual(want_avgs[group], ad_avgs[group])
    def test_compare_alpha_diversities(self):
        """Exercise compare_alpha_diversities on the 'Dose' category.

        Scenarios, in order: depth 480 parametric, depth 480 nonparametric
        (seeded, 100 permutations), depth 500 parametric and nonparametric
        (some comparisons are None), and no depth given. Results are keyed
        by 'GroupA,GroupB' strings.
        """
        # ---- 'Dose' at depth 480, parametric ----
        got = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=480, test_type='parametric')

        # The order of the group names inside each key is hardcoded;
        # a different order would make the lookups fail.
        want = {
            'Control,2xDose': (1.1746048668554037, 0.44899351189030801),
            '1xDose,2xDose': (1.7650193854830403, 0.17574514418562981),
            'Control,1xDose': (0.43618805086434992, 0.7052689260099092),
        }

        # Per-key checks cannot detect extra observed entries, so the key
        # sets are compared right after.
        for comparison in want:
            self.assertEqual(want[comparison], got[comparison])
        self.assertEqual(set(want.keys()), set(got.keys()))

        # ---- 'Dose' at depth 480, nonparametric ----
        seed(0)  # fixed seed so the Monte Carlo p-values are reproducible
        got = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=480, test_type='nonparametric', num_permutations=100)

        want = {
            'Control,2xDose': (1.1746048668554037, 0.63),
            '1xDose,2xDose': (1.7650193854830403, 0.09),
            'Control,1xDose': (0.43618805086434992, 0.76),
        }

        for comparison in want:
            self.assertEqual(want[comparison], got[comparison])
        self.assertEqual(set(want.keys()), set(got.keys()))

        # ---- NA values: 'Dose' at depth 500, parametric ----
        got = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=500, test_type='parametric')
        want = {
            'Control,2xDose': (-0.63668873339963239, 0.63906168713487699),
            '1xDose,2xDose': (None, None),
            'Control,1xDose': (None, None),
        }
        self.assertEqual(got, want)

        # ---- NA values: depth 500, nonparametric (this used to error) ----
        seed(0)
        want = {
            'Control,2xDose': (-0.63668873339963239, 0.675),
            '1xDose,2xDose': (None, None),
            'Control,1xDose': (None, None),
        }
        got = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=500, test_type='nonparametric')
        self.assertEqual(got, want)

        # ---- no depth passed (should return depth = 850) ----
        got = compare_alpha_diversities(
            self.rarefaction_file, self.mapping_file, category='Dose',
            depth=None, test_type='parametric')

        want = {
            'Control,2xDose': (3.3159701868634883, 0.1864642327553255),
            '1xDose,2xDose': (-0.48227871733885291, 0.66260803238173183),
            'Control,1xDose': (0.83283756452373126, 0.49255115337550748),
        }
        self.assertEqual(got, want)
def main():
    """Compare alpha diversities and draw boxplots for each category.

    Parses the command line described by ``script_info``. For every entry in
    the comma-separated ``categories`` option, two files are written into
    ``output_dir`` (created via ``create_dir``):

    * ``<category>_stats.txt`` -- tab-separated table with one row per pair
      of groups returned by ``compare_alpha_diversities``, after the
      results have been passed through ``_correct_compare_alpha_results``
      with the requested correction method.
    * ``<category>_boxplots.pdf`` -- figure returned by
      ``generate_alpha_diversity_boxplots``.

    Reports an error through the option parser when ``num_permutations`` is
    below 10.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    mapping_fp = opts.mapping_fp
    alpha_diversity_fp = opts.alpha_diversity_fp
    categories = opts.categories.split(",")
    depth = opts.depth
    output_dir = opts.output_dir
    correction_method = opts.correction_method
    test_type = opts.test_type
    num_permutations = opts.num_permutations

    if num_permutations < 10:
        option_parser.error(
            "Number of permuations must be greater than or equal to 10."
        )

    header = (
        "Group1\tGroup2\tGroup1 mean\tGroup1 std\tGroup2 mean\t"
        "Group2 std\tt stat\tp-value"
    )

    create_dir(output_dir)
    for category in categories:
        stat_output_fp = join(output_dir, "%s_stats.txt" % category)
        boxplot_output_fp = join(output_dir, "%s_boxplots.pdf" % category)

        # Context managers guarantee the input handles are released even if
        # the comparison raises part-way through.
        with open(alpha_diversity_fp, "U") as alpha_diversity_f, \
                open(mapping_fp, "U") as mapping_f:
            ttest_result, alphadiv_avgs = compare_alpha_diversities(
                alpha_diversity_f,
                mapping_f,
                category,
                depth,
                test_type,
                num_permutations,
            )

        corrected_result = _correct_compare_alpha_results(
            ttest_result, correction_method
        )

        # write stats results
        lines = [header]
        for (t0, t1), v in corrected_result.items():
            row = [
                t0,
                t1,
                alphadiv_avgs[t0][0],
                alphadiv_avgs[t0][1],
                alphadiv_avgs[t1][0],
                alphadiv_avgs[t1][1],
                v[0],
                v[1],
            ]
            lines.append("\t".join(map(str, row)))
        with open(stat_output_fp, "w") as stat_output_f:
            stat_output_f.write("\n".join(lines) + "\n")

        # write box plots
        # The inputs are reopened because the handles used for the
        # comparison above have already been closed.
        with open(alpha_diversity_fp, "U") as alpha_diversity_f, \
                open(mapping_fp, "U") as mapping_f:
            boxplot = generate_alpha_diversity_boxplots(
                alpha_diversity_f, mapping_f, category, depth
            )
        boxplot.savefig(boxplot_output_fp)