Python parse_distmat_to_dict Examples, qiime.parse.parse_distmat_to_dict Python Examples

Example #1

0

Show file

    def test_run_jackknifed_beta_diversity_parallel(self):
        """run_jackknifed_beta_diversity generates expected results (parallel=True).

        Runs the workflow on the test biom/tree/mapping files, then checks
        the relations expected in both UniFrac distance matrices and that
        the UPGMA trees, Emperor index pages and log file are non-empty.
        """

        run_jackknifed_beta_diversity(
            self.test_data['biom'][0],
            self.test_data['tree'][0],
            20,
            self.test_out,
            call_commands_serially,
            self.params,
            self.qiime_config,
            self.test_data['map'][0],
            parallel=True,
            status_update_callback=no_status_updates)

        weighted_unifrac_upgma_tree_fp = join(
            self.test_out, 'weighted_unifrac', 'upgma_cmp',
            'jackknife_named_nodes.tre')
        unweighted_unifrac_upgma_tree_fp = join(
            self.test_out, 'unweighted_unifrac', 'upgma_cmp',
            'jackknife_named_nodes.tre')
        weighted_unifrac_emperor_index_fp = join(
            self.test_out, 'weighted_unifrac', 'emperor_pcoa_plots',
            'index.html')
        unweighted_unifrac_emperor_index_fp = join(
            self.test_out, 'unweighted_unifrac', 'emperor_pcoa_plots',
            'index.html')

        # the distance matrices are named after the input biom file's basename
        input_file_basename = splitext(split(self.test_data['biom'][0])[1])[0]
        unweighted_unifrac_dm_fp = join(
            self.test_out,
            'unweighted_unifrac_%s.txt' % input_file_basename)
        weighted_unifrac_dm_fp = join(
            self.test_out,
            'weighted_unifrac_%s.txt' % input_file_basename)

        # check for expected relations between values in the unweighted unifrac
        # distance matrix
        with open(unweighted_unifrac_dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (unweighted unifrac).")
        self.assertEqual(dm['f1']['f1'], 0)

        # check for expected relations between values in the weighted unifrac
        # distance matrix
        with open(weighted_unifrac_dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (weighted unifrac).")
        self.assertEqual(dm['f1']['f1'], 0)

        # check that final output files have non-zero size
        self.assertTrue(getsize(weighted_unifrac_upgma_tree_fp) > 0)
        self.assertTrue(getsize(unweighted_unifrac_upgma_tree_fp) > 0)
        self.assertTrue(getsize(weighted_unifrac_emperor_index_fp) > 0)
        self.assertTrue(getsize(unweighted_unifrac_emperor_index_fp) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)

Example #2

0

Show file

    def test_run_beta_diversity_through_plots_even_sampling(self):
        """run_beta_diversity_through_plots functions with even sampling.

        Runs the workflow serially (parallel=False) with sampling_depth=20,
        then checks the relations expected in both UniFrac distance matrices
        and that the principal-coordinates files, the weighted Emperor index
        page and the log file are non-empty.
        """

        run_beta_diversity_through_plots(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            self.test_out,
            call_commands_serially,
            self.params,
            self.qiime_config,
            sampling_depth=20,
            tree_fp=self.test_data['tree'][0],
            parallel=False,
            status_update_callback=no_status_updates)

        unweighted_unifrac_dm_fp = join(self.test_out,
                                        'unweighted_unifrac_dm.txt')
        weighted_unifrac_dm_fp = join(self.test_out, 'weighted_unifrac_dm.txt')
        unweighted_unifrac_pc_fp = join(self.test_out,
                                        'unweighted_unifrac_pc.txt')
        weighted_unifrac_pc_fp = join(self.test_out, 'weighted_unifrac_pc.txt')
        weighted_unifrac_html_fp = join(self.test_out,
                                        'weighted_unifrac_emperor_pcoa_plot',
                                        'index.html')

        # check for expected relations between values in the unweighted unifrac
        # distance matrix
        with open(unweighted_unifrac_dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (unweighted unifrac).")
        self.assertEqual(dm['f1']['f1'], 0)

        # check for expected relations between values in the weighted unifrac
        # distance matrix
        with open(weighted_unifrac_dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (weighted unifrac).")
        self.assertEqual(dm['f1']['f1'], 0)

        # check that final output files have non-zero size
        self.assertTrue(getsize(unweighted_unifrac_pc_fp) > 0)
        self.assertTrue(getsize(weighted_unifrac_pc_fp) > 0)
        self.assertTrue(getsize(weighted_unifrac_html_fp) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)

Example #3

0

Show file

File: test_downstream.py Project: Kleptobismol/qiime

    def test_run_beta_diversity_through_plots_even_sampling(self):
        """run_beta_diversity_through_plots functions with even sampling.

        The workflow is run serially (parallel=False) at sampling_depth=20.
        Afterwards both UniFrac distance matrices are checked for the
        expected fecal/palm relations, and the principal-coordinates files,
        weighted Emperor index page and log file must all be non-empty.
        """

        run_beta_diversity_through_plots(
            self.test_data['biom'][0],
            self.test_data['map'][0],
            self.test_out,
            call_commands_serially,
            self.params,
            self.qiime_config,
            sampling_depth=20,
            tree_fp=self.test_data['tree'][0],
            parallel=False,
            status_update_callback=no_status_updates)

        unweighted_unifrac_dm_fp = join(
            self.test_out,
            'unweighted_unifrac_dm.txt')
        weighted_unifrac_dm_fp = join(self.test_out, 'weighted_unifrac_dm.txt')
        unweighted_unifrac_pc_fp = join(
            self.test_out,
            'unweighted_unifrac_pc.txt')
        weighted_unifrac_pc_fp = join(self.test_out, 'weighted_unifrac_pc.txt')
        weighted_unifrac_html_fp = join(
            self.test_out,
            'weighted_unifrac_emperor_pcoa_plot',
            'index.html')

        # check for expected relations between values in the unweighted unifrac
        # distance matrix
        with open(unweighted_unifrac_dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (unweighted unifrac).")
        self.assertEqual(dm['f1']['f1'], 0)

        # check for expected relations between values in the weighted unifrac
        # distance matrix
        with open(weighted_unifrac_dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (weighted unifrac).")
        self.assertEqual(dm['f1']['f1'], 0)

        # check that final output files have non-zero size
        self.assertTrue(getsize(unweighted_unifrac_pc_fp) > 0)
        self.assertTrue(getsize(weighted_unifrac_pc_fp) > 0)
        self.assertTrue(getsize(weighted_unifrac_html_fp) > 0)

        # Check that the log file is created and has size > 0
        log_fp = glob(join(self.test_out, 'log*.txt'))[0]
        self.assertTrue(getsize(log_fp) > 0)

Example #4

0

Show file

File: test_categorized_dist_scatterplot.py Project: davidvilanova/qiime

    def test_get_avg_dists(self):
        """get_avg_dists functions as expected """
        dmtx_str = StringIO.StringIO("""\ts1\ts2\ts3
s1\t0\t.5\t.6
s2\t.5\t0\t.7
s3\t.6\t.7\t0.0
""")
        distdict1 = parse_distmat_to_dict(dmtx_str)
        distdict2 = {
            's1': {
                's2': .5,
                's3': .6
            },
            's2': {
                's1': .5,
                's3': .7
            },
            's3': {
                's2': .7,
                's1': .6
            }
        }
        state1_samids = ['s1', 's2']
        state2_samids = ['s3', 's2']
        # note s2 in both
        exp_avgs = [.55, .7]
        obs_avgs = get_avg_dists(state1_samids, state2_samids, distdict1)
        assert_almost_equal(exp_avgs, obs_avgs)

Example #5

0

Show file

def main():
    """Scatter-plot average between-group distances and write them as text.

    Reads the mapping file (opts.map) and the distance matrix
    (opts.distance_matrix).  For every value of the opts.colorby mapping
    column (or once, for all samples, when opts.colorby is None) it plots
    the averages returned by get_avg_dists for the primary-state samples
    against each sample's opts.axis_category value.  The figure is saved to
    opts.output_path and the plotted values are written, tab-separated, to
    the same path with a '.txt' extension.

    Raises:
        RuntimeError: if, within some category, either sample group is empty
            once samples missing from the distance matrix are dropped, or
            both groups consist of the same single sample.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    with open(opts.map, 'U') as map_f:
        map_data, map_header, map_comments = parse_mapping_file(map_f)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dm_f:
        distdict = parse_distmat_to_dict(dm_f)
    # loop-invariant: ids of the samples present in the distance matrix
    dm_samids = set(distdict)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    plt.figure()
    # the with-block guarantees the table is flushed and closed, even when
    # the RuntimeError below aborts the run
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(map_data, map_header,
                                                       opts.colorby, cat,
                                                       opts.primary_state,
                                                       opts.secondary_state)
            # keep only samples that actually occur in the distance matrix
            state1_samids = list(set(state1_samids).intersection(dm_samids))
            state2_samids = list(set(state2_samids).intersection(dm_samids))
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s, or changing"
                    " your mapping file with filter_samples_from_otu_table.py")
            # go through dmtx
            state1_avg_dists = get_avg_dists(state1_samids, state2_samids,
                                             distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            try:
                # float() forces true division: under Python 2 without
                # `from __future__ import division` the int quotient would be
                # 0 for every category but the last
                color = plt.cm.jet(float(cat_num) / (len(colorby_cats) - 1))
            except ZeroDivisionError:  # only one cat
                color = 'b'
            plt.scatter(xvals,
                        state1_avg_dists,
                        edgecolors=color,
                        alpha=.5,
                        facecolors='none')
            plt.xlabel(opts.axis_category)
            plt.ylabel('average distance')

            text_fh.writelines(
                str(xval) + '\t' + str(avg_dist) + '\t' + sam + '\n'
                for xval, avg_dist, sam
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)

Example #6

0

Show file

    def test_parse_distmat_to_dict(self):
        """parse_distmat should return dict of distmat"""
        lines = """\ta\tb\tc
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
        exp = {
            'a': {
                'a': 0.0,
                'c': 2.0,
                'b': 1.0
            },
            'c': {
                'a': 1.0,
                'c': 0.0,
                'b': 3.5
            },
            'b': {
                'a': 1.0,
                'c': 3.5,
                'b': 0.0
            }
        }
        obs = parse_distmat_to_dict(lines)
        self.assertEqual(obs, exp)

        #should raise error because row and column headers don't match
        wrong_dist_mat = """\ta\ty\tx
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
        self.failUnlessRaises(AssertionError, parse_distmat_to_dict,
                              wrong_dist_mat)

Example #7

0

Show file

File: categorized_dist_scatterplot.py Project: ElDeveloper/qiime

def main():
    """Scatter-plot average between-group distances and write them as text.

    Loads the mapping file (opts.map) and the distance matrix
    (opts.distance_matrix), then loops over the values of the opts.colorby
    mapping column (a single pass over all samples when opts.colorby is
    None).  In each pass the averages returned by get_avg_dists for the
    primary-state samples are plotted against those samples'
    opts.axis_category values.  The figure goes to opts.output_path; the
    same values are written tab-separated to that path with its extension
    replaced by '.txt'.

    Raises:
        RuntimeError: if, within some category, either sample group is empty
            once samples missing from the distance matrix are dropped, or
            both groups consist of the same single sample.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)
    with open(opts.map, 'U') as map_f:
        map_data, map_header, map_comments = parse_mapping_file(map_f)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dm_f:
        distdict = parse_distmat_to_dict(dm_f)
    # loop-invariant: ids of the samples present in the distance matrix
    dm_samids = set(distdict)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    plt.figure()
    # the with-block closes (and flushes) the table even if the
    # RuntimeError below is raised part-way through
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            # keep only samples that actually occur in the distance matrix
            state1_samids = list(set(state1_samids).intersection(dm_samids))
            state2_samids = list(set(state2_samids).intersection(dm_samids))
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s, or changing"
                    " your mapping file with filter_samples_from_otu_table.py")
            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids,
                state2_samids,
                distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            try:
                # float() forces true division: under Python 2 without
                # `from __future__ import division` the int quotient would be
                # 0 for every category but the last
                color = plt.cm.jet(float(cat_num) / (len(colorby_cats) - 1))
            except ZeroDivisionError:  # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')
            plt.xlabel(opts.axis_category)
            plt.ylabel('average distance')

            text_fh.writelines(
                str(xval) + '\t' + str(avg_dist) + '\t' + sam + '\n'
                for xval, avg_dist, sam
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)

Example #8

0

Show file

File: test_categorized_dist_scatterplot.py Project: Jorge-C/qiime

    def test_get_avg_dists(self):
        """get_avg_dists functions as expected """
        dmtx_str = StringIO.StringIO("""\ts1\ts2\ts3
s1\t0\t.5\t.6
s2\t.5\t0\t.7
s3\t.6\t.7\t0.0
""")
        distdict1 = parse_distmat_to_dict(dmtx_str)
        distdict2 = {'s1':{'s2':.5,'s3':.6},'s2':{'s1':.5,'s3':.7},
            's3':{'s2':.7,'s1':.6}}
        state1_samids = ['s1','s2']
        state2_samids = ['s3','s2']
        # note s2 in both
        exp_avgs = [.55, .7]
        obs_avgs = get_avg_dists(state1_samids,state2_samids,distdict1)
        self.assertFloatEqual(exp_avgs, obs_avgs)

Example #9

0

Show file

File: test_parse.py Project: Ecogenomics/FrankenQIIME

    def test_parse_distmat_to_dict(self):
        """parse_distmat should return dict of distmat"""
        lines = """\ta\tb\tc
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
        exp = {'a': {'a': 0.0, 'c': 2.0, 'b': 1.0},
                'c': {'a': 1.0, 'c': 0.0, 'b': 3.5},
                'b': {'a': 1.0, 'c': 3.5, 'b': 0.0}}
        obs = parse_distmat_to_dict(lines)
        self.assertEqual(obs, exp)

        #should raise error because row and column headers don't match
        wrong_dist_mat ="""\ta\ty\tx
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
        self.failUnlessRaises(AssertionError, parse_distmat_to_dict, wrong_dist_mat)