def test_run_jackknifed_beta_diversity_parallel(self):
    """run_jackknifed_beta_diversity generates expected results in parallel

    Runs the workflow with ``parallel=True`` and then checks the two UniFrac
    distance matrices, the UPGMA tree files, the Emperor plot index pages
    and the log file written under ``self.test_out``.
    """
    run_jackknifed_beta_diversity(
        self.test_data['biom'][0],
        self.test_data['tree'][0],
        20,
        self.test_out,
        call_commands_serially,
        self.params,
        self.qiime_config,
        self.test_data['map'][0],
        parallel=True,
        status_update_callback=no_status_updates)

    weighted_unifrac_upgma_tree_fp = join(
        self.test_out, 'weighted_unifrac', 'upgma_cmp',
        'jackknife_named_nodes.tre')
    unweighted_unifrac_upgma_tree_fp = join(
        self.test_out, 'unweighted_unifrac', 'upgma_cmp',
        'jackknife_named_nodes.tre')
    weighted_unifrac_emperor_index_fp = join(
        self.test_out, 'weighted_unifrac', 'emperor_pcoa_plots',
        'index.html')
    unweighted_unifrac_emperor_index_fp = join(
        self.test_out, 'unweighted_unifrac', 'emperor_pcoa_plots',
        'index.html')

    # the distance matrix file names embed the input table's base name
    input_file_basename = splitext(split(self.test_data['biom'][0])[1])[0]
    unweighted_unifrac_dm_fp = join(
        self.test_out, 'unweighted_unifrac_%s.txt' % input_file_basename)
    weighted_unifrac_dm_fp = join(
        self.test_out, 'weighted_unifrac_%s.txt' % input_file_basename)

    # check for expected relations between values in each distance matrix;
    # the metric name is interpolated so a failure names the matrix that
    # was actually being checked
    for metric, dm_fp in (('unweighted unifrac', unweighted_unifrac_dm_fp),
                          ('weighted unifrac', weighted_unifrac_dm_fp)):
        with open(dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (%s)." % metric)
        self.assertEqual(dm['f1']['f1'], 0)

    # check that final output files have non-zero size
    self.assertTrue(getsize(weighted_unifrac_upgma_tree_fp) > 0)
    self.assertTrue(getsize(unweighted_unifrac_upgma_tree_fp) > 0)
    self.assertTrue(getsize(weighted_unifrac_emperor_index_fp) > 0)
    self.assertTrue(getsize(unweighted_unifrac_emperor_index_fp) > 0)

    # Check that the log file is created and has size > 0; assert on the
    # glob result first so a missing log is a failure, not an IndexError
    log_fps = glob(join(self.test_out, 'log*.txt'))
    self.assertTrue(log_fps, "No log file found in %s" % self.test_out)
    self.assertTrue(getsize(log_fps[0]) > 0)
def test_run_beta_diversity_through_plots_even_sampling(self):
    """run_beta_diversity_through_plots functions with even sampling

    Runs the workflow serially with ``sampling_depth=20`` and then checks
    the UniFrac distance matrices, the principal coordinates files, the
    weighted UniFrac Emperor index page and the log file written under
    ``self.test_out``.
    """
    run_beta_diversity_through_plots(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        self.test_out,
        call_commands_serially,
        self.params,
        self.qiime_config,
        sampling_depth=20,
        tree_fp=self.test_data['tree'][0],
        parallel=False,
        status_update_callback=no_status_updates)

    unweighted_unifrac_dm_fp = join(
        self.test_out, 'unweighted_unifrac_dm.txt')
    weighted_unifrac_dm_fp = join(self.test_out, 'weighted_unifrac_dm.txt')
    unweighted_unifrac_pc_fp = join(
        self.test_out, 'unweighted_unifrac_pc.txt')
    weighted_unifrac_pc_fp = join(self.test_out, 'weighted_unifrac_pc.txt')
    weighted_unifrac_html_fp = join(
        self.test_out, 'weighted_unifrac_emperor_pcoa_plot', 'index.html')

    # check for expected relations between values in each distance matrix;
    # the metric name is interpolated so a failure names the matrix that
    # was actually being checked
    for metric, dm_fp in (('unweighted unifrac', unweighted_unifrac_dm_fp),
                          ('weighted unifrac', weighted_unifrac_dm_fp)):
        with open(dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (%s)." % metric)
        self.assertEqual(dm['f1']['f1'], 0)

    # check that final output files have non-zero size
    self.assertTrue(getsize(unweighted_unifrac_pc_fp) > 0)
    self.assertTrue(getsize(weighted_unifrac_pc_fp) > 0)
    self.assertTrue(getsize(weighted_unifrac_html_fp) > 0)

    # Check that the log file is created and has size > 0; assert on the
    # glob result first so a missing log is a failure, not an IndexError
    log_fps = glob(join(self.test_out, 'log*.txt'))
    self.assertTrue(log_fps, "No log file found in %s" % self.test_out)
    self.assertTrue(getsize(log_fps[0]) > 0)
def test_run_beta_diversity_through_plots_even_sampling(self):
    """run_beta_diversity_through_plots functions with even sampling

    Runs the workflow serially with ``sampling_depth=20`` and then checks
    the UniFrac distance matrices, the principal coordinates files, the
    weighted UniFrac Emperor index page and the log file written under
    ``self.test_out``.
    """
    run_beta_diversity_through_plots(
        self.test_data['biom'][0],
        self.test_data['map'][0],
        self.test_out,
        call_commands_serially,
        self.params,
        self.qiime_config,
        sampling_depth=20,
        tree_fp=self.test_data['tree'][0],
        parallel=False,
        status_update_callback=no_status_updates)

    unweighted_unifrac_dm_fp = join(
        self.test_out, 'unweighted_unifrac_dm.txt')
    weighted_unifrac_dm_fp = join(self.test_out, 'weighted_unifrac_dm.txt')
    unweighted_unifrac_pc_fp = join(
        self.test_out, 'unweighted_unifrac_pc.txt')
    weighted_unifrac_pc_fp = join(self.test_out, 'weighted_unifrac_pc.txt')
    weighted_unifrac_html_fp = join(
        self.test_out, 'weighted_unifrac_emperor_pcoa_plot', 'index.html')

    # check for expected relations between values in each distance matrix;
    # the metric name is interpolated so a failure names the matrix that
    # was actually being checked
    for metric, dm_fp in (('unweighted unifrac', unweighted_unifrac_dm_fp),
                          ('weighted unifrac', weighted_unifrac_dm_fp)):
        with open(dm_fp) as dm_f:
            dm = parse_distmat_to_dict(dm_f)
        self.assertTrue(
            dm['f1']['f2'] < dm['f1']['p1'],
            "Distance between pair of fecal samples is larger than distance"
            " between fecal and palm sample (%s)." % metric)
        self.assertEqual(dm['f1']['f1'], 0)

    # check that final output files have non-zero size
    self.assertTrue(getsize(unweighted_unifrac_pc_fp) > 0)
    self.assertTrue(getsize(weighted_unifrac_pc_fp) > 0)
    self.assertTrue(getsize(weighted_unifrac_html_fp) > 0)

    # Check that the log file is created and has size > 0; assert on the
    # glob result first so a missing log is a failure, not an IndexError
    log_fps = glob(join(self.test_out, 'log*.txt'))
    self.assertTrue(log_fps, "No log file found in %s" % self.test_out)
    self.assertTrue(getsize(log_fps[0]) > 0)
def test_get_avg_dists(self):
    """get_avg_dists functions as expected

    Parses a 3x3 distance matrix and checks the averages returned for the
    samples in ``state1_samids`` against ``state2_samids``, including the
    case where one sample (s2) appears in both lists.
    """
    dmtx_str = StringIO.StringIO("""\ts1\ts2\ts3
s1\t0\t.5\t.6
s2\t.5\t0\t.7
s3\t.6\t.7\t0.0
""")
    distdict1 = parse_distmat_to_dict(dmtx_str)
    state1_samids = ['s1', 's2']
    state2_samids = ['s3', 's2']  # note s2 in both
    exp_avgs = [.55, .7]
    obs_avgs = get_avg_dists(state1_samids, state2_samids, distdict1)
    # observed first, expected second, so a failure labels the two values
    # correctly (assumes numpy.testing's (actual, desired) argument order)
    assert_almost_equal(obs_avgs, exp_avgs)
def main():
    """Scatter-plot average between-state distances per sample.

    Reads the mapping file and distance matrix named on the command line.
    For each value of the ``colorby`` category (or once, when no category
    is given) it collects the primary- and secondary-state sample ids,
    keeps those present in the distance matrix, and plots the values from
    ``get_avg_dists`` against each primary-state sample's
    ``axis_category`` value. The plotted points are also written to a
    tab-delimited ``.txt`` file that shares the base name of
    ``opts.output_path``, and the figure is saved to ``opts.output_path``.

    Raises:
        RuntimeError: if a category has no usable primary or secondary
            samples, or its only sample is the same in both states.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # 'U' (universal newlines) is kept as in the original Python 2 script
    with open(opts.map, 'U') as map_f:
        map_data, map_header, map_comments = parse_mapping_file(map_f)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dm_f:
        distdict = parse_distmat_to_dict(dm_f)
    dm_samids = set(distdict)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    plt.figure()

    # the context manager closes the text file even if a category is invalid
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            # only samples present in the distance matrix can be used
            state1_samids = list(set(state1_samids).intersection(dm_samids))
            state2_samids = list(set(state2_samids).intersection(dm_samids))
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s, or"
                    " changing"
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids, state2_samids, distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            if num_cats > 1:
                # explicit float: under Python 2 integer division the
                # fraction would truncate to 0 for all but the last category
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:
                # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                [str(xval) + '\t' + str(avg_dist) + '\t' + samid + '\n'
                 for xval, avg_dist, samid in
                 zip(xvals, state1_avg_dists, state1_samids)])

    plt.xlabel(opts.axis_category)
    plt.ylabel('average distance')
    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
def test_parse_distmat_to_dict(self):
    """parse_distmat_to_dict should return dict of distmat

    Checks the nested dict built from a well-formed matrix, then checks
    that mismatched row and column headers raise AssertionError.
    """
    lines = """\ta\tb\tc
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
    exp = {
        'a': {'a': 0.0, 'c': 2.0, 'b': 1.0},
        'c': {'a': 1.0, 'c': 0.0, 'b': 3.5},
        'b': {'a': 1.0, 'c': 3.5, 'b': 0.0},
    }
    obs = parse_distmat_to_dict(lines)
    self.assertEqual(obs, exp)

    # should raise error because row and column headers don't match
    wrong_dist_mat = """\ta\ty\tx
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
    # assertRaises replaces the deprecated failUnlessRaises alias
    self.assertRaises(AssertionError, parse_distmat_to_dict, wrong_dist_mat)
def main():
    """Scatter-plot average between-state distances per sample.

    Reads the mapping file and distance matrix named on the command line.
    For each value of the ``colorby`` category (or once, when no category
    is given) it collects the primary- and secondary-state sample ids,
    keeps those present in the distance matrix, and plots the values from
    ``get_avg_dists`` against each primary-state sample's
    ``axis_category`` value. The plotted points are also written to a
    tab-delimited ``.txt`` file that shares the base name of
    ``opts.output_path``, and the figure is saved to ``opts.output_path``.

    Raises:
        RuntimeError: if a category has no usable primary or secondary
            samples, or its only sample is the same in both states.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # 'U' (universal newlines) is kept as in the original Python 2 script
    with open(opts.map, 'U') as map_f:
        map_data, map_header, map_comments = parse_mapping_file(map_f)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dm_f:
        distdict = parse_distmat_to_dict(dm_f)
    dm_samids = set(distdict)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    plt.figure()

    # the context manager closes the text file even if a category is invalid
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            # only samples present in the distance matrix can be used
            state1_samids = list(set(state1_samids).intersection(dm_samids))
            state2_samids = list(set(state2_samids).intersection(dm_samids))
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s, or"
                    " changing"
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids, state2_samids, distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            if num_cats > 1:
                # explicit float: under Python 2 integer division the
                # fraction would truncate to 0 for all but the last category
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:
                # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                [str(xval) + '\t' + str(avg_dist) + '\t' + samid + '\n'
                 for xval, avg_dist, samid in
                 zip(xvals, state1_avg_dists, state1_samids)])

    plt.xlabel(opts.axis_category)
    plt.ylabel('average distance')
    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
def test_get_avg_dists(self):
    """get_avg_dists functions as expected

    Parses a 3x3 distance matrix and checks the averages returned for the
    samples in ``state1_samids`` against ``state2_samids``, including the
    case where one sample (s2) appears in both lists.
    """
    dmtx_str = StringIO.StringIO("""\ts1\ts2\ts3
s1\t0\t.5\t.6
s2\t.5\t0\t.7
s3\t.6\t.7\t0.0
""")
    distdict1 = parse_distmat_to_dict(dmtx_str)
    state1_samids = ['s1', 's2']
    state2_samids = ['s3', 's2']  # note s2 in both
    exp_avgs = [.55, .7]
    obs_avgs = get_avg_dists(state1_samids, state2_samids, distdict1)
    self.assertFloatEqual(exp_avgs, obs_avgs)
def test_parse_distmat_to_dict(self):
    """parse_distmat_to_dict should return dict of distmat

    Checks the nested dict built from a well-formed matrix, then checks
    that mismatched row and column headers raise AssertionError.
    """
    lines = """\ta\tb\tc
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
    exp = {
        'a': {'a': 0.0, 'c': 2.0, 'b': 1.0},
        'c': {'a': 1.0, 'c': 0.0, 'b': 3.5},
        'b': {'a': 1.0, 'c': 3.5, 'b': 0.0},
    }
    obs = parse_distmat_to_dict(lines)
    self.assertEqual(obs, exp)

    # should raise error because row and column headers don't match
    wrong_dist_mat = """\ta\ty\tx
a\t0\t1\t2
b\t1\t0\t3.5
c\t1\t3.5\t0
""".splitlines()
    # assertRaises replaces the deprecated failUnlessRaises alias
    self.assertRaises(AssertionError, parse_distmat_to_dict, wrong_dist_mat)