def main():
    """Scatter-plot per-sample average distances and write them to a text file.

    Reads the mapping file (``opts.map``) and the distance matrix
    (``opts.distance_matrix``) named on the command line.  For every value of
    the ``opts.colorby`` mapping column (or a single ``None`` category when
    no column was given) it asks ``get_sam_ids`` for the primary-state and
    secondary-state sample ids, keeps only ids present in the distance
    matrix, and plots the values returned by ``get_avg_dists`` against each
    primary sample's ``opts.axis_category`` value (converted to float).

    Outputs:
        * the figure, saved to ``opts.output_path``;
        * a tab-separated table next to it (same path, ``.txt`` extension)
          with the columns ``<axis_category>``, ``distance``, ``SampleID``.

    Raises:
        RuntimeError: if, for some category, either sample list is empty
            after filtering, or both lists consist of the same single sample.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Context managers make sure the input handles are released as soon as
    # the parsers are done with them.
    with open(opts.map, 'U') as map_fh:
        map_data, map_header, map_comments = parse_mapping_file(map_fh)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dmtx_fh:
        distdict = parse_distmat_to_dict(dmtx_fh)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))

    # Loop-invariant: ids that actually occur in the distance matrix.
    dmtx_ids = set(distdict.keys())

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    # The table is closed even when a category fails validation below.
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        plt.figure()

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            state1_samids = list(set(state1_samids).intersection(dmtx_ids))
            state2_samids = list(set(state2_samids).intersection(dmtx_ids))

            same_single_sample = (len(state1_samids) == 1 and
                                  state1_samids == state2_samids)
            if not state1_samids or not state2_samids or same_single_sample:
                raise RuntimeError(
                    "one category of samples didn't have any valid" +
                    " distances. \ntry eliminating samples from -p or -s,"
                    " or changing" +
                    " your mapping file with"
                    " filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids, state2_samids, distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            try:
                # Force true division: an int argument would be treated by
                # the colormap as a raw lookup-table index, not a fraction.
                color = plt.cm.jet(float(cat_num) / (len(colorby_cats) - 1))
            except ZeroDivisionError:  # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')
            plt.xlabel(opts.axis_category)
            plt.ylabel('average distance')

            text_fh.writelines(
                '\t'.join((str(xval), str(avg_dist), samid)) + '\n'
                for xval, avg_dist, samid
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
def test_get_avg_dists(self):
    """get_avg_dists reproduces hand-computed averages for a 3x3 matrix."""
    matrix_fh = StringIO.StringIO("""\ts1\ts2\ts3
s1\t0\t.5\t.6
s2\t.5\t0\t.7
s3\t.6\t.7\t0.0
""")
    distdict1 = parse_distmat_to_dict(matrix_fh)

    # Hand-written off-diagonal view of the same matrix; kept for reference
    # only, the assertion below does not use it.
    distdict2 = {
        's1': {'s2': .5, 's3': .6},
        's2': {'s1': .5, 's3': .7},
        's3': {'s2': .7, 's1': .6},
    }

    primary_ids = ['s1', 's2']
    secondary_ids = ['s3', 's2']  # note s2 in both

    expected = [.55, .7]
    observed = get_avg_dists(primary_ids, secondary_ids, distdict1)
    assert_almost_equal(expected, observed)
def main():
    """Plot average distances between two sample states and tabulate them.

    Command-line options supply the mapping file (``opts.map``), the distance
    matrix (``opts.distance_matrix``), the numeric mapping column used for
    the x axis (``opts.axis_category``) and, optionally, a mapping column
    whose values split the samples into colour groups (``opts.colorby``).

    For each colour group, ``get_sam_ids`` provides the primary-state and
    secondary-state sample ids; ids absent from the distance matrix are
    dropped, ``get_avg_dists`` is called on what remains, and the result is
    drawn as one scatter series.  The same numbers are written as rows of
    ``<x value>\\t<distance>\\t<SampleID>`` to a ``.txt`` file that shares
    its base name with ``opts.output_path``; the figure itself is saved to
    ``opts.output_path``.

    Raises:
        RuntimeError: when a group ends up with an empty primary or
            secondary sample list, or when both lists hold the same single
            sample.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Parse both inputs inside ``with`` blocks so the handles do not linger.
    with open(opts.map, 'U') as mapping_fh:
        map_data, map_header, map_comments = parse_mapping_file(mapping_fh)
    with open(opts.distance_matrix, 'U') as distmat_fh:
        distdict = parse_distmat_to_dict(distmat_fh)
    map_dict = mapping_file_to_dict(map_data, map_header)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = list(set(row[colorby_idx] for row in map_data))
    colorby_cats.sort()

    # Built once instead of twice per category.
    known_ids = set(distdict.keys())
    # len(colorby_cats) - 1 is zero when there is a single category; the
    # division below then raises ZeroDivisionError, which selects plain blue.
    last_cat_num = len(colorby_cats) - 1

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    # ``with`` guarantees the table is flushed and closed, including when a
    # RuntimeError aborts the loop.
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        plt.figure()

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            state1_samids = list(set(state1_samids).intersection(known_ids))
            state2_samids = list(set(state2_samids).intersection(known_ids))

            if (not state1_samids or not state2_samids or
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids)):
                raise RuntimeError(
                    "one category of samples didn't have any valid" +
                    " distances. \ntry eliminating samples from -p or -s,"
                    " or changing" +
                    " your mapping file with"
                    " filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids, state2_samids, distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            try:
                # float() keeps this a fraction in [0, 1] even under
                # Python 2 floor division; an int would be read by the
                # colormap as a lookup-table index.
                color = plt.cm.jet(float(cat_num) / last_cat_num)
            except ZeroDivisionError:  # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')
            plt.xlabel(opts.axis_category)
            plt.ylabel('average distance')

            rows = zip(xvals, state1_avg_dists, state1_samids)
            text_fh.writelines(
                str(xval) + '\t' + str(avg_dist) + '\t' + samid + '\n'
                for xval, avg_dist, samid in rows)

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
def test_get_avg_dists(self):
    """get_avg_dists yields the expected averages on a small matrix."""
    state1_samids = ['s1', 's2']
    state2_samids = ['s3', 's2']  # note s2 in both
    exp_avgs = [.55, .7]

    dmtx_lines = StringIO.StringIO("""\ts1\ts2\ts3
s1\t0\t.5\t.6
s2\t.5\t0\t.7
s3\t.6\t.7\t0.0
""")
    parsed = parse_distmat_to_dict(dmtx_lines)

    # Off-diagonal entries of the matrix written out by hand; retained as a
    # reference, it is not passed to get_avg_dists.
    distdict2 = {
        's1': {'s2': .5, 's3': .6},
        's2': {'s1': .5, 's3': .7},
        's3': {'s2': .7, 's1': .6},
    }

    obs_avgs = get_avg_dists(state1_samids, state2_samids, parsed)
    self.assertFloatEqual(exp_avgs, obs_avgs)