def main():
    """Scatter-plot average distances for primary-state samples.

    Reads the mapping file (opts.map) and the distance matrix
    (opts.distance_matrix).  For every distinct value of the opts.colorby
    column (or once, with category None, when opts.colorby is not given)
    the primary- and secondary-state sample ids are looked up with
    get_sam_ids, restricted to samples present in the distance matrix, and
    the values returned by get_avg_dists are plotted against each primary
    sample's numeric opts.axis_category value.

    Outputs:
        * the figure, saved to opts.output_path
        * a tab-delimited text file next to it (same base name, '.txt'
          extension) with one "<axis value>, <distance>, <SampleID>" row
          per plotted point.

    Raises:
        RuntimeError: if, within any category, either state has no samples
            left after filtering against the distance matrix, or both
            states consist of the same single sample.
        ValueError: if opts.colorby is not a column of the mapping file
            (raised by list.index).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # The parsers are assumed to consume their file handle completely before
    # returning (they hand back plain data structures), so the handles can be
    # closed right away instead of being leaked.
    with open(opts.map, 'U') as map_fh:
        map_data, map_header, map_comments = parse_mapping_file(map_fh)
    map_dict = mapping_file_to_dict(map_data, map_header)
    with open(opts.distance_matrix, 'U') as dist_fh:
        distdict = parse_distmat_to_dict(dist_fh)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'

    plt.figure()
    # Axis labels do not depend on the category, so set them once.
    plt.xlabel(opts.axis_category)
    plt.ylabel('average distance')

    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)

            # Keep only samples that have distances.  Sorting makes the
            # order of the rows in the text output reproducible (a bare set
            # has arbitrary iteration order).
            state1_samids = sorted(set(state1_samids).intersection(distdict))
            state2_samids = sorted(set(state2_samids).intersection(distdict))

            if (not state1_samids or not state2_samids or
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids)):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s,"
                    " or changing"
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids, state2_samids, distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]

            if num_cats > 1:
                # Force float division: with Python 2 integer division every
                # category but the last would map to 0, and an int passed to
                # a colormap is treated as a lookup-table index rather than
                # a fraction in [0, 1].
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:
                # only one cat
                color = 'b'

            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                str(xval) + '\t' + str(dist) + '\t' + sam + '\n'
                for xval, dist, sam
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
def test_get_sam_ids(self):
    """set of sample ids in get_sam_ids should be correct"""
    # The mapping file is assembled from explicit '\t' and '\n' separators
    # instead of literal tabs inside a triple-quoted string, so whitespace
    # normalization by editors or tooling cannot silently corrupt it.
    map_rows = [
        ('#SampleID', 'Country', 'AgeYears', 'Family', 'AgeCat'),
        ('h208A.1', 'Malawi', '0.032854209', 'h208', 'Child'),
        ('h301A.1', 'Malawi', '0.05', 'h301', 'Child'),
        ('h301B.1', 'Malawi', '0.05', 'h301', 'Child'),
        ('USinfTw20.1', 'USA', '0.083333333', 'USinfTw20', 'Child'),
        ('USinfTw20.2', 'USA', '0.083333333', 'USinfTw20', 'Child'),
        ('USinfTw1.1', 'USA', '0.083333333', 'USinfTw1', 'Child'),
        ('h10M', 'Malawi', '26', 'h10', 'Adult'),
        ('h68M', 'Malawi', '26', 'h68', 'Adult'),
        ('TS25', 'USA', '26', 'USts9', 'Adult'),
        ('TS26', 'USA', '26', 'USts9', 'Adult'),
    ]
    map_file = StringIO.StringIO(
        '\n'.join('\t'.join(row) for row in map_rows))
    map_data, map_header, comments = parse_mapping_file(map_file)

    colorby = 'Country'
    cat = 'USA'
    primary_state = 'AgeCat:Child'
    ids1, ids2 = get_sam_ids(map_data, map_header, colorby, cat,
                             primary_state, secondary_state=None)

    # Primary ids: the USA samples whose AgeCat is Child.
    self.assertEqual(set(ids1),
                     set(['USinfTw20.1', 'USinfTw20.2', 'USinfTw1.1']))
    # With secondary_state=None the expected secondary ids are the
    # remaining USA samples (the two adults).
    self.assertEqual(set(ids2), set(['TS25', 'TS26']))
def main():
    """Scatter-plot average distances for primary-state samples.

    Reads the mapping file (opts.map) and the distance matrix
    (opts.distance_matrix).  For every distinct value of the opts.colorby
    column (or once, with category None, when opts.colorby is not given)
    the primary- and secondary-state sample ids are looked up with
    get_sam_ids, restricted to samples present in the distance matrix, and
    the values returned by get_avg_dists are plotted against each primary
    sample's numeric opts.axis_category value.

    Outputs:
        * the figure, saved to opts.output_path
        * a tab-delimited text file next to it (same base name, '.txt'
          extension) with one "<axis value>, <distance>, <SampleID>" row
          per plotted point.

    Raises:
        RuntimeError: if, within any category, either state has no samples
            left after filtering against the distance matrix, or both
            states consist of the same single sample.
        ValueError: if opts.colorby is not a column of the mapping file
            (raised by list.index).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # The parsers are assumed to consume their file handle completely before
    # returning (they hand back plain data structures), so the handles can be
    # closed right away instead of being leaked.
    with open(opts.map, 'U') as map_fh:
        map_data, map_header, map_comments = parse_mapping_file(map_fh)
    map_dict = mapping_file_to_dict(map_data, map_header)
    with open(opts.distance_matrix, 'U') as dist_fh:
        distdict = parse_distmat_to_dict(dist_fh)

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'

    plt.figure()
    # Axis labels do not depend on the category, so set them once.
    plt.xlabel(opts.axis_category)
    plt.ylabel('average distance')

    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)

            # Keep only samples that have distances.  Sorting makes the
            # order of the rows in the text output reproducible (a bare set
            # has arbitrary iteration order).
            state1_samids = sorted(set(state1_samids).intersection(distdict))
            state2_samids = sorted(set(state2_samids).intersection(distdict))

            if (not state1_samids or not state2_samids or
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids)):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s,"
                    " or changing"
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids, state2_samids, distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]

            if num_cats > 1:
                # Force float division: with Python 2 integer division every
                # category but the last would map to 0, and an int passed to
                # a colormap is treated as a lookup-table index rather than
                # a fraction in [0, 1].
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:
                # only one cat
                color = 'b'

            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                str(xval) + '\t' + str(dist) + '\t' + sam + '\n'
                for xval, dist, sam
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
def test_get_sam_ids(self):
    """set of sample ids in get_sam_ids should be correct"""
    # The mapping file is assembled from explicit '\t' and '\n' separators
    # instead of literal tabs inside a triple-quoted string, so whitespace
    # normalization by editors or tooling cannot silently corrupt it.
    map_rows = [
        ('#SampleID', 'Country', 'AgeYears', 'Family', 'AgeCat'),
        ('h208A.1', 'Malawi', '0.032854209', 'h208', 'Child'),
        ('h301A.1', 'Malawi', '0.05', 'h301', 'Child'),
        ('h301B.1', 'Malawi', '0.05', 'h301', 'Child'),
        ('USinfTw20.1', 'USA', '0.083333333', 'USinfTw20', 'Child'),
        ('USinfTw20.2', 'USA', '0.083333333', 'USinfTw20', 'Child'),
        ('USinfTw1.1', 'USA', '0.083333333', 'USinfTw1', 'Child'),
        ('h10M', 'Malawi', '26', 'h10', 'Adult'),
        ('h68M', 'Malawi', '26', 'h68', 'Adult'),
        ('TS25', 'USA', '26', 'USts9', 'Adult'),
        ('TS26', 'USA', '26', 'USts9', 'Adult'),
    ]
    map_file = StringIO.StringIO(
        '\n'.join('\t'.join(row) for row in map_rows))
    map_data, map_header, comments = parse_mapping_file(map_file)

    colorby = 'Country'
    cat = 'USA'
    primary_state = 'AgeCat:Child'
    ids1, ids2 = get_sam_ids(map_data, map_header, colorby, cat,
                             primary_state, secondary_state=None)

    # Primary ids: the USA samples whose AgeCat is Child.
    self.assertEqual(set(ids1),
                     set(['USinfTw20.1', 'USinfTw20.2', 'USinfTw1.1']))
    # With secondary_state=None the expected secondary ids are the
    # remaining USA samples (the two adults).
    self.assertEqual(set(ids2), set(['TS25', 'TS26']))