Beispiel #1
0
def main():
    """Scatter-plot per-sample distances against a numeric mapping column.

    Command-line options are read via ``parse_command_line_parameters``.
    The mapping file (``opts.map``) and distance matrix
    (``opts.distance_matrix``) are parsed, then for every value of the
    optional ``opts.colorby`` column (or once, with ``None``, when no
    column is given):

    * ``get_sam_ids`` supplies the primary- and secondary-state sample ids,
      which are restricted to ids present in the distance matrix;
    * ``get_avg_dists`` supplies one y value per primary-state sample;
    * the points are drawn at x = ``float`` of the sample's
      ``opts.axis_category`` value and appended to a tab-separated text
      file.

    The figure is saved to ``opts.output_path``; the text file has the same
    path with its extension replaced by ``.txt``.

    Raises:
        RuntimeError: if, for any category, either state has no sample in
            the distance matrix, or both states are the same single sample.
        ValueError: if ``opts.colorby`` is not a mapping-file header
            (from ``list.index``).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Parse inputs inside ``with`` so the handles are closed immediately.
    with open(opts.map, 'U') as map_f:
        map_data, map_header, map_comments = parse_mapping_file(map_f)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dist_f:
        distdict = parse_distmat_to_dict(dist_f)
    # Loop-invariant: ids that actually have distances.
    dist_ids = set(distdict.keys())

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    # ``with`` guarantees the text file is flushed and closed even when a
    # category fails validation below.
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        plt.figure()
        plt.xlabel(opts.axis_category)
        plt.ylabel('average distance')

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(map_data, map_header,
                                                       opts.colorby, cat,
                                                       opts.primary_state,
                                                       opts.secondary_state)
            state1_samids = list(set(state1_samids).intersection(dist_ids))
            state2_samids = list(set(state2_samids).intersection(dist_ids))
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid" +
                    " distances. try eliminating samples from -p or -s, or changing" +
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(state1_samids, state2_samids,
                                             distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            if num_cats > 1:
                # Force true division: an int result would be used by the
                # colormap as a lookup-table index, not a 0-1 fraction.
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:  # only one cat
                color = 'b'
            plt.scatter(xvals,
                        state1_avg_dists,
                        edgecolors=color,
                        alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                str(xval) + '\t' + str(dist) + '\t' + sam + '\n'
                for xval, dist, sam in zip(xvals, state1_avg_dists,
                                           state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
    def test_get_sam_ids(self):
        """get_sam_ids returns the expected id sets for one colorby category.

        With colorby 'Country' restricted to 'USA', primary state
        'AgeCat:Child' and no secondary state, the first returned list must
        hold the three USA child samples and the second the two remaining
        USA samples (TS25, TS26).
        """
        mapping_handle = StringIO.StringIO(
            """#SampleID	Country	AgeYears	Family	AgeCat
    h208A.1	Malawi	0.032854209	h208	Child
    h301A.1	Malawi	0.05	h301	Child
    h301B.1	Malawi	0.05	h301	Child
    USinfTw20.1	USA	0.083333333	USinfTw20	Child
    USinfTw20.2	USA	0.083333333	USinfTw20	Child
    USinfTw1.1	USA	0.083333333	USinfTw1	Child
    h10M	Malawi	26	h10	Adult
    h68M	Malawi	26	h68	Adult
    TS25	USA	26	USts9	Adult
    TS26	USA	26	USts9	Adult""")
        rows, header, _comments = parse_mapping_file(mapping_handle)

        primary_ids, secondary_ids = get_sam_ids(
            rows, header, 'Country', 'USA', 'AgeCat:Child',
            secondary_state=None)

        expected_primary = set(['USinfTw20.1', 'USinfTw20.2', 'USinfTw1.1'])
        expected_secondary = set(['TS25', 'TS26'])
        self.assertEqual(set(primary_ids), expected_primary)
        self.assertEqual(set(secondary_ids), expected_secondary)
def main():
    """Scatter-plot per-sample distances against a numeric mapping column.

    Command-line options are read via ``parse_command_line_parameters``.
    The mapping file (``opts.map``) and distance matrix
    (``opts.distance_matrix``) are parsed, then for every value of the
    optional ``opts.colorby`` column (or once, with ``None``, when no
    column is given):

    * ``get_sam_ids`` supplies the primary- and secondary-state sample ids,
      which are restricted to ids present in the distance matrix;
    * ``get_avg_dists`` supplies one y value per primary-state sample;
    * the points are drawn at x = ``float`` of the sample's
      ``opts.axis_category`` value and appended to a tab-separated text
      file.

    The figure is saved to ``opts.output_path``; the text file has the same
    path with its extension replaced by ``.txt``.

    Raises:
        RuntimeError: if, for any category, either state has no sample in
            the distance matrix, or both states are the same single sample.
        ValueError: if ``opts.colorby`` is not a mapping-file header
            (from ``list.index``).
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    # Context managers close the input handles as soon as parsing finishes.
    with open(opts.map, 'U') as map_f:
        map_data, map_header, map_comments = parse_mapping_file(map_f)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dist_f:
        distdict = parse_distmat_to_dict(dist_f)
    # Computed once instead of once per category.
    dist_ids = set(distdict.keys())

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    # The text file is closed on every exit path, including the
    # RuntimeError raised for an invalid category.
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        plt.figure()
        plt.xlabel(opts.axis_category)
        plt.ylabel('average distance')

        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            state1_samids = list(set(state1_samids).intersection(dist_ids))
            state2_samids = list(set(state2_samids).intersection(dist_ids))
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid" +
                    " distances. try eliminating samples from -p or -s, or changing" +
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids,
                state2_samids,
                distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            if num_cats > 1:
                # Explicit float keeps this a 0-1 fraction even under
                # Python 2 integer division; an int would be taken by the
                # colormap as a lookup-table index.
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:  # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                str(xval) + '\t' + str(dist) + '\t' + sam + '\n'
                for xval, dist, sam in zip(xvals, state1_avg_dists,
                                           state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
    def test_get_sam_ids(self):
        """Check the two id lists get_sam_ids yields for the 'USA' category.

        The mapping fixture is filtered by Country == 'USA' with primary
        state 'AgeCat:Child' and secondary_state=None; the primary ids must
        be the three USA child samples and the secondary ids TS25 and TS26.
        """
        fixture = StringIO.StringIO("""#SampleID	Country	AgeYears	Family	AgeCat
    h208A.1	Malawi	0.032854209	h208	Child
    h301A.1	Malawi	0.05	h301	Child
    h301B.1	Malawi	0.05	h301	Child
    USinfTw20.1	USA	0.083333333	USinfTw20	Child
    USinfTw20.2	USA	0.083333333	USinfTw20	Child
    USinfTw1.1	USA	0.083333333	USinfTw1	Child
    h10M	Malawi	26	h10	Adult
    h68M	Malawi	26	h68	Adult
    TS25	USA	26	USts9	Adult
    TS26	USA	26	USts9	Adult""")
        parsed_rows, parsed_header, _unused = parse_mapping_file(fixture)

        call_args = (parsed_rows, parsed_header, 'Country', 'USA',
                     'AgeCat:Child')
        first_ids, second_ids = get_sam_ids(*call_args, secondary_state=None)

        want_first = set(['USinfTw20.1', 'USinfTw20.2', 'USinfTw1.1'])
        want_second = set(['TS25', 'TS26'])
        self.assertEqual(set(first_ids), want_first)
        self.assertEqual(set(second_ids), want_second)