Beispiel #1
0
def main():
    """Plot per-sample average distances against a mapping-file column.

    Reads the mapping file (``opts.map``) and the distance matrix
    (``opts.distance_matrix``), then, for every distinct value of the
    ``opts.colorby`` column (or once, with category ``None``, when no
    colorby column was requested):

    * asks ``get_sam_ids`` for the primary- and secondary-state sample ids
      and keeps only those that are keys of the parsed distance matrix,
    * obtains one value per primary-state sample from ``get_avg_dists``,
    * scatter-plots those values against ``float()`` of each primary-state
      sample's ``opts.axis_category`` entry,
    * writes one ``axis value<TAB>distance<TAB>SampleID`` row per sample to
      a text file named like ``opts.output_path`` but with a ``.txt``
      extension.

    The figure is finally saved to ``opts.output_path``.

    Raises:
        RuntimeError: if, for some category, either sample list is empty or
            both lists consist of the same single sample.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Both parsers hand back fully built containers (they are indexed and
    # key-queried below), so the input handles can be closed right away.
    # 'U' (universal newlines) is kept from the original open() calls.
    with open(opts.map, 'U') as map_fh:
        map_data, map_header, map_comments = parse_mapping_file(map_fh)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dmtx_fh:
        distdict = parse_distmat_to_dict(dmtx_fh)
    # Built once instead of twice per category.
    dmtx_samids = set(distdict.keys())

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    plt.figure()
    # Axis labels do not depend on the category, so set them a single time.
    plt.xlabel(opts.axis_category)
    plt.ylabel('average distance')

    # The context manager guarantees the text output is flushed and closed,
    # including when the RuntimeError below aborts the run.
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(map_data, map_header,
                                                       opts.colorby, cat,
                                                       opts.primary_state,
                                                       opts.secondary_state)
            # only samples present in the distance matrix can be used
            state1_samids = list(set(state1_samids) & dmtx_samids)
            state2_samids = list(set(state2_samids) & dmtx_samids)
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s, or changing"
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(state1_samids, state2_samids,
                                             distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            if num_cats > 1:
                # Explicit float division: plain `/` on two ints truncates
                # under Python 2 unless true division is enabled, which
                # would put every category but the last at colormap
                # position 0.
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:  # only one cat
                color = 'b'
            plt.scatter(xvals,
                        state1_avg_dists,
                        edgecolors=color,
                        alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                '\t'.join((str(xval), str(avg_dist), samid)) + '\n'
                for xval, avg_dist, samid
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
    def test_get_avg_dists(self):
        """get_avg_dists yields the expected value for each state-1 sample"""
        matrix_fh = StringIO.StringIO(
            "\ts1\ts2\ts3\n"
            "s1\t0\t.5\t.6\n"
            "s2\t.5\t0\t.7\n"
            "s3\t.6\t.7\t0.0\n")
        parsed_dists = parse_distmat_to_dict(matrix_fh)

        # Hand-written nested-dict form of the same matrix without the
        # diagonal entries; it is not read by any assertion below.
        offdiag_dists = {
            's1': {'s2': .5, 's3': .6},
            's2': {'s1': .5, 's3': .7},
            's3': {'s2': .7, 's1': .6},
        }

        # s2 is deliberately listed under both states.  The expected .7 for
        # s2 equals its distance to s3 alone, so the s2-to-s2 zero is not
        # expected to enter the average; .55 for s1 is the mean of .6 and .5.
        primary_ids = ['s1', 's2']
        secondary_ids = ['s3', 's2']
        expected = [.55, .7]

        observed = get_avg_dists(primary_ids, secondary_ids, parsed_dists)
        assert_almost_equal(expected, observed)
def main():
    """Plot per-sample average distances against a mapping-file column.

    Reads the mapping file (``opts.map``) and the distance matrix
    (``opts.distance_matrix``), then, for every distinct value of the
    ``opts.colorby`` column (or once, with category ``None``, when no
    colorby column was requested):

    * asks ``get_sam_ids`` for the primary- and secondary-state sample ids
      and keeps only those that are keys of the parsed distance matrix,
    * obtains one value per primary-state sample from ``get_avg_dists``,
    * scatter-plots those values against ``float()`` of each primary-state
      sample's ``opts.axis_category`` entry,
    * writes one ``axis value<TAB>distance<TAB>SampleID`` row per sample to
      a text file named like ``opts.output_path`` but with a ``.txt``
      extension.

    The figure is finally saved to ``opts.output_path``.

    Raises:
        RuntimeError: if, for some category, either sample list is empty or
            both lists consist of the same single sample.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    # Both parsers hand back fully built containers (they are indexed and
    # key-queried below), so the input handles can be closed right away.
    # 'U' (universal newlines) is kept from the original open() calls.
    with open(opts.map, 'U') as map_fh:
        map_data, map_header, map_comments = parse_mapping_file(map_fh)
    map_dict = mapping_file_to_dict(map_data, map_header)

    with open(opts.distance_matrix, 'U') as dmtx_fh:
        distdict = parse_distmat_to_dict(dmtx_fh)
    # Built once instead of twice per category.
    dmtx_samids = set(distdict.keys())

    if opts.colorby is None:
        colorby_cats = [None]
    else:
        colorby_idx = map_header.index(opts.colorby)
        colorby_cats = sorted(set(row[colorby_idx] for row in map_data))
    num_cats = len(colorby_cats)

    textfilename = os.path.splitext(opts.output_path)[0] + '.txt'
    plt.figure()
    # Axis labels do not depend on the category, so set them a single time.
    plt.xlabel(opts.axis_category)
    plt.ylabel('average distance')

    # The context manager guarantees the text output is flushed and closed,
    # including when the RuntimeError below aborts the run.
    with open(textfilename, 'w') as text_fh:
        text_fh.write(opts.axis_category + '\tdistance\tSampleID' + '\n')
        for cat_num, cat in enumerate(colorby_cats):
            # collect the primary and secondary samples within this category
            state1_samids, state2_samids = get_sam_ids(
                map_data, map_header, opts.colorby, cat,
                opts.primary_state, opts.secondary_state)
            # only samples present in the distance matrix can be used
            state1_samids = list(set(state1_samids) & dmtx_samids)
            state2_samids = list(set(state2_samids) & dmtx_samids)
            if not state1_samids or not state2_samids or \
                    (len(state1_samids) == 1 and
                     state1_samids == state2_samids):
                raise RuntimeError(
                    "one category of samples didn't have any valid"
                    " distances. try eliminating samples from -p or -s, or changing"
                    " your mapping file with filter_samples_from_otu_table.py")

            # go through dmtx
            state1_avg_dists = get_avg_dists(
                state1_samids,
                state2_samids,
                distdict)

            # plot
            xvals = [float(map_dict[sam][opts.axis_category])
                     for sam in state1_samids]
            if num_cats > 1:
                # Explicit float division: plain `/` on two ints truncates
                # under Python 2 unless true division is enabled, which
                # would put every category but the last at colormap
                # position 0.
                color = plt.cm.jet(float(cat_num) / (num_cats - 1))
            else:  # only one cat
                color = 'b'
            plt.scatter(xvals, state1_avg_dists, edgecolors=color, alpha=.5,
                        facecolors='none')

            text_fh.writelines(
                '\t'.join((str(xval), str(avg_dist), samid)) + '\n'
                for xval, avg_dist, samid
                in zip(xvals, state1_avg_dists, state1_samids))

    if opts.colorby is not None:
        plt.legend(colorby_cats)
    plt.savefig(opts.output_path)
    def test_get_avg_dists(self):
        """get_avg_dists yields the expected value for each state-1 sample"""
        matrix_fh = StringIO.StringIO(
            "\ts1\ts2\ts3\n"
            "s1\t0\t.5\t.6\n"
            "s2\t.5\t0\t.7\n"
            "s3\t.6\t.7\t0.0\n")
        parsed_dists = parse_distmat_to_dict(matrix_fh)

        # Hand-written nested-dict form of the same matrix without the
        # diagonal entries; it is not read by any assertion below.
        offdiag_dists = {
            's1': {'s2': .5, 's3': .6},
            's2': {'s1': .5, 's3': .7},
            's3': {'s2': .7, 's1': .6},
        }

        # s2 is deliberately listed under both states.  The expected .7 for
        # s2 equals its distance to s3 alone, so the s2-to-s2 zero is not
        # expected to enter the average; .55 for s1 is the mean of .6 and .5.
        primary_ids = ['s1', 's2']
        secondary_ids = ['s3', 's2']
        expected = [.55, .7]

        observed = get_avg_dists(primary_ids, secondary_ids, parsed_dists)
        self.assertFloatEqual(expected, observed)