Example #1
0
    def test_natsort_case_insensitive(self):
        """natsort compares embedded numbers numerically and is case-sensitive;
           natsort_case_insensitive does the same while ignoring case"""

        # mixed alpha/numeric names, including differently-cased twins
        samples = ['sample1', 'sample2', 'sample11', 'sample12',
                   'SAmple1', 'Sample2']

        case_sensitive_order = [
            'SAmple1', 'Sample2', 'sample1', 'sample2', 'sample11',
            'sample12']
        case_insensitive_order = [
            'sample1', 'SAmple1', 'sample2', 'Sample2', 'sample11',
            'sample12']

        self.assertEqual(natsort(samples), case_sensitive_order)
        self.assertEqual(natsort_case_insensitive(samples),
                         case_insensitive_order)

        # the case-sensitive result is the same for reversed input
        samples.reverse()
        self.assertEqual(natsort(samples), case_sensitive_order)
        # single characters, case-sensitive sort: digits, upper, then lower
        single_chars = list('cbaA321')
        self.assertEqual(natsort(single_chars), list('123Aabc'))

        # alphabetic-only input
        letters = list('cdBa')
        self.assertEqual(natsort_case_insensitive(letters), list('aBcd'))

        # integers stored as strings
        int_strings = ['11', '2', '1', '0']
        self.assertEqual(natsort_case_insensitive(int_strings),
                         ['0', '1', '2', '11'])

        # floats stored as strings
        float_strings = ['1.11', '1.12', '1.00', '0.009']
        self.assertEqual(natsort_case_insensitive(float_strings),
                         ['0.009', '1.00', '1.11', '1.12'])

        # tuples whose first element is an integer string
        pairs = [('11', 'A'), ('2', 'B'), ('1', 'C'), ('0', 'D')]
        self.assertEqual(natsort_case_insensitive(pairs),
                         [('0', 'D'), ('1', 'C'), ('2', 'B'), ('11', 'A')])
Example #2
0
    def test_natsort_case_insensitive(self):
        """natsort sorts numerically and case-sensitively, whereas
           natsort_case_insensitive sorts numerically ignoring case"""

        # names mixing letters and numbers, some differing only by case
        s = 'sample1 sample2 sample11 sample12 SAmple1 Sample2'.split()

        exp_natsort = \
            'SAmple1 Sample2 sample1 sample2 sample11 sample12'.split()
        exp_natsort_case_insensitive = \
            'sample1 SAmple1 sample2 Sample2 sample11 sample12'.split()

        # both sorters on the original ordering
        self.assertEqual(natsort(s), exp_natsort)
        self.assertEqual(natsort_case_insensitive(s),
                         exp_natsort_case_insensitive)

        # case-sensitive sorter on the reversed ordering
        s.reverse()
        self.assertEqual(natsort(s), exp_natsort)
        # case-sensitive sorter on single characters
        self.assertEqual(natsort(list('cbaA321')), list('123Aabc'))

        # (input, expected) pairs for the case-insensitive sorter, in order:
        # letters only, integer strings, float strings, tuples of strings
        insensitive_cases = [
            (list('cdBa'), list('aBcd')),
            (['11', '2', '1', '0'], ['0', '1', '2', '11']),
            (['1.11', '1.12', '1.00', '0.009'],
             ['0.009', '1.00', '1.11', '1.12']),
            ([('11', 'A'), ('2', 'B'), ('1', 'C'), ('0', 'D')],
             [('0', 'D'), ('1', 'C'), ('2', 'B'), ('11', 'A')]),
        ]
        for given, expected in insensitive_cases:
            self.assertEqual(natsort_case_insensitive(given), expected)
def main():
    """Count the samples per value of one mapping-file category.

    Reads the options produced by parse_command_line_parameters, tallies how
    many rows of the mapping data carry each value of opts.category, and
    writes one "value<TAB>count" line per value in natsort order. Empty
    values are reported as ***UNSPECIFIED***.

    Output goes to opts.output_fp when it is set, otherwise to stdout.
    Calls option_parser.error if the category is not a mapping-file header.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp

    map_data, header, comments = parse_mapping_file(opts.input_fp)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!" % opts.category)

    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # Format everything before touching the destination so a failure while
    # counting cannot leave a truncated output file behind.
    lines = []
    for cat_val in natsort(result):
        label = cat_val if cat_val else "***UNSPECIFIED***"
        lines.append("%s\t%d\n" % (label, result[cat_val]))

    # use stdout or the user supplied file path
    if output_fp:
        # the context manager closes the file even if a write fails
        with open(output_fp, 'w') as fd:
            fd.writelines(lines)
    else:
        # stdout is not ours to close; closing it breaks any later output
        stdout.writelines(lines)
Example #4
0
def main():
    """Report how many samples fall under each value of a mapping category.

    The mapping data comes from parse_mapping_file(opts.mapping_file). For
    every distinct value found in the opts.category column, one
    "value<TAB>count" line is emitted in natsort order; an empty value is
    printed as ***UNSPECIFIED***.

    Lines are written to opts.output_fp when given, otherwise to stdout.
    Calls option_parser.error if the category is not a mapping-file header.
    """
    option_parser, opts, args =\
        parse_command_line_parameters(**script_info)

    output_fp = opts.output_fp

    map_data, header, comments = parse_mapping_file(opts.mapping_file)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!" %
                            opts.category)

    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    # Build the report first; the output file is only opened (and therefore
    # truncated) once there is something complete to write.
    report = ["%s\t%d\n" % (cat_val or "***UNSPECIFIED***", result[cat_val])
              for cat_val in natsort(result)]

    # use stdout or the user supplied file path
    if output_fp:
        # closed automatically, including when a write raises
        with open(output_fp, 'w') as fd:
            fd.writelines(report)
    else:
        # never close stdout: it belongs to the interpreter, not this script
        stdout.writelines(report)
Example #5
0
def combine_sample_dicts(sample_dicts):
    """ merges per-sample abundance dicts into a single otu table

    sample_dicts is a list of dicts, each one {otu_id:num_seqs}

    returns a tuple (otu_mtx, otu_ids):
    * otu_ids lists every otu id used as a key in any sample dict, ordered
    by natsort
    * otu_mtx is an integer matrix with one row per entry of otu_ids and one
    column per sample dict (same order as sample_dicts)
    * an otu missing from a sample stays 0 in that column; an otu that is
    only ever listed with abundance 0 still gets its (all zero) row
    """
    seen_ids = [otu_id for sample in sample_dicts for otu_id in sample.keys()]
    all_otu_ids = natsort(list(set(seen_ids)))

    # resolve each otu id to its row number once, up front
    row_of = dict((otu_id, row) for row, otu_id in enumerate(all_otu_ids))

    # otus (rows) by samples (cols)
    otu_mtx = zeros((len(all_otu_ids), len(sample_dicts)), int)
    for col, sample in enumerate(sample_dicts):
        for otu_id, count in sample.items():
            otu_mtx[row_of[otu_id], col] = count

    return otu_mtx, all_otu_ids
Example #6
0
def combine_sample_dicts(sample_dicts):
    """ builds one otu table out of a list of sample dicts

    sample_dicts is a list of dicts, each one {otu_id:num_seqs}

    the result is the tuple (otu_mtx, otu_ids) where
    * otu_ids holds each otu id seen as a key in any dict, in natsort order
    * otu_mtx has otus as rows (ordered like otu_ids) and samples as columns
    (ordered like sample_dicts), with integer entries
    * otus absent from a sample are left at 0, so an otu only ever given
    abundance 0 shows up as a row of zeros
    """
    otu_pool = []
    for sample in sample_dicts:
        otu_pool += sample.keys()

    unique_otus = list(set(otu_pool))
    all_otu_ids = natsort(unique_otus)

    # one lookup table for all samples instead of repeated list searches
    indices = dict(zip(all_otu_ids, range(len(all_otu_ids))))

    n_otus = len(all_otu_ids)
    n_samples = len(sample_dicts)
    otu_mtx = numpy.zeros((n_otus, n_samples), int)
    # otus (rows) by samples (cols)
    for sample_idx, abundances in enumerate(sample_dicts):
        for otu in abundances:
            otu_mtx[indices[otu], sample_idx] = abundances[otu]

    return otu_mtx, all_otu_ids
Example #7
0
def parse_smp_unifrac_distances(lines):
    """Pivot tab-separated (person id, time point, value) rows into a matrix.

    lines: sequence of strings. The first element is treated as a header and
    ignored. Every other element must contain at least three tab-separated
    fields: a person id, a time point parseable by float(), and a value
    parseable by float().

    Returns a tuple (unique_personal_ids, unique_time_points, output_data):
    * unique_personal_ids: the distinct person ids, ordered by natsort
    * unique_time_points: the distinct time-point strings, ordered by their
      float value
    * output_data: array created by zeros() with one row per person id and
      one column per time point; cells with no matching row stay 0, and when
      several rows share a person id and time point the last one wins

    An empty `lines` (or one holding only the header) yields empty lists and
    an empty array.

    Raises IndexError for a row with fewer than three fields and ValueError
    for a time point or value that float() rejects.
    """
    rows = [line.split('\t') for line in lines[1:]]

    unique_personal_ids = natsort(list(set([row[0] for row in rows])))

    # sort the time-points numerically; they are kept as the original strings
    unique_time_points = sorted(set([row[1] for row in rows]), key=float)

    # position lookups, so each row is placed in O(1) instead of re-scanning
    # every row per person and calling list.index() per cell
    person_row = dict((pid, i) for i, pid in enumerate(unique_personal_ids))
    time_col = dict((tp, j) for j, tp in enumerate(unique_time_points))

    output_data = zeros([len(unique_personal_ids), len(unique_time_points)])
    for row in rows:
        output_data[person_row[row[0]]][time_col[row[1]]] = float(row[2])

    return unique_personal_ids, unique_time_points, output_data
Example #8
0
    def test_natsort(self):
        """natsort should compare the numbers inside strings numerically"""
        ordered = ['sample1', 'sample2', 'sample11', 'sample12']

        # alphanumeric names: already-ordered input, then reversed input
        names = list(ordered)
        self.assertEqual(natsort(names), ordered)
        names.reverse()
        self.assertEqual(natsort(names), ordered)

        # (input, expected) pairs, in order: mixed single characters,
        # letters only, integer strings, float strings, tuples of strings
        cases = [
            (list('cba321'), list('123abc')),
            (list('cdba'), list('abcd')),
            (['11', '2', '1', '0'], ['0', '1', '2', '11']),
            (['1.11', '1.12', '1.00', '0.009'],
             ['0.009', '1.00', '1.11', '1.12']),
            ([('11', 'A'), ('2', 'B'), ('1', 'C'), ('0', 'D')],
             [('0', 'D'), ('1', 'C'), ('2', 'B'), ('11', 'A')]),
        ]
        for given, expected in cases:
            self.assertEqual(natsort(given), expected)
Example #9
0
def main():
    """Plot PC1 vs PC2 with one convex hull per mapping-file category value.

    Reads the coordinates file and the mapping file named in the parsed
    options, groups sample ids by their value in the opts.category column and,
    for every value with at least three samples, draws the points, the hull
    edges and a dashed outline in that value's QIIME color. The figure is
    saved to opts.output_fp.

    Calls option_parser.error if the category is not a mapping-file header.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    coordinates_fp = opts.coordinates_fp
    mapping_file_fp = opts.mapping_file_fp
    category_header_name = opts.category
    output_fp = opts.output_fp

    # The parsed results are indexed and re-iterated below, so both files can
    # be closed as soon as parsing returns.
    with open(coordinates_fp, 'U') as coords_f:
        coords_headers, coords_data, coords_eigenvalues, coords_percents = \
            parse_coords(coords_f)
    with open(mapping_file_fp, 'U') as mapping_f:
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    # report a missing category as a usage error rather than a ValueError
    # traceback from list.index()
    if category_header_name not in mapping_headers:
        option_parser.error("%s doesn't appear to exist in the mapping file!" %
                            category_header_name)

    category_header_index = mapping_headers.index(category_header_name)
    category_names = list(
        set([line[category_header_index] for line in mapping_data]))

    xtitle = 'PC1 (%.0f%%)' % round(coords_percents[0])
    ytitle = 'PC2 (%.0f%%)' % round(coords_percents[1])
    main_figure = plt.figure()
    main_axes = main_figure.add_subplot(1, 1, 1, axisbg='white')
    plt.xlabel(xtitle)
    plt.ylabel(ytitle)
    main_axes.tick_params(axis='y')
    main_axes.tick_params(axis='x')

    # sort the data!!! that way you can match make_3d_plots.py
    for index, category in enumerate(natsort(category_names)):
        sample_ids_list = [
            line[0] for line in mapping_data
            if line[category_header_index] == category
        ]

        # the color is tied to the category's position in the sorted list,
        # so skipped categories still consume a color slot
        qiime_color = get_qiime_hex_string_color(index)

        # categories with fewer than three samples are not drawn at all
        if len(sample_ids_list) < 3:
            continue

        indices = [
            coords_headers.index(sample_id) for sample_id in sample_ids_list
        ]
        # first two coordinate columns only
        points = coords_data[indices, :2]

        hull = ConvexHull(points)
        main_axes.plot(points[:, 0], points[:, 1], 'o', color=qiime_color)
        for simplex in hull.simplices:
            main_axes.plot(points[simplex, 0], points[simplex, 1], 'k-')
        main_axes.plot(points[hull.vertices, 0],
                       points[hull.vertices, 1],
                       '--',
                       lw=2,
                       color=qiime_color)

    main_figure.savefig(output_fp)
Example #10
0
def color_groups(groups, colors, data_color_order):
    """Assigns a color to every group that does not have one yet.

    Groups are visited in natsort order and take successive entries of
    data_color_order, starting over from the first entry once the last one
    has been handed out. Groups already present in colors are left alone and
    do not consume an entry.

    Modifies colors in-place; nothing is returned.
    """
    next_slot = 0
    for group in natsort(groups):
        if group in colors:
            continue
        # wrap around after the last color has been used
        if next_slot == len(data_color_order):
            next_slot = 0
        colors[group] = data_color_order[next_slot]
        next_slot += 1
Example #11
0
def main():
    """Print the number of samples per value of one mapping-file category.

    Tallies the rows of the mapping data by their value in the opts.category
    column and prints one "value<TAB>count" line per value in natsort order.
    Empty values are printed as ***UNSPECIFIED***.

    Calls option_parser.error if the category is not a mapping-file header.
    """
    option_parser, opts, args =\
       parse_command_line_parameters(**script_info)

    map_data, header, comments = parse_mapping_file(opts.input_fp)

    if opts.category not in header:
        option_parser.error("%s doesn't appear to exist in the mapping file!" % opts.category)

    result = defaultdict(int)
    cat_idx = header.index(opts.category)
    for samp in map_data:
        result[samp[cat_idx]] += 1

    for cat_val in natsort(result):
        label = cat_val if cat_val else "***UNSPECIFIED***"
        # Parenthesised single-argument print: same output under the Python 2
        # print statement, and valid syntax on Python 3.
        print("%s\t%d" % (label, result[cat_val]))
Example #12
0
def iter_color_groups(mapping, prefs):
    """Yields the color grouping for every entry of prefs, in natsort order.

    For each entry the mapping is grouped by the entry's "column" and the
    groups are colored by get_group_colors, seeded with a copy of the entry's
    "colors" when present (an empty dict otherwise).

    Each item is (labelname, groups, colors, data_colors, data_color_order),
    where labelname is the entry's "column" value.
    """
    for pref_name in natsort(prefs.keys()):
        pref = prefs[pref_name]
        column = pref["column"]

        # work on a copy so the caller's prefs are never mutated
        if "colors" not in pref:
            seed_colors = {}
        elif isinstance(pref["colors"], dict):
            seed_colors = pref["colors"].copy()
        else:
            seed_colors = pref["colors"][:]

        # Define groups and associate appropriate colors to each group
        groups = group_by_field(mapping, column)
        colors, data_colors, data_color_order = get_group_colors(groups, seed_colors)

        yield column, groups, colors, data_colors, data_color_order
Example #13
0
def iter_color_groups(mapping, prefs):
    """Generates one color grouping per prefs entry, visiting keys via natsort.

    Every prefs entry names a mapping 'column' and may carry preset 'colors'.
    The mapping is grouped by that column and get_group_colors assigns the
    colors, starting from a copy of the presets (or from an empty dict).

    Yields (labelname, groups, colors, data_colors, data_color_order) with
    labelname being the entry's 'column' value.
    """
    for key in natsort(prefs.keys()):
        settings = prefs[key]
        labelname = settings['column']

        if 'colors' in settings:
            preset = settings['colors']
            # copy (dict) or slice (anything else) so we can mutate
            colors = preset.copy() if isinstance(preset, dict) else preset[:]
        else:
            colors = {}

        #Define groups and associate appropriate colors to each group
        groups = group_by_field(mapping, labelname)
        grouped = get_group_colors(groups, colors)
        colors, data_colors, data_color_order = grouped

        yield labelname, groups, colors, data_colors, data_color_order
Example #14
0
def make_html(rarefaction_legend_mat, rarefaction_data_mat, xaxisvals, \
                imagetype,mapping_lookup, output_type="file_creation", all_plots=None):
    """Build the rarefaction-plot HTML page from the legend and data dicts.

    The legend rows, the metric/category <option> lists and the data-table
    rows are assembled as lists of HTML fragments and substituted into the
    module-level HTML template.

    rarefaction_legend_mat: nested dict read as
        [metric]['groups'][category][group]['groupcolor' | 'groupsamples']
        and [metric]['samples'][sample]['color']
    rarefaction_data_mat: nested dict read as
        [category][group][metric]['ave' | 'err'][i]
    xaxisvals: sequence; one data-table row is produced per element for
        every group
    imagetype: string appended to the image names used in the page
    mapping_lookup: dict keyed by category + '-' + group; its values are
        concatenated into element ids and image names
    output_type: "file_creation" makes the page reference image files under
        ./html_plots/; "memory" embeds the images from all_plots as base64
        data URIs. Any other value leaves html_output unassigned, so the
        final return raises UnboundLocalError.
    all_plots: only read when output_type is "memory"; iterable of dicts
        whose values expose a .buf attribute holding the image bytes

    Returns the filled-in HTML string.
    """
    # NOTE(review): the next line is a bare expression with no effect
    rarefaction_legend_mat
    legend_td = [
        '<b>Legend</b><div STYLE="border: thin black solid; height: 300px; width: 200px; font-size: 12px; overflow: auto;"><table>'
    ]
    # NOTE(review): summarized_table is never used in this function
    summarized_table = []
    metric_select_html = []
    category_select_html = []
    data_table_html = []
    metrics = []
    category_colors = {}
    # flips to 1 after the first metric, so the category <option> list is
    # only built once
    cat_iter = 0
    #iterate the legend dictionary
    for m in natsort(rarefaction_legend_mat):

        #Create the metric select box options
        metric_select_html.append('<option value="%s">%s</option>' % (m, m))
        metrics.append(m)

        #iterate through the categories in the legend dictionary
        for category in natsort(rarefaction_legend_mat[m]['groups']):

            #Create the select box options
            if cat_iter == 0:
                cat_links = []
                for i in rarefaction_legend_mat[m]['groups'][category]:
                    cat_links.append(mapping_lookup[category + '-' + i])
                # option value: category name followed by its group links,
                # all joined with the '$#!' separator
                category_select_html.append('<option value="%s">%s</option>' % \
                            (category+'$#!'+'$#!'.join(cat_links),category))

            # NOTE(review): plot_iterator is incremented below but never read
            plot_iterator = 0
            #iterate through the groups in the legend dictionary and create
            #the html formatted rows for each category and group
            for group in natsort(
                    rarefaction_legend_mat[m]['groups'][category]):
                sample_list = []
                # keyed by group name only; read again when the data table
                # rows are colored further down
                category_colors[group]=\
                       rarefaction_legend_mat[m]['groups'][category][group]['groupcolor']

                # quoted sample ids, joined into the row's name attribute
                for sample in natsort(rarefaction_legend_mat[m]['groups']
                                      [category][group]['groupsamples']):
                    sample_list.append('\'' + sample + '\'')

                plot_iterator = plot_iterator + 1

                # one legend row per group
                legend_td.append(
                    '<tr id="%s" name="%s" style="display: none;"><td class="data" onmouseover="document.body.style.cursor=\'pointer\'"  onmouseout="document.body.style.cursor=\'default\'" onclick="toggle(%s)" id="%s" name="%s">&#x25B6;</td><td><input name="%s" type="checkbox" checked="True" onclick="show_hide_category(this)"></td><td style="color:%s">&#x25A0;&nbsp;</td><td class="data"><b>%s</b></td></tr>'
                    % (m + category, m + category,
                       "'" + m + mapping_lookup[category + '-' + group] + "'",
                       m + mapping_lookup[category + '-' + group],
                       ','.join(sample_list),
                       m + mapping_lookup[category + '-' + group] + '_raw.' +
                       imagetype, rarefaction_legend_mat[m]['groups'][category]
                       [group]['groupcolor'], group))

                # one legend row per sample of the group
                for sample in natsort(rarefaction_legend_mat[m]['groups']
                                      [category][group]['groupsamples']):
                    sample = str(sample)
                    legend_td.append(
                        '<tr id="%s" name="%s" style="display: none;"><td class="data" align="right">&#x221F;</td><td></td><td style="color:%s">&#x25C6;</td><td class="data" align="left"><b>%s</b></td></tr>'
                        %
                        (m + mapping_lookup[category + '-' + group] + '_raw',
                         m + mapping_lookup[category + '-' + group],
                         rarefaction_legend_mat[m]['samples'][sample]['color'],
                         sample))

        cat_iter = 1

    #iterate through the data dictionary and format the rows for the html
    #data table
    for category in rarefaction_data_mat:
        # header row: category name, Seqs/Sample, then Ave./Err. per metric
        data_table_html.append(
            '<tr name="%s" style="display: none;"><td class="headers">%s</td><td class="headers">Seqs/Sample</td>'
            % (category, category))
        for j in metrics:
            data_table_html.append(
                '<td class="headers">%s Ave.</td><td class="headers">%s Err.</td>'
                % (j, j))
        data_table_html.append('</tr>')
        #data_table_html.append('<tr name="%s" style="display: none;"></tr>' % (category))
        for g in natsort(rarefaction_data_mat[category]):
            for i in range(len(xaxisvals)):
                data_table_html.append(
                    '<tr name="%s" style="display: none;">' % (category))
                data_table_html.append(
                    '<td class="data" bgcolor="%s">%s</td><td class="data">%s</td>'
                    % (category_colors[g], g, xaxisvals[i]))
                for m in metrics:  #bugfix, was rarefaction_data_mat[category][g]
                    data_table_html.append(
                        '<td class="data">%s</td><td class="data">%s</td>' %
                        (rarefaction_data_mat[category][g][m]['ave'][i],
                         rarefaction_data_mat[category][g][m]['err'][i]))
        # NOTE(review): a <tr> is opened for every (group, x value) above but
        # this closing tag is appended only once per category - confirm the
        # intended markup
        data_table_html.append('</tr>')

    legend_td.append('</table></div></div>')
    #Create the table that contains the plots and table
    plot_html = '%s' % ('\n'.join(legend_td))

    if output_type == "file_creation":
        #insert the formatted rows into the html string at the bottom of this file
        html_output=HTML % ('',
                            "img.setAttribute('src',\"./html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype)",
                            "img.setAttribute('src',\"./html_plots/\"+metric+array[i]+'_ave'+imagetype)",
                            "img.setAttribute('src',\"./html_plots/\"+arguments[0]+'_raw'+imagetype)",
                            '.'+imagetype,
                            '\n'.join(metric_select_html), \
                            '\n'.join(category_select_html), \
                            plot_html, \
                            '\n'.join(data_table_html))
    elif output_type == "memory":
        # javascript dictionary mapping plot paths to base64 data URIs
        plots_html = ['all_plots = {}']
        for elements in all_plots:
            for k, v in elements.items():
                # the path is compatible with the javascript, see make_averages
                # NOTE(review): urllib.quote only exists on Python 2
                plots_html.append('all_plots["%s"] = "%s"' % (k, \
                   "data:image/png;base64," + urllib.quote(base64.b64encode(v.buf))))

        #insert the formatted rows into the html string at the bottom of this file
        html_output=HTML % ('\n'.join(plots_html),
                            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype])",
                            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+metric+array[i]+'_ave'+imagetype])",
                            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+arguments[0]+'_raw'+imagetype])",
                            '.'+imagetype,
                            '\n'.join(metric_select_html), \
                            '\n'.join(category_select_html), \
                            plot_html, \
                            '\n'.join(data_table_html))
    return html_output
def main():
    """Plot PC1 vs PC2 with per-category convex hulls and save a legend.

    Reads the coordinates and mapping files named in the parsed options,
    groups sample ids by their value in the opts.category column and, for
    every value with at least three samples, draws the points, the hull edges
    and a dashed outline in that value's QIIME color on a black background.
    The figure is saved to opts.output_fp and make_legend is then called with
    the output path split into name and extension.

    When opts.chuck_norris_joke is set, a joke is fetched with curl, printed,
    and the script exits without plotting.
    """
    option_parser, opts, args = parse_command_line_parameters(**script_info)

    chuck_norris_joke = opts.chuck_norris_joke
    coordinates_fp = opts.coordinates_fp
    mapping_file_fp = opts.mapping_file_fp
    category_header_name = opts.category
    output_fp = opts.output_fp

    # have a swell day Yoshiki from the future
    if chuck_norris_joke:
        # local import: only this branch needs it
        import json

        o, e, _ = qiime_system_call('curl http://api.icndb.com/jokes/random')

        # SECURITY: this branch used to `exec` the HTTP response body as
        # Python source, i.e. it ran whatever the remote host returned. The
        # body is now parsed as data instead.
        # NOTE(review): assumes the endpoint answers with JSON - confirm.
        joke = json.loads(o.strip())
        print(joke['value']['joke'])
        exit(0)

    # The parsed results are indexed and re-iterated below, so both files can
    # be closed as soon as parsing returns.
    with open(coordinates_fp, 'U') as coords_f:
        coords_headers, coords_data, coords_eigenvalues, coords_percents =\
            parse_coords(coords_f)
    with open(mapping_file_fp, 'U') as mapping_f:
        mapping_data, mapping_headers, _ = parse_mapping_file(mapping_f)

    category_header_index = mapping_headers.index(category_header_name)
    category_names = list(set([line[category_header_index]
        for line in mapping_data]))

    main_figure = plt.figure()
    main_axes = main_figure.add_subplot(1, 1, 1, axisbg='black')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    main_axes.tick_params(axis='y', colors='none')
    main_axes.tick_params(axis='x', colors='none')

    # sort the data!!! that way you can match make_3d_plots.py
    sorted_categories = natsort(category_names)
    colors_used = []

    for index, category in enumerate(sorted_categories):
        sample_ids_list = [line[0] for line in mapping_data
                           if line[category_header_index] == category]

        # the color is tied to the category's position in the sorted list,
        # so skipped categories still consume a color slot
        qiime_color = get_qiime_hex_string_color(index)

        # categories with fewer than three samples are not drawn at all
        if len(sample_ids_list) < 3:
            continue

        colors_used.append(qiime_color)

        indices = [coords_headers.index(sample_id)
                   for sample_id in sample_ids_list]
        # first two coordinate columns only
        points = coords_data[indices, :2]

        hull = ConvexHull(points)
        main_axes.plot(points[:, 0], points[:, 1], 'o', color=qiime_color)
        for simplex in hull.simplices:
            main_axes.plot(points[simplex, 0], points[simplex, 1], 'w-')
        main_axes.plot(points[hull.vertices, 0], points[hull.vertices, 1],
                       '--', lw=2, color=qiime_color)

    main_figure.savefig(output_fp)

    name = splitext(output_fp)[0]
    extension = splitext(output_fp)[1].replace('.', '')

    # NOTE(review): sorted_categories lists every category while colors_used
    # skips those with fewer than three samples, so the two lists can differ
    # in length - confirm how make_legend pairs labels with colors.
    make_legend(sorted_categories, colors_used, 0, 0, 'black', 'white', name,
                extension, 80)
Example #16
0
def make_plots(
    background_color,
    label_color,
    rares,
    ymax,
    xmax,
    output_dir,
    resolution,
    imagetype,
    groups,
    colors,
    data_colors,
    metric_name,
    labelname,
    rarefaction_data_mat,
    rarefaction_legend_mat,
    sample_dict,
    sample_data_colors,
    sample_colors,
    mapping_lookup,
    output_type="file_creation",
):
    """Generate the rarefaction plots for one metric and one category.

    For every group (in natsort order) and every x-axis value, the formatted
    average and error are appended to
    rarefaction_data_mat[labelname][group][metric_name]['ave' | 'err'],
    creating the nested dicts on first use. The per-group plots are then
    produced by save_single_rarefaction_plots and the averaged plot by
    save_single_ave_rarefaction_plots; both hand back the updated legend
    dict.

    rares: dict read through its 'headers', 'xaxis', 'series' and 'error'
        keys; 'series' and 'error' are indexed as [group][j].
    rarefaction_data_mat: modified in place; must already contain labelname.
    output_type: "file_creation" or "memory".

    Returns
        (rarefaction_data_mat, rarefaction_legend_mat) for "file_creation";
        (rarefaction_data_mat, rarefaction_legend_mat, all_plots_single,
        all_plots_ave) for "memory";
        None for any other output_type (no plotting helper is called).

    Raises KeyError/IndexError when 'series' or 'error' lacks a value for a
    group and x position; raised by the formatting step, as before.
    """
    # Get the alpha rare data
    raredata = rares

    # generate the filepath for the image file
    file_path = os.path.join(output_dir, splitext(split(raredata["headers"][0])[1])[0])

    makes_plots = output_type in ("file_creation", "memory")

    all_plots_single = []
    # Sort and iterate through the groups
    for i in natsort(groups):

        for j in range(len(raredata["xaxis"])):
            # The results of these two lookups were never used, but they
            # raise on a non-integer x value or a group without a color, so
            # they are still evaluated to keep that failure behavior.
            int(raredata["xaxis"][j])
            data_colors[colors[i]].toHex()

            # Add context to the data dictionary, which will be used in the
            # html. setdefault replaces the has_key() ladder (has_key was
            # removed in Python 3) and creates missing levels on first use.
            metric_entry = rarefaction_data_mat[labelname].setdefault(
                i, {}).setdefault(metric_name, {"ave": [], "err": []})
            metric_entry["ave"].append("%10.3f" % raredata["series"][i][j])
            metric_entry["err"].append("%10.3f" % raredata["error"][i][j])

        # Create raw plots for each group in a category
        if makes_plots:
            single_result = save_single_rarefaction_plots(
                sample_dict,
                imagetype,
                metric_name,
                sample_data_colors,
                sample_colors,
                output_dir,
                background_color,
                label_color,
                resolution,
                ymax,
                xmax,
                rarefaction_legend_mat,
                groups[i],
                labelname,
                i,
                mapping_lookup,
                output_type,
            )
            if output_type == "memory":
                # in-memory mode also hands back the rendered plots
                rarefaction_legend_mat, rare_plot_for_all = single_result
                all_plots_single.append(rare_plot_for_all)
            else:
                rarefaction_legend_mat = single_result

    if not makes_plots:
        return None

    categories = list(groups)

    # Create the rarefaction average plot and get updated legend information
    ave_result = save_single_ave_rarefaction_plots(
        raredata["xaxis"],
        raredata["series"],
        raredata["error"],
        xmax,
        ymax,
        categories,
        labelname,
        imagetype,
        resolution,
        data_colors,
        colors,
        file_path,
        background_color,
        label_color,
        rarefaction_legend_mat,
        metric_name,
        mapping_lookup,
        output_type,
    )

    if output_type == "file_creation":
        rarefaction_legend_mat = ave_result
        return rarefaction_data_mat, rarefaction_legend_mat

    rarefaction_legend_mat, all_plots_ave = ave_result
    return rarefaction_data_mat, rarefaction_legend_mat, all_plots_single, all_plots_ave
Example #17
0
def make_html(
    rarefaction_legend_mat,
    rarefaction_data_mat,
    xaxisvals,
    imagetype,
    mapping_lookup,
    output_type="file_creation",
    all_plots=None,
):
    """Fill the module-level ``HTML`` template with the rarefaction legend,
    select boxes and data table.

    Parameters
    ----------
    rarefaction_legend_mat : dict
        Indexed here as ``[metric]["groups"][category][group]`` (with
        ``"groupcolor"`` and ``"groupsamples"`` entries) and as
        ``[metric]["samples"][sample]["color"]``.
    rarefaction_data_mat : dict
        Indexed here as ``[category][group][metric]["ave"|"err"][i]``, one
        entry per value of ``xaxisvals``.
    xaxisvals : sequence
        Values written to the "Seqs/Sample" column of the data table.
    imagetype : str
        Image extension without the leading dot.
    mapping_lookup : dict
        Maps ``"<category>-<group>"`` to the identifier used in element ids
        and image names.
    output_type : str
        ``"file_creation"`` makes the page load images from ``./html_plots/``;
        ``"memory"`` embeds the images as base64 data URIs.
    all_plots : iterable of dict, optional
        Only read in ``"memory"`` mode.  Each dict maps a plot path to an
        object whose ``buf`` attribute holds the image bytes.  ``None`` is
        treated as "no plots".

    Returns
    -------
    str
        The formatted HTML page.

    Raises
    ------
    ValueError
        If ``output_type`` is neither ``"file_creation"`` nor ``"memory"``.
    """
    legend_td = [
        '<b>Legend</b><div STYLE="border: thin black solid; height: 300px; width: 200px; font-size: 12px; overflow: auto;"><table>'
    ]
    metric_select_html = []
    category_select_html = []
    data_table_html = []
    metrics = []
    category_colors = {}
    # the category select box is identical for every metric, so it is only
    # built while walking the first one
    first_metric = True

    # iterate the legend dictionary
    for m in natsort(rarefaction_legend_mat):

        # Create the metric select box options
        metric_select_html.append('<option value="%s">%s</option>' % (m, m))
        metrics.append(m)

        legend_groups = rarefaction_legend_mat[m]["groups"]

        # iterate through the categories in the legend dictionary
        for category in natsort(legend_groups):
            category_groups = legend_groups[category]

            # Create the select box options
            if first_metric:
                cat_links = [mapping_lookup[category + "-" + i] for i in category_groups]
                category_select_html.append(
                    '<option value="%s">%s</option>' % (category + "$#!" + "$#!".join(cat_links), category)
                )

            # iterate through the groups in the legend dictionary and create
            # the html formatted rows for each category and group
            for group in natsort(category_groups):
                group_color = category_groups[group]["groupcolor"]
                group_samples = natsort(category_groups[group]["groupsamples"])
                group_key = m + mapping_lookup[category + "-" + group]

                # NOTE: keyed by group name only, so equally named groups in
                # different categories share one entry (last one wins)
                category_colors[group] = group_color

                sample_list = ["'" + sample + "'" for sample in group_samples]

                legend_td.append(
                    '<tr id="%s" name="%s" style="display: none;"><td class="data" onmouseover="document.body.style.cursor=\'pointer\'"  onmouseout="document.body.style.cursor=\'default\'" onclick="toggle(%s)" id="%s" name="%s">&#x25B6;</td><td><input name="%s" type="checkbox" checked="True" onclick="show_hide_category(this)"></td><td style="color:%s">&#x25A0;&nbsp;</td><td class="data"><b>%s</b></td></tr>'
                    % (
                        m + category,
                        m + category,
                        "'" + group_key + "'",
                        group_key,
                        ",".join(sample_list),
                        group_key + "_raw." + imagetype,
                        group_color,
                        group,
                    )
                )

                # one (initially hidden) legend row per sample of the group
                for sample in group_samples:
                    sample = str(sample)
                    legend_td.append(
                        '<tr id="%s" name="%s" style="display: none;"><td class="data" align="right">&#x221F;</td><td></td><td style="color:%s">&#x25C6;</td><td class="data" align="left"><b>%s</b></td></tr>'
                        % (
                            group_key + "_raw",
                            group_key,
                            rarefaction_legend_mat[m]["samples"][sample]["color"],
                            sample,
                        )
                    )

        first_metric = False

    # iterate through the data dictionary and format the rows for the html
    # data table
    for category in rarefaction_data_mat:
        data_table_html.append(
            '<tr name="%s" style="display: none;"><td class="headers">%s</td><td class="headers">Seqs/Sample</td>'
            % (category, category)
        )
        for j in metrics:
            data_table_html.append('<td class="headers">%s Ave.</td><td class="headers">%s Err.</td>' % (j, j))
        data_table_html.append("</tr>")

        category_data = rarefaction_data_mat[category]
        for g in natsort(category_data):
            for i in range(len(xaxisvals)):
                data_table_html.append('<tr name="%s" style="display: none;">' % (category))
                data_table_html.append(
                    '<td class="data" bgcolor="%s">%s</td><td class="data">%s</td>'
                    % (category_colors[g], g, xaxisvals[i])
                )
                # columns follow the metric order of the header row
                for m in metrics:
                    data_table_html.append(
                        '<td class="data">%s</td><td class="data">%s</td>'
                        % (
                            category_data[g][m]["ave"][i],
                            category_data[g][m]["err"][i],
                        )
                    )
        # the data rows are emitted without their own closing tag; a single
        # one is appended per category (kept so the generated markup is stable)
        data_table_html.append("</tr>")

    legend_td.append("</table></div></div>")
    # Create the table that contains the plots and table
    plot_html = "%s" % ("\n".join(legend_td))

    # The first four template slots depend on where the images live.
    if output_type == "file_creation":
        mode_args = (
            "",
            "img.setAttribute('src',\"./html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype)",
            "img.setAttribute('src',\"./html_plots/\"+metric+array[i]+'_ave'+imagetype)",
            "img.setAttribute('src',\"./html_plots/\"+arguments[0]+'_raw'+imagetype)",
        )
    elif output_type == "memory":
        plots_html = ["all_plots = {}"]
        # all_plots defaults to None; treat that as "nothing to embed"
        for elements in all_plots or []:
            for k, v in elements.items():
                # the path is compatible with the javascript, see make_averages
                plots_html.append(
                    'all_plots["%s"] = "%s"' % (k, "data:image/png;base64," + urllib.quote(base64.b64encode(v.buf)))
                )
        mode_args = (
            "\n".join(plots_html),
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+SelObject.value+array[i]+'_ave'+imagetype])",
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+metric+array[i]+'_ave'+imagetype])",
            "img.setAttribute('src',all_plots[\"plot/html_plots/\"+arguments[0]+'_raw'+imagetype])",
        )
    else:
        raise ValueError("output_type must be 'file_creation' or 'memory', got %r" % (output_type,))

    # insert the formatted rows into the html string at the bottom of this file
    return HTML % (
        mode_args
        + (
            "." + imagetype,
            "\n".join(metric_select_html),
            "\n".join(category_select_html),
            plot_html,
            "\n".join(data_table_html),
        )
    )
Example #18
0
def make_averages(
    color_prefs,
    data,
    background_color,
    label_color,
    rares,
    output_dir,
    resolution,
    imagetype,
    ymax,
    suppress_webpage,
    std_type,
    output_type="file_creation",
):
    """This is the main function, which takes the rarefaction files, calls the
    functions to make plots and formats the output html.

    Parameters
    ----------
    color_prefs
        Passed to ``iter_color_groups`` together with the filtered mapping.
    data : dict
        ``data["map"]`` is the mapping table: first row is the header, each
        following row starts with the sample id.
    background_color, label_color, resolution, imagetype
        Forwarded unchanged to the plotting helpers.
    rares : dict
        Maps a rarefaction file name to the 4-item sequence
        ``(col_headers, comments, rarefaction_fn, rarefaction_data)``.
    output_dir : str
        Base directory for ``html_plots``, ``average_plots`` and
        ``average_tables``.
    ymax
        Y-axis limit.  A falsy value means "compute it from the data" for
        every rarefaction file.
    suppress_webpage : bool
        When true only the average plots are produced and ``None`` is
        returned.
    std_type
        Forwarded to ``save_rarefaction_data``.
    output_type : str
        ``"file_creation"`` writes to disk; ``"memory"`` collects the plots
        and embeds them in the returned html.

    Returns
    -------
    The string produced by ``make_html``, or ``None`` when
    ``suppress_webpage`` is true.
    """
    # A truthy ymax was supplied by the caller and is used as-is.
    user_ymax = bool(ymax)

    if output_type == "file_creation":
        # in this option the path must include the output directory
        all_output_dir = os.path.join(output_dir, "html_plots")
        # the average plots are written even when the webpage is suppressed,
        # so this path has to exist in both cases
        ave_output_dir = os.path.join(output_dir, "average_plots")

        # Create the directory where the per-group plots will be written
        if not suppress_webpage:
            create_dir(all_output_dir)

    elif output_type == "memory":
        # this is rather an artificial path to work with the javascript code
        all_output_dir = "plot/html_plots"
        ave_output_dir = "plot/average_plots"

    ave_data_file_path = os.path.join(output_dir, "average_tables")
    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}
    first_rare_file = True

    # this is  a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for i, column in enumerate(data["map"][0]):
        for j, row in enumerate(data["map"][1:]):
            mapping_lookup["%s-%s" % (column, row[i])] = "col_%s_row_%s" % (str(i), str(j))

    all_plots = []
    # Iterate through the rarefaction files
    for r in natsort(rares):

        metric_name = r.split(".")[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # Here we only need to perform these steps once, since the data is
        # the same for all rarefaction files
        if first_rare_file:

            # Remove samples from the mapping file, which contain no data after
            # rarefaction
            updated_mapping = []
            for j in data["map"]:

                # Add the mapping header
                if j[0] == "SampleID":
                    updated_mapping.append(j)

                # Determine if the sample exists in the rarefaction file
                for i in col_headers[3:]:
                    if j[0] == i:
                        updated_mapping.append(j)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(iter_color_groups(updated_mapping, color_prefs))

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(rarefaction_data, col_headers)

        # Create dictionary variables and get the colors for each Sample
        sample_colors = None
        rarefaction_legend_mat[metric_name] = {"groups": {}}
        for group_info in groups_and_colors:
            labelname = group_info[0]
            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]["groups"][labelname] = {}
            # If this is the first time iterating through the rarefaction data
            # create a data dictionary for html output
            if first_rare_file:
                rarefaction_data_mat[labelname] = {}

            # If the labelname is SampleID, use the colors assigned
            if labelname == "SampleID":
                sample_colors = group_info[2]
                sample_data_colors = group_info[3]

        first_rare_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(
                iter_color_groups(
                    updated_mapping,
                    {"SampleID": {"column": "SampleID", "colors": (("red", (0, 100, 100)), ("blue", (240, 100, 100)))}},
                )
            )
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples that survived the
        # mapping filter: {sample id: {seqs/sample: [values]}}
        mapped_sample_ids = set(row[0] for row in updated_mapping)
        sample_dict = {}
        for i, sid in enumerate(sampleIDs):
            if sid in mapped_sample_ids:
                sample_dict[sid] = {}
                for j, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(rare_mat_trans[i][j])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp, sampleIDs)

        # calculate the max xval: one more step beyond the last x value.
        # The len()-based indices are deliberate: with a single x value the
        # second index wraps around to the same element and the step is 0.
        xmax = max(xaxisvals) + (xaxisvals[len(xaxisvals) - 1] - xaxisvals[len(xaxisvals) - 2])

        if not user_ymax:
            # NOTE(review): the average tables are only written on this path,
            # i.e. never when the caller supplies ymax - confirm that is
            # intended.
            ymax = 0
            for group_info in groups_and_colors:
                labelname = group_info[0]
                groups = group_info[1]
                colors = group_info[2]
                data_colors = group_info[3]
                ave_file_path = os.path.join(ave_data_file_path, metric_name)

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r, data_colors, groups, std_type
                )

                # write out the rarefaction average data
                if output_type == "file_creation":
                    with open(ave_file_path + labelname + ".txt", "w") as ave_table:
                        ave_table.writelines(rare_lines)

                # take the formatted rarefaction averages and format the results
                rares_data = parse_rarefaction_data("".join(rare_lines[:]).split("\n"))

                # determine the ymax based on the average data
                # multiple the ymax, since the dots can end up on the border
                series_max = max([max(v) for v in rares_data["series"].values()])
                new_ymax = (series_max + max([max(e) for e in rares_data["error"].values()])) * 1.15
                if isnan(new_ymax):
                    # fall back to the series alone when the error is NaN
                    new_ymax = series_max * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        # the per-sample legend entries only need to be filled once per metric
        sample_legend_done = False

        # iterate through the groups
        for group_info in groups_and_colors:
            labelname = group_info[0]
            groups = group_info[1]
            colors = group_info[2]
            data_colors = group_info[3]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r, data_colors, groups, std_type
            )

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data("".join(rare_lines[:]).split("\n"))

            if not suppress_webpage:

                if not sample_legend_done:
                    legend_samples = {}
                    rarefaction_legend_mat[metric_name]["samples"] = legend_samples
                    for o in sample_dict:
                        # Add values to the legend dictionary
                        legend_samples[o] = {"color": sample_data_colors[sample_colors[o]].toHex()}

                    sample_legend_done = True

                # Iterate through the groups and create the legend dictionary
                for g in groups:
                    # create a dictionary of samples and their colors
                    rarefaction_legend_mat[metric_name]["groups"][labelname][g] = {
                        "groupsamples": groups[g],
                        "groupcolor": data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots; make_plots
                # returns two extra items (the in-memory plots) in memory mode
                if output_type in ("file_creation", "memory"):
                    plot_results = make_plots(
                        background_color,
                        label_color,
                        rares_data,
                        ymax,
                        xmax,
                        all_output_dir,
                        resolution,
                        imagetype,
                        groups,
                        colors,
                        data_colors,
                        metric_name,
                        labelname,
                        rarefaction_data_mat,
                        rarefaction_legend_mat,
                        sample_dict,
                        sample_data_colors,
                        sample_colors,
                        mapping_lookup,
                        output_type,
                    )
                    if output_type == "file_creation":
                        rarefaction_data_mat, rarefaction_legend_mat = plot_results
                    else:
                        (rarefaction_data_mat, rarefaction_legend_mat, all_plots_single, all_plots_ave) = plot_results

            # generate the filepath for the image file
            file_path = os.path.join(ave_output_dir, splitext(split(rares_data["headers"][0])[1])[0])

            # Create the average plots (also when the webpage is suppressed)
            categories = [k for k in groups]
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data["xaxis"],
                rares_data["series"],
                rares_data["error"],
                xmax,
                ymax,
                categories,
                labelname,
                imagetype,
                resolution,
                data_colors,
                colors,
                file_path,
                background_color,
                label_color,
                metric_name,
                output_type,
            )

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(
        rarefaction_legend_mat, rarefaction_data_mat, xaxisvals, imagetype, mapping_lookup, output_type, all_plots
    )
Example #19
0
def get_group_colors(groups, colors, data_colors=data_colors, data_color_order=data_color_order):
    """Figures out group colors for a specific series based on prefs.

    Algorithm is as follows:

    - For each name, color pair we know about:
        - Check if the name is one of the groups (exact match)
        - If it isn't, assume it's a prefix and pull out all the matching groups
        - If the color is just a string, set everything to the color with that
          name
        - Otherwise, assume that either it's a new color we're adding, or that
          it's a range for gradient coloring.
        - If it's a new color, create it and add it to added_data_colors.
        - If it's a gradient, make up all the new colors and add them to
          added_data_colors

    The current method for gradient coloring of columns (should perhaps
    replace with more general method) is to pass in any of the following:

    'colors':(('white', (0,0,100)),('red',(0,100,100)))

    makes gradient between white and red, applies to all samples

    'colors':{'RK':(('white',(0,0,100)),('red',(0,100,100))),
              'NF':(('white',(120,0,100)),('green',(120,100,100)))
             }
    pulls the combination samples starting with RK, colors with
    first gradient, then pulls the combination samples starting
    with NF, colors with the next gradient.

    Note that when ``colors`` is a dict it is modified in place (prefix keys
    are replaced by the groups they match), and the ``data_colors`` /
    ``data_color_order`` objects passed in (by default the module-level ones)
    are extended in place with any newly created colors.

    Return values are:
    - colors: dict of {group_value:color_name}
    - data_colors: dict of {color_name:color_object}
    - data_color_order: order in which the data colors are used/written.
    """

    added_data_colors = {}
    if isinstance(colors, dict):
        # assume we're getting some of the colors out of a dict
        if colors:
            # sorted() snapshots the items, so the dict can be edited below
            for k, v in sorted(colors.items()):
                if k not in groups:  # assume is prefix
                    k_matches = [g for g in groups if g.startswith(k)]
                    if isinstance(v, str):  # just set everything to this color
                        for m in k_matches:
                            colors[m] = v
                    else:  # assume is new color or range
                        first, second = v
                        if isinstance(first, str):  # new named color?
                            if first not in data_colors:
                                added_data_colors[first] = Color(first, second)
                            for m in k_matches:
                                colors[m] = first
                        else:  # new color range?
                            start_color, end_color = map(get_color, [first, second])
                            # the gradient end points, read the same way as in
                            # the tuple case at the bottom of this function
                            start_hsv = start_color.Coords
                            end_hsv = end_color.Coords
                            num_colors = len(k_matches)
                            curr_data_colors = color_dict_to_objects(
                                make_color_dict(start_color, start_hsv, end_color, end_hsv, num_colors)
                            )
                            curr_colors = {}
                            color_groups(k_matches, curr_colors, natsort(curr_data_colors))
                            colors.update(curr_colors)
                            added_data_colors.update(curr_data_colors)
                    # the prefix itself is not a group, so drop it
                    del colors[k]
                elif not isinstance(v, str):  # assume val is new color
                    color = get_color(v)
                    if color.Name not in data_colors:
                        added_data_colors[color.Name] = color
                    colors[k] = color.Name
            # handle any leftover groups
            color_groups(groups, colors, data_color_order)
            # add new colors
            data_colors.update(added_data_colors)
            if added_data_colors != {}:
                # NOTE(review): this appends ONE entry made of all new color
                # names concatenated; with more than one new color that entry
                # is not itself a color name - confirm whether extend() was
                # intended.
                data_color_order.append(''.join(natsort(added_data_colors)))
        else:
            # handle case where no prefs is used
            color_groups(groups, colors, data_color_order)
    else:
        # handle the case where colors is a tuple for gradients
        start_color, end_color = map(get_color, colors)
        start_hsv = start_color.Coords
        end_hsv = end_color.Coords
        num_colors = len(groups)
        data_colors = color_dict_to_objects(
            make_color_dict(start_color, start_hsv, end_color, end_hsv, num_colors)
        )
        data_color_order = list(natsort(data_colors.keys()))
        colors = {}
        color_groups(groups, colors, data_color_order)

    return colors, data_colors, data_color_order
Example #20
0
def make_plots(background_color, label_color, rares, ymax, xmax,
               output_dir, resolution, imagetype, groups, colors, data_colors,
               metric_name, labelname, rarefaction_data_mat,
               rarefaction_legend_mat, sample_dict, sample_data_colors,
               sample_colors, mapping_lookup, output_type="file_creation"):
    '''Generate the rarefaction plots for one category and record the
        averages that the html data table will show.

        rares is the parsed rarefaction data; its 'headers', 'xaxis',
        'series' and 'error' entries are read here.  groups maps each group
        of the category labelname to the value handed to
        save_single_rarefaction_plots for that group.

        rarefaction_data_mat[labelname] is updated in place: for every group
        the formatted average and error of metric_name are appended, one pair
        per x axis value.  A missing value is recorded as nan.

        Returns (rarefaction_data_mat, rarefaction_legend_mat) when
        output_type is "file_creation" and (rarefaction_data_mat,
        rarefaction_legend_mat, all_plots_single, all_plots_ave) when it is
        "memory".  Any other output_type returns None.'''
    #Get the alpha rare data
    raredata = rares

    #generate the filepath for the image file
    file_path = os.path.join(output_dir,
                             splitext(split(raredata['headers'][0])[1])[0])

    category_data = rarefaction_data_mat[labelname]
    all_plots_single = []
    #Sort and iterate through the groups
    for i in natsort(groups):

        for j in range(len(raredata['xaxis'])):
            #If a field is missing, then it means that one of the
            #samples did not contain enough sequences.
            #For this case, we will assign the value as n.a.
            try:
                average_field = raredata['series'][i][j]
                error_field = raredata['error'][i][j]
            except (KeyError, IndexError):
                average_field = nan
                error_field = nan
            else:
                #an undefined average has no meaningful error either
                if isnan(average_field):
                    error_field = nan

            #Add context to the data dictionary, which will be used in the
            #html; the nested entries are created on first use
            metric_data = category_data.setdefault(i, {}).setdefault(
                metric_name, {'ave': [], 'err': []})
            metric_data['ave'].append('%10.3f' % average_field)
            metric_data['err'].append('%10.3f' % error_field)

        #Create raw plots for each group in a category; the legend is passed
        #in and replaced by the updated one the helper returns
        single_plot_result = None
        if output_type in ("file_creation", "memory"):
            single_plot_result = save_single_rarefaction_plots(
                sample_dict, imagetype, metric_name,
                sample_data_colors, sample_colors,
                output_dir, background_color,
                label_color, resolution, ymax, xmax,
                rarefaction_legend_mat, groups[i],
                labelname, i, mapping_lookup, output_type)

        if output_type == "file_creation":
            rarefaction_legend_mat = single_plot_result
        elif output_type == "memory":
            #in memory mode the helper also hands back the plot itself
            rarefaction_legend_mat, rare_plot_for_all = single_plot_result
            all_plots_single.append(rare_plot_for_all)

    categories = [k for k in groups]

    #Create the rarefaction average plot and get updated legend information
    if output_type == "file_creation":
        rarefaction_legend_mat = save_single_ave_rarefaction_plots(
            raredata['xaxis'], raredata['series'], raredata['error'],
            xmax, ymax, categories, labelname, imagetype, resolution,
            data_colors, colors, file_path, background_color, label_color,
            rarefaction_legend_mat, metric_name, mapping_lookup, output_type)

        return rarefaction_data_mat, rarefaction_legend_mat
    elif output_type == "memory":
        rarefaction_legend_mat, all_plots_ave = save_single_ave_rarefaction_plots(
            raredata['xaxis'], raredata['series'], raredata['error'],
            xmax, ymax, categories, labelname, imagetype, resolution,
            data_colors, colors, file_path, background_color, label_color,
            rarefaction_legend_mat, metric_name, mapping_lookup, output_type)

        return rarefaction_data_mat, rarefaction_legend_mat, all_plots_single, all_plots_ave
Example #21
0
def make_mage_output(groups, colors, coord_header, coords, pct_var,
                     background_color, label_color, data_colors,
                     taxa=None, custom_axes=None, name='',
                     radius=None, alpha=.75, num_coords=10, scaled=False,
                     coord_scale=1.05, edges=None, coords_low=None,
                     coords_high=None, ellipsoid_prefs=None,
                     user_supplied_edges=False, ball_scale=1.0,
                     arrow_colors=None):
    """Convert groups, colors, coords and percent var into mage format.

    Parameters
    ----------
    groups : mapping of group name -> collection of sample ids.
    colors : mapping of group name -> color name used in the point lists.
    coord_header : sequence of sample ids, zipped with the rows of `coords`.
    coords : array of coordinates; must support ``.max(0)`` / ``.min(0)``
        (presumably a 2-D numpy array, samples by axes -- TODO confirm).
    pct_var : per-axis percent-variation values; also used as the scaling
        vector when `scaled` is true.
    background_color : when equal to ``'white'`` a white background
        directive is emitted, otherwise a white color definition is.
    label_color : color name appended to the axis lines and axis labels.
    data_colors : mapping whose values expose ``toMage()``.
    taxa : optional taxa data forwarded to ``make_mage_taxa``.
    custom_axes : optional list of axis names that precede the PC axes.
    name : kinemage title (a scaled/unscaled suffix is appended).
    radius : ball radius; computed by ``auto_radius`` when None.
    alpha, num_coords, coord_scale, ball_scale : drawing parameters.
    scaled : scale the coordinates by `pct_var` before writing.
    edges : optional edges forwarded to ``make_edges_output``.
    coords_low, coords_high : optional bounds; ellipsoids are written only
        when both are given.
    ellipsoid_prefs : forwarded to ``make_mage_ellipsoids``.
    user_supplied_edges : forwarded to ``make_edges_output``.
    arrow_colors : dict forwarded to ``make_edges_output``; None selects
        ``{'line_color': 'white', 'head_color': 'red'}``.

    Returns
    -------
    list
        The accumulated kinemage output; the entries appended here are
        strings (some containing embedded newlines).
    """
    # A fresh dict per call: a dict literal in the signature would be a
    # single object shared by every call and by the helper it is passed to.
    if arrow_colors is None:
        arrow_colors = {'line_color': 'white', 'head_color': 'red'}

    result = []

    # Scale the coords and generate header labels
    if scaled:
        scalars = pct_var
        if custom_axes:
            # create a dummy vector of ones to avoid scaling custom axes
            custom_scalars = scalars[0] * np.ones(len(custom_axes))
            scalars = np.append(custom_scalars, scalars)
        coords = scale_pc_data_matrix(coords, scalars)
        if coords_low is not None:
            coords_low = scale_pc_data_matrix(coords_low, scalars)
        if coords_high is not None:
            coords_high = scale_pc_data_matrix(coords_high, scalars)
        header_suffix = '_scaled'
    else:
        header_suffix = '_unscaled'

    if radius is None:
        radius = float(auto_radius(coords)) * float(ball_scale)
    else:
        radius = float(radius) * float(ball_scale)

    maxes = coords.max(0)[:num_coords]
    mins = coords.min(0)[:num_coords]
    pct_var = pct_var[:num_coords]    # scale from fraction

    # check that we didn't get fewer dimensions than we wanted
    if len(mins) < num_coords:
        num_coords = len(mins)
    min_maxes = flatten(zip(mins, maxes))

    if custom_axes:
        axis_names = ['PC%s' % (i + 1)
                      for i in xrange(num_coords - len(custom_axes))]
        axis_names = custom_axes + axis_names
    else:
        axis_names = ['PC%s' % (i + 1) for i in xrange(num_coords)]

    # Write the header information
    result.append('@kinemage {%s}' % (name + header_suffix))
    result.append('@dimension ' +
                  ' '.join(['{%s}' % (axis_name) for axis_name in axis_names]))
    result.append('@dimminmax ' + ' '.join(map(str, min_maxes)))
    result.append('@master {points}')
    result.append('@master {labels}')
    if edges:
        result.append('@master {edges}')

    if taxa is not None:
        result.append('@master {taxa_points}')
        result.append('@master {taxa_labels}')

    # Color definitions, ordered by their key in data_colors
    for _, data_color in sorted(data_colors.items()):
        result.append(data_color.toMage())

    if background_color == 'white':
        result.append('@whitebackground')
        result.append('@hsvcolor {black} 0.0 0.0 0.0')
    else:
        result.append('@hsvcolor {white} 180.0 0.0 100.0')

    # Write the groups, colors and coords
    coord_dict = dict(zip(coord_header, coords))
    if coords_low is not None:
        coord_low_dict = dict(zip(coord_header, coords_low))
    if coords_high is not None:
        coord_high_dict = dict(zip(coord_header, coords_high))
    draw_ellipsoids = (coords_low is not None) and (coords_high is not None)

    for group_name in natsort(groups):
        ids = groups[group_name]
        result.append('@group {%s (n=%s)} collapsible' %
                      (group_name, len(ids)))

        color = colors[group_name]
        # ids without coordinates are skipped (the n= count above still
        # includes them)
        coord_lines = []
        for id_ in sorted(ids):
            if id_ in coord_dict:
                coord_lines.append('{%s} %s' % (
                    id_, ' '.join(map(str, coord_dict[id_][:num_coords]))))
        coord_block = '\n'.join(coord_lines)
        list_params = (color, radius, alpha, num_coords)

        # create list of balls, one for each sample
        result.append('@balllist color=%s radius=%s alpha=%s dimension=%s '
                      'master={points} nobutton' % list_params)
        result.append(coord_block)
        # make ellipsoids if low and high coord bounds were received
        if draw_ellipsoids:
            # create one trianglelist for each sample to define ellipsoids
            result += make_mage_ellipsoids(ids, coord_dict, coord_low_dict,
                                           coord_high_dict, color,
                                           ellipsoid_prefs)

        # create list of labels
        result.append('@labellist color=%s radius=%s alpha=%s dimension=%s '
                      'master={labels} nobutton' % list_params)
        result.append(coord_block)

    if taxa is not None:
        result += make_mage_taxa(taxa, num_coords, pct_var,
                                 scaled=scaled, scalars=None, radius=radius)

    # Write the axes on the bottom of the graph
    result.append('@group {axes} collapsible')
    state = 'on'
    axis_mins = mins * coord_scale
    axis_maxes = maxes * coord_scale

    if not custom_axes:
        custom_axes = []
    num_custom = len(custom_axes)

    # draw each axis
    for i in xrange(num_coords):
        # the first three axes are emitted 'on', every later one 'off'
        if i == 3:
            state = 'off'
        result.append('@vectorlist {%s line} dimension=%s %s' %
                      (axis_names[i], num_coords, state))

        result.append(' '.join(map(str, axis_mins)) + ' ' + label_color)
        end = axis_mins.copy()
        end[i] = axis_maxes[i]
        result.append(' '.join(map(str, end)) + ' ' + label_color)
        end[i] *= coord_scale  # add scale factor to offset labels a little

        # custom axes come first, no "percent variance" shown
        if i < num_custom:
            axis_label = '{%s}' % (axis_names[i])
        # if all custom axes have been drawn, draw normal PC axes
        else:
            pct = pct_var[i - num_custom]
            axis_label = '{%s (%0.2g%%)}' % (axis_names[i], pct)
        result.append('@labellist %s dimension=%s %s' %
                      (axis_label, num_coords, state))
        result.append(axis_label + ' '.join(map(str, end)) + ' ' +
                      label_color)

    # Write edges if requested
    if edges:
        result += make_edges_output(coord_dict, edges, num_coords, label_color,
                                    arrow_colors=arrow_colors,
                                    user_supplied_edges=user_supplied_edges)
    return result
Example #22
0
def make_averages(color_prefs, data, background_color, label_color, rares,
                  output_dir, resolution, imagetype, ymax, suppress_webpage,
                  std_type, output_type="file_creation"):
    """Drive the rarefaction plotting: build plots and the optional html.

    For every rarefaction file in `rares` the per-sample data is parsed,
    averaged per category, plotted, and (unless `suppress_webpage`) recorded
    in the legend/data dictionaries handed to ``make_html``.

    Parameters
    ----------
    color_prefs : coloring preferences forwarded to ``iter_color_groups``.
    data : dict; ``data['map']`` is a list of mapping rows whose first row
        is the header and whose first column holds the sample id.
    background_color, label_color, resolution, imagetype : forwarded to the
        plotting helpers.
    rares : mapping of rarefaction file name -> 4-item sequence
        ``(col_headers, comments, rarefaction_fn, rarefaction_data)``.
    output_dir : base directory for the ``html_plots``, ``average_plots``
        and ``average_tables`` sub-directories.
    ymax : y-axis limit; when falsy it is recomputed for each rarefaction
        file from the averaged data.
    suppress_webpage : when true only the average plots are produced and
        None is returned.
    std_type : forwarded to ``save_rarefaction_data``.
    output_type : ``"file_creation"`` (write to disk) or ``"memory"``.

    Returns
    -------
    The value returned by ``make_html``, or None when `suppress_webpage`
    is true.
    """
    # A falsy ymax means "derive the y-axis limit from the data"
    user_ymax = bool(ymax)

    ave_data_file_path = os.path.join(output_dir, 'average_tables')
    if output_type == "file_creation":
        # in this option the path must include the output directory.
        # Both paths are defined even when the webpage is suppressed: the
        # average plots are still written in that case, so ave_output_dir
        # must always exist (it used to be unbound, raising NameError).
        all_output_dir = os.path.join(output_dir, 'html_plots')
        ave_output_dir = os.path.join(output_dir, 'average_plots')

        # Create the directories, where plots and data will be written
        if not suppress_webpage:
            create_dir(all_output_dir)
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)
    elif output_type == 'memory':
        # this is rather an artificial path to work with the javascript code
        all_output_dir = 'plot/html_plots'
        ave_output_dir = 'plot/average_plots'

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}

    # this is a fix for the issue of writing field values as the filenames
    mapping_lookup = {}
    for i, column in enumerate(data['map'][0]):
        for j, row in enumerate(data['map'][1:]):
            mapping_lookup['%s-%s' % (column, row[i])] = \
                'col_%s_row_%s' % (str(i), str(j))

    all_plots = []
    first_file = True

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split('.')[0]

        # convert the rarefaction data into variables
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # Here we only need to perform these steps once, since the data is
        # the same for all rarefaction files
        if first_file:
            # Remove samples from the mapping file, which contain no data
            # after rarefaction
            updated_mapping = []
            for j in data['map']:
                # Add the mapping header
                if j[0] == 'SampleID':
                    updated_mapping.append(j)

                # Determine if the sample exists in the rarefaction file
                for i in col_headers[3:]:
                    if j[0] == i:
                        updated_mapping.append(j)

            # Sample ids kept in the mapping, built once for fast lookups
            mapped_sample_ids = set(row[0] for row in updated_mapping)

            # Get the groups and colors for the updated mapping file
            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = \
            get_rarefaction_data(rarefaction_data, col_headers)

        rarefaction_legend_mat[metric_name] = {'groups': {}}

        # Create dictionary variables and get the colors for each Sample
        sample_colors = None
        for entry in groups_and_colors:
            labelname = entry[0]
            # Create a legend dictionary for html output
            rarefaction_legend_mat[metric_name]['groups'][labelname] = {}
            # If this is the first time iterating through the rarefaction
            # data create a data dictionary for html output
            if first_file:
                rarefaction_data_mat[labelname] = {}

            # If the labelname is SampleID, use the colors assigned
            if labelname == 'SampleID':
                sample_colors = entry[2]
                sample_data_colors = entry[3]

        first_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(iter_color_groups(
                updated_mapping,
                {'SampleID': {'column': 'SampleID', 'colors':
                              (('red', (0, 100, 100)),
                               ('blue', (240, 100, 100)))}}))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Create a dictionary containing the samples:
        # sample id -> {sequences per sample -> [values]}
        sample_dict = {}
        for i, sid in enumerate(sampleIDs):
            if sid in mapped_sample_ids:
                sample_dict[sid] = {}
                for j, seq in enumerate(seqs_per_samp):
                    sample_dict[sid].setdefault(seq, []).append(
                        rare_mat_trans[i][j])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp,
                                           sampleIDs)

        # calculate the max xval: one extra step past the last x value
        xmax = max(xaxisvals) + (xaxisvals[len(xaxisvals) - 1] -
                                 xaxisvals[len(xaxisvals) - 2])

        if not user_ymax:
            # NOTE(review): the average tables below are only written when
            # ymax is auto-computed; confirm whether they should also be
            # written when the caller supplies ymax.
            ymax = 0
            ave_file_path = os.path.join(ave_data_file_path, metric_name)
            for entry in groups_and_colors:
                labelname = entry[0]
                groups = entry[1]
                colors = entry[2]
                data_colors = entry[3]

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # write out the rarefaction average data
                if output_type == "file_creation":
                    # context manager so the handle is flushed and closed
                    with open(ave_file_path + labelname + '.txt',
                              'w') as ave_file:
                        ave_file.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    ''.join(rare_lines[:]).split('\n'))

                # determine the ymax based on the average data
                # multiple the ymax, since the dots can end up on the border
                series_max = max([max(v)
                                  for v in rares_data['series'].values()])
                error_max = max([max(e)
                                 for e in rares_data['error'].values()])
                new_ymax = (series_max + error_max) * 1.15
                if isnan(new_ymax):
                    # fall back to the series alone when the errors are NaN
                    new_ymax = series_max * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        samples_legend_done = False

        # iterate through the groups
        for entry in groups_and_colors:
            labelname = entry[0]
            groups = entry[1]
            colors = entry[2]
            data_colors = entry[3]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                ''.join(rare_lines[:]).split('\n'))

            if not suppress_webpage:
                # Looked up afresh on every pass because make_plots hands
                # back a (possibly new) legend dictionary each time.
                metric_legend = rarefaction_legend_mat[metric_name]

                if not samples_legend_done:
                    # Add the per-sample colors to the legend dictionary
                    metric_legend['samples'] = {}
                    for o in sample_dict:
                        metric_legend['samples'][o] = {
                            'color':
                            sample_data_colors[sample_colors[o]].toHex()}
                    samples_legend_done = True

                # Iterate through the groups and create the legend
                # dictionary of samples and their colors
                for g in groups:
                    metric_legend['groups'][labelname][g] = {
                        'groupsamples': groups[g],
                        'groupcolor': data_colors[colors[g]].toHex()}

                # Create the individual category average plots
                if output_type in ("file_creation", "memory"):
                    plot_results = make_plots(
                        background_color, label_color,
                        rares_data, ymax, xmax, all_output_dir,
                        resolution, imagetype, groups, colors,
                        data_colors, metric_name, labelname,
                        rarefaction_data_mat, rarefaction_legend_mat,
                        sample_dict, sample_data_colors,
                        sample_colors, mapping_lookup, output_type)
                    if output_type == "memory":
                        (rarefaction_data_mat, rarefaction_legend_mat,
                         all_plots_single, all_plots_ave) = plot_results
                    else:
                        rarefaction_data_mat, rarefaction_legend_mat = \
                            plot_results

            # generate the filepath for the image file
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data['headers'][0])[1])[0])

            # Create the average plots
            categories = [k for k in groups]
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data['xaxis'], rares_data['series'],
                rares_data['error'], xmax, ymax, categories,
                labelname, imagetype, resolution, data_colors,
                colors, file_path, background_color, label_color,
                metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(rarefaction_legend_mat, rarefaction_data_mat,
                     xaxisvals, imagetype, mapping_lookup, output_type,
                     all_plots)