Example #1
0
 def test_parse_rarefaction_data(self):
     """parse_rarefaction_data returns the expected dict for averaged lines.

     The input is a list of text lines: two '#' header lines, an 'xaxis:'
     line, an 'xmax:' line and, for each '>>' option, its 'color', 'series'
     and 'error' lines. The expected result carries the keys 'headers',
     'error', 'options', 'xaxis', 'series' and 'color'; it has no entry
     for the 'xmax:' line.
     """
     self.data = {
         'headers': ['PD_whole_tree.txt', 'Antibiotics'],
         'error': {
             'NA': [0.099969643842700004],
             'Y': [0.105669693476, 1.08546135424, 1.5626248357999999],
             'N': [0.101173002974],
         },
         'options': ['Y', 'NA', 'N'],
         'xaxis': [10.0, 310.0, 610.0, 910.0, 1210.0, 1510.0,
                   1810.0, 2110.0, 2410.0, 2710.0, 3010.0],
         'series': {
             'NA': [0.88581050485400004],
             'Y': [0.918845147059, 7.1758656176500004, 9.9186072941199992],
             'N': [0.92636763785999998],
         },
         'color': {'NA': '#00ff00', 'Y': '#ff0000', 'N': '#0000ff'},
     }

     self.rarefaction_series_data = [
         '# PD_whole_tree.txt',
         '# Antibiotics',
         # The long xaxis line is wrapped with adjacent string literals.
         # A backslash-newline inside the literal would embed the next
         # line's indentation into the fixture, making the test input
         # depend on how this file happens to be indented.
         'xaxis: 10.0\t310.0\t610.0\t910.0\t1210.0\t1510.0\t1810.0\t2110.0'
         '\t2410.0\t2710.0\t3010.0\t',
         'xmax: 3310.0',
         '>> Y',
         'color #ff0000',
         'series 0.918845147059\t7.17586561765\t9.91860729412\t',
         'error 0.105669693476\t1.08546135424\t1.5626248358\t',
         '>> NA',
         'color #00ff00',
         'series 0.885810504854\t',
         'error 0.0999696438427\t',
         '>> N',
         'color #0000ff',
         'series 0.92636763786\t',
         'error 0.101173002974',
     ]

     test = parse_rarefaction_data(self.rarefaction_series_data)
     self.assertEqual(test, self.data)
Example #2
0
    def test_parse_rarefaction_data(self):
        """parse_rarefaction_data returns the expected dict for averaged lines.

        The input is a list of text lines: two '#' header lines, an 'xaxis:'
        line, an 'xmax:' line and, for each '>>' option, its 'color',
        'series' and 'error' lines. The expected result carries the keys
        'headers', 'error', 'options', 'xaxis', 'series' and 'color'; it has
        no entry for the 'xmax:' line.
        """
        self.data = {
            'headers': ['PD_whole_tree.txt', 'Antibiotics'],
            'error': {
                'NA': [0.099969643842700004],
                'Y': [0.105669693476, 1.08546135424, 1.5626248357999999],
                'N': [0.101173002974],
            },
            'options': ['Y', 'NA', 'N'],
            'xaxis': [10.0, 310.0, 610.0, 910.0, 1210.0, 1510.0,
                      1810.0, 2110.0, 2410.0, 2710.0, 3010.0],
            'series': {
                'NA': [0.88581050485400004],
                'Y': [0.918845147059, 7.1758656176500004, 9.9186072941199992],
                'N': [0.92636763785999998],
            },
            'color': {'NA': '#00ff00', 'Y': '#ff0000', 'N': '#0000ff'},
        }

        self.rarefaction_series_data = [
            '# PD_whole_tree.txt',
            '# Antibiotics',
            # The long xaxis line is wrapped with adjacent string literals.
            # A backslash-newline inside the literal would embed the next
            # line's indentation into the fixture, making the test input
            # depend on how this file happens to be indented.
            'xaxis: 10.0\t310.0\t610.0\t910.0\t1210.0\t1510.0\t1810.0\t2110.0'
            '\t2410.0\t2710.0\t3010.0\t',
            'xmax: 3310.0',
            '>> Y',
            'color #ff0000',
            'series 0.918845147059\t7.17586561765\t9.91860729412\t',
            'error 0.105669693476\t1.08546135424\t1.5626248358\t',
            '>> NA',
            'color #00ff00',
            'series 0.885810504854\t',
            'error 0.0999696438427\t',
            '>> N',
            'color #0000ff',
            'series 0.92636763786\t',
            'error 0.101173002974',
        ]

        test = parse_rarefaction_data(self.rarefaction_series_data)
        self.assertEqual(test, self.data)
def make_averages(
    color_prefs,
    data,
    background_color,
    label_color,
    rares,
    output_dir,
    resolution,
    imagetype,
    ymax,
    suppress_webpage,
    std_type,
    output_type="file_creation",
):
    """Generate average rarefaction plots and, optionally, the HTML summary.

    For every rarefaction file in ``rares`` (visited in ``natsort`` order) the
    per-sample values are averaged, grouped according to the mapping
    categories, and handed to the plotting helpers.

    Args:
        color_prefs: Colouring preferences forwarded to ``iter_color_groups``.
        data: Dict whose ``"map"`` entry is the mapping file as a list of rows;
            the first row is used as the column header.
        background_color: Forwarded to the plotting helpers.
        label_color: Forwarded to the plotting helpers.
        rares: Mapping of rarefaction file name to a 4-item sequence
            ``(col_headers, comments, rarefaction_fn, rarefaction_data)``.
            The text before the first ``"."`` of the name is the metric name.
        output_dir: Base directory for everything written to disk.
        resolution: Forwarded to the plotting helpers.
        imagetype: Forwarded to the plotting helpers and ``make_html``.
        ymax: Upper y-axis limit. A falsy value means the limit is derived
            from the averaged data separately for each metric.
        suppress_webpage: When true, only the average plots are produced and
            no HTML is generated.
        std_type: Forwarded to ``save_rarefaction_data``.
        output_type: ``"file_creation"`` (default) writes to ``output_dir``;
            ``"memory"`` collects the plots and uses fixed relative paths.

    Returns:
        The value returned by ``make_html``, or ``None`` when
        ``suppress_webpage`` is true.
    """
    # A truthy ymax is a fixed, caller-chosen limit; otherwise it is
    # recomputed for every metric inside the loop below.
    user_ymax = bool(ymax)

    if output_type == "file_creation":
        # Both paths are needed whenever files are written. Previously they
        # were only assigned when the webpage was requested, so
        # suppress_webpage=True failed on the create_dir() call below.
        all_output_dir = os.path.join(output_dir, "html_plots")
        ave_output_dir = os.path.join(output_dir, "average_plots")
        if not suppress_webpage:
            create_dir(all_output_dir)
    elif output_type == "memory":
        # this is rather an artificial path to work with the javascript code
        all_output_dir = "plot/html_plots"
        ave_output_dir = "plot/average_plots"

    ave_data_file_path = os.path.join(output_dir, "average_tables")
    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}

    # Map every "<column>-<value>" pair of the mapping file to a
    # "col_<i>_row_<j>" token so that field values are not used as filenames.
    mapping_lookup = {}
    map_rows = data["map"][1:]
    for col_idx, column in enumerate(data["map"][0]):
        for row_idx, row in enumerate(map_rows):
            mapping_lookup["%s-%s" % (column, row[col_idx])] = "col_%s_row_%s" % (col_idx, row_idx)

    all_plots = []
    is_first_file = True

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split(".")[0]
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # The mapping file is the same for all rarefaction files, so it is
        # filtered and coloured only once.
        if is_first_file:
            # Keep the header row plus every sample that still appears in the
            # rarefaction columns (sample columns start at index 3).
            rarefied_ids = col_headers[3:]
            updated_mapping = []
            for map_row in data["map"]:
                if map_row[0] == "SampleID":
                    updated_mapping.append(map_row)
                for rarefied_id in rarefied_ids:
                    if map_row[0] == rarefied_id:
                        updated_mapping.append(map_row)

            groups_and_colors = list(iter_color_groups(updated_mapping, color_prefs))

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(rarefaction_data, col_headers)

        # Start a fresh legend entry for this metric and pick up the
        # per-sample colours if a "SampleID" category was coloured.
        rarefaction_legend_mat[metric_name] = {"groups": {}}
        sample_colors = None
        for group_info in groups_and_colors:
            labelname = group_info[0]
            rarefaction_legend_mat[metric_name]["groups"][labelname] = {}
            if is_first_file:
                rarefaction_data_mat[labelname] = {}
            if labelname == "SampleID":
                sample_colors = group_info[2]
                sample_data_colors = group_info[3]

        is_first_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            samples_and_colors = list(
                iter_color_groups(
                    updated_mapping,
                    {
                        "SampleID": {
                            "column": "SampleID",
                            "colors": (("red", (0, 100, 100)), ("blue", (240, 100, 100))),
                        }
                    },
                )
            )
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Collect, per mapped sample, the values observed at each sequencing
        # depth. The id set is built once instead of rescanning the mapping
        # for every sample.
        mapped_ids = set(map_row[0] for map_row in updated_mapping)
        sample_dict = {}
        for samp_idx, sid in enumerate(sampleIDs):
            if sid in mapped_ids:
                per_depth = sample_dict[sid] = {}
                for depth_idx, seq in enumerate(seqs_per_samp):
                    per_depth.setdefault(seq, []).append(rare_mat_trans[samp_idx][depth_idx])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp, sampleIDs)

        # The x-axis extends one step past the largest depth. With a single
        # depth the index n - 2 wraps around to the same element, giving a
        # step of 0.
        n_points = len(xaxisvals)
        xmax = max(xaxisvals) + (xaxisvals[n_points - 1] - xaxisvals[n_points - 2])

        if not user_ymax:
            ymax = 0
            ave_file_path = os.path.join(ave_data_file_path, metric_name)
            for group_info in groups_and_colors:
                labelname, groups, colors, data_colors = group_info[:4]

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r, data_colors, groups, std_type
                )

                # NOTE(review): the average tables are only written on this
                # path, i.e. when no ymax was supplied. Kept as-is; confirm
                # whether they should also be written for a user-defined ymax.
                if output_type == "file_creation":
                    # The context manager closes the file deterministically.
                    with open(ave_file_path + labelname + ".txt", "w") as ave_file:
                        ave_file.writelines(rare_lines)

                # take the formatted rarefaction averages and format the results
                rares_data = parse_rarefaction_data("".join(rare_lines).split("\n"))

                # Pad the limit by 15%, since the dots can end up on the
                # border. Fall back to the series alone when the error term
                # makes the sum NaN.
                max_series = max([max(v) for v in rares_data["series"].values()])
                max_error = max([max(e) for e in rares_data["error"].values()])
                new_ymax = (max_series + max_error) * 1.15
                if isnan(new_ymax):
                    new_ymax = max_series * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        samples_legend_done = False

        # iterate through the groups
        for group_info in groups_and_colors:
            labelname, groups, colors, data_colors = group_info[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r, data_colors, groups, std_type
            )

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data("".join(rare_lines).split("\n"))

            if not suppress_webpage:
                # Look the legend up on every pass: make_plots() below may
                # hand back a different legend object.
                metric_legend = rarefaction_legend_mat[metric_name]

                # The per-sample legend is filled once per metric.
                if not samples_legend_done:
                    metric_legend["samples"] = {}
                    for sid in sample_dict:
                        metric_legend["samples"][sid] = {
                            "color": sample_data_colors[sample_colors[sid]].toHex()
                        }
                    samples_legend_done = True

                # Record the samples and colour of every group of this category.
                label_legend = metric_legend["groups"][labelname]
                for g in groups:
                    label_legend[g] = {
                        "groupsamples": groups[g],
                        "groupcolor": data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots. The call is
                # the same for both output types; only the shape of the
                # returned tuple differs.
                if output_type in ("file_creation", "memory"):
                    plot_result = make_plots(
                        background_color,
                        label_color,
                        rares_data,
                        ymax,
                        xmax,
                        all_output_dir,
                        resolution,
                        imagetype,
                        groups,
                        colors,
                        data_colors,
                        metric_name,
                        labelname,
                        rarefaction_data_mat,
                        rarefaction_legend_mat,
                        sample_dict,
                        sample_data_colors,
                        sample_colors,
                        mapping_lookup,
                        output_type,
                    )
                    if output_type == "file_creation":
                        rarefaction_data_mat, rarefaction_legend_mat = plot_result
                    else:
                        (
                            rarefaction_data_mat,
                            rarefaction_legend_mat,
                            all_plots_single,
                            all_plots_ave,
                        ) = plot_result

            # The average plot is produced whether or not the webpage is
            # requested; its file name comes from the first header line.
            file_path = os.path.join(ave_output_dir, splitext(split(rares_data["headers"][0])[1])[0])
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data["xaxis"],
                rares_data["series"],
                rares_data["error"],
                xmax,
                ymax,
                categories,
                labelname,
                imagetype,
                resolution,
                data_colors,
                colors,
                file_path,
                background_color,
                label_color,
                metric_name,
                output_type,
            )

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(
        rarefaction_legend_mat, rarefaction_data_mat, xaxisvals, imagetype, mapping_lookup, output_type, all_plots
    )
Example #4
0
def make_averages(color_prefs, data, background_color, label_color, rares,
                  output_dir, resolution, imagetype, ymax, suppress_webpage,
                  std_type, output_type="file_creation"):
    """Generate average rarefaction plots and, optionally, the HTML summary.

    For every rarefaction file in ``rares`` (visited in ``natsort`` order) the
    per-sample values are averaged, grouped according to the mapping
    categories, and handed to the plotting helpers.

    Args:
        color_prefs: Colouring preferences forwarded to ``iter_color_groups``.
        data: Dict whose ``'map'`` entry is the mapping file as a list of rows;
            the first row is used as the column header.
        background_color: Forwarded to the plotting helpers.
        label_color: Forwarded to the plotting helpers.
        rares: Mapping of rarefaction file name to a 4-item sequence
            ``(col_headers, comments, rarefaction_fn, rarefaction_data)``.
            The text before the first ``'.'`` of the name is the metric name.
        output_dir: Base directory for everything written to disk.
        resolution: Forwarded to the plotting helpers.
        imagetype: Forwarded to the plotting helpers and ``make_html``.
        ymax: Upper y-axis limit. A falsy value means the limit is derived
            from the averaged data separately for each metric.
        suppress_webpage: When true, only the average plots are produced and
            no HTML is generated.
        std_type: Forwarded to ``save_rarefaction_data``.
        output_type: ``"file_creation"`` (default) writes to ``output_dir``;
            ``"memory"`` collects the plots and uses fixed relative paths.

    Returns:
        The value returned by ``make_html``, or ``None`` when
        ``suppress_webpage`` is true.
    """
    # A truthy ymax is a fixed, caller-chosen limit; otherwise it is
    # recomputed for every metric inside the loop below.
    user_ymax = bool(ymax)

    if output_type == "file_creation":
        # Both paths are needed whenever files are written. Previously they
        # were only assigned when the webpage was requested, so
        # suppress_webpage=True failed on the create_dir() call below.
        all_output_dir = os.path.join(output_dir, 'html_plots')
        ave_output_dir = os.path.join(output_dir, 'average_plots')
        if not suppress_webpage:
            create_dir(all_output_dir)
    elif output_type == 'memory':
        # this is rather an artificial path to work with the javascript code
        all_output_dir = 'plot/html_plots'
        ave_output_dir = 'plot/average_plots'

    ave_data_file_path = os.path.join(output_dir, 'average_tables')
    if output_type == "file_creation":
        create_dir(ave_output_dir)
        create_dir(ave_data_file_path, False)

    rarefaction_legend_mat = {}
    rarefaction_data_mat = {}

    # Map every "<column>-<value>" pair of the mapping file to a
    # "col_<i>_row_<j>" token so that field values are not used as filenames.
    mapping_lookup = {}
    map_rows = data['map'][1:]
    for col_idx, column in enumerate(data['map'][0]):
        for row_idx, row in enumerate(map_rows):
            mapping_lookup['%s-%s' % (column, row[col_idx])] = \
                'col_%s_row_%s' % (col_idx, row_idx)

    all_plots = []
    is_first_file = True

    # Iterate through the rarefaction files
    for r in natsort(rares):
        metric_name = r.split('.')[0]
        col_headers, comments, rarefaction_fn, rarefaction_data = rares[r]

        # The mapping file is the same for all rarefaction files, so it is
        # filtered and coloured only once.
        if is_first_file:
            # Keep the header row plus every sample that still appears in the
            # rarefaction columns (sample columns start at index 3).
            rarefied_ids = col_headers[3:]
            updated_mapping = []
            for map_row in data['map']:
                if map_row[0] == 'SampleID':
                    updated_mapping.append(map_row)
                for rarefied_id in rarefied_ids:
                    if map_row[0] == rarefied_id:
                        updated_mapping.append(map_row)

            groups_and_colors = list(
                iter_color_groups(updated_mapping, color_prefs))

        # parse the rarefaction data
        rare_mat_trans, seqs_per_samp, sampleIDs = get_rarefaction_data(
            rarefaction_data, col_headers)

        # Start a fresh legend entry for this metric and pick up the
        # per-sample colours if a 'SampleID' category was coloured.
        rarefaction_legend_mat[metric_name] = {'groups': {}}
        sample_colors = None
        for group_info in groups_and_colors:
            labelname = group_info[0]
            rarefaction_legend_mat[metric_name]['groups'][labelname] = {}
            if is_first_file:
                rarefaction_data_mat[labelname] = {}
            if labelname == 'SampleID':
                sample_colors = group_info[2]
                sample_data_colors = group_info[3]

        is_first_file = False

        # If sample colors were not assigned, create a list of sample colors
        if not sample_colors:
            default_prefs = {
                'SampleID': {
                    'column': 'SampleID',
                    'colors': (('red', (0, 100, 100)),
                               ('blue', (240, 100, 100))),
                }
            }
            samples_and_colors = list(
                iter_color_groups(updated_mapping, default_prefs))
            sample_colors = samples_and_colors[0][2]
            sample_data_colors = samples_and_colors[0][3]

        # Collect, per mapped sample, the values observed at each sequencing
        # depth. The id set is built once instead of rescanning the mapping
        # for every sample.
        mapped_ids = set(map_row[0] for map_row in updated_mapping)
        sample_dict = {}
        for samp_idx, sid in enumerate(sampleIDs):
            if sid in mapped_ids:
                per_depth = sample_dict[sid] = {}
                for depth_idx, seq in enumerate(seqs_per_samp):
                    per_depth.setdefault(seq, []).append(
                        rare_mat_trans[samp_idx][depth_idx])

        # convert xvals to float
        xaxisvals = sorted(float(x) for x in set(seqs_per_samp))

        # get the rarefaction averages
        rare_mat_ave = ave_seqs_per_sample(rare_mat_trans, seqs_per_samp,
                                           sampleIDs)

        # The x-axis extends one step past the largest depth. With a single
        # depth the index n - 2 wraps around to the same element, giving a
        # step of 0.
        n_points = len(xaxisvals)
        xmax = max(xaxisvals) + (xaxisvals[n_points - 1] -
                                 xaxisvals[n_points - 2])

        if not user_ymax:
            ymax = 0
            ave_file_path = os.path.join(ave_data_file_path, metric_name)
            for group_info in groups_and_colors:
                labelname, groups, colors, data_colors = group_info[:4]

                # save the rarefaction averages
                rare_lines = save_rarefaction_data(
                    rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                    data_colors, groups, std_type)

                # NOTE(review): the average tables are only written on this
                # path, i.e. when no ymax was supplied. Kept as-is; confirm
                # whether they should also be written for a user-defined ymax.
                if output_type == "file_creation":
                    # The context manager closes the file deterministically.
                    with open(ave_file_path + labelname + '.txt',
                              'w') as ave_file:
                        ave_file.writelines(rare_lines)

                # take the formatted rarefaction averages and format the
                # results
                rares_data = parse_rarefaction_data(
                    ''.join(rare_lines).split('\n'))

                # Pad the limit by 15%, since the dots can end up on the
                # border. Fall back to the series alone when the error term
                # makes the sum NaN.
                max_series = max(
                    [max(v) for v in rares_data['series'].values()])
                max_error = max(
                    [max(e) for e in rares_data['error'].values()])
                new_ymax = (max_series + max_error) * 1.15
                if isnan(new_ymax):
                    new_ymax = max_series * 1.15

                if new_ymax > ymax:
                    ymax = new_ymax

        samples_legend_done = False

        # iterate through the groups
        for group_info in groups_and_colors:
            labelname, groups, colors, data_colors = group_info[:4]

            # save the rarefaction averages
            rare_lines = save_rarefaction_data(
                rare_mat_ave, xaxisvals, xmax, labelname, colors, r,
                data_colors, groups, std_type)

            # take the formatted rarefaction averages and format the results
            rares_data = parse_rarefaction_data(
                ''.join(rare_lines).split('\n'))

            if not suppress_webpage:
                # Look the legend up on every pass: make_plots() below may
                # hand back a different legend object.
                metric_legend = rarefaction_legend_mat[metric_name]

                # The per-sample legend is filled once per metric.
                if not samples_legend_done:
                    metric_legend['samples'] = {}
                    for sid in sample_dict:
                        metric_legend['samples'][sid] = {
                            'color':
                            sample_data_colors[sample_colors[sid]].toHex()
                        }
                    samples_legend_done = True

                # Record the samples and colour of every group of this
                # category.
                label_legend = metric_legend['groups'][labelname]
                for g in groups:
                    label_legend[g] = {
                        'groupsamples': groups[g],
                        'groupcolor': data_colors[colors[g]].toHex(),
                    }

                # Create the individual category average plots. The call is
                # the same for both output types; only the shape of the
                # returned tuple differs.
                if output_type in ("file_creation", "memory"):
                    plot_result = make_plots(
                        background_color, label_color, rares_data, ymax,
                        xmax, all_output_dir, resolution, imagetype, groups,
                        colors, data_colors, metric_name, labelname,
                        rarefaction_data_mat, rarefaction_legend_mat,
                        sample_dict, sample_data_colors, sample_colors,
                        mapping_lookup, output_type)
                    if output_type == "file_creation":
                        rarefaction_data_mat, rarefaction_legend_mat = \
                            plot_result
                    else:
                        (rarefaction_data_mat, rarefaction_legend_mat,
                         all_plots_single, all_plots_ave) = plot_result

            # The average plot is produced whether or not the webpage is
            # requested; its file name comes from the first header line.
            file_path = os.path.join(
                ave_output_dir,
                splitext(split(rares_data['headers'][0])[1])[0])
            categories = list(groups)
            all_plots_rare = save_ave_rarefaction_plots(
                rares_data['xaxis'], rares_data['series'],
                rares_data['error'], xmax, ymax, categories, labelname,
                imagetype, resolution, data_colors, colors, file_path,
                background_color, label_color, metric_name, output_type)

            if not suppress_webpage and output_type == "memory":
                all_plots.append(all_plots_rare)
                all_plots.extend(all_plots_single)
                all_plots.append(all_plots_ave)

    if suppress_webpage:
        return None

    # format the html output
    return make_html(rarefaction_legend_mat, rarefaction_data_mat, xaxisvals,
                     imagetype, mapping_lookup, output_type, all_plots)