def genes_operons_plot(reference_value, contigs_fpaths, files_feature_in_contigs, plot_fpath, title):
    """Draw the cumulative count of complete features for every assembly.

    For each path in ``contigs_fpaths`` the curve starts at (0, 0) and point
    ``k`` is the sum of the first ``k`` entries of
    ``files_feature_in_contigs[path]``.  When ``reference_value`` is truthy a
    horizontal line at that height, spanning the widest curve, is added and
    labelled 'Reference'.

    Returns immediately when ``can_draw_plots`` is false.
    """
    if not can_draw_plots:
        return

    logger.info('  Drawing ' + title + ' cumulative plot...')

    curves = []
    rightmost_x = 0

    for assembly_fpath in contigs_fpaths:
        per_contig_counts = files_feature_in_contigs[assembly_fpath]

        # One x position per contig, plus the origin.
        indices = list(range(len(per_contig_counts) + 1))
        running_totals = [0]
        for count in per_contig_counts:
            running_totals.append(running_totals[-1] + count)

        rightmost_x = max(indices[-1], rightmost_x)

        color, ls = get_color_and_ls(assembly_fpath)
        curves.append(Plot(indices, running_totals, color, ls))

    if reference_value:
        flat_line = [reference_value, reference_value]
        curves.append(Plot([0, rightmost_x], flat_line, reference_color, reference_ls))

    # The full title doubles as the y-axis label.
    full_title = 'Cumulative # complete ' + title
    legend = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference_value:
        legend += ['Reference']
    create_plot(plot_fpath, full_title, curves, legend, x_label='Contig index', y_label=full_title)
def GC_content_plot(ref_fpath, contigs_fpaths, list_of_GC_distributions, plot_fpath):
    """Draw the GC content distribution of every assembly (and the reference).

    ``list_of_GC_distributions`` holds one ``(x_values, y_values)`` pair per
    entry of ``contigs_fpaths``; when ``ref_fpath`` is truthy, the entry at the
    position following the assemblies is drawn with the reference colour and
    line style and labelled 'Reference'.

    The input distributions are not modified: zero counts are replaced in a
    copy before plotting.

    Returns immediately when ``can_draw_plots`` is false or ``qconfig.no_gc``
    is set.
    """
    if not can_draw_plots or qconfig.no_gc:
        return
    title = 'GC content'
    logger.info('  Drawing ' + title + ' plot...')

    plots = []

    all_fpaths = contigs_fpaths
    if ref_fpath:
        all_fpaths = contigs_fpaths + [ref_fpath]

    for i, (GC_distribution_x, GC_distribution_y) in enumerate(list_of_GC_distributions):
        # Zero counts are replaced by 0.1 (the original code notes this is
        # "for log scale").  Build a new list so the caller's data stays intact.
        y_vals = [0.1 if v == 0 else v for v in GC_distribution_y]

        # The reference, when present, is the last entry of all_fpaths.
        if ref_fpath and (i == len(all_fpaths) - 1):
            color = reference_color
            ls = reference_ls
        else:
            color, ls = get_color_and_ls(all_fpaths[i])

        plots.append(Plot(GC_distribution_x, y_vals, color, ls))

    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    if ref_fpath:
        legend_list += ['Reference']
    create_plot(plot_fpath, title, plots, legend_list, x_label='GC (%)', y_label='# windows', x_limit=[0, 100])
def Nx_plot(results_dir, reduce_points, contigs_fpaths, lists_of_lengths, plot_fpath, title='Nx', reference_lengths=None):
    """Build Nx-style step curves, save them for the HTML report and draw them.

    For every assembly the lengths are sorted in place, longest first, and each
    contig contributes a step at the cumulative percentage of the total
    (``reference_lengths[i]`` when ``reference_lengths`` is given, otherwise the
    sum of the assembly's own lengths).

    Two series are kept per assembly: the full one, used for the drawn plot,
    and a reduced one, passed to ``html_saver.save_coord`` when
    ``qconfig.html_report`` is set.  With ``reduce_points`` true the reduced
    series skips contigs whose length differs from the last kept one by no
    more than ``qconfig.min_difference``.

    The image itself is only produced when ``can_draw_plots`` is true.
    """
    if can_draw_plots:
        logger.info('  Drawing ' + title + ' plot...')

    curves = []
    html_xs = []  # coordinates for Nx-like plots in HTML-report
    html_ys = []

    for idx, (assembly_fpath, lengths) in enumerate(zip(contigs_fpaths, lists_of_lengths)):
        if not lengths:
            # Keep the HTML data aligned with contigs_fpaths.
            html_xs.append([])
            html_ys.append([])
            continue

        lengths.sort(reverse=True)  # in place: the caller's list is reordered
        longest = lengths[0]
        reduced_x, reduced_y = [0.0], [longest]
        full_x, full_y = [0.0], [longest]

        # Nx uses the assembly's own total; NGx-like plots use the given reference length.
        denominator = sum(lengths)
        if reference_lengths:
            denominator = reference_lengths[idx]
        threshold = qconfig.min_difference if reduce_points else 0

        covered = 0
        for length in lengths:
            covered += length
            percent = covered * 100.0 / denominator
            if can_draw_plots:
                # Vertical edge (tiny x shift) followed by the horizontal step.
                full_x += [full_x[-1] + 1e-10, percent]
                full_y += [length, length]
            if len(reduced_x) == 1 or reduced_y[-1] - length > threshold:
                reduced_x += [reduced_x[-1] + 1e-10, percent]
                reduced_y += [length, length]

        html_xs.append(reduced_x)
        html_ys.append(reduced_y)
        if can_draw_plots:
            # Close both curves down to zero.  reduced_x / reduced_y are the
            # same list objects already stored in html_xs / html_ys, so the
            # HTML data receives this closing point as well.
            full_x.append(full_x[-1] + 1e-10)
            full_y.append(0.0)
            reduced_x.append(reduced_x[-1] + 1e-10)
            reduced_y.append(0.0)
            color, ls = get_color_and_ls(assembly_fpath)
            curves.append(Plot(full_x, full_y, color, ls))

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_coord(results_dir, html_xs, html_ys, 'coord' + title, contigs_fpaths)

    if not can_draw_plots:
        return

    legend = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    create_plot(plot_fpath, title, curves, legend, x_label='x', y_label='Contig length', x_limit=[0, 100])
def contigs_GC_content_plot(contigs_fpath, GC_distributions, plot_fpath):
    """Draw a bar chart of the GC content distribution of one assembly.

    ``GC_distributions`` is unpacked into a sequence of x positions and a
    sequence of counts; one bar of width 5 is drawn per (position, count) pair.

    Returns immediately when ``can_draw_plots`` is false or ``qconfig.no_gc``
    is set.
    """
    if not can_draw_plots or qconfig.no_gc:
        return
    title = label_from_fpath(contigs_fpath) + ' GC content'
    logger.info('  Drawing ' + title + ' plot...')

    color, ls = get_color_and_ls(contigs_fpath)
    gc_positions, gc_counts = GC_distributions
    bars = [Bar(position, count, color, width=5)
            for position, count in zip(gc_positions, gc_counts)]

    legend = [label_from_fpath(contigs_fpath)]
    create_plot(plot_fpath, title, bars, legend, x_label='GC (%)', y_label='# contigs', x_limit=[0, 100])
Exemple #5
0
def cumulative_plot(reference, contigs_fpaths, lists_of_lengths, plot_fpath,
                    title):
    """Draw cumulative length curves for the assemblies and the reference.

    Each curve starts at (0, 0); point ``k`` is the total length of the ``k``
    longest sequences.  When ``reference`` is truthy its sequence lengths are
    read with ``fastaparser.get_chr_lengths_from_fastafile`` and its curve is
    extended horizontally to the largest x value of the plot.

    Returns immediately when ``can_draw_plots`` is false.
    """
    if not can_draw_plots:
        return

    logger.info('  Drawing cumulative plot...')

    def running_totals(sizes):
        # Partial sums of the sizes taken from largest to smallest, starting at 0.
        totals = [0]
        for size in sorted(sizes, reverse=True):
            totals.append(totals[-1] + size)
        return totals

    curves = []
    widest_x = 0

    for assembly_fpath, lengths in zip(contigs_fpaths, lists_of_lengths):
        totals = running_totals(lengths)
        indices = list(range(len(totals)))
        widest_x = max(indices[-1], widest_x)
        color, ls = get_color_and_ls(assembly_fpath)
        curves.append(Plot(indices, totals, color, ls))

    if reference:
        ref_totals = running_totals(
            fastaparser.get_chr_lengths_from_fastafile(reference).values())
        ref_indices = list(range(len(ref_totals)))
        widest_x = max(widest_x, ref_indices[-1])
        # Extend the reference curve to the max X-axis point at its final height.
        ref_totals.append(ref_totals[-1])
        ref_indices.append(widest_x)
        curves.append(Plot(ref_indices, ref_totals, reference_color, reference_ls))

    legend = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference:
        legend += ['Reference']

    create_plot(plot_fpath,
                title,
                curves,
                legend,
                x_label='Contig index',
                y_label='Cumulative length',
                x_limit=[0, widest_x])
Exemple #6
0
def add_lengths_to_report(lengths, reporting, contigs_fpath):
    """Add per-threshold contig counts and total lengths to an assembly's report.

    For every value in ``qconfig.contig_thresholds`` the report receives the
    number of contigs, and the summed length of contigs, that are at least that
    long.  When ``qconfig.scaffolds`` is set and the assembly is either listed
    in ``qconfig.dict_of_broken_scaffolds`` or drawn with the secondary line
    style, thresholds below ``qconfig.min_contig`` are reported as ``None``.

    Does nothing when ``reporting`` is falsy.
    """
    if not reporting:
        return

    report = reporting.get(contigs_fpath)

    is_broken = bool(qconfig.scaffolds and (
        contigs_fpath in qconfig.dict_of_broken_scaffolds
        or plotter_data.get_color_and_ls(contigs_fpath)[1] == plotter_data.secondary_line_style))
    min_threshold = qconfig.min_contig if is_broken else 0

    def per_threshold(aggregate):
        # One entry per configured threshold; None where the threshold is too small.
        return [aggregate(threshold) if threshold >= min_threshold else None
                for threshold in qconfig.contig_thresholds]

    report.add_field(reporting.Fields.CONTIGS__FOR_THRESHOLDS,
                     per_threshold(lambda bound: sum(1 for l in lengths if l >= bound)))
    report.add_field(reporting.Fields.TOTALLENS__FOR_THRESHOLDS,
                     per_threshold(lambda bound: sum(l for l in lengths if l >= bound)))
Exemple #7
0
def add_lengths_to_report(lengths, reporting, contigs_fpath):
    """Add per-threshold contig counts and total lengths to an assembly's report.

    For every value in ``qconfig.contig_thresholds`` the report receives the
    number of contigs, and the summed length of contigs, that are at least that
    long.  When ``qconfig.scaffolds`` is set and the assembly is either listed
    in ``qconfig.dict_of_broken_scaffolds`` or drawn with the secondary line
    style, thresholds below ``qconfig.min_contig`` are reported as ``None``.

    Does nothing when ``reporting`` is falsy.
    """
    if not reporting:
        return

    report = reporting.get(contigs_fpath)

    is_broken = bool(qconfig.scaffolds and (
        contigs_fpath in qconfig.dict_of_broken_scaffolds
        or plotter_data.get_color_and_ls(contigs_fpath)[1] == plotter_data.secondary_line_style))
    min_threshold = qconfig.min_contig if is_broken else 0

    def per_threshold(aggregate):
        # One entry per configured threshold; None where the threshold is too small.
        return [aggregate(threshold) if threshold >= min_threshold else None
                for threshold in qconfig.contig_thresholds]

    report.add_field(reporting.Fields.CONTIGS__FOR_THRESHOLDS,
                     per_threshold(lambda bound: sum(1 for l in lengths if l >= bound)))
    report.add_field(reporting.Fields.TOTALLENS__FOR_THRESHOLDS,
                     per_threshold(lambda bound: sum(l for l in lengths if l >= bound)))
def histogram(contigs_fpaths, values, plot_fpath, title='', yaxis_title='', bottom_value=None,
              top_value=None):
    """Draw one bar per assembly showing a single numeric value.

    Parameters:
        contigs_fpaths: assembly paths; one bar is drawn for each.
        values: one number per assembly, in the same order.
        plot_fpath: passed through to ``create_plot``.
        title: plot title, also used in the log messages.
        yaxis_title: label of the y axis.
        bottom_value: lower y limit; computed from the data when falsy
            (``None`` or 0).  The limit actually used is never below 0.
        top_value: upper y limit; computed from the data when falsy.

    Nothing is drawn when ``can_draw_plots`` is false or fewer than two
    assemblies are given.
    """
    if not can_draw_plots:
        return
    if len(contigs_fpaths) < 2:  #
        logger.info('  Skipping drawing ' + title + ' histogram... (less than 2 columns histogram makes no sense)')
        return

    logger.info('  Drawing ' + title + ' histogram...')

    plots = []
    min_value = min(values)
    max_value = max(values)

    # Power of ten matching the magnitude of the value spread (or of the
    # value itself when all values are equal); used to round the y limits.
    if max_value == min_value:
        if max_value > 0:
            exponent = math.pow(10, math.floor(math.log(max_value, 10)))
        else:
            exponent = 1
    else:
        exponent = math.pow(10, math.floor(math.log(max_value - min_value, 10)))

    if not bottom_value:
        bottom_value = (math.floor(min_value / exponent) - 5) * exponent
    if not top_value:
        top_value = (math.ceil(max_value / exponent) + 1) * exponent

    # bars' params
    width = 0.3
    # True division is required here: floor division of these floats always
    # yields 0.0, which removes the gap between bars and the left margin.
    interval = width / 3
    start_pos = interval / 2

    for i, (contigs_fpath, val) in enumerate(zip(contigs_fpaths, values)):
        color, ls = get_color_and_ls(contigs_fpath)
        # Assemblies drawn with a non-primary line style get hatched bars.
        if ls == primary_line_style:
            hatch = ''
        else:
            hatch = 'x'
        plots.append(Bar(start_pos + (width + interval) * i, val, color, width=width, hatch=hatch))

    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    create_plot(plot_fpath, title, plots, legend_list, x_label='', y_label=yaxis_title, is_histogram=True,
                x_limit=[0, start_pos + width * len(contigs_fpaths) + interval * (len(contigs_fpaths) - 1)],
                y_limit=[max(bottom_value, 0), top_value])
Exemple #9
0
def draw_meta_summary_plot(html_fpath,
                           output_dirpath,
                           labels,
                           ref_names,
                           results,
                           plot_fpath,
                           title='',
                           reverse=False,
                           yaxis_title='',
                           print_all_refs=False,
                           logger=logger):
    """Draw a per-reference summary of one metric across assemblies.

    Parameters:
        html_fpath, output_dirpath: passed to ``html_saver.save_meta_summary``
            when ``qconfig.html_report`` is set and ``html_fpath`` is truthy.
        labels: assembly labels; one dotted curve is built per label.
        ref_names: reference names, one x-axis slot each.
        results: ``results[i][j]`` is the value for reference ``i`` and
            assembly ``j``; empty values and ``'-'`` count as missing.
        plot_fpath: passed through to ``create_plot``.
        title: plot title; with spaces replaced by ``_`` it also names the
            data saved for the HTML report.
        reverse: sort references by descending mean value when true.
        yaxis_title: label of the y axis.
        print_all_refs: when true, missing values are plotted as 0 and every
            reference is kept; otherwise references without any value are
            dropped and remaining missing values stay ``None``.
        logger: logger used for the progress message and by ``create_plot``.

    References are ordered by mean value unless
    ``qconfig.use_input_ref_order`` is set; the HTML data is sorted by mean
    value in either case.  The image is produced only when ``can_draw_plots``
    is true.
    """
    if can_draw_plots:
        logger.info('  Drawing ' + title + ' metaQUAST summary plot...')

    plots = []
    ref_num = len(ref_names)
    contigs_num = len(labels)

    # Per-assembly x positions and y values, indexed [assembly][reference].
    arr_x = []
    arr_y = []
    for j in range(contigs_num):
        # Shift every assembly slightly around the integer reference slot so
        # that markers of different assemblies do not overlap.
        x_offset = 0.07 * (j - (contigs_num - 1) * 0.5)
        to_plot_x = []
        to_plot_y = []
        for i in range(ref_num):
            to_plot_x.append(i + 1 + x_offset)
            value = results[i][j]
            if value and value != '-':
                to_plot_y.append(parse_str_to_num(value))
            elif print_all_refs:
                to_plot_y.append(0)
            else:
                to_plot_y.append(None)
        arr_x.append(to_plot_x)
        arr_y.append(to_plot_y)

    # Keep only references that have at least one value (or all of them when
    # print_all_refs is set) and compute their mean over all assemblies.
    mean_values = []
    arr_y_by_refs = []
    selected_refs = []
    for i in range(ref_num):
        points_y = [arr_y[j][i] for j in range(contigs_num)]
        has_value = any(y is not None for y in points_y)
        if has_value or print_all_refs:
            arr_y_by_refs.append(points_y)
            # Missing values (None) and zeros contribute nothing to the sum.
            total = sum(y for y in points_y if y)
            mean_values.append(total * 1.0 / len(points_y) if points_y else 0.0)
            selected_refs.append(ref_names[i])

    json_points_x = []
    json_points_y = []

    if not qconfig.use_input_ref_order:
        sorted_values = sorted(zip(mean_values, selected_refs, arr_y_by_refs),
                               reverse=reverse,
                               key=lambda x: x[0])
        mean_values, selected_refs, arr_y_by_refs = [
            [x[i] for x in sorted_values] for i in range(3)
        ]

    for j in range(contigs_num):
        points_x = arr_x[j][:len(arr_y_by_refs)]
        points_y = [ref_values[j] for ref_values in arr_y_by_refs]
        # No running maximum is kept here: points_y may contain None, which
        # cannot be compared with numbers on Python 3, and the maximum was
        # never used by the plot.
        color, ls = get_color_and_ls(None, labels[j])
        plots.append(
            Plot(points_x,
                 points_y,
                 color=color,
                 ls='dotted',
                 marker='o',
                 markersize=7))
        if not qconfig.use_input_ref_order:
            json_points_x.append(points_x)
            json_points_y.append(points_y)

    # for summary html, we need to sort values by average value anyway
    refs_for_html = list(selected_refs)
    if qconfig.use_input_ref_order:
        sorted_values = sorted(zip(mean_values, selected_refs, arr_y_by_refs),
                               reverse=reverse,
                               key=lambda x: x[0])
        mean_values, refs_for_html, arr_y_by_refs = [
            [x[i] for x in sorted_values] for i in range(3)
        ]
        for j in range(contigs_num):
            points_x = arr_x[j][:len(arr_y_by_refs)]
            points_y = [ref_values[j] for ref_values in arr_y_by_refs]
            json_points_x.append(points_x)
            json_points_y.append(points_y)

    if qconfig.html_report and html_fpath:
        from quast_libs.html_saver import html_saver
        html_saver.save_meta_summary(html_fpath, output_dirpath,
                                     json_points_x, json_points_y,
                                     title.replace(' ',
                                                   '_'), labels, refs_for_html)
    if can_draw_plots:
        legend_list = labels
        create_plot(plot_fpath,
                    title,
                    plots,
                    legend_list,
                    y_label=yaxis_title,
                    vertical_legend=True,
                    x_ticks=[''] + selected_refs,
                    vertical_ticks=True,
                    x_limit=[0, len(selected_refs) + 1],
                    add_to_report=False,
                    logger=logger)
Exemple #10
0
def coverage_histogram(contigs_fpaths,
                       values,
                       plot_fpath,
                       title='',
                       bin_size=None,
                       draw_bars=None,
                       max_cov=None,
                       low_threshold=None,
                       high_threshold=None):
    """Draw a coverage-depth histogram for one or more assemblies.

    Parameters:
        contigs_fpaths: assembly paths, one series each.
        values: one list of per-bin values per assembly; not modified.
        plot_fpath: passed through to ``create_plot``.
        title: plot title, also used in the log message.
        bin_size: multiplier turning a bin index into a tick label; it is
            used in arithmetic, so a number must be supplied.
        draw_bars: draw bars when truthy, otherwise a marked line.
        max_cov: label of the last tick when ``high_threshold`` is truthy.
        low_threshold: when truthy, the first bin is double-width and the
            remaining bins are shifted right by one; ``None`` is treated as 0
            when computing tick labels.
        high_threshold: when truthy, ticks are cut at a final tick labelled
            with ``max_cov``; together with ``draw_bars`` the last bar is
            double-width.

    Returns immediately when ``can_draw_plots`` is false.
    """
    if not can_draw_plots:
        return

    logger.info('  Drawing ' + title + '...')

    plots = []
    max_y = 0
    max_x = max(len(v) for v in values)
    x_vals = list(range(0, max_x))
    bar_width = 1.0
    bar_widths = [bar_width] * max_x
    if high_threshold and draw_bars:
        x_vals.append(max_x + 1)
        bar_widths[-1] = 2.0

    # Labels are computed from the unshifted bin indices.  A missing low
    # threshold contributes no offset instead of failing on `int + None`.
    label_offset = low_threshold if low_threshold is not None else 0
    x_ticks_labels = [
        str(x_val * bin_size + label_offset) for x_val in x_vals
    ]
    if low_threshold:
        x_vals = [x_val + 1 for x_val in x_vals]
        x_vals[0] = 0
        bar_widths[0] = 2.0

    for i, (contigs_fpath, y_vals) in enumerate(zip(contigs_fpaths, values)):
        max_y = max(max(y_vals), max_y)
        color, ls = get_color_and_ls(contigs_fpath)
        if draw_bars:
            for x_val, y_val, width in zip(x_vals, y_vals, bar_widths):
                if width == 2:
                    # Double-width bars are hatched.
                    plots.append(
                        Bar(x_val,
                            y_val,
                            color,
                            width=width,
                            edgecolor='#595959',
                            hatch='x'))
                else:
                    plots.append(Bar(x_val, y_val, color, width=width))
            plots.append(Bar(0, 0, color=color))
        else:
            plot_x_vals = [x_val + 0.5 for x_val in x_vals]
            plot_x_vals[-1] += 1
            # Plot a copy of the values; the caller's list is left untouched.
            plots.append(
                Plot(plot_x_vals,
                     list(y_vals),
                     marker='o',
                     markersize=3,
                     color=color,
                     ls=ls))

    # Keep roughly ten labelled ticks.
    x_factor = max(1, len(x_vals) // 10)
    x_ticks = x_vals[::x_factor]
    x_ticks_labels = x_ticks_labels[::x_factor]

    if low_threshold:
        x_ticks_labels.insert(0, 0)
    if high_threshold:
        if low_threshold:
            last_tick = (high_threshold - low_threshold
                         ) // bin_size + 4  # first and last bars have width 2
        else:
            last_tick = high_threshold // bin_size + 2
        x_ticks = [x for x in x_ticks if x < last_tick]
        x_ticks_labels = x_ticks_labels[:len(x_ticks)]
        x_ticks.append(last_tick)
        x_ticks_labels.append(str(max_cov))

    # Fill the gaps between consecutive ticks with unlabelled ticks, walking
    # from the right so that earlier indices stay valid while inserting.
    for i in range(len(x_ticks) - 1, 0, -1):
        val, prev_val = x_ticks[i], x_ticks[i - 1]
        while val - 1 != prev_val:
            val -= 1
            x_ticks.insert(i, val)
            x_ticks_labels.insert(i, '')
    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    xlabel = 'Coverage depth (x)'
    ylabel = 'Total length'

    create_plot(plot_fpath,
                title,
                plots,
                legend_list,
                x_label=xlabel,
                y_label=ylabel,
                is_histogram=True,
                x_limit=[0, max(x_ticks)],
                y_limit=[0, max_y * 1.1],
                x_ticks=x_ticks_labels)
Exemple #11
0
def frc_plot(results_dir, ref_fpath, contigs_fpaths, contigs_aligned_lengths,
             features_in_contigs_by_file, plot_fpath, title):
    """Write per-contig feature counts to a TSV file and draw an FRCurve plot.

    Parameters:
        results_dir: directory receiving ``metaquast_frc.tsv`` and, when
            ``qconfig.html_report`` is set, the data saved by
            ``html_saver.save_coord``.
        ref_fpath: not used by this function; kept so existing callers
            continue to work.
        contigs_fpaths: assembly paths.
        contigs_aligned_lengths: maps an assembly path to its per-contig
            aligned lengths.
        features_in_contigs_by_file: maps an assembly path to its per-contig
            feature counts, in the same order as the lengths.
        plot_fpath: passed through to ``create_plot``.
        title: feature type; written to the TSV and used in the plot title.

    Assemblies with no aligned lengths or no feature data are skipped.  For
    the others, contigs with a positive aligned length are written to the TSV
    and then accumulated from longest to shortest: x is the cumulative length,
    y the cumulative feature count.  The image is produced only when
    ``can_draw_plots`` is true.
    """
    if can_draw_plots:
        logger.info('  Drawing ' + title + ' FRCurve plot...')

    plots = []
    max_y = 0
    max_x = 0
    json_vals_x = []  # coordinates for Nx-like plots in HTML-report
    json_vals_y = []
    aligned_contigs_fpaths = []

    # create TSV file for metaquast features; `with` guarantees it is closed
    # even if processing one of the assemblies fails.
    with open(results_dir + os.sep + "metaquast_frc.tsv", 'w') as outf:
        outf.write(
            "Assembly\tContig_ID\tContig_Length\tFeature_Count\tFeature_Type\n")

        for contigs_fpath in contigs_fpaths:
            aligned_lengths = contigs_aligned_lengths[contigs_fpath]
            feature_in_contigs = features_in_contigs_by_file[contigs_fpath]
            if not aligned_lengths or not feature_in_contigs:
                continue

            aligned_contigs_fpaths.append(contigs_fpath)
            # Take the label from the current path so that rows stay correctly
            # attributed when earlier assemblies were skipped.
            assembly_label = label_from_fpath(contigs_fpath)

            # create unsorted TSV and collect contigs with a positive length
            length_feature_pairs = []
            ctg_idx = 1
            for l, feature in zip(aligned_lengths, feature_in_contigs):
                if l > 0:
                    outf.write("%s\t%s\t%d\t%d\t%s\n" %
                               (assembly_label, ctg_idx, l, feature, title))
                    ctg_idx += 1
                    length_feature_pairs.append((l, feature))

            # Longest contigs first (ties broken by feature count, descending).
            length_feature_pairs.sort(reverse=True)

            x_vals = [0]
            y_vals = [0]
            for length, feature in length_feature_pairs:
                x_vals.append(x_vals[-1] + length)
                y_vals.append(y_vals[-1] + feature)

            # NOTE(review): the HTML data stores feature counts as x and
            # lengths as y, the opposite of the drawn plot; kept as in the
            # original - confirm against the HTML report's expectations.
            json_vals_x.append(y_vals)
            json_vals_y.append(x_vals)
            max_y = max(max_y, max(y_vals))
            max_x = max(max_x, max(x_vals))

            color, ls = get_color_and_ls(contigs_fpath)
            plots.append(Plot(x_vals, y_vals, color, ls))

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_coord(results_dir, json_vals_x, json_vals_y,
                              'coord' + title, aligned_contigs_fpaths)

    if can_draw_plots:
        title = 'FRCurve (' + title + ')'
        legend_list = [
            label_from_fpath(fpath) for fpath in aligned_contigs_fpaths
        ]
        create_plot(plot_fpath,
                    title,
                    plots,
                    legend_list,
                    x_label='Cumulative length',
                    y_label='Cumulative features',
                    y_limit=[0, max_y],
                    x_limit=[0, max_x])
Exemple #12
0
def frc_plot(results_dir, ref_fpath, contigs_fpaths, contigs_aligned_lengths,
             features_in_contigs_by_file, plot_fpath, title):
    """Draw a feature-response curve: genome coverage against allowed features.

    For every assembly that has both aligned lengths and feature counts, and
    for every feature budget ``n`` from 0 up to the largest per-assembly
    feature total, contigs are taken greedily in order of decreasing
    length-per-feature ratio as long as the budget is not exceeded.  The
    covered length (plus the length of all feature-free contigs) is reported
    as a percentage of the reference length, read from ``ref_fpath``.

    The step data is passed to ``html_saver.save_coord`` when
    ``qconfig.html_report`` is set; the image is produced only when
    ``can_draw_plots`` is true.
    """
    if can_draw_plots:
        logger.info('  Drawing ' + title + ' FRCurve plot...')

    curves = []
    highest_y = 0
    ref_length = sum(
        fastaparser.get_chr_lengths_from_fastafile(ref_fpath).values())
    html_xs = []  # coordinates for Nx-like plots in HTML-report
    html_ys = []
    max_features = max(
        sum(counts) for counts in features_in_contigs_by_file.values()) + 1

    plotted_fpaths = []
    for contigs_fpath in contigs_fpaths:
        aligned_lengths = contigs_aligned_lengths[contigs_fpath]
        feature_in_contigs = features_in_contigs_by_file[contigs_fpath]
        if not (aligned_lengths and feature_in_contigs):
            continue

        plotted_fpaths.append(contigs_fpath)

        # Feature-free contigs always count; the others compete for the budget.
        feature_free_len = 0
        with_features = []
        for length, n_features in zip(aligned_lengths, feature_in_contigs):
            if n_features == 0:
                feature_free_len += length
            else:
                with_features.append((length, n_features))
        # sort by len/features ratio, largest first
        with_features.sort(key=lambda pair: pair[0] * 1.0 / pair[1],
                           reverse=True)

        x_vals = []
        y_vals = []
        for features_n in range(max_features):
            features_cnt = 0
            cumulative_len = feature_free_len
            for length, n_features in with_features:
                if features_cnt + n_features > features_n:
                    continue
                features_cnt += n_features
                cumulative_len += length
                if features_cnt == features_n:
                    break

            # One horizontal step per feature budget.
            coverage = cumulative_len * 100.0 / ref_length
            x_vals += [features_n, features_n + 1]
            y_vals += [coverage, coverage]

        html_xs.append(x_vals)
        html_ys.append(y_vals)
        highest_y = max(highest_y, max(y_vals))

        color, ls = get_color_and_ls(contigs_fpath)
        curves.append(Plot(x_vals, y_vals, color, ls))

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_coord(results_dir, html_xs, html_ys,
                              'coord' + title, plotted_fpaths)

    if can_draw_plots:
        title = 'FRCurve (' + title + ')'
        legend = [label_from_fpath(fpath) for fpath in plotted_fpaths]
        create_plot(plot_fpath,
                    title,
                    curves,
                    legend,
                    x_label='Feature space',
                    y_label='Genome coverage (%)',
                    x_limit=[0, max_features],
                    y_limit=[0, max(100, highest_y)])