def genes_operons_plot(reference_value, contigs_fpaths, files_feature_in_contigs, plot_fpath, title):
    """Draw cumulative feature counts along the contigs of every assembly.

    For each path in ``contigs_fpaths`` one curve is built whose k-th point
    is the sum of the first k entries of
    ``files_feature_in_contigs[contigs_fpath]``.  When ``reference_value`` is
    truthy, a horizontal line at that value is appended and labelled
    'Reference'.  The plot title (also used as the Y-axis label) is
    'Cumulative # complete ' followed by ``title``.

    Returns None; does nothing when ``can_draw_plots`` is false.
    """
    if not can_draw_plots:
        return

    logger.info(' Drawing ' + title + ' cumulative plot...')

    curves = []
    rightmost_x = 0
    for fpath in contigs_fpaths:
        per_contig_counts = files_feature_in_contigs[fpath]
        xs = list(range(len(per_contig_counts) + 1))

        # running sum, starting from an explicit zero point
        ys = [0]
        running_total = 0
        for count in per_contig_counts:
            running_total += count
            ys.append(running_total)

        if xs:
            rightmost_x = max(xs[-1], rightmost_x)

        color, ls = get_color_and_ls(fpath)
        curves.append(Plot(xs, ys, color, ls))

    legend = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference_value:
        # horizontal reference line spanning the widest curve
        curves.append(Plot([0, rightmost_x], [reference_value, reference_value],
                           reference_color, reference_ls))

    full_title = 'Cumulative # complete ' + title
    if reference_value:
        legend += ['Reference']

    create_plot(plot_fpath, full_title, curves, legend,
                x_label='Contig index', y_label=full_title)
def GC_content_plot(ref_fpath, contigs_fpaths, list_of_GC_distributions, plot_fpath):
    """Draw one GC-content curve per entry of ``list_of_GC_distributions``.

    Each entry is an ``(x_values, y_values)`` pair.  Entry ``i`` is styled
    after ``contigs_fpaths[i]``; when ``ref_fpath`` is truthy, the entry at
    the index right after the last assembly is drawn with the reference
    style and a 'Reference' legend item is added.

    Zero Y values are drawn as 0.1 (the original code marks this as being
    "for log scale").  The substitution is made on a copy, so the caller's
    distributions are left untouched.

    Returns None; does nothing when plotting is unavailable or
    ``qconfig.no_gc`` is set.
    """
    if not can_draw_plots or qconfig.no_gc:
        return

    title = 'GC content'
    logger.info(' Drawing ' + title + ' plot...')

    all_fpaths = contigs_fpaths
    if ref_fpath:
        all_fpaths = contigs_fpaths + [ref_fpath]
    reference_idx = len(all_fpaths) - 1

    plots = []
    for i, (gc_x, gc_y) in enumerate(list_of_GC_distributions):
        # for log scale: replace zeros on a fresh list instead of
        # overwriting the caller's data in place
        drawable_y = [0.1 if v == 0 else v for v in gc_y]

        if ref_fpath and i == reference_idx:
            color, ls = reference_color, reference_ls
        else:
            color, ls = get_color_and_ls(all_fpaths[i])
        plots.append(Plot(gc_x, drawable_y, color, ls))

    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    if ref_fpath:
        legend_list += ['Reference']

    create_plot(plot_fpath, title, plots, legend_list,
                x_label='GC (%)', y_label='# windows', x_limit=[0, 100])
def Nx_plot(results_dir, reduce_points, contigs_fpaths, lists_of_lengths, plot_fpath, title='Nx',
            reference_lengths=None):
    """Build Nx-like step curves, save their coordinates and draw the plot.

    For every assembly the contig lengths are sorted in place (descending)
    and turned into a step curve: X is the cumulative length as a percentage
    of the total, Y is the current contig length.  The total is
    ``reference_lengths[i]`` when ``reference_lengths`` is truthy, otherwise
    the sum of the assembly's own lengths.

    Two series are kept per assembly: a full one used for the drawn plot and
    a (possibly reduced) one saved through ``html_saver.save_coord`` when
    ``qconfig.html_report`` is set.  With ``reduce_points`` a step is only
    recorded in the saved series when the length drops by more than
    ``qconfig.min_difference``.  Assemblies with no lengths contribute empty
    coordinate lists and no curve.

    Returns None.
    """
    if can_draw_plots:
        logger.info(' Drawing ' + title + ' plot...')

    eps = 1e-10  # tiny X offset that turns each step into a vertical edge
    plots = []
    json_vals_x = []  # coordinates for Nx-like plots in HTML-report
    json_vals_y = []

    for file_idx, (contigs_fpath, lengths) in enumerate(zip(contigs_fpaths, lists_of_lengths)):
        if not lengths:
            json_vals_x.append([])
            json_vals_y.append([])
            continue

        lengths.sort(reverse=True)
        longest = lengths[0]
        saved_x, saved_y = [0.0], [longest]   # series for the HTML report
        drawn_x, drawn_y = [0.0], [longest]   # series for the drawn plot

        # if Nx-plot then we just use sum of contigs lengths, else use reference_length
        total_len = sum(lengths)
        if reference_lengths:
            total_len = reference_lengths[file_idx]

        min_difference = qconfig.min_difference if reduce_points else 0

        covered = 0
        for length in lengths:
            covered += length
            x = covered * 100.0 / total_len
            if can_draw_plots:
                drawn_x.extend((drawn_x[-1] + eps, x))
                drawn_y.extend((length, length))
            if saved_y[-1] - length > min_difference or len(saved_x) == 1:
                saved_x.extend((saved_x[-1] + eps, x))
                saved_y.extend((length, length))

        json_vals_x.append(saved_x)
        json_vals_y.append(saved_y)

        if can_draw_plots:
            # Drop both series to zero at the end.  saved_x/saved_y are the
            # very lists already stored in json_vals_*, so the closing point
            # is part of the saved coordinates too.
            drawn_x.append(drawn_x[-1] + eps)
            drawn_y.append(0.0)
            saved_x.append(saved_x[-1] + eps)
            saved_y.append(0.0)
            color, ls = get_color_and_ls(contigs_fpath)
            plots.append(Plot(drawn_x, drawn_y, color, ls))

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_coord(results_dir, json_vals_x, json_vals_y, 'coord' + title, contigs_fpaths)

    if not can_draw_plots:
        return

    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    create_plot(plot_fpath, title, plots, legend_list,
                x_label='x', y_label='Contig length', x_limit=[0, 100])
def contigs_GC_content_plot(contigs_fpath, GC_distributions, plot_fpath):
    """Draw a bar chart of the GC distribution for a single assembly.

    ``GC_distributions`` is unpacked as ``(x_values, y_values)``; one bar of
    width 5 is created per ``(x, y)`` pair, coloured after the assembly.

    Returns None; does nothing when plotting is unavailable or
    ``qconfig.no_gc`` is set.
    """
    if not can_draw_plots or qconfig.no_gc:
        return

    assembly_label = label_from_fpath(contigs_fpath)
    title = assembly_label + ' GC content'
    logger.info(' Drawing ' + title + ' plot...')

    color, _ = get_color_and_ls(contigs_fpath)
    gc_percents, contig_counts = GC_distributions
    bars = [Bar(gc_x, gc_y, color, width=5)
            for gc_x, gc_y in zip(gc_percents, contig_counts)]

    create_plot(plot_fpath, title, bars, [label_from_fpath(contigs_fpath)],
                x_label='GC (%)', y_label='# contigs', x_limit=[0, 100])
def cumulative_plot(reference, contigs_fpaths, lists_of_lengths, plot_fpath, title):
    """Draw cumulative contig length against contig index for every assembly.

    Each assembly's lengths are taken in descending order and accumulated,
    starting from an explicit (0, 0) point.  When ``reference`` is truthy,
    the sequence lengths returned by
    ``fastaparser.get_chr_lengths_from_fastafile(reference)`` are accumulated
    the same way and the resulting curve is extended horizontally to the
    right-most X of the plot and labelled 'Reference'.

    Returns None; does nothing when ``can_draw_plots`` is false.
    """
    if not can_draw_plots:
        return

    logger.info(' Drawing cumulative plot...')

    def running_totals(values):
        # [0, v1, v1 + v2, ...] over the values sorted largest-first
        totals = [0]
        for value in sorted(values, reverse=True):
            totals.append(totals[-1] + value)
        return totals

    curves = []
    rightmost_x = 0
    for contigs_fpath, lengths in zip(contigs_fpaths, lists_of_lengths):
        ys = running_totals(lengths)
        xs = list(range(0, len(ys)))
        if xs:
            rightmost_x = max(xs[-1], rightmost_x)
        color, ls = get_color_and_ls(contigs_fpath)
        curves.append(Plot(xs, ys, color, ls))

    if reference:
        ys = running_totals(fastaparser.get_chr_lengths_from_fastafile(reference).values())
        xs = list(range(0, len(ys)))
        # extend reference curve to the max X-axis point
        total_reference_len = ys[-1]
        rightmost_x = max(rightmost_x, xs[-1])
        ys.append(total_reference_len)
        xs.append(rightmost_x)
        curves.append(Plot(xs, ys, reference_color, reference_ls))

    legend = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference:
        legend += ['Reference']

    create_plot(plot_fpath, title, curves, legend,
                x_label='Contig index', y_label='Cumulative length', x_limit=[0, rightmost_x])
def add_lengths_to_report(lengths, reporting, contigs_fpath):
    """Store per-threshold contig counts and total lengths in the report.

    For every threshold in ``qconfig.contig_thresholds`` the number of
    entries of ``lengths`` that are ``>=`` the threshold, and their summed
    length, are written to the CONTIGS__FOR_THRESHOLDS and
    TOTALLENS__FOR_THRESHOLDS fields of ``reporting.get(contigs_fpath)``.

    When ``qconfig.scaffolds`` is set and the assembly is listed in
    ``qconfig.dict_of_broken_scaffolds`` or is drawn with the secondary line
    style, thresholds below ``qconfig.min_contig`` get ``None`` instead of a
    number.

    Does nothing when ``reporting`` is falsy.  Returns None.
    """
    if not reporting:
        return

    ## filling column "Assembly" with names of assemblies
    report = reporting.get(contigs_fpath)

    ## filling columns "Number of contigs >=110 bp", ">=200 bp", ">=500 bp"
    is_broken = bool(
        qconfig.scaffolds and (
            contigs_fpath in qconfig.dict_of_broken_scaffolds or
            plotter_data.get_color_and_ls(contigs_fpath)[1] == plotter_data.secondary_line_style))
    min_threshold = qconfig.min_contig if is_broken else 0

    def per_threshold(aggregate):
        # one value per configured threshold; None for thresholds that are
        # below the minimum applicable to this assembly
        return [aggregate(threshold) if threshold >= min_threshold else None
                for threshold in qconfig.contig_thresholds]

    report.add_field(
        reporting.Fields.CONTIGS__FOR_THRESHOLDS,
        per_threshold(lambda threshold: sum(1 for length in lengths if length >= threshold)))
    report.add_field(
        reporting.Fields.TOTALLENS__FOR_THRESHOLDS,
        per_threshold(lambda threshold: sum(length for length in lengths if length >= threshold)))
def histogram(contigs_fpaths, values, plot_fpath, title='', yaxis_title='', bottom_value=None, top_value=None):
    """Draw a bar chart with one bar per assembly.

    Args:
        contigs_fpaths: assembly paths; they determine bar colour, hatch and
            legend labels.
        values: one numeric value per assembly (the bar heights).
        plot_fpath: output path handed to ``create_plot``.
        title: plot title.
        yaxis_title: Y-axis label.
        bottom_value: lower Y limit; when falsy it is derived from the data
            (five "exponent" steps below the minimum).  The final lower limit
            is never below 0.
        top_value: upper Y limit; when falsy it is derived from the data
            (one "exponent" step above the maximum).

    Returns None.  Nothing is drawn when plotting is unavailable or fewer
    than two assemblies are given.
    """
    if not can_draw_plots:
        return
    if len(contigs_fpaths) < 2:
        # a histogram with fewer than 2 columns makes no sense
        return

    logger.info(' Drawing ' + title + ' histogram...')

    min_value = min(values)
    max_value = max(values)

    # "exponent" is the power of ten matching the magnitude of the data
    # spread (or of the value itself when all values are equal).
    # math.log10 is used instead of math.log(x, 10): the latter is inexact
    # for exact powers of ten (e.g. log(1000, 10) < 3), which after floor()
    # would select a power of ten one step too small.
    if max_value == min_value:
        if max_value > 0:
            exponent = math.pow(10, math.floor(math.log10(max_value)))
        else:
            exponent = 1
    else:
        exponent = math.pow(10, math.floor(math.log10(max_value - min_value)))

    if not bottom_value:
        bottom_value = (math.floor(min_value / exponent) - 5) * exponent
    if not top_value:
        top_value = (math.ceil(max_value / exponent) + 1) * exponent

    # bars' params
    width = 0.3
    # True division on purpose: width is a float smaller than 3, so floor
    # division would always give 0.0 and remove the gap between bars.
    interval = width / 3
    start_pos = interval / 2

    plots = []
    for i, (contigs_fpath, val) in enumerate(zip(contigs_fpaths, values)):
        color, ls = get_color_and_ls(contigs_fpath)
        hatch = '' if ls == primary_line_style else 'x'
        plots.append(Bar(start_pos + (width + interval) * i, val, color, width=width, hatch=hatch))

    bars_num = len(contigs_fpaths)
    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    create_plot(plot_fpath, title, plots, legend_list, x_label='', y_label=yaxis_title, is_histogram=True,
                x_limit=[0, start_pos + width * bars_num + interval * (bars_num - 1)],
                y_limit=[max(bottom_value, 0), top_value])
def draw_meta_summary_plot(html_fpath, output_dirpath, labels, ref_names, results, plot_fpath, title='',
                           reverse=False, yaxis_title='', print_all_refs=False, logger=logger):
    """Draw and/or save the per-reference summary of one metric.

    ``results[i][j]`` is read as the value for reference ``ref_names[i]`` and
    assembly ``labels[j]``.  A falsy value or ``'-'`` counts as missing: it
    becomes 0 when ``print_all_refs`` is set and ``None`` otherwise.
    References for which every assembly is missing are dropped unless
    ``print_all_refs`` is set.

    Unless ``qconfig.use_input_ref_order`` is set, references are ordered by
    their mean value (``reverse`` flips the order).  The coordinates saved
    for the HTML summary are always ordered by mean value.

    Args:
        html_fpath: HTML report path; nothing is saved when falsy or when
            ``qconfig.html_report`` is off.
        output_dirpath: passed through to ``html_saver.save_meta_summary``.
        labels: assembly labels (one dotted series per label).
        ref_names: reference names (one X position per selected reference).
        results: table of values indexed ``[reference][assembly]``.
        plot_fpath: output path handed to ``create_plot``.
        title: metric name used in the log line, plot title and saved name.
        reverse: sort references by descending mean when true.
        yaxis_title: Y-axis label.
        print_all_refs: keep references without any value and plot 0 for
            missing values.
        logger: logger used for the progress message and by ``create_plot``.

    Returns None.
    """
    if can_draw_plots:
        logger.info(' Drawing ' + title + ' metaQUAST summary plot...')

    ref_num = len(ref_names)
    contigs_num = len(labels)

    # Per-assembly series.  X positions are 1..ref_num, shifted a little per
    # assembly so that points sharing a reference do not overlap.
    arr_x = []
    arr_y = []
    for j in range(contigs_num):
        x_shift = 0.07 * (j - (contigs_num - 1) * 0.5)
        to_plot_x = []
        to_plot_y = []
        for i in range(ref_num):
            to_plot_x.append(i + 1 + x_shift)
            value = results[i][j]
            if value and value != '-':
                to_plot_y.append(parse_str_to_num(value))
            elif print_all_refs:
                to_plot_y.append(0)
            else:
                to_plot_y.append(None)
        arr_x.append(to_plot_x)
        arr_y.append(to_plot_y)

    # Keep only references that have at least one value (or all of them
    # when print_all_refs is set) and compute their mean over assemblies.
    selected_refs = []
    mean_values = []
    arr_y_by_refs = []
    for i in range(ref_num):
        points_y = [arr_y[j][i] for j in range(contigs_num)]
        has_value = any(y is not None for y in points_y)
        if has_value or print_all_refs:
            arr_y_by_refs.append(points_y)
            # filter(None, ...) drops missing values (and zeros, which do
            # not change the sum); the mean is still over all assemblies
            mean_values.append(sum(filter(None, points_y)) * 1.0 / len(points_y))
            selected_refs.append(ref_names[i])

    def sort_by_mean(means, refs, ys_by_ref):
        # Reorder the three parallel lists by mean value.
        ordered = sorted(zip(means, refs, ys_by_ref), reverse=reverse, key=lambda item: item[0])
        return [[item[k] for item in ordered] for k in range(3)]

    def series_for(j, ys_by_ref):
        # X/Y coordinates of assembly j over the references in ys_by_ref.
        refs_num = len(ys_by_ref)
        return ([arr_x[j][i] for i in range(refs_num)],
                [ys_by_ref[i][j] for i in range(refs_num)])

    plots = []
    json_points_x = []
    json_points_y = []

    if not qconfig.use_input_ref_order:
        mean_values, selected_refs, arr_y_by_refs = sort_by_mean(mean_values, selected_refs, arr_y_by_refs)

    for j in range(contigs_num):
        points_x, points_y = series_for(j, arr_y_by_refs)
        color, _ = get_color_and_ls(None, labels[j])
        plots.append(Plot(points_x, points_y, color=color, ls='dotted', marker='o', markersize=7))
        if not qconfig.use_input_ref_order:
            json_points_x.append(points_x)
            json_points_y.append(points_y)

    refs_for_html = list(selected_refs)
    # for summary html, we need to sort values by average value anyway
    if qconfig.use_input_ref_order:
        mean_values, refs_for_html, arr_y_by_refs = sort_by_mean(mean_values, selected_refs, arr_y_by_refs)
        for j in range(contigs_num):
            points_x, points_y = series_for(j, arr_y_by_refs)
            json_points_x.append(points_x)
            json_points_y.append(points_y)

    if qconfig.html_report and html_fpath:
        from quast_libs.html_saver import html_saver
        html_saver.save_meta_summary(html_fpath, output_dirpath, json_points_x, json_points_y,
                                     title.replace(' ', '_'), labels, refs_for_html)

    if can_draw_plots:
        legend_list = labels
        create_plot(plot_fpath, title, plots, legend_list, y_label=yaxis_title, vertical_legend=True,
                    x_ticks=[''] + selected_refs, vertical_ticks=True,
                    x_limit=[0, len(selected_refs) + 1],
                    add_to_report=False, logger=logger)
def coverage_histogram(contigs_fpaths, values, plot_fpath, title='', bin_size=None, draw_bars=None, max_cov=None,
                       low_threshold=None, high_threshold=None):
    """Draw a coverage histogram (bars or dotted lines) for the assemblies.

    Args:
        contigs_fpaths: assembly paths; they determine colours and legend.
        values: one list of Y values per assembly, one value per bin.  The
            lists are only read.
        plot_fpath: output path handed to ``create_plot``.
        title: plot title, also used in the log line.
        bin_size: bin width; used arithmetically to build tick labels, so
            callers are expected to pass a number.
        draw_bars: draw ``Bar`` objects when truthy, otherwise one ``Plot``
            line with markers per assembly.
        max_cov: label of the last tick when ``high_threshold`` is set.
        low_threshold: when truthy, the first bin is drawn twice as wide and
            the remaining bins are shifted right by one.  It is also added to
            every tick label, so callers are expected to pass a number.
        high_threshold: when truthy, a final tick labelled ``max_cov`` is
            added; together with ``draw_bars`` the last bin is drawn twice
            as wide.

    Returns None; does nothing when ``can_draw_plots`` is false.
    """
    if not can_draw_plots:
        return

    logger.info(' Drawing ' + title + '...')

    plots = []
    max_y = 0
    max_x = max(len(v) for v in values)
    x_vals = list(range(0, max_x))
    bar_widths = [1.0] * max_x
    if high_threshold and draw_bars:
        x_vals.append(max_x + 1)
        bar_widths[-1] = 2.0
    x_ticks_labels = [str(x_val * bin_size + low_threshold) for x_val in x_vals]
    if low_threshold:
        x_vals = [x_val + 1 for x_val in x_vals]
        x_vals[0] = 0
        bar_widths[0] = 2.0

    for contigs_fpath, y_vals in zip(contigs_fpaths, values):
        max_y = max(max(y_vals), max_y)
        color, ls = get_color_and_ls(contigs_fpath)
        if draw_bars:
            for x_val, y_val, width in zip(x_vals, y_vals, bar_widths):
                if width == 2:
                    # double-width (threshold) bins are hatched
                    plots.append(Bar(x_val, y_val, color, width=width, edgecolor='#595959', hatch='x'))
                else:
                    plots.append(Bar(x_val, y_val, color, width=width))
            plots.append(Bar(0, 0, color=color))
        else:
            plot_x_vals = [x_val + 0.5 for x_val in x_vals]
            plot_x_vals[-1] += 1
            # hand over a copy so the caller's list is neither shared with
            # the plot nor modified
            plots.append(Plot(plot_x_vals, list(y_vals), marker='o', markersize=3, color=color, ls=ls))

    # show roughly ten labelled ticks
    x_factor = max(1, len(x_vals) // 10)
    x_ticks = x_vals[::x_factor]
    x_ticks_labels = x_ticks_labels[::x_factor]

    if low_threshold:
        x_ticks_labels.insert(0, 0)
    if high_threshold:
        if low_threshold:
            last_tick = (high_threshold - low_threshold) // bin_size + 4  # first and last bars have width 2
        else:
            last_tick = high_threshold // bin_size + 2
        x_ticks = [x for x in x_ticks if x < last_tick]
        x_ticks_labels = x_ticks_labels[:len(x_ticks)]
        x_ticks.append(last_tick)
        x_ticks_labels.append(str(max_cov))

    # Fill every gap between neighbouring ticks with unlabelled ticks one
    # unit apart.  Walking from the right keeps the indices still to be
    # visited valid while inserting.  The '>' comparison guarantees
    # termination even if a gap is not a whole number of units.
    for i in range(len(x_ticks) - 1, 0, -1):
        val, prev_val = x_ticks[i], x_ticks[i - 1]
        while val - 1 > prev_val:
            val -= 1
            x_ticks.insert(i, val)
            x_ticks_labels.insert(i, '')

    legend_list = [label_from_fpath(fpath) for fpath in contigs_fpaths]
    xlabel = 'Coverage depth (x)'
    ylabel = 'Total length'

    create_plot(plot_fpath, title, plots, legend_list, x_label=xlabel, y_label=ylabel, is_histogram=True,
                x_limit=[0, max(x_ticks)], y_limit=[0, max_y * 1.1], x_ticks=x_ticks_labels)
def frc_plot(results_dir, ref_fpath, contigs_fpaths, contigs_aligned_lengths, features_in_contigs_by_file,
             plot_fpath, title):
    """Write per-contig feature counts to a TSV file and draw an FRCurve.

    For every assembly that has both aligned lengths and feature counts:

    * one TSV row per contig with a positive aligned length is written to
      ``<results_dir>/metaquast_frc.tsv``;
    * those contigs are sorted by (length, feature count) in descending
      order and accumulated into a curve of cumulative length (X) against
      cumulative feature count (Y).

    The curves are saved through ``html_saver.save_coord`` when
    ``qconfig.html_report`` is set and drawn when plotting is available.

    Args:
        results_dir: directory for the TSV file and the saved coordinates.
        ref_fpath: accepted for interface compatibility; not used here.
        contigs_fpaths: assembly paths, in plotting order.
        contigs_aligned_lengths: mapping assembly path -> per-contig lengths.
        features_in_contigs_by_file: mapping assembly path -> per-contig
            feature counts, parallel to the lengths.
        plot_fpath: output path handed to ``create_plot``.
        title: feature type; written to the TSV and used in titles.

    Returns None.
    """
    if can_draw_plots:
        logger.info(' Drawing ' + title + ' FRCurve plot...')

    plots = []
    max_x = 0
    max_y = 0
    json_vals_x = []  # coordinates for Nx-like plots in HTML-report
    json_vals_y = []
    aligned_contigs_fpaths = []

    # create TSV file for metaquast features; the context manager makes sure
    # the file is flushed and closed even if processing fails half-way
    with open(results_dir + os.sep + "metaquast_frc.tsv", 'w') as outf:
        outf.write("Assembly\tContig_ID\tContig_Length\tFeature_Count\tFeature_Type\n")

        for contigs_fpath in contigs_fpaths:
            aligned_lengths = contigs_aligned_lengths[contigs_fpath]
            feature_in_contigs = features_in_contigs_by_file[contigs_fpath]
            if not aligned_lengths or not feature_in_contigs:
                continue
            aligned_contigs_fpaths.append(contigs_fpath)

            # Take the label straight from the path so that skipped
            # assemblies cannot shift the labels of the following ones.
            assembly_label = label_from_fpath(contigs_fpath)

            # create unsorted TSV and collect the contigs that take part in
            # the curve.
            # NOTE(review): Contig_ID is assumed to be the 1-based position
            # in the input lists (i.e. it also advances for contigs that are
            # not written) -- confirm against the intended TSV format.
            contig_stats = []
            for ctg_idx, (length, feature) in enumerate(zip(aligned_lengths, feature_in_contigs), 1):
                if length > 0:
                    outf.write("%s\t%s\t%d\t%d\t%s\n" % (assembly_label, ctg_idx, length, feature, title))
                    contig_stats.append((length, feature))

            # longest contigs first (ties broken by feature count)
            contig_stats.sort(reverse=True)

            x_vals = [0]
            y_vals = [0]
            cumulative_len = 0
            cumulative_features = 0
            for length, feature in contig_stats:
                cumulative_len += length
                cumulative_features += feature
                x_vals.append(cumulative_len)
                y_vals.append(cumulative_features)

            # The saved coordinates keep feature counts on X and lengths on
            # Y, i.e. swapped with respect to the drawn plot.
            # NOTE(review): presumably what the HTML report expects --
            # confirm.
            json_vals_x.append(y_vals)
            json_vals_y.append(x_vals)
            max_y = max(max_y, max(y_vals))
            max_x = max(max_x, max(x_vals))

            color, ls = get_color_and_ls(contigs_fpath)
            plots.append(Plot(x_vals, y_vals, color, ls))

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_coord(results_dir, json_vals_x, json_vals_y, 'coord' + title, aligned_contigs_fpaths)

    if can_draw_plots:
        title = 'FRCurve (' + title + ')'
        legend_list = [label_from_fpath(fpath) for fpath in aligned_contigs_fpaths]
        create_plot(plot_fpath, title, plots, legend_list,
                    x_label='Cumulative length', y_label='Cumulative features',
                    y_limit=[0, max_y], x_limit=[0, max_x])
def frc_plot(results_dir, ref_fpath, contigs_fpaths, contigs_aligned_lengths, features_in_contigs_by_file,
             plot_fpath, title):
    """Build, save and draw FRCurves: covered length against feature budget.

    The feature space runs from 0 to the largest per-assembly feature total.
    For every budget ``n`` in that range and every assembly that has both
    aligned lengths and feature counts, contigs are picked greedily in
    descending order of length-per-feature as long as their features still
    fit into ``n``; contigs without features are always counted.  The
    covered length is expressed as a percentage of the total length of the
    sequences in ``ref_fpath`` and drawn as a step curve.

    The curves are saved through ``html_saver.save_coord`` when
    ``qconfig.html_report`` is set and drawn when plotting is available.

    Returns None.
    """
    if can_draw_plots:
        logger.info(' Drawing ' + title + ' FRCurve plot...')

    reference_total = sum(fastaparser.get_chr_lengths_from_fastafile(ref_fpath).values())
    feature_space = max(sum(counts) for counts in features_in_contigs_by_file.values()) + 1

    curves = []
    top_y = 0
    json_vals_x = []  # coordinates for Nx-like plots in HTML-report
    json_vals_y = []
    plotted_fpaths = []

    for contigs_fpath in contigs_fpaths:
        aligned_lengths = contigs_aligned_lengths[contigs_fpath]
        feature_counts = features_in_contigs_by_file[contigs_fpath]
        if not (aligned_lengths and feature_counts):
            continue
        plotted_fpaths.append(contigs_fpath)

        # split contigs into feature-free length and (length, count) pairs
        feature_free_len = 0
        with_features = []
        for length, count in zip(aligned_lengths, feature_counts):
            if count == 0:
                feature_free_len += length
            else:
                with_features.append((length, count))

        # sort by len/features ratio
        with_features.sort(key=lambda pair: pair[0] * 1.0 / pair[1], reverse=True)

        x_vals = []
        y_vals = []
        for budget in range(feature_space):
            used = 0
            covered_len = feature_free_len
            for length, count in with_features:
                if used + count <= budget:
                    used += count
                    covered_len += length
                    if used == budget:
                        break
            coverage = covered_len * 100.0 / reference_total
            # two points per budget give a horizontal step [budget, budget + 1]
            x_vals.extend((budget, budget + 1))
            y_vals.extend((coverage, coverage))

        json_vals_x.append(x_vals)
        json_vals_y.append(y_vals)
        top_y = max(top_y, max(y_vals))

        color, ls = get_color_and_ls(contigs_fpath)
        curves.append(Plot(x_vals, y_vals, color, ls))

    if qconfig.html_report:
        from quast_libs.html_saver import html_saver
        html_saver.save_coord(results_dir, json_vals_x, json_vals_y, 'coord' + title, plotted_fpaths)

    if can_draw_plots:
        title = 'FRCurve (' + title + ')'
        legend_list = [label_from_fpath(fpath) for fpath in plotted_fpaths]
        create_plot(plot_fpath, title, curves, legend_list,
                    x_label='Feature space', y_label='Genome coverage (%)',
                    x_limit=[0, feature_space], y_limit=[0, max(100, top_y)])