def _correct_contigs(contigs_fpaths, corrected_dirpath, min_contig, labels):
    """Correct every input assembly and return the ones that were kept.

    For each path in ``contigs_fpaths`` (paired by position with ``labels``)
    a corrected file path is reserved inside ``corrected_dirpath`` and an
    ``Assembly`` is created for it.  The input is then dropped if the summed
    length of its contigs that are >= ``min_contig`` is zero, or if
    ``quast.correct_fasta`` returns a falsy value.

    :param contigs_fpaths: paths of the input contig FASTA files
    :param corrected_dirpath: directory in which corrected files are placed
    :param min_contig: minimal contig length taken into account
    :param labels: labels, indexed in parallel with ``contigs_fpaths``
    :return: list of ``Assembly`` objects for the inputs that were kept
    """
    kept = []
    for idx, src_fpath in enumerate(contigs_fpaths):
        src_basename = os.path.basename(src_fpath)
        _, fasta_ext = qutils.splitext_for_fasta_file(src_basename)
        label = labels[idx]

        # The corrected file is named after the label, keeping the extension
        # that was detected on the input file.
        target_fpath = os.path.join(corrected_dirpath, label + fasta_ext)
        corr_fpath = qutils.unique_corrected_fpath(target_fpath)
        candidate = Assembly(corr_fpath, label)
        logger.info(' %s ==> %s' % (src_fpath, label))

        lengths = fastaparser.get_lengths_from_fastafile(src_fpath)
        usable_total = sum(length for length in lengths if length >= min_contig)
        if usable_total:
            # Only inputs that were corrected successfully are reported back.
            if quast.correct_fasta(src_fpath, corr_fpath, min_contig):
                kept.append(candidate)
        else:
            logger.warning(
                "Skipping %s because it doesn't contain contigs >= %d bp."
                % (src_basename, min_contig))
    return kept
def _correct_contigs(contigs_fpaths, corrected_dirpath, min_contig, labels):
    """Correct every input assembly and return the ones that were kept.

    For each path in ``contigs_fpaths`` (paired by position with ``labels``)
    a corrected file path is reserved inside ``corrected_dirpath`` and an
    ``Assembly`` is created for it.  The input is then dropped if the summed
    length of its contigs that are >= ``min_contig`` is zero, or if
    ``quast.correct_fasta`` returns a falsy value.

    :param contigs_fpaths: paths of the input contig FASTA files
    :param corrected_dirpath: directory in which corrected files are placed
    :param min_contig: minimal contig length taken into account
    :param labels: labels, indexed in parallel with ``contigs_fpaths``
    :return: list of ``Assembly`` objects for the inputs that were kept
    """
    kept = []
    for idx, src_fpath in enumerate(contigs_fpaths):
        src_basename = os.path.basename(src_fpath)
        _, fasta_ext = qutils.splitext_for_fasta_file(src_basename)
        label = labels[idx]

        # The corrected file is named after the label, keeping the extension
        # that was detected on the input file.
        target_fpath = os.path.join(corrected_dirpath, label + fasta_ext)
        corr_fpath = qutils.unique_corrected_fpath(target_fpath)
        candidate = Assembly(corr_fpath, label)
        logger.info(' %s ==> %s' % (src_fpath, label))

        lengths = fastaparser.get_lengths_from_fastafile(src_fpath)
        usable_total = sum(length for length in lengths if length >= min_contig)
        if usable_total:
            # Only inputs that were corrected successfully are reported back.
            if quast.correct_fasta(src_fpath, corr_fpath, min_contig):
                kept.append(candidate)
        else:
            logger.warning(
                "Skipping %s because it doesn't contain contigs >= %d bp."
                % (src_basename, min_contig))
    return kept
def _handle_fasta(contigs_fpath, corr_fpath, reporting):
    """Correct one assembly and record its per-threshold contig statistics.

    The assembly is skipped (``False`` is returned) when the summed length of
    its contigs that are >= ``qconfig.min_contig`` is zero, or when
    ``correct_fasta`` returns a falsy value.  Otherwise the report obtained
    from ``reporting.get(corr_fpath)`` receives, for every value in
    ``qconfig.contig_thresholds``, the number of contigs and the total length
    of contigs at least that long.

    :param contigs_fpath: path of the input contig FASTA file
    :param corr_fpath: path of the corrected FASTA file
    :param reporting: object providing ``get`` and ``Fields``
    :return: ``True`` if the assembly was processed, ``False`` if skipped
    """
    lengths = fastaparser.get_lengths_from_fastafile(contigs_fpath)

    usable_total = sum(
        length for length in lengths if length >= qconfig.min_contig)
    if not usable_total:
        label = qutils.label_from_fpath(corr_fpath)
        logger.warning(
            "Skipping %s because it doesn't contain contigs >= %d bp."
            % (label, qconfig.min_contig))
        return False

    corrected = correct_fasta(contigs_fpath, corr_fpath, qconfig.min_contig)
    if not corrected:
        return False

    # The report is looked up by the corrected file path.
    report = reporting.get(corr_fpath)

    # Number of contigs that reach each configured threshold.
    contig_counts = []
    for threshold in qconfig.contig_thresholds:
        contig_counts.append(
            sum(1 for length in lengths if length >= threshold))
    report.add_field(reporting.Fields.CONTIGS__FOR_THRESHOLDS, contig_counts)

    # Total length of the contigs that reach each configured threshold.
    total_lengths = []
    for threshold in qconfig.contig_thresholds:
        total_lengths.append(
            sum(length for length in lengths if length >= threshold))
    report.add_field(reporting.Fields.TOTALLENS__FOR_THRESHOLDS, total_lengths)

    return True
def fillMetrics(kind, fasta_fn):
    """Fill the 'spades' metrics of *kind* from the lengths found in *fasta_fn*.

    Every stored value is a tuple of locale-formatted strings (with digit
    grouping).  'Largest <kind>' holds a 1-tuple; every other entry holds a
    pair ``(value over lengths >= 500, value over all lengths)``.

    NOTE(review): ``self`` is not a parameter here; it is resolved from an
    enclosing scope, so this is presumably a function nested inside a
    method -- confirm against the surrounding code.

    :param kind: name used both as the key into ``self._metrics['spades']``
        and inside the metric titles
    :param fasta_fn: file passed to ``get_lengths_from_fastafile``
    :raises ValueError: from ``max`` if the file yields no lengths
    """
    metrics = self._metrics['spades'][kind]
    lengths_all = get_lengths_from_fastafile(fasta_fn)
    lengths_large = [length for length in lengths_all if length >= 500]

    def fmt(number):
        # locale.format() was deprecated in Python 3.7 and removed in 3.12;
        # locale.format_string() accepts the same arguments and gives the
        # same result for a single "%d" specifier.
        return locale.format_string("%d", int(number), grouping=True)

    def cval(func):
        # Pair of the statistic over the large contigs and over all of them.
        return (fmt(func(lengths_large)), fmt(func(lengths_all)))

    metrics['Largest ' + kind] = (fmt(max(lengths_all)), )
    metrics['Total Length'] = cval(sum)
    metrics['Number of ' + kind + 's'] = cval(len)
    for q in [50, 75, 90, 95]:
        # q is bound as a default so the lambda does not depend on the loop
        # variable's later values.
        metrics['N%s' % q] = cval(lambda x, q=q: N50(x, q))
def fillMetrics(kind, fasta_fn):
    """Fill the 'spades' metrics of *kind* from the lengths found in *fasta_fn*.

    Every stored value is a tuple of locale-formatted strings (with digit
    grouping).  'Largest <kind>' holds a 1-tuple; every other entry holds a
    pair ``(value over lengths >= 500, value over all lengths)``.

    NOTE(review): ``self`` is not a parameter here; it is resolved from an
    enclosing scope, so this is presumably a function nested inside a
    method -- confirm against the surrounding code.

    :param kind: name used both as the key into ``self._metrics['spades']``
        and inside the metric titles
    :param fasta_fn: file passed to ``get_lengths_from_fastafile``
    :raises ValueError: from ``max`` if the file yields no lengths
    """
    metrics = self._metrics['spades'][kind]
    lengths_all = get_lengths_from_fastafile(fasta_fn)
    lengths_large = [length for length in lengths_all if length >= 500]

    def fmt(number):
        # locale.format() was deprecated in Python 3.7 and removed in 3.12;
        # locale.format_string() accepts the same arguments and gives the
        # same result for a single "%d" specifier.
        return locale.format_string("%d", int(number), grouping=True)

    def cval(func):
        # Pair of the statistic over the large contigs and over all of them.
        return (fmt(func(lengths_large)), fmt(func(lengths_all)))

    metrics['Largest ' + kind] = (fmt(max(lengths_all)), )
    metrics['Total Length'] = cval(sum)
    metrics['Number of ' + kind + 's'] = cval(len)
    for q in [50, 75, 90, 95]:
        # q is bound as a default so the lambda does not depend on the loop
        # variable's later values.
        metrics['N%s' % q] = cval(lambda x, q=q: N50(x, q))
def _handle_fasta(contigs_fpath, corr_fpath, reporting):
    """Correct one assembly and record its per-threshold contig statistics.

    The assembly is skipped (``False`` is returned) when the summed length of
    its contigs that are >= ``qconfig.min_contig`` is zero, or when
    ``correct_fasta`` returns a falsy value.  Otherwise the report obtained
    from ``reporting.get(corr_fpath)`` receives, for every value in
    ``qconfig.contig_thresholds``, the number of contigs and the total length
    of contigs at least that long.

    :param contigs_fpath: path of the input contig FASTA file
    :param corr_fpath: path of the corrected FASTA file
    :param reporting: object providing ``get`` and ``Fields``
    :return: ``True`` if the assembly was processed, ``False`` if skipped
    """
    lengths = fastaparser.get_lengths_from_fastafile(contigs_fpath)

    usable_total = sum(
        length for length in lengths if length >= qconfig.min_contig)
    if not usable_total:
        label = qutils.label_from_fpath(corr_fpath)
        logger.warning(
            "Skipping %s because it doesn't contain contigs >= %d bp."
            % (label, qconfig.min_contig))
        return False

    corrected = correct_fasta(contigs_fpath, corr_fpath, qconfig.min_contig)
    if not corrected:
        return False

    # The report is looked up by the corrected file path.
    report = reporting.get(corr_fpath)

    # Number of contigs that reach each configured threshold.
    contig_counts = []
    for threshold in qconfig.contig_thresholds:
        contig_counts.append(
            sum(1 for length in lengths if length >= threshold))
    report.add_field(reporting.Fields.CONTIGS__FOR_THRESHOLDS, contig_counts)

    # Total length of the contigs that reach each configured threshold.
    total_lengths = []
    for threshold in qconfig.contig_thresholds:
        total_lengths.append(
            sum(length for length in lengths if length >= threshold))
    report.add_field(reporting.Fields.TOTALLENS__FOR_THRESHOLDS, total_lengths)

    return True
def cumulative_plot(reference, contigs_fpaths, lists_of_lengths, plot_fpath, title):
    """Draw the cumulative contig-length plot and save it to disk.

    One curve is drawn per assembly: contigs are ordered from longest to
    shortest and the running total of their lengths is plotted against the
    contig index, starting at (0, 0).  When ``reference`` is given, a
    horizontal line at the total reference length is added.

    Nothing is done when the module-level ``matplotlib_error`` flag is set.
    The figure is saved to ``plot_fpath + plots_file_ext`` and appended to
    ``pdf_plots_figures``.

    :param reference: reference FASTA path, or a falsy value for none
    :param contigs_fpaths: assembly file paths (used for colours and legend)
    :param lists_of_lengths: contig lengths, one iterable per assembly, in
        the same order as ``contigs_fpaths``; left unmodified
    :param plot_fpath: output path without the file extension
    :param title: plot title, used only when ``with_title`` is set
    """
    if matplotlib_error:
        return

    logger.info(' Drawing cumulative plot...')
    import matplotlib.pyplot
    import matplotlib.ticker

    figure = matplotlib.pyplot.figure()
    matplotlib.pyplot.rc('font', **font)
    max_x = 0
    max_y = 0
    color_id = 0

    # The builtin zip works on both Python 2 and 3 (itertools.izip is 2-only).
    for contigs_fpath, lengths in zip(contigs_fpaths, lists_of_lengths):
        # sorted() works on a copy, so the caller's list is not reordered.
        vals_length = [0]
        for length in sorted(lengths, reverse=True):
            vals_length.append(vals_length[-1] + length)
        vals_contig_index = list(range(len(vals_length)))

        # Both lists always contain at least the initial 0 entry.
        max_x = max(vals_contig_index[-1], max_x)
        max_y = max(max_y, vals_length[-1])

        color, ls, color_id = get_color_and_ls(color_id, contigs_fpath)
        matplotlib.pyplot.plot(vals_contig_index, vals_length,
                               color=color, lw=line_width, ls=ls)

    if reference:
        reference_length = sum(fastaparser.get_lengths_from_fastafile(reference))
        matplotlib.pyplot.plot([0, max_x], [reference_length, reference_length],
                               color=reference_color, lw=line_width, ls=reference_ls)
        max_y = max(max_y, reference_length)

    if with_title:
        matplotlib.pyplot.title(title)
    matplotlib.pyplot.grid(with_grid)
    ax = matplotlib.pyplot.gca()

    # Shrink current axis's height by 20% on the bottom
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width, box.height * 0.8])

    # A real list is needed here: map() returns an iterator on Python 3.
    legend_list = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference:
        legend_list.append('Reference')

    # Put a legend below current axis
    try:  # for matplotlib <= 2009-12-09
        ax.legend(legend_list, loc='upper center', bbox_to_anchor=(0.5, -0.1),
                  fancybox=True, shadow=True, ncol=n_columns)
    except Exception:  # ZeroDivisionError: ValueError:
        # Deliberately best-effort: the plot is still saved without a legend.
        pass

    ylabel = 'Cumulative length '
    ylabel, mkfunc = y_formatter(ylabel, max_y)
    matplotlib.pyplot.xlabel('Contig index', fontsize=axes_fontsize)
    matplotlib.pyplot.ylabel(ylabel, fontsize=axes_fontsize)

    mkformatter = matplotlib.ticker.FuncFormatter(mkfunc)
    ax.yaxis.set_major_formatter(mkformatter)

    xLocator, yLocator = get_locators()
    ax.yaxis.set_major_locator(yLocator)
    ax.xaxis.set_major_locator(xLocator)

    plot_fpath += plots_file_ext
    matplotlib.pyplot.savefig(plot_fpath)
    logger.info(' saved to ' + plot_fpath)
    pdf_plots_figures.append(figure)
def cumulative_plot(reference, contigs_fpaths, lists_of_lengths, plot_fpath, title):
    """Draw the cumulative contig-length plot and save it to disk.

    One curve is drawn per assembly: contigs are ordered from longest to
    shortest and the running total of their lengths is plotted against the
    contig index, starting at (0, 0).  When ``reference`` is given and
    contains sequences, its own cumulative curve is drawn starting at X=1
    and extended horizontally to the largest X value of the plot.

    Nothing is done when the module-level ``matplotlib_error`` flag is set.
    The figure is saved to ``plot_fpath + plots_file_ext`` and appended to
    ``pdf_plots_figures``.

    :param reference: reference FASTA path, or a falsy value for none
    :param contigs_fpaths: assembly file paths (used for colours and legend)
    :param lists_of_lengths: contig lengths, one iterable per assembly, in
        the same order as ``contigs_fpaths``; left unmodified
    :param plot_fpath: output path without the file extension
    :param title: plot title, used only when ``with_title`` is set
    """
    if matplotlib_error:
        return

    logger.info(' Drawing cumulative plot...')
    import matplotlib.pyplot
    import matplotlib.ticker

    figure = matplotlib.pyplot.figure()
    matplotlib.pyplot.rc('font', **font)
    max_x = 0
    max_y = 0

    # The builtin zip works on both Python 2 and 3 (itertools.izip is 2-only).
    for contigs_fpath, lengths in zip(contigs_fpaths, lists_of_lengths):
        vals_length = [0]
        for length in sorted(lengths, reverse=True):
            vals_length.append(vals_length[-1] + length)
        vals_contig_index = list(range(len(vals_length)))

        # Both lists always contain at least the initial 0 entry.
        max_x = max(vals_contig_index[-1], max_x)
        max_y = max(max_y, vals_length[-1])

        color, ls = get_color_and_ls(contigs_fpath)
        matplotlib.pyplot.plot(vals_contig_index, vals_length,
                               color=color, lw=line_width, ls=ls)

    reference_drawn = False
    if reference:
        ref_lengths = sorted(fastaparser.get_lengths_from_fastafile(reference),
                             reverse=True)
        # A reference without sequences has no curve to draw; indexing the
        # empty cumulative list would raise IndexError.
        if ref_lengths:
            y_vals = []
            running_total = 0
            for length in ref_lengths:
                running_total += length
                y_vals.append(running_total)
            # for reference only: starting from X=1
            # (list() is required on Python 3, where range has no append)
            x_vals = list(range(1, len(y_vals) + 1))

            # extend reference curve to the max X-axis point
            reference_length = y_vals[-1]
            max_x = max(max_x, x_vals[-1])
            max_y = max(max_y, reference_length)
            y_vals.append(reference_length)
            x_vals.append(max_x)
            matplotlib.pyplot.plot(x_vals, y_vals, color=reference_color,
                                   lw=line_width, ls=reference_ls)
            reference_drawn = True

    if with_title:
        matplotlib.pyplot.title(title)
    matplotlib.pyplot.grid(with_grid)
    ax = matplotlib.pyplot.gca()

    # Shrink current axis's height by 20% on the bottom
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width, box.height * 0.8])

    # A real list is needed here: map() returns an iterator on Python 3.
    legend_list = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference_drawn:
        legend_list.append('Reference')

    # Put a legend below current axis
    try:  # for matplotlib <= 2009-12-09
        ax.legend(legend_list, loc='upper center', bbox_to_anchor=(0.5, -0.1),
                  fancybox=True, shadow=True, ncol=min(n_columns, 3))
    except Exception:  # ZeroDivisionError: ValueError:
        # Deliberately best-effort: the plot is still saved without a legend.
        pass

    ylabel = 'Cumulative length '
    ylabel, mkfunc = y_formatter(ylabel, max_y)
    matplotlib.pyplot.xlabel('Contig index', fontsize=axes_fontsize)
    matplotlib.pyplot.ylabel(ylabel, fontsize=axes_fontsize)

    mkformatter = matplotlib.ticker.FuncFormatter(mkfunc)
    ax.yaxis.set_major_formatter(mkformatter)

    xLocator, yLocator = get_locators()
    ax.yaxis.set_major_locator(yLocator)
    ax.xaxis.set_major_locator(xLocator)
    if logarithmic_x_scale:
        ax.set_xscale('log')

    plot_fpath += plots_file_ext
    matplotlib.pyplot.savefig(plot_fpath, bbox_inches='tight')
    logger.info(' saved to ' + plot_fpath)
    pdf_plots_figures.append(figure)