Beispiel #1
0
def _correct_contigs(contigs_fpaths, corrected_dirpath, min_contig, labels):
    assemblies = []

    for i, contigs_fpath in enumerate(contigs_fpaths):
        contigs_fname = os.path.basename(contigs_fpath)
        fname, ctg_fasta_ext = qutils.splitext_for_fasta_file(contigs_fname)

        label = labels[i]

        corr_fpath = qutils.unique_corrected_fpath(
            os.path.join(corrected_dirpath, label + ctg_fasta_ext))

        assembly = Assembly(corr_fpath, label)

        logger.info('  %s ==> %s' % (contigs_fpath, label))

        # Handle fasta
        lengths = fastaparser.get_lengths_from_fastafile(contigs_fpath)
        if not sum(l for l in lengths if l >= min_contig):
            logger.warning(
                "Skipping %s because it doesn't contain contigs >= %d bp." %
                (os.path.basename(contigs_fpath), min_contig))
            continue

        # correcting
        if not quast.correct_fasta(contigs_fpath, corr_fpath, min_contig):
            continue

        assemblies.append(assembly)

    return assemblies
Beispiel #2
0
def _correct_contigs(contigs_fpaths, corrected_dirpath, min_contig, labels):
    assemblies = []

    for i, contigs_fpath in enumerate(contigs_fpaths):
        contigs_fname = os.path.basename(contigs_fpath)
        fname, ctg_fasta_ext = qutils.splitext_for_fasta_file(contigs_fname)

        label = labels[i]

        corr_fpath = qutils.unique_corrected_fpath(
            os.path.join(corrected_dirpath, label + ctg_fasta_ext))

        assembly = Assembly(corr_fpath, label)

        logger.info('  %s ==> %s' % (contigs_fpath, label))

        # Handle fasta
        lengths = fastaparser.get_lengths_from_fastafile(contigs_fpath)
        if not sum(l for l in lengths if l >= min_contig):
            logger.warning("Skipping %s because it doesn't contain contigs >= %d bp."
                           % (os.path.basename(contigs_fpath), min_contig))
            continue

        # correcting
        if not quast.correct_fasta(contigs_fpath, corr_fpath, min_contig):
            continue

        assemblies.append(assembly)

    return assemblies
Beispiel #3
0
def _handle_fasta(contigs_fpath, corr_fpath, reporting):
    """Correct one contigs file and record its per-threshold statistics.

    Returns False (after logging a warning) when the lengths of the contigs
    of at least ``qconfig.min_contig`` bp sum to zero, and False when
    ``correct_fasta`` returns a falsy value. Otherwise the report obtained
    from ``reporting.get(corr_fpath)`` receives, for every value in
    ``qconfig.contig_thresholds``, the number of contigs and their total
    length at or above that threshold, and True is returned.
    """
    contig_lengths = fastaparser.get_lengths_from_fastafile(contigs_fpath)

    retained_bp = sum(length for length in contig_lengths
                      if length >= qconfig.min_contig)
    if not retained_bp:
        skipped_label = qutils.label_from_fpath(corr_fpath)
        logger.warning(
            "Skipping %s because it doesn't contain contigs >= %d bp." %
            (skipped_label, qconfig.min_contig))
        return False

    # Write the corrected copy; give up on this file if that fails.
    corrected = correct_fasta(contigs_fpath, corr_fpath, qconfig.min_contig)
    if not corrected:
        return False

    # Report entry keyed by the corrected file path.
    report = reporting.get(corr_fpath)

    # Number of contigs at or above each configured threshold.
    contig_counts = [
        sum(1 for length in contig_lengths if length >= threshold)
        for threshold in qconfig.contig_thresholds
    ]
    report.add_field(reporting.Fields.CONTIGS__FOR_THRESHOLDS, contig_counts)

    # Total length of contigs at or above each configured threshold.
    total_lengths = [
        sum(length for length in contig_lengths if length >= threshold)
        for threshold in qconfig.contig_thresholds
    ]
    report.add_field(reporting.Fields.TOTALLENS__FOR_THRESHOLDS, total_lengths)

    return True
Beispiel #4
0
            def fillMetrics(kind, fasta_fn):
                """Fill ``self._metrics['spades'][kind]`` from a FASTA file.

                ``kind`` is used both as the key of the metrics table and as
                the noun in the metric names (e.g. ``'Largest ' + kind``).
                ``fasta_fn`` is passed to ``get_lengths_from_fastafile``.

                Every value is a tuple of locale-formatted strings. Except
                for the "Largest" entry (a 1-tuple over all sequences), each
                tuple is ``(value for sequences >= 500 bp, value for all
                sequences)``.
                """
                metrics = self._metrics['spades'][kind]
                lengths_all = get_lengths_from_fastafile(fasta_fn)
                lengths_large = [l for l in lengths_all if l >= 500]

                def i(number):
                    # locale.format() is deprecated and was removed in
                    # Python 3.12; format_string() yields the same text for
                    # a single "%d" specifier and also exists on Python 2.
                    return locale.format_string("%d", int(number),
                                                grouping=True)

                def cval(func):
                    # Apply the same statistic to the large-only and the
                    # complete length lists.
                    return (i(func(lengths_large)), i(func(lengths_all)))

                metrics['Largest ' + kind] = (i(max(lengths_all)), )
                metrics['Total Length'] = cval(sum)
                metrics['Number of ' + kind + 's'] = cval(len)
                for q in [50, 75, 90, 95]:
                    # Bind q at definition time so the lambda never depends
                    # on the loop variable's later value.
                    metrics['N%s' % q] = cval(lambda x, q=q: N50(x, q))
Beispiel #5
0
            def fillMetrics(kind, fasta_fn):
                """Populate the 'spades' metrics entry for ``kind``.

                Sequence lengths are read from ``fasta_fn`` with
                ``get_lengths_from_fastafile``. The metrics dictionary
                ``self._metrics['spades'][kind]`` then receives the largest
                length, the total length, the sequence count and the
                ``N50(lengths, q)`` values for q in 50, 75, 90 and 95.

                Values are tuples of strings formatted with locale-aware
                digit grouping; two-element tuples hold the figure for
                sequences of at least 500 bp first and for all sequences
                second.
                """
                metrics = self._metrics['spades'][kind]
                lengths_all = get_lengths_from_fastafile(fasta_fn)
                lengths_large = [l for l in lengths_all if l >= 500]

                def i(number):
                    # format_string() replaces the deprecated locale.format()
                    # (gone in Python 3.12); output is identical for "%d".
                    return locale.format_string("%d", int(number),
                                                grouping=True)

                def cval(func):
                    # (statistic over large sequences, statistic over all)
                    return (i(func(lengths_large)), i(func(lengths_all)))

                metrics['Largest ' + kind] = (i(max(lengths_all)), )
                metrics['Total Length'] = cval(sum)
                metrics['Number of ' + kind + 's'] = cval(len)
                for q in [50, 75, 90, 95]:
                    # q is captured as a default argument to sidestep
                    # late-binding of the loop variable.
                    metrics['N%s' % q] = cval(lambda x, q=q: N50(x, q))
Beispiel #6
0
def _handle_fasta(contigs_fpath, corr_fpath, reporting):
    """Validate, correct and summarise a single contigs file.

    The file is rejected (False is returned) when the lengths of its contigs
    of at least ``qconfig.min_contig`` bp sum to zero, in which case a
    warning is logged, or when ``correct_fasta`` returns a falsy value. On
    success the per-threshold contig counts and total lengths are added to
    the report for ``corr_fpath`` and True is returned.
    """
    lengths = fastaparser.get_lengths_from_fastafile(contigs_fpath)

    def at_least(limit):
        # Lengths that reach the given limit, in file order.
        return [n for n in lengths if n >= limit]

    if not sum(at_least(qconfig.min_contig)):
        message = ("Skipping %s because it doesn't contain contigs >= %d bp."
                   % (qutils.label_from_fpath(corr_fpath), qconfig.min_contig))
        logger.warning(message)
        return False

    # Produce the corrected copy of the input; stop if that fails.
    if not correct_fasta(contigs_fpath, corr_fpath, qconfig.min_contig):
        return False

    # Report entry associated with the corrected file.
    report = reporting.get(corr_fpath)

    # One value per configured threshold: how many contigs reach it ...
    counts = [len(at_least(threshold))
              for threshold in qconfig.contig_thresholds]
    report.add_field(reporting.Fields.CONTIGS__FOR_THRESHOLDS, counts)

    # ... and how many bases those contigs add up to.
    totals = [sum(at_least(threshold))
              for threshold in qconfig.contig_thresholds]
    report.add_field(reporting.Fields.TOTALLENS__FOR_THRESHOLDS, totals)

    return True
Beispiel #7
0
def cumulative_plot(reference, contigs_fpaths, lists_of_lengths, plot_fpath, title):
    """Draw the cumulative contig-length plot and save it to disk.

    For every assembly the contig lengths are sorted in decreasing order and
    the running total is plotted against the contig index, starting from the
    point (0, 0). When ``reference`` is truthy it is read with
    ``fastaparser.get_lengths_from_fastafile`` and its total length is drawn
    as a horizontal line.

    Args:
        reference: path of the reference FASTA file, or a falsy value.
        contigs_fpaths: paths of the assemblies, used for colours and legend
            labels.
        lists_of_lengths: one iterable of contig lengths per assembly,
            paired by position with ``contigs_fpaths``. The inputs are not
            modified.
        plot_fpath: output path without extension; ``plots_file_ext`` is
            appended.
        title: plot title, drawn only when ``with_title`` is set.

    Returns None. Does nothing when ``matplotlib_error`` is set. The created
    figure is appended to ``pdf_plots_figures``.
    """
    if matplotlib_error:
        return

    logger.info('  Drawing cumulative plot...')
    import matplotlib.pyplot
    import matplotlib.ticker

    figure = matplotlib.pyplot.figure()
    matplotlib.pyplot.rc('font', **font)
    max_x = 0
    max_y = 0
    color_id = 0

    # Built-in zip works on both Python 2 and 3 (itertools.izip is 2-only).
    for contigs_fpath, lengths in zip(contigs_fpaths, lists_of_lengths):
        # Sort a copy so the caller's list is not reordered as a side effect.
        vals_length = [0]
        for length in sorted(lengths, reverse=True):
            vals_length.append(vals_length[-1] + length)
        vals_contig_index = list(range(len(vals_length)))

        # Both lists always contain at least the initial 0 entry.
        max_x = max(vals_contig_index[-1], max_x)
        max_y = max(max_y, vals_length[-1])

        color, ls, color_id = get_color_and_ls(color_id, contigs_fpath)
        matplotlib.pyplot.plot(vals_contig_index, vals_length, color=color, lw=line_width, ls=ls)

    if reference:
        reference_length = sum(fastaparser.get_lengths_from_fastafile(reference))
        matplotlib.pyplot.plot([0, max_x], [reference_length, reference_length],
                               color=reference_color, lw=line_width, ls=reference_ls)
        max_y = max(max_y, reference_length)

    if with_title:
        matplotlib.pyplot.title(title)
    matplotlib.pyplot.grid(with_grid)
    ax = matplotlib.pyplot.gca()
    # Shrink current axis's height by 20% on the bottom
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width, box.height * 0.8])

    # A real list is required: on Python 3 map() returns an iterator that
    # cannot be extended.
    legend_list = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference:
        legend_list.append('Reference')

    # Put a legend below current axis
    try:  # for matplotlib <= 2009-12-09
        ax.legend(legend_list, loc='upper center', bbox_to_anchor=(0.5, -0.1), fancybox=True,
                  shadow=True, ncol=n_columns)
    except Exception:  # ZeroDivisionError: ValueError:
        # Deliberate best effort: the plot is still saved without a legend.
        pass

    ylabel = 'Cumulative length '
    ylabel, mkfunc = y_formatter(ylabel, max_y)
    matplotlib.pyplot.xlabel('Contig index', fontsize=axes_fontsize)
    matplotlib.pyplot.ylabel(ylabel, fontsize=axes_fontsize)

    mkformatter = matplotlib.ticker.FuncFormatter(mkfunc)
    ax.yaxis.set_major_formatter(mkformatter)

    xLocator, yLocator = get_locators()
    ax.yaxis.set_major_locator(yLocator)
    ax.xaxis.set_major_locator(xLocator)
    #ax.set_yscale('log')

    #matplotlib.pyplot.ylim([0, int(float(max_y) * 1.1)])

    plot_fpath += plots_file_ext
    matplotlib.pyplot.savefig(plot_fpath)
    logger.info('    saved to ' + plot_fpath)
    pdf_plots_figures.append(figure)
Beispiel #8
0
def cumulative_plot(reference, contigs_fpaths, lists_of_lengths, plot_fpath, title):
    """Draw the cumulative contig-length plot and save it to disk.

    For every assembly the contig lengths are sorted in decreasing order and
    the running total is plotted against the contig index, starting from the
    point (0, 0). When ``reference`` is truthy, its sequence lengths are read
    with ``fastaparser.get_lengths_from_fastafile`` and plotted the same way
    (starting at X=1), then extended horizontally to the largest X value.

    Args:
        reference: path of the reference FASTA file, or a falsy value.
        contigs_fpaths: paths of the assemblies, used for colours and legend
            labels.
        lists_of_lengths: one iterable of contig lengths per assembly,
            paired by position with ``contigs_fpaths``.
        plot_fpath: output path without extension; ``plots_file_ext`` is
            appended.
        title: plot title, drawn only when ``with_title`` is set.

    Returns None. Does nothing when ``matplotlib_error`` is set. The created
    figure is appended to ``pdf_plots_figures``. A reference that yields no
    sequence lengths is skipped (no curve, no legend entry) instead of
    raising IndexError.
    """
    if matplotlib_error:
        return

    logger.info('  Drawing cumulative plot...')
    import matplotlib.pyplot
    import matplotlib.ticker

    figure = matplotlib.pyplot.figure()
    matplotlib.pyplot.rc('font', **font)
    max_x = 0
    max_y = 0

    # Built-in zip works on both Python 2 and 3 (itertools.izip is 2-only).
    for contigs_fpath, lengths in zip(contigs_fpaths, lists_of_lengths):
        vals_length = [0]
        for length in sorted(lengths, reverse=True):
            vals_length.append(vals_length[-1] + length)
        vals_contig_index = list(range(len(vals_length)))

        # Both lists always contain at least the initial 0 entry.
        max_x = max(vals_contig_index[-1], max_x)
        max_y = max(max_y, vals_length[-1])

        color, ls = get_color_and_ls(contigs_fpath)
        matplotlib.pyplot.plot(vals_contig_index, vals_length, color=color, lw=line_width, ls=ls)

    reference_lengths = []
    if reference:
        reference_lengths = sorted(
            fastaparser.get_lengths_from_fastafile(reference), reverse=True)

    if reference_lengths:
        y_vals = []
        running_total = 0
        for length in reference_lengths:
            running_total += length
            y_vals.append(running_total)
        # for reference only: starting from X=1; a list is needed because
        # the curve is extended below (range() has no append on Python 3)
        x_vals = list(range(1, len(y_vals) + 1))

        # extend reference curve to the max X-axis point
        reference_length = y_vals[-1]
        max_x = max(max_x, x_vals[-1])
        max_y = max(max_y, reference_length)
        y_vals.append(reference_length)
        x_vals.append(max_x)
        matplotlib.pyplot.plot(x_vals, y_vals,
                               color=reference_color, lw=line_width, ls=reference_ls)

    if with_title:
        matplotlib.pyplot.title(title)
    matplotlib.pyplot.grid(with_grid)
    ax = matplotlib.pyplot.gca()
    # Shrink current axis's height by 20% on the bottom
    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.2, box.width, box.height * 0.8])

    # A real list is required: on Python 3 map() returns an iterator that
    # cannot be extended.
    legend_list = [qutils.label_from_fpath(fpath) for fpath in contigs_fpaths]
    if reference_lengths:
        legend_list.append('Reference')

    # Put a legend below current axis (at most three columns)
    try:  # for matplotlib <= 2009-12-09
        ax.legend(legend_list, loc='upper center', bbox_to_anchor=(0.5, -0.1), fancybox=True,
                  shadow=True, ncol=n_columns if n_columns < 3 else 3)
    except Exception:  # ZeroDivisionError: ValueError:
        # Deliberate best effort: the plot is still saved without a legend.
        pass

    ylabel = 'Cumulative length '
    ylabel, mkfunc = y_formatter(ylabel, max_y)
    matplotlib.pyplot.xlabel('Contig index', fontsize=axes_fontsize)
    matplotlib.pyplot.ylabel(ylabel, fontsize=axes_fontsize)

    mkformatter = matplotlib.ticker.FuncFormatter(mkfunc)
    ax.yaxis.set_major_formatter(mkformatter)

    xLocator, yLocator = get_locators()
    ax.yaxis.set_major_locator(yLocator)
    ax.xaxis.set_major_locator(xLocator)
    if logarithmic_x_scale:
        ax.set_xscale('log')
    #ax.set_yscale('log')

    #matplotlib.pyplot.ylim([0, int(float(max_y) * 1.1)])

    plot_fpath += plots_file_ext
    matplotlib.pyplot.savefig(plot_fpath, bbox_inches='tight')
    logger.info('    saved to ' + plot_fpath)
    pdf_plots_figures.append(figure)