Ejemplo n.º 1
0
def correct_assemblies(contigs_fpaths, output_dirpath, labels):
    """Run contig correction on the assemblies with min-contig filtering off.

    ``qconfig.min_contig`` is temporarily forced to 0 while
    ``qutils.correct_contigs`` runs and is always restored afterwards, even
    when the correction step raises.

    Args:
        contigs_fpaths: paths of the assembly files to correct.
        output_dirpath: base output directory; corrected files go to its
            ``qconfig.corrected_dirname`` subdirectory.
        labels: assembly labels forwarded to ``qutils.correct_contigs``.

    Returns:
        A tuple ``(assemblies, corrected_labels)`` where ``assemblies`` is a
        list of ``Assembly`` objects built from the "old" file paths returned
        by ``qutils.correct_contigs`` (labelled via
        ``qutils.label_from_fpath``) and ``corrected_labels`` is the list of
        their labels in the same order.
    """
    corrected_dirpath = os.path.join(output_dirpath, qconfig.corrected_dirname)

    # We need correction but do not need min-contig filtration.  The setting
    # is global, so restore it in ``finally`` to avoid leaking the override
    # to the rest of the program if correction fails.
    saved_min_contig = qconfig.min_contig
    qconfig.min_contig = 0
    try:
        _, old_contigs_fpaths = qutils.correct_contigs(
            contigs_fpaths, corrected_dirpath, labels, reporting=None)
    finally:
        qconfig.min_contig = saved_min_contig

    assemblies = [Assembly(fpath, qutils.label_from_fpath(fpath))
                  for fpath in old_contigs_fpaths]
    corrected_labels = [asm.label for asm in assemblies]

    if qconfig.draw_plots or qconfig.html_report:
        # Imported lazily: plotting is only needed when plots/HTML are enabled.
        from quast_libs import plotter
        # Pass a separate copy so the returned label list is never shared
        # with the plotter.
        plotter.save_colors_and_ls([asm.fpath for asm in assemblies],
                                   labels=list(corrected_labels))
    return assemblies, corrected_labels
Ejemplo n.º 2
0
def correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting):
    """Pre-process assembly files in parallel and collect the resulting paths.

    Each input file is handled by ``parallel_correct_contigs``; its result is
    unpacked as ``(old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs)``.

    Side effects visible in this function:
      * ``qconfig.max_threads`` is set to 1 when it is ``None``;
      * ``qconfig.assembly_labels_by_fpath`` gets an entry for every old,
        corrected and broken-scaffold path (broken scaffolds are labelled
        ``label + '_broken'``);
      * ``add_lengths_to_report`` is called for every corrected and
        broken-scaffold file;
      * when plots or the HTML report are enabled and
        ``plotter.dict_color_and_ls`` is empty, colors are registered for the
        corrected paths.

    Args:
        contigs_fpaths: paths of the assembly files to process.
        corrected_dirpath: directory passed to ``parallel_correct_contigs``.
        labels: labels indexed in parallel with ``contigs_fpaths``.
        reporting: object forwarded to ``add_lengths_to_report``.

    Returns:
        A tuple ``(corrected_contigs_fpaths, old_contigs_fpaths)``; broken
        scaffold paths are appended to both lists.
    """
    ## removing from contigs' names special characters because:
    ## 1) Some embedded tools can fail on some strings with "...", "+", "-", etc
    ## 2) Nucmer fails on names like "contig 1_bla_bla", "contig 2_bla_bla" (it interprets as a contig's name only the first word of caption and gets ambiguous contigs names)

    if qconfig.max_threads is None:
        qconfig.max_threads = 1

    # joblib rejects n_jobs == 0, which min() yields for an empty input list;
    # fall back to a single job in that case (negative values are kept as is).
    n_jobs = min(len(contigs_fpaths), qconfig.max_threads) or 1
    if is_python2():
        from joblib import Parallel, delayed
    else:
        from joblib3 import Parallel, delayed
    logger.main_info('  Pre-processing...')
    corrected_info = Parallel(n_jobs=n_jobs)(
        delayed(parallel_correct_contigs)(i, contigs_fpath, corrected_dirpath, labels)
        for i, contigs_fpath in enumerate(contigs_fpaths))

    corrected_contigs_fpaths = []
    old_contigs_fpaths = []
    labels_by_fpath = qconfig.assembly_labels_by_fpath
    for contig_idx, info in enumerate(corrected_info):
        old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs = info
        label = labels[contig_idx]
        logger.main_info('\n'.join(logs))
        for old_fpath in old_fpaths:
            old_contigs_fpaths.append(old_fpath)
            labels_by_fpath[old_fpath] = label
        for corr_fpath, lengths in corr_fpaths:
            corrected_contigs_fpaths.append(corr_fpath)
            labels_by_fpath[corr_fpath] = label
            add_lengths_to_report(lengths, reporting, corr_fpath)
        # A broken scaffold file counts as both an "old" and a "corrected"
        # assembly and gets its own '_broken' label.
        for broken_fpath, lengths in broken_scaffold_fpaths:
            old_contigs_fpaths.append(broken_fpath)
            corrected_contigs_fpaths.append(broken_fpath)
            labels_by_fpath[broken_fpath] = label + '_broken'
            add_lengths_to_report(lengths, reporting, broken_fpath)

    if qconfig.draw_plots or qconfig.html_report:
        from quast_libs import plotter
        # Do not overwrite colors that were already assigned earlier.
        if not plotter.dict_color_and_ls:
            plotter.save_colors_and_ls(corrected_contigs_fpaths)

    return corrected_contigs_fpaths, old_contigs_fpaths
Ejemplo n.º 3
0
def correct_contigs(contigs_fpaths, corrected_dirpath, labels, reporting):
    """Pre-process assembly files in parallel and collect the resulting paths.

    Every input file is passed to ``parallel_correct_contigs`` through joblib;
    each result is unpacked as
    ``(old_fpaths, corr_fpaths, broken_scaffold_fpaths, logs)``.

    Side effects visible in this function:
      * ``qconfig.assembly_labels_by_fpath`` gets an entry for every old,
        corrected and broken-scaffold path (broken scaffolds are labelled
        ``label + '_broken'``);
      * ``add_lengths_to_report`` is called for every corrected and
        broken-scaffold file;
      * when plots or the HTML report are enabled and
        ``plotter.dict_color_and_ls`` is empty, colors are registered for the
        corrected paths.

    Args:
        contigs_fpaths: paths of the assembly files to process.
        corrected_dirpath: directory passed to ``parallel_correct_contigs``.
        labels: labels indexed in parallel with ``contigs_fpaths``.
        reporting: object forwarded to ``add_lengths_to_report``.

    Returns:
        A tuple ``(corrected_contigs_fpaths, old_contigs_fpaths)``; broken
        scaffold paths are appended to both lists.
    """
    ## removing from contigs' names special characters because:
    ## 1) Some embedded tools can fail on some strings with "...", "+", "-", etc
    ## 2) Nucmer fails on names like "contig 1_bla_bla", "contig 2_bla_bla" (it interprets as a contig's name only the first word of caption and gets ambiguous contigs names)

    # An unset thread limit means "single job"; comparing an int with None
    # would otherwise raise TypeError on Python 3.  qconfig is not modified.
    max_threads = qconfig.max_threads
    if max_threads is None:
        max_threads = 1
    # joblib rejects n_jobs == 0, which min() yields for an empty input list;
    # fall back to a single job in that case (negative values are kept as is).
    n_jobs = min(len(contigs_fpaths), max_threads) or 1

    from joblib import Parallel, delayed
    logger.main_info('  Pre-processing...')
    run_in_parallel = Parallel(n_jobs=n_jobs)
    corrected_info = run_in_parallel(
        delayed(parallel_correct_contigs)(i, contigs_fpath, corrected_dirpath,
                                          labels)
        for i, contigs_fpath in enumerate(contigs_fpaths))

    corrected_contigs_fpaths = []
    old_contigs_fpaths = []
    for contig_idx, (old_fpaths, corr_fpaths, broken_scaffold_fpaths,
                     logs) in enumerate(corrected_info):
        label = labels[contig_idx]
        logger.main_info('\n'.join(logs))
        for old_fpath in old_fpaths:
            old_contigs_fpaths.append(old_fpath)
            qconfig.assembly_labels_by_fpath[old_fpath] = label
        for corr_fpath, lengths in corr_fpaths:
            corrected_contigs_fpaths.append(corr_fpath)
            qconfig.assembly_labels_by_fpath[corr_fpath] = label
            add_lengths_to_report(lengths, reporting, corr_fpath)
        # A broken scaffold file is tracked in both result lists and gets
        # a dedicated '_broken' label.
        for broken_fpath, lengths in broken_scaffold_fpaths:
            old_contigs_fpaths.append(broken_fpath)
            corrected_contigs_fpaths.append(broken_fpath)
            qconfig.assembly_labels_by_fpath[broken_fpath] = label + '_broken'
            add_lengths_to_report(lengths, reporting, broken_fpath)

    if qconfig.draw_plots or qconfig.html_report:
        from quast_libs import plotter
        # Keep any colors that were already assigned earlier.
        if not plotter.dict_color_and_ls:
            plotter.save_colors_and_ls(corrected_contigs_fpaths)

    return corrected_contigs_fpaths, old_contigs_fpaths