Example 1
def search_sv_with_manta(main_ref_fpath, meta_ref_fpaths, output_dirpath,
                         err_path):
    logger.info('  Searching structural variations with Manta...')
    final_bed_fpath = os.path.join(output_dirpath, qconfig.manta_sv_fname)
    if os.path.exists(final_bed_fpath):
        logger.info('    Using existing file: ' + final_bed_fpath)
        return final_bed_fpath

    if meta_ref_fpaths:
        if is_python2():
            from joblib import Parallel, delayed
        else:
            from joblib3 import Parallel, delayed
        n_jobs = min(len(meta_ref_fpaths), qconfig.max_threads)
        if not qconfig.memory_efficient:
            bed_fpaths = Parallel(n_jobs=n_jobs)(
                delayed(process_one_ref)(cur_ref_fpath, output_dirpath,
                                         err_path)
                for cur_ref_fpath in meta_ref_fpaths)
        else:
            bed_fpaths = [
                process_one_ref(cur_ref_fpath, output_dirpath, err_path)
                for cur_ref_fpath in meta_ref_fpaths
            ]
        bed_fpaths = [f for f in bed_fpaths if f is not None]
        if bed_fpaths:
            qutils.cat_files(bed_fpaths, final_bed_fpath)
    else:
        process_one_ref(main_ref_fpath,
                        output_dirpath,
                        err_path,
                        bed_fpath=final_bed_fpath)
    logger.info('    Saving to: ' + final_bed_fpath)
    return final_bed_fpath
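
The Manta search above follows a simple fan-out/concatenate pattern: one worker per reference, failed workers return None, and the surviving per-reference BED files are merged into a single result. Below is a minimal, self-contained sketch of that pattern, assuming stock joblib is installed (QUAST bundles its own joblib/joblib3 copies); the process_one_ref, cat_files and file names here are hypothetical stand-ins, not QUAST code.

import os
from joblib import Parallel, delayed

def process_one_ref(ref_fpath, output_dirpath):
    # Stand-in worker: pretend each reference yields one BED file (or None on failure).
    bed_fpath = os.path.join(output_dirpath, os.path.basename(ref_fpath) + '.bed')
    with open(bed_fpath, 'w') as bed_file:
        bed_file.write('# SV calls for ' + ref_fpath + '\n')
    return bed_fpath

def cat_files(in_fpaths, out_fpath):
    # Simplified equivalent of qutils.cat_files: concatenate inputs into one file.
    with open(out_fpath, 'w') as out_file:
        for fpath in in_fpaths:
            with open(fpath) as in_file:
                out_file.write(in_file.read())

def search_sv_parallel(ref_fpaths, output_dirpath, max_threads=4):
    n_jobs = min(len(ref_fpaths), max_threads)
    bed_fpaths = Parallel(n_jobs=n_jobs)(
        delayed(process_one_ref)(ref_fpath, output_dirpath) for ref_fpath in ref_fpaths)
    bed_fpaths = [f for f in bed_fpaths if f is not None]  # drop references that failed
    final_bed_fpath = os.path.join(output_dirpath, 'combined.bed')
    if bed_fpaths:
        cat_files(bed_fpaths, final_bed_fpath)
    return final_bed_fpath
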
Example 2
def search_sv_with_gridss(main_ref_fpath, bam_fpath, meta_ref_fpaths, output_dirpath, err_fpath):
    logger.info('  Searching structural variations with GRIDSS...')
    final_bed_fpath = join(output_dirpath, qutils.name_from_fpath(main_ref_fpath) + '_' + qconfig.sv_bed_fname)
    if isfile(final_bed_fpath):
        logger.info('    Using existing file: ' + final_bed_fpath)
        return final_bed_fpath

    if not get_path_to_program('java') or not check_java_version(1.8):
        logger.warning('Java 1.8 or later is required to run GRIDSS. Please install it and rerun QUAST.')
        return None
    if not get_path_to_program('Rscript'):
        logger.warning('R is required to run GRIDSS. Please install it and rerun QUAST.')
        return None

    if meta_ref_fpaths:
        n_jobs = min(len(meta_ref_fpaths), qconfig.max_threads)
        threads_per_job = max(1, qconfig.max_threads // n_jobs)
        parallel_args = [(cur_ref_fpath, output_dirpath, err_fpath, threads_per_job) for cur_ref_fpath in meta_ref_fpaths]
        bed_fpaths = run_parallel(process_one_ref, parallel_args, n_jobs, filter_results=True)
        if bed_fpaths:
            qutils.cat_files(bed_fpaths, final_bed_fpath)
    else:
        process_one_ref(main_ref_fpath, output_dirpath, err_fpath, qconfig.max_threads, bam_fpath=bam_fpath, bed_fpath=final_bed_fpath)
    logger.info('    Saving to: ' + final_bed_fpath)
    return final_bed_fpath
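
Before launching any jobs, the GRIDSS wrapper above verifies its external dependencies (Java 1.8+ and Rscript) and divides the thread budget evenly across per-reference jobs. The following is a small standard-library sketch of those two preliminary steps; the helper names are illustrative, and unlike check_java_version it does not parse the actual Java version string.

import shutil

def find_missing_programs(programs=('java', 'Rscript')):
    # shutil.which returns None when an executable cannot be found on PATH.
    return [program for program in programs if shutil.which(program) is None]

def plan_jobs(n_refs, max_threads):
    # One job per reference, capped by the thread count;
    # each job then receives an equal share of the remaining threads.
    n_jobs = min(n_refs, max_threads)
    threads_per_job = max(1, max_threads // n_jobs)
    return n_jobs, threads_per_job

missing = find_missing_programs()
if missing:
    print('Missing required programs: ' + ', '.join(missing))
else:
    print(plan_jobs(n_refs=5, max_threads=8))  # -> (5, 1)
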
Example 3
def search_sv_with_manta(main_ref_fpath, meta_ref_fpaths, output_dirpath, err_path):
    logger.info('  Searching structural variations with Manta...')
    final_bed_fpath = os.path.join(output_dirpath, qconfig.manta_sv_fname)
    if os.path.exists(final_bed_fpath):
        logger.info('    Using existing file: ' + final_bed_fpath)
        return final_bed_fpath

    if meta_ref_fpaths:
        if is_python2():
            from joblib import Parallel, delayed
        else:
            from joblib3 import Parallel, delayed
        n_jobs = min(len(meta_ref_fpaths), qconfig.max_threads)
        bed_fpaths = Parallel(n_jobs=n_jobs)(delayed(process_one_ref)(cur_ref_fpath, output_dirpath, err_path) for cur_ref_fpath in meta_ref_fpaths)
        bed_fpaths = [f for f in bed_fpaths if f is not None]
        if bed_fpaths:
            qutils.cat_files(bed_fpaths, final_bed_fpath)
    else:
        process_one_ref(main_ref_fpath, output_dirpath, err_path, bed_fpath=final_bed_fpath)
    logger.info('    Saving to: ' + final_bed_fpath)
    return final_bed_fpath
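
Both Manta variants begin with the same early-return idiom: if the combined BED file already exists, it is reused instead of being recomputed. A compact sketch of that caching pattern in isolation; compute_func and the paths below are placeholders rather than QUAST helpers.

import os

def compute_or_reuse(final_fpath, compute_func, *args, **kwargs):
    # Reuse an existing output file instead of recomputing it.
    if os.path.exists(final_fpath):
        print('    Using existing file: ' + final_fpath)
        return final_fpath
    compute_func(final_fpath, *args, **kwargs)
    print('    Saving to: ' + final_fpath)
    return final_fpath

# Usage (hypothetical): compute_or_reuse('output/manta_sv.bed', run_manta_for_all_refs)
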
Example 4
def run_processing_reads(contigs_fpaths, main_ref_fpath, meta_ref_fpaths, ref_labels, temp_output_dir, output_dir,
                         log_path, err_fpath):
    required_files = []
    bed_fpath, cov_fpath, physical_cov_fpath = None, None, None
    if main_ref_fpath:
        ref_name = qutils.name_from_fpath(main_ref_fpath)

        bed_fpath = qconfig.bed or join(output_dir, ref_name + '.bed')
        cov_fpath = qconfig.cov_fpath or join(output_dir, ref_name + '.cov')
        physical_cov_fpath = qconfig.phys_cov_fpath or join(output_dir, ref_name + '.physical.cov')
        required_files = [bed_fpath, cov_fpath, physical_cov_fpath]

        if qconfig.no_sv:
            logger.info('  Will not search Structural Variations (--fast or --no-sv is specified)')
            bed_fpath = None
        elif is_non_empty_file(bed_fpath):
            logger.info('  Using existing BED-file: ' + bed_fpath)
        elif not qconfig.forward_reads and not qconfig.interlaced_reads:
            if not qconfig.reference_sam and not qconfig.reference_bam:
                logger.info('  Will not search Structural Variations (needs paired-end reads)')
                bed_fpath = None
                qconfig.no_sv = True
        if qconfig.create_icarus_html:
            if is_non_empty_file(cov_fpath):
                is_correct_file = check_cov_file(cov_fpath)
                if is_correct_file:
                    logger.info('  Using existing reads coverage file: ' + cov_fpath)
            if is_non_empty_file(physical_cov_fpath):
                logger.info('  Using existing physical coverage file: ' + physical_cov_fpath)
        else:
            logger.info('  Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)')
            cov_fpath = None
            physical_cov_fpath = None
        if (is_non_empty_file(bed_fpath) or qconfig.no_sv) and \
                (not qconfig.create_icarus_html or (is_non_empty_file(cov_fpath) and is_non_empty_file(physical_cov_fpath))):
            required_files = []

    n_jobs = min(qconfig.max_threads, len(contigs_fpaths) + 1)
    max_threads_per_job = max(1, qconfig.max_threads // n_jobs)
    sam_fpaths = qconfig.sam_fpaths or [None] * len(contigs_fpaths)
    bam_fpaths = qconfig.bam_fpaths or [None] * len(contigs_fpaths)
    parallel_align_args = [(contigs_fpath, output_dir, temp_output_dir, log_path, err_fpath, max_threads_per_job,
                            sam_fpaths[index], bam_fpaths[index], index) for index, contigs_fpath in enumerate(contigs_fpaths)]
    if main_ref_fpath:
        parallel_align_args.append((main_ref_fpath, output_dir, temp_output_dir, log_path, err_fpath,
                                    max_threads_per_job, qconfig.reference_sam, qconfig.reference_bam, None, required_files, True))
    correct_chr_names, sam_fpaths, bam_fpaths = run_parallel(align_single_file, parallel_align_args, n_jobs)
    qconfig.sam_fpaths = sam_fpaths[:len(contigs_fpaths)]
    qconfig.bam_fpaths = bam_fpaths[:len(contigs_fpaths)]
    add_statistics_to_report(output_dir, contigs_fpaths, main_ref_fpath)
    save_reads(output_dir)
    if not main_ref_fpath:
        return None, None, None

    correct_chr_names = correct_chr_names[-1]
    sam_fpath, bam_fpath = sam_fpaths[-1], bam_fpaths[-1]
    qconfig.reference_sam = sam_fpath
    qconfig.reference_bam = bam_fpath
    if not required_files:
        return bed_fpath, cov_fpath, physical_cov_fpath
    if not all([sam_fpath, bam_fpath]):
        logger.info('  Failed searching structural variations.')
        return None, None, None

    sam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(sam_fpath, 'sorted'))
    bam_mapped_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_fpath, 'mapped'))
    bam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_mapped_fpath, 'sorted'))

    if is_non_empty_file(sam_sorted_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        if not is_non_empty_file(bam_sorted_fpath):
            sambamba_view(bam_fpath, bam_mapped_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped')
            sort_bam(bam_mapped_fpath, bam_sorted_fpath, err_fpath, logger)
        sambamba_view(bam_sorted_fpath, sam_sorted_fpath, qconfig.max_threads, err_fpath, logger)
    if qconfig.create_icarus_html and (not is_non_empty_file(cov_fpath) or not is_non_empty_file(physical_cov_fpath)):
        cov_fpath, physical_cov_fpath = get_coverage(temp_output_dir, main_ref_fpath, ref_name, bam_fpath, bam_sorted_fpath,
                                                     log_path, err_fpath, correct_chr_names, cov_fpath, physical_cov_fpath)
    if not is_non_empty_file(bed_fpath) and not qconfig.no_sv:
        if meta_ref_fpaths:
            logger.info('  Splitting SAM-file by references...')
        headers = []
        seq_lengths = {}
        with open(sam_fpath) as sam_file:
            for line in sam_file:
                if not line.startswith('@'):
                    break
                if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                    seq_name = line.split('\tSN:')[1].split('\t')[0]
                    seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                    seq_lengths[seq_name] = seq_length
                headers.append(line.strip())
        need_ref_splitting = False
        ref_files = {}
        if meta_ref_fpaths:
            global ref_sam_fpaths
            for cur_ref_fpath in meta_ref_fpaths:
                cur_ref_name = qutils.name_from_fpath(cur_ref_fpath)
                ref_sam_fpath = join(temp_output_dir, cur_ref_name + '.sam')
                ref_sam_fpaths[cur_ref_fpath] = ref_sam_fpath
                if is_non_empty_file(ref_sam_fpath):
                    logger.info('    Using existing split SAM-file for %s: %s' % (cur_ref_name, ref_sam_fpath))
                    ref_files[cur_ref_name] = None
                else:
                    ref_sam_file = open(ref_sam_fpath, 'w')
                    if not headers[0].startswith('@SQ'):
                        ref_sam_file.write(headers[0] + '\n')
                    for h in (h for h in headers if h.startswith('@SQ') and 'SN:' in h):
                        seq_name = h.split('\tSN:')[1].split('\t')[0]
                        if seq_name in ref_labels and ref_labels[seq_name] == cur_ref_name:
                            ref_sam_file.write(h + '\n')
                    ref_sam_file.write(headers[-1] + '\n')
                    ref_files[cur_ref_name] = ref_sam_file
                    need_ref_splitting = True

        trivial_deletions_fpath = \
            search_trivial_deletions(temp_output_dir, sam_sorted_fpath, ref_files, ref_labels, seq_lengths, need_ref_splitting)
        if get_gridss_fpath() and isfile(get_gridss_fpath()):
            try:
                gridss_sv_fpath = search_sv_with_gridss(main_ref_fpath, bam_mapped_fpath, meta_ref_fpaths, temp_output_dir, err_fpath)
                qutils.cat_files([gridss_sv_fpath, trivial_deletions_fpath], bed_fpath)
            except:
                pass
        if isfile(trivial_deletions_fpath) and not is_non_empty_file(bed_fpath):
            shutil.copy(trivial_deletions_fpath, bed_fpath)

    if not qconfig.no_sv:
        if is_non_empty_file(bed_fpath):
            logger.main_info('  Structural variations are in ' + bed_fpath)
        else:
            if isfile(bed_fpath):
                logger.main_info('  No structural variations were found.')
            else:
                logger.main_info('  Failed searching structural variations.')
            bed_fpath = None
    if is_non_empty_file(cov_fpath):
        logger.main_info('  Coverage distribution along the reference genome is in ' + cov_fpath)
    else:
        if not qconfig.create_icarus_html:
            logger.main_info('  Failed to calculate coverage distribution')
        cov_fpath = None
    return bed_fpath, cov_fpath, physical_cov_fpath
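
The newer run_processing_reads above assembles one argument tuple per assembly (plus one for the reference), hands the list to run_parallel together with align_single_file, and unpacks the per-file results into three parallel lists. run_parallel is a QUAST helper; the sketch below shows one plausible shape for that pattern with stock joblib, under the assumption that each worker returns a (chr_names, sam_fpath, bam_fpath) triple, so it is an illustration rather than QUAST's implementation.

from joblib import Parallel, delayed

def align_single_file(contigs_fpath, threads):
    # Hypothetical worker: return one (chr_names, sam_fpath, bam_fpath) triple per input.
    return ({}, contigs_fpath + '.sam', contigs_fpath + '.bam')

def run_parallel(worker, args_list, n_jobs):
    results = Parallel(n_jobs=n_jobs)(delayed(worker)(*args) for args in args_list)
    # Transpose [(a1, b1, c1), (a2, b2, c2), ...] into ([a1, a2], [b1, b2], [c1, c2]).
    return tuple(map(list, zip(*results)))

chr_names, sam_fpaths, bam_fpaths = run_parallel(
    align_single_file, [('asm1.fasta', 2), ('asm2.fasta', 2)], n_jobs=2)
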
Example 5
def run_processing_reads(main_ref_fpath,
                         meta_ref_fpaths,
                         ref_labels,
                         reads_fpaths,
                         output_dirpath,
                         res_path,
                         log_path,
                         err_path,
                         sam_fpath=None,
                         bam_fpath=None,
                         bed_fpath=None):
    ref_name = qutils.name_from_fpath(main_ref_fpath)

    if not sam_fpath and bam_fpath:
        sam_fpath = get_safe_fpath(output_dirpath, bam_fpath[:-4] + '.sam')
    else:
        sam_fpath = sam_fpath or os.path.join(output_dirpath,
                                              ref_name + '.sam')
    bam_fpath = bam_fpath or get_safe_fpath(output_dirpath,
                                            sam_fpath[:-4] + '.bam')
    sam_sorted_fpath = get_safe_fpath(output_dirpath,
                                      add_suffix(sam_fpath, 'sorted'))
    bam_sorted_fpath = get_safe_fpath(output_dirpath,
                                      add_suffix(bam_fpath, 'sorted'))

    bed_fpath = bed_fpath or os.path.join(res_path, ref_name + '.bed')
    cov_fpath = os.path.join(res_path, ref_name + '.cov')
    physical_cov_fpath = os.path.join(res_path, ref_name + '.physical.cov')

    if qconfig.no_sv:
        logger.info(
            '  Will not search Structural Variations (--fast or --no-sv is specified)'
        )
        bed_fpath = None
    elif is_non_empty_file(bed_fpath):
        logger.info('  Using existing BED-file: ' + bed_fpath)
    if qconfig.create_icarus_html:
        if is_non_empty_file(cov_fpath):
            is_correct_file = check_cov_file(cov_fpath)
            if is_correct_file:
                logger.info('  Using existing reads coverage file: ' +
                            cov_fpath)
        if is_non_empty_file(physical_cov_fpath):
            logger.info('  Using existing physical coverage file: ' +
                        physical_cov_fpath)
    else:
        logger.info(
            '  Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)'
        )
        cov_fpath = None
        physical_cov_fpath = None
    if (is_non_empty_file(bed_fpath) or qconfig.no_sv) and \
            (not qconfig.create_icarus_html or (is_non_empty_file(cov_fpath) and is_non_empty_file(physical_cov_fpath))):
        return bed_fpath, cov_fpath, physical_cov_fpath

    logger.info('  ' + 'Pre-processing reads...')
    correct_chr_names = None
    if is_non_empty_file(sam_fpath):
        logger.info('  Using existing SAM-file: ' + sam_fpath)
        correct_chr_names = get_correct_names_for_chroms(
            output_dirpath, main_ref_fpath, sam_fpath, err_path, reads_fpaths)
    elif is_non_empty_file(bam_fpath):
        logger.info('  Using existing BAM-file: ' + bam_fpath)
        qutils.call_subprocess([
            sambamba_fpath('sambamba'), 'view', '-t',
            str(qconfig.max_threads), '-h', bam_fpath
        ],
                               stdout=open(sam_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
        correct_chr_names = get_correct_names_for_chroms(
            output_dirpath, main_ref_fpath, sam_fpath, err_path, reads_fpaths)
    if not correct_chr_names and reads_fpaths:
        logger.info('  Running BWA...')
        # use absolute paths because we will change workdir
        sam_fpath = os.path.abspath(sam_fpath)
        abs_reads_fpaths = []
        for reads_fpath in reads_fpaths:
            abs_reads_fpaths.append(os.path.abspath(reads_fpath))

        if len(abs_reads_fpaths) != 2:
            logger.error(
                '  You should specify files with forward and reverse reads.')
            logger.info('  Failed searching structural variations.')
            return None, None, None

        if not qconfig.no_check:
            if not paired_reads_names_are_equal(reads_fpaths, logger):
                logger.error(
                    '  Read names are discordant, skipping reads analysis!')
                logger.info('  Failed searching structural variations.')
                return None, None, None

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        cmd = [bwa_fpath('bwa'), 'index', '-p', ref_name, main_ref_fpath]
        if os.path.getsize(main_ref_fpath) > 2 * 1024 ** 3:  # if reference size bigger than 2GB
            cmd += ['-a', 'bwtsw']
        qutils.call_subprocess(cmd,
                               stdout=open(log_path, 'a'),
                               stderr=open(err_path, 'a'),
                               logger=logger)

        cmd = bwa_fpath('bwa') + ' mem -t ' + str(qconfig.max_threads) + ' ' + ref_name + ' ' + abs_reads_fpaths[0] + ' ' + abs_reads_fpaths[1]

        qutils.call_subprocess(shlex.split(cmd),
                               stdout=open(sam_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
        logger.info('  Done.')
        os.chdir(prev_dir)
        if not os.path.exists(sam_fpath) or os.path.getsize(sam_fpath) == 0:
            logger.error('  Failed running BWA for the reference. See ' +
                         log_path + ' for information.')
            logger.info('  Failed searching structural variations.')
            return None, None, None
    elif not correct_chr_names:
        logger.info('  Failed searching structural variations.')
        return None, None, None
    logger.info('  Sorting SAM-file...')
    if (is_non_empty_file(sam_sorted_fpath)
            and all_read_names_correct(sam_sorted_fpath)
        ) and is_non_empty_file(bam_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        correct_sam_fpath = os.path.join(output_dirpath, ref_name +
                                         '.sam.correct')  # write in output dir
        clean_read_names(sam_fpath, correct_sam_fpath)
        bam_fpath = os.path.join(output_dirpath, ref_name + '.bam')
        bam_sorted_fpath = add_suffix(bam_fpath, 'sorted')
        qutils.call_subprocess([
            sambamba_fpath('sambamba'), 'view', '-t',
            str(qconfig.max_threads), '-h', '-f', 'bam', '-F', 'not unmapped',
            '-S', correct_sam_fpath
        ],
                               stdout=open(bam_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)
        qutils.call_subprocess([
            sambamba_fpath('sambamba'), 'sort', '-t',
            str(qconfig.max_threads), '-o', bam_sorted_fpath, bam_fpath
        ],
                               stderr=open(err_path, 'a'),
                               logger=logger)
        qutils.call_subprocess([
            sambamba_fpath('sambamba'), 'view', '-t',
            str(qconfig.max_threads), '-h', bam_sorted_fpath
        ],
                               stdout=open(sam_sorted_fpath, 'w'),
                               stderr=open(err_path, 'a'),
                               logger=logger)

    if qconfig.create_icarus_html and (
            not is_non_empty_file(cov_fpath)
            or not is_non_empty_file(physical_cov_fpath)):
        cov_fpath, physical_cov_fpath = get_coverage(
            output_dirpath, main_ref_fpath, ref_name, bam_fpath,
            bam_sorted_fpath, log_path, err_path, cov_fpath,
            physical_cov_fpath, correct_chr_names)
    if not is_non_empty_file(bed_fpath) and not qconfig.no_sv:
        if meta_ref_fpaths:
            logger.info('  Splitting SAM-file by references...')
        headers = []
        seq_name_length = {}
        with open(sam_fpath) as sam_file:
            for line in sam_file:
                if not line.startswith('@'):
                    break
                if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                    seq_name = line.split('\tSN:')[1].split('\t')[0]
                    seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                    seq_name_length[seq_name] = seq_length
                headers.append(line.strip())
        need_ref_splitting = False
        if meta_ref_fpaths:
            ref_files = {}
            for cur_ref_fpath in meta_ref_fpaths:
                ref = qutils.name_from_fpath(cur_ref_fpath)
                new_ref_sam_fpath = os.path.join(output_dirpath, ref + '.sam')
                if is_non_empty_file(new_ref_sam_fpath):
                    logger.info(
                        '    Using existing split SAM-file for %s: %s' %
                        (ref, new_ref_sam_fpath))
                    ref_files[ref] = None
                else:
                    new_ref_sam_file = open(new_ref_sam_fpath, 'w')
                    if not headers[0].startswith('@SQ'):
                        new_ref_sam_file.write(headers[0] + '\n')
                    chrs = []
                    for h in (h for h in headers
                              if h.startswith('@SQ') and 'SN:' in h):
                        seq_name = h.split('\tSN:')[1].split('\t')[0]
                        if seq_name in ref_labels and ref_labels[seq_name] == ref:
                            new_ref_sam_file.write(h + '\n')
                            chrs.append(seq_name)
                    new_ref_sam_file.write(headers[-1] + '\n')
                    ref_files[ref] = new_ref_sam_file
                    need_ref_splitting = True
        deletions = []
        trivial_deletions_fpath = os.path.join(output_dirpath,
                                               qconfig.trivial_deletions_fname)
        logger.info(
            '  Looking for trivial deletions (long zero-covered fragments)...')
        need_trivial_deletions = True
        if os.path.exists(trivial_deletions_fpath):
            need_trivial_deletions = False
            logger.info('    Using existing file: ' + trivial_deletions_fpath)

        if need_trivial_deletions or need_ref_splitting:
            with open(sam_sorted_fpath) as sam_file:
                cur_deletion = None
                for line in sam_file:
                    mapping = Mapping.parse(line)
                    if mapping:
                        if mapping.ref == '*':
                            continue
                        # common case: continue current deletion (potential) on the same reference
                        if cur_deletion and cur_deletion.ref == mapping.ref:
                            if cur_deletion.next_bad is None:  # previous mapping was in region BEFORE 0-covered fragment
                                # just passed 0-covered fragment
                                if mapping.start - cur_deletion.prev_bad > QuastDeletion.MIN_GAP:
                                    cur_deletion.set_next_bad(mapping)
                                    if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                        cur_deletion.set_next_good(mapping)
                                        if cur_deletion.is_valid():
                                            deletions.append(cur_deletion)
                                        cur_deletion = QuastDeletion(
                                            mapping.ref).set_prev_good(mapping)
                                # continue region BEFORE 0-covered fragment
                                elif mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                    cur_deletion.set_prev_good(mapping)
                                else:
                                    cur_deletion.set_prev_bad(mapping)
                            else:  # previous mapping was in region AFTER 0-covered fragment
                                # just passed another 0-cov fragment between end of cur_deletion BAD region and this mapping
                                if mapping.start - cur_deletion.next_bad_end > QuastDeletion.MIN_GAP:
                                    if cur_deletion.is_valid():  # add previous fragment's deletion if needed
                                        deletions.append(cur_deletion)
                                    cur_deletion = QuastDeletion(mapping.ref).set_prev_bad(position=cur_deletion.next_bad_end)
                                # continue region AFTER 0-covered fragment (old one or new/another one -- see "if" above)
                                if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                    cur_deletion.set_next_good(mapping)
                                    if cur_deletion.is_valid():
                                        deletions.append(cur_deletion)
                                    cur_deletion = QuastDeletion(
                                        mapping.ref).set_prev_good(mapping)
                                else:
                                    cur_deletion.set_next_bad_end(mapping)
                        # special case: just started or just switched to the next reference
                        else:
                            if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                                cur_deletion.set_next_good(
                                    position=seq_name_length[cur_deletion.ref])
                                if cur_deletion.is_valid():
                                    deletions.append(cur_deletion)
                            cur_deletion = QuastDeletion(
                                mapping.ref).set_prev_good(mapping)

                        if need_ref_splitting:
                            cur_ref = ref_labels[mapping.ref]
                            if mapping.ref_next.strip() == '=' or cur_ref == ref_labels[mapping.ref_next]:
                                if ref_files[cur_ref] is not None:
                                    ref_files[cur_ref].write(line)
                if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                    cur_deletion.set_next_good(
                        position=seq_name_length[cur_deletion.ref])
                    if cur_deletion.is_valid():
                        deletions.append(cur_deletion)
            if need_ref_splitting:
                for ref_handler in ref_files.values():
                    if ref_handler is not None:
                        ref_handler.close()
            if need_trivial_deletions:
                logger.info('  Trivial deletions: %d found' % len(deletions))
                logger.info('    Saving to: ' + trivial_deletions_fpath)
                with open(trivial_deletions_fpath, 'w') as f:
                    for deletion in deletions:
                        f.write(str(deletion) + '\n')

        if get_manta_fpath() and isfile(get_manta_fpath()):
            try:
                manta_sv_fpath = search_sv_with_manta(main_ref_fpath,
                                                      meta_ref_fpaths,
                                                      output_dirpath, err_path)
                qutils.cat_files([manta_sv_fpath, trivial_deletions_fpath],
                                 bed_fpath)
            except:
                pass
        if os.path.exists(
                trivial_deletions_fpath) and not is_non_empty_file(bed_fpath):
            shutil.copy(trivial_deletions_fpath, bed_fpath)

    if not qconfig.no_sv:
        if is_non_empty_file(bed_fpath):
            logger.main_info('  Structural variations are in ' + bed_fpath)
        else:
            if isfile(bed_fpath):
                logger.main_info('  No structural variations were found.')
            else:
                logger.main_info('  Failed searching structural variations.')
            bed_fpath = None
    if is_non_empty_file(cov_fpath):
        logger.main_info(
            '  Coverage distribution along the reference genome is in ' +
            cov_fpath)
    else:
        if not qconfig.create_icarus_html:
            logger.main_info('  Failed to calculate coverage distribution')
        cov_fpath = None
    return bed_fpath, cov_fpath, physical_cov_fpath
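
When no usable SAM/BAM is supplied, the function above maps the paired reads itself: it builds a BWA index with "bwa index" (adding "-a bwtsw" for references larger than 2 GB) and then runs "bwa mem", redirecting stdout to the SAM file. A stripped-down sketch of those two subprocess calls, assuming bwa is available on PATH; paths and the thread count are placeholders.

import subprocess

def align_with_bwa(ref_fpath, reads1_fpath, reads2_fpath, sam_fpath, threads=4):
    index_prefix = ref_fpath  # 'bwa index -p' writes the index files next to this prefix
    subprocess.check_call(['bwa', 'index', '-p', index_prefix, ref_fpath])
    with open(sam_fpath, 'w') as sam_file:
        # 'bwa mem' writes SAM records to stdout, so redirect stdout into the output file.
        subprocess.check_call(
            ['bwa', 'mem', '-t', str(threads), index_prefix, reads1_fpath, reads2_fpath],
            stdout=sam_file)
    return sam_fpath
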
Example 6
def run_processing_reads(main_ref_fpath, meta_ref_fpaths, ref_labels, reads_fpaths, output_dirpath, res_path, log_path,
                         err_path, sam_fpath=None, bam_fpath=None, bed_fpath=None):
    ref_name = qutils.name_from_fpath(main_ref_fpath)

    if not sam_fpath and bam_fpath:
        sam_fpath = get_safe_fpath(output_dirpath, bam_fpath[:-4] + '.sam')
    else:
        sam_fpath = sam_fpath or os.path.join(output_dirpath, ref_name + '.sam')
    bam_fpath = bam_fpath or get_safe_fpath(output_dirpath, sam_fpath[:-4] + '.bam')
    sam_sorted_fpath = get_safe_fpath(output_dirpath, add_suffix(sam_fpath, 'sorted'))
    bam_sorted_fpath = get_safe_fpath(output_dirpath, add_suffix(bam_fpath, 'sorted'))

    bed_fpath = bed_fpath or os.path.join(res_path, ref_name + '.bed')
    cov_fpath = os.path.join(res_path, ref_name + '.cov')
    physical_cov_fpath = os.path.join(res_path, ref_name + '.physical.cov')

    if qconfig.no_sv:
        logger.info('  Will not search Structural Variations (--fast or --no-sv is specified)')
        bed_fpath = None
    elif is_non_empty_file(bed_fpath):
        logger.info('  Using existing BED-file: ' + bed_fpath)
    if qconfig.create_icarus_html:
        if is_non_empty_file(cov_fpath):
            is_correct_file = check_cov_file(cov_fpath)
            if is_correct_file:
                logger.info('  Using existing reads coverage file: ' + cov_fpath)
        if is_non_empty_file(physical_cov_fpath):
            logger.info('  Using existing physical coverage file: ' + physical_cov_fpath)
    else:
        logger.info('  Will not calculate coverage (--no-icarus or --space-efficient is specified)')
        cov_fpath = None
        physical_cov_fpath = None
    if (is_non_empty_file(bed_fpath) or qconfig.no_sv) and \
            (qconfig.space_efficient or (is_non_empty_file(cov_fpath) and is_non_empty_file(physical_cov_fpath))):
        return bed_fpath, cov_fpath, physical_cov_fpath

    logger.info('  ' + 'Pre-processing reads...')
    logger.info('  ' + 'Logging to %s...' % err_path)
    correct_chr_names = None
    if is_non_empty_file(sam_fpath):
        logger.info('  Using existing SAM-file: ' + sam_fpath)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, main_ref_fpath, sam_fpath, err_path, reads_fpaths)
    elif is_non_empty_file(bam_fpath):
        logger.info('  Using existing BAM-file: ' + bam_fpath)
        qutils.call_subprocess([sambamba_fpath('sambamba'), 'view', '-t', str(qconfig.max_threads), '-h', bam_fpath],
                               stdout=open(sam_fpath, 'w'), stderr=open(err_path, 'a'), logger=logger)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, main_ref_fpath, sam_fpath, err_path, reads_fpaths)
    if not correct_chr_names and reads_fpaths:
        logger.info('  Running BWA...')
        # use absolute paths because we will change workdir
        sam_fpath = os.path.abspath(sam_fpath)
        abs_reads_fpaths = []
        for reads_fpath in reads_fpaths:
            abs_reads_fpaths.append(os.path.abspath(reads_fpath))

        if len(abs_reads_fpaths) != 2:
            logger.error('  You should specify files with forward and reverse reads.')
            logger.info('  Failed searching structural variations.')
            return None, None, None

        if not qconfig.no_check:
            if not paired_reads_names_are_equal(reads_fpaths, logger):
                logger.error('  Read names are discordant, skipping reads analysis!')
                logger.info('  Failed searching structural variations.')
                return None, None, None

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        cmd = [bwa_fpath('bwa'), 'index', '-p', ref_name, main_ref_fpath]
        if os.path.getsize(main_ref_fpath) > 2 * 1024 ** 3:  # if reference size bigger than 2GB
            cmd += ['-a', 'bwtsw']
        qutils.call_subprocess(cmd, stdout=open(log_path, 'a'), stderr=open(err_path, 'a'), logger=logger)

        cmd = bwa_fpath('bwa') + ' mem -t ' + str(qconfig.max_threads) + ' ' + ref_name + ' ' + abs_reads_fpaths[0] + ' ' + abs_reads_fpaths[1]

        qutils.call_subprocess(shlex.split(cmd), stdout=open(sam_fpath, 'w'), stderr=open(err_path, 'a'), logger=logger)
        logger.info('  Done.')
        os.chdir(prev_dir)
        if not os.path.exists(sam_fpath) or os.path.getsize(sam_fpath) == 0:
            logger.error('  Failed running BWA for the reference. See ' + log_path + ' for information.')
            logger.info('  Failed searching structural variations.')
            return None, None, None
    elif not correct_chr_names:
        logger.info('  Failed searching structural variations.')
        return None, None, None
    logger.info('  Sorting SAM-file...')
    if (is_non_empty_file(sam_sorted_fpath) and all_read_names_correct(sam_sorted_fpath)) and is_non_empty_file(bam_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        correct_sam_fpath = os.path.join(output_dirpath, ref_name + '.sam.correct')  # write in output dir
        clean_read_names(sam_fpath, correct_sam_fpath)
        bam_fpath = os.path.join(output_dirpath, ref_name + '.bam')
        bam_sorted_fpath = add_suffix(bam_fpath, 'sorted')
        qutils.call_subprocess([sambamba_fpath('sambamba'), 'view', '-t', str(qconfig.max_threads), '-h', '-f', 'bam',
                                '-F', 'not unmapped',  '-S', correct_sam_fpath],
                                stdout=open(bam_fpath, 'w'), stderr=open(err_path, 'a'), logger=logger)
        qutils.call_subprocess([sambamba_fpath('sambamba'), 'sort', '-t', str(qconfig.max_threads), '-o', bam_sorted_fpath,
                                bam_fpath], stderr=open(err_path, 'a'), logger=logger)
        qutils.call_subprocess([sambamba_fpath('sambamba'), 'view', '-t', str(qconfig.max_threads), '-h', bam_sorted_fpath],
                                stdout=open(sam_sorted_fpath, 'w'), stderr=open(err_path, 'a'), logger=logger)

    if qconfig.create_icarus_html and (not is_non_empty_file(cov_fpath) or not is_non_empty_file(physical_cov_fpath)):
        cov_fpath, physical_cov_fpath = get_coverage(output_dirpath, main_ref_fpath, ref_name, bam_fpath, bam_sorted_fpath,
                                                     log_path, err_path, cov_fpath, physical_cov_fpath, correct_chr_names)
    if not is_non_empty_file(bed_fpath) and not qconfig.no_sv:
        if meta_ref_fpaths:
            logger.info('  Splitting SAM-file by references...')
        headers = []
        seq_name_length = {}
        with open(sam_fpath) as sam_file:
            for line in sam_file:
                if not line.startswith('@'):
                    break
                if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                    seq_name = line.split('\tSN:')[1].split('\t')[0]
                    seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                    seq_name_length[seq_name] = seq_length
                headers.append(line.strip())
        need_ref_splitting = False
        if meta_ref_fpaths:
            ref_files = {}
            for cur_ref_fpath in meta_ref_fpaths:
                ref = qutils.name_from_fpath(cur_ref_fpath)
                new_ref_sam_fpath = os.path.join(output_dirpath, ref + '.sam')
                if is_non_empty_file(new_ref_sam_fpath):
                    logger.info('    Using existing split SAM-file for %s: %s' % (ref, new_ref_sam_fpath))
                    ref_files[ref] = None
                else:
                    new_ref_sam_file = open(new_ref_sam_fpath, 'w')
                    if not headers[0].startswith('@SQ'):
                        new_ref_sam_file.write(headers[0] + '\n')
                    chrs = []
                    for h in (h for h in headers if h.startswith('@SQ') and 'SN:' in h):
                        seq_name = h.split('\tSN:')[1].split('\t')[0]
                        if seq_name in ref_labels and ref_labels[seq_name] == ref:
                            new_ref_sam_file.write(h + '\n')
                            chrs.append(seq_name)
                    new_ref_sam_file.write(headers[-1] + '\n')
                    ref_files[ref] = new_ref_sam_file
                    need_ref_splitting = True
        deletions = []
        trivial_deletions_fpath = os.path.join(output_dirpath, qconfig.trivial_deletions_fname)
        logger.info('  Looking for trivial deletions (long zero-covered fragments)...')
        need_trivial_deletions = True
        if os.path.exists(trivial_deletions_fpath):
            need_trivial_deletions = False
            logger.info('    Using existing file: ' + trivial_deletions_fpath)

        if need_trivial_deletions or need_ref_splitting:
            with open(sam_sorted_fpath) as sam_file:
                cur_deletion = None
                for line in sam_file:
                    mapping = Mapping.parse(line)
                    if mapping:
                        if mapping.ref == '*':
                            continue
                        # common case: continue current deletion (potential) on the same reference
                        if cur_deletion and cur_deletion.ref == mapping.ref:
                            if cur_deletion.next_bad is None:  # previous mapping was in region BEFORE 0-covered fragment
                                # just passed 0-covered fragment
                                if mapping.start - cur_deletion.prev_bad > QuastDeletion.MIN_GAP:
                                    cur_deletion.set_next_bad(mapping)
                                    if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                        cur_deletion.set_next_good(mapping)
                                        if cur_deletion.is_valid():
                                            deletions.append(cur_deletion)
                                        cur_deletion = QuastDeletion(mapping.ref).set_prev_good(mapping)
                                # continue region BEFORE 0-covered fragment
                                elif mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                    cur_deletion.set_prev_good(mapping)
                                else:
                                    cur_deletion.set_prev_bad(mapping)
                            else:  # previous mapping was in region AFTER 0-covered fragment
                                # just passed another 0-cov fragment between end of cur_deletion BAD region and this mapping
                                if mapping.start - cur_deletion.next_bad_end > QuastDeletion.MIN_GAP:
                                    if cur_deletion.is_valid():   # add previous fragment's deletion if needed
                                        deletions.append(cur_deletion)
                                    cur_deletion = QuastDeletion(mapping.ref).set_prev_bad(position=cur_deletion.next_bad_end)
                                # continue region AFTER 0-covered fragment (old one or new/another one -- see "if" above)
                                if mapping.mapq >= Mapping.MIN_MAP_QUALITY:
                                    cur_deletion.set_next_good(mapping)
                                    if cur_deletion.is_valid():
                                        deletions.append(cur_deletion)
                                    cur_deletion = QuastDeletion(mapping.ref).set_prev_good(mapping)
                                else:
                                    cur_deletion.set_next_bad_end(mapping)
                        # special case: just started or just switched to the next reference
                        else:
                            if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                                cur_deletion.set_next_good(position=seq_name_length[cur_deletion.ref])
                                if cur_deletion.is_valid():
                                    deletions.append(cur_deletion)
                            cur_deletion = QuastDeletion(mapping.ref).set_prev_good(mapping)

                        if need_ref_splitting:
                            cur_ref = ref_labels[mapping.ref]
                            if mapping.ref_next.strip() == '=' or cur_ref == ref_labels[mapping.ref_next]:
                                if ref_files[cur_ref] is not None:
                                    ref_files[cur_ref].write(line)
                if cur_deletion and cur_deletion.ref in seq_name_length:  # switched to the next ref
                    cur_deletion.set_next_good(position=seq_name_length[cur_deletion.ref])
                    if cur_deletion.is_valid():
                        deletions.append(cur_deletion)
            if need_ref_splitting:
                for ref_handler in ref_files.values():
                    if ref_handler is not None:
                        ref_handler.close()
            if need_trivial_deletions:
                logger.info('  Trivial deletions: %d found' % len(deletions))
                logger.info('    Saving to: ' + trivial_deletions_fpath)
                with open(trivial_deletions_fpath, 'w') as f:
                    for deletion in deletions:
                        f.write(str(deletion) + '\n')

        if isfile(config_manta_fpath):
            try:
                manta_sv_fpath = search_sv_with_manta(main_ref_fpath, meta_ref_fpaths, output_dirpath, err_path)
                qutils.cat_files([manta_sv_fpath, trivial_deletions_fpath], bed_fpath)
            except:
                pass
        if os.path.exists(trivial_deletions_fpath) and not is_non_empty_file(bed_fpath):
            shutil.copy(trivial_deletions_fpath, bed_fpath)

    if not qconfig.no_sv:
        if is_non_empty_file(bed_fpath):
            logger.main_info('  Structural variations are in ' + bed_fpath)
        else:
            if isfile(bed_fpath):
                logger.main_info('  No structural variations were found.')
            else:
                logger.main_info('  Failed searching structural variations.')
            bed_fpath = None
    if is_non_empty_file(cov_fpath):
        logger.main_info('  Coverage distribution along the reference genome is in ' + cov_fpath)
    else:
        if not qconfig.create_icarus_html:
            logger.main_info('  Failed to calculate coverage distribution')
        cov_fpath = None
    return bed_fpath, cov_fpath, physical_cov_fpath
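
The run_processing_reads variants above all split the combined SAM file into one SAM per reference for the downstream per-reference SV search: each output keeps the generic header lines, only the @SQ lines whose sequence belongs to that reference, and the alignment records whose RNAME maps to it. Below is a compact, self-contained sketch of that splitting logic; ref_labels maps chromosome name to reference name as in the code above, while the lazy file handling and the treatment of generic header lines are simplifications, not QUAST's implementation.

import os

def split_sam_by_reference(sam_fpath, ref_labels, out_dir='.'):
    handles = {}

    def handle_for(ref):
        # Open each per-reference SAM lazily on first use.
        if ref not in handles:
            handles[ref] = open(os.path.join(out_dir, ref + '.sam'), 'w')
        return handles[ref]

    with open(sam_fpath) as sam_file:
        for line in sam_file:
            if line.startswith('@'):
                if line.startswith('@SQ') and 'SN:' in line:
                    seq_name = line.split('\tSN:')[1].split('\t')[0]
                    refs = [ref_labels[seq_name]] if seq_name in ref_labels else []
                else:
                    refs = set(ref_labels.values())  # generic header lines go to every split
                for ref in refs:
                    handle_for(ref).write(line)
            else:
                seq_name = line.split('\t')[2]  # RNAME column of an alignment record
                if seq_name in ref_labels:
                    handle_for(ref_labels[seq_name]).write(line)
    for handle in handles.values():
        handle.close()
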