Beispiel #1
0
def align_reference(ref_fpath, output_dir, using_reads='all', calculate_coverage=False):
    required_files = []
    ref_name = qutils.name_from_fpath(ref_fpath)
    cov_fpath = qconfig.cov_fpath or join(output_dir, ref_name + '.cov')
    uncovered_fpath = add_suffix(cov_fpath, 'uncovered')
    if using_reads != 'all':
        cov_fpath = add_suffix(cov_fpath, using_reads)
        uncovered_fpath = add_suffix(uncovered_fpath, using_reads)
    insert_size_fpath = join(output_dir, ref_name + '.is.txt')
    if not is_non_empty_file(uncovered_fpath):
        required_files.append(uncovered_fpath)
    if not is_non_empty_file(insert_size_fpath) and (using_reads == 'all' or using_reads == 'pe'):
        required_files.append(insert_size_fpath)

    temp_output_dir = join(output_dir, 'temp_output')
    if not isdir(temp_output_dir):
        os.makedirs(temp_output_dir)

    log_path = join(output_dir, 'reads_stats.log')
    err_fpath = join(output_dir, 'reads_stats.err')
    correct_chr_names, sam_fpath, bam_fpath = align_single_file(ref_fpath, output_dir, temp_output_dir, log_path, err_fpath,
                                                                qconfig.max_threads, sam_fpath=qconfig.reference_sam,
                                                                bam_fpath=qconfig.reference_bam, required_files=required_files,
                                                                is_reference=True, alignment_only=True, using_reads=using_reads)
    if not qconfig.optimal_assembly_insert_size or qconfig.optimal_assembly_insert_size == 'auto':
        if using_reads == 'pe' and sam_fpath:
            insert_size, std_dev = calculate_insert_size(sam_fpath, output_dir, ref_name)
            if not insert_size:
                logger.info('  Failed calculating insert size.')
            else:
                qconfig.optimal_assembly_insert_size = insert_size
        elif using_reads == 'all' and is_non_empty_file(insert_size_fpath):
            try:
                insert_size = int(open(insert_size_fpath).readline())
                if insert_size:
                    qconfig.optimal_assembly_insert_size = insert_size
            except:
                pass

    if not required_files:
        return sam_fpath, bam_fpath, uncovered_fpath
    if not sam_fpath:
        logger.info('  Failed detecting uncovered regions.')
        return None, None

    if calculate_coverage:
        bam_mapped_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_fpath, 'mapped'))
        bam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_mapped_fpath, 'sorted'))

        if is_non_empty_file(bam_sorted_fpath):
            logger.info('  Using existing sorted BAM-file: ' + bam_sorted_fpath)
        else:
            sambamba_view(bam_fpath, bam_mapped_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped')
            sort_bam(bam_mapped_fpath, bam_sorted_fpath, err_fpath, logger)
        if not is_non_empty_file(uncovered_fpath) and calculate_coverage:
            get_coverage(temp_output_dir, ref_fpath, ref_name, bam_fpath, bam_sorted_fpath, log_path, err_fpath,
                         correct_chr_names, cov_fpath, uncovered_fpath=uncovered_fpath, create_cov_files=False)
    return sam_fpath, bam_fpath, uncovered_fpath
Beispiel #2
0
def align_reference(ref_fpath, output_dir, using_reads='all'):
    required_files = []
    ref_name = qutils.name_from_fpath(ref_fpath)
    cov_fpath = qconfig.cov_fpath or join(output_dir, ref_name + '.cov')
    uncovered_fpath = add_suffix(cov_fpath, 'uncovered')
    if using_reads != 'all':
        cov_fpath = add_suffix(cov_fpath, using_reads)
        uncovered_fpath = add_suffix(uncovered_fpath, using_reads)
    insert_size_fpath = join(output_dir, ref_name + '.is.txt')
    if not is_non_empty_file(uncovered_fpath):
        required_files.append(uncovered_fpath)
    if not is_non_empty_file(insert_size_fpath) and (using_reads == 'all' or using_reads == 'paired_end'):
        required_files.append(insert_size_fpath)

    temp_output_dir = join(output_dir, 'temp_output')
    if not isdir(temp_output_dir):
        os.makedirs(temp_output_dir)

    log_path = join(output_dir, 'reads_stats.log')
    err_fpath = join(output_dir, 'reads_stats.err')
    correct_chr_names, sam_fpath, bam_fpath = align_single_file(ref_fpath, output_dir, temp_output_dir, log_path, err_fpath,
                                                                qconfig.max_threads, sam_fpath=qconfig.reference_sam,
                                                                bam_fpath=qconfig.reference_bam, required_files=required_files,
                                                                is_reference=True, alignment_only=True, using_reads=using_reads)
    qconfig.reference_sam = sam_fpath
    qconfig.reference_bam = bam_fpath

    if not qconfig.ideal_assembly_insert_size or qconfig.ideal_assembly_insert_size == 'auto':
        if using_reads == 'paired_end' and sam_fpath:
            insert_size = calculate_insert_size(sam_fpath, output_dir, ref_name)
            if not insert_size:
                logger.info('  Failed calculating insert size.')
            else:
                qconfig.ideal_assembly_insert_size = insert_size

    if not required_files:
        return bam_fpath, uncovered_fpath
    if not sam_fpath:
        logger.info('  Failed detecting uncovered regions.')
        return None, None

    bam_mapped_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_fpath, 'mapped'))
    bam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_mapped_fpath, 'sorted'))

    if is_non_empty_file(bam_sorted_fpath):
        logger.info('  Using existing sorted BAM-file: ' + bam_sorted_fpath)
    else:
        sambamba_view(bam_fpath, bam_mapped_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped')
        sort_bam(bam_mapped_fpath, bam_sorted_fpath, err_fpath, logger)
    if not is_non_empty_file(uncovered_fpath):
        get_coverage(temp_output_dir, ref_fpath, ref_name, bam_fpath, bam_sorted_fpath, log_path, err_fpath,
                     correct_chr_names, cov_fpath, uncovered_fpath=uncovered_fpath, create_cov_files=False)
    return bam_fpath, uncovered_fpath
Beispiel #3
0
def merge_sam_files(tmp_sam_fpaths, sam_fpath, bam_fpath, max_threads, err_fpath):
    tmp_bam_fpaths = []
    for tmp_sam_fpath in tmp_sam_fpaths:
        if is_non_empty_file(tmp_sam_fpath):
            tmp_bam_fpath = tmp_sam_fpath.replace('.sam', '.bam')
            tmp_bam_sorted_fpath = add_suffix(tmp_bam_fpath, 'sorted')
            if not is_non_empty_file(tmp_bam_sorted_fpath):
                sort_bam(tmp_bam_fpath, tmp_bam_sorted_fpath, err_fpath, logger)
            tmp_bam_fpaths.append(tmp_bam_sorted_fpath)
    qutils.call_subprocess([sambamba_fpath('sambamba'), 'merge', '-t', str(max_threads), bam_fpath] + tmp_bam_fpaths,
                           stderr=open(err_fpath, 'a'), logger=logger)
    sambamba_view(bam_fpath, sam_fpath, max_threads, err_fpath, logger)
    return sam_fpath
Beispiel #4
0
def get_physical_coverage(output_dirpath, ref_name, bam_fpath, log_path, err_fpath, cov_fpath, chr_len_fpath):
    if not isfile(bedtools_fpath('bamToBed')):
        logger.info('  Failed calculating physical coverage...')
        return None
    raw_cov_fpath = add_suffix(cov_fpath, 'raw')
    if not is_non_empty_file(raw_cov_fpath):
        logger.info('  Calculating physical coverage...')
        ## keep properly mapped, unique, and non-duplicate read pairs only
        bam_filtered_fpath = join(output_dirpath, ref_name + '.filtered.bam')
        sambamba_view(bam_fpath, bam_filtered_fpath, qconfig.max_threads, err_fpath, logger,
                      filter_rule='proper_pair and not supplementary and not duplicate')
        ## sort by read names
        bam_filtered_sorted_fpath = join(output_dirpath, ref_name + '.filtered.sorted.bam')
        sort_bam(bam_filtered_fpath, bam_filtered_sorted_fpath, err_fpath, logger, sort_rule='-n')
        bed_fpath = bam_to_bed(output_dirpath, ref_name, bam_filtered_sorted_fpath, err_fpath, logger, bedpe=True)
        calculate_genome_cov(bed_fpath, raw_cov_fpath, chr_len_fpath, err_fpath, logger)
    return raw_cov_fpath
Beispiel #5
0
def get_physical_coverage(output_dirpath, ref_name, bam_fpath, log_path, err_fpath, cov_fpath, chr_len_fpath):
    if not isfile(bedtools_fpath('bamToBed')):
        logger.info('  Failed calculating physical coverage...')
        return None
    raw_cov_fpath = add_suffix(cov_fpath, 'raw')
    if not is_non_empty_file(raw_cov_fpath):
        logger.info('  Calculating physical coverage...')
        ## keep properly mapped, unique, and non-duplicate read pairs only
        bam_filtered_fpath = join(output_dirpath, ref_name + '.filtered.bam')
        sambamba_view(bam_fpath, bam_filtered_fpath, qconfig.max_threads, err_fpath, logger,
                      filter_rule='proper_pair and not supplementary and not duplicate')
        ## sort by read names
        bam_filtered_sorted_fpath = join(output_dirpath, ref_name + '.filtered.sorted.bam')
        sort_bam(bam_filtered_fpath, bam_filtered_sorted_fpath, err_fpath, logger, sort_rule='-n')
        bed_fpath = bam_to_bed(output_dirpath, ref_name, bam_filtered_sorted_fpath, err_fpath, logger, bedpe=True)
        calculate_genome_cov(bed_fpath, raw_cov_fpath, chr_len_fpath, err_fpath, logger)
    return raw_cov_fpath
Beispiel #6
0
def process_one_ref(cur_ref_fpath, output_dirpath, err_fpath, max_threads, bam_fpath=None, bed_fpath=None):
    ref_name = qutils.name_from_fpath(cur_ref_fpath)
    if not bam_fpath:
        sam_fpath = join(output_dirpath, ref_name + '.sam')
        bam_fpath = join(output_dirpath, ref_name + '.bam')
        bam_sorted_fpath = join(output_dirpath, ref_name + '.sorted.bam')
    else:
        sam_fpath = bam_fpath.replace('.bam', '.sam')
        bam_sorted_fpath = add_suffix(bam_fpath, 'sorted')
    bed_fpath = bed_fpath or join(output_dirpath, ref_name + '.bed')
    if is_non_empty_file(bed_fpath):
        logger.info('  Using existing BED-file: ' + bed_fpath)
        return bed_fpath

    if not isfile(bam_sorted_fpath):
        sambamba_view(sam_fpath, bam_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped and proper_pair')
        sort_bam(bam_fpath, bam_sorted_fpath, err_fpath, logger, threads=max_threads)
    if not is_non_empty_file(bam_sorted_fpath + '.bai'):
        qutils.call_subprocess([sambamba_fpath('sambamba'), 'index', bam_sorted_fpath],
                               stderr=open(err_fpath, 'a'), logger=logger)
    create_fai_file(cur_ref_fpath)
    vcf_output_dirpath = join(output_dirpath, ref_name + '_gridss')
    vcf_fpath = join(vcf_output_dirpath, ref_name + '.vcf')
    if not is_non_empty_file(vcf_fpath):
        if isdir(vcf_output_dirpath):
            shutil.rmtree(vcf_output_dirpath, ignore_errors=True)
        os.makedirs(vcf_output_dirpath)
        max_mem = get_gridss_memory()
        env = os.environ.copy()
        env["PATH"] += os.pathsep + bwa_dirpath
        bwa_index(cur_ref_fpath, err_fpath, logger)
        qutils.call_subprocess(['java', '-ea', '-Xmx' + str(max_mem) + 'g', '-Dsamjdk.create_index=true', '-Dsamjdk.use_async_io_read_samtools=true',
                                '-Dsamjdk.use_async_io_write_samtools=true', '-Dsamjdk.use_async_io_write_tribble=true',
                                '-cp', get_gridss_fpath(), 'gridss.CallVariants', 'I=' + bam_sorted_fpath, 'O=' + vcf_fpath,
                                'ASSEMBLY=' + join(vcf_output_dirpath, ref_name + '.gridss.bam'), 'REFERENCE_SEQUENCE=' + cur_ref_fpath,
                                'WORKER_THREADS=' + str(max_threads), 'WORKING_DIR=' + vcf_output_dirpath],
                                stderr=open(err_fpath, 'a'), logger=logger, env=env)
    if is_non_empty_file(vcf_fpath):
        raw_bed_fpath = add_suffix(bed_fpath, 'raw')
        filtered_bed_fpath = add_suffix(bed_fpath, 'filtered')
        qutils.call_subprocess(['java', '-cp', get_gridss_fpath(), 'au.edu.wehi.idsv.VcfBreakendToBedpe',
                                'I=' + vcf_fpath, 'O=' + raw_bed_fpath, 'OF=' + filtered_bed_fpath, 'R=' + cur_ref_fpath,
                                'INCLUDE_HEADER=TRUE'], stderr=open(err_fpath, 'a'), logger=logger)
        reformat_bedpe(raw_bed_fpath, bed_fpath)
    return bed_fpath
Beispiel #7
0
def process_one_ref(cur_ref_fpath, output_dirpath, err_fpath, max_threads, bam_fpath=None, bed_fpath=None):
    ref_name = qutils.name_from_fpath(cur_ref_fpath)
    if not bam_fpath:
        sam_fpath = join(output_dirpath, ref_name + '.sam')
        bam_fpath = join(output_dirpath, ref_name + '.bam')
        bam_sorted_fpath = join(output_dirpath, ref_name + '.sorted.bam')
    else:
        sam_fpath = bam_fpath.replace('.bam', '.sam')
        bam_sorted_fpath = add_suffix(bam_fpath, 'sorted')
    bed_fpath = bed_fpath or join(output_dirpath, ref_name + '.bed')
    if is_non_empty_file(bed_fpath):
        logger.info('  Using existing BED-file: ' + bed_fpath)
        return bed_fpath

    if not isfile(bam_sorted_fpath):
        sambamba_view(sam_fpath, bam_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped')
        sort_bam(bam_fpath, bam_sorted_fpath, err_fpath, logger, threads=max_threads)
    if not is_non_empty_file(bam_sorted_fpath + '.bai'):
        qutils.call_subprocess([sambamba_fpath('sambamba'), 'index', bam_sorted_fpath],
                               stderr=open(err_fpath, 'a'), logger=logger)
    create_fai_file(cur_ref_fpath)
    vcf_output_dirpath = join(output_dirpath, ref_name + '_gridss')
    vcf_fpath = join(vcf_output_dirpath, ref_name + '.vcf')
    if not is_non_empty_file(vcf_fpath):
        if isdir(vcf_output_dirpath):
            shutil.rmtree(vcf_output_dirpath, ignore_errors=True)
        os.makedirs(vcf_output_dirpath)
        max_mem = get_gridss_memory()
        env = os.environ.copy()
        env["PATH"] += os.pathsep + bwa_dirpath
        bwa_index(cur_ref_fpath, err_fpath, logger)
        qutils.call_subprocess(['java', '-ea', '-Xmx' + str(max_mem) + 'g', '-Dsamjdk.create_index=true', '-Dsamjdk.use_async_io_read_samtools=true',
                                '-Dsamjdk.use_async_io_write_samtools=true', '-Dsamjdk.use_async_io_write_tribble=true',
                                '-cp', get_gridss_fpath(), 'gridss.CallVariants', 'I=' + bam_sorted_fpath, 'O=' + vcf_fpath,
                                'ASSEMBLY=' + join(vcf_output_dirpath, ref_name + '.gridss.bam'), 'REFERENCE_SEQUENCE=' + cur_ref_fpath,
                                'WORKER_THREADS=' + str(max_threads), 'WORKING_DIR=' + vcf_output_dirpath],
                                stderr=open(err_fpath, 'a'), logger=logger, env=env)
    if is_non_empty_file(vcf_fpath):
        raw_bed_fpath = add_suffix(bed_fpath, 'raw')
        filtered_bed_fpath = add_suffix(bed_fpath, 'filtered')
        qutils.call_subprocess(['java', '-cp', get_gridss_fpath(), 'au.edu.wehi.idsv.VcfBreakendToBedpe',
                                'I=' + vcf_fpath, 'O=' + raw_bed_fpath, 'OF=' + filtered_bed_fpath, 'R=' + cur_ref_fpath,
                                'INCLUDE_HEADER=TRUE'], stderr=open(err_fpath, 'a'), logger=logger)
        reformat_bedpe(raw_bed_fpath, bed_fpath)
    return bed_fpath
Beispiel #8
0
def merge_sam_files(tmp_sam_fpaths, sam_fpath, bam_fpath, output_dir, max_threads, err_fpath):
    merged_bam_fpath = add_suffix(bam_fpath, 'merged')
    tmp_bam_fpaths = []
    for tmp_sam_fpath in tmp_sam_fpaths:
        if is_non_empty_file(tmp_sam_fpath):
            tmp_bam_fpath = tmp_sam_fpath.replace('.sam', '.bam')
            tmp_bam_sorted_fpath = add_suffix(tmp_bam_fpath, 'sorted')
            if not is_non_empty_file(tmp_bam_sorted_fpath):
                sambamba_view(tmp_sam_fpath, tmp_bam_fpath, max_threads, err_fpath, logger, filter_rule=None)
                sort_bam(tmp_bam_fpath, tmp_bam_sorted_fpath, err_fpath, logger)
            tmp_bam_fpaths.append(tmp_bam_sorted_fpath)
    qutils.call_subprocess([sambamba_fpath('sambamba'), 'merge', '-t', str(max_threads), merged_bam_fpath] + tmp_bam_fpaths,
                           stderr=open(err_fpath, 'a'), logger=logger)
    qutils.call_subprocess([sambamba_fpath('sambamba'), 'markdup', '-r', '-t', str(max_threads), '--tmpdir',
                            output_dir, merged_bam_fpath, bam_fpath],
                           stderr=open(err_fpath, 'a'), logger=logger)
    sambamba_view(bam_fpath, sam_fpath, max_threads, err_fpath, logger)
    return merged_bam_fpath
Beispiel #9
0
def run_aligner(read_fpaths, ref_fpath, sam_fpath, out_sam_fpaths, output_dir, err_fpath, max_threads, reads_type):
    bwa_cmd = bwa_fpath('bwa') + ' mem -t ' + str(max_threads)
    insert_sizes = []
    for idx, reads in enumerate(read_fpaths):
        if isinstance(reads, str):
            if reads_type == 'pacbio' or reads_type == 'nanopore':
                if reads_type == 'pacbio':
                    preset = ' -ax map-pb '
                else:
                    preset = ' -ax map-ont '
                cmdline = minimap_fpath() + ' -t ' + str(max_threads) + preset + ref_fpath + ' ' + reads
            else:
                cmdline = bwa_cmd + (' -p ' if reads_type == 'pe' else ' ') + ref_fpath + ' ' + reads
        else:
            read1, read2 = reads
            cmdline = bwa_cmd + ' ' + ref_fpath + ' ' + read1 + ' ' + read2
        output_fpath = add_suffix(sam_fpath, reads_type + str(idx + 1))
        bam_fpath = output_fpath.replace('.sam', '.bam')
        if not is_non_empty_file(output_fpath):
            qutils.call_subprocess(shlex.split(cmdline), stdout=open(output_fpath, 'w'), stderr=open(err_fpath, 'a'), logger=logger)
        if not is_non_empty_file(bam_fpath):
            if not is_non_empty_file(bam_fpath):
                sambamba_view(output_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)
            if reads_type == 'pe':
                bam_dedup_fpath = add_suffix(bam_fpath, 'dedup')
                qutils.call_subprocess([sambamba_fpath('sambamba'), 'markdup', '-r', '-t', str(max_threads), '--tmpdir',
                                        output_dir, bam_fpath, bam_dedup_fpath],
                                        stderr=open(err_fpath, 'a'), logger=logger)
                if exists(bam_dedup_fpath):
                    shutil.move(bam_dedup_fpath, bam_fpath)
        if reads_type == 'pe':
            insert_size, std_dev = calculate_insert_size(output_fpath, output_dir, basename(sam_fpath))
            if insert_size < qconfig.optimal_assembly_max_IS:
                insert_sizes.append(insert_size)
        out_sam_fpaths.append(output_fpath)

    if insert_sizes:
        qconfig.optimal_assembly_insert_size = max(insert_sizes)
        ref_name = qutils.name_from_fpath(ref_fpath)
        insert_size_fpath = join(output_dir, '..', ref_name + '.is.txt')
        with open(insert_size_fpath, 'w') as out:
            out.write(str(qconfig.optimal_assembly_insert_size))
Beispiel #10
0
def get_joiners(ref_name, sam_fpath, bam_fpath, output_dirpath, err_fpath,
                using_reads):
    bam_filtered_fpath = add_suffix(bam_fpath, 'filtered')
    if not is_non_empty_file(bam_filtered_fpath):
        filter_rule = 'not unmapped and not supplementary and not secondary_alignment'
        sambamba_view(bam_fpath,
                      bam_filtered_fpath,
                      qconfig.max_threads,
                      err_fpath,
                      logger,
                      filter_rule=filter_rule)
    bam_sorted_fpath = add_suffix(bam_fpath, 'sorted')
    if not is_non_empty_file(bam_sorted_fpath):
        sort_bam(bam_filtered_fpath,
                 bam_sorted_fpath,
                 err_fpath,
                 logger,
                 sort_rule='-n')
    bed_fpath = bam_to_bed(output_dirpath,
                           using_reads,
                           bam_sorted_fpath,
                           err_fpath,
                           logger,
                           bedpe=using_reads == 'mp')
    intervals = defaultdict(list)
    if using_reads == 'mp':
        insert_size, std_dev = calculate_insert_size(sam_fpath,
                                                     output_dirpath,
                                                     ref_name,
                                                     reads_suffix='mp')
        min_is = insert_size - std_dev
        max_is = insert_size + std_dev
    with open(bed_fpath) as bed:
        for l in bed:
            fs = l.split()
            if using_reads == 'mp' and insert_size:
                interval_len = int(fs[2]) - int(fs[1])
                if min_is <= abs(interval_len) <= max_is:
                    intervals[fs[0]].append((int(fs[1]), int(fs[2])))
            else:
                intervals[fs[0]].append((int(fs[1]), int(fs[2])))
    return intervals
Beispiel #11
0
def align_single_file(fpath, main_output_dir, output_dirpath, log_path, err_fpath, max_threads, sam_fpath=None, bam_fpath=None,
                      index=None, required_files=None, is_reference=False, alignment_only=False, using_reads='all'):
    filename = qutils.name_from_fpath(fpath)
    if not sam_fpath and bam_fpath:
        sam_fpath = get_safe_fpath(output_dirpath, bam_fpath[:-4] + '.sam')
    else:
        sam_fpath = sam_fpath or join(output_dirpath, filename + '.sam')
    bam_fpath = bam_fpath or get_safe_fpath(output_dirpath, sam_fpath[:-4] + '.bam')
    if using_reads != 'all':
        sam_fpath = join(output_dirpath, filename + '.' + using_reads + '.sam')
        bam_fpath = sam_fpath.replace('.sam', '.bam')
    if alignment_only or (is_reference and required_files and any(f.endswith('bed') for f in required_files)):
        required_files.append(sam_fpath)

    stats_fpath = get_safe_fpath(dirname(output_dirpath), filename + '.stat')
    index_str = qutils.index_to_str(index) if index is not None else ''

    reads_fpaths = qconfig.reads_fpaths
    correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    can_reuse = correct_chr_names is not None
    if not can_reuse and not reads_fpaths:
        return None, None, None
    if correct_chr_names and (not required_files or all(isfile(fpath) for fpath in required_files)):
        if not alignment_only:
            if isfile(stats_fpath):
                logger.info('  ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
            elif isfile(bam_fpath):
                qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                       stdout=open(stats_fpath, 'w'), stderr=open(err_fpath, 'a'))
                analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
        if isfile(stats_fpath) or alignment_only:
            return correct_chr_names, sam_fpath, bam_fpath

    logger.info('  ' + index_str + 'Pre-processing reads...')
    if is_non_empty_file(sam_fpath) and can_reuse:
        logger.info('  ' + index_str + 'Using existing SAM-file: ' + sam_fpath)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    elif is_non_empty_file(bam_fpath) and can_reuse:
        logger.info('  ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
        sambamba_view(bam_fpath, sam_fpath, qconfig.max_threads, err_fpath, logger)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    if (not correct_chr_names or not is_non_empty_file(sam_fpath)) and reads_fpaths:
        if is_reference:
            logger.info('  Running BWA for reference...')
        else:
            logger.info('  ' + index_str + 'Running BWA...')
        # use absolute paths because we will change workdir
        fpath = abspath(fpath)
        sam_fpath = abspath(sam_fpath)

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        bwa_index(fpath, err_fpath, logger)
        sam_fpaths = align_reads(fpath, sam_fpath, using_reads, main_output_dir, err_fpath, max_threads)

        if len(sam_fpaths) > 1:
            merge_sam_files(sam_fpaths, sam_fpath, bam_fpath, max_threads, err_fpath)
        elif len(sam_fpaths) == 1:
            shutil.move(sam_fpaths[0], sam_fpath)
            tmp_bam_fpath = sam_fpaths[0].replace('.sam', '.bam')
            if is_non_empty_file(tmp_bam_fpath):
                shutil.move(tmp_bam_fpath, bam_fpath)

        logger.info('  ' + index_str + 'Done.')
        os.chdir(prev_dir)
        if not is_non_empty_file(sam_fpath):
            logger.error('  Failed running BWA for ' + fpath + '. See ' + log_path + ' for information.')
            return None, None, None
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)

    elif not correct_chr_names or not is_non_empty_file(sam_fpath):
        return None, None, None
    if is_reference:
        logger.info('  Sorting SAM-file for reference...')
    else:
        logger.info('  ' + index_str + 'Sorting SAM-file...')

    if can_reuse and is_non_empty_file(bam_fpath) and all_read_names_correct(sam_fpath):
        logger.info('  ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
    else:
        correct_sam_fpath = join(output_dirpath, filename + '.' + using_reads + '.correct.sam')  # write in output dir
        sam_fpath = clean_read_names(sam_fpath, correct_sam_fpath)
        sambamba_view(correct_sam_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)

    qutils.assert_file_exists(bam_fpath, 'bam file')
    if not alignment_only:
        if isfile(stats_fpath):
            logger.info('  ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
        elif isfile(bam_fpath):
            qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                    stdout=open(stats_fpath, 'w'), stderr=open(err_fpath, 'a'))
            analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
        if is_reference:
            logger.info('  Analysis for reference is finished.')
        else:
            logger.info('  ' + index_str + 'Analysis is finished.')
    return correct_chr_names, sam_fpath, bam_fpath
Beispiel #12
0
def run_processing_reads(contigs_fpaths, main_ref_fpath, meta_ref_fpaths, ref_labels, temp_output_dir, output_dir,
                         log_path, err_fpath):
    required_files = []
    bed_fpath, cov_fpath, physical_cov_fpath = None, None, None
    if main_ref_fpath:
        ref_name = qutils.name_from_fpath(main_ref_fpath)

        bed_fpath = qconfig.bed or join(output_dir, ref_name + '.bed')
        cov_fpath = qconfig.cov_fpath or join(output_dir, ref_name + '.cov')
        physical_cov_fpath = qconfig.phys_cov_fpath or join(output_dir, ref_name + '.physical.cov')
        required_files = [bed_fpath, cov_fpath, physical_cov_fpath]

        if qconfig.no_sv:
            logger.info('  Will not search Structural Variations (--fast or --no-sv is specified)')
            bed_fpath = None
        elif is_non_empty_file(bed_fpath):
            logger.info('  Using existing BED-file: ' + bed_fpath)
        elif not qconfig.forward_reads and not qconfig.interlaced_reads:
            if not qconfig.reference_sam and not qconfig.reference_bam:
                logger.info('  Will not search Structural Variations (needs paired-end reads)')
                bed_fpath = None
                qconfig.no_sv = True
        if qconfig.create_icarus_html:
            if is_non_empty_file(cov_fpath):
                is_correct_file = check_cov_file(cov_fpath)
                if is_correct_file:
                    logger.info('  Using existing reads coverage file: ' + cov_fpath)
            if is_non_empty_file(physical_cov_fpath):
                logger.info('  Using existing physical coverage file: ' + physical_cov_fpath)
        else:
            logger.info('  Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)')
            cov_fpath = None
            physical_cov_fpath = None
        if (is_non_empty_file(bed_fpath) or qconfig.no_sv) and \
                (not qconfig.create_icarus_html or (is_non_empty_file(cov_fpath) and is_non_empty_file(physical_cov_fpath))):
            required_files = []

    n_jobs = min(qconfig.max_threads, len(contigs_fpaths) + 1)
    max_threads_per_job = max(1, qconfig.max_threads // n_jobs)
    sam_fpaths = qconfig.sam_fpaths or [None] * len(contigs_fpaths)
    bam_fpaths = qconfig.bam_fpaths or [None] * len(contigs_fpaths)
    parallel_align_args = [(contigs_fpath, output_dir, temp_output_dir, log_path, err_fpath, max_threads_per_job,
                            sam_fpaths[index], bam_fpaths[index], index) for index, contigs_fpath in enumerate(contigs_fpaths)]
    if main_ref_fpath:
        parallel_align_args.append((main_ref_fpath, output_dir, temp_output_dir, log_path, err_fpath,
                                    max_threads_per_job, qconfig.reference_sam, qconfig.reference_bam, None, required_files, True))
    correct_chr_names, sam_fpaths, bam_fpaths = run_parallel(align_single_file, parallel_align_args, n_jobs)
    qconfig.sam_fpaths = sam_fpaths[:len(contigs_fpaths)]
    qconfig.bam_fpaths = bam_fpaths[:len(contigs_fpaths)]
    add_statistics_to_report(output_dir, contigs_fpaths, main_ref_fpath)
    save_reads(output_dir)
    if not main_ref_fpath:
        return None, None, None

    correct_chr_names = correct_chr_names[-1]
    sam_fpath, bam_fpath = sam_fpaths[-1], bam_fpaths[-1]
    qconfig.reference_sam = sam_fpath
    qconfig.reference_bam = bam_fpath
    if not required_files:
        return bed_fpath, cov_fpath, physical_cov_fpath
    if not all([sam_fpath, bam_fpath]):
        logger.info('  Failed searching structural variations.')
        return None, None, None

    sam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(sam_fpath, 'sorted'))
    bam_mapped_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_fpath, 'mapped'))
    bam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_mapped_fpath, 'sorted'))

    if is_non_empty_file(sam_sorted_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        if not is_non_empty_file(bam_sorted_fpath):
            sambamba_view(bam_fpath, bam_mapped_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped')
            sort_bam(bam_mapped_fpath, bam_sorted_fpath, err_fpath, logger)
        sambamba_view(bam_sorted_fpath, sam_sorted_fpath, qconfig.max_threads, err_fpath, logger)
    if qconfig.create_icarus_html and (not is_non_empty_file(cov_fpath) or not is_non_empty_file(physical_cov_fpath)):
        cov_fpath, physical_cov_fpath = get_coverage(temp_output_dir, main_ref_fpath, ref_name, bam_fpath, bam_sorted_fpath,
                                                     log_path, err_fpath, correct_chr_names, cov_fpath, physical_cov_fpath)
    if not is_non_empty_file(bed_fpath) and not qconfig.no_sv:
        if meta_ref_fpaths:
            logger.info('  Splitting SAM-file by references...')
        headers = []
        seq_lengths = {}
        with open(sam_fpath) as sam_file:
            for line in sam_file:
                if not line.startswith('@'):
                    break
                if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                    seq_name = line.split('\tSN:')[1].split('\t')[0]
                    seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                    seq_lengths[seq_name] = seq_length
                headers.append(line.strip())
        need_ref_splitting = False
        ref_files = {}
        if meta_ref_fpaths:
            global ref_sam_fpaths
            for cur_ref_fpath in meta_ref_fpaths:
                cur_ref_name = qutils.name_from_fpath(cur_ref_fpath)
                ref_sam_fpath = join(temp_output_dir, cur_ref_name + '.sam')
                ref_sam_fpaths[cur_ref_fpath] = ref_sam_fpath
                if is_non_empty_file(ref_sam_fpath):
                    logger.info('    Using existing split SAM-file for %s: %s' % (cur_ref_name, ref_sam_fpath))
                    ref_files[cur_ref_name] = None
                else:
                    ref_sam_file = open(ref_sam_fpath, 'w')
                    if not headers[0].startswith('@SQ'):
                        ref_sam_file.write(headers[0] + '\n')
                    for h in (h for h in headers if h.startswith('@SQ') and 'SN:' in h):
                        seq_name = h.split('\tSN:')[1].split('\t')[0]
                        if seq_name in ref_labels and ref_labels[seq_name] == cur_ref_name:
                            ref_sam_file.write(h + '\n')
                    ref_sam_file.write(headers[-1] + '\n')
                    ref_files[cur_ref_name] = ref_sam_file
                    need_ref_splitting = True

        trivial_deletions_fpath = \
            search_trivial_deletions(temp_output_dir, sam_sorted_fpath, ref_files, ref_labels, seq_lengths, need_ref_splitting)
        if get_gridss_fpath() and isfile(get_gridss_fpath()):
            try:
                gridss_sv_fpath = search_sv_with_gridss(main_ref_fpath, bam_mapped_fpath, meta_ref_fpaths, temp_output_dir, err_fpath)
                qutils.cat_files([gridss_sv_fpath, trivial_deletions_fpath], bed_fpath)
            except:
                pass
        if isfile(trivial_deletions_fpath) and not is_non_empty_file(bed_fpath):
            shutil.copy(trivial_deletions_fpath, bed_fpath)

    if not qconfig.no_sv:
        if is_non_empty_file(bed_fpath):
            logger.main_info('  Structural variations are in ' + bed_fpath)
        else:
            if isfile(bed_fpath):
                logger.main_info('  No structural variations were found.')
            else:
                logger.main_info('  Failed searching structural variations.')
            bed_fpath = None
    if is_non_empty_file(cov_fpath):
        logger.main_info('  Coverage distribution along the reference genome is in ' + cov_fpath)
    else:
        if not qconfig.create_icarus_html:
            logger.main_info('  Failed to calculate coverage distribution')
        cov_fpath = None
    return bed_fpath, cov_fpath, physical_cov_fpath
Beispiel #13
0
def align_single_file(fpath, main_output_dir, output_dirpath, log_path, err_fpath, max_threads, sam_fpath=None, bam_fpath=None,
                      index=None, required_files=None, is_reference=False, alignment_only=False, using_reads='all'):
    filename = qutils.name_from_fpath(fpath)
    if not sam_fpath and bam_fpath:
        sam_fpath = get_safe_fpath(output_dirpath, bam_fpath[:-4] + '.sam')
    else:
        sam_fpath = sam_fpath or join(output_dirpath, filename + '.sam')
    bam_fpath = bam_fpath or get_safe_fpath(output_dirpath, sam_fpath[:-4] + '.bam')
    if using_reads != 'all':
        sam_fpath = join(output_dirpath, filename + '.' + using_reads + '.sam')
        bam_fpath = sam_fpath.replace('.sam', '.bam')
    if alignment_only or (is_reference and required_files and any(f.endswith('bed') for f in required_files)):
        required_files.append(sam_fpath)

    stats_fpath = get_safe_fpath(dirname(output_dirpath), filename + '.stat')
    index_str = qutils.index_to_str(index) if index is not None else ''

    reads_fpaths = qconfig.reads_fpaths
    correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    can_reuse = correct_chr_names is not None
    if not can_reuse and not reads_fpaths:
        return None, None, None
    if correct_chr_names and (not required_files or all(isfile(fpath) for fpath in required_files)):
        if not alignment_only:
            if isfile(stats_fpath):
                logger.info('  ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
            elif isfile(bam_fpath):
                qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                       stdout=open(stats_fpath, 'w'), stderr=open(err_fpath, 'a'))
                analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
            calc_lap_score(reads_fpaths, sam_fpath, index, index_str, output_dirpath, fpath, filename, err_fpath)
        if isfile(stats_fpath) or alignment_only:
            return correct_chr_names, sam_fpath, bam_fpath

    logger.info('  ' + index_str + 'Pre-processing reads...')
    if is_non_empty_file(sam_fpath) and can_reuse:
        logger.info('  ' + index_str + 'Using existing SAM-file: ' + sam_fpath)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    elif is_non_empty_file(bam_fpath) and can_reuse:
        logger.info('  ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
        sambamba_view(bam_fpath, sam_fpath, qconfig.max_threads, err_fpath, logger)
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    if (not correct_chr_names or not is_non_empty_file(sam_fpath)) and reads_fpaths:
        if is_reference:
            logger.info('  Running BWA for reference...')
        else:
            logger.info('  ' + index_str + 'Running BWA...')
        # use absolute paths because we will change workdir
        fpath = abspath(fpath)
        sam_fpath = abspath(sam_fpath)

        prev_dir = os.getcwd()
        os.chdir(output_dirpath)
        bwa_index(fpath, err_fpath, logger)
        sam_fpaths = align_reads(fpath, sam_fpath, using_reads, main_output_dir, err_fpath, max_threads)

        if len(sam_fpaths) > 1:
            merge_sam_files(sam_fpaths, sam_fpath, bam_fpath, main_output_dir, max_threads, err_fpath)
        elif len(sam_fpaths) == 1:
            shutil.move(sam_fpaths[0], sam_fpath)
            sambamba_view(sam_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)

        logger.info('  ' + index_str + 'Done.')
        os.chdir(prev_dir)
        if not is_non_empty_file(sam_fpath):
            logger.error('  Failed running BWA for ' + fpath + '. See ' + log_path + ' for information.')
            return None, None, None
        correct_chr_names = get_correct_names_for_chroms(output_dirpath, fpath, sam_fpath, err_fpath, reads_fpaths, logger, is_reference)
    elif not correct_chr_names or not is_non_empty_file(sam_fpath):
        return None, None, None
    if is_reference:
        logger.info('  Sorting SAM-file for reference...')
    else:
        logger.info('  ' + index_str + 'Sorting SAM-file...')

    if can_reuse and is_non_empty_file(bam_fpath) and all_read_names_correct(sam_fpath):
        logger.info('  ' + index_str + 'Using existing BAM-file: ' + bam_fpath)
    else:
        correct_sam_fpath = join(output_dirpath, filename + '.correct.sam')  # write in output dir
        sam_fpath = clean_read_names(sam_fpath, correct_sam_fpath)
        sambamba_view(correct_sam_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)

    qutils.assert_file_exists(bam_fpath, 'bam file')
    if not alignment_only:
        if isfile(stats_fpath):
            logger.info('  ' + index_str + 'Using existing flag statistics file ' + stats_fpath)
        elif isfile(bam_fpath):
            qutils.call_subprocess([sambamba_fpath('sambamba'), 'flagstat', '-t', str(max_threads), bam_fpath],
                                    stdout=open(stats_fpath, 'w'), stderr=open(err_fpath, 'a'))
            analyse_coverage(output_dirpath, fpath, correct_chr_names, bam_fpath, stats_fpath, err_fpath, logger)
        calc_lap_score(reads_fpaths, sam_fpath, index, index_str, output_dirpath, fpath, filename, err_fpath)
        if is_reference:
            logger.info('  Analysis for reference is finished.')
        else:
            logger.info('  ' + index_str + 'Analysis is finished.')
    return correct_chr_names, sam_fpath, bam_fpath
Beispiel #14
0
def run_processing_reads(contigs_fpaths, main_ref_fpath, meta_ref_fpaths, ref_labels, temp_output_dir, output_dir,
                         log_path, err_fpath):
    required_files = []
    bed_fpath, cov_fpath, physical_cov_fpath = None, None, None
    if main_ref_fpath:
        ref_name = qutils.name_from_fpath(main_ref_fpath)

        bed_fpath = qconfig.bed or join(output_dir, ref_name + '.bed')
        cov_fpath = qconfig.cov_fpath or join(output_dir, ref_name + '.cov')
        physical_cov_fpath = qconfig.phys_cov_fpath or join(output_dir, ref_name + '.physical.cov')
        required_files = [bed_fpath, cov_fpath, physical_cov_fpath]

        if qconfig.no_sv:
            logger.info('  Will not search Structural Variations (--fast or --no-sv is specified)')
            bed_fpath = None
        elif is_non_empty_file(bed_fpath):
            logger.info('  Using existing BED-file: ' + bed_fpath)
        elif not qconfig.forward_reads and not qconfig.interlaced_reads:
            if not qconfig.reference_sam and not qconfig.reference_bam:
                logger.info('  Will not search Structural Variations (needs paired-end reads)')
                bed_fpath = None
                qconfig.no_sv = True
        if qconfig.create_icarus_html:
            if is_non_empty_file(cov_fpath):
                is_correct_file = check_cov_file(cov_fpath)
                if is_correct_file:
                    logger.info('  Using existing reads coverage file: ' + cov_fpath)
            if is_non_empty_file(physical_cov_fpath):
                logger.info('  Using existing physical coverage file: ' + physical_cov_fpath)
        else:
            logger.info('  Will not calculate coverage (--fast or --no-html, or --no-icarus, or --space-efficient is specified)')
            cov_fpath = None
            physical_cov_fpath = None
        if (is_non_empty_file(bed_fpath) or qconfig.no_sv) and \
                (not qconfig.create_icarus_html or (is_non_empty_file(cov_fpath) and is_non_empty_file(physical_cov_fpath))):
            required_files = []

    n_jobs = min(qconfig.max_threads, len(contigs_fpaths) + 1)
    max_threads_per_job = max(1, qconfig.max_threads // n_jobs)
    sam_fpaths = qconfig.sam_fpaths or [None] * len(contigs_fpaths)
    bam_fpaths = qconfig.bam_fpaths or [None] * len(contigs_fpaths)
    parallel_align_args = [(contigs_fpath, output_dir, temp_output_dir, log_path, err_fpath, max_threads_per_job,
                            sam_fpaths[index], bam_fpaths[index], index) for index, contigs_fpath in enumerate(contigs_fpaths)]
    if main_ref_fpath:
        parallel_align_args.append((main_ref_fpath, output_dir, temp_output_dir, log_path, err_fpath,
                                    max_threads_per_job, qconfig.reference_sam, qconfig.reference_bam, None, required_files, True))
    correct_chr_names, sam_fpaths, bam_fpaths = run_parallel(align_single_file, parallel_align_args, n_jobs)
    qconfig.sam_fpaths = sam_fpaths[:len(contigs_fpaths)]
    qconfig.bam_fpaths = bam_fpaths[:len(contigs_fpaths)]
    add_statistics_to_report(output_dir, contigs_fpaths, main_ref_fpath)
    save_reads(output_dir)
    if not main_ref_fpath:
        return None, None, None

    correct_chr_names = correct_chr_names[-1]
    sam_fpath, bam_fpath = sam_fpaths[-1], bam_fpaths[-1]
    qconfig.reference_sam = sam_fpath
    qconfig.reference_bam = bam_fpath
    if not required_files:
        return bed_fpath, cov_fpath, physical_cov_fpath
    if not all([sam_fpath, bam_fpath]):
        logger.info('  Failed searching structural variations.')
        return None, None, None

    sam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(sam_fpath, 'sorted'))
    bam_mapped_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_fpath, 'mapped'))
    bam_sorted_fpath = get_safe_fpath(temp_output_dir, add_suffix(bam_mapped_fpath, 'sorted'))

    if is_non_empty_file(sam_sorted_fpath):
        logger.info('  Using existing sorted SAM-file: ' + sam_sorted_fpath)
    else:
        if not is_non_empty_file(bam_sorted_fpath):
            sambamba_view(bam_fpath, bam_mapped_fpath, qconfig.max_threads, err_fpath, logger,  filter_rule='not unmapped')
            sort_bam(bam_mapped_fpath, bam_sorted_fpath, err_fpath, logger)
        sambamba_view(bam_sorted_fpath, sam_sorted_fpath, qconfig.max_threads, err_fpath, logger)
    if qconfig.create_icarus_html and (not is_non_empty_file(cov_fpath) or not is_non_empty_file(physical_cov_fpath)):
        cov_fpath, physical_cov_fpath = get_coverage(temp_output_dir, main_ref_fpath, ref_name, bam_fpath, bam_sorted_fpath,
                                                     log_path, err_fpath, correct_chr_names, cov_fpath, physical_cov_fpath)
    if not is_non_empty_file(bed_fpath) and not qconfig.no_sv:
        if meta_ref_fpaths:
            logger.info('  Splitting SAM-file by references...')
        headers = []
        seq_lengths = {}
        with open(sam_fpath) as sam_file:
            for line in sam_file:
                if not line.startswith('@'):
                    break
                if line.startswith('@SQ') and 'SN:' in line and 'LN:' in line:
                    seq_name = line.split('\tSN:')[1].split('\t')[0]
                    seq_length = int(line.split('\tLN:')[1].split('\t')[0])
                    seq_lengths[seq_name] = seq_length
                headers.append(line.strip())
        need_ref_splitting = False
        ref_files = {}
        if meta_ref_fpaths:
            global ref_sam_fpaths
            for cur_ref_fpath in meta_ref_fpaths:
                cur_ref_name = qutils.name_from_fpath(cur_ref_fpath)
                ref_sam_fpath = join(temp_output_dir, cur_ref_name + '.sam')
                ref_sam_fpaths[cur_ref_fpath] = ref_sam_fpath
                if is_non_empty_file(ref_sam_fpath):
                    logger.info('    Using existing split SAM-file for %s: %s' % (cur_ref_name, ref_sam_fpath))
                    ref_files[cur_ref_name] = None
                else:
                    ref_sam_file = open(ref_sam_fpath, 'w')
                    if not headers[0].startswith('@SQ'):
                        ref_sam_file.write(headers[0] + '\n')
                    for h in (h for h in headers if h.startswith('@SQ') and 'SN:' in h):
                        seq_name = h.split('\tSN:')[1].split('\t')[0]
                        if seq_name in ref_labels and ref_labels[seq_name] == cur_ref_name:
                            ref_sam_file.write(h + '\n')
                    ref_sam_file.write(headers[-1] + '\n')
                    ref_files[cur_ref_name] = ref_sam_file
                    need_ref_splitting = True

        trivial_deletions_fpath = \
            search_trivial_deletions(temp_output_dir, sam_sorted_fpath, ref_files, ref_labels, seq_lengths, need_ref_splitting)
        if get_gridss_fpath() and isfile(get_gridss_fpath()):
            try:
                gridss_sv_fpath = search_sv_with_gridss(main_ref_fpath, bam_mapped_fpath, meta_ref_fpaths, temp_output_dir, err_fpath)
                qutils.cat_files([gridss_sv_fpath, trivial_deletions_fpath], bed_fpath)
            except:
                pass
        if isfile(trivial_deletions_fpath) and not is_non_empty_file(bed_fpath):
            shutil.copy(trivial_deletions_fpath, bed_fpath)

    if not qconfig.no_sv:
        if is_non_empty_file(bed_fpath):
            logger.main_info('  Structural variations are in ' + bed_fpath)
        else:
            if isfile(bed_fpath):
                logger.main_info('  No structural variations were found.')
            else:
                logger.main_info('  Failed searching structural variations.')
            bed_fpath = None
    if is_non_empty_file(cov_fpath):
        logger.main_info('  Coverage distribution along the reference genome is in ' + cov_fpath)
    else:
        if not qconfig.create_icarus_html:
            logger.main_info('  Failed to calculate coverage distribution')
        cov_fpath = None
    return bed_fpath, cov_fpath, physical_cov_fpath