コード例 #1
0
ファイル: align_contigs.py プロジェクト: nwespe/quast
def run_minimap(out_fpath, ref_fpath, contigs_fpath, log_err_fpath, index, max_threads):
    preset = 'asm5' if qconfig.min_IDY >= 95 and not qconfig.is_combined_ref else 'asm10'
    # -s -- min CIGAR score, -z -- affects how often to stop alignment extension, -B -- mismatch penalty
    # -O -- gap penalty, -r -- max gap size
    mask_level = '1' if qconfig.is_combined_ref else '0.9'
    num_alignments = '100' if qconfig.is_combined_ref else '50'
    additional_options = ['-B5', '-O4,16', '--no-long-join', '-r', str(qconfig.MAX_INDEL_LENGTH),
                          '-N', num_alignments, '-s', str(qconfig.min_alignment), '-z', '200']
    cmdline = [minimap_fpath(), '-c', '-x', preset] + (additional_options if not qconfig.large_genome else []) + \
              ['--mask-level', mask_level, '--cs', '-t', str(max_threads), ref_fpath, contigs_fpath]
    return_code = qutils.call_subprocess(cmdline, stdout=open(out_fpath, 'w'), stderr=open(log_err_fpath, 'a'),
                                         indent='  ' + qutils.index_to_str(index))

    return return_code
コード例 #2
0
ファイル: unique_kmers.py プロジェクト: A-Tsai/quast
def align_kmers(output_dir, ref_fpath, kmers_fpath, log_err_fpath, max_threads):
    out_fpath = join(output_dir, 'kmers.coords')
    cmdline = [minimap_fpath(), '-ax', 'sr', '-s202', '--frag=no', '-t', str(max_threads), ref_fpath, kmers_fpath]
    qutils.call_subprocess(cmdline, stdout=open(out_fpath, 'w'), stderr=open(log_err_fpath, 'a'), indent='  ')
    kmers_pos_by_chrom = defaultdict(list)
    kmers_by_chrom = defaultdict(list)
    with open(out_fpath) as f:
        for line in f:
            fs = line.split('\t')
            if len(fs) < 10:
                continue
            contig, chrom, pos = fs[0], fs[2], fs[3]
            kmers_pos_by_chrom[chrom].append(int(pos))
            kmers_by_chrom[chrom].append(int(contig))
    return kmers_by_chrom, kmers_pos_by_chrom
コード例 #3
0
def get_unique_covered_regions(ref_fpath, tmp_dir, log_fpath, binary_fpath, insert_size, uncovered_fpath, use_long_reads=False):
    red_genome_dir = os.path.join(tmp_dir, 'tmp_red')
    if isdir(red_genome_dir):
        shutil.rmtree(red_genome_dir)
    os.makedirs(red_genome_dir)

    ref_name = qutils.name_from_fpath(ref_fpath)
    ref_symlink = os.path.join(red_genome_dir, ref_name + '.fa')  ## Red recognizes only *.fa files
    if os.path.islink(ref_symlink):
        os.remove(ref_symlink)
    os.symlink(ref_fpath, ref_symlink)

    logger.info('  ' + 'Running repeat masking tool...')
    repeats_fpath = os.path.join(tmp_dir, ref_name + '.rpt')
    if is_non_empty_file(repeats_fpath):
        return_code = 0
        logger.info('  ' + 'Using existing file ' + repeats_fpath + '...')
    else:
        return_code = qutils.call_subprocess([binary_fpath, '-gnm', red_genome_dir, '-rpt', tmp_dir, '-frm', '2', '-min', '5'],
                                             stdout=open(log_fpath, 'w'), stderr=open(log_fpath, 'w'), indent='    ')
    if return_code == 0 and repeats_fpath and exists(repeats_fpath):
        long_repeats_fpath = os.path.join(tmp_dir, qutils.name_from_fpath(ref_fpath) + '.long.rpt')
        with open(long_repeats_fpath, 'w') as out:
            with open(repeats_fpath) as in_f:
                for line in in_f:
                    l = line.split('\t')
                    repeat_len = int(l[2]) - int(l[1])
                    if repeat_len >= insert_size:
                        out.write(line[1:])

        repeats_fasta_fpath = os.path.join(tmp_dir, qutils.name_from_fpath(ref_fpath) + '.fasta')
        coords_fpath = os.path.join(tmp_dir, qutils.name_from_fpath(ref_fpath) + '.rpt.coords.txt')
        if not is_non_empty_file(coords_fpath):
            fasta_index_fpath = ref_fpath + '.fai'
            if exists(fasta_index_fpath):
                os.remove(fasta_index_fpath)
            qutils.call_subprocess([bedtools_fpath('bedtools'), 'getfasta', '-fi', ref_fpath, '-bed',
                                    long_repeats_fpath, '-fo', repeats_fasta_fpath],
                                    stderr=open(log_fpath, 'w'), indent='    ')
            cmdline = [minimap_fpath(), '-c', '-x', 'asm10', '-N', '50', '--mask-level', '1', '--no-long-join', '-r', '100',
                       '-t', str(qconfig.max_threads), '-z', '200', ref_fpath, repeats_fasta_fpath]
            qutils.call_subprocess(cmdline, stdout=open(coords_fpath, 'w'), stderr=open(log_fpath, 'a'))
        filtered_repeats_fpath, repeats_regions = check_repeats_instances(coords_fpath, long_repeats_fpath, use_long_reads)
        unique_covered_regions = remove_repeat_regions(ref_fpath, filtered_repeats_fpath, uncovered_fpath)
        return unique_covered_regions, repeats_regions
    return None, None
コード例 #4
0
ファイル: reads_analyzer.py プロジェクト: nwespe/quast
def run_aligner(read_fpaths, ref_fpath, sam_fpath, out_sam_fpaths, output_dir, err_fpath, max_threads, reads_type):
    bwa_cmd = bwa_fpath('bwa') + ' mem -t ' + str(max_threads)
    insert_sizes = []
    for idx, reads in enumerate(read_fpaths):
        if isinstance(reads, str):
            if reads_type == 'pacbio' or reads_type == 'nanopore':
                if reads_type == 'pacbio':
                    preset = ' -ax map-pb '
                else:
                    preset = ' -ax map-ont '
                cmdline = minimap_fpath() + ' -t ' + str(max_threads) + preset + ref_fpath + ' ' + reads
            else:
                cmdline = bwa_cmd + (' -p ' if reads_type == 'pe' else ' ') + ref_fpath + ' ' + reads
        else:
            read1, read2 = reads
            cmdline = bwa_cmd + ' ' + ref_fpath + ' ' + read1 + ' ' + read2
        output_fpath = add_suffix(sam_fpath, reads_type + str(idx + 1))
        bam_fpath = output_fpath.replace('.sam', '.bam')
        if not is_non_empty_file(output_fpath):
            qutils.call_subprocess(shlex.split(cmdline), stdout=open(output_fpath, 'w'), stderr=open(err_fpath, 'a'), logger=logger)
        if not is_non_empty_file(bam_fpath):
            if not is_non_empty_file(bam_fpath):
                sambamba_view(output_fpath, bam_fpath, max_threads, err_fpath, logger, filter_rule=None)
            if reads_type == 'pe':
                bam_dedup_fpath = add_suffix(bam_fpath, 'dedup')
                qutils.call_subprocess([sambamba_fpath('sambamba'), 'markdup', '-r', '-t', str(max_threads), '--tmpdir',
                                        output_dir, bam_fpath, bam_dedup_fpath],
                                        stderr=open(err_fpath, 'a'), logger=logger)
                if exists(bam_dedup_fpath):
                    shutil.move(bam_dedup_fpath, bam_fpath)
        if reads_type == 'pe':
            insert_size, std_dev = calculate_insert_size(output_fpath, output_dir, basename(sam_fpath))
            if insert_size < qconfig.optimal_assembly_max_IS:
                insert_sizes.append(insert_size)
        out_sam_fpaths.append(output_fpath)

    if insert_sizes:
        qconfig.optimal_assembly_insert_size = max(insert_sizes)
        ref_name = qutils.name_from_fpath(ref_fpath)
        insert_size_fpath = join(output_dir, '..', ref_name + '.is.txt')
        with open(insert_size_fpath, 'w') as out:
            out.write(str(qconfig.optimal_assembly_insert_size))
コード例 #5
0
def align_kmers(output_dir, ref_fpath, kmers_fpath, log_err_fpath, max_threads):
    out_fpath = join(output_dir, 'kmers.coords')
    cmdline = [minimap_fpath(), '-c', '-r1', '-s101', '-A1', '-B10', '-O39,81', '--secondary=no', '-t', str(max_threads), ref_fpath, kmers_fpath]
    qutils.call_subprocess(cmdline, stdout=open(out_fpath, 'w'), stderr=open(log_err_fpath, 'a'), indent='  ')
    return out_fpath