コード例 #1
0
def run_minimap_agb(out_fpath, ref_fpath, contigs_fpath, log_err_fpath, index, max_threads):
    """Run minimap2 with the option set used for AGB.

    minimap2's stdout is written to ``out_fpath`` (the file is overwritten) and
    its stderr is appended to ``log_err_fpath``.

    :param out_fpath: path of the file receiving minimap2's stdout
    :param ref_fpath: reference file, passed to minimap2 before the contigs
    :param contigs_fpath: contigs file, passed to minimap2 as the last argument
    :param log_err_fpath: path of the log file minimap2's stderr is appended to
    :param index: assembly index; only used to build the log indent prefix
    :param max_threads: value passed to minimap2's ``-t`` option
    :return: the value returned by ``qutils.call_subprocess``
    """
    # --mask-level depends on the configured minimum identity threshold.
    mask_level = '1' if qconfig.min_IDY < 95 else '0.9'
    cmdline = [minimap_fpath(), '-cx', 'asm20', '--mask-level', mask_level, '-N', '100',
               '--score-N', '0', '-E', '1,0', '-f', '200', '--cs', '-t', str(max_threads), ref_fpath, contigs_fpath]
    # Open both files in context managers so the handles are always closed,
    # including when launching the subprocess raises.
    with open(out_fpath, 'w') as out_f:
        with open(log_err_fpath, 'a') as err_f:
            return qutils.call_subprocess(cmdline, stdout=out_f, stderr=err_f,
                                          indent='  ' + qutils.index_to_str(index))
コード例 #2
0
ファイル: align_contigs.py プロジェクト: HRGV/quast
def run_minimap(out_fpath, ref_fpath, contigs_fpath, log_err_fpath, index,
                max_threads):
    """Align contigs to the reference with minimap2.

    When ``qconfig.is_agv_mode`` is set the call is delegated to
    ``run_minimap_agv`` unchanged. Otherwise minimap2's stdout is written to
    ``out_fpath`` (overwritten) and its stderr is appended to ``log_err_fpath``.

    :param out_fpath: path of the file receiving minimap2's stdout
    :param ref_fpath: reference file, passed to minimap2 before the contigs
    :param contigs_fpath: contigs file, passed to minimap2 as the last argument
    :param log_err_fpath: path of the log file minimap2's stderr is appended to
    :param index: assembly index; only used to build the log indent prefix
    :param max_threads: value passed to minimap2's ``-t`` option
    :return: the value returned by ``qutils.call_subprocess`` (or by
        ``run_minimap_agv`` in AGV mode)
    """
    if qconfig.is_agv_mode:
        return run_minimap_agv(out_fpath, ref_fpath, contigs_fpath,
                               log_err_fpath, index, max_threads)

    # asm5 is used only for a high identity threshold on a non-combined reference.
    preset = 'asm5' if qconfig.min_IDY >= 95 and not qconfig.is_combined_ref else 'asm10'
    # -s -- min CIGAR score, -z -- affects how often to stop alignment extension, -B -- mismatch penalty
    # -O -- gap penalty, -r -- max gap size
    mask_level = '1' if qconfig.is_combined_ref else '0.9'
    num_alignments = '100' if qconfig.is_combined_ref else '50'
    additional_options = [
        '-B5', '-O4,16', '--no-long-join', '-r',
        str(qconfig.MAX_INDEL_LENGTH), '-N', num_alignments, '-s',
        str(qconfig.min_alignment), '-z', '200'
    ]
    # The additional tuning options are left out entirely for large genomes.
    cmdline = [minimap_fpath(), '-c', '-x', preset] + (additional_options if not qconfig.large_genome else []) + \
              ['--mask-level', mask_level, '--min-occ', '200', '-g', '2500', '--score-N', '2', '--cs', '-t', str(max_threads), ref_fpath, contigs_fpath]
    # Open both files in context managers so the handles are always closed,
    # including when launching the subprocess raises.
    with open(out_fpath, 'w') as out_f:
        with open(log_err_fpath, 'a') as err_f:
            return qutils.call_subprocess(cmdline,
                                          stdout=out_f,
                                          stderr=err_f,
                                          indent='  ' +
                                          qutils.index_to_str(index))
コード例 #3
0
ファイル: align_contigs.py プロジェクト: ablab/quast
def run_minimap_agb(out_fpath, ref_fpath, contigs_fpath, log_err_fpath, index, max_threads):
    """Run minimap2 with the option set used for AGB.

    minimap2's stdout is written to ``out_fpath`` (the file is overwritten) and
    its stderr is appended to ``log_err_fpath``.

    :param out_fpath: path of the file receiving minimap2's stdout
    :param ref_fpath: reference file, passed to minimap2 before the contigs
    :param contigs_fpath: contigs file, passed to minimap2 as the last argument
    :param log_err_fpath: path of the log file minimap2's stderr is appended to
    :param index: assembly index; only used to build the log indent prefix
    :param max_threads: value passed to minimap2's ``-t`` option
    :return: the value returned by ``qutils.call_subprocess``
    """
    # --mask-level depends on the configured minimum identity threshold.
    mask_level = '1' if qconfig.min_IDY < 95 else '0.9'
    cmdline = [minimap_fpath(), '-cx', 'asm20', '--mask-level', mask_level, '-N', '100',
               '--score-N', '0', '-E', '1,0', '-f', '200', '--cs', '-t', str(max_threads), ref_fpath, contigs_fpath]
    # Explicitly scoped handles: the original passed bare open() results that
    # were never closed.
    with open(out_fpath, 'w') as out_f:
        with open(log_err_fpath, 'a') as err_f:
            return qutils.call_subprocess(cmdline, stdout=out_f, stderr=err_f,
                                          indent='  ' + qutils.index_to_str(index))
コード例 #4
0
ファイル: align_contigs.py プロジェクト: ablab/quast
def run_minimap(out_fpath, ref_fpath, contigs_fpath, log_err_fpath, index, max_threads):
    """Align contigs to the reference with minimap2.

    When ``qconfig.is_agb_mode`` is set the call is delegated to
    ``run_minimap_agb`` unchanged. Otherwise minimap2's stdout is written to
    ``out_fpath`` (overwritten) and its stderr is appended to ``log_err_fpath``.

    :param out_fpath: path of the file receiving minimap2's stdout
    :param ref_fpath: reference file, passed to minimap2 before the contigs
    :param contigs_fpath: contigs file, passed to minimap2 as the last argument
    :param log_err_fpath: path of the log file minimap2's stderr is appended to
    :param index: assembly index; only used to build the log indent prefix
    :param max_threads: value passed to minimap2's ``-t`` option
    :return: the value returned by ``qutils.call_subprocess`` (or by
        ``run_minimap_agb`` in AGB mode)
    """
    if qconfig.is_agb_mode:
        return run_minimap_agb(out_fpath, ref_fpath, contigs_fpath, log_err_fpath, index, max_threads)

    # Choose the minimap2 preset from the configured minimum identity:
    # below 90 -> asm20, below 95 -> asm10, otherwise asm5.
    if qconfig.min_IDY < 90:
        preset = 'asm20'
    elif qconfig.min_IDY < 95:
        preset = 'asm10'
    else:
        preset = 'asm5'
    # -s -- min CIGAR score, -z -- affects how often to stop alignment extension, -B -- mismatch penalty
    # -O -- gap penalty, -r -- max gap size
    mask_level = '1' if qconfig.is_combined_ref else '0.9'
    num_alignments = '100' if qconfig.is_combined_ref else '50'
    additional_options = ['-B5', '-O4,16', '--no-long-join', '-r', str(qconfig.MAX_INDEL_LENGTH),
                          '-N', num_alignments, '-s', str(qconfig.min_alignment), '-z', '200']
    # The additional tuning options are left out entirely for large genomes.
    cmdline = [minimap_fpath(), '-c', '-x', preset] + (additional_options if not qconfig.large_genome else []) + \
              ['--mask-level', mask_level, '--min-occ', '200', '-g', '2500', '--score-N', '2', '--cs', '-t', str(max_threads), ref_fpath, contigs_fpath]
    # Explicitly scoped handles: the original passed bare open() results that
    # were never closed.
    with open(out_fpath, 'w') as out_f:
        with open(log_err_fpath, 'a') as err_f:
            return qutils.call_subprocess(cmdline, stdout=out_f, stderr=err_f,
                                          indent='  ' + qutils.index_to_str(index))
コード例 #5
0
def align_kmers(output_dir, ref_fpath, kmers_fpath, log_err_fpath,
                max_threads):
    """Align k-mers to the reference with minimap2 and group the hits by target.

    minimap2 is run with the ``sr`` preset; its stdout goes to
    ``<output_dir>/kmers.coords`` (overwritten) and its stderr is appended to
    ``log_err_fpath``. The output file is then parsed line by line.

    :param output_dir: directory in which ``kmers.coords`` is created
    :param ref_fpath: reference file, passed to minimap2 before the k-mers
    :param kmers_fpath: k-mers file, passed to minimap2 as the last argument
    :param log_err_fpath: path of the log file minimap2's stderr is appended to
    :param max_threads: value passed to minimap2's ``-t`` option
    :return: tuple ``(kmers_by_chrom, kmers_pos_by_chrom)`` of ``defaultdict(list)``
        objects keyed by the 6th output column; the first holds the 1st column
        converted to ``int``, the second the 8th column converted to ``int``
    :raises ValueError: if the 1st or 8th column of a parsed line is not an integer
    """
    out_fpath = join(output_dir, 'kmers.coords')
    cmdline = [
        minimap_fpath(), '-cx', 'sr', '-s' + str(qconfig.unique_kmer_len * 2),
        '--frag=no', '-t',
        str(max_threads), ref_fpath, kmers_fpath
    ]
    # Close both handles before the output file is read back below.
    # The return code is deliberately not checked (as in the original): a
    # failed run simply yields empty result dictionaries.
    with open(out_fpath, 'w') as out_f:
        with open(log_err_fpath, 'a') as err_f:
            qutils.call_subprocess(cmdline,
                                   stdout=out_f,
                                   stderr=err_f,
                                   indent='  ')
    kmers_pos_by_chrom = defaultdict(list)
    kmers_by_chrom = defaultdict(list)
    with open(out_fpath) as f:
        for line in f:
            fs = line.split('\t')
            # Skip lines that do not have at least 10 tab-separated fields.
            if len(fs) < 10:
                continue
            # Presumably PAF columns: query name, target name, target start
            # -- confirm against the minimap2 output format.
            contig, chrom, pos = fs[0], fs[5], fs[7]
            kmers_pos_by_chrom[chrom].append(int(pos))
            kmers_by_chrom[chrom].append(int(contig))
    return kmers_by_chrom, kmers_pos_by_chrom
コード例 #6
0
def get_unique_covered_regions(ref_fpath,
                               tmp_dir,
                               log_fpath,
                               binary_fpath,
                               insert_size,
                               uncovered_fpath,
                               use_long_reads=False):
    """Find repeats in the reference and return the regions left after removing them.

    Steps, all working inside ``tmp_dir``:

    1. Run the repeat masking tool (``binary_fpath``) on a symlinked copy of the
       reference, unless a non-empty ``<ref_name>.rpt`` already exists.
    2. Keep only repeats at least ``insert_size`` long in ``<ref_name>.long.rpt``.
    3. Unless a non-empty ``<ref_name>.rpt.coords.txt`` already exists, extract
       the long repeats with ``bedtools getfasta`` and align them back to the
       reference with minimap2.
    4. Pass the results to ``check_repeats_instances`` and
       ``remove_repeat_regions``.

    :param ref_fpath: path to the reference file
    :param tmp_dir: directory for all intermediate files
    :param log_fpath: log file receiving the external tools' output
    :param binary_fpath: path to the repeat masking executable
    :param insert_size: minimum repeat length (end minus start) to keep
    :param uncovered_fpath: forwarded to ``remove_repeat_regions``
    :param use_long_reads: forwarded to ``check_repeats_instances``
    :return: ``(unique_covered_regions, repeats_regions)``, or ``(None, None)``
        when the repeat masking tool returned non-zero or its output file
        does not exist
    """
    # Start from a clean input directory for the repeat masking tool.
    red_genome_dir = os.path.join(tmp_dir, 'tmp_red')
    if isdir(red_genome_dir):
        shutil.rmtree(red_genome_dir)
    os.makedirs(red_genome_dir)

    ref_name = qutils.name_from_fpath(ref_fpath)
    ref_symlink = os.path.join(red_genome_dir, ref_name + '.fa')  ## Red recognizes only *.fa files
    if os.path.islink(ref_symlink):
        os.remove(ref_symlink)
    os.symlink(ref_fpath, ref_symlink)

    logger.info('  ' + 'Running repeat masking tool...')
    repeats_fpath = os.path.join(tmp_dir, ref_name + '.rpt')
    if is_non_empty_file(repeats_fpath):
        return_code = 0
        logger.info('  ' + 'Using existing file ' + repeats_fpath + '...')
    else:
        # A single handle is shared by stdout and stderr. The original opened
        # the log twice in 'w' mode, so the two streams wrote at independent
        # offsets and overwrote each other's output.
        with open(log_fpath, 'w') as log_f:
            return_code = qutils.call_subprocess([
                binary_fpath, '-gnm', red_genome_dir, '-rpt', tmp_dir, '-frm', '2',
                '-min', '5'
            ],
                                                 stdout=log_f,
                                                 stderr=log_f,
                                                 indent='    ')
    if return_code != 0 or not exists(repeats_fpath):
        return None, None

    # Keep only the repeats that are at least insert_size long.
    long_repeats_fpath = os.path.join(tmp_dir, ref_name + '.long.rpt')
    with open(long_repeats_fpath, 'w') as out:
        with open(repeats_fpath) as in_f:
            for line in in_f:
                fields = line.split('\t')
                repeat_len = int(fields[2]) - int(fields[1])
                if repeat_len >= insert_size:
                    # Drop the first character of the line (presumably a
                    # leading '>' marker in the tool's output -- confirm).
                    out.write(line[1:])

    repeats_fasta_fpath = os.path.join(tmp_dir, ref_name + '.fasta')
    coords_fpath = os.path.join(tmp_dir, ref_name + '.rpt.coords.txt')
    if not is_non_empty_file(coords_fpath):
        # Remove any existing FASTA index next to the reference before running
        # bedtools (presumably to avoid using a stale index -- confirm).
        fasta_index_fpath = ref_fpath + '.fai'
        if exists(fasta_index_fpath):
            os.remove(fasta_index_fpath)
        # NOTE(review): 'w' truncates the log and discards anything the repeat
        # masking tool wrote above; kept as in the original -- confirm whether
        # append mode was intended.
        with open(log_fpath, 'w') as log_f:
            qutils.call_subprocess([
                bedtools_fpath('bedtools'), 'getfasta', '-fi', ref_fpath,
                '-bed', long_repeats_fpath, '-fo', repeats_fasta_fpath
            ],
                                   stderr=log_f,
                                   indent='    ')
        # Align the extracted repeat sequences back to the reference.
        cmdline = [
            minimap_fpath(), '-c', '-x', 'asm10', '-N', '50',
            '--mask-level', '1', '--no-long-join', '-r', '100', '-t',
            str(qconfig.max_threads), '-z', '200', ref_fpath,
            repeats_fasta_fpath
        ]
        with open(coords_fpath, 'w') as coords_f:
            with open(log_fpath, 'a') as log_f:
                qutils.call_subprocess(cmdline,
                                       stdout=coords_f,
                                       stderr=log_f)
    filtered_repeats_fpath, repeats_regions = check_repeats_instances(
        coords_fpath, long_repeats_fpath, use_long_reads)
    unique_covered_regions = remove_repeat_regions(ref_fpath,
                                                   filtered_repeats_fpath,
                                                   uncovered_fpath)
    return unique_covered_regions, repeats_regions