Beispiel #1
0
    def test_get_num_threads():
        '''It checks get_num_threads with the different kinds of requests.

        An integer request is returned untouched, False means one thread,
        True means every online processor and, when a memory limit is also
        given, the result stays between one and the online processors.
        '''
        # an explicit number of threads is respected
        requested = 3
        assert get_num_threads(requested) == requested

        # False asks for a single thread
        assert get_num_threads(False) == 1

        # True asks for all the online processors
        all_cores = get_num_threads(True)
        assert all_cores == os.sysconf('SC_NPROCESSORS_ONLN')

        # with a memory limit the result is only known to be within a range
        memory_limit = 1024
        limited = get_num_threads(True, memory_limit)
        assert 1 <= limited <= os.sysconf('SC_NPROCESSORS_ONLN')
Beispiel #2
0
def realign_bam(bam_fpath, reference_fpath, out_bam_fpath, java_conf=None,
                threads=False, tmp_dir=None):
    '''It realigns the bam using GATK Local realignment around indels.

    The process has three steps: RealignerTargetCreator writes the intervals
    to realign, IndelRealigner writes an unsorted realigned bam and, finally,
    that bam is sorted into out_bam_fpath.

    bam_fpath       -- path to the bam to realign (it gets indexed)
    reference_fpath -- path to the reference (it gets a sam index and a
                       picard dict)
    out_bam_fpath   -- path where the sorted realigned bam is written
    java_conf       -- java configuration handed to the java helpers
    threads         -- number of threads requested. It is only used if the
                       parallel switch inside this function is enabled
    tmp_dir         -- directory for the intermediate files and for the java
                       temporary directory. With None the default temporary
                       directory is used.
    '''
    #reference sam index
    create_sam_reference_index(reference_fpath)

    #reference picard dict
    create_picard_dict(reference_fpath, java_conf=java_conf)

    #bam index
    create_bam_index(bam_fpath)

    #according to GATK this is experimental, so it might be a good idea to
    #do it in just one thread. In version 1.0.4498 this option was removed,
    #so parallel = False
    parallel = False
    thread_args = []
    if parallel:
        # threads can be a boolean (True means all the cores), so the real
        # number has to be resolved before comparing it with 1
        num_threads = get_num_threads(threads)
        if num_threads > 1:
            thread_args = ['-nt', str(num_threads)]

    #the intervals to realign
    gatk_path = guess_jar_dir('GenomeAnalysisTK.jar', java_conf)
    gatk_jar = os.path.join(gatk_path, 'GenomeAnalysisTK.jar')
    # the intermediate files honor tmp_dir; dir=None keeps the default
    intervals_fhand = tempfile.NamedTemporaryFile(suffix='.intervals',
                                                  dir=tmp_dir)
    cmd = java_cmd(java_conf=java_conf)
    cmd.extend(['-jar', gatk_jar, '-T', 'RealignerTargetCreator',
           '-I', bam_fpath, '-R', reference_fpath, '-o', intervals_fhand.name])
    cmd.extend(thread_args)
    call(cmd, raise_on_error=True, add_ext_dir=False)

    #the realignment itself
    if tmp_dir is not None:
        java_tmp_dir = tmp_dir
    else:
        java_tmp_dir = tempfile.gettempdir()
    unsorted_bam = NamedTemporaryFile(suffix='.bam', dir=tmp_dir)
    cmd = java_cmd(java_conf=java_conf)
    cmd.extend(['-Djava.io.tmpdir=%s' % java_tmp_dir,
           '-jar', gatk_jar, '-I', bam_fpath, '-R', reference_fpath,
           '-T', 'IndelRealigner', '-targetIntervals', intervals_fhand.name,
           '-o', unsorted_bam.name])
    cmd.extend(thread_args)
    call(cmd, raise_on_error=True, add_ext_dir=False)

    # now we have to sort the realigned bam
    sort_bam_sam(unsorted_bam.name, out_bam_fpath, java_conf=java_conf,
                 tmp_dir=tmp_dir)
def map_reads_with_bwa(reference_fpath, reads_fpath, bam_fpath, parameters):
    '''It maps the reads to the reference using bwa and writes a sorted bam.

    reference_fpath -- path to the reference. If its .bwt index file does
                       not exist the bwa index is created
    reads_fpath     -- path to the reads to map
    bam_fpath       -- path where the coordinate sorted bam is written
    parameters      -- mapping with the keys colorspace, reads_length
                       ('short' or 'long'), threads and java_conf. The keys
                       tmp_dir and unmapped_fhand are optional.

    It raises a ValueError if reads_length is neither 'short' nor 'long'.
    '''
    colorspace = parameters['colorspace']
    reads_length = parameters['reads_length']
    num_threads = get_num_threads(parameters['threads'])
    java_conf = parameters['java_conf']
    tmp_dir = parameters.get('tmp_dir')

    #the reference should have an index
    if not os.path.exists(reference_fpath + '.bwt'):
        create_bwa_reference(reference_fpath, color=colorspace)

    sam_suffix = 'output.ali'
    if reads_length == 'short':
        # short reads need two steps: aln writes the sai and samse the sam
        aln_cmd = ['bwa', 'aln', reference_fpath, reads_fpath,
                   '-t', str(num_threads)]
        if colorspace:
            aln_cmd.append('-c')
        sai_fhand = NamedTemporaryFile(dir=tmp_dir, suffix='output.sai',
                                       mode='wb')
        call(aln_cmd, stdout=sai_fhand, raise_on_error=True)

        samse_cmd = ['bwa', 'samse', reference_fpath, sai_fhand.name,
                     reads_fpath]
        ali_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=sam_suffix,
                                       mode='w')
        call(samse_cmd, stdout=ali_fhand, raise_on_error=True)
    elif reads_length == 'long':
        # long reads are mapped in just one step
        dbwtsw_cmd = ['bwa', 'dbwtsw', reference_fpath, reads_fpath,
                      '-t', str(num_threads)]
        ali_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=sam_suffix)
        call(dbwtsw_cmd, stdout=ali_fhand, raise_on_error=True)
    else:
        raise ValueError('Reads length: short or long')

    # if a handle for the unmapped reads is given the alignment goes through
    # get_out_unmapped and its output replaces the alignment file
    unmapped_fhand = parameters.get('unmapped_fhand')
    if unmapped_fhand is not None:
        filtered_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=sam_suffix)
        get_out_unmapped(ali_fhand, unmapped_fhand, filtered_fhand)
        ali_fhand = filtered_fhand

    # From sam to Bam
    unsorted_bam = NamedTemporaryFile(dir=tmp_dir, suffix='bam_file.bam')
    sam2bam(ali_fhand.name, unsorted_bam.name)

    # sort bam file
    sort_bam_sam(unsorted_bam.name, bam_fpath, sort_method='coordinate',
                 java_conf=java_conf, strict_validation=False,
                 tmp_dir=tmp_dir)
 def _get_num_threads(self):
     '''It returns the number of threads to use.

     The threads value of the General_settings section of the project
     settings is resolved with get_num_threads.
     '''
     general_settings = self._project_settings['General_settings']
     return get_num_threads(general_settings['threads'])
Beispiel #5
0
 def _get_num_threads(self):
     """It returns the number of threads to use.

     The threads value is read from the General_settings section of the
     project settings and handed to get_num_threads.
     """
     requested = self._project_settings["General_settings"]["threads"]
     num_threads = get_num_threads(requested)
     return num_threads
 def _get_num_threads(self):
     '''It returns the number of threads for the snv calling analysis.

     The threads value of the General_settings section is resolved with
     get_num_threads, passing PYSAM_MEMORY_LIMIT as its second argument.
     '''
     general_settings = self._project_settings['General_settings']
     requested = general_settings['threads']
     return get_num_threads(requested, PYSAM_MEMORY_LIMIT)