def test_get_num_threads():
    'It checks get_num_threads with a number, with booleans and with a limit'
    # an explicit number of threads is returned as it is
    assert get_num_threads(3) == 3

    # False means a single thread
    assert get_num_threads(False) == 1

    # True means as many threads as online processors
    assert get_num_threads(True) == os.sysconf('SC_NPROCESSORS_ONLN')

    # with a second argument (the memory limit) the result should stay
    # between one and the number of online processors
    num_threads = get_num_threads(True, 1024)
    assert 1 <= num_threads <= os.sysconf('SC_NPROCESSORS_ONLN')
def realign_bam(bam_fpath, reference_fpath, out_bam_fpath, java_conf=None,
                threads=False, tmp_dir=None):
    """It realigns the bam using GATK local realignment around indels.

    It calls create_sam_reference_index, create_picard_dict and
    create_bam_index, then runs the GATK RealignerTargetCreator to write the
    intervals to realign, runs the GATK IndelRealigner over those intervals
    and finally sorts the resulting bam into out_bam_fpath with sort_bam_sam.

    bam_fpath -- path of the bam to realign
    reference_fpath -- path of the reference
    out_bam_fpath -- path where the sorted bam is written
    java_conf -- java configuration passed on to java_cmd, guess_jar_dir,
                 create_picard_dict and sort_bam_sam
    threads -- threads setting resolved with get_num_threads; it only has an
               effect when the parallel flag inside this function is enabled
    tmp_dir -- temporary directory passed on to sort_bam_sam

    The intermediate files (intervals and unsorted bam) are temporary files
    that are removed when the function finishes, even if a step fails.
    """
    # reference sam index
    create_sam_reference_index(reference_fpath)
    # reference picard dict
    create_picard_dict(reference_fpath, java_conf=java_conf)
    # bam index
    create_bam_index(bam_fpath)

    gatk_path = guess_jar_dir('GenomeAnalysisTK.jar', java_conf)
    gatk_jar = os.path.join(gatk_path, 'GenomeAnalysisTK.jar')

    # According to GATK the multithreaded mode is experimental, so it might be
    # a good idea to run in just one thread. The option was removed in
    # version 1.0.4498, so the parallel mode is kept disabled.
    parallel = False
    thread_args = []
    if parallel and threads:
        # The setting has to be resolved before comparing it: threads can be
        # True and True > 1 is False, so the raw setting can not be compared.
        num_threads = get_num_threads(threads)
        if num_threads > 1:
            thread_args = ['-nt', str(num_threads)]

    with tempfile.NamedTemporaryFile(suffix='.intervals') as intervals_fhand:
        # the intervals to realign
        cmd = java_cmd(java_conf=java_conf)
        cmd.extend(['-jar', gatk_jar, '-T', 'RealignerTargetCreator',
                    '-I', bam_fpath, '-R', reference_fpath,
                    '-o', intervals_fhand.name])
        cmd.extend(thread_args)
        call(cmd, raise_on_error=True, add_ext_dir=False)

        with tempfile.NamedTemporaryFile(suffix='.bam') as unsorted_bam:
            # the realignment itself
            cmd = java_cmd(java_conf=java_conf)
            cmd.extend(['-Djava.io.tmpdir=%s' % tempfile.gettempdir(),
                        '-jar', gatk_jar, '-I', bam_fpath,
                        '-R', reference_fpath, '-T', 'IndelRealigner',
                        '-targetIntervals', intervals_fhand.name,
                        '-o', unsorted_bam.name])
            cmd.extend(thread_args)
            call(cmd, raise_on_error=True, add_ext_dir=False)

            # now the realigned bam has to be sorted
            sort_bam_sam(unsorted_bam.name, out_bam_fpath,
                         java_conf=java_conf, tmp_dir=tmp_dir)
def map_reads_with_bwa(reference_fpath, reads_fpath, bam_fpath, parameters):
    """It maps the reads to the reference with bwa and writes a sorted bam.

    parameters is a mapping with the keys colorspace, reads_length ('short'
    or 'long'), threads and java_conf. The keys tmp_dir and unmapped_fhand
    are optional.

    Short reads are mapped with 'bwa aln' followed by 'bwa samse', long reads
    with 'bwa dbwtsw'. When unmapped_fhand is given the alignment is passed
    through get_out_unmapped together with that file handle. The alignment is
    converted with sam2bam and sorted by coordinate into bam_fpath.

    It raises a ValueError when reads_length is neither 'short' nor 'long'.
    """
    is_colorspace = parameters['colorspace']
    length_kind = parameters['reads_length']
    threads_setting = parameters['threads']
    java_conf = parameters['java_conf']
    tmp_dir = parameters.get('tmp_dir')

    num_threads = get_num_threads(threads_setting)

    # the reference should have an index, it is created when the .bwt file
    # is not found
    if not os.path.exists(reference_fpath + '.bwt'):
        create_bwa_reference(reference_fpath, color=is_colorspace)

    ali_suffix = 'output.ali'

    if length_kind == 'short':
        aln_cmd = ['bwa', 'aln', reference_fpath, reads_fpath,
                   '-t', str(num_threads)]
        if is_colorspace:
            aln_cmd.append('-c')
        sai_fhand = NamedTemporaryFile(dir=tmp_dir, suffix='output.sai',
                                       mode='wb')
        call(aln_cmd, stdout=sai_fhand, raise_on_error=True)

        samse_cmd = ['bwa', 'samse', reference_fpath, sai_fhand.name,
                     reads_fpath]
        ali_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=ali_suffix,
                                       mode='w')
        call(samse_cmd, stdout=ali_fhand, raise_on_error=True)
    elif length_kind == 'long':
        bwasw_cmd = ['bwa', 'dbwtsw', reference_fpath, reads_fpath,
                     '-t', str(num_threads)]
        ali_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=ali_suffix)
        call(bwasw_cmd, stdout=ali_fhand, raise_on_error=True)
    else:
        raise ValueError('Reads length: short or long')

    unmapped_fhand = parameters.get('unmapped_fhand')
    if unmapped_fhand is not None:
        filtered_fhand = NamedTemporaryFile(dir=tmp_dir, suffix=ali_suffix)
        get_out_unmapped(ali_fhand, unmapped_fhand, filtered_fhand)
        ali_fhand = filtered_fhand

    # from sam to bam
    unsorted_bam = NamedTemporaryFile(dir=tmp_dir, suffix='bam_file.bam')
    sam2bam(ali_fhand.name, unsorted_bam.name)

    # sort the bam file
    sort_bam_sam(unsorted_bam.name, bam_fpath, sort_method='coordinate',
                 java_conf=java_conf, strict_validation=False,
                 tmp_dir=tmp_dir)
def _get_num_threads(self):
    'It returns the number of threads for the General_settings threads value'
    return get_num_threads(
        self._project_settings['General_settings']['threads'])
def _get_num_threads(self):
    "It resolves the threads value of the project into a number of threads"
    general_settings = self._project_settings["General_settings"]
    return get_num_threads(general_settings["threads"])
def _get_num_threads(self):
    'It calculates the number of threads for the snv calling analysis'
    # PYSAM_MEMORY_LIMIT is passed as the second argument of get_num_threads
    configured_threads = self._project_settings['General_settings']['threads']
    num_threads = get_num_threads(configured_threads, PYSAM_MEMORY_LIMIT)
    return num_threads