Esempio n. 1
0
def bamsam_converter(input_fhand, output_fhand, java_conf=None):
    """Convert between SAM and BAM by running Picard's SamFormatConverter.

    input_fhand and output_fhand are concatenated directly onto the
    ``INPUT=`` / ``OUTPUT=`` arguments, so despite their names they have to
    be strings (file paths). java_conf is forwarded to the jar lookup and to
    the java command builder.

    The command is run with raise_on_error=True.
    """
    # The Picard directory is located through SortSam.jar; the converter jar
    # is taken from that same directory.
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    converter_jar = os.path.join(jar_dir, 'SamFormatConverter.jar')

    command = java_cmd(java_conf)
    input_arg = 'INPUT=' + input_fhand
    output_arg = 'OUTPUT=' + output_fhand
    command.extend(['-jar', converter_jar, input_arg, output_arg])

    call(command, raise_on_error=True, add_ext_dir=False)
Esempio n. 2
0
def realign_bam(bam_fpath, reference_fpath, out_bam_fpath, java_conf=None,
                threads=False, tmp_dir=None):
    """Realign a bam with GATK local realignment around indels.

    Steps, in order:
      1. create the sam index and the picard dict for the reference and the
         index for the input bam,
      2. run GATK RealignerTargetCreator to write the intervals to realign
         into a temporary ``.intervals`` file,
      3. run GATK IndelRealigner over those intervals into a temporary bam,
      4. sort that temporary bam into out_bam_fpath with sort_bam_sam.

    bam_fpath, reference_fpath and out_bam_fpath are file paths. java_conf
    is forwarded to the jar lookup, the java command builder, the picard
    dict creation and the sort step.

    threads is currently ignored: the ``-nt`` option is only added when the
    local ``parallel`` flag is true, and that flag is hard-coded to False.

    tmp_dir is only forwarded to sort_bam_sam; the IndelRealigner run gets
    ``java.io.tmpdir`` from tempfile.gettempdir().

    Both GATK commands are run with raise_on_error=True. The temporary
    files are closed (and therefore removed) when the function exits, also
    when one of the steps raises.
    """
    # reference sam index
    create_sam_reference_index(reference_fpath)

    # reference picard dict
    create_picard_dict(reference_fpath, java_conf=java_conf)

    # bam index
    create_bam_index(bam_fpath)

    gatk_path = guess_jar_dir('GenomeAnalysisTK.jar', java_conf)
    gatk_jar = os.path.join(gatk_path, 'GenomeAnalysisTK.jar')

    # According to GATK multithreading is experimental, so it might be a
    # good idea to do it in just one thread. In version 1.0.4498 this option
    # is removed, so parallel = False.
    parallel = False
    thread_args = []
    if parallel and threads and threads > 1:
        thread_args = ['-nt', str(get_num_threads(threads))]

    # Context managers guarantee that the temporary files are cleaned up
    # even if GATK or picard fail.
    with tempfile.NamedTemporaryFile(suffix='.intervals') as intervals_fhand:
        # the intervals to realign
        cmd = java_cmd(java_conf=java_conf)
        cmd.extend(['-jar', gatk_jar, '-T', 'RealignerTargetCreator',
                    '-I', bam_fpath, '-R', reference_fpath,
                    '-o', intervals_fhand.name])
        cmd.extend(thread_args)
        call(cmd, raise_on_error=True, add_ext_dir=False)

        # the realignment itself
        with tempfile.NamedTemporaryFile(suffix='.bam') as unsorted_bam:
            cmd = java_cmd(java_conf=java_conf)
            cmd.extend(['-Djava.io.tmpdir=%s' % tempfile.gettempdir(),
                        '-jar', gatk_jar, '-I', bam_fpath,
                        '-R', reference_fpath, '-T', 'IndelRealigner',
                        '-targetIntervals', intervals_fhand.name,
                        '-o', unsorted_bam.name])
            cmd.extend(thread_args)
            call(cmd, raise_on_error=True, add_ext_dir=False)

            # now we have to sort the realigned bam
            sort_bam_sam(unsorted_bam.name, out_bam_fpath,
                         java_conf=java_conf, tmp_dir=tmp_dir)
Esempio n. 3
0
def sort_bam_sam(in_fpath, out_fpath, sort_method='coordinate',
                 java_conf=None, tmp_dir=None, strict_validation=True):
    """Sort a sam/bam file by running picard's SortSam.jar.

    in_fpath and out_fpath are the input and output file paths and
    sort_method is passed to picard as SORT_ORDER. When strict_validation is
    false VALIDATION_STRINGENCY=LENIENT is added, and when tmp_dir is given
    it is passed to picard as TMP_DIR.

    Raises RuntimeError when picard exits with a non-zero return code, with
    a dedicated message when the output mentions that the device ran out of
    space.
    """
    jar_dir = guess_jar_dir('SortSam.jar', java_conf)
    sort_jar = os.path.join(jar_dir, 'SortSam.jar')

    command = java_cmd(java_conf)
    picard_args = ['-jar', sort_jar,
                   'INPUT=' + in_fpath,
                   'OUTPUT=' + out_fpath,
                   'SORT_ORDER=' + sort_method]
    if not strict_validation:
        picard_args.append('VALIDATION_STRINGENCY=LENIENT')
    if tmp_dir:
        picard_args.append('TMP_DIR=%s' % tmp_dir)
    command.extend(picard_args)

    # Errors are inspected here instead of letting call() raise, so that the
    # out-of-space case can be reported with its own message.
    stdout, stderr, retcode = call(command, raise_on_error=False,
                                   add_ext_dir=False)
    if not retcode:
        return

    no_space = 'No space left on device'
    if no_space in stdout or no_space in stderr:
        raise RuntimeError('Picard sort consumed all space in device.' +
                           stderr)

    details = (' '.join(command), stderr, stdout)
    raise RuntimeError('Error running picard: %s\n stderr: %s\n stdout: %s'
                       % details)