Example #1
0
def _realign_bam(bam_fpath, reference_fpath, out_bam_fpath, threads=False):
    """Realign a BAM with GATK local realignment around indels.

    The reference sam index, the reference picard dict and the BAM index
    are prepared first through the module helpers.  GATK is then run twice:
    RealignerTargetCreator writes the intervals to realign into a temporary
    file, and IndelRealigner uses those intervals to write the realigned
    BAM.

    bam_fpath -- path of the BAM to realign.
    reference_fpath -- path of the reference the BAM was aligned against.
    out_bam_fpath -- path where IndelRealigner writes its output.
    threads -- when truthy and greater than 1, '-nt' is added to the
        IndelRealigner command with the value of get_num_threads(threads).

    Any error raised by check_call for either GATK run propagates to the
    caller; the temporary files are closed (and so removed) in every case.
    """
    # GATK prerequisites: reference sam index, picard dict and BAM index.
    _create_sam_reference_index(reference_fpath)
    _create_picard_dict(reference_fpath)
    index_bam(bam_fpath)

    gatk_jar = get_setting('GATK_JAR')

    # The context managers guarantee the temporary files are released even
    # when one of the GATK invocations fails.
    with NamedTemporaryFile(suffix='.intervals') as intervals_fhand, \
            NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        # the intervals to realign
        cmd = ['java', '-jar', gatk_jar, '-T', 'RealignerTargetCreator',
               '-I', bam_fpath, '-R', reference_fpath,
               '-o', intervals_fhand.name]
        check_call(cmd, stderr=stderr, stdout=stdout)

        # the realignment itself
        cmd = ['java', '-jar', gatk_jar, '-I', bam_fpath,
               '-R', reference_fpath, '-T', 'IndelRealigner',
               '-targetIntervals', intervals_fhand.name,
               '-o', out_bam_fpath]

        # NOTE(review): confirm that the GATK version in use accepts '-nt'
        # for IndelRealigner.
        if threads and threads > 1:
            cmd.extend(['-nt', str(get_num_threads(threads))])
        check_call(cmd, stderr=stderr, stdout=stdout)
Example #2
0
def _create_picard_dict(fpath):
    """Create the picard dict for fpath if it does not exist yet.

    The dict path is fpath with its extension replaced by '.dict'.  When
    that path already exists nothing is run.  Otherwise Picard
    CreateSequenceDictionary is called with R=fpath and O=<dict path>.

    fpath -- path of the reference file.

    Any error raised by check_call propagates to the caller.
    """
    dict_path = os.path.splitext(fpath)[0] + '.dict'
    if os.path.exists(dict_path):
        return

    picard_jar = get_setting("PICARD_JAR")
    cmd = ['java', '-jar', picard_jar, 'CreateSequenceDictionary',
           'R=%s' % fpath,
           'O=%s' % dict_path]
    # Picard's stderr goes to a scratch file that is closed (and removed)
    # as soon as the command ends, whether it succeeded or not.
    with NamedTemporaryFile(suffix='picard.stderr') as stderr:
        check_call(cmd, stderr=stderr)
Example #3
0
def merge_sams(in_fpaths, out_fpath):
    """Merge several SAM/BAM files into one with Picard MergeSamFiles.

    in_fpaths -- iterable with the paths of the files to merge; each one is
        passed to Picard as an 'I=' argument.
    out_fpath -- path of the merged file, passed to Picard as 'O='.

    If the Picard call raises CalledProcessError, the captured stderr and
    stdout of the process are written to sys.stderr and sys.stdout and the
    function returns normally.
    """
    picard_jar = get_setting("PICARD_JAR")

    cmd = ['java', '-jar', picard_jar, 'MergeSamFiles',
           'O={}'.format(out_fpath)]
    cmd.extend('I={}'.format(in_fpath) for in_fpath in in_fpaths)

    # The scratch files are closed (and removed) on every exit path.
    with NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        try:
            check_call(cmd, stderr=stderr, stdout=stdout)
        except CalledProcessError:
            # NOTE(review): the failure is reported but not re-raised, so
            # callers cannot tell that the merge failed; kept as it was --
            # confirm this best-effort behaviour is intended.
            # The files are reopened by name in text mode because the
            # temporary handles themselves are binary.
            with open(stderr.name) as captured:
                sys.stderr.write(captured.read())
            with open(stdout.name) as captured:
                sys.stdout.write(captured.read())
Example #4
0
def sort_bam(in_bam_fpath, out_bam_fpath=None):
    """Sort a BAM by coordinate with Picard SortSam.

    in_bam_fpath -- path of the BAM to sort.
    out_bam_fpath -- path of the sorted BAM.  When it is None or equal to
        in_bam_fpath the sort is done "in place": Picard writes to a
        temporary file that is moved over the input once the sort ends.

    Any error raised by check_call propagates to the caller; in that case
    the temporary sorted file, if one was created, is removed.
    """
    # Local import so the block does not depend on a module-level one.
    import os

    if out_bam_fpath is None:
        out_bam_fpath = in_bam_fpath
    in_place = out_bam_fpath == in_bam_fpath

    picard_jar = get_setting("PICARD_JAR")

    if in_place:
        # Only the name is needed: the handle is closed right away and the
        # file is kept (delete=False) for Picard to write into.
        with NamedTemporaryFile(suffix='.sorted.bam',
                                delete=False) as sorted_fhand:
            temp_out_fpath = sorted_fhand.name
    else:
        temp_out_fpath = out_bam_fpath

    cmd = ['java', '-jar', picard_jar, 'SortSam',
           'INPUT={0}'.format(in_bam_fpath),
           'OUTPUT={0}'.format(temp_out_fpath),
           'SORT_ORDER=coordinate', 'VALIDATION_STRINGENCY=LENIENT']
    try:
        with NamedTemporaryFile(suffix='picard.stderr') as stderr:
            check_call(cmd, stderr=stderr)
        if in_place:
            shutil.move(temp_out_fpath, out_bam_fpath)
    finally:
        # After a successful move the temporary path no longer exists, so
        # this only fires when the sort or the move failed.
        if in_place and os.path.exists(temp_out_fpath):
            os.remove(temp_out_fpath)
Example #5
0
import pysam
try:
    from pysam.csamtools import Samfile
except ImportError:
    from pysam import Samfile
from crumbs.statistics import (draw_histogram_ascii, IntCounter, LABELS,
                               BestItemsKeeper)

from bam_crumbs.settings import get_setting
from bam_crumbs.utils.flag import SAM_FLAG_BINARIES, SAM_FLAGS
from bam_crumbs.utils.bin import get_binary_path
from collections import Counter

# pylint: disable=C0111

# Module-level defaults, looked up once at import time through get_setting
# (imported from bam_crumbs.settings).
DEFAULT_N_BINS = get_setting('DEFAULT_N_BINS')
DEFAULT_N_MOST_ABUNDANT_REFERENCES = get_setting(
    'DEFAULT_N_MOST_ABUNDANT_REFERENCES')


def count_reads(ref_name, bams, start=None, end=None):
    'It sums, over all the bams, the reads each one counts in the region'
    # Every bam is asked for the same region; sum() starts from 0, so an
    # empty collection of bams yields 0.
    return sum(bam.count(reference=ref_name, start=start, end=end)
               for bam in bams)


class ArrayWrapper(object):
    'A thin wrapper around numpy to have the same interface as IntCounter'
Example #6
0
try:
    from pysam.csamtools import Samfile
except ImportError:
    from pysam import Samfile
from crumbs.statistics import (draw_histogram_ascii, IntCounter, LABELS,
                               BestItemsKeeper)

from bam_crumbs.settings import get_setting
from bam_crumbs.utils.flag import SAM_FLAG_BINARIES, SAM_FLAGS
from bam_crumbs.utils.bin import get_binary_path
from collections import Counter

# pylint: disable=C0111


# Module-level defaults, looked up once at import time through get_setting
# (imported from bam_crumbs.settings).  DEFAULT_N_BINS is the default value
# of the `bins` argument of ArrayWrapper.__init__.
DEFAULT_N_BINS = get_setting('DEFAULT_N_BINS')
DEFAULT_N_MOST_ABUNDANT_REFERENCES = get_setting('DEFAULT_N_MOST_ABUNDANT_REFERENCES')


def count_reads(ref_name, bams, start=None, end=None):
    'It adds up the reads that every bam counts in the requested region'
    # One count per bam, all of them queried with the same region.
    per_bam_counts = [
        bam.count(reference=ref_name, start=start, end=end) for bam in bams
    ]
    return sum(per_bam_counts)


class ArrayWrapper(object):
    'A thin wrapper around numpy to have the same interface as IntCounter'
    def __init__(self, array, bins=DEFAULT_N_BINS):
        self.array = array