def _realign_bam(bam_fpath, reference_fpath, out_bam_fpath, threads=False):
    """Realign a bam using GATK local realignment around indels.

    Two GATK tools are run one after the other: RealignerTargetCreator, that
    writes the intervals to realign into a temporary file, and IndelRealigner,
    that uses those intervals to write the realigned bam.

    Args:
        bam_fpath: path of the bam file to realign.
        reference_fpath: path of the reference given to GATK with -R.
        out_bam_fpath: path where IndelRealigner writes the realigned bam.
        threads: when truthy and greater than 1, '-nt' is added to the
            IndelRealigner command with the value returned by
            get_num_threads(threads).

    Raises:
        CalledProcessError: raised by check_call when any of the GATK
            commands exits with a non-zero status.
    """
    # reference sam index
    _create_sam_reference_index(reference_fpath)

    # reference picard dict
    _create_picard_dict(reference_fpath)

    # bam index
    index_bam(bam_fpath)

    gatk_jar = get_setting('GATK_JAR')

    # The context managers guarantee that the three temporary files are closed
    # (and therefore removed) even when one of the GATK calls fails.
    with NamedTemporaryFile(suffix='.intervals') as intervals_fhand, \
            NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        # the intervals to realign
        cmd = ['java', '-jar', gatk_jar, '-T', 'RealignerTargetCreator',
               '-I', bam_fpath, '-R', reference_fpath,
               '-o', intervals_fhand.name]
        check_call(cmd, stderr=stderr, stdout=stdout)

        # the realignment itself
        cmd = ['java', '-jar', gatk_jar, '-I', bam_fpath,
               '-R', reference_fpath, '-T', 'IndelRealigner',
               '-targetIntervals', intervals_fhand.name,
               '-o', out_bam_fpath]
        if threads and threads > 1:
            cmd.extend(['-nt', str(get_num_threads(threads))])
        check_call(cmd, stderr=stderr, stdout=stdout)
def _create_picard_dict(fpath):
    """Create the picard dict for fpath if it does not exist yet.

    The dict path is fpath with its extension replaced by '.dict'. When that
    file is already there nothing is done.

    Args:
        fpath: path of the sequence file given to picard
            CreateSequenceDictionary as 'R='.

    Raises:
        CalledProcessError: raised by check_call when the picard command
            exits with a non-zero status.
    """
    dict_path = os.path.splitext(fpath)[0] + '.dict'
    if os.path.exists(dict_path):
        return

    picard_jar = get_setting("PICARD_JAR")
    cmd = ['java', '-jar', picard_jar, 'CreateSequenceDictionary',
           'R=%s' % fpath, 'O=%s' % dict_path]
    # The stderr of picard is sent to a temporary file that is closed, and
    # removed, as soon as the command ends, whether it fails or not.
    with NamedTemporaryFile(suffix='picard.stderr') as stderr:
        check_call(cmd, stderr=stderr)
def merge_sams(in_fpaths, out_fpath):
    """Merge several sam/bam files into one using picard MergeSamFiles.

    Args:
        in_fpaths: iterable with the paths of the files to merge, every one
            is given to picard as an 'I=' argument.
        out_fpath: path of the merged file, given to picard as 'O='.

    When the picard command fails its captured stderr and stdout are written
    to sys.stderr and sys.stdout. The CalledProcessError is not propagated.
    """
    picard_jar = get_setting("PICARD_JAR")
    cmd = ['java', '-jar', picard_jar, 'MergeSamFiles',
           'O={}'.format(out_fpath)]
    cmd.extend('I={}'.format(in_fpath) for in_fpath in in_fpaths)

    # Both capture files are closed, and removed, when the block is left.
    with NamedTemporaryFile(suffix='picard.stderr') as stderr, \
            NamedTemporaryFile(suffix='picard.stdout') as stdout:
        try:
            check_call(cmd, stderr=stderr, stdout=stdout)
        except CalledProcessError:
            # NOTE(review): the failure is only reported, not re-raised, so
            # the callers can not tell that the merge failed. This is the
            # behaviour that the function already had -- confirm that it is
            # the intended one.
            # The captured output is read through new handles that are
            # closed right after being used.
            with open(stderr.name) as captured:
                sys.stderr.write(captured.read())
            with open(stdout.name) as captured:
                sys.stdout.write(captured.read())
def sort_bam(in_bam_fpath, out_bam_fpath=None):
    """Sort a bam by coordinate using picard SortSam.

    Args:
        in_bam_fpath: path of the bam to sort.
        out_bam_fpath: path of the sorted bam. When it is None, or the same
            path as in_bam_fpath, the bam is sorted in place: picard writes
            into a temporary file that is moved over the input once the
            sorting is done.

    Raises:
        CalledProcessError: raised by check_call when the picard command
            exits with a non-zero status.
    """
    # Imported here so the function does not depend on a module level import.
    import os

    if out_bam_fpath is None:
        out_bam_fpath = in_bam_fpath
    in_place = out_bam_fpath == in_bam_fpath

    picard_jar = get_setting("PICARD_JAR")

    if in_place:
        sorted_fhand = NamedTemporaryFile(suffix='.sorted.bam', delete=False)
        temp_out_fpath = sorted_fhand.name
        # Only the path is needed, picard opens the file by itself.
        sorted_fhand.close()
    else:
        temp_out_fpath = out_bam_fpath

    cmd = ['java', '-jar', picard_jar, 'SortSam',
           'INPUT={0}'.format(in_bam_fpath),
           'OUTPUT={0}'.format(temp_out_fpath),
           'SORT_ORDER=coordinate', 'VALIDATION_STRINGENCY=LENIENT']
    try:
        with NamedTemporaryFile(suffix='picard.stderr') as stderr:
            check_call(cmd, stderr=stderr)
    except Exception:
        # The temporary bam was created with delete=False, so nobody else
        # would remove it if the sorting fails.
        if in_place and os.path.exists(temp_out_fpath):
            os.remove(temp_out_fpath)
        raise

    if in_place:
        shutil.move(temp_out_fpath, out_bam_fpath)
import pysam
# Samfile is taken from pysam.csamtools when that module can be imported and
# from the top level pysam package otherwise (presumably to support different
# pysam versions -- TODO confirm).
try:
    from pysam.csamtools import Samfile
except ImportError:
    from pysam import Samfile

from crumbs.statistics import (draw_histogram_ascii, IntCounter, LABELS,
                               BestItemsKeeper)

from bam_crumbs.settings import get_setting
from bam_crumbs.utils.flag import SAM_FLAG_BINARIES, SAM_FLAGS
from bam_crumbs.utils.bin import get_binary_path
from collections import Counter

# pylint: disable=C0111

# Defaults read from the project settings when the module is imported.
DEFAULT_N_BINS = get_setting('DEFAULT_N_BINS')
DEFAULT_N_MOST_ABUNDANT_REFERENCES = get_setting(
    'DEFAULT_N_MOST_ABUNDANT_REFERENCES')


def count_reads(ref_name, bams, start=None, end=None):
    """It returns the count of aligned reads in the region.

    Args:
        ref_name: name of the reference, given to bam.count as reference.
        bams: iterable of objects with a count(reference, start, end) method
            (presumably open pysam Samfile objects -- TODO confirm).
        start: start of the region, handed to bam.count as it is.
        end: end of the region, handed to bam.count as it is.

    Returns:
        The sum of the counts returned by every bam, 0 when bams is empty.
    """
    count = 0
    for bam in bams:
        count += bam.count(reference=ref_name, start=start, end=end)
    return count


class ArrayWrapper(object):
    'A thin wrapper around numpy to have the same interface as IntCounter'
# Samfile is taken from pysam.csamtools when that module can be imported and
# from the top level pysam package otherwise (presumably to support different
# pysam versions -- TODO confirm).
try:
    from pysam.csamtools import Samfile
except ImportError:
    from pysam import Samfile

from crumbs.statistics import (draw_histogram_ascii, IntCounter, LABELS,
                               BestItemsKeeper)

from bam_crumbs.settings import get_setting
from bam_crumbs.utils.flag import SAM_FLAG_BINARIES, SAM_FLAGS
from bam_crumbs.utils.bin import get_binary_path
from collections import Counter

# pylint: disable=C0111

# Defaults read from the project settings when the module is imported.
DEFAULT_N_BINS = get_setting('DEFAULT_N_BINS')
DEFAULT_N_MOST_ABUNDANT_REFERENCES = get_setting('DEFAULT_N_MOST_ABUNDANT_REFERENCES')


def count_reads(ref_name, bams, start=None, end=None):
    """It returns the count of aligned reads in the region.

    Args:
        ref_name: name of the reference, given to bam.count as reference.
        bams: iterable of objects with a count(reference, start, end) method
            (presumably open pysam Samfile objects -- TODO confirm).
        start: start of the region, handed to bam.count as it is.
        end: end of the region, handed to bam.count as it is.

    Returns:
        The sum of the counts returned by every bam, 0 when bams is empty.
    """
    count = 0
    for bam in bams:
        count += bam.count(reference=ref_name, start=start, end=end)
    return count


class ArrayWrapper(object):
    'A thin wrapper around numpy to have the same interface as IntCounter'

    def __init__(self, array, bins=DEFAULT_N_BINS):
        # Keep a reference to the wrapped array.
        self.array = array