def run_bwa(target, ref_idx, tmp_fasta, out_dir, num_threads):
    """Align ``tmp_fasta`` against ``ref_idx`` and write a sorted BAM.

    Three commands are handed to ``runProc`` as one list: ``bwa mem``,
    ``samtools view -b`` and ``samtools sort -O bam``. The final output is
    directed to a fresh temporary file created inside ``out_dir``.

    :param target: not used by this function
    :param ref_idx: reference index argument passed to ``bwa mem``
    :param tmp_fasta: sequence file passed to ``bwa mem``
    :param out_dir: directory in which the output temp file is created
    :param num_threads: value for ``bwa mem -t``; may be an int or a str
    """
    tmp_sort = tmpFileGet()
    out_path = tmpFileGet(tmpDir=out_dir)
    cmd = [
        # Command-line arguments must be strings, so coerce the thread count
        # in case the caller passes an int.
        ['bwa', 'mem', '-t', str(num_threads), ref_idx, tmp_fasta],
        ['samtools', 'view', '-b', '-'],
        # tmp_sort is the prefix samtools uses for its intermediate sort files
        ['samtools', 'sort', '-O', 'bam', '-T', tmp_sort, '-'],
    ]
    runProc(cmd, stdout=out_path)
def extract_reads(bam, offset=50000):
    """Pull the reads around each region of interest out of ``bam`` as FASTQ.

    Each entry of the module-level ``regions`` (defined outside this block)
    is unpacked as ``(chrom, start, stop, para)`` and widened by ``offset``
    on both sides. The selected reads go through ``samtools view``,
    ``samtools bamshuf`` and ``samtools bam2fq``, and the result is written
    to a temporary file.

    :param bam: path of the BAM passed to ``samtools view``
    :param offset: padding added on each side of every region
    :return: path of the temporary file the reads were written to
    """
    tmp_reads = tmpFileGet(suffix='reads.fq')
    tmp_shuf = tmpFileGet()
    # Clamp the padded start at zero: a negative value would yield a
    # malformed region string such as 'chr1:-100-5000'.
    region_strs = [
        '{}:{}-{}'.format(chrom, max(0, start - offset), stop + offset)
        for chrom, start, stop, _para in regions
    ]
    view_cmd = ['samtools', 'view', '-b', bam]
    view_cmd.extend(region_strs)
    cmd = [
        view_cmd,
        ['samtools', 'bamshuf', '-Ou', '-', tmp_shuf],
        ['samtools', 'bam2fq', '-'],
    ]
    # runProc receives the output path itself, so no file handle is opened
    # here (the previous handle was never used).
    runProc(cmd, stdout=tmp_reads)
    return tmp_reads
def extract_fastq(target, genome, institute, tissue, reference, out_dir,
                  experiment, bam_path, num_threads, ref_genome):
    """Dump ``bam_path`` to FASTQ and hand the result to the matching STAR runner.

    ``is_paired_sequencing(bam_path)`` selects the branch:

    * paired: ``samtools fastq -1/-2`` writes two temp files, which are
      passed to ``run_paired_star``;
    * otherwise: ``samtools fastq -0`` writes a single temp file, which is
      passed to ``run_single_star``.

    All remaining arguments are forwarded unchanged to the STAR runner.
    """
    if not is_paired_sequencing(bam_path):
        # Single-file case: one FASTQ, prefixed with the experiment name.
        single_fq = tmpFileGet(prefix=experiment)
        runProc(['samtools', 'fastq', '-0', single_fq, bam_path])
        run_single_star(target, genome, institute, tissue, reference, out_dir,
                        experiment, single_fq, num_threads, ref_genome)
        return

    # Paired case: one FASTQ per mate.
    mate1_fq = tmpFileGet(prefix=experiment, suffix='fwd.fastq')
    mate2_fq = tmpFileGet(prefix=experiment, suffix='rev.fastq')
    runProc(['samtools', 'fastq', '-1', mate1_fq, '-2', mate2_fq, bam_path])
    run_paired_star(target, genome, institute, tissue, reference, out_dir,
                    experiment, mate1_fq, mate2_fq, num_threads, ref_genome)
def extract_reads(bam, offset=50000):
    """Extract reads overlapping the padded regions of ``bam`` into a FASTQ file.

    The module-level ``regions`` iterable (defined outside this block) is
    unpacked as ``(chrom, start, stop, para)``. Each interval is extended by
    ``offset`` in both directions and passed to ``samtools view``; the
    output then goes through ``samtools bamshuf`` and ``samtools bam2fq``.

    :param bam: path of the BAM passed to ``samtools view``
    :param offset: padding added on each side of every region
    :return: path of the temporary FASTQ file
    """
    tmp_reads = tmpFileGet(suffix='reads.fq')
    tmp_shuf = tmpFileGet()
    region_strs = []
    for chrom, start, stop, _para in regions:
        # A padded start below zero would produce an invalid region string
        # (e.g. 'chr1:-100-5000'), so it is clamped at zero.
        padded_start = max(0, start - offset)
        region_strs.append('{}:{}-{}'.format(chrom, padded_start, stop + offset))
    view_cmd = ['samtools', 'view', '-b', bam]
    view_cmd.extend(region_strs)
    cmd = [
        view_cmd,
        ['samtools', 'bamshuf', '-Ou', '-', tmp_shuf],
        ['samtools', 'bam2fq', '-'],
    ]
    # The output path goes straight to runProc; the file handle that used to
    # be opened around this call was never used and has been dropped.
    runProc(cmd, stdout=tmp_reads)
    return tmp_reads
def __init__(self, dataFormat, liftFile, unmappedOutFile=None):
    """Store the lift-over settings and decide where unmapped records go.

    dataFormat should be 'genePred' or 'bedPlus=N'.

    When unmappedOutFile is not supplied, a temporary file is obtained from
    fileOps.tmpFileGet and recorded in both unmappedOutFile and
    unmappedOutFileTmp; otherwise unmappedOutFileTmp is None.
    """
    self.dataFormat = dataFormat
    self.liftFile = liftFile
    if unmappedOutFile is not None:
        # Caller chose the destination; no temporary file is involved.
        self.unmappedOutFile = unmappedOutFile
        self.unmappedOutFileTmp = None
    else:
        scratch = fileOps.tmpFileGet("gencode.dropped")
        self.unmappedOutFile = scratch
        self.unmappedOutFileTmp = scratch
def remap_reads(tmp_reads, index, out_bam):
    """Align ``tmp_reads`` to ``index`` and leave a sorted, indexed BAM at ``out_bam``.

    The reads go through ``bwa mem -p``, ``samtools view -b`` and
    ``samtools sort``, with the final output written to ``out_bam``.
    ``samtools index`` is then run on that file.

    :param tmp_reads: reads file passed to ``bwa mem``
    :param index: reference index argument passed to ``bwa mem``
    :param out_bam: path of the BAM file to create
    """
    sort_prefix = tmpFileGet()
    align_step = ['bwa', 'mem', '-p', index, tmp_reads]
    to_bam_step = ['samtools', 'view', '-b', '-']
    sort_step = ['samtools', 'sort', '-T', sort_prefix, '-O', 'bam', '-']
    with open(out_bam, 'w') as bam_handle:
        runProc([align_step, to_bam_step, sort_step], stdout=bam_handle)
    # Index only after the handle is closed and the BAM is fully written.
    runProc(['samtools', 'index', out_bam])
def cat(target, args):
    """Merge every regular file in the target's global temp dir into one BAM.

    The file paths are written one per line to a temporary file-of-filenames,
    which is passed to ``samtools merge -b`` with ``args.outBam`` as output.

    :param target: object providing ``getGlobalTempDir()``
    :param args: object whose ``outBam`` attribute names the merged output
    :raises AssertionError: if the temp dir contains no regular files
    """
    fofn = tmpFileGet()
    tmp_dir = target.getGlobalTempDir()
    files = [os.path.join(tmp_dir, x) for x in os.listdir(tmp_dir)]
    files = [x for x in files if os.path.isfile(x)]
    if not files:
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; the exception type is unchanged.
        raise AssertionError('no files to merge in {}'.format(tmp_dir))
    with open(fofn, 'w') as outf:
        outf.writelines(x + "\n" for x in files)
    cmd = ['samtools', 'merge', '-b', fofn, args.outBam]
    runProc(cmd)
def main():
    """Remap reads to the consensus reference, pile up and plot the result.

    If ``args.outBam`` is not given, the remapped BAM is written to a
    temporary file that is removed before returning. Removal happens in a
    ``finally`` clause so the file is also cleaned up when a step fails.
    """
    args = parse_args()
    use_tmp_bam = args.outBam is None
    if use_tmp_bam:
        out_bam = tmpFileGet(suffix='merged.sorted.bam')
    else:
        out_bam = args.outBam
    try:
        build_remapped_bam(args.inBam, args.consensusRef, out_bam)
        wgs_results = pileup(out_bam, args.consensusVcf)
        aln_size = get_aln_size(args.consensusRef)
        plot_results(wgs_results, args.outPdf, aln_size)
    finally:
        # Only the temporary BAM is ours to delete. The existence check
        # stops a missing file from masking the original exception.
        if use_tmp_bam and os.path.exists(out_bam):
            os.remove(out_bam)
def cat(target, args):
    """Combine all regular files from the global temp dir into ``args.outBam``.

    A temporary file-of-filenames is filled with one path per line and
    passed to ``samtools merge -b``.

    :param target: object providing ``getGlobalTempDir()``
    :param args: object whose ``outBam`` attribute names the merged output
    :raises AssertionError: if the temp dir contains no regular files
    """
    fofn = tmpFileGet()
    # Look the directory up once and reuse it for listing and joining.
    global_tmp = target.getGlobalTempDir()
    files = [
        path
        for path in (os.path.join(global_tmp, entry) for entry in os.listdir(global_tmp))
        if os.path.isfile(path)
    ]
    if not files:
        # An explicit raise keeps this check active under `python -O`,
        # where `assert` statements are removed. Same exception type as before.
        raise AssertionError('no files to merge in {}'.format(global_tmp))
    with open(fofn, 'w') as outf:
        for path in files:
            outf.write(path + "\n")
    cmd = ['samtools', 'merge', '-b', fofn, args.outBam]
    runProc(cmd)