Ejemplo n.º 1
0
def run_bwa(target, ref_idx, tmp_fasta, out_dir, num_threads):
    """Align tmp_fasta against ref_idx with bwa mem and store a sorted BAM.

    The pipeline is ``bwa mem | samtools view -b | samtools sort``; its
    stdout is directed to a path obtained from ``tmpFileGet(tmpDir=out_dir)``.
    ``num_threads`` is passed to ``bwa mem -t``.  ``target`` is not used
    inside this function, and nothing is returned.
    """
    sort_prefix = tmpFileGet()
    bam_out = tmpFileGet(tmpDir=out_dir)
    align_step = ['bwa', 'mem', '-t', num_threads, ref_idx, tmp_fasta]
    to_bam_step = ['samtools', 'view', '-b', '-']
    # -T gives samtools sort the prefix for its intermediate files.
    sort_step = ['samtools', 'sort', '-O', 'bam', '-T', sort_prefix, '-']
    runProc([align_step, to_bam_step, sort_step], stdout=bam_out)
Ejemplo n.º 2
0
def run_bwa(target, ref_idx, tmp_fasta, out_dir, num_threads):
    """Align tmp_fasta against ref_idx with bwa mem and store a sorted BAM.

    The pipeline is ``bwa mem | samtools view -b | samtools sort``; its
    stdout is directed to a path obtained from ``tmpFileGet(tmpDir=out_dir)``.
    ``num_threads`` is passed to ``bwa mem -t``.  ``target`` is not used
    inside this function, and nothing is returned.
    """
    sort_prefix = tmpFileGet()
    bam_out = tmpFileGet(tmpDir=out_dir)
    align_step = ['bwa', 'mem', '-t', num_threads, ref_idx, tmp_fasta]
    to_bam_step = ['samtools', 'view', '-b', '-']
    # -T gives samtools sort the prefix for its intermediate files.
    sort_step = ['samtools', 'sort', '-O', 'bam', '-T', sort_prefix, '-']
    runProc([align_step, to_bam_step, sort_step], stdout=bam_out)
Ejemplo n.º 3
0
def extract_reads(bam, offset=50000):
    """Extract reads overlapping the padded ``regions`` of *bam* as FASTQ.

    Each entry of the module-level ``regions`` iterable is unpacked as
    ``(chrom, start, stop, para)``; the interval is widened by *offset* on
    both sides and handed to ``samtools view``.  The selected reads are
    piped through ``samtools bamshuf`` and ``samtools bam2fq``.

    Returns the path of the FASTQ file that received the pipeline output.
    """
    tmp_reads = tmpFileGet(suffix='reads.fq')
    tmp_shuf = tmpFileGet()
    # samtools region coordinates are 1-based, so a padded start that drops
    # below 1 is clamped instead of producing a string like "chr:-100-500".
    region_strs = ['{}:{}-{}'.format(chrom, max(1, start - offset), stop + offset)
                   for chrom, start, stop, _para in regions]
    view_cmd = ['samtools', 'view', '-b', bam] + region_strs
    cmd = [view_cmd,
           ['samtools', 'bamshuf', '-Ou', '-', tmp_shuf],
           ['samtools', 'bam2fq', '-']]
    # stdout is given as a path, so no Python-side file handle is opened here.
    runProc(cmd, stdout=tmp_reads)
    return tmp_reads
def extract_fastq(target, genome, institute, tissue, reference, out_dir, experiment, bam_path, num_threads, ref_genome):
    """Convert bam_path to FASTQ and pass the result to the matching STAR runner.

    ``is_paired_sequencing(bam_path)`` selects the branch:

    * paired: ``samtools fastq -1/-2`` writes two temp files (suffixes
      'fwd.fastq' and 'rev.fastq'), which go to ``run_paired_star``;
    * otherwise: ``samtools fastq -0`` writes one temp file, which goes to
      ``run_single_star``.

    All remaining arguments are forwarded unchanged to the STAR runner.
    Nothing is returned.
    """
    if not is_paired_sequencing(bam_path):
        single_fq = tmpFileGet(prefix=experiment)
        runProc(['samtools', 'fastq', '-0', single_fq, bam_path])
        run_single_star(target, genome, institute, tissue, reference, out_dir, experiment, single_fq,
                        num_threads, ref_genome)
        return

    mate1_fq = tmpFileGet(prefix=experiment, suffix='fwd.fastq')
    mate2_fq = tmpFileGet(prefix=experiment, suffix='rev.fastq')
    runProc(['samtools', 'fastq', '-1', mate1_fq, '-2', mate2_fq, bam_path])
    run_paired_star(target, genome, institute, tissue, reference, out_dir, experiment, mate1_fq,
                    mate2_fq, num_threads, ref_genome)
Ejemplo n.º 5
0
def extract_reads(bam, offset=50000):
    """Extract reads overlapping the padded ``regions`` of *bam* as FASTQ.

    Each entry of the module-level ``regions`` iterable is unpacked as
    ``(chrom, start, stop, para)``; the interval is widened by *offset* on
    both sides and handed to ``samtools view``.  The selected reads are
    piped through ``samtools bamshuf`` and ``samtools bam2fq``.

    Returns the path of the FASTQ file that received the pipeline output.
    """
    tmp_reads = tmpFileGet(suffix='reads.fq')
    tmp_shuf = tmpFileGet()
    # samtools region coordinates are 1-based, so a padded start that drops
    # below 1 is clamped instead of producing a string like "chr:-100-500".
    region_strs = [
        '{}:{}-{}'.format(chrom, max(1, start - offset), stop + offset)
        for chrom, start, stop, _para in regions
    ]
    view_cmd = ['samtools', 'view', '-b', bam] + region_strs
    cmd = [
        view_cmd,
        ['samtools', 'bamshuf', '-Ou', '-', tmp_shuf],
        ['samtools', 'bam2fq', '-'],
    ]
    # stdout is given as a path, so no Python-side file handle is opened here.
    runProc(cmd, stdout=tmp_reads)
    return tmp_reads
Ejemplo n.º 6
0
 def __init__(self, dataFormat, liftFile, unmappedOutFile=None):
     """Store the lift settings and decide where unmapped records are written.

     dataFormat should be 'genePred' or 'bedPlus=N'.  When unmappedOutFile
     is None, a path from fileOps.tmpFileGet("gencode.dropped") is used
     instead and is also kept in unmappedOutFileTmp; otherwise
     unmappedOutFileTmp stays None.
     """
     self.dataFormat, self.liftFile = dataFormat, liftFile
     self.unmappedOutFile = unmappedOutFile
     self.unmappedOutFileTmp = None
     if unmappedOutFile is None:
         # NOTE(review): presumably the Tmp attribute marks the file for
         # later cleanup -- confirm against the rest of the class.
         droppedTmp = fileOps.tmpFileGet("gencode.dropped")
         self.unmappedOutFileTmp = droppedTmp
         self.unmappedOutFile = droppedTmp
Ejemplo n.º 7
0
def remap_reads(tmp_reads, index, out_bam):
    """Map tmp_reads against index with bwa mem and write a sorted, indexed BAM.

    Runs ``bwa mem -p | samtools view -b | samtools sort`` with stdout
    connected to a handle opened on *out_bam*, then runs ``samtools index``
    on *out_bam*.  Nothing is returned.
    """
    sort_prefix = tmpFileGet()
    align_step = ['bwa', 'mem', '-p', index, tmp_reads]
    to_bam_step = ['samtools', 'view', '-b', '-']
    sort_step = ['samtools', 'sort', '-T', sort_prefix, '-O', 'bam', '-']
    with open(out_bam, 'w') as bam_handle:
        runProc([align_step, to_bam_step, sort_step], stdout=bam_handle)
    # Index only after the handle is closed and the BAM is complete.
    runProc(['samtools', 'index', out_bam])
Ejemplo n.º 8
0
def remap_reads(tmp_reads, index, out_bam):
    """Map tmp_reads against index with bwa mem and write a sorted, indexed BAM.

    Runs ``bwa mem -p | samtools view -b | samtools sort`` with stdout
    connected to a handle opened on *out_bam*, then runs ``samtools index``
    on *out_bam*.  Nothing is returned.
    """
    sort_prefix = tmpFileGet()
    align_step = ['bwa', 'mem', '-p', index, tmp_reads]
    to_bam_step = ['samtools', 'view', '-b', '-']
    sort_step = ['samtools', 'sort', '-T', sort_prefix, '-O', 'bam', '-']
    with open(out_bam, 'w') as bam_handle:
        runProc([align_step, to_bam_step, sort_step], stdout=bam_handle)
    # Index only after the handle is closed and the BAM is complete.
    runProc(['samtools', 'index', out_bam])
Ejemplo n.º 9
0
def cat(target, args):
    """Merge every regular file in the target's global temp dir into args.outBam.

    The paths of all regular files found directly inside
    ``target.getGlobalTempDir()`` are written, one per line, to a temporary
    file-of-filenames, which is passed to ``samtools merge -b``.  The
    file-of-filenames is removed afterwards, whether or not the merge
    succeeds.

    Raises:
        AssertionError: if the directory holds no files to merge.
    """
    fofn = tmpFileGet()
    tmp_dir = target.getGlobalTempDir()  # looked up once, not once per entry
    fofn_abs = os.path.abspath(fofn)
    try:
        candidates = (os.path.join(tmp_dir, name) for name in os.listdir(tmp_dir))
        # NOTE(review): if tmpFileGet() allocates inside tmp_dir, the list
        # file itself would be listed as a merge input; exclude it defensively.
        files = [path for path in candidates
                 if os.path.isfile(path) and os.path.abspath(path) != fofn_abs]
        if not files:
            # Raised explicitly so the check survives `python -O`; the
            # exception type matches the assert this replaces.
            raise AssertionError('no files to merge in {}'.format(tmp_dir))
        with open(fofn, 'w') as outf:
            outf.writelines(path + "\n" for path in files)
        runProc(['samtools', 'merge', '-b', fofn, args.outBam])
    finally:
        if os.path.exists(fofn):
            os.remove(fofn)
Ejemplo n.º 10
0
def main():
    """Remap the input BAM against the consensus, pile up, and plot the result.

    The remapped BAM is written to ``args.outBam`` when given; otherwise a
    temporary path is used and removed before returning, including when one
    of the pipeline steps raises.
    """
    args = parse_args()
    keep_bam = args.outBam is not None
    out_bam = args.outBam if keep_bam else tmpFileGet(suffix='merged.sorted.bam')
    try:
        build_remapped_bam(args.inBam, args.consensusRef, out_bam)
        wgs_results = pileup(out_bam, args.consensusVcf)
        aln_size = get_aln_size(args.consensusRef)
        plot_results(wgs_results, args.outPdf, aln_size)
    finally:
        # Clean up the temporary BAM on failure as well; the existence check
        # keeps a missing file from masking the original exception.
        if not keep_bam and os.path.exists(out_bam):
            os.remove(out_bam)
Ejemplo n.º 11
0
def main():
    """Remap the input BAM against the consensus, pile up, and plot the result.

    The remapped BAM is written to ``args.outBam`` when given; otherwise a
    temporary path is used and removed before returning, including when one
    of the pipeline steps raises.
    """
    args = parse_args()
    keep_bam = args.outBam is not None
    out_bam = args.outBam if keep_bam else tmpFileGet(suffix='merged.sorted.bam')
    try:
        build_remapped_bam(args.inBam, args.consensusRef, out_bam)
        wgs_results = pileup(out_bam, args.consensusVcf)
        aln_size = get_aln_size(args.consensusRef)
        plot_results(wgs_results, args.outPdf, aln_size)
    finally:
        # Clean up the temporary BAM on failure as well; the existence check
        # keeps a missing file from masking the original exception.
        if not keep_bam and os.path.exists(out_bam):
            os.remove(out_bam)
Ejemplo n.º 12
0
def cat(target, args):
    """Merge every regular file in the target's global temp dir into args.outBam.

    The paths of all regular files found directly inside
    ``target.getGlobalTempDir()`` are written, one per line, to a temporary
    file-of-filenames, which is passed to ``samtools merge -b``.  The
    file-of-filenames is removed afterwards, whether or not the merge
    succeeds.

    Raises:
        AssertionError: if the directory holds no files to merge.
    """
    fofn = tmpFileGet()
    tmp_dir = target.getGlobalTempDir()  # looked up once, not once per entry
    fofn_abs = os.path.abspath(fofn)
    try:
        candidates = (
            os.path.join(tmp_dir, name)
            for name in os.listdir(tmp_dir)
        )
        # NOTE(review): if tmpFileGet() allocates inside tmp_dir, the list
        # file itself would be listed as a merge input; exclude it defensively.
        files = [
            path for path in candidates
            if os.path.isfile(path) and os.path.abspath(path) != fofn_abs
        ]
        if not files:
            # Raised explicitly so the check survives `python -O`; the
            # exception type matches the assert this replaces.
            raise AssertionError('no files to merge in {}'.format(tmp_dir))
        with open(fofn, 'w') as outf:
            outf.writelines(path + "\n" for path in files)
        runProc(['samtools', 'merge', '-b', fofn, args.outBam])
    finally:
        if os.path.exists(fofn):
            os.remove(fofn)