def run_somaticsniper():
    """Run SomaticSniper on the tumor/normal pair and optionally gzip the output.

    Works on the module-level ``params``, ``ref``, ``tumor``, ``normal``,
    ``outfile``, ``ssniper`` and ``gz``.
    """
    params.f = ref
    params.F = 'vcf'
    # the empty key holds the file arguments (presumably rendered by cmdargs
    # as positional arguments -- verify against cmdargs)
    params[''] = [tumor, normal, outfile]
    runcmd('{ssniper} {args}'.format(ssniper = ssniper, args = cmdargs(params)))
    if gz:
        runcmd(['gzip', outfile])
def run_gatk():
    """Run GATK MuTect2 on the tumor/normal pair and optionally gzip the output.

    Works on the module-level ``params``, ``samtools``, ``tumor``, ``normal``,
    ``ref``, ``outfile``, ``nthread``, ``mem``, ``tmpdir``, ``gatk`` and ``gz``.
    """
    # generate interval list file: first sequence name reported by idxstats
    intvfile = {{job.outdir | path.join: "interval.list" | quote}}
    cmd = '{samtools} idxstats {tumor!r} | head -1 | cut -f1 > {intvfile!r}'.format(
        samtools = samtools,
        tumor    = tumor,
        intvfile = intvfile
    )
    runcmd(cmd)

    # Keep the converted value under its own name: assigning to `mem` inside
    # this function would make `mem` local and the read of the module-level
    # `mem` on the right-hand side would raise UnboundLocalError.
    javamem = mem2(mem, 'java')

    params['I:tumor']  = tumor
    params['I:normal'] = normal
    params.R   = ref
    params.o   = outfile
    params.nct = nthread
    params.L   = intvfile

    cmd = '{gatk} -T MuTect2 {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
        gatk   = gatk,
        mem    = javamem,
        tmpdir = tmpdir,
        args   = cmdargs(params, dash = '-', equal = ' ')
    )
    runcmd(cmd)
    if gz:
        runcmd(['gzip', outfile])
def run_strelka():
    """Configure and run the Strelka workflow, then merge its SNV/indel VCFs.

    Works on the module-level ``params``, ``normal``, ``tumor``, ``ref``,
    ``joboutdir``, ``strelka``, ``mem``, ``nthread``, ``outfile`` and ``gz``.
    """
    # step 1: configuration, which is expected to create runWorkflow.py in runDir
    cparams = {{args.configParams | repr}}
    cparams.normalBam      = normal
    cparams.tumorBam       = tumor
    cparams.referenceFasta = ref
    cparams.runDir         = joboutdir
    configcmd = '{strelka} {args}'.format(strelka = strelka, args = cmdargs(cparams))
    runcmd(configcmd)

    # step 2: run the generated workflow locally
    params.m = 'local'
    # drop the last character of the mem2(..., 'G') string (presumably the unit suffix)
    params.g = mem2(mem, 'G')[:-1]
    params.j = nthread
    workflowcmd = '{joboutdir}/runWorkflow.py {args}'.format(
        joboutdir = joboutdir, args = cmdargs(params))
    runcmd(workflowcmd)

    # step 3: merge the two result files into outfile
    vardir = path.join(joboutdir, 'results', 'variants')
    snvvcf = path.join(vardir, 'somatic.snvs.vcf.gz')
    indvcf = path.join(vardir, 'somatic.indels.vcf.gz')
    _mergeAndAddGT(snvvcf, indvcf, outfile)
    if gz:
        runcmd(['gzip', outfile])
def run_vardict():
    """Run VarDict on the tumor/normal pair, redirecting stdout to ``outfile``.

    Works on the module-level ``params``, ``ref``, ``tumor``, ``normal``,
    ``vardict``, ``outfile`` and ``gz``.
    """
    params.v = True
    params.G = ref
    # both bam files are passed through one option, joined by '|'
    params.b = '{}|{}'.format(tumor, normal)
    runcmd('{vardict} {args} > {outfile!r}'.format(
        vardict = vardict,
        args    = cmdargs(params),
        outfile = outfile
    ))
    if gz:
        runcmd(['gzip', outfile])
def run_virmid():
    """Run Virmid on the tumor/normal pair and move its passed-somatic VCF to ``outfile``.

    Works on the module-level ``params``, ``ref``, ``tumor``, ``normal``,
    ``joboutdir``, ``virmid``, ``mem``, ``tmpdir``, ``outfile`` and ``gz``.
    """
    params.R = ref
    params.D = tumor
    params.N = normal
    params.w = joboutdir
    cmd = '{virmid} {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
        virmid = virmid,
        mem    = mem2(mem, 'java'),
        tmpdir = tmpdir,
        args   = cmdargs(params)
    )
    # The command was previously built but never executed, so there was
    # nothing for the `mv` below to pick up.
    runcmd(cmd)
    # NOTE(review): the source path contains a wildcard; whether it is expanded
    # depends on how runcmd executes a list command -- confirm.
    runcmd(['mv', path.join(joboutdir, '*.virmid.som.passed.vcf'), outfile])
    if gz:
        runcmd(['gzip', outfile])
def getAlleleCount(bamfile, snpfile, outfile):
    """Write per-SNP allele counts of ``bamfile`` to ``outfile``.

    Runs ``bamrc`` (module-level) on ``bamfile`` restricted to ``snpfile``,
    stores the raw output in ``outfile + '.tmp'`` and reformats it into the
    columns Chrm, pos, A, C, G, T, Total, refCount, mutCount.

    Both files are walked in a single pass: for every read-count row, SNP rows
    are consumed until one matches, so non-matching SNP rows are skipped for
    good (this presumably relies on both files sharing the same order).
    """
    rawfile = outfile + '.tmp'

    brcparams = Box()
    brcparams.f = ref
    brcparams.w = 0
    brcparams.l = snpfile
    brcparams[''] = bamfile
    runcmd('{bamrc} {args} > {outfile!r}'.format(
        bamrc   = bamrc,
        args    = cmdargs(brcparams, equal = ' '),
        outfile = rawfile
    ))

    # raw row layout, e.g.:
    # chr1 564773 C 14 =:0:... A:0:... C:14:... G:0:... T:0:... N:0:...
    rawrows = TsvReader(rawfile, cnames = False)
    snprows = TsvReader(snpfile, cnames = False)
    writer = TsvWriter(outfile)
    writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']

    for raw in rawrows:
        while True:
            try:
                snp = next(snprows)
            except StopIteration:
                break
            # use the end position, in case it's 0-based
            if snp[0] != raw[0] or snp[2] != raw[1]:
                # not the SNP of this row, try the next SNP
                continue

            # columns 5..8 hold the A, C, G, T fields; the count is the 2nd ':' item
            counts = {
                base: raw[col].split(':', 2)[1]
                for col, base in enumerate('ACGT', 5)
            }
            rec = TsvRecord()
            rec.Chrm  = raw[0]
            rec.pos   = raw[1]
            rec.Total = raw[3]
            rec.A     = counts['A']
            rec.C     = counts['C']
            rec.G     = counts['G']
            rec.T     = counts['T']
            # if reference allele is unknown, assuming all are ref alleles
            rec.refCount = counts.get(snp[6].upper(), raw[3])
            # if mut allele is unknown, assuming no mutations happened
            rec.mutCount = counts.get(snp[7].upper(), 0)
            writer.write(rec)
            # matched: move on to the next read-count row
            break
    writer.close()
def runChrom(file1, file2, chrom):
    """Subtract ``file2`` from ``file1`` on a single chromosome.

    Extracts ``chrom`` from both files with tabix into per-chromosome files,
    runs ``bedtools subtract`` on them, removes the two intermediates and
    returns the path of the subtracted file.

    File names are derived from the inputs minus their last 3 characters
    (presumably a '.gz' suffix): ``<stem>-<chrom>.<ext>`` for the intermediates
    and ``<stem>-<chrom>.subtracted.<ext>`` for the result.
    """
    def _chrom_parts(infile):
        # [stem, '-chrom', '.', ext]
        parts = list(infile[:-3].rpartition('.'))
        parts.insert(-2, '-' + chrom)
        return parts

    parts1 = _chrom_parts(file1)
    chromfile1 = ''.join(parts1)
    chromfile2 = ''.join(_chrom_parts(file2))
    parts1.insert(-2, '.subtracted')
    subtracted = ''.join(parts1)

    extract = '{} -h {!r} {!r} > {!r}'
    for infile, chromfile in ((file1, chromfile1), (file2, chromfile2)):
        runcmd(extract.format(tabix, infile, chrom, chromfile))

    subparams = {'a': chromfile1, 'b': chromfile2}
    if rmany:
        subparams['A'] = True
    runcmd('{bedtools} subtract {params} > {outfile!r}'.format(
        bedtools = bedtools,
        params   = cmdargs(subparams, dash='-', equal=' '),
        outfile  = subtracted))

    remove(chromfile1)
    remove(chromfile2)
    return subtracted
params = {{args.params}} fq1 = {{o.fq1 | quote}} fq2 = {{o.fq2 | quote}} try: {% case args.tool %} {% when 'wgsim' %} {% if args.gz %} fq1 = "{{o.fq1 | [:-3]}}" fq2 = "{{o.fq2 | [:-3]}}" {% endif %} params['N'] = {{args.num}} params['1'] = {{args.len1}} params['2'] = {{args.len2}} params['S'] = {{i.seed | lambda x: -1 if x is None else x}} cmd = '{{args.wgsim}} %s "%s" "%s" "%s"' % (cmdargs(params), ref, fq1, fq2) runcmd (cmd) {% if args.gz %} runcmd ('gzip "%s"' % fq1) runcmd ('gzip "%s"' % fq2) {% endif %} {% when 'dwgsim' %} prefix = {{o.fq1 | [:-8] | quote}} {% if args.gz %} fq1 = "{{o.fq1 | [:-3]}}" fq2 = "{{o.fq2 | [:-3]}}" prefix = "{{o.fq1 | [:-11]}}" {% endif %}
# Run an SQL statement against a delimited text file with `q`.
outfile = {{o.outfile | quote}}
argssql = {{args.sql | quote}}
inopts  = {{args.inopts | repr}}
outopts = {{args.outopts | repr}}

# the statement comes from i.sqlfile when given, otherwise from args.sql
if not sqlfile:
    sql = argssql
else:
    with open(sqlfile) as f:
        sql = ' '.join(f).strip()
    if argssql:
        logger.warning('`args.sql` is ignored, as `i.sqlfile` is provided.')

if not sql:
    raise ValueError('One of `i.sqlfile` and `args.sql` is requied.')

# output options fall back to the matching input option when left as None
params = {
    'H': inopts.cnames,
    'd': inopts.delimit,
    'e': inopts.encoding,
    'z': (inopts.gz == 'auto' and infile.endswith('.gz')) or inopts.gz is True,
    'D': inopts.delimit if outopts.delimit is None else outopts.delimit,
    'O': inopts.cnames if outopts.cnames is None else outopts.cnames,
    'E': inopts.encoding if outopts.encoding is None else outopts.encoding
}

# cmdargs({' ': sql})[2:] drops the first two characters of the rendered
# argument (presumably the dash and the blank key), leaving the quoted statement
qcmd = 'q {} {} > {!r}'.format(
    cmdargs(params),
    cmdargs({' ': sql})[2:],
    outfile
)
c = cmd.Cmd(['cat', infile]).pipe(qcmd, shell = True).run()
logger.info('Done: %s', c.cmd)
from pyppl import Box from bioprocs.utils import runcmd, cmdargs params = {{args.params}} ####### htseq {% if args.tool == 'htseq' %} {% if i.infile.endswith('.bam') %} params['f'] = 'bam' {% endif %} cmd = '{{args.htseq}} %s "{{i.infile}}" "{{args.refgene}}" > "{{o.outfile}}"' % (cmdargs(params)) runcmd (cmd) {% endif %}
mem = mem2 ({{args.mem | quote}}, 'java') minlen = str({{args.minlen}} * 2) adfile = "{{job.outdir}}/adapters.fa" with open (adfile, "w") as ad: ad.write (">PE1\n") ad.write (seqrev({{args.adapter1 | quote}}) + "\n") ad.write (">PE1_rc\n") ad.write ({{args.adapter1 | quote}} + "\n") ad.write (">PE2\n") ad.write (seqrev({{args.adapter2 | quote}}) + "\n") ad.write (">PE2_rc\n") ad.write ({{args.adapter2 | quote}} + "\n") params['threads'] = {{args.nthread}} cmd = '{{args.trimmomatic}} %s PE %s "{{i.fq1}}" "{{i.fq2}}" "{{o.outfq1}}" /dev/null "{{o.outfq2}}" /dev/null ILLUMINACLIP:%s:2:30:10 LEADING:{{args.cut5}} TRAILING:{{args.cut3}} SLIDINGWINDOW:4:{{args.minq}} MINLEN:%s' % (mem, cmdargs(params, dash = '-', equal = ' '), adfile, minlen) runcmd (cmd) {% when 'cutadapt' %} params['a'] = {{args.adapter1 | quote}} params['A'] = {{args.adapter2 | quote}} params['u'] = "{{args.cut5}}" params['u'] = "-{{args.cut3}}" params['U'] = "{{args.cut5}}" params['U'] = "-{{args.cut3}}" params['m'] = {{args.minlen}} params['q'] = "{{args.minq}},{{args.minq}}" params['o'] = {{o.outfq1 | quote}} params['p'] = {{o.outfq2 | quote}} cmd = '{{args.cutadapt}} %s {{ i.fq1 | quote }} {{ i.fq2 | quote }}' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd)
from os import path
from pyppl import Box
from bioprocs.utils import runcmd, cmdargs

# Query an indexed file with tabix, either by a region file or a region string.
params = {}
region = {{i.region | quote}}
outfile = {{o.outfile | quote}}

region_is_file = path.isfile(region)
if region_is_file and path.getsize(region) > 0:
    # non-empty region file: hand it over through the R option
    params['R'] = {{i.region | quote}}
    params.update({{args.params}})
    cmd = '{{args.tabix}} %s "{{i.infile}}" > "{{o.outfile}}"' % cmdargs(
        params, equal=' ')
    runcmd(cmd)
elif not region_is_file and region:
    # region string: appended after the input file
    params.update({{args.params}})
    cmd = '{{args.tabix}} %s "{{i.infile}}" {{i.region}} > "{{o.outfile}}"' % cmdargs(
        params, equal=' ')
    runcmd(cmd)
else:
    # empty region file or empty region: just create an empty output file
    open(outfile, 'w').close()
from pyppl import Box from bioprocs.utils import runcmd, mem2, cmdargs params = {{args.params}} try: {% case args.tool %} {% when 'trimmomatic' %} mem = mem2 ({{args.mem | quote}}, "java") minlen = str({{args.minlen}} * 2) adfile = "{{job.outdir}}/adapters.fa" with open (adfile, "w") as ad: ad.write (">TruSeq3_IndexedAdapter\n") ad.write ({{args.adapter | quote}} + "\n") params['threads'] = {{args.nthread}} cmd = '{{args.trimmomatic}} %s SE %s "{{in.fq}}" "{{out.outfq}}" ILLUMINACLIP:%s:2:30:10 LEADING:{{args.cut5}} TRAILING:{{args.cut3}} SLIDINGWINDOW:4:{{args.minq}} MINLEN:%s' % (mem, cmdargs(params, dash = '-', equal = ' '), adfile, minlen) runcmd (cmd) {% when 'cutadapt' %} params['a'] = {{args.adapter | quote}} params['u'] = "{{args.cut5}}" params['u'] = "-{{args.cut3}}" params['m'] = {{args.minlen}} params['q'] = "{{args.minq}},{{args.minq}}" params['o'] = {{out.outfq | quote}} cmd = '{{args.cutadapt}} %s "{{in.fq}}"' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd) {% when 'skewer' %} params['m'] = 'any' params['t'] = {{args.nthread}}
"1": ref + ' ' + alt, "2": alt + ' ' + alt } return compGTs.get(gt, '0 0') logger.info('Writing tped file ...') tpedWriter = TsvWriter(tpedfile) for r in inreader: (chrom, pos, _, ref, alt) = r[0].split('_') if chrom.startswith('chr'): chrom = chrom[3:] chrom = chrmaps.get(chrom, chrom) tpedWriter.write([chrom, r[0], 0, pos] + [getCompondGT(gt, ref, alt) for gt in r.values()[:]]) tpedWriter.close() logger.info("Converting using plink ...") cmd = '{} {}'.format( plink, cmdargs({ 'tfile': prefix, 'make-bed': True, 'out': prefix }, equal=' ')) runcmd(cmd) if not keeptxt: remove(tpedfile) remove(tfamfile)
############# picard {% when 'picard' %} mem = mem2({{ args.mem | quote }}, 'java') infiles = {{ i.infiles }} for i, infile in enumerate(infiles): params['I' + ' ' * i] = infile {% if args.nthread > 1 %} params['USE_THREADING'] = 'true' {% else %} params['USE_THREADING'] = 'false' {% endif %} params['TMP_DIR'] = tmpdir params['O'] = {{o.outfile | quote}} params['AS'] = 'true' cmd = '{{args.picard}} MergeSamFiles %s -Djava.io.tmpdir="%s" %s' % (mem, tmpdir, cmdargs(params, dash = '', equal = '=')) runcmd (cmd) ############# bamutil {% when 'bamutil' %} infiles = {{ i.infiles }} for i, infile in enumerate(infiles): params['i' + ' ' * i] = infile params['o'] = {{o.outfile | quote}} cmd = '{{args.bamutil}} mergeBam %s' % cmdargs(params) runcmd (cmd) ############# samtools {% when 'samtools' %} inlist = path.join({{job.outdir | quote}}, 'bamlist.txt')
tmpfile = outfile + '.tmp' skip = {{args.inopts | lambda x: x.get('skip', 0)}} delimit = {{args.inopts | lambda x: x.get('delimit', '\t') | quote}} comment = {{args.inopts | lambda x: x.get('comment', '#') | quote}} if not skip and not comment: tmpfile = infile else: with open(infile) as readerSkip, open(outfile, 'w') as writerSkip: for i, line in enumerate(readerSkip): if i >= skip: break writerSkip.write(line) readerTmp = TsvReader(infile, delimit = delimit, comment = comment, skip = skip, ftype = 'nometa', head = False) #readerTmp.autoMeta() writerTmp = TsvWriter(tmpfile, delimit = delimit, ftype = 'nometa') #writerTmp.meta.update(readerTmp.meta) for r in readerTmp: writerTmp.write(r) writerTmp.close() {% if args.case %} case = "LANG=C" {% else %} case = "LANG=en_US.UTF-8" {% endif %} cmd = '%s sort %s "%s" >> {{o.outfile | quote}}' % (case, cmdargs(params), tmpfile) {% endif %} runcmd(cmd)
makedirs (tmpdir) params = {{args.params}} try: {% case args.tool %} ############## picard {% when 'picard' %} mem = mem2({{ args.mem | quote }}) params['-Djava.io.tmpdir'] = tmpdir params['TMP_DIR'] = tmpdir params['I'] = {{i.infile | quote}} params['O'] = {{o.outfile | quote}} for k,v in rg.items(): params['RG' + k] = v runcmd ('{{args.picard}} AddOrReplaceReadGroups %s %s' % (mem, cmdargs(params, dash='', equal='='))) ############## bamutil {% when 'bamutil' %} params['RG'] = "@RG\\tID:%s\\t%s" % (rg['ID'], "\\t".join([k + ":" + v for k,v in rg.items() if k!='ID'])) params['in'] = {{i.infile | quote}} params['out'] = {{o.outfile | quote}} runcmd ('{{args.bamutil}} polishBam %s' % cmdargs(params, equal = ' ')) {% endcase %} except Exception as ex: stderr.write ("Job failed: %s" % str(ex)) raise finally: rmtree (tmpdir)
runcmd(cmd) if gz: runcmd(['gzip', outfile]) def run_snvsniffer(): # generate a header file theader = {{job.outdir | path.join: bn(i.tumor) | @append: '.header' | quote}} nheader = {{job.outdir | path.join: bn(i.normal) | @append: '.header' | quote}} cmd = '{samtools} view -H {infile!r} > {hfile!r}' runcmd(cmd.format(samtools = samtools, infile = tumor, hfile = theader)) runcmd(cmd.format(samtools = samtools, infile = normal, hfile = nheader)) params.g = ref params.o = outfile params[''] = [theader, nheader, tumor, normal] cmd = '{ssniffer} somatic {args}'.format(ssniffer, cmdargs(params)) runcmd(cmd) if gz: runcmd(['gzip', outfile]) def _mergeAndAddGT(snvvcf, indvcf, outfile): from pysam import VariantFile snv = VariantFile(snvvcf) ind = VariantFile(indvcf) snv.header.info.add('TYPE', 1, 'String', 'Type of somatic mutation') ind.header.info.add('TYPE', 1, 'String', 'Type of somatic mutation') snv.header.info.add('QSI', 1, 'Integer', 'Quality score for any somatic variant, ie. for the ALT haplotype to be present at a significantly different frequency in the tumor and normal') snv.header.info.add('TQSI', 1, 'Integer', 'Data tier used to compute QSI') snv.header.info.add('QSI_NT', 1, 'Integer', 'Quality score reflecting the joint probability of a somatic variant and NT') snv.header.info.add('TQSI_NT', 1, 'Integer', 'Data tier used to compute QSI_NT') snv.header.info.add('IC', 1, 'Integer', 'Number of times RU repeats in the indel allele')
# Bring a vcf file into the requested compression state and index it with tabix.
infile  = {{i.infile | quote}}
outfile = {{o.outfile | quote}}
gz      = {{args.gz | repr}}
params  = {{args.params | repr}}
tabix   = {{args.tabix | quote}}

# make sure outfile without .gz
if gz:
    outfile = outfile[:-3]
# path that finally gets indexed
target = outfile + '.gz' if gz else outfile

# NOTE(review): `sys.symlink` is not part of the standard `sys` module;
# confirm what `sys` is bound to in this script (os.symlink may be intended).
gztype = gzip_type(infile)
if gztype == 'gzip':
    # plain gzip: always decompress, re-compress with bgzip if requested
    gunzip(infile, outfile)
    if gz:
        bgzip(outfile)
elif gztype == 'bgzip':
    # already bgzipped: link it when compressed output is wanted
    if not gz:
        gunzip(infile, outfile)
    else:
        sys.symlink(infile, target)
elif gz:
    # uncompressed input, compressed output
    bgzip(infile, target)
else:
    # uncompressed input, uncompressed output
    sys.symlink(infile, target)

cmd = '{} {} {!r}'.format(tabix, cmdargs({'p': 'vcf'}), target)
runcmd(cmd)
# run gdc-client to download the data
gdc = '{} download '.format(gdc_client)
args = Box({
    'm'           : infile,
    'n'           : nthread,
    'd'           : outdir,
    'retry-amount': '3',
    'debug'       : True,
    'log-file'    : path.join(outdir, 'gdc-client.log')
})
if token:
    args.t = token
# user-supplied params take precedence over the defaults above
args.update(params)
cmd2run = gdc + cmdargs(args, equal=' ')
# plain system() here: the result is not checked at this point, the
# per-id check below re-runs whatever is missing
system(cmd2run)

# check if all the data sucessfully downloaded
# (first column of every non-blank manifest line not starting with 'id')
with open(infile) as fin:
    ids = [
        line.split()[0]
        for line in fin
        if line.strip() and not line.startswith('id')
    ]

# retry the missing ids one by one, without the manifest option
del args['m']
for i in ids:
    if path.isdir(path.join(outdir, i)):
        continue
    logger.warning('File failed to download: {}'.format(i))
    cmd2run = gdc + cmdargs(args, equal=' ') + ' ' + i
    runcmd(cmd2run)
from bioprocs.utils import runcmd, cmdargs infile = {{i.infile | quote}} outfile = {{o.outfile | quote}} header = {{args.header | repr}} by = {{args.by | quote}} tool = {{args.tool | quote}} if header: # write the header to outfile params = cmdargs({ 'e': '^#', }, dash='-', equal=' ') if infile.endswith('.gz'): cmd = 'zcat {infile} | grep {cmdargs} > {outfile}' else: cmd = 'grep {cmdargs} {infile} > {outfile}' runcmd(cmd.format(cmdargs=params, infile=infile, outfile=outfile)) if tool == 'sort': if infile.endswith('.gz'): cmd = 'zcat {infile} | grep "^#" | sort {cmdargs} >> {outfile}' else: cmd = 'grep -v "^#" {infile} | sort {cmdargs} >> {outfile}' if by.lower().startswith('coord'): params = cmdargs({'k#1': '1,1', 'k#2': '2,2n'}, dash='-', equal=' ') else: params = cmdargs({ 'k#1': '3,3', 'k#2': '1,1',
fqfile1 = fqfile1[:-3] fqfile2 = fqfile2[:-3] {% endif %} params = {{args.params}} try: {% case args.tool %} {% when 'biobambam' %} params['gz'] = 0 params['F'] = fqfile1 params['F2'] = fqfile2 params['T'] = path.join(tmpdir, infile + '.tmp') params['filename'] = infile if infile.endswith('.sam'): params['inputformat'] = 'sam' cmd = '{{args.biobambam}} %s' % cmdargs(params, dash = '', equal = '=') runcmd (cmd) {% when 'bedtools' %} params['i'] = infile params['fq'] = fqfile1 params['fq2'] = fqfile2 cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd) {% when 'samtools' %} params['t'] = True params['1'] = fqfile1 params['2'] = fqfile2 cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params, dash = '-', equal = ' '), infile) runcmd (cmd) {% when 'picard' %} mem = mem2({{ args.mem | quote }}, 'Java')
# Export a plink bed fileset as a transposed additive matrix (.traw) and start
# writing the genotype matrix with a sample header line.
indir   = {{i.indir | quote}}
outfile = {{o.outfile | quote}}
plink   = {{args.plink | quote}}
samid   = {{args.samid | quote}}
snpid   = {{args.snpid | quote}}
addchr  = {{args.addchr | repr}}
nors    = {{args.nors | quote}}
chroms  = {{args.chroms | repr}}

# the first .bed file found in indir defines the fileset prefix
bedfile = glob(path.join(indir, '*.bed'))[0]
input   = path.splitext(bedfile)[0]
output  = path.splitext(outfile)[0]

params = {
    'bfile' : input,
    'recode': 'A-transpose',
    'out'   : output
}
cmd = '%s %s 1>&2' % (plink, cmdargs(params, equal=' '))
runcmd(cmd)

# sample names: column 0 ('fid'), column 1 ('iid') or both joined by '_'
fams = TsvReader(input + '.fam', ftype='nometa', delimit=' ', head=False)
if samid == 'fid':
    samples = fams.dump(0)
elif samid == 'iid':
    samples = fams.dump(1)
else:
    samples = (r[0] + '_' + r[1] for r in fams)
header = "\t" + "\t".join(samples) + "\n"
fams.close()

gts = TsvReader(output + '.traw', ftype='nometa', skip=1, head=False)
with open(outfile, 'w') as fout:
    fout.write(header)
from sys import stderr from pyppl import Box from bioprocs.utils import runcmd, cmdargs fq = {{i.fq | quote}} params = {{args.params}} try: {% if args.tool == 'fastqc' %} params['o'] = {{o.outdir | quote}} cmd = '{{args.fastqc}} %s "{{i.fq}}"' % cmdargs(params) runcmd(cmd) {% else %} raise Exception('Tool {{args.tool}} %s not supported.') {% endif %} except Exception as ex: stderr.write ("Job failed: %s" % str(ex)) raise
# Prepare CNVkit inputs: target file, access file (unless given) and autobin.
shell.TOOLS['cnvkit'] = cnvkit
# cap the numeric libraries at nthread threads
envs = {
    envname: str(nthread)
    for envname in (
        'OPENBLAS_NUM_THREADS',
        'OMP_NUM_THREADS',
        'NUMEXPR_NUM_THREADS',
        'MKL_NUM_THREADS',
    )
}
ckshell = shell.Shell(subcmd=True, equal=' ', envs=envs, cwd=outdir).cnvkit

# generate target file
params_t = params.target
params_t.o = path.join(outdir, prefix + '.bed')
ckshell.target(exbaits, **params_t).run()

# generate access file
if not accfile:
    accfile = path.join(outdir, prefix + '.access.bed')
    params_a = params.access
    params_a.o = accfile
    ckshell.access(ref, **params_a).run()

# autobin
params_b = params.autobin
params_b.t = params_t.o
params_b.g = accfile
params_b[''] = infiles
autobincmd = 'cd {wdir}; {cnvkit} autobin {args}'.format(
    wdir=shell.shquote(outdir),
    cnvkit=shell.shquote(cnvkit),
    args=cmdargs(params_b, equal=' '))
runcmd(autobincmd, env=envs)
# files shared by all jobs live in the output directory of job 1
wdkw = dict(workdir = workdir)
accessfile = "{workdir}/1/output/cnvkit_access.bed".format(**wdkw)
targetfile = "{workdir}/1/output/cnvkit_targets.bed".format(**wdkw)
refcnn     = "{workdir}/1/output/reference.cnn".format(**wdkw)

# per-job result files
outkw = dict(outdir = outdir, infn = infn)
fixedCnr = "{outdir}/{infn}.cnr".format(**outkw)
segfile  = "{outdir}/{infn}.cns".format(**outkw)
callfile = "{outdir}/{infn}.call.cns".format(**outkw)
# report files
breaksfile     = "{outdir}/{infn}.breaks.txt".format(**outkw)
gainlossfile   = "{outdir}/{infn}.gainloss.txt".format(**outkw)
metricsfile    = "{outdir}/{infn}.metrics.txt".format(**outkw)
segmetricsfile = "{outdir}/{infn}.segmetrics.txt".format(**outkw)

# shell prefix capping the numeric libraries at nthread threads
openblas_nthr = "export OPENBLAS_NUM_THREADS={nthread}; export OMP_NUM_THREADS={nthread}; export NUMEXPR_NUM_THREADS={nthread}; export MKL_NUM_THREADS={nthread}; ".format(nthread = nthread)

# access + target are chained into one command and handed to poll.first
cnvkitAccessParams = params.access
cnvkitAccessParams['o'] = accessfile
cmd1 = openblas_nthr + '{cnvkit} access {ref} {args}'.format(
    cnvkit = cnvkit,
    ref    = repr(ref),
    args   = cmdargs(cnvkitAccessParams)
)

cnvkitTargetParams = params.target
cnvkitTargetParams['o'] = targetfile
cmd2 = '{cnvkit} target {accessfile} {args}'.format(
    cnvkit     = cnvkit,
    accessfile = repr(accessfile),
    args       = cmdargs(cnvkitTargetParams)
)

log2log('CNVkit: Run access and target at job #0 ...')
poll.first(cmd1 + '; ' + cmd2, lockfile = 'access.poll.lock')
log2log('CNVkit: Run access and target at job #0 ... done')

# coverage command, built for every job
cnvkitCoverageParams = params.coverage
cnvkitCoverageParams['p'] = nthread
cnvkitCoverageParams['o'] = targetCov
cmd = openblas_nthr + '{cnvkit} coverage {infile} {targetfile} {args}'.format(
    cnvkit     = cnvkit,
    infile     = repr(infile),
    targetfile = repr(targetfile),
    args       = cmdargs(cnvkitCoverageParams)
)
log2log('CNVkit: Run coverage at all jobs ...')
params['input-vcf'] = {{i.infile | quote}} params['output-maf'] = {{o.outfile | quote}} params['vep-data'] = {{args.vepDb | quote}} params['vep-forks'] = {{args.nthread}} params['filter-vcf'] = {{args.filtervcf | quote}} params['ref-fasta'] = {{args.ref | quote}} params['vep-path'] = path.dirname(vep) {% if args.tumor1st %} params['tumor-id'] = samples.pop(0) params['normal-id'] = samples[0] if samples else 'NORMAL' {% else %} params['normal-id'] = samples.pop(0) params['tumor-id'] = samples[0] if samples else 'NORMAL' {% endif %} cmd = '{{args.vcf2maf}} %s' % (cmdargs(params, equal=' ')) runcmd(cmd) {% else %} cmds = [] for sample in samples: vtparams = {} vtparams['a'] = True vtparams['c'] = sample vtparams['e'] = True samplevcf = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf) # vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin params['input-vcf'] = samplevcf params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample
from os import path
from pyppl import Box
from vcf import Reader as Vcf
from bioprocs.utils import runcmd, cmdargs, logger

# Convert an indexed vcf file into a plink bed fileset.
infile = {{i.infile | quote}}

# refuse to run without the .tbi index next to the input
idxfile = infile + '.tbi'
if not path.isfile(idxfile):
    raise ValueError('Vcf file needs to be indexed')

outdir = {{o.outdir | quote}}
plink  = {{args.plink | repr}}
params = {{args.params | repr}}

params.vcf         = infile
params['make-bed'] = True
params.out         = path.join(outdir, {{i.infile | fn2 | quote}})

# plink's stdout is redirected to stderr
args = cmdargs(params, equal=' ')
cmd = '{} {} 1>&2'.format(plink, args)
runcmd(cmd)
ocdir = path.join(outdir, 'query-{}.tomtom'.format(i + 1)) ocdirs.append(ocdir) writer = MemeWriter(qfile) writer.meta = reader.meta writer.writeMeta() for _ in range(joblist[i]): try: writer.write(reader.next()) except StopIteration: break writer.close() thparams = params.copy() thparams[""] = [qfile, mfile2] thparams.thresh = qval thparams.oc = ocdir cmdps.append((tomtom, cmdargs(thparams, dash='-', equal=' '))) reader.close() Parallel(nthread, raiseExc=True).run('{} {}', cmdps) writer = TsvWriter(outfile) reader = TsvReader(path.join(ocdirs[0], 'tomtom.txt'), comment='##', cnames=lambda header: header[1:].strip().split("\t")) writer.cnames = reader.cnames writer.writeHead(lambda cnames: "#" + "\t".join(cnames)) reader.close() for ocdir in ocdirs: reader = TsvReader( path.join(ocdir, 'tomtom.txt'), comment='##', cnames=lambda header: header[1:].strip().split("\t"))
{% endif %} params = {{args.params}} try: {% case args.tool %} ############# biobambam {% when 'biobambam' %} params['gz'] = 0 #bug #params['S'] = fqfile params['filename'] = infile params['T'] = path.join(tmpdir, infile + '.tmp') if infile.endswith('.sam'): params['inputformat'] = 'sam' cmd = '{{args.biobambam}} %s > "%s"' % (cmdargs(params, dash = '', equal = '='), fqfile) runcmd (cmd) ############# bedtools {% when 'bedtools' %} params['i'] = infile params['fq'] = fqfile cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd) ############# samtools {% when 'samtools' %} params['t'] = True params['s'] = fqfile