def all(self, todo, *args, **kwargs):
    """Run `todo` in this job, then pass through every other job's lock.

    `todo` is either a callable, invoked as ``todo(*args, **kwargs)``, or a
    command template that is filled with ``todo.format(*args, **kwargs)`` and
    handed to `runcmd`.

    The optional keyword ``lockfile`` (default ``'poll.all.lock'``) names the
    lock file kept in each job's ``<workdir>/<job number>/output`` directory.
    It is consumed here and never forwarded to `todo`.

    Any exception raised by `todo` propagates, but this job's lock file is
    created first in either case.
    """
    basename = kwargs.pop('lockfile', 'poll.all.lock')

    # One lock path per job; job directories are numbered from 1.
    lockpaths = []
    for idx in range(self.joblen):
        lockpaths.append(path.join(self.workdir, str(idx + 1), 'output', basename))
    locks = [HardFileLock(lockpath) for lockpath in lockpaths]

    try:
        if callable(todo):
            todo(*args, **kwargs)
        else:
            runcmd(todo.format(*args, **kwargs))
    finally:
        # Create (or truncate) this job's own lock file, success or failure.
        open(lockpaths[self.jobindex], 'w').close()

    # Enter and immediately leave the lock of every other job.
    # NOTE(review): presumably this blocks until that job is done -- the
    # semantics of HardFileLock are not visible here; confirm.
    for idx, lock in enumerate(locks):
        if idx != self.jobindex:
            with lock:
                pass
def run_somaticsniper():
    """Run SomaticSniper on the tumor/normal pair and optionally gzip the VCF.

    Reads `params`, `ref`, `tumor`, `normal`, `outfile`, `ssniper` and `gz`
    from the enclosing scope and mutates `params` in place.
    """
    params.F = 'vcf'
    params.f = ref
    # Positional arguments go under the empty key.
    params[''] = [tumor, normal, outfile]

    runcmd('{ssniper} {args}'.format(args = cmdargs(params), ssniper = ssniper))

    if gz:
        runcmd(['gzip', outfile])
def first(self, todo, *args, **kwargs):
    """Run `todo` in the first job only; other jobs pass through its lock.

    `todo` is either a callable, invoked as ``todo(*args, **kwargs)``, or a
    command template that is filled with ``todo.format(*args, **kwargs)`` and
    handed to `runcmd`.

    The optional keyword ``lockfile`` (default ``'poll.first.lock'``) names
    the lock file; it is consumed here and never forwarded to `todo`.
    """
    basename = kwargs.pop('lockfile', 'poll.first.lock')

    # The lock lives in the first job's output directory:
    # make sure it's cleaned when job reset
    lockpath = path.join(self.workdir, '1', 'output', basename)
    lock = HardFileLock(lockpath)

    if self.jobindex != 0:
        # NOTE(review): presumably blocks until job #0 has finished -- the
        # semantics of HardFileLock are not visible here; confirm.
        with lock:
            pass
        return

    try:
        if callable(todo):
            todo(*args, **kwargs)
        else:
            runcmd(todo.format(*args, **kwargs))
    finally:
        # Create (or truncate) the lock file whether or not `todo` succeeded.
        open(lockpath, 'w').close()
def non1st(self, todo, *args, **kwargs):
    """Run `todo` in every job except the first; the first job waits on them.

    `todo` is either a callable, invoked as ``todo(*args, **kwargs)``, or a
    command template that is filled with ``todo.format(*args, **kwargs)`` and
    handed to `runcmd`.

    The optional keyword ``lockfile`` (default ``'poll.non1st.lock'``) names
    the lock file kept in each job's ``<workdir>/<job number>/output``
    directory; it is consumed here and never forwarded to `todo`.

    Any exception raised by `todo` propagates, but the job's lock file is
    created first in either case.
    """
    lockfilename = kwargs.pop('lockfile', 'poll.non1st.lock')
    # One lock path per job; job directories are numbered from 1.
    lockfilenames = [
        path.join(self.workdir, str(jobindex + 1), 'output', lockfilename)
        for jobindex in range(self.joblen)
    ]
    lockfiles = [HardFileLock(f) for f in lockfilenames]

    if self.jobindex == 0:
        # Enter and immediately leave the lock of every other job.
        # NOTE(review): presumably blocks until that job is done -- the
        # semantics of HardFileLock are not visible here; confirm.
        for lockfile in lockfiles[1:]:
            with lockfile:
                pass
        return

    try:
        if callable(todo):
            todo(*args, **kwargs)
        else:
            runcmd(todo.format(*args, **kwargs))
    finally:
        # Fixed: the original indexed the base-name string
        # (`lockfilename[self.jobindex]`), which touched a file named after a
        # single character instead of this job's lock file.
        open(lockfilenames[self.jobindex], 'w').close()
def gunzip(fn, outfn=None):
    """Decompress `fn` with the `gunzip` command line tool.

    fn:    path of the gzipped file.
    outfn: optional output path. When given, the decompressed data is written
           there via ``-c`` and shell redirection; otherwise gunzip
           decompresses `fn` in place.
    """
    args = {'f': True, '1': True}
    # Fixed: the original formatted the dict itself into the command line
    # (``gunzip 'x.gz' {'f': True, '1': True}``). Render the enabled switches
    # as real flags instead; sorted so the command is deterministic.
    flags = ' '.join('-' + name for name in sorted(args) if args[name])
    if not outfn:
        cmd = 'gunzip {!r} {}'.format(fn, flags)
    else:
        cmd = 'gunzip {!r} {} -c > {!r}'.format(fn, flags, outfn)
    runcmd(cmd)
def run_vardict():
    """Run VarDict on the tumor/normal pair and optionally gzip the output.

    Reads `params`, `ref`, `tumor`, `normal`, `outfile`, `vardict` and `gz`
    from the enclosing scope and mutates `params` in place. VarDict's stdout
    is redirected to `outfile`.
    """
    params.b = '{}|{}'.format(tumor, normal)
    params.G = ref
    params.v = True

    template = '{vardict} {args} > {outfile!r}'
    runcmd(template.format(outfile = outfile, args = cmdargs(params), vardict = vardict))

    if gz:
        runcmd(['gzip', outfile])
def run_virmid():
    """Run Virmid on the tumor/normal pair and move its passed-calls VCF.

    Reads `params`, `ref`, `tumor`, `normal`, `joboutdir`, `virmid`, `mem`,
    `tmpdir`, `outfile` and `gz` from the enclosing scope and mutates `params`
    in place. Virmid writes into `joboutdir` (``params.w``); the
    ``*.virmid.som.passed.vcf`` file is then moved to `outfile`.
    """
    params.R = ref
    params.D = tumor
    params.N = normal
    params.w = joboutdir
    cmd = '{virmid} {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
        virmid = virmid,
        mem    = mem2(mem, 'java'),
        tmpdir = tmpdir,
        args   = cmdargs(params)
    )
    # Fixed: the command was built but never executed, so the `mv` below had
    # nothing to move.
    runcmd(cmd)
    # NOTE(review): the source path contains a shell glob; whether it is
    # expanded depends on how `runcmd` executes list commands -- confirm.
    runcmd(['mv', path.join(joboutdir, '*.virmid.som.passed.vcf'), outfile])
    if gz:
        runcmd(['gzip', outfile])
def getAlleleCount(bamfile, snpfile, outfile):
    """Count A/C/G/T reads at the positions in `snpfile` and write a table.

    Runs the `bamrc` tool (taken from the enclosing scope together with
    `ref`) on `bamfile`, restricted to the sites listed in `snpfile`, and
    stores its raw output in ``outfile + '.tmp'``. That output is then
    reformatted into `outfile` with the columns
    Chrm, pos, A, C, G, T, Total, refCount, mutCount.

    Both the raw output and `snpfile` are consumed strictly forward, in
    lockstep: for each raw row, SNP rows are pulled until one matches, and
    the SNP reader is never rewound.
    NOTE(review): this presumably relies on both files being in the same
    order -- confirm against the callers.
    """
    brcparams = Box()
    brcparams.f = ref
    brcparams.w = 0
    brcparams.l = snpfile
    # Positional argument goes under the empty key.
    brcparams[''] = bamfile
    cmd = '{bamrc} {args} > {outfile!r}'.format(
        bamrc = bamrc, args = cmdargs(brcparams, equal = ' '), outfile = outfile + '.tmp')
    runcmd(cmd)

    # Reformat the raw output into the desired format.
    reader = TsvReader(outfile + '.tmp', cnames = False)
    snper = TsvReader(snpfile, cnames = False)
    # Example raw row:
    #chr1 564773 C 14 =:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 A:0:0.00:0.00:0.00:0:0:0.00:0.00:0.00:0:0.00:0.00:0.00 C:14:... G:0:... T:0:... N:0:...
    writer = TsvWriter(outfile)
    writer.cnames = ['Chrm', 'pos', 'A', 'C', 'G', 'T', 'Total', 'refCount', 'mutCount']
    for r in reader:
        while True:
            try:
                snp = next(snper)
            except StopIteration:
                # No SNP rows left: nothing more can match this raw row.
                break
            # use the end position, in case it's 0-based
            if snp[0] == r[0] and snp[2] == r[1]:
                # Each base column looks like "<base>:<count>:..."; keep the count.
                counts = dict(
                    A = r[5].split(':', 2)[1],
                    C = r[6].split(':', 2)[1],
                    G = r[7].split(':', 2)[1],
                    T = r[8].split(':', 2)[1]
                )
                rec = TsvRecord()
                rec.Chrm = r[0]
                rec.pos = r[1]
                rec.Total = r[3]
                rec.A = counts['A']
                rec.C = counts['C']
                rec.G = counts['G']
                rec.T = counts['T']
                # if reference allele is unknown, assuming all are ref alleles
                rec.refCount = counts.get(snp[6].upper(), r[3])
                # if mut allele is unknown, assuming no mutations happened
                rec.mutCount = counts.get(snp[7].upper(), 0)
                writer.write(rec)
                # matched: stop scanning SNP rows and move on to the next raw row
                break
            else:
                # no match: pull the next SNP row
                continue
    writer.close()
def run_gatk():
    """Call somatic variants with GATK MuTect2 and optionally gzip the VCF.

    Reads `params`, `samtools`, `tumor`, `normal`, `ref`, `outfile`,
    `nthread`, `mem`, `tmpdir`, `gatk` and `gz` from the enclosing scope and
    mutates `params` in place.

    An interval list holding the first sequence name reported by
    ``samtools idxstats`` for the tumor BAM is written to the job output
    directory and passed to GATK through ``-L``.
    """
    # generate interval list file
    intvfile = {{job.outdir | path.join: "interval.list" | quote}}
    cmd = '{samtools} idxstats {tumor!r} | head -1 | cut -f1 > {intvfile!r}'.format(
        samtools = samtools,
        tumor    = tumor,
        intvfile = intvfile
    )
    runcmd(cmd)

    # Fixed: the original did `mem = mem2(mem, 'java')`. Assigning to `mem`
    # inside the function makes it a local name, so reading it on the
    # right-hand side raised UnboundLocalError. Use a separate local instead.
    javamem = mem2(mem, 'java')

    params['I:tumor']  = tumor
    params['I:normal'] = normal
    params.R   = ref
    params.o   = outfile
    params.nct = nthread
    params.L   = intvfile

    cmd = '{gatk} -T MuTect2 {mem} -Djava.io.tmpdir={tmpdir!r} {args}'.format(
        gatk   = gatk,
        mem    = javamem,
        tmpdir = tmpdir,
        args   = cmdargs(params, dash = '-', equal = ' ')
    )
    runcmd(cmd)
    if gz:
        runcmd(['gzip', outfile])
def tabindex(vcf, outdir):
    """Return a bgzipped, tabix-indexed copy or link of `vcf` inside `outdir`.

    * Plain file: it is bgzipped into ``outdir/<basename>.gz`` and indexed
      with `tabix` (taken from the enclosing scope).
    * Already ``.gz``: it is symlinked into `outdir`. The symlink chain of
      `vcf` is then followed looking for a sibling ``.tbi`` of a link target;
      the first one found is symlinked next to the new link. If no index
      ends up in `outdir`, one is built with `tabix`.

    Returns the path of the gzipped file in `outdir`.
    """
    base = path.basename(vcf)

    if not vcf.endswith('.gz'):
        gzfile = path.join(outdir, base + '.gz')
        runcmd('bgzip {!r} -c > {!r}'.format(vcf, gzfile))
        runcmd('{} {!r}'.format(tabix, gzfile))
        return gzfile

    gzfile = path.join(outdir, base)
    # it is gzipped, try to find the index file (.tbi)
    idxfile = path.join(outdir, base + '.tbi')
    symlink(vcf, gzfile)

    current = vcf
    while True:
        try:
            target = readlink(current)
            candidate = target + '.tbi'
            if path.isfile(candidate):
                symlink(candidate, gzfile + '.tbi')
                break
            current = target
        except OSError:
            # Not a symlink any more (or linking failed): stop searching.
            break

    if not path.isfile(idxfile):
        runcmd('{} {!r}'.format(tabix, gzfile))
    return gzfile
def run_strelka():
    """Configure and run the Strelka workflow, then merge its SNV/indel VCFs.

    Reads `normal`, `tumor`, `ref`, `joboutdir`, `strelka`, `params`, `mem`,
    `nthread`, `outfile` and `gz` from the enclosing scope and mutates
    `params` in place. The merged result is written to `outfile` by
    `_mergeAndAddGT`.
    """
    # Step 1: configuration, which sets up the run directory.
    cparams = {{args.configParams | repr}}
    cparams.referenceFasta = ref
    cparams.tumorBam = tumor
    cparams.normalBam = normal
    cparams.runDir = joboutdir
    configure = '{strelka} {args}'.format(args = cmdargs(cparams), strelka = strelka)
    runcmd(configure)

    # Step 2: execute the generated workflow script locally.
    params.m = 'local'
    # Drop the last character of the memory string (presumably the 'G' unit).
    params.g = mem2(mem, 'G')[:-1]
    params.j = nthread
    workflow = '{joboutdir}/runWorkflow.py {args}'.format(
        args = cmdargs(params), joboutdir = joboutdir)
    runcmd(workflow)

    # Step 3: merge SNV and indel calls into the final output.
    vardir = path.join(joboutdir, 'results', 'variants')
    snvvcf = path.join(vardir, 'somatic.snvs.vcf.gz')
    indvcf = path.join(vardir, 'somatic.indels.vcf.gz')
    _mergeAndAddGT(snvvcf, indvcf, outfile)

    if gz:
        runcmd(['gzip', outfile])
def runChrom(file1, file2, chrom):
    """Subtract `file2` from `file1` on a single chromosome with bedtools.

    Both inputs are queried with `tabix` for `chrom` (header included) into
    temporary per-chromosome files, `bedtools subtract` is run on those, and
    the temporaries are removed. `tabix`, `bedtools` and `rmany` come from the
    enclosing scope; when `rmany` is truthy the ``-A`` switch is added.

    Output names are derived from the inputs with their last three characters
    dropped (presumably a ``.gz`` suffix): ``<stem>-<chrom>.<ext>`` for the
    temporaries and ``<stem>-<chrom>.subtracted.<ext>`` for the result.

    Returns the path of the subtracted file.
    """
    def _tagged(gzpath, *tags):
        # Insert `tags` between the stem and the final extension.
        stem, dot, ext = gzpath[:-3].rpartition('.')
        return stem + ''.join(tags) + dot + ext

    chromtag = '-' + chrom
    outfile1 = _tagged(file1, chromtag)
    outfile2 = _tagged(file2, chromtag)
    outfile = _tagged(file1, chromtag, '.subtracted')

    extract = '{} -h {!r} {!r} > {!r}'
    for source, dest in ((file1, outfile1), (file2, outfile2)):
        runcmd(extract.format(tabix, source, chrom, dest))

    options = {'a': outfile1, 'b': outfile2}
    if rmany:
        options['A'] = True
    rendered = cmdargs(options, dash='-', equal=' ')
    runcmd('{bedtools} subtract {params} > {outfile!r}'.format(
        bedtools=bedtools, params=rendered, outfile=outfile))

    remove(outfile1)
    remove(outfile2)
    return outfile
fqfile2 = fqfile2[:-3] {% endif %} params = {{args.params}} try: {% case args.tool %} {% when 'biobambam' %} params['gz'] = 0 params['F'] = fqfile1 params['F2'] = fqfile2 params['T'] = path.join(tmpdir, infile + '.tmp') params['filename'] = infile if infile.endswith('.sam'): params['inputformat'] = 'sam' cmd = '{{args.biobambam}} %s' % cmdargs(params, dash = '', equal = '=') runcmd (cmd) {% when 'bedtools' %} params['i'] = infile params['fq'] = fqfile1 params['fq2'] = fqfile2 cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd) {% when 'samtools' %} params['t'] = True params['1'] = fqfile1 params['2'] = fqfile2 cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params, dash = '-', equal = ' '), infile) runcmd (cmd) {% when 'picard' %} mem = mem2({{ args.mem | quote }}, 'Java') params[mem] = True
fq1 = {{o.fq1 | quote}} fq2 = {{o.fq2 | quote}} try: {% case args.tool %} {% when 'wgsim' %} {% if args.gz %} fq1 = "{{o.fq1 | [:-3]}}" fq2 = "{{o.fq2 | [:-3]}}" {% endif %} params['N'] = {{args.num}} params['1'] = {{args.len1}} params['2'] = {{args.len2}} params['S'] = {{i.seed | lambda x: -1 if x is None else x}} cmd = '{{args.wgsim}} %s "%s" "%s" "%s"' % (cmdargs(params), ref, fq1, fq2) runcmd (cmd) {% if args.gz %} runcmd ('gzip "%s"' % fq1) runcmd ('gzip "%s"' % fq2) {% endif %} {% when 'dwgsim' %} prefix = {{o.fq1 | [:-8] | quote}} {% if args.gz %} fq1 = "{{o.fq1 | [:-3]}}" fq2 = "{{o.fq2 | [:-3]}}" prefix = "{{o.fq1 | [:-11]}}" {% endif %} params['N'] = {{args.num}}
cmd = '{{args.awk}} -v sample="{sample}" index={index} -f {awkfile} {infile}'.format( sample = sample, index = 10 + i, awk = str(repr(awkfile)), infile = str(repr(infile)) ) cmds.append(cmd) ########### gatk {% elif args.tool == 'gatk' %} for sample in samples: params = {} params['R'] = {{args.ref | quote}} params['V'] = {{i.infile | quote}} params['o'] = "{{o.outdir}}/{{i.infile | fn}}-%s.vcf" % sample params['sample_name'] = sample params['excludeFiltered'] = True params['excludeNonVariants'] = True params.update({{args.params}}) cmd = '{{args.gatk}} -T SelectVariants %s' % (cmdargs(params, equal=' ')) cmds.append(cmd) {% endif %} {% if args.nthread == 1 %} for cmd in cmds: runcmd(cmd) {% else %} p = Parallel({{args.nthread}}) p.run('{}', [(cmd,) for cmd in cmds]) {% endif %}
infile = {{i.infile | quote}} outfile = {{o.outfile | quote}} header = {{args.header | repr}} by = {{args.by | quote}} tool = {{args.tool | quote}} if header: # write the header to outfile params = cmdargs({ 'e': '^#', }, dash='-', equal=' ') if infile.endswith('.gz'): cmd = 'zcat {infile} | grep {cmdargs} > {outfile}' else: cmd = 'grep {cmdargs} {infile} > {outfile}' runcmd(cmd.format(cmdargs=params, infile=infile, outfile=outfile)) if tool == 'sort': if infile.endswith('.gz'): cmd = 'zcat {infile} | grep "^#" | sort {cmdargs} >> {outfile}' else: cmd = 'grep -v "^#" {infile} | sort {cmdargs} >> {outfile}' if by.lower().startswith('coord'): params = cmdargs({'k#1': '1,1', 'k#2': '2,2n'}, dash='-', equal=' ') else: params = cmdargs({ 'k#1': '3,3', 'k#2': '1,1', 'k#3': '2,2n' },
def sam2bam(samfile, bamfile):
    """Convert `samfile` to `bamfile` with samtools, then delete the SAM file.

    `samtools` and `logger` are taken from the enclosing scope.
    """
    logger.info('Converting sam to bam: ')
    logger.info('- %s' % samfile)
    logger.info('- %s' % bamfile)
    # Fixed: the two commands were joined with ';', so the SAM file was removed
    # even when the conversion failed, and the shell reported rm's exit status
    # instead of samtools'. '&&' only removes the input after a successful
    # conversion and lets a samtools failure surface.
    cmd = '%s view -Sb "%s" > "%s" && rm -f "%s"' % (samtools, samfile, bamfile, samfile)
    runcmd(cmd)
from os import makedirs, path
from shutil import rmtree
# Fixed: `stderr` is used in the error handler below but was never imported,
# so a failing job raised NameError and hid the real error.
from sys import stderr
from pyppl import Box
from bioprocs.utils import cmdargs, runcmd, mem2

# Job script: reorder a SAM/BAM file against a reference with picard
# ReorderSam, using a per-job temporary directory that is always removed
# afterwards.

tmpdir = path.join("{{args.tmpdir}}", "{{proc.id}}.{{i.infile | fn}}.{{job.index}}")
if not path.exists(tmpdir):
    makedirs(tmpdir)

mem = mem2({{args.mem | quote}}, 'java')
ref = {{args.ref | quote}}
params = {{args.params}}
try:
    params['TMP_DIR'] = tmpdir
    params['I'] = {{i.infile | quote}}
    params['O'] = {{o.outfile | quote}}
    params['R'] = ref
    runcmd('{{args.picard}} ReorderSam %s -Djava.io.tmpdir="%s" %s' % (mem, tmpdir, cmdargs(params, dash='', equal='=')))
except Exception as ex:
    # Report the failure, then re-raise so the job is marked as failed.
    stderr.write("Job failed: %s" % str(ex))
    raise
finally:
    rmtree(tmpdir)
descs[key] = desc_prefix + builtin_descs[fname](ffunc) else: realfilters[fname] = ffunc if callable(ffunc) else eval(ffunc) descs[fname] = desc_prefix + fname reader = vcf.Reader(filename=infile) for fname, fdesc in descs.items(): reader.filters[fname] = vcf.parser._Filter(id = fname, desc = fdesc) writer = vcf.Writer(open(outfile, 'w'), reader) while True: try: record = reader.next() for fname, ffunc in realfilters.items(): if ffunc(record, record.samples): record.FILTER = record.FILTER or [] record.FILTER.append(fname) if keep or not record.FILTER: writer.write_record(record) except StopIteration: break except: continue writer.close() if gz: runcmd(['bgzip', outfile])
params = {{args.params}} try: {% case args.tool %} ############# biobambam {% when 'biobambam' %} params['gz'] = 0 #bug #params['S'] = fqfile params['filename'] = infile params['T'] = path.join(tmpdir, infile + '.tmp') if infile.endswith('.sam'): params['inputformat'] = 'sam' cmd = '{{args.biobambam}} %s > "%s"' % (cmdargs(params, dash = '', equal = '='), fqfile) runcmd (cmd) ############# bedtools {% when 'bedtools' %} params['i'] = infile params['fq'] = fqfile cmd = '{{args.bedtools}} bamtofastq %s' % cmdargs(params, dash = '-', equal = ' ') runcmd (cmd) ############# samtools {% when 'samtools' %} params['t'] = True params['s'] = fqfile cmd = '{{args.samtools}} fastq %s "%s"' % (cmdargs(params), infile)
makedirs (tmpdir) params = {{args.params}} try: {% case args.tool %} ############## picard {% when 'picard' %} mem = mem2({{ args.mem | quote }}) params['-Djava.io.tmpdir'] = tmpdir params['TMP_DIR'] = tmpdir params['I'] = {{i.infile | quote}} params['O'] = {{o.outfile | quote}} for k,v in rg.items(): params['RG' + k] = v runcmd ('{{args.picard}} AddOrReplaceReadGroups %s %s' % (mem, cmdargs(params, dash='', equal='='))) ############## bamutil {% when 'bamutil' %} params['RG'] = "@RG\\tID:%s\\t%s" % (rg['ID'], "\\t".join([k + ":" + v for k,v in rg.items() if k!='ID'])) params['in'] = {{i.infile | quote}} params['out'] = {{o.outfile | quote}} runcmd ('{{args.bamutil}} polishBam %s' % cmdargs(params, equal = ' ')) {% endcase %} except Exception as ex: stderr.write ("Job failed: %s" % str(ex)) raise finally: rmtree (tmpdir)
openfunc = gzip.open if infile1.endswith('.gz') else open with openfunc(infile1) as fin, open(outfile, 'w') as fout: for line in fin: if not line.startswith('#'): break fout.write(line) # go directly if not bychrom: cmd = '{bedtools} subtract {params} >> {outfile}' params = {'a': infile1, 'b': infile2} if rmany: params['A'] = True params = cmdargs(params, dash='-', equal=' ') runcmd(cmd.format(bedtools=bedtools, params=params, outfile=outfile)) else: # check if infile1 is tabix indexed def tabindex(vcf, outdir): if not vcf.endswith('.gz'): gzfile = path.join(outdir, path.basename(vcf) + '.gz') bgzip_cmd = 'bgzip {!r} -c > {!r}'.format(vcf, gzfile) runcmd(bgzip_cmd) runcmd('{} {!r}'.format(tabix, gzfile)) else: gzfile = path.join(outdir, path.basename(vcf)) # it is gzipped, try to find the index file (.tbi) idxfile = path.join(outdir, path.basename(vcf) + '.tbi') symlink(vcf, gzfile) while True: try:
if gz: runcmd(['gzip', outfile]) def run_somaticsniper(): params.f = ref params.F = 'vcf' params[''] = [tumor, normal, outfile] cmd = '{ssniper} {args}'.format(ssniper = ssniper, args = cmdargs(params)) runcmd(cmd) if gz: runcmd(['gzip', outfile]) def run_snvsniffer(): # generate a header file theader = {{job.outdir | path.join: bn(i.tumor) | @append: '.header' | quote}} nheader = {{job.outdir | path.join: bn(i.normal) | @append: '.header' | quote}} cmd = '{samtools} view -H {infile!r} > {hfile!r}' runcmd(cmd.format(samtools = samtools, infile = tumor, hfile = theader)) runcmd(cmd.format(samtools = samtools, infile = normal, hfile = nheader)) params.g = ref params.o = outfile params[''] = [theader, nheader, tumor, normal] cmd = '{ssniffer} somatic {args}'.format(ssniffer, cmdargs(params)) runcmd(cmd) if gz: runcmd(['gzip', outfile]) def _mergeAndAddGT(snvvcf, indvcf, outfile): from pysam import VariantFile snv = VariantFile(snvvcf) ind = VariantFile(indvcf)
def bgzip(fn, outfn=None):
    """Compress `fn` with the `bgzip` command line tool.

    fn:    path of the file to compress.
    outfn: optional output path. When given, the compressed data is written
           there via ``-c`` and shell redirection; otherwise bgzip is run on
           `fn` alone.
    """
    if outfn:
        cmd = 'bgzip {!r} -c > {!r}'.format(fn, outfn)
    else:
        cmd = 'bgzip {!r}'.format(fn)
    runcmd(cmd)
shell.TOOLS['cnvkit'] = cnvkit envs = dict(OPENBLAS_NUM_THREADS=str(nthread), OMP_NUM_THREADS=str(nthread), NUMEXPR_NUM_THREADS=str(nthread), MKL_NUM_THREADS=str(nthread)) ckshell = shell.Shell(subcmd=True, equal=' ', envs=envs, cwd=outdir).cnvkit # generate target file params_t = params.target params_t.o = path.join(outdir, prefix + '.bed') ckshell.target(exbaits, **params_t).run() # generate access file if not accfile: accfile = path.join(outdir, prefix + '.access.bed') params_a = params.access params_a.o = accfile ckshell.access(ref, **params_a).run() # autobin params_b = params.autobin params_b.t = params_t.o params_b.g = accfile params_b[''] = infiles runcmd('cd {wdir}; {cnvkit} autobin {args}'.format( wdir=shell.shquote(outdir), cnvkit=shell.shquote(cnvkit), args=cmdargs(params_b, equal=' ')), env=envs)
# Settings rendered into the script by the template engine.
tool = {{args.tool | repr}}
picard = {{args.picard | repr}}
chain = {{args.lochain | repr}}
ref = {{args.ref | repr}}
params = {{args.params | repr}}
mem = {{args.mem | repr}}
tmpdir = {{args.tmpdir | repr}}

# A chain file is mandatory; abort the job when it is missing.
if not chain:
    logger.error('Chain file (args.lochain) not provided!')
    exit(1)

# Example command:
# picard LiftoverVcf -Xmx4g -Xms1g I=TCGA-05-4382-10.vcf O=1.vcf CHAIN=liftovers/hg38ToHg19.over.chain.gz R=ucsc_hg19.fa REJECT=r.vcf
if tool == 'picard':
    # `infile`, `outfile` and `umfile` are defined outside the visible part of
    # this script. NOTE(review): `umfile` presumably receives the records that
    # could not be lifted over (picard's REJECT) -- confirm.
    params.I = infile
    params.O = outfile
    params.CHAIN = chain
    params.REJECT = umfile
    params.R = ref
    javamem = mem2(mem, 'java')
    # Each whitespace-separated token of `javamem` becomes a boolean switch
    # whose first character is replaced by '-'.
    # NOTE(review): presumably the tokens are JVM options such as -Xmx4g, so
    # that with dash='' below they are emitted unchanged -- confirm.
    for jm in javamem.split():
        params['-' + jm[1:]] = True
    params['-Djava.io.tmpdir'] = tmpdir
    cmd = '{picard} LiftoverVcf {params}'
    runcmd(
        cmd.format(picard=picard, params=cmdargs(params, equal='=', dash='')))
params['output-maf'] = {{o.outfile | quote}} params['vep-data'] = {{args.vepDb | quote}} params['vep-forks'] = {{args.nthread}} params['filter-vcf'] = {{args.filtervcf | quote}} params['ref-fasta'] = {{args.ref | quote}} params['vep-path'] = path.dirname(vep) {% if args.tumor1st %} params['tumor-id'] = samples.pop(0) params['normal-id'] = samples[0] if samples else 'NORMAL' {% else %} params['normal-id'] = samples.pop(0) params['tumor-id'] = samples[0] if samples else 'NORMAL' {% endif %} cmd = '{{args.vcf2maf}} %s' % (cmdargs(params, equal=' ')) runcmd(cmd) {% else %} cmds = [] for sample in samples: vtparams = {} vtparams['a'] = True vtparams['c'] = sample vtparams['e'] = True samplevcf = "{{job.outdir}}/{{i.infile | fn}}-%s.vcf" % sample cmd = '{{args.vcftools}} %s {{i.infile | quote}} > "%s"' % (cmdargs(vtparams), samplevcf) # vcf2maf.pl --input-vcf ZYYP-ZYYB.vcf --output-maf ZYYP-ZYYB.snpEff.maf --tumor-id ZXLT-ZXLB_TUMOR --normal-id ZXLT-ZXLB_NORMAL --vep-data /path/to/vep/cache/ --filter-vcf /path/to/vep/ExAC_nonTCGA.r0.3.1.sites.vep.vcf.gz --ref-fasta /path/to/hs37d5/phase2_reference_assembly_sequence/hs37d5.fa --vep-path /path/to/miniconda2/bin params['input-vcf'] = samplevcf params['output-maf'] = "{{job.outdir}}/{{i.infile | fn}}-%s.maf" % sample params['vep-data'] = {{args.vepDb | quote}}
cnvkitReferenceParams['f'] = ref cmd = openblas_nthr + '{cnvkit} reference {workdir}/*/output/*/*.targetcov.cnn {args}'.format(cnvkit = cnvkit, workdir = repr(workdir), args = cmdargs(cnvkitReferenceParams)) log2log('CNVkit: Run reference at job #0 ...') poll.first(cmd, lockfile = 'reference.poll.lock') log2log('CNVkit: Run reference at job #0 ... done') mtfile = "{outdir}/cnvkit_mt".format(outdir = outdir) open(mtfile, 'w').close() cnvkitFixParams = params.fix cnvkitFixParams['o'] = fixedCnr cmd = openblas_nthr + '{cnvkit} fix {targetCov} {mtfile} {refcnn} {args}'.format(cnvkit = cnvkit, targetCov = repr(targetCov), mtfile = repr(mtfile), refcnn = repr(refcnn), args = cmdargs(cnvkitFixParams)) log2log('CNVkit: Run fix at all jobs ...') runcmd (cmd) log2log('CNVkit: Run fix at all jobs ... done') if path.getsize(fixedCnr) < 60: open(segfile, 'w').write('chromosome start end gene log2 depth probes weight\\n') else: cnvkitSegmentParams = params.segment cnvkitSegmentParams['o'] = segfile cnvkitSegmentParams['p'] = nthread cmd = openblas_nthr + '{cnvkit} segment {args} {fixedCnr}'.format(cnvkit = cnvkit, args = cmdargs(cnvkitSegmentParams), fixedCnr = repr(fixedCnr)) log2log('CNVkit: Run segment at all jobs ...') runcmd (cmd) log2log('CNVkit: Run segment at all jobs ... done') if path.getsize(segfile) < 60:
cmdps.append((tomtom, cmdargs(thparams, dash='-', equal=' '))) reader.close() Parallel(nthread, raiseExc=True).run('{} {}', cmdps) writer = TsvWriter(outfile) reader = TsvReader(path.join(ocdirs[0], 'tomtom.txt'), comment='##', cnames=lambda header: header[1:].strip().split("\t")) writer.cnames = reader.cnames writer.writeHead(lambda cnames: "#" + "\t".join(cnames)) reader.close() for ocdir in ocdirs: reader = TsvReader( path.join(ocdir, 'tomtom.txt'), comment='##', cnames=lambda header: header[1:].strip().split("\t")) for r in reader: writer.write(r) reader.close() writer.close() else: params[""] = [mfile1, mfile2] params.thresh = qval params.oc = outdir cmd = '{tomtom} {params}'.format(tomtom=tomtom, params=cmdargs(params, dash='-', equal=' ')) runcmd(cmd)
'debug': True, 'log-file': path.join(outdir, 'gdc-client.log') }) if token: args.t = token args.update(params) cmd2run = gdc + cmdargs(args, equal=' ') system(cmd2run) # check if all the data sucessfully downloaded with open(infile) as fin: ids = [ line.split()[0] for line in fin if line.strip() and not line.startswith('id') ] del args['m'] for i in ids: if not path.isdir(path.join(outdir, i)): logger.warning('File failed to download: {}'.format(i)) cmd2run = gdc + cmdargs(args, equal=' ') + ' ' + i runcmd(cmd2run) rename(path.join(outdir, 'https:', 'api.gdc.cancer.gov', 'data', i), path.join(outdir, i)) if path.isdir(path.join(outdir, 'https:')): rmdir(path.join(outdir, 'https:', 'api.gdc.cancer.gov', 'data')) rmdir(path.join(outdir, 'https:', 'api.gdc.cancer.gov')) rmdir(path.join(outdir, 'https:'))