" REFERENCE_SEQUENCE=", opts.refseq, " WINDOW_SIZE=", opts.windowsize, " MINIMUM_GENOME_FRACTION=", opts.mingenofrac, " INPUT=", opts.input, " OUTPUT=", tempout, " TMP_DIR=", opts.tmp_dir, " CHART_OUTPUT=", temppdf, " SUMMARY_OUTPUT=", temptab, " VALIDATION_STRINGENCY=LENIENT", ] process = Popen("".join(cl), shell=True, stderr=standard_fd, stdout=standard_fd, cwd=output_folder) return_value = process.wait() standard_fd.close() fixPicardOutputs( tempout=temptab, output_dir=output_folder, log_file=opts.log_file, html_output=os.path.join(output_folder, opts.html_output), progname=progname, cl=cl, )
def picVal(opts=None):
    """
    Run the validation jar named by ``opts.jar`` on ``opts.input`` and report.

    called with sam so no need to convert

    The java command line is assembled from ``opts`` (``maxjheap``, ``jar``,
    ``tmp_dir``, ``maxoutput``, ``ignore``, ``bisulphite``, ``refseq``) and run
    through the shell with ``opts.output_dir`` as working directory. Its
    combined stdout/stderr is captured in ``rgPicardValidate_<title>.errors``
    and the first rows are echoed to the log file. Unless ``opts.dryrun`` is
    ``'dryrun'``, the captured rows are handed to ``cleanSam`` to write
    ``opts.sam`` or ``opts.bam``. ``fixPicardOutputs`` is called last.

    Side effects: sets ``opts.log_file`` and may rewrite ``opts.maxoutput``
    from ``'0'`` to ``'65535'``.

    Raises:
        AssertionError: if ``opts`` is None.
        ValueError: if ``opts.dryrun`` is not 'dryrun', 'sam' or 'bam'
            (previously this surfaced as an UnboundLocalError).
    """
    assert opts is not None
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    # Title with punctuation/whitespace mapped to '_' so it is safe in file names.
    title = opts.title.translate(trantab)
    tempout = os.path.join(opts.output_dir, 'rgPicardValidate.out')
    opts.log_file = opts.log or os.path.join(
        opts.output_dir, 'rgPicardValidate_%s.log' % title)
    sortedfile = None
    tempbam = None
    try:
        stf = open(opts.log_file, 'w')
        try:
            if verbose:
                print('# opts.ignore %s  opts.sortme= %s'
                      % (opts.ignore, opts.sortme))
            if opts.sortme:
                fd, sortedfile = tempfile.mkstemp(
                    suffix='rgcleansam.sorted.bam')
                # mkstemp hands back an open descriptor; only the path is
                # needed here, so close it instead of leaking it.
                os.close(fd)
                if opts.datatype == 'sam':  # need to work with a bam
                    # NOTE(review): every other path here uses
                    # opts.output_dir; confirm opts.outdir really exists.
                    tempbam = samToBam(opts.input, opts.outdir)
                    pysam.sort(tempbam, sortedfile)
                else:  # is already bam
                    pysam.sort(opts.input, sortedfile)
            cl = ['java -Xmx', opts.maxjheap, ' -jar ', opts.jar,
                  ' O=', tempout, ' TMP_DIR=', opts.tmp_dir]
            if verbose:
                print('# cl so far %s' % (cl,))
            if opts.sortme:
                cl.append(' I=%s' % sortedfile)
            else:
                cl.append(' I=%s' % opts.input)
            if opts.maxoutput == '0':
                opts.maxoutput = '65535'
            cl.append(' MAX_OUTPUT=%s' % opts.maxoutput)
            # A leading 'None' means "ignore nothing"; an empty list is
            # treated the same way instead of raising IndexError.
            if opts.ignore and opts.ignore[0] != 'None':
                cl += [' IGNORE=%s' % x for x in opts.ignore if x != 'None']
            if opts.bisulphite.lower() != 'false':
                cl.append(' IS_BISULFITE_SEQUENCED=true')
            if opts.refseq != '':
                cl.append(' R=%s' % opts.refseq)
            s1 = ' '.join(['"%s"' % x for x in cl])
            s = '## rgPicardValidate.py about to Popen:\n%s\n' % s1
            stf.write(s)
            if verbose:
                print(s)
            pefilename = os.path.join(
                opts.output_dir, 'rgPicardValidate_%s.errors' % title)
            picerrors = open(pefilename, 'w')
            try:
                # NOTE(review): the command is an unquoted shell string built
                # from option values; paths containing spaces or shell
                # metacharacters will break or be interpreted by the shell.
                process = Popen(''.join(cl), shell=True, stderr=picerrors,
                                stdout=picerrors, cwd=opts.output_dir)
                process.wait()
            finally:
                picerrors.close()
            with open(pefilename, 'r') as errfile:
                pe = errfile.readlines()
            stf.write('## got %d rows - first few =%s\n'
                      % (len(pe), '\n'.join(pe[:5])))
            if opts.dryrun != 'dryrun':  # want to run cleansam
                if opts.dryrun == 'sam':
                    outformat = 'sam'
                    newsam = opts.sam
                elif opts.dryrun == 'bam':
                    outformat = 'bam'
                    newsam = opts.bam
                else:
                    raise ValueError(
                        "opts.dryrun must be 'dryrun', 'sam' or 'bam', got %r"
                        % (opts.dryrun,))
                cleanSam(insam=opts.input, newsam=newsam, picardErrors=pe,
                         outformat=outformat, sortme=opts.sortme)
        finally:
            # The log is closed before fixPicardOutputs receives its path.
            stf.close()
        fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir,
                         log_file=opts.log_file, html_output=opts.html_output,
                         progname=progname, cl=cl, transpose=False)
    finally:
        # Remove the scratch files even when something above failed.
        for scratch in (sortedfile, tempbam):
            if scratch is not None and os.path.exists(scratch):
                os.unlink(scratch)
stderr=picerrors, stdout=picerrors, cwd=opts.output_dir) return_value = process.wait() picerrors.close() pe = open(pefilename, 'r').readlines() stf.write('## got %d rows - first few =%s\n' % (len(pe), '\n'.join(pe[:5]))) if opts.dryrun <> 'dryrun': # want to run cleansam if opts.dryrun == 'sam': outformat = 'sam' newsam = opts.sam elif opts.dryrun == 'bam': outformat = 'bam' newsam = opts.bam cleanSam(insam=opts.input, newsam=newsam, picardErrors=pe, outformat=outformat, sortme=opts.sortme) stf.close() fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir, log_file=opts.log_file, html_output=opts.html_output, progname=progname, cl=cl, transpose=False) if opts.sortme: os.unlink(sortedfile)
# Create output folder and save our R script in there. standard_fd = open(opts.log_file, 'w') cl = [ 'java -Xmx', opts.maxjheap, ' -jar ', opts.jar, ' REFERENCE_SEQUENCE=', opts.refseq, ' WINDOW_SIZE=', opts.windowsize, ' MINIMUM_GENOME_FRACTION=', opts.mingenofrac, ' INPUT=', opts.input, ' OUTPUT=', tempout, ' TMP_DIR=', opts.tmp_dir, ' CHART_OUTPUT=', temppdf, ' SUMMARY_OUTPUT=', temptab, ' VALIDATION_STRINGENCY=LENIENT' ] process = Popen(''.join(cl), shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir) return_value = process.wait() cl = 'mogrify -format jpg -resize x500 %s' % ( temppdf) # make the jpg for fixPicardOutputs to find process = Popen(cl, shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir) return_value = process.wait() standard_fd.close() fixPicardOutputs(tempout=temptab, output_dir=opts.output_dir, log_file=opts.log_file, html_output=opts.html_output, progname=progname, cl=cl)
else: bedToPicInterval(opts.target, targetf) tempout = os.path.join(opts.output_dir,'rgPicardHsMetrics.out') opts.log_file = opts.log or os.path.join(opts.output_dir, '%s.log' % title) sfd = open(opts.log_file, 'w') cl = ['java -Xmx',opts.maxjheap,' -jar ',opts.jar, ' BAIT_INTERVALS=',baitf,' TARGET_INTERVALS=', targetf,' INPUT=',os.path.abspath(opts.input), ' OUTPUT=',tempout, ' VALIDATION_STRINGENCY=LENIENT', ' TMP_DIR=',opts.tmp_dir ] process = Popen(''.join(cl), shell=True, stderr=sfd, stdout=sfd, cwd=opts.output_dir) return_value = process.wait() sfd.close() fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir, log_file=opts.log_file, html_output=os.path.join(opts.output_dir, opts.html_output), progname=progname, cl=cl)
destination = os.path.join(output_folder, new_filename) shutil.copy(os.path.abspath(opts.refseq), destination) opts.refseq = destination standard_fd = open(opts.log_file, 'w') cl = [ 'java -Xmx', opts.maxjheap, ' -jar ', opts.jar, ' REFERENCE_SEQUENCE=', os.path.abspath(opts.refseq), ' ASSUME_SORTED=', opts.assume_sorted, ''.join([' ADAPTER_SEQUENCE=%s' % x for x in opts.adapters]), ' IS_BISULFITE_SEQUENCED=', opts.bisulphite, ' MAX_INSERT_SIZE=', opts.maxinsert, ' INPUT=', os.path.abspath(opts.input), ' OUTPUT=', tempout, ' VALIDATION_STRINGENCY=LENIENT', ' TMP_DIR=', opts.tmp_dir ] process = Popen(''.join(cl), shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir) return_value = process.wait() standard_fd.close() fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir, log_file=opts.log_file, html_output=os.path.join(opts.output_dir, opts.html_output), progname=progname, cl=cl)
print s pefilename = os.path.join(opts.output_dir, 'rgPicardValidate_%s.errors' % title) picerrors = open(pefilename,'w') process = Popen(''.join(cl), shell=True, stderr=picerrors, stdout=picerrors, cwd=opts.output_dir) return_value = process.wait() picerrors.close() pe = open(pefilename,'r').readlines() stf.write('## got %d rows - first few =%s\n' % (len(pe), '\n'.join(pe[:5]))) if opts.dryrun <> 'dryrun': # want to run cleansam if opts.dryrun == 'sam': outformat = 'sam' newsam = opts.sam elif opts.dryrun == 'bam': outformat = 'bam' newsam = opts.bam cleanSam(insam=opts.input, newsam=newsam, picardErrors=pe,outformat=outformat,sortme=opts.sortme) stf.close() fixPicardOutputs(tempout=tempout,output_dir=opts.output_dir, log_file=opts.log_file,html_output=opts.html_output,progname=progname,cl=cl,transpose=False) if opts.sortme: os.unlink(sortedfile)
def picVal(opts=None):
    """
    Run the validation jar named by ``opts.jar`` on ``opts.input`` and report.

    called with sam so no need to convert

    The java command line is assembled from ``opts`` (``maxjheap``, ``jar``,
    ``tmp_dir``, ``maxoutput``, ``ignore``, ``bisulphite``, ``refseq``) and run
    through the shell with ``opts.output_dir`` as working directory. Its
    combined stdout/stderr is captured in ``rgPicardValidate_<title>.errors``
    and the first rows are echoed to the log file. Unless ``opts.dryrun`` is
    ``'dryrun'``, the captured rows are handed to ``cleanSam`` to write
    ``opts.sam`` or ``opts.bam``. ``fixPicardOutputs`` is called last.

    Side effects: sets ``opts.log_file`` and may rewrite ``opts.maxoutput``
    from ``'0'`` to ``'65535'``.

    Raises:
        AssertionError: if ``opts`` is None.
        ValueError: if ``opts.dryrun`` is not 'dryrun', 'sam' or 'bam'
            (previously this surfaced as an UnboundLocalError).
    """
    assert opts is not None
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    # Title with punctuation/whitespace mapped to '_' so it is safe in file names.
    title = opts.title.translate(trantab)
    tempout = os.path.join(opts.output_dir, 'rgPicardValidate.out')
    opts.log_file = opts.log or os.path.join(
        opts.output_dir, 'rgPicardValidate_%s.log' % title)
    sortedfile = None
    tempbam = None
    try:
        stf = open(opts.log_file, 'w')
        try:
            if verbose:
                print('# opts.ignore %s  opts.sortme= %s'
                      % (opts.ignore, opts.sortme))
            if opts.sortme:
                fd, sortedfile = tempfile.mkstemp(
                    suffix='rgcleansam.sorted.bam')
                # mkstemp hands back an open descriptor; only the path is
                # needed here, so close it instead of leaking it.
                os.close(fd)
                if opts.datatype == 'sam':  # need to work with a bam
                    # NOTE(review): every other path here uses
                    # opts.output_dir; confirm opts.outdir really exists.
                    tempbam = samToBam(opts.input, opts.outdir)
                    pysam.sort(tempbam, sortedfile)
                else:  # is already bam
                    pysam.sort(opts.input, sortedfile)
            cl = ['java -Xmx', opts.maxjheap, ' -jar ', opts.jar,
                  ' O=', tempout, ' TMP_DIR=', opts.tmp_dir]
            if verbose:
                print('# cl so far %s' % (cl,))
            if opts.sortme:
                cl.append(' I=%s' % sortedfile)
            else:
                cl.append(' I=%s' % opts.input)
            if opts.maxoutput == '0':
                opts.maxoutput = '65535'
            cl.append(' MAX_OUTPUT=%s' % opts.maxoutput)
            # A leading 'None' means "ignore nothing"; an empty list is
            # treated the same way instead of raising IndexError.
            if opts.ignore and opts.ignore[0] != 'None':
                cl += [' IGNORE=%s' % x for x in opts.ignore if x != 'None']
            if opts.bisulphite.lower() != 'false':
                cl.append(' IS_BISULFITE_SEQUENCED=true')
            if opts.refseq != '':
                cl.append(' R=%s' % opts.refseq)
            s1 = ' '.join(['"%s"' % x for x in cl])
            s = '## rgPicardValidate.py about to Popen:\n%s\n' % s1
            stf.write(s)
            if verbose:
                print(s)
            pefilename = os.path.join(
                opts.output_dir, 'rgPicardValidate_%s.errors' % title)
            picerrors = open(pefilename, 'w')
            try:
                # NOTE(review): the command is an unquoted shell string built
                # from option values; paths containing spaces or shell
                # metacharacters will break or be interpreted by the shell.
                process = Popen(''.join(cl), shell=True, stderr=picerrors,
                                stdout=picerrors, cwd=opts.output_dir)
                process.wait()
            finally:
                picerrors.close()
            with open(pefilename, 'r') as errfile:
                pe = errfile.readlines()
            stf.write('## got %d rows - first few =%s\n'
                      % (len(pe), '\n'.join(pe[:5])))
            if opts.dryrun != 'dryrun':  # want to run cleansam
                if opts.dryrun == 'sam':
                    outformat = 'sam'
                    newsam = opts.sam
                elif opts.dryrun == 'bam':
                    outformat = 'bam'
                    newsam = opts.bam
                else:
                    raise ValueError(
                        "opts.dryrun must be 'dryrun', 'sam' or 'bam', got %r"
                        % (opts.dryrun,))
                cleanSam(insam=opts.input, newsam=newsam, picardErrors=pe,
                         outformat=outformat, sortme=opts.sortme)
        finally:
            # The log is closed before fixPicardOutputs receives its path.
            stf.close()
        fixPicardOutputs(tempout=tempout, output_dir=opts.output_dir,
                         log_file=opts.log_file, html_output=opts.html_output,
                         progname=progname, cl=cl, transpose=False)
    finally:
        # Remove the scratch files even when something above failed.
        for scratch in (sortedfile, tempbam):
            if scratch is not None and os.path.exists(scratch):
                os.unlink(scratch)
except: pass title = opts.namejob.translate(trantab) tempout = os.path.join(opts.output_dir,'rgPicardGCBiasMetrics.out') temppdf = os.path.join(opts.output_dir,'rgPicardGCBiasMetrics.pdf') temptab = os.path.join(opts.output_dir,'rgPicardGCBiasMetrics.xls') opts.log_file = opts.log or os.path.join(opts.output_dir, '%s.log' % title) # Create output folder and save our R script in there. standard_fd = open(opts.log_file, 'w') cl = ['java -Xmx',opts.maxjheap,' -jar ',opts.jar,' REFERENCE_SEQUENCE=',opts.refseq,' WINDOW_SIZE=',opts.windowsize, ' MINIMUM_GENOME_FRACTION=',opts.mingenofrac,' INPUT=',opts.input,' OUTPUT=',tempout,' TMP_DIR=',opts.tmp_dir, ' CHART_OUTPUT=',temppdf,' SUMMARY_OUTPUT=',temptab,' VALIDATION_STRINGENCY=LENIENT'] process = Popen(''.join(cl), shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir) return_value = process.wait() cl = 'mogrify -format jpg -resize x500 %s' % (temppdf) # make the jpg for fixPicardOutputs to find process = Popen(cl, shell=True, stderr=standard_fd, stdout=standard_fd, cwd=opts.output_dir) return_value = process.wait() standard_fd.close() fixPicardOutputs(tempout=temptab,output_dir=opts.output_dir, log_file=opts.log_file,html_output=opts.html_output,progname=progname,cl=cl)