def checkNtvl(ci, ni):
    '''Verify that two consecutive intervals are sorted and disjoint.

    ci -- current interval as a sequence of strings (chromosome, start, end);
          a falsy value means there is no current interval yet
    ni -- next interval, same layout

    Returns 0 when there is no current interval, when the two intervals are
    on different chromosomes, or when the pair passes the check.  Returns 1
    after writing a 'failed' record through logProc when the current interval
    ends after the next one starts, or starts after the next one ends.

    The log record uses outf, outdir and inf, which are not defined in this
    function and must exist in the enclosing module.
    '''
    # Nothing to compare against, or a new chromosome begins.
    if not ci or ci[0] != ni[0]:
        return 0

    # Same short-circuit order as a plain "overlap or unsorted" test.
    in_order = int(ci[2]) <= int(ni[1]) and int(ci[1]) <= int(ni[2])
    if in_order:
        return 0

    detail = inf + ' contains overlapping intervals or not sorted by starting coordinate ' + ' '.join(ci)
    logProc.logProc(outf, outdir, ' ', 'failed', stderr_out=detail)
    return 1
def checkNtvl(ci, ni):
    '''Check that consecutive intervals on one chromosome do not overlap
    and are ordered by increasing start coordinate.

    ci -- current interval, a sequence of strings (chromosome, start, end),
          or a falsy value when no interval has been read yet
    ni -- next interval, same layout

    Returns 1 (after logging a 'failed' record via logProc) if both intervals
    share a chromosome and either the current end exceeds the next start or
    the current start exceeds the next end; returns 0 otherwise.

    Relies on outf, outdir and inf being available from the enclosing module.
    '''
    if ci and ci[0] == ni[0]:
        disjoint = int(ci[2]) <= int(ni[1]) and int(ci[1]) <= int(ni[2])
        if not disjoint:
            message = (inf
                       + ' contains overlapping intervals or not sorted by starting coordinate '
                       + ' '.join(ci))
            logProc.logProc(outf, outdir, ' ', 'failed', stderr_out=message)
            return 1
    return 0
''' Take a union of supplied bed files. ''' import sys, subprocess sys.path.insert(0, '/nethome/asalomatov/projects/ppln') import logProc print '\nsys.args :', sys.argv outf, outdir = sys.argv[1:3] cmd = 'cat '+ ' '.join(sys.argv[3:]) + ' | sort -V -k1,1 -k2,2 | uniq > ' + sys.argv[1] #print cmd logProc.logProc(outf, outdir, cmd, 'started') p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode == 0: logProc.logProc(outf, outdir, cmd, 'finished') else: logProc.logProc(outf, outdir, cmd, 'failed', stderr)
''' Filter BAMs based on criteria defined in --filter switches. ''' import sys, subprocess sys.path.insert(0, '/nethome/asalomatov/projects/ppln') import logProc nctFlag = '-nct 4' print '\nsys.args :', sys.argv[1:] inbam, recalibtbl, outbam, refGenome, tmpdir, gatk, outdir = sys.argv[1:] cmd = 'java -Xms750m -Xmx2500m -XX:+UseSerialGC -Djava.io.tmpdir=%(tmpdir)s -jar %(gatk)s --read_filter BadCigar --read_filter NotPrimaryAlignment -T PrintReads -I %(inbam)s -o %(outbam)s -R %(refGenome)s -BQSR %(recalibtbl)s %(nctFlag)s' cmd = cmd % locals() #print cmd logProc.logProc(outbam, outdir, cmd, 'started') p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode == 0: logProc.logProc(outbam, outdir, cmd, 'finished') else: logProc.logProc(outbam, outdir, cmd, 'failed', stderr)
''' ''' import sys from sets import Set sys.path.insert(0, '/nethome/asalomatov/projects/ppln') import logProc if len(sys.argv) == 1: print 'Usage:' print sys.argv[ 0], 'input.bed', 'output.bed', 'logdir', 'filter1', 'filter2', 'filter3' N = 4 inf, outf, outdir = sys.argv[1:N] fltrs = sys.argv[N:] print fltrs cmd = ' ' logProc.logProc(outf, outdir, cmd, 'started') with open(outf, 'w') as fout: with open(inf, 'r') as fin: for l in fin: ls = l.split() if ls[3] in fltrs: fout.write('\t'.join(ls) + '\n') logProc.logProc(outf, outdir, cmd, 'finished')
# Make the project-local logProc module importable.
sys.path.insert(0, "/nethome/asalomatov/projects/ppln")
import logProc

nctFlag = "-nct 4"
# Arguments appended after the java/jar prefix below.  Each trailing
# backslash joins the next line, so the literal is one single line of text.
options = """\
-U LENIENT_VCF_PROCESSING \
--read_filter BadCigar \
--read_filter NotPrimaryAlignment \
-T PrintReads \
-I %(inbam)s \
-o %(outbam)s \
-R %(refGenome)s \
--filter_mismatching_base_and_quals \
--filter_bases_not_stored \
--filter_reads_with_N_cigar \
%(nctFlag)s"""
print "\nsys.args :", sys.argv[1:]
# Exactly seven positional arguments are required.  inbai is unpacked but is
# not referenced by the command template.
inbam, inbai, outbam, gatkjar, refGenome, tmpdir, outdir = sys.argv[1:]
cmd = "java -Xms750m -Xmx2500m -XX:+UseSerialGC -Djava.io.tmpdir=%(tmpdir)s -jar %(gatkjar)s " + options
# Placeholders are filled from the module-level variables of the same name.
cmd = cmd % locals()
# print cmd
logProc.logProc(outbam, outdir, cmd, "started")
# Run through the shell and capture both output streams.
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
# Record the outcome; captured stderr is attached only on failure.
if p.returncode == 0:
    logProc.logProc(outbam, outdir, cmd, "finished")
else:
    logProc.logProc(outbam, outdir, cmd, "failed", stderr)
--assemble=1 \ --hapScoreThreshold=10\ --scThreshold=0.99 \ --filteredReadsFrac=0.9 \ --rmsmqThreshold=20 \ --qdThreshold=0 \ --abThreshold=0.0001 \ --minVarFreq=0.0 ''' # cmd = "%(platypus)s callVariants %(inbams)s --output=%(outfile)s --refFile=%(refGenome)s --regions=%(inregions)s " # cmd = "%(platypus)s callVariants %(inbams)s --output=%(outfile)s --refFile=%(refGenome)s --regions=%(inregions)s %(options)s " #--output=%(outfile)s cmd = "%(platypus)s callVariants %(inbams)s --output=- --refFile=%(refGenome)s --regions=%(inregions)s %(options)s " cmd += "| %(bcftools)s filter -O v --soft-filter 'PlatQualDepth' -e '(FR[0] <= 0.5 && TC < 4 && %%QUAL < 20) || (TC < 13 && %%QUAL < 10) || (FR[0] > 0.5 && TC < 4 && %%QUAL < 50)' -m '+' | %(vcflibdir)s/vcfallelicprimitives --keep-geno | %(vcflibdir)s/vcfstreamsort | %(bgzip)s -c > %(outfile)s" print cmd cmd = cmd % locals() print cmd logProc.logProc(outfile, outdir, cmd, 'started') p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode == 0: logProc.logProc(outfile, outdir, cmd, 'finished') else: logProc.logProc(outfile, outdir, cmd, 'failed', stderr) except Exception as e: logProc.logProc(outfile, outdir, ' ', 'failed', stderr_out=e.message) raise ''' /bioinfo/software/installs/bcbio_nextgen/150607/bin/Platypus.py callVariants --bamFiles=/mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/11480.mo_SSCtest-20-re-fxgr-flr-dp-23-rlgn-rclb.bam,/mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/11480.fa_SSCtest-20-re-fxgr-flr-dp-23-rlgn-rclb.bam, /mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/11480.p1_SSCtest-20-re-fxgr-flr-dp-23-rlgn-rclb.bam --output=- --refFile=/bioinfo/data/bcbio_nextgen/150607/genomes/Hsapiens/GRCh37/seq/GRCh37.fa --regions=/mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/10__bin__11480-uni-mrg.bed --logF ileName=/dev/null --verbosity=1 --assemble=1 --hapScoreThreshold=10 
--scThreshold=0.99 --filteredReadsFrac=0.9 --rmsmqThreshold=20 --qdThreshold=0 --abThreshold=0.0001 --minVarFreq=0.0 '''
''' ''' import sys from sets import Set sys.path.insert(0, '/nethome/asalomatov/projects/ppln') import logProc if len(sys.argv) == 1: print 'Usage:' print sys.argv[0], 'input.bed', 'output.bed', 'logdir', 'filter1', 'filter2', 'filter3' N = 4 inf, outf, outdir = sys.argv[1:N] fltrs = sys.argv[N:] print fltrs cmd = ' ' logProc.logProc(outf, outdir, cmd, 'started') with open(outf, 'w') as fout: with open(inf, 'r') as fin: for l in fin: ls = l.split() if ls[3] in fltrs: fout.write('\t'.join(ls)+'\n') logProc.logProc(outf, outdir, cmd, 'finished')
if int(ni[2]) - int(ci[1]) > w or ci[0] != ni[0]: with open(fname, 'w') as fout: fout.write('\t'.join(ci) + '\n') ci[0] = ni[0] ci[1] = ni[1] ci[2] = ni[2] return True else: ci[2] = ni[2] return False inf, outf, wdw, outdir = sys.argv[1:] try: logProc.logProc(outf, outdir, sys.argv[0], 'started') curr_ntvl = [] written = True bin_num = 0 fname = os.path.join(os.path.dirname(outf), str(bin_num) + '__' + os.path.basename(outf)) with open(inf, 'r') as fin: for l in fin: ls = l.split() if checkNtvl(curr_ntvl, ls): sys.exit(1) written = accumNtvl(curr_ntvl, ls, int(wdw), fname) if written: bin_num += 1 fname = os.path.join( os.path.dirname(outf), str(bin_num) + '__' + os.path.basename(outf))
if int(ni[2]) - int(ci[1]) > w or ci[0] != ni[0]: with open(fname, 'w') as fout: fout.write('\t'.join(ci)+'\n') ci[0] = ni[0] ci[1] = ni[1] ci[2] = ni[2] return True else: ci[2] = ni[2] return False inf, outf, wdw, outdir = sys.argv[1:] try: logProc.logProc(outf, outdir, sys.argv[0], 'started') curr_ntvl = [] written = True bin_num = 0 fname = os.path.join(os.path.dirname(outf), str(bin_num)+'__'+os.path.basename(outf)) with open(inf, 'r') as fin: for l in fin: ls = l.split() if checkNtvl(curr_ntvl, ls): sys.exit(1) written = accumNtvl(curr_ntvl, ls, int(wdw), fname) if written: bin_num += 1 fname = os.path.join(os.path.dirname(outf), str(bin_num)+'__'+os.path.basename(outf)) if not written: with open(fname, 'w') as fout: fout.write('\t'.join(curr_ntvl)+'\n')
print '\nsys.args :', sys.argv[1:] N = 8 refGenome, freebayes, vcflibdir, bgzip, outdir, outfile, inbed = sys.argv[ 1:N] fl = ' -b ' inbams = '' for f in sys.argv[N:]: inbams += fl + f options = ''' \ --ploidy 2 \ --min-repeat-entropy 1''' cmd = "%(freebayes)s %(inbams)s -f %(refGenome)s --targets %(inbed)s %(options)s " # cmd += "| %(vcflibdir)s/vcffilter -f 'QUAL > 5' -F 'QUALlt5' -s | %(vcflibdir)s/vcfallelicprimitives | %(vcflibdir)s/vcfstreamsort | %(bgzip)s -c > %(outfile)s" cmd += "| %(vcflibdir)s/vcffilter -f 'QUAL > 5' -F 'QUALlt5' -s | %(bgzip)s -c > %(outfile)s" cmd = cmd % locals() print cmd logProc.logProc(outfile, outdir, cmd, 'started') p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = p.communicate() if p.returncode == 0: logProc.logProc(outfile, outdir, cmd, 'finished') else: logProc.logProc(outfile, outdir, cmd, 'failed', stderr) except Exception as e: logProc.logProc(outfile, outdir, ' ', 'failed', stderr_out=e.message) raise