Example #1
0
def checkNtvl(ci, ni):
    '''Check that the next interval does not conflict with the current one.

    ci is the current interval (empty before the first record has been
    seen) and ni is the next one.  Both are sequences of strings laid out
    as (chromosome, start, end).

    Returns 0 when ci is empty, when the intervals are on different
    chromosomes, or when neither ordering test fires.  Returns 1, after
    recording a 'failed' entry through logProc, when on the same chromosome
    the end of ci is greater than the start of ni, or the start of ci is
    greater than the end of ni.
    '''
    # Nothing to compare against yet, or the chromosome has changed.
    if not ci or ci[0] != ni[0]:
        return 0

    overlapping = int(ci[2]) > int(ni[1])
    if not overlapping and not int(ci[1]) > int(ni[2]):
        return 0

    message = (inf
               + ' contains overlapping intervals or not sorted by starting coordinate '
               + ' '.join(ci))
    logProc.logProc(outf, outdir, ' ', 'failed', stderr_out=message)
    return 1
Example #2
0
def checkNtvl(ci, ni):
    '''Check that two consecutive intervals are sorted and non overlapping.

    ci is the current interval (an empty sequence before the first record)
    and ni is the next one; each is a sequence of strings laid out as
    (chromosome, start, end).

    The test only applies when both intervals share a chromosome.  It fails
    when the end of ci is greater than the start of ni, or when the start of
    ci is greater than the end of ni.  On failure a 'failed' entry is
    recorded through logProc and 1 is returned; otherwise 0 is returned.
    '''
    same_chrom = bool(ci) and ci[0] == ni[0]
    if same_chrom and (int(ci[2]) > int(ni[1]) or int(ci[1]) > int(ni[2])):
        details = ' '.join(ci)
        logProc.logProc(
            outf, outdir, ' ', 'failed',
            stderr_out=inf
            + ' contains overlapping intervals or not sorted by starting coordinate '
            + details)
        return 1
    return 0
Example #3
0
'''
Take a union of supplied bed files.
'''
import sys, subprocess
sys.path.insert(0, '/nethome/asalomatov/projects/ppln')
import logProc

print '\nsys.args   :', sys.argv
outf, outdir = sys.argv[1:3]
cmd = 'cat '+ ' '.join(sys.argv[3:]) + ' | sort -V -k1,1 -k2,2 | uniq > ' + sys.argv[1] 
#print cmd
logProc.logProc(outf, outdir, cmd, 'started')
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode == 0:
    logProc.logProc(outf, outdir, cmd, 'finished')
else:
    logProc.logProc(outf, outdir, cmd, 'failed', stderr)

Example #4
0
'''
Filter BAMs based on criteria defined in --filter switches.
'''
import sys, subprocess
sys.path.insert(0, '/nethome/asalomatov/projects/ppln')
import logProc

nctFlag = '-nct 4'

print '\nsys.args   :', sys.argv[1:]
inbam, recalibtbl, outbam, refGenome, tmpdir, gatk, outdir = sys.argv[1:]
cmd = 'java -Xms750m -Xmx2500m -XX:+UseSerialGC -Djava.io.tmpdir=%(tmpdir)s -jar %(gatk)s --read_filter BadCigar --read_filter NotPrimaryAlignment -T PrintReads -I %(inbam)s -o %(outbam)s -R %(refGenome)s -BQSR %(recalibtbl)s %(nctFlag)s'
cmd = cmd % locals()
#print cmd
logProc.logProc(outbam, outdir, cmd, 'started')
p = subprocess.Popen(cmd,
                     shell=True,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
if p.returncode == 0:
    logProc.logProc(outbam, outdir, cmd, 'finished')
else:
    logProc.logProc(outbam, outdir, cmd, 'failed', stderr)
Example #5
0
'''

'''
import sys
from sets import Set
sys.path.insert(0, '/nethome/asalomatov/projects/ppln')
import logProc

if len(sys.argv) == 1:
    print 'Usage:'
    print sys.argv[
        0], 'input.bed', 'output.bed', 'logdir', 'filter1', 'filter2', 'filter3'

N = 4
inf, outf, outdir = sys.argv[1:N]
fltrs = sys.argv[N:]
print fltrs
cmd = ' '
logProc.logProc(outf, outdir, cmd, 'started')

with open(outf, 'w') as fout:
    with open(inf, 'r') as fin:
        for l in fin:
            ls = l.split()
            if ls[3] in fltrs:
                fout.write('\t'.join(ls) + '\n')

logProc.logProc(outf, outdir, cmd, 'finished')
Example #6
0
# NOTE(review): sys and subprocess are used below but imported outside this
# excerpt -- presumably at the top of the script; confirm.
# Make the logProc helper module importable.
sys.path.insert(0, "/nethome/asalomatov/projects/ppln")
import logProc


# Substituted for the %(nctFlag)s placeholder at the end of the options.
nctFlag = "-nct 4"
# Arguments appended to the java/GATK command line.  The trailing backslashes
# join the lines, so the value is a single-line string.  The %(name)s
# placeholders are filled from this module's variables by `cmd % locals()`.
options = """\
-U LENIENT_VCF_PROCESSING \
--read_filter BadCigar \
--read_filter NotPrimaryAlignment \
-T PrintReads \
-I %(inbam)s  \
-o %(outbam)s  \
-R %(refGenome)s  \
--filter_mismatching_base_and_quals  \
--filter_bases_not_stored  \
--filter_reads_with_N_cigar  \
%(nctFlag)s"""

print "\nsys.args   :", sys.argv[1:]
# Exactly seven positional arguments are required.  inbai is unpacked but not
# referenced again in the visible code.
inbam, inbai, outbam, gatkjar, refGenome, tmpdir, outdir = sys.argv[1:]
cmd = "java -Xms750m -Xmx2500m -XX:+UseSerialGC -Djava.io.tmpdir=%(tmpdir)s -jar %(gatkjar)s " + options
# The placeholder names must match the variable names above exactly.
cmd = cmd % locals()
# print cmd
logProc.logProc(outbam, outdir, cmd, "started")
# Run through the shell and capture both output streams.
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = p.communicate()
# Record the outcome; the captured stderr is passed along only on failure.
if p.returncode == 0:
    logProc.logProc(outbam, outdir, cmd, "finished")
else:
    logProc.logProc(outbam, outdir, cmd, "failed", stderr)
Example #7
0
    --assemble=1 \
    --hapScoreThreshold=10\
    --scThreshold=0.99 \
    --filteredReadsFrac=0.9 \
    --rmsmqThreshold=20 \
    --qdThreshold=0 \
    --abThreshold=0.0001 \
    --minVarFreq=0.0 '''
#    cmd = "%(platypus)s callVariants %(inbams)s --output=%(outfile)s --refFile=%(refGenome)s --regions=%(inregions)s "
#    cmd = "%(platypus)s callVariants %(inbams)s --output=%(outfile)s --refFile=%(refGenome)s --regions=%(inregions)s %(options)s "
#--output=%(outfile)s 
    cmd = "%(platypus)s callVariants %(inbams)s --output=- --refFile=%(refGenome)s --regions=%(inregions)s %(options)s "
    cmd += "| %(bcftools)s filter -O v --soft-filter 'PlatQualDepth' -e '(FR[0] <= 0.5 && TC < 4 && %%QUAL < 20) || (TC < 13 && %%QUAL < 10) ||      (FR[0] > 0.5 && TC < 4 && %%QUAL < 50)' -m '+' | %(vcflibdir)s/vcfallelicprimitives --keep-geno | %(vcflibdir)s/vcfstreamsort | %(bgzip)s -c > %(outfile)s"
    print cmd
    cmd = cmd % locals()
    print cmd
    logProc.logProc(outfile, outdir, cmd, 'started')
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode == 0:
        logProc.logProc(outfile, outdir, cmd, 'finished')
    else:
        logProc.logProc(outfile, outdir, cmd, 'failed', stderr)
except Exception as e:
    logProc.logProc(outfile, outdir, ' ', 'failed', stderr_out=e.message)
    raise

'''
/bioinfo/software/installs/bcbio_nextgen/150607/bin/Platypus.py callVariants --bamFiles=/mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/11480.mo_SSCtest-20-re-fxgr-flr-dp-23-rlgn-rclb.bam,/mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/11480.fa_SSCtest-20-re-fxgr-flr-dp-23-rlgn-rclb.bam, /mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/11480.p1_SSCtest-20-re-fxgr-flr-dp-23-rlgn-rclb.bam --output=- --refFile=/bioinfo/data/bcbio_nextgen/150607/genomes/Hsapiens/GRCh37/seq/GRCh37.fa --regions=/mnt/scratch/asalomatov/bioppln/run11480dsPIPE03_EX_ns/work/10__bin__11480-uni-mrg.bed --logF ileName=/dev/null     --verbosity=1     --assemble=1     --hapScoreThreshold=10 --scThreshold=0.99     --filteredReadsFrac=0.9     --rmsmqThreshold=20 --qdThreshold=0     --abThreshold=0.0001     --minVarFreq=0.0 
'''
Example #8
0
'''

'''
import sys
from sets import Set
sys.path.insert(0, '/nethome/asalomatov/projects/ppln')
import logProc

if len(sys.argv) == 1:
    print 'Usage:'
    print sys.argv[0], 'input.bed', 'output.bed', 'logdir', 'filter1', 'filter2', 'filter3'

N = 4
inf, outf, outdir = sys.argv[1:N]
fltrs = sys.argv[N:]
print fltrs
cmd = ' '
logProc.logProc(outf, outdir, cmd, 'started')

with open(outf, 'w') as fout:
    with open(inf, 'r') as fin:
        for l in fin:
            ls = l.split()
            if ls[3] in fltrs:
                fout.write('\t'.join(ls)+'\n')

logProc.logProc(outf, outdir, cmd, 'finished')
Example #9
0
        if int(ni[2]) - int(ci[1]) > w or ci[0] != ni[0]:
            with open(fname, 'w') as fout:
                fout.write('\t'.join(ci) + '\n')
            ci[0] = ni[0]
            ci[1] = ni[1]
            ci[2] = ni[2]
            return True
        else:
            ci[2] = ni[2]
            return False


inf, outf, wdw, outdir = sys.argv[1:]

try:
    logProc.logProc(outf, outdir, sys.argv[0], 'started')
    curr_ntvl = []
    written = True
    bin_num = 0
    fname = os.path.join(os.path.dirname(outf),
                         str(bin_num) + '__' + os.path.basename(outf))
    with open(inf, 'r') as fin:
        for l in fin:
            ls = l.split()
            if checkNtvl(curr_ntvl, ls): sys.exit(1)
            written = accumNtvl(curr_ntvl, ls, int(wdw), fname)
            if written:
                bin_num += 1
                fname = os.path.join(
                    os.path.dirname(outf),
                    str(bin_num) + '__' + os.path.basename(outf))
Example #10
0
        if int(ni[2]) - int(ci[1]) > w or ci[0] != ni[0]:
            with open(fname, 'w') as fout:
                fout.write('\t'.join(ci)+'\n')
            ci[0] = ni[0]
            ci[1] = ni[1]
            ci[2] = ni[2]
            return True
        else:
            ci[2] = ni[2]
            return False
    

inf, outf, wdw, outdir = sys.argv[1:]

try:
    logProc.logProc(outf, outdir, sys.argv[0], 'started')
    curr_ntvl = []
    written = True
    bin_num = 0
    fname = os.path.join(os.path.dirname(outf), str(bin_num)+'__'+os.path.basename(outf))
    with open(inf, 'r') as fin:
        for l in fin:
            ls = l.split()
            if checkNtvl(curr_ntvl, ls): sys.exit(1)
            written = accumNtvl(curr_ntvl, ls, int(wdw), fname)
            if written:
                bin_num += 1
                fname = os.path.join(os.path.dirname(outf), str(bin_num)+'__'+os.path.basename(outf))
    if not written:
        with open(fname, 'w') as fout:
            fout.write('\t'.join(curr_ntvl)+'\n')
Example #11
0
    print '\nsys.args   :', sys.argv[1:]
    N = 8
    refGenome, freebayes, vcflibdir, bgzip, outdir, outfile, inbed = sys.argv[
        1:N]
    fl = ' -b '
    inbams = ''
    for f in sys.argv[N:]:
        inbams += fl + f
    options = ''' \
    --ploidy 2 \
    --min-repeat-entropy 1'''

    cmd = "%(freebayes)s %(inbams)s -f %(refGenome)s --targets %(inbed)s %(options)s "
    #    cmd += "| %(vcflibdir)s/vcffilter -f 'QUAL > 5' -F 'QUALlt5' -s | %(vcflibdir)s/vcfallelicprimitives | %(vcflibdir)s/vcfstreamsort | %(bgzip)s -c > %(outfile)s"
    cmd += "| %(vcflibdir)s/vcffilter -f 'QUAL > 5' -F 'QUALlt5' -s | %(bgzip)s -c > %(outfile)s"
    cmd = cmd % locals()
    print cmd
    logProc.logProc(outfile, outdir, cmd, 'started')
    p = subprocess.Popen(cmd,
                         shell=True,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if p.returncode == 0:
        logProc.logProc(outfile, outdir, cmd, 'finished')
    else:
        logProc.logProc(outfile, outdir, cmd, 'failed', stderr)
except Exception as e:
    logProc.logProc(outfile, outdir, ' ', 'failed', stderr_out=e.message)
    raise