Beispiel #1
0
def create_sh(pooldirs, poolref):
    """Run 01_trim-fastq.py for every pool directory, then balance the queue.

    Positional arguments:
    pooldirs - iterable of pool directory paths; each basename is the pool name
    poolref - mapping indexed by pool name; the looked-up value is handed to
              the trim script as its second argument
    """
    print(Bcolors.BOLD + '\nwriting sh files' + Bcolors.ENDC)
    for pool_path in pooldirs:
        pool_name = op.basename(pool_path)
        print(Bcolors.BOLD + '\npool = %s' % pool_name + Bcolors.ENDC)
        # the reference lookup happens before the hand-off is announced
        reference = poolref[pool_name]
        print('\tsending pooldir and ref to 01_trim-fastq.py')
        interpreter = shutil.which('python')
        trim_script = op.join(os.environ['HOME'],
                              'gatk_pipeline/01_trim-fastq.py')
        subprocess.call([interpreter, trim_script, pool_path, reference])
    print("\n")
    # once every pool has been handed off, balance the 'trim' jobs
    balance_queue.main('balance_queue.py', 'trim')
def create_sh(pooldirs, poolref, parentdir):
    """Run 01_trim-fastq.py for every pool directory, then balance the queue.

    Positional arguments:
    pooldirs - iterable of pool directory paths; each basename is the pool name
    poolref - dictionary with key = pool, val = /path/to/ref
    parentdir - forwarded unchanged as the third argument of balance_queue.main
    """
    print(Bcolors.BOLD + '\nwriting sh files' + Bcolors.ENDC)
    for pool_path in pooldirs:
        pool_name = op.basename(pool_path)
        print(Bcolors.BOLD + '\npool = %s' % pool_name + Bcolors.ENDC)
        # the reference lookup happens before the hand-off is announced
        reference = poolref[pool_name]
        print('\tsending pooldir and ref to 01_trim-fastq.py')
        interpreter = shutil.which('python')
        trim_script = op.join(os.environ['HOME'], 'pipeline/01_trim-fastq.py')
        subprocess.call([interpreter, trim_script, pool_path, reference])
    print("\n")
    # once every pool has been handed off, balance the 'trim' jobs
    balance_queue.main('balance_queue.py', 'trim', parentdir)
# get more dup stats
module load samtools/1.9
samtools flagstat {dupfile} > {dupflag}
module unload samtools

# call next step
source $HOME/.bashrc
export PYTHONPATH="${{PYTHONPATH}}:$HOME/gatk_pipeline"
export SQUEUE_FORMAT="%.8i %.8u %.12a %.68j %.3t %16S %.10L %.5D %.4C %.6b %.7m %N (%r)"

python $HOME/gatk_pipeline/04_scatter-gvcf.py {dupfile} {pooldir} {samp}

"""

# Set up the directory for this step's shell scripts (plus dupdir) and write
# the rendered script text into a file named after the pool and sample.
# NOTE(review): makedir is defined elsewhere; presumably it creates the
# directory if it is missing -- confirm.
shdir = op.join(pooldir, 'shfiles/03_mark_build_shfiles')
for d in [shdir, dupdir]:
    makedir(d)
# `pool` and `samp` are pulled from the local namespace via locals()
file = op.join(shdir, '%(pool)s-%(samp)s-mark.sh' % locals())
with open(file, 'w') as o:
    o.write("%s" % text)

# Change into shdir before handing the script to sbatch
os.chdir(shdir)
print('shdir = ', shdir)
subprocess.call([shutil.which('sbatch'), file])

# Call balance_queue.main for the 'mark' category, then for 'bwa'
balance_queue.main('balance_queue.py', 'mark')
balance_queue.main('balance_queue.py', 'bwa')
module load java
export _JAVA_OPTIONS="-Xms256m -Xmx28g"
java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T RealignerTargetCreator -R %(ref)s --num_threads 32 -I %(dupfile)s -o %(listfile)s
module unload gatk

# next step
source $HOME/.bashrc
export PYTHONPATH="${PYTHONPATH}:$HOME/pipeline"
export SQUEUE_FORMAT="%%.8i %%.8u %%.12a %%.68j %%.3t %%16S %%.10L %%.5D %%.4C %%.6b %%.7m %%N (%%r)"
python $HOME/pipeline/05_indelRealign_crisp.py %(pooldir)s %(samp)s %(dupfile)s %(ref)s

''' % locals()

# Set up aligndir and the directory for this step's shell scripts, then write
# the rendered script text into a file named after the pool and sample.
# NOTE(review): makedir is defined elsewhere; presumably it creates the
# directory if it is missing -- confirm.
shdir = op.join(pooldir, 'shfiles/04_realignTarget_shfiles')
for d in [aligndir, shdir]:
    makedir(d)
# `pool` and `samp` are pulled from the local namespace via locals()
file = op.join(shdir, '%(pool)s-%(samp)s-realign.sh' % locals())
with open(file, 'w') as o:
    o.write("%s" % text)

# Change into shdir before handing the script to sbatch
os.chdir(shdir)
print('shdir =', shdir)
subprocess.call([shutil.which('sbatch'), file])

# Call balance_queue.main for the 'realign' category, then for 'mark'
balance_queue.main('balance_queue.py', 'realign')
balance_queue.main('balance_queue.py', 'mark')
Beispiel #5
0
module load gatk/3.8
module load java
export _JAVA_OPTIONS="-Xms256m -Xmx7g"
java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T IndelRealigner -R %(ref)s -I %(dupfile)s -targetIntervals %(listfile)s -o %(realbam)s
module unload gatk

# sbatch CRISP job if all pooled bamfiles have been created
source $HOME/.bashrc
export PYTHONPATH="${PYTHONPATH}:$HOME/pipeline"
export SQUEUE_FORMAT="%%.8i %%.8u %%.12a %%.68j %%.3t %%16S %%.10L %%.5D %%.4C %%.6b %%.7m %%N (%%r)"
python $HOME/pipeline/start_crispANDvarscan.py %(parentdir)s %(pool)s
python $HOME/pipeline/balance_queue.py bedfile

''' % locals()

# Set up the directory for this step's shell scripts, then write the rendered
# script text into a file named after the pool and sample.
# NOTE(review): makedir is defined elsewhere; presumably it creates the
# directory if it is missing -- confirm.
shdir = op.join(pooldir, 'shfiles/05_indelRealign_shfiles')
makedir(shdir)
# `pool` and `samp` are pulled from the local namespace via locals()
file = op.join(shdir, '%(pool)s-%(samp)s-indelRealign.sh' % locals())
with open(file, 'w') as o:
    o.write("%s" % text)

# Change into shdir before handing the script to sbatch
os.chdir(shdir)
print('shdir = ', shdir)
subprocess.call([shutil.which('sbatch'), file])

# Call balance_queue.main for the 'indelRealign' category, then for 'realign'
balance_queue.main('balance_queue.py', 'indelRealign')
balance_queue.main('balance_queue.py', 'realign')
Beispiel #6
0
#SBATCH --time=23:59:00
#SBATCH --mem=55000M
#SBATCH --nodes=1
#SBATCH --ntasks=32
#SBATCH --cpus-per-task=1
#SBATCH --job-name={pool}-{samp}-bwa
#SBATCH --output={pool}-{samp}-bwa_%j.out 
{email_text}

{bwatext}

# mark and build
source $HOME/.bashrc
export PYTHONPATH="${{PYTHONPATH}}:$HOME/gatk_pipeline"
export SQUEUE_FORMAT="%.8i %.8u %.12a %.68j %.3t %16S %.10L %.5D %.4C %.6b %.7m %N (%r)"
python $HOME/gatk_pipeline/03_mark_build.py {pooldir} {samp}
'''

# Write the rendered script text into bwashdir, in a file named after the
# pool and sample.
qsubfile = op.join(bwashdir, f'{pool}-{samp}-bwa.sh')
with open(qsubfile, 'w') as o:
    o.write("%s" % text)

# Change into bwashdir before handing the script to sbatch
os.chdir(bwashdir)
# NOTE(review): this prints `shdir`, but the directory used here is
# `bwashdir`; `shdir` is not assigned in the visible code -- confirm it is
# defined and is the intended value.
print('shdir = ', shdir)
subprocess.call([shutil.which('sbatch'), qsubfile])

# Call balance_queue.main for the 'bwa' category, then for 'trim'
balance_queue.main('balance_queue.py', 'bwa')
balance_queue.main('balance_queue.py', 'trim')
Beispiel #7
0
{email_text}

module load java
module load gatk/3.8
export _JAVA_OPTIONS="-Xms256m -Xmx7g"
java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T IndelRealigner -R {ref} -I {dupfile} -targetIntervals {listfile} -o {realbam}
module unload gatk

# sbatch varscan jobs if all pooled bamfiles have been created
source {bash_variables}
python $HOME/pipeline/start_varscan.py {parentdir} {pool}
python $HOME/pipeline/balance_queue.py bedfile {parentdir}

'''

# Set up the directory for this step's shell scripts, then write the rendered
# script text into a file named after the pool and sample.
# NOTE(review): makedir is defined elsewhere; presumably it creates the
# directory if it is missing -- confirm.
shdir = op.join(pooldir, 'shfiles/05_indelRealign_shfiles')
makedir(shdir)
file = op.join(shdir, f'{pool}-{samp}-indelRealign.sh')
with open(file, 'w') as o:
    o.write("%s" % text)

# Change into shdir before handing the script to sbatch
os.chdir(shdir)
print('shdir = ', shdir)
subprocess.call([shutil.which('sbatch'), file])


# Call balance_queue.main for the 'indelRealign' category, then for 'realign',
# passing parentdir through as the third argument each time
balance_queue.main('balance_queue.py', 'indelRealign', parentdir)
balance_queue.main('balance_queue.py', 'realign', parentdir)