def create_sh(pooldirs, poolref):
    """Hand every pool directory to 01_trim-fastq.py, then balance the trim queue.

    Positional arguments:
    pooldirs - iterable of pool directory paths; the basename of each is the pool name
    poolref - mapping with key = pool name, val = reference passed on to 01_trim-fastq.py
    """
    def _bold(message):
        # wrap a message in the terminal codes used for the pipeline's headings
        return Bcolors.BOLD + message + Bcolors.ENDC

    print(_bold('\nwriting sh files'))
    for pool_path in pooldirs:
        pool_name = op.basename(pool_path)
        print(_bold(f'\npool = {pool_name}'))
        reference = poolref[pool_name]
        print('\tsending pooldir and ref to 01_trim-fastq.py')
        # run the trimming script with whichever python is first on PATH
        trim_command = [
            shutil.which('python'),
            op.join(os.environ['HOME'], 'gatk_pipeline/01_trim-fastq.py'),
            pool_path,
            reference,
        ]
        subprocess.call(trim_command)
    print("\n")
    # every pool has been handed off; let balance_queue process the 'trim' phase
    balance_queue.main('balance_queue.py', 'trim')
def create_sh(pooldirs, poolref, parentdir):
    """Run 01_trim-fastq.py to sbatch trimming jobs, then balance queue.

    Positional arguments:
    pooldirs - a list of subdirectories in parentdir for groups of pools
    poolref - dictionary with key = pool, val = /path/to/ref
    parentdir - forwarded unchanged as the last argument of balance_queue.main

    Raises:
    KeyError - if the pool name (basename of a pooldir) is not a key of poolref,
               or if HOME is not set in the environment
    """
    # create sh files
    print(Bcolors.BOLD + '\nwriting sh files' + Bcolors.ENDC)
    for pooldir in pooldirs:
        # normpath strips a trailing separator; without it basename('/x/pool/')
        # is '' and the poolref lookup below fails with a misleading KeyError
        pool = op.basename(op.normpath(pooldir))
        print(Bcolors.BOLD + '\npool = %s' % pool + Bcolors.ENDC)
        ref = poolref[pool]
        print('\tsending pooldir and ref to 01_trim-fastq.py')
        exit_status = subprocess.call([
            shutil.which('python'),
            op.join(os.environ['HOME'], 'pipeline/01_trim-fastq.py'),
            pooldir,
            ref
        ])
        if exit_status != 0:
            # keep going with the remaining pools, but do not hide the failure
            print(Bcolors.BOLD +
                  '\tWARNING: 01_trim-fastq.py exited with status %s for pool %s'
                  % (exit_status, pool) +
                  Bcolors.ENDC)
    print("\n")
    balance_queue.main('balance_queue.py', 'trim', parentdir)
# get more dup stats module load samtools/1.9 samtools flagstat {dupfile} > {dupflag} module unload samtools # call next step source $HOME/.bashrc export PYTHONPATH="${{PYTHONPATH}}:$HOME/gatk_pipeline" export SQUEUE_FORMAT="%.8i %.8u %.12a %.68j %.3t %16S %.10L %.5D %.4C %.6b %.7m %N (%r)" python $HOME/gatk_pipeline/04_scatter-gvcf.py {dupfile} {pooldir} {samp} """ # create shdir and file shdir = op.join(pooldir, 'shfiles/03_mark_build_shfiles') for d in [shdir, dupdir]: makedir(d) file = op.join(shdir, '%(pool)s-%(samp)s-mark.sh' % locals()) with open(file, 'w') as o: o.write("%s" % text) # sbatch file os.chdir(shdir) print('shdir = ', shdir) subprocess.call([shutil.which('sbatch'), file]) # balance queue balance_queue.main('balance_queue.py', 'mark') balance_queue.main('balance_queue.py', 'bwa')
module load java export _JAVA_OPTIONS="-Xms256m -Xmx28g" java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \ -T RealignerTargetCreator -R %(ref)s --num_threads 32 -I %(dupfile)s -o %(listfile)s module unload gatk # next step source $HOME/.bashrc export PYTHONPATH="${PYTHONPATH}:$HOME/pipeline" export SQUEUE_FORMAT="%%.8i %%.8u %%.12a %%.68j %%.3t %%16S %%.10L %%.5D %%.4C %%.6b %%.7m %%N (%%r)" python $HOME/pipeline/05_indelRealign_crisp.py %(pooldir)s %(samp)s %(dupfile)s %(ref)s ''' % locals() # create shdir and shfile shdir = op.join(pooldir, 'shfiles/04_realignTarget_shfiles') for d in [aligndir, shdir]: makedir(d) file = op.join(shdir, '%(pool)s-%(samp)s-realign.sh' % locals()) with open(file, 'w') as o: o.write("%s" % text) # sbatch file os.chdir(shdir) print('shdir =', shdir) subprocess.call([shutil.which('sbatch'), file]) # balance queue balance_queue.main('balance_queue.py', 'realign') balance_queue.main('balance_queue.py', 'mark')
module load gatk/3.8 module load java export _JAVA_OPTIONS="-Xms256m -Xmx7g" java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \ -T IndelRealigner -R %(ref)s -I %(dupfile)s -targetIntervals %(listfile)s -o %(realbam)s module unload gatk # sbatch CRISP job if all pooled bamfiles have been created source $HOME/.bashrc export PYTHONPATH="${PYTHONPATH}:$HOME/pipeline" export SQUEUE_FORMAT="%%.8i %%.8u %%.12a %%.68j %%.3t %%16S %%.10L %%.5D %%.4C %%.6b %%.7m %%N (%%r)" python $HOME/pipeline/start_crispANDvarscan.py %(parentdir)s %(pool)s python $HOME/pipeline/balance_queue.py bedfile ''' % locals() # create shdir and shfile shdir = op.join(pooldir, 'shfiles/05_indelRealign_shfiles') makedir(shdir) file = op.join(shdir, '%(pool)s-%(samp)s-indelRealign.sh' % locals()) with open(file, 'w') as o: o.write("%s" % text) os.chdir(shdir) print('shdir = ', shdir) subprocess.call([shutil.which('sbatch'), file]) balance_queue.main('balance_queue.py', 'indelRealign') balance_queue.main('balance_queue.py', 'realign')
#SBATCH --time=23:59:00 #SBATCH --mem=55000M #SBATCH --nodes=1 #SBATCH --ntasks=32 #SBATCH --cpus-per-task=1 #SBATCH --job-name={pool}-{samp}-bwa #SBATCH --output={pool}-{samp}-bwa_%j.out {email_text} {bwatext} # mark and build source $HOME/.bashrc export PYTHONPATH="${{PYTHONPATH}}:$HOME/gatk_pipeline" export SQUEUE_FORMAT="%.8i %.8u %.12a %.68j %.3t %16S %.10L %.5D %.4C %.6b %.7m %N (%r)" python $HOME/gatk_pipeline/03_mark_build.py {pooldir} {samp} ''' # create shfile qsubfile = op.join(bwashdir, f'{pool}-{samp}-bwa.sh') with open(qsubfile, 'w') as o: o.write("%s" % text) # sbatch file os.chdir(bwashdir) print('shdir = ', shdir) subprocess.call([shutil.which('sbatch'), qsubfile]) balance_queue.main('balance_queue.py', 'bwa') balance_queue.main('balance_queue.py', 'trim')
{email_text} module load java module load gatk/3.8 export _JAVA_OPTIONS="-Xms256m -Xmx7g" java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \ -T IndelRealigner -R {ref} -I {dupfile} -targetIntervals {listfile} -o {realbam} module unload gatk # sbatch varscan jobs if all pooled bamfiles have been created source {bash_variables} python $HOME/pipeline/start_varscan.py {parentdir} {pool} python $HOME/pipeline/balance_queue.py bedfile {parentdir} ''' # create shdir and shfile shdir = op.join(pooldir, 'shfiles/05_indelRealign_shfiles') makedir(shdir) file = op.join(shdir, f'{pool}-{samp}-indelRealign.sh') with open(file, 'w') as o: o.write("%s" % text) os.chdir(shdir) print('shdir = ', shdir) subprocess.call([shutil.which('sbatch'), file]) balance_queue.main('balance_queue.py', 'indelRealign', parentdir) balance_queue.main('balance_queue.py', 'realign', parentdir)