def create_combine(pids, parentdir, pool, program, shdir):
    """Create command file to combine crisp or varscan jobs once they're finished.

    Positional arguments:
    pids = list of slurm job id dependencies (the jobs that need to finish first)
    ...
    """
    pooldir = op.join(parentdir, pool)
    email_text = get_email_info(parentdir, 'final')
    dependencies = '#SBATCH --dependency=afterok:' + ':'.join(pids)  # Slurm separates same-type job IDs with colons
    text = f'''#!/bin/bash
#SBATCH --job-name={pool}-combine-{program}
#SBATCH --time=12:00:00
#SBATCH --mem=20000M
#SBATCH --cpus-per-task=1
#SBATCH --output={pool}-combine-{program}_%j.out
{dependencies}
{email_text}


source $HOME/.bashrc
export PYTHONPATH="${{PYTHONPATH}}:$HOME/pipeline"
export SQUEUE_FORMAT="%.8i %.8u %.12a %.68j %.3t %16S %.10L %.5D %.4C %.6b %.7m %N (%r)"

python $HOME/pipeline/combine_crispORvarscan.py {pooldir} {program} {pool}

'''
    combfile = op.join(shdir, f'{pool}-combine-{program}.sh')
    with open(combfile, 'w') as o:
        o.write(text)
    sbatch(combfile)
    print(f'sbatched {program} combinefile with dependencies: ' + ','.join(pids))
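
# Quick sanity check of the dependency header built above, using made-up job
# IDs; Slurm separates same-type dependency IDs with colons:
demo_pids = ['12345678', '12345679']
print('#SBATCH --dependency=afterok:' + ':'.join(demo_pids))
# -> #SBATCH --dependency=afterok:12345678:12345679
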
Example 2
def create_combine(pids, parentdir, pool, program, shdir):
    """Create command file to combine varscan jobs once they're finished.

    Positional arguments:
    pids = list of slurm job id dependencies (the jobs that need to finish first)
    ...
    """
    pooldir = op.join(parentdir, pool)
    email_text = get_email_info(parentdir, 'final')
    dependencies = '#SBATCH --dependency=afterok:' + ':'.join(pids)  # Slurm separates same-type job IDs with colons
    bash_variables = op.join(parentdir, 'bash_variables')
    text = f'''#!/bin/bash
#SBATCH --job-name={pool}-combine-{program}
#SBATCH --time=12:00:00
#SBATCH --mem=20000M
#SBATCH --cpus-per-task=1
#SBATCH --output={pool}-combine-{program}_%j.out
{dependencies}
{email_text}


source {bash_variables}

python $HOME/pipeline/combine_varscan.py {pooldir} {program} {pool}

'''
    combfile = op.join(shdir, f'{pool}-combine-{program}.sh')
    with open(combfile, 'w') as o:
        o.write(text)
    sbatch(combfile)
    print(f'sbatched {program} combinefile with dependencies: ' +
          ','.join(pids))
Example 3
import sys, os, balance_queue, subprocess, shutil
from os import path as op
from coadaptree import makedir, get_email_info, pklload

thisfile, pooldir, samp = sys.argv
sortfiles = pklload(op.join(pooldir, '%s_sortfiles.pkl' % samp))

# MarkDuplicates
dupdir = op.join(pooldir, '03_dedup_rg_filtered_indexed_sorted_bamfiles')
pool = op.basename(pooldir)
dupfile = op.join(dupdir, "%s_rd.bam" % samp)
dupflag = dupfile.replace(".bam", ".bam.flagstats")
dupstat = op.join(dupdir, "%s_rd_dupstat.txt" % samp)

# create sh file
email_text = get_email_info(op.dirname(pooldir), '03')
joined = ' I='.join(sortfiles)
text = f"""#!/bin/bash
#SBATCH --time=11:59:00
#SBATCH --mem=30000M
#SBATCH --ntasks=1
#SBATCH --job-name={pool}-{samp}-mark
#SBATCH --output={pool}-{samp}-mark_%j.out 
{email_text}

# remove dups
module load picard/2.18.9
java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTPICARD/picard.jar MarkDuplicates \
I={joined} O={dupfile} MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 M={dupstat} REMOVE_DUPLICATES=true
java -jar $EBROOTPICARD/picard.jar BuildBamIndex I={dupfile}
module unload picard
"""
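
# Sketch of the ' I='.join pattern used above, with hypothetical bam names;
# the leading I= is supplied by the literal "I={joined}" in the command:
demo_sorted = ['samp_R1R2.bam', 'samp_R3R4.bam']
print('I=' + ' I='.join(demo_sorted))
# -> I=samp_R1R2.bam I=samp_R3R4.bam
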
Example 4

import os, sys, balance_queue, subprocess, shutil
from os import path as op
from coadaptree import makedir, pklload, get_email_info

thisfile, pooldir, samp, dupfile = sys.argv

# RealignerTargetCreator
aligndir = op.join(pooldir, '04_realign')
listfile = op.join(aligndir, '%s_realignment_targets.list' % samp)

# get ref
parentdir = op.dirname(pooldir)
pool = op.basename(pooldir)
ref = pklload(op.join(parentdir, 'poolref.pkl'))[pool]

email_text = get_email_info(parentdir, '04')
text = '''#!/bin/bash
#SBATCH --time=7-00:00:00
#SBATCH --mem=30000M
#SBATCH --nodes=1
#SBATCH --ntasks=32
#SBATCH --cpus-per-task=1
#SBATCH --job-name=%(pool)s-%(samp)s-realign
#SBATCH --output=%(pool)s-%(samp)s-realign_%%j.out 
%(email_text)s

# realign using the GATK
module load gatk/3.8
module load java
export _JAVA_OPTIONS="-Xms256m -Xmx28g"
java -Djava.io.tmpdir=$SLURM_TMPDIR -jar $EBROOTGATK/GenomeAnalysisTK.jar \
-T RealignerTargetCreator -R %(ref)s -I %(dupfile)s -o %(listfile)s -nt 32
''' % locals()  # assumed completion: the original command was truncated here
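
# Sketch of the %(name)s templating this script uses (the other examples use
# f-strings); the values here are made up:
demo = '#SBATCH --job-name=%(pool)s-%(samp)s-realign' % {'pool': 'poolA', 'samp': 's1'}
print(demo)  # -> #SBATCH --job-name=poolA-s1-realign
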
Example 5

### imports
import sys
import os
from os import path as op
from os import listdir
import pickle
import numpy as np
from coadaptree import fs, createdirs, pklload, get_email_info
### 

### args
thisfile, parentdir = sys.argv
if parentdir.endswith("/"):
    parentdir = parentdir[:-1]
poolref = pklload(op.join(parentdir, 'poolref.pkl'))
email_info = get_email_info(parentdir, 'concat')
###

### dirs
shdir   = op.join(parentdir, 'shfiles/concat')
catdir  = op.join(parentdir, 'concatenated_vcfs')
filtdir = op.join(parentdir, 'filtered_snps')
createdirs([shdir, catdir, filtdir])
###

# get the snpfiles
snpdir = op.join(parentdir, 'snps')
snpfiles = [f.replace('.tbi', '') for f in fs(snpdir) if 'snp' in op.basename(f) and f.endswith('.tbi')]
os.system('echo "len(snpfiles) = %s"' % str(len(snpfiles)))
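
# Sketch of the discovery step above with hypothetical names: fs() (from
# coadaptree) is assumed to return full paths; keeping tabix indexes (.tbi)
# and stripping the suffix yields the bgzipped VCFs themselves.
demo_files = ['pool_snp_01.vcf.gz', 'pool_snp_01.vcf.gz.tbi', 'README.txt']
print([f.replace('.tbi', '') for f in demo_files
       if 'snp' in op.basename(f) and f.endswith('.tbi')])
# -> ['pool_snp_01.vcf.gz']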

# sort snpfiles by pool
"""

Example 6

### imports
import sys, os, pickle, subprocess
from os import path as op
import numpy as np
from coadaptree import fs, createdirs, pklload, get_email_info
from genotyping_scheduler import startscheduler, bigbrother, delsched
###

### args
thisfile, parentdir = sys.argv
if parentdir.endswith("/"):
    parentdir = parentdir[:-1]
poolref = pklload(op.join(parentdir, 'poolref.pkl'))
email_info = get_email_info(parentdir, 'final')
bash_variables = op.join(parentdir, 'bash_variables')
maf = pklload(op.join(parentdir, 'maf.pkl'))
###

# make a reservation file so other jobs don't call 05.py
resfile = op.join(parentdir, 'shfiles/06_reservation.txt')
if not op.exists(resfile):
    startscheduler(resfile)
else:
    print('06.py is already running')
    bigbrother(resfile, DIR=None)
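
# The reservation file acts as a simple lock so concurrent runs don't both
# proceed past this point. startscheduler/bigbrother are pipeline helpers in
# genotyping_scheduler; the sketch below is only a hypothetical illustration
# of the pattern, not the pipeline's implementation.
def _reserve(resfile):
    try:
        with open(resfile, 'x') as o:  # mode 'x' fails if the file exists
            o.write(str(os.getpid()))  # record which process claimed it
        return True
    except FileExistsError:
        return False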

### dirs
shdir = op.join(parentdir, 'shfiles/concat')
catdir = op.join(parentdir, 'concatenated_vcfs')