def schedule_jobs(to_run_dict,scheduler,jobname_base,logbase,queue,requeue=None,njobs=None,duration=None,mem=2048,flags='',MAX_RETRY=MAX_RETRY,slurm_cores=1):
    """Hand a dict of jobs to the LSF or SLURM "run until done" wrapper.

    Arguments:
    to_run_dict  -- job dict, passed through unchanged to the scheduler wrapper
    scheduler    -- 'lsf' or 'slurm'; selects which wrapper is called
    jobname_base -- job name passed to the wrapper
    logbase      -- log location passed to the wrapper
    queue        -- queue/partition used for the first pass
    requeue      -- if truthy, a second pass is made on this queue after the
                    first pass returns
    njobs        -- number of jobs/batches; defaults to len(to_run_dict)
    duration     -- job duration (SLURM only); defaults to
                    opts.max_job_duration when available, else DURATION_DFAULT
    mem          -- memory request (SLURM only)
    flags        -- extra scheduler flags (LSF only)
    MAX_RETRY    -- retry limit passed to the wrapper
    slurm_cores  -- when > 1, SLURM jobs are submitted with
                    'ntasks-per-node' set to this value

    Raises ValueError if scheduler is neither 'lsf' nor 'slurm'.
    """
    if duration is None:
        # opts may be undefined (or lack the attribute) when this function is
        # used outside the option-parsing entry point; fall back to the default.
        try:
            duration = opts.max_job_duration
        except (NameError, AttributeError):
            duration = DURATION_DFAULT

    if njobs is None:
        # Was len(trd): trd is not defined in this scope.
        njobs = len(to_run_dict)

    if scheduler == 'lsf':
        LSF.lsf_run_until_done(to_run_dict,logbase,queue,flags,jobname_base,njobs,MAX_RETRY)
        if requeue:
            LSF.lsf_run_until_done(to_run_dict,logbase,requeue,flags,jobname_base,njobs,MAX_RETRY)
    elif scheduler == 'slurm':
        kwargs = {}
        #make right before copying
        if slurm_cores > 1:
            kwargs['ntasks-per-node'] = '%s' % slurm_cores
        SLURM.run_until_done(to_run_dict,jobname_base,logbase,duration,mem,njobs,queue,MAX_RETRY=MAX_RETRY,**kwargs)
        if requeue:
            SLURM.run_until_done(to_run_dict,jobname_base,logbase,duration,mem,njobs,requeue,MAX_RETRY=MAX_RETRY,**kwargs)
    else:
        # Report the argument actually received, not opts.scheduler.
        errstr = 'scheduler must be one of ["lsf","slurm"]; is %s' % scheduler
        raise ValueError(errstr)
compress_cmd = ' | %s -c ' % compress else: compress_ext = '' compress_cmd = '' #First off, get read group files rgcmd = 'get_bam_rg_by_sample.py %s %s' % (bamfile, outroot) ret = os.system(rgcmd) if ret != 0: print >> sys.stderr, 'failed:\n%s' % rgcmd raise OSError sample_files = glob(os.path.join(outroot,'*.rgids.txt')) bambase = os.path.basename(bamfile) stcmdstr = samtools_cmd.replace(' ','_') trd = {} for sf in sample_files: sm = os.path.basename(sf).rsplit('.',2)[0] outfile = os.path.join(outroot, '%s-%s-%s%s%s' % (bambase, sm, stcmdstr, ext, compress_ext)) cmd = 'samtools view -hR %s %s | samtools view -bS - | samtools %s /dev/stdin %s> %s' % (sf, bamfile, samtools_cmd, compress_cmd, outfile) slurmbase = os.path.join(outroot,'%s-%s-%s' % (bambase, sm, stcmdstr)) run_safe.add_cmd(trd, slurmbase, cmd,force_write=True) logfile = os.path.join(outroot,'%s-%s_SLURMwd/log' % (bambase,stcmdstr)) SLURM.run_until_done(trd,'samtools_by_indiv',logfile,MAX_DUR,MAX_RAM,100,PARTITION,MAX_RETRY=3)
cmd = 'overlap_preprocess.py -fc %s -l %s -idx %s -pp "-w -u -s %s %s" %s %s' % (fc,l,idx,cs,opts.force_db_id and '--force_db_id' or '',r1,r2) ss_base = os.path.join(os.path.dirname(r1),'ol_preprocess_lane%s_index%s_DBID%s' % (l,idx,opts.force_db_id)) run_safe.add_cmd(ol_to_run_dict, ss_base, cmd, force_write=True) print pp_to_run_dict print print ol_to_run_dict to_run_dict = {} to_run_dict.update(pp_to_run_dict) to_run_dict.update(ol_to_run_dict) jobname_base = 'preprocess' logbase = os.path.join(opts.outroot,'slurmlog','preprocess') print >> sys.stderr, 'run %s logs in %s' % (jobname_base,logbase) SLURM.run_until_done(to_run_dict,jobname_base,logbase,opts.max_job_duration,(opts.job_ram+1)*1024,opts.num_batches,opts.queue,MAX_RETRY=MAX_RETRY) #collect individual fastq/fastq pairs #(LATER: GET INDIVIDUAL FILES BY DB LOOKUP; REQUIRES HANDLING MOUSE DB ID LOOKUP IF SET) fq_to_run = sample_fq_from_expected(expected_fq_d) #print fq_to_run map_reads_cmd = map_reads_exec + ' -gr %s -n %s -q %s -sched %s -mjd %s -v %s -s \'"%s"\' -g \'"%s"\' -gh \'"%s"\' -mp \'"%s"\' %s %s %s ' % \ (opts.job_ram, \ opts.num_batches, \ opts.queue, \ opts.scheduler, \ opts.max_job_duration, \ vcfname, \ opts.stampy_argstr, \ opts.gatk_argstr, \
def call_variants_gatk_lsf(bams,ref,outroot,vcfbase,njobs=100,gatk_program='UnifiedGenotyper',gatk_args='-out_mode EMIT_ALL_CONFIDENT_SITES -dcov 200 -glm BOTH',gatk_jar=gatk_jar,gatk_ram=4,tmpdir=None,queue='normal_serial',job_ram='30000',MAX_RETRY=MAX_RETRY,include_regions=None,compress_vcf=True,fallback_queue='',scheduler=None,duration=None):
    """Run a GATK walker over partitions of the reference, then merge the part VCFs.

    One java/GATK command is built per region set returned by
    partition_reference(ref, njobs, include_regions), after dropping regions
    whose contig is listed in skip_contigs.  The commands are wrapped with
    run_safe.safe_script and submitted through schedule_jobs; the resulting
    part VCFs are merged with the command from merge_vcf_parts_cmd.

    Arguments:
    bams            -- BAM paths; joined with ' -I ' on the GATK command line
    ref             -- reference passed to GATK via -R
    outroot         -- output directory for the merged VCF and the parts dir
    vcfbase         -- output base name; a trailing '.vcf' is stripped
    njobs           -- number of reference partitions / scheduler batches
    gatk_program    -- GATK walker passed via -T
    gatk_args       -- extra GATK arguments inserted verbatim
    gatk_jar        -- path to the GATK jar
    gatk_ram        -- java heap size in GB (-Xmx)
    tmpdir          -- java.io.tmpdir; defaults to <outroot>/gatk_tmp
    queue           -- scheduler queue
    job_ram         -- value used in the LSF '-R "select[mem>...]"' flag
    MAX_RETRY       -- retry limit; under SLURM also the number of previous
                       submissions after which a part is run multithreaded
    include_regions -- forwarded to partition_reference
    compress_vcf    -- True selects '.vcf.gz' outputs, False '.vcf'
    fallback_queue  -- forwarded to schedule_jobs as requeue
    scheduler       -- 'lsf' or 'slurm'; None means 'slurm'
    duration        -- job duration; defaults to opts.max_job_duration when
                       available, else DURATION_DFAULT

    Raises OSError if the merge command exits non-zero, or if the parts
    directory cannot be created.
    """
    if duration is None:
        # opts may be undefined (or lack the attribute) outside the main script.
        try:
            duration = opts.max_job_duration
        except (NameError, AttributeError):
            duration = DURATION_DFAULT
    if scheduler is None:
        scheduler = 'slurm'
    if tmpdir is None:
        tmpdir = os.path.join(outroot,'gatk_tmp')

    bamstr = ' -I '.join(bams)
    regions = partition_reference(ref,njobs,include_regions)
    nregions = len(regions)

    # Conditional expression instead of "a and b or c", which would return the
    # unstripped name whenever the stripped one is empty.
    vcfbasename = vcfbase[:-4] if vcfbase.endswith('.vcf') else vcfbase
    gatkoutvcfbase = '%s-GATK-%s' % (vcfbasename,gatk_program)
    if compress_vcf:
        vcfext = '.vcf.gz'
    else:
        vcfext = '.vcf'
    gatkoutvcf = os.path.join(outroot,gatkoutvcfbase+vcfext)

    vcf_parts_root = os.path.join(outroot,gatkoutvcfbase+'-vcf_parts')
    try:
        os.makedirs(vcf_parts_root)
    except OSError:
        # An already-existing directory is fine (e.g. a rerun); any other
        # failure would otherwise only surface later as failed jobs.
        if not os.path.isdir(vcf_parts_root):
            raise

    logfile = os.path.join(vcf_parts_root,'logs',gatk_program)
    lsf_flags = '-R "select[mem>%s]"' % job_ram

    ser_to_run_dict = {}  # single-core runs
    par_to_run_dict = {}  # multithreaded runs (only populated under SLURM)

    print >> sys.stderr, 'Calculate %s runs: ' % gatk_program
    for i,reg in enumerate(regions):
        print >> sys.stderr, '\r\t%s / %s' % (i+1,nregions),
        reg = [r for r in reg if r.split(':')[0] not in skip_contigs]
        if not reg:
            continue
        start,end = start_end_strs(reg)
        regstr = ' -L '.join(reg)
        partvcf = os.path.join(vcf_parts_root,'%s_%dof%d_%sto%s%s' % (gatkoutvcfbase,i,nregions,start,end,vcfext))
        cmd = 'java -Xmx%sg -Djava.io.tmpdir=%s -jar %s -R %s -T %s -o %s %s -I %s -L %s' % (gatk_ram,tmpdir,gatk_jar,ref,gatk_program,partvcf,gatk_args,bamstr,regstr)

        if scheduler == 'slurm':
            nprevsub = len(SLURM.previous_submissions(logfile,partvcf+'.sh'))
            if nprevsub >= MAX_RETRY:
                # NOTE(review): this override persists, so the serial jobs
                # scheduled below also get MAX_DURATION once any part lands
                # here -- confirm that is intended.
                duration = MAX_DURATION
                print >> sys.stderr, '\n%s failed %s previous runs; %s thread X %s core invoked' % (partvcf,nprevsub,GATK_PAR_NT,GATK_PAR_NCT)
                cmd += ' -nt %s -nct %s' % (GATK_PAR_NT,GATK_PAR_NCT)
                par_to_run_dict[partvcf] = run_safe.safe_script(cmd,partvcf,force_write=True)
                continue
        ser_to_run_dict[partvcf] = run_safe.safe_script(cmd,partvcf,force_write=True)

    #SERIAL (one core) RUNS
    schedule_jobs(ser_to_run_dict,scheduler,gatk_program,logfile,queue,requeue=fallback_queue,njobs=njobs,duration=duration,mem=(gatk_ram*1024)+JOB_MEM_OVERHEAD,flags=lsf_flags,MAX_RETRY=MAX_RETRY)
    # list(...) so that extend() below works on any dict-keys type.
    trd_keys = list(ser_to_run_dict)

    #PARALLEL (multithread) RUNS
    if scheduler == 'slurm':
        mt_cores = GATK_PAR_NT*GATK_PAR_NCT
        # The memory figure is divided by the core count before being passed
        # as mem (reported below as "ram-per-core").
        mt_ram = ( (GATK_PAR_NT*gatk_ram*1024)+(JOB_MEM_OVERHEAD*GATK_PAR_NT) ) / float(mt_cores)
        mt_ram = int(mt_ram)
        print >> sys.stderr, '\nrun multithreaded %s: %s jobs; ram-per-core: %s cores: %s' % (gatk_program,len(par_to_run_dict),mt_ram,mt_cores)
        schedule_jobs(par_to_run_dict,scheduler,gatk_program,logfile,queue,requeue=fallback_queue,njobs=njobs,duration=duration,mem=mt_ram,flags=lsf_flags,MAX_RETRY=MAX_RETRY,slurm_cores=mt_cores)
        trd_keys.extend(par_to_run_dict)

    cmd = merge_vcf_parts_cmd(trd_keys,ref,gatkoutvcf,gatk_jar,gatk_ram,tmpdir)
    ret = os.system(run_safe.safe_script(cmd,gatkoutvcf))
    if ret != 0:
        raise OSError('VCF merge failed:\n%s' % cmd)
#!/usr/bin/env python
# Queue permutation runs of random_wigs.py on SLURM.
#
# Command line: GENO PHENO RUNS
# One "random_wigs.py GENO PHENO <i>" command is registered for every i in
# range(RUNS); the run_safe base for run i is "<pheno stem>-permute-donedir/<i>"
# next to the phenotype file, and SLURM logs use the prefix
# "<pheno stem>-permute-logs/log-".

import os
import sys

import SLURM
import run_safe

# SLURM submission settings
queue = "general"
max_job_duration = 1440
job_ram = (2 + 1) * 1024
job_batches = 500

geno, pheno, runs = sys.argv[1:]

basedir, basename = os.path.split(pheno)
stem = os.path.splitext(basename)[0]
donedir = os.path.join(basedir, stem + "-permute-donedir/")
logfile = os.path.join(basedir, stem + "-permute-logs/log-")

if not os.path.exists(donedir):
    os.makedirs(donedir)

trd = {}
for i in range(int(runs)):
    command = "random_wigs.py %s %s %s" % (geno, pheno, i)
    run_safe.add_cmd(trd, "%s%s" % (donedir, i), command, force_write=True)

# Former LSF equivalent: LSF.lsf_run_until_done(trd,logfile,queue,'','random-wigs',1000,3)
SLURM.run_until_done(
    trd,
    "random-wigs",
    logfile,
    max_job_duration,
    job_ram,
    job_batches,
    queue,
    MAX_RETRY=3,
)
#!/usr/bin/env python
# Queue one "wigs simevo" run per genotype file on SLURM.
#
# Command line: GENODIR PHENO
# Every file matching GENODIR/*-geno.txt gets one command; its output prefix
# is the genotype path with '-geno.txt' replaced by '_output-', and the
# run_safe base is that prefix minus the trailing '-'.  SLURM logs go under
# logs/SLURM/ next to the phenotype file.

import os
import sys
from glob import glob

import SLURM
import run_safe

# SLURM submission settings
queue = 'general'
max_job_duration = 1440
job_ram = (4+1)*1024
job_batches = 500

genodir,pheno = sys.argv[1:]

basedir = os.path.dirname(pheno)
logfile = os.path.join(basedir,'logs/SLURM/')

geno_pattern = os.path.join(genodir,'*-geno.txt')
genos = glob(geno_pattern)
print >> sys.stderr, 'wigs on %s contigs' % len(genos)

trd = {}
for geno in genos:
    outbase = geno.replace('-geno.txt','_output-')
    wigs_cmd = 'wigs simevo -g %s -s %s -f %s -x 40000 -b 500 -t 100 -d 1' % (geno,pheno,outbase)
    run_safe.add_cmd(trd,outbase[:-1],wigs_cmd,force_write=True)

SLURM.run_until_done(
    trd,
    'wigs-by-chrom',
    logfile,
    max_job_duration,
    job_ram,
    job_batches,
    queue,
    MAX_RETRY=3,
)