#!/usr/bin/env python
"""Submit rainbowtron post-processing jobs for finished cross-video analyses.

For every merged 720p video whose analysis config exists and records an
'end' marker, look up the successful analysis directory and queue a
run_rainbowtron.py job for it through LSF.
"""
from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

os.chdir('/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/')

logfile = '../rbt-logs/log'

currjobs = submit_summarize_runs.get_currjobs()


def _cfg_finished(vid):
    """True if the video's config file exists and contains an 'end' marker."""
    # Compute the config path once (original called viz_vidtools.cfg_fn twice).
    cfg = viz_vidtools.cfg_fn(vid)
    if not os.path.exists(cfg):
        return False
    # Close the handle promptly instead of leaking it via a bare open().read().
    with open(cfg) as fh:
        return 'end' in fh.read()


# filter(None, ...) drops videos for which no successful analysis dir was found.
analysis_dirs = filter(None, [
    submit_summarize_runs.get_successful_analysis_dir(
        vid,
        submit_summarize_runs.RERUN_COEFFS,
        currjobs=currjobs,
        **submit_summarize_runs.DEFAULT_PARAMS)
    for vid in sorted(glob('*/merge6mbit_720_*.mp4'))
    if _cfg_finished(vid)
])

# Build the done-stamp -> command dict consumed by the LSF runner.
trd = {}
for analysis_dir in analysis_dirs:
    #print >> sys.stderr, analysis_dir
    rbtdone = os.path.join(analysis_dir, 'rainbowtron')
    cmd = 'run_rainbowtron.py %s' % analysis_dir
    run_safe.add_cmd(trd, rbtdone, cmd, force_write=True)

LSF.lsf_run_until_done(trd, logfile, 'normal_serial', '', 'rainbow', 100, 3)
# NOTE(review): this chunk begins mid-way through an if/else that picks a
# compression pipe suffix; the `if` header and the definitions of `compress`,
# `ext`, `bamfile`, `outroot`, `samtools_cmd`, MAX_DUR/MAX_RAM/PARTITION and
# the SLURM/run_safe imports are outside this excerpt. Indentation of the
# dangling branch below is reconstructed — confirm against the full file.
    compress_cmd = ' | %s -c ' % compress  # tail of the truncated `if` branch
else:
    # no compressor requested: plain output, no extra extension or pipe stage
    compress_ext = ''
    compress_cmd = ''

#First off, get read group files
# get_bam_rg_by_sample.py writes one *.rgids.txt per sample into outroot.
rgcmd = 'get_bam_rg_by_sample.py %s %s' % (bamfile, outroot)
ret = os.system(rgcmd)
if ret != 0:
    print >> sys.stderr, 'failed:\n%s' % rgcmd
    raise OSError

sample_files = glob(os.path.join(outroot,'*.rgids.txt'))
bambase = os.path.basename(bamfile)
stcmdstr = samtools_cmd.replace(' ','_')  # filename-safe form of the samtools subcommand

# done-stamp base -> shell command, consumed by SLURM.run_until_done below
trd = {}
for sf in sample_files:
    # sample name = filename minus the trailing '.rgids.txt'
    sm = os.path.basename(sf).rsplit('.',2)[0]
    outfile = os.path.join(outroot, '%s-%s-%s%s%s' % (bambase, sm, stcmdstr, ext, compress_ext))
    # per-sample pipeline: pull this sample's read groups (-hR), re-binarize,
    # run the requested samtools subcommand, optionally pipe to the compressor
    cmd = 'samtools view -hR %s %s | samtools view -bS - | samtools %s /dev/stdin %s> %s' % (sf, bamfile, samtools_cmd, compress_cmd, outfile)
    slurmbase = os.path.join(outroot,'%s-%s-%s' % (bambase, sm, stcmdstr))
    run_safe.add_cmd(trd, slurmbase, cmd,force_write=True)

logfile = os.path.join(outroot,'%s-%s_SLURMwd/log' % (bambase,stcmdstr))
SLURM.run_until_done(trd,'samtools_by_indiv',logfile,MAX_DUR,MAX_RAM,100,PARTITION,MAX_RETRY=3)
# NOTE(review): this chunk begins inside an if/else (apparently within a loop
# over fastq directories `d`); the `if` header and the setup of
# `expected_fq_d`, `preprocess_targets` and `opts` are outside this excerpt.
# Indentation of the dangling branch below is reconstructed — confirm against
# the full file.
        expected_fq_d[glob_key] = 1
    else:
        errstr = 'no fastq for %s' % d
        raise ValueError, errstr  # Python 2 raise syntax

preprocess_targets = list(set(preprocess_targets))  # de-duplicate targets

ol_to_run_dict = {}  # overlap-preprocess jobs (paired-end reads)
pp_to_run_dict = {}  # plain preprocess jobs (single-read lanes)
for (r1,r2),(fc,l,idx,cs) in preprocess_targets:
    if r2 is None: #single read; preprocess only
        # `opts.force_db_id and '--force_db_id' or ''` is the pre-ternary
        # Python 2 idiom for a conditional flag
        cmd = 'preprocess_radtag_lane.py -w -u -s %s -fc %s -l %s -idx %s %s %s' % (cs,fc,l,idx,opts.force_db_id and '--force_db_id' or '',r1)
        ss_base = os.path.join(os.path.dirname(r1),'sr_preprocess_lane%s_index%s_DBID%s' % (l,idx,opts.force_db_id))
        run_safe.add_cmd(pp_to_run_dict, ss_base, cmd, force_write=True)
    else:
        # paired reads: overlap_preprocess.py wraps the preprocess invocation
        cmd = 'overlap_preprocess.py -fc %s -l %s -idx %s -pp "-w -u -s %s %s" %s %s' % (fc,l,idx,cs,opts.force_db_id and '--force_db_id' or '',r1,r2)
        ss_base = os.path.join(os.path.dirname(r1),'ol_preprocess_lane%s_index%s_DBID%s' % (l,idx,opts.force_db_id))
        run_safe.add_cmd(ol_to_run_dict, ss_base, cmd, force_write=True)

print pp_to_run_dict
print
print ol_to_run_dict

# merge both job dicts for a single scheduler submission (presumably below
# this excerpt)
to_run_dict = {}
to_run_dict.update(pp_to_run_dict)
to_run_dict.update(ol_to_run_dict)

jobname_base = 'preprocess'
logbase = os.path.join(opts.outroot,'slurmlog','preprocess')
# MAPPING COMPLETE # Genotyping steps follow # IF --reduced_reads, then realign and reduce before merging to single bam # otherwise merge into 1 bam, then realign (if --realign) then perform relevant GATK/samtools/whatever steps import subprocess # run reduced reads (and single-sample realignment) if opts.reduce_reads: to_run_dict = {} rr_rg_ref_bams = [] for bam in rg_ref_bams: rr_bam = os.path.splitext(bam)[0] + '.realigned.reduced.bam' rr_done = os.path.splitext(bam)[0] + '.realigned.reduced' rr_cmd = 'realign_reduce_bam.py %s %s' % (bam,reference_fasta) run_safe.add_cmd(to_run_dict,rr_done,rr_cmd,force_write=True) rr_rg_ref_bams.append(rr_bam) #SLURM here to_run = rr_rg_ref_bams runs = 0 while to_run: if runs >= 3: print >> sys.stderr, '3 attempts made without success; the following bams did not complete:\n%s' % '\n'.join(to_run) raise ValueError logfile = os.path.join(outroot,'%slog' % opts.scheduler,'realign-reduce-log') schedule_jobs(to_run_dict,opts.scheduler,'realign-reduce',logfile,opts.lsf_queue,requeue=opts.fallback_queue,njobs=njobs,duration=opts.max_job_duration,mem=(opts.gatk_ram*1024)+JOB_MEM_OVERHEAD,flags='-R "select[mem>20000]"',MAX_RETRY=MAX_RETRY) #LSF.lsf_run_until_done(to_run_dict,logfile,opts.lsf_queue,'-R "select[mem>20000]"','realign-reduce',njobs,MAX_RETRY) #CHECK BAMS WITH samtools
#!/usr/bin/env python
"""Queue one random_wigs.py permutation job per requested run via SLURM.

Usage: <script> GENO PHENO RUNS
Done-stamps go in <pheno-stem>-permute-donedir/, logs under
<pheno-stem>-permute-logs/.
"""

# SLURM submission parameters
queue = "general"
max_job_duration = 1440          # minutes
job_ram = (2 + 1) * 1024         # MB
job_batches = 500

import os, sys, SLURM, run_safe

geno, pheno, runs = sys.argv[1:]

basedir, basename = os.path.split(pheno)
stem = os.path.splitext(basename)[0]
donedir = os.path.join(basedir, stem + "-permute-donedir/")
logfile = os.path.join(basedir, stem + "-permute-logs/log-")

if not os.path.exists(donedir):
    os.makedirs(donedir)

# One job per permutation index: done-stamp base -> command.
trd = {}
for run_idx in range(int(runs)):
    permute_cmd = "random_wigs.py %s %s %s" % (geno, pheno, run_idx)
    run_safe.add_cmd(trd, donedir + str(run_idx), permute_cmd, force_write=True)

# LSF.lsf_run_until_done(trd,logfile,queue,'','random-wigs',1000,3)
SLURM.run_until_done(trd, "random-wigs", logfile, max_job_duration, job_ram,
                     job_batches, queue, MAX_RETRY=3)
#!/usr/bin/env python
"""Submit rainbowtron post-processing jobs for finished cross-video analyses.

For every merged 720p video whose analysis config exists and records an
'end' marker, look up the successful analysis directory and queue a
run_rainbowtron.py job for it through LSF.
"""
from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

os.chdir("/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/")

logfile = "../rbt-logs/log"

currjobs = submit_summarize_runs.get_currjobs()


def _cfg_finished(vid):
    """True if the video's config file exists and contains an 'end' marker."""
    # Compute the config path once (original called viz_vidtools.cfg_fn twice).
    cfg = viz_vidtools.cfg_fn(vid)
    if not os.path.exists(cfg):
        return False
    # Close the handle promptly instead of leaking it via a bare open().read().
    with open(cfg) as fh:
        return "end" in fh.read()


# filter(None, ...) drops videos for which no successful analysis dir was found.
analysis_dirs = filter(
    None,
    [
        submit_summarize_runs.get_successful_analysis_dir(
            vid,
            submit_summarize_runs.RERUN_COEFFS,
            currjobs=currjobs,
            **submit_summarize_runs.DEFAULT_PARAMS
        )
        for vid in sorted(glob("*/merge6mbit_720_*.mp4"))
        if _cfg_finished(vid)
    ],
)

# Build the done-stamp -> command dict consumed by the LSF runner.
trd = {}
for analysis_dir in analysis_dirs:
    # print >> sys.stderr, analysis_dir
    rbtdone = os.path.join(analysis_dir, "rainbowtron")
    cmd = "run_rainbowtron.py %s" % analysis_dir
    run_safe.add_cmd(trd, rbtdone, cmd, force_write=True)

LSF.lsf_run_until_done(trd, logfile, "normal_serial", "", "rainbow", 100, 3)
#!/usr/bin/env python
"""Submit one 'wigs simevo' SLURM job per per-contig genotype file.

Usage: <script> GENODIR PHENO
Each GENODIR/*-geno.txt gets its own job writing results under the
matching '<contig>_output-' prefix.
"""

# SLURM submission parameters
queue = 'general'
max_job_duration = 1440
job_ram = (4+1)*1024
job_batches = 500

import os,sys,SLURM,run_safe
from glob import glob

genodir,pheno = sys.argv[1:]

basedir = os.path.dirname(pheno)
#donedir = os.path.join(basedir,'donedir/')
logfile = os.path.join(basedir,'logs/SLURM/')
#if not os.path.exists(donedir): os.makedirs(donedir)

genos = glob(os.path.join(genodir,'*-geno.txt'))
print >> sys.stderr, 'wigs on %s contigs' % len(genos)

# One job per contig: done-stamp base -> command.
trd = {}
for contig_geno in genos:
    outbase = contig_geno.replace('-geno.txt','_output-')
    wigs_cmd = 'wigs simevo -g %s -s %s -f %s -x 40000 -b 500 -t 100 -d 1' % (contig_geno,pheno,outbase)
    # done-stamp is the output prefix minus its trailing '-'
    run_safe.add_cmd(trd,outbase[:-1],wigs_cmd,force_write=True)

SLURM.run_until_done(trd,'wigs-by-chrom',logfile,max_job_duration,job_ram,job_batches,queue,MAX_RETRY=3)