def run_lsf_blat(subjects, queries, blattile, blatargstr='', num_batches=100, queue='normal_serial'):
    '''Submit mcl_id_triples_by_blat.py jobs to LSF for every (query, subject) pair.

    Intended as an example of parallelization over a compute grid; uses the
    LSF.py module for interaction with the scheduler.

    subjects  -- list of subject fasta paths (conventionally "*_subj.fa")
    queries   -- list of query fasta paths (conventionally "*_query.fa")
    blattile  -- BLAT -tileSize value; -stepSize is set to half of it
    blatargstr -- extra BLAT arguments appended to the command line
    num_batches -- number of LSF batches to split the jobs into
    queue     -- LSF queue name

    Returns the list of expected '.label.gz' output files.
    Raises OSError if any expected output is missing after all jobs finish.
    '''
    import LSF, run_safe

    def _drop_suffix(s, suffix):
        # str.rstrip strips a trailing *character set*, not a suffix, so the
        # original rstrip('.fa') mangled names ending in f/a (e.g. 'alfalfa.fa'
        # -> 'alfal'); remove the exact suffix instead.
        return s[:-len(suffix)] if s.endswith(suffix) else s

    blatargstr += ' -tileSize=%s' % blattile
    # floor division: stepSize must stay an integer (Py2 '/' happened to floor)
    blatargstr += ' -stepSize=%s' % (int(blattile) // 2)

    labf = []
    to_run_dict = {}
    for q in queries:
        for subject in subjects:
            subjname = _drop_suffix(_drop_suffix(os.path.basename(subject), '.fa'), '_subj')
            # output basename encodes query, subject, and the blat arg string
            # (with '=' and ' ' removed) so distinct parameterizations don't collide
            outbase = (_drop_suffix(_drop_suffix(q, '.fa'), '_query')
                       + '_blat' + '-subj' + subjname
                       + blatargstr.replace('=', '').replace(' ', ''))
            labf.append(outbase + '.label.gz')
            # shell escapes unnecessary with safe_script
            cmd = '%s %s %s %s "%s" %s' % (
                sys.executable,
                os.path.join(radtag_denovo, 'mcl_id_triples_by_blat.py'),
                subject, q, blatargstr, outbase)
            to_run_dict[outbase] = run_safe.safe_script(cmd, outbase)

    logfile = os.path.join(os.path.dirname(subjects[0]), 'blat-log/blat-log')
    LSF.lsf_run_until_done(to_run_dict, logfile, queue, '-R "select[mem > 20000]"', 'blat2mat', num_batches, 3)

    if not all([os.path.exists(f) for f in labf]):
        raise OSError('blat failed')
    return labf
def schedule_jobs(to_run_dict,scheduler,jobname_base,logbase,queue,requeue=None,njobs=None,duration=None,mem=2048,flags='',MAX_RETRY=MAX_RETRY,slurm_cores=1):
    '''Dispatch the commands in to_run_dict to an LSF or SLURM scheduler.

    to_run_dict -- mapping of done-file -> safe_script command (as built by run_safe)
    scheduler   -- 'lsf' or 'slurm'
    jobname_base, logbase, queue -- scheduler job name stem, log path stem, queue name
    requeue     -- optional second queue to resubmit to after the first pass
    njobs       -- number of batches; defaults to one per command
    duration    -- SLURM wall time; defaults to opts.max_job_duration or DURATION_DFAULT
    mem         -- SLURM memory request (MB)
    flags       -- extra LSF bsub flags
    slurm_cores -- if >1, passed to SLURM as ntasks-per-node

    Raises ValueError for an unknown scheduler.
    '''
    if duration is None:
        # best-effort: global opts may not exist (module used as a library)
        try:
            duration = opts.max_job_duration
        except (NameError, AttributeError):
            # NOTE(review): DURATION_DFAULT kept as-is (defined elsewhere;
            # looks like a typo for DURATION_DEFAULT -- confirm before renaming)
            duration = DURATION_DFAULT
    if njobs is None:
        # bug fix: was len(trd), an undefined name (NameError when njobs omitted)
        njobs = len(to_run_dict)
    if scheduler == 'lsf':
        LSF.lsf_run_until_done(to_run_dict,logbase,queue,flags,jobname_base,njobs,MAX_RETRY)
        if requeue:
            LSF.lsf_run_until_done(to_run_dict,logbase,requeue,flags,jobname_base,njobs,MAX_RETRY)
    elif scheduler == 'slurm':
        kwargs = {} #make right before copying
        if slurm_cores > 1:
            kwargs['ntasks-per-node'] = '%s' % slurm_cores
        SLURM.run_until_done(to_run_dict,jobname_base,logbase,duration,mem,njobs,queue,MAX_RETRY=MAX_RETRY,**kwargs)
        if requeue:
            SLURM.run_until_done(to_run_dict,jobname_base,logbase,duration,mem,njobs,requeue,MAX_RETRY=MAX_RETRY,**kwargs)
    else:
        # bug fix: error message formatted opts.scheduler (global, possibly
        # absent) instead of the actual argument received
        errstr = 'scheduler must be one of ["lsf","slurm"]; is %s' % scheduler
        raise ValueError(errstr)
def run_lsf_blat(subjects,queries,blattile,blatargstr='',num_batches=100,queue='normal_serial'):
    '''Submit mcl_id_triples_by_blat.py jobs to LSF for every (query, subject) pair.

    Intended as an example of parallelization over a compute grid; uses the
    LSF.py module for interaction with the scheduler.

    subjects  -- list of subject fasta paths (conventionally "*_subj.fa")
    queries   -- list of query fasta paths (conventionally "*_query.fa")
    blattile  -- BLAT -tileSize value; -stepSize is set to half of it
    blatargstr -- extra BLAT arguments appended to the command line
    num_batches -- number of LSF batches to split the jobs into
    queue     -- LSF queue name

    Returns the list of expected '.label.gz' output files.
    Raises OSError if any expected output is missing after all jobs finish.
    '''
    import LSF,run_safe

    def _drop_suffix(s, suffix):
        # str.rstrip strips a trailing *character set*, not a suffix, so the
        # original rstrip('.fa') mangled names ending in f/a (e.g. 'alfalfa.fa'
        # -> 'alfal'); remove the exact suffix instead.
        return s[:-len(suffix)] if s.endswith(suffix) else s

    blatargstr += ' -tileSize=%s' % blattile
    # floor division: stepSize must stay an integer (Py2 '/' happened to floor)
    blatargstr += ' -stepSize=%s' % (int(blattile)//2)

    labf = []
    to_run_dict = {}
    for q in queries:
        for subject in subjects:
            subjname = _drop_suffix(_drop_suffix(os.path.basename(subject), '.fa'), '_subj')
            # output basename encodes query, subject, and the blat arg string
            # (with '=' and ' ' removed) so distinct parameterizations don't collide
            outbase = _drop_suffix(_drop_suffix(q, '.fa'), '_query')+'_blat'+'-subj'+subjname+blatargstr.replace('=','').replace(' ','')
            labf.append(outbase+'.label.gz')
            # shell escapes unnecessary with safe_script
            cmd = '%s %s %s %s "%s" %s' % (sys.executable, os.path.join(radtag_denovo, 'mcl_id_triples_by_blat.py'),subject,q,blatargstr,outbase)
            to_run_dict[outbase] = run_safe.safe_script(cmd,outbase)

    logfile = os.path.join(os.path.dirname(subjects[0]),'blat-log/blat-log')
    LSF.lsf_run_until_done(to_run_dict, logfile, queue, '-R "select[mem > 20000]"', 'blat2mat', num_batches, 3)

    if not all([os.path.exists(f) for f in labf]):
        raise OSError('blat failed')
    return labf
#!/usr/bin/env python
'''Queue rainbowtron post-processing for every finished antfarm analysis.

Scans the 2012 cross data directory for merged 720p videos whose config file
exists and is marked finished ('end' present), resolves each to a successful
analysis directory, and submits one run_rainbowtron.py job per directory to
LSF via run_safe stub scripts.
'''
from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

os.chdir('/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/')
logfile = '../rbt-logs/log'

currjobs = submit_summarize_runs.get_currjobs()

# videos whose analysis config exists and declares an 'end' (i.e. finished)
candidate_vids = [vid for vid in sorted(glob('*/merge6mbit_720_*.mp4'))
                  if os.path.exists(viz_vidtools.cfg_fn(vid))
                  and 'end' in open(viz_vidtools.cfg_fn(vid)).read()]

# resolve each candidate to its successful analysis dir; drop unresolved (falsy)
resolved = [submit_summarize_runs.get_successful_analysis_dir(
                vid, submit_summarize_runs.RERUN_COEFFS,
                currjobs=currjobs, **submit_summarize_runs.DEFAULT_PARAMS)
            for vid in candidate_vids]
analysis_dirs = [d for d in resolved if d]

# one job per analysis dir, keyed by its 'rainbowtron' done-file
trd = {}
for adir in analysis_dirs:
    donefile = os.path.join(adir, 'rainbowtron')
    run_safe.add_cmd(trd, donefile, 'run_rainbowtron.py %s' % adir, force_write=True)

LSF.lsf_run_until_done(trd, logfile, 'normal_serial', '', 'rainbow', 100, 3)
#!/usr/bin/env python from video_analysis import submit_summarize_runs, viz_vidtools import LSF, run_safe import time, os, sys from glob import glob os.chdir("/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/") logfile = "../rbt-logs/log" currjobs = submit_summarize_runs.get_currjobs() analysis_dirs = filter( None, [ submit_summarize_runs.get_successful_analysis_dir( vid, submit_summarize_runs.RERUN_COEFFS, currjobs=currjobs, **submit_summarize_runs.DEFAULT_PARAMS ) for vid in sorted(glob("*/merge6mbit_720_*.mp4")) if os.path.exists(viz_vidtools.cfg_fn(vid)) and "end" in open(viz_vidtools.cfg_fn(vid)).read() ], ) trd = {} for analysis_dir in analysis_dirs: # print >> sys.stderr, analysis_dir rbtdone = os.path.join(analysis_dir, "rainbowtron") cmd = "run_rainbowtron.py %s" % analysis_dir run_safe.add_cmd(trd, rbtdone, cmd, force_write=True) LSF.lsf_run_until_done(trd, logfile, "normal_serial", "", "rainbow", 100, 3)
else: if FORCE_PAR: h,w = vidtools.extract_keyframe(vid).shape th = h - (crops[1]+crops[3]) tw = w - (crops[0]+crops[2]) pixw = 255 pixh = int((float(th)/tw)*pixw) parstr = '-aspect %s:%s' % (pixw,pixh) else: parstr = '' cropstr = '-vf crop=in_w-%s:in_h-%s:%s:%s' % (crops[0]+crops[2],crops[1]+crops[3],crops[0],crops[1]) cmd = 'ffmpeg -ss %s -t %s -i %s -y %s -r 29.97 -b 20000k %s %s' % (offset,dur,vid,cropstr,parstr,outvid) to_run_dict[outvid] = run_safe.safe_script(cmd,outvid,force_write=True) logfile = os.path.join(os.path.dirname(vid),'logs','crop-log') LSF.lsf_run_until_done(to_run_dict,logfile,queue,'-R "select[mem>%s]"' % job_ram, 'crop-ffmpeg',10, MAX_RETRY) #cmds = [] #rerun = True #while rerun: # for clab,crops in cropsdict.items(): # outbase,outext = os.path.splitext(vid) # outvid = '%s_%s_%s-%s%s' % (outbase,clab,offset,dur,outext) # if os.path.exists(outvid) and ( vidtools.vid_duration(outvid) == dur ): # print >> sys.stderr, '%s present and expected size, skip' % outvid # else: # cropstr = '-vf crop=in_w-%s:in_h-%s:%s:%s' % (crops[0]+crops[2],crops[1]+crops[3],crops[0],crops[1]) # cmd = 'ffmpeg -ss %s -t %s -i %s -y %s -b 20000k %s' % (offset,dur,vid,cropstr,outvid) # cmds.append(cmd) # #