def run_lsf_blat(subject,queries,blattile,blatargstr='',num_batches=100):
    '''Submits mcl_id_triples_by_blat.py jobs to LSF.

    Intended as an example of parallelization over a compute grid;
    uses a module LSF.py for interaction with the scheduler.

    subject     : path to the subject fasta file
    queries     : list of query fasta paths
    blattile    : blat -tileSize value (-stepSize is set to half of this)
    blatargstr  : extra blat arguments appended to the command line
    num_batches : number of LSF batches to bundle the commands into

    Returns the list of expected .label output filenames.
    '''
    import LSF

    def _strip_suffix(s, suffix):
        # FIX: the original used str.rstrip(suffix), which removes a *set of
        # characters*, not the suffix (e.g. 'data.fa'.rstrip('.fa') -> 'dat').
        return s[:-len(suffix)] if s.endswith(suffix) else s

    blatargstr += ' -tileSize=%s' % blattile
    # blat convention: stepSize of half the tile size (integer division)
    blatargstr += ' -stepSize=%s' % (int(blattile)/2)

    cmds = []
    labf = []
    for q in queries:
        outbase = _strip_suffix(_strip_suffix(q, '.fa'), '_query') \
                  + '_blat' + blatargstr.replace('=','').replace(' ','')
        labf.append(outbase+'.label')
        cmds.append('%smcl_id_triples_by_blat.py %s %s \\"%s\\" %s' % (radtag_denovo,subject,q,blatargstr,outbase))

    logfile = os.path.join(os.path.dirname(subject),'blat-log')
    # best-effort removal of a stale log; absence is fine
    try:
        os.unlink(logfile)
    except OSError:
        pass
    #print >> sys.stderr, 'LSF %s\nlog: %s' % (cmds,logfile)

    import time
    # resubmit until the log reports success for every command
    while len(cmds) > 0:
        jobids,namedict = LSF.lsf_jobs_submit(cmds,logfile,'normal_serial',jobname_base='blat2mat',num_batches=num_batches)
        time.sleep(20)
        LSF.lsf_wait_for_jobs(jobids,logfile,namedict=namedict)
        cmds = LSF.lsf_no_success_from_log(logfile)

    return labf
def run_lsf_blat(subjects,queries,blattile,blatargstr='',num_batches=100):
    '''Submits mcl_id_triples_by_blat.py jobs to LSF (multi-subject variant).

    Intended as an example of parallelization over a compute grid;
    uses a module LSF.py for interaction with the scheduler.

    subjects    : list of subject fasta paths (one job per subject x query)
    queries     : list of query fasta paths
    blattile    : blat -tileSize value (-stepSize is set to half of this)
    blatargstr  : extra blat arguments appended to the command line
    num_batches : number of LSF batches to bundle the commands into

    Returns the list of expected .label output filenames.
    Raises OSError if any expected .label file is missing after all retries.
    '''
    import LSF

    def _strip_suffix(s, suffix):
        # FIX: the original used str.rstrip(suffix), which removes a *set of
        # characters*, not the suffix (e.g. 'data.fa'.rstrip('.fa') -> 'dat').
        return s[:-len(suffix)] if s.endswith(suffix) else s

    blatargstr += ' -tileSize=%s' % blattile
    # blat convention: stepSize of half the tile size (integer division)
    blatargstr += ' -stepSize=%s' % (int(blattile)/2)

    cmds = []
    labf = []
    for q in queries:
        for subject in subjects:
            subjname = _strip_suffix(_strip_suffix(os.path.basename(subject), '.fa'), '_subj')
            outbase = _strip_suffix(_strip_suffix(q, '.fa'), '_query') \
                      + '_blat' + '-subj' + subjname + blatargstr.replace('=','').replace(' ','')
            labf.append(outbase+'.label')
            cmds.append('%smcl_id_triples_by_blat.py %s %s \\"%s\\" %s' % (radtag_denovo,subject,q,blatargstr,outbase))

    logfile = os.path.join(os.path.dirname(subjects[0]),'blat-log')
    # best-effort removal of stale per-batch logs; absence is fine
    logfiles = glob(logfile+'*.lsflog')
    for lf in logfiles:
        try:
            os.unlink(lf)
        except OSError:
            pass
    #print >> sys.stderr, 'LSF %s\nlog: %s' % (cmds,logfile)

    import time
    # resubmit until every per-batch log reports success
    while len(cmds) > 0:
        jobids,namedict = LSF.lsf_jobs_submit(cmds,logfile,'normal_serial',bsub_flags='-R "select[mem > 20000]"',jobname_base='blat2mat',num_batches=num_batches)
        time.sleep(20)
        LSF.lsf_wait_for_jobs(jobids,logfile,namedict=namedict)
        logfiles = glob(logfile+'*.lsflog')
        # FIX: supply [] initializer so an empty logfile list yields no retries
        # instead of TypeError from reduce() on an empty sequence
        cmds = reduce(lambda x,y:x+y, [LSF.lsf_no_success_from_log(lf) for lf in logfiles], [])

    if not all([os.path.exists(f) for f in labf]):
        # call-style raise is valid in both Python 2 and 3
        raise OSError('blat failed')
    return labf
def run_parallel_blasts(clids,mapname,gr,tab,uniqued,blastdb,grname=None,nbatches = 10):
    '''Fans blast_from_rad_clusters.py jobs out over LSF in cluster-id batches.

    clids    : list of cluster id strings to process
    mapname  : map filename used to build per-batch sequence filenames
    gr,tab,uniqued : paths passed through to blast_from_rad_clusters.py
    blastdb  : blast database path
    grname   : display name for gr (defaults to its basename)
    nbatches : number of batches to split clids into

    Returns the list of expected .blast output paths (batches whose output
    already exists are skipped, not resubmitted).
    '''
    if grname is None:
        grname = os.path.basename(gr)
    # FIX: integer division yields 0 when len(clids) < nbatches, which would
    # make range(0, len(clids), 0) raise ValueError; clamp to at least 1
    batchlen = max(1, len(clids) / nbatches)
    clid_batches = [clids[i:i+batchlen] for i in range(0,len(clids),batchlen)]
    cmds = []
    blastouts = []
    for cb in clid_batches:
        seqfile = os.path.join(outroot,'%s_%s-%s_%sclust_%s.fa' % (os.path.splitext(mapname)[0],cb[0],cb[-1],len(cb),os.path.splitext(grname)[0]))
        blast_out = os.path.join(outroot,'%s-%s.blast' % tuple([os.path.splitext(os.path.basename(f))[0] for f in [seqfile,blastdb]]))
        blastouts.append(blast_out)
        # skip batches whose blast output already exists
        if os.path.exists(blast_out):
            continue
        clidlist = '\n'.join(cb)
        cmds.append('echo -e "%s" | blast_from_rad_clusters.py %s %s %s %s %s %s' % (clidlist,gr,tab,uniqued,seqfile,blastdb,blast_out))
    logfile = os.path.join(outroot,'rad-clust-blast-log')
    jobids,namedict = LSF.lsf_jobs_submit(cmds,logfile,'normal_serial',bsub_flags='-R "select[mem>30000]"', jobname_base='radclustblast',num_batches=nbatches)
    LSF.lsf_wait_for_jobs(jobids,logfile,namedict=namedict,restart_z=24)
    return blastouts
# Submit one ffmpeg crop job per crop region over LSF, resubmitting failed
# commands until the log reports success for all of them.
# (script chunk: cropsdict, vid, offset, dur, outroot, fps, nrun are defined
# earlier in this script)
cmds = []
rerun = True
while rerun:
    for clab,crops in cropsdict.items():
        outbase,outext = os.path.splitext(vid)
        outvid = '%s_%s_%s-%s%s' % (outbase,clab,offset,dur,outext)
        if os.path.exists(outvid) and ( vidtools.vid_duration(outvid) == dur ):
            print >> sys.stderr, '%s present and expected size, skip' % outvid
        else:
            # ffmpeg crop=w:h:x:y -- width/height reduced by the two opposing margins
            cropstr = '-vf crop=in_w-%s:in_h-%s:%s:%s' % (crops[0]+crops[2],crops[1]+crops[3],crops[0],crops[1])
            cmd = 'ffmpeg -ss %s -t %s -i %s -y %s -b 20000k %s' % (offset,dur,vid,cropstr,outvid)
            cmds.append(cmd)
    logfile = os.path.join(os.path.dirname(vid),'crop-log')
    jobids,namedict = LSF.lsf_jobs_submit(cmds,logfile,'normal_serial',jobname_base='vid2crop')
    # FIX: dropped the stray 'normal_serial' third positional argument.  Every
    # other call site passes lsf_wait_for_jobs(jobids, logfile, namedict=...),
    # and one passes namedict in the third positional slot, so the original
    # call supplied namedict twice (TypeError).
    LSF.lsf_wait_for_jobs(jobids,logfile,namedict=namedict)
    cmds = LSF.lsf_no_success_from_log(logfile)
    if len(cmds) == 0:
        rerun = False
sys.exit()

# NOTE(review): unreachable after sys.exit() above; frame-extraction loop
# retained exactly as in the original (no exit condition is visible in this
# chunk -- confirm against the full script)
rerun = True
while rerun:
    outstr = os.path.join(outroot,'%07d.png')
    execstr = 'ffmpeg -ss %s -t %s -i %s -r %s -y %s %s 2> /dev/null' % (offset,dur,vid,fps,cropstr,outstr)
    print >> sys.stderr, 'execute %s\nrunning %s' % (nrun,execstr)
    os.system(execstr)
    nrun += 1
# Re-extract frames in parallel over LSF, then reanalyze activity segments.
# (script chunk: vid, fps, source_root, source_adir, exist_paths, crops,
# nonex_wins and new_seglen are defined earlier in this script)
new_tdirs = vidtools.parallel_v2p(vid, fps,tdir=source_root,queue='short_serial',num_jobs=20,crops=crops,extract_windows=nonex_wins)
tdirs = exist_paths + new_tdirs
# one reanalysis command per frame directory (pre-existing and new)
cmds = ['reanalyze_activity_segments.py %s %s %s' % (source_adir, d, new_seglen) for d in tdirs]
logfile = source_root+'/reanalyze-log'
final_summary = source_root+'/%0.1fsec_summary.pdf' % new_seglen
# skip reanalysis entirely when the merged summary already exists
if os.path.exists(final_summary):
    print >>sys.stderr, 'merged summary %s exists; will not reanalyze' % final_summary
    do = False
else:
    do = True
passes = 0
# up to 3 passes: resubmit only the commands the log reports as unfinished
while do and passes < 3:
    jids,ndict = LSF.lsf_jobs_submit(cmds,logfile,'normal_serial',jobname_base='reanalyze')
    # third positional argument of lsf_wait_for_jobs is the name dict
    LSF.lsf_wait_for_jobs(jids,logfile,ndict)
    time.sleep(10)
    unfinished = LSF.lsf_no_success_from_log(logfile)
    if unfinished:
        print >> sys.stderr, 'not finished: %s' % unfinished
        cmds = unfinished
    else:
        do = False
    passes += 1
# merge the per-segment summary PDFs into one file with ghostscript
if not os.path.exists(final_summary):
    print >> sys.stderr, 'write summary to '+final_summary
    os.system('gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=%s %s/*-*/30fps/analysis/1.0sec*/summary.pdf' % (final_summary,source_root))
#!/usr/bin/env python import os,sys,LSF from PIL import Image from glob import glob if len(sys.argv[1:]) > 1 or sys.argv[1].endswith('*.png'): for f in sys.argv[1:]: im = Image.open(f) try: h=im.histogram() except: print 'invalid image: ' + f os.unlink(f) else: outfile = sys.argv[1]+'/../drop-broken-pngs-log' print >> sys.stderr, 'master running, target dir %s output in %s' % (sys.argv[1],outfile) cmds = [] images = glob(sys.argv[1]+'/*.png') for i in range(0,len(images),1000): cmds.append(sys.argv[0]+' '+(' '.join(images[i:i+1000]))) jids,ndict = LSF.lsf_jobs_submit(cmds,outfile,jobname_base='pngdrop',num_batches=400) LSF.lsf_wait_for_jobs(jids,restart_outfile=outfile,namedict=ndict,restart_z=12)
# Submit summarize_segment.py jobs for every segment that does not yet have a
# .mice output file, then wait (with optional auto-restart) for completion.
# (script chunk: rerun, images, segment_step, mask, pixav, timeav, outroot,
# mousez, opts, num_batches, jobids, namedict and SHAPE are defined earlier;
# the trailing cleanup loop appears truncated in this view)
restart_z = 12
while rerun:
    cmds = []
    for i in range(0,len(images),int(segment_step)):
        this_out = os.path.join( outroot,'%07d-%07d.mice' % (i,i+segment_step))
        if not os.path.exists(this_out):
            cmd = 'summarize_segment.py -m %s -s %d -e %d -p %d -t %d -r %s -o %s -x \\"%s\\" -b \\"%s\\" -g \\"%s\\" %s' \
            % (mask,i,i+segment_step,pixav,timeav,outroot,mousez,opts['xybounds'],opts['burrow_entrance_xy'],opts['ground_anchors'],imagedir)
            cmds.append(cmd)
    # drop last command (incomplete segment)
    # NOTE(review): raises IndexError when cmds is empty (every segment already
    # has output) -- confirm that case cannot be reached here
    dropcmd = cmds.pop()
    logfile = os.path.join(outroot,'summarize-segment-log')
    print >> sys.stderr,'running summary of %s segments, log written to %s\n' % (len(cmds),logfile)
    print >> sys.stderr,'bundle into %s batches' % num_batches
    jids,ndict = LSF.lsf_jobs_submit(cmds,logfile,opts['summarize_queue'],jobname_base='summarize',num_batches=num_batches,bsub_flags='-R "select[mem > 30000]"')
    # accumulate into jobids/namedict carried over from outside this chunk
    jobids.update(jids)
    namedict.update(ndict)
    # only auto-restart stalled jobs while no .mice output has appeared yet
    if glob(outroot+'/*.mice'):
        restart_z = None
    else:
        restart_z = 12
    LSF.lsf_wait_for_jobs(jobids,os.path.join(outroot,'restarts'),namedict=namedict,restart_z=restart_z) #restart_z=None)
    jobids = {}
    namedict = {}
    #remove .mice corresponding to mis-sized frames
    for f in glob(outroot+'/*.frame'):
        fgrp = os.path.splitext(f)[0]
        # a valid frame is 8 bytes per pixel (float64) over the SHAPE grid
        if os.path.getsize(fgrp+'.frame') != 8 * SHAPE[0] * SHAPE[1]:
            print >> sys.stderr, 'remove missized file %s (obs: %s exp %s)' % (f, os.path.getsize(fgrp+'.frame'), 8 * SHAPE[0] * SHAPE[1])
    # (fragment) tail of the cmds list comprehension begun before this chunk:
    # one reanalyze command per frame directory
    'reanalyze_activity_segments.py %s %s %s' % (source_adir, d, new_seglen)
    for d in tdirs
]
logfile = source_root + '/reanalyze-log'
final_summary = source_root + '/%0.1fsec_summary.pdf' % new_seglen
# skip reanalysis entirely when the merged summary already exists
if os.path.exists(final_summary):
    print >> sys.stderr, 'merged summary %s exists; will not reanalyze' % final_summary
    do = False
else:
    do = True
passes = 0
# up to 3 passes: resubmit only the commands the log reports as unfinished
while do and passes < 3:
    jids, ndict = LSF.lsf_jobs_submit(cmds, logfile, 'normal_serial', jobname_base='reanalyze')
    # third positional argument of lsf_wait_for_jobs is the name dict
    LSF.lsf_wait_for_jobs(jids, logfile, ndict)
    time.sleep(10)
    unfinished = LSF.lsf_no_success_from_log(logfile)
    if unfinished:
        print >> sys.stderr, 'not finished: %s' % unfinished
        cmds = unfinished
    else:
        do = False
    passes += 1
# merge the per-segment summary PDFs with ghostscript
# (fragment: the os.system call's closing arguments lie beyond this chunk)
if not os.path.exists(final_summary):
    print >> sys.stderr, 'write summary to ' + final_summary
    os.system(
        'gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=%s %s/*-*/30fps/analysis/1.0sec*/summary.pdf'
                # (fragment) tail of the summarize_segment.py command string
                # interpolation begun before this chunk
                opts["burrow_entrance_xy"],
                opts["ground_anchors"],
                imagedir,
            )
        )
        cmds.append(cmd)
# drop last command (incomplete segment)
# NOTE(review): cmds.pop() raises IndexError when cmds is empty (every segment
# already has output) -- confirm that case cannot be reached here
dropcmd = cmds.pop()
logfile = os.path.join(outroot, "summarize-segment-log")
print >>sys.stderr, "running summary of %s segments, log written to %s\n" % (len(cmds), logfile)
print >>sys.stderr, "bundle into %s batches" % num_batches
jids, ndict = LSF.lsf_jobs_submit(
    cmds,
    logfile,
    opts["summarize_queue"],
    jobname_base="summarize",
    num_batches=num_batches,
    bsub_flags='-R "select[mem > 30000]"',
)
# accumulate into jobids/namedict carried over from outside this chunk
jobids.update(jids)
namedict.update(ndict)
# only auto-restart stalled jobs while no .mice output has appeared yet
if glob(outroot + "/*.mice"):
    restart_z = None
else:
    restart_z = 12
LSF.lsf_wait_for_jobs(
    jobids, os.path.join(outroot, "restarts"), namedict=namedict, restart_z=restart_z
)  # restart_z=None)
jobids = {}
namedict = {}
# Submit one ffmpeg crop job per crop region over LSF, resubmitting failed
# commands until the log reports success for all of them.
# (fragment: rerun, cmds, cropsdict, vid, offset, dur, outroot, fps and nrun
# are defined earlier in this script; the final loop body is truncated here)
while rerun:
    for clab, crops in cropsdict.items():
        outbase, outext = os.path.splitext(vid)
        outvid = '%s_%s_%s-%s%s' % (outbase, clab, offset, dur, outext)
        if os.path.exists(outvid) and (vidtools.vid_duration(outvid) == dur):
            print >> sys.stderr, '%s present and expected size, skip' % outvid
        else:
            # ffmpeg crop=w:h:x:y -- width/height reduced by opposing margins
            cropstr = '-vf crop=in_w-%s:in_h-%s:%s:%s' % (
                crops[0] + crops[2], crops[1] + crops[3], crops[0], crops[1])
            cmd = 'ffmpeg -ss %s -t %s -i %s -y %s -b 20000k %s' % (
                offset, dur, vid, cropstr, outvid)
            cmds.append(cmd)
    logfile = os.path.join(os.path.dirname(vid), 'crop-log')
    jobids, namedict = LSF.lsf_jobs_submit(cmds, logfile, 'normal_serial', jobname_base='vid2crop')
    # NOTE(review): 'normal_serial' is passed in the third positional slot,
    # but other call sites pass the name dict there (or namedict=...) -- this
    # likely supplies namedict twice; confirm against LSF.lsf_wait_for_jobs
    LSF.lsf_wait_for_jobs(jobids, logfile, 'normal_serial', namedict=namedict)
    cmds = LSF.lsf_no_success_from_log(logfile)
    if len(cmds) == 0:
        rerun = False
sys.exit()

# NOTE(review): unreachable after sys.exit() above
rerun = True
while rerun:
    outstr = os.path.join(outroot, '%07d.png')
    execstr = 'ffmpeg -ss %s -t %s -i %s -r %s -y %s %s 2> /dev/null' % (
        offset, dur, vid, fps, cropstr, outstr)
    print >> sys.stderr, 'execute %s\nrunning %s' % (nrun, execstr)