Example #1
0
def run_lsf_blat(subjects,
                 queries,
                 blattile,
                 blatargstr='',
                 num_batches=100,
                 queue='normal_serial'):
    '''submits mcl_id_triples_by_blat.py jobs to LSF and waits for completion

    intended as an example of parallelization over a compute grid;
    uses a module LSF.py for interaction with scheduler

    subjects    -- subject FASTA filenames (expected to end in '_subj.fa')
    queries     -- query FASTA filenames (expected to end in '_query.fa')
    blattile    -- blat -tileSize value; -stepSize is set to half of it
    blatargstr  -- extra blat arguments appended to the command line
    num_batches -- number of LSF batches to split the commands into
    queue       -- LSF queue name

    Returns the list of expected '<outbase>.label.gz' output files.
    Raises OSError if any expected output file is missing after the run.
    '''
    import LSF, run_safe

    def _strip_suffix(s, suffix):
        # str.rstrip(suffix) removes any trailing characters *in* suffix,
        # not the suffix string itself (e.g. 'data.fa'.rstrip('.fa') ->
        # 'dat'); this removes exactly one trailing occurrence of suffix.
        return s[:-len(suffix)] if suffix and s.endswith(suffix) else s

    blatargstr += ' -tileSize=%s' % blattile
    # floor division keeps stepSize an integer (and preserves the original
    # Python 2 semantics of `/` on ints under Python 3)
    blatargstr += ' -stepSize=%s' % (int(blattile) // 2)

    labf = []
    to_run_dict = {}
    for q in queries:
        for subject in subjects:
            # BUGFIX: was .rstrip('.fa').rstrip('_subj'), which strips
            # character sets rather than suffixes and mangles basenames
            subjname = _strip_suffix(
                _strip_suffix(os.path.basename(subject), '.fa'), '_subj')
            outbase = _strip_suffix(
                _strip_suffix(q, '.fa'),
                '_query') + '_blat' + '-subj' + subjname + blatargstr.replace(
                    '=', '').replace(' ', '')
            labf.append(outbase + '.label.gz')
            # shell escapes are unnecessary; run_safe.safe_script wraps cmd
            cmd = '%s %s %s %s "%s" %s' % (
                sys.executable,
                os.path.join(radtag_denovo, 'mcl_id_triples_by_blat.py'),
                subject, q, blatargstr, outbase)
            to_run_dict[outbase] = run_safe.safe_script(cmd, outbase)

    logfile = os.path.join(os.path.dirname(subjects[0]), 'blat-log/blat-log')
    LSF.lsf_run_until_done(to_run_dict, logfile, queue,
                           '-R "select[mem > 20000]"', 'blat2mat', num_batches,
                           3)

    if not all([os.path.exists(f) for f in labf]):
        # BUGFIX: `raise OSError, msg` is a SyntaxError under Python 3;
        # call syntax works in both Python 2 and 3
        raise OSError('blat failed')

    return labf
def schedule_jobs(to_run_dict,scheduler,jobname_base,logbase,queue,requeue=None,njobs=None,duration=None,mem=2048,flags='',MAX_RETRY=MAX_RETRY,slurm_cores=1):
    '''dispatches the prepared commands in to_run_dict to a compute grid

    to_run_dict  -- {done-marker: command} mapping (as built by run_safe)
    scheduler    -- "lsf" or "slurm"
    jobname_base -- base string for scheduler job names
    logbase      -- base path for scheduler log files
    queue        -- queue/partition to submit to
    requeue      -- optional second queue to re-run jobs on afterwards
    njobs        -- number of job batches; defaults to one per command
    duration     -- max job duration; defaults to opts.max_job_duration or
                    DURATION_DFAULT (module-level names defined elsewhere)
    mem          -- memory request in MB (slurm path only)
    flags        -- extra submission flags (lsf path only)
    MAX_RETRY    -- max resubmission attempts (default from module global)
    slurm_cores  -- cores per task for slurm submissions

    Raises ValueError for an unknown scheduler.
    '''

    if duration is None:
        # opts is a module-level options object defined elsewhere; tolerate
        # it being absent or lacking the attribute (original used bare except:)
        try:
            duration = opts.max_job_duration
        except (NameError, AttributeError):
            duration = DURATION_DFAULT
    if njobs is None:
        # BUGFIX: original read `len(trd)`, but no `trd` exists in this
        # scope -- it raised NameError whenever njobs was left as None
        njobs = len(to_run_dict)

    if scheduler == 'lsf':
        LSF.lsf_run_until_done(to_run_dict,logbase,queue,flags,jobname_base,njobs,MAX_RETRY)
        if requeue:
            LSF.lsf_run_until_done(to_run_dict,logbase,requeue,flags,jobname_base,njobs,MAX_RETRY)
    elif scheduler == 'slurm':
        kwargs = {}
        #make right before copying
        if slurm_cores > 1:
            kwargs['ntasks-per-node'] = '%s' % slurm_cores
        SLURM.run_until_done(to_run_dict,jobname_base,logbase,duration,mem,njobs,queue,MAX_RETRY=MAX_RETRY,**kwargs)
        if requeue:
            SLURM.run_until_done(to_run_dict,jobname_base,logbase,duration,mem,njobs,requeue,MAX_RETRY=MAX_RETRY,**kwargs)
    else:
        # BUGFIX: report the `scheduler` argument actually received, not
        # opts.scheduler; `raise ValueError, errstr` is py2-only syntax
        errstr = 'scheduler must be one of ["lsf","slurm"]; is %s' % scheduler
        raise ValueError(errstr)
Example #3
0
File: rtd_run.py  Project: alexagrf/rtd
def run_lsf_blat(subjects,queries,blattile,blatargstr='',num_batches=100,queue='normal_serial'):
    '''submits mcl_id_triples_by_blat.py jobs to LSF and waits for completion

    intended as an example of parallelization over a compute grid;
    uses a module LSF.py for interaction with scheduler

    Returns the list of expected '<outbase>.label.gz' output files.
    Raises OSError if any expected output file is missing after the run.
    '''
    import LSF,run_safe

    def _strip_suffix(s, suffix):
        # str.rstrip(suffix) removes trailing characters *in* suffix, not
        # the suffix itself; this removes exactly one trailing suffix
        return s[:-len(suffix)] if suffix and s.endswith(suffix) else s

    blatargstr += ' -tileSize=%s' % blattile
    # // preserves the original Python 2 integer-division behavior
    blatargstr += ' -stepSize=%s' % (int(blattile)//2)

    labf = []
    to_run_dict = {}
    for q in queries:
        for subject in subjects:
            # BUGFIX: was .rstrip('.fa').rstrip('_subj'), which strips
            # character sets rather than suffixes (e.g. 'data.fa' -> 'dat')
            subjname = _strip_suffix(_strip_suffix(os.path.basename(subject),'.fa'),'_subj')
            outbase = _strip_suffix(_strip_suffix(q,'.fa'),'_query')+'_blat'+'-subj'+subjname+blatargstr.replace('=','').replace(' ','')
            labf.append(outbase+'.label.gz')
            # shell escapes are unnecessary; run_safe.safe_script wraps cmd
            cmd = '%s %s %s %s "%s" %s' % (sys.executable, os.path.join(radtag_denovo, 'mcl_id_triples_by_blat.py'),subject,q,blatargstr,outbase)
            to_run_dict[outbase] = run_safe.safe_script(cmd,outbase)

    logfile = os.path.join(os.path.dirname(subjects[0]),'blat-log/blat-log')
    LSF.lsf_run_until_done(to_run_dict, logfile, queue, '-R "select[mem > 20000]"', 'blat2mat', num_batches, 3)

    if not all([os.path.exists(f) for f in labf]):
        # BUGFIX: `raise OSError, msg` is a SyntaxError under Python 3
        raise OSError('blat failed')

    return labf
#!/usr/bin/env python

from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

# Work from the 2012 cross data root so the relative globs/paths below resolve.
os.chdir('/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/')
logfile = '../rbt-logs/log'
# Jobs currently known to the scheduler -- presumably used downstream to skip
# analyses that are still running (TODO confirm against submit_summarize_runs).
currjobs = submit_summarize_runs.get_currjobs()

# Collect an analysis dir for every merged 720p video whose config file exists
# and records 'end' (i.e. configuration was completed). filter(None, ...)
# drops falsy entries, so get_successful_analysis_dir presumably returns
# None/'' for videos without a successful analysis -- verify against that module.
analysis_dirs = filter(None, [
    submit_summarize_runs.get_successful_analysis_dir(
        vid,
        submit_summarize_runs.RERUN_COEFFS,
        currjobs=currjobs,
        **submit_summarize_runs.DEFAULT_PARAMS)
    for vid in sorted(glob('*/merge6mbit_720_*.mp4'))
    if os.path.exists(viz_vidtools.cfg_fn(vid))
    and 'end' in open(viz_vidtools.cfg_fn(vid)).read()
])
trd = {}
for analysis_dir in analysis_dirs:
    # 'rainbowtron' path acts as the per-job done marker handed to run_safe
    rbtdone = os.path.join(analysis_dir, 'rainbowtron')
    cmd = 'run_rainbowtron.py %s' % analysis_dir
    run_safe.add_cmd(trd, rbtdone, cmd, force_write=True)

# Submit in up to 100 batches on the normal_serial queue; retry failures 3x.
LSF.lsf_run_until_done(trd, logfile, 'normal_serial', '', 'rainbow', 100, 3)
#!/usr/bin/env python

from video_analysis import submit_summarize_runs, viz_vidtools
import LSF, run_safe
import time, os, sys
from glob import glob

# Work from the 2012 cross data root so the relative globs/paths below resolve.
os.chdir("/n/hoekstrafs2/burrowing/antfarms/data/_2012cross/")
logfile = "../rbt-logs/log"
# Jobs currently known to the scheduler -- presumably used downstream to skip
# analyses that are still running (TODO confirm against submit_summarize_runs).
currjobs = submit_summarize_runs.get_currjobs()

# Collect an analysis dir for every merged 720p video whose config file exists
# and records "end" (i.e. configuration was completed). filter(None, ...)
# drops falsy entries, so get_successful_analysis_dir presumably returns
# None/"" for videos without a successful analysis -- verify against that module.
analysis_dirs = filter(
    None,
    [
        submit_summarize_runs.get_successful_analysis_dir(
            vid, submit_summarize_runs.RERUN_COEFFS, currjobs=currjobs, **submit_summarize_runs.DEFAULT_PARAMS
        )
        for vid in sorted(glob("*/merge6mbit_720_*.mp4"))
        if os.path.exists(viz_vidtools.cfg_fn(vid)) and "end" in open(viz_vidtools.cfg_fn(vid)).read()
    ],
)
trd = {}
for analysis_dir in analysis_dirs:
    # "rainbowtron" path acts as the per-job done marker handed to run_safe
    rbtdone = os.path.join(analysis_dir, "rainbowtron")
    cmd = "run_rainbowtron.py %s" % analysis_dir
    run_safe.add_cmd(trd, rbtdone, cmd, force_write=True)


# Submit in up to 100 batches on the normal_serial queue; retry failures 3x.
LSF.lsf_run_until_done(trd, logfile, "normal_serial", "", "rainbow", 100, 3)
Example #6
0
	else:
		if FORCE_PAR:
			h,w = vidtools.extract_keyframe(vid).shape
			th = h - (crops[1]+crops[3])
			tw = w - (crops[0]+crops[2])
			pixw = 255
			pixh = int((float(th)/tw)*pixw)
			parstr = '-aspect %s:%s' % (pixw,pixh)
		else:
			parstr = ''
		cropstr = '-vf crop=in_w-%s:in_h-%s:%s:%s' % (crops[0]+crops[2],crops[1]+crops[3],crops[0],crops[1])
		cmd = 'ffmpeg -ss %s -t %s -i %s -y %s -r 29.97 -b 20000k %s %s' % (offset,dur,vid,cropstr,parstr,outvid)
		to_run_dict[outvid] = run_safe.safe_script(cmd,outvid,force_write=True)

# Logs go under <video dir>/logs/; submit the accumulated crop commands in up
# to 10 batches, requiring hosts with > job_ram MB free memory, retrying
# failed jobs up to MAX_RETRY times (job_ram/queue/MAX_RETRY set earlier,
# outside this excerpt).
logfile = os.path.join(os.path.dirname(vid),'logs','crop-log')
LSF.lsf_run_until_done(to_run_dict,logfile,queue,'-R "select[mem>%s]"' % job_ram, 'crop-ffmpeg',10, MAX_RETRY)

#cmds = []
#rerun = True
#while rerun:
#	for clab,crops in cropsdict.items():
#		outbase,outext = os.path.splitext(vid)
#		outvid = '%s_%s_%s-%s%s' % (outbase,clab,offset,dur,outext)
#		if os.path.exists(outvid) and ( vidtools.vid_duration(outvid) == dur ):
#			print >> sys.stderr, '%s present and expected size, skip' % outvid
#		else:
#			cropstr = '-vf crop=in_w-%s:in_h-%s:%s:%s' % (crops[0]+crops[2],crops[1]+crops[3],crops[0],crops[1])
#			cmd = 'ffmpeg -ss %s -t %s -i %s -y %s -b 20000k %s' % (offset,dur,vid,cropstr,outvid)
#			cmds.append(cmd)
#
#