def create_daligner_tasks(run_jobs_fn, wd, db_prefix, db_file, config, pread_aln = False):
    tasks = dict() # uid -> parameters-dict
    nblock = support.get_nblock(db_file)
    re_daligner = re.compile(r'\bdaligner\b')
    line_count = 0
    job_descs = falcon_kit.functional.get_daligner_job_descriptions(open(run_jobs_fn), db_prefix)
    if not job_descs:
        raise Exception("No daligner jobs generated in '%s'." %run_jobs_fn)
    for desc, bash in job_descs.iteritems():
        job_uid = '%08d' %line_count
        line_count += 1
        jobd = os.path.join(wd, "./job_%s" % job_uid)
        support.make_dirs(jobd)
        call = "cd %s; ln -sf ../.%s.bps .; ln -sf ../.%s.idx .; ln -sf ../%s.db ." % (jobd, db_prefix, db_prefix, db_prefix)
        rc = os.system(call)
        if rc:
            raise Exception("Failure in system call: %r -> %d" %(call, rc))
        job_done = os.path.abspath("%s/job_%s_done" %(jobd, job_uid))
        if pread_aln:
            bash = re_daligner.sub("daligner_p", bash)
        script_fn = os.path.join(jobd, "rj_%s.sh" % (job_uid)) # also implies run-dir
        args = {
            'daligner_script': bash,
            'db_prefix': db_prefix,
            'config': config,
            'job_done': job_done,
            'script_fn': script_fn,
        }
        daligner_task = args #make_daligner_task( task_run_daligner )
        tasks[jobd] = daligner_task
    return tasks

def create_daligner_tasks(run_jobs_fn, wd, db_prefix, db_file, config, pread_aln=False):
    tasks = dict() # uid -> parameters-dict
    nblock = support.get_nblock(db_file)
    re_daligner = re.compile(r'\bdaligner\b')
    line_count = 0
    single = (nblock == 1)
    job_descs = falcon_kit.functional.get_daligner_job_descriptions(
        open(run_jobs_fn), db_prefix, single=single)
    if not job_descs:
        raise Exception("No daligner jobs generated in '%s'." % run_jobs_fn)
    for desc, bash in job_descs.iteritems():
        job_uid = '%08d' % line_count
        line_count += 1
        jobd = os.path.join(wd, "./job_%s" % job_uid)
        support.make_dirs(jobd)
        call = "cd %s; ln -sf ../.%s.bps .; ln -sf ../.%s.idx .; ln -sf ../%s.db ." % (
            jobd, db_prefix, db_prefix, db_prefix)
        rc = os.system(call)
        if rc:
            raise Exception("Failure in system call: %r -> %d" % (call, rc))
        job_done = os.path.abspath("%s/job_%s_done" % (jobd, job_uid))
        if pread_aln:
            bash = re_daligner.sub("daligner_p", bash)
        script_fn = os.path.join(jobd, "rj_%s.sh" % (job_uid)) # also implies run-dir
        args = {
            'daligner_script': bash,
            'db_prefix': db_prefix,
            'config': config,
            'job_done': job_done,
            'script_fn': script_fn,
        }
        daligner_task = args #make_daligner_task( task_run_daligner )
        tasks[jobd] = daligner_task
    return tasks

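# --- Usage sketch (illustrative only; not part of the original pipeline code) ---
# A minimal example of how create_daligner_tasks() might be driven. The working
# directory, db names, and the one-key config dict below are assumptions for
# illustration; in the real pipeline they come from the fc_run configuration and
# from the run_jobs.sh file written by HPC.daligner.
#
#   wd = os.path.abspath('0-rawreads')
#   tasks = create_daligner_tasks(
#       run_jobs_fn=os.path.join(wd, 'run_jobs.sh'),
#       wd=wd,
#       db_prefix='raw_reads',
#       db_file=os.path.join(wd, 'raw_reads.db'),
#       config={'sge_option_da': ''},   # assumed minimal config
#       pread_aln=False)                # True only for the pread (overlap-of-preads) stage
#   for jobd, args in sorted(tasks.iteritems()):
#       # each value holds the daligner bash, the job-done marker, and the script path
#       print jobd, args['script_fn']
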
def create_merge_tasks(i_fofn_fn, run_jobs_fn, wd, db_prefix, config):
    #merge_scripts = bash.scripts_merge(config, db_prefix, run_jobs_fn)
    tasks = {} # pid -> (merge_params, cons_params)
    mjob_data = {}

    with open(run_jobs_fn) as f:
        for l in f:
            l = l.strip().split()
            if l[0] not in ( "LAsort", "LAmerge", "mv" ):
                continue
            if l[0] == "LAsort":
                # We now run this part w/ daligner, but we still need
                # a small script for some book-keeping.
                p_id = int( l[2].split(".")[1] )
                mjob_data.setdefault( p_id, [] )
                #mjob_data[p_id].append( " ".join(l) ) # Already done w/ daligner!
            if l[0] == "LAmerge":
                l2 = l[2].split(".")
                if l2[1][0] == "L":
                    p_id = int( l[2].split(".")[2] )
                    mjob_data.setdefault( p_id, [] )
                    mjob_data[p_id].append( " ".join(l) )
                else:
                    p_id = int( l[2].split(".")[1] )
                    mjob_data.setdefault( p_id, [] )
                    mjob_data[p_id].append( " ".join(l) )
            if l[0] == "mv":
                l2 = l[1].split(".")
                if l2[1][0] == "L":
                    p_id = int( l[1].split(".")[2] )
                    mjob_data.setdefault( p_id, [] )
                    mjob_data[p_id].append( " ".join(l) )
                else:
                    p_id = int( l[1].split(".")[1] )
                    mjob_data.setdefault( p_id, [] )
                    mjob_data[p_id].append( " ".join(l) )

    # Could be L1.* or preads.*
    re_las = re.compile(r'\.(\d*)(\.\d*)?\.las$')

    for p_id in mjob_data:
        s_data = mjob_data[p_id]

        support.make_dirs("%s/preads" % (wd))
        support.make_dirs("%s/las_files" % (wd))
        merge_subdir = "m_%05d" %p_id
        merge_dir = os.path.join(wd, merge_subdir)
        support.make_dirs(merge_dir)
        #merge_script_file = os.path.abspath( "%s/m_%05d/m_%05d.sh" % (wd, p_id, p_id) )
        merge_script = StringIO.StringIO()
        with cd(merge_dir):
            print("i_fofn_fn=%r" %i_fofn_fn)
            # Since we could be in the gather-task-dir, instead of globbing,
            # we will read the fofn.
            for fn in open(i_fofn_fn).read().splitlines():
                basename = os.path.basename(fn)
                mo = re_las.search(basename)
                if not mo:
                    continue
                left_block = int(mo.group(1))
                if left_block != p_id:
                    # By convention, m_00005 merges L1.5.*.las, etc.
                    continue
                rel_fn = os.path.relpath(fn)
                print("symlink %r <- %r" %(rel_fn, os.path.basename(fn)))
                os.symlink(rel_fn, os.path.basename(fn))
        for l in s_data:
            print >> merge_script, l
        las_bfn = '%s.%d.las' %(db_prefix, p_id)
        #print >> merge_script, 'echo %s >| %s' %(las_bfn, merged_las_fofn_bfn)

        #job_done = makePypeLocalFile(os.path.abspath( "%s/m_%05d/m_%05d_done" % (wd, p_id, p_id) ))
        parameters = {"script": merge_script.getvalue(),
                      "merge_subdir": merge_subdir,
                      "config": config}
        merge_task = parameters

        fasta_bfn = "out.%05d.fasta" %p_id
        out_file_fn = os.path.abspath("%s/preads/%s" %(wd, fasta_bfn))
        #out_done = makePypeLocalFile(os.path.abspath( "%s/preads/c_%05d_done" % (wd, p_id) ))
        parameters = {"db_fn": '../{}'.format(db_prefix),
                      "las_fn": '../{}/{}'.format(merge_subdir, las_bfn), # assuming merge ran in merge_dir
                      "out_file_fn": out_file_fn,
                      #"out_done": out_done,
                      "config": config}
        cons_task = parameters

        tasks[p_id] = (merge_task, cons_task, las_bfn, fasta_bfn)
    return tasks

def create_merge_tasks(i_fofn_fn, run_jobs_fn, wd, db_prefix, config):
    #merge_scripts = bash.scripts_merge(config, db_prefix, run_jobs_fn)
    tasks = {} # pid -> (merge_params, cons_params)
    mjob_data = {}

    with open(run_jobs_fn) as f:
        for l in f:
            l = l.strip().split()
            if l[0] not in ("LAsort", "LAmerge", "mv"):
                continue
            if l[0] == "LAsort":
                # We now run this part w/ daligner, but we still need
                # a small script for some book-keeping.
                p_id = int(l[2].split(".")[1])
                mjob_data.setdefault(p_id, [])
                #mjob_data[p_id].append( " ".join(l) ) # Already done w/ daligner!
            if l[0] == "LAmerge":
                l2 = l[2].split(".")
                if l2[1][0] == "L":
                    p_id = int(l[2].split(".")[2])
                    mjob_data.setdefault(p_id, [])
                    mjob_data[p_id].append(" ".join(l))
                else:
                    p_id = int(l[2].split(".")[1])
                    mjob_data.setdefault(p_id, [])
                    mjob_data[p_id].append(" ".join(l))
            if l[0] == "mv":
                l2 = l[1].split(".")
                if l2[1][0] == "L":
                    p_id = int(l[1].split(".")[2])
                    mjob_data.setdefault(p_id, [])
                    mjob_data[p_id].append(" ".join(l))
                else:
                    p_id = int(l[1].split(".")[1])
                    mjob_data.setdefault(p_id, [])
                    mjob_data[p_id].append(" ".join(l))

    # Could be L1.* or preads.*
    re_las = re.compile(r'\.(\d*)(\.\d*)?\.las$')

    for p_id in mjob_data:
        s_data = mjob_data[p_id]

        support.make_dirs("%s/preads" % (wd))
        support.make_dirs("%s/las_files" % (wd))
        merge_subdir = "m_%05d" % p_id
        merge_dir = os.path.join(wd, merge_subdir)
        support.make_dirs(merge_dir)
        #merge_script_file = os.path.abspath( "%s/m_%05d/m_%05d.sh" % (wd, p_id, p_id) )
        merge_script = StringIO.StringIO()
        with cd(merge_dir):
            print("i_fofn_fn=%r" % i_fofn_fn)
            # Since we could be in the gather-task-dir, instead of globbing,
            # we will read the fofn.
            for fn in open(i_fofn_fn).read().splitlines():
                basename = os.path.basename(fn)
                mo = re_las.search(basename)
                if not mo:
                    continue
                left_block = int(mo.group(1))
                if left_block != p_id:
                    # By convention, m_00005 merges L1.5.*.las, etc.
                    continue
                symlink(fn)
        for l in s_data:
            print >> merge_script, l
        las_bfn = '%s.%d.las' % (db_prefix, p_id)
        #print >> merge_script, 'echo %s >| %s' %(las_bfn, merged_las_fofn_bfn)

        #job_done = makePypeLocalFile(os.path.abspath( "%s/m_%05d/m_%05d_done" % (wd, p_id, p_id) ))
        parameters = {
            "script": merge_script.getvalue(),
            "merge_subdir": merge_subdir,
            "config": config
        }
        merge_task = parameters

        fasta_bfn = "out.%05d.fasta" % p_id
        out_file_fn = os.path.abspath("%s/preads/%s" % (wd, fasta_bfn))
        #out_done = makePypeLocalFile(os.path.abspath( "%s/preads/c_%05d_done" % (wd, p_id) ))
        parameters = {
            "db_fn": '{}/{}'.format(os.getcwd(), db_prefix),
            "las_fn": '{}/{}/{}'.format(os.getcwd(), merge_subdir, las_bfn), # assuming merge ran in merge_dir
            "out_file_fn": out_file_fn,
            #"out_done": out_done,
            "config": config
        }
        cons_task = parameters

        tasks[p_id] = (merge_task, cons_task, las_bfn, fasta_bfn)
    return tasks

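# --- Usage sketch (illustrative only; not part of the original pipeline code) ---
# How the per-block tuples returned by create_merge_tasks() might be consumed.
# The p_id keys are the block ids parsed from the LAmerge/mv lines of run_jobs.sh;
# each value bundles the LAmerge bash, the consensus-task parameters, and the
# basenames of the merged .las and the consensus fasta. The variables i_fofn_fn,
# run_jobs_fn, wd, and config are assumed to be defined by the surrounding workflow.
#
#   tasks = create_merge_tasks(i_fofn_fn, run_jobs_fn, wd, db_prefix='raw_reads', config=config)
#   for p_id, (merge_task, cons_task, las_bfn, fasta_bfn) in sorted(tasks.iteritems()):
#       # merge_task['script'] is meant to run inside wd/m_%05d; cons_task['out_file_fn']
#       # is where the consensus fasta for this block should be written.
#       print p_id, las_bfn, fasta_bfn
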