Ejemplo n.º 1
0
def create_daligner_tasks(run_jobs_fn, wd, db_prefix, db_file, config, pread_aln=False):
    """Prepare one job directory and one parameter dict per daligner job.

    Job descriptions come from
    ``falcon_kit.functional.get_daligner_job_descriptions`` applied to the
    opened ``run_jobs_fn``.  For the N-th description (N starting at 0) the
    directory ``<wd>/./job_<N as 8 digits>`` is handed to
    ``support.make_dirs`` and then populated, through a shell command, with
    relative symlinks to ``../.<db_prefix>.bps``, ``../.<db_prefix>.idx`` and
    ``../<db_prefix>.db``.

    Args:
        run_jobs_fn: Path of the run-jobs script to read.
        wd: Directory under which the per-job directories live.
        db_prefix: Database name prefix; used to build the symlink names and
            stored in every task.
        db_file: Passed to ``support.get_nblock``; the result is not used by
            this function.
        config: Stored unchanged in every task dict.
        pread_aln: When true, every whole-word ``daligner`` in a job script
            is replaced by ``daligner_p``.

    Returns:
        dict mapping each job directory path to a dict with the keys
        ``daligner_script``, ``db_prefix``, ``config``, ``job_done`` and
        ``script_fn``.

    Raises:
        Exception: If no job descriptions were produced, or if the shell
            command that sets up the symlinks exits with a non-zero status.
    """
    # The block count is not needed here.  The call is kept in case callers
    # rely on it failing early for a bad db_file -- TODO confirm.
    support.get_nblock(db_file)

    re_daligner = re.compile(r'\bdaligner\b')

    # Close the script as soon as the descriptions have been extracted.
    with open(run_jobs_fn) as run_jobs_file:
        job_descs = falcon_kit.functional.get_daligner_job_descriptions(run_jobs_file, db_prefix)
    if not job_descs:
        raise Exception("No daligner jobs generated in '%s'." % run_jobs_fn)

    tasks = dict()  # job directory -> parameters-dict
    # Only the bash snippets are needed; the description keys are ignored.
    for job_index, bash in enumerate(job_descs.values()):
        job_uid = '%08d' % job_index
        jobd = os.path.join(wd, "./job_%s" % job_uid)
        support.make_dirs(jobd)
        # Chain with '&&' rather than ';': with ';' only the status of the
        # last 'ln' is returned, so a failed 'cd' (which would create the
        # links in the current directory instead) or a failed earlier 'ln'
        # would go unnoticed.
        call = "cd %s && ln -sf ../.%s.bps . && ln -sf ../.%s.idx . && ln -sf ../%s.db ." % (
            jobd, db_prefix, db_prefix, db_prefix)
        rc = os.system(call)
        if rc:
            raise Exception("Failure in system call: %r -> %d" % (call, rc))
        if pread_aln:
            bash = re_daligner.sub("daligner_p", bash)
        tasks[jobd] = {
            'daligner_script': bash,
            'db_prefix': db_prefix,
            'config': config,
            'job_done': os.path.abspath("%s/job_%s_done" % (jobd, job_uid)),
            # also implies run-dir
            'script_fn': os.path.join(jobd, "rj_%s.sh" % job_uid),
        }
    return tasks
Ejemplo n.º 2
0
def create_daligner_tasks(run_jobs_fn,
                          wd,
                          db_prefix,
                          db_file,
                          config,
                          pread_aln=False):
    """Prepare one job directory and one parameter dict per daligner job.

    Job descriptions come from
    ``falcon_kit.functional.get_daligner_job_descriptions`` applied to the
    opened ``run_jobs_fn``; ``single=True`` is passed when
    ``support.get_nblock(db_file)`` returns 1.  For the N-th description
    (N starting at 0) the directory ``<wd>/./job_<N as 8 digits>`` is handed
    to ``support.make_dirs`` and then populated, through a shell command,
    with relative symlinks to ``../.<db_prefix>.bps``,
    ``../.<db_prefix>.idx`` and ``../<db_prefix>.db``.

    Args:
        run_jobs_fn: Path of the run-jobs script to read.
        wd: Directory under which the per-job directories live.
        db_prefix: Database name prefix; used to build the symlink names and
            stored in every task.
        db_file: Passed to ``support.get_nblock`` to obtain the block count.
        config: Stored unchanged in every task dict.
        pread_aln: When true, every whole-word ``daligner`` in a job script
            is replaced by ``daligner_p``.

    Returns:
        dict mapping each job directory path to a dict with the keys
        ``daligner_script``, ``db_prefix``, ``config``, ``job_done`` and
        ``script_fn``.

    Raises:
        Exception: If no job descriptions were produced, or if the shell
            command that sets up the symlinks exits with a non-zero status.
    """
    nblock = support.get_nblock(db_file)
    single = (nblock == 1)

    re_daligner = re.compile(r'\bdaligner\b')

    # Close the script as soon as the descriptions have been extracted.
    with open(run_jobs_fn) as run_jobs_file:
        job_descs = falcon_kit.functional.get_daligner_job_descriptions(
            run_jobs_file, db_prefix, single=single)
    if not job_descs:
        raise Exception("No daligner jobs generated in '%s'." % run_jobs_fn)

    tasks = dict()  # job directory -> parameters-dict
    # Only the bash snippets are needed; the description keys are ignored.
    for job_index, bash in enumerate(job_descs.values()):
        job_uid = '%08d' % job_index
        jobd = os.path.join(wd, "./job_%s" % job_uid)
        support.make_dirs(jobd)
        # Chain with '&&' rather than ';': with ';' only the status of the
        # last 'ln' is returned, so a failed 'cd' (which would create the
        # links in the current directory instead) or a failed earlier 'ln'
        # would go unnoticed.
        call = "cd %s && ln -sf ../.%s.bps . && ln -sf ../.%s.idx . && ln -sf ../%s.db ." % (
            jobd, db_prefix, db_prefix, db_prefix)
        rc = os.system(call)
        if rc:
            raise Exception("Failure in system call: %r -> %d" % (call, rc))
        if pread_aln:
            bash = re_daligner.sub("daligner_p", bash)
        tasks[jobd] = {
            'daligner_script': bash,
            'db_prefix': db_prefix,
            'config': config,
            'job_done': os.path.abspath("%s/job_%s_done" % (jobd, job_uid)),
            # also implies run-dir
            'script_fn': os.path.join(jobd, "rj_%s.sh" % job_uid),
        }
    return tasks
Ejemplo n.º 3
0
def create_merge_tasks(i_fofn_fn, run_jobs_fn, wd, db_prefix, config):
    """Build the merge and consensus task parameters for every block.

    ``run_jobs_fn`` is scanned for ``LAsort``, ``LAmerge`` and ``mv``
    commands.  Each one is assigned to a block id parsed from a
    dot-separated file name on the line; ``LAmerge`` and ``mv`` lines are
    collected verbatim (whitespace-normalised) into that block's merge
    script, while ``LAsort`` lines only register the block.

    For every block the directory ``<wd>/m_<5-digit id>`` is set up and
    the ``.las`` files listed in ``i_fofn_fn`` whose left block number equals
    the block id are symlinked into it.

    Args:
        i_fofn_fn: Path of a file listing ``.las`` file names, one per line.
            It is opened from inside the merge directory.
        run_jobs_fn: Path of the run-jobs script to scan.
        wd: Directory under which ``preads``, ``las_files`` and the
            per-block merge directories are set up.
        db_prefix: Database name prefix used for the merged ``.las`` name.
        config: Stored unchanged in both task dicts.

    Returns:
        dict mapping block id (int) to a 4-tuple
        ``(merge_task, cons_task, las_bfn, fasta_bfn)`` where ``merge_task``
        has the keys ``script``, ``merge_subdir``, ``config`` and
        ``cons_task`` has the keys ``db_fn``, ``las_fn``, ``out_file_fn``,
        ``config``.
    """
    mjob_data = {}  # p_id -> command lines of that block's merge script

    with open(run_jobs_fn) as f:
        for line in f:
            words = line.split()
            if not words:
                # Blank line: there is no command word to inspect.
                continue
            cmd = words[0]
            if cmd == "LAsort":
                # We now run this part w/ daligner, but we still need
                # a small script for some book-keeping, so the block is
                # registered without adding the command itself.
                p_id = int(words[2].split(".")[1])
                mjob_data.setdefault(p_id, [])
            elif cmd in ("LAmerge", "mv"):
                # The name carrying the block id is the third word of an
                # LAmerge line and the second word of an mv line.
                name_parts = words[2 if cmd == "LAmerge" else 1].split(".")
                # When the second component starts with "L", the block id
                # is the component after it; otherwise it is the second one.
                id_index = 2 if name_parts[1].startswith("L") else 1
                p_id = int(name_parts[id_index])
                mjob_data.setdefault(p_id, []).append(" ".join(words))

    # Could be L1.* or preads.*
    re_las = re.compile(r'\.(\d*)(\.\d*)?\.las$')

    tasks = {}  # p_id -> (merge_task, cons_task, las_bfn, fasta_bfn)
    for p_id, s_data in mjob_data.items():
        support.make_dirs("%s/preads" % (wd))
        support.make_dirs("%s/las_files" % (wd))
        merge_subdir = "m_%05d" % p_id
        merge_dir = os.path.join(wd, merge_subdir)
        support.make_dirs(merge_dir)
        with cd(merge_dir):
            print("i_fofn_fn=%r" % i_fofn_fn)
            # Since we could be in the gather-task-dir, instead of globbing,
            # we will read the fofn.
            with open(i_fofn_fn) as fofn:
                las_fns = fofn.read().splitlines()
            for fn in las_fns:
                basename = os.path.basename(fn)
                mo = re_las.search(basename)
                if not mo:
                    continue
                left_block = int(mo.group(1))
                if left_block != p_id:
                    # By convention, m_00005 merges L1.5.*.las, etc.
                    continue
                rel_fn = os.path.relpath(fn)
                print("symlink %r <- %r" % (rel_fn, basename))
                os.symlink(rel_fn, basename)

        # One command per line, each terminated by a newline.
        merge_script = "".join(cmd_line + "\n" for cmd_line in s_data)
        las_bfn = '%s.%d.las' % (db_prefix, p_id)
        merge_task = {
            "script": merge_script,
            "merge_subdir": merge_subdir,
            "config": config,
        }

        fasta_bfn = "out.%05d.fasta" % p_id
        cons_task = {
            "db_fn": '../{}'.format(db_prefix),
            # assuming merge ran in merge_dir
            "las_fn": '../{}/{}'.format(merge_subdir, las_bfn),
            "out_file_fn": os.path.abspath("%s/preads/%s" % (wd, fasta_bfn)),
            "config": config,
        }
        tasks[p_id] = (merge_task, cons_task, las_bfn, fasta_bfn)

    return tasks
Ejemplo n.º 4
0
def create_merge_tasks(i_fofn_fn, run_jobs_fn, wd, db_prefix, config):
    """Build the merge and consensus task parameters for every block.

    ``run_jobs_fn`` is scanned for ``LAsort``, ``LAmerge`` and ``mv``
    commands.  Each one is assigned to a block id parsed from a
    dot-separated file name on the line; ``LAmerge`` and ``mv`` lines are
    collected verbatim (whitespace-normalised) into that block's merge
    script, while ``LAsort`` lines only register the block.

    For every block the directory ``<wd>/m_<5-digit id>`` is set up and
    ``symlink`` is called, from inside that directory, on each ``.las`` file
    listed in ``i_fofn_fn`` whose left block number equals the block id.

    Args:
        i_fofn_fn: Path of a file listing ``.las`` file names, one per line.
            It is opened from inside the merge directory.
        run_jobs_fn: Path of the run-jobs script to scan.
        wd: Directory under which ``preads``, ``las_files`` and the
            per-block merge directories are set up.
        db_prefix: Database name prefix used for the merged ``.las`` name.
        config: Stored unchanged in both task dicts.

    Returns:
        dict mapping block id (int) to a 4-tuple
        ``(merge_task, cons_task, las_bfn, fasta_bfn)`` where ``merge_task``
        has the keys ``script``, ``merge_subdir``, ``config`` and
        ``cons_task`` has the keys ``db_fn``, ``las_fn``, ``out_file_fn``,
        ``config``.  ``db_fn`` and ``las_fn`` are prefixed with
        ``os.getcwd()``.
    """
    mjob_data = {}  # p_id -> command lines of that block's merge script

    with open(run_jobs_fn) as f:
        for line in f:
            words = line.split()
            if not words:
                # Blank line: there is no command word to inspect.
                continue
            cmd = words[0]
            if cmd == "LAsort":
                # We now run this part w/ daligner, but we still need
                # a small script for some book-keeping, so the block is
                # registered without adding the command itself.
                p_id = int(words[2].split(".")[1])
                mjob_data.setdefault(p_id, [])
            elif cmd in ("LAmerge", "mv"):
                # The name carrying the block id is the third word of an
                # LAmerge line and the second word of an mv line.
                name_parts = words[2 if cmd == "LAmerge" else 1].split(".")
                # When the second component starts with "L", the block id
                # is the component after it; otherwise it is the second one.
                id_index = 2 if name_parts[1].startswith("L") else 1
                p_id = int(name_parts[id_index])
                mjob_data.setdefault(p_id, []).append(" ".join(words))

    # Could be L1.* or preads.*
    re_las = re.compile(r'\.(\d*)(\.\d*)?\.las$')

    tasks = {}  # p_id -> (merge_task, cons_task, las_bfn, fasta_bfn)
    for p_id, s_data in mjob_data.items():
        support.make_dirs("%s/preads" % (wd))
        support.make_dirs("%s/las_files" % (wd))
        merge_subdir = "m_%05d" % p_id
        merge_dir = os.path.join(wd, merge_subdir)
        support.make_dirs(merge_dir)
        with cd(merge_dir):
            print("i_fofn_fn=%r" % i_fofn_fn)
            # Since we could be in the gather-task-dir, instead of globbing,
            # we will read the fofn.
            with open(i_fofn_fn) as fofn:
                las_fns = fofn.read().splitlines()
            for fn in las_fns:
                mo = re_las.search(os.path.basename(fn))
                if not mo:
                    continue
                left_block = int(mo.group(1))
                if left_block != p_id:
                    # By convention, m_00005 merges L1.5.*.las, etc.
                    continue
                symlink(fn)

        # One command per line, each terminated by a newline.
        merge_script = "".join(cmd_line + "\n" for cmd_line in s_data)
        las_bfn = '%s.%d.las' % (db_prefix, p_id)
        merge_task = {
            "script": merge_script,
            "merge_subdir": merge_subdir,
            "config": config,
        }

        fasta_bfn = "out.%05d.fasta" % p_id
        cons_task = {
            "db_fn": '{}/{}'.format(os.getcwd(), db_prefix),
            # assuming merge ran in merge_dir
            "las_fn": '{}/{}/{}'.format(os.getcwd(), merge_subdir, las_bfn),
            "out_file_fn": os.path.abspath("%s/preads/%s" % (wd, fasta_bfn)),
            "config": config,
        }
        tasks[p_id] = (merge_task, cons_task, las_bfn, fasta_bfn)

    return tasks