Example #1
def test_xform_script_for_preads():
    # Technically, we never have more than one daligner in a script, but that
    # could change in pbsmrtpipe, since it limits the number of chunks.
    script = 'daligner x y\nLAsort a b\ndaligner x1 y1\n'
    expected = 'daligner_p x y\nLAsort a b\ndaligner_p x1 y1\n'
    result = f.xform_script_for_preads(script)
    eq_(result, expected)

    script = 'daligner x y\nLAsort a b\ndaligner x1 y1\n'
    expected = script # no-op
    result = f.xform_script_for_raw_reads(script)
    eq_(result, expected)

    eq_(f.get_script_xformer(True), f.xform_script_for_preads)
    eq_(f.get_script_xformer(False), f.xform_script_for_raw_reads)
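For reference, here is a minimal sketch of the helpers exercised above. The implementation bodies are assumptions inferred from the expected strings in the test (rewrite daligner invocations to daligner_p for preads, pass raw-read scripts through untouched); the real falcon_kit code may differ.

import re

def xform_script_for_preads(script):
    # Assumed behavior: rewrite each daligner call at the start of a line to
    # daligner_p, the variant the test above expects for preads.
    return re.sub(r'^daligner\b', 'daligner_p', script, flags=re.MULTILINE)

def xform_script_for_raw_reads(script):
    # Raw-read scripts pass through unchanged (the test treats this as a no-op).
    return script

def get_script_xformer(pread_aln):
    # Dispatch on the alignment stage: preads vs. raw reads.
    return xform_script_for_preads if pread_aln else xform_script_for_raw_reads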
Example #2
def write_run_daligner_chunks_falcon(pread_aln, chunk_file, config_json_fn,
                                     run_jobs_fn, max_total_nchunks, dir_name,
                                     chunk_base_name, chunk_ext, chunk_keys):
    # Choose the Dazzler DB prefix for this stage and the matching script
    # transform (daligner -> daligner_p for preads; a no-op for raw reads).
    db_prefix = 'preads' if pread_aln else 'raw_reads'
    xform_script = get_script_xformer(pread_aln)

    def chunk():
        # cmds is actually a list of small bash scripts, including linefeeds.
        cmds = get_daligner_job_descriptions(open(run_jobs_fn),
                                             db_prefix).values()
        if max_total_nchunks < len(cmds):
            log.debug("max_total_nchunks < # daligner cmds: %d < %d" %
                      (max_total_nchunks, len(cmds)))
            cmds = joined_strs(cmds, max_total_nchunks)
        symlink_dazzdb(os.path.dirname(run_jobs_fn), db_prefix)
        for i, script in enumerate(cmds):
            # Write each daligner script to its own chunk file and describe it
            # with a PipelineChunk that also records the shared config JSON.
            chunk_id = '_'.join([chunk_base_name, str(i)])
            chunk_name = '.'.join([chunk_id, chunk_ext])
            chunk_path = os.path.join(dir_name, chunk_name)
            script = xform_script(script)
            open(chunk_path, 'w').write(script)
            d = {}
            d[chunk_keys[1]] = os.path.abspath(chunk_path)
            d[chunk_keys[0]] = config_json_fn
            c = PipelineChunk(chunk_id, **d)
            yield c

    chunks = list(chunk())
    write_pipeline_chunks(chunks, chunk_file, comment=None)
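A hypothetical invocation, to show how the arguments fit together. Every path, value, and chunk key below is invented for illustration (the function body only requires that chunk_keys[0] name the config entry and chunk_keys[1] the bash-script entry); nothing here is taken from the original pipeline configuration.

# Hypothetical call with made-up paths and chunk keys, for illustration only.
write_run_daligner_chunks_falcon(
    pread_aln=False,                       # chunk the raw_reads daligner stage
    chunk_file='daligner.chunks.json',     # where the PipelineChunks are written
    config_json_fn='falcon_cfg.json',      # shared config recorded in every chunk
    run_jobs_fn='0-rawreads/run_jobs.sh',  # script whose daligner jobs get chunked
    max_total_nchunks=24,                  # upper bound on emitted chunks
    dir_name='0-rawreads/chunks',          # where the per-chunk bash scripts land
    chunk_base_name='daligner_chunk',
    chunk_ext='sh',
    chunk_keys=('$chunk.config_id', '$chunk.bash_id'))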