Beispiel #1
0
def merge(proc_dir, job1, jobn, n, stride):
    """Build a Plumber pipeline that runs ``merge_star`` on *jobn* workers.

    The merge node and a trailing ``npasser`` sink share one NuMap
    execution engine; the assembled pipeline is returned unstarted.
    """
    engine = NuMap(worker_num=jobn,
                   ordered=False,
                   stride=stride,
                   buffer=10000)
    merge_node = Piper(Worker(merge_star, (proc_dir, n)), parallel=engine)
    sink_node = Piper(Worker(npasser), parallel=engine)
    flow = Plumber()
    flow.add_pipe((merge_node, sink_node))
    return flow
Beispiel #2
0
def csort(proc_dir, job1, jobn, memn, n, stride):
    """Build a Plumber pipeline running the sambamba coordinate sort.

    *memn* is turned into a gigabyte memory-limit string that is passed
    to ``sambamba_csort``; the sort node feeds a pass-through sink.
    """
    mem_limit = "%sG" % memn
    engine = NuMap(worker_num=jobn,
                   ordered=False,
                   stride=stride,
                   buffer=10000)
    sort_node = Piper(Worker(sambamba_csort, (proc_dir, n, mem_limit)),
                      parallel=engine)
    sink_node = Piper(Worker(npasser), parallel=engine)
    flow = Plumber()
    flow.add_pipe((sort_node, sink_node))
    return flow
Beispiel #3
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper, Dagger

def l33t(inbox):
    word = inbox[0]
    return word.replace('e', '3').replace('o', '0')

def join(inbox):
    """Join the two words carried in *inbox* with a single space."""
    first, second = inbox
    return " ".join((first, second))

left_l33t = Piper(l33t, branch=1)
right_l33t = Piper(l33t, branch=2)
join_l33t = Piper(join)

pipeline = Dagger()
pipeline.add_pipe((left_l33t, join_l33t))
pipeline.add_pipe((right_l33t, join_l33t))
end = pipeline.get_outputs()[0]

pipeline.connect([
                  ['hello', 'hi'],
                  ['world', 'folks']
                  ])
pipeline.start()
print list(end)


Beispiel #4
0
    "dir": os.getcwd(),
    "executable": "python",
    "script": "python_script.py",
    "in": ("greeting", ),
    "out": (("package", "txt"), ),
    "params": {}
}

# wrap the external-script runner with the shell and Python configurations
sh_worker = Worker(script, (sh_cfg, ))
py_worker = Worker(script, (py_cfg, ))

# execution engine
numap = NuMap()

# function nodes
sh_piper = Piper(sh_worker, parallel=numap)
py_piper = Piper(py_worker, parallel=numap)

# topology
pipeline = Dagger()
# BUG FIX: the pipe must connect the two Piper nodes; the original passed
# the raw ``py_worker`` Worker instead of the ``py_piper`` node created
# above, so py_piper was never part of the graph.
pipeline.add_pipe((sh_piper, py_piper))
end = pipeline.get_outputs()[0]

# runtime
pipeline.connect([[{
    "message": "work_moar.txt"
}, {
    "message": "nevar_give_up.txt"
}]])

pipeline.start()
Beispiel #5
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap
from papy.core import Dagger, Piper
from papy.util.func import ipasser


def merge(inbox):
    """Concatenate the first two items found in *inbox*."""
    return inbox[0] + inbox[1]


# function nodes
merge_p = Piper(merge)

inp1_p = Piper(ipasser)
inp2_p = Piper(ipasser)

# topology
pipeline = Dagger()
pipeline.add_pipe((inp2_p, merge_p), branch="2")
pipeline.add_pipe((inp1_p, merge_p), branch="1")

end = pipeline.get_outputs()[0]
# # runtime
pipeline.connect([['hello ', 'world '], ["world", "hello"]])
pipeline.start()
print list(end)
Beispiel #6
0
def align(proc_dir, job1, jobn, memn, stride, n, unstranded, genome_idx,
          full_model, genome_seq, rrna_seq, merge_mode, star_mem,
          alig_star_params, chim_star_params, prune, keep_fastq):
    """Build the paired-end trim/merge plus STAR alignment pipeline.

    The pipeline links the input FASTQ pairs, trims adapters and merges
    overlapping mates, then runs two STAR passes: a spliced alignment on
    the trimmed reads and a chimeric alignment on the merged reads.
    Both result directories are then moved into *proc_dir* and the two
    branches rejoin in a single pass-through sink.

    NOTE(review): ``full_model`` and ``genome_seq`` are accepted but not
    referenced in this body -- confirm whether callers still need them.
    """

    # pick the bbduk-based trim/merge wrapper in "bb" mode, else the
    # cutadapt-based one
    cutmrg_script = "work_bbcutmrg_pe.sh" if merge_mode == "bb" else "work_cutmrg_pe.sh"
    cutmrg_cfg = {
        "id":
        "cutmrg",
        "evaluator":
        EVALUATOR,
        "preamble":
        PREAMBLE,
        "dir":
        proc_dir,
        "executable":
        "bash",
        "script":
        "%s/%s" % (WORK_DIR, cutmrg_script),
        "in": (
            "fastq1",
            "fastq2",
        ),
        "out": (
            ("main", "log"),
            ("cut", "log"),
            ("mrg", "log"),
            ("cutfq1", "fq"),
            ("cutfq2", "fq"),
            ("mrgfq1", "fq"),
            ("mrgfq2", "fq"),
            ("mrgfq3", "fq"),
            ("isize", "txt"),
            ("stats", "txt"),
            ("dir", None),
        ),
        "params": {
            "min_len": 25,
            "cutxargs": "k=31 qskip=3 rieb=t tbo=t tpe=t",
            "mrgxargs":
            "k=31 prefilter=2 minoverlap=10 extend2=20 iterations=5",
            # seq1/seq2 are the adapter sequences handed to the trimmer
            "seq1": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC",
            "seq2":
            "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
            "rrna": rrna_seq,
            "prune": prune,
            "xmx": memn,
            "p": n,
        }
    }

    # spliced-alignment STAR settings; caller-supplied overrides from
    # alig_star_params take precedence via update() below
    star_alig_params = {
        "keep_fastq": keep_fastq,
        "genomeDir": genome_idx,
        "runThreadN": n,
        "genomeLoad": star_mem,
        ## spliced alignment
        "outFilterType": "BySJout",
        "outSAMstrandField": "intronMotif" if unstranded else "None",
        "alignSJoverhangMin": 8,
        "alignSJDBoverhangMin": 3,
        "scoreGenomicLengthLog2scale": 0,
        "alignIntronMin": 20,
        "alignIntronMax": 1000000,
        "alignMatesGapMax": 1000000,
    }
    star_alig_params.update(alig_star_params)

    # chimeric-alignment STAR settings; caller-supplied overrides from
    # chim_star_params take precedence via update() below
    star_chim_params = {
        "keep_fastq": keep_fastq,
        "genomeDir": genome_idx,
        "runThreadN": n,
        "genomeLoad": star_mem,
        "outFilterType": "Normal",
        ## chimeric alignment
        "alignIntronMax": 150000,
        "alignMatesGapMax": 150000,
        "chimSegmentMin": 10,
        "chimJunctionOverhangMin": 1,
        "chimScoreSeparation": 0,
        "chimScoreJunctionNonGTAG": 0,
        "chimScoreDropMax": 1000,
        "chimScoreMin": 1,
    }
    star_chim_params.update(chim_star_params)

    # spliced alignment consumes the trimmed (cut) FASTQ pair
    star_alig_cfg = {
        "id": "alig",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/%s" % (WORK_DIR, "work_star_alig_pe.py"),
        "in": (
            "cutfq1",
            "cutfq2",
        ),
        "out": (
            ("main", "log"),
            ("dir", None),
        ),
        "params": star_alig_params
    }

    # chimeric alignment consumes the three merged FASTQ outputs
    star_chim_cfg = {
        "id": "chim",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/%s" % (WORK_DIR, "work_star_chim_pe.py"),
        "in": (
            "mrgfq1",
            "mrgfq2",
            "mrgfq3",
        ),
        "out": (
            ("main", "log"),
            ("dir", None),
        ),
        "params": star_chim_params
    }

    ## topology
    pipeline = Plumber()
    # narrow (job1) and wide (jobn) execution engines
    map_job1 = NuMap(worker_num=job1,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    map_jobn = NuMap(worker_num=jobn,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    p1 = Piper(Worker(link_fq, (proc_dir, )), parallel=map_job1)
    # the trim/merge step uses the wide worker map only in "bb" mode
    p2 = Piper(Worker(script, (cutmrg_cfg, )),
               parallel=(map_jobn if merge_mode == "bb" else map_job1))
    p3a = Piper(Worker(script, (star_alig_cfg, )), parallel=map_jobn)
    p3b = Piper(Worker(script, (star_chim_cfg, )), parallel=map_jobn)
    p4a = Piper(Worker(move_alig_star, (proc_dir, )), parallel=map_job1)
    p4b = Piper(Worker(move_chim_star, (proc_dir, )), parallel=map_job1)
    p5 = Piper(Worker(npasser), parallel=map_jobn)
    # both branches share the head (p1, p2) and the sink (p5)
    pipeline.add_pipe((p1, p2, p3a, p4a, p5))
    pipeline.add_pipe((p1, p2, p3b, p4b, p5))
    return pipeline
Beispiel #7
0
from papy.util.func import dump_item, load_item
from numap import NuMap, imports
from papy.core import Piper, Worker

@imports(['os'])
def upstream(inbox):
    """Emit this worker's process id as a string; the input is ignored."""
    return str(os.getpid())

@imports(['os'])
def downstream(inbox):
    """Render 'producer_pid->consumer_pid' for the received item."""
    producer_pid = inbox[0]
    return "%s->%s" % (producer_pid, os.getpid())

host1 = NuMap()
host2 = NuMap()

up = Worker((upstream, dump_item))
dn = Worker((load_item, downstream))
up_ = Piper(up, parallel=host1)
dn_ = Piper(dn, parallel=host2)

up_([['hello', 'world', 'hi', 'folks']])
dn_([up_])

up_.start()
dn_.start()

print list(dn_)
Beispiel #8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper


def l33t(inbox):
    word = inbox[0]
    return word.replace('e', '3').replace('o', '0')


l33t_piper = Piper(l33t)
l33t_piper([['hello', 'world']])
l33t_piper.start()

print list(l33t_piper)
def l33t(inbox):
    word = inbox[0]
    leet_yuk = (word.replace('e', '3').replace('o', '0'), 'yuk')
    print "I'll produce: %s and %s" % leet_yuk
    return leet_yuk


def upper(inbox):
    """Return the first word of *inbox* upper-cased."""
    return inbox[0].upper()


def noyuk(inbox):
    print "I got 2 words and 2 yuks: %s" % inbox
    word_box1, yuk_box1, word_box2, yuk_box2 = inbox
    return word_box1[0] + ' ' + word_box2[0]


l33t_piper = Piper(l33t, produce=2)
upper_piper = Piper(upper, spawn=2)
noyuk_piper = Piper(noyuk, consume=4)

l33t_piper([['hello', 'world', 'hi', 'folks']])
upper_piper([l33t_piper])
noyuk_piper([upper_piper])

l33t_piper.start()
upper_piper.start()
noyuk_piper.start()
print list(noyuk_piper)
Beispiel #10
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper

def l33t(inbox):
    word = inbox[0]
    leet_yuk = (word.replace('e', '3').replace('o', '0'), 'yuk')
    print "I'll produce: %s and %s" % leet_yuk
    return leet_yuk

def noyuk(inbox):
    print "I got 2 words and 2 yuks: %s" % inbox
    word_box1, yuk_box1, word_box2, yuk_box2 = inbox
    return word_box1[0] + ' ' + word_box2[0]


l33t_piper = Piper(l33t, produce=2)
noyuk_piper = Piper(noyuk, consume=4)

l33t_piper([['hello', 'world', 'hi', 'folks']])
noyuk_piper([l33t_piper])

l33t_piper.start()
noyuk_piper.start()
print list(noyuk_piper)


Beispiel #11
0
# wraps clean_seq, fixing the sequence type and the substitution list
cleaner = Worker(clean_seq, kwargs={'type': 'aa', 'fixes': [('.', '-')]})
# >>> arr = cleaner(['AGA.TA'])
# wraps timestamp
stamper = Worker(timestamp)
# >>> arr = stamper([arr])

# Step 3 (representing computational resources)
# creates a resource that uses all local processors
local_computer = NuMap()

# Step 4 (creating processing nodes)
# this attaches a single computational resource to the two processing nodes;
# the stamper_node is tracked (track=True), i.e. it stores the results of
# computation in memory so they can be read back after the run.
cleaner_node = Piper(cleaner, parallel=local_computer)
stamper_node = Piper(stamper, parallel=local_computer, track=True)

# Step 5 (constructing a workflow graph)
# we construct a workflow graph, add the two processing nodes and define the
# connection between them.
workflow = Plumber()
workflow.add_pipe((cleaner_node, stamper_node))

# Step 6 (execute the workflow)
# this starts the workflow, processes data in the "background" and waits
# until all data-items have been processed.
workflow.start([['AGA.TA', 'TG..AA']])
workflow.run()
workflow.wait()
# tracked results are exposed via the workflow's stats dictionary
results = workflow.stats['pipers_tracked'][stamper_node][0]
Beispiel #12
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper


def l33t(inbox):
    word = inbox[0]
    leet_yuk = (word.replace('e', '3').replace('o', '0'), 'yuk')
    print "I'll produce: %s and %s" % leet_yuk
    return leet_yuk


def noyuk(inbox):
    print "I got 2 words and 2 yuks: %s" % inbox
    word_box1, yuk_box1, word_box2, yuk_box2 = inbox
    return word_box1[0] + ' ' + word_box2[0]


l33t_piper = Piper(l33t, produce=2)
noyuk_piper = Piper(noyuk, consume=4)

l33t_piper([['hello', 'world', 'hi', 'folks']])
noyuk_piper([l33t_piper])

l33t_piper.start()
noyuk_piper.start()
print list(noyuk_piper)
Beispiel #13
0
def final(proc_dir, job1, jobn, n, stride, full_model, genome_idx, genome_seq,
          skip_mixcr, skip_cover):
    """Build the post-alignment finalization pipeline.

    A single pass-through head fans out into independent jobs -- BAM
    statistics, viral read calling, junction-file compression, CRAM
    packing and, unless skipped, MiXCR immune-repertoire analysis and
    coverage-track generation -- which all rejoin in one sink node.

    NOTE(review): ``full_model`` is accepted but not referenced in this
    body -- confirm whether callers still need to pass it.
    """

    ## bamstat
    bamstat_cfg = {
        "id": "bamstat",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_bamstat.sh"),
        "in": ("alig_csort", ),
        "out": (
            ("main", "log"),
            ("idxstat", "txt"),
            ("flgstat", "txt"),
        ),
        "params": {}
    }

    ## viral read calling
    virus_cfg = {
        "id": "virus",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_virus.sh"),
        "in": ("alig_csort", ),
        "out": (
            ("main", "log"),
            ("call", "txt"),
        ),
        "params": {}
    }

    ## compression of junction files
    gzip_cfg = {
        "id": "gzip",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_gzip.sh"),
        "in": (
            "sj",
            "junc_se",
            "junc_pe",
        ),
        "out": (("main", "log"), ),
        "params": {}
    }

    ## CRAM packing of the three coordinate-sorted BAMs
    cram_cfg = {
        "id":
        "pack",
        "evaluator":
        EVALUATOR,
        "preamble":
        PREAMBLE,
        "dir":
        proc_dir,
        "executable":
        "bash",
        "script":
        "%s/%s" % (WORK_DIR, "work_cram.sh"),
        "in": (
            "alig_csort",
            "chim_pe_csort",
            "chim_se_csort",
        ),
        "out": (
            ("main", "log"),
            ("alig_csort", "cram"),
            ("chim_pe_csort", "cram"),
            ("chim_se_csort", "cram"),
        ),
        "params": {
            "genome": genome_seq,
            # quality-score binning passed to the packing script
            "qbin": "2,10,20,25,30,35,40,42"
        }
    }

    ## coverage-track (bigWig) generation
    cover_cfg = {
        "id":
        "cover",
        "evaluator":
        EVALUATOR,
        "preamble":
        PREAMBLE,
        "dir":
        proc_dir,
        "executable":
        "python2",
        "script":
        "%s/%s" % (WORK_DIR, "work_coverage.py"),
        "in": (
            "alig_csort",
            "chim_pe_csort",
            "chim_se_csort",
        ),
        "out": (
            ("main", "log"),
            ("alig_csort", "bw"),
            ("chim_pe_csort", "bw"),
            ("chim_se_csort", "bw"),
        ),
        "params": {
            # genome_idx supports "/" joining -- presumably a path-like
            # object (e.g. pathlib/plumbum path); verify against callers
            "chrom_length": genome_idx / "chrNameLength.txt"
        }
    }

    ## MiXCR immune-repertoire analysis
    mixcr_cfg = {
        "id":
        "mixcr",
        "evaluator":
        EVALUATOR,
        "preamble":
        PREAMBLE,
        "dir":
        proc_dir,
        "executable":
        "bash",
        "script":
        "%s/%s" % (WORK_DIR, "work_mixcr.sh"),
        "in": (
            "alig_csort",
            "unmap_1",
            "unmap_2",
        ),
        "out": (
            ("main", "log"),
            ("aln.rep", "txt"),
            ("fix1.rep", "txt"),
            ("fix2.rep", "txt"),
            ("ext.rep", "txt"),
            ("asm.rep", "txt"),
            ("index", "bin"),
            ("alig", "vdjca"),
            ("clone", "clns"),
            ("dir", None),
        ),
        "params": {
            "p": n,
            # receptor locus coordinates handed to the MiXCR wrapper
            "TRA": "chr14:21543538-22556037",
            "TRB": "chr7:142290000-142820000",
            "TRG": "chr7:38237000-38382000",
            "IGK": "chr2:88789991-90313268",
            "IGH": "chr14:105580000-106880000",
            "IGL": "chr22:22020000-22927000",
        }
    }

    ## topology
    pipeline = Plumber()
    # narrow (job1) and wide (jobn) execution engines
    map_job1 = NuMap(worker_num=job1,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    map_jobn = NuMap(worker_num=jobn,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    p1 = Piper(Worker(ipasser), parallel=map_job1)
    p2b = Piper(Worker(script, (cram_cfg, )), parallel=map_job1)
    p2c = Piper(Worker(script, (gzip_cfg, )), parallel=map_job1)
    p2d = Piper(Worker(script, (bamstat_cfg, )), parallel=map_job1)
    p2e = Piper(Worker(script, (virus_cfg, )), parallel=map_job1)
    p3 = Piper(Worker(npasser), parallel=map_job1)
    # every branch shares the head p1 and the sink p3
    pipeline.add_pipe((p1, p2b, p3))
    pipeline.add_pipe((p1, p2c, p3))
    pipeline.add_pipe((p1, p2d, p3))
    pipeline.add_pipe((p1, p2e, p3))
    if not skip_mixcr:
        # MiXCR is the only branch run on the wide worker map
        p2f = Piper(Worker(script, (mixcr_cfg, )), parallel=map_jobn)
        pipeline.add_pipe((p1, p2f, p3))
    if not skip_cover:
        p2g = Piper(Worker(script, (cover_cfg, )), parallel=map_job1)
        pipeline.add_pipe((p1, p2g, p3))
    return pipeline
Beispiel #14
0
def quant(proc_dir, job1, jobn, n, stride, unstranded, prot_model, full_model,
          linc_model):
    """Build the featureCounts quantification pipeline.

    Runs the counting script once per annotation model (protein-coding,
    full, lincRNA) against the name-sorted alignment, fanning out from a
    shared pass-through head and rejoining in a shared sink.

    NOTE(review): ``jobn`` is accepted but not referenced in this body --
    confirm whether callers still need to pass it.
    """

    ## gene counting
    prot_cfg = {
        "id": "prot",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/work_featc.py" % WORK_DIR,
        "in": ("alig_nsort", ),
        "out": (("main", "log"), ("cts", "cts"), ("tmp", None)),
        "params": {
            "paired_end": True,
            "stranded": "0",  # always unstranded
            "duplicates": "",  # count duplicates
            "gtf": prot_model,
            "n": n,
            "xargs": ""
        }
    }

    full_cfg = {
        "id": "full",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/work_featc.py" % WORK_DIR,
        "in": ("alig_nsort", ),
        "out": (("main", "log"), ("cts", "cts"), ("tmp", None)),
        "params": {
            "paired_end": True,
            "stranded": "0"
            if unstranded else "1",  # first read is on the transcript strand
            "duplicates": "",  # count duplicates
            "gtf": full_model,
            "n": n,
            "xargs": "",
        }
    }

    linc_cfg = {
        "id": "linc",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/work_featc.py" % WORK_DIR,
        "in": ("alig_nsort", ),
        "out": (("main", "log"), ("cts", "cts"), ("tmp", None)),
        "params": {
            "paired_end": True,
            "stranded": "0"
            if unstranded else "1",  # first read is on the transcript strand
            "duplicates": "",  # count duplicates
            "gtf": linc_model,
            "n": n,
            "xargs": ""
        }
    }

    # one counting job per annotation model
    cfgs = [prot_cfg, full_cfg, linc_cfg]
    ## topology
    pipeline = Plumber()
    map_job1 = NuMap(worker_num=job1,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    # NOTE(review): Python 2 integer division -- job1 < 4 yields
    # worker_num=0; confirm that is the intended behavior
    map_job4 = NuMap(worker_num=job1 / 4,
                     ordered=False,
                     stride=stride,
                     buffer=10000)

    p1 = Piper(Worker(ipasser), parallel=map_job1)
    p2 = Piper(Worker(npasser), parallel=map_job1)
    for cfg in cfgs:
        p = Piper(Worker(script, (cfg, )), parallel=map_job4)
        pipeline.add_pipe((p1, p, p2))
    return pipeline
Beispiel #15
0

def l33t(inbox):
    word = inbox[0]
    return word.replace('e', '3').replace('o', '0')


def l33ter(inbox):
    """Replace every 'l' with '1' in the first word of *inbox*."""
    return inbox[0].replace('l', '1')


# execution endgine
numap = NuMap()

# function nodes
l33t_piper = Piper(l33t, parallel=numap)
l33ter_piper = Piper(l33ter, parallel=numap, track=True)

# topology
pipeline = Plumber()
pipeline.add_pipe((l33t_piper, l33ter_piper))
end = pipeline.get_outputs()[0]

# runtime
pipeline.start([['hello', 'world']])
pipeline.run()
pipeline.wait()
print pipeline.stats['pipers_tracked'][end]
assert [{0: 'h3110', 1: 'w0r1d'}] == pipeline.stats['pipers_tracked'][end]
Beispiel #16
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper


def nimm2(inbox):
    """Join the first words of the two consumed boxes with a space."""
    first_box, second_box = inbox
    return first_box[0] + ' ' + second_box[0]


l33t_piper = Piper(nimm2, consume=2)
l33t_piper([['hello', 'world', 'hi', 'folks']])  # length of 4
l33t_piper.start()

out = list(l33t_piper)
assert out == ['hello world', 'hi folks']  # length of 2
print out
Beispiel #17
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#python /usr/lib/python2.6/site-packages/rpyc/servers/classic_server.py -m 'forking'

from numap import NuMap, imports
from papy.core import Piper


@imports(['os'])
def hello_from(inbox):
    """Pair the incoming word with the executing worker's process id."""
    pid = os.getpid()
    return (inbox[0], pid)


somehost = NuMap(worker_num=0, worker_remote=[('localhost', 2)])

remote_piper = Piper(hello_from, parallel=somehost)
remote_piper([['hello', 'world', 'hi', 'folks']])
remote_piper.start()

print list(remote_piper)
Beispiel #18
0

def upper(inbox):
    """Upper-case the single word carried in *inbox*."""
    return inbox[0].upper()


def E_to_3(inbox):
    """Replace every 'E' with '3' in the first word of *inbox*."""
    return inbox[0].replace('E', '3')


def O_to_0(inbox):
    """Replace every 'O' with '0' in the first word of *inbox*."""
    return inbox[0].replace('O', '0')


upper_fork = Piper(upper)
E_end = Piper(E_to_3, branch=1)
O_end = Piper(O_to_0, branch=2)

pipeline = Dagger()
pipeline.add_pipe((upper_fork, E_end))
pipeline.add_pipe((upper_fork, O_end))

left_end, right_end = pipeline.get_outputs()

pipeline.connect([['hello', 'world']])
pipeline.start()
print zip(left_end, right_end)
Beispiel #19
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap
from papy.core import Piper


def l33t(inbox):
    word = inbox[0]
    return word.replace('e', '3').replace('o', '0')


numap = NuMap()

l33t_piper = Piper(l33t, parallel=numap)
l33t_piper([['hello', 'world']])
l33t_piper.start()

print list(l33t_piper)
Beispiel #20
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper


def l33t(inbox):
    word = inbox[0]
    return (word.replace('e', '3').replace('o', '0'), ) * 2


l33t_piper = Piper(l33t, produce=2)

l33t_piper([['hello', 'world']])
l33t_piper.start()

print list(l33t_piper)
Beispiel #21
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#python /usr/lib/python2.6/site-packages/rpyc/servers/classic_server.py -m 'forking'

from numap import NuMap, imports
from papy.core import Piper

@imports(['os'])
def hello_from(inbox):
    """Return (word, pid) identifying which worker handled the item."""
    worker_pid = os.getpid()
    return (inbox[0], worker_pid)

somehost = NuMap(worker_num=0, worker_remote=[('localhost', 2)])

remote_piper = Piper(hello_from, parallel=somehost)
remote_piper([['hello', 'world', 'hi', 'folks']])
remote_piper.start()

print list(remote_piper)
Beispiel #22
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from papy.core import Piper

def nimm2(inbox):
    """Space-join the first words of the two boxes in *inbox*."""
    box_a, box_b = inbox
    return box_a[0] + ' ' + box_b[0]

l33t_piper = Piper(nimm2, consume=2)
l33t_piper([['hello', 'world', 'hi', 'folks']]) # length of 4
l33t_piper.start()

out = list(l33t_piper)
assert out == ['hello world', 'hi folks'] # length of 2
print out


Beispiel #23
0
from numap import NuMap
from papy.core import Dagger, Piper


def l33t(inbox):
    word = inbox[0]
    return word.replace('e', '3').replace('o', '0')


def l33ter(inbox):
    """Turn every 'l' in the first word of *inbox* into '1'."""
    return inbox[0].replace('l', '1')


# execution endgine
numap = NuMap()

# function nodes
l33t_piper = Piper(l33t, parallel=numap)
l33ter_piper = Piper(l33ter, parallel=numap)

# topology
pipeline = Dagger()
pipeline.add_pipe((l33t_piper, l33ter_piper))
end = pipeline.get_outputs()[0]

# runtime
pipeline.connect([['hello', 'world']])
pipeline.start()
print list(end)
Beispiel #24
0
def preqc(proc_dir, job1, jobn, n, stride):
    """Build the pre-alignment QC pipeline (k-mer stats and FastQC).

    Subsamples the input FASTQ pair, then runs the k-mer counting and
    FastQC scripts on the sample as two parallel branches that rejoin
    in a shared pass-through sink.

    NOTE(review): ``jobn`` and the ``map_jobn`` engine built from it are
    never used below (every node runs on ``map_job1``) -- confirm
    whether the wide engine is still needed.
    """

    kmer_cfg = {
        "id": "kmer",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_kmer.sh"),
        "in": (
            "fastq1",
            "fastq2",
        ),
        "out": (
            ("main", "log"),
            ("kmer1", "txt"),
            ("kmer2", "txt"),
        ),
        "params": {
            # k-mer length used by the counting script
            "k": 6,
        }
    }

    fastqc_cfg = {
        "id": "fastqc",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_fastqc.sh"),
        "in": (
            "fastq1",
            "fastq2",
        ),
        "out": (
            ("main", "log"),
            ("report", None),
        ),
        "params": {}
    }

    map_job1 = NuMap(worker_num=job1,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    map_jobn = NuMap(worker_num=jobn,
                     ordered=False,
                     stride=stride,
                     buffer=10000)
    # subsample the FASTQ pair (1000000 presumably reads -- TODO confirm
    # the unit against sample_fq)
    p1 = Piper(Worker(sample_fq, (
        proc_dir,
        1000000,
    )), parallel=map_job1)
    p2 = Piper(Worker(script, (kmer_cfg, )), parallel=map_job1)
    p3 = Piper(Worker(script, (fastqc_cfg, )), parallel=map_job1)
    p4 = Piper(Worker(npasser), parallel=map_job1)
    # topology: two branches share the head p1 and the sink p4
    pipeline = Plumber()
    pipeline.add_pipe((p1, p2, p4))
    pipeline.add_pipe((p1, p3, p4))
    return pipeline