def merge(proc_dir, job1, jobn, n, stride):
    pipeline = Plumber()
    map_jobn = NuMap(worker_num=jobn, ordered=False, stride=stride, buffer=10000)
    p1 = Piper(Worker(merge_star, (proc_dir, n)), parallel=map_jobn)
    p2 = Piper(Worker(npasser), parallel=map_jobn)
    pipeline.add_pipe((p1, p2))
    return pipeline
def numap(self, sim, concrete_states):
    num_violations = 0
    from numap import NuMap
    with PickleStreamWriter(self.fname) as writer:
        for trace in NuMap(func=sim, iterable=concrete_states,
                           ordered=False, stride=1, buffer=1000):
            writer.write(trace)
            if check_prop_violation(self.prop, trace):
                num_violations += 1
def csort(proc_dir, job1, jobn, memn, n, stride):
    csort_mem = "%sG" % memn
    pipeline = Plumber()
    map_jobn = NuMap(worker_num=jobn, ordered=False, stride=stride, buffer=10000)
    p1 = Piper(Worker(sambamba_csort, (proc_dir, n, csort_mem)), parallel=map_jobn)
    p2 = Piper(Worker(npasser), parallel=map_jobn)
    pipeline.add_pipe((p1, p2))
    return pipeline
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# start the remote RPyC worker first, e.g.:
# python /usr/lib/python2.6/site-packages/rpyc/servers/classic_server.py -m 'forking'
from numap import NuMap, imports
from papy.core import Piper


@imports(['os'])
def hello_from(inbox):
    word = inbox[0]
    up_pid = os.getpid()
    return (word, up_pid)

somehost = NuMap(worker_num=0, worker_remote=[('localhost', 2)])
remote_piper = Piper(hello_from, parallel=somehost)
remote_piper([['hello', 'world', 'hi', 'folks']])
remote_piper.start()
print list(remote_piper)
from numap import NuMap


def printer(element):
    print element
    return element

LEFT_INPUT = ('L0', 'L1', 'L2', 'L3')
RIGHT_INPUT = ('R0', 'R1', 'R2', 'R3')

# LEFT_INPUT      RIGHT_INPUT
#     |                |
#     | (printer)      | (printer)
#     |                |
# left_iter        right_iter
numap = NuMap(stride=2)
left_iter = numap.add_task(printer, LEFT_INPUT)
right_iter = numap.add_task(printer, RIGHT_INPUT)
numap.start()

#                    2       2       2       2
#                 ------  ------  ------  ------
# order of input: L0, L1, R0, R1, L2, L3, R2, R3
L0 = left_iter.next()
L1 = left_iter.next()
R0 = right_iter.next()
R1 = right_iter.next()
L2 = left_iter.next()
L3 = left_iter.next()
R2 = right_iter.next()
def preqc(proc_dir, job1, jobn, n, stride):
    kmer_cfg = {
        "id": "kmer",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_kmer.sh"),
        "in": ("fastq1", "fastq2", ),
        "out": (
            ("main", "log"),
            ("kmer1", "txt"),
            ("kmer2", "txt"),
        ),
        "params": {
            "k": 6,
        }
    }
    fastqc_cfg = {
        "id": "fastqc",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_fastqc.sh"),
        "in": ("fastq1", "fastq2", ),
        "out": (
            ("main", "log"),
            ("report", None),
        ),
        "params": {}
    }
    map_job1 = NuMap(worker_num=job1, ordered=False, stride=stride, buffer=10000)
    map_jobn = NuMap(worker_num=jobn, ordered=False, stride=stride, buffer=10000)
    p1 = Piper(Worker(sample_fq, (proc_dir, 1000000, )), parallel=map_job1)
    p2 = Piper(Worker(script, (kmer_cfg, )), parallel=map_job1)
    p3 = Piper(Worker(script, (fastqc_cfg, )), parallel=map_job1)
    p4 = Piper(Worker(npasser), parallel=map_job1)
    # topology
    pipeline = Plumber()
    pipeline.add_pipe((p1, p2, p4))
    pipeline.add_pipe((p1, p3, p4))
    return pipeline
# -*- coding: utf-8 -*-
from numap import NuMap
from itertools import izip


def left(element):
    print "in left: %s" % (element,)
    return element


def right(element):
    print "in right: %s" % (element,)
    return element


def root(element):
    print "in root: %s" % (element,)
    return element

LEFT = ('left_0', 'left_1', 'left_2', 'left_3', 'left_4')
RIGHT = ('right_0', 'right_1', 'right_2', 'right_3', 'right_4')

nu_chain = NuMap()
left_out = nu_chain.add_task(left, LEFT)
right_out = nu_chain.add_task(right, RIGHT)
root_out = nu_chain.add_task(root, izip(left_out, right_out))
nu_chain.start()

results = tuple(root_out)
assert results == (('left_0', 'right_0'), ('left_1', 'right_1'),
                   ('left_2', 'right_2'), ('left_3', 'right_3'),
                   ('left_4', 'right_4'))
    arr = inbox[0]
    arr.meta['timestamp'] = "%s_%s_%s@%s:%s:%s" % time.localtime()[0:6]
    return arr

# Step 2 (wrapping functions into workers)
# wrap clean_seq and fix a specific sequence type and set of fixes
cleaner = Worker(clean_seq, kwargs={'type': 'aa', 'fixes': [('.', '-')]})
# >>> arr = cleaner(['AGA.TA'])
# wrap timestamp
stamper = Worker(timestamp)
# >>> arr = stamper([arr])

# Step 3 (representing computational resources)
# create a resource that uses all local processors
local_computer = NuMap()

# Step 4 (creating processing nodes)
# attach the single computational resource to the two processing nodes;
# the stamper_node is tracked, i.e. it stores the results of the
# computation in memory.
cleaner_node = Piper(cleaner, parallel=local_computer)
stamper_node = Piper(stamper, parallel=local_computer, track=True)

# Step 5 (constructing a workflow graph)
# construct a workflow graph, add the two processing nodes, and define the
# connection between them.
workflow = Plumber()
workflow.add_pipe((cleaner_node, stamper_node))

# Step 6 (execute the workflow)
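# --- hedged sketch (not part of the original fragment) ----------------------
# The fragment above stops at "Step 6"; the lines below sketch how such a
# Plumber workflow is typically executed. The input value "data" is
# hypothetical, and the method names start/run/wait plus the "stats"
# attribute are assumptions about the papy Plumber API, not code taken from
# this example.
data = ['AGA.TA', 'MKV.LS']                # hypothetical input sequences
workflow.start([data])                     # connect the input to cleaner_node
workflow.run()                             # begin parallel evaluation
workflow.wait()                            # block until all items are processed
print workflow.stats['pipers_tracked']     # results kept by the tracked stamper_node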
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap


def source(element):
    print "in source: %s" % (element,)
    return element


def pipe(element):
    print "in pipe: %s" % (element,)
    return element


def sink(element):
    print "in sink: %s" % (element,)
    return element

ELEMENTS = ('element_0', 'element_1', 'element_2', 'element_3', 'element_4')

nu_chain = NuMap()
source_out = nu_chain.add_task(source, ELEMENTS)
pipe_out = nu_chain.add_task(pipe, source_out)
sink_out = nu_chain.add_task(sink, pipe_out)
nu_chain.start()

results = tuple(sink_out)
assert results == ('element_0', 'element_1', 'element_2', 'element_3',
                   'element_4')
def final(proc_dir, job1, jobn, n, stride, full_model, genome_idx, genome_seq,
          skip_mixcr, skip_cover):
    ## bamstat
    bamstat_cfg = {
        "id": "bamstat",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_bamstat.sh"),
        "in": ("alig_csort", ),
        "out": (
            ("main", "log"),
            ("idxstat", "txt"),
            ("flgstat", "txt"),
        ),
        "params": {}
    }
    virus_cfg = {
        "id": "virus",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_virus.sh"),
        "in": ("alig_csort", ),
        "out": (
            ("main", "log"),
            ("call", "txt"),
        ),
        "params": {}
    }
    gzip_cfg = {
        "id": "gzip",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_gzip.sh"),
        "in": ("sj", "junc_se", "junc_pe", ),
        "out": (("main", "log"), ),
        "params": {}
    }
    cram_cfg = {
        "id": "pack",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_cram.sh"),
        "in": ("alig_csort", "chim_pe_csort", "chim_se_csort", ),
        "out": (
            ("main", "log"),
            ("alig_csort", "cram"),
            ("chim_pe_csort", "cram"),
            ("chim_se_csort", "cram"),
        ),
        "params": {
            "genome": genome_seq,
            "qbin": "2,10,20,25,30,35,40,42"
        }
    }
    cover_cfg = {
        "id": "cover",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/%s" % (WORK_DIR, "work_coverage.py"),
        "in": ("alig_csort", "chim_pe_csort", "chim_se_csort", ),
        "out": (
            ("main", "log"),
            ("alig_csort", "bw"),
            ("chim_pe_csort", "bw"),
            ("chim_se_csort", "bw"),
        ),
        "params": {
            "chrom_length": genome_idx / "chrNameLength.txt"
        }
    }
    mixcr_cfg = {
        "id": "mixcr",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, "work_mixcr.sh"),
        "in": ("alig_csort", "unmap_1", "unmap_2", ),
        "out": (
            ("main", "log"),
            ("aln.rep", "txt"),
            ("fix1.rep", "txt"),
            ("fix2.rep", "txt"),
            ("ext.rep", "txt"),
            ("asm.rep", "txt"),
            ("index", "bin"),
            ("alig", "vdjca"),
            ("clone", "clns"),
            ("dir", None),
        ),
        "params": {
            "p": n,
            "TRA": "chr14:21543538-22556037",
            "TRB": "chr7:142290000-142820000",
            "TRG": "chr7:38237000-38382000",
            "IGK": "chr2:88789991-90313268",
            "IGH": "chr14:105580000-106880000",
            "IGL": "chr22:22020000-22927000",
        }
    }
    ## topology
    pipeline = Plumber()
    map_job1 = NuMap(worker_num=job1, ordered=False, stride=stride, buffer=10000)
    map_jobn = NuMap(worker_num=jobn, ordered=False, stride=stride, buffer=10000)
    p1 = Piper(Worker(ipasser), parallel=map_job1)
    p2b = Piper(Worker(script, (cram_cfg, )), parallel=map_job1)
    p2c = Piper(Worker(script, (gzip_cfg, )), parallel=map_job1)
    p2d = Piper(Worker(script, (bamstat_cfg, )), parallel=map_job1)
    p2e = Piper(Worker(script, (virus_cfg, )), parallel=map_job1)
    p3 = Piper(Worker(npasser), parallel=map_job1)
    pipeline.add_pipe((p1, p2b, p3))
    pipeline.add_pipe((p1, p2c, p3))
    pipeline.add_pipe((p1, p2d, p3))
    pipeline.add_pipe((p1, p2e, p3))
    if not skip_mixcr:
        p2f = Piper(Worker(script, (mixcr_cfg, )), parallel=map_jobn)
        pipeline.add_pipe((p1, p2f, p3))
    if not skip_cover:
        p2g = Piper(Worker(script, (cover_cfg, )), parallel=map_job1)
        pipeline.add_pipe((p1, p2g, p3))
    return pipeline
def quant(proc_dir, job1, jobn, n, stride, unstranded, prot_model, full_model,
          linc_model):
    ## gene counting
    prot_cfg = {
        "id": "prot",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/work_featc.py" % WORK_DIR,
        "in": ("alig_nsort", ),
        "out": (("main", "log"), ("cts", "cts"), ("tmp", None)),
        "params": {
            "paired_end": True,
            "stranded": "0",   # always unstranded
            "duplicates": "",  # count duplicates
            "gtf": prot_model,
            "n": n,
            "xargs": ""
        }
    }
    full_cfg = {
        "id": "full",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/work_featc.py" % WORK_DIR,
        "in": ("alig_nsort", ),
        "out": (("main", "log"), ("cts", "cts"), ("tmp", None)),
        "params": {
            "paired_end": True,
            # first read is on the transcript strand
            "stranded": "0" if unstranded else "1",
            "duplicates": "",  # count duplicates
            "gtf": full_model,
            "n": n,
            "xargs": "",
        }
    }
    linc_cfg = {
        "id": "linc",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/work_featc.py" % WORK_DIR,
        "in": ("alig_nsort", ),
        "out": (("main", "log"), ("cts", "cts"), ("tmp", None)),
        "params": {
            "paired_end": True,
            # first read is on the transcript strand
            "stranded": "0" if unstranded else "1",
            "duplicates": "",  # count duplicates
            "gtf": linc_model,
            "n": n,
            "xargs": ""
        }
    }
    cfgs = [prot_cfg, full_cfg, linc_cfg]
    ## topology
    pipeline = Plumber()
    map_job1 = NuMap(worker_num=job1, ordered=False, stride=stride, buffer=10000)
    map_job4 = NuMap(worker_num=job1 / 4, ordered=False, stride=stride,
                     buffer=10000)
    p1 = Piper(Worker(ipasser), parallel=map_job1)
    p2 = Piper(Worker(npasser), parallel=map_job1)
    for cfg in cfgs:
        p = Piper(Worker(script, (cfg, )), parallel=map_job4)
        pipeline.add_pipe((p1, p, p2))
    return pipeline
def align(proc_dir, job1, jobn, memn, stride, n, unstranded, genome_idx,
          full_model, genome_seq, rrna_seq, merge_mode, star_mem,
          alig_star_params, chim_star_params, prune, keep_fastq):
    cutmrg_script = ("work_bbcutmrg_pe.sh" if merge_mode == "bb"
                     else "work_cutmrg_pe.sh")
    cutmrg_cfg = {
        "id": "cutmrg",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "bash",
        "script": "%s/%s" % (WORK_DIR, cutmrg_script),
        "in": ("fastq1", "fastq2", ),
        "out": (
            ("main", "log"),
            ("cut", "log"),
            ("mrg", "log"),
            ("cutfq1", "fq"),
            ("cutfq2", "fq"),
            ("mrgfq1", "fq"),
            ("mrgfq2", "fq"),
            ("mrgfq3", "fq"),
            ("isize", "txt"),
            ("stats", "txt"),
            ("dir", None),
        ),
        "params": {
            "min_len": 25,
            "cutxargs": "k=31 qskip=3 rieb=t tbo=t tpe=t",
            "mrgxargs": "k=31 prefilter=2 minoverlap=10 extend2=20 iterations=5",
            "seq1": "AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC",
            "seq2": "AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT",
            "rrna": rrna_seq,
            "prune": prune,
            "xmx": memn,
            "p": n,
        }
    }
    star_alig_params = {
        "keep_fastq": keep_fastq,
        "genomeDir": genome_idx,
        "runThreadN": n,
        "genomeLoad": star_mem,
        ## spliced alignment
        "outFilterType": "BySJout",
        "outSAMstrandField": "intronMotif" if unstranded else "None",
        "alignSJoverhangMin": 8,
        "alignSJDBoverhangMin": 3,
        "scoreGenomicLengthLog2scale": 0,
        "alignIntronMin": 20,
        "alignIntronMax": 1000000,
        "alignMatesGapMax": 1000000,
    }
    star_alig_params.update(alig_star_params)
    star_chim_params = {
        "keep_fastq": keep_fastq,
        "genomeDir": genome_idx,
        "runThreadN": n,
        "genomeLoad": star_mem,
        "outFilterType": "Normal",
        ## chimeric alignment
        "alignIntronMax": 150000,
        "alignMatesGapMax": 150000,
        "chimSegmentMin": 10,
        "chimJunctionOverhangMin": 1,
        "chimScoreSeparation": 0,
        "chimScoreJunctionNonGTAG": 0,
        "chimScoreDropMax": 1000,
        "chimScoreMin": 1,
    }
    star_chim_params.update(chim_star_params)
    star_alig_cfg = {
        "id": "alig",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/%s" % (WORK_DIR, "work_star_alig_pe.py"),
        "in": ("cutfq1", "cutfq2", ),
        "out": (
            ("main", "log"),
            ("dir", None),
        ),
        "params": star_alig_params
    }
    star_chim_cfg = {
        "id": "chim",
        "evaluator": EVALUATOR,
        "preamble": PREAMBLE,
        "dir": proc_dir,
        "executable": "python2",
        "script": "%s/%s" % (WORK_DIR, "work_star_chim_pe.py"),
        "in": ("mrgfq1", "mrgfq2", "mrgfq3", ),
        "out": (
            ("main", "log"),
            ("dir", None),
        ),
        "params": star_chim_params
    }
    pipeline = Plumber()
    map_job1 = NuMap(worker_num=job1, ordered=False, stride=stride, buffer=10000)
    map_jobn = NuMap(worker_num=jobn, ordered=False, stride=stride, buffer=10000)
    p1 = Piper(Worker(link_fq, (proc_dir, )), parallel=map_job1)
    p2 = Piper(Worker(script, (cutmrg_cfg, )),
               parallel=(map_jobn if merge_mode == "bb" else map_job1))
    p3a = Piper(Worker(script, (star_alig_cfg, )), parallel=map_jobn)
    p3b = Piper(Worker(script, (star_chim_cfg, )), parallel=map_jobn)
    p4a = Piper(Worker(move_alig_star, (proc_dir, )), parallel=map_job1)
    p4b = Piper(Worker(move_chim_star, (proc_dir, )), parallel=map_job1)
    p5 = Piper(Worker(npasser), parallel=map_jobn)
    pipeline.add_pipe((p1, p2, p3a, p4a, p5))
    pipeline.add_pipe((p1, p2, p3b, p4b, p5))
    return pipeline
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap
from time import sleep

SLEEP = 'sleep'
AWAKE = 'awake'


def sleeper(element):
    if element == SLEEP:
        sleep(2)
    return element

ELEMENTS = (AWAKE, SLEEP, AWAKE, AWAKE)

print "with ordered=True results are returned in the order of the input"
result_iterator = NuMap(sleeper, ELEMENTS, ordered=True)
results = tuple(result_iterator)
print 'got: ', results

print "with ordered=False results are returned in the order of computation"
result_iterator = NuMap(sleeper, ELEMENTS, ordered=False)
results = tuple(result_iterator)
print 'got: ', results
def resources(args):
    size, worker_num = args
    rsrc = NuMap(worker_num=worker_num)
    return rsrc
from papy.util.func import dump_item, load_item
from numap import NuMap, imports
from papy.core import Piper, Worker


@imports(['os'])
def upstream(inbox):
    up_pid = os.getpid()
    return str(up_pid)


@imports(['os'])
def downstream(inbox):
    up_pid = inbox[0]
    down_pid = os.getpid()
    return "%s->%s" % (up_pid, down_pid)

host1 = NuMap()
host2 = NuMap()

up = Worker((upstream, dump_item))
dn = Worker((load_item, downstream))

up_ = Piper(up, parallel=host1)
dn_ = Piper(dn, parallel=host2)

up_([['hello', 'world', 'hi', 'folks']])
dn_([up_])

up_.start()
dn_.start()
print list(dn_)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap


def hello_world(element, *args, **kwargs):
    print "Hello element: %s " % element,
    print "Hello args: %s" % (args, ),
    print "Hello kwargs: %s" % (kwargs, )
    return element

ELEMENTS = ('element_0', 'element_1', 'element_2', 'element_3', 'element_4')

result_iterator = NuMap(hello_world, ELEMENTS, args=('arg_0', 'arg_1'),
                        kwargs={'kwarg_0': 'val_0', 'kwarg_1': 'val_1'})
results = tuple(result_iterator)
assert results == ('element_0', 'element_1', 'element_2', 'element_3',
                   'element_4')
from numap import NuMap
from time import sleep
# NuMap.next(timeout=...) raises a TimeoutError when no result is ready in
# time; multiprocessing's TimeoutError is assumed here.
from multiprocessing import TimeoutError

SLEEP = 'sleep'
AWAKE = 'awake'


def sleeper(element):
    if element == SLEEP:
        sleep(2)
    return element

ELEMENTS = (AWAKE, SLEEP, AWAKE, AWAKE)

print "results that time out are **not** skipped"
result_iterator = NuMap(sleeper, ELEMENTS, skip=False)
print result_iterator.next(timeout=3)
try:
    result_iterator.next(timeout=1)
except TimeoutError:
    print 'timeout'
print result_iterator.next(timeout=3)
print result_iterator.next(timeout=3)
print result_iterator.next(timeout=3)
print "got 4 results\n"

print "results that time out are skipped"
result_iterator = NuMap(sleeper, ELEMENTS, skip=True)
print result_iterator.next(timeout=3)
try:
    result_iterator.next(timeout=1)
except TimeoutError:
    # with skip=True the timed-out result is dropped, so one fewer result is
    # retrieved in total
    print 'timeout'
print result_iterator.next(timeout=3)
print result_iterator.next(timeout=3)
print "got 3 results"
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap


def hello_world(element):
    print("Hello element: %s" % element)
    return element

ELEMENTS = ('element_0', 'element_1', 'element_2', 'element_3', 'element_4')

result_iterator = NuMap(hello_world, ELEMENTS)
results = tuple(result_iterator)
assert results == ('element_0', 'element_1', 'element_2', 'element_3',
                   'element_4')
from numap import NuMap
from papy.core import Dagger, Piper


def l33t(inbox):
    word = inbox[0]
    return word.replace('e', '3').replace('o', '0')


def l33ter(inbox):
    word = inbox[0]
    return word.replace('l', '1')

# execution engine
numap = NuMap()

# function nodes
l33t_piper = Piper(l33t, parallel=numap)
l33ter_piper = Piper(l33ter, parallel=numap)

# topology
pipeline = Dagger()
pipeline.add_pipe((l33t_piper, l33ter_piper))
end = pipeline.get_outputs()[0]

# runtime
pipeline.connect([['hello', 'world']])
pipeline.start()
print list(end)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from numap import NuMap, imports

# start the remote RPyC worker first, e.g.:
# python /usr/lib/python2.6/site-packages/rpyc/servers/classic_server.py -m 'forking'


@imports(['os'])
def hello_remote(element):
    return "got%s" % element

ELEMENTS = (0, 1, 2, 3, 4)

result_iterator = NuMap(hello_remote, ELEMENTS, worker_num=0,
                        worker_remote=[('localhost', 2)])
results = tuple(result_iterator)
print results