def parse_doctype_range(rangestr):
    """Return a list of the doctypes in the specified range, expanded as a
    list of integers. This is used to validate arguments; the actual range
    strings are passed on to the superfastmatch server.

    >>> parse_doctype_range('1-2:5:7-9')
    [1, 2, 5, 7, 8, 9]
    >>> parse_doctype_range('')
    Traceback (most recent call last):
        ...
    Exception: Invalid doctype range ()
    >>> parse_doctype_range('1')
    [1]
    >>> parse_doctype_range('7-7')
    [7]
    """
    if not rangestr:
        raise Exception('Invalid doctype range ({0})'.format(rangestr))

    split_on_hyphen = lambda s: s.split('-')

    def expand(rng):
        if len(rng) == 1:
            return int(rng[0])
        elif len(rng) == 2:
            return range(int(rng[0]), int(rng[1]) + 1)
        else:
            raise Exception('Unrecognized range data type')

    return (stream.Stream(rangestr.split(':'))
            >> stream.map(split_on_hyphen)
            >> stream.map(expand)
            >> stream.flatten
            >> list)
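# A minimal sketch of the same expansion without the stream pipeline, for
# comparison. The name parse_doctype_range_plain is hypothetical; it assumes
# the same ':'-separated, '-'-delimited range syntax as above.
def parse_doctype_range_plain(rangestr):
    if not rangestr:
        raise Exception('Invalid doctype range ({0})'.format(rangestr))
    result = []
    for rng in rangestr.split(':'):
        parts = rng.split('-')
        if len(parts) == 1:
            result.append(int(parts[0]))
        elif len(parts) == 2:
            result.extend(range(int(parts[0]), int(parts[1]) + 1))
        else:
            raise Exception('Unrecognized range data type')
    return result

assert parse_doctype_range_plain('1-2:5:7-9') == [1, 2, 5, 7, 8, 9]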
def Gregory(type=float):
    """Return partial sums of the Gregory series converging to atan(1) == pi/4.

    Yield 1 - 1/3 + 1/5 - 1/7 + ... computed with the given type.
    """
    return (seq(type(1), step=2)
            >> map(lambda x: 1 / x)
            >> alt_sign
            >> fold(operator.add))
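# Hedged usage sketch: slice the first few partial sums off the infinite
# stream, assuming stream's `item` is in scope as in the other examples here.
# The sums oscillate around atan(1) == pi/4 ~= 0.7853981...
first_sums = Gregory() >> item[:5]
# -> [1.0, 0.6666..., 0.8666..., 0.7238..., 0.8349...]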
def run(self):
    if self.type == "TRANSIENT":
        sink = item[:1]
    else:
        sink = min

    dataFilter = MovAvg(10)
    nop = map(myPrint1)
    # Five pipeline slots; any slot left as `nop` just passes items
    # through the printing map (only elements[4] stays a no-op below).
    elements = [nop, nop, nop, nop, nop]
    elements[0] = map(myPrint1)
    elements[1] = map(lambda x: dataFilter(x))
    elements[2] = filter(lambda x: bigVals(x, 40))
    elements[3] = map(notify_ctrl)

    (self.myGen() >> elements[0] >> elements[1] >> elements[2]
                  >> elements[3] >> elements[4] >> sink)
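# Hedged sketch of the stage-list pattern above with plain stages, using the
# stream module's map/filter as in run(): building the pipeline from a list
# keeps the wiring order explicit and lets individual slots be swapped out
# before execution.
stages = [map(lambda x: x + 1), filter(lambda x: x % 2 == 0)]
out = range(5) >> stages[0] >> stages[1] >> list
# -> [2, 4]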
def extract_sopr(options):
    if not os.path.exists(ORIG_DIR):
        mkdir_p(ORIG_DIR)

    if options.get('loglevel', None):
        log.setLevel(options['loglevel'])

    cache_paths = glob(os.path.join(CACHE_DIR, 'sopr/*/*/*.zip'))
    log.debug("cache paths ({num}):\n\t".format(num=len(cache_paths))
              + "\n\t".join(cache_paths))

    extracted = cache_paths >> filter(lambda x: check_ext(x, ext='.zip')) \
                            >> map(lambda p: translate_dir(p,
                                                           from_dir=CACHE_DIR,
                                                           to_dir=ORIG_DIR)) \
                            >> ThreadPool(extract_all_zips)

    for path, destination_dir, num_files in extracted:
        log.info("successfully extracted "
                 + "{path} to {dest_dir} ({num} files)".format(
                     path=path, dest_dir=destination_dir, num=num_files))

    for url, exception in extracted.failure:
        log.error("extracting from {url} failed: {exception}".format(
            url=url, exception=exception))
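# Minimal sketch of the ThreadPool success/failure split used above, with a
# hypothetical might_fail worker. It assumes, as in extract_sopr, that inputs
# whose processing raises are reported on the pool's `failure` stream as
# (input, exception) pairs rather than appearing in the main output.
def might_fail(x):
    if x % 3 == 0:
        raise ValueError(x)
    return x * 2

pool = range(10) >> ThreadPool(map(might_fail), poolsize=2)
results = pool >> list
for input_item, exception in pool.failure:
    print 'failed:', input_item, exception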
def shutdown(poolclass, n):
    e = Executor(poolclass, map(lambda x: x * x), poolsize=2)
    e.submit(*range(n))
    e.shutdown()
    print e.result >> list
    assert not e.inputfeeder_thread.is_alive()
    assert not e.resulttracker_thread.is_alive()
    assert not e.failuretracker_thread.is_alive()
def cancel(poolclass, n):
    e = Executor(poolclass, map(lambda x: x * x), poolsize=2)
    t1 = threading.Thread(target=lambda: e.submit(*range(n // 2)))
    t2 = threading.Thread(target=lambda: e.submit(*range(n // 2)))
    t1.start()
    t2.start()
    cancelled = e.cancel(*range(0, n, 2))
    t1.join()
    t2.join()
    e.close()
    completed = len(e.result >> list)
    print completed, cancelled
    assert completed + cancelled == n
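# Hedged sketch of the Executor lifecycle exercised by shutdown/cancel above:
# submit inputs, close to stop accepting more, then drain e.result. Passing
# ThreadPool as the poolclass is an assumption suggested by the tests'
# parametrization; result order depends on the pool, so only the multiset of
# outputs is stable.
e = Executor(ThreadPool, map(lambda x: x * x), poolsize=2)
e.submit(1, 2, 3)
e.close()
squares = e.result >> list      # e.g. [1, 4, 9], possibly reordered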
def confirm_download_schedule(schedule):
    """Report the total number of bytes and the total number of files to
    download. Also list the inaccessible files (based on the HEAD response).
    Then ask the user to confirm downloading.
    """
    def content_length(tpl):
        return tpl[2][1]

    def status_code(tpl):
        return tpl[2][0]

    def href(tpl):
        return tpl[0]

    def is_OK(tpl):
        return status_code(tpl) == 200

    def not_OK(tpl):
        return status_code(tpl) != 200

    increment = lambda x, _: x + 1

    file_count = (schedule
                  >> stream.filter(is_OK)
                  >> stream.reduce(increment, 0))
    bytes_to_download = (schedule
                         >> stream.filter(is_OK)
                         >> stream.map(content_length)
                         >> sum)
    inaccessible_files = schedule >> stream.filter(not_OK) >> list

    if len(inaccessible_files) > 0:
        print
        print "Some files are inaccessible:"
        for (idx, sched) in enumerate(inaccessible_files):
            print "%d: %d %s" % (idx, status_code(sched), href(sched))

    if bytes_to_download > 0:
        print
        print "Need to download %s in %d files." % (
            pretty_bytes(bytes_to_download), file_count)
        print
        print "Are you sure you want to continue? [Y/n]"
        user_input = raw_input("> ")
        return user_input.upper() in ("", "Y", "YES")
    else:
        print
        print "Nothing to download."
        return False
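# The increment/reduce pair above is a streaming counter: reduce folds the
# stream with a seed of 0, bumping the accumulator once per item. A hedged
# miniature of the same idiom:
count = ([3, 5, 8]
         >> stream.filter(lambda x: x > 4)
         >> stream.reduce(lambda n, _: n + 1, 0))
# -> 2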
def __call__(self):
    """Use a mutex to protect self.d."""
    try:
        with QMutexLocker(self.master.mutex):
            #t = QTime()
            #t.start()
            #for node in self.master.tree.nodes:
            #    if not self.stopped:
            #        node.update(self.master.tree.query, self.query)
            #self.master.tree.query = self.query
            self.master.tree.update(self.query)
            #print(t.elapsed())
            nodes = (sorted(self.master.tree.nodes, key=lambda node: node())
                     >> sm.map(lambda node: Runnable(node, self.master.worker))
                     >> sm.item[:self.upper_bound])
            model = RunnableModel(nodes)
            self._finished = not self.stopped
            return None if self.stopped else model
    except Exception as e:
        print(e)
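# The sort/wrap/truncate pipeline above, reduced to plain data (assuming `sm`
# aliases the stream module as in the snippet): sort by a key, transform each
# element, then keep only the top N via item slicing.
top3 = sorted([5, 1, 4, 2, 3]) >> sm.map(lambda x: x * 10) >> sm.item[:3]
# -> [10, 20, 30]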
# define what we need to do moving averages
weights = [1.0 / win_len for i in range(win_len)]

def inner(window):
    """Compute the inner product of window and weights.

    weights must be defined outside to avoid a useless re-zipping
    when using this in a stream.
    """
    acc = sum(i * w for i, w in zip(window, weights))
    return acc

# get an infinite stream of uniform random floats
zsource = repeatcall(rand.random)

# WIRING
# make our moving-average window
winstream = zsource >> chop(win_len)
# compute the windowed average
xstream = winstream >> stream.map(inner)

# EXECUTING
if view_len > 0:
    ts = time()
    for i in range(view_len):
        fp.write(str(next(xstream.iterator)) + '\n')
    print("time: %f" % (time() - ts), file=sys.stderr)
    print("items_per_sec: %f" % (view_len / (time() - ts)), file=sys.stderr)

if view_len < 0:
    while True:
        fp.write(str(next(xstream.iterator)) + '\n')
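# Quick standalone check of the chop/inner-product wiring (it rebinds
# win_len and weights, so try it separately from the wiring above): chop
# yields consecutive, non-overlapping windows, so with uniform weights each
# output is the mean of a disjoint pair.
win_len = 2
weights = [1.0 / win_len for i in range(win_len)]
pair_means = range(6) >> chop(win_len) >> stream.map(inner) >> list
# -> [0.5, 2.5, 4.5]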
real    0m7.231s
user    0m7.046s
sys     0m0.020s

$ time python ./feeder.py -s        # sequential

real    0m13.072s
user    0m7.596s
sys     0m0.067s
"""

def blocking_producer():
    for n in range(25):
        time.sleep(0.01)
        yield 42

if __name__ == '__main__':
    f = lambda x: x ** x ** 3
    import sys
    try:
        if sys.argv[1] == '-s':
            ## use a single thread
            blocking_producer() >> map(f) >> reduce(operator.add)
        elif sys.argv[1] == '-t':
            ## use a feeder in a separate thread
            ThreadedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
    except IndexError:
        ## use a feeder in a child process
        ForkedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
def load_image(flatten=True):
    def load_image_inner(image_filename):
        return Bunch(image_filename=image_filename,
                     image=imread(image_filename, flatten=flatten))

    return stream.map(load_image_inner)
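# load_image() is a processor factory: it closes over its parameters and
# returns a stream.map stage that composes like any other. A hedged sketch
# of the same pattern with a hypothetical add_n factory:
def add_n(n):
    return stream.map(lambda x: x + n)

assert (range(3) >> add_n(10) >> list) == [10, 11, 12]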
odd_ans = reduce(ops.add, odds)
static_time = time() - ts

# streaming computation
# create our filters
cong_2 = lambda x: x % 2 == 0
evens = filter(cong_2)
odds = filter(lambda x: not cong_2(x))

ts = time()
# wire the split into the filters
instream >> tee(evens)
instream >> odds

# wire up the map and fold (scan/accumulate)
foldedevens = evens >> stream.map(math.sqrt) >> fold(ops.add)
print(time() - ts)
sqrtodds = odds >> stream.Processor(my_sqrt)
print("established the sqrter %f" % (time() - ts))
foldedodd = sqrtodds >> stream.fold(ops.add)
print("made odd folder: %f" % (time() - ts))

# force execution
soans = foldedodd >> item[-1:]
print(soans)
print(time() - ts)
seans = foldedevens >> item[:]
print(time() - ts)
stream_time = time() - ts
#print(even_ans)
#print(seans >> item[:])
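# Minimal sketch of the tee-based split above on a finite input, assuming
# tee(branch) copies items into the branch while the source stays connected
# for further piping (mirroring the wiring above):
src = stream.Stream(iter(range(1, 6)))       # 1..5
evens_branch = filter(lambda x: x % 2 == 0)
src >> tee(evens_branch)
odd_partials = src >> filter(lambda x: x % 2) >> fold(ops.add)
# odd_partials >> item[-1:]  ->  [9]   (1 + 3 + 5)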
def submit(poolclass, n):
    e = Executor(poolclass, map(lambda x: x * x), poolsize=3)
    e.submit(*range(n))
    e.close()
    assert sum(e.result) == result[n]
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

import stream

## Test scenario based on ../example/feeder.py

def blocking_producer():
    for n in range(25):
        time.sleep(0.01)
        yield 42

f = lambda x: x ** 2

expected = blocking_producer() >> stream.map(f) >> stream.reduce(operator.add)

## Test cases

def test_ThreadedFeeder():
    result = (stream.ThreadedFeeder(blocking_producer)
              >> stream.map(f)
              >> stream.reduce(operator.add))
    pprint(result)
    assert result == expected

def test_ForkedFeeder():
    result = (stream.ForkedFeeder(blocking_producer)
              >> stream.map(f)
              >> stream.reduce(operator.add))
    pprint(result)
    assert result == expected
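# Sanity check for the scenario above: blocking_producer yields 42
# twenty-five times, so the expected sum of squares is a fixed constant.
assert expected == 25 * 42 ** 2     # == 44100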
dataset = []

def alternating(n):
    values = []
    for i in range(1, n + 1):
        values.append(i)
        values.append(-i)
    return values

def randomized(n):
    values = []
    for _ in range(n):
        values.append(randint(-sys.maxint, sys.maxint))
    return values

for v in [10, 100, 1000] >> stream.map(alternating):
    dataset.append(v)
for v in [10, 100, 1000] >> stream.map(randomized):
    dataset.append(v)

func = stream.filter(lambda x: x & 1)
resultset = dataset >> stream.map(lambda s: s >> func >> set) >> list

## Test scenario

def threadpool(i):
    result = dataset[i] >> stream.ThreadPool(func, poolsize=2) >> set
    pprint(result)
dataset = []

def alternating(n):
    values = []
    for i in range(1, n + 1):
        values.append(i)
        values.append(-i)
    return values

def randomized(n):
    values = []
    for _ in range(n):
        values.append(randint(-sys.maxint, sys.maxint))
    return values

for v in [10, 100, 1000] >> map(alternating):
    dataset.append(v)
for v in [10, 100, 1000] >> map(randomized):
    dataset.append(v)

func = filter(lambda x: x & 1)
resultset = dataset >> map(lambda s: s >> func >> set) >> list

## Test scenario

def threadpool(i):
    result = dataset[i] >> ThreadPool(func, poolsize=2) >> set
    pprint(result)
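# The threadpool scenario compares sets because a pool with poolsize > 1 may
# emit results out of order; a hedged check of that property on known input:
out = range(20) >> ThreadPool(filter(lambda x: x & 1), poolsize=2) >> set
assert out == set([1, 3, 5, 7, 9, 11, 13, 15, 17, 19])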
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))

from stream import ThreadedFeeder, ForkedFeeder, map, reduce

## Test scenario based on ../example/feeder.py

def blocking_producer():
    for n in range(25):
        time.sleep(0.01)
        yield 42

f = lambda x: x ** 2

expected = blocking_producer() >> map(f) >> reduce(operator.add)

## Test cases

def test_ThreadedFeeder():
    result = ThreadedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
    pprint(result)
    assert result == expected

def test_ForkedFeeder():
    result = ForkedFeeder(blocking_producer) >> map(f) >> reduce(operator.add)
    pprint(result)
    assert result == expected