def myprocess(window):
    """Persist the window's payload into the ``processed`` collection.

    NOTE(review): references ``self.url`` — presumably this function is
    defined inside a test method where ``self`` is in closure scope;
    confirm against the enclosing definition. Failures are printed
    (best-effort) rather than raised, and the window is always returned.
    """
    try:
        connection = setup(url=self.url)
        connection.processed.insert_one({'data': window.data or {}})
    except Exception as e:
        print(e)
    return window
def clean():
    """Remove all minibatch collections, then disconnect the test alias."""
    connection = setup()
    # same collections, same order as before — just driven by a loop
    for collection in ('buffer', 'stream', 'window', 'processed'):
        connection.drop_collection(collection)
    disconnect('minibatch')
def myprocess(window):
    """Consume one window: log its data and store it in ``processed``.

    Any failure is printed (best-effort) instead of propagating; the
    window object is returned unchanged either way.
    """
    try:
        connection = setup(alias='consumer')
        print("consuming ... {}".format(window.data))
        connection.processed.insert_one({'data': window.data or {}})
    except Exception as e:
        print(e)
    return window
def get_or_create(cls, name, url=None, **kwargs):
    """Return the Stream named *name*, creating it if it does not exist.

    :param name: stream name; if falsy, a random uuid4 hex name is used
                 when a new stream must be created
    :param url: optional MongoDB url forwarded to ``setup``
    :param kwargs: extra fields passed to the ``Stream`` constructor on create
    :return: the existing or newly created ``Stream`` document

    Fix: the creation attempt is only made when the lookup raises
    ``Stream.DoesNotExist``. Previously the save was attempted
    unconditionally, so every call for an existing stream paid an extra
    insert + NotUniqueError + re-fetch round-trip.
    """
    # critical section
    # this may fail in concurrency situations
    from minibatch import setup
    try:
        setup(alias='minibatch', url=url)
    except Exception as e:
        warning("Stream setup resulted in {}".format(str(e)))
    try:
        stream = Stream.objects(name=name).no_cache().get()
    except Stream.DoesNotExist:
        # not found — try to create it; a concurrent creator may win the
        # race, in which case NotUniqueError tells us to re-fetch instead
        try:
            stream = Stream(name=name or uuid4().hex,
                            status=STATUS_OPEN,
                            **kwargs).save()
        except NotUniqueError:
            stream = Stream.objects(name=name).no_cache().get()
    return stream
def producer(data):
    """Append *data*, tagged with this process's pid, onto the 'test' stream.

    :param data: dict payload; mutated in place to add the producer's pid

    Fix: ``random.randrange(0, 1, 1)`` always returns 0, so the sleep meant
    to "simulate multiple time windows" never actually slept. Use
    ``randrange(0, 10)`` so the delay varies between 0.0 and 0.9 seconds.
    """
    import os
    import random
    import time

    # sleep to simulate multiple time windows
    time.sleep(random.randrange(0, 10) / 10.0)
    data.update({'pid': os.getpid()})
    setup(alias='producer')
    stream = Stream.get_or_create('test')
    print("producing ... {}".format(data))
    stream.append(data)
def main():
    """Drive the demo: spawn a consumer process, fan out producers, report.

    Fixes: the worker ``Pool`` was never closed/joined (worker-process
    leak), and the consumer ``Process`` was terminated but never joined
    (left a zombie until interpreter exit).
    """
    from multiprocessing import Pool, Process
    import time

    clean()
    # consumer runs concurrently in its own process
    emitp = Process(target=consumer)
    emitp.start()
    data = [{'value': i} for i in range(0, 100)]
    # chunksize=1 spreads items across the 4 workers; the context manager
    # tears the pool down once the blocking map() completes
    with Pool(4) as pool:
        pool.map(producer, data, 1)
    # give the consumer time to drain the remaining windows
    time.sleep(5)
    emitp.terminate()
    emitp.join()  # reap the consumer so it does not linger as a zombie
    db = setup()
    print("processed items:")
    print(list(doc for doc in db.processed.find()))
def setUp(self):
    """Start every test from a freshly wiped local ``test`` database."""
    url = 'mongodb://localhost/test'
    self.url = url
    delete_database(url=url)
    self.db = setup(url=url)
def delete_database(url=None, dbname='test'):
    """Test support: drop *dbname* entirely and return the connection.

    :param url: optional MongoDB url forwarded to ``setup``
    :param dbname: name of the database to drop (default ``'test'``)
    :return: the database handle obtained from ``setup``
    """
    connection = setup(url=url, dbname=dbname)
    connection.client.drop_database(dbname)
    return connection