def get_or_create(cls, name, url=None, interval=None, batchsize=1, **kwargs):
    """Return the Stream named *name*, creating it if it does not exist.

    Connects to the 'minibatch' mongo alias first (connection failures are
    logged as warnings, not raised). If no Stream document with this name
    exists, one is created; a concurrent creator winning the race is
    tolerated by catching NotUniqueError and re-reading the document.

    :param name: stream name; if falsy a random uuid4 hex name is used
    :param url: mongodb connection url passed to connectdb
    :param interval: window interval stored on a newly created stream
    :param batchsize: set on the returned stream object (not persisted here)
    :return: the Stream instance with .batchsize set
    """
    # critical section
    # this may fail in concurrency situations
    from minibatch import connectdb
    try:
        connectdb(alias='minibatch', url=url, **kwargs)
    except Exception as e:
        # best-effort connect: an already-registered alias raises, which is fine
        warning("Stream setup resulted in {} {}".format(type(e), str(e)))
    try:
        # .no_cache() avoids stale documents when several processes share the stream
        stream = Stream.objects(name=name).no_cache().get()
    except Stream.DoesNotExist:
        try:
            stream = Stream(name=name or uuid4().hex, interval=interval, status=STATUS_OPEN).save()
        except NotUniqueError:
            # another process created it between our get() and save() — use theirs
            pass
        stream = Stream.objects(name=name).no_cache().get()
    stream.batchsize = batchsize
    return stream
def producer(data):
    """Append *data* to the 'test' stream, tagged with the producer's pid.

    Sleeps a random sub-second interval first so that concurrent producers
    land in different time windows.

    :param data: dict payload; mutated in place to add the 'pid' key
    """
    import os
    import time
    import random
    # sleep to simulate multiple time windows
    # FIX: randrange(0, 1, 1) always returns 0, so the sleep never happened;
    # randrange(0, 10) / 10.0 yields 0.0-0.9s as intended
    time.sleep(random.randrange(0, 10) / 10.0)
    data.update({'pid': os.getpid()})
    connectdb(alias='producer')
    stream_name = 'test'
    stream = Stream.get_or_create(stream_name)
    print("producing ... {}".format(data))
    stream.append(data)
def clean():
    """Drop all minibatch working collections and release the connection."""
    db = connectdb()
    for collection in ('buffer', 'stream', 'window', 'processed'):
        db.drop_collection(collection)
    disconnect('minibatch')
def main():
    """End-to-end MQTT demo: publish messages through an MQTT broker into a
    minibatch stream, consume them in a child process, then report what
    landed in the 'processed' collection."""
    print("setting up")
    clean()
    # setup mqtt source and producer
    broker_url = 'mqtt://*****:*****@localhost'
    channel = 'TEST/MESSAGE'
    source = MQTTSource(broker_url, channel)
    producer = MQTTSink(broker_url, channel)
    # attach to the stream
    test_stream = stream('test')
    test_stream.attach(source)
    # run the streaming function in its own process
    worker = Process(target=consumer)
    worker.start()
    # publish some messages
    print("publishing messages")
    for _ in range(10):
        producer.put(dict(foo='bar', time=datetime.now().isoformat()))
        sleep(.1)
    # check we got the messages
    print("wait to receive all messages")
    sleep(3)
    db = connectdb()
    docs = [doc for doc in db.processed.find()]
    print("processed items:", len(docs))
    print(docs)
    worker.terminate()
    source.disconnect()
    producer.disconnect()
def process(window):
    """Record the mean delivery latency (in microseconds) of a window.

    For each document in the window, measures the time between its stored
    insertion timestamp ``doc['dt']`` and now, then inserts the average
    into the 'processed' collection as ``{'delta': ...}``.

    :param window: minibatch window whose .data is a list of documents,
        each carrying a naive-UTC datetime under 'dt'
    """
    db = connectdb(url=url)
    # guard: an empty window would otherwise raise ZeroDivisionError
    if not window.data:
        return
    # calculate average time t_delta it took for documents to be received
    # since insertion
    dtnow = datetime.utcnow()
    # FIX: timedelta.microseconds is only the sub-second component
    # (0-999999) and silently drops whole seconds; total_seconds() * 1e6
    # gives the full delta in microseconds
    t_delta = sum((dtnow - doc['dt']).total_seconds() * 1e6
                  for doc in window.data) / len(window.data)
    db.processed.insert_one(dict(delta=t_delta))
def myprocess(window):
    """Persist the window payload into 'processed'; failures are printed
    rather than propagated, and the window is always returned."""
    try:
        db = connectdb(url=url)
        payload = window.data or {}
        db.processed.insert_one({'data': payload})
    except Exception as exc:
        print(exc)
    return window
def main():
    """End-to-end Mongo demo: pump documents from a 'source' collection
    through a minibatch stream into 'processed', then report results."""
    print("setting up")
    clean()
    # setup mqtt source and producer
    url = 'mongodb://localhost/test'
    db = connectdb(url=url)
    source_coll = db['source']
    sink_coll = db['processed']
    source = MongoSource(source_coll)
    producer = MongoSink(sink_coll)
    # attach to the stream
    test_stream = stream('test')
    test_stream.attach(source)
    # run the streaming function in its own process
    worker = Process(target=consumer)
    worker.start()
    # publish some messages
    print("publishing messages")
    for _ in range(10):
        producer.put(dict(foo='bar', time=datetime.utcnow().isoformat()))
        sleep(.1)
    # check we got the messages
    print("wait to receive all messages")
    sleep(3)
    docs = [doc for doc in sink_coll.find()]
    print("processed items:", len(docs))
    print(docs)
    worker.terminate()
def myprocess(window):
    """Store the window payload via the 'consumer' connection alias,
    echoing what is being consumed; errors are printed, not raised."""
    try:
        db = connectdb(alias='consumer')
        print("consuming ... {}".format(window.data))
        payload = window.data or {}
        db.processed.insert_one({'data': payload})
    except Exception as exc:
        print(exc)
    return window
def myprocess(window):
    """Persist the window payload into 'processed'.

    Unlike the swallowing variants, failures are printed and re-raised so
    the framework sees the error.

    :param window: minibatch window; .data may be falsy, stored as {}
    :return: the window, for further processing by the framework
    """
    logger.debug("*** processing")
    try:
        db = connectdb(url)
        db.processed.insert_one({'data': window.data or {}})
    except Exception as e:
        print(e)
        raise
    # CONSISTENCY FIX: the sibling myprocess handlers return the window;
    # this one implicitly returned None
    return window
def myprocess(window):
    """Slowly (5s, with progress dots) persist the window payload,
    logging and re-raising any failure."""
    logger.debug("*** processing {}".format(window.data))
    from minibatch import connectdb
    try:
        # deliberate delay so windows overlap while this consumer works
        sleepdot(5)
        db = connectdb(url=url)
        payload = window.data or {}
        db.processed.insert_one({'data': payload})
    except Exception as exc:
        logger.error(exc)
        raise
    return window
def main():
    """Fan out 100 messages from a 4-worker producer pool while a consumer
    process drains the stream, then print what was processed."""
    from multiprocessing import Pool, Process
    import time
    clean()
    consumer_proc = Process(target=consumer)
    consumer_proc.start()
    workers = Pool(4)
    messages = [{'value': n} for n in range(100)]
    # chunksize=1 so every message may land on a different worker pid
    workers.map(producer, messages, 1)
    time.sleep(5)
    consumer_proc.terminate()
    db = connectdb()
    print("processed items:")
    print([doc for doc in db.processed.find()])
def delete_database(url=None, dbname='test'):
    """Drop *dbname* and return the (now empty) database handle.

    test support
    """
    handle = connectdb(url=url, dbname=dbname)
    handle.client.drop_database(dbname)
    return handle
def setUp(self):
    # start every test from a clean local database
    mongo_url = 'mongodb://localhost/test'
    delete_database(url=mongo_url)
    self.url = mongo_url
    self.db = connectdb(url=mongo_url)
def process(window):
    """Copy the window's documents into the 'processed' collection.

    :param window: minibatch window; .data is a list of documents
        (iterated as such by the sibling process() handler)
    """
    db = connectdb(url=url)
    # FIX: Collection.insert() was deprecated in pymongo 3 and removed in
    # pymongo 4; window.data is a list of documents, so insert_many is the
    # direct replacement. Guard against an empty list, which insert_many
    # rejects.
    if window.data:
        db.processed.insert_many(window.data)
def setUp(self):
    # fresh database for every test
    delete_database()
    self.om = Omega()
    # NOTE(review): this access presumably forces the omegaml mongo
    # connection to be established — confirm before removing
    db = self.om.datasets.mongodb
    self.url = self.om.mongo_url + '?authSource=admin'
    connectdb(url=self.url)
def emit(window):
    """Write the window's documents to the 'processed' collection.

    :param window: minibatch window; .data is a list of documents
    """
    # this runs in a sep thread, so reconnect db
    db = connectdb(url)
    # FIX: Collection.insert() was deprecated in pymongo 3 and removed in
    # pymongo 4; insert_many is the replacement for a list of documents.
    # Guard against an empty list, which insert_many rejects.
    if window.data:
        db.processed.insert_many(window.data)