def test_consumer(self):
    """ Test that a message delivered by a KafkaSource gets processed

    A mock KafkaConsumer yielding a single message is injected into the
    KafkaSource. A streaming function running in a child process writes
    every window it receives to the ``processed`` collection, which is
    expected to hold exactly one document afterwards.
    """
    # we simply inject a mock KafkaConsumer into the KafkaSource
    # as we don't want to test KafkaConsumer but KafkaSource
    message = MagicMock()
    message.value = dict(foo='bar')
    source = KafkaSource('topic')
    mock_consumer = MagicMock()
    mock_consumer.__iter__.return_value = [message]
    source._consumer = mock_consumer
    s = stream('test', url=self.url)
    s.attach(source)

    def consumer(q):
        url = str(self.url)

        @streaming('test', executor=LocalExecutor(), url=url, queue=q)
        def process(window):
            db = connectdb(url=url)
            # Collection.insert is deprecated (removed in PyMongo 4);
            # assumes window.data is a list of documents, as the other
            # streaming functions in this file treat it -- TODO confirm
            db.processed.insert_many(window.data)

    q = Queue()
    p = Process(target=consumer, args=(q,))
    p.start()
    sleep(1)
    # presumably the queue entry tells the streaming function to stop;
    # then wait for the child process to exit
    q.put(True)
    p.join()
    docs = list(self.db.processed.find())
    self.assertEqual(len(docs), 1)
def main():
    """ Publish ten messages via MQTT and print the processed documents

    Sets up an MQTTSource attached to the 'test' stream, starts the
    module's ``consumer`` in a child process, publishes ten messages
    through an MQTTSink and finally prints whatever is found in the
    ``processed`` collection. The child process and both MQTT connections
    are released even if publishing or reading fails.
    """
    print("setting up")
    clean()
    # setup mqtt source and producer
    mqtt_broker = 'mqtt://*****:*****@localhost'
    topic = 'TEST/MESSAGE'
    source = MQTTSource(mqtt_broker, topic)
    producer = MQTTSink(mqtt_broker, topic)
    # attach to the stream
    s = stream('test')
    s.attach(source)
    # set up a streaming function
    emitp = Process(target=consumer)
    emitp.start()
    try:
        # publish some messages
        print("publishing messages")
        for _ in range(10):
            producer.put(dict(foo='bar', time=datetime.now().isoformat()))
            sleep(.1)
        # check we got the messages
        print("wait to receive all messages")
        sleep(3)
        db = connectdb()
        docs = list(db.processed.find())
        print("processed items:", len(docs))
        print(docs)
    finally:
        # always stop the child: a non-daemonic process that is still
        # running would otherwise keep the interpreter from exiting
        emitp.terminate()
        source.disconnect()
        producer.disconnect()
def test_sink(self):
    """ Test that a window is forwarded from a DatasetSource to a DatasetSink

    A document put into the 'stream-test' dataset is picked up by the
    attached DatasetSource; a CountWindow emitter passes each window to
    ``emit`` and forwards via ``sink.put``. The 'stream-sink' collection
    is expected to contain exactly one document afterwards.
    """
    om = self.om
    url = str(self.url)
    source = DatasetSource(om, 'stream-test')
    sink = DatasetSink(om, 'stream-sink')
    s = stream('test', url=url)
    s.attach(source)

    def emit(window):
        # this runs in a sep thread, so reconnect db
        db = connectdb(url)
        # Collection.insert is deprecated (removed in PyMongo 4);
        # assumes window.data is a list of documents -- TODO confirm
        db.processed.insert_many(window.data)

    om.datasets.put({'foo': 'bar'}, 'stream-test')
    sleep(1)
    em = CountWindow('test', emitfn=emit, forwardfn=sink.put)
    em.run(blocking=False)
    sleep(1)
    s.stop()
    # NOTE(review): the documents written to ``processed`` by emit() are
    # not asserted on; only the forwarded sink content is checked
    docs = list(om.datasets.collection('stream-sink').find())
    self.assertEqual(len(docs), 1)
def main():
    """ Publish ten messages via MongoDB and print the processed documents

    Sets up a MongoSource on the ``source`` collection attached to the
    'test' stream, starts the module's ``consumer`` in a child process,
    writes ten messages through a MongoSink on the ``processed``
    collection and prints what that collection contains. The child
    process is terminated even if publishing or reading fails.
    """
    print("setting up")
    clean()
    # setup mongodb source and producer
    url = 'mongodb://localhost/test'
    db = connectdb(url=url)
    source_coll = db['source']
    sink_coll = db['processed']
    source = MongoSource(source_coll)
    # NOTE(review): the producer writes straight into ``processed`` while
    # the source reads from ``source`` -- confirm this is intended
    producer = MongoSink(sink_coll)
    # attach to the stream
    s = stream('test')
    s.attach(source)
    # set up a streaming function
    emitp = Process(target=consumer)
    emitp.start()
    try:
        # publish some messages
        print("publishing messages")
        for _ in range(10):
            producer.put(dict(foo='bar', time=datetime.utcnow().isoformat()))
            sleep(.1)
        # check we got the messages
        print("wait to receive all messages")
        sleep(3)
        docs = list(sink_coll.find())
        print("processed items:", len(docs))
        print(docs)
    finally:
        # always stop the child: a non-daemonic process that is still
        # running would otherwise keep the interpreter from exiting
        emitp.terminate()
def test_stream(self):
    """ Verify that messages appended to a stream show up in the Buffer """
    from minibatch import stream
    # attribute access kept for its effect only; presumably it sets up
    # the mongodb connection -- confirm
    self.om.datasets.mongodb
    messages = stream('test', url=self.url)
    for value in ('bar1', 'bar2'):
        messages.append({'foo': value})
    # count the buffered documents one by one
    buffered = sum(1 for _ in Buffer.objects.all())
    self.assertEqual(buffered, 2)
def test_source(self):
    """ Verify that MQTTSource.on_message appends to the attached stream """

    def idle(*args):
        # stand-in for the client's loop: just sleeps
        return sleep(10)

    # prepare a message whose payload is utf-8 encoded JSON
    incoming = MagicMock()
    incoming.payload = json.dumps({'foo': 'bar'}).encode('utf-8')
    # the MQTT client is replaced by a mock, only MQTTSource is exercised
    mqtt_client = MagicMock()
    mqtt_client.loop_forever = idle
    mqtt_source = MQTTSource('localhost', 'TEST/#')
    mqtt_source._client = mqtt_client
    test_stream = stream('test', url=self.url)
    test_stream.attach(mqtt_source)
    # replace append so we can observe that it gets called
    test_stream.append = MagicMock()
    mqtt_source.on_message(mqtt_client, {}, incoming)
    test_stream.append.assert_called()
    test_stream.stop()
def _run_streaming_test(self, N, interval, timeout=10):
    """ Stream documents from a MongoDB collection and return the results

    Documents are inserted into the ``test`` collection in batches of
    ``interval``, one batch per second. A streaming function running in a
    child process records, per window, the average delay between a
    document's insertion and its processing into ``processed``.

    Args:
        N: upper bound of the insert loop ``range(0, N, interval)``; also
            passed as ``size`` to the MongoSource
        interval: number of documents per inserted batch; also the window
            size of the streaming function
        timeout: seconds to wait after the last batch before stopping

    Returns:
        list of the documents found in the ``processed`` collection, each
        holding the average delay in microseconds under ``delta``
    """
    # set up a source collection that we want to stream
    coll = self.db['test']
    source = MongoSource(coll, size=N)
    # attach to the stream
    s = stream('test', url=self.url)
    s.attach(source)

    # stream consumer
    def consumer(q, interval):
        url = str(self.url)

        @streaming('test', size=interval, executor=LocalExecutor(), url=url, queue=q)
        def process(window):
            db = connectdb(url=url)
            # calculate average time t_delta it took for documents to be
            # received since insertion
            dtnow = datetime.utcnow()
            # timedelta.microseconds is only the sub-second component, so
            # use total_seconds() to get the full elapsed time, expressed
            # in microseconds
            t_delta = sum((dtnow - doc['dt']).total_seconds() * 1e6
                          for doc in window.data) / len(window.data)
            db.processed.insert_one(dict(delta=t_delta))

    # give it some input
    q = Queue()
    p = Process(target=consumer, args=(q, interval))
    p.start()
    for _ in range(0, N, interval):
        docs = [{
            'foo': 'bar',
            'dt': datetime.utcnow()
        } for _ in range(interval)]
        coll.insert_many(docs)
        sleep(1)
    sleep(timeout)
    s.stop()
    q.put(True)
    p.terminate()
    # check buffer is empty
    buffered_docs = list(Buffer.objects.filter())
    self.assertEqual(len(buffered_docs), 0)
    # return processed docs (in sink)
    docs = list(self.db.processed.find())
    return docs
def test_sink(self):
    """ Verify that a MongoSink receives the window emitted for a stream """

    def passthrough(v):
        # emit function that hands the value on unchanged
        return v

    # put a single message on the stream
    test_stream = stream('test', url=self.url)
    test_stream.append({'foo': 'baz'})
    # the sink writes to the 'processed' collection
    target = self.db['processed']
    mongo_sink = MongoSink(target)
    # run the emitter in a thread so that it can be stopped from here
    emitter = make_emitter('test', url=self.url, sink=mongo_sink,
                           emitfn=passthrough)
    worker = Thread(target=emitter.run)
    worker.start()
    sleep(1)
    emitter._stop = True
    # exactly one document is expected in the sink collection
    stored = list(target.find())
    self.assertEqual(len(stored), 1)
def test_sink(self):
    """ Verify that a KafkaSink passes emitted data on to its producer """

    def passthrough(v):
        # emit function that hands the value on unchanged
        return v

    # put a single message on the stream
    test_stream = stream('test', url=self.url)
    test_stream.append({'foo': 'baz'})
    # the KafkaProducer is replaced by a mock, only KafkaSink is exercised
    mock_producer = MagicMock()
    kafka_sink = KafkaSink('test')
    kafka_sink._producer = mock_producer
    # run the emitter in a thread so that it can be stopped from here
    emitter = make_emitter('test', url=self.url, sink=kafka_sink,
                           emitfn=passthrough)
    worker = Thread(target=emitter.run)
    worker.start()
    sleep(1)
    emitter._stop = True
    # the sink must have sent the message to the mock producer
    mock_producer.send.assert_called_with('test', value={'foo': 'baz'})
def test_timed_window_relaxed(self):
    """ Verify relaxed, time-based windows process every message """
    from minibatch import streaming

    def run_consumer(q, url):
        # executed in a child process: the streaming decorator is applied
        # here with a one second interval and a DatasetSink as the sink
        om = Omega(mongo_url=url)
        sink = DatasetSink(om, 'consumer')

        @streaming('test', interval=1, keep=True, url=url, queue=q,
                   relaxed=True, sink=sink)
        def myprocess(window):
            return {'myprocess': True, 'data': window.data}

    # start the consumer process, then create the stream to feed
    queue = Queue()
    feed = stream('test', url=self.url)
    worker = Process(target=run_consumer, args=(queue, self.url))
    worker.start()
    # append ten messages, one per second
    for index in range(10):
        feed.append({'index': index})
        sleep(1)
    # leave some time for processing, then put True on the queue and wait
    # for the consumer process to finish
    sleep(2)
    queue.put(True)
    worker.join()
    # more than 5 windows must have been written to the sink
    windows = list(self.om.datasets.collection('consumer').find())
    self.assertGreater(len(windows), 5)
    # at least 4 windows carry one or more entries
    non_empty = sum(1 for w in windows if len(w['data']) >= 1)
    self.assertTrue(non_empty >= 4)
    # all ten messages were processed
    total = sum(len(w['data']) for w in windows)
    self.assertEqual(total, 10)
def test_source(self):
    """ Verify that CeleryEventSource appends events to the stream """
    # a single event for the dummy celery app to provide
    task_event = dict(name='test', uuid='12345', state='SUCCESS', runtime=1.0)
    app = DummyCeleryApp()
    event_source = CeleryEventSource(app)
    test_stream = stream('test', url=self.url)
    # append is mocked: the unit under test is the source, not the stream
    test_stream.append = MagicMock()
    # wire the dummy app to the source and give it the event
    app.source = event_source
    app.dummy_events = [task_event]
    test_stream.attach(event_source)
    event_source.stream(test_stream)
    test_stream.append.assert_called()
    test_stream.stop()
def test_fixed_size(self):
    """ Verify fixed-size windows deliver batches of the requested size """
    from minibatch import streaming, stream

    def run_consumer(q, url):
        # executed in a child process: the streaming decorator is applied
        # here with a window size of 2 and a DatasetSink as the sink
        om = Omega(mongo_url=url)
        sink = DatasetSink(om, 'consumer')

        @streaming('test', size=2, url=url, keep=True, queue=q, sink=sink)
        def myprocess(window):
            return {'myprocess': True, 'data': window.data}

    # create the stream, then start the consumer process
    feed = stream('test', url=self.url)
    queue = Queue()
    worker = Process(target=run_consumer, args=(queue, self.url))
    worker.start()
    # append ten messages without pausing
    for index in range(10):
        feed.append({'index': index})
    # leave some time for processing, then put True on the queue and wait
    # for the consumer process to finish
    sleep(5)
    queue.put(True)
    worker.join()
    # ten messages in windows of two: 5 windows with 2 entries each
    windows = list(self.om.datasets.collection('consumer').find())
    self.assertEqual(len(windows), 5)
    sizes_ok = all(len(w['data']) == 2 for w in windows)
    self.assertTrue(sizes_ok)