Esempio n. 1
0
    def test_consumer(self):
        """
        Test that a message read by a KafkaSource reaches a streaming function

        A mock KafkaConsumer is injected into the KafkaSource, because the
        subject under test is KafkaSource and not KafkaConsumer.
        """
        # the mocked kafka consumer yields exactly one message when iterated
        kafka_message = MagicMock()
        kafka_message.value = dict(foo='bar')
        kafka_consumer = MagicMock()
        kafka_consumer.__iter__.return_value = [kafka_message]
        source = KafkaSource('topic')
        source._consumer = kafka_consumer
        s = stream('test', url=self.url)
        s.attach(source)

        def run_streaming(queue):
            # runs in a child process: store every window in db.processed
            mongo_url = str(self.url)

            @streaming('test', executor=LocalExecutor(), url=mongo_url, queue=queue)
            def process(window):
                db = connectdb(url=mongo_url)
                db.processed.insert(window.data)

        queue = Queue()
        worker = Process(target=run_streaming, args=(queue, ))
        worker.start()
        # let the worker run for a moment, then put True on its queue
        # (presumably the stop signal for @streaming -- confirm) and wait
        sleep(1)
        queue.put(True)
        worker.join()

        # exactly one document is expected in the processed collection
        processed = list(self.db.processed.find())
        self.assertEqual(len(processed), 1)
Esempio n. 2
0
def main():
    """
    Run the MQTT example end to end

    Attaches an MQTTSource to the 'test' stream, starts ``consumer`` in a
    child process, publishes 10 messages through an MQTTSink and finally
    prints the documents found in the ``processed`` collection.

    The consumer process is terminated and both MQTT endpoints are
    disconnected even when publishing or the database check raises, so a
    failed run does not leave a child process or open connections behind.
    """
    print("setting up")
    clean()
    # setup mqtt source and producer
    mqtt_broker = 'mqtt://*****:*****@localhost'
    topic = 'TEST/MESSAGE'
    source = MQTTSource(mqtt_broker, topic)
    producer = MQTTSink(mqtt_broker, topic)
    try:
        # attach to the stream
        s = stream('test')
        s.attach(source)
        # set up a streaming function
        emitp = Process(target=consumer)
        emitp.start()
        try:
            # publish some messages
            print("publishing messages")
            for i in range(10):
                producer.put(dict(foo='bar', time=datetime.now().isoformat()))
                sleep(.1)
            # check we got the messages
            print("wait to receive all messages")
            sleep(3)
            db = connectdb()
            docs = list(db.processed.find())
            print("processed items:", len(docs))
            print(docs)
        finally:
            # always stop the consumer process, also on errors
            emitp.terminate()
    finally:
        # same shutdown order as on the success path: source, then producer
        source.disconnect()
        producer.disconnect()
Esempio n. 3
0
        def test_sink(self):
            """
            Test that a CountWindow forwards data to a DatasetSink

            A DatasetSource on 'stream-test' feeds the stream; the window's
            forward function is the put method of a DatasetSink on
            'stream-sink'. Exactly one document is expected in the sink.
            """
            om = self.om
            db = self.db
            url = str(self.url)

            source = DatasetSource(om, 'stream-test')
            sink = DatasetSink(om, 'stream-sink')
            s = stream('test', url=url)
            s.attach(source)

            def emit(window):
                # this runs in a separate thread, so reconnect to the db
                thread_db = connectdb(url)
                thread_db.processed.insert(window.data)

            # write one object to the source dataset and give it time to arrive
            om.datasets.put({'foo': 'bar'}, 'stream-test')
            sleep(1)

            emitter = CountWindow('test', emitfn=emit, forwardfn=sink.put)
            emitter.run(blocking=False)
            sleep(1)
            s.stop()

            # NOTE(review): the processed collection is queried but its
            # contents are never asserted -- only the sink dataset is checked
            processed_docs = list(db.processed.find())
            sink_docs = list(om.datasets.collection('stream-sink').find())
            self.assertEqual(len(sink_docs), 1)
Esempio n. 4
0
def main():
    """
    Run the MongoDB example end to end

    Attaches a MongoSource on the ``source`` collection to the 'test'
    stream, starts ``consumer`` in a child process, writes 10 messages
    through a MongoSink on the ``processed`` collection and finally prints
    the documents found in ``processed``.

    The consumer process is terminated even when publishing or the
    database check raises, so a failed run does not leave it running.
    """
    print("setting up")
    clean()
    # setup mongodb source and sink
    url = 'mongodb://localhost/test'
    db = connectdb(url=url)
    source_coll = db['source']
    sink_coll = db['processed']
    source = MongoSource(source_coll)
    producer = MongoSink(sink_coll)
    # attach to the stream
    s = stream('test')
    s.attach(source)
    # set up a streaming function
    emitp = Process(target=consumer)
    emitp.start()
    try:
        # publish some messages
        print("publishing messages")
        for i in range(10):
            producer.put(dict(foo='bar', time=datetime.utcnow().isoformat()))
            sleep(.1)
        # check we got the messages
        print("wait to receive all messages")
        sleep(3)
        docs = list(sink_coll.find())
        print("processed items:", len(docs))
        print(docs)
    finally:
        # always stop the consumer process, also on errors
        emitp.terminate()
Esempio n. 5
0
 def test_stream(self):
     """
     Test that messages appended to a stream end up in the buffer
     """
     from minibatch import stream
     # attribute access only, result unused; presumably initialises the
     # mongodb connection of the datasets store -- TODO confirm
     self.om.datasets.mongodb
     s = stream('test', url=self.url)
     for value in ('bar1', 'bar2'):
         s.append({'foo': value})
     # both appended messages are expected as Buffer objects
     buffered = [doc for doc in Buffer.objects.all()]
     self.assertEqual(len(buffered), 2)
Esempio n. 6
0
 def test_source(self):
     """
     Test that MQTTSource.on_message appends to the attached stream

     A mock MQTT client is injected into the MQTTSource; its loop_forever
     is replaced by a 10 second sleep.
     """
     mqtt_client = MagicMock()
     mqtt_client.loop_forever = lambda *args: sleep(10)
     source = MQTTSource('localhost', 'TEST/#')
     source._client = mqtt_client
     s = stream('test', url=self.url)
     s.attach(source)
     # mock append so the call can be asserted on
     s.append = MagicMock()
     # hand a message with a JSON encoded payload to the source
     payload = json.dumps({'foo': 'bar'}).encode('utf-8')
     message = MagicMock()
     message.payload = payload
     source.on_message(mqtt_client, {}, message)
     s.append.assert_called()
     s.stop()
Esempio n. 7
0
    def _run_streaming_test(self, N, interval, timeout=10):
        """
        Stream documents from a MongoDB collection and return the results

        Inserts documents into the ``test`` collection in batches of
        ``interval`` while a consumer process handles windows of size
        ``interval`` and stores the average delivery latency of each window
        in the ``processed`` collection.

        Args:
            N: upper bound of the insert loop ``range(0, N, interval)``;
               also passed to MongoSource as ``size``
            interval: number of documents per inserted batch and the
               ``size`` of the streaming window
            timeout: seconds to wait after the last batch before the stream
               is stopped and the consumer process is terminated

        Returns:
            list of the documents found in the ``processed`` collection; the
            consumer writes them as ``{'delta': <average latency in
            microseconds>}``
        """
        # set up a source collection that we want to stream
        coll = self.db['test']
        source = MongoSource(coll, size=N)
        # attach to the stream
        s = stream('test', url=self.url)
        s.attach(source)

        # stream consumer
        def consumer(q, interval):
            url = str(self.url)

            @streaming('test',
                       size=interval,
                       executor=LocalExecutor(),
                       url=url,
                       queue=q)
            def process(window):
                db = connectdb(url=url)
                # calculate average time t_delta (in microseconds) it took for
                # documents to be received since insertion
                dtnow = datetime.utcnow()
                # use total_seconds(): timedelta.microseconds is only the
                # sub-second component and silently drops whole seconds
                t_delta = sum((dtnow - doc['dt']).total_seconds() * 1e6
                              for doc in window.data) / len(window.data)
                db.processed.insert_one(dict(delta=t_delta))

        # give it some input
        q = Queue()
        p = Process(target=consumer, args=(q, interval))
        p.start()

        for x in range(0, N, interval):
            docs = [{
                'foo': 'bar',
                'dt': datetime.utcnow()
            } for i in range(interval)]
            coll.insert_many(docs)
            sleep(1)

        sleep(timeout)
        s.stop()
        q.put(True)
        p.terminate()

        # check buffer is empty
        buffered_docs = list(Buffer.objects.filter())
        self.assertEqual(len(buffered_docs), 0)

        # return processed docs (in sink)
        docs = list(self.db.processed.find())
        return docs
Esempio n. 8
0
 def test_sink(self):
     """
     Test that an emitter forwards stream messages to a MongoSink
     """
     # put one message on the stream
     s = stream('test', url=self.url)
     s.append(dict(foo='baz'))
     # the sink writes to the processed collection
     sink_coll = self.db['processed']
     sink = MongoSink(sink_coll)
     # run the emitter in a thread, then set its _stop flag after a second
     em = make_emitter('test', url=self.url, sink=sink, emitfn=lambda v: v)
     worker = Thread(target=em.run)
     worker.start()
     sleep(1)
     em._stop = True
     # exactly one document is expected in the sink collection
     stored = list(sink_coll.find())
     self.assertEqual(len(stored), 1)
Esempio n. 9
0
 def test_sink(self):
     """
     Test that an emitter forwards stream messages to the KafkaSink producer

     A mock KafkaProducer is injected into the KafkaSink.
     """
     # put one message on the stream
     s = stream('test', url=self.url)
     s.append(dict(foo='baz'))
     kafka_producer = MagicMock()
     sink = KafkaSink('test')
     sink._producer = kafka_producer
     # run the emitter in a thread, then set its _stop flag after a second
     em = make_emitter('test', url=self.url, sink=sink, emitfn=lambda v: v)
     worker = Thread(target=em.run)
     worker.start()
     sleep(1)
     em._stop = True
     # the sink must have forwarded the message to the mock KafkaProducer
     kafka_producer.send.assert_called_with('test', value={'foo': 'baz'})
Esempio n. 10
0
        def test_timed_window_relaxed(self):
            """
            Test relaxed timed windows process all messages

            Ten messages are appended one per second while a consumer
            process runs a relaxed 1-interval window that writes its results
            to the 'consumer' dataset.
            """
            from minibatch import streaming

            def run_consumer(queue, mongo_url):
                # note the stream decorator blocks the consumer and runs the
                # decorated function asynchronously once the window criteria
                # is satisfied
                om = Omega(mongo_url=mongo_url)

                @streaming('test',
                           interval=1,
                           keep=True,
                           url=mongo_url,
                           queue=queue,
                           relaxed=True,
                           sink=DatasetSink(om, 'consumer'))
                def myprocess(window):
                    return {'myprocess': True, 'data': window.data}

            # start stream and consumer
            queue = Queue()
            s = stream('test', url=self.url)
            proc = Process(target=run_consumer, args=(queue, self.url))
            proc.start()
            # fill stream, one message per second
            for index in range(10):
                s.append({'index': index})
                sleep(1)
            # give it some time to process
            sleep(2)
            queue.put(True)
            proc.join()
            windows = [
                doc for doc in self.om.datasets.collection('consumer').find()
            ]
            # more than 5 windows must have been written
            self.assertGreater(len(windows), 5)
            # at least 4 of the windows hold one or more entries
            non_empty = sum(len(w['data']) >= 1 for w in windows)
            self.assertTrue(non_empty >= 4)
            # all messages were processed
            total_entries = sum(len(w['data']) for w in windows)
            self.assertEqual(total_entries, 10)
Esempio n. 11
0
 def test_source(self):
     """
     Test that a CeleryEventSource appends events to its stream

     A DummyCeleryApp supplies a single event; the stream's append is
     mocked because the subject under test is CeleryEventSource.
     """
     celeryapp = DummyCeleryApp()
     source = CeleryEventSource(celeryapp)
     s = stream('test', url=self.url)
     # mock stream append so the call can be asserted on
     s.append = MagicMock()
     # the one event handed to the dummy celery app
     success_event = dict(name='test', uuid='12345', state='SUCCESS',
                          runtime=1.0)
     celeryapp.source = source
     celeryapp.dummy_events = [success_event]
     s.attach(source)
     source.stream(s)
     s.append.assert_called()
     s.stop()
Esempio n. 12
0
        def test_fixed_size(self):
            """
            Test fixed-size batch windows

            Ten messages are appended while a consumer process runs a window
            of size 2 that writes its results to the 'consumer' dataset;
            five windows of two entries each are expected.
            """
            from minibatch import streaming, stream

            def run_consumer(queue, mongo_url):
                # note the stream decorator blocks the consumer and runs the
                # decorated function asynchronously once the window criteria
                # is satisfied
                om = Omega(mongo_url=mongo_url)

                @streaming('test',
                           size=2,
                           url=mongo_url,
                           keep=True,
                           queue=queue,
                           sink=DatasetSink(om, 'consumer'))
                def myprocess(window):
                    return {'myprocess': True, 'data': window.data}

            # start stream and consumer
            s = stream('test', url=self.url)
            queue = Queue()
            proc = Process(target=run_consumer, args=(queue, self.url))
            proc.start()
            # fill stream
            for index in range(10):
                s.append({'index': index})
            # give it some time to process
            sleep(5)
            queue.put(True)
            proc.join()
            # expect 5 entries, each of length 2
            windows = [
                doc for doc in self.om.datasets.collection('consumer').find()
            ]
            self.assertEqual(len(windows), 5)
            sizes_ok = all(len(w['data']) == 2 for w in windows)
            self.assertTrue(sizes_ok)