Ejemplo n.º 1
0
    async def test_memory_checkpoint(self, *args):
        """Check shard allocation for two consumers sharing one MemoryCheckPointer.

        ``*args`` absorbs extra positional arguments handed to the test
        (the original note says they "pass through mock").
        """
        shared_checkpointer = MemoryCheckPointer(name="test")

        # --- first consumer: capped at a single shard ---
        first = Consumer(
            stream_name=None,
            checkpointer=shared_checkpointer,
            max_shard_consumers=1,
            endpoint_url=ENDPOINT_URL,
        )
        first.get_shard_iterator.return_value = True
        first.shards = [
            {"ShardId": "test-1", "SequenceNumberRange": {}},
            {"ShardId": "test-2", "SequenceNumberRange": {}},
        ]
        first.stream_status = first.ACTIVE
        first.shards_status = first.INITIALIZE

        await first.sync_shards()

        # A shard counts as assigned here when it carries a truthy "stats" entry.
        assigned = [shard["ShardId"] for shard in first.shards if shard.get("stats")]

        # max_shard_consumers=1, so only the first shard is expected.
        self.assertEqual(["test-1"], assigned)

        # --- second consumer ---
        # (note: max_shard_consumers needs to be 2 as uses checkpointer to get allocated shards)
        second = Consumer(
            stream_name=None,
            checkpointer=shared_checkpointer,
            max_shard_consumers=2,
            endpoint_url=ENDPOINT_URL,
        )
        second.get_shard_iterator.return_value = True
        second.shards = [
            {"ShardId": "test-1", "SequenceNumberRange": {}},
            {"ShardId": "test-2", "SequenceNumberRange": {}},
        ]
        second.stream_status = second.ACTIVE
        second.shards_status = second.INITIALIZE

        await second.sync_shards()

        assigned = [shard["ShardId"] for shard in second.shards if shard.get("stats")]

        # With max_shard_consumers=2 both shards are expected to carry stats.
        self.assertEqual(["test-1", "test-2"], assigned)
Ejemplo n.º 2
0
    async def test_producer_and_consumer_consume_from_start_after(self):
        """Records put without an explicit flush are all readable afterwards.

        The producer context is left right after the puts (no ``flush()``
        call), so the test relies on the data being written on exit.
        """
        # Don't flush, close producer immediately to test all data is written to stream on exit.
        async with Producer(
                stream_name=self.stream_name,
                endpoint_url=ENDPOINT_URL,
                processor=StringProcessor(),
        ) as producer:
            # Put enough data to ensure it will require more than one put
            # ie test overflow behaviour
            for _ in range(15):
                await producer.put(self.random_string(100 * 1024))

        results = []

        async with Consumer(
                stream_name=self.stream_name,
                endpoint_url=ENDPOINT_URL,
                processor=StringProcessor(),
        ) as consumer:
            async for item in consumer:
                results.append(item)

        # Expect to have consumed from start as default iterator_type=TRIM_HORIZON
        # (assertEqual: the assertEquals alias is deprecated and gone in Python 3.12)
        self.assertEqual(len(results), 15)
Ejemplo n.º 3
0
    async def test_producer_and_consumer_consume_with_msgpack_aggregator(self):
        """Round-trip ten dict records through a shared MsgpackProcessor.

        The same processor instance is given to both the producer and the
        consumer; the first and last consumed items are compared with what
        was put.
        """
        processor = MsgpackProcessor()

        async with Producer(stream_name=self.stream_name,
                            endpoint_url=ENDPOINT_URL,
                            processor=processor) as producer:

            for x in range(10):
                await producer.put({"test": x})

            await producer.flush()

            results = []

            async with Consumer(
                    stream_name=self.stream_name,
                    endpoint_url=ENDPOINT_URL,
                    processor=processor,
            ) as consumer:
                async for item in consumer:
                    results.append(item)

            # Expect to have consumed from start as default iterator_type=TRIM_HORIZON

            self.assertEqual(len(results), 10)

            # assertEqual throughout: the assertEquals alias is deprecated
            # and removed in Python 3.12.
            self.assertEqual(results[0], {"test": 0})
            self.assertEqual(results[-1], {"test": 9})
Ejemplo n.º 4
0
    async def test_producer_and_consumer_consume_throttle(self):
        """Consume for about three seconds with a throttled consumer and bound the count."""
        producer_cm = Producer(stream_name=self.stream_name,
                               endpoint_url=ENDPOINT_URL)

        async with producer_cm as producer:
            for _ in range(100):
                await producer.put("test")

            await producer.flush()

            collected = []

            throttled = Consumer(
                stream_name=self.stream_name,
                endpoint_url=ENDPOINT_URL,
                record_limit=10,
                # 2 per second
                shard_fetch_rate=2,
            )

            async with throttled as consumer:

                from datetime import datetime

                started_at = datetime.now()

                # Keep draining the consumer until just over three seconds have elapsed.
                while True:
                    elapsed = (datetime.now() - started_at).total_seconds()
                    if not elapsed < 3.05:
                        break
                    async for record in consumer:
                        collected.append(record)

            # Expect 2*3*10 = 60  ie at most 6 iterations of 10 records
            count = len(collected)
            self.assertGreaterEqual(count, 50)
            self.assertLessEqual(count, 70)
Ejemplo n.º 5
0
    async def test_producer_producer_limit(self):
        """Push 20 large records through a bandwidth-limited producer.

        All 20 records must still arrive at the consumer, and the producer is
        expected to have counted at least one throughput-exceeded event.
        """
        # Expect some throughput errors

        async with Producer(
                stream_name=self.STREAM_NAME_SINGLE_SHARD,
                processor=StringProcessor(),
                put_bandwidth_limit_per_shard=1500,
        ) as producer:

            async with Consumer(
                    stream_name=self.STREAM_NAME_SINGLE_SHARD,
                    processor=StringProcessor(),
                    iterator_type="LATEST",
            ) as consumer:

                await consumer.start_consumer()

                # Wait a bit just to be sure iterator is gonna get late
                await asyncio.sleep(3)

                for _ in range(20):
                    await producer.put(self.random_string(1024 * 250))

                # todo: async timeout
                output = []
                while len(output) < 20:
                    async for item in consumer:
                        output.append(item)

                # assertEqual: the assertEquals alias is deprecated and removed
                # in Python 3.12.
                self.assertEqual(len(output), 20)
                # assertGreater reports the actual count on failure, unlike
                # assertTrue(x > 0).
                self.assertGreater(producer.throughput_exceeded_count, 0)
Ejemplo n.º 6
0
    async def test_producer_and_consumer(self):
        """Smoke test: enter a Producer context and, nested in it, a Consumer context."""
        stream_kwargs = {
            "stream_name": self.stream_name,
            "endpoint_url": ENDPOINT_URL,
        }

        async with Producer(**stream_kwargs):
            # Nothing is produced; the consumer is only opened and closed.
            async with Consumer(**stream_kwargs):
                pass
Ejemplo n.º 7
0
    async def test_producer_and_consumer(self):
        """Smoke test: create a one-shard stream via the producer, then open a Consumer on it."""
        writer_cm = Producer(stream_name=self.stream_name,
                             endpoint_url=ENDPOINT_URL)

        async with writer_cm as writer:
            await writer.create_stream(shards=1)

            reader_cm = Consumer(stream_name=self.stream_name,
                                 endpoint_url=ENDPOINT_URL)

            # The consumer is only entered and exited; no records are read.
            async with reader_cm:
                pass
Ejemplo n.º 8
0
    async def test_memory_checkpoint(self):
        """Check shard allocation for two consumers sharing one MemoryCheckPointer."""
        shared_checkpointer = MemoryCheckPointer(name="test")

        # --- first consumer: capped at a single shard ---
        first = Consumer(
            stream_name=None,
            checkpointer=shared_checkpointer,
            max_shard_consumers=1,
        )
        self.patch_consumer_fetch(first)
        first.shards = [{"ShardId": "test-1"}, {"ShardId": "test-2"}]

        await first.fetch()

        # A shard counts as assigned here when it carries a truthy "stats" entry.
        assigned = [shard["ShardId"] for shard in first.shards if shard.get("stats")]

        # max_shard_consumers=1, so only the first shard is expected.
        self.assertEqual(["test-1"], assigned)

        # --- second consumer ---
        # (note: max_shard_consumers needs to be 2 as uses checkpointer to get allocated shards)
        second = Consumer(
            stream_name=None,
            checkpointer=shared_checkpointer,
            max_shard_consumers=2,
        )
        self.patch_consumer_fetch(second)
        second.shards = [{"ShardId": "test-1"}, {"ShardId": "test-2"}]

        await second.fetch()

        assigned = [shard["ShardId"] for shard in second.shards if shard.get("stats")]

        # The second consumer is expected to end up with only "test-2".
        self.assertEqual(["test-2"], assigned)
Ejemplo n.º 9
0
    async def test_stream_does_not_exist(self):
        """Both Producer and Consumer must raise StreamDoesNotExist for an unknown stream name."""
        missing_stream = "test_stream_does_not_exist"

        await asyncio.sleep(2)

        # Producer side: the put happens inside the context being checked.
        with self.assertRaises(exceptions.StreamDoesNotExist):
            async with Producer(
                stream_name=missing_stream,
                endpoint_url=ENDPOINT_URL,
            ) as writer:
                await writer.put("test")

        # Consumer side: merely entering/leaving the context is checked.
        with self.assertRaises(exceptions.StreamDoesNotExist):
            async with Consumer(
                stream_name=missing_stream,
                endpoint_url=ENDPOINT_URL,
            ):
                pass
Ejemplo n.º 10
0
    async def test_producer_and_consumer_consume_from_start_flush(self):
        """Put one record, flush explicitly, and expect exactly that record back."""
        async with Producer(stream_name=self.stream_name,
                            endpoint_url=ENDPOINT_URL) as producer:
            await producer.put({"test": 123})

            await producer.flush()

            results = []

            async with Consumer(stream_name=self.stream_name,
                                endpoint_url=ENDPOINT_URL) as consumer:
                async for item in consumer:
                    results.append(item)

            # Expect to have consumed from start as default iterator_type=TRIM_HORIZON
            # (assertEqual: the assertEquals alias is deprecated and gone in Python 3.12)
            self.assertEqual([{"test": 123}], results)
Ejemplo n.º 11
0
    async def test_producer_and_consumer_consume_with_bytes(self):
        """Round-trip two records through a processor whose serializer emits bytes.

        A MemoryCheckPointer is checkpointed manually after each consumed
        item; after closing it, exactly one checkpoint entry is expected.
        """

        class ByteSerializer(Serializer):
            # Encodes outgoing str messages to bytes; incoming data is
            # handed back untouched.
            def serialize(self, msg):
                return msg.encode()

            def deserialize(self, data):
                return data

        class ByteProcessor(Processor, NetstringAggregator, ByteSerializer):
            pass

        processor = ByteProcessor()

        async with Producer(
            stream_name=self.stream_name, endpoint_url=ENDPOINT_URL, processor=processor
        ) as producer:

            for x in range(2):
                await producer.put(f"{x}")

            await producer.flush()

            results = []

            checkpointer = MemoryCheckPointer(name="test")

            async with Consumer(
                stream_name=self.stream_name,
                endpoint_url=ENDPOINT_URL,
                processor=processor,
                checkpointer=checkpointer,
            ) as consumer:
                async for item in consumer:
                    results.append(item)
                    await checkpointer.checkpoint(
                        shard_id=consumer.shards[0]["ShardId"], sequence="seq"
                    )

                # Second pass over the consumer picks up anything the first
                # iteration did not yield.
                async for item in consumer:
                    results.append(item)

            # assertEqual throughout: the assertEquals alias is deprecated
            # and removed in Python 3.12.
            self.assertEqual(len(results), 2)

            await checkpointer.close()

            self.assertEqual(len(checkpointer.get_all_checkpoints()), 1)
Ejemplo n.º 12
0
    async def test_producer_and_consumer_consume_multiple_shards_with_redis_checkpointer(
        self,
    ):
        """Consume 100 records from a stream requested with two shards, using a RedisCheckPointer.

        Afterwards the checkpointer must hold two entries, none of them None.
        """
        stream_name = "test_{}".format(str(uuid.uuid4())[0:8])
        async with Producer(
            stream_name=stream_name,
            endpoint_url=ENDPOINT_URL,
            create_stream=stream_name,
            create_stream_shards=2,
        ) as producer:

            for i in range(100):
                await producer.put("test.{}".format(i))

            await producer.flush()

            results = []

            checkpointer = RedisCheckPointer(
                name="test-{}".format(str(uuid.uuid4())[0:8]), heartbeat_frequency=3
            )

            async with Consumer(
                stream_name=stream_name,
                endpoint_url=ENDPOINT_URL,
                checkpointer=checkpointer,
                record_limit=10,
            ) as consumer:

                # consumer will stop if no msgs, so poll it several times
                for _ in range(6):
                    async for item in consumer:
                        results.append(item)
                    await asyncio.sleep(0.5)

                # assertEqual throughout: the assertEquals alias is deprecated
                # and removed in Python 3.12.
                self.assertEqual(100, len(results))

                checkpoints = checkpointer.get_all_checkpoints()

                self.assertEqual(2, len(checkpoints))

                # Expect both shards to have been used/set
                for checkpoint in checkpoints.values():
                    self.assertIsNotNone(checkpoint)
Ejemplo n.º 13
0
    async def test_consumer_consume_fetch_limit(self):
        """Fetch aggressively and expect the first shard's stats to report throttling."""

        async with Consumer(stream_name=self.STREAM_NAME_SINGLE_SHARD,
                            sleep_time_no_records=0.0001,
                            shard_fetch_rate=500,
                            iterator_type="LATEST") as consumer:
            await consumer.start()

            # GetShardIterator has a limit of five transactions per second per account per open shard

            for _ in range(500):
                await consumer.fetch()
                # sleep 50ms
                await asyncio.sleep(0.05)

            # Only the first shard's stats are inspected; index it directly
            # instead of building a throwaway list of every shard's stats.
            shard_stats = consumer.shards[0]["stats"].to_data()

            # assertGreater shows the actual throttle count on failure.
            self.assertGreater(shard_stats["throttled"], 0,
                               msg="Expected to be throttled")
Ejemplo n.º 14
0
    async def test_producer_and_consumer_consume_queue_full(self):
        """With max_queue_size=20, a single pass over the consumer should yield 20 of 100 records."""
        producer_cm = Producer(stream_name=self.stream_name,
                               endpoint_url=ENDPOINT_URL)

        async with producer_cm as producer:
            for _ in range(100):
                await producer.put("test")

            await producer.flush()

            consumed = []

            bounded = Consumer(
                stream_name=self.stream_name,
                endpoint_url=ENDPOINT_URL,
                max_queue_size=20,
            )

            async with bounded as consumer:
                consumed.extend([record async for record in consumer])

            # Expect 20 only as queue is full and we don't wait on queue
            self.assertEqual(20, len(consumed))
Ejemplo n.º 15
0
    async def test_consumer_checkpoint(self):
        """Consume one record with a MemoryCheckPointer and verify a sequence was recorded.

        The consumer uses iterator_type="LATEST" and is started manually
        before the record is put.
        """

        checkpointer = MemoryCheckPointer(name="test")

        results = []

        async with Producer(
                stream_name=self.STREAM_NAME_SINGLE_SHARD,
                processor=StringProcessor(),
        ) as producer:

            async with Consumer(
                    stream_name=self.STREAM_NAME_SINGLE_SHARD,
                    checkpointer=checkpointer,
                    processor=StringProcessor(),
                    iterator_type="LATEST",
            ) as consumer:

                # Manually start
                await consumer.start_consumer()

                await producer.put("test")

                await producer.flush()

                # Iterate the consumer a few times to collect the record.
                for _ in range(3):
                    async for item in consumer:
                        results.append(item)

            checkpoints = checkpointer.get_all_checkpoints()

            # Expect 1 as only 1 shard
            # (assertEqual: the assertEquals alias is deprecated and gone in Python 3.12)
            self.assertEqual(1, len(checkpoints))

            self.assertIsNotNone(checkpoints[list(
                checkpoints.keys())[0]]["sequence"])

            self.assertListEqual(results, ["test"])
Ejemplo n.º 16
0
async def test_producer(data, processor):
    """Time how long it takes to put and flush every item of ``data``.

    Returns ``(True, seconds)`` when the consumer read back as many records
    as were sent, otherwise ``(False, None)``. Only the put/flush phase is
    inside the timer.
    """
    log.info("Testing with {}".format(processor.__class__.__name__))

    producer_cm = Producer(
        stream_name="test",
        processor=processor,
        max_queue_size=100000,
    )

    async with producer_cm as producer:

        await producer.create_stream(shards=1, ignore_exists=True)

        consumer_cm = Consumer(
            stream_name="test",
            processor=processor,
            max_queue_size=100000,
            iterator_type="LATEST",
        )

        async with consumer_cm as consumer:

            # ensure set up before producer puts records as using LATEST
            await consumer.start_consumer(wait_iterations=0)

            with Timer() as timer:
                for record in data:
                    await producer.put(record)
                await producer.flush()

            # Keep draining until at least len(data) records were counted.
            consumed = 0
            while consumed < len(data):
                async for _ in consumer:
                    consumed += 1

    if len(data) != consumed:
        log.error("Failed to read all records.. expected {} read {}".format(
            len(data), consumed))
        return False, None

    log.info("Completed {} records (read: {}) in {} seconds".format(
        len(data), consumed, round(timer.elapsed, 2)))

    return True, round(timer.elapsed, 2)
Ejemplo n.º 17
0
    async def test_producer_and_consumer_consume_with_checkpointer_and_latest(
            self):
        """Exercise three successive consumers sharing one MemoryCheckPointer with LATEST.

        1. The first consumer sees nothing and leaves a checkpoint whose
           sequence is None.
        2. The second consumer is started before "test.B" is put and must
           receive it, leaving a non-None sequence.
        3. Ten more records are put; the third consumer must receive all ten.
        """
        async with Producer(stream_name=self.stream_name,
                            endpoint_url=ENDPOINT_URL) as producer:

            await producer.put("test.A")

            results = []

            checkpointer = MemoryCheckPointer(name="test")

            async with Consumer(
                    stream_name=self.stream_name,
                    endpoint_url=ENDPOINT_URL,
                    checkpointer=checkpointer,
                    iterator_type="LATEST",
            ) as consumer:

                async for item in consumer:
                    results.append(item)

            # Expect none as LATEST
            # (assertEqual throughout: the assertEquals alias is deprecated
            # and removed in Python 3.12)
            self.assertEqual([], results)

            checkpoints = checkpointer.get_all_checkpoints()

            # Expect 1 as only 1 shard
            self.assertEqual(1, len(checkpoints))

            # none as no records yet (using LATEST)
            self.assertIsNone(checkpoints[list(
                checkpoints.keys())[0]]["sequence"])

            results = []

            log.info("checkpointer checkpoints: {}".format(checkpoints))

            log.info("Starting consumer again..")

            async with Consumer(
                    stream_name=self.stream_name,
                    endpoint_url=ENDPOINT_URL,
                    checkpointer=checkpointer,
                    iterator_type="LATEST",
                    sleep_time_no_records=0.5,
            ) as consumer:

                # Manually start
                await consumer.start_consumer()

                await producer.put("test.B")

                await producer.flush()

                log.info("waiting..")

                await asyncio.sleep(1)

                log.info("about to consume..")

                async for item in consumer:
                    results.append(item)

            self.assertEqual(["test.B"], results)

            checkpoints = checkpointer.get_all_checkpoints()

            log.info("checkpointer checkpoints: {}".format(checkpoints))

            # expect not None as has processed records
            self.assertIsNotNone(checkpoints[list(
                checkpoints.keys())[0]]["sequence"])

            # now add some records
            for i in range(10):
                await producer.put("test.{}".format(i))

            await producer.flush()

            await asyncio.sleep(1)

            results = []

            async with Consumer(
                    stream_name=self.stream_name,
                    endpoint_url=ENDPOINT_URL,
                    checkpointer=checkpointer,
                    iterator_type="LATEST",
                    sleep_time_no_records=0.5,
            ) as consumer:

                async for item in consumer:
                    results.append(item)

            # Expect results as checkpointer resumed from prior sequence
            self.assertEqual(10, len(results))
Ejemplo n.º 18
0
    async def test_resharding(self):
        """Produce 50 records, reshard the stream to 2 shards, produce 50 more, expect 100 consumed.

        Uses a RedisCheckPointer and a consumer whose queue and record limit
        are both 5, so the first consume phase leaves records in the stream.
        """

        stream_name = self.STREAM_NAME_SINGLE_SHARD

        # Open a producer on the existing stream and add some records.
        # NOTE(review): the original comment said "Create stream with 2x shards",
        # but no create_stream arguments are passed here and the constant name
        # suggests a single shard - confirm.
        async with Producer(stream_name=stream_name,
                            shard_refresh_timer=15) as producer:

            for i in range(0, 50):
                await producer.put("test.{}".format(i))

            await producer.flush()

            results = []

            # Random suffix on the checkpointer name.
            checkpointer = RedisCheckPointer(name="test-{}".format(
                str(uuid.uuid4())[0:8]),
                                             heartbeat_frequency=3)

            async with Consumer(
                    stream_name=stream_name,
                    checkpointer=checkpointer,
                    record_limit=5,
                    # Limit the queue so there records will remain in the shards
                    max_queue_size=5,
                    shard_refresh_timer=15) as consumer:

                # First consume phase: three passes with a short pause between them.
                for i in range(0, 3):
                    async for item in consumer:
                        results.append(item)
                    await asyncio.sleep(0.5)

                log.info(f"Consumed {len(results)} records")

                # Start reshard operation
                # TODO: Producer not writing to new shards, shards are being found and checkpointed
                await producer.client.update_shard_count(
                    StreamName=stream_name,
                    TargetShardCount=2,
                    ScalingType='UNIFORM_SCALING')

                # describe_stream is called before and after a one-second pause.
                await self.describe_stream(client=producer.client,
                                           stream_name=stream_name)

                await asyncio.sleep(1)

                await self.describe_stream(client=producer.client,
                                           stream_name=stream_name)

                # Now add some more records

                for i in range(50, 100):
                    await producer.put("test.{}".format(i))

                await producer.flush()

                await asyncio.sleep(10)

                # Second consume phase: up to 20 passes, two seconds apart.
                for i in range(0, 20):
                    async for item in consumer:
                        results.append(item)
                    await asyncio.sleep(2)

                log.info(f"Consumed {len(results)} records")

                # All 100 records (50 before + 50 after the reshard) must have been read.
                assert len(results) == 100
Ejemplo n.º 19
0
    async def test_resharding(self, *args):
        """Produce 50 records, reshard from 2 to 4 shards, produce 50 more, expect 100 consumed.

        The stream name is randomised per run and the producer is asked to
        create it with two shards. ``*args`` absorbs extra positional
        arguments handed to the test.
        """
        # *args pass through mock

        stream_name = "test_{}".format(str(uuid.uuid4())[0:8])

        # Create stream with 2x shards. Add some records
        async with Producer(stream_name=stream_name,
                            endpoint_url=ENDPOINT_URL,
                            create_stream=stream_name,
                            create_stream_shards=2,
                            shard_refresh_timer=15) as producer:

            for i in range(0, 50):
                await producer.put("test.{}".format(i))

            await producer.flush()

            results = []

            # Random suffix on the checkpointer name.
            checkpointer = RedisCheckPointer(name="test-{}".format(
                str(uuid.uuid4())[0:8]),
                                             heartbeat_frequency=3)

            async with Consumer(
                    stream_name=stream_name,
                    endpoint_url=ENDPOINT_URL,
                    checkpointer=checkpointer,
                    record_limit=5,
                    # Limit the queue so there records will remain in the shards
                    max_queue_size=5,
                    shard_refresh_timer=15) as consumer:

                # First consume phase: three passes with a short pause between them.
                for i in range(0, 3):
                    async for item in consumer:
                        results.append(item)
                    await asyncio.sleep(0.5)

                log.info(f"Consumed {len(results)} records")

                # Start reshard operation
                await producer.client.update_shard_count(
                    StreamName=stream_name,
                    TargetShardCount=4,
                    ScalingType='UNIFORM_SCALING')

                # describe_stream is called before and after a one-second pause.
                await self.describe_stream(client=producer.client,
                                           stream_name=stream_name)

                await asyncio.sleep(1)

                await self.describe_stream(client=producer.client,
                                           stream_name=stream_name)

                # Now add some more records

                for i in range(50, 100):
                    await producer.put("test.{}".format(i))

                await producer.flush()

                # Second consume phase: up to 10 passes, half a second apart.
                for i in range(0, 10):
                    async for item in consumer:
                        results.append(item)
                    await asyncio.sleep(0.5)

                log.info(f"Consumed {len(results)} records")

                # All 100 records (50 before + 50 after the reshard) must have been read.
                assert len(results) == 100