Example 1
    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        test_worker = ConsumerWorker(
            self.dataset,
            producer=producer,
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )

        test_worker.flush_batch(
            [
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("1", {"project_id": 1})],
                ),
                ProcessedMessage(
                    action=ProcessorAction.REPLACE, data=[("2", {"project_id": 2})],
                ),
            ]
        )

        assert [(m._topic, m._key, m._value) for m in producer.messages] == [
            ("event-replacements", b"1", b'{"project_id": 1}'),
            ("event-replacements", b"2", b'{"project_id": 2}'),
        ]
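
The assertion above inspects producer.messages and the private _topic, _key and _value attributes, so the fake producer only has to record produce() calls instead of talking to Kafka. A minimal sketch of such a helper follows; the class names and signatures here are assumptions for illustration, not the actual fixture from the Snuba test suite.

    class FakeConfluentKafkaMessage:
        # Mimics the parts of a confluent_kafka message the assertions read.
        def __init__(self, topic, key, value):
            self._topic = topic
            self._key = key
            self._value = value

    class FakeConfluentKafkaProducer:
        # Collects produced messages in memory so tests can assert on them.
        def __init__(self):
            self.messages = []

        def produce(self, topic, value, key=None, **kwargs):
            self.messages.append(FakeConfluentKafkaMessage(topic, key, value))

        def flush(self):
            # Nothing is buffered, so there is never anything left to deliver.
            return 0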
Example 2
    def eventstream(dataset_name):
        dataset = get_dataset(dataset_name)
        ensure_table_exists(dataset)
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message = KafkaMessage(
            TopicPartition('topic', 0),
            0,
            http_request.data,
        )

        type_ = record[1]
        metrics = DummyMetricsBackend()
        if type_ == 'insert':
            from snuba.consumer import ConsumerWorker
            worker = ConsumerWorker(dataset,
                                    producer=None,
                                    replacements_topic=None,
                                    metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker
            worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ('ok', 200, {'Content-Type': 'text/plain'})
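
The endpoint decodes the request body as a JSON array of (version, type, payload): record[0] must be protocol version 2, and record[1] selects between the consumer and the replacer path. A hypothetical insert body, purely for illustration (the event fields are made up, not taken from these examples), could be built like this:

    import json

    # Protocol version 2, an "insert" action, and the event payload itself.
    body = json.dumps(
        [2, "insert", {"project_id": 1, "event_id": "00000000000000000000000000000000"}]
    )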
Example 3
    def eventstream(*, dataset: Dataset):
        record = json.loads(http_request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        message: Message[KafkaPayload] = Message(
            Partition(Topic("topic"), 0),
            0,
            KafkaPayload(None, http_request.data, []),
            datetime.now(),
        )

        type_ = record[1]

        storage = dataset.get_writable_storage()
        assert storage is not None

        if type_ == "insert":
            from snuba.consumer import ConsumerWorker

            worker = ConsumerWorker(storage, metrics=metrics)
        else:
            from snuba.replacer import ReplacerWorker

            worker = ReplacerWorker(storage, metrics=metrics)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ("ok", 200, {"Content-Type": "text/plain"})
Example 4
    def test_offsets(self):
        event = self.event

        message: Message[KafkaPayload] = Message(
            Partition(Topic("events"), 456),
            123,
            KafkaPayload(
                None, json.dumps((0, "insert", event)).encode("utf-8")
            ),  # event doesn't really matter
            datetime.now(),
        )

        test_worker = ConsumerWorker(
            self.dataset,
            producer=FakeConfluentKafkaProducer(),
            replacements_topic=Topic(
                enforce_table_writer(self.dataset)
                .get_stream_loader()
                .get_replacement_topic_spec()
                .topic_name
            ),
            metrics=self.metrics,
        )
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
Example 5
    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        replacement_topic = enforce_table_writer(
            self.dataset).get_stream_loader().get_replacement_topic_spec()
        test_worker = ConsumerWorker(self.dataset, producer,
                                     replacement_topic.topic_name,
                                     self.metrics)

        test_worker.flush_batch([
            ProcessedMessage(
                action=ProcessorAction.REPLACE,
                data=[('1', {
                    'project_id': 1
                })],
            ),
            ProcessedMessage(
                action=ProcessorAction.REPLACE,
                data=[('2', {
                    'project_id': 2
                })],
            ),
        ])

        assert [(m._topic, m._key, m._value) for m in producer.messages] == \
            [('event-replacements', b'1', b'{"project_id": 1}'), ('event-replacements', b'2', b'{"project_id": 2}')]
Example 6
    def test_produce_replacement_messages(self):
        topic = 'topic'
        producer = FakeKafkaProducer()
        test_worker = ConsumerWorker(self.clickhouse, self.table, producer, topic)

        test_worker.flush_batch([
            (processor.REPLACE, ('1', {'project_id': 1})),
            (processor.REPLACE, ('2', {'project_id': 2})),
        ])

        assert [(m._topic, m._key, m._value) for m in producer.messages] == \
            [('topic', b'1', b'{"project_id": 1}'), ('topic', b'2', b'{"project_id": 2}')]
Example 7
    def test_produce_replacement_messages(self):
        producer = FakeKafkaProducer()
        test_worker = ConsumerWorker(
            self.dataset, producer,
            self.dataset.get_default_replacement_topic())

        test_worker.flush_batch([
            (self.dataset.get_processor().REPLACE, ('1', {
                'project_id': 1
            })),
            (self.dataset.get_processor().REPLACE, ('2', {
                'project_id': 2
            })),
        ])

        assert [(m._topic, m._key, m._value) for m in producer.messages] == \
            [('event-replacements', b'1', b'{"project_id": 1}'), ('event-replacements', b'2', b'{"project_id": 2}')]
Example 8
    def test_offsets(self):
        event = self.event

        class FakeMessage(object):
            def value(self):
                # event doesn't really matter
                return json.dumps((0, 'insert', event))

            def offset(self):
                return 123

            def partition(self):
                return 456

        test_worker = ConsumerWorker(self.clickhouse, self.table, FakeKafkaProducer(), 'topic')
        batch = [test_worker.process_message(FakeMessage())]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" % self.table
        ) == [(self.event['project_id'], self.event['event_id'], 123, 456)]
Example 9
    def test_offsets(self):
        event = self.event

        message = KafkaMessage(
            TopicPartition('events', 456),
            123,
            json.dumps((0, 'insert',
                        event)).encode('utf-8')  # event doesn't really matter
        )

        replacement_topic = enforce_table_writer(
            self.dataset).get_stream_loader().get_replacement_topic_spec()
        test_worker = ConsumerWorker(self.dataset,
                                     FakeConfluentKafkaProducer(),
                                     replacement_topic.topic_name,
                                     self.metrics)
        batch = [test_worker.process_message(message)]
        test_worker.flush_batch(batch)

        assert self.clickhouse.execute(
            "SELECT project_id, event_id, offset, partition FROM %s" %
            self.table) == [(self.event['project_id'], self.event['event_id'],
                             123, 456)]
Example 10
    def test_produce_replacement_messages(self):
        producer = FakeConfluentKafkaProducer()
        test_worker = ConsumerWorker(
            self.dataset.get_writable_storage(),
            producer=producer,
            replacements_topic=Topic(
                enforce_table_writer(self.dataset).get_stream_loader().
                get_replacement_topic_spec().topic_name),
            metrics=self.metrics,
        )

        test_worker.flush_batch([
            ReplacementBatch("1", [{
                "project_id": 1
            }]),
            ReplacementBatch("2", [{
                "project_id": 2
            }]),
        ])

        assert [(m._topic, m._key, m._value) for m in producer.messages] == [
            ("event-replacements", b"1", b'{"project_id":1}'),
            ("event-replacements", b"2", b'{"project_id":2}'),
        ]
Example 11
    def eventstream():
        record = json.loads(request.data)

        version = record[0]
        if version != 2:
            raise RuntimeError("Unsupported protocol version: %s" % record)

        class Message(object):
            def __init__(self, value):
                self._value = value

            def value(self):
                return self._value

            def partition(self):
                return None

            def offset(self):
                return None

        message = Message(request.data)

        type_ = record[1]
        if type_ == 'insert':
            from snuba.consumer import ConsumerWorker
            worker = ConsumerWorker(clickhouse_rw, settings.CLICKHOUSE_TABLE, producer=None, replacements_topic=None)
        else:
            from snuba.replacer import ReplacerWorker
            worker = ReplacerWorker(clickhouse_rw, settings.CLICKHOUSE_TABLE)

        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

        return ('ok', 200, {'Content-Type': 'text/plain'})