def test_produce_replacement_messages(self):
    producer = FakeConfluentKafkaProducer()
    test_worker = ConsumerWorker(
        self.dataset,
        producer=producer,
        replacements_topic=Topic(
            enforce_table_writer(self.dataset)
            .get_stream_loader()
            .get_replacement_topic_spec()
            .topic_name
        ),
        metrics=self.metrics,
    )

    test_worker.flush_batch(
        [
            ProcessedMessage(
                action=ProcessorAction.REPLACE,
                data=[("1", {"project_id": 1})],
            ),
            ProcessedMessage(
                action=ProcessorAction.REPLACE,
                data=[("2", {"project_id": 2})],
            ),
        ]
    )

    assert [(m._topic, m._key, m._value) for m in producer.messages] == [
        ("event-replacements", b"1", b'{"project_id": 1}'),
        ("event-replacements", b"2", b'{"project_id": 2}'),
    ]
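# The assertions above read producer.messages and the private _topic/_key/_value
# attributes of each captured message. Below is a minimal sketch of a fake
# producer with that shape, assuming the confluent-kafka produce() calling
# convention; the real FakeConfluentKafkaProducer lives in the project's test
# helpers, so the names and signatures here are inferred, not authoritative.
class FakeKafkaMessage:
    def __init__(self, topic, key, value):
        self._topic = topic  # topic the message was "produced" to
        self._key = key      # message key, as bytes
        self._value = value  # serialized payload, as bytes


class FakeConfluentKafkaProducer:
    def __init__(self):
        self.messages = []  # every produced message, in order

    def produce(self, topic, value, key=None, **kwargs):
        # Capture the message instead of sending it to a broker.
        self.messages.append(FakeKafkaMessage(topic, key, value))

    def flush(self):
        # Nothing is actually buffered; report how many messages were captured.
        return len(self.messages)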
def eventstream(dataset_name):
    dataset = get_dataset(dataset_name)
    ensure_table_exists(dataset)
    record = json.loads(http_request.data)

    version = record[0]
    if version != 2:
        raise RuntimeError("Unsupported protocol version: %s" % record)

    message = KafkaMessage(
        TopicPartition('topic', 0),
        0,
        http_request.data,
    )

    type_ = record[1]
    metrics = DummyMetricsBackend()
    if type_ == 'insert':
        from snuba.consumer import ConsumerWorker
        worker = ConsumerWorker(dataset, producer=None, replacements_topic=None, metrics=metrics)
    else:
        from snuba.replacer import ReplacerWorker
        worker = ReplacerWorker(clickhouse_rw, dataset, metrics=metrics)

    processed = worker.process_message(message)
    if processed is not None:
        batch = [processed]
        worker.flush_batch(batch)

    return ('ok', 200, {'Content-Type': 'text/plain'})
def eventstream(*, dataset: Dataset):
    record = json.loads(http_request.data)

    version = record[0]
    if version != 2:
        raise RuntimeError("Unsupported protocol version: %s" % record)

    message: Message[KafkaPayload] = Message(
        Partition(Topic("topic"), 0),
        0,
        KafkaPayload(None, http_request.data, []),
        datetime.now(),
    )

    type_ = record[1]

    storage = dataset.get_writable_storage()
    assert storage is not None

    if type_ == "insert":
        from snuba.consumer import ConsumerWorker

        # `metrics` is not defined in this snippet; in the full views module it
        # is a module-level metrics backend.
        worker = ConsumerWorker(storage, metrics=metrics)
    else:
        from snuba.replacer import ReplacerWorker

        worker = ReplacerWorker(storage, metrics=metrics)

    processed = worker.process_message(message)
    if processed is not None:
        batch = [processed]
        worker.flush_batch(batch)

    return ("ok", 200, {"Content-Type": "text/plain"})
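# For reference, the version-2 wire format both eventstream views parse is a
# JSON array of (version, type, payload). A hypothetical insert record (field
# values invented for illustration) would be posted as:
import json

example_insert = json.dumps(
    [2, "insert", {"project_id": 1, "event_id": "0" * 32, "data": {}}]
)
# Any other type falls through to the ReplacerWorker branch, e.g. a
# (hypothetical here) group-deletion replacement record:
example_replacement = json.dumps([2, "end_delete_groups", {"project_id": 1}])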
def test_offsets(self):
    event = self.event

    message: Message[KafkaPayload] = Message(
        Partition(Topic("events"), 456),
        123,
        KafkaPayload(
            None, json.dumps((0, "insert", event)).encode("utf-8")
        ),  # event doesn't really matter
        datetime.now(),
    )

    test_worker = ConsumerWorker(
        self.dataset,
        producer=FakeConfluentKafkaProducer(),
        replacements_topic=Topic(
            enforce_table_writer(self.dataset)
            .get_stream_loader()
            .get_replacement_topic_spec()
            .topic_name
        ),
        metrics=self.metrics,
    )
    batch = [test_worker.process_message(message)]
    test_worker.flush_batch(batch)

    assert self.clickhouse.execute(
        "SELECT project_id, event_id, offset, partition FROM %s" % self.table
    ) == [(self.event["project_id"], self.event["event_id"], 123, 456)]
def test_produce_replacement_messages(self):
    producer = FakeConfluentKafkaProducer()
    replacement_topic = (
        enforce_table_writer(self.dataset)
        .get_stream_loader()
        .get_replacement_topic_spec()
    )
    test_worker = ConsumerWorker(
        self.dataset, producer, replacement_topic.topic_name, self.metrics
    )

    test_worker.flush_batch([
        ProcessedMessage(
            action=ProcessorAction.REPLACE,
            data=[('1', {'project_id': 1})],
        ),
        ProcessedMessage(
            action=ProcessorAction.REPLACE,
            data=[('2', {'project_id': 2})],
        ),
    ])

    assert [(m._topic, m._key, m._value) for m in producer.messages] == \
        [('event-replacements', b'1', b'{"project_id": 1}'),
         ('event-replacements', b'2', b'{"project_id": 2}')]
def test_produce_replacement_messages(self):
    topic = 'topic'
    producer = FakeKafkaProducer()
    test_worker = ConsumerWorker(self.clickhouse, self.table, producer, topic)

    test_worker.flush_batch([
        (processor.REPLACE, ('1', {'project_id': 1})),
        (processor.REPLACE, ('2', {'project_id': 2})),
    ])

    assert [(m._topic, m._key, m._value) for m in producer.messages] == \
        [('topic', b'1', b'{"project_id": 1}'),
         ('topic', b'2', b'{"project_id": 2}')]
def test_produce_replacement_messages(self):
    producer = FakeKafkaProducer()
    test_worker = ConsumerWorker(
        self.dataset, producer, self.dataset.get_default_replacement_topic())

    test_worker.flush_batch([
        (self.dataset.get_processor().REPLACE, ('1', {'project_id': 1})),
        (self.dataset.get_processor().REPLACE, ('2', {'project_id': 2})),
    ])

    assert [(m._topic, m._key, m._value) for m in producer.messages] == \
        [('event-replacements', b'1', b'{"project_id": 1}'),
         ('event-replacements', b'2', b'{"project_id": 2}')]
def test_offsets(self):
    event = self.event

    class FakeMessage(object):
        def value(self):
            # event doesn't really matter
            return json.dumps((0, 'insert', event))

        def offset(self):
            return 123

        def partition(self):
            return 456

    test_worker = ConsumerWorker(self.clickhouse, self.table, FakeKafkaProducer(), 'topic')
    batch = [test_worker.process_message(FakeMessage())]
    test_worker.flush_batch(batch)

    assert self.clickhouse.execute(
        "SELECT project_id, event_id, offset, partition FROM %s" % self.table
    ) == [(self.event['project_id'], self.event['event_id'], 123, 456)]
def test_offsets(self):
    event = self.event

    message = KafkaMessage(
        TopicPartition('events', 456),
        123,
        json.dumps((0, 'insert', event)).encode('utf-8'),  # event doesn't really matter
    )
    replacement_topic = (
        enforce_table_writer(self.dataset)
        .get_stream_loader()
        .get_replacement_topic_spec()
    )
    test_worker = ConsumerWorker(
        self.dataset, FakeConfluentKafkaProducer(), replacement_topic.topic_name, self.metrics
    )
    batch = [test_worker.process_message(message)]
    test_worker.flush_batch(batch)

    assert self.clickhouse.execute(
        "SELECT project_id, event_id, offset, partition FROM %s" % self.table
    ) == [(self.event['project_id'], self.event['event_id'], 123, 456)]
def test_produce_replacement_messages(self):
    producer = FakeConfluentKafkaProducer()
    test_worker = ConsumerWorker(
        self.dataset.get_writable_storage(),
        producer=producer,
        replacements_topic=Topic(
            enforce_table_writer(self.dataset)
            .get_stream_loader()
            .get_replacement_topic_spec()
            .topic_name
        ),
        metrics=self.metrics,
    )

    test_worker.flush_batch([
        ReplacementBatch("1", [{"project_id": 1}]),
        ReplacementBatch("2", [{"project_id": 2}]),
    ])

    assert [(m._topic, m._key, m._value) for m in producer.messages] == [
        ("event-replacements", b"1", b'{"project_id":1}'),
        ("event-replacements", b"2", b'{"project_id":2}'),
    ]
def eventstream():
    record = json.loads(request.data)

    version = record[0]
    if version != 2:
        raise RuntimeError("Unsupported protocol version: %s" % record)

    class Message(object):
        def __init__(self, value):
            self._value = value

        def value(self):
            return self._value

        def partition(self):
            return None

        def offset(self):
            return None

    message = Message(request.data)

    type_ = record[1]
    if type_ == 'insert':
        from snuba.consumer import ConsumerWorker
        worker = ConsumerWorker(clickhouse_rw, settings.CLICKHOUSE_TABLE, producer=None, replacements_topic=None)
    else:
        from snuba.replacer import ReplacerWorker
        worker = ReplacerWorker(clickhouse_rw, settings.CLICKHOUSE_TABLE)

    processed = worker.process_message(message)
    if processed is not None:
        batch = [processed]
        worker.flush_batch(batch)

    return ('ok', 200, {'Content-Type': 'text/plain'})
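# Hedged usage sketch: the request.data access and the ('ok', 200, ...) tuple
# return suggest these eventstream views are Flask endpoints. Assuming the view
# is registered on an application object `app` (the actual route is not shown
# in these snippets, so the path below is illustrative), it could be exercised
# with Flask's test client:
import json

def post_insert(app, event):
    client = app.test_client()
    return client.post(
        "/tests/eventstream",  # illustrative path, not taken from the source
        data=json.dumps([2, "insert", event]),
    )

# A 200 response means the worker processed and flushed the single message.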