Example #1
0
def test_generate_new_offsets():
    """Check generate_new_offsets across four partitions of one topic.

    Every partition holds the same five messages; only the consumer
    group's current offset differs per partition (100, 200, 500, 505).
    The expected new values are 2, 4, 5 and 5 -- presumably the number
    of backed-up messages that sit below each original offset (confirm
    against generate_new_offsets).
    """
    group = 'group_1'
    topic = 'topic_1'

    def build_messages():
        # A fresh list of Message objects for every partition, so no
        # instances are shared between partitions.
        return [
            Message(msg_offset, f'key_{idx}', f'val_{idx}', 123456789, None)
            for idx, msg_offset in enumerate((1, 99, 101, 199, 499), start=1)
        ]

    offsets = [
        Offset(group, topic, partition_no, current)
        for partition_no, current in enumerate((100, 200, 500, 505))
    ]
    partitions = [
        Partition(topic, partition_no, build_messages())
        for partition_no in range(4)
    ]

    new_offsets = generate_new_offsets(offsets, partitions)

    expected = [
        Offset(consumer_group=group, topic=topic,
               partition=partition_no, value=new_value)
        for partition_no, new_value in enumerate((2, 4, 5, 5))
    ]
    assert new_offsets == expected
Example #2
0
def test_partition_from_file(tmpdir):
    """Round-trip a Partition through to_file / from_file.

    The partition read back must match the written one on topic, name
    and messages.
    """
    original = Partition(
        'topic_name_1', 1, [Message(1, 'Key', 'Message_1', 123456789)])
    db_path = os.path.join(tmpdir, 'topic_name_1_1.sqlite3')
    original.to_file(db_path)

    restored = Partition.from_file(db_path)

    # Compare attribute by attribute, in the same order as before.
    for attr in ('topic', 'name', 'messages'):
        assert getattr(restored, attr) == getattr(original, attr)
Example #3
0
 def backup(self):
     """Read topic messages and consumer offsets and write them to disk.

     Steps, in order:
       1. Build a Kafka reader (confluent or python flavour, chosen by
          ``self.config.kafka_library``).
       2. Optionally drop configured topics the reader does not list.
       3. Subscribe, read the messages and group them into Partitions.
       4. Fetch consumer group offsets through ConfluentAdminClient.
       5. Hand offsets and partitions to DataFlowManager.write.
     """
     # Choose the reader implementation requested by the configuration.
     reader_cls = (ConfluentKafkaReader
                   if self.config.kafka_library == 'confluent'
                   else PythonKafkaReader)
     reader = reader_cls(self.config.brokers)

     if not self.config.ignore_missing_topics:
         topics = self.config.topics
     else:
         logger.debug('Filter out topics that are not in Kafka broker')
         known_topics = reader.list_topics()
         topics = []
         for name in self.config.topics:
             if name in known_topics:
                 topics.append(name)
             else:
                 logger.debug(f'Ignore topic {name} since it is '
                              'missing in kafka broker')

     # Pull the messages; read() returns a mapping keyed by
     # (topic, partition number).
     reader.subscribe(topics)
     messages_by_partition = reader.read(
         timeout=self.config.consumer_timeout)
     partitions = []
     for (topic, partition_no), msgs in messages_by_partition.items():
         partitions.append(Partition(topic, partition_no, msgs))

     # Consumer group offsets for the same set of topics.
     offsets = ConfluentAdminClient(
         self.config.brokers
     ).get_consumer_offsets(topics, no_of_threads=self.config.threads)

     # Persist messages and offsets.
     DataFlowManager(self.config.data).write(offsets, partitions)
Example #4
0
def test_partition_to_file(tmpdir):
    """Partition.to_file writes the metadata row and message rows to SQLite.

    The written file is opened directly with sqlite3 and its ``metadata``
    and ``data`` tables are compared with the partition that was written.
    """
    partition = Partition(
        'topic_name_1', 1,
        [Message(1, 'Key', 'Message_1', 123456789, [('key', b'val_1')])])
    file_path = os.path.join(tmpdir, 'topic_name_1_1.sqlite3')
    partition.to_file(file_path)

    conn = sqlite3.connect(file_path)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM metadata LIMIT 1')
        metadata = cursor.fetchone()
        cursor.execute('SELECT * FROM data')
        # fetchall(), not fetchmany(): without a size argument fetchmany()
        # returns at most cursor.arraysize rows (1 by default), which
        # would make the row-count assertion below pass even if extra
        # rows had been written.
        rows = cursor.fetchall()
    finally:
        # Close the connection even when something above raises, so the
        # database file is not left open.
        conn.close()

    assert metadata == ('topic_name_1', 1)
    msgs = [Message.from_row(*row) for row in rows]
    assert len(msgs) == 1
    assert msgs == [
        Message(1, 'Key', 'Message_1', 123456789, [('key', b'val_1')])]
Example #5
0
def test_data_flow_manager_write(tmpdir):
    """DataFlowManager.write lays out its files under the data directory.

    After writing two offsets and three partitions, the directory must
    contain a ``partitions`` sub-directory with three entries and an
    ``offsets.sqlite3`` file.
    """
    manager = DataFlowManager(tmpdir)
    offsets = [
        Offset('group_1', topic, 0, value)
        for topic, value in (('topic_1', 10), ('topic_2', 1))
    ]
    partitions = [
        Partition(topic, partition_no,
                  [Message(0, 'key_1', 'val_1', 123456789, None)])
        for topic, partition_no in (
            ('topic_1', 0), ('topic_1', 1), ('topic_2', 0))
    ]

    manager.write(offsets, partitions)

    partitions_dir = os.path.join(tmpdir, 'partitions')
    assert os.path.isdir(partitions_dir)
    assert len(os.listdir(partitions_dir)) == 3
    assert os.path.isfile(os.path.join(tmpdir, 'offsets.sqlite3'))
Example #6
0
def test_data_flow_manager_read(tmpdir):
    """What DataFlowManager.write stores, DataFlowManager.read returns.

    Two offsets and three partitions are written, then read back with no
    topic filter; both lists must equal the originals.
    """
    manager = DataFlowManager(tmpdir)
    offsets = [
        Offset('group_1', topic, 0, value)
        for topic, value in (('topic_1', 10), ('topic_2', 1))
    ]
    partitions = [
        Partition(topic, partition_no,
                  [Message(0, 'key_1', 'val_1', 123456789, None)])
        for topic, partition_no in (
            ('topic_1', 0), ('topic_1', 1), ('topic_2', 0))
    ]
    manager.write(offsets, partitions)

    restored_offsets, restored_partitions = manager.read()

    assert restored_offsets == offsets
    assert restored_partitions == partitions
Example #7
0
 def read(
     self,
     topics: Optional[List[str]] = None
 ) -> Tuple[List[Offset], List[Partition]]:
     """Load offsets and partitions from disk, optionally filtered by topic.

     Args:
         topics: Topic names to keep. When ``None`` or empty, everything
             found on disk is returned.

     Returns:
         A ``(offsets, partitions)`` tuple. Partitions come from every
         file in ``self.partition_file_dir`` whose name ends with
         ``sqlite3``; offsets come from ``self.offset_file_path``. With a
         non-empty ``topics``, both lists hold only entries whose topic
         is in ``topics``.
     """
     partitions = []
     # os.listdir returns names in arbitrary order; sort them so the
     # order of the returned partitions is deterministic.
     for file_name in sorted(os.listdir(self.partition_file_dir)):
         if not file_name.endswith('sqlite3'):
             continue
         file_path = os.path.join(self.partition_file_dir, file_name)
         partition = Partition.from_file(file_path)
         logger.debug(f'Read {len(partition.messages)} for topic: '
                      f'{partition.topic} partition: {partition.name} '
                      'from disk')
         # TODO: Consider using filename as filter or Write required
         # metadata about filename and topic/partition in one main sqlite3
         # file.
         if topics and partition.topic not in topics:
             continue
         partitions.append(partition)
     offset_manager = OffsetManager.from_file(self.offset_file_path)
     if topics:
         offsets = [o for o in offset_manager.offsets if o.topic in topics]
     else:
         # Only fall back to the unfiltered list when no topic filter was
         # given; previously this assignment ran unconditionally and
         # discarded the filtered result.
         offsets = offset_manager.offsets
     return (offsets, partitions)
Example #8
0
def test_partition():
    """A Partition built with an empty message list is truthy."""
    partition = Partition('topic_name', 1, [])
    assert partition
Example #9
0
import os
import sqlite3
import pytest

from ksnap.message import Message
from ksnap.partition import Partition
from ksnap.offset import (Offset, OffsetManager, _calculate_new_offset,
                          _generate_partition_dict, generate_new_offsets)

# Shared fixture: a single Partition (despite the plural name) for
# 'topic_1', partition 1, holding five messages. The first Message
# argument is presumably the message offset -- confirm against Message.
PARTITIONS = Partition(
    'topic_1',
    1,
    [
        Message(msg_offset, f'key_{idx}', f'val_{idx}', 123456789, None)
        for idx, msg_offset in enumerate((0, 99, 100, 200, 499), start=1)
    ],
)


@pytest.mark.parametrize(
    'curr_offset_val, partition, expected',
    [
        (current, PARTITIONS, wanted)
        for current, wanted in ((100, 2), (1, 1), (500, 5), (600, 5))
    ]
)
def test_calculate_new_offset(curr_offset_val, partition, expected):
    """_calculate_new_offset returns the expected value for each case.

    Each case pairs a current offset value with the shared PARTITIONS
    fixture and the new offset that should come back.
    """
    assert _calculate_new_offset(curr_offset_val, partition) == expected