Example #1
class AbstractConsumer(ABC):
    def __init__(self, group_id: str, topic_name: str, last: bool):
        offset_reset = "earliest"
        if last:
            offset_reset = "latest"

        self._config = Config().create_confluent_config()
        self._config.update({
            "group.id": group_id,
            "error_cb": raise_for_kafka_error,
            # We need to commit offsets manually once we're sure it got saved
            # to the sink
            "enable.auto.commit": True,
            "enable.partition.eof": False,
            # We need this to start at the last committed offset instead of the
            # latest when subscribing for the first time
            "default.topic.config": {
                "auto.offset.reset": offset_reset
            },
        })
        self._consumer = confluent_kafka.Consumer(self._config)
        self._subscribe(topic_name)

    def _subscribe(self, topic: str) -> None:
        self._consumer.subscribe([topic])

    @abstractmethod
    def consume(self, amount: int) -> int:
        pass

    def _consume_single_message(self, timeout=30) -> Optional[Message]:
        message = self._consumer.poll(timeout=timeout)
        raise_for_message(message)
        return message
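
The consume method above is abstract, so each subclass decides what to do with polled messages. A minimal concrete subclass might look like the sketch below; PrintingConsumer and the assumption that _consume_single_message raises MessageEmptyException on an empty poll are illustrative, not part of the original code.

class PrintingConsumer(AbstractConsumer):
    def consume(self, amount: int) -> int:
        consumed = 0
        for _ in range(amount):
            try:
                # Assumed: raise_for_message raises MessageEmptyException when the poll times out.
                message = self._consume_single_message()
            except MessageEmptyException:
                break
            print(message.value())
            consumed += 1
        return consumed
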
Example #2
def test_kafka_python_config(config: Config):
    config.context_switch("context_5")
    expected_config = {
        "bootstrap_servers": ["kafka:9094", "kafka1:9094", "kafka2:9094", "kafka3:9094"],
        "security_protocol": "SASL_SSL",
        "sasl_mechanism": "PLAIN",
        "sasl_plain_username": "******",
        "sasl_plain_password": "******",
        "ssl_cafile": "/my/ca.crt",
        "ssl_certfile": "/my/certificate.crt",
        "ssl_keyfile": "/my/certificate.key",
        "ssl_password": "******",
    }

    actual_config = config.create_kafka_python_config()
    assert expected_config == actual_config
Example #3
def test_pykafka_config(mocker: mock, config: Config):
    ssl_config_sentinel = mock.sentinel.ssl_config
    ssl_config_mock = mocker.patch("esque.config.SslConfig", return_value=ssl_config_sentinel)
    plain_authenticator_sentinel = mock.sentinel.plain_authenticator
    plain_authenticator_mock = mocker.patch(
        "pykafka.sasl_authenticators.PlainAuthenticator", return_value=plain_authenticator_sentinel
    )

    config.context_switch("context_5")
    expected_config = {
        "hosts": "kafka:9094,kafka1:9094,kafka2:9094,kafka3:9094",
        "sasl_authenticator": plain_authenticator_sentinel,
        "ssl_config": ssl_config_sentinel,
    }
    actual_config = config.create_pykafka_config()
    assert expected_config == actual_config
    ssl_config_mock.assert_called_with(
        **{
            "cafile": "/my/ca.crt",
            "certfile": "/my/certificate.crt",
            "keyfile": "/my/certificate.key",
            "password": "******",
        }
    )
    plain_authenticator_mock.assert_called_with(user="******", password="******", security_protocol="SASL_SSL")
Example #4
def __init__(self):
    self._config = Config()
    self.confluent_client = AdminClient(self._config.create_confluent_config())
    self.pykafka_client = pykafka.client.KafkaClient(**self._config.create_pykafka_config(), broker_version="1.0.0")
    self.confluent_client.poll(timeout=1)
    self.__topic_controller = None
Example #5
def __init__(self):
    self.queue_length = 100000
    self.internal_queue_length_limit = self.queue_length / 0.5
    self._config = Config().create_confluent_config()
    self._config.update({
        "on_delivery": delivery_callback,
        "error_cb": raise_for_kafka_error,
        "queue.buffering.max.messages": self.queue_length,
    })
Example #6
def test_sasl_params(config: Config):
    assert config.sasl_params == {}
    config.context_switch("context_5")
    assert config.sasl_params == {
        "mechanism": "PLAIN",
        "user": "******",
        "password": "******"
    }
    assert config.sasl_mechanism == "PLAIN"
Example #7
def test_ssl_params(config: Config):
    assert config.ssl_params == {}
    config.context_switch("context_5")
    assert config.ssl_params == {
        "cafile": "/my/ca.crt",
        "certfile": "/my/certificate.crt",
        "keyfile": "/my/certificate.key",
        "password": "******",
    }
Example #8
def test_current_context_bootstrap_servers(config: Config):
    assert config.bootstrap_servers == ["localhost:9091"]

    config.context_switch("context_3")

    assert config.bootstrap_servers == [
        "node01.cool-domain.com:9093",
        "node02.cool-domain.com:9093",
        "node03.cool-domain.com:9093",
    ]
Example #9
class Producer(ABC):
    def __init__(self):
        self.queue_length = 100000
        self.internal_queue_length_limit = self.queue_length / 0.5  # dividing by 0.5 doubles it, i.e. 2x queue_length
        self._config = Config().create_confluent_config()
        self._config.update({
            "on_delivery": delivery_callback,
            "error_cb": raise_for_kafka_error,
            "queue.buffering.max.messages": self.queue_length,
        })

    @abstractmethod
    def produce(self, topic_name: str) -> int:
        pass
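
Subclasses of Producer only need to implement produce. The sketch below is a hypothetical subclass that sends an in-memory list of key/value byte pairs; the class name and message source are assumptions, not part of the original code.

class ListProducer(Producer):
    def __init__(self, messages):
        super().__init__()
        self._messages = messages  # iterable of (key, value) byte pairs
        self._producer = confluent_kafka.Producer(self._config)

    def produce(self, topic_name: str) -> int:
        count = 0
        for key, value in self._messages:
            self._producer.produce(topic=topic_name, key=key, value=value)
            count += 1
        self._producer.flush()  # block until all queued messages are delivered
        return count
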
Example #10
def _create_config(self):
    try:
        self._config = Config.get_instance()
    except ConfigNotExistsException:
        click.echo(f"No config provided in {config_dir()}")
        if ensure_approval(f"Should a sample file be created in {config_dir()}"):
            config_dir().mkdir(exist_ok=True)
            copyfile(sample_config_path().as_posix(), config_path())
        else:
            raise
        if ensure_approval("Do you want to modify the config file now?"):
            click.edit(filename=config_path().as_posix())
        self._config = Config.get_instance()
Example #11
def test_fix_missing_context_config(interactive_cli_runner: CliRunner,
                                    load_config: config_loader):
    load_config(LOAD_BROKEN_CONFIG)

    _cfg = Config(disable_validation=True)
    assert _cfg.current_context not in _cfg.available_contexts

    interactive_cli_runner.invoke(esque,
                                  args=["config", "fix"],
                                  catch_exceptions=False)

    _cfg = Config.get_instance()

    assert _cfg.current_context in _cfg.available_contexts
Example #12
def unittest_config(request: FixtureRequest,
                    load_config: config_loader) -> Config:
    conffile, _ = load_config(LOAD_INTEGRATION_TEST_CONFIG)
    esque_config = Config.get_instance()
    if request.config.getoption("--local"):
        esque_config.context_switch("local")
    return esque_config
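
Any test can then request this fixture by its name; a hypothetical test using it:

def test_bootstrap_servers_not_empty(unittest_config: Config):
    # unittest_config is the fixture defined above
    assert unittest_config.bootstrap_servers
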
Example #13
def __init__(
    self,
    group_id: str,
    topic_name: str,
    output_directory: pathlib.Path,
    last: bool,
    match: str = None,
    initialize_default_output_directory: bool = False,
    enable_auto_commit: bool = True,
):
    super().__init__(
        group_id,
        topic_name,
        output_directory,
        last,
        match,
        initialize_default_output_directory,
        enable_auto_commit,
    )
    self.schema_registry_client = SchemaRegistryClient(Config.get_instance().schema_registry)
    if output_directory is None:
        self.writers[-1] = StdOutAvroWriter(schema_registry_client=self.schema_registry_client)
    else:
        self.writers[-1] = AvroFileWriter(self.output_directory / "partition_any", self.schema_registry_client)
    if self._initialize_default_output_directory and self.output_directory is not None:
        self.writers[-1].init_destination_directory()
Example #14
def test_validation_called(mocker: mock, load_config: config_loader):
    conf_path, conf_content = load_config()
    validator_mock = mocker.patch("esque.validation.validate_esque_config")
    Config()

    validated_config_dict, = validator_mock.call_args[0]
    assert validated_config_dict == yaml.safe_load(conf_content)
Example #15
def test_invalid_config(load_config: config_loader):
    conf_path, conf_content = load_config()
    conf_content += '\nasdf:"'
    conf_path.write_text(conf_content)

    with pytest.raises(ScannerError):
        Config()
Example #16
def commit_offsets(self, consumer_id: str, offsets: List[TopicPartition]):
    config = Config.get_instance()
    consumer = Consumer({"group.id": consumer_id, **config.create_confluent_config()})
    consumer.commit(offsets=offsets, asynchronous=False)
    consumer.close()
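
Callers build the offset list themselves. A hypothetical invocation that pins a group to offset 0 of a single partition (the topic name, group name, and controller instance are placeholders):

offsets = [TopicPartition(topic="my-topic", partition=0, offset=0)]
controller.commit_offsets("my-group", offsets)
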
Example #17
def test_confluent_config(config: Config):
    config.context_switch("context_5")
    expected_config = {
        "bootstrap.servers": "kafka:9094,kafka1:9094,kafka2:9094,kafka3:9094",
        "security.protocol": "SASL_SSL",
        "schema.registry.url": "http://schema-registry.example.com",
        "sasl.mechanisms": "PLAIN",
        "sasl.username": "******",
        "sasl.password": "******",
        "ssl.ca.location": "/my/ca.crt",
        "ssl.certificate.location": "/my/certificate.crt",
        "ssl.key.location": "/my/certificate.key",
        "ssl.key.password": "******",
    }

    actual_config = config.create_confluent_config(include_schema_registry=True)
    assert expected_config == actual_config
Example #18
    def _get_partitions(
            self,
            topic: Topic,
            retrieve_last_timestamp: bool,
            get_partition_watermarks: bool = True) -> List[Partition]:
        assert not (
            retrieve_last_timestamp and not get_partition_watermarks
        ), "Cannot retrieve timestamp without partition watermarks"

        config = Config.get_instance().create_confluent_config()
        config.update({
            "group.id": ESQUE_GROUP_ID,
            "topic.metadata.refresh.interval.ms": "250"
        })
        with closing(confluent_kafka.Consumer(config)) as consumer:
            confluent_topic = consumer.list_topics(
                topic=topic.name).topics[topic.name]
            partitions: List[Partition] = []
            if not get_partition_watermarks:
                return [
                    Partition(partition_id, -1, -1, meta.isrs, meta.leader,
                              meta.replicas, None) for partition_id, meta in
                    confluent_topic.partitions.items()
                ]
            for partition_id, meta in confluent_topic.partitions.items():
                try:
                    low, high = consumer.get_watermark_offsets(
                        TopicPartition(topic=topic.name,
                                       partition=partition_id))
                except KafkaException:
                    # retry after metadata should be refreshed (also consider small network delays)
                    # unfortunately we cannot explicitly cause and wait for a metadata refresh
                    time.sleep(1)
                    low, high = consumer.get_watermark_offsets(
                        TopicPartition(topic=topic.name,
                                       partition=partition_id))

                latest_timestamp = None
                if high > low and retrieve_last_timestamp:
                    assignment = [
                        TopicPartition(topic=topic.name,
                                       partition=partition_id,
                                       offset=high - 1)
                    ]
                    consumer.assign(assignment)
                    msg = consumer.poll(timeout=10)
                    if msg is None:
                        logger.warning(
                            f"Due to timeout, latest timestamp for topic `{topic.name}` "
                            f"and partition `{partition_id}` is missing.")
                    else:
                        latest_timestamp = float(msg.timestamp()[1]) / 1000
                partition = Partition(partition_id, low, high, meta.isrs,
                                      meta.leader, meta.replicas,
                                      latest_timestamp)
                partitions.append(partition)
        return partitions
Example #19
def test_config_too_new(load_config: config_loader):
    conf_path, conf_content = load_config()
    data = yaml.safe_load(conf_content)
    data["version"] = CURRENT_VERSION + 1
    with conf_path.open("w") as f:
        yaml.dump(data, f)

    with pytest.raises(ConfigTooNew):
        Config()
Example #20
def config_fix(state: State):
    """Fix simple errors in esque config.

    Fixes simple errors like a wrong current_context in the esque config when the config was tampered with manually."""
    try:
        state.config.context_switch(state.config.current_context)
        click.echo("Your config seems fine. 🎉")
    except ValidationException:
        _cfg: Config = Config(disable_validation=True)
        if _cfg.current_context not in _cfg.available_contexts:
            click.echo(
                f"Found invalid current context. Switching context to state {_cfg.available_contexts[0]}."
            )
            _cfg.context_switch(_cfg.available_contexts[0])
            Config.set_instance(_cfg)
            state.config.save()
        else:
            click.echo(
                "Can't fix this configuration error try fixing it manually.")
Example #21
def consumer(topic_object: Topic, consumer_group):
    _config = Config().create_confluent_config()
    _config.update({
        "group.id": consumer_group,
        "error_cb": raise_for_kafka_error,
        # We need to commit offsets manually once we're sure it got saved
        # to the sink
        "enable.auto.commit": False,
        "enable.partition.eof": False,
        # We need this to start at the last committed offset instead of the
        # latest when subscribing for the first time
        "default.topic.config": {
            "auto.offset.reset": "latest"
        },
    })
    _consumer = confluent_kafka.Consumer(_config)
    _consumer.assign(
        [TopicPartition(topic=topic_object.name, partition=0, offset=0)])
    yield _consumer
Example #22
class Cluster:
    def __init__(self):
        self._config = Config()
        self.confluent_client = AdminClient(
            self._config.create_confluent_config())
        self.pykafka_client = pykafka.client.KafkaClient(
            **self._config.create_pykafka_config(), broker_version="1.0.0")
        self.confluent_client.poll(timeout=1)
        self.__topic_controller = None

    @property
    def topic_controller(self) -> TopicController:
        if self.__topic_controller is None:
            self.__topic_controller = TopicController(self, self._config)
        return self.__topic_controller

    @property
    def bootstrap_servers(self):
        return self._config.bootstrap_servers

    def get_metadata(self):
        return self.confluent_client.list_topics(timeout=1)

    @property
    def brokers(self):
        metadata = self.confluent_client.list_topics(timeout=1)
        return sorted(
            [{
                "id": broker.id,
                "host": broker.host,
                "port": broker.port
            } for broker in metadata.brokers.values()],
            key=operator.itemgetter("id"),
        )

    def retrieve_config(self, config_type: ConfigResource.Type, id):
        requested_resources = [ConfigResource(config_type, str(id))]
        futures = self.confluent_client.describe_configs(requested_resources)
        (old_resource, future), = futures.items()
        future = ensure_kafka_futures_done([future])
        result = future.result()
        return unpack_confluent_config(result)
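
A short usage sketch for the Cluster wrapper above (output values are illustrative):

cluster = Cluster()
print(cluster.bootstrap_servers)  # e.g. ["localhost:9091"]
for broker in cluster.brokers:  # sorted by broker id
    print(broker["id"], broker["host"], broker["port"])
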
Example #23
def test_edit_config(mocker: mock, interactive_cli_runner: CliRunner,
                     load_config: config_loader):
    conf_path, old_conf_text = load_config()
    data = yaml.safe_load(old_conf_text)
    data["contexts"]["dupe"] = data["contexts"]["context_1"]
    mocker.patch.object(click, "edit", return_value=yaml.dump(data))

    result = interactive_cli_runner.invoke(config_edit, catch_exceptions=False)
    assert result.exit_code == 0
    config = Config()
    assert "dupe" in config.available_contexts
Example #24
def target_topic_avro_consumer(unittest_config: Config, target_topic: Tuple[str, int]) -> AvroConsumer:
    consumer = AvroConsumer(
        {
            "group.id": "asdf",
            "enable.auto.commit": False,
            "enable.partition.eof": False,
            **unittest_config.create_confluent_config(include_schema_registry=True),
        }
    )
    consumer.assign([TopicPartition(topic=target_topic[0], partition=i, offset=0) for i in range(target_topic[1])])
    yield consumer
    consumer.close()
Example #25
    def __init__(self, group_id: str, topic_name: str, last: bool):
        offset_reset = "earliest"
        if last:
            offset_reset = "latest"

        self._config = Config().create_confluent_config()
        self._config.update({
            "group.id": group_id,
            "error_cb": raise_for_kafka_error,
            # We need to commit offsets manually once we're sure it got saved
            # to the sink
            "enable.auto.commit": True,
            "enable.partition.eof": False,
            # We need this to start at the last committed offset instead of the
            # latest when subscribing for the first time
            "default.topic.config": {
                "auto.offset.reset": offset_reset
            },
        })
        self._consumer = confluent_kafka.Consumer(self._config)
        self._subscribe(topic_name)
Example #26
def __init__(self, topic_name: str, match: str = None):
    self.queue_length = 100000
    self.internal_queue_length_limit = self.queue_length / 0.5
    self._config = Config.get_instance().create_confluent_config()
    self._setup_config()
    self.logger = logging.getLogger(__name__)
    self._topic_name = topic_name
    self._match = match
    self._producer = None
    if self._match is not None:
        self._rule_tree = RuleTree(match)
    else:
        self._rule_tree = None
    self.create_internal_producer()
Example #27
    def __init__(self):
        self.no_verify = False

        try:
            self.config = Config()
        except ConfigNotExistsException:
            click.echo(f"No config provided in {config_dir()}")
            config_dir().mkdir(exist_ok=True)
            if ensure_approval(
                    f"Should a sample file be created in {config_dir()}"):
                copyfile(sample_config_path().as_posix(), config_path())
            if ensure_approval("Do you want to modify the config file now?"):
                click.edit(filename=config_path().as_posix())
            sys.exit(0)
        self._cluster = None
Example #28
def _setup_config(self):
    offset_reset = "earliest"
    if self._last:
        offset_reset = "latest"
    self._config = Config.get_instance().create_confluent_config()
    self._config.update({
        "group.id": self._group_id,
        "error_cb": log_error,
        # We need to commit offsets manually once we're sure it got saved
        # to the sink
        "enable.auto.commit": self._enable_auto_commit,
        "enable.partition.eof": True,
        # We need this to start at the last committed offset instead of the
        # latest when subscribing for the first time
        "default.topic.config": {
            "auto.offset.reset": offset_reset
        },
    })
Example #29
def randomly_generated_consumer_groups(filled_topic,
                                       unittest_config: Config,
                                       prefix="") -> str:
    randomly_generated_consumer_group = prefix + "".join(
        random.choices(ascii_letters, k=8))
    _config = unittest_config.create_confluent_config()
    _config.update({
        "group.id": randomly_generated_consumer_group,
        "enable.auto.commit": False,
        "default.topic.config": {
            "auto.offset.reset": "latest"
        },
    })
    _consumer = confluent_kafka.Consumer(_config)
    _consumer.assign(
        [TopicPartition(topic=filled_topic.name, partition=0, offset=0)])
    for i in range(2):
        msg = _consumer.consume(timeout=10)[0]
        _consumer.commit(msg, asynchronous=False)
    return randomly_generated_consumer_group
Example #30
def __init__(self, working_dir: pathlib.Path):
    super().__init__(working_dir)
    self._config.update({"schema.registry.url": Config().schema_registry})
    self._producer = AvroProducer(self._config)
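
The resulting AvroProducer is typically fed a value schema on each produce call. A minimal, self-contained sketch using confluent_kafka's Avro helpers; the broker address, registry URL, schema, and topic name are placeholders, not values from the original code.

from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

value_schema = avro.loads('{"type": "record", "name": "Sample", "fields": [{"name": "field1", "type": "string"}]}')
producer = AvroProducer({
    "bootstrap.servers": "localhost:9092",  # placeholder broker
    "schema.registry.url": "http://localhost:8081",  # placeholder registry
})
producer.produce(topic="sample-topic", value={"field1": "hello"}, value_schema=value_schema)
producer.flush()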