def ping(state: State, times: int, wait: int):
    """Test the connection to the kafka cluster.

    Ping the kafka cluster by writing messages to and reading messages from it.
    After the specified number of "pings", return the minimum, maximum, and average time for the round trip.
    """
    topic_controller = state.cluster.topic_controller
    deltas = []
    try:
        try:
            topic_controller.create_topics([Topic(PING_TOPIC)])
        except TopicAlreadyExistsException:
            pass
        producer = PingProducer(PING_TOPIC)
        consumer = ConsumerFactory().create_ping_consumer(group_id=PING_GROUP_ID, topic_name=PING_TOPIC)
        click.echo(f"Pinging with {state.cluster.bootstrap_servers}.")
        for i in range(times):
            producer.produce()
            _, delta = consumer.consume()
            deltas.append(delta)
            click.echo(f"m_seq={i} time={delta:.2f}ms")
            sleep(wait)
    except KeyboardInterrupt:
        return
    topic_controller.delete_topic(Topic(PING_TOPIC))
    click.echo("--- statistics ---")
    click.echo(f"{len(deltas)} messages sent/received.")
    click.echo(f"min/avg/max = {min(deltas):.2f}/{(sum(deltas) / len(deltas)):.2f}/{max(deltas):.2f} ms")
def test_alter_topic_config_works(topic_controller: TopicController, topic_id: str):
    initial_topic = Topic(topic_id, config={"cleanup.policy": "delete"})
    topic_controller.create_topics([initial_topic])
    topic_controller.update_from_cluster(initial_topic)
    config = initial_topic.config
    assert config.get("cleanup.policy") == "delete"

    change_topic = Topic(topic_id, config={"cleanup.policy": "compact"})
    topic_controller.alter_configs([change_topic])
    topic_controller.update_from_cluster(change_topic)
    after_changes_applied_topic = topic_controller.get_cluster_topic(topic_id)
    final_config = after_changes_applied_topic.config
    assert final_config.get("cleanup.policy") == "compact"
def test_get_topics_with_prefix(
    non_interactive_cli_runner: CliRunner,
    topic_controller: TopicController,
    confluent_admin_client: confluent_kafka.admin.AdminClient,
):
    topic_base = "".join(random.choices(ascii_letters, k=5))
    prefix_1 = "ab"
    prefix_2 = "fx"
    new_topics = [prefix_1 + topic_base, prefix_2 + topic_base, prefix_1 + prefix_2 + topic_base]
    topic_controller.create_topics([Topic(new_topic, replication_factor=1) for new_topic in new_topics])
    confluent_admin_client.poll(timeout=1)

    result = non_interactive_cli_runner.invoke(get_topics, ["-p", prefix_1, "-o", "json"], catch_exceptions=False)
    assert result.exit_code == 0

    retrieved_topics = json.loads(result.output)
    assert len(retrieved_topics) > 1
    for retrieved_topic in retrieved_topics:
        assert retrieved_topic.startswith(prefix_1)
def delete_topics(state: State, topic_list: Tuple[str]):
    """Delete multiple topics

    WARNING: This command cannot be undone, and all data in the topics will be lost.
    """
    topic_names = list(topic_list) + get_piped_stdin_arguments()
    topic_controller = state.cluster.topic_controller
    current_topics = [topic.name for topic in topic_controller.list_topics(get_topic_objects=False)]
    existing_topics: List[str] = []
    for topic in topic_names:
        if topic in current_topics:
            click.echo(f"Deleting {click.style(topic, fg='green')}")
            existing_topics.append(topic)
        else:
            click.echo(f"Skipping {click.style(topic, fg='yellow')} (does not exist)")
    if not existing_topics:
        click.echo(click.style("The provided list contains no existing topics.", fg="red"))
    else:
        if ensure_approval("Are you sure?", no_verify=state.no_verify):
            topic_controller.delete_topics([Topic(topic_name) for topic_name in existing_topics])
            click.echo(click.style(f"Topics '{existing_topics}' successfully deleted.", fg="green"))
def _prepare_partition_offsets(self, topic: Topic, partition_data: Dict[int, int], timestamps: bool = False):
    topic_watermarks = topic.watermarks
    offsets = {}
    for partition_id, consumer_offset in partition_data.items():
        # TODO somehow include this in the returned dictionary
        if partition_id not in topic_watermarks:
            log.warning(f"Found invalid offset! Partition {partition_id} does not exist for topic {topic.name}")
            # Skip this partition: accessing its watermarks below would raise a KeyError.
            continue
        offsets[partition_id] = {
            "consumer_offset": consumer_offset,
            "topic_low_watermark": topic_watermarks[partition_id].low,
            "topic_high_watermark": topic_watermarks[partition_id].high,
            "consumer_lag": topic_watermarks[partition_id].high - consumer_offset,
        }
        if timestamps:
            extended_partition_data = topic.get_partition_data(partition_id)
            if extended_partition_data and extended_partition_data.latest_message_timestamp:
                offsets[partition_id]["latest_timestamp"] = extended_partition_data.latest_message_timestamp
    return offsets
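# Illustration only: the shape of the mapping returned by _prepare_partition_offsets,
# built here by hand with made-up watermark numbers (not part of the original module).
example_offsets = {
    0: {
        "consumer_offset": 90,
        "topic_low_watermark": 0,
        "topic_high_watermark": 100,
        "consumer_lag": 10,  # high watermark minus consumer offset
        # "latest_timestamp" is only added when timestamps=True and the partition
        # has extended data with a latest_message_timestamp.
    }
}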
def test_topic_creation_raises_for_wrong_config(
    topic_controller: TopicController, confluent_admin_client: confluent_kafka.admin.AdminClient, topic_id: str
):
    topics = confluent_admin_client.list_topics(timeout=5).topics.keys()
    assert topic_id not in topics
    # We only have 1 broker for tests, so a higher replication factor should fail
    with pytest.raises(KafkaException):
        topic_controller.create_topics([Topic(topic_id, replication_factor=2)])
def update_from_cluster(
    self, topic: Topic, *, retrieve_last_timestamp: bool = False, retrieve_partition_watermarks: bool = True
) -> Topic:
    """Takes a topic and, based on its name, updates all attributes from the cluster"""
    topic.partition_data = self._get_partitions(
        topic, retrieve_last_timestamp, get_partition_watermarks=retrieve_partition_watermarks
    )
    topic.config = self.cluster.retrieve_config(ConfigResource.Type.TOPIC, topic.name)
    topic.is_only_local = False
    return topic
def update_from_cluster(self, topic: Topic):
    """Takes a topic and, based on its name, updates all attributes from the cluster"""
    confluent_topic: ConfluentTopic = self._get_client_topic(topic.name, ClientTypes.Confluent)
    pykafka_topic: PyKafkaTopic = self._get_client_topic(topic.name, ClientTypes.PyKafka)
    low_watermarks = pykafka_topic.earliest_available_offsets()
    high_watermarks = pykafka_topic.latest_available_offsets()
    topic.partition_data = self._get_partition_data(confluent_topic, low_watermarks, high_watermarks, topic)
    topic.config = self.cluster.retrieve_config(ConfigResource.Type.TOPIC, topic.name)
    topic.is_only_local = False
    return topic
def test_topic_creation_works(
    topic_controller: TopicController, confluent_admin_client: confluent_kafka.admin.AdminClient, topic_id: str
):
    topics = confluent_admin_client.list_topics(timeout=5).topics.keys()
    assert topic_id not in topics
    topic_controller.create_topics([Topic(topic_id, replication_factor=1)])
    topics = confluent_admin_client.list_topics(timeout=5).topics.keys()
    assert topic_id in topics
def create_topic(state: State, topic_name: str, like: str):
    """Create a topic.

    Create a topic called TOPIC_NAME with the option of providing a template topic,
    <template_topic>, from which all the configuration options will be copied.
    """
    if not ensure_approval("Are you sure?", no_verify=state.no_verify):
        click.echo("Aborted!")
        return
    topic_controller = state.cluster.topic_controller
    if like:
        template_config = topic_controller.get_cluster_topic(like)
        topic = Topic(
            topic_name,
            template_config.num_partitions,
            template_config.replication_factor,
            template_config.config,
        )
    else:
        topic = Topic(topic_name)
    topic_controller.create_topics([topic])
    click.echo(click.style(f"Topic with name '{topic.name}' successfully created.", fg="green"))
def topic_with_defaults(
    partitions: Optional[int], replication_factor: Optional[int], state: State, topic_name: str
) -> Topic:
    if partitions is None:
        partitions = state.config.default_num_partitions
    if replication_factor is None:
        replication_factor = state.config.default_replication_factor
    topic = Topic(topic_name, num_partitions=partitions, replication_factor=replication_factor)
    return topic
def get_cluster_topic(
    self, topic_name: str, *, retrieve_last_timestamp: bool = False, retrieve_partition_watermarks: bool = True
) -> Topic:
    """Convenience function getting an existing topic based on topic_name"""
    return self.update_from_cluster(
        Topic(topic_name),
        retrieve_last_timestamp=retrieve_last_timestamp,
        retrieve_partition_watermarks=retrieve_partition_watermarks,
    )
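# Usage sketch, assuming a TopicController instance named topic_controller and a
# hypothetical topic name: fetch a topic including its latest message timestamps
# while skipping the (potentially slow) watermark lookups.
topic = topic_controller.get_cluster_topic(
    "some.topic", retrieve_last_timestamp=True, retrieve_partition_watermarks=False
)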
def delete_topic(state: State, topic_name: str):
    """Delete a topic

    WARNING: This command cannot be undone, and all data in the topic will be lost.
    """
    topic_controller = state.cluster.topic_controller
    if ensure_approval("Are you sure?", no_verify=state.no_verify):
        topic_controller.delete_topic(Topic(topic_name))
        assert topic_name not in (t.name for t in topic_controller.list_topics(get_topic_objects=False))
        click.echo(click.style(f"Topic with name '{topic_name}' successfully deleted.", fg="green"))
def test_alter_topic_config_only_changes_mentioned_attributes(topic_controller: TopicController, topic_id: str):
    initial_topic = Topic(topic_id, config={"cleanup.policy": "delete", "min.compaction.lag.ms": "1000000"})
    topic_controller.create_topics([initial_topic])
    topic_controller.update_from_cluster(initial_topic)
    config = initial_topic.config
    assert config.get("cleanup.policy") == "delete"
    assert config.get("min.compaction.lag.ms") == "1000000"

    change_topic = Topic(topic_id, config={"cleanup.policy": "compact"})
    topic_controller.alter_configs([change_topic])
    topic_controller.update_from_cluster(change_topic)
    after_changes_applied_topic = topic_controller.get_cluster_topic(topic_id)
    final_config = after_changes_applied_topic.config
    assert final_config.get("cleanup.policy") == "compact"
    assert final_config.get("min.compaction.lag.ms") == "1000000"
def test_create_existing_topic_fails(
    non_interactive_cli_runner: CliRunner,
    confluent_admin_client: confluent_kafka.admin.AdminClient,
    topic_id: str,
    state: State,
):
    state.cluster.topic_controller.create_topics([Topic(topic_id, replication_factor=1, num_partitions=1)])
    result = non_interactive_cli_runner.invoke(
        esque, args=["create", "topic", "--no-verify", topic_id], catch_exceptions=True
    )
    assert isinstance(result.exception, ValidationException)
    assert topic_id in result.exception.message
    assert "exists" in result.exception.message.lower()
def test_topic_diff(topic_controller: TopicController, topic_id: str):
    # The values we get from cluster configs come back as strings.
    # Testing against this is important to ensure consistency.
    default_delete_retention = "86400000"
    topic_conf = {
        "name": topic_id,
        "replication_factor": 1,
        "num_partitions": 50,
        "config": {"cleanup.policy": "compact"},
    }
    get_diff = topic_controller.diff_with_cluster

    # json round-trip gives a fresh deep copy of the base config for each case
    conf = json.loads(json.dumps(topic_conf))
    topic = Topic.from_dict(conf)
    topic_controller.create_topics([topic])
    assert not get_diff(topic).has_changes, "Shouldn't have diff on just created topic"

    conf = json.loads(json.dumps(topic_conf))
    conf["config"]["cleanup.policy"] = "delete"
    topic = Topic.from_dict(conf)
    diff = TopicDiff().set_diff("cleanup.policy", "compact", "delete")
    assert get_diff(topic) == diff, "Should have a diff on cleanup.policy"

    conf = json.loads(json.dumps(topic_conf))
    conf["config"]["delete.retention.ms"] = 1500
    topic = Topic.from_dict(conf)
    diff = TopicDiff().set_diff("delete.retention.ms", default_delete_retention, 1500)
    assert get_diff(topic) == diff, "Should have a diff on delete.retention.ms"

    # the same as before, but this time with string values
    conf = json.loads(json.dumps(topic_conf))
    conf["config"]["delete.retention.ms"] = "1500"
    topic = Topic.from_dict(conf)
    diff = TopicDiff().set_diff("delete.retention.ms", default_delete_retention, "1500")
    assert get_diff(topic) == diff, "Should have a diff on delete.retention.ms"

    conf = json.loads(json.dumps(topic_conf))
    conf["num_partitions"] = 3
    topic = Topic.from_dict(conf)
    diff = TopicDiff().set_diff("num_partitions", 50, 3)
    assert get_diff(topic) == diff, "Should have a diff on num_partitions"

    conf = json.loads(json.dumps(topic_conf))
    conf["replication_factor"] = 3
    topic = Topic.from_dict(conf)
    diff = TopicDiff().set_diff("replication_factor", 1, 3)
    assert get_diff(topic) == diff, "Should have a diff on replication_factor"
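# The test above only exercises TopicDiff through set_diff, equality, and
# has_changes. A minimal sketch of a class satisfying that observed interface
# (an assumption for illustration, not esque's actual implementation):
class TopicDiffSketch:
    def __init__(self):
        self._diffs = {}

    def set_diff(self, name, old, new):
        self._diffs[name] = (old, new)
        return self  # allows TopicDiff().set_diff(...) chaining as in the test

    @property
    def has_changes(self):
        return bool(self._diffs)

    def __eq__(self, other):
        return self._diffs == other._diffs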
def topic_from_template(
    template_topic: str,
    partitions: Optional[int],
    replication_factor: Optional[int],
    topic_controller: TopicController,
    topic_name: str,
) -> Topic:
    template_config = topic_controller.get_cluster_topic(template_topic)
    if partitions is None:
        partitions = template_config.num_partitions
    if replication_factor is None:
        replication_factor = template_config.replication_factor
    config = template_config.config
    topic = Topic(topic_name, num_partitions=partitions, replication_factor=replication_factor, config=config)
    return topic
def _prepare_offsets(self, topic: Topic, partition_data: Dict[int, int], timestamps: bool = False):
    extended_partition_data = topic.partition_data
    topic_watermarks = topic.watermarks
    # each entry tracks a (min, max) pair across partitions, starting from (inf, -inf)
    new_consumer_offsets = {
        "consumer_offset": (float("inf"), float("-inf")),
        "topic_low_watermark": (float("inf"), float("-inf")),
        "topic_high_watermark": (float("inf"), float("-inf")),
        "consumer_lag": (float("inf"), float("-inf")),
    }
    for partition_id, consumer_offset in partition_data.items():
        current_offset = consumer_offset
        new_consumer_offsets["consumer_offset"] = self._update_minmax(
            new_consumer_offsets["consumer_offset"], current_offset
        )
        new_consumer_offsets["topic_low_watermark"] = self._update_minmax(
            new_consumer_offsets["topic_low_watermark"], topic_watermarks[partition_id].low
        )
        new_consumer_offsets["topic_high_watermark"] = self._update_minmax(
            new_consumer_offsets["topic_high_watermark"], topic_watermarks[partition_id].high
        )
        new_consumer_offsets["consumer_lag"] = self._update_minmax(
            new_consumer_offsets["consumer_lag"], topic_watermarks[partition_id].high - current_offset
        )
        if timestamps:
            extended_partition_data = topic.get_partition_data(partition_id)
            if extended_partition_data and extended_partition_data.latest_message_timestamp:
                if "latest_timestamp" in new_consumer_offsets:
                    new_consumer_offsets["latest_timestamp"] = self._update_minmax(
                        new_consumer_offsets["latest_timestamp"],
                        extended_partition_data.latest_message_timestamp,
                    )
                else:
                    new_consumer_offsets["latest_timestamp"] = (
                        extended_partition_data.latest_message_timestamp,
                        extended_partition_data.latest_message_timestamp,
                    )
    return new_consumer_offsets
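# _update_minmax is called above but not shown in this section. Judging from its
# call sites it folds a value into a running (min, max) pair; a sketch of what it
# plausibly does (it would live as a method on the same controller):
def _update_minmax(current: Tuple[float, float], new_value: float) -> Tuple[float, float]:
    current_min, current_max = current
    return min(current_min, new_value), max(current_max, new_value)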
def delete_topic(state: State, topic_name: str):
    """Delete a single topic

    WARNING: This command cannot be undone, and all data in the topic will be lost.
    """
    topic_controller = state.cluster.topic_controller
    current_topics = [topic.name for topic in topic_controller.list_topics(get_topic_objects=False)]
    if topic_name not in current_topics:
        click.echo(click.style(f"Topic [{topic_name}] doesn't exist on the cluster.", fg="red"))
    else:
        click.echo(f"Deleting {click.style(topic_name, fg='green')}")
        if ensure_approval("Are you sure?", no_verify=state.no_verify):
            topic_controller.delete_topics([Topic(topic_name)])
            click.echo(click.style(f"Topic '{topic_name}' successfully deleted.", fg="green"))
def test_topic_creation_with_template_works(
    non_interactive_cli_runner: CliRunner,
    state: State,
    confluent_admin_client: confluent_kafka.admin.AdminClient,
    topic_id: str,
):
    topic_1 = topic_id + "_1"
    topic_2 = topic_id + "_2"
    topics = confluent_admin_client.list_topics(timeout=5).topics.keys()
    assert topic_1 not in topics
    replication_factor = 1
    num_partitions = 1
    config = {
        "cleanup.policy": "delete",
        "delete.retention.ms": "123456",
        "file.delete.delay.ms": "789101112",
        "flush.messages": "12345678910111213",
        "flush.ms": "123456789",
    }
    state.cluster.topic_controller.create_topics(
        [Topic(topic_1, replication_factor=replication_factor, num_partitions=num_partitions, config=config)]
    )
    result = non_interactive_cli_runner.invoke(
        esque, args=["create", "topic", "--no-verify", "-l", topic_1, topic_2], catch_exceptions=False
    )
    assert result.exit_code == 0

    config_from_template = state.cluster.topic_controller.get_cluster_topic(topic_2)
    assert config_from_template.replication_factor == replication_factor
    assert config_from_template.num_partitions == num_partitions
    for config_key, value in config.items():
        assert config_from_template.config[config_key] == value
def get_cluster_topic(self, topic_name: str) -> Topic:
    """Convenience function getting an existing topic based on topic_name"""
    return self.update_from_cluster(Topic(topic_name))
# Presumably a pytest fixture (it yields a value and takes other fixtures as
# arguments); the decorator is an assumption, since it is missing from the snippet.
@pytest.fixture()
def changed_topic_object(cluster: Cluster, topic: str):
    yield Topic(topic, 1, 3, {"cleanup.policy": "compact"})
def produce(
    state: State,
    topic: str,
    to_context: str,
    directory: str,
    avro: bool,
    binary: bool,
    match: str = None,
    read_from_stdin: bool = False,
    ignore_stdin_errors: bool = False,
):
    """Produce messages to a topic.

    Write messages to a given topic in a given context. These messages can come from
    either a directory <directory> that was previously written to with "esque consume"
    or from JSON objects coming in via STDIN.

    If reading from STDIN, the data is expected as single-line JSON objects with the
    message key and the message value always being a string. The --avro option is
    currently not supported when reading from STDIN. With the --binary option those
    strings are expected to contain base64 encoded binary data. By default, the data
    in the messages is treated as utf-8 encoded strings and will be used as-is.
    In addition to "key" and "value" one can also define headers as a list of objects
    with a "key" and a "value" attribute, with the former being a string and the
    latter being a string, "null" or simply not defined.

    \b
    So valid JSON objects for reading from stdin would be:
    {"key": "foo", "value": "bar", "headers":[{"key":"h1", "value":"v1"},{"key":"h2"}]}
    {"key": "foo", "value": null, "partition": 1}
    {"key": "foo"}

    \b
    EXAMPLES:
    # Write all messages from the files in <directory> to TOPIC in the <destination_ctx> context.
    esque produce -d <directory> -t <destination_ctx> TOPIC

    \b
    # Start environment in terminal to write messages to TOPIC in the <destination_ctx> context.
    esque produce --stdin -f <destination_ctx> -y TOPIC

    \b
    # Copy source_topic to destination_topic.
    esque consume -f first-context --stdout source_topic | esque produce -t second-context --stdin destination_topic
    """
    if not to_context:
        to_context = state.config.current_context
    state.config.context_switch(to_context)

    if not read_from_stdin:
        if not directory:
            raise ValueError("Need to provide directory if not reading from stdin.")
        else:
            directory = pathlib.Path(directory)
    elif avro:
        raise ValueError("Cannot read avro data from stdin. Use a directory instead.")

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(topic):
        if ensure_approval(f"Topic {topic!r} does not exist, do you want to create it?", no_verify=state.no_verify):
            topic_controller.create_topics([Topic(topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_handler = create_input_handler(directory, read_from_stdin)
    builder.with_input_handler(input_handler)

    input_message_serializer = create_input_message_serializer(directory, avro, binary)
    builder.with_input_message_serializer(input_message_serializer)

    output_message_serializer = create_output_serializer(avro, binary, topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, topic)
    builder.with_output_handler(output_handler)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()
    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()
    click.echo(
        green_bold(str(counter.message_count))
        + " messages successfully produced to topic "
        + blue_bold(topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )
def ping(state: State, times: int, wait: int):
    """Test the connection to the kafka cluster.

    Ping the kafka cluster by writing messages to and reading messages from it.
    After the specified number of "pings", return the minimum, maximum, and average time for the round trip.

    \b
    The abbreviations in the output have the following meaning:
    c2s: client to server (time from creation until kafka wrote it to disk)
    s2c: server to client (time from kafka's write to disk until the client received it again)
    c2c: client to client (complete round trip)
    """
    topic_controller = state.cluster.topic_controller
    if not topic_controller.topic_exists(PING_TOPIC):
        if ensure_approval(
            f"Topic {PING_TOPIC!r} does not exist, do you want to create it?", no_verify=state.no_verify
        ):
            topic_config = {
                "cleanup.policy": "compact,delete",
                # timedelta.microseconds is 0 for a whole day; total_seconds() gives the real duration
                "retention.ms": int(datetime.timedelta(days=1).total_seconds() * 1000),
                "message.timestamp.type": "LogAppendTime",
            }
            topic_controller.create_topics([Topic(PING_TOPIC, num_partitions=10, config=topic_config)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    ping_id = uuid.uuid4().bytes

    click.echo("Initializing producer.")
    output_handler = KafkaHandler(
        KafkaHandlerConfig(scheme="kafka", host=state.config.current_context, path=PING_TOPIC)
    )
    output_handler.write_message(create_tombstone_message(ping_id))

    input_handler = KafkaHandler(
        KafkaHandlerConfig(scheme="kafka", host=state.config.current_context, path=PING_TOPIC)
    )
    input_stream = filter(key_matches(ping_id), skip_stream_events(input_handler.message_stream()))
    message_iterator = iter(input_stream)

    click.echo("Initializing consumer.")
    input_handler.seek(KafkaHandler.OFFSET_AT_LAST_MESSAGE)
    next(message_iterator)

    click.echo(f"Pinging cluster with bootstrap servers {state.cluster.bootstrap_servers}.")
    deltas = []
    try:
        for i in range(times):
            output_handler.write_message(create_ping_message(ping_id))
            msg_received = next(message_iterator)

            dt_created = dt_from_bytes(msg_received.value)
            dt_delivered = msg_received.timestamp
            dt_received = datetime.datetime.now(tz=datetime.timezone.utc)

            # total_seconds() instead of .microseconds, which would wrap for deltas over one second
            time_client_to_server_ms = (dt_delivered - dt_created).total_seconds() * 1000
            time_server_to_client_ms = (dt_received - dt_delivered).total_seconds() * 1000
            time_client_to_client_ms = (dt_received - dt_created).total_seconds() * 1000
            deltas.append((time_client_to_server_ms, time_server_to_client_ms, time_client_to_client_ms))
            click.echo(
                f"m_seq={i} c2s={time_client_to_server_ms:.2f}ms "
                f"s2c={time_server_to_client_ms:.2f}ms "
                f"c2c={time_client_to_client_ms:.2f}ms"
            )
            sleep(wait)
    except KeyboardInterrupt:
        return

    # make sure our ping messages get cleaned up
    output_handler.write_message(create_tombstone_message(ping_id))

    click.echo("--- statistics ---")
    click.echo(f"{len(deltas)} messages sent/received.")
    c2s_times, s2c_times, c2c_times = zip(*deltas)
    click.echo(f"c2s {stats(c2s_times)}")
    click.echo(f"s2c {stats(s2c_times)}")
    click.echo(f"c2c {stats(c2c_times)}")
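# stats() is referenced above but not defined in this section. Based on the
# min/avg/max summary the earlier ping implementation printed, it plausibly
# looks like this (an assumption, not the original helper):
def stats(times: Tuple[float, ...]) -> str:
    return f"min/avg/max = {min(times):.2f}/{sum(times) / len(times):.2f}/{max(times):.2f} ms"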
def produce(
    state: State,
    topic: str,
    to_context: str,
    directory: str,
    avro: bool,
    match: str = None,
    read_from_stdin: bool = False,
    ignore_stdin_errors: bool = False,
):
    """Produce messages to a topic.

    Write messages to a given topic in a given context. These messages can come from
    either a directory <directory> containing files corresponding to the different
    partitions or from STDIN.

    \b
    EXAMPLES:
    # Write all messages from the files in <directory> to TOPIC in the <destination_ctx> context.
    esque produce -d <directory> -t <destination_ctx> TOPIC

    \b
    # Start environment in terminal to write messages to TOPIC in the <destination_ctx> context.
    esque produce --stdin -f <destination_ctx> -y TOPIC

    \b
    # Copy source_topic to destination_topic.
    esque consume -f first-context --stdout source_topic | esque produce -t second-context --stdin destination_topic
    """
    if directory is None and not read_from_stdin:
        raise ValueError("You have to provide a directory or use the --stdin flag.")

    if directory is not None:
        input_directory = Path(directory)
        if not input_directory.exists():
            raise ValueError(f"Directory {directory} does not exist!")

    if not to_context:
        to_context = state.config.current_context
    state.config.context_switch(to_context)

    topic_controller = state.cluster.topic_controller
    if topic not in map(attrgetter("name"), topic_controller.list_topics(get_topic_objects=False)):
        click.echo(f"Topic {blue_bold(topic)} does not exist in context {blue_bold(to_context)}.")
        if ensure_approval("Would you like to create it now?"):
            topic_controller.create_topics([Topic(topic)])
        else:
            raise TopicDoesNotExistException(f"Topic {topic} does not exist!", -1)

    stdin = click.get_text_stream("stdin")
    if read_from_stdin and isatty(stdin):
        click.echo(
            "Type the messages to produce, "
            + ("in JSON format, " if not ignore_stdin_errors else "")
            + blue_bold("one per line")
            + ". End with "
            + blue_bold("CTRL+D")
            + "."
        )
    elif read_from_stdin and not isatty(stdin):
        click.echo(f"Reading messages from an external source, {blue_bold('one per line')}.")
    else:
        click.echo(
            f"Producing from directory {blue_bold(str(directory))} to topic {blue_bold(topic)}"
            f" in target context {blue_bold(to_context)}"
        )
    producer = ProducerFactory().create_producer(
        topic_name=topic,
        input_directory=input_directory if not read_from_stdin else None,
        avro=avro,
        match=match,
        ignore_stdin_errors=ignore_stdin_errors,
    )
    total_number_of_messages_produced = producer.produce()
    click.echo(
        green_bold(str(total_number_of_messages_produced))
        + " messages successfully produced to topic "
        + blue_bold(topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )
def apply(state: State, file: str):
    """Apply a set of topic configurations.

    Create new topics and apply changes to existing topics, as specified in the config yaml file <file>.
    """
    # Get topic data based on the YAML
    yaml_topic_configs = yaml.safe_load(open(file)).get("topics")
    yaml_topics = [Topic.from_dict(conf) for conf in yaml_topic_configs]
    yaml_topic_names = [t.name for t in yaml_topics]
    if not len(yaml_topic_names) == len(set(yaml_topic_names)):
        raise ValidationException("Duplicate topic names in the YAML!")

    # Get topic data based on the cluster state
    topic_controller = state.cluster.topic_controller
    cluster_topics = topic_controller.list_topics(search_string="|".join(yaml_topic_names))
    cluster_topic_names = [t.name for t in cluster_topics]

    # Calculate changes
    to_create = [yaml_topic for yaml_topic in yaml_topics if yaml_topic.name not in cluster_topic_names]
    to_edit = [
        yaml_topic
        for yaml_topic in yaml_topics
        if yaml_topic not in to_create and topic_controller.diff_with_cluster(yaml_topic).has_changes
    ]
    to_edit_diffs = {t.name: topic_controller.diff_with_cluster(t) for t in to_edit}
    to_ignore = [yaml_topic for yaml_topic in yaml_topics if yaml_topic not in to_create and yaml_topic not in to_edit]

    # Sanity check - the 3 groups of topics should be complete and have no overlap
    assert (
        set(to_create).isdisjoint(set(to_edit))
        and set(to_create).isdisjoint(set(to_ignore))
        and set(to_edit).isdisjoint(set(to_ignore))
        and len(to_create) + len(to_edit) + len(to_ignore) == len(yaml_topics)
    )

    # Print diffs so the user can check
    click.echo(pretty_unchanged_topic_configs(to_ignore))
    click.echo(pretty_new_topic_configs(to_create))
    click.echo(pretty_topic_diffs(to_edit_diffs))

    # Check for actionable changes
    if len(to_edit) + len(to_create) == 0:
        click.echo("No changes detected, aborting!")
        return

    # Warn users & abort when replication & num_partition changes are attempted
    if any(not diff.is_valid for _, diff in to_edit_diffs.items()):
        click.echo(
            "Changes to `replication_factor` and `num_partitions` can not be applied on already existing topics."
        )
        click.echo("Cancelling due to invalid changes")
        return

    # Get approval
    if not ensure_approval("Apply changes?", no_verify=state.no_verify):
        click.echo("Cancelling changes")
        return

    # Apply changes
    topic_controller.create_topics(to_create)
    topic_controller.alter_configs(to_edit)

    # Output confirmation
    changes = {"unchanged": len(to_ignore), "created": len(to_create), "changed": len(to_edit)}
    click.echo(click.style(pretty({"Successfully applied changes": changes}), fg="green"))
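# Illustration only: the shape of a file apply() can consume, inferred from the
# Topic.from_dict usage above and the dicts built in test_apply below. Shown as
# the Python structure yaml.safe_load would return for such a file; the topic
# name is hypothetical.
example_apply_config = {
    "topics": [
        {
            "name": "my.topic",
            "replication_factor": 1,
            "num_partitions": 50,
            "config": {"cleanup.policy": "compact"},
        }
    ]
}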
def get_local_topic(self, topic_name: str) -> Topic:
    return Topic(topic_name)
def test_apply(interactive_cli_runner: CliRunner, topic_controller: TopicController, topic_id: str):
    topic_name = f"apply_{topic_id}"
    topic_1 = {
        "name": topic_name + "_1",
        "replication_factor": 1,
        "num_partitions": 50,
        "config": {"cleanup.policy": "compact"},
    }
    topic_2 = {
        "name": topic_name + "_2",
        "replication_factor": 1,
        "num_partitions": 5,
        "config": {"cleanup.policy": "delete", "delete.retention.ms": 50000},
    }
    apply_conf = {"topics": [topic_1]}

    # 1: topic creation
    path = save_yaml(topic_id, apply_conf)
    result = interactive_cli_runner.invoke(esque, args=["apply", "-f", path], input="Y\n", catch_exceptions=False)
    assert (
        result.exit_code == 0 and "Successfully applied changes" in result.output
    ), f"Calling apply failed, error: {result.output}"

    # 2: change cleanup policy to delete
    topic_1["config"]["cleanup.policy"] = "delete"
    path = save_yaml(topic_id, apply_conf)
    result = interactive_cli_runner.invoke(esque, args=["apply", "-f", path], input="Y\n", catch_exceptions=False)
    assert (
        result.exit_code == 0 and "Successfully applied changes" in result.output
    ), f"Calling apply failed, error: {result.output}"

    # 3: add another topic and change the first one again
    apply_conf["topics"].append(topic_2)
    topic_1["config"]["cleanup.policy"] = "compact"
    path = save_yaml(topic_id, apply_conf)
    result = interactive_cli_runner.invoke(esque, args=["apply", "-f", path], input="Y\n", catch_exceptions=False)
    assert (
        result.exit_code == 0 and "Successfully applied changes" in result.output
    ), f"Calling apply failed, error: {result.output}"

    # 4: no changes
    result = interactive_cli_runner.invoke(esque, args=["apply", "-f", path], catch_exceptions=False)
    assert (
        result.exit_code == 0 and "No changes detected, aborting" in result.output
    ), f"Calling apply failed, error: {result.output}"

    # 5: change partitions - this attempt should be cancelled
    topic_1["num_partitions"] = 3
    topic_1["config"]["cleanup.policy"] = "delete"
    path = save_yaml(topic_id, apply_conf)
    result = interactive_cli_runner.invoke(esque, args=["apply", "-f", path], input="Y\n", catch_exceptions=False)
    assert (
        result.exit_code == 1 and "to `replication_factor` and `num_partitions`" in result.output
    ), f"Calling apply failed, error: {result.output}"

    # reset config to the old settings again
    topic_1["num_partitions"] = 50
    topic_1["config"]["cleanup.policy"] = "compact"

    # final: check results in the cluster to make sure they match
    for topic_conf in apply_conf["topics"]:
        topic_from_conf = Topic.from_dict(topic_conf)
        assert not topic_controller.diff_with_cluster(
            topic_from_conf
        ).has_changes, f"Topic configs don't match, diff is {topic_controller.diff_with_cluster(topic_from_conf)}"
def transfer(
    state: State,
    from_topic: str,
    to_topic: str,
    from_context: str,
    to_context: str,
    number: int,
    last: bool,
    avro: bool,
    binary: bool,
    consumergroup: str,
    match: str = None,
):
    """Transfer messages between two topics.

    Read messages from the source topic in the source context and write them into the destination topic
    in the destination context. This function is shorthand for using a combination of `esque consume`
    and `esque produce`.

    \b
    EXAMPLES:
    # Transfer the first 10 messages from TOPIC1 in the current context to TOPIC2 in context DSTCTX.
    esque transfer --first -n 10 --from-topic TOPIC1 --to-topic TOPIC2 --to-context DSTCTX

    \b
    # Transfer the first 10 messages from TOPIC1 in the context SRCCTX to TOPIC2 in context DSTCTX,
    # assuming the messages are AVRO.
    esque transfer --first -n 10 --avro --from-topic TOPIC1 --from-context SRCCTX --to-topic TOPIC2 --to-context DSTCTX
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    if not to_context:
        to_context = from_context

    if from_context == to_context and from_topic == to_topic:
        raise ValueError("Cannot transfer data to the same topic.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(to_topic):
        if ensure_approval(f"Topic {to_topic!r} does not exist, do you want to create it?", no_verify=state.no_verify):
            topic_controller.create_topics([Topic(to_topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, from_topic)
    builder.with_input_handler(input_handler)

    output_message_serializer = create_output_serializer(avro, binary, to_topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, to_topic)
    builder.with_output_handler(output_handler)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()
    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()
    click.echo(
        green_bold(str(counter.message_count))
        + " messages consumed from topic "
        + blue_bold(from_topic)
        + " in context "
        + blue_bold(from_context)  # was to_context; the source context is the one consumed from
        + " and produced to topic "
        + blue_bold(to_topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )