Ejemplo n.º 1
0
def transfer(state: State, topic: str, from_context: str, to_context: str,
             numbers: int, last: bool, avro: bool, keep_file: bool):
    current_timestamp_milliseconds = int(round(time.time() * 1000))
    unique_name = topic + "_" + str(current_timestamp_milliseconds)
    group_id = "group_for_" + unique_name
    directory_name = "message_" + unique_name
    base_dir = Path(directory_name)
    state.config.context_switch(from_context)

    with HandleFileOnFinished(base_dir, keep_file) as working_dir:
        number_consumed_messages = _consume_to_file(working_dir, topic,
                                                    group_id, from_context,
                                                    numbers, avro, last)

        if number_consumed_messages == 0:
            click.echo(
                click.style("Execution stopped, because no messages consumed.",
                            fg="red"))
            click.echo(
                bold(
                    "Possible reasons: The topic is empty or the starting offset was set too high."
                ))
            return

        click.echo("\nReady to produce to context " + blue_bold(to_context) +
                   " and target topic " + blue_bold(topic))

        if not ensure_approval("Do you want to proceed?\n",
                               no_verify=state.no_verify):
            return

        state.config.context_switch(to_context)
        _produce_from_file(topic, to_context, working_dir, avro)
Ejemplo n.º 2
0
def _produce_from_file(topic: str, to_context: str, working_dir: pathlib.Path,
                       avro: bool):
    if avro:
        producer = AvroFileProducer(working_dir)
    else:
        producer = FileProducer(working_dir)
    click.echo("\nStart producing to topic " + blue_bold(topic) +
               " in target context " + blue_bold(to_context))
    number_produced_messages = producer.produce(topic)
    click.echo(
        green_bold(str(number_produced_messages)) +
        " messages successfully produced to context " +
        green_bold(to_context) + " and topic " + green_bold(topic) + ".")
Ejemplo n.º 3
0
def _consume_to_file(working_dir: pathlib.Path, topic: str, group_id: str,
                     from_context: str, numbers: int, avro: bool,
                     last: bool) -> int:
    if avro:
        consumer = AvroFileConsumer(group_id, topic, working_dir, last)
    else:
        consumer = FileConsumer(group_id, topic, working_dir, last)
    click.echo("\nStart consuming from topic " + blue_bold(topic) +
               " in source context " + blue_bold(from_context))
    number_consumed_messages = consumer.consume(int(numbers))
    click.echo(
        blue_bold(str(number_consumed_messages)) + " messages consumed.")

    return number_consumed_messages
Ejemplo n.º 4
0
def produce(
    state: State,
    topic: str,
    to_context: str,
    directory: str,
    avro: bool,
    match: str = None,
    read_from_stdin: bool = False,
    ignore_stdin_errors: bool = False,
):
    """Produce messages to a topic.

       Write messages to a given topic in a given context. These messages can come from either a directory <directory>
       containing files corresponding to the different partitions or from STDIN.

       \b
       EXAMPLES:
       # Write all messages from the files in <directory> to TOPIC in the <destination_ctx> context.
       esque produce -d <directory> -t <destination_ctx> TOPIC

       \b
       # Start environment in terminal to write messages to TOPIC in the <destination_ctx> context.
       esque produce --stdin -f <destination_ctx> -y TOPIC

       \b
       # Copy source_topic to destination_topic.
       esque consume -f first-context --stdout source_topic | esque produce -t second-context --stdin destination_topic
       """
    if directory is None and not read_from_stdin:
        raise ValueError("You have to provide a directory or use the --stdin flag.")

    if directory is not None:
        input_directory = Path(directory)
        if not input_directory.exists():
            raise ValueError(f"Directory {directory} does not exist!")

    if not to_context:
        to_context = state.config.current_context
    state.config.context_switch(to_context)

    topic_controller = state.cluster.topic_controller
    if topic not in map(attrgetter("name"), topic_controller.list_topics(get_topic_objects=False)):
        click.echo(f"Topic {blue_bold(topic)} does not exist in context {blue_bold(to_context)}.")
        if ensure_approval(f"Would you like to create it now?"):
            topic_controller.create_topics([Topic(topic)])
        else:
            raise TopicDoesNotExistException(f"Topic {topic} does not exist!", -1)

    stdin = click.get_text_stream("stdin")
    if read_from_stdin and isatty(stdin):
        click.echo(
            "Type the messages to produce, "
            + ("in JSON format, " if not ignore_stdin_errors else "")
            + blue_bold("one per line")
            + ". End with "
            + blue_bold("CTRL+D")
            + "."
        )
    elif read_from_stdin and not isatty(stdin):
        click.echo(f"Reading messages from an external source, {blue_bold('one per line')}).")
    else:
        click.echo(
            f"Producing from directory {blue_bold(str(directory))} to topic {blue_bold(topic)}"
            f" in target context {blue_bold(to_context)}"
        )
    producer = ProducerFactory().create_producer(
        topic_name=topic,
        input_directory=input_directory if not read_from_stdin else None,
        avro=avro,
        match=match,
        ignore_stdin_errors=ignore_stdin_errors,
    )
    total_number_of_messages_produced = producer.produce()
    click.echo(
        green_bold(str(total_number_of_messages_produced))
        + " messages successfully produced to topic "
        + blue_bold(topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )
Ejemplo n.º 5
0
def consume(
    state: State,
    topic: str,
    from_context: str,
    numbers: int,
    match: str,
    last: bool,
    avro: bool,
    directory: str,
    consumergroup: str,
    preserve_order: bool,
    write_to_stdout: bool,
):
    """Consume messages from a topic.

    Read messages from a given topic in a given context. These messages can either be written
    to files in an automatically generated directory (default behavior), or to STDOUT.

    \b
    EXAMPLES:
    # Consume the first 10 messages from TOPIC in the current context and print them to STDOUT in order.
    esque consume --first -n 10 --preserve-order --stdout TOPIC

    \b
    # Consume <n> messages, starting from the 10th, from TOPIC in the <source_ctx> context and write them to files.
    esque consume --match "message.offset > 9" -n <n> TOPIC -f <source_ctx>

    \b
    # Copy source_topic in first context to destination_topic in second-context.
    esque consume -f first-context --stdout source_topic | esque produce -t second-context --stdin destination_topic
    """
    current_timestamp_milliseconds = int(round(time.time() * 1000))
    consumergroup_prefix = "group_for_"

    if directory and write_to_stdout:
        raise ValueError("Cannot write to a directory and STDOUT, please pick one!")

    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if topic not in map(attrgetter("name"), state.cluster.topic_controller.list_topics(get_topic_objects=False)):
        raise TopicDoesNotExistException(f"Topic {topic} does not exist!", -1)

    if not consumergroup:
        consumergroup = consumergroup_prefix + topic + "_" + str(current_timestamp_milliseconds)
    if not directory:
        directory = Path() / "messages" / topic / str(current_timestamp_milliseconds)
    output_directory = Path(directory)

    if not write_to_stdout:
        click.echo(f"Creating directory {blue_bold(str(output_directory))} if it does not exist.")
        output_directory.mkdir(parents=True, exist_ok=True)
        click.echo(f"Start consuming from topic {blue_bold(topic)} in source context {blue_bold(from_context)}.")
    if preserve_order:
        partitions = []
        for partition in state.cluster.topic_controller.get_cluster_topic(topic).partitions:
            partitions.append(partition.partition_id)
        total_number_of_consumed_messages = consume_to_file_ordered(
            output_directory=output_directory,
            topic=topic,
            group_id=consumergroup,
            partitions=partitions,
            numbers=numbers,
            avro=avro,
            match=match,
            last=last,
            write_to_stdout=write_to_stdout,
        )
    else:
        total_number_of_consumed_messages = consume_to_files(
            output_directory=output_directory,
            topic=topic,
            group_id=consumergroup,
            numbers=numbers,
            avro=avro,
            match=match,
            last=last,
            write_to_stdout=write_to_stdout,
        )

    if not write_to_stdout:
        click.echo(f"Output generated to {blue_bold(str(output_directory))}")
        if total_number_of_consumed_messages == numbers or numbers == sys.maxsize:
            click.echo(blue_bold(str(total_number_of_consumed_messages)) + " messages consumed.")
        else:
            click.echo(
                "Only found "
                + bold(str(total_number_of_consumed_messages))
                + " messages in topic, out of "
                + blue_bold(str(numbers))
                + " required."
            )
Ejemplo n.º 6
0
def consume(
    state: State,
    topic: str,
    from_context: str,
    number: Optional[int],
    match: str,
    last: bool,
    avro: bool,
    binary: bool,
    directory: str,
    consumergroup: str,
    preserve_order: bool,
    write_to_stdout: bool,
    pretty_print: bool,
):
    """Consume messages from a topic.

    Read messages from a given topic in a given context. These messages can either be written
    to files in an automatically generated directory (default behavior), or to STDOUT.

    If writing to STDOUT, then data will be represented as a JSON object with the message key and the message value
    always being a string.
    With the --avro option, those strings are JSON serialized objects.
    With the --binary option those strings contain the base64 encoded binary data.
    Without any of the two options, the data in the messages is treated utf-8 encoded strings and will be used as-is.

    \b
    EXAMPLES:
    # Consume the first 10 messages from TOPIC in the current context and print them to STDOUT in order.
    esque consume --first -n 10 --preserve-order --pretty-print --stdout TOPIC

    \b
    # Consume <n> messages, starting from the 10th, from TOPIC in the <source_ctx> context and write them to files.
    esque consume --match "message.offset > 9" -n <n> TOPIC -f <source_ctx>

    \b
    # Extract json objects from keys
    esque consume --stdout --avro TOPIC | jq '.key | fromjson'

    \b
    # Extract binary data from keys (depending on the data this could mess up your console)
    esque consume --stdout --binary TOPIC | jq '.key | @base64d'
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if not write_to_stdout and not directory:
        directory = Path() / "messages" / topic / datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, topic)
    builder.with_input_handler(input_handler)

    output_handler = create_output_handler(directory, write_to_stdout, binary, pretty_print)
    builder.with_output_handler(output_handler)

    output_message_serializer = create_output_message_serializer(write_to_stdout, directory, avro, binary)
    builder.with_output_message_serializer(output_message_serializer)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if preserve_order:
        topic_data = Cluster().topic_controller.get_cluster_topic(topic, retrieve_partition_watermarks=False)
        builder.with_stream_decorator(yield_messages_sorted_by_timestamp(len(topic_data.partitions)))

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()

    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    if not write_to_stdout:
        if counter.message_count == number:
            click.echo(blue_bold(str(counter.message_count)) + " messages consumed.")
        else:
            click.echo(
                "Only found "
                + bold(str(counter.message_count))
                + " messages in topic, out of "
                + blue_bold(str(number))
                + " required."
            )
Ejemplo n.º 7
0
def produce(
    state: State,
    topic: str,
    to_context: str,
    directory: str,
    avro: bool,
    binary: bool,
    match: str = None,
    read_from_stdin: bool = False,
    ignore_stdin_errors: bool = False,
):
    """Produce messages to a topic.

    Write messages to a given topic in a given context. These messages can come from either a directory <directory>
    that was previously written to with "esque consume" or from JSON objects coming in via STDIN.

    If reading from STDIN, then data will be expected as single-line JSON objects with the message key and the
    message value always being a string.
    The --avro option is currently not supported when reading from STDIN.
    With the --binary option those strings are expected to contain the base64 encoded binary data.
    By default, the data in the messages is treated utf-8 encoded strings and will be used as-is.
    In addition to "key" and "value" one can also define headers as list of objects with a "key" and a "value" attribute
    with the former being a string and the latter being a string, "null" or simply not defined.

    \b
    So valid json objects for reading from stdin would be:
    {"key": "foo", "value": "bar", "headers":[{"key":"h1", "value":"v1"},{"key":"h2"}]}
    {"key": "foo", "value": null, "partition": 1}
    {"key": "foo"}

    \b
    EXAMPLES:
    # Write all messages from the files in <directory> to TOPIC in the <destination_ctx> context.
    esque produce -d <directory> -t <destination_ctx> TOPIC

    \b
    # Start environment in terminal to write messages to TOPIC in the <destination_ctx> context.
    esque produce --stdin -f <destination_ctx> -y TOPIC

    \b
    # Copy source_topic to destination_topic.
    esque consume -f first-context --stdout source_topic | esque produce -t second-context --stdin destination_topic
    """
    if not to_context:
        to_context = state.config.current_context
    state.config.context_switch(to_context)

    if not read_from_stdin:
        if not directory:
            raise ValueError(
                "Need to provide directory if not reading from stdin.")
        else:
            directory = pathlib.Path(directory)
    elif avro:
        raise ValueError(
            "Cannot read avro data from stdin. Use a directory instead.")

    if binary and avro:
        raise ValueError(
            "Cannot set data to be interpreted as binary AND avro.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(topic):
        if ensure_approval(
                f"Topic {topic!r} does not exist, do you want to create it?",
                no_verify=state.no_verify):
            topic_controller.create_topics([Topic(topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_handler = create_input_handler(directory, read_from_stdin)
    builder.with_input_handler(input_handler)

    input_message_serializer = create_input_message_serializer(
        directory, avro, binary)
    builder.with_input_message_serializer(input_message_serializer)

    output_message_serializer = create_output_serializer(
        avro, binary, topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, topic)
    builder.with_output_handler(output_handler)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()

    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    click.echo(
        green_bold(str(counter.message_count)) +
        " messages successfully produced to topic " + blue_bold(topic) +
        " in context " + blue_bold(to_context) + ".")
Ejemplo n.º 8
0
def transfer(
    state: State,
    from_topic: str,
    to_topic: str,
    from_context: str,
    to_context: str,
    number: int,
    last: bool,
    avro: bool,
    binary: bool,
    consumergroup: str,
    match: str = None,
):
    """Transfer messages between two topics.

    Read messages from the source topic in the source context and write them into the destination topic in the destination context.
    This function is shorthand for using a combination of `esque consume` and `esque produce`

    \b
    EXAMPLES:
    # Transfer the first 10 messages from TOPIC1 in the current context to TOPIC2 in context DSTCTX.
    esque transfer --first -n 10 --from-topic TOPIC1 --to-topic TOPIC2 --to-context DSTCTX

    \b
    # Transfer the first 10 messages from TOPIC1 in the context SRCCTX to TOPIC2 in context DSTCTX, assuming the messages are AVRO.
    esque transfer --first -n 10 --avro --from-topic TOPIC1 --from-context SRCCTX --to-topic TOPIC2 --to-context DSTCTX
    """
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    if not to_context:
        to_context = from_context

    if from_context == to_context and from_topic == to_topic:
        raise ValueError("Cannot transfer data to the same topic.")

    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(to_topic):
        if ensure_approval(f"Topic {to_topic!r} does not exist, do you want to create it?", no_verify=state.no_verify):
            topic_controller.create_topics([Topic(to_topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, from_topic)
    builder.with_input_handler(input_handler)

    output_message_serializer = create_output_serializer(avro, binary, to_topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, to_topic)
    builder.with_output_handler(output_handler)

    if last:
        start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE
    else:
        start = KafkaHandler.OFFSET_AT_FIRST_MESSAGE

    builder.with_range(start=start, limit=number)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    counter, counter_decorator = event_counter()

    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    click.echo(
        green_bold(str(counter.message_count))
        + " messages consumed from topic "
        + blue_bold(from_topic)
        + " in context "
        + blue_bold(to_context)
        + " and produced to topic "
        + blue_bold(to_topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )