Python PipelineBuilder.with_range 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: esque.io.pipeline

클래스/타입: PipelineBuilder

메소드/함수: with_range

hotexamples.com에서의 예제들: 4

Python PipelineBuilder.with_range - 4개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, Python esque.io.pipeline.PipelineBuilder.with_range의 평가가 가장 높은 실제 사용 예제들입니다. 예제를 평가해 주시면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

PipelineBuilder(14)

build(13)

with_input_handler(6)

with_input_message_serializer(5)

with_message_reader(5)

with_message_writer(5)

with_output_handler(5)

with_output_message_serializer(5)

with_range(4)

with_input_from_uri(1)

with_output_from_uri(1)

with_stream_decorator(1)

예제 #1

파일 보기

파일: test_pipeline.py 프로젝트: real-digital/esque

def test_limited_read_with_relative_offset_from_end(
    dummy_message_writer: DummyMessageWriter,
    binary_messages: List[BinaryMessage],
    prepared_builder: PipelineBuilder,
):
    """Check that a range of ``start=-2, limit=1`` writes ``binary_messages[-2:-1]``."""
    # Configure the range on the prepared builder, then build the pipeline.
    prepared_builder.with_range(start=-2, limit=1)
    built = prepared_builder.build()
    assert isinstance(built, Pipeline)

    built.run_pipeline()

    # The writer must have received exactly the second-to-last message.
    written = dummy_message_writer.get_written_messages()
    expected = binary_messages[-2:-1]
    assert written == expected

예제 #2

파일 보기

파일: test_pipeline.py 프로젝트: real-digital/esque

def test_limited_read_with_absolute_offset(
    dummy_message_writer: DummyMessageWriter,
    binary_messages: List[BinaryMessage],
    prepared_builder: PipelineBuilder,
):
    """Check that ``start=1, limit=1`` writes one message whose offset is >= 1."""
    prepared_builder.with_range(start=1, limit=1)
    built = prepared_builder.build()
    assert isinstance(built, Pipeline)

    built.run_pipeline()

    # Exactly one message may be written because of ``limit=1``.
    written_count = len(dummy_message_writer.get_written_messages())
    assert written_count == 1

    # That message has to be one of the inputs at or past the start offset.
    first_written = dummy_message_writer.get_written_messages()[0]
    eligible = [candidate for candidate in binary_messages if candidate.offset >= 1]
    assert first_written in eligible

예제 #3

파일 보기

파일: consume.py 프로젝트: real-digital/esque

def consume(
    state: State,
    topic: str,
    from_context: str,
    number: Optional[int],
    match: str,
    last: bool,
    avro: bool,
    binary: bool,
    directory: str,
    consumergroup: str,
    preserve_order: bool,
    write_to_stdout: bool,
    pretty_print: bool,
):
    """Consume messages from a topic.

    Read messages from a given topic in a given context. These messages can either be written
    to files in an automatically generated directory (default behavior), or to STDOUT.

    If writing to STDOUT, then data will be represented as a JSON object with the message key and the message value
    always being a string.
    With the --avro option, those strings are JSON serialized objects.
    With the --binary option those strings contain the base64 encoded binary data.
    Without any of the two options, the data in the messages is treated utf-8 encoded strings and will be used as-is.

    \b
    EXAMPLES:
    # Consume the first 10 messages from TOPIC in the current context and print them to STDOUT in order.
    esque consume --first -n 10 --preserve-order --pretty-print --stdout TOPIC

    \b
    # Consume <n> messages, starting from the 10th, from TOPIC in the <source_ctx> context and write them to files.
    esque consume --match "message.offset > 9" -n <n> TOPIC -f <source_ctx>

    \b
    # Extract json objects from keys
    esque consume --stdout --avro TOPIC | jq '.key | fromjson'

    \b
    # Extract binary data from keys (depending on the data this could mess up your console)
    esque consume --stdout --binary TOPIC | jq '.key | @base64d'
    """
    # NOTE(review): the docstring above looks like CLI help text (the ``\b``
    # markers presumably stop the help formatter from re-wrapping), so its
    # wording is deliberately left untouched.
    #
    # Parameters, as used by the body below:
    #   state           -- supplies ``config.current_context`` / ``config.context_switch``
    #                      and is forwarded to the input serializer factory.
    #   topic           -- topic to read from; also part of the default output directory.
    #   from_context    -- context to switch to; falls back to the current context when falsy.
    #   number          -- passed as ``limit`` to the pipeline range; ``None`` means no
    #                      limit was requested.
    #   match           -- when truthy, handed to ``yield_only_matching_messages``.
    #   last            -- selects ``OFFSET_AFTER_LAST_MESSAGE`` over ``OFFSET_AT_FIRST_MESSAGE``.
    #   avro, binary    -- mutually exclusive flags forwarded to the (de)serializer factories.
    #   directory       -- output directory; generated when empty and not writing to STDOUT.
    #   consumergroup   -- forwarded to the input handler factory.
    #   preserve_order  -- when set, messages are routed through
    #                      ``yield_messages_sorted_by_timestamp``.
    #   write_to_stdout, pretty_print -- forwarded to the output handler factory.
    #
    # Raises ValueError when both ``binary`` and ``avro`` are set.
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    # Default target: ./messages/<topic>/<timestamp>, only needed when writing files.
    if not write_to_stdout and not directory:
        directory = Path() / "messages" / topic / datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, topic)
    builder.with_input_handler(input_handler)

    output_handler = create_output_handler(directory, write_to_stdout, binary, pretty_print)
    builder.with_output_handler(output_handler)

    output_message_serializer = create_output_message_serializer(write_to_stdout, directory, avro, binary)
    builder.with_output_message_serializer(output_message_serializer)

    start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE if last else KafkaHandler.OFFSET_AT_FIRST_MESSAGE
    builder.with_range(start=start, limit=number)

    if preserve_order:
        # The sorting decorator is parameterized with the topic's partition count.
        topic_data = Cluster().topic_controller.get_cluster_topic(topic, retrieve_partition_watermarks=False)
        builder.with_stream_decorator(yield_messages_sorted_by_timestamp(len(topic_data.partitions)))

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    # Registered last, so it counts the messages that passed the decorators above.
    counter, counter_decorator = event_counter()
    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    if not write_to_stdout:
        # ``number`` is Optional: without a requested limit there is no target to
        # fall short of, so report the plain count instead of "out of None required".
        if number is None or counter.message_count == number:
            click.echo(blue_bold(str(counter.message_count)) + " messages consumed.")
        else:
            click.echo(
                "Only found "
                + bold(str(counter.message_count))
                + " messages in topic, out of "
                + blue_bold(str(number))
                + " required."
            )

예제 #4

파일 보기

파일: transfer.py 프로젝트: real-digital/esque

def transfer(
    state: State,
    from_topic: str,
    to_topic: str,
    from_context: str,
    to_context: str,
    number: int,
    last: bool,
    avro: bool,
    binary: bool,
    consumergroup: str,
    match: str = None,
):
    """Transfer messages between two topics.

    Read messages from the source topic in the source context and write them into the destination topic in the destination context.
    This function is shorthand for using a combination of `esque consume` and `esque produce`

    \b
    EXAMPLES:
    # Transfer the first 10 messages from TOPIC1 in the current context to TOPIC2 in context DSTCTX.
    esque transfer --first -n 10 --from-topic TOPIC1 --to-topic TOPIC2 --to-context DSTCTX

    \b
    # Transfer the first 10 messages from TOPIC1 in the context SRCCTX to TOPIC2 in context DSTCTX, assuming the messages are AVRO.
    esque transfer --first -n 10 --avro --from-topic TOPIC1 --from-context SRCCTX --to-topic TOPIC2 --to-context DSTCTX
    """
    # NOTE(review): the docstring above looks like CLI help text (the ``\b``
    # markers presumably stop the help formatter from re-wrapping), so its
    # wording is deliberately left untouched.
    #
    # Parameters, as used by the body below:
    #   state         -- supplies ``config`` (current context / context switch) and
    #                    ``no_verify``; also forwarded to the serializer factories.
    #   from_topic    -- topic the input handler reads from.
    #   to_topic      -- topic the output handler writes to; created on approval if missing.
    #   from_context  -- source context; falls back to the current context when falsy.
    #   to_context    -- destination context; falls back to ``from_context`` when falsy.
    #   number        -- passed as ``limit`` to the pipeline range.
    #   last          -- selects ``OFFSET_AFTER_LAST_MESSAGE`` over ``OFFSET_AT_FIRST_MESSAGE``.
    #   avro, binary  -- mutually exclusive flags forwarded to the serializer factories.
    #   consumergroup -- forwarded to the input handler factory.
    #   match         -- when truthy, handed to ``yield_only_matching_messages``.
    #
    # Raises ValueError when both ``binary`` and ``avro`` are set, or when source
    # and destination (context and topic) are identical.
    if not from_context:
        from_context = state.config.current_context
    state.config.context_switch(from_context)

    if binary and avro:
        raise ValueError("Cannot set data to be interpreted as binary AND avro.")

    if not to_context:
        to_context = from_context

    if from_context == to_context and from_topic == to_topic:
        raise ValueError("Cannot transfer data to the same topic.")

    # NOTE(review): this existence check runs after switching to ``from_context``;
    # confirm that Cluster() addresses the destination cluster when ``to_context``
    # differs from ``from_context``.
    topic_controller = Cluster().topic_controller
    if not topic_controller.topic_exists(to_topic):
        if ensure_approval(f"Topic {to_topic!r} does not exist, do you want to create it?", no_verify=state.no_verify):
            topic_controller.create_topics([Topic(to_topic)])
        else:
            click.echo(click.style("Aborted!", bg="red"))
            return

    builder = PipelineBuilder()

    input_message_serializer = create_input_serializer(avro, binary, state)
    builder.with_input_message_serializer(input_message_serializer)

    input_handler = create_input_handler(consumergroup, from_context, from_topic)
    builder.with_input_handler(input_handler)

    output_message_serializer = create_output_serializer(avro, binary, to_topic, state)
    builder.with_output_message_serializer(output_message_serializer)

    output_handler = create_output_handler(to_context, to_topic)
    builder.with_output_handler(output_handler)

    start = KafkaHandler.OFFSET_AFTER_LAST_MESSAGE if last else KafkaHandler.OFFSET_AT_FIRST_MESSAGE
    builder.with_range(start=start, limit=number)

    if match:
        builder.with_stream_decorator(yield_only_matching_messages(match))

    # Registered last, so it counts the messages that passed the match filter.
    counter, counter_decorator = event_counter()
    builder.with_stream_decorator(counter_decorator)

    pipeline = builder.build()
    pipeline.run_pipeline()

    # The source side of the summary must name the source context; the original
    # printed ``to_context`` for both halves of the sentence.
    click.echo(
        green_bold(str(counter.message_count))
        + " messages consumed from topic "
        + blue_bold(from_topic)
        + " in context "
        + blue_bold(from_context)
        + " and produced to topic "
        + blue_bold(to_topic)
        + " in context "
        + blue_bold(to_context)
        + "."
    )