Ejemplo n.º 1
0
def test_from_klio_message_raises(klio_message):
    """Check that a ``dict`` payload is rejected by ``from_klio_message``.

    The call is expected to raise ``KlioMessagePayloadException`` with a
    message matching "Returned payload".
    """
    # A dict stands in for a payload that is not already bytes.
    bad_payload = {"no": "bytes casting"}
    expected_error = exceptions.KlioMessagePayloadException

    with pytest.raises(expected_error, match="Returned payload"):
        serializer.from_klio_message(klio_message, bad_payload)
Ejemplo n.º 2
0
    def update_kmsg_metadata(self, raw_kmsg):
        """Update KlioMessage to enable partial bottom-up execution.

        The upstream job and the current job are both added to the
        message's limited intended recipients, and the current job is
        copied into ``trigger_children_of``.

        Args:
            raw_kmsg (bytes): Serialized KlioMessage, as accepted by
                ``serializer.to_klio_message``.
        Returns:
            bytes: The updated KlioMessage as returned by
                ``serializer.from_klio_message``.
        """
        # `serializer.to_klio_message` is used rather than @handle_klio
        # because the whole KlioMessage object is needed here, not only
        # its data.
        config = self._klio.config
        logger = self._klio.logger
        message = serializer.to_klio_message(
            raw_kmsg, kconfig=config, logger=logger
        )

        # Listing the upstream job as a recipient keeps it from skipping
        # the message.
        upstream = self._generate_upstream_job_object()
        limited = message.metadata.intended_recipients.limited
        limited.recipients.extend([upstream])

        # The current job is both a recipient and the job whose children
        # get triggered, so top-down execution resumes once it is done.
        this_job = self._generate_current_job_object()
        limited.recipients.extend([this_job])
        limited.trigger_children_of.CopyFrom(this_job)

        return serializer.from_klio_message(message)
Ejemplo n.º 3
0
def test_from_klio_message(klio_message, payload, exp_payload):
    """Compare ``from_klio_message`` output with a serialized reference.

    The reference message comes from ``_get_klio_message()``; its data
    payload is overridden only when ``exp_payload`` is truthy.
    """
    reference = _get_klio_message()
    if exp_payload:
        reference.data.payload = exp_payload
    reference_bytes = reference.SerializeToString()

    result = serializer.from_klio_message(klio_message, payload)

    assert reference_bytes == result
Ejemplo n.º 4
0
def test_from_klio_message_v1():
    """Check ``from_klio_message`` output for a message marked as V1.

    The serialized result must equal the serialization of the same V1
    message carrying the payload.
    """
    data = b"some-payload"

    v1_message = klio_pb2.KlioMessage()
    v1_message.version = klio_pb2.Version.V1
    v1_message.data.payload = data
    # Serialize the reference before handing the message to the serializer.
    reference_bytes = v1_message.SerializeToString()

    result = serializer.from_klio_message(v1_message, data)

    assert reference_bytes == result
Ejemplo n.º 5
0
def test_from_klio_message_tagged_output(klio_message):
    """Check that a tagged payload yields a tagged, serialized message.

    The result is expected to carry the same tag as the input and a
    value equal to the serialized reference message.
    """
    data = b"some payload"

    reference = _get_klio_message()
    reference.data.payload = data
    wanted = pvalue.TaggedOutput("a-tag", reference.SerializeToString())

    tagged_input = pvalue.TaggedOutput("a-tag", data)
    result = serializer.from_klio_message(klio_message, tagged_input)

    # pvalue.TaggedOutput does not implement comparison operators, so the
    # tag and the value are checked one attribute at a time.
    assert wanted.tag == result.tag
    assert wanted.value == result.value
Ejemplo n.º 6
0
def __from_klio_message_generator(self, kmsg, payload, orig_item):
    """Yield the serialized message, or a "drop"-tagged original on error.

    Args:
        self: Object exposing ``_klio.logger`` for error reporting.
        kmsg: Message passed to ``serializer.from_klio_message``.
        payload: Payload passed to ``serializer.from_klio_message``.
        orig_item: Item yielded under the "drop" tag when an exception
            is caught.

    Yields:
        The result of ``serializer.from_klio_message(kmsg, payload)``, or
        ``pvalue.TaggedOutput("drop", orig_item)`` if an ``Exception`` is
        caught.
    """
    try:
        yield serializer.from_klio_message(kmsg, payload)

    except Exception as exc:
        self._klio.logger.error(
            _ERROR_MSG_KMSG_TO_BYTES.format(kmsg, exc), exc_info=True
        )
        # The value yielded in the `try` clause may not be tagged, so it
        # is the one picked up by default by whatever runs this function.
        # Tagged values such as this dropped one are simply ignored, which
        # is fine for dropped items, although a caller can still reach
        # them via pcoll.drop.
        # kmsg is not serialized to bytes here because something has
        # already gone wrong with it.
        yield pvalue.TaggedOutput("drop", orig_item)
        # Return explicitly so that Beam doesn't call `next` and run on
        # to another `yield`.
        return
Ejemplo n.º 7
0
def __serialize_klio_message(metrics, ctx, func, incoming_item, *args,
                             **kwargs):
    """Run ``func`` on a parsed message and serialize what it returns.

    The incoming item is parsed with ``serializer.to_klio_message``, the
    message's ``data`` is passed to ``func``, and the return value is
    serialized with ``serializer.from_klio_message``. The received, error
    and success counters on ``metrics`` are incremented along the way and
    all work after the received count runs inside ``metrics.timer``.

    Args:
        metrics: Object exposing ``received``, ``error`` and ``success``
            counters plus a ``timer`` context manager.
        ctx: Either a ``core.KlioContext`` or an object whose ``_klio``
            attribute is used as the context.
        func: Callable invoked as ``func(ctx, kmsg.data, *args, **kwargs)``
            with the original ``ctx`` argument in first position.
        incoming_item: Raw item to parse into a message.
        *args: Extra positional arguments forwarded to ``func``.
        **kwargs: Extra keyword arguments forwarded to ``func``.

    Returns:
        The serialized result on success, otherwise
        ``pvalue.TaggedOutput("drop", incoming_item)`` when parsing, the
        wrapped callable, or serialization raises a handled exception.

    Raises:
        TypeError: If ``func`` returns a generator or itself raises a
            ``TypeError``; the error is counted and re-raised.
    """
    metrics.received.inc()

    # `ctx` is adjusted so that both plain functions and methods can be
    # wrapped: a function is given the context object directly, whereas a
    # method is given `self`, whose `_klio` attribute is the context. The
    # original first argument is kept because it is needed to call the
    # method.
    receiver = ctx
    if not isinstance(ctx, core.KlioContext):
        ctx = receiver._klio

    with metrics.timer:
        # Step 1: parse the incoming item into a message.
        try:
            klio_msg = serializer.to_klio_message(
                incoming_item, ctx.config, ctx.logger
            )
        except Exception as exc:
            ctx.logger.error(
                _ERROR_MSG_KMSG_FROM_BYTES.format(incoming_item, exc),
                exc_info=True,
            )
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(incoming_item, ctx)
            # An untagged return value is what the caller uses by default;
            # a tagged value such as this dropped one is simply ignored,
            # which is fine for dropped items. A caller can still reach it
            # via pcoll.drop.
            return pvalue.TaggedOutput("drop", incoming_item)

        # Step 2: call the wrapped function/method with the message data.
        try:
            result = func(receiver, klio_msg.data, *args, **kwargs)
            if isinstance(result, types.GeneratorType):
                raise TypeError(
                    "can't pickle generator object: '{}'".format(
                        func.__name__
                    )
                )
        except TypeError:
            metrics.error.inc()
            # A type error is raised above when a generator is found, as
            # those can't be pickled. No special handling is needed: the
            # error carries enough information for the user, so it is
            # simply re-raised.
            raise

        except Exception as exc:
            message, with_traceback = __get_user_error_message(
                exc, func.__name__, klio_msg
            )
            ctx.logger.error(message, exc_info=with_traceback)
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(klio_msg, ctx)
            # Same "drop" convention as in step 1. The message is not
            # serialized to bytes since something already went wrong; the
            # original incoming item is handed back instead.
            return pvalue.TaggedOutput("drop", incoming_item)

        # Step 3: serialize the message together with the returned value.
        try:
            serialized = serializer.from_klio_message(klio_msg, result)
            metrics.success.inc()
            return serialized

        except Exception as exc:
            ctx.logger.error(
                _ERROR_MSG_KMSG_TO_BYTES.format(klio_msg, exc),
                exc_info=True,
            )
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(klio_msg, ctx)
            # Same "drop" convention as in step 1. Serialization just
            # failed, so the original incoming item is handed back rather
            # than trying to serialize the message again.
            return pvalue.TaggedOutput("drop", incoming_item)