Exemple #1
0
def test_klio_filter_force(global_force, mock_config):
    """Check the counters reported after running ``KlioFilterForce``.

    Two messages (one with ``metadata.force`` set, one without) are piped
    through the transform while the first data output's ``force`` flag is
    set to ``global_force``.
    """
    mock_config.job_config.data.outputs[0].force = global_force

    forced_msg = klio_pb2.KlioMessage()
    forced_msg.metadata.force = True
    unforced_msg = klio_pb2.KlioMessage()
    unforced_msg.metadata.force = False
    serialized = [
        forced_msg.SerializeToString(),
        unforced_msg.SerializeToString(),
    ]

    with test_pipeline.TestPipeline() as p:
        p | beam.Create(serialized) | helpers.KlioFilterForce()

    counters = p.result.metrics().query()["counters"]
    force_ctr = counters[0]
    assert "KlioFilterForce" == force_ctr.key.metric.namespace
    assert "kmsg-process-force" == force_ctr.key.metric.name

    if global_force:
        # Global force: both messages are counted as "process", no skip.
        assert 1 == len(counters)
        assert 2 == force_ctr.committed
        return

    # No global force: one message is processed, the other is skipped.
    assert 2 == len(counters)
    skip_ctr = counters[1]
    assert 1 == force_ctr.committed
    assert 1 == skip_ctr.committed
    assert "KlioFilterForce" == skip_ctr.key.metric.namespace
    assert "kmsg-skip-force" == skip_ctr.key.metric.name
def _assert_expected_msg(actual):
    """Assert ``actual`` parses to a KlioMessage whose element is ``b"2"``.

    Args:
        actual (bytes): a serialized ``KlioMessage``.
    """
    parsed = klio_pb2.KlioMessage()
    parsed.ParseFromString(actual)

    wanted = klio_pb2.KlioMessage()
    wanted.data.element = b"2"

    assert parsed == wanted
Exemple #3
0
def test_klio_filter_ping(global_ping, mock_config):
    """Check the counters reported after running ``KlioFilterPing``.

    Two messages (one with ``metadata.ping`` set, one without) are piped
    through the transform while the first data input's ``ping`` flag is
    set to ``global_ping``.
    """
    mock_config.job_config.data.inputs[0].ping = global_ping

    ping_msg = klio_pb2.KlioMessage()
    ping_msg.metadata.ping = True
    regular_msg = klio_pb2.KlioMessage()
    regular_msg.metadata.ping = False
    serialized = [
        ping_msg.SerializeToString(),
        regular_msg.SerializeToString(),
    ]

    with test_pipeline.TestPipeline() as p:
        p | beam.Create(serialized) | helpers.KlioFilterPing()

    counters = p.result.metrics().query()["counters"]
    pass_thru_ctr = counters[0]
    assert "KlioFilterPing" == pass_thru_ctr.key.metric.namespace
    assert "kmsg-skip-ping" == pass_thru_ctr.key.metric.name

    if global_ping:
        # Global ping: both messages are counted as skipped.
        assert 1 == len(counters)
        assert 2 == pass_thru_ctr.committed
        return

    # No global ping: one message is skipped, the other is processed.
    assert 2 == len(counters)
    process_ctr = counters[1]
    assert 1 == pass_thru_ctr.committed
    assert 1 == process_ctr.committed
    assert "KlioFilterPing" == process_ctr.key.metric.namespace
    assert "kmsg-process-ping" == process_ctr.key.metric.name
Exemple #4
0
def test_trigger_upstream_job(mock_config, mocker, caplog):
    """Missing input data should publish a message to the upstream job.

    The GCS client is mocked so the input is reported as missing; the test
    then checks the existence lookup, the published message, the emitted
    counters and the log line.
    """
    gcs_cls = mocker.patch("klio.transforms._helpers.gcsio.GcsIO")
    gcs_cls.return_value.exists.return_value = False
    publisher_cls = mocker.patch("google.cloud.pubsub.PublisherClient")

    in_msg = klio_pb2.KlioMessage()
    in_msg.data.element = b"does_not_exist"

    # Build the message expected to be published to the upstream topic.
    current_job = klio_pb2.KlioJob()
    current_job.job_name = "a-job"
    current_job.gcp_project = "not-a-real-project"
    upstream_job = klio_pb2.KlioJob()
    upstream_job.job_name = "upstream-job"
    upstream_job.gcp_project = "upstream-project"
    published_msg = klio_pb2.KlioMessage()
    published_msg.version = klio_pb2.Version.V2
    published_msg.data.element = b"does_not_exist"
    limited = published_msg.metadata.intended_recipients.limited
    limited.recipients.extend([upstream_job, current_job])
    limited.trigger_children_of.CopyFrom(current_job)

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True

    with test_pipeline.TestPipeline(options=options) as p:
        in_pcol = p | beam.Create([in_msg.SerializeToString()])
        input_data = in_pcol | helpers.KlioGcsCheckInputExists()

        _ = input_data.not_found | helpers.KlioTriggerUpstream(
            upstream_job_name="upstream-job",
            upstream_topic="projects/upstream-project/topics/does-not-exist",
        )

    gcs = gcs_cls.return_value
    gcs.exists.assert_called_once_with(
        "gs://hopefully-this-bucket-doesnt-exist/does_not_exist")
    publisher = publisher_cls.return_value
    publisher.publish.assert_called_once_with(
        publisher.topic_path.return_value,
        published_msg.SerializeToString(),
    )

    counters = p.result.metrics().query()["counters"]
    assert 2 == len(counters)

    not_found_ctr = counters[0]
    upstream_ctr = counters[1]
    assert 1 == not_found_ctr.committed
    assert "KlioGcsCheckInputExists" == not_found_ctr.key.metric.namespace
    assert "kmsg-data-not-found-input" == not_found_ctr.key.metric.name
    assert 1 == upstream_ctr.committed
    assert "KlioTriggerUpstream" == upstream_ctr.key.metric.namespace
    assert "kmsg-trigger-upstream" == upstream_ctr.key.metric.name

    # Many records may be captured; only one has to contain the message.
    expected_log_msg = "Triggering upstream upstream-job for does_not_exist"
    assert any(
        expected_log_msg in record.message for record in caplog.records
    ), "Expected log message not found"
Exemple #5
0
def _generate_kmsg_with_payload(element):
    """Build a serialized V2 KlioMessage from ``element``.

    ``element["entity_id"]`` (stringified, UTF-8 encoded) becomes the
    message's data element and the JSON dump of the whole ``element`` becomes
    its payload. The intended recipients are set to "anyone".

    Returns:
        bytes: the serialized ``KlioMessage``.
    """
    entity_id = str(element["entity_id"])

    kmsg = klio_pb2.KlioMessage()
    kmsg.version = klio_pb2.Version.V2
    kmsg.metadata.intended_recipients.anyone.SetInParent()
    kmsg.data.element = entity_id.encode("utf-8")
    kmsg.data.payload = json.dumps(element).encode("utf-8")
    return kmsg.SerializeToString()
Exemple #6
0
def _dump_to_klio_message(key, payload):
    """Wrap ``payload`` (saved with ``np.save``) in a serialized KlioMessage.

    Args:
        key: value assigned to the message's ``data.element``.
        payload: object handed to ``np.save``; the resulting bytes are
            stored as the message's ``data.payload``.
    Returns:
        bytes: the serialized ``KlioMessage``.
    """
    buffer = io.BytesIO()
    np.save(buffer, payload)

    message = klio_pb2.KlioMessage()
    message.data.element = key
    message.data.payload = buffer.getvalue()
    return message.SerializeToString()
Exemple #7
0
def test_read_messages_timestamp_attribute_rfc3339_success(
    mocker,
    patch_sub_client,
    patch_msg_manager,
):
    """Read one message whose timestamp attribute is a valid RFC 3339 string.

    Verifies the element/timestamp produced by ``ReadFromPubSub`` as well as
    the overridden behaviour: no auto-ack, the message manager is used, and
    messages are pulled one at a time.
    """
    ack_id = "ack_id"
    attributes = {"time": "2018-03-12T13:37:01.234567Z"}
    publish_time_secs = 1337000000
    publish_time_nanos = 133700000

    exp_entity_id = "entity_id"
    kmsg = klio_pb2.KlioMessage()
    kmsg.data.element = bytes(exp_entity_id, "utf-8")
    data = kmsg.SerializeToString()

    response_msg = beam_test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id
    )
    patch_sub_client.pull.return_value = beam_test_utils.create_pull_response(
        [response_msg]
    )

    pmsg = b_pubsub.PubsubMessage(data, attributes)
    expected_timestamp = beam_utils.timestamp.Timestamp.from_rfc3339(
        attributes["time"]
    )
    expected_elements = [
        beam_testing_util.TestWindowedValue(
            pmsg,
            expected_timestamp,
            [beam_transforms.window.GlobalWindow()],
        ),
    ]

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True
    with beam_test_pipeline.TestPipeline(options=options) as p:
        pcoll = p | b_pubsub.ReadFromPubSub(
            "projects/fakeprj/topics/a_topic",
            None,
            None,
            with_attributes=True,
            timestamp_attribute="time",
        )
        # Original functionality that was kept the same.
        beam_testing_util.assert_that(
            pcoll,
            beam_testing_util.equal_to(expected_elements),
            reify_windows=True,
        )

    # Overridden functionality:
    # 1. auto-acking is skipped
    patch_sub_client.acknowledge.assert_not_called()
    # 2. the MessageManager was created for the subscription
    patch_msg_manager.assert_called_once_with(
        patch_sub_client.subscription_path())
    # 3. the message was added to the MessageManager
    patch_msg_manager.return_value.add.assert_called_once_with(ack_id, pmsg)
    # 4. one message is pulled at a time, instead of the original 10
    patch_sub_client.pull.assert_called_once_with(
        mocker.ANY, max_messages=1, return_immediately=True
    )

    patch_sub_client.api.transport.channel.close.assert_called_once_with()
Exemple #8
0
def test_klio_drop(mock_config, caplog):
    """``KlioDrop`` should log the drop and emit three counters."""
    empty_msg = klio_pb2.KlioMessage()

    with test_pipeline.TestPipeline() as p:
        p | beam.Create([empty_msg.SerializeToString()]) | helpers.KlioDrop()

    # Beam produces 50+ log messages, so just scan for the one we expect.
    assert any(
        "Dropping KlioMessage" in rec.message for rec in caplog.records
    ), "Expected log message not found"

    counters = p.result.metrics().query()["counters"]
    assert 3 == len(counters)
    received_ctr, drop_ctr, success_ctr = counters

    assert 1 == received_ctr.committed
    assert "KlioDrop.process" == received_ctr.key.metric.namespace
    assert "kmsg-received" == received_ctr.key.metric.name

    assert 1 == drop_ctr.committed
    assert "KlioDrop" == drop_ctr.key.metric.namespace
    assert "kmsg-drop" == drop_ctr.key.metric.name

    assert 1 == success_ctr.committed
    assert "KlioDrop.process" == success_ctr.key.metric.namespace
    assert "kmsg-success" == success_ctr.key.metric.name
Exemple #9
0
def assert_expected_klio_msg_from_avro_write(element):
    """Assert the parsed message's element is a line of the fixture file.

    Args:
        element (bytes): a serialized ``KlioMessage``.
    """
    fixture_file = os.path.join(FIXTURE_PATH, "elements_text_file.txt")
    with open(fixture_file, "rb") as fixture:
        known_elements = fixture.read().splitlines()

    parsed = klio_pb2.KlioMessage()
    parsed.ParseFromString(element)
    assert parsed.data.element in known_elements
Exemple #10
0
def test_read_messages_timestamp_attribute_fail_parse(patch_sub_client):
    """An unparseable timestamp attribute makes the pipeline run fail.

    The run is expected to raise ``ValueError`` matching "parse", without
    acknowledging the message, and the transport channel is still closed.
    """
    ack_id = "ack_id"
    attributes = {"time": "1337 unparseable"}
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000

    exp_entity_id = "entity_id"
    kmsg = klio_pb2.KlioMessage()
    kmsg.data.element = bytes(exp_entity_id, "utf-8")
    data = kmsg.SerializeToString()

    response_msg = beam_test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id
    )
    patch_sub_client.pull.return_value = beam_test_utils.create_pull_response(
        [response_msg]
    )

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True
    pipeline = beam_test_pipeline.TestPipeline(options=options)
    _ = pipeline | b_pubsub.ReadFromPubSub(
        "projects/fakeprj/topics/a_topic",
        None,
        None,
        with_attributes=True,
        timestamp_attribute="time",
    )
    with pytest.raises(ValueError, match=r"parse"):
        pipeline.run()

    patch_sub_client.acknowledge.assert_not_called()
    patch_sub_client.api.transport.channel.close.assert_called_with()
 def _convert_raw_pubsub_message(ack_id, pmessage):
     """Build a ``PubSubKlioMessage`` from a raw Pub/Sub message.

     ``pmessage.data`` is parsed as a ``KlioMessage``; its data element,
     decoded as UTF-8, is used as the entity ID.
     """
     # TODO: either use klio.message.serializer.to_klio_message, or
     # figure out how to handle when a parsed_message can't be parsed
     # into a KlioMessage (will need to somehow get the klio context)
     parsed = klio_pb2.KlioMessage()
     parsed.ParseFromString(pmessage.data)
     return PubSubKlioMessage(ack_id, parsed.data.element.decode("utf-8"))
Exemple #12
0
 def read_records(self, file_name, range_tracker):
     """Yield one serialized V2 KlioMessage per record read by the parent.

     Each record is JSON-dumped and UTF-8 encoded into the message's data
     element; the intended recipients are set to "anyone".
     """
     parent = super(_KlioFastAvroSource, self)
     for avro_record in parent.read_records(
         file_name=file_name, range_tracker=range_tracker
     ):
         kmsg = klio_pb2.KlioMessage()
         kmsg.version = klio_pb2.Version.V2
         kmsg.metadata.intended_recipients.anyone.SetInParent()
         # str.encode already returns bytes; no extra bytes() wrapper needed.
         kmsg.data.element = json.dumps(avro_record).encode("utf-8")
         yield kmsg.SerializeToString()
Exemple #13
0
    def read_records(self, file_name, range_tracker):
        """Yield one serialized V2 KlioMessage per record read by the parent.

        Each record is UTF-8 encoded and stored as the message's data
        element; the intended recipients are set to "anyone".
        """
        parent = super(_KlioReadFromTextSource, self)
        for line in parent.read_records(file_name, range_tracker):
            kmsg = klio_pb2.KlioMessage()
            kmsg.version = klio_pb2.Version.V2
            kmsg.metadata.intended_recipients.anyone.SetInParent()
            kmsg.data.element = line.encode("utf-8")
            yield kmsg.SerializeToString()
Exemple #14
0
 def write_record(self, file_handle, encoded_element):
     """Write the data element of a serialized KlioMessage.

     Args:
         file_handle: passed through unchanged to the parent sink's
             ``write_encoded_record``.
         encoded_element (bytes): a serialized ``KlioMessage``; only its
             ``data.element`` is written.
     """
     parsed = klio_pb2.KlioMessage()
     parsed.ParseFromString(encoded_element)
     super(_KlioTextSink, self).write_encoded_record(
         file_handle, parsed.data.element
     )
Exemple #15
0
def subtract_filter_from_full(key_pair):
    """Subtract the "nnfilter" value from the "full" value for one key.

    Args:
        key_pair (tuple): ``(key, pair_data)`` where ``pair_data["full"]``
            and ``pair_data["nnfilter"]`` are sequences whose first item is
            loaded with ``_unpickle_from_klio_message``.
    Returns:
        tuple: ``(key, serialized KlioMessage)`` whose payload is the
        pickled difference.
    """
    key, pair_data = key_pair
    full_msg, filter_msg = pair_data["full"][0], pair_data["nnfilter"][0]
    difference = (
        _unpickle_from_klio_message(full_msg)
        - _unpickle_from_klio_message(filter_msg)
    )

    out_msg = klio_pb2.KlioMessage()
    out_msg.data.element = key
    out_msg.data.payload = pickle.dumps(difference)
    return (key, out_msg.SerializeToString())
Exemple #16
0
def test_klio_debug(mock_config):
    """``KlioDebugMessage`` should emit a single ``kmsg-debug`` counter."""
    serialized = klio_pb2.KlioMessage().SerializeToString()

    with test_pipeline.TestPipeline() as p:
        p | beam.Create([serialized]) | helpers.KlioDebugMessage()

    counters = p.result.metrics().query()["counters"]
    assert 1 == len(counters)
    debug_ctr = counters[0]
    assert 1 == debug_ctr.committed
    assert "KlioDebugMessage" == debug_ctr.key.metric.namespace
    assert "kmsg-debug" == debug_ctr.key.metric.name
Exemple #17
0
def assert_audit(actual):
    """Assert ``actual`` equals the expected audited message, then return it.

    The expected message is a V2 ``KlioMessage`` whose job audit log holds
    a single item for job "a-job" in project "not-a-real-project".
    """
    current_job = klio_pb2.KlioJob()
    current_job.job_name = "a-job"
    current_job.gcp_project = "not-a-real-project"

    log_item = klio_pb2.KlioJobAuditLogItem()
    log_item.klio_job.CopyFrom(current_job)

    expected_msg = klio_pb2.KlioMessage()
    expected_msg.version = klio_pb2.Version.V2
    expected_msg.metadata.job_audit_log.extend([log_item])

    assert expected_msg.SerializeToString() == actual
    return actual
Exemple #18
0
def test_trigger_upstream_job(mock_config, mocker, capsys):
    """Missing input data should publish a message to the upstream job.

    The GCS client is mocked so the input is reported as missing; the test
    then checks the existence lookup and the message published upstream.
    """
    gcs_cls = mocker.patch("klio.transforms._helpers.gcsio.GcsIO")
    gcs_cls.return_value.exists.return_value = False
    publisher_cls = mocker.patch("google.cloud.pubsub.PublisherClient")

    in_msg = klio_pb2.KlioMessage()
    in_msg.data.element = b"does_not_exist"

    # Build the message expected to be published to the upstream topic.
    current_job = klio_pb2.KlioJob()
    current_job.job_name = "a-job"
    current_job.gcp_project = "not-a-real-project"
    upstream_job = klio_pb2.KlioJob()
    upstream_job.job_name = "upstream-job"
    upstream_job.gcp_project = "upstream-project"
    published_msg = klio_pb2.KlioMessage()
    published_msg.version = klio_pb2.Version.V2
    published_msg.data.element = b"does_not_exist"
    limited = published_msg.metadata.intended_recipients.limited
    limited.recipients.extend([upstream_job, current_job])
    limited.trigger_children_of.CopyFrom(current_job)

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True

    with test_pipeline.TestPipeline(options=options) as p:
        in_pcol = p | beam.Create([in_msg.SerializeToString()])
        input_data = in_pcol | helpers.KlioGcsCheckInputExists()

        _ = input_data.not_found | helpers.KlioTriggerUpstream(
            upstream_job_name="upstream-job",
            upstream_topic="projects/upstream-project/topics/does-not-exist",
        )

    gcs = gcs_cls.return_value
    gcs.exists.assert_called_once_with(
        "gs://hopefully-this-bucket-doesnt-exist/does_not_exist")
    publisher = publisher_cls.return_value
    publisher.publish.assert_called_once_with(
        publisher.topic_path.return_value,
        published_msg.SerializeToString(),
    )
Exemple #19
0
def to_klio_message(incoming_message, kconfig=None, logger=None):
    """Parse ``bytes`` into a :ref:`KlioMessage <klio-message>`.

    .. tip::

        Set ``job_config.allow_non_klio_messages`` to ``True`` in
        ``klio-job.yaml`` in order to process non-``KlioMessages`` as
        regular ``bytes``. This function will create a new ``KlioMessage``
        and set the incoming ``bytes`` to ``KlioMessage.data.element``.

    Args:
        incoming_message (bytes): Incoming bytes to parse into a \
            ``KlioMessage``.
        kconfig (klio_core.config.KlioConfig): the current job's
            configuration. When ``None``, non-Klio messages are treated
            as not allowed.
        logger (logging.Logger): the logger associated with the Klio
            job. When ``None``, the parse failure is not logged.
    Returns:
        klio_core.proto.klio_pb2.KlioMessage: a ``KlioMessage``.
    Raises:
        klio_core.proto.klio_pb2._message.DecodeError: incoming message
            can not be parsed into a ``KlioMessage`` and
            ``job_config.allow_non_klio_messages`` in ``klio-job.yaml``
            is set to ``False`` (or no ``kconfig`` was given).
    """
    # TODO: when making a generic de/ser func, be sure to assert
    # kconfig and logger exists
    parsed_message = klio_pb2.KlioMessage()

    try:
        parsed_message.ParseFromString(incoming_message)

    except klio_pb2._message.DecodeError:
        # Both ``kconfig`` and ``logger`` default to None; guard against
        # that so the DecodeError is not masked by an AttributeError
        # raised from inside this handler.
        allow_non_klio = (
            kconfig is not None
            and kconfig.job_config.allow_non_klio_messages
        )
        if not allow_non_klio:
            if logger is not None:
                logger.error(
                    "Can not parse incoming message. To support non-Klio "
                    "messages, add `job_config.allow_non_klio_messages = true` "
                    "in the job's `klio-job.yaml` file."
                )
            # Bare raise re-raises the active DecodeError unchanged.
            raise

        # We are assuming that we have been given "raw" data that is not in
        # the form of a serialized KlioMessage.
        parsed_message.data.element = incoming_message
        # default to set recipients to anyone - can't know who the
        # appropriate recipient is when it's not a real klio msg
        parsed_message.metadata.intended_recipients.anyone.SetInParent()
        parsed_message.version = klio_pb2.Version.V2

    parsed_message = _handle_msg_compat(parsed_message)
    return parsed_message
Exemple #20
0
def subtract_filter_from_full(key_pair):
    """Subtract the "nnfilter" value from the "full" value for one key.

    Args:
        key_pair (tuple): ``(element, pair_data)`` where ``pair_data`` looks
            like ``{"full": [serialized array], "nnfilter": [serialized
            array]}``; the first item of each list is loaded with
            ``_load_from_msg``.
    Returns:
        tuple: ``(key, serialized KlioMessage)`` whose payload is the
        pickled difference.
    """
    key, pair_data = key_pair
    full_msg, filter_msg = pair_data["full"][0], pair_data["nnfilter"][0]
    difference = _load_from_msg(full_msg) - _load_from_msg(filter_msg)

    out_msg = klio_pb2.KlioMessage()
    out_msg.data.element = key
    out_msg.data.payload = pickle.dumps(difference)
    return (key, out_msg.SerializeToString())
    def mark_done(kmsg_or_bytes):
        """Mark a KlioMessage as done and to be removed from handling.

        The message's entity ID (its data element decoded as UTF-8) is
        removed from ``ENTITY_ID_TO_ACK_ID`` while holding ``MESSAGE_LOCK``.
        Per the note in the body, that removal is what signals the
        ``MessageManager`` that the message is ready to be acknowledged.

        This method never raises: any error is logged as a warning so the
        pipeline can keep consuming messages.

        Args:
            kmsg_or_bytes (klio_pb2.KlioMessage or bytes): the KlioMessage
                (or a KlioMessage that has been serialized to bytes) to be
                marked as done.
        """
        kmsg = kmsg_or_bytes
        mm_logger = logging.getLogger("klio.gke_direct_runner.message_manager")

        # Wrap in a general try/except to make sure this method returns cleanly,
        # aka no raised errors that may prevent the pipeline from consuming
        # the next message available. Not sure if this causes problems
        # of being unable to pull a message, but at least it's for
        # sanity.
        try:
            # TODO: either use klio.message.serializer.to_klio_message, or
            # figure out how to handle when a parsed_message can't be parsed
            # into a KlioMessage (will need to somehow get the klio context).
            if not isinstance(kmsg_or_bytes, klio_pb2.KlioMessage):
                kmsg = klio_pb2.KlioMessage()
                kmsg.ParseFromString(kmsg_or_bytes)

            entity_id = kmsg.data.element.decode("utf-8")

            # This call to remove the message from the dict ENTITY_ID_TO_ACK_ID
            # will tell the MessageManager that this message is now ready to
            # be acknowledged and no longer being worked upon.
            with MESSAGE_LOCK:
                msg = ENTITY_ID_TO_ACK_ID.pop(entity_id, None)

            if not msg:
                # NOTE: this logger exists as `self.mgr_logger`, but this method
                # needs to be a staticmethod so we don't need to unnecessarily
                # init the class in order to just mark a message as done.
                # `Logger.warn` is a deprecated alias; use `warning`, as the
                # except branch below already does.
                mm_logger.warning(
                    f"Unable to acknowledge {entity_id}: Not found.")
        except Exception as e:
            # Catch all Exceptions so that the pipeline doesn't enter into
            # a weird state because of an uncaught error.
            mm_logger.warning(
                f"Error occurred while trying to remove message {kmsg}: {e}",
                exc_info=True,
            )
def test_convert_raw_pubsub_message(mocker, monkeypatch, msg_manager):
    """A raw Pub/Sub message converts to the matching ``PubSubKlioMessage``."""
    monkeypatch.setattr(pmm.threading, "Event", mocker.Mock())
    expected = pmm.PubSubKlioMessage("ack_id1", "kmsg_id1")

    kmsg = klio_pb2.KlioMessage()
    kmsg.data.element = b"kmsg_id1"
    raw_msg = beam_pubsub.PubsubMessage(
        data=kmsg.SerializeToString(), attributes={}
    )

    actual = msg_manager._convert_raw_pubsub_message("ack_id1", raw_msg)
    # Compare attributes (via __dict__) rather than the objects themselves:
    # implementing __eq__ on PubSubKlioMessage would make it unhashable.
    # That can be addressed, but this just seems easier for now.
    assert _compare_objects_dicts(expected, actual)
Exemple #23
0
 def read_records(self, file_name, range_tracker):
     """Yield one serialized V2 KlioMessage per record read by the parent.

     If a record has an "element" key, that value becomes the message's
     data element; otherwise the whole record is JSON-dumped and UTF-8
     encoded into it. The intended recipients are set to "anyone".
     """
     parent = super(_KlioFastAvroSource, self)
     for avro_record in parent.read_records(
         file_name=file_name, range_tracker=range_tracker
     ):
         kmsg = klio_pb2.KlioMessage()
         kmsg.version = klio_pb2.Version.V2
         kmsg.metadata.intended_recipients.anyone.SetInParent()
         if "element" in avro_record:
             # An explicit element was sent: use it as-is (event reading).
             kmsg.data.element = avro_record["element"]
         else:
             # No explicit element: stuff the whole record into the element.
             kmsg.data.element = json.dumps(avro_record).encode("utf-8")
         yield kmsg.SerializeToString()
Exemple #24
0
def test_process(klio_msg, expected_log_messages, caplog):
    """``LogKlioMessage.process`` adds a JSON payload and logs at INFO."""
    transform = transforms.LogKlioMessage()
    output = transform.process(klio_msg.SerializeToString())

    entity_id = klio_msg.data.element.decode("utf-8")
    row = {"entity_id": entity_id, "value": entity_id}

    expected_kmsg = klio_pb2.KlioMessage()
    expected_kmsg.data.element = klio_msg.data.element
    expected_kmsg.data.payload = json.dumps(row).encode("utf-8")
    expected_kmsg.version = klio_pb2.Version.V2

    assert expected_kmsg.SerializeToString() == list(output)[0]

    for position, log_record in enumerate(caplog.records):
        assert "INFO" == log_record.levelname
        assert expected_log_messages[position] == log_record.message
Exemple #25
0
    def _generate_klio_message(self):
        """Return a new V2 ``KlioMessage`` addressed to "anyone"."""
        kmsg = klio_pb2.KlioMessage()
        kmsg.metadata.intended_recipients.anyone.SetInParent()
        kmsg.version = klio_pb2.Version.V2

        # TODO: this is where we should add (relevant) KlioMessage.metadata;
        # (1) One thing to figure out is the klio_pb2.KlioJob definition,
        # particularly the JobInput definition, in light of KlioConfig v2.
        # Once that's figured out, we should at least populate the
        # job audit log.
        # (2) Another thing to figure out is force/ping. In streaming, messages
        # are individually marked as force or ping when needed. However,
        # users aren't able to tag individual messages generated from a row
        # of BQ data as force/ping, and it's probably very difficult for us
        # to provide a way to do that. So, should we allow users to at least
        # globally set force/ping on their event input config in klio-job.yaml?
        # Potentially.
        return kmsg
Exemple #26
0
def _expected_avro_kmsgs():
    """Return the KlioMessages expected for the two fixture avro records.

    Each message is V2, addressed to "anyone", and carries the UTF-8
    encoded JSON dump of its record as the data element.
    """

    def _as_kmsg(record):
        # Wrap a single record dict in a KlioMessage.
        kmsg = klio_pb2.KlioMessage()
        kmsg.version = klio_pb2.Version.V2
        kmsg.metadata.intended_recipients.anyone.SetInParent()
        kmsg.data.element = json.dumps(record).encode("utf-8")
        return kmsg

    records = (
        {
            "username": "******",
            "tweet": "Rock: Nerf paper, scissors is fine.",
            "timestamp": 1366150681,
        },
        {
            "username": "******",
            "tweet": "Works as intended.  Terran is IMBA.",
            "timestamp": 1366154481,
        },
    )
    return [_as_kmsg(record) for record in records]
Exemple #27
0
def test_update_klio_log(mocker, monkeypatch, caplog, mock_config):
    """``KlioUpdateAuditLog`` appends the current job and logs the path."""
    monkeypatch.setattr(
        klio_pb2.KlioJobAuditLogItem, "timestamp", mocker.Mock()
    )

    in_msg = klio_pb2.KlioMessage()
    in_msg.version = klio_pb2.Version.V2
    assert not in_msg.metadata.job_audit_log  # sanity check

    with test_pipeline.TestPipeline() as p:
        in_pcol = p | beam.Create([in_msg.SerializeToString()])
        act_pcol = in_pcol | helpers.KlioUpdateAuditLog()
        _ = act_pcol | beam.Map(assert_audit)

    exp_log = (
        "KlioMessage full audit log - Entity ID:  - Path: not-a-real-project::"
        "a-job (current job)")
    # Only one of the captured records needs to contain the audit log line.
    assert any(
        exp_log in rec.message for rec in caplog.records
    ), "Expected debug audit log not found"
Exemple #28
0
def test_process(klio_msg, expected_log_messages, caplog):
    """``LogKlioMessage.process`` adds a JSON payload and logs as expected.

    ``expected_log_messages`` is indexed in step with ``caplog.records``;
    each entry provides the expected "level" and a "message" substring.
    """
    transform = transforms.LogKlioMessage()
    output = transform.process(klio_msg.SerializeToString())

    entity_id = klio_msg.data.element.decode("utf-8")
    row = {"entity_id": entity_id, "value": entity_id}

    expected_kmsg = klio_pb2.KlioMessage()
    expected_kmsg.data.element = klio_msg.data.element
    expected_kmsg.data.payload = json.dumps(row).encode("utf-8")
    expected_kmsg.version = klio_pb2.Version.V2

    assert expected_kmsg.SerializeToString() == list(output)[0]

    # logs may not all be available yet since some may be on a different
    # thread, so wait a second before inspecting them
    time.sleep(1)
    assert len(caplog.records) == len(expected_log_messages)

    for position, log_record in enumerate(caplog.records):
        wanted = expected_log_messages[position]
        assert wanted["level"] == log_record.levelname
        assert wanted["message"] in log_record.message
Exemple #29
0
def klio_msg():
    """Return a V2 ``KlioMessage`` whose data element is a fixed track ID."""
    message = klio_pb2.KlioMessage()
    message.version = klio_pb2.Version.V2
    message.data.element = b"s0m3_tr4ck_1d"
    return message
Exemple #30
0
def assert_expected_klio_msg_from_file(element):
    """Assert ``element`` parses to a KlioMessage with a ``bytes`` element.

    Args:
        element (bytes): a serialized ``KlioMessage``.
    """
    parsed = klio_pb2.KlioMessage()
    parsed.ParseFromString(element)

    data_element = parsed.data.element
    assert data_element is not None
    assert isinstance(data_element, bytes)