def _filter_intended_recipients(self, in_pcol, label_pfx=None):
    """Route incoming messages to either the "process" or "drop" branch.

    Messages are first tagged by version, then each version is run through
    its own recipient check. The ``drop`` outputs of both checks are
    flattened and handed to ``helpers.KlioDrop``; the ``process`` outputs
    are flattened and returned.

    Args:
        in_pcol: input PCollection of messages.
        label_pfx: optional prefix; when given, every transform label is
            rendered as ``"[<label_pfx>] <label>"``.

    Returns:
        The flattened PCollection of the v1 and v2 ``process`` outputs.
    """
    prefix = "" if label_pfx is None else "[{}] ".format(label_pfx)

    def _label(text):
        # Every transform label in this method shares the same prefix.
        return "{}{}".format(prefix, text)

    # TODO: this "tagging by version then processing each version
    #       differently" should only be temporary and removed once v2
    #       migration is done
    by_version = in_pcol | (
        _label("Tag Message Versions") >> helpers._KlioTagMessageVersion()
    )

    # Each check yields a `process` and a `drop` output, depending on
    # whether this job should actually handle the received message.
    v1_check = helpers._KlioV1CheckRecipients()
    v1_checked = by_version.v1 | _label("Should Process v1 Message") >> v1_check

    v2_check = helpers.KlioCheckRecipients()
    v2_checked = by_version.v2 | _label("Should Process v2 Message") >> v2_check

    # Merge the rejected messages of both versions and drop them.
    dropped = (v1_checked.drop, v2_checked.drop) | (
        _label("Flatten to Drop Messages to Ignore") >> beam.Flatten()
    )
    _ = dropped | _label("Drop Messages to Ignore") >> helpers.KlioDrop()

    # Merge the accepted messages of both versions for downstream work.
    accepted = (v1_checked.process, v2_checked.process) | (
        _label("Flatten to Process Intended Messages") >> beam.Flatten()
    )
    return accepted
def test_bottom_up_drop(self):
    """Both inputs must come out of the ``drop`` output and be counted.

    A v0 and a v2 "drop" message are serialized and fed through
    ``_KlioTagMessageVersion`` and ``KlioCheckRecipients``. Elements on
    the ``drop`` output are compared against ``self.exp_kmsg_drop()``;
    anything on the ``process`` output or on the ``v1`` tag is passed to
    ``self.assert_not_processed``. Afterwards exactly one counter is
    expected, with one count per input message.
    """
    serialized_msgs = [
        self.input_v0_kmsg_drop().SerializeToString(),
        self.input_v2_kmsg_drop().SerializeToString(),
    ]
    expected_msg = self.exp_kmsg_drop()

    with test_pipeline.TestPipeline() as p:
        source = p | beam.Create(serialized_msgs)
        tagged = source | helpers._KlioTagMessageVersion()
        checked = tagged.v2 | helpers.KlioCheckRecipients()

        check_dropped = beam.Map(
            self.assert_actual_is_expected, expected=expected_msg
        )
        _ = checked.drop | "Assert expected dropped" >> check_dropped

        _ = checked.process | "Assert no processed msgs" >> beam.Map(
            self.assert_not_processed
        )

        _ = tagged.v1 | "Assert no v1 msgs" >> beam.Map(
            self.assert_not_processed
        )

    # The pipeline runs when the `with` block exits, so the metrics are
    # only inspected from here on.
    counters = p.result.metrics().query()["counters"]
    assert len(counters) == 1

    drop_counter = counters[0]
    metric = drop_counter.key.metric
    assert drop_counter.committed == len(serialized_msgs)
    assert metric.namespace == "KlioCheckRecipients"
    assert metric.name == "kmsg-drop-not-recipient"
def test_bottom_up_to_top_down(self):
    """Both inputs must come out of the ``process`` output.

    A v0 and a v2 "trigger children of" message are serialized and fed
    through ``_KlioTagMessageVersion`` and ``KlioCheckRecipients``.
    Elements on the ``process`` output are compared against
    ``self.exp_kmsg_trigger_children_of()``; anything on the ``drop``
    output or on the ``v1`` tag is passed to ``self.assert_not_processed``.
    """
    serialized_msgs = [
        self.in_v0_kmsg_trigger_children_of().SerializeToString(),
        self.in_v2_kmsg_trigger_children_of().SerializeToString(),
    ]
    expected_msg = self.exp_kmsg_trigger_children_of()

    with test_pipeline.TestPipeline() as p:
        source = p | beam.Create(serialized_msgs)
        tagged = source | helpers._KlioTagMessageVersion()
        checked = tagged.v2 | helpers.KlioCheckRecipients()

        check_processed = beam.Map(
            self.assert_actual_is_expected, expected=expected_msg
        )
        _ = checked.process | "Assert expected processed" >> check_processed

        _ = checked.drop | "Assert no dropped msgs" >> beam.Map(
            self.assert_not_processed
        )

        _ = tagged.v1 | "Assert no v1 msgs" >> beam.Map(
            self.assert_not_processed
        )
def _filter_intended_recipients(self, in_pcol, label_pfx=None):
    """Route incoming messages to either the "process" or "drop" branch.

    Messages are first tagged by version, then each version is run through
    its own recipient check. The ``drop`` outputs of both checks are
    flattened and handed to ``helpers.KlioDrop``; when the configured
    runner is ``"DirectGKERunner"`` they are additionally passed through
    ``helpers.KlioAckInputMessage``. The ``process`` outputs are flattened
    and returned.

    Args:
        in_pcol: input PCollection of messages.
        label_pfx: optional prefix; when given, every transform label is
            rendered as ``"[<label_pfx>] <label>"``.

    Returns:
        The flattened PCollection of the v1 and v2 ``process`` outputs.
    """
    prefix = "" if label_pfx is None else "[{}] ".format(label_pfx)

    def _label(text):
        # Every transform label in this method shares the same prefix.
        return "{}{}".format(prefix, text)

    # TODO: this "tagging by version then processing each version
    #       differently" should only be temporary and removed once v2
    #       migration is done
    by_version = in_pcol | (
        _label("Tag Message Versions") >> helpers._KlioTagMessageVersion()
    )

    # Each check yields a `process` and a `drop` output, depending on
    # whether this job should actually handle the received message.
    v1_check = helpers._KlioV1CheckRecipients()
    v1_checked = by_version.v1 | _label("Should Process v1 Message") >> v1_check

    v2_check = helpers.KlioCheckRecipients()
    v2_checked = by_version.v2 | _label("Should Process v2 Message") >> v2_check

    # Merge the rejected messages of both versions.
    dropped = (v1_checked.drop, v2_checked.drop) | (
        _label("Flatten to Drop Messages to Ignore") >> beam.Flatten()
    )

    # TODO: update me to `var.KlioRunner.DIRECT_GKE_RUNNER` once
    #       direct_on_gke_runner_clean is merged
    runner = self.config.pipeline_options.runner
    if runner == "DirectGKERunner":
        ack_dropped = beam.ParDo(helpers.KlioAckInputMessage())
        _ = dropped | _label("Ack Dropped Input Message") >> ack_dropped

    _ = dropped | _label("Drop Messages to Ignore") >> helpers.KlioDrop()

    # Merge the accepted messages of both versions for downstream work.
    accepted = (v1_checked.process, v2_checked.process) | (
        _label("Flatten to Process Intended Messages") >> beam.Flatten()
    )
    return accepted
def test_bottom_up(self):
    """Both inputs must come out of the ``process`` output, uncounted.

    A v0 and a v2 "limited" message are serialized and fed through
    ``_KlioTagMessageVersion`` and ``KlioCheckRecipients``. Elements on
    the ``process`` output are compared against ``self.exp_kmsg_limited()``;
    anything on the ``drop`` output or on the ``v1`` tag is passed to
    ``self.assert_not_processed``. Afterwards no counters are expected.
    """
    serialized_msgs = [
        self.input_v0_kmsg_limited().SerializeToString(),
        self.input_v2_kmsg_limited().SerializeToString(),
    ]
    expected_msg = self.exp_kmsg_limited()

    with test_pipeline.TestPipeline() as p:
        source = p | beam.Create(serialized_msgs)
        tagged = source | helpers._KlioTagMessageVersion()
        checked = tagged.v2 | helpers.KlioCheckRecipients()

        check_processed = beam.Map(
            self.assert_actual_is_expected, expected=expected_msg
        )
        _ = checked.process | "Assert expected processed" >> check_processed

        _ = checked.drop | "Assert no dropped msgs" >> beam.Map(
            self.assert_not_processed
        )

        _ = tagged.v1 | "Assert no v1 msgs" >> beam.Map(
            self.assert_not_processed
        )

    # The pipeline runs when the `with` block exits, so the metrics are
    # only inspected from here on.
    counters = p.result.metrics().query()["counters"]
    assert len(counters) == 0