def test_expand_deprecated(self):
    """Check that WriteStringsToPubSub keeps its topic after overrides.

    Builds a streaming pipeline that reads from one topic and writes to
    another through ``WriteStringsToPubSub``, applies the transform
    overrides, and asserts on the short topic name held by the resulting
    write transform's DoFn.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub('projects/fakeprj/topics/baz')
        | WriteStringsToPubSub('projects/fakeprj/topics/a_topic')
        | beam.Map(lambda x: x))

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    write_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
def test_expand_deprecated(self):
    """WriteStringsToPubSub should carry its topic through the overrides.

    A streaming test pipeline reads from one topic, writes to another with
    ``WriteStringsToPubSub`` and ends in an identity Map. After the
    transform overrides are applied, the write transform's DoFn must
    report the short topic name ``a_topic``.
    """
    streaming_options = PipelineOptions([])
    streaming_options.view_as(StandardOptions).streaming = True
    pipeline = TestPipeline(options=streaming_options)

    messages = pipeline | ReadFromPubSub('projects/fakeprj/topics/baz')
    written = messages | WriteStringsToPubSub(
        'projects/fakeprj/topics/a_topic')
    # A PTransformOverride replaces the direct output, so a no-op Map is
    # appended and the transform under test is reached through its input.
    passthrough = written | beam.Map(lambda x: x)

    # Swap in the PTransformOverrides selected by the options.
    pipeline.replace_all(_get_transform_overrides(streaming_options))

    write_transform = passthrough.producer.inputs[0].producer.transform

    # The topic given at construction time must still be visible.
    self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
def test_expand_with_multiple_sources_and_other_options(self):
    """MultipleReadFromPubSub should forward per-source options.

    Three sources (two topics and one subscription) are each given their
    own id label and timestamp attribute. After the overrides are applied,
    every underlying read source must expose the matching id label and
    timestamp attribute, with ``with_attributes`` equal to False.
    """
    streaming_options = PipelineOptions([])
    streaming_options.view_as(StandardOptions).streaming = True
    pipeline = TestPipeline(options=streaming_options)

    sources = [
        'projects/fakeprj/topics/a_topic',
        'projects/fakeprj2/topics/b_topic',
        'projects/fakeprj/subscriptions/a_subscription',
    ]
    id_labels = ['a_label_topic', 'b_label_topic', 'a_label_subscription']
    timestamp_attributes = ['a_ta_topic', 'b_ta_topic', 'a_ta_subscription']

    # One descriptor per (source, id label, timestamp attribute) triple.
    descriptors = [
        PubSubSourceDescriptor(
            source=path,
            id_label=label,
            timestamp_attribute=attribute)
        for path, label, attribute in zip(
            sources, id_labels, timestamp_attributes)
    ]

    merged = pipeline | MultipleReadFromPubSub(descriptors)
    passthrough = merged | beam.Map(lambda x: x)

    # Swap in the PTransformOverrides selected by the options.
    pipeline.replace_all(_get_transform_overrides(streaming_options))

    self.assertEqual(bytes, passthrough.element_type)

    # Walk the inputs of the transform feeding the no-op Map and compare
    # each read source, by position, with the options it was given.
    read_outputs = passthrough.producer.inputs[0].producer.inputs
    for position, read_output in enumerate(read_outputs):
        expected_id_label = id_labels[position]
        expected_timestamp_attribute = timestamp_attributes[position]
        source = read_output.producer.transform._source
        self.assertEqual(source.id_label, expected_id_label)
        self.assertEqual(source.with_attributes, False)
        self.assertEqual(
            source.timestamp_attribute, expected_timestamp_attribute)
def test_expand_with_other_options(self):
    """Check that ReadFromPubSub forwards attribute-related options.

    Reads from a topic with ``with_attributes=True`` and a timestamp
    attribute, checks that the element type is ``PubsubMessage``, applies
    the transform overrides, and asserts that the underlying source kept
    both settings.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            'a_label',
            with_attributes=True,
            timestamp_attribute='time')
        | beam.Map(lambda x: x))
    self.assertEqual(PubsubMessage, pcoll.element_type)

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    read_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    source = read_transform._source
    self.assertTrue(source.with_attributes)
    self.assertEqual('time', source.timestamp_attribute)
def test_expand_with_subscription(self):
    """Check that ReadFromPubSub forwards a subscription and id label.

    Reads from a subscription (no topic) without attributes, checks that
    the element type is ``bytes``, applies the transform overrides, and
    asserts on the subscription name and id label held by the underlying
    source.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub(
            None,
            'projects/fakeprj/subscriptions/a_subscription',
            'a_label',
            with_attributes=False,
            timestamp_attribute=None)
        | beam.Map(lambda x: x))
    self.assertEqual(bytes, pcoll.element_type)

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    read_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    source = read_transform._source
    self.assertEqual('a_subscription', source.subscription_name)
    self.assertEqual('a_label', source.id_label)
def test_expand_with_other_options(self):
    """Check that ReadFromPubSub forwards attribute-related options.

    Reads from a topic with ``with_attributes=True`` and a timestamp
    attribute, checks that the element type is ``PubsubMessage``, applies
    the transform overrides, and asserts that the underlying source kept
    both settings.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            'a_label',
            with_attributes=True,
            timestamp_attribute='time')
        | beam.Map(lambda x: x))
    self.assertEqual(PubsubMessage, pcoll.element_type)

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    read_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    source = read_transform._source
    self.assertTrue(source.with_attributes)
    self.assertEqual('time', source.timestamp_attribute)
def test_expand_with_subscription(self):
    """Check that ReadFromPubSub forwards a subscription and id label.

    Reads from a subscription (no topic) without attributes, checks that
    the element type is ``bytes``, applies the transform overrides, and
    asserts on the subscription name and id label held by the underlying
    source.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub(
            None,
            'projects/fakeprj/subscriptions/a_subscription',
            'a_label',
            with_attributes=False,
            timestamp_attribute=None)
        | beam.Map(lambda x: x))
    self.assertEqual(bytes, pcoll.element_type)

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    read_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    source = read_transform._source
    self.assertEqual('a_subscription', source.subscription_name)
    self.assertEqual('a_label', source.id_label)
def test_expand_with_topic(self):
    """Check that ReadStringsFromPubSub forwards a topic and id label.

    Reads strings from a topic, checks that the element type is
    ``unicode``, applies the transform overrides, and asserts on the topic
    name and id label held by the underlying source.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadStringsFromPubSub(
            'projects/fakeprj/topics/a_topic', None, 'a_label')
        | beam.Map(lambda x: x))

    # Ensure that the output type is the text type (``unicode``).
    self.assertEqual(unicode, pcoll.element_type)

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadStringsFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    read_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    source = read_transform._source
    self.assertEqual('a_topic', source.topic_name)
    self.assertEqual('a_label', source.id_label)
def test_expand(self):
    """Check that WriteToPubSub forwards its settings after overrides.

    Builds a streaming pipeline that reads from one topic and writes to
    another with ``with_attributes=True``, applies the transform
    overrides, and asserts on the topic name, ``with_attributes`` flag,
    id label and timestamp attribute held by the write transform's DoFn.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub('projects/fakeprj/topics/baz')
        | WriteToPubSub(
            'projects/fakeprj/topics/a_topic', with_attributes=True)
        | beam.Map(lambda x: x))

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    write_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
    self.assertEqual(True, write_transform.dofn.with_attributes)
    # TODO(BEAM-4275): These properties aren't supported yet in direct runner.
    self.assertEqual(None, write_transform.dofn.id_label)
    self.assertEqual(None, write_transform.dofn.timestamp_attribute)
def test_expand(self):
    """Check that WriteToPubSub forwards its settings after overrides.

    Builds a streaming pipeline that reads from one topic and writes to
    another with ``with_attributes=True``, applies the transform
    overrides, and asserts on the topic name, ``with_attributes`` flag,
    id label and timestamp attribute held by the write transform's DoFn.
    """
    # Build the options first and hand them to the pipeline, instead of
    # reading them back off the pipeline through ``p.options``.
    options = PipelineOptions([])
    options.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=options)
    pcoll = (
        p
        | ReadFromPubSub('projects/fakeprj/topics/baz')
        | WriteToPubSub(
            'projects/fakeprj/topics/a_topic', with_attributes=True)
        | beam.Map(lambda x: x))

    # Apply the necessary PTransformOverrides.
    overrides = _get_transform_overrides(options)
    p.replace_all(overrides)

    # Note that the direct output of ReadFromPubSub will be replaced
    # by a PTransformOverride, so we use a no-op Map.
    write_transform = pcoll.producer.inputs[0].producer.transform

    # Ensure that the properties passed through correctly.
    self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
    self.assertEqual(True, write_transform.dofn.with_attributes)
    # TODO(BEAM-4275): These properties aren't supported yet in direct runner.
    self.assertEqual(None, write_transform.dofn.id_label)
    self.assertEqual(None, write_transform.dofn.timestamp_attribute)
def test_expand_with_multiple_sources_and_attributes(self):
    """MultipleReadFromPubSub should keep every topic and subscription.

    Two topics and one subscription are read with ``with_attributes=True``.
    After the overrides are applied, the element type must be
    ``PubsubMessage`` and the underlying read sources must expose the same
    full topic and subscription paths, in the same order.
    """
    streaming_options = PipelineOptions([])
    streaming_options.view_as(StandardOptions).streaming = True
    pipeline = TestPipeline(options=streaming_options)

    topics = [
        'projects/fakeprj/topics/a_topic',
        'projects/fakeprj2/topics/b_topic',
    ]
    subscriptions = ['projects/fakeprj/subscriptions/a_subscription']
    descriptors = [
        PubSubSourceDescriptor(path) for path in topics + subscriptions
    ]

    merged = pipeline | MultipleReadFromPubSub(
        descriptors, with_attributes=True)
    passthrough = merged | beam.Map(lambda x: x)

    # Swap in the PTransformOverrides selected by the options.
    pipeline.replace_all(_get_transform_overrides(streaming_options))

    self.assertEqual(PubsubMessage, passthrough.element_type)

    # Gather the source behind each read, then split them by whether a
    # full topic is set; sources without one are treated as subscriptions.
    read_outputs = passthrough.producer.inputs[0].producer.inputs
    read_sources = [
        read_output.producer.transform._source
        for read_output in read_outputs
    ]
    topics_list = [
        source.full_topic for source in read_sources if source.full_topic
    ]
    subscription_list = [
        source.full_subscription
        for source in read_sources
        if not source.full_topic
    ]

    self.assertEqual(topics_list, topics)
    self.assertEqual(subscription_list, subscriptions)
def run_pipeline(self, pipeline, options):
    """Run the full pipeline and return a DirectPipelineResult.

    The executor is started without blocking, so the returned result wraps
    an execution that continues on background threads.
    """
    # Klio maintainer note: this is the exact same logic as
    # direct_runner.BundleBasedDirectRunner.run_pipeline, with the
    # following changes:
    #   1. Import statements that originally lived inside this method
    #      were moved to the top of this module.
    #   2. Imports were adjusted to bring in modules rather than objects,
    #      per the Google style guide.
    #   3. The functionality we needed to override: handing our own
    #      TransformEvaluatorRegistry to the Executor class when it is
    #      instantiated (called out below).

    class TestStreamUsageVisitor(beam_pipeline.PipelineVisitor):
        """Visitor that records whether a Pipeline uses a TestStream."""

        def __init__(self):
            self.uses_test_stream = False

        def visit_transform(self, applied_ptransform):
            if isinstance(
                    applied_ptransform.transform, test_stream.TestStream):
                self.uses_test_stream = True

    usage_visitor = TestStreamUsageVisitor()
    pipeline.visit(usage_visitor)

    # A pipeline that uses the TestStream I/O runs against a mock test
    # clock; anything else gets the real clock.
    if usage_visitor.uses_test_stream:
        clock = beam_clock.TestClock()
    else:
        clock = beam_clock.RealClock()

    # Perform the configured PTransform overrides.
    pipeline.replace_all(direct_runner._get_transform_overrides(options))

    _LOGGER.info("Running pipeline with Klio's GkeDirectRunner.")
    tracker = self.consumer_tracking_visitor = (
        ctpv.ConsumerTrackingPipelineVisitor())
    pipeline.visit(tracker)

    direct_options = options.view_as(pipeline_options.DirectOptions)
    bndl_factory = bundle_factory.BundleFactory(
        stacked=direct_options.direct_runner_use_stacked_bundle)
    evaluation_context = eval_ctx.EvaluationContext(
        options,
        bndl_factory,
        tracker.root_transforms,
        tracker.value_to_consumers,
        tracker.step_names,
        tracker.views,
        clock,
    )

    # Klio maintainer note: this is where the logic differs from upstream;
    # our own `KlioTransformEvaluatorRegistry` is used.
    registry = evaluators.KlioTransformEvaluatorRegistry(evaluation_context)
    executor = beam_exec.Executor(
        tracker.value_to_consumers,
        registry,
        evaluation_context,
    )

    # DirectRunner does not support injecting PipelineOptions values at
    # runtime.
    value_provider.RuntimeValueProvider.set_runtime_options({})

    # Start the executor. This is a non-blocking call: it starts the
    # execution in background threads and returns.
    executor.start(tracker.root_transforms)
    return direct_runner.DirectPipelineResult(executor, evaluation_context)