Ejemplo n.º 1
0
  def test_expand_deprecated(self):
    """Check that WriteStringsToPubSub carries its topic name to the DoFn.

    Builds a streaming pipeline that reads from one topic, writes strings to
    another and ends in a no-op Map, applies the transform overrides, and
    then inspects the write transform found upstream of the Map.
    """
    pipeline = TestPipeline()
    standard_options = pipeline.options.view_as(StandardOptions)
    standard_options.streaming = True

    messages = pipeline | ReadFromPubSub('projects/fakeprj/topics/baz')
    written = messages | WriteStringsToPubSub(
        'projects/fakeprj/topics/a_topic')
    passthrough = written | beam.Map(lambda x: x)

    # Swap in the PTransformOverrides selected by the pipeline options.
    pipeline.replace_all(_get_transform_overrides(pipeline.options))

    # The direct output of ReadFromPubSub gets replaced by a
    # PTransformOverride, so navigation starts from the trailing no-op Map.
    write_transform = passthrough.producer.inputs[0].producer.transform

    # The short topic name must have been passed through to the DoFn.
    self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
Ejemplo n.º 2
0
    def test_expand_deprecated(self):
        """Check that WriteStringsToPubSub carries its topic name to the DoFn.

        The pipeline is created from explicitly built streaming options; the
        same options object is later used to look up the transform overrides.
        """
        options = PipelineOptions([])
        standard_options = options.view_as(StandardOptions)
        standard_options.streaming = True
        pipeline = TestPipeline(options=options)

        messages = pipeline | ReadFromPubSub('projects/fakeprj/topics/baz')
        written = messages | WriteStringsToPubSub(
            'projects/fakeprj/topics/a_topic')
        passthrough = written | beam.Map(lambda x: x)

        # Swap in the PTransformOverrides selected by the options.
        pipeline.replace_all(_get_transform_overrides(options))

        # The direct output of ReadFromPubSub gets replaced by a
        # PTransformOverride, so navigation starts from the no-op Map.
        write_transform = passthrough.producer.inputs[0].producer.transform

        # The short topic name must have been passed through to the DoFn.
        self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
Ejemplo n.º 3
0
    def test_expand_with_multiple_sources_and_other_options(self):
        """Check that per-source options reach every expanded read transform.

        Three sources (two topics, one subscription) are each given their
        own id label and timestamp attribute. After the transform overrides
        are applied, every read transform feeding the trailing no-op Map
        must expose the options of the source at the same position.
        """
        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        p = TestPipeline(options=options)
        sources = [
            'projects/fakeprj/topics/a_topic',
            'projects/fakeprj2/topics/b_topic',
            'projects/fakeprj/subscriptions/a_subscription'
        ]
        id_labels = ['a_label_topic', 'b_label_topic', 'a_label_subscription']
        timestamp_attributes = [
            'a_ta_topic', 'b_ta_topic', 'a_ta_subscription'
        ]

        pubsub_sources = [
            PubSubSourceDescriptor(source=source,
                                   id_label=id_label,
                                   timestamp_attribute=timestamp_attribute)
            for source, id_label, timestamp_attribute in zip(
                sources, id_labels, timestamp_attributes)
        ]

        pcoll = (p | MultipleReadFromPubSub(pubsub_sources)
                 | beam.Map(lambda x: x))

        # Apply the necessary PTransformOverrides.
        overrides = _get_transform_overrides(options)
        p.replace_all(overrides)

        self.assertEqual(bytes, pcoll.element_type)

        # Ensure that the sources are passed through correctly.
        read_transforms = list(pcoll.producer.inputs[0].producer.inputs)

        # Without this check the loop below would pass vacuously whenever
        # the expansion produced fewer read transforms than sources.
        self.assertEqual(len(sources), len(read_transforms))

        for read_transform, id_label, timestamp_attribute in zip(
                read_transforms, id_labels, timestamp_attributes):
            source = read_transform.producer.transform._source
            self.assertEqual(source.id_label, id_label)
            self.assertEqual(source.with_attributes, False)
            self.assertEqual(source.timestamp_attribute, timestamp_attribute)
Ejemplo n.º 4
0
  def test_expand_with_other_options(self):
    """Check with_attributes and timestamp_attribute reach the read source.

    The element type is asserted before the overrides are applied; the
    source properties are asserted afterwards.
    """
    pipeline = TestPipeline()
    standard_options = pipeline.options.view_as(StandardOptions)
    standard_options.streaming = True

    reader = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        'a_label',
        with_attributes=True,
        timestamp_attribute='time')
    messages = pipeline | reader
    passthrough = messages | beam.Map(lambda x: x)
    self.assertEqual(PubsubMessage, passthrough.element_type)

    # Swap in the PTransformOverrides selected by the pipeline options.
    pipeline.replace_all(_get_transform_overrides(pipeline.options))

    # The direct output of ReadFromPubSub gets replaced by a
    # PTransformOverride, so navigation starts from the trailing no-op Map.
    read_transform = passthrough.producer.inputs[0].producer.transform

    # The options given to ReadFromPubSub must show up on the source.
    pubsub_source = read_transform._source
    self.assertTrue(pubsub_source.with_attributes)
    self.assertEqual('time', pubsub_source.timestamp_attribute)
Ejemplo n.º 5
0
  def test_expand_with_subscription(self):
    """Check a subscription read keeps its subscription name and id label.

    The element type is asserted before the overrides are applied; the
    source properties are asserted afterwards.
    """
    pipeline = TestPipeline()
    standard_options = pipeline.options.view_as(StandardOptions)
    standard_options.streaming = True

    reader = ReadFromPubSub(
        None,
        'projects/fakeprj/subscriptions/a_subscription',
        'a_label',
        with_attributes=False,
        timestamp_attribute=None)
    messages = pipeline | reader
    passthrough = messages | beam.Map(lambda x: x)
    self.assertEqual(bytes, passthrough.element_type)

    # Swap in the PTransformOverrides selected by the pipeline options.
    pipeline.replace_all(_get_transform_overrides(pipeline.options))

    # The direct output of ReadFromPubSub gets replaced by a
    # PTransformOverride, so navigation starts from the trailing no-op Map.
    read_transform = passthrough.producer.inputs[0].producer.transform

    # The options given to ReadFromPubSub must show up on the source.
    pubsub_source = read_transform._source
    self.assertEqual('a_subscription', pubsub_source.subscription_name)
    self.assertEqual('a_label', pubsub_source.id_label)
Ejemplo n.º 6
0
  def test_expand_with_other_options(self):
    """Check with_attributes and timestamp_attribute reach the read source.

    The element type is asserted before the overrides are applied; the
    source properties are asserted afterwards.
    """
    pipeline = TestPipeline()
    standard_options = pipeline.options.view_as(StandardOptions)
    standard_options.streaming = True

    reader = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        'a_label',
        with_attributes=True,
        timestamp_attribute='time')
    messages = pipeline | reader
    passthrough = messages | beam.Map(lambda x: x)
    self.assertEqual(PubsubMessage, passthrough.element_type)

    # Swap in the PTransformOverrides selected by the pipeline options.
    pipeline.replace_all(_get_transform_overrides(pipeline.options))

    # The direct output of ReadFromPubSub gets replaced by a
    # PTransformOverride, so navigation starts from the trailing no-op Map.
    read_transform = passthrough.producer.inputs[0].producer.transform

    # The options given to ReadFromPubSub must show up on the source.
    pubsub_source = read_transform._source
    self.assertTrue(pubsub_source.with_attributes)
    self.assertEqual('time', pubsub_source.timestamp_attribute)
Ejemplo n.º 7
0
  def test_expand_with_subscription(self):
    """Check a subscription read keeps its subscription name and id label.

    The element type is asserted before the overrides are applied; the
    source properties are asserted afterwards.
    """
    pipeline = TestPipeline()
    standard_options = pipeline.options.view_as(StandardOptions)
    standard_options.streaming = True

    reader = ReadFromPubSub(
        None,
        'projects/fakeprj/subscriptions/a_subscription',
        'a_label',
        with_attributes=False,
        timestamp_attribute=None)
    messages = pipeline | reader
    passthrough = messages | beam.Map(lambda x: x)
    self.assertEqual(bytes, passthrough.element_type)

    # Swap in the PTransformOverrides selected by the pipeline options.
    pipeline.replace_all(_get_transform_overrides(pipeline.options))

    # The direct output of ReadFromPubSub gets replaced by a
    # PTransformOverride, so navigation starts from the trailing no-op Map.
    read_transform = passthrough.producer.inputs[0].producer.transform

    # The options given to ReadFromPubSub must show up on the source.
    pubsub_source = read_transform._source
    self.assertEqual('a_subscription', pubsub_source.subscription_name)
    self.assertEqual('a_label', pubsub_source.id_label)
Ejemplo n.º 8
0
    def test_expand_with_topic(self):
        """Check a topic read via ReadStringsFromPubSub keeps topic and label.

        The element type is asserted before the overrides are applied; the
        source properties are asserted afterwards.
        """
        pipeline = TestPipeline()
        standard_options = pipeline.options.view_as(StandardOptions)
        standard_options.streaming = True

        reader = ReadStringsFromPubSub(
            'projects/fakeprj/topics/a_topic', None, 'a_label')
        messages = pipeline | reader
        passthrough = messages | beam.Map(lambda x: x)

        # The output element type is expected to be the text type.
        # NOTE(review): `unicode` has to be provided by the enclosing module
        # (not visible here) -- confirm it is imported.
        self.assertEqual(unicode, passthrough.element_type)

        # Swap in the PTransformOverrides selected by the pipeline options.
        pipeline.replace_all(_get_transform_overrides(pipeline.options))

        # The direct output of ReadStringsFromPubSub gets replaced by a
        # PTransformOverride, so navigation starts from the no-op Map.
        read_transform = passthrough.producer.inputs[0].producer.transform

        # The options given to the read must show up on the source.
        pubsub_source = read_transform._source
        self.assertEqual('a_topic', pubsub_source.topic_name)
        self.assertEqual('a_label', pubsub_source.id_label)
Ejemplo n.º 9
0
    def test_expand(self):
        """Check that WriteToPubSub passes its settings through to the DoFn.

        Builds a streaming pipeline that reads from one topic, writes to
        another with attributes enabled and ends in a no-op Map, applies the
        transform overrides, and inspects the write transform's DoFn.
        """
        pipeline = TestPipeline()
        standard_options = pipeline.options.view_as(StandardOptions)
        standard_options.streaming = True

        messages = pipeline | ReadFromPubSub('projects/fakeprj/topics/baz')
        written = messages | WriteToPubSub(
            'projects/fakeprj/topics/a_topic', with_attributes=True)
        passthrough = written | beam.Map(lambda x: x)

        # Swap in the PTransformOverrides selected by the pipeline options.
        pipeline.replace_all(_get_transform_overrides(pipeline.options))

        # The direct output of ReadFromPubSub gets replaced by a
        # PTransformOverride, so navigation starts from the no-op Map.
        write_transform = passthrough.producer.inputs[0].producer.transform

        # The settings given to WriteToPubSub must show up on the DoFn.
        self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
        self.assertEqual(True, write_transform.dofn.with_attributes)
        # TODO(BEAM-4275): These properties aren't supported yet in direct
        # runner.
        self.assertEqual(None, write_transform.dofn.id_label)
        self.assertEqual(None, write_transform.dofn.timestamp_attribute)
Ejemplo n.º 10
0
  def test_expand(self):
    """Check that WriteToPubSub passes its settings through to the DoFn.

    Builds a streaming pipeline that reads from one topic, writes to another
    with attributes enabled and ends in a no-op Map, applies the transform
    overrides, and inspects the write transform's DoFn.
    """
    pipeline = TestPipeline()
    standard_options = pipeline.options.view_as(StandardOptions)
    standard_options.streaming = True

    messages = pipeline | ReadFromPubSub('projects/fakeprj/topics/baz')
    written = messages | WriteToPubSub(
        'projects/fakeprj/topics/a_topic', with_attributes=True)
    passthrough = written | beam.Map(lambda x: x)

    # Swap in the PTransformOverrides selected by the pipeline options.
    pipeline.replace_all(_get_transform_overrides(pipeline.options))

    # The direct output of ReadFromPubSub gets replaced by a
    # PTransformOverride, so navigation starts from the trailing no-op Map.
    write_transform = passthrough.producer.inputs[0].producer.transform

    # The settings given to WriteToPubSub must show up on the DoFn.
    self.assertEqual('a_topic', write_transform.dofn.short_topic_name)
    self.assertEqual(True, write_transform.dofn.with_attributes)
    # TODO(BEAM-4275): These properties aren't supported yet in direct runner.
    self.assertEqual(None, write_transform.dofn.id_label)
    self.assertEqual(None, write_transform.dofn.timestamp_attribute)
Ejemplo n.º 11
0
    def test_expand_with_multiple_sources_and_attributes(self):
        """Check that every configured topic and subscription is read.

        Two topics and one subscription are handed to MultipleReadFromPubSub
        with attributes enabled. After the transform overrides are applied,
        the full topic / subscription paths found on the read sources must
        equal the configured lists.
        """
        options = PipelineOptions([])
        standard_options = options.view_as(StandardOptions)
        standard_options.streaming = True
        pipeline = TestPipeline(options=options)

        topics = [
            'projects/fakeprj/topics/a_topic',
            'projects/fakeprj2/topics/b_topic'
        ]
        subscriptions = ['projects/fakeprj/subscriptions/a_subscription']
        descriptors = [
            PubSubSourceDescriptor(path) for path in topics + subscriptions
        ]

        merged = pipeline | MultipleReadFromPubSub(
            descriptors, with_attributes=True)
        passthrough = merged | beam.Map(lambda x: x)

        # Swap in the PTransformOverrides selected by the options.
        pipeline.replace_all(_get_transform_overrides(options))

        self.assertEqual(PubsubMessage, passthrough.element_type)

        # Walk back from the no-op Map to the inputs of the transform that
        # produced its input, and sort each source into topics or
        # subscriptions depending on whether it has a full topic set.
        found_topics = []
        found_subscriptions = []
        for read_input in passthrough.producer.inputs[0].producer.inputs:
            pubsub_source = read_input.producer.transform._source
            if not pubsub_source.full_topic:
                found_subscriptions.append(pubsub_source.full_subscription)
                continue
            found_topics.append(pubsub_source.full_topic)

        self.assertEqual(found_topics, topics)
        self.assertEqual(found_subscriptions, subscriptions)
Ejemplo n.º 12
0
    def run_pipeline(self, pipeline, options):
        """Execute the entire pipeline and return a DirectPipelineResult.

        Args:
            pipeline: the pipeline to run. It is visited twice (TestStream
                detection and consumer tracking) and has the direct runner's
                transform overrides applied via ``replace_all``.
            options: pipeline options; used to select the transform
                overrides, to read the ``direct_runner_use_stacked_bundle``
                setting and to build the evaluation context.

        Returns:
            A ``direct_runner.DirectPipelineResult`` wrapping the started
            executor and its evaluation context.
        """

        # Klio maintainer note: This code is the exact same logic as in
        # direct_runner.BundleBasedDirectRunner.run_pipeline with the
        # following changes:
        # 1. Import statements that were originally inside this method
        #    were moved to the top of this module.
        # 2. Import statements were adjusted to import modules and not
        #    objects, according to the Google style guide.
        # 3. The functionality we needed to override, which is invoking
        #    our own TransformEvaluatorRegistry when instantiating the
        #    Executor class (called out below).

        # If the TestStream I/O is used, use a mock test clock.
        class TestStreamUsageVisitor(beam_pipeline.PipelineVisitor):
            """Visitor determining whether a Pipeline uses a TestStream."""
            def __init__(self):
                # Flipped to True by visit_transform when a TestStream
                # transform is encountered.
                self.uses_test_stream = False

            def visit_transform(self, applied_ptransform):
                if isinstance(applied_ptransform.transform,
                              test_stream.TestStream):
                    self.uses_test_stream = True

        visitor = TestStreamUsageVisitor()
        pipeline.visit(visitor)
        clock = (beam_clock.TestClock()
                 if visitor.uses_test_stream else beam_clock.RealClock())

        # Perform the configured PTransform overrides.
        pipeline.replace_all(direct_runner._get_transform_overrides(options))

        _LOGGER.info("Running pipeline with Klio's GkeDirectRunner.")
        # The tracking visitor is kept on self; its root transforms,
        # value-to-consumer mapping, step names and views feed the
        # evaluation context and the executor below.
        self.consumer_tracking_visitor = ctpv.ConsumerTrackingPipelineVisitor()
        pipeline.visit(self.consumer_tracking_visitor)

        bndl_factory = bundle_factory.BundleFactory(stacked=options.view_as(
            pipeline_options.DirectOptions).direct_runner_use_stacked_bundle)
        evaluation_context = eval_ctx.EvaluationContext(
            options,
            bndl_factory,
            self.consumer_tracking_visitor.root_transforms,
            self.consumer_tracking_visitor.value_to_consumers,
            self.consumer_tracking_visitor.step_names,
            self.consumer_tracking_visitor.views,
            clock,
        )

        # Klio maintainer note: this is where the change in logic is:
        # using our own `KlioTransformEvaluatorRegistry`.
        executor = beam_exec.Executor(
            self.consumer_tracking_visitor.value_to_consumers,
            evaluators.KlioTransformEvaluatorRegistry(evaluation_context),
            evaluation_context,
        )
        # DirectRunner does not support injecting
        # PipelineOptions values at runtime, so an empty mapping is set.
        value_provider.RuntimeValueProvider.set_runtime_options({})
        # Start the executor. This is a non-blocking call; it will start the
        # execution in background threads and return.
        executor.start(self.consumer_tracking_visitor.root_transforms)
        result = direct_runner.DirectPipelineResult(executor,
                                                    evaluation_context)

        return result