Esempio n. 1
0
    def test_source_to_cache_not_changed_for_same_source(self, cell):
        """Check that re-applying an equivalent source is not seen as a change.

        The pipeline's source signature is cached after the first read is
        applied. The read is then applied twice more -- once with the very
        same transform object and once with a newly built ``ReadFromPubSub``
        on the same subscription -- and after each application
        ``is_source_to_cache_changed`` is asserted to be False.
        """
        with cell:  # Cell 1
            p = _build_an_empty_stream_pipeline()
            foo_source = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)

        with cell:  # Cell 2
            first_read = p | ('Read' >> foo_source)
            ib.watch({'read_foo_1': first_read})

        # Record the signature of the pipeline as it stands right now.
        env = ie.current_env()
        env.set_cached_source_signature(
            p, bcj.extract_source_to_cache_signature(p))

        with cell:  # Cell 3
            # Same label and the very same transform object as in Cell 2.
            second_read = p | ('Read' >> foo_source)
            ib.watch({'read_foo_2': second_read})

        changed_after_same_instance = bcj.is_source_to_cache_changed(p)
        self.assertFalse(changed_after_same_instance)

        with cell:  # Cell 4
            # A distinct transform object describing the same read. Its
            # signature (urn and payload) matches the earlier one, so it should
            # not count as a new unbounded source.
            equivalent_source = beam.io.ReadFromPubSub(
                subscription=_FOO_PUBSUB_SUB)
            third_read = p | ('Read' >> equivalent_source)
            ib.watch({'read_foo_3': third_read})

        changed_after_new_instance = bcj.is_source_to_cache_changed(p)
        self.assertFalse(changed_after_new_instance)
Esempio n. 2
0
    def test_source_to_cache_not_changed_when_source_is_removed(self, cell):
        """Check that pruning a source restores the earlier signature.

        A foo read is applied and its signature extracted. A bar read is then
        added, which is asserted to register as a change with a different
        cached signature. Finally the bar read is pruned from the pipeline
        graph with a visitor; the freshly extracted signature is asserted to
        equal the foo-only one and ``is_source_to_cache_changed`` to be False.
        """
        with cell:  # Cell 1
            p = _build_an_empty_stream_pipeline()
            foo_transform = beam.io.ReadFromPubSub(
                subscription=_FOO_PUBSUB_SUB)
            bar_transform = beam.io.ReadFromPubSub(
                subscription=_BAR_PUBSUB_SUB)

        with cell:  # Cell 2
            foo_pcoll = p | ('Read' >> foo_transform)
            ib.watch({'read_foo': foo_pcoll})

        foo_only_signature = bcj.extract_source_to_cache_signature(p)

        with cell:  # Cell 3
            bar_pcoll = p | ('Read' >> bar_transform)
            ib.watch({'read_bar': bar_pcoll})

        self.assertTrue(bcj.is_source_to_cache_changed(p))
        env = ie.current_env()
        foo_bar_signature = env.get_cached_source_signature(p)
        self.assertNotEqual(foo_only_signature, foo_bar_signature)

        class BarPruneVisitor(PipelineVisitor):
            """Detaches every node that wraps ``bar_transform``."""
            def enter_composite_transform(self, transform_node):
                # Keep only the child parts that do not wrap the bar read.
                transform_node.parts = tuple(
                    part for part in transform_node.parts
                    if part.transform is not bar_transform)
                self.visit_transform(transform_node)

            def visit_transform(self, transform_node):
                if transform_node.transform is not bar_transform:
                    return
                transform_node.parent = None

        p.visit(BarPruneVisitor())

        pruned_signature = bcj.extract_source_to_cache_signature(p)
        self.assertEqual(foo_only_signature, pruned_signature)
        self.assertFalse(bcj.is_source_to_cache_changed(p))
  def test_source_to_cache_changed_when_pipeline_is_first_time_seen(self, cell):
    """Check that a pipeline with no signature set by the test reports a change.

    A Pub/Sub read is applied to a new streaming pipeline and, without the
    test caching any signature first, ``is_source_to_cache_changed`` is
    asserted to be True.
    """
    with cell:  # Cell 1
      p = _build_an_empty_stream_pipeline()

    with cell:  # Cell 2
      foo_source = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)
      foo_pcoll = p | ('Read' >> foo_source)
      ib.watch({'read_foo': foo_pcoll})

    source_changed = bcj.is_source_to_cache_changed(p)
    self.assertTrue(source_changed)
Esempio n. 4
0
    def test_source_to_cache_changed_when_source_is_altered(self, cell):
        """Check that mutating an applied source is reported as a change.

        The signature of a pipeline reading from the foo subscription is
        cached, then the ``_source`` of the already-applied transform is
        replaced by one reading the bar subscription;
        ``is_source_to_cache_changed`` is asserted to be True afterwards.
        """
        with cell:  # Cell 1
            p = _build_an_empty_stream_pipeline()
            pubsub_read = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)
            foo_pcoll = p | ('Read' >> pubsub_read)
            ib.watch({'read_foo': foo_pcoll})

        # Record the signature of the pipeline as it stands right now.
        env = ie.current_env()
        env.set_cached_source_signature(
            p, bcj.extract_source_to_cache_signature(p))

        with cell:  # Cell 2
            from apache_beam.io.gcp.pubsub import _PubSubSource
            # Swap the underlying source of the transform that was applied.
            bar_source = _PubSubSource(subscription=_BAR_PUBSUB_SUB)
            pubsub_read._source = bar_source

        source_changed = bcj.is_source_to_cache_changed(p)
        self.assertTrue(source_changed)
Esempio n. 5
0
    def test_source_to_cache_changed_when_new_source_is_added(self, cell):
        """Check that adding a second, different source is reported as a change.

        The signature of a pipeline reading from the foo subscription is
        cached, a read from the bar subscription is then applied, and
        ``is_source_to_cache_changed`` is asserted to be True.
        """
        with cell:  # Cell 1
            p = _build_an_empty_stream_pipeline()
            foo_source = beam.io.ReadFromPubSub(subscription=_FOO_PUBSUB_SUB)
            foo_pcoll = p | ('Read' >> foo_source)
            ib.watch({'read_foo': foo_pcoll})

        # Record the signature of the pipeline as it stands right now.
        env = ie.current_env()
        env.set_cached_source_signature(
            p, bcj.extract_source_to_cache_signature(p))

        with cell:  # Cell 2
            bar_source = beam.io.ReadFromPubSub(subscription=_BAR_PUBSUB_SUB)
            bar_pcoll = p | ('Read' >> bar_source)
            ib.watch({'read_bar': bar_pcoll})

        source_changed = bcj.is_source_to_cache_changed(p)
        self.assertTrue(source_changed)