Example #1
0
 def test_expand_with_wrong_source(self):
     """A malformed source string must be rejected with a ValueError.

     Builds a descriptor from 'not_a_proper_source' and hands it to
     MultipleReadFromPubSub, expecting a ValueError whose message matches
     the pattern below.
     """
     # Kept verbatim: the pattern has to match the message text raised by
     # the code under test.
     expected_message = (
         r'PubSub source descriptor must be in the form '
         r'"projects/<project>/topics/<topic>"'
         ' or "projects/<project>/subscription/<subscription>".*')

     with self.assertRaisesRegex(ValueError, expected_message):
         bad_descriptor = PubSubSourceDescriptor('not_a_proper_source')
         MultipleReadFromPubSub([bad_descriptor])
Example #2
0
    def test_expand_with_multiple_sources(self):
        """Topics and subscriptions must each reach their read transform.

        Creates a streaming test pipeline reading from two topics and one
        subscription, applies the transform overrides, and then checks that
        the output element type is ``bytes`` and that the sources found on
        the upstream read transforms equal the configured ones, in order.
        """
        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        pipeline = TestPipeline(options=options)

        topics = [
            'projects/fakeprj/topics/a_topic',
            'projects/fakeprj2/topics/b_topic'
        ]
        subscriptions = ['projects/fakeprj/subscriptions/a_subscription']
        descriptors = [
            PubSubSourceDescriptor(name) for name in topics + subscriptions
        ]

        pcoll = (
            pipeline
            | MultipleReadFromPubSub(descriptors)
            | beam.Map(lambda x: x))

        # Apply the necessary PTransformOverrides.
        pipeline.replace_all(_get_transform_overrides(options))

        self.assertEqual(bytes, pcoll.element_type)

        # Ensure that the sources are passed through correctly: step back
        # from the Map output to the inputs of the step that feeds it, and
        # collect the source object held by each of their producers.
        upstream_inputs = pcoll.producer.inputs[0].producer.inputs
        found_sources = [
            upstream.producer.transform._source
            for upstream in upstream_inputs
        ]

        # A source with a truthy full_topic counts as a topic; anything else
        # is recorded by its full_subscription.
        found_topics = [
            src.full_topic for src in found_sources if src.full_topic
        ]
        found_subscriptions = [
            src.full_subscription
            for src in found_sources
            if not src.full_topic
        ]

        self.assertEqual(found_topics, topics)
        self.assertEqual(found_subscriptions, subscriptions)
Example #3
0
    def test_expand_with_multiple_sources_and_other_options(self):
        """Per-source id_label and timestamp_attribute must be propagated.

        Creates a streaming test pipeline reading from two topics and one
        subscription, each with its own ``id_label`` and
        ``timestamp_attribute``. After applying the transform overrides it
        checks that the output element type is ``bytes`` and that every
        upstream read transform's source carries the options configured for
        it, with ``with_attributes`` equal to ``False``.
        """
        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        p = TestPipeline(options=options)
        sources = [
            'projects/fakeprj/topics/a_topic',
            'projects/fakeprj2/topics/b_topic',
            'projects/fakeprj/subscriptions/a_subscription'
        ]
        id_labels = ['a_label_topic', 'b_label_topic', 'a_label_subscription']
        timestamp_attributes = [
            'a_ta_topic', 'b_ta_topic', 'a_ta_subscription'
        ]

        pubsub_sources = [
            PubSubSourceDescriptor(source=source,
                                   id_label=id_label,
                                   timestamp_attribute=timestamp_attribute)
            for source, id_label, timestamp_attribute in zip(
                sources, id_labels, timestamp_attributes)
        ]

        pcoll = (p | MultipleReadFromPubSub(pubsub_sources)
                 | beam.Map(lambda x: x))

        # Apply the necessary PTransformOverrides.
        overrides = _get_transform_overrides(options)
        p.replace_all(overrides)

        self.assertEqual(bytes, pcoll.element_type)

        # Ensure that the sources are passed through correctly
        read_transforms = pcoll.producer.inputs[0].producer.inputs

        # Without this check the loop below would pass vacuously if the
        # graph exposed fewer reads than configured sources (or none),
        # because every remaining assertion lives inside the loop.
        self.assertEqual(len(read_transforms), len(sources))

        for read_transform, id_label, timestamp_attribute in zip(
                read_transforms, id_labels, timestamp_attributes):
            source = read_transform.producer.transform._source
            self.assertEqual(source.id_label, id_label)
            self.assertEqual(source.with_attributes, False)
            self.assertEqual(source.timestamp_attribute, timestamp_attribute)