예제 #1
0
    def test_read_from_pubsub_flaky(self):
        """Read ten messages through a client whose ``pull`` fails often.

        The mocked ``pull`` cycles through ``RetryError``,
        ``DeadlineExceeded`` and a successful single-message response. The
        test expects ``read_from_pubsub`` to still return ten payloads and
        to have acked ten times.
        """
        class FlakyPullResponse(object):
            """Callable side effect: raise, raise, succeed -- repeatedly."""
            def __init__(self, pull_response):
                self.pull_response = pull_response
                # Starts at -1 so the first call lands on phase 0.
                self._state = -1

            def __call__(self, *args, **kwargs):
                self._state += 1
                phase = self._state % 3
                if phase == 0:
                    raise gexc.RetryError("", "")
                if phase == 1:
                    raise gexc.DeadlineExceeded("")
                return self.pull_response

        data = b'data'
        ack_id = 'ack_id'
        number_of_elements = 10
        subscription_path = "project/fakeproj/subscriptions/fakesub"
        response_message = test_utils.PullResponseMessage(data, ack_id=ack_id)
        pull_response = test_utils.create_pull_response([response_message])

        mock_pubsub = mock.Mock()
        mock_pubsub.pull.side_effect = FlakyPullResponse(pull_response)

        output = utils.read_from_pubsub(
            mock_pubsub,
            subscription_path,
            number_of_elements=number_of_elements)

        self.assertEqual([data] * number_of_elements, output)
        self._assert_ack_ids_equal(mock_pubsub, [ack_id] * number_of_elements)
예제 #2
0
    def test_read_messages_timestamp_attribute_missing(self, mock_pubsub):
        """Read a message that lacks the requested timestamp attribute.

        ``timestamp_attribute='nonexistent'`` names a key that is absent
        from the (empty) attributes. The expected element carries the
        timestamp parsed from ``publish_time``; the message must be acked
        and the client's transport channel closed.
        """
        data = b'data'
        attributes = {}
        ack_id = 'ack_id'
        # RFC 3339 rendering of the secs/nanos pair given to the response.
        publish_time = '2018-03-12T13:37:01.234567Z'
        publish_time_secs = 1520861821
        publish_time_nanos = 234567000
        message = test_utils.PullResponseMessage(
            data, attributes, publish_time_secs, publish_time_nanos, ack_id)
        pull_response = test_utils.create_pull_response([message])

        expected_timestamp = timestamp.Timestamp.from_rfc3339(publish_time)
        expected_elements = [
            TestWindowedValue(PubsubMessage(data, attributes),
                              expected_timestamp,
                              [window.GlobalWindow()]),
        ]

        client = mock_pubsub.return_value
        client.pull.return_value = pull_response

        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        with TestPipeline(options=options) as p:
            read_transform = ReadFromPubSub(
                'projects/fakeprj/topics/a_topic',
                None,
                None,
                with_attributes=True,
                timestamp_attribute='nonexistent')
            pcoll = p | read_transform
            assert_that(pcoll, equal_to(expected_elements), reify_windows=True)

        client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
        client.api.transport.channel.close.assert_has_calls([mock.call()])
예제 #3
0
    def test_read_messages_timestamp_attribute_fail_parse(self, mock_pubsub):
        """Running the pipeline raises when the timestamp attribute is junk.

        The ``time`` attribute holds ``'1337 unparseable'``. ``p.run()`` is
        expected to raise a ``ValueError`` whose message matches ``parse``;
        no ack may be sent, and the transport channel must still be closed.
        """
        data = b'data'
        ack_id = 'ack_id'
        attributes = {'time': '1337 unparseable'}
        publish_time_secs = 1520861821
        publish_time_nanos = 234567000
        message = test_utils.PullResponseMessage(
            data, attributes, publish_time_secs, publish_time_nanos, ack_id)

        client = mock_pubsub.return_value
        client.pull.return_value = test_utils.create_pull_response([message])

        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        p = TestPipeline(options=options)
        read_transform = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True,
            timestamp_attribute='time')
        _ = p | read_transform

        with self.assertRaisesRegex(ValueError, r'parse'):
            p.run()

        client.acknowledge.assert_not_called()
        client.api.transport.channel.close.assert_has_calls([mock.call()])
예제 #4
0
    def test_read_messages_timestamp_attribute_milli_success(
            self, mock_pubsub):
        """Read a message whose ``time`` attribute is an integer string.

        The expected element timestamp is built from the attribute value
        multiplied by 1000 and passed as ``micros``, i.e. the attribute is
        treated as milliseconds. The message must be acked (keyword-style
        call) and the client closed.
        """
        data = b'data'
        ack_id = 'ack_id'
        attributes = {'time': '1337'}
        publish_time_secs = 1520861821
        publish_time_nanos = 234567000
        message = test_utils.PullResponseMessage(
            data, attributes, publish_time_secs, publish_time_nanos, ack_id)
        pull_response = test_utils.create_pull_response([message])

        expected_micros = int(attributes['time']) * 1000
        expected_elements = [
            TestWindowedValue(
                PubsubMessage(data, attributes),
                timestamp.Timestamp(micros=expected_micros),
                [window.GlobalWindow()]),
        ]

        client = mock_pubsub.return_value
        client.pull.return_value = pull_response

        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        with TestPipeline(options=options) as p:
            read_transform = ReadFromPubSub(
                'projects/fakeprj/topics/a_topic',
                None,
                None,
                with_attributes=True,
                timestamp_attribute='time')
            pcoll = p | read_transform
            assert_that(pcoll, equal_to(expected_elements), reify_windows=True)

        client.acknowledge.assert_has_calls(
            [mock.call(subscription=mock.ANY, ack_ids=[ack_id])])
        client.close.assert_has_calls([mock.call()])
예제 #5
0
    def test_read_messages_timestamp_attribute_rfc3339_success(
            self, mock_pubsub):
        """Read a message whose ``time`` attribute is an RFC 3339 string.

        The expected element timestamp is parsed from the attribute, not
        from the (different) publish time given to the pull response. The
        message must be acked after the pipeline runs.
        """
        data = 'data'
        ack_id = 'ack_id'
        attributes = {'time': '2018-03-12T13:37:01.234567Z'}
        publish_time_secs = 1337000000
        publish_time_nanos = 133700000
        message = test_utils.PullResponseMessage(
            data, attributes, publish_time_secs, publish_time_nanos, ack_id)
        pull_response = test_utils.create_pull_response([message])

        expected_timestamp = timestamp.Timestamp.from_rfc3339(
            attributes['time'])
        expected_elements = [
            TestWindowedValue(
                PubsubMessage(data, attributes),
                expected_timestamp,
                [window.GlobalWindow()]),
        ]

        client = mock_pubsub.return_value
        client.pull.return_value = pull_response

        p = TestPipeline()
        p.options.view_as(StandardOptions).streaming = True
        read_transform = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True,
            timestamp_attribute='time')
        pcoll = p | read_transform
        assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
        p.run()

        client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
예제 #6
0
def test_read_messages_timestamp_attribute_fail_parse(patch_sub_client):
    """Running the pipeline raises when the timestamp attribute is junk.

    A serialized ``KlioMessage`` is delivered with a ``time`` attribute of
    ``"1337 unparseable"``. ``p.run()`` is expected to raise a
    ``ValueError`` matching ``parse``; no ack may be sent, and the
    transport channel's ``close`` must have been called.
    """
    klio_message = klio_pb2.KlioMessage()
    klio_message.data.element = bytes("entity_id", "utf-8")
    data = klio_message.SerializeToString()

    ack_id = "ack_id"
    attributes = {"time": "1337 unparseable"}
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000
    message = beam_test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    patch_sub_client.pull.return_value = (
        beam_test_utils.create_pull_response([message]))

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True
    p = beam_test_pipeline.TestPipeline(options=options)
    read_transform = b_pubsub.ReadFromPubSub(
        "projects/fakeprj/topics/a_topic",
        None,
        None,
        with_attributes=True,
        timestamp_attribute="time",
    )
    _ = p | read_transform

    with pytest.raises(ValueError, match=r"parse"):
        p.run()

    patch_sub_client.acknowledge.assert_not_called()
    patch_sub_client.api.transport.channel.close.assert_called_with()
예제 #7
0
def test_read_messages_timestamp_attribute_rfc3339_success(
    mocker,
    patch_sub_client,
    patch_msg_manager,
):
    """Read one Klio message timestamped from an RFC 3339 attribute.

    Verifies the element and timestamp produced by the read, then checks
    the behaviour that differs from the stock reader: no automatic ack,
    a message manager created for the subscription path, the message
    handed to that manager, and a single-message pull.
    """
    klio_message = klio_pb2.KlioMessage()
    klio_message.data.element = bytes("entity_id", "utf-8")
    data = klio_message.SerializeToString()

    ack_id = "ack_id"
    attributes = {"time": "2018-03-12T13:37:01.234567Z"}
    publish_time_secs = 1337000000
    publish_time_nanos = 133700000
    message = beam_test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    patch_sub_client.pull.return_value = (
        beam_test_utils.create_pull_response([message]))

    pmsg = b_pubsub.PubsubMessage(data, attributes)
    expected_timestamp = beam_utils.timestamp.Timestamp.from_rfc3339(
        attributes["time"])
    expected_elements = [
        beam_testing_util.TestWindowedValue(
            pmsg,
            expected_timestamp,
            [beam_transforms.window.GlobalWindow()],
        ),
    ]

    options = pipeline_options.PipelineOptions([])
    options.view_as(pipeline_options.StandardOptions).streaming = True
    with beam_test_pipeline.TestPipeline(options=options) as p:
        read_transform = b_pubsub.ReadFromPubSub(
            "projects/fakeprj/topics/a_topic",
            None,
            None,
            with_attributes=True,
            timestamp_attribute="time",
        )
        pcoll = p | read_transform
        # Original functionality that was kept the same.
        beam_testing_util.assert_that(
            pcoll,
            beam_testing_util.equal_to(expected_elements),
            reify_windows=True,
        )

    # Overridden functionality:
    # 1. Auto-acking is skipped.
    patch_sub_client.acknowledge.assert_not_called()
    # 2. MessageManager daemon threads were started.
    expected_subscription = patch_sub_client.subscription_path()
    patch_msg_manager.assert_called_once_with(expected_subscription)
    # 3. The message was added to the MessageManager.
    manager = patch_msg_manager.return_value
    manager.add.assert_called_once_with(ack_id, pmsg)
    # 4. One message is handled at a time, instead of the original 10.
    patch_sub_client.pull.assert_called_once_with(
        mocker.ANY, max_messages=1, return_immediately=True)

    patch_sub_client.api.transport.channel.close.assert_called_once_with()
예제 #8
0
    def test_read_messages_success(self, mock_pubsub):
        """Read one message with attributes and no timestamp attribute.

        The expected element is a ``PubsubMessage`` in the global window
        with timestamp ``1520861821.234567``, matching the secs/nanos pair
        given to the pull response. The message must be acked after the
        run.
        """
        data = 'data'
        ack_id = 'ack_id'
        attributes = {'key': 'value'}
        publish_time_secs = 1520861821
        publish_time_nanos = 234567000
        message = test_utils.PullResponseMessage(
            data, attributes, publish_time_secs, publish_time_nanos, ack_id)
        pull_response = test_utils.create_pull_response([message])

        expected_elements = [
            TestWindowedValue(PubsubMessage(data, attributes),
                              timestamp.Timestamp(1520861821.234567),
                              [window.GlobalWindow()])
        ]

        client = mock_pubsub.return_value
        client.pull.return_value = pull_response

        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        p = TestPipeline(options=options)
        read_transform = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True)
        pcoll = p | read_transform
        assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
        p.run()

        client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
예제 #9
0
 def test_read_from_pubsub(self):
   """Read a single payload and ack it exactly once (positional args)."""
   data = b'data'
   ack_id = 'ack_id'
   subscription_path = "project/fakeproj/subscriptions/fakesub"
   response_message = test_utils.PullResponseMessage(data, ack_id=ack_id)

   mock_pubsub = mock.Mock()
   mock_pubsub.pull.return_value = test_utils.create_pull_response(
       [response_message])

   output = utils.read_from_pubsub(
       mock_pubsub, subscription_path, number_of_elements=1)

   self.assertEqual([data], output)
   mock_pubsub.acknowledge.assert_called_once_with(subscription_path, [ack_id])
예제 #10
0
    def test_read_data_success(self, mock_pubsub):
        """Read raw bytes: the UTF-8 encoded payload comes back unchanged.

        No ``with_attributes`` flag is passed, and the expected element is
        the encoded bytes themselves. The message must be acked after the
        run.
        """
        ack_id = 'ack_id'
        data_encoded = u'🤷 ¯\\_(ツ)_/¯'.encode('utf-8')
        expected_elements = [data_encoded]
        response_message = test_utils.PullResponseMessage(
            data_encoded, ack_id=ack_id)

        client = mock_pubsub.return_value
        client.pull.return_value = test_utils.create_pull_response(
            [response_message])

        p = TestPipeline()
        p.options.view_as(StandardOptions).streaming = True
        read_transform = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic', None, None)
        pcoll = p | read_transform
        assert_that(pcoll, equal_to(expected_elements))
        p.run()

        client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
예제 #11
0
 def test_read_from_pubsub_with_attributes(self):
     """Read one message as a ``PubsubMessage`` and ack it by keyword."""
     data = b'data'
     ack_id = 'ack_id'
     attributes = {'key': 'value'}
     subscription_path = "project/fakeproj/subscriptions/fakesub"
     message = PubsubMessage(data, attributes)
     response_message = test_utils.PullResponseMessage(
         data, attributes, ack_id=ack_id)

     mock_pubsub = mock.Mock()
     mock_pubsub.pull.return_value = test_utils.create_pull_response(
         [response_message])

     output = utils.read_from_pubsub(
         mock_pubsub,
         subscription_path,
         with_attributes=True,
         number_of_elements=1)

     self.assertEqual([message], output)
     mock_pubsub.acknowledge.assert_called_once_with(
         subscription=subscription_path, ack_ids=[ack_id])
예제 #12
0
    def test_read_from_pubsub_many(self):
        """Read 100 messages that the mocked client serves 33 at a time.

        Each ``pull`` returns the next slice of at most ``response_size``
        messages. The test expects every message back in order and every
        ack id acknowledged.
        """
        class SequentialPullResponse(object):
            """Callable side effect that serves consecutive message slices."""
            def __init__(self, response_messages, response_size):
                self.response_messages = response_messages
                self.response_size = response_size
                self._index = 0

            def __call__(self, *args, **kwargs):
                start = self._index
                stop = start + self.response_size
                self._index = stop
                return test_utils.create_pull_response(
                    self.response_messages[start:stop])

        response_size = 33
        number_of_elements = 100
        subscription_path = "project/fakeproj/subscriptions/fakesub"

        indices = range(number_of_elements)
        data_list = ['data {}'.format(i).encode("utf-8") for i in indices]
        attributes_list = [{'key': 'value {}'.format(i)} for i in indices]
        ack_ids = ['ack_id_{}'.format(i) for i in indices]

        messages = [
            PubsubMessage(payload, attrs)
            for payload, attrs in zip(data_list, attributes_list)
        ]
        response_messages = [
            test_utils.PullResponseMessage(payload, attrs, ack_id=ack)
            for payload, attrs, ack in zip(data_list, attributes_list,
                                           ack_ids)
        ]

        mock_pubsub = mock.Mock()
        mock_pubsub.pull.side_effect = SequentialPullResponse(
            response_messages, response_size)

        output = utils.read_from_pubsub(
            mock_pubsub,
            subscription_path,
            with_attributes=True,
            number_of_elements=number_of_elements)

        self.assertEqual(messages, output)
        self._assert_ack_ids_equal(mock_pubsub, ack_ids)
예제 #13
0
    def test_read_strings_success(self, mock_pubsub):
        """Read text: UTF-8 bytes on the wire come back as the original str.

        The pull response carries the encoded payload while the expected
        element is the unencoded string. The message must be acked
        (keyword-style call) and the client closed.
        """
        ack_id = 'ack_id'
        data = u'🤷 ¯\\_(ツ)_/¯'
        data_encoded = data.encode('utf-8')
        expected_elements = [data]
        response_message = test_utils.PullResponseMessage(
            data_encoded, ack_id=ack_id)

        client = mock_pubsub.return_value
        client.pull.return_value = test_utils.create_pull_response(
            [response_message])

        options = PipelineOptions([])
        options.view_as(StandardOptions).streaming = True
        with TestPipeline(options=options) as p:
            read_transform = ReadStringsFromPubSub(
                'projects/fakeprj/topics/a_topic', None, None)
            pcoll = p | read_transform
            assert_that(pcoll, equal_to(expected_elements))

        client.acknowledge.assert_has_calls(
            [mock.call(subscription=mock.ANY, ack_ids=[ack_id])])
        client.close.assert_has_calls([mock.call()])