def test_read_from_pubsub_flaky(self):
    """Read elements while the mocked pull fails two calls out of three.

    The pull side effect cycles through raising ``RetryError``, raising
    ``DeadlineExceeded`` and returning the canned response. The test expects
    the read to still yield the payload and ack id once per requested
    element.
    """
    element_count = 10
    subscription_path = "project/fakeproj/subscriptions/fakesub"
    ack_id = 'ack_id'
    data = b'data'
    canned_response = test_utils.create_pull_response(
        [test_utils.PullResponseMessage(data, ack_id=ack_id)])

    class FlakyPullResponse(object):
        """Callable that cycles: RetryError, DeadlineExceeded, response."""

        def __init__(self, pull_response):
            self.pull_response = pull_response
            # Starts at -1 so the first call lands on phase 0.
            self._state = -1

        def __call__(self, *args, **kwargs):
            self._state += 1
            phase = self._state % 3
            if phase == 0:
                raise gexc.RetryError("", "")
            if phase == 1:
                raise gexc.DeadlineExceeded("")
            # phase == 2: the only remaining case for an int modulo 3.
            return self.pull_response

    mock_pubsub = mock.Mock()
    mock_pubsub.pull.side_effect = FlakyPullResponse(canned_response)

    output = utils.read_from_pubsub(
        mock_pubsub,
        subscription_path,
        number_of_elements=element_count)

    self.assertEqual([data] * element_count, output)
    self._assert_ack_ids_equal(mock_pubsub, [ack_id] * element_count)
def test_read_messages_timestamp_attribute_missing(self, mock_pubsub):
    """Read a message whose attributes lack the configured timestamp key.

    The message carries no attributes while ``timestamp_attribute`` is
    ``'nonexistent'``; the expected element timestamp is the RFC 3339 form
    of the message's publish time.
    """
    data = b'data'
    attributes = {}
    ack_id = 'ack_id'
    # 1520861821 s + 234567000 ns is the same instant as `publish_time`.
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000
    publish_time = '2018-03-12T13:37:01.234567Z'

    pulled_message = test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp.from_rfc3339(publish_time),
            [window.GlobalWindow()]),
    ]

    opts = PipelineOptions([])
    opts.view_as(StandardOptions).streaming = True
    with TestPipeline(options=opts) as p:
        source = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True,
            timestamp_attribute='nonexistent')
        pcoll = p | source
        assert_that(pcoll, equal_to(expected_elements), reify_windows=True)

    # Checked only after the pipeline context has exited.
    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
    client.api.transport.channel.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_fail_parse(self, mock_pubsub):
    """Running the pipeline raises ValueError for a bad timestamp attribute.

    The ``'time'`` attribute holds ``'1337 unparseable'``; the test expects
    ``p.run()`` to raise a ``ValueError`` matching ``parse``, no acknowledge
    call, and the transport channel to be closed.
    """
    data = b'data'
    attributes = {'time': '1337 unparseable'}
    ack_id = 'ack_id'
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000

    pulled_message = test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    opts = PipelineOptions([])
    opts.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=opts)
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True,
        timestamp_attribute='time')
    _ = p | source

    with self.assertRaisesRegex(ValueError, r'parse'):
        p.run()

    client.acknowledge.assert_not_called()
    client.api.transport.channel.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_milli_success(
        self, mock_pubsub):
    """Read a message whose ``'time'`` attribute is a plain integer string.

    The expected element timestamp is the attribute value multiplied by
    1000 and passed as micros, i.e. the attribute is treated as
    milliseconds.
    """
    data = b'data'
    attributes = {'time': '1337'}
    ack_id = 'ack_id'
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000

    pulled_message = test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    expected_timestamp = timestamp.Timestamp(
        micros=int(attributes['time']) * 1000)
    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            expected_timestamp,
            [window.GlobalWindow()]),
    ]

    opts = PipelineOptions([])
    opts.view_as(StandardOptions).streaming = True
    with TestPipeline(options=opts) as p:
        source = ReadFromPubSub(
            'projects/fakeprj/topics/a_topic',
            None,
            None,
            with_attributes=True,
            timestamp_attribute='time')
        pcoll = p | source
        assert_that(pcoll, equal_to(expected_elements), reify_windows=True)

    # Checked only after the pipeline context has exited.
    client.acknowledge.assert_has_calls(
        [mock.call(subscription=mock.ANY, ack_ids=[ack_id])])
    client.close.assert_has_calls([mock.call()])
def test_read_messages_timestamp_attribute_rfc3339_success(
        self, mock_pubsub):
    """Read a message whose ``'time'`` attribute is an RFC 3339 string.

    The expected element timestamp is parsed from the attribute; the
    publish time passed to the pulled message is a different value.
    """
    data = 'data'
    attributes = {'time': '2018-03-12T13:37:01.234567Z'}
    ack_id = 'ack_id'
    publish_time_secs = 1337000000
    publish_time_nanos = 133700000

    pulled_message = test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp.from_rfc3339(attributes['time']),
            [window.GlobalWindow()]),
    ]

    p = TestPipeline()
    p.options.view_as(StandardOptions).streaming = True
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True,
        timestamp_attribute='time')
    pcoll = p | source
    assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
    p.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_read_messages_timestamp_attribute_fail_parse(patch_sub_client):
    """Running the pipeline raises ValueError for a bad timestamp attribute.

    The payload is a serialized ``KlioMessage`` and the ``"time"`` attribute
    holds ``"1337 unparseable"``; the test expects ``p.run()`` to raise a
    ``ValueError`` matching ``parse``, no acknowledge call, and the transport
    channel to be closed.
    """
    exp_entity_id = "entity_id"
    klio_message = klio_pb2.KlioMessage()
    klio_message.data.element = bytes(exp_entity_id, "utf-8")
    data = klio_message.SerializeToString()

    attributes = {"time": "1337 unparseable"}
    ack_id = "ack_id"
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000

    pulled_message = beam_test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    patch_sub_client.pull.return_value = (
        beam_test_utils.create_pull_response([pulled_message]))

    opts = pipeline_options.PipelineOptions([])
    opts.view_as(pipeline_options.StandardOptions).streaming = True
    p = beam_test_pipeline.TestPipeline(options=opts)
    source = b_pubsub.ReadFromPubSub(
        "projects/fakeprj/topics/a_topic",
        None,
        None,
        with_attributes=True,
        timestamp_attribute="time",
    )
    _ = p | source

    with pytest.raises(ValueError, match=r"parse"):
        p.run()

    patch_sub_client.acknowledge.assert_not_called()
    patch_sub_client.api.transport.channel.close.assert_called_with()
def test_read_messages_timestamp_attribute_rfc3339_success(
    mocker, patch_sub_client, patch_msg_manager,
):
    """Read a KlioMessage payload stamped by an RFC 3339 ``"time"`` attribute.

    Besides the element/timestamp check inside the pipeline, the test
    verifies the overridden read behavior: no acknowledge call, a message
    manager created for the subscription path, the message added to it, and
    a single pull with ``max_messages=1``.
    """
    exp_entity_id = "entity_id"
    klio_message = klio_pb2.KlioMessage()
    klio_message.data.element = bytes(exp_entity_id, "utf-8")
    data = klio_message.SerializeToString()

    attributes = {"time": "2018-03-12T13:37:01.234567Z"}
    ack_id = "ack_id"
    publish_time_secs = 1337000000
    publish_time_nanos = 133700000

    pulled_message = beam_test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    pull_response = beam_test_utils.create_pull_response([pulled_message])

    pmsg = b_pubsub.PubsubMessage(data, attributes)
    expected_timestamp = beam_utils.timestamp.Timestamp.from_rfc3339(
        attributes["time"])
    expected_elements = [
        beam_testing_util.TestWindowedValue(
            pmsg,
            expected_timestamp,
            [beam_transforms.window.GlobalWindow()],
        ),
    ]

    patch_sub_client.pull.return_value = pull_response

    opts = pipeline_options.PipelineOptions([])
    opts.view_as(pipeline_options.StandardOptions).streaming = True
    with beam_test_pipeline.TestPipeline(options=opts) as p:
        source = b_pubsub.ReadFromPubSub(
            "projects/fakeprj/topics/a_topic",
            None,
            None,
            with_attributes=True,
            timestamp_attribute="time",
        )
        pcoll = p | source
        # Behavior kept from the original transform: element + timestamp.
        beam_testing_util.assert_that(
            pcoll,
            beam_testing_util.equal_to(expected_elements),
            reify_windows=True,
        )

    # Overridden behavior, checked once the pipeline context has exited.
    # (1) Auto-acking is skipped.
    patch_sub_client.acknowledge.assert_not_called()

    # (2) The MessageManager is created once, for the subscription path.
    patch_msg_manager.assert_called_once_with(
        patch_sub_client.subscription_path())

    # (3) The pulled message is handed to the MessageManager.
    patch_msg_manager.return_value.add.assert_called_once_with(ack_id, pmsg)

    # (4) Messages are pulled one at a time, instead of the original 10.
    patch_sub_client.pull.assert_called_once_with(
        mocker.ANY, max_messages=1, return_immediately=True)

    patch_sub_client.api.transport.channel.close.assert_called_once_with()
def test_read_messages_success(self, mock_pubsub):
    """Read one message with attributes and no timestamp attribute set.

    The expected element is the ``PubsubMessage`` in the global window with
    a timestamp of 1520861821.234567, matching the publish seconds and
    nanos given to the pulled message.
    """
    data = 'data'
    attributes = {'key': 'value'}
    ack_id = 'ack_id'
    publish_time_secs = 1520861821
    publish_time_nanos = 234567000

    pulled_message = test_utils.PullResponseMessage(
        data, attributes, publish_time_secs, publish_time_nanos, ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    expected_elements = [
        TestWindowedValue(
            PubsubMessage(data, attributes),
            timestamp.Timestamp(1520861821.234567),
            [window.GlobalWindow()])
    ]

    opts = PipelineOptions([])
    opts.view_as(StandardOptions).streaming = True
    p = TestPipeline(options=opts)
    source = ReadFromPubSub(
        'projects/fakeprj/topics/a_topic',
        None,
        None,
        with_attributes=True)
    pcoll = p | source
    assert_that(pcoll, equal_to(expected_elements), reify_windows=True)
    p.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_read_from_pubsub(self):
    """Read a single payload and acknowledge it exactly once."""
    subscription_path = "project/fakeproj/subscriptions/fakesub"
    ack_id = 'ack_id'
    data = b'data'

    pulled_message = test_utils.PullResponseMessage(data, ack_id=ack_id)
    mock_pubsub = mock.Mock()
    mock_pubsub.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    output = utils.read_from_pubsub(
        mock_pubsub, subscription_path, number_of_elements=1)

    self.assertEqual([data], output)
    mock_pubsub.acknowledge.assert_called_once_with(
        subscription_path, [ack_id])
def test_read_data_success(self, mock_pubsub):
    """Read a UTF-8 encoded payload and expect the same bytes back."""
    ack_id = 'ack_id'
    data_encoded = u'🤷 ¯\\_(ツ)_/¯'.encode('utf-8')

    pulled_message = test_utils.PullResponseMessage(
        data_encoded, ack_id=ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    expected_elements = [data_encoded]

    p = TestPipeline()
    p.options.view_as(StandardOptions).streaming = True
    source = ReadFromPubSub('projects/fakeprj/topics/a_topic', None, None)
    pcoll = p | source
    assert_that(pcoll, equal_to(expected_elements))
    p.run()

    client.acknowledge.assert_has_calls([mock.call(mock.ANY, [ack_id])])
def test_read_from_pubsub_with_attributes(self):
    """Read one message with ``with_attributes=True``.

    The output is expected to be a ``PubsubMessage`` carrying both payload
    and attributes, acknowledged once with keyword arguments.
    """
    subscription_path = "project/fakeproj/subscriptions/fakesub"
    ack_id = 'ack_id'
    data = b'data'
    attributes = {'key': 'value'}
    expected_message = PubsubMessage(data, attributes)

    pulled_message = test_utils.PullResponseMessage(
        data, attributes, ack_id=ack_id)
    mock_pubsub = mock.Mock()
    mock_pubsub.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    output = utils.read_from_pubsub(
        mock_pubsub,
        subscription_path,
        with_attributes=True,
        number_of_elements=1)

    self.assertEqual([expected_message], output)
    mock_pubsub.acknowledge.assert_called_once_with(
        subscription=subscription_path, ack_ids=[ack_id])
def test_read_from_pubsub_many(self):
    """Read 100 messages served by the mocked pull in slices of 33.

    Each pull call returns the next ``response_size`` prepared messages; the
    test expects all messages, in order, and all ack ids.
    """
    number_of_elements = 100
    response_size = 33
    subscription_path = "project/fakeproj/subscriptions/fakesub"

    indices = range(number_of_elements)
    data_list = ['data {}'.format(i).encode("utf-8") for i in indices]
    attributes_list = [{'key': 'value {}'.format(i)} for i in indices]
    ack_ids = ['ack_id_{}'.format(i) for i in indices]

    messages = []
    response_messages = []
    for payload, attrs, ack in zip(data_list, attributes_list, ack_ids):
        messages.append(PubsubMessage(payload, attrs))
        response_messages.append(
            test_utils.PullResponseMessage(payload, attrs, ack_id=ack))

    class SequentialPullResponse(object):
        """Callable returning consecutive slices of the prepared messages."""

        def __init__(self, response_messages, response_size):
            self.response_messages = response_messages
            self.response_size = response_size
            self._index = 0

        def __call__(self, *args, **kwargs):
            begin = self._index
            end = begin + self.response_size
            # Advance the cursor before building this call's response.
            self._index = end
            return test_utils.create_pull_response(
                self.response_messages[begin:end])

    mock_pubsub = mock.Mock()
    mock_pubsub.pull.side_effect = SequentialPullResponse(
        response_messages, response_size)

    output = utils.read_from_pubsub(
        mock_pubsub,
        subscription_path,
        with_attributes=True,
        number_of_elements=number_of_elements)

    self.assertEqual(messages, output)
    self._assert_ack_ids_equal(mock_pubsub, ack_ids)
def test_read_strings_success(self, mock_pubsub):
    """Read a UTF-8 payload via ``ReadStringsFromPubSub`` as decoded text.

    The pulled message holds the encoded bytes; the expected element is the
    original unicode string.
    """
    ack_id = 'ack_id'
    data = u'🤷 ¯\\_(ツ)_/¯'
    data_encoded = data.encode('utf-8')

    pulled_message = test_utils.PullResponseMessage(
        data_encoded, ack_id=ack_id)
    client = mock_pubsub.return_value
    client.pull.return_value = test_utils.create_pull_response(
        [pulled_message])

    expected_elements = [data]

    opts = PipelineOptions([])
    opts.view_as(StandardOptions).streaming = True
    with TestPipeline(options=opts) as p:
        source = ReadStringsFromPubSub(
            'projects/fakeprj/topics/a_topic', None, None)
        pcoll = p | source
        assert_that(pcoll, equal_to(expected_elements))

    # Checked only after the pipeline context has exited.
    client.acknowledge.assert_has_calls(
        [mock.call(subscription=mock.ANY, ack_ids=[ack_id])])
    client.close.assert_has_calls([mock.call()])