def test_streaming_wordcount_it(self):
    """Run the streaming wordcount example and match its Pub/Sub output.

    The expected output is one UTF-8 encoded ``'<n>: 1'`` message for every
    ``n`` in ``range(DEFAULT_INPUT_NUMBERS)``. Both a RUNNING pipeline-state
    matcher and a Pub/Sub message matcher are passed to the pipeline through
    the ``on_success_matcher`` option.
    """
    # Expected messages on the output subscription.
    expected_msg = []
    for num in range(DEFAULT_INPUT_NUMBERS):
        expected_msg.append(('%d: 1' % num).encode('utf-8'))

    # Matchers handed to the pipeline as its success criteria.
    running_matcher = PipelineStateMatcher(PipelineState.RUNNING)
    output_matcher = PubSubMessageMatcher(
        self.project, self.output_sub.name, expected_msg, timeout=400)

    # Extra pipeline options used only by this test.
    extra_opts = dict(
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name,
        wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
        on_success_matcher=all_of(running_matcher, output_matcher),
    )

    # Publish the input numbers to the input topic.
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Merge the options from --test-pipeline-options with the extras above
    # and launch the job through the example's main entry point.
    pipeline_args = self.test_pipeline.get_full_options_as_args(**extra_opts)
    streaming_wordcount.run(pipeline_args, save_main_session=False)
def test_streaming_wordcount_it(self):
    """Run the streaming wordcount example and match its Pub/Sub output.

    The expected output is one ``'<n>: 1'`` string for every ``n`` in
    ``range(DEFAULT_INPUT_NUMBERS)``, read from the subscription named
    ``OUTPUT_SUB + self.uuid``.
    """
    # Expected messages on the output subscription.
    expected_msg = []
    for num in range(DEFAULT_INPUT_NUMBERS):
        expected_msg.append('%d: 1' % num)

    # Matchers handed to the pipeline as its success criteria.
    running_matcher = PipelineStateMatcher(PipelineState.RUNNING)
    output_matcher = PubSubMessageMatcher(
        self.project, OUTPUT_SUB + self.uuid, expected_msg, timeout=400)

    # Extra pipeline options used only by this test.
    extra_opts = dict(
        input_subscription=self.input_sub.full_name,
        output_topic=self.output_topic.full_name,
        on_success_matcher=all_of(running_matcher, output_matcher),
    )

    # Wait for the input subscription before publishing the input numbers.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Merge the options from --test-pipeline-options with the extras above
    # and launch the job through the example's main entry point.
    pipeline_args = self.test_pipeline.get_full_options_as_args(**extra_opts)
    streaming_wordcount.run(pipeline_args)
def init_matcher(self, expected_msg=None, with_attributes=False,
                 strip_attributes=None):
    """Build ``self.pubsub_matcher`` for a mock project and subscription.

    Args:
      expected_msg: Forwarded as the matcher's expected messages.
      with_attributes: Forwarded to the matcher unchanged.
      strip_attributes: Forwarded to the matcher unchanged.
    """
    matcher_options = {
        'with_attributes': with_attributes,
        'strip_attributes': strip_attributes,
    }
    self.pubsub_matcher = PubSubMessageMatcher(
        'mock_project', 'mock_sub_name', expected_msg, **matcher_options)
def _test_streaming(self, with_attributes):
    """Run the Pub/Sub IT pipeline and verify the messages it emits.

    Args:
      with_attributes: When False, only message data is read and written.
        When True, message data and attributes are read and written, and
        the id_label and timestamp_attribute features are verified too.
    """
    # The matchers below are passed as on_success_matcher and check the
    # pipeline state and the Pub/Sub output; they run on a (remote) worker.
    # A streaming pipeline is usually never DONE, so RUNNING is the expected
    # state; the test runner cancels the pipeline after verification.
    running_matcher = PipelineStateMatcher(PipelineState.RUNNING)

    expected_messages = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
    if not with_attributes:
        # Data-only mode: compare against the decoded payloads.
        expected_messages = [
            message.data.decode('utf-8') for message in expected_messages
        ]

    # Nothing is stripped on TestDirectRunner; on every other runner the
    # id-label and timestamp attributes are stripped before matching.
    strip_attributes = (
        None if self.runner_name == 'TestDirectRunner' else
        [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])

    output_matcher = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        expected_messages,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=strip_attributes)

    extra_opts = dict(
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name,
        wait_until_finish_duration=TEST_PIPELINE_DURATION_MS,
        on_success_matcher=all_of(running_matcher, output_matcher),
    )

    # Publish every input message for this runner to the input topic.
    for message in self.INPUT_MESSAGES[self.runner_name]:
        self.pub_client.publish(
            self.input_topic.name, message.data, **message.attributes)

    # Merge the options from --test-pipeline-options with the extras above
    # and launch the job through the pipeline's main function.
    pipeline_args = self.test_pipeline.get_full_options_as_args(**extra_opts)
    pubsub_it_pipeline.run_pipeline(
        argv=pipeline_args,
        with_attributes=with_attributes,
        id_label=self.ID_LABEL,
        timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
def _setup_pipeline(self):
    """Rebuild ``self.pipeline`` with streaming options and a Pub/Sub matcher.

    The matcher expects a single message on ``self.read_matcher_sub_name``
    whose payload is ``str(self.num_of_messages)`` encoded as UTF-8.
    """
    output_matcher = PubSubMessageMatcher(
        self.project_id,
        self.read_matcher_sub_name,
        expected_msg=[str(self.num_of_messages).encode('utf-8')],
        timeout=MATCHER_TIMEOUT,
        pull_timeout=MATCHER_PULL_TIMEOUT,
    )
    extra_opts = dict(
        on_success_matcher=all_of(output_matcher),
        streaming=True,
        save_main_session=True,
    )
    # Fold the extra options into the current pipeline's arguments, then
    # replace the pipeline with one built from the combined options.
    combined_args = self.pipeline.get_full_options_as_args(**extra_opts)
    combined_options = PipelineOptions(combined_args)
    self.pipeline = TestPipeline(options=combined_options)
def run_pipeline(self):
    """Launch the streaming-metrics exercise pipeline and return its result.

    Returns:
      Whatever ``dataflow_exercise_streaming_metrics_pipeline.run`` returns.
    """
    # Matcher that waits for the published messages on the output
    # subscription.
    expected_msg = []
    for text in MESSAGES_TO_PUBLISH:
        expected_msg.append(text.encode('utf-8'))
    output_matcher = PubSubMessageMatcher(
        self.project, self.output_sub.name, expected_msg, timeout=600)

    # Matcher that checks the pipeline reaches the RUNNING state.
    running_matcher = PipelineStateMatcher(PipelineState.RUNNING)

    extra_opts = dict(
        wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
        on_success_matcher=all_of(running_matcher, output_matcher),
        experiment='beam_fn_api',
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name,
    )
    pipeline_args = self.test_pipeline.get_full_options_as_args(**extra_opts)
    return dataflow_exercise_streaming_metrics_pipeline.run(pipeline_args)
def _test_streaming(self, with_attributes):
    """Run the Pub/Sub IT pipeline and verify the messages it emits.

    Args:
      with_attributes: When False, only message data is read and written.
        When True, message data and attributes are read and written, and
        the id_label and timestamp_attribute features are verified too.
    """
    # Matchers handed to the pipeline as its success criteria.
    running_matcher = PipelineStateMatcher(PipelineState.RUNNING)

    expected_messages = self.EXPECTED_OUTPUT_MESSAGES
    if not with_attributes:
        # Data-only mode: compare against the raw payloads.
        expected_messages = [message.data for message in expected_messages]

    output_matcher = PubSubMessageMatcher(
        self.project,
        OUTPUT_SUB + self.uuid,
        expected_messages,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=[self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])

    extra_opts = dict(
        input_subscription=self.input_sub.full_name,
        output_topic=self.output_topic.full_name,
        wait_until_finish_duration=TEST_PIPELINE_DURATION_MS,
        on_success_matcher=all_of(running_matcher, output_matcher),
    )

    # Wait for the input subscription, then publish every input message.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    for message in self.INPUT_MESSAGES:
        self.input_topic.publish(message.data, **message.attributes)

    # Merge the options from --test-pipeline-options with the extras above
    # and launch the job through the pipeline's main function.
    pipeline_args = self.test_pipeline.get_full_options_as_args(**extra_opts)
    pubsub_it_pipeline.run_pipeline(
        argv=pipeline_args,
        with_attributes=with_attributes,
        id_label=self.ID_LABEL,
        timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
def setUp(self):
    """Prepare Pub/Sub resources and the extra options for the streaming test.

    Reads ``pubsub_topic_name``, ``timeout`` and ``num_of_records`` from the
    pipeline options, calls ``setup_pubsub()``, and stores the resulting
    option dict in ``self.extra_opts``.
    """
    super(GroupByKeyStreamingTest, self).setUp()
    self.topic_short_name = self.pipeline.get_option('pubsub_topic_name')
    self.setup_pubsub()

    # Fall back to the default when the timeout option is missing or falsy.
    timeout = self.pipeline.get_option('timeout')
    if not timeout:
        timeout = DEFAULT_TIMEOUT
    expected_num_of_records = self.pipeline.get_option('num_of_records')

    # The matcher is given an expected message count, not message contents.
    record_count_matcher = PubSubMessageMatcher(
        self.project_id,
        self.output_sub.name,
        expected_msg_len=int(expected_num_of_records),
        timeout=int(timeout))

    self.extra_opts = dict(
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name,
        metrics_namespace=self.metrics_namespace,
        wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
        on_success_matcher=all_of(record_count_matcher),
    )
def test_streaming_wordcount_debugging_it(self):
    """Run the streaming wordcount debugging example and match its output.

    ``SAMPLE_MESSAGES`` are injected into the input topic and
    ``EXPECTED_MESSAGE`` is what the Pub/Sub matcher looks for on the output
    subscription.
    """
    # Matchers handed to the pipeline as its success criteria.
    running_matcher = PipelineStateMatcher(PipelineState.RUNNING)
    output_matcher = PubSubMessageMatcher(
        self.project, self.output_sub.name, EXPECTED_MESSAGE, timeout=400)

    # Extra pipeline options used only by this test.
    extra_opts = dict(
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name,
        wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
        on_success_matcher=all_of(running_matcher, output_matcher),
    )

    # Publish the sample input to the input topic.
    self._inject_data(self.input_topic, SAMPLE_MESSAGES)

    # Merge the options from --test-pipeline-options with the extras above
    # and launch the job through the example's main entry point.
    pipeline_args = self.test_pipeline.get_full_options_as_args(**extra_opts)
    streaming_wordcount_debugging.run(pipeline_args, save_main_session=False)
def test_pubsub_pipe_it(self):
    """Run the Pub/Sub pipeline end to end and match its Pub/Sub output.

    The pipeline state (RUNNING) and the message on the output subscription
    are matched through the ``on_success_matcher`` option.

    NOTE: the BigQuery dataset/table options are passed to the pipeline, but
    this test does not verify BigQuery output.
    """
    # Register cleanups before launching anything, so the Pub/Sub resources
    # and the BigQuery dataset are still removed when the run below raises.
    # Cleanups run in reverse registration order, as before.
    self.addCleanup(self._cleanup_pubsub)
    self.addCleanup(utils.delete_bq_dataset, self.project, self.dataset_ref)

    # Build expected dataset.
    expected_msg = ['conall_0 - 1608051184'.encode('utf-8')]

    # Matchers handed to the pipeline as its success criteria.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        expected_msg,
        timeout=60 * 7)  # in seconds

    # Extra pipeline options used only by this test.
    extra_opts = {
        'bigquery_dataset': self.dataset_ref.dataset_id,
        'bigquery_table': OUTPUT_TABLE,
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier),
    }

    # Generate input data and inject to PubSub.
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    pipeline.run(self.test_pipeline.get_full_options_as_args(**extra_opts))
def setUp(self):
    """Create a mock pipeline result and a matcher with one expected message."""
    self.mock_presult = mock.MagicMock()
    expected_messages = ['mock_expected_msg']
    self.pubsub_matcher = PubSubMessageMatcher(
        'mock_project', 'mock_sub_name', expected_messages)
def init_counter_matcher(self, expected_msg_len=1):
    """Build ``self.pubsub_matcher`` from an expected message count.

    Args:
      expected_msg_len: Forwarded to the matcher as ``expected_msg_len``.
    """
    matcher_options = {'expected_msg_len': expected_msg_len}
    self.pubsub_matcher = PubSubMessageMatcher(
        'mock_project', 'mock_sub_name', **matcher_options)
def test_pubsub_pipe_it(self):
    """Run the Pub/Sub pipeline end to end and match Pub/Sub and BigQuery output.

    Three matchers are passed through ``on_success_matcher``: a BigQuery
    streaming full-result matcher, a RUNNING pipeline-state matcher, and a
    Pub/Sub message matcher.
    """
    # Register cleanups before launching anything, so the Pub/Sub resources
    # and the BigQuery dataset are still removed when the run below raises.
    # Cleanups run in reverse registration order, as before.
    self.addCleanup(self._cleanup_pubsub)
    self.addCleanup(utils.delete_bq_dataset, self.project, self.dataset_ref)

    # Build expected dataset.
    expected_msg = ['conall_0 - 1608051184'.encode('utf-8')]

    # Matchers for the pipeline state and the Pub/Sub output.
    state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
    pubsub_msg_verifier = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        expected_msg,
        timeout=60 * 7)  # in seconds

    validation_query = f'SELECT text FROM `{self.project}.{self.dataset_ref.dataset_id}.{OUTPUT_TABLE}`'

    # Each expected row is a tuple, hence the trailing comma.
    expected_bq_msg = [('conall_0 - 1608051184', )]

    # Fetch BigQuery data with the given query and compare it to the expected
    # data. This matcher polls BigQuery until the number of records equals
    # the number of records in the expected data. The timeout is optional.
    bigquery_streaming_verifier = BigqueryFullResultStreamingMatcher(
        project=self.project,
        query=validation_query,
        data=expected_bq_msg,
        timeout=60 * 7)

    # Extra pipeline options used only by this test.
    extra_opts = {
        'bigquery_dataset': self.dataset_ref.dataset_id,
        'bigquery_table': OUTPUT_TABLE,
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
        'on_success_matcher': all_of(
            bigquery_streaming_verifier, state_verifier, pubsub_msg_verifier),
    }

    # Generate input data and inject to PubSub.
    self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

    # Get pipeline options from command argument: --test-pipeline-options,
    # and start pipeline job by calling pipeline main function.
    pipeline.run(self.test_pipeline.get_full_options_as_args(**extra_opts))