def test_streaming_wordcount_it(self):
        """Run streaming wordcount and verify the messages it publishes.

        Input numbers are injected into the input topic before the pipeline
        starts. The pipeline is launched with an on_success_matcher that
        combines a matcher built for PipelineState.RUNNING with a Pub/Sub
        matcher expecting one UTF-8 encoded '<num>: 1' message per number.
        """
        # Expected output: one encoded '<num>: 1' payload per input number.
        expected_payloads = []
        for number in range(DEFAULT_INPUT_NUMBERS):
            expected_payloads.append(('%d: 1' % number).encode('utf-8'))

        # Matchers handed to the pipeline through the test options.
        running_state = PipelineStateMatcher(PipelineState.RUNNING)
        output_matcher = PubSubMessageMatcher(
            self.project, self.output_sub.name, expected_payloads, timeout=400)
        pipeline_overrides = dict(
            input_subscription=self.input_sub.name,
            output_topic=self.output_topic.name,
            wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
            on_success_matcher=all_of(running_state, output_matcher))

        # Inject the input data into Pub/Sub before starting the job.
        self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

        # Merge the overrides with the options from --test-pipeline-options
        # and start the job through the pipeline's main function.
        argv = self.test_pipeline.get_full_options_as_args(**pipeline_overrides)
        streaming_wordcount.run(argv, save_main_session=False)
Ejemplo n.º 2
0
    def test_streaming_wordcount_it(self):
        """Run streaming wordcount and verify the messages it publishes.

        Waits for the input subscription to exist, injects the input numbers,
        then starts the pipeline with an on_success_matcher combining a
        matcher built for PipelineState.RUNNING with a Pub/Sub matcher that
        expects one '<num>: 1' string per injected number.
        """
        # Expected output: one '<num>: 1' string per input number.
        expected_lines = []
        for number in range(DEFAULT_INPUT_NUMBERS):
            expected_lines.append('%d: 1' % number)

        # Matchers handed to the pipeline through the test options.
        running_state = PipelineStateMatcher(PipelineState.RUNNING)
        output_matcher = PubSubMessageMatcher(
            self.project, OUTPUT_SUB + self.uuid, expected_lines, timeout=400)
        pipeline_overrides = dict(
            input_subscription=self.input_sub.full_name,
            output_topic=self.output_topic.full_name,
            on_success_matcher=all_of(running_state, output_matcher))

        # The subscription must exist before any data is published to it.
        test_utils.wait_for_subscriptions_created([self.input_sub])
        self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

        # Merge the overrides with the options from --test-pipeline-options
        # and start the job through the pipeline's main function.
        argv = self.test_pipeline.get_full_options_as_args(**pipeline_overrides)
        streaming_wordcount.run(argv)
Ejemplo n.º 3
0
 def init_matcher(self,
                  expected_msg=None,
                  with_attributes=False,
                  strip_attributes=None):
     """Create a PubSubMessageMatcher and store it on self.pubsub_matcher.

     The matcher is always built for the placeholder project 'mock_project'
     and subscription 'mock_sub_name'.

     Args:
       expected_msg: Expected messages, forwarded to the matcher unchanged.
       with_attributes: Forwarded as the matcher's with_attributes option.
       strip_attributes: Forwarded as the matcher's strip_attributes option.
     """
     matcher_options = {
         'with_attributes': with_attributes,
         'strip_attributes': strip_attributes,
     }
     self.pubsub_matcher = PubSubMessageMatcher(
         'mock_project', 'mock_sub_name', expected_msg, **matcher_options)
Ejemplo n.º 4
0
    def _test_streaming(self, with_attributes):
        """Run the Pub/Sub IT pipeline with a message verifier.

        Args:
          with_attributes: False - reads and writes message data only.
            True - reads and writes message data and attributes, and also
            verifies the id_label and timestamp_attribute features.
        """
        # The matchers below are passed via on_success_matcher to verify
        # pipeline state and Pub/Sub output; these verifications run on a
        # (remote) worker.

        # RUNNING is the expected state because a streaming pipeline is usually
        # never DONE. The test runner cancels the pipeline after verification.
        running_state = PipelineStateMatcher(PipelineState.RUNNING)

        # Expected output is keyed by runner; without attributes only the
        # decoded message payloads are compared.
        runner_expected = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
        if with_attributes:
            expected = runner_expected
        else:
            expected = []
            for message in runner_expected:
                expected.append(message.data.decode('utf-8'))

        # Nothing is stripped for TestDirectRunner; for any other runner the
        # id-label and timestamp attributes are stripped before comparison.
        strip_attributes = (
            None if self.runner_name == 'TestDirectRunner' else
            [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])

        output_matcher = PubSubMessageMatcher(
            self.project,
            self.output_sub.name,
            expected,
            timeout=MESSAGE_MATCHER_TIMEOUT_S,
            with_attributes=with_attributes,
            strip_attributes=strip_attributes)
        pipeline_overrides = dict(
            input_subscription=self.input_sub.name,
            output_topic=self.output_topic.name,
            wait_until_finish_duration=TEST_PIPELINE_DURATION_MS,
            on_success_matcher=all_of(running_state, output_matcher))

        # Publish the runner-specific input messages to the input topic.
        topic_name = self.input_topic.name
        for message in self.INPUT_MESSAGES[self.runner_name]:
            self.pub_client.publish(
                topic_name, message.data, **message.attributes)

        # Merge the overrides with the options from --test-pipeline-options
        # and start the job through the pipeline's main function.
        argv = self.test_pipeline.get_full_options_as_args(**pipeline_overrides)
        pubsub_it_pipeline.run_pipeline(
            argv=argv,
            with_attributes=with_attributes,
            id_label=self.ID_LABEL,
            timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
Ejemplo n.º 5
0
 def _setup_pipeline(self):
   """Rebuild self.pipeline with a Pub/Sub matcher and streaming options.

   The matcher expects a single message: the UTF-8 encoded decimal string of
   self.num_of_messages. The existing pipeline's full options are extended
   with the matcher, streaming=True and save_main_session=True, and a new
   TestPipeline is created from the resulting arguments.
   """
   expected_payload = str(self.num_of_messages).encode('utf-8')
   count_matcher = PubSubMessageMatcher(
       self.project_id,
       self.read_matcher_sub_name,
       expected_msg=[expected_payload],
       timeout=MATCHER_TIMEOUT,
       pull_timeout=MATCHER_PULL_TIMEOUT,
   )
   full_args = self.pipeline.get_full_options_as_args(
       on_success_matcher=all_of(count_matcher),
       streaming=True,
       save_main_session=True)
   self.pipeline = TestPipeline(options=PipelineOptions(full_args))
Ejemplo n.º 6
0
  def run_pipeline(self):
    """Start the streaming metrics exercise pipeline with output matchers.

    Returns:
      Whatever dataflow_exercise_streaming_metrics_pipeline.run returns for
      the assembled argument list.
    """
    # Matcher that waits for the encoded messages to appear on the output
    # subscription.
    encoded_messages = []
    for text in MESSAGES_TO_PUBLISH:
      encoded_messages.append(text.encode('utf-8'))
    output_matcher = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        encoded_messages,
        timeout=600)

    # Matcher that checks the pipeline initializes to the RUNNING state.
    running_state = PipelineStateMatcher(PipelineState.RUNNING)

    pipeline_overrides = dict(
        wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
        on_success_matcher=all_of(running_state, output_matcher),
        experiment='beam_fn_api',
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name)

    return dataflow_exercise_streaming_metrics_pipeline.run(
        self.test_pipeline.get_full_options_as_args(**pipeline_overrides))
Ejemplo n.º 7
0
    def _test_streaming(self, with_attributes):
        """Run the Pub/Sub IT pipeline with a message verifier.

        Args:
          with_attributes: False - reads and writes message data only.
            True - reads and writes message data and attributes, and also
            verifies the id_label and timestamp_attribute features.
        """
        # Matchers handed to the pipeline through the test options.
        running_state = PipelineStateMatcher(PipelineState.RUNNING)

        # Without attributes only the raw message payloads are compared.
        if with_attributes:
            expected = self.EXPECTED_OUTPUT_MESSAGES
        else:
            expected = [
                message.data for message in self.EXPECTED_OUTPUT_MESSAGES
            ]

        ignored_attributes = [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE]
        output_matcher = PubSubMessageMatcher(
            self.project,
            OUTPUT_SUB + self.uuid,
            expected,
            timeout=MESSAGE_MATCHER_TIMEOUT_S,
            with_attributes=with_attributes,
            strip_attributes=ignored_attributes)
        pipeline_overrides = dict(
            input_subscription=self.input_sub.full_name,
            output_topic=self.output_topic.full_name,
            wait_until_finish_duration=TEST_PIPELINE_DURATION_MS,
            on_success_matcher=all_of(running_state, output_matcher))

        # The subscription must exist before any data is published to it.
        test_utils.wait_for_subscriptions_created([self.input_sub])
        for message in self.INPUT_MESSAGES:
            self.input_topic.publish(message.data, **message.attributes)

        # Merge the overrides with the options from --test-pipeline-options
        # and start the job through the pipeline's main function.
        argv = self.test_pipeline.get_full_options_as_args(**pipeline_overrides)
        pubsub_it_pipeline.run_pipeline(
            argv=argv,
            with_attributes=with_attributes,
            id_label=self.ID_LABEL,
            timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)
Ejemplo n.º 8
0
    def setUp(self):
        """Prepare Pub/Sub resources and the extra pipeline options.

        Reads 'pubsub_topic_name', 'timeout' and 'num_of_records' from the
        pipeline options, calls setup_pubsub(), and stores in self.extra_opts
        the options that include a Pub/Sub matcher expecting num_of_records
        messages on the output subscription.
        """
        super(GroupByKeyStreamingTest, self).setUp()
        self.topic_short_name = self.pipeline.get_option('pubsub_topic_name')
        self.setup_pubsub()

        # Fall back to the default when no (or an empty) timeout is supplied.
        timeout = self.pipeline.get_option('timeout')
        if not timeout:
            timeout = DEFAULT_TIMEOUT
        record_count = self.pipeline.get_option('num_of_records')

        # Option values are converted with int() before reaching the matcher.
        output_matcher = PubSubMessageMatcher(
            self.project_id,
            self.output_sub.name,
            expected_msg_len=int(record_count),
            timeout=int(timeout))

        self.extra_opts = dict(
            input_subscription=self.input_sub.name,
            output_topic=self.output_topic.name,
            metrics_namespace=self.metrics_namespace,
            wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
            on_success_matcher=all_of(output_matcher))
  def test_streaming_wordcount_debugging_it(self):
    """Run the debugging wordcount pipeline and verify its Pub/Sub output.

    SAMPLE_MESSAGES are injected into the input topic, then the pipeline is
    started with an on_success_matcher combining a matcher built for
    PipelineState.RUNNING with a Pub/Sub matcher expecting EXPECTED_MESSAGE
    on the output subscription.
    """
    # Matchers handed to the pipeline through the test options.
    running_state = PipelineStateMatcher(PipelineState.RUNNING)
    output_matcher = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        EXPECTED_MESSAGE,
        timeout=400)
    pipeline_overrides = dict(
        input_subscription=self.input_sub.name,
        output_topic=self.output_topic.name,
        wait_until_finish_duration=WAIT_UNTIL_FINISH_DURATION,
        on_success_matcher=all_of(running_state, output_matcher))

    # Inject the input data into Pub/Sub before starting the job.
    self._inject_data(self.input_topic, SAMPLE_MESSAGES)

    # Merge the overrides with the options from --test-pipeline-options
    # and start the job through the pipeline's main function.
    argv = self.test_pipeline.get_full_options_as_args(**pipeline_overrides)
    streaming_wordcount_debugging.run(argv, save_main_session=False)
    def test_pubsub_pipe_it(self):
        """Run the pipeline against Pub/Sub and verify its published output.

        Input numbers are injected into the input topic, then the pipeline is
        started with an on_success_matcher that combines a matcher built for
        PipelineState.RUNNING with a Pub/Sub matcher expecting the single
        encoded message 'conall_0 - 1608051184' on the output subscription.
        """
        # Register cleanup before anything that can fail: functions added
        # with addCleanup run even when the test body raises, so a failing
        # pipeline run or matcher does not leak the Pub/Sub resources or the
        # BigQuery dataset. Cleanups run in reverse registration order.
        self.addCleanup(self._cleanup_pubsub)
        self.addCleanup(utils.delete_bq_dataset, self.project,
                        self.dataset_ref)

        # Build expected dataset.
        expected_msg = ['conall_0 - 1608051184'.encode('utf-8')]

        # Set extra options to the pipeline for test purpose.
        state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
        pubsub_msg_verifier = PubSubMessageMatcher(
            self.project,
            self.output_sub.name,
            expected_msg,
            timeout=60 * 7)  # in seconds

        # NOTE(review): bigquery_dataset / bigquery_table are passed to the
        # pipeline, but no BigQuery matcher is part of on_success_matcher, so
        # only job state and Pub/Sub output are verified by this test.
        extra_opts = {
            'bigquery_dataset': self.dataset_ref.dataset_id,
            'bigquery_table': OUTPUT_TABLE,
            'input_subscription': self.input_sub.name,
            'output_topic': self.output_topic.name,
            'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
            'on_success_matcher': all_of(state_verifier, pubsub_msg_verifier)
        }

        # Generate input data and inject to PubSub.
        self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        pipeline.run(self.test_pipeline.get_full_options_as_args(**extra_opts))
Ejemplo n.º 11
0
 def setUp(self):
     """Create the fixtures: a MagicMock and a Pub/Sub matcher.

     self.mock_presult is a bare MagicMock; self.pubsub_matcher is built for
     the placeholder project and subscription names with a single expected
     message, 'mock_expected_msg'.
     """
     self.mock_presult = mock.MagicMock()
     expected_messages = ['mock_expected_msg']
     self.pubsub_matcher = PubSubMessageMatcher(
         'mock_project', 'mock_sub_name', expected_messages)
Ejemplo n.º 12
0
 def init_counter_matcher(self, expected_msg_len=1):
     """Create a count-based PubSubMessageMatcher on self.pubsub_matcher.

     Args:
       expected_msg_len: Forwarded as the matcher's expected_msg_len option;
         defaults to 1.
     """
     counter_matcher = PubSubMessageMatcher(
         'mock_project',
         'mock_sub_name',
         expected_msg_len=expected_msg_len,
     )
     self.pubsub_matcher = counter_matcher
Ejemplo n.º 13
0
    def test_pubsub_pipe_it(self):
        """Run the pipeline and verify its Pub/Sub and BigQuery output.

        Input numbers are injected into the input topic, then the pipeline is
        started with an on_success_matcher that combines three matchers: a
        BigQuery streaming matcher for the rows returned by the validation
        query, a matcher built for PipelineState.RUNNING, and a Pub/Sub
        matcher for the output subscription.
        """
        # Register cleanup before anything that can fail: functions added
        # with addCleanup run even when the test body raises, so a failing
        # pipeline run or matcher does not leak the Pub/Sub resources or the
        # BigQuery dataset. Cleanups run in reverse registration order.
        self.addCleanup(self._cleanup_pubsub)
        self.addCleanup(utils.delete_bq_dataset, self.project,
                        self.dataset_ref)

        # Build expected dataset.
        expected_msg = ['conall_0 - 1608051184'.encode('utf-8')]

        # Set extra options to the pipeline for test purpose.
        state_verifier = PipelineStateMatcher(PipelineState.RUNNING)
        pubsub_msg_verifier = PubSubMessageMatcher(
            self.project,
            self.output_sub.name,
            expected_msg,
            timeout=60 * 7)  # in seconds

        validation_query = f'SELECT text FROM `{self.project}.{self.dataset_ref.dataset_id}.{OUTPUT_TABLE}`'

        # Each expected row is a tuple, hence the trailing comma for the
        # single-column result.
        expected_bq_msg = [('conall_0 - 1608051184', )]

        # Fetch BigQuery data with the given query and compare it to the
        # expected data. This matcher polls BigQuery until the number of
        # records in BigQuery equals the number of records in the expected
        # data. Specifying a timeout is optional.
        bigquery_streaming_verifier = BigqueryFullResultStreamingMatcher(
            project=self.project,
            query=validation_query,
            data=expected_bq_msg,
            timeout=60 * 7)

        extra_opts = {
            'bigquery_dataset': self.dataset_ref.dataset_id,
            'bigquery_table': OUTPUT_TABLE,
            'input_subscription': self.input_sub.name,
            'output_topic': self.output_topic.name,
            'wait_until_finish_duration': WAIT_UNTIL_FINISH_DURATION,
            'on_success_matcher': all_of(bigquery_streaming_verifier,
                                         state_verifier,
                                         pubsub_msg_verifier)
        }

        # Generate input data and inject to PubSub.
        self._inject_numbers(self.input_topic, DEFAULT_INPUT_NUMBERS)

        # Get pipeline options from command argument: --test-pipeline-options,
        # and start pipeline job by calling pipeline main function.
        pipeline.run(self.test_pipeline.get_full_options_as_args(**extra_opts))