Python run_pipeline Examples, apache_beam.io.gcp.pubsub_it_pipeline.run_pipeline Python Examples

Example #1

0

Show file

    def _test_streaming(self, with_attributes):
        """Run the streaming IT pipeline and verify what it writes to Pub/Sub.

        Args:
          with_attributes: False - only message data is read and written.
            True - message data and attributes are read and written; the
            id_label and timestamp_attribute features are verified as well.
        """
        # The matchers below are handed to the pipeline as its
        # on_success_matcher; the verifications run on a (remote) worker.

        # A streaming pipeline is usually never DONE, so RUNNING is the state
        # to expect. The test runner cancels the pipeline after verification.
        running_matcher = PipelineStateMatcher(PipelineState.RUNNING)

        expected = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
        if not with_attributes:
            # Data-only mode compares against the decoded payload text.
            expected = [message.data.decode('utf-8') for message in expected]

        # No attributes are stripped for the direct runner; for every other
        # runner the id label and timestamp attribute are stripped.
        attributes_to_strip = (
            None if self.runner_name == 'TestDirectRunner'
            else [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])

        message_matcher = PubSubMessageMatcher(
            self.project,
            self.output_sub.name,
            expected,
            timeout=MESSAGE_MATCHER_TIMEOUT_S,
            with_attributes=with_attributes,
            strip_attributes=attributes_to_strip)

        pipeline_overrides = dict(
            input_subscription=self.input_sub.name,
            output_topic=self.output_topic.name,
            wait_until_finish_duration=TEST_PIPELINE_DURATION_MS,
            on_success_matcher=all_of(running_matcher, message_matcher))

        # Inject the input data into Pub/Sub before the job starts.
        for message in self.INPUT_MESSAGES[self.runner_name]:
            self.pub_client.publish(
                self.input_topic.name, message.data, **message.attributes)

        # Merge the overrides into the options given on the command line
        # (--test-pipeline-options) and start the job via the pipeline's
        # main function.
        pipeline_args = self.test_pipeline.get_full_options_as_args(
            **pipeline_overrides)
        pubsub_it_pipeline.run_pipeline(
            argv=pipeline_args,
            with_attributes=with_attributes,
            id_label=self.ID_LABEL,
            timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)

Example #2

0

Show file

File: pubsub_integration_test.py Project: iemejia/incubator-beam

  def _test_streaming(self, with_attributes):
    """Executes the streaming IT pipeline and checks its Pub/Sub output.

    Args:
      with_attributes: False - message data alone is read and written.
        True - both message data and attributes are read and written, and
        the id_label and timestamp_attribute features are checked too.
    """
    # Both matchers are passed as the pipeline's on_success_matcher, so the
    # verification happens on a (remote) worker.

    # RUNNING is expected because a streaming pipeline is usually never DONE;
    # the test runner cancels the pipeline once verification has finished.
    state_matcher = PipelineStateMatcher(PipelineState.RUNNING)

    full_messages = self.EXPECTED_OUTPUT_MESSAGES[self.runner_name]
    if with_attributes:
      expected_output = full_messages
    else:
      # Without attributes only the decoded payload text is compared.
      expected_output = [
          item.data.decode('utf-8') for item in full_messages]

    # Every runner except the direct runner has the id label and timestamp
    # attribute stripped; the direct runner strips nothing.
    if self.runner_name != 'TestDirectRunner':
      stripped = [self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE]
    else:
      stripped = None

    output_matcher = PubSubMessageMatcher(
        self.project,
        self.output_sub.name,
        expected_output,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=stripped)

    test_options = {
        'input_subscription': self.input_sub.name,
        'output_topic': self.output_topic.name,
        'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
        'on_success_matcher': all_of(state_matcher, output_matcher),
    }

    # Publish the generated input data to the input topic.
    for item in self.INPUT_MESSAGES[self.runner_name]:
      self.pub_client.publish(
          self.input_topic.name, item.data, **item.attributes)

    # Combine the options from --test-pipeline-options with the overrides
    # above, then launch the job through the pipeline's main function.
    run_kwargs = {
        'argv': self.test_pipeline.get_full_options_as_args(**test_options),
        'with_attributes': with_attributes,
        'id_label': self.ID_LABEL,
        'timestamp_attribute': self.TIMESTAMP_ATTRIBUTE,
    }
    pubsub_it_pipeline.run_pipeline(**run_kwargs)

Example #3

0

Show file

    def _test_streaming(self, with_attributes):
        """Run the streaming IT pipeline and verify what it writes to Pub/Sub.

        Args:
          with_attributes: False - only message data is read and written.
            True - message data and attributes are read and written; the
            id_label and timestamp_attribute features are verified as well.
        """
        # Matcher for the pipeline state: the job is expected to be RUNNING.
        running_matcher = PipelineStateMatcher(PipelineState.RUNNING)

        # Build the expected dataset; data-only mode compares raw payloads.
        expected = self.EXPECTED_OUTPUT_MESSAGES
        if not with_attributes:
            expected = [message.data for message in expected]

        # The output subscription name is the shared prefix plus this test's
        # uuid.
        output_subscription = OUTPUT_SUB + self.uuid
        message_matcher = PubSubMessageMatcher(
            self.project,
            output_subscription,
            expected,
            timeout=MESSAGE_MATCHER_TIMEOUT_S,
            with_attributes=with_attributes,
            strip_attributes=[self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])

        # Extra pipeline options used only for this test run.
        pipeline_overrides = dict(
            input_subscription=self.input_sub.full_name,
            output_topic=self.output_topic.full_name,
            wait_until_finish_duration=TEST_PIPELINE_DURATION_MS,
            on_success_matcher=all_of(running_matcher, message_matcher))

        # Wait on the input subscription first, then inject the input data
        # into Pub/Sub.
        test_utils.wait_for_subscriptions_created([self.input_sub])
        for message in self.INPUT_MESSAGES:
            self.input_topic.publish(message.data, **message.attributes)

        # Merge the overrides into the options given on the command line
        # (--test-pipeline-options) and start the job via the pipeline's
        # main function.
        pipeline_args = self.test_pipeline.get_full_options_as_args(
            **pipeline_overrides)
        pubsub_it_pipeline.run_pipeline(
            argv=pipeline_args,
            with_attributes=with_attributes,
            id_label=self.ID_LABEL,
            timestamp_attribute=self.TIMESTAMP_ATTRIBUTE)

Example #4

0

Show file

File: pubsub_integration_test.py Project: ocadotechnology/incubator-beam

  def _test_streaming(self, with_attributes):
    """Executes the streaming IT pipeline and checks its Pub/Sub output.

    Args:
      with_attributes: False - message data alone is read and written.
        True - both message data and attributes are read and written, and
        the id_label and timestamp_attribute features are checked too.
    """
    # The pipeline state is expected to be RUNNING when it is verified.
    state_matcher = PipelineStateMatcher(PipelineState.RUNNING)

    # Expected dataset: whole messages, or just their payloads when
    # attributes are not part of the test.
    all_expected = self.EXPECTED_OUTPUT_MESSAGES
    if with_attributes:
      expected_output = all_expected
    else:
      expected_output = [item.data for item in all_expected]

    output_matcher = PubSubMessageMatcher(
        self.project,
        OUTPUT_SUB + self.uuid,
        expected_output,
        timeout=MESSAGE_MATCHER_TIMEOUT_S,
        with_attributes=with_attributes,
        strip_attributes=[self.ID_LABEL, self.TIMESTAMP_ATTRIBUTE])

    # Options added to the pipeline for test purposes only.
    test_options = {
        'input_subscription': self.input_sub.full_name,
        'output_topic': self.output_topic.full_name,
        'wait_until_finish_duration': TEST_PIPELINE_DURATION_MS,
        'on_success_matcher': all_of(state_matcher, output_matcher),
    }

    # Wait on the input subscription before publishing the generated input
    # data to the input topic.
    test_utils.wait_for_subscriptions_created([self.input_sub])
    for item in self.INPUT_MESSAGES:
      self.input_topic.publish(item.data, **item.attributes)

    # Combine the options from --test-pipeline-options with the overrides
    # above, then launch the job through the pipeline's main function.
    run_kwargs = {
        'argv': self.test_pipeline.get_full_options_as_args(**test_options),
        'with_attributes': with_attributes,
        'id_label': self.ID_LABEL,
        'timestamp_attribute': self.TIMESTAMP_ATTRIBUTE,
    }
    pubsub_it_pipeline.run_pipeline(**run_kwargs)