Example #1
  def test_customized_counters_in_combine_fn(self):
    p = TestPipeline()
    input = (
        p
        | beam.Create([('key1', 'a'), ('key1', 'ab'), ('key1', 'abc'),
                       ('key2', 'uvxy'), ('key2', 'uvxyz')]))

    # The result of concatenating all values regardless of key.
    global_concat = (
        input
        | beam.Values()
        | beam.CombineGlobally(SortedConcatWithCounters()))

    # The (key, concatenated_string) pairs for all keys.
    concat_per_key = (input | beam.CombinePerKey(SortedConcatWithCounters()))

    # Verify the concatenated strings are correct.
    expected_concat_per_key = [('key1', 'aaabbc'), ('key2', 'uuvvxxyyz')]
    assert_that(
        global_concat, equal_to(['aaabbcuuvvxxyyz']), label='global concat')
    assert_that(
        concat_per_key,
        equal_to(expected_concat_per_key),
        label='concat per key')

    result = p.run()
    result.wait_until_finish()

    # Verify the values of metrics are correct.
    word_counter_filter = MetricsFilter().with_name('word_counter')
    query_result = result.metrics().query(word_counter_filter)
    if query_result['counters']:
      word_counter = query_result['counters'][0]
      self.assertEqual(word_counter.result, 5)

    word_lengths_filter = MetricsFilter().with_name('word_lengths')
    query_result = result.metrics().query(word_lengths_filter)
    if query_result['counters']:
      word_lengths = query_result['counters'][0]
      self.assertEqual(word_lengths.result, 15)

    word_len_dist_filter = MetricsFilter().with_name('word_len_dist')
    query_result = result.metrics().query(word_len_dist_filter)
    if query_result['distributions']:
      word_len_dist = query_result['distributions'][0]
      self.assertEqual(word_len_dist.result.mean, 3)

    last_word_len_filter = MetricsFilter().with_name('last_word_len')
    query_result = result.metrics().query(last_word_len_filter)
    if query_result['gauges']:
      last_word_len = query_result['gauges'][0]
      self.assertIn(last_word_len.result.value, [1, 2, 3, 4, 5])
Example #2
  def __init__(self):
    # Be sure to set blocking to false for timeout_ms to work properly
    self.pipeline = TestPipeline(is_integration_test=True, blocking=False)
    assert not self.pipeline.blocking

    options = self.pipeline.get_pipeline_options().view_as(LoadTestOptions)
    self.timeout_ms = options.timeout_ms
    self.input_options = options.input_options
    self.metrics_namespace = options.metrics_table or 'default'
    publish_to_bq = options.publish_to_big_query
    if publish_to_bq is None:
      logging.info(
          'Missing --publish_to_big_query option. Metrics will not '
          'be published to BigQuery.')
    if options.input_options is None:
      logging.error('--input_options argument is required.')
      sys.exit(1)

    gcloud_options = self.pipeline.get_pipeline_options().view_as(
        GoogleCloudOptions)
    self.project_id = gcloud_options.project

    self._metrics_monitor = MetricsReader(
        publish_to_bq=publish_to_bq,
        project_name=self.project_id,
        bq_table=options.metrics_table,
        bq_dataset=options.metrics_dataset,
        influxdb_options=InfluxDBMetricsPublisherOptions(
            options.influx_measurement,
            options.influx_db_name,
            options.influx_hostname,
            os.getenv('INFLUXDB_USER'),
            os.getenv('INFLUXDB_USER_PASSWORD')),
        # Apply filter to prevent system metrics from being published
        filters=MetricsFilter().with_namespace(self.metrics_namespace))
Example #3
    def test_AnnotateImage_b64_content(self):
        base_64_encoded_image = \
          b'YmVnaW4gNjQ0IGNhdC12aWRlby5tcDRNICAgICgmOVQ+NyFNPCMwUi4uZmFrZV92aWRlb'
        images_to_annotate = [
            base_64_encoded_image,
            base_64_encoded_image,
            base_64_encoded_image,
        ]
        expected_counter = len(images_to_annotate)
        with mock.patch.object(visionml,
                               'get_vision_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()
            _ = (p
                 | "Create data" >> beam.Create(images_to_annotate)
                 | "Annotate image" >> visionml.AnnotateImage(
                     self.features,
                     min_batch_size=self.min_batch_size,
                     max_batch_size=self.max_batch_size))
            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('API Calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.result == expected_counter)
Example #4
    def test_AnnotateVideo_with_side_input_context(self):
        videos_to_annotate = [
            'gs://cloud-samples-data/video/cat.mp4',
            'gs://some-other-video/sample.mp4',
            'gs://some-other-video/sample_2.mp4'
        ]
        video_contexts = [
            ('gs://cloud-samples-data/video/cat.mp4', self.video_ctx),
            ('gs://some-other-video/sample.mp4', self.video_ctx),
        ]

        expected_counter = len(videos_to_annotate)
        with mock.patch.object(videointelligenceml,
                               'get_videointelligence_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()
            context_side_input = (
                p | "Video contexts" >> beam.Create(video_contexts))

            _ = (
                p
                | "Create data" >> beam.Create(videos_to_annotate)
                | "Annotate video" >> videointelligenceml.AnnotateVideo(
                    self.features,
                    context_side_input=beam.pvalue.AsDict(context_side_input)))
            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('API Calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.committed == expected_counter)
Example #5
    def test_AnnotateVideo_b64_content(self):
        base_64_encoded_video = \
          b'YmVnaW4gNjQ0IGNhdC12aWRlby5tcDRNICAgICgmOVQ+NyFNPCMwUi4uZmFrZV92aWRlb'
        videos_to_annotate = [
            base_64_encoded_video,
            base_64_encoded_video,
            base_64_encoded_video,
        ]
        expected_counter = len(videos_to_annotate)
        with mock.patch.object(videointelligenceml,
                               'get_videointelligence_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()
            _ = (p
                 | "Create data" >> beam.Create(videos_to_annotate)
                 | "Annotate video" >> videointelligenceml.AnnotateVideo(
                     self.features))
            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('API Calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.committed == expected_counter)
Example #6
    def test_AnnotateImage_URI_with_side_input_context(self):
        images_to_annotate = [
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg'
        ]
        image_contexts = [
            ('gs://cloud-samples-data/vision/ocr/sign.jpg', self.img_ctx),
            ('gs://cloud-samples-data/vision/ocr/sign.jpg', self.img_ctx),
        ]

        expected_counter = len(images_to_annotate)
        with mock.patch.object(visionml,
                               'get_vision_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()
            context_side_input = (
                p | "Image contexts" >> beam.Create(image_contexts))

            _ = (
                p
                | "Create data" >> beam.Create(images_to_annotate)
                | "Annotate image" >> visionml.AnnotateImage(
                    self.features,
                    min_batch_size=self.min_batch_size,
                    max_batch_size=self.max_batch_size,
                    context_side_input=beam.pvalue.AsDict(context_side_input)))
            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('API Calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.result == expected_counter)
Example #7
    def __init__(self):
        self.pipeline = TestPipeline(is_integration_test=True)

        load_test_options = self.pipeline.get_pipeline_options().view_as(
            LoadTestOptions)
        self.input_options = load_test_options.input_options
        self.metrics_namespace = load_test_options.metrics_table or 'default'
        publish_to_bq = load_test_options.publish_to_big_query
        if publish_to_bq is None:
            logging.info(
                'Missing --publish_to_big_query option. Metrics will not '
                'be published to BigQuery.')
        if load_test_options.input_options is None:
            logging.error('--input_options argument is required.')
            sys.exit(1)

        gcloud_options = self.pipeline.get_pipeline_options().view_as(
            GoogleCloudOptions)
        self.project_id = gcloud_options.project

        self._metrics_monitor = MetricsReader(
            publish_to_bq=publish_to_bq,
            project_name=self.project_id,
            bq_table=load_test_options.metrics_table,
            bq_dataset=load_test_options.metrics_dataset,
            # Apply filter to prevent system metrics from being published
            filters=MetricsFilter().with_namespace(self.metrics_namespace))
Example #8
    def test_AnnotateImage_URIs_large_batch(self):
        images_to_annotate = [
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
            'gs://cloud-samples-data/vision/ocr/sign.jpg',
        ]

        batch_size = 5
        expected_counter = 3  # All 11 images should fit in 3 batches
        with mock.patch.object(visionml,
                               'get_vision_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()
            _ = (p
                 | "Create data" >> beam.Create(images_to_annotate)
                 | "Annotate image" >> visionml.AnnotateImage(
                     self.features,
                     max_batch_size=batch_size,
                     min_batch_size=batch_size))
            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('API Calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.result == expected_counter)
Example #9
def assertCounterEqual(self, pipeline_result, counter_name, expected):
    metrics = pipeline_result.metrics().query(
        MetricsFilter().with_name(counter_name))
    try:
        counter = metrics['counters'][0]
        self.assertEqual(expected, counter.result)
    except IndexError:
        raise AssertionError(
            'Counter "{}" was not found'.format(counter_name))
Example #10
  def test_customized_counters_in_combine_fn_empty(self):
    p = TestPipeline()
    input = p | beam.Create([])

    # The result of concatenating all values regardless of key.
    global_concat = (
        input
        | beam.Values()
        | beam.CombineGlobally(SortedConcatWithCounters()))

    # The (key, concatenated_string) pairs for all keys.
    concat_per_key = (input | beam.CombinePerKey(SortedConcatWithCounters()))

    # Verify the concatenated strings are correct.
    assert_that(global_concat, equal_to(['']), label='global concat')
    assert_that(concat_per_key, equal_to([]), label='concat per key')

    result = p.run()
    result.wait_until_finish()

    # Verify the values of metrics are correct.
    word_counter_filter = MetricsFilter().with_name('word_counter')
    query_result = result.metrics().query(word_counter_filter)
    if query_result['counters']:
      word_counter = query_result['counters'][0]
      self.assertEqual(word_counter.result, 0)

    word_lengths_filter = MetricsFilter().with_name('word_lengths')
    query_result = result.metrics().query(word_lengths_filter)
    if query_result['counters']:
      word_lengths = query_result['counters'][0]
      self.assertEqual(word_lengths.result, 0)

    word_len_dist_filter = MetricsFilter().with_name('word_len_dist')
    query_result = result.metrics().query(word_len_dist_filter)
    if query_result['distributions']:
      word_len_dist = query_result['distributions'][0]
      self.assertEqual(word_len_dist.result.count, 0)

    last_word_len_filter = MetricsFilter().with_name('last_word_len')
    query_result = result.metrics().query(last_word_len_filter)

    # No element has ever been recorded.
    self.assertFalse(query_result['gauges'])
Example #11
def get_counter_metric(result, namespace, name):
  # type: (PipelineResult, str, str) -> int
  metrics = result.metrics().query(
      MetricsFilter().with_namespace(namespace).with_name(name))
  counters = metrics['counters']
  if len(counters) > 1:
    raise RuntimeError(
        '%d instead of one metric result matches name: %s in namespace %s' %
        (len(counters), name, namespace))
  return counters[0].result if len(counters) > 0 else -1
Example #12
    def setUp(self, pipeline_options=None):
        self.pipeline = TestPipeline(pipeline_options)
        input = self.pipeline.get_option('input_options') or '{}'
        self.input_options = json.loads(input)
        self.project_id = self.pipeline.get_option('project')

        self.metrics_dataset = self.pipeline.get_option('metrics_dataset')
        self.metrics_namespace = self.pipeline.get_option('metrics_table')

        self.metrics_monitor = MetricsReader(
            publish_to_bq=self.pipeline.get_option('publish_to_big_query') ==
            'true',
            project_name=self.project_id,
            bq_table=self.metrics_namespace,
            bq_dataset=self.metrics_dataset,
            # Apply filter to prevent system metrics from being published
            filters=MetricsFilter().with_namespace(self.metrics_namespace))
Example #13
def get_end_time_metric(result, namespace, name):
    # type: (PipelineResult, str, str) -> int
    """
  get the end time out of all times recorded by the specified distribution
  metric

  Args:
    result: the PipelineResult which metrics are read from
    namespace: a string representing the namespace of wanted metric
    name: a string representing the  name of the wanted metric

  Returns:
    the largest time in the metric or -1 if it doesn't exist
  """
    distributions = result.metrics().query(MetricsFilter().with_namespace(
        namespace).with_name(name))['distributions']
    max_list = list(map(lambda m: m.result.max, distributions))
    return max(max_list) if len(max_list) > 0 else -1
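A minimal usage sketch for the helper above, under assumed names: the DoFn, the namespace 'my_namespace', and the metric name 'end_time_ms' are illustrative only and not part of the original examples.
# Hypothetical usage sketch: record a distribution metric in a DoFn, run a
# small pipeline, then read the largest recorded value back with
# get_end_time_metric. All names below are assumptions for illustration.
import apache_beam as beam
from apache_beam.metrics.metric import Metrics


class RecordEndTime(beam.DoFn):
    def __init__(self):
        self.end_times = Metrics.distribution('my_namespace', 'end_time_ms')

    def process(self, element):
        self.end_times.update(element)
        yield element


def run_example():
    p = beam.Pipeline()
    _ = (p
         | beam.Create([100, 200, 300])
         | beam.ParDo(RecordEndTime()))
    result = p.run()
    result.wait_until_finish()
    # Expected to return 300 on a runner that reports distributions, else -1.
    return get_end_time_metric(result, 'my_namespace', 'end_time_ms')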
Example #14
    def test_CreateCatalogItem(self):
        expected_counter = 1
        with mock.patch.object(recommendations_ai,
                               'get_recommendation_catalog_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()

            _ = (p | "Create data" >> beam.Create([self._catalog_item])
                 | "Create CatalogItem" >>
                 recommendations_ai.CreateCatalogItem(project="test"))

            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('api_calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.result == expected_counter)
Example #15
def get_counter_metric(result, namespace, name):
    # type: (PipelineResult, str, str) -> int
    """
  get specific counter metric from pipeline result

  Args:
    result: the PipelineResult which metrics are read from
    namespace: a string representing the namespace of wanted metric
    name: a string representing the  name of the wanted metric

  Returns:
    the result of the wanted metric if it exist, else -1
  """
    metrics = result.metrics().query(
        MetricsFilter().with_namespace(namespace).with_name(name))
    counters = metrics['counters']
    if len(counters) > 1:
        raise RuntimeError(
            '%d instead of one metric result matches name: %s in namespace %s'
            % (len(counters), name, namespace))
    return counters[0].result if len(counters) > 0 else -1
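A similar hedged sketch for get_counter_metric; again, the counter namespace, counter name, and counting function are assumptions for illustration.
# Hypothetical usage sketch: increment a counter once per element, then read
# the total back with get_counter_metric. All names are assumptions.
import apache_beam as beam
from apache_beam.metrics.metric import Metrics

elements_counter = Metrics.counter('my_namespace', 'elements_processed')


def count_element(element):
    elements_counter.inc()
    return element


def run_example():
    p = beam.Pipeline()
    _ = (p
         | beam.Create(['a', 'b', 'c'])
         | beam.Map(count_element))
    result = p.run()
    result.wait_until_finish()
    # Expected to return 3 on a runner that reports counters, else -1.
    return get_counter_metric(result, 'my_namespace', 'elements_processed')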
Example #16
    def test_Predict(self):
        expected_counter = 1
        with mock.patch.object(recommendations_ai,
                               'get_recommendation_prediction_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()

            _ = (
                p | "Create data" >> beam.Create([self._user_event])
                |
                "Prediction UserEvents" >> recommendations_ai.PredictUserEvent(
                    project="test", placement_id="recently_viewed_default"))

            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('api_calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.result == expected_counter)
Example #17
    def test_ImportUserEvents(self):
        expected_counter = 1
        with mock.patch.object(recommendations_ai,
                               'get_recommendation_user_event_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()

            _ = (p | "Create data" >> beam.Create([
                (self._user_event["user_info"]["visitor_id"],
                 self._user_event),
                (self._user_event["user_info"]["visitor_id"], self._user_event)
            ]) | "Create UserEvents" >>
                 recommendations_ai.ImportUserEvents(project="test"))

            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('api_calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.result == expected_counter)
Example #18
    def test_AnnotateVideo_URIs(self):
        videos_to_annotate = [
            'gs://cloud-samples-data/video/cat.mp4',
            'gs://cloud-samples-data/video/cat.mp4'
        ]
        expected_counter = len(videos_to_annotate)
        with mock.patch.object(videointelligenceml,
                               'get_videointelligence_client',
                               return_value=self._mock_client):
            p = beam.Pipeline()
            _ = (p
                 | "Create data" >> beam.Create(videos_to_annotate)
                 | "Annotate video" >> videointelligenceml.AnnotateVideo(
                     self.features))
            result = p.run()
            result.wait_until_finish()

            read_filter = MetricsFilter().with_name('API Calls')
            query_result = result.metrics().query(read_filter)
            if query_result['counters']:
                read_counter = query_result['counters'][0]
                self.assertTrue(read_counter.committed == expected_counter)
Example #19
def apply_filter(self, allowed):
    """Prevents metrics from namespaces other than those specified in the
    argument from being published."""
    if allowed:
        self.metrics_monitor.filters = MetricsFilter().with_namespaces(
            allowed)
Example #20
def get_end_time_metric(result, namespace, name):
  # type: (PipelineResult, str, str) -> int
  distributions = result.metrics().query(
      MetricsFilter().with_namespace(namespace).with_name(
          name))['distributions']
  return max(map(lambda m: m.result.max, distributions))