def test_customized_counters_in_combine_fn(self):
  p = TestPipeline()
  inputs = (
      p
      | beam.Create([('key1', 'a'), ('key1', 'ab'), ('key1', 'abc'),
                     ('key2', 'uvxy'), ('key2', 'uvxyz')]))

  # The result of concatenating all values regardless of key.
  global_concat = (
      inputs
      | beam.Values()
      | beam.CombineGlobally(SortedConcatWithCounters()))

  # The (key, concatenated_string) pairs for all keys.
  concat_per_key = (inputs | beam.CombinePerKey(SortedConcatWithCounters()))

  # Verify the concatenated strings are correct.
  expected_concat_per_key = [('key1', 'aaabbc'), ('key2', 'uuvvxxyyz')]
  assert_that(
      global_concat, equal_to(['aaabbcuuvvxxyyz']), label='global concat')
  assert_that(
      concat_per_key,
      equal_to(expected_concat_per_key),
      label='concat per key')

  result = p.run()
  result.wait_until_finish()

  # Verify the values of the metrics are correct.
  word_counter_filter = MetricsFilter().with_name('word_counter')
  query_result = result.metrics().query(word_counter_filter)
  if query_result['counters']:
    word_counter = query_result['counters'][0]
    self.assertEqual(word_counter.result, 5)

  word_lengths_filter = MetricsFilter().with_name('word_lengths')
  query_result = result.metrics().query(word_lengths_filter)
  if query_result['counters']:
    word_lengths = query_result['counters'][0]
    self.assertEqual(word_lengths.result, 15)

  word_len_dist_filter = MetricsFilter().with_name('word_len_dist')
  query_result = result.metrics().query(word_len_dist_filter)
  if query_result['distributions']:
    word_len_dist = query_result['distributions'][0]
    self.assertEqual(word_len_dist.result.mean, 3)

  last_word_len_filter = MetricsFilter().with_name('last_word_len')
  query_result = result.metrics().query(last_word_len_filter)
  if query_result['gauges']:
    last_word_len = query_result['gauges'][0]
    self.assertIn(last_word_len.result.value, [1, 2, 3, 4, 5])
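# The tests above and below reference SortedConcatWithCounters without showing
# its definition. The following is a minimal sketch reconstructed from the
# assertions (sorted-character concatenation plus the word_counter,
# word_lengths, word_len_dist and last_word_len metrics); it is an assumption,
# not the source's actual implementation.
import apache_beam as beam
from apache_beam.metrics import Metrics


class SortedConcatWithCounters(beam.CombineFn):
  """Concatenates words with their characters sorted, reporting user metrics."""
  def __init__(self):
    # Metric names match the MetricsFilter names queried in the tests.
    self.word_counter = Metrics.counter(self.__class__, 'word_counter')
    self.word_lengths = Metrics.counter(self.__class__, 'word_lengths')
    self.word_len_dist = Metrics.distribution(self.__class__, 'word_len_dist')
    self.last_word_len = Metrics.gauge(self.__class__, 'last_word_len')

  def create_accumulator(self):
    return ''

  def add_input(self, accumulator, element):
    # One increment per word, plus its length in the counter, distribution
    # and gauge; this yields 5, 15, mean 3 and a value in [1, 5] for the
    # five-word input above.
    self.word_counter.inc()
    self.word_lengths.inc(len(element))
    self.word_len_dist.update(len(element))
    self.last_word_len.set(len(element))
    return ''.join(sorted(accumulator + element))

  def merge_accumulators(self, accumulators):
    return ''.join(sorted(''.join(accumulators)))

  def extract_output(self, accumulator):
    return accumulator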
def __init__(self):
  # Be sure to set blocking to False for timeout_ms to work properly.
  self.pipeline = TestPipeline(is_integration_test=True, blocking=False)
  assert not self.pipeline.blocking

  options = self.pipeline.get_pipeline_options().view_as(LoadTestOptions)
  self.timeout_ms = options.timeout_ms
  self.input_options = options.input_options
  self.metrics_namespace = options.metrics_table or 'default'
  publish_to_bq = options.publish_to_big_query
  if publish_to_bq is None:
    logging.info(
        'Missing --publish_to_big_query option. Metrics will not '
        'be published to BigQuery.')
  if options.input_options is None:
    logging.error('--input_options argument is required.')
    sys.exit(1)

  gcloud_options = self.pipeline.get_pipeline_options().view_as(
      GoogleCloudOptions)
  self.project_id = gcloud_options.project

  self._metrics_monitor = MetricsReader(
      publish_to_bq=publish_to_bq,
      project_name=self.project_id,
      bq_table=options.metrics_table,
      bq_dataset=options.metrics_dataset,
      influxdb_options=InfluxDBMetricsPublisherOptions(
          options.influx_measurement,
          options.influx_db_name,
          options.influx_hostname,
          os.getenv('INFLUXDB_USER'),
          os.getenv('INFLUXDB_USER_PASSWORD')),
      # Apply a filter to prevent system metrics from being published.
      filters=MetricsFilter().with_namespace(self.metrics_namespace))
def test_AnnotateImage_b64_content(self):
  base_64_encoded_image = \
      b'YmVnaW4gNjQ0IGNhdC12aWRlby5tcDRNICAgICgmOVQ+NyFNPCMwUi4uZmFrZV92aWRlb'
  images_to_annotate = [
      base_64_encoded_image,
      base_64_encoded_image,
      base_64_encoded_image,
  ]
  expected_counter = len(images_to_annotate)
  with mock.patch.object(visionml,
                         'get_vision_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create(images_to_annotate)
        | "Annotate image" >> visionml.AnnotateImage(
            self.features,
            min_batch_size=self.min_batch_size,
            max_batch_size=self.max_batch_size))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('API Calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.result, expected_counter)
def test_AnnotateVideo_with_side_input_context(self):
  videos_to_annotate = [
      'gs://cloud-samples-data/video/cat.mp4',
      'gs://some-other-video/sample.mp4',
      'gs://some-other-video/sample_2.mp4'
  ]
  video_contexts = [
      ('gs://cloud-samples-data/video/cat.mp4', self.video_ctx),
      ('gs://some-other-video/sample.mp4', self.video_ctx),
  ]
  expected_counter = len(videos_to_annotate)
  with mock.patch.object(videointelligenceml,
                         'get_videointelligence_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    context_side_input = (
        p
        | "Video contexts" >> beam.Create(video_contexts))
    _ = (
        p
        | "Create data" >> beam.Create(videos_to_annotate)
        | "Annotate video" >> videointelligenceml.AnnotateVideo(
            self.features,
            context_side_input=beam.pvalue.AsDict(context_side_input)))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('API Calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.committed, expected_counter)
def test_AnnotateVideo_b64_content(self):
  base_64_encoded_video = \
      b'YmVnaW4gNjQ0IGNhdC12aWRlby5tcDRNICAgICgmOVQ+NyFNPCMwUi4uZmFrZV92aWRlb'
  videos_to_annotate = [
      base_64_encoded_video,
      base_64_encoded_video,
      base_64_encoded_video,
  ]
  expected_counter = len(videos_to_annotate)
  with mock.patch.object(videointelligenceml,
                         'get_videointelligence_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create(videos_to_annotate)
        | "Annotate video" >> videointelligenceml.AnnotateVideo(self.features))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('API Calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.committed, expected_counter)
def test_AnnotateImage_URI_with_side_input_context(self):
  images_to_annotate = [
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg'
  ]
  image_contexts = [
      ('gs://cloud-samples-data/vision/ocr/sign.jpg', self.img_ctx),
      ('gs://cloud-samples-data/vision/ocr/sign.jpg', self.img_ctx),
  ]
  expected_counter = len(images_to_annotate)
  with mock.patch.object(visionml,
                         'get_vision_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    context_side_input = (
        p
        | "Image contexts" >> beam.Create(image_contexts))
    _ = (
        p
        | "Create data" >> beam.Create(images_to_annotate)
        | "Annotate image" >> visionml.AnnotateImage(
            self.features,
            min_batch_size=self.min_batch_size,
            max_batch_size=self.max_batch_size,
            context_side_input=beam.pvalue.AsDict(context_side_input)))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('API Calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.result, expected_counter)
def __init__(self):
  self.pipeline = TestPipeline(is_integration_test=True)
  load_test_options = self.pipeline.get_pipeline_options().view_as(
      LoadTestOptions)

  self.input_options = load_test_options.input_options
  self.metrics_namespace = load_test_options.metrics_table or 'default'
  publish_to_bq = load_test_options.publish_to_big_query
  if publish_to_bq is None:
    logging.info(
        'Missing --publish_to_big_query option. Metrics will not '
        'be published to BigQuery.')
  if load_test_options.input_options is None:
    logging.error('--input_options argument is required.')
    sys.exit(1)

  gcloud_options = self.pipeline.get_pipeline_options().view_as(
      GoogleCloudOptions)
  self.project_id = gcloud_options.project

  self._metrics_monitor = MetricsReader(
      publish_to_bq=publish_to_bq,
      project_name=self.project_id,
      bq_table=load_test_options.metrics_table,
      bq_dataset=load_test_options.metrics_dataset,
      # Apply a filter to prevent system metrics from being published.
      filters=MetricsFilter().with_namespace(self.metrics_namespace))
def test_AnnotateImage_URIs_large_batch(self):
  images_to_annotate = [
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
      'gs://cloud-samples-data/vision/ocr/sign.jpg',
  ]
  batch_size = 5
  expected_counter = 3  # All 11 images should fit in 3 batches.
  with mock.patch.object(visionml,
                         'get_vision_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create(images_to_annotate)
        | "Annotate image" >> visionml.AnnotateImage(
            self.features,
            max_batch_size=batch_size,
            min_batch_size=batch_size))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('API Calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.result, expected_counter)
def assertCounterEqual(self, pipeline_result, counter_name, expected):
  metrics = pipeline_result.metrics().query(
      MetricsFilter().with_name(counter_name))
  try:
    counter = metrics['counters'][0]
    self.assertEqual(expected, counter.result)
  except IndexError:
    raise AssertionError('Counter "{}" was not found'.format(counter_name))
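# A hedged usage sketch for assertCounterEqual above; CountingDoFn and the
# 'elements_processed' counter name are hypothetical, introduced only to
# illustrate the call.
import apache_beam as beam
from apache_beam.metrics import Metrics


class CountingDoFn(beam.DoFn):
  """Hypothetical DoFn that counts how many elements it processes."""
  def __init__(self):
    self.elements = Metrics.counter(self.__class__, 'elements_processed')

  def process(self, element):
    self.elements.inc()
    yield element


def test_elements_processed_counter(self):
  p = TestPipeline()
  _ = p | beam.Create([1, 2, 3]) | beam.ParDo(CountingDoFn())
  result = p.run()
  result.wait_until_finish()
  # The helper fails the test if the counter is missing or has the wrong value.
  self.assertCounterEqual(result, 'elements_processed', 3)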
def test_customized_counters_in_combine_fn_empty(self):
  p = TestPipeline()
  inputs = p | beam.Create([])

  # The result of concatenating all values regardless of key.
  global_concat = (
      inputs
      | beam.Values()
      | beam.CombineGlobally(SortedConcatWithCounters()))

  # The (key, concatenated_string) pairs for all keys.
  concat_per_key = (inputs | beam.CombinePerKey(SortedConcatWithCounters()))

  # Verify the concatenated strings are correct.
  assert_that(global_concat, equal_to(['']), label='global concat')
  assert_that(concat_per_key, equal_to([]), label='concat per key')

  result = p.run()
  result.wait_until_finish()

  # Verify the values of the metrics are correct.
  word_counter_filter = MetricsFilter().with_name('word_counter')
  query_result = result.metrics().query(word_counter_filter)
  if query_result['counters']:
    word_counter = query_result['counters'][0]
    self.assertEqual(word_counter.result, 0)

  word_lengths_filter = MetricsFilter().with_name('word_lengths')
  query_result = result.metrics().query(word_lengths_filter)
  if query_result['counters']:
    word_lengths = query_result['counters'][0]
    self.assertEqual(word_lengths.result, 0)

  word_len_dist_filter = MetricsFilter().with_name('word_len_dist')
  query_result = result.metrics().query(word_len_dist_filter)
  if query_result['distributions']:
    word_len_dist = query_result['distributions'][0]
    self.assertEqual(word_len_dist.result.count, 0)

  last_word_len_filter = MetricsFilter().with_name('last_word_len')
  query_result = result.metrics().query(last_word_len_filter)
  # No element was ever recorded, so the gauge was never set.
  self.assertFalse(query_result['gauges'])
def get_counter_metric(result, namespace, name):
  # type: (PipelineResult, str, str) -> int
  metrics = result.metrics().query(
      MetricsFilter().with_namespace(namespace).with_name(name))
  counters = metrics['counters']
  if len(counters) > 1:
    raise RuntimeError(
        '%d instead of one metric result matches name: %s in namespace %s' %
        (len(counters), name, namespace))
  return counters[0].result if len(counters) > 0 else -1
def setUp(self, pipeline_options=None):
  self.pipeline = TestPipeline(pipeline_options)
  input_options = self.pipeline.get_option('input_options') or '{}'
  self.input_options = json.loads(input_options)
  self.project_id = self.pipeline.get_option('project')

  self.metrics_dataset = self.pipeline.get_option('metrics_dataset')
  self.metrics_namespace = self.pipeline.get_option('metrics_table')

  publish_to_bq = self.pipeline.get_option('publish_to_big_query') == 'true'
  self.metrics_monitor = MetricsReader(
      publish_to_bq=publish_to_bq,
      project_name=self.project_id,
      bq_table=self.metrics_namespace,
      bq_dataset=self.metrics_dataset,
      # Apply a filter to prevent system metrics from being published.
      filters=MetricsFilter().with_namespace(self.metrics_namespace))
def get_end_time_metric(result, namespace, name):
  # type: (PipelineResult, str, str) -> int
  """Gets the end time out of all times recorded by the specified
  distribution metric.

  Args:
    result: the PipelineResult from which metrics are read
    namespace: a string representing the namespace of the wanted metric
    name: a string representing the name of the wanted metric

  Returns:
    the largest time in the metric, or -1 if it doesn't exist
  """
  distributions = result.metrics().query(
      MetricsFilter().with_namespace(namespace).with_name(
          name))['distributions']
  max_list = list(map(lambda m: m.result.max, distributions))
  return max(max_list) if len(max_list) > 0 else -1
def test_CreateCatalogItem(self):
  expected_counter = 1
  with mock.patch.object(recommendations_ai,
                         'get_recommendation_catalog_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create([self._catalog_item])
        | "Create CatalogItem" >>
        recommendations_ai.CreateCatalogItem(project="test"))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('api_calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.result, expected_counter)
def get_counter_metric(result, namespace, name):
  # type: (PipelineResult, str, str) -> int
  """Gets a specific counter metric from a pipeline result.

  Args:
    result: the PipelineResult from which metrics are read
    namespace: a string representing the namespace of the wanted metric
    name: a string representing the name of the wanted metric

  Returns:
    the result of the wanted metric if it exists, else -1
  """
  metrics = result.metrics().query(
      MetricsFilter().with_namespace(namespace).with_name(name))
  counters = metrics['counters']
  if len(counters) > 1:
    raise RuntimeError(
        '%d instead of one metric result matches name: %s in namespace %s' %
        (len(counters), name, namespace))
  return counters[0].result if len(counters) > 0 else -1
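# A hedged sketch combining get_counter_metric and get_end_time_metric from
# above; the 'loadtest' namespace and both metric names are illustrative
# assumptions, not taken from the source.
import logging


def log_pipeline_metrics(result):
  # type: (PipelineResult) -> None
  total_bytes = get_counter_metric(result, 'loadtest', 'total_bytes_count')
  end_time = get_end_time_metric(result, 'loadtest', 'runtime')
  if total_bytes == -1 or end_time == -1:
    # Both helpers return -1 when the runner reported no matching metric.
    logging.warning('Some metrics are missing from the pipeline result.')
  else:
    logging.info(
        'total bytes: %d, last element processed at: %d',
        total_bytes,
        end_time)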
def test_Predict(self):
  expected_counter = 1
  with mock.patch.object(recommendations_ai,
                         'get_recommendation_prediction_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create([self._user_event])
        | "Prediction UserEvents" >> recommendations_ai.PredictUserEvent(
            project="test", placement_id="recently_viewed_default"))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('api_calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.result, expected_counter)
def test_ImportUserEvents(self):
  expected_counter = 1
  with mock.patch.object(recommendations_ai,
                         'get_recommendation_user_event_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create([
            (self._user_event["user_info"]["visitor_id"], self._user_event),
            (self._user_event["user_info"]["visitor_id"], self._user_event)
        ])
        | "Create UserEvents" >>
        recommendations_ai.ImportUserEvents(project="test"))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('api_calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.result, expected_counter)
def test_AnnotateVideo_URIs(self):
  videos_to_annotate = [
      'gs://cloud-samples-data/video/cat.mp4',
      'gs://cloud-samples-data/video/cat.mp4'
  ]
  expected_counter = len(videos_to_annotate)
  with mock.patch.object(videointelligenceml,
                         'get_videointelligence_client',
                         return_value=self._mock_client):
    p = beam.Pipeline()
    _ = (
        p
        | "Create data" >> beam.Create(videos_to_annotate)
        | "Annotate video" >> videointelligenceml.AnnotateVideo(self.features))
    result = p.run()
    result.wait_until_finish()

    read_filter = MetricsFilter().with_name('API Calls')
    query_result = result.metrics().query(read_filter)
    if query_result['counters']:
      read_counter = query_result['counters'][0]
      self.assertEqual(read_counter.committed, expected_counter)
def apply_filter(self, allowed):
  """Prevents metrics from namespaces other than those specified in the
  argument from being published."""
  if allowed:
    self.metrics_monitor.filters = MetricsFilter().with_namespaces(allowed)
def get_end_time_metric(result, namespace, name):
  # type: (PipelineResult, str, str) -> int
  distributions = result.metrics().query(
      MetricsFilter().with_namespace(namespace).with_name(
          name))['distributions']
  # Return -1 when no matching distribution was reported, mirroring the
  # documented variant above, instead of letting max() raise ValueError.
  return max((m.result.max for m in distributions), default=-1)