def test_system_counters_set_labels_and_step_name(self):
  """System counters expose original_name/output_user_name labels and steps."""
  client, job_result = self.setup_mock_client_result(self.SYSTEM_COUNTERS_LIST)
  metrics = dataflow_metrics.DataflowMetrics(client, job_result)
  expected = [
      MetricResultMatcher(
          name='ElementCount',
          labels={
              'original_name': 'ToIsmRecordForMultimap-out0-ElementCount',
              'output_user_name': 'ToIsmRecordForMultimap-out0'
          },
          attempted=42,
          committed=42),
      MetricResultMatcher(
          name='MeanByteCount',
          labels={
              'original_name': 'Read-out0-MeanByteCount',
              'output_user_name': 'GroupByKey/Read-out0'
          },
          attempted=31,
          committed=31),
      MetricResultMatcher(
          name='ExecutionTime_ProcessElement',
          step='write/Write/Write',
          attempted=1000,
          committed=1000),
  ]
  # verify_all returns a (possibly empty) error description; empty means pass.
  mismatches = metric_result_matchers.verify_all(metrics.all_metrics(), expected)
  self.assertFalse(mismatches, mismatches)
def test_query_structured_metrics(self):
  """query() surfaces structured counters and distribution metrics."""
  client, job_result = self.setup_mock_client_result(self.STRUCTURED_COUNTER_LIST)
  metrics = dataflow_metrics.DataflowMetrics(client, job_result)
  # Bypass the real step-name translation; map every metric to step 'split'.
  metrics._translate_step_name = types.MethodType(
      lambda self, x: 'split', metrics)
  result = metrics.query()

  counter_key = MetricKey(
      'split', MetricName('__main__.WordExtractingDoFn', 'word_lengths'))
  self.assertEqual(
      result['counters'], [MetricResult(counter_key, 109475, 109475)])

  dist_key = MetricKey(
      'split', MetricName('__main__.WordExtractingDoFn', 'word_length_dist'))
  dist_value = DistributionResult(DistributionData(18, 2, 2, 16))
  self.assertEqual(
      result['distributions'],
      [MetricResult(dist_key, dist_value, dist_value)])
def test_translate_portable_job_step_name(self):
  """Portable-submission internal step names map back to user transform names."""
  client, job_result = self.setup_mock_client_result(self.ONLY_COUNTERS_LIST)
  options = PipelineOptions([
      '--experiments=use_runner_v2',
      '--experiments=use_portable_job_submission',
      '--temp_location=gs://any-location/temp',
      '--project=dummy_project',
  ])
  pipeline = Pipeline(options=options)
  pipeline | Create([1, 2, 3]) | 'MyTestParDo' >> ParDo(DoFn())  # pylint:disable=expression-not-assigned
  env = DockerEnvironment(container_image='test_default_image')
  proto_pipeline, _ = pipeline.to_runner_api(
      return_context=True, default_environment=env)
  dataflow_job = apiclient.Job(options, proto_pipeline)
  metrics = dataflow_metrics.DataflowMetrics(client, job_result, dataflow_job)
  self.assertEqual(
      'MyTestParDo',
      metrics._translate_step_name('ref_AppliedPTransform_MyTestParDo_14'))
def test_cache_functions(self):
  """Metrics are re-fetched while the job runs and cached once it terminates."""
  client, job_result = self.setup_mock_client_result()
  metrics = dataflow_metrics.DataflowMetrics(client, job_result)
  # Nothing has been queried yet, so the cache starts empty.
  self.assertTrue(metrics._cached_metrics is None)
  # While the job is not terminal, query() must not populate the cache.
  metrics.query()
  self.assertTrue(metrics._cached_metrics is None)
  # After the job reaches a terminal state, the next query fills the cache.
  job_result._is_in_terminal_state.return_value = True
  metrics.query()
  self.assertTrue(metrics._cached_metrics)
def test_query_counters_translates_step_names(self):
  """query() counters carry step names produced by real step translation.

  NOTE(review): this method was previously also named ``test_query_counters``,
  the same name as a later test in this class. The later definition shadowed
  this one, so it was silently never collected or run by unittest. Renamed so
  both tests execute; assertions are unchanged.
  """
  mock_client, mock_job_result = self.setup_mock_client_result()
  dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
  query_result = dm.query()
  expected_counters = [
      MetricResult(
          MetricKey(
              'split',
              MetricName('__main__.WordExtractingDoFn', 'empty_lines')),
          1080,
          1080),
      MetricResult(
          MetricKey(
              'longstepname/split',
              MetricName('__main__.WordExtractingDoFn', 'words')),
          26181,
          26185),
  ]
  # Order of query results is unspecified; compare sorted by metric name.
  self.assertEqual(
      sorted(query_result['counters'], key=lambda x: x.key.metric.name),
      sorted(expected_counters, key=lambda x: x.key.metric.name))
def test_query_counters(self):
  """query() returns attempted and committed values for plain counters."""
  client, job_result = self.setup_mock_client_result(self.ONLY_COUNTERS_LIST)
  metrics = dataflow_metrics.DataflowMetrics(client, job_result)
  # Short-circuit step-name translation so every counter lands on 'split'.
  metrics._translate_step_name = types.MethodType(
      lambda self, x: 'split', metrics)
  result = metrics.query()
  namespace = '__main__.WordExtractingDoFn'
  expected = [
      MetricResult(
          MetricKey('split', MetricName(namespace, 'empty_lines')),
          1080,
          1080),
      MetricResult(
          MetricKey('split', MetricName(namespace, 'words')), 26181, 26185),
  ]
  by_name = lambda r: r.key.metric.name
  # Order of query results is unspecified; compare sorted by metric name.
  self.assertEqual(
      sorted(result['counters'], key=by_name), sorted(expected, key=by_name))