Example 1
    def test_system_counters_set_labels_and_step_name(self):
        """System counters expose the expected labels, step names and values."""
        mock_client, mock_job_result = self.setup_mock_client_result(
            self.SYSTEM_COUNTERS_LIST)
        metrics = dataflow_metrics.DataflowMetrics(
            mock_client, mock_job_result).all_metrics()

        expected = [
            MetricResultMatcher(
                name='ElementCount',
                labels={
                    'original_name':
                    'ToIsmRecordForMultimap-out0-ElementCount',
                    'output_user_name': 'ToIsmRecordForMultimap-out0'
                },
                attempted=42,
                committed=42),
            MetricResultMatcher(
                name='MeanByteCount',
                labels={
                    'original_name': 'Read-out0-MeanByteCount',
                    'output_user_name': 'GroupByKey/Read-out0'
                },
                attempted=31,
                committed=31),
            MetricResultMatcher(
                name='ExecutionTime_ProcessElement',
                step='write/Write/Write',
                attempted=1000,
                committed=1000),
        ]
        # verify_all returns a list of mismatch descriptions; empty means OK.
        errors = metric_result_matchers.verify_all(metrics, expected)
        self.assertFalse(errors, errors)
Example 2
    def test_query_structured_metrics(self):
        """Structured counters and distributions are parsed into MetricResults."""
        mock_client, mock_job_result = self.setup_mock_client_result(
            self.STRUCTURED_COUNTER_LIST)
        dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
        # Pin step-name translation to a fixed value for this test.
        dm._translate_step_name = types.MethodType(
            lambda self, x: 'split', dm)
        result = dm.query()

        counter_key = MetricKey(
            'split',
            MetricName('__main__.WordExtractingDoFn', 'word_lengths'),
        )
        self.assertEqual(
            result['counters'],
            [MetricResult(counter_key, 109475, 109475)])

        dist = DistributionResult(DistributionData(18, 2, 2, 16))
        dist_key = MetricKey(
            'split',
            MetricName('__main__.WordExtractingDoFn',
                       'word_length_dist'),
        )
        self.assertEqual(
            result['distributions'],
            [MetricResult(dist_key, dist, dist)])
Example 3
    def test_translate_portable_job_step_name(self):
        """Portable job submission: internal step names map back to user names."""
        mock_client, mock_job_result = self.setup_mock_client_result(
            self.ONLY_COUNTERS_LIST)

        options = PipelineOptions([
            '--experiments=use_runner_v2',
            '--experiments=use_portable_job_submission',
            '--temp_location=gs://any-location/temp',
            '--project=dummy_project',
        ])

        pipeline = Pipeline(options=options)
        pipeline | Create([1, 2, 3]) | 'MyTestParDo' >> ParDo(DoFn())  # pylint:disable=expression-not-assigned

        env = DockerEnvironment(container_image='test_default_image')
        proto_pipeline, _ = pipeline.to_runner_api(
            return_context=True, default_environment=env)

        job = apiclient.Job(options, proto_pipeline)
        dm = dataflow_metrics.DataflowMetrics(
            mock_client, mock_job_result, job)
        self.assertEqual(
            'MyTestParDo',
            dm._translate_step_name('ref_AppliedPTransform_MyTestParDo_14'))
Example 4
    def test_cache_functions(self):
        """Metrics are cached only once the job reaches a terminal state.

        Uses assertIsNone/assertIsNotNone-style idiomatic assertions so a
        failure reports the actual cached value instead of a bare
        ``False is not true``.
        """
        mock_client, mock_job_result = self.setup_mock_client_result()
        dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)

        # At first creation, we should always query dataflow.
        self.assertIsNone(dm._cached_metrics)

        # Right after querying, we still query again.
        dm.query()
        self.assertIsNone(dm._cached_metrics)

        # The job has ended. The query should not run again after this.
        mock_job_result._is_in_terminal_state.return_value = True
        dm.query()
        self.assertTrue(dm._cached_metrics)
Example 5
 def test_query_counters(self):
     """User counters from the service are exposed as MetricResults."""
     mock_client, mock_job_result = self.setup_mock_client_result()
     dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
     actual = dm.query()['counters']
     expected = [
         MetricResult(
             MetricKey(
                 'split',
                 MetricName('__main__.WordExtractingDoFn', 'empty_lines')),
             1080, 1080),
         MetricResult(
             MetricKey('longstepname/split',
                       MetricName('__main__.WordExtractingDoFn', 'words')),
             26181, 26185),
     ]
     # Order from the service is unspecified; compare sorted by metric name.
     by_name = lambda res: res.key.metric.name
     self.assertEqual(
         sorted(actual, key=by_name), sorted(expected, key=by_name))
Example 6
 def test_query_counters(self):
     """Counters query with step-name translation stubbed to 'split'."""
     mock_client, mock_job_result = self.setup_mock_client_result(
         self.ONLY_COUNTERS_LIST)
     dm = dataflow_metrics.DataflowMetrics(mock_client, mock_job_result)
     # Stub out step-name translation so every metric lands on 'split'.
     dm._translate_step_name = types.MethodType(lambda self, x: 'split', dm)
     actual = dm.query()['counters']
     expected = [
         MetricResult(
             MetricKey('split',
                       MetricName('__main__.WordExtractingDoFn',
                                  'empty_lines')),
             1080, 1080),
         MetricResult(
             MetricKey('split',
                       MetricName('__main__.WordExtractingDoFn', 'words')),
             26181, 26185),
     ]
     # Order from the service is unspecified; compare sorted by metric name.
     by_name = lambda res: res.key.metric.name
     self.assertEqual(
         sorted(actual, key=by_name), sorted(expected, key=by_name))