Example #1
    def test_compute_bounds_stddevs_from_mean(self,
                                              comparison,
                                              std_devs,
                                              expected_bounds,
                                              inclusive=False):
        # mean = 3, stddev = ~1.414
        value_history = range(1, 6)

        class _FakeMetricStore:
            def get_metric_history(self, *args, **kwargs):
                return [
                    bigquery_client.MetricHistoryRow('', '',
                                                     datetime.datetime.now(),
                                                     '', v)
                    for v in value_history
                ]

        assertion = metrics_pb2.Assertion(
            std_devs_from_mean=metrics_pb2.Assertion.StdDevsFromMean(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                std_devs=std_devs,
            ),
            inclusive_bounds=inclusive,
        )
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None, _FakeMetricStore())
        bounds = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(bounds),
            # EQUAL is always inclusive
            dataclasses.astuple(
                utils.Bounds(*expected_bounds, inclusive
                             or comparison == 'EQUAL')),
            places=3)
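For reference, the expected_bounds values above can be reproduced by hand: the history [1..5] has mean 3 and population standard deviation sqrt(2) ≈ 1.414, so a WITHIN assertion at k standard deviations spans mean ± k·stddev. A minimal sketch of that arithmetic (illustrative only, not the collector's implementation):

import statistics

def stddev_bounds(history, std_devs):
    # Population standard deviation, matching the comment in the test:
    # pstdev([1..5]) == sqrt(2) ~= 1.414.
    mean = statistics.mean(history)
    spread = std_devs * statistics.pstdev(history)
    return (mean - spread, mean + spread)

lower, upper = stddev_bounds(range(1, 6), 1)
# lower ~= 1.586, upper ~= 4.414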
Example #2
    def test_compute_bounds_percent_difference_with_mean_value(
            self, comparison, pct_diff, expected_bounds, inclusive=False):
        # mean = 3
        value_history = range(1, 6)

        class _FakeMetricStore:
            def get_metric_history(self, *args, **kwargs):
                return [
                    bigquery_client.MetricHistoryRow('', '',
                                                     datetime.datetime.now(),
                                                     '', v)
                    for v in value_history
                ]

        assertion = metrics_pb2.Assertion(
            percent_difference=metrics_pb2.Assertion.PercentDifference(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                use_historical_mean=True,
                percent=pct_diff,
            ),
            inclusive_bounds=inclusive,
        )
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None, _FakeMetricStore())
        bounds = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(bounds),
            dataclasses.astuple(utils.Bounds(*expected_bounds, inclusive)),
            places=3)
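The percent-difference bounds follow the same pattern, anchored on the historical mean: with use_historical_mean=True, a WITHIN assertion at pct percent spans mean ± mean·pct/100. A hand-check of that arithmetic (a sketch, not the collector's code; one-sided comparisons may clamp one side to ±inf):

import statistics

def percent_diff_bounds(history, pct):
    mean = statistics.mean(history)  # 3 for [1..5]
    delta = mean * pct / 100.0
    return (mean - delta, mean + delta)

lower, upper = percent_diff_bounds(range(1, 6), 10)
# lower ~= 2.7, upper ~= 3.3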
Example #3
 def test_assert_duration(self):
     metric_source = metrics_pb2.MetricSource(
         literals=metrics_pb2.LiteralSource(
             assertions={
                 "duration":
                 metrics_pb2.Assertion(
                     within_bounds=metrics_pb2.Assertion.WithinBounds(
                         lower_bound=100,
                         upper_bound=200,
                     ),
                     inclusive_bounds=False,
                 )
             }))
     event = metrics_pb2.TestCompletedEvent(
         benchmark_id="test_benchmark",
         duration=duration_pb2.Duration(seconds=150),
         metric_collection_config=metrics_pb2.MetricCollectionConfig(
             sources=[metric_source]))
     collector = literal_collector.LiteralCollector(
         event=event, raw_source=metric_source)
     points = collector.metric_points()
     self.assertLen(points, 1)
     self.assertEqual(points[0].metric_key, 'duration')
     self.assertEqual(points[0].metric_value, 150)
     self.assertEqual(points[0].bounds, utils.Bounds(100, 200, False))
Example #4
def receive_test_event(data: dict, context) -> bool:
    """Entrypoint for Cloud Function.

    Args:
      data: dict containing a base64-encoded proto message.
      context: event metadata for the triggering Pub/Sub message, including
        its event_id.

    Returns:
      True if the message should be ack-ed, else False.
    """
    logging.set_verbosity(logging.INFO)

    dataset = DATASET
    project = PROJECT or google.auth.default()[1]

    try:
        message_bytes = base64.b64decode(data['data'])
        event = metrics_pb2.TestCompletedEvent()
        event.ParseFromString(message_bytes)
    except Exception as e:
        # absl's logging.fatal would abort the process before the ack, so log
        # at ERROR and ack the message to prevent redelivery crash loops.
        logging.error(
            'Failed to parse PubSub message. Will ack message to prevent '
            'more crashes.',
            exc_info=e)
        return True

    alert_handler = alerts.AlertHandler(project,
                                        event.benchmark_id,
                                        event.debug_info,
                                        level='ERROR')
    logging.get_absl_logger().addHandler(alert_handler)

    metric_store = bigquery_client.BigQueryMetricStore(
        project=project,
        dataset=dataset,
    )
    try:
        logging.info('Processing test event: %s', str(event))
        job_row, metric_rows = process_proto_message(event, metric_store,
                                                     context.event_id)
        metric_store.insert_status_and_metrics(job_row, metric_rows)
    except Exception as e:
        # Log and fall through so the alert handler below can report errors.
        logging.error(
            'Encountered exception while attempting to process message.',
            exc_info=e)

    if alert_handler.has_errors:
        logging.info('Alerts: %s', str(alert_handler._records))
        if SEND_EMAIL_ALERTS:
            _send_email(project, *alert_handler.generate_email_content)
        else:
            logging.info('E-mail alerts disabled.')
    else:
        logging.info('No alerts found.')

    return True
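A local harness for this entrypoint only needs to mimic the Pub/Sub background-function payload: the serialized proto arrives base64-encoded under data['data'], and context carries the event_id. A hypothetical smoke test along those lines (SimpleNamespace stands in for the real context object; a real test would also stub out BigQueryMetricStore so nothing reaches BigQuery):

import base64
from types import SimpleNamespace

event = metrics_pb2.TestCompletedEvent(benchmark_id='test_benchmark')
data = {'data': base64.b64encode(event.SerializeToString())}
context = SimpleNamespace(event_id='fake-event-id')

# The function acks (returns True) even when processing fails.
assert receive_test_event(data, context)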
Example #5
  def test_create_test_completed_event(self, succeeded_count, failed_count,
                                       conditions, expected_status):
    job = _job_from_dict({
      'metadata': {
        'name': 'job-name',
        'namespace': 'namespace',
        'labels': {
          'benchmarkId': 'test-job',
        },
      },
      'status': {
        'startTime': _START_TIME,
        'succeeded': succeeded_count,
        'failed': failed_count,
        'conditions': [
          {
            'status': True,
            'reason': reason,
            'type': cond_type,
            'lastTransitionTime': _END_TIME,
          }
          for cond_type, reason in conditions
        ]
      }
    })

    actual_event = event_publisher.create_test_completed_event(
      job,
      model_output_bucket='gs://fake-bucket',
      cluster_name='cluster-name',
      cluster_location='cluster-location',
      project='project-id'
    )

    start_time = timestamp_pb2.Timestamp()
    start_time.FromDatetime(_START_TIME)
    duration = duration_pb2.Duration()
    duration.FromTimedelta(_END_TIME - _START_TIME)
    expected_event = metrics_pb2.TestCompletedEvent(
      benchmark_id='test-job',
      output_path='gs://fake-bucket/job-name',
      status=metrics_pb2.TestCompletedEvent.TestStatus.Value(expected_status),
      num_attempts=succeeded_count + failed_count,
      start_time=start_time,
      duration=duration,
      labels={'benchmarkId': 'test-job'},
      debug_info=metrics_pb2.DebugInfo(
        logs_link='https://console.cloud.google.com/logs?project=project-id&advancedFilter=resource.type%3Dk8s_container%0Aresource.labels.project_id%3Dproject-id%0Aresource.labels.cluster_name%3Dcluster-name%0Aresource.labels.namespace_name%3Dnamespace%0Aresource.labels.pod_name%3Ajob-name%0Aresource.labels.location%3Acluster-location%0A',
        details_link='https://console.cloud.google.com/kubernetes/job/cluster-location/cluster-name/namespace/job-name?project=project-id'
      ),
      metric_collection_config=metrics_pb2.MetricCollectionConfig(),
    )

    self.assertProtoEqual(expected_event, actual_event)
Example #6
 def test_compute_bounds_within_bounds(self,
                                       lower,
                                       upper,
                                       expected_bounds,
                                       inclusive=False):
     assertion = metrics_pb2.Assertion(
         within_bounds=metrics_pb2.Assertion.WithinBounds(
             lower_bound=lower,
             upper_bound=upper,
         ),
         inclusive_bounds=inclusive,
     )
     collector = base.BaseCollector(
         metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
         None)
     bounds = collector.compute_bounds("metric_key", assertion)
     self.assertEqual(bounds, utils.Bounds(*expected_bounds, inclusive))
Example #7
    def test_aggregate_metrics_include_all_strategies(self):
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(include_tags=[
                metrics_pb2.TensorBoardSource.TagStrategy(
                    tag_pattern="*",
                    strategies=[
                        metrics_pb2.TensorBoardSource.FINAL,
                        metrics_pb2.TensorBoardSource.MAX,
                        metrics_pb2.TensorBoardSource.MIN,
                        metrics_pb2.TensorBoardSource.AVERAGE,
                        metrics_pb2.TensorBoardSource.MEDIAN,
                    ])
            ]))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        metric_to_value = {key: value for key, value, _ in points}

        self.assertDictEqual(
            metric_to_value, {
                'foo_final': 2,
                'foo_min': 1,
                'foo_max': 2,
                'foo_average': 1.5,
                'foo_median': 1.5,
                'eval/accuracy_final': .25,
                'eval/accuracy_min': .125,
                'eval/accuracy_max': .5,
                'eval/accuracy_average': np.mean([.125, .25, .5]),
                'eval/accuracy_median': np.median([.125, .25, .5]),
                'train/bar_final': 100,
                'train/bar_min': 10,
                'train/bar_max': 100,
                'train/bar_average': np.mean([10, 100, 100]),
                'train/bar_median': np.median([10, 100, 100]),
            })

        for _, _, bounds in points:
            self.assertEqual(bounds, utils.NO_BOUNDS)
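The expected values in this dict follow from how each strategy aggregates a tag's scalar series. A plausible mapping, sketched in plain Python and numpy (the real collector derives these suffixes from the TensorBoardSource strategy enum):

import numpy as np

_AGGREGATIONS = {
    'final': lambda values: values[-1],
    'min': min,
    'max': max,
    'average': np.mean,
    'median': np.median,
}

accuracy = [.125, .25, .5]  # the eval/accuracy series from the fake logs
expected = {f'eval/accuracy_{name}': agg(accuracy)
            for name, agg in _AGGREGATIONS.items()}
# {'eval/accuracy_final': 0.25, 'eval/accuracy_min': 0.125, ...}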
Example #8
    def test_aggregate_metrics_with_assertion(self):
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(
                include_tags=[
                    metrics_pb2.TensorBoardSource.TagStrategy(
                        tag_pattern="eval/*",
                        strategies=[
                            metrics_pb2.TensorBoardSource.FINAL,
                            metrics_pb2.TensorBoardSource.MAX,
                            metrics_pb2.TensorBoardSource.MIN,
                        ])
                ],
                aggregate_assertions=[
                    metrics_pb2.TensorBoardSource.AggregateAssertion(
                        tag='eval/accuracy',
                        strategy=metrics_pb2.TensorBoardSource.MAX,
                        assertion=metrics_pb2.Assertion(
                            within_bounds=metrics_pb2.Assertion.WithinBounds(
                                lower_bound=.4,
                                upper_bound=1.0,
                            ),
                            inclusive_bounds=True,
                        ))
                ]))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        self.assertCountEqual(
            points,
            [
                utils.MetricPoint('eval/accuracy_max', .5,
                                  utils.Bounds(.4, 1.0, True)),
                utils.MetricPoint('eval/accuracy_min', .125, utils.NO_BOUNDS),
                utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
            ],
        )
Example #9
 def test_compute_bounds_percent_difference_with_target_value(
         self,
         comparison,
         target,
         pct_diff,
         expected_bounds,
         inclusive=False):
     assertion = metrics_pb2.Assertion(
         percent_difference=metrics_pb2.Assertion.PercentDifference(
             comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
             value=target,
             percent=pct_diff,
         ),
         inclusive_bounds=inclusive,
     )
     collector = base.BaseCollector(
         metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
         None)
     bounds = collector.compute_bounds("metric_key", assertion)
     self.assertEqual(bounds, utils.Bounds(*expected_bounds, inclusive))
Example #10
    def test_include_and_exclude(self):
        metric_source = metrics_pb2.MetricSource(
            tensorboard=metrics_pb2.TensorBoardSource(
                include_tags=[
                    metrics_pb2.TensorBoardSource.TagStrategy(
                        tag_pattern="*",
                        strategies=[
                            metrics_pb2.TensorBoardSource.FINAL,
                        ])
                ],
                exclude_tags=[
                    'foo',
                    'train/*',
                ],
                aggregate_assertions=[
                    metrics_pb2.TensorBoardSource.AggregateAssertion(
                        tag='foo',
                        strategy=metrics_pb2.TensorBoardSource.MIN,
                        assertion=metrics_pb2.Assertion(
                            within_bounds=metrics_pb2.Assertion.WithinBounds(
                                lower_bound=0.,
                                upper_bound=2.,
                            )))
                ]))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=self.temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))
        collector = tensorboard_collector.TensorBoardCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())

        self.assertCountEqual(
            points,
            [
                utils.MetricPoint('eval/accuracy_final', .25, utils.NO_BOUNDS),
                utils.MetricPoint('foo_min', 1, utils.Bounds(0., 2., False)),
            ],
        )
Example #11
    def test_min_time(self, window, timestamp, expected_min):
        start_time = timestamp_pb2.Timestamp()
        start_time.FromDatetime(datetime.datetime(2021, 2, 16, 0, 0, 0))

        min_timestamp = timestamp_pb2.Timestamp()
        if timestamp:
            min_timestamp.FromDatetime(timestamp)

        time_window = duration_pb2.Duration()
        if window:
            time_window.FromTimedelta(window)

        assertion = metrics_pb2.Assertion(
            std_devs_from_mean=metrics_pb2.Assertion.StdDevsFromMean(
                comparison=metrics_pb2.Assertion.Comparison.WITHIN,
                std_devs=1,
            ),
            time_window=time_window,
            min_timestamp=min_timestamp,
        )

        metric_store = bigquery_client.BigQueryMetricStore(
            dataset='fake_dataset', project='fake_project')
        metric_store.get_metric_history = mock.Mock(return_value=[])

        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(
                benchmark_id="test_benchmark",
                start_time=start_time,
            ), None, metric_store)

        collector.compute_bounds("metric_key", assertion)

        metric_store.get_metric_history.assert_called_with(
            benchmark_id="test_benchmark",
            metric_key="metric_key",
            min_time=expected_min)
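The expected_min parameter presumably combines the two optional fields on the assertion. One plausible reading of the relationship under test, as a hedged sketch (the real logic lives in compute_bounds, and unset-field handling or tie-breaking may differ):

import datetime

def min_history_time(start_time, time_window, min_timestamp):
    # Hypothetical: take the most restrictive (latest) floor among
    # (start_time - time_window) and the explicit min_timestamp.
    candidates = []
    if time_window:
        candidates.append(start_time - time_window)
    if min_timestamp:
        candidates.append(min_timestamp)
    return max(candidates) if candidates else datetime.datetime.min

start = datetime.datetime(2021, 2, 16)
print(min_history_time(start, datetime.timedelta(days=7), None))
# -> 2021-02-09 00:00:00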
Example #12
    def test_compute_bounds_fixed_value(self,
                                        comparison,
                                        threshold_value,
                                        expected_bounds,
                                        inclusive=False):

        assertion = metrics_pb2.Assertion(
            fixed_value=metrics_pb2.Assertion.FixedValue(
                comparison=metrics_pb2.Assertion.Comparison.Value(comparison),
                value=threshold_value,
            ),
            inclusive_bounds=inclusive,
        )
        collector = base.BaseCollector(
            metrics_pb2.TestCompletedEvent(benchmark_id="test_benchmark"),
            None)
        bounds = collector.compute_bounds("metric_key", assertion)
        self.assertSequenceAlmostEqual(
            dataclasses.astuple(bounds),
            # EQUAL is always inclusive
            dataclasses.astuple(
                utils.Bounds(*expected_bounds, inclusive
                             or comparison == 'EQUAL')),
            places=3)
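The fixed-value cases reduce to a small mapping from comparison to interval, under the Bounds(lower, upper, inclusive) convention used throughout these tests. A sketch of that mapping (illustrative, not BaseCollector.compute_bounds itself):

import math

def fixed_value_bounds(comparison, value, inclusive):
    if comparison == 'LESS':
        return utils.Bounds(-math.inf, value, inclusive)
    if comparison == 'GREATER':
        return utils.Bounds(value, math.inf, inclusive)
    if comparison == 'EQUAL':
        # A degenerate interval: EQUAL is always inclusive.
        return utils.Bounds(value, value, True)
    raise ValueError(f'Unsupported comparison: {comparison}')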
Example #13
    def test_get_metrics_from_perfzero_summary(self):
        temp_dir = self.create_tempdir().full_path
        summary_dir = os.path.join(temp_dir, 'date_and_time')
        pathlib.Path(summary_dir).mkdir(parents=True, exist_ok=True)
        summary_path = os.path.join(summary_dir, 'perfzero_summary.json')
        with open(summary_path, 'w') as f:
            json.dump(
                {
                    "execution_id": "execution_id",
                    "execution_timestamp": 1234567890.1,
                    "benchmark_result": {
                        "wall_time":
                        1234,
                        "metrics": [{
                            "name": "exp_per_second",
                            "value": 1.1,
                        }, {
                            "name": "avg_exp_per_second",
                            "value": 2.2,
                        }, {
                            "name": "startup_time",
                            "value": 3.3
                        }],
                    },
                    "benchmark_info": {
                        "not": "important",
                    },
                    "setup_info": {},
                    "ml_framework_info": {
                        "not": "important",
                    },
                    "system_info": {
                        "not": "important"
                    },
                    "process_info": {
                        "max_rss": 4.4,
                        "max_vms": 5.5,
                        "max_cpu_percent": 6.6,
                    }
                }, f)

        metric_source = metrics_pb2.MetricSource(
            perfzero=metrics_pb2.PerfZeroSource(
                assertions={
                    'total_wall_time':
                    metrics_pb2.Assertion(
                        within_bounds=metrics_pb2.Assertion.WithinBounds(
                            lower_bound=1230,
                            upper_bound=1240,
                        )),
                    'exp_per_second':
                    metrics_pb2.Assertion(
                        within_bounds=metrics_pb2.Assertion.WithinBounds(
                            lower_bound=1,
                            upper_bound=100,
                        ), )
                }))
        event = metrics_pb2.TestCompletedEvent(
            benchmark_id="test_benchmark",
            output_path=temp_dir,
            metric_collection_config=metrics_pb2.MetricCollectionConfig(
                sources=[metric_source]))

        collector = perfzero_collector.PerfZeroCollector(
            event=event, raw_source=metric_source)
        points = list(collector.metric_points())
        self.assertCountEqual(
            points,
            {
                utils.MetricPoint("total_wall_time", 1234,
                                  utils.Bounds(1230., 1240., False)),
                utils.MetricPoint("exp_per_second", 1.1,
                                  utils.Bounds(1., 100., False)),
                utils.MetricPoint("avg_exp_per_second", 2.2, utils.NO_BOUNDS),
                utils.MetricPoint("startup_time", 3.3, utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_rss", 4.4,
                                  utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_vms", 5.5,
                                  utils.NO_BOUNDS),
                utils.MetricPoint("process_info/max_cpu_percent", 6.6,
                                  utils.NO_BOUNDS),
            },
        )
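The expected points mirror a straightforward flattening of the summary JSON. Roughly (a sketch of the extraction; the real logic lives in PerfZeroCollector):

def perfzero_metrics(summary):
    # wall_time is reported under a fixed key...
    yield 'total_wall_time', summary['benchmark_result']['wall_time']
    # ...each named metric becomes its own point...
    for metric in summary['benchmark_result']['metrics']:
        yield metric['name'], metric['value']
    # ...and process stats are namespaced under process_info/.
    for key, value in summary['process_info'].items():
        yield f'process_info/{key}', value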
Example #14
def create_test_completed_event(
        job: kubernetes.client.V1Job, model_output_bucket: str,
        cluster_name: str, cluster_location: str,
        project: str) -> Optional[metrics_pb2.TestCompletedEvent]:
    """Returns a TestCompletedEvent to publish to PubSub.

  Args:
    job: A Kubernetes Job resource.
    model_output_bucket: Path to GCS bucket with model outputs.
    cluster_name: Name of the current Kubernetes cluster.
    cluster_location: Location (region or zone) of the current Kubernetes cluster.
    project: The project ID of the current project.
  
  Returns:
    A TestCompletedEvent with the information from job.
  """
    # job.status.conditions _usually_ has length 1, but it can have both
    # passing and failing conditions. Give precedence to failing conditions.
    if len(job.status.conditions) == 1:
        condition = job.status.conditions[0]
    elif len(job.status.conditions) == 0:
        logging.error('Job %s has no conditions.', job.metadata.name)
        return None
    else:
        condition = next(
            (c for c in job.status.conditions if c.type == 'Failed'), None)

    if not condition:
        logging.error('Job %s has multiple conditions but none of type '
                      '"Failed": %s', job.metadata.name,
                      str(job.status.conditions))
        return None
    elif condition.reason == 'DeadlineExceeded':
        job_status = metrics_pb2.TestCompletedEvent.TIMEOUT
    elif condition.reason == 'BackoffLimitExceeded':
        job_status = metrics_pb2.TestCompletedEvent.FAILED
    elif condition.type == 'Complete':
        job_status = metrics_pb2.TestCompletedEvent.COMPLETED
    else:
        logging.error('Unknown condition for Job %s: %s', job.metadata.name,
                      str(condition))
        return None

    annotations = job.metadata.annotations or {}
    gcs_subdir = annotations.get('ml-testing-accelerators/gcs-subdir', '')
    output_path = os.path.join(model_output_bucket, gcs_subdir,
                               job.metadata.name)

    metric_config = metrics_pb2.MetricCollectionConfig()
    mcc_json = annotations.get('ml-testing-accelerators/metric-config', '{}')
    json_format.Parse(mcc_json, metric_config)

    stackdriver_query = textwrap.dedent(f"""\
    resource.type=k8s_container
    resource.labels.project_id={project}
    resource.labels.cluster_name={cluster_name}
    resource.labels.namespace_name={job.metadata.namespace}
    resource.labels.pod_name:{job.metadata.name}
    resource.labels.location:{cluster_location}
  """)
    stackdriver_link = "https://console.cloud.google.com/logs?{}".format(
        urllib.parse.urlencode({
            'project': project,
            'advancedFilter': stackdriver_query
        }))

    start_time = timestamp_pb2.Timestamp()
    start_time.FromDatetime(job.status.start_time)
    duration = duration_pb2.Duration()
    duration.FromTimedelta(condition.last_transition_time -
                           job.status.start_time)

    return metrics_pb2.TestCompletedEvent(
        benchmark_id=job.metadata.labels['benchmarkId'],
        output_path=output_path,
        status=job_status,
        num_attempts=(job.status.succeeded or 0) + (job.status.failed or 0),
        start_time=start_time,
        duration=duration,
        metric_collection_config=metric_config,
        labels=job.metadata.labels,
        debug_info=metrics_pb2.DebugInfo(
            logs_link=stackdriver_link,
            details_link=
            f'https://console.cloud.google.com/kubernetes/job/{cluster_location}/{cluster_name}/{job.metadata.namespace}/{job.metadata.name}?project={project}'
        ))
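For context, a hypothetical caller might list finished Jobs and publish each event to Pub/Sub. The topic name, project, bucket, and cluster details below are placeholders:

import kubernetes
from google.cloud import pubsub_v1

def publish_completed_jobs(namespace: str = 'default'):
    kubernetes.config.load_incluster_config()
    batch_v1 = kubernetes.client.BatchV1Api()
    publisher = pubsub_v1.PublisherClient()
    topic = publisher.topic_path('project-id', 'test-completed-events')

    for job in batch_v1.list_namespaced_job(namespace).items:
        if not job.status.conditions:
            continue  # still running
        event = create_test_completed_event(
            job,
            model_output_bucket='gs://fake-bucket',
            cluster_name='cluster-name',
            cluster_location='cluster-location',
            project='project-id')
        if event:
            publisher.publish(topic, data=event.SerializeToString())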