Example #1
File: operations.py Project: eralmas7/beam
 def execution_time_monitoring_infos(self, transform_id):
   total_time_spent_msecs = (
       self.scoped_start_state.sampled_msecs_int()
       + self.scoped_process_state.sampled_msecs_int()
       + self.scoped_finish_state.sampled_msecs_int())
   mis = [
       monitoring_infos.int64_counter(
           monitoring_infos.START_BUNDLE_MSECS_URN,
           self.scoped_start_state.sampled_msecs_int(),
           ptransform=transform_id
       ),
       monitoring_infos.int64_counter(
           monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
           self.scoped_process_state.sampled_msecs_int(),
           ptransform=transform_id
       ),
       monitoring_infos.int64_counter(
           monitoring_infos.FINISH_BUNDLE_MSECS_URN,
           self.scoped_finish_state.sampled_msecs_int(),
           ptransform=transform_id
       ),
       monitoring_infos.int64_counter(
           monitoring_infos.TOTAL_MSECS_URN,
           total_time_spent_msecs,
           ptransform=transform_id
       ),
   ]
   return {monitoring_infos.to_key(mi) : mi for mi in mis}
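Every snippet on this page follows the same two-step pattern: build a MonitoringInfo proto with one of the monitoring_infos helpers, then key it with monitoring_infos.to_key() so dictionaries of metrics from different operations can be merged. A minimal standalone sketch (the transform id is a hypothetical placeholder):

from apache_beam.metrics import monitoring_infos

# Build a counter MonitoringInfo for a well-known Beam URN.
mi = monitoring_infos.int64_counter(
    monitoring_infos.ELEMENT_COUNT_URN,  # which metric this is
    42,  # cumulative int64 value sampled so far
    ptransform='my_transform_id')  # hypothetical transform id

# to_key() derives a hashable key from the URN and labels, producing the
# {key: MonitoringInfo} shape the snippet above returns.
infos = {monitoring_infos.to_key(mi): mi}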
Example #2
    def pcollection_count_monitoring_infos(self, transform_id):
        """Returns the element count MonitoringInfo collected by this operation."""
        if len(self.receivers) == 1:
            # If there is exactly one output, we can unambiguously
            # fix its name later, which we do.
            # TODO(robertwb): Plumb the actual name here.
            elem_count_mi = monitoring_infos.int64_counter(
                monitoring_infos.ELEMENT_COUNT_URN,
                self.receivers[0].opcounter.element_counter.value(),
                ptransform=transform_id,
                tag='ONLY_OUTPUT' if len(self.receivers) == 1 else str(None),
            )

            (unused_mean, sum, count, min,
             max) = (self.receivers[0].opcounter.mean_byte_counter.value())
            metric = metrics_pb2.Metric(
                distribution_data=metrics_pb2.DistributionData(
                    int_distribution_data=metrics_pb2.IntDistributionData(
                        count=count, sum=sum, min=min, max=max)))
            sampled_byte_count = monitoring_infos.int64_distribution(
                monitoring_infos.SAMPLED_BYTE_SIZE_URN,
                metric,
                ptransform=transform_id,
                tag='ONLY_OUTPUT' if len(self.receivers) == 1 else str(None),
            )
            return {
                monitoring_infos.to_key(elem_count_mi): elem_count_mi,
                monitoring_infos.to_key(sampled_byte_count): sampled_byte_count
            }
        return {}
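The sampled-byte distribution above is unpacked from the 5-tuple returned by mean_byte_counter.value(); the mean is discarded because the proto only carries count/sum/min/max (the mean is recoverable as sum/count). A hedged sketch of just that conversion, assuming the same metrics_pb2 protos the example uses (sample numbers, and underscored names to avoid shadowing the sum/min/max builtins):

from apache_beam.portability.api import metrics_pb2

(unused_mean, sum_, count, min_, max_) = (40.0, 120, 3, 20, 60)
metric = metrics_pb2.Metric(
    distribution_data=metrics_pb2.DistributionData(
        int_distribution_data=metrics_pb2.IntDistributionData(
            count=count, sum=sum_, min=min_, max=max_)))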
Example #3
  def verify_write_call_metric(
      self, project_id, dataset_id, table_id, status, count):
    """Check if an metric was recorded for the BQ IO write API call."""
    process_wide_monitoring_infos = list(
        MetricsEnvironment.process_wide_container().
        to_runner_api_monitoring_infos(None).values())
    resource = resource_identifiers.BigQueryTable(
        project_id, dataset_id, table_id)
    labels = {
        # TODO(ajamato): Add Ptransform label.
        monitoring_infos.SERVICE_LABEL: 'BigQuery',
        # Refer to any method which writes elements to BigQuery in batches
        # as "BigQueryBatchWrite". I.e. storage API's insertAll, or future
        # APIs introduced.
        monitoring_infos.METHOD_LABEL: 'BigQueryBatchWrite',
        monitoring_infos.RESOURCE_LABEL: resource,
        monitoring_infos.BIGQUERY_PROJECT_ID_LABEL: project_id,
        monitoring_infos.BIGQUERY_DATASET_LABEL: dataset_id,
        monitoring_infos.BIGQUERY_TABLE_LABEL: table_id,
        monitoring_infos.STATUS_LABEL: status,
    }
    expected_mi = monitoring_infos.int64_counter(
        monitoring_infos.API_REQUEST_COUNT_URN, count, labels=labels)
    expected_mi.ClearField("start_time")

    found = False
    for actual_mi in process_wide_monitoring_infos:
      actual_mi.ClearField("start_time")
      if expected_mi == actual_mi:
        found = True
        break
    self.assertTrue(
        found, "Did not find write call metric with status: %s" % status)
Example #4
  def pcollection_count_monitoring_infos(self, tag_to_pcollection_id):
    # type: (Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]

    """Returns the element count MonitoringInfo collected by this operation."""
    infos = super(
        DoOperation,
        self).pcollection_count_monitoring_infos(tag_to_pcollection_id)

    if self.tagged_receivers:
      for tag, receiver in self.tagged_receivers.items():
        if str(tag) not in tag_to_pcollection_id:
          continue
        pcollection_id = tag_to_pcollection_id[str(tag)]
        mi = monitoring_infos.int64_counter(
            monitoring_infos.ELEMENT_COUNT_URN,
            receiver.opcounter.element_counter.value(),
            pcollection=pcollection_id)
        infos[monitoring_infos.to_key(mi)] = mi
        (unused_mean, sum, count, min, max) = (
            receiver.opcounter.mean_byte_counter.value())
        sampled_byte_count = monitoring_infos.int64_distribution(
            monitoring_infos.SAMPLED_BYTE_SIZE_URN,
            DistributionData(sum, count, min, max),
            pcollection=pcollection_id)
        infos[monitoring_infos.to_key(sampled_byte_count)] = sampled_byte_count
    return infos
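Compare the distribution handling with Example #2: there the value was wrapped in a metrics_pb2.Metric proto, while here a DistributionData accumulator is passed to int64_distribution directly. The two snippets appear to come from different Beam versions of the same helper, so match the style of whichever version you are pinned to.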
Example #5
  def pcollection_count_monitoring_infos(self, tag_to_pcollection_id):
    # type: (Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]

    """Returns the element count MonitoringInfo collected by this operation."""

    # Skip producing monitoring infos if there is more than one receiver
    # since there is no way to provide a mapping from tag to pcollection id
    # within Operation.
    if len(self.receivers) != 1 or len(tag_to_pcollection_id) != 1:
      return {}

    all_monitoring_infos = {}
    pcollection_id = next(iter(tag_to_pcollection_id.values()))
    receiver = self.receivers[0]
    elem_count_mi = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        receiver.opcounter.element_counter.value(),
        pcollection=pcollection_id,
    )

    (unused_mean, sum, count, min, max) = (
        receiver.opcounter.mean_byte_counter.value())

    sampled_byte_count = monitoring_infos.int64_distribution(
        monitoring_infos.SAMPLED_BYTE_SIZE_URN,
        DistributionData(sum, count, min, max),
        pcollection=pcollection_id,
    )
    all_monitoring_infos[monitoring_infos.to_key(elem_count_mi)] = elem_count_mi
    all_monitoring_infos[monitoring_infos.to_key(
        sampled_byte_count)] = sampled_byte_count

    return all_monitoring_infos
Example #6
    def verify_read_call_metric(self, project_id, namespace, status, count):
        """Check if a metric was recorded for the Datastore IO read API call."""
        process_wide_monitoring_infos = list(
            MetricsEnvironment.process_wide_container(
            ).to_runner_api_monitoring_infos(None).values())
        resource = resource_identifiers.DatastoreNamespace(
            project_id, namespace)
        labels = {
            monitoring_infos.SERVICE_LABEL: 'Datastore',
            monitoring_infos.METHOD_LABEL: 'BatchDatastoreRead',
            monitoring_infos.RESOURCE_LABEL: resource,
            monitoring_infos.DATASTORE_NAMESPACE_LABEL: namespace,
            monitoring_infos.DATASTORE_PROJECT_ID_LABEL: project_id,
            monitoring_infos.STATUS_LABEL: status
        }
        expected_mi = monitoring_infos.int64_counter(
            monitoring_infos.API_REQUEST_COUNT_URN, count, labels=labels)
        expected_mi.ClearField("start_time")

        found = False
        for actual_mi in process_wide_monitoring_infos:
            actual_mi.ClearField("start_time")
            if expected_mi == actual_mi:
                found = True
                break
        self.assertTrue(
            found, "Did not find read call metric with status: %s" % status)
Example #7
    def verify_write_call_metric(self, project_id, instance_id, table_id,
                                 status, count):
        """Check if a metric was recorded for the Datastore IO write API call."""
        process_wide_monitoring_infos = list(
            MetricsEnvironment.process_wide_container(
            ).to_runner_api_monitoring_infos(None).values())
        resource = resource_identifiers.BigtableTable(project_id, instance_id,
                                                      table_id)
        labels = {
            monitoring_infos.SERVICE_LABEL: 'BigTable',
            monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows',
            monitoring_infos.RESOURCE_LABEL: resource,
            monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: project_id,
            monitoring_infos.INSTANCE_ID_LABEL: instance_id,
            monitoring_infos.TABLE_ID_LABEL: table_id,
            monitoring_infos.STATUS_LABEL: status
        }
        expected_mi = monitoring_infos.int64_counter(
            monitoring_infos.API_REQUEST_COUNT_URN, count, labels=labels)
        expected_mi.ClearField("start_time")

        found = False
        for actual_mi in process_wide_monitoring_infos:
            actual_mi.ClearField("start_time")
            if expected_mi == actual_mi:
                found = True
                break
        self.assertTrue(
            found, "Did not find write call metric with status: %s" % status)
Example #8
 def monitoring_infos(self, transform_id):
     # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
     infos = super(DoOperation, self).monitoring_infos(transform_id)
     if self.tagged_receivers:
         for tag, receiver in self.tagged_receivers.items():
             mi = monitoring_infos.int64_counter(
                 monitoring_infos.ELEMENT_COUNT_URN,
                 receiver.opcounter.element_counter.value(),
                 ptransform=transform_id,
                 tag=str(tag))
             infos[monitoring_infos.to_key(mi)] = mi
             (unused_mean, sum, count, min,
              max) = (receiver.opcounter.mean_byte_counter.value())
             metric = metrics_pb2.Metric(
                 distribution_data=metrics_pb2.DistributionData(
                     int_distribution_data=metrics_pb2.IntDistributionData(
                         count=count, sum=sum, min=min, max=max)))
             sampled_byte_count = monitoring_infos.int64_distribution(
                 monitoring_infos.SAMPLED_BYTE_SIZE_URN,
                 metric,
                 ptransform=transform_id,
                 tag=str(tag))
             infos[monitoring_infos.to_key(
                 sampled_byte_count)] = sampled_byte_count
     return infos
Example #9
 def get_monitoring_infos(self, cache_size, cache_capacity):
     # type: (int, int) -> List[metrics_pb2.MonitoringInfo]
     """Returns the metrics scoped to the current bundle."""
     metrics = self._context.metrics
     if len(metrics) == 0:
         # No metrics collected, do not report
         return []
     # Add all missing metrics which were not reported
     for key in Metrics.ALL_METRICS:
         if key not in metrics:
             metrics[key] = 0
     # Gauges which reflect the state since last queried
     gauges = [
         monitoring_infos.int64_gauge(self.PREFIX + name, val)
         for name, val in metrics.items()
     ]
     gauges.append(
         monitoring_infos.int64_gauge(self.PREFIX + 'size', cache_size))
     gauges.append(
         monitoring_infos.int64_gauge(self.PREFIX + 'capacity',
                                      cache_capacity))
     # Counters for the summary across all metrics
     counters = [
         monitoring_infos.int64_counter(self.PREFIX + name + '_total', val)
         for name, val in metrics.items()
     ]
     # Reinitialize metrics for this thread/bundle
     metrics.clear()
     return gauges + counters
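The split mirrors the two metric kinds: gauges report the value as of this query (metrics.clear() then restarts the window for the next bundle), while the *_total counters keep accumulating across queries. A hedged sketch with a hypothetical URN prefix:

# Gauge: point-in-time value. Counter: running total.
gauge = monitoring_infos.int64_gauge('beam:metric:sample:size', 17)
total = monitoring_infos.int64_counter('beam:metric:sample:size_total', 1234)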
Example #10
File: operations.py Project: zhujk/beam
 def monitoring_infos(self, transform_id):
     infos = super(DoOperation, self).monitoring_infos(transform_id)
     if self.tagged_receivers:
         for tag, receiver in self.tagged_receivers.items():
             mi = monitoring_infos.int64_counter(
                 monitoring_infos.ELEMENT_COUNT_URN,
                 receiver.opcounter.element_counter.value(),
                 ptransform=transform_id,
                 tag=str(tag))
             infos[monitoring_infos.to_key(mi)] = mi
     return infos
Example #11
File: operations.py Project: eralmas7/beam
 def monitoring_infos(self, transform_id):
   infos = super(DoOperation, self).monitoring_infos(transform_id)
   if self.tagged_receivers:
     for tag, receiver in self.tagged_receivers.items():
       mi = monitoring_infos.int64_counter(
           monitoring_infos.ELEMENT_COUNT_URN,
           receiver.opcounter.element_counter.value(),
           ptransform=transform_id,
           tag=str(tag)
       )
       infos[monitoring_infos.to_key(mi)] = mi
   return infos
Example #12
File: cells.py Project: xmflyingfish/beam
 def to_runner_api_monitoring_info(self, name, transform_id):
   from apache_beam.metrics import monitoring_infos
   if not name.urn:
     # User counter case.
     return monitoring_infos.int64_user_counter(
         name.namespace,
         name.name,
         self.get_cumulative(),
         ptransform=transform_id)
   else:
     # Arbitrary URN case.
     return monitoring_infos.int64_counter(
         name.urn, self.get_cumulative(), labels=name.labels)
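The branch separates user-declared metrics, which are identified by a namespace/name pair rather than a URN, from metrics that already carry an arbitrary URN. A hedged sketch of the user-counter path, with placeholder namespace and name:

from apache_beam.metrics import monitoring_infos

mi = monitoring_infos.int64_user_counter(
    'my_namespace',  # hypothetical user namespace
    'my_counter',    # hypothetical counter name
    7,               # cumulative value
    ptransform='my_transform_id')  # hypothetical transform id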
Example #13
File: operations.py Project: zhujk/beam
 def element_count_monitoring_infos(self, transform_id):
     """Returns the element count MonitoringInfo collected by this operation."""
     if len(self.receivers) == 1:
         # If there is exactly one output, we can unambiguously
         # fix its name later, which we do.
         # TODO(robertwb): Plumb the actual name here.
         mi = monitoring_infos.int64_counter(
             monitoring_infos.ELEMENT_COUNT_URN,
             self.receivers[0].opcounter.element_counter.value(),
             ptransform=transform_id,
             tag='ONLY_OUTPUT' if len(self.receivers) == 1 else str(None),
         )
         return {monitoring_infos.to_key(mi): mi}
     return {}
Example #14
File: operations.py Project: eralmas7/beam
 def element_count_monitoring_infos(self, transform_id):
   """Returns the element count MonitoringInfo collected by this operation."""
   if len(self.receivers) == 1:
     # If there is exactly one output, we can unambiguously
     # fix its name later, which we do.
     # TODO(robertwb): Plumb the actual name here.
     mi = monitoring_infos.int64_counter(
         monitoring_infos.ELEMENT_COUNT_URN,
         self.receivers[0].opcounter.element_counter.value(),
         ptransform=transform_id,
         tag='ONLY_OUTPUT' if len(self.receivers) == 1 else str(None),
     )
     return {monitoring_infos.to_key(mi) : mi}
   return {}
Example #15
    def test_harness_monitoring_infos_and_metadata(self):
        # Clear the process wide metric container.
        MetricsEnvironment.process_wide_container().reset()
        # Create a process_wide metric.
        urn = 'my.custom.urn'
        labels = {'key': 'value'}
        InternalMetrics.counter(urn=urn, labels=labels,
                                process_wide=True).inc(10)

        harness_monitoring_infos_request = beam_fn_api_pb2.InstructionRequest(
            instruction_id="monitoring_infos",
            harness_monitoring_infos=beam_fn_api_pb2.
            HarnessMonitoringInfosRequest())

        responses = self.get_responses([harness_monitoring_infos_request])

        expected_monitoring_info = monitoring_infos.int64_counter(
            urn, 10, labels=labels)
        monitoring_data = (responses['monitoring_infos'].
                           harness_monitoring_infos.monitoring_data)

        # Request the full MonitoringInfo metadata for the returned short_ids.
        short_ids = list(monitoring_data.keys())
        monitoring_infos_metadata_request = beam_fn_api_pb2.InstructionRequest(
            instruction_id="monitoring_infos_metadata",
            monitoring_infos=beam_fn_api_pb2.MonitoringInfosMetadataRequest(
                monitoring_info_id=short_ids))

        responses = self.get_responses([monitoring_infos_metadata_request])

        # The metadata responses do not include payloads, so clear the
        # payload from the expected MonitoringInfo before comparing.
        expected_monitoring_info.ClearField("payload")

        # Verify that one of the returned monitoring infos is our expected
        # monitoring info.
        short_id_to_mi = (responses['monitoring_infos_metadata'].
                          monitoring_infos.monitoring_info)
        found = False
        for mi in short_id_to_mi.values():
            # Clear the timestamp before comparing
            mi.ClearField("start_time")
            if mi == expected_monitoring_info:
                found = True
        self.assertTrue(found, str(responses['monitoring_infos_metadata']))
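This test exercises the short-id protocol: the HarnessMonitoringInfosRequest response carries only payloads keyed by short ids, and the follow-up MonitoringInfosMetadataRequest resolves those ids to the full MonitoringInfo metadata, which omits the payload field.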
Example #16
    def test_int64_counter(self):
        expected_labels = {}
        expected_labels[monitoring_infos.PCOLLECTION_LABEL] = "collectionname"
        expected_labels[monitoring_infos.PTRANSFORM_LABEL] = "ptransformname"
        expected_labels[monitoring_infos.SERVICE_LABEL] = "BigQuery"

        labels = {
            monitoring_infos.SERVICE_LABEL: "BigQuery",
        }
        metric = CounterCell().get_cumulative()
        result = monitoring_infos.int64_counter(
            monitoring_infos.API_REQUEST_COUNT_URN,
            metric,
            ptransform="ptransformname",
            pcollection="collectionname",
            labels=labels)
        counter_value = monitoring_infos.extract_counter_value(result)

        self.assertEqual(0, counter_value)
        self.assertEqual(result.labels, expected_labels)
Example #17
    def to_runner_api_monitoring_infos(self, transform_id):
        """Returns a list of MonitoringInfos for the metrics in this container."""
        all_user_metrics = []
        for k, v in self.counters.items():
            all_user_metrics.append(
                monitoring_infos.int64_counter(
                    user_metric_urn(k.namespace, k.name),
                    v.to_runner_api_monitoring_info(),
                    ptransform=transform_id))

        for k, v in self.distributions.items():
            all_user_metrics.append(
                monitoring_infos.int64_distribution(
                    user_metric_urn(k.namespace, k.name),
                    v.get_cumulative().to_runner_api_monitoring_info(),
                    ptransform=transform_id))

        for k, v in self.gauges.items():
            all_user_metrics.append(
                monitoring_infos.int64_gauge(
                    user_metric_urn(k.namespace, k.name),
                    v.get_cumulative().to_runner_api_monitoring_info(),
                    ptransform=transform_id))
        return {monitoring_infos.to_key(mi): mi for mi in all_user_metrics}
Example #18
  def to_runner_api_monitoring_infos(self, transform_id):
    """Returns a list of MonitoringInfos for the metrics in this container."""
    all_user_metrics = []
    for k, v in self.counters.items():
      all_user_metrics.append(monitoring_infos.int64_counter(
          user_metric_urn(k.namespace, k.name),
          v.to_runner_api_monitoring_info(),
          ptransform=transform_id
      ))

    for k, v in self.distributions.items():
      all_user_metrics.append(monitoring_infos.int64_distribution(
          user_distribution_metric_urn(k.namespace, k.name),
          v.get_cumulative().to_runner_api_monitoring_info(),
          ptransform=transform_id
      ))

    for k, v in self.gauges.items():
      all_user_metrics.append(monitoring_infos.int64_gauge(
          user_metric_urn(k.namespace, k.name),
          v.get_cumulative().to_runner_api_monitoring_info(),
          ptransform=transform_id
      ))
    return {monitoring_infos.to_key(mi) : mi for mi in all_user_metrics}
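The only substantive difference from Example #17 is the URN helper used for distributions: user_distribution_metric_urn here versus user_metric_urn above, which appears to reflect Beam's move to a separate URN namespace for user distributions.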