def execution_time_monitoring_infos(self, transform_id):
  """Returns execution-time MonitoringInfos for this operation.

  Produces one int64 counter each for start/process/finish bundle msecs
  plus a total, keyed by monitoring-info key.

  Args:
    transform_id: id of the PTransform the metrics are attributed to.
  """
  # Sample each scoped state exactly once so the reported total always
  # equals the sum of the three reported parts. (The original sampled
  # each state twice — once for the total, once for its own counter —
  # allowing the values to drift between the two samples.)
  start_msecs = self.scoped_start_state.sampled_msecs_int()
  process_msecs = self.scoped_process_state.sampled_msecs_int()
  finish_msecs = self.scoped_finish_state.sampled_msecs_int()
  total_time_spent_msecs = start_msecs + process_msecs + finish_msecs
  mis = [
      monitoring_infos.int64_counter(
          monitoring_infos.START_BUNDLE_MSECS_URN,
          start_msecs,
          ptransform=transform_id),
      monitoring_infos.int64_counter(
          monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
          process_msecs,
          ptransform=transform_id),
      monitoring_infos.int64_counter(
          monitoring_infos.FINISH_BUNDLE_MSECS_URN,
          finish_msecs,
          ptransform=transform_id),
      monitoring_infos.int64_counter(
          monitoring_infos.TOTAL_MSECS_URN,
          total_time_spent_msecs,
          ptransform=transform_id),
  ]
  return {monitoring_infos.to_key(mi): mi for mi in mis}
def execution_time_monitoring_infos(self, transform_id):
  """Returns execution-time MonitoringInfos for this operation.

  Produces one int64 counter each for start/process/finish bundle msecs
  plus a total, keyed by monitoring-info key.

  Args:
    transform_id: id of the PTransform the metrics are attributed to.
  """
  # Sample each scoped state exactly once so the reported total always
  # equals the sum of the three reported parts. (The original sampled
  # each state twice — once for the total, once for its own counter —
  # allowing the values to drift between the two samples.)
  start_msecs = self.scoped_start_state.sampled_msecs_int()
  process_msecs = self.scoped_process_state.sampled_msecs_int()
  finish_msecs = self.scoped_finish_state.sampled_msecs_int()
  total_time_spent_msecs = start_msecs + process_msecs + finish_msecs
  mis = [
      monitoring_infos.int64_counter(
          monitoring_infos.START_BUNDLE_MSECS_URN,
          start_msecs,
          ptransform=transform_id),
      monitoring_infos.int64_counter(
          monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
          process_msecs,
          ptransform=transform_id),
      monitoring_infos.int64_counter(
          monitoring_infos.FINISH_BUNDLE_MSECS_URN,
          finish_msecs,
          ptransform=transform_id),
      monitoring_infos.int64_counter(
          monitoring_infos.TOTAL_MSECS_URN,
          total_time_spent_msecs,
          ptransform=transform_id),
  ]
  return {monitoring_infos.to_key(mi): mi for mi in mis}
def pcollection_count_monitoring_infos(self, transform_id):
  """Returns element count and sampled byte size MonitoringInfos.

  Only produced when there is exactly one output receiver, since the
  output name cannot be disambiguated otherwise.

  Args:
    transform_id: id of the PTransform the metrics are attributed to.
  """
  if len(self.receivers) == 1:
    # If there is exactly one output, we can unambiguously
    # fix its name later, which we do.
    # TODO(robertwb): Plumb the actual name here.
    # The guard above guarantees a single receiver, so the tag is always
    # 'ONLY_OUTPUT' (the original inline conditional was dead code).
    elem_count_mi = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        self.receivers[0].opcounter.element_counter.value(),
        ptransform=transform_id,
        tag='ONLY_OUTPUT',
    )
    # Unpack without shadowing the sum/min/max builtins.
    (unused_mean, sum_, count, min_, max_) = (
        self.receivers[0].opcounter.mean_byte_counter.value())
    metric = metrics_pb2.Metric(
        distribution_data=metrics_pb2.DistributionData(
            int_distribution_data=metrics_pb2.IntDistributionData(
                count=count, sum=sum_, min=min_, max=max_)))
    sampled_byte_count = monitoring_infos.int64_distribution(
        monitoring_infos.SAMPLED_BYTE_SIZE_URN,
        metric,
        ptransform=transform_id,
        tag='ONLY_OUTPUT',
    )
    return {
        monitoring_infos.to_key(elem_count_mi): elem_count_mi,
        monitoring_infos.to_key(sampled_byte_count): sampled_byte_count
    }
  return {}
def verify_write_call_metric(
    self, project_id, dataset_id, table_id, status, count):
  """Check that a metric was recorded for the BQ IO write API call."""
  resource = resource_identifiers.BigQueryTable(
      project_id, dataset_id, table_id)
  expected_mi = monitoring_infos.int64_counter(
      monitoring_infos.API_REQUEST_COUNT_URN,
      count,
      labels={
          # TODO(ajamato): Add Ptransform label.
          monitoring_infos.SERVICE_LABEL: 'BigQuery',
          # Refer to any method which writes elements to BigQuery in batches
          # as "BigQueryBatchWrite". I.e. storage API's insertAll, or future
          # APIs introduced.
          monitoring_infos.METHOD_LABEL: 'BigQueryBatchWrite',
          monitoring_infos.RESOURCE_LABEL: resource,
          monitoring_infos.BIGQUERY_PROJECT_ID_LABEL: project_id,
          monitoring_infos.BIGQUERY_DATASET_LABEL: dataset_id,
          monitoring_infos.BIGQUERY_TABLE_LABEL: table_id,
          monitoring_infos.STATUS_LABEL: status,
      })
  # Start times vary between runs, so exclude them from the comparison.
  expected_mi.ClearField("start_time")

  recorded_mis = list(
      MetricsEnvironment.process_wide_container().
      to_runner_api_monitoring_infos(None).values())

  def _normalized(mi):
    mi.ClearField("start_time")
    return mi

  found = any(_normalized(mi) == expected_mi for mi in recorded_mis)
  self.assertTrue(
      found, "Did not find write call metric with status: %s" % status)
def pcollection_count_monitoring_infos(self, tag_to_pcollection_id):
  # type: (Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  """Returns the element count MonitoringInfo collected by this operation.

  Extends the parent's infos with per-tag element count and sampled byte
  size MonitoringInfos for each tagged receiver whose tag has a mapped
  PCollection id.
  """
  infos = super(
      DoOperation,
      self).pcollection_count_monitoring_infos(tag_to_pcollection_id)
  if self.tagged_receivers:
    for tag, receiver in self.tagged_receivers.items():
      # Hoist str(tag), which the original computed twice.
      tag_str = str(tag)
      if tag_str not in tag_to_pcollection_id:
        continue
      pcollection_id = tag_to_pcollection_id[tag_str]
      mi = monitoring_infos.int64_counter(
          monitoring_infos.ELEMENT_COUNT_URN,
          receiver.opcounter.element_counter.value(),
          pcollection=pcollection_id)
      infos[monitoring_infos.to_key(mi)] = mi
      # Unpack without shadowing the sum/min/max builtins.
      (unused_mean, sum_, count, min_, max_) = (
          receiver.opcounter.mean_byte_counter.value())
      sampled_byte_count = monitoring_infos.int64_distribution(
          monitoring_infos.SAMPLED_BYTE_SIZE_URN,
          DistributionData(sum_, count, min_, max_),
          pcollection=pcollection_id)
      infos[monitoring_infos.to_key(sampled_byte_count)] = sampled_byte_count
  return infos
def pcollection_count_monitoring_infos(self, tag_to_pcollection_id):
  # type: (Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  """Returns the element count MonitoringInfo collected by this operation."""
  # Skip producing monitoring infos if there is more than one receiver
  # since there is no way to provide a mapping from tag to pcollection id
  # within Operation.
  if len(self.receivers) != 1 or len(tag_to_pcollection_id) != 1:
    return {}

  all_monitoring_infos = {}
  pcollection_id = next(iter(tag_to_pcollection_id.values()))
  receiver = self.receivers[0]
  elem_count_mi = monitoring_infos.int64_counter(
      monitoring_infos.ELEMENT_COUNT_URN,
      receiver.opcounter.element_counter.value(),
      pcollection=pcollection_id,
  )
  # Unpack without shadowing the sum/min/max builtins.
  (unused_mean, sum_, count, min_, max_) = (
      receiver.opcounter.mean_byte_counter.value())
  sampled_byte_count = monitoring_infos.int64_distribution(
      monitoring_infos.SAMPLED_BYTE_SIZE_URN,
      DistributionData(sum_, count, min_, max_),
      pcollection=pcollection_id,
  )
  all_monitoring_infos[monitoring_infos.to_key(elem_count_mi)] = elem_count_mi
  all_monitoring_infos[monitoring_infos.to_key(
      sampled_byte_count)] = sampled_byte_count
  return all_monitoring_infos
def verify_read_call_metric(self, project_id, namespace, status, count):
  """Check if a metric was recorded for the Datastore IO read API call."""
  resource = resource_identifiers.DatastoreNamespace(project_id, namespace)
  expected_mi = monitoring_infos.int64_counter(
      monitoring_infos.API_REQUEST_COUNT_URN,
      count,
      labels={
          monitoring_infos.SERVICE_LABEL: 'Datastore',
          monitoring_infos.METHOD_LABEL: 'BatchDatastoreRead',
          monitoring_infos.RESOURCE_LABEL: resource,
          monitoring_infos.DATASTORE_NAMESPACE_LABEL: namespace,
          monitoring_infos.DATASTORE_PROJECT_ID_LABEL: project_id,
          monitoring_infos.STATUS_LABEL: status
      })
  # Start times vary between runs, so drop them before comparing.
  expected_mi.ClearField("start_time")

  recorded_mis = list(
      MetricsEnvironment.process_wide_container().
      to_runner_api_monitoring_infos(None).values())
  for mi in recorded_mis:
    mi.ClearField("start_time")

  self.assertTrue(
      expected_mi in recorded_mis,
      "Did not find read call metric with status: %s" % status)
def verify_write_call_metric(self, project_id, instance_id, table_id, status,
                             count):
  """Check if a metric was recorded for the Bigtable IO write API call.

  (The docstring previously said "Datastore", but every label below —
  the BigtableTable resource, the 'BigTable' service, the
  google.bigtable.v2.MutateRows method — is Bigtable-specific.)
  """
  process_wide_monitoring_infos = list(
      MetricsEnvironment.process_wide_container(
      ).to_runner_api_monitoring_infos(None).values())
  resource = resource_identifiers.BigtableTable(
      project_id, instance_id, table_id)
  labels = {
      monitoring_infos.SERVICE_LABEL: 'BigTable',
      monitoring_infos.METHOD_LABEL: 'google.bigtable.v2.MutateRows',
      monitoring_infos.RESOURCE_LABEL: resource,
      monitoring_infos.BIGTABLE_PROJECT_ID_LABEL: project_id,
      monitoring_infos.INSTANCE_ID_LABEL: instance_id,
      monitoring_infos.TABLE_ID_LABEL: table_id,
      monitoring_infos.STATUS_LABEL: status
  }
  expected_mi = monitoring_infos.int64_counter(
      monitoring_infos.API_REQUEST_COUNT_URN, count, labels=labels)
  # Start times vary between runs, so drop them before comparing.
  expected_mi.ClearField("start_time")
  found = False
  for actual_mi in process_wide_monitoring_infos:
    actual_mi.ClearField("start_time")
    if expected_mi == actual_mi:
      found = True
      break
  self.assertTrue(
      found, "Did not find write call metric with status: %s" % status)
def monitoring_infos(self, transform_id):
  # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  """Returns the parent's MonitoringInfos extended with per-tag element
  counts and sampled byte sizes for each tagged receiver."""
  infos = super(DoOperation, self).monitoring_infos(transform_id)
  if self.tagged_receivers:
    for tag, receiver in self.tagged_receivers.items():
      mi = monitoring_infos.int64_counter(
          monitoring_infos.ELEMENT_COUNT_URN,
          receiver.opcounter.element_counter.value(),
          ptransform=transform_id,
          tag=str(tag))
      infos[monitoring_infos.to_key(mi)] = mi
      # Unpack without shadowing the sum/min/max builtins.
      (unused_mean, sum_, count, min_, max_) = (
          receiver.opcounter.mean_byte_counter.value())
      metric = metrics_pb2.Metric(
          distribution_data=metrics_pb2.DistributionData(
              int_distribution_data=metrics_pb2.IntDistributionData(
                  count=count, sum=sum_, min=min_, max=max_)))
      sampled_byte_count = monitoring_infos.int64_distribution(
          monitoring_infos.SAMPLED_BYTE_SIZE_URN,
          metric,
          ptransform=transform_id,
          tag=str(tag))
      infos[monitoring_infos.to_key(sampled_byte_count)] = sampled_byte_count
  return infos
def get_monitoring_infos(self, cache_size, cache_capacity):
  # type: (int, int) -> List[metrics_pb2.MonitoringInfo]
  """Returns the metrics scoped to the current bundle.

  Emits one gauge per collected metric (plus cache size/capacity) and one
  '_total' counter per metric, then resets the per-bundle metric dict.
  """
  metrics = self._context.metrics
  if not metrics:
    # No metrics collected, do not report.
    return []
  # Add all missing metrics which were not reported.
  for key in Metrics.ALL_METRICS:
    metrics.setdefault(key, 0)
  # Gauges which reflect the state since last queried.
  gauges = [
      monitoring_infos.int64_gauge(self.PREFIX + name, val)
      for name, val in metrics.items()
  ]
  gauges.append(
      monitoring_infos.int64_gauge(self.PREFIX + 'size', cache_size))
  gauges.append(
      monitoring_infos.int64_gauge(self.PREFIX + 'capacity', cache_capacity))
  # Counters for the summary across all metrics.
  counters = [
      monitoring_infos.int64_counter(self.PREFIX + name + '_total', val)
      for name, val in metrics.items()
  ]
  # Reinitialize metrics for this thread/bundle.
  metrics.clear()
  return gauges + counters
def monitoring_infos(self, transform_id):
  """Extends the parent's MonitoringInfos with one element-count counter
  per tagged receiver."""
  infos = super(DoOperation, self).monitoring_infos(transform_id)
  if not self.tagged_receivers:
    return infos
  for tag, receiver in self.tagged_receivers.items():
    element_count = receiver.opcounter.element_counter.value()
    counter_mi = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        element_count,
        ptransform=transform_id,
        tag=str(tag))
    infos[monitoring_infos.to_key(counter_mi)] = counter_mi
  return infos
def monitoring_infos(self, transform_id):
  """Merges a per-tag element-count counter for every tagged receiver
  into the parent operation's MonitoringInfos."""
  infos = super(DoOperation, self).monitoring_infos(transform_id)
  if self.tagged_receivers:
    for output_tag, output_receiver in self.tagged_receivers.items():
      info = monitoring_infos.int64_counter(
          monitoring_infos.ELEMENT_COUNT_URN,
          output_receiver.opcounter.element_counter.value(),
          ptransform=transform_id,
          tag=str(output_tag))
      key = monitoring_infos.to_key(info)
      infos[key] = info
  return infos
def to_runner_api_monitoring_info(self, name, transform_id):
  """Converts this metric's cumulative value to a MonitoringInfo proto.

  A name without a URN is treated as a user counter; otherwise the
  counter is emitted under the name's own URN and labels.
  """
  from apache_beam.metrics import monitoring_infos
  cumulative = self.get_cumulative()
  if name.urn:
    # Arbitrary URN case.
    return monitoring_infos.int64_counter(
        name.urn, cumulative, labels=name.labels)
  # User counter case.
  return monitoring_infos.int64_user_counter(
      name.namespace, name.name, cumulative, ptransform=transform_id)
def element_count_monitoring_infos(self, transform_id):
  """Returns the element count MonitoringInfo collected by this operation.

  Only produced when there is exactly one output receiver.
  """
  if len(self.receivers) == 1:
    # If there is exactly one output, we can unambiguously
    # fix its name later, which we do.
    # TODO(robertwb): Plumb the actual name here.
    # The guard above guarantees a single receiver, so the tag is always
    # 'ONLY_OUTPUT' (the original inline conditional was dead code).
    mi = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        self.receivers[0].opcounter.element_counter.value(),
        ptransform=transform_id,
        tag='ONLY_OUTPUT',
    )
    return {monitoring_infos.to_key(mi): mi}
  return {}
def element_count_monitoring_infos(self, transform_id):
  """Returns the element count MonitoringInfo collected by this operation.

  Only produced when there is exactly one output receiver.
  """
  if len(self.receivers) == 1:
    # If there is exactly one output, we can unambiguously
    # fix its name later, which we do.
    # TODO(robertwb): Plumb the actual name here.
    # The guard above guarantees a single receiver, so the tag is always
    # 'ONLY_OUTPUT' (the original inline conditional was dead code).
    mi = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        self.receivers[0].opcounter.element_counter.value(),
        ptransform=transform_id,
        tag='ONLY_OUTPUT',
    )
    return {monitoring_infos.to_key(mi): mi}
  return {}
def test_harness_monitoring_infos_and_metadata(self):
  """End-to-end check of the harness monitoring-info protocol:
  a process-wide counter is reported as short-id monitoring data, and its
  full metadata can then be fetched by short id."""
  # Clear the process wide metric container.
  MetricsEnvironment.process_wide_container().reset()
  # Create a process_wide metric.
  urn = 'my.custom.urn'
  labels = {'key': 'value'}
  InternalMetrics.counter(urn=urn, labels=labels, process_wide=True).inc(10)

  harness_monitoring_infos_request = beam_fn_api_pb2.InstructionRequest(
      instruction_id="monitoring_infos",
      harness_monitoring_infos=beam_fn_api_pb2.
      HarnessMonitoringInfosRequest())

  responses = self.get_responses([harness_monitoring_infos_request])

  expected_monitoring_info = monitoring_infos.int64_counter(
      urn, 10, labels=labels)
  # monitoring_data maps short ids to encoded payloads.
  monitoring_data = (
      responses['monitoring_infos'].harness_monitoring_infos.monitoring_data)

  # Request the full MonitoringInfo metadata for the returned short_ids.
  short_ids = list(monitoring_data.keys())
  monitoring_infos_metadata_request = beam_fn_api_pb2.InstructionRequest(
      instruction_id="monitoring_infos_metadata",
      monitoring_infos=beam_fn_api_pb2.MonitoringInfosMetadataRequest(
          monitoring_info_id=short_ids))

  responses = self.get_responses([monitoring_infos_metadata_request])

  # Request the full MonitoringInfo metadata to be returned now.
  # Metadata responses carry no payload, so clear it on the expectation
  # before comparing.
  expected_monitoring_info.ClearField("payload")

  # Verify that one of the returned monitoring infos is our expected
  # monitoring info.
  short_id_to_mi = (
      responses['monitoring_infos_metadata'].monitoring_infos.
      monitoring_info)
  found = False
  for mi in short_id_to_mi.values():
    # Clear the timestamp before comparing
    mi.ClearField("start_time")
    if mi == expected_monitoring_info:
      found = True
  self.assertTrue(found, str(responses['monitoring_infos_metadata']))
def test_int64_counter(self):
  """int64_counter merges explicit labels with ptransform/pcollection
  keyword labels and extracts a zero value from a fresh counter."""
  extra_labels = {monitoring_infos.SERVICE_LABEL: "BigQuery"}
  metric = CounterCell().get_cumulative()
  result = monitoring_infos.int64_counter(
      monitoring_infos.API_REQUEST_COUNT_URN,
      metric,
      ptransform="ptransformname",
      pcollection="collectionname",
      labels=extra_labels)
  self.assertEqual(0, monitoring_infos.extract_counter_value(result))
  expected_labels = {
      monitoring_infos.PCOLLECTION_LABEL: "collectionname",
      monitoring_infos.PTRANSFORM_LABEL: "ptransformname",
      monitoring_infos.SERVICE_LABEL: "BigQuery",
  }
  self.assertEqual(result.labels, expected_labels)
def to_runner_api_monitoring_infos(self, transform_id):
  """Returns a dict of MonitoringInfos (keyed by monitoring-info key) for
  the counters, distributions, and gauges in this container."""
  all_user_metrics = []
  for k, v in self.counters.items():
    all_user_metrics.append(
        monitoring_infos.int64_counter(
            user_metric_urn(k.namespace, k.name),
            v.to_runner_api_monitoring_info(),
            ptransform=transform_id))
  for k, v in self.distributions.items():
    # NOTE(review): distributions are emitted under user_metric_urn here,
    # while another version of this method uses user_distribution_metric_urn
    # for distributions — confirm which URN the runner expects.
    all_user_metrics.append(
        monitoring_infos.int64_distribution(
            user_metric_urn(k.namespace, k.name),
            v.get_cumulative().to_runner_api_monitoring_info(),
            ptransform=transform_id))
  for k, v in self.gauges.items():
    all_user_metrics.append(
        monitoring_infos.int64_gauge(
            user_metric_urn(k.namespace, k.name),
            v.get_cumulative().to_runner_api_monitoring_info(),
            ptransform=transform_id))
  return {monitoring_infos.to_key(mi): mi for mi in all_user_metrics}
def to_runner_api_monitoring_infos(self, transform_id):
  """Returns a dict of MonitoringInfos (keyed by monitoring-info key) for
  the counters, distributions, and gauges in this container."""
  # (cells, builder, urn factory, cumulative-value extractor) per kind.
  metric_specs = [
      (self.counters,
       monitoring_infos.int64_counter,
       user_metric_urn,
       lambda cell: cell.to_runner_api_monitoring_info()),
      (self.distributions,
       monitoring_infos.int64_distribution,
       user_distribution_metric_urn,
       lambda cell: cell.get_cumulative().to_runner_api_monitoring_info()),
      (self.gauges,
       monitoring_infos.int64_gauge,
       user_metric_urn,
       lambda cell: cell.get_cumulative().to_runner_api_monitoring_info()),
  ]
  all_user_metrics = []
  for cells, make_info, make_urn, extract in metric_specs:
    for key, cell in cells.items():
      all_user_metrics.append(
          make_info(
              make_urn(key.namespace, key.name),
              extract(cell),
              ptransform=transform_id))
  return {monitoring_infos.to_key(mi): mi for mi in all_user_metrics}