def to_runner_api_monitoring_info(self):
  """Returns a Metric with this value for use in a MonitoringInfo."""
  # Pack the four distribution components into the nested proto shape
  # expected by MonitoringInfo payloads.
  int_dist = metrics_pb2.IntDistributionData(
      count=self.count, sum=self.sum, min=self.min, max=self.max)
  return metrics_pb2.Metric(
      distribution_data=metrics_pb2.DistributionData(
          int_distribution_data=int_dist))
def pcollection_count_monitoring_infos(self, transform_id):
  """Returns the element count MonitoringInfo collected by this operation.

  Args:
    transform_id: The ptransform id used as a label on the produced
        monitoring infos.

  Returns:
    A dict mapping monitoring-info keys to MonitoringInfo protos for the
    element count and sampled byte size of the single output, or an empty
    dict when this operation does not have exactly one output.
  """
  if len(self.receivers) == 1:
    # If there is exactly one output, we can unambiguously
    # fix its name later, which we do.
    # TODO(robertwb): Plumb the actual name here.
    # Fix: the original computed
    #   'ONLY_OUTPUT' if len(self.receivers) == 1 else str(None)
    # here, but this code is inside the len(...) == 1 branch, so the
    # condition was always true and the else arm was dead code.
    elem_count_mi = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        self.receivers[0].opcounter.element_counter.value(),
        ptransform=transform_id,
        tag='ONLY_OUTPUT',
    )

    # Renamed from (sum, count, min, max): don't shadow the builtins.
    (unused_mean, sum_value, count_value, min_value, max_value) = (
        self.receivers[0].opcounter.mean_byte_counter.value())

    metric = metrics_pb2.Metric(
        distribution_data=metrics_pb2.DistributionData(
            int_distribution_data=metrics_pb2.IntDistributionData(
                count=count_value,
                sum=sum_value,
                min=min_value,
                max=max_value)))
    sampled_byte_count = monitoring_infos.int64_distribution(
        monitoring_infos.SAMPLED_BYTE_SIZE_URN,
        metric,
        ptransform=transform_id,
        tag='ONLY_OUTPUT',
    )
    return {
        monitoring_infos.to_key(elem_count_mi): elem_count_mi,
        monitoring_infos.to_key(sampled_byte_count): sampled_byte_count
    }
  return {}
def monitoring_infos(self, transform_id):
  # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  """Returns the MonitoringInfos for this operation, keyed for dedup.

  Extends the base-class infos with a per-tag element counter and a
  sampled-byte-size distribution for every tagged output receiver.

  Args:
    transform_id: The ptransform id used as a label on each info.
  """
  infos = super(DoOperation, self).monitoring_infos(transform_id)
  if self.tagged_receivers:
    for tag, receiver in self.tagged_receivers.items():
      mi = monitoring_infos.int64_counter(
          monitoring_infos.ELEMENT_COUNT_URN,
          receiver.opcounter.element_counter.value(),
          ptransform=transform_id,
          tag=str(tag))
      infos[monitoring_infos.to_key(mi)] = mi
      # Renamed from (sum, count, min, max): don't shadow the builtins.
      (unused_mean, sum_value, count_value, min_value, max_value) = (
          receiver.opcounter.mean_byte_counter.value())
      metric = metrics_pb2.Metric(
          distribution_data=metrics_pb2.DistributionData(
              int_distribution_data=metrics_pb2.IntDistributionData(
                  count=count_value,
                  sum=sum_value,
                  min=min_value,
                  max=max_value)))
      sampled_byte_count = monitoring_infos.int64_distribution(
          monitoring_infos.SAMPLED_BYTE_SIZE_URN,
          metric,
          ptransform=transform_id,
          tag=str(tag))
      infos[monitoring_infos.to_key(
          sampled_byte_count)] = sampled_byte_count
  return infos
def to_runner_api_monitoring_info(self, name, transform_id):
  """Returns a user-counter MonitoringInfo for this cumulative value."""
  # Imported locally (as in the original) to avoid a circular import at
  # module load time.
  from apache_beam.metrics import monitoring_infos
  counter_metric = metrics_pb2.Metric(
      counter_data=metrics_pb2.CounterData(
          int64_value=self.get_cumulative()))
  return monitoring_infos.int64_user_counter(
      name.namespace,
      name.name,
      counter_metric,
      ptransform=transform_id)
def to_runner_api_monitoring_info(self):
  """Returns a Metric with this counter value for use in a MonitoringInfo."""
  # TODO(ajamato): Update this code to be consistent with Gauges
  # and Distributions. Since there is no CounterData class this method
  # was added to CounterCell. Consider adding a CounterData class or
  # removing the GaugeData and DistributionData classes.
  counter = metrics_pb2.CounterData(int64_value=self.get_cumulative())
  return metrics_pb2.Metric(counter_data=counter)
def distribution_combiner(metric_a, metric_b):
  """Merges two int-distribution Metrics into one combined Metric.

  Counts and sums are added; min and max are the elementwise extremes.
  """
  dist_a = metric_a.distribution_data.int_distribution_data
  dist_b = metric_b.distribution_data.int_distribution_data
  merged = metrics_pb2.IntDistributionData(
      count=dist_a.count + dist_b.count,
      sum=dist_a.sum + dist_b.sum,
      min=min(dist_a.min, dist_b.min),
      max=max(dist_a.max, dist_b.max))
  return metrics_pb2.Metric(
      distribution_data=metrics_pb2.DistributionData(
          int_distribution_data=merged))
def int64_gauge(urn, metric, ptransform=None, tag=None):
  """Return the gauge monitoring info for the URN, metric and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    metric: The metric proto field to use in the monitoring info,
        or a plain int value to wrap in a counter-data Metric.
    ptransform: The ptransform/step name used as a label.
    tag: The output tag name, used as a label.
  """
  # A bare int is promoted to a Metric proto before packaging.
  if isinstance(metric, int):
    metric = metrics_pb2.Metric(
        counter_data=metrics_pb2.CounterData(int64_value=metric))
  labels = create_labels(ptransform=ptransform, tag=tag)
  return create_monitoring_info(urn, LATEST_INT64_TYPE, metric, labels)
def int64_counter(urn, metric, ptransform=None, tag=None):
  # type: (...) -> metrics_pb2.MonitoringInfo
  """Return the counter monitoring info for the specified URN and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    metric: The metric proto field to use in the monitoring info,
        or a plain int value to wrap in a counter-data Metric.
    ptransform: The ptransform/step name used as a label.
    tag: The output tag name, used as a label.
  """
  # A bare int is promoted to a Metric proto before packaging.
  if isinstance(metric, int):
    metric = metrics_pb2.Metric(
        counter_data=metrics_pb2.CounterData(int64_value=metric))
  labels = create_labels(ptransform=ptransform, tag=tag)
  return create_monitoring_info(urn, SUM_INT64_TYPE, metric, labels)
def to_runner_api_monitoring_info(self):
  """Returns a Metric with this value for use in a MonitoringInfo."""
  counter = metrics_pb2.CounterData(int64_value=self.value)
  return metrics_pb2.Metric(counter_data=counter)
def distribution_combiner(metric_a, metric_b):
  """Merges two int-distribution Metrics: sums count/sum, takes min/max."""
  a_data = metric_a.distribution_data.int_distribution_data
  b_data = metric_b.distribution_data.int_distribution_data
  return metrics_pb2.Metric(distribution_data=metrics_pb2.DistributionData(
      int_distribution_data=metrics_pb2.IntDistributionData(
          count=a_data.count + b_data.count,
          sum=a_data.sum + b_data.sum,
          min=min(a_data.min, b_data.min),
          max=max(a_data.max, b_data.max))))


# Combiner functions keyed by metric type URN, used to merge two Metric
# protos that share the same monitoring-info key.
_KNOWN_COMBINERS = {
    SUM_INT64_TYPE: lambda a, b: metrics_pb2.Metric(
        counter_data=metrics_pb2.CounterData(
            int64_value=a.counter_data.int64_value +
            b.counter_data.int64_value)),
    DISTRIBUTION_INT64_TYPE: distribution_combiner,
}


def max_timestamp(a, b):
  """Returns whichever of the two Timestamp protos is later."""
  if a.ToNanoseconds() > b.ToNanoseconds():
    return a
  else:
    return b


# NOTE(review): the body of consolidate continues beyond this chunk; only
# its opening lines are visible here.
def consolidate(metrics, key=to_key):
  grouped = collections.defaultdict(list)
  for metric in metrics: