Example #1
0
 def to_runner_api_monitoring_info(self):
     """Returns a Metric with this value for use in a MonitoringInfo."""
     # Package the four distribution components into the int-distribution
     # payload expected by the MonitoringInfo protos.
     dist = metrics_pb2.IntDistributionData(
         count=self.count,
         sum=self.sum,
         min=self.min,
         max=self.max)
     return metrics_pb2.Metric(
         distribution_data=metrics_pb2.DistributionData(
             int_distribution_data=dist))
Example #2
0
    def pcollection_count_monitoring_infos(self, transform_id):
        """Returns the element count MonitoringInfo collected by this operation.

        Args:
            transform_id: The ptransform id used as the MonitoringInfo label.

        Returns:
            A dict mapping ``monitoring_infos.to_key(mi)`` to the element-count
            and sampled-byte-size MonitoringInfos; empty unless the operation
            has exactly one output receiver.
        """
        if len(self.receivers) != 1:
            # With zero or several outputs the counts cannot be unambiguously
            # attributed to a single output, so nothing is reported.
            return {}
        # There is exactly one output, so we can unambiguously fix its name
        # later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        receiver = self.receivers[0]
        elem_count_mi = monitoring_infos.int64_counter(
            monitoring_infos.ELEMENT_COUNT_URN,
            receiver.opcounter.element_counter.value(),
            ptransform=transform_id,
            # The original conditional tag expression always evaluated to
            # 'ONLY_OUTPUT' inside this single-receiver branch.
            tag='ONLY_OUTPUT',
        )
        # mean_byte_counter.value() yields (mean, sum, count, min, max); the
        # mean is unused.  Suffixed names avoid shadowing the sum/min/max
        # builtins.
        (unused_mean, sum_value, count_value, min_value,
         max_value) = receiver.opcounter.mean_byte_counter.value()
        metric = metrics_pb2.Metric(
            distribution_data=metrics_pb2.DistributionData(
                int_distribution_data=metrics_pb2.IntDistributionData(
                    count=count_value, sum=sum_value,
                    min=min_value, max=max_value)))
        sampled_byte_count = monitoring_infos.int64_distribution(
            monitoring_infos.SAMPLED_BYTE_SIZE_URN,
            metric,
            ptransform=transform_id,
            tag='ONLY_OUTPUT',
        )
        return {
            monitoring_infos.to_key(elem_count_mi): elem_count_mi,
            monitoring_infos.to_key(sampled_byte_count): sampled_byte_count,
        }
Example #3
0
 def monitoring_infos(self, transform_id):
     # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
     """Adds per-tag element counts and sampled byte sizes for every tagged
     receiver to the MonitoringInfos reported by the base operation.
     """
     infos = super(DoOperation, self).monitoring_infos(transform_id)
     if self.tagged_receivers:
         for tag, receiver in self.tagged_receivers.items():
             mi = monitoring_infos.int64_counter(
                 monitoring_infos.ELEMENT_COUNT_URN,
                 receiver.opcounter.element_counter.value(),
                 ptransform=transform_id,
                 tag=str(tag))
             infos[monitoring_infos.to_key(mi)] = mi
             # mean_byte_counter.value() yields (mean, sum, count, min, max);
             # the mean is unused.  Suffixed names avoid shadowing the
             # sum/min/max builtins inside the loop.
             (unused_mean, sum_value, count_value, min_value,
              max_value) = receiver.opcounter.mean_byte_counter.value()
             metric = metrics_pb2.Metric(
                 distribution_data=metrics_pb2.DistributionData(
                     int_distribution_data=metrics_pb2.IntDistributionData(
                         count=count_value, sum=sum_value,
                         min=min_value, max=max_value)))
             sampled_byte_count = monitoring_infos.int64_distribution(
                 monitoring_infos.SAMPLED_BYTE_SIZE_URN,
                 metric,
                 ptransform=transform_id,
                 tag=str(tag))
             infos[monitoring_infos.to_key(
                 sampled_byte_count)] = sampled_byte_count
     return infos
Example #4
0
 def to_runner_api_monitoring_info(self, name, transform_id):
     """Builds an int64 user-counter MonitoringInfo for this metric value."""
     # Imported locally, matching the original (likely to avoid an
     # import cycle at module load time).
     from apache_beam.metrics import monitoring_infos
     counter_metric = metrics_pb2.Metric(
         counter_data=metrics_pb2.CounterData(
             int64_value=self.get_cumulative()))
     return monitoring_infos.int64_user_counter(
         name.namespace,
         name.name,
         counter_metric,
         ptransform=transform_id)
Example #5
0
 def to_runner_api_monitoring_info(self):
     """Returns a Metric with this counter value for use in a MonitoringInfo."""
     # TODO(ajamato): Update this code to be consistent with Gauges
     # and Distributions. Since there is no CounterData class this method
     # was added to CounterCell. Consider adding a CounterData class or
     # removing the GaugeData and DistributionData classes.
     counter = metrics_pb2.CounterData(int64_value=self.get_cumulative())
     return metrics_pb2.Metric(counter_data=counter)
def distribution_combiner(metric_a, metric_b):
    """Merges two int-distribution Metric protos into a single Metric.

    Counts and sums are added; min/max are taken across both inputs.
    """
    lhs = metric_a.distribution_data.int_distribution_data
    rhs = metric_b.distribution_data.int_distribution_data
    merged = metrics_pb2.IntDistributionData(
        count=lhs.count + rhs.count,
        sum=lhs.sum + rhs.sum,
        min=min(lhs.min, rhs.min),
        max=max(lhs.max, rhs.max))
    return metrics_pb2.Metric(
        distribution_data=metrics_pb2.DistributionData(
            int_distribution_data=merged))
def int64_gauge(urn, metric, ptransform=None, tag=None):
    """Return the gauge monitoring info for the URN, metric and labels.

    Args:
      urn: The URN of the monitoring info/metric.
      metric: The metric proto field to use in the monitoring info,
          or an int value.
      ptransform: The ptransform/step name used as a label.
      tag: The output tag name, used as a label.
    """
    if isinstance(metric, int):
        # A bare int is wrapped into a counter-valued Metric proto.
        metric = metrics_pb2.Metric(
            counter_data=metrics_pb2.CounterData(int64_value=metric))
    labels = create_labels(ptransform=ptransform, tag=tag)
    return create_monitoring_info(urn, LATEST_INT64_TYPE, metric, labels)
def int64_counter(urn, metric, ptransform=None, tag=None):
    # type: (...) -> metrics_pb2.MonitoringInfo
    """Return the counter monitoring info for the specified URN, metric and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    metric: The metric proto field to use in the monitoring info.
        Or an int value.
    ptransform: The ptransform/step name used as a label.
    tag: The output tag name, used as a label.
  """
    labels = create_labels(ptransform=ptransform, tag=tag)
    if isinstance(metric, int):
        # Convenience path: wrap a bare int into a counter-valued Metric.
        metric = metrics_pb2.Metric(counter_data=metrics_pb2.CounterData(
            int64_value=metric))
    return create_monitoring_info(urn, SUM_INT64_TYPE, metric, labels)
Example #9
0
 def to_runner_api_monitoring_info(self):
     """Returns a Metric with this value for use in a MonitoringInfo."""
     # The stored value maps directly onto a counter's int64 payload.
     counter = metrics_pb2.CounterData(int64_value=self.value)
     return metrics_pb2.Metric(counter_data=counter)

def distribution_combiner(metric_a, metric_b):
    """Combines two distribution-valued Metric protos element-wise.

    count/sum are summed; min/max span both inputs.
    """
    first = metric_a.distribution_data.int_distribution_data
    second = metric_b.distribution_data.int_distribution_data
    combined_count = first.count + second.count
    combined_sum = first.sum + second.sum
    combined_min = min(first.min, second.min)
    combined_max = max(first.max, second.max)
    return metrics_pb2.Metric(
        distribution_data=metrics_pb2.DistributionData(
            int_distribution_data=metrics_pb2.IntDistributionData(
                count=combined_count,
                sum=combined_sum,
                min=combined_min,
                max=combined_max)))


# Maps a MonitoringInfo type URN to a binary combiner that merges two
# Metric protos of that type into one (sum for counters; element-wise
# merge for distributions via distribution_combiner).
_KNOWN_COMBINERS = {
    SUM_INT64_TYPE:
    lambda a, b: metrics_pb2.Metric(counter_data=metrics_pb2.CounterData(
        int64_value=a.counter_data.int64_value + b.counter_data.int64_value)),
    DISTRIBUTION_INT64_TYPE:
    distribution_combiner,
}


def max_timestamp(a, b):
    """Returns the later of the two timestamps.

    Ties go to *b*, preserving the original else-branch behavior.
    """
    return a if a.ToNanoseconds() > b.ToNanoseconds() else b


def consolidate(metrics, key=to_key):
    grouped = collections.defaultdict(list)
    for metric in metrics: