Example #1
0
  def monitoring_infos(self, transform_id, tag_to_pcollection_id):
    # type: (str, Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]

    """Returns the inherited infos plus current element progress.

    When ``self.current_element_progress()`` is truthy, a work-completed and
    a work-remaining MonitoringInfo labelled with ``transform_id`` are added
    to the dict obtained from the parent class.
    """

    def progress_info(urn, amount):
      # type: (str, float) -> metrics_pb2.MonitoringInfo
      # The payload is the amount encoded as a single-element iterable.
      return metrics_pb2.MonitoringInfo(
          urn=urn,
          type=monitoring_infos.PROGRESS_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=coders.IterableCoder(coders.FloatCoder()).encode([amount]))

    with self.lock:
      infos = super(SdfProcessSizedElements,
                    self).monitoring_infos(transform_id, tag_to_pcollection_id)
      progress = self.current_element_progress()
      if progress:
        # Use the work amounts when completed_work is truthy, otherwise
        # fall back to the fractions.
        if progress.completed_work:
          done, left = progress.completed_work, progress.remaining_work
        else:
          done, left = progress.fraction_completed, progress.fraction_remaining
        assert done is not None
        assert left is not None
        progress_infos = [
            progress_info(monitoring_infos.WORK_COMPLETED_URN, done),
            progress_info(monitoring_infos.WORK_REMAINING_URN, left),
        ]
        for mi in progress_infos:
          infos[monitoring_infos.to_key(mi)] = mi
    return infos
Example #2
0
  def pcollection_count_monitoring_infos(self, tag_to_pcollection_id):
    # type: (Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]

    """Returns element count and sampled byte size infos for the sole output.

    An empty dict is returned unless there is exactly one receiver and
    exactly one entry in ``tag_to_pcollection_id``.
    """

    # Outside the one-receiver / one-pcollection case there is no way to map
    # a tag to its pcollection id within Operation, so nothing is produced.
    has_single_output = (
        len(self.receivers) == 1 and len(tag_to_pcollection_id) == 1)
    if not has_single_output:
      return {}

    pcollection_id = list(tag_to_pcollection_id.values())[0]
    opcounter = self.receivers[0].opcounter

    element_count_info = monitoring_infos.int64_counter(
        monitoring_infos.ELEMENT_COUNT_URN,
        opcounter.element_counter.value(),
        pcollection=pcollection_id,
    )

    # The first item of the counter value (the mean) is not reported.
    (_, total_bytes, sample_count, smallest, largest) = (
        opcounter.mean_byte_counter.value())
    byte_size_info = monitoring_infos.int64_distribution(
        monitoring_infos.SAMPLED_BYTE_SIZE_URN,
        DistributionData(total_bytes, sample_count, smallest, largest),
        pcollection=pcollection_id,
    )

    return {
        monitoring_infos.to_key(element_count_info): element_count_info,
        monitoring_infos.to_key(byte_size_info): byte_size_info,
    }
Example #3
0
 def monitoring_infos(self, transform_id):
     # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
     """Returns the inherited infos plus per-tag output infos.

     For every tagged receiver an element count and a sampled byte size
     MonitoringInfo, labelled with ``transform_id`` and the tag, are added.
     """
     infos = super(DoOperation, self).monitoring_infos(transform_id)
     if not self.tagged_receivers:
         return infos

     for tag, receiver in self.tagged_receivers.items():
         tag_name = str(tag)

         element_count_info = monitoring_infos.int64_counter(
             monitoring_infos.ELEMENT_COUNT_URN,
             receiver.opcounter.element_counter.value(),
             ptransform=transform_id,
             tag=tag_name)
         infos[monitoring_infos.to_key(element_count_info)] = element_count_info

         # The first item of the counter value (the mean) is not reported.
         (_, total_bytes, sample_count, smallest,
          largest) = receiver.opcounter.mean_byte_counter.value()
         int_data = metrics_pb2.IntDistributionData(
             count=sample_count, sum=total_bytes, min=smallest, max=largest)
         byte_size_metric = metrics_pb2.Metric(
             distribution_data=metrics_pb2.DistributionData(
                 int_distribution_data=int_data))
         byte_size_info = monitoring_infos.int64_distribution(
             monitoring_infos.SAMPLED_BYTE_SIZE_URN,
             byte_size_metric,
             ptransform=transform_id,
             tag=tag_name)
         infos[monitoring_infos.to_key(byte_size_info)] = byte_size_info
     return infos
Example #4
0
 def monitoring_infos(self, transform_id):
   # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
   """Returns the inherited infos plus current element progress.

   When ``self.current_element_progress()`` is truthy, a work-completed and
   a work-remaining MonitoringInfo labelled with ``transform_id`` are added
   to the dict obtained from the parent class.
   """

   def latest_double_info(urn, amount):
     # Each info carries its own timestamp taken at construction time.
     return metrics_pb2.MonitoringInfo(
         urn=urn,
         type=monitoring_infos.LATEST_DOUBLES_TYPE,
         labels=monitoring_infos.create_labels(ptransform=transform_id),
         payload=coders.FloatCoder().get_impl().encode_nested(amount),
         timestamp=monitoring_infos.to_timestamp_proto(time.time()))

   with self.lock:
     infos = super(SdfProcessSizedElements,
                   self).monitoring_infos(transform_id)
     progress = self.current_element_progress()
     if progress:
       # Use the work amounts when completed_work is truthy, otherwise
       # fall back to the fractions.
       if progress.completed_work:
         done, left = progress.completed_work, progress.remaining_work
       else:
         done, left = progress.fraction_completed, progress.fraction_remaining
       assert done is not None
       assert left is not None
       progress_infos = [
           latest_double_info(monitoring_infos.WORK_COMPLETED_URN, done),
           latest_double_info(monitoring_infos.WORK_REMAINING_URN, left),
       ]
       for mi in progress_infos:
         infos[monitoring_infos.to_key(mi)] = mi
   return infos
Example #5
0
  def pcollection_count_monitoring_infos(self, tag_to_pcollection_id):
    # type: (Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]

    """Returns the inherited infos plus infos for each mapped tagged output.

    Tagged receivers whose tag (as a string) is missing from
    ``tag_to_pcollection_id`` are skipped; the rest contribute an element
    count and a sampled byte size MonitoringInfo for their pcollection.
    """
    infos = super(
        DoOperation,
        self).pcollection_count_monitoring_infos(tag_to_pcollection_id)
    if not self.tagged_receivers:
      return infos

    for tag, receiver in self.tagged_receivers.items():
      tag_name = str(tag)
      if tag_name not in tag_to_pcollection_id:
        continue
      pcollection_id = tag_to_pcollection_id[tag_name]

      element_count_info = monitoring_infos.int64_counter(
          monitoring_infos.ELEMENT_COUNT_URN,
          receiver.opcounter.element_counter.value(),
          pcollection=pcollection_id)
      infos[monitoring_infos.to_key(element_count_info)] = element_count_info

      # The first item of the counter value (the mean) is not reported.
      (_, total_bytes, sample_count, smallest, largest) = (
          receiver.opcounter.mean_byte_counter.value())
      byte_size_info = monitoring_infos.int64_distribution(
          monitoring_infos.SAMPLED_BYTE_SIZE_URN,
          DistributionData(total_bytes, sample_count, smallest, largest),
          pcollection=pcollection_id)
      infos[monitoring_infos.to_key(byte_size_info)] = byte_size_info
    return infos
Example #6
0
    def pcollection_count_monitoring_infos(self, transform_id):
        """Returns element count and sampled byte size MonitoringInfos.

        Infos are only produced when this operation has exactly one
        receiver; otherwise an empty dict is returned.

        Args:
          transform_id: value used for the ptransform label of each info.

        Returns:
          A dict mapping ``monitoring_infos.to_key(mi)`` to ``mi``.
        """
        if len(self.receivers) != 1:
            return {}

        # If there is exactly one output, we can unambiguously
        # fix its name later, which we do.
        # TODO(robertwb): Plumb the actual name here.
        # The tag is a plain constant: past the guard above there is always
        # exactly one receiver, so no other tag value can apply.
        only_output_tag = 'ONLY_OUTPUT'
        opcounter = self.receivers[0].opcounter

        elem_count_mi = monitoring_infos.int64_counter(
            monitoring_infos.ELEMENT_COUNT_URN,
            opcounter.element_counter.value(),
            ptransform=transform_id,
            tag=only_output_tag,
        )

        # The first item of the counter value (the mean) is not reported.
        # Local names avoid shadowing the sum/min/max builtins.
        (_, total_bytes, sample_count, smallest,
         largest) = opcounter.mean_byte_counter.value()
        metric = metrics_pb2.Metric(
            distribution_data=metrics_pb2.DistributionData(
                int_distribution_data=metrics_pb2.IntDistributionData(
                    count=sample_count,
                    sum=total_bytes,
                    min=smallest,
                    max=largest)))
        sampled_byte_count = monitoring_infos.int64_distribution(
            monitoring_infos.SAMPLED_BYTE_SIZE_URN,
            metric,
            ptransform=transform_id,
            tag=only_output_tag,
        )
        return {
            monitoring_infos.to_key(elem_count_mi): elem_count_mi,
            monitoring_infos.to_key(sampled_byte_count): sampled_byte_count,
        }
Example #7
0
 def execution_time_monitoring_infos(self, transform_id):
   """Returns start, process, finish and total msecs counters.

   Args:
     transform_id: value used for the ptransform label of each counter.

   Returns:
     A dict mapping ``monitoring_infos.to_key(mi)`` to ``mi`` for the four
     int64 counters.
   """
   # Read each state exactly once so the reported total always equals the
   # sum of the three reported components, even if the sampled values
   # advance between reads.
   start_msecs = self.scoped_start_state.sampled_msecs_int()
   process_msecs = self.scoped_process_state.sampled_msecs_int()
   finish_msecs = self.scoped_finish_state.sampled_msecs_int()
   total_time_spent_msecs = start_msecs + process_msecs + finish_msecs

   msecs_by_urn = [
       (monitoring_infos.START_BUNDLE_MSECS_URN, start_msecs),
       (monitoring_infos.PROCESS_BUNDLE_MSECS_URN, process_msecs),
       (monitoring_infos.FINISH_BUNDLE_MSECS_URN, finish_msecs),
       (monitoring_infos.TOTAL_MSECS_URN, total_time_spent_msecs),
   ]
   mis = [
       monitoring_infos.int64_counter(urn, msecs, ptransform=transform_id)
       for urn, msecs in msecs_by_urn
   ]
   return {monitoring_infos.to_key(mi): mi for mi in mis}
Example #8
0
    def to_runner_api_monitoring_infos(self, transform_id):
        """Returns a dict of MonitoringInfos for the metrics in this container.

        Counters, distributions and gauges are converted in that order and
        the result is keyed by ``monitoring_infos.to_key``.
        """
        counter_infos = [
            monitoring_infos.int64_user_counter(
                name.namespace,
                name.name,
                cell.to_runner_api_monitoring_info(),
                ptransform=transform_id)
            for name, cell in self.counters.items()
        ]
        # Distributions and gauges report their cumulative value.
        distribution_infos = [
            monitoring_infos.int64_user_distribution(
                name.namespace,
                name.name,
                cell.get_cumulative().to_runner_api_monitoring_info(),
                ptransform=transform_id)
            for name, cell in self.distributions.items()
        ]
        gauge_infos = [
            monitoring_infos.int64_user_gauge(
                name.namespace,
                name.name,
                cell.get_cumulative().to_runner_api_monitoring_info(),
                ptransform=transform_id)
            for name, cell in self.gauges.items()
        ]

        infos_by_key = {}
        for mi in counter_infos + distribution_infos + gauge_infos:
            infos_by_key[monitoring_infos.to_key(mi)] = mi
        return infos_by_key
Example #9
0
 def execution_time_monitoring_infos(self, transform_id):
   """Returns start, process, finish and total msecs counters.

   Args:
     transform_id: value used for the ptransform label of each counter.

   Returns:
     A dict mapping ``monitoring_infos.to_key(mi)`` to ``mi`` for the four
     int64 counters.
   """
   # Sample every state a single time; sampling again for the individual
   # counters could yield values that no longer add up to the total.
   sampled_msecs = [
       (monitoring_infos.START_BUNDLE_MSECS_URN,
        self.scoped_start_state.sampled_msecs_int()),
       (monitoring_infos.PROCESS_BUNDLE_MSECS_URN,
        self.scoped_process_state.sampled_msecs_int()),
       (monitoring_infos.FINISH_BUNDLE_MSECS_URN,
        self.scoped_finish_state.sampled_msecs_int()),
   ]
   total_time_spent_msecs = sum(msecs for _, msecs in sampled_msecs)

   mis = [
       monitoring_infos.int64_counter(urn, msecs, ptransform=transform_id)
       for urn, msecs in sampled_msecs
   ]
   mis.append(
       monitoring_infos.int64_counter(
           monitoring_infos.TOTAL_MSECS_URN,
           total_time_spent_msecs,
           ptransform=transform_id))
   return {monitoring_infos.to_key(mi): mi for mi in mis}
Example #10
0
 def to_runner_api_monitoring_infos(self, transform_id):
     """Returns a dict of MonitoringInfos for the metrics in this container.

     Each cell in ``self.metrics`` is converted using its key's metric name
     and ``transform_id``; the result is keyed by ``monitoring_infos.to_key``.
     """
     converted = []
     for metric_key, metric_cell in self.metrics.items():
         converted.append(
             metric_cell.to_runner_api_monitoring_info(
                 metric_key.metric_name, transform_id))
     return dict((monitoring_infos.to_key(mi), mi) for mi in converted)
Example #11
0
 def monitoring_infos(self):
   """Returns the list of MonitoringInfos collected processing this bundle."""
   # Key every info so that duplicates collapse to the last one seen.
   deduped = {}
   for transform_id, op in self.ops.items():
     fixed_infos = (
         self._fix_output_tags_monitoring_info(transform_id, mi)
         for mi in op.monitoring_infos(transform_id).values())
     deduped.update((monitoring_infos.to_key(mi), mi) for mi in fixed_infos)
   return list(deduped.values())
 def monitoring_infos(self):
   """Returns the list of MonitoringInfos collected processing this bundle."""
   # Collect into a dict keyed by to_key so duplicate infos are dropped.
   unique_infos = {}
   for transform_id, op in self.ops.items():
     op_infos = op.monitoring_infos(transform_id)
     for raw_mi in op_infos.values():
       mi = self._fix_output_tags_monitoring_info(transform_id, raw_mi)
       unique_infos[monitoring_infos.to_key(mi)] = mi
   return [mi for mi in unique_infos.values()]
Example #13
0
 def monitoring_infos(self, transform_id):
     """Returns the inherited infos plus an element count per tagged output."""
     infos = super(DoOperation, self).monitoring_infos(transform_id)
     if not self.tagged_receivers:
         return infos

     for tag, receiver in self.tagged_receivers.items():
         element_count_info = monitoring_infos.int64_counter(
             monitoring_infos.ELEMENT_COUNT_URN,
             receiver.opcounter.element_counter.value(),
             ptransform=transform_id,
             tag=str(tag))
         info_key = monitoring_infos.to_key(element_count_info)
         infos[info_key] = element_count_info
     return infos
Example #14
0
 def monitoring_infos(self, transform_id):
   """Returns the inherited infos plus an element count per tagged output."""
   infos = super(DoOperation, self).monitoring_infos(transform_id)
   if not self.tagged_receivers:
     return infos

   for tag, receiver in self.tagged_receivers.items():
     count_info = monitoring_infos.int64_counter(
         monitoring_infos.ELEMENT_COUNT_URN,
         receiver.opcounter.element_counter.value(),
         ptransform=transform_id,
         tag=str(tag))
     infos[monitoring_infos.to_key(count_info)] = count_info
   return infos
Example #15
0
 def to_runner_api_monitoring_infos(self, transform_id):
     # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
     """Returns a dict of MonitoringInfos for the metrics in this container.

     Cells whose conversion yields None are left out of the result.
     """
     # Only the snapshot of the cells is taken under the lock; the
     # conversion itself runs after the lock has been released.
     with self.lock:
         snapshot = list(self.metrics.items())

     converted = []
     for metric_key, metric_cell in snapshot:
         converted.append(
             metric_cell.to_runner_api_monitoring_info(
                 metric_key.metric_name, transform_id))

     infos_by_key = {}
     for mi in converted:
         if mi is not None:
             infos_by_key[monitoring_infos.to_key(mi)] = mi
     return infos_by_key
Example #16
0
 def element_count_monitoring_infos(self, transform_id):
     """Returns the element count MonitoringInfo collected by this operation.

     An info is only produced when this operation has exactly one receiver;
     otherwise an empty dict is returned.

     Args:
       transform_id: value used for the ptransform label of the info.

     Returns:
       A dict with at most one entry, mapping
       ``monitoring_infos.to_key(mi)`` to ``mi``.
     """
     if len(self.receivers) != 1:
         return {}

     # If there is exactly one output, we can unambiguously
     # fix its name later, which we do.
     # TODO(robertwb): Plumb the actual name here.
     # The tag is a plain constant: past the guard above there is always
     # exactly one receiver, so no other tag value can apply.
     mi = monitoring_infos.int64_counter(
         monitoring_infos.ELEMENT_COUNT_URN,
         self.receivers[0].opcounter.element_counter.value(),
         ptransform=transform_id,
         tag='ONLY_OUTPUT',
     )
     return {monitoring_infos.to_key(mi): mi}
Example #17
0
 def element_count_monitoring_infos(self, transform_id):
   """Returns the element count MonitoringInfo collected by this operation.

   An info is only produced when this operation has exactly one receiver;
   otherwise an empty dict is returned.

   Args:
     transform_id: value used for the ptransform label of the info.

   Returns:
     A dict with at most one entry, mapping
     ``monitoring_infos.to_key(mi)`` to ``mi``.
   """
   if len(self.receivers) != 1:
     return {}

   # If there is exactly one output, we can unambiguously
   # fix its name later, which we do.
   # TODO(robertwb): Plumb the actual name here.
   # The tag is a plain constant: past the guard above there is always
   # exactly one receiver, so no other tag value can apply.
   mi = monitoring_infos.int64_counter(
       monitoring_infos.ELEMENT_COUNT_URN,
       self.receivers[0].opcounter.element_counter.value(),
       ptransform=transform_id,
       tag='ONLY_OUTPUT',
   )
   return {monitoring_infos.to_key(mi): mi}
Example #18
0
  def monitoring_infos(self):
    """Returns the list of MonitoringInfos collected processing this bundle."""
    # Key every info so that duplicates collapse to the last one seen.
    deduped = {}
    for transform_id, op in self.ops.items():
      for raw_mi in op.monitoring_infos(transform_id).values():
        fixed_mi = self._fix_output_tags_monitoring_info(transform_id, raw_mi)
        deduped[monitoring_infos.to_key(fixed_mi)] = fixed_mi

    infos_list = list(deduped.values())

    def inject_pcollection(monitoring_info):
      """
      For infos whose urn is in URNS_NEEDING_PCOLLECTIONS:
      looks up the output named by the info's ptransform and tag labels in
      the current process_bundle_descriptor and, if found, replaces those
      two labels with a PCOLLECTION_LABEL holding the pcollection id.
      The info is modified in place; anything else is left untouched.
      """
      if monitoring_info.urn not in URNS_NEEDING_PCOLLECTIONS:
        return

      # Membership is always checked before a key is read.
      labels = monitoring_info.labels
      if monitoring_infos.PTRANSFORM_LABEL not in labels:
        return
      ptransform_label = labels[monitoring_infos.PTRANSFORM_LABEL]
      if monitoring_infos.TAG_LABEL not in labels:
        return
      tag_label = labels[monitoring_infos.TAG_LABEL]

      transforms = self.process_bundle_descriptor.transforms
      if ptransform_label not in transforms:
        return
      outputs = transforms[ptransform_label].outputs
      if tag_label not in outputs:
        return

      labels[monitoring_infos.PCOLLECTION_LABEL] = outputs[tag_label]

      # Cleaning up labels that are not in specification.
      labels.pop(monitoring_infos.PTRANSFORM_LABEL)
      labels.pop(monitoring_infos.TAG_LABEL)

    for mi in infos_list:
      inject_pcollection(mi)

    return infos_list
  def monitoring_infos(self):
    """Returns the list of MonitoringInfos collected processing this bundle."""
    # Collect into a dict keyed by to_key so duplicate infos are dropped.
    unique_infos = {}
    for transform_id, op in self.ops.items():
      for raw_mi in op.monitoring_infos(transform_id).values():
        fixed_mi = self._fix_output_tags_monitoring_info(transform_id, raw_mi)
        unique_infos[monitoring_infos.to_key(fixed_mi)] = fixed_mi

    infos_list = list(unique_infos.values())

    def inject_pcollection_into_element_count(monitoring_info):
      """
      For element count infos only:
      looks up the output named by the info's ptransform and tag labels in
      the current process_bundle_descriptor and, if found, replaces those
      two labels with a PCOLLECTION_LABEL holding the pcollection id.
      The info is modified in place; anything else is left untouched.
      """
      if monitoring_info.urn != monitoring_infos.ELEMENT_COUNT_URN:
        return

      # Membership is always checked before a key is read.
      labels = monitoring_info.labels
      if monitoring_infos.PTRANSFORM_LABEL not in labels:
        return
      ptransform_label = labels[monitoring_infos.PTRANSFORM_LABEL]
      if monitoring_infos.TAG_LABEL not in labels:
        return
      tag_label = labels[monitoring_infos.TAG_LABEL]

      transforms = self.process_bundle_descriptor.transforms
      if ptransform_label not in transforms:
        return
      outputs = transforms[ptransform_label].outputs
      if tag_label not in outputs:
        return

      labels[monitoring_infos.PCOLLECTION_LABEL] = outputs[tag_label]

      # Cleaning up labels that are not in specification.
      labels.pop(monitoring_infos.PTRANSFORM_LABEL)
      labels.pop(monitoring_infos.TAG_LABEL)

    for mi in infos_list:
      inject_pcollection_into_element_count(mi)

    return infos_list
Example #20
0
    def getShortId(self, monitoring_info):
        # type: (metrics_pb2.MonitoringInfo) -> str
        """Returns the short id assigned to the given MonitoringInfo.

        If no short id has been assigned yet, a new one is assigned and
        recorded together with a payload-free copy of the info.
        """
        info_key = monitoring_infos.to_key(monitoring_info)
        with self._lock:
            try:
                return self._infoKeyToShortId[info_key]
            except KeyError:
                # Not seen before; a fresh id is assigned below.
                pass

            self._lastShortId += 1

            # Hex digits (without the leading '0x') for some compression.
            new_short_id = hex(self._lastShortId)[2:]

            # The stored copy keeps everything except the payload.
            stripped_info = metrics_pb2.MonitoringInfo()
            stripped_info.CopyFrom(monitoring_info)
            stripped_info.ClearField('payload')

            self._infoKeyToShortId[info_key] = new_short_id
            self._shortIdToInfo[new_short_id] = stripped_info
            return new_short_id
Example #21
0
  def to_runner_api_monitoring_infos(self, transform_id):
    """Returns a dict of MonitoringInfos for the metrics in this container.

    Counters, distributions and gauges are converted in that order and the
    result is keyed by ``monitoring_infos.to_key``.
    """
    counter_infos = [
        monitoring_infos.int64_counter(
            user_metric_urn(name.namespace, name.name),
            cell.to_runner_api_monitoring_info(),
            ptransform=transform_id)
        for name, cell in self.counters.items()
    ]
    # Distributions and gauges report their cumulative value.
    distribution_infos = [
        monitoring_infos.int64_distribution(
            user_distribution_metric_urn(name.namespace, name.name),
            cell.get_cumulative().to_runner_api_monitoring_info(),
            ptransform=transform_id)
        for name, cell in self.distributions.items()
    ]
    gauge_infos = [
        monitoring_infos.int64_gauge(
            user_metric_urn(name.namespace, name.name),
            cell.get_cumulative().to_runner_api_monitoring_info(),
            ptransform=transform_id)
        for name, cell in self.gauges.items()
    ]

    infos_by_key = {}
    for mi in counter_infos + distribution_infos + gauge_infos:
      infos_by_key[monitoring_infos.to_key(mi)] = mi
    return infos_by_key