def monitoring_infos(self, transform_id):
  # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  with self.lock:
    infos = super(SdfProcessSizedElements,
                  self).monitoring_infos(transform_id)
    current_element_progress = self.current_element_progress()
    if current_element_progress:
      if current_element_progress.completed_work:
        completed = current_element_progress.completed_work
        remaining = current_element_progress.remaining_work
      else:
        completed = current_element_progress.fraction_completed
        remaining = current_element_progress.fraction_remaining
      assert completed is not None
      assert remaining is not None
      completed_mi = metrics_pb2.MonitoringInfo(
          urn=monitoring_infos.WORK_COMPLETED_URN,
          type=monitoring_infos.LATEST_DOUBLES_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=coders.FloatCoder().get_impl().encode_nested(completed),
          timestamp=monitoring_infos.to_timestamp_proto(time.time()))
      remaining_mi = metrics_pb2.MonitoringInfo(
          urn=monitoring_infos.WORK_REMAINING_URN,
          type=monitoring_infos.LATEST_DOUBLES_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=coders.FloatCoder().get_impl().encode_nested(remaining),
          timestamp=monitoring_infos.to_timestamp_proto(time.time()))
      infos[monitoring_infos.to_key(completed_mi)] = completed_mi
      infos[monitoring_infos.to_key(remaining_mi)] = remaining_mi
    return infos

def monitoring_infos(self, transform_id, tag_to_pcollection_id):
  # type: (str, Dict[str, str]) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  def encode_progress(value):
    # type: (float) -> bytes
    coder = coders.IterableCoder(coders.FloatCoder())
    return coder.encode([value])

  with self.lock:
    infos = super(SdfProcessSizedElements, self).monitoring_infos(
        transform_id, tag_to_pcollection_id)
    current_element_progress = self.current_element_progress()
    if current_element_progress:
      if current_element_progress.completed_work:
        completed = current_element_progress.completed_work
        remaining = current_element_progress.remaining_work
      else:
        completed = current_element_progress.fraction_completed
        remaining = current_element_progress.fraction_remaining
      assert completed is not None
      assert remaining is not None
      completed_mi = metrics_pb2.MonitoringInfo(
          urn=monitoring_infos.WORK_COMPLETED_URN,
          type=monitoring_infos.PROGRESS_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=encode_progress(completed))
      remaining_mi = metrics_pb2.MonitoringInfo(
          urn=monitoring_infos.WORK_REMAINING_URN,
          type=monitoring_infos.PROGRESS_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=encode_progress(remaining))
      infos[monitoring_infos.to_key(completed_mi)] = completed_mi
      infos[monitoring_infos.to_key(remaining_mi)] = remaining_mi
    return infos

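# A minimal sketch (not part of the source above): how the two versions of
# monitoring_infos encode a progress value into a MonitoringInfo payload.
# The value and variable names are illustrative only.
from apache_beam import coders

progress = 0.75

# Latest-doubles style payload: a single nested-encoded float.
latest_double_payload = coders.FloatCoder().get_impl().encode_nested(progress)

# Progress style payload: an iterable of doubles, as in encode_progress above.
progress_payload = coders.IterableCoder(coders.FloatCoder()).encode([progress])
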
def merge(a, b):
  # pylint: disable=cell-var-from-loop
  return metrics_pb2.MonitoringInfo(
      urn=a.urn,
      type=a.type,
      labels=dict(
          (label, value) for label, value in a.labels.items()
          if b.labels.get(label) == value),
      payload=combiner(a.payload, b.payload))

def merge(a, b):
  # pylint: disable=cell-var-from-loop
  return metrics_pb2.MonitoringInfo(
      urn=a.urn,
      type=a.type,
      labels=dict(
          (label, value) for label, value in a.labels.items()
          if b.labels.get(label) == value),
      metric=combiner(a.metric, b.metric),
      timestamp=max_timestamp(a.timestamp, b.timestamp))

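# A minimal sketch (assumption, not from the source): a plausible payload
# combiner for the payload-based merge above. In the source, `combiner` is a
# loop variable that merge closes over; here it is written out standalone for
# a sum_int64 payload, assuming the payload is a nested varint.
from apache_beam import coders

def sum_int64_combiner(payload_a, payload_b):
  # Decode both varint payloads, add them, and re-encode the sum.
  impl = coders.VarIntCoder().get_impl()
  return impl.encode_nested(
      impl.decode_nested(payload_a) + impl.decode_nested(payload_b))
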
def create_monitoring_info(urn, type_urn, payload, labels=None):
  # type: (...) -> metrics_pb2.MonitoringInfo
  """Return the monitoring info for the given URN, type, payload and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    type_urn: The URN of the type of the monitoring info/metric.
        i.e. beam:metrics:sum_int_64, beam:metrics:latest_int_64.
    payload: The payload field to use in the monitoring info.
    labels: The label dictionary to use in the MonitoringInfo.
  """
  return metrics_pb2.MonitoringInfo(
      urn=urn, type=type_urn, labels=labels or dict(), payload=payload)

def create_monitoring_info(urn, type_urn, metric_proto, labels=None):
  """Return the gauge monitoring info for the URN, type, metric and labels.

  Args:
    urn: The URN of the monitoring info/metric.
    type_urn: The URN of the type of the monitoring info/metric.
        i.e. beam:metrics:sum_int_64, beam:metrics:latest_int_64.
    metric_proto: The metric proto field to use in the monitoring info.
        Or an int value.
    labels: The label dictionary to use in the MonitoringInfo.
  """
  return metrics_pb2.MonitoringInfo(
      urn=urn,
      type=type_urn,
      labels=labels or dict(),
      metric=metric_proto,
      timestamp=to_timestamp_proto(time.time()))

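# A minimal sketch (assumption, not from the source): calling the newer,
# payload-based create_monitoring_info above to build an element-count
# MonitoringInfo. The PCOLLECTION label value "myPCol" is made up; the URN and
# type strings match those used in the test below.
from apache_beam import coders

count_payload = coders.VarIntCoder().get_impl().encode_nested(42)
element_count_mi = create_monitoring_info(
    urn="beam:metric:element_count:v1",
    type_urn="beam:metrics:sum_int64:v1",
    payload=count_payload,
    labels={"PCOLLECTION": "myPCol"})
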
def getShortId(self, monitoring_info):
  # type: (metrics_pb2.MonitoringInfo) -> str
  """Returns the assigned shortId for a given MonitoringInfo, assigning one
  if it has not been assigned already.
  """
  key = monitoring_infos.to_key(monitoring_info)
  with self._lock:
    try:
      return self._infoKeyToShortId[key]
    except KeyError:
      self._lastShortId += 1

      # Convert to a hex string (and drop the '0x') for some compression
      shortId = hex(self._lastShortId)[2:]

      payload_cleared = metrics_pb2.MonitoringInfo()
      payload_cleared.CopyFrom(monitoring_info)
      payload_cleared.ClearField('payload')

      self._infoKeyToShortId[key] = shortId
      self._shortIdToInfo[shortId] = payload_cleared
      return shortId

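# A minimal sketch (assumption, not from the source): round-tripping one
# MonitoringInfo through the ShortIdCache that getShortId above belongs to,
# mirroring the test below. getInfos is the lookup used in that test.
from apache_beam.portability.api import metrics_pb2
from apache_beam.runners.worker import sdk_worker

cache = sdk_worker.ShortIdCache()
info = metrics_pb2.MonitoringInfo(
    urn="beam:metric:element_count:v1",
    type="beam:metrics:sum_int64:v1",
    payload=b"ignored for short id assignment")
short_id = cache.getShortId(info)  # first assignment yields "1"
recovered = cache.getInfos([short_id])  # returned info has its payload cleared
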
def testShortIdAssignment(self):
  TestCase = namedtuple('TestCase', ['expectedShortId', 'info'])
  test_cases = [
      TestCase(*args) for args in [
          ("1",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:user:distribution_int64:v1",
               type="beam:metrics:distribution_int64:v1")),
          ("2",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:element_count:v1",
               type="beam:metrics:sum_int64:v1")),
          ("3",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:ptransform_progress:completed:v1",
               type="beam:metrics:progress:v1")),
          ("4",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:user:distribution_double:v1",
               type="beam:metrics:distribution_double:v1")),
          ("5",
           metrics_pb2.MonitoringInfo(
               urn="TestingSentinelUrn", type="TestingSentinelType")),
          ("6",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:pardo_execution_time:finish_bundle_msecs:v1",
               type="beam:metrics:sum_int64:v1")),
          # This case and the next one validate that different labels
          # with the same urn are in fact assigned different short ids.
          ("7",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:user:sum_int64:v1",
               type="beam:metrics:sum_int64:v1",
               labels={
                   "PTRANSFORM": "myT",
                   "NAMESPACE": "harness",
                   "NAME": "metricNumber7"
               })),
          ("8",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:user:sum_int64:v1",
               type="beam:metrics:sum_int64:v1",
               labels={
                   "PTRANSFORM": "myT",
                   "NAMESPACE": "harness",
                   "NAME": "metricNumber8"
               })),
          ("9",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:user:top_n_double:v1",
               type="beam:metrics:top_n_double:v1",
               labels={
                   "PTRANSFORM": "myT",
                   "NAMESPACE": "harness",
                   "NAME": "metricNumber7"
               })),
          ("a",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:element_count:v1",
               type="beam:metrics:sum_int64:v1",
               labels={"PCOLLECTION": "myPCol"})),
          # Validate that the payload is ignored for shortId assignment.
          ("3",
           metrics_pb2.MonitoringInfo(
               urn="beam:metric:ptransform_progress:completed:v1",
               type="beam:metrics:progress:v1",
               payload=b"this is ignored!"))
      ]
  ]

  cache = sdk_worker.ShortIdCache()
  for case in test_cases:
    self.assertEqual(
        case.expectedShortId,
        cache.getShortId(case.info),
        "Got incorrect short id for monitoring info:\n%s" % case.info)

  # Retrieve all of the monitoring infos by short id, and verify that the
  # metadata (everything but the payload) matches the originals.
  actual_recovered_infos = cache.getInfos(
      case.expectedShortId for case in test_cases)