def monitoring_infos(self, transform_id):
  # type: (str) -> Dict[FrozenSet, metrics_pb2.MonitoringInfo]
  with self.lock:
    infos = super(SdfProcessSizedElements,
                  self).monitoring_infos(transform_id)
    current_element_progress = self.current_element_progress()
    if current_element_progress:
      # Prefer absolute work counts; fall back to fractions of the
      # current element's restriction.
      if current_element_progress.completed_work:
        completed = current_element_progress.completed_work
        remaining = current_element_progress.remaining_work
      else:
        completed = current_element_progress.fraction_completed
        remaining = current_element_progress.fraction_remaining
      assert completed is not None
      assert remaining is not None
      completed_mi = metrics_pb2.MonitoringInfo(
          urn=monitoring_infos.WORK_COMPLETED_URN,
          type=monitoring_infos.LATEST_DOUBLES_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=coders.FloatCoder().get_impl().encode_nested(completed),
          timestamp=monitoring_infos.to_timestamp_proto(time.time()))
      remaining_mi = metrics_pb2.MonitoringInfo(
          urn=monitoring_infos.WORK_REMAINING_URN,
          type=monitoring_infos.LATEST_DOUBLES_TYPE,
          labels=monitoring_infos.create_labels(ptransform=transform_id),
          payload=coders.FloatCoder().get_impl().encode_nested(remaining),
          timestamp=monitoring_infos.to_timestamp_proto(time.time()))
      infos[monitoring_infos.to_key(completed_mi)] = completed_mi
      infos[monitoring_infos.to_key(remaining_mi)] = remaining_mi
    return infos
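
# A minimal sketch (not part of the worker code above) of how a consumer of
# these MonitoringInfos could read the progress values back: the payloads are
# nested-encoded doubles, so decode_nested on a FloatCoder impl recovers them.
# The helper name decode_progress_payload is hypothetical.
def decode_progress_payload(payload):
  # type: (bytes) -> float
  return coders.FloatCoder().get_impl().decode_nested(payload)

# e.g. decode_progress_payload(completed_mi.payload) would return the same
# float that was passed to encode_nested above.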
def test_serialization(self):
  context = pipeline_context.PipelineContext()
  float_coder_ref = context.coders.get_id(coders.FloatCoder())
  bytes_coder_ref = context.coders.get_id(coders.BytesCoder())
  proto = context.to_runner_api()
  context2 = pipeline_context.PipelineContext.from_runner_api(proto)
  self.assertEqual(
      coders.FloatCoder(), context2.coders.get_by_id(float_coder_ref))
  self.assertEqual(
      coders.BytesCoder(), context2.coders.get_by_id(bytes_coder_ref))
def test_common_id_assignment(self):
  context = pipeline_context.PipelineContext()
  float_coder_ref = context.coders.get_id(coders.FloatCoder())
  bytes_coder_ref = context.coders.get_id(coders.BytesCoder())
  context2 = pipeline_context.PipelineContext(
      component_id_map=context.component_id_map)
  bytes_coder_ref2 = context2.coders.get_id(coders.BytesCoder())
  float_coder_ref2 = context2.coders.get_id(coders.FloatCoder())
  self.assertEqual(bytes_coder_ref, bytes_coder_ref2)
  self.assertEqual(float_coder_ref, float_coder_ref2)
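
# An illustrative sketch, not an existing test in this file: the shared-map
# test above relies on equal coder instances mapping to the same component id,
# so by the same mechanism asking a single context for an id twice should
# yield the same reference. The function name is hypothetical.
def repeated_id_assignment_sketch():
  context = pipeline_context.PipelineContext()
  first = context.coders.get_id(coders.FloatCoder())
  second = context.coders.get_id(coders.FloatCoder())
  assert first == second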
def expand_sdf(stages, context):
  """Transforms splittable DoFns into pair+split+read."""
  for stage in stages:
    assert len(stage.transforms) == 1
    transform = stage.transforms[0]
    if transform.spec.urn == common_urns.primitives.PAR_DO.urn:

      pardo_payload = proto_utils.parse_Bytes(
          transform.spec.payload, beam_runner_api_pb2.ParDoPayload)

      if pardo_payload.splittable:

        def copy_like(protos, original, suffix='_copy', **kwargs):
          # `original` may be a component id or a proto message.
          if isinstance(original, (str, unicode)):
            key = original
            original = protos[original]
          else:
            key = 'component'
          new_id = unique_name(protos, key + suffix)
          protos[new_id].CopyFrom(original)
          proto = protos[new_id]
          for name, value in kwargs.items():
            if isinstance(value, dict):
              getattr(proto, name).clear()
              getattr(proto, name).update(value)
            elif isinstance(value, list):
              del getattr(proto, name)[:]
              getattr(proto, name).extend(value)
            elif name == 'urn':
              proto.spec.urn = value
            else:
              setattr(proto, name, value)
          return new_id

        def make_stage(base_stage, transform_id, extra_must_follow=()):
          transform = context.components.transforms[transform_id]
          return Stage(
              transform.unique_name, [transform],
              base_stage.downstream_side_inputs,
              union(base_stage.must_follow, frozenset(extra_must_follow)),
              parent=base_stage,
              environment=base_stage.environment)

        main_input_tag = only_element(
            tag for tag in transform.inputs.keys()
            if tag not in pardo_payload.side_inputs)
        main_input_id = transform.inputs[main_input_tag]
        element_coder_id = context.components.pcollections[
            main_input_id].coder_id
        # KV[element, restriction]
        paired_coder_id = context.add_or_get_coder_id(
            beam_runner_api_pb2.Coder(
                spec=beam_runner_api_pb2.SdkFunctionSpec(
                    spec=beam_runner_api_pb2.FunctionSpec(
                        urn=common_urns.coders.KV.urn)),
                component_coder_ids=[
                    element_coder_id, pardo_payload.restriction_coder_id
                ]))
        # KV[KV[element, restriction], double]
        sized_coder_id = context.add_or_get_coder_id(
            beam_runner_api_pb2.Coder(
                spec=beam_runner_api_pb2.SdkFunctionSpec(
                    spec=beam_runner_api_pb2.FunctionSpec(
                        urn=common_urns.coders.KV.urn)),
                component_coder_ids=[
                    paired_coder_id,
                    context.add_or_get_coder_id(
                        coders.FloatCoder().to_runner_api(None),
                        'doubles_coder')
                ]))

        # Expand the splittable ParDo into three chained transforms:
        # PairWithRestriction -> SplitAndSizeRestrictions ->
        # ProcessSizedElementsAndRestrictions.
        paired_pcoll_id = copy_like(
            context.components.pcollections,
            main_input_id,
            '_paired',
            coder_id=paired_coder_id)
        pair_transform_id = copy_like(
            context.components.transforms,
            transform,
            unique_name=transform.unique_name + '/PairWithRestriction',
            urn=common_urns.sdf_components.PAIR_WITH_RESTRICTION.urn,
            outputs={'out': paired_pcoll_id})

        split_pcoll_id = copy_like(
            context.components.pcollections,
            main_input_id,
            '_split',
            coder_id=sized_coder_id)
        split_transform_id = copy_like(
            context.components.transforms,
            transform,
            unique_name=transform.unique_name + '/SplitAndSizeRestriction',
            urn=common_urns.sdf_components.SPLIT_AND_SIZE_RESTRICTIONS.urn,
            inputs=dict(
                transform.inputs, **{main_input_tag: paired_pcoll_id}),
            outputs={'out': split_pcoll_id})

        process_transform_id = copy_like(
            context.components.transforms,
            transform,
            unique_name=transform.unique_name + '/Process',
            urn=common_urns.sdf_components
            .PROCESS_SIZED_ELEMENTS_AND_RESTRICTIONS.urn,
            inputs=dict(
                transform.inputs, **{main_input_tag: split_pcoll_id}))

        yield make_stage(stage, pair_transform_id)
        split_stage = make_stage(stage, split_transform_id)
        yield split_stage
        yield make_stage(
            stage, process_transform_id, extra_must_follow=[split_stage])

      else:
        yield stage
    else:
      yield stage
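
# For intuition only (an illustrative stand-in, not part of expand_sdf): the
# runner-api KV coders constructed above correspond, on the SDK side, to the
# TupleCoder nesting below, i.e. KV[KV[element, restriction], double]. The
# element_coder and restriction_coder parameters are assumed placeholders.
def sized_element_coder_sketch(element_coder, restriction_coder):
  # type: (coders.Coder, coders.Coder) -> coders.Coder
  paired = coders.TupleCoder((element_coder, restriction_coder))
  return coders.TupleCoder((paired, coders.FloatCoder()))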
def encode_progress(value):
  # type: (float) -> bytes
  coder = coders.IterableCoder(coders.FloatCoder())
  return coder.encode([value])
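
# A hedged counterpart to encode_progress (decode_progress is a hypothetical
# helper, not defined elsewhere here): IterableCoder decodes back to a list,
# so the single progress value is its only element.
def decode_progress(encoded):
  # type: (bytes) -> float
  coder = coders.IterableCoder(coders.FloatCoder())
  return coder.decode(encoded)[0]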