def from_runner_api(proto, context):
    """Rebuild an AppliedPTransform from its runner API proto.

    Args:
        proto: transform proto; ``spec.parameter``, ``unique_name``,
            ``inputs``, ``subtransforms`` and ``outputs`` are read from it.
        context: object whose ``pcollections`` and ``transforms`` members
            resolve ids through ``get_by_id``.

    Returns:
        The reconstructed AppliedPTransform, created with ``parent=None``.
    """
    # The transform is carried as pickled bytes wrapped in a BytesValue.
    # NOTE(review): unpickling is only safe if the proto is trusted - confirm.
    pickled_transform = proto_utils.unpack_Any(
        proto.spec.parameter, wrappers_pb2.BytesValue).value
    transform = pickler.loads(pickled_transform)
    input_pcolls = [
        context.pcollections.get_by_id(pcoll_id)
        for pcoll_id in proto.inputs.values()
    ]
    result = AppliedPTransform(
        parent=None,
        transform=transform,
        full_label=proto.unique_name,
        inputs=input_pcolls)

    result.parts = [
        context.transforms.get_by_id(part_id)
        for part_id in proto.subtransforms
    ]

    # The literal string 'None' stands for the untagged (None) output.
    outputs = {}
    for tag, pcoll_id in proto.outputs.items():
        key = None if tag == 'None' else tag
        outputs[key] = context.pcollections.get_by_id(pcoll_id)
    result.outputs = outputs

    # Only a transform without sub-parts is recorded as the producer of its
    # outputs; outputs that are also inputs are left untouched.
    if not result.parts:
        for tag, pc in result.outputs.items():
            if pc not in result.inputs:
                pc.producer = result
                pc.tag = tag

    result.update_input_refcounts()
    return result
def from_runner_api(cls, proto, context):
    """Build an object from ``proto`` using the constructor registered for its URN.

    Args:
        cls: class whose ``_known_urns`` maps a URN to a
            ``(parameter_type, constructor)`` pair.
        proto: spec proto exposing ``urn`` and ``parameter``; may be None.
        context: passed through unchanged to the constructor.

    Returns:
        None when ``proto`` is None or its ``urn`` is falsy; otherwise the
        result of ``constructor(unpacked_parameter, context)``.
    """
    has_urn = proto is not None and proto.urn
    if not has_urn:
        return None

    parameter_type, constructor = cls._known_urns[proto.urn]
    payload = proto_utils.unpack_Any(proto.parameter, parameter_type)
    return constructor(payload, context)
def create_operation(self, transform_id, consumers):
    """Create the operation for the transform identified by ``transform_id``.

    The transform proto is looked up in ``self.descriptor.transforms``; its
    spec URN selects a ``(creator, parameter_type)`` pair from
    ``self._known_urns``, and the spec parameter is unpacked as
    ``parameter_type`` before being handed to the creator.

    Args:
        transform_id: key into ``self.descriptor.transforms``.
        consumers: forwarded unchanged to the creator.

    Returns:
        Whatever the registered creator returns.
    """
    transform_proto = self.descriptor.transforms[transform_id]
    spec = transform_proto.spec
    creator, parameter_type = self._known_urns[spec.urn]
    parameter = proto_utils.unpack_Any(spec.parameter, parameter_type)
    return creator(
        self, transform_id, transform_proto, parameter, consumers)
def extract_endpoints(stage):
    """Collect the data input/output endpoints of ``stage``.

    Each endpoint is keyed by ``(transform.unique_name, local_name)``, where
    ``local_name`` is the single entry of the transform's outputs (for data
    inputs) or inputs (for data outputs), and maps to the PCollection id
    stored in the transform's spec parameter.

    Side effect: the spec parameter of every data input/output transform is
    overwritten with ``data_operation_spec`` when that is truthy, and cleared
    otherwise. ``data_operation_spec`` comes from the enclosing scope.

    Returns:
        A ``(data_input, data_side_input, data_output)`` tuple of dicts;
        ``data_side_input`` is never populated here.
    """
    data_input = {}
    data_side_input = {}
    data_output = {}

    for transform in stage.transforms:
        # Unpacked for every transform, before the URN is inspected.
        pcoll_id = proto_utils.unpack_Any(
            transform.spec.parameter, wrappers_pb2.BytesValue).value

        urn = transform.spec.urn
        if urn == bundle_processor.DATA_INPUT_URN:
            endpoints, local_names = data_input, transform.outputs
        elif urn == bundle_processor.DATA_OUTPUT_URN:
            endpoints, local_names = data_output, transform.inputs
        else:
            # Not a data IO transform: nothing to record or rewrite.
            continue

        target = transform.unique_name, only_element(local_names)
        endpoints[target] = pcoll_id

        if data_operation_spec:
            transform.spec.parameter.CopyFrom(data_operation_spec)
        else:
            transform.spec.parameter.Clear()

    return data_input, data_side_input, data_output
def from_runner_api(cls, proto, context):
    """Instantiate the object described by ``proto`` via its registered URN.

    ``cls._known_urns`` is indexed by ``proto.urn`` and yields a
    ``(parameter_type, constructor)`` pair. ``proto.parameter`` is unpacked
    as ``parameter_type`` and passed, with ``context``, to ``constructor``.

    Returns:
        None if ``proto`` is None or carries a falsy ``urn``; otherwise the
        constructor's result.
    """
    if proto is None:
        return None
    if not proto.urn:
        return None

    parameter_type, constructor = cls._known_urns[proto.urn]
    unpacked = proto_utils.unpack_Any(proto.parameter, parameter_type)
    return constructor(unpacked, context)
def side_inputs(self):
    """Yield the input entry behind every side input of each ParDo transform.

    Only transforms in ``self.transforms`` whose spec URN equals
    ``urns.PARDO_TRANSFORM`` are considered. For each name listed in the
    unpacked ParDoPayload's ``side_inputs``, ``transform.inputs[name]`` is
    yielded.
    """
    pardo_transforms = (
        candidate for candidate in self.transforms
        if candidate.spec.urn == urns.PARDO_TRANSFORM)
    for transform in pardo_transforms:
        payload = proto_utils.unpack_Any(
            transform.spec.parameter, beam_runner_api_pb2.ParDoPayload)
        for side_input in payload.side_inputs:
            yield transform.inputs[side_input]
def from_runner_api(cls, fn_proto, context):
    """Turn an SdkFunctionSpec proto into a Fn object.

    The URN in ``fn_proto.spec`` selects a ``(parameter_type, constructor)``
    pair from ``cls._known_urns``; registering a URN together with its
    parameter type and constructor is the preferred way to extend this.

    Args:
        fn_proto: proto exposing ``spec.urn`` and ``spec.parameter``.
        context: forwarded unchanged to the constructor.

    Returns:
        ``constructor(unpacked_parameter, context)``.
    """
    spec = fn_proto.spec
    parameter_type, constructor = cls._known_urns[spec.urn]
    payload = proto_utils.unpack_Any(spec.parameter, parameter_type)
    return constructor(payload, context)
def from_runner_api(cls, fn_proto, context):
    """Convert an SdkFunctionSpec into the Fn object it describes.

    Dispatches on ``fn_proto.spec.urn`` through ``cls._known_urns``, whose
    values are ``(parameter_type, constructor)`` pairs. Prefer registering a
    URN with its parameter type and constructor over special-casing here.

    Returns:
        The constructor's result for the unpacked spec parameter and
        ``context``.
    """
    fn_spec = fn_proto.spec
    registration = cls._known_urns[fn_spec.urn]
    parameter_type, constructor = registration
    unpacked = proto_utils.unpack_Any(fn_spec.parameter, parameter_type)
    return constructor(unpacked, context)
def get_coder(self, coder_id):
    """Return the coder registered under ``coder_id``.

    When the coder proto carries a URN, the coder is resolved through
    ``self.context.coders``. Otherwise the spec parameter is treated as a
    BytesValue holding JSON and decoded with
    ``operation_specs.get_coder_from_spec``.
    """
    # NOTE(review): `codersyyy` is reproduced exactly as found; presumably it
    # is the descriptor's actual field name - confirm before renaming.
    coder_proto = self.descriptor.codersyyy[coder_id]
    inner_spec = coder_proto.spec.spec

    if not inner_spec.urn:
        # No URN, assume cloud object encoding json bytes.
        json_bytes = proto_utils.unpack_Any(
            inner_spec.parameter, wrappers_pb2.BytesValue).value
        return operation_specs.get_coder_from_spec(json.loads(json_bytes))

    return self.context.coders.get_by_id(coder_id)
def get_coder(self, coder_id):
    """Look up the coder for ``coder_id``.

    A coder proto with a URN is resolved via
    ``self.context.coders.get_by_id``. One without a URN has its spec
    parameter unpacked as a BytesValue, parsed as JSON, and passed to
    ``operation_specs.get_coder_from_spec``.
    """
    # NOTE(review): the `codersyyy` field name is kept verbatim; it looks
    # unusual but is presumably what the descriptor declares - confirm.
    coder_proto = self.descriptor.codersyyy[coder_id]
    has_urn = bool(coder_proto.spec.spec.urn)

    if has_urn:
        coder = self.context.coders.get_by_id(coder_id)
    else:
        # No URN, assume cloud object encoding json bytes.
        wrapped = proto_utils.unpack_Any(
            coder_proto.spec.spec.parameter, wrappers_pb2.BytesValue)
        coder = operation_specs.get_coder_from_spec(json.loads(wrapped.value))
    return coder
def has_as_main_input(self, pcoll):
    """Report whether ``pcoll`` is a main (non-side) input of any transform.

    For a transform whose spec URN equals ``urns.PARDO_TRANSFORM``, the
    local input names listed in its ParDoPayload's ``side_inputs`` are
    excluded; every other transform is treated as having no side inputs.

    Args:
        pcoll: PCollection id compared against the values of each
            transform's ``inputs`` mapping.

    Returns:
        True if some transform in ``self.transforms`` consumes ``pcoll``
        under a local name that is not one of its side inputs, else False.
    """
    for transform in self.transforms:
        if transform.spec.urn == urns.PARDO_TRANSFORM:
            payload = proto_utils.unpack_Any(
                transform.spec.parameter, beam_runner_api_pb2.ParDoPayload)
            local_side_inputs = payload.side_inputs
        else:
            local_side_inputs = {}
        for local_id, pipeline_id in transform.inputs.items():
            if pcoll == pipeline_id and local_id not in local_side_inputs:
                return True
    # Explicit negative result; previously this fell through and returned
    # None, leaving the predicate with inconsistent return types.
    return False
def create(factory, transform_id, transform_proto, parameter, consumers):
    """Create a ParDo operation from a pickled DoFn payload.

    ``parameter.do_fn.spec`` must carry the ``urns.PICKLED_DO_FN_INFO`` URN.
    Its parameter is unpacked as a BytesValue and unpickled. A two-element
    result is split into ``(serialized_fn, side_input_data)``; anything else
    leaves ``serialized_fn`` as the raw unpacked bytes with no side input
    data.

    Returns:
        The result of ``_create_pardo_operation``.
    """
    do_fn_spec = parameter.do_fn.spec
    assert do_fn_spec.urn == urns.PICKLED_DO_FN_INFO

    serialized_fn = proto_utils.unpack_Any(
        do_fn_spec.parameter, wrappers_pb2.BytesValue).value
    # NOTE(review): unpickling assumes the payload is trusted - confirm.
    dofn_data = pickler.loads(serialized_fn)

    # Default to no side input data; a 2-element payload supplies both the
    # serialized fn and its side input data.
    side_input_data = []
    if len(dofn_data) == 2:
        serialized_fn, side_input_data = dofn_data

    return _create_pardo_operation(
        factory,
        transform_id,
        transform_proto,
        consumers,
        serialized_fn,
        side_input_data)
def from_runner_api(proto, context):
    """Reconstruct an AppliedPTransform from a runner API transform proto.

    Args:
        proto: transform proto providing ``spec.parameter`` (pickled
            transform wrapped in a BytesValue), ``unique_name``, ``inputs``,
            ``subtransforms`` and ``outputs``.
        context: resolves ids via ``pcollections.get_by_id`` and
            ``transforms.get_by_id``.

    Returns:
        The new AppliedPTransform, with parts, outputs and input refcounts
        populated.
    """
    def resolve_pcoll(pcoll_id):
        return context.pcollections.get_by_id(pcoll_id)

    # NOTE(review): the transform is unpickled from the proto; this is only
    # safe for trusted input - confirm.
    wrapped = proto_utils.unpack_Any(
        proto.spec.parameter, wrappers_pb2.BytesValue)
    transform = pickler.loads(wrapped.value)
    inputs = [resolve_pcoll(pcoll_id) for pcoll_id in proto.inputs.values()]

    result = AppliedPTransform(
        parent=None,
        transform=transform,
        full_label=proto.unique_name,
        inputs=inputs)
    result.parts = [
        context.transforms.get_by_id(sub_id)
        for sub_id in proto.subtransforms
    ]
    # A tag spelled as the string 'None' denotes the untagged output.
    result.outputs = {
        (tag if tag != 'None' else None): resolve_pcoll(pcoll_id)
        for tag, pcoll_id in proto.outputs.items()
    }

    is_leaf = not result.parts
    if is_leaf:
        # Claim each output that is not simply one of the inputs.
        for tag, pc in result.outputs.items():
            if pc in result.inputs:
                continue
            pc.producer = result
            pc.tag = tag

    result.update_input_refcounts()
    return result
def from_runner_api(cls, fn_proto, context):
    """Construct the object described by ``fn_proto`` via its registered URN.

    ``cls._known_urns[fn_proto.spec.urn]`` yields a
    ``(parameter_type, constructor)`` pair; the spec parameter is unpacked
    as ``parameter_type`` and passed with ``context`` to ``constructor``.
    """
    spec = fn_proto.spec
    parameter_type, constructor = cls._known_urns[spec.urn]
    unpacked_parameter = proto_utils.unpack_Any(
        spec.parameter, parameter_type)
    return constructor(unpacked_parameter, context)
def create_operation(self, transform_id, consumers):
    """Build the operation for one transform of ``self.descriptor``.

    Args:
        transform_id: key of the transform in ``self.descriptor.transforms``.
        consumers: passed through to the creator registered for the
            transform's URN.

    Returns:
        The value produced by the ``creator`` found in ``self._known_urns``
        for the transform's spec URN.
    """
    proto = self.descriptor.transforms[transform_id]
    registration = self._known_urns[proto.spec.urn]
    creator, parameter_type = registration
    # The spec parameter is decoded as the type registered with the URN.
    parameter = proto_utils.unpack_Any(proto.spec.parameter, parameter_type)
    return creator(self, transform_id, proto, parameter, consumers)
def from_runner_api(cls, fn_proto, context):
    """Dispatch on the URN of ``fn_proto`` to build the matching object.

    Args:
        fn_proto: proto exposing ``spec.urn`` and ``spec.parameter``.
        context: handed unchanged to the selected constructor.

    Returns:
        ``constructor(parameter, context)``, where ``parameter`` is the spec
        parameter unpacked as the type registered in ``cls._known_urns``.
    """
    urn = fn_proto.spec.urn
    parameter_type, constructor = cls._known_urns[urn]
    parameter = proto_utils.unpack_Any(
        fn_proto.spec.parameter, parameter_type)
    return constructor(parameter, context)