def expand(self, pvalueish):
    # type: (pvalue.PCollection) -> pvalue.PCollection
    if isinstance(pvalueish, pvalue.PBegin):
      self._inputs = {}
    elif isinstance(pvalueish, (list, tuple)):
      self._inputs = {str(ix): pvalue for ix, pvalue in enumerate(pvalueish)}
    elif isinstance(pvalueish, dict):
      self._inputs = pvalueish
    else:
      self._inputs = {'input': pvalueish}
    pipeline = (
        next(iter(self._inputs.values())).pipeline
        if self._inputs else pvalueish.pipeline)
    context = pipeline_context.PipelineContext()
    transform_proto = beam_runner_api_pb2.PTransform(
        unique_name=pipeline._current_transform().full_label,
        spec=beam_runner_api_pb2.FunctionSpec(
            urn=self._urn, payload=self._payload))
    for tag, pcoll in self._inputs.items():
      transform_proto.inputs[tag] = context.pcollections.get_id(pcoll)
      # Conversion to/from proto assumes producers.
      # TODO: Possibly loosen this.
      context.transforms.put_proto(
          '%s_%s' % (self._IMPULSE_PREFIX, tag),
          beam_runner_api_pb2.PTransform(
              unique_name='%s_%s' % (self._IMPULSE_PREFIX, tag),
              spec=beam_runner_api_pb2.FunctionSpec(
                  urn=common_urns.primitives.IMPULSE.urn),
              outputs={'out': transform_proto.inputs[tag]}))
    components = context.to_runner_api()
    request = beam_expansion_api_pb2.ExpansionRequest(
        components=components,
        namespace=self._namespace,  # type: ignore  # mypy thinks self._namespace is threading.local
        transform=transform_proto)

    if isinstance(self._expansion_service, str):
      with grpc.insecure_channel(self._expansion_service) as channel:
        response = beam_expansion_api_pb2_grpc.ExpansionServiceStub(
            channel).Expand(request)
    else:
      response = self._expansion_service.Expand(request, None)

    if response.error:
      raise RuntimeError(response.error)
    self._expanded_components = response.components
    self._expanded_transform = response.transform
    result_context = pipeline_context.PipelineContext(response.components)

    def fix_output(pcoll, tag):
      pcoll.pipeline = pipeline
      pcoll.tag = tag
      return pcoll

    self._outputs = {
        tag: fix_output(result_context.pcollections.get_by_id(pcoll_id), tag)
        for tag, pcoll_id in self._expanded_transform.outputs.items()
    }

    return self._output_to_pvalueish(self._outputs)
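For context, a hedged usage sketch: applying an ExternalTransform in a pipeline is what invokes the expand() above, which serializes the inputs and sends an ExpansionRequest over an insecure gRPC channel in this first version. The URN, payload fields, and service address below are illustrative assumptions, not a registered transform.

# Minimal sketch of triggering expand() via pipeline application.
# Assumes an expansion service is already running at the given address.
import apache_beam as beam
from apache_beam.transforms.external import ImplicitSchemaPayloadBuilder

with beam.Pipeline() as p:
  _ = (
      p
      | beam.Create(['a', 'b'])
      | beam.ExternalTransform(
          'beam:transforms:xlang:test:prefix',          # example transform URN
          ImplicitSchemaPayloadBuilder({'data': '0'}),  # configuration payload
          'localhost:8097'))                            # expansion service address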
def expand(self, pvalueish):
    # type: (pvalue.PCollection) -> pvalue.PCollection
    if isinstance(pvalueish, pvalue.PBegin):
      self._inputs = {}
    elif isinstance(pvalueish, (list, tuple)):
      self._inputs = {str(ix): pvalue for ix, pvalue in enumerate(pvalueish)}
    elif isinstance(pvalueish, dict):
      self._inputs = pvalueish
    else:
      self._inputs = {'input': pvalueish}
    pipeline = (
        next(iter(self._inputs.values())).pipeline
        if self._inputs else pvalueish.pipeline)
    context = pipeline_context.PipelineContext()
    transform_proto = beam_runner_api_pb2.PTransform(
        unique_name=pipeline._current_transform().full_label,
        spec=beam_runner_api_pb2.FunctionSpec(
            urn=self._urn, payload=self._payload))
    for tag, pcoll in self._inputs.items():
      transform_proto.inputs[tag] = context.pcollections.get_id(pcoll)
      # Conversion to/from proto assumes producers.
      # TODO: Possibly loosen this.
      context.transforms.put_proto(
          '%s_%s' % (self._IMPULSE_PREFIX, tag),
          beam_runner_api_pb2.PTransform(
              unique_name='%s_%s' % (self._IMPULSE_PREFIX, tag),
              spec=beam_runner_api_pb2.FunctionSpec(
                  urn=common_urns.primitives.IMPULSE.urn),
              outputs={'out': transform_proto.inputs[tag]}))
    components = context.to_runner_api()
    # Retain unknown options since they may only be relevant to the expanding
    # SDK.
    options = pipeline._options.get_all_options(
        drop_default=True, retain_unknown_options=True)
    request = beam_expansion_api_pb2.ExpansionRequest(
        components=components,
        namespace=self._namespace,  # type: ignore  # mypy thinks self._namespace is threading.local
        transform=transform_proto,
        pipeline_options=job_utils.pipeline_options_dict_to_struct(options))

    if isinstance(self._expansion_service, str):
      # Some environments may not support unsecure channels. Hence using a
      # secure channel with local credentials here.
      # TODO: update this to support secure non-local channels.
      channel_creds = grpc.local_channel_credentials()
      with grpc.secure_channel(self._expansion_service,
                               channel_creds) as channel:
        response = beam_expansion_api_pb2_grpc.ExpansionServiceStub(
            channel).Expand(request)
    else:
      response = self._expansion_service.Expand(request, None)

    if response.error:
      raise RuntimeError(response.error)
    self._expanded_components = response.components
    self._expanded_transform = response.transform
    self._expanded_requirements = response.requirements
    result_context = pipeline_context.PipelineContext(response.components)

    def fix_output(pcoll, tag):
      pcoll.pipeline = pipeline
      pcoll.tag = tag
      return pcoll

    self._outputs = {
        tag: fix_output(result_context.pcollections.get_by_id(pcoll_id), tag)
        for tag, pcoll_id in self._expanded_transform.outputs.items()
    }

    return self._output_to_pvalueish(self._outputs)
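This second version forwards pipeline options to the expansion service, retaining options the Python SDK does not itself recognize, and switches from an insecure channel to a secure channel with local credentials. A small sketch of the retain_unknown_options behavior it relies on; the flag name below is made up for illustration:

from apache_beam.options.pipeline_options import PipelineOptions

# '--magic_java_flag' stands in for an option only the expanding SDK knows.
opts = PipelineOptions(['--streaming', '--magic_java_flag=42'])
all_opts = opts.get_all_options(
    drop_default=True, retain_unknown_options=True)
# all_opts contains 'streaming' plus the unknown 'magic_java_flag', so both
# survive into the ExpansionRequest's pipeline_options struct above.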
def expand(self, pvalueish):
    # type: (pvalue.PCollection) -> pvalue.PCollection
    if isinstance(pvalueish, pvalue.PBegin):
      self._inputs = {}
    elif isinstance(pvalueish, (list, tuple)):
      self._inputs = {str(ix): pvalue for ix, pvalue in enumerate(pvalueish)}
    elif isinstance(pvalueish, dict):
      self._inputs = pvalueish
    else:
      self._inputs = {'input': pvalueish}
    pipeline = (
        next(iter(self._inputs.values())).pipeline
        if self._inputs else pvalueish.pipeline)
    context = pipeline_context.PipelineContext(
        component_id_map=pipeline.component_id_map)
    transform_proto = beam_runner_api_pb2.PTransform(
        unique_name=pipeline._current_transform().full_label,
        spec=beam_runner_api_pb2.FunctionSpec(
            urn=self._urn, payload=self._payload))
    for tag, pcoll in self._inputs.items():
      transform_proto.inputs[tag] = context.pcollections.get_id(pcoll)
      # Conversion to/from proto assumes producers.
      # TODO: Possibly loosen this.
      context.transforms.put_proto(
          '%s_%s' % (self._IMPULSE_PREFIX, tag),
          beam_runner_api_pb2.PTransform(
              unique_name='%s_%s' % (self._IMPULSE_PREFIX, tag),
              spec=beam_runner_api_pb2.FunctionSpec(
                  urn=common_urns.primitives.IMPULSE.urn),
              outputs={'out': transform_proto.inputs[tag]}))
    output_coders = None
    if self._type_hints.output_types:
      if self._type_hints.output_types[0]:
        output_coders = dict(
            (str(k), context.coder_id_from_element_type(v))
            for (k, v) in enumerate(self._type_hints.output_types[0]))
      elif self._type_hints.output_types[1]:
        output_coders = {
            k: context.coder_id_from_element_type(v)
            for (k, v) in self._type_hints.output_types[1].items()
        }
    components = context.to_runner_api()
    request = beam_expansion_api_pb2.ExpansionRequest(
        components=components,
        namespace=self._external_namespace,  # type: ignore  # mypy thinks self._namespace is threading.local
        transform=transform_proto,
        output_coder_requests=output_coders)

    with self._service() as service:
      response = service.Expand(request)
      if response.error:
        raise RuntimeError(response.error)
      self._expanded_components = response.components
      if any(env.dependencies
             for env in self._expanded_components.environments.values()):
        self._expanded_components = self._resolve_artifacts(
            self._expanded_components,
            service.artifact_service(),
            pipeline.local_tempdir)

    self._expanded_transform = response.transform
    self._expanded_requirements = response.requirements
    result_context = pipeline_context.PipelineContext(response.components)

    def fix_output(pcoll, tag):
      pcoll.pipeline = pipeline
      pcoll.tag = tag
      return pcoll

    self._outputs = {
        tag: fix_output(result_context.pcollections.get_by_id(pcoll_id), tag)
        for tag, pcoll_id in self._expanded_transform.outputs.items()
    }

    return self._output_to_pvalueish(self._outputs)
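This third version additionally translates declared output type hints into output_coder_requests and resolves artifact dependencies through the service's artifact endpoint. A hedged sketch of how a caller's type hint reaches expand(); the URN, address, and empty payload are illustrative assumptions:

import typing
import apache_beam as beam

# Hypothetical external transform; with_output_types comes from PTransform.
external = beam.ExternalTransform(
    'beam:transforms:xlang:example', b'', 'localhost:8097'
).with_output_types(typing.Tuple[str, int])
# At expansion time, self._type_hints.output_types[0] holds
# (typing.Tuple[str, int],), so output_coders becomes {'0': <coder id>} and
# the expansion service is asked to produce output compatible with it.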