class DummyContainerSpecComponent(base_component.BaseComponent):
  """Dummy ContainerSpec component.

  Declares a templated container executor that runs the `transformer` command
  in the `dummy/transformer` image. The `input1`, `output1` and `param1`
  entries of the spec are wired into the command line through placeholders.
  """

  SPEC_CLASS = TransformerSpec
  EXECUTOR_SPEC = executor_specs.TemplatedExecutorContainerSpec(
      image='dummy/transformer',
      command=[
          'transformer',
          '--input1',
          placeholders.InputUriPlaceholder('input1'),
          '--output1',
          placeholders.OutputUriPlaceholder('output1'),
          '--param1',
          placeholders.InputValuePlaceholder('param1'),
      ])

  def __init__(self, input1, param1, output1, instance_name=None):
    """Builds the `TransformerSpec` and assigns the component id.

    Args:
      input1: Value passed to the `input1` field of `TransformerSpec`.
      param1: Value passed to the `param1` field of `TransformerSpec`.
      output1: Value passed to the `output1` field of `TransformerSpec`.
      instance_name: Optional suffix for the component id. When truthy the id
        is `<ClassName>.<instance_name>`; otherwise it is `<ClassName>`.
    """
    super().__init__(
        spec=TransformerSpec(input1=input1, output1=output1, param1=param1))
    # The id is assigned only after the base initializer has run.
    class_name = self.__class__.__name__
    self._id = (
        '{}.{}'.format(class_name, instance_name)
        if instance_name else class_name)
class DummyProducerComponent(base_component.BaseComponent):
  """Dummy producer component.

  Declares a templated container executor that runs the `producer` command in
  the `dummy/producer` image. `param1` is passed twice: once as-is and once
  wrapped between the literal texts `prefix-` and `-suffix` via a concat
  placeholder.
  """

  SPEC_CLASS = ProducerSpec
  EXECUTOR_SPEC = executor_specs.TemplatedExecutorContainerSpec(
      image='dummy/producer',
      command=[
          'producer',
          '--output1',
          placeholders.OutputUriPlaceholder('output1'),
          '--param1',
          placeholders.InputValuePlaceholder('param1'),
          '--wrapped-param',
          placeholders.ConcatPlaceholder([
              'prefix-',
              placeholders.InputValuePlaceholder('param1'),
              '-suffix',
          ]),
      ])

  def __init__(self, param1, output1, instance_name=None):
    """Builds the `ProducerSpec` and assigns the component id.

    Args:
      param1: Value passed to the `param1` field of `ProducerSpec`.
      output1: Value passed to the `output1` field of `ProducerSpec`.
      instance_name: Optional suffix for the component id. When truthy the id
        is `<ClassName>.<instance_name>`; otherwise it is `<ClassName>`.
    """
    super().__init__(spec=ProducerSpec(output1=output1, param1=param1))
    # The id is assigned only after the base initializer has run.
    class_name = self.__class__.__name__
    self._id = (
        '{}.{}'.format(class_name, instance_name)
        if instance_name else class_name)
class _DummyComponent(base_component.BaseComponent):
  """Minimal component whose container executor runs `ls` in `dummy:latest`."""

  SPEC_CLASS = _DummySpec
  EXECUTOR_SPEC = executor_specs.TemplatedExecutorContainerSpec(
      image='dummy:latest',
      command=['ls'],
  )

  def __init__(self):
    """Initializes the component with a default-constructed `_DummySpec`."""
    dummy_spec = _DummySpec()
    super().__init__(dummy_spec)
class ByeWorldComponent(BaseComponent):
  """Consumer component.

  Runs `echo` in the `bash:latest` image with a single argument: the text
  `received ` followed by the value of the first `hearing` input artifact.
  """

  SPEC_CLASS = _ByeWorldSpec
  EXECUTOR_SPEC = executor_specs.TemplatedExecutorContainerSpec(
      image='bash:latest',
      command=['echo'],
      args=['received ' + ph.input('hearing')[0].value],
  )

  def __init__(self, hearing):
    """Initializes the component.

    Args:
      hearing: Value passed to the `hearing` field of `_ByeWorldSpec`.
    """
    bye_spec = _ByeWorldSpec(hearing=hearing)
    super().__init__(bye_spec)
class _HelloWorldComponent(base_component.BaseComponent):
  """Component whose container executor runs `echo` in `alpine:latest`."""

  SPEC_CLASS = _HelloWorldSpec
  EXECUTOR_SPEC = executor_specs.TemplatedExecutorContainerSpec(
      # TODO(b/143965964): move the image to private repo if the test is flaky
      # due to docker hub.
      image='alpine:latest',
      # TODO(b/170872338): support a placeholder as command.
      command=['echo'],
  )

  def __init__(self, name):
    """Initializes the component.

    Args:
      name: Value passed to the `name` field of `_HelloWorldSpec`.
    """
    hello_spec = _HelloWorldSpec(name=name)
    super().__init__(hello_spec)
def testEncodeTemplatedExecutorContainerSpec_withConcatAllText(self):
  """Checks encoding of a command holding one all-text concat placeholder.

  The expected proto contains a single command whose string value is
  "texttext1text2".
  """
  container_spec = executor_specs.TemplatedExecutorContainerSpec(
      image='image', command=[self._text_concat_placeholder])
  encoded = container_spec.encode()
  # Adjacent literals are joined by the parser into one text-format string.
  expected_proto = (
      ' image: "image"'
      ' commands { value { string_value: "texttext1text2" } }')
  self.assertProtoEquals(expected_proto, encoded)
def run_executor(
    self, execution_info: data_types.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
  """Runs the component's container through Docker and waits for it to exit.

  Every entry of the container executor spec's `commands` is resolved against
  the given execution info, the container is started detached, its log stream
  is forwarded to `logging`, and the exit status is checked.

  Args:
    execution_info: Execution information used to resolve the placeholder
      expressions in the container command.

  Returns:
    An empty `ExecutorOutput`.

  Raises:
    RuntimeError: If the container exits with a non-zero status code.
  """
  resolution_context = placeholder_utils.ResolutionContext(
      exec_info=execution_info,
      executor_spec=self._executor_spec,
      platform_config=self._platform_config)

  resolved_command = []
  for cmd in self._container_executor_spec.commands:
    resolved_command.append(
        placeholder_utils.resolve_placeholder_expression(
            cmd, resolution_context))
  component_executor_spec = executor_specs.TemplatedExecutorContainerSpec(
      image=self._container_executor_spec.image, command=resolved_command)

  docker_config = docker_component_config.DockerComponentConfig()

  logging.info('Container spec: %s', vars(component_executor_spec))
  logging.info('Docker config: %s', vars(docker_config))

  # Call client.containers.run and wait for completion.
  # ExecutorContainerSpec follows k8s container spec which has different
  # names to Docker's container spec. It's intended to set command to docker's
  # entrypoint and args to docker's command.
  # NOTE(review): only the resolved `command` list is forwarded below, as
  # Docker's `command`; no entrypoint or args are set here -- confirm intent.
  server_url = docker_config.docker_server_url
  client = (
      docker.DockerClient(base_url=server_url)
      if server_url else docker.from_env())

  container = client.containers.run(
      image=component_executor_spec.image,
      command=component_executor_spec.command,
      detach=True,
      **docker_config.to_run_args())

  # Streaming logs
  for log_chunk in container.logs(stream=True):
    logging.info('Docker: %s', log_chunk.decode('utf-8'))

  wait_result = container.wait()
  exit_code = wait_result['StatusCode']
  if exit_code != 0:
    raise RuntimeError(
        'Container exited with error code "{}"'.format(exit_code))

  # TODO(b/141192583): Report data to publisher
  # - report container digest
  # - report replaced command line entrypoints
  # - report docker run args
  return execution_result_pb2.ExecutorOutput()
class HelloWorldComponent(BaseComponent):
  """Producer component.

  Runs `sh -c` in the cloud-sdk image with a shell line that echoes
  `hello <word>` and pipes it through `gsutil cp -` to the URI of the first
  `greeting` output artifact.
  """

  SPEC_CLASS = _HelloWorldSpec
  EXECUTOR_SPEC = executor_specs.TemplatedExecutorContainerSpec(
      # TODO(b/143965964): move the image to private repo if the test is flaky
      # due to docker hub.
      image='gcr.io/google.com/cloudsdktool/cloud-sdk:latest',
      command=['sh', '-c'],
      args=[
          'echo "hello ' + ph.exec_property('word') + '" | gsutil cp - ' +
          ph.output('greeting')[0].uri
      ],
  )

  def __init__(self, word, greeting=None):
    """Initializes the component.

    Args:
      word: Value passed to the `word` field of `_HelloWorldSpec`.
      greeting: Optional value for the `greeting` field. When falsy, a channel
        wrapping a new `standard_artifacts.String` artifact is created.
    """
    if not greeting:
      greeting = channel_utils.as_channel([standard_artifacts.String()])
    hello_spec = _HelloWorldSpec(word=word, greeting=greeting)
    super().__init__(hello_spec)
def create_container_component(
    name: Text,
    image: Text,
    command: List[executor_specs.CommandlineArgumentType],
    inputs: Dict[Text, Any] = None,
    outputs: Dict[Text, Any] = None,
    parameters: Dict[Text, Any] = None,
) -> Callable[..., base_component.BaseComponent]:
  """Creates a container-based component.

  Args:
    name: The name of the component. Must not be empty.
    image: Container image name.
    command: Container entrypoint command-line. Not executed within a shell.
      The command-line can use placeholder objects that will be replaced at
      the compilation time. The placeholder objects can be imported from
      tfx.dsl.component.experimental.placeholders. Note that Jinja templates
      are not supported.
    inputs: Mapping from input name to the type of that input. `None` is
      treated as no inputs.
    outputs: Mapping from output name to the type of that output. Each type is
      instantiated once to build the default output channel. `None` is treated
      as no outputs.
    parameters: Mapping from parameter name to the parameter type. `None` is
      treated as no parameters.

  Returns:
    Component that can be instantiated and used inside pipeline.

  Raises:
    ValueError: If `name` is empty.

  Example:

    component = create_container_component(
        name='TrainModel',
        inputs={
            'training_data': Dataset,
        },
        outputs={
            'model': Model,
        },
        parameters={
            'num_training_steps': int,
        },
        image='gcr.io/my-project/my-trainer',
        command=[
            'python3', 'my_trainer',
            '--training_data_uri', InputUriPlaceholder('training_data'),
            '--model_uri', OutputUriPlaceholder('model'),
            '--num_training-steps', InputValuePlaceholder('num_training_steps'),
        ]
    )
  """
  if not name:
    raise ValueError('Component name cannot be empty.')

  inputs = {} if inputs is None else inputs
  outputs = {} if outputs is None else outputs
  parameters = {} if parameters is None else parameters

  # TODO(b/155804245) Sanitize the names so that they're valid python names
  # (applies to the input, output and parameter names handled below).
  input_channel_parameters = {
      input_name: component_spec.ChannelParameter(type=channel_type)
      for input_name, channel_type in inputs.items()
  }

  # Outputs stay in one loop so each channel parameter is created right before
  # the matching default artifact and channel.
  output_channel_parameters = {}
  output_channels = {}
  for output_name, channel_type in outputs.items():
    output_channel_parameters[output_name] = (
        component_spec.ChannelParameter(type=channel_type))
    output_channels[output_name] = channel_utils.as_channel([channel_type()])

  execution_parameters = {
      param_name: component_spec.ExecutionParameter(type=parameter_type)
      for param_name, parameter_type in parameters.items()
  }

  container_executor_spec = executor_specs.TemplatedExecutorContainerSpec(
      image=image,
      command=command,
  )
  return component_utils.create_tfx_component_class(
      name=name,
      tfx_executor_spec=container_executor_spec,
      input_channel_parameters=input_channel_parameters,
      output_channel_parameters=output_channel_parameters,
      execution_parameters=execution_parameters,
      default_init_args=dict(output_channels))
def testEncodeTemplatedExecutorContainerSpec(self):
  """Checks encoding of text, value, URI and concat placeholders.

  The command mixes a plain text entry with input-value, input-URI,
  output-URI and concat placeholders; the encoded proto must list one
  `commands` entry per command element, in order.
  """
  command = [
      self._text,
      self._input_value_placeholder,
      self._input_uri_placeholder,
      self._output_uri_placeholder,
      self._concat_placeholder,
  ]
  container_spec = executor_specs.TemplatedExecutorContainerSpec(
      image='image', command=command)
  encoded = container_spec.encode()
  # Adjacent literals are joined by the parser into one text-format string.
  expected_proto = (
      ' image: "image"'
      # Plain text.
      ' commands { value { string_value: "text" } }'
      # Input value placeholder.
      ' commands { operator { index_op { expression {'
      ' placeholder { key: "input_artifact" } } } } }'
      # Input URI placeholder.
      ' commands { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression { placeholder { key: "input_uri" } }'
      ' index: 0 } } } } } }'
      # Output URI placeholder.
      ' commands { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression {'
      ' placeholder { type: OUTPUT_ARTIFACT key: "output_uri" } }'
      ' index: 0 } } } } } }'
      # Concat placeholder combining the four kinds above.
      ' commands { operator { concat_op {'
      ' expressions { value { string_value: "text" } }'
      ' expressions { operator { index_op { expression {'
      ' placeholder { key: "input_artifact" } } index: 0 } } }'
      ' expressions { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression { placeholder { key: "input_uri" } }'
      ' index: 0 } } } } } }'
      ' expressions { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression {'
      ' placeholder { type: OUTPUT_ARTIFACT key: "output_uri" } }'
      ' index: 0 } } } } } }'
      ' } } }')
  self.assertProtoEquals(expected_proto, encoded)
def create_container_component(
    name: Text,
    image: Text,
    command: List[executor_specs.CommandlineArgumentType],
    inputs: Dict[Text, Any] = None,
    outputs: Dict[Text, Any] = None,
    parameters: Dict[Text, Any] = None,
) -> Callable[..., base_component.BaseComponent]:
  """Creates a container-based component.

  Two classes are built dynamically: a `ComponentSpec` subclass named
  `<name>Spec` describing inputs, outputs and parameters, and a
  `BaseComponent` subclass named `<name>` that uses it together with a
  templated container executor spec.

  Args:
    name: The name of the component. Must not be empty.
    image: Container image name.
    command: Container entrypoint command-line. Not executed within a shell.
      The command-line can use placeholder objects that will be replaced at
      the compilation time. The placeholder objects can be imported from
      tfx.dsl.component.experimental.placeholders. Note that Jinja templates
      are not supported.
    inputs: Mapping from input name to the type of that input. `None` is
      treated as no inputs.
    outputs: Mapping from output name to the type of that output. Each type is
      instantiated once to build the default output channel. `None` is treated
      as no outputs.
    parameters: Mapping from parameter name to the parameter type. `None` is
      treated as no parameters.

  Returns:
    Component that can be instantiated and used inside pipeline.

  Raises:
    ValueError: If `name` is empty.

  Example:

    component = create_container_component(
        name='TrainModel',
        inputs={
            'training_data': Dataset,
        },
        outputs={
            'model': Model,
        },
        parameters={
            'num_training_steps': int,
        },
        image='gcr.io/my-project/my-trainer',
        command=[
            'python3', 'my_trainer',
            '--training_data_uri', InputUriPlaceholder('training_data'),
            '--model_uri', OutputUriPlaceholder('model'),
            '--num_training-steps', InputValuePlaceholder('num_training_steps'),
        ]
    )
  """
  if not name:
    raise ValueError('Component name cannot be empty.')

  inputs = {} if inputs is None else inputs
  outputs = {} if outputs is None else outputs
  parameters = {} if parameters is None else parameters

  # TODO(b/155804245) Sanitize the names so that they're valid python names
  # (applies to the input, output and parameter names handled below).
  input_channel_parameters = {
      input_name: component_spec.ChannelParameter(type=channel_type)
      for input_name, channel_type in inputs.items()
  }

  # Outputs stay in one loop so each channel parameter is created right before
  # the matching default artifact and channel.
  output_channel_parameters = {}
  output_channels = {}
  for output_name, channel_type in outputs.items():
    output_channel_parameters[output_name] = (
        component_spec.ChannelParameter(type=channel_type))
    output_channels[output_name] = channel_utils.as_channel([channel_type()])

  execution_parameters = {
      param_name: component_spec.ExecutionParameter(type=parameter_type)
      for param_name, parameter_type in parameters.items()
  }

  spec_namespace = {
      'PARAMETERS': execution_parameters,
      'INPUTS': input_channel_parameters,
      'OUTPUTS': output_channel_parameters,
  }
  tfx_component_spec_class = type(
      # Need str() for Python 2 compatibility.
      str(name) + 'Spec',
      (component_spec.ComponentSpec,),
      spec_namespace,
  )

  def tfx_component_class_init(self, **kwargs):
    """Initializes the generated component from keyword arguments.

    `instance_name` is removed from `kwargs` and passed to the base
    initializer; the rest is passed to the spec class. Caller-supplied values
    override the default output channels.
    """
    instance_name = kwargs.pop('instance_name', None)
    arguments = dict(output_channels)
    arguments.update(kwargs)

    # The class is created with type(), so the base initializer is called
    # explicitly rather than through zero-argument super().
    base_component.BaseComponent.__init__(
        self,
        spec=self.__class__.SPEC_CLASS(**arguments),
        instance_name=instance_name,
    )

  component_namespace = {
      'SPEC_CLASS': tfx_component_spec_class,
      'EXECUTOR_SPEC': executor_specs.TemplatedExecutorContainerSpec(
          image=image,
          command=command,
      ),
      '__init__': tfx_component_class_init,
  }
  return type(
      # Need str() for Python 2 compatibility.
      str(name),
      (base_component.BaseComponent,),
      component_namespace,
  )
def testEncodeTemplatedExecutorContainerSpec(self):
  """Checks encoding of placeholders against a concrete component spec.

  The command mixes a plain text entry with two input-value placeholders, an
  input-URI, an output-URI and a concat placeholder. It is encoded with a
  `TestComponentSpec` that supplies an input artifact channel, an output
  artifact channel and an `input_parameter` value.
  """
  command = [
      self._text,
      self._input_value_placeholder,
      self._another_input_value_placeholder,
      self._input_uri_placeholder,
      self._output_uri_placeholder,
      self._concat_placeholder,
  ]
  container_spec = executor_specs.TemplatedExecutorContainerSpec(
      image='image', command=command)
  test_component_spec = TestComponentSpec(
      input_artifact=channel.Channel(type=standard_artifacts.Examples),
      output_artifact=channel.Channel(type=standard_artifacts.Model),
      input_parameter=42)
  encoded = container_spec.encode(component_spec=test_component_spec)
  # Adjacent literals are joined by the parser into one text-format string.
  expected_proto = (
      ' image: "image"'
      # Plain text.
      ' commands { value { string_value: "text" } }'
      # Input value placeholder, encoded as an artifact value.
      ' commands { operator { artifact_value_op { expression { operator {'
      ' index_op { expression { placeholder { key: "input_artifact" } }'
      ' } } } } } }'
      # Second input value placeholder, encoded as an execution property.
      ' commands { placeholder {'
      ' type: EXEC_PROPERTY key: "input_parameter" } }'
      # Input URI placeholder.
      ' commands { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression { placeholder { key: "input_artifact" } }'
      ' index: 0 } } } } } }'
      # Output URI placeholder.
      ' commands { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression {'
      ' placeholder { type: OUTPUT_ARTIFACT key: "output_artifact" } }'
      ' index: 0 } } } } } }'
      # Concat placeholder.
      ' commands { operator { concat_op {'
      ' expressions { value { string_value: "text" } }'
      ' expressions { operator { artifact_value_op { expression { operator {'
      ' index_op { expression { placeholder { key: "input_artifact" } }'
      ' index: 0 } } } } } }'
      ' expressions { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression { placeholder { key: "input_artifact" } }'
      ' index: 0 } } } } } }'
      ' expressions { operator { artifact_uri_op { expression { operator {'
      ' index_op { expression {'
      ' placeholder { type: OUTPUT_ARTIFACT key: "output_artifact" } }'
      ' index: 0 } } } } } }'
      ' } } }')
  self.assertProtoEquals(expected_proto, encoded)
def run_executor(
    self, execution_info: data_types.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
  """Execute underlying component implementation.

  Runs executor container in a Kubernetes Pod and wait until it goes into
  `Succeeded` or `Failed` state. The pod is looked up by name first and is
  only created when the lookup returns nothing; its logs are streamed to
  `logging` while it runs.

  Args:
    execution_info: All the information that the launcher provides.

  Raises:
    RuntimeError: when the pod is in `Failed` state or unexpected failure from
      Kubernetes API.

  Returns:
    An ExecutorOutput instance
  """
  context = placeholder_utils.ResolutionContext(
      exec_info=execution_info,
      executor_spec=self._executor_spec,
      platform_config=self._platform_config)

  resolved_command = [
      placeholder_utils.resolve_placeholder_expression(cmd, context)
      for cmd in self._container_executor_spec.commands
  ]
  resolved_args = [
      placeholder_utils.resolve_placeholder_expression(arg, context)
      for arg in self._container_executor_spec.args
  ]
  # Empty lists are passed as None.
  container_spec = executor_specs.TemplatedExecutorContainerSpec(
      image=self._container_executor_spec.image,
      command=resolved_command or None,
      args=resolved_args or None,
  )

  pod_name = self._build_pod_name(execution_info)
  # TODO(hongyes): replace the default value from component config.
  try:
    namespace = kube_utils.get_kfp_namespace()
  except RuntimeError:
    namespace = 'kubeflow'

  pod_manifest = self._build_pod_manifest(pod_name, container_spec)
  core_api = kube_utils.make_core_v1_api()

  if kube_utils.is_inside_kfp():
    # Inside KFP: copy the launcher pod's service account and owner
    # references.
    launcher_pod = kube_utils.get_current_kfp_pod(core_api)
    launcher_spec = launcher_pod.spec
    pod_spec = pod_manifest['spec']
    pod_spec['serviceAccount'] = launcher_spec.service_account
    pod_spec['serviceAccountName'] = launcher_spec.service_account_name
    pod_manifest['metadata']['ownerReferences'] = (
        container_common.to_swagger_dict(
            launcher_pod.metadata.owner_references))
  else:
    pod_spec = pod_manifest['spec']
    pod_spec['serviceAccount'] = kube_utils.TFX_SERVICE_ACCOUNT
    pod_spec['serviceAccountName'] = kube_utils.TFX_SERVICE_ACCOUNT

  logging.info('Looking for pod "%s:%s".', namespace, pod_name)
  resp = kube_utils.get_pod(core_api, pod_name, namespace)
  if not resp:
    logging.info('Pod "%s:%s" does not exist. Creating it...', namespace,
                 pod_name)
    logging.info('Pod manifest: %s', pod_manifest)
    try:
      resp = core_api.create_namespaced_pod(
          namespace=namespace, body=pod_manifest)
    except client.rest.ApiException as e:
      raise RuntimeError(
          'Failed to created container executor pod!\nReason: %s\nBody: %s' %
          (e.reason, e.body))

  # Wait up to 300 seconds for the pod to move from pending to another status.
  logging.info('Waiting for pod "%s:%s" to start.', namespace, pod_name)
  kube_utils.wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=kube_utils.pod_is_not_pending,
      condition_description='non-pending status',
      timeout_sec=300)

  logging.info('Start log streaming for pod "%s:%s".', namespace, pod_name)
  try:
    log_response = core_api.read_namespaced_pod_log(
        name=pod_name,
        namespace=namespace,
        container=kube_utils.ARGO_MAIN_CONTAINER_NAME,
        follow=True,
        _preload_content=False)
    logs = log_response.stream()
  except client.rest.ApiException as e:
    raise RuntimeError(
        'Failed to stream the logs from the pod!\nReason: %s\nBody: %s' %
        (e.reason, e.body))

  for log_chunk in logs:
    logging.info(log_chunk.decode().rstrip('\n'))

  # Wait indefinitely for the pod to complete.
  resp = kube_utils.wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=kube_utils.pod_is_done,
      condition_description='done state')

  pod_failed = resp.status.phase == kube_utils.PodPhase.FAILED.value
  if pod_failed:
    raise RuntimeError('Pod "%s:%s" failed with status "%s".' %
                       (namespace, pod_name, resp.status))

  logging.info('Pod "%s:%s" is done.', namespace, pod_name)

  return execution_result_pb2.ExecutorOutput()