def testToSwaggerDict(self):
  """Verifies the dict produced by `to_swagger_dict` for a small V1Pod.

  The expected dict uses camelCase keys (`ownerReferences`, `apiVersion`,
  `serviceAccount`) and has no entry for the empty `containers` list.
  """
  owner_reference = client.V1OwnerReference(
      api_version='argoproj.io/v1alpha1',
      kind='Workflow',
      name='wf-1',
      uid='wf-uid-1')
  pod = client.V1Pod(
      metadata=client.V1ObjectMeta(owner_references=[owner_reference]),
      spec=client.V1PodSpec(containers=[], service_account='sa-1'))

  expected = {
      'metadata': {
          'ownerReferences': [{
              'apiVersion': 'argoproj.io/v1alpha1',
              'kind': 'Workflow',
              'name': 'wf-1',
              'uid': 'wf-uid-1'
          }]
      },
      'spec': {
          'serviceAccount': 'sa-1'
      }
  }

  self.assertDictEqual(expected, container_common.to_swagger_dict(pod))
def _build_pod_manifest(
    self, pod_name: Text,
    container_spec: executor_spec.ExecutorContainerSpec
) -> Dict[Text, Any]:
  """Assemble the manifest of the executor pod.

  Starts from the pod found in the component config when one is set, or from
  an empty manifest otherwise, and then overlays the fixed pod fields and the
  executor container spec on top of it.

  Args:
    pod_name: The name of the pod.
    container_spec: The resolved executor container spec.

  Returns:
    The pod manifest in dictionary format.
  """
  pod_manifest = {}  # type: Dict[Text, Any]
  if self._component_config:
    kubernetes_config = cast(
        kubernetes_component_config.KubernetesComponentConfig,
        self._component_config)
    pod_manifest = container_common.to_swagger_dict(kubernetes_config.pod)

  pod_manifest['apiVersion'] = 'v1'
  pod_manifest['kind'] = 'Pod'

  # TODO(hongyes): figure out a better way to figure out type hints for nested
  # dict.
  metadata = pod_manifest.setdefault('metadata', {})  # type: Dict[Text, Any]
  metadata['name'] = pod_name
  spec = pod_manifest.setdefault('spec', {})  # type: Dict[Text, Any]
  spec['restartPolicy'] = 'Never'
  containers = spec.setdefault('containers',
                               [])  # type: List[Dict[Text, Any]]

  # Reuse the first container called 'main' if the manifest already has one;
  # otherwise add a new one to the list.
  main_container = next(
      (candidate for candidate in containers if candidate['name'] == 'main'),
      None)  # type: Optional[Dict[Text, Any]]
  if not main_container:
    main_container = {'name': 'main'}
    containers.append(main_container)

  # The executor spec always overrides image, command and args.
  main_container.update(
      image=container_spec.image,
      command=container_spec.command,
      args=container_spec.args)
  return pod_manifest
def _run_executor(self, execution_id: int,
                  input_dict: Dict[Text, List[types.Artifact]],
                  output_dict: Dict[Text, List[types.Artifact]],
                  exec_properties: Dict[Text, Any]) -> None:
  """Execute underlying component implementation.

  Runs executor container in a Kubernetes Pod and wait until it goes into
  `Succeeded` or `Failed` state.

  Args:
    execution_id: The ID of the execution.
    input_dict: Input dict from input key to a list of Artifacts. These are
      often outputs of another component in the pipeline and passed to the
      component by the orchestration system.
    output_dict: Output dict from output key to a list of Artifacts. These are
      often consumed by a dependent component.
    exec_properties: A dict of execution properties. These are inputs to
      pipeline with primitive types (int, string, float) and fully
      materialized when a pipeline is constructed. No dependency to other
      component or later injection from orchestration systems is necessary or
      possible on these values.

  Raises:
    RuntimeError: when the pod is in `Failed` state or unexpected failure from
      Kubernetes API.
  """
  container_spec = cast(executor_spec.ExecutorContainerSpec,
                        self._component_executor_spec)

  # Replace container spec with jinja2 template.
  container_spec = container_common.resolve_container_template(
      container_spec, input_dict, output_dict, exec_properties)
  pod_name = self._build_pod_name(execution_id)
  # TODO(hongyes): replace the default value from component config.
  try:
    namespace = kube_utils.get_kfp_namespace()
  except RuntimeError:
    # No KFP namespace could be determined; use the default one.
    namespace = 'kubeflow'

  pod_manifest = self._build_pod_manifest(pod_name, container_spec)
  core_api = kube_utils.make_core_v1_api()

  if kube_utils.is_inside_kfp():
    # Reuse the launcher pod's service account and owner references for the
    # executor pod.
    launcher_pod = kube_utils.get_current_kfp_pod(core_api)
    pod_manifest['spec']['serviceAccount'] = launcher_pod.spec.service_account
    pod_manifest['spec'][
        'serviceAccountName'] = launcher_pod.spec.service_account_name
    pod_manifest['metadata'][
        'ownerReferences'] = container_common.to_swagger_dict(
            launcher_pod.metadata.owner_references)
  else:
    pod_manifest['spec']['serviceAccount'] = kube_utils.TFX_SERVICE_ACCOUNT
    pod_manifest['spec'][
        'serviceAccountName'] = kube_utils.TFX_SERVICE_ACCOUNT

  # Only create the pod when no pod with this name exists yet.
  logging.info('Looking for pod "%s:%s".', namespace, pod_name)
  resp = kube_utils.get_pod(core_api, pod_name, namespace)
  if not resp:
    logging.info('Pod "%s:%s" does not exist. Creating it...', namespace,
                 pod_name)
    logging.info('Pod manifest: %s', pod_manifest)
    try:
      resp = core_api.create_namespaced_pod(
          namespace=namespace, body=pod_manifest)
    except client.rest.ApiException as e:
      raise RuntimeError(
          'Failed to create container executor pod!\nReason: %s\nBody: %s' %
          (e.reason, e.body)) from e

  # Wait up to 300 seconds for the pod to move from pending to another status.
  logging.info('Waiting for pod "%s:%s" to start.', namespace, pod_name)
  kube_utils.wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=kube_utils.pod_is_not_pending,
      condition_description='non-pending status',
      timeout_sec=300)

  logging.info('Start log streaming for pod "%s:%s".', namespace, pod_name)
  try:
    logs = core_api.read_namespaced_pod_log(
        name=pod_name,
        namespace=namespace,
        container=kube_utils.ARGO_MAIN_CONTAINER_NAME,
        follow=True,
        _preload_content=False).stream()
  except client.rest.ApiException as e:
    raise RuntimeError(
        'Failed to stream the logs from the pod!\nReason: %s\nBody: %s' %
        (e.reason, e.body)) from e

  for log in logs:
    # The chunks are raw bytes produced by the container. Substitute bytes
    # that are not valid UTF-8 instead of letting a UnicodeDecodeError abort
    # the run while the pod itself is still healthy.
    logging.info(log.decode(errors='replace').rstrip('\n'))

  # Wait indefinitely for the pod to complete.
  resp = kube_utils.wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=kube_utils.pod_is_done,
      condition_description='done state')

  if resp.status.phase == kube_utils.PodPhase.FAILED.value:
    raise RuntimeError('Pod "%s:%s" failed with status "%s".' %
                       (namespace, pod_name, resp.status))

  logging.info('Pod "%s:%s" is done.', namespace, pod_name)
def __init__(self, pod: Union[client.V1Pod, Dict[str, Any]]):
  """Stores the given pod as a swagger-style dict on `self.pod`.

  Args:
    pod: The pod, either as a `client.V1Pod` or as a dict.

  Raises:
    ValueError: if `pod` is falsy (for example `None` or an empty dict).
  """
  if not pod:
    raise ValueError('pod must have a value.')
  swagger_pod = container_common.to_swagger_dict(pod)
  self.pod = swagger_pod
def _run_executor(self, execution_id: int,
                  input_dict: Dict[Text, List[types.Artifact]],
                  output_dict: Dict[Text, List[types.Artifact]],
                  exec_properties: Dict[Text, Any]) -> None:
  """Execute underlying component implementation.

  Runs executor container in a Kubernetes Pod and wait until it goes into
  `Succeeded` or `Failed` state.

  Args:
    execution_id: The ID of the execution.
    input_dict: Input dict from input key to a list of Artifacts. These are
      often outputs of another component in the pipeline and passed to the
      component by the orchestration system.
    output_dict: Output dict from output key to a list of Artifacts. These are
      often consumed by a dependent component.
    exec_properties: A dict of execution properties. These are inputs to
      pipeline with primitive types (int, string, float) and fully
      materialized when a pipeline is constructed. No dependency to other
      component or later injection from orchestration systems is necessary or
      possible on these values.

  Raises:
    RuntimeError: when the pod is in `Failed` state, when the launcher pod
      cannot be located while running in-cluster, or on unexpected failure
      from Kubernetes API.
  """
  container_spec = cast(executor_spec.ExecutorContainerSpec,
                        self._component_executor_spec)

  # Replace container spec with jinja2 template.
  container_spec = container_common.resolve_container_template(
      container_spec, input_dict, output_dict, exec_properties)
  pod_name = self._build_pod_name(execution_id)
  # TODO(hongyes): replace the default value from component config.
  namespace = os.getenv(_KFP_NAMESPACE_ENV, 'kubeflow')

  pod_manifest = self._build_pod_manifest(pod_name, container_spec)

  # Prefer the in-cluster config; fall back to the local kube config when the
  # in-cluster one cannot be loaded.
  try:
    config.load_incluster_config()
  except config.config_exception.ConfigException:
    is_in_cluster = False
    config.load_kube_config()
    absl.logging.info('Loaded kube config.')
  else:
    is_in_cluster = True
    absl.logging.info('Loaded in cluster config.')

  core_api = client.CoreV1Api()

  if is_in_cluster:
    # Reuse the launcher pod's service account and owner references for the
    # executor pod.
    launcher_pod_name = os.getenv(_KFP_POD_NAME_ENV)
    if not launcher_pod_name:
      raise RuntimeError(
          'Environment variable "%s" is not set; cannot locate the launcher '
          'pod.' % _KFP_POD_NAME_ENV)
    launcher_pod = self._get_pod(core_api, launcher_pod_name, namespace)
    # `_get_pod` may return a falsy value (see the lookup of the executor pod
    # below), so fail with a clear error instead of an AttributeError.
    if not launcher_pod:
      raise RuntimeError('Launcher pod "%s:%s" was not found.' %
                         (namespace, launcher_pod_name))
    pod_manifest['spec'][
        'serviceAccount'] = launcher_pod.spec.service_account
    pod_manifest['spec'][
        'serviceAccountName'] = launcher_pod.spec.service_account_name
    pod_manifest['metadata'][
        'ownerReferences'] = container_common.to_swagger_dict(
            launcher_pod.metadata.owner_references)

  # Only create the pod when no pod with this name exists yet.
  absl.logging.info('Looking for pod "%s:%s".', namespace, pod_name)
  resp = self._get_pod(core_api, pod_name, namespace)
  if not resp:
    absl.logging.info('Pod "%s:%s" does not exist. Creating it...',
                      namespace, pod_name)
    absl.logging.info('Pod manifest: %s', pod_manifest)
    try:
      resp = core_api.create_namespaced_pod(
          namespace=namespace, body=pod_manifest)
    except client.rest.ApiException as e:
      raise RuntimeError(
          'Failed to create container executor pod!\nReason: %s\nBody: %s' %
          (e.reason, e.body)) from e

  absl.logging.info('Waiting for pod "%s:%s" to start.', namespace, pod_name)
  self._wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=_pod_is_not_pending,
      condition_description='non-pending status')

  absl.logging.info('Start log streaming for pod "%s:%s".', namespace,
                    pod_name)
  try:
    logs = core_api.read_namespaced_pod_log(
        name=pod_name,
        namespace=namespace,
        container='main',
        follow=True,
        _preload_content=False).stream()
  except client.rest.ApiException as e:
    raise RuntimeError(
        'Failed to stream the logs from the pod!\nReason: %s\nBody: %s' %
        (e.reason, e.body)) from e

  for log in logs:
    # The chunks are raw bytes produced by the container. Substitute bytes
    # that are not valid UTF-8 instead of letting a UnicodeDecodeError abort
    # the run while the pod itself is still healthy.
    absl.logging.info(log.decode(errors='replace').rstrip('\n'))

  resp = self._wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=_pod_is_done,
      condition_description='done state')

  if resp.status.phase == _POD_FAILED_PHASE:
    raise RuntimeError('Pod "%s:%s" failed with status "%s".' %
                       (namespace, pod_name, resp.status))

  absl.logging.info('Pod "%s:%s" is done.', namespace, pod_name)
def run_executor(
    self, execution_info: data_types.ExecutionInfo
) -> execution_result_pb2.ExecutorOutput:
  """Execute underlying component implementation.

  Runs executor container in a Kubernetes Pod and wait until it goes into
  `Succeeded` or `Failed` state.

  Args:
    execution_info: All the information that the launcher provides.

  Raises:
    RuntimeError: when the pod is in `Failed` state or unexpected failure from
      Kubernetes API.

  Returns:
    A default-constructed ExecutorOutput instance.
  """
  context = placeholder_utils.ResolutionContext(
      exec_info=execution_info,
      executor_spec=self._executor_spec,
      platform_config=self._platform_config)

  # Resolve every placeholder in the command and args. An empty list is
  # passed on as None.
  container_spec = executor_specs.TemplatedExecutorContainerSpec(
      image=self._container_executor_spec.image,
      command=[
          placeholder_utils.resolve_placeholder_expression(cmd, context)
          for cmd in self._container_executor_spec.commands
      ] or None,
      args=[
          placeholder_utils.resolve_placeholder_expression(arg, context)
          for arg in self._container_executor_spec.args
      ] or None,
  )

  pod_name = self._build_pod_name(execution_info)
  # TODO(hongyes): replace the default value from component config.
  try:
    namespace = kube_utils.get_kfp_namespace()
  except RuntimeError:
    # No KFP namespace could be determined; use the default one.
    namespace = 'kubeflow'

  pod_manifest = self._build_pod_manifest(pod_name, container_spec)
  core_api = kube_utils.make_core_v1_api()

  if kube_utils.is_inside_kfp():
    # Reuse the launcher pod's service account and owner references for the
    # executor pod.
    launcher_pod = kube_utils.get_current_kfp_pod(core_api)
    pod_manifest['spec'][
        'serviceAccount'] = launcher_pod.spec.service_account
    pod_manifest['spec'][
        'serviceAccountName'] = launcher_pod.spec.service_account_name
    pod_manifest['metadata'][
        'ownerReferences'] = container_common.to_swagger_dict(
            launcher_pod.metadata.owner_references)
  else:
    pod_manifest['spec'][
        'serviceAccount'] = kube_utils.TFX_SERVICE_ACCOUNT
    pod_manifest['spec'][
        'serviceAccountName'] = kube_utils.TFX_SERVICE_ACCOUNT

  # Only create the pod when no pod with this name exists yet.
  logging.info('Looking for pod "%s:%s".', namespace, pod_name)
  resp = kube_utils.get_pod(core_api, pod_name, namespace)
  if not resp:
    logging.info('Pod "%s:%s" does not exist. Creating it...', namespace,
                 pod_name)
    logging.info('Pod manifest: %s', pod_manifest)
    try:
      resp = core_api.create_namespaced_pod(
          namespace=namespace, body=pod_manifest)
    except client.rest.ApiException as e:
      raise RuntimeError(
          'Failed to create container executor pod!\nReason: %s\nBody: %s' %
          (e.reason, e.body)) from e

  # Wait up to 300 seconds for the pod to move from pending to another status.
  logging.info('Waiting for pod "%s:%s" to start.', namespace, pod_name)
  kube_utils.wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=kube_utils.pod_is_not_pending,
      condition_description='non-pending status',
      timeout_sec=300)

  logging.info('Start log streaming for pod "%s:%s".', namespace, pod_name)
  try:
    logs = core_api.read_namespaced_pod_log(
        name=pod_name,
        namespace=namespace,
        container=kube_utils.ARGO_MAIN_CONTAINER_NAME,
        follow=True,
        _preload_content=False).stream()
  except client.rest.ApiException as e:
    raise RuntimeError(
        'Failed to stream the logs from the pod!\nReason: %s\nBody: %s' %
        (e.reason, e.body)) from e

  for log in logs:
    # The chunks are raw bytes produced by the container. Substitute bytes
    # that are not valid UTF-8 instead of letting a UnicodeDecodeError abort
    # the run while the pod itself is still healthy.
    logging.info(log.decode(errors='replace').rstrip('\n'))

  # Wait indefinitely for the pod to complete.
  resp = kube_utils.wait_pod(
      core_api,
      pod_name,
      namespace,
      exit_condition_lambda=kube_utils.pod_is_done,
      condition_description='done state')

  if resp.status.phase == kube_utils.PodPhase.FAILED.value:
    raise RuntimeError('Pod "%s:%s" failed with status "%s".' %
                       (namespace, pod_name, resp.status))

  logging.info('Pod "%s:%s" is done.', namespace, pod_name)

  return execution_result_pb2.ExecutorOutput()