def update_op(op: dsl.ContainerOp,
              pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam,
              launcher_image: Optional[str] = None) -> None:
    """Rewires `op` in place so that it runs in v2-compatible mode.

    A `kfp-launcher` init container and volume are attached, the op's
    command is replaced by the launcher invocation, the original command and
    args (taken from `op.container_spec`) become the op's arguments, and a
    JSON description of the op's inputs and outputs is exposed to the
    container through the `KFP_V2_RUNTIME_INFO` environment variable.

    Args:
      op: The Op to update.
      pipeline_name: Value passed to the launcher's `--pipeline_name` flag.
      pipeline_root: The root output directory for pipeline artifacts.
      launcher_image: An optional launcher image. Useful for tests. Falls
        back to `_DEFAULT_LAUNCHER_IMAGE` when not given.
    """
    # Inject the launcher binary and overwrite the entrypoint.
    op.add_init_container(
        dsl.UserContainer(
            name="kfp-launcher",
            image=launcher_image or _DEFAULT_LAUNCHER_IMAGE,
            command="/bin/mount_launcher.sh",
            mirror_volume_mounts=True,
        ))
    op.add_volume(k8s_client.V1Volume(name='kfp-launcher'))
    op.add_volume_mount(
        k8s_client.V1VolumeMount(
            name='kfp-launcher', mount_path='/kfp-launcher'))

    # The "$(NAME)" entries refer to environment variables of the container.
    op.command = [
        "/kfp-launcher/launch",
        "--mlmd_server_address",
        "$(METADATA_GRPC_SERVICE_HOST)",
        "--mlmd_server_port",
        "$(METADATA_GRPC_SERVICE_PORT)",
        "--runtime_info_json",
        "$(KFP_V2_RUNTIME_INFO)",
        "--container_image",
        "$(KFP_V2_IMAGE)",
        "--task_name",
        op.name,
        "--pipeline_name",
        pipeline_name,
        "--pipeline_run_id",
        "$(WORKFLOW_ID)",
        "--pipeline_task_id",
        "$(KFP_POD_NAME)",
        "--pipeline_root",
        pipeline_root,
    ]

    # Mount necessary environment variables.
    op.apply(_default_transformers.add_kfp_pod_env)
    op.container.add_env_variable(
        k8s_client.V1EnvVar(name="KFP_V2_IMAGE", value=op.container.image))
    grpc_config_map = k8s_client.V1ConfigMapEnvSource(
        name='metadata-grpc-configmap', optional=True)
    op.container.add_env_from(
        k8s_client.V1EnvFromSource(config_map_ref=grpc_config_map))

    # The user's original entrypoint is handed to the launcher as arguments.
    op.arguments = (list(op.container_spec.command) +
                    list(op.container_spec.args))

    component = op.component_spec
    primitive_type_name = (
        pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name)

    # Every section is keyed by name and filled in sorted-by-name order.
    input_parameters = collections.OrderedDict(
        (param_name, {
            "parameterType": primitive_type_name(param_spec.type),
            "parameterValue": op._parameter_arguments[param_name],
        })
        for param_name, param_spec in sorted(
            component.input_definitions.parameters.items()))

    input_artifacts = collections.OrderedDict(
        (artifact_name, {
            "fileInputPath": op.input_artifact_paths[artifact_name],
        })
        for artifact_name, _ in sorted(
            component.input_definitions.artifacts.items()))

    output_parameters = collections.OrderedDict(
        (param_name, {
            "parameterType": primitive_type_name(param_spec.type),
            "fileOutputPath": op.file_outputs[param_name],
        })
        for param_name, param_spec in sorted(
            component.output_definitions.parameters.items()))

    # TODO: Assert instance_schema.
    output_artifacts = collections.OrderedDict(
        (artifact_name, {
            # Type used to register output artifacts.
            "artifactSchema": artifact_spec.artifact_type.instance_schema,
            # File used to write out the registered artifact ID.
            "fileOutputPath": op.file_outputs[artifact_name],
        })
        for artifact_name, artifact_spec in sorted(
            component.output_definitions.artifacts.items()))

    runtime_info = {
        "inputParameters": input_parameters,
        "inputArtifacts": input_artifacts,
        "outputParameters": output_parameters,
        "outputArtifacts": output_artifacts,
    }
    op.container.add_env_variable(
        k8s_client.V1EnvVar(
            name="KFP_V2_RUNTIME_INFO", value=json.dumps(runtime_info)))

    op.pod_annotations['pipelines.kubeflow.org/v2_component'] = "true"
def update_op(op: dsl.ContainerOp,
              pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam,
              launcher_image: Optional[str] = None) -> None:
    """Rewires `op` in place so that it runs in v2-compatible mode.

    The op is flagged with `is_v2`, a `kfp-launcher` init container and
    volume are attached, the op's command is replaced by the launcher
    invocation followed by the input parameter values, the original command
    and args (taken from `op.container_spec`) become the op's arguments, and
    a JSON description of the op's inputs and outputs is exposed to the
    container through the `KFP_V2_RUNTIME_INFO` environment variable.

    Args:
      op: The Op to update.
      pipeline_name: Value passed to the launcher's `--pipeline_name` flag.
      pipeline_root: The root output directory for pipeline artifacts.
      launcher_image: An optional launcher image. Useful for tests. Falls
        back to `_DEFAULT_LAUNCHER_IMAGE` when not given.
    """
    op.is_v2 = True

    # Inject the launcher binary and overwrite the entrypoint.
    op.add_init_container(
        dsl.UserContainer(
            name="kfp-launcher",
            image=launcher_image or _DEFAULT_LAUNCHER_IMAGE,
            command=["launcher", "--copy", "/kfp-launcher/launch"],
            mirror_volume_mounts=True,
        ))
    op.add_volume(k8s_client.V1Volume(name='kfp-launcher'))
    op.add_volume_mount(
        k8s_client.V1VolumeMount(
            name='kfp-launcher', mount_path='/kfp-launcher'))

    # Layout of op.command followed by op.args once this function is done:
    #   1. flags for kfp-launcher
    #   2. a "--" separator
    #   3. input parameters as "key1=value1", "key2=value2", ...
    #   4. a "--" separator closing the launcher's own arguments
    #   5. (start of op.args) the original user program command + args
    #
    # example:
    # - command:
    #   - /kfp-launcher/launch
    #   - '--mlmd_server_address'
    #   - $(METADATA_GRPC_SERVICE_HOST)
    #   - '--mlmd_server_port'
    #   - $(METADATA_GRPC_SERVICE_PORT)
    #   - ... # more launcher params
    #   - '--pipeline_root'
    #   - ''
    #   - '--enable_caching'
    #   - $(ENABLE_CACHING)
    #   - '--' # start of parameter values
    #   - first=first
    #   - second=second
    #   - '--' # start of user command and args
    #   args:
    #   - sh
    #   - '-ec'
    #   - |
    #     program_path=$(mktemp)
    #     printf "%s" "$0" > "$program_path"
    #     python3 -u "$program_path" "$@"
    #   - >
    #     import json
    #     import xxx
    #     ...
    op.command = [
        "/kfp-launcher/launch",
        "--mlmd_server_address",
        "$(METADATA_GRPC_SERVICE_HOST)",
        "--mlmd_server_port",
        "$(METADATA_GRPC_SERVICE_PORT)",
        "--runtime_info_json",
        "$(KFP_V2_RUNTIME_INFO)",
        "--container_image",
        "$(KFP_V2_IMAGE)",
        "--task_name",
        op.name,
        "--pipeline_name",
        pipeline_name,
        "--run_id",
        "$(KFP_RUN_ID)",
        "--run_resource",
        "workflows.argoproj.io/$(WORKFLOW_ID)",
        "--namespace",
        "$(KFP_NAMESPACE)",
        "--pod_name",
        "$(KFP_POD_NAME)",
        "--pod_uid",
        "$(KFP_POD_UID)",
        "--pipeline_root",
        pipeline_root,
        "--enable_caching",
        "$(ENABLE_CACHING)",
    ]

    # Mount necessary environment variables.
    op.apply(_default_transformers.add_kfp_pod_env)
    op.container.add_env_variable(
        k8s_client.V1EnvVar(name="KFP_V2_IMAGE", value=op.container.image))
    grpc_config_map = k8s_client.V1ConfigMapEnvSource(
        name='metadata-grpc-configmap', optional=True)
    op.container.add_env_from(
        k8s_client.V1EnvFromSource(config_map_ref=grpc_config_map))

    # The user's original entrypoint is handed to the launcher as arguments.
    op.arguments = (list(op.container_spec.command) +
                    list(op.container_spec.args))

    primitive_type_name = (
        pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name)

    # Input parameters: the type goes into the runtime info, while the value
    # travels on the command line between the two "--" separators.
    op.command += ["--"]
    component = op.component_spec
    input_parameters = collections.OrderedDict()
    for param_name, param_spec in sorted(
            component.input_definitions.parameters.items()):
        type_entry = {"type": primitive_type_name(param_spec.type)}
        op.command += [f"{param_name}={op._parameter_arguments[param_name]}"]
        input_parameters[param_name] = type_entry
    op.command += ["--"]

    # The remaining sections are keyed by name and filled in sorted order.
    input_artifacts = collections.OrderedDict(
        (artifact_name, {
            "metadataPath": op.input_artifact_paths[artifact_name],
            "schemaTitle": artifact_spec.artifact_type.schema_title,
            "instanceSchema": artifact_spec.artifact_type.instance_schema,
        })
        for artifact_name, artifact_spec in sorted(
            component.input_definitions.artifacts.items()))

    output_parameters = collections.OrderedDict(
        (param_name, {
            "type": primitive_type_name(param_spec.type),
            "path": op.file_outputs[param_name],
        })
        for param_name, param_spec in sorted(
            component.output_definitions.parameters.items()))

    # TODO: Assert instance_schema.
    output_artifacts = collections.OrderedDict(
        (artifact_name, {
            # Type used to register output artifacts.
            "schemaTitle": artifact_spec.artifact_type.schema_title,
            "instanceSchema": artifact_spec.artifact_type.instance_schema,
            # File used to write out the registered artifact ID.
            "metadataPath": op.file_outputs[artifact_name],
        })
        for artifact_name, artifact_spec in sorted(
            component.output_definitions.artifacts.items()))

    runtime_info = {
        "inputParameters": input_parameters,
        "inputArtifacts": input_artifacts,
        "outputParameters": output_parameters,
        "outputArtifacts": output_artifacts,
    }
    op.container.add_env_variable(
        k8s_client.V1EnvVar(
            name="KFP_V2_RUNTIME_INFO", value=json.dumps(runtime_info)))

    # The same marker is set both as a pod annotation and as a pod label.
    op.pod_annotations['pipelines.kubeflow.org/v2_component'] = "true"
    op.pod_labels['pipelines.kubeflow.org/v2_component'] = "true"
def container(
    name: str,
    arguments: str,
    inputs: Optional[List[Tuple[InputArgumentPath, str]]] = None,
    outputs: Optional[Dict[str, str]] = None,
) -> Tuple[ContainerOp, Dict[str, Tuple[InputArgumentPath, str]]]:
    """Build a ``bash -c`` pipeline step around a user-supplied shell snippet.

    The generated script consists of, in order: ``set -euo pipefail``, one
    ``cp -r`` per input artifact (plus a ``cd`` into ``Pipeline.REPO`` and a
    ``git diff --name-only`` when an input targets the repository), the
    caller's ``arguments``, and finally the commands staging every output
    under ``Pipeline.out_dir``.

    The step also gets an ``output-artifacts`` emptyDir volume mounted at
    ``Pipeline.OUT_DIR`` and read-only mounts of the ``github-token``,
    ``quay`` and ``ssh-key`` secrets.

    Args:
        name: Name given to the ContainerOp.
        arguments: Shell snippet placed between the input and output copies.
        inputs: Pairs of (artifact source, target location). The first
            element is wrapped in ``InputArgumentPath``; the second is where
            the artifact gets copied to inside the container.
        outputs: Mapping of output key to the path produced by the snippet.

    Returns:
        The ContainerOp, and a dict mapping every output key to the tuple
        ``(ctr.outputs[key], outputs[key])`` for use as a later stage's
        ``inputs``.
    """
    # Stage the outputs: each declared path is copied to its out_dir twin.
    file_outputs = {}
    output_steps = []
    for key, produced_path in (outputs or {}).items():
        staged_path = Pipeline.out_dir(produced_path)
        file_outputs[key] = staged_path
        output_steps.append(
            dedent("""
                mkdir -p {d}
                cp -r {fr} {to}
            """.format(
                d=os.path.dirname(staged_path),
                fr=produced_path,
                to=staged_path,
            )).lstrip())

    # Create the container
    ctr = ContainerOp(
        image=Pipeline.IMAGE,
        name=name,
        command=["bash", "-c"],
        output_artifact_paths=Pipeline.default_artifact_path(),
        file_outputs=file_outputs,
        artifact_argument_paths=[InputArgumentPath(entry[0])
                                 for entry in inputs] if inputs else None,
    )
    ctr.container.set_image_pull_policy("Always")

    # Copy the input artifacts to their target locations.
    # NOTE(review): pairs artifact paths with `inputs` by position; this
    # presumably relies on input_artifact_paths keeping the order of `inputs`.
    input_steps = []
    in_repo = False
    for index, artifact_path in enumerate(ctr.input_artifact_paths.values()):
        destination = inputs[index][1]
        input_steps.append("cp -r {fr} {to}\n".format(
            fr=artifact_path, to=destination))

        # Change to the repository path if available
        if destination == Pipeline.REPO:
            in_repo = True
            input_steps.append("cd {}\n".format(Pipeline.REPO))

    # Show the git diff to validate
    if in_repo:
        input_steps.append(
            dedent("""
                echo "git diff:"
                git diff --name-only
            """))

    # Assemble the command; shell options always come first.
    ctr.arguments = "".join(
        ["set -euo pipefail\n"] + input_steps + [arguments, "\n"] +
        output_steps)

    # Output Artifacts
    output_volume = "output-artifacts"
    ctr.add_volume(
        k8s.V1Volume(
            name=output_volume, empty_dir=k8s.V1EmptyDirVolumeSource()))
    ctr.container.add_volume_mount(
        k8s.V1VolumeMount(name=output_volume, mount_path=Pipeline.OUT_DIR))

    def mount_secret(secret_name, mount_path, **source_options):
        # The volume is named after the secret and mounted read-only.
        ctr.add_volume(
            k8s.V1Volume(
                name=secret_name,
                secret=k8s.V1SecretVolumeSource(
                    secret_name=secret_name, **source_options)))
        ctr.container.add_volume_mount(
            k8s.V1VolumeMount(
                name=secret_name, read_only=True, mount_path=mount_path))

    # GitHub Token
    mount_secret("github-token", Pipeline.GITHUB_TOKEN_MOUNT_PATH)
    # Quay Login
    mount_secret("quay", Pipeline.QUAY_SECRET_MOUNT_PATH)
    # SSH Key (only this secret sets an explicit file mode)
    mount_secret("ssh-key", "/root/.ssh", default_mode=0o600)

    # Assemble the inputs for the next stage
    consumable_inputs = {
        key: (ctr.outputs[key], outputs[key]) for key in file_outputs
    }
    return ctr, consumable_inputs