def _create_pipeline(
    self,
    pipeline_func: Callable[..., Any],
    pipeline_name: Optional[str] = None,
) -> pipeline_spec_pb2.PipelineSpec:
  """Creates a pipeline instance and constructs the pipeline spec from it.

  Args:
    pipeline_func: Pipeline function with @dsl.pipeline decorator.
    pipeline_name: The name of the pipeline. Optional.

  Returns:
    The IR representation (pipeline spec) of the pipeline.
  """
  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _python_op._extract_component_interface(pipeline_func)
  pipeline_name = pipeline_name or pipeline_meta.name

  args_list = []
  signature = inspect.signature(pipeline_func)
  for arg_name in signature.parameters:
    arg_type = None
    for pipeline_input in pipeline_meta.inputs or []:
      if arg_name == pipeline_input.name:
        arg_type = pipeline_input.type
        break
    args_list.append(
        dsl.PipelineParam(
            sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(
            sanitize_k8s_name(input_spec.name, True),
            param_type=input_spec.type,
            value=input_spec.default) for input_spec in pipeline_meta.inputs
    ]

  pipeline_spec = self._create_pipeline_spec(
      args_list_with_defaults,
      dsl_pipeline,
  )

  return pipeline_spec
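For context, a minimal sketch of how this method is typically reached. Assumptions: `hello_pipeline` is a hypothetical user pipeline, and `compiler` is an instance of the class that defines `_create_pipeline`; in practice the method is invoked through the compiler's public `compile` entry point.

from kfp import dsl

@dsl.pipeline(name='hello-pipeline')
def hello_pipeline(message: str = 'hello'):
  # Hypothetical pipeline body; a real pipeline would instantiate
  # component tasks here.
  pass

# Assumption: `compiler` is an instance of the class defining _create_pipeline.
pipeline_spec = compiler._create_pipeline(hello_pipeline)
print(pipeline_spec.pipeline_info.name)  # e.g. 'hello-pipeline'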
def _extract_pipeline_metadata(func):
  """Creates pipeline metadata structure instance based on the function
  signature."""

  # Most of this code is only needed for verifying the default values against
  # "openapi_schema_validator" type properties.
  # TODO: Move the value verification code to some other place.
  from ._pipeline_param import PipelineParam

  import inspect
  import warnings
  fullargspec = inspect.getfullargspec(func)
  args = fullargspec.args
  annotations = fullargspec.annotations

  # Match the defaults to the trailing arguments.
  arg_defaults = {}
  if fullargspec.defaults:
    for arg, default in zip(
        reversed(fullargspec.args), reversed(fullargspec.defaults)):
      arg_defaults[arg] = default

  for arg in args:
    arg_type = None
    arg_default = arg_defaults[arg] if arg in arg_defaults else None
    if isinstance(arg_default, PipelineParam):
      warnings.warn(
          'Explicit creation of `kfp.dsl.PipelineParam`s by users is '
          'deprecated. Please define the parameter type and default '
          'values using standard pythonic constructs: '
          'def my_func(a: int = 1, b: str = "default"):')
      arg_default = arg_default.value
    if arg in annotations:
      arg_type = _annotation_to_typemeta(annotations[arg])
    arg_type_properties = list(
        arg_type.values())[0] if isinstance(arg_type, dict) else {}
    if 'openapi_schema_validator' in arg_type_properties and (
        arg_default is not None):
      from jsonschema import validate
      import json
      schema_object = arg_type_properties['openapi_schema_validator']
      if isinstance(schema_object, str):
        # In case the property value for the schema validator is a string
        # instead of a dict.
        schema_object = json.loads(schema_object)
      # Only validating non-serialized values.
      validate(instance=arg_default, schema=schema_object)

  from kfp.components._python_op import _extract_component_interface
  component_spec = _extract_component_interface(func)
  return component_spec
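A small sketch of the pythonic parameter style the deprecation warning above recommends. `train` is a hypothetical user function; its metadata is extracted by calling the function above directly.

def train(learning_rate: float = 0.1, epochs: int = 10):
  # Hypothetical user function: types and defaults are declared with plain
  # Python annotations rather than explicit PipelineParam objects.
  pass

component_spec = _extract_pipeline_metadata(train)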
def get_output_artifacts(
    fn: Callable,
    output_uris: Dict[str, str]) -> Dict[str, artifact.Artifact]:
  """Gets the output artifacts from function signature and provided URIs.

  Args:
    fn: A user-provided function, whose signature annotates the type of output
      artifacts.
    output_uris: The mapping from output artifact name to its URI.

  Returns:
    A mapping from output artifact name to Python artifact objects.
  """
  # Inspect the function signature to determine the set of output artifacts.
  spec = _python_op._extract_component_interface(fn)

  result = {}  # Mapping from output name to artifacts.
  for output in spec.outputs:
    if getattr(output, '_passing_style', None) == _python_op.OutputArtifact:
      # Create an artifact according to its type name.
      type_name = getattr(output, 'type', None)
      if not type_name:
        continue

      try:
        artifact_cls = getattr(
            importlib.import_module(artifact.KFP_ARTIFACT_ONTOLOGY_MODULE),
            type_name)
      except (AttributeError, ImportError, ValueError):
        logging.warning((
            'Could not load artifact class %s.%s; using fallback deserialization'
            ' for the relevant artifact. Please make sure that any artifact '
            'classes can be imported within your container or environment.'),
            artifact.KFP_ARTIFACT_ONTOLOGY_MODULE, type_name)
        artifact_cls = artifact.Artifact

      if artifact_cls == artifact.Artifact:
        # Provide an empty schema if instantiating a bare-metal artifact.
        art = artifact_cls(instance_schema=artifact.DEFAULT_ARTIFACT_SCHEMA)
      else:
        art = artifact_cls()
      art.uri = output_uris[output.name]
      result[output.name] = art
  return result
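A hedged sketch of the call shape. Assumptions: `produce_model` is a user function already annotated with an output-artifact passing style for the installed KFP version, 'model' is its declared output name, and the URI mapping would normally come from the pipeline runtime rather than being hand-written.

artifacts = get_output_artifacts(
    produce_model,  # hypothetical user function with an output artifact
    output_uris={'model': 'gs://my-bucket/run-1/model'})
model = artifacts['model']
assert model.uri == 'gs://my-bucket/run-1/model'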
def _create_pipeline_v2(
    self,
    pipeline_func: Callable[..., Any],
    pipeline_root: Optional[str] = None,
    pipeline_name: Optional[str] = None,
    pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
) -> pipeline_spec_pb2.PipelineJob:
  """Creates a pipeline instance and constructs the pipeline spec from it.

  Args:
    pipeline_func: Pipeline function with @dsl.pipeline decorator.
    pipeline_root: The root of the pipeline outputs. Optional.
    pipeline_name: The name of the pipeline. Optional.
    pipeline_parameters_override: The mapping from parameter names to values.
      Optional.

  Returns:
    A PipelineJob proto representing the compiled pipeline.
  """
  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _python_op._extract_component_interface(pipeline_func)
  pipeline_name = pipeline_name or pipeline_meta.name

  pipeline_root = pipeline_root or getattr(pipeline_func, 'output_directory',
                                           None)
  if not pipeline_root:
    warnings.warn('pipeline_root is None or empty. A valid pipeline_root '
                  'must be provided at job submission.')

  args_list = []
  signature = inspect.signature(pipeline_func)
  for arg_name in signature.parameters:
    arg_type = None
    for pipeline_input in pipeline_meta.inputs or []:
      if arg_name == pipeline_input.name:
        arg_type = pipeline_input.type
        break
    args_list.append(
        dsl.PipelineParam(
            sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  self._sanitize_and_inject_artifact(dsl_pipeline)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(
            sanitize_k8s_name(input_spec.name, True),
            param_type=input_spec.type,
            value=input_spec.default) for input_spec in pipeline_meta.inputs
    ]

  # Make the pipeline group name unique to prevent name clashes with
  # templates.
  pipeline_group = dsl_pipeline.groups[0]
  temp_pipeline_group_name = uuid.uuid4().hex
  pipeline_group.name = temp_pipeline_group_name

  pipeline_spec = self._create_pipeline_spec(
      args_list_with_defaults,
      dsl_pipeline,
  )

  pipeline_parameters = {
      param.name: param for param in args_list_with_defaults
  }
  # Apply the pipeline parameter overrides, if any.
  pipeline_parameters_override = pipeline_parameters_override or {}
  for k, v in pipeline_parameters_override.items():
    if k not in pipeline_parameters:
      raise ValueError('Pipeline parameter {} does not match any known '
                       'pipeline argument.'.format(k))
    pipeline_parameters[k].value = v

  runtime_config = compiler_utils.build_runtime_config_spec(
      output_directory=pipeline_root, pipeline_parameters=pipeline_parameters)
  pipeline_job = pipeline_spec_pb2.PipelineJob(runtime_config=runtime_config)
  pipeline_job.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))

  return pipeline_job
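A sketch of how the override mapping interacts with declared defaults. Assumptions: `override_demo` and `compiler` are hypothetical names; the `message` key matches a pipeline argument, so the override is accepted, whereas an unknown key would raise the ValueError above.

@dsl.pipeline(name='override-demo')
def override_demo(message: str = 'hello'):
  # Hypothetical pipeline with a single overridable parameter.
  pass

job = compiler._create_pipeline_v2(
    override_demo,
    pipeline_root='gs://my-bucket/root',
    pipeline_parameters_override={'message': 'world'})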
def _create_pipeline(
    self,
    pipeline_func: Callable[..., Any],
    output_directory: str,
    pipeline_name: Optional[str] = None,
    pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
) -> pipeline_spec_pb2.PipelineJob:
  """Creates a pipeline instance and constructs the pipeline spec from it.

  Args:
    pipeline_func: Pipeline function with @dsl.pipeline decorator.
    output_directory: The root of the pipeline outputs.
    pipeline_name: The name of the pipeline. Optional.
    pipeline_parameters_override: The mapping from parameter names to values.
      Optional.

  Returns:
    A PipelineJob proto representing the compiled pipeline.
  """
  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _python_op._extract_component_interface(pipeline_func)
  pipeline_name = pipeline_name or pipeline_meta.name

  args_list = []
  signature = inspect.signature(pipeline_func)
  for arg_name in signature.parameters:
    arg_type = None
    for pipeline_input in pipeline_meta.inputs or []:
      if arg_name == pipeline_input.name:
        arg_type = pipeline_input.type
        break
    args_list.append(
        dsl.PipelineParam(
            sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(
            sanitize_k8s_name(input_spec.name, True),
            param_type=input_spec.type,
            value=input_spec.default) for input_spec in pipeline_meta.inputs
    ]

  pipeline_spec = self._create_pipeline_spec(
      args_list_with_defaults,
      dsl_pipeline,
  )

  pipeline_parameters = {
      arg.name: arg.value for arg in args_list_with_defaults
  }
  # Apply the pipeline parameter overrides, if any.
  pipeline_parameters.update(pipeline_parameters_override or {})

  runtime_config = compiler_utils.build_runtime_config_spec(
      output_directory=output_directory,
      pipeline_parameters=pipeline_parameters)
  pipeline_job = pipeline_spec_pb2.PipelineJob(runtime_config=runtime_config)
  pipeline_job.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))

  return pipeline_job
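Note a design difference from `_create_pipeline_v2` above: this variant builds `pipeline_parameters` as a plain name-to-value mapping and folds overrides in with `dict.update`, so no key validation happens at this layer; unknown override keys are passed through rather than raising. A hypothetical invocation, reusing the `hello_pipeline` sketch from earlier (both names are assumptions):

job = compiler._create_pipeline(
    hello_pipeline,
    output_directory='gs://my-bucket/root',
    pipeline_parameters_override={'message': 'world'})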