def test_decorator_metadata(self):
  """Test @pipeline decorator with metadata."""

  @pipeline(name='p1', description='description1')
  def my_pipeline1(a: {'Schema': {'file_type': 'csv'}} = 'good',
                   b: Integer() = 12):
    pass

  golden_meta = ComponentSpec(name='p1', description='description1', inputs=[])
  golden_meta.inputs.append(
      InputSpec(
          name='a',
          type={'Schema': {'file_type': 'csv'}},
          default='good',
          optional=True))
  golden_meta.inputs.append(
      InputSpec(
          name='b',
          type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
          default="12",
          optional=True))

  pipeline_meta = _extract_pipeline_metadata(my_pipeline1)
  self.assertEqual(pipeline_meta, golden_meta)

def test_decorator_metadata(self):
  """Test @pipeline decorator with metadata."""

  @pipeline(name='p1', description='description1')
  def my_pipeline1(a: {'Schema': {'file_type': 'csv'}} = 'good',
                   b: Integer() = 12):
    pass

  golden_meta = PipelineMeta(name='p1', description='description1')
  golden_meta.inputs.append(
      ParameterMeta(
          name='a',
          description='',
          param_type={'Schema': {'file_type': 'csv'}},
          default='good'))
  golden_meta.inputs.append(
      ParameterMeta(
          name='b',
          description='',
          param_type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
          default=12))

  pipeline_meta = _extract_pipeline_metadata(my_pipeline1)
  self.assertEqual(pipeline_meta, golden_meta)

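# --- Usage sketch (not part of the original tests): calling
# _extract_pipeline_metadata directly on a decorated pipeline and inspecting
# the resulting spec. The import paths below assume a kfp 1.x SDK layout and
# may differ between releases; treat this as an illustration, not the
# canonical API.
from kfp.dsl import pipeline
from kfp.dsl.types import Integer
from kfp.dsl._metadata import _extract_pipeline_metadata  # assumed module path


@pipeline(name='p1', description='description1')
def my_pipeline1(a: {'Schema': {'file_type': 'csv'}} = 'good',
                 b: Integer() = 12):
  pass


spec = _extract_pipeline_metadata(my_pipeline1)
print(spec.name, spec.description)
for inp in spec.inputs:
  # In the ComponentSpec-based variant above, defaults are serialized to
  # strings (e.g. b's default is recorded as "12").
  print(inp.name, inp.type, inp.default)
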
def _create_workflow(self,
                     pipeline_func: Callable,
                     pipeline_name: Text = None,
                     pipeline_description: Text = None,
                     params_list: List[dsl.PipelineParam] = None,
                     pipeline_conf: dsl.PipelineConf = None,
                     ) -> List[Dict[Text, Any]]:  # Tekton change, signature
  """Internal implementation of create_workflow."""
  params_list = params_list or []
  argspec = inspect.getfullargspec(pipeline_func)

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _extract_pipeline_metadata(pipeline_func)
  pipeline_meta.name = pipeline_name or pipeline_meta.name
  pipeline_meta.description = pipeline_description or pipeline_meta.description
  pipeline_name = sanitize_k8s_name(pipeline_meta.name)

  # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
  # will be resolved immediately in place when being passed to each component.
  default_param_values = {}
  for param in params_list:
    default_param_values[param.name] = param.value
    param.value = None

  # Currently only allow specifying pipeline params at one place.
  if params_list and pipeline_meta.inputs:
    raise ValueError(
        'Either specify pipeline params in the pipeline function, or in '
        '"params_list", but not both.')

  args_list = []
  for arg_name in argspec.args:
    arg_type = None
    for input in pipeline_meta.inputs or []:
      if arg_name == input.name:
        arg_type = input.type
        break
    args_list.append(
        dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Configuration passed to the compiler is overriding. Unfortunately, it is
  # not trivial to detect whether the dsl_pipeline.conf was ever modified.
  pipeline_conf = pipeline_conf or dsl_pipeline.conf

  self._validate_exit_handler(dsl_pipeline)
  self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
        for arg_name in argspec.args
    ]
    if argspec.defaults:
      for arg, default in zip(reversed(args_list_with_defaults),
                              reversed(argspec.defaults)):
        arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
  elif params_list:
    # Or, if args are provided by params_list, fill in pipeline_meta.
    for param in params_list:
      param.value = default_param_values[param.name]

    args_list_with_defaults = params_list
    pipeline_meta.inputs = [
        InputSpec(name=param.name, type=param.param_type, default=param.value)
        for param in params_list
    ]

  op_transformers = [add_pod_env]
  op_transformers.extend(pipeline_conf.op_transformers)

  workflow = self._create_pipeline_workflow(
      args_list_with_defaults,
      dsl_pipeline,
      op_transformers,
      pipeline_conf,
  )

  from ._data_passing_rewriter import fix_big_data_passing
  workflow = fix_big_data_passing(workflow)

  import json
  # Tekton change: the workflow is a list of resources; annotate the Pipeline.
  pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]
  pipeline.setdefault('metadata', {}).setdefault('annotations', {})[
      'pipelines.kubeflow.org/pipeline_spec'] = json.dumps(
          pipeline_meta.to_dict(), sort_keys=True)

  return workflow

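# --- Usage sketch (illustration only): the Tekton-flavored _create_workflow
# above returns a list of Kubernetes resources rather than a single Argo
# workflow dict, so callers filter by kind before serializing. Assumes the
# kfp-tekton package layout and reuses my_pipeline1 from the sketch above;
# _create_workflow is a private method, called here only to show the shape of
# its return value.
import yaml
from kfp_tekton.compiler import TektonCompiler  # assumed import path

resources = TektonCompiler()._create_workflow(my_pipeline1)
tekton_pipeline = [r for r in resources if r['kind'] == 'Pipeline'][0]
print(yaml.safe_dump(tekton_pipeline, default_flow_style=False))
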
def _create_workflow(
    self,
    pipeline_func: Callable,
    pipeline_name: Text = None,
    pipeline_description: Text = None,
    params_list: List[dsl.PipelineParam] = None,
    pipeline_conf: dsl.PipelineConf = None,
) -> Dict[Text, Any]:
  """Internal implementation of create_workflow."""
  params_list = params_list or []
  argspec = inspect.getfullargspec(pipeline_func)

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _extract_pipeline_metadata(pipeline_func)
  pipeline_meta.name = pipeline_name or pipeline_meta.name
  pipeline_meta.description = pipeline_description or pipeline_meta.description
  pipeline_name = sanitize_k8s_name(pipeline_meta.name)

  # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
  # will be resolved immediately in place when being passed to each component.
  default_param_values = {}
  for param in params_list:
    default_param_values[param.name] = param.value
    param.value = None

  # Currently only allow specifying pipeline params at one place.
  if params_list and pipeline_meta.inputs:
    raise ValueError(
        'Either specify pipeline params in the pipeline function, or in '
        '"params_list", but not both.')

  args_list = []
  for arg_name in argspec.args:
    arg_type = None
    for input in pipeline_meta.inputs or []:
      if arg_name == input.name:
        arg_type = input.type
        break
    args_list.append(
        dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Configuration passed to the compiler is overriding. Unfortunately, it is
  # not trivial to detect whether the dsl_pipeline.conf was ever modified.
  pipeline_conf = pipeline_conf or dsl_pipeline.conf

  self._validate_exit_handler(dsl_pipeline)
  self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
        for arg_name in argspec.args
    ]
    if argspec.defaults:
      for arg, default in zip(reversed(args_list_with_defaults),
                              reversed(argspec.defaults)):
        arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
  elif params_list:
    # Or, if args are provided by params_list, fill in pipeline_meta.
    for param in params_list:
      param.value = default_param_values[param.name]

    args_list_with_defaults = params_list
    pipeline_meta.inputs = [
        InputSpec(name=param.name, type=param.param_type, default=param.value)
        for param in params_list
    ]

  op_transformers = [add_pod_env]
  # # By default adds telemetry instruments. Users can opt out toggling
  # # allow_telemetry.
  # # Also, TFX pipelines will be bypassed for pipeline compiled by tfx>0.21.4.
  # if allow_telemetry:
  #   pod_labels = get_default_telemetry_labels()
  #   op_transformers.append(add_pod_labels(pod_labels))
  op_transformers.extend(pipeline_conf.op_transformers)

  workflow = self._create_pipeline_workflow(
      args_list_with_defaults,
      dsl_pipeline,
      op_transformers,
      pipeline_conf,
  )

  workflow = fix_big_data_passing(workflow)

  workflow.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
      json.dumps(pipeline_meta.to_dict(), sort_keys=True)

  # Recursively strip empty structures.
  # DANGER: this may remove necessary empty elements ?!
  def remove_empty_elements(obj) -> dict:
    if not isinstance(obj, (dict, list)):
      return obj
    if isinstance(obj, list):
      return [remove_empty_elements(o) for o in obj if o != []]
    return {k: remove_empty_elements(v) for k, v in obj.items() if v != []}

  workflow = remove_empty_elements(workflow)

  return workflow

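# --- Illustration only (not part of the original source): what the
# remove_empty_elements helper above does to a nested structure. Empty lists
# are pruned recursively, while empty dicts and all other values are kept.
# Assumes the helper is lifted to module scope (above it is a local function
# inside _create_workflow).
doc = {
    'metadata': {'annotations': {}, 'finalizers': []},
    'spec': {'params': [], 'tasks': [{'name': 't1', 'runAfter': []}]},
}
print(remove_empty_elements(doc))
# -> {'metadata': {'annotations': {}}, 'spec': {'tasks': [{'name': 't1'}]}}
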
def _create_workflow(
    self,
    pipeline_func: Callable,
    pipeline_name: Optional[Text] = None,
    pipeline_description: Optional[Text] = None,
    params_list: Optional[List[dsl.PipelineParam]] = None,
    pipeline_conf: Optional[dsl.PipelineConf] = None,
) -> Dict[Text, Any]:
  """Internal implementation of create_workflow."""
  params_list = params_list or []

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _extract_pipeline_metadata(pipeline_func)
  pipeline_meta.name = pipeline_name or pipeline_meta.name
  pipeline_meta.description = pipeline_description or pipeline_meta.description
  pipeline_name = sanitize_k8s_name(pipeline_meta.name)

  # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
  # will be resolved immediately in place when being passed to each component.
  default_param_values = OrderedDict()

  if self._pipeline_root_param:
    params_list.append(self._pipeline_root_param)
  if self._pipeline_name_param:
    params_list.append(self._pipeline_name_param)

  for param in params_list:
    default_param_values[param.name] = param.value
    param.value = None

  args_list = []
  kwargs_dict = dict()
  signature = inspect.signature(pipeline_func)
  for arg_name, arg in signature.parameters.items():
    arg_type = None
    for input in pipeline_meta.inputs or []:
      if arg_name == input.name:
        arg_type = input.type
        break
    param = dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type)
    if arg.kind == inspect.Parameter.KEYWORD_ONLY:
      kwargs_dict[arg_name] = param
    else:
      args_list.append(param)

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list, **kwargs_dict)

  # Configuration passed to the compiler is overriding. Unfortunately, it is
  # not trivial to detect whether the dsl_pipeline.conf was ever modified.
  pipeline_conf = pipeline_conf or dsl_pipeline.conf

  self._validate_exit_handler(dsl_pipeline)
  self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

  # Fill in the default values by merging two param lists.
  args_list_with_defaults = OrderedDict()
  if pipeline_meta.inputs:
    args_list_with_defaults = OrderedDict([
        (sanitize_k8s_name(input_spec.name, True), input_spec.default)
        for input_spec in pipeline_meta.inputs
    ])

  if params_list:
    # Or, if args are provided by params_list, fill in pipeline_meta.
    for k, v in default_param_values.items():
      args_list_with_defaults[k] = v

    pipeline_meta.inputs = pipeline_meta.inputs or []
    for param in params_list:
      pipeline_meta.inputs.append(
          InputSpec(
              name=param.name,
              type=param.param_type,
              default=default_param_values[param.name]))

  op_transformers = [add_pod_env]
  pod_labels = {_SDK_VERSION_LABEL: kfp.__version__, _SDK_ENV_LABEL: _SDK_ENV_DEFAULT}
  op_transformers.append(add_pod_labels(pod_labels))
  op_transformers.extend(pipeline_conf.op_transformers)

  if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
    # Add self._pipeline_name_param and self._pipeline_root_param to ops inputs
    # if they don't exist already.
    for op in dsl_pipeline.ops.values():
      insert_pipeline_name_param = True
      insert_pipeline_root_param = True
      for param in op.inputs:
        if param.name == self._pipeline_name_param.name:
          insert_pipeline_name_param = False
        elif param.name == self._pipeline_root_param.name:
          insert_pipeline_root_param = False

      if insert_pipeline_name_param:
        op.inputs.append(self._pipeline_name_param)
      if insert_pipeline_root_param:
        op.inputs.append(self._pipeline_root_param)

  workflow = self._create_pipeline_workflow(
      args_list_with_defaults,
      dsl_pipeline,
      op_transformers,
      pipeline_conf,
  )

  from ._data_passing_rewriter import fix_big_data_passing
  workflow = fix_big_data_passing(workflow)

  workflow = _data_passing_rewriter.add_pod_name_passing(
      workflow, str(self._pipeline_root_param or None))

  if pipeline_conf and pipeline_conf.data_passing_method is not None:
    workflow = pipeline_conf.data_passing_method(workflow)

  metadata = workflow.setdefault('metadata', {})
  annotations = metadata.setdefault('annotations', {})
  labels = metadata.setdefault('labels', {})

  annotations[_SDK_VERSION_LABEL] = kfp.__version__
  annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = \
      datetime.datetime.now().isoformat()
  annotations['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(
      pipeline_meta.to_dict(), sort_keys=True)

  if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
    annotations['pipelines.kubeflow.org/v2_pipeline'] = "true"
    labels['pipelines.kubeflow.org/v2_pipeline'] = "true"

  # Labels might be logged better than annotations, so adding some information
  # here as well.
  labels[_SDK_VERSION_LABEL] = kfp.__version__

  return workflow

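# --- Usage sketch (illustration only): inspecting the metadata this version of
# the compiler attaches to the generated workflow. Assumes a kfp 1.x SDK and
# reuses my_pipeline1 from the earlier sketch. _create_workflow is a private
# method, called directly here only to show the resulting structure; when the
# compiler runs in dsl.PipelineExecutionMode.V2_COMPATIBLE mode it would also
# set the pipelines.kubeflow.org/v2_pipeline annotation and label to "true".
import kfp

workflow = kfp.compiler.Compiler()._create_workflow(my_pipeline1)
metadata = workflow['metadata']
print(metadata['labels'])                                               # includes the SDK version label
print(metadata['annotations']['pipelines.kubeflow.org/pipeline_spec'])  # JSON-serialized pipeline spec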