def _resolve_task_pipeline_param(pipeline_param: PipelineParam, group_type) -> str:
  if pipeline_param.op_name is None:
    return '{{workflow.parameters.%s}}' % pipeline_param.name
  param_name = '%s-%s' % (sanitize_k8s_name(pipeline_param.op_name), pipeline_param.name)
  if group_type == 'subgraph':
    return '{{inputs.parameters.%s}}' % param_name
  return '{{tasks.%s.outputs.parameters.%s}}' % (
      sanitize_k8s_name(pipeline_param.op_name), param_name)

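
# To make the three branches above concrete, here is a minimal, self-contained
# sketch of the placeholder strings the helper produces. `_sanitize` is a
# simplified stand-in for sanitize_k8s_name, and the op/param names are made up.
import re


def _sanitize(name: str) -> str:
  return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', name.lower())).strip('-')


# Pipeline-level input (no producing op): resolved against workflow parameters.
print('{{workflow.parameters.%s}}' % 'learning-rate')
# -> {{workflow.parameters.learning-rate}}

# Output of another task, referenced from inside a subgraph template:
param_name = '%s-%s' % (_sanitize('Train Model'), 'accuracy')
print('{{inputs.parameters.%s}}' % param_name)
# -> {{inputs.parameters.train-model-accuracy}}

# Output of another task, referenced from a sibling task in the same DAG:
print('{{tasks.%s.outputs.parameters.%s}}' % (_sanitize('Train Model'), param_name))
# -> {{tasks.train-model.outputs.parameters.train-model-accuracy}}
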
def _create_pipeline(
    self,
    pipeline_func: Callable[..., Any],
    pipeline_name: Optional[str] = None,
) -> pipeline_spec_pb2.PipelineSpec:
  """Creates a pipeline instance and constructs the pipeline spec from it.

  Args:
    pipeline_func: Pipeline function with @dsl.pipeline decorator.
    pipeline_name: The name of the pipeline. Optional.

  Returns:
    The IR representation (pipeline spec) of the pipeline.
  """

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = python_op._extract_component_interface(pipeline_func)
  pipeline_name = pipeline_name or pipeline_meta.name

  args_list = []
  signature = inspect.signature(pipeline_func)
  for arg_name in signature.parameters:
    arg_type = None
    for pipeline_input in pipeline_meta.inputs or []:
      if arg_name == pipeline_input.name:
        arg_type = pipeline_input.type
        break
    args_list.append(
        dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(sanitize_k8s_name(input_spec.name, True),
                          param_type=input_spec.type,
                          value=input_spec.default)
        for input_spec in pipeline_meta.inputs
    ]

  pipeline_spec = self._create_pipeline_spec(
      args_list_with_defaults,
      dsl_pipeline,
  )

  return pipeline_spec

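
# The compiler above builds one placeholder PipelineParam per argument of the
# decorated function, then calls the function once inside a dsl.Pipeline context
# to record the op graph. A rough standalone sketch of that first pass, using a
# hypothetical train_pipeline function (the real code takes types from the
# extracted component interface rather than from annotations directly):
import inspect


def train_pipeline(data_path: str, epochs: int = 10):  # hypothetical pipeline func
  pass


placeholders = []
for arg_name, arg in inspect.signature(train_pipeline).parameters.items():
  arg_type = None if arg.annotation is inspect.Parameter.empty else arg.annotation
  placeholders.append((arg_name, arg_type))

print(placeholders)  # [('data_path', <class 'str'>), ('epochs', <class 'int'>)]
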
def _sanitize_and_inject_artifact(self, pipeline: dsl.Pipeline) -> None:
  """Sanitize operator/param names and inject pipeline artifact location."""

  # Sanitize operator names and param names
  sanitized_ops = {}

  for op in pipeline.ops.values():
    sanitized_name = sanitize_k8s_name(op.name)
    op.name = sanitized_name
    for param in op.outputs.values():
      param.name = sanitize_k8s_name(param.name, True)
      if param.op_name:
        param.op_name = sanitize_k8s_name(param.op_name)
    if op.output is not None and not isinstance(
        op.output, dsl._container_op._MultipleOutputsError):
      op.output.name = sanitize_k8s_name(op.output.name, True)
      op.output.op_name = sanitize_k8s_name(op.output.op_name)
    if op.dependent_names:
      op.dependent_names = [
          sanitize_k8s_name(name) for name in op.dependent_names
      ]
    if isinstance(op, dsl.ContainerOp) and op.file_outputs is not None:
      sanitized_file_outputs = {}
      for key in op.file_outputs.keys():
        sanitized_file_outputs[sanitize_k8s_name(key, True)] = op.file_outputs[key]
      op.file_outputs = sanitized_file_outputs
    elif isinstance(op, dsl.ResourceOp) and op.attribute_outputs is not None:
      sanitized_attribute_outputs = {}
      for key in op.attribute_outputs.keys():
        sanitized_attribute_outputs[sanitize_k8s_name(key, True)] = \
            op.attribute_outputs[key]
      op.attribute_outputs = sanitized_attribute_outputs
    if isinstance(op, dsl.ContainerOp):
      if op.input_artifact_paths:
        op.input_artifact_paths = {
            sanitize_k8s_name(key, True): value
            for key, value in op.input_artifact_paths.items()
        }
      if op.artifact_arguments:
        op.artifact_arguments = {
            sanitize_k8s_name(key, True): value
            for key, value in op.artifact_arguments.items()
        }
    sanitized_ops[sanitized_name] = op
  pipeline.ops = sanitized_ops

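
# The sanitization relies on sanitize_k8s_name: op names are forced into a
# lowercase, dash-separated (DNS-1123-like) form, while parameter names keep
# capitals and underscores. A rough reimplementation for illustration only
# (the real helper lives in the KFP SDK):
import re


def sanitize_k8s_name_sketch(name, allow_capital_underscore=False):
  if allow_capital_underscore:
    return re.sub('-+', '-', re.sub('[^-_0-9A-Za-z]+', '-', name)).strip('-')
  return re.sub('-+', '-', re.sub('[^-0-9a-z]+', '-', name.lower())).strip('-')


print(sanitize_k8s_name_sketch('Train Model (stage 1)'))         # train-model-stage-1
print(sanitize_k8s_name_sketch('MLPipeline_UI_metadata', True))  # MLPipeline_UI_metadata
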
def get_arguments_for_sub_group(
    self,
    sub_group: Union[OpsGroup, dsl._container_op.BaseOp],
    is_recursive_subgroup: Optional[bool],
    inputs: Dict[Text, Tuple[Text, Text]],
):
  arguments = []
  for param_name, dependent_name in inputs[sub_group.name]:
    if is_recursive_subgroup:
      for input_name, input in sub_group.arguments.items():
        if param_name == input.full_name:
          break
      referenced_input = sub_group.recursive_ref.arguments[input_name]
      argument_name = referenced_input.full_name
    else:
      argument_name = param_name

    # Preparing argument. It can be a pipeline input reference, a task output
    # reference, or a loop item (or an attribute of a loop item).
    sanitized_loop_arg_full_name = '---'
    if isinstance(sub_group, dsl.ParallelFor):
      sanitized_loop_arg_full_name = sanitize_k8s_name(sub_group.loop_args.full_name)
    arg_ref_full_name = sanitize_k8s_name(param_name)
    # We only care about the reference to the current loop item, not the outer loops.
    if isinstance(sub_group, dsl.ParallelFor) and arg_ref_full_name.startswith(
        sanitized_loop_arg_full_name):
      if arg_ref_full_name == sanitized_loop_arg_full_name:
        argument_value = '{{item}}'
      elif _for_loop.LoopArgumentVariable.name_is_loop_arguments_variable(param_name):
        subvar_name = _for_loop.LoopArgumentVariable.get_subvar_name(param_name)
        argument_value = '{{item.%s}}' % subvar_name
      else:
        raise ValueError(
            'Argument seems to reference the loop item, but is neither the item '
            'itself nor an attribute of the item. param_name: {}'.format(param_name))
    else:
      if dependent_name:
        argument_value = '{{tasks.%s.outputs.parameters.%s}}' % (dependent_name, param_name)
      else:
        argument_value = '{{inputs.parameters.%s}}' % param_name

    arguments.append({
        'name': argument_name,
        'value': argument_value,
    })

  arguments.sort(key=lambda x: x['name'])

  return arguments

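
# The ParallelFor branch above maps references to the current loop item onto
# Argo's '{{item}}' / '{{item.<field>}}' placeholders. A simplified sketch of
# that mapping, assuming the '-subvar-' naming convention used by
# LoopArgumentVariable (names here are made up):
def resolve_loop_reference(param_name: str, loop_arg_name: str) -> str:
  if param_name == loop_arg_name:
    return '{{item}}'  # the loop item itself
  prefix = loop_arg_name + '-subvar-'
  if param_name.startswith(prefix):
    return '{{item.%s}}' % param_name[len(prefix):]  # one field of a dict item
  raise ValueError('not a reference to the current loop item: %s' % param_name)


print(resolve_loop_reference('loop-item-param-1', 'loop-item-param-1'))           # {{item}}
print(resolve_loop_reference('loop-item-param-1-subvar-a', 'loop-item-param-1'))  # {{item.a}}
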
def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None,
                      params={}, pipeline_id=None, namespace=None):
  """Schedule a pipeline on Kubeflow to run based upon a cron job.

  Arguments:
    experiment_id {string} -- The experiment within which we would like Kubeflow
      to run the job
    job_name {string} -- The name of the scheduled job

  Keyword Arguments:
    pipeline_package_path {string} -- The path to the pipeline package (default: {None})
    params {dict} -- The pipeline parameters (default: {{}})
    pipeline_id {string} -- The id of the pipeline which should run on schedule (default: {None})
    namespace {string} -- The namespace within which the pipeline should run (default: {None})
  """
  pipeline_json_string = None
  if pipeline_package_path:
    pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
    pipeline_json_string = json.dumps(pipeline_obj)

  api_params = [kfp_server_api.ApiParameter(
      name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
      value=str(v)) for k, v in params.items()]

  resource_references = []
  key = kfp_server_api.models.ApiResourceKey(
      id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
  reference = kfp_server_api.models.ApiResourceReference(
      key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
  resource_references.append(reference)

  if namespace is not None:
    key = kfp_server_api.models.ApiResourceKey(
        id=namespace, type=kfp_server_api.models.ApiResourceType.NAMESPACE)
    reference = kfp_server_api.models.ApiResourceReference(
        key=key, name=namespace,
        relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)

  spec = kfp_server_api.models.ApiPipelineSpec(
      pipeline_id=pipeline_id,
      workflow_manifest=pipeline_json_string,
      parameters=api_params)

  trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(
      cron="0 0 9 ? * 2-6")
  job_id = ''.join(random.choices(
      string.ascii_uppercase + string.digits, k=10))
  schedule_body = kfp_server_api.models.ApiJob(
      id=job_id,
      name="TestScheduling",
      description="Schedule the pipeline using the API",
      pipeline_spec=spec,
      resource_references=resource_references,
      max_concurrency=10,
      trigger=trigger,
      enabled=True,
  )

def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None,
                 params={}, pipeline_id=None, namespace=None):
  """Run a specified pipeline.

  Args:
    experiment_id: The string id of an experiment.
    job_name: Name of the job.
    pipeline_package_path: Local path of the pipeline package (the filename should
      end with one of the following: .tar.gz, .tgz, .zip, .yaml, .yml).
    params: A dictionary with key (string) as param name and value (string)
      as param value.
    pipeline_id: The string ID of a pipeline.
    namespace: Kubernetes namespace where the pipeline runs are created.
      For single-user deployments, leave it as None; for multi-user deployments,
      pass a namespace where the user is authorized.

  Returns:
    A run object. Most important field is id.
  """
  pipeline_json_string = None
  if pipeline_package_path:
    pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
    pipeline_json_string = json.dumps(pipeline_obj)

  api_params = [kfp_server_api.ApiParameter(
      name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
      value=str(v)) for k, v in params.items()]

  resource_references = []
  key = kfp_server_api.models.ApiResourceKey(
      id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
  reference = kfp_server_api.models.ApiResourceReference(
      key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
  resource_references.append(reference)

  if namespace is not None:
    key = kfp_server_api.models.ApiResourceKey(
        id=namespace, type=kfp_server_api.models.ApiResourceType.NAMESPACE)
    reference = kfp_server_api.models.ApiResourceReference(
        key=key, name=namespace,
        relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)

  spec = kfp_server_api.models.ApiPipelineSpec(
      pipeline_id=pipeline_id,
      workflow_manifest=pipeline_json_string,
      parameters=api_params)
  run_body = kfp_server_api.models.ApiRun(
      pipeline_spec=spec,
      resource_references=resource_references,
      name=job_name)

  response = self._run_api.create_run(body=run_body)

  if self._is_ipython():
    import IPython
    html = ('Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
            % (self._get_url_prefix(), response.run.id))
    IPython.display.display(IPython.display.HTML(html))
  return response.run

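
# Usage sketch for the method above via kfp.Client. It assumes a reachable KFP
# endpoint; the host URL, package path, and parameter names are placeholders.
import kfp

client = kfp.Client(host='http://localhost:8080')
experiment = client.create_experiment(name='dev-experiment')
run = client.run_pipeline(
    experiment_id=experiment.id,
    job_name='my-first-run',
    pipeline_package_path='my_pipeline.yaml',
    params={'learning_rate': '0.01'},
)
print(run.id)
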
def _create_job_config(self, experiment_id, params, pipeline_package_path,
                       pipeline_id, version_id):
  """Create a JobConfig with spec and resource_references.

  Args:
    experiment_id: The id of an experiment.
    pipeline_package_path: Local path of the pipeline package (the filename should
      end with one of the following: .tar.gz, .tgz, .zip, .yaml, .yml).
    params: A dictionary with key (string) as param name and value (string)
      as param value.
    pipeline_id: The id of a pipeline.
    version_id: The id of a pipeline version.
      If both pipeline_id and version_id are specified, version_id will take
      precedence. If only pipeline_id is specified, the default version of this
      pipeline is used to create the run.

  Returns:
    A JobConfig object with attributes spec and resource_references.
  """

  class JobConfig:
    def __init__(self, spec, resource_references):
      self.spec = spec
      self.resource_references = resource_references

  pipeline_json_string = None
  if pipeline_package_path:
    pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
    pipeline_json_string = json.dumps(pipeline_obj)
  api_params = [
      kfp_server_api.ApiParameter(
          name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
          value=str(v) if type(v) not in (list, dict) else json.dumps(v))
      for k, v in params.items()
  ]
  resource_references = []
  key = kfp_server_api.models.ApiResourceKey(
      id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
  reference = kfp_server_api.models.ApiResourceReference(
      key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
  resource_references.append(reference)
  if version_id:
    key = kfp_server_api.models.ApiResourceKey(
        id=version_id, type=kfp_server_api.models.ApiResourceType.PIPELINE_VERSION)
    reference = kfp_server_api.models.ApiResourceReference(
        key=key, relationship=kfp_server_api.models.ApiRelationship.CREATOR)
    resource_references.append(reference)
  spec = kfp_server_api.models.ApiPipelineSpec(
      pipeline_id=pipeline_id,
      workflow_manifest=pipeline_json_string,
      parameters=api_params)
  return JobConfig(spec=spec, resource_references=resource_references)

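
# Unlike the plain run_pipeline variants, parameter values here are JSON-encoded
# when they are lists or dicts, so structured values survive the round trip.
# Minimal standalone sketch of just that serialization rule:
import json


def serialize_param_value(v):
  return str(v) if type(v) not in (list, dict) else json.dumps(v)


print(serialize_param_value(3))             # '3'
print(serialize_param_value([1, 2, 3]))     # '[1, 2, 3]'
print(serialize_param_value({'lr': 0.01}))  # '{"lr": 0.01}'
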
def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None,
                 params={}, pipeline_id=None):
  """Run a specified pipeline.

  Args:
    experiment_id: The string id of an experiment.
    job_name: Name of the job.
    pipeline_package_path: Local path of the pipeline package (the filename should
      end with one of the following: .tar.gz, .tgz, .zip, .yaml, .yml).
    params: A dictionary with key (string) as param name and value (string)
      as param value.
    pipeline_id: The string ID of a pipeline.

  Returns:
    A run object. Most important field is id.
  """
  pipeline_json_string = None
  if pipeline_package_path:
    pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
    pipeline_json_string = json.dumps(pipeline_obj)
  api_params = [
      kfp_server_api.ApiParameter(name=sanitize_k8s_name(k), value=str(v))
      for k, v in params.items()
  ]
  key = kfp_server_api.models.ApiResourceKey(
      id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
  reference = kfp_server_api.models.ApiResourceReference(
      key, kfp_server_api.models.ApiRelationship.OWNER)
  spec = kfp_server_api.models.ApiPipelineSpec(
      pipeline_id=pipeline_id,
      workflow_manifest=pipeline_json_string,
      parameters=api_params)
  run_body = kfp_server_api.models.ApiRun(
      pipeline_spec=spec, resource_references=[reference], name=job_name)

  response = self._run_api.create_run(body=run_body)

  if self._is_ipython():
    import IPython
    html = ('Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
            % (self._get_url_prefix(), response.run.id))
    IPython.display.display(IPython.display.HTML(html))
  return response.run

def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None,
                      params={}, pipeline_id=None, namespace=None, cron_schedule=None,
                      description=None, max_concurrency=10, no_catchup=None):
  """Schedule a pipeline on Kubeflow to run based upon a cron job.

  Arguments:
    experiment_id {string} -- The experiment within which we would like Kubeflow
      to run the job
    job_name {string} -- The name of the scheduled job

  Keyword Arguments:
    pipeline_package_path {string} -- The path to the pipeline package (default: {None})
    params {dict} -- The pipeline parameters (default: {{}})
    pipeline_id {string} -- The id of the pipeline which should run on schedule (default: {None})
    namespace {string} -- The namespace within which the pipeline should run (default: {None})
    cron_schedule {string} -- The cron expression describing the schedule,
      e.g. "0 0 9 ? * 2-6" (default: {None})
    description {string} -- The description of the scheduled job (default: {None})
    max_concurrency {int} -- Max number of concurrent runs scheduled (default: {10})
    no_catchup {boolean} -- Whether the recurring run should catch up if behind schedule.
      For example, if the recurring run is paused for a while and re-enabled afterwards:
      if no_catchup=False, the scheduler will catch up on (backfill) each missed interval;
      otherwise, it only schedules the latest interval if more than one interval is ready
      to be scheduled. Usually, if your pipeline handles backfill internally, you should
      turn catchup off to avoid duplicate backfill. (default: {False})
  """
  pipeline_json_string = None
  if pipeline_package_path:
    pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
    pipeline_json_string = json.dumps(pipeline_obj)

  api_params = [kfp_server_api.ApiParameter(
      name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
      value=str(v)) for k, v in params.items()]

  resource_references = []
  key = kfp_server_api.models.ApiResourceKey(
      id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
  reference = kfp_server_api.models.ApiResourceReference(
      key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
  resource_references.append(reference)

  if namespace is not None:
    key = kfp_server_api.models.ApiResourceKey(
        id=namespace, type=kfp_server_api.models.ApiResourceType.NAMESPACE)
    reference = kfp_server_api.models.ApiResourceReference(
        key=key, name=namespace,
        relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)

  spec = kfp_server_api.models.ApiPipelineSpec(
      pipeline_id=pipeline_id,
      workflow_manifest=pipeline_json_string,
      parameters=api_params)

  trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(
      cron=cron_schedule)  # Example: cron_schedule="0 0 9 ? * 2-6"
  job_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
  schedule_body = kfp_server_api.models.ApiJob(
      id=job_id,
      name=job_name,
      description=description,
      pipeline_spec=spec,
      resource_references=resource_references,
      max_concurrency=max_concurrency,
      no_catchup=no_catchup,
      trigger=trigger,
      enabled=True,
  )
  # [TODO] Add link to the scheduled job.
  response = self._job_api.create_job(body=schedule_body)

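
# Usage sketch, assuming `utils` is an instance of the helper class that defines
# schedule_pipeline above (a test/utility wrapper, not the public kfp.Client API);
# the experiment id, package path, and parameters are placeholders.
utils.schedule_pipeline(
    experiment_id=experiment.id,
    job_name='nightly-training',
    pipeline_package_path='my_pipeline.yaml',
    params={'learning_rate': '0.01'},
    cron_schedule='0 0 9 ? * 2-6',  # 09:00 on days 2-6 of the week (6-field cron)
    max_concurrency=1,
    no_catchup=True,
)
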
def _create_workflow(self,
                     pipeline_func: Callable,
                     pipeline_name: Text = None,
                     pipeline_description: Text = None,
                     params_list: List[dsl.PipelineParam] = None,
                     pipeline_conf: dsl.PipelineConf = None,
                     ) -> List[Dict[Text, Any]]:  # Tekton change, signature
  """Internal implementation of create_workflow."""
  params_list = params_list or []
  argspec = inspect.getfullargspec(pipeline_func)

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _extract_pipeline_metadata(pipeline_func)
  pipeline_meta.name = pipeline_name or pipeline_meta.name
  pipeline_meta.description = pipeline_description or pipeline_meta.description
  pipeline_name = sanitize_k8s_name(pipeline_meta.name)

  # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
  # will be resolved immediately in place when being passed to each component.
  default_param_values = {}
  for param in params_list:
    default_param_values[param.name] = param.value
    param.value = None

  # Currently only allow specifying pipeline params at one place.
  if params_list and pipeline_meta.inputs:
    raise ValueError('Either specify pipeline params in the pipeline function, '
                     'or in "params_list", but not both.')

  args_list = []
  for arg_name in argspec.args:
    arg_type = None
    for input in pipeline_meta.inputs or []:
      if arg_name == input.name:
        arg_type = input.type
        break
    args_list.append(dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                       param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Configuration passed to the compiler is overriding. Unfortunately, it's not
  # trivial to detect whether the dsl_pipeline.conf was ever modified.
  pipeline_conf = pipeline_conf or dsl_pipeline.conf

  self._validate_exit_handler(dsl_pipeline)
  self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                               for arg_name in argspec.args]
    if argspec.defaults:
      for arg, default in zip(reversed(args_list_with_defaults),
                              reversed(argspec.defaults)):
        arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
  elif params_list:
    # Or, if args are provided by params_list, fill in pipeline_meta.
    for param in params_list:
      param.value = default_param_values[param.name]

    args_list_with_defaults = params_list
    pipeline_meta.inputs = [
        InputSpec(
            name=param.name,
            type=param.param_type,
            default=param.value) for param in params_list]

  op_transformers = [add_pod_env]
  op_transformers.extend(pipeline_conf.op_transformers)

  workflow = self._create_pipeline_workflow(
      args_list_with_defaults,
      dsl_pipeline,
      op_transformers,
      pipeline_conf,
  )

  from ._data_passing_rewriter import fix_big_data_passing
  workflow = fix_big_data_passing(workflow)

  import json
  pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]  # Tekton change
  pipeline.setdefault('metadata', {}).setdefault('annotations', {})[
      'pipelines.kubeflow.org/pipeline_spec'] = json.dumps(
          pipeline_meta.to_dict(), sort_keys=True)

  return workflow

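
# Usage sketch for the Tekton-targeting compiler above, assuming the kfp-tekton
# package is installed; the pipeline below is a made-up single-step example.
import kfp.dsl as dsl
from kfp_tekton.compiler import TektonCompiler


@dsl.pipeline(name='echo', description='Single-step example')
def echo_pipeline(message: str = 'hello'):
  dsl.ContainerOp(
      name='echo',
      image='alpine:3.14',
      command=['echo', message],
  )


# Emits Tekton YAML rather than an Argo Workflow.
TektonCompiler().compile(echo_pipeline, 'echo_pipeline.yaml')
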
def _create_pipeline_v2(
    self,
    pipeline_func: Callable[..., Any],
    pipeline_root: Optional[str] = None,
    pipeline_name: Optional[str] = None,
    pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
) -> pipeline_spec_pb2.PipelineJob:
  """Creates a pipeline instance and constructs the pipeline spec from it.

  Args:
    pipeline_func: Pipeline function with @dsl.pipeline decorator.
    pipeline_root: The root of the pipeline outputs. Optional.
    pipeline_name: The name of the pipeline. Optional.
    pipeline_parameters_override: The mapping from parameter names to values.
      Optional.

  Returns:
    A PipelineJob proto representing the compiled pipeline.
  """

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _python_op._extract_component_interface(pipeline_func)
  pipeline_name = pipeline_name or pipeline_meta.name

  pipeline_root = pipeline_root or getattr(pipeline_func, 'output_directory', None)
  if not pipeline_root:
    warnings.warn('pipeline_root is None or empty. A valid pipeline_root '
                  'must be provided at job submission.')

  args_list = []
  signature = inspect.signature(pipeline_func)
  for arg_name in signature.parameters:
    arg_type = None
    for pipeline_input in pipeline_meta.inputs or []:
      if arg_name == pipeline_input.name:
        arg_type = pipeline_input.type
        break
    args_list.append(
        dsl.PipelineParam(
            sanitize_k8s_name(arg_name, True), param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  self._sanitize_and_inject_artifact(dsl_pipeline)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(
            sanitize_k8s_name(input_spec.name, True),
            param_type=input_spec.type,
            value=input_spec.default) for input_spec in pipeline_meta.inputs
    ]

  # Making the pipeline group name unique to prevent name clashes with templates
  pipeline_group = dsl_pipeline.groups[0]
  temp_pipeline_group_name = uuid.uuid4().hex
  pipeline_group.name = temp_pipeline_group_name

  pipeline_spec = self._create_pipeline_spec(
      args_list_with_defaults,
      dsl_pipeline,
  )

  pipeline_parameters = {
      param.name: param for param in args_list_with_defaults
  }
  # Update pipeline parameters override if there were any.
  pipeline_parameters_override = pipeline_parameters_override or {}
  for k, v in pipeline_parameters_override.items():
    if k not in pipeline_parameters:
      raise ValueError('Pipeline parameter {} does not match any known '
                       'pipeline argument.'.format(k))
    pipeline_parameters[k].value = v

  runtime_config = compiler_utils.build_runtime_config_spec(
      output_directory=pipeline_root, pipeline_parameters=pipeline_parameters)
  pipeline_job = pipeline_spec_pb2.PipelineJob(runtime_config=runtime_config)
  pipeline_job.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))

  return pipeline_job

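
# The override handling above only accepts names that already exist as pipeline
# arguments and rewrites their values in place. A standalone sketch of just that
# check, using a plain dict instead of PipelineParam objects:
def apply_overrides(pipeline_parameters, overrides):
  for name, value in (overrides or {}).items():
    if name not in pipeline_parameters:
      raise ValueError('Pipeline parameter {} does not match any known '
                       'pipeline argument.'.format(name))
    pipeline_parameters[name] = value
  return pipeline_parameters


print(apply_overrides({'learning_rate': 0.01, 'epochs': 10}, {'epochs': 20}))
# {'learning_rate': 0.01, 'epochs': 20}
# apply_overrides({'learning_rate': 0.01}, {'batch_size': 64}) would raise ValueError.
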
def _create_pipeline(
    self,
    pipeline_func: Callable[..., Any],
    output_directory: str,
    pipeline_name: Optional[str] = None,
    pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
) -> pipeline_spec_pb2.PipelineJob:
  """Creates a pipeline instance and constructs the pipeline spec from it.

  Args:
    pipeline_func: Pipeline function with @dsl.pipeline decorator.
    pipeline_name: The name of the pipeline. Optional.
    output_directory: The root of the pipeline outputs.
    pipeline_parameters_override: The mapping from parameter names to values.
      Optional.

  Returns:
    A PipelineJob proto representing the compiled pipeline.
  """

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _python_op._extract_component_interface(pipeline_func)
  pipeline_name = pipeline_name or pipeline_meta.name

  args_list = []
  signature = inspect.signature(pipeline_func)
  for arg_name in signature.parameters:
    arg_type = None
    for pipeline_input in pipeline_meta.inputs or []:
      if arg_name == pipeline_input.name:
        arg_type = pipeline_input.type
        break
    args_list.append(
        dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                          param_type=arg_type))

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list)

  # Fill in the default values.
  args_list_with_defaults = []
  if pipeline_meta.inputs:
    args_list_with_defaults = [
        dsl.PipelineParam(sanitize_k8s_name(input_spec.name, True),
                          param_type=input_spec.type,
                          value=input_spec.default)
        for input_spec in pipeline_meta.inputs
    ]

  pipeline_spec = self._create_pipeline_spec(
      args_list_with_defaults,
      dsl_pipeline,
  )

  pipeline_parameters = {
      arg.name: arg.value for arg in args_list_with_defaults
  }
  # Update pipeline parameters override if there were any.
  pipeline_parameters.update(pipeline_parameters_override or {})

  runtime_config = compiler_utils.build_runtime_config_spec(
      output_directory=output_directory, pipeline_parameters=pipeline_parameters)
  pipeline_job = pipeline_spec_pb2.PipelineJob(runtime_config=runtime_config)
  pipeline_job.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))

  return pipeline_job

def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None,
                 params={}, pipeline_id=None, version_id=None):
  """Run a specified pipeline.

  Args:
    experiment_id: The string id of an experiment.
    job_name: Name of the job.
    pipeline_package_path: Local path of the pipeline package (the filename should
      end with one of the following: .tar.gz, .tgz, .zip, .yaml, .yml).
    params: A dictionary with key (string) as param name and value (string)
      as param value.
    pipeline_id: The string ID of a pipeline.
    version_id: The string ID of a pipeline version.
      If both pipeline_id and version_id are specified, pipeline_id will take
      precedence. This will change in a future version, so it is recommended to
      use version_id by itself.

  Returns:
    A run object. Most important field is id.
  """
  pipeline_json_string = None
  if pipeline_package_path:
    pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
    pipeline_json_string = json.dumps(pipeline_obj)
  api_params = [
      kfp_server_api.ApiParameter(
          name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
          value=str(v)) for k, v in params.items()
  ]
  resource_references = []
  key = kfp_server_api.models.ApiResourceKey(
      id=experiment_id, type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
  reference = kfp_server_api.models.ApiResourceReference(
      key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
  resource_references.append(reference)
  if version_id:
    key = kfp_server_api.models.ApiResourceKey(
        id=version_id, type=kfp_server_api.models.ApiResourceType.PIPELINE_VERSION)
    reference = kfp_server_api.models.ApiResourceReference(
        key=key, relationship=kfp_server_api.models.ApiRelationship.CREATOR)
    resource_references.append(reference)
  spec = kfp_server_api.models.ApiPipelineSpec(
      pipeline_id=pipeline_id,
      workflow_manifest=pipeline_json_string,
      parameters=api_params)
  run_body = kfp_server_api.models.ApiRun(
      pipeline_spec=spec, resource_references=resource_references, name=job_name)

  response = self._run_api.create_run(body=run_body)

  if self._is_ipython():
    import IPython
    html = ('Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
            % (self._get_url_prefix(), response.run.id))
    IPython.display.display(IPython.display.HTML(html))
  return response.run

def _create_workflow(
    self,
    pipeline_func: Callable,
    pipeline_name: Optional[Text] = None,
    pipeline_description: Optional[Text] = None,
    params_list: Optional[List[dsl.PipelineParam]] = None,
    pipeline_conf: Optional[dsl.PipelineConf] = None,
) -> Dict[Text, Any]:
  """Internal implementation of create_workflow."""
  params_list = params_list or []

  # Create the arg list with no default values and call pipeline function.
  # Assign type information to the PipelineParam.
  pipeline_meta = _extract_pipeline_metadata(pipeline_func)
  pipeline_meta.name = pipeline_name or pipeline_meta.name
  pipeline_meta.description = pipeline_description or pipeline_meta.description
  pipeline_name = sanitize_k8s_name(pipeline_meta.name)

  # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
  # will be resolved immediately in place when being passed to each component.
  default_param_values = OrderedDict()

  if self._pipeline_root_param:
    params_list.append(self._pipeline_root_param)
  if self._pipeline_name_param:
    params_list.append(self._pipeline_name_param)

  for param in params_list:
    default_param_values[param.name] = param.value
    param.value = None

  args_list = []
  kwargs_dict = dict()
  signature = inspect.signature(pipeline_func)
  for arg_name, arg in signature.parameters.items():
    arg_type = None
    for input in pipeline_meta.inputs or []:
      if arg_name == input.name:
        arg_type = input.type
        break
    param = dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type)
    if arg.kind == inspect.Parameter.KEYWORD_ONLY:
      kwargs_dict[arg_name] = param
    else:
      args_list.append(param)

  with dsl.Pipeline(pipeline_name) as dsl_pipeline:
    pipeline_func(*args_list, **kwargs_dict)

  # Configuration passed to the compiler is overriding. Unfortunately, it's not
  # trivial to detect whether the dsl_pipeline.conf was ever modified.
  pipeline_conf = pipeline_conf or dsl_pipeline.conf

  self._validate_exit_handler(dsl_pipeline)
  self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

  # Fill in the default values by merging two param lists.
  args_list_with_defaults = OrderedDict()
  if pipeline_meta.inputs:
    args_list_with_defaults = OrderedDict([
        (sanitize_k8s_name(input_spec.name, True), input_spec.default)
        for input_spec in pipeline_meta.inputs
    ])

  if params_list:
    # Or, if args are provided by params_list, fill in pipeline_meta.
    for k, v in default_param_values.items():
      args_list_with_defaults[k] = v

    pipeline_meta.inputs = pipeline_meta.inputs or []
    for param in params_list:
      pipeline_meta.inputs.append(
          InputSpec(
              name=param.name,
              type=param.param_type,
              default=default_param_values[param.name]))

  op_transformers = [add_pod_env]
  pod_labels = {_SDK_VERSION_LABEL: kfp.__version__, _SDK_ENV_LABEL: _SDK_ENV_DEFAULT}
  op_transformers.append(add_pod_labels(pod_labels))
  op_transformers.extend(pipeline_conf.op_transformers)

  if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
    # Add self._pipeline_name_param and self._pipeline_root_param to ops inputs
    # if they don't exist already.
    for op in dsl_pipeline.ops.values():
      insert_pipeline_name_param = True
      insert_pipeline_root_param = True
      for param in op.inputs:
        if param.name == self._pipeline_name_param.name:
          insert_pipeline_name_param = False
        elif param.name == self._pipeline_root_param.name:
          insert_pipeline_root_param = False

      if insert_pipeline_name_param:
        op.inputs.append(self._pipeline_name_param)
      if insert_pipeline_root_param:
        op.inputs.append(self._pipeline_root_param)

  workflow = self._create_pipeline_workflow(
      args_list_with_defaults,
      dsl_pipeline,
      op_transformers,
      pipeline_conf,
  )

  from ._data_passing_rewriter import fix_big_data_passing
  workflow = fix_big_data_passing(workflow)

  workflow = _data_passing_rewriter.add_pod_name_passing(
      workflow, str(self._pipeline_root_param or None))

  if pipeline_conf and pipeline_conf.data_passing_method is not None:
    workflow = pipeline_conf.data_passing_method(workflow)

  metadata = workflow.setdefault('metadata', {})
  annotations = metadata.setdefault('annotations', {})
  labels = metadata.setdefault('labels', {})

  annotations[_SDK_VERSION_LABEL] = kfp.__version__
  annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = \
      datetime.datetime.now().isoformat()
  annotations['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(
      pipeline_meta.to_dict(), sort_keys=True)

  if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
    annotations['pipelines.kubeflow.org/v2_pipeline'] = "true"
    labels['pipelines.kubeflow.org/v2_pipeline'] = "true"

  # Labels might be logged better than annotations so adding some information here as well
  labels[_SDK_VERSION_LABEL] = kfp.__version__

  return workflow

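
# The V2_COMPATIBLE branch above injects the shared pipeline-name and
# pipeline-root parameters into every op exactly once. A standalone sketch of
# that idempotent injection, with a stand-in parameter class and made-up names:
class _Param:
  def __init__(self, name):
    self.name = name


pipeline_name_param = _Param('pipeline-name')
pipeline_root_param = _Param('pipeline-root')
op_inputs = [_Param('pipeline-root'), _Param('learning-rate')]

for shared in (pipeline_name_param, pipeline_root_param):
  if not any(p.name == shared.name for p in op_inputs):
    op_inputs.append(shared)

print([p.name for p in op_inputs])  # ['pipeline-root', 'learning-rate', 'pipeline-name']
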
def _group_to_dag_template(self, group, inputs, outputs, dependencies):
  """Generate template given an OpsGroup.

  inputs, outputs, dependencies are all helper dicts.
  """
  template = {'name': group.name}
  if group.parallelism is not None:
    template["parallelism"] = group.parallelism

  # Generate inputs section.
  if inputs.get(group.name, None):
    template_inputs = [{'name': x[0]} for x in inputs[group.name]]
    template_inputs.sort(key=lambda x: x['name'])
    template['inputs'] = {
        'parameters': template_inputs
    }

  # Generate outputs section.
  if outputs.get(group.name, None):
    template_outputs = []
    for param_name, dependent_name in outputs[group.name]:
      template_outputs.append({
          'name': param_name,
          'valueFrom': {
              'parameter': '{{tasks.%s.outputs.parameters.%s}}' % (dependent_name, param_name)
          }
      })
    template_outputs.sort(key=lambda x: x['name'])
    template['outputs'] = {'parameters': template_outputs}

  # Generate tasks section.
  tasks = []
  sub_groups = group.groups + group.ops
  for sub_group in sub_groups:
    is_recursive_subgroup = (isinstance(sub_group, OpsGroup) and sub_group.recursive_ref)
    # Special handling for recursive subgroup: use the existing opsgroup name
    if is_recursive_subgroup:
      task = {
          'name': sub_group.recursive_ref.name,
          'template': sub_group.recursive_ref.name,
      }
    else:
      task = {
          'name': sub_group.name,
          'template': sub_group.name,
      }
    if isinstance(sub_group, dsl.OpsGroup) and sub_group.type == 'condition':
      subgroup_inputs = inputs.get(sub_group.name, [])
      condition = sub_group.condition

      operand1_value = self._resolve_value_or_reference(condition.operand1, subgroup_inputs)
      operand2_value = self._resolve_value_or_reference(condition.operand2, subgroup_inputs)
      if condition.operator in ['==', '!=']:
        operand1_value = '"' + operand1_value + '"'
        operand2_value = '"' + operand2_value + '"'
      task['when'] = '{} {} {}'.format(operand1_value, condition.operator, operand2_value)

    # Generate dependencies section for this task.
    if dependencies.get(sub_group.name, None):
      group_dependencies = list(dependencies[sub_group.name])
      group_dependencies.sort()
      task['dependencies'] = group_dependencies

    # Generate arguments section for this task.
    if inputs.get(sub_group.name, None):
      task['arguments'] = {'parameters': self.get_arguments_for_sub_group(
          sub_group, is_recursive_subgroup, inputs)}

    # additional task modifications for withItems and withParam
    if isinstance(sub_group, dsl.ParallelFor):
      if sub_group.items_is_pipeline_param:
        # these loop args are a 'withParam' rather than 'withItems'.
        # i.e., rather than a static list, they are either the output of another task
        # or were input as global pipeline parameters
        pipeline_param = sub_group.loop_args.items_or_pipeline_param
        withparam_value = self._resolve_task_pipeline_param(pipeline_param, group.type)
        if pipeline_param.op_name:
          # these loop args are the output of another task
          if 'dependencies' not in task or task['dependencies'] is None:
            task['dependencies'] = []
          if sanitize_k8s_name(
              pipeline_param.op_name) not in task['dependencies'] and group.type != 'subgraph':
            task['dependencies'].append(
                sanitize_k8s_name(pipeline_param.op_name))
        task['withParam'] = withparam_value
      else:
        # Need to sanitize the dict keys for consistency.
        loop_tasks = sub_group.loop_args.to_list_for_task_yaml()
        nested_pipeline_params = extract_pipelineparams_from_any(loop_tasks)

        # Set dependencies in case of nested pipeline_params
        map_to_tmpl_var = {
            str(p): self._resolve_task_pipeline_param(p, group.type)
            for p in nested_pipeline_params
        }
        for pipeline_param in nested_pipeline_params:
          if pipeline_param.op_name:
            # these pipeline_param are the output of another task
            if 'dependencies' not in task or task['dependencies'] is None:
              task['dependencies'] = []
            if sanitize_k8s_name(
                pipeline_param.op_name) not in task['dependencies']:
              task['dependencies'].append(
                  sanitize_k8s_name(pipeline_param.op_name))

        sanitized_tasks = []
        if isinstance(loop_tasks[0], dict):
          for argument_set in loop_tasks:
            c_dict = {}
            for k, v in argument_set.items():
              c_dict[sanitize_k8s_name(k, True)] = v
            sanitized_tasks.append(c_dict)
        else:
          sanitized_tasks = loop_tasks
        # Replace pipeline param if map_to_tmpl_var not empty
        task['withItems'] = (_process_obj(sanitized_tasks, map_to_tmpl_var)
                             if map_to_tmpl_var else sanitized_tasks)

    # We will sort dependencies to have deterministic yaml and thus stable tests
    if task.get('dependencies'):
      task['dependencies'].sort()

    tasks.append(task)
  tasks.sort(key=lambda x: x['name'])
  template['dag'] = {'tasks': tasks}
  return template

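
# Illustrative, made-up task fragments for the two ParallelFor cases handled
# above, showing roughly the shape of the dicts the code builds into the DAG
# template (names and values are placeholders):

# Static list passed to dsl.ParallelFor -> 'withItems' with sanitized dict keys.
static_loop_task = {
    'name': 'for-loop-1',
    'template': 'for-loop-1',
    'withItems': [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}],
}

# Loop over another task's output -> 'withParam' holding a placeholder plus a
# dependency on the producing task.
dynamic_loop_task = {
    'name': 'for-loop-2',
    'template': 'for-loop-2',
    'dependencies': ['produce-list'],
    'withParam': '{{tasks.produce-list.outputs.parameters.produce-list-items}}',
}
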
def _get_inputs_outputs(
    self,
    pipeline,
    root_group,
    op_groups,
    opsgroup_groups,
    condition_params,
    op_name_to_for_loop_op: Dict[Text, dsl.ParallelFor],
):
  """Get inputs and outputs of each group and op.

  Returns:
    A tuple (inputs, outputs). inputs and outputs are dicts with key being the
    group/op names and values being list of tuples (param_name, producing_op_name).
    producing_op_name is the name of the op that produces the param. If the param
    is a pipeline param (no producer op), then producing_op_name is None.
  """
  inputs = defaultdict(set)
  outputs = defaultdict(set)

  for op in pipeline.ops.values():
    # op's inputs and all params used in conditions for that op are both considered.
    for param in op.inputs + list(condition_params[op.name]):
      # if the value is already provided (immediate value), then no need to expose
      # it as input for its parent groups.
      if param.value:
        continue
      if param.op_name:
        upstream_op = pipeline.ops[param.op_name]
        upstream_groups, downstream_groups = \
            self._get_uncommon_ancestors(op_groups, opsgroup_groups, upstream_op, op)
        for i, group_name in enumerate(downstream_groups):
          if i == 0:
            # If it is the first uncommon downstream group, then the input comes from
            # the first uncommon upstream group.
            inputs[group_name].add((param.full_name, upstream_groups[0]))
          else:
            # If not the first downstream group, then the input is passed down from
            # its ancestor groups so the upstream group is None.
            inputs[group_name].add((param.full_name, None))
        for i, group_name in enumerate(upstream_groups):
          if i == len(upstream_groups) - 1:
            # If last upstream group, it is an operator and output comes from container.
            outputs[group_name].add((param.full_name, None))
          else:
            # If not last upstream group, output value comes from one of its children.
            outputs[group_name].add((param.full_name, upstream_groups[i + 1]))
      else:
        if not op.is_exit_handler:
          for group_name in op_groups[op.name][::-1]:
            # if group is for loop group and param is that loop's param, then the param
            # is created by that for loop ops_group and it shouldn't be an input to
            # any of its parent groups.
            inputs[group_name].add((param.full_name, None))
            if group_name in op_name_to_for_loop_op:
              # for example:
              #   loop_group.loop_args.name = 'loop-item-param-99ca152e'
              #   param.name = 'loop-item-param-99ca152e--a'
              loop_group = op_name_to_for_loop_op[group_name]
              if loop_group.loop_args.name in param.name:
                break

  # Generate the input/output for recursive opsgroups.
  # It propagates the recursive opsgroups' IO to their ancestor opsgroups.
  def _get_inputs_outputs_recursive_opsgroup(group):
    # TODO: refactor the following code with the above
    if group.recursive_ref:
      params = [(param, False) for param in group.inputs]
      params.extend([(param, True) for param in list(condition_params[group.name])])
      for param, is_condition_param in params:
        if param.value:
          continue
        full_name = param.full_name
        if param.op_name:
          upstream_op = pipeline.ops[param.op_name]
          upstream_groups, downstream_groups = \
              self._get_uncommon_ancestors(op_groups, opsgroup_groups, upstream_op, group)
          for i, g in enumerate(downstream_groups):
            if i == 0:
              inputs[g].add((full_name, upstream_groups[0]))
            # There is no need to pass the condition param as argument to the downstream ops.
            # TODO: this might also apply to ops. add a TODO here and think about it.
            elif i == len(downstream_groups) - 1 and is_condition_param:
              continue
            else:
              inputs[g].add((full_name, None))
          for i, g in enumerate(upstream_groups):
            if i == len(upstream_groups) - 1:
              outputs[g].add((full_name, None))
            else:
              outputs[g].add((full_name, upstream_groups[i + 1]))
        elif not is_condition_param:
          for g in op_groups[group.name]:
            inputs[g].add((full_name, None))
    for subgroup in group.groups:
      _get_inputs_outputs_recursive_opsgroup(subgroup)

  _get_inputs_outputs_recursive_opsgroup(root_group)

  # Generate the input for SubGraph along with parallelfor
  for sub_graph in opsgroup_groups:
    if sub_graph in op_name_to_for_loop_op:
      # The opsgroup list is sorted with the farthest group as the first and
      # the opsgroup itself as the last. To get the latest opsgroup which is
      # not the opsgroup itself, -2 is used.
      parent = opsgroup_groups[sub_graph][-2]
      if parent and parent.startswith('subgraph'):
        # propagate only op's pipeline param from subgraph to parallelfor
        loop_op = op_name_to_for_loop_op[sub_graph]
        pipeline_param = loop_op.loop_args.items_or_pipeline_param
        if loop_op.items_is_pipeline_param and pipeline_param.op_name:
          param_name = '%s-%s' % (
              sanitize_k8s_name(pipeline_param.op_name), pipeline_param.name)
          inputs[parent].add((param_name, pipeline_param.op_name))

  return inputs, outputs

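
# Toy illustration of the convention described in the docstring above: each
# group/op maps to a set of (param_full_name, producing_op_name) tuples, with
# None as the producer when the value is a pipeline param or is simply passed
# down. Assume a made-up 'preprocess' op at the pipeline root whose output is
# consumed by a 'train' op inside a condition group 'condition-1':
from collections import defaultdict

inputs = defaultdict(set)
outputs = defaultdict(set)

inputs['condition-1'].add(('preprocess-output', 'preprocess'))  # first uncommon downstream group
inputs['train'].add(('preprocess-output', None))                # passed down from its parent group
outputs['preprocess'].add(('preprocess-output', None))          # the op itself produces the value

print(dict(inputs), dict(outputs))
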