Code example #1
def _resolve_task_pipeline_param(pipeline_param: PipelineParam, group_type) -> str:
  if pipeline_param.op_name is None:
    return '{{workflow.parameters.%s}}' % pipeline_param.name
  param_name = '%s-%s' % (sanitize_k8s_name(pipeline_param.op_name), pipeline_param.name)
  if group_type == 'subgraph':
    return '{{inputs.parameters.%s}}' % param_name
  return '{{tasks.%s.outputs.parameters.%s}}' % (sanitize_k8s_name(pipeline_param.op_name), param_name)
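To make the three branches concrete, here is a minimal sketch of the Argo references this resolver emits, assuming kfp's dsl.PipelineParam(name, op_name=...) constructor and the usual sanitize_k8s_name behavior (param and op names are made up):

from kfp import dsl

# No producing op: the param is a pipeline-level input.
_resolve_task_pipeline_param(dsl.PipelineParam('lr'), None)
# -> '{{workflow.parameters.lr}}'

# Produced by an op, referenced from inside a subgraph: an input parameter.
_resolve_task_pipeline_param(dsl.PipelineParam('out', op_name='Train Model'), 'subgraph')
# -> '{{inputs.parameters.train-model-out}}'

# Produced by an op, referenced as a sibling task's output.
_resolve_task_pipeline_param(dsl.PipelineParam('out', op_name='Train Model'), None)
# -> '{{tasks.train-model.outputs.parameters.train-model-out}}'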
Code example #2
File: compiler.py Project: tanguycdls/pipelines
    def _create_pipeline(
        self,
        pipeline_func: Callable[..., Any],
        pipeline_name: Optional[str] = None,
    ) -> pipeline_spec_pb2.PipelineSpec:
        """Creates a pipeline instance and constructs the pipeline spec from it.

    Args:
      pipeline_func: Pipeline function with @dsl.pipeline decorator.
      pipeline_name: The name of the pipeline. Optional.

    Returns:
      The IR representation (pipeline spec) of the pipeline.
    """

        # Create the arg list with no default values and call pipeline function.
        # Assign type information to the PipelineParam
        pipeline_meta = python_op._extract_component_interface(pipeline_func)
        pipeline_name = pipeline_name or pipeline_meta.name

        args_list = []
        signature = inspect.signature(pipeline_func)
        for arg_name in signature.parameters:
            arg_type = None
            for pipeline_input in pipeline_meta.inputs or []:
                if arg_name == pipeline_input.name:
                    arg_type = pipeline_input.type
                    break
            args_list.append(
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        # Fill in the default values.
        args_list_with_defaults = []
        if pipeline_meta.inputs:
            args_list_with_defaults = [
                dsl.PipelineParam(sanitize_k8s_name(input_spec.name, True),
                                  param_type=input_spec.type,
                                  value=input_spec.default)
                for input_spec in pipeline_meta.inputs
            ]

        pipeline_spec = self._create_pipeline_spec(
            args_list_with_defaults,
            dsl_pipeline,
        )

        return pipeline_spec
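For context, a hedged sketch of the kind of decorated function this method consumes; the compiler reads the signature, wraps each argument in a dsl.PipelineParam, and then calls the function to build the op graph (the component, image, and script name are illustrative):

from kfp import dsl

@dsl.pipeline(name='train-pipeline')
def train_pipeline(learning_rate: float = 0.1, epochs: int = 10):
    # Inside the body each argument is a dsl.PipelineParam, not a concrete value.
    dsl.ContainerOp(
        name='train',
        image='python:3.9',  # illustrative image
        command=['python', 'train.py', '--lr', learning_rate, '--epochs', epochs])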
Code example #3
File: compiler.py Project: mishraprafful/pipelines
  def _sanitize_and_inject_artifact(self, pipeline: dsl.Pipeline) -> None:
    """Sanitize operator/param names and inject pipeline artifact location. """

    # Sanitize operator names and param names
    sanitized_ops = {}

    for op in pipeline.ops.values():
      sanitized_name = sanitize_k8s_name(op.name)
      op.name = sanitized_name
      for param in op.outputs.values():
        param.name = sanitize_k8s_name(param.name, True)
        if param.op_name:
          param.op_name = sanitize_k8s_name(param.op_name)
      if op.output is not None and not isinstance(
          op.output, dsl._container_op._MultipleOutputsError):
        op.output.name = sanitize_k8s_name(op.output.name, True)
        op.output.op_name = sanitize_k8s_name(op.output.op_name)
      if op.dependent_names:
        op.dependent_names = [
            sanitize_k8s_name(name) for name in op.dependent_names
        ]
      if isinstance(op, dsl.ContainerOp) and op.file_outputs is not None:
        sanitized_file_outputs = {}
        for key in op.file_outputs.keys():
          sanitized_file_outputs[sanitize_k8s_name(key,
                                                   True)] = op.file_outputs[key]
        op.file_outputs = sanitized_file_outputs
      elif isinstance(op, dsl.ResourceOp) and op.attribute_outputs is not None:
        sanitized_attribute_outputs = {}
        for key in op.attribute_outputs.keys():
          sanitized_attribute_outputs[sanitize_k8s_name(key, True)] = \
            op.attribute_outputs[key]
        op.attribute_outputs = sanitized_attribute_outputs
      if isinstance(op, dsl.ContainerOp):
        if op.input_artifact_paths:
          op.input_artifact_paths = {
              sanitize_k8s_name(key, True): value
              for key, value in op.input_artifact_paths.items()
          }
        if op.artifact_arguments:
          op.artifact_arguments = {
              sanitize_k8s_name(key, True): value
              for key, value in op.artifact_arguments.items()
          }
      sanitized_ops[sanitized_name] = op
    pipeline.ops = sanitized_ops
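The sanitization above leans on sanitize_k8s_name; roughly (a sketch, the exact rules live in kfp's compiler helpers):

sanitize_k8s_name('My Training Op')       # -> 'my-training-op'
sanitize_k8s_name('Learning Rate')        # -> 'learning-rate'
sanitize_k8s_name('Learning_Rate', True)  # -> 'Learning_Rate' (True keeps capitals and underscores)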
Code example #4
  def get_arguments_for_sub_group(
          self,
          sub_group: Union[OpsGroup, dsl._container_op.BaseOp],
          is_recursive_subgroup: Optional[bool],
          inputs: Dict[Text, Tuple[Text, Text]],
  ):
    arguments = []
    for param_name, dependent_name in inputs[sub_group.name]:
      if is_recursive_subgroup:
        for input_name, input in sub_group.arguments.items():
          if param_name == input.full_name:
            break
        referenced_input = sub_group.recursive_ref.arguments[input_name]
        argument_name = referenced_input.full_name
      else:
        argument_name = param_name

      # Prepare the argument. It can be a pipeline input reference, a task output reference, or a loop item (or a loop item attribute).
      sanitized_loop_arg_full_name = '---'
      if isinstance(sub_group, dsl.ParallelFor):
        sanitized_loop_arg_full_name = sanitize_k8s_name(sub_group.loop_args.full_name)
      arg_ref_full_name = sanitize_k8s_name(param_name)
      # We only care about the reference to the current loop item, not the outer loops
      if isinstance(sub_group, dsl.ParallelFor) and arg_ref_full_name.startswith(sanitized_loop_arg_full_name):
        if arg_ref_full_name == sanitized_loop_arg_full_name:
          argument_value = '{{item}}'
        elif _for_loop.LoopArgumentVariable.name_is_loop_arguments_variable(param_name):
          subvar_name = _for_loop.LoopArgumentVariable.get_subvar_name(param_name)
          argument_value = '{{item.%s}}' % subvar_name
        else:
          raise ValueError("Argument seems to reference the loop item, but neither the item itself nor an attribute of the item. param_name: {}".format(param_name))
      else:
        if dependent_name:
          argument_value = '{{tasks.%s.outputs.parameters.%s}}' % (dependent_name, param_name)
        else:
          argument_value = '{{inputs.parameters.%s}}' % param_name

      arguments.append({
        'name': argument_name,
        'value': argument_value,
      })

    arguments.sort(key=lambda x: x['name'])

    return arguments
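Illustrative return value (all names are made up); each entry becomes one Argo 'arguments.parameters' item, sorted by name:

[{'name': 'loop-item-param-1-subvar-a', 'value': '{{item.a}}'},
 {'name': 'preprocess-output',
  'value': '{{tasks.preprocess.outputs.parameters.preprocess-output}}'}]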
Code example #5
    def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None, namespace=None):
        """Schedule pipeline on kubeflow to run based upon a cron job

        Arguments:
            experiment_id {[type]} -- The expriment within which we would like kubeflow 
            job_name {[type]} -- The name of the scheduled job

        Keyword Arguments:
            pipeline_package_path {[type]} -- The path to the pipeline package (default: {None})
            params {dict} -- The pipeline parameters (default: {{}})
            pipeline_id {[type]} -- The id of the pipeline which should run on schedule (default: {None})
            namespace {[type]} -- The name space with which the pipeline should run (default: {None})
        """

        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [kfp_server_api.ApiParameter(
            name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
            value=str(v)) for k, v in params.items()]
        resource_references = []

        key = kfp_server_api.models.ApiResourceKey(id=experiment_id,
                                                   type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                               relationship=kfp_server_api.models.ApiRelationship.OWNER)
        resource_references.append(reference)
        if namespace is not None:
            key = kfp_server_api.models.ApiResourceKey(id=namespace,
                                                       type=kfp_server_api.models.ApiResourceType.NAMESPACE)
            reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                                   name=namespace,
                                                                   relationship=kfp_server_api.models.ApiRelationship.OWNER)
            resource_references.append(reference)
        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)

        trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(
            cron="0 0 9 ? * 2-6")
        job_id = ''.join(random.choices(
            string.ascii_uppercase + string.digits, k=10))
        schedule_body = kfp_server_api.models.ApiJob(
            id=job_id,
            name="TestScheduling",
            description="Schedule the pipeline using the API",
            pipeline_spec=spec,
            resource_references=resource_references,
            max_concurrency=10,
            trigger=trigger,
            enabled=True,
        )
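        # The snippet ends here without submitting; doing so would follow the
        # same call as in code example #9:
        # response = self._job_api.create_job(body=schedule_body)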
Code example #6
File: _client.py Project: spaparaju/pipelines
  def run_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None, namespace=None):
    """Run a specified pipeline.

    Args:
      experiment_id: The string id of an experiment.
      job_name: name of the job.
      pipeline_package_path: local path of the pipeline package (the filename should end with one of the following: .tar.gz, .tgz, .zip, .yaml, .yml).
      params: a dictionary with key (string) as param name and value (string) as param value.
      pipeline_id: the string ID of a pipeline.
      namespace: kubernetes namespace where the pipeline runs are created.
        For single-user deployments, leave it as None;
        for multi-user deployments, pass a namespace where the user is authorized.
    Returns:
      A run object. Most important field is id.
    """

    pipeline_json_string = None
    if pipeline_package_path:
      pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
      pipeline_json_string = json.dumps(pipeline_obj)
    api_params = [kfp_server_api.ApiParameter(
        name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
        value=str(v)) for k,v in params.items()]
    resource_references = []

    key = kfp_server_api.models.ApiResourceKey(id=experiment_id,
                                        type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
    reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                           relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)
    if namespace is not None:
      key = kfp_server_api.models.ApiResourceKey(id=namespace,
                                                 type=kfp_server_api.models.ApiResourceType.NAMESPACE)
      reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                             name=namespace,
                                                             relationship=kfp_server_api.models.ApiRelationship.OWNER)
      resource_references.append(reference)
    spec = kfp_server_api.models.ApiPipelineSpec(
        pipeline_id=pipeline_id,
        workflow_manifest=pipeline_json_string,
        parameters=api_params)
    run_body = kfp_server_api.models.ApiRun(
        pipeline_spec=spec, resource_references=resource_references, name=job_name)

    response = self._run_api.create_run(body=run_body)

    if self._is_ipython():
      import IPython
      html = ('Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
              % (self._get_url_prefix(), response.run.id))
      IPython.display.display(IPython.display.HTML(html))
    return response.run
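A hypothetical client-side invocation of run_pipeline (experiment name, package path, and parameter names are illustrative):

import kfp

client = kfp.Client()
experiment = client.create_experiment('demo')
run = client.run_pipeline(experiment.id, 'demo-run',
                          pipeline_package_path='pipeline.tar.gz',
                          params={'learning-rate': '0.01'})
print(run.id)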
Code example #7
    def _create_job_config(self, experiment_id, params, pipeline_package_path,
                           pipeline_id, version_id):
        """Create a JobConfig with spec and resource_references.

    Args:
      experiment_id: The id of an experiment.
      pipeline_package_path: Local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: A dictionary with key (string) as param name and value (string) as param value.
      pipeline_id: The id of a pipeline.
      version_id: The id of a pipeline version.
        If both pipeline_id and version_id are specified, version_id will take precendence.
        If only pipeline_id is specified, the default version of this pipeline is used to create the run.

    Returns:
      A JobConfig object with attributes spec and resource_reference.
    """
        class JobConfig:
            def __init__(self, spec, resource_references):
                self.spec = spec
                self.resource_references = resource_references

        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [
            kfp_server_api.ApiParameter(
                name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
                value=str(v) if type(v) not in (list, dict) else json.dumps(v))
            for k, v in params.items()
        ]
        resource_references = []
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment_id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(
            key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
        resource_references.append(reference)

        if version_id:
            key = kfp_server_api.models.ApiResourceKey(
                id=version_id,
                type=kfp_server_api.models.ApiResourceType.PIPELINE_VERSION)
            reference = kfp_server_api.models.ApiResourceReference(
                key=key,
                relationship=kfp_server_api.models.ApiRelationship.CREATOR)
            resource_references.append(reference)

        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)
        return JobConfig(spec=spec, resource_references=resource_references)
Code example #8
    def run_pipeline(self,
                     experiment_id,
                     job_name,
                     pipeline_package_path=None,
                     params={},
                     pipeline_id=None):
        """Run a specified pipeline.

    Args:
      experiment_id: The string id of an experiment.
      job_name: name of the job.
      pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: a dictionary with key (string) as param name and value (string) as as param value.
      pipeline_id: the string ID of a pipeline.

    Returns:
      A run object. Most important field is id.
    """

        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [
            kfp_server_api.ApiParameter(name=sanitize_k8s_name(k),
                                        value=str(v))
            for k, v in params.items()
        ]
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment_id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(
            key, kfp_server_api.models.ApiRelationship.OWNER)
        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)
        run_body = kfp_server_api.models.ApiRun(
            pipeline_spec=spec, resource_references=[reference], name=job_name)

        response = self._run_api.create_run(body=run_body)

        if self._is_ipython():
            import IPython
            html = (
                'Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
                % (self._get_url_prefix(), response.run.id))
            IPython.display.display(IPython.display.HTML(html))
        return response.run
Code example #9
File: _client.py Project: spaparaju/pipelines
  def schedule_pipeline(self, experiment_id, job_name, pipeline_package_path=None, params={}, pipeline_id=None,
    namespace=None, cron_schedule=None, description=None, max_concurrency=10, no_catchup=None):
    """Schedule pipeline on kubeflow to run based upon a cron job

    Arguments:
        experiment_id {string} -- The expriment within which we would like kubeflow
        job_name {string} -- The name of the scheduled job

    Keyword Arguments:
        pipeline_package_path {string} -- The path to the pipeline package (default: {None})
        params {dict} -- The pipeline parameters (default: {{}})
        pipeline_id {string} -- The id of the pipeline which should run on schedule (default: {None})
        namespace {string} -- The name space with which the pipeline should run (default: {None})
        max_concurrency {int} -- Max number of concurrent runs scheduled (default: {10})
        no_catchup {boolean} -- Whether the recurring run should catch up if behind schedule.
          For example, if the recurring run is paused for a while and re-enabled
          afterwards. If no_catchup=False, the scheduler will catch up on (backfill) each
          missed interval. Otherwise, it only schedules the latest interval if more than one interval
          is ready to be scheduled.
          Usually, if your pipeline handles backfill internally, you should turn catchup
          off to avoid duplicate backfill. (default: {False})
    """

    pipeline_json_string = None
    if pipeline_package_path:
      pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
      pipeline_json_string = json.dumps(pipeline_obj)
    api_params = [kfp_server_api.ApiParameter(
        name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
        value=str(v)) for k,v in params.items()]
    resource_references = []

    key = kfp_server_api.models.ApiResourceKey(id=experiment_id,
                                        type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
    reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                           relationship=kfp_server_api.models.ApiRelationship.OWNER)
    resource_references.append(reference)
    if namespace is not None:
      key = kfp_server_api.models.ApiResourceKey(id=namespace,
                                                 type=kfp_server_api.models.ApiResourceType.NAMESPACE)
      reference = kfp_server_api.models.ApiResourceReference(key=key,
                                                             name=namespace,
                                                             relationship=kfp_server_api.models.ApiRelationship.OWNER)
      resource_references.append(reference)
    spec = kfp_server_api.models.ApiPipelineSpec(
        pipeline_id=pipeline_id,
        workflow_manifest=pipeline_json_string,
        parameters=api_params)

    trigger = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(cron=cron_schedule)  # Example: cron_schedule="0 0 9 ? * 2-6"
    job_id = ''.join(random.choices(string.ascii_uppercase + string.digits, k=10))
    schedule_body = kfp_server_api.models.ApiJob(
        id=job_id,
        name=job_name,
        description=description,
        pipeline_spec=spec,
        resource_references=resource_references,
        max_concurrency=max_concurrency,
        no_catchup=no_catchup,
        trigger=trigger,
        enabled=True,
        )
    # TODO: Add a link to the scheduled job.
    response = self._job_api.create_job(body=schedule_body)
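A hypothetical call that schedules weekday 09:00 runs, reusing the cron format from the inline comment above (client and experiment as in the earlier run_pipeline sketch; the pipeline id is a placeholder):

client.schedule_pipeline(
    experiment_id=experiment.id,
    job_name='weekday-train',
    pipeline_id='<pipeline-id>',   # placeholder id
    cron_schedule='0 0 9 ? * 2-6',
    description='Weekday 9am runs',
    max_concurrency=1,
    no_catchup=True)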
Code example #10
File: compiler.py Project: AnkilP/kfp-tekton
  def _create_workflow(self,
      pipeline_func: Callable,
      pipeline_name: Text=None,
      pipeline_description: Text=None,
      params_list: List[dsl.PipelineParam]=None,
      pipeline_conf: dsl.PipelineConf = None,
      ) -> List[Dict[Text, Any]]:  # Tekton change, signature
    """ Internal implementation of create_workflow."""
    params_list = params_list or []
    argspec = inspect.getfullargspec(pipeline_func)

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default values of dsl.PipelineParams. Otherwise, they
    # will be resolved immediately in place when being passed to each component.
    default_param_values = {}
    for param in params_list:
      default_param_values[param.name] = param.value
      param.value = None

    # Currently only allow specifying pipeline params at one place.
    if params_list and pipeline_meta.inputs:
      raise ValueError('Either specify pipeline params in the pipeline function, or in "params_list", but not both.')

    args_list = []
    for arg_name in argspec.args:
      arg_type = None
      for input in pipeline_meta.inputs or []:
        if arg_name == input.name:
          arg_type = input.type
          break
      args_list.append(dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list)

    pipeline_conf = pipeline_conf or dsl_pipeline.conf  # Configuration passed to the compiler takes precedence. Unfortunately, it's not trivial to detect whether dsl_pipeline.conf was ever modified.

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
      args_list_with_defaults = [dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                                 for arg_name in argspec.args]
      if argspec.defaults:
        for arg, default in zip(reversed(args_list_with_defaults), reversed(argspec.defaults)):
          arg.value = default.value if isinstance(default, dsl.PipelineParam) else default
    elif params_list:
      # Or, if args are provided by params_list, fill in pipeline_meta.
      for param in params_list:
        param.value = default_param_values[param.name]

      args_list_with_defaults = params_list
      pipeline_meta.inputs = [
        InputSpec(
            name=param.name,
            type=param.param_type,
            default=param.value) for param in params_list]

    op_transformers = [add_pod_env]
    op_transformers.extend(pipeline_conf.op_transformers)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    import json
    pipeline = [item for item in workflow if item["kind"] == "Pipeline"][0]  # Tekton change
    pipeline.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    return workflow
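For context, a hedged sketch of how this Tekton-flavored compiler is typically driven; the TektonCompiler entry point is assumed from the kfp-tekton project this snippet comes from, and train_pipeline is the hypothetical @dsl.pipeline function sketched earlier:

from kfp_tekton.compiler import TektonCompiler

TektonCompiler().compile(train_pipeline, 'train_pipeline.yaml')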
Code example #11
File: compiler.py Project: mishraprafful/pipelines
  def _create_pipeline_v2(
      self,
      pipeline_func: Callable[..., Any],
      pipeline_root: Optional[str] = None,
      pipeline_name: Optional[str] = None,
      pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
  ) -> pipeline_spec_pb2.PipelineJob:
    """Creates a pipeline instance and constructs the pipeline spec from it.

    Args:
      pipeline_func: Pipeline function with @dsl.pipeline decorator.
      pipeline_root: The root of the pipeline outputs. Optional.
      pipeline_name: The name of the pipeline. Optional.
      pipeline_parameters_override: The mapping from parameter names to values.
        Optional.

    Returns:
      A PipelineJob proto representing the compiled pipeline.
    """

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _python_op._extract_component_interface(pipeline_func)
    pipeline_name = pipeline_name or pipeline_meta.name

    pipeline_root = pipeline_root or getattr(pipeline_func, 'output_directory',
                                             None)
    if not pipeline_root:
      warnings.warn('pipeline_root is None or empty. A valid pipeline_root '
                    'must be provided at job submission.')

    args_list = []
    signature = inspect.signature(pipeline_func)
    for arg_name in signature.parameters:
      arg_type = None
      for pipeline_input in pipeline_meta.inputs or []:
        if arg_name == pipeline_input.name:
          arg_type = pipeline_input.type
          break
      args_list.append(
          dsl.PipelineParam(
              sanitize_k8s_name(arg_name, True), param_type=arg_type))

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list)

    self._sanitize_and_inject_artifact(dsl_pipeline)

    # Fill in the default values.
    args_list_with_defaults = []
    if pipeline_meta.inputs:
      args_list_with_defaults = [
          dsl.PipelineParam(
              sanitize_k8s_name(input_spec.name, True),
              param_type=input_spec.type,
              value=input_spec.default) for input_spec in pipeline_meta.inputs
      ]

    # Making the pipeline group name unique to prevent name clashes with templates
    pipeline_group = dsl_pipeline.groups[0]
    temp_pipeline_group_name = uuid.uuid4().hex
    pipeline_group.name = temp_pipeline_group_name

    pipeline_spec = self._create_pipeline_spec(
        args_list_with_defaults,
        dsl_pipeline,
    )

    pipeline_parameters = {
        param.name: param for param in args_list_with_defaults
    }
    # Update pipeline parameters override if there were any.
    pipeline_parameters_override = pipeline_parameters_override or {}
    for k, v in pipeline_parameters_override.items():
      if k not in pipeline_parameters:
        raise ValueError('Pipeline parameter {} does not match any known '
                         'pipeline argument.'.format(k))
      pipeline_parameters[k].value = v

    runtime_config = compiler_utils.build_runtime_config_spec(
        output_directory=pipeline_root, pipeline_parameters=pipeline_parameters)
    pipeline_job = pipeline_spec_pb2.PipelineJob(runtime_config=runtime_config)
    pipeline_job.pipeline_spec.update(json_format.MessageToDict(pipeline_spec))

    return pipeline_job
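Note the fail-fast behavior of the override loop above; a sketch (names hypothetical):

# Given a pipeline whose only parameter is 'learning_rate':
# _create_pipeline_v2(pipeline_func, pipeline_parameters_override={'learning_rate': 0.2})  # ok
# _create_pipeline_v2(pipeline_func, pipeline_parameters_override={'lr': 0.2})             # raises ValueError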
Code example #12
    def _create_pipeline(
        self,
        pipeline_func: Callable[..., Any],
        output_directory: str,
        pipeline_name: Optional[str] = None,
        pipeline_parameters_override: Optional[Mapping[str, Any]] = None,
    ) -> pipeline_spec_pb2.PipelineJob:
        """Creates a pipeline instance and constructs the pipeline spec from it.

    Args:
      pipeline_func: Pipeline function with @dsl.pipeline decorator.
      pipeline_name: The name of the pipeline. Optional.
      output_directory: The root of the pipeline outputs.
      pipeline_parameters_override: The mapping from parameter names to values.
        Optional.

    Returns:
      A PipelineJob proto representing the compiled pipeline.
    """

        # Create the arg list with no default values and call pipeline function.
        # Assign type information to the PipelineParam
        pipeline_meta = _python_op._extract_component_interface(pipeline_func)
        pipeline_name = pipeline_name or pipeline_meta.name

        args_list = []
        signature = inspect.signature(pipeline_func)
        for arg_name in signature.parameters:
            arg_type = None
            for pipeline_input in pipeline_meta.inputs or []:
                if arg_name == pipeline_input.name:
                    arg_type = pipeline_input.type
                    break
            args_list.append(
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        # Fill in the default values.
        args_list_with_defaults = []
        if pipeline_meta.inputs:
            args_list_with_defaults = [
                dsl.PipelineParam(sanitize_k8s_name(input_spec.name, True),
                                  param_type=input_spec.type,
                                  value=input_spec.default)
                for input_spec in pipeline_meta.inputs
            ]

        pipeline_spec = self._create_pipeline_spec(
            args_list_with_defaults,
            dsl_pipeline,
        )

        pipeline_parameters = {
            arg.name: arg.value
            for arg in args_list_with_defaults
        }
        # Update pipeline parameters override if there were any.
        pipeline_parameters.update(pipeline_parameters_override or {})
        runtime_config = compiler_utils.build_runtime_config_spec(
            output_directory=output_directory,
            pipeline_parameters=pipeline_parameters)
        pipeline_job = pipeline_spec_pb2.PipelineJob(
            runtime_config=runtime_config)
        pipeline_job.pipeline_spec.update(
            json_format.MessageToDict(pipeline_spec))

        return pipeline_job
Code example #13
    def run_pipeline(self,
                     experiment_id,
                     job_name,
                     pipeline_package_path=None,
                     params={},
                     pipeline_id=None,
                     version_id=None):
        """Run a specified pipeline.

    Args:
      experiment_id: The string id of an experiment.
      job_name: name of the job.
      pipeline_package_path: local path of the pipeline package(the filename should end with one of the following .tar.gz, .tgz, .zip, .yaml, .yml).
      params: a dictionary with key (string) as param name and value (string) as as param value.
      pipeline_id: the string ID of a pipeline.
      version_id: the string ID of a pipeline version. 
        If both pipeline_id and version_id are specified, pipeline_id will take precendence
        This will change in a future version, so it is recommended to use version_id by itself

    Returns:
      A run object. Most important field is id.
    """

        pipeline_json_string = None
        if pipeline_package_path:
            pipeline_obj = self._extract_pipeline_yaml(pipeline_package_path)
            pipeline_json_string = json.dumps(pipeline_obj)
        api_params = [
            kfp_server_api.ApiParameter(
                name=sanitize_k8s_name(name=k, allow_capital_underscore=True),
                value=str(v)) for k, v in params.items()
        ]
        resource_references = []
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment_id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)
        reference = kfp_server_api.models.ApiResourceReference(
            key=key, relationship=kfp_server_api.models.ApiRelationship.OWNER)
        resource_references.append(reference)

        if version_id:
            key = kfp_server_api.models.ApiResourceKey(
                id=version_id,
                type=kfp_server_api.models.ApiResourceType.PIPELINE_VERSION)
            reference = kfp_server_api.models.ApiResourceReference(
                key=key,
                relationship=kfp_server_api.models.ApiRelationship.CREATOR)
            resource_references.append(reference)

        spec = kfp_server_api.models.ApiPipelineSpec(
            pipeline_id=pipeline_id,
            workflow_manifest=pipeline_json_string,
            parameters=api_params)
        run_body = kfp_server_api.models.ApiRun(
            pipeline_spec=spec,
            resource_references=resource_references,
            name=job_name)

        response = self._run_api.create_run(body=run_body)

        if self._is_ipython():
            import IPython
            html = (
                'Run link <a href="%s/#/runs/details/%s" target="_blank" >here</a>'
                % (self._get_url_prefix(), response.run.id))
            IPython.display.display(IPython.display.HTML(html))
        return response.run
Code example #14
  def _create_workflow(
      self,
      pipeline_func: Callable,
      pipeline_name: Optional[Text] = None,
      pipeline_description: Optional[Text] = None,
      params_list: Optional[List[dsl.PipelineParam]] = None,
      pipeline_conf: Optional[dsl.PipelineConf] = None,
  ) -> Dict[Text, Any]:
    """ Internal implementation of create_workflow."""
    params_list = params_list or []

    # Create the arg list with no default values and call pipeline function.
    # Assign type information to the PipelineParam
    pipeline_meta = _extract_pipeline_metadata(pipeline_func)
    pipeline_meta.name = pipeline_name or pipeline_meta.name
    pipeline_meta.description = pipeline_description or pipeline_meta.description
    pipeline_name = sanitize_k8s_name(pipeline_meta.name)

    # Need to first clear the default values of dsl.PipelineParams. Otherwise, they
    # will be resolved immediately in place when being passed to each component.
    default_param_values = OrderedDict()

    if self._pipeline_root_param:
      params_list.append(self._pipeline_root_param)
    if self._pipeline_name_param:
      params_list.append(self._pipeline_name_param)

    for param in params_list:
      default_param_values[param.name] = param.value
      param.value = None

    args_list = []
    kwargs_dict = dict()
    signature = inspect.signature(pipeline_func)
    for arg_name, arg in signature.parameters.items():
      arg_type = None
      for input in pipeline_meta.inputs or []:
        if arg_name == input.name:
          arg_type = input.type
          break
      param = dsl.PipelineParam(sanitize_k8s_name(arg_name, True), param_type=arg_type)
      if arg.kind == inspect.Parameter.KEYWORD_ONLY:
        kwargs_dict[arg_name] = param
      else:
        args_list.append(param)

    with dsl.Pipeline(pipeline_name) as dsl_pipeline:
      pipeline_func(*args_list, **kwargs_dict)

    pipeline_conf = pipeline_conf or dsl_pipeline.conf  # Configuration passed to the compiler takes precedence. Unfortunately, it's not trivial to detect whether dsl_pipeline.conf was ever modified.

    self._validate_exit_handler(dsl_pipeline)
    self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

    # Fill in the default values by merging two param lists.
    args_list_with_defaults = OrderedDict()
    if pipeline_meta.inputs:
      args_list_with_defaults = OrderedDict([
        (sanitize_k8s_name(input_spec.name, True), input_spec.default)
        for input_spec in pipeline_meta.inputs
      ])

    if params_list:
      # Or, if args are provided by params_list, fill in pipeline_meta.
      for k, v in default_param_values.items():
        args_list_with_defaults[k] = v

      pipeline_meta.inputs = pipeline_meta.inputs or []
      for param in params_list:
        pipeline_meta.inputs.append(
            InputSpec(
                name=param.name,
                type=param.param_type,
                default=default_param_values[param.name]))

    op_transformers = [add_pod_env]
    pod_labels = {_SDK_VERSION_LABEL: kfp.__version__, _SDK_ENV_LABEL:_SDK_ENV_DEFAULT}
    op_transformers.append(add_pod_labels(pod_labels))
    op_transformers.extend(pipeline_conf.op_transformers)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
      # Add self._pipeline_name_param and self._pipeline_root_param to ops inputs
      # if they don't exist already.
      for op in dsl_pipeline.ops.values():
        insert_pipeline_name_param = True
        insert_pipeline_root_param = True
        for param in op.inputs:
          if param.name == self._pipeline_name_param.name:
            insert_pipeline_name_param = False
          elif param.name == self._pipeline_root_param.name:
            insert_pipeline_root_param = False

        if insert_pipeline_name_param:
          op.inputs.append(self._pipeline_name_param)
        if insert_pipeline_root_param:
          op.inputs.append(self._pipeline_root_param)

    workflow = self._create_pipeline_workflow(
        args_list_with_defaults,
        dsl_pipeline,
        op_transformers,
        pipeline_conf,
    )

    from ._data_passing_rewriter import fix_big_data_passing
    workflow = fix_big_data_passing(workflow)

    workflow = _data_passing_rewriter.add_pod_name_passing(
        workflow, str(self._pipeline_root_param or None))

    if pipeline_conf and pipeline_conf.data_passing_method is not None:
      workflow = pipeline_conf.data_passing_method(workflow)

    metadata = workflow.setdefault('metadata', {})
    annotations = metadata.setdefault('annotations', {})
    labels = metadata.setdefault('labels', {})

    annotations[_SDK_VERSION_LABEL] = kfp.__version__
    annotations['pipelines.kubeflow.org/pipeline_compilation_time'] = datetime.datetime.now().isoformat()
    annotations['pipelines.kubeflow.org/pipeline_spec'] = json.dumps(pipeline_meta.to_dict(), sort_keys=True)

    if self._mode == dsl.PipelineExecutionMode.V2_COMPATIBLE:
      annotations['pipelines.kubeflow.org/v2_pipeline'] = "true"
      labels['pipelines.kubeflow.org/v2_pipeline'] = "true"


    # Labels might be logged better than annotations so adding some information here as well
    labels[_SDK_VERSION_LABEL] = kfp.__version__

    return workflow
Code example #15
  def _group_to_dag_template(self, group, inputs, outputs, dependencies):
    """Generate template given an OpsGroup.

    inputs, outputs, dependencies are all helper dicts.
    """
    template = {'name': group.name}
    if group.parallelism is not None:
      template['parallelism'] = group.parallelism

    # Generate inputs section.
    if inputs.get(group.name, None):
      template_inputs = [{'name': x[0]} for x in inputs[group.name]]
      template_inputs.sort(key=lambda x: x['name'])
      template['inputs'] = {
        'parameters': template_inputs
      }

    # Generate outputs section.
    if outputs.get(group.name, None):
      template_outputs = []
      for param_name, dependent_name in outputs[group.name]:
        template_outputs.append({
          'name': param_name,
          'valueFrom': {
            'parameter': '{{tasks.%s.outputs.parameters.%s}}' % (dependent_name, param_name)
          }
        })
      template_outputs.sort(key=lambda x: x['name'])
      template['outputs'] = {'parameters': template_outputs}

    # Generate tasks section.
    tasks = []
    sub_groups = group.groups + group.ops
    for sub_group in sub_groups:
      is_recursive_subgroup = (isinstance(sub_group, OpsGroup) and sub_group.recursive_ref)
      # Special handling for recursive subgroup: use the existing opsgroup name
      if is_recursive_subgroup:
        task = {
            'name': sub_group.recursive_ref.name,
            'template': sub_group.recursive_ref.name,
        }
      else:
        task = {
          'name': sub_group.name,
          'template': sub_group.name,
        }
      if isinstance(sub_group, dsl.OpsGroup) and sub_group.type == 'condition':
        subgroup_inputs = inputs.get(sub_group.name, [])
        condition = sub_group.condition
        operand1_value = self._resolve_value_or_reference(condition.operand1, subgroup_inputs)
        operand2_value = self._resolve_value_or_reference(condition.operand2, subgroup_inputs)
        if condition.operator in ['==', '!=']:
          operand1_value = '"' + operand1_value + '"'
          operand2_value = '"' + operand2_value + '"'
        task['when'] = '{} {} {}'.format(operand1_value, condition.operator, operand2_value)

      # Generate dependencies section for this task.
      if dependencies.get(sub_group.name, None):
        group_dependencies = list(dependencies[sub_group.name])
        group_dependencies.sort()
        task['dependencies'] = group_dependencies

      # Generate arguments section for this task.
      if inputs.get(sub_group.name, None):
        task['arguments'] = {'parameters': self.get_arguments_for_sub_group(sub_group, is_recursive_subgroup, inputs)}

      # additional task modifications for withItems and withParam
      if isinstance(sub_group, dsl.ParallelFor):
        if sub_group.items_is_pipeline_param:
          # these loop args are a 'withParam' rather than 'withItems'.
          # i.e., rather than a static list, they are either the output of another task or were input
          # as global pipeline parameters

          pipeline_param = sub_group.loop_args.items_or_pipeline_param
          withparam_value = self._resolve_task_pipeline_param(pipeline_param, group.type)
          if pipeline_param.op_name:
            # these loop args are the output of another task
            if 'dependencies' not in task or task['dependencies'] is None:
              task['dependencies'] = []
            if sanitize_k8s_name(
                pipeline_param.op_name) not in task['dependencies'] and group.type != 'subgraph':
              task['dependencies'].append(
                  sanitize_k8s_name(pipeline_param.op_name))

          task['withParam'] = withparam_value
        else:
          # Need to sanitize the dict keys for consistency.
          loop_tasks = sub_group.loop_args.to_list_for_task_yaml()
          nested_pipeline_params = extract_pipelineparams_from_any(loop_tasks)

          # Set dependencies in case of nested pipeline_params
          map_to_tmpl_var = {str(p): self._resolve_task_pipeline_param(p, group.type) for p in nested_pipeline_params}
          for pipeline_param in nested_pipeline_params:
            if pipeline_param.op_name:
              # these pipeline_param are the output of another task
              if 'dependencies' not in task or task['dependencies'] is None:
                task['dependencies'] = []
              if sanitize_k8s_name(
                  pipeline_param.op_name) not in task['dependencies']:
                task['dependencies'].append(
                    sanitize_k8s_name(pipeline_param.op_name))

          sanitized_tasks = []
          if isinstance(loop_tasks[0], dict):
            for argument_set in loop_tasks:
              c_dict = {}
              for k, v in argument_set.items():
                c_dict[sanitize_k8s_name(k, True)] = v
              sanitized_tasks.append(c_dict)
          else:
            sanitized_tasks = loop_tasks
          # Replace pipeline param if map_to_tmpl_var not empty
          task['withItems'] = _process_obj(sanitized_tasks, map_to_tmpl_var) if map_to_tmpl_var else sanitized_tasks

        # Sort dependencies to get deterministic yaml and thus stable tests.
        if task.get('dependencies'):
          task['dependencies'].sort()

      tasks.append(task)
    tasks.sort(key=lambda x: x['name'])
    template['dag'] = {'tasks': tasks}
    return template
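Illustrative shape of a DAG template this method can emit (all names and values are made up):

{'name': 'my-group',
 'inputs': {'parameters': [{'name': 'preprocess-output'}]},
 'dag': {'tasks': [
     {'name': 'for-loop-1', 'template': 'for-loop-1',
      'withItems': [{'a': 1}, {'a': 2}]},
     {'name': 'train', 'template': 'train',
      'dependencies': ['preprocess'],
      'arguments': {'parameters': [
          {'name': 'preprocess-output',
           'value': '{{tasks.preprocess.outputs.parameters.preprocess-output}}'}]}}]}}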
Code example #16
  def _get_inputs_outputs(
          self,
          pipeline,
          root_group,
          op_groups,
          opsgroup_groups,
          condition_params,
          op_name_to_for_loop_op: Dict[Text, dsl.ParallelFor],
  ):
    """Get inputs and outputs of each group and op.

    Returns:
      A tuple (inputs, outputs).
      inputs and outputs are dicts with key being the group/op names and values being list of
      tuples (param_name, producing_op_name). producing_op_name is the name of the op that
      produces the param. If the param is a pipeline param (no producer op), then
      producing_op_name is None.
    """
    inputs = defaultdict(set)
    outputs = defaultdict(set)

    for op in pipeline.ops.values():
      # op's inputs and all params used in conditions for that op are both considered.
      for param in op.inputs + list(condition_params[op.name]):
        # if the value is already provided (immediate value), then no need to expose
        # it as input for its parent groups.
        if param.value:
          continue
        if param.op_name:
          upstream_op = pipeline.ops[param.op_name]
          upstream_groups, downstream_groups = \
            self._get_uncommon_ancestors(op_groups, opsgroup_groups, upstream_op, op)
          for i, group_name in enumerate(downstream_groups):
            if i == 0:
              # If it is the first uncommon downstream group, then the input comes from
              # the first uncommon upstream group.
              inputs[group_name].add((param.full_name, upstream_groups[0]))
            else:
              # If not the first downstream group, then the input is passed down from
              # its ancestor groups so the upstream group is None.
              inputs[group_name].add((param.full_name, None))
          for i, group_name in enumerate(upstream_groups):
            if i == len(upstream_groups) - 1:
              # If last upstream group, it is an operator and output comes from container.
              outputs[group_name].add((param.full_name, None))
            else:
              # If not last upstream group, output value comes from one of its child.
              outputs[group_name].add((param.full_name, upstream_groups[i+1]))
        else:
          if not op.is_exit_handler:
            for group_name in op_groups[op.name][::-1]:
              # if group is for loop group and param is that loop's param, then the param
              # is created by that for loop ops_group and it shouldn't be an input to
              # any of its parent groups.
              inputs[group_name].add((param.full_name, None))
              if group_name in op_name_to_for_loop_op:
                # for example:
                #   loop_group.loop_args.name = 'loop-item-param-99ca152e'
                #   param.name =                'loop-item-param-99ca152e--a'
                loop_group = op_name_to_for_loop_op[group_name]
                if loop_group.loop_args.name in param.name:
                  break


    # Generate the input/output for recursive opsgroups.
    # It propagates the recursive opsgroups' IO to their ancestor opsgroups.
    def _get_inputs_outputs_recursive_opsgroup(group):
      # TODO: refactor the following code with the above
      if group.recursive_ref:
        params = [(param, False) for param in group.inputs]
        params.extend([(param, True) for param in list(condition_params[group.name])])
        for param, is_condition_param in params:
          if param.value:
            continue
          full_name = param.full_name
          if param.op_name:
            upstream_op = pipeline.ops[param.op_name]
            upstream_groups, downstream_groups = \
              self._get_uncommon_ancestors(op_groups, opsgroup_groups, upstream_op, group)
            for i, g in enumerate(downstream_groups):
              if i == 0:
                inputs[g].add((full_name, upstream_groups[0]))
              # There is no need to pass the condition param as argument to the downstream ops.
              #TODO: this might also apply to ops. add a TODO here and think about it.
              elif i == len(downstream_groups) - 1 and is_condition_param:
                continue
              else:
                inputs[g].add((full_name, None))
            for i, g in enumerate(upstream_groups):
              if i == len(upstream_groups) - 1:
                outputs[g].add((full_name, None))
              else:
                outputs[g].add((full_name, upstream_groups[i+1]))
          elif not is_condition_param:
            for g in op_groups[group.name]:
              inputs[g].add((full_name, None))
      for subgroup in group.groups:
        _get_inputs_outputs_recursive_opsgroup(subgroup)

    _get_inputs_outputs_recursive_opsgroup(root_group)

    # Generate the inputs for a subgraph that wraps a ParallelFor.
    for sub_graph in opsgroup_groups:
      if sub_graph in op_name_to_for_loop_op:
        # The opsgroup list is sorted with the farthest group first and the
        # opsgroup itself last. Index -2 picks the nearest enclosing opsgroup,
        # i.e. the last entry that is not the opsgroup itself.
        parent = opsgroup_groups[sub_graph][-2]
        if parent and parent.startswith('subgraph'):
          # propagate only op's pipeline param from subgraph to parallelfor
          loop_op = op_name_to_for_loop_op[sub_graph]
          pipeline_param = loop_op.loop_args.items_or_pipeline_param
          if loop_op.items_is_pipeline_param and pipeline_param.op_name:
            param_name = '%s-%s' % (
              sanitize_k8s_name(pipeline_param.op_name), pipeline_param.name)
            inputs[parent].add((param_name, pipeline_param.op_name))

    return inputs, outputs
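Illustrative shape of the returned (inputs, outputs) pair (names are made up). Each set entry is a (param_full_name, producer) tuple; producer is None when the value is passed down from a parent group or produced by the op's own container:

inputs = {'condition-1': {('preprocess-output', 'preprocess'), ('threshold', None)}}
outputs = {'preprocess': {('preprocess-output', None)}}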