Example #1
def _get_copy_result_step_template(step_number: int, result_maps: list):
    """Base copy result step for moving Tekton result files around.

    Return a copy result step for moving Tekton result files around.

    Args:
        step_number {int}: step number
        result_maps {list}: list of maps bucketed with the result groups

    Returns:
        Dict[Text, Any]
    """
    args = [""]
    for key in result_maps[step_number].keys():
        sanitize_key = sanitize_k8s_name(key)
        args[0] += "mv %s%s $(results.%s.path);\n" % (
            TEKTON_HOME_RESULT_PATH, sanitize_key, sanitize_key)
    if step_number > 0:
        for key in result_maps[step_number - 1].keys():
            sanitize_key = sanitize_k8s_name(key)
            args[0] += "mv $(results.%s.path) %s%s;\n" % (
                sanitize_key, TEKTON_HOME_RESULT_PATH, sanitize_key)
    return {
        "name": "copy-results-%s" % str(step_number),
        "args": args,
        "command": ["sh", "-c"],
        "image": TEKTON_COPY_RESULTS_STEP_IMAGE
    }
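
A rough illustration of what this template produces (not part of the source): for step 1 of a two-bucket result map, and assuming TEKTON_HOME_RESULT_PATH resolves to a Tekton home results directory such as /tekton/home/tep-results/, the returned step would look roughly like this:

# Hypothetical input: results already bucketed into two groups by estimated size.
result_maps = [{"large-out": 1800}, {"small-out": 200}]

# _get_copy_result_step_template(1, result_maps) would return roughly:
{
    "name": "copy-results-1",
    "command": ["sh", "-c"],
    "image": TEKTON_COPY_RESULTS_STEP_IMAGE,
    "args": [
        # move this bucket's files into the declared result paths, then park the
        # previous bucket's results back under the Tekton home directory
        "mv /tekton/home/tep-results/small-out $(results.small-out.path);\n"
        "mv $(results.large-out.path) /tekton/home/tep-results/large-out;\n"
    ],
}
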
Example #2
 def test_sanitize_k8s_labels(self):
     labels = {
         "my.favorite/hobby":
         "Hobby? Passion! Football. Go to https://www.fifa.com/",
         "My other hobbies?": "eating; drinking. sleeping ;-)"
     }
     expected_labels = {
         "my.favorite/hobby":
         "Hobby-Passion-Football.-Go-to-https-www.fifa.com",
         "My-other-hobbies": "eating-drinking.-sleeping"
     }
     self.assertEqual(
         list(
             map(
                 lambda k: sanitize_k8s_name(k,
                                             allow_capital_underscore=True,
                                             allow_dot=True,
                                             allow_slash=True,
                                             max_length=253),
                 labels.keys())), list(expected_labels.keys()))
     self.assertEqual(
         list(
             map(
                 lambda v: sanitize_k8s_name(v,
                                             allow_capital_underscore=True,
                                             allow_dot=True,
                                             allow_slash=False,
                                             max_length=63),
                 labels.values())), list(expected_labels.values()))
Example #3
def processOperand(operand) -> (str, str):
    if isinstance(operand, dsl.PipelineParam):
        return "results_" + sanitize_k8s_name(operand.op_name) + "_" + sanitize_k8s_name(operand.name), operand.op_name
    else:
        # Do the same as in _get_super_condition_template to check whether it's an int
        try:
            operand = int(operand)
        except:
            operand = '\'' + str(operand) + '\''
        return operand, None
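
For illustration only, a standalone sketch (hypothetical helper name) of the mapping performed above: PipelineParam operands become a results_<op>_<name> reference, while plain values are either cast to int or wrapped in single quotes.

def process_operand_sketch(operand, op_name=None):
    # PipelineParam-like operands turn into a results_<op>_<name> reference
    if op_name is not None:
        return "results_%s_%s" % (op_name, operand), op_name
    # literals are either ints or quoted strings
    try:
        return int(operand), None
    except ValueError:
        return "'%s'" % operand, None

print(process_operand_sketch("output", op_name="my-op"))  # ('results_my-op_output', 'my-op')
print(process_operand_sketch("42"))                       # (42, None)
print(process_operand_sketch("running"))                  # ("'running'", None)
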
Example #4
def _process_output_artifacts(outputs_dict: Dict[Text, Any],
                              volume_mount_step_template: List[Dict[Text,
                                                                    Any]],
                              volume_template: List[Dict[Text, Any]],
                              replaced_param_list: List[Text],
                              artifact_to_result_mapping: Dict[Text, Any],
                              artifact_items: List[Any]):
    """Process output artifact dependencies to replicate the same behavior as Argo.

    For storing artifacts, we will need to provide the output artifact dependencies for the server to
    find and store the artifacts with the proper metadata.

    Args:
        outputs_dict {Dict[Text, Any]}: Dictionary of the possible parameters/artifacts in this task
        volume_mount_step_template {List[Dict[Text, Any]]}: Step template for the list of volume mounts
        volume_template {List[Dict[Text, Any]]}: Task template for the list of volumes
        replaced_param_list {List[Text]}: List of parameters that already set up as results
        artifact_to_result_mapping {Dict[Text, Any]}: Mapping between parameter and artifact results
        artifact_items {List[Any]}: List of artifact items for this task, updated in place

    Returns:
        None; the volume and artifact lists are updated in place.
    """

    if outputs_dict.get('artifacts'):
        mounted_artifact_paths = []
        for artifact in outputs_dict['artifacts']:
            artifact_name = artifact_to_result_mapping.get(
                artifact['name'], artifact['name'])
            if artifact['name'] in replaced_param_list:
                artifact_items.append([
                    artifact_name,
                    "$(results.%s.path)" % sanitize_k8s_name(artifact_name)
                ])
            else:
                artifact_items.append([artifact_name, artifact['path']])
                if artifact['path'].rsplit("/",
                                           1)[0] not in mounted_artifact_paths:
                    if artifact['path'].rsplit("/", 1)[0] == "":
                        raise ValueError(
                            'Undefined volume path or "/" path artifacts are not allowed.'
                        )
                    volume_mount_step_template.append({
                        'name':
                        sanitize_k8s_name(artifact['name']),
                        'mountPath':
                        artifact['path'].rsplit("/", 1)[0]
                    })
                    volume_template.append({
                        'name':
                        sanitize_k8s_name(artifact['name']),
                        'emptyDir': {}
                    })
                    mounted_artifact_paths.append(artifact['path'].rsplit(
                        "/", 1)[0])
Example #5
def _add_mount_path(name: str,
                    path: str,
                    mount_path: str,
                    volume_mount_step_template: List[Dict[Text, Any]],
                    volume_template: List[Dict[Text, Any]],
                    mounted_param_paths: List[Text]):
    """
    Add emptyDir to the given mount_path for persisting files within the same tasks
    """
    volume_mount_step_template.append({'name': sanitize_k8s_name(name), 'mountPath': path.rsplit("/", 1)[0]})
    volume_template.append({'name': sanitize_k8s_name(name), 'emptyDir': {}})
    mounted_param_paths.append(mount_path)
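
A small illustration with hypothetical values, assuming the helper above is in scope: the mount uses the parent directory of the given path, and the volume is an emptyDir named after the sanitized parameter name.

volume_mount_step_template, volume_template, mounted_param_paths = [], [], []
_add_mount_path("My Output", "/tmp/outputs/data", "/tmp/outputs",
                volume_mount_step_template, volume_template, mounted_param_paths)
# volume_mount_step_template == [{'name': 'my-output', 'mountPath': '/tmp/outputs'}]
# volume_template           == [{'name': 'my-output', 'emptyDir': {}}]
# mounted_param_paths       == ['/tmp/outputs']
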
Example #6
def generate_id(name: str = None, length: int = 36) -> str:
    if name:
        # return name.lower().replace(" ", "-").replace("---", "-").replace("-–-", "–")
        return sanitize_k8s_name(name)
    else:
        # return ''.join([choice(ascii_letters + digits + '-') for n in range(length)])
        return ''.join([choice(hexdigits) for n in range(length)]).lower()
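
Usage sketch (outputs are illustrative): with a name the result is just the sanitized Kubernetes name; without one it is a random lowercase hex string of the requested length.

print(generate_id("My Pipeline Run"))   # -> "my-pipeline-run"
print(generate_id(length=8))            # -> e.g. "3fa9c01b" (random hex)
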
Example #7
    def __init__(
        self,
        any: Iterable[Union[dsl.ContainerOp, ConditionOperator]],
        name: str = None,
    ):
        arguments = [
            "--namespace", "$(context.pipelineRun.namespace)", "--prName",
            "$(context.pipelineRun.name)"
        ]
        tasks_list = []
        condition_list = []
        for cop in any:
            if isinstance(cop, dsl.ContainerOp):
                cop_name = sanitize_k8s_name(cop.name)
                tasks_list.append(cop_name)
            elif isinstance(cop, ConditionOperator):
                condition_list.append(cop)
        if len(tasks_list) > 0:
            task_list_str = ",".join(tasks_list)
            arguments.extend(["--taskList", task_list_str])

        conditonArgs = processConditionArgs(condition_list)
        arguments.extend(conditonArgs)

        super().__init__(
            name=name,
            image=ANY_SEQUENCER_IMAGE,
            command="any-taskrun",
            arguments=arguments,
        )
Example #8
 def test_sanitize_k8s_name_max_length(self):
     from string import ascii_lowercase, ascii_uppercase, digits, punctuation
     names = [
         "short-name with under_score and spaces",
         "very long name".replace("o", "o" * 300),
         digits + ascii_uppercase + punctuation + digits
     ]
     expected_names = [
         "short-name-with-under-score-and-spaces",
         "very-long-name".replace("o", "o" * 300),
         digits + ascii_lowercase + "-" + digits
     ]
     self.assertEqual([sanitize_k8s_name(name) for name in names],
                      [name[:63] for name in expected_names])
     self.assertEqual(
         [sanitize_k8s_name(sanitize_k8s_name(name)) for name in names],
         [name[:63] for name in expected_names])
Example #9
 def test_sanitize_k8s_annotations(self):
     annotation_keys = {
         "sidecar.istio.io/inject",
     }
     expected_k8s_annotation_keys = {
         "sidecar.istio.io/inject",
     }
     self.assertEqual(
         [sanitize_k8s_name(key, allow_capital_underscore=True, allow_dot=True, allow_slash=True,
                            max_length=253) for key in annotation_keys],
         [key[:253] for key in expected_k8s_annotation_keys])
Example #10
    def _sanitize_and_inject_artifact(self,
                                      pipeline: dsl.Pipeline,
                                      pipeline_conf=None):
        """Sanitize operator/param names and inject pipeline artifact location."""

        # Sanitize operator names and param names
        sanitized_ops = {}
        # pipeline level artifact location
        artifact_location = pipeline_conf.artifact_location

        for op in pipeline.ops.values():
            # inject the pipeline-level artifact location if the op does not have
            # an artifact location config already.
            if hasattr(op, "artifact_location"):
                if artifact_location and not op.artifact_location:
                    op.artifact_location = artifact_location

            sanitized_name = sanitize_k8s_name(op.name)
            op.name = sanitized_name
            for param in op.outputs.values():
                param.name = sanitize_k8s_name(param.name, True)
                if param.op_name:
                    param.op_name = sanitize_k8s_name(param.op_name)
            if op.output is not None and not isinstance(
                    op.output, dsl._container_op._MultipleOutputsError):
                op.output.name = sanitize_k8s_name(op.output.name, True)
                op.output.op_name = sanitize_k8s_name(op.output.op_name)
            if op.dependent_names:
                op.dependent_names = [
                    sanitize_k8s_name(name) for name in op.dependent_names
                ]
            if isinstance(op, dsl.ContainerOp) and op.file_outputs is not None:
                sanitized_file_outputs = {}
                for key in op.file_outputs.keys():
                    sanitized_file_outputs[sanitize_k8s_name(
                        key, True)] = op.file_outputs[key]
                op.file_outputs = sanitized_file_outputs
            elif isinstance(
                    op, dsl.ResourceOp) and op.attribute_outputs is not None:
                sanitized_attribute_outputs = {}
                for key in op.attribute_outputs.keys():
                    sanitized_attribute_outputs[sanitize_k8s_name(key, True)] = \
                      op.attribute_outputs[key]
                op.attribute_outputs = sanitized_attribute_outputs
            sanitized_ops[sanitized_name] = op
        pipeline.ops = sanitized_ops
Example #11
def after_any(container_ops: List[dsl.ContainerOp]):
    '''
    The function adds a flag for the any-condition handler.
    '''
    tasks_list = []
    for cop in container_ops:
        cop_name = sanitize_k8s_name(cop.name)
        tasks_list.append(cop_name)
    task_list_str = ",".join(tasks_list)

    def _after_components(cop):
        cop.any_sequencer = {"tasks_list": task_list_str}
        return cop

    return _after_components
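
A hedged usage sketch (the op names are made up): after_any returns a transformer, so it would typically be applied to a downstream op, for example via ContainerOp.apply, to record which upstream tasks feed the any-condition handler.

# op_a, op_b, and downstream are assumed to be dsl.ContainerOp instances defined elsewhere.
downstream.apply(after_any([op_a, op_b]))
# downstream.any_sequencer is now {"tasks_list": "<sanitized-op-a-name>,<sanitized-op-b-name>"}
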
Example #12
    def __init__(self,
                 any: Iterable[Union[dsl.ContainerOp, ConditionOperator]],
                 name: str = None,
                 statusPath: str = None,
                 skippingPolicy: str = None,
                 errorPolicy: str = None,
                 image: str = ANY_SEQUENCER_IMAGE):
        arguments = [
            "--namespace", "$(context.pipelineRun.namespace)", "--prName",
            "$(context.pipelineRun.name)"
        ]
        tasks_list = []
        condition_list = []
        file_outputs = None
        for cop in any:
            if isinstance(cop, dsl.ContainerOp):
                cop_name = sanitize_k8s_name(cop.name)
                tasks_list.append(cop_name)
            elif isinstance(cop, ConditionOperator):
                condition_list.append(cop)
        if len(tasks_list) > 0:
            task_list_str = ",".join(tasks_list)
            arguments.extend(["--taskList", task_list_str])
        if statusPath is not None:
            file_outputs = {"status": statusPath}
            arguments.extend(["--statusPath", statusPath])
            if skippingPolicy is not None:
                assert skippingPolicy == "skipOnNoMatch" or skippingPolicy == "errorOnNoMatch"
                arguments.extend(["--skippingPolicy", skippingPolicy])
            if errorPolicy is not None:
                assert errorPolicy == "continueOnError" or errorPolicy == "failOnError"
                arguments.extend(["--errorPolicy", errorPolicy])

        conditonArgs = processConditionArgs(condition_list)
        arguments.extend(conditonArgs)

        super().__init__(
            name=name,
            image=image,
            file_outputs=file_outputs,
            command="any-task",
            arguments=arguments,
        )
Example #13
    def _sanitize_and_inject_artifact(self,
                                      pipeline: dsl.Pipeline,
                                      pipeline_conf=None):
        """Sanitize operator/param names and inject pipeline artifact location."""

        # Sanitize operator names and param names
        sanitized_ops = {}

        for op in pipeline.ops.values():
            sanitized_name = sanitize_k8s_name(op.name)
            op.name = sanitized_name
            for param in op.outputs.values():
                param.name = sanitize_k8s_name(param.name, True)
                if param.op_name:
                    param.op_name = sanitize_k8s_name(param.op_name)
            if op.output is not None and not isinstance(
                    op.output, dsl._container_op._MultipleOutputsError):
                op.output.name = sanitize_k8s_name(op.output.name, True)
                op.output.op_name = sanitize_k8s_name(op.output.op_name)
            if op.dependent_names:
                op.dependent_names = [
                    sanitize_k8s_name(name) for name in op.dependent_names
                ]
            if isinstance(op, dsl.ContainerOp) and op.file_outputs is not None:
                sanitized_file_outputs = {}
                for key in op.file_outputs.keys():
                    sanitized_file_outputs[sanitize_k8s_name(
                        key, True)] = op.file_outputs[key]
                op.file_outputs = sanitized_file_outputs
            elif isinstance(
                    op, dsl.ResourceOp) and op.attribute_outputs is not None:
                sanitized_attribute_outputs = {}
                for key in op.attribute_outputs.keys():
                    sanitized_attribute_outputs[sanitize_k8s_name(key, True)] = \
                      op.attribute_outputs[key]
                op.attribute_outputs = sanitized_attribute_outputs
            if isinstance(op, dsl.ContainerOp) and op.container is not None:
                sanitize_k8s_object(op.container)
            sanitized_ops[sanitized_name] = op
        pipeline.ops = sanitized_ops
Example #14
    def __init__(
        self,
        any: List[dsl.ContainerOp],
        name: str = None,
    ):

        tasks_list = []
        for cop in any:
            cop_name = sanitize_k8s_name(cop.name)
            tasks_list.append(cop_name)
        task_list_str = ",".join(tasks_list)

        super().__init__(
            name=name,
            image="dspipelines/any-sequencer:latest",
            command="any-taskrun",
            arguments=[
                "-namespace", "$(context.pipelineRun.namespace)", "-prName",
                "$(context.pipelineRun.name)", "-taskList", task_list_str
            ],
        )
Example #15
    def _group_to_dag_template(self, group, inputs, outputs, dependencies):
        """Generate template given an OpsGroup.
    inputs, outputs, dependencies are all helper dicts.
    """

        # Generate GroupOp template
        sub_group = group
        template = {
            'apiVersion': tekton_api_version,
            'metadata': {
                'name': sanitize_k8s_name(sub_group.name),
            },
            'spec': {}
        }

        # Generates a pseudo-template unique to conditions due to the catalog condition approach
        # where every condition is an extension of one super-condition
        if isinstance(sub_group,
                      dsl.OpsGroup) and sub_group.type == 'condition':
            subgroup_inputs = inputs.get(sub_group.name, [])
            condition = sub_group.condition

            operand1_value = self._resolve_value_or_reference(
                condition.operand1, subgroup_inputs)
            operand2_value = self._resolve_value_or_reference(
                condition.operand2, subgroup_inputs)

            template['kind'] = 'Condition'
            template['spec']['params'] = [{
                'name': 'operand1',
                'value': operand1_value
            }, {
                'name': 'operand2',
                'value': operand2_value
            }, {
                'name': 'operator',
                'value': str(condition.operator)
            }]

        return template
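
For a condition group, the pseudo-template above comes out roughly as follows (all values are illustrative):

{
    'apiVersion': tekton_api_version,
    'kind': 'Condition',
    'metadata': {'name': 'condition-1'},
    'spec': {
        'params': [
            {'name': 'operand1', 'value': '$(params.threshold)'},
            {'name': 'operand2', 'value': "'5'"},
            {'name': 'operator', 'value': '=='},
        ]
    },
}
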
Example #16
def _handle_tekton_custom_task(custom_task: dict, workflow: dict,
                               recursive_tasks: list, group_names: list):
    """
    Separate custom task's workflow from the main workflow, return a tuple result of custom task cr definitions
    and a new workflow

    Args:
      custom_task: dictionary with custom_task information; the format should be as below:
      {
        'kind': '',
        'task_list': [],
        'spec': {},
        'depends': []
      }
      workflow: the workflow before the loop pipelines are separated out.
      recursive_tasks: List of recursive_tasks information.
      group_names: List of name constructions for creating custom loop crd names.

    Returns:
      A tuple (custom_task_crs, workflow).
      custom_task_crs is a list of custom task cr definitions,
      and workflow is a dict that no longer includes the tasks moved into the custom task definitions.
    """
    custom_task_crs = []
    task_list = []
    tasks = workflow['spec']['pipelineSpec']['tasks']
    new_tasks = []
    dependencies = []
    # handle dependencies
    for key in custom_task.keys():
        dependencies.extend(custom_task[key]['depends'])
    for task in tasks:
        for dependency in dependencies:
            if task['name'] == dependency['org']:
                task_dependencies = [dependency['runAfter']]
                for dep_task in task.get('runAfter', []):
                    # should exclude the custom task itself for cases like graph
                    dep_task_trim = copy.copy(dep_task)
                    if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
                        dep_task_trim = sanitize_k8s_name(
                            dep_task,
                            max_length=LOOP_GROUP_NAME_LENGTH,
                            rev_truncate=True)
                    dep_task_with_prefix = '-'.join(group_names[:-1] +
                                                    [dep_task_trim])
                    if dep_task_with_prefix == dependency['runAfter']:
                        continue
                    if dep_task not in custom_task[
                            dependency['runAfter']]['task_list']:
                        task_dependencies.append(dep_task)
                task['runAfter'] = task_dependencies

    # process recursive tasks to match parameters
    for task in recursive_tasks:
        recursive_graph = custom_task.get(task['taskRef']['name'], {})
        if recursive_graph:
            if recursive_graph['spec']['params']:
                recursive_graph['spec']['params'] = sorted(
                    recursive_graph['spec']['params'], key=lambda k: k['name'])
            for param in recursive_graph['spec']['params']:
                recursive_params = [param['name'] for param in task['params']]
                if param['name'] not in recursive_params:
                    task['params'].append({
                        'name':
                        param['name'],
                        'value':
                        "$(params.%s)" % param['name']
                    })

    # get custom tasks
    for custom_task_key in custom_task.keys():
        denpendency_list = custom_task[custom_task_key]['spec'].get(
            'runAfter', [])
        task_list.extend(custom_task[custom_task_key]['task_list'])
        # generate custom task cr
        custom_task_cr_tasks = []
        for task in tasks:
            if task['name'] in custom_task[custom_task_key]['task_list']:
                for param in task.get('taskSpec', {}).get('params', []):
                    param['type'] = 'string'
                run_after_task_list = []
                for run_after_task in task.get('runAfter', []):
                    for recursive_task in recursive_tasks:
                        # The subset of the loop group name should be LOOP_GROUP_NAME_LENGTH minus 4 because the
                        # numbers of loop cannot exceed 1000 due to ETCD limitation.
                        if sanitize_k8s_name(recursive_task['name'], max_length=(LOOP_GROUP_NAME_LENGTH - 4), rev_truncate=True) \
                            in run_after_task and '-'.join(group_names[:-1]) not in run_after_task:
                            if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
                                run_after_task = sanitize_k8s_name(
                                    run_after_task,
                                    max_length=LOOP_GROUP_NAME_LENGTH,
                                    rev_truncate=True)
                            run_after_task = '-'.join(group_names[:-1] +
                                                      [run_after_task])
                            break
                    if run_after_task not in denpendency_list:
                        run_after_task_list.append(run_after_task)
                if task.get('runAfter', []):
                    task['runAfter'] = run_after_task_list
                custom_task_cr_tasks.append(task)
        # append recursive tasks
        for task in recursive_tasks:
            if task['name'] in custom_task[custom_task_key]['task_list']:
                custom_task_cr_tasks.append(task)
        # generate the custom task cr
        custom_task_cr = {
            "apiVersion": "custom.tekton.dev/v1alpha1",
            "kind": 'custom_task_kind',
            "metadata": {
                "name": custom_task_key
            },
            "spec": {
                "pipelineSpec": {
                    "params": [{
                        "name": parm['name'],
                        'type': 'string'
                    } for parm in sorted(custom_task[custom_task_key]['spec']
                                         ['params'],
                                         key=lambda k: k['name'])],
                    "tasks":
                    custom_task_cr_tasks
                }
            }
        }

        # handle loop special case
        if custom_task[custom_task_key]['kind'] == 'loops':
            # if subvars exist, the loop parameters are dicts:
            # remove the loop_arg and add the subvar args to the cr params
            if custom_task[custom_task_key]['loop_sub_args'] != []:
                refesh_cr_params = []
                for param in custom_task_cr['spec']['pipelineSpec']['params']:
                    if param['name'] != custom_task[custom_task_key][
                            'loop_args']:
                        refesh_cr_params.append(param)
                custom_task_cr['spec']['pipelineSpec'][
                    'params'] = refesh_cr_params
                custom_task_cr['spec']['pipelineSpec']['params'].extend(
                    [{
                        "name": sub_param,
                        'type': 'string'
                    } for sub_param in custom_task[custom_task_key]
                     ['loop_sub_args']])

            # add the loop-specific fields
            custom_task_cr['kind'] = 'PipelineLoop'
            if custom_task[custom_task_key]['spec'].get(
                    'parallelism') is not None:
                custom_task_cr['spec']['parallelism'] = custom_task[
                    custom_task_key]['spec']['parallelism']
                # remove from pipeline run spec
                del custom_task[custom_task_key]['spec']['parallelism']
            custom_task_cr['spec']['iterateParam'] = custom_task[
                custom_task_key]['loop_args']
            for custom_task_param in custom_task[custom_task_key]['spec'][
                    'params']:
                if custom_task_param['name'] != custom_task[custom_task_key][
                        'loop_args'] and '$(tasks.' in custom_task_param[
                            'value']:
                    custom_task_cr = json.loads(
                        json.dumps(custom_task_cr).replace(
                            custom_task_param['value'],
                            '$(params.%s)' % custom_task_param['name']))

        # need to process task parameters to replace out of scope results
        # because nested graph cannot refer to task results outside of the sub-pipeline.
        custom_task_cr_task_names = [
            custom_task_cr_task['name']
            for custom_task_cr_task in custom_task_cr_tasks
        ]
        for task in custom_task_cr_tasks:
            for task_param in task.get('params', []):
                if '$(tasks.' in task_param['value']:
                    param_results = re.findall(
                        '\$\(tasks.([^ \t\n.:,;\{\}]+).results.([^ \t\n.:,;\{\}]+)\)',
                        task_param['value'])
                    for param_result in param_results:
                        if param_result[0] not in custom_task_cr_task_names:
                            task['params'] = json.loads(
                                json.dumps(task['params']).replace(
                                    task_param['value'],
                                    '$(params.%s-%s)' % param_result))
        custom_task_crs.append(custom_task_cr)
        custom_task[custom_task_key]['spec']['params'] = sorted(
            custom_task[custom_task_key]['spec']['params'],
            key=lambda k: k['name'])
        tasks.append(custom_task[custom_task_key]['spec'])

    # handle the nested custom task case
    # To be verified: nested custom tasks with task results as parameters
    nested_custom_tasks = []
    custom_task_crs_namelist = []
    for custom_task_key in custom_task.keys():
        if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
            sanitize_k8s_name(custom_task_key,
                              max_length=LOOP_GROUP_NAME_LENGTH,
                              rev_truncate=True)
        custom_task_crs_namelist.append(custom_task_key)
    for custom_task_key in custom_task.keys():
        for inner_task_name in custom_task[custom_task_key]['task_list']:
            inner_task_name_trimmed = copy.copy(inner_task_name)
            if len(group_names[-1]) <= LOOP_GROUP_NAME_LENGTH:
                inner_task_name_trimmed = sanitize_k8s_name(
                    inner_task_name,
                    max_length=LOOP_GROUP_NAME_LENGTH,
                    rev_truncate=True)
            inner_task_cr_name = '-'.join(group_names[:-1] +
                                          [inner_task_name_trimmed])
            if inner_task_cr_name in custom_task_crs_namelist:
                nested_custom_tasks.append({
                    "father_ct":
                    custom_task_key,
                    "nested_custom_task":
                    inner_task_cr_name
                })
    # Summarize all of the nested task relationships.
    for nested_custom_task in nested_custom_tasks:
        father_ct_name = nested_custom_task['father_ct']
        relationships = find_ancestors(nested_custom_tasks, father_ct_name, [],
                                       father_ct_name)
        nested_custom_task['ancestors'] = relationships['ancestors']
        nested_custom_task['root_ct'] = relationships['root_ct']

    for nested_custom_task in nested_custom_tasks:
        nested_custom_task_spec = custom_task[
            nested_custom_task['nested_custom_task']]['spec']
        for custom_task_cr in custom_task_crs:
            if custom_task_cr['metadata']['name'] == nested_custom_task[
                    'father_ct']:
                # handle parameters of nested custom task
                params_nested_custom_task = nested_custom_task_spec['params']
                # nested_custom_task_special_params: the global params that are not defined in the parent custom task
                nested_custom_task_special_params = [
                    param for param in params_nested_custom_task
                    if '$(params.' in param['value'] and not bool([
                        True
                        for ct_param in custom_task_cr['spec']['pipelineSpec']
                        ['params'] if param['name'] in ct_param['name']
                    ])
                ]
                custom_task_cr['spec']['pipelineSpec']['params'].extend([{
                    'name':
                    param['name'],
                    'type':
                    'string'
                } for param in nested_custom_task_special_params])

                if nested_custom_task['ancestors']:
                    for custom_task_cr_again in custom_task_crs:
                        if custom_task_cr_again['metadata'][
                                'name'] in nested_custom_task[
                                    'ancestors'] or custom_task_cr_again[
                                        'metadata'][
                                            'name'] == nested_custom_task[
                                                'root_ct']:
                            custom_task_cr_again['spec']['pipelineSpec'][
                                'params'].extend(
                                    [{
                                        'name': param['name'],
                                        'type': 'string'
                                    } for param in
                                     nested_custom_task_special_params])
                            custom_task_cr_again['spec']['pipelineSpec'][
                                'params'] = sorted(custom_task_cr_again['spec']
                                                   ['pipelineSpec']['params'],
                                                   key=lambda k: k['name'])
                # add children params to the root tasks
                for task in tasks:
                    if task['name'] == nested_custom_task['root_ct']:
                        task['params'].extend(
                            copy.deepcopy(nested_custom_task_special_params))
                    elif task['name'] in nested_custom_task[
                            'ancestors'] or task['name'] == nested_custom_task[
                                'father_ct']:
                        task['params'].extend(
                            nested_custom_task_special_params)
                    if task.get('params') is not None:
                        task['params'] = sorted(task['params'],
                                                key=lambda k: k['name'])
                for special_param in nested_custom_task_special_params:
                    for nested_param in nested_custom_task_spec['params']:
                        if nested_param['name'] == special_param['name']:
                            nested_param[
                                'value'] = '$(params.%s)' % nested_param['name']
                # need to process parameters to replace results
                custom_task_cr_task_names = [
                    cr_task['name'] for cr_task in custom_task_cr['spec']
                    ['pipelineSpec']['tasks']
                ]
                for nested_custom_task_param in nested_custom_task_spec[
                        'params']:
                    if '$(tasks.' in nested_custom_task_param['value']:
                        param_results = re.findall(
                            '\$\(tasks.([^ \t\n.:,;\{\}]+).results.([^ \t\n.:,;\{\}]+)\)',
                            nested_custom_task_param['value'])
                        for param_result in param_results:
                            if param_result[
                                    0] not in custom_task_cr_task_names:
                                custom_task_cr_param_names = [
                                    p['name'] for p in custom_task_cr['spec']
                                    ['pipelineSpec']['params']
                                ]
                                if nested_custom_task_param[
                                        'name'] not in custom_task_cr_param_names:
                                    for index, param in enumerate(
                                            nested_custom_task_spec['params']):
                                        if nested_custom_task_param[
                                                'name'] == param['name']:
                                            nested_custom_task_spec[
                                                'params'].pop(index)
                                            break
                                else:
                                    nested_custom_task_spec = json.loads(
                                        json.dumps(nested_custom_task_spec).
                                        replace(
                                            nested_custom_task_param['value'],
                                            '$(params.%s)' %
                                            nested_custom_task_param['name']))
                # add nested custom task spec to main custom task
                custom_task_cr['spec']['pipelineSpec']['tasks'].append(
                    nested_custom_task_spec)
                custom_task_cr['spec']['pipelineSpec']['params'] = sorted(
                    custom_task_cr['spec']['pipelineSpec']['params'],
                    key=lambda k: k['name'])

    # remove the tasks that belong to custom tasks from the main workflow
    task_name_prefix = '-'.join(group_names[:-1] + [""])
    for task in tasks:
        if task['name'].replace(task_name_prefix, "") not in task_list:
            task_list_trimmed = [
                sanitize_k8s_name(task,
                                  max_length=LOOP_GROUP_NAME_LENGTH,
                                  rev_truncate=True) for task in task_list
            ]
            if task['name'].replace(task_name_prefix,
                                    "") not in task_list_trimmed:
                new_tasks.append(task)
    workflow['spec']['pipelineSpec']['tasks'] = new_tasks
    return custom_task_crs, workflow
Example #17
    def _workflow_with_pipelinerun(self, task_refs, pipeline,
                                   pipeline_template, workflow):
        """ Generate pipelinerun template """
        pipelinerun = {
            'apiVersion': tekton_api_version,
            'kind': 'PipelineRun',
            'metadata': {
                'name':
                sanitize_k8s_name(pipeline_template['metadata']['name'],
                                  suffix_space=4) + '-run',
                'annotation': {
                    'tekton.dev/output_artifacts':
                    json.dumps(self.output_artifacts),
                    'tekton.dev/input_artifacts':
                    json.dumps(self.input_artifacts)
                }
            },
            'spec': {
                'params': [{
                    'name': p['name'],
                    'value': p.get('default', '')
                } for p in pipeline_template['spec']['params']],
                'pipelineRef': {
                    'name': pipeline_template['metadata']['name']
                }
            }
        }

        # Generate the TaskRunSpecs podTemplates
        task_run_spec = []
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            task_spec = {
                "pipelineTaskName": task['name'],
                "taskPodTemplate": {}
            }
            if op.affinity:
                task_spec["taskPodTemplate"][
                    "affinity"] = convert_k8s_obj_to_json(op.affinity)
            if op.tolerations:
                task_spec["taskPodTemplate"]['tolerations'] = op.tolerations
            if op.node_selector:
                task_spec["taskPodTemplate"]['nodeSelector'] = op.node_selector
            if bool(task_spec["taskPodTemplate"]):
                task_run_spec.append(task_spec)
        if len(task_run_spec) > 0:
            pipelinerun['spec']['taskRunSpecs'] = task_run_spec

        # add workflow level timeout to pipeline run
        if pipeline.conf.timeout:
            pipelinerun['spec']['timeout'] = '%ds' % pipeline.conf.timeout

        # generate the Tekton podTemplate for image pull secret
        if len(pipeline.conf.image_pull_secrets) > 0:
            pipelinerun['spec']['podTemplate'] = pipelinerun['spec'].get(
                'podTemplate', {})
            pipelinerun['spec']['podTemplate']['imagePullSecrets'] = [{
                "name":
                s.name
            } for s in pipeline.conf.image_pull_secrets]

        workflow = workflow + [pipelinerun]

        return workflow
Example #18
    def _create_workflow(
        self,
        pipeline_func: Callable,
        pipeline_name: Text = None,
        pipeline_description: Text = None,
        params_list: List[dsl.PipelineParam] = None,
        pipeline_conf: dsl.PipelineConf = None,
    ) -> List[Dict[Text, Any]]:  # Tekton change, signature
        """ Internal implementation of create_workflow."""
        params_list = params_list or []
        argspec = inspect.getfullargspec(pipeline_func)

        # Create the arg list with no default values and call pipeline function.
        # Assign type information to the PipelineParam
        pipeline_meta = _extract_pipeline_metadata(pipeline_func)
        pipeline_meta.name = pipeline_name or pipeline_meta.name
        pipeline_meta.description = pipeline_description or pipeline_meta.description
        pipeline_name = sanitize_k8s_name(pipeline_meta.name)

        # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
        # will be resolved immediately in place when being passed to each component.
        default_param_values = {}
        for param in params_list:
            default_param_values[param.name] = param.value
            param.value = None

        # Currently only allow specifying pipeline params at one place.
        if params_list and pipeline_meta.inputs:
            raise ValueError(
                'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
            )

        args_list = []
        for arg_name in argspec.args:
            arg_type = None
            for input in pipeline_meta.inputs or []:
                if arg_name == input.name:
                    arg_type = input.type
                    break
            args_list.append(
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        # Configuration passed to the compiler is overriding. Unfortunately, it is
        # not trivial to detect whether the dsl_pipeline.conf was ever modified.
        pipeline_conf = pipeline_conf or dsl_pipeline.conf

        self._validate_exit_handler(dsl_pipeline)
        self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

        # Fill in the default values.
        args_list_with_defaults = []
        if pipeline_meta.inputs:
            args_list_with_defaults = [
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                for arg_name in argspec.args
            ]
            if argspec.defaults:
                for arg, default in zip(reversed(args_list_with_defaults),
                                        reversed(argspec.defaults)):
                    arg.value = default.value if isinstance(
                        default, dsl.PipelineParam) else default
        elif params_list:
            # Or, if args are provided by params_list, fill in pipeline_meta.
            for param in params_list:
                param.value = default_param_values[param.name]

            args_list_with_defaults = params_list
            pipeline_meta.inputs = [
                InputSpec(name=param.name,
                          type=param.param_type,
                          default=param.value) for param in params_list
            ]

        op_transformers = [add_pod_env]
        op_transformers.extend(pipeline_conf.op_transformers)

        workflow = self._create_pipeline_workflow(
            args_list_with_defaults,
            dsl_pipeline,
            op_transformers,
            pipeline_conf,
        )

        from kfp_tekton.compiler._data_passing_rewriter import fix_big_data_passing
        workflow = fix_big_data_passing(workflow)

        import json
        pipeline = [item for item in workflow
                    if item["kind"] == "Pipeline"][0]  # Tekton change
        pipeline.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
          json.dumps(pipeline_meta.to_dict(), sort_keys=True)

        return workflow
Example #19
    def _get_loop_task(self, task: Dict, op_name_to_for_loop_op):
        """Get the list of task references which will flatten the loop parameters defined in pipeline.

    Args:
      task: ops template in pipeline.
      op_name_to_for_loop_op: a dictionary of opsgroups
    """
        # Get all the params in the task
        task_params_list = []
        for tp in task.get('params', []):
            task_params_list.append(tp)
        # Get the loop values for each param
        for tp in task_params_list:
            for loop_param in op_name_to_for_loop_op.values():
                loop_args = loop_param.loop_args
                if loop_args.name in tp['name']:
                    lpn = tp['name'].replace(loop_args.name, '').replace(
                        LoopArgumentVariable.SUBVAR_NAME_DELIMITER, '')
                    if lpn:
                        tp['loop-value'] = [
                            value[lpn]
                            for value in loop_args.items_or_pipeline_param
                        ]
                    else:
                        tp['loop-value'] = loop_args.items_or_pipeline_param
        # Get the task params list
        # 1. Get the task_params list without loop first
        loop_value = [
            p['loop-value'] for p in task_params_list if p.get('loop-value')
        ]
        task_params_without_loop = [
            p for p in task_params_list if not p.get('loop-value')
        ]
        # 2. Get the task_params list with loop
        loop_params = [p for p in task_params_list if p.get('loop-value')]
        for param in loop_params:
            del param['loop-value']
            del param['value']

        value_iter = list(itertools.product(*loop_value))
        value_iter_list = []
        for values in value_iter:
            opt = []
            for value in values:
                opt.append({"value": str(value)})
            value_iter_list.append(opt)
        # merge each loop param's remaining fields (e.g. its name) into every generated value dict
        {
            value[i].update(loop_params[i])
            for i in range(len(loop_params)) for value in value_iter_list
        }
        task_params_with_loop = value_iter_list
        # 3. combine task params
        list(a.extend(task_params_without_loop) for a in task_params_with_loop)
        task_params_all = task_params_with_loop
        # Get the task list based on params list
        task_list = []
        del task['params']
        task_name_suffix_length = len(LoopArguments.LOOP_ITEM_NAME_BASE
                                      ) + LoopArguments.NUM_CODE_CHARS + 2
        task_old_name = sanitize_k8s_name(task['name'],
                                          suffix_space=task_name_suffix_length)
        for i in range(len(task_params_all)):
            task['params'] = task_params_all[i]
            task['name'] = '%s-%s-%d' % (task_old_name,
                                         LoopArguments.LOOP_ITEM_NAME_BASE, i)
            task_list.append(copy.deepcopy(task))
            del task['params']
        return task_list
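
The flattening relies on itertools.product over the collected loop values; a minimal standalone sketch (not the compiler API) of that idea:

import itertools

loop_values = [[1, 2], ["a", "b"]]   # values gathered from two loop params
combinations = list(itertools.product(*loop_values))
# -> [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]
# Each combination becomes one concrete task named "<task>-loop-item-<i>";
# "loop-item" stands in for LoopArguments.LOOP_ITEM_NAME_BASE (an assumption here).
task_names = ["my-task-loop-item-%d" % i for i in range(len(combinations))]
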
Example #20
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    artifact_items={}):
    """Generate template given an operator inherited from BaseOp."""

    # Display name
    if op.display_name:
        op.add_pod_annotation('pipelines.kubeflow.org/task_display_name',
                              op.display_name)

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [{
            'name': name,
            'path': path
        } for name, path in output_artifact_paths.items()]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Calling containerOp step as "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        for output_artifact in output_artifacts:
            output_annotation = pipelinerun_output_artifacts.get(
                processed_op.name, [])
            output_annotation.append({
                'name':
                output_artifact.get('name', ''),
                'path':
                output_artifact.get('path', ''),
                'key':
                "artifacts/$PIPELINERUN/%s/%s.tgz" %
                (processed_op.name, output_artifact.get('name', '').replace(
                    processed_op.name + '-', ''))
            })
            pipelinerun_output_artifacts[processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten manifest because it needs to replace Argo variables
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove yaml quote in order to read bash variables
            manifest = re.sub('name: \'([^\']+)\'', 'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    artifact_items[op.name] = artifact_items.get(op.name, [])
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        _process_output_artifacts(outputs_dict, volume_mount_step_template,
                                  volume_template, replaced_param_list,
                                  artifact_to_result_mapping,
                                  artifact_items[op.name])
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    if isinstance(op, dsl.ContainerOp
                  ) and op._metadata and GENERATE_COMPONENT_SPEC_ANNOTATIONS:
        component_spec_dict = op._metadata.to_dict()
        component_spec_digest = hashlib.sha256(
            json.dumps(component_spec_dict,
                       sort_keys=True).encode()).hexdigest()
        component_name = component_spec_dict.get('name', op.name)
        component_version = component_name + '@sha256=' + component_spec_digest
        digested_component_spec_dict = {
            'name': component_name,
            'outputs': component_spec_dict.get('outputs', []),
            'version': component_version
        }
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec_digest'] = \
            json.dumps(digested_component_spec_dict, sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    # Sort and arrange results based on provided estimate size and process results in multi-steps if the result sizes are too big.
    result_size_map = "{}"
    if processed_op.pod_annotations:
        result_size_map = processed_op.pod_annotations.get(
            "tekton-result-sizes", "{}")
    # Only sort and arrange results when the estimated sizes are given.
    if result_size_map and result_size_map != "{}":
        try:
            result_size_map = json.loads(result_size_map)
        except ValueError:
            raise ("tekton-result-sizes annotation is not a valid JSON")
        # Normalize estimated result size keys.
        result_size_map = {
            sanitize_k8s_name(key, allow_capital_underscore=True): value
            for key, value in result_size_map.items()
        }
        # Sort key orders based on values
        result_size_map = dict(
            sorted(result_size_map.items(),
                   key=lambda item: item[1],
                   reverse=True))
        max_byte_size = 2048
        verified_result_size_map = {0: {}}
        op_result_names = [
            name['name'] for name in template['spec']['results']
        ]
        step_bins = {0: 0}
        step_counter = 0
        # Group result files so they do not exceed max_byte_size, treated as a bin packing problem.
        # Results are sorted from large to small; each value loops over the bins to check whether it fits in an existing one.
        for key, value in result_size_map.items():
            try:
                value = int(value)
            except ValueError:
                raise ValueError(
                    "Estimated value for result %s is %s, but it needs to be an integer."
                    % (key, value))
            if key in op_result_names:
                packed_index = -1
                # Look for bin that can fit the result value
                for i in range(len(step_bins)):
                    if step_bins[i] + value > max_byte_size:
                        continue
                    step_bins[i] = step_bins[i] + value
                    packed_index = i
                    break
                # If no bin can fit the value, create a new bin to store the value
                if packed_index < 0:
                    step_counter += 1
                    if value > max_byte_size:
                        logging.warning(
                            "The estimated size for parameter %s is %sB which is more than 2KB, "
                            "consider passing this value as artifact instead of output parameter."
                            % (key, str(value)))
                    step_bins[step_counter] = value
                    verified_result_size_map[step_counter] = {}
                    packed_index = step_counter
                verified_result_size_map[packed_index][key] = value
            else:
                logging.warning(
                    "The esitmated size for parameter %s does not exist in the task %s."
                    "Please correct the task annotations with the correct parameter key"
                    % (key, op.name))
        missing_param_estimation = []
        for result_name in op_result_names:
            if result_name not in result_size_map.keys():
                missing_param_estimation.append(result_name)
        if missing_param_estimation:
            logging.warning(
                "The following output parameter estimations are missing in task %s: Missing params: %s."
                % (op.name, missing_param_estimation))
        # Move results between the Tekton home and result directories if there are more than one step
        if step_counter > 0:
            for step in template['spec']['steps']:
                if step['name'] == 'main':
                    for key in result_size_map.keys():
                        # Replace main step results that are not in the first bin to the Tekton home path
                        if key not in verified_result_size_map[0].keys():
                            sanitize_key = sanitize_k8s_name(key)
                            for i, a in enumerate(step['args']):
                                a = a.replace(
                                    '$(results.%s.path)' % sanitize_key,
                                    '%s%s' %
                                    (TEKTON_HOME_RESULT_PATH, sanitize_key))
                                step['args'][i] = a
                            for i, c in enumerate(step['command']):
                                c = c.replace(
                                    '$(results.%s.path)' % sanitize_key,
                                    '%s%s' %
                                    (TEKTON_HOME_RESULT_PATH, sanitize_key))
                                step['command'][i] = c
            # Append new steps to move result files between each step, so Tekton controller can record all results without
            # exceeding the Kubernetes termination log limit.
            for i in range(1, step_counter + 1):
                copy_result_step = _get_copy_result_step_template(
                    i, verified_result_size_map)
                template['spec']['steps'].append(copy_result_step)
        # Update artifact item locations to the latest stage in order to properly track and store all the artifacts.
        for i, artifact in enumerate(artifact_items[op.name]):
            if artifact[0] not in verified_result_size_map[step_counter].keys(
            ):
                artifact[1] = '%s%s' % (TEKTON_HOME_RESULT_PATH,
                                        sanitize_k8s_name(artifact[0]))
                artifact_items[op.name][i] = artifact
    return template
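
The grouping above is essentially a first-fit pass over results sorted by estimated size. Below is a minimal, standalone sketch of the same idea; the `group_results_by_size` helper, its `estimates` argument, and the example sizes are illustrative stand-ins, not part of the compiler.

# Minimal, self-contained sketch of the first-fit grouping used above.
# MAX_RESULT_BYTES mirrors the 2048-byte cap in the compiler code.
MAX_RESULT_BYTES = 2048

def group_results_by_size(estimates: dict) -> dict:
    """Bucket result names so each bucket stays under MAX_RESULT_BYTES."""
    # Largest results first, mirroring the sort above.
    ordered = sorted(estimates.items(), key=lambda kv: kv[1], reverse=True)
    bins = {0: 0}          # bin index -> bytes used so far
    groups = {0: {}}       # bin index -> {result name: size}
    for name, size in ordered:
        placed = None
        for i in bins:
            if bins[i] + size <= MAX_RESULT_BYTES:
                bins[i] += size
                placed = i
                break
        if placed is None:  # open a new bin when nothing fits
            placed = len(bins)
            bins[placed] = size
            groups[placed] = {}
        groups[placed][name] = size
    return groups

# Example: three results where only the first and third fit together.
print(group_results_by_size({"model": 1500, "metrics": 500, "log": 1200}))
# -> {0: {'model': 1500, 'metrics': 500}, 1: {'log': 1200}}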
Example #21
0
def _process_parameters(processed_op: BaseOp, template: Dict[Text, Any],
                        outputs_dict: Dict[Text, Any],
                        volume_mount_step_template: List[Dict[Text, Any]],
                        volume_template: List[Dict[Text, Any]],
                        replaced_param_list: List[Text],
                        artifact_to_result_mapping: Dict[Text, Any],
                        mounted_param_paths: List[Text]):
    """Process output parameters to replicate the same behavior as Argo.

    Tekton results need to be stored under /tekton/results. If file output paths cannot be
    configured to /tekton/results, we need to create the copy step below for moving
    file outputs to the Tekton destination. BusyBox is recommended for
    small tasks because it's relatively lightweight and small compared to the ubuntu and
    bash images.

    - image: busybox
        name: copy-results
        script: |
            #!/bin/sh
            set -exo pipefail
            cp $LOCALPATH $(results.data.path);

    Args:
        processed_op {BaseOp}: class that inherits from BaseOp
        template {Dict[Text, Any]}: Task template
        outputs_dict {Dict[Text, Any]}: Dictionary of the possible parameters/artifacts in this task
        volume_mount_step_template {List[Dict[Text, Any]]}: Step template for the list of volume mounts
        volume_template {List[Dict[Text, Any]]}: Task template for the list of volumes
        replaced_param_list {List[Text]}: List of parameters that already set up as results
        artifact_to_result_mapping {Dict[Text, Any]}: Mapping between parameter and artifact results
        mounted_param_paths {List[Text]}: List of paths that already mounted to a volume.

    Returns:
        Dict[Text, Any]
    """
    if outputs_dict.get('parameters'):
        template['spec']['results'] = []
        copy_results_step = _get_base_step('copy-results')
        for name, path in processed_op.file_outputs.items():
            template['spec']['results'].append({
                'name': name,
                'description': path
            })
            # replace all occurrences of the output file path with the Tekton output parameter expression
            need_copy_step = True
            for s in template['spec']['steps']:
                if 'command' in s:
                    commands = []
                    for c in s['command']:
                        if path in c:
                            c = c.replace(
                                path,
                                '$(results.%s.path)' % sanitize_k8s_name(name))
                            need_copy_step = False
                        commands.append(c)
                    s['command'] = commands
                if 'args' in s:
                    args = []
                    for a in s['args']:
                        if path in a:
                            a = a.replace(
                                path,
                                '$(results.%s.path)' % sanitize_k8s_name(name))
                            need_copy_step = False
                        args.append(a)
                    s['args'] = args
                if path == '/tekton/results/' + sanitize_k8s_name(name):
                    need_copy_step = False
            # If file output path cannot be found/replaced, use emptyDir to copy it to the tekton/results path
            if need_copy_step:
                copy_results_step['script'] = copy_results_step['script'] + 'cp ' + path + ' $(results.%s.path);' \
                                                % sanitize_k8s_name(name) + '\n'
                mount_path = path.rsplit("/", 1)[0]
                if mount_path not in mounted_param_paths:
                    _add_mount_path(name, path, mount_path,
                                    volume_mount_step_template,
                                    volume_template, mounted_param_paths)
            # Record what artifacts are moved to result parameters.
            parameter_name = sanitize_k8s_name(processed_op.name + '-' + name,
                                               allow_capital_underscore=True,
                                               max_length=float('Inf'))
            replaced_param_list.append(parameter_name)
            artifact_to_result_mapping[parameter_name] = name
        return copy_results_step
    else:
        return {}
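
For reference, a stripped-down sketch of the path substitution performed above: each declared output file path found in a step's command/args is rewritten to the Tekton result expression. The `sanitize` helper here is a rough stand-in for sanitize_k8s_name, and the step dict is made up.

import re

def sanitize(name: str) -> str:
    """Rough stand-in for sanitize_k8s_name: lowercase, dashes only."""
    return re.sub(r'[^a-z0-9-]', '-', name.lower()).strip('-')

def replace_output_path(step: dict, output_name: str, path: str) -> dict:
    """Rewrite a file output path to $(results.<name>.path) in one step."""
    expr = '$(results.%s.path)' % sanitize(output_name)
    for field in ('command', 'args'):
        if field in step:
            step[field] = [part.replace(path, expr) for part in step[field]]
    return step

step = {'command': ['sh', '-c'], 'args': ['train.py --out /tmp/outputs/accuracy']}
print(replace_output_path(step, 'accuracy', '/tmp/outputs/accuracy'))
# -> {'command': ['sh', '-c'], 'args': ['train.py --out $(results.accuracy.path)']}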
Example #22
0
    def _create_pipeline_workflow(self, args, pipeline, op_transformers=None, pipeline_conf=None) \
            -> Dict[Text, Any]:
        """Create workflow for the pipeline."""
        # Input Parameters
        params = []
        for arg in args:
            param = {'name': arg.name}
            if arg.value is not None:
                if isinstance(arg.value, (list, tuple, dict)):
                    param['default'] = json.dumps(arg.value, sort_keys=True)
                else:
                    param['default'] = str(arg.value)
            params.append(param)

        # TODO: task templates?

        # generate Tekton tasks from pipeline ops
        raw_templates = self._create_dag_templates(pipeline, op_transformers,
                                                   params)

        # generate task and condition reference list for the Tekton Pipeline
        condition_refs = {}

        # TODO
        task_refs = []
        templates = []
        condition_task_refs = {}
        for template in raw_templates:
            # TODO Allow an opt-out for the condition_template
            if template['kind'] == 'Condition':
                condition_task_ref = [{
                    'name':
                    template['metadata']['name'],
                    'params': [{
                        'name': p['name'],
                        'value': p.get('value', '')
                    } for p in template['spec'].get('params', [])],
                    'taskSpec':
                    _get_super_condition_template(),
                }]
                condition_refs[template['metadata']['name']] = [{
                    'input':
                    '$(tasks.%s.results.status)' %
                    template['metadata']['name'],
                    'operator':
                    'in',
                    'values': ['true']
                }]
                condition_task_refs[template['metadata']
                                    ['name']] = condition_task_ref
            else:
                templates.append(template)
                task_ref = {
                    'name':
                    template['metadata']['name'],
                    'params': [{
                        'name': p['name'],
                        'value': p.get('default', '')
                    } for p in template['spec'].get('params', [])],
                    'taskSpec':
                    template['spec'],
                }

                if template['metadata'].get('labels', None):
                    task_ref['taskSpec']['metadata'] = task_ref[
                        'taskSpec'].get('metadata', {})
                    task_ref['taskSpec']['metadata']['labels'] = template[
                        'metadata']['labels']
                if template['metadata'].get('annotations', None):
                    task_ref['taskSpec']['metadata'] = task_ref[
                        'taskSpec'].get('metadata', {})
                    task_ref['taskSpec']['metadata']['annotations'] = template[
                        'metadata']['annotations']
                task_refs.append(task_ref)

        # process input parameters from upstream tasks for conditions and pair conditions with their ancestor conditions
        opsgroup_stack = [pipeline.groups[0]]
        condition_stack = [None]
        while opsgroup_stack:
            cur_opsgroup = opsgroup_stack.pop()
            most_recent_condition = condition_stack.pop()

            if cur_opsgroup.type == 'condition':
                condition_task_ref = condition_task_refs[cur_opsgroup.name][0]
                condition = cur_opsgroup.condition
                input_params = []

                # Process input parameters if needed
                if isinstance(condition.operand1, dsl.PipelineParam):
                    if condition.operand1.op_name:
                        operand_value = '$(tasks.' + condition.operand1.op_name + '.results.' + sanitize_k8s_name(
                            condition.operand1.name) + ')'
                    else:
                        operand_value = '$(params.' + condition.operand1.name + ')'
                    input_params.append(operand_value)
                if isinstance(condition.operand2, dsl.PipelineParam):
                    if condition.operand2.op_name:
                        operand_value = '$(tasks.' + condition.operand2.op_name + '.results.' + sanitize_k8s_name(
                            condition.operand2.name) + ')'
                    else:
                        operand_value = '$(params.' + condition.operand2.name + ')'
                    input_params.append(operand_value)
                for param_iter in range(len(input_params)):
                    condition_task_ref['params'][param_iter][
                        'value'] = input_params[param_iter]

            opsgroup_stack.extend(cur_opsgroup.groups)
            condition_stack.extend([
                most_recent_condition for x in range(len(cur_opsgroup.groups))
            ])
        # add task dependencies and add condition refs to the task ref that depends on the condition
        op_name_to_parent_groups = self._get_groups_for_ops(pipeline.groups[0])
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            parent_group = op_name_to_parent_groups.get(task['name'], [])
            if parent_group:
                if condition_refs.get(parent_group[-2], []):
                    task['when'] = condition_refs.get(
                        op_name_to_parent_groups[task['name']][-2], [])
            if op.dependent_names:
                task['runAfter'] = op.dependent_names

        # process input parameters from upstream tasks
        pipeline_param_names = [p['name'] for p in params]
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            for tp in task.get('params', []):
                if tp['name'] in pipeline_param_names:
                    tp['value'] = '$(params.%s)' % tp['name']
                else:
                    for pp in op.inputs:
                        if tp['name'] == pp.full_name:
                            tp['value'] = '$(tasks.%s.results.%s)' % (
                                pp.op_name, pp.name)
                            # Create input artifact tracking annotation
                            input_annotation = self.input_artifacts.get(
                                task['name'], [])
                            input_annotation.append({
                                'name': tp['name'],
                                'parent_task': pp.op_name
                            })
                            self.input_artifacts[
                                task['name']] = input_annotation
                            break

        # add retries params
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if op.num_retries:
                task['retries'] = op.num_retries

        # add timeout params to the task refs instead of the task spec.
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if not TEKTON_GLOBAL_DEFAULT_TIMEOUT or op.timeout:
                task['timeout'] = '%ds' % op.timeout

        # handle resourceOp cases in pipeline
        self._process_resourceOp(task_refs, pipeline)

        # handle exit handler in pipeline
        finally_tasks = []
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if op.is_exit_handler:
                finally_tasks.append(task)
        task_refs = [
            task for task in task_refs
            if not pipeline.ops.get(task['name']).is_exit_handler
        ]

        # process loop parameters; keep this section after the other processing steps and ahead of generating the pipeline
        root_group = pipeline.groups[0]
        op_name_to_for_loop_op = self._get_for_loop_ops(root_group)
        if op_name_to_for_loop_op:
            for loop_param in op_name_to_for_loop_op.values():
                if loop_param.items_is_pipeline_param is True:
                    raise NotImplementedError(
                        "dynamic params are not yet implemented")
            include_loop_task_refs = []
            for task in task_refs:
                with_loop_task = self._get_loop_task(task,
                                                     op_name_to_for_loop_op)
                include_loop_task_refs.extend(with_loop_task)
            task_refs = include_loop_task_refs

        # Flatten condition task
        condition_task_refs_temp = []
        for condition_task_ref in condition_task_refs.values():
            for ref in condition_task_ref:
                condition_task_refs_temp.append(ref)
        condition_task_refs = condition_task_refs_temp
        # TODO: generate the PipelineRun template
        pipeline_run = {
            'apiVersion': tekton_api_version,
            'kind': 'PipelineRun',
            'metadata': {
                'name':
                sanitize_k8s_name(pipeline.name or 'Pipeline', suffix_space=4),
                # 'labels': get_default_telemetry_labels(),
                'annotations': {
                    'tekton.dev/output_artifacts':
                    json.dumps(self.output_artifacts, sort_keys=True),
                    'tekton.dev/input_artifacts':
                    json.dumps(self.input_artifacts, sort_keys=True),
                    'tekton.dev/artifact_bucket':
                    DEFAULT_ARTIFACT_BUCKET,
                    'tekton.dev/artifact_endpoint':
                    DEFAULT_ARTIFACT_ENDPOINT,
                    'tekton.dev/artifact_endpoint_scheme':
                    DEFAULT_ARTIFACT_ENDPOINT_SCHEME,
                    'tekton.dev/artifact_items':
                    json.dumps(self.artifact_items, sort_keys=True),
                    'sidecar.istio.io/inject':
                    'false'  # disable Istio inject since Tekton cannot run with Istio sidecar
                }
            },
            'spec': {
                'params': [{
                    'name': p['name'],
                    'value': p.get('default', '')
                } for p in params],
                'pipelineSpec': {
                    'params': params,
                    'tasks': task_refs + condition_task_refs,
                    'finally': finally_tasks
                }
            }
        }

        # TODO: pipelineRun additions

        # Generate TaskRunSpec PodTemplates
        task_run_spec = []
        for task in task_refs:

            # TODO: should loop-item tasks be included here?
            if LoopArguments.LOOP_ITEM_NAME_BASE in task['name']:
                task_name = re.sub(
                    r'-%s-.+$' % LoopArguments.LOOP_ITEM_NAME_BASE, '',
                    task['name'])
            else:
                task_name = task['name']
            op = pipeline.ops.get(task_name)
            if not op:
                raise RuntimeError("unable to find op with name '%s'" %
                                   task["name"])

            task_spec = {
                "pipelineTaskName": task['name'],
                "taskPodTemplate": {}
            }
            if op.affinity:
                task_spec["taskPodTemplate"][
                    "affinity"] = convert_k8s_obj_to_json(op.affinity)
            if op.tolerations:
                task_spec["taskPodTemplate"]['tolerations'] = op.tolerations
            if op.node_selector:
                task_spec["taskPodTemplate"]['nodeSelector'] = op.node_selector
            if bool(task_spec["taskPodTemplate"]):
                task_run_spec.append(task_spec)
        if len(task_run_spec) > 0:
            pipeline_run['spec']['taskRunSpecs'] = task_run_spec

        # add workflow level timeout to pipeline run
        if not TEKTON_GLOBAL_DEFAULT_TIMEOUT or pipeline.conf.timeout:
            pipeline_run['spec']['timeout'] = '%ds' % pipeline.conf.timeout

        # generate the Tekton podTemplate for image pull secret
        if len(pipeline.conf.image_pull_secrets) > 0:
            pipeline_run['spec']['podTemplate'] = pipeline_run['spec'].get(
                'podTemplate', {})
            pipeline_run['spec']['podTemplate']['imagePullSecrets'] = [{
                "name":
                s.name
            } for s in pipeline.conf.image_pull_secrets]

        workflow = pipeline_run

        return workflow
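
The condition handling above follows one convention: each Condition opsgroup becomes an extra task that emits a `status` result, and every task guarded by it carries a `when` expression over that result. A hypothetical rendering of the two generated dicts follows (the task, result, and operand names are invented here for illustration).

# Hypothetical shapes of the generated references, assuming a condition
# task named "condition-1" guarding a task named "train".
condition_task_ref = {
    'name': 'condition-1',
    'params': [{'name': 'operand1', 'value': '$(tasks.gen.results.output)'},
               {'name': 'operand2', 'value': "'5'"}],
    # 'taskSpec' would be filled in by _get_super_condition_template()
}
guarded_task_ref = {
    'name': 'train',
    'when': [{
        'input': '$(tasks.condition-1.results.status)',
        'operator': 'in',
        'values': ['true'],
    }],
}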
Example #23
0
    def _create_workflow(
        self,
        pipeline_func: Callable,
        pipeline_name: Text = None,
        pipeline_description: Text = None,
        params_list: List[dsl.PipelineParam] = None,
        pipeline_conf: dsl.PipelineConf = None,
    ) -> Dict[Text, Any]:
        """ Internal implementation of create_workflow."""
        params_list = params_list or []
        argspec = inspect.getfullargspec(pipeline_func)

        # Create the arg list with no default values and call pipeline function.
        # Assign type information to the PipelineParam
        pipeline_meta = _extract_pipeline_metadata(pipeline_func)
        pipeline_meta.name = pipeline_name or pipeline_meta.name
        pipeline_meta.description = pipeline_description or pipeline_meta.description
        pipeline_name = sanitize_k8s_name(pipeline_meta.name)

        # Need to first clear the default value of dsl.PipelineParams. Otherwise, it
        # will be resolved immediately in place when being passed to each component.
        default_param_values = {}
        for param in params_list:
            default_param_values[param.name] = param.value
            param.value = None

        # Currently only allow specifying pipeline params at one place.
        if params_list and pipeline_meta.inputs:
            raise ValueError(
                'Either specify pipeline params in the pipeline function, or in "params_list", but not both.'
            )

        args_list = []
        for arg_name in argspec.args:
            arg_type = None
            for input in pipeline_meta.inputs or []:
                if arg_name == input.name:
                    arg_type = input.type
                    break
            args_list.append(
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True),
                                  param_type=arg_type))

        with dsl.Pipeline(pipeline_name) as dsl_pipeline:
            pipeline_func(*args_list)

        # Configuration passed to the compiler is overriding. Unfortunately, it is
        # not trivial to detect whether the dsl_pipeline.conf was ever modified.
        pipeline_conf = pipeline_conf or dsl_pipeline.conf

        self._validate_exit_handler(dsl_pipeline)
        self._sanitize_and_inject_artifact(dsl_pipeline, pipeline_conf)

        # Fill in the default values.
        args_list_with_defaults = []
        if pipeline_meta.inputs:
            args_list_with_defaults = [
                dsl.PipelineParam(sanitize_k8s_name(arg_name, True))
                for arg_name in argspec.args
            ]
            if argspec.defaults:
                for arg, default in zip(reversed(args_list_with_defaults),
                                        reversed(argspec.defaults)):
                    arg.value = default.value if isinstance(
                        default, dsl.PipelineParam) else default
        elif params_list:
            # Or, if args are provided by params_list, fill in pipeline_meta.
            for param in params_list:
                param.value = default_param_values[param.name]

            args_list_with_defaults = params_list
            pipeline_meta.inputs = [
                InputSpec(name=param.name,
                          type=param.param_type,
                          default=param.value) for param in params_list
            ]

        op_transformers = [add_pod_env]

        # # By default adds telemetry instruments. Users can opt out toggling
        # # allow_telemetry.
        # # Also, TFX pipelines will be bypassed for pipeline compiled by tfx>0.21.4.
        # if allow_telemetry:
        #   pod_labels = get_default_telemetry_labels()
        #   op_transformers.append(add_pod_labels(pod_labels))

        op_transformers.extend(pipeline_conf.op_transformers)

        workflow = self._create_pipeline_workflow(
            args_list_with_defaults,
            dsl_pipeline,
            op_transformers,
            pipeline_conf,
        )

        workflow = fix_big_data_passing(workflow)

        workflow.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/pipeline_spec'] = \
          json.dumps(pipeline_meta.to_dict(), sort_keys=True)

        # recursively strip empty structures, DANGER: this may remove necessary empty elements ?!
        def remove_empty_elements(obj) -> dict:
            if not isinstance(obj, (dict, list)):
                return obj
            if isinstance(obj, list):
                return [remove_empty_elements(o) for o in obj if o != []]
            return {
                k: remove_empty_elements(v)
                for k, v in obj.items() if v != []
            }

        workflow = remove_empty_elements(workflow)

        return workflow
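
A quick illustration of what the recursive pruning above strips and what it keeps: only empty lists are removed, while empty strings and empty dicts survive, which is why the DANGER note applies. The function is repeated below as a standalone sketch with a made-up input.

def remove_empty_elements(obj):
    """Same pruning logic as above, repeated here as a standalone sketch."""
    if not isinstance(obj, (dict, list)):
        return obj
    if isinstance(obj, list):
        return [remove_empty_elements(o) for o in obj if o != []]
    return {k: remove_empty_elements(v) for k, v in obj.items() if v != []}

doc = {'spec': {'params': [], 'results': [{'name': 'out', 'description': ''}]},
       'metadata': {}}
print(remove_empty_elements(doc))
# -> {'spec': {'results': [{'name': 'out', 'description': ''}]}, 'metadata': {}}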
Example #24
0
def fix_big_data_passing(
    workflow: List[Dict[Text, Any]]
) -> List[Dict[Text, Any]]:  # Tekton change signature
    """
    fix_big_data_passing converts a workflow where some artifact data is passed
    as parameters and converts it to a workflow where this data is passed as
    artifacts.

    Args:
        workflow: The workflow to fix
    Returns:
        The fixed workflow

    Motivation:

    The DSL compiler only supports passing Tekton parameters.
    Due to the convoluted nature of the DSL compiler, the artifact consumption
    and passing has been implemented on top of that using parameter passing.
    The artifact data is passed as parameters and the consumer template creates
    an artifact/file out of that data.
    Due to the limitations of Kubernetes and Tekton, this scheme cannot pass data
    larger than a few kilobytes, preventing any serious use of artifacts.

    This function rewrites the compiled workflow so that the data consumed as
    artifact is passed as artifact.
    It also prunes the unused parameter outputs. This is important since if a
    big piece of data is ever returned through a file that is also output as
    a parameter, the execution will fail.
    This makes it possible to pass large amounts of data.

    Implementation:

    1. Index the pipelines to understand how data is being passed and which inputs/outputs
       are connected to each other.
    2. Search for direct data consumers in container/resource templates and some
       pipeline task attributes (e.g. conditions and loops) to find out which inputs
       are directly consumed as parameters/artifacts.
    3. Propagate the consumption information upstream to all inputs/outputs all
       the way up to the data producers.
    4. Convert the inputs and outputs based on how they're consumed downstream.
    5. Use workspaces instead of results and params for big data passing.
    6. Add workspaces to tasks, pipelines, and pipelineruns if the params carry big data.
    7. A PVC named after the pipelinerun will be created if big data is passed, since the workspaces need to use it.
       Users need to define a proper volume or enable dynamic volume provisioning; refer to:
       https://kubernetes.io/docs/concepts/storage/dynamic-provisioning
    """

    workflow = copy.deepcopy(workflow)
    resource_templates = []
    for template in workflow:
        resource_params = [
            param.get('name')
            for param in template.get('spec', {}).get('params', [])
            if param.get('name') == 'action'
            or param.get('name') == 'success-condition'
        ]
        if 'action' in resource_params and 'success-condition' in resource_params:
            resource_templates.append(template)

    resource_template_names = set(
        template.get('metadata', {}).get('name')
        for template in resource_templates)

    container_templates = [
        template for template in workflow if template['kind'] == 'Task' and
        template.get('metadata', {}).get('name') not in resource_template_names
    ]

    pipeline_templates = [
        template for template in workflow if template['kind'] == 'Pipeline'
    ]

    pipelinerun_templates = [
        template for template in workflow if template['kind'] == 'PipelineRun'
    ]

    # 1. Index the pipelines to understand how data is being passed and which
    #  inputs/outputs are connected to each other.
    template_input_to_parent_pipeline_inputs = {
    }  # (task_template_name, task_input_name) -> Set[(pipeline_template_name, pipeline_input_name)]
    template_input_to_parent_task_outputs = {
    }  # (task_template_name, task_input_name) -> Set[(upstream_template_name, upstream_output_name)]
    template_input_to_parent_constant_arguments = {
    }  # (task_template_name, task_input_name) -> Set[argument_value] # Unused
    pipeline_output_to_parent_template_outputs = {
    }  # (pipeline_template_name, output_name) -> Set[(upstream_template_name, upstream_output_name)]

    for template in pipeline_templates:
        pipeline_template_name = template.get('metadata', {}).get('name')
        # Indexing task arguments
        pipeline_tasks = template.get('spec', {}).get('tasks', []) + template.get('spec', {}).get('finally', [])
        task_name_to_template_name = {
            task['name']: task['taskRef']['name']
            for task in pipeline_tasks
        }
        for task in pipeline_tasks:
            task_template_name = task['taskRef']['name']
            parameter_arguments = task['params']
            for parameter_argument in parameter_arguments:
                task_input_name = parameter_argument['name']
                argument_value = parameter_argument['value']

                argument_placeholder_parts = deconstruct_tekton_single_placeholder(
                    argument_value)
                if not argument_placeholder_parts:  # Argument is considered to be constant string
                    template_input_to_parent_constant_arguments.setdefault(
                        (task_template_name, task_input_name),
                        set()).add(argument_value)
                else:
                    placeholder_type = argument_placeholder_parts[0]
                    if placeholder_type not in ('params', 'outputs', 'tasks',
                                                'steps', 'workflow', 'pod',
                                                'item'):
                        # Do not fail on Jinja or other double-curly-brace templates
                        continue
                    if placeholder_type == 'params':
                        pipeline_input_name = argument_placeholder_parts[1]
                        template_input_to_parent_pipeline_inputs.setdefault(
                            (task_template_name, task_input_name), set()).add(
                                (pipeline_template_name, pipeline_input_name))
                    elif placeholder_type == 'tasks':
                        upstream_task_name = argument_placeholder_parts[1]
                        assert argument_placeholder_parts[2] == 'results'
                        upstream_output_name = argument_placeholder_parts[3]
                        upstream_template_name = task_name_to_template_name[
                            upstream_task_name]
                        template_input_to_parent_task_outputs.setdefault(
                            (task_template_name, task_input_name), set()).add(
                                (upstream_template_name, upstream_output_name))
                    elif placeholder_type == 'item' or placeholder_type == 'workflow' or placeholder_type == 'pod':
                        # workflow.parameters.* placeholders are not supported,
                        # but the DSL compiler does not produce those.
                        template_input_to_parent_constant_arguments.setdefault(
                            (task_template_name, task_input_name),
                            set()).add(argument_value)
                    else:
                        raise AssertionError

                pipeline_input_name = extract_tekton_input_parameter_name(
                    argument_value)
                if pipeline_input_name:
                    template_input_to_parent_pipeline_inputs.setdefault(
                        (task_template_name, task_input_name), set()).add(
                            (pipeline_template_name, pipeline_input_name))
                else:
                    template_input_to_parent_constant_arguments.setdefault(
                        (task_template_name, task_input_name),
                        set()).add(argument_value)
    # Finished indexing the pipelines

    # 2. Search for direct data consumers in container/resource templates and some pipeline task attributes
    #  (e.g. conditions and loops) to find out which inputs are directly consumed as parameters/artifacts.
    inputs_directly_consumed_as_parameters = set()
    inputs_directly_consumed_as_artifacts = set()
    outputs_directly_consumed_as_parameters = set()

    # Searching for artifact input consumers in container template inputs
    for template in container_templates:
        template_name = template.get('metadata', {}).get('name')
        for input_artifact in template.get('spec', {}).get('artifacts', {}):
            raw_data = input_artifact['raw'][
                'data']  # The structure must exist
            # The raw data must be a single input parameter reference. Otherwise (e.g. it's a string
            #  or a string with multiple inputs) we should not do the conversion to artifact passing.
            input_name = extract_tekton_input_parameter_name(raw_data)
            if input_name:
                inputs_directly_consumed_as_artifacts.add(
                    (template_name, input_name))
                del input_artifact[
                    'raw']  # Deleting the "default value based" data passing hack
                # so that it's replaced by the "argument based" way of data passing.
                input_artifact[
                    'name'] = input_name  # The input artifact name should be the same
                # as the original input parameter name

    # Searching for parameter input consumers in pipeline templates
    # TODO: loop params are not supported for Tekton yet, refer to https://github.com/kubeflow/kfp-tekton/issues/82
    for template in pipeline_templates:
        template_name = template.get('metadata', {}).get('name')
        pipeline_tasks = template.get('spec', {}).get('tasks', []) + template.get('spec', {}).get('finally', [])
        task_name_to_template_name = {
            task['name']: task['taskRef']['name']
            for task in pipeline_tasks
        }
        for task in pipeline_tasks:
            # We do not care about the inputs mentioned in task arguments
            # since we will be free to switch them from parameters to artifacts
            task_without_arguments = task.copy()  # Shallow copy
            task_without_arguments.pop('params', None)
            placeholders = extract_all_tekton_placeholders(
                task_without_arguments)
            for placeholder in placeholders:
                parts = placeholder.split('.')
                placeholder_type = parts[0]
                if placeholder_type not in ('inputs', 'outputs', 'tasks',
                                            'steps', 'workflow', 'pod',
                                            'item'):
                    # Do not fail on Jinja or other double-curly-brace templates
                    continue
                if placeholder_type == 'inputs':
                    if parts[1] == 'parameters':
                        input_name = parts[2]
                        inputs_directly_consumed_as_parameters.add(
                            (template_name, input_name))
                    else:
                        raise AssertionError
                elif placeholder_type == 'tasks':
                    upstream_task_name = parts[1]
                    assert parts[2] == 'results'
                    upstream_output_name = parts[3]
                    upstream_template_name = task_name_to_template_name[
                        upstream_task_name]
                    outputs_directly_consumed_as_parameters.add(
                        (upstream_template_name, upstream_output_name))
                elif placeholder_type == 'workflow' or placeholder_type == 'pod':
                    pass
                elif placeholder_type == 'item':
                    raise AssertionError(
                        'The "{{item}}" placeholder is not expected outside task arguments.'
                    )
                else:
                    raise AssertionError(
                        'Unexpected placeholder type "{}".'.format(
                            placeholder_type))

    # Searching for parameter input consumers in container and resource templates
    for template in container_templates + resource_templates:
        template_name = template.get('metadata', {}).get('name')
        placeholders = extract_all_tekton_placeholders(template)
        for placeholder in placeholders:
            parts = placeholder.split('.')
            placeholder_type = parts[0]
            if placeholder_type not in ('inputs', 'outputs', 'tasks', 'steps',
                                        'workflow', 'pod', 'item', 'results'):
                # Do not fail on Jinja or other double-curly-brace templates
                continue

            if placeholder_type == 'workflow' or placeholder_type == 'pod':
                pass
            elif placeholder_type == 'inputs':
                if parts[1] == 'params':
                    input_name = parts[2]
                    inputs_directly_consumed_as_parameters.add(
                        (template_name, input_name))
                elif parts[1] == 'artifacts':
                    raise AssertionError(
                        'Found unexpected Tekton input artifact placeholder in container template: {}'
                        .format(placeholder))
                else:
                    raise AssertionError(
                        'Found unexpected Tekton input placeholder in container template: {}'
                        .format(placeholder))
            elif placeholder_type == 'results':
                input_name = parts[1]
                outputs_directly_consumed_as_parameters.add(
                    (template_name, input_name))
            else:
                raise AssertionError(
                    'Found unexpected Tekton placeholder in container template: {}'
                    .format(placeholder))

    # Finished indexing data consumers

    # 3. Propagate the consumption information upstream to all inputs/outputs all the way up to the data producers.
    inputs_consumed_as_parameters = set()
    inputs_consumed_as_artifacts = set()

    outputs_consumed_as_parameters = set()
    outputs_consumed_as_artifacts = set()

    def mark_upstream_ios_of_input(template_input, marked_inputs,
                                   marked_outputs):
        # Stopping if the input has already been visited to save time and handle recursive calls
        if template_input in marked_inputs:
            return
        marked_inputs.add(template_input)

        upstream_inputs = template_input_to_parent_pipeline_inputs.get(
            template_input, [])
        for upstream_input in upstream_inputs:
            mark_upstream_ios_of_input(upstream_input, marked_inputs,
                                       marked_outputs)

        upstream_outputs = template_input_to_parent_task_outputs.get(
            template_input, [])
        for upstream_output in upstream_outputs:
            mark_upstream_ios_of_output(upstream_output, marked_inputs,
                                        marked_outputs)

    def mark_upstream_ios_of_output(template_output, marked_inputs,
                                    marked_outputs):
        # Stopping if the output has already been visited to save time and handle recursive calls
        if template_output in marked_outputs:
            return
        marked_outputs.add(template_output)

        upstream_outputs = pipeline_output_to_parent_template_outputs.get(
            template_output, [])
        for upstream_output in upstream_outputs:
            mark_upstream_ios_of_output(upstream_output, marked_inputs,
                                        marked_outputs)

    for input in inputs_directly_consumed_as_parameters:
        mark_upstream_ios_of_input(input, inputs_consumed_as_parameters,
                                   outputs_consumed_as_parameters)
    for input in inputs_directly_consumed_as_artifacts:
        mark_upstream_ios_of_input(input, inputs_consumed_as_artifacts,
                                   outputs_consumed_as_artifacts)
    for output in outputs_directly_consumed_as_parameters:
        mark_upstream_ios_of_output(output, inputs_consumed_as_parameters,
                                    outputs_consumed_as_parameters)

    # 4. Convert the inputs, outputs and arguments based on how they're consumed downstream.
    # Add workspaces to the pipeline and pipeline task_ref if big data is passed
    pipeline_workspaces = set()
    pipelinerun_workspaces = set()
    output_tasks_consumed_as_artifacts = {
        output[0]
        for output in outputs_consumed_as_artifacts
    }
    # task_workspaces = set()
    for pipeline in pipeline_templates:
        # Converting pipeline inputs
        pipeline, pipeline_workspaces = big_data_passing_pipeline(
            pipeline, inputs_consumed_as_artifacts,
            output_tasks_consumed_as_artifacts)

    # Add workspaces to the pipelinerun if big data is passed.
    # Check whether a pipelinerun was generated; throw an error if not.
    if pipeline_workspaces:
        if not pipelinerun_templates:
            raise AssertionError(
                'Found big data passing, please enable generate_pipelinerun for your compiler'
            )
        for pipelinerun in pipelinerun_templates:
            pipeline, pipelinerun_workspaces = big_data_passing_pipelinerun(
                pipelinerun, pipeline_workspaces)

    # Use workspaces in tasks for big data passing instead of 'results' and 'copy-inputs'
    for task_template in container_templates:
        task_template = big_data_passing_tasks(task_template,
                                               inputs_consumed_as_artifacts,
                                               outputs_consumed_as_artifacts)

    # Create a PVC for the pipelinerun if big data is passed,
    # since the workspaces used in the Tekton pipelines depend on it.
    # Users need to create the PV manually or enable dynamic volume provisioning; refer to:
    # https://kubernetes.io/docs/concepts/storage/dynamic-provisioning
    # TODO: Remove the PVC if Tekton version >= 0.12; use 'volumeClaimTemplate' instead
    if pipelinerun_workspaces:
        for pipelinerun in pipelinerun_workspaces:
            workflow.append(create_pvc(pipelinerun))

    # Remove input parameters unless they're used downstream.
    # This also removes unused container template inputs if any.
    for template in container_templates + pipeline_templates:
        spec = template.get('spec', {})
        spec['params'] = [
            input_parameter for input_parameter in spec.get('params', [])
            if (template.get('metadata', {}).get('name'),
                input_parameter['name']) in inputs_consumed_as_parameters
        ]

    # Remove output parameters unless they're used downstream
    for template in container_templates + pipeline_templates:
        spec = template.get('spec', {})
        spec['results'] = [
            output_parameter for output_parameter in spec.get('results', [])
            if (template.get('metadata', {}).get('name'),
                output_parameter['name']) in outputs_consumed_as_parameters
        ]
        # tekton results don't support underscores
        renamed_results_in_pipeline_task = set()
        for task_result in spec['results']:
            task_result_old_name = task_result.get('name')
            task_result_new_name = sanitize_k8s_name(task_result_old_name)
            if task_result_new_name != task_result_old_name:
                task_result['name'] = task_result_new_name
                renamed_results_in_pipeline_task.add(
                    (task_result_old_name, task_result_new_name))
        for renamed_result in renamed_results_in_pipeline_task:
            # Change results.downloaded_resultOutput to results.downloaded-resultoutput
            template['spec'] = replace_big_data_placeholder(
                spec, 'results.%s' % renamed_result[0],
                'results.%s' % renamed_result[1])

    # Remove pipeline task parameters unless they're used downstream
    for template in pipeline_templates:
        tasks = template.get('spec', {}).get('tasks', []) + template.get('spec', {}).get('finally', [])
        for task in tasks:
            task['params'] = [
                parameter_argument
                for parameter_argument in task.get('params', [])
                if (task['taskRef']['name'], parameter_argument['name']
                    ) in inputs_consumed_as_parameters and
                (task['taskRef']['name'], parameter_argument['name']
                 ) not in inputs_consumed_as_artifacts
                or task['taskRef']['name'] in resource_template_names
            ]

            # tekton results don't support underscores
            for argument in task['params']:
                argument_value = argument.get('value')
                argument_placeholder_parts = deconstruct_tekton_single_placeholder(
                    argument_value)
                if len(argument_placeholder_parts) == 4 \
                        and argument_placeholder_parts[0] == 'tasks':
                    argument['value'] = '$(tasks.%s.%s.%s)' % (
                        argument_placeholder_parts[1],
                        argument_placeholder_parts[2],
                        sanitize_k8s_name(argument_placeholder_parts[3]))

    # Need to confirm:
    # I didn't find any use case where a workflow parameter is consumed as an artifact downstream in Tekton.
    # Does this case need to be supported?

    clean_up_empty_workflow_structures(workflow)
    return workflow
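
The indexing in steps 1-2 above hinges on deconstructing a single Tekton placeholder such as `$(tasks.<task>.results.<result>)` into its dotted parts. Below is a simplified stand-in for deconstruct_tekton_single_placeholder; the real helper lives elsewhere in the compiler and may behave differently on edge cases.

import re

def deconstruct_single_placeholder(value: str) -> list:
    """Return the dotted parts if the value is exactly one $(...) placeholder."""
    match = re.fullmatch(r'\$\(([^)]+)\)', value or '')
    return match.group(1).split('.') if match else []

print(deconstruct_single_placeholder('$(tasks.gen-data.results.csv)'))
# -> ['tasks', 'gen-data', 'results', 'csv']
print(deconstruct_single_placeholder('literal-string'))
# -> []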
Example #25
0
def _process_output_artifacts(outputs_dict: Dict[Text, Any],
                              volume_mount_step_template: List[Dict[Text,
                                                                    Any]],
                              volume_template: List[Dict[Text, Any]],
                              replaced_param_list: List[Text],
                              artifact_to_result_mapping: Dict[Text, Any]):
    """Process output artifacts to replicate the same behavior as Argo.

    For storing artifacts, we will be using the minio/mc image because we need to upload artifacts to any type of
    object storage and endpoint. The minio/mc image is best suited for this task because the default KFP
    installation uses minio, and it also works well with other s3/gcs types of storage.

    - image: minio/mc
        name: copy-artifacts
        script: |
            #!/usr/bin/env sh
            mc config host add storage http://minio-service.$NAMESPACE:9000 $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY
            mc cp /tmp/file.txt storage/$(inputs.params.bucket)/runs/$PIPELINERUN/$TASKRUN/file.txt

    Args:
        outputs_dict {Dict[Text, Any]}: Dictionary of the possible parameters/artifacts in this task
        volume_mount_step_template {List[Dict[Text, Any]]}: Step template for the list of volume mounts
        volume_template {List[Dict[Text, Any]]}: Task template for the list of volumes
        replaced_param_list {List[Text]}: List of parameters that already set up as results
        artifact_to_result_mapping {Dict[Text, Any]}: Mapping between parameter and artifact results

    Returns:
        Dict[Text, Any]
    """
    if outputs_dict.get('artifacts'):
        # TODO: Pull default values from KFP configmap when integrated with KFP.
        storage_location = outputs_dict['artifacts'][0].get('s3', {})
        insecure = storage_location.get("insecure", True)
        endpoint = storage_location.get("endpoint",
                                        "minio-service.$NAMESPACE:9000")
        # We want to use the insecure flag to figure out whether to use http or https scheme
        endpoint = re.sub(r"https?://", "", endpoint)
        endpoint = 'http://' + endpoint if insecure else 'https://' + endpoint
        access_key = storage_location.get("accessKeySecret", {
            "name": "mlpipeline-minio-artifact",
            "key": "accesskey"
        })
        secret_access_key = storage_location.get("secretKeySecret", {
            "name": "mlpipeline-minio-artifact",
            "key": "secretkey"
        })
        bucket = storage_location.get("bucket", "mlpipeline")
        copy_artifacts_step = {
            'image':
            'minio/mc',
            'name':
            'copy-artifacts',
            'script':
            textwrap.dedent('''\
                        #!/usr/bin/env sh
                        mc config host add storage %s $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY
                        ''' % (endpoint)),
            'env': [{
                'name': 'PIPELINERUN',
                'valueFrom': {
                    'fieldRef': {
                        'fieldPath':
                        "metadata.labels['tekton.dev/pipelineRun']"
                    }
                }
            }, {
                'name': 'PIPELINETASK',
                'valueFrom': {
                    'fieldRef': {
                        'fieldPath':
                        "metadata.labels['tekton.dev/pipelineTask']"
                    }
                }
            }, {
                'name': 'NAMESPACE',
                'valueFrom': {
                    'fieldRef': {
                        'fieldPath': "metadata.namespace"
                    }
                }
            }, {
                'name': 'AWS_ACCESS_KEY_ID',
                'valueFrom': {
                    'secretKeyRef': {
                        'name': access_key['name'],
                        'key': access_key['key']
                    }
                }
            }, {
                'name': 'AWS_SECRET_ACCESS_KEY',
                'valueFrom': {
                    'secretKeyRef': {
                        'name': secret_access_key['name'],
                        'key': secret_access_key['key']
                    }
                }
            }]
        }
        mounted_artifact_paths = []
        for artifact in outputs_dict['artifacts']:
            if artifact['name'] in replaced_param_list:
                copy_artifacts_step['script'] = copy_artifacts_step['script'] + \
                    'tar -cvzf %s.tgz $(results.%s.path)\n' % (artifact['name'], artifact_to_result_mapping[artifact['name']]) + \
                    'mc cp %s.tgz storage/%s/runs/$PIPELINERUN/$PIPELINETASK/%s.tgz\n' % (artifact['name'],
                                                                                     bucket, artifact['name'])
            else:
                copy_artifacts_step['script'] = copy_artifacts_step['script'] + \
                    'tar -cvzf %s.tgz %s\n' % (artifact['name'], artifact['path']) + \
                    'mc cp %s.tgz storage/%s/runs/$PIPELINERUN/$PIPELINETASK/%s.tgz\n' % (artifact['name'], bucket, artifact['name'])
                if artifact['path'].rsplit("/",
                                           1)[0] not in mounted_artifact_paths:
                    volume_mount_step_template.append({
                        'name':
                        sanitize_k8s_name(artifact['name']),
                        'mountPath':
                        artifact['path'].rsplit("/", 1)[0]
                    })
                    volume_template.append({
                        'name':
                        sanitize_k8s_name(artifact['name']),
                        'emptyDir': {}
                    })
                    mounted_artifact_paths.append(artifact['path'].rsplit(
                        "/", 1)[0])
        return copy_artifacts_step
    else:
        return {}
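
As a rough sketch of how the copy-artifacts script above grows, each artifact contributes one `tar` line and one `mc cp` line after the initial `mc config` call. The helper below and the example endpoint/bucket are illustrative only.

def build_copy_script(endpoint: str, bucket: str, artifacts: list) -> str:
    """Assemble the copy-artifacts shell script, one artifact at a time."""
    lines = [
        '#!/usr/bin/env sh',
        'mc config host add storage %s $AWS_ACCESS_KEY_ID $AWS_SECRET_ACCESS_KEY' % endpoint,
    ]
    for name, path in artifacts:
        lines.append('tar -cvzf %s.tgz %s' % (name, path))
        lines.append('mc cp %s.tgz storage/%s/runs/$PIPELINERUN/$PIPELINETASK/%s.tgz'
                     % (name, bucket, name))
    return '\n'.join(lines) + '\n'

print(build_copy_script('http://minio-service.kubeflow:9000', 'mlpipeline',
                        [('mlpipeline-metrics', '/tmp/mlpipeline-metrics.json')]))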
Example #26
0
def _validate_workflow(workflow: Dict[Text, Any]):

    # verify that all names and labels conform to kubernetes naming standards
    #   https://kubernetes.io/docs/concepts/overview/working-with-objects/names/
    #   https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/

    def _find_items(obj,
                    search_key,
                    current_path="",
                    results_dict=dict()) -> dict:
        if isinstance(obj, dict):
            if search_key in obj:
                results_dict.update(
                    {"%s.%s" % (current_path, search_key): obj[search_key]})
            for k, v in obj.items():
                _find_items(v, search_key, "%s.%s" % (current_path, k),
                            results_dict)
        elif isinstance(obj, list):
            for i, list_item in enumerate(obj):
                _find_items(list_item, search_key,
                            "%s[%i]" % (current_path, i), results_dict)
        return {k.lstrip("."): v for k, v in results_dict.items()}

    non_k8s_names = {
        path: name
        for path, name in _find_items(workflow, "name").items()
        if "metadata" in path and name != sanitize_k8s_name(name) or "param" in
        path and name != sanitize_k8s_name(name, allow_capital_underscore=True)
    }

    non_k8s_labels = {
        path: k_v_dict
        for path, k_v_dict in _find_items(workflow, "labels", "", {}).items()
        if "metadata" in path and any([
            k != sanitize_k8s_name(k,
                                   allow_capital_underscore=True,
                                   allow_dot=True,
                                   allow_slash=True,
                                   max_length=253) or v !=
            sanitize_k8s_name(v, allow_capital_underscore=True, allow_dot=True)
            for k, v in k_v_dict.items()
        ])
    }

    non_k8s_annotations = {
        path: k_v_dict
        for path, k_v_dict in _find_items(workflow, "annotations", "",
                                          {}).items()
        if "metadata" in path and any([
            k != sanitize_k8s_name(k,
                                   allow_capital_underscore=True,
                                   allow_dot=True,
                                   allow_slash=True,
                                   max_length=253) for k in k_v_dict.keys()
        ])
    }

    error_msg_tmplt = textwrap.dedent("""\
    Internal compiler error: Found non-compliant Kubernetes %s:
    %s
    Please create a new issue at https://github.com/kubeflow/kfp-tekton/issues
    attaching the pipeline DSL code and the pipeline YAML.""")

    if non_k8s_names:
        raise RuntimeError(
            error_msg_tmplt %
            ("names", json.dumps(non_k8s_names, sort_keys=False, indent=2)))

    if non_k8s_labels:
        raise RuntimeError(
            error_msg_tmplt %
            ("labels", json.dumps(non_k8s_labels, sort_keys=False, indent=2)))

    if non_k8s_annotations:
        raise RuntimeError(
            error_msg_tmplt %
            ("annotations",
             json.dumps(non_k8s_annotations, sort_keys=False, indent=2)))

    # TODO: Tekton pipeline parameter validation
    #   workflow = workflow.copy()
    #   # Working around Argo lint issue
    #   for argument in workflow['spec'].get('arguments', {}).get('parameters', []):
    #     if 'value' not in argument:
    #       argument['value'] = ''
    #   yaml_text = dump_yaml(workflow)
    #   if '{{pipelineparam' in yaml_text:
    #     raise RuntimeError(
    #         '''Internal compiler error: Found unresolved PipelineParam.
    # Please create a new issue at https://github.com/kubeflow/kfp-tekton/issues
    # attaching the pipeline code and the pipeline package.'''
    #     )

    # TODO: Tekton lint, if a tool exists for it
    #   # Running Argo lint if available
    #   import shutil
    #   import subprocess
    #   argo_path = shutil.which('argo')
    #   if argo_path:
    #     result = subprocess.run([argo_path, 'lint', '/dev/stdin'], input=yaml_text.encode('utf-8'),
    #                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    #     if result.returncode:
    #       raise RuntimeError(
    #         '''Internal compiler error: Compiler has produced Argo-incompatible workflow.
    # Please create a new issue at https://github.com/kubeflow/kfp-tekton/issues
    # attaching the pipeline code and the pipeline package.
    # Error: {}'''.format(result.stderr.decode('utf-8'))
    #       )
    pass
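
To make the validation helper concrete, here is a standalone copy of the `_find_items` traversal (with the mutable default argument replaced) applied to a made-up document; it returns a flat map from dotted/indexed paths to the matched values.

def find_items(obj, search_key, current_path="", results=None) -> dict:
    """Standalone copy of the traversal above, without the mutable default."""
    results = {} if results is None else results
    if isinstance(obj, dict):
        if search_key in obj:
            results["%s.%s" % (current_path, search_key)] = obj[search_key]
        for k, v in obj.items():
            find_items(v, search_key, "%s.%s" % (current_path, k), results)
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            find_items(item, search_key, "%s[%i]" % (current_path, i), results)
    return {k.lstrip("."): v for k, v in results.items()}

doc = {'metadata': {'name': 'my-run'},
       'spec': {'tasks': [{'name': 'step-1', 'params': [{'name': 'MSG'}]}]}}
print(find_items(doc, 'name'))
# -> {'metadata.name': 'my-run',
#     'spec.tasks[0].name': 'step-1',
#     'spec.tasks[0].params[0].name': 'MSG'}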
Example #27
0
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PIPELINETASK/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Call the ContainerOp step "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        if enable_artifacts:
            for output_artifact in output_artifacts:
                output_annotation = pipelinerun_output_artifacts.get(
                    processed_op.name, [])
                output_annotation.append({
                    'name': output_artifact['name'],
                    'path': output_artifact['path']
                })
                pipelinerun_output_artifacts[
                    processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest into a string so Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove the YAML quotes so the bash variable can be read
            manifest = re.sub(r"name: '([^']+)'", r"name: \g<1>", manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/task_display_name'] = \
            processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = \
            json.dumps(op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    return template
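# Hedged usage sketch (editor's addition): converting a minimal ContainerOp into a
# Tekton Task template with _op_to_template above. The op name, image and command
# are illustration values and the kfp v1 dsl API is assumed; ContainerOp creation
# normally happens inside a pipeline context, hence the dsl.Pipeline wrapper.
#
#   from kfp import dsl
#
#   with dsl.Pipeline('op-to-template-sketch'):
#       echo = dsl.ContainerOp(name='echo',
#                              image='busybox',
#                              command=['sh', '-c'],
#                              arguments=['echo hello'])
#   task = _op_to_template(echo)
#   # task['kind'] == 'Task', task['spec']['steps'][0]['name'] == 'main'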
예제 #28
0
def AnySequencer(any: Iterable[Union[dsl.ContainerOp, ConditionOperator]],
                 name: str = None,
                 statusPath: str = None,
                 skippingPolicy: str = None,
                 errorPolicy: str = None,
                 image: str = ANY_SEQUENCER_IMAGE):
    """A containerOp that will proceed when any of the dependent containerOps completed
       successfully

    Args:
        name: The name of the containerOp. It does not have to be unique within a pipeline
                because the pipeline will generate a unique new name in case of conflicts.

        any: List of `Conditional` containerOps that deploy together with the `main`
                containerOp, or the condtion that must meet to continue.

        statusPath: The location to write the output stauts

        skippingPolicy: Determines for the Any Sequencer reacts to
                no-dependency-condition-matching case. Values can be one of `skipOnNoMatch`
                or `errorOnNoMatch`, a status with value "Skipped" will be generated and the
                exit status will still be succeeded on `skipOnNoMatch`.

        errorPolicy: The standard field, either `failOnError` or `continueOnError`. On
                `continueOnError`, a status with value "Failed" will be generated
                but the exit status will still be succeeded. For `Fail_on_error` the
                Any Sequencer should truly fail in the Tekton terms, as it does now.

        image: The image to implement the any sequencer logic. Default to dspipelines/any-sequencer:latest.
    """
    arguments = [
        "--namespace", "$(context.pipelineRun.namespace)", "--prName",
        "$(context.pipelineRun.name)"
    ]
    tasks_list = []
    condition_list = []
    file_outputs = None
    for cop in any:
        if isinstance(cop, dsl.ContainerOp):
            cop_name = sanitize_k8s_name(cop.name)
            tasks_list.append(cop_name)
        elif isinstance(cop, ConditionOperator):
            condition_list.append(cop)
    if len(tasks_list) > 0:
        task_list_str = "\'" + ",".join(tasks_list) + "\'"
        arguments.extend(["--taskList", task_list_str])
    if statusPath is not None:
        file_outputs = '{outputPath: %s}' % statusPath
        arguments.extend(["--statusPath", file_outputs])
        if skippingPolicy is not None:
            assert skippingPolicy == "skipOnNoMatch" or skippingPolicy == "errorOnNoMatch"
            arguments.extend(["--skippingPolicy", skippingPolicy])
        if errorPolicy is not None:
            assert errorPolicy == "continueOnError" or errorPolicy == "failOnError"
            arguments.extend(["--errorPolicy", errorPolicy])
    conditionArgs = processConditionArgs(condition_list)
    arguments.extend(conditionArgs)

    AnyOp_yaml = '''\
    name: %s
    description: 'Proceed when any of the dependents completed successfully'
    outputs:
    - {name: %s, description: 'The output file to create the status'}
    implementation:
        container:
            image: %s
            command: [any-task]
            args: [%s]
    ''' % (name, statusPath, image, ",".join(arguments))
    AnyOp_template = components.load_component_from_text(AnyOp_yaml)
    AnyOp = AnyOp_template()
    return AnyOp
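# Hedged usage sketch (editor's addition): letting a downstream op start once either
# of two upstream ops succeeds. Task names, images and the statusPath value are
# illustration values; note that statusPath doubles as the declared output name in
# the component YAML generated above.
#
#   @dsl.pipeline(name='any-sequencer-sketch')
#   def any_sketch():
#       op_a = dsl.ContainerOp(name='task-a', image='busybox',
#                              command=['sh', '-c'], arguments=['sleep 5'])
#       op_b = dsl.ContainerOp(name='task-b', image='busybox',
#                              command=['sh', '-c'], arguments=['sleep 600'])
#       wait_any = AnySequencer([op_a, op_b], name='wait-for-any',
#                               statusPath='status')
#       downstream = dsl.ContainerOp(name='downstream', image='busybox',
#                                    command=['sh', '-c'], arguments=['echo go'])
#       downstream.after(wait_any)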
예제 #29
0
def big_data_passing_tasks(task: dict, inputs_tasks: set,
                           outputs_tasks: set) -> dict:
    task_name = task.get('metadata', {}).get('name')
    task_spec = task.get('spec', {})
    # Data passing for the task outputs
    task_outputs = task_spec.get('results', [])
    for task_output in task_outputs:
        if (task_name, task_output.get('name')) in outputs_tasks:
            if not task_spec.setdefault('workspaces', []):
                task_spec['workspaces'].append({"name": task_name})
            # Replace the args for the outputs in the task_spec
            # $(results.task_output.get('name').path)  -->
            # $(workspaces.task_name.path)/task_name-task_output.get('name')
            placeholder = '$(results.%s.path)' % (sanitize_k8s_name(task_output.get('name')))
            workspaces_parameter = '$(workspaces.%s.path)/%s-%s' % (
                task_name, task_name, task_output.get('name'))
            task['spec'] = replace_big_data_placeholder(
                task['spec'], placeholder, workspaces_parameter)

    # Remove artifact outputs from results
    task['spec']['results'] = [
        result for result in task_outputs
        if (task_name, result.get('name')) not in outputs_tasks
    ]

    # Data passing for task inputs
    task_spec = task.get('spec', {})
    task_params = task_spec.get('params', [])
    task_artifacts = task_spec.get('artifacts', [])
    for task_param in task_params:
        if (task_name, task_param.get('name')) in inputs_tasks:
            if not task_spec.setdefault('workspaces', []):
                task_spec['workspaces'].append({"name": task_name})
            # Replace the args for the inputs in the task_spec
            # /tmp/inputs/text/data ---->
            # $(workspaces.task_name.path)/task_param.get('name')
            placeholder = '/tmp/inputs/text/data'
            for task_artifact in task_artifacts:
                if task_artifact.get('name') == task_param.get('name'):
                    placeholder = task_artifact.get('path')
            workspaces_parameter = '$(workspaces.%s.path)/%s' % (
                task_name, task_param.get('name'))
            task['spec'] = replace_big_data_placeholder(
                task_spec, placeholder, workspaces_parameter)
    # Handle the case of an input artifact that does not depend on the output of another task
    for task_artifact in task_artifacts:
        if (task_name, task_artifact.get('name')) not in inputs_tasks:
            # add input artifact processes
            task = input_artifacts_tasks(task, task_artifact)

    # Remove artifact parameters from params
    task['spec']['params'] = [
        param for param in task_params
        if (task_name, param.get('name')) not in inputs_tasks
    ]

    # Remove artifacts from task_spec
    if 'artifacts' in task_spec:
        del task['spec']['artifacts']

    return task
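# Hedged before/after trace (editor's addition) of the output rewrite above, for a
# made-up task "gen-data" that produces a result "out". replace_big_data_placeholder
# and input_artifacts_tasks are assumed to be defined elsewhere in this module.
#
#   task_in = {
#       'metadata': {'name': 'gen-data'},
#       'spec': {
#           'results': [{'name': 'out'}],
#           'steps': [{'name': 'main',
#                      'args': ['produce > $(results.out.path)']}],
#       },
#   }
#   task_out = big_data_passing_tasks(task_in,
#                                     inputs_tasks=set(),
#                                     outputs_tasks={('gen-data', 'out')})
#   # - the step arg now points to '$(workspaces.gen-data.path)/gen-data-out'
#   # - spec.results no longer lists 'out'
#   # - a workspace {'name': 'gen-data'} has been added to spec.workspaces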
예제 #30
0
def big_data_passing_tasks(prname: str, task: dict, pipelinerun_template: dict,
                           inputs_tasks: set, outputs_tasks: set,
                           loops_pipeline: dict,
                           loop_name_prefix: str) -> dict:
    task_name = task.get('name')
    task_spec = task.get('taskSpec', {})
    # Data passing for the task outputs
    appended_taskrun_name = False
    for task_output in task.get('taskSpec', {}).get('results', []):
        if (task_name, task_output.get('name')) in outputs_tasks:
            if not task.get('taskSpec', {}).setdefault('workspaces', []):
                task.get('taskSpec',
                         {})['workspaces'].append({"name": task_name})
            # Replace the args for the outputs in the task_spec
            # $(results.task_output.get('name').path)  -->
            # $(workspaces.task_name.path)/task_name-task_output.get('name')
            placeholder = '$(results.%s.path)' % (sanitize_k8s_name(
                task_output.get('name')))
            workspaces_parameter = '$(workspaces.%s.path)/%s/%s/%s' % (
                task_name, BIG_DATA_MIDPATH, "$(context.taskRun.name)",
                task_output.get('name'))
            # For child nodes to know the taskRun name, it has to be passed to results via the /tekton/results emptyDir
            if not appended_taskrun_name:
                copy_taskrun_name_step = _get_base_step('output-taskrun-name')
                copy_taskrun_name_step[
                    'script'] += 'echo -n "%s" > $(results.taskrun-name.path)\n' % (
                        "$(context.taskRun.name)")
                task['taskSpec']['results'].append({"name": "taskrun-name"})
                task['taskSpec']['steps'].append(copy_taskrun_name_step)
                _append_original_pr_name_env(task)
                appended_taskrun_name = True
            task['taskSpec'] = replace_big_data_placeholder(
                task.get("taskSpec", {}), placeholder, workspaces_parameter)
            artifact_items = pipelinerun_template['metadata']['annotations'][
                'tekton.dev/artifact_items']
            artifact_items[task['name']] = replace_big_data_placeholder(
                artifact_items[task['name']], placeholder,
                workspaces_parameter)
            pipelinerun_template['metadata']['annotations']['tekton.dev/artifact_items'] = \
                artifact_items

    task_spec = task.get('taskSpec', {})
    task_params = task_spec.get('params', [])
    task_artifacts = task_spec.get('artifacts', [])

    # Data passing for task inputs
    for task_param in task_params:
        if (task_name, task_param.get('name')) in inputs_tasks:
            if not task_spec.setdefault('workspaces', []):
                task_spec['workspaces'].append({"name": task_name})
            # Replace the args for the inputs in the task_spec
            # /tmp/inputs/text/data ---->
            # $(workspaces.task_name.path)/task_param.get('name')
            placeholder = '/tmp/inputs/text/data'
            for task_artifact in task_artifacts:
                if task_artifact.get('name') == task_param.get('name'):
                    placeholder = task_artifact.get('path')
            task_param_task_name = ""
            task_param_param_name = ""
            for o_task in outputs_tasks:
                if '-'.join(o_task) == task_param.get('name'):
                    task_param_task_name = o_task[0]
                    task_param_param_name = o_task[1]
                    break
            # If the param name is constructed with task_name-param_name,
            # use the current task_name as the path prefix

            def append_taskrun_params(task_name_append: str):
                taskrun_param_name = task_name_append + "-trname"
                inserted_taskrun_param = False
                for param in task['taskSpec'].get('params', []):
                    if param.get('name', "") == taskrun_param_name:
                        inserted_taskrun_param = True
                        break
                if not inserted_taskrun_param:
                    task['taskSpec']['params'].append(
                        {"name": taskrun_param_name})
                    task['params'].append({
                        "name":
                        taskrun_param_name,
                        "value":
                        "$(tasks.%s.results.taskrun-name)" % task_name_append
                    })
                    parent_task_queue = [task['name']]
                    while parent_task_queue:
                        current_task = parent_task_queue.pop(0)
                        for loop_name, loop_spec in loops_pipeline.items():
                            # print(loop_name, loop_spec)
                            if current_task in loop_spec.get('task_list', []):
                                parent_task_queue.append(
                                    loop_name.replace(loop_name_prefix, ""))
                                loop_param_names = [
                                    loop_param['name'] for loop_param in
                                    loops_pipeline[loop_name]['spec']['params']
                                ]
                                if task_name_append + '-taskrun-name' in loop_param_names:
                                    continue
                                loops_pipeline[loop_name]['spec'][
                                    'params'].append({
                                        'name':
                                        task_name_append + '-taskrun-name',
                                        'value':
                                        '$(tasks.%s.results.taskrun-name)' %
                                        task_name_append
                                    })

            if task_param_task_name:
                workspaces_parameter = '$(workspaces.%s.path)/%s/$(params.%s-trname)/%s' % (
                    task_name, BIG_DATA_MIDPATH, task_param_task_name,
                    task_param_param_name)
                if task_param_task_name != task_name:
                    append_taskrun_params(
                        task_param_task_name
                    )  # need to get taskrun name from parent path
            else:
                workspaces_parameter = '$(workspaces.%s.path)/%s/%s/%s' % (
                    task_name, BIG_DATA_MIDPATH, "$(context.taskRun.name)",
                    task_param.get('name'))
            _append_original_pr_name_env(task)
            task['taskSpec'] = replace_big_data_placeholder(
                task_spec, placeholder, workspaces_parameter)
            task_spec = task.get('taskSpec', {})
    # Handle the case of an input artifact that does not depend on the output of another task
    for task_artifact in task_artifacts:
        if (task_name, task_artifact.get('name')) not in inputs_tasks:
            # add input artifact processes
            task = input_artifacts_tasks(task, task_artifact)

        if (prname, task_artifact.get('name')) in inputs_tasks:
            # add input artifact processes for pipeline parameter
            if not task_artifact.setdefault('raw', {}):
                for i in range(len(pipelinerun_template['spec']['params'])):
                    param_name = pipelinerun_template['spec']['params'][i][
                        'name']
                    param_value = pipelinerun_template['spec']['params'][i][
                        'value']
                    if (task_artifact.get('name') == param_name):
                        task_artifact['raw']['data'] = param_value
                        task = input_artifacts_tasks_pr_params(
                            task, task_artifact)

    # If a task produces a result and artifact, add a step to copy artifact to results.
    artifact_items = pipelinerun_template['metadata']['annotations'][
        'tekton.dev/artifact_items']
    add_copy_results_artifacts_step = False
    if task.get("taskSpec", {}):
        if task_spec.get('results', []):
            copy_results_artifact_step = _get_base_step(
                'copy-results-artifacts')
            copy_results_artifact_step[
                'onError'] = 'continue'  # supported by v0.27+ of tekton.
            copy_results_artifact_step['script'] += 'TOTAL_SIZE=0\n'
            for result in task_spec['results']:
                if task['name'] in artifact_items:
                    artifact_i = artifact_items[task['name']]
                    for index, artifact_tuple in enumerate(artifact_i):
                        artifact_name, artifact = artifact_tuple
                        src = artifact
                        dst = '$(results.%s.path)' % sanitize_k8s_name(
                            result['name'])
                        if artifact_name == result['name'] and src != dst:
                            add_copy_results_artifacts_step = True
                            copy_results_artifact_step['script'] += (
                                'ARTIFACT_SIZE=`wc -c %s | awk \'{print $1}\'`\n'
                                % src +
                                'TOTAL_SIZE=$( expr $TOTAL_SIZE + $ARTIFACT_SIZE)\n'
                                + 'touch ' + dst +
                                '\n' +  # create an empty file by default.
                                'if [[ $TOTAL_SIZE -lt 3072 ]]; then\n' +
                                '  cp ' + src + ' ' + dst + '\n' + 'fi\n')
            _append_original_pr_name_env_to_step(copy_results_artifact_step)
            if add_copy_results_artifacts_step:
                task['taskSpec']['steps'].append(copy_results_artifact_step)

    # Remove artifact parameters from params
    task.get("taskSpec", {})['params'] = [
        param for param in task_spec.get('params', [])
        if (task_name, param.get('name')) not in inputs_tasks
        or param.get('name').endswith("-trname")
    ]

    # Remove artifacts from task_spec
    if 'artifacts' in task_spec:
        del task['taskSpec']['artifacts']

    return task
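# Hedged path-layout sketch (editor's addition) for the Tekton-level variant above,
# using made-up tasks "gen-data" (produces result "out") and "use-data" (consumes it);
# BIG_DATA_MIDPATH stands for the module-level constant of that name.
#
#   producer side: '$(results.out.path)' in gen-data's steps is rewritten to
#       $(workspaces.gen-data.path)/BIG_DATA_MIDPATH/$(context.taskRun.name)/out
#   and an extra "output-taskrun-name" step publishes $(context.taskRun.name)
#   through a "taskrun-name" result.
#
#   consumer side: use-data gets a "gen-data-trname" param bound to
#       $(tasks.gen-data.results.taskrun-name)
#   so its input placeholder is rewritten to
#       $(workspaces.use-data.path)/BIG_DATA_MIDPATH/$(params.gen-data-trname)/out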