Example #1
    def test_convert_k8s_obj_to_json_accepts_dict(self):
        now = datetime.now()
        converted = convert_k8s_obj_to_json({
            "ENV": "test",
            "number": 3,
            "list": [1, 2, 3],
            "time": now
        })
        self.assertEqual(converted, {
            "ENV": "test",
            "number": 3,
            "list": [1, 2, 3],
            "time": now.isoformat()
        })
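A minimal sketch of the conversion behavior this test expects, assuming only plain dicts, lists, tuples, and datetime values need handling; the helper name convert_obj_to_json_sketch is hypothetical and this is not the actual kfp convert_k8s_obj_to_json implementation:

from datetime import datetime


def convert_obj_to_json_sketch(obj):
    # Recursively walk dicts and lists, serialize datetime values with
    # isoformat(), and pass every other value through unchanged.
    if isinstance(obj, dict):
        return {key: convert_obj_to_json_sketch(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        return [convert_obj_to_json_sketch(item) for item in obj]
    if isinstance(obj, datetime):
        return obj.isoformat()
    return obj


if __name__ == '__main__':
    now = datetime.now()
    assert convert_obj_to_json_sketch({"ENV": "test", "time": now}) == {
        "ENV": "test",
        "time": now.isoformat()
    }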
Example #2
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PIPELINETASK/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Name the ContainerOp step "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        if enable_artifacts:
            for output_artifact in output_artifacts:
                output_annotation = pipelinerun_output_artifacts.get(
                    processed_op.name, [])
                output_annotation.append({
                    'name': output_artifact['name'],
                    'path': output_artifact['path']
                })
                pipelinerun_output_artifacts[
                    processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest into a YAML string so the Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove the YAML quotes so the bash variables can be read
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/task_display_name'] = \
            processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = \
            json.dumps(op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    return template
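The ResourceOp branch above dumps the Kubernetes resource to a flat YAML string before rewriting Argo's {{workflow.name}} into Tekton's $(PIPELINERUN). A standalone sketch of that rewrite, requiring only PyYAML; the function name rewrite_argo_vars is illustrative:

import re

import yaml


def rewrite_argo_vars(resource_dict):
    # Dump the resource to YAML, swap the Argo variable for the Tekton one, and
    # strip the single quotes PyYAML adds around the rewritten name value so the
    # variable can still be substituted at runtime.
    manifest = yaml.dump(resource_dict, default_flow_style=False)
    uses_argo_var = '{{workflow.name}}' in manifest
    if uses_argo_var:
        manifest = manifest.replace('{{workflow.name}}', '$(PIPELINERUN)')
        manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
    return manifest, uses_argo_var


manifest, used = rewrite_argo_vars({'metadata': {'name': '{{workflow.name}}-resource'}})
print(used)      # True
print(manifest)  # name ends up as $(PIPELINERUN)-resource, unquoted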
Example #3
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    artifact_items={}):
    """Generate template given an operator inherited from BaseOp."""

    # Display name
    if op.display_name:
        op.add_pod_annotation('pipelines.kubeflow.org/task_display_name',
                              op.display_name)

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [{
            'name': name,
            'path': path
        } for name, path in output_artifact_paths.items()]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Name the ContainerOp step "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        for output_artifact in output_artifacts:
            output_annotation = pipelinerun_output_artifacts.get(
                processed_op.name, [])
            output_annotation.append({
                'name':
                output_artifact.get('name', ''),
                'path':
                output_artifact.get('path', ''),
                'key':
                "artifacts/$PIPELINERUN/%s/%s.tgz" %
                (processed_op.name, output_artifact.get('name', '').replace(
                    processed_op.name + '-', ''))
            })
            pipelinerun_output_artifacts[processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest into a YAML string so the Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove the YAML quotes so the bash variables can be read
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    artifact_items[op.name] = artifact_items.get(op.name, [])
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        _process_output_artifacts(outputs_dict, volume_mount_step_template,
                                  volume_template, replaced_param_list,
                                  artifact_to_result_mapping,
                                  artifact_items[op.name])
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    if isinstance(op, dsl.ContainerOp
                  ) and op._metadata and GENERATE_COMPONENT_SPEC_ANNOTATIONS:
        component_spec_dict = op._metadata.to_dict()
        component_spec_digest = hashlib.sha256(
            json.dumps(component_spec_dict,
                       sort_keys=True).encode()).hexdigest()
        component_name = component_spec_dict.get('name', op.name)
        component_version = component_name + '@sha256=' + component_spec_digest
        digested_component_spec_dict = {
            'name': component_name,
            'outputs': component_spec_dict.get('outputs', []),
            'version': component_version
        }
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec_digest'] = \
            json.dumps(digested_component_spec_dict, sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    # Sort and arrange results based on the provided size estimates, and process results in multiple steps if the result sizes are too big.
    result_size_map = "{}"
    if processed_op.pod_annotations:
        result_size_map = processed_op.pod_annotations.get(
            "tekton-result-sizes", "{}")
    # Only sort and arrange results when the estimated sizes are given.
    if result_size_map and result_size_map != "{}":
        try:
            result_size_map = json.loads(result_size_map)
        except ValueError:
            raise ValueError(
                "tekton-result-sizes annotation is not valid JSON")
        # Normalize estimated result size keys.
        result_size_map = {
            sanitize_k8s_name(key, allow_capital_underscore=True): value
            for key, value in result_size_map.items()
        }
        # Sort key orders based on values
        result_size_map = dict(
            sorted(result_size_map.items(),
                   key=lambda item: item[1],
                   reverse=True))
        max_byte_size = 2048
        verified_result_size_map = {0: {}}
        op_result_names = [
            name['name'] for name in template['spec']['results']
        ]
        step_bins = {0: 0}
        step_counter = 0
        # Group result files so no group exceeds max_byte_size, treated as a bin-packing problem.
        # Results are sorted from large to small; each value is checked against the existing bins to see whether it fits.
        for key, value in result_size_map.items():
            try:
                value = int(value)
            except ValueError:
                raise ValueError(
                    "Estimated value for result %s is %s, but it needs to be an integer."
                    % (key, value))
            if key in op_result_names:
                packed_index = -1
                # Look for a bin that can fit the result value
                for i in range(len(step_bins)):
                    if step_bins[i] + value > max_byte_size:
                        continue
                    step_bins[i] = step_bins[i] + value
                    packed_index = i
                    break
                # If no bin can fit the value, create a new bin to store the value
                if packed_index < 0:
                    step_counter += 1
                    if value > max_byte_size:
                        logging.warning(
                            "The estimated size for parameter %s is %sB, which is more than 2KB; "
                            "consider passing this value as an artifact instead of an output parameter."
                            % (key, str(value)))
                    step_bins[step_counter] = value
                    verified_result_size_map[step_counter] = {}
                    packed_index = step_counter
                verified_result_size_map[packed_index][key] = value
            else:
                logging.warning(
                    "The estimated size for parameter %s does not exist in the task %s. "
                    "Please correct the task annotations with the correct parameter key."
                    % (key, op.name))
        missing_param_estimation = []
        for result_name in op_result_names:
            if result_name not in result_size_map.keys():
                missing_param_estimation.append(result_name)
        if missing_param_estimation:
            logging.warning(
                "The following output parameter size estimates are missing in task %s: %s."
                % (op.name, missing_param_estimation))
        # Move results between the Tekton home and result directories if there is more than one step
        if step_counter > 0:
            for step in template['spec']['steps']:
                if step['name'] == 'main':
                    for key in result_size_map.keys():
                        # Redirect main step results that are not in the first bin to the Tekton home path
                        if key not in verified_result_size_map[0].keys():
                            sanitize_key = sanitize_k8s_name(key)
                            for i, a in enumerate(step['args']):
                                a = a.replace(
                                    '$(results.%s.path)' % sanitize_key,
                                    '%s%s' %
                                    (TEKTON_HOME_RESULT_PATH, sanitize_key))
                                step['args'][i] = a
                            for i, c in enumerate(step['command']):
                                c = c.replace(
                                    '$(results.%s.path)' % sanitize_key,
                                    '%s%s' %
                                    (TEKTON_HOME_RESULT_PATH, sanitize_key))
                                step['command'][i] = c
            # Append new steps to move result files between each step, so the Tekton controller can record
            # all results without exceeding the Kubernetes termination log limit.
            for i in range(1, step_counter + 1):
                copy_result_step = _get_copy_result_step_template(
                    i, verified_result_size_map)
                template['spec']['steps'].append(copy_result_step)
        # Update artifact item locations to the latest stage so all artifacts are properly tracked and stored.
        for i, artifact in enumerate(artifact_items[op.name]):
            if artifact[0] not in verified_result_size_map[step_counter]:
                artifact[1] = '%s%s' % (TEKTON_HOME_RESULT_PATH,
                                        sanitize_k8s_name(artifact[0]))
                artifact_items[op.name][i] = artifact
    return template
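The result-size handling in Example #3 is essentially first-fit bin packing against Tekton's roughly 2 KB result limit. A standalone sketch of just that grouping step, without the step rewriting; pack_result_sizes and its arguments are illustrative names, not part of kfp-tekton:

def pack_result_sizes(result_size_map, max_byte_size=2048):
    # result_size_map: {result name: estimated size in bytes}
    # Returns {bin index: {result name: size}}, where each bin's total stays
    # within max_byte_size. Results are placed largest-first into the first
    # bin that still has room; a new bin is opened when none fits.
    bins = {0: 0}     # bin index -> bytes used so far
    packed = {0: {}}  # bin index -> results assigned to that bin
    for name, size in sorted(result_size_map.items(),
                             key=lambda item: item[1],
                             reverse=True):
        for index in range(len(bins)):
            if bins[index] + size <= max_byte_size:
                bins[index] += size
                packed[index][name] = size
                break
        else:  # no existing bin can hold this result
            index = len(bins)
            bins[index] = size
            packed[index] = {name: size}
    return packed


# Example: the largest result gets a bin of its own; the two smaller ones share the next bin.
print(pack_result_sizes({'model-path': 1500, 'accuracy': 700, 'log': 900}))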
Example #4
    def _workflow_with_pipelinerun(self, task_refs, pipeline,
                                   pipeline_template, workflow):
        """ Generate pipelinerun template """
        pipelinerun = {
            'apiVersion': tekton_api_version,
            'kind': 'PipelineRun',
            'metadata': {
                'name': pipeline_template['metadata']['name'] + '-run'
            },
            'spec': {
                'params': [{
                    'name': p['name'],
                    'value': p.get('default', '')
                } for p in pipeline_template['spec']['params']],
                'pipelineRef': {
                    'name': pipeline_template['metadata']['name']
                }
            }
        }

        # Generate TaskRunSpec podTemplates
        task_run_spec = []
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            task_spec = {
                "pipelineTaskName": task['name'],
                "taskPodTemplate": {}
            }
            if op.affinity:
                task_spec["taskPodTemplate"][
                    "affinity"] = convert_k8s_obj_to_json(op.affinity)
            if op.tolerations:
                task_spec["taskPodTemplate"]['tolerations'] = op.tolerations
            if op.node_selector:
                task_spec["taskPodTemplate"]['nodeSelector'] = op.node_selector
            if bool(task_spec["taskPodTemplate"]):
                task_run_spec.append(task_spec)
        if len(task_run_spec) > 0:
            pipelinerun['spec']['taskRunSpecs'] = task_run_spec

        # add workflow level timeout to pipeline run
        if pipeline.conf.timeout:
            pipelinerun['spec']['timeout'] = '%ds' % pipeline.conf.timeout

        # generate the Tekton service account template for image pull secret
        service_template = {}
        if len(pipeline.conf.image_pull_secrets) > 0:
            service_template = {
                'apiVersion': 'v1',
                'kind': 'ServiceAccount',
                'metadata': {
                    'name': pipelinerun['metadata']['name'] + '-sa'
                }
            }
        for image_pull_secret in pipeline.conf.image_pull_secrets:
            service_template.setdefault('imagePullSecrets', []).append({
                'name': image_pull_secret.name
            })

        if service_template:
            workflow = workflow + [service_template]
            pipelinerun['spec']['serviceAccountName'] = service_template[
                'metadata']['name']

        workflow = workflow + [pipelinerun]

        return workflow
Example #5
    def _create_pipeline_workflow(self, args, pipeline, op_transformers=None, pipeline_conf=None) \
            -> Dict[Text, Any]:
        """Create workflow for the pipeline."""
        # Input Parameters
        params = []
        for arg in args:
            param = {'name': arg.name}
            if arg.value is not None:
                if isinstance(arg.value, (list, tuple, dict)):
                    param['default'] = json.dumps(arg.value, sort_keys=True)
                else:
                    param['default'] = str(arg.value)
            params.append(param)

        # TODO: task templates?

        # generate Tekton tasks from pipeline ops
        raw_templates = self._create_dag_templates(pipeline, op_transformers,
                                                   params)

        # generate task and condition reference list for the Tekton Pipeline
        condition_refs = {}

        # TODO
        task_refs = []
        templates = []
        condition_task_refs = {}
        for template in raw_templates:
            # TODO Allow an opt-out for the condition_template
            if template['kind'] == 'Condition':
                condition_task_ref = [{
                    'name':
                    template['metadata']['name'],
                    'params': [{
                        'name': p['name'],
                        'value': p.get('value', '')
                    } for p in template['spec'].get('params', [])],
                    'taskSpec':
                    _get_super_condition_template(),
                }]
                condition_refs[template['metadata']['name']] = [{
                    'input':
                    '$(tasks.%s.results.status)' %
                    template['metadata']['name'],
                    'operator':
                    'in',
                    'values': ['true']
                }]
                condition_task_refs[template['metadata']
                                    ['name']] = condition_task_ref
            else:
                templates.append(template)
                task_ref = {
                    'name':
                    template['metadata']['name'],
                    'params': [{
                        'name': p['name'],
                        'value': p.get('default', '')
                    } for p in template['spec'].get('params', [])],
                    'taskSpec':
                    template['spec'],
                }

                if template['metadata'].get('labels', None):
                    task_ref['taskSpec']['metadata'] = task_ref[
                        'taskSpec'].get('metadata', {})
                    task_ref['taskSpec']['metadata']['labels'] = template[
                        'metadata']['labels']
                if template['metadata'].get('annotations', None):
                    task_ref['taskSpec']['metadata'] = task_ref[
                        'taskSpec'].get('metadata', {})
                    task_ref['taskSpec']['metadata']['annotations'] = template[
                        'metadata']['annotations']
                task_refs.append(task_ref)

        # process input parameters from upstream tasks for conditions and pair conditions with their ancestor conditions
        opsgroup_stack = [pipeline.groups[0]]
        condition_stack = [None]
        while opsgroup_stack:
            cur_opsgroup = opsgroup_stack.pop()
            most_recent_condition = condition_stack.pop()

            if cur_opsgroup.type == 'condition':
                condition_task_ref = condition_task_refs[cur_opsgroup.name][0]
                condition = cur_opsgroup.condition
                input_params = []

                # Process input parameters if needed
                if isinstance(condition.operand1, dsl.PipelineParam):
                    if condition.operand1.op_name:
                        operand_value = '$(tasks.' + condition.operand1.op_name + '.results.' + sanitize_k8s_name(
                            condition.operand1.name) + ')'
                    else:
                        operand_value = '$(params.' + condition.operand1.name + ')'
                    input_params.append(operand_value)
                if isinstance(condition.operand2, dsl.PipelineParam):
                    if condition.operand2.op_name:
                        operand_value = '$(tasks.' + condition.operand2.op_name + '.results.' + sanitize_k8s_name(
                            condition.operand2.name) + ')'
                    else:
                        operand_value = '$(params.' + condition.operand2.name + ')'
                    input_params.append(operand_value)
                for param_iter in range(len(input_params)):
                    condition_task_ref['params'][param_iter][
                        'value'] = input_params[param_iter]

            opsgroup_stack.extend(cur_opsgroup.groups)
            condition_stack.extend([
                most_recent_condition for x in range(len(cur_opsgroup.groups))
            ])
        # add task dependencies and add condition refs to the task ref that depends on the condition
        op_name_to_parent_groups = self._get_groups_for_ops(pipeline.groups[0])
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            parent_group = op_name_to_parent_groups.get(task['name'], [])
            if parent_group:
                if condition_refs.get(parent_group[-2], []):
                    task['when'] = condition_refs.get(
                        op_name_to_parent_groups[task['name']][-2], [])
            if op.dependent_names:
                task['runAfter'] = op.dependent_names

        # process input parameters from upstream tasks
        pipeline_param_names = [p['name'] for p in params]
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            for tp in task.get('params', []):
                if tp['name'] in pipeline_param_names:
                    tp['value'] = '$(params.%s)' % tp['name']
                else:
                    for pp in op.inputs:
                        if tp['name'] == pp.full_name:
                            tp['value'] = '$(tasks.%s.results.%s)' % (
                                pp.op_name, pp.name)
                            # Create input artifact tracking annotation
                            input_annotation = self.input_artifacts.get(
                                task['name'], [])
                            input_annotation.append({
                                'name': tp['name'],
                                'parent_task': pp.op_name
                            })
                            self.input_artifacts[
                                task['name']] = input_annotation
                            break

        # add retries params
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if op.num_retries:
                task['retries'] = op.num_retries

        # add timeout params to task_refs instead of the task spec
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if not TEKTON_GLOBAL_DEFAULT_TIMEOUT or op.timeout:
                task['timeout'] = '%ds' % op.timeout

        # handle resourceOp cases in pipeline
        self._process_resourceOp(task_refs, pipeline)

        # handle exit handler in pipeline
        finally_tasks = []
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if op.is_exit_handler:
                finally_tasks.append(task)
        task_refs = [
            task for task in task_refs
            if not pipeline.ops.get(task['name']).is_exit_handler
        ]

        # process loop parameters; keep this section after the other processing steps and ahead of pipeline generation
        root_group = pipeline.groups[0]
        op_name_to_for_loop_op = self._get_for_loop_ops(root_group)
        if op_name_to_for_loop_op:
            for loop_param in op_name_to_for_loop_op.values():
                if loop_param.items_is_pipeline_param is True:
                    raise NotImplementedError(
                        "dynamic params are not yet implemented")
            include_loop_task_refs = []
            for task in task_refs:
                with_loop_task = self._get_loop_task(task,
                                                     op_name_to_for_loop_op)
                include_loop_task_refs.extend(with_loop_task)
            task_refs = include_loop_task_refs

        # Flatten condition task
        condition_task_refs_temp = []
        for condition_task_ref in condition_task_refs.values():
            for ref in condition_task_ref:
                condition_task_refs_temp.append(ref)
        condition_task_refs = condition_task_refs_temp
        # TODO: generate the PipelineRun template
        pipeline_run = {
            'apiVersion': tekton_api_version,
            'kind': 'PipelineRun',
            'metadata': {
                'name':
                sanitize_k8s_name(pipeline.name or 'Pipeline', suffix_space=4),
                # 'labels': get_default_telemetry_labels(),
                'annotations': {
                    'tekton.dev/output_artifacts':
                    json.dumps(self.output_artifacts, sort_keys=True),
                    'tekton.dev/input_artifacts':
                    json.dumps(self.input_artifacts, sort_keys=True),
                    'tekton.dev/artifact_bucket':
                    DEFAULT_ARTIFACT_BUCKET,
                    'tekton.dev/artifact_endpoint':
                    DEFAULT_ARTIFACT_ENDPOINT,
                    'tekton.dev/artifact_endpoint_scheme':
                    DEFAULT_ARTIFACT_ENDPOINT_SCHEME,
                    'tekton.dev/artifact_items':
                    json.dumps(self.artifact_items, sort_keys=True),
                    'sidecar.istio.io/inject':
                    'false'  # disable Istio injection since Tekton cannot run with the Istio sidecar
                }
            },
            'spec': {
                'params': [{
                    'name': p['name'],
                    'value': p.get('default', '')
                } for p in params],
                'pipelineSpec': {
                    'params': params,
                    'tasks': task_refs + condition_task_refs,
                    'finally': finally_tasks
                }
            }
        }

        # TODO: pipelineRun additions

        # Generate TaskRunSpec podTemplates
        task_run_spec = []
        for task in task_refs:

            # TODO: should loop-item tasks be included here?
            if LoopArguments.LOOP_ITEM_NAME_BASE in task['name']:
                task_name = re.sub(
                    r'-%s-.+$' % LoopArguments.LOOP_ITEM_NAME_BASE, '',
                    task['name'])
            else:
                task_name = task['name']
            op = pipeline.ops.get(task_name)
            if not op:
                raise RuntimeError("unable to find op with name '%s'" %
                                   task["name"])

            task_spec = {
                "pipelineTaskName": task['name'],
                "taskPodTemplate": {}
            }
            if op.affinity:
                task_spec["taskPodTemplate"][
                    "affinity"] = convert_k8s_obj_to_json(op.affinity)
            if op.tolerations:
                task_spec["taskPodTemplate"]['tolerations'] = op.tolerations
            if op.node_selector:
                task_spec["taskPodTemplate"]['nodeSelector'] = op.node_selector
            if bool(task_spec["taskPodTemplate"]):
                task_run_spec.append(task_spec)
        if len(task_run_spec) > 0:
            pipeline_run['spec']['taskRunSpecs'] = task_run_spec

        # add workflow level timeout to pipeline run
        if not TEKTON_GLOBAL_DEFAULT_TIMEOUT or pipeline.conf.timeout:
            pipeline_run['spec']['timeout'] = '%ds' % pipeline.conf.timeout

        # generate the Tekton podTemplate for image pull secret
        if len(pipeline.conf.image_pull_secrets) > 0:
            pipeline_run['spec']['podTemplate'] = pipeline_run['spec'].get(
                'podTemplate', {})
            pipeline_run['spec']['podTemplate']['imagePullSecrets'] = [{
                "name":
                s.name
            } for s in pipeline.conf.image_pull_secrets]

        workflow = pipeline_run

        return workflow
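The parameter wiring above follows one rule: a task parameter whose name matches a pipeline-level parameter becomes $(params.<name>), otherwise it is bound to the result of the upstream task that produces it. A small sketch of that rule in isolation; the function and argument names are illustrative, not from the compiler:

def resolve_param_value(param_name, pipeline_param_names, upstream_results):
    # pipeline_param_names: names of pipeline-level params
    # upstream_results: {param name: (producing task name, result name)}
    if param_name in pipeline_param_names:
        return '$(params.%s)' % param_name
    if param_name in upstream_results:
        op_name, result_name = upstream_results[param_name]
        return '$(tasks.%s.results.%s)' % (op_name, result_name)
    return ''


print(resolve_param_value('learning-rate', ['learning-rate'], {}))
# $(params.learning-rate)
print(resolve_param_value('train-model', [], {'train-model': ('train', 'model')}))
# $(tasks.train.results.model)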
Example #6
    def _workflow_with_pipelinerun(self, task_refs, pipeline,
                                   pipeline_template, workflow):
        """ Generate pipelinerun template """
        pipelinerun = {
            'apiVersion': tekton_api_version,
            'kind': 'PipelineRun',
            'metadata': {
                'name':
                sanitize_k8s_name(pipeline_template['metadata']['name'],
                                  suffix_space=4) + '-run',
                'annotations': {
                    'tekton.dev/output_artifacts':
                    json.dumps(self.output_artifacts),
                    'tekton.dev/input_artifacts':
                    json.dumps(self.input_artifacts)
                }
            },
            'spec': {
                'params': [{
                    'name': p['name'],
                    'value': p.get('default', '')
                } for p in pipeline_template['spec']['params']],
                'pipelineRef': {
                    'name': pipeline_template['metadata']['name']
                }
            }
        }

        # Generate TaskRunSpec podTemplates
        task_run_spec = []
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            task_spec = {
                "pipelineTaskName": task['name'],
                "taskPodTemplate": {}
            }
            if op.affinity:
                task_spec["taskPodTemplate"][
                    "affinity"] = convert_k8s_obj_to_json(op.affinity)
            if op.tolerations:
                task_spec["taskPodTemplate"]['tolerations'] = op.tolerations
            if op.node_selector:
                task_spec["taskPodTemplate"]['nodeSelector'] = op.node_selector
            if bool(task_spec["taskPodTemplate"]):
                task_run_spec.append(task_spec)
        if len(task_run_spec) > 0:
            pipelinerun['spec']['taskRunSpecs'] = task_run_spec

        # add workflow level timeout to pipeline run
        if pipeline.conf.timeout:
            pipelinerun['spec']['timeout'] = '%ds' % pipeline.conf.timeout

        # generate the Tekton podTemplate for image pull secret
        if len(pipeline.conf.image_pull_secrets) > 0:
            pipelinerun['spec']['podTemplate'] = pipelinerun['spec'].get(
                'podTemplate', {})
            pipelinerun['spec']['podTemplate']['imagePullSecrets'] = [{
                "name":
                s.name
            } for s in pipeline.conf.image_pull_secrets]

        workflow = workflow + [pipelinerun]

        return workflow
Example #7
def _op_to_template(op: BaseOp, enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PODNAME/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest into a YAML string so the Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove the YAML quotes so the bash variables can be read
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Input artifacts in KFP are not pulled from S3; they are always passed as raw inputs.
        # Visit https://github.com/kubeflow/pipelines/issues/336 for more details on the implementation.
        copy_inputs_step = _get_base_step('copy-inputs')
        for artifact in inputs['artifacts']:
            if 'raw' in artifact:
                copy_inputs_step['script'] += 'echo -n "%s" > %s\n' % (
                    artifact['raw']['data'], artifact['path'])
            mount_path = artifact['path'].rsplit("/", 1)[0]
            if mount_path not in mounted_param_paths:
                _add_mount_path(artifact['name'], artifact['path'], mount_path,
                                volume_mount_step_template, volume_template,
                                mounted_param_paths)
        template['spec']['steps'] = _prepend_steps([copy_inputs_step],
                                                   template['spec']['steps'])
        _update_volumes(template, volume_mount_step_template, volume_template)

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault(
            'annotations',
            {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault(
                'annotations',
                {})['pipelines.kubeflow.org/max_cache_staleness'] = str(
                    op.execution_options.caching_strategy.max_cache_staleness)

    return template
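The copy-inputs step in Example #7 turns each raw input artifact into a shell echo that writes the data to the artifact's declared path. A sketch of just the script assembly; build_copy_inputs_script and the shell preamble are illustrative assumptions, not taken from _get_base_step:

def build_copy_inputs_script(raw_artifacts):
    # raw_artifacts: list of {'name': ..., 'path': ..., 'raw': {'data': ...}}
    script = '#!/bin/sh\nset -exo pipefail\n'
    for artifact in raw_artifacts:
        # One echo per raw artifact, writing the inline data to its path.
        script += 'echo -n "%s" > %s\n' % (artifact['raw']['data'],
                                           artifact['path'])
    return script


print(build_copy_inputs_script([{
    'name': 'message',
    'path': '/tmp/inputs/message/data',
    'raw': {'data': 'hello world'}
}]))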