Example #1
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PIPELINETASK/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Call the ContainerOp step "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        if enable_artifacts:
            for output_artifact in output_artifacts:
                output_annotation = pipelinerun_output_artifacts.get(
                    processed_op.name, [])
                output_annotation.append({
                    'name': output_artifact['name'],
                    'path': output_artifact['path']
                })
                pipelinerun_output_artifacts[
                    processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest to a string so that Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove YAML quotes so that bash variables can be read
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/task_display_name'] = \
            processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = \
            json.dumps(op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    return template
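
The caller-supplied pipelinerun_output_artifacts mapping above is mutated in place so that a single dictionary can collect the artifact-tracking annotation for every task in the pipeline run. A minimal, self-contained sketch of that accumulation pattern (the helper name track_output_artifacts is hypothetical, not part of kfp-tekton):

from typing import Dict, List


def track_output_artifacts(task_name: str,
                           output_artifacts: List[dict],
                           pipelinerun_output_artifacts: Dict[str, list]) -> None:
    # Append each artifact's name/path under its task name, mirroring the
    # annotation bookkeeping in Example #1 (illustrative only).
    for output_artifact in output_artifacts:
        output_annotation = pipelinerun_output_artifacts.get(task_name, [])
        output_annotation.append({
            'name': output_artifact['name'],
            'path': output_artifact['path']
        })
        pipelinerun_output_artifacts[task_name] = output_annotation


tracked = {}
track_output_artifacts('echo', [{'name': 'echo-output', 'path': '/tmp/out.txt'}], tracked)
print(tracked)  # {'echo': [{'name': 'echo-output', 'path': '/tmp/out.txt'}]}
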
Example #2
def _op_to_template(op: BaseOp):
    """Generate template given an operator inherited from BaseOp."""

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            #  convert_k8s_obj_to_json(
            #      ArtifactLocation.create_artifact_for_s3(
            #          op.artifact_location,
            #          name=name,
            #          path=path,
            #          key='runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz'))
            # for name, path in output_artifact_paths.items()
        ]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # # no output artifacts
        # output_artifacts = []
        #
        # # workflow template
        # processed_op.resource["manifest"] = yaml.dump(
        #     convert_k8s_obj_to_json(processed_op.k8s_resource),
        #     default_flow_style=False
        # )
        # template = {
        #     'name': processed_op.name,
        #     'resource': convert_k8s_obj_to_json(
        #         processed_op.resource
        #     )
        # }
        raise NotImplementedError("dsl.ResourceOp is not yet implemented")

    # initContainers
    if processed_op.init_containers:
        steps = processed_op.init_containers.copy()
        steps.extend(template['spec']['steps'])
        template['spec']['steps'] = steps

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        template['spec']['params'] = inputs['parameters']
    elif 'artifacts' in inputs:
        raise NotImplementedError("input artifacts are not yet implemented")

    # outputs
    if isinstance(op, dsl.ContainerOp):
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        param_outputs = processed_op.attribute_outputs
    outputs_dict = _outputs_to_json(op, processed_op.outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        template['spec']['results'] = []
        for name, path in processed_op.file_outputs.items():
            name = name.replace(
                '_', '-'
            )  # replace '_' with '-' since Tekton results do not support underscores
            template['spec']['results'].append({
                'name': name,
                'description': path
            })
            # replace all occurrences of the output file path with the Tekton output parameter expression
            for s in template['spec']['steps']:
                if 'command' in s:
                    s['command'] = [
                        c.replace(path, '$(results.%s.path)' % name)
                        for c in s['command']
                    ]
                if 'args' in s:
                    s['args'] = [
                        a.replace(path, '$(results.%s.path)' % name)
                        for a in s['args']
                    ]

    # **********************************************************
    #  NOTE: the following features are still under development
    # **********************************************************

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        import json
        template.setdefault('metadata', {}).setdefault(
            'annotations',
            {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    return template
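
The outputs block above rewrites every declared file output into a Tekton result: the output name is hyphenated and each occurrence of the file path in the step command/args is replaced with $(results.<name>.path). A minimal sketch of that substitution in isolation (function name and step shape are illustrative, not the kfp-tekton API):

def rewrite_outputs_to_results(steps, file_outputs):
    # Build the spec.results list and rewrite command/args in place,
    # as done in the outputs section of Example #2.
    results = []
    for name, path in file_outputs.items():
        name = name.replace('_', '-')  # Tekton result names do not allow underscores
        results.append({'name': name, 'description': path})
        for step in steps:
            for field in ('command', 'args'):
                if field in step:
                    step[field] = [
                        value.replace(path, '$(results.%s.path)' % name)
                        for value in step[field]
                    ]
    return results


steps = [{'name': 'main', 'command': ['sh', '-c', 'echo hi > /tmp/out_file']}]
print(rewrite_outputs_to_results(steps, {'out_file': '/tmp/out_file'}))
print(steps[0]['command'])  # ['sh', '-c', 'echo hi > $(results.out-file.path)']
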
Example #3
def _op_to_template(op: BaseOp, enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PODNAME/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest to a string so that Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove YAML quotes so that bash variables can be read
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Input artifacts in KFP are not pulled from S3; they are always passed as raw inputs.
        # Visit https://github.com/kubeflow/pipelines/issues/336 for more details on the implementation.
        copy_inputs_step = _get_base_step('copy-inputs')
        for artifact in inputs['artifacts']:
            if 'raw' in artifact:
                copy_inputs_step['script'] += 'echo -n "%s" > %s\n' % (
                    artifact['raw']['data'], artifact['path'])
            mount_path = artifact['path'].rsplit("/", 1)[0]
            if mount_path not in mounted_param_paths:
                _add_mount_path(artifact['name'], artifact['path'], mount_path,
                                volume_mount_step_template, volume_template,
                                mounted_param_paths)
        template['spec']['steps'] = _prepend_steps([copy_inputs_step],
                                                   template['spec']['steps'])
        _update_volumes(template, volume_mount_step_template, volume_template)

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(
            outputs_dict, volume_mount_step_template, volume_template,
            replaced_param_list, artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault(
            'annotations',
            {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault(
                'annotations',
                {})['pipelines.kubeflow.org/max_cache_staleness'] = str(
                    op.execution_options.caching_strategy.max_cache_staleness)

    return template
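
Raw input artifacts in Example #3 are materialized by a generated copy-inputs step that echoes the raw data into the declared path and mounts an emptyDir volume per target directory. A self-contained sketch of that bookkeeping, with stand-ins for the _get_base_step and _add_mount_path helpers that are not shown here (all names are illustrative):

def build_copy_inputs_step(raw_artifacts):
    # Emit a shell step that writes each raw artifact to its path, plus the
    # emptyDir volumes/mounts needed for the target directories (sketch only).
    step = {'name': 'copy-inputs', 'image': 'busybox',
            'script': '#!/bin/sh\nset -exo pipefail\n'}
    volume_mounts, volumes, mounted_paths = [], [], []
    for artifact in raw_artifacts:
        if 'raw' in artifact:
            step['script'] += 'echo -n "%s" > %s\n' % (artifact['raw']['data'],
                                                       artifact['path'])
        mount_path = artifact['path'].rsplit('/', 1)[0]
        if mount_path not in mounted_paths:
            volume_mounts.append({'name': artifact['name'], 'mountPath': mount_path})
            volumes.append({'name': artifact['name'], 'emptyDir': {}})
            mounted_paths.append(mount_path)
    return step, volume_mounts, volumes


step, mounts, vols = build_copy_inputs_step(
    [{'name': 'msg', 'path': '/tmp/inputs/msg.txt', 'raw': {'data': 'hello'}}])
print(step['script'])
print(mounts, vols)
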
Example #4
def _op_to_template(op: BaseOp,
                    pipelinerun_output_artifacts={},
                    artifact_items={}):
    """Generate template given an operator inherited from BaseOp."""

    # Display name
    if op.display_name:
        op.add_pod_annotation('pipelines.kubeflow.org/task_display_name',
                              op.display_name)

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [{
            'name': name,
            'path': path
        } for name, path in output_artifact_paths.items()]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        # Call the ContainerOp step "main" to align with Argo
        step = {'name': "main"}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

        # Create output artifact tracking annotation.
        for output_artifact in output_artifacts:
            output_annotation = pipelinerun_output_artifacts.get(
                processed_op.name, [])
            output_annotation.append({
                'name':
                output_artifact.get('name', ''),
                'path':
                output_artifact.get('path', ''),
                'key':
                "artifacts/$PIPELINERUN/%s/%s.tgz" %
                (processed_op.name, output_artifact.get('name', '').replace(
                    processed_op.name + '-', ''))
            })
            pipelinerun_output_artifacts[processed_op.name] = output_annotation

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten the manifest to a string so that Argo variables can be replaced
        manifest = yaml.dump(convert_k8s_obj_to_json(
            processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove YAML quotes so that bash variables can be read
            manifest = re.sub(r"name: '([^']+)'", r'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op,
                                            processed_op.name,
                                            tekton_api_version,
                                            manifest,
                                            argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(
            processed_op.init_containers, template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Leave artifacts for big data passing
        template['spec']['artifacts'] = inputs['artifacts']

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    artifact_items[op.name] = artifact_items.get(op.name, [])
    if outputs_dict:
        copy_results_step = _process_parameters(
            processed_op, template, outputs_dict, volume_mount_step_template,
            volume_template, replaced_param_list, artifact_to_result_mapping,
            mounted_param_paths)
        _process_output_artifacts(outputs_dict, volume_mount_step_template,
                                  volume_template, replaced_param_list,
                                  artifact_to_result_mapping,
                                  artifact_items[op.name])
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253): value
                for key, value in processed_op.pod_annotations.items()
            }
        if processed_op.pod_labels:
            template['metadata']['labels'] = {
                sanitize_k8s_name(key,
                                  allow_capital_underscore=True,
                                  allow_dot=True,
                                  allow_slash=True,
                                  max_length=253):
                sanitize_k8s_name(value,
                                  allow_capital_underscore=True,
                                  allow_dot=True)
                for key, value in processed_op.pod_labels.items()
            }

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    if isinstance(op, dsl.ContainerOp
                  ) and op._metadata and GENERATE_COMPONENT_SPEC_ANNOTATIONS:
        component_spec_dict = op._metadata.to_dict()
        component_spec_digest = hashlib.sha256(
            json.dumps(component_spec_dict,
                       sort_keys=True).encode()).hexdigest()
        component_name = component_spec_dict.get('name', op.name)
        component_version = component_name + '@sha256=' + component_spec_digest
        digested_component_spec_dict = {
            'name': component_name,
            'outputs': component_spec_dict.get('outputs', []),
            'version': component_version
        }
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec_digest'] = \
            json.dumps(digested_component_spec_dict, sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    # Sort and arrange results based on the provided estimated sizes, and process
    # results in multiple steps if the result sizes are too big.
    result_size_map = "{}"
    if processed_op.pod_annotations:
        result_size_map = processed_op.pod_annotations.get(
            "tekton-result-sizes", "{}")
    # Only sort and arrange results when the estimated sizes are given.
    if result_size_map and result_size_map != "{}":
        try:
            result_size_map = json.loads(result_size_map)
        except ValueError:
            raise ValueError("tekton-result-sizes annotation is not valid JSON")
        # Normalize estimated result size keys.
        result_size_map = {
            sanitize_k8s_name(key, allow_capital_underscore=True): value
            for key, value in result_size_map.items()
        }
        # Sort key orders based on values
        result_size_map = dict(
            sorted(result_size_map.items(),
                   key=lambda item: item[1],
                   reverse=True))
        max_byte_size = 2048
        verified_result_size_map = {0: {}}
        op_result_names = [
            name['name'] for name in template['spec']['results']
        ]
        step_bins = {0: 0}
        step_counter = 0
        # Group result files so they do not exceed max_byte_size, treating this as a bin packing problem.
        # Results are sorted from large to small; each value loops over the existing bins to determine whether it fits.
        for key, value in result_size_map.items():
            try:
                value = int(value)
            except ValueError:
                raise ValueError(
                    "Estimated value for result %s is %s, but it needs to be an integer."
                    % (key, value))
            if key in op_result_names:
                packed_index = -1
                # Look for a bin that can fit the result value
                for i in range(len(step_bins)):
                    if step_bins[i] + value > max_byte_size:
                        continue
                    step_bins[i] = step_bins[i] + value
                    packed_index = i
                    break
                # If no bin can fit the value, create a new bin to store the value
                if packed_index < 0:
                    step_counter += 1
                    if value > max_byte_size:
                        logging.warning(
                            "The estimated size for parameter %s is %sB which is more than 2KB, "
                            "consider passing this value as artifact instead of output parameter."
                            % (key, str(value)))
                    step_bins[step_counter] = value
                    verified_result_size_map[step_counter] = {}
                    packed_index = step_counter
                verified_result_size_map[packed_index][key] = value
            else:
                logging.warning(
                    "The estimated size for parameter %s does not exist in the task %s. "
                    "Please correct the task annotation with the correct parameter key."
                    % (key, op.name))
        missing_param_estimation = []
        for result_name in op_result_names:
            if result_name not in result_size_map.keys():
                missing_param_estimation.append(result_name)
        if missing_param_estimation:
            logging.warning(
                "The following output parameter estimations are missing in task %s: Missing params: %s."
                % (op.name, missing_param_estimation))
        # Move results between the Tekton home and result directories if there is more than one step
        if step_counter > 0:
            for step in template['spec']['steps']:
                if step['name'] == 'main':
                    for key in result_size_map.keys():
                        # Redirect main-step results that are not in the first bin to the Tekton home path
                        if key not in verified_result_size_map[0].keys():
                            sanitize_key = sanitize_k8s_name(key)
                            for i, a in enumerate(step['args']):
                                a = a.replace(
                                    '$(results.%s.path)' % sanitize_key,
                                    '%s%s' %
                                    (TEKTON_HOME_RESULT_PATH, sanitize_key))
                                step['args'][i] = a
                            for i, c in enumerate(step['command']):
                                c = c.replace(
                                    '$(results.%s.path)' % sanitize_key,
                                    '%s%s' %
                                    (TEKTON_HOME_RESULT_PATH, sanitize_key))
                                step['command'][i] = c
            # Append new steps to move result files between each step, so the Tekton controller can
            # record all results without exceeding the Kubernetes termination log limit.
            for i in range(1, step_counter + 1):
                copy_result_step = _get_copy_result_step_template(
                    i, verified_result_size_map)
                template['spec']['steps'].append(copy_result_step)
        # Update artifact item locations to the latest stage in order to properly track and store all the artifacts.
        for i, artifact in enumerate(artifact_items[op.name]):
            if artifact[0] not in verified_result_size_map[step_counter].keys(
            ):
                artifact[1] = '%s%s' % (TEKTON_HOME_RESULT_PATH,
                                        sanitize_k8s_name(artifact[0]))
                artifact_items[op.name][i] = artifact
    return template
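
The result-size handling above is essentially first-fit bin packing: results are sorted by estimated size, the first bin stays with the main step, and every additional bin that is needed to stay under the 2048-byte termination-log budget becomes an extra copy-result step. A minimal, self-contained sketch of that grouping (illustrative only, not the kfp-tekton implementation):

def pack_results(sizes, max_byte_size=2048):
    # First-fit packing of result names into bins of at most max_byte_size bytes;
    # bins[0] corresponds to the results kept in the main step.
    ordered = sorted(sizes.items(), key=lambda item: item[1], reverse=True)
    bins = [{}]
    totals = [0]
    for name, size in ordered:
        for i, total in enumerate(totals):
            if total + size <= max_byte_size:
                bins[i][name] = size
                totals[i] += size
                break
        else:
            # No existing bin fits: open a new bin, i.e. a new copy-result step.
            bins.append({name: size})
            totals.append(size)
    return bins


print(pack_results({'big': 1800, 'medium': 900, 'small': 300}))
# [{'big': 1800}, {'medium': 900, 'small': 300}]
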
Example #5
def _op_to_template(op: BaseOp):
    """Generate template given an operator inherited from BaseOp."""

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()),
                   key=lambda x: x[0]))

        output_artifacts = [
            #  convert_k8s_obj_to_json(
            #      ArtifactLocation.create_artifact_for_s3(
            #          op.artifact_location,
            #          name=name,
            #          path=path,
            #          key='runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz'))
            # for name, path in output_artifact_paths.items()
        ]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)

        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # task template
        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                "params": [
                    {
                        "description": "Action on the resource",
                        "name": "action",
                        "type": "string"
                    },
                    {
                        "default": "strategic",
                        "description":
                        "Merge strategy when using action patch",
                        "name": "merge-strategy",
                        "type": "string"
                    },
                    {
                        "description": "Content of the resource to deploy",
                        "name": "manifest",
                        "type": "string"
                    },
                    {
                        "default": "",
                        "description":
                        "An express to retrieval data from resource.",
                        "name": "output",
                        "type": "string"
                    },
                    {
                        "default": "",
                        "description":
                        "A label selector express to decide if the action on resource is success.",
                        "name": "success-condition",
                        "type": "string"
                    },
                    {
                        "default": "",
                        "description":
                        "A label selector express to decide if the action on resource is failure.",
                        "name": "failure-condition",
                        "type": "string"
                    },
                    {
                        "default":
                        "index.docker.io/fenglixa/kubeclient:v0.0.1",  # Todo: The image need to be replaced, once there are official images from tekton
                        "description": "Kubectl wrapper image",
                        "name": "image",
                        "type": "string"
                    },
                    {
                        "default": "false",
                        "description":
                        "Enable set owner reference for created resource.",
                        "name": "set-ownerreference",
                        "type": "string"
                    }
                ],
                'steps': [{
                    "args": [
                        "--action=$(params.action)",
                        "--merge-strategy=$(params.merge-strategy)",
                        "--manifest=$(params.manifest)",
                        "--output=$(params.output)",
                        "--success-condition=$(params.success-condition)",
                        "--failure-condition=$(params.failure-condition)",
                        "--set-ownerreference=$(params.set-ownerreference)"
                    ],
                    "image":
                    "$(params.image)",
                    "name":
                    processed_op.name,
                    "resources": {}
                }]
            }
        }

    # initContainers
    if processed_op.init_containers:
        steps = processed_op.init_containers.copy()
        steps.extend(template['spec']['steps'])
        template['spec']['steps'] = steps

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths,
                             artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    elif 'artifacts' in inputs:
        raise NotImplementedError("input artifacts are not yet implemented")

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs,
                                    output_artifacts)
    if outputs_dict:
        """
        Tekton results need to live under /tekton/results. If file output paths cannot be
        configured to /tekton/results, we need to add the copy step below to move file
        outputs to the Tekton destination. BusyBox is recommended for small tasks because
        it is relatively lightweight and small compared to the ubuntu and bash images.

        - image: busybox
          name: copy-results
          script: |
            #!/bin/sh
            set -exo pipefail
            cp $LOCALPATH $(results.data.path);
        """
        template['spec']['results'] = []
        copy_results_step = {
            'image': 'busybox',
            'name': 'copy-results',
            'script': '#!/bin/sh\nset -exo pipefail\n'
        }
        volume_mount_step_template = []
        volume_template = []
        mounted_paths = []
        for name, path in param_outputs.items():
            name = name.replace(
                '_', '-'
            )  # replace '_' with '-' since Tekton results do not support underscores
            template['spec']['results'].append({
                'name': name,
                'description': path
            })
            # replace all occurrences of the output file path with the Tekton output parameter expression
            need_copy_step = True
            for s in template['spec']['steps']:
                if 'command' in s:
                    commands = []
                    for c in s['command']:
                        if path in c:
                            c = c.replace(path, '$(results.%s.path)' % name)
                            need_copy_step = False
                        commands.append(c)
                    s['command'] = commands
                if 'args' in s:
                    args = []
                    for a in s['args']:
                        if path in a:
                            a = a.replace(path, '$(results.%s.path)' % name)
                            need_copy_step = False
                        args.append(a)
                    s['args'] = args
            # If file output path cannot be found/replaced, use emptyDir to copy it to the tekton/results path
            if need_copy_step:
                copy_results_step['script'] = copy_results_step[
                    'script'] + 'cp ' + path + ' $(results.%s.path);' % name + '\n'
                mountPath = path.rsplit("/", 1)[0]
                if mountPath not in mounted_paths:
                    volume_mount_step_template.append({
                        'name':
                        name,
                        'mountPath':
                        path.rsplit("/", 1)[0]
                    })
                    volume_template.append({'name': name, 'emptyDir': {}})
                    mounted_paths.append(mountPath)
        if mounted_paths:
            copy_results_step['script'] = literal_str(
                copy_results_step['script'])
            template['spec']['steps'].append(copy_results_step)
            template['spec']['stepTemplate'] = {}
            template['spec']['stepTemplate'][
                'volumeMounts'] = volume_mount_step_template
            template['spec']['volumes'] = volume_template

    # **********************************************************
    #  NOTE: the following features are still under development
    # **********************************************************

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault(
            'metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        import json
        template.setdefault('metadata', {}).setdefault(
            'annotations',
            {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    return template
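
When an output path cannot be found in any step command or args, Example #5 falls back to a BusyBox copy-results step backed by emptyDir mounts. A minimal sketch of that decision in isolation (names are illustrative; the real compiler also wires up the volume mounts shown above):

def build_copy_results_step(param_outputs, steps):
    # Rewrite output paths to $(results.<name>.path) where possible; anything that
    # cannot be rewritten is copied into the Tekton results path by an extra step.
    script = '#!/bin/sh\nset -exo pipefail\n'
    needs_copy = False
    for name, path in param_outputs.items():
        name = name.replace('_', '-')  # Tekton result names do not allow underscores
        replaced = False
        for step in steps:
            for field in ('command', 'args'):
                if field in step and any(path in value for value in step[field]):
                    step[field] = [value.replace(path, '$(results.%s.path)' % name)
                                   for value in step[field]]
                    replaced = True
        if not replaced:
            script += 'cp %s $(results.%s.path);\n' % (path, name)
            needs_copy = True
    if not needs_copy:
        return None
    return {'name': 'copy-results', 'image': 'busybox', 'script': script}


steps = [{'name': 'main', 'command': ['sh', '-c', 'produce-output']}]
print(build_copy_results_step({'data': '/tmp/data.txt'}, steps))
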