def test_create_artifact_for_s3_with_dict(self):
    # use convert_k8s_obj_to_json to mimic the compiler
    artifact_location_dict = convert_k8s_obj_to_json(
        ArtifactLocation.s3(
            bucket="foo",
            endpoint="s3.amazonaws.com",
            insecure=False,
            region="ap-southeast-1",
            access_key_secret={"name": "s3-secret", "key": "accesskey"},
            secret_key_secret=V1SecretKeySelector(name="s3-secret", key="secretkey")))

    artifact = ArtifactLocation.create_artifact_for_s3(
        artifact_location_dict,
        name="foo",
        path="path/to",
        key="key")

    self.assertEqual(artifact.name, "foo")
    self.assertEqual(artifact.path, "path/to")
    self.assertEqual(artifact.s3.endpoint, "s3.amazonaws.com")
    self.assertEqual(artifact.s3.bucket, "foo")
    self.assertEqual(artifact.s3.key, "key")
    self.assertEqual(artifact.s3.access_key_secret.name, "s3-secret")
    self.assertEqual(artifact.s3.access_key_secret.key, "accesskey")
    self.assertEqual(artifact.s3.secret_key_secret.name, "s3-secret")
    self.assertEqual(artifact.s3.secret_key_secret.key, "secretkey")
def _process_obj(obj: Any, map_to_tmpl_var: dict):
    """Recursively sanitize and replace any PipelineParam (instances and serialized strings)
    in the object with the corresponding template variables
    (i.e. '{{inputs.parameters.<PipelineParam.full_name>}}').

    Args:
      obj: any obj that may have PipelineParam
      map_to_tmpl_var: a dict that maps an unsanitized pipeline param signature
                       into a template var
    """
    # serialized str might be unsanitized
    if isinstance(obj, str):
        # get signature
        param_tuples = dsl.match_serialized_pipelineparam(obj)
        if not param_tuples:
            return obj
        # replace all unsanitized signatures with template vars
        for param_tuple in param_tuples:
            obj = re.sub(param_tuple.pattern, map_to_tmpl_var[param_tuple.pattern], obj)

    # list
    if isinstance(obj, list):
        return [_process_obj(item, map_to_tmpl_var) for item in obj]

    # tuple
    if isinstance(obj, tuple):
        return tuple((_process_obj(item, map_to_tmpl_var) for item in obj))

    # dict
    if isinstance(obj, dict):
        return {
            _process_obj(key, map_to_tmpl_var): _process_obj(value, map_to_tmpl_var)
            for key, value in obj.items()
        }

    # pipelineparam
    if isinstance(obj, dsl.PipelineParam):
        # if not found in unsanitized map, then likely to be sanitized
        return map_to_tmpl_var.get(str(obj), '{{inputs.parameters.%s}}' % obj.full_name)

    # k8s objects (generated from swagger codegen)
    if hasattr(obj, 'attribute_map') and isinstance(obj.attribute_map, dict):
        # process everything inside recursively
        for key in obj.attribute_map.keys():
            setattr(obj, key, _process_obj(getattr(obj, key), map_to_tmpl_var))
        # return json representation of the k8s obj
        return convert_k8s_obj_to_json(obj)

    # do nothing
    return obj
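# A minimal, self-contained sketch (not part of the compiler) of the substitution idea used by
# _process_obj: serialized PipelineParam signatures embedded in strings are rewritten to
# Argo-style template variables with re.sub. The mapping and the serialized signature below
# are illustrative assumptions, not actual compiler state.
import re

def replace_serialized_params(value: str, map_to_tmpl_var: dict) -> str:
    # each key is treated as a regex pattern for one serialized param signature
    for pattern, tmpl_var in map_to_tmpl_var.items():
        value = re.sub(pattern, tmpl_var, value)
    return value

if __name__ == '__main__':
    mapping = {r'{{pipelineparam:op=my-op;name=msg}}': '{{inputs.parameters.my-op-msg}}'}
    print(replace_serialized_params('echo {{pipelineparam:op=my-op;name=msg}}', mapping))
    # -> echo {{inputs.parameters.my-op-msg}}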
def test_convert_k8s_obj_to_dic_accepts_dict(self):
    now = datetime.now()
    converted = convert_k8s_obj_to_json({
        "ENV": "test",
        "number": 3,
        "list": [1, 2, 3],
        "time": now
    })
    self.assertEqual(converted, {
        "ENV": "test",
        "number": 3,
        "list": [1, 2, 3],
        "time": now.isoformat()
    })
def _op_to_template(op: BaseOp):
    """Generate template given an operator inherited from BaseOp."""

    # Display name
    if op.display_name:
        op.add_pod_annotation('pipelines.kubeflow.org/task_display_name', op.display_name)

    # Caching option
    op.add_pod_label('pipelines.kubeflow.org/enable_caching', str(op.enable_caching).lower())

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the
        # same here, so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()), key=lambda x: x[0]))

        output_artifacts = [
            {'name': name, 'path': path}
            for name, path in output_artifact_paths.items()
        ]

        # workflow template
        template = {
            'name': processed_op.name,
            'container': convert_k8s_obj_to_json(processed_op.container)
        }
    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # workflow template
        processed_op.resource["manifest"] = yaml.dump(
            convert_k8s_obj_to_json(processed_op.k8s_resource),
            default_flow_style=False
        )
        template = {
            'name': processed_op.name,
            'resource': convert_k8s_obj_to_json(processed_op.resource)
        }

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths, artifact_arguments)
    if inputs:
        template['inputs'] = inputs

    # outputs
    if isinstance(op, dsl.ContainerOp):
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        param_outputs = processed_op.attribute_outputs
    outputs_dict = _outputs_to_json(op, processed_op.outputs, param_outputs, output_artifacts)
    if outputs_dict:
        template['outputs'] = outputs_dict

    # pod spec used for runtime container settings
    podSpecPatch = {}

    # node selector
    if processed_op.node_selector:
        copy_node_selector = copy.deepcopy(processed_op.node_selector)
        for key, value in processed_op.node_selector.items():
            if re.match('^{{inputs.parameters.*}}$', key) or re.match('^{{inputs.parameters.*}}$', value):
                if 'nodeSelector' not in podSpecPatch:
                    podSpecPatch['nodeSelector'] = []
                podSpecPatch["nodeSelector"].append({key: value})
                del copy_node_selector[key]  # avoid changing the dict while iterating over it
        if processed_op.node_selector:
            template['nodeSelector'] = copy_node_selector

    # tolerations
    if processed_op.tolerations:
        template['tolerations'] = processed_op.tolerations

    # affinity
    if processed_op.affinity:
        template['affinity'] = convert_k8s_obj_to_json(processed_op.affinity)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template['metadata'] = {}
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # retries
    if processed_op.num_retries or processed_op.retry_policy:
        template['retryStrategy'] = {}
        if processed_op.num_retries:
            template['retryStrategy']['limit'] = processed_op.num_retries
        if processed_op.retry_policy:
            template['retryStrategy']['retryPolicy'] = processed_op.retry_policy
            if not processed_op.num_retries:
                warnings.warn('retry_policy is set, but num_retries is not')

        backoff_dict = {}
        if processed_op.backoff_duration:
            backoff_dict['duration'] = processed_op.backoff_duration
        if processed_op.backoff_factor:
            backoff_dict['factor'] = processed_op.backoff_factor
        if processed_op.backoff_max_duration:
            backoff_dict['maxDuration'] = processed_op.backoff_max_duration
        if backoff_dict:
            template['retryStrategy']['backoff'] = backoff_dict

    # timeout
    if processed_op.timeout:
        template['activeDeadlineSeconds'] = processed_op.timeout

    # initContainers
    if processed_op.init_containers:
        template['initContainers'] = processed_op.init_containers

    # sidecars
    if processed_op.sidecars:
        template['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['volumes'] = [convert_k8s_obj_to_json(volume) for volume in processed_op.volumes]
        template['volumes'].sort(key=lambda x: x['name'])

    # Runtime resource requests
    if isinstance(op, dsl.ContainerOp) and ('resources' in op.container.keys()):
        for setting, val in op.container['resources'].items():
            for resource, param in val.items():
                if (resource in ['cpu', 'memory', 'amd.com/gpu', 'nvidia.com/gpu'] or
                        re.match('^{{inputs.parameters.*}}$', resource)) \
                        and re.match('^{{inputs.parameters.*}}$', str(param)):
                    if 'containers' not in podSpecPatch:
                        podSpecPatch = {'containers': [{'name': 'main', 'resources': {}}]}
                    if setting not in podSpecPatch['containers'][0]['resources']:
                        podSpecPatch['containers'][0]['resources'][setting] = {resource: param}
                    else:
                        podSpecPatch['containers'][0]['resources'][setting][resource] = param
                    del template['container']['resources'][setting][resource]
                    if not template['container']['resources'][setting]:
                        del template['container']['resources'][setting]

    if isinstance(op, dsl.ContainerOp) and op._metadata and not op.is_v2:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = \
            json.dumps(op._metadata.to_dict(), sort_keys=True)

    if hasattr(op, '_component_ref'):
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_ref'] = \
            json.dumps(op._component_ref.to_dict(), sort_keys=True)

    if hasattr(op, '_parameter_arguments') and op._parameter_arguments:
        template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/arguments.parameters'] = \
            json.dumps(op._parameter_arguments, sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = \
                str(op.execution_options.caching_strategy.max_cache_staleness)

    if podSpecPatch:
        template['podSpecPatch'] = json.dumps(podSpecPatch)
    return template
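# A small, self-contained illustration (assumed data shapes, not the compiler's real objects) of
# the podSpecPatch mechanism above: resource settings whose values are '{{inputs.parameters.*}}'
# placeholders are moved out of the container template and into a JSON-encoded pod spec patch
# that is resolved at runtime.
import json
import re

container = {'resources': {'limits': {'nvidia.com/gpu': '{{inputs.parameters.num_gpus}}'}}}
pod_spec_patch = {}
for setting, val in container['resources'].items():
    for resource, param in list(val.items()):
        if re.match('^{{inputs.parameters.*}}$', str(param)):
            pod_spec_patch.setdefault('containers', [{'name': 'main', 'resources': {}}])
            pod_spec_patch['containers'][0]['resources'].setdefault(setting, {})[resource] = param
            del container['resources'][setting][resource]
print(json.dumps(pod_spec_patch))
# {"containers": [{"name": "main", "resources": {"limits": {"nvidia.com/gpu": "{{inputs.parameters.num_gpus}}"}}}]}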
def _create_pipeline_workflow(self, args, pipeline, op_transformers=None, pipeline_conf=None) \
        -> List[Dict[Text, Any]]:  # Tekton change, signature/return type
    """Create workflow for the pipeline."""

    # Input Parameters
    params = []
    for arg in args:
        param = {'name': arg.name}
        if arg.value is not None:
            if isinstance(arg.value, (list, tuple)):
                param['default'] = json.dumps(arg.value, sort_keys=True)
            else:
                param['default'] = str(arg.value)
        params.append(param)

    # generate Tekton tasks from pipeline ops
    tasks = self._create_dag_templates(pipeline, op_transformers, params)

    # generate task reference list for Tekton pipeline
    task_refs = [
        {
            'name': t['metadata']['name'],
            'taskRef': {
                'name': t['metadata']['name']
            },
            'params': [{
                    'name': p['name'],
                    'value': p.get('default', '')
                } for p in t['spec'].get('params', [])
            ]
        }
        for t in tasks
    ]

    # add task dependencies
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if op.dependent_names:
            task['runAfter'] = op.dependent_names

    # process input parameters from upstream tasks
    pipeline_param_names = [p['name'] for p in params]
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        for tp in task.get('params', []):
            if tp['name'] in pipeline_param_names:
                tp['value'] = '$(params.%s)' % tp['name']
            else:
                for pp in op.inputs:
                    if tp['name'] == pp.full_name:
                        # replace '_' with '-' since Tekton results don't support underscores
                        tp['value'] = '$(tasks.%s.results.%s)' % (pp.op_name, pp.name.replace('_', '-'))
                        break

    # add retries params
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if op.num_retries:
            task['retries'] = op.num_retries

    # add timeout params to task_refs, instead of task.
    pipeline_conf = pipeline.conf
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if op.timeout:
            task['timeout'] = '%ds' % op.timeout

    # generate the Tekton Pipeline document
    pipeline_template = {
        'apiVersion': tekton_api_version,
        'kind': 'Pipeline',
        'metadata': {
            'name': pipeline.name or 'Pipeline'
        },
        'spec': {
            'params': params,
            'tasks': task_refs
        }
    }

    # append Task and Pipeline documents
    workflow = tasks + [pipeline_template]

    # Generate pipelinerun if generate-pipelinerun flag is enabled
    # The base template is generated first and then optional parameters are inserted.
    if self.generate_pipelinerun:
        pipelinerun = {
            'apiVersion': tekton_api_version,
            'kind': 'PipelineRun',
            'metadata': {
                'name': pipeline_template['metadata']['name'] + '-run'
            },
            'spec': {
                'params': [{
                        'name': p['name'],
                        'value': p['default']
                    } for p in pipeline_template['spec']['params']
                ],
                'pipelineRef': {
                    'name': pipeline_template['metadata']['name']
                }
            }
        }

        pod_template = {}
        for task in task_refs:
            op = pipeline.ops.get(task['name'])
            if op.affinity:
                pod_template['affinity'] = convert_k8s_obj_to_json(op.affinity)
            if op.tolerations:
                pod_template['tolerations'] = pod_template.get('tolerations', []) + op.tolerations
            if op.node_selector:
                pod_template['nodeSelector'] = op.node_selector

        if pod_template:
            pipelinerun['spec']['podtemplate'] = pod_template

        # add workflow level timeout to pipeline run
        if pipeline_conf.timeout:
            pipelinerun['spec']['timeout'] = '%ds' % pipeline_conf.timeout

        workflow = workflow + [pipelinerun]

    return workflow  # Tekton change, from return type Dict[Text, Any] to List[Dict[Text, Any]]
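# A self-contained sketch (hand-made sample data, not actual compiler output) of how the Tekton
# Pipeline's `tasks` section above references the generated Task documents by name and forwards
# each Task param with its default as the initial value.
tasks = [
    {'metadata': {'name': 'echo'},
     'spec': {'params': [{'name': 'msg', 'default': 'hello'}]}},
]
task_refs = [
    {
        'name': t['metadata']['name'],
        'taskRef': {'name': t['metadata']['name']},
        'params': [{'name': p['name'], 'value': p.get('default', '')}
                   for p in t['spec'].get('params', [])],
    }
    for t in tasks
]
print(task_refs)
# [{'name': 'echo', 'taskRef': {'name': 'echo'}, 'params': [{'name': 'msg', 'value': 'hello'}]}]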
def _op_to_template(op: BaseOp):
    """Generate template given an operator inherited from BaseOp."""

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the
        # same here, so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()), key=lambda x: x[0]))

        output_artifacts = [
            # convert_k8s_obj_to_json(
            #     ArtifactLocation.create_artifact_for_s3(
            #         op.artifact_location,
            #         name=name,
            #         path=path,
            #         key='runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz'))
            # for name, path in output_artifact_paths.items()
        ]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)
        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # # no output artifacts
        # output_artifacts = []
        #
        # # workflow template
        # processed_op.resource["manifest"] = yaml.dump(
        #     convert_k8s_obj_to_json(processed_op.k8s_resource),
        #     default_flow_style=False
        # )
        # template = {
        #     'name': processed_op.name,
        #     'resource': convert_k8s_obj_to_json(
        #         processed_op.resource
        #     )
        # }
        raise NotImplementedError("dsl.ResourceOp is not yet implemented")

    # initContainers
    if processed_op.init_containers:
        steps = processed_op.init_containers.copy()
        steps.extend(template['spec']['steps'])
        template['spec']['steps'] = steps

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths, artifact_arguments)
    if 'parameters' in inputs:
        template['spec']['params'] = inputs['parameters']
    elif 'artifacts' in inputs:
        raise NotImplementedError("input artifacts are not yet implemented")

    # outputs
    if isinstance(op, dsl.ContainerOp):
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        param_outputs = processed_op.attribute_outputs
    outputs_dict = _outputs_to_json(op, processed_op.outputs, param_outputs, output_artifacts)
    if outputs_dict:
        template['spec']['results'] = []
        for name, path in processed_op.file_outputs.items():
            # replace '_' with '-' since Tekton results don't support underscores
            name = name.replace('_', '-')
            template['spec']['results'].append({
                'name': name,
                'description': path
            })
            # replace all occurrences of the output file path with the Tekton output parameter expression
            for s in template['spec']['steps']:
                if 'command' in s:
                    s['command'] = [c.replace(path, '$(results.%s.path)' % name) for c in s['command']]
                if 'args' in s:
                    s['args'] = [a.replace(path, '$(results.%s.path)' % name) for a in s['args']]

    # **********************************************************
    # NOTE: the following features are still under development
    # **********************************************************

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault('metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        import json
        template.setdefault('metadata', {}).setdefault(
            'annotations', {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    return template
def _create_pipeline_workflow(self, args, pipeline, op_transformers=None, pipeline_conf=None) \
        -> List[Dict[Text, Any]]:  # Tekton change, signature/return type
    """Create workflow for the pipeline."""

    # Input Parameters
    params = []
    for arg in args:
        param = {'name': arg.name}
        if arg.value is not None:
            if isinstance(arg.value, (list, tuple)):
                param['default'] = json.dumps(arg.value, sort_keys=True)
            else:
                param['default'] = str(arg.value)
        params.append(param)

    # generate Tekton tasks from pipeline ops
    templates = self._create_dag_templates(pipeline, op_transformers, params)

    # generate task and condition reference list for the Tekton Pipeline
    condition_refs = {}
    task_refs = []
    for template in templates:
        if template['kind'] == 'Condition':
            condition_refs[template['metadata']['name']] = {
                'conditionRef': template['metadata']['name'],
                'params': [{
                        'name': param['name'],
                        'value': '$(params.' + param['name'] + ')'
                    } for param in template['spec'].get('params', [])
                ]
            }
        else:
            task_refs.append(
                {
                    'name': template['metadata']['name'],
                    'taskRef': {
                        'name': template['metadata']['name']
                    },
                    'params': [{
                            'name': p['name'],
                            'value': p.get('default', '')
                        } for p in template['spec'].get('params', [])
                    ]
                }
            )

    # add task dependencies and add condition refs to the task ref that depends on the condition
    op_name_to_parent_groups = self._get_groups_for_ops(pipeline.groups[0])
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        parent_group = op_name_to_parent_groups.get(task['name'], [])
        if parent_group:
            if condition_refs.get(parent_group[-2], []):
                task['conditions'] = [condition_refs.get(op_name_to_parent_groups[task['name']][-2], [])]
        if op.dependent_names:
            task['runAfter'] = op.dependent_names

    # process input parameters from upstream tasks
    pipeline_param_names = [p['name'] for p in params]
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        for tp in task.get('params', []):
            if tp['name'] in pipeline_param_names:
                tp['value'] = '$(params.%s)' % tp['name']
            else:
                for pp in op.inputs:
                    if tp['name'] == pp.full_name:
                        # replace '_' with '-' since Tekton results don't support underscores
                        tp['value'] = '$(tasks.%s.results.%s)' % (pp.op_name, pp.name.replace('_', '-'))
                        break

    # add retries params
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if op.num_retries:
            task['retries'] = op.num_retries

    # add timeout params to task_refs, instead of task.
    pipeline_conf = pipeline.conf
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if op.timeout:
            task['timeout'] = '%ds' % op.timeout

    # handle resourceOp cases in pipeline
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if isinstance(op, dsl.ResourceOp):
            action = op.resource.get('action')
            merge_strategy = op.resource.get('merge_strategy')
            success_condition = op.resource.get('success_condition')
            failure_condition = op.resource.get('failure_condition')
            task['params'] = [tp for tp in task.get('params', []) if tp.get('name') != "image"]
            if not merge_strategy:
                task['params'] = [tp for tp in task.get('params', []) if tp.get('name') != 'merge-strategy']
            if not success_condition:
                task['params'] = [tp for tp in task.get('params', []) if tp.get('name') != 'success-condition']
            if not failure_condition:
                task['params'] = [tp for tp in task.get('params', []) if tp.get('name') != "failure-condition"]
            for tp in task.get('params', []):
                if tp.get('name') == "action" and action:
                    tp['value'] = action
                if tp.get('name') == "merge-strategy" and merge_strategy:
                    tp['value'] = merge_strategy
                if tp.get('name') == "success-condition" and success_condition:
                    tp['value'] = success_condition
                if tp.get('name') == "failure-condition" and failure_condition:
                    tp['value'] = failure_condition
                if tp.get('name') == "manifest":
                    manifest = yaml.dump(convert_k8s_obj_to_json(op.k8s_resource), default_flow_style=False)
                    tp['value'] = manifest
                if tp.get('name') == "output":
                    output_values = ','.join(set(list(op.attribute_outputs.values())))
                    tp['value'] = output_values

    # process loop parameters; keep this section after the other processing steps above and
    # before generating the Pipeline document
    root_group = pipeline.groups[0]
    op_name_to_for_loop_op = self._get_for_loop_ops(root_group)
    if op_name_to_for_loop_op:
        for loop_param in op_name_to_for_loop_op.values():
            if loop_param.items_is_pipeline_param is True:
                raise NotImplementedError("dynamic params are not yet implemented")
        include_loop_task_refs = []
        for task in task_refs:
            with_loop_task = self._get_loop_task(task, op_name_to_for_loop_op)
            include_loop_task_refs.extend(with_loop_task)
        task_refs = include_loop_task_refs

    # generate the Tekton Pipeline document
    pipeline_template = {
        'apiVersion': tekton_api_version,
        'kind': 'Pipeline',
        'metadata': {
            'name': pipeline.name or 'Pipeline'
        },
        'spec': {
            'params': params,
            'tasks': task_refs
        }
    }

    # append Task and Pipeline documents
    workflow = templates + [pipeline_template]

    # Generate pipelinerun if generate-pipelinerun flag is enabled
    # The base template is generated first and then optional parameters are inserted.
    # Wrapped in a try catch for when this method is called directly (e.g. there is no pipeline decorator)
    try:
        if self.generate_pipelinerun:
            pipelinerun = {
                'apiVersion': tekton_api_version,
                'kind': 'PipelineRun',
                'metadata': {
                    'name': pipeline_template['metadata']['name'] + '-run'
                },
                'spec': {
                    'params': [{
                            'name': p['name'],
                            'value': p.get('default', '')
                        } for p in pipeline_template['spec']['params']
                    ],
                    'pipelineRef': {
                        'name': pipeline_template['metadata']['name']
                    }
                }
            }

            pod_template = {}
            for task in task_refs:
                op = pipeline.ops.get(task['name'])
                if op.affinity:
                    pod_template['affinity'] = convert_k8s_obj_to_json(op.affinity)
                if op.tolerations:
                    pod_template['tolerations'] = pod_template.get('tolerations', []) + op.tolerations
                if op.node_selector:
                    pod_template['nodeSelector'] = op.node_selector

            if pod_template:
                pipelinerun['spec']['podtemplate'] = pod_template

            # add workflow level timeout to pipeline run
            if pipeline_conf.timeout:
                pipelinerun['spec']['timeout'] = '%ds' % pipeline_conf.timeout

            # generate the Tekton service account template
            service_template = {}
            if len(pipeline_conf.image_pull_secrets) > 0:
                service_template = {
                    'apiVersion': 'v1',
                    'kind': 'ServiceAccount',
                    'metadata': {'name': pipelinerun['metadata']['name'] + '-sa'}
                }
            for image_pull_secret in pipeline_conf.image_pull_secrets:
                service_template['imagePullSecrets'] = [{'name': image_pull_secret.name}]

            if service_template:
                workflow = workflow + [service_template]
                pipelinerun['spec']['serviceAccountName'] = service_template['metadata']['name']

            workflow = workflow + [pipelinerun]
    except:
        # Intentionally do nothing when _create_pipeline_workflow is called directly (e.g. in the
        # case of there being no pipeline decorator) and self.generate_pipelinerun is not set
        pass

    # Use regex to replace all the Argo variables with Tekton variables. For variables that are unique to Argo,
    # we raise an Error to alert users about the unsupported variables. Here is the list of Argo variables:
    # https://github.com/argoproj/argo/blob/master/docs/variables.md
    # Since Argo variables can be used anywhere in the yaml, we need to dump and then parse the whole yaml
    # using regular expressions.
    workflow_dump = json.dumps(workflow)
    tekton_var_regex_rules = [
        {'argo_rule': '{{inputs.parameters.([^ \t\n.:,;{}]+)}}', 'tekton_rule': '$(inputs.params.\g<1>)'},
        {'argo_rule': '{{outputs.parameters.([^ \t\n.:,;{}]+).path}}', 'tekton_rule': '$(results.\g<1>.path)'}
    ]
    for regex_rule in tekton_var_regex_rules:
        workflow_dump = re.sub(regex_rule['argo_rule'], regex_rule['tekton_rule'], workflow_dump)

    unsupported_vars = re.findall(r"{{[^ \t\n.:,;{}]+\.[^ \t\n:,;{}]+}}", workflow_dump)
    if unsupported_vars:
        raise ValueError('These Argo variables are not supported in Tekton Pipeline: %s' %
                         ", ".join(str(v) for v in set(unsupported_vars)))
    workflow = json.loads(workflow_dump)

    return workflow  # Tekton change, from return type Dict[Text, Any] to List[Dict[Text, Any]]
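# A runnable, stand-alone sketch of the Argo-to-Tekton variable rewrite performed above, applying
# the same regex rules to a sample string (sample text only; not actual compiler output).
import re

tekton_var_regex_rules = [
    {'argo_rule': '{{inputs.parameters.([^ \t\n.:,;{}]+)}}', 'tekton_rule': '$(inputs.params.\\g<1>)'},
    {'argo_rule': '{{outputs.parameters.([^ \t\n.:,;{}]+).path}}', 'tekton_rule': '$(results.\\g<1>.path)'},
]
sample = 'echo {{inputs.parameters.msg}} > {{outputs.parameters.out.path}}'
for rule in tekton_var_regex_rules:
    sample = re.sub(rule['argo_rule'], rule['tekton_rule'], sample)
print(sample)  # echo $(inputs.params.msg) > $(results.out.path)

# Any remaining '{{...}}' Argo variables would be flagged as unsupported:
print(re.findall(r"{{[^ \t\n.:,;{}]+\.[^ \t\n:,;{}]+}}", '{{workflow.uid}}'))  # ['{{workflow.uid}}']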
def _create_pipeline_workflow(self, parameter_defaults, pipeline, op_transformers=None, pipeline_conf=None):
    """Create workflow for the pipeline."""

    # Input Parameters
    input_params = []
    for name, value in parameter_defaults.items():
        param = {'name': name}
        if value is not None:
            param['value'] = value
        input_params.append(param)

    # Making the pipeline group name unique to prevent name clashes with templates
    pipeline_group = pipeline.groups[0]
    temp_pipeline_group_name = uuid.uuid4().hex
    pipeline_group.name = temp_pipeline_group_name

    # Templates
    templates = self._create_dag_templates(pipeline, op_transformers)

    # Exit Handler
    exit_handler = None
    if pipeline.groups[0].groups:
        first_group = pipeline.groups[0].groups[0]
        if first_group.type == 'exit_handler':
            exit_handler = first_group.exit_op

    # The whole pipeline workflow
    # It must be valid as a subdomain
    pipeline_name = pipeline.name or 'pipeline'

    # Workaround for pipeline name clashing with container template names
    # TODO: Make sure template names cannot clash at all (container, DAG, workflow)
    template_map = {template['name'].lower(): template for template in templates}
    from ..components._naming import _make_name_unique_by_adding_index
    pipeline_template_name = _make_name_unique_by_adding_index(pipeline_name, template_map, '-')

    # Restoring the name of the pipeline template
    pipeline_template = template_map[temp_pipeline_group_name]
    pipeline_template['name'] = pipeline_template_name

    templates.sort(key=lambda x: x['name'])
    workflow = {
        'apiVersion': 'argoproj.io/v1alpha1',
        'kind': 'Workflow',
        'metadata': {'generateName': pipeline_template_name + '-'},
        'spec': {
            'entrypoint': pipeline_template_name,
            'templates': templates,
            'arguments': {'parameters': input_params},
            'serviceAccountName': 'pipeline-runner',
        }
    }

    # set parallelism limits at pipeline level
    if pipeline_conf.parallelism:
        workflow['spec']['parallelism'] = pipeline_conf.parallelism

    # set ttl after workflow finishes
    if pipeline_conf.ttl_seconds_after_finished >= 0:
        workflow['spec']['ttlSecondsAfterFinished'] = pipeline_conf.ttl_seconds_after_finished

    if pipeline_conf._pod_disruption_budget_min_available:
        pod_disruption_budget = {"minAvailable": pipeline_conf._pod_disruption_budget_min_available}
        workflow['spec']['podDisruptionBudget'] = pod_disruption_budget

    if len(pipeline_conf.image_pull_secrets) > 0:
        image_pull_secrets = []
        for image_pull_secret in pipeline_conf.image_pull_secrets:
            image_pull_secrets.append(convert_k8s_obj_to_json(image_pull_secret))
        workflow['spec']['imagePullSecrets'] = image_pull_secrets

    if pipeline_conf.timeout:
        workflow['spec']['activeDeadlineSeconds'] = pipeline_conf.timeout

    if exit_handler:
        workflow['spec']['onExit'] = exit_handler.name

    # This can be overwritten by the task specific
    # nodeselection, specified in the template.
    if pipeline_conf.default_pod_node_selector:
        workflow['spec']['nodeSelector'] = pipeline_conf.default_pod_node_selector

    if pipeline_conf.dns_config:
        workflow['spec']['dnsConfig'] = convert_k8s_obj_to_json(pipeline_conf.dns_config)

    if pipeline_conf.image_pull_policy is not None:
        if pipeline_conf.image_pull_policy in ["Always", "Never", "IfNotPresent"]:
            for template in workflow["spec"]["templates"]:
                container = template.get('container', None)
                if container and "imagePullPolicy" not in container:
                    container["imagePullPolicy"] = pipeline_conf.image_pull_policy
        else:
            raise ValueError(
                'Invalid imagePullPolicy. Must be one of `Always`, `Never`, `IfNotPresent`.'
            )
    return workflow
def _workflow_with_pipelinerun(self, task_refs, pipeline, pipeline_template, workflow):
    """Generate pipelinerun template."""
    pipelinerun = {
        'apiVersion': tekton_api_version,
        'kind': 'PipelineRun',
        'metadata': {
            'name': pipeline_template['metadata']['name'] + '-run'
        },
        'spec': {
            'params': [{
                'name': p['name'],
                'value': p.get('default', '')
            } for p in pipeline_template['spec']['params']],
            'pipelineRef': {
                'name': pipeline_template['metadata']['name']
            }
        }
    }

    # Generate PodTemplate
    pod_template = {}
    for task in task_refs:
        op = pipeline.ops.get(task['name'])
        if op.affinity:
            pod_template['affinity'] = convert_k8s_obj_to_json(op.affinity)
        if op.tolerations:
            pod_template['tolerations'] = pod_template.get('tolerations', []) + op.tolerations
        if op.node_selector:
            pod_template['nodeSelector'] = op.node_selector

    if pod_template:
        pipelinerun['spec']['podtemplate'] = pod_template

    # add workflow level timeout to pipeline run
    if pipeline.conf.timeout:
        pipelinerun['spec']['timeout'] = '%ds' % pipeline.conf.timeout

    # generate the Tekton service account template for image pull secret
    service_template = {}
    if len(pipeline.conf.image_pull_secrets) > 0:
        service_template = {
            'apiVersion': 'v1',
            'kind': 'ServiceAccount',
            'metadata': {
                'name': pipelinerun['metadata']['name'] + '-sa'
            }
        }
    for image_pull_secret in pipeline.conf.image_pull_secrets:
        service_template['imagePullSecrets'] = [{
            'name': image_pull_secret.name
        }]

    if service_template:
        workflow = workflow + [service_template]
        pipelinerun['spec']['serviceAccountName'] = service_template['metadata']['name']

    workflow = workflow + [pipelinerun]

    return workflow
def _op_to_template(op: BaseOp):
    """Generate template given an operator inherited from BaseOp."""

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        # output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        #
        # # This should have been as easy as output_artifact_paths.update(op.file_outputs), but the
        # # _outputs_to_json function changes the output names and we must do the same here, so that
        # # the names are the same
        # output_artifact_paths.update(sorted(((param.full_name, processed_op.file_outputs[param.name])
        #                                      for param in processed_op.outputs.values()), key=lambda x: x[0]))

        # output_artifacts = [
        #     convert_k8s_obj_to_json(
        #         ArtifactLocation.create_artifact_for_s3(
        #             op.artifact_location,
        #             name=name,
        #             path=path,
        #             key='runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz'))
        #     for name, path in output_artifact_paths.items()
        # ]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)
        template = {
            'apiVersion': 'tekton.dev/v1alpha1',
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [{
                    'name': processed_op.name,
                    'image': container['image'],
                    'command': container['command'],
                    'args': container['args']
                }]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # # no output artifacts
        # output_artifacts = []
        #
        # # workflow template
        # processed_op.resource["manifest"] = yaml.dump(
        #     convert_k8s_obj_to_json(processed_op.k8s_resource),
        #     default_flow_style=False
        # )
        # template = {
        #     'name': processed_op.name,
        #     'resource': convert_k8s_obj_to_json(
        #         processed_op.resource
        #     )
        # }
        raise NotImplementedError("dsl.ResourceOp is not yet implemented")

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths, artifact_arguments)
    if inputs:
        template['spec']['inputs'] = {'params': inputs['parameters']}

    # NOTE: the following lines are commented out while we are working on supporting parallel task
    # execution and parameter handling
    #
    # # outputs
    # if isinstance(op, dsl.ContainerOp):
    #     param_outputs = processed_op.file_outputs
    # elif isinstance(op, dsl.ResourceOp):
    #     param_outputs = processed_op.attribute_outputs
    # outputs_dict = _outputs_to_json(op, processed_op.outputs, param_outputs, output_artifacts)
    # if outputs_dict:
    #     template['outputs'] = outputs_dict
    #
    # # node selector
    # if processed_op.node_selector:
    #     template['nodeSelector'] = processed_op.node_selector
    #
    # # tolerations
    # if processed_op.tolerations:
    #     template['tolerations'] = processed_op.tolerations
    #
    # # affinity
    # if processed_op.affinity:
    #     template['affinity'] = convert_k8s_obj_to_json(processed_op.affinity)
    #
    # # metadata
    # if processed_op.pod_annotations or processed_op.pod_labels:
    #     template['metadata'] = {}
    #     if processed_op.pod_annotations:
    #         template['metadata']['annotations'] = processed_op.pod_annotations
    #     if processed_op.pod_labels:
    #         template['metadata']['labels'] = processed_op.pod_labels
    #
    # # retries
    # if processed_op.num_retries:
    #     template['retryStrategy'] = {'limit': processed_op.num_retries}
    #
    # # timeout
    # if processed_op.timeout:
    #     template['activeDeadlineSeconds'] = processed_op.timeout
    #
    # # initContainers
    # if processed_op.init_containers:
    #     template['initContainers'] = processed_op.init_containers
    #
    # # sidecars
    # if processed_op.sidecars:
    #     template['sidecars'] = processed_op.sidecars
    #
    # # volumes
    # if processed_op.volumes:
    #     template['volumes'] = [convert_k8s_obj_to_json(volume) for volume in processed_op.volumes]
    #     template['volumes'].sort(key=lambda x: x['name'])
    #
    # # Display name
    # if processed_op.display_name:
    #     template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name
    #
    # if isinstance(op, dsl.ContainerOp) and op._metadata:
    #     import json
    #     template.setdefault('metadata', {}).setdefault('annotations', {})['pipelines.kubeflow.org/component_spec'] = json.dumps(op._metadata.to_dict(), sort_keys=True)

    return template
def _op_to_template(op: BaseOp, enable_artifacts=False):
    """Generate template given an operator inherited from BaseOp."""

    # initial local variables for tracking volumes and artifacts
    volume_mount_step_template = []
    volume_template = []
    mounted_param_paths = []
    replaced_param_list = []
    artifact_to_result_mapping = {}

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the same here,
        # so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()), key=lambda x: x[0]))

        output_artifacts = [
            convert_k8s_obj_to_json(
                ArtifactLocation.create_artifact_for_s3(
                    op.artifact_location,
                    name=name,
                    path=path,
                    key='runs/$PIPELINERUN/$PODNAME/' + name))
            for name, path in output_artifact_paths.items()
        ] if enable_artifacts else []

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)
        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # Flatten manifest because it needs to replace Argo variables
        manifest = yaml.dump(convert_k8s_obj_to_json(processed_op.k8s_resource),
                             default_flow_style=False)
        argo_var = False
        if manifest.find('{{workflow.name}}') != -1:
            # Kubernetes Pod arguments only take $() as environment variables
            manifest = manifest.replace('{{workflow.name}}', "$(PIPELINERUN)")
            # Remove yaml quote in order to read bash variables
            manifest = re.sub('name: \'([^\']+)\'', 'name: \g<1>', manifest)
            argo_var = True

        # task template
        template = _get_resourceOp_template(op, processed_op.name, tekton_api_version,
                                            manifest, argo_var=argo_var)

    # initContainers
    if processed_op.init_containers:
        template['spec']['steps'] = _prepend_steps(processed_op.init_containers,
                                                   template['spec']['steps'])

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths, artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    if 'artifacts' in inputs:
        # Input artifacts in KFP are not pulled from s3; they are always passed as raw inputs.
        # Visit https://github.com/kubeflow/pipelines/issues/336 for more details on the implementation.
        copy_inputs_step = _get_base_step('copy-inputs')
        for artifact in inputs['artifacts']:
            if 'raw' in artifact:
                copy_inputs_step['script'] += 'echo -n "%s" > %s\n' % (
                    artifact['raw']['data'], artifact['path'])
            mount_path = artifact['path'].rsplit("/", 1)[0]
            if mount_path not in mounted_param_paths:
                _add_mount_path(artifact['name'], artifact['path'], mount_path,
                                volume_mount_step_template, volume_template,
                                mounted_param_paths)
        template['spec']['steps'] = _prepend_steps([copy_inputs_step],
                                                   template['spec']['steps'])
        _update_volumes(template, volume_mount_step_template, volume_template)

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs, output_artifacts)
    if outputs_dict:
        copy_results_step = _process_parameters(processed_op, template, outputs_dict,
                                                volume_mount_step_template, volume_template,
                                                replaced_param_list,
                                                artifact_to_result_mapping,
                                                mounted_param_paths)
        copy_artifacts_step = _process_output_artifacts(outputs_dict,
                                                        volume_mount_step_template,
                                                        volume_template,
                                                        replaced_param_list,
                                                        artifact_to_result_mapping)
        if mounted_param_paths:
            template['spec']['steps'].append(copy_results_step)
        _update_volumes(template, volume_mount_step_template, volume_template)
        if copy_artifacts_step:
            template['spec']['steps'].append(copy_artifacts_step)

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault('metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    if isinstance(op, dsl.ContainerOp) and op.execution_options:
        if op.execution_options.caching_strategy.max_cache_staleness:
            template.setdefault('metadata', {}).setdefault(
                'annotations', {})['pipelines.kubeflow.org/max_cache_staleness'] = str(
                    op.execution_options.caching_strategy.max_cache_staleness)

    return template
def _op_to_template(op: BaseOp):
    """Generate template given an operator inherited from BaseOp."""

    # NOTE in-place update to BaseOp
    # replace all PipelineParams with template var strings
    processed_op = _process_base_ops(op)

    if isinstance(op, dsl.ContainerOp):
        # default output artifacts
        output_artifact_paths = OrderedDict(op.output_artifact_paths)
        # print(op.output_artifact_paths)
        # This should have been as easy as output_artifact_paths.update(op.file_outputs),
        # but the _outputs_to_json function changes the output names and we must do the
        # same here, so that the names are the same
        output_artifact_paths.update(
            sorted(((param.full_name, processed_op.file_outputs[param.name])
                    for param in processed_op.outputs.values()), key=lambda x: x[0]))

        output_artifacts = [
            # convert_k8s_obj_to_json(
            #     ArtifactLocation.create_artifact_for_s3(
            #         op.artifact_location,
            #         name=name,
            #         path=path,
            #         key='runs/{{workflow.uid}}/{{pod.name}}/' + name + '.tgz'))
            # for name, path in output_artifact_paths.items()
        ]

        # workflow template
        container = convert_k8s_obj_to_json(processed_op.container)
        step = {'name': processed_op.name}
        step.update(container)

        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                'steps': [step]
            }
        }

    elif isinstance(op, dsl.ResourceOp):
        # no output artifacts
        output_artifacts = []

        # task template
        template = {
            'apiVersion': tekton_api_version,
            'kind': 'Task',
            'metadata': {
                'name': processed_op.name
            },
            'spec': {
                "params": [
                    {
                        "description": "Action on the resource",
                        "name": "action",
                        "type": "string"
                    },
                    {
                        "default": "strategic",
                        "description": "Merge strategy when using action patch",
                        "name": "merge-strategy",
                        "type": "string"
                    },
                    {
                        "description": "Content of the resource to deploy",
                        "name": "manifest",
                        "type": "string"
                    },
                    {
                        "default": "",
                        "description": "An express to retrieval data from resource.",
                        "name": "output",
                        "type": "string"
                    },
                    {
                        "default": "",
                        "description": "A label selector express to decide if the action on resource is success.",
                        "name": "success-condition",
                        "type": "string"
                    },
                    {
                        "default": "",
                        "description": "A label selector express to decide if the action on resource is failure.",
                        "name": "failure-condition",
                        "type": "string"
                    },
                    {
                        # TODO: this image needs to be replaced once there is an official image from Tekton
                        "default": "index.docker.io/fenglixa/kubeclient:v0.0.1",
                        "description": "Kubectl wrapper image",
                        "name": "image",
                        "type": "string"
                    },
                    {
                        "default": "false",
                        "description": "Enable set owner reference for created resource.",
                        "name": "set-ownerreference",
                        "type": "string"
                    }
                ],
                'steps': [{
                    "args": [
                        "--action=$(params.action)",
                        "--merge-strategy=$(params.merge-strategy)",
                        "--manifest=$(params.manifest)",
                        "--output=$(params.output)",
                        "--success-condition=$(params.success-condition)",
                        "--failure-condition=$(params.failure-condition)",
                        "--set-ownerreference=$(params.set-ownerreference)"
                    ],
                    "image": "$(params.image)",
                    "name": processed_op.name,
                    "resources": {}
                }]
            }
        }

    # initContainers
    if processed_op.init_containers:
        steps = processed_op.init_containers.copy()
        steps.extend(template['spec']['steps'])
        template['spec']['steps'] = steps

    # inputs
    input_artifact_paths = processed_op.input_artifact_paths if isinstance(
        processed_op, dsl.ContainerOp) else None
    artifact_arguments = processed_op.artifact_arguments if isinstance(
        processed_op, dsl.ContainerOp) else None
    inputs = _inputs_to_json(processed_op.inputs, input_artifact_paths, artifact_arguments)
    if 'parameters' in inputs:
        if isinstance(processed_op, dsl.ContainerOp):
            template['spec']['params'] = inputs['parameters']
        elif isinstance(op, dsl.ResourceOp):
            template['spec']['params'].extend(inputs['parameters'])
    elif 'artifacts' in inputs:
        raise NotImplementedError("input artifacts are not yet implemented")

    # outputs
    if isinstance(op, dsl.ContainerOp):
        op_outputs = processed_op.outputs
        param_outputs = processed_op.file_outputs
    elif isinstance(op, dsl.ResourceOp):
        op_outputs = {}
        param_outputs = {}
    outputs_dict = _outputs_to_json(op, op_outputs, param_outputs, output_artifacts)
    if outputs_dict:
        # Tekton results need to live under /tekton/results. If the file output paths cannot be
        # configured to /tekton/results, we need to create the copy step below to move file outputs
        # to the Tekton destination. BusyBox is recommended for small tasks because it is relatively
        # lightweight and small compared to the ubuntu and bash images.
        #
        #   - image: busybox
        #     name: copy-results
        #     script: |
        #       #!/bin/sh
        #       set -exo pipefail
        #       cp $LOCALPATH $(results.data.path);
        template['spec']['results'] = []
        copy_results_step = {
            'image': 'busybox',
            'name': 'copy-results',
            'script': '#!/bin/sh\nset -exo pipefail\n'
        }
        volume_mount_step_template = []
        volume_template = []
        mounted_paths = []
        for name, path in param_outputs.items():
            # replace '_' with '-' since Tekton results don't support underscores
            name = name.replace('_', '-')
            template['spec']['results'].append({
                'name': name,
                'description': path
            })
            # replace all occurrences of the output file path with the Tekton output parameter expression
            need_copy_step = True
            for s in template['spec']['steps']:
                if 'command' in s:
                    commands = []
                    for c in s['command']:
                        if path in c:
                            c = c.replace(path, '$(results.%s.path)' % name)
                            need_copy_step = False
                        commands.append(c)
                    s['command'] = commands
                if 'args' in s:
                    args = []
                    for a in s['args']:
                        if path in a:
                            a = a.replace(path, '$(results.%s.path)' % name)
                            need_copy_step = False
                        args.append(a)
                    s['args'] = args
            # If the file output path cannot be found/replaced, use an emptyDir to copy it to the tekton/results path
            if need_copy_step:
                copy_results_step['script'] = copy_results_step['script'] + \
                    'cp ' + path + ' $(results.%s.path);' % name + '\n'
                mountPath = path.rsplit("/", 1)[0]
                if mountPath not in mounted_paths:
                    volume_mount_step_template.append({
                        'name': name,
                        'mountPath': path.rsplit("/", 1)[0]
                    })
                    volume_template.append({'name': name, 'emptyDir': {}})
                    mounted_paths.append(mountPath)
        if mounted_paths:
            copy_results_step['script'] = literal_str(copy_results_step['script'])
            template['spec']['steps'].append(copy_results_step)
            template['spec']['stepTemplate'] = {}
            template['spec']['stepTemplate']['volumeMounts'] = volume_mount_step_template
            template['spec']['volumes'] = volume_template

    # **********************************************************
    # NOTE: the following features are still under development
    # **********************************************************

    # metadata
    if processed_op.pod_annotations or processed_op.pod_labels:
        template.setdefault('metadata', {})  # Tekton change, don't wipe out existing metadata
        if processed_op.pod_annotations:
            template['metadata']['annotations'] = processed_op.pod_annotations
        if processed_op.pod_labels:
            template['metadata']['labels'] = processed_op.pod_labels

    # sidecars
    if processed_op.sidecars:
        template['spec']['sidecars'] = processed_op.sidecars

    # volumes
    if processed_op.volumes:
        template['spec']['volumes'] = template['spec'].get('volumes', []) + [
            convert_k8s_obj_to_json(volume) for volume in processed_op.volumes
        ]
        template['spec']['volumes'].sort(key=lambda x: x['name'])

    # Display name
    if processed_op.display_name:
        template.setdefault('metadata', {}).setdefault(
            'annotations', {}
        )['pipelines.kubeflow.org/task_display_name'] = processed_op.display_name

    if isinstance(op, dsl.ContainerOp) and op._metadata:
        import json
        template.setdefault('metadata', {}).setdefault(
            'annotations', {})['pipelines.kubeflow.org/component_spec'] = json.dumps(
                op._metadata.to_dict(), sort_keys=True)

    return template
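# A self-contained sketch (toy data, not actual compiler state) of the output handling above:
# output names are normalized ('_' -> '-'), file paths referenced by a step are rewritten to
# $(results.<name>.path), and any path that cannot be rewritten is copied by an extra busybox
# 'copy-results' step.
param_outputs = {'model_path': '/tmp/model', 'metrics': '/tmp/metrics.json'}
steps = [{'name': 'main', 'command': ['train', '--out', '/tmp/model']}]
copy_script = '#!/bin/sh\nset -exo pipefail\n'

for name, path in param_outputs.items():
    name = name.replace('_', '-')  # Tekton result names cannot contain underscores
    replaced = False
    for step in steps:
        new_command = []
        for c in step.get('command', []):
            if path in c:
                c = c.replace(path, '$(results.%s.path)' % name)
                replaced = True
            new_command.append(c)
        step['command'] = new_command
    if not replaced:
        copy_script += 'cp %s $(results.%s.path);\n' % (path, name)

print(steps[0]['command'])  # ['train', '--out', '$(results.model-path.path)']
print(copy_script)          # includes: cp /tmp/metrics.json $(results.metrics.path);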