def run_notebook(id, run_name=None, parameters: dict = None):  # noqa: E501
    """run_notebook

    :param id: notebook id
    :type id: str
    :param run_name: name to identify the run on the Kubeflow Pipelines UI, defaults to notebook name
    :type run_name: str
    :param parameters: optional run parameters, may be required based on pipeline definition
    :type parameters: dict

    :rtype: ApiRunCodeResponse
    """
    if not parameters and connexion.request.is_json:
        parameter_dict = dict(connexion.request.get_json())  # noqa: E501
    else:
        parameter_dict = parameters

    api_notebook, status_code = get_notebook(id)

    if status_code > 200:
        return f"Notebook with id '{id}' does not exist", 404

    # # TODO: Elyra kfp-notebook currently does not pass parameters on to papermill
    # if parameters:
    #     raise ApiError("The 'elyra-ai/kfp-notebook' executor does not support parameters", 422)
    # parameter_errors, status_code = validate_parameters(api_notebook.parameters, parameter_dict)
    #
    # if parameter_errors:
    #     return parameter_errors, status_code

    try:
        run_id = run_notebook_in_experiment(notebook=api_notebook,
                                            parameters=parameter_dict,
                                            run_name=run_name)

        # expected output notebook based on:
        # https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
        notebook_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".ipynb"])

        # TODO: create a "sandboxed" notebook in a subfolder since Elyra overwrites
        #   the original notebook instead of creating an "-output.ipynb" file:
        #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L205
        notebook_output_url = notebook_url.replace(".ipynb", "-output.ipynb")

        # instead return link to the generated output .html for the time being
        notebook_output_html = notebook_url.replace(".ipynb", ".html")

        return ApiRunCodeResponse(run_url=f"/runs/details/{run_id}",
                                  run_output_location=notebook_output_html), 200

    except Exception as e:
        return f"Error while trying to run notebook {id}: {e}", 500
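# A minimal, self-contained sketch of the parameter-resolution logic in
# `run_notebook` above, assuming the same precedence rules (explicitly passed
# parameters win over the JSON request body). `_resolve_parameters` is a
# hypothetical helper shown for illustration and testing only; it is not part
# of the MLX API, which reads the body from the connexion request context.
def _resolve_parameters(parameters: dict = None, request_json: dict = None) -> dict:
    """Return explicit parameters if given, else a copy of the JSON body."""
    if not parameters and request_json is not None:
        return dict(request_json)
    return parameters or {}

assert _resolve_parameters({"lr": 0.01}, {"lr": 0.1}) == {"lr": 0.01}
assert _resolve_parameters(None, {"lr": 0.1}) == {"lr": 0.1}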
def generate_notebook_run_script(api_notebook: ApiNotebook,
                                 parameters: dict = None,
                                 run_name: str = None,
                                 hide_secrets: bool = True):

    parameters = parameters or {}

    if "dataset_pvc" in parameters:
        template_file = "run_notebook_with_dataset.TEMPLATE.py"
    else:
        template_file = "run_notebook.TEMPLATE.py"

    with open(join(CODE_TEMPLATE_DIR, template_file), 'r') as f:
        template_raw = f.read()

    notebook_file = api_notebook.url.split("/")[-1]

    requirements_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".txt"],
                                      file_name_filter="requirements")

    cos_dependencies_archive_url = get_object_url(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_extensions=[".tar.gz"],
                                                  file_name_filter="elyra-dependencies-archive")

    if not cos_dependencies_archive_url:

        tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                       prefix=f"notebooks/{api_notebook.id}/",
                                       file_extensions=[".ipynb"])

        cos_dependencies_archive_url = store_file(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_name="elyra-dependencies-archive.tar.gz",
                                                  file_content=bytes_io.getvalue())

    cos_dependencies_archive = cos_dependencies_archive_url.split("/")[-1]

    # TODO: move this into a ApiNotebook.image as opposed to parsing yaml here
    yaml_file_content = retrieve_file_content(bucket_name="mlpipeline",
                                              prefix=f"notebooks/{api_notebook.id}/",
                                              file_extensions=[".yaml", ".yml"])
    metadata_yaml = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    image = metadata_yaml["implementation"]["github"].get("image", "tensorflow/tensorflow:latest")

    # TODO: elyra-ai/kfp-notebook generates output notebook as: "-output.ipynb"
    #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
    #   so here we may consider renaming the generated file with a datetimestamp
    # output_folder = f"notebooks/{api_notebook.id}/runs/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    # output_file_name = notebook_file_name.replace(r'.ipynb', '-output.ipynb')
    # output_file_path = f"{output_folder}/{output_file_name}"
    # output_file_url = f"http://{minio_host}:{minio_port}/mlpipeline/{output_file_path}"

    kfp_url = f"'{_pipeline_service_url}'" if "POD_NAMESPACE" not in os.environ else ""

    substitutions = {
        "name": api_notebook.name,
        "notebook": notebook_file,
        "cos_bucket": "mlpipeline",
        "cos_directory": f"notebooks/{api_notebook.id}/",
        "cos_dependencies_archive": cos_dependencies_archive,
        "cos_endpoint": "***",
        "cos_username": "******",
        "cos_password": "******",
        "requirements_url": requirements_url or "",
        "image": image,
        "pipeline_server": kfp_url,
        "run_name": run_name or api_notebook.name
    }

    # TODO: make the `dataset_pvc` and `mount_path` parameters part of the Swagger spec?
    if "dataset_pvc" in parameters:
        substitutions.update({
            "dataset_pvc": parameters["dataset_pvc"],
            "mount_path": parameters.get("mount_path", "/tmp/data")
        })

    if not hide_secrets:
        substitutions.update({
            "cos_endpoint": f"http://{minio_host}:{minio_port}/minio",
            "cos_username": minio_access_key,
            "cos_password": minio_secret_key
        })

    run_script = Template(template_raw).substitute(substitutions)

    return run_script
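# A short sketch of the `string.Template` substitution used by
# `generate_notebook_run_script` above, with a made-up two-placeholder
# template. `substitute` raises a `KeyError` for any `${placeholder}` missing
# from the mapping, which is why every placeholder in the run-notebook
# templates must have a key in `substitutions`; `safe_substitute` would
# instead leave unresolved placeholders in the generated script.
from string import Template

_demo_script = Template(
    "notebook = '${notebook}'\n"
    "run_name = '${run_name}'\n"
).substitute(notebook="train.ipynb", run_name="demo run")

assert _demo_script == "notebook = 'train.ipynb'\nrun_name = 'demo run'\n"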
def generate_dataset_run_script(dataset: ApiDataset,
                                dataset_template_url,
                                run_parameters: dict = None,
                                run_name: str = None,
                                fail_on_missing_prereqs=False):

    run_parameters = run_parameters or {}

    name = f"{dataset.name} ({generate_id(length=4)})"
    description = dataset.description.strip().replace("'", "\\'")

    # TODO: some of the parameters, template URLs should move out of here
    # dataset_parameters = dataset.parameters
    # TODO: ApiParameters should not be defined here
    dataset_parameters = [ApiParameter(name="action", default="create"),
                          ApiParameter(name="namespace", default=_namespace)]

    pipeline_method_args = generate_pipeline_method_args(dataset_parameters)

    parameter_names = ",".join([p.name for p in dataset_parameters])

    # TODO: the action parameter is required by DLF-to-PVC op, so it should not be dynamically generated here
    parameter_dict = {
        "action": "create",
        "namespace": run_parameters.get("namespace", _namespace)
    }

    # see component name at https://github.com/machine-learning-exchange/mlx/blob/main/components/component-samples/dax-to-dlf/component.yaml#L1
    dax_to_dlf_component_id = generate_id(name="Generate Dataset Metadata")

    # see component name at https://github.com/machine-learning-exchange/mlx/blob/main/components/component-samples/dlf/component.yaml#L1
    dlf_to_pvc_component_id = generate_id(name="Create Dataset Volume")

    dax_to_dlf_component_url = get_object_url(bucket_name="mlpipeline",
                                              prefix=f"components/{dax_to_dlf_component_id}/",
                                              file_extensions=[".yaml"])

    dlf_to_pvc_component_url = get_object_url(bucket_name="mlpipeline",
                                              prefix=f"components/{dlf_to_pvc_component_id}/",
                                              file_extensions=[".yaml"])

    if fail_on_missing_prereqs:
        if not dax_to_dlf_component_url:
            raise ApiError(f"Missing required component '{dax_to_dlf_component_id}'")
        if not dlf_to_pvc_component_url:
            raise ApiError(f"Missing required component '{dlf_to_pvc_component_id}'")

    namespace = run_parameters.get("namespace", _namespace)

    pipeline_server = "" if "POD_NAMESPACE" in os.environ else f"'{_pipeline_service_url}'"

    run_name = (run_name or "").replace("'", "\"") or dataset.name

    substitutions = dict(locals())

    template_file = "run_dataset.TEMPLATE.py"

    with open(join(CODE_TEMPLATE_DIR, template_file), 'r') as f:
        template_raw = f.read()

    template_rendered = Template(template_raw).substitute(substitutions)

    run_script = autopep8.fix_code(template_rendered, options={"aggressive": 2})

    return run_script
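# A simplified sketch of the `dict(locals())` pattern used in
# `generate_dataset_run_script` above: every local variable defined before the
# snapshot becomes a candidate template substitution, so the template's
# placeholder names must match the function's variable names exactly. The
# function and variable names below are illustrative only.
from string import Template

def _render_locals_demo(name: str, namespace: str = "kubeflow") -> str:
    pipeline_server = ""  # e.g. empty when running in-cluster
    substitutions = dict(locals())  # snapshot: name, namespace, pipeline_server
    return Template("run('${name}', namespace='${namespace}')").substitute(substitutions)

assert _render_locals_demo("my-dataset") == "run('my-dataset', namespace='kubeflow')"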
def generate_custom_pipeline_function_body(custom_pipeline: ApiPipelineCustom, hide_secrets=True):

    function_body = """
    from kfp import components
"""

    component_template_raw = """
    ${comp_name} = components.load_component_from_url('${template_url}')
    ${op_name} = ${comp_name}(${component_args})
"""

    op_dependency_template_raw = """
    ${op_name}.after(${required_op_name})
"""

    for task in custom_pipeline.dag.tasks:

        parameters = []

        if task.artifact_type == "notebook":
            component_s3_prefix = "components/jupyter/"

            notebook_url = get_object_url(bucket_name="mlpipeline",
                                          prefix=f"notebooks/{task.artifact_id}/",
                                          file_extensions=[".ipynb"])

            if not notebook_url:
                raise ApiError(f"Could not find notebook '{task.artifact_id}'")

            task_parameters = list(task.arguments.parameters) if task.arguments and task.arguments.parameters else []

            for p in task_parameters:
                if isinstance(p.value, str) and p.value.startswith("{{inputs.parameters."):
                    raise ApiError("Referencing '{{inputs.parameters.*}}' is not supported for notebook parameter"
                                   f" values: {task.to_dict()}", 422)

            notebook_parameters = {p.name: p.value or p.default for p in task_parameters}
            notebook_parameters_str = json.dumps(notebook_parameters) if notebook_parameters else ""

            jupyter_component_parameters = {
                "notebook_url": notebook_url,
                "notebook_params": notebook_parameters_str,
                "api_token": "",
                "endpoint_url": "",
                "bucket_name": "",
                "object_name": "",
                "access_key": "",
                "secret_access_key": ""
            }

            if not hide_secrets:
                output_folder = f"notebooks/{task.artifact_id}/runs/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
                notebook_file_name = notebook_url.split("/")[-1]
                output_file_name = notebook_file_name.replace(r'.ipynb', '_out.ipynb')
                output_file_path = f"{output_folder}/{output_file_name}"
                output_bucket = "mlpipeline"

                jupyter_component_parameters.update({
                    "endpoint_url": "minio-service:9000",  # f"{minio_host}:{minio_port}",
                    "bucket_name": output_bucket,
                    "object_name": output_file_path,
                    "access_key": minio_access_key,
                    "secret_access_key": minio_secret_key
                })

            for name, value in jupyter_component_parameters.items():
                parameters.append(f"{name} = '{value}'")

        elif task.artifact_type == "component":
            component_s3_prefix = f"components/{task.artifact_id}/"

            # replace parameter values that reference pipeline input parameters {{inputs.parameters.parameter_name}}
            task_parameters = list(task.arguments.parameters) if task.arguments and task.arguments.parameters else []

            missing_parameter_values = [p.name for p in task_parameters
                                        if not p.value and not p.default and p.description
                                        and p.description.title().startswith("Required")]

            if missing_parameter_values:
                raise ApiError(f"Missing required task parameters {missing_parameter_values}", 422)

            for p in task_parameters:
                if isinstance(p.value, str) and p.value.startswith("{{inputs.parameters."):
                    match = re.match(r"{{inputs.parameters.(?P<pipeline_parameter_name>\w+)}}", p.value)
                    if not match:
                        raise ApiError(f"Cannot match pipeline input.parameter '{p.value}'", 422)
                    pipeline_param_ref = match.groupdict().get("pipeline_parameter_name")
                    parameters.append(f"{p.name} = {pipeline_param_ref}")
                else:
                    arg = generate_method_arg_from_parameter(p)
                    parameters.append(arg)

        else:
            raise ApiError(f"Unknown or unsupported artifact_type '{task.artifact_type}':\n'{task}'", 422)

        comp_name = "comp_" + re.sub(r"\W+", "_", task.name, flags=re.ASCII).lower()
        op_name = "op_" + re.sub(r"\W+", "_", task.name, flags=re.ASCII).lower()

        template_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=component_s3_prefix,
                                      file_extensions=[".yaml", ".yml"])

        if not template_url:
            raise ApiError(f"Could not find component template '{component_s3_prefix}'")

        substitutions = {
            "comp_name": comp_name,
            "op_name": op_name,
            "template_url": template_url,
            "component_args": ", ".join(parameters)
        }

        template_rendered = Template(component_template_raw).substitute(substitutions)
        function_body += template_rendered

    for task in custom_pipeline.dag.tasks:
        for required_task_name in task.dependencies or []:
            substitutions = {
                "op_name": "op_" + re.sub(r"\W+", "_", task.name, flags=re.ASCII).lower(),
                "required_op_name": "op_" + re.sub(r"\W+", "_", required_task_name, flags=re.ASCII).lower()
            }
            template_rendered = Template(op_dependency_template_raw).substitute(substitutions)
            function_body += template_rendered

    return function_body