def download_pipeline_files(id):  # noqa: E501
    """Returns the pipeline YAML compressed into a .tgz (.tar.gz) file.

    :param id:
    :type id: str

    :rtype: file
    """
    tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                   prefix=f"pipelines/{id}",
                                   file_extensions=[""],
                                   keep_open=False)

    if len(tar.members) == 0:
        return f"Could not find pipeline with id '{id}'", 404

    return bytes_io.getvalue(), 200, {"Content-Disposition": f"attachment; filename={id}.tgz"}
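
# NOTE: `create_tarfile` is a shared storage helper imported elsewhere in this
# module. The commented sketch below only illustrates the contract the handlers
# in this file rely on -- it is not the actual implementation, and the
# `retrieve_objects(bucket_name, prefix)` helper it calls is hypothetical.
#
#     import tarfile
#     from io import BytesIO
#
#     def create_tarfile(bucket_name, prefix, file_extensions, keep_open=False):
#         bytes_io = BytesIO()
#         tar = tarfile.open(fileobj=bytes_io, mode="w:gz")
#         for name, content in retrieve_objects(bucket_name, prefix):  # hypothetical
#             if any(name.endswith(ext) for ext in file_extensions):
#                 tarinfo = tarfile.TarInfo(name=name.split("/")[-1])
#                 tarinfo.size = len(content)
#                 tar.addfile(tarinfo, BytesIO(content))
#         if not keep_open:
#             tar.close()  # finalize the gzip stream so bytes_io holds a valid .tgz
#         return tar, bytes_io
#
# With keep_open=True the caller is expected to append extra members and call
# tar.close() itself before reading bytes_io.getvalue(), as the handlers below do.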
def download_model_files(id, include_generated_code=None):  # noqa: E501
    """Returns the model artifacts compressed into a .tgz (.tar.gz) file.

    :param id:
    :type id: str
    :param include_generated_code: Include generated run scripts in download
    :type include_generated_code: bool

    :rtype: file | binary
    """
    # keep the tar open if generated scripts still need to be appended below
    tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                   prefix=f"models/{id}/",
                                   file_extensions=[".yaml", ".yml", ".py", ".md"],
                                   keep_open=include_generated_code)

    if len(tar.members) == 0:
        return f"Could not find model with id '{id}'", 404

    if include_generated_code:
        generate_code_response: ApiGenerateModelCodeResponse = generate_model_code(id)[0]

        for s in generate_code_response.scripts:
            file_name = f"run_{s.pipeline_stage}_{s.execution_platform}.py"

            # avoid clobbering a script of the same name that was already archived
            if file_name in tar.getnames():
                file_name = file_name.replace(".py", "_generated.py")

            # encode once so the declared tar entry size matches the byte payload
            file_bytes = s.script_code.encode("utf-8")

            tarinfo = tarfile.TarInfo(name=file_name)
            tarinfo.size = len(file_bytes)
            tar.addfile(tarinfo, BytesIO(file_bytes))

        tar.close()

    return bytes_io.getvalue(), 200, {
        "Content-Disposition": f"attachment; filename={id}.tgz"
    }
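
# The pattern used above -- a TarInfo whose `size` is set to the *encoded byte*
# length, paired with a BytesIO of the same bytes -- is the standard way to add
# in-memory files to a tarfile. A self-contained illustration (names invented):
#
#     import tarfile
#     from io import BytesIO
#
#     buffer = BytesIO()
#     with tarfile.open(fileobj=buffer, mode="w:gz") as demo_tar:
#         payload = "print('hello')\n".encode("utf-8")
#         info = tarfile.TarInfo(name="run_training_kubeflow.py")
#         info.size = len(payload)  # byte length, not character count
#         demo_tar.addfile(info, BytesIO(payload))
#     tgz_bytes = buffer.getvalue()  # a valid .tar.gz archive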
def download_dataset_files(id, include_generated_code=None):  # noqa: E501
    """Returns the dataset artifacts compressed into a .tgz (.tar.gz) file.

    :param id:
    :type id: str
    :param include_generated_code: Include generated run script in download
    :type include_generated_code: bool

    :rtype: file | binary
    """
    # keep the tar open if a generated script still needs to be appended below
    tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                   prefix=f"datasets/{id}/",
                                   file_extensions=[".yaml", ".yml", ".py", ".md"],
                                   keep_open=include_generated_code)

    if len(tar.members) == 0:
        return f"Could not find dataset with id '{id}'", 404

    if include_generated_code:
        generate_code_response, api_status = generate_dataset_code(id)

        if api_status == 200:
            file_name = "run_dataset.py"

            # avoid clobbering a script of the same name that was already archived
            if file_name in tar.getnames():
                file_name = file_name.replace(".py", "_generated.py")

            # encode once so the declared tar entry size matches the byte payload
            file_bytes = generate_code_response.script.encode("utf-8")

            tarinfo = tarfile.TarInfo(name=file_name)
            tarinfo.size = len(file_bytes)
            tar.addfile(tarinfo, BytesIO(file_bytes))

        tar.close()

    return bytes_io.getvalue(), 200, {
        "Content-Disposition": f"attachment; filename={id}.tgz"
    }
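
# A client-side sketch of how the download endpoints above might be consumed.
# The host, port, and route are assumptions inferred from the handler signatures,
# not taken from this module:
#
#     import io
#     import tarfile
#     import requests
#
#     resp = requests.get("http://localhost:8080/apis/v1alpha1/datasets/<id>/download",
#                         params={"include_generated_code": "true"})
#     resp.raise_for_status()
#     with tarfile.open(fileobj=io.BytesIO(resp.content), mode="r:gz") as tgz:
#         tgz.extractall(path="./dataset-files")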
def generate_notebook_run_script(api_notebook: ApiNotebook,
                                 parameters: dict = None,
                                 run_name: str = None,
                                 hide_secrets: bool = True):

    # avoid a mutable default argument; behavior is unchanged
    parameters = parameters or {}

    if "dataset_pvc" in parameters:
        template_file = "run_notebook_with_dataset.TEMPLATE.py"
    else:
        template_file = "run_notebook.TEMPLATE.py"

    with open(join(CODE_TEMPLATE_DIR, template_file), 'r') as f:
        template_raw = f.read()

    notebook_file = api_notebook.url.split("/")[-1]

    requirements_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".txt"],
                                      file_name_filter="requirements")

    cos_dependencies_archive_url = get_object_url(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_extensions=[".tar.gz"],
                                                  file_name_filter="elyra-dependencies-archive")

    if not cos_dependencies_archive_url:

        tar, bytes_io = create_tarfile(bucket_name="mlpipeline",
                                       prefix=f"notebooks/{api_notebook.id}/",
                                       file_extensions=[".ipynb"])

        cos_dependencies_archive_url = store_file(bucket_name="mlpipeline",
                                                  prefix=f"notebooks/{api_notebook.id}/",
                                                  file_name="elyra-dependencies-archive.tar.gz",
                                                  file_content=bytes_io.getvalue())

    cos_dependencies_archive = cos_dependencies_archive_url.split("/")[-1]

    # TODO: move this into ApiNotebook.image as opposed to parsing the YAML here
    yaml_file_content = retrieve_file_content(bucket_name="mlpipeline",
                                              prefix=f"notebooks/{api_notebook.id}/",
                                              file_extensions=[".yaml", ".yml"])

    metadata_yaml = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    image = metadata_yaml["implementation"]["github"].get("image", "tensorflow/tensorflow:latest")

    # TODO: elyra-ai/kfp-notebook generates the output notebook as: "-output.ipynb"
    #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
    #   so here we may consider renaming the generated file with a datetimestamp:
    # output_folder = f"notebooks/{api_notebook.id}/runs/{datetime.now().strftime('%Y%m%d-%H%M%S')}"
    # output_file_name = notebook_file_name.replace(r'.ipynb', '-output.ipynb')
    # output_file_path = f"{output_folder}/{output_file_name}"
    # output_file_url = f"http://{minio_host}:{minio_port}/mlpipeline/{output_file_path}"

    # inside the cluster (POD_NAMESPACE is set) the KFP client can rely on
    # in-cluster defaults, so the template gets an empty pipeline_server value
    kfp_url = f"'{_pipeline_service_url}'" if "POD_NAMESPACE" not in os.environ else ""

    substitutions = {
        "name": api_notebook.name,
        "notebook": notebook_file,
        "cos_bucket": "mlpipeline",
        "cos_directory": f"notebooks/{api_notebook.id}/",
        "cos_dependencies_archive": cos_dependencies_archive,
        "cos_endpoint": "***",
        "cos_username": "******",
        "cos_password": "******",
        "requirements_url": requirements_url or "",
        "image": image,
        "pipeline_server": kfp_url,
        "run_name": run_name or api_notebook.name
    }

    # TODO: make the `dataset_pvc` and `mount_path` parameters part of the Swagger spec?
    if "dataset_pvc" in parameters:
        substitutions.update({
            "dataset_pvc": parameters["dataset_pvc"],
            "mount_path": parameters.get("mount_path", "/tmp/data")
        })

    if not hide_secrets:
        substitutions.update({
            "cos_endpoint": f"http://{minio_host}:{minio_port}/minio",
            "cos_username": minio_access_key,
            "cos_password": minio_secret_key
        })

    run_script = Template(template_raw).substitute(substitutions)

    return run_script
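
# The run script is rendered with string.Template, so the files in
# CODE_TEMPLATE_DIR use `$name`-style placeholders matching the `substitutions`
# keys above. A minimal sketch with an invented two-line template (the real
# templates are longer):
#
#     from string import Template
#
#     template_raw = "notebook = '$notebook'\nimage = '$image'\n"
#     print(Template(template_raw).substitute({
#         "notebook": "train.ipynb",
#         "image": "tensorflow/tensorflow:latest",
#     }))
#
# Template.substitute raises KeyError when a placeholder has no value, which is
# why every key is always populated above, with the COS secrets masked by default.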