def approve_components_for_publishing(component_ids):  # noqa: E501
    """approve_components_for_publishing

    :param component_ids: Array of component IDs to be approved for publishing.
    :type component_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiComponent, [], "publish_approved", False)

    if component_ids:
        update_multiple(ApiComponent, component_ids, "publish_approved", True)

    return None, 200


def set_featured_components(component_ids):  # noqa: E501
    """set_featured_components

    :param component_ids: Array of component IDs to be featured.
    :type component_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiComponent, [], "featured", False)

    if component_ids:
        update_multiple(ApiComponent, component_ids, "featured", True)

    return None, 200


def approve_notebooks_for_publishing(notebook_ids):  # noqa: E501
    """approve_notebooks_for_publishing

    :param notebook_ids: Array of notebook IDs to be approved for publishing.
    :type notebook_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiNotebook, [], "publish_approved", False)

    if notebook_ids:
        update_multiple(ApiNotebook, notebook_ids, "publish_approved", True)

    return None, 200


def set_featured_notebooks(notebook_ids):  # noqa: E501
    """set_featured_notebooks

    :param notebook_ids: Array of notebook IDs to be featured.
    :type notebook_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiNotebook, [], "featured", False)

    if notebook_ids:
        update_multiple(ApiNotebook, notebook_ids, "featured", True)

    return None, 200


def approve_datasets_for_publishing(dataset_ids):  # noqa: E501
    """approve_datasets_for_publishing

    :param dataset_ids: Array of dataset IDs to be approved for publishing.
    :type dataset_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiDataset, [], "publish_approved", False)

    if dataset_ids:
        update_multiple(ApiDataset, dataset_ids, "publish_approved", True)

    return None, 200


def set_featured_datasets(dataset_ids):  # noqa: E501
    """set_featured_datasets

    :param dataset_ids: Array of dataset IDs to be featured.
    :type dataset_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiDataset, [], "featured", False)

    if dataset_ids:
        update_multiple(ApiDataset, dataset_ids, "featured", True)

    return None, 200


def set_featured_models(model_ids):  # noqa: E501
    """set_featured_models

    :param model_ids: Array of model IDs to be featured.
    :type model_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiModel, [], "featured", False)

    if model_ids:
        update_multiple(ApiModel, model_ids, "featured", True)

    return None, 200


def approve_models_for_publishing(model_ids):  # noqa: E501
    """approve_models_for_publishing

    :param model_ids: Array of model IDs to be approved for publishing.
    :type model_ids: List[str]

    :rtype: None
    """
    update_multiple(ApiModel, [], "publish_approved", False)

    if model_ids:
        update_multiple(ApiModel, model_ids, "publish_approved", True)

    return None, 200
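
# All of the approve/feature handlers above (and the pipeline variants below)
# share the same two-step pattern: clear the flag on every record by passing
# an empty ID list to update_multiple, then set it on the requested IDs only.
# A minimal usage sketch of that contract (the IDs are hypothetical):
def _example_set_featured_components():
    """Hedged sketch: feature exactly two components, un-featuring all others."""
    response, status = set_featured_components(["comp-id-1", "comp-id-2"])
    assert response is None and status == 200

    # an empty (or missing) ID list clears the flag on every component
    response, status = set_featured_components([])
    assert response is None and status == 200
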
def approve_pipelines_for_publishing(pipeline_ids):  # noqa: E501
    """approve_pipelines_for_publishing

    :param pipeline_ids: Array of pipeline IDs to be approved for publishing.
    :type pipeline_ids: List[str]

    :rtype: None
    """
    pipe_exts: List[ApiPipelineExtension] = load_data(ApiPipelineExtension)
    pipe_ext_ids = {p.id for p in pipe_exts}
    missing_pipe_ext_ids = set(pipeline_ids or []) - pipe_ext_ids

    # create extension records for pipelines that do not have one yet
    for pipeline_id in missing_pipe_ext_ids:
        store_data(ApiPipelineExtension(id=pipeline_id))

    update_multiple(ApiPipelineExtension, [], "publish_approved", False)

    if pipeline_ids:
        update_multiple(ApiPipelineExtension, pipeline_ids, "publish_approved", True)

    return None, 200


def set_featured_pipelines(pipeline_ids):  # noqa: E501
    """set_featured_pipelines

    :param pipeline_ids: Array of pipeline IDs to be featured.
    :type pipeline_ids: List[str]

    :rtype: None
    """
    pipe_exts: List[ApiPipelineExtension] = load_data(ApiPipelineExtension)
    pipe_ext_ids = {p.id for p in pipe_exts}
    missing_pipe_ext_ids = set(pipeline_ids or []) - pipe_ext_ids

    # create extension records for pipelines that do not have one yet
    for pipeline_id in missing_pipe_ext_ids:
        store_data(ApiPipelineExtension(id=pipeline_id))

    update_multiple(ApiPipelineExtension, [], "featured", False)

    if pipeline_ids:
        update_multiple(ApiPipelineExtension, pipeline_ids, "featured", True)

    return None, 200
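
# Pipelines are stored by KFP itself, so their publish/feature flags live on a
# separate ApiPipelineExtension record, which the two handlers above backfill
# on demand. A hedged sketch of that precondition (the ID is hypothetical):
def _example_approve_fresh_pipeline():
    """Hedged sketch: approving a pipeline that has no extension record yet."""
    # the handler creates the missing ApiPipelineExtension row before setting
    # publish_approved, so this also works for pipelines uploaded via KFP only
    response, status = approve_pipelines_for_publishing(["pipeline-id-1"])
    assert response is None and status == 200
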
def _upload_pipeline_yaml(yaml_file_content: AnyStr, name=None, description=None, labels=None, annotations=None):

    (fd, filename) = mkstemp(suffix=".yaml")

    try:
        with os.fdopen(fd, "wb") as f:
            f.write(yaml_file_content)

        if KFP_HOST == "UNAVAILABLE":
            # when running inside Docker Compose without KFP, we store pipelines ourselves
            api_pipeline: ApiPipeline = _store_pipeline(yaml_file_content, name, description)
        else:
            # when deployed on top of KFP, we let KFP store the pipeline; KFP does
            # not extract the description, so parse it out of the compiled YAML
            if not description:
                yaml_dict = yaml.load(yaml_file_content, Loader=yaml.FullLoader)
                template_metadata = yaml_dict.get("metadata") or dict()
                # use a separate variable so the 'annotations' parameter is not clobbered
                template_annotations = template_metadata.get("annotations", {})
                pipeline_spec = json.loads(template_annotations.get("pipelines.kubeflow.org/pipeline_spec", "{}"))
                description = pipeline_spec.get("description", "").strip()

            api_pipeline: ApiPipeline = upload_pipeline_to_kfp(filename, name, description)

        store_data(ApiPipelineExtension(id=api_pipeline.id))

        if annotations:
            if isinstance(annotations, str):
                annotations = json.loads(annotations)
            update_multiple(ApiPipelineExtension, [api_pipeline.id], "annotations", annotations)

        api_pipeline_extended, _ = get_pipeline(api_pipeline.id)

    finally:
        os.remove(filename)

    return api_pipeline_extended, 201
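
# The description fallback in _upload_pipeline_yaml assumes the compiled
# pipeline YAML carries KFP's pipeline_spec annotation on the Argo Workflow
# metadata. A hedged sample of the shape being parsed (values are made up):
_EXAMPLE_PIPELINE_YAML = b"""\
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  annotations:
    pipelines.kubeflow.org/pipeline_spec: '{"name": "my-pipeline", "description": "Trains and serves a sample model."}'
"""


def _example_parse_pipeline_description():
    """Hedged sketch mirroring the description extraction above."""
    yaml_dict = yaml.load(_EXAMPLE_PIPELINE_YAML, Loader=yaml.FullLoader)
    annotations = (yaml_dict.get("metadata") or dict()).get("annotations", {})
    pipeline_spec = json.loads(annotations.get("pipelines.kubeflow.org/pipeline_spec", "{}"))
    return pipeline_spec.get("description", "").strip()  # -> "Trains and serves a sample model."
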
def _upload_notebook_yaml(yaml_file_content: AnyStr, name=None, access_token=None, existing_id=None):

    yaml_dict = yaml.load(yaml_file_content, Loader=yaml.FullLoader)
    template_metadata = yaml_dict.get("metadata") or dict()

    notebook_id = existing_id or generate_id(name=name or yaml_dict["name"])
    created_at = datetime.now()
    name = name or yaml_dict["name"]
    description = yaml_dict["description"].strip()
    url = yaml_dict["implementation"]["github"]["source"]
    requirements = yaml_dict["implementation"]["github"].get("requirements")

    metadata = ApiMetadata(annotations=template_metadata.get("annotations"),
                           labels=template_metadata.get("labels"),
                           tags=template_metadata.get("tags"))

    notebook_content = _download_notebook(url, enterprise_github_api_token=access_token)

    # TODO: we are no longer using Papermill, so notebook parameters may no longer
    #   apply; kfp-notebook works with inputs and outputs instead
    # parameters = _extract_notebook_parameters(notebook_content)
    parameters = dict()

    api_notebook = ApiNotebook(id=notebook_id,
                               created_at=created_at,
                               name=name,
                               description=description,
                               url=url,
                               metadata=metadata,
                               parameters=parameters)

    uuid = store_data(api_notebook)

    api_notebook.id = uuid

    store_file(bucket_name="mlpipeline", prefix=f"notebooks/{notebook_id}/",
               file_name="template.yaml", file_content=yaml_file_content)

    s3_url = store_file(bucket_name="mlpipeline",
                        prefix=f"notebooks/{notebook_id}/",
                        file_name=url.split("/")[-1].split("?")[0],
                        file_content=json.dumps(notebook_content).encode())

    if requirements:
        if _is_url(requirements):
            requirements_url = requirements
            requirements_txt = download_file_content_from_url(requirements_url).decode()
        else:
            requirements_txt = "\n".join(requirements.split(","))

        # TODO: remove this after fixing the Elyra-AI/KFP-Notebook runner so that
        #   Elyra installs its own requirements in addition to the provided requirements
        requirements_elyra_url = "https://github.com/elyra-ai/kfp-notebook/blob/master/etc/requirements-elyra.txt"
        requirements_elyra_txt = download_file_content_from_url(requirements_elyra_url).decode()
        requirements_elyra = "\n".join([line for line in requirements_elyra_txt.split("\n")
                                        if not line.startswith("#")])

        requirements_all = f"# Required packages for {api_notebook.name}:\n" \
                           f"{requirements_txt}\n" \
                           f"# Requirements from {requirements_elyra_url}:\n" \
                           f"{requirements_elyra}"

        store_file(bucket_name="mlpipeline", prefix=f"notebooks/{notebook_id}/",
                   file_name="requirements.txt",
                   file_content=requirements_all.encode())

    # if the url included an access token, replace the original url with the s3 url
    if "?token=" in url or "github.ibm.com" in url:
        api_notebook.url = s3_url
        update_multiple(ApiNotebook, [notebook_id], "url", s3_url)

    enable_anonymous_read_access(bucket_name="mlpipeline", prefix="notebooks/*")

    return api_notebook, 201
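
# _upload_notebook_yaml expects a template carrying at least the fields read
# above: name, description, and implementation.github.source; requirements is
# optional and may be a URL or a comma-separated package list. A hedged sample
# template (repository URL and package names are hypothetical):
_EXAMPLE_NOTEBOOK_TEMPLATE = b"""\
name: Sample Notebook
description: A short demo notebook.
metadata:
  annotations: {}
  labels: {}
  tags: []
implementation:
  github:
    source: https://github.com/example-org/example-repo/blob/master/notebooks/demo.ipynb
    requirements: pandas,scikit-learn
"""
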
def _upload_multiple_assets(body: ApiCatalogUpload):  # noqa: E501

    # TODO: maybe parameterize the `publish_all` and `feature_all` flags? Since
    #   uploading a whole catalog is an admin activity, the caller most likely
    #   wants to register a curated list of assets that are published and featured
    publish_all = True
    feature_all = True

    def get_access_token_for_url(url: str) -> str:
        for api_access_token in body.api_access_tokens or []:
            if api_access_token.url_host in url:
                return api_access_token.api_token
        return None

    upload_methods = {
        "components": upload_component_from_url,
        "datasets": upload_dataset_from_url,
        "models": upload_model_from_url,
        "notebooks": upload_notebook_from_url,
        "pipelines": upload_pipeline_from_url
    }

    api_response = ApiCatalogUploadResponse(components=[], datasets=[], models=[],
                                            notebooks=[], pipelines=[],
                                            total_created=0, errors=[], total_errors=0)

    for asset_type, upload_method in upload_methods.items():

        for asset in getattr(body, asset_type) or []:
            try:
                api_object, status = upload_method(
                    url=asset.url, name=asset.name,
                    access_token=get_access_token_for_url(asset.url))

                if 200 <= status < 300:
                    getattr(api_response, asset_type).append(api_object)
                    api_response.total_created += 1
                else:
                    # TODO: remove this? the upload methods raise ApiError on failure
                    api_error = ApiCatalogUploadError(
                        **asset.to_dict(),
                        error_message=f"THIS SHOULD NOT HAPPEN: {str(api_object).strip()}",
                        status_code=500)
                    api_response.errors.append(api_error)
                    print(f"THIS SHOULD NOT HAPPEN: {api_error}")
                    print(traceback.format_exc())

            except ApiError as e:
                api_error = ApiCatalogUploadError(**asset.to_dict(),
                                                  error_message=e.message,
                                                  status_code=e.http_status_code)
                api_response.errors.append(api_error)

            except Exception as e:
                api_error = ApiCatalogUploadError(**asset.to_dict(),
                                                  error_message=str(e),
                                                  status_code=500)
                api_response.errors.append(api_error)
                print(traceback.format_exc())

    api_response.total_errors = len(api_response.errors)

    if publish_all or feature_all:
        api_classes = {
            "components": ApiComponent,
            "datasets": ApiDataset,
            "models": ApiModel,
            "notebooks": ApiNotebook,
            "pipelines": ApiPipelineExtension
        }
        for asset_type, api_class in api_classes.items():
            asset_list = getattr(api_response, asset_type)
            asset_ids = [asset.id for asset in asset_list]
            # only update the assets we just created; an empty ID list would
            # update every record (see the approve/feature handlers above)
            if asset_ids:
                update_multiple(api_class, asset_ids, "publish_approved", publish_all)
                update_multiple(api_class, asset_ids, "featured", feature_all)

    response_status = \
        201 if api_response.total_created > 0 and api_response.total_errors == 0 else \
        207 if api_response.total_created > 0 and api_response.total_errors > 0 else \
        max([e.status_code for e in api_response.errors], default=200)  # guard against an empty catalog

    return api_response, response_status
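
# _upload_multiple_assets walks the ApiCatalogUpload body one asset type at a
# time and answers with multi-status semantics: 201 when everything was
# created, 207 on mixed results, otherwise the highest error status code.
# A hedged sketch of a catalog payload (hosts, URLs, and names are
# hypothetical, matching only the fields the function reads):
_EXAMPLE_CATALOG_UPLOAD = {
    "api_access_tokens": [
        {"url_host": "github.example.com", "api_token": "<personal-access-token>"}
    ],
    "components": [
        {"name": "Sample Component",
         "url": "https://github.example.com/org/repo/raw/master/component.yaml"}
    ],
    "datasets": [],
    "models": [],
    "notebooks": [],
    "pipelines": []
}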