def _upload_pipeline_yaml(yaml_file_content: AnyStr, name=None, description=None, labels=None, annotations=None):
    """Store a pipeline YAML definition, in KFP or in the local store.

    :param yaml_file_content: raw pipeline YAML (bytes, written to a temp file)
    :param name: optional pipeline name, overrides the name in the YAML
    :param description: optional description, stored on the ApiPipeline record
    :param labels: NOTE(review): accepted but never applied — confirm intent
    :param annotations: dict or JSON-encoded string, stored on the extension
    :rtype: (ApiPipelineExtended, int)
    """
    (fd, filename) = mkstemp(suffix=".yaml")

    try:
        with os.fdopen(fd, "wb") as f:
            f.write(yaml_file_content)

        if KFP_HOST == "UNAVAILABLE":
            # inside docker-compose we don't have KFP, so store the pipeline ourselves
            api_pipeline: ApiPipeline = _store_pipeline(yaml_file_content, name, description)
        else:
            api_pipeline: ApiPipeline = upload_pipeline_to_kfp(uploadfile=filename, name=name)

        if description:
            # KFP does not persist the description for us, update it separately
            update_multiple(ApiPipeline, [api_pipeline.id], "description", description)

        store_data(ApiPipelineExtension(id=api_pipeline.id))

        if annotations:
            # annotations may arrive as a JSON-encoded string from the API layer
            if isinstance(annotations, str):
                annotations = json.loads(annotations)
            update_multiple(ApiPipelineExtension, [api_pipeline.id], "annotations", annotations)

        api_pipeline_extended, _ = get_pipeline(api_pipeline.id)
    finally:
        # always clean up the temp file, even if storing the pipeline failed
        os.remove(filename)

    return api_pipeline_extended, 201
def create_credential(body):  # noqa: E501
    """create_credential

    Creates a credential associated with a pipeline. # noqa: E501

    :param body:
    :type body: dict | bytes

    :rtype: ApiCredential
    """
    if connexion.request.is_json:
        body = ApiCredential.from_dict(connexion.request.get_json())

    api_credential: ApiCredential = body
    api_credential.id = api_credential.id or f"{secret_name_prefix}-{generate_id(length=16)}".lower()
    api_credential.created_at = datetime.now()

    error = store_data(api_credential)

    if error:
        return error, 400

    # TODO: do we need to generate some token or return something generated by K8s?
    # "id" and "created_at" are our metadata, not part of the secret payload
    # (return value of create_secret was previously bound but never used)
    create_secret(api_credential.id,
                  {key: value for key, value in api_credential.to_dict().items()
                   if key not in ["id", "created_at"]})

    # TODO: remove credential if kubernetes secret was not created
    return api_credential, 200  # TODO: return 201
def _store_pipeline(yaml_file_content: AnyStr, name=None, description=None):
    """Parse a pipeline YAML template and store it in the local MLX store.

    Used when KFP is unavailable (e.g. inside docker-compose).

    :param yaml_file_content: raw pipeline YAML (bytes or str)
    :param name: optional name, overrides metadata.name from the template
    :param description: optional description, overrides the pipeline_spec one
    :rtype: ApiPipeline
    """
    yaml_dict = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    template_metadata = yaml_dict.get("metadata") or dict()
    annotations = template_metadata.get("annotations", {})
    pipeline_spec = json.loads(annotations.get("pipelines.kubeflow.org/pipeline_spec", "{}"))

    name = name or template_metadata["name"]
    # BUG FIX: the caller-provided description used to be unconditionally
    # overwritten by the template's pipeline_spec description
    description = (description or pipeline_spec.get("description", "")).strip()
    namespace = pipeline_spec.get("namespace", "").strip()
    # fabricate a UUID-shaped id (8-4-4-4-12 character groups)
    pipeline_id = "-".join([generate_id(length=part_len) for part_len in [8, 4, 4, 4, 12]])
    created_at = datetime.now()

    parameters = [ApiParameter(name=p.get("name"), description=p.get("description"),
                               default=p.get("default"), value=p.get("value"))
                  for p in yaml_dict["spec"].get("params", {})]

    api_pipeline = ApiPipeline(id=pipeline_id,
                               created_at=created_at,
                               name=name,
                               description=description,
                               parameters=parameters,
                               namespace=namespace)

    uuid = store_data(api_pipeline)
    api_pipeline.id = uuid

    store_file(bucket_name="mlpipeline", prefix="pipelines/",
               file_name=f"{pipeline_id}", file_content=yaml_file_content)

    enable_anonymous_read_access(bucket_name="mlpipeline", prefix="pipelines/*")

    return api_pipeline
def approve_pipelines_for_publishing(pipeline_ids):  # noqa: E501
    """approve_pipelines_for_publishing

    :param pipeline_ids: Array of pipeline IDs to be approved for publishing.
    :type pipeline_ids: List[str]

    :rtype: None
    """
    pipe_exts: [ApiPipelineExtension] = load_data(ApiPipelineExtension)
    pipe_ext_ids = {p.id for p in pipe_exts}
    missing_pipe_ext_ids = set(pipeline_ids) - pipe_ext_ids

    # create extension records for pipelines that don't have one yet
    # (loop variable renamed from 'id', which shadowed the builtin)
    for pipeline_id in missing_pipe_ext_ids:
        store_data(ApiPipelineExtension(id=pipeline_id))

    # reset the flag on ALL extensions, then approve only the requested ids
    update_multiple(ApiPipelineExtension, [], "publish_approved", False)

    if pipeline_ids:
        update_multiple(ApiPipelineExtension, pipeline_ids, "publish_approved", True)

    return None, 200
def set_featured_pipelines(pipeline_ids):  # noqa: E501
    """set_featured_pipelines

    :param pipeline_ids: Array of pipeline IDs to be featured.
    :type pipeline_ids: List[str]

    :rtype: None
    """
    pipe_exts: [ApiPipelineExtension] = load_data(ApiPipelineExtension)
    pipe_ext_ids = {p.id for p in pipe_exts}
    missing_pipe_ext_ids = set(pipeline_ids) - pipe_ext_ids

    # create extension records for pipelines that don't have one yet
    # (loop variable renamed from 'id', which shadowed the builtin)
    for pipeline_id in missing_pipe_ext_ids:
        store_data(ApiPipelineExtension(id=pipeline_id))

    # reset the flag on ALL extensions, then feature only the requested ids
    update_multiple(ApiPipelineExtension, [], "featured", False)

    if pipeline_ids:
        update_multiple(ApiPipelineExtension, pipeline_ids, "featured", True)

    return None, 200
def _upload_model_yaml(yaml_file_content: AnyStr, name=None, existing_id=None):
    """Parse a model YAML definition and register it as an ApiModel.

    :param yaml_file_content: raw model YAML (bytes or str)
    :param name: optional model name, overrides the name in the YAML
    :param existing_id: reuse this id instead of deriving one from the YAML
    :rtype: (ApiModel, int)
    """
    model_def = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    api_model = ApiModel(
        id=existing_id or model_def.get("model_identifier") or generate_id(name=name or model_def["name"]),
        created_at=datetime.now(),
        name=name or model_def["name"],
        description=model_def["description"].strip(),
        domain=model_def.get("domain") or "",
        labels=model_def.get("labels") or dict(),
        framework=model_def["framework"],
        filter_categories=model_def.get("filter_categories") or dict(),
        trainable=model_def.get("train", {}).get("trainable") or False,
        trainable_tested_platforms=model_def.get("train", {}).get("tested_platforms") or [],
        trainable_credentials_required=model_def.get("train", {}).get("credentials_required") or False,
        trainable_parameters=model_def.get("train", {}).get("input_params") or [],
        servable=model_def.get("serve", {}).get("servable") or False,
        servable_tested_platforms=model_def.get("serve", {}).get("tested_platforms") or [],
        servable_credentials_required=model_def.get("serve", {}).get("credentials_required") or False,
        servable_parameters=model_def.get("serve", {}).get("input_params") or [])

    # convert comma-separated strings to lists (isinstance instead of type() ==)
    if isinstance(api_model.trainable_tested_platforms, str):
        api_model.trainable_tested_platforms = \
            api_model.trainable_tested_platforms.replace(", ", ",").split(",")

    if isinstance(api_model.servable_tested_platforms, str):
        api_model.servable_tested_platforms = \
            api_model.servable_tested_platforms.replace(", ", ",").split(",")

    uuid = store_data(api_model)
    api_model.id = uuid

    store_file(bucket_name="mlpipeline", prefix=f"models/{api_model.id}/",
               file_name="template.yaml", file_content=yaml_file_content,
               content_type="text/yaml")

    enable_anonymous_read_access(bucket_name="mlpipeline", prefix="models/*")

    return api_model, 201
def _upload_pipeline_yaml(yaml_file_content: AnyStr, name=None, description=None, labels=None, annotations=None):
    """Store a pipeline YAML definition, in KFP or in the local store.

    :param yaml_file_content: raw pipeline YAML (bytes, written to a temp file)
    :param name: optional pipeline name, overrides the name in the YAML
    :param description: optional description; if absent it is parsed out of
        the template's pipeline_spec annotation before uploading to KFP
    :param labels: NOTE(review): accepted but never applied — confirm intent
    :param annotations: dict or JSON-encoded string, stored on the extension
    :rtype: (ApiPipelineExtended, int)
    """
    (fd, filename) = mkstemp(suffix=".yaml")

    try:
        with os.fdopen(fd, "wb") as f:
            f.write(yaml_file_content)

        if KFP_HOST == "UNAVAILABLE":
            # when running inside Docker Compose w/out KFP we store pipelines ourselves
            api_pipeline: ApiPipeline = _store_pipeline(yaml_file_content, name, description)
        else:
            # when deployed on top of KFP, we let KFP store pipelines
            # KFP does not extract the description, so let's parse that out
            if not description:
                yaml_dict = yaml.load(yaml_file_content, Loader=yaml.FullLoader)
                template_metadata = yaml_dict.get("metadata") or dict()
                # BUG FIX: use a distinct local name here; previously this
                # assignment clobbered the 'annotations' parameter, so the
                # caller's annotations were silently replaced by the
                # template's annotations in the block below
                template_annotations = template_metadata.get("annotations", {})
                pipeline_spec = json.loads(template_annotations.get("pipelines.kubeflow.org/pipeline_spec", "{}"))
                description = description or pipeline_spec.get("description", "").strip()

            api_pipeline: ApiPipeline = upload_pipeline_to_kfp(filename, name, description)

        store_data(ApiPipelineExtension(id=api_pipeline.id))

        if annotations:
            # annotations may arrive as a JSON-encoded string from the API layer
            if isinstance(annotations, str):
                annotations = json.loads(annotations)
            update_multiple(ApiPipelineExtension, [api_pipeline.id], "annotations", annotations)

        api_pipeline_extended, _ = get_pipeline(api_pipeline.id)
    finally:
        # always clean up the temp file, even if storing the pipeline failed
        os.remove(filename)

    return api_pipeline_extended, 201
def _upload_component_yaml(yaml_file_content: AnyStr, name=None, existing_id=None):
    """Parse a component YAML template and register it as an ApiComponent.

    :param yaml_file_content: raw component YAML (bytes or str)
    :param name: optional component name, overrides the name in the YAML
    :param existing_id: reuse this id instead of generating one
    :rtype: (ApiComponent, int)
    """
    spec = yaml.load(yaml_file_content, Loader=yaml.FullLoader)
    meta = spec.get("metadata") or dict()

    component_id = existing_id or generate_id(name=name or spec["name"])
    created_at = datetime.now()
    name = name or spec["name"]

    api_component = ApiComponent(
        id=component_id,
        created_at=created_at,
        name=name,
        # fall back to the name when no description is given; cap at 255 chars
        description=(spec.get("description") or name).strip()[:255],
        metadata=ApiMetadata(annotations=meta.get("annotations"),
                             labels=meta.get("labels"),
                             tags=meta.get("tags")),
        parameters=[ApiParameter(name=inp.get("name"),
                                 description=inp.get("description"),
                                 default=inp.get("default"),
                                 value=inp.get("value"))
                    for inp in spec.get("inputs", [])],
        filter_categories=spec.get("filter_categories") or dict())

    # the store may assign a canonical uuid, keep our record in sync
    api_component.id = store_data(api_component)

    store_file(bucket_name="mlpipeline", prefix=f"components/{component_id}/",
               file_name="template.yaml", file_content=yaml_file_content,
               content_type="text/yaml")

    enable_anonymous_read_access(bucket_name="mlpipeline", prefix="components/*")

    return api_component, 201
def create_dataset(body):  # noqa: E501
    """create_dataset

    :param body:
    :type body: dict | bytes

    :rtype: ApiDataset
    """
    api_dataset = body

    # JSON request bodies arrive as dicts and must be converted first
    if connexion.request.is_json:
        api_dataset = ApiDataset.from_dict(connexion.request.get_json())  # noqa: E501

    error = store_data(api_dataset)

    return (error, 400) if error else (api_dataset, 200)  # TODO: return 201
def create_model(body):  # noqa: E501
    """create_model

    :param body:
    :type body: dict | bytes

    :rtype: ApiModel
    """
    api_model = body

    # JSON request bodies arrive as dicts and must be converted first
    if connexion.request.is_json:
        api_model = ApiModel.from_dict(connexion.request.get_json())  # noqa: E501

    error = store_data(api_model)

    return (error, 400) if error else (api_model, 200)  # TODO: return 201
def create_notebook(body):  # noqa: E501
    """create_notebook

    :param body:
    :type body: dict | bytes

    :rtype: ApiNotebook
    """
    api_notebook = body

    # JSON request bodies arrive as dicts and must be converted first
    if connexion.request.is_json:
        api_notebook = ApiNotebook.from_dict(connexion.request.get_json())  # noqa: E501

    error = store_data(api_notebook)

    return (error, 400) if error else (api_notebook, 200)  # TODO: return 201
def _upload_notebook_yaml(yaml_file_content: AnyStr, name=None, access_token=None, existing_id=None):
    """Parse a notebook YAML template, download the referenced notebook,
    store both in the "mlpipeline" bucket, and register an ApiNotebook.

    :param yaml_file_content: raw notebook YAML template (bytes or str)
    :param name: optional notebook name, overrides the name in the YAML
    :param access_token: enterprise GitHub token used to download the notebook
    :param existing_id: reuse this id instead of generating one
    :rtype: (ApiNotebook, int) -- the stored notebook and HTTP status 201
    """
    yaml_dict = yaml.load(yaml_file_content, Loader=yaml.FullLoader)
    template_metadata = yaml_dict.get("metadata") or dict()
    notebook_id = existing_id or generate_id(name=name or yaml_dict["name"])
    created_at = datetime.now()
    name = name or yaml_dict["name"]
    description = yaml_dict["description"].strip()
    # the notebook source and optional requirements come from the template's
    # implementation.github section
    url = yaml_dict["implementation"]["github"]["source"]
    requirements = yaml_dict["implementation"]["github"].get("requirements")

    metadata = ApiMetadata(annotations=template_metadata.get("annotations"),
                           labels=template_metadata.get("labels"),
                           tags=template_metadata.get("tags"))

    # fetch the actual .ipynb content (token needed for enterprise GitHub)
    notebook_content = _download_notebook(url, enterprise_github_api_token=access_token)

    # parameters = _extract_notebook_parameters(notebook_content)
    # TODO: not using Papermill any longer, notebook parameters no longer valid?
    # kfp-notebook has inputs and outputs ?
    parameters = dict()

    api_notebook = ApiNotebook(id=notebook_id,
                               created_at=created_at,
                               name=name,
                               description=description,
                               url=url,
                               metadata=metadata,
                               parameters=parameters)

    # the store may assign a canonical uuid; keep our record in sync
    uuid = store_data(api_notebook)
    api_notebook.id = uuid

    store_file(bucket_name="mlpipeline", prefix=f"notebooks/{notebook_id}/",
               file_name="template.yaml", file_content=yaml_file_content)

    # store the notebook itself next to the template; the file name is the
    # last path segment of the source URL with any query string stripped
    s3_url = store_file(bucket_name="mlpipeline",
                        prefix=f"notebooks/{notebook_id}/",
                        file_name=url.split("/")[-1].split("?")[0],
                        file_content=json.dumps(notebook_content).encode())

    if requirements:
        if _is_url(requirements):
            # requirements given as a URL to a requirements.txt file
            requirements_url = requirements
            requirements_txt = download_file_content_from_url(requirements_url).decode()
        else:
            # requirements given as a comma-separated list of packages
            requirements_txt = "\n".join(requirements.split(","))

        # TODO: remove this after fixing the Elyra-AI/KFP-Notebook runner so that
        #  Elyra should install its own requirements in addition to the provided requirements
        requirements_elyra_url = "https://github.com/elyra-ai/kfp-notebook/blob/master/etc/requirements-elyra.txt"
        requirements_elyra_txt = download_file_content_from_url(requirements_elyra_url).decode()
        # drop comment lines from the Elyra requirements before merging
        requirements_elyra = "\n".join([line for line in requirements_elyra_txt.split("\n")
                                        if not line.startswith("#")])

        requirements_all = f"# Required packages for {api_notebook.name}:\n" \
                           f"{requirements_txt}\n" \
                           f"# Requirements from {requirements_elyra_url}:\n" \
                           f"{requirements_elyra}"

        store_file(bucket_name="mlpipeline", prefix=f"notebooks/{notebook_id}/",
                   file_name="requirements.txt",
                   file_content=requirements_all.encode())

    # if the url included an access token, replace the original url with the s3 url
    if "?token=" in url or "github.ibm.com" in url:
        api_notebook.url = s3_url
        update_multiple(ApiNotebook, [notebook_id], "url", s3_url)

    enable_anonymous_read_access(bucket_name="mlpipeline", prefix="notebooks/*")

    return api_notebook, 201
def _upload_dataset_yaml(yaml_file_content: AnyStr, name=None, existing_id=None):
    """Parse a dataset YAML descriptor (DAX-style) and register an ApiDataset.

    :param yaml_file_content: raw dataset YAML (bytes or str)
    :param name: optional dataset name, overrides the name in the YAML
    :param existing_id: reuse this id instead of generating one
    :rtype: (ApiDataset, int) -- the stored dataset and HTTP status 201
    """
    yaml_dict = yaml.load(yaml_file_content, Loader=yaml.FullLoader)

    name = name or yaml_dict["name"]
    description = yaml_dict["description"]
    dataset_id = existing_id or generate_id(name=yaml_dict.get("id", name))
    created_at = datetime.now()

    # if yaml_dict.get("id") != dataset_id:
    #     raise ValueError(f"Dataset.id contains non k8s character: {yaml_dict.get('id')}")

    # TODO: re-evaluate if we should use dataset update time as our MLX "created_at" time
    # prefer the descriptor's own timestamps ("updated" over "created") when present
    if "updated" in yaml_dict:
        created_at = datetime.strptime(str(yaml_dict["updated"]), "%Y-%m-%d")
    elif "created" in yaml_dict:
        created_at = datetime.strptime(str(yaml_dict["created"]), "%Y-%m-%d")

    license_name = yaml_dict["license"]["name"]
    domain = yaml_dict["domain"]
    # only the first "format" and first "content" entry are used
    format_type = yaml_dict["format"][0]["type"]
    size = yaml_dict["content"][0].get("size")
    # NOTE(review): "version" is parsed but not stored yet (see TODO below)
    version = yaml_dict["version"]

    # # extract number of records and convert thousand separators based on Locale
    # num_records_str = yaml_dict["statistics"]["number_of_records"]
    # num_records_number_str = num_records_str.split()[0]. \
    #     replace("~", ""). \
    #     replace("+", ""). \
    #     replace("k", "000"). \
    #     replace(",", "")  # assumes thousand separators in locale.en_US.UTF-8
    # # locale.setlocale(locale.LC_ALL, 'en_US.UTF-8')  # setting locale does not work reliably in Docker
    # # number_of_records = locale.atoi(num_records_number_str)
    # number_of_records = int(num_records_number_str)
    number_of_records = yaml_dict["content"][0].get("records", 0)

    # collect asset ids of related MLX assets only
    related_assets = [a["application"].get("asset_id")
                      for a in yaml_dict.get("related_assets", [])
                      if "MLX" in a.get("application", {}).get("name", "")
                      and "asset_id" in a.get("application", {})]

    template_metadata = yaml_dict.get("metadata") or dict()
    metadata = ApiMetadata(annotations=template_metadata.get("annotations"),
                           labels=template_metadata.get("labels"),
                           tags=template_metadata.get("tags") or yaml_dict.get("seo_tags"))

    # TODO: add "version" to ApiDataset
    api_dataset = ApiDataset(id=dataset_id,
                             created_at=created_at,
                             name=name,
                             description=description,
                             domain=domain,
                             format=format_type,
                             size=size,
                             number_of_records=number_of_records,
                             license=license_name,
                             metadata=metadata,
                             related_assets=related_assets)

    # the store may assign a canonical uuid; keep our record in sync
    uuid = store_data(api_dataset)
    api_dataset.id = uuid

    store_file(bucket_name="mlpipeline", prefix=f"datasets/{api_dataset.id}/",
               file_name="template.yaml", file_content=yaml_file_content)

    enable_anonymous_read_access(bucket_name="mlpipeline", prefix="datasets/*")

    return api_dataset, 201