def _submit_pipeline(request, data, namespace, experiment_name, run_name):
    """Submit a KFP pipeline package and return the created run.

    :param request: incoming request; pipeline arguments are read from the
        ``pipeline-arguments`` header, payload type from ``content-type``.
    :param data: raw pipeline package bytes (yaml or zip).
    :param namespace: kubernetes namespace for the KFP client.
    :param experiment_name: KFP experiment to create/use.
    :param run_name: name for the new pipeline run.
    :raises: via log_and_raise on unsupported content type or KFP errors.
    """
    arguments = {}
    arguments_data = request.headers.get("pipeline-arguments")
    if arguments_data:
        # literal_eval (not eval) keeps parsing of this header safe
        arguments = ast.literal_eval(arguments_data)
        logger.info("pipeline arguments {}".format(arguments_data))

    ctype = request.headers.get("content-type", "")
    if "/yaml" in ctype:
        ctype = ".yaml"
    elif "/zip" in ctype:  # fixed: was " /zip" (leading space) which never matched
        ctype = ".zip"
    else:
        log_and_raise(
            HTTPStatus.BAD_REQUEST,
            reason="unsupported pipeline type {}".format(ctype),
        )
    logger.info("writing file {}".format(ctype))

    # NamedTemporaryFile(delete=False) creates the file atomically, unlike the
    # deprecated, race-prone tempfile.mktemp the code previously used
    with tempfile.NamedTemporaryFile(suffix=ctype, delete=False) as fp:
        pipe_tmp = fp.name
        fp.write(data)

    run = None
    try:
        client = kfclient(namespace=namespace)
        experiment = client.create_experiment(name=experiment_name)
        run = client.run_pipeline(experiment.id, run_name, pipe_tmp, params=arguments)
    except Exception as e:
        remove(pipe_tmp)
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="kfp err: {}".format(e))
    remove(pipe_tmp)
    return run
async def store_artifact(
    request: Request,
    project: str,
    uid: str,
    key: str,
    tag: str = "",
    iter: int = 0,
    db_session: Session = Depends(deps.get_db_session),
):
    """Store an artifact document (JSON request body) under project/uid/key.

    Raises 400 when the body is not valid JSON.
    """
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.debug("Storing artifact", data=data)
    store = get_db().store_artifact
    await run_in_threadpool(
        store,
        db_session,
        key,
        data,
        uid,
        iter=iter,
        tag=tag,
        project=project,
    )
    return {}
async def store_run(
    request: Request,
    project: str,
    uid: str,
    iter: int = 0,
    auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
    db_session: Session = Depends(deps.get_db_session),
):
    """Persist a run document taken from the JSON request body.

    Raises 400 when the body is not valid JSON.
    """
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info("Storing run", data=data)
    store = get_db().store_run
    await run_in_threadpool(
        store,
        db_session,
        data,
        uid,
        project,
        iter=iter,
        leader_session=auth_verifier.auth_info.session,
    )
    return {}
async def build_function(request: Request, db_session: Session = Depends(deps.get_db_session)):
    """Trigger a function build described by the JSON request body.

    Body fields: ``function`` (dict), ``with_mlrun`` (JSON bool or "on"/"off"
    style string), ``skip_deployed`` (bool), ``mlrun_version_specifier``.
    """
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info(f"build_function:\n{data}")
    function = data.get("function")
    # clients may send with_mlrun as a JSON bool; strtobool() raises on
    # non-strings, so only apply it to string flags (consistent with the
    # other build_function handler in this module)
    with_mlrun = data.get("with_mlrun", "on")
    if not isinstance(with_mlrun, bool):
        with_mlrun = strtobool(with_mlrun)
    skip_deployed = data.get("skip_deployed", False)
    mlrun_version_specifier = data.get("mlrun_version_specifier")
    fn, ready = await run_in_threadpool(
        _build_function,
        db_session,
        function,
        with_mlrun,
        skip_deployed,
        mlrun_version_specifier,
    )
    return {
        "data": fn.to_dict(),
        "ready": ready,
    }
def get_logs(
    self,
    db_session: Session,
    project: str,
    uid: str,
    size: int = -1,
    offset: int = 0,
    source: LogSources = LogSources.AUTO,
) -> typing.Tuple[str, bytes]:
    """
    :return: Tuple with:
        1. str of the run state (so watchers will know whether to continue polling for logs)
        2. bytes of the logs themselves
    """
    project = project or mlrun.mlconf.default_project
    logs = b""
    log_file = log_path(project, uid)
    run = get_db().read_run(db_session, uid, project)
    if not run:
        log_and_raise(HTTPStatus.NOT_FOUND.value, project=project, uid=uid)
    run_state = run.get("status", {}).get("state", "")

    if log_file.exists() and source in (LogSources.AUTO, LogSources.PERSISTENCY):
        # logs were persisted to disk - serve the requested slice from the file
        with log_file.open("rb") as fp:
            fp.seek(offset)
            logs = fp.read(size)
    elif source in (LogSources.AUTO, LogSources.K8S):
        # no persisted file - fall back to reading from the run's logger pod
        k8s = get_k8s()
        if k8s:
            pods = k8s.get_logger_pods(project, uid)
            if pods:
                pod, pod_phase = next(iter(pods.items()))
                if pod_phase != PodPhases.pending:
                    resp = k8s.logs(pod)
                    if resp:
                        logs = resp.encode()[offset:]
    return run_state, logs
async def start_function(
    request: Request,
    background_tasks: BackgroundTasks,
    db_session: Session = Depends(deps.get_db_session),
):
    """Schedule a function start as a background task and return its handle."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info("Got request to start function", body=data)

    function = await run_in_threadpool(_parse_start_function_body, db_session, data)
    create_task = mlrun.api.utils.background_tasks.Handler().create_background_task
    background_task = await run_in_threadpool(
        create_task,
        db_session,
        function.metadata.project,
        background_tasks,
        _start_function,
        function,
    )
    return background_task
def get_filestat(request: Request, schema: str = "", path: str = "", user: str = ""):
    """Return size, modified time and mimetype for the object at *path*.

    :raises: 404 via log_and_raise for an illegal path or a missing object.
    """
    # removed `_, filename = path.split(path)`: splitting a string by itself
    # always yields ["", ""] (and raises ValueError on an empty path), and the
    # result was never used
    path = get_obj_path(schema, path, user=user)
    if not path:
        log_and_raise(
            HTTPStatus.NOT_FOUND.value, path=path, err="illegal path prefix or schema"
        )
    logger.debug("Got get filestat request", path=path)

    secrets = get_secrets(request)
    stat = None
    try:
        stores = store_manager.set(secrets)
        stat = stores.object(url=path).stat()
    except FileNotFoundError as exc:
        log_and_raise(HTTPStatus.NOT_FOUND.value, path=path, err=str(exc))

    ctype, _ = mimetypes.guess_type(path)
    if not ctype:
        ctype = "application/octet-stream"

    return {
        "size": stat.size,
        "modified": stat.modified,
        "mimetype": ctype,
    }
def _start_function(function, auth_info: mlrun.api.schemas.AuthInfo):
    """Invoke the runtime-specific 'start' handler for *function*.

    Opens (and always closes) its own DB session; raises 400 via
    log_and_raise when the runtime has no start handler or the start fails.
    """
    db_session = mlrun.api.db.session.create_session()
    try:
        resource = runtime_resources_map.get(function.kind)
        if "start" not in resource:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="runtime error: 'start' not supported by this runtime",
            )
        try:
            function.set_db_connection(get_run_db_instance(db_session))
            mlrun.api.api.utils.ensure_function_has_auth_set(function, auth_info)
            mlrun.api.api.utils.process_function_service_account(function)
            # resp = resource["start"](fn)  # TODO: handle resp?
            resource["start"](function)
            function.save(versioned=False)
            logger.info("Fn:\n %s", function.to_yaml())
        except Exception as err:
            logger.error(traceback.format_exc())
            log_and_raise(HTTPStatus.BAD_REQUEST.value, reason=f"runtime error: {err}")
    finally:
        mlrun.api.db.session.close_session(db_session)
async def store_function(
    request: Request,
    project: str,
    name: str,
    tag: str = "",
    versioned: bool = False,
    db_session: Session = Depends(deps.get_db_session),
):
    """Store a function document from the JSON body; returns its hash key."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        # pass .value for consistency with the other handlers in this module
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.debug(data)
    logger.info("store function: project=%s, name=%s, tag=%s", project, name, tag)
    hash_key = await run_in_threadpool(
        get_db().store_function,
        db_session,
        data,
        name,
        project,
        tag=tag,
        versioned=versioned,
    )
    return {
        "hash_key": hash_key,
    }
async def update_run(
    request: Request,
    project: str,
    uid: str,
    iter: int = 0,
    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
    db_session: Session = Depends(deps.get_db_session),
):
    """Apply a partial update (JSON body) to an existing run."""
    # authorize the update on the run resource before reading the body
    await run_in_threadpool(
        mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions,
        mlrun.api.schemas.AuthorizationResourceTypes.run,
        project,
        uid,
        mlrun.api.schemas.AuthorizationAction.update,
        auth_info,
    )
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    await run_in_threadpool(
        mlrun.api.crud.Runs().update_run, db_session, project, uid, iter, data
    )
    return {}
def _build_function(
    db_session,
    auth_info: mlrun.api.schemas.AuthInfo,
    function,
    with_mlrun,
    skip_deployed,
    mlrun_version_specifier,
):
    """Build or deploy the given function spec; returns ``(fn, ready)``.

    Nuclio runtimes are deployed asynchronously (ready=False, poll the get
    status API); other runtimes are built synchronously.
    """
    fn = None
    ready = None
    try:
        fn = new_function(runtime=function)
        fn.set_db_connection(get_run_db_instance(db_session, auth_info.session))
        fn.save(versioned=False)
        if fn.kind not in RuntimeKinds.nuclio_runtimes():
            ready = build_runtime(fn, with_mlrun, mlrun_version_specifier, skip_deployed)
        else:
            mlrun.api.api.utils.ensure_function_has_auth_set(fn, auth_info)
            deploy_nuclio_function(fn)
            # deploy only start the process, the get status API is used to check readiness
            ready = False
        fn.save(versioned=True)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason=f"runtime error: {err}")
    return fn, ready
def get_filestat(request: Request, schema: str = "", path: str = "", user: str = ""):
    """Return size, modified time and mimetype for the object at *path*.

    :raises: 404 via log_and_raise for an illegal path or a missing object.
    """
    # removed `_, filename = path.split(path)`: splitting a string by itself
    # always yields ["", ""] (and raises ValueError on an empty path), and the
    # result was never used
    path = get_obj_path(schema, path, user=user)
    if not path:
        log_and_raise(HTTPStatus.NOT_FOUND, path=path, err="illegal path prefix or schema")

    secrets = get_secrets(request)
    stat = None
    try:
        stat = get_object_stat(path, secrets)
    except FileNotFoundError as e:
        log_and_raise(HTTPStatus.NOT_FOUND, path=path, err=str(e))

    ctype, _ = mimetypes.guess_type(path)
    if not ctype:
        ctype = "application/octet-stream"

    return {
        "size": stat.size,
        "modified": stat.modified,
        "mimetype": ctype,
    }
def ingest_feature_set(
    request: Request,
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
    db_session: Session = Depends(deps.get_db_session),
):
    """Run ingestion for a stored feature-set.

    Loads the feature-set identified by project/name/reference from the DB,
    builds the data source/targets from the request parameters, runs ingest()
    and returns the (possibly modified) feature-set plus the run object.

    :param reference: tag or uid reference, parsed by parse_reference.
    :param username: user from the ``x-remote-user`` header; needed (together
        with a V3IO_ACCESS_KEY secret) only when v3io paths are involved.
    :raises: 400 via log_and_raise when v3io credentials are required but missing.
    """
    tag, uid = parse_reference(reference)
    feature_set_record = get_db().get_feature_set(db_session, project, name, tag, uid)
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())
    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session, auth_verifier.auth_info.session)
    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]
    run_config = RunConfig()
    # Try to deduce whether the ingest job will need v3io mount, by analyzing the paths to the source and
    # targets. If it needs it, apply v3io mount to the run_config. Note that the access-key and username are
    # user-context parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        secrets = get_secrets(request)
        access_key = secrets.get("V3IO_ACCESS_KEY", None)
        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))
    infer_options = ingest_parameters.infer_options or InferOptions.default()
    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )
    # ingest may modify the feature-set contents, so returning the updated feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )
def build_status(
    name: str = "",
    project: str = "",
    tag: str = "",
    offset: int = 0,
    logs: str = "on",
    db_session: Session = Depends(deps.get_db_session),
):
    """Report build status (and optionally build-pod logs) for a function.

    Returns a plain-text response whose body is the log slice (from *offset*)
    and whose headers carry function_status / function_image / builder_pod.
    When the build pod has finished, the function document in the DB is
    updated with the new state (and image, when ready).

    :param logs: "on"/"off"-style flag - include pod logs in the body.
    :raises: 404 via log_and_raise when the function is not found.
    """
    logs = strtobool(logs)
    fn = get_db().get_function(db_session, name, project, tag)
    if not fn:
        log_and_raise(HTTPStatus.NOT_FOUND, name=name, project=project, tag=tag)

    state = get_in(fn, "status.state", "")
    pod = get_in(fn, "status.build_pod", "")
    image = get_in(fn, "spec.build.image", "")
    out = b""
    if not pod:
        # no build pod recorded - nothing to poll; report stored state as-is
        if state == "ready":
            image = image or get_in(fn, "spec.image")
        return Response(
            content=out,
            media_type="text/plain",
            headers={
                "function_status": state,
                "function_image": image,
                "builder_pod": pod,
            },
        )

    logger.info("get pod {} status".format(pod))
    state = get_k8s().get_pod_status(pod)
    logger.info("pod state={}".format(state))

    if state == "succeeded":
        logger.info("build completed successfully")
        state = "ready"
    if state in ["failed", "error"]:
        logger.error("build {}, watch the build pod logs: {}".format(state, pod))

    # pending pods have no logs yet
    if logs and state != "pending":
        resp = get_k8s().logs(pod)
        if resp:
            out = resp.encode()[offset:]

    # persist the freshly observed state back onto the function document
    update_in(fn, "status.state", state)
    if state == "ready":
        update_in(fn, "spec.image", image)
    get_db().store_function(db_session, fn, name, project, tag)

    return Response(
        content=out,
        media_type="text/plain",
        headers={
            "function_status": state,
            "function_image": image,
            "builder_pod": pod,
        },
    )
def get_log(
    db_session: Session,
    project: str,
    uid: str,
    size: int = -1,
    offset: int = 0,
    source: LogSources = LogSources.AUTO,
):
    """Fetch run logs from the persisted log file or from the logger pod.

    Returns ``(out, status)`` - the log bytes (sliced at *offset*, at most
    *size* bytes from the file path) and the run status string. When reading
    from k8s, a finished/missing pod also causes the run document's state in
    the DB to be updated (side effect).

    :raises: 404 via log_and_raise when the run is unknown (k8s path only).
    """
    out = b""
    log_file = log_path(project, uid)
    status = None
    if log_file.exists() and source in [LogSources.AUTO, LogSources.PERSISTENCY]:
        # logs were persisted to disk - serve the requested slice
        with log_file.open("rb") as fp:
            fp.seek(offset)
            out = fp.read(size)
        status = ""
    elif source in [LogSources.AUTO, LogSources.K8S]:
        data = get_db().read_run(db_session, uid, project)
        if not data:
            log_and_raise(HTTPStatus.NOT_FOUND, project=project, uid=uid)
        status = get_in(data, "status.state", "")
        if get_k8s():
            pods = get_k8s().get_logger_pods(uid)
            if pods:
                pod, new_status = list(pods.items())[0]
                new_status = new_status.lower()

                # TODO: handle in cron/tracking
                if new_status != "pending":
                    resp = get_k8s().logs(pod)
                    if resp:
                        out = resp.encode()[offset:]
                    if status == "running":
                        # sync the run document with what the pod reports
                        now = now_date().isoformat()
                        update_in(data, "status.last_update", now)
                        if new_status == "failed":
                            update_in(data, "status.state", "error")
                            update_in(data, "status.error", "error, check logs")
                            get_db().store_run(db_session, data, uid, project)
                        if new_status == "succeeded":
                            update_in(data, "status.state", "completed")
                            get_db().store_run(db_session, data, uid, project)
                status = new_status
            elif status == "running":
                # run claims to be running but its pod is gone - mark as error
                update_in(data, "status.state", "error")
                update_in(data, "status.error", "pod not found, maybe terminated")
                get_db().store_run(db_session, data, uid, project)
                status = "failed"
    return out, status
def get_project(name: str, db_session: Session = Depends(deps.get_db_session)):
    """Return a single project document with its user names flattened.

    :raises: 404 via log_and_raise when the project does not exist.
    """
    project = get_db().get_project(db_session, name)
    if not project:
        # fixed: pass the HTTP status code - every other log_and_raise call
        # site in this module passes it positionally; omitting it left the
        # response status unspecified
        log_and_raise(HTTPStatus.NOT_FOUND.value, error=f"project {name!r} not found")

    project.users = [u.name for u in project.users]

    return {
        "project": project,
    }
def get_runtime(kind: str, label_selector: str = None):
    """List the live resources of a single runtime kind."""
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(HTTPStatus.BAD_REQUEST.value, kind=kind, err="Invalid runtime kind")
    handler = get_runtime_handler(kind)
    return {
        "kind": kind,
        "resources": handler.list_resources(label_selector),
    }
def get_runtime(kind: str, label_selector: str = None):
    """List the live resources of a single runtime kind (legacy variant)."""
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(status.HTTP_400_BAD_REQUEST, kind=kind, err="Invalid runtime kind")
    handler = get_runtime_handler(kind)
    return {
        "kind": kind,
        "resources": handler.list_resources(label_selector),
    }
async def submit_job(request: Request, db_session: Session = Depends(deps.get_db_session)):
    """Parse the JSON body and hand it to the submit() helper off the event loop."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="bad JSON body")

    logger.info("submit_job: {}".format(data))
    return await run_in_threadpool(submit, db_session, data)
async def function_status(request: Request):
    """Return the status of the function described by the JSON body."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="bad JSON body")

    function_state = await run_in_threadpool(_get_function_status, data)
    return {"data": function_state}
def delete_runtime(
    kind: str,
    label_selector: str = None,
    force: bool = False,
    db_session: Session = Depends(deps.get_db_session),
):
    """Delete all resources of a runtime kind; responds 204 on success."""
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(status.HTTP_400_BAD_REQUEST, kind=kind, err="Invalid runtime kind")
    handler = get_runtime_handler(kind)
    handler.delete_resources(get_db(), db_session, label_selector, force)
    return Response(status_code=status.HTTP_204_NO_CONTENT)
def get_pipeline(run_id, namespace: str = Query(config.namespace)):
    """Fetch a single KFP run by id and return it as a dict."""
    client = kfclient(namespace=namespace)
    try:
        run = client.get_run(run_id)
        if run:
            run = run.to_dict()
    except Exception as e:
        # any KFP client failure maps to an internal server error
        log_and_raise(
            HTTPStatus.INTERNAL_SERVER_ERROR.value,
            reason="get kfp error: {}".format(e),
        )
    return run
async def build_function(
    request: Request,
    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
    db_session: Session = Depends(deps.get_db_session),
):
    """Ensure the project exists, authorize, then build/deploy the function
    described in the JSON request body."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info(f"build_function:\n{data}")
    function = data.get("function")
    function_project = function.get("metadata", {}).get(
        "project", mlrun.mlconf.default_project
    )
    await run_in_threadpool(
        mlrun.api.utils.singletons.project_member.get_project_member().ensure_project,
        db_session,
        function_project,
        auth_info=auth_info,
    )
    await run_in_threadpool(
        mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions,
        mlrun.api.schemas.AuthorizationResourceTypes.function,
        function_project,
        function.get("metadata", {}).get("name"),
        mlrun.api.schemas.AuthorizationAction.update,
        auth_info,
    )

    # with_mlrun may arrive as a JSON bool or as a string flag
    with_mlrun = data.get("with_mlrun")
    if not isinstance(with_mlrun, bool):
        with_mlrun = strtobool(data.get("with_mlrun", "on"))
    skip_deployed = data.get("skip_deployed", False)
    mlrun_version_specifier = data.get("mlrun_version_specifier")
    fn, ready = await run_in_threadpool(
        _build_function,
        db_session,
        auth_info,
        function,
        with_mlrun,
        skip_deployed,
        mlrun_version_specifier,
        data.get("builder_env"),
    )
    return {
        "data": fn.to_dict(),
        "ready": ready,
    }
async def start_function(request: Request, db_session: Session = Depends(deps.get_db_session)):
    """Synchronously start the function described by the JSON body."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="bad JSON body")

    started_fn = await run_in_threadpool(_start_function, db_session, data)
    return {"data": started_fn.to_dict()}
def _tag_objects(db_session, data, project, name):
    """Tag every DB object matching the ``{type: query}`` mapping with *name*.

    :param data: mapping of table-type name to attribute-equality query dict.
    :raises: 400 via log_and_raise for an unknown type name.
    """
    objs = []
    for typ, query in data.items():
        cls = table2cls(typ)
        if cls is None:
            err = f"unknown type - {typ}"
            log_and_raise(HTTPStatus.BAD_REQUEST.value, reason=err)
        # {"name": "bugs"} -> [Function.name=="bugs"]
        db_query = [getattr(cls, key) == value for key, value in query.items()]
        # TODO: Change _query to query?
        # TODO: Not happy about exposing db internals to API
        objs.extend(db_session.query(cls).filter(*db_query))

    get_db().tag_objects(db_session, objs, project, name)
    return objs
async def function_status(
    request: Request,
    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
):
    """Return the status of the function described by the JSON body
    (authenticated variant)."""
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    function_state = await run_in_threadpool(_get_function_status, data, auth_info)
    return {"data": function_state}
def _start_function(db_session, data):
    """Resolve the function referenced by ``data["functionUrl"]`` and start it.

    :raises: 400 via log_and_raise when the url is missing, the function is
        unknown, the runtime has no start handler, or the start fails.
    """
    logger.info("start_function:\n{}".format(data))
    url = data.get("functionUrl")
    if not url:
        log_and_raise(
            HTTPStatus.BAD_REQUEST,
            reason="runtime error: functionUrl not specified",
        )

    project, name, tag, hash_key = parse_function_uri(url)
    runtime = get_db().get_function(db_session, name, project, tag, hash_key)
    if not runtime:
        log_and_raise(
            HTTPStatus.BAD_REQUEST,
            reason="runtime error: function {} not found".format(url),
        )

    fn = new_function(runtime=runtime)
    resource = runtime_resources_map.get(fn.kind)
    if "start" not in resource:
        log_and_raise(
            HTTPStatus.BAD_REQUEST,
            reason="runtime error: 'start' not supported by this runtime",
        )

    try:
        fn.set_db_connection(get_run_db_instance(db_session))
        # resp = resource["start"](fn)  # TODO: handle resp?
        resource["start"](fn)
        fn.save(versioned=False)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="runtime error: {}".format(err))
    return fn
def __init__(self, request: Request):
    """Validate the request's Authorization header against configured auth.

    Sets ``username``/``password`` when basic auth is configured, or
    ``token`` when bearer auth is configured; raises 401 via log_and_raise
    on missing or mismatched credentials.
    """
    self.username = None
    self.password = None
    self.token = None

    cfg = config.httpdb
    header = request.headers.get("Authorization", "")
    if self._basic_auth_required(cfg):
        if not header.startswith(self._basic_prefix):
            log_and_raise(HTTPStatus.UNAUTHORIZED.value, reason="missing basic auth")
        user, password = self._parse_basic_auth(header)
        if user != cfg.user or password != cfg.password:
            log_and_raise(HTTPStatus.UNAUTHORIZED.value, reason="bad basic auth")
        self.username = user
        self.password = password
    elif self._bearer_auth_required(cfg):
        if not header.startswith(self._bearer_prefix):
            # fixed: was HTTPStatus.UNAUTHORIZED.valueD - an AttributeError
            # that would mask the real 401 with a 500
            log_and_raise(HTTPStatus.UNAUTHORIZED.value, reason="missing bearer auth")
        token = header[len(self._bearer_prefix):]
        if token != cfg.token:
            # fixed message: this is the bearer branch, not basic auth
            log_and_raise(HTTPStatus.UNAUTHORIZED.value, reason="bad bearer auth")
        self.token = token
async def store_function(
    request: Request,
    project: str,
    name: str,
    tag: str = "",
    versioned: bool = False,
    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
    db_session: Session = Depends(deps.get_db_session),
):
    """Ensure the project exists, authorize, then store the function document."""
    await run_in_threadpool(
        mlrun.api.utils.singletons.project_member.get_project_member().ensure_project,
        db_session,
        project,
        auth_info=auth_info,
    )
    await run_in_threadpool(
        mlrun.api.utils.auth.verifier.AuthVerifier().query_project_resource_permissions,
        mlrun.api.schemas.AuthorizationResourceTypes.function,
        project,
        name,
        mlrun.api.schemas.AuthorizationAction.store,
        auth_info,
    )

    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.debug("Storing function", project=project, name=name, tag=tag, data=data)
    hash_key = await run_in_threadpool(
        mlrun.api.crud.Functions().store_function,
        db_session,
        data,
        name,
        project,
        tag=tag,
        versioned=versioned,
    )
    return {"hash_key": hash_key}
def delete_runtime(
    kind: str,
    label_selector: str = None,
    force: bool = False,
    grace_period: int = config.runtime_resources_deletion_grace_period,
    db_session: Session = Depends(deps.get_db_session),
):
    """Delete resources of a runtime kind (with grace period); 204 on success."""
    if kind not in RuntimeKinds.runtime_with_handlers():
        log_and_raise(HTTPStatus.BAD_REQUEST.value, kind=kind, err="Invalid runtime kind")
    handler = get_runtime_handler(kind)
    handler.delete_resources(get_db(), db_session, label_selector, force, grace_period)
    return Response(status_code=HTTPStatus.NO_CONTENT.value)