def del_artifact(
    project: str,
    uid: str,
    key: str,
    tag: str = "",
    db_session: Session = Depends(deps.get_db_session),
):
    get_db().del_artifact(db_session, key, tag, project)
    return {}

def del_artifacts(
    project: str = "",
    name: str = "",
    tag: str = "",
    labels: List[str] = Query([], alias="label"),
    db_session: Session = Depends(deps.get_db_session),
):
    get_db().del_artifacts(db_session, name, project, tag, labels)
    return {}

def delete_schedule(
    self,
    db_session: Session,
    project: str,
    name: str,
):
    logger.debug("Deleting schedule", project=project, name=name)
    self._remove_schedule_scheduler_resources(project, name)
    get_db().delete_schedule(db_session, project, name)

def del_runs(
    project: str = None,
    name: str = None,
    labels: List[str] = Query([], alias="label"),
    state: str = None,
    days_ago: int = 0,
    db_session: Session = Depends(deps.get_db_session),
):
    get_db().del_runs(db_session, name, project, labels, state, days_ago)
    return {}

def update_feature_set(
    project: str,
    name: str,
    feature_set_update: schemas.FeatureSetUpdate,
    reference: str,
    db_session: Session = Depends(deps.get_db_session),
):
    tag, uid = parse_reference(reference)
    get_db().update_feature_set(
        db_session, project, name, feature_set_update, tag, uid
    )
    return Response(status_code=HTTPStatus.OK.value)

async def submit_run_wrapper(
    scheduled_object, project_name, schedule_name, schedule_concurrency_limit
):
    # import here to avoid circular imports
    from mlrun.api.api.utils import submit_run

    # remove the schedule from the body, otherwise when the scheduler submits
    # this task it will go into an endless scheduling loop
    scheduled_object.pop("schedule", None)

    # remove the uid from the task metadata so that a new uid is generated for
    # every run, otherwise all runs would share the same uid
    scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

    if "task" in scheduled_object and "metadata" in scheduled_object["task"]:
        scheduled_object["task"]["metadata"].setdefault("labels", {})
        scheduled_object["task"]["metadata"]["labels"][
            schemas.constants.LabelNames.schedule_name
        ] = schedule_name

    db_session = create_session()

    active_runs = get_db().list_runs(
        db_session,
        state=RunStates.non_terminal_states(),
        project=project_name,
        labels=f"{schemas.constants.LabelNames.schedule_name}={schedule_name}",
    )
    if len(active_runs) >= schedule_concurrency_limit:
        logger.warn(
            "Schedule exceeded concurrency limit, skipping this run",
            project=project_name,
            schedule_name=schedule_name,
            schedule_concurrency_limit=schedule_concurrency_limit,
            active_runs=len(active_runs),
        )
        # close the session on the early-return path as well, so it isn't leaked
        close_session(db_session)
        return

    response = await submit_run(db_session, scheduled_object)

    run_metadata = response["data"]["metadata"]
    run_uri = RunObject.create_uri(
        run_metadata["project"], run_metadata["uid"], run_metadata["iteration"]
    )
    get_db().update_schedule(
        db_session,
        run_metadata["project"],
        schedule_name,
        last_run_uri=run_uri,
    )

    close_session(db_session)

    return response

def delete_feature_set(
    project: str,
    name: str,
    reference: str = None,
    db_session: Session = Depends(deps.get_db_session),
):
    tag = uid = None
    if reference:
        tag, uid = parse_reference(reference)
    get_db().delete_feature_set(db_session, project, name, tag, uid)
    return Response(status_code=HTTPStatus.NO_CONTENT.value)

def _store_run_fixture(self, db: Session):
    self.run = {
        "status": {
            "state": RunStates.created,
            "last_update": now_date().isoformat(),
        },
        "metadata": {"project": self.project, "uid": self.run_uid},
    }
    get_db().store_run(db, self.run, self.run_uid, self.project)

async def test_invoke_schedule(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    cron_trigger = schemas.ScheduleCronTrigger(year=1999)
    schedule_name = "schedule-name"
    project = config.default_project
    scheduled_object = _create_mlrun_function_and_matching_scheduled_object(db, project)

    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        schedule_name,
        schemas.ScheduleKinds.job,
        scheduled_object,
        cron_trigger,
    )
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    response_1 = await scheduler.invoke_schedule(
        db, mlrun.api.schemas.AuthInfo(), project, schedule_name
    )
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 1

    response_2 = await scheduler.invoke_schedule(
        db, mlrun.api.schemas.AuthInfo(), project, schedule_name
    )
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 2

    for run in runs:
        assert run["status"]["state"] == RunStates.completed

    response_uids = [
        response["data"]["metadata"]["uid"] for response in [response_1, response_2]
    ]
    db_uids = [run["metadata"]["uid"] for run in runs]
    assert DeepDiff(response_uids, db_uids, ignore_order=True) == {}

    schedule = scheduler.get_schedule(db, project, schedule_name, include_last_run=True)
    assert schedule.last_run is not None
    assert schedule.last_run["metadata"]["uid"] == response_uids[-1]
    assert schedule.last_run["metadata"]["project"] == project

def patch_feature_set(
    project: str,
    name: str,
    feature_set_update: dict,
    reference: str,
    patch_mode: schemas.PatchMode = Header(
        schemas.PatchMode.replace, alias=schemas.HeaderNames.patch_mode
    ),
    db_session: Session = Depends(deps.get_db_session),
):
    tag, uid = parse_reference(reference)
    get_db().patch_feature_set(
        db_session, project, name, feature_set_update, tag, uid, patch_mode
    )
    return Response(status_code=HTTPStatus.OK.value)

def delete_source(
    source_name: str,
    db_session: Session = Depends(mlrun.api.api.deps.get_db_session),
    auth_verifier: AuthVerifierDep = Depends(AuthVerifierDep),
):
    mlrun.api.utils.clients.opa.Client().query_global_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.marketplace_source,
        AuthorizationAction.delete,
        auth_verifier.auth_info,
    )
    get_db().delete_marketplace_source(db_session, source_name)
    mlrun.api.crud.Marketplace().remove_source(source_name)

def create_schedule(
    self,
    db_session: Session,
    auth_info: mlrun.api.schemas.AuthInfo,
    project: str,
    name: str,
    kind: schemas.ScheduleKinds,
    scheduled_object: Union[Dict, Callable],
    cron_trigger: Union[str, schemas.ScheduleCronTrigger],
    labels: Dict = None,
    concurrency_limit: int = config.httpdb.scheduling.default_concurrency_limit,
):
    if isinstance(cron_trigger, str):
        cron_trigger = schemas.ScheduleCronTrigger.from_crontab(cron_trigger)

    self._validate_cron_trigger(cron_trigger)

    logger.debug(
        "Creating schedule",
        project=project,
        name=name,
        kind=kind,
        scheduled_object=scheduled_object,
        cron_trigger=cron_trigger,
        labels=labels,
        concurrency_limit=concurrency_limit,
    )
    get_project_member().ensure_project(
        db_session, project, leader_session=auth_info.session
    )
    get_db().create_schedule(
        db_session,
        project,
        name,
        kind,
        scheduled_object,
        cron_trigger,
        concurrency_limit,
        labels,
    )
    self._create_schedule_in_scheduler(
        project,
        name,
        kind,
        scheduled_object,
        cron_trigger,
        concurrency_limit,
        auth_info,
    )

def update_schedule(
    self,
    db_session: Session,
    auth_info: mlrun.api.schemas.AuthInfo,
    project: str,
    name: str,
    scheduled_object: Union[Dict, Callable] = None,
    cron_trigger: Union[str, schemas.ScheduleCronTrigger] = None,
    labels: Dict = None,
    concurrency_limit: int = None,
):
    if isinstance(cron_trigger, str):
        cron_trigger = schemas.ScheduleCronTrigger.from_crontab(cron_trigger)

    if cron_trigger is not None:
        self._validate_cron_trigger(cron_trigger)

    logger.debug(
        "Updating schedule",
        project=project,
        name=name,
        scheduled_object=scheduled_object,
        cron_trigger=cron_trigger,
        labels=labels,
        concurrency_limit=concurrency_limit,
    )
    get_db().update_schedule(
        db_session,
        project,
        name,
        scheduled_object,
        cron_trigger,
        labels,
        concurrency_limit,
    )
    db_schedule = get_db().get_schedule(db_session, project, name)
    updated_schedule = self._transform_and_enrich_db_schedule(db_session, db_schedule)

    self._ensure_auth_info_has_access_key(auth_info, db_schedule.kind)
    self._store_schedule_secrets(auth_info, project, name)

    self._update_schedule_in_scheduler(
        project,
        name,
        updated_schedule.kind,
        updated_schedule.scheduled_object,
        updated_schedule.cron_trigger,
        updated_schedule.concurrency_limit,
        auth_info,
    )

def store_feature_set(
    project: str,
    name: str,
    reference: str,
    feature_set: schemas.FeatureSet,
    versioned: bool = True,
    db_session: Session = Depends(deps.get_db_session),
):
    tag, uid = parse_reference(reference)
    uid = get_db().store_feature_set(
        db_session, project, name, feature_set, tag, uid, versioned
    )
    return get_db().get_feature_set(db_session, project, name, uid=uid)

def delete_schedules(
    self,
    db_session: Session,
    project: str,
):
    schedules = self.list_schedules(
        db_session,
        project,
    )
    logger.debug("Deleting schedules", project=project)
    for schedule in schedules.schedules:
        self._remove_schedule_scheduler_resources(schedule.project, schedule.name)
    get_db().delete_schedules(db_session, project)

def _tag_objects(db_session, data, project, name):
    objs = []
    for typ, query in data.items():
        cls = table2cls(typ)
        if cls is None:
            err = f"unknown type - {typ}"
            log_and_raise(HTTPStatus.BAD_REQUEST.value, reason=err)
        # {"name": "bugs"} -> [Function.name == "bugs"]
        db_query = [getattr(cls, key) == value for key, value in query.items()]
        # TODO: Change _query to query?
        # TODO: Not happy about exposing db internals to API
        objs.extend(db_session.query(cls).filter(*db_query))
    get_db().tag_objects(db_session, objs, project, name)
    return objs

def delete_source(
    source_name: str,
    db_session: Session = Depends(mlrun.api.api.deps.get_db_session),
    auth_info: mlrun.api.schemas.AuthInfo = Depends(
        mlrun.api.api.deps.authenticate_request
    ),
):
    mlrun.api.utils.auth.verifier.AuthVerifier().query_global_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.marketplace_source,
        AuthorizationAction.delete,
        auth_info,
    )
    get_db().delete_marketplace_source(db_session, source_name)
    mlrun.api.crud.Marketplace().remove_source(source_name)

def test_monitor_run_failed_pod(self, db: Session, client: TestClient):
    list_namespaced_pods_calls = [
        [self.pending_pod],
        [self.running_pod],
        [self.failed_pod],
        # additional time for the get_logger_pods
        [self.failed_pod],
    ]
    self._mock_list_namespaced_pods(list_namespaced_pods_calls)
    expected_number_of_list_pods_calls = len(list_namespaced_pods_calls)
    log = self._mock_read_namespaced_pod_log()
    expected_monitor_cycles_to_reach_expected_state = (
        expected_number_of_list_pods_calls - 1
    )
    for _ in range(expected_monitor_cycles_to_reach_expected_state):
        self.runtime_handler.monitor_runs(get_db(), db)
    self._assert_list_namespaced_pods_calls(
        self.runtime_handler, expected_number_of_list_pods_calls
    )
    self._assert_run_reached_state(db, self.project, self.run_uid, RunStates.error)
    self._assert_run_logs(
        db,
        self.project,
        self.run_uid,
        log,
        self.failed_pod.metadata.name,
    )

def get_logs(
    self,
    db_session: Session,
    project: str,
    uid: str,
    size: int = -1,
    offset: int = 0,
    source: LogSources = LogSources.AUTO,
) -> typing.Tuple[str, bytes]:
    """
    :return: Tuple with:
        1. str of the run state (so watchers will know whether to continue polling for logs)
        2. bytes of the logs themselves
    """
    project = project or mlrun.mlconf.default_project
    out = b""
    log_file = log_path(project, uid)
    data = get_db().read_run(db_session, uid, project)
    if not data:
        log_and_raise(HTTPStatus.NOT_FOUND.value, project=project, uid=uid)
    run_state = data.get("status", {}).get("state", "")
    if log_file.exists() and source in [LogSources.AUTO, LogSources.PERSISTENCY]:
        with log_file.open("rb") as fp:
            fp.seek(offset)
            out = fp.read(size)
    elif source in [LogSources.AUTO, LogSources.K8S]:
        if get_k8s():
            pods = get_k8s().get_logger_pods(project, uid)
            if pods:
                pod, pod_phase = list(pods.items())[0]
                if pod_phase != PodPhases.pending:
                    resp = get_k8s().logs(pod)
                    if resp:
                        out = resp.encode()[offset:]
    return run_state, out

async def store_artifact(
    request: Request,
    project: str,
    uid: str,
    key: str,
    tag: str = "",
    iter: int = 0,
    db_session: Session = Depends(deps.get_db_session),
):
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.debug("Storing artifact", data=data)
    await run_in_threadpool(
        get_db().store_artifact,
        db_session,
        key,
        data,
        uid,
        iter=iter,
        tag=tag,
        project=project,
    )
    return {}

def test_monitor_run_failed_crd(self, db: Session, client: TestClient):
    list_namespaced_crds_calls = [
        [self.active_crd_dict],
        [self.failed_crd_dict],
    ]
    self._mock_list_namespaced_crds(list_namespaced_crds_calls)
    # for the get_logger_pods
    list_namespaced_pods_calls = [
        [self.launcher_pod, self.worker_pod],
    ]
    self._mock_list_namespaced_pods(list_namespaced_pods_calls)
    expected_number_of_list_crds_calls = len(list_namespaced_crds_calls)
    log = self._mock_read_namespaced_pod_log()
    expected_monitor_cycles_to_reach_expected_state = expected_number_of_list_crds_calls
    for _ in range(expected_monitor_cycles_to_reach_expected_state):
        self.runtime_handler.monitor_runs(get_db(), db)
    self._assert_list_namespaced_crds_calls(
        self.runtime_handler,
        expected_number_of_list_crds_calls,
    )
    self._assert_list_namespaced_pods_calls(
        self.runtime_handler,
        len(list_namespaced_pods_calls),
        self.pod_label_selector,
    )
    self._assert_run_reached_state(db, self.project, self.run_uid, RunStates.error)
    self._assert_run_logs(
        db,
        self.project,
        self.run_uid,
        log,
        self.launcher_pod.metadata.name,
    )

def list_tags(project: str, db_session: Session = Depends(deps.get_db_session)):
    tags = get_db().list_tags(db_session, project)
    return {
        "project": project,
        "tags": tags,
    }

def _reload_schedules(
    self,
    db_session: Session,
    auth_info: mlrun.api.schemas.AuthInfo,
):
    logger.info("Reloading schedules")
    db_schedules = get_db().list_schedules(db_session)
    for db_schedule in db_schedules:
        # don't let one failure fail the rest
        try:
            self._create_schedule_in_scheduler(
                db_schedule.project,
                db_schedule.name,
                db_schedule.kind,
                db_schedule.scheduled_object,
                db_schedule.cron_trigger,
                db_schedule.concurrency_limit,
                auth_info,
            )
        except Exception as exc:
            logger.warn(
                "Failed rescheduling job. Continuing",
                exc=str(exc),
                db_schedule=db_schedule,
            )

def create_feature_set(
    project: str,
    feature_set: schemas.FeatureSet,
    versioned: bool = True,
    db_session: Session = Depends(deps.get_db_session),
):
    feature_set_uid = get_db().create_feature_set(
        db_session, project, feature_set, versioned
    )
    return get_db().get_feature_set(
        db_session,
        project,
        feature_set.metadata.name,
        tag=feature_set.metadata.tag,
        uid=feature_set_uid,
    )

def list_artifact_tags(
    project: str,
    auth_info: mlrun.api.schemas.AuthInfo = Depends(deps.authenticate_request),
    db_session: Session = Depends(deps.get_db_session),
):
    mlrun.api.utils.auth.verifier.AuthVerifier().query_project_permissions(
        project,
        mlrun.api.schemas.AuthorizationAction.read,
        auth_info,
    )
    tag_tuples = get_db().list_artifact_tags(db_session, project)
    artifact_key_to_tag = {tag_tuple[1]: tag_tuple[2] for tag_tuple in tag_tuples}
    allowed_artifact_keys = mlrun.api.utils.auth.verifier.AuthVerifier().filter_project_resources_by_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.artifact,
        list(artifact_key_to_tag.keys()),
        lambda artifact_key: (
            project,
            artifact_key,
        ),
        auth_info,
    )
    tags = [
        tag_tuple[2]
        for tag_tuple in tag_tuples
        if tag_tuple[1] in allowed_artifact_keys
    ]
    return {
        "project": project,
        "tags": tags,
    }

async def invoke_schedule(self, db_session: Session, project: str, name: str):
    logger.debug("Invoking schedule", project=project, name=name)
    db_schedule = get_db().get_schedule(db_session, project, name)
    function, args, kwargs = self._resolve_job_function(
        db_schedule.kind, db_schedule.scheduled_object, name
    )
    return await function(*args, **kwargs)

def add_project(
    project: schemas.ProjectCreate, db_session: Session = Depends(deps.get_db_session)
):
    project_id = get_db().add_project(db_session, project.dict())
    return {
        "id": project_id,
        "name": project.name,
    }

def _create_mlrun_function_and_matching_scheduled_object(
    db: Session, project: str, handler: str = "do_nothing"
):
    function_name = "my-function"
    code_path = pathlib.Path(__file__).absolute().parent / "function.py"
    function = mlrun.code_to_function(
        name=function_name, kind="local", filename=str(code_path)
    )
    function.spec.command = str(code_path)
    hash_key = get_db().store_function(
        db, function.to_dict(), function_name, project, versioned=True
    )
    scheduled_object = {
        "task": {
            "spec": {
                "function": f"{project}/{function_name}@{hash_key}",
                "handler": handler,
            },
            "metadata": {"name": "my-task", "project": project},
        }
    }
    return scheduled_object

def test_delete_resources_completed_cluster(self, db: Session, client: TestClient):
    list_namespaced_pods_calls = [
        [self.completed_worker_pod, self.completed_scheduler_pod],
    ]
    self._mock_list_namespaced_pods(list_namespaced_pods_calls)
    self._mock_list_services([self.cluster_service])
    self._mock_delete_namespaced_pods()
    self._mock_delete_namespaced_services()
    self.runtime_handler.delete_resources(get_db(), db, grace_period=0)
    self._assert_delete_namespaced_pods(
        [
            self.completed_worker_pod.metadata.name,
            self.completed_scheduler_pod.metadata.name,
        ],
        self.completed_scheduler_pod.metadata.namespace,
    )
    self._assert_delete_namespaced_services(
        [self.completed_scheduler_pod.metadata.labels.get("dask.org/cluster-name")],
        self.completed_scheduler_pod.metadata.namespace,
    )
    self._assert_list_namespaced_pods_calls(
        self.runtime_handler, len(list_namespaced_pods_calls)
    )

def test_delete_resources_completed_pod(self, db: Session, client: TestClient):
    list_namespaced_pods_calls = [
        [self.completed_pod],
        # additional time for the get_logger_pods
        [self.completed_pod],
    ]
    self._mock_list_namespaced_pods(list_namespaced_pods_calls)
    self._mock_delete_namespaced_pods()
    log = self._mock_read_namespaced_pod_log()
    self.runtime_handler.delete_resources(get_db(), db, grace_period=0)
    self._assert_delete_namespaced_pods(
        [self.completed_pod.metadata.name], self.completed_pod.metadata.namespace
    )
    self._assert_list_namespaced_pods_calls(
        self.runtime_handler, len(list_namespaced_pods_calls)
    )
    self._assert_run_reached_state(db, self.project, self.run_uid, RunStates.completed)
    self._assert_run_logs(
        db,
        self.project,
        self.run_uid,
        log,
        self.completed_pod.metadata.name,
    )