def data_migration_db(request) -> Generator:
    # Data migrations are performed before the API goes up, therefore there's no project member yet - that's the
    # only difference between this fixture and the db fixture. Because of the parametrization it was hard to share
    # code between them; we're going to remove filedb soon anyway, and once there are no params the code can be
    # re-used
    # TODO: fix duplication
    if request.param == "sqldb":
        dsn = "sqlite:///:memory:?check_same_thread=false"
        config.httpdb.dsn = dsn
        _init_engine()

        # the in-memory sqlite DB removes itself once all sessions are closed; this session keeps it alive for the
        # duration of the test
        db_session = create_session()
        try:
            init_data()
            db = SQLDB(dsn)
            db.initialize(db_session)
            initialize_db(db)
            yield db
        finally:
            close_session(db_session)
    elif request.param == "filedb":
        db = FileDB(config.httpdb.dirpath)
        db_session = create_session(request.param)
        try:
            db.initialize(db_session)
            yield db
        finally:
            shutil.rmtree(config.httpdb.dirpath, ignore_errors=True, onerror=None)
            close_session(db_session)
    else:
        raise Exception("Unknown db type")

def db(request) -> Generator:
    if request.param == "sqldb":
        dsn = "sqlite:///:memory:?check_same_thread=false"
        config.httpdb.dsn = dsn
        _init_engine()

        # the in-memory sqlite DB removes itself once all sessions are closed; this session keeps it alive for the
        # duration of the test
        db_session = create_session()
        try:
            init_data()
            db = SQLDB(dsn)
            db.initialize(db_session)
            initialize_db(db)
            initialize_project_member()
            yield db
        finally:
            close_session(db_session)
    elif request.param == "filedb":
        db = FileDB(config.httpdb.dirpath)
        db_session = create_session(request.param)
        try:
            db.initialize(db_session)
            yield db
        finally:
            shutil.rmtree(config.httpdb.dirpath, ignore_errors=True, onerror=None)
            close_session(db_session)
    else:
        raise Exception("Unknown db type")

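A minimal sketch of how a parametrized fixture like the `db` fixture above is typically declared and consumed in pytest. The decorator placement and the test below are assumptions for illustration, not part of the original snippets:

import pytest


@pytest.fixture(params=["sqldb", "filedb"])  # assumed decorator; request.param implies parametrization
def db(request) -> Generator:
    ...  # the fixture body shown above


def test_runs_roundtrip(db) -> None:
    # runs once per param, i.e. once against the in-memory SQL DB and once against the file DB
    ...
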
async def submit_run_wrapper(
    scheduled_object,
    project_name,
    schedule_name,
    schedule_concurrency_limit,
    auth_info: mlrun.api.schemas.AuthInfo,
):
    # import here to avoid circular imports
    from mlrun.api.api.utils import submit_run

    # removing the schedule from the body, otherwise when the scheduler submits this task it will enter an
    # endless scheduling loop
    scheduled_object.pop("schedule", None)

    # removing the uid from the task metadata so that a new uid will be generated for every run,
    # otherwise all runs will have the same uid
    scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

    if "task" in scheduled_object and "metadata" in scheduled_object["task"]:
        scheduled_object["task"]["metadata"].setdefault("labels", {})
        scheduled_object["task"]["metadata"]["labels"][
            schemas.constants.LabelNames.schedule_name
        ] = schedule_name

    db_session = create_session()

    active_runs = get_db().list_runs(
        db_session,
        state=RunStates.non_terminal_states(),
        project=project_name,
        labels=f"{schemas.constants.LabelNames.schedule_name}={schedule_name}",
    )
    if len(active_runs) >= schedule_concurrency_limit:
        logger.warn(
            "Schedule exceeded concurrency limit, skipping this run",
            project=project_name,
            schedule_name=schedule_name,
            schedule_concurrency_limit=schedule_concurrency_limit,
            active_runs=len(active_runs),
        )
        return

    response = await submit_run(db_session, auth_info, scheduled_object)

    run_metadata = response["data"]["metadata"]
    run_uri = RunObject.create_uri(
        run_metadata["project"], run_metadata["uid"], run_metadata["iteration"]
    )
    get_db().update_schedule(
        db_session,
        run_metadata["project"],
        schedule_name,
        last_run_uri=run_uri,
        leader_session=auth_info.session,
    )

    close_session(db_session)

    return response

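For context, a hedged sketch of how a coroutine wrapper like the one above could be registered as the actual scheduled job, assuming an APScheduler AsyncIOScheduler drives the schedules (the trigger, job-id scheme, and helper name are hypothetical, not taken from the original code):

from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.triggers.cron import CronTrigger


def schedule_submit_run(
    scheduler_instance: AsyncIOScheduler,
    scheduled_object: dict,
    project_name: str,
    schedule_name: str,
    schedule_concurrency_limit: int,
    auth_info,
) -> None:
    # AsyncIOScheduler runs coroutine jobs natively, so the async wrapper can be passed as-is
    scheduler_instance.add_job(
        submit_run_wrapper,
        CronTrigger.from_crontab("0 * * * *"),  # hypothetical trigger: hourly
        args=[
            scheduled_object,
            project_name,
            schedule_name,
            schedule_concurrency_limit,
            auth_info,
        ],
        id=f"{project_name}-{schedule_name}",  # hypothetical job id scheme
        replace_existing=True,
    )
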
async def submit_run_wrapper(scheduled_object, schedule_name):
    # import here to avoid circular imports
    from mlrun.api.api.utils import submit_run

    # removing the schedule from the body, otherwise when the scheduler submits this task it will enter an
    # endless scheduling loop
    scheduled_object.pop("schedule", None)

    # removing the uid from the task metadata so that a new uid will be generated for every run,
    # otherwise all runs will have the same uid
    scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

    db_session = create_session()

    response = await submit_run(db_session, scheduled_object)

    run_metadata = response["data"]["metadata"]
    run_uri = RunObject.create_uri(
        run_metadata["project"], run_metadata["uid"], run_metadata["iteration"]
    )
    get_db().update_schedule(
        db_session,
        run_metadata["project"],
        schedule_name,
        last_run_uri=run_uri,
    )

    close_session(db_session)

    return response

def init_data() -> None:
    logger.info("Creating initial data")
    db_session = create_session()
    try:
        init_db(db_session)
    finally:
        close_session(db_session)
    logger.info("Initial data created")

def _cleanup_runtimes():
    logger.debug("Cleaning runtimes")
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            runtime_handler = get_runtime_handler(kind)
            runtime_handler.delete_resources(get_db(), db_session)
    finally:
        close_session(db_session)

def _is_latest_data_version():
    db_session = create_session()
    db = mlrun.api.db.sqldb.db.SQLDB("")
    try:
        current_data_version = _resolve_current_data_version(db, db_session)
    finally:
        close_session(db_session)

    return current_data_version == latest_data_version

def _reschedule_tasks():
    db_session = None
    try:
        db_session = create_session()
        for data in get_db().list_schedules(db_session):
            if "schedule" not in data:
                logger.warning("bad scheduler data - %s", data)
                continue
            submit(db_session, data)
    finally:
        close_session(db_session)

def _cleanup_runtimes():
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            try:
                runtime_handler = get_runtime_handler(kind)
                runtime_handler.delete_resources(get_db(), db_session)
            except Exception as exc:
                logger.warning(
                    "Failed deleting resources. Ignoring", exc=str(exc), kind=kind
                )
    finally:
        close_session(db_session)

def _monitor_runs():
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            try:
                runtime_handler = get_runtime_handler(kind)
                runtime_handler.monitor_runs(get_db(), db_session)
            except Exception as exc:
                logger.warning(
                    "Failed monitoring runs. Ignoring", exc=str(exc), kind=kind
                )
    finally:
        close_session(db_session)

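A minimal sketch of how maintenance functions like _monitor_runs and _cleanup_runtimes could be driven periodically, assuming a plain asyncio loop (the interval and the helper are assumptions; the real service may use its own periodic utility):

import asyncio


async def _run_periodically(interval_seconds: float, function) -> None:
    # naive periodic driver: call the function, then sleep for the interval
    while True:
        function()
        await asyncio.sleep(interval_seconds)


# e.g., from code already running inside an event loop:
# asyncio.create_task(_run_periodically(30, _monitor_runs))
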
def init_data(from_scratch: bool = False) -> None:
    logger.info("Creating initial data")

    # run migrations on existing DB or create it with alembic
    dir_path = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
    alembic_config_path = dir_path / "alembic.ini"

    alembic_util = AlembicUtil(alembic_config_path)
    alembic_util.init_alembic(from_scratch=from_scratch)

    db_session = create_session()
    try:
        init_db(db_session)
    finally:
        close_session(db_session)
    logger.info("Initial data created")

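For context, a hedged sketch of what a helper like AlembicUtil.init_alembic conceptually does, using the public alembic API (this is an assumption about the helper, not its actual implementation):

from alembic import command
from alembic.config import Config


def _upgrade_to_head(alembic_config_path) -> None:
    alembic_cfg = Config(str(alembic_config_path))
    # run schema migrations up to the latest revision; on an empty DB this also creates the schema
    command.upgrade(alembic_cfg, "head")
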
def init_data(from_scratch: bool = False) -> None:
    logger.info("Creating initial data")

    # run schema migrations on existing DB or create it with alembic
    dir_path = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
    alembic_config_path = dir_path / "alembic.ini"

    alembic_util = AlembicUtil(alembic_config_path)
    alembic_util.init_alembic(from_scratch=from_scratch)

    db_session = create_session()
    try:
        init_db(db_session)
        _perform_data_migrations(
            db_session, mlrun.mlconf.httpdb.projects.iguazio_access_key
        )
    finally:
        close_session(db_session)
    logger.info("Initial data created")

def init_data(
    from_scratch: bool = False, perform_migrations_if_needed: bool = False
) -> None:
    MySQLUtil.wait_for_db_liveness(logger)

    sqlite_migration_util = None
    if not from_scratch and config.httpdb.db.database_migration_mode == "enabled":
        sqlite_migration_util = SQLiteMigrationUtil()
    alembic_util = _create_alembic_util()
    is_migration_needed = _is_migration_needed(alembic_util, sqlite_migration_util)
    if not from_scratch and not perform_migrations_if_needed and is_migration_needed:
        state = mlrun.api.schemas.APIStates.waiting_for_migrations
        logger.info("Migration is needed, changing API state", state=state)
        config.httpdb.state = state
        return

    logger.info("Creating initial data")
    config.httpdb.state = mlrun.api.schemas.APIStates.migrations_in_progress

    try:
        _perform_schema_migrations(alembic_util)
        _perform_database_migration(sqlite_migration_util)

        db_session = create_session()
        try:
            init_db(db_session)
            _add_initial_data(db_session)
            _perform_data_migrations(db_session)
        finally:
            close_session(db_session)
    except Exception:
        state = mlrun.api.schemas.APIStates.migrations_failed
        logger.warning("Migrations failed, changing API state", state=state)
        config.httpdb.state = state
        raise

    # if the above process actually ran a migration, the initializations that were skipped during API startup
    # should happen now - we can't do them here because they require an asyncio loop, which isn't accessible here.
    # Therefore we move to the migrations_completed state, and another component takes care of moving to online
    if is_migration_needed:
        config.httpdb.state = mlrun.api.schemas.APIStates.migrations_completed
    else:
        config.httpdb.state = mlrun.api.schemas.APIStates.online
    logger.info("Initial data created")

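Illustrative usage of the state machine above, assuming init_data is invoked from an explicit "trigger migrations" code path where blocking on migrations is acceptable:

init_data(perform_migrations_if_needed=True)
# a successful return means migrations either ran (migrations_completed) or weren't needed (online);
# a failure would have raised and left the state at migrations_failed
assert config.httpdb.state in (
    mlrun.api.schemas.APIStates.migrations_completed,
    mlrun.api.schemas.APIStates.online,
)
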
def db_session(request) -> Generator:
    db_session = create_session(request.param)
    try:
        yield db_session
    finally:
        close_session(db_session)

def get_db_session() -> Generator[Session, None, None]:
    try:
        db_session = create_session()
        yield db_session
    finally:
        close_session(db_session)

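A minimal sketch of wiring the generator above as a FastAPI dependency, so each request gets a session that the generator's finally block closes once the request finishes (the router and endpoint below are assumptions for illustration):

from fastapi import APIRouter, Depends
from sqlalchemy.orm import Session

router = APIRouter()


@router.get("/runs")
def list_runs_endpoint(db_session: Session = Depends(get_db_session)):
    # the session is created before the endpoint body runs and closed after the response is sent
    ...
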
async def submit_run_wrapper(
    scheduler,
    scheduled_object,
    project_name,
    schedule_name,
    schedule_concurrency_limit,
    auth_info: mlrun.api.schemas.AuthInfo,
):
    # import here to avoid circular imports
    import mlrun.api.crud
    from mlrun.api.api.utils import submit_run

    # removing the schedule from the body, otherwise when the scheduler submits this task it will enter an
    # endless scheduling loop
    scheduled_object.pop("schedule", None)

    # removing the uid from the task metadata so that a new uid will be generated for every run,
    # otherwise all runs will have the same uid
    scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

    if "task" in scheduled_object and "metadata" in scheduled_object["task"]:
        scheduled_object["task"]["metadata"].setdefault("labels", {})
        scheduled_object["task"]["metadata"]["labels"][
            schemas.constants.LabelNames.schedule_name
        ] = schedule_name

    db_session = create_session()

    active_runs = mlrun.api.crud.Runs().list_runs(
        db_session,
        state=RunStates.non_terminal_states(),
        project=project_name,
        labels=f"{schemas.constants.LabelNames.schedule_name}={schedule_name}",
    )
    if len(active_runs) >= schedule_concurrency_limit:
        logger.warn(
            "Schedule exceeded concurrency limit, skipping this run",
            project=project_name,
            schedule_name=schedule_name,
            schedule_concurrency_limit=schedule_concurrency_limit,
            active_runs=len(active_runs),
        )
        return

    # if credentials are needed but missing (will happen for schedules created before the scheduler started
    # storing credentials in secrets), enrich them from the project owner.
    # Note that here we're using the "knowledge" that submit_run only requires the session of the auth info
    if not auth_info.session and scheduler._store_schedule_credentials_in_secrets:
        # import here to avoid circular imports
        import mlrun.api.utils.auth
        import mlrun.api.utils.singletons.project_member

        logger.info(
            "Schedule missing auth info which is required. Trying to fill from project owner",
            project_name=project_name,
            schedule_name=schedule_name,
        )

        project_owner = mlrun.api.utils.singletons.project_member.get_project_member().get_project_owner(
            db_session, project_name
        )
        # fill the session for this run's submission as well, otherwise the enrichment would only take effect
        # on the next run
        auth_info.session = project_owner.session
        # Update the schedule with the new auth info so we won't need to do the above again on the next run
        scheduler.update_schedule(
            db_session,
            mlrun.api.schemas.AuthInfo(session=project_owner.session),
            project_name,
            schedule_name,
        )

    response = await submit_run(db_session, auth_info, scheduled_object)

    run_metadata = response["data"]["metadata"]
    run_uri = RunObject.create_uri(
        run_metadata["project"], run_metadata["uid"], run_metadata["iteration"]
    )
    get_db().update_schedule(
        db_session,
        run_metadata["project"],
        schedule_name,
        last_run_uri=run_uri,
    )

    close_session(db_session)

    return response