Example #1
def data_migration_db(request) -> Generator:
    # Data migrations are performed before the API goes up, so there is no project member yet;
    # that's the only difference between this fixture and the db fixture. Because of the
    # parameterization it was hard to share code between them; we're going to remove filedb
    # soon anyway, and once there are no params the code can be shared.
    # TODO: fix duplication
    if request.param == "sqldb":
        dsn = "sqlite:///:memory:?check_same_thread=false"
        config.httpdb.dsn = dsn
        _init_engine()

        # An in-memory sqldb removes itself once all sessions are closed; this session keeps it alive for the whole test
        db_session = create_session()
        try:
            init_data()
            db = SQLDB(dsn)
            db.initialize(db_session)
            initialize_db(db)
            yield db
        finally:
            close_session(db_session)
    elif request.param == "filedb":
        db = FileDB(config.httpdb.dirpath)
        db_session = create_session(request.param)
        try:
            db.initialize(db_session)

            yield db
        finally:
            shutil.rmtree(config.httpdb.dirpath, ignore_errors=True)
            close_session(db_session)
    else:
        raise Exception("Unknown db type")
Example #2
def db(request) -> Generator:
    if request.param == "sqldb":
        dsn = "sqlite:///:memory:?check_same_thread=false"
        config.httpdb.dsn = dsn
        _init_engine()

        # An in-memory sqldb removes itself once all sessions are closed; this session keeps it alive for the whole test
        db_session = create_session()
        try:
            init_data()
            db = SQLDB(dsn)
            db.initialize(db_session)
            initialize_db(db)
            initialize_project_member()
            yield db
        finally:
            close_session(db_session)
    elif request.param == "filedb":
        db = FileDB(config.httpdb.dirpath)
        db_session = create_session(request.param)
        try:
            db.initialize(db_session)

            yield db
        finally:
            shutil.rmtree(config.httpdb.dirpath, ignore_errors=True)
            close_session(db_session)
    else:
        raise Exception("Unknown db type")
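
Note: both fixtures above read request.param, which pytest supplies through the params argument of the fixture decorator (not visible in these snippets). A minimal, self-contained sketch of that mechanism, using plain sqlite3 in place of mlrun's SQLDB; the fixture body and the test are illustrative assumptions, not mlrun code:

import sqlite3

import pytest


# illustrative stand-in: the "params" list is what feeds request.param
@pytest.fixture(params=["sqldb"])
def db(request):
    if request.param == "sqldb":
        # an in-memory database disappears once every connection is closed,
        # so the fixture keeps one connection open for the whole test
        connection = sqlite3.connect(":memory:")
        try:
            yield connection
        finally:
            connection.close()
    else:
        raise Exception("Unknown db type")


def test_select_one(db):
    # pytest runs this test once per entry in the params list above
    assert db.execute("SELECT 1").fetchone() == (1,)
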
Example #3
    async def submit_run_wrapper(
        scheduled_object,
        project_name,
        schedule_name,
        schedule_concurrency_limit,
        auth_info: mlrun.api.schemas.AuthInfo,
    ):
        # import here to avoid circular imports
        from mlrun.api.api.utils import submit_run

        # Remove the schedule from the body; otherwise, when the scheduler submits this task,
        # it will enter an endless scheduling loop
        scheduled_object.pop("schedule", None)

        # Remove the uid from the task metadata so that a new uid is generated for every run;
        # otherwise all runs would share the same uid
        scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

        if "task" in scheduled_object and "metadata" in scheduled_object[
                "task"]:
            scheduled_object["task"]["metadata"].setdefault("labels", {})
            scheduled_object["task"]["metadata"]["labels"][
                schemas.constants.LabelNames.schedule_name] = schedule_name

        db_session = create_session()

        active_runs = get_db().list_runs(
            db_session,
            state=RunStates.non_terminal_states(),
            project=project_name,
            labels=f"{schemas.constants.LabelNames.schedule_name}={schedule_name}",
        )
        if len(active_runs) >= schedule_concurrency_limit:
            logger.warn(
                "Schedule exceeded concurrency limit, skipping this run",
                project=project_name,
                schedule_name=schedule_name,
                schedule_concurrency_limit=schedule_concurrency_limit,
                active_runs=len(active_runs),
            )
            return

        response = await submit_run(db_session, auth_info, scheduled_object)

        run_metadata = response["data"]["metadata"]
        run_uri = RunObject.create_uri(run_metadata["project"],
                                       run_metadata["uid"],
                                       run_metadata["iteration"])
        get_db().update_schedule(
            db_session,
            run_metadata["project"],
            schedule_name,
            last_run_uri=run_uri,
            leader_session=auth_info.session,
        )

        close_session(db_session)

        return response
Example #4
    async def submit_run_wrapper(scheduled_object, schedule_name):
        # import here to avoid circular imports
        from mlrun.api.api.utils import submit_run

        # Remove the schedule from the body; otherwise, when the scheduler submits this task,
        # it will enter an endless scheduling loop
        scheduled_object.pop("schedule", None)

        # Remove the uid from the task metadata so that a new uid is generated for every run;
        # otherwise all runs would share the same uid
        scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

        db_session = create_session()

        response = await submit_run(db_session, scheduled_object)

        run_metadata = response["data"]["metadata"]
        run_uri = RunObject.create_uri(run_metadata["project"],
                                       run_metadata["uid"],
                                       run_metadata["iteration"])
        get_db().update_schedule(
            db_session,
            run_metadata["project"],
            schedule_name,
            last_run_uri=run_uri,
        )

        close_session(db_session)

        return response
Example #5
def init_data() -> None:
    logger.info("Creating initial data")
    db_session = create_session()
    try:
        init_db(db_session)
    finally:
        close_session(db_session)
    logger.info("Initial data created")
Example #6
def _cleanup_runtimes():
    logger.debug('Cleaning runtimes')
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            runtime_handler = get_runtime_handler(kind)
            runtime_handler.delete_resources(get_db(), db_session)
    finally:
        close_session(db_session)
Example #7
def _is_latest_data_version():
    db_session = create_session()
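    # the empty dsn is presumably fine here: the query below goes through the explicitly created session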
    db = mlrun.api.db.sqldb.db.SQLDB("")

    try:
        current_data_version = _resolve_current_data_version(db, db_session)
    finally:
        close_session(db_session)

    return current_data_version == latest_data_version
Example #8
def _reschedule_tasks():
    db_session = None
    try:
        db_session = create_session()
        for data in get_db().list_schedules(db_session):
            if "schedule" not in data:
                logger.warning("bad scheduler data - %s", data)
                continue
            submit(db_session, data)
    finally:
        # guard against create_session itself failing, in which case there is no session to close
        if db_session:
            close_session(db_session)
Example #9
def _cleanup_runtimes():
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            try:
                runtime_handler = get_runtime_handler(kind)
                runtime_handler.delete_resources(get_db(), db_session)
            except Exception as exc:
                logger.warning("Failed deleting resources. Ignoring",
                               exc=str(exc),
                               kind=kind)
    finally:
        close_session(db_session)
Example #10
def _monitor_runs():
    db_session = create_session()
    try:
        for kind in RuntimeKinds.runtime_with_handlers():
            try:
                runtime_handler = get_runtime_handler(kind)
                runtime_handler.monitor_runs(get_db(), db_session)
            except Exception as exc:
                logger.warning("Failed monitoring runs. Ignoring",
                               exc=str(exc),
                               kind=kind)
    finally:
        close_session(db_session)
Example #11
def init_data(from_scratch: bool = False) -> None:
    logger.info("Creating initial data")

    # run migrations on existing DB or create it with alembic
    dir_path = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
    alembic_config_path = dir_path / "alembic.ini"

    alembic_util = AlembicUtil(alembic_config_path)
    alembic_util.init_alembic(from_scratch=from_scratch)

    db_session = create_session()
    try:
        init_db(db_session)
    finally:
        close_session(db_session)
    logger.info("Initial data created")
Example #12
def init_data(from_scratch: bool = False) -> None:
    logger.info("Creating initial data")

    # run schema migrations on existing DB or create it with alembic
    dir_path = pathlib.Path(os.path.dirname(os.path.realpath(__file__)))
    alembic_config_path = dir_path / "alembic.ini"

    alembic_util = AlembicUtil(alembic_config_path)
    alembic_util.init_alembic(from_scratch=from_scratch)

    db_session = create_session()
    try:
        init_db(db_session)
        _perform_data_migrations(
            db_session, mlrun.mlconf.httpdb.projects.iguazio_access_key)
    finally:
        close_session(db_session)
    logger.info("Initial data created")
Example #13
def init_data(from_scratch: bool = False,
              perform_migrations_if_needed: bool = False) -> None:
    MySQLUtil.wait_for_db_liveness(logger)

    sqlite_migration_util = None
    if not from_scratch and config.httpdb.db.database_migration_mode == "enabled":
        sqlite_migration_util = SQLiteMigrationUtil()
    alembic_util = _create_alembic_util()
    is_migration_needed = _is_migration_needed(alembic_util,
                                               sqlite_migration_util)
    if not from_scratch and not perform_migrations_if_needed and is_migration_needed:
        state = mlrun.api.schemas.APIStates.waiting_for_migrations
        logger.info("Migration is needed, changing API state", state=state)
        config.httpdb.state = state
        return

    logger.info("Creating initial data")
    config.httpdb.state = mlrun.api.schemas.APIStates.migrations_in_progress

    try:
        _perform_schema_migrations(alembic_util)

        _perform_database_migration(sqlite_migration_util)

        db_session = create_session()
        try:
            init_db(db_session)
            _add_initial_data(db_session)
            _perform_data_migrations(db_session)
        finally:
            close_session(db_session)
    except Exception:
        state = mlrun.api.schemas.APIStates.migrations_failed
        logger.warning("Migrations failed, changing API state", state=state)
        config.httpdb.state = state
        raise
    # If the above process actually ran a migration, the initializations that were skipped on
    # API startup should happen now. We can't run them here because they require an asyncio
    # loop, which isn't accessible here, so we move to the migrations_completed state and let
    # another component take care of moving to online
    if is_migration_needed:
        config.httpdb.state = mlrun.api.schemas.APIStates.migrations_completed
    else:
        config.httpdb.state = mlrun.api.schemas.APIStates.online
    logger.info("Initial data created")
Example #14
def db_session(request) -> Generator:
    db_session = create_session(request.param)
    try:
        yield db_session
    finally:
        close_session(db_session)
Example #15
def get_db_session() -> Generator[Session, None, None]:
    # create the session outside the try block, so a failure in create_session
    # doesn't reach close_session with an unbound name
    db_session = create_session()
    try:
        yield db_session
    finally:
        close_session(db_session)
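
Note: generator functions like this are typically wired in as FastAPI dependencies, which open the session before the request handler runs and close it afterwards. A self-contained sketch of that pattern with plain SQLAlchemy; the engine URL and the endpoint are assumptions for illustration:

from typing import Generator

from fastapi import Depends, FastAPI
from sqlalchemy import create_engine, text
from sqlalchemy.orm import Session, sessionmaker

engine = create_engine("sqlite://")  # assumption: in-memory DB for the sketch
SessionLocal = sessionmaker(bind=engine)

app = FastAPI()


def get_db_session() -> Generator[Session, None, None]:
    db_session = SessionLocal()
    try:
        yield db_session
    finally:
        db_session.close()


@app.get("/healthz")
def healthz(db_session: Session = Depends(get_db_session)):
    # the dependency yields the session here and closes it after the response
    return {"db_ok": db_session.execute(text("SELECT 1")).scalar() == 1}
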
Example #16
    async def submit_run_wrapper(
        scheduler,
        scheduled_object,
        project_name,
        schedule_name,
        schedule_concurrency_limit,
        auth_info: mlrun.api.schemas.AuthInfo,
    ):
        # import here to avoid circular imports
        import mlrun.api.crud
        from mlrun.api.api.utils import submit_run

        # Remove the schedule from the body; otherwise, when the scheduler submits this task,
        # it will enter an endless scheduling loop
        scheduled_object.pop("schedule", None)

        # Remove the uid from the task metadata so that a new uid is generated for every run;
        # otherwise all runs would share the same uid
        scheduled_object.get("task", {}).get("metadata", {}).pop("uid", None)

        if "task" in scheduled_object and "metadata" in scheduled_object[
                "task"]:
            scheduled_object["task"]["metadata"].setdefault("labels", {})
            scheduled_object["task"]["metadata"]["labels"][
                schemas.constants.LabelNames.schedule_name] = schedule_name

        db_session = create_session()

        active_runs = mlrun.api.crud.Runs().list_runs(
            db_session,
            state=RunStates.non_terminal_states(),
            project=project_name,
            labels=f"{schemas.constants.LabelNames.schedule_name}={schedule_name}",
        )
        if len(active_runs) >= schedule_concurrency_limit:
            logger.warn(
                "Schedule exceeded concurrency limit, skipping this run",
                project=project_name,
                schedule_name=schedule_name,
                schedule_concurrency_limit=schedule_concurrency_limit,
                active_runs=len(active_runs),
            )
            return

        # If credentials are needed but missing, enrich them (this happens for schedules created
        # by a scheduler that didn't store credentials, after upgrading to one that does).
        # Note that we rely here on the knowledge that submit_run only needs the session from the auth info
        if not auth_info.session and scheduler._store_schedule_credentials_in_secrets:
            # import here to avoid circular imports
            import mlrun.api.utils.auth
            import mlrun.api.utils.singletons.project_member

            logger.info(
                "Schedule missing auth info which is required. Trying to fill from project owner",
                project_name=project_name,
                schedule_name=schedule_name,
            )

            project_member = mlrun.api.utils.singletons.project_member.get_project_member()
            project_owner = project_member.get_project_owner(db_session, project_name)
            # Update the schedule with the new auth info so we won't need to do the above again in the next run
            scheduler.update_schedule(
                db_session,
                mlrun.api.schemas.AuthInfo(session=project_owner.session),
                project_name,
                schedule_name,
            )
            # use the project owner's session for this run as well; otherwise the
            # enrichment would only take effect on the next scheduled invocation
            auth_info.session = project_owner.session

        response = await submit_run(db_session, auth_info, scheduled_object)

        run_metadata = response["data"]["metadata"]
        run_uri = RunObject.create_uri(run_metadata["project"],
                                       run_metadata["uid"],
                                       run_metadata["iteration"])
        get_db().update_schedule(
            db_session,
            run_metadata["project"],
            schedule_name,
            last_run_uri=run_uri,
        )

        close_session(db_session)

        return response