Example #1
0
async def test_create_schedule_failure_too_frequent_cron_trigger(
    db: Session, scheduler: Scheduler
):
    """
    Verify that creating a schedule fails for cron triggers that are too frequent.

    The scheduler's minimal allowed interval is set to 10 minutes, and creating a schedule with each of the
    cron triggers below is expected to raise a ValueError carrying the "too frequent" message.
    """
    scheduler._min_allowed_interval = "10 minutes"
    cases = [
        {"second": "*"},
        {"second": "1,2"},
        {"second": "*/30"},
        {"second": "30-35"},
        {"second": "30-40/5"},
        {"minute": "*"},
        {"minute": "*/5"},
        {"minute": "43-59"},
        {"minute": "30-50/6"},
        {"minute": "1,3,5"},
        {"minute": "11,22,33,44,55,59"},
    ]
    for case in cases:
        cron_trigger = schemas.ScheduleCronTrigger(**case)
        with pytest.raises(ValueError) as excinfo:
            scheduler.create_schedule(
                db,
                mlrun.api.schemas.AuthInfo(),
                "project",
                "schedule-name",
                schemas.ScheduleKinds.local_function,
                do_nothing,
                cron_trigger,
            )
        # checked inside the loop on purpose - asserting after the loop validates only the last case's error
        assert "Cron trigger too frequent. no more then one job" in str(
            excinfo.value
        ), f"unexpected error message for cron trigger case {case}"
Example #2
0
async def test_create_schedule_mlrun_function(db: Session,
                                              scheduler: Scheduler):
    """
    Create a job-kind schedule for an mlrun function with a one second long trigger window, wait for the
    window to pass and verify that a single completed run was stored and that the schedule's last run uri
    references it.
    """
    creation_time = datetime.now()
    # starting one second from now leaves us a second to create the schedule, preventing transient failures
    window_start = creation_time + timedelta(seconds=1)
    window_end = creation_time + timedelta(seconds=2)
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=window_start,
        end_date=window_end,
    )
    name = "schedule-name"
    project = config.default_project
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )

    # sanity - no runs are expected before the schedule exists
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )
    await asyncio.sleep(2)

    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 1
    assert runs[0]["status"]["state"] == RunStates.completed

    expected_last_run_uri = f"{project}@{runs[0]['metadata']['uid']}#0"

    stored_schedule = get_db().get_schedule(db, project, name)
    assert stored_schedule.last_run_uri == expected_last_run_uri
Example #3
0
async def test_create_schedule_failure_already_exists(db: Session,
                                                      scheduler: Scheduler):
    """
    Creating the same schedule (same project and name) twice is expected to fail on the second attempt
    with an MLRunConflictError.
    """
    trigger = schemas.ScheduleCronTrigger(year="1999")
    schedule_name = "schedule-name"
    project = config.default_project

    def _create():
        # a fresh AuthInfo is built on every call, exactly like two separate inline calls would do
        scheduler.create_schedule(
            db,
            mlrun.api.schemas.AuthInfo(),
            project,
            schedule_name,
            schemas.ScheduleKinds.local_function,
            do_nothing,
            trigger,
        )

    # first creation succeeds
    _create()

    # second creation of the very same schedule conflicts
    expected_message = rf"Conflict - Schedule already exists: {project}/{schedule_name}"
    with pytest.raises(mlrun.errors.MLRunConflictError, match=expected_message):
        _create()
Example #4
0
async def test_validate_cron_trigger_multi_checks(db: Session,
                                                  scheduler: Scheduler):
    """
    _validate_cron_trigger runs 60 checks so it can validate limits as low as one minute.

    A single check would miss scenarios such as the following: the limit is 10 minutes and the cron trigger
    is configured with minute=0-45 (every minute during the first 45 minutes of every hour). If the check
    happens at minute 44 of some hour, the next run time is one minute away, but the run time following it
    is minute 0 of the next hour. The delta between these two is 15 minutes - more than 10 minutes - so the
    validation would pass, although the trigger actually fires every minute.
    """
    scheduler._min_allowed_interval = "10 minutes"
    trigger = schemas.ScheduleCronTrigger(minute="0-45")
    # 2020-02-03 04:44:30 - one minute before the last firing of the hour
    check_time = datetime(2020, 2, 3, 4, 44, 30, tzinfo=trigger.timezone)
    with pytest.raises(ValueError) as excinfo:
        scheduler._validate_cron_trigger(trigger, check_time)
    error_message = str(excinfo.value)
    assert "Cron trigger too frequent. no more then one job" in error_message
Example #5
0
async def test_not_skipping_delayed_schedules(db: Session,
                                              scheduler: Scheduler):
    """
    Verify that a scheduled job still gets executed when the event loop was blocked during its planned
    execution time (the counter bumped by the scheduled function must reach the expected value).
    """
    global call_counter
    call_counter = 0
    creation_time = datetime.now()
    expected_call_counter = 1
    # starting one second from now leaves us a second to create the schedule, preventing transient failures
    window_start = creation_time + timedelta(seconds=1)
    window_end = creation_time + timedelta(seconds=1 + expected_call_counter)
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=window_start,
        end_date=window_end,
    )
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.local_function,
        bump_counter,
        trigger,
    )
    # time.sleep is used on purpose: it blocks the reactor, so the job's planned execution time passes
    # while nothing can run, and we verify the job is executed anyway afterwards
    blocking_seconds = 2 + expected_call_counter
    time.sleep(blocking_seconds)
    await asyncio.sleep(1)
    assert call_counter == expected_call_counter
Example #6
0
async def test_create_schedule(db: Session, scheduler: Scheduler):
    """
    Create a local-function schedule firing every second inside a window sized for five jobs, wait until
    the window ends (plus a margin) and verify the counter was bumped five times.
    """
    global call_counter
    call_counter = 0

    expected_call_counter = 5
    # the helper's window leaves us time to create the schedule, preventing transient test failure
    window = _get_start_and_end_time_for_scheduled_trigger(
        number_of_jobs=5, seconds_interval=1
    )
    start_date, end_date = window
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=start_date,
        end_date=end_date,
    )
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.local_function,
        bump_counter,
        trigger,
    )

    # `second="*/1"` fires on round seconds, while the actual functional code - bumping the counter - runs
    # a few microseconds afterwards, hence the extra margin to avoid transient errors on slow systems
    seconds_until_end = (end_date - datetime.now()).total_seconds()
    time_to_sleep = seconds_until_end + schedule_end_time_margin

    await asyncio.sleep(time_to_sleep)
    assert call_counter == expected_call_counter
Example #7
0
async def test_rescheduling(db: Session, scheduler: Scheduler):
    """
    Verify a schedule keeps running after the scheduler is stopped and started again: one call is expected
    before the stop, and a second one after the restart.
    """
    global call_counter
    call_counter = 0

    expected_call_counter = 2
    window = _get_start_and_end_time_for_scheduled_trigger(
        number_of_jobs=expected_call_counter, seconds_interval=1
    )
    start_date, end_date = window
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=start_date,
        end_date=end_date,
    )
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.local_function,
        bump_counter,
        trigger,
    )

    # sleep until one second past the start date, so one run will complete
    seconds_until_start = (start_date - datetime.now()).total_seconds()
    await asyncio.sleep(seconds_until_start + 1)

    # stopping the scheduler - exactly one call should have happened by now
    await scheduler.stop()
    assert call_counter == 1

    # starting it again - another run is expected
    await scheduler.start(db)
    await asyncio.sleep(1 + schedule_end_time_margin)
    assert call_counter == 2
Example #8
0
async def test_rescheduling(db: Session, scheduler: Scheduler):
    """
    Verify a schedule keeps running after the scheduler is stopped and started again: one call is expected
    before the stop, and a second one after the restart.
    """
    global call_counter
    call_counter = 0
    window_start = datetime.now()
    window_end = window_start + timedelta(seconds=2)
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=window_start,
        end_date=window_end,
    )
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.local_function,
        bump_counter,
        trigger,
    )

    # give the first run a second to complete
    await asyncio.sleep(1)

    # stopping the scheduler - exactly one call should have happened by now
    await scheduler.stop()
    assert call_counter == 1

    # starting it again - another run is expected
    await scheduler.start(db)
    await asyncio.sleep(1)
    assert call_counter == 2
Example #9
0
def _submit_run(
    db_session: Session, auth_info: mlrun.api.schemas.AuthInfo, data
) -> typing.Tuple[typing.Optional[str], str, typing.Optional[str], typing.Dict]:
    """
    Submit a run of a function, or create a schedule for it when the request body holds a "schedule".

    :param db_session: DB session, passed on to the body parsing, the run DB creation and the scheduler
    :param auth_info:  auth info of the requester
    :param data:       the submit request body; when its "schedule" entry is a dict it is converted to a
                       ScheduleCronTrigger, otherwise it is handed to the scheduler as is
    :return: Tuple with:
        1. str of the project of the run
        2. str of the kind of the function of the run
        3. str of the uid of the run that started execution (None when it was scheduled)
        4. dict of the response info
    :raises: HTTPException and mlrun.errors.MLRunHTTPStatusError are propagated as is; any other exception
             is reported through log_and_raise with a bad request status
    """
    run_uid = None
    project = None
    # initialized up front so the success log and the return statement below never hit an unbound name
    response = None
    try:
        fn, task = _parse_submit_run_body(db_session, auth_info, data)
        run_db = get_run_db_instance(db_session, auth_info.session)
        fn.set_db_connection(run_db, True)
        logger.info("Submitting run", function=fn.to_dict(), task=task)
        schedule = data.get("schedule")
        if schedule:
            cron_trigger = schedule
            if isinstance(cron_trigger, dict):
                cron_trigger = schemas.ScheduleCronTrigger(**cron_trigger)
            project = task["metadata"]["project"]
            schedule_name = task["metadata"]["name"]
            schedule_labels = task["metadata"].get("labels")
            get_scheduler().create_schedule(
                db_session,
                auth_info,
                project,
                schedule_name,
                schemas.ScheduleKinds.job,
                data,
                cron_trigger,
                schedule_labels,
            )

            response = {
                "schedule": schedule,
                "project": project,
                "name": schedule_name,
            }
        else:
            run = fn.run(task, watch=False)
            # the run object must be checked before it is dereferenced, otherwise the guard is useless
            if run:
                run_uid = run.metadata.uid
                project = run.metadata.project
                response = run.to_dict()

    except HTTPException:
        logger.error(traceback.format_exc())
        raise
    except mlrun.errors.MLRunHTTPStatusError:
        raise
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason=f"runtime error: {err}")

    logger.info("Run submission succeeded", response=response)
    return project, fn.kind, run_uid, {"data": response}
Example #10
0
async def test_invoke_schedule(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    Invoke a job-kind schedule manually twice and verify that each invocation produced a completed run,
    that the invocation responses match the stored runs, and that the schedule's last run is the run of
    the second invocation.
    """
    trigger = schemas.ScheduleCronTrigger(year=1999)
    name = "schedule-name"
    project = config.default_project
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )

    # sanity - no runs before the schedule exists
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )

    # creating the schedule by itself must not produce any run
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    first_response = await scheduler.invoke_schedule(
        db, mlrun.api.schemas.AuthInfo(), project, name
    )
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 1

    second_response = await scheduler.invoke_schedule(
        db, mlrun.api.schemas.AuthInfo(), project, name
    )
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 2
    assert all(run["status"]["state"] == RunStates.completed for run in runs)

    response_uids = [
        first_response["data"]["metadata"]["uid"],
        second_response["data"]["metadata"]["uid"],
    ]
    db_uids = [run["metadata"]["uid"] for run in runs]
    uids_diff = DeepDiff(response_uids, db_uids, ignore_order=True)
    assert uids_diff == {}

    schedule = scheduler.get_schedule(
        db, project, name, include_last_run=True
    )
    last_run = schedule.last_run
    assert last_run is not None
    assert last_run["metadata"]["uid"] == response_uids[-1]
    assert last_run["metadata"]["project"] == project
Example #11
0
def _create_do_nothing_schedule(db: Session, scheduler: Scheduler,
                                project: str, name: str):
    """
    Create a local-function schedule named `name` in `project`, running `do_nothing` with a cron trigger
    whose year is set to 1999.
    """
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.local_function,
        do_nothing,
        schemas.ScheduleCronTrigger(year="1999"),
    )
Example #12
0
async def test_schedule_crud_secrets_handling(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    With jobs auth mocked as required, walk a job schedule through create, update and delete and verify
    after every step the schedule's secrets (and the credentials enrichment on get/list). The flow runs
    once for every schedule name in the list below.
    """
    verifier_cls = mlrun.api.utils.auth.verifier.AuthVerifier
    verifier_cls().is_jobs_auth_required = unittest.mock.Mock(return_value=True)
    for name in ["valid-secret-key", "invalid/secret/key"]:
        project = config.default_project
        function_object = _create_mlrun_function_and_matching_scheduled_object(
            db, project
        )
        access_key = "some-user-access-key"
        username = "******"
        trigger = schemas.ScheduleCronTrigger(year="1999")

        # create - the given credentials are expected to be stored
        creator_auth_info = mlrun.api.schemas.AuthInfo(
            username=username, access_key=access_key
        )
        scheduler.create_schedule(
            db,
            creator_auth_info,
            project,
            name,
            schemas.ScheduleKinds.job,
            function_object,
            trigger,
        )
        _assert_schedule_secrets(scheduler, project, name, username, access_key)
        _assert_schedule_get_and_list_credentials_enrichment(
            db, scheduler, project, name, access_key
        )

        # update (labels only) with other credentials - the new credentials are expected to be stored
        username = "******"
        access_key = "new-access-key"
        updater_auth_info = mlrun.api.schemas.AuthInfo(
            username=username, access_key=access_key
        )
        scheduler.update_schedule(
            db,
            updater_auth_info,
            project,
            name,
            labels={"label-key": "label-value"},
        )
        _assert_schedule_secrets(scheduler, project, name, username, access_key)
        _assert_schedule_get_and_list_credentials_enrichment(
            db, scheduler, project, name, access_key
        )

        # delete - no credentials are expected afterwards
        scheduler.delete_schedule(db, project, name)
        _assert_schedule_secrets(scheduler, project, name, None, None)
Example #13
0
async def test_schedule_upgrade_from_scheduler_without_credentials_store(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    Simulate an upgrade from a scheduler that did not store credentials: a job schedule is created with
    empty auth info, the scheduler is restarted with credentials storing turned on, and afterwards three
    runs are expected while the project owner is expected to be fetched exactly once.

    NOTE (kept from the original author's work-in-progress remarks): this test doesn't work because
    reloading the schedules takes for granted that there is a session. This raised the question whether a
    session is enough - after the runtimes refactor and getting auth info out of sqlrundb it will be
    enough, so the mlrun.api.utils.auth.AuthVerifier().generate_auth_info_from_session call can then also
    be removed from the scheduler run wrapper.
    """
    schedule_name = "schedule-name"
    project = config.default_project
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )
    creation_time = datetime.now()
    expected_call_counter = 3
    window_start = creation_time + timedelta(seconds=2)
    window_end = creation_time + timedelta(seconds=2 + expected_call_counter)
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=window_start,
        end_date=window_end,
    )
    # we're before the upgrade, so the schedule is created with empty auth info
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        schedule_name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )
    # the upgrade: stop the scheduler, reconfigure it to store credentials and start it again
    await scheduler.stop()
    scheduler._store_schedule_credentials_in_secrets = True
    await scheduler.start(db)

    # at this point the schedule is inside the scheduler without auth_info, so the first trigger should try
    # to generate auth info - mock the project owner lookup used for this
    username = "******"
    session = "some-session"
    get_member = mlrun.api.utils.singletons.project_member.get_project_member
    owner = mlrun.api.schemas.ProjectOwner(username=username, session=session)
    get_member().get_project_owner = unittest.mock.Mock(return_value=owner)

    await asyncio.sleep(2 + expected_call_counter + 1)
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 3
    owner_lookups = get_member().get_project_owner.call_count
    assert owner_lookups == 1
Example #14
0
async def test_schedule_upgrade_from_scheduler_without_credentials_store(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    Simulate an upgrade from a scheduler that did not store credentials: a job schedule is created with
    empty auth info, the scheduler is restarted while jobs auth is mocked as required, and afterwards
    three runs are expected while the project owner is expected to be fetched exactly once.
    """
    schedule_name = "schedule-name"
    project = config.default_project
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )

    expected_call_counter = 3
    window = _get_start_and_end_time_for_scheduled_trigger(
        number_of_jobs=expected_call_counter, seconds_interval=1
    )
    start_date, end_date = window
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=start_date,
        end_date=end_date,
    )
    # we're before the upgrade, so the schedule is created with empty auth info
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        schedule_name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )
    # the upgrade: stop the scheduler, make jobs auth required and start it again
    await scheduler.stop()
    verifier_cls = mlrun.api.utils.auth.verifier.AuthVerifier
    verifier_cls().is_jobs_auth_required = unittest.mock.Mock(return_value=True)
    await scheduler.start(db)

    # at this point the schedule is inside the scheduler without auth_info, so the first trigger should try
    # to generate auth info - mock the project owner lookup used for this
    username = "******"
    session = "some-session"
    get_member = mlrun.api.utils.singletons.project_member.get_project_member
    owner = mlrun.api.schemas.ProjectOwner(username=username, session=session)
    get_member().get_project_owner = unittest.mock.Mock(return_value=owner)

    seconds_until_end = (end_date - datetime.now()).total_seconds()
    time_to_sleep = seconds_until_end + schedule_end_time_margin

    await asyncio.sleep(time_to_sleep)
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 3
    owner_lookups = get_member().get_project_owner.call_count
    assert owner_lookups == 1
Example #15
0
async def test_schedule_job_concurrency_limit(
    db: Session,
    scheduler: Scheduler,
    concurrency_limit: int,
    run_amount: int,
    schedule_kind: schemas.ScheduleKinds,
):
    """
    Create a schedule firing every second with the given concurrency limit and verify the amount of
    executions that actually happened equals `run_amount`.

    For job-kind schedules the executions are counted through the stored runs, for any other kind through
    the global call counter.
    """
    global call_counter
    call_counter = 0

    creation_time = datetime.now()
    window_start = creation_time + timedelta(seconds=1)
    window_end = creation_time + timedelta(seconds=5)
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=window_start,
        end_date=window_end,
    )
    name = "schedule-name"
    project = config.default_project
    is_job_kind = schedule_kind == schemas.ScheduleKinds.job
    if is_job_kind:
        scheduled_object = _create_mlrun_function_and_matching_scheduled_object(
            db, project, handler="sleep_two_seconds"
        )
    else:
        scheduled_object = bump_counter_and_wait

    # sanity - no runs before the schedule exists
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schedule_kind,
        scheduled_object,
        trigger,
        concurrency_limit=concurrency_limit,
    )

    # wait long enough for all the runs to complete
    await asyncio.sleep(7)
    if schedule_kind != schemas.ScheduleKinds.job:
        assert call_counter == run_amount
    else:
        runs = get_db().list_runs(db, project=project)
        assert len(runs) == run_amount
Example #16
0
async def test_rescheduling_secrets_storing(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    With jobs auth mocked as required, verify the credentials given on schedule creation are stored as
    project secrets and are attached again to the scheduled job after the scheduler is stopped and
    started.
    """
    verifier_cls = mlrun.api.utils.auth.verifier.AuthVerifier
    verifier_cls().is_jobs_auth_required = unittest.mock.Mock(return_value=True)
    name = "schedule-name"
    project = config.default_project
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )
    username = "******"
    access_key = "some-user-access-key"
    trigger = schemas.ScheduleCronTrigger(year="1999")
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(username=username, access_key=access_key),
        project,
        name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )

    # the job argument at index 5 carries the credentials (presumably the auth info object)
    scheduled_jobs = scheduler._list_schedules_from_scheduler(project)
    assert scheduled_jobs[0].args[5].access_key == access_key
    assert scheduled_jobs[0].args[5].username == username

    secrets_cls = mlrun.api.crud.Secrets
    access_key_secret_key = secrets_cls().generate_schedule_access_key_secret_key(name)
    username_secret_key = secrets_cls().generate_schedule_username_secret_key(name)
    k8s_secrets_mock.assert_project_secrets(
        project,
        {
            access_key_secret_key: access_key,
            username_secret_key: username,
        },
    )

    await scheduler.stop()

    # a stopped scheduler holds no jobs
    scheduled_jobs = scheduler._list_schedules_from_scheduler(project)
    assert scheduled_jobs == []

    # after the restart the job is back, carrying the same credentials
    await scheduler.start(db)
    scheduled_jobs = scheduler._list_schedules_from_scheduler(project)
    assert scheduled_jobs[0].args[5].username == username
    assert scheduled_jobs[0].args[5].access_key == access_key
Example #17
0
def test_list_schedules(db: Session, client: TestClient) -> None:
    """
    Verify the list schedules endpoint responds properly, then store two schedules carrying different
    labels and verify that filtering by label (by key only, and by key=value) finds the matching one.
    """
    resp = client.get("/api/projects/default/schedules")
    assert resp.status_code == HTTPStatus.OK.value, "status"
    assert "schedules" in resp.json(), "no schedules"

    trigger = schemas.ScheduleCronTrigger(year="1999")
    project = config.default_project
    schedule_name = "schedule-name"
    schedule_name_2 = "schedule-name-2"
    labels_per_schedule = [
        (schedule_name, {"label1": "value1"}),
        (schedule_name_2, {"label2": "value2"}),
    ]
    for name, labels in labels_per_schedule:
        get_db().create_schedule(
            db,
            project,
            name,
            schemas.ScheduleKinds.local_function,
            do_nothing,
            trigger,
            config.httpdb.scheduling.default_concurrency_limit,
            labels,
        )

    # each labels filter is expected to match exactly the named schedule
    expected_matches = [
        ("label1", schedule_name),
        ("label2", schedule_name_2),
        ("label1=value1", schedule_name),
        ("label2=value2", schedule_name_2),
    ]
    for labels_filter, expected_name in expected_matches:
        _get_and_assert_single_schedule(
            client, {"labels": labels_filter}, expected_name
        )
Example #18
0
async def test_schedule_access_key_generation(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    With jobs auth mocked as required, verify an access key is obtained through the (mocked)
    get_or_create_access_key and stored in the schedule's secrets, both when a schedule is created with
    empty auth info and when it is updated with the "generate access key" marker as the access key.
    """
    verifier_cls = mlrun.api.utils.auth.verifier.AuthVerifier
    verifier_cls().is_jobs_auth_required = unittest.mock.Mock(return_value=True)
    project = config.default_project
    name = "schedule-name"
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )
    trigger = schemas.ScheduleCronTrigger(year="1999")

    # creation with empty auth info
    access_key = "generated-access-key"
    verifier_cls().get_or_create_access_key = unittest.mock.Mock(
        return_value=access_key
    )
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )
    verifier_cls().get_or_create_access_key.assert_called_once()
    _assert_schedule_secrets(scheduler, project, name, None, access_key)

    # update explicitly requesting an access key generation - a fresh mock restarts the call count
    access_key = "generated-access-key-2"
    verifier_cls().get_or_create_access_key = unittest.mock.Mock(
        return_value=access_key
    )
    generation_request = mlrun.api.schemas.AuthInfo(
        access_key=mlrun.model.Credentials.generate_access_key
    )
    scheduler.update_schedule(
        db,
        generation_request,
        project,
        name,
        labels={"label-key": "label-value"},
    )
    verifier_cls().get_or_create_access_key.assert_called_once()
    _assert_schedule_secrets(scheduler, project, name, None, access_key)
Example #19
0
async def test_create_schedule_mlrun_function(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """
    Create a job-kind schedule for an mlrun function inside a trigger window sized for a single job, wait
    until the window ends (plus a margin) and verify a single completed run was stored and that the
    schedule's last run uri references it.
    """
    expected_call_counter = 1
    # the helper's window leaves us time to create the schedule, preventing transient test failure
    window = _get_start_and_end_time_for_scheduled_trigger(
        number_of_jobs=expected_call_counter, seconds_interval=1
    )
    start_date, end_date = window
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=start_date,
        end_date=end_date,
    )
    name = "schedule-name"
    project = config.default_project
    function_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project
    )

    # sanity - no runs before the schedule exists
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0

    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        name,
        schemas.ScheduleKinds.job,
        function_object,
        trigger,
    )
    seconds_until_end = (end_date - datetime.now()).total_seconds()
    time_to_sleep = seconds_until_end + schedule_end_time_margin

    await asyncio.sleep(time_to_sleep)
    runs = get_db().list_runs(db, project=project)

    assert len(runs) == expected_call_counter

    assert runs[0]["status"]["state"] == RunStates.completed

    # by default list_runs returns the list in descending order by date
    expected_last_run_uri = f"{project}@{runs[0]['metadata']['uid']}#0"

    stored_schedule = get_db().get_schedule(db, project, name)
    assert stored_schedule.last_run_uri == expected_last_run_uri
Example #20
0
async def test_delete_schedule(db: Session, scheduler: Scheduler):
    """
    Create a single local-function schedule, delete it, and verify the listing goes from one schedule to
    none.
    """
    trigger = schemas.ScheduleCronTrigger(year="1999")
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        project,
        name,
        schemas.ScheduleKinds.local_function,
        do_nothing,
        trigger,
    )

    # the created schedule is the only one listed
    listed = scheduler.list_schedules(db)
    assert len(listed.schedules) == 1

    scheduler.delete_schedule(db, project, name)

    # nothing is listed once it was deleted
    listed = scheduler.list_schedules(db)
    assert len(listed.schedules) == 0
Example #21
0
async def test_get_schedule_datetime_fields_timezone(db: Session,
                                                     scheduler: Scheduler):
    """
    Verify the creation time and next run time of a schedule carry timezone info, both when the schedule
    is fetched by name and when it is listed.
    """
    trigger = schemas.ScheduleCronTrigger(minute="*/10")
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        project,
        name,
        schemas.ScheduleKinds.local_function,
        do_nothing,
        trigger,
    )

    # single schedule retrieval
    fetched = scheduler.get_schedule(db, project, name)
    assert fetched.creation_time.tzinfo is not None
    assert fetched.next_run_time.tzinfo is not None

    # schedules listing
    listed = scheduler.list_schedules(db, project)
    assert len(listed.schedules) == 1
    only_schedule = listed.schedules[0]
    assert only_schedule.creation_time.tzinfo is not None
    assert only_schedule.next_run_time.tzinfo is not None
Example #22
0
async def test_create_schedule_success_cron_trigger_validation(
        db: Session, scheduler: Scheduler):
    """
    With the minimal allowed interval set to 10 minutes, creating a schedule with each of the cron
    triggers below is expected to succeed (no error is expected from create_schedule).
    """
    scheduler._min_allowed_interval = "10 minutes"
    cases = [
        {"second": "1", "minute": "19"},
        {"second": "30", "minute": "9,19"},
        {"minute": "*/10"},
        {"minute": "20-40/10"},
        {"hour": "1"},
        {"year": "1999"},
        {"year": "2050"},
    ]
    # every case gets its own schedule name, derived from the case's position in the list
    for position, trigger_fields in enumerate(cases):
        trigger = schemas.ScheduleCronTrigger(**trigger_fields)
        scheduler.create_schedule(
            db,
            mlrun.api.schemas.AuthInfo(),
            "project",
            f"schedule-name-{position}",
            schemas.ScheduleKinds.local_function,
            do_nothing,
            trigger,
        )
Example #23
0
async def test_create_schedule(db: Session, scheduler: Scheduler):
    """
    Create a local-function schedule firing every second inside a five seconds long window, wait for the
    window to pass and verify the counter was bumped five times.
    """
    global call_counter
    call_counter = 0
    creation_time = datetime.now()
    expected_call_counter = 5
    # starting one second from now leaves us a second to create the schedule, preventing transient failures
    window_start = creation_time + timedelta(seconds=1)
    window_end = creation_time + timedelta(seconds=1 + expected_call_counter)
    trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=window_start,
        end_date=window_end,
    )
    name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        project,
        name,
        schemas.ScheduleKinds.local_function,
        bump_counter,
        trigger,
    )
    seconds_to_wait = 1 + expected_call_counter
    await asyncio.sleep(seconds_to_wait)
    assert call_counter == expected_call_counter
Example #24
0
async def test_rescheduling_secrets_storing(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """Check that the session is still attached to a job after a restart.

    With credential storing in secrets enabled, a schedule is created using
    a given session. The test verifies the session on the scheduler job and
    in the project secrets, stops the scheduler (no jobs expected), starts
    it again and verifies the job carries the same session.
    """
    scheduler._store_schedule_credentials_in_secrets = True
    project = config.default_project
    name = "schedule-name"

    def _scheduled_jobs():
        # jobs currently registered in the underlying scheduler for the project
        return scheduler._list_schedules_from_scheduler(project)

    scheduled_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project)
    session = "some-user-session"
    never_firing_trigger = schemas.ScheduleCronTrigger(year="1999")
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(session=session),
        project,
        name,
        schemas.ScheduleKinds.job,
        scheduled_object,
        never_firing_trigger,
    )

    # the job argument at index 5 is expected to expose the session
    assert _scheduled_jobs()[0].args[5].session == session
    expected_secrets = {
        mlrun.api.crud.Secrets().generate_schedule_secret_key(name): session
    }
    k8s_secrets_mock.assert_project_secrets(project, expected_secrets)

    await scheduler.stop()
    assert _scheduled_jobs() == []

    await scheduler.start(db)
    assert _scheduled_jobs()[0].args[5].session == session
Example #25
0
async def test_list_schedules_name_filter(db: Session, scheduler: Scheduler):
    """Check the ``~mlrun`` name filter of ``list_schedules``.

    A schedule is created for every case below. Listing with the
    ``"~mlrun"`` filter must return exactly the names flagged as findable.
    Per the cases, these are the names containing "mlrun" in any casing.
    """
    # (schedule name, whether the filter is expected to find it)
    cases = [
        ("some_prefix-mlrun", True),
        ("some_prefix-mlrun-some_suffix", True),
        ("mlrun-some_suffix", True),
        ("mlrun", True),
        ("MLRun", True),
        ("bla-MLRun-bla", True),
        ("mlun", False),
        ("mlurn", False),
        ("mluRn", False),
    ]

    project = config.default_project
    every_ten_minutes = schemas.ScheduleCronTrigger(minute="*/10")
    for name, _ in cases:
        scheduler.create_schedule(
            db,
            mlrun.api.schemas.AuthInfo(),
            project,
            name,
            schemas.ScheduleKinds.local_function,
            do_nothing,
            every_ten_minutes,
        )
    expected_schedule_names = [
        name for name, should_find in cases if should_find
    ]

    listed = scheduler.list_schedules(db, project, "~mlrun").schedules
    assert len(listed) == len(expected_schedule_names)
    for schedule in listed:
        assert schedule.name in expected_schedule_names
        # removing the name makes a duplicate in the listing fail the check
        expected_schedule_names.remove(schedule.name)
Example #26
0
async def test_create_schedule_failure_already_exists(db: Session,
                                                      scheduler: Scheduler):
    """Check that creating the same schedule twice raises a conflict error.

    The first creation must succeed. Repeating it with identical arguments
    must raise ``MLRunConflictError`` with the "already exists" message.
    """
    # NOTE(review): other create_schedule calls in this file pass an AuthInfo
    # right after `db` - confirm which signature this test targets.
    creation_args = (
        db,
        config.default_project,
        "schedule-name",
        schemas.ScheduleKinds.local_function,
        do_nothing,
        schemas.ScheduleCronTrigger(year="1999"),
    )
    scheduler.create_schedule(*creation_args)

    with pytest.raises(mlrun.errors.MLRunConflictError) as excinfo:
        scheduler.create_schedule(*creation_args)
    assert "Conflict - Schedule already exists" in str(excinfo.value)
Example #27
0
def _create_resources_of_all_kinds(db: DBInterface,
                                   db_session: sqlalchemy.orm.Session,
                                   project: str):
    """Populate ``project`` with resources of every kind.

    Through ``db`` and ``db_session`` this stores, in order: functions,
    artifacts, runs, logs, schedules, one feature set and one feature vector.

    :param db:         DB interface used to store the resources
    :param db_session: session handed to every DB call
    :param project:    name of the project owning the resources
    """
    labels = {"name": "value", "name2": "value2"}
    uids = ("some_uid", "some_uid2", "some_uid3")
    tags = ("some_tag", "some_tag2", "some_tag3")

    # Functions: every name is stored once per tag
    function_body = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    for fn_name in ("function_name_1", "function_name_2", "function_name_3"):
        for fn_tag in tags:
            db.store_function(
                db_session,
                function_body,
                fn_name,
                project,
                tag=fn_tag,
                versioned=True,
            )

    # Artifacts: every key is stored for each uid, tag and iteration
    artifact_body = {
        "bla": "blabla",
        "labels": labels,
        "status": {"bla": "blabla"},
    }
    for key in ("artifact_key_1", "artifact_key_2", "artifact_key_3"):
        for uid in uids:
            for tag in tags:
                for iteration in range(3):
                    db.store_artifact(
                        db_session,
                        key,
                        artifact_body,
                        uid,
                        iteration,
                        tag,
                        project,
                    )

    # Runs: three iterations per uid
    run_body = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    for uid in uids:
        for iteration in range(3):
            db.store_run(db_session, run_body, uid, project, iteration)

    # Logs: one log per uid
    log_body = b"some random log"
    for uid in uids:
        db.store_log(db_session, uid, project, log_body)

    # Schedules: job schedules sharing one cron trigger and the labels
    schedule_body = {
        "bla": "blabla",
        "status": {"bla": "blabla"},
    }
    cron_trigger = schemas.ScheduleCronTrigger(year=1999)
    for name in ("schedule_name_1", "schedule_name_2", "schedule_name_3"):
        db.create_schedule(
            db_session,
            project,
            name,
            schemas.ScheduleKinds.job,
            schedule_body,
            cron_trigger,
            labels,
        )

    # Feature set with a single entity and a single feature
    entity = schemas.Entity(name="ent1",
                            value_type="str",
                            labels={"label": "1"})
    feature = schemas.Feature(name="feat1",
                              value_type="str",
                              labels={"label": "1"})
    feature_set = schemas.FeatureSet(
        metadata=schemas.ObjectMetadata(name="dummy",
                                        tag="latest",
                                        labels={"owner": "nobody"}),
        spec=schemas.FeatureSetSpec(entities=[entity], features=[feature]),
        status={},
    )
    db.create_feature_set(db_session, project, feature_set)

    # Feature vector in the "created" state
    feature_vector = schemas.FeatureVector(
        metadata=schemas.ObjectMetadata(name="dummy",
                                        tag="latest",
                                        labels={"owner": "somebody"}),
        spec=schemas.ObjectSpec(),
        status=schemas.ObjectStatus(state="created"),
    )
    db.create_feature_vector(db_session, project, feature_vector)
Example #28
0
async def test_update_schedule(db: Session, scheduler: Scheduler):
    """Exercise ``update_schedule`` with labels and cron trigger changes.

    Steps: create a job schedule with a trigger limited to year 1999, then
    update the labels, update nothing, reset the labels to an empty dict and
    finally replace the trigger with one firing within the next two seconds.
    After every step the stored schedule is verified. At the end exactly one
    completed run is expected.
    """
    project = config.default_project
    schedule_name = "schedule-name"
    initial_labels = {"label1": "value1", "label2": "value2"}
    replacement_labels = {"label3": "value3", "label4": "value4"}
    inactive_cron_trigger = schemas.ScheduleCronTrigger(year="1999")

    def _update(**changes):
        # apply the given changes to the schedule under test
        scheduler.update_schedule(
            db,
            mlrun.api.schemas.AuthInfo(),
            project,
            schedule_name,
            **changes,
        )

    def _verify(expected_trigger, expected_next_run_time, expected_labels):
        # fetch the stored schedule and compare it with the expectations
        stored = scheduler.get_schedule(db, project, schedule_name)
        _assert_schedule(
            stored,
            project,
            schedule_name,
            schemas.ScheduleKinds.job,
            expected_trigger,
            expected_next_run_time,
            expected_labels,
        )

    scheduled_object = _create_mlrun_function_and_matching_scheduled_object(
        db, project)
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 0
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        schedule_name,
        schemas.ScheduleKinds.job,
        scheduled_object,
        inactive_cron_trigger,
        labels=initial_labels,
    )
    _verify(inactive_cron_trigger, None, initial_labels)

    # update labels
    _update(labels=replacement_labels)
    _verify(inactive_cron_trigger, None, replacement_labels)

    # update nothing
    _update()
    _verify(inactive_cron_trigger, None, replacement_labels)

    # update labels to empty dict
    _update(labels={})
    _verify(inactive_cron_trigger, None, {})

    # update it so it runs
    now = datetime.now()
    now_plus_1_second = now + timedelta(seconds=1)
    now_plus_2_second = now + timedelta(seconds=2)
    # starting one second from now leaves time to update the schedule,
    # which prevents transient test failures
    active_cron_trigger = schemas.ScheduleCronTrigger(
        second="*/1",
        start_date=now_plus_1_second,
        end_date=now_plus_2_second,
    )
    _update(cron_trigger=active_cron_trigger)

    # expected next run: the end date truncated to whole seconds, local tz
    expected_next_run_time = datetime(
        year=now_plus_2_second.year,
        month=now_plus_2_second.month,
        day=now_plus_2_second.day,
        hour=now_plus_2_second.hour,
        minute=now_plus_2_second.minute,
        second=now_plus_2_second.second,
        tzinfo=tzlocal(),
    )
    _verify(active_cron_trigger, expected_next_run_time, {})

    await asyncio.sleep(2)
    runs = get_db().list_runs(db, project=project)
    assert len(runs) == 1
    assert runs[0]["status"]["state"] == RunStates.completed
Example #29
0
async def test_schedule_crud_secrets_handling(
    db: Session,
    scheduler: Scheduler,
    k8s_secrets_mock: tests.api.conftest.K8sSecretsMock,
):
    """Follow the schedule secret through create, update and delete.

    With credential storing in secrets enabled, for each schedule name the
    stored secret must equal the creating session, then the updating
    session, and must be ``None`` once the schedule is deleted.
    """
    scheduler._store_schedule_credentials_in_secrets = True

    def _read_secret(project, secret_key, key_map_secret_key):
        # current value of the schedule secret, looked up through the key map
        return mlrun.api.crud.Secrets().get_secret(
            project,
            scheduler._secrets_provider,
            secret_key,
            allow_secrets_from_k8s=True,
            allow_internal_secrets=True,
            key_map_secret_key=key_map_secret_key,
        )

    # NOTE(review): the second name presumably is not usable as a secret key
    # as-is, hence the key map lookup - confirm against Secrets.
    for schedule_name in ("valid-secret-key", "invalid/secret/key"):
        project = config.default_project
        scheduled_object = _create_mlrun_function_and_matching_scheduled_object(
            db, project)
        initial_session = "some-user-session"
        never_firing_trigger = schemas.ScheduleCronTrigger(year="1999")
        scheduler.create_schedule(
            db,
            mlrun.api.schemas.AuthInfo(session=initial_session),
            project,
            schedule_name,
            schemas.ScheduleKinds.job,
            scheduled_object,
            never_firing_trigger,
        )
        secret_key = mlrun.api.crud.Secrets().generate_schedule_secret_key(
            schedule_name)
        key_map_secret_key = (
            mlrun.api.crud.Secrets().generate_schedule_key_map_secret_key())
        assert (_read_secret(project, secret_key, key_map_secret_key)
                == initial_session)

        # updating labels with another session must replace the stored one
        updated_session = "new-session"
        scheduler.update_schedule(
            db,
            mlrun.api.schemas.AuthInfo(session=updated_session),
            project,
            schedule_name,
            labels={"label-key": "label-value"},
        )
        assert (_read_secret(project, secret_key, key_map_secret_key)
                == updated_session)

        # deleting the schedule must remove the secret
        scheduler.delete_schedule(
            db,
            project,
            schedule_name,
        )
        assert _read_secret(project, secret_key, key_map_secret_key) is None
Example #30
0
async def test_get_schedule(db: Session, scheduler: Scheduler):
    """Check ``get_schedule`` and ``list_schedules`` on two schedules.

    Two local-function schedules with different labels and cron triggers are
    created. Each is fetched by name and verified, then both are verified
    through an unfiltered listing and through a listing filtered by label.
    """
    labels_1 = {
        "label1": "value1",
        "label2": "value2",
    }
    cron_trigger = schemas.ScheduleCronTrigger(year="1999")
    schedule_name = "schedule-name"
    project = config.default_project
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        schedule_name,
        schemas.ScheduleKinds.local_function,
        do_nothing,
        cron_trigger,
        labels_1,
    )
    schedule = scheduler.get_schedule(db, project, schedule_name)

    # no next run time is expected because the trigger is limited to year=1999
    _assert_schedule(
        schedule,
        project,
        schedule_name,
        schemas.ScheduleKinds.local_function,
        cron_trigger,
        None,
        labels_1,
    )

    # second schedule: limited to year 2050 in UTC, so a next run time is
    # expected for it
    labels_2 = {
        "label3": "value3",
        "label4": "value4",
    }
    year = 2050
    cron_trigger_2 = schemas.ScheduleCronTrigger(year=year, timezone="utc")
    schedule_name_2 = "schedule-name-2"
    scheduler.create_schedule(
        db,
        mlrun.api.schemas.AuthInfo(),
        project,
        schedule_name_2,
        schemas.ScheduleKinds.local_function,
        do_nothing,
        cron_trigger_2,
        labels_2,
    )
    schedule_2 = scheduler.get_schedule(db, project, schedule_name_2)
    # expected next run time: the very beginning of that year, in UTC
    year_datetime = datetime(year=year, month=1, day=1, tzinfo=timezone.utc)
    _assert_schedule(
        schedule_2,
        project,
        schedule_name_2,
        schemas.ScheduleKinds.local_function,
        cron_trigger_2,
        year_datetime,
        labels_2,
    )

    # an unfiltered listing is expected to return both schedules, the
    # first-created one first
    schedules = scheduler.list_schedules(db)
    assert len(schedules.schedules) == 2
    _assert_schedule(
        schedules.schedules[0],
        project,
        schedule_name,
        schemas.ScheduleKinds.local_function,
        cron_trigger,
        None,
        labels_1,
    )
    _assert_schedule(
        schedules.schedules[1],
        project,
        schedule_name_2,
        schemas.ScheduleKinds.local_function,
        cron_trigger_2,
        year_datetime,
        labels_2,
    )

    # filtering by a label carried only by the second schedule must return
    # just that schedule
    schedules = scheduler.list_schedules(db, labels="label3=value3")
    assert len(schedules.schedules) == 1
    _assert_schedule(
        schedules.schedules[0],
        project,
        schedule_name_2,
        schemas.ScheduleKinds.local_function,
        cron_trigger_2,
        year_datetime,
        labels_2,
    )