Example #1
def test_async_basic():
    function = mlrun.new_function("tests", kind="serving")
    flow = function.set_topology("flow", engine="async")
    queue = flow.to(name="s1", class_name="ChainWithContext").to(
        "$queue", "q1", path=""
    )

    s2 = queue.to(name="s2", class_name="ChainWithContext")
    s2.to(name="s4", class_name="ChainWithContext")
    s2.to(name="s5", class_name="ChainWithContext").respond()  # this step returns the response

    queue.to(name="s3", class_name="ChainWithContext")

    # plot the graph for test & debug
    flow.plot(f"{results}/serving/async.png")

    server = function.to_mock_server()
    server.context.visits = {}
    logger.info(f"\nAsync Flow:\n{flow.to_yaml()}")
    resp = server.test(body=[])

    server.wait_for_completion()
    assert resp == ["s1", "s2", "s5"], "flow result is incorrect"
    assert server.context.visits == {
        "s1": 1,
        "s2": 1,
        "s4": 1,
        "s3": 1,
        "s5": 1,
    }, "flow didnt visit expected states"
Example #2
async def store_function(request: Request,
                         project: str,
                         name: str,
                         tag: str = "",
                         versioned: bool = False,
                         db_session: Session = Depends(deps.get_db_session)):
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="bad JSON body")

    logger.debug(data)
    logger.info("store function: project=%s, name=%s, tag=%s", project, name,
                tag)
    hash_key = await run_in_threadpool(get_db().store_function,
                                       db_session,
                                       data,
                                       name,
                                       project,
                                       tag=tag,
                                       versioned=versioned)
    return {
        'hash_key': hash_key,
    }
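store_function awaits the blocking DB write through starlette's run_in_threadpool so the event loop is not blocked. A minimal self-contained sketch of that pattern (the slow_lookup function and route path are hypothetical):

import time

from fastapi import FastAPI
from starlette.concurrency import run_in_threadpool

app = FastAPI()

def slow_lookup(key: str) -> str:
    # stand-in for a blocking call, e.g. a synchronous DB query
    time.sleep(1)
    return f"value-for-{key}"

@app.get("/lookup/{key}")
async def lookup(key: str):
    # offload the blocking call to a worker thread instead of blocking the event loop
    value = await run_in_threadpool(slow_lookup, key)
    return {"key": key, "value": value}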
Example #3
def build_function():
    try:
        data = request.get_json(force=True)
    except ValueError:
        return json_error(HTTPStatus.BAD_REQUEST, reason='bad JSON body')

    logger.info('build_function:\n{}'.format(data))
    function = data.get('function')
    with_mlrun = strtobool(data.get('with_mlrun', 'on'))

    try:
        fn = new_function(runtime=function)
        fn.set_db_connection(_db)
        fn.save(versioned=False)

        ready = build_runtime(fn, with_mlrun)
        fn.save(versioned=False)
        logger.info('Fn:\n %s', fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        return json_error(
            HTTPStatus.BAD_REQUEST,
            reason='runtime error: {}'.format(err),
        )

    return jsonify(ok=True, data=fn.to_dict(), ready=ready)
Example #4
    def ensure_project(
        self,
        db_session: sqlalchemy.orm.Session,
        name: str,
        wait_for_completion: bool = True,
        leader_session: typing.Optional[str] = None,
    ):
        project_names = self.list_projects(
            db_session,
            format_=mlrun.api.schemas.Format.name_only,
            leader_session=leader_session,
        )
        if name in project_names.projects:
            return
        logger.info(
            "Ensure project called, but project does not exist. Creating", name=name
        )
        project = mlrun.api.schemas.Project(
            metadata=mlrun.api.schemas.ProjectMetadata(name=name),
        )
        self.create_project(
            db_session,
            project,
            leader_session=leader_session,
            wait_for_completion=wait_for_completion,
        )
Example #5
def db() -> Generator:
    """
    This fixture initialize the db singleton (so it will be accessible using mlrun.api.singletons.get_db()
    and generates a db session that can be used by the test
    """
    db_file = NamedTemporaryFile(suffix="-mlrun.db")
    logger.info(f"Created temp db file: {db_file.name}")
    config.httpdb.db_type = "sqldb"
    dsn = f"sqlite:///{db_file.name}?check_same_thread=false"
    config.httpdb.dsn = dsn

    # TODO: make it simpler - doesn't make sense to call 3 different functions to initialize the db
    # we need to force re-init the engine cause otherwise it is cached between tests
    _init_engine(config.httpdb.dsn)

    # forcing from scratch because we created an empty file for the db
    init_data(from_scratch=True)
    initialize_db()
    initialize_project_member()

    # we're also running client code in tests so set dbpath as well
    # note that setting this attribute triggers connection to the run db therefore must happen after the initialization
    config.dbpath = dsn
    yield create_session()
    logger.info(f"Removing temp db file: {db_file.name}")
    db_file.close()
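A hedged sketch of how a test might consume this fixture, assuming it is registered in conftest.py (the test itself is illustrative, not taken from mlrun):

def test_db_is_initialized(db):
    # `db` is the session yielded by the fixture; the db singleton and
    # config.dbpath were set up before the yield, so both are usable here
    assert db is not None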
Example #6
def _get_function_status(data):
    logger.info("function_status:\n{}".format(data))
    selector = data.get("selector")
    kind = data.get("kind")
    if not selector or not kind:
        log_and_raise(
            HTTPStatus.BAD_REQUEST.value,
            reason="runtime error: selector or runtime kind not specified",
        )

    resource = runtime_resources_map.get(kind)
    if "status" not in resource:
        log_and_raise(
            HTTPStatus.BAD_REQUEST.value,
            reason="runtime error: 'status' not supported by this runtime",
        )

    resp = None
    try:
        resp = resource["status"](selector)
        logger.info("status: %s", resp)
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason="runtime error: {}".format(err))
Example #7
def _submit_pipeline(request, data, namespace, experiment_name, run_name):
    arguments = {}
    arguments_data = request.headers.get("pipeline-arguments")
    if arguments_data:
        arguments = ast.literal_eval(arguments_data)
        logger.info("pipeline arguments {}".format(arguments_data))

    ctype = request.headers.get("content-type", "")
    if "/yaml" in ctype:
        ctype = ".yaml"
    elif " /zip" in ctype:
        ctype = ".zip"
    else:
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="unsupported pipeline type {}".format(ctype))

    logger.info("writing file {}".format(ctype))

    print(str(data))
    pipe_tmp = tempfile.mktemp(suffix=ctype)
    with open(pipe_tmp, "wb") as fp:
        fp.write(data)

    run = None
    try:
        client = kfclient(namespace=namespace)
        experiment = client.create_experiment(name=experiment_name)
        run = client.run_pipeline(experiment.id, run_name, pipe_tmp,
                                  params=arguments)
    except Exception as e:
        remove(pipe_tmp)
        log_and_raise(HTTPStatus.BAD_REQUEST, reason="kfp err: {}".format(e))

    remove(pipe_tmp)

    return run
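The pipeline-arguments header is parsed with ast.literal_eval, so its value must be a Python-literal dict. A small illustration (the header value is a made-up example):

import ast

header_value = "{'learning_rate': 0.1, 'epochs': 3}"  # hypothetical header payload
arguments = ast.literal_eval(header_value)
assert arguments == {"learning_rate": 0.1, "epochs": 3}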
Example #8
def _build_function(
    db_session,
    auth_info: mlrun.api.schemas.AuthInfo,
    function,
    with_mlrun,
    skip_deployed,
    mlrun_version_specifier,
):
    fn = None
    ready = None
    try:
        fn = new_function(runtime=function)

        run_db = get_run_db_instance(db_session, auth_info.session)
        fn.set_db_connection(run_db)
        fn.save(versioned=False)
        if fn.kind in RuntimeKinds.nuclio_runtimes():
            mlrun.api.api.utils.ensure_function_has_auth_set(fn, auth_info)
            deploy_nuclio_function(fn)
            # deploy only starts the process; the get-status API is used to check readiness
            ready = False
        else:
            ready = build_runtime(fn, with_mlrun, mlrun_version_specifier,
                                  skip_deployed)
        fn.save(versioned=True)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST.value,
                      reason=f"runtime error: {err}")
    return fn, ready
Example #9
def _start_function(function, auth_info: mlrun.api.schemas.AuthInfo):
    db_session = mlrun.api.db.session.create_session()
    try:
        resource = runtime_resources_map.get(function.kind)
        if "start" not in resource:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="runtime error: 'start' not supported by this runtime",
            )
        try:
            run_db = get_run_db_instance(db_session)
            function.set_db_connection(run_db)
            mlrun.api.api.utils.ensure_function_has_auth_set(
                function, auth_info)
            mlrun.api.api.utils.process_function_service_account(function)
            #  resp = resource["start"](fn)  # TODO: handle resp?
            resource["start"](function)
            function.save(versioned=False)
            logger.info("Fn:\n %s", function.to_yaml())
        except Exception as err:
            logger.error(traceback.format_exc())
            log_and_raise(HTTPStatus.BAD_REQUEST.value,
                          reason=f"runtime error: {err}")
    finally:
        mlrun.api.db.session.close_session(db_session)
Example #10
def _perform_data_migrations(db_session: sqlalchemy.orm.Session):
    # FileDB is not really a thing anymore, so using SQLDB directly
    db = mlrun.api.db.sqldb.db.SQLDB("")
    logger.info("Performing data migrations")
    _fill_project_state(db, db_session)
    _fix_artifact_tags_duplications(db, db_session)
    _fix_datasets_large_previews(db, db_session)
Example #11
def test_path_control_routers():
    function = mlrun.new_function("tests", kind="serving")
    graph = function.set_topology("flow", engine="async")
    graph.to(name="s1", class_name="Echo").to(
        "*", name="r1", input_path="x", result_path="y"
    ).to(name="s3", class_name="Echo").respond()
    function.add_model("m1", class_name="ModelClass", model_path=".")
    logger.info(graph.to_yaml())
    server = function.to_mock_server()

    resp = server.test("/v2/models/m1/infer", body={"x": {"inputs": [5]}})
    server.wait_for_completion()
    print(resp)
    assert resp["y"]["outputs"] == 5, "wrong output"

    function = mlrun.new_function("tests", kind="serving")
    graph = function.set_topology("flow", engine="sync")
    graph.to(name="s1", class_name="Echo").to(
        "*mlrun.serving.routers.VotingEnsemble",
        name="r1",
        input_path="x",
        result_path="y",
        vote_type="regression",
    ).to(name="s3", class_name="Echo").respond()
    function.add_model("m1", class_name="ModelClassList", model_path=".", multiplier=10)
    function.add_model("m2", class_name="ModelClassList", model_path=".", multiplier=20)
    logger.info(graph.to_yaml())
    server = function.to_mock_server()

    resp = server.test("/v2/models/infer", body={"x": {"inputs": [[5]]}})
    server.wait_for_completion()
    # expect avg of (5*10) and (5*20) = 75
    assert resp["y"]["outputs"] == [75], "wrong output"
Example #12
    def ensure_project(
        self,
        db_session: sqlalchemy.orm.Session,
        name: str,
        wait_for_completion: bool = True,
        auth_info: mlrun.api.schemas.AuthInfo = mlrun.api.schemas.AuthInfo(),
    ) -> bool:
        project_names = self.list_projects(
            db_session,
            format_=mlrun.api.schemas.ProjectsFormat.name_only,
            leader_session=auth_info.session,
        )
        if name in project_names.projects:
            return False
        logger.info(
            "Ensure project called, but project does not exist. Creating", name=name
        )
        project = mlrun.api.schemas.Project(
            metadata=mlrun.api.schemas.ProjectMetadata(name=name),
        )
        self.create_project(
            db_session,
            project,
            leader_session=auth_info.session,
            wait_for_completion=wait_for_completion,
        )
        return True
Example #13
def start_migration(
    background_tasks: fastapi.BackgroundTasks,
    response: fastapi.Response,
):
    # we haven't yet decided who should have permission to trigger such actions, therefore no authorization for now
    # note that in api.py we do declare the authenticate_request dependency, meaning we do have authentication
    global current_migration_background_task_name
    if mlrun.mlconf.httpdb.state == mlrun.api.schemas.APIStates.migrations_in_progress:
        background_task = mlrun.api.utils.background_tasks.Handler().get_background_task(
            current_migration_background_task_name
        )
        response.status_code = http.HTTPStatus.ACCEPTED.value
        return background_task
    elif mlrun.mlconf.httpdb.state == mlrun.api.schemas.APIStates.migrations_failed:
        raise mlrun.errors.MLRunPreconditionFailedError(
            "Migrations were already triggered and failed. Restart the API to retry"
        )
    elif mlrun.mlconf.httpdb.state != mlrun.api.schemas.APIStates.waiting_for_migrations:
        return fastapi.Response(status_code=http.HTTPStatus.OK.value)
    logger.info("Starting the migration process")
    background_task = mlrun.api.utils.background_tasks.Handler().create_background_task(
        background_tasks,
        _perform_migration,
    )
    current_migration_background_task_name = background_task.metadata.name
    response.status_code = http.HTTPStatus.ACCEPTED.value
    return background_task
Example #14
async def store_run(
        request: Request,
        project: str,
        uid: str,
        iter: int = 0,
        auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
        db_session: Session = Depends(deps.get_db_session),
):
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info("Storing run", data=data)
    await run_in_threadpool(
        get_db().store_run,
        db_session,
        data,
        uid,
        project,
        iter=iter,
        leader_session=auth_verifier.auth_info.session,
    )
    return {}
Example #15
def _add_default_marketplace_source_if_needed(
        db: mlrun.api.db.sqldb.db.SQLDB, db_session: sqlalchemy.orm.Session):
    try:
        hub_marketplace_source = db.get_marketplace_source(
            db_session, config.marketplace.default_source.name)
    except mlrun.errors.MLRunNotFoundError:
        hub_marketplace_source = None

    if not hub_marketplace_source:
        hub_source = mlrun.api.schemas.MarketplaceSource.generate_default_source()
        # hub_source will be None if the configuration has marketplace.default_source.create=False
        if hub_source:
            logger.info("Adding default marketplace source")
            # Not using db.store_marketplace_source() since it doesn't allow changing the default marketplace source.
            hub_record = db._transform_marketplace_source_schema_to_record(
                mlrun.api.schemas.IndexedMarketplaceSource(
                    index=mlrun.api.schemas.marketplace.last_source_index,
                    source=hub_source,
                ))
            db_session.add(hub_record)
            db_session.commit()
        else:
            logger.info(
                "Not adding default marketplace source, per configuration")
Example #16
def _create_model_monitoring_stream(project: str):

    stream_path = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind="stream")

    _, container, stream_path = parse_model_endpoint_store_prefix(stream_path)

    # TODO: How should we configure sharding here?
    logger.info(
        "Creating model endpoint stream for project",
        project=project,
        stream_path=stream_path,
        container=container,
        endpoint=config.v3io_api,
    )

    v3io_client = v3io.dataplane.Client(
        endpoint=config.v3io_api, access_key=os.environ.get("V3IO_ACCESS_KEY"))
    response = v3io_client.create_stream(
        container=container,
        path=stream_path,
        shard_count=config.model_endpoint_monitoring.serving_stream_args.shard_count,
        retention_period_hours=config.model_endpoint_monitoring.serving_stream_args.retention_period_hours,
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
    )

    # a 400 "ResourceInUse" response means the stream already exists, which is fine;
    # anything else must be one of the expected status codes
    if not (response.status_code == 400 and "ResourceInUse" in str(response.body)):
        response.raise_for_status([409, 204])
Example #17
def _is_migration_needed(
    alembic_util: AlembicUtil,
    sqlite_migration_util: typing.Optional[SQLiteMigrationUtil],
) -> bool:
    is_database_migration_needed = False
    if sqlite_migration_util is not None:
        is_database_migration_needed = (
            sqlite_migration_util.is_database_migration_needed()
        )
    is_migration_from_scratch = alembic_util.is_migration_from_scratch()
    is_schema_migration_needed = alembic_util.is_schema_migration_needed()
    is_data_migration_needed = (
        not _is_latest_data_version()
        and config.httpdb.db.data_migrations_mode == "enabled"
    )
    is_migration_needed = is_database_migration_needed or (
        not is_migration_from_scratch
        and (is_schema_migration_needed or is_data_migration_needed)
    )
    logger.info(
        "Checking if migration is needed",
        is_migration_from_scratch=is_migration_from_scratch,
        is_schema_migration_needed=is_schema_migration_needed,
        is_data_migration_needed=is_data_migration_needed,
        is_database_migration_needed=is_database_migration_needed,
        is_migration_needed=is_migration_needed,
    )

    return is_migration_needed
Example #18
def _start_periodic_cleanup():
    interval = int(config.runtimes_cleanup_interval)
    if interval > 0:
        logger.info("Starting periodic runtimes cleanup", interval=interval)
        run_function_periodically(
            interval, _cleanup_runtimes.__name__, False, _cleanup_runtimes
        )
Example #19
def test_v2_async_mode():
    # model loading is async
    os.environ["SERVING_SPEC_ENV"] = json.dumps(asyncspec)
    context = GraphContext()
    nuclio_init_hook(context, globals(), serving_subkind)
    context.logger.info("model initialized")

    context.logger.info("test not ready, should return err 408")
    event = MockEvent("", path="/v2/models/m5/ready", method="GET")
    resp = context.mlrun_handler(context, event)
    assert (
        resp.status_code == 408
    ), f"didn't get proper ready response, expected 408, got {resp.status_code}"

    event = MockEvent(testdata, path="/v2/models/m5/infer")
    resp = context.mlrun_handler(context, event)
    context.logger.info("model responded")
    logger.info(resp)
    assert (
        resp.status_code != 200
    ), f"expected failure, got {resp.status_code} {resp.body}"

    event = MockEvent('{"model": "m5", "inputs": [5]}')
    event.trigger = "stream"
    resp = context.mlrun_handler(context, event)
    context.logger.info("model responded")
    logger.info(resp)
    data = json.loads(resp.body)
    assert data["outputs"] == 5, f"wrong model response {data}"
Example #20
def _start_periodic_runs_monitoring():
    interval = int(config.runs_monitoring_interval)
    if interval > 0:
        logger.info("Starting periodic runs monitoring", interval=interval)
        run_function_periodically(
            interval, _monitor_runs.__name__, False, _monitor_runs
        )
Example #21
def test_basic_flow():
    fn = mlrun.new_function("tests", kind="serving")
    graph = fn.set_topology("flow", engine="sync")
    graph.add_step(name="s1", class_name="Chain")
    graph.add_step(name="s2", class_name="Chain", after="$prev")
    graph.add_step(name="s3", class_name="Chain", after="$prev")

    server = fn.to_mock_server()
    # graph.plot("flow.png")
    print("\nFlow1:\n", graph.to_yaml())
    resp = server.test(body=[])
    assert resp == ["s1", "s2", "s3"], "flow1 result is incorrect"

    graph = fn.set_topology("flow", exist_ok=True, engine="sync")
    graph.add_step(name="s2", class_name="Chain")
    graph.add_step(name="s1", class_name="Chain",
                   before="s2")  # should place s1 first and s2 after it
    graph.add_step(name="s3", class_name="Chain", after="s2")

    server = fn.to_mock_server()
    logger.info(f"flow: {graph.to_yaml()}")
    resp = server.test(body=[])
    assert resp == ["s1", "s2", "s3"], "flow2 result is incorrect"

    graph = fn.set_topology("flow", exist_ok=True, engine="sync")
    graph.add_step(name="s1", class_name="Chain")
    graph.add_step(name="s3", class_name="Chain", after="$prev")
    graph.add_step(name="s2", class_name="Chain", after="s1", before="s3")

    server = fn.to_mock_server()
    logger.info(f"flow: {graph.to_yaml()}")
    resp = server.test(body=[])
    assert resp == ["s1", "s2", "s3"], "flow3 result is incorrect"
Example #22
async def build_function(request: Request,
                         db_session: Session = Depends(deps.get_db_session)):
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info(f"build_function:\n{data}")
    function = data.get("function")
    with_mlrun = strtobool(data.get("with_mlrun", "on"))
    skip_deployed = data.get("skip_deployed", False)
    mlrun_version_specifier = data.get("mlrun_version_specifier")
    fn, ready = await run_in_threadpool(
        _build_function,
        db_session,
        function,
        with_mlrun,
        skip_deployed,
        mlrun_version_specifier,
    )
    return {
        "data": fn.to_dict(),
        "ready": ready,
    }
Example #23
    def _log(
        self,
        severity: str,
        endpoint_key: EndpointKey,
        message: str,
        meta_data: Optional[dict] = None,
    ):
        now = str(datetime.now())
        df = pd.DataFrame(
            [
                {
                    "timestamp": now,
                    "ts": now,
                    "severity": severity,
                    "message": message,
                    "endpoint_key": endpoint_key.hash,
                    "meta_data": json.dumps(meta_data) if meta_data else "",
                }
            ]
        )

        df["timestamp"] = pd.to_datetime(df["timestamp"], format=ISO_8601)
        df.set_index(["timestamp", "endpoint_key"], inplace=True)

        if self.verbose:
            logger.info(message)

        get_frames_client().write(backend="tsdb", table=self.table, dfs=df)
Example #24
async def start_function(
        request: Request,
        background_tasks: BackgroundTasks,
        db_session: Session = Depends(deps.get_db_session),
):
    data = None
    try:
        data = await request.json()
    except ValueError:
        log_and_raise(HTTPStatus.BAD_REQUEST.value, reason="bad JSON body")

    logger.info("Got request to start function", body=data)

    function = await run_in_threadpool(_parse_start_function_body, db_session, data)

    background_task = await run_in_threadpool(
        mlrun.api.utils.background_tasks.Handler().create_background_task,
        db_session,
        function.metadata.project,
        background_tasks,
        _start_function,
        function,
    )

    return background_task
Example #25
def _start_function(db_session, data):
    logger.info("start_function:\n{}".format(data))
    url = data.get("functionUrl")
    if not url:
        log_and_raise(HTTPStatus.BAD_REQUEST,
                      reason="runtime error: functionUrl not specified")

    project, name, tag, hash_key = parse_function_uri(url)
    runtime = get_db().get_function(db_session, name, project, tag, hash_key)
    if not runtime:
        log_and_raise(
            HTTPStatus.BAD_REQUEST,
            reason="runtime error: function {} not found".format(url))

    fn = new_function(runtime=runtime)
    resource = runtime_resources_map.get(fn.kind)
    if "start" not in resource:
        log_and_raise(
            HTTPStatus.BAD_REQUEST,
            reason="runtime error: 'start' not supported by this runtime")

    try:
        run_db = get_run_db_instance(db_session)
        fn.set_db_connection(run_db)
        #  resp = resource["start"](fn)  # TODO: handle resp?
        resource["start"](fn)
        fn.save(versioned=False)
        logger.info("Fn:\n %s", fn.to_yaml())
    except Exception as err:
        logger.error(traceback.format_exc())
        log_and_raise(HTTPStatus.BAD_REQUEST,
                      reason="runtime error: {}".format(err))

    return fn
Example #26
    def initialize(self):
        logger.info("Initializing projects follower")
        self.projects_store_mode = (
            mlrun.mlconf.httpdb.projects.follower_projects_store_mode
        )
        if self.projects_store_mode not in self.ProjectsStoreMode.all():
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"Provided projects store mode is not supported. mode={self.projects_store_mode}"
            )
        self._projects: typing.Dict[str, mlrun.api.schemas.Project] = {}
        self._projects_store_for_deletion = self.ProjectsStore(self)
        self._leader_name = mlrun.mlconf.httpdb.projects.leader
        self._sync_session = None
        if self._leader_name == "iguazio":
            self._leader_client = mlrun.api.utils.clients.iguazio.Client()
            if not mlrun.mlconf.httpdb.projects.iguazio_access_key:
                raise mlrun.errors.MLRunInvalidArgumentError(
                    "Iguazio access key must be configured when the leader is Iguazio"
                )
            self._sync_session = mlrun.mlconf.httpdb.projects.iguazio_access_key
        elif self._leader_name == "nop":
            self._leader_client = mlrun.api.utils.projects.remotes.nop_leader.Member()
        else:
            raise NotImplementedError("Unsupported project leader")
        self._periodic_sync_interval_seconds = humanfriendly.parse_timespan(
            mlrun.mlconf.httpdb.projects.periodic_sync_interval
        )
        self._synced_until_datetime = None
        # only when storing the projects in cache do we need to maintain it, i.e. run the periodic sync
        if self.projects_store_mode == self.ProjectsStoreMode.cache:
            # run one sync to fill the cache up front, but don't fail initialization on it
            try:
                self._sync_projects()
            except Exception as exc:
                logger.warning("Initial projects sync failed", exc=str(exc))
            self._start_periodic_sync()
Example #27
async def submit_job(
    request: Request,
    username: Optional[str] = Header(None, alias="x-remote-user"),
    db_session: Session = Depends(deps.get_db_session),
):
    data = None
    try:
        data = await request.json()
    except ValueError:
        mlrun.api.api.utils.log_and_raise(
            HTTPStatus.BAD_REQUEST.value, reason="bad JSON body"
        )

    # enrich job task with the username from the request header
    if username:
        # if task is missing, we don't want to create one
        if "task" in data:
            labels = data["task"].setdefault("metadata", {}).setdefault("labels", {})
            # TODO: remove this duplication
            labels.setdefault("v3io_user", username)
            labels.setdefault("owner", username)

    logger.info("Submit run", data=data)
    response = await mlrun.api.api.utils.submit_run(db_session, data)
    return response
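The username from the request header is folded into the task's metadata labels via setdefault, so labels already present in the payload win. A small illustration of the enrichment (the payload and username are made up):

data = {"task": {"spec": {}, "metadata": {}}}  # hypothetical request body
username = "jane"

labels = data["task"].setdefault("metadata", {}).setdefault("labels", {})
labels.setdefault("v3io_user", username)
labels.setdefault("owner", username)

assert data["task"]["metadata"]["labels"] == {"v3io_user": "jane", "owner": "jane"}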
Example #28
def test_advanced_featureset():
    init_store()

    quotes_set = FeatureSet("stock-quotes", entities=[Entity("ticker")])

    flow = quotes_set.graph
    flow.to("MyMap", multiplier=3).to("storey.Extend",
                                      _fn="({'z': event['bid'] * 77})").to(
                                          "storey.Filter",
                                          "filter",
                                          _fn="(event['bid'] > 51.92)").to(
                                              FeaturesetValidator())

    quotes_set.add_aggregation("asks", "ask", ["sum", "max"], ["1h", "5h"],
                               "10m")
    quotes_set.add_aggregation("bids", "bid", ["min", "max"], ["1h"], "10m")

    df = fs.infer_metadata(
        quotes_set,
        quotes,
        entity_columns=["ticker"],
        timestamp_key="time",
        options=fs.InferOptions.default(),
    )
    logger.info(f"quotes spec: {quotes_set.spec.to_yaml()}")
    assert df["zz"].mean() == 9, "map didnt set the zz column properly"
    quotes_set["bid"].validator = MinMaxValidator(min=52, severity="info")

    quotes_set.plot(results_dir + "pipe.png", rankdir="LR", with_targets=True)
    df = fs.ingest(quotes_set, quotes, return_df=True)
    logger.info(f"output df:\n{df}")
    assert quotes_set.status.stats.get("asks_sum_1h"), "stats not created"
Example #29
def function_status():
    try:
        data = request.get_json(force=True)
    except ValueError:
        return json_error(HTTPStatus.BAD_REQUEST, reason='bad JSON body')

    logger.info('function_status:\n{}'.format(data))
    selector = data.get('selector')
    kind = data.get('kind')
    if not selector or not kind:
        return json_error(
            HTTPStatus.BAD_REQUEST,
            reason='runtime error: selector or runtime kind not specified',
        )

    resource = runtime_resources_map.get(kind)
    if 'status' not in resource:
        return json_error(
            HTTPStatus.BAD_REQUEST,
            reason='runtime error: "status" not supported by this runtime',
        )

    try:
        resp = resource['status'](selector)
        logger.info('status: %s', resp)
    except Exception as err:
        logger.error(traceback.format_exc())
        return json_error(
            HTTPStatus.BAD_REQUEST,
            reason='runtime error: {}'.format(err),
        )

    return jsonify(ok=True, data=resp)
Example #30
def test_async_nested():
    function = mlrun.new_function("tests", kind="serving")
    graph = function.set_topology("flow", engine="async")
    graph.add_step(name="s1", class_name="Echo")
    graph.add_step(name="s2", handler="multiply_input", after="s1")
    graph.add_step(name="s3", class_name="Echo", after="s2")

    router_step = graph.add_step("*", name="ensemble", after="s2")
    router_step.add_route("m1", class_name="ModelClass", model_path=".", multiplier=100)
    router_step.add_route("m2", class_name="ModelClass", model_path=".", multiplier=200)
    router_step.add_route("m3:v1", class_name="ModelClass", model_path=".", multiplier=300)

    graph.add_step(name="final", class_name="Echo", after="ensemble").respond()

    logger.info(graph.to_yaml())
    server = function.to_mock_server()

    # plot the graph for test & debug
    graph.plot(f"{results}/serving/nested.png")
    resp = server.test("/v2/models/m2/infer", body={"inputs": [5]})
    server.wait_for_completion()
    # resp should be input (5) * multiply_input (2) * m2 multiplier (200)
    assert resp["outputs"] == 5 * 2 * 200, f"wrong health response {resp}"