def store_feature_set(
    self,
    name,
    feature_set: Union[dict, schemas.FeatureSet],
    project="",
    tag=None,
    uid=None,
    versioned=True,
) -> schemas.FeatureSet:
    """Store (create or overwrite) a feature-set under a tag/uid reference.

    At most one of ``uid``/``tag`` may be provided; when neither is, the
    object is stored under the "latest" reference.

    :param name:        Name of the feature-set.
    :param feature_set: The feature-set, either as a dict or a schema object.
    :param project:     Project name; falls back to the default project.
    :param tag:         Tag to store the object under.
    :param uid:         Specific object uid to store under.
    :param versioned:   Whether the stored object should be versioned.
    :returns: The stored feature-set as returned by the server.
    :raises MLRunInvalidArgumentError: If both ``uid`` and ``tag`` are given.
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    # The wire format is a plain dict — unwrap a schema object if needed.
    payload = (
        feature_set.dict()
        if isinstance(feature_set, schemas.FeatureSet)
        else feature_set
    )
    project = project or default_project
    ref = uid or tag or "latest"
    endpoint = f"projects/{project}/feature-sets/{name}/references/{ref}"
    failure_message = f"Failed storing feature-set {project}/{name}"
    response = self.api_call(
        "PUT",
        endpoint,
        failure_message,
        params={"versioned": versioned},
        body=json.dumps(payload),
    )
    return schemas.FeatureSet(**response.json())
def ingest_feature_set(
    request: Request,
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
    db_session: Session = Depends(deps.get_db_session),
):
    """Run ingestion for an existing feature-set.

    Loads the feature-set identified by ``reference`` (a tag or uid) from the
    DB, optionally overrides its source/targets from ``ingest_parameters``,
    and runs :func:`ingest` on it. Returns the (possibly updated) feature-set
    together with the run info.
    """
    tag, uid = parse_reference(reference)
    feature_set_record = get_db().get_feature_set(db_session, project, name, tag, uid)
    # Convert the DB record into a client-side FeatureSet object so it can be
    # fed to the feature-store ingest() API.
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())
    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session, auth_verifier.auth_info.session)
    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]
    run_config = RunConfig()
    # Try to deduce whether the ingest job will need v3io mount, by analyzing the paths to the source and
    # targets. If it needs it, apply v3io mount to the run_config. Note that the access-key and username are
    # user-context parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        # The v3io access key comes from the request's secrets; the username
        # comes from the x-remote-user header. Both are required for the mount.
        secrets = get_secrets(request)
        access_key = secrets.get("V3IO_ACCESS_KEY", None)
        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))
    infer_options = ingest_parameters.infer_options or InferOptions.default()
    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )
    # ingest may modify the feature-set contents, so returning the updated feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )
def create_feature_set(
    self,
    feature_set: Union[dict, schemas.FeatureSet],
    project="",
    versioned=True,
) -> schemas.FeatureSet:
    """Create a new feature-set in the given project.

    :param feature_set: The feature-set, either as a dict or a schema object.
    :param project:     Project name; falls back to the default project.
    :param versioned:   Whether the created object should be versioned.
    :returns: The created feature-set as returned by the server.
    """
    project = project or default_project
    # Use the hyphenated resource name, consistent with the store/get
    # feature-set endpoints ("feature-sets", not "feature_sets").
    path = f"projects/{project}/feature-sets"
    params = {"versioned": versioned}
    # Normalize dict input into a schema object so the metadata name is
    # available for the error message below.
    if isinstance(feature_set, dict):
        feature_set = schemas.FeatureSet(**feature_set)
    name = feature_set.metadata.name
    error_message = f"Failed creating feature-set {project}/{name}"
    resp = self.api_call(
        "POST",
        path,
        error_message,
        params=params,
        body=json.dumps(feature_set.dict()),
    )
    return schemas.FeatureSet(**resp.json())
def get_feature_set(
    self, name: str, project: str = "", tag: str = None, uid: str = None
) -> schemas.FeatureSet:
    """Fetch a feature-set by tag or uid ("latest" when neither is given).

    :param name:    Name of the feature-set.
    :param project: Project name; falls back to the default project.
    :param tag:     Tag to look up.
    :param uid:     Specific object uid to look up.
    :returns: The retrieved feature-set.
    :raises MLRunInvalidArgumentError: If both ``uid`` and ``tag`` are given.
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    project = project or default_project
    ref = uid or tag or "latest"
    response = self.api_call(
        "GET",
        f"projects/{project}/feature-sets/{name}/references/{ref}",
        f"Failed retrieving feature-set {project}/{name}",
    )
    return schemas.FeatureSet(**response.json())
def test_create_feature_set(db: DBInterface, db_session: Session):
    """Create one feature-set and verify it shows up in listings."""
    name = "dummy"
    project = "proj_test"

    feature_set = schemas.FeatureSet(**_create_feature_set(name))
    db.create_feature_set(db_session, project, feature_set, versioned=True)
    # Retrieval by name must succeed (raises if the object is missing).
    db.get_feature_set(db_session, project, name)

    listing = db.list_feature_sets(db_session, project)
    assert len(listing.feature_sets) == 1

    # The fixture's features should be queryable through list_features too.
    feature_listing = db.list_features(db_session, project, "time")
    assert len(feature_listing.features) == 1
def _create_resources_of_all_kinds(
    db: DBInterface, db_session: sqlalchemy.orm.Session, project: str
):
    """Populate the given project with one (or several) of every resource kind.

    Used as a fixture-builder so that project-wide operations (e.g. deletion)
    can be exercised against functions, artifacts, runs, logs, schedules,
    feature-sets and feature-vectors at once.
    """
    common_labels = {
        "name": "value",
        "name2": "value2",
    }

    # Functions: 3 names x 3 tags, all stored versioned.
    function_body = {
        "bla": "blabla",
        "metadata": {"labels": common_labels},
        "status": {"bla": "blabla"},
    }
    for fn_name in ["function_name_1", "function_name_2", "function_name_3"]:
        for fn_tag in ["some_tag", "some_tag2", "some_tag3"]:
            db.store_function(
                db_session,
                function_body,
                fn_name,
                project,
                tag=fn_tag,
                versioned=True,
            )

    # Artifacts: 3 keys x 3 uids x 3 tags x 3 iterations.
    artifact_body = {
        "bla": "blabla",
        "labels": common_labels,
        "status": {"bla": "blabla"},
    }
    for key in ["artifact_key_1", "artifact_key_2", "artifact_key_3"]:
        for uid in ["some_uid", "some_uid2", "some_uid3"]:
            for tag in ["some_tag", "some_tag2", "some_tag3"]:
                for iteration in range(3):
                    db.store_artifact(
                        db_session,
                        key,
                        artifact_body,
                        uid,
                        iteration,
                        tag,
                        project,
                    )

    # Runs: 3 uids x 3 iterations.
    run_body = {
        "bla": "blabla",
        "metadata": {"labels": common_labels},
        "status": {"bla": "blabla"},
    }
    for uid in ["some_uid", "some_uid2", "some_uid3"]:
        for iteration in range(3):
            db.store_run(db_session, run_body, uid, project, iteration)

    # Logs: one per uid.
    log_body = b"some random log"
    for uid in ["some_uid", "some_uid2", "some_uid3"]:
        db.store_log(db_session, uid, project, log_body)

    # Schedules: 3 job schedules sharing a single cron trigger.
    schedule_body = {
        "bla": "blabla",
        "status": {"bla": "blabla"},
    }
    cron_trigger = schemas.ScheduleCronTrigger(year=1999)
    for schedule_name in ["schedule_name_1", "schedule_name_2", "schedule_name_3"]:
        db.create_schedule(
            db_session,
            project,
            schedule_name,
            schemas.ScheduleKinds.job,
            schedule_body,
            cron_trigger,
            common_labels,
        )

    # A single feature-set with one entity and one feature.
    feature_set = schemas.FeatureSet(
        metadata=schemas.ObjectMetadata(
            name="dummy", tag="latest", labels={"owner": "nobody"}
        ),
        spec=schemas.FeatureSetSpec(
            entities=[
                schemas.Entity(name="ent1", value_type="str", labels={"label": "1"})
            ],
            features=[
                schemas.Feature(name="feat1", value_type="str", labels={"label": "1"})
            ],
        ),
        status={},
    )
    db.create_feature_set(db_session, project, feature_set)

    # A single feature-vector.
    feature_vector = schemas.FeatureVector(
        metadata=schemas.ObjectMetadata(
            name="dummy", tag="latest", labels={"owner": "somebody"}
        ),
        spec=schemas.ObjectSpec(),
        status=schemas.ObjectStatus(state="created"),
    )
    db.create_feature_vector(db_session, project, feature_vector)
def ingest_feature_set(
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifierDep = Depends(deps.AuthVerifierDep),
    db_session: Session = Depends(deps.get_db_session),
):
    """Run ingestion for an existing feature-set, with authorization checks.

    Verifies the caller may update the feature-set and create a run (and,
    when relevant, create a schedule and read the transformation function),
    then loads the feature-set identified by ``reference`` (tag or uid) and
    runs :func:`ingest` on it. Returns the (possibly updated) feature-set
    together with the run info.
    """
    # Ingestion updates the feature-set and launches a run, so both
    # permissions are verified before doing any work.
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.feature_set,
        project,
        name,
        mlrun.api.schemas.AuthorizationAction.update,
        auth_verifier.auth_info,
    )
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.run,
        project,
        "",
        mlrun.api.schemas.AuthorizationAction.create,
        auth_verifier.auth_info,
    )
    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
        # A scheduled source results in a schedule object being created, so
        # schedule-creation permission is required as well.
        if data_source.schedule:
            mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
                mlrun.api.schemas.AuthorizationResourceTypes.schedule,
                project,
                "",
                mlrun.api.schemas.AuthorizationAction.create,
                auth_verifier.auth_info,
            )
    tag, uid = parse_reference(reference)
    feature_set_record = mlrun.api.crud.FeatureStore().get_feature_set(
        db_session, project, name, tag, uid
    )
    # Convert the DB record into a client-side FeatureSet object so it can be
    # fed to the feature-store ingest() API.
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())
    if feature_set.spec.function and feature_set.spec.function.function_object:
        # The feature-set references a transformation function — the caller
        # must be allowed to read it.
        function = feature_set.spec.function.function_object
        mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.function,
            function.metadata.project,
            function.metadata.name,
            mlrun.api.schemas.AuthorizationAction.read,
            auth_verifier.auth_info,
        )
    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session)
    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]
    # The run is attributed to the user from the x-remote-user header.
    run_config = RunConfig(owner=username)
    # Try to deduce whether the ingest job will need v3io mount, by analyzing the paths to the source and
    # targets. If it needs it, apply v3io mount to the run_config. Note that the access-key and username are
    # user-context parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        access_key = auth_verifier.auth_info.data_session
        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))
    infer_options = ingest_parameters.infer_options or InferOptions.default()
    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )
    # ingest may modify the feature-set contents, so returning the updated feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )