def ingest_feature_set(
    request: Request,
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
    db_session: Session = Depends(deps.get_db_session),
):
    """Run an ingest for a stored feature set and return the updated feature set.

    The feature set is looked up by ``project``/``name`` and the tag/uid parsed
    from ``reference``, then passed to ``ingest`` together with the optional
    source, targets and infer options taken from ``ingest_parameters``.

    :param request:           incoming request, used to extract secrets when a
                              v3io path is involved
    :param project:           project of the feature set
    :param name:              name of the feature set
    :param reference:         reference string, parsed into ``(tag, uid)``
    :param ingest_parameters: optional source / targets / infer options; an
                              explicit ``None`` is treated as "no parameters"
    :param username:          value of the ``x-remote-user`` header
    :param auth_verifier:     supplies the session used to override the run db
    :param db_session:        DB session used for the lookup and run db override

    :return: ``schemas.FeatureSetIngestOutput`` holding the (possibly modified)
             feature set and the ``to_dict()`` of the object returned by ``ingest``

    When ``_has_v3io_path`` reports a v3io path and either the
    ``V3IO_ACCESS_KEY`` secret or ``username`` is missing, ``log_and_raise`` is
    called with HTTP 400.
    """
    # The annotation permits None, but every access below dereferences the
    # object; normalise to an empty parameters object to avoid AttributeError.
    if ingest_parameters is None:
        ingest_parameters = schemas.FeatureSetIngestInput()

    tag, uid = parse_reference(reference)
    feature_set_record = get_db().get_feature_set(db_session, project, name, tag, uid)
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())

    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session, auth_verifier.auth_info.session)

    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]

    run_config = RunConfig()

    # Try to deduce whether the ingest job will need v3io mount, by analyzing the
    # paths to the source and targets. If it needs it, apply v3io mount to the
    # run_config. Note that the access-key and username are user-context
    # parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        secrets = get_secrets(request)
        access_key = secrets.get("V3IO_ACCESS_KEY", None)

        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))

    infer_options = ingest_parameters.infer_options or InferOptions.default()

    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )

    # ingest may modify the feature-set contents, so returning the updated
    # feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )
def get_default_targets():
    """Build the default feature set targets, one per default target name.

    Each entry is a ``DataTargetBase`` constructed from a value yielded by
    ``default_target_names()``, with its ``name`` set to ``str()`` of that value.
    """
    default_targets = []
    for target_kind in default_target_names():
        default_targets.append(DataTargetBase(target_kind, name=str(target_kind)))
    return default_targets
def get_target_driver(target_spec, resource=None):
    """Return a target driver built from a target spec.

    :param target_spec: a target spec object, or a dict that is first converted
                        with ``DataTargetBase.from_dict``
    :param resource:    optional resource forwarded to the driver's ``from_spec``

    The driver class is looked up in ``kind_to_driver`` by the spec's ``kind``.
    """
    spec = (
        DataTargetBase.from_dict(target_spec)
        if isinstance(target_spec, dict)
        else target_spec
    )
    return kind_to_driver[spec.kind].from_spec(spec, resource)
def ingest_feature_set(
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifierDep = Depends(deps.AuthVerifierDep),
    db_session: Session = Depends(deps.get_db_session),
):
    """Authorize and run an ingest for a stored feature set.

    Permission queries are issued, in order, for: updating the feature set,
    creating a run, creating a schedule (only when the data source has a
    schedule), and reading the feature set's function (only when it has a
    function object). The feature set is then ingested with the optional
    source, targets and infer options taken from ``ingest_parameters``.

    :param project:           project of the feature set
    :param name:              name of the feature set
    :param reference:         reference string, parsed into ``(tag, uid)``
    :param ingest_parameters: optional source / targets / infer options; an
                              explicit ``None`` is treated as "no parameters"
    :param username:          value of the ``x-remote-user`` header; also used
                              as the run config owner
    :param auth_verifier:     supplies ``auth_info`` for the permission queries
                              and the data session used as v3io access key
    :param db_session:        DB session used for the lookup and run db override

    :return: ``schemas.FeatureSetIngestOutput`` holding the (possibly modified)
             feature set and the ``to_dict()`` of the object returned by ``ingest``

    When ``_has_v3io_path`` reports a v3io path and either the data session or
    ``username`` is missing, ``log_and_raise`` is called with HTTP 400.
    """

    def _query_permissions(resource_type, resource_project, resource_name, action):
        # One permission query per call; a client is constructed per query,
        # exactly as the individual call sites did before.
        mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
            resource_type,
            resource_project,
            resource_name,
            action,
            auth_verifier.auth_info,
        )

    # The annotation permits None, but every access below dereferences the
    # object; normalise to an empty parameters object to avoid AttributeError.
    if ingest_parameters is None:
        ingest_parameters = schemas.FeatureSetIngestInput()

    _query_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.feature_set,
        project,
        name,
        mlrun.api.schemas.AuthorizationAction.update,
    )
    _query_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.run,
        project,
        "",
        mlrun.api.schemas.AuthorizationAction.create,
    )

    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if data_source.schedule if data_source is not None else False:
        _query_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.schedule,
            project,
            "",
            mlrun.api.schemas.AuthorizationAction.create,
        )

    tag, uid = parse_reference(reference)
    feature_set_record = mlrun.api.crud.FeatureStore().get_feature_set(
        db_session, project, name, tag, uid
    )
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())

    if feature_set.spec.function and feature_set.spec.function.function_object:
        function = feature_set.spec.function.function_object
        _query_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.function,
            function.metadata.project,
            function.metadata.name,
            mlrun.api.schemas.AuthorizationAction.read,
        )

    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session)

    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]

    run_config = RunConfig(owner=username)

    # Try to deduce whether the ingest job will need v3io mount, by analyzing the
    # paths to the source and targets. If it needs it, apply v3io mount to the
    # run_config. Note that the access-key and username are user-context
    # parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        access_key = auth_verifier.auth_info.data_session

        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))

    infer_options = ingest_parameters.infer_options or InferOptions.default()

    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )

    # ingest may modify the feature-set contents, so returning the updated
    # feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )