Example #1
0
def ingest_feature_set(
    request: Request,
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
    db_session: Session = Depends(deps.get_db_session),
):
    tag, uid = parse_reference(reference)
    feature_set_record = get_db().get_feature_set(db_session, project, name, tag, uid)

    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())
    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session, auth_verifier.auth_info.session)

    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]

    run_config = RunConfig()

    # Try to deduce whether the ingest job will need v3io mount, by analyzing the paths to the source and
    # targets. If it needs it, apply v3io mount to the run_config. Note that the access-key and username are
    # user-context parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        secrets = get_secrets(request)
        access_key = secrets.get("V3IO_ACCESS_KEY", None)

        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))

    infer_options = ingest_parameters.infer_options or InferOptions.default()

    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )
    # ingest may modify the feature-set contents, so returning the updated feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )
Example #2
0
def get_default_targets():
    """initialize the default feature set targets list"""
    return [
        DataTargetBase(target, name=str(target))
        for target in default_target_names()
    ]
Example #3
0
def get_target_driver(target_spec, resource=None):
    if isinstance(target_spec, dict):
        target_spec = DataTargetBase.from_dict(target_spec)
    driver_class = kind_to_driver[target_spec.kind]
    return driver_class.from_spec(target_spec, resource)
Example #4
0
def ingest_feature_set(
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifierDep = Depends(deps.AuthVerifierDep),
    db_session: Session = Depends(deps.get_db_session),
):
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.feature_set,
        project,
        name,
        mlrun.api.schemas.AuthorizationAction.update,
        auth_verifier.auth_info,
    )
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.run,
        project,
        "",
        mlrun.api.schemas.AuthorizationAction.create,
        auth_verifier.auth_info,
    )
    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if data_source.schedule:
        mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.schedule,
            project,
            "",
            mlrun.api.schemas.AuthorizationAction.create,
            auth_verifier.auth_info,
        )
    tag, uid = parse_reference(reference)
    feature_set_record = mlrun.api.crud.FeatureStore().get_feature_set(
        db_session, project, name, tag, uid
    )
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())
    if feature_set.spec.function and feature_set.spec.function.function_object:
        function = feature_set.spec.function.function_object
        mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.function,
            function.metadata.project,
            function.metadata.name,
            mlrun.api.schemas.AuthorizationAction.read,
            auth_verifier.auth_info,
        )
    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session)

    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]

    run_config = RunConfig(owner=username)

    # Try to deduce whether the ingest job will need v3io mount, by analyzing the paths to the source and
    # targets. If it needs it, apply v3io mount to the run_config. Note that the access-key and username are
    # user-context parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        access_key = auth_verifier.auth_info.data_session

        if not access_key or not username:
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))

    infer_options = ingest_parameters.infer_options or InferOptions.default()

    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )
    # ingest may modify the feature-set contents, so returning the updated feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )