예제 #1
0
    def store_feature_set(
        self,
        name,
        feature_set: Union[dict, schemas.FeatureSet],
        project="",
        tag=None,
        uid=None,
        versioned=True,
    ) -> schemas.FeatureSet:
        """Store a feature-set under a specific reference using a PUT request.

        :param name: feature-set name, used in the request path.
        :param feature_set: the feature-set contents, either a plain dict or a
            ``schemas.FeatureSet`` (converted to a dict before being sent).
        :param project: project name; ``default_project`` is used when empty.
        :param tag: tag to store under. Mutually exclusive with ``uid``.
        :param uid: uid to store under. Mutually exclusive with ``tag``.
        :param versioned: forwarded to the API as the ``versioned`` query parameter.
        :raises MLRunInvalidArgumentError: when both ``uid`` and ``tag`` are given.
        :returns: a ``schemas.FeatureSet`` built from the JSON response.
        """
        if uid and tag:
            raise MLRunInvalidArgumentError("both uid and tag were provided")

        # The request body is always sent as a plain dict.
        if isinstance(feature_set, schemas.FeatureSet):
            payload = feature_set.dict()
        else:
            payload = feature_set

        target_project = project or default_project

        # Reference precedence: uid, then tag, then the "latest" tag.
        if uid:
            reference = uid
        elif tag:
            reference = tag
        else:
            reference = "latest"

        response = self.api_call(
            "PUT",
            f"projects/{target_project}/feature-sets/{name}/references/{reference}",
            f"Failed storing feature-set {target_project}/{name}",
            params={"versioned": versioned},
            body=json.dumps(payload),
        )
        return schemas.FeatureSet(**response.json())
예제 #2
0
def ingest_feature_set(
    request: Request,
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifier = Depends(deps.AuthVerifier),
    db_session: Session = Depends(deps.get_db_session),
):
    """Run an ingest for a stored feature-set and return the updated feature-set.

    The feature-set is looked up by ``project``/``name`` and the tag/uid parsed
    from ``reference``. The optional source and targets from
    ``ingest_parameters`` are converted to their runtime objects and handed to
    ``ingest`` together with a run configuration.

    :returns: ``schemas.FeatureSetIngestOutput`` holding the feature-set as it
        is after the ingest call and the run object as a dict.
    """
    tag, uid = parse_reference(reference)
    stored_record = get_db().get_feature_set(db_session, project, name, tag, uid)
    feature_set = mlrun.feature_store.FeatureSet.from_dict(stored_record.dict())

    # We are inside the server, so the default rundb must be replaced.
    feature_set._override_run_db(db_session, auth_verifier.auth_info.session)

    data_source = None
    data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    if ingest_parameters.targets:
        data_targets = []
        for requested_target in ingest_parameters.targets:
            data_targets.append(DataTargetBase.from_dict(requested_target.dict()))

    run_config = RunConfig()

    # Inspect the source/target paths to decide whether the ingest job needs a
    # v3io mount. The access-key and username are user-context values, so they
    # come from the request and not from the API context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        access_key = get_secrets(request).get("V3IO_ACCESS_KEY", None)
        if not (access_key and username):
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))

    infer_options = ingest_parameters.infer_options
    if not infer_options:
        infer_options = InferOptions.default()

    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )

    # ingest may have modified the feature-set, so the response carries the
    # feature-set as it is now rather than the record that was loaded.
    updated_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    run_object = run_params.to_dict()
    return schemas.FeatureSetIngestOutput(
        feature_set=updated_feature_set, run_object=run_object
    )
예제 #3
0
    def create_feature_set(
        self,
        feature_set: Union[dict, schemas.FeatureSet],
        project="",
        versioned=True,
    ) -> schemas.FeatureSet:
        """Create a new feature-set in a project using a POST request.

        :param feature_set: the feature-set to create, either a
            ``schemas.FeatureSet`` or a dict of its fields (converted to a
            ``schemas.FeatureSet`` before use).
        :param project: project name; ``default_project`` is used when empty.
        :param versioned: forwarded to the API as the ``versioned`` query parameter.
        :returns: a ``schemas.FeatureSet`` built from the JSON response.
        """
        project = project or default_project
        # The resource name is hyphenated ("feature-sets"), the same spelling
        # store_feature_set and get_feature_set use for this resource.
        path = f"projects/{project}/feature-sets"
        params = {"versioned": versioned}

        if isinstance(feature_set, dict):
            feature_set = schemas.FeatureSet(**feature_set)

        # The name is only needed for the error message; it is not part of the path.
        name = feature_set.metadata.name
        error_message = f"Failed creating feature-set {project}/{name}"
        resp = self.api_call(
            "POST",
            path,
            error_message,
            params=params,
            body=json.dumps(feature_set.dict()),
        )
        return schemas.FeatureSet(**resp.json())
예제 #4
0
    def get_feature_set(
        self,
        name: str,
        project: str = "",
        tag: str = None,
        uid: str = None,
    ) -> schemas.FeatureSet:
        """Fetch a single feature-set by name and reference using a GET request.

        :param name: feature-set name.
        :param project: project name; ``default_project`` is used when empty.
        :param tag: tag to look up. Mutually exclusive with ``uid``.
        :param uid: uid to look up. Mutually exclusive with ``tag``.
        :raises MLRunInvalidArgumentError: when both ``uid`` and ``tag`` are given.
        :returns: a ``schemas.FeatureSet`` built from the JSON response.
        """
        if uid and tag:
            raise MLRunInvalidArgumentError("both uid and tag were provided")

        target_project = project or default_project

        # Reference precedence: uid, then tag, then the "latest" tag.
        if uid:
            reference = uid
        elif tag:
            reference = tag
        else:
            reference = "latest"

        response = self.api_call(
            "GET",
            f"projects/{target_project}/feature-sets/{name}/references/{reference}",
            f"Failed retrieving feature-set {target_project}/{name}",
        )
        return schemas.FeatureSet(**response.json())
예제 #5
0
def test_create_feature_set(db: DBInterface, db_session: Session):
    """Create one feature-set and check it shows up in the listing queries."""
    project = "proj_test"
    name = "dummy"
    feature_set_schema = schemas.FeatureSet(**_create_feature_set(name))

    db.create_feature_set(db_session, project, feature_set_schema, versioned=True)
    # The result is discarded; the call only has to complete.
    db.get_feature_set(db_session, project, name)

    listed_feature_sets = db.list_feature_sets(db_session, project).feature_sets
    assert len(listed_feature_sets) == 1

    # Exactly one feature is expected to match the name "time".
    listed_features = db.list_features(db_session, project, "time").features
    assert len(listed_features) == 1
예제 #6
0
def _create_resources_of_all_kinds(
    db: DBInterface, db_session: sqlalchemy.orm.Session, project: str
):
    """Populate ``project`` with resources of every kind through ``db``.

    Creates, in this order: functions, artifacts, runs, logs, schedules, one
    feature-set and one feature-vector.
    """
    # One labels dict object is shared by the function, artifact, run and
    # schedule resources created below.
    labels = {"name": "value", "name2": "value2"}
    tags = ["some_tag", "some_tag2", "some_tag3"]
    uids = ["some_uid", "some_uid2", "some_uid3"]

    # Functions: every name is stored once per tag.
    function = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    function_names = [f"function_name_{index}" for index in range(1, 4)]
    function_combinations = (
        (fn_name, fn_tag) for fn_name in function_names for fn_tag in tags
    )
    for fn_name, fn_tag in function_combinations:
        db.store_function(
            db_session, function, fn_name, project, tag=fn_tag, versioned=True
        )

    # Artifacts: every key is stored for each uid, tag and iteration 0..2.
    artifact = {
        "bla": "blabla",
        "labels": labels,
        "status": {"bla": "blabla"},
    }
    artifact_keys = [f"artifact_key_{index}" for index in range(1, 4)]
    artifact_combinations = (
        (key, uid, tag, iteration)
        for key in artifact_keys
        for uid in uids
        for tag in tags
        for iteration in range(3)
    )
    for key, uid, tag, iteration in artifact_combinations:
        db.store_artifact(db_session, key, artifact, uid, iteration, tag, project)

    # Runs: every uid is stored for iterations 0..2.
    run = {
        "bla": "blabla",
        "metadata": {"labels": labels},
        "status": {"bla": "blabla"},
    }
    run_combinations = ((uid, iteration) for uid in uids for iteration in range(3))
    for uid, iteration in run_combinations:
        db.store_run(db_session, run, uid, project, iteration)

    # Logs: one log body per uid.
    log = b"some random log"
    for uid in uids:
        db.store_log(db_session, uid, project, log)

    # Schedules: three job schedules sharing one cron trigger.
    schedule = {
        "bla": "blabla",
        "status": {"bla": "blabla"},
    }
    cron_trigger = schemas.ScheduleCronTrigger(year=1999)
    for index in range(1, 4):
        db.create_schedule(
            db_session,
            project,
            f"schedule_name_{index}",
            schemas.ScheduleKinds.job,
            schedule,
            cron_trigger,
            labels,
        )

    # A single feature-set with one entity and one feature.
    feature_set_metadata = schemas.ObjectMetadata(
        name="dummy", tag="latest", labels={"owner": "nobody"}
    )
    entity = schemas.Entity(name="ent1", value_type="str", labels={"label": "1"})
    feature = schemas.Feature(name="feat1", value_type="str", labels={"label": "1"})
    feature_set = schemas.FeatureSet(
        metadata=feature_set_metadata,
        spec=schemas.FeatureSetSpec(entities=[entity], features=[feature]),
        status={},
    )
    db.create_feature_set(db_session, project, feature_set)

    # A single feature-vector in the "created" state.
    feature_vector_metadata = schemas.ObjectMetadata(
        name="dummy", tag="latest", labels={"owner": "somebody"}
    )
    feature_vector = schemas.FeatureVector(
        metadata=feature_vector_metadata,
        spec=schemas.ObjectSpec(),
        status=schemas.ObjectStatus(state="created"),
    )
    db.create_feature_vector(db_session, project, feature_vector)
예제 #7
0
def ingest_feature_set(
    project: str,
    name: str,
    reference: str,
    ingest_parameters: Optional[
        schemas.FeatureSetIngestInput
    ] = schemas.FeatureSetIngestInput(),
    username: str = Header(None, alias="x-remote-user"),
    auth_verifier: deps.AuthVerifierDep = Depends(deps.AuthVerifierDep),
    db_session: Session = Depends(deps.get_db_session),
):
    """Run an ingest for a stored feature-set after checking permissions.

    Permission queries are issued, in order, for: updating the feature-set,
    creating a run, creating a schedule (only when the supplied source has a
    schedule), and reading the feature-set's function object (only when it has
    one). The feature-set is then loaded, the optional source/targets from
    ``ingest_parameters`` are converted to runtime objects, and ``ingest`` is
    called with a run configuration owned by ``username``.

    :param project: project that owns the feature-set.
    :param name: feature-set name.
    :param reference: tag or uid reference, parsed with ``parse_reference``.
    :param ingest_parameters: optional source, targets and infer options.
    :param username: value of the ``x-remote-user`` header.
    :returns: ``schemas.FeatureSetIngestOutput`` holding the feature-set as it
        is after the ingest call and the run object as a dict.
    """
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.feature_set,
        project,
        name,
        mlrun.api.schemas.AuthorizationAction.update,
        auth_verifier.auth_info,
    )
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.run,
        project,
        "",
        mlrun.api.schemas.AuthorizationAction.create,
        auth_verifier.auth_info,
    )
    data_source = data_targets = None
    if ingest_parameters.source:
        data_source = DataSource.from_dict(ingest_parameters.source.dict())
    # data_source stays None when the request carries no source, so it must be
    # checked before its schedule attribute is read.
    if data_source is not None and data_source.schedule:
        mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.schedule,
            project,
            "",
            mlrun.api.schemas.AuthorizationAction.create,
            auth_verifier.auth_info,
        )
    tag, uid = parse_reference(reference)
    feature_set_record = mlrun.api.crud.FeatureStore().get_feature_set(
        db_session, project, name, tag, uid
    )
    feature_set = mlrun.feature_store.FeatureSet.from_dict(feature_set_record.dict())
    if feature_set.spec.function and feature_set.spec.function.function_object:
        function = feature_set.spec.function.function_object
        mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
            mlrun.api.schemas.AuthorizationResourceTypes.function,
            function.metadata.project,
            function.metadata.name,
            mlrun.api.schemas.AuthorizationAction.read,
            auth_verifier.auth_info,
        )
    # Need to override the default rundb since we're in the server.
    feature_set._override_run_db(db_session)

    if ingest_parameters.targets:
        data_targets = [
            DataTargetBase.from_dict(data_target.dict())
            for data_target in ingest_parameters.targets
        ]

    run_config = RunConfig(owner=username)

    # Try to deduce whether the ingest job will need v3io mount, by analyzing the paths to the source and
    # targets. If it needs it, apply v3io mount to the run_config. Note that the access-key and username are
    # user-context parameters, we cannot use the api context.
    if _has_v3io_path(data_source, data_targets, feature_set):
        access_key = auth_verifier.auth_info.data_session

        if not access_key or not username:
            # NOTE(review): log_and_raise is presumed to raise, so the apply
            # below is not reached with missing credentials - confirm.
            log_and_raise(
                HTTPStatus.BAD_REQUEST.value,
                reason="Request needs v3io access key and username in header",
            )
        run_config = run_config.apply(v3io_cred(access_key=access_key, user=username))

    infer_options = ingest_parameters.infer_options or InferOptions.default()

    run_params = ingest(
        feature_set,
        data_source,
        data_targets,
        infer_options=infer_options,
        return_df=False,
        run_config=run_config,
    )
    # ingest may modify the feature-set contents, so returning the updated feature-set.
    result_feature_set = schemas.FeatureSet(**feature_set.to_dict())
    return schemas.FeatureSetIngestOutput(
        feature_set=result_feature_set, run_object=run_params.to_dict()
    )