Ejemplo n.º 1
0
def cleanup_endpoints(db: Session, client: TestClient):
    """
    Deletes all model endpoint KV records and the events TSDB table of TEST_PROJECT.

    Returns immediately, touching nothing, when _is_env_params_dont_exist() is truthy.

    :param db: Not referenced by this function
    :param client: Not referenced by this function
    """
    if _is_env_params_dont_exist():
        return

    store_kinds = mlrun.api.schemas.ModelMonitoringStoreKinds
    prefix_template = config.model_endpoint_monitoring.store_prefixes.default

    # The KV table and the TSDB table are resolved from two different store kinds
    endpoints_prefix = prefix_template.format(
        project=TEST_PROJECT, kind=store_kinds.ENDPOINTS
    )
    _, kv_container, kv_path = parse_model_endpoint_store_prefix(endpoints_prefix)

    events_prefix = prefix_template.format(
        project=TEST_PROJECT, kind=store_kinds.EVENTS
    )
    _, tsdb_container, tsdb_path = parse_model_endpoint_store_prefix(events_prefix)

    v3io_client = get_v3io_client(
        endpoint=config.v3io_api, access_key=_get_access_key()
    )
    frames_client = get_frames_client(
        token=_get_access_key(),
        container=tsdb_container,
        address=config.v3io_framesd,
    )

    # Cleanup KV
    try:
        cursor = v3io_client.kv.new_cursor(
            container=kv_container,
            table_path=kv_path,
            raise_for_status=RaiseForStatus.never,
        )
        # Collect all record keys before deleting anything
        record_keys = [item["__name"] for item in cursor.all()]
        for key in record_keys:
            v3io_client.kv.delete(
                container=kv_container,
                table_path=kv_path,
                key=key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        # Best-effort cleanup: a RuntimeError from the KV calls is ignored
        pass

    # Cleanup TSDB
    try:
        frames_client.delete(
            backend="tsdb",
            table=tsdb_path,
            if_missing=fpb2.IGNORE,
        )
    except CreateError:
        # Best-effort cleanup: a CreateError from frames is ignored
        pass
Ejemplo n.º 2
0
def _create_model_monitoring_stream(project: str):
    """
    Creates the V3IO stream of the given project's model endpoint monitoring.

    The shard count and retention period are read from
    config.model_endpoint_monitoring.serving_stream_args. A 400 response whose body
    contains "ResourceInUse" is accepted silently; any other response is handed to
    response.raise_for_status([409, 204]).

    :param project: The name of the project
    """
    monitoring_config = config.model_endpoint_monitoring

    full_stream_path = monitoring_config.store_prefixes.default.format(
        project=project, kind="stream"
    )
    _, container, stream_path = parse_model_endpoint_store_prefix(full_stream_path)

    # TODO: How should we configure sharding here?
    logger.info(
        "Creating model endpoint stream for project",
        project=project,
        stream_path=stream_path,
        container=container,
        endpoint=config.v3io_api,
    )

    v3io_client = v3io.dataplane.Client(
        endpoint=config.v3io_api,
        access_key=os.environ.get("V3IO_ACCESS_KEY"),
    )
    stream_args = monitoring_config.serving_stream_args
    response = v3io_client.create_stream(
        container=container,
        path=stream_path,
        shard_count=stream_args.shard_count,
        retention_period_hours=stream_args.retention_period_hours,
        # Status handling is done explicitly below
        raise_for_status=v3io.dataplane.RaiseForStatus.never,
    )

    resource_in_use = response.status_code == 400 and "ResourceInUse" in str(
        response.body
    )
    if resource_in_use:
        return

    response.raise_for_status([409, 204])
Ejemplo n.º 3
0
    def delete_endpoint_record(access_key: str, project: str,
                               endpoint_id: str):
        """
        Removes a single model endpoint record from the KV table. The table is resolved
        from the project, and endpoint_id is the key of the record.

        :param access_key: V3IO access key, passed on to the KV delete call
        :param project: The name of the project
        :param endpoint_id: The id of the endpoint
        """

        logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

        # Resolve the container and table path of the project's endpoints table
        endpoints_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project, kind=ENDPOINTS)
        _, container, table_path = parse_model_endpoint_store_prefix(
            endpoints_prefix)

        v3io_client = get_v3io_client(endpoint=config.v3io_api)
        v3io_client.kv.delete(
            container=container,
            table_path=table_path,
            key=endpoint_id,
            access_key=access_key,
        )

        logger.info("Model endpoint table cleared", endpoint_id=endpoint_id)
Ejemplo n.º 4
0
    def delete_endpoint_record(
        self,
        auth_info: mlrun.api.schemas.AuthInfo,
        project: str,
        endpoint_id: str,
        access_key: str,
    ):
        """
        Removes a single model endpoint record from the KV table. The table is resolved
        from the project, and endpoint_id is the key of the record.

        :param auth_info: The required auth information for doing the deletion (not
                          referenced in the body of this method)
        :param project: The name of the project
        :param endpoint_id: The id of the endpoint
        :param access_key: access key with permission to delete, passed on to the KV
                           delete call
        """
        logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

        # Resolve the container and table path of the project's endpoints table
        endpoints_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project,
            kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
        )
        _, container, table_path = parse_model_endpoint_store_prefix(endpoints_prefix)

        v3io_client = get_v3io_client(endpoint=config.v3io_api)
        v3io_client.kv.delete(
            container=container,
            table_path=table_path,
            key=endpoint_id,
            access_key=access_key,
        )

        logger.info("Model endpoint table cleared", endpoint_id=endpoint_id)
Ejemplo n.º 5
0
def test_get_endpoint_metrics(db: Session, client: TestClient):
    """
    For five mocked endpoints: writes ten TSDB samples of the
    "predictions_per_second_count_1s" metric, fetches the endpoint through the REST API
    with that metric requested, and checks that the returned values sum up to what was
    written.
    """
    metric_name = "predictions_per_second_count_1s"

    events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=TEST_PROJECT, kind=EVENTS
    )
    _, container, table = parse_model_endpoint_store_prefix(events_prefix)

    frames_client = get_frames_client(
        token=_get_access_key(), container=container, address=config.v3io_framesd,
    )

    start = datetime.utcnow()

    for _ in range(5):
        mocked_endpoint = _mock_random_endpoint()
        write_endpoint_to_kv(_get_access_key(), mocked_endpoint)
        frames_client.create(backend="tsdb", table=table, rate="10/m", if_exists=1)

        # One random count per minute, over the ten minutes preceding `start`
        counts = [randint(1, 10) for _ in range(10)]
        total = sum(counts)
        dfs = [
            pd.DataFrame(
                data=[
                    {
                        metric_name: count,
                        "endpoint_id": mocked_endpoint.metadata.uid,
                        "timestamp": start - timedelta(minutes=10 - minute),
                    }
                ]
            )
            for minute, count in enumerate(counts)
        ]

        frames_client.write(
            backend="tsdb",
            table=table,
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

        response = client.get(
            url=f"/api/projects/{TEST_PROJECT}/model-endpoints/{mocked_endpoint.metadata.uid}?metric=predictions_per_second_count_1s",  # noqa
            headers={"X-V3io-Session-Key": _get_access_key()},
        )

        fetched_endpoint = ModelEndpoint(**response.json())
        metrics = fetched_endpoint.status.metrics

        assert len(metrics) > 0

        predictions_per_second = metrics[metric_name]
        assert predictions_per_second.name == metric_name

        # Each value is indexed by position: the second item holds the metric value
        response_total = sum(value[1] for value in predictions_per_second.values)
        assert total == response_total
    def __init__(self, context: MLClientCtx, project: str):
        """
        Resolves the parquet, KV, TSDB and stream locations of the project from the
        configured store prefix template, reads the default drift thresholds from the
        configuration and creates the run DB, V3IO and frames clients.

        :param context: Stored as-is on the instance
        :param project: The name of the project whose store paths are resolved
        """
        self.context = context
        self.project = project
        self.virtual_drift = VirtualDrift(inf_capping=10)

        prefix_template = config.model_endpoint_monitoring.store_prefixes.default

        def _store_prefix(kind):
            # Full store prefix of the project for a single store kind
            return prefix_template.format(project=project, kind=kind)

        # The parquet path is kept unparsed, the others are split to container and path
        self.parquet_path = _store_prefix("parquet")

        _, self.kv_container, self.kv_path = parse_model_endpoint_store_prefix(
            _store_prefix("endpoints"))

        _, self.tsdb_container, self.tsdb_path = parse_model_endpoint_store_prefix(
            _store_prefix("events"))

        _, self.stream_container, self.stream_path = parse_model_endpoint_store_prefix(
            _store_prefix("log_stream"))

        logger.info(
            "Initializing BatchProcessor",
            parquet_path=self.parquet_path,
            kv_container=self.kv_container,
            kv_path=self.kv_path,
            tsdb_container=self.tsdb_container,
            tsdb_path=self.tsdb_path,
            stream_container=self.stream_container,
            stream_path=self.stream_path,
        )

        default_thresholds = config.model_endpoint_monitoring.drift_thresholds.default
        self.default_possible_drift_threshold = default_thresholds.possible_drift
        self.default_drift_detected_threshold = default_thresholds.drift_detected

        self.db = get_run_db()
        self.v3io = get_v3io_client()
        self.frames = get_frames_client(address=config.v3io_framesd,
                                        container=self.tsdb_container)
Ejemplo n.º 7
0
    def write_endpoint_to_kv(
        self, access_key: str, endpoint: ModelEndpoint, update: bool = True
    ):
        """
        Stores the given endpoint as a record in the KV table of its project. The record
        key is the endpoint uid.

        :param access_key: V3IO access key, passed on to the KV call
        :param endpoint: ModelEndpoint object
        :param update: When True, use client.kv.update, otherwise use client.kv.put

        :return: The endpoint object that was passed in
        """
        metadata, spec, status = endpoint.metadata, endpoint.spec, endpoint.status

        labels = metadata.labels or {}

        attributes = {
            "endpoint_id": metadata.uid,
            "project": metadata.project,
            "function_uri": spec.function_uri,
            "model": spec.model,
            "model_class": spec.model_class or "",
            "labels": json.dumps(labels),
            "model_uri": spec.model_uri or "",
            "stream_path": spec.stream_path or "",
            # NOTE(review): a falsy `active` (e.g. False) is stored as "" - confirm
            # this is what readers of the record expect
            "active": spec.active or "",
            "state": status.state or "",
            # Structured fields are stored as JSON strings, with empty defaults
            "feature_stats": json.dumps(status.feature_stats or {}),
            "current_stats": json.dumps(status.current_stats or {}),
            "feature_names": json.dumps(spec.feature_names or []),
            "children": json.dumps(status.children or []),
            "label_names": json.dumps(spec.label_names or []),
            "monitor_configuration": json.dumps(spec.monitor_configuration or {}),
        }
        # Every label is also stored as a top-level attribute, prefixed with "_"
        attributes.update({f"_{name}": value for name, value in labels.items()})

        client = get_v3io_client(endpoint=config.v3io_api)
        if update:
            write_record = client.kv.update
        else:
            write_record = client.kv.put

        endpoints_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=metadata.project, kind=self.ENDPOINTS
        )
        _, container, table_path = parse_model_endpoint_store_prefix(endpoints_prefix)

        write_record(
            container=container,
            table_path=table_path,
            key=metadata.uid,
            access_key=access_key,
            attributes=attributes,
        )

        return endpoint
Ejemplo n.º 8
0
def grafana_incoming_features(body: Dict[str, Any],
                              query_parameters: Dict[str,
                                                     str], access_key: str):
    """
    Builds one Grafana time series target per feature of a model endpoint, read from
    the events TSDB table of the project.

    :param body: Request body; "rangeRaw.from" and "rangeRaw.to" set the time range and
                 default to "now-1h" and "now"
    :param query_parameters: Provides the "endpoint_id" and "project" values
    :param access_key: V3IO access key, used for reading the endpoint record and as the
                       frames client token

    :return: A list of GrafanaTimeSeriesTarget objects, empty when the endpoint has no
             feature names
    """
    endpoint_id = query_parameters.get("endpoint_id")
    project = query_parameters.get("project")

    range_raw = body.get("rangeRaw", {})
    start = range_raw.get("from", "now-1h")
    end = range_raw.get("to", "now")

    endpoint = ModelEndpoints.get_endpoint(access_key=access_key,
                                           project=project,
                                           endpoint_id=endpoint_id)

    feature_names = endpoint.spec.feature_names
    if not feature_names:
        logger.warn(
            "'feature_names' is either missing or not initialized in endpoint record",
            endpoint_id=endpoint.metadata.uid,
        )
        return []

    events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=EVENTS)
    _, container, table = parse_model_endpoint_store_prefix(events_prefix)

    frames_client = get_frames_client(
        token=access_key,
        address=config.v3io_framesd,
        container=container,
    )

    features: pd.DataFrame = frames_client.read(
        backend="tsdb",
        table=table,
        columns=feature_names,
        filter=f"endpoint_id=='{endpoint_id}'",
        start=start,
        end=end,
    )

    # Only feature columns should turn into targets
    features = features.drop(columns=["endpoint_id"], errors="ignore")
    # Integer-divide the int64 index by 10**6
    # NOTE(review): presumably nanoseconds to milliseconds - confirm the index dtype
    features.index = features.index.astype(np.int64) // 10**6

    time_series = []
    for feature_name, values_by_timestamp in features.to_dict().items():
        target = GrafanaTimeSeriesTarget(target=feature_name)
        for timestamp, value in values_by_timestamp.items():
            target.add_data_point(
                GrafanaDataPoint(value=float(value), timestamp=timestamp))
        time_series.append(target)

    return time_series
Ejemplo n.º 9
0
    def cleanup_endpoints(self):
        """
        Deletes the model endpoints of self.project_name: first every endpoint record
        through the run DB, then whatever is left in the KV table, and finally the TSDB
        table.
        """
        run_db = mlrun.get_run_db()
        for endpoint in run_db.list_model_endpoints(self.project_name).endpoints:
            run_db.delete_model_endpoint_record(
                endpoint.metadata.project, endpoint.metadata.uid
            )

        v3io_client = get_v3io_client(
            endpoint=config.v3io_api, access_key=self._get_auth_info().data_session
        )

        endpoints_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project_name, kind=mlrun.api.crud.ModelEndpoints().ENDPOINTS
        )
        _, container, table_path = parse_model_endpoint_store_prefix(endpoints_prefix)

        frames_client = get_frames_client(
            token=self._get_auth_info().data_session,
            container=container,
            address=config.v3io_framesd,
        )

        # Cleanup KV
        try:
            cursor = v3io_client.kv.new_cursor(
                container=container,
                table_path=table_path,
                raise_for_status=RaiseForStatus.never,
            )
            # Collect all record keys before deleting anything
            record_keys = [item["__name"] for item in cursor.all()]
            for key in record_keys:
                v3io_client.kv.delete(
                    container=container,
                    table_path=table_path,
                    key=key,
                    raise_for_status=RaiseForStatus.never,
                )
        except RuntimeError:
            # Best-effort cleanup: a RuntimeError from the KV calls is ignored
            pass

        # Cleanup TSDB
        # NOTE(review): the table deleted here is the endpoints (KV) path, not an
        # events path - confirm this is the table that should be removed
        try:
            frames_client.delete(
                backend="tsdb", table=table_path, if_missing=fpb2.IGNORE,
            )
        except CreateError:
            # Best-effort cleanup: a CreateError from frames is ignored
            pass
Ejemplo n.º 10
0
def test_get_endpoint_metric_function():
    """
    Writes ten TSDB samples of the "predictions_per_second_count_1s" metric for a mocked
    endpoint and checks that get_endpoint_metrics returns all ten of them, summing up to
    what was written.
    """
    metric_name = "predictions_per_second_count_1s"

    events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=TEST_PROJECT, kind=EVENTS
    )
    _, container, table = parse_model_endpoint_store_prefix(events_prefix)

    frames_client = get_frames_client(
        token=_get_access_key(), container=container, address=config.v3io_framesd,
    )

    start = datetime.utcnow()

    endpoint = _mock_random_endpoint()
    write_endpoint_to_kv(_get_access_key(), endpoint)

    frames_client.create(backend="tsdb", table=table, rate="10/m", if_exists=1)

    # One random count per minute, over the ten minutes preceding `start`
    counts = [randint(1, 10) for _ in range(10)]
    total = sum(counts)
    dfs = [
        pd.DataFrame(
            data=[
                {
                    metric_name: count,
                    "endpoint_id": endpoint.metadata.uid,
                    "timestamp": start - timedelta(minutes=10 - minute),
                }
            ]
        )
        for minute, count in enumerate(counts)
    ]

    frames_client.write(
        backend="tsdb", table=table, dfs=dfs, index_cols=["timestamp", "endpoint_id"],
    )

    endpoint_metrics = get_endpoint_metrics(
        access_key=_get_access_key(),
        project=TEST_PROJECT,
        endpoint_id=endpoint.metadata.uid,
        metrics=[metric_name],
    )

    assert metric_name in endpoint_metrics

    actual_values = endpoint_metrics[metric_name].values
    assert len(actual_values) == 10
    # Each value is indexed by position: the second item holds the metric value
    assert sum(value[1] for value in actual_values) == total
Ejemplo n.º 11
0
    def get_endpoint_metrics(
        self,
        access_key: str,
        project: str,
        endpoint_id: str,
        metrics: List[str],
        start: str = "now-1h",
        end: str = "now",
    ) -> Dict[str, Metric]:
        """
        Reads the requested metrics of a model endpoint from the events TSDB table of
        the project.

        :param access_key: V3IO access key, used as the frames client token
        :param project: The name of the project
        :param endpoint_id: The id of the endpoint, used for filtering the TSDB rows
        :param metrics: The names of the metrics (TSDB columns) to read
        :param start: The start time of the metrics
        :param end: The end time of the metrics

        :return: A mapping of metric name to Metric. A requested metric that is missing
                 from the data that was read is left out of the mapping

        :raise MLRunInvalidArgumentError: When no metric names are given
        """
        if not metrics:
            raise MLRunInvalidArgumentError("Metric names must be provided")

        events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project, kind=mlrun.api.schemas.ModelMonitoringStoreKinds.EVENTS
        )
        _, container, table = parse_model_endpoint_store_prefix(events_prefix)

        frames_client = get_frames_client(
            token=access_key, address=config.v3io_framesd, container=container,
        )

        columns = frames_client.read(
            backend="tsdb",
            table=table,
            columns=["endpoint_id", *metrics],
            filter=f"endpoint_id=='{endpoint_id}'",
            start=start,
            end=end,
        ).to_dict()

        # Timestamps are stringified, values are kept as they were read
        return {
            name: Metric(
                name=name,
                values=[
                    (str(timestamp), value)
                    for timestamp, value in columns[name].items()
                ],
            )
            for name in metrics
            if columns.get(name) is not None
        }
Ejemplo n.º 12
0
def cleanup_endpoints(db: Session, client: TestClient):
    """
    Deletes all model endpoint KV records and the events TSDB table of TEST_PROJECT.

    The KV records live under the ENDPOINTS store kind while the TSDB samples are
    written under the EVENTS store kind, so each of them is resolved and cleaned
    separately.

    :param db: Not referenced by this function
    :param client: Not referenced by this function
    """
    # Do nothing unless its system test env
    if _is_env_params_dont_exist():
        return

    prefix_template = config.model_endpoint_monitoring.store_prefixes.default

    kv_prefix = prefix_template.format(project=TEST_PROJECT, kind=ENDPOINTS)
    _, kv_container, kv_path = parse_model_endpoint_store_prefix(kv_prefix)

    # The TSDB table must be resolved from the EVENTS kind: deleting the table at the
    # ENDPOINTS (KV) path would leave the TSDB samples in place
    tsdb_prefix = prefix_template.format(project=TEST_PROJECT, kind=EVENTS)
    _, tsdb_container, tsdb_path = parse_model_endpoint_store_prefix(tsdb_prefix)

    v3io = get_v3io_client(endpoint=config.v3io_api, access_key=_get_access_key())

    frames = get_frames_client(
        token=_get_access_key(),
        container=tsdb_container,
        address=config.v3io_framesd,
    )

    # Cleanup KV
    try:
        all_records = v3io.kv.new_cursor(
            container=kv_container,
            table_path=kv_path,
            raise_for_status=RaiseForStatus.never,
        ).all()

        for record_key in [record["__name"] for record in all_records]:
            v3io.kv.delete(
                container=kv_container,
                table_path=kv_path,
                key=record_key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        # Best-effort cleanup: a RuntimeError from the KV calls is ignored
        pass

    # Cleanup TSDB
    try:
        frames.delete(
            backend="tsdb", table=tsdb_path, if_missing=fpb2.IGNORE,
        )
    except CreateError:
        # Best-effort cleanup: a CreateError from frames is ignored
        pass
Ejemplo n.º 13
0
def test_grafana_incoming_features(db: Session, client: TestClient):
    """
    Writes ten TSDB samples of the features f0-f3 for each of five mocked endpoints,
    queries the grafana proxy for the "incoming_features" target of each endpoint and
    checks that one target with ten data points is returned per feature.
    """
    path = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=TEST_PROJECT, kind=EVENTS)
    _, container, path = parse_model_endpoint_store_prefix(path)

    frames = get_frames_client(
        token=_get_access_key(),
        container=container,
        address=config.v3io_framesd,
    )

    frames.create(backend="tsdb", table=path, rate="10/m", if_exists=1)

    start = datetime.utcnow()
    endpoints = [_mock_random_endpoint() for _ in range(5)]
    for e in endpoints:
        e.spec.feature_names = ["f0", "f1", "f2", "f3"]

    for endpoint in endpoints:
        ModelEndpoints.create_or_patch(_get_access_key(), endpoint)

        # One sample per minute, over the ten minutes preceding `start`
        dfs = [
            pd.DataFrame(data=[{
                "f0": i,
                "f1": i + 1,
                "f2": i + 2,
                "f3": i + 3,
                "endpoint_id": endpoint.metadata.uid,
                "timestamp": start - timedelta(minutes=10 - i),
            }])
            for i in range(10)
        ]

        frames.write(
            backend="tsdb",
            table=path,
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

    for endpoint in endpoints:
        response = client.post(
            url="/api/grafana-proxy/model-endpoints/query",
            headers={"X-V3io-Session-Key": _get_access_key()},
            json={
                "targets": [{
                    "target":
                    f"project={TEST_PROJECT};endpoint_id={endpoint.metadata.uid};target_endpoint=incoming_features"  # noqa
                }]
            },
        )
        response = response.json()
        targets = [t["target"] for t in response]
        assert targets == ["f0", "f1", "f2", "f3"]

        datapoints_per_target = [t["datapoints"] for t in response]
        assert all(len(datapoints) == 10 for datapoints in datapoints_per_target)
        self.tsdb_batching_max_events = tsdb_batching_max_events
        self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
        self.parquet_batching_max_events = parquet_batching_max_events
        self.parquet_batching_timeout_secs = parquet_batching_timeout_secs
        self.aggregate_count_windows = aggregate_count_windows or ["5m", "1h"]
        self.aggregate_count_period = aggregate_count_period
        self.aggregate_avg_windows = aggregate_avg_windows or ["5m", "1h"]
        self.aggregate_avg_period = aggregate_avg_period
        self.v3io_access_key = v3io_access_key or environ.get(
            "V3IO_ACCESS_KEY")
        self.v3io_framesd = v3io_framesd or config.v3io_framesd

        template = config.model_endpoint_monitoring.store_prefixes.default

        kv_path = template.format(project=project, kind="endpoints")
        _, self.kv_container, self.kv_path = parse_model_endpoint_store_prefix(
            kv_path)

        tsdb_path = template.format(project=project, kind="events")
        _, self.tsdb_container, self.tsdb_path = parse_model_endpoint_store_prefix(
            tsdb_path)
        self.tsdb_path = f"{self.tsdb_container}/{self.tsdb_path}"

        self.parquet_path = template.format(project=project, kind="parquet")

        logger.info(
            "Writer paths",
            kv_path=self.kv_path,
            tsdb_path=self.tsdb_path,
            parquet_path=self.parquet_path,
        )
Ejemplo n.º 15
0
    def delete_model_endpoints_resources(self, project_name: str):
        """
        Deletes the model endpoint resources of a project: every listed endpoint record,
        whatever is left in the KV table, the TSDB table and finally the project's
        model endpoint store path.

        The access key is read from the V3IO_ACCESS_KEY environment variable. Nothing
        is deleted when config.igz_version or config.v3io_api is not set.

        :param project_name: The name of the project
        """
        auth_info = mlrun.api.schemas.AuthInfo(
            data_session=os.getenv("V3IO_ACCESS_KEY")
        )
        access_key = auth_info.data_session

        # we would ideally base on config.v3io_api but can't for backwards compatibility reasons,
        # we're using the igz version heuristic
        if not config.igz_version or not config.v3io_api:
            return

        endpoints = self.list_endpoints(auth_info, project_name)
        for endpoint in endpoints.endpoints:
            self.delete_endpoint_record(
                auth_info, endpoint.metadata.project, endpoint.metadata.uid, access_key,
            )

        v3io = get_v3io_client(endpoint=config.v3io_api, access_key=access_key)

        path = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project_name,
            kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
        )
        # Must be resolved from the full prefix, before `path` is re-bound to its
        # parsed form on the next line
        tsdb_path = parse_model_endpoint_project_prefix(path, project_name)
        _, container, path = parse_model_endpoint_store_prefix(path)

        frames = get_frames_client(
            token=access_key, container=container, address=config.v3io_framesd,
        )
        try:
            all_records = v3io.kv.new_cursor(
                container=container,
                table_path=path,
                raise_for_status=RaiseForStatus.never,
                access_key=access_key,
            ).all()

            all_records = [r["__name"] for r in all_records]

            # Cleanup KV
            for record in all_records:
                v3io.kv.delete(
                    container=container,
                    table_path=path,
                    key=record,
                    access_key=access_key,
                    raise_for_status=RaiseForStatus.never,
                )
        except RuntimeError as exc:
            # KV might raise an exception even it was set not raise one.  exception is raised if path is empty or
            # not exist, therefore ignoring failures until they'll fix the bug.
            # TODO: remove try except after bug is fixed
            logger.debug(
                "Failed cleaning model endpoints KV. Ignoring",
                exc=str(exc),
                traceback=traceback.format_exc(),
            )

        # Cleanup TSDB
        # NOTE(review): the table deleted here is the endpoints (KV) path, not an
        # events path - confirm this is the table that should be removed
        try:
            frames.delete(
                backend="tsdb", table=path, if_missing=frames_pb2.IGNORE,
            )
        except CreateError:
            # frames might raise an exception if schema file does not exist.
            pass

        # final cleanup of tsdb path
        # str.replace returns a new string, so the result must be re-bound for the
        # normalized url to actually be used below
        tsdb_path = tsdb_path.replace("://u", ":///u")
        store, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
        store.rm(tsdb_path, recursive=True)
Ejemplo n.º 16
0
    def list_endpoints(
        self,
        auth_info: mlrun.api.schemas.AuthInfo,
        project: str,
        model: Optional[str] = None,
        function: Optional[str] = None,
        labels: Optional[List[str]] = None,
        metrics: Optional[List[str]] = None,
        start: str = "now-1h",
        end: str = "now",
        top_level: Optional[bool] = False,
        uids: Optional[List[str]] = None,
    ) -> ModelEndpointList:
        """
        Returns a list of ModelEndpointState objects. Each object represents the current state of a model endpoint.
        This functions supports filtering by the following parameters:
        1) model
        2) function
        3) labels
        4) top level
        5) uids
        By default, when no filters are applied, all available endpoints for the given project will be listed.

        In addition, this functions provides a facade for listing endpoint related metrics. This facade is time-based
        and depends on the 'start' and 'end' parameters. By default, when the metrics parameter is None, no metrics are
        added to the output of this function.

        When 'uids' is given, the KV table is not queried and the other filters (model, function, labels, top_level)
        are not applied - the given uids are fetched as they are.

        :param auth_info: The auth information of the request. Its data_session is used as the access key of the KV
        query, and the object itself is passed on to get_endpoint
        :param project: The name of the project
        :param model: The name of the model to filter by
        :param function: The name of the function to filter by
        :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a label
        (i.e. list("key==value")) or by looking for the existence of a given key (i.e. "key")
        :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
        :param start: The start time of the metrics
        :param end: The end time of the metrics
        :param top_level: if True will return only routers and endpoint that are NOT children of any router
        :param uids: will return ModelEndpointList of endpoints with uid in uids

        :return: A ModelEndpointList of the matching endpoints. The list is empty when querying the KV table fails
        """

        logger.info(
            "Listing endpoints",
            project=project,
            model=model,
            function=function,
            labels=labels,
            metrics=metrics,
            start=start,
            end=end,
            top_level=top_level,
            uids=uids,
        )

        endpoint_list = ModelEndpointList(endpoints=[])

        if uids is None:
            client = get_v3io_client(endpoint=config.v3io_api)

            path = config.model_endpoint_monitoring.store_prefixes.default.format(
                project=project,
                kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
            )
            _, container, path = parse_model_endpoint_store_prefix(path)
            cursor = client.kv.new_cursor(
                container=container,
                table_path=path,
                access_key=auth_info.data_session,
                filter_expression=self.build_kv_cursor_filter_expression(
                    project, function, model, labels, top_level,
                ),
                # Only the ids are needed here, the full records are fetched below
                attribute_names=["endpoint_id"],
                raise_for_status=RaiseForStatus.never,
            )
            try:
                items = cursor.all()
            except Exception as exc:
                # Deliberately best-effort: a failing query results in an empty list,
                # but the failure is logged so it does not go unnoticed
                logger.debug(
                    "Failed listing model endpoints from KV. Returning an empty list",
                    project=project,
                    exc=str(exc),
                )
                return endpoint_list

            uids = [item["endpoint_id"] for item in items]

        for endpoint_id in uids:
            endpoint = self.get_endpoint(
                auth_info=auth_info,
                project=project,
                endpoint_id=endpoint_id,
                metrics=metrics,
                start=start,
                end=end,
            )
            endpoint_list.endpoints.append(endpoint)
        return endpoint_list
Ejemplo n.º 17
0
    def list_endpoints(
        access_key: str,
        project: str,
        model: Optional[str] = None,
        function: Optional[str] = None,
        labels: Optional[List[str]] = None,
        metrics: Optional[List[str]] = None,
        start: str = "now-1h",
        end: str = "now",
    ) -> ModelEndpointList:
        """
        Lists the model endpoints of a project. The ids of the matching endpoints are queried from the KV table,
        and each of them is then fetched through ModelEndpoints.get_endpoint.

        The listing can be narrowed down by:
        1) model
        2) function
        3) labels
        With no filters given, all available endpoints of the project are listed.

        Endpoint related metrics can be requested through the 'metrics' parameter, for the time range set by
        'start' and 'end'. When 'metrics' is None, no metrics are requested for the endpoints.

        :param access_key: V3IO access key for managing user permissions
        :param project: The name of the project
        :param model: The name of the model to filter by
        :param function: The name of the function to filter by
        :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a label
        (i.e. list("key==value")) or by looking for the existence of a given key (i.e. "key")
        :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
        :param start: The start time of the metrics
        :param end: The end time of the metrics
        """

        logger.info(
            "Listing endpoints",
            project=project,
            model=model,
            function=function,
            labels=labels,
            metrics=metrics,
            start=start,
            end=end,
        )

        v3io_client = get_v3io_client(endpoint=config.v3io_api)

        endpoints_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project, kind=ENDPOINTS)
        _, container, table_path = parse_model_endpoint_store_prefix(
            endpoints_prefix)

        cursor = v3io_client.kv.new_cursor(
            container=container,
            table_path=table_path,
            access_key=access_key,
            filter_expression=build_kv_cursor_filter_expression(
                project, function, model, labels),
            # Only the ids are needed here, the full records are fetched below
            attribute_names=["endpoint_id"],
        )

        endpoint_list = ModelEndpointList(endpoints=[])
        # next_item() is called until it returns None, which marks the end of the cursor
        for item in iter(cursor.next_item, None):
            endpoint_list.endpoints.append(
                ModelEndpoints.get_endpoint(
                    access_key=access_key,
                    project=project,
                    endpoint_id=item["endpoint_id"],
                    metrics=metrics,
                    start=start,
                    end=end,
                ))
        return endpoint_list
Ejemplo n.º 18
0
    def get_endpoint(
        access_key: str,
        project: str,
        endpoint_id: str,
        metrics: Optional[List[str]] = None,
        start: str = "now-1h",
        end: str = "now",
        feature_analysis: bool = False,
    ) -> ModelEndpoint:
        """
        Returns a ModelEndpoint object with additional metrics and feature related data.

        The endpoint record is read from the project's endpoints KV table and mapped onto the ModelEndpoint schema.
        Scalar attributes that are missing or hold an empty string are returned as None; other falsy values
        (e.g. `False`, `0`, `0.0`) are returned as stored.

        :param access_key: V3IO access key for managing user permissions
        :param project: The name of the project
        :param endpoint_id: The id of the model endpoint
        :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
        :param start: The start time of the metrics
        :param end: The end time of the metrics
        :param feature_analysis: When True, the base feature statistics and current feature statistics will be added to
        the output of the resulting object

        :returns: The ModelEndpoint built from the stored record
        :raises MLRunNotFoundError: When the KV lookup yields no record for the given endpoint id
        """

        def _blank_to_none(value):
            # Only a missing attribute or an empty string means "no value". A plain `value or None` would also
            # discard legitimate falsy values such as active=False, error_count=0 or accuracy=0.0
            if value is None or value == "":
                return None
            return value

        logger.info(
            "Getting model endpoint record from kv",
            endpoint_id=endpoint_id,
        )

        client = get_v3io_client(endpoint=config.v3io_api)

        path = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project, kind=ENDPOINTS
        )
        _, container, path = parse_model_endpoint_store_prefix(path)

        # The client is asked not to raise on error statuses; an empty item is reported as "not found" below
        response = client.kv.get(
            container=container,
            table_path=path,
            key=endpoint_id,
            access_key=access_key,
            raise_for_status=RaiseForStatus.never,
        )
        record = response.output.item

        if not record:
            raise MLRunNotFoundError(f"Endpoint {endpoint_id} not found")

        # These attributes are passed through _json_loads_if_not_none before being used
        labels = _json_loads_if_not_none(record.get("labels"))
        feature_names = _json_loads_if_not_none(record.get("feature_names"))
        label_names = _json_loads_if_not_none(record.get("label_names"))
        feature_stats = _json_loads_if_not_none(record.get("feature_stats"))
        current_stats = _json_loads_if_not_none(record.get("current_stats"))
        drift_measures = _json_loads_if_not_none(record.get("drift_measures"))
        monitor_configuration = _json_loads_if_not_none(
            record.get("monitor_configuration")
        )

        endpoint = ModelEndpoint(
            metadata=ModelEndpointMetadata(
                project=record.get("project"),
                labels=labels,
                uid=endpoint_id,
            ),
            spec=ModelEndpointSpec(
                function_uri=record.get("function_uri"),
                model=record.get("model"),
                model_class=_blank_to_none(record.get("model_class")),
                model_uri=_blank_to_none(record.get("model_uri")),
                # Empty decoded collections are reported as None
                feature_names=feature_names or None,
                label_names=label_names or None,
                stream_path=_blank_to_none(record.get("stream_path")),
                algorithm=_blank_to_none(record.get("algorithm")),
                monitor_configuration=monitor_configuration or None,
                active=_blank_to_none(record.get("active")),
            ),
            status=ModelEndpointStatus(
                state=_blank_to_none(record.get("state")),
                feature_stats=feature_stats or None,
                current_stats=current_stats or None,
                first_request=_blank_to_none(record.get("first_request")),
                last_request=_blank_to_none(record.get("last_request")),
                accuracy=_blank_to_none(record.get("accuracy")),
                error_count=_blank_to_none(record.get("error_count")),
                drift_status=_blank_to_none(record.get("drift_status")),
            ),
        )

        if feature_analysis and feature_names:
            endpoint_features = get_endpoint_features(
                feature_names=feature_names,
                feature_stats=feature_stats,
                current_stats=current_stats,
            )
            # Drift measures are attached only together with the per-feature analysis
            if endpoint_features:
                endpoint.status.features = endpoint_features
                endpoint.status.drift_measures = drift_measures

        if metrics:
            endpoint_metrics = get_endpoint_metrics(
                access_key=access_key,
                project=project,
                endpoint_id=endpoint_id,
                start=start,
                end=end,
                metrics=metrics,
            )
            if endpoint_metrics:
                endpoint.status.metrics = endpoint_metrics

        return endpoint
Ejemplo n.º 19
0
    def test_get_endpoint_metrics(self):
        """
        Write known prediction counts to the events TSDB table for several mock endpoints and verify that the
        same counts are returned as metrics when each endpoint is fetched through the run DB.
        """
        metric_name = "predictions_per_second_count_1s"

        access_key = self._get_auth_info().data_session
        run_db = mlrun.get_run_db()

        events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project_name, kind=mlrun.api.crud.ModelEndpoints().EVENTS
        )
        _, container, tsdb_table = parse_model_endpoint_store_prefix(events_prefix)

        frames_client = get_frames_client(
            token=access_key, container=container, address=config.v3io_framesd,
        )

        reference_time = datetime.utcnow()

        for _ in range(5):
            mock_endpoint = self._mock_random_endpoint()
            endpoint_uid = mock_endpoint.metadata.uid
            run_db.create_or_patch_model_endpoint(
                mock_endpoint.metadata.project, endpoint_uid, mock_endpoint
            )
            frames_client.create(
                backend="tsdb", table=tsdb_table, rate="10/m", if_exists=1
            )

            # One sample per minute over the ten minutes preceding the reference time
            counts = [randint(1, 10) for _ in range(10)]
            expected_total = sum(counts)
            samples = [
                pd.DataFrame(
                    data=[
                        {
                            "predictions_per_second_count_1s": count,
                            "endpoint_id": endpoint_uid,
                            "timestamp": reference_time
                            - timedelta(minutes=10 - minute),
                        }
                    ]
                )
                for minute, count in enumerate(counts)
            ]

            frames_client.write(
                backend="tsdb",
                table=tsdb_table,
                dfs=samples,
                index_cols=["timestamp", "endpoint_id"],
            )

            fetched = run_db.get_model_endpoint(
                self.project_name,
                endpoint_uid,
                metrics=[metric_name],
            )
            assert len(fetched.status.metrics) > 0

            series = fetched.status.metrics[metric_name]
            assert series.name == "predictions_per_second_count_1s"

            # Each returned value is indexable; position 1 holds the reported count
            returned_total = sum(point[1] for point in series.values)
            assert expected_total == returned_total