def _get_endpoint_metrics(
    endpoint_id: str,
    name: List[str],
    start: str = "now-1h",
    end: str = "now",
) -> List[Metric]:
    """Read the requested time metrics of one endpoint from the TSDB events table.

    :param endpoint_id: endpoint id, used in the frames filter expression
    :param name:        metric names, each parsed with ``TimeMetric.from_string``
    :param start:       start of the time range, passed as-is to the frames client
    :param end:         end of the time range, passed as-is to the frames client

    :return: the results of ``transform_df_to_metric`` for every requested
             metric, skipping the ones that came back as ``None``
    :raise MLRunInvalidArgumentError: if no names were given, or if parsing a
             name raised ``NotImplementedError``
    """
    if not name:
        raise MLRunInvalidArgumentError("Metric names must be provided")

    time_metrics = []
    try:
        for metric_name in name:
            time_metrics.append(TimeMetric.from_string(metric_name))
    except NotImplementedError as e:
        raise MLRunInvalidArgumentError(str(e))

    # Columns must have at least an endpoint_id attribute for frames' filter expression
    columns = ["endpoint_id", *(tm.tsdb_column for tm in time_metrics)]

    frames_client = get_frames_client(
        container=config.httpdb.model_endpoint_monitoring.container
    )
    data = frames_client.read(
        backend="tsdb",
        table=ENDPOINT_EVENTS_TABLE_PATH,
        columns=columns,
        filter=f"endpoint_id=='{endpoint_id}'",
        start=start,
        end=end,
    )

    found = []
    for tm in time_metrics:
        transformed = tm.transform_df_to_metric(data)
        if transformed is not None:
            found.append(transformed)
    return found
def store_feature_set(
    self,
    name,
    feature_set: Union[dict, schemas.FeatureSet],
    project="",
    tag=None,
    uid=None,
    versioned=True,
) -> schemas.FeatureSet:
    """Store a feature set through a PUT call on its reference path.

    :param name:        feature set name, part of the request path
    :param feature_set: the feature set, as a dict or as a ``schemas.FeatureSet``
                        (converted to a dict with ``.dict()``)
    :param project:     project name, ``default_project`` is used when empty
    :param tag:         tag to use as the reference (cannot be combined with ``uid``)
    :param uid:         uid to use as the reference (cannot be combined with ``tag``)
    :param versioned:   passed to the API call as the ``versioned`` request param

    :return: a ``schemas.FeatureSet`` built from the response JSON
    :raise MLRunInvalidArgumentError: if both ``uid`` and ``tag`` were provided
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    if isinstance(feature_set, schemas.FeatureSet):
        feature_set = feature_set.dict()

    project = project or default_project
    # uid wins over tag, "latest" is the fallback reference
    reference = uid or tag or "latest"

    response = self.api_call(
        "PUT",
        f"projects/{project}/feature-sets/{name}/references/{reference}",
        f"Failed storing feature-set {project}/{name}",
        params={"versioned": versioned},
        body=json.dumps(feature_set),
    )
    return schemas.FeatureSet(**response.json())
def mark_as_best(self):
    """mark a child as the best iteration result, see .get_child_context()

    Reports this context (as a dict) to the parent through
    ``log_iteration_results``, keyed by this context's iteration number.

    :raise MLRunInvalidArgumentError: if the context has no parent or no
           (non-zero) iteration, i.e. it is not a child run
    """
    parent, iteration = self._parent, self._iteration
    if not (parent and iteration):
        raise MLRunInvalidArgumentError(
            "can only mark a child run as best iteration")
    parent.log_iteration_results(iteration, None, self.to_dict())
def build_kv_cursor_filter_expression(
    project: str,
    function: Optional[str] = None,
    model: Optional[str] = None,
    labels: Optional[List[str]] = None,
):
    """Build a KV cursor filter expression out of the given criteria.

    All the conditions are joined with `` AND ``. The project condition is
    always present; function, model and labels are added only when given.

    :param project:  project name, compared with ``project=='<project>'``
    :param function: optional function name to match
    :param model:    optional model name to match
    :param labels:   optional labels, each either ``name`` (checks that the
                     label exists) or ``name=value`` (checks for equality).
                     A ``_`` prefix is added to the label when it is missing.

    :return: the filter expression string
    :raise MLRunInvalidArgumentError: if ``project`` is empty
    """
    if not project:
        raise MLRunInvalidArgumentError("project can't be empty")

    filter_expression = [f"project=='{project}'"]

    if function:
        filter_expression.append(f"function=='{function}'")
    if model:
        filter_expression.append(f"model=='{model}'")

    for label in labels or []:
        if not label.startswith("_"):
            label = f"_{label}"

        # Split on the first "=" only: a value that itself contains "="
        # (e.g. "expr=x=1") used to break the two-name unpacking with a ValueError
        lbl, separator, value = label.partition("=")
        if separator:
            filter_expression.append(f"{lbl.strip()}=='{value.strip()}'")
        else:
            filter_expression.append(f"exists({label})")

    return " AND ".join(filter_expression)
def get_project(self, name: str) -> mlrun.projects.MlrunProject:
    """Retrieve a project by name through a GET call on ``projects/<name>``.

    :param name: project name, must not be empty

    :return: an ``MlrunProject`` built with ``from_dict`` from the response JSON
    :raise MLRunInvalidArgumentError: if ``name`` is empty
    """
    if not name:
        raise MLRunInvalidArgumentError("Name must be provided")

    response = self.api_call(
        "GET", f"projects/{name}", f"Failed retrieving project {name}"
    )
    return mlrun.projects.MlrunProject.from_dict(response.json())
def add_vault_secrets(self, items, project=None, user=None):
    """POST ``items`` (wrapped as ``{"data": items}``) to the Vault path of a project or a user.

    :param items:   the secrets payload, sent under the ``data`` key
    :param project: project whose path is used (see ``VaultStore._generate_path``)
    :param user:    user whose path is used (see ``VaultStore._generate_path``)

    :raise MLRunInvalidArgumentError: if the API call returned a falsy response
    """
    secrets_path = VaultStore._generate_path(project=project, user=user)
    response = self._api_call("POST", secrets_path, {"data": items})
    if response:
        return

    raise MLRunInvalidArgumentError(
        f"Vault failed the API call to create secrets. project={project}/user={user}"
    )
def _generate_path(
    prefix=vault_default_prefix,
    user=None,
    project=None,
    user_prefix="users",
    project_prefix="projects",
):
    """Generate the Vault secrets path of either a user or a project.

    :param prefix:         string the generated path starts with
    :param user:           user name (cannot be combined with ``project``)
    :param project:        project name (cannot be combined with ``user``)
    :param user_prefix:    path segment placed before the user name
    :param project_prefix: path segment placed before the project name

    :return: ``<prefix>/mlrun/<user_prefix>/<user>`` or
             ``<prefix>/mlrun/<project_prefix>/<project>``
    :raise MLRunInvalidArgumentError: if both, or neither, of ``user`` and
             ``project`` were provided
    """
    if user and project:
        raise MLRunInvalidArgumentError(
            "Both user and project were provided for Vault operations")
    if not user and not project:
        raise MLRunInvalidArgumentError(
            "To generate a vault secret path, either user or project must be specified"
        )

    # exactly one of user / project is set at this point
    if user:
        scope, owner = user_prefix, user
    else:
        scope, owner = project_prefix, project
    return prefix + f"/mlrun/{scope}/{owner}"
def delete_vault_secrets(self, project=None, user=None):
    """Delete the Vault secrets of a project or a user.

    :param project: project whose path is used (see ``VaultStore._generate_path``)
    :param user:    user whose path is used (see ``VaultStore._generate_path``)

    :raise MLRunInvalidArgumentError: if the API call returned a falsy response
    """
    self._login()

    # Using the API to delete all versions + metadata of the given secret.
    secret_path = VaultStore._generate_path(prefix="", project=project, user=user)
    response = self._api_call("DELETE", "v1/secret/metadata" + secret_path)
    if response:
        return

    raise MLRunInvalidArgumentError(
        f"Vault failed the API call to delete secrets. project={project}/user={user}"
    )
def mount_pvc(pvc_name=None, volume_name="pipeline", volume_mount_path="/mnt/pipeline"):
    """
    Modifier function to apply to a Container Op to simplify volume, volume mount addition and
    enable better reuse of volumes, volume claims across container ops.

    When the MLRUN_PVC_MOUNT environment variable is set (``<pvc-name>:<mount-path>``)
    it takes precedence over the ``pvc_name`` and ``volume_mount_path`` arguments.

    Usage::

        train = train_op(...)
        train.apply(mount_pvc('claim-name', 'pipeline', '/mnt/pipeline'))

    :param pvc_name:          name of the persistent volume claim
    :param volume_name:       name given to the volume and to its mount
    :param volume_mount_path: path the volume is mounted at

    :return: a function that takes a task, adds the volume and the volume mount to it
    :raise MLRunInvalidArgumentError: if MLRUN_PVC_MOUNT is malformed or no
             PVC name could be resolved
    """
    env_mount = os.environ.get("MLRUN_PVC_MOUNT")
    if env_mount is not None:
        parts = env_mount.split(":")
        if len(parts) != 2:
            raise MLRunInvalidArgumentError(
                "MLRUN_PVC_MOUNT should include <pvc-name>:<mount-path>")
        pvc_name, volume_mount_path = parts

    if not pvc_name:
        raise MLRunInvalidArgumentError(
            "No PVC name: use the pvc_name parameter or configure the MLRUN_PVC_MOUNT environment variable"
        )

    def _mount_pvc(task):
        # imported lazily, only needed once the modifier is applied
        from kubernetes import client as k8s_client

        claim_source = k8s_client.V1PersistentVolumeClaimVolumeSource(
            claim_name=pvc_name)
        volume = k8s_client.V1Volume(
            name=volume_name, persistent_volume_claim=claim_source)
        volume_mount = k8s_client.V1VolumeMount(
            mount_path=volume_mount_path, name=volume_name)
        return task.add_volume(volume).add_volume_mount(volume_mount)

    return _mount_pvc
def get_feature_set(self,
                    name: str,
                    project: str = "",
                    tag: str = None,
                    uid: str = None) -> schemas.FeatureSet:
    """Retrieve a feature set through a GET call on its reference path.

    :param name:    feature set name, part of the request path
    :param project: project name, ``default_project`` is used when empty
    :param tag:     tag to use as the reference (cannot be combined with ``uid``)
    :param uid:     uid to use as the reference (cannot be combined with ``tag``)

    :return: a ``schemas.FeatureSet`` built from the response JSON
    :raise MLRunInvalidArgumentError: if both ``uid`` and ``tag`` were provided
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    project = project or default_project
    # uid wins over tag, "latest" is the fallback reference
    reference = uid or tag or "latest"

    response = self.api_call(
        "GET",
        f"projects/{project}/feature-sets/{name}/references/{reference}",
        f"Failed retrieving feature-set {project}/{name}",
    )
    return schemas.FeatureSet(**response.json())
def store_path_to_spark(path):
    """Convert a store path to the form used with spark.

    - ``v3io:///x`` becomes ``v3io://x``
    - ``s3://x`` becomes ``s3a://x``
    - ``s3:///x`` is rejected with an error suggesting ``s3://x``
    - any other path is returned unchanged

    :param path: the store path (string)

    :return: the converted path
    :raise MLRunInvalidArgumentError: for paths starting with ``s3:///``
    """
    if path.startswith("v3io:///"):
        return "v3io:" + path[len("v3io:/"):]

    if not path.startswith("s3://"):
        return path

    if path.startswith("s3:///"):
        # 's3:///' not supported since mlrun 0.9.0 should use s3:// instead
        from mlrun.errors import MLRunInvalidArgumentError

        valid_path = "s3:" + path[len("s3:/"):]
        raise MLRunInvalidArgumentError(
            f"'s3:///' is not supported, try using 's3://' instead.\nE.g: '{valid_path}'"
        )

    return "s3a:" + path[len("s3:"):]
def get_child_context(self, with_parent_params=False, **params):
    """get child context (iteration)

    allow sub experiments (epochs, hyper-param, ..) under a parent
    will create a new iteration, log_xx will update the child only
    use commit_children() to save all the children and specify the best run

    example::

        def handler(context: mlrun.MLClientCtx, data: mlrun.DataItem):
            df = data.as_df()
            best_accuracy = accuracy_sum = 0
            for param in param_list:
                with context.get_child_context(myparam=param) as child:
                    accuracy = child_handler(child, df, **child.parameters)
                    accuracy_sum += accuracy
                    child.log_result('accuracy', accuracy)
                    if accuracy > best_accuracy:
                        child.mark_as_best()
                        best_accuracy = accuracy

            context.log_result('avg_accuracy', accuracy_sum / len(param_list))

    :param params:             extra (or override) params to parent context
    :param with_parent_params: child will copy the parent parameters and add to them

    :return: child context
    :raise MLRunInvalidArgumentError: when called on a context whose iteration is not 0
    """
    if self.iteration != 0:
        raise MLRunInvalidArgumentError(
            "cannot create child from a child iteration!")

    # the child starts from a deep copy of the parent's dict
    child_struct = deepcopy(self.to_dict())
    if not with_parent_params:
        update_in(child_struct, ["spec", "parameters"], {})
    for key, val in params.items():
        update_in(child_struct, ["spec", "parameters", key], val)

    # iterations are numbered from 1, in creation order
    next_iteration = len(self._children) + 1
    update_in(child_struct, ["metadata", "iteration"], next_iteration)
    child_struct["status"] = {}

    child = MLClientCtx.from_dict(
        child_struct, self._rundb, self._autocommit, log_stream=self._logger
    )
    child._parent = self
    self._children.append(child)
    return child
def log_results(self, results: dict, commit=False):
    """log a set of scalar result values

    example::

        context.log_results({'accuracy': 0.85, 'loss': 0.2})

    :param results:  key/value dict of results (keys are stored as strings)
    :param commit:   commit (write to DB now vs wait for the end of the run)

    :raise MLRunInvalidArgumentError: if ``results`` is not a dict
    """
    if not isinstance(results, dict):
        raise MLRunInvalidArgumentError(
            "(multiple) results must be in the form of dict")

    for key, value in results.items():
        self._results[str(key)] = _cast_result(value)
    self._update_db(commit=commit)
def get_endpoint_metrics(
    self,
    access_key: str,
    project: str,
    endpoint_id: str,
    metrics: List[str],
    start: str = "now-1h",
    end: str = "now",
) -> Dict[str, Metric]:
    """Read the requested metrics of one endpoint from the project's events TSDB table.

    :param access_key:  passed as the token of the frames client
    :param project:     project name, used to format the events store prefix
    :param endpoint_id: endpoint id, used in the frames filter expression
    :param metrics:     names of the metric columns to read
    :param start:       start of the time range, passed as-is to the frames client
    :param end:         end of the time range, passed as-is to the frames client

    :return: mapping of metric name to ``Metric``, holding ``(str(timestamp), value)``
             pairs; metrics with no data in the result are left out
    :raise MLRunInvalidArgumentError: if ``metrics`` is empty
    """
    if not metrics:
        raise MLRunInvalidArgumentError("Metric names must be provided")

    events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=mlrun.api.schemas.ModelMonitoringStoreKinds.EVENTS
    )
    _, container, table_path = parse_model_endpoint_store_prefix(events_prefix)

    frames_client = get_frames_client(
        token=access_key,
        address=config.v3io_framesd,
        container=container,
    )
    data = frames_client.read(
        backend="tsdb",
        table=table_path,
        columns=["endpoint_id", *metrics],
        filter=f"endpoint_id=='{endpoint_id}'",
        start=start,
        end=end,
    )

    series_by_column = data.to_dict()
    metrics_mapping = {}
    for metric_name in metrics:
        series = series_by_column.get(metric_name)
        if series is not None:
            metrics_mapping[metric_name] = Metric(
                name=metric_name,
                values=[(str(timestamp), value) for timestamp, value in series.items()],
            )
    return metrics_mapping
def create_project_role(self, project, sa, policy, namespace="default-tenant"):
    """Create the Vault kubernetes-auth role of a project and return the role name.

    :param project:   project name, used to build the role name
    :param sa:        sent as the role's ``bound_service_account_names``
    :param policy:    policy name, sent as the single entry of the role's ``policies``
    :param namespace: sent as the role's ``bound_service_account_namespaces``

    :return: the role name (``mlrun-role-project-<project>``)
    :raise MLRunInvalidArgumentError: if the API call returned a falsy response
    """
    role_name = f"mlrun-role-project-{project}"
    # TODO - need to make sure name is escaped properly and invalid chars are stripped
    url = "v1/auth/kubernetes/role/" + role_name

    role_object = {
        "bound_service_account_names": sa,
        "bound_service_account_namespaces": namespace,
        "policies": [policy],
        "token_ttl": mlconf.secret_stores.vault.token_ttl,
    }

    response = self._api_call("POST", url, role_object)
    if not response:
        # this call creates a role - the message used to say "create a secret"
        raise MLRunInvalidArgumentError(
            f"Vault failed the API call to create a role. "
            f"Response code: ({response.status_code}) - {response.reason}"
        )
    return role_name
def create_project_policy(self, project):
    """Create (PUT) the Vault ACL policy of a project and return the policy name.

    The policy lists the read, list, create, delete and update capabilities
    for the project's secrets path and for everything below it.

    :param project: project name, used in the policy name and in the secrets paths

    :return: the policy name (``mlrun-project-<project>``)
    :raise MLRunInvalidArgumentError: if the API call returned a falsy response
    """
    policy_name = f"mlrun-project-{project}"
    # TODO - need to make sure name is escaped properly and invalid chars are stripped

    capabilities_line = ' capabilities = ["read", "list", "create", "delete", "update"]\n'
    secrets_path = f"secret/data/mlrun/projects/{project}"
    policy_str = "".join(
        [
            f'path "{secrets_path}" {{\n',
            capabilities_line,
            "}\n",
            f'path "{secrets_path}/*" {{\n',
            capabilities_line,
            "}",
        ]
    )

    response = self._api_call(
        "PUT", "v1/sys/policies/acl/" + policy_name, {"policy": policy_str}
    )
    if not response:
        raise MLRunInvalidArgumentError(
            f"Vault failed the API call to create a policy. "
            f"Response code: ({response.status_code}) - {response.reason}")
    return policy_name
def update_feature_set(
    self,
    name,
    feature_set: Union[dict, schemas.FeatureSetUpdate],
    project="",
    tag=None,
    uid=None,
):
    """Update a feature set through a PUT call on its reference path.

    :param name:        feature set name, part of the request path
    :param feature_set: the update, as a ``schemas.FeatureSetUpdate`` or as a dict
                        (a dict is first converted to ``schemas.FeatureSetUpdate``)
    :param project:     project name, ``default_project`` is used when empty
    :param tag:         tag to use as the reference (cannot be combined with ``uid``)
    :param uid:         uid to use as the reference (cannot be combined with ``tag``)

    :raise MLRunInvalidArgumentError: if both ``uid`` and ``tag`` were provided
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    project = project or default_project
    # uid wins over tag, "latest" is the fallback reference
    reference = uid or tag or "latest"

    if isinstance(feature_set, dict):
        feature_set = schemas.FeatureSetUpdate(**feature_set)

    # NOTE(review): this path uses "feature_sets" (underscore) - confirm it matches the server route
    self.api_call(
        "PUT",
        f"projects/{project}/feature_sets/{name}/references/{reference}",
        f"Failed updating feature-set {project}/{name}",
        body=json.dumps(feature_set.dict()),
    )
async def get_endpoint_metrics(
    access_key: str,
    project: str,
    endpoint_id: str,
    metrics: List[str],
    start: str = "now-1h",
    end: str = "now",
) -> Dict[str, Metric]:
    """Read the requested metrics of one endpoint from the project's events TSDB table.

    The frames ``read`` call is executed through ``run_in_threadpool``.

    :param access_key:  passed as the token of the frames client
    :param project:     project name, prefix of the events table path
    :param endpoint_id: endpoint id, used in the frames filter expression
    :param metrics:     names of the metric columns to read
    :param start:       start of the time range, passed as-is to the frames client
    :param end:         end of the time range, passed as-is to the frames client

    :return: mapping of metric name to ``Metric``, holding ``(str(timestamp), value)``
             pairs; metrics with no data in the result are left out
    :raise MLRunInvalidArgumentError: if ``metrics`` is empty
    """
    if not metrics:
        raise MLRunInvalidArgumentError("Metric names must be provided")

    frames_client = get_frames_client(
        token=access_key,
        address=config.v3io_framesd,
        container=config.model_endpoint_monitoring.container,
    )
    data = await run_in_threadpool(
        frames_client.read,
        backend="tsdb",
        table=f"{project}/{ENDPOINT_EVENTS_TABLE_PATH}",
        columns=["endpoint_id", *metrics],
        filter=f"endpoint_id=='{endpoint_id}'",
        start=start,
        end=end,
    )

    series_by_column = data.to_dict()
    metrics_mapping = {}
    for metric_name in metrics:
        series = series_by_column.get(metric_name)
        if series is not None:
            metrics_mapping[metric_name] = Metric(
                name=metric_name,
                values=[(str(timestamp), value) for timestamp, value in series.items()],
            )
    return metrics_mapping
def update_feature_set(
    self,
    name,
    feature_set_update: dict,
    project="",
    tag=None,
    uid=None,
    patch_mode: Union[str, schemas.PatchMode] = schemas.PatchMode.replace,
):
    """Update a feature set through a PATCH call on its reference path.

    :param name:               feature set name, part of the request path
    :param feature_set_update: dict with the update, sent as the JSON body
    :param project:            project name, ``default_project`` is used when empty
    :param tag:                tag to use as the reference (cannot be combined with ``uid``)
    :param uid:                uid to use as the reference (cannot be combined with ``tag``)
    :param patch_mode:         passed to the API call as the ``patch-mode`` request param

    :raise MLRunInvalidArgumentError: if both ``uid`` and ``tag`` were provided
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    project = project or default_project
    # uid wins over tag, "latest" is the fallback reference
    reference = uid or tag or "latest"

    self.api_call(
        "PATCH",
        f"projects/{project}/feature-sets/{name}/references/{reference}",
        f"Failed updating feature-set {project}/{name}",
        body=json.dumps(feature_set_update),
        params={"patch-mode": patch_mode},
    )
def create_or_patch(
    db_session: Session,
    access_key: str,
    model_endpoint: ModelEndpoint,
    leader_session: Optional[str] = None,
):
    """
    Creates or patch a KV record with the given model_endpoint record

    Before writing, the endpoint is enriched in place:

    - when ``spec.model_uri`` is set, the model object is fetched and its
      ``feature_stats``, ``outputs`` (as label names) and ``algorithm`` fill
      the matching endpoint fields that are still empty
    - when feature stats are available, feature names are cleaned with
      ``_clean_feature_name`` and ``feature_stats`` is re-keyed by the clean names

    :param db_session:     DB session, used to get the run db instance
    :param access_key:     V3IO access key for managing user permissions
    :param model_endpoint: An object representing a model endpoint
    :param leader_session: optional session, passed along when getting the run db instance

    :return: the given ``model_endpoint``, after enrichment
    :raise MLRunInvalidArgumentError: if both ``feature_names`` and ``feature_stats``
           are set but hold a different number of entries
    """
    if model_endpoint.spec.model_uri or model_endpoint.status.feature_stats:
        logger.info(
            "Getting feature metadata",
            project=model_endpoint.metadata.project,
            model=model_endpoint.spec.model,
            function=model_endpoint.spec.function_uri,
            model_uri=model_endpoint.spec.model_uri,
        )

    # If model artifact was supplied, grab model meta data from artifact
    if model_endpoint.spec.model_uri:
        logger.info(
            "Getting model object, inferring column names and collecting feature stats"
        )
        run_db = mlrun.api.api.utils.get_run_db_instance(db_session, leader_session)
        model_obj: ModelArtifact = mlrun.datastore.store_resources.get_store_resource(
            model_endpoint.spec.model_uri, db=run_db
        )

        # values already set on the endpoint are never overridden by the model's
        if not model_endpoint.status.feature_stats and hasattr(
            model_obj, "feature_stats"
        ):
            model_endpoint.status.feature_stats = model_obj.feature_stats

        if not model_endpoint.spec.label_names and hasattr(model_obj, "outputs"):
            model_label_names = [
                _clean_feature_name(f.name) for f in model_obj.outputs
            ]
            model_endpoint.spec.label_names = model_label_names

        if not model_endpoint.spec.algorithm and hasattr(model_obj, "algorithm"):
            model_endpoint.spec.algorithm = model_obj.algorithm

    # If feature_stats was either populated by model_uri or by manual input, make sure to keep the names
    # of the features. If feature_names was supplied, replace the names set in feature_stats, otherwise - make
    # sure to keep a clean version of the names
    if model_endpoint.status.feature_stats:
        logger.info("Feature stats found, cleaning feature names")
        if model_endpoint.spec.feature_names:
            if len(model_endpoint.status.feature_stats) != len(
                model_endpoint.spec.feature_names
            ):
                # the message parts used to be glued together with no separator
                # ("...expected to matchfeature_stats(3)...")
                raise MLRunInvalidArgumentError(
                    "feature_stats and feature_names have a different number of names, "
                    "while expected to match: "
                    f"feature_stats({len(model_endpoint.status.feature_stats)}), "
                    f"feature_names({len(model_endpoint.spec.feature_names)})"
                )

        clean_feature_stats = {}
        clean_feature_names = []
        # supplied feature names are matched to the stats by position
        for i, (feature, stats) in enumerate(
            model_endpoint.status.feature_stats.items()
        ):
            if model_endpoint.spec.feature_names:
                clean_name = _clean_feature_name(model_endpoint.spec.feature_names[i])
            else:
                clean_name = _clean_feature_name(feature)
            clean_feature_stats[clean_name] = stats
            clean_feature_names.append(clean_name)

        model_endpoint.status.feature_stats = clean_feature_stats
        model_endpoint.spec.feature_names = clean_feature_names

        logger.info(
            "Done preparing feature names and stats",
            feature_names=model_endpoint.spec.feature_names,
        )

    # If none of the above was supplied, feature names will be assigned on first contact with the model monitoring
    # system
    logger.info("Updating model endpoint", endpoint_id=model_endpoint.metadata.uid)

    write_endpoint_to_kv(
        access_key=access_key,
        endpoint=model_endpoint,
        update=True,
    )

    logger.info("Model endpoint updated", endpoint_id=model_endpoint.metadata.uid)

    return model_endpoint
def _resolve_reference(tag, uid):
    """Resolve the reference to use out of a tag and a uid.

    :param tag: tag, used when no uid was given
    :param uid: uid, preferred over the tag

    :return: ``uid`` if set, otherwise ``tag`` if set, otherwise ``"latest"``
    :raise MLRunInvalidArgumentError: if both ``uid`` and ``tag`` were provided
    """
    if uid and tag:
        raise MLRunInvalidArgumentError("both uid and tag were provided")

    for candidate in (uid, tag):
        if candidate:
            return candidate
    return "latest"
async def create_or_patch(access_key: str, model_endpoint: ModelEndpoint):
    """
    Creates or updates a KV record with the given model_endpoint record

    Before writing, the endpoint is enriched in place:

    - when ``spec.model_uri`` is set and ``status.feature_stats`` is not, the
      model is fetched (through ``run_in_threadpool``) and its feature stats are used
    - when feature stats are available, feature names are cleaned with
      ``_clean_feature_name`` and ``feature_stats`` is re-keyed by the clean names

    :param access_key:     V3IO access key for managing user permissions
    :param model_endpoint: An object representing a model endpoint

    :return: the given ``model_endpoint``, after enrichment
    :raise MLRunInvalidArgumentError: if both ``feature_names`` and ``feature_stats``
           are set but hold a different number of entries
    """
    if model_endpoint.spec.model_uri or model_endpoint.status.feature_stats:
        logger.info(
            "Getting feature metadata",
            project=model_endpoint.metadata.project,
            model=model_endpoint.spec.model,
            function=model_endpoint.spec.function_uri,
            model_uri=model_endpoint.spec.model_uri,
        )

    # If model artifact was supplied but feature_stats was not, grab model artifact and get feature_stats
    if model_endpoint.spec.model_uri and not model_endpoint.status.feature_stats:
        logger.info(
            "Getting model object, inferring column names and collecting feature stats"
        )
        model_obj = await run_in_threadpool(get_model, model_endpoint.spec.model_uri)
        # only element [1] of the get_model result is used here
        model_endpoint.status.feature_stats = model_obj[1].feature_stats

    # If feature_stats was either populated by model_uri or by manual input, make sure to keep the names
    # of the features. If feature_names was supplied, replace the names set in feature_stats, otherwise - make
    # sure to keep a clean version of the names
    if model_endpoint.status.feature_stats:
        logger.info("Feature stats found, cleaning feature names")
        if model_endpoint.spec.feature_names:
            if len(model_endpoint.status.feature_stats) != len(
                model_endpoint.spec.feature_names
            ):
                # the message parts used to be glued together with no separator
                # ("...expected to matchfeature_stats(3)...")
                raise MLRunInvalidArgumentError(
                    "feature_stats and feature_names have a different number of names, "
                    "while expected to match: "
                    f"feature_stats({len(model_endpoint.status.feature_stats)}), "
                    f"feature_names({len(model_endpoint.spec.feature_names)})"
                )

        clean_feature_stats = {}
        clean_feature_names = []
        # supplied feature names are matched to the stats by position
        for i, (feature, stats) in enumerate(
            model_endpoint.status.feature_stats.items()
        ):
            if model_endpoint.spec.feature_names:
                clean_name = _clean_feature_name(model_endpoint.spec.feature_names[i])
            else:
                clean_name = _clean_feature_name(feature)
            clean_feature_stats[clean_name] = stats
            clean_feature_names.append(clean_name)

        model_endpoint.status.feature_stats = clean_feature_stats
        model_endpoint.spec.feature_names = clean_feature_names

        logger.info(
            "Done preparing feature names and stats",
            feature_names=model_endpoint.spec.feature_names,
        )

    # If none of the above was supplied, feature names will be assigned on first contact with the model monitoring
    # system
    logger.info("Updating model endpoint", endpoint_id=model_endpoint.metadata.uid)

    await write_endpoint_to_kv(
        access_key=access_key,
        endpoint=model_endpoint,
        update=True,
    )

    logger.info("Model endpoint updated", endpoint_id=model_endpoint.metadata.uid)

    return model_endpoint
def log_model(
    self,
    key,
    body=None,
    framework="",
    tag="",
    model_dir=None,
    model_file=None,
    algorithm=None,
    metrics=None,
    parameters=None,
    artifact_path=None,
    upload=True,
    labels=None,
    inputs: List[Feature] = None,
    outputs: List[Feature] = None,
    feature_vector: str = None,
    feature_weights: list = None,
    training_set=None,
    label_column: Union[str, list] = None,
    extra_data=None,
    db_key=None,
    **kwargs,
):
    """log a model artifact and optionally upload it to datastore

    example::

        context.log_model("model", body=dumps(model),
                          model_file="model.pkl",
                          metrics=context.results,
                          training_set=training_df,
                          label_column='label',
                          feature_vector=feature_vector_uri,
                          labels={"app": "fraud"})

    :param key:             artifact key
    :param body:            will use the body as the artifact content
    :param model_file:      path to the local model file we upload (see also model_dir)
    :param model_dir:       path to the local dir holding the model file and extra files
    :param artifact_path:   target artifact path (when not using the default)
                            to define a subpath under the default location use:
                            `artifact_path=context.artifact_subpath('data')`
    :param framework:       name of the ML framework
    :param algorithm:       training algorithm name
    :param tag:             version tag
    :param metrics:         key/value dict of model metrics
    :param parameters:      key/value dict of model parameters
    :param inputs:          ordered list of model input features (name, type, ..),
                            cannot be combined with training_set
    :param outputs:         ordered list of model output/result elements (name, type, ..)
    :param upload:          upload to datastore (default is True)
    :param labels:          a set of key/value labels to tag the artifact with
    :param feature_vector:  feature store feature vector uri (store://feature-vectors/<project>/<name>[:tag])
    :param feature_weights: list of feature weights, one per input column
    :param training_set:    training set dataframe, used to infer inputs & outputs
    :param label_column:    which columns in the training set are the label (target) columns
    :param extra_data:      key/value list of extra files/charts to link with this model
                            value can be abs/relative path string | bytes | artifact object
    :param db_key:          the key to use in the artifact DB table, by default
                            its run name + '_' + key
                            db_key=False will not register it in the artifacts table
    :param kwargs:          extra keyword arguments, passed as-is to ``ModelArtifact``

    :returns: artifact object
    :raise MLRunInvalidArgumentError: if both training_set and inputs were specified
    """
    has_training_set = training_set is not None
    if has_training_set and inputs:
        raise MLRunInvalidArgumentError(
            "cannot specify inputs and training set together")

    model_spec = dict(
        model_file=model_file,
        metrics=metrics,
        parameters=parameters,
        inputs=inputs,
        outputs=outputs,
        framework=framework,
        algorithm=algorithm,
        feature_vector=feature_vector,
        feature_weights=feature_weights,
        extra_data=extra_data,
    )
    model = ModelArtifact(key, body, **model_spec, **kwargs)

    if has_training_set:
        # inputs/outputs are inferred from the training set
        model.infer_from_df(training_set, label_column)

    item = self._artifacts_manager.log_artifact(
        self,
        model,
        local_path=model_dir,
        artifact_path=extend_artifact_path(artifact_path, self.artifact_path),
        tag=tag,
        upload=upload,
        db_key=db_key,
        labels=labels,
    )
    self._update_db()
    return item