def init_context(context):
    # Event batch and window size used for accumulating incoming events
    setattr(context, "batch", [])
    setattr(context, "window", int(os.getenv("window", 10)))

    # Target directory for the inference parquet files
    setattr(context, "save_to", os.getenv("save_to", "/bigdata/inference_pq/"))
    os.makedirs(context.save_to, exist_ok=True)

    # Make sure the MLRun API endpoint, artifact path and function hub URL are configured
    mlrun.mlconf.dbpath = mlrun.mlconf.dbpath or "http://mlrun-api:8080"
    artifact_path = os.getenv("artifact_path", None)
    if artifact_path:
        mlrun.mlconf.artifact_path = artifact_path
    if "hub_url" in os.environ:
        mlrun.mlconf.hub_url = os.environ["hub_url"]

    # Import the virtual drift function from the function hub and mount storage
    virtual_drift_fn = mlrun.import_function("hub://virtual_drift")
    virtual_drift_fn.apply(mlrun.auto_mount())
    setattr(context, "virtual_drift_fn", virtual_drift_fn)

    # Dataset, column and index configuration used for the drift calculation
    predictions_col = os.getenv("predictions", None)
    label_col = os.getenv("label_col", None)
    setattr(context, "base_dataset", os.getenv("base_dataset", ""))
    setattr(context, "indexes", json.loads(os.environ.get("indexes", "[]")))
    setattr(context, "predictions_col", predictions_col)
    setattr(context, "label_col", label_col)

    # TSDB target for the drift results
    setattr(
        context, "results_tsdb_container", os.getenv("results_tsdb_container", None)
    )
    setattr(context, "results_tsdb_table", os.getenv("results_tsdb_table", None))
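# Illustrative sketch only (not part of the original code): one way the environment
# variables read by init_context above could be supplied when deploying the handler as an
# MLRun Nuclio function. It assumes `mlrun` is imported at module level as in the
# surrounding code; the file name "concept_drift_streaming.py" and all values below are
# hypothetical placeholders, not values taken from the source.
def deploy_drift_stream_example():
    fn = mlrun.code_to_function(
        name="concept-drift-streaming",
        filename="concept_drift_streaming.py",  # hypothetical module that defines init_context
        kind="nuclio",
        image="mlrun/mlrun",
    )
    fn.apply(mlrun.auto_mount())
    fn.set_envs(
        {
            "window": "10",  # window size for accumulating events
            "save_to": "/bigdata/inference_pq/",  # target directory for inference parquet files
            "predictions": "prediction",  # hypothetical predictions column name
            "label_col": "label",  # hypothetical label column name
            "results_tsdb_container": "users",  # hypothetical TSDB container
            "results_tsdb_table": "drift_metrics",  # hypothetical TSDB table for drift results
        }
    )
    return fn.deploy()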
def test_model_monitoring_voting_ensemble(self):
    simulation_time = 60 * 5  # 5 minutes
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Deploy Model Servers
    # Use the following code to deploy a model server in the Iguazio instance.

    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())

    serving_fn.set_topology(
        "router", "mlrun.serving.VotingEnsemble", name="VotingEnsemble"
    )
    serving_fn.set_tracking()

    model_names = [
        "sklearn_RandomForestClassifier",
        "sklearn_LogisticRegression",
        "sklearn_AdaBoostClassifier",
    ]

    for name in model_names:
        # Log the model through the projects API so that it is available through the feature store API
        project.log_model(
            name,
            model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
            training_set=train_set,
        )
        # Add the model to the serving function's routing spec
        serving_fn.add_model(
            name, model_path=f"store://models/{self.project_name}/{name}:latest"
        )

    # Enable model monitoring
    serving_fn.deploy()

    iris_data = iris["data"].tolist()
    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            "v2/models/VotingEnsemble/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.7))
def test_basic_model_monitoring(self):
    simulation_time = 60 * 5  # 5 minutes

    # Deploy Model Servers
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())

    # enable model monitoring
    serving_fn.set_tracking()

    model_name = "sklearn_RandomForestClassifier"

    # Log the model through the projects API so that it is available through the feature store API
    project.log_model(
        model_name,
        model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
        training_set=train_set,
    )
    # Add the model to the serving function's routing spec
    serving_fn.add_model(
        model_name,
        model_path=f"store://models/{self.project_name}/{model_name}:latest",
    )

    # Deploy the function
    serving_fn.deploy()

    # Simulating Requests
    iris_data = iris["data"].tolist()
    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            f"v2/models/{model_name}/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.7))
def test_model_monitoring_voting_ensemble(self):
    simulation_time = 20  # 20 seconds
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Deploy Model Servers
    # Use the following code to deploy a model server in the Iguazio instance.

    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())

    serving_fn.set_topology(
        "router", "mlrun.serving.VotingEnsemble", name="VotingEnsemble"
    )
    serving_fn.set_tracking()

    model_names = [
        "sklearn_RandomForestClassifier",
        "sklearn_LogisticRegression",
        "sklearn_AdaBoostClassifier",
    ]

    for name in model_names:
        # Log the model through the projects API so that it is available through the feature store API
        project.log_model(
            name,
            model_dir=os.path.relpath(self.assets_path),
            model_file="model.pkl",
            training_set=train_set,
            artifact_path=f"v3io:///projects/{project.metadata.name}",
        )
        # Add the model to the serving function's routing spec
        serving_fn.add_model(
            name,
            model_path=project.get_artifact_uri(
                key=f"{name}:latest", category="model"
            ),
        )

    # Enable model monitoring
    serving_fn.deploy()

    # checking that stream processing and batch monitoring were successfully deployed
    mlrun.get_run_db().get_schedule(self.project_name, "model-monitoring-batch")

    base_runtime = BaseRuntime(
        BaseMetadata(
            name="model-monitoring-stream", project=self.project_name, tag=""
        )
    )
    stat = mlrun.get_run_db().get_builder_status(base_runtime)
    assert base_runtime.status.state == "ready", stat

    iris_data = iris["data"].tolist()
    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            "v2/models/VotingEnsemble/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.7))

    # checking top level methods
    top_level_endpoints = mlrun.get_run_db().list_model_endpoints(
        self.project_name, top_level=True
    )
    assert len(top_level_endpoints.endpoints) == 1
    assert (
        top_level_endpoints.endpoints[0].status.endpoint_type == EndpointType.ROUTER
    )

    children_list = top_level_endpoints.endpoints[0].status.children_uids
    assert len(children_list) == len(model_names)

    endpoints_children_list = mlrun.get_run_db().list_model_endpoints(
        self.project_name, uids=children_list
    )
    assert len(endpoints_children_list.endpoints) == len(model_names)
    for child in endpoints_children_list.endpoints:
        assert child.status.endpoint_type == EndpointType.LEAF_EP
def test_basic_model_monitoring(self):
    simulation_time = 90  # 90 seconds

    # Deploy Model Servers
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())

    # enable model monitoring
    serving_fn.set_tracking()

    model_name = "sklearn_RandomForestClassifier"

    # Upload the model through the projects API so that it is available to the serving function
    project.log_model(
        model_name,
        model_dir=os.path.relpath(self.assets_path),
        model_file="model.pkl",
        training_set=train_set,
        artifact_path=f"v3io:///projects/{project.metadata.name}",
    )
    # Add the model to the serving function's routing spec
    serving_fn.add_model(
        model_name,
        model_path=project.get_artifact_uri(
            key=f"{model_name}:latest", category="model"
        ),
    )

    # Deploy the function
    serving_fn.deploy()

    # Simulating Requests
    iris_data = iris["data"].tolist()
    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            f"v2/models/{model_name}/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.1))

    # test metrics
    endpoints_list = mlrun.get_run_db().list_model_endpoints(
        self.project_name, metrics=["predictions_per_second"]
    )
    assert len(endpoints_list.endpoints) == 1

    endpoint = endpoints_list.endpoints[0]
    assert len(endpoint.status.metrics) > 0

    predictions_per_second = endpoint.status.metrics["predictions_per_second"]
    assert predictions_per_second.name == "predictions_per_second"

    total = sum((m[1] for m in predictions_per_second.values))
    assert total > 0