def custom_setup(self):
    from mlrun import get_run_db
    from mlrun.run import new_function
    from mlrun.runtimes import RemoteSparkRuntime

    if not self.spark_image_deployed:
        self._init_env_from_file()
        store, _ = store_manager.get_or_create_store(
            self.get_remote_pq_source_path()
        )
        store.upload(
            self.get_remote_pq_source_path(without_prefix=True),
            self.get_local_pq_source_path(),
        )
        if not self.test_branch:
            RemoteSparkRuntime.deploy_default_image()
        else:
            sj = new_function(
                kind="remote-spark", name="remote-spark-default-image-deploy-temp"
            )
            sj.spec.build.image = RemoteSparkRuntime.default_image
            sj.with_spark_service(spark_service="dummy-spark")
            sj.spec.build.commands = ["pip install git+" + self.test_branch]
            sj.deploy(with_mlrun=False)
            get_run_db().delete_function(name=sj.metadata.name)
        self.spark_image_deployed = True
def _parallel_run_many(
    self, generator, execution: MLClientCtx, runobj: RunObject
) -> RunList:
    results = RunList()
    tasks = generator.generate(runobj)
    handler = runobj.spec.handler
    self._force_handler(handler)
    set_paths(self.spec.pythonpath)
    _, handler = self._get_handler(handler)

    client, function_name = self._get_dask_client(generator.options)
    parallel_runs = generator.options.parallel_runs or 4
    queued_runs = 0
    num_errors = 0

    def process_result(future):
        nonlocal num_errors
        resp, sout, serr = future.result()
        runobj = RunObject.from_dict(resp)
        try:
            log_std(self._db_conn, runobj, sout, serr, skip=self.is_child)
            resp = self._update_run_state(resp)
        except RunError as err:
            resp = self._update_run_state(resp, err=str(err))
            num_errors += 1
        results.append(resp)
        if num_errors > generator.max_errors:
            logger.error("max errors reached, stopping iterations!")
            return True
        run_results = resp["status"].get("results", {})
        stop = generator.eval_stop_condition(run_results)
        if stop:
            logger.info(
                f"reached early stop condition ({generator.options.stop_condition}), stopping iterations!"
            )
        return stop

    completed_iter = as_completed([])
    for task in tasks:
        resp = client.submit(
            remote_handler_wrapper, task.to_json(), handler, self.spec.workdir
        )
        completed_iter.add(resp)
        queued_runs += 1
        if queued_runs >= parallel_runs:
            future = next(completed_iter)
            early_stop = process_result(future)
            queued_runs -= 1
            if early_stop:
                break

    for future in completed_iter:
        process_result(future)

    client.close()
    if function_name and generator.options.teardown_dask:
        logger.info("tearing down the dask cluster..")
        mlrun.get_run_db().delete_runtime_object("dask", function_name, force=True)

    return results
@classmethod
def deploy_default_image(cls):
    from mlrun import get_run_db
    from mlrun.run import new_function

    sj = new_function(
        kind="remote-spark", name="remote-spark-default-image-deploy-temp"
    )
    sj.spec.build.image = cls.default_image
    sj.with_spark_service(spark_service="dummy-spark")
    sj.deploy()
    get_run_db().delete_function(name=sj.metadata.name)
def test_ctx_state_change(self):
    ctx_name = "some-context"
    ctx = mlrun.get_or_create_ctx(ctx_name)
    runs = mlrun.get_run_db().list_runs(
        name=ctx_name, project=mlrun.mlconf.default_project
    )
    assert len(runs) == 1
    assert runs[0]["status"]["state"] == mlrun.runtimes.constants.RunStates.running

    ctx.set_state(mlrun.runtimes.constants.RunStates.completed)
    runs = mlrun.get_run_db().list_runs(
        name=ctx_name, project=mlrun.mlconf.default_project
    )
    assert len(runs) == 1
    assert (
        runs[0]["status"]["state"] == mlrun.runtimes.constants.RunStates.completed
    )
def set_project_model_monitoring_credentials(access_key: str, project: Optional[str] = None):
    """
    Set the credentials that will be used by the project's model monitoring
    infrastructure functions. The supplied credentials must have data access.

    :param access_key: Model monitoring access key for managing user permissions.
    :param project:    The name of the model monitoring project.
    """
    mlrun.get_run_db().create_project_secrets(
        project=project or config.default_project,
        provider=mlrun.api.schemas.SecretProviderName.kubernetes,
        secrets={"MODEL_MONITORING_ACCESS_KEY": access_key},
    )
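# Hedged usage sketch (not from the source): storing the monitoring access key as a
# project secret via the function above. The environment variable and project name
# below are assumptions for illustration only.
import os

set_project_model_monitoring_credentials(
    access_key=os.environ["V3IO_ACCESS_KEY"],  # assumed to be exported in the environment
    project="my-project",  # hypothetical project name
)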
def test_feature_set_db(self):
    name = "stocks_test"
    stocks_set = fs.FeatureSet(name, entities=[Entity("ticker", ValueType.STRING)])
    fs.preview(
        stocks_set,
        stocks,
    )
    stocks_set.save()
    db = mlrun.get_run_db()

    sets = db.list_feature_sets(self.project_name, name)
    assert len(sets) == 1, "bad number of results"

    feature_set = fs.get_feature_set(name, self.project_name)
    assert feature_set.metadata.name == name, "bad feature set response"

    fs.ingest(stocks_set, stocks)
    with pytest.raises(mlrun.errors.MLRunPreconditionFailedError):
        fs.delete_feature_set(name, self.project_name)

    stocks_set.purge_targets()
    fs.delete_feature_set(name, self.project_name)
    sets = db.list_feature_sets(self.project_name, name)
    assert not sets, "Feature set should be deleted"
def test_ctx_creation_creates_run_with_project(self):
    ctx_name = "some-context"
    mlrun.get_or_create_ctx(ctx_name)
    runs = mlrun.get_run_db().list_runs(
        name=ctx_name, project=mlrun.mlconf.default_project
    )
    assert len(runs) == 1
    assert runs[0]["metadata"]["project"] == mlrun.mlconf.default_project
def run_summary_comment(context, workflow_id, repo: str, issue: int, project=""):
    db = get_run_db().connect()
    project = project or context.project
    runs = db.list_runs(project=project, labels=f"workflow={workflow_id}")

    had_errors = i = 0
    for r in runs:
        name = r["metadata"]["name"]
        if r["status"].get("state", "") == "error":
            had_errors += 1
        if name == context.name:
            del runs[i]
        i += 1

    print("errors:", had_errors)
    html = "### Run Results\nWorkflow {} finished with {} errors".format(
        workflow_id, had_errors
    )
    html += "<br>click the hyper links below to see detailed results<br>"
    html += runs.show(display=False, short=True)

    if repo:
        pr_comment(context, repo, issue, html)
    else:
        print("repo not defined")
        print(html)
def get_feature_vector_by_uri(uri, project=None, update=True):
    """get feature vector object from db by uri"""
    db = mlrun.get_run_db()
    default_project = project or config.default_project

    # parse store://.. uri
    if mlrun.datastore.is_store_uri(uri):
        prefix, new_uri = mlrun.datastore.parse_store_uri(uri)
        if prefix != StorePrefix.FeatureVector:
            raise mlrun.errors.MLRunInvalidArgumentError(
                f"provided store uri ({uri}) does not represent a feature vector (prefix={prefix})"
            )
        uri = new_uri

    project, name, tag, uid = parse_versioned_object_uri(uri, default_project)

    resource = mlrun.api.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
        project, "feature-vector"
    )
    if update:
        auth_input = AuthorizationVerificationInput(
            resource=resource, action=mlrun.api.schemas.AuthorizationAction.update
        )
    else:
        auth_input = AuthorizationVerificationInput(
            resource=resource, action=mlrun.api.schemas.AuthorizationAction.read
        )
    db.verify_authorization(auth_input)

    return db.get_feature_vector(name, project, tag, uid)
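# Hedged usage sketch (not from the source): resolving a feature vector from a
# store:// URI with the function above. The URI and project name are hypothetical;
# update=False only asks for read (rather than update) authorization.
vector = get_feature_vector_by_uri(
    "store://feature-vectors/my-project/stocks-vec:latest", update=False
)
print(vector.metadata.name, vector.metadata.tag)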
def setup_method(self, method):
    self._logger = logger
    self._logger.info(
        f"Setting up test {self.__class__.__name__}::{method.__name__}"
    )

    self._test_env = {}
    self._old_env = {}
    self._setup_env(self._get_env_from_file())

    self._run_db = get_run_db()

    # the dbpath is already configured on the test startup before this stage
    # so even though we set the env var, we still need to directly configure
    # it in mlconf.
    mlconf.dbpath = self._test_env["MLRUN_DBPATH"]

    set_environment(
        artifact_path="/User/data",
        project=self.project_name,
    )

    self.custom_setup()

    self._logger.info(
        f"Finished setting up test {self.__class__.__name__}::{method.__name__}"
    )
def delete_feature_vector(name, project=""):
    """
    Delete a :py:class:`~mlrun.feature_store.FeatureVector` object from the DB.

    :param name:    Name of the object to delete
    :param project: Name of the object's project
    """
    db = mlrun.get_run_db()
    return db.delete_feature_vector(name=name, project=project)
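# Hedged usage sketch (not from the source): removing a feature vector once it is no
# longer needed. The vector and project names below are hypothetical.
delete_feature_vector("stocks-vec", project="my-project")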
def reload(self, update_spec=True):
    """reload/sync the feature vector status and spec from the DB"""
    from_db = mlrun.get_run_db().get_feature_vector(
        self.metadata.name, self.metadata.project, self.metadata.tag
    )
    self.status = from_db.status
    if update_spec:
        self.spec = from_db.spec
def test_store_big_run(self):
    """
    Sometimes when a run has artifacts (inputs or outputs), their preview is pretty
    big (although it is limited to some size). When we moved to MySQL, a run similar
    to the one this test stores failed to be read back from the DB after insert with
    _pickle.UnpicklingError: pickle data was truncated.
    We fixed this by changing the BLOB fields to sqlalchemy.dialects.mysql.MEDIUMBLOB.
    This test verifies that the fix works.
    """
    project_name = "runs-project"
    mlrun.new_project(project_name)
    uid = "some-uid"
    run_body_path = str(self.assets_path / "big-run.json")
    with open(run_body_path) as run_body_file:
        run_body = json.load(run_body_file)
    mlrun.get_run_db().store_run(run_body, uid, project_name)
    mlrun.get_run_db().read_run(uid, project_name)
def test_local_context():
    project_name = "xtst"
    mlrun.mlconf.artifact_path = out_path
    context = mlrun.get_or_create_ctx(
        "xx", project=project_name, upload_artifacts=True
    )
    with context:
        context.log_artifact("xx", body="123", local_path="a.txt")
        context.log_model("mdl", body="456", model_file="mdl.pkl", artifact_path="+/mm")

        artifact = context.get_cached_artifact("xx")
        artifact.format = "z"
        context.update_artifact(artifact)

    assert context._state == "completed", "task did not complete"

    db = mlrun.get_run_db()
    run = db.read_run(context._uid, project=project_name)
    assert run["status"]["state"] == "completed", "run status not updated in db"
    assert run["status"]["artifacts"][0]["key"] == "xx", "artifact not updated in db"
    assert (
        run["status"]["artifacts"][0]["format"] == "z"
    ), "run/artifact attribute not updated in db"
    assert (
        run["status"]["artifacts"][1]["target_path"] == out_path + "/mm/"
    ), "artifact not uploaded to subpath"

    db_artifact = db.read_artifact(artifact.db_key, project=project_name)
    assert db_artifact["format"] == "z", "artifact attribute not updated in db"
def save(self, tag="", versioned=False):
    """save to mlrun db"""
    db = mlrun.get_run_db()
    self.metadata.project = self.metadata.project or mlconf.default_project
    tag = tag or self.metadata.tag
    as_dict = self.to_dict()
    db.store_feature_vector(as_dict, tag=tag, versioned=versioned)
def test_store_endpoint_update_existing(self):
    endpoint = self._mock_random_endpoint()
    db = mlrun.get_run_db()
    db.create_or_patch_model_endpoint(
        endpoint.metadata.project, endpoint.metadata.uid, endpoint
    )

    endpoint_before_update = db.get_model_endpoint(
        endpoint.metadata.project, endpoint.metadata.uid
    )

    assert endpoint_before_update.status.state is None

    updated_state = "testing...testing...1 2 1 2"
    endpoint_before_update.status.state = updated_state

    db.create_or_patch_model_endpoint(
        endpoint_before_update.metadata.project,
        endpoint_before_update.metadata.uid,
        endpoint_before_update,
    )

    endpoint_after_update = db.get_model_endpoint(
        endpoint.metadata.project, endpoint.metadata.uid
    )

    assert endpoint_after_update.status.state == updated_state
def test_list_artifacts_filter_by_kind(self):
    prj, uid, key, body = "p9", "u19", "k802", "tomato"
    model_artifact = mlrun.artifacts.model.ModelArtifact(
        key, body, target_path="a.txt"
    )

    data = {"col1": [1, 2], "col2": [3, 4]}
    data_frame = pandas.DataFrame(data=data)
    dataset_artifact = mlrun.artifacts.dataset.DatasetArtifact(
        key, df=data_frame, format="parquet", target_path="b.pq"
    )

    db = mlrun.get_run_db()
    db.store_artifact(key, model_artifact, f"model_{uid}", project=prj)
    db.store_artifact(key, dataset_artifact, f"ds_{uid}", project=prj, iter=42)

    artifacts = db.list_artifacts(project=prj)
    assert len(artifacts) == 2, "bad number of artifacts"

    artifacts = db.list_artifacts(project=prj, kind="model")
    assert len(artifacts) == 1, "bad number of model artifacts"

    artifacts = db.list_artifacts(
        project=prj, category=mlrun.api.schemas.ArtifactCategories.dataset
    )
    assert len(artifacts) == 1, "bad number of dataset artifacts"
def test_create_project(self):
    project_name = "some-project"
    project = mlrun.new_project(project_name)
    project.save_to_db()
    projects = mlrun.get_run_db().list_projects()
    assert len(projects) == 1
    assert projects[0].metadata.name == project_name
def _init_endpoint_record(graph_server, model: V2ModelServer):
    logger.info("Initializing endpoint records")
    try:
        project, uri, tag, hash_key = parse_versioned_object_uri(
            graph_server.function_uri
        )

        if model.version:
            versioned_model_name = f"{model.name}:{model.version}"
        else:
            versioned_model_name = f"{model.name}:latest"

        model_endpoint = ModelEndpoint(
            metadata=ModelEndpointMetadata(project=project, labels=model.labels),
            spec=ModelEndpointSpec(
                function_uri=graph_server.function_uri,
                model=versioned_model_name,
                model_class=model.__class__.__name__,
                model_uri=model.model_path,
                stream_path=config.model_endpoint_monitoring.store_prefixes.default.format(
                    project=project, kind="stream"
                ),
                active=True,
            ),
            status=ModelEndpointStatus(),
        )

        db = mlrun.get_run_db()
        db.create_or_patch_model_endpoint(
            project=project,
            endpoint_id=model_endpoint.metadata.uid,
            model_endpoint=model_endpoint,
        )
    except Exception as e:
        logger.error("Failed to create endpoint record", exc=e)
def test_model_monitoring_voting_ensemble(self):
    simulation_time = 60 * 5  # 5 minutes
    project = mlrun.get_run_db().get_project(self.project_name)
    project.set_model_monitoring_credentials(os.environ.get("V3IO_ACCESS_KEY"))

    iris = load_iris()
    train_set = pd.DataFrame(
        iris["data"],
        columns=[
            "sepal_length_cm",
            "sepal_width_cm",
            "petal_length_cm",
            "petal_width_cm",
        ],
    )

    # Deploy model servers: use the following code to deploy a model server in the
    # Iguazio instance.

    # Import the serving function from the function hub
    serving_fn = mlrun.import_function(
        "hub://v2_model_server", project=self.project_name
    ).apply(mlrun.auto_mount())

    serving_fn.set_topology(
        "router", "mlrun.serving.VotingEnsemble", name="VotingEnsemble"
    )
    serving_fn.set_tracking()

    model_names = [
        "sklearn_RandomForestClassifier",
        "sklearn_LogisticRegression",
        "sklearn_AdaBoostClassifier",
    ]

    for name in model_names:
        # Log the model through the projects API so that it is available through
        # the feature store API
        project.log_model(
            name,
            model_file=os.path.relpath(str(self.assets_path / "model.pkl")),
            training_set=train_set,
        )
        # Add the model to the serving function's routing spec
        serving_fn.add_model(
            name, model_path=f"store://models/{self.project_name}/{name}:latest"
        )

    # Enable model monitoring
    serving_fn.deploy()

    iris_data = iris["data"].tolist()

    t_end = monotonic() + simulation_time
    while monotonic() < t_end:
        data_point = choice(iris_data)
        serving_fn.invoke(
            "v2/models/VotingEnsemble/infer", json.dumps({"inputs": [data_point]})
        )
        sleep(uniform(0.2, 1.7))
def save(self, tag="", versioned=False):
    """save to mlrun db"""
    db = mlrun.get_run_db()
    self.metadata.project = self.metadata.project or mlconf.default_project
    tag = tag or self.metadata.tag
    as_dict = self.to_dict()
    as_dict["spec"]["features"] = as_dict["spec"].get("features", [])  # bypass DB bug
    db.store_feature_set(as_dict, tag=tag, versioned=versioned)
def state(self):
    db = mlrun.get_run_db()
    run = db.read_run(
        uid=self.metadata.uid,
        project=self.metadata.project,
        iter=self.metadata.iteration,
    )
    if run:
        return get_in(run, "status.state", "unknown")
def compare_db_runs(
    project_name=None,
    run_name=None,
    labels=None,
    iter=False,
    start_time_from: datetime = None,
    hide_identical: bool = True,
    exclude: list = [],
    show=None,
    colorscale: str = "Blues",
    filename=None,
    **query_args,
) -> str:
    """
    Get the selected list of runs from the MLRun DB and return/show a parallel
    coordinates plot and a table.

    example:

        compare_db_runs("my-project", run_name="train")

    :param project_name:    Project that the runs belong to
    :param run_name:        Name of the run to retrieve
    :param labels:          List runs that have a specific label assigned. Currently only a single
                            label filter can be applied, otherwise the result will be empty.
    :param iter:            If ``True`` return runs from all iterations. Otherwise, return only
                            parent runs (iter 0).
    :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
    :param hide_identical:  Hide columns with identical values
    :param exclude:         User-provided list of parameters to be excluded from the plot
    :param show:            Allows the user to display the plot within the notebook
    :param filename:        Output filename to save the plot html file
    :param colorscale:      Colors used for the lines in the parallel coordinate plot
    :param query_args:      Additional list_runs() query arguments
    :return:                Plot html
    """
    runs_list = mlrun.get_run_db().list_runs(
        labels=labels,
        iter=iter,
        start_time_from=start_time_from,
        name=run_name,
        project=project_name,
        **query_args,
    )
    runs_df = _runs_list_to_df(runs_list)
    plot_as_html = gen_pcp_plot(
        runs_df,
        index_col="iter",
        hide_identical=hide_identical,
        exclude=exclude,
        colorscale=colorscale,
    )
    return _show_and_export_html(plot_as_html, show, filename, runs_list=runs_list)
def test_vault_end_to_end():
    # This requires an MLRun API server to run and work with Vault. This port should
    # be configured to allow access to the server.
    api_server_port = 57764

    _set_vault_mlrun_configuration(api_server_port)
    project_name = "abc"
    func_name = "vault-function"
    aws_key_value = "1234567890"
    github_key_value = "proj1Key!!!"

    project = new_project(project_name)
    # This call will initialize Vault infrastructure and add the given secrets.
    # It executes on the API server.
    project.create_vault_secrets(
        {"aws_key": aws_key_value, "github_key": github_key_value}
    )

    # This API executes on the client side
    project_secrets = project.get_vault_secret_keys()
    assert project_secrets == ["aws_key", "github_key"], "secrets not created"

    # Create function and set container configuration
    function = code_to_function(
        name=func_name,
        filename="{}/vault_function.py".format(examples_path),
        handler="vault_func",
        project=project_name,
        kind="job",
    )

    function.spec.image = "saarcoiguazio/mlrun:unstable"

    # Create context for the execution
    spec = new_task(
        project=project_name,
        name="vault_test_run",
        handler="vault_func",
        out_path=out_path,
        params={"secrets": ["password", "path", "github_key", "aws_key"]},
    )
    spec.with_secrets("vault", [])

    result = function.run(spec)
    verify_state(result)

    db = get_run_db().connect()
    state, log = db.get_log(result.metadata.uid, project=project_name)
    log = str(log)
    print(state)

    assert (
        log.find("value: {}".format(aws_key_value)) != -1
    ), "secret value not detected in function output"
    assert (
        log.find("value: {}".format(github_key_value)) != -1
    ), "secret value not detected in function output"
def _init_dbs(self, rundb):
    if rundb:
        if isinstance(rundb, str):
            self._rundb = get_run_db(rundb, secrets=self._secrets_manager)
        else:
            self._rundb = rundb
    else:
        self._rundb = mlrun.get_run_db()
    self._data_stores = store_manager.set(self._secrets_manager, db=self._rundb)
    self._artifacts_manager = ArtifactManager(db=self._rundb)
def verify_feature_vector_permissions(
    feature_vector, action: mlrun.api.schemas.AuthorizationAction
):
    project = feature_vector._metadata.project or mlconf.default_project

    resource = mlrun.api.schemas.AuthorizationResourceTypes.feature_vector.to_resource_string(
        project, "feature-vector"
    )

    db = mlrun.get_run_db()
    auth_input = AuthorizationVerificationInput(resource=resource, action=action)
    db.verify_authorization(auth_input)
def test_local_no_context():
    spec = tag_test(base_spec, 'test_local_no_context')
    spec.spec.parameters = {'xyz': '789'}
    result = new_function(
        command='{}/no_ctx.py'.format(here), mode='noctx'
    ).run(spec)
    verify_state(result)

    db = get_run_db().connect()
    log = str(db.get_log(result.metadata.uid))
    print(log)
    assert log.find(", '--xyz', '789']") != -1, 'params not detected in noctx'
def test_main_run_pass():
    out = exec_run(
        "python -c print(56)",
        ["--mode", "pass", "--uid", "123458"],
        "test_main_run_pass",
    )
    print(out)
    assert out.find("state: completed") != -1, out
    db = mlrun.get_run_db()
    state, log = db.get_log("123458")
    assert str(log).find("56") != -1, "incorrect output"
def test_main_run_args():
    out = exec_run(
        f"{tests_root_directory}/no_ctx.py -x " + "{p2}",
        ["--uid", "123457"] + compose_param_list(dict(p1=5, p2="aaa")),
        "test_main_run_args",
    )
    print(out)
    assert out.find("state: completed") != -1, out
    db = mlrun.get_run_db()
    state, log = db.get_log("123457")
    print(log)
    assert str(log).find(", '-x', 'aaa']") != -1, "params not detected in argv"
def test_main_run_pass_args():
    out = exec_run(
        "'python -c print({x})'",
        ["--mode", "pass", "--uid", "123451", "-p", "x=33"],
        "test_main_run_pass",
    )
    print(out)
    assert out.find("state: completed") != -1, out
    db = mlrun.get_run_db()
    state, log = db.get_log("123451")
    print(log)
    assert str(log).find("33") != -1, "incorrect output"