def test_xgb_custom():
    """Run the ``gen_outliers`` and ``fit`` handlers of ``xgb_custom.py`` locally.

    Both handlers are wrapped as ``job`` functions and executed with
    ``local=True``. The test makes no assertions on the run results; it only
    requires that both runs can be launched without raising.
    """
    code_file = "xgb_custom.py"

    # First run: the gen_outliers handler.
    outlier_params = {
        "nrows": 8192,
        "label_type": "float",
        "local_path": "./artifacts/inputs/xgb_custom",
    }
    outlier_fn = code_to_function(
        name="test_xgb_custom",
        filename=code_file,
        handler="gen_outliers",
        kind="job",
    )
    outlier_fn.run(params=outlier_params, local=True)

    # Second run: the fit handler.
    fit_params = {
        "num_boost_round": 40,
        "verbose_eval": False,
        "XGB_max_depth": 2,
        "XGB_subsample": 0.9,
        "test_set_key": "./artifacts/inputs/test-set",
    }
    # presumably the parquet file produced by the gen_outliers run above -- TODO confirm
    fit_inputs = {"dataset": "./artifacts/inputs/xgb_custom.parquet"}
    fit_fn = code_to_function(
        name="test_fit_model",
        filename=code_file,
        handler="fit",
        kind="job",
    )
    fit_fn.run(params=fit_params, inputs=fit_inputs, local=True)
def to_function(self, default_kind=None):
    """Build a function object from this reference and cache it on ``self``.

    Resolution order (first match wins):

    1. ``self.spec`` is set: ``mlrun.new_function(self.name, runtime=self.spec)``
    2. url ends with ``.yaml`` or starts with ``db://`` / ``hub://``:
       ``mlrun.import_function(url)``; ``self.image`` overrides the image if set
    3. url ends with ``.ipynb``: ``mlrun.code_to_function``
    4. url ends with ``.py``: ``mlrun.code_to_function`` (``self.image`` required)

    :param default_kind: runtime kind used when ``self.kind`` is not set
    :return: the created function object (also stored in ``self._function``)
    :raises OSError: the url has no ``://`` scheme and is not an existing file
    :raises ValueError: neither a usable url nor a spec was provided, or a
        ``.py`` url was given without an image
    """
    url = self.url

    # A url without a scheme is treated as a local path and must exist.
    if url and "://" not in url and not os.path.isfile(url):
        raise OSError("{} not found".format(url))

    kind = self.kind or default_kind
    if self.spec:
        func = mlrun.new_function(self.name, runtime=self.spec)
    elif not url:
        # Without this guard a missing url crashed below with AttributeError
        # ('NoneType' object has no attribute 'endswith').
        raise ValueError("unsupported function url {} or no spec".format(url))
    elif url.endswith(".yaml") or url.startswith(("db://", "hub://")):
        func = mlrun.import_function(url)
        if self.image:
            func.spec.image = self.image
    elif url.endswith(".ipynb"):
        func = mlrun.code_to_function(
            self.name, filename=url, image=self.image, kind=kind
        )
    elif url.endswith(".py"):
        # todo: support code text as input (for UI)
        if not self.image:
            raise ValueError(
                "image must be provided with py code files, "
                "use function object for more control/settings"
            )
        func = mlrun.code_to_function(
            self.name, filename=url, image=self.image, kind=kind
        )
    else:
        raise ValueError("unsupported function url {} or no spec".format(url))

    if self.requirements:
        func.with_requirements(self.requirements)
    self._function = func
    return func
def test_run_local_yaml():
    """Export the jobs notebook to a function yaml and run it via ``run_local``.

    The notebook is converted with ``code_to_function(kind="job")``, exported to
    ``nbyaml.yaml`` under ``out_path``, and that yaml is passed as the command
    to ``run_local`` with the ``training`` handler.
    """
    # Tag the run after this test; it previously carried the copy-pasted
    # 'test_run_local_handler' tag.
    spec = tag_test(base_spec, "test_run_local_yaml")
    spec.spec.handler = "training"

    notebook_path = f"{examples_path}/mlrun_jobs.ipynb"
    yaml_path = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, yaml_path)

    code_to_function(filename=notebook_path, kind="job").export(yaml_path)
    result = run_local(spec, command=yaml_path, workdir=out_path)
    verify_state(result)
def test_run_local_yaml():
    """Run a function yaml, exported from the jobs notebook, with ``run_local``."""
    task = tag_test(base_spec, "test_run_local_yaml")
    task.spec.handler = "training"

    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    exported_yaml = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, exported_yaml)

    # Convert the notebook to a job function and write its yaml definition.
    job_fn = code_to_function(filename=notebook, kind="job")
    job_fn.export(exported_yaml)

    # The exported yaml is used as the command of the local run.
    verify_state(run_local(task, command=exported_yaml, workdir=out_path))
def to_function(self, default_kind=None):
    """Create a function object from the reference fields and cache it.

    Sources are tried in this order: ``self.url``, then ``self.code``, then
    ``self.spec`` alone. When the function is built from a url or from code and
    ``self.spec`` is also set, the result is passed through
    ``enrich_function_from_dict``.

    :param default_kind: runtime kind used when ``self.kind`` is not set
    :return: the function object (also stored in ``self._function``)
    :raises OSError: the url has no ``://`` scheme and is not an existing file
    :raises ValueError: unsupported url, ``.py`` url without an image, or none
        of url/spec/code given
    """
    url = self.url
    image = self.image

    # A url without a scheme is treated as a local path and must exist.
    if url and "://" not in url and not os.path.isfile(url):
        raise OSError(f"{url} not found")

    kind = self.kind or default_kind
    # Only functions built from a url or from code get enriched with the spec.
    enrich_with_spec = False

    if url:
        if url.endswith(".yaml") or url.startswith(("db://", "hub://")):
            func = mlrun.import_function(url)
            if image:
                func.spec.image = image
        elif url.endswith((".ipynb", ".py")):
            # todo: support code text as input (for UI)
            if url.endswith(".py") and not image:
                raise ValueError(
                    "image must be provided with py code files, "
                    "use function object for more control/settings"
                )
            func = mlrun.code_to_function(
                self.name, filename=url, image=image, kind=kind
            )
        else:
            raise ValueError(f"unsupported function url {url} or no spec")
        enrich_with_spec = True
    elif self.code is not None:
        source = self.code
        if kind == mlrun.runtimes.RuntimeKinds.serving:
            source += mlrun_footer.format(mlrun.runtimes.serving.serving_subkind)
        func = mlrun.new_function(self.name, kind=kind, image=image)
        # The source text is stored base64-encoded on the build spec.
        func.spec.build.functionSourceCode = b64encode(
            source.encode("utf-8")
        ).decode("utf-8")
        if kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
            func.spec.default_handler = "handler"
        enrich_with_spec = True
    elif self.spec:
        func = mlrun.new_function(self.name, runtime=self.spec)
    else:
        raise ValueError("url or spec or code must be specified")

    if enrich_with_spec and self.spec:
        func = enrich_function_from_dict(func, self.spec)
    if self.requirements:
        func.with_requirements(self.requirements)

    self._function = func
    return func
def create_demo_project(self) -> mlrun.projects.MlrunProject:
    """Create the sklearn demo project with its functions and workflow.

    A new project is created under ``self.assets_path`` (with git initialised),
    a ``gen-iris`` job function is built from ``iris_generator_function.py``,
    several hub functions are registered, and ``workflow.py`` is embedded as
    the ``main`` workflow.

    :return: the configured project object
    """
    self._logger.debug("Creating sklearn project")
    demo_project = mlrun.new_project(
        self.project_name, str(self.assets_path), init_git=True
    )

    self._logger.debug("Creating iris-generator function")
    function_path = str(self.assets_path / "iris_generator_function.py")
    iris_generator_function = mlrun.code_to_function(
        name="gen-iris",
        kind="job",
        filename=function_path,
        image="mlrun/mlrun",
    )
    iris_generator_function.spec.remote = True
    iris_generator_function.spec.replicas = 1
    iris_generator_function.spec.service_type = "NodePort"
    # The "sklearn" PyPI name is a deprecated alias whose installation now
    # fails; the real distribution name is "scikit-learn".
    iris_generator_function.spec.build.commands.append(
        "pip install pandas scikit-learn pyarrow"
    )

    self._logger.debug("Setting project functions")
    demo_project.set_function(iris_generator_function)
    demo_project.set_function("hub://describe", "describe")
    demo_project.set_function("hub://sklearn_classifier", "train")
    demo_project.set_function("hub://test_classifier", "test")
    demo_project.set_function("hub://model_server", "serving")
    demo_project.set_function("hub://model_server_tester", "live_tester")

    self._logger.debug("Setting project workflow")
    demo_project.set_workflow(
        "main", str(self.assets_path / "workflow.py"), embed=True
    )
    return demo_project
def sklearn_classifier(run):
    """Run the ``train_model`` handler of sklearn_classifier.py as a local function.

    The script is looked up in the ``sklearn_classifier`` directory next to the
    current working directory.

    :param run: a previous run whose ``outputs["rent"]`` is used as the
        ``dataset`` input
    """
    parent_dir = str(Path(os.getcwd()).parent.absolute())
    script_path = parent_dir + "/sklearn_classifier/sklearn_classifier.py"

    trainer = code_to_function(
        name="test_sklearn_classifier",
        filename=script_path,
        handler="train_model",
        kind="local",
    )
    trainer.spec.command = script_path

    train_params = {
        "sample": -5_000,  # 5k random rows,
        "model_pkg_class": "sklearn.ensemble.RandomForestClassifier",
        "label_column": "interest_level",
        "CLASS_n_estimators": 100,
        "CLASS_min_samples_leaf": 1,
        "CLASS_n_jobs": -1,
        "CLASS_oob_score": True,
    }
    trainer.run(
        params=train_params,
        handler="train_model",
        inputs={"dataset": run.outputs["rent"]},
        artifact_path="artifacts",
        # , local=True
    )
def create_demo_project(self) -> mlrun.projects.MlrunProject:
    """Create the horovod demo project with its functions, artifact and workflow.

    Steps performed:

    * create a new project under ``self.assets_path`` (with git initialised)
    * upload ``horovod_training.py`` to the store path returned by
      ``self._get_v3io_user_store_path``
    * build a ``utils`` job function from ``utils_functions.py``
    * create an ``mpijob`` ``trainer`` function whose command is the uploaded
      training file
    * register the hub ``tf2_serving`` function as ``serving``
    * log the ``images`` artifact and embed ``workflow.py`` as ``main``

    :return: the configured project object
    """
    self._logger.debug("Creating horovod project")
    demo_project = mlrun.new_project(
        self.project_name, str(self.assets_path), init_git=True
    )
    # NOTE(review): the return value is discarded here, so this call looks
    # like a no-op -- confirm whether it was meant to be applied to a function.
    mlrun.mount_v3io()

    self._logger.debug("Uploading training file")
    trainer_src_path = str(self.assets_path / "horovod_training.py")
    trainer_dest_path = pathlib.Path("/assets/horovod_training.py")
    stores = mlrun.datastore.store_manager.set()
    datastore, subpath = stores.get_or_create_store(
        self._get_v3io_user_store_path(trainer_dest_path)
    )
    datastore.upload(subpath, trainer_src_path)

    # The message used to say "iris-generator", copy-pasted from another demo.
    self._logger.debug("Creating utils function")
    function_path = str(self.assets_path / "utils_functions.py")
    utils = mlrun.code_to_function(
        name="utils",
        kind="job",
        filename=function_path,
        image="mlrun/mlrun",
    )
    utils.spec.remote = True
    utils.spec.replicas = 1
    utils.spec.service_type = "NodePort"
    utils.spec.command = function_path

    self._logger.debug("Setting project functions")
    demo_project.set_function(utils)

    trainer = mlrun.new_function(
        name="trainer",
        kind="mpijob",
        command=self._get_v3io_user_store_path(trainer_dest_path, remote=False),
        image="mlrun/ml-models",
    )
    trainer.spec.remote = True
    trainer.spec.replicas = 4
    trainer.spec.service_type = "NodePort"
    demo_project.set_function(trainer)

    demo_project.set_function("hub://tf2_serving", "serving")

    demo_project.log_artifact(
        "images",
        target_path="http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip",
        artifact_path=mlrun.mlconf.artifact_path,
    )

    self._logger.debug("Setting project workflow")
    demo_project.set_workflow(
        "main", str(self.assets_path / "workflow.py"), embed=True
    )
    return demo_project
def test_run_local_aggregate():
    """Run the ``aggregate`` handler locally and expect ``AGGREGATE_PATH`` to be a file."""
    script = "aggregate.py"
    aggregate_fn = code_to_function(
        name="test_aggregate",
        filename=script,
        handler="aggregate",
        kind="local",
    )
    aggregate_fn.spec.command = script

    run_params = {
        "metrics": ["cpu_utilization"],
        "labels": ["is_error"],
        "metric_aggs": ["mean", "sum"],
        "label_aggs": ["max"],
        "suffix": "daily",
        "inplace": False,
        "window": 5,
        "center": True,
        "save_to": AGGREGATE_PATH,
        "files_to_select": 2,
    }
    # , local=True
    aggregate_fn.run(params=run_params, inputs={"df_artifact": DATA})

    output_file = Path(AGGREGATE_PATH)
    assert output_file.is_file()
def test_nuclio_nb_serving():
    """Convert the remote xgb_serving notebook and check the function kind fields."""
    notebook_url = (
        "https://raw.githubusercontent.com/mlrun/mlrun/master/examples/"
        "xgb_serving.ipynb"
    )
    serving_fn = code_to_function(filename=notebook_url)

    # No kind is passed in; both values are expected on the converted function.
    assert serving_fn.kind == "remote", "kind not set, test failed"
    assert serving_fn.spec.function_kind == "serving", "code not embedded"
def composer(apiversion='v1alpha1', name='', project='default'):
    """Create a nuclio function from the iguazioig processing template.

    The template is looked up inside the installed ``iguazioig`` package as
    ``templates/processing_template_<apiversion>.py``. When that file is
    missing and the matching ``.ipynb`` template exists, the notebook is used
    instead.

    :param apiversion: version suffix of the template file name
    :param name:       function name
    :param project:    function project
    :return: the object returned by ``code_to_function`` with ``kind='nuclio'``
    """
    import iguazioig

    module_path = os.path.dirname(iguazioig.__file__)
    template_base = "%s/templates/processing_template_%s" % (
        module_path, apiversion)
    py_template = template_base + ".py"
    notebook_template = template_base + ".ipynb"

    # The original wrapped plain string formatting in a bare ``except``, so the
    # notebook fallback could never trigger. Check the files explicitly, and
    # keep the .py path when neither exists so the resulting error is unchanged.
    if os.path.isfile(py_template) or not os.path.isfile(notebook_template):
        template_file = py_template
    else:
        template_file = notebook_template

    return code_to_function(name, project=project, filename=template_file,
                            kind='nuclio')
def main():
    """Deploy ``assets/model_server.py`` as an MLRun serving function and test it.

    Configuration is read from environment variables (loaded via
    ``load_dotenv``); the model file is pushed to V3IO before the function is
    created and deployed.
    """
    # Load environment variables
    load_dotenv()

    # Setup project
    environment = dict(
        project="remote-model-deployment",
        artifact_path=os.getenv("MLRUN_ARTIFACT_PATH"),
        api_path=os.getenv("MLRUN_DBPATH"),
        access_key=os.getenv("V3IO_ACCESS_KEY"),
    )
    project_name, _artifact_path = set_environment(**environment)
    print(f"Creating project '{project_name}'")

    # Push assets to V3IO
    client = v3io.dataplane.Client()
    push_to_v3io(client, "assets/model.pkl", "nick/tmp/model.pkl")

    # Create MLRun function
    base_fn = code_to_function(
        name="serving",
        kind="serving",
        image="mlrun/mlrun",
        filename="assets/model_server.py",
    )
    serving_fn = base_fn.apply(mount_v3io())
    print(f"Creating function '{serving_fn.metadata.name}'")

    # Configure MLRun function
    serving_fn.spec.default_class = "ClassifierModel"
    serving_fn.add_model("my_model", model_path="/User/tmp/model.pkl")

    # Deploy, then test model inference against the returned address
    endpoint = serving_fn.deploy()
    test_inference(endpoint)
def _create_mlrun_function_and_matching_scheduled_object(
    db: Session, project: str, handler: str = "do_nothing"
):
    """Store a local function in the DB and build a scheduled object that references it.

    The function is created from ``function.py`` located next to this file and
    stored versioned; the returned task spec points at it by the hash key that
    ``store_function`` returns.

    :param db:      DB session passed to ``store_function``
    :param project: project the function is stored under
    :param handler: handler name placed in the task spec
    :return: dict with a single ``task`` entry (``spec`` and ``metadata``)
    """
    function_name = "my-function"
    source_file = str(pathlib.Path(__file__).absolute().parent / "function.py")

    function = mlrun.code_to_function(
        name=function_name, kind="local", filename=source_file
    )
    function.spec.command = source_file

    hash_key = get_db().store_function(
        db, function.to_dict(), function_name, project, versioned=True
    )

    task_spec = {
        "function": f"{project}/{function_name}@{hash_key}",
        "handler": handler,
    }
    task_metadata = {"name": "my-task", "project": f"{project}"}
    return {"task": {"spec": task_spec, "metadata": task_metadata}}
def test_nuclio_nb_serving():
    """Build a function from the remote xgb_serving notebook and verify its kind."""
    repo_examples = "https://raw.githubusercontent.com/mlrun/mlrun/master/examples"
    fn = code_to_function(filename=repo_examples + "/xgb_serving.ipynb")

    kind = fn.kind
    assert kind == "remote", "kind not set, test failed"
    function_kind = fn.spec.function_kind
    assert function_kind == "serving", "code not embedded"
def test_job_file_codeout():
    """Convert the jobs notebook to a job with ``code_output`` and check the result.

    Expects the code to be written to the output file, used as the function
    command, and not embedded in the build spec.
    """
    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    code_file = f"{results}/ctf_tst.py"

    job_fn = code_to_function(filename=notebook, kind="job", code_output=code_file)

    assert job_fn.kind == "job", "kind not set, test failed"
    embedded_source = job_fn.spec.build.functionSourceCode
    assert not embedded_source, embedded_source
    assert job_fn.spec.command == code_file, "filename not set to out in command"
    assert path.isfile(code_file), "output not generated"
def test_local_file_noembed():
    """Create a local function from training.py without embedding its code, then run it."""
    script = f"{examples_path}/training.py"
    local_fn = code_to_function(filename=script, kind="local", embed_code=False)

    assert local_fn.kind == "local", "kind not set, test failed"
    # With embed_code=False the source must not be stored on the build spec;
    # the file path is expected as the command instead.
    embedded_source = local_fn.spec.build.functionSourceCode
    assert not embedded_source, embedded_source
    assert local_fn.spec.command == script, "filename not set in command"

    local_fn.run(workdir=str(examples_path))
def test_job_file_codeout():
    """Convert the jobs notebook to a non-embedded job that writes its code to a file."""
    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    generated = f"{results}/ctf_tst.py"

    job_fn = code_to_function(
        filename=notebook,
        kind="job",
        code_output=generated,
        embed_code=False,
    )

    assert job_fn.kind == "job", "kind not set, test failed"
    source_code = job_fn.spec.build.functionSourceCode
    assert not source_code, source_code
    assert job_fn.spec.command == generated, "filename not set to out in command"
    assert path.isfile(generated), "output not generated"
def get_model_monitoring_stream_processing_function(project: str):
    """Build the ``model-monitoring-stream`` nuclio function for *project*.

    :param project: project the function is created under
    :return: the object returned by ``code_to_function`` for the code at
        ``STREAM_PROCESSING_FUNCTION_PATH``
    """
    function_settings = {
        "name": "model-monitoring-stream",
        "project": project,
        "filename": str(STREAM_PROCESSING_FUNCTION_PATH),
        "kind": "nuclio",
        "image": "mlrun/mlrun",
    }
    return code_to_function(**function_settings)
def test_local_file_noembed():
    """Check a non-embedded local function built from training.py, then run it."""
    training_script = f"{examples_path}/training.py"
    fn = code_to_function(
        filename=training_script,
        kind="local",
        embed_code=False,
    )

    assert fn.kind == "local", "kind not set, test failed"
    build_source = fn.spec.build.functionSourceCode
    assert not build_source, build_source
    assert fn.spec.command == training_script, "filename not set in command"

    run_dir = str(examples_path)
    fn.run(workdir=run_dir)
def test_run_local_from_func():
    """Export the jobs notebook as a function yaml, then run the function locally."""
    task = tag_test(base_spec, "test_run_local_from_func")
    task.spec.handler = "training"

    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    exported_yaml = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, exported_yaml)

    job_fn = code_to_function(filename=notebook, kind="job")
    # The object returned by export() is the one the run is launched on.
    exported_fn = job_fn.export(exported_yaml)

    verify_state(exported_fn.run(task, workdir=out_path, local=True))
def test_vault_end_to_end():
    """Create project secrets in Vault, run a job that uses them, and check its log.

    The run log is expected to contain ``value: <secret>`` for both secrets
    created here.
    """
    # This requires an MLRun API server to run and work with Vault. This port should
    # be configured to allow access to the server.
    api_server_port = 57764
    _set_vault_mlrun_configuration(api_server_port)

    project_name = "abc"
    func_name = "vault-function"
    aws_secret = "1234567890"
    github_secret = "proj1Key!!!"

    project = new_project(project_name)
    # This call will initialize Vault infrastructure and add the given secrets
    # It executes on the API server
    project.create_vault_secrets(
        {"aws_key": aws_secret, "github_key": github_secret}
    )

    # This API executes on the client side
    stored_keys = project.get_vault_secret_keys()
    assert stored_keys == ["aws_key", "github_key"], "secrets not created"

    # Create function and set container configuration
    function = code_to_function(
        name=func_name,
        filename=f"{examples_path}/vault_function.py",
        handler="vault_func",
        project=project_name,
        kind="job",
    )
    function.spec.image = "saarcoiguazio/mlrun:unstable"

    # Create context for the execution
    task = new_task(
        project=project_name,
        name="vault_test_run",
        handler="vault_func",
        out_path=out_path,
        params={"secrets": ["password", "path", "github_key", "aws_key"]},
    )
    task.with_secrets("vault", [])

    result = function.run(task)
    verify_state(result)

    run_db = get_run_db().connect()
    state, raw_log = run_db.get_log(result.metadata.uid, project=project_name)
    log_text = str(raw_log)
    print(state)

    for secret_value in (aws_secret, github_secret):
        assert (
            f"value: {secret_value}" in log_text
        ), "secret value not detected in function output"
def test_main_local_flag():
    """Export a job function to yaml and run it through ``exec_run`` with ``--local``."""
    handler_fn = mlrun.code_to_function(
        filename=f"{examples_path}/handler.py",
        kind="job",
        handler="my_func",
    )
    exported_yaml = f"{out_path}/myfunc.yaml"
    handler_fn.export(exported_yaml)

    cli_args = f"-f {exported_yaml} --local".split()
    output = exec_run("", cli_args, "test_main_local_flag")
    print(output)

    # The command output is expected to report a completed run.
    assert output.find("state: completed") != -1, output
def custom_setup(self):
    """Create the ``mydask`` dask function and store it on ``self.dask_function``."""
    self._logger.debug("Creating dask function")

    dask_fn = code_to_function(
        "mydask",
        kind="dask",
        filename=str(self.assets_path / "dask_function.py"),
    )
    self.dask_function = dask_fn.apply(mount_v3io())

    # Runtime settings are applied to the stored function's spec.
    spec = self.dask_function.spec
    spec.image = "mlrun/ml-models"
    spec.remote = True
    spec.replicas = 1
    spec.service_type = "NodePort"
    spec.command = str(self.assets_path / "dask_function.py")
def _generate_runtime(self, kind="nuclio"):
    """Build a function of the given kind from this test's code settings.

    :param kind: runtime kind passed to ``code_to_function``
    :return: the object returned by ``code_to_function``
    """
    function_settings = dict(
        name=self.name,
        project=self.project,
        filename=self.code_filename,
        handler=self.code_handler,
        kind=kind,
        image=self.image_name,
        description="test function",
    )
    return code_to_function(**function_settings)
def test_local_file_codeout():
    """Convert the jobs notebook to a non-embedded local function and run it.

    The code is expected in the ``code_output`` file, which is also expected as
    the function command; the ``training`` handler is then run with ``p1=5``.
    """
    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    code_file = f"{results}/ctf_tst.py"

    local_fn = code_to_function(
        filename=notebook,
        kind="local",
        code_output=code_file,
        embed_code=False,
    )

    assert local_fn.kind == "local", "kind not set, test failed"
    embedded_source = local_fn.spec.build.functionSourceCode
    assert not embedded_source, embedded_source
    assert local_fn.spec.command == code_file, "filename not set to out in command"
    assert path.isfile(code_file), "output not generated"

    local_fn.run(handler="training", params={"p1": 5})
def test_run_sql_to_file():
    """Run the ``sql_to_file`` handler locally with the module-level query and database url."""
    sql_fn = code_to_function(
        name="test_sql_to_file",
        filename="sql_to_file.py",
        handler="sql_to_file",
        kind="job",
    )
    run_params = {
        "sql_query": mysql_query,
        "database_url": mysql_url,
    }
    sql_fn.run(params=run_params, local=True)
def _generate_runtime(self, disable_auto_mount=False):
    """Build a nuclio function from this test's code settings.

    :param disable_auto_mount: value stored on ``spec.disable_auto_mount``
    :return: the function object
    """
    function_settings = dict(
        name=self.name,
        project=self.project,
        filename=self.code_filename,
        handler=self.code_handler,
        kind="nuclio",
        image=self.image_name,
        description="test function",
    )
    nuclio_fn = code_to_function(**function_settings)
    nuclio_fn.spec.disable_auto_mount = disable_auto_mount
    return nuclio_fn
def test_deploy_function_without_project(self):
    """Create a nuclio function from ``nuclio_function.py`` and deploy it."""
    source_file = str(self.assets_path / "nuclio_function.py")

    self._logger.debug("Creating nuclio function")
    # NOTE(review): despite the test name, a project name is passed here.
    nuclio_fn = mlrun.code_to_function(
        name="simple-function",
        kind="nuclio",
        project=self.project_name,
        filename=source_file,
    )

    self._logger.debug("Deploying nuclio function")
    nuclio_fn.deploy()
def test_run_alone(self):
    """Run ``func1`` from localpipe.py via ``mlrun.run_function`` after clearing the pipeline context."""
    mlrun.projects.pipeline_context.clear(with_project=True)

    source_file = str(self.assets_path / "localpipe.py")
    job_fn = mlrun.code_to_function(
        "test1",
        filename=source_file,
        handler="func1",
        kind="job",
    )

    result = mlrun.run_function(job_fn, params={"p1": 5}, local=True)
    print(result.to_yaml())

    assert result.state() == "completed", "run didnt complete"
    # expect y = param1 * 2 = 10
    accuracy = result.output("accuracy")
    assert accuracy == 10, "unexpected run result"
def test_local_file_codeout():
    """Write the notebook code to a file, check the local function, then run ``training``."""
    source_notebook = f"{examples_path}/mlrun_jobs.ipynb"
    output_script = f"{results}/ctf_tst.py"

    fn = code_to_function(
        filename=source_notebook,
        kind="local",
        code_output=output_script,
        embed_code=False,
    )

    assert fn.kind == "local", "kind not set, test failed"
    build_source = fn.spec.build.functionSourceCode
    assert not build_source, build_source
    assert fn.spec.command == output_script, "filename not set to out in command"
    assert path.isfile(output_script), "output not generated"

    run_params = {"p1": 5}
    fn.run(handler="training", params=run_params)