def test_run_local():
    if Path(ARTIFACTS_PATH).is_dir():
        shutil.rmtree(ARTIFACTS_PATH)
    task = new_task(
        name="task-feature-selection",
        handler=feature_selection,
        params={'k': 2, 'min_votes': 0.3, 'label_column': 'is_error'},
        inputs={'df_artifact': 'data/metrics.pq'},
    )
    run_local(task=task, artifact_path=os.path.join(os.path.abspath('./'), 'artifacts'))
    _validate_paths({'feature_scores.parquet', 'selected_features.parquet'})
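# The _validate_paths helper used above is not defined in this excerpt; a minimal
# sketch of what it might check, assuming the test writes its artifacts under a
# module-level ARTIFACTS_PATH directory (the helper body below is an assumption,
# not the project's actual implementation):
def _validate_paths(expected_paths):
    # assert that every expected artifact file was created under ARTIFACTS_PATH
    base_folder = Path(ARTIFACTS_PATH)
    for expected_path in expected_paths:
        assert (base_folder / expected_path).exists(), f"missing artifact: {expected_path}"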
def test_run_local_handler():
    spec = tag_test(base_spec, "test_run_local_handler")
    spec.spec.handler = "my_func"
    result = run_local(spec, command=f"{examples_path}/handler.py", workdir=examples_path)
    verify_state(result)
def test_basics_hyper_parameters(self):
    run_object = run_local(
        self._basics_task.with_hyper_params({"p2": [5, 2, 3]}, "min.loss"),
        command="training.py",
        workdir=str(self.assets_path),
    )
    self._logger.debug("Finished running task", run_object=run_object.to_dict())

    run_uid = run_object.uid()
    assert run_uid is not None
    assert run_object.state() == "completed"
    self._verify_run_outputs(
        run_object.outputs,
        uid=run_uid,
        name="demo",
        project=self.project_name,
        output_path=self.results_path / run_uid,
        accuracy=10,
        loss=15,
        best_iteration=1,
        iteration_results=True,
    )
def test_serialization():
    spec = tag_test(base_spec, "test_serialization")
    result = run_local(spec)
    verify_state(result)
    pprint(result.to_dict())
    print(result.to_yaml())
    pprint(result.to_json())
def test_run_local_handler():
    spec = tag_test(base_spec, 'test_run_local_handler')
    spec.spec.handler = 'my_func'
    result = run_local(spec, command='{}/handler.py'.format(examples_path), workdir=examples_path)
    verify_state(result)
def test_train():
    spec = tag_test(base_spec, 'test_run_local_train')
    result = run_local(spec, command='../notebooks/functions/train.py', workdir='./',
                       artifact_path='./faces/artifacts')
    verify_state(result)
def test_describe_dask_local():
    if Path(PLOTS_PATH).is_dir():
        shutil.rmtree(PLOTS_PATH)
    task = new_task(
        name="task-describe",
        handler=summarize,
        inputs={"table": DATA_URL},
        params={
            'update_dataset': True,
            'label_column': 'label',
            'dask_function': 'db://default/dask_tests',
        },
    )
    run_local(task)
    _validate_paths({
        'corr.html',
        'correlation-matrix.csv',
        'hist.html',
        'imbalance.html',
        'imbalance-weights-vec.csv',
        'violin.html',
    })
def test_run_local_nb():
    spec = tag_test(base_spec, 'test_run_local_nb')
    spec.spec.handler = 'training'
    result = run_local(spec, command='{}/mlrun_jobs.ipynb'.format(examples_path), workdir=examples_path)
    verify_state(result)
def test_run_local_nb():
    spec = tag_test(base_spec, "test_run_local_nb")
    spec.spec.handler = "training"
    result = run_local(spec, command=f"{examples_path}/mlrun_jobs.ipynb", workdir=examples_path)
    verify_state(result)
def test_run_local_obj():
    spec = tag_test(base_spec, 'test_run_local_obj')
    spec.spec.handler = 'training'
    nbpath = '{}/mlrun_jobs.ipynb'.format(examples_path)
    ymlpath = path.join(out_path, 'nbyaml.yaml')
    print('out path:', out_path, ymlpath)
    fn = code_to_function(filename=nbpath, kind='job').export(ymlpath)
    result = run_local(spec, command=fn, workdir=out_path)
    verify_state(result)
def test_inline_code(self):
    run_object = run_local(self._inline_task.with_params(p1=7))
    self._logger.debug("Finished running task", run_object=run_object.to_dict())

    run_uid = run_object.uid()
    assert run_uid is not None
    assert run_object.state() == "completed"
def test_run_local_obj():
    spec = tag_test(base_spec, "test_run_local_obj")
    spec.spec.handler = "training"
    nbpath = f"{examples_path}/mlrun_jobs.ipynb"
    ymlpath = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, ymlpath)
    fn = code_to_function(filename=nbpath, kind="job").export(ymlpath)
    result = run_local(spec, command=fn, workdir=out_path)
    verify_state(result)
def test_inline_code_with_param_file(self):
    run_object = run_local(
        self._inline_task.with_param_file(str(self.assets_path / "params.csv"), "max.accuracy")
    )
    self._logger.debug("Finished running task", run_object=run_object.to_dict())

    run_uid = run_object.uid()
    assert run_uid is not None
    assert run_object.state() == "completed"
def test_run_local():
    if Path(PLOTS_PATH).is_dir():
        shutil.rmtree(PLOTS_PATH)
    task = new_task(
        name="task-describe",
        handler=summarize,
        inputs={"table": DATA_URL},
        params={"update_dataset": True, "label_column": "label"},
    )
    run_local(task)
    _validate_paths({
        "corr.html",
        "correlation-matrix.csv",
        "hist.html",
        "imbalance.html",
        "imbalance-weights-vec.csv",
        "violin.html",
    })
def test_basics(self):
    run_object = run_local(self._basics_task, command="training.py", workdir=str(self.assets_path))
    self._logger.debug("Finished running task", run_object=run_object.to_dict())

    run_uid = run_object.uid()
    assert run_uid is not None
    self._verify_run_metadata(
        run_object.to_dict()["metadata"],
        uid=run_uid,
        name="demo",
        project=self.project_name,
        labels={
            "v3io_user": self._test_env["V3IO_USERNAME"],
            "kind": "",
            "owner": self._test_env["V3IO_USERNAME"],
            "framework": "sklearn",
        },
    )
    self._verify_run_spec(
        run_object.to_dict()["spec"],
        parameters={"p1": 5, "p2": "a-string"},
        inputs={"infile.txt": str(self.assets_path / "infile.txt")},
        outputs=[],
        output_path=str(self.results_path / run_uid),
        secret_sources=[],
        data_stores=[],
        scrape_metrics=False,
    )

    assert run_object.state() == "completed"
    self._verify_run_outputs(
        run_object.outputs,
        uid=run_uid,
        name="demo",
        project=self.project_name,
        output_path=self.results_path / run_uid,
        accuracy=10,
        loss=15,
    )
def test_run_local_with_uid_does_not_exist(monkeypatch):
    """
    Mock a scenario that happened in the field, in which getuser raised the same error as the mock.
    The problem was that the code used environ.get("V3IO_USERNAME", getpass.getuser())
    instead of environ.get("V3IO_USERNAME") or getpass.getuser()
    """

    def mock_getpwuid_raise(*args, **kwargs):
        raise KeyError("getpwuid(): uid not found: 400")

    environ["V3IO_USERNAME"] = "******"
    monkeypatch.setattr(getpass, "getuser", mock_getpwuid_raise)
    spec = tag_test(base_spec, "test_run_local")
    result = run_local(spec, command=f"{examples_path}/training.py", workdir=examples_path)
    verify_state(result)
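# The docstring above hinges on evaluation order: in environ.get(key, fallback())
# the fallback expression is evaluated eagerly, even when the key exists, whereas
# environ.get(key) or fallback() only evaluates the fallback when the lookup comes
# back missing or empty. A minimal sketch of the two variants, independent of mlrun:
import getpass
from os import environ


def resolve_user_eager():
    # getpass.getuser() runs unconditionally here, so it can raise
    # KeyError("getpwuid(): uid not found") even when V3IO_USERNAME is set
    return environ.get("V3IO_USERNAME", getpass.getuser())


def resolve_user_lazy():
    # getpass.getuser() is only reached when V3IO_USERNAME is missing or empty,
    # which is the behavior the test above exercises
    return environ.get("V3IO_USERNAME") or getpass.getuser()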
def custom_setup(self):
    self._logger.debug("Connecting to database")
    self._logger.debug("Creating dummy task for db queries")

    # {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
    output_path = str(self.results_path / "{{run.uid}}")
    task = (
        new_task(name="demo", params={"p1": 5}, artifact_path=output_path)
        .with_secrets("file", self.assets_path / "secrets.txt")
        .set_label("type", "demo")
    )

    self._logger.debug("Running dummy task")
    run_object = run_local(task, command="training.py", workdir=str(self.assets_path))
    self._logger.debug("Finished running dummy task", run_object=run_object.to_dict())

    self._run_uid = run_object.uid()
def test_db_commands(self):
    self._logger.debug("Creating dummy task for db queries")

    # {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
    output_path = str(self.results_path / "{{run.uid}}")
    task = (
        new_task(name="demo", params={"p1": 5}, artifact_path=output_path)
        .with_secrets("file", self.assets_path / "secrets.txt")
        .set_label("type", "demo")
    )

    runs_count_before_run = len(self._run_db.list_runs(project=self.project_name))
    artifacts_count_before_run = len(self._run_db.list_artifacts(project=self.project_name, tag="*"))

    self._logger.debug("Running dummy task")
    run_object = run_local(task, command="training.py", workdir=str(self.assets_path))
    self._logger.debug("Finished running dummy task", run_object=run_object.to_dict())

    self._run_uid = run_object.uid()

    runs = self._run_db.list_runs(project=self.project_name)
    assert len(runs) == runs_count_before_run + 1
    self._verify_run_metadata(
        runs[0]["metadata"],
        uid=self._run_uid,
        name="demo",
        project=self.project_name,
        labels={"kind": "", "framework": "sklearn"},
    )
    self._verify_run_spec(
        runs[0]["spec"],
        parameters={"p1": 5, "p2": "a-string"},
        inputs={"infile.txt": str(self.assets_path / "infile.txt")},
        outputs=[],
        output_path=str(self.results_path / self._run_uid),
        secret_sources=[],
        data_stores=[],
    )

    artifacts = self._run_db.list_artifacts(project=self.project_name, tag="*")
    assert len(artifacts) == artifacts_count_before_run + 4
    for artifact_key in ["chart", "html_result", "model", "mydf"]:
        artifact_exists = False
        for artifact in artifacts:
            if artifact["key"] == artifact_key:
                artifact_exists = True
                break
        assert artifact_exists

    runtimes = self._run_db.list_runtimes()
    assert len(runtimes) == len(mlrun.runtimes.RuntimeKinds.runtime_with_handlers())
    for runtime_kind in mlrun.runtimes.RuntimeKinds.runtime_with_handlers():
        runtime_exists = False
        for runtime in runtimes:
            if runtime["kind"] == runtime_kind:
                runtime_exists = True
                break
        assert runtime_exists
from os import path

from mlrun import run_local, NewTask, mlconf

out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
artifact_path = path.join(out, '{{run.uid}}')

# #### _running and linking multiple tasks_
# In this example we run two functions, ```training``` and ```validation```, and pass the result from one to the other.
# We will see in the ```job``` example that linking works even when the tasks run in a workflow on different processes or containers.
# ```run_local()``` will run our task on a local function:

# Run the training function. Functions can have multiple handlers/methods; here we call the ```training``` handler:

# In[ ]:

train_run = run_local(NewTask(handler=training, params={'p1': 5}, artifact_path=out))

# After the function runs it generates the result widget; you can click the `model` artifact to see its content.

# In[ ]:

train_run.outputs

# The output from the first training function is passed to the validation function, let's run it:

# In[ ]:

model_path = train_run.outputs['model']

validation_run = run_local(NewTask(handler=validation,
                                   inputs={'model': model_path},
                                   artifact_path=out))
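# The ```training``` and ```validation``` handlers are not defined in this excerpt;
# a minimal sketch of what such handlers could look like, assuming the mlrun context
# logging API (log_result, log_model, log_artifact) and placeholder metric values:

# In[ ]:

def training(context, p1=1, p2=2):
    # hypothetical handler: log a couple of results and a model artifact so that
    # train_run.outputs['model'] resolves to a stored model path
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)
    context.log_model('model', body=b'dummy model bytes', model_file='model.txt')


def validation(context, model):
    # hypothetical handler: `model` arrives as an mlrun DataItem pointing at the
    # artifact produced by training; log a simple validation report
    context.logger.info(f'validating model at {model}')
    context.log_artifact('validation', body=b'<b> validated </b>', local_path='validation.html')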
from mlrun import run_local, RunTemplate, NewTask, mlconf
from os import path

mlconf.dbpath = mlconf.dbpath or './'
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
artifact_path = path.join(out, '{{run.uid}}')

task = NewTask(name='demo', params={'p1': 5}, artifact_path=artifact_path).with_secrets(
    'file', 'secrets.txt').set_label('type', 'demo')

# run our task using our new function
run_object = run_local(task, command='training.py')

run_object.uid()
run_object.to_dict()
run_object.state()
run_object.show()
run_object.outputs
run_object.logs()
run_object.artifact('dataset')
def test_run_local():
    spec = tag_test(base_spec, "test_run_local")
    result = run_local(spec, command=f"{examples_path}/training.py", workdir=examples_path)
    verify_state(result)
def test_run_local():
    spec = tag_test(base_spec, 'test_run_local')
    result = run_local(spec, command='{}/training.py'.format(examples_path), workdir=examples_path)
    verify_state(result)