Example 1
def test_run_local():
    if Path(ARTIFACTS_PATH).is_dir():
        shutil.rmtree(ARTIFACTS_PATH)

    task = new_task(name="task-feature-selection",
                    handler=feature_selection,
                    params={'k': 2,
                            'min_votes': 0.3,
                            'label_column': 'is_error'},
                    inputs={'df_artifact': 'data/metrics.pq'})
    run_local(task=task,
              artifact_path=os.path.join(os.path.abspath('./'), 'artifacts'))
    _validate_paths({'feature_scores.parquet',
                     'selected_features.parquet'})
Example 2
def test_run_local_handler():
    spec = tag_test(base_spec, "test_run_local_handler")
    spec.spec.handler = "my_func"
    result = run_local(spec,
                       command=f"{examples_path}/handler.py",
                       workdir=examples_path)
    verify_state(result)
Example 3
    def test_basics_hyper_parameters(self):
        run_object = run_local(
            self._basics_task.with_hyper_params({"p2": [5, 2, 3]}, "min.loss"),
            command="training.py",
            workdir=str(self.assets_path),
        )
        self._logger.debug("Finished running task",
                           run_object=run_object.to_dict())

        run_uid = run_object.uid()

        assert run_uid is not None
        assert run_object.state() == "completed"

        self._verify_run_outputs(
            run_object.outputs,
            uid=run_uid,
            name="demo",
            project=self.project_name,
            output_path=self.results_path / run_uid,
            accuracy=10,
            loss=15,
            best_iteration=1,
            iteration_results=True,
        )
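The hyper-parameter run above iterates the task over candidate values and picks the best child run with the "min.loss" selector. A hypothetical sketch of how a task like self._basics_task could be built and extended this way (names and values are assumptions modeled on the "demo" task in Example 15, not code from the suite):

from mlrun import new_task

# assumed construction of the base task used by the test class
basics_task = new_task(name="demo", params={"p1": 5, "p2": "a-string"})
# run each candidate value of p2 in its own child iteration and select the
# iteration whose "loss" result is minimal
hyper_task = basics_task.with_hyper_params({"p2": [5, 2, 3]}, "min.loss")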
Example 4
def test_serialization():
    spec = tag_test(base_spec, "test_serialization")
    result = run_local(spec)
    verify_state(result)
    pprint(result.to_dict())
    print(result.to_yaml())
    pprint(result.to_json())
Example 5
def test_run_local_handler():
    spec = tag_test(base_spec, 'test_run_local_handler')
    spec.spec.handler = 'my_func'
    result = run_local(spec,
                       command='{}/handler.py'.format(examples_path),
                       workdir=examples_path)
    verify_state(result)
Example 6
def test_train():
    spec = tag_test(base_spec, 'test_run_local_train')
    result = run_local(spec,
                       command='../notebooks/functions/train.py',
                       workdir='./',
                       artifact_path='./faces/artifacts')
    verify_state(result)
Example 7
def test_describe_dask_local():
    if Path(PLOTS_PATH).is_dir():
        shutil.rmtree(PLOTS_PATH)
    task = new_task(name="task-describe",
                    handler=summarize,
                    inputs={"table": DATA_URL},
                    params={
                        'update_dataset': True,
                        'label_column': 'label',
                        'dask_function': 'db://default/dask_tests'
                    })
    run_local(task)
    _validate_paths({
        'corr.html', 'correlation-matrix.csv', 'hist.html', 'imbalance.html',
        'imbalance-weights-vec.csv', 'violin.html'
    })
Example 8
def test_run_local_nb():
    spec = tag_test(base_spec, 'test_run_local_nb')
    spec.spec.handler = 'training'
    result = run_local(spec,
                       command='{}/mlrun_jobs.ipynb'.format(examples_path),
                       workdir=examples_path)
    verify_state(result)
Example 9
def test_run_local_nb():
    spec = tag_test(base_spec, "test_run_local_nb")
    spec.spec.handler = "training"
    result = run_local(spec,
                       command=f"{examples_path}/mlrun_jobs.ipynb",
                       workdir=examples_path)
    verify_state(result)
Example 10
def test_run_local_obj():
    spec = tag_test(base_spec, 'test_run_local_obj')
    spec.spec.handler = 'training'
    nbpath = '{}/mlrun_jobs.ipynb'.format(examples_path)
    ymlpath = path.join(out_path, 'nbyaml.yaml')
    print('out path:', out_path, ymlpath)
    fn = code_to_function(filename=nbpath, kind='job').export(ymlpath)
    result = run_local(spec, command=fn, workdir=out_path)
    verify_state(result)
Example 11
    def test_inline_code(self):
        run_object = run_local(self._inline_task.with_params(p1=7))
        self._logger.debug("Finished running task",
                           run_object=run_object.to_dict())

        run_uid = run_object.uid()

        assert run_uid is not None
        assert run_object.state() == "completed"
Example 12
def test_run_local_obj():
    spec = tag_test(base_spec, "test_run_local_obj")
    spec.spec.handler = "training"
    nbpath = f"{examples_path}/mlrun_jobs.ipynb"
    ymlpath = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, ymlpath)
    fn = code_to_function(filename=nbpath, kind="job").export(ymlpath)
    result = run_local(spec, command=fn, workdir=out_path)
    verify_state(result)
Example 13
    def test_inline_code_with_param_file(self):
        run_object = run_local(
            self._inline_task.with_param_file(
                str(self.assets_path / "params.csv"), "max.accuracy"))
        self._logger.debug("Finished running task",
                           run_object=run_object.to_dict())

        run_uid = run_object.uid()

        assert run_uid is not None
        assert run_object.state() == "completed"
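The with_param_file variant reads the hyper-parameter combinations from a CSV instead of an inline dict, and the "max.accuracy" selector picks the iteration with the highest accuracy result. The actual params.csv is not shown in the source; under MLRun's convention the header row names the parameters and each data row defines one iteration, along the lines of this assumed example:

p1,p2
1,10
2,20
3,30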
Example 14
def test_run_local():
    if Path(PLOTS_PATH).is_dir():
        shutil.rmtree(PLOTS_PATH)
    task = new_task(
        name="task-describe",
        handler=summarize,
        inputs={"table": DATA_URL},
        params={
            "update_dataset": True,
            "label_column": "label"
        },
    )
    run_local(task)
    _validate_paths({
        "corr.html",
        "correlation-matrix.csv",
        "hist.html",
        "imbalance.html",
        "imbalance-weights-vec.csv",
        "violin.html",
    })
Example 15
    def test_basics(self):
        run_object = run_local(self._basics_task,
                               command="training.py",
                               workdir=str(self.assets_path))
        self._logger.debug("Finished running task",
                           run_object=run_object.to_dict())

        run_uid = run_object.uid()

        assert run_uid is not None
        self._verify_run_metadata(
            run_object.to_dict()["metadata"],
            uid=run_uid,
            name="demo",
            project=self.project_name,
            labels={
                "v3io_user": self._test_env["V3IO_USERNAME"],
                "kind": "",
                "owner": self._test_env["V3IO_USERNAME"],
                "framework": "sklearn",
            },
        )
        self._verify_run_spec(
            run_object.to_dict()["spec"],
            parameters={
                "p1": 5,
                "p2": "a-string"
            },
            inputs={"infile.txt": str(self.assets_path / "infile.txt")},
            outputs=[],
            output_path=str(self.results_path / run_uid),
            secret_sources=[],
            data_stores=[],
            scrape_metrics=False,
        )

        assert run_object.state() == "completed"

        self._verify_run_outputs(
            run_object.outputs,
            uid=run_uid,
            name="demo",
            project=self.project_name,
            output_path=self.results_path / run_uid,
            accuracy=10,
            loss=15,
        )
Example 16
def test_run_local_with_uid_does_not_exist(monkeypatch):
    """
    Mocking a scenario that happened in field in which getuser raised the same error as the mock
    The problem was basically that the code was
    environ.get("V3IO_USERNAME", getpass.getuser())
    instead of
    environ.get("V3IO_USERNAME") or getpass.getuser()
    """
    def mock_getpwuid_raise(*args, **kwargs):
        raise KeyError("getpwuid(): uid not found: 400")

    environ["V3IO_USERNAME"] = "******"
    monkeypatch.setattr(getpass, "getuser", mock_getpwuid_raise)
    spec = tag_test(base_spec, "test_run_local")
    result = run_local(spec,
                       command=f"{examples_path}/training.py",
                       workdir=examples_path)
    verify_state(result)
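The distinction the docstring describes is plain Python behavior: environ.get(key, default) computes the default before the lookup, so a failing getpass.getuser() breaks the call even when the variable is set. A minimal, self-contained sketch (the helper names are illustrative, not part of the test):

import getpass
from os import environ

def get_user_eager():
    # the default is evaluated up front, so this raises whenever
    # getpass.getuser() raises, even if V3IO_USERNAME is set
    return environ.get("V3IO_USERNAME", getpass.getuser())

def get_user_lazy():
    # getpass.getuser() runs only when the variable is missing or empty
    return environ.get("V3IO_USERNAME") or getpass.getuser()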
Example 17
    def custom_setup(self):
        self._logger.debug("Connecting to database")

        self._logger.debug("Creating dummy task for db queries")

        # {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
        output_path = str(self.results_path / "{{run.uid}}")
        task = (new_task(name="demo",
                         params={
                             "p1": 5
                         },
                         artifact_path=output_path).with_secrets(
                             "file",
                             self.assets_path / "secrets.txt").set_label(
                                 "type", "demo"))

        self._logger.debug("Running dummy task")
        run_object = run_local(task,
                               command="training.py",
                               workdir=str(self.assets_path))
        self._logger.debug("Finished running dummy task",
                           run_object=run_object.to_dict())

        self._run_uid = run_object.uid()
Example 18
    def test_db_commands(self):
        self._logger.debug("Creating dummy task for db queries")

        # {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
        output_path = str(self.results_path / "{{run.uid}}")
        task = (new_task(name="demo",
                         params={
                             "p1": 5
                         },
                         artifact_path=output_path).with_secrets(
                             "file",
                             self.assets_path / "secrets.txt").set_label(
                                 "type", "demo"))
        runs_count_before_run = len(
            self._run_db.list_runs(project=self.project_name))
        artifacts_count_before_run = len(
            self._run_db.list_artifacts(project=self.project_name, tag="*"))

        self._logger.debug("Running dummy task")
        run_object = run_local(task,
                               command="training.py",
                               workdir=str(self.assets_path))
        self._logger.debug("Finished running dummy task",
                           run_object=run_object.to_dict())

        self._run_uid = run_object.uid()

        runs = self._run_db.list_runs(project=self.project_name)
        assert len(runs) == runs_count_before_run + 1

        self._verify_run_metadata(
            runs[0]["metadata"],
            uid=self._run_uid,
            name="demo",
            project=self.project_name,
            labels={
                "kind": "",
                "framework": "sklearn"
            },
        )
        self._verify_run_spec(
            runs[0]["spec"],
            parameters={
                "p1": 5,
                "p2": "a-string"
            },
            inputs={"infile.txt": str(self.assets_path / "infile.txt")},
            outputs=[],
            output_path=str(self.results_path / self._run_uid),
            secret_sources=[],
            data_stores=[],
        )

        artifacts = self._run_db.list_artifacts(project=self.project_name,
                                                tag="*")
        assert len(artifacts) == artifacts_count_before_run + 4
        artifact_keys = {artifact["key"] for artifact in artifacts}
        for artifact_key in ["chart", "html_result", "model", "mydf"]:
            assert artifact_key in artifact_keys

        runtimes = self._run_db.list_runtimes()
        runtime_kinds = mlrun.runtimes.RuntimeKinds.runtime_with_handlers()
        assert len(runtimes) == len(runtime_kinds)
        listed_kinds = {runtime["kind"] for runtime in runtimes}
        for runtime_kind in runtime_kinds:
            assert runtime_kind in listed_kinds
Example 19
from os import path
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
artifact_path = path.join(out, '{{run.uid}}')

# #### _running and linking multiple tasks_
# In this example we run two functions, ```training``` and ```validation```, and pass the result from one to the other.
# We will see in the ```job``` example that linking works even when the tasks run in a workflow in different processes or containers.

# ```run_local()``` runs our task as a local function call:

# Run the training function. Functions can have multiple handlers/methods; here we call the ```training``` handler:

# In[ ]:

train_run = run_local(
    NewTask(handler=training, params={'p1': 5}, artifact_path=out))

# After the function runs, it generates the result widget; you can click the `model` artifact to see its content.

# In[ ]:

train_run.outputs

# The output from the first (training) function is passed to the validation function; let's run it:

# In[ ]:

model_path = train_run.outputs['model']

validation_run = run_local(
    NewTask(handler=validation,
            inputs={'model': model_path},  # assumed completion; the source snippet is truncated here
            artifact_path=out))
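The training and validation handlers themselves are defined earlier in the original notebook and are not reproduced here. A minimal sketch of what such handlers can look like, assuming MLRun's standard handler signature (the bodies are illustrative, not the notebook's code):

def training(context, p1=1):
    # log results and a model artifact for downstream tasks to consume
    context.log_result('accuracy', p1 * 2)
    context.log_result('loss', p1 * 3)
    context.log_artifact('model', body=b'model-bytes', local_path='model.txt')

def validation(context, model):
    # 'model' is wired in through the task's inputs and resolves to the
    # artifact logged by the training run
    context.logger.info(f'validating model: {model}')
    context.log_result('validation_ok', True)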
Example 20
from mlrun import run_local, RunTemplate, NewTask, mlconf
from os import path
mlconf.dbpath = mlconf.dbpath or './'
out = mlconf.artifact_path or path.abspath('./data')
# {{run.uid}} will be substituted with the run id, so output will be written to different directories per run
artifact_path = path.join(out, '{{run.uid}}')
task = (
    NewTask(name='demo', params={'p1': 5}, artifact_path=artifact_path)
    .with_secrets('file', 'secrets.txt')
    .set_label('type', 'demo')
)
# run our task using our new function
run_object = run_local(task, command='training.py')
run_object.uid()                # unique id of this run
run_object.to_dict()            # the full run document as a dict
run_object.state()              # current state, e.g. 'completed'
run_object.show()               # render the results widget (in a notebook)
run_object.outputs              # results and artifact paths
run_object.logs()               # print the run logs
run_object.artifact('dataset')  # fetch a specific artifact by key
Example 21
def test_run_local():
    spec = tag_test(base_spec, "test_run_local")
    result = run_local(spec,
                       command=f"{examples_path}/training.py",
                       workdir=examples_path)
    verify_state(result)
Example 22
def test_run_local():
    spec = tag_test(base_spec, 'test_run_local')
    result = run_local(spec,
                       command='{}/training.py'.format(examples_path),
                       workdir=examples_path)
    verify_state(result)