def test_xgb_custom():
    """Run the ``gen_outliers`` and ``fit`` handlers of ``xgb_custom.py`` locally.

    Each handler is wrapped as a ``job`` function with ``code_to_function``
    and executed with ``local=True``. The test has no assertions; it only
    checks that both runs go through without raising.
    """
    source_file = "xgb_custom.py"

    # Step 1: run the ``gen_outliers`` handler.
    generator = code_to_function(
        name="test_xgb_custom",
        filename=source_file,
        handler="gen_outliers",
        kind="job",
    )
    generator_params = {
        "nrows": 8192,
        "label_type": "float",
        "local_path": "./artifacts/inputs/xgb_custom",
    }
    generator.run(params=generator_params, local=True)

    # Step 2: run the ``fit`` handler against the parquet dataset input.
    trainer = code_to_function(
        name="test_fit_model",
        filename=source_file,
        handler="fit",
        kind="job",
    )
    fit_params = {
        "num_boost_round": 40,
        "verbose_eval": False,
        "XGB_max_depth": 2,
        "XGB_subsample": 0.9,
        "test_set_key": "./artifacts/inputs/test-set",
    }
    trainer.run(
        params=fit_params,
        inputs={"dataset": "./artifacts/inputs/xgb_custom.parquet"},
        local=True,
    )
Beispiel #2
0
    def to_function(self, default_kind=None):
        """Generate a function object from this reference's spec or url.

        Resolution order:

        1. ``self.spec`` set -> ``mlrun.new_function`` with the spec as runtime.
        2. url ending in ``.yaml`` or starting with ``db://`` / ``hub://`` ->
           ``mlrun.import_function`` (``self.image`` overrides the image).
        3. url ending in ``.ipynb`` or ``.py`` -> ``mlrun.code_to_function``
           (``.py`` files require ``self.image``).

        The result is stored on ``self._function`` and returned.

        :param default_kind: kind used when ``self.kind`` is not set
        :return: the generated function object
        :raises OSError: the url is a local path that does not exist
        :raises ValueError: neither a spec nor a supported url was given, or
            a ``.py`` url was given without an image
        """
        url = self.url
        # Urls without a scheme are local paths and must exist on disk.
        if url and "://" not in url and not os.path.isfile(url):
            raise OSError("{} not found".format(url))

        kind = self.kind or default_kind
        if self.spec:
            func = mlrun.new_function(self.name, runtime=self.spec)
        elif not url:
            # Without a url the suffix/prefix checks below would fail with an
            # AttributeError on None; report the real problem instead.
            raise ValueError(
                "unsupported function url {} or no spec".format(url))
        elif url.endswith(".yaml") or url.startswith(("db://", "hub://")):
            func = mlrun.import_function(url)
            if self.image:
                func.spec.image = self.image
        elif url.endswith(".ipynb"):
            func = mlrun.code_to_function(self.name,
                                          filename=url,
                                          image=self.image,
                                          kind=kind)
        elif url.endswith(".py"):
            # todo: support code text as input (for UI)
            if not self.image:
                raise ValueError(
                    "image must be provided with py code files, "
                    "use function object for more control/settings")
            func = mlrun.code_to_function(self.name,
                                          filename=url,
                                          image=self.image,
                                          kind=kind)
        else:
            raise ValueError(
                "unsupported function url {} or no spec".format(url))
        if self.requirements:
            func.with_requirements(self.requirements)
        self._function = func
        return func
Beispiel #3
0
def test_run_local_yaml():
    """Export a notebook-based job to YAML and run that YAML via ``run_local``.

    The notebook ``mlrun_jobs.ipynb`` is converted with ``code_to_function``,
    exported to ``nbyaml.yaml`` under ``out_path`` and executed with the
    ``training`` handler; the resulting run is checked with ``verify_state``.
    """
    # Tag the task with this test's own name; it previously carried the
    # 'test_run_local_handler' tag, which mislabelled the run.
    spec = tag_test(base_spec, 'test_run_local_yaml')
    spec.spec.handler = 'training'
    nbpath = '{}/mlrun_jobs.ipynb'.format(examples_path)
    ymlpath = path.join(out_path, 'nbyaml.yaml')
    print('out path:', out_path, ymlpath)
    code_to_function(filename=nbpath, kind='job').export(ymlpath)
    result = run_local(spec, command=ymlpath, workdir=out_path)
    verify_state(result)
Beispiel #4
0
def test_run_local_yaml():
    """Run a function locally from an exported YAML definition.

    Converts ``mlrun_jobs.ipynb`` into a job function, exports it to
    ``nbyaml.yaml`` under ``out_path`` and passes that file as the command
    to ``run_local`` with the ``training`` handler.
    """
    task = tag_test(base_spec, "test_run_local_yaml")
    task.spec.handler = "training"

    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    exported_yaml = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, exported_yaml)

    job_function = code_to_function(filename=notebook, kind="job")
    job_function.export(exported_yaml)

    verify_state(run_local(task, command=exported_yaml, workdir=out_path))
Beispiel #5
0
    def to_function(self, default_kind=None):
        """Generate a function object from the ref definitions.

        The source is chosen in this order: ``self.url``, then ``self.code``,
        then ``self.spec`` alone. When a url or code is used and a spec is
        also present, the spec is merged in with ``enrich_function_from_dict``.
        The result is stored on ``self._function`` and returned.

        :param default_kind: kind used when ``self.kind`` is not set
        :return: the generated function object
        :raises OSError: the url is a local path that does not exist
        :raises ValueError: unsupported url, a ``.py`` url without an image,
            or none of url/spec/code specified
        """
        url = self.url
        # Urls without a scheme are local paths and must exist on disk.
        if url and "://" not in url and not os.path.isfile(url):
            raise OSError(f"{url} not found")

        kind = self.kind or default_kind
        # The spec is merged on top of url/code based functions only; a
        # spec-only reference is built from the spec directly.
        merge_spec = True

        if url:
            if url.endswith(".yaml") or url.startswith(("db://", "hub://")):
                func = mlrun.import_function(url)
                if self.image:
                    func.spec.image = self.image
            elif url.endswith((".ipynb", ".py")):
                # todo: support code text as input (for UI)
                if url.endswith(".py") and not self.image:
                    raise ValueError(
                        "image must be provided with py code files, "
                        "use function object for more control/settings"
                    )
                func = mlrun.code_to_function(
                    self.name, filename=url, image=self.image, kind=kind
                )
            else:
                raise ValueError(f"unsupported function url {url} or no spec")
        elif self.code is not None:
            source = self.code
            if kind == mlrun.runtimes.RuntimeKinds.serving:
                source += mlrun_footer.format(mlrun.runtimes.serving.serving_subkind)
            func = mlrun.new_function(self.name, kind=kind, image=self.image)
            # The code is embedded in the build spec as base64 text.
            func.spec.build.functionSourceCode = b64encode(
                source.encode("utf-8")
            ).decode("utf-8")
            if kind not in mlrun.runtimes.RuntimeKinds.nuclio_runtimes():
                func.spec.default_handler = "handler"
        elif self.spec:
            func = mlrun.new_function(self.name, runtime=self.spec)
            merge_spec = False
        else:
            raise ValueError("url or spec or code must be specified")

        if merge_spec and self.spec:
            func = enrich_function_from_dict(func, self.spec)

        if self.requirements:
            func.with_requirements(self.requirements)
        self._function = func
        return func
Beispiel #6
0
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        """Create the sklearn demo project with its functions and workflow.

        Builds the ``gen-iris`` job from ``iris_generator_function.py`` in the
        assets directory, registers it together with several hub functions,
        and embeds ``workflow.py`` as the ``main`` workflow.

        :return: the newly created project
        """
        self._logger.debug("Creating sklearn project")
        demo_project = mlrun.new_project(
            self.project_name, str(self.assets_path), init_git=True
        )

        self._logger.debug("Creating iris-generator function")
        function_path = str(self.assets_path / "iris_generator_function.py")
        iris_generator_function = mlrun.code_to_function(
            name="gen-iris", kind="job", filename=function_path, image="mlrun/mlrun",
        )

        iris_generator_function.spec.remote = True
        iris_generator_function.spec.replicas = 1
        iris_generator_function.spec.service_type = "NodePort"
        # Install "scikit-learn": the "sklearn" name on PyPI is a deprecated
        # placeholder package whose installation now fails.
        iris_generator_function.spec.build.commands.append(
            "pip install pandas scikit-learn pyarrow"
        )

        self._logger.debug("Setting project functions")
        demo_project.set_function(iris_generator_function)
        demo_project.set_function("hub://describe", "describe")
        demo_project.set_function("hub://sklearn_classifier", "train")
        demo_project.set_function("hub://test_classifier", "test")
        demo_project.set_function("hub://model_server", "serving")
        demo_project.set_function("hub://model_server_tester", "live_tester")

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow(
            "main", str(self.assets_path / "workflow.py"), embed=True
        )

        return demo_project
Beispiel #7
0
def sklearn_classifier(run):
    """Run the ``train_model`` handler of ``sklearn_classifier.py`` as a local function.

    The script is looked up in the ``sklearn_classifier`` directory next to
    the current working directory's parent.

    :param run: a previous run whose ``rent`` output is used as the dataset
    """
    parent_dir = Path(os.getcwd()).parent.absolute()
    script = f"{parent_dir}/sklearn_classifier/sklearn_classifier.py"

    classifier = code_to_function(
        name='test_sklearn_classifier',
        filename=script,
        handler="train_model",
        kind="local",
    )
    classifier.spec.command = script

    train_params = {
        "sample": -5_000,  # 5k random rows (per the original note)
        "model_pkg_class": "sklearn.ensemble.RandomForestClassifier",
        "label_column": "interest_level",
        "CLASS_n_estimators": 100,
        "CLASS_min_samples_leaf": 1,
        "CLASS_n_jobs": -1,
        "CLASS_oob_score": True,
    }
    classifier.run(
        params=train_params,
        handler="train_model",
        inputs={"dataset": run.outputs["rent"]},
        artifact_path='artifacts',
    )
Beispiel #8
0
    def create_demo_project(self) -> mlrun.projects.MlrunProject:
        """Create the horovod demo project with its functions, artifact and workflow.

        Uploads ``horovod_training.py`` to the v3io user store, registers a
        ``utils`` job, an ``mpijob`` trainer and the hub ``tf2_serving``
        function, logs the ``images`` artifact and embeds ``workflow.py`` as
        the ``main`` workflow.

        :return: the newly created project
        """
        self._logger.debug("Creating horovod project")
        demo_project = mlrun.new_project(self.project_name,
                                         str(self.assets_path),
                                         init_git=True)

        # NOTE(review): the return value is discarded, so this call does not
        # appear to affect any function - confirm whether it is needed.
        mlrun.mount_v3io()

        self._logger.debug("Uploading training file")
        trainer_src_path = str(self.assets_path / "horovod_training.py")
        trainer_dest_path = pathlib.Path("/assets/horovod_training.py")
        stores = mlrun.datastore.store_manager.set()
        datastore, subpath = stores.get_or_create_store(
            self._get_v3io_user_store_path(trainer_dest_path))
        datastore.upload(subpath, trainer_src_path)

        # The message used to say "iris-generator", copied from another demo.
        self._logger.debug("Creating utils function")
        function_path = str(self.assets_path / "utils_functions.py")
        utils = mlrun.code_to_function(
            name="utils",
            kind="job",
            filename=function_path,
            image="mlrun/mlrun",
        )

        utils.spec.remote = True
        utils.spec.replicas = 1
        utils.spec.service_type = "NodePort"
        utils.spec.command = function_path

        self._logger.debug("Setting project functions")
        demo_project.set_function(utils)

        trainer = mlrun.new_function(
            name="trainer",
            kind="mpijob",
            command=self._get_v3io_user_store_path(trainer_dest_path,
                                                   remote=False),
            image="mlrun/ml-models",
        )
        trainer.spec.remote = True
        trainer.spec.replicas = 4
        trainer.spec.service_type = "NodePort"

        demo_project.set_function(trainer)
        demo_project.set_function("hub://tf2_serving", "serving")

        demo_project.log_artifact(
            "images",
            target_path=
            "http://iguazio-sample-data.s3.amazonaws.com/catsndogs.zip",
            artifact_path=mlrun.mlconf.artifact_path,
        )

        self._logger.debug("Setting project workflow")
        demo_project.set_workflow("main",
                                  str(self.assets_path / "workflow.py"),
                                  embed=True)

        return demo_project
Beispiel #9
0
def test_run_local_aggregate():
    """Run the ``aggregate`` handler of ``aggregate.py`` as a local function.

    Passes ``DATA`` as the ``df_artifact`` input and checks afterwards that
    a file exists at ``AGGREGATE_PATH``.
    """
    aggregator = code_to_function(
        name='test_aggregate',
        filename="aggregate.py",
        handler="aggregate",
        kind="local",
    )
    aggregator.spec.command = 'aggregate.py'

    aggregate_params = {
        'metrics': ['cpu_utilization'],
        'labels': ['is_error'],
        'metric_aggs': ['mean', 'sum'],
        'label_aggs': ['max'],
        'suffix': 'daily',
        'inplace': False,
        'window': 5,
        'center': True,
        'save_to': AGGREGATE_PATH,
        'files_to_select': 2,
    }
    aggregator.run(params=aggregate_params, inputs={'df_artifact': DATA})

    assert Path(AGGREGATE_PATH).is_file()
Beispiel #10
0
def test_nuclio_nb_serving():
    """Check the kind and function kind produced for the xgb_serving notebook url."""
    notebook_url = (
        'https://raw.githubusercontent.com/mlrun/mlrun/master/examples/xgb_serving.ipynb'
    )
    serving_fn = code_to_function(filename=notebook_url)

    assert serving_fn.kind == 'remote', 'kind not set, test failed'
    assert serving_fn.spec.function_kind == 'serving', 'code not embedded'
Beispiel #11
0
def composer(apiversion='v1alpha1', name='', project='default'):
    """Create a nuclio function from the ``iguazioig`` processing template.

    The template is looked up inside the installed ``iguazioig`` package as
    ``templates/processing_template_<apiversion>.py``. When that file does
    not exist but the ``.ipynb`` variant does, the notebook is used instead.

    :param apiversion: template version suffix used in the template file name
    :param name:       function name
    :param project:    function project
    :return: the object returned by ``code_to_function`` with kind='nuclio'
    """

    import iguazioig
    _module_path = os.path.dirname(iguazioig.__file__)

    template_base = "%s/templates/processing_template_%s" % (
        _module_path, apiversion)
    template_file = template_base + ".py"
    # The previous try/except wrapped plain string formatting, which cannot
    # fail, so the notebook fallback never ran. Test the files explicitly.
    if not os.path.isfile(template_file):
        notebook_file = template_base + ".ipynb"
        if os.path.isfile(notebook_file):
            template_file = notebook_file

    return code_to_function(name,
                            project=project,
                            filename=template_file,
                            kind='nuclio')
def main():
    """Deploy a serving function for ``assets/model.pkl`` and test its endpoint.

    Steps: load environment variables, set up the MLRun environment, push
    the model file to V3IO, build and configure the serving function, deploy
    it and pass the deploy result to ``test_inference``.
    """
    # Environment variables supply the MLRun/V3IO connection settings below.
    load_dotenv()

    # Project setup
    project_name, artifact_path = set_environment(
        project="remote-model-deployment",
        artifact_path=os.getenv("MLRUN_ARTIFACT_PATH"),
        api_path=os.getenv("MLRUN_DBPATH"),
        access_key=os.getenv("V3IO_ACCESS_KEY"),
    )
    print(f"Creating project '{project_name}'")

    # Upload the model asset to V3IO
    client = v3io.dataplane.Client()
    push_to_v3io(client, "assets/model.pkl", "nick/tmp/model.pkl")

    # Build the serving function with the v3io mount applied
    unmounted_fn = code_to_function(
        name="serving",
        kind="serving",
        image="mlrun/mlrun",
        filename="assets/model_server.py",
    )
    serving_fn = unmounted_fn.apply(mount_v3io())
    print(f"Creating function '{serving_fn.metadata.name}'")

    # Configure the model class and register the uploaded model
    serving_fn.spec.default_class = "ClassifierModel"
    serving_fn.add_model("my_model", model_path="/User/tmp/model.pkl")

    # Deploy, then exercise the deployed endpoint
    endpoint = serving_fn.deploy()
    test_inference(endpoint)
Beispiel #13
0
def _create_mlrun_function_and_matching_scheduled_object(
        db: Session, project: str, handler: str = "do_nothing"):
    """Store a local function in the DB and build a scheduled object that targets it.

    The function is created from ``function.py`` located next to this file
    and stored as a versioned function; the returned dict references it by
    ``<project>/<name>@<hash_key>``.

    :param db:      DB session passed to ``store_function``
    :param project: project the function and task belong to
    :param handler: handler name placed in the task spec
    :return: scheduled object dict with ``task.spec`` and ``task.metadata``
    """
    function_name = "my-function"
    source_file = str(pathlib.Path(__file__).absolute().parent / "function.py")

    function = mlrun.code_to_function(
        name=function_name, kind="local", filename=source_file
    )
    function.spec.command = source_file

    hash_key = get_db().store_function(
        db, function.to_dict(), function_name, project, versioned=True
    )

    task_spec = {
        "function": f"{project}/{function_name}@{hash_key}",
        "handler": handler,
    }
    task_metadata = {"name": "my-task", "project": f"{project}"}
    return {"task": {"spec": task_spec, "metadata": task_metadata}}
Beispiel #14
0
def test_nuclio_nb_serving():
    """Convert the remote xgb_serving notebook and check the resulting kinds."""
    source = "https://raw.githubusercontent.com/mlrun/mlrun/master/examples/xgb_serving.ipynb"
    converted = code_to_function(filename=source)

    # The test expects the 'remote' kind with the 'serving' function kind.
    assert converted.kind == "remote", "kind not set, test failed"
    assert converted.spec.function_kind == "serving", "code not embedded"
Beispiel #15
0
def test_job_file_codeout():
    """Convert a notebook to a job with ``code_output`` and check the generated file.

    Expects no embedded source code, the command pointing at the output
    file, and the output file present on disk.
    """
    notebook = f'{examples_path}/mlrun_jobs.ipynb'
    generated = f'{results}/ctf_tst.py'

    job = code_to_function(filename=notebook, kind='job', code_output=generated)
    build_spec = job.spec.build

    assert job.kind == 'job', 'kind not set, test failed'
    assert not build_spec.functionSourceCode, build_spec.functionSourceCode
    assert job.spec.command == generated, 'filename not set to out in command'
    assert path.isfile(generated), 'output not generated'
Beispiel #16
0
def test_local_file_noembed():
    """Create a local function from ``training.py`` without embedding code, then run it.

    Expects no embedded source code and the command set to the file path.
    """
    script = f"{examples_path}/training.py"
    local_fn = code_to_function(filename=script, kind="local", embed_code=False)
    build_spec = local_fn.spec.build

    assert local_fn.kind == "local", "kind not set, test failed"
    assert not build_spec.functionSourceCode, build_spec.functionSourceCode
    assert local_fn.spec.command == script, "filename not set in command"

    # Run from the examples directory.
    local_fn.run(workdir=str(examples_path))
Beispiel #17
0
def test_job_file_codeout():
    """Convert a notebook to a job, writing the code to a file instead of embedding it.

    Expects no embedded source code, the command pointing at the output
    file, and the output file present on disk.
    """
    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    generated = f"{results}/ctf_tst.py"

    job = code_to_function(
        filename=notebook,
        kind="job",
        code_output=generated,
        embed_code=False,
    )
    build_spec = job.spec.build

    assert job.kind == "job", "kind not set, test failed"
    assert not build_spec.functionSourceCode, build_spec.functionSourceCode
    assert job.spec.command == generated, "filename not set to out in command"
    assert path.isfile(generated), "output not generated"
Beispiel #18
0
def get_model_monitoring_stream_processing_function(project: str):
    """Build the ``model-monitoring-stream`` nuclio function for a project.

    :param project: project the function is created in
    :return: the object returned by ``code_to_function`` for the file at
        ``STREAM_PROCESSING_FUNCTION_PATH``
    """
    source_file = str(STREAM_PROCESSING_FUNCTION_PATH)
    stream_function = code_to_function(
        name="model-monitoring-stream",
        project=project,
        filename=source_file,
        kind="nuclio",
        image="mlrun/mlrun",
    )
    return stream_function
Beispiel #19
0
def test_local_file_noembed():
    """Build a local function from ``training.py`` with ``embed_code=False`` and run it.

    Expects no embedded source code and the command set to the file path.
    """
    script = f'{examples_path}/training.py'
    local_fn = code_to_function(filename=script, kind='local', embed_code=False)
    embedded_source = local_fn.spec.build.functionSourceCode

    assert local_fn.kind == 'local', 'kind not set, test failed'
    assert not embedded_source, embedded_source
    assert local_fn.spec.command == script, 'filename not set in command'

    # Run from the examples directory.
    local_fn.run(workdir=str(examples_path))
Beispiel #20
0
def test_run_local_from_func():
    """Export a notebook job to YAML and run the object ``export`` returns locally.

    Uses the ``training`` handler and checks the run with ``verify_state``.
    """
    task = tag_test(base_spec, "test_run_local_from_func")
    task.spec.handler = "training"

    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    exported_yaml = path.join(out_path, "nbyaml.yaml")
    print("out path:", out_path, exported_yaml)

    job = code_to_function(filename=notebook, kind="job")
    # ``run`` is called on whatever ``export`` returns, as in the original.
    exported_fn = job.export(exported_yaml)
    verify_state(exported_fn.run(task, workdir=out_path, local=True))
Beispiel #21
0
def test_vault_end_to_end():
    """Create project secrets in Vault, run a job that reads them, and check its log.

    Requires a running MLRun API server that works with Vault and is
    reachable on ``api_server_port``.
    """
    # This requires an MLRun API server to run and work with Vault. This port should
    # be configured to allow access to the server.
    api_server_port = 57764
    _set_vault_mlrun_configuration(api_server_port)

    project_name = "abc"
    func_name = "vault-function"
    secrets = {
        "aws_key": "1234567890",
        "github_key": "proj1Key!!!",
    }

    project = new_project(project_name)
    # This call will initialize Vault infrastructure and add the given secrets
    # It executes on the API server
    project.create_vault_secrets(dict(secrets))

    # This API executes on the client side
    project_secrets = project.get_vault_secret_keys()
    assert project_secrets == ["aws_key", "github_key"], "secrets not created"

    # Create function and set container configuration
    function = code_to_function(
        name=func_name,
        filename=f"{examples_path}/vault_function.py",
        handler="vault_func",
        project=project_name,
        kind="job",
    )
    function.spec.image = "saarcoiguazio/mlrun:unstable"

    # Create context for the execution
    task = new_task(
        project=project_name,
        name="vault_test_run",
        handler="vault_func",
        out_path=out_path,
        params={"secrets": ["password", "path", "github_key", "aws_key"]},
    )
    task.with_secrets("vault", [])

    result = function.run(task)
    verify_state(result)

    run_db = get_run_db().connect()
    state, raw_log = run_db.get_log(result.metadata.uid, project=project_name)
    log_text = str(raw_log)
    print(state)

    # Both secret values must show up in the function's log output.
    aws_line = f"value: {secrets['aws_key']}"
    assert aws_line in log_text, "secret value not detected in function output"
    github_line = f"value: {secrets['github_key']}"
    assert github_line in log_text, "secret value not detected in function output"
Beispiel #22
0
def test_main_local_flag():
    """Export a job to YAML and run it via ``exec_run`` with the ``--local`` flag.

    Passes when the captured output contains ``state: completed``.
    """
    job = mlrun.code_to_function(
        filename=f"{examples_path}/handler.py", kind="job", handler="my_func"
    )
    exported_yaml = f"{out_path}/myfunc.yaml"
    job.export(exported_yaml)

    cli_args = f"-f {exported_yaml} --local".split()
    output = exec_run("", cli_args, "test_main_local_flag")
    print(output)
    assert "state: completed" in output, output
Beispiel #23
0
    def custom_setup(self):
        """Create the ``mydask`` dask function and store it on ``self.dask_function``.

        The function is built from ``dask_function.py`` in the assets
        directory with the v3io mount applied, then configured with its
        image, remote flag, replicas, service type and command.
        """
        self._logger.debug("Creating dask function")
        source_file = str(self.assets_path / "dask_function.py")

        dask_function = code_to_function("mydask", kind="dask", filename=source_file)
        self.dask_function = dask_function.apply(mount_v3io())

        spec = self.dask_function.spec
        spec.image = "mlrun/ml-models"
        spec.remote = True
        spec.replicas = 1
        spec.service_type = "NodePort"
        spec.command = source_file
Beispiel #24
0
 def _generate_runtime(self, kind="nuclio"):
     """Build the test function from this instance's name, project, code and image.

     :param kind: kind passed to ``code_to_function``
     :return: the object returned by ``code_to_function``
     """
     function_settings = dict(
         name=self.name,
         project=self.project,
         filename=self.code_filename,
         handler=self.code_handler,
         kind=kind,
         image=self.image_name,
         description="test function",
     )
     return code_to_function(**function_settings)
Beispiel #25
0
def test_local_file_codeout():
    """Convert a notebook to a local function with a code output file, then run it.

    Expects no embedded source code, the command pointing at the output
    file, and the output file present on disk before the ``training``
    handler is run.
    """
    notebook = f'{examples_path}/mlrun_jobs.ipynb'
    generated = f'{results}/ctf_tst.py'

    local_fn = code_to_function(
        filename=notebook, kind='local', code_output=generated, embed_code=False
    )
    embedded_source = local_fn.spec.build.functionSourceCode

    assert local_fn.kind == 'local', 'kind not set, test failed'
    assert not embedded_source, embedded_source
    assert local_fn.spec.command == generated, 'filename not set to out in command'
    assert path.isfile(generated), 'output not generated'

    local_fn.run(handler='training', params={'p1': 5})
def test_run_sql_to_file():
    """Run the ``sql_to_file`` handler of ``sql_to_file.py`` locally.

    The query and database url come from ``mysql_query`` and ``mysql_url``.
    The test has no assertions; it only checks that the run does not raise.
    """
    sql_job = code_to_function(
        name='test_sql_to_file',
        filename="sql_to_file.py",
        handler="sql_to_file",
        kind="job",
    )
    query_params = {
        'sql_query': mysql_query,
        'database_url': mysql_url,
    }
    sql_job.run(params=query_params, local=True)
Beispiel #27
0
 def _generate_runtime(self, disable_auto_mount=False):
     """Build the nuclio test function and set its ``disable_auto_mount`` flag.

     :param disable_auto_mount: value stored on ``spec.disable_auto_mount``
     :return: the configured object returned by ``code_to_function``
     """
     function_settings = dict(
         name=self.name,
         project=self.project,
         filename=self.code_filename,
         handler=self.code_handler,
         kind="nuclio",
         image=self.image_name,
         description="test function",
     )
     nuclio_runtime = code_to_function(**function_settings)
     nuclio_runtime.spec.disable_auto_mount = disable_auto_mount
     return nuclio_runtime
Beispiel #28
0
    def test_deploy_function_without_project(self):
        """Create a nuclio function from ``nuclio_function.py`` and deploy it."""
        source_file = str(self.assets_path / "nuclio_function.py")

        self._logger.debug("Creating nuclio function")
        nuclio_fn = mlrun.code_to_function(
            name="simple-function",
            kind="nuclio",
            project=self.project_name,
            filename=source_file,
        )

        self._logger.debug("Deploying nuclio function")
        nuclio_fn.deploy()
Beispiel #29
0
 def test_run_alone(self):
     """Run ``func1`` from ``localpipe.py`` locally after clearing the pipeline context.

     Expects the run to complete and its ``accuracy`` output to equal 10.
     """
     mlrun.projects.pipeline_context.clear(with_project=True)

     source_file = str(self.assets_path / "localpipe.py")
     job = mlrun.code_to_function(
         "test1", filename=source_file, handler="func1", kind="job"
     )

     outcome = mlrun.run_function(job, params={"p1": 5}, local=True)
     print(outcome.to_yaml())

     assert outcome.state() == "completed", "run didnt complete"
     # expect y = param1 * 2 = 10
     assert outcome.output("accuracy") == 10, "unexpected run result"
Beispiel #30
0
def test_local_file_codeout():
    """Convert a notebook to a local function written to a code file, then run it.

    Expects no embedded source code, the command pointing at the output
    file, and the output file present on disk before the ``training``
    handler is run.
    """
    notebook = f"{examples_path}/mlrun_jobs.ipynb"
    generated = f"{results}/ctf_tst.py"

    local_fn = code_to_function(
        filename=notebook, kind="local", code_output=generated, embed_code=False
    )
    build_spec = local_fn.spec.build

    assert local_fn.kind == "local", "kind not set, test failed"
    assert not build_spec.functionSourceCode, build_spec.functionSourceCode
    assert local_fn.spec.command == generated, "filename not set to out in command"
    assert path.isfile(generated), "output not generated"

    local_fn.run(handler="training", params={"p1": 5})