Ejemplo n.º 1
0
def test_create_project_from_file_with_legacy_structure():
    """Save a legacy-structure project to YAML and check it loads correctly.

    Builds an ``MlrunProjectLegacy`` holding two functions, one workflow and
    one artifact, saves it to ``project.yaml`` under the test results
    directory, loads that file with ``mlrun.load_project`` and asserts that
    the loaded project carries the same name, description, artifact path,
    params, functions, workflows and artifacts.
    """
    name = "project-name"
    desc = "project description"
    project_params = {"param_key": "param value"}
    artifacts_dir = "/tmp"
    trainer_name = "trainer-function"
    workflow_file = str(
        pathlib.Path(tests.conftest.tests_root_directory) / "projects" / "workflow.py"
    )
    project_file = str(pathlib.Path(tests.conftest.results) / "project.yaml")

    # build the legacy project: functions, workflow and a single artifact
    legacy = mlrun.projects.project.MlrunProjectLegacy(
        name, desc, project_params, artifact_path=artifacts_dir
    )
    legacy.set_function(mlrun.new_function(trainer_name, name), trainer_name)
    legacy.set_function("hub://describe", "describe")
    legacy.set_workflow("workflow-name", workflow_file)
    legacy.artifacts = [
        {
            "key": "raw-data",
            "kind": "",
            "iter": 0,
            "tree": "latest",
            "target_path": "https://raw.githubusercontent.com/mlrun/demos/master/customer-churn-prediction/WA_Fn-UseC_-Telc"
            "o-Customer-Churn.csv",
            "db_key": "raw-data",
        }
    ]
    legacy.save(project_file)

    loaded = mlrun.load_project("./", project_file)

    assert loaded.kind == "project"
    assert loaded.metadata.name == name

    # each value is checked on the spec and again directly on the project
    assert loaded.spec.description == desc
    assert loaded.description == desc
    assert loaded.spec.artifact_path == artifacts_dir
    assert loaded.artifact_path == artifacts_dir
    assert deepdiff.DeepDiff(project_params, loaded.spec.params, ignore_order=True) == {}
    assert deepdiff.DeepDiff(project_params, loaded.params, ignore_order=True) == {}

    # collections must match between the legacy and the loaded project
    for attribute in ("functions", "workflows", "artifacts"):
        assert (
            deepdiff.DeepDiff(
                getattr(legacy, attribute),
                getattr(loaded, attribute),
                ignore_order=True,
            )
            == {}
        )
Ejemplo n.º 2
0
    def test_run(self):
        """Create a project, reload it from the assets dir and run "main".

        The test project is deleted in a ``finally`` clause so that it is
        removed even when the workflow run or the state assertion fails.
        """
        name = "pipe1"
        # create project in context
        self._create_project(name)

        try:
            # load project from context dir and run a workflow
            project2 = mlrun.load_project(str(self.assets_path), name=name)
            run = project2.run(
                "main", watch=True, artifact_path=f"v3io:///projects/{name}"
            )
            assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
        finally:
            # always clean up, otherwise a failed run leaves the project behind
            self._delete_test_project(name)
Ejemplo n.º 3
0
def test_sync_functions():
    """Export a project, load it from the file and sync its functions.

    Asserts that the loaded project's ``_function_objects`` is empty before
    ``sync_functions()`` is called, then hands the loaded project and the
    original project's function objects to
    ``_assert_project_function_objects`` for comparison.
    """
    source_project = mlrun.new_project("project-name")
    source_project.set_function("hub://describe")
    expected_function_objects = source_project.spec._function_objects

    export_path = str(pathlib.Path(tests.conftest.results) / "project.yaml")
    source_project.export(export_path)

    loaded_project = mlrun.load_project(None, export_path)
    assert loaded_project.spec._function_objects == {}

    loaded_project.sync_functions()
    _assert_project_function_objects(loaded_project, expected_function_objects)
Ejemplo n.º 4
0
    def test_run_cli(self):
        """Clone a project through the CLI, swap its workflow and run it.

        Steps: clone the demo project into a local directory with
        ``exec_project``, load it, replace its workflows with a single "kf"
        workflow, save it, run that workflow through ``exec_project`` and
        check the resulting pipeline state in the run DB.

        Both test projects are deleted in a ``finally`` clause so they are
        removed even when the run or one of the assertions fails.
        """
        # load project from git
        name = "pipe4"
        project_dir = f"{projects_dir}/{name}"
        shutil.rmtree(project_dir, ignore_errors=True)

        # clone a project to local dir
        args = [
            "-n",
            name,
            "-u",
            "git://github.com/mlrun/project-demo.git",
            project_dir,
        ]
        out = exec_project(args, projects_dir)
        print(out)

        # load the project from local dir and change a workflow
        project2 = mlrun.load_project(project_dir)
        try:
            project2.spec.workflows = {}
            project2.set_workflow("kf", "./kflow.py")
            project2.save()
            print(project2.to_yaml())

            # exec the workflow
            args = [
                "-n",
                name,
                "-r",
                "kf",
                "-w",
                "-p",
                f"v3io:///projects/{name}",
                project_dir,
            ]
            out = exec_project(args, projects_dir)
            # the run id is parsed out of the captured CLI output
            m = re.search(" Pipeline run id=(.+),", out)
            assert m, "pipeline id is not in output"

            run_id = m.group(1).strip()
            db = mlrun.get_run_db()
            pipeline = db.get_pipeline(run_id, project=name)
            state = pipeline["run"]["status"]
            assert state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
        finally:
            # always clean up, otherwise a failure leaves the projects behind
            self._delete_test_project(name)
            self._delete_test_project(project2.metadata.name)
Ejemplo n.º 5
0
    def test_run_git_load(self):
        """Load a project from git and run "main" with source loaded on run.

        The test project is deleted in a ``finally`` clause so that it is
        removed even when the workflow run or the state assertion fails.
        """
        # load project from git
        name = "pipe2"
        project_dir = f"{projects_dir}/{name}"
        shutil.rmtree(project_dir, ignore_errors=True)

        project2 = mlrun.load_project(
            project_dir, "git://github.com/mlrun/project-demo.git#main", name=name
        )
        try:
            logger.info("run pipeline from git")

            # run project, load source into container at runtime
            project2.spec.load_source_on_run = True
            run = project2.run("main", artifact_path=f"v3io:///projects/{name}")
            run.wait_for_completion()
            assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
        finally:
            # always clean up, otherwise a failed run leaves the project behind
            self._delete_test_project(name)
Ejemplo n.º 6
0
def test_sync_functions():
    """Export a project, reload it, sync its functions and fetch functions.

    Asserts that the loaded project's ``_function_objects`` is empty before
    ``sync_functions()`` is called, then hands the loaded project and the
    original project's function objects to
    ``_assert_project_function_objects``. Afterwards checks that
    ``project.func`` returns both the function registered with
    ``set_function`` and one that was only saved via ``import_function``.
    """
    source_project = mlrun.new_project("project-name")
    source_project.set_function("hub://describe", "describe")
    expected_function_objects = source_project.spec._function_objects

    export_path = str(pathlib.Path(tests.conftest.results) / "project.yaml")
    source_project.export(export_path)

    loaded_project = mlrun.load_project("./", export_path)
    assert loaded_project.spec._function_objects == {}

    loaded_project.sync_functions()
    _assert_project_function_objects(loaded_project, expected_function_objects)

    describe_fn = source_project.func("describe")
    assert describe_fn.metadata.name == "describe", "func did not return"

    # test that functions can be fetched from the DB (w/o set_function)
    train_import = mlrun.import_function("hub://sklearn_classifier", new_name="train")
    train_import.save()
    train_fn = source_project.func("train")
    assert train_fn.metadata.name == "train", "train func did not return"
Ejemplo n.º 7
0
    def test_run_git_build(self):
        """Load a project from git and run "main" with the ``build`` argument.

        The run uses the ``kflow.py`` workflow file from the test assets and
        disables ``load_source_on_run``. The test project is deleted in a
        ``finally`` clause so that it is removed even when the workflow run
        or the state assertion fails.
        """
        name = "pipe3"
        project_dir = f"{projects_dir}/{name}"
        shutil.rmtree(project_dir, ignore_errors=True)

        # load project from git, build the container image from source (in the workflow)
        project2 = mlrun.load_project(
            project_dir, "git://github.com/mlrun/project-demo.git#main", name=name
        )
        try:
            logger.info("run pipeline from git")
            project2.spec.load_source_on_run = False
            run = project2.run(
                "main",
                artifact_path=f"v3io:///projects/{name}",
                arguments={"build": 1},
                workflow_path=str(self.assets_path / "kflow.py"),
            )
            run.wait_for_completion()
            assert run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
        finally:
            # always clean up, otherwise a failed run leaves the project behind
            self._delete_test_project(name)
Ejemplo n.º 8
0
def project(context, name, url, run, arguments, artifact_path, namespace, db,
            init_git, clone, sync, dirty):
    """load and/or run a project"""
    # NOTE(review): the docstring is kept as a one-liner on purpose; it is
    # presumably surfaced as CLI help text - confirm before expanding it.
    #
    # context, url, name, init_git, clone: forwarded to load_project().
    # run: workflow name, or a path ending in '.py' that is used as the
    #     workflow file instead of a name.
    # arguments: string holding a Python dict literal with workflow arguments.
    # artifact_path, namespace, dirty: forwarded to proj.run().
    # db: when set, overrides mlconf.dbpath before the project is loaded.
    # sync: forwarded to proj.run(); when no workflow is requested it instead
    #     triggers proj.sync_functions(save=True).
    #
    # Exits with status 1 (SystemExit) when `arguments` is not a dict literal.
    if db:
        mlconf.dbpath = db

    proj = load_project(context, url, name, init_git=init_git, clone=clone)
    print('Loading project {}{} into {}:\n'.format(
        proj.name, ' from ' + url if url else '', context))
    print(proj.to_yaml())

    if run:
        workflow_path = None
        if run.endswith('.py'):
            # a '.py' value is a workflow file rather than a workflow name
            workflow_path = run
            run = None

        args = None
        if arguments:
            try:
                args = literal_eval(arguments)
            except (SyntaxError, ValueError, TypeError):
                # TypeError covers literals such as a dict with an
                # unhashable key, which literal_eval cannot build
                args = None
            if not isinstance(args, dict):
                # also rejects valid literals that are not dicts (list, int..)
                print('arguments ({}) must be a dict object/str'.format(
                    arguments))
                # SystemExit directly: the exit() builtin is injected by the
                # site module and is not guaranteed to exist
                raise SystemExit(1)

        print('running workflow {} file: {}'.format(run, workflow_path))
        run_id = proj.run(run,
                          workflow_path,
                          arguments=args,
                          artifact_path=artifact_path,
                          namespace=namespace,
                          sync=sync,
                          dirty=dirty)
        print('run id: {}'.format(run_id))

    elif sync:
        print('saving project functions to db ..')
        proj.sync_functions(save=True)
Ejemplo n.º 9
0
 def test_load_project_from_db(self):
     """Create a project, call ``save_to_db()`` and load it via a ``db://`` URL.

     The test makes no explicit assertion; it passes when none of the calls
     raises.
     """
     name = "some-project"
     new_project = mlrun.new_project(name)
     new_project.save_to_db()
     db_url = f"db://{name}"
     mlrun.load_project(".", db_url)