def test_create_project_from_file_with_legacy_structure():
    """Save a legacy-format project to yaml and verify it loads as a new-style project."""
    name = "project-name"
    desc = "project description"
    parameters = {"param_key": "param value"}
    out_path = "/tmp"
    legacy = mlrun.projects.project.MlrunProjectLegacy(
        name, desc, parameters, artifact_path=out_path
    )
    trainer_name = "trainer-function"
    trainer = mlrun.new_function(trainer_name, name)
    legacy.set_function(trainer, trainer_name)
    legacy.set_function("hub://describe", "describe")
    wf_name = "workflow-name"
    wf_path = (
        pathlib.Path(tests.conftest.tests_root_directory) / "projects" / "workflow.py"
    )
    legacy.set_workflow(wf_name, str(wf_path))
    legacy.artifacts = [
        {
            "key": "raw-data",
            "kind": "",
            "iter": 0,
            "tree": "latest",
            "target_path": "https://raw.githubusercontent.com/mlrun/demos/master/customer-churn-prediction/WA_Fn-UseC_-Telc"
            "o-Customer-Churn.csv",
            "db_key": "raw-data",
        }
    ]
    yaml_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    legacy.save(str(yaml_path))

    loaded = mlrun.load_project("./", str(yaml_path))
    assert loaded.kind == "project"
    assert loaded.metadata.name == name
    # spec fields and their project-level shortcuts must both carry the legacy values
    assert loaded.spec.description == desc
    assert loaded.description == desc
    assert loaded.spec.artifact_path == out_path
    assert loaded.artifact_path == out_path
    assert deepdiff.DeepDiff(parameters, loaded.spec.params, ignore_order=True) == {}
    assert deepdiff.DeepDiff(parameters, loaded.params, ignore_order=True) == {}
    # functions / workflows / artifacts survive the round trip unchanged
    assert deepdiff.DeepDiff(legacy.functions, loaded.functions, ignore_order=True) == {}
    assert deepdiff.DeepDiff(legacy.workflows, loaded.workflows, ignore_order=True) == {}
    assert deepdiff.DeepDiff(legacy.artifacts, loaded.artifacts, ignore_order=True) == {}
def test_run(self):
    """Create a project in the context dir, reload it, and run its main workflow."""
    name = "pipe1"
    # create project in context
    self._create_project(name)
    # reload from the context directory and execute the workflow to completion
    reloaded = mlrun.load_project(str(self.assets_path), name=name)
    pipeline_run = reloaded.run(
        "main", watch=True, artifact_path=f"v3io:///projects/{name}"
    )
    assert pipeline_run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
    self._delete_test_project(name)
def test_sync_functions():
    """Exporting then importing a project should restore function objects via sync."""
    # NOTE(review): a second test named test_sync_functions appears later in this
    # source; if both live in the same module, pytest only collects the last one —
    # confirm and rename one of them if so.
    name = "project-name"
    project = mlrun.new_project(name)
    project.set_function("hub://describe")
    expected_functions = project.spec._function_objects
    yaml_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    project.export(str(yaml_path))

    reloaded = mlrun.load_project(None, str(yaml_path))
    # function objects are not materialized until sync_functions() runs
    assert reloaded.spec._function_objects == {}
    reloaded.sync_functions()
    _assert_project_function_objects(reloaded, expected_functions)
def test_run_cli(self):
    """Clone a demo project via the CLI, swap in a local workflow, and run it."""
    name = "pipe4"
    project_dir = f"{projects_dir}/{name}"
    shutil.rmtree(project_dir, ignore_errors=True)

    # clone the demo project from git into a local directory
    clone_args = [
        "-n",
        name,
        "-u",
        "git://github.com/mlrun/project-demo.git",
        project_dir,
    ]
    print(exec_project(clone_args, projects_dir))

    # load the cloned project, replace its workflows with a local one, and persist
    project2 = mlrun.load_project(project_dir)
    project2.spec.workflows = {}
    project2.set_workflow("kf", "./kflow.py")
    project2.save()
    print(project2.to_yaml())

    # execute the new workflow through the CLI and capture its output
    run_args = [
        "-n",
        name,
        "-r",
        "kf",
        "-w",
        "-p",
        f"v3io:///projects/{name}",
        project_dir,
    ]
    out = exec_project(run_args, projects_dir)
    match = re.search(" Pipeline run id=(.+),", out)
    assert match, "pipeline id is not in output"

    # the run id printed by the CLI must resolve to a succeeded pipeline in the DB
    run_id = match.group(1).strip()
    db = mlrun.get_run_db()
    pipeline = db.get_pipeline(run_id, project=name)
    assert (
        pipeline["run"]["status"] == mlrun.run.RunStatuses.succeeded
    ), "pipeline failed"
    self._delete_test_project(name)
    self._delete_test_project(project2.metadata.name)
def test_run_git_load(self):
    """Load a project from git and run its pipeline with source loaded at runtime."""
    name = "pipe2"
    project_dir = f"{projects_dir}/{name}"
    shutil.rmtree(project_dir, ignore_errors=True)

    loaded = mlrun.load_project(
        project_dir, "git://github.com/mlrun/project-demo.git#main", name=name
    )
    logger.info("run pipeline from git")

    # pull the project source into the container at runtime instead of baking it in
    loaded.spec.load_source_on_run = True
    pipeline_run = loaded.run("main", artifact_path=f"v3io:///projects/{name}")
    pipeline_run.wait_for_completion()
    assert pipeline_run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
    self._delete_test_project(name)
def test_sync_functions():
    """Round-trip a project through yaml; verify functions sync and are fetchable."""
    # NOTE(review): an earlier test with this same name appears in this source; if
    # both live in one module, only the later definition is collected by pytest —
    # confirm and rename one of them if so.
    name = "project-name"
    project = mlrun.new_project(name)
    project.set_function("hub://describe", "describe")
    expected_functions = project.spec._function_objects
    yaml_path = pathlib.Path(tests.conftest.results) / "project.yaml"
    project.export(str(yaml_path))

    reloaded = mlrun.load_project("./", str(yaml_path))
    # function objects are not materialized until sync_functions() runs
    assert reloaded.spec._function_objects == {}
    reloaded.sync_functions()
    _assert_project_function_objects(reloaded, expected_functions)

    describe_fn = project.func("describe")
    assert describe_fn.metadata.name == "describe", "func did not return"

    # test that functions can be fetched from the DB (w/o set_function)
    mlrun.import_function("hub://sklearn_classifier", new_name="train").save()
    train_fn = project.func("train")
    assert train_fn.metadata.name == "train", "train func did not return"
def test_run_git_build(self):
    """Load a project from git and run a workflow that builds the image from source."""
    name = "pipe3"
    project_dir = f"{projects_dir}/{name}"
    shutil.rmtree(project_dir, ignore_errors=True)

    # load project from git, build the container image from source (in the workflow)
    loaded = mlrun.load_project(
        project_dir, "git://github.com/mlrun/project-demo.git#main", name=name
    )
    logger.info("run pipeline from git")

    loaded.spec.load_source_on_run = False
    pipeline_run = loaded.run(
        "main",
        artifact_path=f"v3io:///projects/{name}",
        arguments={"build": 1},
        workflow_path=str(self.assets_path / "kflow.py"),
    )
    pipeline_run.wait_for_completion()
    assert pipeline_run.state == mlrun.run.RunStatuses.succeeded, "pipeline failed"
    self._delete_test_project(name)
def project(context, name, url, run, arguments, artifact_path, namespace, db, init_git, clone, sync, dirty):
    """Load and/or run a project (CLI handler).

    Loads the project from ``context``/``url`` and prints its yaml, then either
    executes the workflow (or workflow file) given by ``run``, or — with ``sync``
    alone — saves the project's functions to the DB.
    """
    if db:
        mlconf.dbpath = db
    proj = load_project(context, url, name, init_git=init_git, clone=clone)
    # f-strings and double quotes for consistency with the rest of this file
    source = f" from {url}" if url else ""
    print(f"Loading project {proj.name}{source} into {context}:\n")
    print(proj.to_yaml())

    if run:
        # a .py argument names a workflow file path rather than a saved workflow
        workflow_path = None
        if run.endswith(".py"):
            workflow_path = run
            run = None

        args = None
        if arguments:
            try:
                # arguments arrive as a Python-literal dict string, e.g. "{'x': 1}"
                args = literal_eval(arguments)
            except (SyntaxError, ValueError):
                print(f"arguments ({arguments}) must be a dict object/str")
                exit(1)

        print(f"running workflow {run} file: {workflow_path}")
        run = proj.run(
            run,
            workflow_path,
            arguments=args,
            artifact_path=artifact_path,
            namespace=namespace,
            sync=sync,
            dirty=dirty,
        )
        print(f"run id: {run}")
    elif sync:
        print("saving project functions to db ..")
        proj.sync_functions(save=True)
def test_load_project_from_db(self):
    """A project saved to the DB should be loadable via a db:// url."""
    name = "some-project"
    mlrun.new_project(name).save_to_db()
    mlrun.load_project(".", f"db://{name}")