def test_pull_config_missing(self, fake_kedro_cli, dummy_project, env, alias):
    """Pull a locally built wheel that was packaged without pipeline config.

    The pipeline's ``conf/base/pipelines/<name>`` directory is removed before
    packaging. The pull must still exit with code 0, the source files and
    test files must be present afterwards, and the destination config
    directory must not exist.
    """
    # pylint: disable=too-many-locals
    src_dir = dummy_project / "src"
    conf_dir = dummy_project / "conf"

    self.call_pipeline_create(fake_kedro_cli)
    config_path = conf_dir / "base" / "pipelines" / PIPELINE_NAME
    shutil.rmtree(config_path)
    assert not config_path.exists()
    self.call_pipeline_package(fake_kedro_cli)
    self.call_pipeline_delete(fake_kedro_cli)

    # The pipeline must really be gone before it is pulled from the wheel.
    assert not (src_dir / PACKAGE_NAME / "pipelines" / PIPELINE_NAME).exists()
    assert not (src_dir / "tests" / "pipelines" / PIPELINE_NAME).exists()

    wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    wheel_file = src_dir / "dist" / wheel_name
    assert wheel_file.is_file()

    # Optional flags are appended only when the fixture supplies a value.
    pull_command = ["pipeline", "pull", str(wheel_file)]
    if env:
        pull_command += ["-e", env]
    if alias:
        pull_command += ["--alias", alias]
    result = CliRunner().invoke(fake_kedro_cli.cli, pull_command)
    assert result.exit_code == 0

    pulled_name = alias or PIPELINE_NAME
    pulled_env = env or "base"
    source_dest = src_dir / PACKAGE_NAME / "pipelines" / pulled_name
    config_dest = conf_dir / pulled_env / "pipelines" / pulled_name
    test_dest = src_dir / "tests" / "pipelines" / pulled_name

    self.assert_package_files_exist(source_dest)
    assert not config_dest.exists()
    pulled_test_files = {entry.name for entry in test_dest.iterdir()}
    assert pulled_test_files == {"__init__.py", "test_pipeline.py"}
def test_package_pipeline_with_deep_nested_parameters(
    self, fake_repo_path, fake_project_cli, fake_metadata
):
    """Package the ``retail`` pipeline with parameter files at several depths.

    Empty parameter files are created under ``conf/base/parameters`` at:

    * ``deep/retail/params1.yml``
    * ``retail/deep/params1.yml``
    * ``deep/retail.yml``
    * ``a/b/c/d/retail/params3.yml``

    After ``pipeline package retail`` the test asserts that exactly one wheel
    was produced and that each of those files, plus ``retail.yml``, is listed
    in the wheel under ``retail/config/parameters/``.
    """
    CliRunner().invoke(
        fake_project_cli, ["pipeline", "create", "retail"], obj=fake_metadata
    )

    parameters_root = Path(fake_repo_path / "conf" / "base" / "parameters")
    # (directory parts relative to the parameters root, file name)
    nested_param_files = (
        (("deep", "retail"), "params1.yml"),
        (("retail", "deep"), "params1.yml"),
        (("deep",), "retail.yml"),
        (("a", "b", "c", "d", "retail"), "params3.yml"),
    )
    for dir_parts, file_name in nested_param_files:
        param_dir = parameters_root.joinpath(*dir_parts)
        param_dir.mkdir(parents=True, exist_ok=True)
        (param_dir / file_name).touch()

    result = CliRunner().invoke(
        fake_project_cli, ["pipeline", "package", "retail"], obj=fake_metadata
    )

    assert result.exit_code == 0
    assert "Pipeline `retail` packaged!" in result.output

    wheel_location = fake_repo_path / "src" / "dist"
    assert f"Location: {wheel_location}" in result.output

    wheel_name = _get_wheel_name(name="retail", version="0.1")
    wheel_file = wheel_location / wheel_name
    assert wheel_file.is_file()
    assert len(list(wheel_location.iterdir())) == 1

    # Read the listing inside a context manager so the archive handle is
    # closed even if an assertion below fails.
    with ZipFile(str(wheel_file)) as wheel_archive:
        wheel_contents = set(wheel_archive.namelist())

    assert "retail/config/parameters/deep/retail/params1.yml" in wheel_contents
    assert "retail/config/parameters/retail/deep/params1.yml" in wheel_contents
    assert "retail/config/parameters/retail.yml" in wheel_contents
    assert "retail/config/parameters/deep/retail.yml" in wheel_contents
    assert "retail/config/parameters/a/b/c/d/retail/params3.yml" in wheel_contents
def test_pipeline_pull_all(  # pylint: disable=too-many-locals
    self, fake_repo_path, fake_project_cli, fake_metadata, mocker
):
    """Run ``pipeline pull --all`` against a manifest in ``pyproject.toml``.

    Three wheels are listed under ``[tool.kedro.pipeline.pull]`` with
    different option sets. The test asserts that ``_pull_package`` was called
    three times, once per manifest entry, with the listed options passed as
    keyword arguments.
    """
    # pylint: disable=import-outside-toplevel
    from kedro.framework.cli import pipeline

    spy = mocker.spy(pipeline, "_pull_package")
    pyproject_toml = fake_repo_path / "pyproject.toml"

    def wheel_path_for(name):
        # Location of the wheel built for the pipeline called `name`.
        return (
            fake_repo_path
            / "src"
            / "dist"
            / _get_wheel_name(name=name, version="0.1")
        )

    first_wheel = wheel_path_for("first")
    second_wheel = wheel_path_for("second")
    third_wheel = wheel_path_for("third")
    project_toml_str = textwrap.dedent(
        f"""
        [tool.kedro.pipeline.pull]
        "{first_wheel}" = {{alias = "dp"}}
        "{second_wheel}" = {{alias = "ds", env = "local"}}
        "{third_wheel}" = {{}}
        """
    )

    with pyproject_toml.open(mode="a") as manifest:
        manifest.write(project_toml_str)

    # Build each wheel, then delete the pipeline so only the wheel remains.
    for pipeline_name in ("first", "second", "third"):
        call_pipeline_create(
            fake_project_cli, fake_metadata, pipeline_name=pipeline_name
        )
        call_pipeline_package(
            fake_project_cli, fake_metadata, pipeline_name=pipeline_name
        )
        call_pipeline_delete(
            fake_project_cli, fake_metadata, pipeline_name=pipeline_name
        )

    result = CliRunner().invoke(
        fake_project_cli, ["pipeline", "pull", "--all"], obj=fake_metadata
    )

    assert result.exit_code == 0
    assert "Pipelines pulled and unpacked!" in result.output
    assert spy.call_count == 3

    parsed_manifest = toml.loads(project_toml_str)
    pull_manifest = parsed_manifest["tool"]["kedro"]["pipeline"]["pull"]
    recorded_calls = spy.call_args_list
    for wheel_location, pull_kwargs in pull_manifest.items():
        expected_call = mocker.call(wheel_location, fake_metadata, **pull_kwargs)
        assert expected_call in recorded_calls
def assert_wheel_contents_correct(
    self, wheel_location, package_name=PIPELINE_NAME, version="0.1"
):
    """Assert that a packaged pipeline wheel exists and has the expected files.

    Args:
        wheel_location: Directory expected to contain exactly one entry, the
            wheel file.
        package_name: Name used to build the wheel file name and the paths
            expected inside the archive.
        version: Version used to build the wheel file name.

    Raises:
        AssertionError: If the wheel file is missing, the directory holds
            more than one entry, or an expected file is not in the archive.
    """
    wheel_name = _get_wheel_name(name=package_name, version=version)
    wheel_file = wheel_location / wheel_name
    assert wheel_file.is_file()
    assert len(list(wheel_location.iterdir())) == 1

    # Use a context manager so the archive handle is always released.
    with ZipFile(str(wheel_file)) as wheel_archive:
        wheel_contents = set(wheel_archive.namelist())

    expected_files = {
        f"{package_name}/__init__.py",
        f"{package_name}/README.md",
        f"{package_name}/nodes.py",
        f"{package_name}/pipeline.py",
        f"{package_name}/config/parameters/{package_name}.yml",
        "tests/__init__.py",
        "tests/test_pipeline.py",
    }
    # Subset check: the wheel may carry additional entries beyond these.
    assert expected_files <= wheel_contents
def test_pipeline_alias_refactors_imports(
    self, fake_project_cli, fake_package_path, fake_repo_path, fake_metadata
):
    """Check that an absolute import is rewritten to match the pipeline alias.

    An ``import <package>.pipelines.<name>.nodes`` statement is appended to
    the pipeline module. The pipeline is packaged under the alias ``alpha``,
    then pulled once without an alias and once as ``beta``. Both pulled
    ``pipeline.py`` files must contain the import spelled with their own
    alias.
    """
    call_pipeline_create(fake_project_cli, fake_metadata)
    pipelines_dir = fake_package_path / "pipelines"

    pipeline_module = pipelines_dir / PIPELINE_NAME / "pipeline.py"
    original_import = (
        f"import {fake_metadata.package_name}.pipelines.{PIPELINE_NAME}.nodes"
    )
    with pipeline_module.open("a") as stream:
        stream.write(original_import)

    package_alias = "alpha"
    pull_alias = "beta"

    call_pipeline_package(
        cli=fake_project_cli, metadata=fake_metadata, alias=package_alias
    )

    wheel_name = _get_wheel_name(name=package_alias, version="0.1")
    wheel_file = fake_repo_path / "src" / "dist" / wheel_name

    # First pull keeps the packaged alias, second pull overrides it.
    base_pull = ["pipeline", "pull", str(wheel_file)]
    for extra_args in ([], ["--alias", pull_alias]):
        CliRunner().invoke(
            fake_project_cli, base_pull + extra_args, obj=fake_metadata
        )

    for alias in (package_alias, pull_alias):
        pulled_module = pipelines_dir / alias / "pipeline.py"
        expected_stmt = (
            f"import {fake_metadata.package_name}.pipelines.{alias}.nodes"
        )
        assert expected_stmt in pulled_module.read_text()
def test_pull_whl_fs_args(
    self, fake_project_cli, fake_repo_path, mocker, tmp_path, fake_metadata
):
    """Pull a wheel with ``--fs-args`` and check the arguments reach fsspec.

    The YAML file passed via ``--fs-args`` holds two entries, one of them
    nested. ``fsspec.filesystem`` is patched, and the test asserts it was
    called exactly once with protocol ``"file"`` and those entries as keyword
    arguments.
    """
    for run_step in (
        call_pipeline_create,
        call_pipeline_package,
        call_pipeline_delete,
    ):
        run_step(fake_project_cli, fake_metadata)

    fs_args = {"fs_arg_1": 1, "fs_arg_2": {"fs_arg_2_nested_1": 2}}
    fs_args_config = tmp_path / "fs_args_config.yml"
    with fs_args_config.open(mode="w") as stream:
        yaml.dump(fs_args, stream)
    mocked_filesystem = mocker.patch("fsspec.filesystem")

    wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    wheel_file = fake_repo_path / "src" / "dist" / wheel_name

    pull_command = [
        "pipeline",
        "pull",
        str(wheel_file),
        "--fs-args",
        str(fs_args_config),
    ]
    CliRunner().invoke(fake_project_cli, pull_command)

    mocked_filesystem.assert_called_once_with(
        "file", fs_arg_1=1, fs_arg_2={"fs_arg_2_nested_1": 2}
    )
def test_pull_two_dist_info(self, fake_kedro_cli, dummy_project, mocker, tmp_path):
    """Pulling must fail when more than one dist-info directory is found.

    An extra ``dummy.dist-info`` directory is created in ``tmp_path``, and
    ``tempfile.TemporaryDirectory`` in the pipeline CLI module is patched to
    return ``tmp_path``. The pull must exit with a non-zero code and report
    the dist-info error.
    """
    self.call_pipeline_create(fake_kedro_cli)
    self.call_pipeline_package(fake_kedro_cli)

    wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    wheel_file = dummy_project / "src" / "dist" / wheel_name
    assert wheel_file.is_file()

    # Plant a second dist-info directory in the patched temporary location.
    extra_dist_info = tmp_path / "dummy.dist-info"
    extra_dist_info.mkdir()
    mocker.patch(
        "kedro.framework.cli.pipeline.tempfile.TemporaryDirectory",
        return_value=tmp_path,
    )

    pull_command = ["pipeline", "pull", str(wheel_file)]
    result = CliRunner().invoke(fake_kedro_cli.cli, pull_command)

    assert result.exit_code
    assert "Error: More than 1 or no dist-info files found" in result.output
def test_package_pipeline_no_config(
    self, fake_repo_path, fake_project_cli, fake_metadata
):
    """Package a pipeline that was created with ``--skip-config``.

    The test asserts that both CLI calls exit with code 0, that exactly one
    wheel lands in ``src/dist``, that the wheel lists the pipeline source and
    test files, and that no parameters file is listed under the pipeline's
    ``config`` directory.
    """
    result = CliRunner().invoke(
        fake_project_cli.cli,
        ["pipeline", "create", PIPELINE_NAME, "--skip-config"],
        obj=fake_metadata,
    )
    assert result.exit_code == 0

    result = CliRunner().invoke(
        fake_project_cli.cli,
        ["pipeline", "package", PIPELINE_NAME],
        obj=fake_metadata,
    )
    assert result.exit_code == 0
    assert f"Pipeline `{PIPELINE_NAME}` packaged!" in result.output

    wheel_location = fake_repo_path / "src" / "dist"
    assert f"Location: {wheel_location}" in result.output

    # the wheel contents are slightly different (config shouldn't be included),
    # which is why we can't call self.assert_wheel_contents_correct here
    wheel_file = wheel_location / _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    assert wheel_file.is_file()
    assert len(list(wheel_location.iterdir())) == 1

    # Use a context manager so the archive handle is always released.
    with ZipFile(str(wheel_file)) as wheel_archive:
        wheel_contents = set(wheel_archive.namelist())

    expected_files = {
        f"{PIPELINE_NAME}/__init__.py",
        f"{PIPELINE_NAME}/README.md",
        f"{PIPELINE_NAME}/nodes.py",
        f"{PIPELINE_NAME}/pipeline.py",
        "tests/__init__.py",
        "tests/test_pipeline.py",
    }
    assert expected_files <= wheel_contents
    assert f"{PIPELINE_NAME}/config/parameters.yml" not in wheel_contents
    # Also rule out the path that self.assert_wheel_contents_correct expects
    # for a pipeline packaged with config; the flat path above alone would
    # not catch a parameters file stored in that nested layout.
    assert (
        f"{PIPELINE_NAME}/config/parameters/{PIPELINE_NAME}.yml"
        not in wheel_contents
    )
def test_pull_local_whl_compare(self, fake_kedro_cli, dummy_project, env, alias):
    """Pull a local wheel under another name and compare it with the original.

    The pipeline is packaged as ``another_pipeline`` and pulled back without
    deleting the original. The pulled source, config and test directories
    must show no differing files against the original pipeline's
    directories.
    """
    # pylint: disable=too-many-locals
    packaged_name = "another_pipeline"
    src_dir = dummy_project / "src"
    conf_dir = dummy_project / "conf"

    self.call_pipeline_create(fake_kedro_cli)
    self.call_pipeline_package(fake_kedro_cli, packaged_name)

    source_path = src_dir / PACKAGE_NAME / "pipelines" / PIPELINE_NAME
    config_path = conf_dir / "base" / "pipelines" / PIPELINE_NAME
    test_path = src_dir / "tests" / "pipelines" / PIPELINE_NAME

    wheel_name = _get_wheel_name(name=packaged_name, version="0.1")
    wheel_file = src_dir / "dist" / wheel_name
    assert wheel_file.is_file()

    # Optional flags are appended only when the fixture supplies a value.
    pull_command = ["pipeline", "pull", str(wheel_file)]
    if env:
        pull_command += ["-e", env]
    if alias:
        pull_command += ["--alias", alias]
    result = CliRunner().invoke(fake_kedro_cli.cli, pull_command)
    assert result.exit_code == 0

    pulled_name = alias or packaged_name
    pulled_env = env or "base"
    source_dest = src_dir / PACKAGE_NAME / "pipelines" / pulled_name
    config_dest = conf_dir / pulled_env / "pipelines" / pulled_name
    test_dest = src_dir / "tests" / "pipelines" / pulled_name

    compared_dirs = (
        (source_path, source_dest),
        (config_path, config_dest),
        (test_path, test_dest),
    )
    for original_dir, pulled_dir in compared_dirs:
        assert not filecmp.dircmp(original_dir, pulled_dir).diff_files
def test_pipeline_package_version(
    self, fake_repo_path, fake_package_path, fake_project_cli, fake_metadata
):
    """Package ``data_engineering`` and check the wheel name carries its version.

    The wheel in ``src/dist`` must be named for version ``4.20.69`` and must
    be the only entry in that directory.
    """
    packaged_pipeline = "data_engineering"
    # the test version value is set separately in
    # features/steps/test_starter/<repo>/src/<package>/pipelines/data_engineering/__init__.py
    expected_version = "4.20.69"

    assert (fake_package_path / "pipelines" / packaged_pipeline).is_dir()

    package_command = ["pipeline", "package", packaged_pipeline]
    result = CliRunner().invoke(
        fake_project_cli, package_command, obj=fake_metadata
    )
    assert result.exit_code == 0

    # test for actual version
    dist_dir = fake_repo_path / "src" / "dist"
    wheel_file = dist_dir / _get_wheel_name(
        name=packaged_pipeline, version=expected_version
    )
    assert wheel_file.is_file()
    assert len(list(dist_dir.iterdir())) == 1
def test_pipeline_package_overwrites_wheel(self, fake_kedro_cli, tmp_path):
    """Package into a destination that already holds a same-named wheel.

    An empty file with the wheel's name is created first. Packaging must
    exit with code 0, print both the overwrite warning and the success
    message, and leave a wheel with the expected contents.
    """
    destination = (tmp_path / "in" / "here").resolve()
    destination.mkdir(parents=True)
    wheel_file = destination / _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    # Pre-existing placeholder that the package command has to replace.
    wheel_file.touch()

    cli_commands = (
        ["pipeline", "create", PIPELINE_NAME],
        ["pipeline", "package", PIPELINE_NAME, "--destination", str(destination)],
    )
    for command in cli_commands:
        result = CliRunner().invoke(fake_kedro_cli.cli, command)
        assert result.exit_code == 0

    # `result` now holds the outcome of the package command.
    expected_messages = (
        f"Package file {wheel_file} will be overwritten!",
        f"Pipeline `{PIPELINE_NAME}` packaged! Location: {destination}",
    )
    for message in expected_messages:
        assert message in result.output
    self.assert_wheel_contents_correct(wheel_location=destination)
def test_pull_from_pypi(
    self,
    fake_project_cli,
    fake_repo_path,
    mocker,
    tmp_path,
    fake_package_path,
    env,
    alias,
    fake_metadata,
):
    """Pull a pipeline by package name, with the pip download mocked out.

    The wheel is built into ``tmp_path`` beforehand, and ``python_call`` and
    ``tempfile.TemporaryDirectory`` in the pipeline CLI module are patched.
    The pull must exit with code 0 and issue exactly one ``pip download``
    call. Afterwards the source files, test files and parameters file must
    be present.
    """
    # pylint: disable=too-many-locals
    cli = fake_project_cli.cli
    tests_root = fake_repo_path / "src" / "tests" / "pipelines"
    conf_root = fake_repo_path / CONF_ROOT

    self.call_pipeline_create(cli, fake_metadata)

    # We mock the `pip download` call, and manually create a package wheel file
    # to simulate the pypi scenario instead
    self.call_pipeline_package(cli, fake_metadata, destination=tmp_path)
    wheel_file = tmp_path / _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    assert wheel_file.is_file()
    self.call_pipeline_delete(cli, fake_metadata)

    # The pipeline must really be gone before it is pulled from pypi.
    assert not (fake_package_path / "pipelines" / PIPELINE_NAME).exists()
    assert not (tests_root / PIPELINE_NAME).exists()
    assert not (
        conf_root / "base" / "parameters" / f"{PIPELINE_NAME}.yml"
    ).exists()

    python_call_mock = mocker.patch("kedro.framework.cli.pipeline.python_call")
    mocker.patch(
        "kedro.framework.cli.pipeline.tempfile.TemporaryDirectory",
        return_value=tmp_path,
    )

    # Optional flags are appended only when the fixture supplies a value.
    pull_command = ["pipeline", "pull", PIPELINE_NAME]
    if env:
        pull_command += ["-e", env]
    if alias:
        pull_command += ["--alias", alias]
    result = CliRunner().invoke(cli, pull_command, obj=fake_metadata)
    assert result.exit_code == 0

    python_call_mock.assert_called_once_with(
        "pip",
        ["download", "--no-deps", "--dest", str(tmp_path), PIPELINE_NAME],
    )

    pulled_name = alias or PIPELINE_NAME
    pulled_env = env or "base"
    source_dest = fake_package_path / "pipelines" / pulled_name
    test_dest = tests_root / pulled_name
    dest_params_config = conf_root / pulled_env / "parameters" / f"{pulled_name}.yml"

    self.assert_package_files_exist(source_dest)
    assert dest_params_config.is_file()
    pulled_test_files = {entry.name for entry in test_dest.iterdir()}
    assert pulled_test_files == {"__init__.py", "test_pipeline.py"}
def test_pull_config_missing(
    self,
    fake_project_cli,
    fake_repo_path,
    fake_package_path,
    env,
    alias,
    fake_metadata,
):
    """Pull a locally built wheel that was packaged without a parameters file.

    The pipeline's parameters file is deleted before packaging. The pull
    must still exit with code 0, the source files and test files must be
    present afterwards, and no parameters file may exist at the destination.
    """
    # pylint: disable=too-many-locals
    cli = fake_project_cli.cli
    tests_root = fake_repo_path / "src" / "tests" / "pipelines"
    conf_root = fake_repo_path / CONF_ROOT

    self.call_pipeline_create(cli, fake_metadata)
    # Drop the parameters file so the wheel is built without config.
    (conf_root / "base" / "parameters" / f"{PIPELINE_NAME}.yml").unlink()
    self.call_pipeline_package(cli, fake_metadata)
    self.call_pipeline_delete(cli, fake_metadata)

    # The pipeline must really be gone before it is pulled from the wheel.
    assert not (fake_package_path / "pipelines" / PIPELINE_NAME).exists()
    assert not (tests_root / PIPELINE_NAME).exists()

    wheel_name = _get_wheel_name(name=PIPELINE_NAME, version="0.1")
    wheel_file = fake_repo_path / "src" / "dist" / wheel_name
    assert wheel_file.is_file()

    # Optional flags are appended only when the fixture supplies a value.
    pull_command = ["pipeline", "pull", str(wheel_file)]
    if env:
        pull_command += ["-e", env]
    if alias:
        pull_command += ["--alias", alias]
    result = CliRunner().invoke(cli, pull_command, obj=fake_metadata)
    assert result.exit_code == 0

    pulled_name = alias or PIPELINE_NAME
    pulled_env = env or "base"
    source_dest = fake_package_path / "pipelines" / pulled_name
    test_dest = tests_root / pulled_name
    dest_params_config = conf_root / pulled_env / "parameters" / f"{pulled_name}.yml"

    self.assert_package_files_exist(source_dest)
    assert not dest_params_config.exists()
    pulled_test_files = {entry.name for entry in test_dest.iterdir()}
    assert pulled_test_files == {"__init__.py", "test_pipeline.py"}
def test_pull_local_whl_compare(
    self,
    fake_project_cli,
    fake_repo_path,
    fake_package_path,
    env,
    alias,
    fake_metadata,
):
    """Pull a local wheel under another name and compare it with the original.

    The pipeline is packaged with the alias ``another_pipeline`` and pulled
    back without deleting the original. The pulled source and test
    directories must show no differing files, and the pulled parameters file
    must be byte-identical to the original.
    """
    # pylint: disable=too-many-locals
    packaged_name = "another_pipeline"
    tests_root = fake_repo_path / "src" / "tests" / "pipelines"

    call_pipeline_create(fake_project_cli, fake_metadata)
    call_pipeline_package(fake_project_cli, fake_metadata, alias=packaged_name)

    source_path = fake_package_path / "pipelines" / PIPELINE_NAME
    test_path = tests_root / PIPELINE_NAME
    source_params_config = (
        fake_repo_path
        / settings.CONF_ROOT
        / "base"
        / "parameters"
        / f"{PIPELINE_NAME}.yml"
    )

    wheel_name = _get_wheel_name(name=packaged_name, version="0.1")
    wheel_file = fake_repo_path / "src" / "dist" / wheel_name
    assert wheel_file.is_file()

    # Optional flags are appended only when the fixture supplies a value.
    pull_command = ["pipeline", "pull", str(wheel_file)]
    if env:
        pull_command += ["-e", env]
    if alias:
        pull_command += ["--alias", alias]
    result = CliRunner().invoke(fake_project_cli, pull_command, obj=fake_metadata)
    assert result.exit_code == 0, result.output
    assert "pulled and unpacked" in result.output

    pulled_name = alias or packaged_name
    pulled_env = env or "base"
    source_dest = fake_package_path / "pipelines" / pulled_name
    test_dest = tests_root / pulled_name
    dest_params_config = (
        fake_repo_path
        / settings.CONF_ROOT
        / pulled_env
        / "parameters"
        / f"{pulled_name}.yml"
    )

    for original_dir, pulled_dir in (
        (source_path, source_dest),
        (test_path, test_dest),
    ):
        assert not filecmp.dircmp(original_dir, pulled_dir).diff_files
    assert source_params_config.read_bytes() == dest_params_config.read_bytes()