def _create_pipeline(name: str, kedro_version: str, output_dir: Path) -> Path:
    """Create a new pipeline from the built-in cookiecutter template.

    Args:
        name: Name of the pipeline to create.
        kedro_version: Kedro version string written into the generated files.
        output_dir: Directory in which the pipeline directory is generated.

    Returns:
        Path to the generated pipeline directory.

    Raises:
        KedroCliError: If cookiecutter fails to generate the pipeline.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.main import cookiecutter

    template_path = Path(kedro.__file__).parent / "templates" / "pipeline"
    cookie_context = {"pipeline_name": name, "kedro_version": kedro_version}

    click.echo(f"Creating the pipeline `{name}`: ", nl=False)
    try:
        result_path = cookiecutter(
            str(template_path),
            output_dir=str(output_dir),
            no_input=True,
            extra_context=cookie_context,
        )
    except Exception as ex:
        click.secho("FAILED", fg="red")
        cls = ex.__class__
        # Chain the original exception (`from ex`) so the root cause is
        # preserved — consistent with the other cookiecutter wrappers here.
        raise KedroCliError(f"{cls.__module__}.{cls.__qualname__}: {ex}") from ex

    click.secho("OK", fg="green")
    result_path = Path(result_path)
    message = indent(f"Location: `{result_path.resolve()}`", " " * 2)
    click.secho(message, bold=True)

    _clean_pycache(result_path)
    return result_path
def _create_project(template_path: str, cookiecutter_args: Dict[str, str]):
    """Generate a new Kedro project from a cookiecutter template.

    Args:
        template_path: Location of the cookiecutter template — either a local
            directory or a remote VCS repository supported by cookiecutter.
            For details see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        cookiecutter_args: Keyword arguments forwarded verbatim to cookiecutter.

    Raises:
        KedroCliError: When cookiecutter cannot generate the project.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        # Deferred import: cookiecutter is expensive to load.
        from cookiecutter.main import cookiecutter

    try:
        project_path = cookiecutter(template_path, **cookiecutter_args)
    except Exception as exc:
        raise KedroCliError(
            "Failed to generate project when running cookiecutter.") from exc

    _clean_pycache(Path(project_path))

    success_note = f"\nChange directory to the project generated in {project_path}"
    click.secho(success_note, fg="green")

    best_practice_note = (
        "\nA best-practice setup includes initialising git and creating "
        "a virtual environment before running ``kedro install`` to install "
        "project-specific dependencies. Refer to the Kedro documentation: "
        "https://kedro.readthedocs.io/"
    )
    click.secho(best_practice_note)
def _package_pipeline(  # pylint: disable=too-many-arguments
    name: str,
    context: KedroContext,
    package_name: str = None,
    destination: str = None,
    env: str = None,
    version: str = None,
) -> Path:
    """Package the pipeline ``name`` as a wheel and return the output directory."""
    package_dir = _get_project_package_dir(context)

    # Fall back to defaults for any unset (or empty) options.
    env = env or "base"
    package_name = package_name or name
    version = version or "0.1"

    artifacts = _get_pipeline_artifacts(context, pipeline_name=name, env=env)

    if destination:
        dist_dir = Path(destination)
    else:
        dist_dir = package_dir.parent / "dist"

    _generate_wheel_file(package_name, dist_dir, artifacts, version)

    # Drop any __pycache__ directories created along the way.
    _clean_pycache(package_dir)
    _clean_pycache(context.project_path)
    return dist_dir
def _create_project(template_path: str, cookiecutter_args: Dict[str, str]):
    """Create a new Kedro project from a cookiecutter template.

    Args:
        template_path: Cookiecutter template location: a local directory or a
            remote VCS repository supported by cookiecutter. See:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        cookiecutter_args: Keyword arguments passed through to cookiecutter.

    Raises:
        KedroCliError: If project generation fails for any reason.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        # Deferred imports: cookiecutter is slow to load.
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter

    try:
        result_path = cookiecutter(template_path, **cookiecutter_args)
    except (RepositoryNotFound, RepositoryCloneFailed) as exc:
        checkout = cookiecutter_args.get("checkout")
        error_message = (
            f"Kedro project template not found at {template_path}"
            f" with tag {checkout}."
        )
        available = _get_available_tags(template_path)
        if available:
            error_message += f" The following tags are available: {', '.join(available)}"
        raise KedroCliError(error_message) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc

    _clean_pycache(Path(result_path))
    _print_kedro_new_success_message(result_path)
def pull_package(metadata: ProjectMetadata, package_path, env, alias, fs_args, **kwargs):
    # pylint:disable=unused-argument
    """Pull a modular pipeline wheel and unpack it into the current project."""
    with tempfile.TemporaryDirectory() as temp_dir:
        workdir = Path(temp_dir).resolve()
        _unpack_wheel(package_path, workdir, fs_args)

        dist_info_dirs = list(workdir.glob("*.dist-info"))
        if len(dist_info_dirs) != 1:
            raise KedroCliError(
                f"More than 1 or no dist-info files found from {package_path}. "
                f"There has to be exactly one dist-info directory."
            )
        dist_info = dist_info_dirs[0]
        # Wheel naming convention (PEP 427): the first dash-separated
        # component of the dist-info stem is the package name.
        package_name = dist_info.stem.split("-")[0]

        _clean_pycache(workdir)
        _install_files(metadata, package_name, workdir, env, alias)

        # Mirror the wheel's declared dependencies into requirements.in.
        metadata_text = (dist_info / "METADATA").read_text()
        package_reqs = re.findall(r"Requires-Dist: (.*?)\n", metadata_text)
        if package_reqs:
            requirements_in = _get_requirements_in(metadata.source_dir, create_empty=True)
            _append_package_reqs(requirements_in, package_reqs, package_name)
def _package_pipeline(
    name: str,
    context: KedroContext,
    package_name: str = None,
    destination: str = None,
    env: str = None,
) -> Path:
    """Build a wheel for the pipeline ``name`` and return the output directory."""
    package_dir = _get_project_package_dir(context)

    # Fall back to defaults for unset (or empty) options.
    env = env or "base"
    package_name = package_name or name

    # Artifacts to package
    source_paths = _get_pipeline_artifacts(context, pipeline_name=name, env=env)

    dist_dir = Path(destination) if destination else package_dir.parent / "dist"

    # Warn (but proceed) when an existing wheel is about to be replaced.
    existing_wheel = dist_dir / f"{package_name}-0.1-py3-none-any.whl"
    if existing_wheel.is_file():
        click.secho(f"Package file {existing_wheel} will be overwritten!", fg="yellow")

    _generate_wheel_file(package_name, dist_dir, source_paths)

    _clean_pycache(package_dir)
    _clean_pycache(context.project_path)
    return dist_dir
def _package_pipeline(  # pylint: disable=too-many-arguments
    pipeline_name: str,
    metadata: ProjectMetadata,
    alias: str = None,
    destination: str = None,
    env: str = None,
    version: str = None,
) -> Path:
    """Package a modular pipeline into a wheel file.

    Args:
        pipeline_name: Name of the pipeline (directory under
            ``<package>/pipelines``) to package.
        metadata: Project metadata providing ``source_dir``, ``package_name``
            and ``project_path``.
        alias: Optional alias forwarded to ``_generate_wheel_file``.
        destination: Output directory for the wheel; defaults to the
            project's ``dist`` directory next to the package dir.
        env: Configuration environment to pull parameter files from
            (defaults to ``"base"``).
        version: Wheel version; when unset, resolved from the pipeline
            module's ``__version__``, falling back to the project package's.

    Returns:
        The directory the wheel file was written to.
    """
    package_dir = metadata.source_dir / metadata.package_name
    env = env or "base"

    artifacts_to_package = _get_pipeline_artifacts(metadata,
                                                   pipeline_name=pipeline_name,
                                                   env=env)
    # as the wheel file will only contain parameters, we aren't listing other
    # config files not to confuse users and avoid useless file copies
    configs_to_package = _find_config_files(
        artifacts_to_package.pipeline_conf,
        [
            f"parameters*/**/{pipeline_name}.yml",
            f"parameters*/**/{pipeline_name}/*"
        ],
    )

    source_paths = (
        artifacts_to_package.pipeline_dir,
        artifacts_to_package.pipeline_tests,
        configs_to_package,
    )

    # Check that pipeline directory exists and not empty
    _validate_dir(artifacts_to_package.pipeline_dir)

    destination = Path(
        destination) if destination else package_dir.parent / "dist"

    if not version:  # default to pipeline package version
        try:
            pipeline_module = import_module(
                f"{metadata.package_name}.pipelines.{pipeline_name}")
            version = pipeline_module.__version__  # type: ignore
        except (AttributeError, ModuleNotFoundError):
            # if pipeline version doesn't exist, take the project one
            project_module = import_module(f"{metadata.package_name}")
            version = project_module.__version__  # type: ignore

    _generate_wheel_file(  # type: ignore
        pipeline_name, destination, source_paths, version, alias=alias)

    # Remove stray __pycache__ directories left behind by the imports above.
    _clean_pycache(package_dir)
    _clean_pycache(metadata.project_path)
    return destination
def _package_pipeline(  # pylint: disable=too-many-arguments
    pipeline_name: str,
    metadata: ProjectMetadata,
    alias: str = None,
    destination: str = None,
    env: str = None,
    version: str = None,
) -> Path:
    """Package a modular pipeline as a wheel and return the output directory."""
    package_dir = metadata.source_dir / metadata.package_name
    env = env or "base"

    artifacts = _get_pipeline_artifacts(metadata,
                                        pipeline_name=pipeline_name,
                                        env=env)

    # as the wheel file will only contain parameters, we aren't listing other
    # config files not to confuse users and avoid useless file copies
    config_patterns = [
        f"parameters*/**/{pipeline_name}.yml",
        f"parameters*/**/{pipeline_name}/*",
    ]
    packaged_configs = _find_config_files(artifacts.pipeline_conf, config_patterns)

    source_paths = (
        artifacts.pipeline_dir,
        artifacts.pipeline_tests,
        packaged_configs,
    )

    # The pipeline directory must exist and be non-empty.
    _validate_dir(artifacts.pipeline_dir)

    if destination:
        dist_dir = Path(destination)
    else:
        dist_dir = package_dir.parent / "dist"

    _generate_wheel_file(
        pipeline_name=pipeline_name,
        destination=dist_dir,
        source_paths=source_paths,
        version=version or _get_default_version(metadata, pipeline_name),
        metadata=metadata,
        alias=alias,
    )

    _clean_pycache(package_dir)
    _clean_pycache(metadata.project_path)
    return dist_dir
def _create_project(config_path: str, verbose: bool):
    """Implementation of the kedro new cli command.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        verbose: Extensive debug terminal logs.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        # Config comes either from a user-supplied YAML file or from
        # interactive prompts.
        if config_path:
            config = _parse_config(config_path, verbose)
            config = _check_config_ok(config_path, config)
        else:
            config = _get_config_from_prompts()
        # Only fill in kedro_version when the config didn't set it already.
        config.setdefault("kedro_version", version)

        result_path = Path(
            cookiecutter(
                str(TEMPLATE_PATH),
                output_dir=config["output_dir"],
                no_input=True,
                extra_context=config,
            ))

        # If the example pipeline was declined, delete the generated
        # placeholder data file and the two example pipeline packages.
        if not config["include_example"]:
            (result_path / "data" / "01_raw" / "iris.csv").unlink()
            pipelines_dir = result_path / "src" / config[
                "python_package"] / "pipelines"
            for dir_path in [
                    pipelines_dir / "data_engineering",
                    pipelines_dir / "data_science",
            ]:
                shutil.rmtree(str(dir_path))

        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort:  # pragma: no cover
        _handle_exception("User interrupt.")
    # we don't want the user to see a stack trace on the cli
    except Exception:  # pylint: disable=broad-except
        _handle_exception("Failed to generate project.")
def pull_package(package_path, env, alias): """Pull a modular pipeline package, unpack it and install the files to corresponding locations. """ # pylint: disable=import-outside-toplevel import fsspec from kedro.io.core import get_protocol_and_path protocol, _ = get_protocol_and_path(package_path) filesystem = fsspec.filesystem(protocol) with tempfile.TemporaryDirectory() as temp_dir: temp_dir_path = Path(temp_dir).resolve() if package_path.endswith(".whl") and filesystem.exists(package_path): with filesystem.open(package_path) as fs_file: ZipFile(fs_file).extractall(temp_dir_path) else: python_call( "pip", [ "download", "--no-deps", "--dest", str(temp_dir_path), package_path ], ) wheel_file = list(temp_dir_path.glob("*.whl")) # `--no-deps` should fetch only one wheel file, and CLI should fail if that's # not the case. if len(wheel_file) != 1: file_names = [wf.name for wf in wheel_file] raise KedroCliError( f"More than 1 or no wheel files found: {str(file_names)}. " "There has to be exactly one distribution file.") ZipFile(wheel_file[0]).extractall(temp_dir_path) dist_info_file = list(temp_dir_path.glob("*.dist-info")) if len(dist_info_file) != 1: raise KedroCliError( f"More than 1 or no dist-info files found from {package_path}. " "There has to be exactly one dist-info directory.") # Extract package name, based on the naming convention for wheel files # https://www.python.org/dev/peps/pep-0427/#file-name-convention package_name = dist_info_file[0].stem.split("-")[0] _clean_pycache(temp_dir_path) _install_files(package_name, temp_dir_path, env, alias)
def test_clean_pycache(self, tmp_path, mocker):
    """`_clean_pycache` should rmtree every nested __pycache__ directory."""
    root = Path(tmp_path)
    deepest = (root / "nested1" / "nested2" / "__pycache__").resolve()
    middle = (root / "nested1" / "__pycache__").resolve()
    top = (root / "__pycache__").resolve()
    deepest.mkdir(parents=True)
    middle.mkdir()
    top.mkdir()

    mocked_rmtree = mocker.patch("shutil.rmtree")
    _clean_pycache(root)

    # Caches are removed shallowest-first, each with errors suppressed.
    expected_calls = [
        mocker.call(path, ignore_errors=True) for path in (top, middle, deepest)
    ]
    assert mocked_rmtree.mock_calls == expected_calls
def pull_package(metadata: ProjectMetadata, package_path, env, alias, fs_args, **kwargs):
    # pylint:disable=unused-argument
    """Fetch a modular pipeline wheel and unpack its files into this project."""
    with tempfile.TemporaryDirectory() as temp_dir:
        staging_dir = Path(temp_dir).resolve()
        _unpack_wheel(package_path, staging_dir, fs_args)

        matches = list(staging_dir.glob("*.dist-info"))
        if len(matches) != 1:
            raise KedroCliError(
                f"More than 1 or no dist-info files found from {package_path}. "
                f"There has to be exactly one dist-info directory."
            )
        # Per PEP 427, the wheel's dist-info stem starts with the package name.
        package_name = matches[0].stem.split("-")[0]

        _clean_pycache(staging_dir)
        _install_files(metadata, package_name, staging_dir, env, alias)
def _create_project(
    config_path: str,
    template_path: Path = TEMPLATE_PATH,
    checkout: str = None,
    directory: str = None,
):  # pylint: disable=too-many-locals
    """Implementation of the kedro new cli command.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        template_path: The path to the cookiecutter template to create the
            project. It could either be a local directory or a remote VCS
            repository supported by cookiecutter. For more details, please see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        checkout: The tag, branch or commit in the starter repository to
            checkout. Maps directly to cookiecutter's --checkout argument.
            If the value is not provided, cookiecutter will use the installed
            Kedro version by default.
        directory: The directory of a specific starter inside a repository
            containing multiple starters. Maps directly to cookiecutter's
            --directory argument.
            https://cookiecutter.readthedocs.io/en/1.7.2/advanced/directories.html

    Raises:
        KedroCliError: If it fails to generate a project.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons
        from cookiecutter.repository import determine_repo_dir

    config: Dict[str, str] = {}
    checkout = checkout or version

    try:
        if config_path:
            config = _parse_config(config_path)
            config = _check_config_ok(config_path, config)
        else:
            # Clone the starter into a temp dir so we can read its
            # starter_config.yml (if any) and prompt the user accordingly.
            with tempfile.TemporaryDirectory() as tmpdir:
                temp_dir_path = Path(tmpdir).resolve()
                repo, _ = determine_repo_dir(
                    template=str(template_path),
                    abbreviations={},
                    clone_to_dir=temp_dir_path,
                    checkout=checkout,
                    no_input=True,
                    directory=directory,
                )
                config_yml = temp_dir_path / repo / "starter_config.yml"
                if config_yml.is_file():
                    with open(config_yml) as config_file:
                        prompts = yaml.safe_load(config_file)
                    config = _get_config_from_starter_prompts(prompts)
        # Only fill in kedro_version when the config didn't set it already.
        config.setdefault("kedro_version", version)

        cookiecutter_args = dict(
            output_dir=config.get("output_dir", str(Path.cwd().resolve())),
            no_input=True,
            extra_context=config,
            checkout=checkout,
        )
        if directory:
            cookiecutter_args["directory"] = directory
        result_path = Path(cookiecutter(str(template_path), **cookiecutter_args))
        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort as exc:  # pragma: no cover
        raise KedroCliError("User interrupt.") from exc
    except RepositoryNotFound as exc:
        raise KedroCliError(
            f"Kedro project template not found at {template_path}"
        ) from exc
    except RepositoryCloneFailed as exc:
        error_message = (
            f"Kedro project template not found at {template_path} with tag {checkout}."
        )
        tags = _get_available_tags(str(template_path).replace("git+", ""))
        if tags:
            # str.join consumes any iterable directly; no need for the
            # redundant tags.__iter__() call the original had.
            error_message += f" The following tags are available: {', '.join(tags)}"
        raise KedroCliError(error_message) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc
def _create_project(
    config_path: str,
    verbose: bool,
    template_path: Path = TEMPLATE_PATH,
    should_prompt_for_example: bool = True,
    checkout: str = None,
):
    """Drive the ``kedro new`` CLI command.

    Args:
        config_path: In non-interactive mode, the path of a config.yml holding
            project_name, output_dir and repo_name.
        verbose: Whether to emit extensive debug terminal logs.
        template_path: Cookiecutter template to generate the project from — a
            local directory or any remote VCS repository cookiecutter
            supports. See:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        should_prompt_for_example: Whether the interactive flow asks about
            generating an example pipeline. N.B.: temporary, until the
            starters with examples all live in public repositories.
        checkout: Tag, branch or commit of the starter repository to check
            out; maps to cookiecutter's --checkout argument. Invalid values
            make cookiecutter use the default branch.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        # Deferred: cookiecutter is slow to import.
        from cookiecutter.exceptions import RepositoryNotFound
        from cookiecutter.main import cookiecutter

    try:
        if config_path:
            parsed = _parse_config(config_path, verbose)
            config = _check_config_ok(config_path, parsed)
        else:
            config = _get_config_from_prompts(should_prompt_for_example)
        config.setdefault("kedro_version", version)

        raw_result = cookiecutter(
            str(template_path),
            output_dir=config["output_dir"],
            no_input=True,
            extra_context=config,
            checkout=checkout,
        )
        project_path = Path(raw_result)

        # When the user was offered the example pipeline and declined,
        # strip out every placeholder file and directory.
        if should_prompt_for_example and not config["include_example"]:
            (project_path / "data" / "01_raw" / "iris.csv").unlink()
            pipelines_dir = (
                project_path / "src" / config["python_package"] / "pipelines"
            )
            for obsolete_dir in (
                pipelines_dir / "data_engineering",
                pipelines_dir / "data_science",
            ):
                shutil.rmtree(str(obsolete_dir))

        _clean_pycache(project_path)
        _print_kedro_new_success_message(project_path)
    except click.exceptions.Abort:  # pragma: no cover
        _handle_exception("User interrupt.")
    except RepositoryNotFound:
        _handle_exception(
            f"Kedro project template not found at {template_path}")
    # we don't want the user to see a stack trace on the cli
    except Exception:  # pylint: disable=broad-except
        _handle_exception("Failed to generate project.")