Example #1
0
def _create_pipeline(name: str, kedro_version: str, output_dir: Path) -> Path:
    """Generate a new pipeline from the pipeline template shipped with Kedro.

    Args:
        name: Name of the pipeline; passed to the template as ``pipeline_name``.
        kedro_version: Passed to the template as ``kedro_version``.
        output_dir: Directory handed to cookiecutter as its ``output_dir``.

    Returns:
        The path reported by cookiecutter for the generated pipeline.

    Raises:
        KedroCliError: If cookiecutter raises any exception. The original
            exception is attached as the explicit cause.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.main import cookiecutter

    # The template lives inside the installed kedro package itself.
    template_path = Path(kedro.__file__).parent / "templates" / "pipeline"
    cookie_context = {"pipeline_name": name, "kedro_version": kedro_version}

    # No trailing newline: the OK / FAILED status is printed on the same line.
    click.echo(f"Creating the pipeline `{name}`: ", nl=False)

    try:
        result_path = cookiecutter(
            str(template_path),
            output_dir=str(output_dir),
            no_input=True,
            extra_context=cookie_context,
        )
    except Exception as ex:
        click.secho("FAILED", fg="red")
        cls = ex.__class__
        # Chain explicitly so the original traceback is kept as __cause__.
        raise KedroCliError(f"{cls.__module__}.{cls.__qualname__}: {ex}") from ex

    click.secho("OK", fg="green")
    result_path = Path(result_path)
    message = indent(f"Location: `{result_path.resolve()}`", " " * 2)
    click.secho(message, bold=True)

    _clean_pycache(result_path)

    return result_path
Example #2
0
def _create_project(template_path: str, cookiecutter_args: Dict[str, str]):
    """Generate a new Kedro project by running cookiecutter on a template.

    Args:
        template_path: Location of the cookiecutter template. This may be a
            local directory or a remote VCS repository supported by
            cookiecutter. For more details, please see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        cookiecutter_args: Keyword arguments forwarded to cookiecutter.

    Raises:
        KedroCliError: If cookiecutter raises while generating the project.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        project_dir = cookiecutter(template_path, **cookiecutter_args)
    except Exception as err:
        failure = "Failed to generate project when running cookiecutter."
        raise KedroCliError(failure) from err

    _clean_pycache(Path(project_dir))

    # Tell the user where the project landed, then suggest the next steps.
    location_hint = f"\nChange directory to the project generated in {project_dir}"
    click.secho(location_hint, fg="green")

    setup_hint = (
        "\nA best-practice setup includes initialising git and creating "
        "a virtual environment before running ``kedro install`` to install "
        "project-specific dependencies. Refer to the Kedro documentation: "
        "https://kedro.readthedocs.io/"
    )
    click.secho(setup_hint)
Example #3
0
def _package_pipeline(  # pylint: disable=too-many-arguments
    name: str,
    context: KedroContext,
    package_name: str = None,
    destination: str = None,
    env: str = None,
    version: str = None,
) -> Path:
    """Package the artifacts of a pipeline into a wheel file.

    Args:
        name: Name of the pipeline to package.
        context: Kedro context used to look up the project package directory
            and the pipeline artifacts.
        package_name: Name of the package; falls back to ``name``.
        destination: Output directory; falls back to a ``dist`` directory
            next to the project package directory.
        env: Environment passed to the artifact lookup; falls back to
            ``"base"``.
        version: Package version; falls back to ``"0.1"``.

    Returns:
        The destination directory handed to the wheel generator.
    """
    package_dir = _get_project_package_dir(context)

    # Fill in defaults for every optional argument that was left empty.
    if not env:
        env = "base"
    if not package_name:
        package_name = name
    if not version:
        version = "0.1"

    artifacts = _get_pipeline_artifacts(context, pipeline_name=name, env=env)

    if destination:
        target_dir = Path(destination)
    else:
        target_dir = package_dir.parent / "dist"

    _generate_wheel_file(package_name, target_dir, artifacts, version)

    _clean_pycache(package_dir)
    _clean_pycache(context.project_path)

    return target_dir
Example #4
0
def _create_project(template_path: str, cookiecutter_args: Dict[str, str]):
    """Generate a new Kedro project by running cookiecutter on a template.

    Args:
        template_path: Location of the cookiecutter template. This may be a
            local directory or a remote VCS repository supported by
            cookiecutter. For more details, please see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        cookiecutter_args: Keyword arguments forwarded to cookiecutter.

    Raises:
        KedroCliError: If the template repository cannot be found or cloned,
            or if any other exception is raised during generation.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        project_dir = cookiecutter(template_path, **cookiecutter_args)
    except (RepositoryNotFound, RepositoryCloneFailed) as exc:
        requested_tag = cookiecutter_args.get('checkout')
        message_parts = [
            f"Kedro project template not found at {template_path}"
            f" with tag {requested_tag}."
        ]
        # Help the user recover by listing the tags that do exist, if any.
        available = _get_available_tags(template_path)
        if available:
            message_parts.append(
                f" The following tags are available: {', '.join(available)}"
            )
        raise KedroCliError("".join(message_parts)) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc

    _clean_pycache(Path(project_dir))
    _print_kedro_new_success_message(project_dir)
Example #5
0
def pull_package(metadata: ProjectMetadata, package_path, env, alias, fs_args,
                 **kwargs):  # pylint:disable=unused-argument
    """Pull and unpack a modular pipeline in your project."""
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text — confirm against the command decorators.
    #
    # Flow: unpack the wheel into a temporary directory, locate its single
    # ``*.dist-info`` directory, derive the package name from it, install the
    # files into the project, then record any ``Requires-Dist`` entries from
    # the wheel METADATA in the project's requirements file.
    #
    # Raises KedroCliError when the unpacked wheel does not contain exactly
    # one ``*.dist-info`` directory.

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir).resolve()

        _unpack_wheel(package_path, temp_dir_path, fs_args)

        dist_info_file = list(temp_dir_path.glob("*.dist-info"))
        if len(dist_info_file) != 1:
            raise KedroCliError(
                f"More than 1 or no dist-info files found from {package_path}. "
                "There has to be exactly one dist-info directory.")
        # Extract package name, based on the naming convention for wheel files
        # https://www.python.org/dev/peps/pep-0427/#file-name-convention
        package_name = dist_info_file[0].stem.split("-")[0]
        package_metadata = dist_info_file[0] / "METADATA"

        _clean_pycache(temp_dir_path)
        _install_files(metadata, package_name, temp_dir_path, env, alias)

        req_pattern = r"Requires-Dist: (.*?)\n"
        # Wheel METADATA is UTF-8 encoded; do not depend on the locale default,
        # which can mis-decode or fail on non-ASCII content.
        metadata_text = package_metadata.read_text(encoding="utf-8")
        package_reqs = re.findall(req_pattern, metadata_text)
        if package_reqs:
            requirements_in = _get_requirements_in(metadata.source_dir,
                                                   create_empty=True)
            _append_package_reqs(requirements_in, package_reqs, package_name)
Example #6
0
def _package_pipeline(
    name: str,
    context: KedroContext,
    package_name: str = None,
    destination: str = None,
    env: str = None,
) -> Path:
    """Package the artifacts of a pipeline into a wheel file.

    Args:
        name: Name of the pipeline to package.
        context: Kedro context used to look up the project package directory
            and the pipeline artifacts.
        package_name: Name of the package; falls back to ``name``.
        destination: Output directory; falls back to a ``dist`` directory
            next to the project package directory.
        env: Environment passed to the artifact lookup; falls back to
            ``"base"``.

    Returns:
        The destination directory handed to the wheel generator.
    """
    package_dir = _get_project_package_dir(context)
    if not env:
        env = "base"
    if not package_name:
        package_name = name

    # Artifacts to package
    artifacts = _get_pipeline_artifacts(context, pipeline_name=name, env=env)

    if destination:
        target_dir = Path(destination)
    else:
        target_dir = package_dir.parent / "dist"

    # Warn (but carry on) when a wheel with the expected name already exists.
    wheel_path = target_dir / f"{package_name}-0.1-py3-none-any.whl"
    if wheel_path.is_file():
        overwrite_warning = f"Package file {wheel_path} will be overwritten!"
        click.secho(overwrite_warning, fg="yellow")

    _generate_wheel_file(package_name, target_dir, artifacts)

    _clean_pycache(package_dir)
    _clean_pycache(context.project_path)

    return target_dir
Example #7
0
def _package_pipeline(  # pylint: disable=too-many-arguments
    pipeline_name: str,
    metadata: ProjectMetadata,
    alias: str = None,
    destination: str = None,
    env: str = None,
    version: str = None,
) -> Path:
    """Package a pipeline's code, tests and parameter configs into a wheel.

    Args:
        pipeline_name: Name of the pipeline to package.
        metadata: Project metadata providing the source directory, package
            name and project path.
        alias: Forwarded to the wheel generator as ``alias``.
        destination: Output directory; falls back to a ``dist`` directory
            next to the project package directory.
        env: Environment passed to the artifact lookup; falls back to
            ``"base"``.
        version: Package version. When empty, the pipeline module's
            ``__version__`` is used, or the project package's if the pipeline
            module cannot be imported or has no ``__version__``.

    Returns:
        The destination directory handed to the wheel generator.
    """
    package_dir = metadata.source_dir / metadata.package_name
    if not env:
        env = "base"

    artifacts = _get_pipeline_artifacts(
        metadata, pipeline_name=pipeline_name, env=env
    )
    # as the wheel file will only contain parameters, we aren't listing other
    # config files not to confuse users and avoid useless file copies
    parameter_globs = [
        f"parameters*/**/{pipeline_name}.yml",
        f"parameters*/**/{pipeline_name}/*",
    ]
    parameter_configs = _find_config_files(artifacts.pipeline_conf, parameter_globs)

    source_paths = (
        artifacts.pipeline_dir,
        artifacts.pipeline_tests,
        parameter_configs,
    )

    # Check that pipeline directory exists and not empty
    _validate_dir(artifacts.pipeline_dir)

    if destination:
        target_dir = Path(destination)
    else:
        target_dir = package_dir.parent / "dist"

    if not version:  # default to pipeline package version
        pipeline_module_name = f"{metadata.package_name}.pipelines.{pipeline_name}"
        try:
            version = import_module(pipeline_module_name).__version__  # type: ignore
        except (AttributeError, ModuleNotFoundError):
            # if pipeline version doesn't exist, take the project one
            version = import_module(f"{metadata.package_name}").__version__  # type: ignore

    _generate_wheel_file(  # type: ignore
        pipeline_name,
        target_dir,
        source_paths,
        version,
        alias=alias,
    )

    _clean_pycache(package_dir)
    _clean_pycache(metadata.project_path)

    return target_dir
Example #8
0
def _package_pipeline(  # pylint: disable=too-many-arguments
    pipeline_name: str,
    metadata: ProjectMetadata,
    alias: str = None,
    destination: str = None,
    env: str = None,
    version: str = None,
) -> Path:
    """Package a pipeline's code, tests and parameter configs into a wheel.

    Args:
        pipeline_name: Name of the pipeline to package.
        metadata: Project metadata providing the source directory, package
            name and project path.
        alias: Forwarded to the wheel generator as ``alias``.
        destination: Output directory; falls back to a ``dist`` directory
            next to the project package directory.
        env: Environment passed to the artifact lookup; falls back to
            ``"base"``.
        version: Package version; when empty, the value returned by
            ``_get_default_version`` is used.

    Returns:
        The destination directory handed to the wheel generator.
    """
    package_dir = metadata.source_dir / metadata.package_name
    if not env:
        env = "base"

    artifacts = _get_pipeline_artifacts(
        metadata, pipeline_name=pipeline_name, env=env
    )
    # as the wheel file will only contain parameters, we aren't listing other
    # config files not to confuse users and avoid useless file copies
    parameter_globs = [
        f"parameters*/**/{pipeline_name}.yml",
        f"parameters*/**/{pipeline_name}/*",
    ]
    parameter_configs = _find_config_files(artifacts.pipeline_conf, parameter_globs)

    source_paths = (
        artifacts.pipeline_dir,
        artifacts.pipeline_tests,
        parameter_configs,
    )

    # Check that pipeline directory exists and not empty
    _validate_dir(artifacts.pipeline_dir)

    if destination:
        target_dir = Path(destination)
    else:
        target_dir = package_dir.parent / "dist"

    if not version:
        version = _get_default_version(metadata, pipeline_name)

    _generate_wheel_file(
        pipeline_name=pipeline_name,
        destination=target_dir,
        source_paths=source_paths,
        version=version,
        metadata=metadata,
        alias=alias,
    )

    _clean_pycache(package_dir)
    _clean_pycache(metadata.project_path)

    return target_dir
Example #9
0
def _create_project(config_path: str, verbose: bool):
    """Create a new Kedro project; backs the ``kedro new`` cli command.

    The configuration comes from ``config_path`` when one is given, otherwise
    from interactive prompts. When the configuration opts out of the example,
    the example data file and example pipeline directories are removed from
    the generated project.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        verbose: Extensive debug terminal logs.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        if config_path:
            parsed = _parse_config(config_path, verbose)
            config = _check_config_ok(config_path, parsed)
        else:
            config = _get_config_from_prompts()
        config.setdefault("kedro_version", version)

        generated = cookiecutter(
            str(TEMPLATE_PATH),
            output_dir=config["output_dir"],
            no_input=True,
            extra_context=config,
        )
        result_path = Path(generated)

        if not config["include_example"]:
            example_data = result_path / "data" / "01_raw" / "iris.csv"
            example_data.unlink()

            pipelines_dir = (
                result_path / "src" / config["python_package"] / "pipelines"
            )
            for example_pipeline in ("data_engineering", "data_science"):
                shutil.rmtree(str(pipelines_dir / example_pipeline))

        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort:  # pragma: no cover
        _handle_exception("User interrupt.")
    # we don't want the user to see a stack trace on the cli
    except Exception:  # pylint: disable=broad-except
        _handle_exception("Failed to generate project.")
Example #10
0
def pull_package(package_path, env, alias):
    """Pull a modular pipeline package, unpack it and install the files to corresponding
    locations.
    """
    # NOTE(review): the docstring is kept verbatim in case it is surfaced as
    # CLI help text — confirm against the command decorators.
    #
    # ``package_path`` is either a ``.whl`` file reachable through fsspec, or
    # anything else, which is handed to ``pip download``. Raises KedroCliError
    # when pip does not produce exactly one wheel, or when the unpacked wheel
    # does not contain exactly one ``*.dist-info`` directory.

    # pylint: disable=import-outside-toplevel
    import fsspec

    from kedro.io.core import get_protocol_and_path

    protocol, _ = get_protocol_and_path(package_path)
    filesystem = fsspec.filesystem(protocol)

    with tempfile.TemporaryDirectory() as temp_dir:
        temp_dir_path = Path(temp_dir).resolve()
        if package_path.endswith(".whl") and filesystem.exists(package_path):
            # Close the archive deterministically instead of leaving it to
            # the garbage collector.
            with filesystem.open(package_path) as fs_file, ZipFile(fs_file) as wheel_zip:
                wheel_zip.extractall(temp_dir_path)
        else:
            python_call(
                "pip",
                [
                    "download", "--no-deps", "--dest",
                    str(temp_dir_path), package_path
                ],
            )
            wheel_file = list(temp_dir_path.glob("*.whl"))
            # `--no-deps` should fetch only one wheel file, and CLI should fail if that's
            # not the case.
            if len(wheel_file) != 1:
                file_names = [wf.name for wf in wheel_file]
                raise KedroCliError(
                    f"More than 1 or no wheel files found: {file_names}. "
                    "There has to be exactly one distribution file.")
            # The open archive handle must be released before the temporary
            # directory is removed.
            with ZipFile(wheel_file[0]) as wheel_zip:
                wheel_zip.extractall(temp_dir_path)

        dist_info_file = list(temp_dir_path.glob("*.dist-info"))
        if len(dist_info_file) != 1:
            raise KedroCliError(
                f"More than 1 or no dist-info files found from {package_path}. "
                "There has to be exactly one dist-info directory.")
        # Extract package name, based on the naming convention for wheel files
        # https://www.python.org/dev/peps/pep-0427/#file-name-convention
        package_name = dist_info_file[0].stem.split("-")[0]

        _clean_pycache(temp_dir_path)
        _install_files(package_name, temp_dir_path, env, alias)
Example #11
0
    def test_clean_pycache(self, tmp_path, mocker):
        """Check that `_clean_pycache` calls `shutil.rmtree` on every
        `__pycache__` directory under the source path, at each nesting level.
        """
        root = Path(tmp_path)

        # Create the deepest directory first so its parents exist for the rest.
        deep_cache = (root / "nested1" / "nested2" / "__pycache__").resolve()
        deep_cache.mkdir(parents=True)
        mid_cache = (root / "nested1" / "__pycache__").resolve()
        mid_cache.mkdir()
        top_cache = (root / "__pycache__").resolve()
        top_cache.mkdir()

        rmtree_mock = mocker.patch("shutil.rmtree")
        _clean_pycache(root)

        # Expected removal order: shallowest directory first.
        expected_calls = [
            mocker.call(cache_dir, ignore_errors=True)
            for cache_dir in (top_cache, mid_cache, deep_cache)
        ]
        assert rmtree_mock.mock_calls == expected_calls
Example #12
0
def pull_package(metadata: ProjectMetadata, package_path, env, alias, fs_args,
                 **kwargs):  # pylint:disable=unused-argument
    """Pull and unpack a modular pipeline in your project.
    """
    # Unpacks the wheel into a temporary directory, derives the package name
    # from its single ``*.dist-info`` directory and installs the files.
    # Raises KedroCliError unless exactly one ``*.dist-info`` entry is found.

    with tempfile.TemporaryDirectory() as scratch:
        unpack_root = Path(scratch).resolve()

        _unpack_wheel(package_path, unpack_root, fs_args)

        dist_info_dirs = list(unpack_root.glob("*.dist-info"))
        if len(dist_info_dirs) != 1:
            problem = (
                f"More than 1 or no dist-info files found from {package_path}. "
                "There has to be exactly one dist-info directory."
            )
            raise KedroCliError(problem)

        # Extract package name, based on the naming convention for wheel files
        # https://www.python.org/dev/peps/pep-0427/#file-name-convention
        dist_info_stem = dist_info_dirs[0].stem
        package_name = dist_info_stem.partition("-")[0]

        _clean_pycache(unpack_root)
        _install_files(metadata, package_name, unpack_root, env, alias)
Example #13
0
def _create_project(
    config_path: str,
    template_path: Path = TEMPLATE_PATH,
    checkout: str = None,
    directory: str = None,
):  # pylint: disable=too-many-locals
    """Implementation of the kedro new cli command.

    When ``config_path`` is given, the configuration is read from that file.
    Otherwise the template is resolved into a temporary directory and, if it
    ships a ``starter_config.yml``, the prompts defined there are used to
    build the configuration.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        template_path: The path to the cookiecutter template to create the project.
            It could either be a local directory or a remote VCS repository
            supported by cookiecutter. For more details, please see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        checkout: The tag, branch or commit in the starter repository to checkout.
            Maps directly to cookiecutter's --checkout argument.
            If the value is not provided, cookiecutter will use the installed Kedro version
            by default.
        directory: The directory of a specific starter inside a repository containing
            multiple starters. Maps directly to cookiecutter's --directory argument.
            https://cookiecutter.readthedocs.io/en/1.7.2/advanced/directories.html

    Raises:
        KedroCliError: If it fails to generate a project.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons
        from cookiecutter.repository import determine_repo_dir

    config: Dict[str, str] = {}
    checkout = checkout or version
    try:
        if config_path:
            config = _parse_config(config_path)
            config = _check_config_ok(config_path, config)
        else:
            with tempfile.TemporaryDirectory() as tmpdir:
                temp_dir_path = Path(tmpdir).resolve()
                repo, _ = determine_repo_dir(
                    template=str(template_path),
                    abbreviations={},
                    clone_to_dir=temp_dir_path,
                    checkout=checkout,
                    no_input=True,
                    directory=directory,
                )
                config_yml = temp_dir_path / repo / "starter_config.yml"
                if config_yml.is_file():
                    # Read with an explicit encoding rather than the locale
                    # default so the prompts load the same on every platform.
                    with open(config_yml, encoding="utf-8") as config_file:
                        prompts = yaml.safe_load(config_file)
                    config = _get_config_from_starter_prompts(prompts)
        config.setdefault("kedro_version", version)

        cookiecutter_args = {
            "output_dir": config.get("output_dir", str(Path.cwd().resolve())),
            "no_input": True,
            "extra_context": config,
            "checkout": checkout,
        }
        # Only forward ``directory`` when one was actually requested.
        if directory:
            cookiecutter_args["directory"] = directory
        result_path = Path(cookiecutter(str(template_path), **cookiecutter_args))
        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort as exc:  # pragma: no cover
        raise KedroCliError("User interrupt.") from exc
    except RepositoryNotFound as exc:
        raise KedroCliError(
            f"Kedro project template not found at {template_path}"
        ) from exc
    except RepositoryCloneFailed as exc:
        error_message = (
            f"Kedro project template not found at {template_path} with tag {checkout}."
        )
        # The tag lookup receives the template location without the "git+" marker.
        tags = _get_available_tags(str(template_path).replace("git+", ""))
        if tags:
            error_message += (
                f" The following tags are available: {', '.join(tags)}"
            )
        raise KedroCliError(error_message) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc
Example #14
0
def _create_project(
    config_path: str,
    verbose: bool,
    template_path: Path = TEMPLATE_PATH,
    should_prompt_for_example: bool = True,
    checkout: str = None,
):
    """Create a new Kedro project; backs the ``kedro new`` cli command.

    The configuration comes from ``config_path`` when one is given, otherwise
    from interactive prompts. If the example prompt was enabled and the
    configuration opts out of the example, the example data file and example
    pipeline directories are removed from the generated project.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        verbose: Extensive debug terminal logs.
        template_path: The path to the cookiecutter template to create the project.
            It could either be a local directory or a remote VCS repository
            supported by cookiecutter. For more details, please see:
            https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        should_prompt_for_example: Whether to display a prompt to generate an example pipeline.
            N.B.: this should only be here until the start project is complete and the
            starters with example are all located in public repositories.
        checkout: The tag, branch or commit in the starter repository to checkout.
            Maps directly to cookiecutter's --checkout argument.
            If the value is invalid, cookiecutter will use the default branch.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        if config_path:
            parsed = _parse_config(config_path, verbose)
            config = _check_config_ok(config_path, parsed)
        else:
            config = _get_config_from_prompts(should_prompt_for_example)
        config.setdefault("kedro_version", version)

        generated = cookiecutter(
            str(template_path),
            output_dir=config["output_dir"],
            no_input=True,
            extra_context=config,
            checkout=checkout,
        )
        result_path = Path(generated)

        # If user was prompted to generate an example but chooses not to,
        # Remove all placeholder directories.
        drop_example = should_prompt_for_example and not config["include_example"]
        if drop_example:
            example_data = result_path / "data" / "01_raw" / "iris.csv"
            example_data.unlink()

            pipelines_dir = (
                result_path / "src" / config["python_package"] / "pipelines"
            )
            for example_pipeline in ("data_engineering", "data_science"):
                shutil.rmtree(str(pipelines_dir / example_pipeline))

        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort:  # pragma: no cover
        _handle_exception("User interrupt.")
    except RepositoryNotFound:
        missing_template = f"Kedro project template not found at {template_path}"
        _handle_exception(missing_template)
    # we don't want the user to see a stack trace on the cli
    except Exception:  # pylint: disable=broad-except
        _handle_exception("Failed to generate project.")