Exemple #1
def _create_project(
    config_path: str,
    template_path: Path = TEMPLATE_PATH,
    checkout: str = None,
    directory: str = None,
) -> None:
    """Implementation of the kedro new cli command.

    Args:
        config_path: In non-interactive mode, the path of the config.yml which
            should contain the project_name, output_dir and repo_name.
        template_path: The path to the cookiecutter template to create the project.
            It could either be a local directory or a remote VCS repository
            supported by cookiecutter. For more details, please see the
            cookiecutter usage documentation.
        checkout: The tag, branch or commit in the starter repository to checkout.
            Maps directly to cookiecutter's --checkout argument.
            If the value is not provided, cookiecutter will use the installed Kedro version
            by default.
        directory: The directory of a specific starter inside a repository containing
            multiple starters. Map directly to cookiecutter's --directory argument.

    Raises:
        KedroCliError: If it fails to generate a project.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        if config_path:
            # Non-interactive mode: read the config file, then validate it.
            config = _parse_config(config_path)
            config = _check_config_ok(config_path, config)
        else:
            # Interactive mode: collect the same values from the user.
            config = _get_config_from_prompts()
        config.setdefault("kedro_version", version)

        # Fall back to the installed Kedro version when no ref was requested.
        checkout = checkout or version
        # NOTE(review): the keyword list below was reconstructed because the
        # original call was cut off after `dict(` -- confirm against the
        # intended cookiecutter invocation.
        cookiecutter_args = dict(
            output_dir=config["output_dir"],
            no_input=True,
            extra_context=config,
            checkout=checkout,
        )
        if directory:
            cookiecutter_args["directory"] = directory
        # NOTE(review): `result_path` is not used further in the visible code;
        # any post-generation steps that consumed it are not shown here.
        result_path = Path(cookiecutter(str(template_path), **cookiecutter_args))
    except click.exceptions.Abort as exc:  # pragma: no cover
        raise KedroCliError("User interrupt.") from exc
    except RepositoryNotFound as exc:
        raise KedroCliError(
            f"Kedro project template not found at {template_path}"
        ) from exc
    except RepositoryCloneFailed as exc:
        error_message = (
            f"Kedro project template not found at {template_path} with tag {checkout}."
        )
        # Help the user pick a valid ref by listing what the repository offers.
        tags = _get_available_tags(str(template_path).replace("git+", ""))
        if tags:
            error_message += (
                f" The following tags are available: {', '.join(tags)}"
            )
        raise KedroCliError(error_message) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc
Exemple #2
def _validate_dir(path: Path) -> None:
    if not path.is_dir():
        raise KedroCliError(f"Directory '{path}' doesn't exist.")
    if not list(path.iterdir()):
        raise KedroCliError(f"'{path}' is an empty directory.")
Exemple #3
def convert_notebook(  # pylint: disable=unused-argument,too-many-locals
        all_flag, overwrite_flag, filepath, env):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `<source_dir>/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!
    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.
    """
    import sys  # pylint: disable=import-outside-toplevel

    project_path = Path.cwd()
    static_data = get_static_project_data(project_path)
    source_path = static_data["source_dir"]
    # Prefer the statically declared package name; only load the project
    # context when the static data does not provide one.
    package_name = (static_data.get("package_name")
                    or _load_project_context().package_name)

    if not filepath and not all_flag:
        secho("Please specify a notebook filepath "
              "or add '--all' to convert all notebooks.")
        # Nothing was selected: stop here with a non-zero status instead of
        # falling through and reporting "Done!" for a no-op run.
        sys.exit(1)

    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        pattern = project_path / "**" / "*.ipynb"
        notebooks = sorted(
            Path(p) for p in iglob(str(pattern), recursive=True))
    else:
        notebooks = [Path(f) for f in filepath]

    # Output files are named after the notebook stem, so duplicate stems
    # would overwrite each other.
    counter = Counter(n.stem for n in notebooks)
    non_unique_names = [name for name, counts in counter.items() if counts > 1]
    if non_unique_names:
        names = ", ".join(non_unique_names)
        raise KedroCliError(
            f"Found non-unique notebook names! Please rename the following: {names}"
        )

    output_dir = source_path / package_name / "nodes"
    if not output_dir.is_dir():
        # The directory has to exist before the package marker can be created.
        output_dir.mkdir(parents=True, exist_ok=True)
        (output_dir / "__init__.py").touch()

    for notebook in notebooks:
        secho(f"Converting notebook '{notebook}'...")
        output_path = output_dir / f"{notebook.stem}.py"

        if output_path.is_file():
            # Only prompt when the overwrite flag was not given.
            overwrite = overwrite_flag or click.confirm(
                f"Output file {output_path} already exists. Overwrite?"
            )
            if not overwrite:
                continue
        _export_nodes(notebook, output_path)

    # `fg` selects the text colour; `color` only toggles ANSI output on or off.
    secho("Done!", fg="green")
Exemple #4
def new(
    config, starter_name, checkout, directory, **kwargs
):  # pylint: disable=unused-argument
    """Create a new kedro project, either interactively or from a
    configuration file.

    Create projects according to the Kedro default project template. This
    template is ideal for analytics projects and comes with a data
    architecture, folders for notebooks, configuration, source code, etc.

    ``kedro new``
    Create a new project interactively.

    You will have to provide four choices:
    * ``Project Name`` - name of the project, not to be confused with name of
    the project folder.
    * ``Repository Name`` - intended name of your project folder.
    * ``Package Name`` - intended name of your Python package.
    * ``Generate Example Pipeline`` - yes/no to generating an example pipeline
    in your project.

    ``kedro new --config <config.yml>``
    ``kedro new -c <config.yml>``
    Create a new project from configuration.

    * ``config.yml`` - The configuration YAML must contain at the top level
                    the above parameters (project_name, repo_name,
                    python_package) and output_dir - the
                    parent directory for the new project directory.

    ``kedro new --starter <starter>``
    Create a new project from a starter template. The starter can be either the path to
    a local directory, a URL to a remote VCS repository supported by `cookiecutter` or
    one of the aliases listed in ``kedro starter list``.

    ``kedro new --starter <starter> --checkout <checkout>``
    Create a new project from a starter template and a particular tag, branch or commit
    in the starter repository.

    ``kedro new --starter <starter> --directory <directory>``
    Create a new project from a starter repository and a directory within the location.
    Useful when you have multiple starters in the same repository.
    """
    # --checkout and --directory only make sense relative to a starter.
    if checkout and not starter_name:
        raise KedroCliError("Cannot use the --checkout flag without a --starter value.")

    if directory and not starter_name:
        raise KedroCliError(
            "Cannot use the --directory flag without a --starter value."
        )

    if starter_name in _STARTER_ALIASES:
        if directory:
            raise KedroCliError(
                "Cannot use the --directory flag with a --starter alias."
            )
        # An alias is resolved to the shared starters repository, with the
        # alias itself used as the directory inside it.
        template_path = _STARTERS_REPO
        directory = starter_name
    elif starter_name is not None:
        template_path = starter_name
    else:
        template_path = TEMPLATE_PATH

    # NOTE(review): the visible code stopped after resolving `template_path`
    # without using it; delegating to `_create_project` matches that helper's
    # (config_path, template_path, checkout, directory) signature -- confirm.
    _create_project(config, template_path, checkout, directory)

Exemple #5
def _port_callback(ctx, param, value):  # pylint: disable=unused-argument
    """Return ``value`` unchanged unless ``is_port_in_use`` reports it as taken.

    Args:
        ctx: Unused.
        param: Unused.
        value: Port number to check.

    Returns:
        The same ``value`` that was passed in.

    Raises:
        KedroCliError: If ``is_port_in_use(value)`` is truthy.
    """
    if not is_port_in_use(value):
        return value
    raise KedroCliError(
        f"Port {value} is already in use on the host. "
        f"Please specify an alternative port number."
    )
Exemple #6
def _assert_include_example_ok(include_example):
    if not isinstance(include_example, bool):
        message = (
            "`{}` value for `include_example` is invalid. It must be a boolean value "
            "True or False.".format(include_example))
        raise KedroCliError(message)
Exemple #7
def _assert_repo_name_ok(repo_name):
    if not re.match(r"^\w+(-*\w+)*$", repo_name):
        message = ("`{}` is not a valid repository name. It must contain "
                   "only word symbols and/or hyphens, must also start and "
                   "end with alphanumeric symbol.".format(repo_name))
        raise KedroCliError(message)
Exemple #8
def compose_docker_run_args(
    host_root: str = None,
    container_root: str = None,
    mount_volumes: Sequence[str] = None,
    required_args: Sequence[Tuple[str, Union[str, None]]] = None,
    optional_args: Sequence[Tuple[str, Union[str, None]]] = None,
    user_args: Sequence[str] = None,
) -> List[str]:
    """
    Make a list of arguments for the docker command.

    Args:
        host_root: Path project root on the host. It must be provided
            if `mount_volumes` are specified, optional otherwise.
        container_root: Path to project root in the container
            (e.g., `/home/kedro/<repo_name>`). It must be
            provided if `mount_volumes` are specified, optional otherwise.
        mount_volumes: List of volumes to be mounted.
        required_args: List of required arguments.
        optional_args: List of optional arguments, these will be added if only
            not present in `user_args` list.
        user_args: List of arguments already specified by the user.

    Raises:
        KedroCliError: If `mount_volumes` are provided but either `host_root`
            or `container_root` are missing.

    Returns:
        List of arguments for the docker command.
    """
    mount_volumes = mount_volumes or []
    required_args = required_args or []
    optional_args = optional_args or []
    # Copy into a list so that any sequence (e.g. a tuple) can be
    # concatenated with the composed arguments at the end.
    user_args = list(user_args or [])
    # Only the option name (the part before the first "=") decides whether the
    # user already supplied an argument.
    split_user_args = {ua.split("=", 1)[0] for ua in user_args}

    def _add_args(name_: str,
                  value_: str = None,
                  force_: bool = False) -> List[str]:
        """
        Add extra args to existing list of CLI args.

        Args:
            name_: Arg name to add.
            value_: Arg value to add, skipped if None.
            force_: Add the argument even if it's present in the current list of args.

        Returns:
            List containing the new args and (optionally) its value or an empty list
                if no values to be added.
        """
        if not force_ and name_ in split_user_args:
            return []
        return [name_] if value_ is None else [name_, value_]

    if mount_volumes:
        if not (host_root and container_root):
            raise KedroCliError("Both `host_root` and `container_root` must "
                                "be specified in `compose_docker_run_args` "
                                "call if `mount_volumes` are provided.")
        # NOTE(review): the original call was cut off after `container_root,`;
        # `mount_volumes` is assumed to be the remaining argument -- confirm.
        vol_gen = _list_docker_volumes(host_root, container_root,
                                       mount_volumes)
        combined_args = list(chain.from_iterable(vol_gen))
    else:
        combined_args = []
    # Required arguments are always emitted, even if the user passed them too.
    for arg_name, arg_value in required_args:
        combined_args += _add_args(arg_name, arg_value, True)
    for arg_name, arg_value in optional_args:
        combined_args += _add_args(arg_name, arg_value)
    return combined_args + user_args
Exemple #9
