def _create_project(
    config_path: str,
    template_path: Path = TEMPLATE_PATH,
    checkout: str = None,
    directory: str = None,
):
    """Generate a new project from a cookiecutter template.

    This is the implementation behind the ``kedro new`` cli command.

    Args:
        config_path: Path to a ``config.yml`` used in non-interactive mode.
            When falsy, the configuration is collected from prompts instead.
        template_path: Location of the cookiecutter template; it is passed
            to cookiecutter as ``str(template_path)``, so it may be a local
            directory or a remote VCS repository supported by cookiecutter.
            See https://cookiecutter.readthedocs.io/en/latest/usage.html#generate-your-project
        checkout: Tag, branch or commit of the starter repository, forwarded
            as cookiecutter's ``checkout`` argument. Falls back to the
            installed Kedro ``version`` when not provided.
        directory: Sub-directory of the repository holding the starter,
            forwarded as cookiecutter's ``directory`` argument only when set.
            See https://cookiecutter.readthedocs.io/en/1.7.2/advanced/directories.html

    Raises:
        KedroCliError: If the project cannot be generated for any reason.
    """
    with _filter_deprecation_warnings():
        # pylint: disable=import-outside-toplevel
        from cookiecutter.exceptions import RepositoryCloneFailed, RepositoryNotFound
        from cookiecutter.main import cookiecutter  # for performance reasons

    try:
        if config_path:
            config = _check_config_ok(config_path, _parse_config(config_path))
        else:
            config = _get_config_from_prompts()
        config.setdefault("kedro_version", version)

        # Rebinding `checkout` here also makes the effective value available
        # to the clone-failure message below.
        checkout = checkout or version
        cookiecutter_args = {
            "output_dir": config["output_dir"],
            "no_input": True,
            "extra_context": config,
            "checkout": checkout,
        }
        if directory:
            cookiecutter_args["directory"] = directory

        result_path = Path(cookiecutter(str(template_path), **cookiecutter_args))
        _clean_pycache(result_path)
        _print_kedro_new_success_message(result_path)
    except click.exceptions.Abort as exc:  # pragma: no cover
        raise KedroCliError("User interrupt.") from exc
    except RepositoryNotFound as exc:
        raise KedroCliError(
            f"Kedro project template not found at {template_path}"
        ) from exc
    except RepositoryCloneFailed as exc:
        error_message = (
            f"Kedro project template not found at {template_path} with tag {checkout}."
        )
        repo_location = str(template_path).replace("git+", "")
        available_tags = _get_available_tags(repo_location)
        if available_tags:
            tag_listing = ", ".join(available_tags)
            error_message += f" The following tags are available: {tag_listing}"
        raise KedroCliError(error_message) from exc
    # we don't want the user to see a stack trace on the cli
    except Exception as exc:
        raise KedroCliError("Failed to generate project.") from exc
def _validate_dir(path: Path) -> None:
    """Check that ``path`` is an existing, non-empty directory.

    Args:
        path: Directory to validate.

    Raises:
        KedroCliError: If ``path`` is not a directory, or is a directory
            with no entries.
    """
    if not path.is_dir():
        raise KedroCliError(f"Directory '{path}' doesn't exist.")
    # `any` stops at the first entry, so the whole listing is never built
    # just to find out whether the directory is empty.
    if not any(path.iterdir()):
        raise KedroCliError(f"'{path}' is an empty directory.")
def convert_notebook(  # pylint: disable=unused-argument,too-many-locals
    all_flag, overwrite_flag, filepath, env
):
    """Convert selected or all notebooks found in a Kedro project
    to Kedro code, by exporting code from the appropriately-tagged cells:
    Cells tagged as `node` will be copied over to a Python file matching
    the name of the notebook, under `<source_dir>/<package_name>/nodes`.
    *Note*: Make sure your notebooks have unique names!
    FILEPATH: Path(s) to exact notebook file(s) to be converted. Both
    relative and absolute paths are accepted.
    Should not be provided if --all flag is already present.
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI
    # help text, so its wording is kept as-is.
    project_path = Path.cwd()
    static_data = get_static_project_data(project_path)
    source_path = static_data["source_dir"]
    # Fall back to the project context only when the static data carries no
    # package name.
    package_name = (
        static_data.get("package_name") or _load_project_context().package_name
    )

    _update_ipython_dir(project_path)

    if not filepath and not all_flag:
        secho(
            "Please specify a notebook filepath "
            "or add '--all' to convert all notebooks."
        )
        sys.exit(1)

    if all_flag:
        # pathlib glob does not ignore hidden directories,
        # whereas Python glob does, which is more useful in
        # ensuring checkpoints will not be included
        pattern = project_path / "**" / "*.ipynb"
        notebooks = sorted(Path(p) for p in iglob(str(pattern), recursive=True))
    else:
        notebooks = [Path(f) for f in filepath]

    # Output files are named after the notebook stem, so two notebooks with
    # the same stem would overwrite each other's export.
    counter = Counter(n.stem for n in notebooks)
    non_unique_names = [name for name, counts in counter.items() if counts > 1]
    if non_unique_names:
        names = ", ".join(non_unique_names)
        raise KedroCliError(
            f"Found non-unique notebook names! Please rename the following: {names}"
        )

    output_dir = source_path / package_name / "nodes"
    if not output_dir.is_dir():
        output_dir.mkdir()
        (output_dir / "__init__.py").touch()

    for notebook in notebooks:
        secho(f"Converting notebook '{notebook}'...")
        output_path = output_dir / f"{notebook.stem}.py"

        # Short-circuit order matters: the user is only prompted when the
        # target already exists and --overwrite was not given.
        if (
            not output_path.is_file()
            or overwrite_flag
            or click.confirm(
                f"Output file {output_path} already exists. Overwrite?",
                default=False,
            )
        ):
            _export_nodes(notebook, output_path)

    # `fg` selects the text colour; `color` is click's ANSI on/off switch and
    # does not style the message.
    secho("Done!", fg="green")
def new(
    config, starter_name, checkout, directory, **kwargs
):  # pylint: disable=unused-argument
    """Create a new kedro project, either interactively or from a
    configuration file.

    Create projects according to the Kedro default project template. This
    template is ideal for analytics projects and comes with a data
    architecture, folders for notebooks, configuration, source code, etc.

    \b
    ``kedro new``
    Create a new project interactively.

    \b
    You will have to provide four choices:
    * ``Project Name`` - name of the project, not to be confused with name of
    the project folder.
    * ``Repository Name`` - intended name of your project folder.
    * ``Package Name`` - intended name of your Python package.
    * ``Generate Example Pipeline`` - yes/no to generating an example pipeline
    in your project.

    \b
    ``kedro new --config <config.yml>``
    ``kedro new -c <config.yml>``
    Create a new project from configuration.

    * ``config.yml`` - The configuration YAML must contain at the top level
                    the above parameters (project_name, repo_name,
                    python_package) and output_dir - the
                    parent directory for the new project directory.

    \b
    ``kedro new --starter <starter>``
    Create a new project from a starter template. The starter can be either the path to
    a local directory, a URL to a remote VCS repository supported by `cookiecutter` or
    one of the aliases listed in ``kedro starter list``.

    \b
    ``kedro new --starter <starter> --checkout <checkout>``
    Create a new project from a starter template and a particular tag, branch or commit
    in the starter repository.

    \b
    ``kedro new --starter <starter> --directory <directory>``
    Create a new project from a starter repository and a directory within the location.
    Useful when you have multiple starters in the same repository.
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI
    # help text, so its wording is kept as-is.

    # --checkout and --directory only make sense relative to a starter.
    if not starter_name:
        if checkout:
            raise KedroCliError(
                "Cannot use the --checkout flag without a --starter value."
            )
        if directory:
            raise KedroCliError(
                "Cannot use the --directory flag without a --starter value."
            )

    if starter_name in _STARTER_ALIASES:
        # An alias resolves to a directory inside the shared starters repo,
        # so an explicit --directory would conflict with it.
        if directory:
            raise KedroCliError(
                "Cannot use the --directory flag with a --starter alias."
            )
        template_path, directory = _STARTERS_REPO, starter_name
    else:
        template_path = TEMPLATE_PATH if starter_name is None else starter_name

    _create_project(
        config_path=config,
        template_path=template_path,
        checkout=checkout,
        directory=directory,
    )
def _port_callback(ctx, param, value):  # pylint: disable=unused-argument
    """Return ``value`` unchanged unless that port is already in use.

    The ``(ctx, param, value)`` signature suggests this is used as a click
    option callback; ``ctx`` and ``param`` are not used.

    Raises:
        KedroCliError: If ``is_port_in_use(value)`` reports the port as taken.
    """
    if not is_port_in_use(value):
        return value

    message = (
        f"Port {value} is already in use on the host. "
        "Please specify an alternative port number."
    )
    raise KedroCliError(message)
def _assert_include_example_ok(include_example):
    """Ensure ``include_example`` is a genuine ``bool``.

    Raises:
        KedroCliError: If the value is anything other than ``True`` or
            ``False`` (integers such as ``1`` are rejected too).
    """
    if isinstance(include_example, bool):
        return

    raise KedroCliError(
        f"`{include_example}` value for `include_example` is invalid. "
        "It must be a boolean value True or False."
    )
def _assert_repo_name_ok(repo_name):
    """Validate a repository name.

    A valid name consists only of word characters (``\\w``) and hyphens, and
    both starts and ends with a word character.

    Args:
        repo_name: The candidate repository name.

    Raises:
        KedroCliError: If ``repo_name`` does not satisfy the rules above.
    """
    # Each repeated group must start with at least one hyphen. This accepts
    # the same names as `^\w+(-*\w+)*$` but avoids that pattern's nested
    # quantifiers, which backtrack exponentially on long invalid names.
    if not re.match(r"^\w+(-+\w+)*$", repo_name):
        message = (
            f"`{repo_name}` is not a valid repository name. It must contain "
            "only word symbols and/or hyphens, must also start and "
            "end with alphanumeric symbol."
        )
        raise KedroCliError(message)
def compose_docker_run_args(
    host_root: str = None,
    container_root: str = None,
    mount_volumes: Sequence[str] = None,
    required_args: Sequence[Tuple[str, Union[str, None]]] = None,
    optional_args: Sequence[Tuple[str, Union[str, None]]] = None,
    user_args: Sequence[str] = None,
) -> List[str]:
    """
    Make a list of arguments for the docker command.

    Args:
        host_root: Path project root on the host. It must be provided
            if `mount_volumes` are specified, optional otherwise.
        container_root: Path to project root in the container
            (e.g., `/home/kedro/<repo_name>`). It must be
            provided if `mount_volumes` are specified, optional otherwise.
        mount_volumes: List of volumes to be mounted.
        required_args: List of required arguments. These are always added,
            even if the same argument name appears in `user_args`.
        optional_args: List of optional arguments, these will be added only
            if not present in `user_args` list.
        user_args: Arguments already specified by the user. Any sequence
            (list or tuple) is accepted; they are appended last.

    Raises:
        KedroCliError: If `mount_volumes` are provided but either `host_root`
            or `container_root` are missing.

    Returns:
        List of arguments for the docker command.
    """
    mount_volumes = mount_volumes or []
    required_args = required_args or []
    optional_args = optional_args or []
    # Normalise to a list so a tuple can be concatenated with the composed
    # arguments on return.
    user_args = list(user_args) if user_args else []
    # Argument names the user already supplied; `--name=value` counts as `--name`.
    split_user_args = {ua.split("=", 1)[0] for ua in user_args}

    def _add_args(name_: str, value_: str = None, force_: bool = False) -> List[str]:
        """
        Add extra args to existing list of CLI args.

        Args:
            name_: Arg name to add.
            value_: Arg value to add, skipped if None.
            force_: Add the argument even if it's present in the current
                list of args.

        Returns:
            List containing the new args and (optionally) its value or
            an empty list if no values to be added.
        """
        if not force_ and name_ in split_user_args:
            return []
        return [name_] if value_ is None else [name_, value_]

    if mount_volumes:
        if not (host_root and container_root):
            raise KedroCliError(
                "Both `host_root` and `container_root` must "
                "be specified in `compose_docker_run_args` "
                "call if `mount_volumes` are provided."
            )
        vol_gen = _list_docker_volumes(host_root, container_root, mount_volumes)
        combined_args = list(chain.from_iterable(vol_gen))
    else:
        combined_args = []

    for arg_name, arg_value in required_args:
        combined_args += _add_args(arg_name, arg_value, True)
    for arg_name, arg_value in optional_args:
        combined_args += _add_args(arg_name, arg_value)
    return combined_args + user_args
def new(config, starter_name, checkout):
    """Create a new kedro project, either interactively or from a
    configuration file.

    Create projects according to the Kedro default project template. This
    template is ideal for analytics projects and comes with a data
    architecture, folders for notebooks, configuration, source code, etc.

    \b
    ``kedro new``
    Create a new project interactively.

    \b
    You will have to provide four choices:
    * ``Project Name`` - name of the project, not to be confused with name of
    the project folder.
    * ``Repository Name`` - intended name of your project folder.
    * ``Package Name`` - intended name of your Python package.
    * ``Generate Example Pipeline`` - yes/no to generating an example pipeline
    in your project.

    \b
    ``kedro new --config <config.yml>``
    ``kedro new -c <config.yml>``
    Create a new project from configuration.

    * ``config.yml`` - The configuration YAML must contain at the top level
                    the above parameters (project_name, repo_name,
                    python_package, include_example) and output_dir - the
                    parent directory for the new project directory.

    \b
    ``kedro new --starter <starter>``
    Create a new project from a starter template. The starter can be either the path to
    a local directory, a URL to a remote VCS repository supported by `cookiecutter` or
    one of the aliases listed in ``kedro starter list``.

    \b
    ``kedro new --starter <starter> --checkout <checkout>``
    Create a new project from a starter template and a particular tag, branch or commit
    in the starter repository.
    """
    # NOTE(review): the docstring above is presumably rendered as the CLI
    # help text, so its wording is kept as-is.
    if checkout and not starter_name:
        raise KedroCliError(
            "Cannot use the --checkout flag without a --starter value.")

    # A starter may be given as an alias or as a literal location; anything
    # that is not a known alias is used as the template path directly.
    template_path = (
        _STARTER_ALIASES.get(starter_name, starter_name)
        if starter_name
        else TEMPLATE_PATH
    )
    # The example-pipeline prompt is only requested for the default template.
    should_prompt_for_example = not starter_name

    # NOTE(review): `verbose` and `should_prompt_for_example` are not
    # parameters of the `_create_project` definition visible alongside this
    # function - confirm which callee signature this variant targets.
    _create_project(
        config_path=config,
        verbose=_VERBOSE,
        template_path=template_path,
        should_prompt_for_example=should_prompt_for_example,
        checkout=checkout,
    )