# Imports assumed by this excerpt (Kedro 0.17.x-era framework modules).
import click
import yaml

from kedro.framework.cli.utils import KedroCliError
from kedro.framework.project import pipelines, settings
from kedro.framework.startup import ProjectMetadata


def describe_pipeline(
    metadata: ProjectMetadata, name, **kwargs
):  # pylint: disable=unused-argument, protected-access
    """Describe a pipeline by providing a pipeline name.
    Defaults to the `__default__` pipeline. (DEPRECATED)
    """
    deprecation_message = (
        "DeprecationWarning: Command `kedro pipeline describe` is deprecated. "
        "Please use `kedro registry describe` instead."
    )
    click.secho(deprecation_message, fg="red")

    pipeline_obj = pipelines.get(name)
    if not pipeline_obj:
        all_pipeline_names = pipelines.keys()
        existing_pipelines = ", ".join(sorted(all_pipeline_names))
        raise KedroCliError(
            f"`{name}` pipeline not found. Existing pipelines: [{existing_pipelines}]"
        )

    nodes = []
    for node in pipeline_obj.nodes:
        namespace = f"{node.namespace}." if node.namespace else ""
        nodes.append(f"{namespace}{node._name or node._func_name} ({node._func_name})")

    result = {"Nodes": nodes}
    click.echo(yaml.dump(result))

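
# The `_create_session` helper used by the commands below is not shown in this
# excerpt. A minimal sketch of what it might look like, assuming it simply
# wraps `KedroSession.create` and converts any failure into a `KedroCliError`
# (the exact signature and error message are assumptions):
from kedro.framework.session import KedroSession


def _create_session(package_name: str, **kwargs):
    kwargs.setdefault("save_on_close", False)
    try:
        return KedroSession.create(package_name, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        raise KedroCliError(
            f"Unable to instantiate Kedro session.\nError: {exc}"
        ) from exc
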
def list_datasets(metadata: ProjectMetadata, pipeline, env):
    """Show datasets per type."""
    title = "DataSets in '{}' pipeline"
    not_mentioned = "Datasets not mentioned in pipeline"
    mentioned = "Datasets mentioned in pipeline"

    session = _create_session(metadata.package_name, env=env)
    context = session.load_context()
    datasets_meta = context.catalog._data_sets  # pylint: disable=protected-access
    catalog_ds = set(context.catalog.list())

    target_pipelines = pipeline or pipelines.keys()

    result = {}
    for pipe in target_pipelines:
        pl_obj = pipelines.get(pipe)
        if pl_obj:
            pipeline_ds = pl_obj.data_sets()
        else:
            existing_pls = ", ".join(sorted(pipelines.keys()))
            raise KedroCliError(
                f"`{pipe}` pipeline not found! Existing pipelines: {existing_pls}"
            )

        # Catalog entries not used by this pipeline, datasets the pipeline
        # needs but the catalog does not define (resolved as memory datasets
        # at runtime), and catalog entries the pipeline actually uses.
        unused_ds = catalog_ds - pipeline_ds
        default_ds = pipeline_ds - catalog_ds
        used_ds = catalog_ds - unused_ds

        unused_by_type = _map_type_to_datasets(unused_ds, datasets_meta)
        used_by_type = _map_type_to_datasets(used_ds, datasets_meta)

        if default_ds:
            used_by_type["DefaultDataSet"].extend(default_ds)

        data = ((not_mentioned, dict(unused_by_type)), (mentioned, dict(used_by_type)))
        result[title.format(pipe)] = {key: value for key, value in data if value}

    click.secho(yaml.dump(result))

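
# `_map_type_to_datasets` is also not shown in this excerpt. A minimal sketch,
# assuming `datasets_meta` maps dataset names to their instantiated dataset
# objects (as `context.catalog._data_sets` does); it must return a
# `defaultdict(list)` so that `list_datasets` can extend the "DefaultDataSet"
# bucket unconditionally:
from collections import defaultdict


def _map_type_to_datasets(datasets, datasets_meta):
    """Group dataset names by the class name of their catalog entry."""
    mapping = defaultdict(list)
    for dataset in datasets:
        is_param = dataset.startswith("params:") or dataset == "parameters"
        if not is_param:
            ds_type = datasets_meta[dataset].__class__.__name__
            if dataset not in mapping[ds_type]:
                mapping[ds_type].append(dataset)
    return mapping
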
def describe_pipeline(
    metadata: ProjectMetadata, name, **kwargs
):  # pylint: disable=unused-argument
    """Describe a pipeline by providing a pipeline name."""
    pipeline_obj = pipelines.get(name)
    if not pipeline_obj:
        all_pipeline_names = pipelines.keys()
        existing_pipelines = ", ".join(sorted(all_pipeline_names))
        raise KedroCliError(
            f"`{name}` pipeline not found. Existing pipelines: [{existing_pipelines}]"
        )

    result = {
        "Nodes": [
            f"{node.short_name} ({node._func_name})"  # pylint: disable=protected-access
            for node in pipeline_obj.nodes
        ]
    }
    click.echo(yaml.dump(result))

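
# For illustration, the command above prints the `yaml.dump` of the result
# dictionary. For a hypothetical pipeline with two nodes (node and function
# names are illustrative, not from the source), the output would look like:
#
#   Nodes:
#   - preprocess_companies_node (preprocess_companies)
#   - preprocess_shuttles_node (preprocess_shuttles)
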
def create_catalog(metadata: ProjectMetadata, pipeline_name, env):
    """Create Data Catalog YAML configuration with missing datasets.

    Add `MemoryDataSet` datasets to the Data Catalog YAML configuration
    file for each dataset in a registered pipeline if it is missing from
    the `DataCatalog`.

    The catalog configuration will be saved to
    `<conf_root>/<env>/catalog/<pipeline_name>.yml`.
    """
    env = env or "base"
    session = _create_session(metadata.package_name, env=env)
    context = session.load_context()

    pipeline = pipelines.get(pipeline_name)
    if not pipeline:
        existing_pipelines = ", ".join(sorted(pipelines.keys()))
        raise KedroCliError(
            f"`{pipeline_name}` pipeline not found! Existing pipelines: {existing_pipelines}"
        )

    # Parameters are never materialised in the catalog, so exclude them on
    # both sides of the comparison.
    pipe_datasets = {
        ds_name
        for ds_name in pipeline.data_sets()
        if not ds_name.startswith("params:") and ds_name != "parameters"
    }
    catalog_datasets = {
        ds_name
        for ds_name in context.catalog._data_sets.keys()  # pylint: disable=protected-access
        if not ds_name.startswith("params:") and ds_name != "parameters"
    }

    # Datasets that are missing in the Data Catalog
    missing_ds = sorted(pipe_datasets - catalog_datasets)
    if missing_ds:
        catalog_path = (
            context.project_path
            / settings.CONF_ROOT
            / env
            / "catalog"
            / f"{pipeline_name}.yml"
        )
        _add_missing_datasets_to_catalog(missing_ds, catalog_path)
        click.echo(f"Data Catalog YAML configuration was created: {catalog_path}")
    else:
        click.echo("All datasets are already configured.")

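
# `_add_missing_datasets_to_catalog` writes the generated entries to disk.
# A minimal sketch, assuming each missing dataset simply becomes a
# `MemoryDataSet` entry merged into whatever configuration already exists
# at `catalog_path` (the merge behaviour is an assumption):
def _add_missing_datasets_to_catalog(missing_ds, catalog_path):
    if catalog_path.is_file():
        catalog_config = yaml.safe_load(catalog_path.read_text()) or {}
    else:
        catalog_config = {}

    for ds_name in missing_ds:
        catalog_config[ds_name] = {"type": "MemoryDataSet"}

    # The `catalog/` directory may not exist yet for this environment.
    catalog_path.parent.mkdir(exist_ok=True)
    with catalog_path.open(mode="w") as catalog_file:
        yaml.safe_dump(catalog_config, catalog_file, default_flow_style=False)
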
def describe_registered_pipeline(
    metadata: ProjectMetadata, name, **kwargs
):  # pylint: disable=unused-argument, protected-access
    """Describe a registered pipeline by providing a pipeline name.
    Defaults to the `__default__` pipeline.
    """
    pipeline_obj = pipelines.get(name)
    if not pipeline_obj:
        all_pipeline_names = pipelines.keys()
        existing_pipelines = ", ".join(sorted(all_pipeline_names))
        raise KedroCliError(
            f"`{name}` pipeline not found. Existing pipelines: [{existing_pipelines}]"
        )

    nodes = []
    for node in pipeline_obj.nodes:
        namespace = f"{node.namespace}." if node.namespace else ""
        nodes.append(f"{namespace}{node._name or node._func_name} ({node._func_name})")

    result = {"Nodes": nodes}
    click.echo(yaml.dump(result))
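
# Typical invocations of the commands defined above. The click decorators are
# not shown in this excerpt, so the exact command and flag names are assumed
# from the function signatures and the deprecation message
# (`kedro registry describe`); the pipeline name is illustrative:
#
#   kedro catalog list --pipeline=data_processing
#   kedro catalog create --pipeline=data_processing
#   kedro registry describe data_processing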