import logging
from pathlib import Path
from typing import Iterable

from kedro.runner import SequentialRunner
from kedro.utils import load_obj

# The helpers used below (get_config, create_catalog, create_pipeline,
# init_spark_session) and KedroCliError are assumed to be defined or
# imported elsewhere in this project, as in the Kedro project template.


def main(
    tags: Iterable[str] = None,
    env: str = None,
    runner: str = None,
):
    """Application main entry point.

    Args:
        tags: An optional list of node tags which should be used to filter
            the nodes of the ``Pipeline``. If specified, only the nodes
            containing *any* of these tags will be added to the ``Pipeline``.
        env: An optional parameter specifying the environment in which the
            ``Pipeline`` should be run. If not specified, defaults to "local".
        runner: An optional parameter specifying the runner to run the
            pipeline with.

    Raises:
        KedroCliError: If the resulting ``Pipeline`` is empty.

    """
    # Report project name
    logging.info("** Kedro project %s", Path.cwd().name)

    # Load the data catalog from project configuration
    conf = get_config(project_path=str(Path.cwd()), env=env)
    catalog = create_catalog(config=conf)

    # Load the pipeline, optionally filtering its nodes by tag
    pipeline = create_pipeline()
    pipeline = pipeline.only_nodes_with_tags(*tags) if tags else pipeline
    if not pipeline.nodes:
        if tags:
            raise KedroCliError("Pipeline contains no nodes with tags: " + str(tags))
        raise KedroCliError("Pipeline contains no nodes")

    # Load the runner: when --parallel or --runner is used, the named runner
    # class is loaded; otherwise default to SequentialRunner
    runner = load_obj(runner, "kedro.runner") if runner else SequentialRunner

    # Initialise SparkSession
    spark = init_spark_session()

    # Run the pipeline with the selected runner (not hard-coded
    # SequentialRunner, which would silently ignore the runner argument)
    runner().run(pipeline, catalog)
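# Standard module entry point: a minimal sketch assuming this file is run
# directly, as in the Kedro project template. The logging configuration is
# an assumption (the original shows none), added so the project-name report
# above is actually emitted at INFO level.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()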
def pipeline(self) -> Pipeline:
    """Return the project pipeline, built by the project's pipeline factory."""
    return create_pipeline()
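# For context: the ``self`` parameter implies the method above is defined on
# a class. A minimal sketch of such a container, modelled on Kedro's
# project-context pattern; the ``ProjectContext`` name and the ``@property``
# decorator are assumptions, not taken from this file:
from kedro.pipeline import Pipeline


class ProjectContext:
    @property
    def pipeline(self) -> Pipeline:
        # Delegate to the project's pipeline factory.
        return create_pipeline()


# Usage, under the assumptions above: ``ProjectContext().pipeline.nodes``
# lists the nodes of the assembled pipeline.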