def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    params = self.params
    # Import any modules listed under the IMPORT key before resolving objects
    import_modules(params.pop("IMPORT", []))

    # Resolve object-definition entries in the parameters dict
    params = HatchDict(params).get()
    self._register_kedro_hooks(params.pop("HOOKS", None) or [])
    self._kedro_pipelines = params.pop("PIPELINES", None)
    self._kedro_run_config = params.pop("RUN_CONFIG", None) or {}
    self._params = params
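For context, HatchDict is pipelinex's config-to-object resolver: on get(), any dict whose special key (by default "=", or a custom obj_key as in Example 4) names an importable callable is instantiated with the remaining entries as arguments. A minimal sketch with illustrative paths, assuming the default "=" key:

from pipelinex import HatchDict

# The "=" key names the class to instantiate; the dataset choice and
# filepath here are assumptions, not taken from the source project.
conf = {
    "my_dataset": {
        "=": "kedro.extras.datasets.pandas.CSVDataSet",
        "filepath": "data/01_raw/iris.csv",
    }
}
my_dataset = HatchDict(conf).get("my_dataset")  # returns a CSVDataSet instance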
Example 2
def test_networkx_dataset_e():
    # Test-specific parameter definitions
    path_catalog_yml: str = "tests/data/catalog.yml"
    with open(path_catalog_yml, "r") as f:
        catalog: dict = yaml.safe_load(f)
    input_key: str = "test_networkx_dataset_e"

    # NetworkX data access operations; G is a module-level fixture graph
    graph_dataset: NetworkXDataSetE = HatchDict(
        catalog[input_key]).get("nx_dataset")
    graph_dataset.save(G)
    G_reloaded = graph_dataset.load()
    assert G_reloaded
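The catalog entry being resolved above might look like the following in dict form (a sketch only; the "=" key and class path are assumptions, and the real tests/data/catalog.yml may differ):

# Hypothetical shape of catalog["test_networkx_dataset_e"]:
entry = {
    "nx_dataset": {
        "=": "pipelinex.NetworkXDataSetE",    # assumed class path
        "filepath": "tests/data/graph.json",  # illustrative
    }
}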
Example 3
def test_e2e() -> None:
    conf_loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"]
    )

    conf_logging: Dict[str, Any] = conf_loader.get("logging*", "logging*/**")
    logging.config.dictConfig(conf_logging)

    conf_catalog: Dict[str, Any] = conf_loader.get("catalog*", "catalog*/**")
    data_catalog: DataCatalog = DataCatalog.from_config(conf_catalog)

    conf_params: Dict[str, Any] = conf_loader.get("parameters*", "parameters*/**")
    data_catalog.add_feed_dict(feed_dict=get_feed_dict(params=conf_params))

    conf_pipeline: Dict[str, Any] = conf_loader.get("pipelines*", "pipelines*/**")
    ae_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("autoencoder_pipeline")
    nx_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("networkx_pipeline")
    dgl_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("dgl_pipeline")

    runner: SequentialRunner = SequentialRunner()
    runner.run(pipeline=ae_pipeline + nx_pipeline + dgl_pipeline, catalog=data_catalog)
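get_feed_dict is not shown in this listing; a hypothetical re-implementation, assuming it mirrors how Kedro itself feeds parameters into the catalog, might look like this:

from typing import Any, Dict

def get_feed_dict(params: Dict[str, Any]) -> Dict[str, Any]:
    # Expose the full dict as "parameters" plus one "params:<key>"
    # entry per top-level key (assumed behavior).
    feed_dict: Dict[str, Any] = {"parameters": params}
    for key, value in params.items():
        feed_dict["params:{}".format(key)] = value
    return feed_dict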
Example 4
    def _format_kedro_catalog(self, conf_catalog):
        # Resolve object-definition entries in the catalog config
        conf_catalog = HatchDict(conf_catalog).get()

        # A "/" entry, if present, supplies default options for every dataset
        default_dict = {}
        if "/" in conf_catalog:
            default_dict = conf_catalog.pop("/")

        # A PIPELINE_JSON_TEXT entry is split out as a text dataset used to
        # export the pipeline structure as JSON
        if "PIPELINE_JSON_TEXT" in conf_catalog:
            pipeline_json_text_dataset = conf_catalog.pop("PIPELINE_JSON_TEXT")
            assert isinstance(pipeline_json_text_dataset, dict)
            pipeline_json_text_dataset.setdefault(
                "type", "kedro.extras.datasets.text.TextDataSet"
            )
            self._pipeline_json_text_dataset = HatchDict(
                pipeline_json_text_dataset, obj_key="type"
            ).get()

        # Merge the defaults into each dataset entry, validate, and normalize
        conf_catalog_processed = {}
        for ds_name, ds_dict_ in conf_catalog.items():
            ds_dict = copy.deepcopy(default_dict)
            if isinstance(ds_dict_, dict):
                ds_dict.update(ds_dict_)
            _check_type(ds_dict)
            ds_name, ds_dict = self._format_kedro_dataset(ds_name, ds_dict)
            conf_catalog_processed[ds_name] = ds_dict
        return conf_catalog_processed
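To make the "/" defaults concrete, a small illustration (values are made up):

conf_catalog = {
    "/": {"type": "kedro.extras.datasets.pandas.CSVDataSet"},  # defaults
    "iris": {"filepath": "data/01_raw/iris.csv"},
}
# After the merge loop above, "iris" is processed as:
# {"type": "kedro.extras.datasets.pandas.CSVDataSet",
#  "filepath": "data/01_raw/iris.csv"}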
Example 5
def test_viz() -> None:
    dir_static_site: str = "./public"
    # Configure pipeline and catalog objects
    conf_loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"])

    conf_logging: Dict[str, Any] = conf_loader.get("logging*", "logging*/**")
    logging.config.dictConfig(conf_logging)

    conf_catalog: Dict[str, Any] = conf_loader.get("catalog*", "catalog*/**")
    data_catalog: DataCatalog = DataCatalog.from_config(conf_catalog)

    conf_params: Dict[str, Any] = conf_loader.get("parameters*",
                                                  "parameters*/**")
    data_catalog.add_feed_dict(feed_dict=get_feed_dict(params=conf_params))

    conf_pipeline: Dict[str, Any] = conf_loader.get("pipelines*",
                                                    "pipelines*/**")
    ae_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "autoencoder_pipeline")
    nx_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "networkx_pipeline")
    dgl_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "dgl_pipeline")

    pipelines: Dict[str, FlexiblePipeline] = {
        "autoencoder_pipeline": ae_pipeline,
        "networkx_pipeline": nx_pipeline,
        "dgl_pipeline": dgl_pipeline,
        "master_pipeline": ae_pipeline + nx_pipeline + dgl_pipeline,
    }
    # Serialize the pipeline and catalog objects to JSON and export them
    # to the local static-site directory
    call_viz(dir_static_site=dir_static_site,
             catalog=data_catalog,
             pipelines=pipelines)
    # Serve the static website locally (disabled by default)
    # run_static_server(directory=dir_static_site, port=4141)
    assert Path(dir_static_site).joinpath("pipeline.json").exists()
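If you do want to preview the exported site, a stdlib-only stand-in for run_static_server (a hypothetical helper, not the project's own) could be:

import functools
from http.server import HTTPServer, SimpleHTTPRequestHandler

def serve_static(directory: str = "./public", port: int = 4141) -> None:
    # Serve the exported static site; the directory argument of
    # SimpleHTTPRequestHandler requires Python 3.7+
    handler = functools.partial(SimpleHTTPRequestHandler, directory=directory)
    HTTPServer(("", port), handler).serve_forever()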
Example 6
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        params = self.params
        # Import any modules listed under the IMPORT key before resolving objects
        import_modules(params.pop("IMPORT", []))

        # Resolve object-definition entries in the parameters dict
        params = HatchDict(params).get()
        hooks = params.pop("HOOKS", None)
        if hooks:
            # Registering hooks from params relies on the process-global
            # hook manager that kedro 0.16.x exposes via get_hook_manager()
            if kedro.__version__.startswith("0.16."):
                self._hooks_in_params_read = False
                self._hook_manager = getattr(self, "_hook_manager",
                                             get_hook_manager())
                self._register_kedro_hooks(hooks)
            else:
                log.warning(
                    "HOOKS defined in the config file are ignored because "
                    "the installed kedro version is not 0.16.x"
                )
        self._kedro_pipelines = params.pop("PIPELINES", None)
        self._kedro_run_config = params.pop("RUN_CONFIG", None) or {}
        self._params = params
        log.info("RUN_CONFIG: \n{}".format(self._kedro_run_config))
        log.debug("PIPELINES: \n{}".format(self._kedro_pipelines))
        log.debug("params: \n{}".format(self._params))
Example 7
def test_dgl_dataset() -> None:
    # Test-specific parameter definitions
    path_catalog_yml: str = "tests/data/catalog.yml"
    with open(path_catalog_yml, "r") as f:
        catalog: dict = yaml.safe_load(f)
    input_key: str = "test_dgl_dataset"

    # DGL data access operations; G is a module-level fixture graph
    graph_dataset: DGLDataSet = HatchDict(
        catalog[input_key]).get("dgl_dataset")
    graph_dataset.save(G)
    # DGL-style loaders return a list of graphs plus a label dict,
    # hence the indexing to recover the single saved graph
    glist, label_dict = graph_dataset.load()
    G_reloaded = glist[0]

    assert G_reloaded
Example 8
def test_dock() -> None:
    input_key: str = "test_dock"
    # path_catalog_yml is assumed to be defined at module scope
    with open(path_catalog_yml, "r") as f:
        catalog: dict = yaml.safe_load(f)
    csv_dataset: CSVDataSet = HatchDict(catalog[input_key]).get("csv_dataset")
    csv_data: DataFrame = csv_dataset.load()
    assert isinstance(csv_data, DataFrame)
Example 9
@property
def params(self) -> Dict[str, Any]:
    # Resolve object-definition entries in the merged parameters
    params = super().params
    params = HatchDict(params).get()
    return params
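Here super().params is Kedro's KedroContext.params property (the merged run parameters); wrapping the result in HatchDict means any object-definition entries in the parameters config are resolved into live Python objects before downstream code reads them.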