def create_data_merge_pipeline(mode: str = 'train', **kwargs):
    conf_loader = ConfigLoader('conf/local/parameters/')
    use_feature_names = conf_loader.get(
        'data_engineering_parameters.yml')['use_feature_names']
    if mode == 'train':
        return Pipeline([
            node(merge_features,
                 inputs=['params:data_primary_key', *use_feature_names],
                 outputs='model_input_features'),
            node(extract_feature_columns,
                 inputs='model_input_features',
                 outputs='output_columns'),
        ])
    elif mode == 'inference':
        return Pipeline([
            node(merge_features,
                 inputs=['params:data_primary_key', *use_feature_names],
                 outputs='tmp_model_input_features'),
            node(format_features_for_inference,
                 inputs=['tmp_model_input_features', 'output_columns'],
                 outputs='model_input_features'),
        ])
def _create_config_loader(  # pylint: disable=no-self-use
        self, conf_paths: Iterable[str]) -> ConfigLoader:
    """A factory method for the ConfigLoader instantiation.

    Returns:
        Instance of `ConfigLoader`.
    """
    hook_manager = get_hook_manager()
    config_loader = hook_manager.hook.register_config_loader(  # pylint: disable=no-member
        conf_paths=conf_paths)
    return config_loader or ConfigLoader(conf_paths)  # for backwards compatibility
def test_table_embedding() -> None:
    conf_loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"])

    conf_logging: Dict[str, Any] = conf_loader.get("logging*", "logging*/**")
    logging.config.dictConfig(conf_logging)

    conf_catalog: Dict[str, Any] = conf_loader.get("catalog*", "catalog*/**")
    data_catalog: DataCatalog = DataCatalog.from_config(conf_catalog)

    conf_params: Dict[str, Any] = conf_loader.get("parameters*", "parameters*/**")
    data_catalog.add_feed_dict(feed_dict=get_feed_dict(params=conf_params))

    conf_pipeline: Dict[str, Any] = conf_loader.get("pipelines*", "pipelines*/**")
    ae_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("autoencoder_pipeline")

    runner: SequentialRunner = SequentialRunner()
    runner.run(pipeline=ae_pipeline, catalog=data_catalog)
def get_config(project_path: str, env: str = None, **kwargs) -> ConfigLoader:
    """Loads Kedro's configuration at the root of the project.

    Args:
        project_path: The root directory of the Kedro project.
        env: The environment used for loading configuration.
        kwargs: Ignore any additional arguments added in the future.

    Returns:
        ConfigLoader which can be queried to access the project config.
    """
    project_path = Path(project_path)
    env = env or DEFAULT_RUN_ENV
    conf_paths = [
        str(project_path / CONF_ROOT / "base"),
        str(project_path / CONF_ROOT / env),
    ]
    return ConfigLoader(conf_paths)
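A minimal usage sketch of the helper above (not from the original snippet); the project path, environment name, and `catalog*` patterns are assumptions based on Kedro's default project layout.

# Hypothetical call to get_config; patterns follow Kedro's conventional catalog file names.
conf_loader = get_config(project_path=".", env="local")
conf_catalog = conf_loader.get("catalog*", "catalog*/**")  # merged catalog configuration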
def test_viz() -> None:
    dir_static_site: str = "./public"

    # Configure pipeline and catalog objects
    conf_loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"])

    conf_logging: Dict[str, Any] = conf_loader.get("logging*", "logging*/**")
    logging.config.dictConfig(conf_logging)

    conf_catalog: Dict[str, Any] = conf_loader.get("catalog*", "catalog*/**")
    data_catalog: DataCatalog = DataCatalog.from_config(conf_catalog)

    conf_params: Dict[str, Any] = conf_loader.get("parameters*", "parameters*/**")
    data_catalog.add_feed_dict(feed_dict=get_feed_dict(params=conf_params))

    conf_pipeline: Dict[str, Any] = conf_loader.get("pipelines*", "pipelines*/**")
    ae_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("autoencoder_pipeline")
    nx_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("networkx_pipeline")
    dgl_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get("dgl_pipeline")

    pipelines: Dict[str, FlexiblePipeline] = {
        "autoencoder_pipeline": ae_pipeline,
        "networkx_pipeline": nx_pipeline,
        "dgl_pipeline": dgl_pipeline,
        "master_pipeline": ae_pipeline + nx_pipeline + dgl_pipeline,
    }

    # Parse Python object information into JSON form and export to local
    call_viz(dir_static_site=dir_static_site, catalog=data_catalog, pipelines=pipelines)

    # Serve the static website from local
    # run_static_server(directory=dir_static_site, port=4141)

    assert Path(dir_static_site).joinpath("pipeline.json").exists()
def test_overlapping_patterns(self, tmp_path, caplog):
    """Check that the same configuration file is not loaded more than once."""
    paths = [
        str(tmp_path / "base"),
        str(tmp_path / "dev"),
        str(tmp_path / "dev" / "user1"),
    ]
    _write_yaml(tmp_path / "base" / "catalog0.yml",
                {"env": "base", "common": "common"})
    _write_yaml(tmp_path / "dev" / "catalog1.yml",
                {"env": "dev", "dev_specific": "wiz"})
    _write_yaml(tmp_path / "dev" / "user1" / "catalog2.yml", {"user1_c2": True})
    _write_yaml(tmp_path / "dev" / "user1" / "catalog3.yml", {"user1_c3": True})

    catalog = ConfigLoader(paths).get("catalog*", "catalog*/**", "user1/catalog2*")
    expected_catalog = {
        "env": "dev",
        "common": "common",
        "dev_specific": "wiz",
        "user1_c2": True,
        "user1_c3": True,
    }
    assert catalog == expected_catalog

    log_messages = [record.getMessage() for record in caplog.records]
    expected_path = (tmp_path / "dev" / "user1" / "catalog2.yml").resolve()
    expected_message = (
        f"Config file(s): {expected_path} already processed, skipping loading..."
    )
    assert expected_message in log_messages
def test_load_base_config(self, tmp_path, conf_paths, base_config):
    """Test config loading if the `local/` directory is empty."""
    (tmp_path / "local").mkdir(exist_ok=True)
    catalog = ConfigLoader(conf_paths).get("catalog*.yml")
    assert catalog == base_config
import pandas as pd
import logging
import numpy as np

from one_two_trip.crossval.crossval import CV_score
from kedro.config import ConfigLoader

conf_paths = ['conf/base', 'conf/local']
conf_loader = ConfigLoader(conf_paths)
conf_credentials = conf_loader.get('credentials*', 'credentials*/**')
conf_parameters = conf_loader.get('parameters*', 'parameters*/**')
conf_catalog = conf_loader.get('catalog*', 'catalog*/**')

cols_target = conf_parameters['model']['cols_target']
col_id = conf_parameters['model']['col_id']
col_client = conf_parameters['model']['col_client']
cols_cat = conf_parameters['model']['cols_cat']
cv_byclient = conf_parameters['model']['cv_byclient']
n_splits = conf_parameters['model']['n_splits']
n_repeats = conf_parameters['model']['n_repeats']
params = conf_parameters['lightgbm']['params']


def union_data_node(train: pd.DataFrame, test: pd.DataFrame) -> pd.DataFrame:
    df_union = pd.concat([train, test], sort=False, ignore_index=True)
    return df_union


def mean_byuser_node(df_union: pd.DataFrame) -> pd.DataFrame:
    cols = list(
def __init__(self):
    conf_paths = ["conf/base", "conf/local"]
    conf_loader = ConfigLoader(conf_paths)
    self.credentials = conf_loader.get("credentials*", "credentials*/**")
def register_config_loader(self, conf_paths):  # pylint: disable=no-self-use
    return ConfigLoader(conf_paths)
def config_loader():
    return ConfigLoader(conf_source=str(Path.cwd()))
def register_config_loader(self, conf_paths) -> ConfigLoader:
    return ConfigLoader(conf_paths)
def register_config_loader(self, conf_paths: Iterable[str]) -> ConfigLoader:
    self.logger.info("Registering config loader", extra={"conf_paths": conf_paths})
    return ConfigLoader(conf_paths)
import logging
from typing import Any, Dict

import numpy as np
import pandas as pd
import discord
from discord.ext import commands
import datetime
from urllib import parse, request
import re

from kedro.config import ConfigLoader

conf_paths = ["conf/base", "conf/local"]
conf_loader = ConfigLoader(conf_paths)
credentials = conf_loader.get("credentials*", "credentials*/**")


def run_bot_odesla():
    bot = commands.Bot(command_prefix='!', description="Bot de ODESLA")

    @bot.command()
    async def info(ctx):
        embed = discord.Embed(title=f"{ctx.guild.name}",
                              description='Test',
                              timestamp=datetime.datetime.utcnow(),
                              color=discord.Color.blue())
        embed.add_field(name="1", value=f"{ctx.guild.created_at}")
        embed.add_field(name="2", value=f"{ctx.guild.created_at}")
import matplotlib.pyplot as plt
from kedro.config import ConfigLoader
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline, node
from kedro.runner import SequentialRunner

# Find the configuration (catalog.yaml) in the current working directory and load it
conf_loader = ConfigLoader(".")
conf_catalog = conf_loader.get("catalog*")

# Create the Data Catalog from the catalog.yml file
io = DataCatalog.from_config(conf_catalog)
df = io.load("titanic_training_data")


# Create nodes by writing Python functions

# Remove NaN values
def clean_raw_data(df):
    df = df.drop(["Ticket", "Cabin"], axis=1)
    df = df.dropna()
    return df


# Plot the amount of people who survived and who died.
def plot_survival_breakdown(df):
    plt.figure(figsize=(6, 4))
    fig, ax = plt.subplots()
    df.Survived.value_counts().plot(kind="barh", color="blue", alpha=0.65)
    ax.set_ylim(-1, len(df.Survived.value_counts()))
    plt.title("Survival Breakdown (1 = Survived, 0 = Died)")
    return fig
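The snippet above imports Pipeline, node, and SequentialRunner without using them; the following is a hedged sketch of how the two functions could be wired together and run against the same catalog. Only "titanic_training_data" comes from the snippet; the output dataset names are assumptions (unregistered outputs fall back to in-memory datasets).

# Hedged sketch: assembling the functions above into a pipeline.
# "cleaned_data" and "survival_breakdown_plot" are hypothetical dataset names.
titanic_pipeline = Pipeline([
    node(clean_raw_data, inputs="titanic_training_data", outputs="cleaned_data"),
    node(plot_survival_breakdown, inputs="cleaned_data", outputs="survival_breakdown_plot"),
])
SequentialRunner().run(titanic_pipeline, catalog=io)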
def test_nested_subdirs(self, tmp_path):
    """Test loading the config from subdirectories."""
    catalog = ConfigLoader(str(tmp_path / "base")).get("**/catalog*")
    assert (catalog["cars"]["type"]
            == catalog["prod"]["cars"]["type"]
            == "CSVLocalDataSet")
    assert catalog["cars"]["save_args"]["index"] is True
def get_model_params() -> Dict[str, Any]:
    conf = ConfigLoader(MODEL_PARAMETERS_PATH).get('*.yaml')
    return conf['best_evaluated_params']
def __get_creds():
    conf_paths = ["conf/base", "conf/local"]
    conf_loader = ConfigLoader(conf_paths)
    credentials = conf_loader.get("credentials*", "credentials*/**")
    return credentials
from kedro.config import ConfigLoader
from typing import Dict

conf_paths = ["conf/base", "conf/local"]
conf_loader = ConfigLoader(conf_paths)
conf_catalog = conf_loader.get("catalog*", "catalog*/**")


def fetch_catalog() -> Dict[str, Dict]:
    """Fetch the catalog dict.

    Returns:
        Dict[str, Dict]: catalog dict
    """
    return conf_catalog


def print_catalog_path(conf_catalog: Dict[str, Dict]):
    """Print each catalog entry's path.

    Args:
        conf_catalog (Dict[str, Dict]): catalog dict
    """
    print("Catalog entry path:")
    for dataset in conf_catalog.keys():
        print(f'{dataset} -- {conf_catalog[dataset]["filepath"]}')
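A hedged usage sketch (not part of the original snippet) showing the two helpers above being called; it assumes every catalog entry defines a `filepath` key, which the loop in `print_catalog_path` requires.

# Hypothetical usage of the helpers above.
if __name__ == "__main__":
    catalog_dict = fetch_catalog()
    print_catalog_path(catalog_dict)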
def register_config_loader(self, conf_paths: Iterable[str]) -> ConfigLoader:
    # Monkey-patch Kedro's private module-level _load_config with a custom implementation
    import kedro.config.config
    kedro.config.config._load_config = _load_config
    return ConfigLoader(conf_paths)
def test_empty_patterns(self, conf_paths):
    """Check the error if no config patterns were specified."""
    pattern = (r"`patterns` must contain at least one glob pattern "
               r"to match config filenames against")
    with pytest.raises(ValueError, match=pattern):
        ConfigLoader(conf_paths).get()
def kedro_conf(mock_config):
    config = ConfigLoader(["conf/base", "conf/local"])
    config_dict = config.get("catalog*", "catalog*/**")
    return config_dict
def valid_rf(race_results_df_processed_valid, model_rf, parameters):
    # mlflow
    print('FILE_DIR: ' + FILE_DIR)
    mlflow.set_tracking_uri(FILE_DIR + '/../../../logs/mlruns/')
    mlflow.set_experiment('forecast_keiba_valid')
    run_info = mlflow.start_run()
    mlflow.set_tag('model', 'lr')

    # Prepare the validation data
    race_results_df_processed_valid = race_results_df_processed_valid
    # Explanatory variables (features)
    X_valid = race_results_df_processed_valid.drop(['rank'], axis=1)
    # Target variable
    y_valid = race_results_df_processed_valid['rank']

    # Run inference
    y_valid_pred = model_rf.predict(X_valid)

    # Reshape results for aggregation
    valid_results_df = pd.DataFrame({'pred': y_valid_pred, 'actual': y_valid})
    race_id_list = list(set(list(valid_results_df.index)))
    valid_results_list = valid_results_df.reset_index().values.tolist()
    # Shuffle
    random.shuffle(valid_results_list)

    # Aggregate (exacta)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1]:
            for i in range(len(valid_results_list)):
                # Horses in this race predicted to finish in position {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 1 and (valid_results_list[i][2] == 1):
                        cnt_by_race += 1
        if cnt_by_race == 1:
            correct_count += 1
    acc_exacta_1 = correct_count / 100
    print('acc_exacta_1: ' + str(acc_exacta_1))

    # Aggregate (quinella)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2]:
            for i in range(len(valid_results_list)):
                # Horses in this race predicted to finish in position {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 2 and (valid_results_list[i][2] == 1
                                                  or valid_results_list[i][2] == 2):
                        cnt_by_race += 1
        if cnt_by_race == 2:
            correct_count += 1
    acc_quinella_2 = correct_count / 100
    print('acc_quinella_2: ' + str(acc_quinella_2))

    # Aggregate (trio)
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2, 3]:
            for i in range(len(valid_results_list)):
                # Horses in this race predicted to finish in position {rank}
                if valid_results_list[i][0] == race_id and valid_results_list[i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 3 and (valid_results_list[i][2] == 1
                                                  or valid_results_list[i][2] == 2
                                                  or valid_results_list[i][2] == 3):
                        cnt_by_race += 1
        if cnt_by_race == 3:
            correct_count += 1
    acc_trio_3 = correct_count / 100
    print('acc_trio_3: ' + str(acc_trio_3))

    mlflow.log_metric("acc_exacta_1", acc_exacta_1)
    mlflow.log_metric("acc_quinella_2", acc_quinella_2)
    mlflow.log_metric("acc_trio_3", acc_trio_3)

    # Notify via LINE
    if parameters['is_notify']:
        run_result_dict = mlflow.get_run(run_info.info.run_id).to_dictionary()
        run_result_str = json.dumps(run_result_dict, indent=4)
        conf_paths = [
            FILE_DIR + "/../../../conf/base",
            FILE_DIR + "/../../../conf/local",
        ]
        conf_loader = ConfigLoader(conf_paths)
        credentials = conf_loader.get("credentials*", "credentials*/**")
        token = credentials['dev_line']['access_token']
        url = "https://notify-api.line.me/api/notify"
        headers = {"Authorization": "Bearer " + token}
        payload = {"message": "model_rf" + run_result_str}
        requests.post(url, headers=headers, data=payload)

    mlflow.end_run()
def kedro_conf_path() -> dict:
    config = ConfigLoader(["conf/base", "conf/local"])
    conf_catalog = config.get("catalog*", "catalog*/**")
    return conf_catalog
from kedro.config import ConfigLoader

# CONFIG
conf_paths = ['conf/base']
conf_loader = ConfigLoader(conf_paths)
config = conf_loader.get('credentials*', 'credentials*/**')

# DROP TABLES
staging_events_table_drop = "DROP TABLE IF EXISTS staging_events;"
staging_songs_table_drop = "DROP TABLE IF EXISTS staging_songs;"
songplay_table_drop = "DROP TABLE IF EXISTS songplays;"
user_table_drop = "DROP TABLE IF EXISTS users;"
song_table_drop = "DROP TABLE IF EXISTS songs;"
artist_table_drop = "DROP TABLE IF EXISTS artists;"
time_table_drop = "DROP TABLE IF EXISTS time;"

# CREATE TABLES
staging_events_table_create = ("""
    CREATE TABLE staging_events (
        artist          VARCHAR(200),
        auth            VARCHAR(50),
        firstName       VARCHAR(200),
        gender          CHAR(1),
        itemInSession   INTEGER,
        lastname        VARCHAR(50),
        length          NUMERIC(10,5),
        level           VARCHAR(10),
        location        VARCHAR(50),
        method          VARCHAR(10),
import lightgbm as lgb
import numpy as np
import pandas as pd
import shap
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import logging

from hyperopt import STATUS_OK

from digital_reputation_challenge.nodes.datatransform import get_fold_data
from kedro.config import ConfigLoader

conf_paths = ['conf/base', 'conf/local']
conf_loader = ConfigLoader(conf_paths)
conf_parameters = conf_loader.get('parameters*', 'parameters*/**')

from sklearn.model_selection import StratifiedKFold, KFold, RepeatedStratifiedKFold


class CV_score:
    def __init__(self, params, cols_all, col_target, cols_cat='auto',
                 num_boost_round=99999, early_stopping_rounds=50, valid=True):
        self.params = params
        self.cols_all = cols_all
def register_config_loader(
    self, conf_paths: Iterable[str], env: str, extra_params: Dict[str, Any]
) -> ConfigLoader:
    return ConfigLoader(conf_paths)
def config_loader():
    return ConfigLoader(conf_source=str(Path.cwd() / settings.CONF_SOURCE))
def register_config_loader(self, conf_paths: Iterable[str]) -> ConfigLoader:
    return ConfigLoader(conf_paths)
# AUTOGENERATED! DO NOT EDIT! File to edit: catalog.ipynb (unless otherwise specified).

__all__ = ['conf_loader', 'conf_test_data_catalog', 'test_data_catalog']

# Cell
from kedro.config import ConfigLoader
from kedro.io import DataCatalog

# Cell
conf_loader = ConfigLoader("conf/base")
conf_test_data_catalog = conf_loader.get("catalog*.yaml", "catalog*/*.yaml")
test_data_catalog = DataCatalog.from_config(conf_test_data_catalog)
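A hedged usage sketch (not part of the generated module) showing how the catalog built above could be queried; the entry name `example_table` is hypothetical and would need to exist in the project's catalog YAML.

# Hypothetical: load one registered dataset from the catalog built above.
df = test_data_catalog.load("example_table")
print(df.head())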