Example #1
0
def create_data_merge_pipeline(mode: str = 'train', **kwargs):
    """Build the data-merge pipeline for training or inference.

    Args:
        mode: Either ``'train'`` or ``'inference'``.
        kwargs: Ignored; accepted for pipeline-factory compatibility.

    Returns:
        A ``Pipeline`` that merges the configured feature datasets into
        ``model_input_features``.

    Raises:
        ValueError: If ``mode`` is neither ``'train'`` nor ``'inference'``.
    """
    # Feature dataset names come from the local parameters config.
    conf_loader = ConfigLoader('conf/local/parameters/')
    use_feature_names = conf_loader.get(
        'data_engineering_parameters.yml')['use_feature_names']
    if mode == 'train':
        return Pipeline([
            node(merge_features,
                 inputs=['params:data_primary_key', *use_feature_names],
                 outputs='model_input_features'),
            node(extract_feature_columns,
                 inputs='model_input_features',
                 outputs='output_columns')
        ])
    if mode == 'inference':
        return Pipeline([
            node(merge_features,
                 inputs=['params:data_primary_key', *use_feature_names],
                 outputs='tmp_model_input_features'),
            node(format_features_for_inference,
                 inputs=['tmp_model_input_features', 'output_columns'],
                 outputs='model_input_features')
        ])
    # BUG FIX: previously an unknown mode fell through and returned None,
    # deferring the failure to the caller. Fail loudly at the source.
    raise ValueError(
        f"Unknown mode: {mode!r}; expected 'train' or 'inference'")
Example #2
0
    def _create_config_loader(  # pylint: disable=no-self-use
            self, conf_paths: Iterable[str]) -> ConfigLoader:
        """A factory method for the ConfigLoader instantiation.

        Returns:
            Instance of `ConfigLoader`.

        """
        hook_manager = get_hook_manager()
        loader_from_hook = hook_manager.hook.register_config_loader(  # pylint: disable=no-member
            conf_paths=conf_paths)
        if loader_from_hook:
            return loader_from_hook
        # No hook supplied a loader: fall back to the default one
        # (kept for backwards compatibility).
        return ConfigLoader(conf_paths)
Example #3
0
def test_table_embedding() -> None:
    """Run the autoencoder pipeline end-to-end from on-disk configuration."""
    loader = ConfigLoader(conf_paths=["eos/conf/base", "eos/conf/local"])

    # Wire up logging first so the rest of the run is observable.
    logging.config.dictConfig(loader.get("logging*", "logging*/**"))

    catalog = DataCatalog.from_config(loader.get("catalog*", "catalog*/**"))

    params = loader.get("parameters*", "parameters*/**")
    catalog.add_feed_dict(feed_dict=get_feed_dict(params=params))

    pipeline_conf = loader.get("pipelines*", "pipelines*/**")
    autoencoder = HatchDict(pipeline_conf).get("autoencoder_pipeline")

    SequentialRunner().run(pipeline=autoencoder, catalog=catalog)
Example #4
0
def get_config(project_path: str, env: str = None, **kwargs) -> ConfigLoader:
    """Loads Kedro's configuration at the root of the project.

    Args:
        project_path: The root directory of the Kedro project.
        env: The environment used for loading configuration; falls back to
            ``DEFAULT_RUN_ENV`` when omitted.
        kwargs: Ignore any additional arguments added in the future.

    Returns:
        ConfigLoader which can be queried to access the project config.

    """
    root = Path(project_path)
    run_env = env or DEFAULT_RUN_ENV
    # Search "base" first, then the environment-specific overrides.
    search_paths = [str(root / CONF_ROOT / layer) for layer in ("base", run_env)]
    return ConfigLoader(search_paths)
Example #5
0
def test_viz() -> None:
    """Build catalog/pipeline objects from config and export the viz site."""
    dir_static_site: str = "./public"
    # Configure pipeline and catalog objects
    conf_loader: ConfigLoader = ConfigLoader(
        conf_paths=["eos/conf/base", "eos/conf/local"])

    conf_logging: Dict[str, Any] = conf_loader.get("logging*", "logging*/**")
    logging.config.dictConfig(conf_logging)

    conf_catalog: Dict[str, Any] = conf_loader.get("catalog*", "catalog*/**")
    data_catalog: DataCatalog = DataCatalog.from_config(conf_catalog)

    conf_params: Dict[str, Any] = conf_loader.get("parameters*",
                                                  "parameters*/**")
    data_catalog.add_feed_dict(feed_dict=get_feed_dict(params=conf_params))

    conf_pipeline: Dict[str, Any] = conf_loader.get("pipelines*",
                                                    "pipelines*/**")
    ae_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "autoencoder_pipeline")
    nx_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "networkx_pipeline")
    dgl_pipeline: FlexiblePipeline = HatchDict(conf_pipeline).get(
        "dgl_pipeline")

    pipelines: Dict[str, FlexiblePipeline] = {
        "autoencoder_pipeline": ae_pipeline,
        "networkx_pipeline": nx_pipeline,
        "dgl_pipeline": dgl_pipeline,
        "master_pipeline": ae_pipeline + nx_pipeline + dgl_pipeline,
    }
    # Parse Python object information into JSON form and export to local
    call_viz(dir_static_site=dir_static_site,
             catalog=data_catalog,
             pipelines=pipelines)
    # Serve the static website from local
    # run_static_server(directory = dir_static_site, port = 4141)
    # BUG FIX: a bare Path object is always truthy, so the original
    # `assert Path(...).joinpath(...)` could never fail. Assert that the
    # exported file actually exists.
    assert Path(dir_static_site).joinpath("pipeline.json").exists()
Example #6
0
    def test_overlapping_patterns(self, tmp_path, caplog):
        """Check that same configuration file is not loaded more than once."""
        paths = [
            str(tmp_path / "base"),
            str(tmp_path / "dev"),
            str(tmp_path / "dev" / "user1"),
        ]
        # Fixture files: later environments override earlier ones.
        fixtures = {
            tmp_path / "base" / "catalog0.yml": {"env": "base",
                                                 "common": "common"},
            tmp_path / "dev" / "catalog1.yml": {"env": "dev",
                                                "dev_specific": "wiz"},
            tmp_path / "dev" / "user1" / "catalog2.yml": {"user1_c2": True},
            tmp_path / "dev" / "user1" / "catalog3.yml": {"user1_c3": True},
        }
        for file_path, content in fixtures.items():
            _write_yaml(file_path, content)

        catalog = ConfigLoader(paths).get("catalog*", "catalog*/**",
                                          "user1/catalog2*")
        # "dev" wins over "base" for the duplicated `env` key.
        assert catalog == {
            "env": "dev",
            "common": "common",
            "dev_specific": "wiz",
            "user1_c2": True,
            "user1_c3": True,
        }

        # catalog2.yml matches two patterns but must be processed once.
        expected_path = (tmp_path / "dev" / "user1" / "catalog2.yml").resolve()
        expected_message = (
            f"Config file(s): {expected_path} already processed, skipping loading..."
        )
        log_messages = [record.getMessage() for record in caplog.records]
        assert expected_message in log_messages
Example #7
0
 def test_load_base_config(self, tmp_path, conf_paths, base_config):
     """Test config loading if `local/` directory is empty"""
     # An empty local/ environment must not shadow anything from base/.
     (tmp_path / "local").mkdir(exist_ok=True)
     loaded_catalog = ConfigLoader(conf_paths).get("catalog*.yml")
     assert loaded_catalog == base_config
Example #8
0
import pandas as pd
import logging
import numpy as np
from one_two_trip.crossval.crossval import CV_score

from kedro.config import ConfigLoader

# Load the layered Kedro configuration once at import time.
conf_paths = ['conf/base', 'conf/local']
conf_loader = ConfigLoader(conf_paths)
conf_credentials = conf_loader.get('credentials*', 'credentials*/**')
conf_parameters = conf_loader.get('parameters*', 'parameters*/**')
conf_catalog = conf_loader.get('catalog*', 'catalog*/**')

# Model-level settings pulled from the parameters config.
cols_target = conf_parameters['model']['cols_target']
col_id = conf_parameters['model']['col_id']
col_client = conf_parameters['model']['col_client']
cols_cat = conf_parameters['model']['cols_cat']
cv_byclient = conf_parameters['model']['cv_byclient']
n_splits = conf_parameters['model']['n_splits']
n_repeats = conf_parameters['model']['n_repeats']

# LightGBM training parameters.
params = conf_parameters['lightgbm']['params']


def union_data_node(train: pd.DataFrame, test: pd.DataFrame) -> pd.DataFrame:
    """Stack the train and test frames vertically with a fresh 0..n-1 index."""
    return pd.concat([train, test], sort=False, ignore_index=True)


def mean_byuser_node(df_union: pd.DataFrame) -> pd.DataFrame:
    cols = list(
Example #9
0
 def __init__(self):
     """Load the merged credentials config from conf/base and conf/local."""
     loader = ConfigLoader(["conf/base", "conf/local"])
     self.credentials = loader.get("credentials*", "credentials*/**")
Example #10
0
 def register_config_loader(self, conf_paths):  # pylint: disable=no-self-use
     """Return a ConfigLoader over the given conf paths."""
     loader = ConfigLoader(conf_paths)
     return loader
Example #11
0
def config_loader():
    """Return a ConfigLoader rooted at the current working directory."""
    conf_source = str(Path.cwd())
    return ConfigLoader(conf_source=conf_source)
Example #12
0
 def register_config_loader(self, conf_paths) -> ConfigLoader:
     """Build and return the default ConfigLoader for these conf paths."""
     loader = ConfigLoader(conf_paths)
     return loader
Example #13
0
 def register_config_loader(self,
                            conf_paths: Iterable[str]) -> ConfigLoader:
     """Build the ConfigLoader, logging the paths it will scan."""
     extra_fields = {"conf_paths": conf_paths}
     self.logger.info("Registering config loader", extra=extra_fields)
     return ConfigLoader(conf_paths)
Example #14
0
import logging
from typing import Any, Dict

import numpy as np
import pandas as pd
import discord
from discord.ext import commands
import datetime

from urllib import parse, request
import re

from kedro.config import ConfigLoader

# Load the merged credentials config at import time.
# NOTE(review): presumably this holds the Discord bot token used by
# run_bot_odesla below — confirm against conf/local/credentials.yml.
conf_paths = ["conf/base", "conf/local"]
conf_loader = ConfigLoader(conf_paths)
credentials = conf_loader.get("credentials*", "credentials*/**")


def run_bot_odesla():

    bot = commands.Bot(command_prefix='!', description="Bot de ODESLA")

    @bot.command()
    async def info(ctx):
        embed = discord.Embed(title=f"{ctx.guild.name}",
                              descripcion='Test',
                              timestamp=datetime.datetime.utcnow(),
                              color=discord.Color.blue())
        embed.add_field(name="1", value=f"{ctx.guild.created_at}")
        embed.add_field(name="2", value=f"{ctx.guild.created_at}")
Example #15
0
import matplotlib.pyplot as plt
from kedro.config import ConfigLoader
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline, node
from kedro.runner import SequentialRunner

# Find the configuration (catalog.yaml) in the current working directory and load it
conf_loader = ConfigLoader(".")
conf_catalog = conf_loader.get("catalog*")

# Create the Data Catalog from the catalog.yml file
io = DataCatalog.from_config(conf_catalog)
# NOTE(review): "titanic_training_data" must be declared in the catalog
# config for this load to succeed — confirm against catalog.yml.
df = io.load("titanic_training_data")


# Create nodes by writing Python functions
# Remove NaN values
def clean_raw_data(df):
    """Drop the Ticket/Cabin columns, then discard rows containing NaN."""
    trimmed = df.drop(columns=["Ticket", "Cabin"])
    return trimmed.dropna()


# Plot the amount of people who survived and who died.
def plot_survival_breakdown(df):
    plt.figure(figsize=(6, 4))
    fig, ax = plt.subplots()
    df.Survived.value_counts().plot(kind="barh", color="blue", alpha=0.65)
    ax.set_ylim(-1, len(df.Survived.value_counts()))
    plt.title("Survival Breakdown (1 = Survived, 0 = Died)")
    return fig
Example #16
0
 def test_nested_subdirs(self, tmp_path):
     """Test loading the config from subdirectories"""
     loaded = ConfigLoader(str(tmp_path / "base")).get("**/catalog*")
     # The top-level and prod entries must resolve to the same dataset type.
     assert loaded["cars"]["type"] == "CSVLocalDataSet"
     assert loaded["prod"]["cars"]["type"] == "CSVLocalDataSet"
     assert loaded["cars"]["save_args"]["index"] is True
Example #17
0
def get_model_params() -> Dict[str, Any]:
    """Load the best evaluated hyper-parameters from the model params YAML."""
    loaded = ConfigLoader(MODEL_PARAMETERS_PATH).get('*.yaml')
    return loaded['best_evaluated_params']
Example #18
0
def __get_creds():
    """Return the merged credentials config from conf/base and conf/local."""
    loader = ConfigLoader(["conf/base", "conf/local"])
    return loader.get("credentials*", "credentials*/**")
Example #19
0
from kedro.config import ConfigLoader
from typing import Dict

# Build the loader once at import time; the catalog dict below is shared
# by the helper functions in this module.
conf_paths = ["conf/base", "conf/local"]
conf_loader = ConfigLoader(conf_paths)

conf_catalog = conf_loader.get("catalog*", "catalog*/**")


def fetch_catalog() -> Dict[str, Dict]:
    """Expose the module-level catalog configuration.

    Returns:
        Dict[str, Dict]: the merged ``catalog*`` config loaded at import time.
    """
    return conf_catalog


def print_catalog_path(conf_catalog: Dict[str, Dict]):
    """Print the filepath of every dataset entry in the catalog.

    Args:
        conf_catalog (Dict[str, Dict]): catalog dict; each entry is
            expected to carry a "filepath" key.
    """
    print("Catalog entry path:")
    for name, entry in conf_catalog.items():
        print(f'{name} -- {entry["filepath"]}')
Example #20
0
    def register_config_loader(self,
                               conf_paths: Iterable[str]) -> ConfigLoader:
        """Return a ConfigLoader after swapping in a custom ``_load_config``.

        NOTE(review): this mutates global module state — every ConfigLoader
        created after this call uses the patched ``_load_config`` (defined
        elsewhere in this file), not just the instance returned here.
        """
        import kedro.config.config

        kedro.config.config._load_config = _load_config
        return ConfigLoader(conf_paths)
Example #21
0
 def test_empty_patterns(self, conf_paths):
     """Check the error if no config patterns were specified"""
     # get() with no arguments must reject the call with a clear message.
     expected = (r"`patterns` must contain at least one glob pattern "
                 r"to match config filenames against")
     with pytest.raises(ValueError, match=expected):
         ConfigLoader(conf_paths).get()
def kedro_conf(mock_config):
    """Return the merged catalog config from conf/base and conf/local."""
    loader = ConfigLoader(["conf/base", "conf/local"])
    return loader.get("catalog*", "catalog*/**")
Example #23
0
def valid_rf(race_results_df_processed_valid, model_rf, parameters):
    """Validate the random-forest model on held-out races and log to mlflow.

    Tallies exacta / quinella / trio hit rates from per-horse rank
    predictions, logs the metrics to mlflow, and optionally sends a LINE
    notification with the run summary.

    Args:
        race_results_df_processed_valid: Validation frame indexed by race id,
            with a 'rank' column as the target.
        model_rf: Fitted model exposing ``predict``.
        parameters: Dict of run options; only 'is_notify' is read here.
    """
    # mlflow setup: metrics land under logs/mlruns relative to this file.
    print('FILE_DIR: ' + FILE_DIR)
    mlflow.set_tracking_uri(FILE_DIR + '/../../../logs/mlruns/')
    mlflow.set_experiment('forecast_keiba_valid')
    run_info = mlflow.start_run()
    # NOTE(review): tag says 'lr' but this function validates model_rf —
    # looks like a copy/paste slip; confirm before relying on the tag.
    mlflow.set_tag('model', 'lr')

    # Prepare the validation data (no-op reassignment kept as-is).
    race_results_df_processed_valid = race_results_df_processed_valid
    # Explanatory variables: everything except the target.
    X_valid = race_results_df_processed_valid.drop(['rank'], axis=1)
    # Target variable: finishing rank.
    y_valid = race_results_df_processed_valid['rank']

    # Run inference.
    y_valid_pred = model_rf.predict(X_valid)

    # Reshape for tallying: rows of [race_id, predicted_rank, actual_rank].
    valid_results_df = pd.DataFrame({'pred': y_valid_pred, 'actual': y_valid})
    race_id_list = list(set(list(valid_results_df.index)))
    valid_results_list = valid_results_df.reset_index().values.tolist()
    # Shuffle so ties between same-prediction horses are broken randomly.
    random.shuffle(valid_results_list)

    # Tally (exacta: predicted winner actually finished 1st).
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1]:
            for i in range(len(valid_results_list)):
                # Horses in this race predicted to finish {rank}.
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 1 and (valid_results_list[i][2]
                                                  == 1):
                        cnt_by_race += 1
        if cnt_by_race == 1:
            correct_count += 1
    # NOTE(review): hard-coded 100 assumes exactly 100 validation races;
    # len(race_id_list) would be safer — confirm the fixture size.
    acc_exacta_1 = correct_count / 100
    print('acc_exacta_1: ' + str(acc_exacta_1))

    # Tally (quinella: predicted top-2 both finished in the top 2).
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2]:
            for i in range(len(valid_results_list)):
                # Horses in this race predicted to finish {rank}.
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 2 and (valid_results_list[i][2] == 1
                                                  or valid_results_list[i][2]
                                                  == 2):
                        cnt_by_race += 1
        if cnt_by_race == 2:
            correct_count += 1
    acc_quinella_2 = correct_count / 100
    print('acc_quinella_2: ' + str(acc_quinella_2))

    # Tally (trio: predicted top-3 all finished in the top 3).
    correct_count = 0
    for race_id in race_id_list:
        pred_cnt_by_race = 0
        cnt_by_race = 0
        for rank in [1, 2, 3]:
            for i in range(len(valid_results_list)):
                # Horses in this race predicted to finish {rank}.
                if valid_results_list[i][0] == race_id and valid_results_list[
                        i][1] == rank:
                    pred_cnt_by_race += 1
                    if pred_cnt_by_race <= 3 and (
                            valid_results_list[i][2] == 1
                            or valid_results_list[i][2] == 2
                            or valid_results_list[i][2] == 3):
                        cnt_by_race += 1
        if cnt_by_race == 3:
            correct_count += 1
    acc_trio_3 = correct_count / 100
    print('acc_trio_3: ' + str(acc_trio_3))

    mlflow.log_metric("acc_exacta_1", acc_exacta_1)
    mlflow.log_metric("acc_quinella_2", acc_quinella_2)
    mlflow.log_metric("acc_trio_3", acc_trio_3)

    # Notification: push the run summary to LINE Notify if enabled.
    if parameters['is_notify']:
        run_result_dict = mlflow.get_run(run_info.info.run_id).to_dictionary()
        run_result_str = json.dumps(run_result_dict, indent=4)

        # LINE token comes from the local Kedro credentials config.
        conf_paths = [
            FILE_DIR + "/../../../conf/base", FILE_DIR + "/../../../conf/local"
        ]
        conf_loader = ConfigLoader(conf_paths)
        credentials = conf_loader.get("credentials*", "credentials*/**")
        token = credentials['dev_line']['access_token']

        url = "https://notify-api.line.me/api/notify"
        headers = {"Authorization": "Bearer " + token}
        payload = {"message": "model_rf" + run_result_str}
        requests.post(url, headers=headers, data=payload)

    mlflow.end_run()
def kedro_conf_path() -> dict:
    """Return the merged catalog config from conf/base and conf/local."""
    loader = ConfigLoader(["conf/base", "conf/local"])
    return loader.get("catalog*", "catalog*/**")
Example #25
0
from kedro.config import ConfigLoader

# CONFIG
conf_paths = ['conf/base']
conf_loader = ConfigLoader(conf_paths)
# NOTE(review): despite the generic name, `config` holds only the
# credentials entries (e.g. the warehouse connection settings).
config = conf_loader.get('credentials*', 'credentials*/**')

# DROP TABLES
# Idempotent drops so the schema can be rebuilt from scratch.
staging_events_table_drop = "DROP TABLE IF EXISTS staging_events;"
staging_songs_table_drop = "DROP TABLE IF EXISTS staging_songs;"
songplay_table_drop = "DROP TABLE IF EXISTS songplays;"
user_table_drop = "DROP TABLE IF EXISTS users;"
song_table_drop = "DROP TABLE IF EXISTS songs;"
artist_table_drop = "DROP TABLE IF EXISTS artists;"
time_table_drop = "DROP TABLE IF EXISTS time;"

# CREATE TABLES

staging_events_table_create = ("""
CREATE TABLE staging_events 
(
  artist        VARCHAR(200),
  auth          VARCHAR(50),
  firstName     VARCHAR(200),
  gender        CHAR(1),
  itemInSession INTEGER,
  lastname      VARCHAR(50),
  length        NUMERIC(10,5),
  level         VARCHAR(10),
  location      VARCHAR(50),
  method        VARCHAR(10),
import lightgbm as lgb
import numpy as np
import pandas as pd
import shap
import datetime
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from hyperopt import STATUS_OK
from digital_reputation_challenge.nodes.datatransform import get_fold_data

from kedro.config import ConfigLoader

# Load the merged parameters config once at import time.
conf_paths = ['conf/base', 'conf/local']
conf_loader = ConfigLoader(conf_paths)
conf_parameters = conf_loader.get('parameters*', 'parameters*/**')

from sklearn.model_selection import StratifiedKFold, KFold, RepeatedStratifiedKFold


class CV_score:
    def __init__(self,
                 params,
                 cols_all,
                 col_target,
                 cols_cat='auto',
                 num_boost_round=99999,
                 early_stopping_rounds=50,
                 valid=True):
        self.params = params
        self.cols_all = cols_all
Example #27
0
 def register_config_loader(
     self, conf_paths: Iterable[str], env: str, extra_params: Dict[str, Any]
 ) -> ConfigLoader:
     """Return the default ConfigLoader; env/extra_params are unused here."""
     loader = ConfigLoader(conf_paths)
     return loader
Example #28
0
def config_loader():
    """Return a ConfigLoader rooted at the project's configured conf source."""
    conf_dir = Path.cwd() / settings.CONF_SOURCE
    return ConfigLoader(conf_source=str(conf_dir))
Example #29
0
 def register_config_loader(self,
                            conf_paths: Iterable[str]) -> ConfigLoader:
     """Return the default ConfigLoader over the supplied conf paths."""
     loader = ConfigLoader(conf_paths)
     return loader
# AUTOGENERATED! DO NOT EDIT! File to edit: catalog.ipynb (unless otherwise specified).

__all__ = ['conf_loader', 'conf_test_data_catalog', 'test_data_catalog']

# Cell

from kedro.config import ConfigLoader
from kedro.io import DataCatalog

# Cell
# Load the test-data catalog definition and build a DataCatalog from it.
# (Autogenerated from catalog.ipynb — edit the notebook, not this file.)
conf_loader = ConfigLoader("conf/base")
conf_test_data_catalog = conf_loader.get("catalog*.yaml", "catalog*/*.yaml")
test_data_catalog = DataCatalog.from_config(conf_test_data_catalog)