Example #1

from dku_io_utils.recipe_config_loading import load_predict_config
from dku_io_utils.utils import set_column_description
from dku_io_utils.checks_utils import external_features_check
from dku_io_utils.model_selection import ModelSelection
from gluonts_forecasts.utils import add_future_external_features
from gluonts_forecasts.trained_model import TrainedModel
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")
params = load_predict_config()

start = perf_counter()
logger.info("Forecasting future values...")

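# Locate the trained model artifacts inside the input managed folder (per partition if the output is partitioned)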
model_selection = ModelSelection(
    folder=params["model_folder"],
    partition_root=params["partition_root"],
)

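# Either honor the user's explicit session/model choice or auto-select the best model by the configured metric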
if params["manual_selection"]:
    model_selection.set_manual_selection_parameters(
        session_name=params["selected_session"],
        model_label=params["selected_model_label"])
else:
    model_selection.set_auto_selection_parameters(
        performance_metric=params["performance_metric"])

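# Instantiate the GluonTS predictor for the selected model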
predictor = model_selection.get_model_predictor()

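# Reload the GluonTS dataset the model was trained on; it provides the context window for forecasting future values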
gluon_train_dataset = model_selection.get_gluon_train_dataset()
Example #2

from dataiku.customrecipe import get_recipe_config
from datetime import datetime
from dku_io_utils.utils import set_column_description, write_to_folder
from dku_io_utils.recipe_config_loading import load_training_config, get_models_parameters
from gluonts_forecasts.training_session import TrainingSession
from gluonts_forecasts.model_handler import get_model_label
from gluonts_forecasts.mxnet_utils import set_mxnet_context  # needed by the call below; module path assumed
from dku_constants import ObjectType
from timeseries_preparation.preparation import TimeseriesPreparator
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")
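# Identify the training session with a UTC timestamp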
session_name = datetime.utcnow().isoformat() + "Z"
logger.info("Starting training session {}...".format(session_name))

config = get_recipe_config()
params = load_training_config(config)

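# Pick the MXNet context (CPU or the configured GPU devices) used by the GluonTS estimators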
mxnet_context = set_mxnet_context(params["gpu_devices"])

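# Gather the hyperparameters of every model activated in the recipe settings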
models_parameters = get_models_parameters(
    config, is_training_multivariate=params["is_training_multivariate"])
start = perf_counter()

training_df = params["training_dataset"].get_dataframe()

timeseries_preparator = TimeseriesPreparator(
    time_column_name=params["time_column_name"],
    frequency=params["frequency"],
)
Example #3

import dataiku
from dataiku.customrecipe import get_input_names_for_role, get_recipe_config
from dku_utils import get_dku_key_values, get_endpoint_parameters  # plugin-lib helpers; module path assumed
from safe_logger import SafeLogger

logger = SafeLogger("API plugin")  # logger name assumed


def get_partitioning_keys(input_parameters_dataset, dku_flow_variables):
    """Map each partitioning dimension of the dataset to its value in the DKU flow variables."""
    partitioning_keys = {}
    partitioning = input_parameters_dataset.get_config().get("partitioning")
    if partitioning:
        dimensions_types = partitioning.get("dimensions", [])
        dimensions = [dimension_type.get("name") for dimension_type in dimensions_types]
        for dimension in dimensions:
            dimension_src = "DKU_DST_{}".format(dimension)
            if dimension_src in dku_flow_variables:
                partitioning_keys[dimension] = dku_flow_variables.get(dimension_src)
    return partitioning_keys


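# Read recipe inputs, configuration, and DKU flow variables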
input_A_names = get_input_names_for_role('input_A_role')
config = get_recipe_config()
dku_flow_variables = dataiku.get_flow_variables()

logger.info("config={}".format(logger.filter_secrets(config)))

credential_parameters = config.get("credential", {})
endpoint_parameters = get_endpoint_parameters(config)
extraction_key = endpoint_parameters.get("extraction_key", "")
is_raw_output = endpoint_parameters.get("raw_output", True)
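# Keep only non-empty parameter column selections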
parameter_columns = [column for column in config.get("parameter_columns", []) if column]
if not parameter_columns:
    raise ValueError("No parameter column selected.")
parameter_renamings = get_dku_key_values(config.get("parameter_renamings", {}))
custom_key_values = get_dku_key_values(config.get("custom_key_values", {}))
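# Merge the input dataset's partition values into the user-defined key/value pairs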
input_parameters_dataset = dataiku.Dataset(input_A_names[0])
partitioning_keys = get_partitioning_keys(input_parameters_dataset, dku_flow_variables)
custom_key_values.update(partitioning_keys)
input_parameters_dataframe = input_parameters_dataset.get_dataframe()
Example #4

from dataiku.customrecipe import get_recipe_config
from io_utils import get_input_output, set_column_description
from recipe_config_loading import get_decomposition_params
from safe_logger import SafeLogger
from time import perf_counter
from timeseries_preparation.preparation import TimeseriesPreparator

logger = SafeLogger("Timeseries preparation plugin")

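# Resolve recipe I/O and build the decomposition parameters from the recipe configuration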
(input_dataset, output_dataset) = get_input_output()
config = get_recipe_config()
input_dataset_columns = [column["name"] for column in input_dataset.read_schema()]
(dku_config, input_validator, decomposition) = get_decomposition_params(config, input_dataset_columns)

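# Prepare and validate the input time series before decomposition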
timeseries_preparator = TimeseriesPreparator(dku_config)
input_df = input_dataset.get_dataframe(infer_with_pandas=False)
df_prepared = timeseries_preparator.prepare_timeseries_dataframe(input_df)
input_validator.check(df_prepared)

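# Run the decomposition on the prepared series, timing the operation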
start = perf_counter()
logger.info("Decomposing time series...")
transformed_df = decomposition.fit(df_prepared)
logger.info("Decomposing time series: Done in {:.2f} seconds".format(perf_counter() - start))
output_dataset.write_with_schema(transformed_df)
set_column_description(output_dataset, decomposition.columns_descriptions, input_dataset)