# Dataiku "predict" recipe script: load the recipe configuration, select a
# trained model (manually pinned or auto-selected by metric), then fetch the
# model's predictor and the GluonTS dataset it was trained on.
# NOTE(review): the excerpt appears truncated here — the forecasting /
# output-writing steps presumably follow; confirm against the full file.
from dku_io_utils.recipe_config_loading import load_predict_config
from dku_io_utils.utils import set_column_description
from dku_io_utils.checks_utils import external_features_check
from dku_io_utils.model_selection import ModelSelection
from gluonts_forecasts.utils import add_future_external_features
from gluonts_forecasts.trained_model import TrainedModel
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")
params = load_predict_config()

start = perf_counter()
logger.info("Forecasting future values...")

model_selection = ModelSelection(
    folder=params["model_folder"],
    partition_root=params["partition_root"],
)

# Two selection modes: the user either pinned a specific session + model
# label, or the best model is chosen for the configured performance metric.
if params["manual_selection"]:
    model_selection.set_manual_selection_parameters(
        session_name=params["selected_session"],
        model_label=params["selected_model_label"])
else:
    model_selection.set_auto_selection_parameters(
        performance_metric=params["performance_metric"])

predictor = model_selection.get_model_predictor()
gluon_train_dataset = model_selection.get_gluon_train_dataset()
from dataiku.customrecipe import get_recipe_config from datetime import datetime from dku_io_utils.utils import set_column_description from gluonts_forecasts.training_session import TrainingSession from dku_io_utils.recipe_config_loading import load_training_config, get_models_parameters from dku_io_utils.utils import write_to_folder from gluonts_forecasts.model_handler import get_model_label from dku_constants import ObjectType from timeseries_preparation.preparation import TimeseriesPreparator from safe_logger import SafeLogger from time import perf_counter logger = SafeLogger("Forecast plugin") session_name = datetime.utcnow().isoformat() + "Z" logger.info("Starting training session {}...".format(session_name)) config = get_recipe_config() params = load_training_config(config) mxnet_context = set_mxnet_context(params["gpu_devices"]) models_parameters = get_models_parameters( config, is_training_multivariate=params["is_training_multivariate"]) start = perf_counter() training_df = params["training_dataset"].get_dataframe() timeseries_preparator = TimeseriesPreparator( time_column_name=params["time_column_name"], frequency=params["frequency"],
    # Tail of get_partitioning_keys (the def line and the initialization of
    # `partitioning` / `partitioning_keys` are above this excerpt): collect
    # the partitioning dimension names, then map each one to the matching
    # DKU_DST_<dimension> flow variable when that variable is present.
    dimensions_types = partitioning.get("dimensions", [])
    dimensions = []
    for dimension_type in dimensions_types:
        dimensions.append(dimension_type.get("name"))
    for dimension in dimensions:
        dimension_src = "DKU_DST_{}".format(dimension)
        if dimension_src in dku_flow_variables:
            partitioning_keys[dimension] = dku_flow_variables.get(dimension_src)
    return partitioning_keys


# Top-level recipe setup: read the recipe config and flow variables, extract
# the endpoint/credential/parameter settings, and open the input dataset.
input_A_names = get_input_names_for_role('input_A_role')
config = get_recipe_config()
dku_flow_variables = dataiku.get_flow_variables()
# filter_secrets redacts sensitive values before the config is logged.
logger.info("config={}".format(logger.filter_secrets(config)))

credential_parameters = config.get("credential", {})
endpoint_parameters = get_endpoint_parameters(config)
extraction_key = endpoint_parameters.get("extraction_key", "")
is_raw_output = endpoint_parameters.get("raw_output", True)
# Keep only non-empty parameter column names; at least one is required.
parameter_columns = [column for column in config.get("parameter_columns", []) if column]
if len(parameter_columns) == 0:
    raise ValueError("There is no parameter column selected.")
parameter_renamings = get_dku_key_values(config.get("parameter_renamings", {}))
custom_key_values = get_dku_key_values(config.get("custom_key_values", {}))
input_parameters_dataset = dataiku.Dataset(input_A_names[0])
# Partition-derived keys override/extend the user-provided custom key values.
partitioning_keys = get_partitioning_keys(input_parameters_dataset, dku_flow_variables)
custom_key_values.update(partitioning_keys)
input_parameters_dataframe = input_parameters_dataset.get_dataframe()
# Dataiku time-series decomposition recipe: prepare and validate the input
# time series, fit the decomposition, and write the transformed result (plus
# column descriptions) to the output dataset.
from dataiku.customrecipe import get_recipe_config  # FIX: used below but was never imported (NameError)
from io_utils import get_input_output, set_column_description
from recipe_config_loading import get_decomposition_params
from safe_logger import SafeLogger
from time import perf_counter  # FIX: used below but was never imported (NameError)
from timeseries_preparation.preparation import TimeseriesPreparator

logger = SafeLogger("Timeseries preparation plugin")

(input_dataset, output_dataset) = get_input_output()
config = get_recipe_config()
input_dataset_columns = [column["name"] for column in input_dataset.read_schema()]
(dku_config, input_validator, decomposition) = get_decomposition_params(config, input_dataset_columns)

timeseries_preparator = TimeseriesPreparator(dku_config)
# infer_with_pandas=False keeps the dataset schema's declared column types
# instead of letting pandas re-infer them.
input_df = input_dataset.get_dataframe(infer_with_pandas=False)
df_prepared = timeseries_preparator.prepare_timeseries_dataframe(input_df)
input_validator.check(df_prepared)

start = perf_counter()
logger.info("Decomposing time series...")
transformed_df = decomposition.fit(df_prepared)
logger.info(
    "Decomposing time series: Done in {:.2f} seconds".format(perf_counter() - start))

# NOTE(review): write_with_schema's return value is assigned but never read
# in this excerpt; kept in case later code (outside this view) uses it.
transformation_df = output_dataset.write_with_schema(transformed_df)
set_column_description(output_dataset, decomposition.columns_descriptions, input_dataset)