from safe_logger import SafeLogger logger = SafeLogger("api-connect plugin Pagination") class Pagination(object): def __init__(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None): self.next_page_key = None self.skip_key = None self.limit_key = None self.total_key = None self.total = None self.next_page_url = None self.remaining_records = None self.records_to_skip = None self.pagination_type = "" self.counting_key = None self.counter = None self.is_last_batch_empty = None self.is_first_batch = None self.is_paging_started = None self.next_page_number = None def configure_paging(self, config=None, skip_key=None, limit_key=None, total_key=None, next_page_key=None, url=None, pagination_type="na"): config = {} if config is None else config self.limit_key = config.get("limit_key", limit_key) self.pagination_type = config.get("pagination_type", pagination_type) if self.pagination_type == "next_page": self.next_page_key = config.get("next_page_key", next_page_key)
import pandas as pd
import numpy as np
from gluonts_forecasts.utils import concat_timeseries_per_identifiers, concat_all_timeseries, add_row_origin, quantile_forecasts_series
from constants import METRICS_DATASET, METRICS_COLUMNS_DESCRIPTIONS, TIMESERIES_KEYS, ROW_ORIGIN, CUSTOMISABLE_FREQUENCIES_OFFSETS
from gluonts.model.forecast import QuantileForecast
from safe_logger import SafeLogger

logger = SafeLogger("Forecast plugin")


class TrainedModel:
    """
    Wrapper class to make forecasts using a GluonTS Predictor and a training GluonTS ListDataset, and to output a well-formatted forecasts dataframe

    Attributes:
        predictor (gluonts.model.predictor.Predictor)
        gluon_dataset (gluonts.dataset.common.ListDataset): GluonTS ListDataset generated by the GluonDataset class (with extra fields to name timeseries)
        prediction_length (int): Number of time steps to predict (at most the prediction length used in training)
        quantiles (list): List of forecast quantiles to compute in the forecasts_df
        include_history (bool): True to append the training data to the forecasts dataframe
        history_length_limit (int): Maximum number of values to retrieve from historical data per timeseries. Defaults to None, which means all.
        time_column_name (str): Time column name used in training
        identifiers_columns (list): List of timeseries identifiers column names used in training
        forecasts_df (DataFrame): Dataframe with the different quantiles forecasts and the training data if include_history is True
    """

    def __init__(self, predictor, gluon_dataset, prediction_length, quantiles,
from dku_io_utils.recipe_config_loading import load_predict_config
from dku_io_utils.utils import set_column_description
from dku_io_utils.checks_utils import external_features_check
from dku_io_utils.model_selection import ModelSelection
from gluonts_forecasts.utils import add_future_external_features
from gluonts_forecasts.trained_model import TrainedModel
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")

params = load_predict_config()

start = perf_counter()
logger.info("Forecasting future values...")

model_selection = ModelSelection(
    folder=params["model_folder"],
    partition_root=params["partition_root"],
)

if params["manual_selection"]:
    model_selection.set_manual_selection_parameters(session_name=params["selected_session"], model_label=params["selected_model_label"])
else:
    model_selection.set_auto_selection_parameters(performance_metric=params["performance_metric"])

predictor = model_selection.get_model_predictor()
gluon_train_dataset = model_selection.get_gluon_train_dataset()
import os
import requests
from time import sleep
from onedrive_item import OneDriveItem
from onedrive_constants import OneDriveConstants
from safe_logger import SafeLogger
from common import get_value_from_path

logger = SafeLogger("onedrive plugin", forbiden_keys=["onedrive_credentials"])


class OneDriveClient():
    access_token = None
    CHUNK_SIZE = 320 * 1024
    DRIVE_API_URL = "https://graph.microsoft.com/v1.0/me/drive/"
    ITEMS_API_URL = "https://graph.microsoft.com/v1.0/me/drive/items/"
    SHARED_API_URL = "https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{file_path}:"
    SHARED_WITH_ME_URL = "https://graph.microsoft.com/v1.0/me/drive/sharedWithMe"

    def __init__(self, access_token, shared_folder_root=""):
        self.access_token = access_token
        self.shared_with_me = None
        self.drive_id = None
        self.shared_folder_root = shared_folder_root
        if shared_folder_root:
            self.shared_with_me = self.get_shared_with_me()
            self.drive_id = self.get_shared_directory_drive_id(shared_folder_root)

    def upload(self, path, file_handle):
        # https://docs.microsoft.com/fr-fr/onedrive/developer/rest-api/api/driveitem_createuploadsession?view=odsp-graph-online
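        # --- Illustrative note (assumption based on the Graph API documentation linked above, not shown in this excerpt) ---
        # Large files are uploaded through an upload session (createUploadSession) and sent
        # in fragments; CHUNK_SIZE is 320 KiB because the Graph API requires fragment sizes
        # to be multiples of 320 KiB (327,680 bytes).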
from safe_logger import SafeLogger

logger = SafeLogger("Timeseries preparation plugin")


class DecompositionInputValidator(object):
    """Checks if the input dataframe is consistent with a decomposition

    Attributes:
        dku_config(DecompositionConfig): mapping structure storing the recipe parameters
        minimum_observations(int): Minimum number of observations required by the decomposition method
    """

    def __init__(self, dku_config):
        self.dku_config = dku_config
        self.minimum_observations = 3

    def check(self, df):
        """Checks if the input dataframe is compatible with the decomposition method

        :param df: Input dataframe
        :type df: pd.DataFrame
        """
        logger.info("Checking input values...")
        self._check_model_compatibility(df)
        self._check_size(df)
        logger.info("Checking input values: the recipe parameters are consistent with the input dataset")

    def _check_model_compatibility(self, df):
        """Checks if the input dataframe is compatible with the decomposition model"""
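# --- Illustrative usage sketch (not part of the plugin source; dku_config and df_prepared are assumptions) ---
# In the decomposition recipe, the validator is built from the parsed recipe config and
# run on the prepared dataframe before the decomposition itself:
#
#     input_validator = DecompositionInputValidator(dku_config)
#     input_validator.check(df_prepared)  # expected to raise if the checks fail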
# -*- coding: utf-8 -*-
import dataiku
from dataiku.customrecipe import get_input_names_for_role, get_recipe_config, get_output_names_for_role
import pandas as pd
from safe_logger import SafeLogger
from dku_utils import get_dku_key_values, get_endpoint_parameters
from rest_api_recipe_session import RestApiRecipeSession

logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"])


def get_partitioning_keys(id_list, dku_flow_variables):
    partitioning_keys = {}
    partitioning = id_list.get_config().get("partitioning")
    if partitioning:
        dimensions_types = partitioning.get("dimensions", [])
        dimensions = []
        for dimension_type in dimensions_types:
            dimensions.append(dimension_type.get("name"))
        for dimension in dimensions:
            dimension_src = "DKU_DST_{}".format(dimension)
            if dimension_src in dku_flow_variables:
                partitioning_keys[dimension] = dku_flow_variables.get(dimension_src)
    return partitioning_keys


input_A_names = get_input_names_for_role('input_A_role')
config = get_recipe_config()
dku_flow_variables = dataiku.get_flow_variables()
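# --- Illustrative sketch (not part of the recipe; the dimension name and values are assumptions) ---
# For an input dataset partitioned on a "date" dimension, with the flow variable
# DKU_DST_date set to "2023-01-01" for the partition being built,
#     get_partitioning_keys(<input dataset handle>, dku_flow_variables)
# returns {"date": "2023-01-01"}, which the recipe can then use to fill
# {{date}}-style placeholders in the endpoint configuration.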
from gluonts.model.estimator import Estimator
from gluonts.model.forecast import SampleForecast
from gluonts.model.predictor import RepresentablePredictor
from gluonts.support.pandas import frequency_add
from gluonts.core.component import validated
from gluonts_forecasts.custom_models.utils import cast_kwargs
from constants import TIMESERIES_KEYS
from pmdarima.arima.utils import nsdiffs
import pmdarima as pm
import numpy as np
from safe_logger import SafeLogger
from tqdm import tqdm
from threadpoolctl import threadpool_limits

logger = SafeLogger("Forecast plugin - AutoARIMA")


class AutoARIMAPredictor(RepresentablePredictor):
    """
    Predictor wrapping pmdarima's AutoARIMA model. It subclasses GluonTS's
    RepresentablePredictor, the base class for predictors that are not based on Gluon;
    such predictors should have @validated() constructors, as (de)serialization and
    value equality are implemented on top of the @validated() logic.

    Parameters
    ----------
    prediction_length
        Prediction horizon.
    freq
        Frequency of the predicted data.
    """
import requests
import time
from pagination import Pagination
from safe_logger import SafeLogger
from loop_detector import LoopDetector
from dku_utils import get_dku_key_values
from dku_constants import DKUConstants

logger = SafeLogger("api-connect plugin", forbiden_keys=["token", "password"])


def template_dict(dictionnary, **kwargs):
    """Recurses into the dictionary and replaces {{key}} templates with the matching values present in the kwargs dictionary"""
    ret = dict.copy(dictionnary)
    for key in ret:
        if isinstance(ret[key], dict):
            ret[key] = template_dict(ret[key], **kwargs)
        if isinstance(ret[key], str):
            ret[key] = format_template(ret[key], **kwargs)
    return ret


def format_template(template, **kwargs):
    """Replaces {{key}} elements in the template with the matching values from the kwargs dictionary"""
    if template is None:
        return None
    formated = template
    for key in kwargs:
        replacement = kwargs.get(key, "")
        formated = formated.replace("{{{{{}}}}}".format(key), str(replacement))
    return formated
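# --- Illustrative usage sketch (not part of the plugin source; the endpoint dict and values are assumptions) ---
# Shows how template_dict substitutes {{placeholders}} in a nested endpoint definition.
if __name__ == "__main__":
    endpoint = {
        "endpoint_url": "https://api.example.org/v1/users/{{user_id}}",
        "requests_params": {"headers": {"Authorization": "Bearer {{token}}"}},
    }
    rendered = template_dict(endpoint, user_id="42", token="s3cr3t")
    # rendered["endpoint_url"] is now "https://api.example.org/v1/users/42"
    logger.info("Rendered endpoint_url: {}".format(rendered["endpoint_url"]))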
# -*- coding: utf-8 -*-
from gluonts_forecasts.mxnet_utils import set_mxnet_context

from dataiku.customrecipe import get_recipe_config
from datetime import datetime
from dku_io_utils.utils import set_column_description
from gluonts_forecasts.training_session import TrainingSession
from dku_io_utils.recipe_config_loading import load_training_config, get_models_parameters
from dku_io_utils.utils import write_to_folder
from gluonts_forecasts.model_handler import get_model_label
from dku_constants import ObjectType
from timeseries_preparation.preparation import TimeseriesPreparator
from safe_logger import SafeLogger
from time import perf_counter

logger = SafeLogger("Forecast plugin")

session_name = datetime.utcnow().isoformat() + "Z"
logger.info("Starting training session {}...".format(session_name))

config = get_recipe_config()
params = load_training_config(config)

mxnet_context = set_mxnet_context(params["gpu_devices"])

models_parameters = get_models_parameters(config, is_training_multivariate=params["is_training_multivariate"])

start = perf_counter()

training_df = params["training_dataset"].get_dataframe()

timeseries_preparator = TimeseriesPreparator(
from gluonts.model.estimator import Estimator
from gluonts.model.forecast import SampleForecast
from gluonts.model.predictor import RepresentablePredictor
from gluonts.support.pandas import frequency_add
from gluonts.core.component import validated
from gluonts_forecasts.custom_models.utils import cast_kwargs
from constants import TIMESERIES_KEYS
from statsmodels.tsa.api import STLForecast
from statsmodels.tsa.exponential_smoothing.ets import ETSModel
import numpy as np
import pandas as pd
from safe_logger import SafeLogger
from tqdm import tqdm

logger = SafeLogger("Forecast plugin - SeasonalTrend")


class SeasonalTrendPredictor(RepresentablePredictor):
    """
    Predictor wrapping statsmodels' STLForecast with an ETS model (seasonal-trend
    decomposition based forecasting). It subclasses GluonTS's RepresentablePredictor,
    the base class for predictors that are not based on Gluon; such predictors should
    have @validated() constructors, as (de)serialization and value equality are
    implemented on top of the @validated() logic.

    Parameters
    ----------
    prediction_length
        Prediction horizon.
    freq
        Frequency of the predicted data.
    """
import sys

# PluginCodeEnvError is expected to be provided by the plugin's python-lib (its import is not shown in this excerpt)
if sys.version_info.major == 2:
    raise PluginCodeEnvError(
        "This custom recipe requires a Python 3 code env. You are using Python {}.{}. Please ask an administrator to delete the "
        "existing Python 2 code environment and create a new Python 3 code environment if you want to use the decomposition "
        "recipe".format(sys.version_info.major, sys.version_info.minor)
    )

from time import perf_counter

from dataiku.customrecipe import get_recipe_config

from io_utils import get_input_output, set_column_description
from recipe_config_loading import get_decomposition_params
from safe_logger import SafeLogger
from timeseries_preparation.preparation import TimeseriesPreparator

logger = SafeLogger("Timeseries preparation plugin")

(input_dataset, output_dataset) = get_input_output()
config = get_recipe_config()
input_dataset_columns = [column["name"] for column in input_dataset.read_schema()]
(dku_config, input_validator, decomposition) = get_decomposition_params(config, input_dataset_columns)

timeseries_preparator = TimeseriesPreparator(dku_config)
input_df = input_dataset.get_dataframe(infer_with_pandas=False)
df_prepared = timeseries_preparator.prepare_timeseries_dataframe(input_df)
input_validator.check(df_prepared)

start = perf_counter()
import json

from powerbi import PowerBI, generate_access_token
from dataiku.exporter import Exporter
from math import isnan
from safe_logger import SafeLogger

logger = SafeLogger("power-bi-v2 plugin", forbiden_keys=["ms-oauth_credentials", "password", "client-secret"])


class PowerBIExporter(Exporter):
    EMPTY_CONNECTION = {
        "username": None,
        "password": None,
        "client-id": None,
        "client-secret": None
    }

    def __init__(self, config, plugin_config):
        logger.info("config={}, plugin_config={}".format(logger.filter_secrets(config), logger.filter_secrets(plugin_config)))
        self.config = config
        self.plugin_config = plugin_config
        self.row_index = 0
        self.row_buffer = {}
        self.row_buffer["rows"] = []
        self.pbi_dataset = self.config.get("dataset", None)
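        # --- Illustrative note (not part of the plugin source) ---
        # DSS instantiates the exporter with the per-export `config` and the global
        # `plugin_config`; the log line above relies on SafeLogger.filter_secrets to
        # strip the secrets declared in forbiden_keys (password, client-secret, ...)
        # before anything is written to the logs.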