class ModelConfigAccount38(BaseModelConfig):
    """Model config for Account 38."""

    MODEL_NAME = AccountID("Account 38")
    CONTRACTS = [
        ContractID("Contract_222"),
        ContractID("Contract_579"),
        ContractID("Contract_27"),
    ]
    TRAINING_START = pd.Timestamp(2018, 4, 1)
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3], moving_window=[3, 5])
    WEIGHTING = 10
    POSTPROCESS_DEPTH = 4

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features
class ModelConfigAccount8(BaseModelConfig):
    """Model config for Account 8."""

    MODEL_NAME = AccountID("Account 8")
    CONTRACTS = [
        ContractID("Contract_102"),
        ContractID("Contract_28"),
        ContractID("Contract_61"),
    ]
    TRAINING_START = pd.Timestamp(2018, 4, 1)
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3], moving_window=[3])
    WEIGHTING = 10
    POSTPROCESS_DEPTH = 4

    def preprocess_account_data(
        self,
        sales_raw: pd.DataFrame,
        grouping: List[str],
        internal_features: InternalFeatures,
    ) -> pd.DataFrame:
        """Remap project IDs, then delegate to the base-class preprocessing.

        Args:
            sales_raw: Raw sales data containing a ``"Project_ID"`` column.
            grouping: Passed through unchanged to the base class.
            internal_features: Passed through unchanged to the base class.

        Returns:
            The result of the base-class preprocessing on the remapped data.
        """
        remapped_sales = self._replace_replenishment_project_ids(sales_raw)
        return super().preprocess_account_data(
            remapped_sales, grouping, internal_features
        )

    @staticmethod
    def _replace_replenishment_project_ids(sales_raw: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``sales_raw`` with selected project IDs replaced.

        The input frame is left untouched; all edits happen on a copy.

        Args:
            sales_raw: Sales data with a ``"Project_ID"`` column.

        Returns:
            A new frame in which every mapped old project ID is rewritten to
            its replacement.
        """
        remapped = sales_raw.copy()
        id_mapping = {
            ProjectID("Project_365"): ProjectID("Project_364"),
        }
        for source_id, target_id in id_mapping.items():
            matches_source = remapped["Project_ID"] == source_id
            remapped.loc[matches_source, "Project_ID"] = target_id
        return remapped
class ModelConfigAccount11(BaseModelConfig):
    """Model config for Account 11."""

    MODEL_NAME = AccountID("Account 11")
    CONTRACTS = [ContractID("Contract_284")]
    TRAINING_START = pd.Timestamp(year=2018, month=1, day=1)
    POSTPROCESS_DEPTH = 4
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3], moving_window=[3, 5])
    WEIGHTING = 10

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Run ID used to load the "Airframe Map"
                exogenous feature.

        Returns:
            The internal features produced by ``generate_airframe_feature``
            and an empty exogenous-feature mapping.
        """
        airframe_map = self._data_loader.load_exogenous_feature(
            feature_name="Airframe Map",
            run_id=cleaned_data_run_id,
        )
        internal_features = generate_airframe_feature(airframe_map)
        return internal_features, {}

    def postprocess_forecast(
        self,
        ts: TimeSeries,
        ts_pred: TimeSeries,
        sales: pd.DataFrame,
        grouping: List[str],
    ) -> pd.DataFrame:
        """Return the predicted result data unchanged.

        Only ``ts_pred`` is read; the other arguments are ignored.
        """
        # Do not apply any post-processing for this account
        unprocessed_forecast = ts_pred.result_data
        return unprocessed_forecast
def _setup_test_files(tmp_path: Path) -> Tuple[Path, Path]:
    """Create the result directories under ``tmp_path`` and return CSV paths.

    Two directories are created directly below ``tmp_path`` (existing ones are
    tolerated): one named ``"Forecast <account>"`` for the expected results
    and one named ``"<account>"`` for the actual results, where the account is
    ``AccountID("Account 10")``. The CSV files themselves are not created.

    Args:
        tmp_path: Existing base directory for the test files.

    Returns:
        A tuple ``(expected_results_csv, actual_results_csv)``.
    """
    expected_dir = tmp_path / f"Forecast {AccountID('Account 10')}"
    expected_dir.mkdir(exist_ok=True)

    actual_dir = tmp_path / AccountID("Account 10")
    actual_dir.mkdir(exist_ok=True)

    return expected_dir / "expected.csv", actual_dir / "actual.csv"
class ModelConfigAccount16(BaseModelConfig):
    """Model config for Account 16."""

    MODEL_NAME = AccountID("Account 16")
    CONTRACTS = [ContractID("Contract_361"), ContractID("Contract_360")]
    TRAINING_START = pd.Timestamp(year=2018, month=1, day=1)
    POSTPROCESS_DEPTH = 6
    DEFAULT_FEATURES = default_features(
        lag=[1, 3, 5],
        moving_window=[3],
        moving_avg=[3],
    )
    WEIGHTING = 40  # Only used for backward forecasts before COVID-19

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Run ID used to load the "Build Rate"
                exogenous feature.

        Returns:
            The internal and exogenous features produced by
            ``generate_build_rates_features``, with the COVID market-shock
            feature added to the exogenous mapping as ``"Shock_Feature"``.
        """
        # Build Rates
        build_rate_data = self._data_loader.load_exogenous_feature(
            feature_name="Build Rate",
            run_id=cleaned_data_run_id,
        )
        internal, exogenous = generate_build_rates_features(build_rate_data)

        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous["Shock_Feature"] = (covid_shock, "None")
        return internal, exogenous

    def postprocess_forecast(
        self,
        ts: TimeSeries,
        ts_pred: TimeSeries,
        sales: pd.DataFrame,
        grouping: List[str],
    ) -> pd.DataFrame:
        """Return the predicted result data unchanged.

        Only ``ts_pred`` is read; the other arguments are ignored.
        """
        # Do not apply any post-processing for this account
        unprocessed_forecast = ts_pred.result_data
        return unprocessed_forecast
class ModelConfigAccount3(BaseModelConfig):
    """Model config for Account 3 and Account 4."""

    MODEL_NAME = AccountID("Account 3")
    CONTRACTS = [
        ContractID("Contract_225"),
        ContractID("Contract_227"),
        ContractID("Contract_623"),
        ContractID("Contract_629"),
        ContractID("Contract_630"),
        ContractID("Contract_632"),
        ContractID("Contract_633"),
        ContractID("Contract_635"),
    ]
    TRAINING_START = pd.Timestamp(2018, 1, 1)
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3, 4, 5], moving_window=[3])
    POSTPROCESS_DEPTH = 7
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Run ID used to load the "Airframe Map"
                exogenous feature.

        Returns:
            The internal features produced by ``generate_airframe_feature``
            and an exogenous mapping holding the COVID market-shock feature
            as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        airframe_map = self._data_loader.load_exogenous_feature(
            feature_name="Airframe Map",
            run_id=cleaned_data_run_id,
        )
        internal_features = generate_airframe_feature(airframe_map)
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return internal_features, exogenous_features
class ModelConfigAccount5(BaseModelConfig):
    """Model config for Account 5."""

    MODEL_NAME = AccountID("Account 5")
    TRAINING_START = pd.Timestamp(2018, 1, 1)
    DEFAULT_FEATURES = default_features(
        lag=[1, 2, 3],
        moving_window=[2],
        moving_avg=[2, 3, 4],
        moving_std_dev=[3],
    )
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19
    POSTPROCESS_DEPTH = 4
    CONTRACTS = [
        ContractID("Contract_377"),
        ContractID("Contract_376"),
        ContractID("Contract_378"),
        ContractID("Contract_375"),
    ]

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features

    def postprocess_forecast(
        self,
        ts: TimeSeries,
        ts_pred: TimeSeries,
        sales: pd.DataFrame,
        grouping: List[str],
    ) -> pd.DataFrame:
        """Return the predicted result data unchanged.

        Only ``ts_pred`` is read; the other arguments are ignored.
        """
        # Do not apply any post-processing for this account
        unprocessed_forecast = ts_pred.result_data
        return unprocessed_forecast
class ModelConfigAccount10(BaseModelConfig):
    """Model config for Account 10.

    This account only sets class-level constants; no base-class methods are
    overridden.
    """

    MODEL_NAME = AccountID("Account 10")
    CONTRACTS = [
        ContractID("Contract_242"),
    ]
    # First day of the training period.
    TRAINING_START = pd.Timestamp(year=2018, month=1, day=1)
    DEFAULT_FEATURES = default_features(
        lag=[1, 2, 3],
        moving_window=[3],
    )
    WEIGHTING = 10
    POSTPROCESS_DEPTH = 8
class ModelConfigAccount29(BaseModelConfig):
    """Model config for Account 29."""

    MODEL_NAME = AccountID("Account 29")
    CONTRACTS = [ContractID("Contract_601"), ContractID("Contract_546")]
    TRAINING_START = pd.Timestamp(year=2018, month=1, day=1)
    POSTPROCESS_DEPTH = 4
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3], moving_window=[3, 5])
    EXCLUDE_ITEMS = [21160]
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Run ID used to load the "Build Rate"
                exogenous feature.

        Returns:
            The internal and exogenous features produced by
            ``generate_build_rates_features``, with the COVID market-shock
            feature added to the exogenous mapping as ``"Shock_Feature"``.
        """
        # Build Rates
        build_rate_data = self._data_loader.load_exogenous_feature(
            feature_name="Build Rate",
            run_id=cleaned_data_run_id,
        )
        internal, exogenous = generate_build_rates_features(build_rate_data)

        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous["Shock_Feature"] = (covid_shock, "None")
        return internal, exogenous
class ModelConfigAccount44(BaseModelConfig):
    """Model config for Account 44."""

    MODEL_NAME = AccountID("Account 44")
    CONTRACTS = [
        ContractID("Contract_153"),
        ContractID("Contract_148"),
        ContractID("Contract_162"),
        ContractID("Contract_160"),
        ContractID("Contract_150"),
        ContractID("Contract_108"),
    ]
    TRAINING_START = pd.Timestamp(year=2018, month=1, day=1)
    POSTPROCESS_DEPTH = 4
    DEFAULT_FEATURES = default_features(
        lag=[1, 2, 3],
        moving_window=[],
        moving_avg=[3],
        moving_std_dev=[3],
        moving_non_zero=[],
    )
    WEIGHTING = 30  # Only used for backward forecasts before COVID-19

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features
class ModelConfigAccount466(BaseModelConfig):
    """Model config for Account 466."""

    MODEL_NAME = AccountID("Account 466")
    CONTRACTS = [ContractID("Contract_714")]
    TRAINING_START = pd.Timestamp(year=2018, month=4, day=1)
    POSTPROCESS_DEPTH = 4
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3], moving_window=[3, 5])
    EXCLUDE_ITEMS = [1061862, 1212899]
    PREPROCESS_UNIT_COST_AGGREGATION = "max"
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features
class ModelConfigAccount7(BaseModelConfig):
    """Model config for Account 7."""

    MODEL_NAME = AccountID("Account 7")
    CONTRACTS = [
        ContractID("Contract_48"),
        ContractID("Contract_386"),
        ContractID("Contract_385"),
        ContractID("Contract_383"),
        ContractID("Contract_391"),
        ContractID("Contract_395"),
        ContractID("Contract_394"),
        ContractID("Contract_387"),
        ContractID("Contract_392"),
        ContractID("Contract_393"),
        ContractID("Contract_390"),
        ContractID("Contract_384"),
        ContractID("Contract_389"),
        ContractID("Contract_396"),
    ]
    TRAINING_START = pd.Timestamp(2018, 1, 1)
    DEFAULT_FEATURES = default_features(
        lag=[1, 2, 3],
        moving_window=[2],
        moving_std_dev=[3],
        moving_avg=[2, 3, 4],
    )
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19
    POSTPROCESS_DEPTH = 8
    # Account-specific hyper-parameter search space.
    HYPER_SPACE = [
        Integer(5, 50, name="max_depth"),
        Real(1e-4, 1e-1, name="learn_rate", prior="log-uniform"),
        Real(0.9, 0.9999, name="learn_rate_annealing"),
        Integer(10, 25, name="min_rows"),
        Integer(50, 200, name="ntrees"),
        Real(1e-4, 1e-2, name="stopping_tolerance"),
        Integer(2, 10, name="stopping_rounds"),
        Real(1e-1, 1, name="sample_rate"),
        Real(0.99, 1, name="col_sample_rate"),
        Integer(100, 500, name="nbins"),
        Real(1e-10, 1e-3, name="min_split_improvement"),
        Integer(10, 11, name="nfolds"),
    ]

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features
class ModelConfigAccount6(BaseModelConfig):
    """Model config for Account 6."""

    MODEL_NAME = AccountID("Account 6")
    CONTRACTS = [ContractID("Contract_730")]
    TRAINING_START = pd.Timestamp(2018, 1, 1)
    # Shared by DEFAULT_FEATURES and SALES_MIN_PERIOD below.
    _LAGS = [1, 2, 3, 4, 6, 8, 10]
    DEFAULT_FEATURES = default_features(
        lag=_LAGS,
        moving_window=[3, 4, 6, 8, 10],
    )
    WEIGHTING = 1000  # Only used for backward forecasts before COVID-19
    POSTPROCESS_DEPTH = 6
    OVERRIDE_HYPER_PARAMS = {
        "nfolds": 5,
        "min_split_improvement": 1e-5,
    }
    OPTIMIZE_HYPER_PARAMETERS_N_CALLS = 10
    # Account-specific hyper-parameter search space.
    HYPER_SPACE = [
        Integer(5, 30, name="max_depth"),
        Real(1e-4, 1, name="learn_rate", prior="log-uniform"),
        Real(0.9, 0.9999, name="learn_rate_annealing"),
        Integer(10, 25, name="min_rows"),
        Integer(50, 200, name="ntrees"),
        Real(1e-4, 1e-2, name="stopping_tolerance"),
        Integer(2, 10, name="stopping_rounds"),
        Real(1e-1, 1, name="sample_rate"),
        Real(0.99, 1, name="col_sample_rate"),
        Integer(100, 500, name="nbins"),
        Real(1e-10, 1e-3, name="min_split_improvement"),
        Integer(5, 6, name="nfolds"),
    ]
    # Equal to the largest configured lag.
    SALES_MIN_PERIOD = max(_LAGS)
    PREPROCESS_OUTLIERS = True
    PREPROCESS_UNIT_COST_AGGREGATION = "max"

    # Set in preprocess_account_data; read by prepare_training_data and
    # postprocess_forecast.
    _runner_flag: pd.DataFrame

    def preprocess_account_data(
        self,
        sales_raw: pd.DataFrame,
        grouping: List[str],
        internal_features: InternalFeatures,
    ) -> pd.DataFrame:
        """Compute the runner flag, then delegate to the base preprocessing.

        The flag is derived by ``RR_NR_Flag`` from the raw sales over a
        window that ends at the prediction month and starts 11 months
        earlier, and is stored on ``self._runner_flag``.

        Args:
            sales_raw: Raw sales data.
            grouping: Passed through unchanged to the base class.
            internal_features: Passed through unchanged to the base class.

        Returns:
            The result of the base-class preprocessing.
        """
        # prediction_month is parsed with the "%Y%m" format.
        # NOTE(review): "ltm" presumably means "last twelve months" - confirm.
        window_end = pd.to_datetime(
            self._runtime_config.prediction_month, format="%Y%m"
        )
        window_start = window_end - pd.DateOffset(months=11)
        self._runner_flag = RR_NR_Flag(sales_raw, window_start, window_end)
        return super().preprocess_account_data(
            sales_raw, grouping, internal_features
        )

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features

    def prepare_training_data(
        self,
        sales: pd.DataFrame,
        grouping: List[str],
        exo_features: ExogenousFeatures,
    ) -> TimeSeries:
        """Extend the base-class training data with two exogenous features.

        ``"W_UC"`` is taken from ``extract_cost_info(sales)`` and ``"RR"``
        from the stored runner flag (with ``default=False``).

        Returns:
            The time series built by the base class with both features added.
        """
        training_series = super().prepare_training_data(
            sales, grouping, exo_features
        )
        cost_info = extract_cost_info(sales)
        training_series.add_exogenous_feature("W_UC", cost_info)
        training_series.add_exogenous_feature(
            "RR", self._runner_flag, default=False
        )
        return training_series

    def postprocess_forecast(
        self,
        ts: TimeSeries,
        ts_pred: TimeSeries,
        sales: pd.DataFrame,
        grouping: List[str],
    ) -> pd.DataFrame:
        """Post-process the predicted result data for this account.

        The prediction is first passed through
        ``postprocess_forecast_results`` (as an ad-hoc forecast, using
        ``POSTPROCESS_DEPTH``) and then through ``reduce_hits`` together with
        the stored runner flag. ``ts`` and ``sales`` are not used.
        """
        postprocessed = postprocess_forecast_results(
            ts_pred.result_data,
            grouping,
            self._runtime_config.forecast_start,
            self.POSTPROCESS_DEPTH,
            is_adhoc_forecast=True,
        )
        return reduce_hits(
            postprocessed,
            self._runner_flag,
            grouping,
            self._runtime_config.forecast_start,
        )
class ModelConfigAccount15(BaseModelConfig):
    """Model config for Account 15."""

    MODEL_NAME = AccountID("Account 15")
    CONTRACTS = [
        ContractID("Contract_254"),
        ContractID("Contract_545"),
        ContractID("Contract_548"),
        ContractID("Contract_547"),
    ]
    EXCLUDE_PROJECTS = [
        ProjectID("Project_2199"),
        ProjectID("Project_2196"),
        ProjectID("Project_2188"),
        ProjectID("Project_864"),
    ]
    TRAINING_START = pd.Timestamp(year=2018, month=4, day=1)
    POSTPROCESS_DEPTH = 3
    DEFAULT_FEATURES = default_features(lag=[1, 2, 3], moving_window=[3, 5])
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19

    def preprocess_account_data(
        self,
        sales_raw: pd.DataFrame,
        grouping: List[str],
        internal_features: InternalFeatures,
    ) -> pd.DataFrame:
        """Apply the extra scope filter, then delegate to the base class.

        Args:
            sales_raw: Raw sales data with ``"Contract_ID"`` and
                ``"Item_ID"`` columns.
            grouping: Passed through unchanged to the base class.
            internal_features: Passed through unchanged to the base class.

        Returns:
            The result of the base-class preprocessing on the filtered data.
        """
        in_scope_sales = self._additional_scope_filters(sales_raw)
        return super().preprocess_account_data(
            in_scope_sales, grouping, internal_features
        )

    @staticmethod
    def _additional_scope_filters(sales_raw: pd.DataFrame) -> pd.DataFrame:
        """Drop rows for specific items sold under contract "Contract_236".

        A row is removed only when both its contract and its item match. The
        input frame is left untouched; filtering happens on a copy.

        Args:
            sales_raw: Sales data with ``"Contract_ID"`` and ``"Item_ID"``
                columns.

        Returns:
            The rows of the copy that do not match the exclusion rule.
        """
        candidate_rows = sales_raw.copy()
        excluded_items = [362698, 21102, 1033556, 226282, 20754, 21297, 14518]
        on_filtered_contract = candidate_rows["Contract_ID"].isin(
            [ContractID("Contract_236")]
        )
        is_excluded_item = candidate_rows["Item_ID"].isin(excluded_items)
        out_of_scope = on_filtered_contract & is_excluded_item
        return candidate_rows.loc[~out_of_scope]

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An empty internal-feature mapping and an exogenous mapping that
            holds only the COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return {}, exogenous_features

    def postprocess_forecast(
        self,
        ts: TimeSeries,
        ts_pred: TimeSeries,
        sales: pd.DataFrame,
        grouping: List[str],
    ) -> pd.DataFrame:
        """Return the predicted result data unchanged.

        Only ``ts_pred`` is read; the other arguments are ignored.
        """
        # Do not apply any post-processing for this account
        unprocessed_forecast = ts_pred.result_data
        return unprocessed_forecast
class ModelConfigAccount14(BaseModelConfig):
    """Model config for Account 14."""

    MODEL_NAME = AccountID("Account 14")
    CONTRACTS = [
        ContractID("Contract_206"),
        ContractID("Contract_142"),
        ContractID("Contract_194"),
        ContractID("Contract_138"),
        ContractID("Contract_196"),
        ContractID("Contract_141"),
        ContractID("Contract_189"),
        ContractID("Contract_139"),
        ContractID("Contract_140"),
        ContractID("Contract_132"),
        ContractID("Contract_205"),
        ContractID("Contract_192"),
        ContractID("Contract_186"),
        ContractID("Contract_188"),
    ]
    TRAINING_START = pd.Timestamp(year=2018, month=1, day=1)
    POSTPROCESS_DEPTH = 4
    DEFAULT_FEATURES = default_features(lag=[1, 2], moving_window=[2])
    PREPROCESS_UNIT_COST_AGGREGATION = "max"
    SALES_MIN_PERIOD = 2
    WEIGHTING = 10  # Only used for backward forecasts before COVID-19

    def calculate_weights(self) -> Weights:
        """Return the training weights for this account.

        Forecasts starting on or after 2020-06-01 are weighted through
        ``calculate_weights_shock``; earlier forecasts use the base-class
        weighting.
        """
        forecast_start = self._runtime_config.forecast_start
        uses_shock_weights = forecast_start >= pd.Timestamp(2020, 6, 1)
        if not uses_shock_weights:
            return super().calculate_weights()

        # The last training month is the month preceding the forecast start.
        last_train_month = forecast_start - pd.DateOffset(months=1)
        return calculate_weights_shock(
            self.TRAINING_START,
            self._runtime_config.forecast_end,
            pd.Timestamp(2020, 4, 1),
            last_train_month,
        )

    def _add_project_feature(self, sales: pd.DataFrame, name: str) -> pd.DataFrame:
        """Add project feature to input data.

        The feature column is a copy of the ``"Project_ID"`` column. The
        column is written onto ``sales`` itself, so the caller's frame is
        modified in place.

        Args:
            sales: Sales to add feature to.
            name: Name of the feature column.

        Returns:
            :class:`~pandas.DataFrame` with added project feature (the same
            object that was passed in).
        """
        project_ids = sales["Project_ID"]
        sales[name] = project_ids
        return sales

    def configure_features(
        self, cleaned_data_run_id: int
    ) -> Tuple[InternalFeatures, ExogenousFeatures]:
        """Return the internal and exogenous feature configuration.

        Args:
            cleaned_data_run_id: Not used by this account.

        Returns:
            An internal mapping that registers ``_add_project_feature`` as
            ``"Project_Feature"`` and an exogenous mapping that holds the
            COVID market-shock feature as ``"Shock_Feature"``.
        """
        # Market shock feature: COVID
        covid_shock = generate_market_shock_feature(
            train_start=self.TRAINING_START,
            first_shock_month=pd.Timestamp(2020, 4, 1),
        )
        internal_features = {"Project_Feature": self._add_project_feature}
        exogenous_features = {"Shock_Feature": (covid_shock, "None")}
        return internal_features, exogenous_features