    def run(self):
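        # Train a LightGBM model for this store on the prepared sales data
        # and persist the fitted model.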
        config = GlobalParams().config

        store_id = self.store_id
        logger.info(f"Training model for store_id: {store_id}")

        sales = self.input().load()
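        # Hold out the most recent `valid_num_days` dates for validation.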
        train_dates = sorted(sales.date.unique())
        valid_start_date = min(train_dates[-config["valid_num_days"]:])

        drop_cols = [
            config["pred_target_col"], config["pred_date_col"], "date"
        ]
        train_mask = sales.date < valid_start_date
        valid_mask = sales.date >= valid_start_date

        X_train = sales[train_mask].drop(drop_cols, axis=1)
        y_train = sales[train_mask][config["pred_target_col"]]

        X_valid = sales[valid_mask].drop(drop_cols, axis=1)
        y_valid = sales[valid_mask][config["pred_target_col"]]

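        # Fit LightGBM with early stopping on the held-out validation window
        # (the early_stopping_rounds fit argument assumes a lightgbm version
        # whose sklearn API still accepts it; newer releases use callbacks).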
        model = LGBMRegressor(**config["lgb_params"])
        model.fit(
            X_train,
            y_train,
            eval_set=[(X_train, y_train), (X_valid, y_valid)],
            eval_metric="rmse",
            early_stopping_rounds=10,
        )
        self.output().save(model)
    def run(self):
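        # Build the test feature set: restrict the base sales table to the
        # test date range and join the time-series features on id/date.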
        config = GlobalParams().config
        test_start_date = config["test_start_date"]
        test_end_date = config["test_end_date"]

        logger.info(f"Preparing test data for date range: "
                    f"{test_start_date} - {test_end_date}")

        sales_base = self.input()["sales_base"].load()
        sales = sales_base[(test_start_date <= sales_base.date)
                           & (sales_base.date <= test_end_date)]
        del sales_base

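        # Left-join the time-series features, keeping only the feature
        # columns that are not already present in the base sales frame.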
        sales_features = self.input()["sales_ts"].load()
        sales_features = sales_features[
            (test_start_date <= sales_features.date)
            & (sales_features.date <= test_end_date)]
        columns = ["id", "date"] + list(
            set(sales_features.columns) - set(sales.columns))
        sales = sales.merge(
            sales_features[[c for c in sales_features.columns
                            if c in columns]],
            how="left",
            on=["id", "date"],
        )
        del sales_features
        self.output().save(sales)
    def run(self):
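        # Build the training set: restrict base sales to the training date
        # range, join the time-series features, and derive the shifted target.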
        config = GlobalParams().config
        train_start_date = config["train_start_date"]
        train_end_date = config["train_end_date"]

        logger.info(f"Preparing train data for range: "
                    f"{train_start_date} - {train_end_date}")
        sales_base = self.input()["sales_base"].load()
        sales = sales_base[(train_start_date <= sales_base.date)
                           & (sales_base.date <= train_end_date)]
        del sales_base

        sales_features = self.input()["sales_ts"].load()
        sales_features = sales_features[
            (train_start_date <= sales_features.date)
            & (sales_features.date <= train_end_date)]
        columns = ["id", "date"] + list(
            set(sales_features.columns) - set(sales.columns))
        sales = sales.merge(
            sales_features[[c for c in sales_features.columns
                            if c in columns]],
            how="left",
            on=["id", "date"],
        )
        del sales_features

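        # The prediction target is the value pred_week weeks ahead: shift the
        # target and date columns backwards within each id group, then drop
        # rows whose future target falls outside the available data.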
        shift_days = self.pred_week * NUM_DAYS_1_WEEK
        grouped = sales.groupby("id")
        sales[config["pred_target_col"]] = grouped[
            config["target_col"]].shift(-shift_days)
        sales[config["pred_date_col"]] = grouped["date"].shift(-shift_days)
        sales = sales[sales[config["pred_date_col"]].notnull()]
        self.output().save(sales)
    def run(self):
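        # Concatenate the per-store, per-week prediction outputs into a
        # single frame sorted by id and prediction date.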
        config = GlobalParams().config
        preds = []
        for store_id in config["store_list"]:
            for pred_week in config["pred_week_list"]:
                preds.append(
                    RunPredictionStoreWeek(
                        store_id=store_id,
                        pred_week=pred_week).output().load())

        preds_all = pd.concat(preds)
        preds_all = preds_all.sort_values(["id", config["pred_date_col"]],
                                          ignore_index=True)
        self.output().save(preds_all)
    def run(self):
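        # Score one store/week: load the trained model and the prepared test
        # features, attach the prediction target date, and predict.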
        config = GlobalParams().config
        model = self.input()["model"].load()
        X_test = self.input()["test_data"].load()
        preds = X_test[["id", "date"]].copy()
        preds[config["pred_date_col"]] = preds["date"] + np.timedelta64(
            self.pred_week * NUM_DAYS_1_WEEK, "D")
        X_test = X_test.drop("date", axis=1)

        preds[config["pred_target_col"]] = model.predict(X_test)
        self.output().save(preds)

        # Clean-up files that are not needed anymore
        train_data_path = PrepareTrainData(
            store_id=self.store_id, pred_week=self.pred_week).output().path
        if os.path.exists(train_data_path):
            os.remove(train_data_path)
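
    # Register the supplied config file with GlobalParams (assumed to cache
    # the parsed config so that later GlobalParams() calls can read it).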
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        GlobalParams(config_file=self.config_file)
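
    # One RunPredictionStore dependency per store listed in the config.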
    def requires(self):
        return {
            s: RunPredictionStore(store_id=s)
            for s in GlobalParams().config["store_list"]
        }
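
    # One RunPredictionStoreWeek dependency per configured prediction week.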
    def requires(self):
        return {
            w: RunPredictionStoreWeek(store_id=self.store_id, pred_week=w)
            for w in GlobalParams().config["pred_week_list"]
        }