def setup_class(self):
    """Build the shared fixture: a two-(store, item) dataframe and the
    train/test gluon list datasets used by every test in the class."""
    raw_records = {
        "date": [
            "2018-01-06", "2018-01-07", "2018-01-08",
            "2018-01-06", "2018-01-07", "2018-01-08",
        ],
        "volume": [2, 4, 2, 5, 2, 5],
        "revenue": [12, 13, 14, 15, 11, 10],
        "store": [1, 1, 1, 1, 1, 1],
        "item": [1, 1, 1, 2, 2, 2],
        "is_holiday": [0, 0, 0, 0, 1, 0],
        "is_weekend": [1, 0, 0, 1, 0, 0],
    }
    self.df = pd.DataFrame(raw_records)
    # Normalize to tz-naive timestamps, matching what the plugin expects.
    self.df["date"] = pd.to_datetime(self.df["date"]).dt.tz_localize(tz=None)
    self.gluon_dataset = GluonDataset(
        dataframe=self.df,
        time_column_name="date",
        frequency="D",
        target_columns_names=["volume", "revenue"],
        timeseries_identifiers_names=["store", "item"],
        external_features_columns_names=["is_holiday", "is_weekend"],
        min_length=2,
    )
    self.prediction_length = 1
    # First dataset has the last prediction_length steps cut off (train),
    # the second keeps the full series (test).
    datasets = self.gluon_dataset.create_list_datasets(
        cut_lengths=[self.prediction_length, 0]
    )
    self.train_list_dataset = datasets[0]
    self.test_list_dataset = datasets[1]
def create_gluon_datasets(self):
    """Create the evaluation-train and full gluon list datasets.

    The last ``prediction_length`` time steps are removed from each
    timeseries of the evaluation-train dataset, while the full dataset
    keeps every time step. Also resolves ``num_batches_per_epoch``:
    when the user left it at the sentinel value ``-1``, an optimal value
    is computed from the train dataset size.
    """
    gluon_dataset = GluonDataset(
        dataframe=self.training_df,
        time_column_name=self.time_column_name,
        frequency=self.frequency,
        target_columns_names=self.target_columns_names,
        timeseries_identifiers_names=self.timeseries_identifiers_names,
        external_features_columns_names=self.external_features_columns_names,
        # Each series must fit one context window plus one prediction
        # window; assumes context_length == prediction_length.
        min_length=2 * self.prediction_length,
    )
    gluon_list_datasets = gluon_dataset.create_list_datasets(
        cut_lengths=[self.prediction_length, 0]
    )
    self.evaluation_train_list_dataset = gluon_list_datasets[0]
    self.full_list_dataset = gluon_list_datasets[1]
    # -1 means "not set by the user" — presumably comes from the plugin
    # configuration form; verify against the caller.
    if self.user_num_batches_per_epoch == -1:
        self.num_batches_per_epoch = self._compute_optimal_num_batches_per_epoch()
    else:
        self.num_batches_per_epoch = self.user_num_batches_per_epoch
def setup_method(self):
    """Rebuild the gluon dataset before each test so tests stay isolated."""
    dataset = GluonDataset(
        dataframe=self.df,
        time_column_name="date",
        frequency="D",
        target_columns_names=["volume", "revenue"],
        timeseries_identifiers_names=["store", "item"],
        external_features_columns_names=["is_holiday", "is_weekend"],
        min_length=2,
    )
    self.gluon_dataset = dataset
    # cut_lengths=[0] yields a single, uncut list dataset.
    self.gluon_list_dataset = dataset.create_list_datasets(cut_lengths=[0])[0]
def setup_class(self):
    """Build a two-timeseries fixture (keys 1 and 2, unequal lengths) and a
    simplefeedforward Model shared by the tests of this class."""
    records = {
        "date": [
            "2018-01-06", "2018-01-07", "2018-01-08", "2018-01-09",
            "2018-01-08", "2018-01-09", "2018-01-10", "2018-01-11", "2018-01-12",
        ],
        "target": [2, 4, 2, 2, 5, 2, 3, 2, 3],
        "key": [1, 1, 1, 1, 2, 2, 2, 2, 2],
        "ext_feat": [0, 0, 0, 0, 0, 1, 0, 1, 1],
    }
    df = pd.DataFrame(records)
    # Normalize to tz-naive timestamps, matching what the plugin expects.
    df["date"] = pd.to_datetime(df["date"]).dt.tz_localize(tz=None)
    self.frequency = "D"
    self.prediction_length = 2
    gluon_dataset = GluonDataset(
        dataframe=df,
        time_column_name="date",
        frequency=self.frequency,
        target_columns_names=["target"],
        timeseries_identifiers_names=["key"],
        external_features_columns_names=["ext_feat"],
        min_length=2,
    )
    # First dataset has the last prediction_length steps cut off (train),
    # the second keeps the full series (test).
    datasets = gluon_dataset.create_list_datasets(
        cut_lengths=[self.prediction_length, 0]
    )
    self.train_list_dataset = datasets[0]
    self.test_list_dataset = datasets[1]
    self.model_name = "simplefeedforward"
    self.model = Model(
        self.model_name,
        model_parameters={"activated": True, "kwargs": {}},
        frequency=self.frequency,
        prediction_length=self.prediction_length,
        epoch=1,
        use_external_features=True,
        batch_size=32,
        num_batches_per_epoch=50,
    )
class TestGluonDataset:
    """Unit tests for GluonDataset list-dataset construction."""

    def setup_class(self):
        """Build the shared fixture dataframe (two store/item timeseries)."""
        self.df = pd.DataFrame({
            "date": [
                "2018-01-06", "2018-01-07", "2018-01-08",
                "2018-01-06", "2018-01-07", "2018-01-08",
            ],
            "volume": [2, 4, 2, 5, 2, 5],
            "revenue": [12, 13, 14, 15, 11, 10],
            "store": [1, 1, 1, 1, 1, 1],
            "item": [1, 1, 1, 2, 2, 2],
            "is_holiday": [0, 0, 0, 0, 1, 0],
            "is_weekend": [1, 0, 0, 1, 0, 0],
        })
        # Normalize to tz-naive timestamps, matching what the plugin expects.
        self.df["date"] = pd.to_datetime(self.df["date"]).dt.tz_localize(tz=None)

    def setup_method(self):
        """Recreate the gluon list dataset before each test."""
        self.gluon_dataset = GluonDataset(
            dataframe=self.df,
            time_column_name="date",
            frequency="D",
            target_columns_names=["volume", "revenue"],
            timeseries_identifiers_names=["store", "item"],
            external_features_columns_names=["is_holiday", "is_weekend"],
            min_length=2,
        )
        self.gluon_list_dataset = self.gluon_dataset.create_list_datasets(
            cut_lengths=[0]
        )[0]

    def test_start_date(self):
        """The entry's start key holds the first timestamp of its group."""
        entry = self.gluon_list_dataset.list_data[1]
        assert entry[TIMESERIES_KEYS.START] == pd.Timestamp("2018-01-06")

    def test_target(self):
        """Entry 1 holds the revenue values of the (store=1, item=1) group."""
        entry = self.gluon_list_dataset.list_data[1]
        assert (entry[TIMESERIES_KEYS.TARGET] == np.array([12, 13, 14])).all()

    def test_external_features(self):
        """External features are stacked as one row per feature column."""
        entry = self.gluon_list_dataset.list_data[1]
        expected = np.array([[0, 0, 0], [1, 0, 0]])
        assert (entry[TIMESERIES_KEYS.FEAT_DYNAMIC_REAL] == expected).all()

    def test_timeseries_identifiers(self):
        """Identifier column values are attached to each timeseries entry."""
        entry = self.gluon_list_dataset.list_data[2]
        assert entry[TIMESERIES_KEYS.IDENTIFIERS] == {"store": 1, "item": 2}
class TestModel:
    """Train/evaluate smoke tests for the supported forecasting models."""

    def setup_class(self):
        """Build the shared fixture dataframe and train/test gluon datasets."""
        self.df = pd.DataFrame({
            "date": [
                "2018-01-06", "2018-01-07", "2018-01-08",
                "2018-01-06", "2018-01-07", "2018-01-08",
            ],
            "volume": [2, 4, 2, 5, 2, 5],
            "revenue": [12, 13, 14, 15, 11, 10],
            "store": [1, 1, 1, 1, 1, 1],
            "item": [1, 1, 1, 2, 2, 2],
            "is_holiday": [0, 0, 0, 0, 1, 0],
            "is_weekend": [1, 0, 0, 1, 0, 0],
        })
        # Normalize to tz-naive timestamps, matching what the plugin expects.
        self.df["date"] = pd.to_datetime(self.df["date"]).dt.tz_localize(tz=None)
        self.gluon_dataset = GluonDataset(
            dataframe=self.df,
            time_column_name="date",
            frequency="D",
            target_columns_names=["volume", "revenue"],
            timeseries_identifiers_names=["store", "item"],
            external_features_columns_names=["is_holiday", "is_weekend"],
            min_length=2,
        )
        self.prediction_length = 1
        datasets = self.gluon_dataset.create_list_datasets(
            cut_lengths=[self.prediction_length, 0]
        )
        self.train_list_dataset = datasets[0]
        self.test_list_dataset = datasets[1]

    def _build_model(self, model_name, kwargs, use_external_features):
        """Construct a Model with the hyper-parameters shared by all tests."""
        return Model(
            model_name,
            model_parameters={"activated": True, "kwargs": kwargs},
            frequency="D",
            prediction_length=self.prediction_length,
            epoch=1,
            use_external_features=use_external_features,
            batch_size=32,
            num_batches_per_epoch=50,
        )

    def _train_evaluate_and_check(self, model_name, kwargs, use_external_features):
        """Train/evaluate one model and run the shared metric/forecast checks."""
        model = self._build_model(model_name, kwargs, use_external_features)
        metrics, _identifiers_columns, forecasts_df = model.train_evaluate(
            self.train_list_dataset, self.test_list_dataset, make_forecasts=True
        )
        TestModel.metrics_assertions(metrics, model_name)
        TestModel.forecasts_assertions(
            forecasts_df, model_name, prediction_length=self.prediction_length
        )

    def test_deepar(self):
        """deepar trains with string/extra kwargs and external features."""
        self._train_evaluate_and_check(
            "deepar",
            {"dropout_rate": "0.3", "cell_type": "gru"},
            use_external_features=True,
        )

    def test_transformer(self):
        """transformer trains with a custom model_dim and external features."""
        self._train_evaluate_and_check(
            "transformer", {"model_dim": 16}, use_external_features=True
        )

    def test_seasonal_naive(self):
        """seasonal_naive trains without external features."""
        self._train_evaluate_and_check(
            "seasonal_naive", {}, use_external_features=False
        )

    def test_mqcnn(self):
        """mqcnn only goes through train(); check a predictor is produced."""
        model = self._build_model("mqcnn", {}, use_external_features=False)
        model.train(self.test_list_dataset)
        assert model.predictor is not None

    @staticmethod
    def metrics_assertions(metrics, model_name):
        """Check the metrics dataframe row count, columns and model label."""
        expected_columns = ["store", "item"]
        expected_columns += [
            METRICS_DATASET.TARGET_COLUMN,
            METRICS_DATASET.MODEL_COLUMN,
            METRICS_DATASET.MODEL_PARAMETERS,
            METRICS_DATASET.TRAINING_TIME,
        ]
        expected_columns += list(EVALUATION_METRICS_DESCRIPTIONS.keys())
        # 2 targets x 2 timeseries + 1 aggregated row — TODO confirm split.
        assert len(metrics.index) == 5
        assert set(metrics.columns) == set(expected_columns)
        assert (
            metrics[METRICS_DATASET.MODEL_COLUMN].unique()
            == MODEL_DESCRIPTORS[model_name][LABEL]
        )

    @staticmethod
    def forecasts_assertions(forecasts_df, model_name, prediction_length=1):
        """Check forecast row count and distinct forecasted time steps."""
        assert len(forecasts_df.index) == 2
        assert forecasts_df["index"].nunique() == prediction_length