def test_add_time_features():
    """AddTimeFeatures equality: a clone compares equal, a clone with a
    different feature list does not."""
    original = transform.AddTimeFeatures(
        start_field=FieldName.START,
        target_field=FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=10,
    )
    # Same transformation, but with one time feature dropped.
    modified = clone(
        original,
        {
            "time_features": [
                time_feature.DayOfWeek(),
                time_feature.DayOfMonth(),
            ]
        },
    )
    assert equals(original, clone(original))
    assert not equals(original, modified)
def test_chain():
    """Chain equality respects both the list of transformations and their
    individual parameters."""

    def make_chain(log_scale):
        # Three-step pipeline; only the age feature's scale flag varies.
        return transform.Chain(
            trans=[
                transform.AddTimeFeatures(
                    start_field=FieldName.START,
                    target_field=FieldName.TARGET,
                    output_field="time_feat",
                    time_features=[
                        time_feature.DayOfWeek(),
                        time_feature.DayOfMonth(),
                        time_feature.MonthOfYear(),
                    ],
                    pred_length=10,
                ),
                transform.AddAgeFeature(
                    target_field=FieldName.TARGET,
                    output_field="age",
                    pred_length=10,
                    log_scale=log_scale,
                ),
                transform.AddObservedValuesIndicator(
                    target_field=FieldName.TARGET,
                    output_field="observed_values",
                ),
            ]
        )

    chain = make_chain(log_scale=True)
    assert equals(chain, clone(chain))
    # An empty chain must not compare equal to a populated one.
    assert not equals(chain, clone(chain, {"trans": []}))
    # A chain differing only in one nested parameter must not compare equal.
    assert not equals(chain, make_chain(log_scale=False))
def dsinfo(request):
    """Parametrized fixture returning metadata plus train/test splits for one
    of the artificial datasets ('constant' or 'synthetic')."""
    from gluonts import time_feature
    from gluonts.dataset.artificial import constant_dataset, default_synthetic

    if request.param == "constant":
        info, train, test = constant_dataset()
        return AttrDict(
            name="constant",
            cardinality=int(info.metadata.feat_static_cat[0].cardinality),
            freq=info.metadata.freq,
            num_parallel_samples=2,
            prediction_length=info.prediction_length,
            # FIXME: time features should not be needed for GP
            time_features=[time_feature.DayOfWeek(), time_feature.HourOfDay()],
            train_ds=train,
            test_ds=test,
        )
    elif request.param == "synthetic":
        info, train, test = default_synthetic()
        return AttrDict(
            name="synthetic",
            batch_size=32,
            cardinality=int(info.metadata.feat_static_cat[0].cardinality),
            context_length=2,
            freq=info.metadata.freq,
            prediction_length=info.prediction_length,
            num_parallel_samples=2,
            train_ds=train,
            test_ds=test,
            time_features=None,
        )
def test_Transformation():
    """The full feature pipeline (time/age/observed features, vstack, instance
    splitting) survives serialization and iterates over a toy dataset."""
    train_length = 100
    pred_length = 10
    ds = gluonts.dataset.common.ListDataset(
        [{"start": "2012-01-01", "target": [0.2] * train_length}], freq="1D"
    )

    pipeline = transform.Chain(
        trans=[
            transform.AddTimeFeatures(
                start_field=transform.FieldName.START,
                target_field=transform.FieldName.TARGET,
                output_field="time_feat",
                time_features=[
                    time_feature.DayOfWeek(),
                    time_feature.DayOfMonth(),
                    time_feature.MonthOfYear(),
                ],
                pred_length=pred_length,
            ),
            transform.AddAgeFeature(
                target_field=transform.FieldName.TARGET,
                output_field="age",
                pred_length=pred_length,
                log_scale=True,
            ),
            transform.AddObservedValuesIndicator(
                target_field=transform.FieldName.TARGET,
                output_field="observed_values",
            ),
            transform.VstackFeatures(
                output_field="dynamic_feat",
                input_fields=["age", "time_feat"],
                drop_inputs=True,
            ),
            transform.InstanceSplitter(
                target_field=transform.FieldName.TARGET,
                is_pad_field=transform.FieldName.IS_PAD,
                start_field=transform.FieldName.START,
                forecast_start_field=transform.FieldName.FORECAST_START,
                train_sampler=transform.ExpectedNumInstanceSampler(
                    num_instances=4
                ),
                past_length=train_length,
                future_length=pred_length,
                time_series_fields=["dynamic_feat", "observed_values"],
            ),
        ]
    )
    assert_serializable(pipeline)
    # Smoke-test: consuming the transformed stream must not raise.
    for entry in pipeline(iter(ds), is_train=True):
        print(entry)
def test_AddTimeFeatures(start, target, is_train):
    """AddTimeFeatures emits one row per time feature, covering the target
    length in training mode and additionally the forecast horizon otherwise.

    Parameters are supplied by pytest parametrization: `start` is a pandas
    timestamp carrying a `freq` attribute, `target` a 1-D target sequence,
    and `is_train` selects training vs. prediction mode.
    """
    pred_length = 13
    t = transform.AddTimeFeatures(
        start_field=transform.FieldName.START,
        target_field=transform.FieldName.TARGET,
        output_field="myout",
        pred_length=pred_length,
        time_features=[time_feature.DayOfWeek(), time_feature.DayOfMonth()],
    )
    assert_serializable(t)
    data = {"start": start, "target": target}
    res = t.map_transform(data, is_train=is_train)
    mat = res["myout"]
    # At prediction time the features must also cover the forecast horizon.
    expected_length = len(target) + (0 if is_train else pred_length)
    assert mat.shape == (2, expected_length)
    tmp_idx = pd.date_range(
        start=start, freq=start.freq, periods=expected_length
    )
    # np.alltrue was deprecated in NumPy 1.25 and removed in 2.0;
    # np.all is the drop-in replacement.
    assert np.all(mat[0] == time_feature.DayOfWeek()(tmp_idx))
    assert np.all(mat[1] == time_feature.DayOfMonth()(tmp_idx))
def test_add_method():
    """Adding two transformations with `+` yields a Chain."""
    time_part = transform.AddTimeFeatures(
        start_field=FieldName.START,
        target_field=FieldName.TARGET,
        output_field="time_feat",
        time_features=[
            time_feature.DayOfWeek(),
            time_feature.DayOfMonth(),
            time_feature.MonthOfYear(),
        ],
        pred_length=24,
    )
    age_part = transform.AddAgeFeature(
        target_field=FieldName.TARGET,
        output_field="age",
        pred_length=24,
        log_scale=True,
    )
    combined = time_part + age_part
    assert isinstance(combined, transform.Chain)
def test_multi_dim_transformation(is_train):
    """The pipeline handles a 2-D target containing NaNs: shapes, padding,
    and the observed-values indicator are checked in both modes."""
    train_length = 10
    pred_length = 2

    first_dim = np.arange(1, 11, 1).tolist()
    second_dim = np.arange(11, 21, 1).tolist()
    # The dataset receives string "NaN"s, which the loader parses as missing.
    first_dim[-1] = "NaN"
    second_dim[0] = "NaN"

    ds = gluonts.dataset.common.ListDataset(
        data_iter=[{"start": "2012-01-01", "target": [first_dim, second_dim]}],
        freq="1D",
        one_dim_target=False,
    )

    # Looks weird - but this is necessary to assert the nan entries correctly.
    first_dim[-1] = np.nan
    second_dim[0] = np.nan

    pipeline = transform.Chain(
        trans=[
            transform.AddTimeFeatures(
                start_field=transform.FieldName.START,
                target_field=transform.FieldName.TARGET,
                output_field="time_feat",
                time_features=[
                    time_feature.DayOfWeek(),
                    time_feature.DayOfMonth(),
                    time_feature.MonthOfYear(),
                ],
                pred_length=pred_length,
            ),
            transform.AddAgeFeature(
                target_field=transform.FieldName.TARGET,
                output_field="age",
                pred_length=pred_length,
                log_scale=True,
            ),
            transform.AddObservedValuesIndicator(
                target_field=transform.FieldName.TARGET,
                output_field="observed_values",
                convert_nans=False,
            ),
            transform.VstackFeatures(
                output_field="dynamic_feat",
                input_fields=["age", "time_feat"],
                drop_inputs=True,
            ),
            transform.InstanceSplitter(
                target_field=transform.FieldName.TARGET,
                is_pad_field=transform.FieldName.IS_PAD,
                start_field=transform.FieldName.START,
                forecast_start_field=transform.FieldName.FORECAST_START,
                train_sampler=transform.ExpectedNumInstanceSampler(
                    num_instances=4
                ),
                past_length=train_length,
                future_length=pred_length,
                time_series_fields=["dynamic_feat", "observed_values"],
                output_NTC=False,
            ),
        ]
    )
    assert_serializable(pipeline)

    # Only the future-target length differs between the two modes.
    expected_future_len = pred_length if is_train else 0
    for out in pipeline(iter(ds), is_train=is_train):
        assert_shape(out["past_target"], (2, 10))
        assert_shape(out["past_dynamic_feat"], (4, 10))
        assert_shape(out["past_observed_values"], (2, 10))
        assert_shape(out["future_target"], (2, expected_future_len))
        assert_padded_array(
            out["past_observed_values"],
            np.array([[1.0] * 9 + [0.0], [0.0] + [1.0] * 9]),
            out["past_is_pad"],
        )
        assert_padded_array(
            out["past_target"],
            np.array([first_dim, second_dim]),
            out["past_is_pad"],
        )
from gluonts.model.gp_forecaster import GaussianProcessEstimator from gluonts.model.predictor import Predictor from gluonts.model.seasonal_naive import SeasonalNaiveEstimator from gluonts.model.seq2seq import ( MQCNNEstimator, MQRNNEstimator, Seq2SeqEstimator, ) from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator dataset_info, train_ds, test_ds = constant_dataset() freq = dataset_info.metadata.time_granularity prediction_length = dataset_info.prediction_length cardinality = int(dataset_info.metadata.feat_static_cat[0].cardinality) # FIXME: Should time features should not be needed for GP time_features = [time_feature.DayOfWeek(), time_feature.HourOfDay()] num_eval_samples = 2 epochs = 1 def seq2seq_base(seq2seq_model, hybridize: bool = True, batches_per_epoch=1): return ( seq2seq_model, dict( ctx='cpu', epochs=epochs, learning_rate=1e-2, hybridize=hybridize, prediction_length=prediction_length, context_length=prediction_length, num_eval_samples=num_eval_samples,