def create_transformation(self) -> Transformation:
    """Assemble the preprocessing chain for this instance.

    The chain is built from, in this order: ``RemoveFields`` for the feature
    fields that are not used, an optional ``SetField`` that installs a dummy
    static categorical feature when ``use_feat_static_cat`` is off,
    ``AsNumpyArray`` / ``ExpandDimArray`` on the inputs, the observed-values
    indicator, seasonal indicators, time and age features, ``VstackFeatures``
    writing into ``FieldName.FEAT_TIME`` and a ``CanonicalInstanceSplitter``
    driven by a ``TestSplitSampler``.

    Returns:
        A ``Chain`` wrapping the transformations in the order listed above.
    """
    # Dynamic real features are dropped as well unless explicitly enabled.
    dropped_fields = [FieldName.FEAT_DYNAMIC_CAT, FieldName.FEAT_STATIC_REAL]
    if not self.use_feat_dynamic_real:
        dropped_fields.append(FieldName.FEAT_DYNAMIC_REAL)

    steps = [RemoveFields(field_names=dropped_fields)]

    if not self.use_feat_static_cat:
        # Placeholder static categorical feature when none is used.
        steps.append(
            SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0.0])
        )

    steps.append(
        AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1)
    )
    steps.append(AsNumpyArray(field=FieldName.TARGET, expected_ndim=1))
    # gives target the (1, T) layout
    steps.append(ExpandDimArray(field=FieldName.TARGET, axis=0))
    steps.append(
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        )
    )
    # Unnormalized seasonal features
    steps.append(
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=SEASON_INDICATORS_FIELD,
            time_features=CompositeISSM.seasonal_features(self.freq),
            pred_length=self.prediction_length,
        )
    )
    steps.append(
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=self.time_features,
            pred_length=self.prediction_length,
        )
    )
    steps.append(
        AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_AGE,
            pred_length=self.prediction_length,
            log_scale=True,
        )
    )

    # Time and age features are always stacked; dynamic real ones on demand.
    stacked_inputs = [FieldName.FEAT_TIME, FieldName.FEAT_AGE]
    if self.use_feat_dynamic_real:
        stacked_inputs.append(FieldName.FEAT_DYNAMIC_REAL)
    steps.append(
        VstackFeatures(
            output_field=FieldName.FEAT_TIME,
            input_fields=stacked_inputs,
        )
    )

    steps.append(
        CanonicalInstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=TestSplitSampler(),
            time_series_fields=[
                FieldName.FEAT_TIME,
                SEASON_INDICATORS_FIELD,
                FieldName.OBSERVED_VALUES,
            ],
            allow_target_padding=True,
            instance_length=self.past_length,
            use_prediction_features=True,
            prediction_length=self.prediction_length,
        )
    )

    return Chain(steps)
def __init__(
    self,
    freq: str,
    prediction_length: int,
    cardinality: List[int],
    add_trend: bool = False,
    past_length: Optional[int] = None,
    num_periods_to_train: int = 4,
    trainer: Trainer = Trainer(
        epochs=100, num_batches_per_epoch=50, hybridize=False
    ),
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = True,
    embedding_dimension: Optional[List[int]] = None,
    issm: Optional[ISSM] = None,
    scaling: bool = True,
    time_features: Optional[List[TimeFeature]] = None,
    noise_std_bounds: ParameterBounds = ParameterBounds(1e-6, 1.0),
    prior_cov_bounds: ParameterBounds = ParameterBounds(1e-6, 1.0),
    innovation_bounds: ParameterBounds = ParameterBounds(1e-6, 0.01),
    batch_size: int = 32,
) -> None:
    """Validate the hyper-parameters and store them on the instance.

    ``trainer`` and ``batch_size`` are forwarded to the parent constructor.
    The remaining arguments are checked with assertions and then kept as
    attributes of the same name. Arguments left as ``None`` are replaced:

    * ``past_length`` -> ``num_periods_to_train`` times the value returned
      by ``longest_period_from_frequency_str(freq)``.
    * ``embedding_dimension`` -> ``min(50, (cat + 1) // 2)`` for every entry
      of the effective cardinality.
    * ``issm`` -> ``CompositeISSM.get_from_freq(freq, add_trend)``.
    * ``time_features`` -> ``time_features_from_frequency_str(freq)``.

    The effective cardinality is ``cardinality`` when it is non-empty and
    ``use_feat_static_cat`` is set, otherwise ``[1]``.

    Raises:
        AssertionError: if any of the argument checks fails.
    """
    super().__init__(trainer=trainer, batch_size=batch_size)

    # --- argument checks (order decides which message is reported first) ---
    assert prediction_length > 0, (
        "The value of `prediction_length` should be > 0"
    )
    assert past_length is None or past_length > 0, (
        "The value of `past_length` should be > 0"
    )
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    assert num_parallel_samples > 0, (
        "The value of `num_parallel_samples` should be > 0"
    )
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert not use_feat_static_cat or any(c > 1 for c in cardinality), (
        f"Cardinality of at least one static categorical feature must be larger than 1 "
        f"if `use_feat_static_cat=True`. But cardinality provided is: {cardinality}"
    )
    assert embedding_dimension is None or all(
        dim > 0 for dim in embedding_dimension
    ), "Elements of `embedding_dimension` should be > 0"

    all_bounds = [noise_std_bounds, prior_cov_bounds, innovation_bounds]
    assert all(
        np.isfinite(bound.lower)
        and np.isfinite(bound.upper)
        and bound.lower > 0
        for bound in all_bounds
    ), "All parameter bounds should be finite, and lower bounds should be positive"

    # --- stored configuration ---
    self.freq = freq

    if past_length is not None:
        self.past_length = past_length
    else:
        self.past_length = (
            num_periods_to_train * longest_period_from_frequency_str(freq)
        )

    self.prediction_length = prediction_length
    self.add_trend = add_trend
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.scaling = scaling
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat

    # Fall back to a single one-valued category when static categorical
    # features are disabled or no cardinality was supplied.
    if cardinality and use_feat_static_cat:
        self.cardinality = cardinality
    else:
        self.cardinality = [1]

    if embedding_dimension is not None:
        self.embedding_dimension = embedding_dimension
    else:
        self.embedding_dimension = [
            min(50, (cat + 1) // 2) for cat in self.cardinality
        ]

    if issm is not None:
        self.issm = issm
    else:
        self.issm = CompositeISSM.get_from_freq(freq, add_trend)

    if time_features is not None:
        self.time_features = time_features
    else:
        self.time_features = time_features_from_frequency_str(self.freq)

    self.noise_std_bounds = noise_std_bounds
    self.prior_cov_bounds = prior_cov_bounds
    self.innovation_bounds = innovation_bounds
def __init__(
    self,
    freq: str,
    prediction_length: int,
    add_trend: bool = False,
    past_length: Optional[int] = None,
    num_periods_to_train: int = 4,
    trainer: Trainer = Trainer(epochs=25, hybridize=False),
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_eval_samples: int = 100,
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: int = 20,
    issm: Optional[ISSM] = None,
    scaling: bool = True,
    time_features: Optional[List[TimeFeature]] = None,
) -> None:
    """Validate the hyper-parameters and store them on the instance.

    ``trainer`` is forwarded to the parent constructor. The remaining
    arguments are checked with assertions and then kept as attributes;
    ``num_eval_samples`` is stored as ``num_sample_paths``. Arguments left
    as ``None`` are replaced:

    * ``past_length`` -> ``num_periods_to_train`` times the value returned
      by ``longest_period_from_frequency_str(freq)``.
    * ``issm`` -> ``CompositeISSM.get_from_freq(freq, add_trend)``.
    * ``time_features`` -> ``time_features_from_frequency_str(freq)``.

    ``cardinality`` is required when ``use_feat_static_cat`` is true and is
    replaced by ``[1]`` when it is false.

    Raises:
        AssertionError: if any of the argument checks fails.
    """
    super().__init__(trainer=trainer)

    assert (
        prediction_length > 0
    ), "The value of `prediction_length` should be > 0"
    assert (
        past_length is None or past_length > 0
    ), "The value of `past_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    assert (
        num_eval_samples > 0
    ), "The value of `num_eval_samples` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert (
        cardinality is not None or not use_feat_static_cat
    ), "You must set `cardinality` if `use_feat_static_cat=True`"
    # `all(...)` is required here: a bare list comprehension is truthy for
    # any non-empty list, so non-positive entries were never rejected.
    # Note that an empty list satisfies this check vacuously.
    assert cardinality is None or all(
        c > 0 for c in cardinality
    ), "Elements of `cardinality` should be > 0"
    assert (
        embedding_dimension > 0
    ), "The value of `embedding_dimension` should be > 0"

    self.freq = freq
    self.past_length = (
        past_length
        if past_length is not None
        else num_periods_to_train * longest_period_from_frequency_str(freq)
    )
    self.prediction_length = prediction_length
    self.add_trend = add_trend
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_sample_paths = num_eval_samples
    self.scaling = scaling
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    # A single one-valued category stands in when static categorical
    # features are disabled.
    self.cardinality = cardinality if use_feat_static_cat else [1]
    self.embedding_dimension = embedding_dimension
    self.issm = (
        issm
        if issm is not None
        else CompositeISSM.get_from_freq(freq, add_trend)
    )
    self.time_features = (
        time_features
        if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
def create_input_transform(
    is_train,
    prediction_length,
    past_length,
    use_feat_static_cat,
    use_feat_dynamic_real,
    freq,
    time_features,
    extract_tail_chunks_for_train: bool = False,
):
    """Build the preprocessing ``Chain`` for the given configuration.

    Args:
        is_train: together with ``extract_tail_chunks_for_train`` selects the
            instance sampler (see below).
        prediction_length: passed as ``pred_length`` / ``prediction_length``
            to the feature transformations and the instance splitter.
        past_length: passed to the splitter as ``instance_length``.
        use_feat_static_cat: when false, a ``SetField`` step installs
            ``[0.0]`` as the static categorical feature.
        use_feat_dynamic_real: when false, the dynamic real feature field is
            removed; when true, it is stacked into ``FieldName.FEAT_TIME``.
        freq: frequency string used to derive the seasonal features and,
            if needed, the default time features.
        time_features: time features to add; ``None`` falls back to
            ``time_features_from_frequency_str(freq)``.
        extract_tail_chunks_for_train: when true, ``TestSplitSampler`` is
            used even if ``is_train`` is true.

    Returns:
        The assembled ``Chain``.
    """
    SEASON_INDICATORS_FIELD = "seasonal_indicators"

    dropped_fields = [FieldName.FEAT_DYNAMIC_CAT, FieldName.FEAT_STATIC_REAL]
    if not use_feat_dynamic_real:
        dropped_fields.append(FieldName.FEAT_DYNAMIC_REAL)

    if time_features is None:
        time_features = time_features_from_frequency_str(freq)

    steps = [RemoveFields(field_names=dropped_fields)]

    if not use_feat_static_cat:
        # Placeholder static categorical feature when none is used.
        steps.append(
            SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0.0])
        )

    steps.append(
        AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1)
    )
    steps.append(AsNumpyArray(field=FieldName.TARGET, expected_ndim=1))
    # gives target the (1, T) layout
    steps.append(ExpandDimArray(field=FieldName.TARGET, axis=0))
    steps.append(
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        )
    )
    # Unnormalized seasonal features
    steps.append(
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=SEASON_INDICATORS_FIELD,
            time_features=CompositeISSM.seasonal_features(freq),
            pred_length=prediction_length,
        )
    )
    steps.append(
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=time_features,
            pred_length=prediction_length,
        )
    )
    steps.append(
        AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_AGE,
            pred_length=prediction_length,
            log_scale=True,
        )
    )

    # Time and age features are always stacked; dynamic real ones on demand.
    stacked_inputs = [FieldName.FEAT_TIME, FieldName.FEAT_AGE]
    if use_feat_dynamic_real:
        stacked_inputs.append(FieldName.FEAT_DYNAMIC_REAL)
    steps.append(
        VstackFeatures(
            output_field=FieldName.FEAT_TIME,
            input_fields=stacked_inputs,
        )
    )

    # Random single-instance sampling only for ordinary training; every
    # other case uses the test-split sampler.
    if is_train and not extract_tail_chunks_for_train:
        sampler = ExpectedNumInstanceSampler(num_instances=1)
    else:
        sampler = TestSplitSampler()

    steps.append(
        CanonicalInstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=sampler,
            time_series_fields=[
                FieldName.FEAT_TIME,
                SEASON_INDICATORS_FIELD,
                FieldName.OBSERVED_VALUES,
            ],
            allow_target_padding=True,
            instance_length=past_length,
            use_prediction_features=True,
            prediction_length=prediction_length,
        )
    )

    return Chain(steps)
def test_composite_issm_h_default():
    """Check the coefficients of ``CompositeISSM.get_from_freq(freq="H")``.

    Two hourly index ranges of ten steps each are turned into features, and
    for every (item, time step) pair the emission, transition and innovation
    coefficients are compared against hand-built expectations: two ones
    followed by one-hot encodings of the hour-of-day (24 slots) and
    day-of-week (7 slots) indices.
    """
    latent_dim = 2 + 24 + 7

    model = CompositeISSM.get_from_freq(freq="H")
    assert model.latent_dim() == latent_dim
    assert model.output_dim() == 1

    feature_fns = model.time_features()
    assert len(feature_fns) == 3

    start_times = ("2020-01-01 21:00:00", "2020-01-02 03:00:00")
    indices = [
        pd.date_range(start, periods=10, freq="H") for start in start_times
    ]
    per_item_features = []
    for index in indices:
        per_item_features.append(
            np.stack([fn(index) for fn in feature_fns], axis=-1)
        )
    features = mx.nd.array(np.stack(per_item_features, axis=0))

    emission, transition, innovation = model.get_issm_coeff(features)

    # Expected seasonal slots per item and time step.
    hour_of_day = [[21, 22, 23] + list(range(7)), list(range(3, 13))]
    day_of_week = [[2, 2, 2] + [3] * 7, [3] * 10]

    # Same expected transition matrix for every entry: identity plus a one
    # at position (0, 1).
    expected_transition = mx.nd.eye(latent_dim)
    expected_transition[0, 1] = 1

    def entry(tensor, item, step, trailing_axes):
        # Slice out one (item, step) entry, keeping all trailing axes whole.
        rest = (None,) * trailing_axes
        return mx.nd.slice(
            tensor,
            begin=(item, step) + rest,
            end=(item + 1, step + 1) + rest,
        )

    for item, (hods, dows) in enumerate(zip(hour_of_day, day_of_week)):
        for step, (hod, dow) in enumerate(zip(hods, dows)):
            expected_coeff = mx.nd.concat(
                mx.nd.ones((1, 2)),
                mx.nd.one_hot(mx.nd.array([hod]), 24),
                mx.nd.one_hot(mx.nd.array([dow]), 7),
                dim=-1,
            )

            got_emission = entry(emission, item, step, 2)
            assert (got_emission == expected_coeff).asnumpy().all()

            got_transition = entry(transition, item, step, 2)
            assert (got_transition == expected_transition).asnumpy().all()

            got_innovation = entry(innovation, item, step, 1)
            assert (got_innovation == expected_coeff).asnumpy().all()
def test_composite_issm_h():
    """Check the coefficients of a hand-assembled ``CompositeISSM``.

    The model combines a 7-season day-of-week component and a 12-season
    month-of-year component without trend. Two hourly index ranges of ten
    steps each are turned into features, and for every (item, time step)
    pair the emission, transition and innovation coefficients are compared
    against hand-built expectations: a single one followed by one-hot
    encodings of the day-of-week and month-of-year indices, and an identity
    transition matrix.
    """
    latent_dim = 1 + 7 + 12

    model = CompositeISSM(
        seasonal_issms=[
            SeasonalityISSM(num_seasons=7, time_feature=DayOfWeekIndex()),
            SeasonalityISSM(num_seasons=12, time_feature=MonthOfYearIndex()),
        ],
        add_trend=False,
    )
    assert model.latent_dim() == latent_dim
    assert model.output_dim() == 1

    feature_fns = model.time_features()
    assert len(feature_fns) == 3

    start_times = ("2020-01-01 21:00:00", "2020-01-31 22:00:00")
    indices = [
        pd.date_range(start, periods=10, freq="H") for start in start_times
    ]
    per_item_features = []
    for index in indices:
        per_item_features.append(
            np.stack([fn(index) for fn in feature_fns], axis=-1)
        )
    features = mx.nd.array(np.stack(per_item_features, axis=0))

    emission, transition, innovation = model.get_issm_coeff(features)

    # Expected seasonal slots per item and time step.
    day_of_week = [[2, 2, 2] + [3] * 7, [4, 4] + [5] * 8]
    month_of_year = [[0] * 10, [0, 0] + [1] * 8]

    expected_transition = mx.nd.eye(latent_dim)

    def entry(tensor, item, step, trailing_axes):
        # Slice out one (item, step) entry, keeping all trailing axes whole.
        rest = (None,) * trailing_axes
        return mx.nd.slice(
            tensor,
            begin=(item, step) + rest,
            end=(item + 1, step + 1) + rest,
        )

    for item, (dows, moys) in enumerate(zip(day_of_week, month_of_year)):
        for step, (dow, moy) in enumerate(zip(dows, moys)):
            expected_coeff = mx.nd.concat(
                mx.nd.ones((1, 1)),
                mx.nd.one_hot(mx.nd.array([dow]), 7),
                mx.nd.one_hot(mx.nd.array([moy]), 12),
                dim=-1,
            )

            got_emission = entry(emission, item, step, 2)
            assert (got_emission == expected_coeff).asnumpy().all()

            got_transition = entry(transition, item, step, 2)
            assert (got_transition == expected_transition).asnumpy().all()

            got_innovation = entry(innovation, item, step, 1)
            assert (got_innovation == expected_coeff).asnumpy().all()
past_length=28, n_steps_forecast=14, ) freq = dataset.metadata.freq cardinalities = get_cardinalities( dataset=dataset, add_trend=add_trend ) batch = next(iter(inference_loader)) gts_seasonal_indicators = mx.ndarray.concat( batch["past_seasonal_indicators"], batch["future_seasonal_indicators"], dim=1, ) gts_issm = GtsCompositeISSM.get_from_freq( freq=freq, add_trend=add_trend ) ( emission_coeff, transition_coeff, innovation_coeff, ) = gts_issm.get_issm_coeff(gts_seasonal_indicators) data = transform_gluonts_to_pytorch( batch=batch, device="cuda", dtype=torch.float32, time_features=TimeFeatType.seasonal_indicator, **cardinalities, )