def _create_instance_splitter(self, mode: str):
    assert mode in ["training", "validation", "test"]

    instance_sampler = {
        "training": self.train_sampler,
        "validation": self.validation_sampler,
        "test": TestSplitSampler(),
    }[mode]

    return InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=instance_sampler,
        past_length=self.context_length,
        future_length=self.prediction_length,
        time_series_fields=[FieldName.OBSERVED_VALUES],
    )
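# A minimal sketch (not from the source) of what such a splitter emits when
# applied to a toy entry. The toy series and parameter values are made up;
# the transformation(data, is_train=...) call follows the GluonTS
# transformation API, and the pd.Period start assumes a GluonTS version that
# uses Period-typed start fields (older versions used pd.Timestamp).
import numpy as np
import pandas as pd
from gluonts.dataset.field_names import FieldName
from gluonts.transform import ExpectedNumInstanceSampler, InstanceSplitter

splitter = InstanceSplitter(
    target_field=FieldName.TARGET,
    is_pad_field=FieldName.IS_PAD,
    start_field=FieldName.START,
    forecast_start_field=FieldName.FORECAST_START,
    instance_sampler=ExpectedNumInstanceSampler(num_instances=1),
    past_length=10,
    future_length=5,
)

entry = {
    FieldName.START: pd.Period("2021-01-01", freq="H"),
    FieldName.TARGET: np.arange(50, dtype=np.float32),
}

# The sampler draws on average one window per series, so a given pass may
# occasionally yield zero instances.
for instance in splitter([entry], is_train=True):
    print(instance["past_target"].shape)    # (10,)
    print(instance["future_target"].shape)  # (5,)
    print(instance["forecast_start"])       # period where the forecast begins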
def create_transformation(self) -> Transformation:
    return Chain([
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
            dtype=self.dtype,
            imputation_method=self.imputation_method,
        ),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            # older GluonTS API: this kwarg was named `train_sampler`;
            # newer versions use `instance_sampler`
            train_sampler=ExpectedNumInstanceSampler(num_instances=1),
            past_length=self.context_length,
            future_length=self.prediction_length,
            time_series_fields=[FieldName.OBSERVED_VALUES],
        ),
    ])
def create_transformation(self) -> Transformation:
    return Chain(trans=[
        AsNumpyArray(
            field=FieldName.TARGET,
            expected_ndim=2,
            dtype=self.dtype,
        ),
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
            dtype=self.dtype,
        ),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=ExpectedNumInstanceSampler(num_instances=1),
            time_series_fields=[FieldName.OBSERVED_VALUES],
            past_length=self.context_length,
            future_length=self.future_length,
            output_NTC=False,  # output NCT layout for the first conv1d layer
        ),
    ])
def create_training_data_loader(self, dataset, **kwargs):
    instance_splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=ExpectedNumInstanceSampler(
            num_instances=1,
            min_future=self.prediction_length,
        ),
        past_length=self.context_length,
        future_length=self.prediction_length,
    )
    input_names = get_hybrid_forward_input_names(MyProbTrainNetwork)
    return TrainDataLoader(
        dataset=dataset,
        transform=instance_splitter + SelectFields(input_names),
        batch_size=self.batch_size,
        stack_fn=functools.partial(
            batchify, ctx=self.trainer.ctx, dtype=self.dtype
        ),
        decode_fn=functools.partial(as_in_context, ctx=self.trainer.ctx),
        **kwargs,
    )
def create_transformation(self) -> Transformation:
    return Chain(trans=[
        AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=time_features_from_frequency_str(self.freq),
            pred_length=self.prediction_length,
        ),
        SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0.0]),
        AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=TestSplitSampler(),
            time_series_fields=[FieldName.FEAT_TIME],
            past_length=self.context_length,
            future_length=self.prediction_length,
        ),
    ])
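# A hedged aside (not in the source): time_features_from_frequency_str maps a
# pandas frequency string to a list of calendar features, which AddTimeFeatures
# above evaluates along the series index. The exact feature set depends on the
# GluonTS version.
from gluonts.time_feature import time_features_from_frequency_str

feats = time_features_from_frequency_str("H")
print(feats)  # for hourly data: e.g. hour-of-day, day-of-week, day-of-month, day-of-year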
def test_dataset(dataset):
    class ExactlyOneSampler(InstanceSampler):
        def __call__(self, ts: np.ndarray) -> np.ndarray:
            a, b = self._get_bounds(ts)
            window_size = b - a + 1
            assert window_size > 0
            return np.array([a])

    transformation = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=ExactlyOneSampler(),
        past_length=10,
        future_length=5,
        dummy_value=1.0,
    )

    # batch_size, num_workers, num_epochs, num_batches_per_epoch, and
    # num_passes are test parameters defined elsewhere in the test module
    dl = TrainDataLoader(
        dataset=dataset,
        transform=transformation,
        batch_size=batch_size,
        stack_fn=partial(batchify, ctx=current_context()),
        decode_fn=partial(as_in_context, ctx=current_context()),
        num_workers=num_workers,
    )

    item_ids = defaultdict(int)
    for epoch in range(num_epochs):
        for batch in islice(dl, num_batches_per_epoch):
            for item_id in batch["item_id"]:
                item_ids[item_id] += 1

    for i in range(len(dataset)):
        assert num_passes - 1 <= item_ids[i] <= num_passes + 1
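# A sketch (not in the source) of the same InstanceSampler protocol the test
# above relies on: _get_bounds(ts) returns the first and last admissible cut
# points, and __call__ returns an array of time indices at which the splitter
# cuts windows. LastValidSampler is a hypothetical name, not a GluonTS class.
import numpy as np
from gluonts.transform import InstanceSampler

class LastValidSampler(InstanceSampler):
    """Always cut a single window at the last admissible position."""

    def __call__(self, ts: np.ndarray) -> np.ndarray:
        a, b = self._get_bounds(ts)
        if b < a:
            # series too short to cut any window
            return np.array([], dtype=int)
        return np.array([b])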
def _create_instance_splitter(self, mode: str): assert mode in ["training", "validation", "test"] instance_sampler = { "training": self.train_sampler, "validation": self.validation_sampler, "test": TestSplitSampler(), }[mode] return (InstanceSplitter( target_field=FieldName.TARGET, is_pad_field=FieldName.IS_PAD, start_field=FieldName.START, forecast_start_field=FieldName.FORECAST_START, instance_sampler=instance_sampler, past_length=self.history_length, future_length=self.prediction_length, time_series_fields=[ FieldName.FEAT_TIME, FieldName.OBSERVED_VALUES, ], ) + (CDFtoGaussianTransform( target_field=FieldName.TARGET, observed_values_field=FieldName.OBSERVED_VALUES, max_context_length=self.conditioning_length, target_dim=self.target_dim, ) if self.use_marginal_transformation else RenameFields( { f"past_{FieldName.TARGET}": f"past_{FieldName.TARGET}_cdf", f"future_{FieldName.TARGET}": f"future_{FieldName.TARGET}_cdf", })) + SampleTargetDim( field_name=FieldName.TARGET_DIM_INDICATOR, target_field=FieldName.TARGET + "_cdf", observed_values_field=FieldName.OBSERVED_VALUES, num_samples=self.target_dim_sample, shuffle=self.shuffle_target_dim, ))
def create_predictor(
    self,
    transformation: Transformation,
    trained_network: mx.gluon.HybridBlock,
) -> Predictor:
    prediction_splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=TestSplitSampler(),
        past_length=self.context_length + 1,
        future_length=self.prediction_length,
        time_series_fields=[
            FieldName.FEAT_DYNAMIC_REAL,
            FieldName.OBSERVED_VALUES,
        ],
    )

    prediction_network = MyProbPredRNN(
        prediction_length=self.prediction_length,
        context_length=self.context_length,
        distr_output=self.distr_output,
        num_cells=self.num_cells,
        num_layers=self.num_layers,
        num_sample_paths=self.num_sample_paths,
        scaling=self.scaling,
    )

    copy_parameters(trained_network, prediction_network)

    return RepresentableBlockPredictor(
        input_transform=transformation + prediction_splitter,
        prediction_net=prediction_network,
        batch_size=self.trainer.batch_size,
        freq=self.freq,
        prediction_length=self.prediction_length,
        ctx=self.trainer.ctx,
    )
def _create_instance_splitter(
    self, module: DeepARLightningModule, mode: str
):
    assert mode in ["training", "validation", "test"]

    instance_sampler = {
        "training": self.train_sampler,
        "validation": self.validation_sampler,
        "test": TestSplitSampler(),
    }[mode]

    return InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=instance_sampler,
        past_length=module.model._past_length,
        future_length=self.prediction_length,
        time_series_fields=[
            FieldName.FEAT_TIME,
            FieldName.OBSERVED_VALUES,
        ],
        dummy_value=self.distr_output.value_in_support,
    )
def create_transformation(self) -> Transformation:
    remove_field_names = [FieldName.FEAT_DYNAMIC_CAT]
    if not self.use_feat_static_real:
        remove_field_names.append(FieldName.FEAT_STATIC_REAL)
    if not self.use_feat_dynamic_real:
        remove_field_names.append(FieldName.FEAT_DYNAMIC_REAL)

    return Chain(
        [RemoveFields(field_names=remove_field_names)]
        + (
            [SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0.0])]
            if not self.use_feat_static_cat
            else []
        )
        + (
            [SetField(output_field=FieldName.FEAT_STATIC_REAL, value=[0.0])]
            if not self.use_feat_static_real
            else []
        )
        + [
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_CAT,
                expected_ndim=1,
                dtype=self.dtype,
            ),
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_REAL,
                expected_ndim=1,
                dtype=self.dtype,
            ),
            AsNumpyArray(
                field=FieldName.TARGET,
                # in the following line, we add 1 for the time dimension
                expected_ndim=1 + len(self.distr_output.event_shape),
                dtype=self.dtype,
            ),
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
                dtype=self.dtype,
                imputation_method=self.imputation_method,
            ),
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=self.time_features,
                pred_length=self.prediction_length,
            ),
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.prediction_length,
                log_scale=True,
                dtype=self.dtype,
            ),
            VstackFeatures(
                output_field=FieldName.FEAT_TIME,
                input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE]
                + (
                    [FieldName.FEAT_DYNAMIC_REAL]
                    if self.use_feat_dynamic_real
                    else []
                ),
            ),
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=self.train_sampler,
                past_length=self.history_length,
                future_length=self.prediction_length,
                time_series_fields=[
                    FieldName.FEAT_TIME,
                    FieldName.OBSERVED_VALUES,
                ],
                dummy_value=self.distr_output.value_in_support,
            ),
        ]
    )
def create_transformation(self) -> Transformation:
    def use_marginal_transformation(
        marginal_transformation: bool,
    ) -> Transformation:
        if marginal_transformation:
            return CDFtoGaussianTransform(
                target_field=FieldName.TARGET,
                observed_values_field=FieldName.OBSERVED_VALUES,
                max_context_length=self.conditioning_length,
                target_dim=self.target_dim,
            )
        else:
            return RenameFields({
                f"past_{FieldName.TARGET}": f"past_{FieldName.TARGET}_cdf",
                f"future_{FieldName.TARGET}": f"future_{FieldName.TARGET}_cdf",
            })

    return Chain([
        AsNumpyArray(
            field=FieldName.TARGET,
            expected_ndim=1 + len(self.distr_output.event_shape),
        ),
        # maps the target to (1, T) if the target data is uni-dimensional
        ExpandDimArray(
            field=FieldName.TARGET,
            axis=0 if self.distr_output.event_shape[0] == 1 else None,
        ),
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        ),
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=self.time_features,
            pred_length=self.prediction_length,
        ),
        VstackFeatures(
            output_field=FieldName.FEAT_TIME,
            input_fields=[FieldName.FEAT_TIME],
        ),
        SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0.0]),
        TargetDimIndicator(
            field_name=FieldName.TARGET_DIM_INDICATOR,
            target_field=FieldName.TARGET,
        ),
        AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=ExpectedNumInstanceSampler(num_instances=1),
            past_length=self.history_length,
            future_length=self.prediction_length,
            time_series_fields=[
                FieldName.FEAT_TIME,
                FieldName.OBSERVED_VALUES,
            ],
            pick_incomplete=self.pick_incomplete,
        ),
        use_marginal_transformation(self.use_marginal_transformation),
        SampleTargetDim(
            field_name=FieldName.TARGET_DIM_INDICATOR,
            target_field=FieldName.TARGET + "_cdf",
            observed_values_field=FieldName.OBSERVED_VALUES,
            num_samples=self.target_dim_sample,
            shuffle=self.shuffle_target_dim,
        ),
    ])
def create_transformation(self) -> Transformation:
    transforms = []

    if self.use_feat_dynamic_real:
        transforms.append(
            AsNumpyArray(
                field=FieldName.FEAT_DYNAMIC_REAL,
                expected_ndim=2,
            )
        )
    else:
        transforms.extend([
            SetField(
                output_field=FieldName.FEAT_DYNAMIC_REAL,
                value=[[]] * (self.context_length + self.prediction_length),
            ),
            AsNumpyArray(
                field=FieldName.FEAT_DYNAMIC_REAL,
                expected_ndim=2,
            ),
            # SwapAxes(input_fields=[FieldName.FEAT_DYNAMIC_REAL], axes=(0, 1)),
        ])

    if self.use_feat_dynamic_cat:
        transforms.append(
            AsNumpyArray(
                field=FieldName.FEAT_DYNAMIC_CAT,
                expected_ndim=2,
            )
        )
    else:
        # Manually set dummy dynamic categorical features and split them by
        # time: an unknown issue arises in the dataloader if the splitting is
        # left to the InstanceSplitter.
        transforms.extend([
            SetField(
                output_field="past_" + FieldName.FEAT_DYNAMIC_CAT,
                value=[[]] * self.context_length,
            ),
            AsNumpyArray(
                field="past_" + FieldName.FEAT_DYNAMIC_CAT,
                expected_ndim=2,
            ),
            SetField(
                output_field="future_" + FieldName.FEAT_DYNAMIC_CAT,
                value=[[]] * self.prediction_length,
            ),
            AsNumpyArray(
                field="future_" + FieldName.FEAT_DYNAMIC_CAT,
                expected_ndim=2,
            ),
        ])

    if self.use_feat_static_real:
        transforms.append(
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_REAL,
                expected_ndim=1,
            )
        )
    else:
        transforms.extend([
            SetField(
                output_field=FieldName.FEAT_STATIC_REAL,
                value=[],
            ),
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_REAL,
                expected_ndim=1,
            ),
        ])

    if self.use_feat_static_cat:
        transforms.append(
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_CAT,
                expected_ndim=1,
            )
        )

    time_series_fields = [FieldName.OBSERVED_VALUES]
    if self.use_feat_dynamic_cat:
        time_series_fields.append(FieldName.FEAT_DYNAMIC_CAT)
    if self.use_feat_dynamic_real or (self.time_features is not None):
        time_series_fields.append(FieldName.FEAT_DYNAMIC_REAL)

    transforms.extend([
        AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        ),
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=self.time_features,
            pred_length=self.prediction_length,
        ),
        AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_AGE,
            pred_length=self.prediction_length,
            log_scale=True,
        ),
        VstackFeatures(
            output_field=FieldName.FEAT_DYNAMIC_REAL,
            input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE]
            + (
                [FieldName.FEAT_DYNAMIC_REAL]
                if self.use_feat_dynamic_real
                else []
            ),
        ),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=self.train_sampler,
            past_length=self.context_length,
            future_length=self.prediction_length,
            time_series_fields=time_series_fields,
            pick_incomplete=True,
        ),
    ])

    return Chain(transforms)
def test_simple_model():
    dsinfo, training_data, test_data = default_synthetic()

    freq = dsinfo.metadata.freq
    prediction_length = dsinfo.prediction_length
    context_length = 2 * prediction_length
    hidden_dimensions = [10, 10]

    net = LightningFeedForwardNetwork(
        freq=freq,
        prediction_length=prediction_length,
        context_length=context_length,
        hidden_dimensions=hidden_dimensions,
        distr_output=NormalOutput(),
        batch_norm=True,
        scaling=mean_abs_scaling,
    )

    transformation = AddObservedValuesIndicator(
        target_field=FieldName.TARGET,
        output_field=FieldName.OBSERVED_VALUES,
    )

    training_splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=ExpectedNumInstanceSampler(
            num_instances=1,
            min_future=prediction_length,
        ),
        past_length=context_length,
        future_length=prediction_length,
        time_series_fields=[FieldName.OBSERVED_VALUES],
    )

    data_loader = TrainDataLoader(
        training_data,
        batch_size=8,
        stack_fn=batchify,
        transform=transformation + training_splitter,
        num_batches_per_epoch=5,
    )

    trainer = pl.Trainer(max_epochs=3, callbacks=[], weights_summary=None)
    trainer.fit(net, train_dataloader=data_loader)

    prediction_splitter = InstanceSplitter(
        target_field=FieldName.TARGET,
        is_pad_field=FieldName.IS_PAD,
        start_field=FieldName.START,
        forecast_start_field=FieldName.FORECAST_START,
        instance_sampler=TestSplitSampler(),
        past_length=context_length,
        future_length=prediction_length,
        time_series_fields=[FieldName.OBSERVED_VALUES],
    )

    predictor = net.get_predictor(transformation + prediction_splitter)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_data, predictor=predictor
    )

    evaluator = Evaluator(quantiles=[0.5, 0.9], num_workers=None)
    agg_metrics, _ = evaluator(ts_it, forecast_it)
def transform_data(self):
    # First, the target series needs to be transformed.
    time_features = time_features_from_frequency_str(self.config.freq)
    self.time_dim = len(time_features) + 1  # +1 accounts for the added age feature

    seasonal = CompositeISSM.seasonal_features(self.freq)
    self.seasonal_dim = len(seasonal)

    transformation = Chain([
        SwapAxes(
            input_fields=[FieldName.TARGET],
            axes=(0, 1),
        ),
        AddObservedValuesIndicator(
            target_field=FieldName.TARGET,
            output_field=FieldName.OBSERVED_VALUES,
        ),
        # Unnormalized seasonal features
        AddTimeFeatures(
            time_features=seasonal,
            pred_length=self.pred_length,
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field="seasonal_indicators",
        ),
        AddTimeFeatures(
            start_field=FieldName.START,
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_TIME,
            time_features=time_features,
            pred_length=self.pred_length,
        ),
        AddAgeFeature(
            target_field=FieldName.TARGET,
            output_field=FieldName.FEAT_AGE,
            pred_length=self.pred_length,
            log_scale=True,
        ),
        VstackFeatures(
            output_field=FieldName.FEAT_TIME,
            input_fields=[FieldName.FEAT_TIME, FieldName.FEAT_AGE]
            + (
                [FieldName.FEAT_DYNAMIC_REAL]
                if self.use_feat_dynamic_real
                else []
            ),
        ),
        InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            train_sampler=TestSplitSampler(),
            past_length=self.config.past_length,
            future_length=self.config.pred_length,
            output_NTC=True,
            time_series_fields=[
                FieldName.FEAT_TIME,
                FieldName.OBSERVED_VALUES,
                "seasonal_indicators",
            ],
            pick_incomplete=False,
        ),
    ])
    print("Time features have been set.")

    # Set up the data loaders for the target series
    target_train_iters = [
        iter(
            TrainDataLoader_OnlyPast(
                dataset=self.target_data[i].train,
                transform=transformation,
                batch_size=self.config.batch_size,
                num_batches_per_epoch=self.config.num_batches_per_epoch,
            )
        )
        for i in range(len(self.target_data))
    ]
    target_test_iters = [
        iter(
            InferenceDataLoader_WithFuture(
                dataset=self.target_data[i].test,
                transform=transformation,
                batch_size=self.config.batch_size,
            )
        )
        for i in range(len(self.target_data))
    ]

    self.target_train_loader = stackIterOut(
        target_train_iters,
        fields=[FieldName.OBSERVED_VALUES, FieldName.TARGET],
        dim=0,
        include_future=False,
    )
    self.target_test_loader = stackIterOut(
        target_test_iters,
        fields=[FieldName.OBSERVED_VALUES, FieldName.TARGET],
        dim=0,
        include_future=True,
    )