Ejemplo n.º 1
0
    def _create_instance_splitter(self, mode: str):
        assert mode in ["training", "validation", "test"]

        instance_sampler = {
            "training": self.train_sampler,
            "validation": self.validation_sampler,
            "test": TestSplitSampler(),
        }[mode]

        return InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=instance_sampler,
            past_length=self.context_length,
            future_length=self.prediction_length,
            time_series_fields=[FieldName.OBSERVED_VALUES],
        )
Ejemplo n.º 2
0
 def create_transformation(self) -> Transformation:
     """Return the two-step preprocessing chain.

     The first step adds ``FieldName.OBSERVED_VALUES`` for the target using
     ``self.dtype`` and ``self.imputation_method``. The second step is an
     ``InstanceSplitter`` configured with an
     ``ExpectedNumInstanceSampler(num_instances=1)`` as its train sampler,
     ``self.context_length`` as past length and ``self.prediction_length``
     as future length.
     """
     observed_indicator = AddObservedValuesIndicator(
         target_field=FieldName.TARGET,
         output_field=FieldName.OBSERVED_VALUES,
         dtype=self.dtype,
         imputation_method=self.imputation_method,
     )
     window_splitter = InstanceSplitter(
         target_field=FieldName.TARGET,
         is_pad_field=FieldName.IS_PAD,
         start_field=FieldName.START,
         forecast_start_field=FieldName.FORECAST_START,
         train_sampler=ExpectedNumInstanceSampler(num_instances=1),
         past_length=self.context_length,
         future_length=self.prediction_length,
         time_series_fields=[FieldName.OBSERVED_VALUES],
     )
     return Chain([observed_indicator, window_splitter])
Ejemplo n.º 3
0
 def create_transformation(self) -> Transformation:
     """Return the preprocessing chain: array cast, observed mask, splitter.

     The target is converted with ``expected_ndim=2`` and ``self.dtype``,
     an observed-values indicator is added, and an ``InstanceSplitter`` is
     appended that uses ``self.context_length`` / ``self.future_length``
     and ``output_NTC=False``.
     """
     target_as_array = AsNumpyArray(
         field=FieldName.TARGET, expected_ndim=2, dtype=self.dtype)
     observed_indicator = AddObservedValuesIndicator(
         target_field=FieldName.TARGET,
         output_field=FieldName.OBSERVED_VALUES,
         dtype=self.dtype,
     )
     window_splitter = InstanceSplitter(
         target_field=FieldName.TARGET,
         is_pad_field=FieldName.IS_PAD,
         start_field=FieldName.START,
         forecast_start_field=FieldName.FORECAST_START,
         train_sampler=ExpectedNumInstanceSampler(num_instances=1),
         time_series_fields=[FieldName.OBSERVED_VALUES],
         past_length=self.context_length,
         future_length=self.future_length,
         # Output NCT rather than NTC for the first conv1d layer.
         output_NTC=False,
     )
     return Chain(trans=[target_as_array, observed_indicator, window_splitter])
Ejemplo n.º 4
0
		def create_training_data_loader(self, dataset, **kwargs):
			"""Return a ``TrainDataLoader`` over ``dataset``.

			The loader's transform is an ``InstanceSplitter`` followed by a
			``SelectFields`` restricted to the hybrid-forward input names of
			``MyProbTrainNetwork``. Batches are stacked with ``batchify`` and
			decoded with ``as_in_context``, both bound to ``self.trainer.ctx``.
			Any extra ``kwargs`` are forwarded to ``TrainDataLoader``.
			"""
			splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=ExpectedNumInstanceSampler(
					num_instances=1,
					min_future=self.prediction_length
				),
				past_length=self.context_length,
				future_length=self.prediction_length,
			)
			network_inputs = get_hybrid_forward_input_names(MyProbTrainNetwork)

			# Keep only the fields the training network consumes.
			pipeline = splitter + SelectFields(network_inputs)
			stack_batch = functools.partial(batchify, ctx=self.trainer.ctx, dtype=self.dtype)
			move_to_ctx = functools.partial(as_in_context, ctx=self.trainer.ctx)

			return TrainDataLoader(
				dataset=dataset,
				transform=pipeline,
				batch_size=self.batch_size,
				stack_fn=stack_batch,
				decode_fn=move_to_ctx,
				**kwargs,
			)
Ejemplo n.º 5
0
 def create_transformation(self) -> Transformation:
     """Return the preprocessing chain with time features and a splitter.

     Steps, in order: cast the target to an array (``expected_ndim=1``),
     add time features derived from ``self.freq``, default the static
     categorical feature to ``[0.0]`` when absent and cast it to an array,
     then apply an ``InstanceSplitter`` whose sampler is a
     ``TestSplitSampler`` and which also splits ``FieldName.FEAT_TIME``.
     """
     steps = [
         AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
         AddTimeFeatures(
             start_field=FieldName.START,
             target_field=FieldName.TARGET,
             output_field=FieldName.FEAT_TIME,
             time_features=time_features_from_frequency_str(self.freq),
             pred_length=self.prediction_length,
         ),
         SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0.0]),
         AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
     ]
     steps.append(
         InstanceSplitter(
             target_field=FieldName.TARGET,
             is_pad_field=FieldName.IS_PAD,
             start_field=FieldName.START,
             forecast_start_field=FieldName.FORECAST_START,
             train_sampler=TestSplitSampler(),
             time_series_fields=[FieldName.FEAT_TIME],
             past_length=self.context_length,
             future_length=self.prediction_length,
         )
     )
     return Chain(trans=steps)
    def test_dataset(dataset):
        """Check how often each item id shows up in the training batches.

        Builds a ``TrainDataLoader`` over ``dataset`` with a sampler that
        returns exactly one index per series, counts the ``"item_id"``
        values seen over ``num_epochs`` epochs, and asserts every index in
        ``range(len(dataset))`` was seen between ``num_passes - 1`` and
        ``num_passes + 1`` times.

        NOTE(review): ``batch_size``, ``num_workers``, ``num_epochs``,
        ``num_batches_per_epoch`` and ``num_passes`` are free variables that
        must come from an enclosing scope not shown here.
        """
        class ExactlyOneSampler(InstanceSampler):
            # Always returns a single index: the lower bound of the range.
            def __call__(self, ts: np.ndarray) -> np.ndarray:
                lower, upper = self._get_bounds(ts)
                span = upper - lower + 1
                assert span > 0
                return np.array([lower])

        splitter = InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=ExactlyOneSampler(),
            past_length=10,
            future_length=5,
            dummy_value=1.0,
        )

        loader = TrainDataLoader(
            dataset=dataset,
            transform=splitter,
            batch_size=batch_size,
            stack_fn=partial(batchify, ctx=current_context()),
            decode_fn=partial(as_in_context, ctx=current_context()),
            num_workers=num_workers,
        )

        # Tally occurrences of each item id across all epochs.
        seen_counts = defaultdict(int)
        for _ in range(num_epochs):
            for batch in islice(loader, num_batches_per_epoch):
                for item_id in batch["item_id"]:
                    seen_counts[item_id] += 1

        for index in range(len(dataset)):
            times_seen = seen_counts[index]
            assert num_passes - 1 <= times_seen <= num_passes + 1
Ejemplo n.º 7
0
    def _create_instance_splitter(self, mode: str):
        assert mode in ["training", "validation", "test"]

        instance_sampler = {
            "training": self.train_sampler,
            "validation": self.validation_sampler,
            "test": TestSplitSampler(),
        }[mode]

        return (InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=instance_sampler,
            past_length=self.history_length,
            future_length=self.prediction_length,
            time_series_fields=[
                FieldName.FEAT_TIME,
                FieldName.OBSERVED_VALUES,
            ],
        ) + (CDFtoGaussianTransform(
            target_field=FieldName.TARGET,
            observed_values_field=FieldName.OBSERVED_VALUES,
            max_context_length=self.conditioning_length,
            target_dim=self.target_dim,
        ) if self.use_marginal_transformation else RenameFields(
            {
                f"past_{FieldName.TARGET}": f"past_{FieldName.TARGET}_cdf",
                f"future_{FieldName.TARGET}": f"future_{FieldName.TARGET}_cdf",
            })) + SampleTargetDim(
                field_name=FieldName.TARGET_DIM_INDICATOR,
                target_field=FieldName.TARGET + "_cdf",
                observed_values_field=FieldName.OBSERVED_VALUES,
                num_samples=self.target_dim_sample,
                shuffle=self.shuffle_target_dim,
            ))
Ejemplo n.º 8
0
		def create_predictor(
			self, transformation: Transformation, trained_network: mx.gluon.HybridBlock
		) -> Predictor:
			"""Wrap the trained network in a ``RepresentableBlockPredictor``.

			A ``MyProbPredRNN`` is created from this estimator's
			hyper-parameters, the parameters of ``trained_network`` are copied
			into it, and the predictor's input transform is
			``transformation`` followed by a test-time ``InstanceSplitter``.
			"""
			# The splitter is given one step more than ``context_length`` as
			# its past length.
			splitter = InstanceSplitter(
				target_field=FieldName.TARGET,
				is_pad_field=FieldName.IS_PAD,
				start_field=FieldName.START,
				forecast_start_field=FieldName.FORECAST_START,
				instance_sampler=TestSplitSampler(),
				past_length=self.context_length + 1,
				future_length=self.prediction_length,
				time_series_fields=[
					FieldName.FEAT_DYNAMIC_REAL,
					FieldName.OBSERVED_VALUES,
				],
			)

			network = MyProbPredRNN(
				prediction_length=self.prediction_length,
				context_length=self.context_length,
				distr_output=self.distr_output,
				num_cells=self.num_cells,
				num_layers=self.num_layers,
				num_sample_paths=self.num_sample_paths,
				scaling=self.scaling
			)
			copy_parameters(trained_network, network)

			full_transform = transformation + splitter
			return RepresentableBlockPredictor(
				input_transform=full_transform,
				prediction_net=network,
				batch_size=self.trainer.batch_size,
				freq=self.freq,
				prediction_length=self.prediction_length,
				ctx=self.trainer.ctx,
			)
Ejemplo n.º 9
0
    def _create_instance_splitter(self, module: DeepARLightningModule,
                                  mode: str):
        """Build the ``InstanceSplitter`` for ``module`` in the given ``mode``.

        ``mode`` must be ``"training"``, ``"validation"`` or ``"test"`` and
        selects ``self.train_sampler``, ``self.validation_sampler`` or a
        fresh ``TestSplitSampler``. The past length is read from
        ``module.model._past_length``; padding uses
        ``self.distr_output.value_in_support`` as the dummy value.
        """
        assert mode in ["training", "validation", "test"]

        # Every candidate is evaluated up front; the mode then selects one.
        sampler_choices = {
            "training": self.train_sampler,
            "validation": self.validation_sampler,
            "test": TestSplitSampler(),
        }
        chosen_sampler = sampler_choices[mode]

        split_fields = [
            FieldName.FEAT_TIME,
            FieldName.OBSERVED_VALUES,
        ]
        return InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=chosen_sampler,
            past_length=module.model._past_length,
            future_length=self.prediction_length,
            time_series_fields=split_fields,
            dummy_value=self.distr_output.value_in_support,
        )
Ejemplo n.º 10
0
    def create_transformation(self) -> Transformation:
        """Return the full preprocessing chain for this estimator.

        Unused feature fields are removed first, missing static features are
        replaced by ``[0.0]`` placeholders, the static features and target
        are cast to arrays, and observed-value, time and age features are
        added and stacked into ``FieldName.FEAT_TIME``. The last step is an
        ``InstanceSplitter`` driven by ``self.train_sampler``.
        """
        # The dynamic categorical feature is always dropped; the real-valued
        # features are dropped only when the estimator does not use them.
        dropped_fields = [FieldName.FEAT_DYNAMIC_CAT]
        if not self.use_feat_static_real:
            dropped_fields.append(FieldName.FEAT_STATIC_REAL)
        if not self.use_feat_dynamic_real:
            dropped_fields.append(FieldName.FEAT_DYNAMIC_REAL)

        steps = [RemoveFields(field_names=dropped_fields)]

        # Placeholders for static features the estimator does not use.
        if not self.use_feat_static_cat:
            steps.append(
                SetField(output_field=FieldName.FEAT_STATIC_CAT, value=[0.0]))
        if not self.use_feat_static_real:
            steps.append(
                SetField(output_field=FieldName.FEAT_STATIC_REAL, value=[0.0]))

        steps.append(
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_CAT,
                expected_ndim=1,
                dtype=self.dtype,
            ))
        steps.append(
            AsNumpyArray(
                field=FieldName.FEAT_STATIC_REAL,
                expected_ndim=1,
                dtype=self.dtype,
            ))
        steps.append(
            AsNumpyArray(
                field=FieldName.TARGET,
                # One extra dimension is added for the time axis.
                expected_ndim=1 + len(self.distr_output.event_shape),
                dtype=self.dtype,
            ))
        steps.append(
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
                dtype=self.dtype,
                imputation_method=self.imputation_method,
            ))
        steps.append(
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=self.time_features,
                pred_length=self.prediction_length,
            ))
        steps.append(
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.prediction_length,
                log_scale=True,
                dtype=self.dtype,
            ))

        # Time and age features are always stacked; dynamic real features
        # join them only when enabled.
        stacked_inputs = [FieldName.FEAT_TIME, FieldName.FEAT_AGE]
        if self.use_feat_dynamic_real:
            stacked_inputs.append(FieldName.FEAT_DYNAMIC_REAL)
        steps.append(
            VstackFeatures(
                output_field=FieldName.FEAT_TIME,
                input_fields=stacked_inputs,
            ))

        steps.append(
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=self.train_sampler,
                past_length=self.history_length,
                future_length=self.prediction_length,
                time_series_fields=[
                    FieldName.FEAT_TIME,
                    FieldName.OBSERVED_VALUES,
                ],
                dummy_value=self.distr_output.value_in_support,
            ))
        return Chain(steps)
Ejemplo n.º 11
0
    def create_transformation(self) -> Transformation:
        """Return the preprocessing chain for the multivariate estimator.

        The chain casts and expands the target, adds observed-value and time
        features, installs a default static categorical feature and a
        target-dimension indicator, splits instances, and then either applies
        a ``CDFtoGaussianTransform`` or renames the past/future target fields
        with a ``_cdf`` suffix, depending on
        ``self.use_marginal_transformation``. A ``SampleTargetDim`` step
        closes the chain.
        """
        steps = [
            AsNumpyArray(
                field=FieldName.TARGET,
                expected_ndim=1 + len(self.distr_output.event_shape),
            ),
            # Maps the target to (1, T) if the target data is
            # uni-dimensional.
            ExpandDimArray(
                field=FieldName.TARGET,
                axis=0 if self.distr_output.event_shape[0] == 1 else None,
            ),
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
            ),
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=self.time_features,
                pred_length=self.prediction_length,
            ),
            VstackFeatures(
                output_field=FieldName.FEAT_TIME,
                input_fields=[FieldName.FEAT_TIME],
            ),
            SetFieldIfNotPresent(field=FieldName.FEAT_STATIC_CAT, value=[0.0]),
            TargetDimIndicator(
                field_name=FieldName.TARGET_DIM_INDICATOR,
                target_field=FieldName.TARGET,
            ),
            AsNumpyArray(field=FieldName.FEAT_STATIC_CAT, expected_ndim=1),
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=self.history_length,
                future_length=self.prediction_length,
                time_series_fields=[
                    FieldName.FEAT_TIME,
                    FieldName.OBSERVED_VALUES,
                ],
                pick_incomplete=self.pick_incomplete,
            ),
        ]

        # Either transform the target, or just rename it so that the
        # "<target>_cdf" fields used below exist in both cases.
        if self.use_marginal_transformation:
            steps.append(
                CDFtoGaussianTransform(
                    target_field=FieldName.TARGET,
                    observed_values_field=FieldName.OBSERVED_VALUES,
                    max_context_length=self.conditioning_length,
                    target_dim=self.target_dim,
                ))
        else:
            steps.append(
                RenameFields({
                    f"past_{FieldName.TARGET}":
                    f"past_{FieldName.TARGET}_cdf",
                    f"future_{FieldName.TARGET}":
                    f"future_{FieldName.TARGET}_cdf",
                }))

        steps.append(
            SampleTargetDim(
                field_name=FieldName.TARGET_DIM_INDICATOR,
                target_field=FieldName.TARGET + "_cdf",
                observed_values_field=FieldName.OBSERVED_VALUES,
                num_samples=self.target_dim_sample,
                shuffle=self.shuffle_target_dim,
            ))
        return Chain(steps)
Ejemplo n.º 12
0
    def create_transformation(self) -> Transformation:
        """Return the preprocessing chain, with placeholders for unused features.

        For each optional feature group (dynamic real, dynamic categorical,
        static real, static categorical) the provided field is cast to an
        array when the matching ``use_feat_*`` flag is set; otherwise an
        empty placeholder is installed (no placeholder is added for the
        static categorical feature). Observed-value, time and age features
        are then added, stacked into ``FieldName.FEAT_DYNAMIC_REAL``, and the
        chain ends with an ``InstanceSplitter`` using ``pick_incomplete=True``.
        """
        steps = []

        # Dynamic real features: cast them, or install a placeholder made of
        # context_length + prediction_length empty lists.
        if not self.use_feat_dynamic_real:
            steps += [
                SetField(
                    output_field=FieldName.FEAT_DYNAMIC_REAL,
                    value=[[]] *
                    (self.context_length + self.prediction_length),
                ),
                AsNumpyArray(
                    field=FieldName.FEAT_DYNAMIC_REAL,
                    expected_ndim=2,
                ),
                # NOTE: a SwapAxes over FEAT_DYNAMIC_REAL with axes=(0, 1)
                # used to be listed here and is disabled.
            ]
        else:
            steps.append(
                AsNumpyArray(
                    field=FieldName.FEAT_DYNAMIC_REAL,
                    expected_ndim=2,
                ))

        # Dynamic categorical features.
        if not self.use_feat_dynamic_cat:
            # Dummy dynamic categorical features are set manually and already
            # split by time: leaving the split to InstanceSplitter triggers
            # an unknown issue in the data loader.
            past_cat_field = "past_" + FieldName.FEAT_DYNAMIC_CAT
            future_cat_field = "future_" + FieldName.FEAT_DYNAMIC_CAT
            steps += [
                SetField(
                    output_field=past_cat_field,
                    value=[[]] * self.context_length,
                ),
                AsNumpyArray(
                    field=past_cat_field,
                    expected_ndim=2,
                ),
                SetField(
                    output_field=future_cat_field,
                    value=[[]] * self.prediction_length,
                ),
                AsNumpyArray(
                    field=future_cat_field,
                    expected_ndim=2,
                ),
            ]
        else:
            steps.append(
                AsNumpyArray(
                    field=FieldName.FEAT_DYNAMIC_CAT,
                    expected_ndim=2,
                ))

        # Static real features: cast them, or install an empty placeholder.
        if not self.use_feat_static_real:
            steps += [
                SetField(
                    output_field=FieldName.FEAT_STATIC_REAL,
                    value=[],
                ),
                AsNumpyArray(
                    field=FieldName.FEAT_STATIC_REAL,
                    expected_ndim=1,
                ),
            ]
        else:
            steps.append(
                AsNumpyArray(
                    field=FieldName.FEAT_STATIC_REAL,
                    expected_ndim=1,
                ))

        # Static categorical features get no placeholder when unused.
        if self.use_feat_static_cat:
            steps.append(
                AsNumpyArray(
                    field=FieldName.FEAT_STATIC_CAT,
                    expected_ndim=1,
                ))

        # Fields the instance splitter cuts along the time axis.
        split_fields = [FieldName.OBSERVED_VALUES]
        if self.use_feat_dynamic_cat:
            split_fields.append(FieldName.FEAT_DYNAMIC_CAT)
        if self.use_feat_dynamic_real or (self.time_features is not None):
            split_fields.append(FieldName.FEAT_DYNAMIC_REAL)

        # Time and age features are always stacked; the provided dynamic
        # real features join them only when enabled.
        stacked_inputs = [FieldName.FEAT_TIME, FieldName.FEAT_AGE] + (
            [FieldName.FEAT_DYNAMIC_REAL]
            if self.use_feat_dynamic_real else [])

        steps += [
            AsNumpyArray(field=FieldName.TARGET, expected_ndim=1),
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
            ),
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=self.time_features,
                pred_length=self.prediction_length,
            ),
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.prediction_length,
                log_scale=True,
            ),
            VstackFeatures(
                output_field=FieldName.FEAT_DYNAMIC_REAL,
                input_fields=stacked_inputs,
            ),
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=self.train_sampler,
                past_length=self.context_length,
                future_length=self.prediction_length,
                time_series_fields=split_fields,
                pick_incomplete=True,
            ),
        ]
        return Chain(steps)
Ejemplo n.º 13
0
def test_simple_model():
    """Smoke-test training, prediction and evaluation of the feed-forward net.

    Trains a ``LightningFeedForwardNetwork`` on the default synthetic
    dataset for three epochs, builds a predictor from it, and runs the
    ``Evaluator`` over the test-set forecasts. The test only checks that
    this pipeline runs; no metric values are asserted.
    """
    dsinfo, training_data, test_data = default_synthetic()

    horizon = dsinfo.prediction_length
    lookback = 2 * horizon

    net = LightningFeedForwardNetwork(
        freq=dsinfo.metadata.freq,
        prediction_length=horizon,
        context_length=lookback,
        hidden_dimensions=[10, 10],
        distr_output=NormalOutput(),
        batch_norm=True,
        scaling=mean_abs_scaling,
    )

    observed_indicator = AddObservedValuesIndicator(
        target_field=FieldName.TARGET,
        output_field=FieldName.OBSERVED_VALUES,
    )

    def build_splitter(sampler):
        # Training and prediction splitters differ only in their sampler.
        return InstanceSplitter(
            target_field=FieldName.TARGET,
            is_pad_field=FieldName.IS_PAD,
            start_field=FieldName.START,
            forecast_start_field=FieldName.FORECAST_START,
            instance_sampler=sampler,
            past_length=lookback,
            future_length=horizon,
            time_series_fields=[FieldName.OBSERVED_VALUES],
        )

    train_splitter = build_splitter(
        ExpectedNumInstanceSampler(
            num_instances=1,
            min_future=horizon,
        )
    )

    train_loader = TrainDataLoader(
        training_data,
        batch_size=8,
        stack_fn=batchify,
        transform=observed_indicator + train_splitter,
        num_batches_per_epoch=5,
    )

    trainer = pl.Trainer(max_epochs=3, callbacks=[], weights_summary=None)
    trainer.fit(net, train_dataloader=train_loader)

    test_splitter = build_splitter(TestSplitSampler())
    predictor = net.get_predictor(observed_indicator + test_splitter)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=test_data, predictor=predictor
    )

    evaluator = Evaluator(quantiles=[0.5, 0.9], num_workers=None)

    agg_metrics, _ = evaluator(ts_it, forecast_it)
Ejemplo n.º 14
0
    def transform_data(self):
        """Build the transformation chain and the stacked target loaders.

        Sets ``self.time_dim`` and ``self.seasonal_dim``, creates one
        training and one inference iterator per entry of
        ``self.target_data``, and stores the stacked results in
        ``self.target_train_loader`` and ``self.target_test_loader``.

        NOTE(review): frequency and prediction length are read both from
        ``self`` and from ``self.config`` (``self.freq`` vs
        ``self.config.freq``, ``self.pred_length`` vs
        ``self.config.pred_length``) — confirm these always agree.
        """
        # First, the target has to be ... (the original note is unfinished).
        time_feature_fns = time_features_from_frequency_str(self.config.freq)
        # One more than the time features, to account for the age feature
        # stacked in below.
        self.time_dim = len(time_feature_fns) + 1
        seasonal_feature_fns = CompositeISSM.seasonal_features(self.freq)
        self.seasonal_dim = len(seasonal_feature_fns)

        # Time and age features are always stacked; dynamic real features
        # join them only when enabled.
        stacked_inputs = [FieldName.FEAT_TIME, FieldName.FEAT_AGE] + (
            [FieldName.FEAT_DYNAMIC_REAL]
            if self.use_feat_dynamic_real else [])

        pipeline = Chain([
            SwapAxes(
                input_fields=[FieldName.TARGET],
                axes=(0, 1),
            ),
            AddObservedValuesIndicator(
                target_field=FieldName.TARGET,
                output_field=FieldName.OBSERVED_VALUES,
            ),
            # Unnormalized seasonal features.
            AddTimeFeatures(
                time_features=seasonal_feature_fns,
                pred_length=self.pred_length,
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field="seasonal_indicators",
            ),
            AddTimeFeatures(
                start_field=FieldName.START,
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_TIME,
                time_features=time_feature_fns,
                pred_length=self.pred_length,
            ),
            AddAgeFeature(
                target_field=FieldName.TARGET,
                output_field=FieldName.FEAT_AGE,
                pred_length=self.pred_length,
                log_scale=True,
            ),
            VstackFeatures(
                output_field=FieldName.FEAT_TIME,
                input_fields=stacked_inputs,
            ),
            InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=TestSplitSampler(),
                past_length=self.config.past_length,
                future_length=self.config.pred_length,
                output_NTC=True,
                time_series_fields=[
                    FieldName.FEAT_TIME, FieldName.OBSERVED_VALUES,
                    "seasonal_indicators"
                ],
                pick_incomplete=False,
            )
        ])
        print('已设置时间特征~~')

        # Create one data-loader iterator per entry of self.target_data,
        # first for the training splits, then for the test splits.
        train_iterators = []
        for idx in range(len(self.target_data)):
            train_loader = TrainDataLoader_OnlyPast(
                dataset=self.target_data[idx].train,
                transform=pipeline,
                batch_size=self.config.batch_size,
                num_batches_per_epoch=self.config.num_batches_per_epoch,
            )
            train_iterators.append(iter(train_loader))

        test_iterators = []
        for idx in range(len(self.target_data)):
            test_loader = InferenceDataLoader_WithFuture(
                dataset=self.target_data[idx].test,
                transform=pipeline,
                batch_size=self.config.batch_size,
            )
            test_iterators.append(iter(test_loader))

        stacked_fields = [FieldName.OBSERVED_VALUES, FieldName.TARGET]
        self.target_train_loader = stackIterOut(
            train_iterators,
            fields=stacked_fields,
            dim=0,
            include_future=False)
        self.target_test_loader = stackIterOut(
            test_iterators,
            fields=[FieldName.OBSERVED_VALUES, FieldName.TARGET],
            dim=0,
            include_future=True)