예제 #1
0
    def flatmap_transform(self, data: DataEntry,
                          is_train: bool) -> Iterator[DataEntry]:
        """Yield fixed-length "past" instances cut from one data entry.

        Split indices are chosen along the last axis of the target:

        * not training: a single index at the end of the target;
        * training, target at least ``self.instance_length`` long: whatever
          ``self.instance_sampler`` returns;
        * training, shorter target: a single index at the end of the target
          if ``self.allow_target_padding`` is set, otherwise no index at all.

        For every index ``i`` a copy of ``data`` is yielded in which

        * ``self.start_field`` is the original start shifted by
          ``i - self.instance_length`` (via ``shift_timestamp``);
        * ``self.is_pad_field`` is an array of length ``self.instance_length``
          with ones on the left-padded positions;
        * each field in ``self.dynamic_feature_fields`` plus the target is
          removed and replaced by its ``self._past(...)`` window of length
          ``self.instance_length``, left-padded with ``self.pad_value`` when
          fewer than ``self.instance_length`` steps precede ``i``;
        * outside training, and only if ``self.use_prediction_features`` is
          set, each non-target field also gets a ``self._future(...)`` slice
          of up to ``self.prediction_length`` steps starting at ``i``;
        * ``self.forecast_start_field`` is the shifted start plus
          ``self.instance_length``.

        Windows are transposed when ``self.time_first`` is set.
        """
        series_fields = self.dynamic_feature_fields + [self.target_field]
        target = data[self.target_field]
        target_len = target.shape[-1]

        if not is_train:
            split_points = [target_len]
        elif target_len >= self.instance_length:
            split_points = self.instance_sampler(
                target, self.instance_length, target_len)
        elif self.allow_target_padding:
            split_points = [target_len]
        else:
            # NOTE: if every series ends up here, a consumer that keeps
            # cycling until it receives an instance will never terminate.
            split_points = []

        for split in split_points:
            entry = data.copy()
            n_pad = max(self.instance_length - split, 0)

            # Move the start timestamp to the first step of the window.
            entry[self.start_field] = shift_timestamp(
                data[self.start_field], split - self.instance_length)

            # Flag the left-padded positions.
            pad_mask = np.zeros(self.instance_length)
            if n_pad > 0:
                pad_mask[:n_pad] = 1
            entry[self.is_pad_field] = pad_mask

            # Replace each full series by its windowed version.
            for name in series_fields:
                series = data[name]
                if n_pad > 0:
                    filler_shape = series.shape[:-1] + (n_pad, )
                    filler = self.pad_value * np.ones(shape=filler_shape)
                    history = np.concatenate(
                        [filler, series[..., :split]], axis=-1)
                else:
                    window_start = split - self.instance_length
                    history = series[..., window_start:split]

                if self.time_first:
                    history = history.transpose()
                entry[self._past(name)] = history

                if (self.use_prediction_features and not is_train
                        and name != self.target_field):
                    horizon_end = split + self.prediction_length
                    lookahead = series[..., split:horizon_end]
                    if self.time_first:
                        lookahead = lookahead.transpose()
                    entry[self._future(name)] = lookahead

                del entry[name]

            entry[self.forecast_start_field] = shift_timestamp(
                entry[self.start_field], self.instance_length)

            yield entry
예제 #2
0
    def flatmap_transform(self, data: DataEntry,
                          is_train: bool) -> Iterator[DataEntry]:
        """Yield past/future splits of one data entry.

        Split indices are chosen along the last axis of the target:

        * not training: a single index at the end of the target (asserting
          that the target has at least ``self.past_length`` steps unless
          ``self.pick_incomplete`` is set);
        * training with ``self.is_full_batch``: every index from
          ``self.past_length`` through ``len(target) - self.future_length``
          inclusive;
        * training otherwise: the indices returned by ``self.train_sampler``,
          or none when the target is too short to be split.

        For every index ``i`` a copy of ``data`` is yielded in which each
        field of ``self.ts_fields`` plus the target is removed and replaced
        by a ``self._past(...)`` window of length ``self.past_length``
        (left-padded with zeros when fewer steps precede ``i``) and a
        ``self._future(...)`` slice of up to ``self.future_length`` steps
        starting at ``i``.  The entry also receives a padding indicator under
        ``self._past(self.is_pad_field)`` and, under
        ``self.forecast_start_field``, the start timestamp shifted by ``i``.
        Windows are transposed when ``self.time_first`` is set.
        """
        horizon = self.future_length
        fields_to_split = self.ts_fields + [self.target_field]
        series = data[self.target_field]
        series_len = series.shape[-1]

        if not is_train:
            assert self.pick_incomplete or series_len >= self.past_length
            split_points = np.array([series_len], dtype=int)
        elif self.is_full_batch:
            split_points = tuple(
                range(self.past_length, series_len - horizon + 1))
        else:
            shortest_usable = (horizon if self.pick_incomplete else
                               self.past_length + horizon)
            if series_len < shortest_usable:
                # Series that are too short cannot be used for training yet:
                # including them would require padding and masking the loss,
                # so they are skipped.
                split_points = np.array([], dtype=int)
            else:
                earliest = 0 if self.pick_incomplete else self.past_length
                split_points = self.train_sampler(series, earliest,
                                                  series_len - horizon)

        for split in split_points:
            n_pad = max(self.past_length - split, 0)
            assert self.pick_incomplete or n_pad == 0, (
                f"pad_length should be zero, got {n_pad}")

            entry = data.copy()
            for name in fields_to_split:
                full = entry[name]
                if n_pad > 0:
                    # Fewer than past_length steps available: left-pad.
                    filler = np.zeros(full.shape[:-1] + (n_pad, ),
                                      dtype=full.dtype)
                    history = np.concatenate([filler, full[..., :split]],
                                             axis=-1)
                else:
                    # Keep exactly the last past_length steps before split.
                    history = full[..., split - self.past_length:split]
                entry[self._past(name)] = history
                entry[self._future(name)] = full[..., split:split + horizon]
                del entry[name]

            pad_mask = np.zeros(self.past_length)
            pad_mask[:n_pad] = 1  # empty slice (no-op) when n_pad == 0

            if self.time_first:
                for name in fields_to_split:
                    for key in (self._past(name), self._future(name)):
                        entry[key] = entry[key].transpose()

            entry[self._past(self.is_pad_field)] = pad_mask
            entry[self.forecast_start_field] = shift_timestamp(
                entry[self.start_field], split)
            yield entry
예제 #3
0
 def transform(self, data: DataEntry) -> DataEntry:
     """Call ``self.func`` on a copy of ``data`` and return its result.

     The copy is made with ``data.copy()``, so ``self.func`` is handed its
     own object rather than the one the caller passed in.
     """
     duplicate = data.copy()
     return self.func(duplicate)