def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    """Cut fixed-length training/inference instances out of one data entry.

    For each sampled end-index ``i`` this yields a copy of ``data`` in which
    every dynamic field (and the target) is replaced by a ``past_*`` slice of
    exactly ``self.instance_length`` steps ending at ``i``, left-padded with
    ``self.pad_value`` when the series is too short.  During inference
    (``is_train=False``) only the single most recent window is emitted, and
    ``future_*`` features may additionally be attached.

    Parameters
    ----------
    data
        A single data entry; must contain ``self.target_field`` and the
        fields in ``self.dynamic_feature_fields``, each a numpy array whose
        last axis is time (inferred from the ``shape[-1]`` / ``[..., a:b]``
        usage below — confirm against callers).
    is_train
        Whether instances are sampled for training or taken for prediction.

    Yields
    ------
    DataEntry
        One transformed entry per sampled index.
    """
    # Fields that must be sliced along the time axis (dynamic features + target).
    ts_fields = self.dynamic_feature_fields + [self.target_field]
    ts_target = data[self.target_field]
    # Length of the series along the (last) time axis.
    len_target = ts_target.shape[-1]
    if is_train:
        if len_target < self.instance_length:
            # Series shorter than one instance: either emit a single padded
            # instance ending at the series end, or skip the series entirely.
            sampling_indices = (
                # Returning [] for all time series will cause this to be in loop forever!
                [len_target] if self.allow_target_padding else []
            )
        else:
            # Delegate the choice of window end-points to the configured sampler.
            sampling_indices = self.instance_sampler(
                ts_target, self.instance_length, len_target
            )
    else:
        # Inference: exactly one instance, ending at the last observation.
        sampling_indices = [len_target]
    for i in sampling_indices:
        # Work on a shallow copy so the original entry stays intact.
        d = data.copy()
        # Number of missing steps to pad on the left when i < instance_length.
        pad_length = max(self.instance_length - i, 0)
        # update start field: shift the start timestamp back to the beginning
        # of the window (i - instance_length is negative exactly when padding).
        d[self.start_field] = shift_timestamp(
            data[self.start_field], i - self.instance_length
        )
        # set is_pad field: 1 marks left-padded (artificial) positions.
        is_pad = np.zeros(self.instance_length)
        if pad_length > 0:
            is_pad[:pad_length] = 1
        d[self.is_pad_field] = is_pad
        # update time series fields: slice/pad each one to the window.
        for ts_field in ts_fields:
            full_ts = data[ts_field]
            if pad_length > 0:
                # Prepend constant padding so the window has full length.
                pad_pre = self.pad_value * np.ones(
                    shape=full_ts.shape[:-1] + (pad_length,)
                )
                past_ts = np.concatenate([pad_pre, full_ts[..., :i]], axis=-1)
            else:
                past_ts = full_ts[..., (i - self.instance_length):i]
            # Optionally move the time axis first (transpose of a 2-D slice).
            past_ts = past_ts.transpose() if self.time_first else past_ts
            d[self._past(ts_field)] = past_ts
            if self.use_prediction_features and not is_train:
                # At prediction time, known future covariates (but never the
                # target itself) are exposed as future_* fields.
                if not ts_field == self.target_field:
                    future_ts = full_ts[..., i:i + self.prediction_length]
                    future_ts = (
                        future_ts.transpose() if self.time_first else future_ts
                    )
                    d[self._future(ts_field)] = future_ts
            # The un-sliced field must not leak into the output entry.
            del d[ts_field]
        # Forecast starts one full instance after the (shifted) window start,
        # i.e. immediately after position i of the original series.
        d[self.forecast_start_field] = shift_timestamp(
            d[self.start_field], self.instance_length
        )
        yield d
def flatmap_transform(self, data: DataEntry, is_train: bool) -> Iterator[DataEntry]:
    """Split one data entry into (past, future) training/inference instances.

    For each sampled split point ``i`` this yields a copy of ``data`` where
    every field in ``self.ts_fields`` plus the target is replaced by a
    ``past_*`` slice of length ``self.past_length`` (zero-padded on the left
    when ``self.pick_incomplete`` allows short histories) and a ``future_*``
    slice of length ``self.future_length`` starting at ``i``.

    Parameters
    ----------
    data
        A single data entry; the sliced fields are assumed to be numpy
        arrays whose last axis is time (inferred from the ``shape[-1]`` /
        ``[..., a:b]`` usage below — confirm against callers).
    is_train
        Training mode samples split points; inference mode uses the single
        split point at the end of the series.

    Yields
    ------
    DataEntry
        One transformed entry per sampled split point.
    """
    pl = self.future_length
    # Fields to be cut into past/future pieces (covariates + target).
    slice_cols = self.ts_fields + [self.target_field]
    target = data[self.target_field]
    len_target = target.shape[-1]
    # Shortest series usable for training: with pick_incomplete the history
    # may be padded, so only the future part must fully exist.
    minimum_length = (
        self.future_length
        if self.pick_incomplete
        else self.past_length + self.future_length
    )
    if is_train:
        # Valid range of split points handed to the sampler: a full future
        # window must fit after i; a full past window must fit before i
        # unless incomplete (padded) histories are allowed.
        sampling_bounds = (
            (0, len_target - self.future_length)
            if self.pick_incomplete
            else (self.past_length, len_target - self.future_length)
        )
        # We currently cannot handle time series that are
        # too short during training, so we just skip these.
        # If we want to include them we would need to pad and to
        # mask the loss.
        if self.is_full_batch:
            # Deterministically enumerate every complete split point instead
            # of sampling (range is empty for too-short series).
            sampled_indices = tuple([
                i for i in range(
                    self.past_length,
                    len_target - self.future_length + 1,
                )
            ])
        else:
            sampled_indices = (
                np.array([], dtype=int)
                if len_target < minimum_length
                else self.train_sampler(target, *sampling_bounds)
            )
    else:
        # Inference: a single instance whose history ends at the series end.
        assert self.pick_incomplete or len_target >= self.past_length
        sampled_indices = np.array([len_target], dtype=int)
    for i in sampled_indices:
        # Steps of left padding needed when the history before i is short.
        pad_length = max(self.past_length - i, 0)
        if not self.pick_incomplete:
            # Sampler bounds above guarantee a full history in this mode.
            assert (
                pad_length == 0
            ), f"pad_length should be zero, got {pad_length}"
        # Work on a shallow copy so the original entry stays intact.
        d = data.copy()
        for ts_field in slice_cols:
            if i > self.past_length:
                # truncate to past_length
                past_piece = d[ts_field][..., i - self.past_length:i]
            elif i < self.past_length:
                # Zero-pad on the left to reach exactly past_length steps.
                pad_block = np.zeros(
                    d[ts_field].shape[:-1] + (pad_length,),
                    dtype=d[ts_field].dtype,
                )
                past_piece = np.concatenate(
                    [pad_block, d[ts_field][..., :i]], axis=-1
                )
            else:
                # i == past_length: history fits exactly.
                past_piece = d[ts_field][..., :i]
            d[self._past(ts_field)] = past_piece
            # Future window of pl steps starting at the split point.
            d[self._future(ts_field)] = d[ts_field][..., i:i + pl]
            # The un-sliced field must not leak into the output entry.
            del d[ts_field]
        # 1 marks left-padded (artificial) positions of the history.
        pad_indicator = np.zeros(self.past_length)
        if pad_length > 0:
            pad_indicator[:pad_length] = 1
        if self.time_first:
            # Move the time axis first (transpose of 2-D slices).
            for ts_field in slice_cols:
                d[self._past(ts_field)] = d[self._past(
                    ts_field)].transpose()
                d[self._future(ts_field)] = d[self._future(
                    ts_field)].transpose()
        d[self._past(self.is_pad_field)] = pad_indicator
        # Forecast starts i steps after the original series start.
        d[self.forecast_start_field] = shift_timestamp(
            d[self.start_field], i
        )
        yield d
def transform(self, data: DataEntry) -> DataEntry:
    """Apply the stored callable to a shallow copy of ``data``.

    Copying first ensures the wrapped function cannot mutate the
    caller's original entry in place.
    """
    entry_copy = data.copy()
    return self.func(entry_copy)