예제 #1
0
 def map_transform(self, data: DataEntry, is_train: bool) -> DataEntry:
     """Add a "periods since the last positive target value" feature to ``data``.

     For every time step of ``data[self.target_field]`` the most recent date
     with a strictly positive target is located (the first date of the frame
     is used when the series starts without one).  The distance between each
     step and that date, measured in periods of the index frequency, is then
     lagged by one step and incremented by one.

     Parameters
     ----------
     data
         Entry holding at least ``self.start_field`` and
         ``self.target_field``.  The target is compared with ``> 0``
         element-wise, so it is assumed to be a 1-D numeric array
         (TODO confirm against callers).
     is_train
         Forwarded to ``target_transformation_length`` to determine how many
         dates have to be available in the cached date index.

     Returns
     -------
     DataEntry
         The same ``data`` object.  ``data[self.output_field]`` is set to the
         feature, or the feature is stacked below the existing value with
         ``np.vstack`` when the field is already present.
     """
     start = data[self.start_field]
     target = data[self.target_field]
     length = target_transformation_length(
         target, self.pred_length, is_train=is_train
     )
     self._update_cache(start, length)
     i0 = self._date_index[start]
     date_idx = self._date_index.iloc[i0:i0 + length].index
     # When is_train is false, date_idx has len of target_len + prediction_len
     # which is useful in time feature generation, but we only need target length
     date_idx = date_idx[:len(target)]

     # Each step with positive demand carries its own date; every other step
     # starts out as NaT.  Building the series with a datetime dtype (instead
     # of writing timestamps into a float NaN series) avoids the
     # incompatible-dtype assignment that recent pandas releases reject.
     mask = target > 0
     last_demand = pd.Series(
         date_idx, index=date_idx, name="last_demand"
     ).where(mask)

     # Assumption: if the frame starts with zero demand, the earliest date in
     # the frame is taken as the start.  ``.iloc`` is required here because
     # the index holds dates, so a bare ``[0]`` would be a label lookup.
     if len(last_demand) > 0 and pd.isnull(last_demand.iloc[0]):
         last_demand.iloc[0] = last_demand.index[0]

     # Propagate the date of the latest demand forward over zero-demand steps.
     frame = last_demand.ffill().to_frame()
     freq = frame.index.freqstr
     current_period = frame.index.to_period(freq).astype(int)
     demand_period = pd.DatetimeIndex(
         frame.iloc[:, 0]).to_period(freq).astype(int)
     frame["diff"] = current_period - demand_period

     # Lag by one step so the value at step t only depends on steps before t;
     # the first row, left empty by the shift, is back-filled from the second.
     frame["diff"] = frame["diff"].shift(1).round() + 1
     frame["diff"] = frame["diff"].bfill()
     feature = frame["diff"].values

     if self.output_field in data:
         data[self.output_field] = np.vstack(
             [data[self.output_field], feature])
     else:
         data[self.output_field] = feature
     return data
예제 #2
0
 def transform(self, data: DataEntry) -> DataEntry:
     """Store ``self.value`` under ``self.output_field`` unless it is already set.

     An existing entry for ``self.output_field`` is never overwritten.  The
     entry is modified in place and the same object is returned.
     """
     field = self.output_field
     if field in data:
         # Already populated: leave the caller's value untouched.
         return data
     data[field] = self.value
     return data
예제 #3
0
 def transform(self, data: DataEntry) -> DataEntry:
     """Drop every field listed in ``self.field_names`` from ``data``.

     Names that are not present in the entry are skipped silently.  The
     entry is modified in place and the same object is returned.
     """
     for name in self.field_names:
         # A default of None makes the removal a no-op for absent keys.
         data.pop(name, None)
     return data