Ejemplo n.º 1
0
 def encode(self, xs: pd.Series):
     """Return ``xs`` with its missing entries filled in.

     If ``self.by`` is set, it is used as the name of a no-argument method
     of ``xs`` (resolved with ``getattr``); the value that method returns
     becomes the fill value. Otherwise ``self.value`` is used.

     Side effect: when the ``keep_original`` config entry is truthy, the
     missing-value mask of the input is stored on ``self.mask``.
     """
     # Remember where the gaps were, but only when the config asks for it.
     if get_config('keep_original'):
         self.mask = xs.isna()

     reducer_name = self.by
     if reducer_name is None:
         return xs.fillna(self.value)

     # Resolve the named Series method and call it to get the fill value.
     fill_value = getattr(xs, reducer_name)()
     return xs.fillna(fill_value)
Ejemplo n.º 2
0
 def encode(self, xs:pd.Series)->pd.Series:
     """Replace each value of ``xs`` with the number of the date bin it is in.

     The bin edges are ``self.date_range``; when that attribute is ``None``
     it is built once with ``pd.date_range`` from the instance's settings
     and cached on the instance. Bin ``i`` spans edge ``i`` to edge
     ``i + 1`` and membership is tested with ``Series.between``.

     Values that match no bin keep the initial label ``0``. Bins are
     written in ascending order, so a value matched by two consecutive
     bins ends up with the higher number.

     Side effect: when the ``keep_original`` config entry is truthy, the
     input Series is stored on ``self.original_xs``.

     Returns an ``int64`` Series with the index and name of ``xs``.
     """
     if self.date_range is None:
         # NOTE(review): newer pandas releases replaced the ``closed``
         # keyword of ``pd.date_range`` with ``inclusive`` - confirm which
         # pandas version this code has to support.
         self.date_range = pd.date_range(
             self.start, self.end, self.periods, self.freq, self.tz,
             self.normalize, closed=self.closed
         )

     edges = self.date_range
     bin_numbers = pd.Series(
         np.zeros(xs.shape, dtype=np.int64),
         index=xs.index,
         name=xs.name,
     )

     # Walk consecutive edge pairs; later bins overwrite earlier ones.
     for bin_no, (left_edge, right_edge) in enumerate(zip(edges[:-1], edges[1:])):
         inside = xs.between(left_edge, right_edge)
         bin_numbers[inside] = bin_no

     if get_config('keep_original'):
         self.original_xs = xs
     return bin_numbers
Ejemplo n.º 3
0
    def encode(self, xs: pd.Series) -> pd.Series:
        """Cap the values of ``xs`` at a lower and an upper bound.

        When ``self.upper`` is ``None`` the bounds are taken from this
        input (quantile ``self.percentile`` for the upper bound and
        ``1.0 - self.percentile`` for the lower bound) and cached on
        ``self.upper`` / ``self.lower``. Otherwise the cached bounds are
        reused.

        Side effect: when the ``keep_original`` config entry is truthy, the
        two out-of-bounds masks and the original values they select are
        stored on the instance.

        Returns a Series in which values above the upper bound are replaced
        by it and values below the lower bound are replaced by the lower
        bound.
        """
        if self.upper is not None:
            ceiling, floor = self.upper, self.lower
        else:
            # First use: learn the bounds from the data and remember them.
            ceiling = xs.quantile(self.percentile)
            floor = xs.quantile(1.0 - self.percentile)
            self.upper, self.lower = ceiling, floor

        # Both masks are computed from the untouched input.
        too_high = xs > ceiling
        too_low = xs < floor

        if get_config('keep_original'):
            self.lower_mask = too_low
            self.lower_original_values = xs[too_low]
            self.upper_mask = too_high
            self.upper_original_values = xs[too_high]

        return xs.mask(too_high, ceiling).mask(too_low, floor)
Ejemplo n.º 4
0
 def encode(self, df:pd.DataFrame):
     """Return ``df`` without the rows that contain a missing value.

     Side effects: the index labels of the affected rows are always stored
     on ``self.original_dropped_index``; when the ``keep_original`` config
     entry is truthy, the affected rows themselves are stored on
     ``self.original``.
     """
     # True for every row that has at least one missing cell.
     row_has_missing = df.isna().any(axis=1)
     self.original_dropped_index = df.index[row_has_missing]

     if get_config('keep_original'):
         self.original = df.loc[row_has_missing]

     return df.dropna(axis=0, how='any')
Ejemplo n.º 5
0
 def decode(self, xs:pd.Series)->pd.Series:
     """Map bin numbers back to entries of ``self.date_range``.

     When the ``keep_original`` config entry is truthy, the Series stored
     on ``self.original_xs`` is returned unchanged. Otherwise each value of
     ``xs`` is used as a position into ``self.date_range`` and the entries
     at those positions are returned.

     The result carries the index and name of ``xs`` when ``xs`` is a
     Series, so it lines up with the data the bin numbers came from
     instead of being relabelled ``0..n-1``.
     """
     if get_config('keep_original'):
         return self.original_xs

     decoded = self.date_range.take(xs)
     if isinstance(xs, pd.Series):
         # Values are matched to the labels by position, not by alignment.
         return pd.Series(decoded, index=xs.index, name=xs.name)
     # Non-Series input has no labels to restore; keep the default index.
     return pd.Series(decoded)