def transform(self, df_devices=None):
    """
    Create the device representation selected by ``self.encode``.

    Parameters
    ----------
    df_devices : optional
        Device data, forwarded unchanged to the selected ``create_*``
        helper.
        NOTE(review): presumably a pandas DataFrame of device events;
        the default ``None`` is passed straight through -- confirm the
        helpers accept it.

    Returns
    -------
    The value returned by the selected helper:

    * ``'raw'``         -> ``create_raw(df_devices, t_res, sample_strat)``
    * ``'changepoint'`` -> ``create_changepoint(df_devices, t_res)``
    * ``'lastfired'``   -> ``create_lastfired(df_devices, t_res)``

    Raises
    ------
    ValueError
        If ``self.encode`` is not one of the three supported values.
    """
    if self.encode == 'raw':
        return create_raw(
            df_devices,
            t_res=self.t_res,
            sample_strat=self.sample_strat,
        )
    if self.encode == 'changepoint':
        return create_changepoint(df_devices, t_res=self.t_res)
    if self.encode == 'lastfired':
        return create_lastfired(df_devices, t_res=self.t_res)

    # An unknown encoding used to fall through and return None, which only
    # surfaced later as an obscure error in the caller. Fail loudly instead,
    # matching what ``fit`` does for an unsupported encoding.
    raise ValueError(
        "unknown encoding {!r}; expected 'raw', 'changepoint' or "
        "'lastfired'".format(self.encode)
    )
def fit(self, df_devices, y=None):
    """
    Fit the estimator by building the representation for ``df_devices``.

    The representation selected by ``self.encode`` is computed and stored
    on the instance as ``self.data``.

    Parameters
    ----------
    df_devices : object
        Device data, forwarded unchanged to the selected ``create_*``
        helper.
        NOTE(review): presumably a pandas DataFrame of device events --
        confirm against the helpers.
    y : None
        Ignored. This parameter exists only for compatibility with
        :class:`~sklearn.pipeline.Pipeline`.

    Returns
    -------
    self

    Raises
    ------
    ValueError
        If ``self.encode`` is none of ``ENC_RAW``, ``ENC_CP``, ``ENC_LF``.
    """
    if self.encode == ENC_RAW:
        self.data = create_raw(
            df_devices,
            t_res=self.t_res,
            sample_strat=self.sample_strat,
        )
    elif self.encode == ENC_CP:
        self.data = create_changepoint(df_devices, t_res=self.t_res)
    elif self.encode == ENC_LF:
        self.data = create_lastfired(df_devices, t_res=self.t_res)
    else:
        # Same exception type as before, but now naming the offending value
        # so a misconfigured encoder can be diagnosed from the traceback.
        raise ValueError(
            "unknown encoding {!r}; expected one of {!r}".format(
                self.encode, (ENC_RAW, ENC_CP, ENC_LF)
            )
        )
    return self
def transform(self, df_devs=None, y=None):
    """
    Build the configured representation(s) and join them into one frame.

    ``self.encode`` is split on ``'+'`` and each part is processed in
    order. For every part the matching ``create_*`` helper is called on
    ``df_devs``; when ``self.t_res`` is not ``None`` the result is passed
    through the corresponding ``resample_*`` helper. If more than one
    encoding is requested, the non-``TIME`` columns of the changepoint and
    last-fired frames are prefixed with ``'cp_'`` / ``'lf_'`` so that
    column names stay unique (raw columns are left unprefixed).

    Parameters
    ----------
    df_devs : optional
        Device data handed to the helpers.
        NOTE(review): the default ``None`` is forwarded as-is -- confirm
        the helpers accept it.
    y : None
        Ignored.

    Returns
    -------
    pd.DataFrame
        The per-encoding frames, each indexed by ``TIME``, concatenated
        column-wise, with the index reset back into a column.

    Raises
    ------
    ValueError
        If a part of ``self.encode`` is none of ``ENC_RAW``, ``ENC_CP``,
        ``ENC_LF``.
    """
    lf_prefix = 'lf_'
    cp_prefix = 'cp_'

    def _with_prefix(frame, prefix):
        # The first column is kept as TIME; every other label is prefixed.
        frame.columns = [TIME, *map(prefix.__add__, frame.columns[1:])]
        return frame

    encodings = self.encode.split('+')
    combined = len(encodings) > 1

    frames = []
    for encoding in encodings:
        if encoding == ENC_RAW:
            frame = create_raw(df_devs)
            if self.t_res is not None:
                frame = resample_raw(
                    frame,
                    df_dev=df_devs,
                    t_res=self.t_res,
                    most_likely_values=self.dev_most_likely_values_,
                )
        elif encoding == ENC_CP:
            frame = create_changepoint(df_devs)
            if self.t_res is not None:
                frame = resample_changepoint(frame, self.t_res)
            if combined:
                frame = _with_prefix(frame, cp_prefix)
        elif encoding == ENC_LF:
            frame = create_lastfired(df_devs)
            if self.t_res is not None:
                frame = resample_last_fired(frame, self.t_res)
            if combined:
                frame = _with_prefix(frame, lf_prefix)
        else:
            raise ValueError

        # Index by TIME so the column-wise concat aligns rows on timestamps.
        frames.append(frame.set_index(TIME))

    return pd.concat(frames, axis=1).reset_index()
def create_lagged_changepoint(df_dev, window_size=10, t_res=None):
    """
    Build windowed images from the changepoint representation.

    The changepoint representation of ``df_dev`` is created, reduced to
    its underlying value array and handed to ``_image_from_reps`` together
    with ``window_size``.

    Parameters
    ----------
    df_dev : pd.DataFrame
        Device data passed to ``create_changepoint``.
    window_size : int
        Window length forwarded to ``_image_from_reps``.
    t_res : str, optional
        Time resolution forwarded to ``create_changepoint``.

    Returns
    -------
    Whatever ``_image_from_reps`` returns.
    NOTE(review): presumably a 3D array of shape
    (K - window_size, window_size, devices) -- confirm against
    ``_image_from_reps``.
    """
    changepoint_frame = create_changepoint(df_dev, t_res=t_res)
    changepoint_values = changepoint_frame.values
    return _image_from_reps(changepoint_values, window_size)
def create_lastfired(df_devs):
    """
    Create the last-fired representation of ``df_devs``.

    Currently this delegates directly to ``create_changepoint`` and
    returns its result unchanged.

    Parameters
    ----------
    df_devs : object
        Device data forwarded to ``create_changepoint``.

    Returns
    -------
    The value returned by ``create_changepoint(df_devs)``.
    """
    last_fired = create_changepoint(df_devs)
    return last_fired