def transform(self, X, y=None):
    """
    Transform X, segmenting the time-series in each column into random
    intervals using the interval indices generated during `fit`.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_samples, n_features]
        Nested dataframe with time-series in cells.

    Returns
    -------
    Xt : pandas DataFrame
        Transformed pandas DataFrame with the same number of rows and one
        column for each generated interval.
    """
    # Check inputs.
    self.check_is_fitted()
    X = check_X(X)

    # Check that the input has the same shape as the one passed during fit.
    if X.shape[1] != self.input_shape_[1]:
        raise ValueError(
            'Number of columns of input is different from what was seen '
            'in `fit`')

    # # Input validation
    # if not all([np.array_equal(fit_idx, trans_idx)
    #             for trans_idx, fit_idx in zip(check_equal_index(X),
    #                                           self._time_index)]):
    #     raise ValueError('Indexes of input time-series are different '
    #                      'from what was seen in `fit`')

    # Segment into intervals.
    # TODO generalise to non-equal-index cases
    intervals = []
    colname = X.columns[0]
    colnames = []

    # Tabularise, assuming series have equal indexes in any given column.
    arr = tabularize(X, return_array=True)

    for start, end in self.intervals_:
        interval = arr[:, start:end]
        intervals.append(interval)
        colnames.append(f"{colname}_{start}_{end}")

    # Return nested pandas DataFrame.
    Xt = pd.DataFrame(concat_nested_arrays(intervals, return_arrays=True))
    Xt.columns = colnames
    return Xt
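# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the transformer above): what the interval
# slicing in `transform` does on a plain 2-D array of equal-length series.
# The array, the intervals and the column name below are made-up example
# values, and the nested columns are built with plain pandas instead of the
# `tabularize`/`concat_nested_arrays` helpers used by the class.
# ---------------------------------------------------------------------------
def _interval_slicing_example():
    import numpy as np
    import pandas as pd

    arr = np.arange(20).reshape(2, 10)   # 2 samples, series of length 10
    intervals = [(0, 4), (3, 8)]         # (start, end) pairs, as in `intervals_`

    nested_cols = {}
    for start, end in intervals:
        interval = arr[:, start:end]     # slice all samples at once
        # one nested column per interval, with a pd.Series in every cell
        nested_cols[f"dim_0_{start}_{end}"] = [pd.Series(row) for row in interval]

    return pd.DataFrame(nested_cols)     # 2 rows, one column per interval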
def _apply_rowwise(self, func, X, y=None):
    """Helper function to apply a transform or inverse_transform function
    to each row of the data container."""
    self.check_is_fitted()
    X = check_X(X)

    # 1st attempt: apply, relatively fast but not robust;
    # try and except, but sometimes breaks in other cases than the expected
    # ValueError.
    # Works on a single column, but on multiple columns only if the columns
    # have equal-length series.
    # try:
    #     Xt = X.apply(self.transformer.fit_transform)
    #
    # # Otherwise call apply on each column separately.
    # except ValueError as e:
    #     if str(e) == "arrays must all be same length":
    #         Xt = pd.concat([pd.Series(col.apply(
    #             self.transformer.fit_transform)) for _, col in X.items()],
    #             axis=1)
    #     else:
    #         raise

    # 2nd attempt: apply but iterate over columns; still relatively fast but
    # still not very robust, because a column is not 2d and thus breaks if
    # the transformer expects 2d input.
    try:
        Xt = pd.concat([pd.Series(col.apply(func))
                        for _, col in X.items()], axis=1)

    # 3rd attempt: explicit for-loops, most robust but very slow.
    except Exception:
        cols_t = []
        for c in range(X.shape[1]):  # loop over columns
            col = X.iloc[:, c]
            rows_t = []
            for row in col:  # loop over rows in each column
                row_2d = pd.DataFrame(row)  # convert into 2d dataframe
                row_t = func(row_2d).ravel()  # apply transform
                rows_t.append(row_t)  # append transformed rows
            cols_t.append(rows_t)  # append transformed columns

        # if series-to-series transform, flatten transformed series
        Xt = concat_nested_arrays(cols_t)  # concatenate transformed columns

        # tabularise/unnest series-to-primitive transforms
        xt = Xt.iloc[0, 0]
        if isinstance(xt, (pd.Series, np.ndarray)) and len(xt) == 1:
            Xt = tabularize(Xt)
    return Xt
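# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the class above): the fast path of
# `_apply_rowwise` with a series-to-primitive function, where `np.mean`
# stands in for `func` and the nested frame is a made-up example. With a
# function like this, every cell of the result is already a scalar, so the
# length-1 unnesting step in the method is not needed; a series-to-series
# function would leave arrays in the cells instead.
# ---------------------------------------------------------------------------
def _apply_rowwise_example():
    import numpy as np
    import pandas as pd

    # nested DataFrame: one pd.Series per cell, lengths may differ per row
    X = pd.DataFrame({
        "dim_0": [pd.Series([1.0, 2.0, 3.0]), pd.Series([4.0, 6.0])],
    })
    func = np.mean  # example per-row function: one value per series

    # fast path: apply `func` to every cell, column by column
    Xt = pd.concat([pd.Series(col.apply(func))
                    for _, col in X.items()], axis=1)
    return Xt  # ordinary 2-D table of row means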
def transform(self, X):
    """
    Apply the `fit_transform()` method of the per-row transformer
    repeatedly on each row.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_samples, n_features]
        Nested dataframe with time-series in cells.

    Returns
    -------
    Xt : pandas DataFrame
        The transformed data, with one row per input sample.
    """
    # check the validity of input
    validate_X(X)
    check_is_fitted(self, 'is_fitted_')

    # 1st attempt: apply, relatively fast but not robust;
    # try and except, but sometimes breaks in other cases than the expected
    # ValueError.
    # Works on a single column, but on multiple columns only if the columns
    # have equal-length series.
    # try:
    #     Xt = X.apply(self.transformer.fit_transform)
    #
    # # Otherwise call apply on each column separately.
    # except ValueError as e:
    #     if str(e) == 'arrays must all be same length':
    #         Xt = pd.concat([pd.Series(col.apply(
    #             self.transformer.fit_transform)) for _, col in X.items()],
    #             axis=1)
    #     else:
    #         raise

    # 2nd attempt: apply but iterate over columns; still relatively fast but
    # still not very robust, because a column is not 2d and thus breaks if
    # the transformer expects 2d input.
    try:
        Xt = pd.concat([
            pd.Series(col.apply(self.transformer.fit_transform))
            for _, col in X.items()
        ], axis=1)

    # 3rd attempt: explicit for-loops, most robust but very slow.
    except Exception:
        cols_t = []
        for c in range(X.shape[1]):  # loop over columns
            col = X.iloc[:, c]
            rows_t = []
            for row in col:  # loop over rows in each column
                row_2d = pd.DataFrame(row)  # convert into 2d dataframe
                row_t = self.transformer.fit_transform(
                    row_2d).ravel()  # apply transform
                rows_t.append(row_t)  # append transformed rows
            cols_t.append(rows_t)  # append transformed columns

        # if series-to-series transform, flatten transformed series
        Xt = concat_nested_arrays(cols_t)  # concatenate transformed columns

        # tabularise/unnest series-to-primitive transforms
        xt = Xt.iloc[0, 0]
        if isinstance(xt, (pd.Series, np.ndarray)) and len(xt) == 1:
            Xt = tabularize(Xt)
    return Xt
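# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the class above), assuming the
# per-row transformer is scikit-learn's StandardScaler and the nested
# DataFrame is a small hand-made example. It replays the two code paths in
# `transform`: the column-wise `apply` fails because StandardScaler expects
# 2-D input, so the explicit per-row loop takes over. The nested result is
# rebuilt with plain pandas instead of `concat_nested_arrays`.
# ---------------------------------------------------------------------------
def _rowwise_fit_transform_example():
    import pandas as pd
    from sklearn.preprocessing import StandardScaler

    transformer = StandardScaler()
    X = pd.DataFrame({
        "dim_0": [pd.Series([1.0, 2.0, 3.0]), pd.Series([10.0, 20.0, 30.0])],
    })

    try:
        # fast path: raises here, because each cell is a 1-D pd.Series
        Xt = pd.concat(
            [pd.Series(col.apply(transformer.fit_transform))
             for _, col in X.items()],
            axis=1,
        )
    except Exception:
        # robust path: convert each cell to a 2-D frame, transform, flatten
        cols_t = []
        for c in range(X.shape[1]):
            col = X.iloc[:, c]
            rows_t = [transformer.fit_transform(pd.DataFrame(row)).ravel()
                      for row in col]
            cols_t.append(rows_t)
        # rebuild a nested DataFrame with one pd.Series per cell
        Xt = pd.DataFrame({
            X.columns[c]: [pd.Series(r) for r in rows]
            for c, rows in enumerate(cols_t)
        })
    return Xt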