def transform(self, data, **kwargs):
    """Keep only the rows holding a sufficiently frequent value.

    For every checked column, the values that occur at least
    ``self._values`` times in that column are considered frequent. A row
    is kept when it holds a frequent value in at least one of the checked
    columns. Kept rows are returned in their original order.

    Args:
        data: Input dataframe. It is not modified.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A new dataframe containing only the kept rows.
    """
    # TODO: make this a function and remove copies
    levels = extract_level_names_dict(data)
    cols_to_check = filter_levels(levels, self._columns)

    # if data has a multiindex
    if data.columns.nlevels > 1 \
            and not isinstance(cols_to_check[0], tuple):
        if not isinstance(cols_to_check, dict):
            levels[0] = cols_to_check
            cols_to_check = levels
        # check the tuple combinations of selected levels
        cols_to_check = product(*cols_to_check.values())

    # A boolean mask (instead of a set of row positions) keeps the
    # surviving rows in their original order; iterating a set gives no
    # ordering guarantee.
    keep_mask = np.zeros(data.shape[0], dtype=bool)
    for col in cols_to_check:
        column = data[col]
        valcount = column.value_counts()
        vals_to_keep = valcount[valcount >= self._values].index
        keep_mask |= np.asarray(column.isin(vals_to_keep), dtype=bool)

    # Select by position, then copy only the selected rows so the result
    # is independent of the input frame.
    return data.iloc[np.flatnonzero(keep_mask)].copy()
def transform(self, data, **kwargs):
    """Drop every row that holds an unwanted value in a checked column.

    A row is removed as soon as its value in any of the checked columns
    is contained in ``self._values``.

    Args:
        data: Input dataframe.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The filtered dataframe (``data`` itself when there is no column
        to check).
    """
    level_names = extract_level_names_dict(data)
    selected = filter_levels(level_names, self._columns)

    # Multi-level columns: unless full column tuples were given, expand
    # the per-level selection into every tuple combination.
    has_multiindex = data.columns.nlevels > 1
    if has_multiindex and not isinstance(selected[0], tuple):
        if isinstance(selected, dict):
            per_level = selected
        else:
            level_names[0] = selected
            per_level = level_names
        selected = product(*per_level.values())

    remaining = data
    for column in selected:
        # rows whose value is listed in self._values are filtered out
        is_unwanted = remaining[column].isin(self._values)
        remaining = remaining[~is_unwanted]
    return remaining
def transform(self, data, **kwargs):
    """Select the specified columns, or return data as is if no column
    was specified.

    Args:
        data: Input dataframe.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        Data of the same format as before but only containing the
        specified columns.
    """
    level_names = extract_level_names_dict(data)
    selected = filter_levels(level_names, self._columns)
    return extract_cols(data, selected)
def transform(self, data, **kwargs):
    """Drop the rows for which the row condition holds.

    The condition callable produced by ``self._row_condition_builder()``
    is evaluated row by row on the selected columns only; rows for which
    it is true are removed from a copy of the full data.

    Args:
        data: Input dataframe. It is not modified.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A copy of ``data`` without the rows matching the condition.
    """
    level_names = extract_level_names_dict(data)
    selected = filter_levels(level_names, self._columns)
    inter_df = extract_cols(data, selected)

    result = data.copy()
    row_condition = self._row_condition_builder()
    matches = inter_df.apply(row_condition, axis=1)
    return result[~matches]
def transform(self, data, **kwargs):
    """Apply custom transformation and insert back as specified.

    This applies the transformation in three main steps:
        1. Extract specified columns
        2. Apply modification
        3. Insert columns if needed or return modified dataframe

    These steps have further details for dealing with levels.

    Args:
        data: Input dataframe. It is not modified; all work is done on
            copies.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The transformed columns alone when ``self._reintegrate`` is
        falsy; otherwise the data with the transformed columns joined in
        (when new column names are configured) or written over the
        original columns.

    Raises:
        RuntimeError: if transformed data is to be reintegrated but has
            a different shape than data being reintegrated on the
            dataframe.
    """
    cols = filter_levels(extract_level_names_dict(data), self._columns)

    # inter_df should be a copied subset of the data
    inter_df = extract_cols(data.copy(), cols)
    orig_shape = inter_df.shape
    inter_df = self._gen_cols(inter_df)

    # A string is treated as a suffix for the existing names; any other
    # non-None value is used directly as the new column labels.
    if isinstance(self._new_cols, str):
        inter_df.columns = add_suffix(inter_df.columns, cols,
                                      self._new_cols)
    elif self._new_cols is not None:
        inter_df.columns = self._new_cols

    # No reintegration
    if not self._reintegrate:
        return inter_df

    # Reintegration by joining
    if self._new_cols is not None:
        if self._drop:
            data = drop_cols(data, cols)
        # Join new columns into the data frame
        return mi_join(data, inter_df)

    # Reintegration by replacement
    if inter_df.shape != orig_shape:
        # Implicit concatenation keeps the message free of the source
        # indentation that a backslash continuation inside the literal
        # would embed.
        raise RuntimeError(
            "Existing columns cannot be reintegrated if "
            "transformation changes data shape"
        )

    # Insert new columns on top of old ones.
    # Copy is needed as original data is not intended to be changed.
    return insert_cols(data.copy(), inter_df, cols)
def transform(self, data, **kwargs):
    """Drop the specified columns and return the remaining ones.

    For each level, the names to keep are the level's names minus the
    names selected for dropping. The kept names stay in the order in
    which they appear in the level, so the resulting column order is
    deterministic.

    Args:
        data: Input dataframe.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        The result of ``extract_cols`` for the columns that are kept.
    """
    all_cols = extract_level_names_dict(data)
    drop_cols = filter_levels(all_cols, self._columns)
    if not isinstance(drop_cols, dict):
        drop_cols = {0: drop_cols}

    cols = {}
    # NOTE(review): levels are paired by position and keyed by the
    # drop_cols key, exactly as before - confirm that filter_levels
    # always returns the levels in the same order as all_cols.
    for available, (level, dropped) in zip(all_cols.values(),
                                           drop_cols.items()):
        excluded = set(dropped)
        # dict.fromkeys de-duplicates while preserving order, so the
        # kept names match the former set difference without the
        # arbitrary ordering of a set.
        cols[level] = [name for name in dict.fromkeys(available)
                       if name not in excluded]
    return extract_cols(data, cols)
def transform(self, data, **kwargs):
    """Move columns to the positions given in ``self._map_dict``.

    Each ``column name -> position`` entry of ``self._map_dict`` is
    applied in turn to the names of level ``self._level``: the name is
    removed and re-inserted at the requested position. The data is then
    reordered to follow the new column order, so every column keeps its
    own values.

    Args:
        data: Input dataframe. It is not modified.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A new dataframe with the columns in the requested order. For
        multi-level columns the order is the product of the per-level
        names; a combination missing from ``data`` would come out as an
        all-NaN column.

    Raises:
        ValueError: if a name in ``self._map_dict`` is not among the
            names of level ``self._level``.
    """
    all_cols = extract_level_names_dict(data)
    level_names = all_cols[self._level]
    for col, pos in self._map_dict.items():
        # Removing first means ``pos`` refers to the list without
        # ``col``, which also makes edge cases like the last position
        # work.
        level_names.remove(col)
        level_names.insert(pos, col)

    # nlevels is always >= 1, so only > 1 indicates multi-level columns.
    if data.columns.nlevels > 1:
        new_columns = pd.MultiIndex.from_tuples(
            list(product(*all_cols.values()))
        )
    else:
        new_columns = pd.Index(all_cols[0])

    # Reindex rather than assign to ``.columns``: assigning only
    # relabels the columns and would leave the data under wrong names.
    return data.reindex(columns=new_columns)