Exemple #1
0
    def transform(self, data, **kwargs):
        """Keep the rows that hold a frequent value in a checked column.

        For every checked column the occurrences of each value are counted.
        A row is kept when, in at least one checked column, its value occurs
        at least ``self._values`` times.

        Args:
            data: Data frame to filter. It is only read, never modified.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A new data frame containing the kept rows in their original
            order.
        """
        levels = extract_level_names_dict(data)
        cols_to_check = filter_levels(levels, self._columns)

        # Multi-level columns: a selection that is not already made of full
        # column tuples is expanded into tuples below.
        if (data.columns.nlevels > 1
                and not isinstance(cols_to_check[0], tuple)):

            if not isinstance(cols_to_check, dict):
                levels[0] = cols_to_check
                cols_to_check = levels

            # check the tuple combinations of selected levels
            cols_to_check = product(*cols_to_check.values())

        # A boolean mask is used rather than a set of row positions: set
        # iteration order is arbitrary, which could shuffle the returned
        # rows, and the mask avoids copying the whole frame up front.
        keep_mask = np.zeros(data.shape[0], dtype=bool)
        for col in cols_to_check:
            column = data[col]
            valcount = column.value_counts()
            vals_to_keep = valcount[valcount >= self._values].index
            keep_mask |= column.isin(vals_to_keep).values

        # The explicit copy keeps the result independent from ``data``.
        return data.iloc[keep_mask].copy()
Exemple #2
0
    def transform(self, data, **kwargs):
        """Drop the rows holding one of ``self._values`` in a checked column.

        The checked columns are visited one after another; after each one,
        only the rows whose value in that column is not in ``self._values``
        are carried on to the next.

        Args:
            data: Data frame to filter.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            The filtered data frame. When there is no column to check,
            ``data`` itself is returned.
        """
        level_names = extract_level_names_dict(data)
        targets = filter_levels(level_names, self._columns)

        multi_level = data.columns.nlevels > 1
        if multi_level and not isinstance(targets[0], tuple):
            # Normalise a non-dict selection into the level dictionary
            # before expanding it.
            if not isinstance(targets, dict):
                level_names[0] = targets
                targets = level_names

            # Every tuple combination of the selected level names is checked.
            targets = product(*targets.values())

        for target in targets:
            unwanted = data[target].isin(self._values)
            data = data[~unwanted]

        return data
Exemple #3
0
    def transform(self, data, **kwargs):
        """Extract the columns designated by ``self._columns`` from ``data``.

        The level names of ``data`` are filtered against ``self._columns``
        and the outcome is handed to ``extract_cols``.

        Args:
            data: Data frame to select columns from.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            Whatever ``extract_cols`` yields for the filtered selection.
            NOTE(review): the previous documentation states that the data
            is returned as is when no column was specified; that is decided
            inside the helpers and cannot be confirmed from this method.
        """
        level_names = extract_level_names_dict(data)
        selection = filter_levels(level_names, self._columns)
        return extract_cols(data, selection)
Exemple #4
0
    def transform(self, data, **kwargs):
        """Drop the rows for which the row condition holds.

        The callable produced by ``self._row_condition_builder()`` is
        applied row by row (``axis=1``) to the columns selected through
        ``self._columns``. Rows flagged by it are removed from a copy of
        ``data``.

        Args:
            data: Data frame to filter. The filtering is done on a copy.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A copy of ``data`` restricted to the rows that were not flagged.
        """
        level_names = extract_level_names_dict(data)
        selection = filter_levels(level_names, self._columns)
        inter_df = extract_cols(data, selection)

        result = data.copy()
        flagged = inter_df.apply(self._row_condition_builder(), axis=1)
        return result[~flagged]
Exemple #5
0
    def transform(self, data, **kwargs):
        """Apply custom transformation and insert back as specified.

        This applies the transformation in three main steps:
        1. Extract specified columns
        2. Apply modification
        3. Insert columns if needed or return modified dataframe

        Args:
            data: Data frame to transform. Copies are used wherever the
                method would otherwise write into it.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            * the transformed columns alone when ``self._reintegrate`` is
              falsy;
            * ``data`` joined with the transformed columns when
              ``self._new_cols`` is set (the source columns are dropped
              first when ``self._drop`` is truthy);
            * otherwise a copy of ``data`` with the transformed columns
              inserted over the original ones.

        Raises:
            RuntimeError: if transformed data is to be reintegrated over the
                existing columns but its shape differs from the shape of the
                columns it was computed from.
        """

        cols = filter_levels(extract_level_names_dict(data), self._columns)

        # inter_df should be a copied subset of the data
        inter_df = extract_cols(data.copy(), cols)
        orig_shape = inter_df.shape

        inter_df = self._gen_cols(inter_df)

        # A string is passed to add_suffix together with the current and the
        # source column names; any other non-None value replaces the column
        # labels directly.
        if isinstance(self._new_cols, str):
            inter_df.columns = add_suffix(inter_df.columns, cols,
                                          self._new_cols)
        elif self._new_cols is not None:
            inter_df.columns = self._new_cols

        # No reintegration
        if not self._reintegrate:
            return inter_df

        # Reintegration by joining
        if self._new_cols is not None:
            if self._drop:
                data = drop_cols(data, cols)

            # Join new columns into the data frame
            return mi_join(data, inter_df)

        # Reintegration by replacement
        if inter_df.shape != orig_shape:
            # Implicit concatenation keeps the message on one line; a
            # backslash continuation inside the literal would embed the
            # source indentation in the message.
            raise RuntimeError(
                "Existing columns cannot be reintegrated "
                "if transformation changes data shape"
            )

        # Insert new columns on top of old ones.
        # Copy is needed as original data is not intended to be changed.
        return insert_cols(data.copy(), inter_df, cols)
Exemple #6
0
    def transform(self, data, **kwargs):
        """Select every column name except those matched by ``self._columns``.

        For each level, the names matched by ``self._columns`` are removed
        from the level's names. The remaining names are passed to
        ``extract_cols``.

        Args:
            data: Data frame to drop columns from.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            Whatever ``extract_cols`` yields for the remaining names.
        """
        all_cols = extract_level_names_dict(data)
        to_drop = filter_levels(all_cols, self._columns)

        if not isinstance(to_drop, dict):
            to_drop = {0: to_drop}

        # NOTE(review): levels are paired positionally and keyed by the
        # entry coming from ``to_drop``, exactly as before; this assumes
        # both dictionaries list their levels in the same order - confirm.
        cols = {}
        for (_, available), (level, dropped) in zip(all_cols.items(),
                                                    to_drop.items()):
            # Filter in place of a set difference so the remaining names
            # keep their original, deterministic order. Names are added to
            # ``excluded`` once kept, so repeated names are still collapsed.
            excluded = set(dropped)
            remaining = []
            for name in available:
                if name not in excluded:
                    excluded.add(name)
                    remaining.append(name)
            cols[level] = remaining

        return extract_cols(data, cols)
Exemple #7
0
    def transform(self, data, **kwargs):
        """Move columns to the positions given in ``self._map_dict``.

        ``self._map_dict`` maps a name of column level ``self._level`` to
        the position it should take among that level's names. The entries
        are applied in iteration order, each on the result of the previous.

        Args:
            data: Data frame whose columns are reordered. It is only read.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            A new data frame with the same columns as ``data`` in the
            requested order; every column keeps its own values.

        Raises:
            ValueError: if a name in ``self._map_dict`` is not among the
                names of the targeted level.
            KeyError: if the reordered labels do not all exist in
                ``data.columns`` (for instance when multi-level columns are
                not the full product of their level names).
        """
        all_cols = extract_level_names_dict(data)
        level_names = all_cols[self._level]

        for col, pos in self._map_dict.items():
            # Removing first makes ``pos`` refer to the final position, so
            # edge cases such as "last column" work.
            level_names.remove(col)
            level_names.insert(pos, col)

        # ``nlevels`` is at least 1, so only a value above 1 denotes
        # multi-level columns; flat columns must stay a plain Index.
        if data.columns.nlevels > 1:
            ordered = pd.MultiIndex.from_tuples(
                [*product(*all_cols.values())]
            )
        else:
            ordered = pd.Index(all_cols[0])

        # Select the columns in the new order instead of assigning the new
        # labels to ``columns``: assignment would only rename the columns
        # and leave the values under the wrong labels.
        return data.loc[:, ordered].copy()