def transform(self, df, **transform_params):
     """
     Select the columns listed in ``cols`` from ``df``.

     Parameters
     ----------
     df : DataFrame
         Input data, expected to contain every name in ``self.cols``.

     Returns
     -------
     A copy of ``df[self.cols]``; ``df`` itself is not modified.
     """
     # TODO add a test that self.cols is a subset of the columns of df
     # Column-presence check is delegated to the project helper (defined
     # elsewhere; presumably raises when a column is missing).
     pdu._is_cols_subset_of_df_cols(self.cols, df)
     selected = df[self.cols]
     return selected.copy()
 def fit(self, df, y=None, **fit_params):
     """
     No-op fit, present for compatibility with the scikit-learn API.

     Nothing is learned from ``df``; the only work done is the
     column-presence check on ``self.cols`` (delegated to a project
     helper defined elsewhere).

     Parameters
     ----------
     df : DataFrame
         Data whose columns are checked against ``self.cols``.
     y : ignored

     Returns
     -------
     self
     """
     pdu._is_cols_subset_of_df_cols(self.cols, df)
     return self
    def transform(self, df, **transform_params):
        """
        Label-encode ``cols`` of ``df`` using the fitted encoders.

        Parameters
        ----------
        df : DataFrame
            DataFrame to be preprocessed

        Returns
        -------
        DataFrame
            A copy of ``df`` in which every column named in ``self.cols``
            is replaced by the output of its encoder ``self.les[col]``.
            Column order and index are those of ``df``.

        Raises
        ------
        NotFittedError
            If the instance has not been fitted yet.
        """
        if not self._is_fitted:
            raise NotFittedError("Fitting was not performed")
        pdu._is_cols_subset_of_df_cols(self.cols, df)

        # Work on a copy so the caller's DataFrame is left untouched.
        df = df.copy()

        for col in self.cols:
            # Assign the encoder output straight back to the column; no
            # intermediate DataFrame is needed because the index of ``df``
            # is kept as-is by the assignment.
            df[col] = self.les[col].transform(df[col])
        return df
    def transform(self, df, **transform_params):
        """
        Scale ``cols`` of ``df`` using the fitted scaler.

        Parameters
        ----------
        df : DataFrame
            DataFrame to be preprocessed

        Returns
        -------
        DataFrame
            A new DataFrame; ``df`` is not modified. The columns that are
            not in ``self.cols`` come first (in their original order),
            followed by the scaled columns in the order of ``self.cols``.

        Raises
        ------
        NotFittedError
            If the instance has not been fitted yet.
        """
        if not self._is_fitted:
            raise NotFittedError("Fitting was not performed")
        pdu._is_cols_subset_of_df_cols(self.cols, df)

        standardized_cols = pd.DataFrame(
            # The scaler output is re-wrapped here (presumably a NumPy
            # array without labels) so the column names are restored.
            self.standard_scaler.transform(df[self.cols]),
            columns=self.cols,
            # The index of the resulting DataFrame should be assigned and
            # equal to the one of the original DataFrame. Otherwise, upon
            # concatenation NaNs will be introduced.
            index=df.index)

        # ``drop`` and ``concat`` both return new objects, so no explicit
        # copy of the whole input is needed to protect the caller's data.
        unscaled_cols = df.drop(self.cols, axis=1)
        return pd.concat([unscaled_cols, standardized_cols], axis=1)
 def transform(self, df, **transform_params):
     """
     Returns a copy of ``df`` with a new column (named ``feat_name``)
     holding the day of the week as derived from ``df[col]``.

     Parameters
     ----------
     df : DataFrame
         Input data containing the column ``self.col``; each of its
         values must expose a ``dayofweek`` attribute.

     Returns
     -------
     DataFrame
         A copy of ``df`` with the extra categorical column
         ``self.feat_name``.

     Raises
     ------
     ValueError
         If ``df`` is not a pandas DataFrame.
     """
     # Validate the type before touching ``df``: copying first would raise
     # an unrelated AttributeError for inputs that have no ``copy`` method.
     if not isinstance(df, pd.DataFrame):
         raise ValueError("Non supported input")
     pdu._is_cols_subset_of_df_cols([self.col], df)

     # Work on a copy so the caller's DataFrame is left untouched.
     df = df.copy()
     df[self.feat_name] = df[self.col].apply(
         lambda x: x.dayofweek).astype('category')
     return df
 def transform(self, df, **transform_params):
     """
     Compute the ratio between ``df[col]`` and the fitted constant
     ``const_`` and store it in a new column named ``feat_name``.

     Parameters
     ----------
     df : DataFrame
         Input data containing the column ``self.col``.

     Returns
     -------
     DataFrame
         A copy of ``df`` with the extra column ``self.feat_name`` equal
         to ``df[self.col] / self.const_``.

     Raises
     ------
     ValueError
         If ``df`` is not a pandas DataFrame.
     """
     # Fails first when ``const_`` has not been set by a previous fit.
     check_is_fitted(self, 'const_')
     # Validate the type before touching ``df``: copying first would raise
     # an unrelated AttributeError for inputs that have no ``copy`` method.
     if not isinstance(df, pd.DataFrame):
         raise ValueError("Non supported input")
     pdu._is_cols_subset_of_df_cols([self.col], df)

     # Work on a copy so the caller's DataFrame is left untouched.
     df = df.copy()
     df[self.feat_name] = df[self.col].div(self.const_)
     return df
    def fit(self, df, y=None, **fit_params):
        """
        Fit the underlying scaler on ``cols`` of ``df``.

        Parameters
        ----------
        df : DataFrame
            Data to use for fitting.
            In many cases, should be ``X_train``.
        y : ignored

        Returns
        -------
        self
            The instance, now flagged as fitted.
        """
        pdu._is_cols_subset_of_df_cols(self.cols, df)

        fitting_data = df[self.cols]
        self.standard_scaler.fit(fitting_data)

        # Flag read by ``transform`` to refuse running before a fit.
        self._is_fitted = True
        return self
 def fit(self, df, y=None, **fit_params):
     """
     Learn the reference constant ``const_`` from ``df[col]``.

     ``const_`` is the mean of the column when ``self.func`` is
     ``'mean'`` and its median when ``self.func`` is ``'median'``.

     Parameters
     ----------
     df : DataFrame
         Data containing the column ``self.col``.
     y : ignored

     Returns
     -------
     self

     Raises
     ------
     ValueError
         If ``df`` is not a pandas DataFrame, or if ``self.func`` is
         neither ``'mean'`` nor ``'median'``.
     """
     if not isinstance(df, pd.DataFrame):
         raise ValueError("Non supported input")
     pdu._is_cols_subset_of_df_cols([self.col], df)

     # Reject unknown aggregations up front; only two are supported.
     if self.func not in ('mean', 'median'):
         raise ValueError(
             "Unsupported function ({}). Can be either mean or "
             "median".format(self.func))

     values = df[self.col]
     self.const_ = values.mean() if self.func == 'mean' else values.median()
     return self
    def fit(self, df, y=None, **fit_params):
        """
        Fit the encoder on ``cols`` of ``df`` and build the output names.

        Parameters
        ----------
        df : DataFrame
            The base DataFrame from which column names are learned.
            These names will be used when transforming data
        y : ignored

        Returns
        -------
        self
            The instance, with ``ohe_cols_names_`` holding one name per
            (column, value index) pair in the form ``<col>_<i>``, for
            ``i`` in ``range(self.n_values)``.
        """
        pdu._is_cols_subset_of_df_cols(self.cols, df)
        self.ohe.fit(df[self.cols])

        # Names are grouped by source column, then ordered by value index.
        self.ohe_cols_names_ = [
            col + '_' + str(value_idx)
            for col in self.cols
            for value_idx in range(self.n_values)
        ]
        return self