Exemple #1
0
    def categoricals(self,
                     model_name='onehot_model.pkl',
                     cols=None,
                     owr=False,
                     model_bin=None):
        """Apply a one-hot encoder to the categorical columns of self.data._X.

        Trains a new encoder when no persisted model exists (or `owr` forces a
        retrain) and no in-memory `model_bin` was supplied; otherwise reloads
        the persisted encoder, or falls back to the passed-in `model_bin`
        (pipeline prediction). Returns the encoder used.
        """

        self.log('Apply onehot encoder on categorical')
        target_path = os.path.join(self.model_path, model_name)
        cols = self.data.cat_cols if cols is None else cols

        # Train only when nothing was handed in AND there is no reusable file
        # (or the caller explicitly asked to overwrite it).
        must_train = model_bin is None and (owr or not os.path.isfile(target_path))

        if must_train:
            self.log('\nTrain model\n')
            model_bin = OneHotEncoder(
                cols=cols,
                use_cat_names=True,
                handle_unknown='error',
                drop_invariant=False,
                impute_missing=False)
            model_bin.fit(self.data._X)
            self.data._X = model_bin.transform(self.data._X)
            # Remember the resulting column layout for later schema checks.
            setattr(model_bin, 'data_schema', self.data._X.columns.values)

            if self.auto_save:
                joblib.dump(model_bin, target_path)
        else:
            if os.path.isfile(target_path):
                # Prediction outside a pipeline: reload the persisted encoder
                # (takes precedence over a passed-in model, as before).
                model_bin = joblib.load(target_path)
            # Prediction path: apply the (loaded or supplied) encoder and
            # verify the transformed frame matches the training schema.
            self.data._X = model_bin.transform(self.data._X)
            self.data.check_schema(model_bin, '_X')

        return model_bin
    def _encode_categories(self):
        """
        Encode categorical variables of self.X / self.X_test in place:
        ordinal (label) encoding for quality-graded columns and one-hot
        encoding for the remaining nominal categories. Both frames are
        rebuilt as numeric columns joined with the encoded columns.
        """

        logging.info(f'#{self._index()} - Encoding categorical columns...')
        # get column names for categorical and numerical features
        categorical_vars = self.X.select_dtypes(include='object').columns
        numerical_vars = self.X.columns.difference(categorical_vars)

        # Columns with an inherent quality ordering (Po < Fa < TA < Gd < Ex),
        # Ames-housing style; everything else categorical is treated as nominal.
        ordinal = pd.Index([
            'ExterQual', 'ExterCond', 'BsmtQual', 'BsmtCond', 'HeatingQC',
            'KitchenQual', 'FireplaceQu', 'GarageQual', 'GarageCond', 'PoolQC'
        ])
        nominal = categorical_vars.difference(ordinal)

        # Shared quality scale; 'NA' (feature absent) maps to 0.
        standard_mapping = {
            'NA': 0,
            'Po': 1,
            'Fa': 2,
            'TA': 3,
            'Gd': 4,
            'Ex': 5
        }
        # category_encoders.OrdinalEncoder expects one {col, mapping} dict
        # per column.
        mapping_for_ordinals = [{
            'col': column,
            'mapping': standard_mapping
        } for column in ordinal]

        x_num = self.X[numerical_vars]
        x_test_num = self.X_test[numerical_vars]

        # one hot encode categorical columns
        one_hot_encoder = OneHotEncoder(use_cat_names=True)
        label_encoder = OrdinalEncoder(drop_invariant=True,
                                       mapping=mapping_for_ordinals,
                                       handle_unknown='error')

        # Encoders are fitted on the training frame only and reused on the
        # test frame, so both end up with identical columns.
        x_cat_nom = one_hot_encoder.fit_transform(self.X[nominal])
        x_cat_ord = label_encoder.fit_transform(self.X[ordinal])
        x_test_cat_nom = one_hot_encoder.transform(self.X_test[nominal])
        x_test_cat_ord = label_encoder.transform(self.X_test[ordinal])

        self.X = x_num.join(x_cat_ord).join(x_cat_nom)
        self.X_test = x_test_num.join(x_test_cat_ord).join(x_test_cat_nom)
        # NOTE(review): the opening log line uses self._index() while this one
        # reads self._step_index — confirm which accessor is intended.
        logging.info(f'#{self._step_index} - DONE!')
Exemple #3
0
def fit_onehot(input_df: pd.DataFrame, cols: List[str], na_value: Any = None):
    """
    Fit a one-hot encoder on the given DataFrame and return the encoded frame.

    NaN values — and any sentinel given as `na_value` — are mapped to NaN
    before fitting, and the resulting `<col>_nan` dummy columns are dropped,
    so missing values behave as unseen categories.

    Args:
        input_df: DataFrame used to fit the encoder
        cols: List of categorical columns to be encoded
        na_value: Default null value for DataFrame

    Returns:
        result_df: encoded input_df DataFrame
        model : encoder model to be passed to `transform_onehot` method
    """
    frame = input_df.copy()

    # Normalize the caller's sentinel null into a real NaN per column.
    if na_value is not None:
        for column in cols:
            frame[column] = frame[column].replace({na_value: np.nan})

    # Names the encoder gives to NaN dummy columns (use_cat_names=True).
    nan_dummy_cols = ["{}_nan".format(column) for column in cols]

    encoder = OneHotEncoder(cols=cols, use_cat_names=True).fit(frame)
    result_df = encoder.transform(frame)

    # Drop any NaN dummy columns that actually appeared in the output.
    present = [c for c in nan_dummy_cols if c in result_df.columns]
    if present:
        result_df = result_df.drop(columns=present)

    model = {
        "encoder": encoder,
        "cols": cols,
        "na_value": na_value,
        "drop_cols": nan_dummy_cols,
    }
    return result_df, model
def encode_low_cardinality_categorical_df(dataframe, fit=False):
    """
    Encode low cardinality categorical features using OneHot Encoding and
    dropping invariant features.
    ---
    Arguments
        dataframe: pd.DataFrame
            Dataframe with pre-processed data (i.e. renamed features), low
            cardinality categorical features only
        fit: boolean
            Indicates if we should train (and persist) a new encoder or load
            a previously persisted one
    Returns
        dataframe: pd.DataFrame
            Dataframe with encoded data
    """
    # Bug fix: the original docstring opened at a 3-space indent while the
    # body used 4 spaces, which raised IndentationError at import time.

    # Train or load an encoder
    if fit:
        # Encode every column; drop_invariant removes constant dummy columns.
        encoder = OneHotEncoder(cols=dataframe.columns.values, drop_invariant=True)
        encoder.fit(dataframe)
        # Persist so prediction runs can reuse the exact training columns.
        pickle_obj(encoder, 'low_card_categorical_encoder')
    else:
        encoder = unpickle_obj('low_card_categorical_encoder')

    # transform data
    return encoder.transform(dataframe)
Exemple #5
0
                     'PassengerId', 'Survived']]
###########################################################################
# Split data into train and test                                          #
###########################################################################
# NOTE(review): fullData (with its DataPartition marker column) is built in
# an earlier, out-of-view part of this script.
trainData = fullData.loc[fullData.DataPartition == 'train']
testData = fullData.loc[fullData.DataPartition == 'test']
###########################################################################
# One hot encode                                                          #
###########################################################################
# https://github.com/scikit-learn-contrib/categorical-encoding
# http://contrib.scikit-learn.org/categorical-encoding/onehot.html
from category_encoders import OneHotEncoder
categories = list(set(trainData.select_dtypes(['category']).columns))
target = trainData.Survived
# Fit on train only; handle_unknown='ignore' lets unseen test levels pass.
enc = OneHotEncoder(cols=categories,return_df = 1, handle_unknown = 'ignore').fit(trainData, target)
trainData = enc.transform(trainData)
testData = enc.transform(testData)
###########################################################################
# Drop multi collinear levels and no longer required                      #
###########################################################################
dropColumns = ['DataPartition']
trainData = trainData.drop(columns=dropColumns)
testData = testData.drop(columns=dropColumns)
testData = testData.drop(columns='Survived')
###########################################################################
# Start h2o cloud                                                         #
###########################################################################
import h2o
h2o.init()
# NOTE(review): missing parentheses — this references the function without
# calling it, so no cleanup actually happens.
h2o.remove_all  # clean slate, in case cluster was already running
# upload data to h2o cloud
from category_encoders import OneHotEncoder

# One-hot encode the 'color' feature; use_cat_names keeps readable columns.
encoder = OneHotEncoder(cols=['color'], use_cat_names=True)

train = encoder.fit_transform(train)
# Bug fix: the test set must be transformed with the encoder fitted on the
# training set; the original called fit_transform here, silently refitting
# the encoder on test data.
test = encoder.transform(test)
train.head()
from sklearn.preprocessing import LabelEncoder

# Label-encode the target class names into integer codes.
encoder = LabelEncoder()

encoder.fit(train['type'])

print(encoder.classes_)

train['type_no'] = encoder.transform(train['type'])
train.head()
# Quick correlation heatmap over all (now numeric) training columns.
sns.heatmap(train.corr(), xticklabels=list(train), yticklabels=list(train))
target = train['type_no'] # for visualizations
target_string = train['type'] # for final predictions

# Remove target columns from the feature frame before modelling.
del train['type']
del train['type_no']

target.head()
from sklearn.model_selection import train_test_split

train_data, test_data, train_target, test_target = train_test_split(train, target, test_size=0.2, random_state=42)
from mlxtend.plotting import plot_decision_regions
import matplotlib.gridspec as gridspec
import itertools
class RFEncoder(BaseEstimator, TransformerMixin):
    """Random-forest based categorical encoder.

    One-hot encodes the configured columns, then trains a
    RandomForestClassifier per original column on that column's dummy
    features and appends binary "subset" indicator columns derived from the
    forest's decision paths. Follows the category_encoders fit/transform
    conventions (handle_missing / handle_unknown / return_df).
    """

    def __init__(self,
                 cols=None,
                 handle_missing='value',
                 handle_unknown='value',
                 use_cat_names=False,
                 return_df=True,
                 max_subsets=None,
                 max_depth=3,
                 n_estimators=100,
                 min_count=1,
                 n_jobs=1):
        # cols: columns to encode; None -> all object columns (resolved in fit)
        # max_subsets: cap on subset features kept per column (None = no cap)
        # max_depth: int, float (fraction of dummy count), or (fraction, cap)
        # min_count: minimum occurrences for a decision-path subset to be kept
        self.cols = cols
        self.handle_missing = handle_missing
        self.handle_unknown = handle_unknown
        self.use_cat_names = use_cat_names
        self.return_df = return_df
        self.max_subsets = max_subsets
        self.max_depth = max_depth
        self.n_estimators = n_estimators
        self.n_jobs = n_jobs
        self.min_count = min_count

    def fit(self, X, y=None):
        """Fit the internal dummy encoder and build per-column mappings."""
        self._dim = X.shape[1]

        # Default to every object-dtype column when none were specified.
        # NOTE(review): this mutates self.cols, which sklearn's estimator
        # contract (clone/set_params round-trips) discourages.
        if self.cols is None:
            self.cols = get_obj_cols(X)

        self.dummy_encoder = OneHotEncoder(cols=self.cols,
                                           handle_unknown='value',
                                           handle_missing='value')

        self.dummy_encoder = self.dummy_encoder.fit(X)
        self.mapping = self.generate_mapping(X, y)

        # Run one transform so the output feature names become known.
        X_temp = self.transform(X, override_return_df=True)
        self.feature_names = list(X_temp.columns)

        return self

    def generate_mapping(self, X, y):
        """Build, per encoded column, a DataFrame mapping each category to
        its dummy columns plus forest-derived subset indicator columns."""
        X = self.dummy_encoder.transform(X.copy(deep=True))
        y = y.copy(deep=True)

        mapping = []

        for switch in self.dummy_encoder.mapping:
            col = switch.get('col')
            values = switch.get('mapping').copy(deep=True)

            # Resolve max_depth: fixed int, fraction of this column's dummy
            # count, or a (fraction, cap) pair.
            if isinstance(self.max_depth, int):
                max_depth = self.max_depth
            elif isinstance(self.max_depth, float):
                max_depth = round(self.max_depth * values.shape[1])
            else:
                max_depth = min(self.max_depth[1],
                                round(self.max_depth[0] * values.shape[1]))
            if max_depth == 0:
                continue  # column contributes no subset features

            forest = RandomForestClassifier(
                max_depth=max_depth,
                n_estimators=self.n_estimators,
                n_jobs=self.n_jobs,
            )

            # Train on this column's dummy features only.
            forest.fit(X[values.columns], y)

            # Convert decision paths over the category rows (`values`) into
            # binary subset features, one column per kept tree node.
            subsets = self.get_subsets(forest.decision_path(values))
            subset_df = pd.DataFrame(data=subsets,
                                     index=values.index,
                                     columns=[
                                         '{col}_subset_{i}'.format(col=col,
                                                                   i=i)
                                         for i in range(subsets.shape[1])
                                     ])

            base_df = values.join(subset_df)

            mapping.append({'col': col, 'mapping': base_df})

        return mapping

    def get_subsets(self, decision_path):
        """Extract unique, sufficiently frequent decision-path node
        indicators as binary columns.

        `decision_path` is the (indicator_matrix, node_ptr) pair returned by
        RandomForestClassifier.decision_path.
        """
        # Drop nodes reached by exactly one row: they carry no grouping info.
        subset_sizes = np.asarray(decision_path[0].sum(axis=0))[0]
        subsets = decision_path[0][:, subset_sizes != 1].toarray()

        # De-duplicate identical indicator columns, tracking multiplicity.
        subsets, count = np.unique(subsets, return_counts=True, axis=1)

        # Keep only subsets occurring at least min_count times.
        subsets = subsets[:, count >= self.min_count]
        count = count[count >= self.min_count]

        # Order by descending frequency, then ascending subset size.
        subsets = subsets[:, np.argsort(-count)]

        subset_sizes = subsets.sum(axis=0)
        subsets = subsets[:, np.argsort(subset_sizes)]

        if self.max_subsets is not None:
            subsets = subsets[:, :self.max_subsets]

        return subsets

    def transform(self, X, override_return_df=False):
        """Apply the fitted mapping to X, mirroring the category_encoders
        transform contract (raises on missing/unknown when configured)."""
        if self.handle_missing == 'error':
            if X[self.cols].isnull().any().any():
                raise ValueError('Columns to be encoded can not contain null')

        # NOTE(review): _dim is only assigned in fit(), so calling transform
        # on an unfitted instance raises AttributeError before this check.
        if self._dim is None:
            raise ValueError(
                'Must train encoder before it can be used to transform data.')

        if X.shape[1] != self._dim:
            raise ValueError('Unexpected input dimension %d, expected %d' % (
                X.shape[1],
                self._dim,
            ))

        # Nothing to encode: passthrough.
        if not list(self.cols):
            return X if self.return_df else X.values

        # Map categories to ordinal codes first, then expand via the mapping.
        X = self.dummy_encoder.ordinal_encoder.transform(X)

        if self.handle_unknown == 'error':
            # category_encoders marks unseen categories with -1.
            if X[self.cols].isin([-1]).any().any():
                raise ValueError(
                    'Columns to be encoded can not contain new values')

        X = self.get_dummies(X)

        if self.return_df or override_return_df:
            return X
        else:
            return X.values

    def get_dummies(self, X_in):
        """Replace each encoded column with its dummy + subset columns,
        preserving the original column order."""
        X = X_in.copy(deep=True)

        cols = X.columns.values.tolist()

        for switch in self.mapping:
            col = switch.get('col')
            mod = switch.get('mapping')

            # Row-wise lookup of each ordinal code in the mapping frame.
            base_df = mod.reindex(X[col])
            base_df = base_df.set_index(X.index)
            X = pd.concat([base_df, X], axis=1)

            # Splice the new columns in where the original column sat.
            old_column_index = cols.index(col)
            cols[old_column_index:old_column_index + 1] = mod.columns

        X = X.reindex(columns=cols)

        return X

    def get_feature_names(self):
        """Return output feature names; requires fit() to have completed."""
        if not isinstance(self.feature_names, list):
            raise ValueError(
                'Must transform data first. Affected feature names are not known before.'
            )
        else:
            return self.feature_names
    def _encode_categories(self):
        """
        Encode the Titanic feature set for self.X and self.X_test: derive
        Title / family-size features, bin Age and Fare, log survival ratios
        per non-float column, then one-hot encode Title and Embarked.
        """

        logging.info(f'#{self._index()} - Encoding categorical columns...')

        def encode(data):
            # encode Sex column (True for male)
            data['Sex'] = data['Sex'] == 'male'

            # encode Name column: split "Family, Title. Given" into the
            # family-name and title parts.
            name_cols = data['Name'].apply(lambda x: pd.Series(
                [str(x).split(",")[0],
                 str(x).split(", ")[1].split(".")[0]],
                index=['Family name', 'Title']))
            data = data.join(name_cols)

            # identify Titles with same meaning
            data['Title'].replace({
                'Mlle': 'Miss',
                'Ms': 'Miss',
                'Mme': 'Mrs'
            },
                                  inplace=True)

            # group rare Titles (fewer than 10 occurrences) under 'Misc'.
            # NOTE(review): title_names is recomputed per call, so on the test
            # frame every title present is found; but the <10 threshold is
            # applied per-frame, not from the training frame — confirm intent.
            title_names = (data['Title'].value_counts() < 10)
            data['Title'] = data['Title'].apply(lambda x: 'Misc'
                                                if title_names.loc[x] else x)

            # create Family size and Alone column from SibSp, Parch cols
            data['Family size'] = data['SibSp'] + data['Parch'] + 1
            data['Alone'] = data['Family size'] == 1

            # make 5 equal size groups from Fares
            data['Fare'] = pd.qcut(data['Fare'], 5, labels=False)

            # make 5 groups from Ages
            data['Age'] = pd.cut(data['Age'], 5, labels=False)

            # rename columns and delete unnecessary features
            data = data.rename(columns={
                'Sex': 'Male',
                'Fare': 'FareBins',
                'Age': 'AgeBins'
            })
            data.drop(['Name', 'SibSp', 'Parch'], axis=1, inplace=True)

            return data

        self.X = encode(self.X)
        self.X_test = encode(self.X_test)

        # Log the survival percentage grouped by every non-float column.
        for col in self.X.columns:
            if self.X[col].dtype != 'float64':
                table = self.X.join(self.y)[[col, 'Survived'
                                             ]].groupby(col,
                                                        as_index=False).mean()
                table['Survived'] = (table['Survived'] * 100).map(
                    '{:.2f} %'.format)
                logging.info(
                    f'Survival ratio by: {col}\n{table}\n{"-" * 10}\n')

        # One-hot encode the remaining nominal columns; fitted on train only
        # so the test frame gets identical dummy columns.
        one_hot_encoder = OneHotEncoder(use_cat_names=True)
        one_hot_columns = one_hot_encoder.fit_transform(
            self.X[['Title', 'Embarked']])
        one_hot_columns_test = one_hot_encoder.transform(
            self.X_test[['Title', 'Embarked']])
        self.X = self.X.join(one_hot_columns)
        self.X_test = self.X_test.join(one_hot_columns_test)

        self.X.drop(['Family name', 'Title', 'Embarked'], axis=1, inplace=True)
        self.X_test.drop(['Family name', 'Title', 'Embarked'],
                         axis=1,
                         inplace=True)

        # NOTE(review): the opening log uses self._index() but this reads
        # self._step_index — confirm which accessor is intended.
        logging.info(f'#{self._step_index} - DONE!')
Exemple #9
0
# StandardScaler - pre-processor to put numerical column in the same scale
scaler = StandardScaler().fit(x_train)

scaler

values_scale = scaler.transform(x_train)
values_scale[:10]
x_train = scaler.transform(x_train)

# generate the model - could be any model
# instance of the classifier decision tree and train the model
clf_tree = tree.DecisionTreeClassifier()
clf_tree = clf_tree.fit(x_train, y_train)

# Apply object ohe and pre-processor on data for test
# NOTE(review): `ohe` is fitted earlier in the notebook, outside this chunk.
x_test = ohe.transform(x_test)
# NOTE(review): refitting a new StandardScaler on the test set leaks test
# statistics; the train-fitted `scaler` should normally be reused here.
scaler_test = StandardScaler().fit(x_test)
x_test = scaler_test.transform(x_test)
x_test[:10]

# predict
clf_tree.predict(x_test)

# Validate the model
acuracy = clf_tree.score(x_test, y_test)

acuracy

# Pipeline
# will create a kind of alias for each method
# NOTE(review): this statement is truncated in the export — the Pipeline
# call is never closed.
pip_1 = Pipeline([('ohe', OneHotEncoder()), ('scaler', StandardScaler()),

X.head()


# In[249]:


# One-hot encode the selected categorical columns.
one_hot= OneHotEncoder(cols=["user_name","country","hint_variety"],use_cat_names=True)  # OneHotEncoder from category_encoders package
one_hot.fit(X)


# In[250]:


X=one_hot.transform(X)


# In[253]:


# Inspect the expanded column set produced by the encoder.
X.columns


# #### Word2vec implementation -

# In[259]:


# NOTE(review): wine2vec and review_desc are defined in earlier notebook cells.
wine2vec.build_vocab(review_desc.values) 
Exemple #11
0
# NOTE(review): the result of astype() is not assigned, so this is a no-op.
df[['host_is_superhost', 'bathrooms_text', 'has_availability', 'instant_bookable']].astype(float)

# NOTE(review): missing parentheses — references the method without calling it.
df.head

# NOTE(review): notebook shell command exported verbatim; not valid Python.
pip install category_encoders

# Instantiate transformer - one hot encoder
from category_encoders import OneHotEncoder
transformer = OneHotEncoder(use_cat_names=True)

# Transform to fit training data
# NOTE(review): fitted on the full frame before the train/test split below —
# the split's test fold influences the encoding (mild leakage).
transformer.fit(df)

# Transform our training data
df = transformer.transform(df)

X = df.drop('price', axis=1)
y= df['price']

X = X.astype(float)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

from keras.layers import BatchNormalization, Dropout
import keras

# NOTE(review): truncated in the export — the Sequential list is never closed.
model = Sequential([

# The Input Layer :
Dense(1024, input_dim = X_train.shape[1]),
# Define col names for the parameters of the network
pred_vars = ['MONTH', 'ORIGIN', 'DEST', 'DISTANCE']
target_var = 'PASSENGERS'
# Bug fix: the original `keep = pred_vars; keep.append(target_var)` aliased
# the list, silently adding the target to pred_vars as well. Build a new list.
keep = pred_vars + [target_var]

# Subset only what's needed
data = data[keep]

# Encode the  source and target nodes using a catagory encoder
from category_encoders import OneHotEncoder
ce = OneHotEncoder()
ce.fit(data)

# transform the encoded data
data_encoded = ce.transform(data)
labels = data[target_var]
# Drop the target by keyword: positional `axis` for DataFrame.drop was
# deprecated in pandas 1.0 and removed in 2.0.
data_encoded.drop(columns=target_var, inplace=True)

# split out a final eval set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data_encoded,
                                                    labels,
                                                    random_state=0,
                                                    test_size=.25)

# convert to xgb data format
import xgboost as xgb
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)
]]
###########################################################################
# Split data into train and test                                          #
###########################################################################
# NOTE(review): fullData (with its DataPartition marker column) and the
# imports used below come from an earlier, out-of-view part of this script.
trainData = fullData.loc[fullData.DataPartition == 'train']
testData = fullData.loc[fullData.DataPartition == 'test']
###########################################################################
# One hot encode                                                          #
###########################################################################
# https://github.com/scikit-learn-contrib/categorical-encoding
# http://contrib.scikit-learn.org/categorical-encoding/onehot.html
categories = list(set(trainData.select_dtypes(['category']).columns))
target = trainData.Survived
# Fit on train only; handle_unknown='ignore' lets unseen test levels pass.
enc = OneHotEncoder(cols=categories, return_df=1,
                    handle_unknown='ignore').fit(trainData, target)
trainData = enc.transform(trainData)
testData = enc.transform(testData)
###########################################################################
# Drop multi collinear levels and no longer required                      #
###########################################################################
dropColumns = ['DataPartition']
trainData = trainData.drop(columns=dropColumns)
testData = testData.drop(columns=dropColumns)
testData = testData.drop(columns='Survived')
###########################################################################
# Start h2o cloud                                                         #
###########################################################################
h2o.init()
# NOTE(review): missing parentheses — this references the function without
# calling it, so no cleanup actually happens.
h2o.remove_all  # clean slate, in case cluster was already running
# upload data to h2o cloud
train = h2o.H2OFrame(trainData)
Exemple #14
0
    return score


######### Creating objects for 2 classification models.
logit = LogisticRegression(random_state=SEED)
rf = RandomForestClassifier(random_state=SEED)

###################################################################################################
######### Apply One Hot Encoding
from category_encoders import OneHotEncoder
onehot_enc = OneHotEncoder(cols=X_Columns)
# NOTE: this fit is redundant — fit_transform below refits on the same data.
onehot_enc.fit(X_train, y_train)

print('Original number of features: \n', X_train.shape[1], "\n")
data_ohe_train = onehot_enc.fit_transform(X_train)
data_ohe_test = onehot_enc.transform(X_test)
print('Features after OHE: \n', data_ohe_train.shape[1])

######### Logistic Regression
onehot_logit_score = get_score(logit, data_ohe_train, y_train, data_ohe_test,
                               y_test)
print('Logistic Regression score with One hot encoding:', onehot_logit_score)

######### Random Forest
onehot_rf_score = get_score(rf, data_ohe_train, y_train, data_ohe_test, y_test)
# Bug fix: this line previously printed onehot_logit_score (copy-paste error).
print('Random Forest score with One hot encoding:', onehot_rf_score)

###################################################################################################
######### Apply Hashing Encoding
from category_encoders import HashingEncoder
hashing_enc = HashingEncoder(n_components=10000, cols=X_Columns)
Exemple #15
0
class Encoder():
    """Fit/transform wrapper bundling four category_encoders encoders.

    Features are routed to OrdinalEncoder, OneHotEncoder, CountEncoder or
    TargetEncoder according to the ``method_mapper`` passed to :meth:`fit` /
    :meth:`fit_transform`. Fitted encoders can be persisted with
    :meth:`save_encoder` and restored with :meth:`load_encoder`.
    """

    # Supported backends, keyed by the names accepted in method_mapper.
    encode_methods = {
        'OrdinalEncoder': OrdinalEncoder,
        'OneHotEncoder': OneHotEncoder,
        'CountEncoder': CountEncoder,
        'TargetEncoder': TargetEncoder,
    }

    # spark_encode_methods = {
    #     'mean_encoder':,
    #     'target_encoder':,
    #     'label_encoder':,
    #     'onehot_encoder'
    # }
    # NOTE: target/mean encoders must NOT be fitted on train and validation
    # concatenated together (target leakage); label/onehot encoders may be.

    def __init__(self,
                 sparksess=None,
                 logdir='/encoder',
                 handle_unknown='-99999',
                 save_encoder=False):
        """
        Parameters
        ----------
        sparksess : optional SparkSession; non-None selects the (not yet
            implemented) spark path in fit().
        logdir : str, directory used by save_encoder / load_encoder.
        handle_unknown : forwarded to every underlying encoder.
        save_encoder : bool, when True fit() persists the encoders to logdir.
        """
        self.spark = sparksess
        self.logdir = logdir
        # Bug fix: the original line was the bare expression
        # `self.save_encoder` (a no-op), so the flag was never stored and the
        # later `if self.save_encoder:` tested the bound method — always
        # truthy, i.e. fit() saved unconditionally. Store the flag under a
        # distinct name so it cannot shadow the save_encoder() method.
        self._save_encoder = save_encoder

        # The feature lists below are shared by reference with the encoders,
        # so appending in fit() updates the encoders' `cols` as well.
        self.ordinal_encoder_features = []
        self.onehot_encoder_features = []
        self.count_encoder_features = []
        self.target_encoder_features = []
        self.ordinal_encoder = OrdinalEncoder(
            cols=self.ordinal_encoder_features,
            return_df=True,
            handle_unknown=handle_unknown)
        self.onehot_encoder = OneHotEncoder(cols=self.onehot_encoder_features,
                                            return_df=True,
                                            handle_unknown=handle_unknown)
        self.count_encoder = CountEncoder(cols=self.count_encoder_features,
                                          return_df=True,
                                          handle_unknown=handle_unknown)
        self.target_encoder = TargetEncoder(cols=self.target_encoder_features,
                                            return_df=True,
                                            handle_unknown=handle_unknown)

    def fit(self,
            x_train,
            x_val=None,
            y_train=None,
            y_val=None,
            method_mapper=None):
        """Route features per method_mapper, fit all encoders and transform.

        Parameters
        ----------

        x_train: pd.DataFrame

        x_val: pd.DataFrame

        y_train: pd.DataFrame

        y_val: pd.DataFrame

        method_mapper: dict
            a mapping of feature to EncodeMethod (by name), e.g.
            {
                'feature1': 'OrdinalEncoder',
                'feature2': 'OneHotEncoder',
                'feature3': 'CountEncoder',
                'feature4': 'TargetEncoder',
            }

        Returns
        -------
        (x_train, y_train, x_val, y_val) with x_train / x_val transformed.
        """
        for feat in method_mapper:
            method = method_mapper[feat]
            if method == 'OrdinalEncoder':
                self.ordinal_encoder_features.append(feat)
            elif method == 'OneHotEncoder':
                self.onehot_encoder_features.append(feat)
            elif method == 'CountEncoder':
                self.count_encoder_features.append(feat)
            elif method == 'TargetEncoder':
                self.target_encoder_features.append(feat)
            else:
                # Bug fix: report the unsupported method name rather than
                # the feature name.
                raise ValueError(
                    '编码方式只支持[OrdinalEncoder, OneHotEncoder, CountEncoder, TargetEncoder], 接收到%s'
                    % method)

        if self.spark is None:
            if len(self.ordinal_encoder_features) != 0 or len(
                    self.onehot_encoder_features) != 0:
                # Ordinal/onehot encoding is target-free, so train and
                # validation frames can safely be encoded together.
                # NOTE(review): DataFrame.append was removed in pandas 2.0 —
                # pd.concat is the modern equivalent; confirm pandas version.
                x_whole = x_train if x_val is None else x_train.append(x_val)
                y_whole = None
                if y_train is not None and y_val is not None:
                    y_whole = y_train.append(y_val)

                x_whole = self.ordinal_encoder.fit_transform(x_whole, y_whole)
                x_whole = self.onehot_encoder.fit_transform(x_whole, y_whole)
                x_train = x_whole[:len(x_train)]
                x_val = x_whole[len(x_train):] if x_val is not None else None

            # Count/target encoders are fitted on train only (target leakage
            # otherwise) and applied to validation when it exists.
            x_train = self.count_encoder.fit_transform(x_train, y_train)
            x_train = self.target_encoder.fit_transform(x_train, y_train)
            if x_val is not None:
                x_val = self.count_encoder.transform(x_val, y_val)
                x_val = self.target_encoder.transform(x_val, y_val)

            if self._save_encoder:
                self.save_encoder()
        return x_train, y_train, x_val, y_val

    def transform(self, x, y=None):
        """Apply all four fitted encoders to x; returns (x, y)."""
        x = self.ordinal_encoder.transform(x, y)
        x = self.onehot_encoder.transform(x, y)
        x = self.count_encoder.transform(x, y)
        x = self.target_encoder.transform(x, y)
        return x, y

    def fit_transform(self,
                      x_train,
                      x_val=None,
                      y_train=None,
                      y_val=None,
                      method_mapper=None):
        """Fit on the given frames, then transform them.

        Parameters
        ----------

        x_train: pd.DataFrame

        x_val: pd.DataFrame

        y_train: pd.DataFrame

        y_val: pd.DataFrame

        method_mapper: dict
            a mapping of feature to EncodeMethod — see :meth:`fit`.
        """
        self.fit(x_train, x_val, y_train, y_val, method_mapper)
        x_train, y_train = self.transform(x_train, y_train)
        if x_val is not None:
            x_val, y_val = self.transform(x_val, y_val)
        return x_train, y_train, x_val, y_val

    def save_encoder(self):
        """Persist encoders (pickle) and feature lists (json) under a
        timestamped subdirectory of self.logdir."""
        now = time.strftime("%Y-%m-%d-%H_%M_%S", time.localtime(time.time()))
        outdir = os.path.join(self.logdir, now)
        os.makedirs(outdir)

        bundles = [
            ('OrdinalEncoder', self.ordinal_encoder,
             self.ordinal_encoder_features),
            ('OneHotEncoder', self.onehot_encoder,
             self.onehot_encoder_features),
            ('CountEncoder', self.count_encoder,
             self.count_encoder_features),
            ('TargetEncoder', self.target_encoder,
             self.target_encoder_features),
        ]
        for name, encoder, features in bundles:
            with open(os.path.join(outdir, name + '.pkl'), 'wb') as f:
                pickle.dump(encoder, f)
            with open(os.path.join(outdir, name + 'Features.json'), 'w') as f:
                json.dump(features, f)

    def load_encoder(self, logdir=None):
        """Restore encoders and feature lists written by save_encoder.

        Bug fix: the original implementation opened the files for writing and
        dumped the (unfitted) encoders, destroying the saved state instead of
        loading it.

        Parameters
        ----------
        logdir : str or None
            Directory containing the saved files. Note save_encoder() writes
            into a timestamped subdirectory — pass that subdirectory here.
            Defaults to self.logdir.
        """
        logdir = self.logdir if logdir is None else logdir

        bundles = [
            ('OrdinalEncoder', 'ordinal_encoder'),
            ('OneHotEncoder', 'onehot_encoder'),
            ('CountEncoder', 'count_encoder'),
            ('TargetEncoder', 'target_encoder'),
        ]
        for name, attr in bundles:
            with open(os.path.join(logdir, name + '.pkl'), 'rb') as f:
                setattr(self, attr, pickle.load(f))
            with open(os.path.join(logdir, name + 'Features.json'), 'r') as f:
                setattr(self, attr + '_features', json.load(f))