def sum_coding():
    """Exercise SumEncoder on the mushroom features and print frame info."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.SumEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release references explicitly so memory profiling sees the drop
    del encoder, _, features, encoded
def helmert():
    """Exercise HelmertEncoder on the mushroom features and print frame info."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.HelmertEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release references explicitly so memory profiling sees the drop
    del encoder, _, features, encoded
def polynomial():
    """Exercise PolynomialEncoder on the mushroom features and print frame info."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.PolynomialEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release references explicitly so memory profiling sees the drop
    del encoder, _, features, encoded
def binary():
    """Exercise BinaryEncoder on the mushroom features and print frame info."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.BinaryEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release references explicitly so memory profiling sees the drop
    del encoder, _, features, encoded
def backward_difference():
    """Exercise BackwardDifferenceEncoder on the mushroom features and print frame info."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.BackwardDifferenceEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release references explicitly so memory profiling sees the drop
    del encoder, _, features, encoded
def leaveoneout():
    """Exercise LeaveOneOutEncoder on the mushroom features and print frame info.

    NOTE: fit() followed by transform() is kept deliberately — for
    LeaveOneOutEncoder this is NOT interchangeable with fit_transform(),
    which applies leave-one-out logic to the training frame.
    """
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.LeaveOneOutEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release references explicitly so memory profiling sees the drop
    del encoder, _, features, encoded
import warnings

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.exceptions import DataConversionWarning

from category_encoders.basen import BaseNEncoder
from examples.source_data.loaders import get_mushroom_data

# BaseNEncoder emits DataConversionWarning on numpy input; silence it for the demo.
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

print(__doc__)

# first get data from the mushroom dataset
X, y, _ = get_mushroom_data()
X = X.values  # use numpy array not dataframe here
n_samples = X.shape[0]

# split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

# create a pipeline: binary (base-2) encoding -> scaling -> logistic regression
ppl = Pipeline([
    ('enc', BaseNEncoder(base=2, return_df=False, verbose=True)),
    ('norm', StandardScaler()),
    ('clf', LogisticRegression(solver='lbfgs', random_state=0)),
])

# set the parameters by cross-validation
def control():
    """Baseline for the encoder benchmarks: load the data, do no encoding."""
    features, _, _ = get_mushroom_data()
    del features
from __future__ import print_function from sklearn import datasets from sklearn.grid_search import GridSearchCV from sklearn.cross_validation import train_test_split from sklearn.metrics import classification_report from sklearn.pipeline import Pipeline from category_encoders.basen import BaseNEncoder from examples.source_data.loaders import get_mushroom_data from sklearn.linear_model import LogisticRegression print(__doc__) # first we get data from the mushroom dataset X, y, _ = get_mushroom_data() X = X.values # use numpy array not dataframe here n_samples = X.shape[0] # Split the dataset in two equal parts X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0) # create a pipeline ppl = Pipeline([ ('enc', BaseNEncoder(base=2, return_df=False, verbose=True)), ('clf', LogisticRegression()) ]) # Set the parameters by cross-validation tuned_parameters = {