Example #1
0
def sum_coding():
    """Demo of ce.SumEncoder: fit/transform the mushroom features and print frame info before and after."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.SumEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release the large frames explicitly once the demo output is printed
    del encoder, _, features, encoded
Example #2
0
def helmert():
    """Demo of ce.HelmertEncoder: fit/transform the mushroom features and print frame info before and after."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.HelmertEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release the large frames explicitly once the demo output is printed
    del encoder, _, features, encoded
Example #3
0
def polynomial():
    """Demo of ce.PolynomialEncoder: fit/transform the mushroom features and print frame info before and after."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.PolynomialEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release the large frames explicitly once the demo output is printed
    del encoder, _, features, encoded
Example #4
0
def binary():
    """Demo of ce.BinaryEncoder: fit/transform the mushroom features and print frame info before and after."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.BinaryEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release the large frames explicitly once the demo output is printed
    del encoder, _, features, encoded
Example #5
0
def backward_difference():
    """Demo of ce.BackwardDifferenceEncoder: fit/transform the mushroom features and print frame info before and after."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.BackwardDifferenceEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release the large frames explicitly once the demo output is printed
    del encoder, _, features, encoded
Example #6
0
def leaveoneout():
    """Demo of ce.LeaveOneOutEncoder: fit/transform the mushroom features and print frame info before and after."""
    features, _, _ = get_mushroom_data()
    print(features.info())
    encoder = ce.LeaveOneOutEncoder()
    encoder.fit(features, None)
    encoded = encoder.transform(features)
    print(encoded.info())
    # release the large frames explicitly once the demo output is printed
    del encoder, _, features, encoded
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from category_encoders.basen import BaseNEncoder
from examples.source_data.loaders import get_mushroom_data
from sklearn.linear_model import LogisticRegression
import warnings
from sklearn.exceptions import DataConversionWarning

# Silence sklearn's data-conversion warning for this demo script.
warnings.filterwarnings(action='ignore', category=DataConversionWarning)

print(__doc__)

# first get data from the mushroom dataset
# X: feature frame, y: target; third return value is discarded here.
X, y, _ = get_mushroom_data()
X = X.values  # use numpy array not dataframe here
n_samples = X.shape[0]

# split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.5,
                                                    random_state=0)

# create a pipeline:
# base-2 categorical encoding -> standard scaling -> logistic regression
ppl = Pipeline([('enc', BaseNEncoder(base=2, return_df=False, verbose=True)),
                ('norm', StandardScaler()),
                ('clf', LogisticRegression(solver='lbfgs', random_state=0))])

# set the parameters by cross-validation
Example #8
0
def control():
    """Control case: load the mushroom features and release them without encoding anything."""
    features, _, _ = get_mushroom_data()
    del features
from __future__ import print_function

from sklearn import datasets
# NOTE: sklearn.grid_search and sklearn.cross_validation were deprecated in 0.18
# and removed in 0.20; GridSearchCV and train_test_split both live in
# sklearn.model_selection now (matching the imports used elsewhere in this file).
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from category_encoders.basen import BaseNEncoder
from examples.source_data.loaders import get_mushroom_data
from sklearn.linear_model import LogisticRegression

print(__doc__)

# first we get data from the mushroom dataset
# X: feature frame, y: target; third return value is discarded here.
X, y, _ = get_mushroom_data()
X = X.values  # use numpy array not dataframe here
n_samples = X.shape[0]

# Split the dataset in two equal parts
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

# create a pipeline: base-2 categorical encoding -> logistic regression
ppl = Pipeline([
    ('enc', BaseNEncoder(base=2, return_df=False, verbose=True)),
    ('clf', LogisticRegression())
])


# Set the parameters by cross-validation
tuned_parameters = {