# --- Example #1 ---
# Feature-importance plot for the classifier on the current sample.
viz.plot_feature_importances(clf, X, y, ds._feature_names)

clf.fit(X, y)

# Export the fitted tree to Graphviz source, then render it to PNG
# via the external `dot` binary.
export_graphviz(
    clf.tree_,
    out_file="tree.dot",
    class_names=ds._class_names,
    filled=True,
    impurity=False,
    proportion=True,
    rounded=True,
)
subprocess.call(["dot", "-Tpng", "tree.dot", "-o", "tree.png"])

viz.plot_class_balances(clf, lambda balance: ds.sample(class_balance=balance))

viz.plot_learning_curve(clf, X, y)

# Validation curves over the main decision-tree hyperparameters.
# (Larger max_depth values — 20, 40, 100, ... — were tried previously
# and left disabled in the original.)
tree_param_grid = [
    {"name": "max_depth", "range": list(range(1, 11))},
    {"name": "max_features", "range": np.linspace(1, 41, 20).astype(np.int_)},
    {"name": "min_samples_leaf", "range": [2 ** k for k in range(1, 8)]},
    {"name": "min_samples_split", "range": [2 ** k for k in range(1, 8)]},
]
for param in tree_param_grid:
    viz.plot_validation_curve(clf, X, y, param, name="DecisionTree")

# Final cross-validated score on a fresh, unbalanced sample.
X, y = ds.sample(class_balance=None)
viz.print_cv_score(clf, X, y)
# --- Example #2 ---
import math

import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

from lib import dao, viz


#ds = dao.load_ads()
ds = dao.load_credit()
#ds.sanitize(strategy='impute_mean', scale=False)
ds.onehot()
X, y = ds.sample(class_balance=None)

# Shallow decision-tree base learner (a stump) for boosting.
# NOTE: sklearn requires min_samples_split to be an int >= 2, or a float
# in (0, 1] interpreted as a fraction of samples. The raw math.log value
# is a float > 1 and would be rejected, so it is coerced to a valid int.
base = DecisionTreeClassifier(max_depth=1, splitter='best',
        min_samples_split=max(2, int(math.log(y.shape[0]))),
        random_state=5557)

clf = AdaBoostClassifier(base_estimator=base, n_estimators=50, learning_rate=0.01,
        algorithm='SAMME.R', random_state=5557)

viz.plot_feature_selection(clf, X, y)

viz.plot_feature_importances(clf, X, y, ds._feature_names)

viz.plot_class_balances(clf, lambda x: ds.sample(class_balance=x))

viz.plot_learning_curve(clf, X, y)

# Validation curves over the boosting hyperparameters.
for param in [ {'name': 'n_estimators', 'range': np.array(np.linspace(10, 500, 20), dtype=np.int_)},
            {'name': 'learning_rate', 'range': [0.001, 0.01, 0.1, 1, 10]}, ]:

    viz.plot_validation_curve(clf, X, y, param, name='AdaBoost')

# Final cross-validated score on a fresh, unbalanced sample.
X, y = ds.sample(class_balance=None)
viz.print_cv_score(clf, X, y)
# --- Example #3 ---
import numpy as np
from sklearn.svm import SVC

from lib import dao, viz


#ds = dao.load_ads()
ds = dao.load_credit()
#ds.sanitize(strategy='impute_mean', scale=True)
ds.onehot()
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)
n_samples = ds.data.shape[0]

# One SVC per kernel; max_iter is capped so the validation-curve sweep
# below can vary it, and probability estimates are disabled for speed.
svm = {
    'linear': SVC(kernel='linear', gamma='auto', max_iter=1000, probability=False, random_state=5557),
    'poly': SVC(kernel='poly', gamma='auto', max_iter=1000, probability=False, random_state=5557),
    'rbf': SVC(kernel='rbf', gamma='auto', max_iter=1000, probability=False, random_state=5557)
}

# Validation curve over the iteration budget, per kernel.
for param in [ {'name': 'max_iter', 'range': np.array(np.linspace(1, 1000, 5), dtype=np.int_)} ]:

    X, y = ds.sample(class_balance=None, sparse=True)
    viz.plot_validation_curve(svm['linear'], X, y, param, name='linear_SVC')
    viz.plot_validation_curve(svm['poly'], X, y, param, name='polynomial_SVC')
    viz.plot_validation_curve(svm['rbf'], X, y, param, name='rbf_SVC')

# NOTE(review): scale_zmuv() is applied a second time here (first at the
# top of the script) — confirm the transform is idempotent or intended.
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)

viz.print_cv_score(svm['linear'], X, y)
viz.print_cv_score(svm['poly'], X, y)
viz.print_cv_score(svm['rbf'], X, y)
# --- Example #4 ---
import math
import numpy as np
from sklearn.neighbors import KNeighborsClassifier
from lib import dao, viz


# Load the credit dataset, one-hot encode categoricals, and standardise.
#ds = dao.load_ads()
ds = dao.load_credit()
#ds.sanitize(strategy='impute_mean', scale=True)
ds.onehot()
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)

# k-nearest-neighbours classifier; n_jobs=-1 parallelises the neighbour search.
clf = KNeighborsClassifier(n_neighbors=25, n_jobs=-1)

# Validation curve over the neighbourhood size k = 1..15.
param = {'name': 'n_neighbors', 'range': list(range(1, 16))}
viz.plot_validation_curve(clf, X, y, param, name='KNeighbors')

# Fresh sample for the final score.
# NOTE(review): scale_zmuv() is applied a second time here — confirm idempotent.
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)

viz.print_cv_score(clf, X, y)