# Decision-tree experiment driver (chunk: imports and clf/ds setup are above this view).
# NOTE(review): plot_feature_importances is called before clf.fit below — presumably
# clf was already fitted earlier in the script; confirm upstream.
viz.plot_feature_importances(clf, X, y, ds._feature_names)
clf.fit(X, y)
# Export the fitted tree to Graphviz DOT, then render a PNG via the `dot` CLI.
# NOTE(review): passing clf.tree_ (the low-level Tree object) only works on older
# scikit-learn releases; newer versions expect the fitted estimator itself — TODO
# confirm the pinned sklearn version.
export_graphviz(
    clf.tree_,
    out_file="tree.dot",
    class_names=ds._class_names,
    filled=True,
    impurity=False,
    proportion=True,
    rounded=True,
)
subprocess.call(["dot", "-Tpng", "tree.dot", "-o", "tree.png"])
viz.plot_class_balances(clf, lambda x: ds.sample(class_balance=x))
viz.plot_learning_curve(clf, X, y)
# Validation curves over the main tree hyper-parameters.
for param in [
    {"name": "max_depth", "range": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},  # ,20,40,100]}, #,160,280,560,1120]},
    {"name": "max_features", "range": np.array(np.linspace(1, 41, 20), dtype=np.int_)},
    {"name": "min_samples_leaf", "range": [2, 4, 8, 16, 32, 64, 128]},
    {"name": "min_samples_split", "range": [2, 4, 8, 16, 32, 64, 128]},
]:
    viz.plot_validation_curve(clf, X, y, param, name="DecisionTree")
# Final cross-validated score on a fresh (unbalanced) sample.
X, y = ds.sample(class_balance=None)
viz.print_cv_score(clf, X, y)
# AdaBoost (decision-stump base learner) experiment on the credit dataset.
import math

import numpy as np
from sklearn.ensemble import AdaBoostClassifier  # FIX: was used without being imported
from sklearn.tree import DecisionTreeClassifier  # FIX: was used without being imported

from lib import dao, viz

#ds = dao.load_ads()
ds = dao.load_credit()
#ds.sanitize(strategy='impute_mean', scale=False)
ds.onehot()
X, y = ds.sample(class_balance=None)

# Weak learner: a depth-1 stump.
# FIX: min_samples_split must be an int >= 2; a float is interpreted by
# scikit-learn as a fraction in (0.0, 1.0], so the original math.log(n) (> 1.0
# for any realistic n) would raise ValueError at fit time. Truncate to int and
# clamp to the legal minimum of 2.
base = DecisionTreeClassifier(
    max_depth=1,
    splitter='best',
    min_samples_split=max(2, int(math.log(y.shape[0]))),
    random_state=5557,
)
clf = AdaBoostClassifier(
    base_estimator=base,
    n_estimators=50,
    learning_rate=0.01,
    algorithm='SAMME.R',
    random_state=5557,
)

viz.plot_feature_selection(clf, X, y)
viz.plot_feature_importances(clf, X, y, ds._feature_names)
viz.plot_class_balances(clf, lambda x: ds.sample(class_balance=x))
viz.plot_learning_curve(clf, X, y)
# Validation curves over the boosting hyper-parameters.
for param in [
    {'name': 'n_estimators', 'range': np.array(np.linspace(10, 500, 20), dtype=np.int_)},
    {'name': 'learning_rate', 'range': [0.001, 0.01, 0.1, 1, 10]},
]:
    viz.plot_validation_curve(clf, X, y, param, name='AdaBoost')
# Final cross-validated score on a fresh (unbalanced) sample.
X, y = ds.sample(class_balance=None)
viz.print_cv_score(clf, X, y)
# SVM kernel-comparison experiment (linear / polynomial / RBF) on the credit dataset.
import numpy as np  # FIX: np was used without being imported
from sklearn.svm import SVC  # FIX: SVC was used without being imported

from lib import dao, viz

#ds = dao.load_ads()
ds = dao.load_credit()
#ds.sanitize(strategy='impute_mean', scale=True)
ds.onehot()
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)
n_samples = ds.data.shape[0]

# One SVC per kernel, identical settings otherwise so curves are comparable.
svm = {
    'linear': SVC(kernel='linear', gamma='auto', max_iter=1000, probability=False, random_state=5557),
    'poly': SVC(kernel='poly', gamma='auto', max_iter=1000, probability=False, random_state=5557),
    'rbf': SVC(kernel='rbf', gamma='auto', max_iter=1000, probability=False, random_state=5557),
}

# Validation curve over the iteration budget, on a sparse sample.
for param in [
    {'name': 'max_iter', 'range': np.array(np.linspace(1, 1000, 5), dtype=np.int_)},
]:
    X, y = ds.sample(class_balance=None, sparse=True)
    viz.plot_validation_curve(svm['linear'], X, y, param, name='linear_SVC')
    viz.plot_validation_curve(svm['poly'], X, y, param, name='polynomial_SVC')
    viz.plot_validation_curve(svm['rbf'], X, y, param, name='rbf_SVC')

# NOTE(review): scale_zmuv() was already applied above; calling it again here
# re-standardizes the already-scaled data — confirm this is intentional.
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)
viz.print_cv_score(svm['linear'], X, y)
viz.print_cv_score(svm['poly'], X, y)
viz.print_cv_score(svm['rbf'], X, y)
# k-nearest-neighbours experiment on the credit dataset: a validation curve
# over k, then a final cross-validated score.
import math
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

from lib import dao, viz

#ds = dao.load_ads()
ds = dao.load_credit()
#ds.sanitize(strategy='impute_mean', scale=True)
ds.onehot()
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)

knn = KNeighborsClassifier(n_neighbors=25, n_jobs=-1)

# Sweep k = 1..15 and plot the validation curve.
vc_param = {'name': 'n_neighbors', 'range': list(range(1, 16))}
viz.plot_validation_curve(knn, X, y, vc_param, name='KNeighbors')

# NOTE(review): scale_zmuv() runs a second time here before scoring — confirm
# the double standardization is intentional.
ds.scale_zmuv()
X, y = ds.sample(class_balance=None)
viz.print_cv_score(knn, X, y)