""" import numpy as np import pandas as pd import matplotlib.pyplot as plt from smlib.decision_trees.dt import DecisionTree from smlib.boosting.xgb_regr import XGBoostRegressor # Create a random dataset rng = np.random.RandomState(10) X = np.sort(5 * rng.rand(80, 1), axis=0) y = np.sin(X).ravel() y[::5] += 3 * (0.5 - rng.rand(16)) # Fit regression model regr_1 = DecisionTree(task='regression', criterion='mse', max_depth=1) regr_2 = DecisionTree(task='regression', criterion='mse', max_depth=15, min_samples_leaf=1) regr_xgb = XGBoostRegressor(n_estimators=50, max_depth=1, gamma=0.005, lambd=1.0, tree_method='hist') regr_1.fit(X, y) regr_2.fit(X, y) regr_xgb.fit(X, y) # Predict X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
def _create_base_alg(self):
    return DecisionTree(task=self.task,
                        criterion=self.criterion,
                        max_depth=self.max_depth,
                        min_samples_leaf=self.min_samples_leaf)
def _create_base_alg(self):
    return DecisionTree(task='regression',
                        criterion='mse',
                        max_depth=self.max_depth,
                        min_samples_leaf=1)
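# Why there are two _create_base_alg variants: the first forwards the
# ensemble's own task and criterion, which suits a bagging-style ensemble
# such as RandomForest; the second always builds MSE regression trees,
# which is what gradient boosting needs, since each new tree is fit to
# real-valued residuals even when the overall task is classification.
# A minimal, hypothetical fit loop consuming the factory (the enclosing
# class and its n_estimators / learning_rate attributes are assumed here,
# not taken from smlib):
def fit(self, X, y):
    self.estimators_ = []
    residual = y.astype(float)
    for _ in range(self.n_estimators):
        tree = self._create_base_alg()      # fresh base learner per round
        tree.fit(X, residual)               # fit the current residuals
        residual = residual - self.learning_rate * tree.predict(X)
        self.estimators_.append(tree)
    return self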
complexity_param = range(1, 10)
models = [kNN(task='regression', k=k, metric='l2') for k in complexity_param]
EPE, B, V = bias_variance_regression(models, X, y, T, yT, n_subsamples=30)

plt.figure(figsize=(10, 5))
plt.plot(complexity_param, EPE, c='r', label='avg(EPE)')
plt.plot(complexity_param, B, c='b', label='avg(B**2)')
plt.plot(complexity_param, V, c='g', label='avg(V)')
plt.legend()
plt.show()

###################################################
# Comparison with decision trees
complexity_param = range(1, 10)
models = [DecisionTree(task='regression', criterion='mse', max_depth=k)
          for k in complexity_param]
EPE, B, V = bias_variance_regression(models, X, y, T, yT, n_subsamples=30)

plt.figure(figsize=(10, 5))
plt.plot(complexity_param, EPE, c='r', label='avg(EPE)')
plt.plot(complexity_param, B, c='b', label='avg(B**2)')
plt.plot(complexity_param, V, c='g', label='avg(V)')
plt.legend()
plt.show()
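# What bias_variance_regression presumably estimates (a sketch of the
# standard squared-loss decomposition; the actual smlib internals are not
# shown in this excerpt). Refitting the same model on n_subsamples draws of
# the training set gives, at each test point x,
#     EPE(x) ~= Bias(x)**2 + Var(x) + irreducible noise,
# with Bias(x) = mean_s f_s(x) - y(x) and Var(x) = var_s f_s(x):
import numpy as np

def bias_variance_point(preds, y_true):
    """preds: shape (n_subsamples,), one prediction per refit model;
    y_true: scalar test target. Returns (bias**2, variance) at this point."""
    avg_pred = preds.mean()
    bias_sq = (avg_pred - y_true) ** 2           # error of the averaged model
    variance = ((preds - avg_pred) ** 2).mean()  # spread across the refits
    return bias_sq, variance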
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from smlib.decision_trees.dt import DecisionTree
from smlib.bagging.random_forest import RandomForest
from sklearn.ensemble import RandomForestRegressor as skRFR

# Create a random dataset
rng = np.random.RandomState(205)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression models
dt = DecisionTree(task='regression', criterion='mse', max_depth=15,
                  min_samples_leaf=3, verbose=True)
rf_params = {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 5}
rf = RandomForest(task='regression', **rf_params)
skrf = skRFR(**rf_params)
dt.fit(X, y)
rf.fit(X, y)
skrf.fit(X, y)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_dt = dt.predict(X_test)
y_rf = rf.predict(X_test)
y_skrf = skrf.predict(X_test)

# Plot the results
plt.figure(1, (15, 10))
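# The example stops right after opening the figure; a plausible completion
# in the same style as the other regression plots (labels are illustrative):
plt.scatter(X, y, s=20, c='darkorange', label='data')
plt.plot(X_test, y_dt, c='cornflowerblue', label='DecisionTree')
plt.plot(X_test, y_rf, c='yellowgreen', label='smlib RandomForest')
plt.plot(X_test, y_skrf, c='r', label='sklearn RandomForestRegressor')
plt.legend()
plt.show()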
from smlib.knn import kNN
from smlib.decision_trees.dt import DecisionTree
from smlib.model_evaluation.bias_variance import *
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33,
                                                    shuffle=True, stratify=y,
                                                    random_state=123)

complexity_param = range(1, 7)
models = [DecisionTree(max_depth=k) for k in complexity_param]
#test_errors, biases, variances = bias_variance_classification_fixed_model(
#    models[0], X_train, y_train, X_test, y_test, n_subsamples=30)
EPE, B, V, Vu, Vb, EPE_check = bias_variance_classification(models, X_train, y_train,
                                                            X_test, y_test,
                                                            n_subsamples=30)
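# The classification decomposition returns more terms than the regression
# one: Vu and Vb look like the "unbiased" and "biased" variance parts of a
# Domingos-style 0-1-loss decomposition, where variance increases the error
# on points the average model classifies correctly (Vu) and decreases it on
# points it gets wrong (Vb), so EPE ~= B + Vu - Vb; EPE_check presumably
# verifies that identity. A plotting step mirroring the regression example
# (matplotlib is assumed, as this script does not import it):
import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))
plt.plot(complexity_param, EPE, c='r', label='avg(EPE)')
plt.plot(complexity_param, B, c='b', label='avg(B)')
plt.plot(complexity_param, Vu, c='g', label='avg(Vu)')
plt.plot(complexity_param, Vb, c='m', label='avg(Vb)')
plt.legend()
plt.show()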
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from smlib.decision_trees.dt import DecisionTree
from sklearn import datasets, metrics
from sklearn.tree import DecisionTreeClassifier as sklearn_DecisionTree

digits = datasets.load_digits()
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

dt_params = {'criterion': 'gini', 'max_depth': 7, 'min_samples_leaf': 5}
dt_classifiers = [sklearn_DecisionTree(**dt_params), DecisionTree(**dt_params)]

for clf in dt_classifiers:
    print(f'fitting {clf}')
    # Learn the digits on the first half of the dataset
    clf.fit(data[:n_samples // 2], digits.target[:n_samples // 2])

    # Now predict the value of the digit on the second half
    expected = digits.target[n_samples // 2:]
    predicted = clf.predict(data[n_samples // 2:])

    print("Classification report for classifier %s:\n%s\n"
          % (clf, metrics.classification_report(expected, predicted)))
    print("Confusion matrix:\n%s" % metrics.confusion_matrix(expected, predicted))
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from smlib.decision_trees.dt import DecisionTree

plot_step = 0.02
iris = load_iris()

plt.figure(1, (15, 10))
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3],
                                [1, 2], [1, 3], [2, 3]]):
    # We only take the two corresponding features
    X = iris.data[:, pair]
    y = iris.target

    print('-' * 50)
    print('feature importances for:')
    print(iris.feature_names[pair[0]], iris.feature_names[pair[1]])

    dt = DecisionTree(criterion='gini', max_depth=5, min_samples_leaf=2)
    dt.fit(X, y)
    print(dt.feature_importances_)

    skdt = DecisionTreeClassifier(criterion='gini', max_depth=5, min_samples_leaf=2)
    skdt.fit(X, y)
    print(skdt.feature_importances_)

    plt.subplot(2, 3, pairidx + 1)
    plt.xlabel(iris.feature_names[pair[0]])
    plt.ylabel(iris.feature_names[pair[1]])

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
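    # The excerpt ends before the surface is rendered; the standard
    # continuation (this mirrors sklearn's iris decision-surface example,
    # which the script appears to follow) would be:
    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))
    Z = dt.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu,
                edgecolor='black', s=15)

plt.suptitle('Decision surface of a decision tree using paired features')
plt.show()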