Example #1

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from smlib.decision_trees.dt import DecisionTree
from smlib.boosting.xgb_regr import XGBoostRegressor

# Create a random dataset
rng = np.random.RandomState(10)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))

# Fit regression model
regr_1 = DecisionTree(task='regression', criterion='mse', max_depth=1)
regr_2 = DecisionTree(task='regression',
                      criterion='mse',
                      max_depth=15,
                      min_samples_leaf=1)
regr_xgb = XGBoostRegressor(n_estimators=50,
                            max_depth=1,
                            gamma=0.005,
                            lambd=1.0,
                            tree_method='hist')
regr_1.fit(X, y)
regr_2.fit(X, y)
regr_xgb.fit(X, y)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
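The listing cuts off here. A plausible continuation of the predict-and-plot steps is sketched below, assuming the fitted models expose the same predict(X_test) interface used in the RandomForest example later in this file; the variable names y_1, y_2 and y_xgb are illustrative only.

y_1 = regr_1.predict(X_test)       # depth-1 stump: very coarse step function
y_2 = regr_2.predict(X_test)       # deep tree: tends to overfit the noisy points
y_xgb = regr_xgb.predict(X_test)   # boosted stumps: smoother fit

plt.figure(figsize=(10, 5))
plt.scatter(X, y, s=20, c='darkorange', label='data')
plt.plot(X_test, y_1, label='DecisionTree (max_depth=1)')
plt.plot(X_test, y_2, label='DecisionTree (max_depth=15)')
plt.plot(X_test, y_xgb, label='XGBoostRegressor (50 stumps)')
plt.legend()
plt.show()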
Example #2
    def _create_base_alg(self):
        # Build a fresh base tree using the ensemble's own hyperparameters.
        return DecisionTree(task=self.task,
                            criterion=self.criterion,
                            max_depth=self.max_depth,
                            min_samples_leaf=self.min_samples_leaf)
Example #3
    def _create_base_alg(self):
        # Base learner is hard-coded: a regression tree with MSE splits;
        # only max_depth is taken from the ensemble's configuration.
        return DecisionTree(task='regression',
                            criterion='mse',
                            max_depth=self.max_depth,
                            min_samples_leaf=1)
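For context, a factory like _create_base_alg is typically called once per boosting round (or per bagged tree) inside the ensemble's fit loop. The sketch below is a simplified, standalone illustration of that pattern for squared-error gradient boosting; it is not the actual smlib implementation, and boost_regression, create_base_alg, n_rounds and learning_rate are hypothetical names.

import numpy as np

def boost_regression(X, y, create_base_alg, n_rounds=50, learning_rate=0.1):
    # Fit each new base tree to the current residuals (the negative gradient
    # of squared error) and shrink its contribution by the learning rate.
    prediction = np.zeros(len(y), dtype=float)
    trees = []
    for _ in range(n_rounds):
        residual = y - prediction         # pseudo-residuals for MSE loss
        tree = create_base_alg()          # e.g. the factory method shown above
        tree.fit(X, residual)
        prediction += learning_rate * tree.predict(X)
        trees.append(tree)
    return trees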
Example #4
import matplotlib.pyplot as plt
from smlib.knn import kNN
from smlib.decision_trees.dt import DecisionTree
from smlib.model_evaluation.bias_variance import *

# X, y are the training data and T, yT the held-out test inputs/targets,
# assumed to be prepared earlier in the original script.
complexity_param = range(1, 10)
models = [kNN(task='regression', k=k, metric='l2') for k in complexity_param]

EPE, B, V = bias_variance_regression(models, X, y, T, yT, n_subsamples=30)

plt.figure(figsize=(10, 5))
plt.plot(complexity_param, EPE, c='r', label='avg(EPE)')
plt.plot(complexity_param, B, c='b', label='avg(B**2)')
plt.plot(complexity_param, V, c='g', label='avg(V)')

plt.legend()
plt.show()

###################################################
# Comparison with decision trees
complexity_param = range(1, 10)
models = [
    DecisionTree(task='regression', criterion='mse', max_depth=k)
    for k in complexity_param
]

EPE, B, V = bias_variance_regression(models, X, y, T, yT, n_subsamples=30)

plt.figure(figsize=(10, 5))
plt.plot(complexity_param, EPE, c='r', label='avg(EPE)')
plt.plot(complexity_param, B, c='b', label='avg(B**2)')
plt.plot(complexity_param, V, c='g', label='avg(V)')

plt.legend()
plt.show()
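For reference, the curves plotted above follow the standard decomposition of expected prediction error into squared bias plus variance, averaged over the test points. The sketch below is my own illustration of how such a decomposition can be estimated over random subsamples; it is not smlib's bias_variance_regression, and model_factory, the half-size subsampling scheme and the fixed seed are assumptions made for the example.

import numpy as np

def bias_variance_sketch(model_factory, X, y, T, yT, n_subsamples=30):
    # Refit the model on random subsamples and collect predictions on the fixed test set T.
    rng = np.random.RandomState(0)
    preds = []
    for _ in range(n_subsamples):
        idx = rng.choice(len(X), size=len(X) // 2, replace=False)
        model = model_factory()
        model.fit(X[idx], y[idx])
        preds.append(model.predict(T))
    preds = np.asarray(preds)                 # shape: (n_subsamples, len(T))
    mean_pred = preds.mean(axis=0)
    bias_sq = (mean_pred - yT) ** 2           # squared bias per test point
    variance = preds.var(axis=0)              # variance per test point
    epe = ((preds - yT) ** 2).mean(axis=0)    # expected prediction error per point
    return epe.mean(), bias_sq.mean(), variance.mean()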
Example #5

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from smlib.decision_trees.dt import DecisionTree
from smlib.bagging.random_forest import RandomForest
from sklearn.ensemble import RandomForestRegressor as skRFR

# Create a random dataset
rng = np.random.RandomState(205)
X = np.sort(5 * rng.rand(80, 1), axis=0)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(16))


# Fit regression model
#regr_1 = DecisionTree(task='regression', criterion='mse', max_depth=1)
dt = DecisionTree(task='regression', criterion='mse', max_depth=15,
                  min_samples_leaf=3, verbose=True)
rf_params = {'n_estimators': 100, 'max_depth': 15, 'min_samples_leaf': 5}
rf = RandomForest(task='regression', **rf_params)
skrf = skRFR(**rf_params)
dt.fit(X, y)
rf.fit(X, y)
skrf.fit(X, y)

# Predict
X_test = np.arange(0.0, 5.0, 0.01)[:, np.newaxis]
y_dt = dt.predict(X_test)
y_rf = rf.predict(X_test)
y_skrf = skrf.predict(X_test)

# Plot the results
plt.figure(1, (15, 10))
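The plotting code is truncated at this point. A plausible continuation, mirroring the other regression examples in this file (labels and styling are illustrative only):

plt.scatter(X, y, s=20, c='darkorange', label='data')
plt.plot(X_test, y_dt, label='DecisionTree (max_depth=15)')
plt.plot(X_test, y_rf, label='RandomForest (smlib)')
plt.plot(X_test, y_skrf, label='RandomForestRegressor (sklearn)')
plt.legend()
plt.show()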
Example #6

from smlib.knn import kNN
from smlib.decision_trees.dt import DecisionTree
from smlib.model_evaluation.bias_variance import *

from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    shuffle=True,
                                                    stratify=y,
                                                    random_state=123)

complexity_param = range(1, 7)
models = [DecisionTree(max_depth=k) for k in complexity_param]

#test_errors, biases, variances = bias_variance_classification_fixed_model(models[0], X_train, y_train,
#                                             X_test, y_test, n_subsamples=30)

EPE, B, V, Vu, Vb, EPE_check = bias_variance_classification(models,
                                                            X_train,
                                                            y_train,
                                                            X_test,
                                                            y_test,
                                                            n_subsamples=30)
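The returned arrays can be visualized in the same way as in the regression example above. A short sketch (my addition, reusing the plt.plot pattern already shown; the styling is arbitrary):

import matplotlib.pyplot as plt

plt.figure(figsize=(10, 5))
plt.plot(complexity_param, EPE, c='r', label='avg(EPE)')
plt.plot(complexity_param, B, c='b', label='avg(B)')
plt.plot(complexity_param, V, c='g', label='avg(V)')
plt.plot(complexity_param, Vu, c='g', linestyle='--', label='avg(Vu)')
plt.plot(complexity_param, Vb, c='g', linestyle=':', label='avg(Vb)')
plt.legend()
plt.show()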
Example #7

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from smlib.decision_trees.dt import DecisionTree

from sklearn import datasets, metrics
from sklearn.tree import DecisionTreeClassifier as sklearn_DecisionTree

digits = datasets.load_digits()

n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))

dt_params = {'criterion': 'gini', 'max_depth': 7, 'min_samples_leaf': 5}
dt_classifiers = [sklearn_DecisionTree(**dt_params), DecisionTree(**dt_params)]

for clf in dt_classifiers:
    print(f'fitting {clf}')

    # We learn the digits on the first half of the digits
    clf.fit(data[:n_samples // 2], digits.target[:n_samples // 2])

    # Now predict the value of the digit on the second half:
    expected = digits.target[n_samples // 2:]
    predicted = clf.predict(data[n_samples // 2:])

    print("Classification report for classifier %s:\n%s\n" %
          (clf, metrics.classification_report(expected, predicted)))
    print("Confusion matrix:\n%s" %
          metrics.confusion_matrix(expected, predicted))
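    # Optional addition (not in the original): overall accuracy via sklearn.
    print("Accuracy: %.3f" % metrics.accuracy_score(expected, predicted))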
Example #8
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from smlib.decision_trees.dt import DecisionTree

plot_step = 0.02

iris = load_iris()

plt.figure(1, (15, 10))
for pairidx, pair in enumerate([[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]):
    # We only take the two corresponding features
    X = iris.data[:, pair]
    y = iris.target

    print('-' * 50)
    print('feature importances for: ')
    print(iris.feature_names[pair[0]], iris.feature_names[pair[1]])

    dt = DecisionTree(criterion='gini', max_depth=5, min_samples_leaf=2)
    dt.fit(X, y)
    print(dt.feature_importances_)

    skdt = DecisionTreeClassifier(criterion='gini',
                                  max_depth=5,
                                  min_samples_leaf=2)
    skdt.fit(X, y)
    print(skdt.feature_importances_)

    plt.subplot(2, 3, pairidx + 1)
    plt.xlabel(iris.feature_names[pair[0]])
    plt.ylabel(iris.feature_names[pair[1]])

    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1