# Ejemplo n.º 1 (Example no. 1)
0
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from applied_machine_learning.fundamentals_of_machine_learning.adspy_shared_utilities import plot_class_regions_for_classifier_subplot

# Example: how the RBF-kernel `gamma` setting changes an SVC's decision
# regions on a synthetic two-class dataset.

# Eight blobs are generated and their labels folded to two classes via
# parity (label % 2).
blob_points, blob_labels = make_blobs(
    n_samples=100, n_features=2, centers=8, cluster_std=1.3, random_state=4)
X_D2 = blob_points
y_D2 = blob_labels % 2
X_train, X_test, y_train, y_test = train_test_split(X_D2, y_D2, random_state=0)

gamma_values = [0.01, 1.0, 10.0]
title_template = 'Support Vector Classifier: \nRBF kernel, gamma = {:.2f}'

# One stacked subplot per gamma value.
fig, subaxes = plt.subplots(3, 1, figsize=(4, 11))

for gamma_value, panel in zip(gamma_values, subaxes):
    clf = SVC(kernel='rbf', gamma=gamma_value)
    clf.fit(X_train, y_train)
    title = title_template.format(gamma_value)
    # Only the training points are drawn; the test-set arguments are None.
    plot_class_regions_for_classifier_subplot(
        clf, X_train, y_train, None, None, title, panel)

plt.tight_layout()
plt.show()
# Ejemplo n.º 2 (Example no. 2)
0
from sklearn.linear_model import LogisticRegression
from applied_machine_learning.fundamentals_of_machine_learning.adspy_shared_utilities import plot_class_regions_for_classifier_subplot

# Example: binary logistic regression on the fruit dataset (apple vs. every
# other fruit) using only the height and width columns.

# `pd` is used right below but pandas is not imported before this point in
# the file, so bring it into scope here.
import pandas as pd

fruits = pd.read_table('../resources/fruit_data_with_colors.txt')
X_fruits_2d = fruits[['height', 'width']]
y_fruits_2d = fruits['fruit_label']
# Boolean target: True where fruit_label == 1 (treated as "apple" below).
y_fruits_apple = y_fruits_2d == 1

X_train, X_test, y_train, y_test = train_test_split(X_fruits_2d.values,
                                                    y_fruits_apple.values,
                                                    random_state=0)
clf = LogisticRegression(C=100).fit(X_train, y_train)

fig, subaxes = plt.subplots(1, 1, figsize=(7, 5))
plot_class_regions_for_classifier_subplot(clf, X_train, y_train, None, None,
                                          'Logistic regression for binary classification\nFruit dataset: Apple vs others',
                                          subaxes)
subaxes.set_xlabel('height')
subaxes.set_ylabel('width')

# Report the predicted class for two hand-picked (height, width) samples.
prediction_names = ['not an apple', 'an apple']
for h, w in [(6, 8), (10, 7)]:
    # The model was trained on a boolean target, so predict() yields a NumPy
    # boolean. Using a NumPy boolean directly as a list index is deprecated,
    # so convert it to a plain int (False -> 0, True -> 1) first.
    predicted_index = int(clf.predict([[h, w]])[0])
    print('A fruit with height {} and width {} is predicted as {}'
          .format(h, w, prediction_names[predicted_index]))

# Display the figure, as the other examples in this file do.
plt.show()

from applied_machine_learning.fundamentals_of_machine_learning.adspy_shared_utilities import plot_class_regions_for_classifier_subplot
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Example: decision regions of a two-hidden-layer MLP classifier for several
# values of its `alpha` parameter, on a synthetic two-class dataset.

# Eight blobs are generated and their labels folded to two classes via
# parity (label % 2).
X_D2, y_D2 = make_blobs(n_samples=100, n_features=2, centers=8, cluster_std=1.3, random_state=4)
y_D2 = y_D2 % 2
X_train, X_test, y_train, y_test = train_test_split(X_D2, y_D2, random_state=0)

# One stacked subplot per alpha value.
fig, subaxes = plt.subplots(4, 1, figsize=(6, 23))
for this_alpha, axis in zip([0.01, 0.1, 1.0, 5.0], subaxes):
    nnclf = MLPClassifier(solver='lbfgs', max_iter=10000,
                          activation='tanh',
                          alpha=this_alpha,
                          hidden_layer_sizes=[10, 10],
                          random_state=0).fit(X_train, y_train)
    title = 'Dataset 2: NN classifier, alpha = {:.3f} '.format(this_alpha)
    # Both the training and the test points are passed to the plotting helper.
    plot_class_regions_for_classifier_subplot(nnclf, X_train, y_train, X_test, y_test, title, axis)

# Lay the figure out once, after every subplot has been drawn, instead of
# recomputing the layout on each loop iteration.
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import load_iris
from applied_machine_learning.fundamentals_of_machine_learning.adspy_shared_utilities import plot_class_regions_for_classifier_subplot

# Example: decision-tree decision regions for every pair of the four iris
# features, one subplot per pair.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0)

tree_max_depth = 4
# All index pairs (i, j) with i < j over the four feature columns:
# [0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3].
pair_list = [[first, second]
             for first in range(4)
             for second in range(first + 1, 4)]

fig, subaxes = plt.subplots(len(pair_list), 1, figsize=(6, 32))
# The title does not depend on the feature pair, so build it once.
title = 'Decision Tree, max_depth = {:d}'.format(tree_max_depth)

for feature_pair, panel in zip(pair_list, subaxes):
    x_index, y_index = feature_pair
    # Keep only the two selected feature columns of the training data.
    X = X_train[:, feature_pair]
    y = y_train

    clf = DecisionTreeClassifier(max_depth=tree_max_depth)
    clf.fit(X, y)
    plot_class_regions_for_classifier_subplot(
        clf, X, y, None, None, title, panel, iris.target_names)

    panel.set_xlabel(iris.feature_names[x_index])
    panel.set_ylabel(iris.feature_names[y_index])

plt.tight_layout()
plt.show()
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from applied_machine_learning.fundamentals_of_machine_learning.adspy_shared_utilities import plot_class_regions_for_classifier_subplot

# Example: gradient-boosted decision trees with default settings on a
# synthetic two-class dataset.

# Eight blobs are generated and their labels folded to two classes via
# parity (label % 2).
blob_points, blob_labels = make_blobs(
    n_samples=100, n_features=2, centers=8, cluster_std=1.3, random_state=4)
X_D2 = blob_points
y_D2 = blob_labels % 2
X_train, X_test, y_train, y_test = train_test_split(X_D2, y_D2, random_state=0)

clf = GradientBoostingClassifier()
clf.fit(X_train, y_train)

title = 'GBDT Classifier, complex binary dataset, default settings'
fig, subaxes = plt.subplots(1, 1, figsize=(6, 6))
# Both the training and the test points are passed to the plotting helper.
plot_class_regions_for_classifier_subplot(
    clf, X_train, y_train, X_test, y_test, title, subaxes)
plt.show()
# Example: random forest with default settings on the fruit dataset.
# First plots the decision regions for every pair of features, then fits a
# forest on all features and prints its training / test accuracy.
#
# NOTE(review): `feature_names_fruits` and `target_fruit_names` are not
# defined in the visible part of this file - confirm they are defined before
# this point. `fruits` is the DataFrame loaded earlier in the file.

# RandomForestClassifier is not imported anywhere above in this file.
from sklearn.ensemble import RandomForestClassifier

pair_list = [[0, 1], [0, 2], [0, 3], [1, 2], [1, 3], [2, 3]]

X_fruits = fruits[feature_names_fruits]
y_fruits = fruits['fruit_label']

X_train, X_test, y_train, y_test = train_test_split(X_fruits.values,
                                                    y_fruits.values,
                                                    random_state=0)

# Give these plots their own title; previously the name `title` still held
# the gradient-boosting title left over from the preceding example.
title = 'Random Forest, Fruit dataset, default settings'

fig, subaxes = plt.subplots(6, 1, figsize=(6, 50))
for pair, axes in zip(pair_list, subaxes):
    # Keep only the two selected feature columns of the training data.
    X = X_train[:, pair]
    y = y_train

    clf = RandomForestClassifier(n_jobs=-1).fit(X, y)
    plot_class_regions_for_classifier_subplot(clf, X, y, None, None, title,
                                              axes, target_fruit_names)

    axes.set_xlabel(feature_names_fruits[pair[0]])
    axes.set_ylabel(feature_names_fruits[pair[1]])

plt.tight_layout()
plt.show()

# Refit on the full feature set to report accuracy.
clf = RandomForestClassifier(n_jobs=-1).fit(X_train, y_train)
print('Random Forest, Fruit dataset, default settings')
print('Accuracy of RF classifier on training set: {:.2f}'.format(
    clf.score(X_train, y_train)))
print('Accuracy of RF classifier on test set: {:.2f}'.format(
    clf.score(X_test, y_test)))