Example #1
# X and y are assumed to hold the feature matrix and class labels from earlier in the script
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from plotDecisionRegions import plot_decision_regions  # local helper module (a sketch follows this example)
import matplotlib.pyplot as plt

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# Standardize the features: fit the scaler on the training set only, then reuse it for the test set
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

# from sklearn.lda import LDA  # deprecated import path from old scikit-learn versions
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)  # supervised: LDA uses the class labels

# Let's see how the logistic regression classifier handles the lower-dimensional training dataset after the LDA transformation.

from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
lr = lr.fit(X_train_lda, y_train)
plot_decision_regions(X_train_lda, y_train, classifier=lr)
plt.xlabel("LD 1")
plt.ylabel("LD 2")
plt.legend(loc = "lower left")
plt.show()

X_test_lda = lda.transform(X_test_std)
plot_decision_regions(X_test_lda, y_test, classifier=lr)
plt.xlabel("LD 1")
plt.ylabel("LD 2")
plt.legend(loc = "lower left")
plt.show()
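Every snippet on this page imports a plot_decision_regions helper from a local plotDecisionRegions module that is never shown. Below is a minimal sketch of such a helper for the basic signature used in Examples #1, #2 and #4; it is an assumption modeled on the common tutorial version, not the original module (Examples #3 and #5 use a variant that additionally takes an Axes object and a title).

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # one marker and color per class
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier on a grid spanning the two features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # overlay the samples, one marker/color per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(X[y == cl, 0], X[y == cl, 1],
                    alpha=0.8, c=colors[idx],
                    marker=markers[idx], label=cl)

    # circle the test samples if their indices are given
    if test_idx is not None:
        plt.scatter(X[test_idx, 0], X[test_idx, 1],
                    facecolors='none', edgecolors='black',
                    alpha=1.0, linewidths=1, marker='o',
                    s=100, label='test set')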
Example #2
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from plotDecisionRegions import plot_decision_regions  # local helper module

iris = datasets.load_iris()

# use petal length and petal width as the two features
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)


# k = 5 neighbors; the Minkowski metric with p = 2 is the Euclidean distance
knn = KNeighborsClassifier(n_neighbors=5, p=2, metric="minkowski")

knn.fit(X_train, y_train)

X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

# highlight samples 105-149 (the test set) in the combined plot
plot_decision_regions(X_combined, y_combined, classifier=knn, test_idx=range(105, 150))

plt.xlabel('petal length [cm]')  # the features were not standardized in this snippet
plt.ylabel('petal width [cm]')
plt.legend(loc = 'upper left')
plt.show()
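To put a number on how well the k-NN model generalizes beyond the plotted regions, one might also report the test accuracy; a minimal follow-up using the variables above:

# mean accuracy on the held-out test set
print('Test accuracy: %.3f' % knn.score(X_test, y_test))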


Example #3

# The snippet starts mid-statement; it evidently printed the AdaBoost train/test
# accuracies, with ada_train and ada_test computed just above the cut-off:
print('AdaBoost train/test accuracies %.3f/%.3f'
      % (ada_train, ada_test))

# Plot the decision boundaries
from plotDecisionRegions import plot_decision_regions
import matplotlib.pyplot as plt

f, axarr = plt.subplots(nrows=1,
                        ncols=2,
                        sharex="col",
                        sharey="row",
                        figsize=(8, 3))

for idx, clf, tt in zip([0, 1], [tree, ada], ["Decision Tree", "AdaBoost"]):
    clf.fit(X_train, y_train)
    plot_decision_regions(axarr[idx],
                          X_train,
                          y_train,
                          classifier=clf,
                          title=tt)

plt.text(-10.2, -1.2, s="Hue", ha="center", va="center", fontsize=12)
plt.text(-10.5,
         4.5,
         s="Alcohol",
         ha="center",
         va="center",
         fontsize=12,
         rotation=90)

plt.show()
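The comparison above assumes a decision tree and an AdaBoost ensemble that were fitted before the cut-off. A minimal sketch of how tree and ada might have been set up (the hyperparameters are illustrative, following the classic stump-plus-boosting recipe):

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# a decision stump as the weak base learner
tree = DecisionTreeClassifier(criterion='entropy', max_depth=1, random_state=0)

# boost 500 copies of the stump with a small learning rate
# (base_estimator was renamed to estimator in scikit-learn 1.2)
ada = AdaBoostClassifier(base_estimator=tree,
                         n_estimators=500,
                         learning_rate=0.1,
                         random_state=0)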
Example #4
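This snippet picks up after an XOR toy dataset has been generated; a minimal sketch of the assumed setup (the seed and sample count are illustrative):

import numpy as np
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from plotDecisionRegions import plot_decision_regions  # local helper module

# two Gaussian features; the class is the XOR of their signs
np.random.seed(0)
X_xor = np.random.randn(200, 2)
y_xor = np.logical_xor(X_xor[:, 0] > 0, X_xor[:, 1] > 0)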
y_xor = np.where(y_xor, 1, -1)  # convert the boolean XOR labels to 1 and -1

plt.scatter(X_xor[y_xor == 1, 0],
            X_xor[y_xor == 1, 1],
            c='b',
            marker='x',
            label='1')

plt.scatter(X_xor[y_xor == -1, 0],
            X_xor[y_xor == -1, 1],
            c='r',
            marker='s',
            label='-1')

plt.ylim(-3.0)  # fixes only the lower y-limit

plt.legend()

plt.show()

svm = SVC(kernel='rbf', random_state=0, gamma=0.10, C=10.0)

svm.fit(X_xor, y_xor)

plot_decision_regions(X_xor, y_xor, classifier=svm)

plt.legend(loc='upper left')

plt.show()
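gamma controls the reach of the RBF kernel: larger values make the decision boundary hug the training points more tightly and eventually overfit. A quick way to see the effect, reusing the setup above (the values are illustrative):

for g in (0.1, 1.0, 100.0):
    svm = SVC(kernel='rbf', random_state=0, gamma=g, C=10.0)
    svm.fit(X_xor, y_xor)
    plot_decision_regions(X_xor, y_xor, classifier=svm)
    plt.title('gamma = %s' % g)
    plt.legend(loc='upper left')
    plt.show()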
Example #5
# (the beginning of this snippet was cut off)
plt.show()


# Now plot the decision regions.
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

from plotDecisionRegions import plot_decision_regions
from itertools import product

f, axarr = plt.subplots(nrows=2, ncols=2, sharex="col", sharey="row", figsize=(7, 5))

for idx, clf, tt in zip(product([0, 1], [0, 1]), all_clf, clf_labels):
    clf.fit(X_train_std, y_train)
    plot_decision_regions(axarr[idx[0], idx[1]], X_train_std, y_train, classifier=clf, title=tt)

plt.text(-3.5, -4.5, s="Sepal width [standardized]", ha="center", va="center", fontsize=12)
plt.text(-10.5, 4.5, s="Petal width [standardized]", ha="center", va="center", fontsize=12, rotation=90)

plt.show()


# Inspect the parameter names of the ensemble method:
# print(mv_clf.get_params())

# Now do a grid search to find the best parameters for the model.
# Let's tune the inverse regularization parameter C of the logistic regression classifier and the decision tree depth.

from sklearn.model_selection import GridSearchCV  # sklearn.grid_search was removed in scikit-learn 0.20
params = {"decisiontreeclassifier__max_depth": [1, 2],
          "pipeline-1__clf__C": [0.001, 0.1, 100.0]}
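The keys in params address the estimators nested inside the majority-vote ensemble (mv_clf) by their pipeline names. A minimal sketch of actually running the search, assuming mv_clf and the standardized training data from above (the cv and scoring choices are illustrative):

grid = GridSearchCV(estimator=mv_clf,
                    param_grid=params,
                    cv=10,
                    scoring='roc_auc')
grid.fit(X_train_std, y_train)

print('Best parameters: %s' % grid.best_params_)
print('Best CV score: %.3f' % grid.best_score_)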