def build_and_evaluate_model(x, y, *, test_size=0.33, random_state=None,
                             n_iter=2000, eta=1e-9):
    """Train an Adaline classifier on a random split of (x, y) and report it.

    The data is split into train and test parts, the labels are encoded as
    -1/+1, an ``AdalineGD`` model is fitted on the training part, and the
    misclassification count and accuracy are printed for both parts.  A
    confusion-matrix heatmap for the test part is shown before returning.

    Args:
        x: Feature data, passed unchanged to ``train_test_split``.
        y: Labels; entries equal to ``'N'`` are encoded as -1, every other
            value as 1 (the original note names 'R', recurrence, as the
            positive class).
        test_size: Fraction of the samples held out for testing.
        random_state: Seed forwarded to ``train_test_split``; ``None`` gives
            a different shuffle on every call, an int makes it repeatable.
        n_iter: Number of training iterations passed to ``AdalineGD``.
        eta: Learning rate passed to ``AdalineGD``.

    Returns:
        The test-split value of ``accuracy_score`` multiplied by 100.
    """
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, shuffle=True, random_state=random_state)

    # Encode the desired outputs: 'N' -> -1, anything else -> 1.
    y_train = np.where(y_train == 'N', -1, 1)
    y_test = np.where(y_test == 'N', -1, 1)

    start = timeit.default_timer()

    # Train Adaline and report on the training split.
    model1 = AdalineGD(n_iter=n_iter, eta=eta)
    model1.fit(x_train, y_train)
    y_train_pred = model1.predict(x_train)
    print('************train ************')
    print('Misclassified samples: %d' % (y_train != y_train_pred).sum())
    accuracy_train = accuracy_score(y_train, y_train_pred) * 100
    print('Accuracy: %.2f' % accuracy_train, '%')

    # Evaluate on the held-out split.
    print('************test ************')
    y_pred = model1.predict(x_test)

    # Elapsed seconds from just before fitting until the test prediction is
    # done; the training report above is inside the timed span.
    stop = timeit.default_timer()
    execution_time = stop - start
    print('Program Executed in : %.2f' % execution_time, 'sec')
    print('Misclassified samples: %d' % (y_test != y_pred).sum())
    accuracy = accuracy_score(y_test, y_pred) * 100
    print('Accuracy: %.2f' % accuracy, '%')

    # Plot the confusion matrix of the test predictions.
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(cm, annot=True, fmt="d")
    plt.show()

    return accuracy
# Load the Iris data; the CSV has no header row.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/'
    'machine-learning-databases/iris/iris.data',
    header=None)

# Use the first 100 rows; label 'Iris-setosa' becomes -1, anything else 1.
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)

# Two feature columns (0 and 2) of the same rows.
X = df.iloc[0:100, [0, 2]].values

# Standardize each feature column: subtract its mean, divide by its std.
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
X = X_std

# One subplot per learning rate.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

ada1 = AdalineGD(eta=0.01, n_iter=1000)
ada1.fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), ada1.cost_, marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Sum-squared-error')
# Title previously read "Learning rage"; corrected to "rate".
ax[0].set_title('Adaline - Learning rate 0.01')

ada2 = AdalineGD(eta=0.0001, n_iter=1000)
ada2.fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')
import numpy as np

from adalinegd import AdalineGD
from plot_cost_adaline import X, y, plt
from plot_decision_regions import plot_decision_regions

# Standardize the two feature columns one at a time: subtract the column
# mean and divide by the column standard deviation.
X_std = np.copy(X)
for _col in (0, 1):
    _feature = X[:, _col]
    X_std[:, _col] = (_feature - _feature.mean()) / _feature.std()

# Fit Adaline on the standardized features.
ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)

# First figure: decision regions of the fitted classifier.
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

# Second figure: one entry of ada.cost_ per point, numbered from 1.
_epochs = range(1, len(ada.cost_) + 1)
plt.plot(_epochs, ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
# Code is from 'Python Machine Learning' by Raschka and Mirjalili.
# -----------------------------------------------------------------------------
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

from adalinegd import AdalineGD

# Iris data set; the CSV has no header row.
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/'
    'machine-learning-databases/iris/iris.data',
    header=None,
)

# Rows 0-99 (setosa and versicolor): 'Iris-setosa' -> -1, anything else -> 1.
y = np.where(df.iloc[0:100, 4].values == 'Iris-setosa', -1, 1)

# Sepal length and petal length are columns 0 and 2.
X = df.iloc[:100, [0, 2]].values

# Cost curves for two different learning rates, side by side.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# Left panel: eta = 0.01, cost shown on a log10 scale.
ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set(
    xlabel='Epochs',
    ylabel='log(Sum-squared-error)',
    title='Adaline - Learning rate 0.01',
)

# Right panel: eta = 0.0001, raw cost.
ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set(
    xlabel='Epochs',
    ylabel='Sum-squared-error',
    title='Adaline - Learning rate 0.0001',
)

plt.show()
# Features: columns 0 and 2 of the first 100 rows of df.
X = df.iloc[0:100, [0, 2]].values

# Raw scatter: rows 0-49 are drawn as 'setosa', rows 50-99 as 'versicolor'.
plt.scatter(X[:50, 0], X[:50, 1],
            color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0], X[50:100, 1],
            color='blue', marker='x', label='versicolor')
# Column 0 is on the x-axis and column 1 on the y-axis, so the labels follow
# the same sepal/petal order used for the standardized plot further down
# (the two labels were previously swapped).
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
plt.show()

# Standardize the data: per column, subtract the mean and divide by the std.
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

# Create the AdalineGD model
model1 = AdalineGD(n_iter=15, eta=0.01)

# Train the model
model1.fit(X_std, y)

# Plot the training error, one point per entry of model1.cost_
plt.plot(range(1, len(model1.cost_) + 1), model1.cost_,
         marker='o', color='red')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()

# Plot the decision boundary
prd.plot_decision_regions(X_std, y, classifier=model1)
plt.title('Adaline')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
# NOTE(review): this view starts partway through a routine. x1_min, x1_max,
# resolution, classifier, plot, cmap, colors and markers are not defined in
# the visible lines; presumably this is the tail of plot_decision_regions
# (called further down with plot=ax[1]) -- confirm against the full file.
# The original nesting is not recoverable here, so statements are laid out
# flat.

# Range of the second feature column, padded by 1 on each side.
x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
# Grid over both feature ranges, stepped by `resolution`.
xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution), np.arange(x2_min, x2_max, resolution))
# Predict a class for every grid point, then restore the grid shape.
Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
Z = Z.reshape(xx1.shape)
# Filled contours of the predictions, drawn semi-transparent.
plot.contourf(xx1, xx2, Z, alpha=0.3, cmap=cmap)
# Clamp the axes to the extent of the grid.
plot.axis(xmin=xx1.min(),xmax=xx1.max(), ymin=xx2.min(), ymax=xx2.max())
# plot class examples
# One scatter call per distinct label in y; idx picks the colour and marker.
for idx, cl in enumerate(np.unique(y)):
    plot.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8, c=colors[idx], marker=markers[idx], label=cl, edgecolor='black')

# Driver: a two-panel figure for one Adaline fit.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
# The chained call assigns whatever fit() returns; ada1.cost_ is read below.
ada1 = AdalineGD(n_iter=15, eta=0.01).fit(X, y)
# Left panel: ada1.cost_ against its 1-based index.
ax[0].plot(range(1, len(ada1.cost_) + 1), ada1.cost_, marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Sum-squared-error')
ax[0].set_title('Adaline - Learning rate 0.01')
# Right panel: decision regions drawn onto ax[1] through the `plot` argument.
plot_decision_regions(X, y, classifier=ada1, plot=ax[1])
ax[1].set_title('Adaline - Gradient Descent')
ax[1].set_xlabel('sepal length')
ax[1].set_ylabel('petal length')
ax[1].legend(loc='upper left')
plt.tight_layout()
plt.show()