def main():
    data = DataManager.load_data("data/data_banknote_authentication.txt")
    # remove line breaks and split the comma-separated fields
    data = np.array([item.strip("\n") for item in data])
    data = np.array([item.split(',') for item in data])
    data = data.astype(float)  # np.float is removed in NumPy 1.24+; use the builtin

    # 0 for authentic and 1 for inauthentic
    df = pd.DataFrame(data)
    df[4] = df[4].astype(int)
    authentic = df[df[4] == 0]
    inauthentic = df[df[4] == 1]

    X = df.iloc[np.r_[0:200, 1100:1300], [0, 3]].values
    y = np.array([0 if i < 200 else 1 for i in range(400)])  # ndarray so boolean masks work in plotting

    plt.scatter(X[:200, 0], X[:200, 1], color='red', marker='o', label='authentic')
    plt.scatter(X[200:400, 0], X[200:400, 1], color='blue', marker='x', label='inauthentic')
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()

    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)
    plot_decision_regions(X, y, classifier=ppn)
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()

    # standardize both features (z-score) so gradient descent converges
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    ada = AdalineGD(n_iter=10, eta=0.01)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - gradient descent')
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()
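# The Perceptron class imported above is not included in this collection. A
# minimal sketch consistent with the constructor and attributes these scripts
# use (eta, n_iter, fit, errors_) might look like the following; the actual
# implementations in each repository may differ in detail.
import numpy as np


class Perceptron:
    """Rosenblatt perceptron: updates weights only on misclassified samples."""

    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta        # learning rate
        self.n_iter = n_iter  # number of passes over the training set

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])  # bias at index 0
        self.errors_ = []                   # misclassifications per epoch
        for _ in range(self.n_iter):
            errors = 0
            for xi, target in zip(X, y):
                update = self.eta * (target - self.predict(xi))
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)
            self.errors_.append(errors)
        return self

    def predict(self, X):
        return np.where(np.dot(X, self.w_[1:]) + self.w_[0] >= 0.0, 1, -1)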
def test_tashizan(self):
    """Test method for AdalineGD."""
    # set up the expected weight vector (bias followed by the two feature weights)
    expected = np.array([1.59872116e-16, -1.26256159e-01, 1.10479201e+00])
    df = pd.read_csv('../tests/data/iris.data', header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values
    # standardize both features (z-score)
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    # run the code under test
    ad = AdalineGD(n_iter=15, eta=0.01)
    ad.fit(X_std, y)
    actual = ad.w_
    # assert
    np.testing.assert_array_almost_equal(expected, actual, 2)
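# AdalineGD itself is imported everywhere in this collection but never defined in
# it, and the snippets disagree on its exact interface (eta/n_iter/cost_ here,
# learning_rate/epochs/cost_list or n_itr in later files). A minimal sketch under
# the most common interface, using batch gradient descent and zero-initialized
# weights (consistent with the near-zero expected bias in the test above):
import numpy as np


class AdalineGD:
    """ADAptive LInear NEuron trained with batch gradient descent."""

    def __init__(self, eta=0.01, n_iter=50):
        self.eta = eta        # learning rate
        self.n_iter = n_iter  # number of passes over the training set

    def fit(self, X, y):
        # w_[0] is the bias unit, w_[1:] are the feature weights
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            errors = y - self.net_input(X)                 # residuals for all samples
            self.w_[1:] += self.eta * X.T.dot(errors)      # batch weight update
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)   # sum-squared-error cost
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)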
def main():
    # prepare training data and target variable
    features = ['sepal length (cm)', 'petal length (cm)']
    labels = ['setosa', 'versicolor']
    D = IrisData(features, labels)
    X = D.X
    y = np.where(D.y == 0, -1, 1)

    # standardize training data (z-score per feature column)
    X_std = np.copy(X)
    for i in range(X.shape[1]):
        X_std[:, i] = (X[:, i] - X[:, i].mean()) / X[:, i].std()

    # fit classifiers
    classifiers = [
        AdalineGD(eta=0.01, n_iter=10).fit(X, y),
        AdalineGD(eta=0.0001, n_iter=10).fit(X, y),
        AdalineGD(eta=0.01, n_iter=15).fit(X_std, y),
        AdalineSGD(eta=0.01, n_iter=15).fit(X_std, y)
    ]

    # show history of costs
    for classifier in classifiers:
        plot_update_history(classifier)

    # show decision regions
    plot_decision_regions(X_std, y, classifier=classifiers[2],
                          xlabel='sepal length [standardized]',
                          ylabel='petal length [standardized]')
    plot_decision_regions(X_std, y, classifier=classifiers[3],
                          xlabel='sepal length [standardized]',
                          ylabel='petal length [standardized]')
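# AdalineSGD, used above and in a later snippet, is also absent from this
# collection. A minimal sketch under the same interface assumptions
# (eta, n_iter, fit, cost_), updating the weights one sample at a time and
# recording the average cost per epoch rather than the epoch sum:
import numpy as np


class AdalineSGD:
    """ADAptive LInear NEuron trained with stochastic gradient descent."""

    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=None):
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle  # reshuffle the training data every epoch
        self.rgen = np.random.RandomState(random_state)

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])  # bias at index 0
        self.cost_ = []
        for _ in range(self.n_iter):
            if self.shuffle:
                r = self.rgen.permutation(len(y))
                X, y = X[r], y[r]
            cost = []
            for xi, target in zip(X, y):
                # one weight update per sample
                error = target - self.net_input(xi)
                self.w_[1:] += self.eta * xi * error
                self.w_[0] += self.eta * error
                cost.append(0.5 * error ** 2)
            self.cost_.append(sum(cost) / len(y))  # average cost per epoch
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)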
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap


def plot_decision_regions(X, y, classifier, resolution=0.02):
    # set up marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier over a mesh spanning both features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)


ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
# Data Preparation
df = pd.read_csv('raschka/data/iris.data', header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values

# standardization (z-score)
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(14, 4))

ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X_std, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.01')
ax[0].set_xlim([0, 16])

ada2 = AdalineGD(n_iter=10, eta=0.001).fit(X_std, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), np.log10(ada2.cost_), marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('log(Sum-squared-error)')
ax[1].set_title('Adaline - Learning rate 0.001')
ax[1].set_xlim([0, 16])

ada3 = AdalineGD(n_iter=10, eta=0.0001).fit(X_std, y)
ax[2].plot(range(1, len(ada3.cost_) + 1), np.log10(ada3.cost_), marker='o')
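# The manual z-scoring above is repeated verbatim throughout these scripts. If
# scikit-learn is available, StandardScaler performs the same per-column
# (x - mean) / std transform; this is a hypothetical substitution, not what the
# original authors used:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
X_std = sc.fit_transform(X)  # equivalent to the two explicit column transforms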
def Test():
    # get data
    ml_data = Data()
    ml_data.init_data()
    ml_data.view_initial_data()

    # test perceptron
    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(ml_data.X, ml_data.y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of updates')
    plt.show()

    plot = ml_data.plot_decision_regions(ml_data.X, ml_data.y, classifier=ppn)
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.show()

    # Adaline implementation
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    ada1 = AdalineGD(n_iter=10, eta=0.1).fit(ml_data.X, ml_data.y)
    ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
    ax[0].set_xlabel('Epoch')
    ax[0].set_ylabel('log(Sum Squared error)')
    ax[0].set_title('Adaline - Learning rate 0.1')
    ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(ml_data.X, ml_data.y)
    ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
    ax[1].set_xlabel('Epoch')
    ax[1].set_ylabel('Sum Squared error')
    ax[1].set_title('Adaline - Learning rate 0.0001')
    plt.show()

    # Adaline with feature scaling
    ml_data.standard_scaled()
    ada = AdalineGD(n_iter=10, eta=0.01)
    ada.fit(ml_data.X_std, ml_data.y)
    plot = ml_data.plot_decision_regions(ml_data.X_std, ml_data.y, classifier=ada)
    plot.title('Adaline - Gradient Descent')
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.tight_layout()
    plot.show()

    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum Squared error')
    plt.show()

    # Adaline with stochastic gradient descent
    ada3 = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada3.fit(ml_data.X_std, ml_data.y)
    plot = ml_data.plot_decision_regions(ml_data.X_std, ml_data.y, classifier=ada3)
    plot.title('Adaline - Stochastic Gradient Descent')
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.show()

    plt.plot(range(1, len(ada3.cost_) + 1), ada3.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()
# only select the setosa and versicolor species
data = data.loc[(data["Class label"] == "Iris-setosa") |
                (data["Class label"] == "Iris-versicolor")]

# convert species labels to integers, -1 for setosa and 1 for versicolor
data["Class label"] = np.where(data["Class label"] == "Iris-setosa", -1, 1)

# split data into feature matrix (only "Sepal length" and "Petal length" are
# used) and target vector
X = data[["Sepal length", "Petal length"]]
y = data["Class label"]

# create a plot of the cost function against the iteration number for learning
# rates of 0.01 and 0.0001
iris_ada1 = AdalineGD(n_iterations=10, eta=0.01)
iris_ada1.fit(X.values, y.values)
iris_ada2 = AdalineGD(n_iterations=10, eta=0.0001)
iris_ada2.fit(X.values, y.values)

fig1, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
ax[0].plot(range(1, iris_ada1.n_iterations + 1), np.log10(iris_ada1.cost_),
           marker="o")
ax[0].set(xlabel="Iterations", ylabel="log(SSE)",
          title="Adaline - Learning rate 0.01")
ax[1].plot(range(1, iris_ada2.n_iterations + 1), iris_ada2.cost_, marker="o")
ax[1].set(xlabel="Iterations", ylabel="SSE",
import numpy as np

from adaline import AdalineGD

agd = AdalineGD(eta=0.001)
X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9],
              [0, 1, 1],
              [7, 8, 8],
              [7, 7, 7]])
Y = np.array([-1, -1, 1, -1, 1, 1])
agd.fit(X, Y)
print(agd.predict(X))
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mlxtend.plotting import plot_decision_regions

from adaline import AdalineGD

dataset = pd.read_csv('iris.csv', header=None)
# print(dataset.tail())
output = dataset.iloc[0:100, 4].values
Y = np.where(output == 'Iris-setosa', -1, 1)
X = dataset.iloc[0:100, [0, 2]].values

# standardize both features (z-score)
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

obj = AdalineGD(epochs=15, learning_rate=0.01)
obj.fit(X_std, Y)

plot_decision_regions(X_std, Y, clf=obj)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(obj.cost_list) + 1), obj.cost_list, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from adaline import AdalineGD

dataset = pd.read_csv('iris.csv', header=None)
# print(dataset.tail())
output = dataset.iloc[0:100, 4].values
Y = np.where(output == 'Iris-setosa', -1, 1)
X = dataset.iloc[0:100, [0, 2]].values

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

obj1 = AdalineGD(epochs=10, learning_rate=0.01).fit(X, Y)
ax[0].plot(range(1, len(obj1.cost_list) + 1), np.log10(obj1.cost_list), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-errors)')
ax[0].set_title('Adaline - Learning rate 0.01')

obj2 = AdalineGD(epochs=10, learning_rate=0.0001).fit(X, Y)
ax[1].plot(range(1, len(obj2.cost_list) + 1), obj2.cost_list, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-errors')
ax[1].set_title('Adaline - Learning rate 0.0001')
plt.show()
s = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
df = pd.read_csv(s, header=None, encoding='utf-8')

# extract and convert the target variable from rows 1-100
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)

# extract columns 1 and 3 of rows 1-100 (the features used here)
X = df.iloc[0:100, [0, 2]].values

# split the plotting area into one row and two columns
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# train Adaline by gradient descent (learning rate = 0.01)
ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.01')

# train Adaline by gradient descent (learning rate = 0.0001)
ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')
plt.show()

# scale (standardize) the data
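# (The original script continues past this excerpt. Judging by the identical
# examples elsewhere in this collection, the scaling step announced by the
# comment above is presumably the usual per-column z-scoring:)
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()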
plt.clf()
plot_decision_regions(X2, y2, clf=ppn)
plt.show()

plt.clf()
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('Iterations')
plt.ylabel('Misclassifications')
plt.show()

########### Adaline ###########
from adaline import AdalineGD

ada = AdalineGD(epochs=10, eta=0.01).train(X, y)
plt.clf()
plt.plot(range(1, len(ada.cost_) + 1), np.log10(ada.cost_), marker='o')
plt.xlabel('Iterations')
plt.ylabel('log(Sum-squared-error)')
plt.title('Adaline - Learning rate 0.01')
plt.show()

ada = AdalineGD(epochs=10, eta=0.001).train(X, y)
plt.clf()
plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Iterations')
plt.ylabel('Sum-squared-error')
plt.title('Adaline - Learning rate 0.001')
plt.show()
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap


def plot_decision_regions(X, y, classifier, resolution=0.02):
    # set up marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # evaluate the classifier over a mesh spanning both features
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, resolution),
                         np.arange(y_min, y_max, resolution))
    Z = classifier.predict(np.array([xx.ravel(), yy.ravel()]).T)
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=colors[idx],
                    marker=markers[idx], label=cl,
                    edgecolor='black')


fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

ada1 = AdalineGD(eta=0.01, n_itr=10).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.01')

ada2 = AdalineGD(eta=0.00001, n_itr=10).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), np.log10(ada2.cost_), marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('log(Sum-squared-error)')
ax[1].set_title('Adaline - Learning rate 0.00001')
plt.show()

# standardize both features (z-score)
X_std = np.copy(X)
X_std[:, 0] = (X_std[:, 0] - X_std[:, 0].mean()) / X_std[:, 0].std()
X_std[:, 1] = (X_std[:, 1] - X_std[:, 1].mean()) / X_std[:, 1].std()
from matplotlib import style

from adaline import AdalineGD

if __name__ == '__main__':
    # read the iris.data species data; same as perceptron_iris
    df = pd.read_csv('/Users/narun/Desktop/mylib/hwang/dataSet/iris.data',
                     header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

    # run Adaline with learning rate = 0.01;
    # the cost J(w) grows from epoch to epoch and diverges, so no useful weights are found
    adal = AdalineGD(eta=0.01, n_iter=10).fit(X, y)
    ax[0].plot(range(1, len(adal.cost_) + 1), np.log10(adal.cost_), marker='o')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('log(SQE)')
    ax[0].set_title('Adaline - Learning rate 0.01')

    # run Adaline with learning rate = 0.0001;
    # the cost J(w) shrinks from epoch to epoch and converges toward 0, so useful weights are found
    adal2 = AdalineGD(eta=0.0001, n_iter=10).fit(X, y)
    ax[1].plot(range(1, len(adal2.cost_) + 1), np.log10(adal2.cost_), marker='o')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('log(SQE)')
    ax[1].set_title('Adaline - Learning rate 0.0001')
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style

from adaline import AdalineGD

if __name__ == '__main__':
    # read the iris.data species data; same as perceptron_iris
    df = pd.read_csv('/Users/narun/Desktop/mylib/hwang/dataSet/iris.data',
                     header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    X_std = np.copy(X)  # copy the values of X into X_std
    # assign the standardized sepal length and petal length values to X_std;
    # numpy's mean() returns the average of an array's values and
    # numpy's std() returns their standard deviation
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    # train Adaline on X_std with learning rate 0.01 and 15 iterations
    adal = AdalineGD(eta=0.01, n_iter=15).fit(X_std, y)
    plt.plot(range(1, len(adal.cost_) + 1), adal.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('SSE')
    plt.title('Adaline Standardized - Learning rate 0.01')
    plt.show()