def main():
    data = DataManager.load_data("data/data_banknote_authentication.txt")

    # strip newlines, split the comma-separated fields, and cast to float
    data = np.array([item.strip("\n") for item in data])
    data = np.array([item.split(',') for item in data])
    data = data.astype(float)

    # class label in column 4: 0 for authentic and 1 for inauthentic
    df = pd.DataFrame(data)
    df[4] = df[4].astype(int)
    authentic = df[df[4] == 0]
    inauthentic = df[df[4] == 1]

    # take 200 samples of each class; features: variance (col 0) and entropy (col 3)
    X = df.iloc[np.r_[0:200, 1100:1300], [0, 3]].values
    y = np.array([0 if i < 200 else 1 for i in range(400)])

    plt.scatter(X[:200, 0], X[:200, 1],
                color='red', marker='o', label='authentic')
    plt.scatter(X[200:400, 0], X[200:400, 1],
                color='blue', marker='x', label='inauthentic')
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()

    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)
    plot_decision_regions(X, y, classifier=ppn)
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()

    # standardize both features before training Adaline
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    ada = AdalineGD(n_iter=10, eta=0.01)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - gradient descent')
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()
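# The DataManager class used by main() is not shown anywhere in this section.
# A minimal stand-in, assuming load_data simply returns the raw lines of the
# file (consistent with the strip/split post-processing above) — the method
# name and signature come from the call site; the body is an assumption:
class DataManager:
    @staticmethod
    def load_data(path):
        # read the raw CSV lines; parsing is left to the caller
        with open(path) as f:
            return f.readlines()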
def test_tashizan(self):
    """Test method for AdalineGD."""
    # setup
    expected = np.array([1.59872116e-16, -1.26256159e-01, 1.10479201e+00])
    df = pd.read_csv('../tests/data/iris.data', header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    # exercise
    ad = AdalineGD(n_iter=15, eta=0.01)
    ad.fit(X_std, y)
    actual = ad.w_

    # assert
    np.testing.assert_array_almost_equal(expected, actual, 2)
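# The test method above is shown without its enclosing test case. A minimal
# harness it could live in — the class name and import path are assumptions;
# only numpy, pandas, and the AdalineGD class under test are required:
import unittest
import numpy as np
import pandas as pd
from adaline import AdalineGD  # assumed module layout

class AdalineGDTest(unittest.TestCase):
    pass  # test_tashizan above goes here

if __name__ == '__main__':
    unittest.main()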
def plot_decision_regions(X, y, classifier, resolution=0.02):
    # marker and colour setup (reconstructed: the original snippet begins
    # mid-function, so these definitions are inferred from the call sites)
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface over a grid spanning both features
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=cmap(idx),
                    marker=markers[idx], label=cl)


ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
def Test():
    # get data
    ml_data = Data()
    ml_data.init_data()
    ml_data.view_initial_data()

    # test perceptron
    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(ml_data.X, ml_data.y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of updates')
    plt.show()

    plot = ml_data.plot_decision_regions(ml_data.X, ml_data.y, classifier=ppn)
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.show()

    # Adaline implementation: compare two learning rates
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    ada1 = AdalineGD(n_iter=10, eta=0.01).fit(ml_data.X, ml_data.y)
    ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
    ax[0].set_xlabel('Epoch')
    ax[0].set_ylabel('log(Sum squared error)')
    ax[0].set_title('Adaline - Learning rate 0.01')
    ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(ml_data.X, ml_data.y)
    ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
    ax[1].set_xlabel('Epoch')
    ax[1].set_ylabel('Sum squared error')
    ax[1].set_title('Adaline - Learning rate 0.0001')
    plt.show()

    # Adaline with feature scaling
    ml_data.standard_scaled()
    ada = AdalineGD(n_iter=10, eta=0.01)
    ada.fit(ml_data.X_std, ml_data.y)
    plot = ml_data.plot_decision_regions(ml_data.X_std, ml_data.y, classifier=ada)
    plot.title('Adaline - Gradient Descent')
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.tight_layout()
    plot.show()

    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum squared error')
    plt.show()

    # Adaline with stochastic gradient descent
    ada3 = AdalineGD(n_iter=15, eta=0.01, random_state=1)
    ada3.fit(ml_data.X_std, ml_data.y)
    plot = ml_data.plot_decision_regions(ml_data.X_std, ml_data.y, classifier=ada3)
    plot.title('Adaline - Stochastic Gradient Descent')
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.show()

    plt.plot(range(1, len(ada3.cost_) + 1), ada3.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()
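# The last block above fits the batch AdalineGD class under a "Stochastic
# Gradient Descent" title and plots an "Average cost". A minimal sketch of
# the SGD variant that title suggests — the class name, shuffling scheme,
# and per-epoch average cost are assumptions, not this repo's actual code:
import numpy as np

class AdalineSGD:
    def __init__(self, eta=0.01, n_iter=15, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.rgen = np.random.RandomState(random_state)

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            # shuffle samples each epoch, then update weights one sample at a time
            order = self.rgen.permutation(len(y))
            costs = []
            for xi, target in zip(X[order], y[order]):
                error = target - self.net_input(xi)
                self.w_[1:] += self.eta * xi * error
                self.w_[0] += self.eta * error
                costs.append(0.5 * error ** 2)
            self.cost_.append(np.mean(costs))  # average cost per epoch
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)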
# Learning-rate comparison, left commented out (it assumes a prior
# fig, ax = plt.subplots(nrows=1, ncols=2) call):
# ada1 = AdalineGD(n_iter=10, eta=0.01).fit(x, y)
# ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
# ax[0].set_xlabel('Epochs')
# ax[0].set_ylabel('log(Sum-squared-error)')
# ax[0].set_title('Adaline - Learning rate 0.01')
# ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(x, y)
# ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
# ax[1].set_xlabel('Epochs')
# ax[1].set_ylabel('Sum-squared-error')
# ax[1].set_title('Adaline - Learning rate 0.0001')

# standardizing instead
x_std = np.copy(x)
x_std[:, 0] = (x[:, 0] - x[:, 0].mean()) / x[:, 0].std()
x_std[:, 1] = (x[:, 1] - x[:, 1].mean()) / x[:, 1].std()

ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(x_std, y)
plot_decision_regions(x_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
# only select the setosa and versicolor species
data = data.loc[(data["Class label"] == "Iris-setosa") |
                (data["Class label"] == "Iris-versicolor")]

# convert species labels to integers, -1 for setosa and 1 for versicolor
data["Class label"] = np.where(data["Class label"] == "Iris-setosa", -1, 1)

# split data into feature matrix (only "Sepal length" and "Petal length"
# are used) and target vector
X = data[["Sepal length", "Petal length"]]
y = data["Class label"]

# create a plot of the cost function against the iteration number for
# learning rates of 0.01 and 0.0001
iris_ada1 = AdalineGD(n_iterations=10, eta=0.01)
iris_ada1.fit(X.values, y.values)
iris_ada2 = AdalineGD(n_iterations=10, eta=0.0001)
iris_ada2.fit(X.values, y.values)

fig1, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))
ax[0].plot(range(1, iris_ada1.n_iterations + 1), np.log10(iris_ada1.cost_),
           marker="o")
ax[0].set(xlabel="Iterations", ylabel="log(SSE)",
          title="Adaline - Learning rate 0.01")
ax[1].plot(range(1, iris_ada2.n_iterations + 1), iris_ada2.cost_, marker="o")
ax[1].set(xlabel="Iterations", ylabel="SSE",
          title="Adaline - Learning rate 0.0001")
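# In this classic example, eta=0.01 overshoots on the raw features while
# eta=0.0001 converges only slowly; the usual next step (done explicitly in
# the other scripts in this section) is to standardize the features and
# refit at eta=0.01. A minimal sketch — iris_ada3 is a hypothetical name:
X_std = (X.values - X.values.mean(axis=0)) / X.values.std(axis=0)
iris_ada3 = AdalineGD(n_iterations=10, eta=0.01)
iris_ada3.fit(X_std, y.values)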
import numpy as np
from adaline import AdalineGD

# smoke test: fit on a tiny hand-made dataset and check the predictions
agd = AdalineGD(eta=0.001)
X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9],
              [0, 1, 1],
              [7, 8, 8],
              [7, 7, 7]])
Y = np.array([-1, -1, 1, -1, 1, 1])
agd.fit(X, Y)
print(agd.predict(X))
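# None of the snippets above include the AdalineGD class itself. A minimal
# batch-gradient-descent sketch of the interface they assume (fit/predict,
# w_ with the bias at index 0, cost_ as the per-epoch sum of squared errors).
# Zero-initialized weights match the deterministic values asserted by the
# unit test earlier, but constructor parameter names vary across these
# scripts (n_iter, n_iterations, epochs), so treat this as an illustration:
import numpy as np

class AdalineGD:
    def __init__(self, eta=0.01, n_iter=50):
        self.eta = eta          # learning rate
        self.n_iter = n_iter    # number of passes over the training set

    def fit(self, X, y):
        self.w_ = np.zeros(1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = np.dot(X, self.w_[1:]) + self.w_[0]  # linear activation
            errors = y - output
            # batch update: one step along the negative gradient of the SSE cost
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)
        return self

    def predict(self, X):
        return np.where(np.dot(X, self.w_[1:]) + self.w_[0] >= 0.0, 1, -1)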
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.plotting import plot_decision_regions

from adaline import AdalineGD

dataset = pd.read_csv('iris.csv', header=None)
# print(dataset.tail())

output = dataset.iloc[0:100, 4].values
Y = np.where(output == 'Iris-setosa', -1, 1)
X = dataset.iloc[0:100, [0, 2]].values

X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

obj = AdalineGD(epochs=15, learning_rate=0.01)
obj.fit(X_std, Y)

# mlxtend's plot_decision_regions takes the fitted model as clf
plot_decision_regions(X_std, Y, clf=obj)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(obj.cost_list) + 1), obj.cost_list, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')
plt.show()

# scale (standardize) the data
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

# run again with learning rate 0.01 on the standardized data
ada_gd = AdalineGD(n_iter=15, eta=0.01)
ada_gd.fit(X_std, y)

# plot the decision regions
plot_decision_regions(X_std, y, classifier=ada_gd)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.tight_layout()
plt.show()

# cost as a function of the number of epochs
plt.plot(range(1, len(ada_gd.cost_) + 1), ada_gd.cost_, marker='o')
plt.title('Adaline - Learning rate 0.01')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()