Example 1
def main():
    data = DataManager.load_data("data/data_banknote_authentication.txt")
    # strip newline characters
    data = np.array([item.strip("\n") for item in data])
    data = np.array([item.split(',') for item in data])
    data = data.astype(float)

    # 0 for authentic and 1 for inauthentic
    df = pd.DataFrame(data)
    df[4] = df[4].astype(int)
    authentic = df[df[4] == 0]
    inauthentic = df[df[4] == 1]
    X = df.iloc[np.r_[0:200, 1100:1300], [0, 3]].values
    y = np.array([0 if i < 200 else 1 for i in range(400)])
    plt.scatter(X[:200, 0],
                X[:200, 1],
                color='red',
                marker='o',
                label='authentic')
    plt.scatter(X[200:400, 0],
                X[200:400, 1],
                color='blue',
                marker='x',
                label='inauthentic')
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()

    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(X, y)

    plot_decision_regions(X, y, classifier=ppn)
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()

    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    ada = AdalineGD(n_iter=10, eta=0.01)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - gradient descent')
    plt.xlabel('variance of Wavelet Transformed image')
    plt.ylabel('entropy of image')
    plt.legend(loc='upper left')
    plt.show()
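None of these snippets define the AdalineGD class they train. Below is a minimal sketch of the batch-gradient-descent Adaline they all assume; the names (eta, n_iter, w_, cost_, fit/predict) follow the conventional Raschka-style API, but the body is an illustration, not any one example's actual implementation.

import numpy as np


class AdalineGD:
    """ADAptive LInear NEuron, batch gradient descent (sketch)."""

    def __init__(self, eta=0.01, n_iter=50, random_state=1):
        self.eta = eta                  # learning rate
        self.n_iter = n_iter            # passes over the training set
        self.random_state = random_state

    def fit(self, X, y):
        rgen = np.random.RandomState(self.random_state)
        # small random initial weights; w_[0] is the bias unit
        self.w_ = rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            output = self.net_input(X)   # identity activation
            errors = y - output
            # one batch gradient step per epoch over all samples
            self.w_[1:] += self.eta * X.T.dot(errors)
            self.w_[0] += self.eta * errors.sum()
            self.cost_.append((errors ** 2).sum() / 2.0)  # SSE cost
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)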
Example 2
    def test_tashizan(self):
        """test method for adaline
        """
        # Initialization
        expected = np.array([1.59872116e-16, -1.26256159e-01, 1.10479201e+00])

        df = pd.read_csv('../tests/data/iris.data', header=None)
        y = df.iloc[0:100, 4].values
        y = np.where(y == 'Iris-setosa', -1, 1)
        X = df.iloc[0:100, [0, 2]].values
        X_std = np.copy(X)
        X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
        X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

        # Run the test
        ad = AdalineGD(n_iter=15, eta=0.01)
        ad.fit(X_std, y)
        actual = ad.w_

        # Assert
        np.testing.assert_array_almost_equal(expected, actual, 2)
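The third positional argument to np.testing.assert_array_almost_equal is the number of decimal places, so this test only pins the learned weights down to two decimals (an absolute tolerance of 1.5e-2). A quick illustration:

import numpy as np

# passes: each pair agrees to within 1.5e-2
np.testing.assert_array_almost_equal([1.104, -0.126], [1.10, -0.13], 2)

# would raise AssertionError: 1.15 and 1.10 differ by more than 1.5e-2
# np.testing.assert_array_almost_equal([1.15, -0.126], [1.10, -0.13], 2)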
Example 3
def main():
    # prepare training data and target variable
    features = ['sepal length (cm)', 'petal length (cm)']
    labels = ['setosa', 'versicolor']
    D = IrisData(features, labels)
    X = D.X
    y = np.where(D.y == 0, -1, 1)

    # standardize training data
    X_std = np.copy(X)
    for i in range(len(labels)):
        X_std[:, i] = (X[:, i] - X[:, i].mean()) / X[:, i].std()

    # fit classifiers
    classifiers = [
        AdalineGD(eta=0.01, n_iter=10).fit(X, y),
        AdalineGD(eta=0.0001, n_iter=10).fit(X, y),
        AdalineGD(eta=0.01, n_iter=15).fit(X_std, y),
        AdalineSGD(eta=0.01, n_iter=15).fit(X_std, y)
    ]

    # show history of costs
    for classifier in classifiers:
        plot_update_history(classifier)

    # show decision regions
    plot_decision_regions(X_std,
                          y,
                          classifier=classifiers[2],
                          xlabel='sepal length [standardized]',
                          ylabel='petal length [standardized]')
    plot_decision_regions(X_std,
                          y,
                          classifier=classifiers[3],
                          xlabel='sepal length [standardized]',
                          ylabel='petal length [standardized]')
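Example 3 also relies on an AdalineSGD class that is never shown. A minimal sketch of the stochastic variant it assumes follows: per-sample weight updates with optional shuffling, with names mirroring the AdalineGD sketch above. The body is an assumption, not the original class.

import numpy as np


class AdalineSGD:
    """Adaline trained with stochastic gradient descent (sketch)."""

    def __init__(self, eta=0.01, n_iter=10, shuffle=True, random_state=1):
        self.eta = eta
        self.n_iter = n_iter
        self.shuffle = shuffle
        self.rgen = np.random.RandomState(random_state)

    def fit(self, X, y):
        self.w_ = self.rgen.normal(loc=0.0, scale=0.01, size=1 + X.shape[1])
        self.cost_ = []
        for _ in range(self.n_iter):
            if self.shuffle:
                idx = self.rgen.permutation(len(y))
                X, y = X[idx], y[idx]
            cost = []
            for xi, target in zip(X, y):
                # update the weights after every single sample
                error = target - self.net_input(xi)
                self.w_[1:] += self.eta * xi * error
                self.w_[0] += self.eta * error
                cost.append(0.5 * error ** 2)
            self.cost_.append(np.mean(cost))  # average cost per epoch
        return self

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)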
Example 4
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=cmap(idx),
                    marker=markers[idx],
                    label=cl)


ada = AdalineGD(n_iter=15, eta=0.01)
ada.fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()
plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
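Example 4 begins partway through a plot_decision_regions helper: markers, cmap, resolution, and the x1_min/x1_max/x2_min/x2_max bounds are all defined earlier in the function. A sketch of that missing setup, using conventional values rather than the original file's:

import numpy as np
from matplotlib.colors import ListedColormap


def plot_decision_regions(X, y, classifier, resolution=0.02):
    # marker and color setup the excerpt presupposes
    markers = ('o', 'x', 's', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # decision-surface bounds, padded by one unit on each side
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # ...the function then continues with the meshgrid/contourf
    # code shown at the top of Example 4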
Example 5
# Data Preparation
df = pd.read_csv('raschka/data/iris.data', header=None)
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
X = df.iloc[0:100, [0, 2]].values

# Standardization (z-score)
X_std = np.copy(X)

X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(14, 4))

ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X_std, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title("Adaline - Learning rate 0.01")
ax[0].set_xlim([0, 16])

ada2 = AdalineGD(n_iter=10, eta=0.001).fit(X_std, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), np.log10(ada2.cost_), marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('log(Sum-squared-error)')
ax[1].set_title('Adaline - Learning rate 0.001')
ax[1].set_xlim([0, 16])

ada3 = AdalineGD(n_iter=10, eta=0.0001).fit(X_std, y)
ax[2].plot(range(1, len(ada3.cost_) + 1), np.log10(ada3.cost_), marker='o')
ax[2].set_xlabel('Epochs')
ax[2].set_ylabel('log(Sum-squared-error)')
ax[2].set_title('Adaline - Learning rate 0.0001')
ax[2].set_xlim([0, 16])

plt.show()
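The two per-column assignments above implement z-score standardization, x' = (x - mean) / std for each feature, and it is this rescaling that lets the comparatively large eta=0.01 converge. A vectorized helper doing the same thing (the name is ours, not the original code's):

import numpy as np


def standardize(X):
    """Return a z-scored copy of X: zero mean, unit variance per column."""
    return (X - X.mean(axis=0)) / X.std(axis=0)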
Example 6
def Test():
    # get data
    ml_data = Data()
    ml_data.init_data()
    ml_data.view_initial_data()

    # test perceptron
    ppn = Perceptron(eta=0.1, n_iter=10)
    ppn.fit(ml_data.X, ml_data.y)
    plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of updates')
    plt.show()

    plot = ml_data.plot_decision_regions(ml_data.X, ml_data.y, classifier=ppn)
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.show()

    # Adaline implementation
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
    ada1 = AdalineGD(n_iter=10, eta=0.01).fit(ml_data.X, ml_data.y)
    ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
    ax[0].set_xlabel('Epoch')
    ax[0].set_ylabel('log(Sum Squared error)')
    ax[0].set_title('Adaline - Learning rate 0.01')
    ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(ml_data.X, ml_data.y)
    ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
    ax[1].set_xlabel('Epoch')
    ax[1].set_ylabel('Sum Squared error')
    ax[1].set_title('Adaline - Learning rate 0.0001')
    plt.show()

    # Adaline with feature scaling
    ml_data.standard_scaled()

    ada = AdalineGD(n_iter=10, eta=0.01)
    ada.fit(ml_data.X_std, ml_data.y)
    plot = ml_data.plot_decision_regions(ml_data.X_std,
                                         ml_data.y,
                                         classifier=ada)
    plot.title('Adaline - Gradient Descent')
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.tight_layout()
    plot.show()

    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Sum Squared error')
    plt.show()

    # Adaline with stochastic gradient
    ada3 = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada3.fit(ml_data.X_std, ml_data.y)

    plot = ml_data.plot_decision_regions(ml_data.X_std,
                                         ml_data.y,
                                         classifier=ada3)
    plot.title('Adaline - Stochastic Gradient Descent')
    plot.xlabel('sepal length')
    plot.ylabel('petal length')
    plot.legend(loc='upper left')
    plot.show()

    plt.plot(range(1, len(ada3.cost_) + 1), ada3.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()
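Example 6 drives everything through a Data helper that is never shown. Based purely on the calls above, a compatible sketch might look like the following; the file path and method bodies are assumptions, and the plot_decision_regions method would wrap a helper like the one sketched after Example 4.

import numpy as np
import pandas as pd


class Data:
    """Sketch of the helper interface Example 6 calls (bodies assumed)."""

    def init_data(self):
        # hypothetical path; load the setosa vs. versicolor iris rows
        df = pd.read_csv('iris.data', header=None)
        self.y = np.where(df.iloc[0:100, 4].values == 'Iris-setosa', -1, 1)
        self.X = df.iloc[0:100, [0, 2]].values

    def view_initial_data(self):
        # quick sanity check of the loaded samples
        print(self.X[:5], self.y[:5])

    def standard_scaled(self):
        # z-score each feature column into X_std
        self.X_std = (self.X - self.X.mean(axis=0)) / self.X.std(axis=0)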
Example 7
# only select the setosa and versicolor species
data = data.loc[(data["Class label"] == "Iris-setosa") |
                (data["Class label"] == "Iris-versicolor")]

# convert species labels to integers, -1 for setosa and 1 for versicolor
data["Class label"] = np.where(data["Class label"]=="Iris-setosa", -1, 1)

# split data into feature matrix (only "Sepal length" and "Petal length" are
# used) and target vector
X = data[["Sepal length", "Petal length"]]
y = data["Class label"]

# create a plot of the cost function against the iteration number for learning
# rates of 0.01 and 0.0001
iris_ada1 = AdalineGD(n_iterations=10, eta=0.01)
iris_ada1.fit(X.values, y.values)

iris_ada2 = AdalineGD(n_iterations=10, eta=0.0001)
iris_ada2.fit(X.values, y.values)

fig1, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6))

ax[0].plot(range(1, iris_ada1.n_iterations + 1), np.log10(iris_ada1.cost_),
           marker="o")
ax[0].set(xlabel="Iterations", ylabel="log(SSE)",
          title="Adaline - Learning rate 0.01")

ax[1].plot(range(1, iris_ada2.n_iterations + 1), iris_ada2.cost_,
           marker="o")
ax[1].set(xlabel="Iterations", ylabel="SSE",
          title="Adaline - Learning rate 0.0001")

plt.show()
Example 8
import numpy as np
from adaline import AdalineGD

agd = AdalineGD(eta=0.001)

X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9],
              [0, 1, 1], [7, 8, 8], [7, 7, 7]])
Y = np.array([-1, -1, 1, -1, 1, 1])

agd.fit(X, Y)

print(agd.predict(X))
Example 9

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mlxtend.plotting import plot_decision_regions

from adaline import AdalineGD

dataset = pd.read_csv('iris.csv', header=None)
# print (dataset.tail())

output = dataset.iloc[0:100, 4].values

Y = np.where(output == 'Iris-setosa', -1, 1)
X = dataset.iloc[0:100, [0, 2]].values

X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

obj = AdalineGD(epochs=15, learning_rate=0.01)
obj.fit(X_std, Y)
plot_decision_regions(X_std, Y, clf=obj)
plt.title('Adaline - Gradient Descent')
plt.xlabel('sepal length [standardized]')
plt.ylabel('petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

plt.plot(range(1, len(obj.cost_list) + 1), obj.cost_list, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()
Example 10

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from adaline import AdalineGD

dataset = pd.read_csv('iris.csv', header=None)
# print (dataset.tail())

output = dataset.iloc[0:100, 4].values

Y = np.where(output == 'Iris-setosa', -1, 1)
X = dataset.iloc[0:100, [0, 2]].values

fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

obj1 = AdalineGD(epochs=10, learning_rate=0.01).fit(X, Y)
ax[0].plot(range(1, len(obj1.cost_list) + 1),
           np.log10(obj1.cost_list),
           marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-errors)')
ax[0].set_title('Adaline - Learning rate 0.01')

obj2 = AdalineGD(epochs=10, learning_rate=0.0001).fit(X, Y)
ax[1].plot(range(1, len(obj2.cost_list) + 1), obj2.cost_list, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-errors')
ax[1].set_title('Adaline - Learning rate 0.0001')

plt.show()
Example 11
s = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

df = pd.read_csv(s, header=None, encoding='utf-8')

# Extract and convert the target variable from rows 1-100
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)

# Extract columns 1 and 3 (the features used this time) from rows 1-100
X = df.iloc[0:100, [0, 2]].values

# Split the plotting area into one row and two columns
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))

# Train ADALINE by gradient descent (learning rate = 0.01)
ada1 = AdalineGD(n_iter=10, eta=0.01).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared-error)')
ax[0].set_title('Adaline - Learning rate 0.01')

# Train ADALINE by gradient descent (learning rate = 0.0001)
ada2 = AdalineGD(n_iter=10, eta=0.0001).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), ada2.cost_, marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('Sum-squared-error')
ax[1].set_title('Adaline - Learning rate 0.0001')

plt.show()

# Scale (standardize) the data
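The snippet stops at the scaling comment. A plausible continuation, mirroring the standardization-plus-retraining pattern used in the other examples (this completion is an assumption, not the original file's code):

# assumed continuation: standardize, then retrain with the larger eta
X_std = np.copy(X)
X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

ada_gd = AdalineGD(n_iter=15, eta=0.01).fit(X_std, y)
plt.plot(range(1, len(ada_gd.cost_) + 1), ada_gd.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Sum-squared-error')
plt.show()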
Example 12
plt.clf()
plot_decision_regions(X2, y2, clf=ppn)
plt.show()

plt.clf()
plt.plot(range(1, len(ppn.errors_)+1), ppn.errors_, marker='o')
plt.xlabel('Iterations')
plt.ylabel('Misclassifications')
plt.show()


########### Adaline ###########
from adaline import AdalineGD

ada = AdalineGD(epochs=10, eta=0.01).train(X, y)
plt.clf()
plt.plot(range(1, len(ada.cost_)+1), np.log10(ada.cost_), marker='o')
plt.xlabel('Iterations')
plt.ylabel('log(Sum-squared-error)')
plt.title('Adaline - Learning rate 0.01')
plt.show()

ada = AdalineGD(epochs=10, eta=0.0001).train(X, y)
plt.clf()
plt.plot(range(1, len(ada.cost_)+1), ada.cost_, marker='o')
plt.xlabel('Iterations')
plt.ylabel('Sum-squared-error')
plt.title('Adaline - Learning rate 0.0001')
plt.show()
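The clf= keyword in the plot_decision_regions call above matches the signature of mlxtend.plotting.plot_decision_regions (imported explicitly in Example 9), so the import this snippet presumably elides is:

from mlxtend.plotting import plot_decision_regions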
Example 13
    plt.contourf(xx, yy, Z, alpha=0.3, cmap=cmap)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0],
                    y=X[y == cl, 1],
                    alpha=0.8,
                    c=colors[idx],
                    marker=markers[idx],
                    label=cl,
                    edgecolor="black")


fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
ada1 = AdalineGD(eta=0.01, n_itr=10).fit(X, y)
ax[0].plot(range(1, len(ada1.cost_) + 1), np.log10(ada1.cost_), marker='o')
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('log(Sum-squared error)')
ax[0].set_title('Adaline learning rate 0.01')

ada2 = AdalineGD(eta=0.00001, n_itr=10).fit(X, y)
ax[1].plot(range(1, len(ada2.cost_) + 1), np.log10(ada2.cost_), marker='o')
ax[1].set_xlabel('Epochs')
ax[1].set_ylabel('log(Sum-squared error)')
ax[1].set_title('Adaline learning rate 0.00001')
plt.show()

X_std = np.copy(X)
X_std[:, 0] = (X_std[:, 0] - X_std[:, 0].mean()) / X_std[:, 0].std()
X_std[:, 1] = (X_std[:, 1] - X_std[:, 1].mean()) / X_std[:, 1].std()

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import style
from adaline import AdalineGD

if __name__ == '__main__':
    # Read the iris.data species data; identical to perceptron_iris
    df = pd.read_csv('/Users/narun/Desktop/mylib/hwang/dataSet/iris.data',
                     header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(8, 4))

    # Run Adaline with learning rate = 0.01.
    # The cost J(w) keeps growing and diverges, so the desired solution cannot be found.
    adal = AdalineGD(eta=0.01, n_iter=10).fit(X, y)
    ax[0].plot(range(1, len(adal.cost_) + 1), np.log10(adal.cost_), marker='o')
    ax[0].set_xlabel('Epochs')
    ax[0].set_ylabel('log(SQE)')
    ax[0].set_title('Adaline - Learning rate 0.01')

    # Run Adaline with learning rate = 0.0001.
    # The cost J(w) steadily shrinks toward 0 and converges, so the desired solution can be found.
    adal2 = AdalineGD(eta=0.0001, n_iter=10).fit(X, y)
    ax[1].plot(range(1, len(adal2.cost_) + 1),
               np.log10(adal2.cost_),
               marker='o')
    ax[1].set_xlabel('Epochs')
    ax[1].set_ylabel('log(SQE)')
    ax[1].set_title('Adaline - Learning rate 0.0001')
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import style
from adaline import AdalineGD

if __name__ == '__main__':
    # Read the iris.data species data; identical to perceptron_iris
    df = pd.read_csv('/Users/narun/Desktop/mylib/hwang/dataSet/iris.data',
                     header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    X_std = np.copy(X)  # copy the values of X into X_std
    # Assign the standardized sepal-length and petal-length values to X_std.
    # numpy's mean() returns the mean of the values in an array,
    # and numpy's std() returns their standard deviation.
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    # Train Adaline on X_std with learning rate 0.01 for 15 iterations.
    adal = AdalineGD(eta=0.01, n_iter=15).fit(X_std, y)
    plt.plot(range(1, len(adal.cost_) + 1), adal.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('SSE')
    plt.title('Adaline Standardized - Learning rate 0.01')
    plt.show()