def plot_data(self):
        """
		Plot the train data using matplotlib.
		"""
        X_combined = np.vstack((self.train_X, self.test_X))
        y_combined = np.hstack((self.train_y, self.test_y))
        plot_decision_regions(X_combined,
                              y_combined,
                              classifier=self.model,
                              test_idx=range(105, 150))
        plt.xlabel('petal length [cm]')
        plt.ylabel('petal width [cm]')
        plt.legend(loc='upper left')
        plt.show()
Example #2
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target
print('Class labels:', np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(X, y,
    test_size=0.3,
    random_state=1,
    stratify=y
)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

lr = LogisticRegression(C=100, solver='lbfgs', multi_class='ovr', random_state=1)
lr.fit(X_train_std, y_train)

import matplotlib.pyplot as plt
X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X_combined_std, y_combined, lr, test_idx=range(105, 150))
plt.xlabel('petal length [std]')
plt.ylabel('petal width [std]')
plt.legend(loc='upper left')
plt.show()

y_pred = lr.predict(X_test_std)
print('Accuracy:', accuracy_score(y_test, y_pred))
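# Equivalently (an added sketch, not in the original snippet), the fitted
# estimator's own score() method reports the same mean accuracy directly:
print('Accuracy: %.3f' % lr.score(X_test_std, y_test))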
Example #3
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                         y,
                                                         test_size=0.3,
                                                         random_state=0)

    sc = StandardScaler()
    sc.fit(X_train)
    X_train_std = sc.transform(X_train)
    X_test_std = sc.transform(X_test)

    X_combined_std = np.vstack((X_train_std, X_test_std))
    y_combined = np.hstack((y_train, y_test))

    lr = LogisticRegression(C=1000.0, random_state=0)
    lr.fit(X_train_std, y_train)
    utils.plot_decision_regions(X_combined_std,
                                y_combined,
                                classifier=lr,
                                test_idx=range(105, 150))
    plt.xlabel('petal length [standardized]')
    plt.ylabel('petal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()

    # Effect of the inverse regularization parameter C on the weights
    weights, params = [], []
    for c in range(-5, 5):
        lr = LogisticRegression(C=10**c, random_state=0)
        lr.fit(X_train_std, y_train)
        weights.append(lr.coef_[1])
        params.append(10**c)

    weights = np.array(weights)
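    # The snippet ends before the plot; a minimal sketch of the usual
    # weight-path figure (assuming matplotlib.pyplot is imported as plt,
    # with feature names taken from the iris petal features used above):
    plt.plot(params, weights[:, 0], label='petal length')
    plt.plot(params, weights[:, 1], linestyle='--', label='petal width')
    plt.ylabel('weight coefficient')
    plt.xlabel('C')
    plt.xscale('log')
    plt.legend(loc='upper left')
    plt.show()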
Example #4
        cost = 0.5 * error**2
        return cost

    def net_input(self, X):
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def activation(self, X):
        return X

    def predict(self, X):
        return np.where(self.activation(self.net_input(X)) >= 0.0, 1, -1)


if __name__ == '__main__':
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()
    ada = AdalineSGD(n_iter=15, eta=0.01, random_state=1)
    ada.fit(X_std, y)
    plot_decision_regions(X_std, y, classifier=ada)
    plt.title('Adaline - Stochastic Gradient Descent')
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
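    # An added convergence check, mirroring the SGD examples below: plot
    # the average cost per epoch that AdalineSGD records in ada.cost_.
    plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Average cost')
    plt.show()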
Example #5
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.svm import SVC
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import utils

if __name__ == '__main__':

    X_xor, y_xor = utils.generateDataSet()

    # A small gamma gives each training sample a wide influence, so the decision boundary becomes smoother
    svm = SVC(kernel='rbf', gamma=0.10, C=10.0, random_state=0)
    svm.fit(X_xor, y_xor)
    utils.plot_decision_regions(X_xor, y_xor, classifier=svm)
    plt.xlabel('feature 1')
    plt.ylabel('feature 2')
    plt.legend(loc='upper left')
    plt.show()
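    # For contrast (an illustrative variation, not in the original): a much
    # larger gamma narrows each sample's influence, so the boundary hugs the
    # training points and is far more prone to overfitting.
    svm_tight = SVC(kernel='rbf', gamma=100.0, C=10.0, random_state=0)
    svm_tight.fit(X_xor, y_xor)
    utils.plot_decision_regions(X_xor, y_xor, classifier=svm_tight)
    plt.show()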
Example #6
print("Class labels:", np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))

lda = LinearDiscriminantAnalysis(n_components=2)
X_train_lda = lda.fit_transform(X_train_std, y_train)
X_combined_lda = lda.transform(X_combined_std)
lr = LogisticRegression()
lr.fit(X_train_lda, y_train)
utils.plot_decision_regions(X_combined_lda,
                            y_combined,
                            classifier=lr,
                            test_idx=range(X_train.shape[0],
                                           X_train.shape[0] + X_test.shape[0]))
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.legend(loc='lower left')
plt.show()
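# A quick added check: the fitted LDA reports how much of the
# between-class variance each linear discriminant captures.
print('Explained variance ratio:', lda.explained_variance_ratio_)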
Example #7
    import pandas as pd
    df = pd.read_csv('iris.data', header=None)
    # select sepal length and sepal width
    X = df.iloc[0:100, [0, 1]].values

    # Feature standardization
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    # select setosa and versicolor
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    # y = np.where(y == 'Iris-setosa', 0, 1)

    lr = LogisticRegressionSGD(eta=0.05, n_iter=150, random_state=1)
    lr.fit(X_std, y)

    import matplotlib.pyplot as plt
    plt.plot(lr.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Total cost')
    plt.show()

    from utils import plot_decision_regions
    plot_decision_regions(X_std, y, lr)
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('sepal width [standardized]')
    plt.legend(loc='upper left')
    plt.show()
Exemple #8
0
# L2-regularizer lambda=1/C, set to np.inf to get MLE
logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
logreg.fit(X_train, y_train)
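# An added variant of the comment above: scikit-learn's supported way to
# drop the penalty (true MLE) is penalty=None (the string 'none' before
# version 1.2), rather than a literal C=np.inf.
logreg_mle = LogisticRegression(penalty=None, solver='lbfgs',
                                multi_class='multinomial')
logreg_mle.fit(X_train, y_train)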

y_pred = logreg.predict(X_test)
errs = (y_pred != y_test)
nerrs = np.sum(errs)
print("Made {} errors out of {}, on instances {}".format(
    nerrs, len(y_pred), np.where(errs)))
# With ndims=2: Made 10 errors out of 50, on instances
#  (array([ 4, 15, 21, 32, 35, 36, 40, 41, 42, 48]),)

from sklearn.metrics import zero_one_loss
err_rate_test = zero_one_loss(y_test, y_pred)
assert np.isclose(err_rate_test, nerrs / len(y_pred))
err_rate_train = zero_one_loss(y_train, logreg.predict(X_train))
print("Error rates on train {:0.3f} and test {:0.3f}".format(
    err_rate_train, err_rate_test))
# Error rates on train 0.180 and test 0.200

if ndims == 2:
    fig, ax = utils.plot_decision_regions(X, y, logreg, iris.target_names)
    ax.set(xlabel='Sepal length')
    ax.set(ylabel='Sepal width')
    utils.save_fig("iris-logistic")
    plt.show()

    # Get predictive distribution for some ambiguous test points
    X = [[5.7, 3.5]]  # (1,2) array
    y_probs = logreg.predict_proba(X)
    print(np.round(y_probs, 2))
Example #9
y = (data[0]-data[0].mean())/data[0].std()
x = (data[1]-data[1].mean())/data[1].std()

#fit the linear regression prediction line to the graph
params = regression.fit(x,y)

#add the data to the plot
plot.scatter(x[:180],y[:180], s = 15)
prediction = np.matmul(np.array(x[:180]).reshape(-1,1),params[0])+params[1]
plot.plot(x[:180],prediction, color = 'red')

#set plot title and axes
plot.title('Linear Regression: Iris Flower Set')
plot.xlabel('Sepal Length')
plot.ylabel('Sepal Width')
plot.show()

# Create and fit a perceptron (and, below, a decision stump) to classify the data
xPercep = data.iloc[0:180, [1,0]].values
yPercep = data.iloc[0:180, 4].values
yPercep = np.where(yPercep == 'Iris-setosa', -1, 1)

perceptronClassifier.fit(xPercep,yPercep)

stump.fit(xPercep,yPercep)

#plot vc dim classification
utils.plot_decision_regions(xPercep, yPercep, stump, "Stump Decision Classifier, Low VC")

utils.plot_decision_regions(xPercep, yPercep, perceptronClassifier, "Perceptron Classifier on Same Data")
Example #10
    def predict(self, X):
        """Predict class label(s)"""
        return np.where(self.net_input(X) >= 0, 1, -1)


if __name__ == '__main__':
    import pandas as pd
    df = pd.read_csv('iris.data', header=None)
    # select petal and sepal length
    X = df.iloc[0:100, [0, 2]].values
    # select setosa and versicolor
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)

    perceptron = Perceptron(eta=0.01, n_iter=10, random_state=1)
    perceptron.fit(X, y)

    import matplotlib.pyplot as plt
    plt.plot(perceptron.errors_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Number of updates')
    plt.show()

    from utils import plot_decision_regions
    plot_decision_regions(X, y, perceptron)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.legend(loc='upper left')
    plt.show()
Example #11
# select features and class labels from the CSV
x = df.iloc[0:100, [0, 2]].values
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)

#set graph characteristics for sepal/petal
plt.scatter(x[:50, 0], x[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(x[50:100, 0],
            x[50:100, 1],
            color='blue',
            marker='x',
            label='versicolor')
plt.xlabel('sepal length')
plt.ylabel('petal length')
plt.legend(loc='upper left')
plt.show()

#create and set perceptron, fit using x,y
pn = Perceptron(0.1, 10)
pn.fit(x, y)

#set graph characteristics for misclassifications
plt.plot(range(1, len(pn.misclass) + 1), pn.misclass, marker='o')
plt.xlabel('Iteration')
plt.ylabel('# of misclassifications')
plt.show()

#plot decision regions
utils.plot_decision_regions(x, y, pn)
Example #12
    import pandas as pd
    df = pd.read_csv('iris.data', header=None)
    # select petal and sepal length
    X = df.iloc[0:100, [0, 2]].values

    # Feature standardization
    X_std = np.copy(X)
    X_std[:, 0] = (X[:, 0] - X[:, 0].mean()) / X[:, 0].std()
    X_std[:, 1] = (X[:, 1] - X[:, 1].mean()) / X[:, 1].std()

    # select setosa and versicolor
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)

    adaline = AdalineSGD(eta=0.01, n_iter=15, random_state=1)
    adaline.fit(X_std, y)
    adaline.partial_fit(X_std[0, :], y[0])

    import matplotlib.pyplot as plt
    plt.plot(adaline.cost_, marker='o')
    plt.xlabel('Epochs')
    plt.ylabel('Total cost')
    plt.show()

    from utils import plot_decision_regions
    plot_decision_regions(X_std, y, adaline)
    plt.xlabel('sepal length [standardized]')
    plt.ylabel('petal length [standardized]')
    plt.legend(loc='upper left')
    plt.show()
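    # Because partial_fit updates the weights without re-initializing them,
    # it suits streaming data. A hypothetical online-learning loop over
    # newly arriving (xi, target) pairs, using the same call signature as
    # the single-sample update above:
    for xi, target in zip(X_std, y):
        adaline.partial_fit(xi, target)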