Example #1
0
def test_knn():
    """Run the project's ``KNN`` class through four plotting scenarios.

    The scenarios run in this order:

    1. ``KNN(k=1)`` on columns 0 and 2 of the first 100 rows of the UCI Iris
       file, trained on randomly drawn 0/1 labels.
    2. ``KNN(15)`` on the first two features of scikit-learn's bundled Iris
       data, including a call to ``accuracy``.
    3. ``KNN(1)`` fitted on that same data and plotted with one extra point
       appended.
    4. scikit-learn's ``KNeighborsClassifier(1)`` plotted on the extended
       data for comparison.

    Nothing is asserted or returned.
    """
    frame = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    # NOTE(review): the species-derived -1/1 labels are computed and then
    # thrown away; the model below is trained on unseeded random labels
    # instead. Confirm which of the two was actually intended.
    species = frame.iloc[0:100, 4].values
    _discarded = np.where(species == 'Iris-setosa', -1, 1)
    random_labels = np.random.randint(2, size=100)
    two_columns = frame.iloc[0:100, [0, 2]].values

    print("Testing 2-D Iris data set with only one neighbor...")
    single = KNN(k=1)
    single.fit(two_columns, random_labels)
    single.plot(two_columns, random_labels)

    print("Testing Iris data set with 15 neighbor...")
    iris = datasets.load_iris()
    samples, targets = iris.data[:, :2], iris.target
    wide = KNN(15)
    wide.fit(samples, targets)
    # The return value of accuracy() is not used here.
    wide.accuracy(wide.predict(samples), targets)
    wide.plot(samples, targets)

    print(
        "Adding new point to dataset and testing with full Iris 1-k data set..."
    )
    nearest = KNN(1)
    nearest.fit(samples, targets)
    # The model is fitted on the original data; only the plotted data gets
    # the extra sample (5.0, 3.2) with label 1.
    targets_plus = np.append(targets, np.array([1]))
    samples_plus = np.vstack([samples, [5.0, 3.2]])
    nearest.plot(samples_plus, targets_plus)

    print("Testing SKLearn's model...")
    reference = neighbors.KNeighborsClassifier(1)
    reference.fit(samples, targets)
    plot_decision_regions(samples_plus, targets_plus, reference)
Example #2
0
def test_perceptron():
    """Train three ``Perceptron`` instances on Iris data and print results.

    The first model uses two features of the first 100 rows and is also
    plotted. The second and third use three features of the first 150 rows,
    with 10 and 4 as the second constructor argument respectively. Labels
    are -1 for 'Iris-setosa' and 1 for everything else. Nothing is asserted
    or returned.
    """

    def fit_and_report(model, samples, labels):
        """Fit *model*, print its ``errors`` and ``weight``, and return it."""
        print("Perceptron created")
        model.fit(samples, labels)
        print("Perceptron fitted")
        print("Error List")
        print(model.errors)
        print("Weight vector")
        print(model.weight)
        return model

    iris = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    print("Creating a two-feature data set")
    species_100 = iris.iloc[0:100, 4].values
    labels_2d = np.where(species_100 == 'Iris-setosa', -1, 1)
    samples_2d = iris.iloc[0:100, [0, 2]].values
    print("Creating Perceptron")
    planar = fit_and_report(Perceptron(0.1, 10), samples_2d, labels_2d)
    print("Using plot_decision_regions function")
    plot_decision_regions(samples_2d, labels_2d, planar, 0.02)
    planar.plot(samples_2d, labels_2d)
    # NOTE(review): the axis labels and title are set after both plot calls;
    # whether they land on a visible figure depends on what those calls do.
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.title('Petal Length vs Sepal Length')

    print("Creating a three-feature data set")
    species_150 = iris.iloc[0:150, 4].values
    labels_3d = np.where(species_150 == 'Iris-setosa', -1, 1)
    samples_3d = iris.iloc[0:150, [0, 1, 2]].values
    print("Creating Perceptron")
    fit_and_report(Perceptron(0.1, 10), samples_3d, labels_3d)

    print(
        "Creating a perceptron that does not have enough iterations to learn the three-feature data set"
    )
    fit_and_report(Perceptron(0.1, 4), samples_3d, labels_3d)
Example #3
0
 def plot(self, X, y):
     """Plot this model's decision regions for samples ``X`` and labels ``y``.

     Delegates to ``plot_decision_regions``, passing this instance as the
     classifier argument.
     """
     plot_decision_regions(X, y, self)
def main():
    """Compare scikit-learn and ML.py logistic regression on two datasets.

    For the first 100 rows of the UCI Iris file and for a subset of
    ``./Fish.csv``, the raw points are scattered first. Then each model is
    fitted, its decision regions are plotted, and a classification report
    computed on the training data is printed.
    """

    def preview(points, groups, heading, x_name, y_name):
        """Scatter each ``(rows, color, marker, name)`` group and show it."""
        plt.title(heading)
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        for rows, color, marker, name in groups:
            plt.scatter(points[rows, 0],
                        points[rows, 1],
                        color=color,
                        marker=marker,
                        label=name)
        plt.legend(loc='upper left')
        plt.show()

    def evaluate(model, points, labels, heading, x_name, y_name):
        """Fit *model*, plot its decision regions and print its report."""
        model.fit(points, labels)
        plot_decision_regions(points,
                              labels,
                              model,
                              resolution=0.1,
                              x_label=x_name,
                              y_label=y_name,
                              title=heading)
        print(heading)
        print(classification_report(labels, model.predict(points)))

    # ---- Iris ----------------------------------------------------------
    iris = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    # First 100 rows: 'Iris-setosa' becomes class 0, anything else class 1.
    species = iris.iloc[0:100, 4].values
    targets = np.where(species == 'Iris-setosa', 0, 1)

    # Columns 0 and 2 (sepal length, petal length).
    features = iris.iloc[0:100, [0, 2]].values

    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    preview(features,
            [(slice(None, 50), 'red', 'o', 'setosa'),
             (slice(50, 100), 'blue', 'x', 'versicolor')],
            title, xlabel, ylabel)

    evaluate(skLogisticRegression(), features, targets,
             title + "\nscikit-learn Logistic Regression Model",
             xlabel, ylabel)
    evaluate(LogisticRegression(learning_rate=0.05, iterations=25),
             features, targets,
             title + "\nML.py Logistic Regression Model",
             xlabel, ylabel)

    # ---- Fish ----------------------------------------------------------
    fish = pd.read_csv('./Fish.csv')
    # Two positional drops: the first 61 rows, then positions 11..83 of what
    # is left. Presumably this keeps only the Parkki and Smelt rows --
    # verify against the layout of Fish.csv.
    fish = fish.drop(fish.index[0:61])
    fish = fish.drop(fish.index[11:84])

    # Column 0 holds the label: 'Parkki' becomes class 0, the rest class 1.
    species = fish.iloc[:, 0].values
    targets = np.where(species == 'Parkki', 0, 1)

    # Columns 2 and 1, in that order; per the axis labels below they are
    # plotted as length (x) and weight (y).
    features = fish.iloc[:, [2, 1]].values

    title = 'Fish Dataset'
    xlabel = 'Length [cm]'
    ylabel = 'Weight [g]'

    preview(features,
            [(slice(None, 11), 'red', 'o', 'Parkki'),
             (slice(11, None), 'blue', 'x', 'Smelt')],
            title, xlabel, ylabel)

    evaluate(skLogisticRegression(), features, targets,
             title + "\nscikit-learn Logistic Regression Model",
             xlabel, ylabel)
    evaluate(LogisticRegression(learning_rate=0.01, iterations=10),
             features, targets,
             title + "\nML.py Logistic Regression Model",
             xlabel, ylabel)
Example #5
0
def main():
    """Run AdaBoost with two different weak learners on three datasets.

    The datasets are the first 100 rows of the UCI Iris file and two subsets
    of ``./Fish.csv``. For each one the raw points are scattered, then
    AdaBoost is fitted twice: once with a depth-1 ``DecisionTreeClassifier``
    and once with ``AxisAlignedRectangles``. Each fit is followed by a
    decision-region plot and the training-set score printed to the console.
    Labels are encoded as -1 / 1.
    """

    def preview(points, groups, heading, x_name, y_name):
        """Scatter each ``(rows, color, marker, name)`` group and show it."""
        plt.title(heading)
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        for rows, color, marker, name in groups:
            plt.scatter(points[rows, 0], points[rows, 1],
                        color=color, marker=marker, label=name)
        plt.legend(loc='upper left')
        plt.show()

    def make_boost(weak_learner):
        """Build the AdaBoost ensemble used throughout this demo."""
        # NOTE(review): newer scikit-learn releases rename `base_estimator`
        # to `estimator` -- confirm against the installed version.
        return AdaBoostClassifier(base_estimator=weak_learner,
                                  algorithm='SAMME', n_estimators=10, learning_rate=1.0)

    def stump():
        """Return a fresh scikit-learn decision stump."""
        return DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2)

    # ---- Iris ------------------------------------------------------------
    iris = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)

    # First 100 rows: 'Iris-setosa' becomes -1, anything else becomes 1.
    species = iris.iloc[0:100, 4].values
    targets = np.where(species == 'Iris-setosa', -1, 1)

    # Columns 0 and 2 (sepal length, petal length).
    features = iris.iloc[0:100, [0, 2]].values

    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    preview(features,
            [(slice(None, 50), 'red', 'o', 'setosa'),
             (slice(50, 100), 'blue', 'x', 'versicolor')],
            title, xlabel, ylabel)

    # AdaBoost over scikit-learn decision stumps.
    ensemble = make_boost(stump())
    ensemble.fit(features, targets)
    plot_decision_regions(features, targets, ensemble, 0.01, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Decision Stump Classifier")
    print("scikit-learn AdaBoost With scikit-learn Pre-Built Decision Stump")
    print("Score:", ensemble.score(features, targets))

    # AdaBoost over the ML.py axis-aligned rectangle learner.
    ensemble = make_boost(AxisAlignedRectangles())
    ensemble.fit(features, targets)
    plot_decision_regions(features, targets, ensemble, 0.01, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Axis Aligned Rectangle Classifier")
    print("scikit-learn AdaBoost With ML.py Axis Aligned Rectangle Classifier")
    print("Score:", ensemble.score(features, targets))

    # ---- Fish, subset 1 --------------------------------------------------
    fish = pd.read_csv('./Fish.csv')
    # Two positional drops: the first 61 rows, then positions 11..83 of what
    # is left. Presumably this keeps only the Parkki and Smelt rows --
    # verify against the layout of Fish.csv.
    fish = fish.drop(fish.index[0:61])
    fish = fish.drop(fish.index[11:84])

    # Column 0 holds the label: 'Parkki' becomes -1, the rest become 1.
    species = fish.iloc[:, 0].values
    targets = np.where(species == 'Parkki', -1, 1)

    # Columns 2 and 1, in that order; per the axis labels below they are
    # plotted as length (x) and weight (y).
    features = fish.iloc[:, [2, 1]].values

    title = 'Fish Dataset'
    xlabel = 'Length [cm]'
    ylabel = 'Weight [g]'

    preview(features,
            [(slice(None, 11), 'red', 'o', 'Parkki'),
             (slice(11, None), 'blue', 'x', 'Smelt')],
            title, xlabel, ylabel)

    ensemble = make_boost(stump())
    ensemble.fit(features, targets)
    # NOTE(review): this is the only call that leaves the resolution at the
    # plot_decision_regions default -- confirm that is intentional.
    plot_decision_regions(features, targets, ensemble, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Decision Stump Classifier")
    print("\nscikit-learn AdaBoost With scikit-learn Pre-Built Decision Stump")
    print("Score:", ensemble.score(features, targets))

    ensemble = make_boost(AxisAlignedRectangles())
    ensemble.fit(features, targets)
    plot_decision_regions(features, targets, ensemble, 0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Axis Aligned Rectangle Classifier")
    print("scikit-learn AdaBoost With ML.py Axis Aligned Rectangle Classifier")
    print("Score:", ensemble.score(features, targets))

    # ---- Fish, subset 2 --------------------------------------------------
    fish = pd.read_csv('./Fish.csv')

    # First 55 rows: 'Bream' becomes -1, the rest become 1.
    species = fish.iloc[:55, 0].values
    targets = np.where(species == 'Bream', -1, 1)

    # Same two columns as above (length on x, weight on y per the labels).
    features = fish.iloc[:55, [2, 1]].values

    title = 'Fish Dataset'
    xlabel = 'Length [cm]'
    ylabel = 'Weight [g]'

    preview(features,
            [(slice(None, 35), 'red', 'o', 'Bream'),
             (slice(35, None), 'blue', 'x', 'Roach')],
            title, xlabel, ylabel)

    ensemble = make_boost(stump())
    ensemble.fit(features, targets)
    plot_decision_regions(features, targets, ensemble, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Decision Stump Classifier")
    print("\nscikit-learn AdaBoost With scikit-learn Pre-Built Decision Stump")
    print("Score:", ensemble.score(features, targets))

    ensemble = make_boost(AxisAlignedRectangles())
    ensemble.fit(features, targets)
    plot_decision_regions(features, targets, ensemble, 0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Axis Aligned Rectangle Classifier")
    print("scikit-learn AdaBoost With ML.py Axis Aligned Rectangle Classifier")
    print("Score:", ensemble.score(features, targets))
Example #6
0
def main():
    """Compare scikit-learn and ML.py nearest-neighbour models on Iris data.

    Two 100-row slices of the UCI Iris file are used: rows 0-99 (setosa
    against the rest) and rows 50-149 (versicolor against the rest). For
    each slice the raw points are scattered, then both classifiers are
    fitted with k = 1, their decision regions are plotted, and a
    classification report on the training data is printed.
    """

    def preview(points, groups, heading, x_name, y_name):
        """Scatter each ``(rows, color, marker, name)`` group and show it."""
        plt.title(heading)
        plt.xlabel(x_name)
        plt.ylabel(y_name)
        for rows, color, marker, name in groups:
            plt.scatter(points[rows, 0],
                        points[rows, 1],
                        color=color,
                        marker=marker,
                        label=name)
        plt.legend(loc='upper left')
        plt.show()

    def evaluate(model, points, labels, heading, x_name, y_name):
        """Fit *model*, plot its decision regions and print its report."""
        model.fit(points, labels)
        plot_decision_regions(points,
                              labels,
                              model,
                              resolution=0.1,
                              x_label=x_name,
                              y_label=y_name,
                              title=heading)
        print(heading)
        print(classification_report(labels, model.predict(points)))

    # ---- Iris, rows 0-99 -----------------------------------------------
    iris = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    # 'Iris-setosa' becomes class 0, anything else class 1.
    species = iris.iloc[0:100, 4].values
    targets = np.where(species == 'Iris-setosa', 0, 1)

    # Columns 0 and 2 (sepal length, petal length).
    features = iris.iloc[0:100, [0, 2]].values

    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    preview(features,
            [(slice(None, 50), 'red', 'o', 'setosa'),
             (slice(50, 100), 'blue', 'x', 'versicolor')],
            title, xlabel, ylabel)

    k = 1
    evaluate(KNeighborsClassifier(n_neighbors=k), features, targets,
             title + "\nscikit-learn k-Nearest Neighbors\nk = " + str(k),
             xlabel, ylabel)

    k = 1  # NOTE: the ML.py k-Nearest Neighbors class currently only supports k=1
    evaluate(NearestNeighbors(k), features, targets,
             title + "\nML.py k-Nearest Neighbors\nk = " + str(k),
             xlabel, ylabel)

    # ---- Iris, rows 50-149 ---------------------------------------------
    iris = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    # 'Iris-versicolor' becomes class 0, anything else class 1.
    species = iris.iloc[50:150, 4].values
    targets = np.where(species == 'Iris-versicolor', 0, 1)

    # Columns 0 and 2 (sepal length, petal length).
    features = iris.iloc[50:150, [0, 2]].values

    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    preview(features,
            [(slice(None, 50), 'red', 'o', 'versicolor'),
             (slice(50, 100), 'blue', 'x', 'virginica')],
            title, xlabel, ylabel)

    k = 1
    evaluate(KNeighborsClassifier(n_neighbors=k), features, targets,
             title + "\nscikit-learn k-Nearest Neighbors\nk = " + str(k),
             xlabel, ylabel)

    k = 1  # NOTE: the ML.py k-Nearest Neighbors class currently only supports k=1
    evaluate(NearestNeighbors(k), features, targets,
             title + "\nML.py k-Nearest Neighbors\nk = " + str(k),
             xlabel, ylabel)
Example #7
0
# NOTE(review): this is the tail of a script -- X and y are defined earlier,
# outside this excerpt.

# Scatter rows 50-99 of X (columns 0 and 1) as the versicolor class
plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')

# Setup the plot legend
plt.legend(loc='upper left')

# Display the plot
plt.show()

# Setup the Perceptron
# (presumably the arguments are learning rate and iteration count -- confirm
# against the Perceptron class)
pn = Perceptron(0.1, 10)

# Fit X to y (i.e. find the weights)
pn.fit(X, y)

# Print the error array recorded by the fit
print("Errors:", pn.errors)

# Plot the recorded errors against their 1-based position in the array
plt.plot(range(1, len(pn.errors) + 1), pn.errors, marker='o')
plt.title('Iris Dataset')
plt.xlabel('Iteration')
plt.ylabel('# of Misclassifications')
plt.show()

# Print the model's net input, its predictions on the training data, and
# the learned weights
print("Net Input X:", pn.net_input(X))
print("Predict X:", pn.predict(X))
print("Weights:", pn.weight)

# Plot the decision regions of the fitted perceptron
plot_decision_regions(X, y, pn, x_label='Sepal Length [cm]', y_label='Petal Length [cm]', title='Iris Dataset')