def test_knn():
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    x = df.iloc[0:100, [0, 2]].values

    print("Testing 2-D Iris data set with only one neighbor...")
    neighbor = KNN(k=1)
    neighbor.fit(x, y)
    neighbor.plot(x, y)

    print("Testing Iris data set with 15 neighbors...")
    iris = datasets.load_iris()
    x = iris.data[:, :2]
    y = iris.target
    neighbor = KNN(15)
    neighbor.fit(x, y)
    y_pred = neighbor.predict(x)
    neighbor.accuracy(y_pred, y)
    neighbor.plot(x, y)

    print("Adding new point to the data set and testing with the full Iris data set, k=1...")
    neighbor = KNN(1)
    neighbor.fit(x, y)
    y2 = np.append(y, np.array([1]))
    x2 = np.vstack([x, [5.0, 3.2]])
    neighbor.plot(x2, y2)

    print("Testing SKLearn's model...")
    clf = neighbors.KNeighborsClassifier(1)
    clf.fit(x, y)
    plot_decision_regions(x2, y2, clf)
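# test_knn() exercises a KNN class defined elsewhere in ML.py. For reference,
# here is a minimal sketch of the interface the test assumes: a constructor
# taking k, plus fit/predict/accuracy/plot with majority voting. The method
# names and behavior are inferred from the calls above, not taken from the
# actual class; np and plot_decision_regions are assumed to be in scope.
class KNN:
    def __init__(self, k=1):
        self.k = k  # number of neighbors that vote on each prediction

    def fit(self, X, y):
        # lazy learner: just memorize the training data
        self.X, self.y = np.asarray(X), np.asarray(y)

    def predict(self, X):
        preds = []
        for point in np.asarray(X):
            # indices of the k closest training points by Euclidean distance
            idx = np.argsort(np.linalg.norm(self.X - point, axis=1))[:self.k]
            # majority vote among the k neighbors
            labels, counts = np.unique(self.y[idx], return_counts=True)
            preds.append(labels[np.argmax(counts)])
        return np.array(preds)

    def accuracy(self, y_pred, y_true):
        acc = np.mean(y_pred == y_true)
        print("Accuracy:", acc)
        return acc

    def plot(self, X, y):
        plot_decision_regions(X, y, self)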
def test_perceptron():
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)

    print("Creating a two-feature data set")
    y = df.iloc[0:100, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:100, [0, 2]].values

    print("Creating Perceptron")
    pn = Perceptron(0.1, 10)
    print("Perceptron created")
    pn.fit(X, y)
    print("Perceptron fitted")
    print("Error List")
    print(pn.errors)
    print("Weight vector")
    print(pn.weight)

    print("Using plot_decision_regions function")
    plot_decision_regions(X, y, pn, 0.02)
    pn.plot(X, y)
    plt.xlabel('sepal length [cm]')
    plt.ylabel('petal length [cm]')
    plt.title('Petal Length vs Sepal Length')

    print("Creating a three-feature data set")
    y = df.iloc[0:150, 4].values
    y = np.where(y == 'Iris-setosa', -1, 1)
    X = df.iloc[0:150, [0, 1, 2]].values

    print("Creating Perceptron")
    pn1 = Perceptron(0.1, 10)
    print("Perceptron created")
    pn1.fit(X, y)
    print("Perceptron fitted")
    print("Error List")
    print(pn1.errors)
    print("Weight vector")
    print(pn1.weight)

    print("Creating a perceptron that does not have enough iterations to learn the three-feature data set")
    pn2 = Perceptron(0.1, 4)
    print("Perceptron created")
    pn2.fit(X, y)
    print("Perceptron fitted")
    print("Error List")
    print(pn2.errors)
    print("Weight vector")
    print(pn2.weight)
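# For reference, a minimal Rosenblatt perceptron matching the interface used
# in test_perceptron(): Perceptron(learning_rate, iterations), plus fit,
# errors, weight, net_input, and predict. This is a sketch of what the
# assumed ML.py class might look like, not necessarily its exact
# implementation; np is assumed imported as elsewhere in this module.
class Perceptron:
    def __init__(self, learning_rate=0.1, iterations=10):
        self.learning_rate = learning_rate
        self.iterations = iterations

    def fit(self, X, y):
        # weight[0] is the bias term
        self.weight = np.zeros(1 + X.shape[1])
        self.errors = []
        for _ in range(self.iterations):
            misclassified = 0
            for xi, target in zip(X, y):
                # update is zero when the sample is already classified correctly
                update = self.learning_rate * (target - self.predict(xi))
                self.weight[1:] += update * xi
                self.weight[0] += update
                misclassified += int(update != 0.0)
            self.errors.append(misclassified)
        return self

    def net_input(self, X):
        return np.dot(X, self.weight[1:]) + self.weight[0]

    def predict(self, X):
        return np.where(self.net_input(X) >= 0.0, 1, -1)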
def plot(self, X, y):
    plot_decision_regions(X, y, self)
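# plot_decision_regions is called throughout these demos but its definition is
# not shown. Below is a minimal sketch under the signature the callers assume,
# plot_decision_regions(X, y, classifier, resolution=0.02, x_label='',
# y_label='', title=''): rasterize the classifier's predictions over a mesh
# grid, shade the regions, and overlay the training points. This is a common
# pattern for such helpers, not necessarily the project's exact implementation;
# np and plt are assumed imported.
from matplotlib.colors import ListedColormap

def plot_decision_regions(X, y, classifier, resolution=0.02,
                          x_label='', y_label='', title=''):
    markers = ('o', 'x', 's', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])
    # span the two features with a small margin around the data
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    # classify every grid point and shade the resulting regions
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    plt.contourf(xx1, xx2, Z.reshape(xx1.shape), alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())
    # overlay the training samples, one marker per class
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(X[y == cl, 0], X[y == cl, 1], alpha=0.8,
                    color=colors[idx], marker=markers[idx], label=str(cl))
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.legend(loc='upper left')
    plt.show()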
def main():
    # IRIS DATASET #############################################################
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    # Extract the first 100 labels
    y = df.iloc[0:100, 4].values
    # Convert the labels to either 0 or 1
    y = np.where(y == 'Iris-setosa', 0, 1)
    # Extract features from dataset [sepal_length, petal_length]
    X = df.iloc[0:100, [0, 2]].values

    # plot variables
    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the setosa data
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    # Plot the versicolor data
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn logistic regression
    skLR = skLogisticRegression()
    skLR.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, skLR, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Logistic Regression Model")
    print(title + "\nscikit-learn Logistic Regression Model")
    print(classification_report(y, skLR.predict(X)))

    # ML.py logistic regression
    mlLR = LogisticRegression(learning_rate=0.05, iterations=25)
    mlLR.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, mlLR, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Logistic Regression Model")
    print(title + "\nML.py Logistic Regression Model")
    print(classification_report(y, mlLR.predict(X)))

    # FISH DATASET #############################################################
    df = pd.read_csv('./Fish.csv')
    df = df.drop(df.index[0:61])   # drop rows before the Parkki block
    df = df.drop(df.index[11:84])  # drop rows between Parkki and Smelt
    # Extract the labels for Parkki and Smelt fish
    y = df.iloc[:, 0].values
    # Convert the labels to either 0 or 1
    y = np.where(y == 'Parkki', 0, 1)
    # Extract features from dataset [length, weight]
    X = df.iloc[:, [2, 1]].values

    # plot variables
    title = 'Fish Dataset'
    xlabel = 'Length [cm]'
    ylabel = 'Weight [g]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the Parkki data
    plt.scatter(X[:11, 0], X[:11, 1], color='red', marker='o', label='Parkki')
    # Plot the Smelt data
    plt.scatter(X[11:, 0], X[11:, 1], color='blue', marker='x', label='Smelt')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn logistic regression
    skLR = skLogisticRegression()
    skLR.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, skLR, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Logistic Regression Model")
    print(title + "\nscikit-learn Logistic Regression Model")
    print(classification_report(y, skLR.predict(X)))

    # ML.py logistic regression
    mlLR = LogisticRegression(learning_rate=0.01, iterations=10)
    mlLR.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, mlLR, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Logistic Regression Model")
    print(title + "\nML.py Logistic Regression Model")
    print(classification_report(y, mlLR.predict(X)))
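# For reference, a minimal batch-gradient-descent logistic regression with the
# interface main() assumes: LogisticRegression(learning_rate, iterations) plus
# fit and predict on 0/1 labels. This is a hedged sketch of what the ML.py
# class might look like, not its actual source; np is assumed imported.
class LogisticRegression:
    def __init__(self, learning_rate=0.05, iterations=25):
        self.learning_rate = learning_rate
        self.iterations = iterations

    def fit(self, X, y):
        # weight[0] is the bias term
        self.weight = np.zeros(1 + X.shape[1])
        for _ in range(self.iterations):
            # sigmoid activation of the net input
            z = np.dot(X, self.weight[1:]) + self.weight[0]
            output = 1.0 / (1.0 + np.exp(-z))
            errors = y - output
            # gradient ascent on the log-likelihood
            self.weight[1:] += self.learning_rate * np.dot(X.T, errors)
            self.weight[0] += self.learning_rate * errors.sum()
        return self

    def predict(self, X):
        z = np.dot(X, self.weight[1:]) + self.weight[0]
        return np.where(z >= 0.0, 1, 0)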
def main():
    # IRIS DATASET #####################################################################################################
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    # Extract the first 100 labels
    Y = df.iloc[0:100, 4].values
    # Convert the labels to either -1 or 1
    Y = np.where(Y == 'Iris-setosa', -1, 1)
    # Extract features from dataset [sepal_length, petal_length]
    X = df.iloc[0:100, [0, 2]].values

    # plot variables
    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the setosa data
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    # Plot the versicolor data
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn AdaBoost with scikit-learn pre-built decision stump
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2),
                               algorithm='SAMME', n_estimators=10, learning_rate=1.0)
    boost.fit(X, Y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, Y, boost, resolution=0.01, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Decision Stump Classifier")
    print("scikit-learn AdaBoost With scikit-learn Pre-Built Decision Stump")
    print("Score:", boost.score(X, Y))

    # scikit-learn AdaBoost with custom decision stump
    boost = AdaBoostClassifier(base_estimator=AxisAlignedRectangles(),
                               algorithm='SAMME', n_estimators=10, learning_rate=1.0)
    boost.fit(X, Y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, Y, boost, resolution=0.01, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Axis Aligned Rectangle Classifier")
    print("scikit-learn AdaBoost With ML.py Axis Aligned Rectangle Classifier")
    print("Score:", boost.score(X, Y))
    ####################################################################################################################

    # FISH DATASET 1 ###################################################################################################
    df = pd.read_csv('./Fish.csv')
    df = df.drop(df.index[0:61])   # drop rows before the Parkki block
    df = df.drop(df.index[11:84])  # drop rows between Parkki and Smelt
    # Extract the labels for Parkki and Smelt fish
    Y = df.iloc[:, 0].values
    # Convert the labels to either -1 or 1
    Y = np.where(Y == 'Parkki', -1, 1)
    # Extract features from dataset [length, weight]
    X = df.iloc[:, [2, 1]].values

    # plot variables
    title = 'Fish Dataset'
    xlabel = 'Length [cm]'
    ylabel = 'Weight [g]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the Parkki data
    plt.scatter(X[:11, 0], X[:11, 1], color='red', marker='o', label='Parkki')
    # Plot the Smelt data
    plt.scatter(X[11:, 0], X[11:, 1], color='blue', marker='x', label='Smelt')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn AdaBoost with scikit-learn pre-built decision stump
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2),
                               algorithm='SAMME', n_estimators=10, learning_rate=1.0)
    boost.fit(X, Y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, Y, boost, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Decision Stump Classifier")
    print("\nscikit-learn AdaBoost With scikit-learn Pre-Built Decision Stump")
    print("Score:", boost.score(X, Y))

    # scikit-learn AdaBoost with custom decision stump
    boost = AdaBoostClassifier(base_estimator=AxisAlignedRectangles(),
                               algorithm='SAMME', n_estimators=10, learning_rate=1.0)
    boost.fit(X, Y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, Y, boost, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Axis Aligned Rectangle Classifier")
    print("scikit-learn AdaBoost With ML.py Axis Aligned Rectangle Classifier")
    print("Score:", boost.score(X, Y))
    ####################################################################################################################

    # FISH DATASET 2 ###################################################################################################
    df = pd.read_csv('./Fish.csv')
    # Extract the labels for Bream and Roach fish
    Y = df.iloc[:55, 0].values
    # Convert the labels to either -1 or 1
    Y = np.where(Y == 'Bream', -1, 1)
    # Extract features from dataset [length, weight]
    X = df.iloc[:55, [2, 1]].values

    # plot variables
    title = 'Fish Dataset'
    xlabel = 'Length [cm]'
    ylabel = 'Weight [g]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the Bream data
    plt.scatter(X[:35, 0], X[:35, 1], color='red', marker='o', label='Bream')
    # Plot the Roach data
    plt.scatter(X[35:, 0], X[35:, 1], color='blue', marker='x', label='Roach')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn AdaBoost with scikit-learn pre-built decision stump
    boost = AdaBoostClassifier(base_estimator=DecisionTreeClassifier(max_depth=1, max_leaf_nodes=2),
                               algorithm='SAMME', n_estimators=10, learning_rate=1.0)
    boost.fit(X, Y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, Y, boost, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn Decision Stump Classifier")
    print("\nscikit-learn AdaBoost With scikit-learn Pre-Built Decision Stump")
    print("Score:", boost.score(X, Y))

    # scikit-learn AdaBoost with custom decision stump
    boost = AdaBoostClassifier(base_estimator=AxisAlignedRectangles(),
                               algorithm='SAMME', n_estimators=10, learning_rate=1.0)
    boost.fit(X, Y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, Y, boost, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py Axis Aligned Rectangle Classifier")
    print("scikit-learn AdaBoost With ML.py Axis Aligned Rectangle Classifier")
    print("Score:", boost.score(X, Y))
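# AxisAlignedRectangles is the custom ML.py weak learner used above but not
# shown here. For AdaBoostClassifier with algorithm='SAMME', a base estimator
# only needs the scikit-learn estimator API plus fit(X, y, sample_weight) and
# predict. A hypothetical minimal version is sketched below: it predicts +1
# inside an axis-aligned box fit around the currently high-weight positive
# samples and -1 outside. The real ML.py class may differ substantially.
from sklearn.base import BaseEstimator, ClassifierMixin

class AxisAlignedRectangles(BaseEstimator, ClassifierMixin):
    def fit(self, X, y, sample_weight=None):
        X, y = np.asarray(X), np.asarray(y)
        self.classes_ = np.unique(y)
        if sample_weight is None:
            sample_weight = np.full(len(y), 1.0 / len(y))
        pos, w = X[y == 1], np.asarray(sample_weight)[y == 1]
        # focus the box on the high-weight positive samples so successive
        # boosting rounds can reshape it
        heavy = w >= w.mean()
        pos = pos[heavy] if heavy.any() else pos
        self.lower_ = pos.min(axis=0)
        self.upper_ = pos.max(axis=0)
        return self

    def predict(self, X):
        X = np.asarray(X)
        inside = np.all((X >= self.lower_) & (X <= self.upper_), axis=1)
        return np.where(inside, 1, -1)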
def main():
    # IRIS DATASET #############################################################
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    # Extract the first 100 labels
    y = df.iloc[0:100, 4].values
    # Convert the labels to either 0 or 1
    y = np.where(y == 'Iris-setosa', 0, 1)
    # Extract features from dataset [sepal_length, petal_length]
    X = df.iloc[0:100, [0, 2]].values

    # plot variables
    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the setosa data
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
    # Plot the versicolor data
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn k-Nearest Neighbors
    k = 1
    neigh = KNeighborsClassifier(n_neighbors=k)
    neigh.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, neigh, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn k-Nearest Neighbors\nk = " + str(k))
    print(title + "\nscikit-learn k-Nearest Neighbors\nk = " + str(k))
    print(classification_report(y, neigh.predict(X)))

    # ML.py k-Nearest Neighbors
    k = 1  # NOTE: the ML.py k-Nearest Neighbors class currently only supports k=1
    knn = NearestNeighbors(k)
    knn.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, knn, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py k-Nearest Neighbors\nk = " + str(k))
    print(title + "\nML.py k-Nearest Neighbors\nk = " + str(k))
    print(classification_report(y, knn.predict(X)))
    ############################################################################

    # IRIS DATASET 2 ###########################################################
    df = pd.read_csv(
        'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
        header=None)
    # Extract 100 labels
    y = df.iloc[50:150, 4].values
    # Convert the labels to either 0 or 1
    y = np.where(y == 'Iris-versicolor', 0, 1)
    # Extract features from dataset [sepal_length, petal_length]
    X = df.iloc[50:150, [0, 2]].values

    # plot variables
    title = 'Iris Dataset'
    xlabel = 'Sepal Length [cm]'
    ylabel = 'Petal Length [cm]'

    # Plot what we have so far
    # Plot labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    # Plot the versicolor data
    plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='versicolor')
    # Plot the virginica data
    plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='virginica')
    # Setup the plot legend
    plt.legend(loc='upper left')
    # Display the plot
    plt.show()

    # scikit-learn k-Nearest Neighbors
    k = 1
    neigh = KNeighborsClassifier(n_neighbors=k)
    neigh.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, neigh, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nscikit-learn k-Nearest Neighbors\nk = " + str(k))
    print(title + "\nscikit-learn k-Nearest Neighbors\nk = " + str(k))
    print(classification_report(y, neigh.predict(X)))

    # ML.py k-Nearest Neighbors
    k = 1  # NOTE: the ML.py k-Nearest Neighbors class currently only supports k=1
    knn = NearestNeighbors(k)
    knn.fit(X, y)
    # plot the decision regions and display metrics to the console
    plot_decision_regions(X, y, knn, resolution=0.1, x_label=xlabel, y_label=ylabel,
                          title=title + "\nML.py k-Nearest Neighbors\nk = " + str(k))
    print(title + "\nML.py k-Nearest Neighbors\nk = " + str(k))
    print(classification_report(y, knn.predict(X)))
# Plot the versicolor data
plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')
# Setup the plot legend
plt.legend(loc='upper left')
# Display the plot
plt.show()

# Setup the Perceptron
pn = Perceptron(0.1, 10)
# Fit X to y (i.e. find the weights)
pn.fit(X, y)
# Print the error array
print("Errors:", pn.errors)

# Plot the results of the first fit
plt.plot(range(1, len(pn.errors) + 1), pn.errors, marker='o')
plt.title('Iris Dataset')
plt.xlabel('Iteration')
plt.ylabel('# of Misclassifications')
plt.show()

print("Net Input X:", pn.net_input(X))
print("Predict X:", pn.predict(X))
print("Weights:", pn.weight)

plot_decision_regions(X, y, pn, x_label='Sepal Length [cm]',
                      y_label='Petal Length [cm]', title='Iris Dataset')