def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
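# These examples call accuracy_score from the project's own utilities rather
# than scikit-learn. A minimal sketch of what such a helper could look like;
# this is an assumption for illustration, not the project's implementation.
import numpy as np

def accuracy_score(y_true, y_pred):
    # Fraction of predictions that exactly match the true labels
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return np.mean(y_true == y_pred)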
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=12,
        n_iterations=5000,
        learning_rate=0.01,
        early_stopping=True,
        plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Multilayer Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
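# normalize also comes from the project's utilities. A plausible minimal
# version is row-wise L2 normalization; treat this sketch as an assumption:
import numpy as np

def normalize(X, order=2):
    # Scale each sample (row) to unit length under the given norm
    norms = np.linalg.norm(X, ord=order, axis=1)
    norms[norms == 0] = 1  # avoid division by zero for all-zero rows
    return X / norms[:, np.newaxis]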
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
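# LogisticRegression maps a linear combination of the features through the
# logistic sigmoid to get class probabilities. A sketch of that core step;
# the function names are illustrative assumptions, not the project's API:
import numpy as np

def sigmoid(z):
    # Squash real values into (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

def predict_proba(X, weights):
    # Probability of the positive class for each sample
    return sigmoid(X.dot(weights))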
def main(): print("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Optimization method for finding the weights that minimize the loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        activation_function=ExpLU,
        optimizer=optimizer,
        early_stopping=True,
        plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
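# ExpLU refers to the exponential linear unit (ELU) activation. A sketch of
# the function and its gradient, assumed to mirror the standard definition
# (the default alpha here is an illustrative choice):
import numpy as np

def elu(x, alpha=0.1):
    # Identity for positive inputs, smooth exponential saturation below zero
    return np.where(x >= 0, x, alpha * (np.exp(x) - 1))

def elu_gradient(x, alpha=0.1):
    # Derivative: 1 above zero, alpha * exp(x) below
    return np.where(x >= 0, 1.0, alpha * np.exp(x))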
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=4000, learning_rate=0.01, plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend=True)
def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] # Change class labels from strings to numbers # df = df.replace(to_replace="setosa", value="-1") # df = df.replace(to_replace="virginica", value="1") # df = df.replace(to_replace="versicolor", value="2") # Only select data for two classes #X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix() #y = df.loc[df['species'] != "2"]["species"].as_matrix() X = X[y != 2] y = y[y != 2] y[y == 0] = -1 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Adaboost classification clf = Adaboost(n_clf=8) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimensions to 2d using pca and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    # Load the dataset
    X, y = get_spiral_dataset(100, 2, 3)

    # Cluster the data
    clf = GaussianMixtureModel(k=3)
    y_pred = clf.predict(X)

    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main(): # Load the dataset X, y = datasets.make_blobs() # Cluster the data clf = GaussianMixtureModel(k=3) y_pred = clf.predict(X) pca = PCA() pca.plot_in_2d(X, y_pred, title="GMM Clustering") pca.plot_in_2d(X, y, title="Actual Clustering")
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data
    clf = GaussianMixtureModel(k=3)
    y_pred = clf.predict(X)

    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Medoids
    clf = PartitioningAroundMedoids(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Means
    clf = KMeans(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
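# KMeans here is the project's own implementation. The core of one k-means
# iteration looks roughly like this (an illustrative sketch; empty clusters
# are left unhandled):
import numpy as np

def kmeans_step(X, centroids):
    # Assign each sample to its nearest centroid ...
    dists = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
    labels = np.argmin(dists, axis=1)
    # ... then move each centroid to the mean of its assigned samples
    new_centroids = np.array([X[labels == k].mean(axis=0)
                              for k in range(len(centroids))])
    return labels, new_centroids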
def main():
    # Load the dataset
    X, y = datasets.make_moons(n_samples=300, noise=0.1)

    # Cluster the data using DBSCAN
    clf = DBSCAN(eps=0.17, min_samples=5)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred, title="DBSCAN")
    pca.plot_in_2d(X, y, title="Actual Clustering")
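# DBSCAN grows clusters from samples whose eps-neighborhood holds at least
# min_samples points. A sketch of the neighborhood query that drives this;
# the function names are illustrative assumptions:
import numpy as np

def region_query(X, i, eps):
    # Indices of all samples within distance eps of sample i (incl. itself)
    dists = np.linalg.norm(X - X[i], axis=1)
    return np.where(dists <= eps)[0]

def is_core_point(X, i, eps, min_samples):
    return len(region_query(X, i, eps)) >= min_samples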
def main(): # Load the dataset X, y = datasets.make_blobs() # Cluster the data using K-Medoids clf = PAM(k=3) y_pred = clf.predict(X) # Project the data onto the 2 primary principal components pca = PCA() pca.plot_in_2d(X, y_pred, title="PAM Clustering") pca.plot_in_2d(X, y, title="Actual Clustering")
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Means
    clf = KMeans(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main():
    # Load the dataset
    X, y = datasets.make_moons(noise=0.05)

    # Cluster the data using DBSCAN
    clf = DBSCAN(eps=0.3, min_samples=5)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main():
    iris = load_iris()
    X = normalize(iris.data)
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)

    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
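# The KNN classifier is lazy: it predicts straight from the training set,
# which is why predict receives X_train and y_train. A sketch of that
# prediction step (illustrative, not the project's implementation):
import numpy as np

def knn_predict(X_test, X_train, y_train, k=3):
    y_pred = np.empty(len(X_test), dtype=y_train.dtype)
    for i, x in enumerate(X_test):
        # k nearest training samples by euclidean distance
        nearest = np.argsort(np.linalg.norm(X_train - x, axis=1))[:k]
        # Majority vote among their labels
        labels, counts = np.unique(y_train[nearest], return_counts=True)
        y_pred[i] = labels[np.argmax(counts)]
    return y_pred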
def main(): # Load the dataset data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] # Cluster the data using K-Medoids clf = PAM(k=3) y_pred = clf.predict(X) # Project the data onto the 2 primary principal components pca = PCA() pca.plot_in_2d(X, y_pred) pca.plot_in_2d(X, y)
def main(): print("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = GradientBoostingClassifier(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names) print("-- Gradient Boosting Regression --") X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5) X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5) clf = GradientBoostingRegressor(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) print("Mean Squared Error:", mse) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.scatter(X_test[:, 0], y_pred, color='green') plt.title("Gradient Boosting Regression (%.2f MSE)" % mse) plt.show()
def main():
    iris = datasets.load_iris()
    X = normalize(iris.data)
    y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy score:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
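# A Gaussian naive Bayes classifier scores each class by combining the class
# prior with per-feature normal likelihoods under the independence
# assumption. The likelihood term could look like this (an assumed sketch):
import numpy as np

def gaussian_likelihood(x, mean, var, eps=1e-4):
    # Univariate normal density of feature value x given class statistics;
    # eps guards against zero variance
    coeff = 1.0 / np.sqrt(2.0 * np.pi * var + eps)
    return coeff * np.exp(-((x - mean) ** 2) / (2.0 * var + eps))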
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main(): data=load_iris_dataset(dir_path + r"/../data/iris.csv") X=data['X'] y=data['target'] X = normalize(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) clf = KNN(k=3) y_pred = clf.predict(X_test, X_train, y_train) print "Accuracy score:", accuracy_score(y_test, y_pred) # Reduce dimensions to 2d using pca and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = RandomForest(n_estimators=50, debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main(): data = datasets.load_iris() X = normalize(data.data) y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Perceptron clf = Perceptron() clf.fit(X_train, y_train, plot_errors=True) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimension to two using PCA and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=10)
    clf.fit(X_train, y_train, n_iterations=4000, learning_rate=0.01)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main(): data = datasets.load_iris() X = normalize(data.data[data.target != 0]) y = data.target[data.target != 0] y[y == 1] = -1 y[y == 2] = 1 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimension to two using PCA and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
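# polynomial_kernel with power=4 and coef=1 corresponds to the usual
# polynomial kernel K(x1, x2) = (x1 . x2 + coef)^power. A sketch; the
# project's version may instead be a factory returning such a function:
import numpy as np

def polynomial_kernel(x1, x2, power=4, coef=1):
    # Implicit inner product in the polynomial feature space
    return (np.inner(x1, x2) + coef) ** power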
def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] X = normalize(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Perceptron clf = Perceptron() clf.fit(X_train, y_train, plot_errors=True) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimension to two using PCA and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main(): data = load_iris_dataset(dir_path + r"/../data/iris.csv") X = data['X'] y = data['target'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = DecisionTree() clf.fit(X_train, y_train) # clf.print_tree() y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
def main(): print("-- Classification Tree --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = ClassificationTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print("Accuracy:", accuracy_score(y_test, y_pred)) pca = PCA() pca.plot_in_2d(X_test, y_pred) print("-- Regression Tree --") X, y = datasets.make_regression(n_features=1, n_samples=100, bias=0, noise=5) X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.3) clf = RegressionTree() clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print("Mean Squared Error:", mean_squared_error(y_test, y_pred)) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.scatter(X_test[:, 0], y_pred, color='green') plt.show()
def main(): print ("-- Gradient Boosting Classification --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4) clf = GradientBoostingClassifier(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="Gradient Boosting", accuracy=accuracy, legend_labels=data.target_names) print ("-- Gradient Boosting Regression --") X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5) X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5) clf = GradientBoostingRegressor(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) mse = mean_squared_error(y_test, y_pred) print ("Mean Squared Error:", mse) # Plot the results plt.scatter(X_test[:, 0], y_test, color='black') plt.scatter(X_test[:, 0], y_pred, color='green') plt.title("Gradient Boosting Regression (%.2f MSE)" % mse) plt.show()
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.01,
        early_stopping=True,
        plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
def main(): df = pd.read_csv(dir_path + "/../data/iris.csv") # Change class labels from strings to numbers df = df.replace(to_replace="setosa", value="-1") df = df.replace(to_replace="virginica", value="1") df = df.replace(to_replace="versicolor", value="2") # Only select data for two classes X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix() y = df.loc[df['species'] != "2"]["species"].as_matrix() X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33) # Adaboost classification clf = Adaboost(n_clf = 8) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print "Accuracy:", accuracy_score(y_test, y_pred) # Reduce dimensions to 2d using pca and plot the results pca = PCA() pca.plot_in_2d(X_test, y_pred)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
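# For two classes, LDA projects onto the direction w = S_W^-1 (mu1 - mu0),
# where S_W is the within-class scatter. A sketch of fitting that direction;
# the names and structure are illustrative assumptions:
import numpy as np

def lda_direction(X, y):
    X0, X1 = X[y == 0], X[y == 1]
    # Within-class scatter, here taken as the sum of the class covariances
    # (the scale does not affect the direction)
    s_w = np.cov(X0, rowvar=False) + np.cov(X1, rowvar=False)
    # Direction that best separates the class means
    return np.linalg.solve(s_w, X1.mean(axis=0) - X0.mean(axis=0))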
def main(): print ("-- XGBoost --") data = datasets.load_iris() X = data.data y = data.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2) clf = XGBoost(debug=True) clf.fit(X_train, y_train) y_pred = clf.predict(X_test) accuracy = accuracy_score(y_test, y_pred) print ("Accuracy:", accuracy) pca = PCA() pca.plot_in_2d(X_test, y_pred, title="XGBoost", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    y[y == 1] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
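# Adaboost's weak learners are typically decision stumps: one-feature
# threshold classifiers trained on weighted samples, which is also why the
# labels above are mapped to {-1, 1}. A sketch of how a fitted stump
# predicts; the field names are illustrative assumptions:
import numpy as np

class DecisionStump:
    def __init__(self, feature_index, threshold, polarity=1):
        self.feature_index = feature_index
        self.threshold = threshold
        self.polarity = polarity

    def predict(self, X):
        # Label -1 on one side of the threshold, +1 on the other; polarity
        # flips which side gets which label
        preds = np.ones(X.shape[0])
        values = X[:, self.feature_index]
        preds[self.polarity * values < self.polarity * self.threshold] = -1
        return preds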