예제 #1
0
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    digit1 = 1
    digit2 = 8
    idx = np.append(np.where(y == digit1)[0], np.where(y == digit2)[0])
    y = data.target[idx]
    # Change labels to {-1, 1}
    y[y == digit1] = -1
    y[y == digit2] = 1
    X = data.data[idx]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=5)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Logistic Regression",
                   accuracy=accuracy)
예제 #3
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # MLP
    clf = MultilayerPerceptron(n_hidden=12,
                               n_iterations=5000,
                               learning_rate=0.01,
                               early_stopping=True,
                               plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Multilayer Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
예제 #4
0
def main():

    print("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = XGBoost(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="XGBoost",
                   accuracy=accuracy,
                   legend_labels=data.target_names)
예제 #5
0
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    # Change class labels from strings to numbers
    # df = df.replace(to_replace="setosa", value="-1")
    # df = df.replace(to_replace="virginica", value="1")
    # df = df.replace(to_replace="versicolor", value="2")

    # Only select data for two classes
    #X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix()
    #y = df.loc[df['species'] != "2"]["species"].as_matrix()

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Adaboost classification
    clf = Adaboost(n_clf=8)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #6
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Optimization method for finding weights that minimizes loss
    optimizer = RMSprop(learning_rate=0.01)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
                     activation_function=ExpLU,
                     optimizer=optimizer,
                     early_stopping=True,
                     plot_errors=True)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend_labels=np.unique(y))
예제 #7
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.33,
                                                        seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=4000, learning_rate=0.01, plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Perceptron",
                   accuracy=accuracy,
                   legend=True)
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data
    clf = GaussianMixtureModel(k=3)
    y_pred = clf.predict(X)
    
    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data
    clf = GaussianMixtureModel(k=3)
    y_pred = clf.predict(X)

    pca = PCA()
    pca.plot_in_2d(X, y_pred, title="GMM Clustering")
    pca.plot_in_2d(X, y, title="Actual Clustering")
예제 #10
0
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Means
    clf = KMeans(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred)
    pca.plot_in_2d(X, y)
def main():
    # Load the dataset
    X, y = datasets.make_blobs()

    # Cluster the data using K-Medoids
    clf = PAM(k=3)
    y_pred = clf.predict(X)

    # Project the data onto the 2 primary principal components
    pca = PCA()
    pca.plot_in_2d(X, y_pred, title="PAM Clustering")
    pca.plot_in_2d(X, y, title="Actual Clustering")
def main():
	iris = load_iris()
	X = normalize(iris.data)
	y = iris.target
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

	clf = KNN(k=3)
	y_pred = clf.predict(X_test, X_train, y_train)
	print "Accuracy score:", accuracy_score(y_test, y_pred)

	# Reduce dimensions to 2d using pca and plot the results
	pca = PCA()
	pca.plot_in_2d(X_test, y_pred)
예제 #13
0
def main():
    iris = load_iris()
    X = normalize(iris.data)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    print "Accuracy score:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #14
0
def main():

    print("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="Gradient Boosting",
                   accuracy=accuracy,
                   legend_labels=data.target_names)

    print("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=150,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
예제 #15
0
def main():
	iris = datasets.load_iris()
	X = normalize(iris.data)
	y = iris.target
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

	clf = NaiveBayes()
	clf.fit(X_train, y_train)
	y_pred = clf.predict(X_test)

	print "Accuracy score:", accuracy_score(y_test, y_pred)

	# Reduce dimension to two using PCA and plot the results
	pca = PCA()
	pca.plot_in_2d(X_test, y_pred)
예제 #16
0
def main():
    iris = datasets.load_iris()
    X = normalize(iris.data)
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy score:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #17
0
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #18
0
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = RandomForest(n_estimators=50, debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    data=load_iris_dataset(dir_path + r"/../data/iris.csv")
    X=data['X']
    y=data['target']
    X = normalize(X)
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    print "Accuracy score:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #20
0
def main():
	data = datasets.load_digits()
	X = data.data
	y = data.target

	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

	clf = RandomForest(n_estimators=50, debug=True)
	clf.fit(X_train, y_train)
	y_pred = clf.predict(X_test)

	print "Accuracy:", accuracy_score(y_test, y_pred)

	pca = PCA()
	pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=10)
    clf.fit(X_train, y_train, n_iterations=4000, learning_rate=0.01)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #22
0
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Perceptron
    clf = Perceptron()
    clf.fit(X_train, y_train, plot_errors=True)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)
    
    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="K Nearest Neighbors", accuracy=accuracy, legend_labels=data.target_names)
예제 #24
0
    def plotIn3D(self, X, y=None, k=1, labels=[]):
        X_transformed = PCA().transform(X, n_components=3)
        x1 = X_transformed[:, 0]
        x2 = X_transformed[:, 1]
        x3 = X_transformed[:, 2]
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.scatter(x1, x2, x3, c=y)

        # Plot a star for each centroid
        for i in range(k):
            ax.scatter(np.mean(X_transformed[y == i, 0]),
                       np.mean(X_transformed[y == i, 1]),
                       np.mean(X_transformed[y == i, 2]),
                       s=400,
                       marker='*',
                       c='w',
                       cmap=chosenCmap)

        plt.title(labels[0])
        plt.xlabel(labels[1])
        plt.ylabel(labels[2])
        #        plt.legend(handles = graphLegend)

        # Do I add the

        plt.show()
예제 #25
0
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    X = normalize(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Perceptron
    clf = Perceptron()
    clf.fit(X_train, y_train, plot_errors=True)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #26
0
def main():

	data = datasets.load_digits()
	X = data.data
	y = data.target

	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

	clf = DecisionTree()
	clf.fit(X_train, y_train)
	# clf.print_tree()
	y_pred = clf.predict(X_test)

	print "Accuracy:", accuracy_score(y_test, y_pred)

	pca = PCA()
	pca.plot_in_2d(X_test, y_pred)
예제 #27
0
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #28
0
def main():

    data = datasets.load_digits()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #29
0
def main():

    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = DecisionTree()
    clf.fit(X_train, y_train)
    # clf.print_tree()
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #30
0
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #31
0
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    clf = NaiveBayes()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Naive Bayes", accuracy=accuracy, legend_labels=data.target_names)
def main():
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = -1
    y[y == 2] = 1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = SupportVectorMachine(kernel=polynomial_kernel, power=4, coef=1)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Support Vector Machine", accuracy=accuracy)
def main():

    data = datasets.load_iris()
    X = data.data[data.target != 0]
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():

    print ("-- Gradient Boosting Classification --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    clf = GradientBoostingClassifier(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, 
        title="Gradient Boosting", 
        accuracy=accuracy, 
        legend_labels=data.target_names)

    print ("-- Gradient Boosting Regression --")

    X, y = datasets.make_regression(n_features=1, n_samples=150, bias=0, noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X), y, test_size=0.5)

    clf = GradientBoostingRegressor(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)

    print ("Mean Squared Error:", mse)

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.title("Gradient Boosting Regression (%.2f MSE)" % mse)
    plt.show()
예제 #35
0
def main():

    print("-- Classification Tree --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=2)

    clf = ClassificationTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)

    print("-- Regression Tree --")

    X, y = datasets.make_regression(n_features=1,
                                    n_samples=100,
                                    bias=0,
                                    noise=5)

    X_train, X_test, y_train, y_test = train_test_split(standardize(X),
                                                        y,
                                                        test_size=0.3)

    clf = RegressionTree()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Mean Squared Error:", mean_squared_error(y_test, y_pred))

    # Plot the results
    plt.scatter(X_test[:, 0], y_test, color='black')
    plt.scatter(X_test[:, 0], y_pred, color='green')
    plt.show()
예제 #36
0
 def plotIn2D(self, X, y=None, labels=[]):
     X_transformed = PCA().transform(X, n_components=2)
     x1 = X_transformed[:, 0]
     x2 = X_transformed[:, 1]
     plt.scatter(x1, x2, c=y)
     plt.title(labels[0])
     plt.xlabel(labels[1])
     plt.ylabel(labels[2])
     plt.legend(handles=graphLegend)
     plt.show()
def main():
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = KNN(k=3)
    y_pred = clf.predict(X_test, X_train, y_train)

    accuracy = accuracy_score(y_test, y_pred)

    print("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test,
                   y_pred,
                   title="K Nearest Neighbors",
                   accuracy=accuracy,
                   legend_labels=data.target_names)
예제 #38
0
def main():
    data = datasets.load_digits()
    X = data.data
    y = data.target

    pca = PCA()
    X = pca.transform(X, n_components=5)  # Reduce to 5 dimensions

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.4,
                                                        seed=1)

    clf = RandomForest(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print("Accuracy:", accuracy_score(y_test, y_pred))

    pca.plot_in_2d(X_test, y_pred)
예제 #39
0
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    # Load the dataset
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']
    X = normalize(X)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    # MLP
    clf = MultilayerPerceptron(n_hidden=10)
    clf.fit(X_train,
            y_train,
            n_iterations=4000,
            learning_rate=0.01,
            plot_errors=True)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    # Load dataset
    data = datasets.load_iris()
    X = normalize(data.data[data.target != 0])
    y = data.target[data.target != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    clf = LogisticRegression(gradient_descent=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Logistic Regression", accuracy=accuracy)
예제 #43
0
def main():
    data = datasets.load_digits()
    X = normalize(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, seed=1)

    # Perceptron
    clf = Perceptron(n_iterations=5000,
        learning_rate=0.01, 
        early_stopping=True,
        plot_errors=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Perceptron", accuracy=accuracy, legend_labels=np.unique(y))
예제 #44
0
def main():
    df = pd.read_csv(dir_path + "/../data/iris.csv")
    # Change class labels from strings to numbers
    df = df.replace(to_replace="setosa", value="-1")
    df = df.replace(to_replace="virginica", value="1")
    df = df.replace(to_replace="versicolor", value="2")

    # Only select data for two classes
    X = df.loc[df['species'] != "2"].drop("species", axis=1).as_matrix()
    y = df.loc[df['species'] != "2"]["species"].as_matrix()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    
    # Adaboost classification
    clf = Adaboost(n_clf = 8)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
예제 #45
0
def main():
    # Load dataset
    data = load_iris_dataset(dir_path + r"/../data/iris.csv")
    X = data['X']
    y = data['target']

    X = normalize(X[y != 0])
    y = y[y != 0]
    y[y == 1] = 0
    y[y == 2] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    clf = LogisticRegression()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    print "Accuracy:", accuracy_score(y_test, y_pred)

    # Reduce dimension to two using PCA and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred)
def main():
    # Load the dataset
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Three -> two classes
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Fit and predict using LDA
    lda = LDA()
    lda.fit(X_train, y_train)
    y_pred = lda.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="LDA", accuracy=accuracy)
예제 #47
0
def main():

    print ("-- XGBoost --")

    data = datasets.load_iris()
    X = data.data
    y = data.target

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, seed=2)  

    clf = XGBoost(debug=True)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, 
        title="XGBoost", 
    accuracy=accuracy, 
    legend_labels=data.target_names)
예제 #48
0
def main():
    data = datasets.load_iris()
    X = data.data
    y = data.target

    X = X[y != 2]
    y = y[y != 2]
    y[y == 0] = -1
    y[y == 1] = 1

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

    # Adaboost classification
    clf = Adaboost(n_clf=10)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)

    print ("Accuracy:", accuracy)

    # Reduce dimensions to 2d using pca and plot the results
    pca = PCA()
    pca.plot_in_2d(X_test, y_pred, title="Adaboost", accuracy=accuracy)
예제 #49
0
# ...........
data = datasets.load_digits()
digit1 = 1
digit2 = 8
idx = np.append(np.where(data.target == digit1)[0], np.where(data.target == digit2)[0])
y = data.target[idx]
# Change labels to {0, 1}
y[y == digit1] = 0
y[y == digit2] = 1
X = data.data[idx]
X = normalize(X)

# ..........................
#  DIMENSIONALITY REDUCTION
# ..........................
pca = PCA()
X = pca.transform(X, n_components=5) # Reduce to 5 dimensions


# ..........................
#  TRAIN / TEST SPLIT
# ..........................
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)
# Rescale label for Adaboost to {-1, 1}
rescaled_y_train = 2*y_train - np.ones(np.shape(y_train))
rescaled_y_test = 2*y_test - np.ones(np.shape(y_test))

# .......
#  SETUP
# .......
adaboost = Adaboost(n_clf = 8)