""" Run the column data set with K-NN algorithm """ import matplotlib.pyplot as plt from src.Algorithms.Supervised.knearestneighbours import KNN from numpy import array, arange, mean, hstack from pandas import DataFrame from src.Utils.utils import get_data from src.Utils.CrossValidate import CrossValidation if __name__ == '__main__': accuracys = [] all_k = [] data = get_data("column_2C_weka.arff", type="arff") data = DataFrame(data[0]) knn = KNN(data) for realization in range(20): knn.split() best_k = knn.validate() all_k.append(best_k) knn.split() Y_output, Y_test = knn.train(best_k) accuracys.append(knn.test(Y_output, Y_test)) print(" # --------------------------------- #") print("Realization: ", realization) print("Best K: ", best_k) print("Accuracy: ", accuracys[realization]) print("Finished") print("Mean accuracy: ", mean(accuracys))
from numpy import zeros, concatenate, array, where, log, argmax, inf, sqrt, pi, exp, mean
from Strings import string
from sklearn.model_selection import train_test_split
from numpy.random import rand
from src.Utils.utils import sigmoid, get_data, get_confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import KFold
from src.Algorithms.Supervised.NaiveBayes import NaiveBayes

if __name__ == '__main__':
    # Load the banknote data and slice it into X (every column but the
    # last) and Y (the last column, reshaped into a column vector).
    data = get_data("data_banknote_authentication.txt", type="csv")
    number_lines, number_columns = data.shape[0], data.shape[1]

    samples = array(data, ndmin=2)
    last_column = number_columns - 1
    X = samples[:, :last_column]
    # array(...) copies the slice, so relabelling Y below leaves X untouched.
    Y = array(samples[:, last_column], ndmin=2).T

    train_size = .8
    test_size = .2

    # Entries of Y equal to -1 are rewritten as 0.
    Y[where(Y == -1)] = 0

    print(X)
    print(Y)

    acc = []
    acc_eta = []
    kf = KFold(n_splits=10)
    eta_validation_vector = [1., 0.15, 0.1, 1e-2, 1e-3]

    # Five realizations, each drawing its own train/test partition.
    for realization in range(5):
        x_train, x_test, y_train, y_test = train_test_split(
            X, Y, test_size=test_size)
import matplotlib.pyplot as plt
from numpy import reshape, array, mean
from sklearn.datasets import load_iris
from src.Utils.ColorMap import ColorMap
from matplotlib.colors import ListedColormap
# get_data is called below; it was missing from this import and raised
# NameError as soon as the script ran.
from src.Utils.utils import heaveside, normalize, get_data

# just for the plot
IRIS = load_iris()

# Using the logistic sigmoid
if __name__ == '__main__':
    data = get_data("Iris", type='csv')

    # NOTE(review): `perceptron` is not imported anywhere in the visible
    # source and its module path cannot be determined from here -- add the
    # proper import before running this script.
    # NOTE(review): 0.015 and 500 look like a learning rate and an epoch
    # count -- confirm against the perceptron constructor.
    p = perceptron(data, 0.015, 500, type=2, logist=True)
    p.X = normalize(p.X)
    p.add_bias()

    # Labels for the Iris features and classes (not used by the code
    # visible in this script).
    feature_names = ['sepal length (cm)', 'sepal width (cm)',
                     'petal length (cm)', 'petal width (cm)']
    target_names = array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

    sepal_lenth = data[0]  # sepal length (cm)
    sepal_width = data[1]  # sepal width (cm)

    # plt.figure(figsize=(10, 7))
    #
    # plt.hist([sepal_lenth, sepal_width], bins=20, color=["green", "red"])
    # plt.title("sepal length (cm) vs sepal width (cm)")
    # plt.xlabel("sepal length (cm) vs sepal width (cm)")
    # plt.ylabel("Count")
    # plt.show()
""" Run the column data set with K-NN algorithm """ import matplotlib.pyplot as plt from src.Algorithms.Supervised.knearestneighbours import KNN from numpy import array, arange, mean, hstack from pandas import DataFrame from src.Utils.utils import get_data from src.Utils.CrossValidate import CrossValidation if __name__ == '__main__': accuracys = [] data = get_data("Iris", type="csv") data = DataFrame(data) knn = KNN(data) all_k = [] for realization in range(20): knn.split() best_k = knn.validate() all_k.append(best_k) knn.split() Y_output, Y_test = knn.train(3) accuracys.append(knn.test(Y_output, Y_test)) print(" # --------------------------------- #") print("Realization: ", realization) print("Best K: ", best_k) print("Accuracy: ", accuracys[realization]) print(" # --------------------------------- #")
def pre_processing(reviews, bag, type="tr"):
    """Build a matrix with one row per review from (review, sentiment) pairs.

    The result has ``len(reviews)`` rows and ``len(bag) + 1`` columns and
    starts out as all zeros. A row is filled with the output of
    ``SentimentAnalisys.text_to_vector`` only when the sentiment is not
    ``nan`` and consists of a single whitespace-separated token; every other
    row stays zero.

    ``type`` is accepted but not used by this function.

    NOTE(review): the original source was flattened onto one line, so it
    cannot be read off whether the row counter advanced for skipped reviews.
    This version advances it for every review; either reading produces the
    same set of rows, only their order can differ.
    """
    matrix = zeros((len(reviews), len(bag) + 1))
    for index, (review, sentiment) in enumerate(reviews):
        # The nan check runs first so that .split() is never called on nan.
        if str(sentiment) == 'nan' or len(sentiment.split()) != 1:
            continue
        matrix[index] = SentimentAnalisys.text_to_vector(
            matrix[index], review, int(sentiment), bag)
    return matrix


if __name__ == '__main__':
    data = get_data('chennai_reviews.csv', type='csv')
    # The vocabulary is built from column 2 of the loaded data.
    bag = SentimentAnalisys.create_vocabulary(data[2])
    accuracys = []
    accuracys_train = []

    for realization in range(10):
        hit = 0

        # Columns 2 and 3 are kept and row 0 is dropped before the 80/20 split.
        review_rows = array(data[[2, 3]])[1:]
        reviews_train, reviews_test = train_test_split(
            review_rows, test_size=0.2)

        Matrix_train = pre_processing(reviews_train, bag)
        naive = NaiveBayes(Matrix_train, reviews_train, bag, type="bernolli")
        thetas_ic, thetas_c = naive.train()

        # Score every training row with the fitted parameters.
        for row in Matrix_train:
            p = naive.test(row=row, thetas_ic=thetas_ic, thetas_c=thetas_c)
            Y_output = naive.predict(p) + 1