Exemple #1
0
"""
    Run the column data set with K-NN algorithm
"""
import matplotlib.pyplot as plt
from src.Algorithms.Supervised.knearestneighbours import KNN
from numpy import array, arange, mean, hstack
from pandas import DataFrame
from src.Utils.utils import get_data
from src.Utils.CrossValidate import CrossValidation

if __name__ == '__main__':
    # Only the first element of what get_data returns is wrapped in a DataFrame
    # and handed to the classifier.
    loaded = get_data("column_2C_weka.arff", type="arff")
    knn = KNN(DataFrame(loaded[0]))
    chosen_ks, accuracies = [], []

    for realization in range(20):
        # split() is invoked once before validation ...
        knn.split()
        best_k = knn.validate()
        chosen_ks.append(best_k)

        # ... and once more before training with the selected k.
        knn.split()
        Y_output, Y_test = knn.train(best_k)
        score = knn.test(Y_output, Y_test)
        accuracies.append(score)

        print(" # --------------------------------- #")
        print("Realization: ", realization)
        print("Best K: ", best_k)
        print("Accuracy: ", score)

    print("Finished")
    print("Mean accuracy: ", mean(accuracies))
Exemple #2
0
from numpy import zeros, concatenate, array, where, log, argmax, inf, sqrt, pi, exp, mean
from Strings import string
from sklearn.model_selection import train_test_split
from numpy.random import rand
from src.Utils.utils import sigmoid, get_data, get_confusion_matrix, plot_confusion_matrix
from sklearn.model_selection import KFold
from src.Algorithms.Supervised.NaiveBayes import NaiveBayes


if __name__ == '__main__':
    # Load the banknote-authentication file; the result is used below through
    # `.shape` and numpy `array(...)`, i.e. as a 2-D table.
    data = get_data("data_banknote_authentication.txt", type="csv")
    number_lines = data.shape[0]
    number_columns = data.shape[1]
    # X takes every column except the last; Y takes the last column and is
    # reshaped into a column vector (ndmin=2 followed by transpose).
    X = array(data, ndmin=2)[:, :number_columns - 1]
    Y = array(array(data, ndmin=2)[:, number_columns - 1], ndmin=2).T
    # NOTE(review): train_size is not used in the visible part of this script.
    train_size = .8
    test_size = .2

    # Remap every label equal to -1 to 0.
    indices = where(Y == -1)
    Y[indices] = 0

    print(X)
    print(Y)

    # NOTE(review): acc, acc_eta, kf and eta_validation_vector are not consumed in
    # the visible code; presumably used by the remainder of the loop - confirm.
    acc = []
    acc_eta = []
    kf = KFold(n_splits=10)
    eta_validation_vector = [1., 0.15, 0.1, 1e-2, 1e-3]
    for realization in range(5):
        # New hold-out split for each realization, with test_size as the test fraction.
        x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=test_size)
import matplotlib.pyplot as plt
from numpy import reshape, array, mean
from sklearn.datasets import load_iris
from src.Utils.ColorMap import ColorMap
from matplotlib.colors import ListedColormap
from src.Utils.utils import heaveside, normalize

# scikit-learn's bundled Iris dataset, loaded at import time; kept only for plotting.
IRIS = load_iris()

"""
    Using the logistic sigmoid
"""

if __name__ == '__main__':
    # NOTE(review): `perceptron` is not imported in the visible part of this
    # file - confirm where it is defined.
    data = get_data("Iris", type='csv')
    p = perceptron(data, 0.015, 500, type=2, logist=True)
    # Normalize the model's X in place, then call add_bias().
    p.X = normalize(p.X)
    p.add_bias()
    # Labels for plotting; not referenced by the visible code.
    feature_names = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
    target_names = array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
    sepal_lenth = data[0]  # sepal length (cm)
    sepal_width = data[1]  # sepal width (cm)

    # Disabled histogram of the two sepal columns:
    # plt.figure(figsize=(10, 7))
    #
    # plt.hist([sepal_lenth, sepal_width], bins=20, color=["green", "red"])
    # plt.title("sepal length (cm) vs sepal width (cm)")
    # plt.xlabel("sepal length (cm) vs sepal width (cm)")
    # plt.ylabel("Count")
    # plt.show()
"""
    Run the Iris data set with K-NN algorithm
"""
import matplotlib.pyplot as plt
from src.Algorithms.Supervised.knearestneighbours import KNN
from numpy import array, arange, mean, hstack
from pandas import DataFrame
from src.Utils.utils import get_data
from src.Utils.CrossValidate import CrossValidation


if __name__ == '__main__':
    # Evaluate K-NN on the Iris data over 20 realizations, printing the k chosen
    # by validation and the resulting accuracy for each one.
    accuracys = []
    data = get_data("Iris", type="csv")
    data = DataFrame(data)
    knn = KNN(data)
    all_k = []

    for realization in range(20):
        # split() is called before validate() and again before train().
        knn.split()
        best_k = knn.validate()
        all_k.append(best_k)
        knn.split()
        # Train with the k selected by validation. The previous hard-coded
        # value (3) ignored best_k even though it is reported as "Best K".
        Y_output, Y_test = knn.train(best_k)
        accuracys.append(knn.test(Y_output, Y_test))
        print(" # --------------------------------- #")
        print("Realization: ", realization)
        print("Best K: ", best_k)
        print("Accuracy: ", accuracys[realization])
        print(" # --------------------------------- #")

def pre_processing(reviews, bag, type="tr"):
    """Build the feature matrix for a batch of (review, sentiment) pairs.

    The result has one row per entry of ``reviews`` and ``len(bag) + 1``
    columns, initialised to zero. A row is filled in by
    ``SentimentAnalisys.text_to_vector`` only when its sentiment does not
    stringify to ``'nan'`` and consists of exactly one whitespace-separated
    token; every other row is left as zeros.

    ``type`` is accepted for interface compatibility but is not used here.
    """
    features = zeros((len(reviews), len(bag) + 1))
    for row_index, (review, sentiment) in enumerate(reviews):
        # Skip missing sentiments before touching string methods on them.
        if str(sentiment) == 'nan':
            continue
        # Skip sentiments that are not a single token.
        if len(sentiment.split()) != 1:
            continue
        features[row_index] = SentimentAnalisys.text_to_vector(
            features[row_index], review, int(sentiment), bag)

    return features


if __name__ == '__main__':
    data = get_data('chennai_reviews.csv', type='csv')
    bag = (SentimentAnalisys.create_vocabulary(data[2]))

    accuracys = []
    accuracys_train = []

    for realization in range(10):
        hit = 0
        reviews_train, reviews_test = train_test_split(array(data[[2, 3]])[1:],
                                                       test_size=0.2)
        Matrix_train = pre_processing(reviews_train, bag)
        naive = NaiveBayes(Matrix_train, reviews_train, bag, type="bernolli")
        thetas_ic, thetas_c = naive.train()
        for row in Matrix_train:
            p = naive.test(row=row, thetas_ic=thetas_ic, thetas_c=thetas_c)
            Y_output = naive.predict(p) + 1