Example #1
def rank_query(features, query_idx, gallery_idx, file_list, labels, cam_idx, rank=1, display=False, cluster_means=None):
    """Rank-k retrieval: for each query, find its k nearest gallery entries
    (gallery entries sharing the query's identity and camera are removed first)
    and score a hit if any retrieved identity matches the query identity."""
    feat_query, feat_gallery = set_feat_query_gallery(features, query_idx, gallery_idx)

    rank_score = np.zeros(len(feat_query), dtype=int)

    color = np.zeros(rank + 1, dtype=int)
    i = 0
    for idx in query_idx:
        query_id = labels[idx]
        cam_id = cam_idx[idx]

        feat_gall_cam_rem, gall_cam_rem_idx = rem_feat_cam_label(feat_gallery, gallery_idx, query_id, cam_id,
                                                                 labels, cam_idx)

        if cluster_means is None:
            k_idx = knn(np.array(features[idx]), np.array(feat_gall_cam_rem), k=rank)
        else:
            cluster_idx = knn(np.array(features[idx]), np.array(cluster_means)).item()  # nearest cluster index
            k_idx = knn(np.array(cluster_means[cluster_idx]), np.array(feat_gall_cam_rem), k=rank)

        gallery_id = labels[gall_cam_rem_idx[k_idx]]
        file_idx = np.concatenate((idx, gall_cam_rem_idx[k_idx]), axis=None)

        for j in range(rank):
            if query_id == gallery_id[j]:
                color[j+1] = 1
                rank_score[i] = 1
            else:
                color[j+1] = 2

        print('-- Query:', query_id, '/ Gallery:', gallery_id, '/ Retrieval:', rank_score[i])
        if display:
            result_display(rank, color, file_list[file_idx])

        i += 1

    rank_score = np.mean(rank_score, axis=None)

    return rank_score
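All of these examples call a `knn` helper from a local `functions` module whose implementation is not shown. A minimal sketch consistent with how it is called here (assuming samples are stored as rows), returning the indices of the k nearest entries by Euclidean distance:

import numpy as np

def knn(query, gallery, k=1):
    # Sketch only: indices of the k gallery rows closest to the query vector.
    gallery = np.asarray(gallery)
    query = np.asarray(query).reshape(1, -1)
    dists = np.linalg.norm(gallery - query, axis=1)
    return np.argsort(dists)[:k]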
Example #2
def test_machine(data_test, machine, n_p=52):
    rows, cols = data_test.shape
    n_pp_test = int(cols / n_p)
    mach_size = len(machine)

    p_id_pred_ar = np.zeros(mach_size, dtype=int)
    prec = 0
    eps = np.zeros((mach_size, cols))
    y_actu, y_pred = np.zeros(cols, dtype=int), np.zeros(cols, dtype=int)

    for j in range(0, cols):
        test = data_test[:, j]

        y_actu[j] = j // n_pp_test

        for t in range(0, mach_size):
            test_proj = machine[t].w.T.dot((test - machine[t].mu)[:, None])
            indices = knn(test_proj, machine[t].data_train_proj)
            p_id_pred_ar[t] = mode(machine[t].data_id_memory[indices])[0]

            if y_actu[j] != p_id_pred_ar[t]:
                eps[t, j] += 1

        y_pred[j] = mode(p_id_pred_ar)[0]

        if y_actu[j] == y_pred[j]:
            prec += 1

    prec /= cols
    print('Precision is %.2f%%' % (100 * prec))

    e_av = np.mean(np.mean(np.square(eps), axis=1))
    e_com = np.mean(np.square(np.mean(eps, axis=0)))

    print('The average error of each machine member by acting individually is Eav = %.2f' % e_av)
    print('The expected error of the whole machine is Ecom = %.2f' % e_com)
    if e_com <= e_av:
        print('We have Ecom <= Eav \n -- Success!! Machine performs better than individual members. Good teamwork!')
    else:
        print('We have Ecom > Eav \n -- Failure... You need to review your teamwork.')

    conf_mat(y_actu, y_pred)
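The Eav/Ecom comparison above is the classic committee-machine error decomposition: the squared error of the averaged committee can never exceed the members' average squared error (Jensen's inequality). A self-contained numeric check of the identity the code computes:

import numpy as np

rng = np.random.default_rng(0)
eps = rng.integers(0, 2, size=(5, 100)).astype(float)  # 5 members, 100 test points
e_av = np.mean(np.mean(np.square(eps), axis=1))   # average individual squared error
e_com = np.mean(np.square(np.mean(eps, axis=0)))  # squared error of the committee mean
assert e_com <= e_av + 1e-12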
Example #3
def test_pca_lda(data_test, data_id_memory, data_train_proj, w, mu, n_p=52):
    rows, cols = data_test.shape
    n_pp_test = int(cols / n_p)

    prec = 0
    y_actu, y_pred = np.zeros(cols, dtype=int), np.zeros(cols, dtype=int)

    for j in range(0, cols):
        test = data_test[:, j]

        test_proj = w.T.dot((test - mu).reshape(rows, 1))

        index = knn(test_proj, data_train_proj)

        y_actu[j] = j // n_pp_test
        y_pred[j] = data_id_memory[index]

        if y_actu[j] == y_pred[j]:
            prec += 1

    prec /= cols
    print('Precision is %.2f%%' % (100 * prec))

    conf_mat(y_actu, y_pred)
Example #4
dataframe = functions.load_dataframe()

# Split into training and test sets
X_train, X_test, y_train, y_test = functions.train_test(dataframe)

# PCA model for extracting image features
pca = functions.pca_model(X_train)

# Training set with extracted features
X_train = pca.transform(X_train)

# Test set with extracted features
X_test = pca.transform(X_test)

# Train the KNN classification model
knn = functions.knn(X_train, y_train)

# Labels for the classifications
label = {
    0: "acho que nao...",  # "I don't think so..."
    1: "acho que sim..."   # "I think so..."
}

# Reading from the webcam...
while True:
    # Read the image and extract a frame
    status, frame = cam.read()

    if not status:
        break
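Both this snippet and Example #7 read frames from a `cam` object that the excerpts never define; a minimal OpenCV setup (an assumption — it would sit before the loop) is:

import cv2 as cv

# Assumed setup for the undefined `cam`: open the default webcam
cam = cv.VideoCapture(0)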
Example #5
Y = Y.to_numpy()

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=5)

print('Ridge Linear Regression')

alpha_list = [0, .1, 0.3, .5, 1, 1.2]

functions.ridge_regression(X_train, X_test, Y_train, Y_test, alpha_list, True,
                           cfg.default.student_figures, 'ridge_reg')

print('KNN')

k_values = [1, 2, 5, 7, 10]

functions.knn(X_train, X_test, Y_train, Y_test, k_values, True, ['uniform', 'distance'],
              cfg.default.student_figures, 'knn')

print('Decision Tree Regression')

max_depths = [1, 10, 30, 50, 100, 300]
min_weight_fraction_leafs = [.0, .125, .25, .375, .5]
min_samples_leaf = [1, 10, 100, 200]

functions.decision_tree(X_train, X_test, Y_train, Y_test, max_depths, min_weight_fraction_leafs, min_samples_leaf,
                        cfg.default.student_figures, 'dtree')

print('MLP')

scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)
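The excerpt ends just after scaling; presumably the scaled features feed an MLP regressor next. A minimal sketch with scikit-learn's MLPRegressor (the actual `functions` call is not shown; the hyperparameters here are illustrative assumptions):

from sklearn.neural_network import MLPRegressor

# Sketch: fit an MLP on the standardized features and report R^2 on the test set
mlp = MLPRegressor(hidden_layer_sizes=(100,), max_iter=1000, random_state=5)
mlp.fit(X_train_scaled, Y_train)
print('MLP R^2:', mlp.score(X_test_scaled, Y_test))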
Example #6
test = test.replace('\r', '')  # Remove the carriage return (Windows line endings)

#Load the data in
with open(train,"r") as read_file:
    train = json.load(read_file)
with open(test,"r") as read_file:
    test = json.load(read_file)
with open(val,"r") as read_file:
    val = json.load(read_file)

#Loop through k
accuracy = np.zeros(kmax+1)
for k_index in range(1,kmax+1):
    nn = []
    winners = []
    nn = knn(k_index,train,val)
    winners = predict_label(train,val,nn)
    accuracy[k_index] = calculate_accuracy(val,winners)
    print(k_index, accuracy[k_index], sep=",")

#Print optimal k
opt_k = np.argmax(accuracy)
print(opt_k)

#Find accuracy on test set
#Train on train + val
train['data'].extend(val['data'])
nn = knn(opt_k,train,test)
winners = predict_label(train,test,nn)
test_accuracy = calculate_accuracy(test,winners)
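The excerpt computes the test-set accuracy but never reports it; presumably a final print follows:

print('test accuracy:', test_accuracy)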
Example #7
# Model for detecting faces
file_name = "haarcascade_frontalface_alt2.xml"
classifier = cv.CascadeClassifier(f"{cv.haarcascades}/{file_name}")

# Load the dataframe with the training images
dataframe = functions.load_dataframe()

# Split into training and test sets
X_train, X_test, y_train, y_test = functions.train_test(dataframe)

# PCA model for extracting image features
pca = functions.pca_model(X_train)

X_train = pca.transform(X_train)  # Training set with extracted features
X_test = pca.transform(X_test)    # Test set with extracted features

# Train the KNN classification model
knn = functions.knn(X_train, y_train)

# Labels for the classifications
label = {0: "Sem mascara", 1: "Com mascara"}  # "No mask" / "Mask on"

# Reading from the webcam...
while True:
    status, frame = cam.read()  # Read the image and extract a frame

    if not status:
        break

    if cv.waitKey(1) & 0xff == ord('q'):
        break

    # Convert the image to grayscale
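    # --- Plausible continuation (sketch): the original excerpt is truncated at
    # --- the grayscale step. The resize dimensions, the drawing details, and the
    # --- sklearn-style knn/pca objects are assumptions, not part of the original.
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    faces = classifier.detectMultiScale(gray)
    for (x, y, w, h) in faces:
        # Crop the face, resize to the (assumed) training resolution, flatten,
        # project with the PCA model, and classify with the trained KNN
        face = cv.resize(gray[y:y + h, x:x + w], (160, 160)).flatten()
        pred = knn.predict(pca.transform([face]))[0]
        cv.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv.putText(frame, label[pred], (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.9,
                   (0, 255, 0), 2)
    cv.imshow('webcam', frame)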
Example #8
import functions as fs
import numpy as np
import matplotlib.pyplot as plt
import time

t1 = time.time()
# Load the raw data, build the train/test sets, and extract feature set 1
train, test = fs.init_data()
trainSet, testSet = fs.data_ready1(train, test)
trainSetf1, testSetf1 = fs.feat1(trainSet, testSet)
#trainSetf1, testSetf1 = fs.feat2(trainSet, testSet, dX=3)

# 1-NN classification, then accuracy/precision/recall/F1 measures
result = fs.knn(trainSetf1, testSetf1, k=1)
acc, pre, rec, f1 = fs.calcMeasure(result)
t2 = time.time()
print(t2 - t1)     # elapsed time in seconds
print(acc.mean())  # mean accuracy
print(f1.mean())   # mean F1 score
Example #9
def n_features(input_excel, ark, y_navn):
    '''
    Classification using the fourteen classifiers defined in the script "functions".
    Runs 4-fold CV twice (once per random seed) with a different split each time,
    trying every number of features from 1 to 20.

    :param str input_excel: The name of the Excel file with the dataset
    :param str ark: The name of the sheet with the dataset
    :param str y_navn: The name of the column with the response
    :return: Matrix with the scores of all classifications for each number of features
    '''

    # Reads the excel-file
    xls = pd.ExcelFile(input_excel)
    data_raw_df = pd.read_excel(xls, sheet_name=ark, index_col=0)

    # Create the result matrix: 2 seeds x 20 feature counts,
    # each a (14, 4) array (14 classifiers x 4 folds)
    results = [[np.zeros((14, 4)) for _ in range(20)] for _ in range(2)]

    # Split the response y from the variables X and set the random states
    y_name = y_navn
    y = data_raw_df[y_name].values
    X = data_raw_df.drop(columns=y_name)
    colNames = list(X.columns)
    states = [209, 979]  # Change to wanted seeds
    features = []
    stdsc = StandardScaler()

    # Split the dataset into the 2*4 folds, select features, and run the
    # classifiers for 1-20 features.
    for l in range(0, 2):
        state = states[l]
        cv = StratifiedKFold(n_splits=4, random_state=state, shuffle=True)
        for k in range(0, 20):
            i = 0
            n_features = k + 1
            for train, test in cv.split(X, y):
                print(k, i)
                X_train = X.iloc[train]
                X_test = X.iloc[test]
                y_train = y[train]
                y_test = y[test]
                X_std_train = stdsc.fit_transform(X_train)
                X_std_test = stdsc.transform(X_test)
                X_std_train, X_std_test, features = relieff(X_std_train, X_std_test,
                                                            y_train, n_features,
                                                            colNames, features)

                model = logrel1(X_std_train, X_std_test, y_train, y_test,
                                state)
                results[l][k][0, i] = model.score(X_std_test, y_test)
                model = logrel2(X_std_train, X_std_test, y_train, y_test,
                                state)
                results[l][k][1, i] = model.score(X_std_test, y_test)
                model = rf(X_train, y_train, X_test, y_test, state)
                results[l][k][2, i] = model.score(X_test, y_test)
                model = knn(X_std_train, X_std_test, y_train, y_test)
                results[l][k][3, i] = model.score(X_std_test, y_test)
                model = adaboostlog(X_std_train,
                                    X_std_test,
                                    y_train,
                                    y_test,
                                    state=state)
                results[l][k][4, i] = model.score(X_std_test, y_test)
                model = decisiontree(X_std_train, y_train, X_std_test, y_test,
                                     state)
                results[l][k][5, i] = model.score(X_std_test, y_test)
                model = gnb(X_std_train,
                            X_std_test,
                            y_train,
                            y_test,
                            state=state)
                results[l][k][6, i] = model.score(X_std_test, y_test)
                model = lda(X_std_train, X_std_test, y_train, y_test)
                results[l][k][7, i] = model.score(X_std_test, y_test)
                model = qda(X_std_train, X_std_test, y_train, y_test)
                results[l][k][8, i] = model.score(X_std_test, y_test)
                model = nnet(X_std_train,
                             X_std_test,
                             y_train,
                             y_test,
                             state=state)
                results[l][k][9, i] = model.score(X_std_test, y_test)
                model = mars(X_std_train, X_std_test, y_train, y_test)
                results[l][k][10, i] = model.score(X_std_test, y_test)
                model = plsr(X_std_train, X_std_test, y_train, y_test)
                results[l][k][11, i] = model.score(X_std_test, y_test)
                model = svc(X_std_train, X_std_test, y_train, y_test, state)
                results[l][k][12, i] = model.score(X_std_test, y_test)
                model = linearsvc(X_std_train, X_std_test, y_train, y_test,
                                  state)
                results[l][k][13, i] = model.score(X_std_test, y_test)
                i += 1

    return results
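A short usage sketch for the returned structure (the file, sheet, and column names below are placeholders): average each classifier's score over both seeds and all four folds for a chosen feature count.

import numpy as np

res = n_features('dataset.xlsx', 'Sheet1', 'response')  # placeholder arguments
k = 9  # index 9 corresponds to 10 features
mean_scores = np.mean([res[l][k] for l in (0, 1)], axis=(0, 2))
print(mean_scores)  # one mean score per classifier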
Example #10
def classify(input_excel, ark, y_navn, n_features):
    '''
    Classification using the fourteen classifiers defined in the script "functions".
    Runs 4-fold CV ten times (once per random seed) with a different split each time,
    using the given number of features and the chosen feature selector.

    :param str input_excel: The name of the Excel file with the dataset
    :param str ark: The name of the sheet with the dataset
    :param str y_navn: The name of the column with the response
    :param int n_features: Number of features to use in the models
    :return: Matrix with the scores of all classifications and the selected features
    '''
    # Reads the excel-file
    xls = pd.ExcelFile(input_excel)
    data_raw_df = pd.read_excel(xls, sheet_name=ark, index_col=0)

    # Create the result matrix: one (14, 4) array (14 classifiers x 4 folds) per seed
    results = [np.zeros((14, 4)) for _ in range(10)]

    # Split the response y from the variables X and set the random states
    y_name = y_navn
    y = data_raw_df[y_name].values
    X = data_raw_df.drop(columns=y_name)
    colNames = list(X.columns)
    states = [108, 355, 44, 129, 111, 362, 988, 266, 82,
              581]  # Change to wanted seeds
    features = []
    stdsc = StandardScaler()

    # Split the dataset into the 10*4 folds, select features, and run the classifiers
    for k in range(0, 10):
        i = 0
        state = states[k]
        cv = StratifiedKFold(n_splits=4, random_state=state, shuffle=True)
        for train, test in cv.split(X, y):
            print(k, i)
            X_train = X.iloc[train]
            X_test = X.iloc[test]
            y_train = y[train]
            y_test = y[test]
            X_std_train = stdsc.fit_transform(X_train)
            X_std_test = stdsc.transform(X_test)
            X_std_train, X_std_test, features = relieff(
                X_std_train, X_std_test, y_train, n_features, colNames,
                features)

            model = logrel1(X_std_train, X_std_test, state)
            print('Test score L1-Logistic regression:',
                  model.score(X_std_test, y_test))
            results[k][0, i] = model.score(X_std_test, y_test)
            model = logrel2(X_std_train, X_std_test, state)
            print('Test score L2-Logistic regression:',
                  model.score(X_std_test, y_test))
            results[k][1, i] = model.score(X_std_test, y_test)
            model = rf(X_train, y_train, state)
            print('Test score Random forest:', model.score(X_test, y_test))
            results[k][2, i] = model.score(X_test, y_test)
            model = knn(X_std_train, y_train)
            print('Test score KNN:', model.score(X_std_test, y_test))
            results[k][3, i] = model.score(X_std_test, y_test)
            model = adaboostlog(X_std_train, y_train, state=state)
            print('Test score AdaBoost:', model.score(X_std_test, y_test))
            results[k][4, i] = model.score(X_std_test, y_test)
            model = decisiontree(X_std_train, y_train, state)
            print('Test score Decision Tree:', model.score(X_std_test, y_test))
            results[k][5, i] = model.score(X_std_test, y_test)
            model = gnb(X_std_train, y_train, state=state)
            print('Test score GNB:', model.score(X_std_test, y_test))
            results[k][6, i] = model.score(X_std_test, y_test)
            model = lda(X_std_train, y_train)
            print('Test score Linear LDA:', model.score(X_std_test, y_test))
            results[k][7, i] = model.score(X_std_test, y_test)
            model = qda(X_std_train, y_train)
            print('Test score QDA:', model.score(X_std_test, y_test))
            results[k][8, i] = model.score(X_std_test, y_test)
            model = nnet(X_std_train, y_train, state=state)
            print('Test score Neural network:',
                  model.score(X_std_test, y_test))
            results[k][9, i] = model.score(X_std_test, y_test)
            model = mars(X_std_train, y_train)
            print('Test score MARS:', model.score(X_std_test, y_test))
            results[k][10, i] = model.score(X_std_test, y_test)
            model = plsr(X_std_train, y_train)
            print('Test score PLSR:', model.score(X_std_test, y_test))
            results[k][11, i] = model.score(X_std_test, y_test)
            model = svc(X_std_train, X_std_test, y_train, y_test, state)
            print('Test score SVC:', model.score(X_std_test, y_test))
            results[k][12, i] = model.score(X_std_test, y_test)
            model = linearsvc(X_std_train, y_train, state)
            print('Test score Linear SVC:', model.score(X_std_test, y_test))
            results[k][13, i] = model.score(X_std_test, y_test)
            i += 1

    return results, features
Example #11
from sklearn.neighbors import KNeighborsClassifier
import functions as fs
import numpy as np

train, test = fs.init_data()
trainSet, testSet = fs.data_ready2(train, test)
result = fs.knn(trainSet, testSet, 3000)

#label = np.tile(np.arange(0,10),(300,1))

#knn = KNeighborsClassifier(n_neighbors=10)
#knn.fit(trainSet, label.T.flatten())
#print(testSet)
#result = knn.predict(testSet)
#result = result.reshape(10,100).T
acc, pre, rec, f1 = fs.calcMeasure(result)
#print(acc, pre, rec, f1)
print(acc, end="\n\n")
print(f1)
Example #12
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=5)

print('Ridge Linear Regression')

alpha_list = [0, .1, 0.3, .5, 1, 1.2]

functions.ridge_regression(X_train, X_test, y_train, y_test, alpha_list, True,
                           cfg.default.real_estate_figures, 'ridge_reg')

print('KNN')

k_values = [1, 2, 5, 7, 10]

functions.knn(X_train, X_test, y_train, y_test, k_values, True,
              ['uniform', 'distance'], cfg.default.real_estate_figures, 'knn')

print('Decision Tree Regression')

max_depths = [1, 10, 30, 50, 100, 300]
min_weight_fraction_leafs = [.0, .125, .25, .375, .5]
min_samples_leaf = [1, 10, 100, 200]

functions.decision_tree(X_train, X_test, y_train, y_test, max_depths,
                        min_weight_fraction_leafs, min_samples_leaf,
                        cfg.default.real_estate_figures, 'dtree')

print('MLP')

scaler = preprocessing.StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
Example #13
percentage_test = 0.2

validation_methods = ['holdout', 'cross-validation']
baselines = ['stratified', 'uniform']

path = cfg.default.amazon_figures

# %% #%% k-Nearest Neighbor Classification
# k-nn
if True:
    functions.knn(X=training_data_x,
                  y=training_data_y_encoded,
                  test_size=percentage_test,
                  random_state=random_seed,
                  list_k=[1, 2, 5, 8, 9, 10, 11, 12, 15, 20],
                  scaling=True,
                  weights=['uniform', 'distance'],
                  validation_methods=validation_methods,
                  baselines=baselines,
                  path=path,
                  filename='knn')

if True:
    # Plot performance (efficiency and effectiveness)
    functions.plot_evaluation_knn(path, 'knn')
if True:
    # For cross-validation scatter-plot fit time mean and score time
    functions.plot_efficiency_knn(path, 'knn')

if True:
    # For cross-validation scatter-plot accuracy mean and standard deviation
    functions.plot_accuracy_knn(path, 'knn')
Example #14
#!/usr/bin/python3.6
import json
import numpy as np
import sys
import argparse
from functions import knn, display_winner

parser = argparse.ArgumentParser(description='Implement a k-NN algorithm')
parser.add_argument(
    "-k",
    type=int,
    help="Number of nearest neighbors (k) to use")
parser.add_argument('-train', type=str, help='Train data set path')
parser.add_argument('-test', type=str, help='Test data set path')
args = parser.parse_args()
k = args.k
train = args.train
test = args.test
test = test.replace('\r', '')  # Remove the carriage return (Windows line endings)

#Load the data in
with open(train, "r") as read_file:
    train = json.load(read_file)
with open(test, "r") as read_file:
    test = json.load(read_file)

nn = knn(k, train, test)
display_winner(train, test, nn)
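A typical invocation of this script (the script and dataset file names below are placeholders):

python3 knn.py -k 5 -train train.json -test test.json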
Example #15
#%% Data scaling (remove mean and scale to unit variance)
# scaler = preprocessing.StandardScaler().fit(X_train)
# X_train_scaled = scaler.transform(X_train)
# X_test_scaled = scaler.transform(X_test)


validation_methods = ['holdout', 'cross-validation']
baselines = ['stratified', 'uniform']

#%% k-Nearest Neighbor Classification
if False:
    list_k = [1, 10, 50, 100, 300, 500]
    weights = ['uniform', 'distance']

    functions.knn(X, y, test_size, random_state, list_k, True,
                  weights, validation_methods, baselines,
                  cfg.default.occupancy_figures, 'knn')

if False:
    # Plot performance (efficiency and effectiveness)
    functions.plot_evaluation_knn(cfg.default.occupancy_figures, 'knn')
if False:
    # For cross-validation scatter-plot fit time mean and score time
    functions.plot_efficiency_knn(cfg.default.occupancy_figures, 'knn')

if False:
    # For cross-validation scatter-plot accuracy mean and standard deviation
    functions.plot_accuracy_knn(cfg.default.occupancy_figures, 'knn')
if False:
    # List variants with highest and lowest accuracy values
    path = cfg.default.occupancy_figures
Example #16
def test_mmachine(data_test, *mmachs, n_p=52, fusion='vote'):

    fusion_dict = {'vote': 0, 'prod': 1, 'sum': 2}
    if fusion in fusion_dict:
        print("Fusion scheme is '%s'" % fusion)
    else:
        print('Invalid fusion scheme.\nExiting.')
        return

    rows, cols = data_test.shape
    n_pp_test = int(cols / n_p)
    mmach_size = len(mmachs)
    p_id_pred_mtrx = np.zeros((mmach_size, n_p), dtype=int)
    prec = 0
    eps = np.zeros((mmach_size, cols))
    y_actu, y_pred = np.zeros(cols, dtype=int), np.zeros(cols, dtype=int)

    for j in range(0, cols):
        test = data_test[:, j]

        y_actu[j] = j // n_pp_test

        i = 0
        for mach in mmachs:
            mach_size = len(mach)

            p_id_pred_ar = np.zeros(mach_size, dtype=int)

            for t in range(0, mach_size):
                test_proj = mach[t].w.T.dot((test - mach[t].mu)[:, None])
                indices = knn(test_proj, mach[t].data_train_proj)
                p_id_pred_ar[t] = mode(mach[t].data_id_memory[indices])[0]

            if y_actu[j] != mode(p_id_pred_ar)[0]:
                eps[i, j] += 1

            for k in range(0, n_p):
                k_set = np.transpose(np.argwhere(p_id_pred_ar == k))[0]
                p_id_pred_mtrx[i, k] = k_set.size

            i += 1

        if fusion_dict.get(fusion) == 0:
            y_pred[j] = mode(np.argmax(p_id_pred_mtrx, axis=1))[0]
        elif fusion_dict.get(fusion) == 1:
            y_pred[j] = np.argmax(np.prod(p_id_pred_mtrx, axis=0))
        elif fusion_dict.get(fusion) == 2:
            y_pred[j] = np.argmax(np.sum(p_id_pred_mtrx, axis=0))

        if y_actu[j] == y_pred[j]:
            prec += 1

    prec /= cols
    print('Precision is %.2f%%' % (100 * prec))

    e_mach_avg = np.mean(np.mean(np.square(eps), axis=1))
    e_mmach = np.mean(np.square(np.mean(eps, axis=0)))

    print('The average error of each machine member by acting individually is Emach = %.2f' % e_mach_avg)
    print('The expected error of the whole master machine is Emach+ = %.2f' % e_mmach)
    if e_mmach <= e_mach_avg:
        print('We have Emach+ <= Emach \n -- Success!! Master Machine performs better than individual machines. '
              'Great teamwork!')
    else:
        print('We have Emach+ > Emach \n -- Failure... You need to review your teamwork.')

    conf_mat(y_actu, y_pred)
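`conf_mat` (used here and in Examples #2 and #3) also comes from the local `functions` module and is not shown; a minimal stand-in sketch with scikit-learn could be:

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

def conf_mat(y_actu, y_pred):
    # Hypothetical sketch of the helper: plot the confusion matrix
    cm = confusion_matrix(y_actu, y_pred)
    ConfusionMatrixDisplay(cm).plot(cmap='Blues')
    plt.show()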