Example #1
def kNN_test(X_train, X_test, y_train, y_test, distance="euclidean", k=3):
    # Predict a class for each test sample (y_test is accepted but not used here).
    output_classes = []
    for i in range(X_test.shape[0]):
        output = knn.train(X_train, X_test[i], distance, k)
        predictedClass = knn.predict(output, y_train)
        output_classes.append(predictedClass)
    return output_classes
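For orientation, here is a minimal self-contained sketch (a hypothetical helper in plain NumPy, not the project's knn module) of what a per-sample train/predict pair like the one above typically boils down to: rank the training points by Euclidean distance to the query and take a majority vote over the k nearest labels.

import numpy as np
from collections import Counter

def knn_predict_one(X_train, y_train, x, k=3):
    # Euclidean distance from the query point x to every training sample
    dists = np.linalg.norm(X_train - x, axis=1)
    # indices of the k closest training samples
    nearest = np.argsort(dists)[:k]
    # majority vote over their labels
    return Counter(y_train[nearest]).most_common(1)[0][0]

# toy usage
X_train = np.array([[0.0, 0.0], [0.1, 0.2], [5.0, 5.1], [5.2, 4.9]])
y_train = np.array([0, 0, 1, 1])
print(knn_predict_one(X_train, y_train, np.array([5.0, 5.0]), k=3))  # -> 1
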
Example #2
def train(train_x, train_y):
    '''Train all classifiers and print the training time of each.'''
    tree_start = time.time()
    tree_clf = tree.train(train_x, train_y)
    print('Decision Tree - Training Time: ', round(time.time() - tree_start,
                                                   3), 's')

    svm_start = time.time()
    svm_clf = svm.train(train_x, train_y)
    print('SVM - Training Time: ', round(time.time() - svm_start, 3), 's')

    knn_start = time.time()
    knn_clf = knn.train(train_x, train_y)
    print('k-NN - Training Time: ', round(time.time() - knn_start, 3), 's')

    nn_start = time.time()
    nn_clf = nn.train(train_x, train_y)
    print('Neural Network - Training Time: ', round(time.time() - nn_start, 3),
          's')

    boost_start = time.time()
    boost_clf = boost.train(train_x, train_y)
    print('Boosted Tree - Training Time: ', round(time.time() - boost_start,
                                                  3), 's')

    return [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf]
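A small side note on the timing pattern above: time.perf_counter() is generally the more precise clock for measuring elapsed durations. A minimal sketch (the summation is just a stand-in for a training call):

import time

start = time.perf_counter()
_ = sum(i * i for i in range(100_000))  # stand-in workload
print('elapsed:', round(time.perf_counter() - start, 3), 's')
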
Example #3
def train(folds_x, folds_y):
    '''Train all classifiers on the given folds.'''
    tree_clf = tree.train(folds_x, folds_y)
    svm_clf = svm.train(folds_x, folds_y)
    knn_clf = knn.train(folds_x, folds_y)
    nn_clf = nn.train(folds_x, folds_y)
    boost_clf = boost.train(folds_x, folds_y)

    return [tree_clf, svm_clf, knn_clf, nn_clf, boost_clf]
Example #4
def main():
    args_parser = build_args_parser()
    args = args_parser.parse_args()

    results_dir_path = 'results'
    raw_data_dir_path = 'data'

    if not os.path.exists(results_dir_path):
        os.makedirs(results_dir_path)

    for file_path in glob.glob(raw_data_dir_path + '/*.csv'):
        file_name = pathlib.Path(file_path).stem

        df = pd.read_csv(file_path)
        train_sample, test_sample = train_test_split(df, test_size=0.2)

        model = None

        if args.option == 1:
            model = nb.train(train_sample)
        elif args.option == 2:
            model = knn.train(train_sample)

        if model is not None:
            predicted = model.predict(test_sample['conteudo'])
            accuracy = np.mean(predicted == test_sample['saida'])

            from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

            file = open(results_dir_path + "/" + file_name + "_classification.txt", "w")
            file.write("Number of training entries: " + str(len(train_sample.index)) + "\n")
            file.write("Number of test entries: " + str(len(test_sample.index)) + "\n")
            file.write("Accuracy: " + str(accuracy) + "\n")
            file.write(str(confusion_matrix(test_sample['saida'], predicted)))
            file.write(str(classification_report(test_sample['saida'], predicted)))
            file.write(str(accuracy_score(test_sample['saida'], predicted)))
            # file.write(str("Recall: %0.2f (+/- %0.2f)" % (scores['test_recall_macro'] .mean(), scores['test_recall_macro'] .std() * 2)))
            file.close()
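One clarification on the metric written above: the hand-rolled mean of element-wise equality is plain accuracy and matches sklearn's accuracy_score. A tiny self-contained check with made-up labels:

import numpy as np
from sklearn.metrics import accuracy_score

y_true = np.array(['pos', 'neg', 'pos', 'pos'])
y_pred = np.array(['pos', 'neg', 'neg', 'pos'])
print(np.mean(y_pred == y_true))       # 0.75
print(accuracy_score(y_true, y_pred))  # 0.75
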
Example #5
from knn import train

if __name__ == "__main__":
    print("Training KNN classifier...")
    classifier = train("data/train",
                       model_save_path="data/model/knn.clf",
                       n_neighbors=4)
    # TODO: set n_neighbors dynamically to the number of people (see the sketch after this example)
    print("Training complete!")

    # PREDICTION
    # for image_file in os.listdir("data/train/biden"):
    #     full_file_path = os.path.join("data/train/biden", image_file)
    #     if 'DS_Store' in full_file_path:
    #         continue
    #     print("Looking for faces in {}".format(image_file))

    #     # Find all people in the image using a trained classifier model
    #     predictions = predict(
    #         full_file_path, model_path="more_class.clf")

    #     # Print results on the console
    #     for name, (top, right, bottom, left) in predictions:
    #         print("- Found {} at ({}, {})".format(name, left, top))

    #     Display results overlaid on an image
    #     show_prediction_labels_on_image(os.path.join(
    #         "data/train/phu", image_file), predictions)
Example #6
    return LabeledSample(old_sample.label, new_features)


def linear_project(dataset, d):
    n_features = len(dataset.training_set[0].features)

    new_basis = []
    for i in range(d):
        new_basis.append(unit_normal(n_features))

    new_training = list(
        transform_sample(s, new_basis) for s in dataset.training_set)
    new_validation = list(
        transform_sample(s, new_basis) for s in dataset.validation_set)
    new_testing = list(
        transform_sample(s, new_basis) for s in dataset.testing_set)
    return NumericalDataset(new_training, new_validation, new_testing,
                            DatasetNumericalMetadata(d))


if __name__ == '__main__':
    # here, k is the number of neighbours used by the kNN classifier,
    # and d is the target dimensionality when projecting the dataset.

    dataset = pickle.load(open(sys.argv[1], "rb")).convert_to_numerical()
    k = int(sys.argv[2])
    d = int(sys.argv[3])
    dataset = linear_project(dataset, d)
    model = knn.train(dataset.training_set, k)
    evaluate_model(model, dataset)
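linear_project above re-expresses every sample in a basis of d randomly drawn unit vectors (judging by the unit_normal helper). The same idea in plain NumPy, as a hypothetical stand-alone sketch rather than the project's unit_normal/transform_sample helpers:

import numpy as np

def random_projection(X, d, seed=0):
    rng = np.random.default_rng(seed)
    n_features = X.shape[1]
    basis = rng.normal(size=(n_features, d))
    basis /= np.linalg.norm(basis, axis=0)  # make each column unit length
    return X @ basis                        # shape (n_samples, d)

X = np.arange(12, dtype=float).reshape(4, 3)  # toy data: 4 samples, 3 features
print(random_projection(X, d=2).shape)        # -> (4, 2)
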
Example #7
    # sys.stdout = open('sysout.txt', 'w')

    # Parsing the input file and creating the image objects
    image_list = list()
    with open(file_name, 'r') as t_file:
        for line in t_file:
            image_list.append(Image(line))

    print 'Start time', time()

    if model == 'best':
        model = 'nnet'

    # K-Nearest neighbors
    if model == 'nearest' and phase == 'train':
        model = knn.train(image_list)
        serialize_to_file(model, model_file)
    elif model == 'nearest' and phase == 'test':
        model = deserialize_from_file(model_file)
        knn.test(image_list, model)

    # ADA boost
    elif model == "adaboost" and phase == "train":
        params = Adaboost(image_list).adaboost()
        serialize_to_file(params, model_file)
    elif model == "adaboost" and phase == "test":
        params = deserialize_from_file(model_file)
        Adaboost(image_list).adaboost_test(image_list, params)

    # Neural net
    elif model == 'nnet' and phase == 'train':
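The snippet is cut off here. The serialize_to_file / deserialize_from_file helpers it relies on are presumably thin pickle wrappers; a hypothetical sketch of what such helpers usually look like:

import pickle

def serialize_to_file(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)

def deserialize_from_file(path):
    with open(path, 'rb') as f:
        return pickle.load(f)
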
Example #8
import numpy

DATASET_PATH = 'datasets/iris.data'

# Load the dataset
dataset = []
with open(DATASET_PATH, 'r') as file:
    for line in file:
        line = line.strip().split(',')
        sample = [float(line[i]) for i in range(len(line) - 1)]
        sample.append(line[-1])
        dataset.append(sample)
# dataset = numpy.loadtxt(DATASET_PATH)
# Normalize the feature values
scaler = preprocess.MinMaxScaler(dataset)
dataset = scaler.scale(dataset)

# bestK, error = knn.train(dataset, True)
# print 'best k: %d' % bestK
# print 'average error: %f' % error
bestK, accuracy = knn.train(dataset, False)
print 'best k: %d' % bestK
print 'average accuracy: %f%%' % (accuracy * 100)

# Train with knn (find the best k)
# Predict with knn (test set made up by hand)
# testset = [[800, 0, 0.3048, 71.3, 0.00266337]]
# testset = scaler.scale(testset)
# classList = knn.classify(dataset, testset, bestK, True)
# print 'predicted class: %s' % classList[0]
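For reference, the min-max scaling step used above rescales each numeric feature to [0, 1] via (x - min) / (max - min). A hypothetical stand-alone sketch (not the project's preprocess.MinMaxScaler) that leaves the trailing class label untouched:

def min_max_scale(rows):
    n = len(rows[0]) - 1  # last column is the class label
    lo = [min(r[i] for r in rows) for i in range(n)]
    hi = [max(r[i] for r in rows) for i in range(n)]
    return [
        [(r[i] - lo[i]) / (hi[i] - lo[i]) if hi[i] > lo[i] else 0.0
         for i in range(n)] + [r[-1]]
        for r in rows
    ]

sample = [[1.0, 10.0, 'a'], [3.0, 20.0, 'b'], [2.0, 15.0, 'a']]
print(min_max_scale(sample))  # features scaled to [0, 1], labels unchanged
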
Example #9
            avg_acc_lvq1 = 0
            avg_acc_lvq2 = 0
            avg_acc_lvq3 = 0
            avg_acc_knn = 0
            for i in range(len(folded_dataset)):
                test_set = folded_dataset[i]
                training_set = []
                for j in range(len(folded_dataset)):
                    if j != i:
                        training_set += folded_dataset[j]

                lvq1_proto = lvq.lvq1(training_set, proto_num, .01)
                lvq2_proto = lvq.lvq2(lvq1_proto, training_set, .01)
                lvq3_proto = lvq.lvq3(lvq1_proto, training_set, .01)

                avg_acc_lvq1 += knn.train(lvq1_proto, test_set, k)
                avg_acc_lvq2 += knn.train(lvq2_proto, test_set, k)
                avg_acc_lvq3 += knn.train(lvq3_proto, test_set, k)
                avg_acc_knn += knn.train(training_set, test_set, k)

            avg_acc_lvq1 = 100*avg_acc_lvq1/len(folded_dataset)
            avg_acc_lvq2 = 100*avg_acc_lvq2/len(folded_dataset)
            avg_acc_lvq3 = 100*avg_acc_lvq3/len(folded_dataset)
            avg_acc_knn = 100*avg_acc_knn/len(folded_dataset)

            print("""
DATASET {} - prototype number: {}
lvq1: {:2.2f}%
lvq2: {:2.2f}%
lvq3: {:2.2f}%
{}-nn: {:2.2f}%""".format(c, proto_num, avg_acc_lvq1, avg_acc_lvq2, avg_acc_lvq3, k, avg_acc_knn))
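The loop above is leave-one-fold-out cross-validation: each fold takes a turn as the test set while the remaining folds are concatenated into the training set. A minimal sketch of that fold handling (hypothetical, with a toy list standing in for the real dataset):

def make_folds(dataset, n_folds):
    # round-robin split into n roughly equal folds
    return [dataset[i::n_folds] for i in range(n_folds)]

data = list(range(10))
folds = make_folds(data, 3)
for i, test_fold in enumerate(folds):
    train_fold = [x for j, fold in enumerate(folds) if j != i for x in fold]
    # ... train on train_fold, evaluate on test_fold ...
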
Example #10
import knn
import naiveBayes
import sys
import os

trainingSet, lexicon = knn.train()
naiveBayes.trainNaiveBayes("train_formatted.txt")
k = 5
while True:
    test_in = raw_input("Input: ")
    if test_in == "q":
        exit()    
    
    #baseline
    pos = 0
    neg = 0    
    words = test_in.split(" ")
    for word in words:
        try:
            if lexicon[word.lower()] == 1:
                pos += 1
            elif lexicon[word.lower()] == 0:
                neg += 1
        except KeyError:
            pass
    
    if pos > neg:
       pass
       #print("Baseline: positive")
    elif pos < neg:
        pass
Example #11
            print("output file has not been generated")
        
        if myBoost.isTrained:
            Xtest, yTest, XtestID = myBoost.getDataFromFile(train_test_file)
            finalPredictions = myBoost.predict(Xtest)
            myBoost.writeToFile(XtestID, finalPredictions, 'output.txt')
            print("Accuracy is: ", sum(finalPredictions == yTest)/len(yTest))
        else:
            print("Untrained model being tested")

#train train-data.txt knn_model.txt knn
#test test-data.txt knn_model.txt knn
if model == 'knn':
    
    if trainOrTest == 'train':
        knn.train(train_test_file,model_file)
        
    if trainOrTest == 'test':
        try:
            myKnn = open(model_file, 'rb')
        except IOError:
            print("model file has not been generated")
        
        finalPredictions, yTest, XtestID = knn.test(48, model_file, train_test_file)
        knn.writeToFile(XtestID, finalPredictions, 'output.txt')
        print("Accuracy is: ", knn.accuracy(finalPredictions, yTest))

    
    
    
    
Example #12
# 201420907_homework1 main.py
import knn
import numpy as np


train_data = np.loadtxt(fname='./digits_data/digits_train.csv', delimiter=',', dtype='float64')   # Load the training set
X_train, Y_train = knn.train(train_data)  # k-NN has no real training step, so knn.train(data) just loads the training data.

test_data = np.loadtxt(fname='./digits_data/digits_test.csv', delimiter=',', dtype='float64')     # Load the test set
X_test = np.array(test_data[:, 1:], dtype='float64')  # Features of the test set
Y_test = np.array(test_data[:, 0], dtype='int64')  # Labels of the test set

# knn.predict(train_x, train_y, test_x, k)
knn_Y_pred = knn.predict(X_train, Y_train, X_test, 1)  # k is the number of neighbors

# Calculation of TP, TN, FP, FN
TP = np.zeros(10)  # True Positives (correctly predicted as the target class)
TN = np.zeros(10)  # True Negatives (correctly predicted as a non-target class)
FP = np.zeros(10)  # False Positives (incorrectly predicted as the target class)
FN = np.zeros(10)  # False Negatives (target class incorrectly predicted as another class)

Confusion_matrix = np.zeros((10, 10), dtype='int64')

for idx in range(200):
    Confusion_matrix[Y_test[idx]][knn_Y_pred[idx]] += 1

for i in range(10):
    TP[i] = Confusion_matrix[i][i]

    for j in range(10):
        if i != j:
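The example is cut off at this point. For reference, the remaining per-class counts are normally read straight off the confusion matrix (rows = true class, columns = predicted class, as built above); a hypothetical completion in plain NumPy with a toy 2-class matrix:

import numpy as np

C = np.array([[5, 1], [2, 7]])   # toy confusion matrix
TP = np.diag(C)                  # correct predictions per class
FP = C.sum(axis=0) - TP          # predicted as the class, but actually another class
FN = C.sum(axis=1) - TP          # belong to the class, but predicted as another class
TN = C.sum() - (TP + FP + FN)    # everything else
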
Example #13
        └── ...
'''

if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("usage: python3 %s <algorithm> <train_data_dir> [model_name]" %
              sys.argv[0])
        sys.exit(2)

    face_algorithm = sys.argv[1]

    if face_algorithm not in ALGORITHM:
        print('Algorithm not found!')
        sys.exit(2)

    train_data_dir = sys.argv[2]

    if len(sys.argv) > 3:
        model_name = sys.argv[3]
    else:
        model_name = 'trained_knn_model'

    # Train the KNN classifier and save it to disk
    print("Training KNN classifier...")
    classifier = knn.train(train_data_dir,
                           model_save_path=model_name +
                           ALGORITHM[face_algorithm]['ext'],
                           n_neighbors=2,
                           face_algorithm=face_algorithm)
    print("Training complete!")