예제 #1
0
def import_files(dataset_name):
    """
    Import both the training and the testing datasets.

    Each dataset is fetched through ``import_data`` and then passed through
    ``convert_data_to_array.convert_to_array``.

    If loading or converting the training data fails, a hint is printed and
    the process exits with status 1. Failures while loading the testing data
    are not intercepted and propagate to the caller unchanged.

    :param dataset_name: The name of the dataset
    :return: A ``(data_dict_test, data_dict_train)`` tuple. Note that the
        testing data comes first. The original author describes the values
        as per-user dictionaries; the exact structure is whatever
        ``convert_to_array`` returns.
    """
    # Local import so this function does not depend on the site-provided
    # ``exit`` helper, which is not guaranteed to exist (e.g. under ``-S``).
    import sys

    try:
        string_train = import_data.import_train_data(dataset_name)
        data_dict_train = convert_data_to_array.convert_to_array(string_train)
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit are not misreported as a
        # dataset problem. The parenthesised print works on Python 2 and 3.
        print("You have given wrong dataset name or the dataset is in wrong directory, please check")
        sys.exit(1)
    string_test = import_data.import_test_data(dataset_name)
    data_dict_test = convert_data_to_array.convert_to_array(string_test)
    return data_dict_test, data_dict_train
예제 #2
0
import numpy as np
from sklearn import svm
from import_data import import_train_data, import_test_data
from feature_map import feature_map_part1_1

# Raw digit datasets, built at module import time by calling the loaders
# with two relative paths each (an images file and a labels file, judging by
# the file names). Relative paths resolve against the current working
# directory.
train_raw_digit_dataset = import_train_data("digitdata/trainingimages",
                                            "digitdata/traininglabels")
test_raw_digit_dataset = import_test_data("digitdata/testimages",
                                          "digitdata/testlabels")


def run_part1_digit_ec4():
    """Fit a default ``svm.SVC`` on the digit features and predict the test set.

    Both raw digit datasets are passed through ``feature_map_part1_1``; the
    second and fourth items of its 4-item result are used as the samples and
    the labels respectively. Progress messages are printed along the way.
    """

    def _samples_and_labels(raw_dataset):
        # A fresh mapping is built for every call: a space maps to 0, while
        # both '#' and '+' map to 1.
        mapped = feature_map_part1_1(raw_dataset, {' ': 0, '#': 1, '+': 1},
                                     (28, 28), 10)
        _, samples, _, labels = mapped
        return samples, labels

    print("Importing data...")
    traindata, trainlabel = _samples_and_labels(train_raw_digit_dataset)
    testdata, testlabel = _samples_and_labels(test_raw_digit_dataset)

    clf = svm.SVC()

    print("Training...")
    clf.fit(traindata, trainlabel)

    print("Testing")
    preds_test = clf.predict(testdata)
예제 #3
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from import_data import import_train_data, import_test_data
from naive_bayes import train, test, evaluation, output_result, run

from feature_map import feature_map_part2_2

# Audio datasets, built at module import time by calling the loaders with
# two relative paths each (a data file and a labels file, judging by the
# file names). Relative paths resolve against the current working directory.
train_audio_dataset = import_train_data("mfccdata/training_data.txt",
                                        "mfccdata/training_labels.txt")
test_audio_dataset = import_test_data("mfccdata/testing_data.txt",
                                      "mfccdata/testing_labels.txt")


def run_part22_audio(k):
    """Run the MFC binary-feature experiment between two printed banners.

    The module-level audio datasets are converted with
    ``feature_map_part2_2`` and handed, together with ``k``, to ``run``.

    :param k: forwarded unchanged as the first argument of ``run``
    """

    def _binary_features(raw_dataset):
        # A fresh mapping is built for every call: a space maps to 1 and
        # '%' maps to 0.
        return feature_map_part2_2(raw_dataset, {' ': 1, '%': 0}, (30, 13), 5)

    print("================MFC - BINARY FEATURE================")
    print("Importing data...")

    features_train = _binary_features(train_audio_dataset)
    features_test = _binary_features(test_audio_dataset)

    run(k, features_train, features_test)
    print("===================================================\n")

예제 #4
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from import_data import import_train_data, import_test_data
from naive_bayes import train, test, evaluation, output_result, run
from feature_map import feature_map_part1_1, feature_map_part1_2

# Raw digit and face datasets, built at module import time by calling the
# loaders with two relative paths each (a data file and a labels file,
# judging by the file names). Relative paths resolve against the current
# working directory.
train_raw_digit_dataset = import_train_data("digitdata/trainingimages",
                                            "digitdata/traininglabels")
test_raw_digit_dataset = import_test_data("digitdata/testimages",
                                          "digitdata/testlabels")
train_raw_face_dataset = import_train_data("facedata/facedatatrain",
                                           "facedata/facedatatrainlabels")
test_raw_face_dataset = import_test_data("facedata/facedatatest",
                                         "facedata/facedatatestlabels")


# Returns the indices of the n largest values in a numpy array
def largest_indices(ary, n):
    """Return the positions of the ``n`` largest values of ``ary``.

    :param ary: numpy array of any shape
    :param n: how many positions to return. Values of zero or below give an
        empty result; values above the number of elements are clamped so
        that every position is returned.
    :return: a tuple with one index array per dimension of ``ary`` (the
        ``np.unravel_index`` format), ordered from the largest value to the
        smallest, so ``ary[largest_indices(ary, n)]`` yields the values in
        descending order.
    """
    flat = ary.flatten()
    # Clamp so that asking for more items than exist returns everything
    # instead of raising from argpartition.
    count = min(n, flat.size)
    if count <= 0:
        # Without this guard, n == 0 would slice with ``[-0:]``, which is
        # ``[0:]`` and selects every element instead of none.
        no_indices = np.empty(0, dtype=np.intp)
        return np.unravel_index(no_indices, ary.shape)
    # argpartition moves the ``count`` largest values to the tail without
    # fully sorting the array.
    indices = np.argpartition(flat, -count)[-count:]
    # Order just the selected values, largest first.
    # NOTE: the negation assumes a signed or floating dtype.
    indices = indices[np.argsort(-flat[indices])]
    return np.unravel_index(indices, ary.shape)


def run_part1_digit_1(k):
    print("================DIGIT - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
예제 #5
0
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from import_data import import_train_data, import_test_data
from naive_bayes import train, test, evaluation, output_result, run
from feature_map import feature_map_part2_extra3
import itertools

# "yes" and "no" audio datasets, built at module import time. Each loader
# call is given two relative paths and its result is unpacked into a
# (data, label) pair. Relative paths resolve against the current working
# directory.
(train_yes_data,
 train_yes_label) = import_train_data("audiodata/yes_train.txt",
                                      "audiodata/yes_train_label.txt")
(test_yes_data,
 test_yes_label) = import_test_data("audiodata/yes_test.txt",
                                    "audiodata/yes_test_label.txt")
(train_no_data,
 train_no_label) = import_train_data("audiodata/no_train.txt",
                                     "audiodata/no_train_label.txt")
(test_no_data,
 test_no_label) = import_test_data("audiodata/no_test.txt",
                                   "audiodata/no_test_label.txt")


def run_part2_extra3(k):
    """Print the audio average-feature banner and assemble the combined datasets.

    The "yes" and "no" samples loaded at module level are joined into a
    single training pair and a single testing pair, each of the form
    ``(data, labels)``.

    :param k: not used by the statements shown here; presumably forwarded
        to a later classifier run -- TODO confirm against the full function.
    """
    print("================AUDIO - AVERAGE FEATURE================")
    print("Importing data...")

    # Data and labels are concatenated in the same order ("yes" first, then
    # "no"), so each label keeps the position of its sample.
    train_audio_dataset = (np.concatenate((train_yes_data, train_no_data)),
                           np.concatenate((train_yes_label, train_no_label)))
    test_audio_dataset = (np.concatenate((test_yes_data, test_no_data)),
                          np.concatenate((test_yes_label, test_no_label)))