def import_files(dataset_name):
    """
    Import both the training and the testing datasets.

    :param dataset_name: The name of the dataset
    :return: ``(data_dict_test, data_dict_train)`` -- the converted
        per-user dictionaries for the test and train splits (tuple order
        kept from the original implementation).
    """
    try:
        string_train = import_data.import_train_data(dataset_name)
        data_dict_train = convert_data_to_array.convert_to_array(string_train)
    except Exception as err:
        # Was a bare ``except:`` which also swallowed SystemExit and
        # KeyboardInterrupt and hid the real cause; now the actual error
        # is printed before exiting.
        print("You have given wrong dataset name or the dataset is in wrong directory, please check")
        print(err)
        exit(1)
    # NOTE(review): the original reused the name ``string_train`` for the
    # test split; renamed for clarity. Test-split failures intentionally
    # propagate (the original only guarded the training load).
    string_test = import_data.import_test_data(dataset_name)
    data_dict_test = convert_data_to_array.convert_to_array(string_test)
    return data_dict_test, data_dict_train
import numpy as np
from sklearn import svm
from import_data import import_train_data, import_test_data
from feature_map import feature_map_part1_1

# Raw digit image/label pairs are loaded once, at module import time.
train_raw_digit_dataset = import_train_data("digitdata/trainingimages", "digitdata/traininglabels")
test_raw_digit_dataset = import_test_data("digitdata/testimages", "digitdata/testlabels")


def run_part1_digit_ec4():
    """Train an sklearn SVM on binarized 28x28 digit images and predict on the test split.

    The character map {' ': 0, '#': 1, '+': 1} collapses both ink
    characters to 1, yielding binary pixel features. The trailing
    argument 10 is presumably the number of classes (digits 0-9) --
    TODO confirm against feature_map_part1_1.
    """
    print("Importing data...")
    # feature_map_part1_1 returns a 4-tuple; only positions 1 (data) and
    # 3 (labels) are used here.
    (_, traindata, _, trainlabel) = feature_map_part1_1(train_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
        '+': 1
    }, (28, 28), 10)
    (_, testdata, _, testlabel) = feature_map_part1_1(test_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
        '+': 1
    }, (28, 28), 10)
    clf = svm.SVC()  # all sklearn defaults
    print("Training...")
    clf.fit(traindata, trainlabel)
    print("Testing")
    preds_test = clf.predict(testdata)
    # NOTE(review): preds_test and testlabel are unused in the visible
    # chunk -- the evaluation step may continue past this view.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from import_data import import_train_data, import_test_data
from naive_bayes import train, test, evaluation, output_result, run
from feature_map import feature_map_part2_2

# Raw MFCC audio datasets are loaded once, at module import time.
train_audio_dataset = import_train_data("mfccdata/training_data.txt", "mfccdata/training_labels.txt")
test_audio_dataset = import_test_data("mfccdata/testing_data.txt", "mfccdata/testing_labels.txt")


def run_part22_audio(k):
    """Run the naive-Bayes pipeline on the MFCC audio data with binary features.

    Each 30x13 character grid is mapped through {' ': 1, '%': 0}; the
    resulting train/test feature sets are handed to ``run`` with ``k``.
    """
    print("================MFC - BINARY FEATURE================")
    print("Importing data...")
    # Both splits share the same character map and grid geometry, so
    # build them once and apply the feature map to each split in turn.
    char_to_bit = {' ': 1, '%': 0}
    grid_shape = (30, 13)
    mapped = [feature_map_part2_2(raw, char_to_bit, grid_shape, 5)
              for raw in (train_audio_dataset, test_audio_dataset)]
    run(k, mapped[0], mapped[1])
    print("===================================================\n")
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from import_data import import_train_data, import_test_data
from naive_bayes import train, test, evaluation, output_result, run
from feature_map import feature_map_part1_1, feature_map_part1_2

# Digit and face datasets are loaded once, at module import time.
train_raw_digit_dataset = import_train_data("digitdata/trainingimages", "digitdata/traininglabels")
test_raw_digit_dataset = import_test_data("digitdata/testimages", "digitdata/testlabels")
train_raw_face_dataset = import_train_data("facedata/facedatatrain", "facedata/facedatatrainlabels")
test_raw_face_dataset = import_test_data("facedata/facedatatest", "facedata/facedatatestlabels")


# Returns the n largest indices from a numpy array
def largest_indices(ary, n):
    """Return the indices of the ``n`` largest elements of ``ary``.

    Uses ``np.argpartition`` to select the top-n flat indices without a
    full sort, then orders just those n descending by value, and finally
    maps the flat indices back to ``ary``'s shape via ``np.unravel_index``.
    """
    flat = ary.flatten()
    indices = np.argpartition(flat, -n)[-n:]
    indices = indices[np.argsort(-flat[indices])]
    return np.unravel_index(indices, ary.shape)


def run_part1_digit_1(k):
    """Naive-Bayes run on the digit data with binary pixel features.

    NOTE(review): this definition is cut off at the end of the visible
    chunk -- the dict literal below is incomplete in SOURCE.
    """
    print("================DIGIT - BINARY FEATURE================")
    print("Importing data...")
    train_dataset = feature_map_part1_1(train_raw_digit_dataset, {
        ' ': 0,
        '#': 1,
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import ticker
from import_data import import_train_data, import_test_data
from naive_bayes import train, test, evaluation, output_result, run
from feature_map import feature_map_part2_extra3
import itertools

# "yes"/"no" audio clips are loaded separately at import time and
# concatenated inside run_part2_extra3 into single train/test datasets.
(train_yes_data, train_yes_label) = import_train_data("audiodata/yes_train.txt", "audiodata/yes_train_label.txt")
(test_yes_data, test_yes_label) = import_test_data("audiodata/yes_test.txt", "audiodata/yes_test_label.txt")
(train_no_data, train_no_label) = import_train_data("audiodata/no_train.txt", "audiodata/no_train_label.txt")
(test_no_data, test_no_label) = import_test_data("audiodata/no_test.txt", "audiodata/no_test_label.txt")


def run_part2_extra3(k):
    """Naive-Bayes run on the combined yes/no audio data ("average feature").

    NOTE(review): only the dataset concatenation is visible in this
    chunk; the imported feature_map_part2_extra3 and the actual ``run``
    call appear to continue past this view.
    """
    print("================AUDIO - AVERAGE FEATURE================")
    print("Importing data...")
    # Stack the yes/no splits so downstream code sees one (data, labels)
    # pair per split; label order matches the data concatenation order.
    train_audio_dataset = (np.concatenate((train_yes_data, train_no_data)),
                           np.concatenate((train_yes_label, train_no_label)))
    test_audio_dataset = (np.concatenate((test_yes_data, test_no_data)),
                          np.concatenate((test_yes_label, test_no_label)))