def run_single_test(data_dir, output_dir):
    """Train on <data_dir>/train, classify <data_dir>/test, write output.csv.

    Reads ground truth from train/gt.csv, extracts HOG features for every
    training image and for every *.png in the test directory, fits the
    classifier, and writes "<filename>,<label>" rows to
    <output_dir>/output.csv.
    """
    from fit_and_classify import fit_and_classify, extract_hog
    from glob import glob
    from numpy import zeros
    from os.path import basename, join
    from skimage.io import imread
    # FIX: time() is used in the progress prints below but was never
    # imported, which raised NameError at runtime.
    from time import time

    train_dir = join(data_dir, 'train')
    test_dir = join(data_dir, 'test')

    def read_gt(gt_dir):
        # Parse gt.csv ("filename,label" per row, one header row).
        # FIX: use a context manager so the file handle is not leaked.
        with open(join(gt_dir, 'gt.csv')) as fgt:
            next(fgt)  # skip the CSV header
            lines = fgt.readlines()
        filenames = []
        labels = zeros(len(lines))
        for i, line in enumerate(lines):
            filename, label = line.rstrip('\n').split(',')
            filenames.append(filename)
            labels[i] = int(label)
        return filenames, labels

    def extract_features(path, filenames):
        # Probe the first image to size the feature matrix, then fill it.
        hog_length = len(extract_hog(imread(join(path, filenames[0]))))
        data = zeros((len(filenames), hog_length))
        for i, name in enumerate(filenames):
            data[i, :] = extract_hog(imread(join(path, name)))
        return data

    train_filenames, train_labels = read_gt(train_dir)
    test_filenames = [basename(p) for p in sorted(glob(join(test_dir, '*png')))]

    print('started train features extraction:', time())
    train_features = extract_features(train_dir, train_filenames)
    print('started test features extraction:', time())
    test_features = extract_features(test_dir, test_filenames)
    print('Started svm training:', time())
    y = fit_and_classify(train_features, train_labels, test_features)

    with open(join(output_dir, 'output.csv'), 'w') as fout:
        for i, filename in enumerate(test_filenames):
            print('%s,%d' % (filename, y[i]), file=fout)
labels[i] = int(label) return filenames, labels def extract_features(path, filenames): hog_length = len( extract_hog(imread(join(path, filenames[0]), plugin='matplotlib'))) data = zeros((len(filenames), hog_length)) for i in range(0, len(filenames)): filename = join(path, filenames[i]) data[i, :] = extract_hog(imread(filename, plugin='matplotlib')) return data if len(argv) != 3: print('Usage: %s train_data_path test_data_path' % argv[0]) exit(0) train_data_path = argv[1] test_data_path = argv[2] train_filenames, train_labels = read_gt(train_data_path) test_filenames, test_labels = read_gt(test_data_path) train_features = extract_features(train_data_path, train_filenames) test_features = extract_features(test_data_path, test_filenames) y = fit_and_classify(train_features, train_labels, test_features) print('Accuracy: %.4f' % (sum(test_labels == y) / float(test_labels.shape[0])))
#!/usr/bin/python
"""Compute classification accuracy from pre-extracted feature CSVs.

Usage: script train_file test_file — each CSV has one header row and rows
of feature columns followed by the class label in the last column.
"""
# FIX: dropped the unused `array` and `pandas` imports; nothing in this
# script referenced them.
from sys import argv, stdout, exit

from numpy import loadtxt

from fit_and_classify import fit_and_classify

if len(argv) != 3:
    stdout.write('Usage: %s train_file test_file\n' % argv[0])
    exit(1)

train_file = argv[1]
test_file = argv[2]

# Last column is the label; everything before it is the feature vector.
train = loadtxt(train_file, delimiter=',', skiprows=1)
train_features = train[:, :-1]
train_labels = train[:, -1]

test = loadtxt(test_file, delimiter=',', skiprows=1)
test_features = test[:, :-1]
test_labels = test[:, -1]

# Fraction of test rows whose predicted label matches the ground truth.
stdout.write('%.4f\n' % (sum(test_labels == fit_and_classify(
    train_features, train_labels, test_features)) / float(test_labels.shape[0])))
return (filenames, rois, labels) def extract_features(path, filenames, rois): hog_length = len(extract_hog(imread(path + '/' + filenames[0], plugin='matplotlib'), rois[0, :])) print(rois[0, :]) data = zeros((len(filenames), hog_length)) for i in range(0, len(filenames)): filename = path + '/' + filenames[i] data[i, :] = extract_hog(imread(filename, plugin='matplotlib'), rois[i, :]) return data if len(argv) != 3: stdout.write('Usage: %s train_data_path test_data_path\n' % argv[0]) exit(1) train_data_path = argv[1] test_data_path = argv[2] (train_filenames, train_rois, train_labels) = read_gt(train_data_path) (test_filenames, test_rois, test_labels) = read_gt(test_data_path) train_features = extract_features(train_data_path, train_filenames, train_rois) test_features = extract_features(test_data_path, test_filenames, test_rois) y = fit_and_classify(train_features, train_labels, test_features) stdout.write('%.4f\n' % (sum(test_labels == y) / float(test_labels.shape[0])))
def run_single_test(data_dir, output_dir, seed, first):
    """Research/debug variant of the test runner.

    Caches HOG features for the training set to a CSV (written only on the
    first seed of a sweep), re-reads them via pandas, splits train/test via
    extract_data(seed), classifies, and prints a per-class error breakdown
    plus a seaborn confusion-matrix heatmap.

    NOTE(review): this function references `csv`, `np`, `pd`, `plt` and
    `sn` which are not imported here — presumably module-level imports
    outside this chunk; verify before running.
    """
    from fit_and_classify import fit_and_classify, extract_hog, extract_data
    from glob import glob
    from numpy import zeros
    from os.path import basename, join
    from skimage.io import imread

    train_dir = join(data_dir, 'train')
    test_dir = join(data_dir, 'test')

    def read_gt(gt_dir):
        # Parse gt.csv; [:2] tolerates extra trailing columns per row.
        # NOTE(review): file handle is never closed.
        fgt = open(join(gt_dir, 'gt.csv'))
        next(fgt)  # skip header row
        lines = fgt.readlines()
        filenames = []
        labels = zeros(len(lines))
        for i, line in enumerate(lines):
            filename, label = line.rstrip('\n').split(',')[:2]
            filenames.append(filename)
            labels[i] = int(label)
        return filenames, labels

    # Cache file name encodes the HOG configuration used to build it.
    HOG_FILENAME = 'train_hog_file_size80_block4_442fixed_10bins.csv'

    def dump_features(path, filenames):
        # Compute HOG for every image and persist rows of
        # "filename,comma-joined rounded vector" into HOG_FILENAME.
        hog_length = len(extract_hog(imread(join(path, filenames[0]))))
        data = zeros((len(filenames), hog_length))
        with open(HOG_FILENAME, mode='w') as hog_file:
            hog_writer = csv.writer(hog_file, delimiter=',', quotechar='"',
                                    quoting=csv.QUOTE_MINIMAL)
            hog_writer.writerow(['filename', 'hog_vector'])
            for i in range(0, len(filenames)):
                filename = join(path, filenames[i])
                data[i, :] = extract_hog(imread(filename))
                # Vectors are rounded to 3 decimals before caching.
                hog_writer.writerow([filename, ','.join(
                    np.asarray(np.round(data[i], 3), dtype=str))])
                if i % 100 == 0:
                    print('{} done'.format(i))

    def extract_features(path, filenames):
        # NOTE(review): despite the name, this returns the merged
        # train/test DataFrames from extract_data(seed); the `data`
        # array filled below is computed and then discarded.
        hog_length = len(extract_hog(imread(join(path, filenames[0]))))
        data = zeros((len(filenames), hog_length))
        hog_data = pd.read_csv(HOG_FILENAME)
        hog_data = hog_data.set_index('filename')
        for i in range(0, len(filenames)):
            filename = join(path, filenames[i])
            data[i, :] = np.asarray(
                hog_data.loc[filename].hog_vector.split(','), dtype=float)
            if i % 5000 == 0:
                print('{} done'.format(i))
        train_data, test_data = extract_data(seed)
        # Prefix bare filenames so they match the paths used as keys when
        # the cache was dumped. NOTE(review): hard-coded path — both splits
        # point at .../train; confirm this is intentional.
        train_data['filenames'] = ('public_tests/00_test_img_input/train/'
                                   + train_data['filenames'])
        test_data['filenames'] = ('public_tests/00_test_img_input/train/'
                                  + test_data['filenames'])
        # Inner join attaches the cached hog_vector column to each split.
        train_data = train_data.merge(hog_data, how='inner',
                                      left_on='filenames', right_on='filename')
        test_data = test_data.merge(hog_data, how='inner',
                                    left_on='filenames', right_on='filename')
        return train_data, test_data

    train_filenames, train_labels = read_gt(train_dir)
    test_filenames = []
    for path in sorted(glob(join(test_dir, '*png'))):
        test_filenames.append(basename(path))
    # train_features = extract_features(train_dir, train_filenames, 'train_hog_file.csv')
    # test_features = extract_features(test_dir, test_filenames, 'test_hog_file.csv')
    # Only the first seed of a sweep pays the cost of (re)building the cache.
    if seed == first:
        dump_features(train_dir, train_filenames)
    train_data, test_data = extract_features(train_dir, train_filenames)
    # Decode the comma-joined cached vectors back into float matrices.
    train_features = np.stack(train_data['hog_vector'].apply(
        lambda x: np.asarray(x.split(','), dtype=float)))
    test_features = np.stack(test_data['hog_vector'].apply(
        lambda x: np.asarray(x.split(','), dtype=float)))
    train_labels = np.array(train_data['class_id'])
    test_labels = np.array(test_data['class_id'])
    y = fit_and_classify(train_features, train_labels, test_features)
    print("Length of test: {}".format(len(y)))
    correct = 0
    incorrect = 0
    # 43 classes — confusion matrix rows are predictions, columns truth.
    conf_matrix = np.zeros((43, 43), dtype=int)
    miss = np.zeros((43), dtype=int)
    total = np.zeros((43), dtype=int)
    for i in range(len(y)):
        conf_matrix[y[i], test_labels[i]] += 1
        total[test_labels[i]] += 1
        if y[i] == test_labels[i]:
            correct += 1
        else:
            incorrect += 1
            miss[test_labels[i]] += 1
            # print("Name: {}, expected: {}, actual: {}".format(test_data.filenames[i], test_labels[i], y[i]))
    print(HOG_FILENAME)
    print("Correct: {}; Incorrect: {}; Accuracy: {}; Seed: {}".format(
        correct, incorrect, correct / len(y) * 100, seed))
    print("Per class stats:")
    for i in range(43):
        # NOTE(review): diagonal is integer-divided by 100 — presumably to
        # keep correct counts from dominating the heatmap scale; confirm.
        # NOTE(review): miss[i] / total[i] divides by zero for any class
        # absent from the test split.
        conf_matrix[i][i] //= 100
        print("Total: {}, missed: {}, miss percentage: {}".format(
            total[i], miss[i], miss[i] / total[i]))
    df_cm = pd.DataFrame(conf_matrix, index=[i for i in range(43)],
                         columns=[i for i in range(43)])
    plt.figure(figsize=(15, 11))
    sn.heatmap(df_cm, annot=True)
    plt.show()