Ejemplo n.º 1
0
def run_single_test(data_dir, output_dir):
    """Train on ``data_dir/train``, classify ``data_dir/test``, write predictions.

    Training filenames and labels are read from ``train/gt.csv``. Test images
    are every file in ``test`` matching ``*png``, processed in sorted order.
    One ``filename,label`` line per test image is written to
    ``output_dir/output.csv``.

    Args:
        data_dir: Directory containing the ``train`` and ``test`` sub-directories.
        output_dir: Directory in which ``output.csv`` is created.
    """
    from fit_and_classify import fit_and_classify, extract_hog
    from glob import glob
    from numpy import zeros
    from os.path import basename, join
    from skimage.io import imread
    # `time()` is used for the progress messages below; import it here so the
    # function does not depend on a module-level import being present.
    from time import time

    train_dir = join(data_dir, 'train')
    test_dir = join(data_dir, 'test')

    def read_gt(gt_dir):
        """Parse ``gt_dir/gt.csv`` into a filename list and a label array."""
        # Context manager guarantees the handle is closed, even if parsing
        # of a later line raises.
        with open(join(gt_dir, 'gt.csv')) as fgt:
            next(fgt)  # skip the header row
            lines = fgt.readlines()

        filenames = []
        labels = zeros(len(lines))
        for i, line in enumerate(lines):
            # Each row must have exactly two comma-separated fields.
            filename, label = line.rstrip('\n').split(',')
            filenames.append(filename)
            labels[i] = int(label)

        return filenames, labels

    def extract_features(path, filenames):
        """Return a matrix with one ``extract_hog`` descriptor per image."""
        # The first image is processed up front only to learn the descriptor
        # length needed to allocate the matrix.
        hog_length = len(extract_hog(imread(join(path, filenames[0]))))
        data = zeros((len(filenames), hog_length))
        for i, name in enumerate(filenames):
            data[i, :] = extract_hog(imread(join(path, name)))
        return data

    train_filenames, train_labels = read_gt(train_dir)
    # Sorted so that the output order is deterministic.
    test_filenames = [
        basename(path) for path in sorted(glob(join(test_dir, '*png')))
    ]

    print('started train features extraction:', time())
    train_features = extract_features(train_dir, train_filenames)
    print('started test features extraction:', time())
    test_features = extract_features(test_dir, test_filenames)

    print('Started svm training:', time())
    y = fit_and_classify(train_features, train_labels, test_features)

    with open(join(output_dir, 'output.csv'), 'w') as fout:
        for i, filename in enumerate(test_filenames):
            print('%s,%d' % (filename, y[i]), file=fout)
Ejemplo n.º 2
0
        labels[i] = int(label)

    return filenames, labels


def extract_features(path, filenames):
    """Build a feature matrix with one ``extract_hog`` descriptor per image.

    Args:
        path: Directory that contains the image files.
        filenames: Image file names relative to *path*; must be non-empty
            because the first entry is used to size the matrix.

    Returns:
        Array of shape ``(len(filenames), descriptor_length)``.
    """
    def describe(name):
        # Images are read with the matplotlib plugin of `imread`.
        return extract_hog(imread(join(path, name), plugin='matplotlib'))

    # Probe the first image once, solely to learn the descriptor length.
    descriptor_length = len(describe(filenames[0]))
    features = zeros((len(filenames), descriptor_length))
    for row, name in enumerate(filenames):
        features[row, :] = describe(name)
    return features


# Script entry: expects exactly two arguments, the train and test data
# directories, each readable by read_gt().
if len(argv) != 3:
    print('Usage: %s train_data_path test_data_path' % argv[0])
    # NOTE(review): exits with status 0 although this is a usage error.
    exit(0)

train_data_path, test_data_path = argv[1:3]

train_filenames, train_labels = read_gt(train_data_path)
test_filenames, test_labels = read_gt(test_data_path)

train_features = extract_features(train_data_path, train_filenames)
test_features = extract_features(test_data_path, test_filenames)

y = fit_and_classify(train_features, train_labels, test_features)

# Accuracy = share of test samples whose predicted label equals the ground truth.
accuracy = sum(test_labels == y) / float(test_labels.shape[0])
print('Accuracy: %.4f' % accuracy)
Ejemplo n.º 3
0
#!/usr/bin/python

from sys import argv, stdout, exit
from numpy import array, loadtxt
import pandas as pd
from fit_and_classify import fit_and_classify

# Script entry: expects exactly two arguments, a train CSV and a test CSV.
if len(argv) != 3:
    stdout.write('Usage: %s train_file test_file\n' % argv[0])
    exit(1)

train_file, test_file = argv[1:3]

# Both files are comma-separated with one header row; the last column holds
# the label and all preceding columns are features.
train = loadtxt(train_file, delimiter=',', skiprows=1)
train_features, train_labels = train[:, :-1], train[:, -1]

test = loadtxt(test_file, delimiter=',', skiprows=1)
test_features, test_labels = test[:, :-1], test[:, -1]

predicted = fit_and_classify(train_features, train_labels, test_features)

# Report the share of test rows whose prediction matches the label.
accuracy = sum(test_labels == predicted) / float(test_labels.shape[0])
stdout.write('%.4f\n' % accuracy)
    return (filenames, rois, labels)


def extract_features(path, filenames, rois):
    """Build a feature matrix with one ``extract_hog`` descriptor per image.

    Args:
        path: Directory that contains the image files.
        filenames: Image file names relative to *path*; must be non-empty.
        rois: 2-D array indexed as ``rois[i, :]``; row *i* is passed to
            ``extract_hog`` together with image *i*.

    Returns:
        Array of shape ``(len(filenames), descriptor_length)``.
    """
    def describe(index):
        image = imread(path + '/' + filenames[index], plugin='matplotlib')
        return extract_hog(image, rois[index, :])

    # Probe the first image once, solely to learn the descriptor length.
    descriptor_length = len(describe(0))
    print(rois[0, :])  # debug output of the first ROI row

    features = zeros((len(filenames), descriptor_length))
    for row in range(len(filenames)):
        features[row, :] = describe(row)
    return features

# Script entry: expects exactly two arguments, the train and test data
# directories, each readable by read_gt().
if len(argv) != 3:
    stdout.write('Usage: %s train_data_path test_data_path\n' % argv[0])
    exit(1)

train_data_path, test_data_path = argv[1:3]

train_filenames, train_rois, train_labels = read_gt(train_data_path)
test_filenames, test_rois, test_labels = read_gt(test_data_path)

train_features = extract_features(train_data_path, train_filenames, train_rois)
test_features = extract_features(test_data_path, test_filenames, test_rois)

y = fit_and_classify(train_features, train_labels, test_features)

# Report the share of test samples whose prediction matches the ground truth.
accuracy = sum(test_labels == y) / float(test_labels.shape[0])
stdout.write('%.4f\n' % accuracy)


Ejemplo n.º 5
0
def run_single_test(data_dir, output_dir, seed, first):
    """Run one train/evaluate round on cached HOG features and plot the results.

    When ``seed == first`` the descriptors of every training image are first
    computed and dumped to ``HOG_FILENAME``. The cached descriptors are then
    joined with the train/test split returned by ``extract_data(seed)``, a
    classifier is fitted via ``fit_and_classify``, and accuracy, per-class
    miss statistics and a confusion-matrix heatmap are reported.

    Relies on ``csv``, ``np``, ``pd``, ``plt`` and ``sn`` being available at
    module level.

    Args:
        data_dir: Directory containing the ``train`` and ``test`` sub-directories.
        output_dir: Not used by this function.
        seed: Passed to ``extract_data`` to select the split.
        first: Seed value on which the feature cache is (re)generated.
    """
    from fit_and_classify import fit_and_classify, extract_hog, extract_data
    from glob import glob
    from numpy import zeros
    from os.path import basename, join
    from skimage.io import imread

    # Number of classes assumed by the statistics and the confusion matrix.
    NUM_CLASSES = 43

    train_dir = join(data_dir, 'train')
    test_dir = join(data_dir, 'test')

    def read_gt(gt_dir):
        """Parse ``gt_dir/gt.csv`` into a filename list and a label array."""
        # Context manager guarantees the handle is closed, even if parsing
        # of a later line raises.
        with open(join(gt_dir, 'gt.csv')) as fgt:
            next(fgt)  # skip the header row
            lines = fgt.readlines()

        filenames = []
        labels = zeros(len(lines))
        for i, line in enumerate(lines):
            # Only the first two comma-separated fields are used.
            filename, label = line.rstrip('\n').split(',')[:2]
            filenames.append(filename)
            labels[i] = int(label)

        return filenames, labels

    HOG_FILENAME = 'train_hog_file_size80_block4_442fixed_10bins.csv'

    def dump_features(path, filenames):
        """Compute a descriptor per image and write them all to HOG_FILENAME."""
        hog_length = len(extract_hog(imread(join(path, filenames[0]))))
        data = zeros((len(filenames), hog_length))
        with open(HOG_FILENAME, mode='w') as hog_file:
            hog_writer = csv.writer(hog_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            hog_writer.writerow(['filename', 'hog_vector'])
            for i in range(0, len(filenames)):
                filename = join(path, filenames[i])
                data[i, :] = extract_hog(imread(filename))
                # The vector is rounded to 3 decimals and stored as a single
                # comma-joined string cell.
                hog_writer.writerow([filename, ','.join(np.asarray(np.round(data[i], 3), dtype=str))])
                if i % 100 == 0:
                    print('{} done'.format(i))

    def extract_features(path, filenames):
        """Load cached descriptors and return ``(train_data, test_data)`` frames."""
        hog_length = len(extract_hog(imread(join(path, filenames[0]))))
        data = zeros((len(filenames), hog_length))
        hog_data = pd.read_csv(HOG_FILENAME)
        hog_data = hog_data.set_index('filename')
        # NOTE(review): `data` is filled here but never returned or read; the
        # loop's only visible effect is to raise if a file is missing from the
        # cache or its vector has the wrong length. Confirm before removing.
        for i in range(0, len(filenames)):
            filename = join(path, filenames[i])
            data[i, :] = np.asarray(hog_data.loc[filename].hog_vector.split(','), dtype=float)
            if i % 5000 == 0:
                print('{} done'.format(i))

        train_data, test_data = extract_data(seed)

        # NOTE(review): the prefix is hard-coded and ignores `path`; both the
        # train and the test split point into the same `train/` directory.
        train_data['filenames'] = 'public_tests/00_test_img_input/train/' + train_data['filenames']
        test_data['filenames'] = 'public_tests/00_test_img_input/train/' + test_data['filenames']
        train_data = train_data.merge(hog_data, how='inner', left_on='filenames', right_on='filename')
        test_data = test_data.merge(hog_data, how='inner', left_on='filenames', right_on='filename')
        return train_data, test_data

    train_filenames, train_labels = read_gt(train_dir)
    # NOTE(review): test_filenames is collected but not used below.
    test_filenames = []
    for path in sorted(glob(join(test_dir, '*png'))):
        test_filenames.append(basename(path))

    if seed == first:
        dump_features(train_dir, train_filenames)
    train_data, test_data = extract_features(train_dir, train_filenames)
    # Each cached vector is a comma-joined string; parse it back to floats.
    train_features = np.stack(train_data['hog_vector'].apply(lambda x: np.asarray(x.split(','), dtype=float)))
    test_features = np.stack(test_data['hog_vector'].apply(lambda x: np.asarray(x.split(','), dtype=float)))
    # Labels come from the split frames, replacing those read from gt.csv.
    train_labels = np.array(train_data['class_id'])
    test_labels = np.array(test_data['class_id'])

    y = fit_and_classify(train_features, train_labels, test_features)

    print("Length of test: {}".format(len(y)))
    correct = 0
    incorrect = 0
    # Rows are indexed by the predicted label, columns by the true label.
    conf_matrix = np.zeros((NUM_CLASSES, NUM_CLASSES), dtype=int)
    miss = np.zeros(NUM_CLASSES, dtype=int)
    total = np.zeros(NUM_CLASSES, dtype=int)
    for i in range(len(y)):
        conf_matrix[y[i], test_labels[i]] += 1
        total[test_labels[i]] += 1
        if y[i] == test_labels[i]:
            correct += 1
        else:
            incorrect += 1
            miss[test_labels[i]] += 1

    print(HOG_FILENAME)
    print("Correct: {}; Incorrect: {}; Accuracy: {}; Seed: {}".format(correct, incorrect, correct / len(y) * 100, seed))
    print("Per class stats:")
    for i in range(NUM_CLASSES):
        # NOTE(review): the diagonal is integer-divided by 100 before plotting,
        # presumably so correct counts do not dominate the heatmap scale;
        # confirm this is intended.
        conf_matrix[i][i] //= 100
        print("Total: {}, missed: {}, miss percentage: {}".format(total[i], miss[i], miss[i] / total[i]))

    class_ids = list(range(NUM_CLASSES))
    df_cm = pd.DataFrame(conf_matrix, index=class_ids, columns=class_ids)
    plt.figure(figsize=(15, 11))
    sn.heatmap(df_cm, annot=True)
    plt.show()