def run():
    """Convert the 'trn' and 'tst' sample directories into 'HWDB1.1.hdf5'.

    The HDF5 file is opened in write mode under a relative path, so it is
    created in the current working directory. One group per split is added,
    each holding a 'bitmap' dataset of shape (size, 64, 64, 1) / uint8 and a
    'tagcode' dataset of shape (size, 1) / uint16. After each split the
    number of samples that were written is printed.

    TRN_SIZE, TST_SIZE, trn_dirpath and tst_dirpath are not defined inside
    this function; they must be supplied by the enclosing module.
    """
    with h5py.File('HWDB1.1.hdf5', 'w') as hdf:
        splits = (
            ('trn', TRN_SIZE, trn_dirpath),
            ('tst', TST_SIZE, tst_dirpath),
        )
        for name, size, dirpath in splits:
            print('Converting \'%s\'...' % name)

            group = hdf.create_group(name)
            bitmaps = group.create_dataset('bitmap', (size, 64, 64, 1), dtype='uint8')
            tagcodes = group.create_dataset('tagcode', (size, 1), dtype='uint16')

            # The running count doubles as the row index of the next sample.
            written = 0
            for bitmap, tagcode in utils.read_gnt_in_directory(dirpath):
                bitmaps[written] = utils.normalize_bitmap(bitmap)
                tagcodes[written] = tagcode
                written += 1

            print("Sample Number: {0}".format(written))
#!/usr/bin/env python
# This script can be used to see how bitmaps will be preprocessed before
# subsetting the CASIA HWDB1.1 data set
import sys

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np

import utils

# Exactly one argument is expected: the directory that is handed to
# utils.read_gnt_in_directory.
if len(sys.argv) != 2:
    print('Usage: %s gnt_dirpath' % sys.argv[0])
    # Report the usage error with a non-zero status. sys.exit is used instead
    # of the site-provided exit(), which is meant for interactive sessions.
    sys.exit(1)

gnt_dirpath = sys.argv[1]

for bitmap, tagcode in utils.read_gnt_in_directory(gnt_dirpath):
    # Print the sample's character as UTF-8 bytes.
    # wrong terminal encoding = garbage
    # NOTE(review): the original used Python 2 print statements; on Python 3
    # this would print the bytes repr, so drop .encode() if the script is
    # ever ported.
    print(utils.tagcode_to_unicode(tagcode).encode('utf-8'))

    proc_bitmap = utils.normalize_bitmap(bitmap)
    proc_bitmap = utils.preprocess_bitmap(proc_bitmap)

    # Left panel: the bitmap as read; right panel: the processed bitmap.
    plt.subplot(121)
    plt.imshow(bitmap, cmap=cm.Greys_r)
    plt.subplot(122)
    # squeeze(axis=0) removes a leading length-1 axis (and raises if axis 0
    # has any other length), leaving an array imshow can draw.
    plt.imshow(np.squeeze(proc_bitmap, axis=0), cmap=cm.Greys_r)
    plt.show()
Esempio n. 3
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# This script counts the characters of the CASIA HWDB1.1 data set
import sys
from collections import Counter, defaultdict

import utils

# Two arguments are required: the training and the test sample directories.
if len(sys.argv) != 3:
    print('Usage: %s trn_dirpath tst_dirpath' % sys.argv[0])
    # Report the usage error with a non-zero status. sys.exit is used instead
    # of the site-provided exit(), which is meant for interactive sessions.
    sys.exit(1)

trn_dirpath = sys.argv[1]
tst_dirpath = sys.argv[2]
# Maps each character to a Counter with its 'trn' and 'tst' sample counts.
frequencies = defaultdict(Counter)

for split, dirpath in (('trn', trn_dirpath), ('tst', tst_dirpath)):
    for _, tagcode in utils.read_gnt_in_directory(dirpath):
        frequencies[utils.tagcode_to_unicode(tagcode)][split] += 1

# Write the characters as text through a UTF-8 file object. Formatting
# k.encode('utf-8') with %s on Python 3 would put the bytes repr (b'...')
# into the file, and the default file encoding depends on the locale.
with open('frequencies.txt', 'w', encoding='utf-8') as f:
    # One "<char>: <trn count>, <tst count>" line per character, most
    # frequent in the training split first.
    for char, counts in sorted(frequencies.items(),
                               key=lambda item: item[1]['trn'],
                               reverse=True):
        f.write('%s: %d, %d\n' % (char, counts['trn'], counts['tst']))
#!/usr/bin/env python2
# This script can be used to see how bitmaps will be preprocessed before
# subsetting the CASIA HWDB1.1 data set
import sys

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np

import utils

# The script takes a single argument: the directory passed on to
# utils.read_gnt_in_directory.
if len(sys.argv) != 2:
    print('Usage: %s gnt_dirpath' % sys.argv[0])
    # A usage error must not look like success: exit with status 1 through
    # sys.exit rather than the interactive-only exit() helper.
    sys.exit(1)

gnt_dirpath = sys.argv[1]

for bitmap, tagcode in utils.read_gnt_in_directory(gnt_dirpath):
    # Emit the character of the current sample, encoded as UTF-8.
    # wrong terminal encoding = garbage
    # The parenthesised single-argument print behaves exactly like the
    # Python 2 print statement it replaces.
    print(utils.tagcode_to_unicode(tagcode).encode('utf-8'))

    proc_bitmap = utils.normalize_bitmap(bitmap)
    proc_bitmap = utils.preprocess_bitmap(proc_bitmap)

    # Side-by-side comparison: unprocessed bitmap (left) against the
    # normalized and preprocessed one (right).
    plt.subplot(121)
    plt.imshow(bitmap, cmap=cm.Greys_r)
    plt.subplot(122)
    # Drop the leading length-1 axis before plotting; np.squeeze raises if
    # axis 0 is not of length 1.
    plt.imshow(np.squeeze(proc_bitmap, axis=0), cmap=cm.Greys_r)
    plt.show()
Esempio n. 5
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys

import h5py

import utils

# Both directory arguments are mandatory; print a usage hint and exit with
# status 1 otherwise.
if len(sys.argv) != 3:
    print('Usage: %s trn_dirpath tst_dirpath' % sys.argv[0])
    sys.exit(1)

trn_dirpath, tst_dirpath = sys.argv[1:3]

# Write one HDF5 group per split into 'HWDB1.1.hdf5'.
with h5py.File('HWDB1.1.hdf5', 'w') as f:
    # (group name, number of dataset rows, source directory)
    # NOTE(review): the row counts are hard-coded; presumably they are the
    # sample counts of each split -- confirm against the data.
    splits = (
        ('trn', 897758, trn_dirpath),
        ('tst', 223991, tst_dirpath),
    )
    for name, size, dirpath in splits:
        print('Converting \'%s\'...' % name)

        group = f.create_group(name)
        bitmaps = group.create_dataset('bitmap', (size, 1, 64, 64), dtype='uint8')
        tagcodes = group.create_dataset('tagcode', (size, 1), dtype='uint16')

        # Row `row` of both datasets describes the same sample.
        samples = utils.read_gnt_in_directory(dirpath)
        for row, (bitmap, tagcode) in enumerate(samples):
            bitmaps[row] = utils.normalize_bitmap(bitmap)
            tagcodes[row] = tagcode
Esempio n. 6
0
import sys

import h5py

import utils

# The training and test directories are both required; otherwise print the
# usage line and exit with status 1.
if len(sys.argv) != 3:
    print('Usage: %s trn_dirpath tst_dirpath' % sys.argv[0])
    sys.exit(1)

trn_dirpath, tst_dirpath = sys.argv[1:3]

# Store bitmaps, tag codes and writer codes of each split in 'HWDB1.1.hdf5'.
with h5py.File('HWDB1.1.hdf5', 'w') as f:
    # (group name, number of dataset rows, source directory)
    # NOTE(review): the row counts are hard-coded; presumably they are the
    # sample counts of each split -- confirm against the data.
    splits = (
        ('trn', 897758, trn_dirpath),
        ('tst', 223991, tst_dirpath),
    )
    for name, size, dirpath in splits:
        print('Converting \'%s\'...' % name)

        group = f.create_group(name)
        bitmaps = group.create_dataset('bitmap', (size, 1, 64, 64), dtype='uint8')
        tagcodes = group.create_dataset('tagcode', (size, 1), dtype='uint16')
        writercodes = group.create_dataset('writercode', (size, 1), dtype='uint16')

        # Each sample is a (bitmap, tagcode, writercode) triple; row `row` of
        # all three datasets belongs to the same sample.
        samples = utils.read_gnt_in_directory(dirpath)
        for row, (bitmap, tagcode, writercode) in enumerate(samples):
            bitmaps[row] = utils.normalize_bitmap(bitmap)
            tagcodes[row] = tagcode
            writercodes[row] = writercode
#!/usr/bin/env python2
# This script counts the characters of the CASIA HWDB1.1 data set
import sys
from collections import Counter, defaultdict

import utils

# Both the training and the test sample directories must be given.
if len(sys.argv) != 3:
    print('Usage: %s trn_dirpath tst_dirpath' % sys.argv[0])
    # A usage error must not look like success: exit with status 1 through
    # sys.exit rather than the interactive-only exit() helper.
    sys.exit(1)

trn_dirpath = sys.argv[1]
tst_dirpath = sys.argv[2]
# Maps each character to a Counter with its 'trn' and 'tst' sample counts.
frequencies = defaultdict(Counter)

for _, tagcode in utils.read_gnt_in_directory(trn_dirpath):
    frequencies[utils.tagcode_to_unicode(tagcode)]['trn'] += 1
for _, tagcode in utils.read_gnt_in_directory(tst_dirpath):
    frequencies[utils.tagcode_to_unicode(tagcode)]['tst'] += 1

with open('frequencies.txt', 'w') as f:
    # Most frequent training characters first. items() and an indexing lambda
    # replace iteritems() and the tuple-parameter lambda; both behave the
    # same on Python 2 and also parse on Python 3.
    ranked = sorted(frequencies.items(),
                    key=lambda item: item[1]['trn'],
                    reverse=True)
    for k, v in ranked:
        # Python 2: the UTF-8 byte string is written to the file verbatim.
        # NOTE(review): on Python 3 this would write the bytes repr; write
        # `k` itself to a UTF-8 file if the script is ever ported.
        f.write('%s: %d, %d\n' % (k.encode('utf-8'), v['trn'], v['tst']))