Example 1
# Imports assumed by this snippet (the original listing omits them); the
# d-script import paths follow those shown in the last example below.
import time

import numpy as np
from keras.utils.np_utils import to_categorical

from data_iters.minibatcher import MiniBatcher
from data_iters.iam_hdf5_iterator import IAM_MiniBatcher

# create_model (the Keras model builder) is assumed to be defined elsewhere
# in the same module.


def run_model(
    hdf5_file,
    num_authors,
    num_forms_per_author,
    shingle_dim,
    num_iters=10,
    batch_size=32,
    use_form=False,
    lr=0.03,
    loadweights=None,
):
    # Create Keras model
    model = create_model(num_authors, shingle_dim, lr)
    if loadweights:
        model.load_weights(loadweights)

    # Create a mini-batcher for the IAM dataset
    iam_m = IAM_MiniBatcher(
        hdf5_file,
        num_authors,
        num_forms_per_author,
        shingle_dim=shingle_dim,
        use_form=use_form,
        default_mode=MiniBatcher.TRAIN,
        batch_size=batch_size,
    )

    # Get validation dataset
    [X_test, Y_test] = iam_m.get_test_batch(batch_size * 20)
    X_test = np.expand_dims(X_test, 1)
    Y_test = to_categorical(Y_test, num_authors)
    print "test_size:", X_test.shape, Y_test.shape

    for i in range(num_iters):
        print "Starting Epoch: ", i
        start_time = time.time()
        # Get training batch
        (X_train, Y_train) = iam_m.get_train_batch(batch_size * 100)
        X_train = np.expand_dims(X_train, 1)
        Y_train = to_categorical(Y_train, num_authors)

        # TODO: Maybe we should only validate every N iters since right now we are doing 20% extra work every iter
        model.fit(
            X_train,
            Y_train,
            batch_size=batch_size,
            nb_epoch=1,
            show_accuracy=True,
            verbose=1,
            validation_data=(X_test, Y_test),
        )  # verbose=1
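        # Sketch of the TODO above (not in the original code): drop
        # validation_data from fit() and instead evaluate periodically, e.g.
        #     if i % 5 == 0:
        #         model.evaluate(X_test, Y_test, batch_size=batch_size,
        #                        show_accuracy=True, verbose=1)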
        print "Elapsed Time: ", time.time() - start_time

        # Save a checkpoint every 500 iterations (note: with the default
        # num_iters=10 this never triggers)
        if i % 500 == 0 and i != 0:
            fname = "authors_%d_forms_per_author_%d_epoch_%d" % (num_authors, num_forms_per_author, i)
            model.save_weights("%s.hdf5" % fname, overwrite=True)

    fname = "authors_%d_forms_per_author_%d_final.hdf5" % (num_authors, num_forms_per_author)
    model.save_weights("%s.hdf5" % fname, overwrite=True)
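
For reference, a minimal driver for run_model is sketched here; it is not part of the original snippet, and the HDF5 path and hyperparameter values are simply the ones used in the IAM setup of the example that follows.

# Hypothetical driver for run_model (not in the original source); the dataset
# path and hyperparameters mirror the IAM setup shown in the next example.
run_model(
    "/memory/author_lines.hdf5",
    num_authors=47,
    num_forms_per_author=50,
    shingle_dim=(120, 120),
    num_iters=10,
    batch_size=32,
    use_form=True,
    lr=0.03,
)
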
Example 2
from keras.optimizers import SGD
from keras.utils.np_utils import to_categorical
from keras.layers.normalization import BatchNormalization as BN

import numpy as np          # assumed: needed below for np.expand_dims
import scipy.misc as smi    # assumed: needed below for smi.imread
import matplotlib.pylab as plt
import sys
sys.path.append('/work/code/repo/d-script/')
# d-script imports
from data_iters.minibatcher import MiniBatcher
from data_iters.iam_hdf5_iterator import IAM_MiniBatcher
from fielutil import *

hdf5_file = '/memory/author_lines.hdf5'
num_forms_per_author = 50
batch_size = 32
num_authors = 47
shingle_dim = (120, 120)
use_form = True

# Build the IAM mini-batcher and pull a held-out test batch
iam_m = IAM_MiniBatcher(hdf5_file, num_authors, num_forms_per_author,
                        shingle_dim=shingle_dim, use_form=use_form,
                        default_mode=MiniBatcher.TRAIN, batch_size=batch_size)
[X_test, Y_test] = iam_m.get_test_batch(batch_size * 20)
X_test = np.expand_dims(X_test, 1)
X_test = randangle(X_test)   # randangle comes from the fielutil star import above
Y_test = to_categorical(Y_test, num_authors)

# Load one IAM form image, invert and normalize it, and keep the top-left
# corner (one third of the image in each dimension)
im = smi.imread('/fileserver/iam/forms/h07-025a.png')
im = 1.0 - im / 256.0
maxx, maxy = im.shape
maxx = maxx // 3
maxy = maxy // 3
halfim = im[:maxx, :maxy]
# Add batch and channel axes so the patch has shape (1, 1, maxx, maxy)
halfim = np.expand_dims(np.expand_dims(halfim, 0), 0)

if True:
  print "Loading original weights into GPU memory"