import pdb

import h5py

from denoiser.noisenet import conv4p_model
from globalclassify.fielutil import load_verbatimnet
# extract_features_for_corpus is assumed to be importable from the surrounding
# project; its defining module is not shown in this snippet.


def main():
    """Process NMEC with a precompiled denoiser and Fielnet."""
    def denoising_func(x):
        # Invert and rescale uint8 shingles to [0, 1], denoise, and restore shape
        orig_shape = x.shape
        x = 1. - x / 255.
        x = denoiser.predict(x, verbose=0)
        x = x.reshape(orig_shape)
        return x
        
    try:
        # Fielnet feature extractor: use the fc7 layer activations as features
        featext = load_verbatimnet('fc7', paramsfile='/fileserver/iam/iam-processed/models/fiel_657.hdf5')
        featext.compile(loss='mse', optimizer='sgd')
        featext_func = lambda x: featext.predict(x, verbose=0)

        # Denoising network
        print "Making the denoiser"
        denoiser = conv4p_model()
        denoiser.load_weights('/fileserver/iam/models/conv4p_linet56-iambin-tifs.hdf5')

        hdf5file = '/fileserver/nmec-handwriting/flat_nmec_bin_uint8.hdf5'
        
        with h5py.File(hdf5file, "r") as data_file:
            features = extract_features_for_corpus(data_file, featext_func, shingle_dim=(56, 56), transform=denoising_func)
        with h5py.File("output_features.hdf5", "w") as feature_file:
            for document_id, document_features in features.iteritems():
                feature_file.create_dataset(document_id, data=document_features)
    except Exception as e:
        print e
        pdb.post_mortem()
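
# Assumed entry point, not shown in the original snippet: run main() when the
# module is executed directly.
if __name__ == '__main__':
    main()
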
Example #2
import h5py
import numpy as np

# shingler is assumed to be defined elsewhere in the original source; it is
# not shown in this snippet.


# Tail of shinglesfromcc: the signature, the loop, and the ccs/xb lines below
# are assumed reconstructions, since the definition is truncated in this
# snippet. Each connected component larger than minthresh is cropped to its
# bounding box and turned into a 56x56 shingle.
def shinglesfromcc(ccis, minthresh=500):
    ccs = []
    for cc_label in range(1, ccis[1] + 1):
        ii = np.where(ccis[0] == cc_label)
        if len(ii[0]) > minthresh:
            xb = ii[0].min()
            yb = ii[1].min()
            xe = ii[0].max()
            ye = ii[1].max()
            ccs += [shingler(ccis[0][xb:xe, yb:ye], shingle_dim=(56, 56))]
    print "Finished finding " + str(len(ccs)) + " features from image."
    return np.expand_dims(np.array(ccs), 1)
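
# Hypothetical sketch of the connectedcomponents helper used below; the real
# implementation is not shown in this snippet. Assuming the pages are binary
# uint8 images, scipy.ndimage.label gives each connected component an integer
# id and returns (labeled_array, num_components), matching how ccis is indexed.
from scipy import ndimage


def connectedcomponents(im):
    # Treat nonzero pixels as foreground ink (an assumption about the binarization)
    return ndimage.label(np.array(im) > 0)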


# Neural network
from globalclassify.fielutil import load_verbatimnet
featext = load_verbatimnet(
    'fc7', paramsfile='/fileserver/iam/iam-processed/models/fiel_657.hdf5')
featext.compile(loss='mse', optimizer='sgd')

# Denoising neural network
from denoiser.noisenet import conv4p_model
denoiser = conv4p_model()
denoiser.load_weights('/work/models/conv4p_linet56-iambin-tifs.hdf5')

hdf5file = '/fileserver/nmec-handwriting/flat_nmec_cropped_bin_uint8.hdf5'
flatnmec = h5py.File(hdf5file, 'r')

outputdir = '/fileserver/nmec-handwriting/localfeatures/second-pass/'

# Extract connected components, and then shingles with minimum threshold 500
for imname in flatnmec.keys():
    ccis = connectedcomponents(flatnmec[imname])
    shards = shinglesfromcc(ccis, minthresh=500)
    if len(shards) == 0:
        print "WARNING " + str(imname) + " has no features!"
        continue
    denoised = denoiser.predict(shards, verbose=1)
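    # Assumed continuation (the loop body is truncated in the original): run the
    # denoised shingles through Fielnet and save one feature array per image
    # under outputdir; the .npy naming convention here is illustrative only.
    features = featext.predict(denoised, verbose=1)
    np.save(outputdir + imname + '.npy', features)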