def main():
    """Extract features for the NMEC corpus using a denoiser and Fielnet.

    Steps, in order:
      1. Load the feature network through ``load_verbatimnet`` (layer
         'fc7') and compile it.
      2. Build the denoising network with ``conv4p_model`` and load its
         weights.
      3. Open the flat NMEC HDF5 corpus read-only and hand it to
         ``extract_features_for_corpus`` with 56x56 shingles and the
         denoising transform.
      4. Write every (document id, features) pair as a dataset into
         ``output_features.hdf5`` in the current working directory.

    Any ``Exception`` raised along the way is printed and then
    ``pdb.post_mortem()`` is invoked, so this is meant for interactive use.
    """

    def denoising_func(x):
        # Rescale by 255 and invert before denoising.
        # NOTE(review): presumably x holds 8-bit pixel values -- confirm.
        inverted = 1. - x / 255.
        cleaned = denoise_net.predict(inverted, verbose=0)
        # Hand the result back in the same shape the caller passed in.
        return cleaned.reshape(x.shape)

    try:
        feature_net = load_verbatimnet(
            'fc7',
            paramsfile='/fileserver/iam/iam-processed/models/fiel_657.hdf5',
        )
        feature_net.compile(loss='mse', optimizer='sgd')

        def featext_func(batch):
            return feature_net.predict(batch, verbose=0.0)

        print("Making the denoiser")
        # denoising_func looks this name up lazily, so binding it here
        # (after the helper is defined) is fine.
        denoise_net = conv4p_model()
        denoise_net.load_weights(
            '/fileserver/iam/models/conv4p_linet56-iambin-tifs.hdf5')

        corpus_path = '/fileserver/nmec-handwriting/flat_nmec_bin_uint8.hdf5'
        with h5py.File(corpus_path, "r") as corpus:
            features = extract_features_for_corpus(
                corpus,
                featext_func,
                shingle_dim=(56, 56),
                transform=denoising_func,
            )

        with h5py.File("output_features.hdf5", "w") as sink:
            for doc_id, doc_features in features.iteritems():
                sink.create_dataset(doc_id, data=doc_features)
    except Exception as err:
        print(err)
        # With no argument, post_mortem uses the exception currently
        # being handled.
        pdb.post_mortem()
xe = ii[0].max() ye = ii[1].max() ccs += [shingler(ccis[0][xb:xe, yb:ye], shingle_dim=(56, 56))] print "Finished finding " + str(len(ccs)) + " features from image." return np.expand_dims(np.array(ccs), 1) # Neural network from globalclassify.fielutil import load_verbatimnet featext = load_verbatimnet( 'fc7', paramsfile='/fileserver/iam/iam-processed/models/fiel_657.hdf5') featext.compile(loss='mse', optimizer='sgd') # Denoising neural network from denoiser.noisenet import conv4p_model denoiser = conv4p_model() denoiser.load_weights('/work/models/conv4p_linet56-iambin-tifs.hdf5') hdf5file = '/fileserver/nmec-handwriting/flat_nmec_cropped_bin_uint8.hdf5' flatnmec = h5py.File(hdf5file, 'r') outputdir = '/fileserver/nmec-handwriting/localfeatures/second-pass/' # Extract connected components, and then shingles with minimum threshold 500 for imname in flatnmec.keys(): ccis = connectedcomponents(flatnmec[imname]) shards = shinglesfromcc(ccis, minthresh=500) if len(shards) == 0: print "WARNING " + str(imname) + " has no features!" continue denoised = denoiser.predict(shards, verbose=1)
yb = ii[1].min() xe = ii[0].max() ye = ii[1].max() ccs += [ shingler( ccis[0][xb:xe, yb:ye], shingle_dim=(56,56) ) ] print "Finished finding "+str(len(ccs))+" features from image." return np.expand_dims( np.array( ccs ), 1 ) # Neural network from globalclassify.fielutil import load_verbatimnet featext = load_verbatimnet('fc7', paramsfile='/fileserver/iam/iam-processed/models/fiel_657.hdf5') featext.compile(loss='mse', optimizer='sgd') # Denoising neural network from denoiser.noisenet import conv4p_model denoiser = conv4p_model() denoiser.load_weights('/work/models/conv4p_linet56-iambin-tifs.hdf5') hdf5file='/fileserver/nmec-handwriting/flat_nmec_cropped_bin_uint8.hdf5' flatnmec=h5py.File(hdf5file,'r') outputdir = '/fileserver/nmec-handwriting/localfeatures/second-pass/' # Extract connected components, and then shingles with minimum threshold 500 for imname in flatnmec.keys(): ccis = connectedcomponents( flatnmec[imname] ) shards = shinglesfromcc( ccis, minthresh=500 ) if len(shards)==0: print "WARNING "+str(imname)+" has no features!" continue denoised = denoiser.predict( shards, verbose=1 )