Beispiel #1
0
def get_linenormalizer():
    """Load the OCR model and publish it and its line normalizer as globals.

    The model named by ``args['model']`` is loaded into the module-level
    ``network``.  ``postLoad()`` is called on every node yielded by
    ``network.walk()`` and ``allocate(5000)`` on every LSTM node.  The
    network's ``lnorm`` attribute (``None`` if the model has none) is stored
    in the module-level ``lnorm``; when ``args['height']`` is positive and a
    normalizer exists, its height is overridden with that value.

    Despite the name, nothing is returned: callers read the ``network`` and
    ``lnorm`` globals.

    If the model file cannot be found, an explanation is printed via
    ``print_error`` and the process exits with status 1.
    """
    global network
    global lnorm
    # load the network used for classification
    try:
        network = ocrolib.load_object(args['model'], verbose=1)
        for x in network.walk():
            x.postLoad()
        for x in network.walk():
            if isinstance(x, lstm.LSTM):
                x.allocate(5000)
    # NOTE(review): `FileNotFound` is referenced unqualified; confirm it is
    # imported in this module (ocrolib-based scripts commonly catch
    # `ocrolib.FileNotFound`).
    except FileNotFound:
        print_error("")
        print_error("Cannot find OCR model file:" + args['model'])
        print_error("Download a model and put it into:" +
                    ocrolib.default.modeldir)
        print_error("(Or override the location with OCROPUS_DATA.)")
        print_error("")
        sys.exit(1)

    # get the line normalizer from the loaded network, or optionally
    # let the user override it (this is not very useful)
    lnorm = getattr(network, "lnorm", None)

    # A model without an `lnorm` attribute yields None above; guard so that a
    # height override does not crash with AttributeError in that case.
    if args['height'] > 0 and lnorm is not None:
        lnorm.setHeight(args['height'])
def initRnnModel(model_name="en-default.pyrnn"):
    """Load a recognition network through ocrolib and prepare its LSTM nodes.

    Args:
        model_name: name passed to ``ocrolib.load_object``; defaults to
            ``"en-default.pyrnn"``.

    Returns:
        A ``(network, lnorm)`` tuple.  ``network`` is the loaded object after
        ``allocate(5000)`` has been called on each of its LSTM nodes.
        ``lnorm`` is the network's ``lnorm`` attribute, or ``None`` when the
        network does not have one.

    LSTM meaning: https://en.wikipedia.org/wiki/Long_short-term_memory
    """
    network = ocrolib.load_object(model_name)

    # Only LSTM nodes get their buffers allocated; other nodes are skipped.
    for candidate in network.walk():
        if not isinstance(candidate, ocrolib.lstm.LSTM):
            continue
        candidate.allocate(5000)

    # lnorm is returned separately for the caller's convenience.
    return network, getattr(network, "lnorm", None)
Beispiel #3
0
def load_lstm(fname):
    """Load a line recognizer from ``fname``.

    When ``args.clstm`` is set, a fresh ``lstm.SeqRecognizer`` is built from
    ``args.height`` / ``args.hiddensize`` and the module-level ``codec``, and
    its ``lstm`` attribute is replaced by a clstm bidirectional LSTM whose
    weights are read from ``fname``.

    Otherwise the object stored in ``fname`` is loaded with
    ``ocrolib.load_object``, upgraded via ``network.upgrade()``, and
    ``postLoad()`` is run on every node of ``network.walk()``.

    Args:
        fname: path of the saved model to load.

    Returns:
        The loaded recognizer.
    """
    if args.clstm:
        network = lstm.SeqRecognizer(args.height, args.hiddensize,
                                     codec=codec,
                                     normalize=lstm.normalize_nfkc)
        # clstm is an optional dependency, so it is imported only when used.
        import clstm
        mylstm = clstm.make_BIDILSTM()
        mylstm.init(network.No, args.hiddensize, network.Ni)
        mylstm.load(fname)
        network.lstm = clstm.CNetwork(mylstm)
        return network

    # Load from the path the caller passed in; the previous code ignored
    # `fname` here and read the unrelated global `last_save` instead.
    network = ocrolib.load_object(fname)
    network.upgrade()
    for x in network.walk():
        x.postLoad()
    return network
parser.add_argument("files",
                    nargs="+",
                    help="input files; glob and @ expansion performed")

args = parser.parse_args()

files = args.files

# The report is named after the first model path: everything before its last
# '-' plus '-error.txt'.
str1 = files[0]
dash = str1.rfind('-')
# rfind() returns -1 when there is no '-'; slicing with -1 would silently
# drop the last character of the name, so fall back to the whole name.
fname_out = (str1[:dash] if dash != -1 else str1) + '-error'

# Collect the stored error of every model, in command-line order.
err = []
count = 0
m_name = []
for model in files:
    network = ocrolib.load_object(model)
    err.append(network.error)
    m_name.append(model)
    count += 1

min_err = min(err)
data = {}
data['minimum'] = {
    'name': m_name[err.index(min_err)],
    'error': min_err
}
data['last'] = {'name': m_name[-1], 'error': err[-1]}

# Open the report only once all models have loaded, so a failed load leaves no
# empty file behind, and let the context manager close it.
with open(fname_out + '.txt', 'w') as f:
    json.dump(data, f)
Beispiel #5
0
def ocr(image_path, segmentation_path, output_path, model_path):
    """
    Scan a single image with ocropus.

    Reads a single image file from ``image_path``, recognizes every line
    listed in the TEI segmentation at ``segmentation_path`` and writes the
    recognized text as a TEI document into ``output_path``.

    Args:
        image_path (unicode): Path of the input file
        segmentation_path (unicode): Path of the segmentation XML file.
        output_path (unicode): Path of the output file
        model_path (unicode): Path of the recognition model. Must be a pyrnn.gz
                             pickle dump interoperable with ocropus-rpred.

    Returns:
        (unicode): ``output_path``, exactly as it was passed in.

    Raises:
        NidabaOcropusException: The recognition model could not be loaded or
                                has no ``lnorm`` attribute. The underlying
                                error text is appended to the message.
    """

    try:
        logger.debug('Loading pyrnn from {}'.format(model_path))
        network = ocrolib.load_object(model_path, verbose=0)
        lnorm = getattr(network, "lnorm")
    except Exception as e:
        # Most exceptions have no `.msg`; reading it unconditionally raised
        # AttributeError and hid the real failure. Use it when present,
        # otherwise fall back to the exception's string form.
        detail = getattr(e, 'msg', None)
        if detail is None:
            detail = str(e)
        raise NidabaOcropusException('Something somewhere broke: ' + detail)
    im = Image.open(image_path)

    logger.debug('Loading TEI segmentation {}'.format(segmentation_path))
    tei = TEIFacsimile()
    with open(segmentation_path, 'r') as seg_fp:
        tei.read(seg_fp)

    logger.debug('Clearing out word/grapheme boxes')
    # ocropus is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('ocropus', 'character recognition')
    for box in tei.lines:
        # box[4] is used as the line identifier; all but the last two fields
        # are handed to PIL as the crop rectangle (presumably x0, y0, x1, y1
        # -- verify against TEIFacsimile.lines).
        logger.debug('Recognizing line {}'.format(box[4]))
        ib = tuple(int(x) for x in box[:-2])
        line = ocrolib.pil2array(im.crop(ib))
        # Invert and rescale a copy for measuring the line geometry.
        # NOTE(review): a crop whose pixels are all equal makes np.amax(temp)
        # zero, so this division yields NaNs -- confirm whether blank lines
        # can occur here.
        temp = np.amax(line) - line
        temp = temp * 1.0 / np.amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=np.amax(line))
        if line.ndim == 3:
            # np.mean returns a new array; the result was previously thrown
            # away, leaving the colour axis in place.
            line = np.mean(line, 2)
        line = ocrolib.lstm.prepare_line(line, 16)
        pred = network.predictString(line)
        pred = ocrolib.normalize_text(pred)
        logger.debug('Scoping line {}'.format(box[4]))
        tei.scope_line(box[4])
        logger.debug('Adding graphemes: {}'.format(pred))
        tei.add_graphemes(pred)
    with open(output_path, 'wb') as fp:
        # File objects have no `abs_path` attribute; log the path we opened.
        logger.debug('Writing TEI to {}'.format(output_path))
        tei.write(fp)
    return output_path
Beispiel #6
0
def ocr(image_path, segmentation_path, output_path, model_path):
    """
    Scan a single image with ocropus.

    Reads a single image file from ``image_path``, recognizes every line
    listed in the TEI segmentation at ``segmentation_path`` and writes the
    recognized text as a TEI document into ``output_path``.

    Args:
        image_path (unicode): Path of the input file
        segmentation_path (unicode): Path of the segmentation XML file.
        output_path (unicode): Path of the output file
        model_path (unicode): Path of the recognition model. Must be a pyrnn.gz
                             pickle dump interoperable with ocropus-rpred.

    Returns:
        (unicode): ``output_path``, exactly as it was passed in.

    Raises:
        NidabaOcropusException: The recognition model could not be loaded or
                                has no ``lnorm`` attribute. The underlying
                                error text is appended to the message.
    """

    try:
        logger.debug('Loading pyrnn from {}'.format(model_path))
        network = ocrolib.load_object(model_path, verbose=0)
        lnorm = getattr(network, "lnorm")
    except Exception as e:
        # Most exceptions have no `.msg`; reading it unconditionally raised
        # AttributeError and hid the real failure. Use it when present,
        # otherwise fall back to the exception's string form.
        detail = getattr(e, 'msg', None)
        if detail is None:
            detail = str(e)
        raise NidabaOcropusException('Something somewhere broke: ' + detail)
    im = Image.open(image_path)

    logger.debug('Loading TEI segmentation {}'.format(segmentation_path))
    tei = TEIFacsimile()
    with open(segmentation_path, 'r') as seg_fp:
        tei.read(seg_fp)

    logger.debug('Clearing out word/grapheme boxes')
    # ocropus is a line recognizer
    tei.clear_graphemes()
    tei.clear_segments()
    # add and scope new responsibility statement
    tei.add_respstmt('ocropus', 'character recognition')
    for box in tei.lines:
        # box[4] is used as the line identifier; all but the last two fields
        # are handed to PIL as the crop rectangle (presumably x0, y0, x1, y1
        # -- verify against TEIFacsimile.lines).
        logger.debug('Recognizing line {}'.format(box[4]))
        ib = tuple(int(x) for x in box[:-2])
        line = ocrolib.pil2array(im.crop(ib))
        # Invert and rescale a copy for measuring the line geometry.
        # NOTE(review): a crop whose pixels are all equal makes np.amax(temp)
        # zero, so this division yields NaNs -- confirm whether blank lines
        # can occur here.
        temp = np.amax(line) - line
        temp = temp * 1.0 / np.amax(temp)
        lnorm.measure(temp)
        line = lnorm.normalize(line, cval=np.amax(line))
        if line.ndim == 3:
            # np.mean returns a new array; the result was previously thrown
            # away, leaving the colour axis in place.
            line = np.mean(line, 2)
        line = ocrolib.lstm.prepare_line(line, 16)
        pred = network.predictString(line)
        pred = ocrolib.normalize_text(pred)
        logger.debug('Scoping line {}'.format(box[4]))
        tei.scope_line(box[4])
        logger.debug('Adding graphemes: {}'.format(pred))
        tei.add_graphemes(pred)
    with open(output_path, 'wb') as fp:
        # File objects have no `abs_path` attribute; log the path we opened.
        logger.debug('Writing TEI to {}'.format(output_path))
        tei.write(fp)
    return output_path
# Banner: a blank line, ten '#' plus the first 60 characters of the command
# line, then another blank line.
print_info("")
print_info("#" * 10, " ".join(sys.argv)[:60])
print_info("")

# Expand the file arguments and, unless --quiet was given, report the count.
inputs = ocrolib.glob_all(args.files)
if not args.quiet:
    print_info("#inputs" + str(len(inputs)))

# Force args.parallel to 1 whenever display (args.show) or saving (args.save)
# has been requested.
if args.show >= 0 or args.save is not None:
    args.parallel = 1

# Load the classification network: run postLoad() on every node first, then
# allocate(5000) on the LSTM nodes in a second pass.
try:
    network = ocrolib.load_object(args.model, verbose=1)
    for x in network.walk():
        x.postLoad()
    for x in network.walk():
        if not isinstance(x, lstm.LSTM):
            continue
        x.allocate(5000)
except ocrolib.FileNotFound:
    # A missing model is fatal: explain where models are looked up and exit.
    print_error("")
    print_error("Cannot find OCR model file:" + args.model)
    print_error("Download a model and put it into:" + ocrolib.default.modeldir)
    print_error("(Or override the location with OCROPUS_DATA.)")
    print_error("")
    sys.exit(1)

# get the line normalizer from the loaded network, or optionally
# let the user override it (this is not very useful)
Beispiel #8
0
import h5py
import numpy
import json
import argparse
import ocrolib

# Command line: one positional input model and an optional output filename.
parser = argparse.ArgumentParser("convert pyrnn to clstm")
parser.add_argument("file")

parser.add_argument("-o", "--out", default="en-default.hdf5",
                    help="Filename to export the parameters")

args = parser.parse_args()


recognizer = ocrolib.load_object(args.file)
# TODO: load gzipped things
# recognizer = pickle.load(open('en-default.pyrnn'))

# The recognizer's lstm holds two sub-networks; the first of them in turn
# holds the forward net and a wrapper whose `.net` is the reversed-direction
# net.
parallel, softmax = recognizer.lstm.nets
fwdnet, revnet = parallel.nets

# NOTE(review): `nf` is not closed in the visible part of this script --
# confirm it is closed (or flushed) after the last dataset is written.
nf = h5py.File(args.out, "w")

# Copy each weight matrix of the forward and reversed nets into its own
# float dataset.
for w in "WGI WGF WGO WCI".split():
    fwd_weights = getattr(fwdnet, w)
    # Parenthesised single-argument form prints the same on Python 2 and is
    # valid syntax on Python 3 (the bare `print x` statement is not).
    print(fwd_weights.shape)
    dset = nf.create_dataset(".bidilstm.0.parallel.0.lstm." + w, fwd_weights.shape, dtype='f')
    dset[...] = fwd_weights
    rev_weights = getattr(revnet.net, w)
    dset = nf.create_dataset(".bidilstm.0.parallel.1.reversed.0.lstm." + w, rev_weights.shape, dtype='f')
    dset[...] = rev_weights