def predictString(self, line):
    """
    Recognizes the text contained in an input image.

    The image is reshaped to ``(-1, self.rnn.ninput(), 1)``, pushed through
    the network, and the resulting activations are stored on
    ``self.outputs`` as a ``(frames, self.rnn.noutput())`` matrix before
    being decoded.

    Args:
        line (numpy.array): Input image

    Returns:
        A unicode string containing the recognition result.
    """
    net = self.rnn

    # Forward pass on the float32 version of the reshaped input.
    frames = line.reshape(-1, net.ninput(), 1)
    net.inputs.aset(frames.astype('float32'))
    net.forward()

    # Flatten the network output to one row per input frame.
    activations = net.outputs.array()
    self.outputs = activations.reshape(frames.shape[0], net.noutput())

    # Only the first element of each located entry is used as the label.
    located = kraken.lib.lstm.translate_back_locations(self.outputs)
    label_ids = [entry[0] for entry in located]

    # Copy the labels into a clstm container understood by the decoder.
    classes = clstm.Classes()
    classes.resize(len(label_ids))
    for position, label in enumerate(label_ids):
        classes[position] = int(label)

    return net.decode(classes)
def trainString(self, line, s, update=1):
    """
    Trains the network using an input numpy array and a unicode string.

    Strings are assumed to be in ``display`` order as produced as the
    result of the BiDi algorithm.

    Args:
        line (numpy.array): Input image
        s (str): Expected output string
        update (bool): Switch to disable weight updates. It is forwarded
            to ``trainSequence``, so a falsy value skips the weight update.

    Returns:
        An unicode string containing the recognized sequence.
    """
    # Encode the ground truth string into the network's label space.
    labels = clstm.Classes()
    self.rnn.encode(labels, s)
    # Forward ``update`` explicitly; otherwise ``trainSequence`` falls back
    # to its default and the caller's switch is silently ignored.
    cls = self.trainSequence(line, labels, update=update)
    return self.rnn.decode(cls)
def trainSequence(self, line, labels, update=1):
    """
    Runs one training step on an input image against a label sequence.

    The image is fed forward through the network, the output is aligned
    with the target labels, the difference between the aligned targets and
    the network output is backpropagated, and the weights are optionally
    updated. The activations of the forward pass are kept on
    ``self.outputs``.

    Args:
        line (numpy.array): Input image
        labels (clstm.Classes): Label sequence
        update (bool): Switch to disable weight updates

    Returns:
        clstm.Classes containing the recognized label sequence.
    """
    net = self.rnn

    # Forward pass on the float32 version of the reshaped input.
    frames = line.reshape(-1, net.ninput(), 1)
    net.inputs.aset(frames.astype('float32'))
    net.forward()
    self.outputs = net.outputs.array().reshape(frames.shape[0],
                                               net.noutput())

    # Build the CTC alignment between the network output and the targets.
    target_seq = clstm.Sequence()
    aligned_seq = clstm.Sequence()
    clstm.mktargets(target_seq, labels, net.noutput())
    clstm.seq_ctc_align(aligned_seq, net.outputs, target_seq)

    # Deltas are aligned targets minus the current output; backpropagate
    # them and apply the weight update only when requested.
    net.d_outputs.aset(aligned_seq.array() - net.outputs.array())
    net.backward()
    if update:
        net.update()

    # Translate the stored activations back into a label sequence.
    recognized = kraken.lib.lstm.translate_back(self.outputs)
    result = clstm.Classes()
    result.resize(len(recognized))
    for position, label in enumerate(recognized):
        result[position] = label
    return result