class HTRModel:
    """Keras model wrapper for handwritten text recognition trained with a CTC loss."""

    def __init__(self, architecture, input_size, vocab_size,
                 greedy=False, beam_width=10, top_paths=1):
        """
        Initialization of a HTR Model.

        :param architecture: name of a callable available in this module's
            globals; it is looked up with ``globals()[architecture]``, so an
            unknown name raises ``KeyError``
        :param input_size: forwarded unchanged to the architecture callable
        :param vocab_size: forwarded to the architecture callable as
            ``vocab_size + 1`` (see ``compile``)
        :param greedy: forwarded to the CTC decoder in ``predict``
        :param beam_width: forwarded to the CTC decoder in ``predict``
        :param top_paths: forwarded to the CTC decoder in ``predict``;
            values below 1 are raised to 1
        """
        self.architecture = globals()[architecture]
        self.input_size = input_size
        self.vocab_size = vocab_size

        self.model = None
        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = max(1, top_paths)

    def summary(self, output=None, target=None):
        """
        Show/Save model structure (summary).

        The summary is always printed. When ``target`` is given it is also
        written to the file ``target`` inside the directory ``output``
        (created if missing); without ``output`` the file is written relative
        to the current working directory.
        """
        self.model.summary()

        if target is None:
            return

        # `output` defaults to None (and may be empty): neither value can be
        # handed to os.makedirs / os.path.join, so only use it when it is set.
        if output:
            os.makedirs(output, exist_ok=True)
            target = os.path.join(output, target)

        with open(target, "w") as f, redirect_stdout(f):
            self.model.summary()

    def load_checkpoint(self, target):
        """
        Load model weights from a checkpoint file.

        Nothing happens when ``target`` is not an existing file. The model is
        compiled first (default arguments) when it has not been built yet.
        """
        if not os.path.isfile(target):
            return

        if self.model is None:
            self.compile()

        self.model.load_weights(target)

    def get_callbacks(self, logdir, checkpoint, monitor="val_loss", verbose=0):
        """
        Setup the list of callbacks for the model.

        :param logdir: directory for ``epochs.log`` (CSV, ``;`` separated,
            appended) and for the TensorBoard logs
        :param checkpoint: file path where the best weights are saved
        :param monitor: metric watched by checkpointing, early stopping and
            learning-rate reduction
        :param verbose: verbosity passed to those three callbacks
        :return: list of callbacks
        """
        callbacks = [
            CSVLogger(
                filename=os.path.join(logdir, "epochs.log"),
                separator=";",
                append=True),
            TensorBoard(
                log_dir=logdir,
                histogram_freq=10,
                profile_batch=0,
                write_graph=True,
                write_images=False,
                update_freq="epoch"),
            ModelCheckpoint(
                filepath=checkpoint,
                monitor=monitor,
                save_best_only=True,
                save_weights_only=True,
                verbose=verbose),
            EarlyStopping(
                monitor=monitor,
                min_delta=1e-8,
                patience=20,
                restore_best_weights=True,
                verbose=verbose),
            ReduceLROnPlateau(
                monitor=monitor,
                min_delta=1e-8,
                factor=0.2,
                patience=15,
                verbose=verbose)
        ]

        return callbacks

    def compile(self, learning_rate=None):
        """
        Configures the HTR Model for training/predict.

        The architecture callable is invoked with
        ``(input_size, vocab_size + 1, learning_rate)`` and must return the
        triple ``(inputs, outputs, optimizer)``.

        :param learning_rate: forwarded to the architecture callable
        """
        # define inputs, outputs and optimizer of the chosen architecture
        # NOTE(review): the extra class is presumably the CTC blank - confirm.
        outs = self.architecture(self.input_size, self.vocab_size + 1, learning_rate)
        inputs, outputs, optimizer = outs

        # create and compile
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(optimizer=optimizer, loss=self.ctc_loss_lambda_func)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.0,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            max_queue_size=10,
            workers=1,
            use_multiprocessing=False,
            **kwargs):
        """
        Model training on data yielded (fit function has support to generator).
        A fit() abstraction function of TensorFlow 2.

        Provide x parameter of the form: yielding (x, y, sample_weight).

        Every argument is passed through unchanged to ``self.model.fit``.

        :param: See tensorflow.keras.Model.fit()
        :return: A history object
        """
        out = self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=verbose,
                             callbacks=callbacks, validation_split=validation_split,
                             validation_data=validation_data, shuffle=shuffle,
                             class_weight=class_weight, sample_weight=sample_weight,
                             initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps, validation_freq=validation_freq,
                             max_queue_size=max_queue_size, workers=workers,
                             use_multiprocessing=use_multiprocessing, **kwargs)
        return out

    def predict(self,
                x,
                batch_size=None,
                verbose=0,
                steps=1,
                callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False,
                ctc_decode=True):
        """
        Model predicting on data yielded (predict function has support to generator).
        A predict() abstraction function of TensorFlow 2.

        Provide x parameter of the form: yielding [x].

        :param: See tensorflow.keras.Model.predict()
        :param ctc_decode: when False the raw model output is returned
            (as ``np.log`` of it); when True it is CTC decoded
        :return: ``np.log(raw output)`` on ``ctc_decode=False``; otherwise the
            tuple ``(predicts, probabilities)`` where ``predicts`` holds, for
            each sample, a list with one label list per decoded path (``-1``
            entries removed) and ``probabilities`` holds ``np.exp`` of the
            matching decoder log values
        """
        # Private Keras hook; it is not available on every TensorFlow 2.x
        # release, so it is only called when the model actually provides it.
        build_predict_fn = getattr(self.model, "_make_predict_function", None)
        if callable(build_predict_fn):
            build_predict_fn()

        if verbose == 1:
            print("Model Predict")

        out = self.model.predict(x=x, batch_size=batch_size, verbose=verbose, steps=steps,
                                 callbacks=callbacks, max_queue_size=max_queue_size,
                                 workers=workers, use_multiprocessing=use_multiprocessing)

        if not ctc_decode:
            return np.log(out)

        predicts, probabilities = [], []
        batches = self._batch_slices(len(out), steps)

        # Nothing was predicted: max() below would fail on an empty sequence.
        if not batches:
            return (predicts, probabilities)

        if verbose == 1:
            print("CTC Decode")
            progbar = Progbar(target=len(batches))

        # Every sample is decoded over the longest output sequence length.
        input_length = len(max(out, key=len))

        for done, (start, stop) in enumerate(batches, start=1):
            x_test = np.asarray(out[start:stop])
            x_test_len = np.asarray([input_length] * len(x_test))

            decode, log = K.ctc_decode(x_test,
                                       x_test_len,
                                       greedy=self.greedy,
                                       beam_width=self.beam_width,
                                       top_paths=self.top_paths)

            probabilities.extend([np.exp(x) for x in log])
            predicts.extend(self._group_paths_by_sample(decode))

            if verbose == 1:
                progbar.update(done)

        return (predicts, probabilities)

    @staticmethod
    def _batch_slices(total, steps):
        """Return (start, stop) pairs covering `total` items in at most `steps` non-empty batches."""
        if total <= 0:
            return []

        # A missing or non-positive step count means "one single batch".
        steps = max(1, int(steps) if steps else 1)
        size = -(-total // steps)  # ceiling division, keeps the remainder

        return [(start, min(start + size, total)) for start in range(0, total, size)]

    @staticmethod
    def _group_paths_by_sample(decoded):
        """Drop the -1 entries and regroup [path][sample] label rows as [sample][path]."""
        cleaned = [[[int(p) for p in row if p != -1] for row in path] for path in decoded]

        # Label lists differ in length, so the axes are swapped with zip():
        # NumPy cannot build a regular array from ragged nested lists.
        return [list(paths) for paths in zip(*cleaned)]

    @staticmethod
    def ctc_loss_lambda_func(y_true, y_pred):
        """
        Function for computing the CTC loss.

        :param y_true: label tensor; squeezed when it has more than 2 dims
        :param y_pred: model output tensor
        :return: mean of ``K.ctc_batch_cost`` over its result
        """
        # NOTE(review): squeeze has no axis, so every size-1 dimension is
        # removed (a batch of one included) - confirm the expected label shape.
        if len(y_true.shape) > 2:
            y_true = tf.squeeze(y_true)

        # Sum y_pred over its last axis, then over the remaining last axis.
        # NOTE(review): presumably y_pred rows are softmax outputs, making
        # this total the number of time steps per sample - confirm.
        input_length = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False)
        input_length = tf.math.reduce_sum(input_length, axis=-1, keepdims=True)

        # Label length is the count of non-zero entries of each label row.
        label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True, dtype="int64")

        loss = K.ctc_batch_cost(y_true, y_pred, input_length, label_length)
        loss = tf.reduce_mean(loss)

        return loss
class HTRModel:
    """Keras wrapper for handwritten text recognition holding a CTC training model and an inference model."""

    def __init__(self, architecture, input_size, vocab_size,
                 greedy=False, beam_width=100, top_paths=1):
        """
        Initialization of a HTR Model.

        :param architecture: name of a callable available in this module's
            globals; it is looked up with ``globals()[architecture]``, so an
            unknown name raises ``KeyError``
        :param input_size: forwarded unchanged to the architecture callable
        :param vocab_size: forwarded to the architecture callable as
            ``vocab_size + 1`` (see ``compile``)
        :param greedy: forwarded to the CTC decoder in ``predict``
        :param beam_width: forwarded to the CTC decoder in ``predict``
        :param top_paths: forwarded to the CTC decoder in ``predict``;
            values below 1 are raised to 1
        """
        self.architecture = globals()[architecture]
        self.input_size = input_size
        self.vocab_size = vocab_size

        self.greedy = greedy
        self.beam_width = beam_width
        self.top_paths = max(1, top_paths)

        # Both models are created by compile().
        self.model = None
        self.model_infer = None

    def summary(self, output=None, target=None):
        """
        Show/Save model structure (summary).

        The summary of the training model is always printed. When ``target``
        is given it is also written to the file ``target`` inside the
        directory ``output`` (created if missing); without ``output`` the file
        is written relative to the current working directory.
        """
        self.model.summary()

        if target is None:
            return

        # `output` defaults to None (and may be empty): neither value can be
        # handed to os.makedirs / os.path.join, so only use it when it is set.
        if output:
            os.makedirs(output, exist_ok=True)
            target = os.path.join(output, target)

        with open(target, "w") as f, redirect_stdout(f):
            self.model.summary()

    def load_checkpoint(self, target):
        """
        Load weights from a checkpoint file into both models.

        Nothing happens when ``target`` is not an existing file. The models
        are compiled first (default arguments) when the training model has
        not been built yet.
        """
        if not os.path.isfile(target):
            return

        if self.model is None:
            self.compile()

        self.model.load_weights(target)
        self.model_infer.load_weights(target)

    def get_callbacks(self, logdir, checkpoint, monitor="val_loss", verbose=0):
        """
        Setup the list of callbacks for the model.

        :param logdir: directory for ``epochs.log`` (CSV, ``;`` separated,
            appended) and for the TensorBoard logs
        :param checkpoint: file path where the best weights are saved
        :param monitor: metric watched by checkpointing, early stopping and
            learning-rate reduction
        :param verbose: verbosity passed to those three callbacks
        :return: list of callbacks
        """
        callbacks = [
            CSVLogger(filename=os.path.join(logdir, "epochs.log"),
                      separator=";",
                      append=True),
            TensorBoard(log_dir=logdir,
                        histogram_freq=10,
                        profile_batch=0,
                        write_graph=True,
                        write_images=False,
                        update_freq="epoch"),
            ModelCheckpoint(filepath=checkpoint,
                            monitor=monitor,
                            save_best_only=True,
                            save_weights_only=True,
                            verbose=verbose),
            EarlyStopping(monitor=monitor,
                          min_delta=1e-8,
                          patience=20,
                          restore_best_weights=True,
                          verbose=verbose),
            ReduceLROnPlateau(monitor=monitor,
                              min_delta=1e-8,
                              factor=0.2,
                              patience=12,
                              verbose=verbose)
        ]

        return callbacks

    def compile(self, learning_rate=None):
        """
        Configures the HTR Model for training/predict.

        There are 2 Tensorflow Keras models:
            - ``self.model``: for training
            - ``self.model_infer``: for predicting (with/without CTC decode)

        Lambda layers are used to compute:
            - the CTC loss function
            - the raw (not decoded) network output

        The architecture callable is invoked with
        ``(input_size, vocab_size + 1, learning_rate)`` and must return the
        triple ``(inputs, outputs, optimizer)``.

        :param learning_rate: forwarded to the architecture callable
        """
        # define inputs, outputs and optimizer of the chosen architecture
        # NOTE(review): the extra class is presumably the CTC blank - confirm.
        outs = self.architecture(self.input_size, self.vocab_size + 1, learning_rate)
        inputs, outputs, optimizer = outs

        # others inputs for the CTC approach
        labels = Input(name="labels", shape=[None])
        input_length = Input(name="input_length", shape=[1])
        label_length = Input(name="label_length", shape=[1])

        # lambda layer for computing the loss function
        loss_out = Lambda(self.ctc_loss_lambda_func, output_shape=(1, ), name="CTCloss")(
            [outputs, labels, input_length, label_length])

        # lambda layer for the raw data function (returns the first input only)
        out_raw_dense = Lambda(lambda y_pred: y_pred[0], output_shape=(None, None),
                               name="NoCTCdecode", dtype="float32")([outputs, input_length])

        # create Tensorflow Keras models
        self.model = Model(inputs=[inputs, labels, input_length, label_length], outputs=loss_out)
        self.model_infer = Model(inputs=[inputs, input_length], outputs=out_raw_dense)

        # compile models: the Lambda output already is the value to minimise,
        # so the Keras loss just returns the prediction
        self.model.compile(loss={"CTCloss": lambda yt, yp: yp}, optimizer=optimizer)
        self.model_infer.compile(loss={"NoCTCdecode": lambda yt, yp: yp}, optimizer=optimizer)

    def fit(self,
            x=None,
            y=None,
            batch_size=None,
            epochs=1,
            verbose=1,
            callbacks=None,
            validation_split=0.0,
            validation_data=None,
            shuffle=True,
            class_weight=None,
            sample_weight=None,
            initial_epoch=0,
            steps_per_epoch=None,
            validation_steps=None,
            validation_freq=1,
            max_queue_size=10,
            workers=1,
            use_multiprocessing=False,
            **kwargs):
        """
        Model training on data yielded (fit function has support to generator).
        A fit() abstraction function of TensorFlow 2 using the training model
        (``self.model``).

        Provide x parameter of the form: (x, y, sample_weight), where:
            x: inputs = {
                    "input": x_valid,
                    "labels": y_valid,
                    "input_length": x_valid_len,
                    "label_length": y_valid_len
                }
            y: output = {
                    "CTCloss": np.zeros(self.batch_size, dtype=int)
                }
            sample_weight: []

            yielding: (inputs, output, [])

        Every argument is passed through unchanged to ``self.model.fit``.

        :param: See tensorflow.keras.Model.fit()
        :return: A history object
        """
        out = self.model.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=verbose,
                             callbacks=callbacks, validation_split=validation_split,
                             validation_data=validation_data, shuffle=shuffle,
                             class_weight=class_weight, sample_weight=sample_weight,
                             initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch,
                             validation_steps=validation_steps, validation_freq=validation_freq,
                             max_queue_size=max_queue_size, workers=workers,
                             use_multiprocessing=use_multiprocessing, **kwargs)
        return out

    def predict(self,
                x,
                batch_size=None,
                verbose=0,
                steps=1,
                callbacks=None,
                max_queue_size=10,
                workers=1,
                use_multiprocessing=False,
                ctc_decode=True):
        """
        Model predicting on data yielded (predict function has support to generator).
        A predict() abstraction function of TensorFlow 2 using the inference
        model (``self.model_infer``).

        Provide x parameter of the form: [x_test, x_test_len]

        :param: See tensorflow.keras.Model.predict()
        :param ctc_decode: when False the raw model output is returned
            unchanged; when True it is CTC decoded
        :return: raw output on ``ctc_decode=False``; otherwise the tuple
            ``(predicts, probabilities)`` where ``predicts`` holds, for each
            sample, a list with one label list per decoded path (``-1``
            entries removed) and ``probabilities`` holds ``np.exp`` of the
            matching decoder log values
        """
        # Private Keras hook; it is not available on every TensorFlow 2.x
        # release, so it is only called when the model actually provides it.
        build_predict_fn = getattr(self.model_infer, "_make_predict_function", None)
        if callable(build_predict_fn):
            build_predict_fn()

        if verbose == 1:
            print("Model Predict")

        out = self.model_infer.predict(x=x, batch_size=batch_size, verbose=verbose, steps=steps,
                                       callbacks=callbacks, max_queue_size=max_queue_size,
                                       workers=workers, use_multiprocessing=use_multiprocessing)

        if not ctc_decode:
            return out

        predicts, probabilities = [], []

        # Ceiling-sized batches: a sample count that is not a multiple of
        # `steps` must not lose its trailing samples.
        batches = self._batch_slices(len(out), steps)

        # Nothing was predicted: max() below would fail on an empty sequence.
        if not batches:
            return (predicts, probabilities)

        if verbose == 1:
            print("CTC Decode")
            progbar = Progbar(target=len(batches))

        # Every sample is decoded over the longest output sequence length.
        max_text_length = len(max(out, key=len))

        for done, (start, stop) in enumerate(batches, start=1):
            x_test = np.asarray(out[start:stop])
            # Sized per batch so it always matches a shorter final batch.
            x_test_len = np.asarray([max_text_length] * len(x_test))

            decode, log = K.ctc_decode(x_test,
                                       x_test_len,
                                       greedy=self.greedy,
                                       beam_width=self.beam_width,
                                       top_paths=self.top_paths)

            probabilities.extend([np.exp(x) for x in log])
            predicts.extend(self._group_paths_by_sample(decode))

            if verbose == 1:
                progbar.update(done)

        return (predicts, probabilities)

    @staticmethod
    def _batch_slices(total, steps):
        """Return (start, stop) pairs covering `total` items in at most `steps` non-empty batches."""
        if total <= 0:
            return []

        # A missing or non-positive step count means "one single batch".
        steps = max(1, int(steps) if steps else 1)
        size = -(-total // steps)  # ceiling division, keeps the remainder

        return [(start, min(start + size, total)) for start in range(0, total, size)]

    @staticmethod
    def _group_paths_by_sample(decoded):
        """Drop the -1 entries and regroup [path][sample] label rows as [sample][path]."""
        cleaned = [[[int(p) for p in row if p != -1] for row in path] for path in decoded]

        # Label lists differ in length, so the axes are swapped with zip():
        # NumPy cannot build a regular array from ragged nested lists.
        return [list(paths) for paths in zip(*cleaned)]

    @staticmethod
    def ctc_loss_lambda_func(args):
        """
        Function for computing the ctc loss (can be put in a Lambda layer)

        :param args:
            y_pred, labels, input_length, label_length
        :return:
            CTC loss
        """
        y_pred, labels, input_length, label_length = args
        return K.ctc_batch_cost(labels, y_pred, input_length, label_length)