import pandas as pd
import tensorflow as tf


def train_ocr_model_sim(params, logger):
    dsize = params['dsize']
    model_params = params['model_params']
    in_folder = params['input_folder']
    alphabet = params['alphabet']
    in_channels = model_params['in_channels']
    out_channels = model_params['out_channels']
    metadata = get_plates_text_metadata(params)
    # Point the metadata at the rendered plate images (PNG).
    metadata.image = 'plates_' + metadata.image
    metadata.image = metadata.image.str.split('.').str[0] + '.png'
    test_meta = metadata.query("set == 'test'")
    test_meta = set_index(test_meta)
    model, preprocess_input = get_model_definition(**model_params)
    f_train_params = {
        'dsize': dsize,
        'in_channels': in_channels,
        'out_channels': out_channels,
        'alphabet': alphabet,
    }
    f_test_params = {
        'folder': in_folder,
        'metadata': test_meta,
        'dsize': dsize,
        'in_channels': in_channels,
        'out_channels': out_channels,
        'alphabet': alphabet,
    }
    # Training data is simulated; validation runs on the real test images.
    data_train = ImageTextLabelGenerator(
        get_image_text_label_sim, preprocess_input, f_train_params)
    data_val = ImageTextLabelGenerator(
        get_image_text_label, preprocess_input, f_test_params)
    train_model_gen(data_train, data_val, model, params, logger)
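# Usage sketch (hypothetical values): the params schema is defined elsewhere
# in the repo; the keys below are only the ones train_ocr_model_sim reads,
# filled with made-up example values.
#
# example_params = {
#     'dsize': (64, 256),
#     'input_folder': 'data/plates',
#     'alphabet': ' abcdefghijklmnopqrstuvwxyz0123456789',
#     'model_params': {'in_channels': 1, 'out_channels': 37},
# }
# train_ocr_model_sim(example_params, logger=logging.getLogger(__name__))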
def ocr_plates(params, logger):
    model_file = params['plate_ocr_model_file']
    input_folder = params['input_folder']
    dsize = params['plate_dsize']
    model_params = params['plate_ocr_model_params']
    in_channels = model_params['in_channels']
    out_channels = model_params['out_channels']
    alphabet = params['alphabet']
    logger.info("Loading model")
    plate_ocr_model, plate_ocr_preprocessing = plate_ocr_model_def(
        **model_params)
    plate_ocr_model.load_weights(model_file)
    logger.info("Loading data")
    meta = get_plates_text_metadata(params)
    meta.file_name = 'plate_' + meta.file_name
    meta.file_name = meta.file_name.str.split('.').str[0] + '.png'
    meta = set_index(meta)
    x, _ = get_image_text_label(input_folder, meta, dsize, in_channels,
                                out_channels, alphabet)
    # Rebuild one image per prediction row.
    images = map(lambda idx: pred2im(x, dsize, idx, in_channels),
                 range(len(x)))
    # Everything image_ocr needs at inference time travels in the context.
    context = {
        'plate_ocr_model': plate_ocr_model,
        'plate_ocr_preprocessing': plate_ocr_preprocessing,
        'logger': logger,
    }
    context.update(params)
    events = [{'image': im, 'file': filename, 'ejec_id': ejec_id}
              for ejec_id, filename, im
              in zip(range(len(meta)), meta.file_name, images)]
    results = map(lambda e: image_ocr(event=e, context=context), events)
    results = map(lambda e: {k: e[k] for k in ('filename', 'text')}, results)
    results = pd.DataFrame(results)
    results.to_csv(f"{params['output_folder']}/ocr_events_results.csv")
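# Example invocation (hypothetical paths/values): ocr_plates only needs the
# keys read above; the rest of params is passed through to image_ocr via the
# context dict.
#
# ocr_plates(
#     params={
#         'plate_ocr_model_file': 'models/plate_ocr.h5',
#         'input_folder': 'data/plates',
#         'output_folder': 'output',
#         'plate_dsize': (64, 256),
#         'plate_ocr_model_params': {'in_channels': 1, 'out_channels': 37},
#         'alphabet': ' abcdefghijklmnopqrstuvwxyz0123456789',
#     },
#     logger=logging.getLogger(__name__),
# )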
def train_ocr_model(params, logger):
    embedding_dim = 10
    units = 20
    vocab_size = len(params['alphabet'])
    num_steps = 10
    alphabet = '*- abcdefghijklmnopqrstuvwxyz0123456789'  # '*': start, '-': end
    word_index = {char: idx for idx, char in enumerate(alphabet)}
    index_word = {idx: char for idx, char in enumerate(alphabet)}
    img_height = 16 * 4
    img_width = 16 * 16
    encoder = CNN_Encoder(embedding_dim, img_height, img_width)
    decoder = RNN_Decoder(embedding_dim, units, vocab_size)
    optimizer = tf.keras.optimizers.Adam()
    loss_plot = []

    @tf.function
    def train_step(img_tensor, target):
        loss = 0
        # Reset the decoder hidden state for each batch.
        batch_size = target.shape[0]
        sentence_len = target.shape[2]
        hidden = decoder.reset_state(batch_size=batch_size)
        # Every sequence starts with the '*' (start) token.
        dec_input = tf.expand_dims([word_index['*']] * batch_size, 1)
        with tf.GradientTape() as tape:
            features = encoder(img_tensor)
            for idx in range(1, sentence_len):
                # Pass the image features through the decoder one step at a time.
                predictions, hidden, _ = decoder(dec_input, features, hidden)
                target_char = tf.reshape(target[0, 0, idx, :],
                                         (1, target.shape[-1]))
                target_char = tf.argmax(target_char, axis=1)
                loss += loss_function(target_char, predictions)
                # Teacher forcing: feed the ground-truth character back in.
                dec_input = tf.expand_dims(target_char, 1)
        total_loss = loss / sentence_len
        trainable_variables = (encoder.trainable_variables
                               + decoder.trainable_variables)
        gradients = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(zip(gradients, trainable_variables))
        return loss, total_loss

    epochs = 20
    dsize = params['dsize']
    model_params = params['model_params']
    in_folder = params['input_folder']
    alphabet = params['alphabet']
    in_channels = model_params['in_channels']
    out_channels = model_params['out_channels']
    metadata = get_plates_text_metadata(params)
    metadata.file_name = 'plate_' + metadata.file_name
    metadata.file_name = metadata.file_name.str.split('.').str[0] + '.png'
    train_meta = metadata.query("set == 'train'")
    train_meta = set_index(train_meta)
    model, preprocess_input = get_model_definition(**model_params)
    f_train_params = {
        'folder': in_folder,
        'metadata': train_meta,
        'dsize': dsize,
        'in_channels': in_channels,
        'out_channels': out_channels,
        'alphabet': alphabet,
    }
    data_train = ImageTextLabelGenerator(
        get_image_text_label, preprocess_input, f_train_params)
    for epoch in range(epochs):
        total_loss = 0
        for batch, (img_tensor, target) in enumerate(data_train):
            batch_loss, t_loss = train_step(img_tensor, target)
            total_loss += t_loss
            loss_debug = batch_loss.numpy() / int(target.shape[1])
            logger.info(f'Epoch {epoch + 1} Batch {batch} Loss {loss_debug}')
        # Store the epoch-level loss so it can be plotted later.
        loss_plot.append(total_loss / num_steps)
        logger.info(f'Epoch {epoch + 1} Loss {total_loss / num_steps}')
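# `loss_function` is referenced by `train_step` above but not defined in this
# section. Below is a plausible sketch following the masked
# sparse-categorical-crossentropy pattern from the TensorFlow image-captioning
# tutorial that this training loop mirrors; treating index 0 ('*') as the
# token to mask out is an assumption, not something this file confirms.
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')


def loss_function(real, pred):
    # Per-character cross-entropy, zeroing out positions that hold the
    # masked token (assumed to be index 0).
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)
    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask
    return tf.reduce_mean(loss_)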
def test_get_plates_text_metadata():
    params = get_params()
    plates_text_metadata = get_plates_text_metadata(params)
    # Basic smoke check: the metadata table should load and be non-empty.
    assert len(plates_text_metadata) > 0