예제 #1
0
def train_ocr_model_sim(params):
    """Train the OCR model on simulated plate images.

    Builds a training generator backed by the simulated image/label
    function and a validation generator backed by the real test-set
    images, then delegates the training loop to ``train_model_gen``.
    """
    model_params = params['model_params']
    target_size = params['dsize']
    source_folder = params['input_folder']
    charset = params['alphabet']
    channels_in = model_params['in_channels']
    channels_out = model_params['out_channels']
    # Normalize image names: prefix with 'plates_' and force a .png extension.
    metadata = get_plates_text_metadata(params)
    metadata.image = 'plates_' + metadata.image
    metadata.image = metadata.image.str.split('.').str[0] + '.png'
    test_meta = set_index(metadata.query("set == 'test'"))
    model, preprocess_input = get_model_definition(**model_params)
    # The simulated generator needs no folder/metadata — images are synthesized.
    sim_params = {
        'dsize': target_size,
        'in_channels': channels_in,
        'out_channels': channels_out,
        'alphabet': charset
    }
    real_params = {
        'folder': source_folder,
        'metadata': test_meta,
        'dsize': target_size,
        'in_channels': channels_in,
        'out_channels': channels_out,
        'alphabet': charset
    }
    data_train = ImageTextLabelGenerator(
        get_image_text_label_sim, preprocess_input, sim_params)
    data_val = ImageTextLabelGenerator(
        get_image_text_label, preprocess_input, real_params)
    train_model_gen(data_train, data_val, model, params, logger)
예제 #2
0
def ocr_plates(params, logger):
    """Run plate OCR over every image in the metadata and dump a CSV.

    Loads the trained OCR model, reconstructs an image tensor per
    metadata row, runs ``image_ocr`` on each one, and writes the
    resulting (filename, text) rows to ``ocr_events_results.csv`` in
    the configured output folder.
    """
    model_params = params['plate_ocr_model_params']
    weights_file = params['plate_ocr_model_file']
    source_folder = params['input_folder']
    target_size = params['plate_dsize']
    channels_in = model_params['in_channels']
    channels_out = model_params['out_channels']
    charset = params['alphabet']
    logger.info("Loading model")
    plate_ocr_model, plate_ocr_preprocessing = plate_ocr_model_def(
        **model_params)
    plate_ocr_model.load_weights(weights_file)
    logger.info("Loading data")
    meta = get_plates_text_metadata(params)
    # Normalize file names: prefix with 'plate_' and force a .png extension.
    meta.file_name = 'plate_' + meta.file_name
    meta.file_name = meta.file_name.str.split('.').str[0] + '.png'
    meta = set_index(meta)
    x, _ = get_image_text_label(source_folder, meta, target_size, channels_in,
                                channels_out, charset)
    # Lazily rebuild one displayable image per prediction row.
    images = (pred2im(x, target_size, idx, channels_in)
              for idx in range(len(x)))
    context = {
        'plate_ocr_model': plate_ocr_model,
        'plate_ocr_preprocessing': plate_ocr_preprocessing,
        'logger': logger,
    }
    context.update(params)
    events = [
        {'image': image, 'file': name, 'ejec_id': ejec_id}
        for ejec_id, (name, image) in enumerate(zip(meta.file_name, images))
    ]
    ocr_outputs = (image_ocr(event=event, context=context)
                   for event in events)
    rows = ({'filename': out['filename'], 'text': out['text']}
            for out in ocr_outputs)
    pd.DataFrame(rows).to_csv(
        f"{params['output_folder']}/ocr_events_results.csv")
예제 #3
0
def train_ocr_model(params):
    """Train a CNN-encoder / RNN-decoder OCR model with teacher forcing.

    Builds an encoder/decoder pair, defines a tf.function training step,
    then iterates over batches produced by ImageTextLabelGenerator for a
    fixed number of epochs, logging the loss per batch and per epoch.
    """
    embedding_dim = 10
    units = 20
    vocab_size = len(params['alphabet'])
    # NOTE(review): num_steps is fixed at 10 and used below to average the
    # epoch loss regardless of the actual number of batches — confirm.
    num_steps = 10
    # NOTE(review): this hard-coded alphabet (used to build word_index /
    # index_word) is shadowed by params['alphabet'] further down — the two
    # must agree for the '*' start-token lookup to be meaningful; verify.
    alphabet = '*- abcdefghijklmnopqrstuvwxyz0123456789'  # {*: start, -: end}
    word_index = {char: idx for idx, char in enumerate(alphabet)}
    index_word = {idx: char for idx, char in enumerate(alphabet)}

    # Fixed input geometry expected by the encoder: 64 x 256 pixels.
    img_height = 16 * 4
    img_width = 16 * 16
    encoder = CNN_Encoder(embedding_dim, img_height, img_width)
    decoder = RNN_Decoder(embedding_dim, units, vocab_size)
    optimizer = tf.keras.optimizers.Adam()
    loss_plot = []  # per-epoch average loss, kept for later plotting

    @tf.function
    def train_step(img_tensor, target):
        # Runs one teacher-forced decoding pass over a batch and applies
        # gradients. Returns (summed loss, loss averaged over sentence length).
        loss = 0
        """
        Reset of the hidden state for each batch
        """
        batch_size = target.shape[0]
        # NOTE(review): assumes target has rank 4 with the sentence length on
        # axis 2 and one-hot chars on the last axis — confirm against the
        # generator's output shape.
        sentence_len = target.shape[2]
        hidden = decoder.reset_state(batch_size=batch_size)
        # Seed decoding with the '*' start token for every sequence in the batch.
        dec_input = tf.expand_dims([word_index['*']] * batch_size, 1)
        with tf.GradientTape() as tape:
            features = encoder(img_tensor)
            for idx in range(1, sentence_len):
                # Passing the features through the decoder
                predictions, hidden, _ = decoder(dec_input, features, hidden)
                # print(1, predictions.numpy())
                # NOTE(review): indexes target[0, 0, ...] — only the first
                # batch element's labels ever contribute to the loss; confirm
                # whether this is intentional (e.g. batch_size == 1).
                target_char = tf.reshape(target[0, 0, idx, :],
                                         (1, target.shape[-1]))
                target_char = tf.argmax(target_char, axis=1)
                # print(2, target_char.eval())
                partial_loss = loss_function(target_char, predictions)
                loss += partial_loss
                # Using teacher forcing
                dec_input = tf.expand_dims(target_char, 1)
        total_loss = (loss / sentence_len)
        trainable_variables = encoder.trainable_variables + decoder.trainable_variables
        # Gradients are taken on the summed loss, not the per-char average.
        gradients = tape.gradient(loss, trainable_variables)
        optimizer.apply_gradients(zip(gradients, trainable_variables))
        return loss, total_loss

    epochs = 20

    dsize = params['dsize']
    model_params = params['model_params']
    in_folder = params['input_folder']
    alphabet = params['alphabet']
    #
    in_channels = model_params['in_channels']
    out_channels = model_params['out_channels']
    # Normalize file names: prefix with 'plate_' and force a .png extension.
    metadata = get_plates_text_metadata(params)
    metadata.file_name = 'plate_' + metadata.file_name
    metadata.file_name = metadata.file_name.str.split('.').str[0] + '.png'
    train_meta = metadata.query("set == 'train'")
    train_meta = set_index(train_meta)
    model, preprocess_input = get_model_definition(**model_params)
    f_train_params = {
        'folder': in_folder,
        'metadata': train_meta,
        'dsize': dsize,
        'in_channels': in_channels,
        'out_channels': out_channels,
        'alphabet': alphabet
    }
    data_train = ImageTextLabelGenerator(get_image_text_label,
                                         preprocess_input, f_train_params)
    for epoch in range(0, epochs):
        total_loss = 0
        for batch, (img_tensor, target) in enumerate(data_train):
            batch_loss, t_loss = train_step(img_tensor, target)
            total_loss += t_loss
            # logger.info(f"target.mean(): {target.mean()}")
            # NOTE(review): divides by target.shape[1] here, while train_step
            # averages over target.shape[2] — confirm which axis is the
            # sentence length.
            loss_debug = batch_loss.numpy() / int(target.shape[1])
            logger.info(f'Epoch {epoch + 1} Batch {batch} Loss {loss_debug}')
        # Storing the epoch end loss value to plot later
        loss_plot.append(total_loss / num_steps)
        logger.info(f'Epoch {epoch + 1} Loss {total_loss / num_steps}')
예제 #4
0
def test_get_plates_text_metadata():
    """Smoke test: plates-text metadata loads without raising.

    The original body ended with a dead `a = 0` debugger anchor and
    asserted nothing, so the test could never fail on a bad result.
    Assert at minimum that the loader returns a non-None object.
    """
    params = get_params()
    plates_text_metadata = get_plates_text_metadata(params)
    assert plates_text_metadata is not None