Example 1
def save_model(weights_path: str, output_path: str):
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 *
                                               np.ones(BATCH_SIZE),
                                               merge_repeated=False)
    sparse_tensor_values = tf.to_int32(decodes[0]).values
    sparse_tensor_indices = tf.to_int32(decodes[0]).indices
    flattened_indices = tf.to_int32(tf.reshape(sparse_tensor_indices, [-1]))
    output = tf.concat([flattened_indices, sparse_tensor_values],
                       0,
                       name='output')

    saver = tf.train.Saver()
    sess = tf.Session()

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        save_graph(sess, output_path)
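`save_graph` is defined outside this snippet. A minimal sketch of what it plausibly does, assuming it freezes the session's variables into constants and serializes the GraphDef to disk (the 'output' node name comes from the tensor defined above):

from tensorflow.python.framework import graph_util

def save_graph(sess, output_path):
    # freeze variables into constants so the exported graph is self-contained,
    # then serialize the GraphDef to disk
    frozen = graph_util.convert_variables_to_constants(
        sess, sess.graph_def, output_node_names=['output'])
    with tf.gfile.GFile(output_path, 'wb') as f:
        f.write(frozen.SerializeToString())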
Example 2
def recognize(image_path: str, weights_path: str, files_limit=4):
    decoder = TextFeatureIO().reader
    images, filenames = load_images(image_path, files_limit)
    images = np.squeeze(images)
    padded_images = np.zeros([32, 32, 100, 3])
    padded_images[:images.shape[0], :, :, :] = images
    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32, shape=[32, 32, 100, 3], name='input')

    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out, 25 * np.ones(32), merge_repeated=False)

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():

        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)
        print("Predict...")
        start_time = time()
        predictions = sess.run(decoded, feed_dict={inputdata: padded_images})
        end_time = time()
        print("Prediction time: {}".format(end_time - start_time))
        preds_res = decoder.sparse_tensor_to_str(predictions[0])

        for i, fname in enumerate(filenames):
            print("{}: {}".format(fname, preds_res[i]))
Example 3
    def run(self):
        self._recognition_time = []
        images_sh, labels_sh, imagenames_sh = self.load_data()
        images_sh = tf.cast(x=images_sh, dtype=tf.float32)

        net = CRNN(phase='Test',
                   hidden_nums=256,
                   seq_length=25,
                   num_classes=37)
        with tf.variable_scope('shadow'):
            net_out = net.build(inputdata=images_sh)
        decoded, _ = tf.nn.ctc_beam_search_decoder(
            net_out,
            25 * np.ones(self._batch_size),
            merge_repeated=self._merge_repeated)
        sess_config = self.config_tf_session()

        # config tf saver
        saver = tf.train.Saver()
        sess = tf.Session(config=sess_config)

        with sess.as_default():
            # restore the model weights
            saver.restore(sess=sess, save_path=self._weights_path)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            self._log.info('Start predicting ...')
            accuracy, distance = self.test(decoded, imagenames_sh, images_sh,
                                           labels_sh, sess)
            coord.request_stop()
            coord.join(threads=threads)
        sess.close()
        avg_time = np.mean(self._recognition_time)
        return accuracy, distance, avg_time
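`config_tf_session` is defined elsewhere on this class and not shown. A minimal sketch, assuming it mirrors the GPU options configured in Example 6 below:

    def config_tf_session(self):
        # hypothetical sketch: allow GPU memory growth, as in Example 6
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.allow_growth = True
        return sess_config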
Example 4
    def _build_net_model(self, input_data):
        self._log.info('Build net model...')
        crnn = CRNN(phase='Train',
                    hidden_nums=256,
                    seq_length=25,
                    num_classes=37)
        with tf.variable_scope('shadow', reuse=False):
            net_out = crnn.build(inputdata=input_data)
        return net_out
Example 5
    def __init__(self, model_path='./checkpoints/CRNN.pth'):
        self.alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
        self.nclass = len(self.alphabet) + 1
        self.model = CRNN(crnn_params.imgH, 1, self.nclass, 256)
        self.use_gpu = torch.cuda.is_available()
        if self.use_gpu:
            self.model.cuda()
        self.model.load_state_dict(torch.load(model_path))
        for p in self.model.parameters():
            p.requires_grad = False
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)
Example 6
def recognize(image_path: str,
              weights_path: str,
              config: GlobalConfig,
              is_vis=True):
    logger = LogFactory.get_logger()
    image = load_and_resize_image(image_path)

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, 32, 100, 3],
                               name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)

    decoder = TextFeatureIO()

    # config tf session
    gpu_config = config.get_gpu_config()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = gpu_config.memory_fraction
    sess_config.gpu_options.allow_growth = gpu_config.is_tf_growth_allowed()

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)
        preds = sess.run(decodes, feed_dict={inputdata: image})
        preds = decoder.writer.sparse_tensor_to_str(preds[0])
        logger.info('Predict image {:s} label {:s}'.format(
            ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(
                cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()
    sess.close()
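`load_and_resize_image` is also defined elsewhere. A plausible sketch, given that the placeholder above expects a single 100x32 RGB image (the exact preprocessing is an assumption):

def load_and_resize_image(image_path: str):
    # cv2.resize takes (width, height); the network expects 100x32 RGB crops,
    # plus a leading batch dimension of 1
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (100, 32))
    return np.expand_dims(image, axis=0).astype(np.float32)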
Example 7
class OcrTextRec():
    def __init__(self, model_path='./checkpoints/CRNN.pth'):
        self.alphabet = ''.join([chr(uni) for uni in crnn_params.alphabet])
        self.nclass = len(self.alphabet) + 1
        self.model = CRNN(crnn_params.imgH, 1, self.nclass, 256)
        self.use_gpu = torch.cuda.is_available()
        if self.use_gpu:
            self.model.cuda()
        self.model.load_state_dict(torch.load(model_path))
        for p in self.model.parameters():
            p.requires_grad = False
        self.model.eval()
        self.converter = strLabelConverter(self.alphabet)

    def inference(self, image):
        if len(image.shape) == 3:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        h, w = image.shape
        w_new = int(w / h * crnn_params.imgH)
        image = cv2.resize(image, (w_new, crnn_params.imgH),
                           interpolation=cv2.INTER_CUBIC)
        image = (np.reshape(image,
                            (crnn_params.imgH, w_new, 1))).transpose(2, 0, 1)
        image = image.astype(np.float32) / 255.
        image = torch.from_numpy(image).type(torch.FloatTensor)
        image.sub_(crnn_params.mean).div_(crnn_params.std)
        image = image.view(1, *image.size())
        if self.use_gpu:
            image = image.cuda()

        preds = self.model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = torch.IntTensor([preds.size(0)])
        sim_pred = self.converter.decode(preds.data,
                                         preds_size.data,
                                         raw=False)

        return sim_pred
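Typical usage of `OcrTextRec` (the checkpoint and image paths are illustrative):

recognizer = OcrTextRec(model_path='./checkpoints/CRNN.pth')
crop = cv2.imread('word_crop.png')  # a single cropped text region
print(recognizer.inference(crop))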
Example 8
    image_names = np.sort(
        glob.glob(os.path.join(data_path, dataset_name, '*', '*.bmp')))
    print(len(image_names))
    annotation_path = os.path.join(data_path, dataset_name, 'Annotations')

    if not os.path.exists(annotation_path):
        os.makedirs(annotation_path)

    # TextBoxes++ + DenseNet
    tbpp = TBPP512_dense(softmax=False)
    # model = TBPP512(softmax=False)
    prior_util = PriorUtil(tbpp)
    checkdir = os.path.dirname(weights_path)

    input_width = 256
    input_height = 32
    crnn = CRNN((input_width, input_height, 1),
                len(alphabet),
                prediction_only=True,
                gru=False)

    print("started loading model weights")
    tic = time.time()
    tbpp.load_weights(weights_path)
    crnn.load_weights(weights_path_crnn)
    print(time.time() - tic)
    print("finished loading model weights")

    anno_tool = Annotater(tbpp, crnn, prior_util, image_names, annotation_path)
    anno_tool.annotate()
Example 9
import tensorflow as tf
import numpy as np
from crnn_model import CRNN
from utils import params, char_dict, decode_to_text, data_generator, sparse_tuple_from

# options
np.set_printoptions(precision=3)
np.set_printoptions(threshold=np.inf)
np.set_printoptions(edgeitems=30, linewidth=100000)

# init
iter = 0
continue_training = True
model = CRNN(num_classes=params['NUM_CLASSES'], training=True)
if continue_training:
    model.load_weights('checkpoints/model_default')
# model.build(input_shape=(2, 32, 200, 1))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=5)
loss_hist = []
# [print(i.name, i.shape) for i in model.trainable_variables]

# training
# dataset: https://www.robots.ox.ac.uk/~vgg/data/text/#sec-synth
# please check the data_generator in utils for path to the dataset
# the training set contains 7,224,612 images; at batch_size=64 that is ~112,884 batches
for x_batch, y_batch in data_generator(batches=112884,
                                       batch_size=64,
                                       epochs=10):
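    # The loop body is truncated in the source. A minimal sketch of one CTC
    # training step, assuming sparse_tuple_from returns (indices, values, shape)
    # for the label batch and the model emits time-major logits [T, N, classes]:
    with tf.GradientTape() as tape:
        logits = model(x_batch, training=True)
        labels = tf.SparseTensor(*sparse_tuple_from(y_batch))
        logit_length = tf.fill([tf.shape(logits)[1]], tf.shape(logits)[0])
        loss = tf.reduce_mean(
            tf.nn.ctc_loss(labels=labels, logits=logits, label_length=None,
                           logit_length=logit_length, blank_index=-1))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    loss_hist.append(float(loss))
    iter += 1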
Example 10
    print('Test loss: %.6f, accuracy: %.6f' % (loss_avg.val(), accuracy))

    return accuracy


if __name__ == "__main__":
    opt = parser.parse_args()
    image_root = opt.image_root
    val_label = opt.val_label
    trained_net = opt.trained_net
    batch_size = opt.batch_size

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    nclass = len(crnn_params.alphabet) + 1
    nc = 1
    model = CRNN(32, nc, nclass, crnn_params.nh)
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        model = model.cuda()
    criterion = torch.nn.CTCLoss(reduction='sum')
    if trained_net != '' and os.path.exists(trained_net):
        print('loading pretrained model from %s' % trained_net)
        model.load_state_dict(torch.load(trained_net))

    val_dataset = imgDataset(image_root,
                             val_label,
                             crnn_params.alphabet,
                             (crnn_params.imgW, crnn_params.imgH),
                             crnn_params.mean,
                             crnn_params.std,
                             is_aug=False)
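The snippet ends before the dataset is used. A plausible continuation, where `val` is a stand-in for the validation routine whose tail appears at the top of this example (its name and signature are assumptions):

    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=2)
    accuracy = val(model, val_loader, criterion, device)  # hypothetical call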
Example 11
def recognize(image_path: str,
              weights_path: str,
              output_file: str,
              files_limit=32):
    decoder = TextFeatureIO().reader
    # read all the files in the images folder
    files = [
        join(image_path, f) for f in listdir(image_path)
        if isfile(join(image_path, f))
    ]
    tf.reset_default_graph()
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)
    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    # top_paths=NUMBER_OF_PREDICTIONS is the number of words to predict
    decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(
        net_out,
        25 * np.ones(BATCH_SIZE),
        merge_repeated=False,
        top_paths=NUMBER_OF_PREDICTIONS)

    # config tf saver
    saver = tf.train.Saver()
    sess = tf.Session()
    with sess.as_default():
        # restore the model weights
        print("Restoring trained model")
        saver.restore(sess=sess, save_path=weights_path)
        print("Predicting {} images in chunks of {}".format(
            len(files), BATCH_SIZE))
        starting_time = time()

        # run inference in groups of BATCH_SIZE images,
        # over all the files from the provided folder
        for group in chunker(files, BATCH_SIZE):
            start_time = time()
            images, filenames = load_images(group, files_limit)
            images = np.squeeze(images)
            padded_images = np.zeros([BATCH_SIZE, 32, 100, 3])
            padded_images[:images.shape[0], :, :, :] = images

            predictions, probs = sess.run([decoded, log_probabilities],
                                          feed_dict={inputdata: padded_images})
            for i, fname in enumerate(filenames):
                result = ''
                # turn the beam log-probabilities into softmax probabilities
                e_x = np.exp(probs[i, :]) / np.sum(np.exp(probs[i, :]))

                # build the string of N predictions for this image
                for x in range(NUMBER_OF_PREDICTIONS):
                    preds_res2 = decoder.sparse_tensor_to_str(predictions[x])
                    result = result + ',{:s},{:f}'.format(
                        preds_res2[i], e_x[x])
                # format the output line and append it to the csv file
                result = basename(fname) + result
                with open(output_file, 'a') as f:
                    f.write(result)
                    f.write('\n')
            end_time = time()
            print("Prediction time for {} images: {}".format(
                BATCH_SIZE, end_time - start_time))

        print("Total prediction time: {}".format(end_time - starting_time))
        print("Predictions saved in file {}".format(output_file))
Example 12
        train_data_path + '/country/', train_data_path + '/hiphop/',
        train_data_path + '/jazz/', train_data_path + '/light/'
    ]

    # Generate input data (mel-grams)
    melgrams = np.zeros((0, 96, 1366, 1))
    trainDataX, trainDataY, testDataX, testDataY = generateData.generateData(
        taggedFilePath, 6, 50)

    # Save input data
    np.save(output_path + '/trainDataX.npy', trainDataX)
    np.save(output_path + '/testDataX.npy', testDataX)
    np.save(output_path + '/trainDataY.npy', trainDataY)
    np.save(output_path + '/testDataY.npy', testDataY)
else:
    trainDataX = np.load(train_data_path + '/trainDataX.npy')
    testDataX = np.load(train_data_path + '/testDataX.npy')
    trainDataY = np.load(train_data_path + '/trainDataY.npy')
    testDataY = np.load(train_data_path + '/testDataY.npy')

obj_NN = CRNN(dropout_layer_rate=0.1,
              rnn_dropout_rate=0.1,
              nb_epoch=200,
              optimizer="adam",
              batch_size=32,
              save_model=True,
              save_model_path=output_path)
obj_NN.NN_getData(trainDataX, trainDataY, testDataX, testDataY)
model1, score1 = obj_NN.NN_model_train()
plotLossAcc(200, obj_NN.train_history)
Example 13
# Validation
val_pkl = PICKLE_DIR + os.path.splitext(
    os.path.basename(PICKLE_NAME))[0] + '_val.pkl'
with open(val_pkl, 'rb') as f:
    gt_util_val = pickle.load(f)

ph_dict = ph_utils.get_ph_dict(data_path=PICKLE_DIR, file_name=PICKLE_NAME)
# print(len(ph_dict))

input_width = 256
input_height = 32
batch_size = 128
input_shape = (input_width, input_height, 1)

model, model_pred = CRNN(input_shape, len(ph_dict))

max_string_len = model_pred.output_shape[1]

gen_val = InputGenerator(gt_util_val,
                         batch_size,
                         ph_dict,
                         input_shape[:2],
                         grayscale=True,
                         max_string_len=max_string_len)

model.load_weights(CHECKPOINT_PATH)

g = gen_val.generate()

mean_ed = 0
Example 14
          encoding='UTF8') as f:
    gt_data = json.load(f)

data_info = gt_data['info']

# crnn references
ph_dict = ph_utils.get_ph_dict(data_path=PICKLE_DIR, file_name=PICKLE_NAME)

input_width = 256
input_height = 32
batch_size = 128
input_shape = (input_width, input_height, 1)

# model, model_pred = CRNN(input_shape, len(ph_dict))
model = CRNN((input_width, input_height, 1),
             len(ph_dict),
             prediction_only=True)

model.load_weights(
    './checkpoints/202004011502_crnn_lstm_ph_all_v1/weights.110000.h5')

# tesseract references
lang = 'kor'
tess_cfg = " --psm 6 --oem 1 --tessdata-dir tessdata/org"

img_fnames = sorted(
    get_filenames('/home/sungsoo/Downloads/WORDS/',
                  extensions='png',
                  recursive_=True,
                  exit_=True))
Example 15
def recognize_text(batch_text_image):
    """ model configuration """
    if 'CTC' in crnn_opt.Prediction:
        converter = CTCLabelConverter(crnn_opt.character)
    else:
        converter = AttnLabelConverter(crnn_opt.character)
    crnn_opt.num_class = len(converter.character)
    #log = open(f'result/predict_and_gt.txt', 'a')
    if crnn_opt.rgb:
        crnn_opt.input_channel = 3

    if crnn_opt.sensitive:
        crnn_opt.character = string.printable[:-6]  # same as the ASTER setting (94 chars)

    # model = CRNN(crnn_opt)
    # print('model input parameters', crnn_opt.imgH, crnn_opt.imgW, crnn_opt.num_fiducial, crnn_opt.input_channel, crnn_opt.output_channel,
    #       crnn_opt.hidden_size, crnn_opt.num_class, crnn_opt.batch_max_length, crnn_opt.Transformation, crnn_opt.FeatureExtraction,
    #       crnn_opt.SequenceModeling, crnn_opt.Prediction)
    # model = torch.nn.DataParallel(model).to(device)
    #
    # # load model
    # print('loading pretrained model from %s' % crnn_opt.saved_model)
    # model.load_state_dict(torch.load(crnn_opt.saved_model, map_location=device))
    # crnn_opt.exp_name = '_'.join(crnn_opt.saved_model.split('/')[1:])
    # print(model)
    """ keep evaluation model and result logs """
    # os.makedirs(f'./result/{crnn_opt.exp_name}', exist_ok=True)
    # os.system(f'cp {crnn_opt.saved_model} ./result/{crnn_opt.exp_name}/')
    """ setup loss """
    if 'CTC' in crnn_opt.Prediction:
        criterion = torch.nn.CTCLoss(zero_infinity=True).to(device)
    else:
        criterion = torch.nn.CrossEntropyLoss(ignore_index=0).to(
            device)  # ignore [GO] token = ignore index 0
    """ evaluation """
    crnn_net = CRNN(crnn_opt)
    crnn_net = torch.nn.DataParallel(crnn_net, device_ids=[0]).to(device)
    crnn_net.load_state_dict(
        torch.load(
            './weights/TPS-ResNet-BiLSTM-Attn-Seed1111/best_accuracy.pth',
            map_location=device))
    #log.write("==================================================================="+"\n")
    #log.write('Finished loading CRNN model!'+'\n')
    #print('Finished loading CRNN model!')
    crnn_net.eval()

    with torch.no_grad():
        #evaluation_loader = (batch_text_image, labels)
        evaluation_loader = batch_text_image
        if crnn_opt.benchmark_all_eval:  # evaluation with 10 benchmark evaluation datasets
            benchmark_all_eval(crnn_net, criterion, evaluation_loader,
                               converter, crnn_opt)
        # log = open(f'./result/{crnn_opt.exp_name}/log_evaluation.txt', 'a')
        # AlignCollate_evaluation = AlignCollate(imgH=crnn_opt.imgH, imgW=crnn_opt.imgW, keep_ratio_with_pad=crnn_opt.PAD)
        # eval_data, eval_data_log = hierarchical_dataset(root=crnn_opt.eval_data, crnn_opt=crnn_opt)
        # evaluation_loader = torch.utils.data.DataLoader(
        #     eval_data, batch_size=crnn_opt.batch_size,
        #     shuffle=False,
        #     num_workers=int(crnn_opt.workers),
        #     collate_fn=AlignCollate_evaluation, pin_memory=True)
        # _, accuracy_by_best_model, _, _, _, _, _, _ = validation(
        #     crnn_net, criterion, evaluation_loader, converter, crnn_opt)
        pred = validation(crnn_net, criterion, evaluation_loader, converter,
                          crnn_opt)
        # log.write(eval_data_log)
        # print(f'{accuracy_by_best_model:0.3f}')
        # log.write(f'{accuracy_by_best_model:0.3f}\n')
        # log.close()

        return pred
Example 16
ph_dict = ph_utils.get_ph_dict(data_path=PICKLE_DIR, file_name=PICKLE_NAME)
print(len(ph_dict))

# AI-HUB
# input_width = 256
# input_height = 32
# batch_size = 128

# AIG IDR
input_width = 256
input_height = 32
batch_size = 128

input_shape = (input_width, input_height, 1)

model, model_pred = CRNN(input_shape, len(ph_dict), gru=False)
max_string_len = model_pred.output_shape[1]

gen_train = InputGenerator(gt_util_train,
                           batch_size,
                           ph_dict,
                           input_shape[:2],
                           grayscale=True,
                           max_string_len=max_string_len)
gen_val = InputGenerator(gt_util_val,
                         batch_size,
                         ph_dict,
                         input_shape[:2],
                         grayscale=True,
                         max_string_len=max_string_len)
Example 17
Model = DSODSL512
input_shape = (512, 512, 3)
weights_path = './checkpoints/201711132011_dsodsl512_synthtext/weights.001.h5'
segment_threshold = 0.55
link_threshold = 0.40

det_model = Model(input_shape)
prior_util = PriorUtil(det_model)
det_model.load_weights(weights_path)

# input_width = 256
input_width = 384
input_height = 32
weights_path = './checkpoints/201806190711_crnn_gru_synthtext/weights.300000.h5'

rec_model = CRNN((input_width, input_height, 1), len(alphabet), prediction_only=True, gru=True)
rec_model.load_weights(weights_path, by_name=True)

# To test on webcam 0, /dev/video0
video_path = 0
start_frame = 0
record = True
record_file_name = 'sl_end2end_record.avi'
try:
    vid = cv2.VideoCapture(video_path)
    if not vid.isOpened():
        raise IOError(("Couldn't open video file or webcam. If you're "
                       "trying to open a webcam, make sure your video_path is an integer!"))

    vid_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vid_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
Example 18
        m.bias.data.fill_(0)


def backward_hook(self, grad_input, grad_output):
    for g in grad_input:
        g[g != g] = 0   # replace all NaNs in the gradients with zero


if __name__ == "__main__":
    print('alphabet length : ', len(crnn_params.alphabet_list))
    opt = parser.parse_args()
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    nclass = len(crnn_params.alphabet) + 1
    nc = 1
    model = CRNN(crnn_params.imgH, nc, nclass, crnn_params.nh)
    criterion = torch.nn.CTCLoss(reduction='sum')
    optimizer = optim.Adam(model.parameters(), lr=crnn_params.lr, betas=(crnn_params.beta1, 0.999))
    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True
        model = model.cuda()
        criterion = criterion.cuda()
    if opt.resume_net != '' and os.path.exists(opt.resume_net):
        print('loading pretrained model from %s' % opt.resume_net)
        model.load_state_dict(torch.load(opt.resume_net))
    else:
        model.apply(weights_init)
    model.register_backward_hook(backward_hook)

    train_dataset = imgDataset(opt.image_root,
                               opt.train_label,
                               crnn_params.alphabet,
                               (crnn_params.imgW, crnn_params.imgH),
                               crnn_params.mean,
                               crnn_params.std)
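The training loop itself is truncated. A minimal sketch of one epoch, assuming the dataset yields (image, text) batches, a strLabelConverter-style encoder as in Examples 5 and 7, and time-major log-probabilities of shape [T, N, nclass] from the model:

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=opt.batch_size,
                                               shuffle=True)
    converter = strLabelConverter(crnn_params.alphabet)  # assumed encoder

    for images, texts in train_loader:
        images = images.to(device)
        targets, target_lengths = converter.encode(texts)
        preds = model(images).log_softmax(2)       # [T, N, nclass]
        T, N = preds.size(0), preds.size(1)
        pred_lengths = torch.full((N,), T, dtype=torch.long)
        # reduction='sum', so normalize by batch size
        loss = criterion(preds, targets, pred_lengths, target_lengths) / N
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()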