Example #1
def crnnOcr(image):
    """OCR a single text-line image with the module-level CRNN model.

    Uses the module globals ``model``, ``converter`` and ``GPU``.

    :param image: PIL image (a text-box crop, 'L' mode expected).
    :return: decoded text string.
    """
    # Rescale the input so its height becomes 32, keeping the aspect ratio.
    scale = image.size[1] * 1.0 / 32
    target_w = int(image.size[0] / scale)
    normalize = dataset.resizeNormalize((target_w, 32))
    if torch.cuda.is_available() and GPU:
        tensor = normalize(image).cuda()
    else:
        tensor = normalize(image).cpu()

    # Add the batch dimension and run the network in eval mode.
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    out = model(tensor)
    _, out = out.max(2)
    flat = out.transpose(1, 0).contiguous().view(-1)
    flat_size = Variable(torch.IntTensor([flat.size(0)]))
    return converter.decode(flat.data, flat_size.data, raw=False)
Example #2
def predict_img(imgpath):
    """Load the dense CRNN OCR model and recognize the text in an image file.

    :param imgpath: path of the image file to recognize.
    :return: decoded text string.
    """
    converter = util.strLabelConverter(alphabet)
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1,
                      lstmFlag=LSTMFLAG).cpu()
    ocrModel = './ocr-dense.pth'
    state_dict = torch.load(ocrModel,
                            map_location=lambda storage, loc: storage)
    # Checkpoints saved from nn.DataParallel prefix keys with 'module.';
    # strip it so the weights load into a plain (non-parallel) model.
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        new_state_dict[k.replace('module.', '')] = v
    model.load_state_dict(new_state_dict)
    model.eval()  # single eval call is enough (duplicate removed)

    # Grayscale, then rescale to height 32 keeping the aspect ratio.
    image = Image.open(imgpath).convert('L')
    scale = image.size[1] * 1.0 / 32
    w = int(image.size[0] / scale)
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image).cpu()
    image = Variable(image.view(1, *image.size()))
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    return converter.decode(preds.data, preds_size.data, raw=False)
Example #3
def process_img(images):
    """Recognize the text in every image under crop/*.jpg with a CRNN model,
    print the raw/decoded predictions, and run a geo + language lookup.

    NOTE(review): the ``images`` argument is unused — the function globs
    'crop/*.jpg' instead; kept unchanged for interface compatibility.

    :return: 1 (legacy success flag).
    """
    model_path = 'crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model = crnn.CRNN(32, 1, 37, 256, 1)
    print('loading pretrained model from %s' % model_path)
    model.load_state_dict(torch.load(model_path))
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    model.eval()  # hoisted out of the loop: switching to eval once is enough
    for img_path in glob.glob('crop/*.jpg'):
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        image = Variable(image.view(1, *image.size()))
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))
        # City lookup on the recognized text.
        places = GeoText(sim_pred)
        if len(places.cities) > 0:
            print('Location Found: ')
            print(places.cities)
        else:
            print('Location Not Found')
        language_classifier.classify(sim_pred)

    return 1
 def load(self):
     """Load the CRNN model, label converter, and input transformer.

     The first apply after loading is slow because weights are read from
     disk and the network is initialized.
     """
     logging.info("Loading CRNN model first apply will be slow")
     if torch.cuda.is_available():
         self.session = crnn_model.CRNN(32, 1, 37, 256, 1).cuda()
         self.cuda = True
     else:
         self.session = crnn_model.CRNN(32, 1, 37, 256, 1)
         self.cuda = False  # was previously left unset on the CPU path
     self.session.load_state_dict(torch.load(self.model_path))
     self.session.eval()
     self.converter = utils.strLabelConverter(self.alphabet)
     self.transformer = dataset.resizeNormalize((100, 32))
Example #5
def crnn_single(img):
    """Recognize the text in a single image with a freshly loaded CRNN model.

    :param img: input image (array-like or PIL image).
    :return: lowercased decoded text, post-processed by ``deletedot``.
    """
    alphabet = keys_crnn.alphabet
    converter = util.strLabelConverter(alphabet)
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    path = './crnn/samples/model_acc97.pth'
    model.load_state_dict(torch.load(path))

    # Grayscale, then rescale to height 32 keeping the aspect ratio.
    image = Image.fromarray(np.array(img)).convert('L')
    scale = image.size[1] * 1.0 / 32
    w = int(image.size[0] / scale)

    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image)
    image = Variable(image.view(1, *image.size()))

    model.eval()
    preds = model(image)
    _, preds = preds.max(2)

    # Drop the batch dimension, then flatten to a plain label sequence.
    preds = preds.squeeze(1)
    preds = preds.transpose(-1, 0).contiguous().view(-1)

    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    sim_pred = sim_pred.lower()
    return deletedot(sim_pred)
Example #6
def crnnRec(model, converter, im, text_recs):
    """Crop each detected text box out of ``im``, deskew it, and run CRNN.

    :param model: CRNN network (already loaded).
    :param converter: label converter exposing a ``decode`` method.
    :param im: full source image (numpy array).
    :param text_recs: iterable of 8-value quads; see the pt1..pt4 mapping
        below for the corner ordering.
    :return: (raw_preds, sim_preds) lists, one entry per non-empty crop.
    """
    raw_preds = []
    sim_preds = []
    for rec in text_recs:
        pt1 = (rec[0], rec[1])
        pt2 = (rec[2], rec[3])
        pt3 = (rec[6], rec[7])
        pt4 = (rec[4], rec[5])
        # Rotate by the top edge's angle so the text line becomes horizontal.
        partImg = dumpRotateImage(
            im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2,
            pt3, pt4)
        # Skip degenerate crops (zero-sized in any dimension).
        if (partImg.shape[0] == 0 or partImg.shape[1] == 0
                or partImg.shape[2] == 0):
            continue
        image = Image.fromarray(partImg).convert('L')

        # Rescale to height 32 keeping the aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = int(image.size[0] / scale)

        transformer = dataset.resizeNormalize((w, 32))
        image = transformer(image)
        image = Variable(image.view(1, *image.size()))
        model.eval()
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_preds.append(converter.decode(preds.data, preds_size.data,
                                          raw=True))
        sim_preds.append(converter.decode(preds.data, preds_size.data,
                                          raw=False))
    return raw_preds, sim_preds
Example #7
def getTextFromImage(model, img):
    """Decode the text in ``img`` with the given CRNN ``model``.

    NOTE(review): the input is moved to CUDA unconditionally — this will
    fail on a CPU-only host; kept because the model is assumed to live on
    the GPU here. Confirm against the caller.

    :param model: CRNN network (on the GPU).
    :param img: PIL image (grayscale expected).
    :return: decoded text string.
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    img = transformer(img).cuda()
    img = Variable(img.view(1, *img.size()))

    model.eval()
    preds = model(img)

    _, preds = preds.max(2)
    # Drop the batch dimension and flatten to a plain label sequence.
    # (The original transpose(0, 0) was a no-op and has been removed.)
    preds = preds.squeeze(1)
    preds = preds.contiguous().view(-1)

    predictionSize = Variable(torch.IntTensor([preds.size(0)]))
    return converter.decode(preds.data, predictionSize.data, raw=False)
Example #8
def get_text_service(image):
    """Recognize the text in an image array with a freshly loaded CRNN model.

    :param image: numpy image array; converted to grayscale internally.
    :return: decoded text string.
    """
    image = Image.fromarray(image).convert('L')
    model = crnn.CRNN(32, 1, 37, 256)
    if torch.cuda.is_available():
        model = model.cuda()

    model.load_state_dict(torch.load(MODEL_PATH_CRNN))
    converter = utils.strLabelConverter(ALPHABET)
    transformer = dataset.resizeNormalize((100, 32))
    image = transformer(image)
    # Keep the input on the same device as the model.
    if torch.cuda.is_available():
        image = image.cuda()
    image = Variable(image.view(1, *image.size()))
    model.eval()
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    return converter.decode(preds.data, preds_size.data, raw=False)
Example #9
def crnnOcr(image):
    """End-to-end scene-text recognition with the module-level CRNN model.

    :param image: PIL image in 'L' (grayscale) mode.
    :return: decoded text for the box.
    """
    # Resize to height 32, preserving the aspect ratio.
    scale = image.size[1] * 1.0 / 32
    new_w = int(image.size[0] / scale)
    normalize = dataset.resizeNormalize((new_w, 32))
    use_gpu = torch.cuda.is_available() and cfg.GPU
    tensor = normalize(image).cuda() if use_gpu else normalize(image).cpu()

    # Add the batch dimension and run inference in eval mode.
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    out = model(tensor)
    _, out = out.max(2)
    flat = out.transpose(1, 0).contiguous().view(-1)
    flat_size = Variable(torch.IntTensor([flat.size(0)]))
    return converter.decode(flat.data, flat_size.data, raw=False)
Example #10
assert train_dataset
# Use the custom sequential sampler unless random sampling was requested.
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
# shuffle and sampler are mutually exclusive in torch DataLoader:
# only ask for shuffling when no explicit sampler is supplied.
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=opt.batchSize,
                                           shuffle=(sampler is None),
                                           sampler=sampler,
                                           num_workers=int(opt.workers),
                                           collate_fn=dataset.alignCollate(
                                               imgH=opt.imgH,
                                               imgW=opt.imgW,
                                               keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(root=opt.valroot,
                                   transform=dataset.resizeNormalize(
                                       (100, 32)))

# NOTE(review): str.decode is Python 2 — on Python 3 opt.alphabet would
# need to be bytes for this to work; verify how opt is parsed.
alphabet = opt.alphabet.decode('utf-8')

nclass = len(alphabet) + 1  # +1 for the CTC blank symbol
nc = 1  # single (grayscale) input channel

converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    """Initialize convolution weights in-place with N(0.0, 0.02)."""
    if 'Conv' in m.__class__.__name__:
        m.weight.data.normal_(0.0, 0.02)
Example #11
def main(argv=None):
    """Detect text boxes with the EAST TensorFlow model, save/draw them,
    then crop each box and recognize its text with a PyTorch CRNN model,
    writing the recognized strings to ./output/output.txt.

    :param argv: unused; kept for tf.app.run-style entry points.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 (EEXIST): the output dir already exists — not an error.
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        # Restore the exponential-moving-average shadow variables rather
        # than the raw training weights.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # cv2 loads BGR; reverse the channel order to RGB.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    # Map box coordinates back to the original image scale.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Discard boxes with an edge shorter than 5 px.
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

            # --- Recognition stage: run CRNN over each detected word box ---
            model_path = './crnn/crnn.pth'
            alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
            model_crnn = crnn.CRNN(32, 1, 37, 256)
            # if torch.cuda.is_available():
            #  model_crnn = model_crnn.cuda()
            print('loading pretrained model from %s' % model_path)
            model_crnn.load_state_dict(torch.load(model_path))

            converter = utils.strLabelConverter(alphabet)
            transformer = dataset.resizeNormalize((100, 32))
            seq = re.compile(",")
            with open('./output/img_demo.txt') as f:
                img = cv2.imread('./test_img/img_demo.jpg')
                line_id = 0
                with open('./output/output.txt', 'w') as fp:
                    for line in f:
                        line_id += 1
                        # Each line holds 8 comma-separated quad coordinates.
                        lst = seq.split(line.strip())
                        x1 = int(lst[0])
                        y1 = int(lst[1])
                        x2 = int(lst[2])
                        y2 = int(lst[3])
                        x3 = int(lst[4])
                        y3 = int(lst[5])
                        x4 = int(lst[6])
                        y4 = int(lst[7])
                        cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4,
                                                                       y4]])
                        # Axis-aligned crop of the quad's min-area rectangle.
                        rect = cv2.minAreaRect(cnt)
                        # print(rect)
                        box = cv2.boxPoints(rect)
                        box = np.int0(box)
                        # print(box)
                        roi_img = img[min(box[:, 1]):max(box[:, 1]),
                                      min(box[:, 0]):max(box[:, 0])]
                        # print(min(box[:,0]),max(box[:,0]),min(box[:,1]),max(box[:,1]))
                        cv2.imwrite(
                            './output/word_area_img/word_area_img' +
                            str(line_id) + '.png', roi_img)
                        img_path = './output/word_area_img/word_area_img' + str(
                            line_id) + '.png'
                        image = Image.open(img_path).convert('L')
                        image = transformer(image)
                        # if torch.cuda.is_available():
                        #  image = image.cuda()
                        image = image.view(1, *image.size())
                        image = Variable(image)
                        model_crnn.eval()
                        preds = model_crnn(image)
                        _, preds = preds.max(2)
                        preds = preds.transpose(1, 0).contiguous().view(-1)

                        preds_size = Variable(torch.IntTensor([preds.size(0)]))
                        raw_pred = converter.decode(preds.data,
                                                    preds_size.data,
                                                    raw=True)
                        sim_pred = converter.decode(preds.data,
                                                    preds_size.data,
                                                    raw=False)
                        print('%-20s => %-20s' % (raw_pred, sim_pred))
                        fp.write(sim_pred)
                        fp.write('\n')
Example #12
# Demo script: load the dense CRNN model and recognize one sample image.
converter = util.strLabelConverter(alphabet)
model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG).cpu()
ocrModel = './models/epoch9_step7000_model_dense.pth'
# Strip the 'module.' prefix left by nn.DataParallel checkpoints.
state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    new_state_dict[k.replace('module.', '')] = v
model.load_state_dict(new_state_dict)
model.eval()  # single eval call is enough (duplicate removed)

imgpath = 'ss_350.png'
image = Image.open(imgpath).convert('L')
# Rescale to height 32 keeping the aspect ratio.
scale = image.size[1] * 1.0 / 32
w = int(image.size[0] / scale)
transformer = dataset.resizeNormalize((w, 32))
image = transformer(image).cpu()
image = Variable(image.view(1, *image.size()))
preds = model(image)
_, preds = preds.max(2)
preds = preds.transpose(1, 0).contiguous().view(-1)
preds_size = Variable(torch.IntTensor([preds.size(0)]))
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
print(sim_pred)