Esempio n. 1
0
    def _ctc_loss(self, label, text_preds, text_targets):
        """Return the CTC loss, divided by the sample count, for text samples.

        Samples whose ``label`` equals 1, 2, 4 or 5 are selected. Their
        predictions are picked along dimension 1 of ``text_preds`` and their
        target strings are stripped of every character not in ``alphabet``.
        The encoded targets are loaded into the ``text`` / ``length`` buffers
        and passed to ``ctc`` (``alphabet``, ``text``, ``length`` and ``ctc``
        are names taken from the enclosing scope).

        Args:
            label: Tensor-like with a ``.data`` attribute holding one class id
                per sample.
            text_preds: Network output; dimension 1 is indexed per sample and
                ``size(0)`` is used as the prediction length of every sample.
            text_targets: Indexable collection of target strings, one per
                sample.

        Returns:
            The loss divided by the number of selected samples, or a CUDA
            ``FloatTensor([0])`` when no sample is selected or when the
            encoded ``text`` buffer is not one-dimensional.
        """
        # NOTE(review): the previous mask tested ``label == 2`` twice, which
        # hints that class 3 may have been intended. The selected classes are
        # kept as 1, 2, 4, 5 until that is confirmed.
        labels = label.data
        text_mask = ((labels == 1) + (labels == 2) + (labels == 4) +
                     (labels == 5))
        inds_text = Variable(text_mask.nonzero().view(-1))

        if inds_text.nelement() == 0:
            return Variable(torch.FloatTensor([0])).cuda()

        text_preds_filtered = text_preds.index_select(1, inds_text)
        text_targets_filtered = tuple(
            ''.join(ch for ch in text_targets[i] if ch in alphabet)
            for i in inds_text.data)

        converter = crnn_utils.strLabelConverter(alphabet)
        encoded, lengths = converter.encode(text_targets_filtered)
        crnn_utils.loadData(text, encoded)
        crnn_utils.loadData(length, lengths)

        print("CTCLoss")
        b = text_preds_filtered.size(1)
        # every selected sample uses the full prediction length
        pred_size = Variable(
            torch.IntTensor([text_preds_filtered.size(0)] * b))
        if len(text.size()) != 1:
            return Variable(torch.FloatTensor([0])).cuda()

        # Evaluate the loss once and reuse it for both logging and the result.
        loss = ctc(text_preds_filtered, text, pred_size, length) / b
        print(loss)

        # Greedy decode of the predictions, printed for debugging only.
        _, preds = text_preds_filtered.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        sim_preds = converter.decode(preds.data, pred_size.data, raw=False)
        print('predicted text=  ')
        print(sim_preds)
        print(text_targets_filtered)
        return loss
Esempio n. 2
0
def crnnSource():
    """
    Load the recognition model.

    Picks the alphabet according to ``chinsesModel``, builds the CRNN on the
    GPU (when CUDA is available and ``GPU`` is set) or on the CPU, loads the
    weights stored at ``ocrModel`` and returns ``(model, converter)``.
    """
    # Chinese + English alphabet, or the English-only one
    alphabet = keys.alphabetChinese if chinsesModel else keys.alphabetEnglish
    converter = strLabelConverter(alphabet)

    on_gpu = torch.cuda.is_available() and GPU
    # LSTMFLAG=True selects the CRNN variant, otherwise the dense OCR variant
    net = CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
    model = net.cuda() if on_gpu else net.cpu()

    # keep every deserialized storage where it is (no device remapping)
    trainWeights = torch.load(ocrModel,
                              map_location=lambda storage, loc: storage)
    # remove the `module.` prefix from the parameter names
    modelWeights = OrderedDict(
        (key.replace('module.', ''), value)
        for key, value in trainWeights.items())

    model.load_state_dict(modelWeights)
    return model, converter
Esempio n. 3
0
def crnnSource():
    """Build the CRNN with weights from ``crnn/models/netCRNNcpu.pth``.

    Returns:
        tuple: ``(model, converter)`` where ``converter`` is a
        ``util.strLabelConverter`` built from ``keys.alphabet``.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    # the class count is the alphabet size plus one
    class_count = len(charset) + 1
    net = crnn.CRNN(32, 1, class_count, 256, 1)
    state = torch.load('crnn/models/netCRNNcpu.pth')
    net.load_state_dict(state)
    return net, label_converter
Esempio n. 4
0
def process_img(images):
    """Run CRNN recognition on every ``crop/*.jpg`` file and print the results.

    Each file is opened as a greyscale image, passed through
    ``dataset.resizeNormalize((100, 32))`` and the network. The prediction is
    decoded with ``raw=True`` and ``raw=False`` and both strings are printed.
    The ``raw=False`` text is then searched for city names with ``GeoText``
    and handed to ``language_classifier.classify``.

    Args:
        images: Unused. The input files are always discovered by globbing
            ``crop/*.jpg``; the parameter is kept so existing callers work.

    Returns:
        int: Always ``1``.
    """
    model_path = 'crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model = crnn.CRNN(32, 1, 37, 256, 1)
    print('loading pretrained model from %s' % model_path)
    model.load_state_dict(torch.load(model_path))
    # Switching to evaluation mode is loop-invariant, so do it once up front.
    model.eval()
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    for img_path in glob.glob('crop/*.jpg'):
        image = transformer(Image.open(img_path).convert('L'))
        # prepend a singleton dimension before the forward pass
        image = Variable(image.view(1, *image.size()))
        preds = model(image)

        # index of the best class at every position, flattened to 1-D
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))

        places = GeoText(sim_pred)
        if len(places.cities) > 0:
            print('Location Found: ')
            print(places.cities)
        else:
            print('Location Not Found')
        language_classifier.classify(sim_pred)

    return 1
Esempio n. 5
0
def crnnSource():
    """Build the CRNN and load ``models/netCRNNcpu.pth`` located next to this file.

    Returns:
        tuple: ``(model, converter)`` where ``converter`` is a
        ``util.strLabelConverter`` built from ``keys.alphabet``.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    # the class count is the alphabet size plus one
    net = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)

    # resolve the weights file relative to this source file's directory
    module_dir = os.path.dirname(os.path.abspath(__file__))
    weights_path = os.path.join(module_dir, 'models/netCRNNcpu.pth')
    net.load_state_dict(torch.load(weights_path))
    return net, label_converter
Esempio n. 6
0
 def predict(self, image):
     """Recognize the text in ``image`` and return the converted label string.

     The image goes through ``resizeNormalize(image, 32)``, is cast to
     float32 and wrapped in two leading singleton axes before being fed to
     ``self.model`` via ``setInput`` / ``forward``.
     """
     scaled = resizeNormalize(image, 32).astype(np.float32)
     net_input = np.array([[scaled]])
     self.model.setInput(net_input)
     scores = self.model.forward()
     # move axis 1 to the end, then keep entry 0 of the leading axis
     scores = scores.transpose(0, 2, 3, 1)[0]
     # best class index along the last axis, flattened to 1-D
     indices = np.argmax(scores, axis=2).reshape((-1, ))
     return strLabelConverter(indices, self.alphabet)
Esempio n. 7
0
 def predict(self, image):
     """Recognize the text in ``image`` and return the converted label string.

     The image goes through ``resizeNormalize(image, 32)``, is cast to
     float32 and wrapped in two leading singleton axes. ``self.model.predict``
     is then run with the module-level ``graph`` set as default.
     """
     scaled = resizeNormalize(image, 32).astype(np.float32)
     net_input = np.array([[scaled]])
     # `graph` is only read here, so no `global` declaration is required
     with graph.as_default():
         scores = self.model.predict(net_input)
     # best class index along axis 2, flattened to 1-D
     indices = np.argmax(scores, axis=2).reshape((-1, ))
     return strLabelConverter(indices, self.alphabet)
Esempio n. 8
0
 def load(self):
     """Create the CRNN, load its weights and set up the helper objects.

     Sets ``self.session`` (the network, in evaluation mode and moved to the
     GPU when CUDA is available), ``self.converter`` and ``self.transformer``.
     ``self.cuda`` is set to ``True`` only on the GPU path; it is left
     untouched otherwise.
     """
     logging.info("Loading CRNN model first apply will be slow")
     use_cuda = torch.cuda.is_available()
     self.session = crnn_model.CRNN(32, 1, 37, 256, 1)
     if use_cuda:
         self.session = self.session.cuda()
         self.cuda = True

     # Deserialize onto CPU storage so a checkpoint written on a GPU machine
     # can still be opened on a CPU-only host; load_state_dict then copies
     # the values into the module's own parameters.
     state = torch.load(self.model_path,
                        map_location=lambda storage, loc: storage)
     self.session.load_state_dict(state)
     self.session.eval()
     self.converter = utils.strLabelConverter(self.alphabet)
     self.transformer = dataset.resizeNormalize((100, 32))
0
    def predict(self, image):
        """Recognize the text in ``image`` and return the converted label string.

        The image goes through ``resizeNormalize(image, 32)``, is cast to
        float32, turned into a tensor and placed on the GPU when CUDA is
        available and ``self.GPU`` is set (on the CPU otherwise). The module
        itself is then called on it.
        """
        pixels = resizeNormalize(image, 32).astype(np.float32)
        tensor = torch.from_numpy(pixels)
        on_gpu = torch.cuda.is_available() and self.GPU
        tensor = tensor.cuda() if on_gpu else tensor.cpu()

        # prepend two singleton dimensions before the forward pass
        net_input = Variable(tensor.view(1, 1, *tensor.size()))
        scores = self(net_input)
        # best class index along dimension 2, flattened to 1-D
        _, best = scores.max(2)
        flat = best.transpose(1, 0).contiguous().view(-1)
        return strLabelConverter(flat, self.alphabet)
Esempio n. 10
0
    def predict_batch(self, boxes, batch_size=1):
        """
        Recognize the text of every box, ``batch_size`` boxes at a time.

        Each box is a mapping with an ``'img'`` entry. Images of a group are
        passed through ``resizeNormalize(img, 32)`` and copied into a
        zero-filled float32 array as wide as the widest image of that group.
        The recognized string is stored under ``box['text']`` and the same
        ``boxes`` object is returned.
        """
        total = len(boxes)
        texts = []

        # number of groups, rounding up when there is a remainder
        group_count, leftover = divmod(total, batch_size)
        if leftover:
            group_count += 1

        for group_idx in range(group_count):
            first = group_idx * batch_size
            group = boxes[first:first + batch_size]

            crops = []
            widest = 0
            for box in group:
                scaled = resizeNormalize(box['img'], 32)
                _, width = scaled.shape[:2]
                widest = max(widest, width)
                crops.append(np.array([scaled]))

            # zero padding on the right for crops narrower than the widest
            padded = np.zeros((len(crops), 1, 32, widest), dtype=np.float32)
            for slot, crop in zip(padded, crops):
                _, _, width = crop.shape
                slot[:, :, :width] = crop

            net_input = Variable(torch.from_numpy(padded))
            if torch.cuda.is_available() and self.GPU:
                net_input = net_input.cuda()
            else:
                net_input = net_input.cpu()

            best = self(net_input).argmax(2)
            # one column of `best` per sample in the group
            for col in range(best.shape[1]):
                texts.append(strLabelConverter(best[:, col], self.alphabet))

        for idx in range(total):
            boxes[idx]['text'] = texts[idx]
        return boxes
Esempio n. 11
0
    def predict_batch(self, boxes, batch_size=1):
        """
        Recognize the text of every box, ``batch_size`` boxes at a time.

        Each box is a mapping with an ``'img'`` entry. Images of a group are
        passed through ``resizeNormalize(img, 32)`` and copied into a
        zero-filled float32 array as wide as the widest image of that group,
        which is fed to ``self.model.predict`` with the module-level ``graph``
        set as default. The recognized string is stored under ``box['text']``
        and the same ``boxes`` object is returned.
        """
        total = len(boxes)
        texts = []

        # number of groups, rounding up when there is a remainder
        group_count, leftover = divmod(total, batch_size)
        if leftover:
            group_count += 1

        for group_idx in range(group_count):
            first = group_idx * batch_size
            group = boxes[first:first + batch_size]

            crops = []
            widest = 0
            for box in group:
                scaled = resizeNormalize(box['img'], 32)
                _, width = scaled.shape[:2]
                widest = max(widest, width)
                crops.append(np.array([scaled]))

            # zero padding on the right for crops narrower than the widest
            padded = np.zeros((len(crops), 1, 32, widest), dtype=np.float32)
            for slot, crop in zip(padded, crops):
                _, _, width = crop.shape
                slot[:, :, :width] = crop

            # `graph` is only read here, so no `global` declaration is required
            with graph.as_default():
                scores = self.model.predict(padded, batch_size=batch_size)

            best = scores.argmax(axis=2)
            # one row of `best` per sample in the group
            for row in range(best.shape[0]):
                texts.append(
                    strLabelConverter(best[row].tolist(), self.alphabet))

        for idx in range(total):
            boxes[idx]['text'] = texts[idx]
        return boxes
Esempio n. 12
0
def getTextFromImage(model, img):
    """Recognize the text in ``img`` with ``model`` and return the decoded string.

    The image is passed through ``dataset.resizeNormalize((100, 32))`` and
    moved to the GPU unconditionally, so a CUDA device is required. ``model``
    is switched to evaluation mode as a side effect.

    Args:
        model: Callable network; it is given a single-image batch.
        img: Image accepted by the ``resizeNormalize`` transformer.

    Returns:
        The result of ``converter.decode(..., raw=False)`` for the lowercase
        alphanumeric alphabet.
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    img = transformer(img).cuda()
    # prepend a singleton dimension before the forward pass
    img = Variable(img.view(1, *img.size()))

    model.eval()
    preds = model(img)

    # Best class index along dimension 2. Dimension 1 has size one here
    # (presumably the batch axis - TODO confirm), so squeezing it and
    # flattening is enough; no transpose is needed.
    _, preds = preds.max(2)
    preds = preds.squeeze(1).contiguous().view(-1)

    predictionSize = Variable(torch.IntTensor([preds.size(0)]))
    return converter.decode(preds.data, predictionSize.data, raw=False)
Esempio n. 13
0
def get_text_service(image):
    """Recognize the text in ``image`` and return the decoded string.

    A fresh CRNN is built and its weights are loaded from ``MODEL_PATH_CRNN``
    on every call. The model and the input are moved to the GPU when CUDA is
    available.

    Args:
        image: Array accepted by ``Image.fromarray``; it is converted to
            greyscale before recognition.

    Returns:
        The result of ``converter.decode(..., raw=False)`` for ``ALPHABET``.
    """
    image = Image.fromarray(image).convert('L')
    use_cuda = torch.cuda.is_available()

    model = crnn.CRNN(32, 1, 37, 256)
    if use_cuda:
        model = model.cuda()

    # Deserialize onto CPU storage so a checkpoint written on a GPU machine
    # can still be opened on a CPU-only host; load_state_dict then copies
    # the values into the module's own parameters.
    state = torch.load(MODEL_PATH_CRNN,
                       map_location=lambda storage, loc: storage)
    model.load_state_dict(state)
    model.eval()

    converter = utils.strLabelConverter(ALPHABET)
    transformer = dataset.resizeNormalize((100, 32))
    image = transformer(image)
    if use_cuda:
        image = image.cuda()
    # prepend a singleton dimension before the forward pass
    image = Variable(image.view(1, *image.size()))

    preds = model(image)
    # best class index along dimension 2, flattened to 1-D
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    return converter.decode(preds.data, preds_size.data, raw=False)
Esempio n. 14
0
def crnnSource():
    """Build the CRNN for ``keys.alphabetChinese`` and load ``ocrModelKeras``.

    Returns:
        tuple: ``(model, converter)``.
    """
    charset = keys.alphabetChinese  # Chinese + English model
    label_converter = strLabelConverter(charset)
    # the class count is the alphabet size plus one
    class_count = len(charset) + 1
    net = CRNN(32, 1, class_count, 256, 1, lstmFlag=LSTMFLAG)
    net.load_weights(ocrModelKeras)
    return net, label_converter
Esempio n. 15
0
                                           sampler=sampler,
                                           num_workers=int(opt.workers),
                                           collate_fn=dataset.alignCollate(
                                               imgH=opt.imgH,
                                               imgW=opt.imgW,
                                               keep_ratio=opt.keep_ratio))
# Validation dataset rooted at opt.valroot; every sample goes through
# dataset.resizeNormalize((100, 32)).
test_dataset = dataset.lmdbDataset(root=opt.valroot,
                                   transform=dataset.resizeNormalize(
                                       (100, 32)))

# NOTE(review): calling .decode('utf-8') requires opt.alphabet to be a bytes
# object (a Python 3 `str` has no .decode) - presumably Python 2 code; confirm.
alphabet = opt.alphabet.decode('utf-8')

# Number of classes is the alphabet size plus one.
nclass = len(alphabet) + 1
# Passed as the second argument of the CRNN constructor
# (presumably the number of input channels - TODO confirm).
nc = 1

converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    """Re-initialise ``m`` in place according to its class name.

    Classes whose name contains ``'Conv'`` get weights drawn from
    N(0.0, 0.02). Otherwise, classes whose name contains ``'BatchNorm'`` get
    weights drawn from N(1.0, 0.02) and a zero bias. Anything else is left
    untouched.
    """
    kind = m.__class__.__name__
    if 'Conv' in kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# NOTE: this rebinds the name `crnn` to the model instance, so whatever
# `crnn` referred to before (presumably the crnn module - confirm) is no
# longer reachable under that name after this line.
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
# Run weights_init on the model via Module.apply.
crnn.apply(weights_init)
Esempio n. 16
0
def main(argv=None):
    """Detect text boxes in the input images, then recognise text in one demo image.

    Stage 1 (TensorFlow): restores the checkpoint found under
    ``FLAGS.checkpoint_path``, runs the detection model on every file returned
    by ``get_images()``, writes the detected quadrilaterals to
    ``<FLAGS.output_dir>/<image name>.txt`` and, unless
    ``FLAGS.no_write_images`` is set, writes the image itself to
    ``FLAGS.output_dir``.

    Stage 2 (PyTorch CRNN): reads the boxes listed in the hard-coded file
    ``./output/img_demo.txt``, crops each one out of
    ``./test_img/img_demo.jpg``, recognises the text and writes one result per
    line to ``./output/output.txt``.

    Args:
        argv: Unused.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 is EEXIST on Linux: an already existing output directory
        # is tolerated, any other failure is re-raised.
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        # Restore the variables selected by the moving-average helper.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # Only the basename recorded in the checkpoint state is used; the
            # directory always comes from FLAGS.checkpoint_path.
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # Reverse the channel axis of the loaded image
                # (cv2.imread returns BGR, so this yields RGB order).
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    # Keep the first 8 values of each row as four (x, y)
                    # corners and scale them back by the resize ratios.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    # Result file is named after the part of the image
                    # basename before its first dot.
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Skip boxes with a side shorter than 5 pixels.
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            # NOTE(review): the drawing target is a
                            # channel-reversed view of `im`; whether OpenCV
                            # writes through to `im` here should be confirmed.
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

            # ---- Stage 2: CRNN recognition on the hard-coded demo image ----
            # NOTE: the paths below are fixed and do not follow
            # FLAGS.output_dir or the image list processed above.
            model_path = './crnn/crnn.pth'
            alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
            model_crnn = crnn.CRNN(32, 1, 37, 256)
            # if torch.cuda.is_available():
            #  model_crnn = model_crnn.cuda()
            print('loading pretrained model from %s' % model_path)
            model_crnn.load_state_dict(torch.load(model_path))

            converter = utils.strLabelConverter(alphabet)
            transformer = dataset.resizeNormalize((100, 32))
            seq = re.compile(",")
            with open('./output/img_demo.txt') as f:
                img = cv2.imread('./test_img/img_demo.jpg')
                line_id = 0
                with open('./output/output.txt', 'w') as fp:
                    # Each line holds eight comma-separated integers: the
                    # x, y coordinates of the four corners of one box.
                    for line in f:
                        line_id += 1
                        lst = seq.split(line.strip())
                        x1 = int(lst[0])
                        y1 = int(lst[1])
                        x2 = int(lst[2])
                        y2 = int(lst[3])
                        x3 = int(lst[4])
                        y3 = int(lst[5])
                        x4 = int(lst[6])
                        y4 = int(lst[7])
                        cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4,
                                                                       y4]])
                        rect = cv2.minAreaRect(cnt)
                        # print(rect)
                        box = cv2.boxPoints(rect)
                        box = np.int0(box)
                        # print(box)
                        # Crop the axis-aligned bounding region of the
                        # rotated rectangle's corner points.
                        roi_img = img[min(box[:, 1]):max(box[:, 1]),
                                      min(box[:, 0]):max(box[:, 0])]
                        # print(min(box[:,0]),max(box[:,0]),min(box[:,1]),max(box[:,1]))
                        # The crop is written to disk and read back with PIL
                        # as a greyscale image for the recogniser.
                        cv2.imwrite(
                            './output/word_area_img/word_area_img' +
                            str(line_id) + '.png', roi_img)
                        img_path = './output/word_area_img/word_area_img' + str(
                            line_id) + '.png'
                        image = Image.open(img_path).convert('L')
                        image = transformer(image)
                        # if torch.cuda.is_available():
                        #  image = image.cuda()
                        # Prepend a singleton dimension before the forward pass.
                        image = image.view(1, *image.size())
                        image = Variable(image)
                        model_crnn.eval()
                        preds = model_crnn(image)
                        # Best class index along dimension 2, flattened to 1-D.
                        _, preds = preds.max(2)
                        preds = preds.transpose(1, 0).contiguous().view(-1)

                        preds_size = Variable(torch.IntTensor([preds.size(0)]))
                        raw_pred = converter.decode(preds.data,
                                                    preds_size.data,
                                                    raw=True)
                        sim_pred = converter.decode(preds.data,
                                                    preds_size.data,
                                                    raw=False)
                        print('%-20s => %-20s' % (raw_pred, sim_pred))
                        fp.write(sim_pred)
                        fp.write('\n')