Exemple #1
0
def crnnSource():
    """Build the CRNN recogniser and its label converter from ``cfg``.

    ``cfg.chinese_model`` selects the alphabet, ``cfg.lstm_flag`` is passed
    through as ``lstmFlag`` and the weights are read from ``cfg.ocr_model``.
    The network is moved to the GPU only when CUDA is available and
    ``cfg.GPU`` is truthy; otherwise it stays on the CPU.

    Returns:
        A ``(model, converter)`` tuple; the model has been put in eval mode.
    """
    alphabet = keys.alphabetChinese if cfg.chinese_model else keys.alphabetEnglish
    converter = strLabelConverter(alphabet)

    use_cuda = torch.cuda.is_available() and cfg.GPU
    # The class count is one more than the alphabet size
    # (presumably the extra slot is the CTC blank -- TODO confirm).
    net = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=cfg.lstm_flag)
    model = net.cuda() if use_cuda else net.cpu()

    # map_location hands every storage back unchanged.
    checkpoint = torch.load(cfg.ocr_model,
                            map_location=lambda storage, loc: storage)

    # Remove the `module.` prefix from parameter names (noted in the original
    # as a torch version issue).
    cleaned = OrderedDict(
        (key.replace('module.', ''), value)
        for key, value in checkpoint.items())

    model.load_state_dict(cleaned)
    model.eval()
    return model, converter
Exemple #2
0
def crnnSource():
    """Create the OCR network and label converter from module-level settings.

    Reads the globals ``chinsesModel`` (alphabet choice), ``GPU`` (allow
    CUDA), ``LSTMFLAG`` (forwarded as ``lstmFlag``) and ``ocrModel``
    (checkpoint path).

    Returns:
        ``(model, converter)``; the model has its weights loaded and is in
        eval mode.
    """
    alphabet = keys.alphabetChinese if chinsesModel else keys.alphabetEnglish
    converter = util.strLabelConverter(alphabet)

    on_gpu = torch.cuda.is_available() and GPU
    # Per the original note: LSTMFLAG=True gives the CRNN variant, otherwise
    # the dense OCR variant.
    net = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
    model = net.cuda() if on_gpu else net.cpu()

    checkpoint = torch.load(ocrModel,
                            map_location=lambda storage, loc: storage)
    # Remove `module.` from every parameter name before loading.
    cleaned = OrderedDict(
        (key.replace('module.', ''), value)
        for key, value in checkpoint.items())

    model.load_state_dict(cleaned)
    model.eval()
    return model, converter
Exemple #3
0
def crnnSource():
    """Build the CRNN model and label converter for ``keys1.alphabet``.

    The network runs on the GPU when CUDA is available and the module-level
    ``GPU`` flag is truthy, otherwise on the CPU. Weights come from
    ``./crnn/samples/model_acc97.pth``.

    Returns:
        A ``(model, converter)`` tuple with the model in eval mode.
    """
    alphabet = keys1.alphabet
    converter = util.strLabelConverter(alphabet)
    if torch.cuda.is_available() and GPU:
        model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda()
    else:
        model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cpu()
    path = './crnn/samples/model_acc97.pth'
    # Deserialize without remapping to the device the checkpoint was saved
    # from, so a GPU-trained checkpoint still loads on a CPU-only host;
    # load_state_dict then copies the values into the model's own parameters.
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()
    return model, converter
 def load(self):
     """Create the CRNN session, load its weights and build the helpers.

     Sets ``self.session`` (network in eval mode), ``self.converter`` and
     ``self.transformer``. ``self.cuda`` is set to ``True`` only when CUDA
     is available; it is not touched otherwise.
     """
     logging.info("Loding CRNN model first apply will be slow")
     if torch.cuda.is_available():
         self.session = crnn_model.CRNN(32, 1, 37, 256, 1).cuda()
         self.cuda = True
     else:
         self.session = crnn_model.CRNN(32, 1, 37, 256, 1)
     # Load the checkpoint without remapping it to the device it was saved
     # from, so the CPU fallback above also works for GPU-saved weights;
     # load_state_dict copies the values into the session's parameters.
     state_dict = torch.load(self.model_path,
                             map_location=lambda storage, loc: storage)
     self.session.load_state_dict(state_dict)
     self.session.eval()
     self.converter = utils.strLabelConverter(self.alphabet)
     self.transformer = dataset.resizeNormalize((100, 32))
def crnnSource():
    """Create a CRNN with weights from ``crnn/models/netCRNNcpu.pth``.

    The model is returned as constructed and loaded; this function does not
    call ``eval()`` on it.

    Returns:
        ``(model, converter)`` -- the loaded network and a label converter
        built from ``keys.alphabet``.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    net = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)
    weights = torch.load('crnn/models/netCRNNcpu.pth')
    net.load_state_dict(weights)
    return net, label_converter
Exemple #6
0
def process_img(images):
    """Run CRNN text recognition over every ``crop/*.jpg`` image.

    For each crop the raw and collapsed predictions are printed, the
    collapsed text is scanned for city names with ``GeoText`` and then passed
    to ``language_classifier.classify``.

    Args:
        images: Not read by this function; the crops are taken from disk.

    Returns:
        Always ``1``.
    """
    model_path = 'crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'

    net = crnn.CRNN(32, 1, 37, 256, 1)
    print('loading pretrained model from %s' % model_path)
    net.load_state_dict(torch.load(model_path))

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    for crop_path in glob.glob('crop/*.jpg'):
        gray = Image.open(crop_path).convert('L')
        tensor = transformer(gray)
        # Add a leading dimension of size 1 in front of the image tensor.
        batch = Variable(tensor.view(1, *tensor.size()))

        net.eval()
        scores = net(batch)
        _, best = scores.max(2)
        flat = best.transpose(1, 0).contiguous().view(-1)
        length = Variable(torch.IntTensor([flat.size(0)]))

        raw_pred = converter.decode(flat.data, length.data, raw=True)
        sim_pred = converter.decode(flat.data, length.data, raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))

        places = GeoText(sim_pred)
        if len(places.cities) == 0:
            print('Location Not Found')
        else:
            print('Location Found: ')
            print(places.cities)
        language_classifier.classify(sim_pred)

    return 1
Exemple #7
0
def predict_img(imgpath):
    """Recognise the text in the image at ``imgpath`` on the CPU.

    A fresh model is built and loaded from ``./ocr-dense.pth`` on every call.
    The image is converted to greyscale and resized to height 32 with the
    width scaled by the same factor.

    Args:
        imgpath: Path of the image file to read.

    Returns:
        The decoded prediction (``raw=False``) from the label converter.
    """
    converter = util.strLabelConverter(alphabet)
    net = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
    model = net.cpu()

    weights_file = './ocr-dense.pth'
    checkpoint = torch.load(weights_file,
                            map_location=lambda storage, loc: storage)
    # Remove `module.` from the parameter names before loading.
    cleaned = OrderedDict(
        (key.replace('module.', ''), value)
        for key, value in checkpoint.items())
    model.load_state_dict(cleaned)
    model.eval()

    gray = Image.open(imgpath).convert('L')
    width, height = gray.size[0], gray.size[1]
    scale = height * 1.0 / 32
    target_w = int(width / scale)

    transformer = dataset.resizeNormalize((target_w, 32))
    tensor = transformer(gray).cpu()
    batch = Variable(tensor.view(1, *tensor.size()))

    model.eval()
    scores = model(batch)
    _, best = scores.max(2)
    flat = best.transpose(1, 0).contiguous().view(-1)
    length = Variable(torch.IntTensor([flat.size(0)]))
    return converter.decode(flat.data, length.data, raw=False)
Exemple #8
0
def crnnSource():
    """Load the CRNN stored next to this source file.

    The checkpoint is ``models/netCRNNcpu.pth`` resolved relative to the
    directory containing this file. ``eval()`` is not called here.

    Returns:
        ``(model, converter)`` for ``keys.alphabet``.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    net = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)

    here = os.path.dirname(os.path.abspath(__file__))
    weights_path = os.path.join(here, 'models/netCRNNcpu.pth')
    net.load_state_dict(torch.load(weights_path))
    return net, label_converter
Exemple #9
0
def crnnSource():
    """Build the CRNN model and label converter for ``keys.alphabet``.

    The network runs on the GPU when CUDA is available and the module-level
    ``GPU`` flag is truthy, otherwise on the CPU. Weights are read from the
    module-level ``ocrModel`` path.

    Returns:
        A ``(model, converter)`` tuple with the model in eval mode.
    """
    alphabet = keys.alphabet
    converter = util.strLabelConverter(alphabet)
    if torch.cuda.is_available() and GPU:
        model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda()
    else:
        model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cpu()

    # Load without remapping storages to the device they were saved from, so
    # the CPU branch above also works for a checkpoint written on a GPU.
    state_dict = torch.load(ocrModel,
                            map_location=lambda storage, loc: storage)
    # Remove `module.` from the parameter names before loading.
    new_state_dict = OrderedDict(
        (k.replace('module.', ''), v) for k, v in state_dict.items())

    model.load_state_dict(new_state_dict)
    model.eval()

    return model, converter
Exemple #10
0
def crnn_single(img):
    """Recognise the text in one image with a freshly loaded CRNN.

    The model is built and loaded from ``./crnn/samples/model_acc97.pth`` on
    every call. The image is converted to greyscale and resized to height 32
    with the width scaled by the same factor.

    Args:
        img: Image data that ``np.array`` followed by ``Image.fromarray``
            can convert.

    Returns:
        The lower-cased prediction after passing through ``deletedot``.
    """
    charset = keys_crnn.alphabet
    converter = util.strLabelConverter(charset)
    model = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)
    model.load_state_dict(torch.load('./crnn/samples/model_acc97.pth'))

    gray = Image.fromarray(np.array(img)).convert('L')
    width, height = gray.size[0], gray.size[1]
    scale = height * 1.0 / 32
    target_w = int(width / scale)

    transformer = dataset.resizeNormalize((target_w, 32))
    tensor = transformer(gray)
    batch = Variable(tensor.view(1, *tensor.size()))

    model.eval()
    scores = model(batch)
    _, best = scores.max(2)

    # Drop dimension 1 first, then flatten to a 1-D index sequence.
    best = best.squeeze(1)
    flat = best.transpose(-1, 0).contiguous().view(-1)

    length = Variable(torch.IntTensor([flat.size(0)]))
    # The raw decode is kept as in the original even though its result is
    # not used afterwards.
    _raw = converter.decode(flat.data, length.data, raw=True)
    text = converter.decode(flat.data, length.data, raw=False)
    return deletedot(text.lower())
Exemple #11
0
    def __init__(self, model_path, gpu_id=None):
        '''
        Initialise the PyTorch model.
        :param model_path: path of the model file (per the original note,
            either the parameters alone or the parameters saved together
            with the computation graph); the loaded object is indexed with
            ``'state_dict'`` below
        :param gpu_id: which GPU to run on; the CPU is used unless this is
            an int and CUDA is available
        '''
        self.gpu_id = gpu_id
        self.converter = util.strLabelConverter(alphabet)

        on_gpu = isinstance(self.gpu_id, int) and torch.cuda.is_available()
        if not on_gpu:
            checkpoint = torch.load(model_path, map_location='cpu')
            self.device = torch.device("cpu")
        else:
            checkpoint = torch.load(model_path)
            self.device = torch.device("cuda:%s" % self.gpu_id)
        print('text recognition running on device:', self.device)

        network = crnn.CRNN(config, nClass)
        self.net = network
        network.load_state_dict(checkpoint['state_dict'])
        network.to(self.device)
        network.eval()
        self.transform = transforms.Compose([transforms.ToTensor()])
Exemple #12
0
def get_text_service(image):
    """Recognise the text in ``image`` with the pretrained CRNN.

    A new model is built and loaded from ``MODEL_PATH_CRNN`` on each call
    and runs on the GPU when CUDA is available. The image is converted to
    greyscale and passed through ``dataset.resizeNormalize((100, 32))``.

    Args:
        image: Array that ``Image.fromarray`` can convert.

    Returns:
        The decoded prediction (``raw=False``) from the label converter.
    """
    image = Image.fromarray(image).convert('L')

    use_cuda = torch.cuda.is_available()
    model = crnn.CRNN(32, 1, 37, 256)
    if use_cuda:
        model = model.cuda()

    # Load without remapping storages to the device they were saved from, so
    # a GPU-saved checkpoint also loads on a CPU-only host; load_state_dict
    # copies the values into the model's own parameters.
    state_dict = torch.load(MODEL_PATH_CRNN,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()

    converter = utils.strLabelConverter(ALPHABET)
    transformer = dataset.resizeNormalize((100, 32))
    image = transformer(image)
    if use_cuda:
        image = image.cuda()
    # Add a leading dimension of size 1 in front of the image tensor.
    image = Variable(image.view(1, *image.size()))

    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    return converter.decode(preds.data, preds_size.data, raw=False)
Exemple #13
0
import torch
from torch.autograd import Variable
from crnn import utils
from crnn import dataset
from PIL import Image

import crnn.models.crnn as crnn

# Module-level setup: the model, converter and transformer below are built
# once at import time and shared by every call to func().
model_path = './crnn/data/crnn.pth'
#img_path = './crnn/data/1.png'
# NOTE(review): this alphabet has 62 characters while the network below is
# built with 37 classes -- confirm the two are meant to agree.
alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'

model = crnn.CRNN(32, 1, 37, 256)
# Move the network to the GPU whenever CUDA is available.
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
model.load_state_dict(torch.load(model_path))
converter = utils.strLabelConverter(alphabet)
transformer = dataset.resizeNormalize((100, 32))


def func(img_path):
    """Run the module-level CRNN on the image at ``img_path``.

    The image is opened as greyscale, passed through the module-level
    ``transformer`` and moved to the GPU when CUDA is available.

    NOTE(review): as visible here the function stops after the forward pass
    and returns nothing -- the snippet may be cut off; confirm against the
    full source.
    """
    image = Image.open(img_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    # Add a leading dimension of size 1 in front of the image tensor.
    image = image.view(1, *image.size())
    image = Variable(image)

    model.eval()
    preds = model(image)
Exemple #14
0
# Label converter for `alphabet` and the CTC loss object used by this script.
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


def weights_init(m):
    """Custom weight initialisation, intended to be applied to the CRNN.

    Modules whose class name contains ``Conv`` get their weights redrawn
    with ``normal_(0.0, 0.02)``. Modules whose class name contains
    ``BatchNorm`` get weights from ``normal_(1.0, 0.02)`` and biases filled
    with 0. Every other module is left untouched.
    """
    layer_kind = type(m).__name__
    if 'Conv' in layer_kind:
        m.weight.data.normal_(0.0, 0.02)
        return
    if 'BatchNorm' in layer_kind:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# Build the network (this rebinds the name `crnn` from the module to the
# model instance) and apply the custom initialisation.
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    pre_trainmodel = torch.load(opt.crnn)
    model_dict = crnn.state_dict()
    weig1 = 'rnn.1.embedding.weight'
    bias1 = 'rnn.1.embedding.bias'
    if len(model_dict[weig1]) == len(pre_trainmodel[weig1]) and len(
            model_dict[bias1]) == len(pre_trainmodel[bias1]):
        # Output layer sizes agree: load the checkpoint as-is.
        crnn.load_state_dict(pre_trainmodel)
    else:
        # Output layer sizes differ: copy every pretrained tensor except the
        # final embedding weight and bias, which keep their fresh
        # initialisation. The previous test `k != weig1 or k != bias1` was
        # always true, so the mismatched tensors were copied as well.
        for k in model_dict:
            if k not in (weig1, bias1):
                model_dict[k] = pre_trainmodel[k]
        crnn.load_state_dict(model_dict)
Exemple #15
0
        if py_name == 'LSTM':
            n_layer = 2 if py_layer.bidirectional else 1
            n_layer *= py_layer.num_layers
            t7_layer = t7_layers[j:j + n_layer]
            j += n_layer
        else:
            j += 1

        load_params(py_layer, t7_layer)

    torch.save(model.state_dict(), output)


# Script entry point: parse the command line, build a CRNN and hand it to
# torch_to_pytorch together with the input and output paths.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Convert torch t7 model to pytorch')
    parser.add_argument('--model_file',
                        '-m',
                        type=str,
                        required=True,
                        help='torch model file in t7 format')
    # NOTE(review): --output defaults to None and is passed on unchanged --
    # confirm torch_to_pytorch handles a missing output name.
    parser.add_argument('--output',
                        '-o',
                        type=str,
                        default=None,
                        help='output file name prefix, xxx.py xxx.pth')
    args = parser.parse_args()

    py_model = crnn.CRNN(32, 1, 37, 256, 1)
    torch_to_pytorch(py_model, args.model_file, args.output)
Exemple #16
0
def main(argv=None):
    """Detect text boxes with the TensorFlow model, then read them with CRNN.

    Stage 1 restores the detection checkpoint from ``FLAGS.checkpoint_path``,
    runs it on every image returned by ``get_images()`` and writes one
    ``<name>.txt`` of box coordinates (and optionally the annotated image)
    into ``FLAGS.output_dir``.

    Stage 2 loads ``./crnn/crnn.pth``, reads boxes from
    ``./output/img_demo.txt``, crops them out of
    ``./test_img/img_demo.jpg`` and writes the recognised text, one line per
    box, to ``./output/output.txt``.

    NOTE(review): stage 2 uses fixed paths rather than the files produced in
    stage 1 -- confirm this is intended.

    Args:
        argv: Not used inside this function.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    # Create the output directory; errno 17 (EEXIST on Linux) means it is
    # already there and is ignored, any other error is re-raised.
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        # Restore the moving-average versions of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # Reverse the last (channel) axis of the loaded image.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                # Keep the first 8 values of each box as 4 (x, y) points and
                # divide by the resize ratios returned by resize_image.
                if boxes is not None:
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Skip boxes whose first or last edge is shorter
                            # than 5.
                            if np.linalg.norm(box[0] -
                                              box[1]) < 5 or np.linalg.norm(
                                                  box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            # NOTE(review): this draws on a reversed-channel
                            # view of `im` -- confirm the outline reaches the
                            # image saved below.
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

            # ---- Stage 2: CRNN recognition on the cropped word areas ----
            model_path = './crnn/crnn.pth'
            alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
            model_crnn = crnn.CRNN(32, 1, 37, 256)
            # if torch.cuda.is_available():
            #  model_crnn = model_crnn.cuda()
            print('loading pretrained model from %s' % model_path)
            model_crnn.load_state_dict(torch.load(model_path))

            converter = utils.strLabelConverter(alphabet)
            transformer = dataset.resizeNormalize((100, 32))
            seq = re.compile(",")
            with open('./output/img_demo.txt') as f:
                img = cv2.imread('./test_img/img_demo.jpg')
                line_id = 0
                with open('./output/output.txt', 'w') as fp:
                    for line in f:
                        line_id += 1
                        # Each line holds 8 comma-separated integers: the
                        # four corner points of one box.
                        lst = seq.split(line.strip())
                        x1 = int(lst[0])
                        y1 = int(lst[1])
                        x2 = int(lst[2])
                        y2 = int(lst[3])
                        x3 = int(lst[4])
                        y3 = int(lst[5])
                        x4 = int(lst[6])
                        y4 = int(lst[7])
                        cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4,
                                                                       y4]])
                        rect = cv2.minAreaRect(cnt)
                        # print(rect)
                        box = cv2.boxPoints(rect)
                        box = np.int0(box)
                        # print(box)
                        # Crop the axis-aligned bounding region of the box.
                        roi_img = img[min(box[:, 1]):max(box[:, 1]),
                                      min(box[:, 0]):max(box[:, 0])]
                        # print(min(box[:,0]),max(box[:,0]),min(box[:,1]),max(box[:,1]))
                        # The crop is written to disk and then reopened with
                        # PIL as a greyscale image.
                        cv2.imwrite(
                            './output/word_area_img/word_area_img' +
                            str(line_id) + '.png', roi_img)
                        img_path = './output/word_area_img/word_area_img' + str(
                            line_id) + '.png'
                        image = Image.open(img_path).convert('L')
                        image = transformer(image)
                        # if torch.cuda.is_available():
                        #  image = image.cuda()
                        image = image.view(1, *image.size())
                        image = Variable(image)
                        model_crnn.eval()
                        preds = model_crnn(image)
                        _, preds = preds.max(2)
                        preds = preds.transpose(1, 0).contiguous().view(-1)

                        preds_size = Variable(torch.IntTensor([preds.size(0)]))
                        raw_pred = converter.decode(preds.data,
                                                    preds_size.data,
                                                    raw=True)
                        sim_pred = converter.decode(preds.data,
                                                    preds_size.data,
                                                    raw=False)
                        print('%-20s => %-20s' % (raw_pred, sim_pred))
                        fp.write(sim_pred)
                        fp.write('\n')
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
from crnn import keys
from crnn import util
from crnn import dataset
from crnn.models import crnn as crnn
import torch
import torch.utils.data
from collections import OrderedDict
from PIL import Image
from torch.autograd import Variable

# Script setup: build the dense (LSTMFLAG = False) CRNN on the CPU for the
# Chinese alphabet and load its checkpoint.
alphabet = keys.alphabetChinese
LSTMFLAG = False

converter = util.strLabelConverter(alphabet)
model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG).cpu()
ocrModel = './models/epoch9_step7000_model_dense.pth'
# ocrModel = './models/ocr-dense.pth'
# map_location hands every storage back unchanged.
state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    name = k.replace('module.', '')  # remove `module.`
    new_state_dict[name] = v
# load params

model.load_state_dict(new_state_dict)
model.eval()
# Open the test image as greyscale and compute the width that corresponds to
# a height of 32 at the same scale factor.
imgpath = 'ss_350.png'
image = Image.open(imgpath).convert('L')
scale = image.size[1] * 1.0 / 32
w = image.size[0] / scale