def crnnSource():
    """Build the CRNN recognizer described by ``cfg`` and its label converter.

    The alphabet is chosen by ``cfg.chinese_model``; the network is placed on
    the GPU only when CUDA is available and ``cfg.GPU`` is truthy.  Weights
    are read from ``cfg.ocr_model`` and the network is switched to eval mode.

    Returns:
        A ``(model, converter)`` tuple.
    """
    alphabet = keys.alphabetChinese if cfg.chinese_model else keys.alphabetEnglish
    converter = strLabelConverter(alphabet)

    use_gpu = torch.cuda.is_available() and cfg.GPU
    # The third argument is len(alphabet) + 1 -- presumably one output per
    # character plus one extra symbol; confirm against crnn.CRNN.
    network = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=cfg.lstm_flag)
    model = network.cuda() if use_gpu else network.cpu()

    # map_location returns each storage unchanged, i.e. as deserialized.
    checkpoint = torch.load(cfg.ocr_model, map_location=lambda storage, loc: storage)
    # Drop `module.` from parameter names (original note: a torch version issue).
    cleaned = OrderedDict(
        (key.replace('module.', ''), tensor) for key, tensor in checkpoint.items()
    )
    model.load_state_dict(cleaned)
    model.eval()
    return model, converter
def crnnSource():
    """Create the CRNN model and label converter from module-level settings.

    Reads the globals ``chinsesModel`` (alphabet choice), ``GPU`` (device
    preference), ``LSTMFLAG`` (network variant) and ``ocrModel`` (checkpoint
    path).

    Returns:
        A ``(model, converter)`` tuple; the model is in eval mode.
    """
    alphabet = keys.alphabetChinese if chinsesModel else keys.alphabetEnglish
    converter = util.strLabelConverter(alphabet)

    on_gpu = torch.cuda.is_available() and GPU
    # Original note: LSTMFLAG=True selects crnn, otherwise dense ocr.
    network = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
    if on_gpu:
        model = network.cuda()
    else:
        model = network.cpu()

    loaded = torch.load(ocrModel, map_location=lambda storage, loc: storage)
    renamed = OrderedDict()
    for key in loaded:
        # remove `module.` from the parameter name
        renamed[key.replace('module.', '')] = loaded[key]

    # load params
    model.load_state_dict(renamed)
    model.eval()
    return model, converter
def crnnSource():
    """Build the CRNN model for ``keys1.alphabet`` and its label converter.

    The network runs on the GPU when CUDA is available and the global ``GPU``
    flag is truthy, otherwise on the CPU.  Weights come from
    ``./crnn/samples/model_acc97.pth``.

    Returns:
        A ``(model, converter)`` tuple; the model is in eval mode.
    """
    alphabet = keys1.alphabet
    converter = util.strLabelConverter(alphabet)

    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    if torch.cuda.is_available() and GPU:
        model = model.cuda()
    else:
        model = model.cpu()

    path = './crnn/samples/model_acc97.pth'
    # Keep deserialized tensors where they are instead of restoring the device
    # recorded in the file, so a checkpoint saved from a GPU still loads on a
    # CPU-only host; load_state_dict then copies into the model's parameters.
    state_dict = torch.load(path, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()
    return model, converter
def load(self):
    """Instantiate the CRNN network, load its weights and build the helpers.

    Sets ``self.session`` (the network, in eval mode), ``self.converter`` and
    ``self.transformer``.  ``self.cuda`` is set to ``True`` only when CUDA is
    available; it is not touched otherwise.
    """
    logging.info("Loding CRNN model first apply will be slow")
    if torch.cuda.is_available():
        self.session = crnn_model.CRNN(32, 1, 37, 256, 1).cuda()
        self.cuda = True
    else:
        self.session = crnn_model.CRNN(32, 1, 37, 256, 1)

    # Do not restore the device recorded in the checkpoint: without this a
    # GPU-saved file cannot be loaded in the CPU branch above.
    state_dict = torch.load(self.model_path,
                            map_location=lambda storage, loc: storage)
    self.session.load_state_dict(state_dict)
    self.session.eval()

    self.converter = utils.strLabelConverter(self.alphabet)
    self.transformer = dataset.resizeNormalize((100, 32))
def crnnSource():
    """Return the CRNN model loaded from ``crnn/models/netCRNNcpu.pth``.

    Returns:
        A ``(model, converter)`` tuple.  The model is returned as constructed
        and loaded; this function does not switch it to eval mode.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)

    network = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)
    weights = torch.load('crnn/models/netCRNNcpu.pth')
    network.load_state_dict(weights)

    return network, label_converter
def process_img(images):
    """Run CRNN recognition on every ``crop/*.jpg`` file and report the text.

    For each crop the raw and simplified decodings are printed, the simplified
    text is scanned with ``GeoText`` for city names, and it is passed to
    ``language_classifier.classify``.

    Args:
        images: Not used by the body; the crops are read from ``crop/*.jpg``.

    Returns:
        Always ``1``.
    """
    # NOTE(review): the loop nesting was reconstructed from a
    # whitespace-collapsed source -- confirm that classify() runs once per
    # crop and that the return follows the loop.
    model_path = 'crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'

    model = crnn.CRNN(32, 1, 37, 256, 1)
    print('loading pretrained model from %s' % model_path)
    model.load_state_dict(torch.load(model_path))
    # Eval mode does not depend on the image, so set it once up front rather
    # than on every loop iteration.
    model.eval()

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))

    for img_path in glob.glob('crop/*.jpg'):
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        # Prepend a leading dimension of size 1 before the forward pass.
        image = image.view(1, *image.size())
        image = Variable(image)

        preds = model(image)
        # Index of the maximum along dimension 2.
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))

        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))

        places = GeoText(sim_pred)
        if places.cities:
            print('Location Found: ')
            print(places.cities)
        else:
            print('Location Not Found')

        language_classifier.classify(sim_pred)

    return 1
def predict_img(imgpath):
    """Recognise the text in the image file ``imgpath``.

    A CPU CRNN is built from the module-level ``alphabet`` and ``LSTMFLAG``,
    loaded from ``./ocr-dense.pth``, and applied to the grayscale image
    resized to a height of 32 with its aspect ratio preserved.

    Returns:
        The result of ``converter.decode(..., raw=False)``.
    """
    converter = util.strLabelConverter(alphabet)
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG).cpu()

    weights_file = './ocr-dense.pth'
    checkpoint = torch.load(weights_file, map_location=lambda storage, loc: storage)
    stripped = OrderedDict()
    for key, tensor in checkpoint.items():
        # remove `module.` from the parameter name
        stripped[key.replace('module.', '')] = tensor
    # load params
    model.load_state_dict(stripped)
    model.eval()

    gray = Image.open(imgpath).convert('L')
    # Width that keeps the aspect ratio once the height becomes 32.
    height_ratio = gray.size[1] * 1.0 / 32
    target_w = int(gray.size[0] / height_ratio)

    to_tensor = dataset.resizeNormalize((target_w, 32))
    batch = to_tensor(gray).cpu()
    batch = Variable(batch.view(1, *batch.size()))

    model.eval()
    scores = model(batch)
    _, best = scores.max(2)
    flat = best.transpose(1, 0).contiguous().view(-1)
    length = Variable(torch.IntTensor([flat.size(0)]))
    return converter.decode(flat.data, length.data, raw=False)
def crnnSource():
    """Return the CRNN model loaded from ``models/netCRNNcpu.pth``.

    The checkpoint path is resolved relative to the directory containing this
    source file, so it does not depend on the current working directory.

    Returns:
        A ``(model, converter)`` tuple.  The model is not switched to eval
        mode here.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    network = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)

    here = os.path.dirname(os.path.abspath(__file__))
    checkpoint_file = os.path.join(here, 'models/netCRNNcpu.pth')
    network.load_state_dict(torch.load(checkpoint_file))

    return network, label_converter
def crnnSource():
    """Build the CRNN model for ``keys.alphabet`` and its label converter.

    The network runs on the GPU when CUDA is available and the global ``GPU``
    flag is truthy, otherwise on the CPU.  Weights are read from the global
    ``ocrModel`` path.

    Returns:
        A ``(model, converter)`` tuple; the model is in eval mode.
    """
    alphabet = keys.alphabet
    converter = util.strLabelConverter(alphabet)

    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    if torch.cuda.is_available() and GPU:
        model = model.cuda()
    else:
        model = model.cpu()

    # Keep deserialized tensors where they are instead of restoring the
    # device recorded in the file; otherwise a GPU-saved checkpoint cannot be
    # loaded when the CPU branch above is taken.
    state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)
    new_state_dict = OrderedDict(
        (k.replace('module.', ''), v)  # remove `module.`
        for k, v in state_dict.items()
    )
    # load params
    model.load_state_dict(new_state_dict)
    model.eval()
    return model, converter
def crnn_single(img):
    """Recognise the text in a single image and return the cleaned string.

    ``img`` is converted through ``np.array`` to a grayscale PIL image,
    resized to a height of 32 with its aspect ratio preserved, and run
    through a CRNN loaded from ``./crnn/samples/model_acc97.pth``.

    Returns:
        ``deletedot`` applied to the lower-cased decoded text.
    """
    charset = keys_crnn.alphabet
    converter = util.strLabelConverter(charset)

    network = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)
    checkpoint_file = './crnn/samples/model_acc97.pth'
    network.load_state_dict(torch.load(checkpoint_file))

    gray = Image.fromarray(np.array(img)).convert('L')
    # Width that keeps the aspect ratio once the height becomes 32.
    height_ratio = gray.size[1] * 1.0 / 32
    target_w = int(gray.size[0] / height_ratio)

    to_tensor = dataset.resizeNormalize((target_w, 32))
    batch = to_tensor(gray)
    batch = Variable(batch.view(1, *batch.size()))

    network.eval()
    scores = network(batch)
    _, best = scores.max(2)
    flat = best.squeeze(1).transpose(-1, 0).contiguous().view(-1)
    length = Variable(torch.IntTensor([flat.size(0)]))

    # The raw decoding is computed as in the original but is not returned.
    raw_text = converter.decode(flat.data, length.data, raw=True)
    text = converter.decode(flat.data, length.data, raw=False)
    return deletedot(text.lower())
def __init__(self, model_path, gpu_id=None):
    '''
    Initialise the PyTorch text-recognition model.

    The checkpoint at ``model_path`` is loaded and its ``'state_dict'`` entry
    is applied to a freshly built ``crnn.CRNN(config, nClass)``.

    :param model_path: path of the checkpoint file to load
    :param gpu_id: index of the GPU to run on; the CPU is used when this is
        None, not an int, or CUDA is unavailable
    '''
    self.gpu_id = gpu_id
    self.converter = util.strLabelConverter(alphabet)

    gpu_requested = self.gpu_id is not None and isinstance(self.gpu_id, int)
    if not (gpu_requested and torch.cuda.is_available()):
        checkpoint = torch.load(model_path, map_location='cpu')
        self.device = torch.device("cpu")
    else:
        checkpoint = torch.load(model_path)
        self.device = torch.device("cuda:%s" % self.gpu_id)
    print('text recognition running on device:', self.device)

    self.net = crnn.CRNN(config, nClass)
    weights = checkpoint['state_dict']
    self.net.load_state_dict(weights)
    self.net.to(self.device)
    self.net.eval()

    to_tensor = transforms.ToTensor()
    self.transform = transforms.Compose([to_tensor])
def get_text_service(image):
    """Recognise the text in ``image`` with the CRNN model.

    Args:
        image: Array accepted by ``Image.fromarray``; it is converted to
            grayscale and resized by ``dataset.resizeNormalize((100, 32))``.

    Returns:
        The result of ``converter.decode(..., raw=False)``.
    """
    image = Image.fromarray(image).convert('L')
    use_cuda = torch.cuda.is_available()

    model = crnn.CRNN(32, 1, 37, 256)
    if use_cuda:
        model = model.cuda()
    # Keep deserialized tensors where they are instead of restoring the
    # device recorded in the file, so a GPU-saved checkpoint still loads on
    # a CPU-only host; load_state_dict copies into the model's parameters.
    state_dict = torch.load(MODEL_PATH_CRNN,
                            map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)
    model.eval()

    converter = utils.strLabelConverter(ALPHABET)
    transformer = dataset.resizeNormalize((100, 32))

    image = transformer(image)
    if use_cuda:
        image = image.cuda()
    # Prepend a leading dimension of size 1 before the forward pass.
    image = Variable(image.view(1, *image.size()))

    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))

    # The original also computed a raw=True decoding that was never used.
    return converter.decode(preds.data, preds_size.data, raw=False)
import torch
from torch.autograd import Variable
from crnn import utils
from crnn import dataset
from PIL import Image
import crnn.models.crnn as crnn

# Module-level setup: the model, converter and transformer below are built
# once at import time and shared by func().
model_path = './crnn/data/crnn.pth'
#img_path = './crnn/data/1.png'
# NOTE(review): this alphabet has 62 characters while the network below is
# built with 37 as its third argument -- confirm that strLabelConverter
# folds case so the two agree.
alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
model = crnn.CRNN(32, 1, 37, 256)
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % model_path)
# NOTE(review): no map_location is passed, so the load depends on the device
# recorded in the checkpoint -- confirm this works on CPU-only hosts.
model.load_state_dict(torch.load(model_path))
converter = utils.strLabelConverter(alphabet)
transformer = dataset.resizeNormalize((100, 32))


def func(img_path):
    """Run the shared CRNN model on the image file at ``img_path``.

    The visible code ends after the forward pass and returns nothing.
    NOTE(review): the function looks truncated in this view -- confirm.
    """
    image = Image.open(img_path).convert('L')
    image = transformer(image)
    if torch.cuda.is_available():
        image = image.cuda()
    # Prepend a leading dimension of size 1 before the forward pass.
    image = image.view(1, *image.size())
    image = Variable(image)
    model.eval()
    preds = model(image)
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    """Initialise one module in place; intended for ``Module.apply``.

    Modules whose class name contains ``Conv`` get weights drawn from
    N(0.0, 0.02).  Modules whose class name contains ``BatchNorm`` get
    weights drawn from N(1.0, 0.02) and a zero bias.  Others are untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
if opt.crnn != '':
    print('loading pretrained model from %s' % opt.crnn)
    pre_trainmodel = torch.load(opt.crnn)
    model_dict = crnn.state_dict()
    weig1 = 'rnn.1.embedding.weight'
    bias1 = 'rnn.1.embedding.bias'
    if len(model_dict[weig1]) == len(pre_trainmodel[weig1]) and len(
            model_dict[bias1]) == len(pre_trainmodel[bias1]):
        # The `rnn.1.embedding` sizes match, so load the checkpoint as is.
        crnn.load_state_dict(pre_trainmodel)
    else:
        # The `rnn.1.embedding` sizes differ: take every other tensor from
        # the checkpoint and keep the freshly initialised embedding.  The
        # original test `k != weig1 or k != bias1` was always true, so the
        # mismatched tensors were copied too and the load could not succeed.
        for k, v in model_dict.items():
            if k != weig1 and k != bias1:
                model_dict[k] = pre_trainmodel[k]
        crnn.load_state_dict(model_dict)
if py_name == 'LSTM': n_layer = 2 if py_layer.bidirectional else 1 n_layer *= py_layer.num_layers t7_layer = t7_layers[j:j + n_layer] j += n_layer else: j += 1 load_params(py_layer, t7_layer) torch.save(model.state_dict(), output) if __name__ == "__main__": parser = argparse.ArgumentParser( description='Convert torch t7 model to pytorch') parser.add_argument('--model_file', '-m', type=str, required=True, help='torch model file in t7 format') parser.add_argument('--output', '-o', type=str, default=None, help='output file name prefix, xxx.py xxx.pth') args = parser.parse_args() py_model = crnn.CRNN(32, 1, 37, 256, 1) torch_to_pytorch(py_model, args.model_file, args.output)
def main(argv=None):
    """Detect text boxes with the TensorFlow model, then read text with CRNN.

    Stage 1 restores the detector from ``FLAGS.checkpoint_path``, runs it on
    every image from ``get_images()``, and writes one ``<name>.txt`` of box
    coordinates (plus, unless ``FLAGS.no_write_images`` is set, an annotated
    image) into ``FLAGS.output_dir``.

    Stage 2 loads ``./crnn/crnn.pth``, reads the boxes listed in
    ``./output/img_demo.txt``, crops them from ``./test_img/img_demo.jpg``
    and writes the recognised text, one line per box, to
    ``./output/output.txt``.

    NOTE(review): the nesting below was reconstructed from a
    whitespace-collapsed source -- in particular confirm that stage 2 sits at
    function level rather than inside the per-image loop.

    :param argv: not used by the body.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # 17 is errno.EEXIST on POSIX platforms: an existing output directory
        # is fine, anything else is re-raised.
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        # Restore the moving-average versions of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # Reverse the last axis (channel order) of the loaded image.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    # Keep the first 8 values per box as four (x, y) points
                    # and undo the resize ratios.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Skip boxes with a side shorter than 5.
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(
                                    box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

    # ---- Stage 2: CRNN recognition on the boxes of the demo image ----
    # model_path is rebound here to the CRNN checkpoint.
    model_path = './crnn/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model_crnn = crnn.CRNN(32, 1, 37, 256)
    # if torch.cuda.is_available():
    #     model_crnn = model_crnn.cuda()
    print('loading pretrained model from %s' % model_path)
    model_crnn.load_state_dict(torch.load(model_path))

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    seq = re.compile(",")
    with open('./output/img_demo.txt') as f:
        img = cv2.imread('./test_img/img_demo.jpg')
        line_id = 0
        with open('./output/output.txt', 'w') as fp:
            for line in f:
                line_id += 1
                # Each line holds eight comma-separated integers: x1,y1..x4,y4.
                lst = seq.split(line.strip())
                x1 = int(lst[0])
                y1 = int(lst[1])
                x2 = int(lst[2])
                y2 = int(lst[3])
                x3 = int(lst[4])
                y3 = int(lst[5])
                x4 = int(lst[6])
                y4 = int(lst[7])
                cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
                rect = cv2.minAreaRect(cnt)
                # print(rect)
                box = cv2.boxPoints(rect)
                box = np.int0(box)
                # print(box)
                # Crop the bounding region spanned by the box corners.
                roi_img = img[min(box[:, 1]):max(box[:, 1]),
                              min(box[:, 0]):max(box[:, 0])]
                # print(min(box[:,0]),max(box[:,0]),min(box[:,1]),max(box[:,1]))
                # The crop is written to disk and then re-read through PIL.
                cv2.imwrite(
                    './output/word_area_img/word_area_img' + str(line_id) +
                    '.png', roi_img)
                img_path = './output/word_area_img/word_area_img' + str(
                    line_id) + '.png'
                image = Image.open(img_path).convert('L')
                image = transformer(image)
                # if torch.cuda.is_available():
                #     image = image.cuda()
                # Prepend a leading dimension of size 1 before the forward pass.
                image = image.view(1, *image.size())
                image = Variable(image)

                model_crnn.eval()
                preds = model_crnn(image)

                # Index of the maximum along dimension 2.
                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)

                preds_size = Variable(torch.IntTensor([preds.size(0)]))
                raw_pred = converter.decode(preds.data,
                                            preds_size.data,
                                            raw=True)
                sim_pred = converter.decode(preds.data,
                                            preds_size.data,
                                            raw=False)
                print('%-20s => %-20s' % (raw_pred, sim_pred))
                fp.write(sim_pred)
                fp.write('\n')
# os.environ["CUDA_VISIBLE_DEVICES"] = "" from crnn import keys from crnn import util from crnn import dataset from crnn.models import crnn as crnn import torch import torch.utils.data from collections import OrderedDict from PIL import Image from torch.autograd import Variable alphabet = keys.alphabetChinese LSTMFLAG = False converter = util.strLabelConverter(alphabet) model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG).cpu() ocrModel = './models/epoch9_step7000_model_dense.pth' # ocrModel = './models/ocr-dense.pth' state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage) new_state_dict = OrderedDict() for k, v in state_dict.items(): name = k.replace('module.', '') # remove `module.` new_state_dict[name] = v # load params model.load_state_dict(new_state_dict) model.eval() imgpath = 'ss_350.png' image = Image.open(imgpath).convert('L') scale = image.size[1] * 1.0 / 32 w = image.size[0] / scale