def crnnOcr(image):
    """Run CRNN OCR on a single 'L'-mode PIL image and return the decoded text.

    Relies on module-level globals: ``model``, ``converter``, ``GPU`` and the
    ``dataset`` module for preprocessing.
    """
    # Resize to the fixed CRNN input height of 32, preserving aspect ratio.
    scale = image.size[1] * 1.0 / 32
    target_w = int(image.size[0] / scale)
    normalize = dataset.resizeNormalize((target_w, 32))
    if torch.cuda.is_available() and GPU:
        tensor = normalize(image).cuda()
    else:
        tensor = normalize(image).cpu()
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    logits = model(tensor)
    _, best = logits.max(2)
    best = best.transpose(1, 0).contiguous().view(-1)
    length = Variable(torch.IntTensor([best.size(0)]))
    return converter.decode(best.data, length.data, raw=False)
def predict_img(imgpath):
    """Load the dense-CRNN checkpoint and OCR the image at ``imgpath``.

    NOTE(review): the model is rebuilt and the checkpoint reloaded on every
    call; cache them at module level if this is invoked in a loop.

    :param imgpath: path to an image file readable by PIL
    :return: CTC-decoded text string
    """
    converter = util.strLabelConverter(alphabet)
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG).cpu()
    ocrModel = './ocr-dense.pth'
    # ocrModel = './models/ocr-dense.pth'
    # Checkpoint may have been saved from DataParallel; strip 'module.' prefixes.
    state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        new_state_dict[k.replace('module.', '')] = v
    model.load_state_dict(new_state_dict)
    model.eval()  # single eval() — the original called it twice redundantly
    image = Image.open(imgpath).convert('L')
    # Resize to the CRNN input height of 32, preserving aspect ratio.
    scale = image.size[1] * 1.0 / 32
    w = int(image.size[0] / scale)
    transformer = dataset.resizeNormalize((w, 32))
    image = transformer(image).cpu()
    image = Variable(image.view(1, *image.size()))
    preds = model(image)
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))
    sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
    return sim_pred
def process_img(images):
    """OCR every cropped image under crop/*.jpg with a pretrained CRNN and
    run a location / language check on each transcript.

    NOTE(review): the ``images`` argument is never used — the function always
    globs 'crop/*.jpg'. Kept for interface compatibility; confirm with callers.

    :return: 1 on completion
    """
    model_path = 'crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model = crnn.CRNN(32, 1, 37, 256, 1)
    print('loading pretrained model from %s' % model_path)
    model.load_state_dict(torch.load(model_path))
    model.eval()  # switch to inference mode once, not on every loop iteration
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    for img_path in glob.glob('crop/*.jpg'):
        image = Image.open(img_path).convert('L')
        image = transformer(image)
        image = Variable(image.view(1, *image.size()))
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
        sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
        print('%-20s => %-20s' % (raw_pred, sim_pred))
        places = GeoText(sim_pred)
        if len(places.cities) > 0:
            print('Location Found: ')
            print(places.cities)
        else:
            print('Location Not Found')
        language_classifier.classify(sim_pred)
    return 1
def load(self):
    """Build the CRNN session, load its weights, and prepare the label
    converter and input transformer.

    Sets: ``self.session``, ``self.cuda``, ``self.converter``,
    ``self.transformer``. Reads: ``self.model_path``, ``self.alphabet``.
    """
    logging.info("Loading CRNN model first apply will be slow")
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        self.session = crnn_model.CRNN(32, 1, 37, 256, 1).cuda()
        self.cuda = True
    else:
        self.session = crnn_model.CRNN(32, 1, 37, 256, 1)
        self.cuda = False  # make the flag explicit on the CPU path too
    # map_location keeps GPU-saved checkpoints loadable on CPU-only hosts.
    state = torch.load(self.model_path,
                       map_location=None if use_gpu else 'cpu')
    self.session.load_state_dict(state)
    self.session.eval()
    self.converter = utils.strLabelConverter(self.alphabet)
    self.transformer = dataset.resizeNormalize((100, 32))
def crnn_single(img):
    """Recognize the text in a single image with a CPU CRNN model.

    :param img: image (PIL image or array-like) to recognize
    :return: lower-cased prediction with dots removed via ``deletedot``
    """
    alphabet = keys_crnn.alphabet
    converter = util.strLabelConverter(alphabet)
    model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1)
    path = './crnn/samples/model_acc97.pth'
    model.load_state_dict(torch.load(path))
    gray = Image.fromarray(np.array(img)).convert('L')
    # Force the CRNN input height of 32 while keeping the aspect ratio.
    ratio = gray.size[1] * 1.0 / 32
    new_w = int(gray.size[0] / ratio)
    normalize = dataset.resizeNormalize((new_w, 32))
    tensor = normalize(gray)
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    out = model(tensor)
    _, out = out.max(2)
    out = out.squeeze(1)
    out = out.transpose(-1, 0).contiguous().view(-1)
    out_size = Variable(torch.IntTensor([out.size(0)]))
    raw_pred = converter.decode(out.data, out_size.data, raw=True)
    sim_pred = converter.decode(out.data, out_size.data, raw=False)
    return deletedot(sim_pred.lower())
def crnnRec(model, converter, im, text_recs):
    """Rotate-crop each detected text box out of ``im`` and OCR it with CRNN.

    :param model: CRNN network used for recognition
    :param converter: strLabelConverter used to CTC-decode predictions
    :param im: full source image (numpy array)
    :param text_recs: iterable of 8-value box records (corner coordinates)
    :return: (raw_preds, sim_preds) lists, one entry per non-empty crop
    """
    raw_preds = []
    sim_preds = []
    model.eval()  # set inference mode once instead of per box
    for rec in text_recs:
        pt1 = (rec[0], rec[1])
        pt2 = (rec[2], rec[3])
        pt3 = (rec[6], rec[7])
        pt4 = (rec[4], rec[5])
        # Deskew the quadrilateral using the top edge's angle.
        partImg = dumpRotateImage(
            im, degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])), pt1, pt2,
            pt3, pt4)
        # Skip degenerate crops (any zero dimension).
        if (partImg.shape[0] == 0 or partImg.shape[1] == 0
                or partImg.shape[2] == 0):
            continue
        image = Image.fromarray(partImg).convert('L')
        # Resize to the CRNN input height of 32, keeping aspect ratio.
        scale = image.size[1] * 1.0 / 32
        w = int(image.size[0] / scale)
        transformer = dataset.resizeNormalize((w, 32))
        image = transformer(image)
        image = Variable(image.view(1, *image.size()))
        preds = model(image)
        _, preds = preds.max(2)
        preds = preds.transpose(1, 0).contiguous().view(-1)
        preds_size = Variable(torch.IntTensor([preds.size(0)]))
        raw_preds.append(converter.decode(preds.data, preds_size.data, raw=True))
        sim_preds.append(converter.decode(preds.data, preds_size.data, raw=False))
    return raw_preds, sim_preds
def getTextFromImage(model, img):
    """Decode the text in ``img`` using a trained CRNN ``model``.

    :param model: CRNN network on any device; the input tensor is placed on
        the same device as the model's parameters (the original hard-coded
        ``.cuda()``, crashing on CPU-only machines).
    :param img: 'L'-mode PIL image
    :return: CTC-decoded prediction string
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    # Follow the model's device instead of assuming CUDA.
    if next(model.parameters()).is_cuda:
        img = transformer(img).cuda()
    else:
        img = transformer(img)
    img = Variable(img.view(1, *img.size()))
    model.eval()
    preds = model(img)
    _, preds = preds.max(2)
    preds = preds.squeeze(1)
    # transpose(0, 0) on the 1-D tensor was a no-op; drop it.
    preds = preds.contiguous().view(-1)
    predictionSize = Variable(torch.IntTensor([preds.size(0)]))
    rawPrediction = converter.decode(preds.data, predictionSize.data, raw=True)
    decodedPrediction = converter.decode(preds.data, predictionSize.data, raw=False)
    return decodedPrediction
def get_text_service(image):
    """Service entry point: recognize the text in one image array.

    :param image: array-like image; converted to grayscale internally
    :return: CTC-decoded prediction string
    """
    gray = Image.fromarray(image).convert('L')
    net = crnn.CRNN(32, 1, 37, 256)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        net = net.cuda()
    net.load_state_dict(torch.load(MODEL_PATH_CRNN))
    converter = utils.strLabelConverter(ALPHABET)
    normalize = dataset.resizeNormalize((100, 32))
    tensor = normalize(gray)
    if use_gpu:
        tensor = tensor.cuda()
    tensor = Variable(tensor.view(1, *tensor.size()))
    net.eval()
    logits = net(tensor)
    _, best = logits.max(2)
    best = best.transpose(1, 0).contiguous().view(-1)
    best_size = Variable(torch.IntTensor([best.size(0)]))
    raw_pred = converter.decode(best.data, best_size.data, raw=True)
    sim_pred = converter.decode(best.data, best_size.data, raw=False)
    return sim_pred
def crnnOcr(image):
    """End-to-end CRNN recognition for one text-box crop.

    :param image: 'L'-mode PIL image of a single text line
    :return: decoded text for the box
    """
    # Fixed input height of 32; width scales to keep the aspect ratio.
    target_w = int(image.size[0] / (image.size[1] * 1.0 / 32))
    normalize = dataset.resizeNormalize((target_w, 32))
    use_gpu = torch.cuda.is_available() and cfg.GPU
    tensor = normalize(image).cuda() if use_gpu else normalize(image).cpu()
    tensor = Variable(tensor.view(1, *tensor.size()))
    model.eval()
    logits = model(tensor)
    _, best = logits.max(2)
    best = best.transpose(1, 0).contiguous().view(-1)
    length = Variable(torch.IntTensor([best.size(0)]))
    return converter.decode(best.data, length.data, raw=False)
# ---- Training setup: loaders, alphabet, converter, loss ----
assert train_dataset
if not opt.random_sample:
    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
else:
    sampler = None
# DataLoader forbids shuffle=True together with a custom sampler (it raises
# ValueError); only shuffle when no sampler is supplied — the sequential
# sampler already randomizes batch order itself.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=opt.batchSize,
    shuffle=(sampler is None),
    sampler=sampler,
    num_workers=int(opt.workers),
    collate_fn=dataset.alignCollate(imgH=opt.imgH,
                                    imgW=opt.imgW,
                                    keep_ratio=opt.keep_ratio))
test_dataset = dataset.lmdbDataset(root=opt.valroot,
                                   transform=dataset.resizeNormalize((100, 32)))
# NOTE(review): .decode assumes opt.alphabet arrives as bytes (Python 2
# argparse); under Python 3 it is already str and .decode would fail — confirm.
alphabet = opt.alphabet.decode('utf-8')
nclass = len(alphabet) + 1  # +1 for the CTC blank symbol
nc = 1  # single (grayscale) input channel
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
def main(argv=None):
    """Detect text boxes with an EAST TensorFlow model, then recognize the
    demo image's boxes with a PyTorch CRNN.

    Stage 1 (TensorFlow/EAST): restore the checkpoint from
    FLAGS.checkpoint_path, run detection over every image from get_images(),
    and write box coordinates (and optionally annotated images) to
    FLAGS.output_dir.
    Stage 2 (PyTorch/CRNN): re-read ./output/img_demo.txt, crop each box out
    of ./test_img/img_demo.jpg, and append decoded text to ./output/output.txt.

    NOTE(review): source formatting was collapsed; nesting below is
    reconstructed — the CRNN stage is placed at function level since it does
    not use the TF session.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list
    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        if e.errno != 17:  # 17 == EEXIST: pre-existing output dir is fine
            raise
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        f_score, f_geometry = model.model(input_images, is_training=False)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        # Restore the exponential-moving-average shadow variables.
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # [:, :, ::-1] flips BGR (cv2) to RGB for the network.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)
                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start
                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))
                if boxes is not None:
                    # Map detections back to the original image scale.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h
                duration = time.time() - start_time
                print('[timing] {}'.format(duration))
                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))
                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Drop degenerate boxes with an edge under 5 px.
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1], box[3, 0], box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])
    # ---- Stage 2: CRNN recognition over the demo image's detected boxes ----
    model_path = './crnn/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model_crnn = crnn.CRNN(32, 1, 37, 256)
    # if torch.cuda.is_available():
    #     model_crnn = model_crnn.cuda()
    print('loading pretrained model from %s' % model_path)
    model_crnn.load_state_dict(torch.load(model_path))
    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    seq = re.compile(",")
    with open('./output/img_demo.txt') as f:
        img = cv2.imread('./test_img/img_demo.jpg')
        line_id = 0
        with open('./output/output.txt', 'w') as fp:
            for line in f:
                line_id += 1
                # Each line holds 8 comma-separated corner coordinates.
                lst = seq.split(line.strip())
                x1 = int(lst[0])
                y1 = int(lst[1])
                x2 = int(lst[2])
                y2 = int(lst[3])
                x3 = int(lst[4])
                y3 = int(lst[5])
                x4 = int(lst[6])
                y4 = int(lst[7])
                cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
                # Axis-aligned crop of the quadrilateral's bounding rectangle.
                rect = cv2.minAreaRect(cnt)
                # print(rect)
                box = cv2.boxPoints(rect)
                box = np.int0(box)
                # print(box)
                roi_img = img[min(box[:, 1]):max(box[:, 1]),
                              min(box[:, 0]):max(box[:, 0])]
                # print(min(box[:,0]),max(box[:,0]),min(box[:,1]),max(box[:,1]))
                cv2.imwrite(
                    './output/word_area_img/word_area_img' + str(line_id) +
                    '.png', roi_img)
                img_path = './output/word_area_img/word_area_img' + str(
                    line_id) + '.png'
                image = Image.open(img_path).convert('L')
                image = transformer(image)
                # if torch.cuda.is_available():
                #     image = image.cuda()
                image = image.view(1, *image.size())
                image = Variable(image)
                model_crnn.eval()
                preds = model_crnn(image)
                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                preds_size = Variable(torch.IntTensor([preds.size(0)]))
                raw_pred = converter.decode(preds.data, preds_size.data, raw=True)
                sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
                print('%-20s => %-20s' % (raw_pred, sim_pred))
                fp.write(sim_pred)
                fp.write('\n')
# Standalone smoke test: load the dense-CRNN checkpoint and OCR one image.
converter = util.strLabelConverter(alphabet)
model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG).cpu()
ocrModel = './models/epoch9_step7000_model_dense.pth'
# ocrModel = './models/ocr-dense.pth'
# The checkpoint may come from DataParallel training; drop 'module.' prefixes.
state_dict = torch.load(ocrModel, map_location=lambda storage, loc: storage)
new_state_dict = OrderedDict()
for k, v in state_dict.items():
    new_state_dict[k.replace('module.', '')] = v
model.load_state_dict(new_state_dict)
model.eval()  # single eval() — the original called it twice redundantly
imgpath = 'ss_350.png'
image = Image.open(imgpath).convert('L')
# Resize to the CRNN input height of 32, preserving aspect ratio.
scale = image.size[1] * 1.0 / 32
w = int(image.size[0] / scale)
transformer = dataset.resizeNormalize((w, 32))
image = transformer(image).cpu()
image = Variable(image.view(1, *image.size()))
preds = model(image)
_, preds = preds.max(2)
preds = preds.transpose(1, 0).contiguous().view(-1)
preds_size = Variable(torch.IntTensor([preds.size(0)]))
sim_pred = converter.decode(preds.data, preds_size.data, raw=False)
print(sim_pred)