def _ctc_loss(self, label, text_preds, text_targets):
    """Compute the batch-averaged CTC loss over the samples labelled as text.

    Samples whose ``label`` is 1, 2, 4 or 5 are selected; the matching
    predictions and target strings are fed to the ``ctc`` criterion and the
    result is divided by the number of selected samples. Debug output (the
    loss, the greedy-decoded predictions and the filtered targets) is printed
    on every call.

    ``alphabet``, ``text``, ``length`` and ``ctc`` are not parameters; they
    are resolved from the enclosing scope.

    Args:
        label: Tensor-like whose ``.data`` holds one class id per sample.
        text_preds: Prediction tensor. Dim 1 is indexed by sample, the size
            of dim 0 is used as the per-sample prediction length and the
            arg-max is taken over dim 2.
        text_targets: Sequence of target strings indexable by sample index.
            Characters that are not in ``alphabet`` are dropped.

    Returns:
        The CTC loss divided by the number of selected samples, or a
        one-element zero CUDA ``Variable`` when no sample is selected or the
        encoded target buffer ``text`` is not 1-D.
    """
    # NOTE(review): the original mask tested ``label == 2`` twice and never
    # tested 3. The duplicate term did not change which indices ``nonzero()``
    # returns, so it is dropped here; confirm whether 3 was intended.
    labels = label.data
    text_mask = (labels == 1) + (labels == 2) + (labels == 4) + (labels == 5)
    inds_text = Variable(text_mask.nonzero().view(-1))
    if inds_text.nelement() == 0:
        return Variable(torch.FloatTensor([0])).cuda()

    text_preds_filtered = text_preds.index_select(1, inds_text)
    text_targets_filtered = tuple(
        ''.join(ch for ch in text_targets[i] if ch in alphabet)
        for i in inds_text.data)

    converter = crnn_utils.strLabelConverter(alphabet)
    t, l = converter.encode(text_targets_filtered)
    crnn_utils.loadData(text, t)
    crnn_utils.loadData(length, l)
    print("CTCLoss")

    b = text_preds_filtered.size(1)
    pred_size = Variable(torch.IntTensor([text_preds_filtered.size(0)] * b))
    if len(text.size()) != 1:
        return Variable(torch.FloatTensor([0])).cuda()

    # Evaluate the criterion once; the same value is printed and returned
    # (previously the forward pass ran twice).
    loss = ctc(text_preds_filtered, text, pred_size, length) / b
    print(loss)

    # Greedy decode of the predictions, for debugging output only.
    _, preds = text_preds_filtered.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    sim_preds = converter.decode(preds.data, pred_size.data, raw=False)
    print('predicted text= ')
    print(sim_preds)
    print(text_targets_filtered)
    return loss
def crnnSource():
    """Load the recognition model.

    The alphabet is chosen by ``chinsesModel``, the network is placed on the
    GPU when CUDA is available and ``GPU`` is set (CPU otherwise), and the
    weights stored at ``ocrModel`` are loaded after stripping ``module.``
    from every parameter name.

    Returns:
        A ``(model, converter)`` tuple.
    """
    # Chinese + English alphabet when chinsesModel is set, English-only otherwise.
    alphabet = keys.alphabetChinese if chinsesModel else keys.alphabetEnglish
    converter = strLabelConverter(alphabet)

    use_gpu = torch.cuda.is_available() and GPU
    # LSTMFLAG=True selects the CRNN variant, otherwise the dense OCR variant.
    net = CRNN(32, 1, len(alphabet) + 1, 256, 1, lstmFlag=LSTMFLAG)
    net = net.cuda() if use_gpu else net.cpu()

    # Always deserialize onto CPU storage; load_state_dict copies the values
    # into the model afterwards.
    checkpoint = torch.load(ocrModel, map_location=lambda storage, loc: storage)
    # Remove `module.` from the parameter names.
    stripped = OrderedDict(
        (key.replace('module.', ''), value)
        for key, value in checkpoint.items())
    net.load_state_dict(stripped)
    return net, converter
def crnnSource():
    """Build the CRNN for ``keys.alphabet`` and load its saved weights.

    The checkpoint is read from the relative path
    ``crnn/models/netCRNNcpu.pth``, so the result depends on the current
    working directory.

    Returns:
        A ``(model, converter)`` tuple.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    # One output class more than the number of alphabet characters.
    net = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)
    state = torch.load('crnn/models/netCRNNcpu.pth')
    net.load_state_dict(state)
    return net, label_converter
def process_img(images):
    """Recognize text in every ``crop/*.jpg`` image and report the findings.

    A CRNN is loaded from ``crnn.pth`` on each call. For every matching file
    the raw and collapsed greedy decodings are printed, city names found by
    ``GeoText`` in the collapsed text are reported, and the text is passed to
    ``language_classifier.classify``.

    Args:
        images: Not referenced by the body; the files to process come from
            the ``crop/*.jpg`` glob.

    Returns:
        Always ``1``.
    """
    weights_file = 'crnn.pth'
    charset = '0123456789abcdefghijklmnopqrstuvwxyz'

    net = crnn.CRNN(32, 1, 37, 256, 1)
    print('loading pretrained model from %s' % weights_file)
    net.load_state_dict(torch.load(weights_file))

    label_converter = utils.strLabelConverter(charset)
    to_tensor = dataset.resizeNormalize((100, 32))

    for crop_path in glob.glob('crop/*.jpg'):
        # Greyscale image -> transformed tensor with a leading batch axis of 1.
        gray = to_tensor(Image.open(crop_path).convert('L'))
        batch = Variable(gray.view(1, *gray.size()))

        net.eval()
        scores = net(batch)
        # Greedy decoding: best class index per step, flattened to 1-D.
        _, best = scores.max(2)
        # preds = preds.squeeze(2)
        best = best.transpose(1, 0).contiguous().view(-1)
        best_size = Variable(torch.IntTensor([best.size(0)]))

        raw_text = label_converter.decode(best.data, best_size.data, raw=True)
        clean_text = label_converter.decode(best.data, best_size.data, raw=False)
        print('%-20s => %-20s' % (raw_text, clean_text))

        found = GeoText(clean_text)
        if len(found.cities) > 0:
            print('Location Found: ')
            print(found.cities)
        else:
            print('Location Not Found')
        language_classifier.classify(clean_text)
    return 1
def crnnSource():
    """Build the CRNN for ``keys.alphabet`` and load weights stored next to this file.

    The checkpoint path is ``models/netCRNNcpu.pth`` resolved against the
    directory containing this module, so it does not depend on the current
    working directory.

    Returns:
        A ``(model, converter)`` tuple.
    """
    charset = keys.alphabet
    label_converter = util.strLabelConverter(charset)
    # One output class more than the number of alphabet characters.
    net = crnn.CRNN(32, 1, len(charset) + 1, 256, 1)

    module_dir = os.path.dirname(os.path.abspath(__file__))
    weights_path = os.path.join(module_dir, 'models/netCRNNcpu.pth')
    net.load_state_dict(torch.load(weights_path))
    return net, label_converter
def predict(self, image):
    """Recognize the text in a single image via ``self.model.setInput``/``forward``.

    Args:
        image: Input passed to ``resizeNormalize(image, 32)``.

    Returns:
        Whatever ``strLabelConverter`` produces for the per-step arg-max
        indices and ``self.alphabet``.
    """
    normalized = resizeNormalize(image, 32).astype(np.float32)
    # Wrap in two leading singleton axes before feeding the network.
    blob = np.array([[normalized]])
    self.model.setInput(blob)
    scores = self.model.forward()
    # Move axis 1 to the end and take the first batch entry.
    scores = scores.transpose(0, 2, 3, 1)[0]
    indices = np.argmax(scores, axis=2).reshape((-1, ))
    return strLabelConverter(indices, self.alphabet)
def predict(self, image):
    """Recognize the text in a single image via ``self.model.predict``.

    The prediction runs with the module-level ``graph`` installed as the
    default graph.

    Args:
        image: Input passed to ``resizeNormalize(image, 32)``.

    Returns:
        Whatever ``strLabelConverter`` produces for the per-step arg-max
        indices and ``self.alphabet``.
    """
    global graph
    normalized = resizeNormalize(image, 32).astype(np.float32)
    # Wrap in two leading singleton axes before feeding the network.
    batch = np.array([[normalized]])
    with graph.as_default():
        scores = self.model.predict(batch)
    # preds = preds[0]
    indices = np.argmax(scores, axis=2).reshape((-1, ))
    return strLabelConverter(indices, self.alphabet)
def load(self):
    """Instantiate the CRNN, load its weights and prepare the helper objects.

    Sets ``self.session`` (the network in eval mode, on the GPU when CUDA is
    available), ``self.converter`` and ``self.transformer``. ``self.cuda`` is
    set to ``True`` only on the CUDA path.

    The checkpoint at ``self.model_path`` is deserialized onto CPU storage
    first, so a checkpoint that was saved from CUDA tensors also loads on a
    CPU-only host; ``load_state_dict`` then copies the values onto whichever
    device the model lives on.
    """
    logging.info("Loding CRNN model first apply will be slow")
    if torch.cuda.is_available():
        self.session = crnn_model.CRNN(32, 1, 37, 256, 1).cuda()
        self.cuda = True
    else:
        # NOTE(review): self.cuda is left untouched on this path; presumably
        # it is initialized elsewhere -- confirm.
        self.session = crnn_model.CRNN(32, 1, 37, 256, 1)

    state_dict = torch.load(
        self.model_path, map_location=lambda storage, loc: storage)
    self.session.load_state_dict(state_dict)
    self.session.eval()
    self.converter = utils.strLabelConverter(self.alphabet)
    self.transformer = dataset.resizeNormalize((100, 32))
def predict(self, image):
    """Recognize the text in a single image by calling ``self`` on it.

    The input tensor is moved to the GPU when CUDA is available and
    ``self.GPU`` is set, and to the CPU otherwise.

    Args:
        image: Input passed to ``resizeNormalize(image, 32)``.

    Returns:
        Whatever ``strLabelConverter`` produces for the per-step arg-max
        indices and ``self.alphabet``.
    """
    normalized = resizeNormalize(image, 32).astype(np.float32)
    tensor = torch.from_numpy(normalized)
    on_gpu = torch.cuda.is_available() and self.GPU
    tensor = tensor.cuda() if on_gpu else tensor.cpu()
    # Add two leading singleton axes in front of the image dimensions.
    batch = Variable(tensor.view(1, 1, *tensor.size()))

    _, best = self(batch).max(2)
    flat = best.transpose(1, 0).contiguous().view(-1)
    return strLabelConverter(flat, self.alphabet)
def predict_batch(self, boxes, batch_size=1):
    """Predict text for a list of boxes, ``batch_size`` boxes at a time.

    Within each batch the crops are written into the left part of a
    zero-filled array that is as wide as the widest crop in that batch.

    Args:
        boxes: List of dicts; each dict's ``'img'`` entry is passed to
            ``resizeNormalize(img, 32)``.
        batch_size: Number of boxes per forward pass.

    Returns:
        The same ``boxes`` list, with a ``'text'`` entry set on every dict.
    """
    N = len(boxes)
    texts = []

    # Ceiling division: one extra batch for any leftover boxes.
    full_batches, leftover = divmod(N, batch_size)
    num_batches = full_batches + 1 if leftover else full_batches

    for batch_idx in range(num_batches):
        start = batch_idx * batch_size
        chunk = boxes[start:start + batch_size]

        crops = []
        max_width = 0
        for box in chunk:
            crop = resizeNormalize(box['img'], 32)
            _, width = crop.shape[:2]
            max_width = max(max_width, width)
            crops.append(np.array([crop]))

        padded = np.zeros((len(crops), 1, 32, max_width), dtype=np.float32)
        for slot, crop in enumerate(crops):
            _, _, width = crop.shape
            padded[slot][:, :, :width] = crop

        tensor = Variable(torch.from_numpy(padded))
        on_gpu = torch.cuda.is_available() and self.GPU
        tensor = tensor.cuda() if on_gpu else tensor.cpu()

        indices = self(tensor).argmax(2)
        # Samples are read along dim 1 of the arg-max result.
        texts.extend(
            strLabelConverter(indices[:, col], self.alphabet)
            for col in range(indices.shape[1]))

    for idx in range(N):
        boxes[idx]['text'] = texts[idx]
    return boxes
def predict_batch(self, boxes, batch_size=1):
    """Predict text for a list of boxes, ``batch_size`` boxes at a time.

    Within each batch the crops are written into the left part of a
    zero-filled array that is as wide as the widest crop in that batch, and
    ``self.model.predict`` runs with the module-level ``graph`` installed as
    the default graph.

    Args:
        boxes: List of dicts; each dict's ``'img'`` entry is passed to
            ``resizeNormalize(img, 32)``.
        batch_size: Number of boxes per ``predict`` call.

    Returns:
        The same ``boxes`` list, with a ``'text'`` entry set on every dict.
    """
    global graph
    N = len(boxes)
    texts = []

    # Ceiling division: one extra batch for any leftover boxes.
    full_batches, leftover = divmod(N, batch_size)
    num_batches = full_batches + 1 if leftover else full_batches

    for batch_idx in range(num_batches):
        start = batch_idx * batch_size
        chunk = boxes[start:start + batch_size]

        crops = []
        max_width = 0
        for box in chunk:
            crop = resizeNormalize(box['img'], 32)
            _, width = crop.shape[:2]
            max_width = max(max_width, width)
            crops.append(np.array([crop]))

        padded = np.zeros((len(crops), 1, 32, max_width), dtype=np.float32)
        for slot, crop in enumerate(crops):
            _, _, width = crop.shape
            padded[slot][:, :, :width] = crop

        with graph.as_default():
            scores = self.model.predict(padded, batch_size=batch_size)
        indices = scores.argmax(axis=2)
        # Samples are read along axis 0 of the arg-max result.
        for row in range(indices.shape[0]):
            texts.append(
                strLabelConverter(indices[row, ].tolist(), self.alphabet))

    for idx in range(N):
        boxes[idx]['text'] = texts[idx]
    return boxes
def getTextFromImage(model, img):
    """Recognize the text in ``img`` with ``model`` and return the collapsed decoding.

    The transformed image is moved to the GPU unconditionally, so CUDA must
    be available.

    Args:
        model: Network that is switched to eval mode and called on the image.
        img: Image accepted by ``dataset.resizeNormalize((100, 32))``.

    Returns:
        The result of ``converter.decode(..., raw=False)``.
    """
    charset = '0123456789abcdefghijklmnopqrstuvwxyz'
    label_converter = utils.strLabelConverter(charset)
    to_tensor = dataset.resizeNormalize((100, 32))

    tensor = to_tensor(img).cuda()
    # Prepend a batch axis of size 1.
    batch = Variable(tensor.view(1, *tensor.size()))

    model.eval()
    scores = model(batch)
    _, best = scores.max(2)
    best = best.squeeze(1)
    # transpose(0, 0) swaps a dimension with itself; kept as in the original.
    flat = best.transpose(0, 0).contiguous().view(-1)
    flat_size = Variable(torch.IntTensor([flat.size(0)]))

    # The raw decoding is computed but not returned.
    raw_text = label_converter.decode(flat.data, flat_size.data, raw=True)
    clean_text = label_converter.decode(flat.data, flat_size.data, raw=False)
    return clean_text
def get_text_service(image):
    """Recognize the text in an image array and return the collapsed decoding.

    A CRNN is built and its weights are loaded from ``MODEL_PATH_CRNN`` on
    every call. Model and input are moved to the GPU when CUDA is available.

    The checkpoint is deserialized onto CPU storage first, so a checkpoint
    that was saved from CUDA tensors also loads on a CPU-only host;
    ``load_state_dict`` then copies the values onto the model's device.

    Args:
        image: Array accepted by ``Image.fromarray``; it is converted to
            greyscale (mode ``'L'``) before recognition.

    Returns:
        The result of ``converter.decode(..., raw=False)``.
    """
    use_cuda = torch.cuda.is_available()

    image = Image.fromarray(image).convert('L')
    model = crnn.CRNN(32, 1, 37, 256)
    if use_cuda:
        model = model.cuda()
    state_dict = torch.load(
        MODEL_PATH_CRNN, map_location=lambda storage, loc: storage)
    model.load_state_dict(state_dict)

    converter = utils.strLabelConverter(ALPHABET)
    transformer = dataset.resizeNormalize((100, 32))

    image = transformer(image)
    if use_cuda:
        image = image.cuda()
    # Prepend a batch axis of size 1.
    image = Variable(image.view(1, *image.size()))

    model.eval()
    preds = model(image)
    # Greedy decoding: best class index per step, flattened to 1-D.
    _, preds = preds.max(2)
    preds = preds.transpose(1, 0).contiguous().view(-1)
    preds_size = Variable(torch.IntTensor([preds.size(0)]))

    # Only the collapsed decoding is needed; the unused raw decode was removed.
    return converter.decode(preds.data, preds_size.data, raw=False)
def crnnSource():
    """Build the CRNN for the Chinese + English alphabet and load its weights.

    Weights are read from ``ocrModelKeras`` through ``model.load_weights``.

    Returns:
        A ``(model, converter)`` tuple.
    """
    charset = keys.alphabetChinese  # Chinese + English model
    label_converter = strLabelConverter(charset)
    # One output class more than the number of alphabet characters.
    net = CRNN(32, 1, len(charset) + 1, 256, 1, lstmFlag=LSTMFLAG)
    net.load_weights(ocrModelKeras)
    return net, label_converter
sampler=sampler,
num_workers=int(opt.workers),
collate_fn=dataset.alignCollate(
    imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
# NOTE(review): the keyword arguments above close a call whose opening lies
# before this excerpt.

# Validation data read from opt.valroot; the transform is built with a fixed
# (100, 32) argument rather than opt.imgW / opt.imgH.
test_dataset = dataset.lmdbDataset(
    root=opt.valroot, transform=dataset.resizeNormalize((100, 32)))

# NOTE(review): calling .decode() presumes opt.alphabet is a byte string
# (Python 2 style) -- confirm before running under Python 3.
alphabet = opt.alphabet.decode('utf-8')
# One class more than the number of alphabet characters.
nclass = len(alphabet) + 1
nc = 1
converter = utils.strLabelConverter(alphabet)
criterion = CTCLoss()


# custom weights initialization called on crnn
def weights_init(m):
    """Initialize the parameters of ``m`` according to its class name.

    Classes whose name contains ``Conv`` get weights drawn with
    ``normal_(0.0, 0.02)``; classes whose name contains ``BatchNorm`` get
    weights drawn with ``normal_(1.0, 0.02)`` and a zero-filled bias. Other
    modules are left untouched.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find('BatchNorm') != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


# NOTE: this rebinds the name `crnn` from the module to the model instance.
crnn = crnn.CRNN(opt.imgH, nc, nclass, opt.nh)
crnn.apply(weights_init)
def main(argv=None):
    """Run text detection on the input images, then CRNN recognition on a demo image.

    Stage 1 restores the detection checkpoint from ``FLAGS.checkpoint_path``,
    runs it on every image returned by ``get_images()``, writes one
    ``<image name>.txt`` file with the box corner coordinates to
    ``FLAGS.output_dir`` and, unless ``FLAGS.no_write_images`` is set, saves
    the image with the boxes drawn on it.

    Stage 2 loads a CRNN from ``./crnn/crnn.pth``, reads the boxes listed in
    ``./output/img_demo.txt``, crops each one out of
    ``./test_img/img_demo.jpg``, recognizes the text and writes one decoded
    string per line to ``./output/output.txt``.

    Args:
        argv: Not referenced by the body.

    NOTE(review): the source was flattened onto single lines, so the nesting
    of stage 2 could not be recovered; it is laid out here at function level,
    after stage 1 has finished -- confirm against the original file.
    """
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # errno 17 is EEXIST on Linux: an existing output directory is fine,
        # anything else is re-raised.
        if e.errno != 17:
            raise

    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(tf.float32,
                                      shape=[None, None, None, 3],
                                      name='input_images')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        # Restore the moving-average versions of the variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                # Reverse the channel order of the image read by OpenCV.
                im = cv2.imread(im_fn)[:, :, ::-1]
                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(score_map=score,
                                      geo_map=geometry,
                                      timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    # Keep the 8 corner coordinates per box and divide by the
                    # resize ratios to map them back to the original image.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Skip boxes with a side shorter than 5 pixels.
                            if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(
                                    box[3] - box[0]) < 5:
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0],
                                box[0, 1],
                                box[1, 0],
                                box[1, 1],
                                box[2, 0],
                                box[2, 1],
                                box[3, 0],
                                box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)
                if not FLAGS.no_write_images:
                    img_path = os.path.join(FLAGS.output_dir,
                                            os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

    # ---- Stage 2: CRNN recognition on the demo image ----
    model_path = './crnn/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model_crnn = crnn.CRNN(32, 1, 37, 256)
    # if torch.cuda.is_available():
    #     model_crnn = model_crnn.cuda()
    print('loading pretrained model from %s' % model_path)
    model_crnn.load_state_dict(torch.load(model_path))

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    seq = re.compile(",")
    with open('./output/img_demo.txt') as f:
        img = cv2.imread('./test_img/img_demo.jpg')
        line_id = 0
        with open('./output/output.txt', 'w') as fp:
            for line in f:
                line_id += 1
                # Each line holds eight comma-separated integers: x1,y1,...,x4,y4.
                lst = seq.split(line.strip())
                x1 = int(lst[0])
                y1 = int(lst[1])
                x2 = int(lst[2])
                y2 = int(lst[3])
                x3 = int(lst[4])
                y3 = int(lst[5])
                x4 = int(lst[6])
                y4 = int(lst[7])
                cnt = np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]])
                rect = cv2.minAreaRect(cnt)
                # print(rect)
                box = cv2.boxPoints(rect)
                box = np.int0(box)
                # print(box)
                # Crop the axis-aligned bounding region of the box corners.
                roi_img = img[min(box[:, 1]):max(box[:, 1]),
                              min(box[:, 0]):max(box[:, 0])]
                # print(min(box[:,0]),max(box[:,0]),min(box[:,1]),max(box[:,1]))

                # The crop is written to disk and read back through PIL as a
                # greyscale image.
                cv2.imwrite(
                    './output/word_area_img/word_area_img' + str(line_id) +
                    '.png', roi_img)
                img_path = './output/word_area_img/word_area_img' + str(
                    line_id) + '.png'
                image = Image.open(img_path).convert('L')
                image = transformer(image)
                # if torch.cuda.is_available():
                #     image = image.cuda()
                # Prepend a batch axis of size 1.
                image = image.view(1, *image.size())
                image = Variable(image)

                model_crnn.eval()
                preds = model_crnn(image)

                # Greedy decoding: best class index per step, flattened to 1-D.
                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)

                preds_size = Variable(torch.IntTensor([preds.size(0)]))
                raw_pred = converter.decode(preds.data,
                                            preds_size.data,
                                            raw=True)
                sim_pred = converter.decode(preds.data,
                                            preds_size.data,
                                            raw=False)
                print('%-20s => %-20s' % (raw_pred, sim_pred))
                fp.write(sim_pred)
                fp.write('\n')