def detect(self, img_file):
    """Detect text lines in the image stored at ``img_file``.

    The image is read with ``cv2.imread``, resized with ``resize_image``,
    passed through ``self.model`` and post-processed by ``proposal_layer``
    and ``TextDetector`` (mode ``'O'``). The elapsed time is printed.

    Args:
        img_file: Path of the image to read.

    Returns:
        Tuple ``(boxes, text_proposals)``; both are converted to integer
        NumPy arrays.
    """
    start_time = time.time()
    # Fixed: the original read the undefined name ``im_file`` here.
    img = cv2.imread(img_file)
    img_ori, _ = resize_image(img)
    h, w, c = img_ori.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    img = toTensorImage(img_ori)
    pre_score, pre_reg = self.model(img)

    # Regroup the score channels as 10 pairs and softmax over each pair.
    score = pre_score.reshape(
        (pre_score.shape[0], 10, 2, pre_score.shape[2], pre_score.shape[3])
    ).squeeze(0).permute(0, 2, 3, 1).reshape((-1, 2))
    score = F.softmax(score, dim=1)
    score = score.reshape((10, pre_reg.shape[2], -1, 2))

    # Move axis 1 last and hand NumPy arrays to proposal_layer.
    pre_score = score.permute(1, 2, 0, 3).reshape(
        pre_reg.shape[2], pre_reg.shape[3], -1
    ).unsqueeze(0).cpu().detach().numpy()
    pre_reg = pre_reg.permute(0, 2, 3, 1).cpu().detach().numpy()

    textsegs, _ = proposal_layer(pre_score, pre_reg, im_info)
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]

    textdetector = TextDetector(DETECT_MODE='O')
    boxes, text_proposals = textdetector.detect(
        textsegs, scores[:, np.newaxis], img_ori.shape[:2])
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    boxes = np.array(boxes, dtype=int)
    text_proposals = text_proposals.astype(int)
    print('cost_time:' + str(time.time() - start_time) + 's')
    return boxes, text_proposals
def detect(self, img_file):
    """Detect text lines in ``img_file`` with the refinement-aware model.

    The image is opened with PIL as RGB, resized with ``resize_image``, run
    through ``self.model`` (which returns score, regression and refinement
    maps) and post-processed by ``proposal_layer`` and ``TextDetector``
    using ``self.detect_type`` as the detection mode.

    Args:
        img_file: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        Tuple ``(boxes, text_proposals, rh, rw)``: the two integer NumPy
        arrays from ``TextDetector.detect`` plus the two ratios returned by
        ``resize_image``.
    """
    img = np.array(Image.open(img_file).convert('RGB'))
    img_ori, (rh, rw) = resize_image(img)
    h, w, c = img_ori.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    img = toTensorImage(img_ori)

    with torch.no_grad():
        pre_score, pre_reg, refine_ment = self.model(img)

        # Regroup the score channels as 10 pairs and softmax over each pair.
        score = pre_score.reshape(
            (pre_score.shape[0], 10, 2,
             pre_score.shape[2], pre_score.shape[3])
        ).squeeze(0).permute(0, 2, 3, 1).reshape((-1, 2))
        score = F.softmax(score, dim=1)
        score = score.reshape((10, pre_reg.shape[2], -1, 2))

        # Move axis 1 last and convert everything to NumPy.
        pre_score = score.permute(1, 2, 0, 3).reshape(
            pre_reg.shape[2], pre_reg.shape[3], -1
        ).unsqueeze(0).cpu().detach().numpy()
        pre_reg = pre_reg.permute(0, 2, 3, 1).cpu().detach().numpy()
        refine_ment = refine_ment.permute(0, 2, 3, 1).cpu().detach().numpy()

    textsegs, _ = proposal_layer(pre_score, pre_reg, refine_ment, im_info)
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]

    textdetector = TextDetector(DETECT_MODE=self.detect_type)
    boxes, text_proposals = textdetector.detect(
        textsegs, scores[:, np.newaxis], img_ori.shape[:2])
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    boxes = np.array(boxes, dtype=int)
    text_proposals = text_proposals.astype(int)
    return boxes, text_proposals, rh, rw
def detect_single_img(self, img):
    """Detect text lines in an image array and map them to its own size.

    The image is resized with ``self.resize_image``, run through
    ``self.model`` and post-processed by ``proposal_layer`` and
    ``TextDetector`` (mode ``'O'``). Box and proposal coordinates are then
    divided by the resize ratios, and the boxes are padded and clipped to
    the input image bounds.

    Args:
        img: Image array whose ``shape`` unpacks to ``(height, width, _)``.

    Returns:
        Tuple ``(boxes, text_proposals)``: the integer box array and an
        array built from the rescaled proposals.
    """
    ori_h, ori_w, _ = img.shape
    img_res, (rh, rw) = self.resize_image(img)
    h, w, c = img_res.shape
    im_info = np.array([h, w, c]).reshape([1, 3])
    img = self.toTensorImage(img_res)

    # (b, 20, h, w), (b, 40, h, w)
    pre_score, pre_reg = self.model(img)
    print('==pre_score.shape===', pre_score.shape)
    print('====pre_reg.shape:', pre_reg.shape)

    # (b, 10, 2, h, w) --> (10, h, w, 2) --> (10*h*w, 2)
    score = pre_score.reshape(
        (pre_score.shape[0], 10, 2, pre_score.shape[2], pre_score.shape[3])
    ).squeeze(0).permute(0, 2, 3, 1).reshape((-1, 2))
    score = F.softmax(score, dim=1)
    # (10, h, w, 2)
    score = score.reshape((10, pre_reg.shape[2], -1, 2))
    # (h, w, 10, 2) --> (b, h, w, 20)
    pre_score = score.permute(1, 2, 0, 3).reshape(
        pre_reg.shape[2], pre_reg.shape[3], -1
    ).unsqueeze(0).cpu().detach().numpy()
    # (b, h, w, 40)
    pre_reg = pre_reg.permute(0, 2, 3, 1).cpu().detach().numpy()

    textsegs, _ = proposal_layer(pre_score, pre_reg, im_info)
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]

    textdetector = TextDetector(DETECT_MODE='O')
    boxes, text_proposals = textdetector.detect(
        textsegs, scores[:, np.newaxis], img_res.shape[:2])
    # ``np.int`` was only an alias of the builtin ``int`` and has been
    # removed from NumPy; the builtin yields the same dtype.
    boxes = np.array(boxes, dtype=int)
    text_proposals = np.array(text_proposals, dtype=int)

    # The original guard (``boxes is not None``) was always true because
    # ``np.array`` never returns None; a 1-D (empty) result would then fail
    # on the 2-D indexing below.
    if boxes.ndim == 2:
        x_cols = [0, 2, 4, 6]
        y_cols = [1, 3, 5, 7]
        boxes[:, x_cols] = boxes[:, x_cols] / rw
        boxes[:, y_cols] = boxes[:, y_cols] / rh
        # Clamp x1.
        boxes[:, [0, 6]] = np.clip(boxes[:, [0, 6]] - 5, 0, ori_w - 1)
        # Clamp x2; the wider pad keeps the right edge off the glyphs.
        boxes[:, [2, 4]] = np.clip(boxes[:, [2, 4]] + 20, 0, ori_w - 1)
        # Clamp y1.
        boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]] - 5, 0, ori_h - 1)
        # Clamp y2.
        boxes[:, [5, 7]] = np.clip(boxes[:, [5, 7]] + 5, 0, ori_h - 1)

    # Rescale every proposal from resized coordinates to input coordinates.
    ratio_xy = np.array([rw, rh])
    news_text_proposals = [
        (item.reshape(-1, 2) / ratio_xy).reshape(-1).astype(int)
        for item in text_proposals
    ]
    return boxes, np.array(news_text_proposals)
def main(argv=None):
    """Run CTPN detection on every image from ``get_images`` and print boxes.

    ``FLAGS.output_path`` is recreated empty, the latest checkpoint under
    ``FLAGS.checkpoint_path`` is restored, and for each readable image the
    detected boxes (mode ``'H'``) are printed as comma-separated lines.

    Args:
        argv: Unused; present for ``tf.app.run`` compatibility.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread signals failure by returning None; test for
                # that instead of swallowing every exception.
                img = cv2.imread(im_fn)
                if img is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                img = img[:, :, ::-1]  # reverse the channel order

                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(
                    cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(
                    textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` has been removed from NumPy; ``int`` is the
                # same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # NOTE(review): ``scores`` is indexed by proposal while ``i``
                # counts boxes -- confirm this pairing is intended.
                for i, box in enumerate(boxes):
                    line = ",".join(str(box[k]) for k in range(8))
                    line += "," + str(scores[i]) + "\n"
                    print(line)
def find(self):
    """Detect text boxes in ``self.img_path`` and return them unscaled.

    The default graph is reset, the latest checkpoint under
    ``FLAGS.checkpoint_path`` is restored and the image is resized with
    ``self.resize_image`` before detection (mode ``'O'``). The first eight
    values of every box are divided by the matching resize ratio and
    rounded with ``self.round_half_up``.

    Returns:
        Integer NumPy array with one row per box; the last element of each
        detector row is dropped.

    Raises:
        IOError: If the image cannot be read. The original printed a
            message and then failed on the unbound name ``im``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    tf.reset_default_graph()

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = self.get_global_step()
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            print('===============')

            # cv2.imread signals failure by returning None.
            im = cv2.imread(self.img_path)
            if im is None:
                print("Error reading image {}!".format(self.img_path))
                raise IOError(
                    "cannot read image: {}".format(self.img_path))
            im = im[:, :, ::-1]  # reverse the channel order

            img, (rh, rw) = self.resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={input_image: [img], input_im_info: im_info})

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='O')
            boxes = textdetector.detect(
                textsegs, scores[:, np.newaxis], img.shape[:2])

            # Even positions are divided by ``rw`` and odd positions by
            # ``rh``, which maps coordinates back to the unresized image.
            for box in boxes:
                for idx in range(8):
                    ratio = rw if idx % 2 == 0 else rh
                    box[idx] = self.round_half_up(box[idx] / ratio)

            # ``np.int`` has been removed from NumPy; ``int`` is the same
            # dtype.
            boxes = np.array([box[:-1] for box in boxes], dtype=int)
    return boxes
def ctpn(sess, net, image_name):
    """Run CTPN on one image file, draw the result and report the timing.

    The image is read, median-blurred with a kernel size of 3, resized with
    ``resize_im`` using the ``TextLineCfg`` limits, and passed to
    ``test_ctpn``. The proposals are merged by ``TextDetector`` and handed
    to ``draw_boxes``.

    Args:
        sess: Session object forwarded to ``test_ctpn``.
        net: Network object forwarded to ``test_ctpn``.
        image_name: Path of the image to process.

    Returns:
        ``total_time`` of the ``Timer`` that wraps the whole call.
    """
    stopwatch = Timer()
    stopwatch.tic()

    blurred = cv2.medianBlur(cv2.imread(image_name), 3)
    resized, factor = resize_im(
        blurred, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE)
    proposal_scores, proposal_boxes = test_ctpn(sess, net, resized)

    detector = TextDetector()
    text_lines = detector.detect(
        proposal_boxes, proposal_scores[:, np.newaxis], resized.shape[:2])
    draw_boxes(resized, image_name, text_lines, factor)

    stopwatch.toc()
    report = 'Detection took {:.3f}s for {:d} object proposals'
    print(report.format(stopwatch.total_time, text_lines.shape[0]))
    return stopwatch.total_time
def ctpn(image_url,
         serving_url='http://127.0.0.1:7501/v1/models/ctpn:predict'):
    '''
    Parse an image through the CTPN model served over HTTP.

    The image is downloaded from ``image_url``, decoded with OpenCV, resized
    with ``resize_image`` and posted as JSON to ``serving_url``. The
    ``cls_prob_output`` and ``bbox_pred_output`` fields of the response are
    post-processed by ``proposal_layer`` and ``TextDetector`` (mode ``'H'``).

    :param image_url: URL the image is downloaded from
    :param serving_url: prediction endpoint; defaults to the address that
        used to be hard-coded here
    :return img: image matrix (after resizing)
    :return boxes: box coordinates as an integer array
    :return scores: confidence of every proposal
    '''
    start = time.time()

    images = requests.get(image_url).content
    buf = np.asarray(bytearray(images), dtype="uint8")
    im = cv2.imdecode(buf, cv2.IMREAD_COLOR)

    img, (rh, rw) = resize_image(im)
    h, w, c = img.shape
    im_info = np.array([h, w, c]).reshape([1, 3])

    data = json.dumps({
        "name": 'tfserving-ctpn',
        "signature_name": 'predict_images',
        "inputs": [img.tolist()]
    })
    # NOTE(review): neither request sets a timeout or checks the HTTP
    # status; a failed call surfaces later as a decoding or KeyError.
    result = requests.post(serving_url, data=data).json()
    outputs = result['outputs']
    cls_prob_val = np.asarray(outputs['cls_prob_output'], dtype=np.float32)
    bbox_pred_val = np.asarray(outputs['bbox_pred_output'], dtype=np.float32)

    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
    scores = textsegs[:, 0]
    textsegs = textsegs[:, 1:5]

    textdetector = TextDetector(DETECT_MODE='H')
    boxes = textdetector.detect(
        textsegs, scores[:, np.newaxis], img.shape[:2])
    # ``np.int`` has been removed from NumPy; ``int`` is the same dtype.
    boxes = np.array(boxes, dtype=int)

    cost_time = (time.time() - start)
    print("cost time: {:.2f}s".format(cost_time))
    return img, boxes, scores
def ctpnParse(im): ''' 转换获取图片文字区域组 ''' # im = cv2.imread(image_path) os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu with tf.get_default_graph().as_default(): input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image') input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) bbox_pred, cls_pred, cls_prob = model.model(input_image) variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) saver = tf.train.Saver(variable_averages.variables_to_restore()) with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess: ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path) model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) print('Restore from {}'.format(model_path)) saver.restore(sess, model_path) #################################################################################### # read img # print(image_path) start = time.time() try: im = im[:, :, ::-1] except: # print("Error reading image {}!".format(image_path)) exit(1) #################################################################################### # resize img, (rh, rw) = resize_image(im) print("Ritu ", rh, rw) print("Mae: ", im.shape[0], im.shape[1]) print("Ushiro: ", img.shape[0], img.shape[1]) # Ritu: 0.6375 0.6333333333333333 # Mae: 1280 960 # Ushiro: 816 608 h, w, c = img.shape im_info = np.array([h, w, c]).reshape([1, 3]) bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob], feed_dict={input_image: [img], input_im_info: im_info}) #################################################################################### # parse textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info) scores = textsegs[:, 0] textsegs = textsegs[:, 1:5] # textdetector = TextDetector(DETECT_MODE='H') textdetector = TextDetector(DETECT_MODE='O') boxes = 
textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2]) boxes = np.array(boxes, dtype=np.int) #################################################################################### # data cost_time = (time.time() - start) print("cost time: {:.2f}s".format(cost_time)) # frames frames = [] for i, box in enumerate(boxes): pnts = [] # i = 01, 23, 45, 67 pnts.extend({ "x": int(box[i * 2] / rh), "y": int(box[i * 2 + 1] / rw) } for i in range(4)) frames.append({ "points": pnts, "score": scores[i] }) return { "size": { "x": im.shape[0], "y": im.shape[1] }, "cnt": len(boxes), "frames": frames } '''
def main(argv=None):
    """Crop every detected text box out of the top third of each image.

    ``FLAGS.output_path`` is recreated empty and the latest checkpoint under
    ``FLAGS.checkpoint_path`` is restored. For each image from
    ``get_images`` the top third is resized with ``resize_image`` and
    searched in mode ``'H'``; every box is written as its own image file
    and all boxes are listed in a ``.txt`` file next to them. Timings of
    the individual stages are printed.

    Args:
        argv: Unused; present for ``tf.app.run`` compatibility.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)

                # cv2.imread signals failure by returning None.
                im = cv2.imread(im_fn)
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                # Only the top third is the region of interest. Fixed: the
                # original used ``/``, whose float result is not a valid
                # slice index on Python 3, so the bare ``except`` reported
                # every image as unreadable.
                im = im[:im.shape[0] // 3, :, ::-1]

                start0 = time.time()
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                print("resize_image cost time: {:.2f}s".format(
                    time.time() - start0))

                start = time.time()
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })
                print("sess.run cost time: {:.2f}s".format(
                    time.time() - start))

                start = time.time()
                textsegs, _ = proposal_layer(
                    cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]
                print("proposal_layer cost time: {:.2f}s".format(
                    time.time() - start))

                start = time.time()
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(
                    textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` has been removed from NumPy; ``int`` is the
                # same dtype.
                boxes = np.array(boxes, dtype=int)
                print("textdetector cost time: {:.2f}s".format(
                    time.time() - start))
                print("total cost time: {:.2f}s".format(
                    time.time() - start0))

                base_name = os.path.basename(im_fn)
                for i, box in enumerate(boxes):
                    # Fixed: the original assigned the crop back to ``img``,
                    # so every box after the first was cut out of the
                    # previous crop instead of the full image.
                    crop = img[box[1]:box[5], box[0]:box[4]]
                    if crop.size == 0:
                        # cv2.resize rejects empty input.
                        continue
                    # NOTE(review): fx is the horizontal factor but receives
                    # 1/rh (and fy receives 1/rw); kept as in the original
                    # -- confirm against resize_image.
                    crop = cv2.resize(crop, None, None,
                                      fx=1.0 / rh, fy=1.0 / rw,
                                      interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(
                            FLAGS.output_path,
                            base_name.replace('.', '_' + str(i) + '.')),
                        crop[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(base_name)[0]) + ".txt"
                with open(txt_path, "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
# start(self): worker loop for CTPN text detection.
#
# Sets self.running, defines the 'gpu' and 'checkpoint_path' flags, builds the
# model on the default graph and restores the checkpoint found under
# self.checkpoint_path. While self.running is true it takes an image path from
# self.workerQueue, leaves the loop when self.is_stop_signal(path) is true,
# reads and resizes the image, runs the network, and turns the output into
# boxes with proposal_layer and TextDetector (mode 'H'). Results are written
# under self.outputPath (a .json dump, an annotated image and a .txt listing)
# and passed to self.callback as fileName= / ctpnRes=.
#
# NOTE(review): this definition has lost its line breaks and indentation, so
# it cannot be told from this text whether the image/.txt output belongs to the
# `if self.debug:` branch or runs for every image. The code is therefore left
# exactly as found; only comments were translated or added.
# NOTE(review): `np.int` no longer exists in current NumPy releases (use the
# builtin `int`), and the bare `except:` around cv2.imread also swallows
# KeyboardInterrupt/SystemExit.
# NOTE(review): the flags are defined on every call, so calling start() twice
# in one process presumably fails with a duplicate-flag error -- confirm.
def start(self) : self.running = True tf.app.flags.DEFINE_string('gpu', '0', '') # path the trained model is loaded from tf.app.flags.DEFINE_string('checkpoint_path', self.checkpoint_path, '') # graph with tf.compat.v1.get_default_graph().as_default(): # placeholder - input image input_image = tf.compat.v1.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image') # placeholder - input image info input_im_info = tf.compat.v1.placeholder(tf.float32, shape=[None, 3], name='input_im_info') # create a variable global_step global_step = tf.compat.v1.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) # tensorflow op bbox_pred, cls_pred, cls_prob = model.model(input_image) variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) saver = tf.compat.v1.train.Saver(variable_averages.variables_to_restore()) # tensorflow session config sessionConfig = tf.compat.v1.ConfigProto(allow_soft_placement=True) # GPU memory fraction # sessionConfig.gpu_options.per_process_gpu_memory_fraction = 0.3 # allocate memory on demand sessionConfig.gpu_options.allow_growth = True with tf.compat.v1.Session(config=sessionConfig) as sess: # load parameters from the checkpoint file (ckpt) ckpt_state = tf.compat.v1.train.get_checkpoint_state(self.checkpoint_path) # model path model_path = os.path.join(self.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) logger.info(u'Restore from {}'.format(model_path)) # restore variables saver.restore(sess, model_path) while self.running: logger.info(u'等待接收图片') imgFilePath = self.workerQueue.get() if self.is_stop_signal(imgFilePath): logger.info(u'接收到队列停止信号') break logger.info(u'开始处理图片: {}'.format(imgFilePath)) # start timing start = time.time() try: im = cv2.imread(imgFilePath)[:, :, ::-1] except: logger.exception(sys.exc_info()) continue # shrink the image, no larger than 600 * 1200 img, (rh, rw) = self.resize_image(im) # height, width, number of channels h, w, c = img.shape im_info = np.array([h, w, c]).reshape([1, 3]) # run the computation bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob], feed_dict={input_image: [img], input_im_info: im_info}) # (comment continues on the next line; in English: adjust the anchors by the RPN regression targets, then sort, apply NMS and other post-processing; outputs a blob made of proposal coordinates and an all-zero batch_ind index)
根据RPN目标回归值修正anchors并做排序、nms等后处理输出由proposal坐标和batch_ind全0索引组成的blob textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info) scores = textsegs[:, 0] textsegs = textsegs[:, 1:5] textdetector = TextDetector(DETECT_MODE='H') boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2]) # stop timing logger.info(u'总计耗时: {}'.format(time.time() - start)) if self.debug: with open(os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0]) + ".json", "w") as f: f.writelines(json.dumps(self.wrapResult(boxes, scores))) # convert the python list to a numpy array boxes = np.array(boxes, dtype=np.int) for i, box in enumerate(boxes): cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0), thickness=2) img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR) cv2.imwrite(os.path.join(self.outputPath, os.path.basename(imgFilePath)), img[:, :, ::-1]) with open(os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0]) + ".txt", "w") as f: for i, box in enumerate(boxes): line = ",".join(str(box[k]) for k in range(8)) line += "," + str(scores[i]) + "\n" f.writelines(line) if self.callback : self.callback(fileName = imgFilePath, ctpnRes = self.wrapResult(boxes, scores))
def main(argv=None):
    """Detect text boxes, OCR each one and save an annotated image.

    ``FLAGS.output_path`` is recreated empty and the latest checkpoint under
    ``FLAGS.checkpoint_path`` is restored. For each image from
    ``get_images`` the boxes found in mode ``'O'`` are outlined, the region
    between the first and third corner of each box is passed to
    ``pytesseract`` and the recognised text is drawn above the box. The
    annotated image and a ``.txt`` listing of the boxes are written to
    ``FLAGS.output_path``.

    Args:
        argv: Unused; present for ``tf.app.run`` compatibility.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread signals failure by returning None.
                im = cv2.imread(im_fn)
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = im[:, :, ::-1]  # reverse the channel order

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(
                    cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(
                    textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` has been removed from NumPy; ``int`` is the
                # same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # OCR must see untouched pixels. The original cropped from
                # the image it was drawing on, so outlines and earlier
                # labels leaked into the regions sent to tesseract.
                clean = img.copy()
                for box in boxes:
                    corners = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [corners], True,
                                  color=(0, 255, 0), thickness=2)

                    x1, y1 = corners[0][0]
                    x3, y3 = corners[2][0]
                    roi = clean[y1:y3, x1:x3]
                    if roi.size == 0:
                        # Nothing to recognise in a degenerate box.
                        continue
                    text = pytesseract.image_to_string(roi, config=config)
                    text = unidecode.unidecode(text)
                    cv2.putText(img, text, (x1, y1 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                (0, 0, 255), 2)

                # NOTE(review): fx is the horizontal factor but receives
                # 1/rh (and fy receives 1/rw); kept as in the original --
                # confirm against resize_image.
                img = cv2.resize(img, None, None,
                                 fx=1.0 / rh, fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path,
                                 os.path.basename(im_fn)),
                    img[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)


# if __name__ == '__main__':
#     tf.app.run()

# def delete_prev(path):
#     for the_file in os.listdir(path):
#         file_path = os.path.join(path, the_file)
#         try:
#             if os.path.isfile(file_path):
#                 os.unlink(file_path)
#             elif os.path.isdir(file_path): shutil.rmtree(file_path)
#         except Exception as e:
#             print(e)
#             continue

# app = Flask(__name__)
# app._static_folder = os.path.basename('static')
# UPLOAD_FOLDER = os.path.join('main', 'uploads')
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# @app.route('/')
# def hello_world():
#     return render_template('home_al.html')

# @app.route('/upload', methods=['POST', 'GET'])
# def upload_file():
#     if request.method == 'POST':
#         file = request.files['image']
#         filename = file.filename
#         # prepare directory for processing
#         delete_prev(app.config['UPLOAD_FOLDER'])
#         f = os.path.join(app.config['UPLOAD_FOLDER'], filename)
#         # add your custom code to check that the uploaded file is a valid image and not a malicious file (out-of-scope for this post)
#         file.save(f)
#         tf.app.run()
#         print('done')
#         processed_file = os.path.join('data/res', filename)
#         # return render_template('home_al.html', processed_file = processed_file)
#         return redirect(url_for('send_file', filename=filename))
#         print('redirected to', url_for('send_file', filename=filename))
#     else:
#         print('No request')
#     return render_template('home_al.html')

# # @app.route('/show/<filename>')
# # def uploaded_file(filename):
# #     filename = 'http://127.0.0.1:5000/upload/' + filename
# #     return render_template('home_al.html')

# @app.route('/uploaded/<filename>')
# def send_file(filename):
#     return send_from_directory('data/res', filename)

# app.run(debug=True)
def main(im=None, checkpoint_path='checkpoints_mlt/'):
    """Detect text boxes in an image array and return four values per box.

    The model is built on the default graph, the latest checkpoint under
    ``checkpoint_path`` is restored, and the image is resized with
    ``resize_image`` before detection in mode ``'H'``.

    Args:
        im: Image array handed to ``resize_image``.
        checkpoint_path: Directory holding the checkpoint; defaults to the
            value that used to be hard-coded here.

    Returns:
        A list with one ``[box[0], box[1], box[2], box[7]]`` entry per box,
        taken after those four values were divided by the resize ratios
        (and truncated, because the box array is integer-typed).
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.compat.v1.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.compat.v1.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={
                    input_image: [img],
                    input_im_info: im_info
                })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(
                textsegs, scores[:, np.newaxis], img.shape[:2])
            # ``np.int`` has been removed from NumPy; ``int`` is the same
            # dtype.
            boxes = np.array(boxes, dtype=int)

            # NOTE(review): positions 0 and 2 look like x coordinates but
            # are divided by ``rh``, and positions 1 and 7 by ``rw``; kept
            # as in the original -- confirm against resize_image.
            return_array = []
            for box in boxes:
                box[0] = box[0] / rh
                box[2] = box[2] / rh
                box[1] = box[1] / rw
                box[7] = box[7] / rw
                return_array.append([box[0], box[1], box[2], box[7]])
            return return_array
def main(argv=None): if os.path.exists(FLAGS.output_path): shutil.rmtree(FLAGS.output_path) os.makedirs(FLAGS.output_path) os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu with tf.get_default_graph().as_default(): input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image') input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) bbox_pred, cls_pred, cls_prob = model.model(input_image) variable_averages = tf.train.ExponentialMovingAverage( 0.997, global_step) saver = tf.train.Saver(variable_averages.variables_to_restore()) with tf.Session(config=tf.ConfigProto( allow_soft_placement=True)) as sess: ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path) model_path = os.path.join( FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) print('Restore from {}'.format(model_path)) saver.restore(sess, model_path) im_fn_list = get_images() for im_fn in im_fn_list: #print('===============') #print(im_fn) start = time.time() try: im = cv2.imread(im_fn)[:, :, ::-1] except: print("Error reading image {}!".format(im_fn)) continue img, (rh, rw) = resize_image(im) h, w, c = img.shape im_info = np.array([h, w, c]).reshape([1, 3]) bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob], feed_dict={ input_image: [img], input_im_info: im_info }) textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info) scores = textsegs[:, 0] textsegs = textsegs[:, 1:5] textdetector = TextDetector(DETECT_MODE='H') boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2]) boxes = np.array(boxes, dtype=np.int) cost_time = (time.time() - start) #print("cost time: {:.2f}s".format(cost_time)) if len(boxes) != 1: print(im_fn, len(boxes)) flag = -1 for i, box in enumerate(boxes): # cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),thickness=2) #img = 
cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR) arr = np.array(box[:8].astype(np.int32).reshape((-1, 2))) #print(arr) x1 = min(arr[:, 0]) x2 = max(arr[:, 0]) y1 = min(arr[:, 1]) y2 = max(arr[:, 1]) pad_w = int((y2 - y1) * 0.5) img_cp = img[y1 - 10:y2 + 10, x1 - pad_w:x2 + pad_w, :] #print(x1,x2,y1,y2) if flag < (x2 - x1) / (y2 - y1): flag = (x2 - x1) / (y2 - y1) if flag > 3: cv2.imwrite( os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img_cp[:, :, ::-1]) '''
def main(argv):
    """Cut every detected text box out of each image and save it as a PNG.

    ``FLAGS.output_path`` is recreated empty and the latest checkpoint under
    ``FLAGS.checkpoint_path`` is restored. For each image from
    ``get_images`` the boxes found in mode ``'H'`` are outlined on the
    resized image and the matching region of an unmarked copy is written to
    ``FLAGS.output_path + str(i) + '.png'``.

    Args:
        argv: Unused; present for ``tf.app.run`` compatibility.
    """
    # NOTE(review): the written paths are collected here but nothing visible
    # in this function reads or returns the list.
    of_list = []
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0), trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            print(im_fn_list)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread signals failure by returning None.
                im = cv2.imread(im_fn)
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = im[:, :, ::-1]  # reverse the channel order
                print("printing im.shape")
                print(im.shape)

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(
                    cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(
                    textsegs, scores[:, np.newaxis], img.shape[:2])
                # ``np.int`` has been removed from NumPy; ``int`` is the
                # same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # One unmarked copy per image. The original called
                # resize_image(im) again for every box just to get pixels
                # without the outlines drawn below.
                clean = img.copy()
                for i, box in enumerate(boxes):
                    box_arr = box[:8].astype(np.int32)
                    cv2.polylines(img, [box_arr.reshape((-1, 1, 2))], True,
                                  color=(0, 255, 0), thickness=2)

                    left, top = box_arr[0], box_arr[1]
                    width = box_arr[2] - box_arr[0]
                    height = box_arr[5] - box_arr[3]
                    crop = clean[top:top + height, left:left + width, :]

                    # NOTE(review): the name depends only on ``i``, so crops
                    # of a later image overwrite those of an earlier one.
                    out_file = FLAGS.output_path + str(i) + '.png'
                    cv2.imwrite(out_file, crop)
                    of_list.append(out_file)
def main(argv=None):
    """Crop the detected text region out of a slice of the input images.

    Runs only when the module-level ``train_or_test_1800`` is one of the two
    ``no_seperate_mianzhi_*`` modes.  The output directory is recreated, the
    latest checkpoint is restored, and images ``im_fn_list[int(a):b]`` (``a``
    and ``b`` are module-level values) are processed.  Each image is cropped
    to the detected box, scaled back and written to ``FLAGS.output_path``
    under its original base name.  If processing an image fails, a fixed
    fallback picture is written instead, prefixed with ``"xzywa"``.

    Args:
        argv: Unused.
    """
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the mode check is assumed to guard the whole body.
    if not (train_or_test_1800 == 'no_seperate_mianzhi_train'
            or train_or_test_1800 == 'no_seperate_mianzhi_test'):
        return

    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            ii = a
            # Adjust a / b to choose which slice of the images is processed.
            for im_fn in im_fn_list[int(a):b]:
                ii += 1
                print(str(ii) + '===============' + str(ii))
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread returns None for unreadable files, so the
                    # channel flip raises and lands in the handler.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                try:
                    img, (rh, rw) = resize_image(im)
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])

                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    scores = textsegs[:, 0]
                    # N proposals per image; columns 1..4 hold the four
                    # coordinates of each one.
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    # Per the original author, detect() was modified to
                    # return a single box.
                    boxes = textdetector.detect(textsegs,
                                                scores[:, np.newaxis],
                                                img.shape[:2])
                    # ``np.int`` was an alias of ``int`` and has been removed
                    # from NumPy; ``int`` yields the same dtype.
                    boxes = np.array(boxes, dtype=int)

                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for i, box in enumerate(boxes):
                        # Crop to the box.  ``img`` is rebound, so with more
                        # than one box each crop is taken from the previous.
                        img = img[int(box[1]):int(box[5]),
                                  int(box[0]):int(box[2])]

                    # NOTE(review): assumed to run once per image, after the
                    # box loop (the original indentation was lost).  fx is
                    # fed 1/rh and fy 1/rw; confirm against resize_image.
                    img = cv2.resize(img,
                                     None,
                                     None,
                                     fx=1.0 / rh,
                                     fy=1.0 / rw,
                                     interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     os.path.basename(im_fn)),
                        img[:, :, ::-1])
                except Exception:
                    # Seen with "Corrupt JPEG data: premature end of data
                    # segment" (the original author suspected WBNGQ9R7.jpg).
                    # A fixed fallback picture is written in its place.
                    fallback = cv2.imread(
                        "../../../dataset_warm_up/train_data/13X6EGWI.jpg")
                    # Guard against the fallback itself being unreadable so
                    # the handler cannot raise and abort the whole run.
                    if fallback is not None:
                        cv2.imwrite(
                            os.path.join(
                                FLAGS.output_path,
                                "xzywa" + str(os.path.basename(im_fn))),
                            fallback[:, :, ::-1])
                    print(str(im_fn) + " is broken!!!!!!!!")
def main(argv=None):
    """Rotate one hard-coded image, detect its text and save the text region.

    The image ``'hoadontiendien-3.png'`` is passed through ``rotate_img`` and
    saved as ``'rotated2.png'``.  Weights are restored from the latest
    checkpoint in ``'checkpoints_mlt/'``.  The bounding region of all
    detected boxes is cropped from the resized image and written to
    ``'rotate_cuted2.png'``.  If nothing is detected, the function reports it
    and returns without writing a crop.

    Args:
        argv: Unused.
    """
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        print("init sess")

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state('checkpoints_mlt/')
            model_path = os.path.join(
                'checkpoints_mlt/',
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            print('===============')
            im = rotate_img('hoadontiendien-3.png')
            print(im.shape)
            cv2.imwrite('rotated2.png', im[:, :, :])
            print("write rotate img")

            start = time.time()
            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])

            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={
                    input_image: [img],
                    input_im_info: im_info
                })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # ``np.int`` was an alias of ``int`` and has been removed from
            # NumPy; ``int`` yields the same dtype.
            boxes = np.array(boxes, dtype=int)

            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            # min()/max() below raise ValueError on an empty sequence, so
            # stop here when the detector found nothing.
            if len(boxes) == 0:
                print("No text boxes detected")
                return

            # Extent of all boxes, from corner values 0/1 and 4/5.
            box_minx = min(b[0] for b in boxes)
            box_miny = min(b[1] for b in boxes)
            box_maxx = max(b[4] for b in boxes)
            box_maxy = max(b[5] for b in boxes)
            print(box_minx, box_miny)
            print(box_maxx, box_maxy)

            # NumPy slicing returns a view, so the outlines drawn on ``img``
            # below also appear in ``crop_img``.
            crop_img = img[box_miny:box_maxy, box_minx:box_maxx]
            print(crop_img.shape)

            for box in boxes:
                cv2.polylines(img,
                              [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                              True,
                              color=(0, 255, 0),
                              thickness=1)

            # The rescaled full image is not used further in this function.
            # NOTE(review): fx is fed 1/rh and fy 1/rw; confirm against
            # resize_image.
            img = cv2.resize(img,
                             None,
                             None,
                             fx=1.0 / rh,
                             fy=1.0 / rw,
                             interpolation=cv2.INTER_LINEAR)

            cv2.imwrite('rotate_cuted2.png', crop_img[:, :, :])
def process():
    """Locate and OCR text in every input image, yielding a status record.

    This is a generator.  After resetting ``FLAGS.output_path`` and restoring
    the latest checkpoint, it walks the files from ``get_images()``.  For
    each image it yields the same ``output`` dict, updated in place, with
    these keys:

    * ``path``: the image file name.
    * ``percentage``: ``count / len(im_fn_list)`` progress.
    * ``locate_time``: seconds spent on text localisation.
    * ``ocr_time``: seconds spent running Tesseract on the boxes.
    * ``ocr_text``: the recognised string of every box.
    * ``err``: ``True`` when the image could not be read.

    When a box is recognised as exactly four digits, the result is also
    appended to ``OCR_text/<folder>/whole-<id>.txt`` under the module-level
    ``root``.

    Because one dict object is reused for every yield, callers that keep
    results must copy it.

    Yields:
        dict: The shared status record described above.
    """
    output = {
        'path': None,
        'percentage': 0,
        'locate_time': 0,
        'ocr_time': 0,
        'ocr_text': [],
        'err': False
    }

    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            start_all = time.time()
            for count, im_fn in enumerate(im_fn_list):
                output["err"] = False
                output["path"] = im_fn
                output["ocr_text"].clear()
                output["percentage"] = count / len(im_fn_list)

                print('===============')
                # Example im_fn: ../four_angles/recording_2019_10_30/bbq/
                #                cam_delicacies-17760-17880/73-500_0.jpg
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread returns None for unreadable files, so the
                    # channel flip raises and lands in the handler.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    output["err"] = True
                    yield output
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                # DETECT_MODE can be H / O depending on context.
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was an alias of ``int`` and has been removed
                # from NumPy; ``int`` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                output["locate_time"] = cost_time
                print("cost time: {:.2f}s".format(cost_time))

                # Text recognition.
                text_start = time.time()
                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    startX = box[0]
                    startY = box[1]
                    endX = box[4]
                    endY = box[5]

                    # The threshold is taken after drawing, so outlines drawn
                    # so far are part of the binarised image.
                    ret, thresh = cv2.threshold(img, 127, 255,
                                                cv2.THRESH_BINARY_INV)
                    roi = thresh[startY:endY, startX:endX]

                    # Tesseract options: the "digits" language, OCR engine
                    # mode 1 and page segmentation mode 7 (treat the ROI as
                    # a single line of text).
                    config = ("-l digits --oem 1 --psm 7")
                    text = pytesseract.image_to_string(roi, config=config)
                    output["ocr_text"].append(text)

                    # Only digit strings are printed, and only four-digit
                    # ones are stored.
                    # NOTE(review): nesting of the length check under the
                    # digit check is assumed; the indentation was lost.
                    if text.isdigit():
                        print(text)
                        if len(text) == 4:
                            data = im_fn.split("/")
                            parts = len(data)
                            fn = data[parts - 1]  # e.g. 73-500_0.jpg
                            # e.g. recording_2019_10_30/bbq/cam_bbq-8000-18120
                            folder = (data[parts - 4] + '/' +
                                      data[parts - 3] + '/' +
                                      data[parts - 2])
                            print(folder + '/' + fn)

                            id_num = fn.split("-")[0]  # e.g. 73

                            directory = 'OCR_text/' + folder + '/'
                            directory = os.path.join(root, directory)
                            os.makedirs(directory, exist_ok=True)

                            # ``with`` closes the file even if the write
                            # fails.
                            with open(directory + 'whole-' + id_num + '.txt',
                                      'a') as file_whole:
                                file_whole.write(folder + '/' + fn + ':' +
                                                 text + '\n')

                output["ocr_time"] = time.time() - text_start
                yield output

            cost_time_all = (time.time() - start_all)
            print("Total cost time: {:.2f}s".format(cost_time_all))
def main(argv=None):
    """Detect text in every input image and export or display the boxes.

    The latest checkpoint under ``FLAGS.checkpoint_path`` is restored and
    each file from ``get_images()`` is resized to ``FLAGS.image_size``,
    enhanced with ``cv2.detailEnhance`` and run through the detector in
    ``FLAGS.detect_mode``.  Box coordinates are divided by the resize ratio.

    * If ``FLAGS.output_path`` is set, the source image is copied to
      ``<output_path>/image/`` and the boxes are written to
      ``<output_path>/label/gt_<name>.txt``.  Each line holds eight integer
      coordinates plus a placeholder label ``i % 10``.
    * Otherwise the boxes are drawn on the original image, which is shown in
      a window until a key is pressed.

    Args:
        argv: Unused.
    """
    print('Mode :%s' % FLAGS.detect_mode)

    # Project modules are imported relative to the working directory.
    sys.path.append(os.getcwd())
    from utils.text_connector.detectors import TextDetector
    from nets import model_train as model
    from utils.rpn_msr.proposal_layer import proposal_layer

    if FLAGS.output_path:
        # Existing output is kept; only missing directories are created.
        os.makedirs(os.path.join(FLAGS.output_path, "image"), exist_ok=True)
        os.makedirs(os.path.join(FLAGS.output_path, "label"), exist_ok=True)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')
        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                # cv2.imread signals an unreadable file by returning None
                # rather than raising, so the result must be checked.
                try:
                    im = cv2.imread(im_fn)
                except Exception:
                    im = None
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im, FLAGS.image_size)
                img = cv2.detailEnhance(img)

                start = time.time()
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                # Outline thickness scales with the original image height.
                thickness = max(1, int(im.shape[0] / 400))

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE=FLAGS.detect_mode)
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                boxes = np.array(boxes, dtype=np.float64)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Map the boxes back to the original scale.  Each ``box`` is
                # a view into ``boxes``, so the division updates it in place.
                # NOTE(review): x and y are both divided by rh and rw is
                # never used; confirm against resize_image.
                for box in boxes:
                    box[0:8:2] /= rh
                    box[1:8:2] /= rh

                basename = os.path.basename(im_fn)
                if FLAGS.output_path:
                    bfn, ext = os.path.splitext(basename)
                    gt_path = os.path.join(FLAGS.output_path, "label",
                                           'gt_' + bfn + '.txt')
                    img_path = os.path.join(FLAGS.output_path, "image",
                                            basename)

                    # The untouched source file is copied, not re-encoded.
                    shutil.copyfile(im_fn, img_path)
                    with open(gt_path, "w") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(
                                str(int(box[k])) for k in range(8))
                            # The label is a placeholder cycling 0-9.
                            line += "," + str(i % 10) + "\r\n"
                            f.write(line)
                else:
                    # Draw every box, then show the annotated image.
                    for box in boxes:
                        points = [
                            box[:8].astype(np.int32).reshape((-1, 1, 2))
                        ]
                        cv2.polylines(im,
                                      points,
                                      True,
                                      color=(0, 255, 0),
                                      thickness=thickness,
                                      lineType=cv2.LINE_AA)

                    cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                    cv2.resizeWindow(basename, w, h)
                    cv2.imshow(basename, im)
                    cv2.waitKey(0)
def main(argv=None):
    """Detect text boxes, pass the crops to the extractor and save results.

    ``FLAGS.output_path`` is wiped and recreated and the latest checkpoint is
    restored.  Every image from ``get_images()`` is then processed:

    * Boxes are detected with ``TextDetector`` in ``'O'`` mode.
    * Each box is outlined on the resized image and cropped via
      ``crop_image_box``.
    * All crops are handed to ``TessaractImpl.extractData`` and the result
      is printed.
    * The annotated image and a ``.txt`` file of box coordinates are written
      to the output directory.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    textExtractor = TessaractImpl(CONFIG)

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread returns None for unreadable files, so the
                    # channel flip raises and lands in the handler.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was an alias of ``int`` and has been removed
                # from NumPy; ``int`` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                dataBoxes = []
                for box in boxes:
                    bbx_data = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    # The outline is drawn before cropping, so it is part of
                    # the crop passed to the extractor.
                    cv2.polylines(img, [bbx_data],
                                  True,
                                  color=(0, 255, 0),
                                  thickness=2)
                    startX, startY, endX, endY = crop_image_box(bbx_data)
                    crop_img = img[startY:endY, startX:endX]
                    dataBoxes.append({"boxImg": crop_img})

                print(textExtractor.extractData(dataBoxes))

                # NOTE(review): fx is fed 1/rh and fy 1/rw; confirm against
                # resize_image.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        # NOTE(review): scores is indexed by box number but
                        # holds one value per proposal; confirm.
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
def main(argv=None):
    """Detect text in rotated input images and save every box as a crop.

    Each image from ``get_images()`` is read, transposed and flipped
    vertically, then resized and run through the detector in ``'H'`` mode.
    For every box, one line is appended to
    ``<FLAGS.output_path>/txt/cpth_result.txt`` with the file name, eight
    coordinates, box index and a score.  The box region of the rescaled
    image is saved to ``<FLAGS.output_path>/img/`` as ``<name>_<index>.jpg``.

    The output directory is not cleared; only the ``txt`` and ``img``
    subdirectories are created when missing.

    Args:
        argv: Unused.
    """
    # open(..., "a") and cv2.imwrite do not create missing directories.
    txt_dir = os.path.join(FLAGS.output_path, 'txt')
    img_dir = os.path.join(FLAGS.output_path, 'img')
    os.makedirs(txt_dir, exist_ok=True)
    os.makedirs(img_dir, exist_ok=True)

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # Rotate portrait images: transpose, then flip vertically.
                # The image is read only here, inside the guard, so an
                # unreadable file is reported and skipped.
                try:
                    im = cv2.imread(im_fn)[:, :, ::-1]
                    im = cv2.transpose(im)
                    im = cv2.flip(im, 0)
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was an alias of ``int`` and has been removed
                # from NumPy; ``int`` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # NOTE(review): fx is fed 1/rh and fy 1/rw; the crop below
                # uses the same pairing (x / rh, y / rw), so the two agree
                # with each other.  Confirm against resize_image.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)

                with open(os.path.join(txt_dir, "cpth_result.txt"),
                          "a") as f:
                    for i, box in enumerate(boxes):
                        line = os.path.basename(im_fn)
                        line += ","
                        line += ",".join(str(box[k]) for k in range(8))
                        line += ","
                        line += str(i)
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

                        # Bounding rectangle of the box in the coordinates
                        # of the rescaled image.
                        maxy = int(max(box[1:8:2]) / rw)
                        miny = int(min(box[1:8:2]) / rw)
                        maxx = int(max(box[:8:2]) / rh)
                        minx = int(min(box[:8:2]) / rh)
                        img_new = img[miny:maxy, minx:maxx]

                        # Only names containing '.jpg' get the index suffix;
                        # for other names every box writes the same file.
                        cv2.imwrite(
                            os.path.join(
                                img_dir,
                                os.path.basename(im_fn).replace(
                                    '.jpg', '_' + str(i) + '.jpg')), img_new)
def main(argv=None):
    """Detect text boxes, OCR each one with Tesseract and save the results.

    ``FLAGS.output_path`` is wiped and recreated and the latest checkpoint is
    restored.  For every image from ``get_images()``, boxes are detected in
    ``'H'`` mode and outlined on the resized image.  Each box is then
    processed as follows:

    * It is cropped with a 5-pixel margin above and below.
    * The crop is converted to grey and sharpened with ``unsharp_mask``.
    * It is recognised with ``pytesseract`` (``-l vie --psm 13``).

    The annotated image and a UTF-8 ``.txt`` file are written to the output
    directory.  Each line of the text file holds eight coordinates plus the
    recognised text.

    A box that cannot be processed is recorded with the text ``"error"``, so
    the text list stays aligned with the boxes.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    print(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # cv2.imread returns None for unreadable files, so the
                    # channel flip raises and lands in the handler.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was an alias of ``int`` and has been removed
                # from NumPy; ``int`` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                texts = []
                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # Clamp the start indices at 0: a negative slice start
                    # would wrap around to the far edge of the image and
                    # produce an empty or wrong crop.
                    top = max(box[1] - 5, 0)
                    left = max(box[0], 0)
                    crop_img = img[top:box[5] + 5, left:box[4]]

                    try:
                        # Preprocessing sits inside the guard so that a
                        # degenerate crop yields "error", not a crash.
                        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
                        crop_img = unsharp_mask(crop_img)
                        text = pytesseract.image_to_string(
                            crop_img, config='-l vie --psm 13')
                    except Exception:
                        print("OCR Error")
                        text = "error"
                    print(text)
                    texts.append(text)

                # NOTE(review): fx is fed 1/rh and fy 1/rw; confirm against
                # resize_image.
                img = cv2.resize(img,
                                 None,
                                 None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w", encoding="UTF-8") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(texts[i]) + "\r\n"
                        f.write(line)