def load_tf_model():
    """Build the CTPN inference graph and restore its weights into a session.

    Side effects: ``FLAGS.output_path`` is removed if it exists and recreated
    empty, and ``CUDA_VISIBLE_DEVICES`` is set from ``FLAGS.gpu``.

    The graph is created in the current default graph.  Weights are restored
    from the checkpoint referenced by the checkpoint state found in
    ``FLAGS.checkpoint_path``, using the variable mapping of an exponential
    moving average with decay 0.997.

    Returns:
        The tuple ``(sess, input_image, input_im_info, bbox_pred, cls_pred,
        cls_prob)``.  The session is returned open; the caller owns it.
    """
    out_dir = FLAGS.output_path
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    # Graph inputs: a batch of 3-channel images and one info row per image.
    image_ph = tf.placeholder(
        tf.float32, shape=[None, None, None, 3], name='input_image')
    im_info_ph = tf.placeholder(
        tf.float32, shape=[None, 3], name='input_im_info')

    step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0),
        trainable=False)

    bbox_out, cls_out, prob_out = model.model(image_ph)

    # Restore through the moving-average name mapping.
    ema = tf.train.ExponentialMovingAverage(0.997, step)
    restorer = tf.train.Saver(ema.variables_to_restore())

    session = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))

    state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
    ckpt_name = os.path.basename(state.model_checkpoint_path)
    ckpt_file = os.path.join(FLAGS.checkpoint_path, ckpt_name)
    print('Restore from {}'.format(ckpt_file))
    restorer.restore(session, ckpt_file)

    return session, image_ph, im_info_ph, bbox_out, cls_out, prob_out
def ctpn_recognition(test_images_path, app):
    """Detect text lines in one image and return a crop of the widest one.

    Side effects: ``params.middle_path`` is removed if it exists and
    recreated, ``CUDA_VISIBLE_DEVICES`` is set from ``params.gpu``, and an
    annotated preview (all boxes in green, the selected one in red) is
    written to ``params.middle_path`` under the input file's base name.

    Args:
        test_images_path: Path of the image file to analyse.
        app: Object whose ``logger.info`` receives the restore message.

    Returns:
        An image built with ``Image.fromarray``.  When the widest detected
        box is taller than 20 pixels it holds the first colour channel of
        that box (widened by 5 pixels on each side); otherwise, including
        when nothing is detected, it holds the whole resized image.
    """
    if os.path.exists(params.middle_path):
        shutil.rmtree(params.middle_path)
    os.makedirs(params.middle_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = params.gpu

    # Build into a private graph: creating 'global_step' a second time in
    # the process-wide default graph raises, which would make every call
    # after the first one fail.
    with tf.Graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(params.checkpoint_path)
            model_path = os.path.join(
                params.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            app.logger.info('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # OpenCV loads BGR; reverse the channel axis to get RGB.
            im = cv2.imread(test_images_path)[:, :, ::-1]
            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])

            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={input_image: [img], input_im_info: im_info})

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(
                textsegs, scores[:, np.newaxis], img.shape[:2])
            # Builtin int: the np.int alias no longer exists in NumPy >= 1.24.
            boxes = np.array(boxes, dtype=int)

            # Index boxes by width.  Boxes sharing a width overwrite each
            # other, so the last one with the maximal width is selected.
            widths = {}
            for i, box in enumerate(boxes):
                width, height = get_wh(box[:8].tolist())
                widths[width] = (i, height)

            if widths:
                widest_index, widest_height = widths[max(widths)]
            else:
                # Nothing detected: no box can be selected below.
                widest_index, widest_height = None, 0

            img_copy = img.copy()
            part_img = img.copy()  # fallback when no box qualifies
            for i, box in enumerate(boxes):
                color = (0, 255, 0)
                if i == widest_index and widest_height > 20:
                    color = (255, 0, 0)
                    # Pad horizontally.  Clamp at 0 because a negative
                    # start index would wrap around in the slice below.
                    box[0] = max(box[0] - 5, 0)
                    box[2] = box[2] + 5
                    part_img = img[box[1]:box[5], box[0]:box[2]][:, :, 0]
                cv2.polylines(
                    img_copy,
                    [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                    True, color=color, thickness=2)

            # Scale the preview back: fx acts on the horizontal axis, so it
            # must undo rw, and fy must undo rh.
            # NOTE(review): assumes resize_image returns (height ratio,
            # width ratio), as the names rh/rw suggest - confirm.
            img_copy = cv2.resize(
                img_copy, None, None,
                fx=1.0 / rw, fy=1.0 / rh,
                interpolation=cv2.INTER_LINEAR)
            cv2.imwrite(
                os.path.join(params.middle_path,
                             os.path.basename(test_images_path)),
                img_copy[:, :, ::-1])

            part_img = Image.fromarray(part_img.astype('uint8'))
            return part_img
def ctpn_pred(input_path, output_path, textloc_output_path, checkpoint_path, gpu):
    """Detect and recognise text in every image returned by ``get_images``.

    Each image is processed twice: as loaded ('ORIG') and after a transpose
    plus horizontal flip ('ROT').  Boxes are mapped back to the coordinates
    of the original image.  A box is kept only if at most 20% of its
    bounding rectangle is already covered by previously kept boxes.

    For every kept box the crop is saved as
    ``<stem>_<pass>_<index>.jpg`` in ``output_path``, passed to
    ``txt_recog``, and a tab-separated line ``pass, 8 coordinates, text`` is
    appended to ``<stem>_loc.txt`` in ``textloc_output_path``.  An annotated
    overview ``<stem>_<pass>.jpg`` is written after each pass.

    Side effects: ``output_path`` is removed if it exists and recreated,
    ``CUDA_VISIBLE_DEVICES`` is set, and the default TensorFlow graph is
    reset.

    Args:
        input_path: Passed to ``get_images`` to list the input images.
        output_path: Directory for crops and annotated images (wiped first).
        textloc_output_path: Directory holding the ``*_loc.txt`` files.
        checkpoint_path: Directory containing the model checkpoint.
        gpu: Value assigned to ``CUDA_VISIBLE_DEVICES``.
    """
    print("========== detect text using ctpn ==============")
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu

    tf.reset_default_graph()
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            for im_fn in get_images(input_path):
                print('===============')
                print(im_fn)
                stem = os.path.splitext(os.path.basename(im_fn))[0]
                out_prefix = os.path.join(output_path, stem)

                # Remove existing detected components from the text file.
                txtfileloc = os.path.join(textloc_output_path, stem) + "_loc.txt"
                if os.path.isfile(txtfileloc):
                    with open(txtfileloc, 'r+') as f:
                        textlines = txtremove(f, ['ORIG', 'ROT'])
                        f.seek(0)
                        f.write(textlines)
                        f.truncate()

                start = time.time()
                # cv2.imread signals an unreadable file by returning None
                # instead of raising, so the result has to be checked.
                try:
                    img_raw = cv2.imread(im_fn)
                except Exception:
                    img_raw = None
                if img_raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                H, W, _ = img_raw.shape
                # White mask; kept boxes are painted black on it so later
                # boxes can be tested for overlap.
                img_blank = np.ones(shape=[H, W], dtype=np.uint8) * 255
                # Image used to draw bounding boxes.
                img_draw = img_raw.copy()
                img, (rh, rw) = resize_image(img_raw)
                h, w, c = img.shape
                # Factors mapping resized coordinates back to the original.
                fx = 1.0 / rw
                fy = 1.0 / rh

                res = []
                for ifrot in ['ORIG', 'ROT']:
                    im = img.copy()
                    if ifrot == 'ROT':
                        im = cv2.transpose(im)
                        im = cv2.flip(im, 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])

                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={input_image: [im], input_im_info: im_info})

                    textsegs, _ = proposal_layer(
                        cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(
                        textsegs, scores[:, np.newaxis], im.shape[:2])
                    # Builtin int: np.int was removed in NumPy 1.24.
                    boxes = np.array(boxes, dtype=int)
                    print("Find number of text:", len(boxes))

                    # Measured from the start of this image, so the second
                    # pass reports the cumulative time.
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for i, box in enumerate(boxes):
                        if ifrot == 'ROT':
                            # Map the corners from the rotated frame back
                            # to the unrotated, resized frame.
                            box = np.array([
                                box[3], h - box[2],
                                box[5], h - box[4],
                                box[7], h - box[6],
                                box[1], h - box[0],
                                box[8]])
                        # Rescale to the original image size.
                        box[:8:2] = (box[:8:2] * fx).astype(np.int32)
                        box[1::2] = (box[1::2] * fy).astype(np.int32)
                        loc = [int(v) for v in box[0:-1]]
                        # int32 explicitly: OpenCV contour functions do not
                        # accept the int64 default of most platforms.
                        corners = np.array(loc[:8], dtype=np.int32)

                        # Fraction of the bounding rectangle that earlier
                        # kept boxes already cover (black mask pixels).
                        x0, y0, w0, h0 = cv2.boundingRect(
                            corners.reshape((-1, 2)))
                        mask_crop = img_blank[y0:y0 + h0, x0:x0 + w0]
                        hc, wc = mask_crop.shape[:2]
                        area = hc * wc
                        if area == 0:
                            # The rectangle selects no pixels; there is
                            # nothing to crop and the ratio is undefined.
                            continue
                        countzero = area - cv2.countNonZero(mask_crop)
                        if countzero * 1.0 / area > 0.2:
                            # Too much overlap with a previous box.
                            continue

                        cv2.drawContours(
                            img_blank, [corners.reshape((-1, 1, 2))], 0, (0),
                            thickness=-1, lineType=8)
                        cv2.polylines(
                            img_draw, [corners.reshape((-1, 1, 2))], True,
                            color=bbox_color, thickness=2)

                        # Crop the rectangle from the image, recognise, save.
                        img_crop = img_raw[y0:y0 + h0, x0:x0 + w0].copy()
                        txtrecog = txt_recog(img_crop)
                        res.append([ifrot] + loc + [txtrecog])
                        cv2.imwrite(
                            out_prefix + "_" + ifrot + "_"
                            + str(format(i, "04")) + ".jpg",
                            img_crop)
                        cv2.putText(
                            img_draw, str(i), (loc[0], loc[1]),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, bbox_color, 2,
                            cv2.LINE_AA)

                    cv2.imwrite(out_prefix + "_" + ifrot + ".jpg", img_draw)

                # One line per kept box: pass name, 8 coordinates, text.
                with open(txtfileloc, "a") as f:
                    for ir in res:
                        line = "\t".join(str(ir[k]) for k in range(10))
                        line += "\n"
                        print(line)
                        f.write(line)
def main(argv=None):
    """Run CTPN text detection over every image returned by ``get_images``.

    Each image is resized and processed twice: as is ('orig') and after a
    transpose plus horizontal flip ('rot90').  After each pass the boxes
    found so far are drawn on an annotated copy that is saved as
    ``<pass>-<basename>`` in ``FLAGS.output_path``; the pass's raw boxes and
    scores are appended to ``<stem>.txt`` in the same directory.

    Side effects: ``FLAGS.output_path`` is removed if it exists and
    recreated, ``CUDA_VISIBLE_DEVICES`` is set from ``FLAGS.gpu``, and the
    default TensorFlow graph is reset.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf.reset_default_graph()
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread returns None for an unreadable file.  Only
                # Exception is caught so Ctrl-C still interrupts the run.
                try:
                    loaded = cv2.imread(im_fn)
                except Exception:
                    loaded = None
                if loaded is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                # OpenCV loads BGR; reverse the channel axis to get RGB.
                im, (rh, rw) = resize_image(loaded[:, :, ::-1])
                h, w, c = im.shape
                # Draw on a separate copy so the rotated pass is run on
                # clean pixels rather than on the first pass's boxes.
                canvas = im.copy()
                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"

                for im_rot in ['orig', 'rot90']:
                    if im_rot == 'rot90':
                        img = cv2.flip(cv2.transpose(im), 1)
                        bbox_color = (255, 0, 0)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        img = im
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])

                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={input_image: [img], input_im_info: im_info})

                    textsegs, _ = proposal_layer(
                        cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(
                        textsegs, scores[:, np.newaxis], img.shape[:2])
                    # Builtin int: np.int was removed in NumPy 1.24.
                    boxes = np.array(boxes, dtype=int)

                    # Measured from the start of this image, so the second
                    # pass reports the cumulative time.
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for box in boxes:
                        if im_rot == 'rot90':
                            # Map the corners from the rotated frame back
                            # to the unrotated frame for drawing only;
                            # ``boxes`` itself is left untouched.
                            box = np.array([
                                box[3], h - box[2],
                                box[5], h - box[4],
                                box[7], h - box[6],
                                box[1], h - box[0],
                                box[8]])
                        cv2.polylines(
                            canvas,
                            [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                            True, color=bbox_color, thickness=2)

                    # The annotated image stays at the resized scale.
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     im_rot + "-" + os.path.basename(im_fn)),
                        canvas[:, :, ::-1])

                    # Both passes append to the same file, separated by a
                    # blank line.  Rows of the 'rot90' pass are in the
                    # rotated frame because ``boxes`` is not remapped.
                    # NOTE(review): scores has one entry per proposal
                    # segment, not per merged box - confirm scores[i] is
                    # the intended value here.
                    with open(txt_path, "a") as f:
                        f.write("\n")
                        for i, box in enumerate(boxes):
                            line = ",".join(str(box[k]) for k in range(8))
                            line += "," + str(scores[i]) + "\r\n"
                            f.write(line)
def ctpn():
    """Detect text in every image of ``FLAGS.ctpn_input_path`` and save crops.

    Each image is processed twice: as loaded ('orig') and after a transpose
    plus horizontal flip ('rot').  For every detected box the bounding
    rectangle is cropped from the original image and saved as
    ``<pass><index>-<basename>``.  After each pass an annotated overview
    ``<pass>-<basename>`` and a text file ``<pass>-<stem>.txt`` with one
    ``8 coordinates, score`` line per box are written.  All output goes to
    ``FLAGS.ctpn_output_path``.

    Side effects: ``FLAGS.ctpn_output_path`` is removed if it exists and
    recreated, ``CUDA_VISIBLE_DEVICES`` is set from ``FLAGS.gpu``, and the
    default TensorFlow graph is reset.
    """
    if os.path.exists(FLAGS.ctpn_output_path):
        shutil.rmtree(FLAGS.ctpn_output_path)
    os.makedirs(FLAGS.ctpn_output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    tf.reset_default_graph()
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(
            tf.float32, shape=[None, 3], name='input_im_info')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            for im_fn in get_images(FLAGS.ctpn_input_path):
                print('===============')
                print(im_fn)
                base_name = os.path.basename(im_fn)
                start = time.time()

                # cv2.imread signals an unreadable file by returning None
                # instead of raising, so the result has to be checked.
                try:
                    img_raw = cv2.imread(im_fn)
                except Exception:
                    img_raw = None
                if img_raw is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                # Image used to draw bounding boxes (BGR, like img_raw).
                img_draw = img_raw.copy()
                img, (rh, rw) = resize_image(img_raw)
                h, w, c = img.shape
                # Factors mapping resized coordinates back to the original.
                fx = 1.0 / rw
                fy = 1.0 / rh

                for ifrot in ['orig', 'rot']:
                    im = img.copy()
                    # Colours are BGR because img_draw is saved unchanged:
                    # red for the rotated pass, green for the upright one.
                    if ifrot == 'rot':
                        im = cv2.transpose(im)
                        im = cv2.flip(im, 1)
                        bbox_color = (0, 0, 255)
                        im_info = np.array([w, h, c]).reshape([1, 3])
                    else:
                        bbox_color = (0, 255, 0)
                        im_info = np.array([h, w, c]).reshape([1, 3])

                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={input_image: [im], input_im_info: im_info})

                    textsegs, _ = proposal_layer(
                        cls_prob_val, bbox_pred_val, im_info)
                    scores = textsegs[:, 0]
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    boxes = textdetector.detect(
                        textsegs, scores[:, np.newaxis], im.shape[:2])
                    # Builtin int: np.int was removed in NumPy 1.24.
                    boxes = np.array(boxes, dtype=int)
                    print(len(boxes))

                    # Measured from the start of this image, so the second
                    # pass reports the cumulative time.
                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for i, box in enumerate(boxes):
                        if ifrot == 'rot':
                            # Map the corners from the rotated frame back
                            # to the unrotated, resized frame.
                            box = np.array([
                                box[3], h - box[2],
                                box[5], h - box[4],
                                box[7], h - box[6],
                                box[1], h - box[0],
                                box[8]])
                        # Rescale to the original image size.  In the
                        # 'orig' pass ``box`` is a row view, so ``boxes``
                        # is rescaled in place; in the 'rot' pass ``box``
                        # is a new array and ``boxes`` keeps its
                        # rotated-frame coordinates.
                        box[:8:2] = (box[:8:2] * fx).astype(np.int32)
                        box[1::2] = (box[1::2] * fy).astype(np.int32)
                        corners = box[:8].astype(np.int32)

                        cv2.polylines(
                            img_draw, [corners.reshape((-1, 1, 2))], True,
                            color=bbox_color, thickness=2)

                        # Crop the bounding rectangle and save it.
                        x0, y0, w0, h0 = cv2.boundingRect(
                            corners.reshape((-1, 2)))
                        img_crop = img_raw[y0:y0 + h0, x0:x0 + w0].copy()
                        if img_crop.size:
                            # cv2.imwrite rejects empty images, which a
                            # rectangle outside the picture would produce.
                            cv2.imwrite(
                                os.path.join(
                                    FLAGS.ctpn_output_path,
                                    ifrot + str(format(i, "04")) + "-"
                                    + base_name),
                                img_crop)
                        cv2.putText(
                            img_draw, str(i), (int(box[0]), int(box[1])),
                            cv2.FONT_HERSHEY_SIMPLEX, 1.0, bbox_color, 2,
                            cv2.LINE_AA)

                    # img_draw is already BGR, the layout cv2.imwrite
                    # expects; reversing the channels here would swap red
                    # and blue in the saved picture.
                    cv2.imwrite(
                        os.path.join(FLAGS.ctpn_output_path,
                                     ifrot + "-" + base_name),
                        img_draw)

                    # NOTE(review): scores has one entry per proposal
                    # segment, not per merged box - confirm scores[i] is
                    # the intended value here.
                    txt_path = os.path.join(
                        FLAGS.ctpn_output_path,
                        ifrot + "-" + os.path.splitext(base_name)[0]) + ".txt"
                    with open(txt_path, "a") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(box[k]) for k in range(8))
                            line += "," + str(scores[i]) + "\r\n"
                            f.write(line)