def main(argv=None):
    """Detect text boxes in every input image and print them to stdout.

    Recreates ``FLAGS.output_path`` (any previous contents are deleted),
    restores the checkpoint referenced by the checkpoint state in
    ``FLAGS.checkpoint_path`` using the moving-average variable names, and
    runs the model on each path returned by ``get_images()``.  For every
    detected box, the first eight values followed by a score are printed as
    one comma-separated line.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                # Fail with a clear message instead of an AttributeError on None.
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread reports failure by returning None, so test for it
                # explicitly instead of relying on a bare ``except``.
                bgr = cv2.imread(im_fn)
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                img = bgr[:, :, ::-1]  # reverse the channel axis

                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was only an alias of the builtin and no longer
                # exists in current NumPy releases.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # NOTE(review): ``scores`` is indexed per proposal while
                # ``boxes`` is indexed per detected box -- confirm that
                # scores[i] is the intended score for box i.
                for i, box in enumerate(boxes):
                    line = ",".join(str(box[k]) for k in range(8))
                    line += "," + str(scores[i]) + "\n"
                    print(line)
def find(self):
    """Detect text boxes in ``self.img_path`` and return them in original scale.

    Builds the model on a fresh default graph, restores the checkpoint
    referenced in ``FLAGS.checkpoint_path``, runs detection on the image
    resized by ``self.resize_image`` and divides the box coordinates by the
    returned ratios (even indices by ``rw``, odd indices by ``rh``), rounding
    each with ``self.round_half_up``.

    Returns:
        An integer array built from every detected box with its last element
        dropped.  An empty integer array is returned when the image cannot be
        read.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    tf.reset_default_graph()

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3],
                                       name='input_im_info')

        global_step = self.get_global_step()

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            print('===============')
            # cv2.imread returns None on failure.  The previous code printed
            # the error and then carried on with an unbound ``im``.
            bgr = cv2.imread(self.img_path)
            if bgr is None:
                print("Error reading image {}!".format(self.img_path))
                return np.array([], dtype=int)
            im = bgr[:, :, ::-1]  # reverse the channel axis

            img, (rh, rw) = self.resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={input_image: [img], input_im_info: im_info})

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='O')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])

            # Map the eight corner values back to the original image: even
            # indices are divided by rw, odd indices by rh.
            for box in boxes:
                for box_idx in range(8):
                    ratio = rw if box_idx % 2 == 0 else rh
                    box[box_idx] = self.round_half_up(box[box_idx] / ratio)

            # ``np.int`` was only an alias of the builtin int and has been
            # removed from NumPy.
            boxes = np.array([box[:-1] for box in boxes], dtype=int)

    return boxes
def main(argv=None):
    """Detect text in every input image, then save labels or show the result.

    When ``FLAGS.output_path`` is set, each source image is copied to
    ``<output_path>/image`` and a ``gt_<name>.txt`` file with one line per
    box (eight integer coordinates plus ``i % 10`` as the label) is written to
    ``<output_path>/label``.  Otherwise the boxes are drawn on the original
    image and shown in an OpenCV window until a key is pressed.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    print('Mode :%s' % FLAGS.detect_mode)

    # The project modules are resolved relative to the working directory.
    sys.path.append(os.getcwd())
    from utils.text_connector.detectors import TextDetector
    from nets import model_train as model
    from utils.rpn_msr.proposal_layer import proposal_layer

    if FLAGS.output_path:
        # Existing output is intentionally kept (no rmtree).
        # shutil.rmtree(FLAGS.output_path)
        os.makedirs(FLAGS.output_path, exist_ok=True)
        os.makedirs(os.path.join(FLAGS.output_path, "image"), exist_ok=True)
        os.makedirs(os.path.join(FLAGS.output_path, "label"), exist_ok=True)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                # cv2.imread returns None instead of raising, so the former
                # try/except never caught an unreadable file.
                im = cv2.imread(im_fn)
                if im is None:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im, FLAGS.image_size)
                img = cv2.detailEnhance(img)

                # process image
                start = time.time()
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                # Line thickness grows with the height of the source image.
                thickness = max(1, int(im.shape[0] / 400))

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE=FLAGS.detect_mode)
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                boxes = np.array(boxes, dtype=np.float64)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Undo the resize on the box coordinates.  Both axes used to
                # be divided by rh; the x values (even indices) now use rw.
                # NOTE(review): assumes resize_image returns
                # (height ratio, width ratio), as the names suggest -- confirm.
                for box in boxes:
                    box[0:8:2] /= rw
                    box[1:8:2] /= rh

                basename = os.path.basename(im_fn)
                if FLAGS.output_path:
                    bfn, ext = os.path.splitext(basename)
                    gt_path = os.path.join(FLAGS.output_path, "label",
                                           'gt_' + bfn + '.txt')
                    img_path = os.path.join(FLAGS.output_path, "image",
                                            basename)
                    # The source file is copied unchanged (not re-encoded).
                    # cv2.imwrite(img_path, im)
                    shutil.copyfile(im_fn, img_path)

                    with open(gt_path, "w") as f:
                        for i, box in enumerate(boxes):
                            line = ",".join(str(int(box[k])) for k in range(8))
                            # line += "," + str(scores[i]) + "\r\n"
                            # store label as 0-9 for simple
                            line += "," + str(i % 10) + "\r\n"
                            f.write(line)
                else:
                    # draw polyline and show
                    for box in boxes:
                        points = [box[:8].astype(np.int32).reshape((-1, 1, 2))]
                        cv2.polylines(im, points, True,
                                      color=(0, 255, 0),
                                      thickness=thickness,
                                      lineType=cv2.LINE_AA)
                    cv2.namedWindow(basename, cv2.WINDOW_NORMAL)
                    cv2.resizeWindow(basename, w, h)
                    cv2.imshow(basename, im)
                    cv2.waitKey(0)
def main(argv=None):
    """Detect text boxes, OCR each box and save an annotated image plus labels.

    Recreates ``FLAGS.output_path``, restores the checkpoint referenced in
    ``FLAGS.checkpoint_path`` and, for each path returned by ``get_images()``,
    draws every detected box on the resized image, runs ``pytesseract`` on the
    axis-aligned region between the first and third corner, writes the
    recognised text above the box, and saves the image and a ``.txt`` file
    with the box coordinates and a score.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread returns None on failure; check it explicitly
                # rather than catching everything with a bare ``except``.
                bgr = cv2.imread(im_fn)
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` has been removed from NumPy; the builtin int is
                # the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                for box in boxes:
                    corners = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [corners], True, color=(0, 255, 0),
                                  thickness=2)

                    # Region spanned by the first and third corner.
                    x0, y0 = corners[0][0]
                    x2, y2 = corners[2][0]
                    roi = img[y0:y2, x0:x2]

                    # A degenerate box yields an empty crop; skip OCR for it.
                    if roi.size:
                        # ``config`` is expected to be defined at module level.
                        text = pytesseract.image_to_string(roi, config=config)
                        text = unidecode.unidecode(text)
                    else:
                        text = ''
                    cv2.putText(img, text, (x0, y0 - 10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

                img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                label_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(label_path, "w") as f:
                    # NOTE(review): ``scores`` is indexed per proposal while
                    # ``boxes`` is indexed per detected box -- confirm that
                    # scores[i] is the intended score for box i.
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)


# if __name__ == '__main__':
#     tf.app.run()

# def delete_prev(path):
#     for the_file in os.listdir(path):
#         file_path = os.path.join(path, the_file)
#         try:
#             if os.path.isfile(file_path):
#                 os.unlink(file_path)
#             elif os.path.isdir(file_path): shutil.rmtree(file_path)
#         except Exception as e:
#             print(e)
#             continue

# app = Flask(__name__)
# app._static_folder = os.path.basename('static')
# UPLOAD_FOLDER = os.path.join('main', 'uploads')
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# @app.route('/')
# def hello_world():
#     return render_template('home_al.html')

# @app.route('/upload', methods=['POST', 'GET'])
# def upload_file():
#     if request.method == 'POST':
#         file = request.files['image']
#         filename = file.filename
#         # prepare directory for processing
#         delete_prev(app.config['UPLOAD_FOLDER'])
#         f = os.path.join(app.config['UPLOAD_FOLDER'], filename)
#         # add your custom code to check that the uploaded file is a valid image and not a malicious file (out-of-scope for this post)
#         file.save(f)
#         tf.app.run()
#         print('done')
#         processed_file = os.path.join('data/res', filename)
#         # return render_template('home_al.html', processed_file = processed_file)
#         return redirect(url_for('send_file', filename=filename))
#         print('redirected to', url_for('send_file', filename=filename))
#     else:
#         print('No request')
#         return render_template('home_al.html')

# # @app.route('/show/<filename>')
# # def uploaded_file(filename):
# #     filename = 'http://127.0.0.1:5000/upload/' + filename
# #     return render_template('home_al.html')

# @app.route('/uploaded/<filename>')
# def send_file(filename):
#     return send_from_directory('data/res', filename)

# app.run(debug=True)
def main(argv=None):
    """Detect text, crop the region chosen by ``get_daxie_bbox`` and save it.

    Recreates ``FLAGS.output_path``, restores the checkpoint referenced in
    ``FLAGS.checkpoint_path`` and processes each path from ``get_images()``.
    When ``get_daxie_bbox`` returns a box, the resized image is cropped to it
    (and rotated by 180 degrees when ``is_reverse == 1``).  The image is then
    scaled by ``1 / rh`` and ``1 / rw`` and written to the output directory
    under the source file name.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread returns None on failure; check it explicitly
                # rather than catching everything with a bare ``except``.
                bgr = cv2.imread(im_fn)
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` has been removed from NumPy; the builtin int is
                # the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                is_reverse, bbox = get_daxie_bbox(boxes, h)
                if bbox is not None:
                    pil_image = Image.fromarray(
                        cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
                    # Crop to the rectangle given by box values 0, 1, 4 and 5.
                    pil_image = pil_image.crop(
                        [bbox[0], bbox[1], bbox[4], bbox[5]])
                    if is_reverse == 1:
                        pil_image = pil_image.transpose(Image.ROTATE_180)
                    img = np.array(pil_image.convert('RGB'))[:, :, ::-1]

                # NOTE(review): the source's nesting is ambiguous here; the
                # image is written for every input, cropped or not -- confirm.
                img = cv2.resize(img, None, None,
                                 fx=1.0 / rh,
                                 fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])
def _restore_step_from_checkpoint(ckpt):
    """Return the step number encoded in a checkpoint path like ``.../ctpn_100.ckpt``."""
    # Parse only the file name so that dots or underscores in the directory
    # part (for example ``./checkpoints_mlt``) cannot break the parsing.
    stem = os.path.basename(ckpt).split('.')[0]
    return int(stem.split('_')[-1])


def main(argv=None):
    """Train the model, writing summaries and periodic checkpoints.

    Creates a timestamped log directory under ``FLAGS.logs_path``, builds the
    model and loss on the GPU given by ``FLAGS.gpu``, and runs the training
    loop from the restored step (or 0) up to ``FLAGS.max_steps``.  The learning
    rate is multiplied by ``FLAGS.decay_rate`` every ``FLAGS.decay_steps``
    steps, and a ``ctpn_<step>.ckpt`` checkpoint is saved every
    ``FLAGS.save_checkpoint_steps`` steps.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If ``FLAGS.restore`` is set but no checkpoint is
            found in ``FLAGS.checkpoint_path``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    now = datetime.datetime.now()
    run_stamp = now.strftime("%Y-%m-%d-%H-%M-%S")
    os.makedirs(FLAGS.logs_path + run_stamp)
    if not os.path.exists(FLAGS.checkpoint_path):
        os.makedirs(FLAGS.checkpoint_path)

    input_image = tf.placeholder(tf.float32,
                                 shape=[None, None, None, 3],
                                 name='input_image')
    input_bbox = tf.placeholder(tf.float32, shape=[None, 5], name='input_bbox')
    input_im_info = tf.placeholder(tf.float32,
                                   shape=[None, 3],
                                   name='input_im_info')

    global_step = tf.get_variable('global_step', [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    learning_rate = tf.Variable(FLAGS.learning_rate, trainable=False)
    tf.summary.scalar('learning_rate', learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)

    gpu_id = int(FLAGS.gpu)
    with tf.device('/gpu:%d' % gpu_id):
        with tf.name_scope('model_%d' % gpu_id) as scope:
            bbox_pred, cls_pred, cls_prob = model.model(input_image)
            total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = model.loss(
                bbox_pred, cls_pred, input_bbox, input_im_info)
            batch_norm_updates_op = tf.group(
                *tf.get_collection(tf.GraphKeys.UPDATE_OPS, scope))
            grads = opt.compute_gradients(total_loss)

    apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)

    summary_op = tf.summary.merge_all()
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # One no-op that forces the averaging, gradient and batch-norm updates.
    with tf.control_dependencies(
            [variables_averages_op, apply_gradient_op, batch_norm_updates_op]):
        train_op = tf.no_op(name='train_op')

    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_path + run_stamp,
                                           tf.get_default_graph())

    init = tf.global_variables_initializer()

    if FLAGS.pretrained_model_path is not None:
        variable_restore_op = slim.assign_from_checkpoint_fn(
            FLAGS.pretrained_model_path,
            slim.get_trainable_variables(),
            ignore_missing_vars=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.95
    config.allow_soft_placement = True

    with tf.Session(config=config) as sess:
        if FLAGS.restore:
            ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
            if ckpt is None:
                # Fail clearly instead of with an AttributeError on None.
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            restore_step = _restore_step_from_checkpoint(ckpt)
            print("continue training from previous checkpoint {}".format(
                restore_step))
            saver.restore(sess, ckpt)
        else:
            sess.run(init)
            restore_step = 0
            if FLAGS.pretrained_model_path is not None:
                variable_restore_op(sess)

        data_generator = data_provider.get_batch(num_workers=FLAGS.num_readers)
        start = time.time()
        for step in range(restore_step, FLAGS.max_steps):
            data = next(data_generator)
            ml, tl, _, summary_str = sess.run(
                [model_loss, total_loss, train_op, summary_op],
                feed_dict={
                    input_image: data[0],
                    input_bbox: data[1],
                    input_im_info: data[2]
                })

            summary_writer.add_summary(summary_str, global_step=step)

            if step != 0 and step % FLAGS.decay_steps == 0:
                # NOTE(review): tf.assign adds a new op to the graph on every
                # decay step; harmless at this frequency but worth knowing.
                sess.run(
                    tf.assign(learning_rate,
                              learning_rate.eval() * FLAGS.decay_rate))

            if step % 10 == 0:
                avg_time_per_step = (time.time() - start) / 10
                start = time.time()
                print(
                    'Step {:06d}, model loss {:.4f}, total loss {:.4f}, {:.2f} seconds/step, LR: {:.6f}'
                    .format(step, ml, tl, avg_time_per_step,
                            learning_rate.eval()))

            if (step + 1) % FLAGS.save_checkpoint_steps == 0:
                filename = ('ctpn_{:d}'.format(step + 1) + '.ckpt')
                filename = os.path.join(FLAGS.checkpoint_path, filename)
                saver.save(sess, filename,
                           write_meta_graph=False,
                           write_state=True)
                print('Write model to: {:s}'.format(filename))
def main(argv=None):
    """Detect text in the top third of each image and save one crop per box.

    Recreates ``FLAGS.output_path``, restores the checkpoint referenced in
    ``FLAGS.checkpoint_path`` and processes each path from ``get_images()``.
    Only the top third of the image rows is analysed.  Every detected box is
    cropped from the resized image, scaled by ``1 / rh`` and ``1 / rw`` and
    written as ``<name>_<i><ext>``; the box coordinates and a score are
    written to ``<name>.txt``.  Timing of each stage is printed.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)

                # cv2.imread returns None on failure; check it explicitly.
                bgr = cv2.imread(im_fn)
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                # Keep only the top third of the rows (region of interest).
                # Floor division is required: ``/`` gives a float, which is
                # not a valid slice index, so every image used to be reported
                # as unreadable.
                im = bgr[:bgr.shape[0] // 3, :, ::-1]

                start0 = time.time()
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                print("resize_image cost time: {:.2f}s".format(time.time() -
                                                               start0))

                start = time.time()
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })
                print("sess.run cost time: {:.2f}s".format(time.time() -
                                                           start))

                start = time.time()
                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]
                print("proposal_layer cost time: {:.2f}s".format(time.time() -
                                                                 start))

                start = time.time()
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` has been removed from NumPy; the builtin int is
                # the type it aliased.
                boxes = np.array(boxes, dtype=int)
                print("textdetector cost time: {:.2f}s".format(time.time() -
                                                               start))
                print("total cost time: {:.2f}s".format(time.time() - start0))

                stem, ext = os.path.splitext(os.path.basename(im_fn))
                for i, box in enumerate(boxes):
                    # Crop from the untouched resized image each time.  The
                    # old code overwrote ``img``, so every box after the first
                    # was cut out of the previous crop.
                    crop = cv2.resize(img[box[1]:box[5], box[0]:box[4]],
                                      None, None,
                                      fx=1.0 / rh,
                                      fy=1.0 / rw,
                                      interpolation=cv2.INTER_LINEAR)
                    # Insert the index before the extension only; replacing
                    # every '.' mangled names containing several dots.
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     stem + '_' + str(i) + ext),
                        crop[:, :, ::-1])

                # Previous behaviour, kept for reference: draw all boxes on
                # one image instead of saving crops.
                # for i, box in enumerate(boxes):
                #     cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                #                   thickness=2)
                # img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                # cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                with open(os.path.join(FLAGS.output_path, stem) + ".txt",
                          "w") as f:
                    # NOTE(review): ``scores`` is indexed per proposal while
                    # ``boxes`` is indexed per detected box -- confirm that
                    # scores[i] is the intended score for box i.
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
def main(argv=None):
    """Detect text in a slice of the input images and save the cropped region.

    For the ``no_seperate_mianzhi_train`` / ``no_seperate_mianzhi_test`` modes
    the output directory is recreated first.  Images ``im_fn_list[int(a):b]``
    are processed: each is resized, run through the detector, cropped to the
    detected box and written to ``FLAGS.output_path``.  When processing an
    image fails, a fixed placeholder image is written under the name
    ``xzywa<original name>`` instead.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    # NOTE(review): the source's nesting is ambiguous here; this assumes the
    # mode check guards only the output-directory reset -- confirm.
    if train_or_test_1800 in ('no_seperate_mianzhi_train',
                              'no_seperate_mianzhi_test'):
        if os.path.exists(FLAGS.output_path):
            shutil.rmtree(FLAGS.output_path)
        os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            ii = a
            for im_fn in im_fn_list[int(a):b]:  # modify the slice here
                ii += 1
                print(str(ii) + '===============' + str(ii))
                print(im_fn)
                start = time.time()

                # cv2.imread returns None on failure; check it explicitly
                # rather than catching everything with a bare ``except``.
                bgr = cv2.imread(im_fn)
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis

                try:
                    img, (rh, rw) = resize_image(im)
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })

                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    scores = textsegs[:, 0]
                    # Each image has N polys; textsegs holds the four
                    # coordinates of each of them.
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')
                    # xzy: the detector was modified internally to return
                    # only one box.
                    boxes = textdetector.detect(textsegs,
                                                scores[:, np.newaxis],
                                                img.shape[:2])
                    # ``np.int`` has been removed from NumPy; the builtin int
                    # is the type it aliased.
                    boxes = np.array(boxes, dtype=int)

                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    for box in boxes:
                        # cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0),
                        #               thickness=2)
                        # xzy: crop to the box.  With more than one box each
                        # crop would be taken from the previous crop.
                        img = img[int(box[1]):int(box[5]),
                                  int(box[0]):int(box[2])]
                    img = cv2.resize(img, None, None,
                                     fx=1.0 / rh,
                                     fy=1.0 / rw,
                                     interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     os.path.basename(im_fn)),
                        img[:, :, ::-1])
                except Exception as e:
                    # xzy: seen with "Corrupt JPEG data: premature end of data
                    # segment" (WBNGQ9R7.jpg may be one such file).  A fixed
                    # placeholder image is written so the output set stays
                    # complete.
                    print("{}: {}".format(type(e).__name__, e))
                    placeholder = cv2.imread(
                        "../../../dataset_warm_up/train_data/13X6EGWI.jpg")
                    # Do not crash inside the handler when the placeholder
                    # itself is missing.
                    if placeholder is not None:
                        cv2.imwrite(
                            os.path.join(
                                FLAGS.output_path,
                                "xzywa" + str(os.path.basename(im_fn))),
                            placeholder[:, :, ::-1])
                    print(str(im_fn) + " is broken!!!!!!!!")
def process():
    """Locate text boxes in every input image, OCR them and yield progress.

    This is a generator.  For each path returned by ``get_images()`` it yields
    the same ``output`` dict, updated in place with these keys:

    * ``path``: the image path,
    * ``percentage``: ``count / len(im_fn_list)``,
    * ``locate_time``: seconds spent on detection,
    * ``ocr_time``: seconds spent on OCR,
    * ``ocr_text``: the text recognised in each box,
    * ``err``: True when the image could not be read.

    Because the dict is reused, callers must copy it if they want to keep a
    snapshot.  Four-digit numeric results are additionally appended to
    ``OCR_text/<folder>/whole-<id>.txt`` below ``root``.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    output = {
        'path': None,
        'percentage': 0,
        'locate_time': 0,
        'ocr_time': 0,
        'ocr_text': [],
        'err': False
    }

    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            start_all = time.time()
            for count, im_fn in enumerate(im_fn_list):
                output["err"] = False
                output["path"] = im_fn
                output["ocr_text"].clear()
                output["percentage"] = count / len(im_fn_list)

                print('===============')
                # im_fn example:
                # ../four_angles/recording_2019_10_30/bbq/cam_delicacies-17760-17880/73-500_0.jpg
                print(im_fn)
                start = time.time()

                # cv2.imread returns None on failure; check it explicitly
                # rather than catching everything with a bare ``except``.
                bgr = cv2.imread(im_fn)
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    output["err"] = True
                    yield output
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                # DETECT_MODE can be H / O depending on context
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` has been removed from NumPy; the builtin int is
                # the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                output["locate_time"] = cost_time
                print("cost time: {:.2f}s".format(cost_time))

                # ---- text recognition ----
                text_start = time.time()
                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # Rectangle given by box values 0, 1, 4 and 5.
                    startX, startY = box[0], box[1]
                    endX, endY = box[4], box[5]

                    # Thresholded per box on purpose: ``img`` already carries
                    # the outlines drawn so far, exactly as before.
                    _, thresh = cv2.threshold(img, 127, 255,
                                              cv2.THRESH_BINARY_INV)
                    roi = thresh[startY:endY, startX:endX]

                    # Tesseract options: the ``digits`` language, OEM 1 and
                    # PSM 7 (treat the ROI as a single line of text).
                    config = ("-l digits --oem 1 --psm 7")
                    # config = ("--oem 0 -c tessedit_char_whitelist=0123456789")
                    text = pytesseract.image_to_string(roi, config=config)
                    output["ocr_text"].append(text)

                    # Only report purely numeric results.
                    if text.isdigit():
                        print(text)
                        if len(text) == 4:
                            data = im_fn.split("/")
                            fn = data[-1]  # e.g. 73-500_0.jpg
                            # e.g. recording_2019_10_30/bbq/cam_bbq-8000-18120
                            folder = data[-4] + '/' + data[-3] + '/' + data[-2]
                            print(folder + '/' + fn)

                            id_num = fn.split("-")[0]  # e.g. 73

                            directory = os.path.join(
                                root, 'OCR_text/' + folder + '/')
                            if not os.path.exists(directory):
                                os.makedirs(directory)

                            # ``with`` closes the file even if the write fails.
                            with open(directory + 'whole-' + id_num + '.txt',
                                      'a') as file_whole:
                                file_whole.write(folder + '/' + fn + ':' +
                                                 text + '\n')

                output["ocr_time"] = time.time() - text_start

                # Disabled: save the annotated image and the box list.
                # img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR)
                # cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])
                # with open(os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt",
                #           "w") as f:
                #     for i, box in enumerate(boxes):
                #         line = ",".join(str(box[k]) for k in range(8))
                #         line += "," + str(scores[i]) + "\r\n"
                #         f.writelines(line)

                yield output

            cost_time_all = (time.time() - start_all)
            print("Total cost time: {:.2f}s".format(cost_time_all))
def main(argv=None):
    """Rotate a fixed sample image, detect text and save the enclosing crop.

    Restores the checkpoint referenced in ``checkpoints_mlt/``, rotates
    ``hoadontiendien-3.png`` with ``rotate_img`` (saved as ``rotated2.png``),
    runs detection and writes the region enclosing all detected boxes, with
    the box outlines drawn on it, to ``rotate_cuted2.png``.  Nothing is
    cropped when no box is detected.

    Args:
        argv: Unused.

    Raises:
        FileNotFoundError: If no checkpoint state is found in
            ``checkpoints_mlt/``.
    """
    checkpoint_dir = 'checkpoints_mlt/'

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        print("init sess")
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_dir)
            if ckpt_state is None:
                raise FileNotFoundError(
                    'No checkpoint found in {}'.format(checkpoint_dir))
            model_path = os.path.join(
                checkpoint_dir,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            print('===============')
            im = rotate_img('hoadontiendien-3.png')
            print(im.shape)
            cv2.imwrite('rotated2.png', im[:, :, :])
            print("write rotate img")

            start = time.time()
            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={
                    input_image: [img],
                    input_im_info: im_info
                })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # ``np.int`` has been removed from NumPy; the builtin int is the
            # type it aliased.
            boxes = np.array(boxes, dtype=int)

            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            # min()/max() raise ValueError on an empty sequence, so stop here
            # when nothing was detected.
            if len(boxes) == 0:
                print("No text boxes detected; nothing to crop.")
                return

            # Rectangle enclosing every detected box.
            box_minx = min(b[0] for b in boxes)
            box_miny = min(b[1] for b in boxes)
            box_maxx = max(b[4] for b in boxes)
            box_maxy = max(b[5] for b in boxes)
            print(box_minx, box_miny)
            print(box_maxx, box_maxy)

            # ``crop_img`` is a slice of ``img``, so the outlines drawn on
            # ``img`` below also appear in the saved crop.
            crop_img = img[box_miny:box_maxy, box_minx:box_maxx]
            print(crop_img.shape)

            for box in boxes:
                cv2.polylines(img,
                              [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                              True,
                              color=(0, 255, 0),
                              thickness=1)

            cv2.imwrite('rotate_cuted2.png', crop_img[:, :, :])
def main(im=None):
    """Detect text in an in-memory image and return reduced box coordinates.

    Builds the detection graph, restores the latest checkpoint listed in
    ``checkpoints_mlt/`` and runs detection on ``resize_image(im)``.

    Args:
        im: Image passed straight to ``resize_image``.

    Returns:
        list: One ``[box[0], box[1], box[2], box[7]]`` entry per detected
        box. Elements 0 and 2 are divided by ``rh`` and elements 1 and 7 by
        ``rw`` (the two ratios returned by ``resize_image``). The boxes are
        stored in an integer array, so each quotient is truncated to an
        integer when it is written back.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    checkpoint_path = 'checkpoints_mlt/'

    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')
        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.compat.v1.constant_initializer(0),
            trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            img, (rh, rw) = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])

            bbox_pred_val, cls_prob_val = sess.run(
                [bbox_pred, cls_prob],
                feed_dict={input_image: [img], input_im_info: im_info})

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # `np.int` was only an alias of the builtin `int` and no longer
            # exists in NumPy >= 1.24.
            boxes = np.array(boxes, dtype=int)

            # NOTE(review): elements 0/2 are scaled by `rh` and 1/7 by `rw`.
            # If resize_image returns (height_ratio, width_ratio) and even
            # indices are x coordinates, the two ratios are swapped here -
            # confirm against resize_image before changing.
            return_array = []
            for box in boxes:
                box[0] = box[0] / rh
                box[2] = box[2] / rh
                box[1] = box[1] / rw
                box[7] = box[7] / rw
                return_array.append([box[0], box[1], box[2], box[7]])
            return return_array
def main(argv=None):
    """Save, for every input image, a padded crop of its most elongated box.

    ``FLAGS.output_path`` is deleted and recreated. For each path returned by
    ``get_images()`` the image is read, resized and run through the
    detector. The boxes are scanned in order while tracking the largest
    width/height ratio seen so far; whenever a box sets a new maximum that
    exceeds 3, its crop (10 px of vertical padding, half the box height of
    horizontal padding) is written to ``FLAGS.output_path`` under the input
    file's base name, replacing any earlier crop for that image.

    Images with a box count other than one are reported on stdout.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                try:
                    # imread yields None for unreadable files, which makes
                    # the slice raise; BGR is flipped to RGB here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` no longer exists in NumPy >= 1.24; the builtin
                # `int` is the type it aliased.
                boxes = np.array(boxes, dtype=int)

                if len(boxes) != 1:
                    print(im_fn, len(boxes))

                flag = -1  # largest width/height ratio seen for this image
                for box in boxes:
                    arr = box[:8].astype(np.int32).reshape((-1, 2))
                    x1 = min(arr[:, 0])
                    x2 = max(arr[:, 0])
                    y1 = min(arr[:, 1])
                    y2 = max(arr[:, 1])
                    pad_w = int((y2 - y1) * 0.5)
                    # Clamp the padded start indices: a negative slice start
                    # would count from the far edge and give a wrong or
                    # empty crop for boxes close to the top/left border.
                    top = max(y1 - 10, 0)
                    left = max(x1 - pad_w, 0)
                    img_cp = img[top:y2 + 10, left:x2 + pad_w, :]

                    ratio = (x2 - x1) / (y2 - y1)
                    # NOTE(review): the original was stored without
                    # indentation; nesting the write under the "new maximum"
                    # branch is the reconstruction that saves the widest
                    # box - confirm.
                    if flag < ratio:
                        flag = ratio
                        if flag > 3:
                            cv2.imwrite(
                                os.path.join(FLAGS.output_path,
                                             os.path.basename(im_fn)),
                                img_cp[:, :, ::-1])
def main(argv=None):
    """Restore the latest checkpoint and export the graph as a SavedModel.

    ``FLAGS.output_path`` is deleted and recreated, the detection graph is
    built, and the checkpoint listed in ``FLAGS.checkpoint_path`` is
    restored. The session is then exported to
    ``FLAGS.export_model_dir/<FLAGS.model_version>`` with a single
    ``predict_images`` signature:

    * input ``images`` -> the ``input_image`` placeholder;
    * outputs ``bbox_pred_output``, ``cls_pred_output`` and
      ``cls_prob_output`` -> the three tensors returned by ``model.model``.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        ema = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(ema.variables_to_restore())

        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            checkpoint = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            checkpoint_name = os.path.basename(
                checkpoint.model_checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, checkpoint_name)
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # Everything below is the export step added on top of the
            # restore logic.
            export_path = os.path.join(
                tf.compat.as_bytes(FLAGS.export_model_dir),
                tf.compat.as_bytes(str(FLAGS.model_version)))
            print('Exporting trained model to', export_path)
            builder = tf.saved_model.builder.SavedModelBuilder(export_path)

            # TensorInfo protos describing the signature's tensors.
            build_info = tf.saved_model.utils.build_tensor_info
            signature_inputs = {'images': build_info(input_image)}
            signature_outputs = {
                'bbox_pred_output': build_info(bbox_pred),
                'cls_pred_output': build_info(cls_pred),
                'cls_prob_output': build_info(cls_prob),
            }

            # Predict-API signature: image batch in, the three model
            # outputs back.
            predict_method = (
                tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
            prediction_signature = (
                tf.saved_model.signature_def_utils.build_signature_def(
                    inputs=signature_inputs,
                    outputs=signature_outputs,
                    method_name=predict_method))

            builder.add_meta_graph_and_variables(
                sess, [tf.saved_model.tag_constants.SERVING],
                signature_def_map={'predict_images': prediction_signature})

            builder.save(as_text=True)
            print('Done exporting!')
def main(argv):
    """Detect text in every input image and save one crop per detected box.

    ``FLAGS.output_path`` is deleted and recreated. For each path returned by
    ``get_images()`` the image is read, resized and run through the
    detector. Each detected box is cut out of an unannotated resized copy of
    the image and written to ``FLAGS.output_path + str(i) + '.png'``, where
    ``i`` is the box index within the current image.

    NOTE(review): the index restarts for every image, so crops of later
    images overwrite those of earlier ones. ``of_list`` collects the written
    paths but is neither returned nor used inside this function.

    Args:
        argv: Unused.
    """
    of_list = []
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            print(im_fn_list)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # imread yields None for unreadable files, which makes
                    # the slice raise; BGR is flipped to RGB here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue
                print("printing im.shape")
                print(im.shape)

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` no longer exists in NumPy >= 1.24; the builtin
                # `int` is the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Unannotated resized copy used for the crops. The original
                # rebuilt it for every box; it depends only on `im`
                # (assumes resize_image is deterministic), so build it once.
                clean_img, _ = resize_image(im)

                for i, box in enumerate(boxes):
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    box_arr = box[:8].astype(np.int32)
                    # box_arr[0]/[2] index image columns and box_arr[1]/
                    # [3]/[5] index rows in the slice below.
                    left = box_arr[0]
                    top = box_arr[1]
                    box_w = box_arr[2] - box_arr[0]
                    box_h = box_arr[5] - box_arr[3]
                    img2 = clean_img[top:top + box_h, left:left + box_w, :]

                    out_path = FLAGS.output_path + str(i) + '.png'
                    # `im` was flipped to RGB after reading; imwrite expects
                    # BGR, so flip back before saving (assumes resize_image
                    # keeps the channel order).
                    cv2.imwrite(out_path, img2[:, :, ::-1])
                    of_list.append(out_path)
# start(self): blocking worker loop. It loads the detection model once and
# then processes image paths taken from self.workerQueue.
#
# Steps visible in the code below:
#   * sets self.running = True and defines the 'gpu' and 'checkpoint_path'
#     flags (the latter defaulting to self.checkpoint_path);
#   * builds the graph, creates a session with allow_soft_placement and
#     gpu_options.allow_growth enabled, and restores the checkpoint listed
#     under self.checkpoint_path;
#   * while self.running: takes a path from self.workerQueue and leaves the
#     loop when self.is_stop_signal(path) is true;
#   * reads the image (on failure the exception is logged and the item is
#     skipped), resizes it with self.resize_image and runs the model,
#     proposal_layer and TextDetector(DETECT_MODE='H');
#   * writes a .json dump, the annotated image and a .txt listing into
#     self.outputPath, and calls
#     self.callback(fileName=..., ctpnRes=self.wrapResult(boxes, scores))
#     when self.callback is set.
#
# English rendering of the inline comments, in order of appearance: load
# path of the trained model / graph / placeholder - input image /
# placeholder - input image info / create the variable global_step /
# session configuration / GPU memory fraction / allocate memory on demand /
# load parameters from the checkpoint (ckpt) file / model path / restore
# variables / start timing / shrink the image to at most 600 * 1200 /
# height, width, channel count / run the computation / refine anchors with
# the RPN regression targets, then sort, apply NMS and output a blob of
# proposal coordinates plus an all-zero batch index / stop timing / convert
# the Python list to a NumPy array.
#
# NOTE(review): this definition is stored on collapsed lines, so block
# nesting - in particular how many statements `if self.debug:` guards -
# cannot be read from the text. Restore the original layout before editing.
# NOTE(review): `dtype=np.int` relies on an alias that no longer exists in
# NumPy >= 1.24; `dtype=int` is the drop-in replacement.
# NOTE(review): the bare `except:` also catches KeyboardInterrupt and
# SystemExit.
# NOTE(review): the flags are defined on every call; a second call to
# start() would presumably fail with a duplicate-flag error - confirm.
def start(self) : self.running = True tf.app.flags.DEFINE_string('gpu', '0', '') # 已经训练好的模型加载路径 tf.app.flags.DEFINE_string('checkpoint_path', self.checkpoint_path, '') # 图 with tf.compat.v1.get_default_graph().as_default(): # 占位符 - 输入图片 input_image = tf.compat.v1.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image') # 占位符 - 输入图片信息 input_im_info = tf.compat.v1.placeholder(tf.float32, shape=[None, 3], name='input_im_info') # 创建一个变量 global_step global_step = tf.compat.v1.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) # tensorflow op bbox_pred, cls_pred, cls_prob = model.model(input_image) variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) saver = tf.compat.v1.train.Saver(variable_averages.variables_to_restore()) # tensorflow session 配置 sessionConfig = tf.compat.v1.ConfigProto(allow_soft_placement=True) # 显存占用率 # sessionConfig.gpu_options.per_process_gpu_memory_fraction = 0.3 # 动态申请内存 sessionConfig.gpu_options.allow_growth = True with tf.compat.v1.Session(config=sessionConfig) as sess: # 基于 checkpoint 文件(ckpt)加载参数 ckpt_state = tf.compat.v1.train.get_checkpoint_state(self.checkpoint_path) # 模型路径 model_path = os.path.join(self.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) logger.info(u'Restore from {}'.format(model_path)) # 恢复变量 saver.restore(sess, model_path) while self.running: logger.info(u'等待接收图片') imgFilePath = self.workerQueue.get() if self.is_stop_signal(imgFilePath): logger.info(u'接收到队列停止信号') break logger.info(u'开始处理图片: {}'.format(imgFilePath)) # 开始计时 start = time.time() try: im = cv2.imread(imgFilePath)[:, :, ::-1] except: logger.exception(sys.exc_info()) continue # 压缩图片尺寸,不超过 600 * 1200 img, (rh, rw) = self.resize_image(im) # 高、宽、通道数 h, w, c = img.shape im_info = np.array([h, w, c]).reshape([1, 3]) # 执行运算 bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob], feed_dict={input_image: [img], input_im_info: im_info}) # 
根据RPN目标回归值修正anchors并做排序、nms等后处理输出由proposal坐标和batch_ind全0索引组成的blob textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info) scores = textsegs[:, 0] textsegs = textsegs[:, 1:5] textdetector = TextDetector(DETECT_MODE='H') boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2]) # 结束计时 logger.info(u'总计耗时: {}'.format(time.time() - start)) if self.debug: with open(os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0]) + ".json", "w") as f: f.writelines(json.dumps(self.wrapResult(boxes, scores))) # 将 python 数组 转换为 numpy 数组 boxes = np.array(boxes, dtype=np.int) for i, box in enumerate(boxes): cv2.polylines(img, [box[:8].astype(np.int32).reshape((-1, 1, 2))], True, color=(0, 255, 0), thickness=2) img = cv2.resize(img, None, None, fx=1.0 / rh, fy=1.0 / rw, interpolation=cv2.INTER_LINEAR) cv2.imwrite(os.path.join(self.outputPath, os.path.basename(imgFilePath)), img[:, :, ::-1]) with open(os.path.join(self.outputPath, os.path.splitext(os.path.basename(imgFilePath))[0]) + ".txt", "w") as f: for i, box in enumerate(boxes): line = ",".join(str(box[k]) for k in range(8)) line += "," + str(scores[i]) + "\n" f.writelines(line) if self.callback : self.callback(fileName = imgFilePath, ctpnRes = self.wrapResult(boxes, scores))
def main(argv=None):
    """Detect text boxes, pass their crops to the extractor and save results.

    ``FLAGS.output_path`` is deleted and recreated and a
    ``TessaractImpl(CONFIG)`` extractor is created. For each path returned
    by ``get_images()``:

    * the image is read, resized and run through the detector
      (``DETECT_MODE='O'``);
    * every box is outlined on the resized image and its crop collected as
      ``{"boxImg": crop}``;
    * the result of ``textExtractor.extractData`` on the collected crops is
      printed;
    * the annotated image and a ``<name>.txt`` file (eight coordinates plus
      a score per line) are written to ``FLAGS.output_path``.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    textExtractor = TessaractImpl(CONFIG)

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # imread yields None for unreadable files, which makes
                    # the slice raise; BGR is flipped to RGB here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` no longer exists in NumPy >= 1.24; the builtin
                # `int` is the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                dataBoxes = []
                for box in boxes:
                    outline = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    # Drawn before cropping, so the outline is part of the
                    # crop handed to the extractor (as in the original).
                    cv2.polylines(img, [outline], True,
                                  color=(0, 255, 0), thickness=2)
                    startX, startY, endX, endY = crop_image_box(outline)
                    dataBoxes.append(
                        {"boxImg": img[startY:endY, startX:endX]})
                print(textExtractor.extractData(dataBoxes))

                img = cv2.resize(img, None, None,
                                 fx=1.0 / rh, fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
def main(argv=None):
    """Detect text in quarter-turned images; log boxes and save their crops.

    For each path returned by ``get_images()`` the image is read, transposed
    and flipped vertically (a quarter turn), resized and run through the
    detector. For every detected box:

    * a line ``<file name>,<8 coordinates>,<box index>,<score>`` is appended
      to ``<FLAGS.output_path>/txt/cpth_result.txt``;
    * the box area is cut out of the image (scaled back with the inverse
      resize ratios) and written to
      ``<FLAGS.output_path>/img/<stem>_<index><extension>``.

    The ``txt`` and ``img`` sub-directories are not created here.

    Args:
        argv: Unused.
    """
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # The original read the file a second time outside this
                # guard only to take an unused shape; that read crashed on
                # unreadable files before the handler below could skip them.
                try:
                    im = cv2.imread(im_fn)[:, :, ::-1]
                    # Rotate upright (portrait) images.
                    im = cv2.transpose(im)
                    im = cv2.flip(im, 0)
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` no longer exists in NumPy >= 1.24; the builtin
                # `int` is the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                img = cv2.resize(img, None, None,
                                 fx=1.0 / rh, fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)

                base_name = os.path.basename(im_fn)
                # Split the extension instead of replacing the literal
                # '.jpg': with any other extension every crop of an image
                # used to be written to the same file name.
                stem, ext = os.path.splitext(base_name)
                result_path = os.path.join(FLAGS.output_path, 'txt',
                                           "cpth_result.txt")
                with open(result_path, "a") as f:
                    for i, box in enumerate(boxes):
                        line = base_name
                        line += ","
                        line += ",".join(str(box[k]) for k in range(8))
                        line += ","
                        line += str(i)
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

                        # Same ratio pairing as the cv2.resize call above,
                        # so the coordinates match the resized image.
                        maxy = int(max(box[1:8:2]) / rw)
                        miny = int(min(box[1:8:2]) / rw)
                        maxx = int(max(box[:8:2]) / rh)
                        minx = int(min(box[:8:2]) / rh)
                        img_new = img[miny:maxy, minx:maxx]

                        # `im` was flipped to RGB after reading; imwrite
                        # expects BGR, so flip back before saving (assumes
                        # resize_image keeps the channel order).
                        cv2.imwrite(
                            os.path.join(FLAGS.output_path, 'img',
                                         stem + '_' + str(i) + ext),
                            img_new[:, :, ::-1])
def main(argv=None):
    """Detect text boxes, OCR each one and save image plus text results.

    ``FLAGS.output_path`` is deleted and recreated. For each path returned
    by ``get_images()`` the image is read, resized and run through the
    detector. Every box is outlined on the resized image, cropped with 5 px
    of vertical padding, converted to grayscale, passed through
    ``unsharp_mask`` and recognised with
    ``pytesseract.image_to_string(..., config='-l vie --psm 13')``. A failed
    recognition is recorded as the text ``"error"``.

    Outputs per image, both in ``FLAGS.output_path``: the annotated image
    under its original base name, and ``<name>.txt`` holding one line of
    eight coordinates plus the recognised text per box.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    print(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)
        bbox_pred, cls_pred, cls_prob = model.model(input_image)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # imread yields None for unreadable files, which makes
                    # the slice raise; BGR is flipped to RGB here.
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])

                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={input_image: [img], input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` no longer exists in NumPy >= 1.24; the builtin
                # `int` is the type it aliased.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                texts = []
                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # Clamp the padded start row: a negative slice start
                    # would count from the bottom edge and give an empty
                    # crop, which cvtColor rejects.
                    top = max(box[1] - 5, 0)
                    crop_img = img[top:box[5] + 5, box[0]:box[4]]
                    # The image was flipped to RGB after reading, so the
                    # RGB conversion code is the matching one (assumes
                    # resize_image keeps the channel order).
                    crop_img = cv2.cvtColor(crop_img, cv2.COLOR_RGB2GRAY)
                    crop_img = unsharp_mask(crop_img)
                    try:
                        text = pytesseract.image_to_string(
                            crop_img, config='-l vie --psm 13')
                    except Exception:
                        print("OCR Error")
                        text = "error"
                    print(text)
                    texts.append(text)

                img = cv2.resize(img, None, None,
                                 fx=1.0 / rh, fy=1.0 / rw,
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w", encoding="UTF-8") as f:
                    for box, text in zip(boxes, texts):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(text) + "\r\n"
                        f.write(line)