def get_predictor(checkpoint_path):
    """Build the EAST inference graph and restore its weights.

    The graph is added to the current default TensorFlow graph.

    Args:
        checkpoint_path: Directory that holds the checkpoint files, including
            the checkpoint state file read by
            ``tf.train.get_checkpoint_state``.

    Returns:
        A tuple ``(sess, f_score, f_geometry, input_images, global_step)``:
        the session holding the restored weights, the two output tensors of
        ``model.model``, the image placeholder to feed, and the global step
        variable.

    Raises:
        ValueError: If no checkpoint state is found in ``checkpoint_path``.
    """
    logger.info('loading model')

    input_images = tf.placeholder(
        tf.float32, shape=[None, None, None, 3], name='input_images')
    global_step = tf.get_variable(
        'global_step', [],
        initializer=tf.constant_initializer(0),
        trainable=False)

    f_score, f_geometry = model.model(input_images, is_training=False)

    # Restore the moving-average (shadow) values of the trained variables.
    variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
    saver = tf.train.Saver(variable_averages.variables_to_restore())

    # Resolve the checkpoint before opening a session so that a missing
    # checkpoint fails fast with a readable message and leaks nothing.
    ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise ValueError(
            'No checkpoint found in {!r}'.format(checkpoint_path))
    model_path = os.path.join(
        checkpoint_path,
        os.path.basename(ckpt_state.model_checkpoint_path))

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    logger.info('Restore from {}'.format(model_path))
    try:
        saver.restore(sess, model_path)
    except Exception:
        # Do not leak the session when the restore itself fails.
        sess.close()
        raise

    return sess, f_score, f_geometry, input_images, global_step
def east_detect(self):
    """Detect text boxes with EAST in every image from ``self.get_images()``.

    Builds the EAST inference graph in a fresh ``tf.Graph`` and restores the
    weights found under ``self.checkpoint_dir``. For each image it writes
    ``<name>.txt`` into ``self.output_dir``, where ``<name>`` is the image
    base name up to its first dot. Each kept box becomes one line of eight
    comma-separated integers and is also passed to ``self.cut_roi``.
    Images that OpenCV cannot read are reported and skipped.

    Raises:
        OSError: If ``self.output_dir`` cannot be created.
        ValueError: If no checkpoint state is found in
            ``self.checkpoint_dir``.
    """
    import errno

    try:
        os.makedirs(self.output_dir)
    except OSError as e:
        # An already existing output directory is fine; anything else is not.
        if e.errno != errno.EEXIST:
            raise

    with tf.Graph().as_default():
        input_images = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_images')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        # Create the session; the context manager closes it on exit.
        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            # Initialise the model parameters from the checkpoint files.
            ckpt_state = tf.train.get_checkpoint_state(self.checkpoint_dir)
            if ckpt_state is None or not ckpt_state.model_checkpoint_path:
                raise ValueError(
                    'No checkpoint found in {!r}'.format(
                        self.checkpoint_dir))
            model_path = os.path.join(
                self.checkpoint_dir,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in self.get_images():
                im = cv2.imread(im_fn)
                if im is None:
                    # cv2.imread signals an unreadable file by returning
                    # None; skip it instead of crashing the whole batch.
                    print('Skip unreadable image {}'.format(im_fn))
                    continue
                # Reverse the channel axis before feeding the network.
                im = im[:, :, ::-1]

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = self.resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = self.detect(
                    score_map=score, geo_map=geometry, timer=timer)
                print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.
                      format(im_fn, timer['net'] * 1000,
                             timer['restore'] * 1000, timer['nms'] * 1000))

                if boxes is not None:
                    # Map box corners back to original image coordinates.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                if boxes is None:
                    continue

                # Save the detections of this image to a text file.
                res_file = os.path.join(
                    self.output_dir,
                    '{}.txt'.format(os.path.basename(im_fn).split('.')[0]))
                with open(res_file, 'w') as f:
                    for i, box in enumerate(boxes):
                        # to avoid submitting errors
                        box = self.sort_poly(box.astype(np.int32))
                        # Drop boxes whose first or last edge is under 5 px.
                        if (np.linalg.norm(box[0] - box[1]) < 5
                                or np.linalg.norm(box[3] - box[0]) < 5):
                            continue
                        f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                            box[0, 0], box[0, 1],
                            box[1, 0], box[1, 1],
                            box[2, 0], box[2, 1],
                            box[3, 0], box[3, 1],
                        ))
                        # Undo the channel reversal for the ROI helper.
                        self.cut_roi(im[:, :, ::-1], box, im_fn, i)
import tensorflow as tf from east import model from east.icdar import restore_rectangle from east import lanms from east.eval import resize_image, sort_poly, detect checkpoint_path = "east_icdar2015_resnet_v1_50_rbox/" input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images') global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False) f_score, f_geometry = model.model(input_images, is_training=False) variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step) saver = tf.train.Saver(variable_averages.variables_to_restore()) config = tf.ConfigProto(allow_soft_placement=True) config.gpu_options.per_process_gpu_memory_fraction = 0.65 sess = tf.Session(config=config) ckpt_state = tf.train.get_checkpoint_state(checkpoint_path) model_path = os.path.join(checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path)) logger.info('Restore from {}'.format(model_path)) saver.restore(sess, model_path) @functools.lru_cache(maxsize=1)
def main(argv=None):
    """Run EAST text detection, then CRNN recognition on the demo image.

    Stage 1 (EAST): restores the model from ``FLAGS.checkpoint_path`` and
    processes every image returned by ``get_images()``. For each image it
    writes ``<name>.txt`` into ``FLAGS.output_dir``, one kept box per line
    as eight comma-separated integers. Unless ``FLAGS.no_write_images`` is
    set, it also saves the image with the boxes drawn on it. Images that
    OpenCV cannot read are reported and skipped.

    Stage 2 (CRNN): reads the boxes in ``./output/img_demo.txt``, crops each
    one from ``./test_img/img_demo.jpg`` and saves the crop under
    ``./output/word_area_img/``. It then recognises the text and appends it
    as one line to ``./output/output.txt``.

    Args:
        argv: Unused by this function.

    Raises:
        OSError: If ``FLAGS.output_dir`` cannot be created.
        ValueError: If no checkpoint state is found in
            ``FLAGS.checkpoint_path``.
    """
    import errno
    import os

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu_list

    try:
        os.makedirs(FLAGS.output_dir)
    except OSError as e:
        # An already existing output directory is fine; anything else is not.
        if e.errno != errno.EEXIST:
            raise

    # ------------------------------------------------------------------
    # Stage 1: EAST text detection.
    # ------------------------------------------------------------------
    with tf.get_default_graph().as_default():
        input_images = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_images')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)

        f_score, f_geometry = model.model(input_images, is_training=False)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(
                config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            if ckpt_state is None or not ckpt_state.model_checkpoint_path:
                raise ValueError(
                    'No checkpoint found in {!r}'.format(
                        FLAGS.checkpoint_path))
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                im = cv2.imread(im_fn)
                if im is None:
                    # cv2.imread signals an unreadable file by returning
                    # None; skip it instead of crashing the whole batch.
                    print('Skip unreadable image {}'.format(im_fn))
                    continue
                # Reverse the channel axis before feeding the network.
                im = im[:, :, ::-1]

                start_time = time.time()
                im_resized, (ratio_h, ratio_w) = resize_image(im)

                timer = {'net': 0, 'restore': 0, 'nms': 0}
                start = time.time()
                score, geometry = sess.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                timer['net'] = time.time() - start

                boxes, timer = detect(
                    score_map=score, geo_map=geometry, timer=timer)
                print(
                    '{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                        im_fn, timer['net'] * 1000, timer['restore'] * 1000,
                        timer['nms'] * 1000))

                if boxes is not None:
                    # Map box corners back to original image coordinates.
                    boxes = boxes[:, :8].reshape((-1, 4, 2))
                    boxes[:, :, 0] /= ratio_w
                    boxes[:, :, 1] /= ratio_h

                duration = time.time() - start_time
                print('[timing] {}'.format(duration))

                # save to file
                if boxes is not None:
                    res_file = os.path.join(
                        FLAGS.output_dir,
                        '{}.txt'.format(
                            os.path.basename(im_fn).split('.')[0]))

                    with open(res_file, 'w') as f:
                        for box in boxes:
                            # to avoid submitting errors
                            box = sort_poly(box.astype(np.int32))
                            # Drop boxes whose first or last edge is < 5 px.
                            if (np.linalg.norm(box[0] - box[1]) < 5
                                    or np.linalg.norm(box[3] - box[0]) < 5):
                                continue
                            f.write('{},{},{},{},{},{},{},{}\r\n'.format(
                                box[0, 0], box[0, 1],
                                box[1, 0], box[1, 1],
                                box[2, 0], box[2, 1],
                                box[3, 0], box[3, 1],
                            ))
                            cv2.polylines(
                                im[:, :, ::-1],
                                [box.astype(np.int32).reshape((-1, 1, 2))],
                                True,
                                color=(255, 255, 0),
                                thickness=1)

                if not FLAGS.no_write_images:
                    img_path = os.path.join(
                        FLAGS.output_dir, os.path.basename(im_fn))
                    cv2.imwrite(img_path, im[:, :, ::-1])

    # ------------------------------------------------------------------
    # Stage 2: CRNN word recognition on the demo image.
    # NOTE(review): assumed to run once, after detection has finished;
    # confirm against the intended layout of this function.
    # ------------------------------------------------------------------
    model_path = './crnn/crnn.pth'
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    model_crnn = crnn.CRNN(32, 1, 37, 256)
    print('loading pretrained model from %s' % model_path)
    model_crnn.load_state_dict(torch.load(model_path))
    # Inference mode only needs to be selected once, not once per word.
    model_crnn.eval()

    converter = utils.strLabelConverter(alphabet)
    transformer = dataset.resizeNormalize((100, 32))
    seq = re.compile(",")

    with open('./output/img_demo.txt') as f:
        img = cv2.imread('./test_img/img_demo.jpg')
        with open('./output/output.txt', 'w') as fp:
            for line_id, line in enumerate(f, start=1):
                # Each line holds x1,y1,...,x4,y4 of one detected box.
                lst = seq.split(line.strip())
                corners = [int(v) for v in lst[:8]]
                cnt = np.array(corners).reshape(4, 2)

                rect = cv2.minAreaRect(cnt)
                # np.int0 was removed in NumPy 2.0; it was an alias of
                # np.intp, so this cast is equivalent.
                box = cv2.boxPoints(rect).astype(np.intp)

                # Axis-aligned crop around the rotated rectangle. A corner
                # may fall outside the image; a negative slice start would
                # index from the far edge, so clamp it at 0.
                top = max(box[:, 1].min(), 0)
                left = max(box[:, 0].min(), 0)
                bottom = box[:, 1].max()
                right = box[:, 0].max()
                roi_img = img[top:bottom, left:right]

                img_path = ('./output/word_area_img/word_area_img'
                            + str(line_id) + '.png')
                cv2.imwrite(img_path, roi_img)

                # Reload the crop as a grayscale PIL image and add a
                # leading batch dimension of 1.
                image = Image.open(img_path).convert('L')
                image = transformer(image)
                image = image.view(1, *image.size())
                image = Variable(image)

                preds = model_crnn(image)
                _, preds = preds.max(2)
                preds = preds.transpose(1, 0).contiguous().view(-1)
                preds_size = Variable(torch.IntTensor([preds.size(0)]))

                raw_pred = converter.decode(
                    preds.data, preds_size.data, raw=True)
                sim_pred = converter.decode(
                    preds.data, preds_size.data, raw=False)
                print('%-20s => %-20s' % (raw_pred, sim_pred))
                fp.write(sim_pred)
                fp.write('\n')
def process_images(dir_name, split_names, images_indices, checkpoint_path,
                   crnn_path):
    """Detect words with EAST and recognise them with CRNN, image by image.

    Two independent TensorFlow graphs are built, one for EAST detection and
    one for CRNN recognition, each with its own session. For every file name
    produced by ``generate_filename`` the detected boxes are cropped,
    ordered with ``sort_by_pos`` and recognised. The words and their boxes
    are then passed to ``write_to_file`` under the image path with its
    extension replaced by ``.txt``. Images that OpenCV cannot read are
    reported and skipped. Both sessions are closed when the function exits.

    Args:
        dir_name: Forwarded to ``generate_filename``.
        split_names: Forwarded to ``generate_filename``.
        images_indices: Forwarded to ``generate_filename``.
        checkpoint_path: Directory holding the EAST checkpoint files.
        crnn_path: Checkpoint path given to the CRNN saver's ``restore``.

    Raises:
        ValueError: If no checkpoint state is found in ``checkpoint_path``.
    """
    from contextlib import closing

    # EAST detection graph.
    east_graph = tf.Graph()
    with east_graph.as_default():
        input_images = tf.placeholder(
            tf.float32, shape=[None, None, None, 3], name='input_images')
        global_step = tf.get_variable(
            'global_step', [],
            initializer=tf.constant_initializer(0),
            trainable=False)
        f_score, f_geometry = model.model(input_images, is_training=False)
        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        east_saver = tf.train.Saver(variable_averages.variables_to_restore())

    # CRNN recognition graph.
    crnn_graph = tf.Graph()
    with crnn_graph.as_default():
        cropped_image = tf.placeholder(
            dtype=tf.float32, shape=[1, 32, 100, 3], name='cropped_image')
        word_recog = ShadowNet(phase='Test', hidden_nums=256, layers_nums=2,
                               seq_length=25, num_classes=37)
        with tf.variable_scope('shadow'):
            recog = word_recog.build_shadownet(inputdata=cropped_image)
        decodes, _ = tf.nn.ctc_beam_search_decoder(
            inputs=recog,
            sequence_length=25 * np.ones(1),
            merge_repeated=False)
        decoder = data_utils.TextFeatureIO()
        crnn_saver = tf.train.Saver()

    # Resolve the EAST checkpoint before opening any session so a missing
    # checkpoint fails fast with a readable message.
    ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
    if ckpt_state is None or not ckpt_state.model_checkpoint_path:
        raise ValueError(
            'No checkpoint found in {!r}'.format(checkpoint_path))
    model_path = os.path.join(
        checkpoint_path,
        os.path.basename(ckpt_state.model_checkpoint_path))

    session_config = tf.ConfigProto(allow_soft_placement=True)
    # closing() guarantees both sessions are released, even on errors,
    # without changing the default graph or session.
    with closing(tf.Session(config=session_config,
                            graph=east_graph)) as east_session, \
            closing(tf.Session(config=session_config,
                               graph=crnn_graph)) as crnn_session:
        with east_graph.as_default(), east_session.as_default():
            print('Restore from {}'.format(model_path))
            east_saver.restore(east_session, model_path)

        with crnn_graph.as_default(), crnn_session.as_default():
            crnn_saver.restore(crnn_session, save_path=crnn_path)

        for image_name in generate_filename(dir_name, split_names,
                                            images_indices):
            print('processing {}'.format(image_name))
            box_list = []
            smaller_image_list = []
            centers = []
            words_list = []
            final_boxes = []

            # splitext strips only the extension, so dots elsewhere in the
            # path (e.g. './data/x.jpg') no longer truncate the name.
            file_name = os.path.splitext(image_name)[0] + '.txt'
            print(image_name)

            im = cv2.imread(image_name)
            if im is None:
                # cv2.imread signals an unreadable file by returning None.
                print('Skip unreadable image {}'.format(image_name))
                continue
            # Reverse the channel axis before feeding the detector.
            im = im[:, :, ::-1]
            im_resized, (ratio_h, ratio_w) = resize_image(im)

            with east_session.as_default(), east_graph.as_default():
                score, geometry = east_session.run(
                    [f_score, f_geometry],
                    feed_dict={input_images: [im_resized]})
                boxes = detect(score, geometry)

            if boxes is not None:
                # Map box corners back to original image coordinates.
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for box in boxes:
                    box = sort_poly(box.astype(np.int32))
                    # Drop boxes whose first or last edge is under 5 px.
                    if (np.linalg.norm(box[0] - box[1]) < 5
                            or np.linalg.norm(box[3] - box[0]) < 5):
                        continue
                    x_range, y_range = convert_to_rect(box)
                    smaller_image_list.append(im[y_range, x_range, :])
                    box_list.append(box)
                    centers.append(
                        ((y_range.start + y_range.stop) / 2.0,
                         (x_range.start + x_range.stop) / 2.0))
            print('East done one the image {}'.format(image_name))

            smaller_images_sorted, box_list = sort_by_pos(
                smaller_image_list, box_list, centers, im.shape)

            with crnn_session.as_default(), crnn_graph.as_default():
                for box, smaller_image in zip(box_list,
                                              smaller_images_sorted):
                    # Resize to the 100x32 input of the recogniser and
                    # reverse the channel axis again.
                    smaller_im = cv2.resize(smaller_image, (100, 32))
                    smaller_im = smaller_im[:, :, ::-1]
                    preds = crnn_session.run(
                        decodes, feed_dict={cropped_image: [smaller_im]})
                    preds = decoder.writer.sparse_tensor_to_str(preds[0])
                    if preds[0] is not None:
                        words_list.append(preds[0])
                        final_boxes.append(box)

            print('The words detected are {}'.format(', '.join(words_list)))
            write_to_file(file_name, words_list, final_boxes)