def __init__(self, dataset_dir, char_dict_path, ord_map_dict_path, flags='train'):
    """
    Configure the crnn dataset feeder for one dataset split

    :param dataset_dir: the root dir of crnn dataset
    :param char_dict_path: json file path which contains the map relation
        between ord value and single character
    :param ord_map_dict_path: json file path which contains the map relation
        between int index value and char ord value
    :param flags: which split the feeder serves, one of 'train', 'test'
        or 'val' (compared case-insensitively)
    :raises ValueError: if the tfrecords dir does not exist or flags is not
        one of the supported splits
    """
    # joining with '' only appends a trailing path separator to dataset_dir
    tfrecords_dir = ops.join(dataset_dir, '')
    self._dataset_dir = dataset_dir
    self._tfrecords_dir = tfrecords_dir
    if not ops.exists(tfrecords_dir):
        raise ValueError(
            '{:s} not exist, please check again'.format(tfrecords_dir))

    dataset_flags = flags.lower()
    self._dataset_flags = dataset_flags
    if dataset_flags not in ['train', 'test', 'val']:
        raise ValueError(
            'flags of the data feeder should be \'train\', \'test\', \'val\''
        )

    self._char_dict_path = char_dict_path
    self._ord_map_dict_path = ord_map_dict_path

    # the reader is told which split it serves through its dataset_flags attribute
    reader = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)
    self._tfrecords_io_reader = reader
    self._tfrecords_io_reader.dataset_flags = dataset_flags
def input_rec(image_list, weights_path, char_dict_path, ord_map_dict_path):
    """
    Build the single-image inference graph together with its ctc decoder

    :param image_list: not used inside this function
    :param weights_path: not used inside this function
    :param char_dict_path: forwarded to CrnnFeatureReader
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :return: tuple of (input placeholder, codec, net, inference output,
        decoded results, second value returned by the beam search decoder)
    """
    target_height = 32
    target_width = CFG.ARCH.INPUT_SIZE[0]

    input_shape = [1, target_height, target_width, CFG.ARCH.INPUT_CHANNELS]
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=input_shape,
                               name='input')

    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)

    net = crnn_net.ShadowNet(phase='test',
                             hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                             layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                             num_classes=CFG.ARCH.NUM_CLASSES)

    # reuse was previously False
    inference_ret = net.inference(inputdata=inputdata,
                                  name='shadow_net',
                                  reuse=tf.AUTO_REUSE)

    # one sequence length entry for the single image in the batch
    sequence_length = int(target_width / 4) * np.ones(1)
    decodes, decoder_aux = tf.nn.ctc_beam_search_decoder(
        inputs=inference_ret,
        sequence_length=sequence_length,
        merge_repeated=False,
        beam_width=10)

    return inputdata, codec, net, inference_ret, decodes, decoder_aux
def test_load_saved_model(saved_model_dir, char_dict_path, ord_map_dict_path):
    """
    Load an exported SavedModel, run it on the demo image and show the result

    :param saved_model_dir: export dir handed to the SavedModel loader
    :param char_dict_path: forwarded to CrnnFeatureReader
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :raises ValueError: if the demo image can not be read
    :return: None
    """
    image_file = 'data/test_images/test_01.jpg'
    image = cv2.imread(image_file, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports an unreadable file by returning None
        raise ValueError('Failed to read image {:s}'.format(image_file))

    # keep the un-resized image for visualization
    image_vis = image
    image = cv2.resize(
        src=image,
        dsize=tuple(CFG.ARCH.INPUT_SIZE),
        interpolation=cv2.INTER_LINEAR
    )
    # scale pixel values to [-1.0, 1.0] and add the batch dimension
    image = np.array(image, np.float32) / 127.5 - 1.0
    image = np.expand_dims(image, 0)

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH
    sess_config.gpu_options.allocator_type = 'BFC'

    sess = tf.Session(config=sess_config)

    # the session is closed even if loading or inference fails
    try:
        with sess.as_default():
            meta_graphdef = sm.loader.load(
                sess,
                tags=[sm.tag_constants.SERVING],
                export_dir=saved_model_dir)

            signature_def_d = meta_graphdef.signature_def
            signature_def_d = signature_def_d[sm.signature_constants.PREDICT_OUTPUTS]

            image_input_tensor = signature_def_d.inputs['input_tensor']
            prediction_tensor = signature_def_d.outputs['prediction']

            input_tensor = sm.utils.get_tensor_from_tensor_info(image_input_tensor, sess.graph)
            predictions = sm.utils.get_tensor_from_tensor_info(prediction_tensor, sess.graph)

            prediction_val = sess.run(predictions, feed_dict={input_tensor: image})

            codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
                char_dict_path=char_dict_path,
                ord_map_dict_path=ord_map_dict_path
            )
            prediction_val = codec.sparse_tensor_to_str(prediction_val)[0]

            log.info('Predict image result ----> {:s}'.format(prediction_val))

            plt.figure('CRNN Model Demo')
            # reverse the channel order of the cv2 image for matplotlib
            plt.imshow(image_vis[:, :, (2, 1, 0)])
            plt.show()
    finally:
        sess.close()
def request_crnn_predict(image_path):
    """
    Send one image to the serving endpoint and log the decoded text

    The image is resized to height 32 keeping its aspect ratio, with the
    width clamped to at least 100, then scaled to [-1.0, 1.0].

    :param image_path: path of the image to recognize
    :return: None
    """
    target_height = 32
    min_width = 100

    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    scale_rate = target_height / image.shape[0]
    target_width = max(int(scale_rate * image.shape[1]), min_width)
    image = cv2.resize(image, (target_width, target_height),
                       interpolation=cv2.INTER_LINEAR)
    image = np.array(image, np.float32) / 127.5 - 1.0

    payload = json.dumps({
        # has to be in column format; not a fixed output size
        'inputs': [image.tolist()],
    })
    response = requests.post(SERVER_URL, data=payload)
    response.raise_for_status()

    outputs = response.json()['outputs']

    # this part can likely be optimized, but oh well
    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=CHAR_DICT_PATH,
        ord_map_dict_path=ORD_MAP_DICT_PATH,
    )
    decoded = codec.unpack_sparse_tensor_to_str(
        outputs['decodes_indices'],
        outputs['decodes_values'],
        outputs['decodes_dense_shape'],
    )
    # re-insert word boundaries into the first decoded string
    preds = ' '.join(wordninja.split(decoded[0]))

    image_name = ops.split(image_path)[1]
    logger.info('Predict image {:s} result: {:s}'.format(image_name, preds))
def request_crnn_predict(image_path):
    """
    Send one fixed-size image to the serving endpoint and log the decoded text

    :param image_path: path of the image to recognize
    :return: None
    """
    raw_image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    # constrain image input size to (100, 32)
    resized = cv2.resize(raw_image,
                         tuple(CFG.ARCH.INPUT_SIZE),
                         interpolation=cv2.INTER_LINEAR)
    # scale pixel values to [-1.0, 1.0]
    image = np.array(resized, np.float32) / 127.5 - 1.0

    payload = json.dumps({
        # has to be in column format; not a fixed output size
        'inputs': [image.tolist()],
    })
    response = requests.post(SERVER_URL, data=payload)
    response.raise_for_status()

    outputs = response.json()['outputs']

    # this part can likely be optimized, but oh well
    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=CHAR_DICT_PATH,
        ord_map_dict_path=ORD_MAP_DICT_PATH,
    )
    decoded = codec.sparse_tensor_to_str_for_tf_serving(
        decode_indices=outputs['decodes_indices'],
        decode_values=outputs['decodes_values'],
        decode_dense_shape=outputs['decodes_dense_shape'],
    )
    # re-insert word boundaries into the first decoded string
    preds = ' '.join(wordninja.split(decoded[0]))

    image_name = ops.split(image_path)[1]
    logger.info('Predict image {:s} result: {:s}'.format(image_name, preds))
from argparse import ArgumentParser import grpc import numpy as np import cv2 from tensorflow.contrib.util import make_tensor_proto from tensorflow_serving.apis import predict_pb2 from tensorflow_serving.apis import prediction_service_pb2_grpc from config import global_config from data_provider import tf_io_pipline_fast_tools CFG = global_config.cfg CODEC = tf_io_pipline_fast_tools.CrnnFeatureReader( char_dict_path='./data/char_dict/char_dict_en.json', ord_map_dict_path='./data/char_dict/ord_map_en.json') def parse_args(): """ :return: """ parser = ArgumentParser( description='Request a TensorFlow server for a prediction on the image' ) parser.add_argument('-s', '--server', dest='server', default='localhost:9000',
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path,
              is_vis, is_english=True):
    """
    Recognize the text in a single image with a restored crnn checkpoint

    :param image_path: path of the image to recognize
    :param weights_path: checkpoint path handed to the saver
    :param char_dict_path: forwarded to CrnnFeatureReader
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :param is_vis: when true the input image is shown with matplotlib
    :param is_english: when true the prediction is split into words
        with wordninja
    :return: None
    """
    new_heigth = 32
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # keep the aspect ratio but never go below the configured input width
    scale_rate = new_heigth / image.shape[0]
    new_width = int(scale_rate * image.shape[1])
    min_width = CFG.ARCH.INPUT_SIZE[0]
    if new_width <= min_width:
        new_width = min_width

    image = cv2.resize(image, (new_width, new_heigth),
                       interpolation=cv2.INTER_LINEAR)
    image_vis = image
    # scale pixel values to [-1.0, 1.0]
    image = np.array(image, np.float32) / 127.5 - 1.0

    input_shape = [1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS]
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=input_shape,
                               name='input')

    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)

    net = crnn_net.ShadowNet(phase='test',
                             hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                             layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                             num_classes=CFG.ARCH.NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata,
                                  name='shadow_net',
                                  reuse=False)

    sequence_length = int(new_width / 4) * np.ones(1)
    decodes, _ = tf.nn.ctc_beam_search_decoder(
        inputs=inference_ret,
        sequence_length=sequence_length,
        merge_repeated=False,
        beam_width=10)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        decoded = sess.run(decodes, feed_dict={inputdata: [image]})
        preds = codec.sparse_tensor_to_str(decoded[0])[0]
        if is_english:
            preds = ' '.join(wordninja.split(preds))

        image_name = ops.split(image_path)[1]
        logger.info('Predict image {:s} result: {:s}'.format(image_name, preds))

        if is_vis:
            plt.figure('CRNN Model Demo')
            # reverse the channel order of the cv2 image for matplotlib
            plt.imshow(image_vis[:, :, (2, 1, 0)])
            plt.show()

    sess.close()

    return
def evaluate_shadownet(dataset_dir, weights_path, char_dict_path,
                       ord_map_dict_path, is_visualize=False,
                       is_process_all_data=False):
    """
    Evaluate a restored crnn checkpoint on the test split

    Logs every prediction, the mean per-char and full-sequence accuracy over
    the evaluated batches, and plots a character level confusion matrix.

    :param dataset_dir: root dir of the dataset handed to CrnnDataFeeder
    :param weights_path: checkpoint path handed to the saver
    :param char_dict_path: forwarded to the data feeder and the decoder
    :param ord_map_dict_path: forwarded to the data feeder and the decoder
    :param is_visualize: when true every evaluated image is shown
    :param is_process_all_data: when true enough batches are evaluated to
        cover the whole test split, otherwise a single batch is evaluated
    :return: None
    """
    # prepare dataset
    test_dataset = shadownet_data_feed_pipline.CrnnDataFeeder(
        dataset_dir=dataset_dir,
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path,
        flags='test')
    test_images, test_labels, test_images_paths = test_dataset.inputs(
        batch_size=CFG.TEST.BATCH_SIZE)

    # set up test sample count
    if is_process_all_data:
        log.info('Start computing test dataset sample counts')
        t_start = time.time()
        test_sample_count = test_dataset.sample_counts()
        log.info(
            'Computing test dataset sample counts finished, cost time: {:.5f}'.
            format(time.time() - t_start))
        num_iterations = int(math.ceil(test_sample_count / CFG.TEST.BATCH_SIZE))
    else:
        num_iterations = 1

    # declare crnn net
    shadownet = crnn_net.ShadowNet(phase='test',
                                   hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                                   layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                                   num_classes=CFG.ARCH.NUM_CLASSES)

    # set up decoder
    decoder = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)

    # compute inference result
    test_inference_ret = shadownet.inference(inputdata=test_images,
                                             name='shadow_net',
                                             reuse=False)
    test_decoded, _ = tf.nn.ctc_beam_search_decoder(
        test_inference_ret,
        CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TEST.BATCH_SIZE),
        beam_width=1,
        merge_repeated=False)

    # recover image from [-1.0, 1.0] ---> [0.0, 255.0]
    test_images = tf.multiply(tf.add(test_images, 1.0),
                              127.5,
                              name='recoverd_test_images')

    # Set saver configuration
    saver = tf.train.Saver()

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    per_char_accuracy = 0.0
    full_sequence_accuracy = 0.0
    # the accuracies are averaged over the batches that were really evaluated
    evaluated_batches = 0
    total_labels_char_list = []
    total_predictions_char_list = []

    try:
        with sess.as_default():
            saver.restore(sess=sess, save_path=weights_path)

            log.info('Start predicting...')

            try:
                for _ in range(num_iterations):
                    test_predictions_value, test_images_value, test_labels_value, \
                        test_images_paths_value = sess.run(
                            [test_decoded, test_images, test_labels, test_images_paths])

                    test_images_paths_value = np.reshape(
                        test_images_paths_value,
                        newshape=test_images_paths_value.shape[0])
                    test_images_names_value = [
                        ops.split(tmp.decode('utf-8'))[1]
                        for tmp in test_images_paths_value
                    ]
                    test_labels_value = decoder.sparse_tensor_to_str(
                        test_labels_value)
                    test_predictions_value = decoder.sparse_tensor_to_str(
                        test_predictions_value[0])

                    per_char_accuracy += evaluation_tools.compute_accuracy(
                        test_labels_value,
                        test_predictions_value,
                        display=False,
                        mode='per_char')
                    full_sequence_accuracy += evaluation_tools.compute_accuracy(
                        test_labels_value,
                        test_predictions_value,
                        display=False,
                        mode='full_sequence')
                    evaluated_batches += 1

                    for index, test_image in enumerate(test_images_value):
                        log.info(
                            'Predict {:s} image with gt label: {:s} **** predicted label: {:s}'
                            .format(test_images_names_value[index],
                                    test_labels_value[index],
                                    test_predictions_value[index]))

                        if is_visualize:
                            # reverse the channel order for matplotlib
                            plt.imshow(
                                np.array(test_image, np.uint8)[:, :, (2, 1, 0)])
                            plt.show()

                        label_chars = list(test_labels_value[index])
                        prediction_chars = list(test_predictions_value[index])

                        if not label_chars or not prediction_chars:
                            continue

                        # the confusion matrix needs position aligned pairs, so
                        # both sequences are cut to their common length
                        common_length = min(len(label_chars), len(prediction_chars))
                        total_labels_char_list.extend(label_chars[:common_length])
                        total_predictions_char_list.extend(
                            prediction_chars[:common_length])
            except tf.errors.OutOfRangeError:
                log.error('End of tfrecords sequence')
            except Exception as err:
                # best effort: report whatever was evaluated before the failure
                log.error(err)
    finally:
        sess.close()

    if evaluated_batches == 0:
        log.error('No test batch was evaluated, skip accuracy report')
        return

    avg_per_char_accuracy = per_char_accuracy / evaluated_batches
    avg_full_sequence_accuracy = full_sequence_accuracy / evaluated_batches
    log.info('Mean test per char accuracy is {:5f}'.format(
        avg_per_char_accuracy))
    log.info('Mean test full sequence accuracy is {:5f}'.format(
        avg_full_sequence_accuracy))

    # compute confusion matrix
    cnf_matrix = confusion_matrix(total_labels_char_list,
                                  total_predictions_char_list)
    np.set_printoptions(precision=2)
    evaluation_tools.plot_confusion_matrix(cm=cnf_matrix, normalize=True)

    plt.show()
def train_shadownet(dataset_dir, weights_path, char_dict_path, ord_map_dict_path,
                    need_decode=False):
    """
    Train the crnn net, from scratch or from a restored checkpoint

    Summaries are written to 'tboard/crnn_syn90k' and a checkpoint is saved to
    'model/crnn_syn90k' every 2000 steps.

    :param dataset_dir: root dir of the dataset handed to CrnnDataFeeder
    :param weights_path: checkpoint to restore, or None to train from scratch
    :param char_dict_path: forwarded to the data feeders and the decoder
    :param ord_map_dict_path: forwarded to the data feeders and the decoder
    :param need_decode: when true the edit distance summaries are recorded and
        every 500 steps the predictions are decoded to compute accuracy
    :return: numpy array with the train ctc loss of every step that was run
    """
    # prepare dataset
    train_dataset = shadownet_data_feed_pipline.CrnnDataFeeder(
        dataset_dir=dataset_dir,
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path,
        flags='train'
    )
    val_dataset = shadownet_data_feed_pipline.CrnnDataFeeder(
        dataset_dir=dataset_dir,
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path,
        flags='val'
    )
    train_images, train_labels, train_images_paths = train_dataset.inputs(
        batch_size=CFG.TRAIN.BATCH_SIZE
    )
    val_images, val_labels, val_images_paths = val_dataset.inputs(
        batch_size=CFG.TRAIN.BATCH_SIZE
    )

    # declare crnn net
    shadownet = crnn_net.ShadowNet(
        phase='train',
        hidden_nums=CFG.ARCH.HIDDEN_UNITS,
        layers_nums=CFG.ARCH.HIDDEN_LAYERS,
        num_classes=CFG.ARCH.NUM_CLASSES
    )
    shadownet_val = crnn_net.ShadowNet(
        phase='test',
        hidden_nums=CFG.ARCH.HIDDEN_UNITS,
        layers_nums=CFG.ARCH.HIDDEN_LAYERS,
        num_classes=CFG.ARCH.NUM_CLASSES
    )

    # set up decoder
    decoder = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path
    )

    # set up training graph
    # NOTE(review): the device scope is assumed to cover graph construction up
    # to the optimizer -- confirm against the intended placement
    with tf.device('/gpu:1'):

        # compute loss and seq distance
        train_inference_ret, train_ctc_loss = shadownet.compute_loss(
            inputdata=train_images,
            labels=train_labels,
            name='shadow_net',
            reuse=False
        )
        # the validation net shares the variables of the train net
        val_inference_ret, val_ctc_loss = shadownet_val.compute_loss(
            inputdata=val_images,
            labels=val_labels,
            name='shadow_net',
            reuse=True
        )

        train_decoded, train_log_prob = tf.nn.ctc_beam_search_decoder(
            train_inference_ret,
            CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TRAIN.BATCH_SIZE),
            merge_repeated=False
        )
        val_decoded, val_log_prob = tf.nn.ctc_beam_search_decoder(
            val_inference_ret,
            CFG.ARCH.SEQ_LENGTH * np.ones(CFG.TRAIN.BATCH_SIZE),
            merge_repeated=False
        )

        train_sequence_dist = tf.reduce_mean(
            tf.edit_distance(tf.cast(train_decoded[0], tf.int32), train_labels),
            name='train_edit_distance'
        )
        val_sequence_dist = tf.reduce_mean(
            tf.edit_distance(tf.cast(val_decoded[0], tf.int32), val_labels),
            name='val_edit_distance'
        )

        # set learning rate
        global_step = tf.Variable(0, name='global_step', trainable=False)
        learning_rate = tf.train.exponential_decay(
            learning_rate=CFG.TRAIN.LEARNING_RATE,
            global_step=global_step,
            decay_steps=CFG.TRAIN.LR_DECAY_STEPS,
            decay_rate=CFG.TRAIN.LR_DECAY_RATE,
            staircase=CFG.TRAIN.LR_STAIRCASE)

        # make the train op run the collected update ops first
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=learning_rate, momentum=0.9).minimize(
                    loss=train_ctc_loss, global_step=global_step)

    # Set tf summary
    tboard_save_dir = 'tboard/crnn_syn90k'
    os.makedirs(tboard_save_dir, exist_ok=True)
    tf.summary.scalar(name='train_ctc_loss', tensor=train_ctc_loss)
    tf.summary.scalar(name='val_ctc_loss', tensor=val_ctc_loss)
    tf.summary.scalar(name='learning_rate', tensor=learning_rate)

    if need_decode:
        tf.summary.scalar(name='train_seq_distance', tensor=train_sequence_dist)
        tf.summary.scalar(name='val_seq_distance', tensor=val_sequence_dist)

    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    model_save_dir = 'model/crnn_syn90k'
    os.makedirs(model_save_dir, exist_ok=True)
    model_name = 'shadownet.ckpt'
    model_save_path = ops.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)
    summary_writer = tf.summary.FileWriter(tboard_save_dir)

    # Set the training parameters
    train_epochs = CFG.TRAIN.EPOCHS

    # leading np.inf lets the first real loss always count as an improvement
    cost_history = [np.inf]

    # writer and session are released even if training fails
    try:
        summary_writer.add_graph(sess.graph)

        with sess.as_default():
            epoch = 0
            if weights_path is None:
                logger.info('Training from scratch')
                init = tf.global_variables_initializer()
                sess.run(init)
            else:
                logger.info('Restore model from {:s}'.format(weights_path))
                saver.restore(sess=sess, save_path=weights_path)
                # continue counting from the restored global step
                epoch = sess.run(global_step)

            patience_counter = 1

            while epoch < train_epochs:
                epoch += 1

                # setup early stopping
                # the history guard keeps the look-back index valid, which
                # matters when resuming: epoch is large but history is empty
                if CFG.TRAIN.EARLY_STOPPING and len(cost_history) > patience_counter:
                    # We always compare to the first point where cost didn't improve
                    if cost_history[-1 - patience_counter] - cost_history[-1] > CFG.TRAIN.PATIENCE_DELTA:
                        patience_counter = 1
                    else:
                        patience_counter += 1
                    if patience_counter > CFG.TRAIN.PATIENCE_EPOCHS:
                        logger.info("Cost didn't improve beyond {:f} for {:d} epochs, stopping early.".
                                    format(CFG.TRAIN.PATIENCE_DELTA, patience_counter))
                        break

                if need_decode and epoch % 500 == 0:
                    # train part
                    _, train_ctc_loss_value, train_seq_dist_value, \
                        train_predictions, train_labels_sparse, merge_summary_value = sess.run(
                            [optimizer, train_ctc_loss, train_sequence_dist,
                             train_decoded, train_labels, merge_summary_op])

                    train_labels_str = decoder.sparse_tensor_to_str(train_labels_sparse)
                    train_predictions = decoder.sparse_tensor_to_str(train_predictions[0])
                    avg_train_accuracy = evaluation_tools.compute_accuracy(
                        train_labels_str, train_predictions)

                    if epoch % CFG.TRAIN.DISPLAY_STEP == 0:
                        logger.info('Epoch_Train: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'.format(
                            epoch + 1, train_ctc_loss_value, train_seq_dist_value, avg_train_accuracy))

                    # validation part
                    val_ctc_loss_value, val_seq_dist_value, \
                        val_predictions, val_labels_sparse = sess.run(
                            [val_ctc_loss, val_sequence_dist, val_decoded, val_labels])

                    val_labels_str = decoder.sparse_tensor_to_str(val_labels_sparse)
                    val_predictions = decoder.sparse_tensor_to_str(val_predictions[0])
                    avg_val_accuracy = evaluation_tools.compute_accuracy(
                        val_labels_str, val_predictions)

                    if epoch % CFG.TRAIN.DISPLAY_STEP == 0:
                        logger.info('Epoch_Val: {:d} cost= {:9f} seq distance= {:9f} val accuracy= {:9f}'.format(
                            epoch + 1, val_ctc_loss_value, val_seq_dist_value, avg_val_accuracy))
                else:
                    _, train_ctc_loss_value, merge_summary_value = sess.run(
                        [optimizer, train_ctc_loss, merge_summary_op])

                    if epoch % CFG.TRAIN.DISPLAY_STEP == 0:
                        logger.info('Epoch_Train: {:d} cost= {:9f}'.format(epoch + 1, train_ctc_loss_value))

                # record history train ctc loss
                cost_history.append(train_ctc_loss_value)
                # add training summary
                summary_writer.add_summary(summary=merge_summary_value, global_step=epoch)

                if epoch % 2000 == 0:
                    saver.save(sess=sess, save_path=model_save_path, global_step=epoch)
    finally:
        summary_writer.close()
        sess.close()

    return np.array(cost_history[1:])  # Don't return the first np.inf
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path, output_path):
    """
    Recognize every text row of a pdf page image and write the text to a file

    Each row is resized to height 32 and padded on the right with white
    pixels up to the width of the widest row, so one graph serves all rows.

    :param image_path: path of the pdf page image
    :param weights_path: checkpoint path handed to the saver
    :param char_dict_path: forwarded to CrnnFeatureReader
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :param output_path: text file the recognized rows are written to
    :raises ValueError: if the image can not be read
    :return: None
    """
    # read pdf image
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if image is None:
        # cv2.imread reports an unreadable file by returning None
        raise ValueError('Failed to read image {:s}'.format(image_path))

    # split pdf image into row block
    pdf_image_row_blocks = split_pdf_image_into_row_image_block(image)

    # locate the text area in each row block and remember its scaled width
    new_heigth = 32
    pdf_image_text_areas = []
    scaled_widths = []
    for row_block in pdf_image_row_blocks:
        text_area = locate_text_area(row_block)
        scale = new_heigth / text_area.shape[0]
        scaled_widths.append(int(scale * text_area.shape[1]))
        pdf_image_text_areas.append(text_area)

    # the graph width is the widest row, but never below the configured width
    new_width = max(scaled_widths + [CFG.ARCH.INPUT_SIZE[0]])

    # define the compute graph
    inputdata = tf.placeholder(
        dtype=tf.float32,
        shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS],
        name='input')

    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)

    net = crnn_net.ShadowNet(phase='test',
                             hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                             layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                             num_classes=CFG.ARCH.NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata,
                                  name='shadow_net',
                                  reuse=False)

    decodes, _ = tf.nn.ctc_beam_search_decoder(
        inputs=inference_ret,
        sequence_length=int(new_width / 4) * np.ones(1),
        merge_repeated=False,
        beam_width=1)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    pdf_recognize_results = []
    # the session is closed even if restoring or inference fails
    try:
        with sess.as_default():
            saver.restore(sess=sess, save_path=weights_path)

            for text_area, scaled_width in zip(pdf_image_text_areas, scaled_widths):
                # resize text area into size (scaled_width, new_heigth)
                text_area = cv2.resize(text_area,
                                       (scaled_width, new_heigth),
                                       interpolation=cv2.INTER_LINEAR)
                # pad with white pixels on the right up to new_width
                if scaled_width < new_width:
                    pad_area = np.full(
                        shape=[new_heigth, new_width - scaled_width, 3],
                        fill_value=255,
                        dtype=np.uint8)
                    text_area = np.concatenate((text_area, pad_area), axis=1)

                # scale pixel values to [-1.0, 1.0]
                text_area = np.array(text_area, np.float32) / 127.5 - 1.0

                preds = sess.run(decodes, feed_dict={inputdata: [text_area]})
                preds = codec.sparse_tensor_to_str(preds[0])
                pdf_recognize_results.append(preds[0])
    finally:
        sess.close()

    output_text = []
    need_tab = True
    last_index = len(pdf_recognize_results) - 1
    for index, pdf_text in enumerate(pdf_recognize_results):
        # NOTE(review): both branches currently emit the same text; confirm
        # whether the tabbed variant was meant to carry extra indentation
        if need_tab:
            text_console_str = '---- {:s}'.format(pdf_text)
            text_file_str = ' {:s}'.format(pdf_text)
        else:
            text_console_str = '---- {:s}'.format(pdf_text)
            text_file_str = ' {:s}'.format(pdf_text)
        print(text_console_str)
        output_text.append(text_file_str)
        # the next row is flagged when it is more than 10 chars longer
        need_tab = \
            index < last_index and \
            len(pdf_recognize_results[index + 1]) - len(pdf_text) > 10

    res = '\n'.join(output_text)

    with open(output_path, 'w') as output_file:
        output_file.write(res)

    return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path,
              is_vis, is_english=True):
    """
    Recognize one image and export the inference graph as a frozen graph

    The graph definition is written to 'export/input_graph.pb' and the frozen
    graph to 'export/output_graph.pb'.

    :param image_path: path of the image to recognize
    :param weights_path: checkpoint path used for restoring and freezing
    :param char_dict_path: forwarded to CrnnFeatureReader
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :param is_vis: when true the input image is shown with matplotlib
    :param is_english: when true the prediction is split into words
        with wordninja
    :return: None
    """
    new_heigth = 32
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)

    # the aspect-preserving width is still computed although it is
    # overridden right below
    scale_rate = new_heigth / image.shape[0]
    new_width = int(scale_rate * image.shape[1])
    min_width = CFG.ARCH.INPUT_SIZE[0]
    if new_width <= min_width:
        new_width = min_width

    # TODO: Fix it, force 100.
    new_width = 100

    image = cv2.resize(image, (new_width, new_heigth),
                       interpolation=cv2.INTER_LINEAR)
    image_vis = image
    # scale pixel values to [-1.0, 1.0]
    image = np.array(image, np.float32) / 127.5 - 1.0

    print(new_width, new_heigth)

    input_shape = [1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS]
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=input_shape,
                               name='input')

    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)

    net = crnn_net.ShadowNet(phase='test',
                             hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                             layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                             num_classes=CFG.ARCH.NUM_CLASSES)

    inference_ret = net.inference(inputdata=inputdata,
                                  name='shadow_net',
                                  reuse=False)

    sequence_length = int(new_width / 4) * np.ones(1)
    decodes, _ = tf.nn.ctc_beam_search_decoder(
        inputs=inference_ret,
        sequence_length=sequence_length,
        merge_repeated=True,
        beam_width=10)

    # only the first decoded result is evaluated
    decode = decodes[0]
    print(decode)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    with sess.as_default():
        saver.restore(sess=sess, save_path=weights_path)

        decoded = sess.run(decode, feed_dict={inputdata: [image]})
        print(decoded)

        preds = codec.sparse_tensor_to_str(decoded)[0]
        if is_english:
            preds = ' '.join(wordninja.split(preds))

        # dump the graph definition, then freeze it with the checkpoint
        export_dir = 'export'
        input_graph_name = "input_graph.pb"
        output_graph_name = "output_graph.pb"
        input_graph_path = os.path.join(export_dir, input_graph_name)
        output_graph_path = os.path.join(export_dir, output_graph_name)

        tf.train.write_graph(sess.graph, export_dir, input_graph_name)
        tf.logging.info("Write graph at %s." % input_graph_path)

        export_graph = tf.Graph()
        with export_graph.as_default():
            freeze_graph.freeze_graph(input_graph=input_graph_path,
                                      input_saver="",
                                      input_binary=False,
                                      input_checkpoint=weights_path,
                                      output_node_names='CTCBeamSearchDecoder',
                                      restore_op_name="",
                                      filename_tensor_name="",
                                      output_graph=output_graph_path,
                                      clear_devices=True,
                                      initializer_nodes=None,
                                      variable_names_blacklist="")
        tf.logging.info("Export model at %s." % output_graph_path)

        image_name = ops.split(image_path)[1]
        logger.info('Predict image {:s} result: {:s}'.format(image_name, preds))

        if is_vis:
            plt.figure('CRNN Model Demo')
            # reverse the channel order of the cv2 image for matplotlib
            plt.imshow(image_vis[:, :, (2, 1, 0)])
            plt.show()

    sess.close()

    return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path):
    """
    Recognize every image listed in test_img and write one result per line

    The image paths are read from the module level name test_img and the
    predictions are written to the module level name result_txt.

    :param image_path: not used inside this function
    :param weights_path: checkpoint path handed to the saver
    :param char_dict_path: forwarded to CrnnFeatureReader
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :raises ValueError: if one of the images can not be read
    :return: None
    """
    # every image is forced to the same fixed input size
    new_heigth = 32
    new_width = CFG.ARCH.INPUT_SIZE[0]

    image_list = []
    for i, one_of_images in enumerate(test_img):
        print(i, 'image processing')
        # load and normalize an image
        image = cv2.imread(one_of_images, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread reports an unreadable file by returning None
            raise ValueError('Failed to read image {:s}'.format(one_of_images))
        image = cv2.resize(image, (new_width, new_heigth),
                           interpolation=cv2.INTER_LINEAR)
        # scale pixel values to [-1.0, 1.0]
        image_list.append(np.array(image, np.float32) / 127.5 - 1.0)

    inputdata = tf.placeholder(
        dtype=tf.float32,
        shape=[1, new_heigth, new_width, CFG.ARCH.INPUT_CHANNELS],
        name='input'
    )

    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path
    )

    net = crnn_net.ShadowNet(
        phase='test',
        hidden_nums=CFG.ARCH.HIDDEN_UNITS,
        layers_nums=CFG.ARCH.HIDDEN_LAYERS,
        num_classes=CFG.ARCH.NUM_CLASSES
    )

    # reuse was previously False
    inference_ret = net.inference(
        inputdata=inputdata,
        name='shadow_net',
        reuse=tf.AUTO_REUSE
    )

    decodes, _ = tf.nn.ctc_beam_search_decoder(
        inputs=inference_ret,
        sequence_length=int(new_width / 4) * np.ones(1),
        merge_repeated=False,
        beam_width=10
    )

    # config tf saver
    saver = tf.compat.v1.train.Saver()

    # config tf session
    sess_config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH

    sess = tf.compat.v1.Session(config=sess_config)

    # the session is closed even if restoring or inference fails
    try:
        with sess.as_default():
            saver.restore(sess=sess, save_path=weights_path)

            for image in image_list:
                preds = sess.run(decodes, feed_dict={inputdata: [image]})
                preds = codec.sparse_tensor_to_str(preds[0])[0]
                result_txt.write(preds + '\n')
    finally:
        sess.close()

    return
def recognize(image_path, weights_path, char_dict_path, ord_map_dict_path,
              is_vis, is_english, txt_path, test_number=20):
    """
    Recognize the first images listed in an annotation file and print results

    Every annotation line is split on single spaces; the first field is the
    image file and the second field is its label.

    :param image_path: string prefix prepended as-is to every image file
        taken from the annotation file
    :param weights_path: checkpoint dir, its latest checkpoint is restored
    :param char_dict_path: forwarded to CrnnFeatureReader and get_num_class
    :param ord_map_dict_path: forwarded to CrnnFeatureReader
    :param is_vis: when true every input image is shown with matplotlib
    :param is_english: when true the prediction is split into words
        with wordninja
    :param txt_path: path of the annotation file
    :param test_number: maximum number of annotation lines to evaluate
    :raises ValueError: if one of the images can not be read
    :return: None
    """
    print('Test file path {} '.format(txt_path))
    num_classes = get_num_class(char_dict_path)
    print('num_classes: ', num_classes)

    with open(txt_path, 'r') as f1:
        linelist = f1.readlines()

    # slicing keeps short annotation files from running past their end
    image_list = []
    for line in linelist[:test_number]:
        fields = line.split(' ')
        label = fields[1].replace('\r', '').replace('\n', '').replace('\t', '')
        image_list.append((image_path + fields[0], label))

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[
                                   1, CFG.ARCH.INPUT_SIZE[1],
                                   CFG.ARCH.INPUT_SIZE[0],
                                   CFG.ARCH.INPUT_CHANNELS
                               ],
                               name='input')

    codec = tf_io_pipline_fast_tools.CrnnFeatureReader(
        char_dict_path=char_dict_path,
        ord_map_dict_path=ord_map_dict_path)

    net = crnn_net.ShadowNet(phase='test',
                             hidden_nums=CFG.ARCH.HIDDEN_UNITS,
                             layers_nums=CFG.ARCH.HIDDEN_LAYERS,
                             num_classes=num_classes)

    inference_ret = net.inference(inputdata=inputdata,
                                  name='shadow_net',
                                  reuse=False)

    decodes, _ = tf.nn.ctc_beam_search_decoder(
        inputs=inference_ret,
        sequence_length=int(CFG.ARCH.INPUT_SIZE[0] / 4) * np.ones(1),
        merge_repeated=False,
        beam_width=10)

    # config tf saver
    saver = tf.train.Saver()

    # config tf session
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = CFG.TEST.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = CFG.TEST.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    # the session is closed even if restoring or inference fails
    try:
        weights_path = tf.train.latest_checkpoint(weights_path)
        print('weights_path: ', weights_path)

        with sess.as_default():
            saver.restore(sess=sess, save_path=weights_path)

            for image_name, label in image_list:
                image = cv2.imread(image_name, cv2.IMREAD_COLOR)
                if image is None:
                    # cv2.imread reports an unreadable file by returning None
                    raise ValueError(
                        'Failed to read image {:s}'.format(image_name))
                image = cv2.resize(image,
                                   dsize=tuple(CFG.ARCH.INPUT_SIZE),
                                   interpolation=cv2.INTER_LINEAR)
                image_vis = image
                # scale pixel values to [-1.0, 1.0]
                image = np.array(image, np.float32) / 127.5 - 1.0

                preds = sess.run(decodes, feed_dict={inputdata: [image]})
                preds = codec.sparse_tensor_to_str(preds[0])[0]
                if is_english:
                    preds = ' '.join(wordninja.split(preds))

                print('Label[{:20s}] Pred:[{:20s}]'.format(label, preds))

                if is_vis:
                    plt.figure('CRNN Model Demo')
                    # reverse the channel order of the cv2 image for matplotlib
                    plt.imshow(image_vis[:, :, (2, 1, 0)])
                    plt.show()
    finally:
        sess.close()

    return