def recognize(image_path: str, weights_path: str, files_limit=4):
    """Run the CRNN model on a set of images and print the decoded text.

    The graph is built for a fixed batch of 32 inputs of shape
    [32, 100, 3]; the loaded images are copied into the leading slots of a
    zero-filled batch and the remaining slots stay zero.

    Args:
        image_path: Forwarded to ``load_images`` as the image location.
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        files_limit: Forwarded to ``load_images`` (presumably caps the number
            of files that are read -- TODO confirm against ``load_images``).
    """
    batch_size = 32

    decoder = TextFeatureIO().reader
    images, filenames = load_images(image_path, files_limit)
    # NOTE(review): squeeze drops every size-1 axis, including a batch axis
    # of length 1 -- confirm what shape load_images returns for one file.
    images = np.squeeze(images)
    padded_images = np.zeros([batch_size, 32, 100, 3])
    padded_images[:images.shape[0], :, :, :] = images

    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[batch_size, 32, 100, 3],
                               name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(batch_size),
                                               merge_repeated=False)

    # config tf saver
    saver = tf.train.Saver()

    # Using the session as a context manager installs it as the default
    # session and guarantees it is closed, even if restore/run raises.
    with tf.Session() as sess:
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)
        print("Predict...")
        start_time = time()
        predictions = sess.run(decoded,
                               feed_dict={inputdata: padded_images})
        end_time = time()

    print("Prediction time: {}".format(end_time - start_time))
    preds_res = decoder.sparse_tensor_to_str(predictions[0])
    for i, fname in enumerate(filenames):
        print("{}: {}".format(fname, preds_res[i]))
def __init__(self, config: GlobalConfig, dataset_dir: str, weights_path: str = None):
    """Capture the training inputs and prepare logger, decoder and paths.

    Args:
        config: Global configuration; its training section supplies
            ``display_step``.
        dataset_dir: Stored as the dataset directory for later use.
        weights_path: Optional checkpoint path, stored unchanged
            (``None`` by default).
    """
    # Plain copies of the constructor arguments.
    self._config = config
    self._dataset_dir = dataset_dir
    self._weights_path = weights_path

    # Collaborators; created in the same relative order as before.
    self._log = LogFactory.get_logger()
    training_section = config.get_training_config()
    self._display_step = training_section.display_step
    self._decoder = TextFeatureIO().reader

    # The saver is not created here; it starts out unset.
    self._saver = None

    # Output locations.
    self._tboard_save_path = 'tboard'
    self._model_save_path = self._get_model_saver_path()
def __init__(self, tfrecords_path: str, weights_path: str, config: GlobalConfig):
    """Capture the test inputs and read the test/GPU settings from config.

    Args:
        tfrecords_path: Stored as the TFRecords location for later use.
        weights_path: Stored as the checkpoint path for later use.
        config: Global configuration; its test section supplies
            ``batch_size`` and ``merge_repeated_chars``, and its GPU
            section is stored as a whole.
    """
    # Plain copies of the constructor arguments.
    self._tfrecords_path = tfrecords_path
    self._weights_path = weights_path

    # No recognition has been timed yet.
    self._recognition_time = None

    self._log = LogFactory.get_logger()

    # Settings taken from the configuration object.
    self._batch_size = config.get_test_config().batch_size
    self._merge_repeated = config.get_test_config().merge_repeated_chars
    self._gpu_config = config.get_gpu_config()

    self._decoder = TextFeatureIO().reader
def recognize(image_path: str, weights_path: str, config: GlobalConfig, is_vis=True):
    """Recognise the text in a single image and log the prediction.

    Builds the CRNN graph for a batch of one [32, 100, 3] input, restores
    the weights, decodes the network output with CTC beam search and logs
    the first decoded string. The session is always closed, including when
    restoring, inference or plotting raises.

    Args:
        image_path: Path of the image; passed to ``load_and_resize_image``
            and, when ``is_vis`` is true, re-read with OpenCV for display.
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        config: Global configuration; its GPU section supplies the memory
            fraction and the allow-growth flag for the session.
        is_vis: When true, show the input image in a matplotlib window.
    """
    logger = LogFactory.get_logger()
    image = load_and_resize_image(image_path)

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, 32, 100, 3],
                               name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)
    decoder = TextFeatureIO()

    # config tf session
    gpu_config = config.get_gpu_config()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = gpu_config.memory_fraction
    sess_config.gpu_options.allow_growth = gpu_config.is_tf_growth_allowed()

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)
    try:
        with sess.as_default():
            saver.restore(sess=sess, save_path=weights_path)
            preds = sess.run(decodes, feed_dict={inputdata: image})
            preds = decoder.writer.sparse_tensor_to_str(preds[0])
            logger.info('Predict image {:s} label {:s}'.format(
                ops.split(image_path)[1], preds[0]))

            if is_vis:
                plt.figure('CRNN Model Demo')
                # Reverse the channel order of the OpenCV image for display.
                plt.imshow(
                    cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
                plt.show()
    finally:
        # Release the session even if anything above failed.
        sess.close()
class CrnnTrainer:
    """Trains the CRNN network with a CTC loss from a TFRecords dataset.

    Checkpoints are written under ``model/`` and TensorBoard summaries
    under ``tboard/``.
    """

    def __init__(self, config: GlobalConfig, dataset_dir: str, weights_path: str = None):
        """Store the configuration and prepare logger, decoder and paths.

        Args:
            config: Global configuration (training and GPU sections are
                read by this class).
            dataset_dir: Directory containing ``train_feature.tfrecords``.
            weights_path: Optional checkpoint to restore before training;
                ``None`` means variables are freshly initialised.
        """
        self._log = LogFactory.get_logger()
        self._dataset_dir = dataset_dir
        self._weights_path = weights_path
        self._config = config
        self._display_step = config.get_training_config().display_step
        self._decoder = TextFeatureIO().reader
        self._saver = None
        self._tboard_save_path = 'tboard'
        self._model_save_path = self._get_model_saver_path()

    def train(self):
        """Build the training graph and run the configured number of epochs.

        The session and the summary writer are always closed, and the input
        queue threads are always stopped, even when a training step raises.
        """
        training_config = self._config.get_training_config()

        images, labels = self._build_data_feed(training_config.batch_size)
        net_out = self._build_net_model(images)

        cost = tf.reduce_mean(
            tf.nn.ctc_loss(
                labels=labels,
                inputs=net_out,
                sequence_length=25 * np.ones(training_config.batch_size)))
        decoded, _ = tf.nn.ctc_beam_search_decoder(
            net_out,
            25 * np.ones(training_config.batch_size),
            merge_repeated=False)
        sequence_dist = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), labels))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        starter_learning_rate = training_config.learning_rate
        learning_rate = tf.train.exponential_decay(
            starter_learning_rate,
            global_step,
            training_config.lr_decay_steps,
            training_config.lr_decay_rate,
            staircase=True)

        # Run the ops collected under UPDATE_OPS before each optimiser step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=learning_rate).minimize(
                    loss=cost, global_step=global_step)

        merge_summary_op = self._configure_tf_summary(cost, learning_rate,
                                                      sequence_dist)
        self._saver = tf.train.Saver(max_to_keep=20)

        sess = self._create_session()
        summary_writer = None
        try:
            summary_writer = tf.summary.FileWriter(self._tboard_save_path)
            summary_writer.add_graph(sess.graph)

            with sess.as_default():
                self._initialize_model(sess)
                self._run_epochs(sess, summary_writer,
                                 training_config.epochs, optimizer, cost,
                                 sequence_dist, decoded, labels,
                                 merge_summary_op)
        finally:
            # Close the writer so buffered summaries reach disk, then
            # release the session regardless of how training ended.
            if summary_writer is not None:
                summary_writer.close()
            sess.close()

        self._log.info('Training finished.')

    def _run_epochs(self, sess, summary_writer, train_epochs, optimizer,
                    cost, sequence_dist, decoded, input_labels,
                    merge_summary_op):
        """Start the input queue runners and execute every training epoch."""
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            for epoch in range(train_epochs):
                self._train_epoch(sess, summary_writer, epoch, optimizer,
                                  cost, sequence_dist, decoded, input_labels,
                                  merge_summary_op)
        finally:
            # Always stop the queue threads, otherwise they outlive a
            # failed training run.
            coord.request_stop()
            coord.join(threads=threads)

    def _train_epoch(self, sess, summary_writer, epoch, optimizer, cost,
                     sequence_dist, decoded, input_labels, merge_summary_op):
        """Run one optimisation step, log its stats and save a checkpoint.

        Despite the name, each call performs a single ``sess.run`` of the
        optimizer op; a summary and a checkpoint are written on every call.
        """
        _, c, seq_distance, preds_r, gt_labels_r, summary = sess.run([
            optimizer, cost, sequence_dist, decoded, input_labels,
            merge_summary_op
        ])

        preds = self._decoder.sparse_tensor_to_str(preds_r[0])
        gt_labels = self._decoder.sparse_tensor_to_str(gt_labels_r)
        accuracy = get_batch_accuracy(preds, gt_labels)
        mean_accuracy = calculate_array_mean(accuracy)

        self._log_epoch_stats(c, epoch, mean_accuracy, seq_distance)
        summary_writer.add_summary(summary=summary, global_step=epoch)
        self._saver.save(sess=sess,
                         save_path=self._model_save_path,
                         global_step=epoch)

    def _log_epoch_stats(self, c, epoch, mean_accuracy, seq_distance):
        """Log cost, sequence distance and accuracy every ``display_step`` epochs."""
        if epoch % self._display_step == 0:
            self._log.info(
                'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                .format(epoch + 1, c, seq_distance, mean_accuracy))

    def _create_session(self):
        """Create a session configured from the GPU section of the config."""
        gpu_config = self._config.get_gpu_config()
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.per_process_gpu_memory_fraction = gpu_config.memory_fraction
        sess_config.gpu_options.allow_growth = gpu_config.is_tf_growth_allowed()
        return tf.Session(config=sess_config)

    def _build_data_feed(self, batch_size):
        """Build the shuffled input pipeline; returns (images, labels).

        Images are cast to float32; labels are returned as produced by
        ``tf.train.shuffle_batch``.
        """
        self._log.info('Build data feed...')
        images, labels, _ = self._decoder.read_features(
            ops.join(self._dataset_dir, 'train_feature.tfrecords'))
        inputdata, input_labels = tf.train.shuffle_batch(
            tensors=[images, labels],
            batch_size=batch_size,
            capacity=1000 + 2 * 32,
            min_after_dequeue=100,
            num_threads=4)
        inputdata = tf.cast(x=inputdata, dtype=tf.float32)
        return inputdata, input_labels

    def _build_net_model(self, input_data):
        """Build the training-phase CRNN under the 'shadow' variable scope."""
        self._log.info('Build net model...')
        crnn = CRNN(phase='Train',
                    hidden_nums=256,
                    seq_length=25,
                    num_classes=37)
        with tf.variable_scope('shadow', reuse=False):
            net_out = crnn.build(inputdata=input_data)
        return net_out

    @classmethod
    def _get_model_saver_path(cls):
        """Return a timestamped checkpoint path, creating ``model/`` if needed."""
        model_save_dir = 'model'
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(model_save_dir, exist_ok=True)
        train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                         time.localtime(time.time()))
        model_name = 'crnn_{:s}.ckpt'.format(str(train_start_time))
        return ops.join(model_save_dir, model_name)

    def _configure_tf_summary(self, cost, learning_rate, sequence_dist):
        """Register the scalar summaries and return the merged summary op."""
        self._log.info('Configure TF summary...')
        os.makedirs(self._tboard_save_path, exist_ok=True)
        tf.summary.scalar(name='Cost', tensor=cost)
        tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
        tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
        return tf.summary.merge_all()

    def _initialize_model(self, sess):
        """Initialise variables from scratch or restore them from a checkpoint."""
        if self._weights_path is None:
            self._log.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            self._log.info('Restore model from {:s}'.format(
                self._weights_path))
            self._saver.restore(sess=sess, save_path=self._weights_path)
def recognize(image_path: str, weights_path: str, output_file: str, files_limit=32):
    """Predict text for every file in a folder and append the results to a CSV.

    Files are processed in chunks of ``BATCH_SIZE``. For each image one line
    is appended to ``output_file``: the file's base name followed by
    ``NUMBER_OF_PREDICTIONS`` pairs of ``,<decoded text>,<probability>``,
    where the probabilities are a softmax over the beam-search
    log-probabilities of that image.

    Args:
        image_path: Folder whose regular files are used as input images.
        weights_path: Checkpoint path handed to ``tf.train.Saver.restore``.
        output_file: CSV file opened in append mode.
        files_limit: Forwarded to ``load_images`` (presumably caps the number
            of files read per chunk -- TODO confirm against ``load_images``).
    """
    decoder = TextFeatureIO().reader

    # Read all the files in the images folder
    files = [
        join(image_path, f) for f in listdir(image_path)
        if isfile(join(image_path, f))
    ]

    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)

    # top_paths=NUMBER_OF_PREDICTIONS is the number of words to predict
    decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(
        net_out,
        25 * np.ones(BATCH_SIZE),
        merge_repeated=False,
        top_paths=NUMBER_OF_PREDICTIONS)

    # config tf saver
    saver = tf.train.Saver()

    # The context manager installs the session as default and closes it on
    # exit, including when restore or inference raises.
    with tf.Session() as sess:
        # restore the model weights
        print("Restoring trained model")
        saver.restore(sess=sess, save_path=weights_path)
        print("Predicting {} images in chunks of {}".format(
            len(files), BATCH_SIZE))
        starting_time = time()

        # Run inference in groups of BATCH_SIZE images,
        # with all the files from the provided folder
        for group in chunker(files, BATCH_SIZE):
            start_time = time()
            images, filenames = load_images(group, files_limit)
            # NOTE(review): squeeze drops every size-1 axis, including a
            # batch axis of length 1 -- confirm load_images' output shape.
            images = np.squeeze(images)
            padded_images = np.zeros([BATCH_SIZE, 32, 100, 3])
            padded_images[:images.shape[0], :, :, :] = images

            predictions, probs = sess.run(
                [decoded, log_probabilities],
                feed_dict={inputdata: padded_images})

            # Decode each of the top paths once per batch instead of once
            # per image and path.
            decoded_paths = [
                decoder.sparse_tensor_to_str(predictions[x])
                for x in range(NUMBER_OF_PREDICTIONS)
            ]

            lines = []
            for i, fname in enumerate(filenames):
                # Softmax over the log-probabilities; subtracting the maximum
                # leaves the result unchanged mathematically but prevents
                # every exp() from underflowing to zero (0/0 -> nan).
                shifted = probs[i, :] - np.max(probs[i, :])
                exp_shifted = np.exp(shifted)
                e_x = exp_shifted / np.sum(exp_shifted)

                # build the N predictions for this image
                result = ''.join(
                    ',{:s},{:f}'.format(decoded_paths[x][i], e_x[x])
                    for x in range(NUMBER_OF_PREDICTIONS))
                lines.append(basename(fname) + result + '\n')

            # Append the whole batch with a single open/close.
            if lines:
                with open(output_file, 'a') as f:
                    f.writelines(lines)

            end_time = time()
            print("Prediction time for {} images: {}".format(
                BATCH_SIZE, end_time - start_time))

        # Measured with a fresh timestamp so an empty folder (no batches)
        # no longer fails on an undefined end_time.
        print("Total prediction time: {}".format(time() - starting_time))

    print("Predictions saved in file {}".format(output_file))