def test():
    """Run the restored network over every file listed in FLAGS.dataset_dir.

    Builds the inference graph, restores FLAGS.checkpoint_path (optionally
    through moving-average shadow names) and passes the pixel/link score maps
    of each image to ``to_txt``. Progress is reported through ``tqdm``.
    """
    with tf.name_scope('test'):
        image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
        # Declared but neither fed nor read anywhere in this function.
        image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
        processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
            image, None, None, None, None,
            out_shape=config.image_shape,
            data_format=config.data_format,
            is_training=False)
        batched_image = tf.expand_dims(processed_image, axis=0)
        net = pixel_link_symbol.PixelLinkNet(batched_image, is_training=False)
        global_step = slim.get_or_create_global_step()

    # Negative fraction -> grow on demand, positive -> fixed share, 0 -> default.
    session_options = tf.ConfigProto(log_device_placement=False,
                                     allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        session_options.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        session_options.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    checkpoint_dir = util.io.get_dir(FLAGS.checkpoint_path)
    logdir = util.io.join_path(
        checkpoint_dir, 'test',
        FLAGS.dataset_name + '_' + FLAGS.dataset_split_name)

    # Variables to restore: moving avg. or normal weights.
    if not FLAGS.using_moving_average:
        restore_map = slim.get_variables_to_restore()
    else:
        averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        restore_map = averages.variables_to_restore()
        restore_map[global_step.op.name] = global_step
    saver = tf.train.Saver(var_list=restore_map)

    image_names = util.io.ls(FLAGS.dataset_dir)
    image_names.sort()

    checkpoint = FLAGS.checkpoint_path
    checkpoint_name = util.io.get_filename(str(checkpoint))
    dump_path = util.io.join_path(logdir, checkpoint_name)
    txt_path = util.io.join_path(dump_path, 'test')

    with tf.Session(config=session_options) as sess:
        saver.restore(sess, checkpoint)
        for image_name in tqdm(image_names):
            image_path = util.io.join_path(FLAGS.dataset_dir, image_name)
            image_data = util.img.imread(image_path, rgb=True)
            # Everything before the first dot is used as the output name.
            image_name = image_name.split('.')[0]
            pixel_pos_scores, link_pos_scores = sess.run(
                [net.pixel_pos_scores, net.link_pos_scores],
                feed_dict={image: image_data})
            to_txt(txt_path, image_name, image_data,
                   pixel_pos_scores, link_pos_scores)
def load_net_for_inference():
    """Build the inference graph together with a saver for its weights.

    Returns:
        A ``(net, saver, image)`` tuple: the ``PixelLinkNet`` instance, a
        ``tf.train.Saver`` over the variables selected for restoring, and the
        int32 ``[None, None, 3]`` placeholder that feeds the network.
    """
    global_step = slim.get_or_create_global_step()

    with tf.name_scope('output'), \
            tf.variable_scope(tf.get_variable_scope(), reuse=False):
        image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
        # Declared but neither fed nor read anywhere in this function.
        image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
        processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
            image, None, None, None, None,
            out_shape=config.image_shape,
            data_format=config.data_format,
            is_training=False)
        batched_image = tf.expand_dims(processed_image, axis=0)

        # Build the model (no loss is attached for inference).
        net = pixel_link_symbol.PixelLinkNet(batched_image, is_training=False)

    # NOTE(review): this session config is assembled here but is neither
    # returned nor used inside this function.
    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    # Variables to restore: moving avg. or normal weights.
    if not FLAGS.using_moving_average:
        restore_map = slim.get_variables_to_restore()
    else:
        averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        restore_map = averages.variables_to_restore(tf.trainable_variables())
        restore_map[global_step.op.name] = global_step

    saver = tf.train.Saver(var_list=restore_map)
    return net, saver, image
def __init__(self):
    """Build the inference graph for a fixed 768x768 NHWC input.

    Sets three attributes: ``self.image`` (int32 ``[None, None, 3]``
    placeholder), ``self.net`` (the ``PixelLinkNet``) and ``self.masks``
    (output of ``tf_decode_score_map_to_mask_in_batch`` on the net scores).
    """
    self.image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
    # Declared but neither fed nor read anywhere in this method.
    image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])

    processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
        self.image, None, None, None, None,
        out_shape=(768, 768),
        data_format='NHWC',
        is_training=False)
    batched_image = tf.expand_dims(processed_image, axis=0)

    # Build the model, then decode its score maps into masks.
    self.net = pixel_link_symbol.PixelLinkNet(batched_image,
                                              is_training=False)
    pixel_scores = self.net.pixel_pos_scores
    link_scores = self.net.link_pos_scores
    self.masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
        pixel_scores, link_scores)
def main(_):
    """Freeze the latest checkpoint in FLAGS.train_dir into FLAGS.output_file.

    The exported graph takes an int32 placeholder named ``input_image`` and
    exposes two output nodes, ``pixel_pos_scores`` and ``link_pos_scores``.
    If no checkpoint state is found in FLAGS.train_dir the function logs a
    message and returns without writing anything.

    Args:
        _: unused positional argument.
    """
    image_shape = (FLAGS.export_image_height, FLAGS.export_image_width)
    config.load_config(FLAGS.train_dir)
    config.init_config(image_shape,
                       batch_size=1,
                       pixel_conf_threshold=0.8,
                       link_conf_threshold=0.8,
                       num_gpus=1,
                       )

    image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3],
                           name='input_image')
    processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
        image, None, None, None, None,
        out_shape=config.image_shape,
        data_format=config.data_format,
        is_training=False)
    b_image = tf.expand_dims(processed_image, axis=0)
    net = pixel_link_symbol.PixelLinkNet(b_image, is_training=True)

    # The identity ops only exist to give the outputs stable node names that
    # convert_variables_to_constants can be pointed at below.
    tf.identity(net.pixel_pos_scores, name='pixel_pos_scores')
    tf.identity(net.link_pos_scores, name='link_pos_scores')

    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    if not (ckpt and ckpt.model_checkpoint_path):
        tf.logging.info('Checkpoint not exists in FLAGS.train_dir')
        return

    with tf.Session() as sess:
        saver.restore(sess, ckpt.model_checkpoint_path)
        output_graph_def = graph_util.convert_variables_to_constants(
            sess, sess.graph_def,
            output_node_names=['pixel_pos_scores', 'link_pos_scores'])
        # Serialize exactly once; the previous code serialized the whole
        # frozen graph twice and discarded the first result.
        serialized_graph = output_graph_def.SerializeToString()
        with tf.gfile.FastGFile(FLAGS.output_file, mode='wb+') as f:
            print('write file : ' + FLAGS.output_file)
            f.write(serialized_graph)
        print('Write finish!')
def test():
    """Run the restored network on every 100th frame of each video.

    Every entry of FLAGS.dataset_dir is opened with ``cv2.VideoCapture``.
    For each sampled frame the pixel/link score maps are computed and handed
    to ``to_txt`` under the name ``<video basename>-<frame number>.jpg``.
    Frame numbers are 1-based. The capture handle is released after each
    video, including when processing a frame raises.
    """
    with tf.name_scope('test'):
        image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
        # Declared but neither fed nor read anywhere in this function.
        image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
        processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
            image, None, None, None, None,
            out_shape=config.image_shape,
            data_format=config.data_format,
            is_training=False)
        b_image = tf.expand_dims(processed_image, axis=0)
        net = pixel_link_symbol.PixelLinkNet(b_image, is_training=True)
        global_step = slim.get_or_create_global_step()

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    # Variables to restore: moving avg. or normal weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(var_list=variables_to_restore)

    video_names = util.io.ls(FLAGS.dataset_dir)
    video_names.sort()

    checkpoint = FLAGS.checkpoint_path
    checkpoint_dir = util.io.get_dir(FLAGS.checkpoint_path)
    output_dir = util.io.get_dir(FLAGS.output_dir)

    step = 100  # only every `step`-th frame is run through the network

    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, checkpoint)
        for video_idx, video_name in enumerate(video_names):
            basename = os.path.splitext(os.path.basename(video_name))[0]
            vidcap = cv2.VideoCapture(
                util.io.join_path(FLAGS.dataset_dir, video_name))
            try:
                count = 0
                while True:
                    success, image_data = vidcap.read()
                    if not success:
                        break
                    count += 1
                    if count % step != 0:
                        continue
                    pixel_pos_scores, link_pos_scores = sess.run(
                        [net.pixel_pos_scores, net.link_pos_scores],
                        feed_dict={image: image_data})
                    image_name = basename + '-' + str(count) + '.jpg'
                    to_txt(output_dir, image_name, image_data,
                           pixel_pos_scores, link_pos_scores)
            finally:
                # Free the decoder/file handle even if a frame fails.
                vidcap.release()
            print('%d/%d: %s' % (video_idx + 1, len(video_names), video_name))
def test():
    """Detect text boxes on every image in FLAGS.dataset_dir and show them.

    Restores the latest checkpoint under FLAGS.checkpoint_path, decodes the
    network scores into a mask, converts the mask to boxes with
    ``pixel_link.mask_to_bboxes``, draws the boxes in red on the image and
    passes the image to ``util.sit``.

    The GPU options assembled in ``sess_config`` are now actually passed to
    the session; previously the config was built and then ignored.
    """
    checkpoint_dir = util.io.get_dir(FLAGS.checkpoint_path)

    global_step = slim.get_or_create_global_step()
    with tf.name_scope('evaluation_%dx%d' % (FLAGS.eval_image_height,
                                             FLAGS.eval_image_width)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
            # Declared but neither fed nor read anywhere in this function.
            image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
            processed_image, _, _, _, _ = \
                ssd_vgg_preprocessing.preprocess_image(
                    image, None, None, None, None,
                    out_shape=config.image_shape,
                    data_format=config.data_format,
                    is_training=False)
            b_image = tf.expand_dims(processed_image, axis=0)

            # build model and decode its scores into masks
            net = pixel_link_symbol.PixelLinkNet(b_image, is_training=False)
            masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
                net.pixel_pos_scores, net.link_pos_scores)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    # Variables to restore: moving avg. or normal weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore(
            tf.trainable_variables())
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(var_list=variables_to_restore)

    def draw_bboxes(img, bboxes, color):
        # Each bbox is reshaped to four (x, y) corner points.
        for bbox in bboxes:
            points = np.reshape(bbox, [4, 2])
            cnts = util.img.points_to_contours(points)
            util.img.draw_contours(img, contours=cnts, idx=-1,
                                   color=color, border_width=1)

    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, util.tf.get_latest_ckpt(FLAGS.checkpoint_path))

        files = util.io.ls(FLAGS.dataset_dir)
        for image_name in files:
            file_path = util.io.join_path(FLAGS.dataset_dir, image_name)
            image_data = util.img.imread(file_path)
            link_scores, pixel_scores, mask_vals = sess.run(
                [net.link_pos_scores, net.pixel_pos_scores, masks],
                feed_dict={image: image_data})
            h, w, _ = image_data.shape

            def resize(img):
                return util.img.resize(img, size=(w, h),
                                       interpolation=cv2.INTER_NEAREST)

            # The graph is fed one image at a time, so only index 0 is read.
            image_idx = 0
            pixel_score = pixel_scores[image_idx, ...]
            mask = mask_vals[image_idx, ...]

            bboxes_det = pixel_link.mask_to_bboxes(mask, image_data.shape)

            mask = resize(mask)
            pixel_score = resize(pixel_score)

            draw_bboxes(image_data, bboxes_det, util.img.COLOR_RGB_RED)
            # Single-argument call form prints identically on Python 2 and 3.
            print(util.sit(image_data))
def create_clones(batch_queue):
    """Build one model clone per configured GPU and return the train op.

    Each clone dequeues a batch, builds ``PixelLinkNet`` with its loss and
    contributes gradients of ``loss / config.num_clones``; the
    regularization loss is added to the first clone only. Gradients are
    summed with ``sum_gradients`` and applied by a momentum optimizer.

    Args:
        batch_queue: object whose ``dequeue()`` yields five tensors: image,
            pixel-cls label, pixel-cls weight, pixel-link label and
            pixel-link weight.

    Returns:
        The ``pixel_link_loss`` tensor wrapped by
        ``control_flow_ops.with_dependencies`` so that evaluating it also
        runs the gradient update, the update op from
        ``util.tf.get_update_op()`` (when not None) and, if enabled, the
        moving-average update.
    """
    with tf.device('/cpu:0'):
        global_step = slim.create_global_step()
        learning_rate = tf.constant(FLAGS.learning_rate,
                                    name='learning_rate')
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=FLAGS.momentum,
                                               name='Momentum')
        tf.summary.scalar('learning_rate', learning_rate)

    # place clones
    pixel_link_loss = 0  # for summary only
    gradients = []
    for clone_idx, gpu in enumerate(config.gpus):
        do_summary = clone_idx == 0  # only summary on the first clone
        reuse = clone_idx > 0  # later clones share the first clone's variables
        with tf.variable_scope(tf.get_variable_scope(), reuse=reuse):
            with tf.name_scope(config.clone_scopes[clone_idx]) as clone_scope:
                with tf.device(gpu):
                    b_image, b_pixel_cls_label, b_pixel_cls_weight, \
                        b_pixel_link_label, b_pixel_link_weight = \
                        batch_queue.dequeue()

                    # build model and loss
                    net = pixel_link_symbol.PixelLinkNet(b_image,
                                                         is_training=True)
                    net.build_loss(pixel_cls_labels=b_pixel_cls_label,
                                   pixel_cls_weights=b_pixel_cls_weight,
                                   pixel_link_labels=b_pixel_link_label,
                                   pixel_link_weights=b_pixel_link_weight,
                                   do_summary=do_summary)

                    # gather the losses registered under this clone's scope
                    losses = tf.get_collection(tf.GraphKeys.LOSSES,
                                               clone_scope)
                    assert len(losses) == 2
                    total_clone_loss = tf.add_n(losses) / config.num_clones
                    pixel_link_loss += total_clone_loss

                    # gather regularization loss and add to clone_0 only
                    if clone_idx == 0:
                        regularization_loss = tf.add_n(
                            tf.get_collection(
                                tf.GraphKeys.REGULARIZATION_LOSSES))
                        total_clone_loss = \
                            total_clone_loss + regularization_loss

                    # compute clone gradients
                    clone_gradients = optimizer.compute_gradients(
                        total_clone_loss)
                    gradients.append(clone_gradients)

    tf.summary.scalar('pixel_link_loss', pixel_link_loss)
    tf.summary.scalar('regularization_loss', regularization_loss)

    # add all gradients together
    # note that the gradients do not need to be averaged, because the
    # average operation has been done on loss.
    averaged_gradients = sum_gradients(gradients)

    apply_grad_op = optimizer.apply_gradients(averaged_gradients,
                                              global_step=global_step)
    train_ops = [apply_grad_op]

    bn_update_op = util.tf.get_update_op()
    if bn_update_op is not None:
        train_ops.append(bn_update_op)

    # moving average
    if FLAGS.using_moving_average:
        # The old literal used a backslash line continuation inside the
        # string, which leaked the continuation into the logged message.
        tf.logging.info('using moving average in training, with decay = %f'
                        % (FLAGS.moving_average_decay))
        ema = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        # control_dependencies only affects ops created inside the block, so
        # the EMA update itself must be built here to run after the
        # gradient step. Previously it was built outside and only wrapped in
        # tf.group, which imposed no ordering on it.
        with tf.control_dependencies([apply_grad_op]):
            ema_op = ema.apply(tf.trainable_variables())
        train_ops.append(tf.group(ema_op))

    train_op = control_flow_ops.with_dependencies(train_ops, pixel_link_loss,
                                                  name='train_op')
    return train_op
def create_model():
    """Build the 360x640 inference graph and dump it with fresh weights.

    After building the graph this prints every trainable variable with its
    shape and parameter count, runs the global variable initializer, and
    writes a checkpoint (``save/mobile_net_0.01.ckpt``) plus two serialized
    copies of the graph definition.
    """
    output_graph = 'frozen_model.pb'
    config_initialization()

    global_step = slim.get_or_create_global_step()
    scope_name = 'evaluation_%dx%d' % (FLAGS.eval_image_height,
                                       FLAGS.eval_image_width)
    with tf.name_scope(scope_name), \
            tf.variable_scope(tf.get_variable_scope(), reuse=False):
        image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3],
                               name='net/input_images')
        # Declared but neither fed nor read anywhere in this function.
        image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
        processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
            image, None, None, None, None,
            out_shape=[360, 640],
            data_format=config.data_format,
            is_training=False)
        batched_image = tf.expand_dims(processed_image, axis=0)

        # Build the model only; no loss is attached.
        net = pixel_link_symbol.PixelLinkNet(batched_image, is_training=False)

    # NOTE(review): this restore map is computed but the saver below is
    # created without it.
    averages = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
    variables_to_restore = averages.variables_to_restore(
        tf.trainable_variables())
    variables_to_restore[global_step.op.name] = global_step

    saver = tf.train.Saver()

    # Report the size of every trainable variable and the grand total.
    total_parameters = 0
    for var in tf.trainable_variables():
        var_shape = var.get_shape()  # iterable of tf.Dimension
        print("name:", var.name)
        print("shape:", var_shape)
        n_params = 1
        for dimension in var_shape:
            n_params *= dimension.value
        print('variable:', n_params)
        total_parameters += n_params
    print('total:', total_parameters)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(tf.contrib.framework.get_variables_to_restore())

        saver.save(sess, 'save/mobile_net_0.01.ckpt')
        tf.train.write_graph(sess.graph_def, '.', 'mobile_net_0.01' + '.pb',
                             as_text=False)

        graph_def = sess.graph_def
        from tensorflow.python.platform import gfile
        # NOTE(review): the target has a .pbtxt extension but receives the
        # binary SerializeToString() bytes - confirm this is intended.
        with gfile.GFile('./save/mobile_net_0.01.pbtxt', 'wb') as f:
            f.write(graph_def.SerializeToString())
def test():
    """Detect text boxes on every image and export them to a CSV file.

    For each file in FLAGS.dataset_dir the decoded mask is converted to
    boxes; each box contributes one CSV row
    ``[image, id, xMin, xMax, yMin, yMax]`` (ids start at 1 per image). The
    boxes are also drawn in red on the image, which is passed to
    ``util.sit``. All rows are written once, after the last image.

    The GPU options assembled in ``sess_config`` are now actually passed to
    the session; previously the config was built and then ignored.
    """
    checkpoint_dir = util.io.get_dir(FLAGS.checkpoint_path)

    global_step = slim.get_or_create_global_step()
    with tf.name_scope('evaluation_%dx%d' % (FLAGS.eval_image_height,
                                             FLAGS.eval_image_width)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
            # Declared but neither fed nor read anywhere in this function.
            image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
            processed_image, _, _, _, _ = \
                ssd_vgg_preprocessing.preprocess_image(
                    image, None, None, None, None,
                    out_shape=config.image_shape,
                    data_format=config.data_format,
                    is_training=False)
            b_image = tf.expand_dims(processed_image, axis=0)

            # build model and decode its scores into masks
            net = pixel_link_symbol.PixelLinkNet(b_image, is_training=False)
            masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
                net.pixel_pos_scores, net.link_pos_scores)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    # Variables to restore: moving avg. or normal weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore(
            tf.trainable_variables())
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(var_list=variables_to_restore)

    def draw_bboxes(img, bboxes, color):
        # Each bbox is reshaped to four (x, y) corner points.
        for bbox in bboxes:
            points = np.reshape(bbox, [4, 2])
            cnts = util.img.points_to_contours(points)
            util.img.draw_contours(img, contours=cnts, idx=-1,
                                   color=color, border_width=1)

    def get_box_info(bboxes, name):
        # Axis-aligned extent of every box, numbered from 1.
        boxes = []
        for box_idx, bbox in enumerate(bboxes):
            points = np.reshape(bbox, [4, 2])
            xs = points[:, 0]
            ys = points[:, 1]
            boxes.append([name, box_idx + 1,
                          min(xs), max(xs), min(ys), max(ys)])
        return boxes

    rows = [["image", "id", "xMin", "xMax", "yMin", "yMax"]]

    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, util.tf.get_latest_ckpt(FLAGS.checkpoint_path))

        files = util.io.ls(FLAGS.dataset_dir)
        for image_name in files:
            file_path = util.io.join_path(FLAGS.dataset_dir, image_name)
            image_data = util.img.imread(file_path)
            link_scores, pixel_scores, mask_vals = sess.run(
                [net.link_pos_scores, net.pixel_pos_scores, masks],
                feed_dict={image: image_data})
            h, w, _ = image_data.shape

            def resize(img):
                return util.img.resize(img, size=(w, h),
                                       interpolation=cv2.INTER_NEAREST)

            # The graph is fed one image at a time, so only index 0 is read.
            image_idx = 0
            pixel_score = pixel_scores[image_idx, ...]
            mask = mask_vals[image_idx, ...]

            bboxes_det = pixel_link.mask_to_bboxes(mask, image_data.shape)

            mask = resize(mask)
            pixel_score = resize(pixel_score)

            rows += get_box_info(bboxes_det, image_name)

            draw_bboxes(image_data, bboxes_det, util.img.COLOR_RGB_RED)
            print(util.sit(image_data))

    # NOTE(review): the output location is hard-coded to a developer path.
    with open('/Users/ci.chen/temp/no-use/images/result.csv', 'w') as File:
        writer = csv.writer(File)
        writer.writerows(rows)
def test():
    """Run the restored network over a dataset and zip the txt results.

    Restores FLAGS.checkpoint_path through moving-average shadow names,
    passes the pixel/link score maps of every image in FLAGS.dataset_dir to
    ``to_txt`` and finally zips the txt directory with a shell command
    (the in-code comment says this is for icdar2015).

    All ``print`` statements were Python-2-only syntax; they are now
    single-argument calls, which print the same text on Python 2 and 3.
    """
    with tf.name_scope('test'):
        image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
        processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
            image, None, None, None, None,
            out_shape=config.image_shape,
            data_format=config.data_format,
            is_training=False)
        b_image = tf.expand_dims(processed_image, axis=0)
        net = pixel_link_symbol.PixelLinkNet(b_image, is_training=True)
        global_step = slim.get_or_create_global_step()

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    checkpoint_dir = util.io.get_dir(FLAGS.checkpoint_path)
    logdir = util.io.join_path(
        checkpoint_dir, 'test',
        FLAGS.dataset_name + '_' + FLAGS.dataset_split_name)

    # This variant always restores through the moving-average names.
    variable_averages = tf.train.ExponentialMovingAverage(
        FLAGS.moving_average_decay)
    variables_to_restore = variable_averages.variables_to_restore()
    variables_to_restore[global_step.op.name] = global_step
    saver = tf.train.Saver(var_list=variables_to_restore)

    image_names = util.io.ls(FLAGS.dataset_dir)
    image_names.sort()

    checkpoint = FLAGS.checkpoint_path
    checkpoint_name = util.io.get_filename(str(checkpoint))
    dump_path = util.io.join_path(logdir, checkpoint_name)
    txt_path = util.io.join_path(dump_path, 'txt')
    zip_path = util.io.join_path(dump_path, checkpoint_name + '_det.zip')

    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, checkpoint)

        for image_idx, image_name in enumerate(image_names):
            image_data = util.img.imread(
                util.io.join_path(FLAGS.dataset_dir, image_name), rgb=True)
            # Everything before the first dot is used as the output name.
            image_name = image_name.split('.')[0]
            pixel_pos_scores, link_pos_scores = sess.run(
                [net.pixel_pos_scores, net.link_pos_scores],
                feed_dict={image: image_data})

            print('%d/%d: %s' % (image_idx + 1, len(image_names), image_name))
            to_txt(txt_path, image_name, image_data,
                   pixel_pos_scores, link_pos_scores)

    # create zip file for icdar2015
    cmd = 'cd %s;zip -j %s %s/*' % (dump_path, zip_path, txt_path)
    print(cmd)
    util.cmd.cmd(cmd)
    # Formatted into one string so the output is the same on Python 2 and 3.
    print('%s %s' % ("zip file created: ",
                     util.io.join_path(dump_path, zip_path)))
def test(checkpoint_path):
    """Evaluate one checkpoint on FLAGS.dataset_dir and zip the txt results.

    Each image is resized to (FLAGS.eval_image_width,
    FLAGS.eval_image_height) before being fed to the network; the value
    returned by ``calculate_scale`` for the original image is passed on to
    ``to_txt`` together with the fetched scores. The additional
    ``*_scores_add`` outputs are fetched as well when the network has them.

    Args:
        checkpoint_path: checkpoint to restore; its directory and file name
            also determine where results are written.
    """
    with tf.name_scope('test'):
        image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
        # Declared but neither fed nor read anywhere in this function.
        image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
        processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
            image, None, None, None, None,
            out_shape=config.image_shape,
            data_format=config.data_format,
            is_training=False)
        b_image = tf.expand_dims(processed_image, axis=0)
        net = pixel_link_symbol.PixelLinkNet(b_image, is_training=True)
        global_step = slim.get_or_create_global_step()

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    checkpoint_dir = util.io.get_dir(checkpoint_path)
    logdir = util.io.join_path(
        checkpoint_dir, 'test',
        FLAGS.dataset_name + '_' + FLAGS.dataset_split_name)

    # Variables to restore: moving avg. or normal weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(var_list=variables_to_restore)

    image_names = util.io.ls(FLAGS.dataset_dir)
    image_names.sort()

    checkpoint_name = util.io.get_filename(str(checkpoint_path))
    dump_path = util.io.join_path(logdir, checkpoint_name)
    txt_path = util.io.join_path(dump_path, 'txt')
    zip_path = util.io.join_path(dump_path, checkpoint_name + '_det.zip')

    # The fetch list does not depend on the image, so build it once.
    score_nodes = [net.pixel_pos_scores, net.link_pos_scores]
    if net.pixel_pos_scores_add is not None:
        score_nodes.extend(
            [net.pixel_pos_scores_add, net.link_pos_scores_add])

    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, checkpoint_path)

        for image_idx, image_name in enumerate(image_names):
            image_data = util.img.imread(
                util.io.join_path(FLAGS.dataset_dir, image_name), rgb=True)
            # Scale is taken from the original image, before resizing.
            scale = calculate_scale(image_data)
            image_data = cv2.resize(
                image_data,
                (FLAGS.eval_image_width, FLAGS.eval_image_height),
                interpolation=cv2.INTER_AREA)
            # Everything before the first dot is used as the output name.
            image_name = image_name.split('.')[0]

            results = sess.run(score_nodes, feed_dict={image: image_data})

            print('%d/%d: %s' % (image_idx + 1, len(image_names), image_name))
            to_txt(txt_path, image_name, image_data, results, scale)

    # create zip file for icdar2015
    cmd = 'cd %s;zip -j %s %s/*' % (dump_path, zip_path, txt_path)
    print(cmd)
    util.cmd.cmd(cmd)
    # Formatted into one string so the output is the same on Python 2 and 3.
    print('%s %s' % ("zip file created: ",
                     util.io.join_path(dump_path, zip_path)))
def test():
    """Detect text boxes on every image, save overlays and timing stats.

    For each file in FLAGS.dataset_dir the image is optionally rescaled by
    FLAGS.scale_resize, run through the network, and the decoded boxes are
    mapped back with ``revert_dectbox``. The boxes are drawn on the original
    image, which is written to ``FLAGS.output_dir/out_<file name>``. One
    line per box (``name<TAB>avg threshold<TAB>box``) is appended to
    ``FLAGS.output_dir/DECT_result.txt``. Per-image and average timings for
    load / resize / inference / post-processing are printed.

    Fixes relative to the previous version:
      * ``cv2.drawContours`` was called with keyword names (``idx``,
        ``border_width``) it does not accept; arguments are now positional.
      * ``cv2.resize`` was called with an unsupported ``size=`` keyword; the
        resized mask/score were never used, so those calls are removed.
      * the result file is closed even when an image fails.
      * the assembled ``sess_config`` is actually passed to the session.
      * ``print`` statements work on both Python 2 and 3.
    """
    outfile = os.path.join(FLAGS.output_dir, 'DECT_result.txt')
    if os.path.exists(outfile):
        os.remove(outfile)

    # Written next to every box as its confidence value.
    avg_conf_thresh = float(FLAGS.pixel_conf_threshold +
                            FLAGS.link_conf_threshold) / 2

    global_step = slim.get_or_create_global_step()
    with tf.name_scope('evaluation_%dx%d' % (0, 0)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
            # Declared but neither fed nor read anywhere in this function.
            image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
            processed_image, _, _, _, _ = \
                ssd_vgg_preprocessing.preprocess_image(
                    image, None, None, None, None,
                    out_shape=config.image_shape,
                    data_format=config.data_format,
                    do_resize=False,
                    is_training=False)
            b_image = tf.expand_dims(processed_image, axis=0)

            # build model and decode its scores into masks
            net = pixel_link_symbol.PixelLinkNet(b_image, is_training=False)
            masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
                net.pixel_pos_scores, net.link_pos_scores)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    # Variables to restore: moving avg. or normal weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore(
            tf.trainable_variables())
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()

    def points_to_contour(points):
        contours = [[list(p)] for p in points]
        return np.asarray(contours, dtype=np.int32)

    def points_to_contours(points):
        return np.asarray([points_to_contour(points)])

    def draw_bboxes(img, bboxes, color):
        for bbox in bboxes:
            points = np.reshape(bbox, [4, 2])
            cnts = points_to_contours(points)
            # positional: image, contours, contourIdx, color, thickness
            cv2.drawContours(img, cnts, -1, color, 1)

    report_format = "{}:{}\t{}:{}\t{}:{}\t{}:{}\t{}:{}\n"
    # load_image, pad_image, inference, cal_box, total
    timer = [[], [], [], [], []]

    saver = tf.train.Saver(var_list=variables_to_restore)
    with open(outfile, 'w') as wfile, \
            tf.Session(config=sess_config) as sess:
        saver.restore(sess, FLAGS.checkpoint_path)

        files = os.listdir(FLAGS.dataset_dir)
        for image_name in files:
            sp1 = time.time()
            file_path = os.path.join(FLAGS.dataset_dir, image_name)
            origin_image_data = cv2.imread(file_path)
            sp2 = time.time()

            # Rescale instead of padding, to avoid distorting the image.
            if FLAGS.scale_resize != 1:
                image_data = scale_resize(origin_image_data,
                                          FLAGS.scale_resize)
            else:
                image_data = origin_image_data
            sp3 = time.time()

            link_scores, pixel_scores, mask_vals = sess.run(
                [net.link_pos_scores, net.pixel_pos_scores, masks],
                feed_dict={image: image_data})
            sp4 = time.time()

            # The graph is fed one image at a time, so only index 0 is read.
            mask = mask_vals[0, ...]
            bboxes_det = pixel_link.mask_to_bboxes(mask, image_data.shape)
            # Map boxes found on the rescaled image back to the original.
            _bboxes_det = revert_dectbox(bboxes_det, FLAGS.scale_resize)
            sp5 = time.time()

            draw_bboxes(origin_image_data, _bboxes_det, (0, 0, 255))
            cv2.imwrite(
                os.path.join(FLAGS.output_dir,
                             'out_' + os.path.basename(file_path)),
                origin_image_data)

            nameID = image_name.split('.')[0]
            for bbox in _bboxes_det:
                wfile.write("{}\t{}\t{}\n".format(
                    nameID, avg_conf_thresh, list(bbox)))

            # timer accumulate
            timer[0].append(sp2 - sp1)
            timer[1].append(sp3 - sp2)
            timer[2].append(sp4 - sp3)
            timer[3].append(sp5 - sp4)
            timer[4].append(sp5 - sp1)
            print(report_format.format(
                'Load', round(sp2 - sp1, 3),
                'Pad', round(sp3 - sp2, 3),
                'Infer', round(sp4 - sp3, 3),
                'Post', round(sp5 - sp4, 3),
                'Total', round(sp5 - sp1, 3)))

        print("\nAvg Timer Stat:")
        print(report_format.format(
            'Load', round(np.mean(timer[0]), 3),
            'Pad', round(np.mean(timer[1]), 3),
            'Infer', round(np.mean(timer[2]), 3),
            'Post', round(np.mean(timer[3]), 3),
            'Total', round(np.mean(timer[4]), 3)))
def test():
    """Detect text boxes on every image in FLAGS.dataset_dir and draw them.

    Restores the latest checkpoint under FLAGS.checkpoint_path, decodes the
    network scores into a mask, converts the mask to boxes with
    ``pixel_link.mask_to_bboxes`` and draws the boxes in red on the loaded
    image array. Nothing is written to disk by this function.

    Changes relative to the previous version:
      * the assembled ``sess_config`` is actually passed to the session
        (it used to be built and then ignored);
      * a disabled triple-quoted code string was removed together with the
        per-iteration ``import os`` and the ``ID`` local that only it used.
        That code warped each detected box into a crop and wrote the box
        coordinates to a per-image txt file.
    """
    checkpoint_dir = util.io.get_dir(FLAGS.checkpoint_path)

    global_step = slim.get_or_create_global_step()
    with tf.name_scope('evaluation_%dx%d' % (FLAGS.eval_image_height,
                                             FLAGS.eval_image_width)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
            # Declared but neither fed nor read anywhere in this function.
            image_shape = tf.placeholder(dtype=tf.int32, shape=[3, ])
            processed_image, _, _, _, _ = \
                ssd_vgg_preprocessing.preprocess_image(
                    image, None, None, None, None,
                    out_shape=config.image_shape,
                    data_format=config.data_format,
                    is_training=False)
            b_image = tf.expand_dims(processed_image, axis=0)

            # build model and decode its scores into masks
            net = pixel_link_symbol.PixelLinkNet(b_image, is_training=False)
            masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
                net.pixel_pos_scores, net.link_pos_scores)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            FLAGS.gpu_memory_fraction

    # Variables to restore: moving avg. or normal weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore(
            tf.trainable_variables())
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(var_list=variables_to_restore)

    def draw_bboxes(img, bboxes, color):
        # Each bbox is reshaped to four (x, y) corner points.
        for bbox in bboxes:
            points = np.reshape(bbox, [4, 2])
            cnts = util.img.points_to_contours(points)
            util.img.draw_contours(img, contours=cnts, idx=-1,
                                   color=color, border_width=1)

    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, util.tf.get_latest_ckpt(FLAGS.checkpoint_path))

        files = util.io.ls(FLAGS.dataset_dir)
        for image_name in files:
            file_path = util.io.join_path(FLAGS.dataset_dir, image_name)
            image_data = util.img.imread(file_path)
            link_scores, pixel_scores, mask_vals = sess.run(
                [net.link_pos_scores, net.pixel_pos_scores, masks],
                feed_dict={image: image_data})
            h, w, _ = image_data.shape

            def resize(img):
                return util.img.resize(img, size=(w, h),
                                       interpolation=cv2.INTER_NEAREST)

            # The graph is fed one image at a time, so only index 0 is read.
            image_idx = 0
            pixel_score = pixel_scores[image_idx, ...]
            mask = mask_vals[image_idx, ...]

            bboxes_det = pixel_link.mask_to_bboxes(mask, image_data.shape)

            mask = resize(mask)
            pixel_score = resize(pixel_score)

            draw_bboxes(image_data, bboxes_det, util.img.COLOR_RGB_RED)
def _scaled_bounding_rect(bbox, org_width, org_height):
    """Return the axis-aligned rectangle that encloses a detected quadrilateral.

    Args:
        bbox: sequence of eight numbers, read as four (x, y) corner pairs
            ``x1, y1, x2, y2, x3, y3, x4, y4``.
        org_width: width in pixels of the image the rectangle is mapped onto.
        org_height: height in pixels of that image.

    Returns:
        ``(x_min, y_min, x_max, y_max)`` as Python ints.
    """
    values = [int(v) for v in bbox]
    xs = values[0:8:2]
    ys = values[1:8:2]
    # Coordinates are rescaled from a fixed 1280x768 frame to the image size.
    # NOTE(review): presumably 1280x768 is the detector's working resolution;
    # confirm against pixel_link.mask_to_bboxes, which is also given the image
    # shape.
    x_max = int(max(xs) * org_width / 1280)
    x_min = int(min(xs) * org_width / 1280)
    y_max = int(max(ys) * org_height / 768)
    y_min = int(min(ys) * org_height / 768)
    return x_min, y_min, x_max, y_max


def text_detection():
    """Run PixelLink on every image in ``args.dataset_dir`` and export results.

    For each file whose extension is .jpg, .jpeg, .png or .gif (any case):

    * every detected box is cropped and written to
      ``args.crop_dir/<stem>/<stem>_<index>.jpg``;
    * ``args.txt_dir/<stem>.txt`` receives one JSON-like line describing the
      source image followed by one line per crop;
    * the image annotated with red rectangles and box indices is written to
      ``args.visual_dir/<stem>.png`` and that path is printed.

    ``args.crop_dir`` and ``args.txt_dir`` are deleted and recreated on every
    run. Weights are always restored from the moving-average shadow variables
    of the latest checkpoint under ``args.checkpoint_path``.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.gif')

    cropped_dir = args.crop_dir
    if os.path.exists(cropped_dir):
        shutil.rmtree(cropped_dir)
    os.makedirs(cropped_dir)

    with tf.name_scope('evaluation_%dx%d' %
                       (args.eval_image_height, args.eval_image_width)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
            processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
                image, None, None, None, None,
                out_shape=config.image_shape,
                data_format=config.data_format,
                is_training=False)
            b_image = tf.expand_dims(processed_image, axis=0)

            # Build the network and the op that decodes its score maps.
            net = pixel_link_symbol.PixelLinkNet(b_image, is_training=False)
            masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
                net.pixel_pos_scores, net.link_pos_scores)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if args.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif args.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = (
            args.gpu_memory_fraction)

    # Restore the exponential-moving-average copies of the trainable weights.
    variable_averages = tf.train.ExponentialMovingAverage(
        args.moving_average_decay)
    variables_to_restore = variable_averages.variables_to_restore(
        tf.trainable_variables())
    saver = tf.train.Saver(var_list=variables_to_restore)

    # The session must receive sess_config, otherwise the GPU options set
    # above have no effect.
    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, util.tf.get_latest_ckpt(args.checkpoint_path))

        txt_folder = args.txt_dir
        if os.path.exists(txt_folder):
            shutil.rmtree(txt_folder)
        os.makedirs(txt_folder)

        for image_name in util.io.ls(args.dataset_dir):
            # Derive every output name from the extension-less stem so that
            # .png/.JPG/.jpeg inputs do not leak their extension into the
            # sub-folder, metadata and visualization names.
            stem, ext = os.path.splitext(image_name)
            if ext.lower() not in image_extensions:
                continue

            file_path = util.io.join_path(args.dataset_dir, image_name)
            subfolder_path = os.path.join(cropped_dir, stem)
            # Two inputs may share a stem (a.jpg / a.png); tolerate that.
            if not os.path.isdir(subfolder_path):
                os.makedirs(subfolder_path)

            image_data = util.img.imread(file_path)
            org_height = int(image_data.shape[0])
            org_width = int(image_data.shape[1])

            mask_vals = sess.run(masks, feed_dict={image: image_data})
            bboxes_det = pixel_link.mask_to_bboxes(mask_vals[0, ...],
                                                   image_data.shape)

            # Crops are cut from an untouched copy so that rectangles and
            # labels drawn for visualization never end up inside a crop.
            clean_image = image_data.copy()

            txt_path = os.path.join(txt_folder, stem + '.txt')
            # A single handle per image keeps the header line ahead of the
            # crop lines and guarantees the file is closed.
            with open(txt_path, 'a') as txt_file:
                txt_file.write(
                    '{"image_name": "%s", "width":%d, "height": %d}\n'
                    % (image_name, org_width, org_height))

                for index, bbox in enumerate(bboxes_det):
                    x_min, y_min, x_max, y_max = _scaled_bounding_rect(
                        bbox, org_width, org_height)

                    # Corner order: top-right, top-left, bottom-left,
                    # bottom-right.
                    rect = [x_max, y_min, x_min, y_min,
                            x_min, y_max, x_max, y_max]
                    cnts = util.img.points_to_contours(
                        np.reshape(rect, [4, 2]))
                    util.img.draw_contours(image_data, contours=cnts, idx=-1,
                                           color=util.img.COLOR_RGB_RED,
                                           border_width=1)

                    crop_name = '%s_%d.jpg' % (stem, index)
                    cv2.imwrite(os.path.join(subfolder_path, crop_name),
                                clean_image[y_min:y_max, x_min:x_max])

                    cv2.putText(image_data, str(index), (x_min - 7, y_min),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 128, 255),
                                1, lineType=cv2.LINE_AA)

                    # File names use the 0-based index; "id" is 1-based.
                    txt_file.write(
                        '{"image_name":"%s", "id": %d, "x": %d, "y": %d, '
                        '"width": %d, "height": %d}\n'
                        % (crop_name, index + 1, x_min, y_min,
                           x_max - x_min, y_max - y_min))

            visual_path = os.path.join(args.visual_dir, stem + '.png')
            plt.imwrite(visual_path, image_data)
            print(visual_path)
def test():
    """Run PixelLink on every image in ``FLAGS.dataset_dir`` and save results.

    For each file that does not yet have ``<FLAGS.output_path>/<name>.png``:

    * the image is read and passed through ``resize_im`` (scale=768,
      max_scale=1280);
    * the link scores, pixel scores and decoded masks are pickled, in that
      order, to ``pkl/<name>.pkl``;
    * detected boxes are drawn in red on the image, which is saved through
      ``util.sit`` to ``<FLAGS.output_path>/<name>.png``; the value returned
      by ``util.sit`` is printed.

    Inference and post-processing wall-clock times are printed per image.
    Weights come from the latest checkpoint under ``FLAGS.checkpoint_path``,
    using moving-average shadow variables when ``FLAGS.using_moving_average``
    is set.
    """
    output_dir = FLAGS.output_path
    pickle_dir = 'pkl'

    global_step = slim.get_or_create_global_step()
    with tf.name_scope('evaluation_%dx%d' %
                       (FLAGS.eval_image_height, FLAGS.eval_image_width)):
        with tf.variable_scope(tf.get_variable_scope(), reuse=False):
            image = tf.placeholder(dtype=tf.int32, shape=[None, None, 3])
            processed_image, _, _, _, _ = ssd_vgg_preprocessing.preprocess_image(
                image, None, None, None, None,
                out_shape=config.image_shape,
                data_format=config.data_format,
                is_training=False)
            b_image = tf.expand_dims(processed_image, axis=0)

            # Build the network and the op that decodes its score maps.
            net = pixel_link_symbol.PixelLinkNet(b_image, is_training=False)
            masks = pixel_link.tf_decode_score_map_to_mask_in_batch(
                net.pixel_pos_scores, net.link_pos_scores)

    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    if FLAGS.gpu_memory_fraction < 0:
        sess_config.gpu_options.allow_growth = True
    elif FLAGS.gpu_memory_fraction > 0:
        sess_config.gpu_options.per_process_gpu_memory_fraction = (
            FLAGS.gpu_memory_fraction)

    # Variables to restore: moving-average shadows or the plain weights.
    if FLAGS.using_moving_average:
        variable_averages = tf.train.ExponentialMovingAverage(
            FLAGS.moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore(
            tf.trainable_variables())
        variables_to_restore[global_step.op.name] = global_step
    else:
        variables_to_restore = slim.get_variables_to_restore()
    saver = tf.train.Saver(var_list=variables_to_restore)

    # The session must receive sess_config, otherwise the GPU options set
    # above have no effect.
    with tf.Session(config=sess_config) as sess:
        saver.restore(sess, util.tf.get_latest_ckpt(FLAGS.checkpoint_path))

        # Make sure both output locations exist before the first write.
        for directory in (output_dir, pickle_dir):
            if not os.path.exists(directory):
                os.makedirs(directory)

        for image_name in util.io.ls(FLAGS.dataset_dir):
            result_path = os.path.join(output_dir, image_name + ".png")
            # A result image on disk means this input was already processed.
            if os.path.isfile(result_path):
                continue

            file_path = util.io.join_path(FLAGS.dataset_dir, image_name)
            image_data = util.img.imread(file_path)
            image_data, _ = resize_im(image_data, scale=768, max_scale=1280)

            start_tf_time = time.time()
            link_scores, pixel_scores, mask_vals = sess.run(
                [net.link_pos_scores, net.pixel_pos_scores, masks],
                feed_dict={image: image_data})
            end_tf_time = time.time()

            # Three consecutive pickles in one file; read them back with
            # three load() calls in the same order.
            pickle_path = os.path.join(pickle_dir, image_name) + '.pkl'
            with open(pickle_path, 'wb') as pickle_file:
                for array in (link_scores, pixel_scores, mask_vals):
                    cPickle.dump(array, pickle_file, protocol=-1)

            start_post_time = time.time()
            bboxes_det = pixel_link.mask_to_bboxes(mask_vals[0, ...],
                                                   image_data.shape)
            end_post_time = time.time()

            print("Tensorflow inference time: %s"
                  % (end_tf_time - start_tf_time))
            print("Post filtering time: %s"
                  % (end_post_time - start_post_time))

            for bbox in bboxes_det:
                cnts = util.img.points_to_contours(np.reshape(bbox, [4, 2]))
                util.img.draw_contours(image_data, contours=cnts, idx=-1,
                                       color=util.img.COLOR_RGB_RED,
                                       border_width=4)

            print(util.sit(image_data, format='bgr', path=result_path))