def main(_):
    """Detect text in every image of ``FLAGS.source_directory``.

    Builds the inference graph, restores the checkpoint returned by
    ``get_checkpoint()`` and, for each image (in sorted file-name order),
    writes into ``FLAGS.storage_directory``:

    * ``<stem>_quadrilaterals.jpg``: the image with all detections drawn;
    * ``<stem>_cropped.jpg``: the first detection rectified with a
      perspective warp. This file is skipped when there is no detection or
      the first bounding box has no positive area.

    The per-image and the accumulated ``sess.run`` time are printed.

    :_: unused positional argument (presumably supplied by the app runner).
    """
    with tf.Graph().as_default():
        out_shape = [FLAGS.train_image_size] * 2
        image_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

        features, output_shape = \
            textboxes_plusplus_preprocessing.preprocess_for_eval(
                image_input,
                out_shape,
                data_format=FLAGS.data_format,
                output_rgb=False)
        features = tf.expand_dims(features, axis=0)  # (1, ?, ?, 3)
        output_shape = tf.expand_dims(output_shape, axis=0)  # (1, 2)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/cpu:0'):
                anchor_processor = anchor_manipulator.AnchorProcessor(
                    positive_threshold=None,
                    ignore_threshold=None,
                    prior_scaling=config.PRIOR_SCALING)

                (anchor_heights_all_layers,
                 anchor_widths_all_layers,
                 num_anchors_per_location_all_layers) = \
                    anchor_processor.get_anchors_size_all_layers(
                        config.ALL_ANCHOR_SCALES,
                        config.ALL_EXTRA_SCALES,
                        config.ALL_ANCHOR_RATIOS,
                        config.NUM_FEATURE_LAYERS)

                # shape=(num_anchors_all_layers,).
                (anchors_ymin,
                 anchors_xmin,
                 anchors_ymax,
                 anchors_xmax,
                 _) = \
                    anchor_processor.get_all_anchors_all_layers(
                        tf.squeeze(output_shape, axis=0),
                        anchor_heights_all_layers,
                        anchor_widths_all_layers,
                        num_anchors_per_location_all_layers,
                        config.ANCHOR_OFFSETS,
                        config.VERTICAL_OFFSETS,
                        config.ALL_LAYER_SHAPES,
                        config.ALL_LAYER_STRIDES,
                        [0.] * config.NUM_FEATURE_LAYERS,
                        [False] * config.NUM_FEATURE_LAYERS)

            backbone = textboxes_plusplus_net.VGG16Backbone(FLAGS.data_format)
            feature_layers = backbone.forward(features, training=False)

            # shape=(num_features,
            #        bs,
            #        fh,
            #        fw,
            #        num_anchors_per_locations * 2 * num_offsets)
            location_predictions, class_predictions = \
                textboxes_plusplus_net.multibox_head(
                    feature_layers,
                    FLAGS.num_classes,
                    config.NUM_OFFSETS,
                    num_anchors_per_location_all_layers,
                    data_format=FLAGS.data_format)

            # Bring the channel axis last so the reshapes below group the
            # values of one anchor together.
            if FLAGS.data_format == 'channels_first':
                class_predictions = \
                    [tf.transpose(pred, [0, 2, 3, 1])
                     for pred in class_predictions]
                location_predictions = \
                    [tf.transpose(pred, [0, 2, 3, 1])
                     for pred in location_predictions]

            class_predictions = \
                [tf.reshape(pred, [-1, FLAGS.num_classes])
                 for pred in class_predictions]
            location_predictions = \
                [tf.reshape(pred, [-1, config.NUM_OFFSETS])
                 for pred in location_predictions]

            class_predictions = tf.concat(class_predictions, axis=0)
            location_predictions = tf.concat(location_predictions, axis=0)

        with tf.device('/cpu:0'):
            bboxes_pred, quadrilaterals_pred = \
                anchor_processor.decode_anchors(
                    location_predictions,
                    anchors_ymin,
                    anchors_xmin,
                    anchors_ymax,
                    anchors_xmax)

            (selected_bboxes,
             selected_quadrilaterals,
             selected_scores) = \
                bbox_util.parse_by_class(
                    tf.squeeze(output_shape, axis=0),
                    class_predictions,
                    bboxes_pred,
                    quadrilaterals_pred,
                    FLAGS.num_classes,
                    FLAGS.select_threshold,
                    FLAGS.min_size,
                    FLAGS.keep_topk,
                    FLAGS.nms_topk,
                    FLAGS.nms_threshold)

            # Flatten the per-class results into parallel tensors.
            labels_list = []
            scores_list = []
            bboxes_list = []
            quadrilaterals_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
                quadrilaterals_list.append(selected_quadrilaterals[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)
            all_quadrilaterals = tf.concat(quadrilaterals_list, axis=0)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, get_checkpoint())

            total_time = 0
            image_files_name = sorted(os.listdir(FLAGS.source_directory))
            for i, image_file_name in enumerate(image_files_name):
                np_image = imread(os.path.join(FLAGS.source_directory,
                                               image_file_name))

                start_time = time.time()
                (labels_,
                 scores_,
                 bboxes_,
                 quadrilaterals_,
                 output_shape_) = \
                    sess.run([all_labels,
                              all_scores,
                              all_bboxes,
                              all_quadrilaterals,
                              output_shape],
                             feed_dict={image_input: np_image})
                elapsed_time = time.time() - start_time
                print('{}: elapsed_time = {}'.format(i + 1, elapsed_time))
                total_time += elapsed_time

                # Map the predictions from the preprocessed resolution back
                # to the source image: even columns are y coordinates
                # (scaled by the height ratio), odd columns are x
                # coordinates (scaled by the width ratio).
                for coords in (bboxes_, quadrilaterals_):
                    coords[:, 0::2] = \
                        coords[:, 0::2] * np_image.shape[0] \
                        / output_shape_[0, 0]
                    coords[:, 1::2] = \
                        coords[:, 1::2] * np_image.shape[1] \
                        / output_shape_[0, 1]

                # splitext instead of a fixed [:-4] slice so extensions of
                # any length are stripped correctly.
                stem = os.path.splitext(image_file_name)[0]

                image_with_quadrilaterals = \
                    drawing_toolbox.draw_quadrilaterals_on_image(
                        np_image.copy(),
                        labels_,
                        scores_,
                        quadrilaterals_,
                        thickness=2)
                imsave(os.path.join(FLAGS.storage_directory,
                                    stem + '_quadrilaterals' + '.jpg'),
                       image_with_quadrilaterals)

                # The crop is taken from the first detection only; without
                # one there is nothing to rectify.
                if len(quadrilaterals_) == 0 or len(bboxes_) == 0:
                    print('{}: no detection, skipping crop'.format(
                        image_file_name))
                    continue

                (y1, x1, y2, x2,
                 y3, x3, y4, x4) = [int(e) for e in quadrilaterals_[0, :]]
                top_left_vertex = [x1, y1]
                top_right_vertex = [x2, y2]
                bottom_left_vertex = [x4, y4]
                bottom_right_vertex = [x3, y3]

                ymin = int(round(bboxes_[0, 0]))
                xmin = int(round(bboxes_[0, 1]))
                ymax = int(round(bboxes_[0, 2]))
                xmax = int(round(bboxes_[0, 3]))
                plate_width = xmax - xmin
                plate_height = ymax - ymin

                # A degenerate box would make warpPerspective fail on an
                # empty destination size.
                if plate_width <= 0 or plate_height <= 0:
                    print('{}: degenerate detection, skipping crop'.format(
                        image_file_name))
                    continue

                pts1 = np.float32([top_left_vertex,
                                   top_right_vertex,
                                   bottom_left_vertex,
                                   bottom_right_vertex])
                pts2 = np.float32([[0, 0],
                                   [plate_width, 0],
                                   [0, plate_height],
                                   [plate_width, plate_height]])
                transform = cv2.getPerspectiveTransform(pts1, pts2)
                cropped_image = cv2.warpPerspective(
                    np_image.copy(),
                    transform,
                    (plate_width, plate_height))
                imsave(os.path.join(FLAGS.storage_directory,
                                    stem + '_cropped' + '.jpg'),
                       cropped_image)

            print('total_time: ', total_time)
def input_fn():
    """Build the batched input tensors and publish the anchor metadata.

    Reads ``is_training``, ``batch_size`` and ``dataset_pattern`` from a
    scope that is not part of this function (presumably an enclosing
    factory function).

    Side effect: rebinds the module-level ``global_anchor_info`` dict with
    the keys ``'decode_fn'``, ``'num_anchors_per_layer'`` and
    ``'num_anchors_per_location_all_layers'``.

    :returns: ``(image, targets)`` where ``targets`` is a dict with the keys
        ``'shape'``, ``'loc_targets'``, ``'cls_targets'`` and
        ``'match_scores'``.
    """
    global global_anchor_info

    side = FLAGS.train_image_size
    target_shape = [side, side]

    # anchor_processor: plain Python object
    anchor_processor = anchor_manipulator.AnchorProcessor(
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=config.PRIOR_SCALING)

    # heights / widths: one 1d tf.float32 constant per feature layer
    # counts: one Python int per feature layer
    anchor_sizes = anchor_processor.get_anchors_size_all_layers(
        config.ALL_ANCHOR_SCALES,
        config.ALL_EXTRA_SCALES,
        config.ALL_ANCHOR_RATIOS,
        config.NUM_FEATURE_LAYERS)
    (anchor_heights_all_layers,
     anchor_widths_all_layers,
     num_anchors_per_location_all_layers) = anchor_sizes

    # anchors_*: 1d tf.float32 tensors of length num_anchors_all_layers
    # inside_mask: 1d tf.bool tensor of length num_anchors_all_layers
    all_anchors = anchor_processor.get_all_anchors_all_layers(
        target_shape,
        anchor_heights_all_layers,
        anchor_widths_all_layers,
        num_anchors_per_location_all_layers,
        config.ANCHOR_OFFSETS,
        config.VERTICAL_OFFSETS,
        config.ALL_LAYER_SHAPES,
        config.ALL_LAYER_STRIDES,
        [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
        [False] * config.NUM_FEATURE_LAYERS)
    (anchors_ymin,
     anchors_xmin,
     anchors_ymax,
     anchors_xmax,
     inside_mask) = all_anchors

    # num_anchors_per_layer = [num_anchors_layer1, num_anchors_layer2, ...]
    # e.g., num_anchors_per_layer = [48 x 48 x 2 x 10, ...]
    num_anchors_per_layer = [
        anchor_processor.count_num_anchors_per_layer(
            num_anchors_per_location_all_layers[layer_index],
            layer_shape,
            name='count_num_anchors_per_layer_{}'.format(layer_index))[1]
        for layer_index, layer_shape in enumerate(config.ALL_LAYER_SHAPES)]

    def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
        """Preprocess one sample to ``target_shape``."""
        return textboxes_plusplus_preprocessing.preprocess_image(
            image_,
            labels_,
            bboxes_,
            quadrilaterals_,
            target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)

    def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
        """Encode ground truth against the anchors computed above."""
        return anchor_processor.encode_anchors(
            glabels_,
            gbboxes_,
            gquadrilaterals_,
            anchors_ymin,
            anchors_xmin,
            anchors_ymax,
            anchors_xmax,
            inside_mask)

    def decode_fn(pred):
        """Decode batched location predictions with the same anchors."""
        return anchor_processor.batch_decode_anchors(
            pred,
            anchors_ymin,
            anchors_xmin,
            anchors_ymax,
            anchors_xmax)

    split_name = 'train' if is_training else 'val'
    batch = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        split_name,
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=FLAGS.train_epochs,
        is_training=is_training)
    image, _, shape, loc_targets, cls_targets, match_scores = batch

    global_anchor_info = {
        'decode_fn': decode_fn,
        'num_anchors_per_layer': num_anchors_per_layer,
        'num_anchors_per_location_all_layers':
            num_anchors_per_location_all_layers,
    }

    targets = {
        'shape': shape,  # original shape from .tfrecord files
        'loc_targets': loc_targets,  # [bs, n_anchors, 12]
        'cls_targets': cls_targets,  # [bs, n_anchors]
        'match_scores': match_scores,  # [bs, n_anchors]
    }
    return image, targets
def _split_image_into_overlapped_images(image, n, r):
    """Cut ``image`` into an ``ny x nx`` grid of overlapping crops.

    The crop height is ``floor(IH / (ny - ny * ry + ry))`` and consecutive
    crops overlap by ``ry`` times the crop height (likewise for the width),
    so consecutive crops start ``crop size - overlap`` pixels apart.

    :image: image tensor whose first two dimensions are used as height and
        width.
    :n: pair ``(ny, nx)``, number of crops along y and x.
    :r: pair ``(ry, rx)``, overlap ratio along y and x.
    :returns: list of ``[crop, tf.shape(crop), [oy, ox]]`` where
        ``(oy, ox)`` is the top-left offset of the crop in ``image``.
    """
    image_height, image_width = tf.shape(image)[0], tf.shape(image)[1]
    ny, nx = n
    ry, rx = r

    crop_height = tf.cast(
        tf.floordiv(tf.cast(image_height, tf.float32), (ny - ny * ry + ry)),
        tf.int32)
    crop_width = tf.cast(
        tf.floordiv(tf.cast(image_width, tf.float32), (nx - nx * rx + rx)),
        tf.int32)
    overlap_height = tf.cast(ry * tf.cast(crop_height, tf.float32), tf.int32)
    overlap_width = tf.cast(rx * tf.cast(crop_width, tf.float32), tf.int32)

    crops = []
    offsets = []
    for row in range(ny):
        oy = row * (crop_height - overlap_height)
        for col in range(nx):
            ox = col * (crop_width - overlap_width)
            offsets.append([oy, ox])
            crops.append(image[oy:oy + crop_height, ox:ox + crop_width])
    return [[crop, tf.shape(crop), offset]
            for crop, offset in zip(crops, offsets)]


def _rescale_to_source(pred, num_coords, shapes, output_shape, oys, oxs):
    """Map per-crop predictions into coordinates of the full source image.

    Coordinate ``k`` of every prediction is multiplied by the ratio between
    the crop size and the network input size, truncated to ``tf.int32`` and
    shifted by the crop offset. Even ``k`` use the height ratio and ``oys``;
    odd ``k`` use the width ratio and ``oxs``.

    :pred: rank-3 tensor indexed as ``pred[crop, anchor, k]``.
    :num_coords: number of coordinates per prediction (last axis of
        ``pred``).
    :shapes: rank-2 integer tensor; ``shapes[:, 0]`` / ``shapes[:, 1]`` are
        the crop heights / widths.
    :output_shape: rank-2 integer tensor; ``output_shape[0, 0]`` /
        ``output_shape[0, 1]`` are the network input height / width.
    :oys: per-crop y offsets with a trailing axis of size 1.
    :oxs: per-crop x offsets with a trailing axis of size 1.
    :returns: ``tf.int32`` tensor of shape ``(-1, num_coords)``.
    """
    scale_y = tf.expand_dims(
        tf.cast(tf.truediv(shapes[:, 0], output_shape[0, 0]), tf.float32),
        -1)
    scale_x = tf.expand_dims(
        tf.cast(tf.truediv(shapes[:, 1], output_shape[0, 1]), tf.float32),
        -1)

    coords = []
    for k in range(num_coords):
        if k % 2 == 0:
            scale, offset = scale_y, oys
        else:
            scale, offset = scale_x, oxs
        coords.append(tf.cast(pred[:, :, k] * scale, tf.int32) + offset)
    return tf.reshape(tf.stack(coords, -1), shape=[-1, num_coords])


def main(_):
    """Detect text on overlapping crops of each image and write results.

    Every input image is evaluated as one batch made of grids of overlapping
    crops plus the full image. Predictions of all crops are mapped back to
    full-image coordinates and filtered together by
    ``bbox_util.parse_by_class``.

    Images are collected from ``FLAGS.input_image_root`` using the
    comma-separated glob patterns in ``FLAGS.input_image_stem_patterns`` and
    processed in the order of the number that follows an underscore in
    their file names. For each image a file ``task1_<stem>.txt`` is written
    to ``FLAGS.output_directory`` with one line
    ``x1,y1,x2,y2,x3,y3,x4,y4,score`` per detection.

    :_: unused positional argument (presumably supplied by the app runner).
    :raises IOError: if an image cannot be decoded.
    """
    with tf.Graph().as_default():
        target_shape = [FLAGS.image_size] * 2
        input_image = tf.placeholder(tf.uint8, shape=(None, None, 3))

        # (n, r) pairs: an n x n grid of crops with overlap ratio r.
        grids = [(2, 0.6), (4, 0.4)]  # select_threshold = 0.95

        crop_infos = [crop_info
                      for n, r in grids
                      for crop_info in _split_image_into_overlapped_images(
                          input_image, (n, n), (r, r))]
        # The full image is evaluated as well, with a zero offset.
        crop_infos.append([input_image, tf.shape(input_image), [0, 0]])
        images, shapes, offsets = zip(*crop_infos)

        oys, oxs = zip(*offsets)
        shapes = tf.stack(shapes)
        oys = tf.expand_dims(tf.stack(oys), -1)
        oxs = tf.expand_dims(tf.stack(oxs), -1)

        features = tf.stack(
            [textboxes_plusplus_preprocessing.preprocess_for_eval(
                image,
                None,
                None,
                target_shape,
                data_format=FLAGS.data_format,
                output_rgb=False)
             for image in images],
            axis=0)
        output_shape = \
            tf.expand_dims(
                tf.constant(target_shape, dtype=tf.int32),
                axis=0)  # (1, 2)

        with tf.variable_scope(FLAGS.model_scope,
                               default_name=None,
                               values=[features],
                               reuse=tf.AUTO_REUSE):
            with tf.device('/cpu:0'):
                anchor_processor = anchor_manipulator.AnchorProcessor(
                    positive_threshold=None,
                    ignore_threshold=None,
                    prior_scaling=config.PRIOR_SCALING)

                # anchor_heights_all_layers: [1d-tf.constant tf.float32, ...]
                # anchor_widths_all_layers: [1d-tf.constant tf.float32, ...]
                # num_anchors_per_location_all_layers: [Python int, ...]
                (anchor_heights_all_layers,
                 anchor_widths_all_layers,
                 num_anchors_per_location_all_layers) = \
                    anchor_processor.get_anchors_size_all_layers(
                        config.ALL_ANCHOR_SCALES,
                        config.ALL_EXTRA_SCALES,
                        config.ALL_ANCHOR_RATIOS,
                        config.NUM_FEATURE_LAYERS)

                # anchors_ymin: 1d-tf.Tensor(num_anchors_all_layers)
                #               tf.float32
                (anchors_ymin,
                 anchors_xmin,
                 anchors_ymax,
                 anchors_xmax,
                 _) = \
                    anchor_processor.get_all_anchors_all_layers(
                        tf.squeeze(output_shape, axis=0),
                        anchor_heights_all_layers,
                        anchor_widths_all_layers,
                        num_anchors_per_location_all_layers,
                        config.ANCHOR_OFFSETS,
                        config.VERTICAL_OFFSETS,
                        config.ALL_LAYER_SHAPES,
                        config.ALL_LAYER_STRIDES,
                        [0.] * config.NUM_FEATURE_LAYERS,
                        [False] * config.NUM_FEATURE_LAYERS)

            backbone = \
                textboxes_plusplus_net.VGG16Backbone(FLAGS.data_format)
            # shape = (num_feature_layers, BS, FH, FW, feature_depth)
            feature_layers = backbone.forward(features, training=False)

            # shape = (num_feature_layers,
            #          bs,
            #          fh,
            #          fw,
            #          num_anchors_per_loc * 2 * num_offsets)
            location_predictions, class_predictions = \
                textboxes_plusplus_net.multibox_head(
                    feature_layers,
                    FLAGS.num_classes,
                    config.NUM_OFFSETS,
                    num_anchors_per_location_all_layers,
                    data_format=FLAGS.data_format)

            # Bring the channel axis last so the reshapes below group the
            # values of one anchor together.
            if FLAGS.data_format == 'channels_first':
                class_predictions = \
                    [tf.transpose(pred, [0, 2, 3, 1])
                     for pred in class_predictions]
                location_predictions = \
                    [tf.transpose(pred, [0, 2, 3, 1])
                     for pred in location_predictions]

            # shape = (num_feature_layers,
            #          bs,
            #          fh * fw * num_anchors_per_loc * 2,
            #          num_offsets)
            class_predictions = \
                [tf.reshape(pred, [len(images), -1, FLAGS.num_classes])
                 for pred in class_predictions]
            location_predictions = \
                [tf.reshape(pred, [len(images), -1, config.NUM_OFFSETS])
                 for pred in location_predictions]

            class_predictions = tf.concat(class_predictions, axis=1)
            location_predictions = tf.concat(location_predictions, axis=1)

        with tf.device('/cpu:0'):
            bboxes_pred, quadrilaterals_pred = \
                anchor_processor.batch_decode_anchors(
                    location_predictions,
                    anchors_ymin,
                    anchors_xmin,
                    anchors_ymax,
                    anchors_xmax)

            # Merge the predictions of all crops into one flat list expressed
            # in full-image coordinates.
            bboxes_pred = tf.cast(
                _rescale_to_source(
                    bboxes_pred, 4, shapes, output_shape, oys, oxs),
                tf.float32)
            quadrilaterals_pred = tf.cast(
                _rescale_to_source(
                    quadrilaterals_pred, 8, shapes, output_shape, oys, oxs),
                tf.float32)
            class_predictions = tf.reshape(class_predictions,
                                           shape=[-1, FLAGS.num_classes])

            (selected_bboxes,
             selected_quadrilaterals,
             selected_scores) = \
                bbox_util.parse_by_class(
                    tf.shape(input_image)[:2],
                    class_predictions,
                    bboxes_pred,
                    quadrilaterals_pred,
                    FLAGS.num_classes,
                    FLAGS.select_threshold,
                    FLAGS.min_size,
                    FLAGS.keep_topk,
                    FLAGS.nms_topk,
                    FLAGS.nms_threshold)

            # Flatten the per-class results into parallel tensors.
            labels_list = []
            scores_list = []
            bboxes_list = []
            quadrilaterals_list = []
            for k, v in selected_scores.items():
                labels_list.append(tf.ones_like(v, tf.int32) * k)
                scores_list.append(v)
                bboxes_list.append(selected_bboxes[k])
                quadrilaterals_list.append(selected_quadrilaterals[k])
            all_labels = tf.concat(labels_list, axis=0)
            all_scores = tf.concat(scores_list, axis=0)
            all_bboxes = tf.concat(bboxes_list, axis=0)
            all_quadrilaterals = tf.concat(quadrilaterals_list, axis=0)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, get_checkpoint())

            image_paths = \
                sorted(
                    [path
                     for pattern in FLAGS.input_image_stem_patterns.split(',')
                     for path in Path(FLAGS.input_image_root).glob(pattern)],
                    key=lambda e: int(re.findall(r'(?<=_)\d+(?=.)',
                                                 e.name)[0]))

            for i, image_path in enumerate(image_paths):
                image = cv2.imread(
                    str(image_path),
                    cv2.IMREAD_IGNORE_ORIENTATION | cv2.IMREAD_COLOR)
                if image is None:
                    # cv2.imread signals failure with None instead of raising.
                    raise IOError(
                        'Cannot read image: {}'.format(image_path))
                # Reverse the channel axis (OpenCV loads images as BGR).
                image = image[:, :, ::-1]

                start_time = time.time()
                (labels_,
                 scores_,
                 bboxes_,
                 quadrilaterals_) = \
                    sess.run([all_labels,
                              all_scores,
                              all_bboxes,
                              all_quadrilaterals],
                             feed_dict={input_image: image})
                elapsed_time = time.time() - start_time
                print('{}: elapsed_time = {}'.format(i + 1, elapsed_time))

                # Use the stem so the annotation always ends in .txt,
                # whatever the image extension is.
                annotation_file_name = 'task1_' + image_path.stem + '.txt'
                with open(
                        Path(FLAGS.output_directory).joinpath(
                            annotation_file_name),
                        'w') as f:
                    for quadrilateral, raw_score in zip(quadrilaterals_,
                                                        scores_):
                        (y1, x1, y2, x2,
                         y3, x3, y4, x4) = [int(e) for e in quadrilateral]
                        score = float(raw_score)
                        # Rows that are entirely zero carry no detection.
                        if (score == 0.0
                                and not any((y1, x1, y2, x2,
                                             y3, x3, y4, x4))):
                            continue
                        f.write('{},{},{},{},{},{},{},{},{}\n'.format(
                            x1, y1, x2, y2, x3, y3, x4, y4, score))
def main(_):
    """Visualise the anchor grid and a few anchors of one feature map.

    Computes the anchors of feature map ``FLAGS.chosen_feature_map``,
    resizes ``FLAGS.image_path`` to the training resolution, draws the grid
    of that feature map and the first four anchors of one grid location on
    it, shows both images and writes them to ``grid_<fm>.jpg`` and
    ``grid_<fm>_with_anchors.jpg`` in the current directory. Blocks until a
    key is pressed in an OpenCV window.

    :_: unused positional argument (presumably supplied by the app runner).
    :raises IOError: if the input image cannot be decoded.
    """
    target_shape = [FLAGS.train_image_size] * 2

    anchor_processor = anchor_manipulator.AnchorProcessor(
        positive_threshold=None,
        ignore_threshold=None,
        prior_scaling=config.PRIOR_SCALING)

    (anchor_heights_all_layers,
     anchor_widths_all_layers,
     num_anchors_per_location_all_layers) = \
        anchor_processor.get_anchors_size_all_layers(
            config.ALL_ANCHOR_SCALES,
            config.ALL_EXTRA_SCALES,
            config.ALL_ANCHOR_RATIOS,
            config.NUM_FEATURE_LAYERS)

    fm = FLAGS.chosen_feature_map
    (anchors_ymin,
     anchors_xmin,
     anchors_ymax,
     anchors_xmax) = \
        anchor_processor.get_all_anchors_one_layer(
            anchor_heights_all_layers[fm],
            anchor_widths_all_layers[fm],
            num_anchors_per_location_all_layers[fm],
            config.ALL_LAYER_SHAPES[fm],
            config.ALL_LAYER_STRIDES[fm],
            config.ANCHOR_OFFSETS[fm],
            config.VERTICAL_OFFSETS[fm],
            name=None)

    with tf.Session() as sess:
        # shape=(num_anchor_locations_per_feature_map,
        #        num_anchors_per_location).
        (anchors_ymin,
         anchors_xmin,
         anchors_ymax,
         anchors_xmax) = \
            sess.run([anchors_ymin,
                      anchors_xmin,
                      anchors_ymax,
                      anchors_xmax])

    input_image = cv2.imread(FLAGS.image_path)
    if input_image is None:
        # cv2.imread signals failure with None instead of raising.
        raise IOError('Cannot read image: {}'.format(FLAGS.image_path))
    input_image = cv2.resize(
        input_image,
        tuple(target_shape),
        interpolation=cv2.INTER_AREA)

    grid_drawed_image = draw_grid(
        image=input_image.copy(),
        grid_shape=config.ALL_LAYER_SHAPES[fm],
        color=(0, 0, 0))
    cv2.imshow('grid_drawed_image', grid_drawed_image)

    # Show a fixed location so runs are reproducible; clamp it so feature
    # maps with fewer locations do not index out of range.
    num_anchor_locations_per_feature_map = len(anchors_ymin)
    location_index = min(4, num_anchor_locations_per_feature_map - 1)
    anchor_begin_index = 0
    anchor_end_index = 4

    # Each anchor is [ymin, xmin, ymax, xmax].
    anchors = [
        list(anchor)
        for anchor in zip(*[
            side[location_index][anchor_begin_index:anchor_end_index]
            for side in (anchors_ymin,
                         anchors_xmin,
                         anchors_ymax,
                         anchors_xmax)])]

    anchor_drawed_image = draw_anchors(
        grid_drawed_image.copy(),
        anchors)
    cv2.imshow('anchor_drawed_image', anchor_drawed_image)

    cv2.imwrite('grid_' + str(FLAGS.chosen_feature_map) + '.jpg',
                grid_drawed_image)
    cv2.imwrite('grid_' + str(FLAGS.chosen_feature_map)
                + '_with_anchors' + '.jpg',
                anchor_drawed_image)

    cv2.waitKey(0)
    cv2.destroyAllWindows()
def model_fn(features, labels, mode, params):
    """Estimator model function for prediction.

    Builds the detection graph for one image, decodes and filters the
    predictions, and registers a ``save_image_op`` that passes the
    un-whitened image and the detections to ``save_image_with_labels``.

    :features: dict with the keys ``'file_name'``, ``'shape'``,
        ``'output_shape'`` and ``'image'``.
    :labels: unused.
    :mode: a ``tf.estimator.ModeKeys`` value; only ``PREDICT`` is accepted.
    :params: dict with the keys ``'model_scope'``, ``'data_format'``,
        ``'num_classes'``, ``'select_threshold'``, ``'min_size'``,
        ``'keep_topk'``, ``'nms_topk'`` and ``'nms_threshold'``.
    :returns: ``tf.estimator.EstimatorSpec`` whose predictions hold
        ``'file_name'``, ``'shape'``, ``'output_shape'`` and, for every
        class index from 1, ``'scores_<i>'``, ``'bboxes_<i>'`` and
        ``'quadrilaterals_<i>'``.
    :raises ValueError: if ``mode`` is not ``PREDICT`` (raised after the
        graph has been built).
    """
    file_name = tf.identity(features['file_name'], name='file_name')
    shape = features['shape']
    output_shape = features['output_shape']
    image = features['image']

    data_format = params['data_format']
    num_classes = params['num_classes']

    anchor_processor = anchor_manipulator.AnchorProcessor(
        positive_threshold=None,
        ignore_threshold=None,
        prior_scaling=config.PRIOR_SCALING)

    with tf.variable_scope(params['model_scope'],
                           default_name=None,
                           values=[image],
                           reuse=tf.AUTO_REUSE):
        with tf.device('/cpu:0'):
            anchor_sizes = anchor_processor.get_anchors_size_all_layers(
                config.ALL_ANCHOR_SCALES,
                config.ALL_EXTRA_SCALES,
                config.ALL_ANCHOR_RATIOS,
                config.NUM_FEATURE_LAYERS)
            (anchor_heights_all_layers,
             anchor_widths_all_layers,
             num_anchors_per_location_all_layers) = anchor_sizes

            all_anchors = anchor_processor.get_all_anchors_all_layers(
                tf.squeeze(output_shape, axis=0),
                anchor_heights_all_layers,
                anchor_widths_all_layers,
                num_anchors_per_location_all_layers,
                config.ANCHOR_OFFSETS,
                config.VERTICAL_OFFSETS,
                config.ALL_LAYER_SHAPES,
                config.ALL_LAYER_STRIDES,
                [0.] * config.NUM_FEATURE_LAYERS,
                [False] * config.NUM_FEATURE_LAYERS)
            (anchors_ymin,
             anchors_xmin,
             anchors_ymax,
             anchors_xmax,
             _) = all_anchors

        backbone = textboxes_plusplus_net.VGG16Backbone(data_format)
        feature_layers = backbone.forward(
            image,
            training=(mode == tf.estimator.ModeKeys.TRAIN))

        location_predictions, class_predictions = \
            textboxes_plusplus_net.multibox_head(
                feature_layers,
                num_classes,
                config.NUM_OFFSETS,
                num_anchors_per_location_all_layers,
                data_format=data_format)

        # Bring the channel axis last before flattening per anchor.
        if data_format == 'channels_first':
            location_predictions = [
                tf.transpose(layer_pred, [0, 2, 3, 1])
                for layer_pred in location_predictions]
            class_predictions = [
                tf.transpose(layer_pred, [0, 2, 3, 1])
                for layer_pred in class_predictions]

        location_predictions = [
            tf.reshape(layer_pred,
                       [tf.shape(image)[0], -1, config.NUM_OFFSETS])
            for layer_pred in location_predictions]
        class_predictions = [
            tf.reshape(layer_pred,
                       [tf.shape(image)[0], -1, num_classes])
            for layer_pred in class_predictions]

        location_predictions = tf.concat(location_predictions, axis=1)
        class_predictions = tf.concat(class_predictions, axis=1)

        location_predictions = tf.reshape(location_predictions,
                                          [-1, config.NUM_OFFSETS])
        class_predictions = tf.reshape(class_predictions,
                                       [-1, num_classes])

    with tf.device('/cpu:0'):
        bboxes_pred, quadrilaterals_pred = anchor_processor.decode_anchors(
            location_predictions,
            anchors_ymin,
            anchors_xmin,
            anchors_ymax,
            anchors_xmax)

        (selected_bboxes,
         selected_quadrilaterals,
         selected_scores) = bbox_util.parse_by_class(
            tf.squeeze(output_shape, axis=0),
            class_predictions,
            bboxes_pred,
            quadrilaterals_pred,
            num_classes,
            params['select_threshold'],
            params['min_size'],
            params['keep_topk'],
            params['nms_topk'],
            params['nms_threshold'])

        # Flatten the per-class results into parallel tensors, keeping the
        # iteration order of ``selected_scores``.
        class_ids = list(selected_scores)
        labels_list = [
            tf.ones_like(selected_scores[class_id], tf.int32) * class_id
            for class_id in class_ids]
        all_labels = tf.concat(labels_list, axis=0)
        all_scores = tf.concat(
            [selected_scores[class_id] for class_id in class_ids], axis=0)
        all_bboxes = tf.concat(
            [selected_bboxes[class_id] for class_id in class_ids], axis=0)
        all_quadrilaterals = tf.concat(
            [selected_quadrilaterals[class_id] for class_id in class_ids],
            axis=0)

    # Detections scoring 0.3 or less get label 0 before being drawn.
    unwhitened_image = textboxes_plusplus_preprocessing.unwhiten_image(
        tf.squeeze(image, axis=0),
        output_rgb=False)
    confident_labels = all_labels * tf.to_int32(all_scores > 0.3)
    save_image_op = tf.py_func(
        save_image_with_labels,
        [unwhitened_image,
         confident_labels,
         all_scores,
         all_bboxes,
         all_quadrilaterals],
        tf.int64,
        stateful=True)
    tf.identity(save_image_op, name='save_image_op')

    predictions = {
        'file_name': file_name,
        'shape': shape,
        'output_shape': output_shape,
    }
    for class_ind in range(1, num_classes):
        predictions['scores_{}'.format(class_ind)] = \
            tf.expand_dims(selected_scores[class_ind], axis=0)
        predictions['bboxes_{}'.format(class_ind)] = \
            tf.expand_dims(selected_bboxes[class_ind], axis=0)
        predictions['quadrilaterals_{}'.format(class_ind)] = \
            tf.expand_dims(selected_quadrilaterals[class_ind], axis=0)

    if mode != tf.estimator.ModeKeys.PREDICT:
        raise ValueError('This script only support "PREDICT" mode!')

    return tf.estimator.EstimatorSpec(mode=mode,
                                      predictions=predictions,
                                      prediction_hooks=None,
                                      loss=None,
                                      train_op=None)
def input_fn():
    """Build the batched input tensors and publish the anchor metadata.

    Reads ``is_training``, ``batch_size`` and ``dataset_pattern`` from a
    scope that is not part of this function (presumably an enclosing
    factory function). The dataset split name passed to
    ``dataset_common.slim_get_batch`` is ``dataset_pattern`` without its
    last two characters.

    Side effect: rebinds the module-level ``global_anchor_info`` dict with
    the keys ``'decode_fn'``, ``'num_anchors_per_layer'`` and
    ``'num_anchors_per_location_all_layers'``.

    :returns: ``(image, targets)`` where ``targets`` is a dict with the keys
        ``'shape'``, ``'loc_targets'``, ``'cls_targets'`` and
        ``'match_scores'``.
    """
    global global_anchor_info

    num_layers = config.NUM_FEATURE_LAYERS
    target_shape = [FLAGS.train_image_size] * 2

    anchor_processor = anchor_manipulator.AnchorProcessor(
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=config.PRIOR_SCALING)

    (anchor_heights_all_layers,
     anchor_widths_all_layers,
     num_anchors_per_location_all_layers) = (
        anchor_processor.get_anchors_size_all_layers(
            config.ALL_ANCHOR_SCALES,
            config.ALL_EXTRA_SCALES,
            config.ALL_ANCHOR_RATIOS,
            num_layers))

    # shape = (num_anchors_all_layers,).
    (anchors_ymin,
     anchors_xmin,
     anchors_ymax,
     anchors_xmax,
     inside_mask) = (
        anchor_processor.get_all_anchors_all_layers(
            target_shape,
            anchor_heights_all_layers,
            anchor_widths_all_layers,
            num_anchors_per_location_all_layers,
            config.ANCHOR_OFFSETS,
            config.VERTICAL_OFFSETS,
            config.ALL_LAYER_SHAPES,
            config.ALL_LAYER_STRIDES,
            [FLAGS.train_image_size * 1.] * num_layers,
            [False] * num_layers))

    # Only the second value returned by count_num_anchors_per_layer is kept.
    num_anchors_per_layer = []
    for layer_index, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
        counts = anchor_processor.count_num_anchors_per_layer(
            num_anchors_per_location_all_layers[layer_index],
            layer_shape,
            name='count_num_anchors_per_layer_{}'.format(layer_index))
        num_anchors_per_layer.append(counts[1])

    def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
        """Preprocess one sample to ``target_shape``."""
        return textboxes_plusplus_preprocessing.preprocess_image(
            image_, labels_, bboxes_, quadrilaterals_,
            target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)

    def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
        """Encode ground truth against the anchors computed above."""
        return anchor_processor.encode_anchors(
            glabels_, gbboxes_, gquadrilaterals_,
            anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax,
            inside_mask)

    split_name = dataset_pattern[:-2]
    file_pattern = os.path.join(FLAGS.data_dir, dataset_pattern)
    image, _, shape, loc_targets, cls_targets, match_scores = (
        dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size,
            split_name,
            file_pattern,
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training))

    anchor_info = {}
    anchor_info['decode_fn'] = (
        lambda pred: anchor_processor.batch_decode_anchors(
            pred,
            anchors_ymin,
            anchors_xmin,
            anchors_ymax,
            anchors_xmax))
    anchor_info['num_anchors_per_layer'] = num_anchors_per_layer
    anchor_info['num_anchors_per_location_all_layers'] = \
        num_anchors_per_location_all_layers
    global_anchor_info = anchor_info

    targets = dict(shape=shape,
                   loc_targets=loc_targets,
                   cls_targets=cls_targets,
                   match_scores=match_scores)
    return image, targets