def input_fn():
    # We only support a batch size of 1 during evaluation.
    assert batch_size == 1
    target_shape = [FLAGS.train_image_size] * 2
    image_preprocessing_fn = \
        lambda image_, labels_, bboxes_, quadrilaterals_: \
        textboxes_plusplus_preprocessing.preprocess_image(
            image_, labels_, bboxes_, quadrilaterals_, target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)
    image, file_name, shape, output_shape = \
        dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size,
            ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoding_fn=None,
            num_epochs=1,
            is_training=is_training)
    return {'image': image,
            'file_name': file_name,
            'shape': shape,
            'output_shape': output_shape}, None
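A minimal usage sketch (not part of the original code): an input_fn with this zero-argument, (features, labels) return signature is what the TF 1.x tf.estimator API expects, so it would typically be consumed roughly as below; model_fn and model_dir are hypothetical placeholders.

import tensorflow as tf

def run_prediction(model_fn, model_dir):
    # Standard tf.estimator (TF 1.x) wiring; model_fn and model_dir are placeholders.
    estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
    # predict() repeatedly calls input_fn and yields one prediction dict per example.
    return estimator.predict(input_fn=input_fn)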
def input_fn():
    target_shape = [FLAGS.train_image_size] * 2
    image_preprocessing_fn = lambda image_, label_: cls_preprocessing.preprocess_image(
        image_, label_, target_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    if FLAGS.location_feature_stage is not None:
        # Split the batch between the classification and regression datasets.
        _batch_size = int(batch_size / 2)
        print('Use cls and reg, so the batch size of each task is {}'.format(_batch_size))
        image1, _, cls_targets1, points1, is_reg1 = dataset_common.slim_get_batch(
            FLAGS.num_classes,
            _batch_size,
            ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, FLAGS.cls_data_dir, dataset_pattern),
            int(FLAGS.num_readers / 2),
            int(FLAGS.num_preprocessing_threads / 2),
            image_preprocessing_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
        image2, _, cls_targets2, points2, is_reg2 = dataset_common.slim_get_batch(
            FLAGS.num_classes,
            _batch_size,
            ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, FLAGS.reg_data_dir, dataset_pattern),
            int(FLAGS.num_readers / 2),
            int(FLAGS.num_preprocessing_threads / 2),
            image_preprocessing_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
        image, cls_targets, points, is_reg = [
            tf.concat([image1, image2], axis=0),
            tf.concat([cls_targets1, cls_targets2], axis=0),
            tf.concat([points1, points2], axis=0),
            tf.concat([is_reg1, is_reg2], axis=0)]
    else:
        _batch_size = batch_size
        image, _, cls_targets, points, is_reg = dataset_common.slim_get_batch(
            FLAGS.num_classes,
            _batch_size,
            ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
    return image, {'cls_targets': cls_targets, 'loc_targets': points, 'is_reg': is_reg}
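Note that int(batch_size / 2) truncates when batch_size is odd, so the concatenated batch would come up one sample short of the requested size. A small guard like the one below (an addition, not in the original) makes that assumption explicit at the top of the dual-dataset branch.

# Hypothetical guard for the dual-dataset branch: keep the concatenated batch
# equal to the requested batch_size by requiring an even value.
assert batch_size % 2 == 0, 'batch_size must be even when using both cls and reg data'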
def input_fn(dataset_pattern='val-*', batch_size=1, data_location=None):
    out_shape = [SSD_VGG16_IMAGE_SIZE] * 2
    anchor_creator = anchor_manipulator.AnchorCreator(
        out_shape,
        layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
        extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
        anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                       (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
        layer_steps=[8, 16, 32, 64, 100, 300])
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=MATCH_THRESHOLD,
        ignore_threshold=NEG_THRESHOLD,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])

    def image_preprocessing_fn(image_, labels_, bboxes_):
        return ssd_preprocessing.preprocess_image(
            image_, labels_, bboxes_, out_shape,
            is_training=False,
            data_format=DATA_FORMAT,
            output_rgb=False)

    def anchor_encoder_fn(glabels_, gbboxes_):
        return anchor_encoder_decoder.encode_all_anchors(
            glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)

    image, filename, shape, loc_targets, cls_targets, match_scores = \
        dataset_common.slim_get_batch(
            NUM_CLASSES,
            batch_size,
            'val',
            os.path.join(data_location, dataset_pattern),
            NUM_READERS,
            NUM_PREPROCESSING_THREADS,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=1,
            is_training=False)
    return image, filename, shape
def input_fn():
    target_shape = [FLAGS.train_image_size] * 2
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])
    all_anchor_scales = [(16.,), (32.,), (64.,), (128.,), (256.,), (512.,)]
    all_extra_scales = [(), (), (), (), (), ()]
    all_anchor_ratios = [(1.,), (1.,), (1.,), (1.,), (1.,), (1.,)]
    all_layer_shapes = [(160, 160), (80, 80), (40, 40), (20, 20), (10, 10), (5, 5)]
    all_layer_strides = [4, 8, 16, 32, 64, 128]
    offset_list = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    total_layers = len(all_layer_shapes)
    anchors_height = list()
    anchors_width = list()
    anchors_depth = list()
    for ind in range(total_layers):
        _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
            all_anchor_scales[ind], all_extra_scales[ind], all_anchor_ratios[ind],
            name='get_anchors_width_height{}'.format(ind))
        anchors_height.append(_anchors_height)
        anchors_width.append(_anchors_width)
        anchors_depth.append(_anchor_depth)
    anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(
        target_shape, anchors_height, anchors_width, anchors_depth,
        offset_list, all_layer_shapes, all_layer_strides,
        [FLAGS.train_image_size * 1.] * total_layers,
        [False] * total_layers)
    num_anchors_per_layer = list()
    for ind, layer_shape in enumerate(all_layer_shapes):
        _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(
            anchors_depth[ind], layer_shape, name='get_anchor_count{}'.format(ind))
        num_anchors_per_layer.append(_num_anchors_per_layer)
    image_preprocessing_fn = lambda image_, bboxes_: sfd_preprocessing.preprocess_image(
        image_, bboxes_, target_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    anchor_encoder_fn = lambda gbboxes_: anchor_encoder_decoder.encode_anchors(
        gbboxes_, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax,
        inside_mask, match_mining=True)
    image, filename, shape, loc_targets, cls_targets, match_scores, _ = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        ('train' if is_training else 'valid'),
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=FLAGS.train_epochs,
        is_training=is_training)
    global global_anchor_info
    global_anchor_info = {
        'decode_fn': lambda pred: anchor_encoder_decoder.batch_decode_anchors(
            pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
        'num_anchors_per_layer': num_anchors_per_layer,
        'all_num_anchors_depth': anchors_depth}
    return image, {'filename': filename,
                   'shape': shape,
                   'loc_targets': loc_targets,
                   'cls_targets': cls_targets,
                   'match_scores': match_scores}
def input_fn():
    out_shape = [FLAGS.train_image_size] * 2
    ssd300_anchor_params = {
        'layers_shapes': [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        'anchor_scales': [(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
        'extra_anchor_scales': [(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
        'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                          (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
        'layer_steps': [8, 16, 32, 64, 100, 300]}
    ssd512_anchor_params = {
        'layers_shapes': [(64, 64), (32, 32), (16, 16), (8, 8), (4, 4), (2, 2), (1, 1)],
        'anchor_scales': [(0.07,), (0.15,), (0.3,), (0.45,), (0.6,), (0.75,), (0.9,)],
        'extra_anchor_scales': [(0.1025,), (0.2121,), (0.3674,), (0.5196,), (0.6708,), (0.8216,), (0.9721,)],
        'anchor_ratios': [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                          (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
        'layer_steps': [8, 16, 32, 64, 128, 256, 512]}
    if FLAGS.train_image_size == 512:
        net_params = ssd512_anchor_params
        print('using ssd512 model')
    else:
        net_params = ssd300_anchor_params
        print('using ssd300 model')
    anchor_creator = anchor_manipulator.AnchorCreator(out_shape, **net_params)
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * len(net_params['layer_steps']),
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])
    image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
        image_, labels_, bboxes_, out_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
        glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
    image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        ('train' if is_training else 'val'),
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=FLAGS.train_epochs,
        is_training=is_training)
    global global_anchor_info
    global_anchor_info = {
        'decode_fn': lambda pred: anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
        'num_anchors_per_layer': num_anchors_per_layer,
        'all_num_anchors_depth': all_num_anchors_depth}
    return {'image': image,
            'filename': filename,
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores}, None
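For orientation, the SSD300 parameters above correspond to the usual per-location anchor depths of [4, 6, 6, 6, 4, 4] (the listed ratios per scale plus one extra scale per layer), which yields the standard 8732 default boxes. The quick check below is a sketch that assumes those depths; it is not part of the original pipeline.

# Sanity check of the SSD300 anchor count implied by the parameters above
# (assumed per-location depths of [4, 6, 6, 6, 4, 4]).
layers_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
depths = [4, 6, 6, 6, 4, 4]
total_anchors = sum(h * w * d for (h, w), d in zip(layers_shapes, depths))
print(total_anchors)  # 8732, the standard SSD300 default-box count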
def input_fn():
    assert batch_size == 1, 'We only support a batch size of 1 during evaluation.'
    target_shape = [FLAGS.train_image_size] * 2
    image_preprocessing_fn = lambda image_, label_: cls_preprocessing.preprocess_image(
        image_, label_, target_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    image, filename, label, points, is_reg = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        ('train' if is_training else 'val'),
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        num_epochs=1,
        is_training=is_training)
    return {'image': image,
            'filename': filename,
            'label': label,
            'points': points,
            'is_reg': is_reg}, None
def input_fn():
    out_shape = [FLAGS.train_image_size] * 2
    anchor_creator = anchor_manipulator.AnchorCreator(
        out_shape,
        layers_shapes=[(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
        anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
        extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
        anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                       (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
        layer_steps=[8, 16, 32, 64, 100, 300])
    # all_anchors records the anchor information per layer, e.g.
    # [[(38x38x1), (38x38x1), (4x1), (4x1)], [(19x19x1), (19x19x1), (4x1), (4x1)], ...]
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])
    image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
        image_, labels_, bboxes_, out_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
        glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
    anchor_decoder_fn = lambda pred: anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer)
    image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        ('train' if is_training else 'val'),
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=FLAGS.train_epochs,
        is_training=is_training)
    global global_anchor_info
    global_anchor_info = {'decode_fn': anchor_decoder_fn,
                          'num_anchors_per_layer': num_anchors_per_layer,
                          'all_num_anchors_depth': all_num_anchors_depth}
    return image, {'shape': shape,
                   'loc_targets': loc_targets,
                   'cls_targets': cls_targets,
                   'match_scores': match_scores}
def input_fn():
    out_shape = [args.train_image_size] * 2
    anchor_creator = anchor_manipulator.AnchorCreator(
        out_shape,
        layers_shapes=[(50, 50), (25, 25), (13, 13), (7, 7), (3, 3), (3, 3)],
        anchor_scales=[(0.1,), (0.2,), (0.375,), (0.55,), (0.725,), (0.9,)],
        extra_anchor_scales=[(0.1414,), (0.2739,), (0.4541,), (0.6315,), (0.8078,), (0.9836,)],
        anchor_ratios=[(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                       (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)],
        layer_steps=[24, 48, 92, 171, 400, 400])
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=args.match_threshold,
        ignore_threshold=args.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])
    image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
        image_, labels_, bboxes_, out_shape,
        is_training=is_training,
        data_format=args.data_format,
        output_rgb=True)
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
        glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
    image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
        args.num_classes,
        batch_size,
        ('train' if is_training else 'val'),
        os.path.join(args.data_dir, dataset_pattern),
        args.num_readers,
        args.num_preprocessing_threads_mine,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=args.train_epochs,
        is_training=is_training)
    global global_anchor_info
    global_anchor_info = {
        'decode_fn': lambda pred: anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
        'num_anchors_per_layer': num_anchors_per_layer,
        'all_num_anchors_depth': all_num_anchors_depth}
    return {'image': image,
            'filename': filename,
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores}, None
def input_fn():
    target_shape = [FLAGS.train_image_size] * 2
    anchor_processor = \
        anchor_manipulator.AnchorProcessor(
            positive_threshold=FLAGS.match_threshold,
            ignore_threshold=FLAGS.neg_threshold,
            prior_scaling=config.PRIOR_SCALING)
    # anchor_processor: Python object
    anchor_heights_all_layers, \
        anchor_widths_all_layers, \
        num_anchors_per_location_all_layers = \
        anchor_processor.get_anchors_size_all_layers(
            config.ALL_ANCHOR_SCALES,
            config.ALL_EXTRA_SCALES,
            config.ALL_ANCHOR_RATIOS,
            config.NUM_FEATURE_LAYERS)
    # anchor_heights_all_layers: [1d-tf.constant tf.float32, 1d-tf.constant tf.float32, ...]
    # anchor_widths_all_layers: [1d-tf.constant tf.float32, 1d-tf.constant tf.float32, ...]
    # num_anchors_per_location_all_layers: [Python int, Python int, ...]
    anchors_ymin, \
        anchors_xmin, \
        anchors_ymax, \
        anchors_xmax, \
        inside_mask = \
        anchor_processor.get_all_anchors_all_layers(
            target_shape,
            anchor_heights_all_layers,
            anchor_widths_all_layers,
            num_anchors_per_location_all_layers,
            config.ANCHOR_OFFSETS,
            config.VERTICAL_OFFSETS,
            config.ALL_LAYER_SHAPES,
            config.ALL_LAYER_STRIDES,
            [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
            [False] * config.NUM_FEATURE_LAYERS)
    # anchors_ymin: 1d-tf.Tensor(num_anchors_all_layers) tf.float32
    # inside_mask: 1d-tf.Tensor(num_anchors_all_layers) tf.bool
    num_anchors_per_layer = []
    for ind, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
        _, _num_anchors_per_layer = \
            anchor_processor.count_num_anchors_per_layer(
                num_anchors_per_location_all_layers[ind],
                layer_shape,
                name='count_num_anchors_per_layer_{}'.format(ind))
        num_anchors_per_layer.append(_num_anchors_per_layer)
    # num_anchors_per_layer = [num_anchors_layer1, num_anchors_layer2, ...]
    # e.g., num_anchors_per_layer = [48 x 48 x 2 x 10, ...]

    def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
        return textboxes_plusplus_preprocessing.preprocess_image(
            image_, labels_, bboxes_, quadrilaterals_, target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)

    def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
        return anchor_processor.encode_anchors(
            glabels_, gbboxes_, gquadrilaterals_,
            anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax,
            inside_mask)

    image, _, shape, loc_targets, cls_targets, match_scores = \
        dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size,
            ('train' if is_training else 'val'),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
    global global_anchor_info
    global_anchor_info = \
        {'decode_fn': lambda pred: anchor_processor.batch_decode_anchors(
            pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
         'num_anchors_per_layer': num_anchors_per_layer,
         'num_anchors_per_location_all_layers': num_anchors_per_location_all_layers}
    return image, \
        {'shape': shape,                # original shape from the .tfrecord files
         'loc_targets': loc_targets,    # [bs, n_anchors, 12]
         'cls_targets': cls_targets,    # [bs, n_anchors]
         'match_scores': match_scores   # [bs, n_anchors]
         }
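The comment above notes that loc_targets carries 12 regression values per anchor; in TextBoxes++ this is commonly the 4 offsets of the horizontal bounding rectangle plus the 8 offsets of the quadrilateral corners. The helper below is only an illustration and assumes that ordering, which this snippet does not confirm.

def split_loc_targets(loc_targets):
    # Assumed layout of the 12 values per anchor:
    # first 4 = horizontal bbox offsets, last 8 = quadrilateral corner offsets.
    bbox_targets = loc_targets[..., :4]
    quad_targets = loc_targets[..., 4:]
    return bbox_targets, quad_targets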
def input_fn():
    out_shape = [300, 510]  # [FLAGS.train_image_size] * 2
    anchor_creator = anchor_manipulator.AnchorCreator(
        out_shape,
        layers_shapes=[(38, 64), (19, 32), (10, 16), (5, 8), (3, 6), (1, 4)],
        anchor_scales=[(0.05,), (0.1,), (0.2,), (0.3,), (0.4,), (0.5,)],
        extra_anchor_scales=[(0.07,), (0.1414,), (0.245,), (0.346,), (0.447,), (0.547,)],
        anchor_ratios=[(1.,), (1.,), (1.,), (1.,), (1.,), (1.,)],
        # anchor_ratios=[(2., .5), (2., 3., .5, 0.3333), (2., 3., .5, 0.3333),
        #                (2., 3., .5, 0.3333), (2., .5), (2., .5)],
        layer_steps=[8, 16, 32, 64, 100, 300])
    all_anchors, all_num_anchors_depth, all_num_anchors_spatial = anchor_creator.get_all_anchors()
    num_anchors_per_layer = []
    for ind in range(len(all_anchors)):
        num_anchors_per_layer.append(all_num_anchors_depth[ind] * all_num_anchors_spatial[ind])
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        allowed_borders=[1.0] * 6,
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])
    image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
        image_, labels_, bboxes_, out_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_all_anchors(
        glabels_, gbboxes_, all_anchors, all_num_anchors_depth, all_num_anchors_spatial)
    image, filename, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        ('train' if is_training else 'val'),
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=FLAGS.train_epochs,
        is_training=is_training)
    global global_anchor_info
    global_anchor_info = {
        'decode_fn': lambda pred: anchor_encoder_decoder.decode_all_anchors(pred, num_anchors_per_layer),
        'num_anchors_per_layer': num_anchors_per_layer,
        'all_num_anchors_depth': all_num_anchors_depth}
    return {'image': image,
            'filename': filename,
            'shape': shape,
            'loc_targets': loc_targets,
            'cls_targets': cls_targets,
            'match_scores': match_scores}, None
def input_fn():
    target_shape = [FLAGS.train_image_size] * 2
    anchor_processor = \
        anchor_manipulator.AnchorProcessor(
            positive_threshold=FLAGS.match_threshold,
            ignore_threshold=FLAGS.neg_threshold,
            prior_scaling=config.PRIOR_SCALING)
    anchor_heights_all_layers, \
        anchor_widths_all_layers, \
        num_anchors_per_location_all_layers = \
        anchor_processor.get_anchors_size_all_layers(
            config.ALL_ANCHOR_SCALES,
            config.ALL_EXTRA_SCALES,
            config.ALL_ANCHOR_RATIOS,
            config.NUM_FEATURE_LAYERS)
    # Each of the tensors below has shape (num_anchors_all_layers,).
    anchors_ymin, \
        anchors_xmin, \
        anchors_ymax, \
        anchors_xmax, \
        inside_mask = \
        anchor_processor.get_all_anchors_all_layers(
            target_shape,
            anchor_heights_all_layers,
            anchor_widths_all_layers,
            num_anchors_per_location_all_layers,
            config.ANCHOR_OFFSETS,
            config.VERTICAL_OFFSETS,
            config.ALL_LAYER_SHAPES,
            config.ALL_LAYER_STRIDES,
            [FLAGS.train_image_size * 1.] * config.NUM_FEATURE_LAYERS,
            [False] * config.NUM_FEATURE_LAYERS)
    num_anchors_per_layer = []
    for ind, layer_shape in enumerate(config.ALL_LAYER_SHAPES):
        _, _num_anchors_per_layer = \
            anchor_processor.count_num_anchors_per_layer(
                num_anchors_per_location_all_layers[ind],
                layer_shape,
                name='count_num_anchors_per_layer_{}'.format(ind))
        num_anchors_per_layer.append(_num_anchors_per_layer)

    def image_preprocessing_fn(image_, labels_, bboxes_, quadrilaterals_):
        return textboxes_plusplus_preprocessing.preprocess_image(
            image_, labels_, bboxes_, quadrilaterals_, target_shape,
            is_training=is_training,
            data_format=FLAGS.data_format,
            output_rgb=False)

    def anchor_encoder_fn(glabels_, gbboxes_, gquadrilaterals_):
        return anchor_processor.encode_anchors(
            glabels_, gbboxes_, gquadrilaterals_,
            anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax,
            inside_mask)

    image, _, shape, loc_targets, cls_targets, match_scores = \
        dataset_common.slim_get_batch(
            FLAGS.num_classes,
            batch_size,
            (dataset_pattern[:-2]),
            os.path.join(FLAGS.data_dir, dataset_pattern),
            FLAGS.num_readers,
            FLAGS.num_preprocessing_threads,
            image_preprocessing_fn,
            anchor_encoder_fn,
            num_epochs=FLAGS.train_epochs,
            is_training=is_training)
    global global_anchor_info
    global_anchor_info = \
        {'decode_fn': lambda pred: anchor_processor.batch_decode_anchors(
            pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
         'num_anchors_per_layer': num_anchors_per_layer,
         'num_anchors_per_location_all_layers': num_anchors_per_location_all_layers}
    return image, \
        {'shape': shape,
         'loc_targets': loc_targets,
         'cls_targets': cls_targets,
         'match_scores': match_scores}
def input_fn():
    # train_image_size = 300, so target_shape = [300, 300]
    target_shape = [FLAGS.train_image_size] * 2
    # match_threshold: 0.5, neg_threshold: 0.5
    anchor_encoder_decoder = anchor_manipulator.AnchorEncoder(
        positive_threshold=FLAGS.match_threshold,
        ignore_threshold=FLAGS.neg_threshold,
        prior_scaling=[0.1, 0.1, 0.2, 0.2])
    all_anchor_scales = [(30.,), (60.,), (112.5,), (165.,), (217.5,), (270.,)]
    all_extra_scales = [(42.43,), (82.17,), (136.23,), (189.45,), (242.34,), (295.08,)]
    all_anchor_ratios = [(1., 2., .5), (1., 2., 3., .5, 0.3333), (1., 2., 3., .5, 0.3333),
                         (1., 2., 3., .5, 0.3333), (1., 2., .5), (1., 2., .5)]
    all_layer_shapes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
    all_layer_strides = [8, 16, 32, 64, 100, 300]
    total_layers = len(all_layer_shapes)
    anchors_height = list()
    anchors_width = list()
    anchors_depth = list()
    for ind in range(total_layers):
        # If this layer has n default prior boxes, anchors_height/anchors_width hold
        # the heights/widths of those boxes and _anchor_depth is n.
        _anchors_height, _anchors_width, _anchor_depth = anchor_encoder_decoder.get_anchors_width_height(
            all_anchor_scales[ind], all_extra_scales[ind], all_anchor_ratios[ind],
            name='get_anchors_width_height{}'.format(ind))
        anchors_height.append(_anchors_height)
        anchors_width.append(_anchors_width)
        anchors_depth.append(_anchor_depth)
    # anchors_ymin covers all 38*38*4 + 19*19*6 + 10*10*6 + 5*5*6 + 3*3*4 + 1*1*4 anchors
    anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask = anchor_encoder_decoder.get_all_anchors(
        target_shape, anchors_height, anchors_width, anchors_depth,
        [0.5] * total_layers, all_layer_shapes, all_layer_strides,
        [FLAGS.train_image_size * 1.] * total_layers,
        [False] * total_layers)
    num_anchors_per_layer = list()
    # all_layer_shapes: [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]
    for ind, layer_shape in enumerate(all_layer_shapes):
        # _num_anchors_per_layer = layer_shape[0] * layer_shape[1] * anchors_depth
        _, _num_anchors_per_layer = anchor_encoder_decoder.get_anchors_count(
            anchors_depth[ind], layer_shape, name='get_anchor_count{}'.format(ind))
        num_anchors_per_layer.append(_num_anchors_per_layer)
    # num_anchors_per_layer: [38*38*4, 19*19*6, 10*10*6, 5*5*6, 3*3*4, 1*1*4]
    image_preprocessing_fn = lambda image_, labels_, bboxes_: ssd_preprocessing.preprocess_image(
        image_, labels_, bboxes_, target_shape,
        is_training=is_training,
        data_format=FLAGS.data_format,
        output_rgb=False)
    anchor_encoder_fn = lambda glabels_, gbboxes_: anchor_encoder_decoder.encode_anchors(
        glabels_, gbboxes_, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax, inside_mask)
    image, _, shape, loc_targets, cls_targets, match_scores = dataset_common.slim_get_batch(
        FLAGS.num_classes,
        batch_size,
        ('train' if is_training else 'val'),
        os.path.join(FLAGS.data_dir, dataset_pattern),
        FLAGS.num_readers,
        FLAGS.num_preprocessing_threads,
        image_preprocessing_fn,
        anchor_encoder_fn,
        num_epochs=FLAGS.train_epochs,
        is_training=is_training)
    global global_anchor_info
    global_anchor_info = {
        'decode_fn': lambda pred: anchor_encoder_decoder.batch_decode_anchors(
            pred, anchors_ymin, anchors_xmin, anchors_ymax, anchors_xmax),
        'num_anchors_per_layer': num_anchors_per_layer,
        'all_num_anchors_depth': anchors_depth}
    return image, {'shape': shape,
                   'loc_targets': loc_targets,
                   'cls_targets': cls_targets,
                   'match_scores': match_scores}