def __call__(self, params, input_context=None, batch_size=None):
  input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  params['image_size'])
  anchor_labeler = anchors.AnchorLabeler(input_anchors, params['num_classes'])
  example_decoder = tf_example_decoder.TfExampleDecoder(
      include_mask='segmentation' in params['heads'],
      regenerate_source_id=params['regenerate_source_id'])

  batch_size = batch_size or params['batch_size']
  seed = params.get('tf_random_seed', None)
  dataset = tf.data.Dataset.list_files(
      self._file_pattern, shuffle=self._is_training, seed=seed)
  if input_context:
    dataset = dataset.shard(input_context.num_input_pipelines,
                            input_context.input_pipeline_id)

  # Prefetch data from files.
  def _prefetch_dataset(filename):
    if params.get('dataset_type', None) == 'sstable':
      # SSTable input is not handled here; only TFRecord files are read.
      pass
    else:
      dataset = tf.data.TFRecordDataset(filename).prefetch(1)
    return dataset

  dataset = dataset.interleave(
      _prefetch_dataset,
      num_parallel_calls=tf.data.AUTOTUNE,
      deterministic=bool(seed))
  dataset = dataset.with_options(self.dataset_options)
  if self._is_training:
    dataset = dataset.shuffle(64, seed=seed)

  # Parse the fetched records to input tensors for model function.
  # pylint: disable=g-long-lambda
  if params.get('dataset_type', None) == 'sstable':
    map_fn = lambda key, value: self.dataset_parser(value, example_decoder,
                                                    anchor_labeler, params)
  else:
    map_fn = lambda value: self.dataset_parser(value, example_decoder,
                                               anchor_labeler, params)
  # pylint: enable=g-long-lambda
  dataset = dataset.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
  dataset = dataset.prefetch(batch_size)
  dataset = dataset.batch(batch_size,
                          drop_remainder=params['drop_remainder'])
  dataset = dataset.map(
      lambda *args: self.process_example(params, batch_size, *args))
  dataset = dataset.prefetch(tf.data.AUTOTUNE)
  if self._is_training:
    dataset = dataset.repeat()
  if self._use_fake_data:
    # Turn this dataset into a semi-fake dataset which always loops over the
    # first batch. This reduces variance in performance and is useful in
    # testing.
    dataset = dataset.take(1).cache().repeat()
  return dataset
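# A minimal usage sketch of the input pipeline above. It assumes this __call__
# belongs to dataloader.InputReader (constructed with a positional file
# pattern and is_training flag, as in the test further below); the TFRecord
# pattern is a placeholder and the config tweaks are only illustrative.
def _example_train_input_fn(file_pattern='/tmp/coco_train-*.tfrecord'):
  params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
  params['drop_remainder'] = True  # __call__ reads this key when batching.
  reader = dataloader.InputReader(file_pattern, True)  # is_training=True
  # Each element is a batch of parsed, anchor-labeled training examples.
  return reader(params, batch_size=8)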
def __init__(self, iou_loss_type, min_level, max_level, num_scales,
             aspect_ratios, anchor_scale, image_size, **kwargs):
  super().__init__(**kwargs)
  self.iou_loss_type = iou_loss_type
  self.input_anchors = anchors.Anchors(min_level, max_level, num_scales,
                                       aspect_ratios, anchor_scale,
                                       image_size)
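# Illustrative construction only: this assumes the __init__ above belongs to a
# Keras-style IoU box-loss class (called BoxIouLoss here purely for the
# sketch) and that the anchor arguments come from the same detection config
# used elsewhere in this file.
def _example_build_box_iou_loss():
  config = hparams_config.get_detection_config('efficientdet-d0')
  return BoxIouLoss(  # hypothetical class name wrapping the __init__ above
      iou_loss_type=config.iou_loss_type,
      min_level=config.min_level,
      max_level=config.max_level,
      num_scales=config.num_scales,
      aspect_ratios=config.aspect_ratios,
      anchor_scale=config.anchor_scale,
      image_size=config.image_size)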
def tflite_pre_nms(params, cls_outputs, box_outputs):
  """Pre-NMS that is compatible with TFLite's custom NMS op.

  For details, see tensorflow/lite/kernels/detection_postprocess.cc

  Args:
    params: a dict of parameters.
    cls_outputs: a list of tensors for classes, each tensor denotes a level of
      logits with shape [1, H, W, num_class * num_anchors].
    box_outputs: a list of tensors for boxes, each tensor denotes a level of
      boxes with shape [1, H, W, 4 * num_anchors]. Each box format is
      [y_min, x_min, y_max, x_max].

  Returns:
    boxes: boxes encoded as {y_center, x_center, height, width}.
    scores: scores converted from `cls_outputs` logits using sigmoid.
    anchors: normalized anchors encoded as {y_center, x_center, height, width}.
  """
  cls_outputs = to_list(cls_outputs)
  box_outputs = to_list(box_outputs)
  cls_outputs, box_outputs = merge_class_box_level_outputs(
      params, cls_outputs, box_outputs)
  eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                 params['num_scales'], params['aspect_ratios'],
                                 params['anchor_scale'], params['image_size'])

  # TODO(b/175166514): Consider computing Top-K boxes & anchors here. We don't
  # do this currently since the resultant graph does not support TFLite
  # delegates well. `topk_class_boxes` won't work as-is, since the outputs
  # will need to be modified appropriately for TFLite op's consumption.

  # TFLite's object detection APIs require normalized anchors.
  height, width = utils.parse_image_size(params['image_size'])
  normalize_factor = tf.constant([height, width, height, width],
                                 dtype=tf.float32)
  normalized_anchors = eval_anchors.boxes / normalize_factor
  decoded_anchors = anchors.decode_anchors_to_centersize(
      box_outputs, normalized_anchors)

  # Convert logits to scores.
  scores = tf.math.sigmoid(cls_outputs)
  return box_outputs, scores, decoded_anchors
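# A hedged export sketch: for a TFLite-friendly graph, the raw per-level
# network outputs are passed through tflite_pre_nms and the three results are
# returned as graph outputs, leaving NMS to TFLite's custom
# detection_postprocess op. `model` and its output packing are assumptions
# about the surrounding code; only tflite_pre_nms and utils.parse_image_size
# come from this module.
def _example_tflite_serving_fn(model, params):
  @tf.function(input_signature=[
      tf.TensorSpec([1, *utils.parse_image_size(params['image_size']), 3],
                    tf.float32)
  ])
  def serving_fn(images):
    cls_outputs, box_outputs = model(images, training=False)  # assumed API
    return tflite_pre_nms(params, cls_outputs, box_outputs)
  return serving_fn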
def test_parser(self):
  tf.random.set_seed(111111)
  params = hparams_config.get_detection_config('efficientdet-d0').as_dict()
  input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                  params['num_scales'],
                                  params['aspect_ratios'],
                                  params['anchor_scale'],
                                  params['image_size'])
  anchor_labeler = anchors.AnchorLabeler(input_anchors, params['num_classes'])
  example_decoder = tf_example_decoder.TfExampleDecoder(
      regenerate_source_id=params['regenerate_source_id'])
  tfrecord_path = test_util.make_fake_tfrecord(self.get_temp_dir())
  dataset = tf.data.TFRecordDataset([tfrecord_path])
  value = next(iter(dataset))
  reader = dataloader.InputReader(tfrecord_path, True)
  result = reader.dataset_parser(value, example_decoder, anchor_labeler,
                                 params)
  self.assertEqual(len(result), 11)
def pre_nms(params, cls_outputs, box_outputs, topk=True):
  """Detection post processing before nms.

  It takes the multi-level class and box predictions from the network, merges
  them into unified tensors, and computes boxes, scores, and classes.

  Args:
    params: a dict of parameters.
    cls_outputs: a list of tensors for classes, each tensor denotes a level of
      logits with shape [N, H, W, num_class * num_anchors].
    box_outputs: a list of tensors for boxes, each tensor denotes a level of
      boxes with shape [N, H, W, 4 * num_anchors].
    topk: if True, select topk before nms (mainly to speed up nms).

  Returns:
    A tuple of (boxes, scores, classes).
  """
  # Get boxes by applying bounding box regression to anchors.
  eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                 params['num_scales'], params['aspect_ratios'],
                                 params['anchor_scale'], params['image_size'])

  cls_outputs, box_outputs = merge_class_box_level_outputs(
      params, cls_outputs, box_outputs)

  if topk:
    # Select topK purely based on scores before NMS, in order to speed up nms.
    cls_outputs, box_outputs, classes, indices = topk_class_boxes(
        params, cls_outputs, box_outputs)
    anchor_boxes = tf.gather(eval_anchors.boxes, indices)
  else:
    anchor_boxes = eval_anchors.boxes
    classes = None

  boxes = anchors.decode_box_outputs(box_outputs, anchor_boxes)
  # Convert logits to scores.
  scores = tf.math.sigmoid(cls_outputs)
  return boxes, scores, classes
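# A hedged end-to-end sketch: run pre_nms, then apply a simple class-agnostic
# NMS with a standard TensorFlow op for the first image in the batch. The
# score/IoU thresholds and max detections are illustrative defaults, not
# values taken from this codebase, and the real pipeline may instead apply
# per-class NMS.
def _example_postprocess(params, cls_outputs, box_outputs):
  boxes, scores, classes = pre_nms(params, cls_outputs, box_outputs, topk=True)
  # boxes[0]: [K, 4], scores[0]: [K], classes[0]: [K] after top-k selection.
  selected = tf.image.non_max_suppression(
      boxes[0], scores[0], max_output_size=100,
      iou_threshold=0.5, score_threshold=0.4)
  return (tf.gather(boxes[0], selected),
          tf.gather(scores[0], selected),
          tf.gather(classes[0], selected))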