def get_box_from_dataset(self, dataset):
    """Gather the trailing box components of every example into `self._boxes`.

    Each example's `'groundtruth_boxes'` entry is converted with
    `yxyx_to_xcycwh` and the slice `[..., 2:]` of the result (the last two
    components, i.e. w and h in the xcycwh layout) is kept. All slices are
    concatenated along axis 0.

    Args:
        dataset: an iterable of examples, or a list of such iterables. Every
            example must be subscriptable with `'groundtruth_boxes'`.

    Side effects:
        Sets `self._boxes` to the concatenated tensor, or to `None` when no
        example was encountered.
    """
    if not isinstance(dataset, list):
        dataset = [dataset]

    # Collect the per-example slices first and concatenate once. Growing a
    # tensor with tf.concat inside the loop re-copies all earlier rows on
    # every iteration.
    box_sizes = [
        yxyx_to_xcycwh(el['groundtruth_boxes'])[..., 2:]
        for ds in dataset
        for el in ds
    ]
    self._boxes = tf.concat(box_sizes, axis=0) if box_sizes else None
def crop_filter_to_bbox(image, boxes, classes, target_width, target_height,
                        offset_width, offset_height, fix=False):
    """Crops a window out of `image` and filters boxes by their centers.

    Boxes whose center lies strictly inside the crop window are selected
    with `shift_zeros`; the same mask is applied to `classes`.

    Args:
        image: image tensor. When `tf.shape(image)` has 4 entries the spatial
            size is read from axes 1 and 2, otherwise from axes 0 and 1.
        boxes: box tensor accepted by `box_ops.yxyx_to_xcycwh`
            (presumably normalized to the image size -- TODO confirm).
        classes: class tensor, one entry per box.
        target_width: width of the crop window.
        target_height: height of the crop window.
        offset_width: horizontal offset of the crop window.
        offset_height: vertical offset of the crop window.
        fix: when true the crop is padded back to the original image size at
            the same offset and box coordinates are not rescaled; when false
            the boxes are re-expressed relative to the cropped image.

    Returns:
        A tuple `(image, boxes, classes)` with boxes passed back through
        `box_ops.xcycwh_to_yxyx`.
    """
    with tf.name_scope('resize_crop_filter'):
        dims = tf.shape(image)
        if tf.shape(dims)[0] == 4:
            height = dims[1]
            width = dims[2]
        else:
            height = dims[0]
            width = dims[1]

        image = tf.image.crop_to_bounding_box(
            image, offset_height, offset_width, target_height, target_width)
        if fix:
            image = tf.image.pad_to_bounding_box(
                image, offset_height, offset_width, height, width)

        # Window edges expressed as fractions of the original image size.
        left = offset_width / width
        top = offset_height / height
        right = (offset_width + target_width) / width
        bottom = (offset_height + target_height) / height

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(tf.cast(boxes, left.dtype), 4, axis=-1)

        inside_x = tf.math.logical_and(x > left, x < right)
        inside_y = tf.math.logical_and(y > top, y < bottom)
        keep = tf.math.logical_and(inside_x, inside_y)

        x, y, w, h = [shift_zeros(part, keep) for part in (x, y, w, h)]
        classes = tf.squeeze(
            shift_zeros(tf.expand_dims(classes, axis=-1), keep), axis=-1)

        if not fix:
            # Move the origin to the window corner and renormalize to the
            # crop size.
            x_scale = tf.cast(width / target_width, x.dtype)
            y_scale = tf.cast(height / target_height, y.dtype)
            x = (x - left) * x_scale
            y = (y - top) * y_scale
            w = w * x_scale
            h = h * y_scale

        merged = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(merged)
    return image, boxes, classes
def translate_boxes(box, classes, translate_x, translate_y):
    """Shifts box centers and filters boxes whose center leaves [0, 1).

    Args:
        box: box tensor accepted by `box_ops.yxyx_to_xcycwh`.
        classes: class tensor, one entry per box.
        translate_x: offset added to every center x.
        translate_y: offset added to every center y.

    Returns:
        A tuple `(box, classes)`. Entries are selected with `shift_zeros`
        using the mask `0 <= x < 1 and 0 <= y < 1` on the shifted centers,
        and boxes are converted back with `box_ops.xcycwh_to_yxyx`.
    """
    with tf.name_scope('translate_boxs'):
        converted = box_ops.yxyx_to_xcycwh(box)
        x, y, w, h = tf.split(converted, 4, axis=-1)

        x = x + translate_x
        y = y + translate_y

        x_in_range = tf.math.logical_and(x >= 0, x < 1)
        y_in_range = tf.math.logical_and(y >= 0, y < 1)
        keep = tf.math.logical_and(x_in_range, y_in_range)

        x, y, w, h = [shift_zeros(part, keep) for part in (x, y, w, h)]
        classes = tf.squeeze(
            shift_zeros(tf.expand_dims(classes, axis=-1), keep), axis=-1)

        box = box_ops.xcycwh_to_yxyx(tf.concat([x, y, w, h], axis=-1))
    return box, classes
def pad_filter_to_bbox(image, boxes, classes, target_width, target_height,
                       offset_width, offset_height):
    """Pads `image` to a larger canvas and rescales the boxes to match.

    Args:
        image: image tensor. When `tf.shape(image)` has 4 entries the spatial
            size is read from axes 1 and 2, otherwise from axes 0 and 1.
        boxes: box tensor accepted by `box_ops.yxyx_to_xcycwh`.
        classes: class tensor; returned unchanged.
        target_width: width of the padded canvas.
        target_height: height of the padded canvas.
        offset_width: horizontal position of the image inside the canvas.
        offset_height: vertical position of the image inside the canvas.

    Returns:
        A tuple `(image, boxes, classes)`.
    """
    with tf.name_scope('resize_crop_filter'):
        dims = tf.shape(image)
        if tf.shape(dims)[0] == 4:
            height = dims[1]
            width = dims[2]
        else:
            height = dims[0]
            width = dims[1]

        image = tf.image.pad_to_bounding_box(
            image, offset_height, offset_width, target_height, target_width)

        # Offsets as fractions of the original (unpadded) image size.
        x_shift = tf.cast(offset_width / width, tf.float32)
        y_shift = tf.cast(offset_height / height, tf.float32)

        boxes = box_ops.yxyx_to_xcycwh(boxes)
        x, y, w, h = tf.split(tf.cast(boxes, x_shift.dtype), 4, axis=-1)

        # Ratio of the old size to the new canvas size, per axis.
        x_scale = tf.cast(width / target_width, x.dtype)
        y_scale = tf.cast(height / target_height, y.dtype)
        x = (x + x_shift) * x_scale
        y = (y + y_shift) * y_scale
        w = w * x_scale
        h = h * y_scale

        merged = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(merged)
    return image, boxes, classes
def cutmix_1(image_to_crop, boxes1, classes1, image_mask, boxes2, classes2,
             target_width, target_height, offset_width, offset_height):
    """Pastes a window of one image into the same window of another.

    The window is removed from `image_mask` with `cut_out`, the same window
    is taken from `image_to_crop` with `crop_filter_to_bbox(..., fix=True)`,
    and the two results are added. Their boxes and classes are concatenated
    and then filtered with `shift_zeros` using the mask `x > 0` on the box
    centers.

    Args:
        image_to_crop: image that supplies the window content.
        boxes1: boxes belonging to `image_to_crop`.
        classes1: classes belonging to `image_to_crop`.
        image_mask: image that receives the window.
        boxes2: boxes belonging to `image_mask`.
        classes2: classes belonging to `image_mask`.
        target_width: window width.
        target_height: window height.
        offset_width: horizontal window offset.
        offset_height: vertical window offset.

    Returns:
        A tuple `(image, boxes, classes)`.
    """
    with tf.name_scope('cutmix'):
        background, bg_boxes, bg_classes = cut_out(
            image_mask, boxes2, classes2, target_width, target_height,
            offset_width, offset_height)
        patch, patch_boxes, patch_classes = crop_filter_to_bbox(
            image_to_crop, boxes1, classes1, target_width, target_height,
            offset_width, offset_height, fix=True)

        image = background + patch
        classes = tf.concat([bg_classes, patch_classes], axis=-1)

        boxes = box_ops.yxyx_to_xcycwh(
            tf.concat([bg_boxes, patch_boxes], axis=-2))
        x, y, w, h = tf.split(boxes, 4, axis=-1)

        keep = x > 0
        x, y, w, h = [shift_zeros(part, keep) for part in (x, y, w, h)]
        classes = tf.squeeze(
            shift_zeros(tf.expand_dims(classes, axis=-1), keep), axis=-1)

        merged = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(merged)
    return image, boxes, classes
def _parse_eval_data(self, data):
    """Builds the image and label dict for one decoded evaluation example.

    The image is divided by 255, fitted to a square of side `self._image_w`
    with `fit_preserve_aspect_ratio`, and every per-instance field is padded
    to `self._max_num_instances` with zeros.

    Args:
        data: a dict of Tensors produced by the decoder. The keys read here
            are 'image', 'groundtruth_boxes', 'groundtruth_classes',
            'groundtruth_area', 'groundtruth_is_crowd' and 'source_id'.

    Returns:
        image: the image tensor.
        labels: a dict of Tensors that contains labels, including the
            'grid_form' entry produced by `self._build_grid`.
    """
    raw_image = data['image']
    dims = tf.shape(raw_image)
    # NOTE(review): `width` is taken from axis 0 and `height` from axis 1 of
    # the image shape; confirm this ordering is what downstream consumers of
    # labels['width'] / labels['height'] expect.
    width = dims[0]
    height = dims[1]

    image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
        raw_image / 255,
        data['groundtruth_boxes'],
        width=width,
        height=height,
        target_dim=self._image_w)
    boxes = box_utils.yxyx_to_xcycwh(boxes)
    best_anchors = preprocessing_ops.get_best_anchor(
        boxes, self._anchors, width=self._image_w, height=self._image_h)

    def _pad(tensor):
        # Every per-instance field is padded the same way.
        return preprocessing_ops.pad_max_instances(
            tensor, self._max_num_instances, 0)

    boxes = _pad(boxes)
    classes = _pad(data['groundtruth_classes'])
    best_anchors = _pad(best_anchors)
    area = _pad(data['groundtruth_area'])
    is_crowd = _pad(tf.cast(data['groundtruth_is_crowd'], tf.int32))

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(boxes, self._dtype),
        'classes': tf.cast(classes, self._dtype),
        'area': tf.cast(area, self._dtype),
        'is_crowd': is_crowd,
        'best_anchors': tf.cast(best_anchors, self._dtype),
        'width': width,
        'height': height,
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
    }

    # The grid is always built here (a former `self._fixed_size` guard is
    # disabled in the original code).
    labels['grid_form'] = self._build_grid(
        labels,
        self._image_w,
        batch=False,
        use_tie_breaker=self._use_tie_breaker)
    return image, labels
def cutmix_batch(image, boxes, classes, target_width, target_height,
                 offset_width, offset_height):
    """Applies cutmix within a batch using one shared window.

    The window is removed from every image with `cut_out` and extracted from
    every image with `crop_filter_to_bbox(..., fix=True)`. With probability
    0.5 the two halves of the extracted batch (split along axis 0) are
    swapped before the patches are added back, so each image receives the
    patch of an image from the other half; otherwise each image gets its own
    patch back.

    Args:
        image: batched image tensor.
        boxes: batched box tensor accepted by `box_ops.yxyx_to_xcycwh`.
        classes: batched class tensor.
        target_width: window width.
        target_height: window height.
        offset_width: horizontal window offset.
        offset_height: vertical window offset.

    Returns:
        A tuple `(image, boxes, classes, num_detections)`. `classes` is set
        to -1 wherever the filtered center x equals 0, and `num_detections`
        counts the entries with center x > 0 along the last axis.
    """
    with tf.name_scope('cutmix_batch'):
        holed, outer_boxes, outer_classes = cut_out(
            image, boxes, classes, target_width, target_height, offset_width,
            offset_height)
        patch, patch_boxes, patch_classes = crop_filter_to_bbox(
            image, boxes, classes, target_width, target_height, offset_width,
            offset_height, fix=True)

        mix = tf.random.uniform([], minval=0, maxval=1)
        if mix > 0.5:
            # Swap the two halves of the batch so patches cross over.
            img_a, img_b = tf.split(patch, 2, axis=0)
            box_a, box_b = tf.split(patch_boxes, 2, axis=0)
            cls_a, cls_b = tf.split(patch_classes, 2, axis=0)
            patch = tf.concat([img_b, img_a], axis=0)
            patch_boxes = tf.concat([box_b, box_a], axis=0)
            patch_classes = tf.concat([cls_b, cls_a], axis=0)

        image = holed + patch
        classes = tf.concat([outer_classes, patch_classes], axis=-1)

        boxes = box_ops.yxyx_to_xcycwh(
            tf.concat([outer_boxes, patch_boxes], axis=-2))
        x, y, w, h = tf.split(boxes, 4, axis=-1)

        keep = x > 0
        x, y, w, h = [shift_zeros(part, keep) for part in (x, y, w, h)]
        classes = tf.squeeze(
            shift_zeros(tf.expand_dims(classes, axis=-1), keep), axis=-1)

        merged = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
        boxes = box_ops.xcycwh_to_yxyx(merged)

        center_x = tf.squeeze(x, axis=-1)
        classes = tf.where(center_x == 0, -1, classes)
        num_detections = tf.reduce_sum(
            tf.cast(center_x > 0, tf.int32), axis=-1)
    return image, boxes, classes, num_detections
def cut_out(image_full, boxes, classes, target_width, target_height,
            offset_width, offset_height):
    """Zeroes a rectangular window of the image and filters boxes inside it.

    A mask that is 0 inside the window and 1 elsewhere is multiplied into
    `image_full`. Boxes whose center lies strictly inside the window are
    filtered out via `shift_zeros`; the same mask is applied to `classes`.

    Args:
        image_full: image tensor. When `tf.shape(image_full)` has 4 entries
            the spatial size is read from axes 1 (height) and 2 (width),
            otherwise from axes 0 (height) and 1 (width).
        boxes: box tensor accepted by `box_ops.yxyx_to_xcycwh`
            (presumably normalized to the image size -- TODO confirm).
        classes: class tensor, one entry per box.
        target_width: width of the window.
        target_height: height of the window.
        offset_width: horizontal offset of the window.
        offset_height: vertical offset of the window.

    Returns:
        A tuple `(image_full, boxes, classes)`.
    """
    shape = tf.shape(image_full)
    # Height precedes width in the image layout expected by the tf.image
    # bounding-box ops used below. Reading them in the opposite order yields
    # a transposed mask for non-square images and disagrees with
    # crop_filter_to_bbox, which is called with the same window arguments.
    if tf.shape(shape)[0] == 4:
        height = shape[1]
        width = shape[2]
    else:  # tf.shape(shape)[0] == 3
        height = shape[0]
        width = shape[1]

    # Build a mask of ones covering the window, pad it with zeros to the full
    # image size, then invert it so the window becomes zero.
    image_crop = tf.image.crop_to_bounding_box(image_full, offset_height,
                                               offset_width, target_height,
                                               target_width)
    image_crop = tf.ones_like(image_crop)
    image_crop = tf.image.pad_to_bounding_box(image_crop, offset_height,
                                              offset_width, height, width)
    image_crop = 1 - image_crop

    # Window edges as fractions of the image size.
    x_lower_bound = offset_width / width
    y_lower_bound = offset_height / height
    x_upper_bound = (offset_width + target_width) / width
    y_upper_bound = (offset_height + target_height) / height

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

    x_mask = tf.math.logical_and(x > x_lower_bound, x < x_upper_bound)
    y_mask = tf.math.logical_and(y > y_lower_bound, y < y_upper_bound)
    # True for boxes whose center is NOT inside the removed window.
    mask = tf.math.logical_not(tf.math.logical_and(x_mask, y_mask))

    x = shift_zeros(x, mask)
    y = shift_zeros(y, mask)
    w = shift_zeros(w, mask)
    h = shift_zeros(h, mask)
    classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
    classes = tf.squeeze(classes, axis=-1)

    boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
    boxes = box_ops.xcycwh_to_yxyx(boxes)

    image_full *= image_crop
    return image_full, boxes, classes
def fit_preserve_aspect_ratio(image, boxes, width=None, height=None,
                              target_dim=None):
    """Pads the image to a square, rescales the boxes, and resizes.

    The image is zero-padded (centered) to a square whose side is
    `max(width, height)`, the boxes are rescaled and shifted to the padded
    frame, and the image is resized to `(target_dim, target_dim)`.

    NOTE: inside this function `width` is the size of the FIRST spatial axis
    and `height` the size of the second one. That is how the defaults are
    derived from the image shape and how the values are passed to
    `tf.image.pad_to_bounding_box`. Callers supplying explicit values must
    follow the same convention.

    Args:
        image: image tensor; rank 3 or rank 4.
        boxes: box tensor accepted by `box_ops.yxyx_to_xcycwh`
            (presumably normalized to the image size -- TODO confirm).
        width: size of the first spatial axis, or None to read it from
            `image`.
        height: size of the second spatial axis, or None to read it from
            `image`.
        target_dim: side of the square output; defaults to the padded side.

    Returns:
        A tuple `(image, boxes)`.
    """
    if width is None or height is None:
        # Read the size from the `image` argument. The previous code indexed
        # an undefined name `data` here and raised NameError whenever the
        # caller relied on the defaults.
        shape = tf.shape(image)
        if tf.shape(shape)[0] == 4:
            width = shape[1]
            height = shape[2]
        else:
            width = shape[0]
            height = shape[1]

    clipper = tf.math.maximum(width, height)
    if target_dim is None:
        target_dim = clipper

    pad_width = clipper - width
    pad_height = clipper - height
    image = tf.image.pad_to_bounding_box(image, pad_width // 2,
                                         pad_height // 2, clipper, clipper)

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(boxes, 4, axis=-1)

    # Shrink by old-size / padded-size, then shift by half the padding
    # (expressed as a fraction of the padded side).
    y *= tf.cast(width / clipper, tf.float32)
    x *= tf.cast(height / clipper, tf.float32)

    y += tf.cast((pad_width / clipper) / 2, tf.float32)
    x += tf.cast((pad_height / clipper) / 2, tf.float32)

    h *= tf.cast(width / clipper, tf.float32)
    w *= tf.cast(height / clipper, tf.float32)

    boxes = tf.concat([x, y, w, h], axis=-1)
    boxes = box_ops.xcycwh_to_yxyx(boxes)
    image = tf.image.resize(image, (target_dim, target_dim))
    return image, boxes
def _postprocess_fn(self, image, label):
    """Post-processes a batch: optional cutmix, resize, and grid building.

    Args:
        image: batched image tensor.
        label: dict of label tensors; at least 'bbox' and 'classes' are read.
            The dict is modified in place.

    Returns:
        image: the resized image batch.
        label: the same dict with 'best_anchors' and 'grid_form' set, and
            'bbox' passed through `box_utils.xcycwh_to_yxyx`.
    """
    if self._cutmix:
        batch_size = tf.shape(image)[0]
        if batch_size >= 1:
            # The cutmix op is fed yxyx boxes; convert from and back to the
            # xcycwh layout stored in the label dict.
            boxes = box_utils.xcycwh_to_yxyx(label['bbox'])
            classes = label['classes']
            image, boxes, classes, num_detections = preprocessing_ops.randomized_cutmix_batch(
                image, boxes, classes)
            boxes = box_utils.yxyx_to_xcycwh(boxes)
            label['bbox'] = pad_max_instances(
                boxes, self._max_num_instances, pad_axis=-2, pad_value=0)
            label['classes'] = pad_max_instances(
                classes, self._max_num_instances, pad_axis=-1, pad_value=-1)

    # Output side length in units of the network down-scale factor.
    randscale = self._image_w // self._net_down_scale
    if not self._fixed_size:
        # Draw a uniform sample and compare it with 1 - self._pct_rand to
        # decide whether to pick a random scale in [10, 21).
        do_scale = tf.greater(
            tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
            1 - self._pct_rand)
        if do_scale:
            randscale = tf.random.uniform([],
                                          minval=10,
                                          maxval=21,
                                          seed=self._seed,
                                          dtype=tf.int32)
    width = randscale * self._net_down_scale
    image = tf.image.resize(image, (width, width))

    # NOTE(review): anchors are matched against self._image_w/_image_h while
    # the grid is built for the (possibly random) `width` -- confirm intended.
    best_anchors = preprocessing_ops.get_best_anchor_batch(
        label['bbox'], self._anchors, width=self._image_w, height=self._image_h)
    label['best_anchors'] = pad_max_instances(
        best_anchors, self._max_num_instances, pad_axis=-2, pad_value=0)

    grid = self._build_grid(
        label, width, batch=True, use_tie_breaker=self._use_tie_breaker)
    label.update({'grid_form': grid})
    label['bbox'] = box_utils.xcycwh_to_yxyx(label['bbox'])
    return image, label
def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    Applies, in order: random blur, HSV jitter, additive noise, optional
    horizontal flip, box jitter, image jitter, zoom crop, optional letter
    boxing, and a final resize/crop to (self._image_w, self._image_h).

    Args:
        data: a dict of Tensors produced by the decoder.

    Returns:
        images: the image tensor.
        labels: a dict of Tensors that contains labels.
    """
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    # Random Gaussian blur: the strongest setting for draws above 0.9, weaker
    # ones for (0.7, 0.9] and (0.4, 0.7], and no blur otherwise.
    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
        image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
        image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
        image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    # Colour jitter is done per channel in HSV space.
    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
        delta = preprocessing_ops.rand_uniform_strong(-0.1, 0.1)
        i_h = i_h + delta  # Hue
        i_h = tf.clip_by_value(i_h, 0.0, 1.0)
    if self._aug_rand_saturation:
        delta = preprocessing_ops.rand_scale(0.75)
        i_s = i_s * delta
    if self._aug_rand_brightness:
        delta = preprocessing_ops.rand_scale(0.75)
        i_v = i_v * delta
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    # Additive noise with a random standard deviation. The noise is clamped
    # to [0, 0.5], so only non-negative noise is ever added.
    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.math.minimum(noise, 0.5)
    noise = tf.math.maximum(noise, 0)
    image += noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
        image, boxes, _ = preprocess_ops.random_horizontal_flip(
            image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
        # jitter_boxes is applied to denormalized boxes, hence the
        # denormalize / normalize round trip.
        # NOTE(review): the jitter amount is the literal 0.025, not
        # self._jitter_boxes -- confirm intended.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)
        boxes = box_ops.jitter_boxes(boxes, 0.025)
        boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
        image, boxes, classes = preprocessing_ops.random_jitter(
            image, boxes, classes, self._jitter_im, seed=self._seed)

    if self._aug_rand_zoom:
        image, boxes, classes = preprocessing_ops.random_zoom_crop(
            image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    randscale = self._image_w // self._net_down_scale
    # NOTE(review): the random scale is drawn only when self._fixed_size is
    # true -- confirm the condition is not inverted.
    if self._fixed_size:
        do_scale = tf.greater(
            tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
            1 - self._pct_rand)
        if do_scale:
            randscale = tf.random.uniform([],
                                          minval=10,
                                          maxval=15,
                                          seed=self._seed,
                                          dtype=tf.int32)

    if self._letter_box:
        image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=randscale * self._net_down_scale)
        width = randscale * self._net_down_scale
        height = randscale * self._net_down_scale

    # Re-read the size from the (possibly letter-boxed) image; this overrides
    # the values assigned just above.
    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,
        default_height=height,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)

    num_dets = tf.shape(classes)[0]

    # padding
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
        # Anchors and the grid are built here only in this configuration;
        # the other branch returns labels without them.
        best_anchors = preprocessing_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                         self._max_num_instances,
                                                         0)
        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': num_dets
        }
        grid = self._build_grid(
            labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
        labels.update({'grid_form': grid})
        # The grid is built from the xcycwh boxes; the stored 'bbox' is
        # converted to yxyx afterwards.
        labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                         self._max_num_instances,
                                                         0)
        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'width': width,
            'height': height,
            'num_detections': num_dets
        }
    return image, labels
def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    Applies, in order: letter-box fit to self._max_process_size, optional
    horizontal flip, box jitter, image translation, zoom crop, a resize to
    416x416, colour jitter, and padding of all per-instance fields.

    Args:
        data: a dict of Tensors produced by the decoder.

    Returns:
        images: the image tensor.
        labels: a dict of Tensors that contains labels.
    """
    shape = tf.shape(data['image'])
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    # NOTE(review): `width` is read from axis 0 and `height` from axis 1 of
    # the image shape -- confirm this ordering against the consumers.
    width = shape[0]
    height = shape[1]

    image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
        image,
        boxes,
        width=width,
        height=height,
        target_dim=self._max_process_size)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
        image, boxes, _ = preprocess_ops.random_horizontal_flip(
            image, boxes, seed=self._seed)

    randscale = self._image_w // self._net_down_scale
    # NOTE(review): the random scale is drawn only when self._fixed_size is
    # true -- confirm the condition is not inverted.
    if self._fixed_size:
        do_scale = tf.greater(
            tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
            1 - self._pct_rand)
        if do_scale:
            randscale = tf.random.uniform([],
                                          minval=10,
                                          maxval=21,
                                          seed=self._seed,
                                          dtype=tf.int32)

    if self._jitter_boxes != 0.0:
        # jitter_boxes is applied to denormalized boxes, hence the
        # denormalize / normalize round trip.
        # NOTE(review): the jitter amount is the literal 0.025, not
        # self._jitter_boxes -- confirm intended.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)
        boxes = box_ops.jitter_boxes(boxes, 0.025)
        boxes = box_ops.normalize_boxes(boxes, image_shape)

    # From here on the boxes are kept in the xcycwh layout.
    boxes = box_utils.yxyx_to_xcycwh(boxes)
    if self._jitter_im != 0.0:
        image, boxes = preprocessing_ops.random_translate(image,
                                                          boxes,
                                                          self._jitter_im,
                                                          seed=self._seed)

    if self._aug_rand_zoom:
        image, boxes = preprocessing_ops.resize_crop_filter(
            image,
            boxes,
            default_width=self._image_w,
            default_height=self._image_h,
            target_width=randscale * self._net_down_scale,
            target_height=randscale * self._net_down_scale)
    # NOTE(review): the output size is the literal (416, 416) rather than
    # (self._image_w, self._image_h) -- confirm intended.
    image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

    if self._aug_rand_brightness:
        image = tf.image.random_brightness(image=image,
                                           max_delta=.1)  # Brightness
    if self._aug_rand_saturation:
        image = tf.image.random_saturation(image=image,
                                           lower=0.75,
                                           upper=1.25)  # Saturation
    if self._aug_rand_hue:
        image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
    image = tf.clip_by_value(image, 0.0, 1.0)

    best_anchors = preprocessing_ops.get_best_anchor(boxes,
                                                     self._anchors,
                                                     width=self._image_w,
                                                     height=self._image_h)

    # padding: every per-instance field is clipped or padded to
    # self._max_num_instances (classes with -1, everything else with 0).
    boxes = preprocess_ops.clip_or_pad_to_fixed_size(
        boxes, self._max_num_instances, 0)
    classes = preprocess_ops.clip_or_pad_to_fixed_size(
        data['groundtruth_classes'], self._max_num_instances, -1)
    best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
        best_anchors, self._max_num_instances, 0)
    area = preprocess_ops.clip_or_pad_to_fixed_size(
        data['groundtruth_area'], self._max_num_instances, 0)
    is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
        tf.cast(data['groundtruth_is_crowd'], tf.int32),
        self._max_num_instances, 0)

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(boxes, self._dtype),
        'classes': tf.cast(classes, self._dtype),
        'area': tf.cast(area, self._dtype),
        'is_crowd': is_crowd,
        'best_anchors': tf.cast(best_anchors, self._dtype),
        'width': width,
        'height': height,
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
    }

    # The grid form is only attached when the output size is fixed.
    if self._fixed_size:
        grid = self._build_grid(labels,
                                self._image_w,
                                use_tie_breaker=self._use_tie_breaker)
        labels.update({'grid_form': grid})
    return image, labels