import tensorflow as tf


def test_resize_keep_aspect_ratio(self):
    image_resized, bboxes_resized = resize(self.image, self.bboxes, 6, 6,
                                           keep_aspect_ratio=True)
    tf.debugging.assert_equal(
        tf.constant([6, 6, 3]),
        image_resized.shape,
        message="Dimensions of the resized image are wrong.")
    tf.debugging.assert_equal(
        tf.round(tf.ones([6, 2, 3], dtype=tf.float32) * 1000),
        tf.round(image_resized[:, 2:4, :] * 1000),
        message="Ones from original image should be there.",
    )
    tf.debugging.assert_equal(tf.zeros([6, 2, 3]),
                              image_resized[:, 4:, :],
                              message="Padding should be here.")
    tf.debugging.assert_equal(tf.zeros([6, 2, 3]),
                              image_resized[:, :2, :],
                              message="Padding should be here.")
    tf.debugging.assert_equal(
        tf.round(tf.constant([[0.25, 0.5, 0.75, 0.6]]) * 1000.0),
        tf.round(bboxes_resized * 1000.0))
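# A plausible setUp fixture consistent with the assertions above -- the exact
# values are an assumption, not taken from the original test file. A 6x2 image
# of ones padded symmetrically to 6x6 lands in columns 2:4, and with
# x_new = (x_old * 2 + 2) / 6 a box [0.25, 0.5, 0.75, 0.8] maps to the expected
# [0.25, 0.5, 0.75, 0.6]. The same fixture also satisfies the
# test_resize_not_keep_aspect_ratio assertions below (stretching ones gives
# ones and leaves relative coordinates untouched).
def setUp(self):
    self.image = tf.ones([6, 2, 3], dtype=tf.float32)
    self.bboxes = tf.constant([[0.25, 0.5, 0.75, 0.8]], dtype=tf.float32)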
def resize_train(self, image, bboxes, max_size, prob=0.5):
    # with probability `prob` keep the aspect ratio (pad to square),
    # otherwise stretch the image to fill the full square
    image, bboxes = tf.cond(
        tf.math.less(tf.random.uniform([], 0.0, 1.0), prob),
        lambda: resize(image, bboxes, max_size, max_size,
                       keep_aspect_ratio=True, random_method=True),
        lambda: resize(image, bboxes, max_size, max_size,
                       keep_aspect_ratio=False, random_method=True),
    )
    return image, bboxes
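# Hypothetical usage inside an input pipeline -- `loader`, `dataset`, and the
# (image, bboxes) element structure are illustration-only assumptions, not
# taken from the original code:
#
#     dataset = dataset.map(
#         lambda image, bboxes: loader.resize_train(image, bboxes, max_size=512),
#         num_parallel_calls=tf.data.AUTOTUNE,
#     )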
def test_resize_not_keep_aspect_ratio(self):
    image_resized, bboxes_resized = resize(self.image, self.bboxes, 6, 6,
                                           keep_aspect_ratio=False)
    tf.debugging.assert_equal(
        tf.constant([6, 6, 3]),
        image_resized.shape,
        message="Dimensions of the resized image are wrong.")
    # round to one decimal place to tolerate interpolation noise
    mult = 10
    tf.debugging.assert_equal(
        tf.ones([6, 6, 3]) * mult,
        tf.math.round(image_resized * mult),
        message="New image should contain only ones.",
    )
    # stretching the image does not change relative box coordinates
    tf.debugging.assert_equal(self.bboxes, bboxes_resized)
def prepare_for_batch(self, image, labels, bboxes, image_id=-1):
    """
    All inputs have different dimensions, so we need to update them to fit the batch.

    Image: Depending on the config, we rescale the image to the batch image size
    (and it will stay the same) or to the maximum batch image size, which is then
    transformed to a randomly selected size in the preprocess_batch() method.

    Labels, bounding boxes: We either cut them to the maximum number of objects
    or pad them to that size. The returned mask tells us which values are valid.

    :param image: 3-D Tensor of shape [height, width, channels]
    :param labels: 1-D Tensor with a label for every object
    :param bboxes: 2-D Tensor of shape (objects, 4) containing bounding boxes in
        format [ymin, xmin, ymax, xmax] in relative coordinates
    :param image_id: id of the image, required for COCO evaluation
    :return: (image, bboxes, labels, mask, image_id, height, width)
    """
    labels = labels[0:self.model_config.max_objects]
    bboxes = bboxes[0:self.model_config.max_objects]
    # always keep the second dimension equal to 4, even if there are no objects
    bboxes = tf.reshape(bboxes, (-1, 4))

    # make sure labels and boxes have the correct data type
    labels = tf.cast(labels, dtype=tf.float32)
    bboxes = tf.cast(bboxes, dtype=tf.float32)

    # we resize to the max size to form a batch; afterwards, we can resize the whole batch
    image_size = (self.model_config.image_size +
                  self.model_config.image_size_variation
                  if self.train else self.model_config.image_size)
    height, width = tf.shape(image)[0], tf.shape(image)[1]

    # apply augmentations
    if self.augmentations:
        if self.resize_before_augmenting:
            additional_space = 1.2  # so that we have something to clip
            pre_resize = tf.cast(image_size, dtype=tf.float32) * additional_space
            ratio = tf.math.minimum(
                tf.cast(height, dtype=tf.float32) / pre_resize,
                tf.cast(width, dtype=tf.float32) / pre_resize)

            def _preresize():
                return resize(
                    image,
                    bboxes,
                    tf.cast(tf.cast(height, dtype=tf.float32) / ratio,
                            dtype=tf.int32),
                    tf.cast(tf.cast(width, dtype=tf.float32) / ratio,
                            dtype=tf.int32),
                    keep_aspect_ratio=False,
                    random_method=True,
                )

            # when the image is only slightly bigger, there is no need to pre-resize
            image, bboxes = tf.cond(tf.math.greater(ratio, 1.2),
                                    lambda: _preresize(),
                                    lambda: (image, bboxes))

        # probabilities for tf.random.categorical() are unnormalized
        probabilities = [
            tf.cast(aug.probability, dtype=tf.float32)
            for aug in self.augmentations
        ]
        selected = tf.random.categorical(tf.math.log([probabilities]), 1,
                                         dtype=tf.int32)[0][0]

        # perform the augmentation with the selected id
        # (the nicer tf.switch_case() was not working for an unknown reason)
        for idx, aug in enumerate(self.augmentations):
            image, bboxes = tf.cond(selected == idx,
                                    lambda: aug.augment(image, bboxes),
                                    lambda: (image, bboxes))

    if self.train and not self.model_config.keep_aspect_ratio:
        # randomly choose to keep the aspect ratio or spread the image out
        # to take the full available space
        image, bboxes = self.resize_train(image, bboxes, image_size, prob=0.5)
    else:
        # always keep the aspect ratio or spread, depending on the settings
        image, bboxes = resize(
            image,
            bboxes,
            image_size,
            image_size,
            keep_aspect_ratio=self.model_config.keep_aspect_ratio,
            random_method=self.train,
        )

    # calculate the mask (one where there is an object)
    mask = tf.range(self.model_config.max_objects) < tf.shape(labels)[0]
    mask = tf.cast(mask, dtype=tf.float32)

    # pad bounding boxes to the fixed shape
    padding_add = self.model_config.max_objects - tf.shape(bboxes)[0]
    bboxes = tf.pad(bboxes, tf.stack([[0, padding_add], [0, 0]]))

    # pad labels to the fixed shape
    labels = tf.pad(labels, tf.stack([[0, padding_add]]))
    labels = tf.cast(labels, dtype=tf.int32)

    return image, bboxes, labels, mask, image_id, height, width
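# A minimal sketch of how prepare_for_batch might be wired into a tf.data
# pipeline -- `loader`, `raw_dataset`, and the (image, labels, bboxes, image_id)
# element structure are assumptions for illustration. Because every output has
# a fixed shape (square images, max_objects boxes and labels), a plain batch()
# suffices and no padded_batch() is needed.
raw_dataset = raw_dataset.map(loader.prepare_for_batch,
                              num_parallel_calls=tf.data.AUTOTUNE)
raw_dataset = raw_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)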
def test_resize_types(self):
    self.check_types(lambda image, bboxes: resize(image, bboxes, 20, 20))
    self.check_types(
        lambda image, bboxes: random_pad_to_square(image, bboxes))
    self.check_types(
        lambda image, bboxes: random_aspect_ratio_deformation(image, bboxes))
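# A hypothetical check_types helper matching how the test above calls it --
# the real implementation is not shown in this section, so this is only a
# sketch under the assumption that the helper verifies dtype preservation:
# it runs the given op on float32 inputs and asserts that both outputs
# keep the float32 dtype.
def check_types(self, op):
    image = tf.ones([8, 4, 3], dtype=tf.float32)
    bboxes = tf.constant([[0.1, 0.1, 0.9, 0.9]], dtype=tf.float32)
    image_out, bboxes_out = op(image, bboxes)
    tf.debugging.assert_type(image_out, tf.float32)
    tf.debugging.assert_type(bboxes_out, tf.float32)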