def _get_annotation(item, classes):
    """
    Groups the bounding boxes of one dataset item by class index.

    :param item: mapping with 'image_size' (width, height) and 'objects',
                 where each object has a 'label' and a 'bbox'
                 (xmin, ymin, xmax, ymax)
    :param classes: sequence of supported labels; a label's position in it
                    is used as the class id
    :return: dict mapping class id to a list of BoundingBox whose
             coordinates are divided by the image width / height
    """
    # Annotation is encoded in the original image size, actual image size
    # may be smaller due to performance reasons
    raw_w, raw_h = item['image_size']
    img_w = float(raw_w)
    img_h = float(raw_h)

    boxes_by_class = {}
    for entry in item['objects']:
        label = entry['label']
        if label == 'ignored':
            continue

        try:
            label_id = classes.index(label)
        except ValueError:
            # Labels outside of `classes` are reported and dropped.
            tf.logging.warning(
                "Unknown label = '{0}', supported labels: {1}".format(
                    label, classes))
            continue

        left, top, right, bottom = entry['bbox']
        left = left / img_w
        top = top / img_h
        right = right / img_w
        bottom = bottom / img_h

        boxes_by_class.setdefault(label_id, []).append(
            BoundingBox(left, top, right, bottom))

    return boxes_by_class
def _transform_image(self, image):
    """
    Geometrically transforms image according to parameters

    Steps, in order: optional resize, optional noise, optional crop (random
    offset when ``self.is_training``, centered otherwise), optional
    horizontal flip, optional mean subtraction and optional scaling.

    :type image: input image
    :rtype: transformed image, the crop BoundingBox (offsets and the size
            returned by ``self._infer_top_shape``, divided by the pre-crop
            image size) and the mirror flag
    """
    # NOTE(review): presumably the (height, width) of the network input -
    # confirm against _infer_top_shape.
    top_h, top_w = self._infer_top_shape(image)
    scale = self.transform_param.scale
    crop_w, crop_h = self.transform_param.crop_size
    # Either the falsy `mirror` setting itself or a random 0/1 draw.
    do_mirror = self.transform_param.mirror and random.randint(0, 1)

    if self.transform_param.resize_param:
        image = apply_resize(image, self.transform_param.resize_param)

    if self.transform_param.noise_param:
        image = apply_noise(image, self.transform_param.noise_param)

    image_h, image_w = image.shape[0:2]

    w_off, h_off = (0, 0)
    if crop_h * crop_w > 0:
        if self.is_training:
            # Random top-left corner that keeps the crop inside the image;
            # the row offset is drawn before the column offset.
            h_off = random.randint(0, image_h - crop_h)
            w_off = random.randint(0, image_w - crop_w)
        else:
            # Centered crop. Floor division keeps the offsets integral so
            # they stay valid slice indices under true division as well.
            h_off = (image_h - crop_h) // 2
            w_off = (image_w - crop_w) // 2
        image = image[h_off:h_off + crop_h, w_off:w_off + crop_w]

    # The crop box is expressed relative to the image size before cropping.
    xmin = float(w_off) / image_w
    ymin = float(h_off) / image_h
    xmax = float(w_off + top_w) / image_w
    ymax = float(h_off + top_h) / image_h
    crop_bbox = BoundingBox(xmin, ymin, xmax, ymax)

    if do_mirror:
        image = cv2.flip(image, 1)

    if self.transform_param.mean_value:
        image = image.astype(np.float32)
        image -= self.transform_param.mean_value

    # Scaling is skipped when the factor is within 1e-2 of 1.
    if math.fabs(scale - 1) > 1e-2:
        image = image.astype(np.float32)
        image *= scale

    return image, crop_bbox, do_mirror
def update_bbox_by_resize_policy(old_width, old_height, bbox, resize_param):
    """
    Recomputes a bounding box after the image was resized.

    The box is converted to pixel coordinates of the original image, mapped
    according to ``resize_param.resize_mode`` and divided by the resulting
    image size again.

    :param old_width: image width before the resize
    :param old_height: image height before the resize
    :param bbox: box providing xmin, ymin, xmax, ymax (multiplied by the old
                 size here, so presumably normalized) and ``difficult``
    :param resize_param: object providing ``height``, ``width`` and
                         ``resize_mode``
    :return: new BoundingBox carrying the ``difficult`` flag of ``bbox``
    """
    new_height = resize_param.height
    new_width = resize_param.width
    orig_aspect = float(old_width) / old_height
    # Force true division, like orig_aspect above, so that integer target
    # sizes are not floored before the aspect ratios are compared.
    new_aspect = float(new_width) / new_height

    # Box corners in pixels of the original image.
    x_min = bbox.xmin * old_width
    y_min = bbox.ymin * old_height
    x_max = bbox.xmax * old_width
    y_max = bbox.ymax * old_height

    if resize_param.resize_mode == ResizeParameter.WARP:
        # Both axes are stretched independently to the target size.
        x_min = max(0., x_min * new_width / old_width)
        x_max = min(new_width, x_max * new_width / old_width)
        y_min = max(0., y_min * new_height / old_height)
        y_max = min(new_height, y_max * new_height / old_height)
    elif resize_param.resize_mode == ResizeParameter.FIT_LARGE_SIZE_AND_PAD:
        if orig_aspect > new_aspect:
            # Width fills the target; the box is shifted down by the
            # vertical padding.
            padding = (new_height - new_width / orig_aspect) / 2
            x_min = max(0., x_min * new_width / old_width)
            x_max = min(new_width, x_max * new_width / old_width)
            y_min = y_min * (new_height - 2 * padding) / old_height
            y_min = padding + max(0., y_min)
            y_max = y_max * (new_height - 2 * padding) / old_height
            y_max = padding + min(new_height, y_max)
        else:
            # Height fills the target; the box is shifted right by the
            # horizontal padding.
            padding = (new_width - orig_aspect * new_height) / 2
            x_min = x_min * (new_width - 2 * padding) / old_width
            x_min = padding + max(0., x_min)
            x_max = x_max * (new_width - 2 * padding) / old_width
            x_max = padding + min(new_width, x_max)
            y_min = max(0., y_min * new_height / old_height)
            y_max = min(new_height, y_max * new_height / old_height)
    elif resize_param.resize_mode == ResizeParameter.FIT_SMALL_SIZE:
        # One target dimension is recomputed from the original aspect
        # ratio before the box is mapped.
        if orig_aspect < new_aspect:
            new_height = new_width / orig_aspect
        else:
            new_width = orig_aspect * new_height
        x_min = max(0., x_min * new_width / old_width)
        x_max = min(new_width, x_max * new_width / old_width)
        y_min = max(0., y_min * new_height / old_height)
        y_max = min(new_height, y_max * new_height / old_height)
    # NOTE(review): any other resize_mode falls through and the old pixel
    # coordinates are divided by the new size below - confirm whether that
    # case should raise instead.

    result = BoundingBox(difficult=bbox.difficult)
    result.xmin = x_min / new_width
    result.ymin = y_min / new_height
    result.xmax = x_max / new_width
    result.ymax = y_max / new_height
    return result
def expand_image(image, expand_ratio, mean_value=None):
    """
    Places the image at a random position on a larger canvas.

    :param image: array whose shape unpacks as (height, width, channels)
    :param expand_ratio: factor applied to height and width to size the
                         canvas
    :param mean_value: canvas fill value; zeros per channel when None
    :return: tuple of the canvas and a BoundingBox describing the canvas
             in units of the source image width / height
    """
    src_h, src_w, channels = image.shape

    # Canvas size, truncated to whole pixels.
    canvas_h = int(src_h * expand_ratio)
    canvas_w = int(src_w * expand_ratio)

    # Random top-left corner of the source image inside the canvas;
    # the row offset is drawn before the column offset.
    top = int(math.floor(random.uniform(0., canvas_h - src_h)))
    left = int(math.floor(random.uniform(0., canvas_w - src_w)))

    # Canvas extent measured from the source image origin.
    unit_w = float(src_w)
    unit_h = float(src_h)
    canvas_box = BoundingBox(-left / unit_w,
                             -top / unit_h,
                             (canvas_w - left) / unit_w,
                             (canvas_h - top) / unit_h)

    if mean_value is None:
        background = [0] * channels
    else:
        background = mean_value

    canvas = np.full((canvas_h, canvas_w, channels), background,
                     dtype=image.dtype)
    canvas[top:top + src_h, left:left + src_w, :] = image
    return canvas, canvas_box