def _shift_scale_aug(self, image, bbox, target_size, shift, scale):
    """Shift and rescale an image and corresponding bounding box.

    Args:
        image (ndarray): of shape (H, W, 3). Typically H and W equal to 511.
        bbox (ndarray): of shape (4, ) in [x1, y1, x2, y2] format.
        target_size (int): Exemplar size or search size.
        shift (int): The max shift offset.
        scale (float): The max rescale factor.

    Returns:
        tuple(crop_img, bbox): crop_img is a ndarray of shape
            (target_size, target_size, 3), bbox is the corresponding
            ground truth box in [x1, y1, x2, y2] format.
    """
    img_h, img_w = image.shape[:2]

    # Sample a per-axis rescale factor in [1 - scale, 1 + scale], clipped
    # so the crop region cannot be larger than the image itself.
    scale_x = (2 * np.random.random() - 1) * scale + 1
    scale_y = (2 * np.random.random() - 1) * scale + 1
    scale_x = min(scale_x, float(img_w) / target_size)
    scale_y = min(scale_y, float(img_h) / target_size)

    # Crop region of size (scale_x * target_size, scale_y * target_size)
    # centered on the image center, in [x1, y1, x2, y2] format.
    crop_region = np.array([
        img_w // 2 - 0.5 * scale_x * target_size,
        img_h // 2 - 0.5 * scale_y * target_size,
        img_w // 2 + 0.5 * scale_x * target_size,
        img_h // 2 + 0.5 * scale_y * target_size
    ])

    # Sample a random shift in [-shift, shift] per axis, clipped so the
    # shifted crop region stays inside the image bounds.
    shift_x = (2 * np.random.random() - 1) * shift
    shift_y = (2 * np.random.random() - 1) * shift
    shift_x = max(-crop_region[0], min(img_w - crop_region[2], shift_x))
    shift_y = max(-crop_region[1], min(img_h - crop_region[3], shift_y))
    # Note: build a fresh array rather than rebinding the `shift` parameter.
    crop_region += np.array([shift_x, shift_y, shift_x, shift_y])

    crop_img = crop_image(image, crop_region, target_size)

    # Map the bbox into the cropped/rescaled coordinate frame. Compute a
    # new float32 array instead of using in-place `-=` / `/=`: the in-place
    # form both mutated the caller's ground-truth box and raised a cast
    # TypeError whenever `bbox` had an integer dtype.
    offset = np.array(
        [crop_region[0], crop_region[1], crop_region[0], crop_region[1]],
        dtype=np.float32)
    bbox = (np.asarray(bbox, dtype=np.float32) - offset) / np.array(
        [scale_x, scale_y, scale_x, scale_y], dtype=np.float32)
    return crop_img, bbox
def crop_like_SiamFC(self,
                     image,
                     bbox,
                     context_amount=0.5,
                     exemplar_size=127,
                     crop_size=511):
    """Crop an image as SiamFC did.

    Args:
        image (ndarray): of shape (H, W, 3).
        bbox (ndarray): of shape (4, ) in [x1, y1, x2, y2] format.
        context_amount (float): The context amount around a bounding box.
            Defaults to 0.5.
        exemplar_size (int): Exemplar size. Defaults to 127.
        crop_size (int): Crop size. Defaults to 511.

    Returns:
        ndarray: The cropped image of shape (crop_size, crop_size, 3).
    """
    # Per-channel mean color, used to pad any part of the crop that falls
    # outside the image.
    padding = np.mean(image, axis=(0, 1)).tolist()

    # Convert [x1, y1, x2, y2] to center and size.
    cx = 0.5 * (bbox[0] + bbox[2])
    cy = 0.5 * (bbox[1] + bbox[3])
    w = bbox[2] - bbox[0]
    h = bbox[3] - bbox[1]

    # Exemplar region: enlarge each side by `context_amount` of (w + h),
    # then square it via the geometric mean of the padded width/height.
    context = context_amount * (w + h)
    z_size = np.sqrt((w + context) * (h + context))
    z_scale = exemplar_size / z_size

    # Extra search margin so that, after the exemplar region is rescaled
    # to `exemplar_size`, the full crop covers `crop_size` pixels.
    d_search = (crop_size - exemplar_size) / 2
    x_size = z_size + 2 * (d_search / z_scale)

    half = 0.5 * x_size
    x_bbox = np.array([cx - half, cy - half, cx + half, cy + half])
    return crop_image(image, x_bbox, crop_size, padding)