def rot90_image(image, n=1, copy=True):
    """Rotate an image counter-clockwise in steps of 90 degrees.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`; the
            code handles 2-D arrays and arrays with one trailing axis.
        n (int): Number of quarter turns. Only `n % 4` matters.
        copy (bool): When True the input is copied before rotating, so the
            result does not share memory with the caller's array.

    Returns:
        ndarray: The rotated image.

    References:
        np.rot90
        cv2.rotate
        tf.image.rot90
    """
    assert khandy.is_numpy_image(image)
    if copy:
        image = image.copy()

    quarter_turns = n % 4
    if quarter_turns == 0:
        return image[:]
    if quarter_turns == 2:
        # A half turn is a flip along both spatial axes.
        return np.fliplr(np.flipud(image))

    # Odd number of quarter turns: swap rows and columns first, leaving any
    # trailing (channel) axis in place.
    axes = (1, 0) if image.ndim == 2 else (1, 0, 2)
    swapped = np.transpose(image, axes)
    if quarter_turns == 1:
        return np.flipud(swapped)
    return np.fliplr(swapped)
def center_pad(image, dst_width, dst_height, strict=True):
    """Pad an image so that it is centered inside a larger canvas.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`.
        dst_width (int): Target width.
        dst_height (int): Target height.
        strict (bool): When True, an AssertionError is raised if the source
            is larger than the target in either dimension. When False, a
            dimension that is already large enough is left unchanged and
            only the smaller dimension(s) are padded.

    Returns:
        ndarray: The image padded with `np.pad` in 'constant' mode. When the
        padding amount is odd, the extra row/column goes to the
        bottom/right.
    """
    assert khandy.is_numpy_image(image)
    assert isinstance(dst_width, numbers.Integral) and isinstance(dst_height, numbers.Integral)

    src_height, src_width = image.shape[:2]
    if strict:
        assert (src_height <= dst_height) and (src_width <= dst_width)

    extra_rows = max(dst_height - src_height, 0)
    extra_cols = max(dst_width - src_width, 0)
    top = extra_rows // 2
    left = extra_cols // 2

    pad_spec = [(top, extra_rows - top), (left, extra_cols - left)]
    if image.ndim != 2:
        # Never pad along the trailing (channel) axis.
        pad_spec.append((0, 0))
    return np.pad(image, tuple(pad_spec), 'constant')
def resize_image(image, dst_width, dst_height, return_scale=False,
                 interpolation='bilinear'):
    """Resize an image to an exact target size.

    Args:
        image (ndarray): The input image.
        dst_width (int): Target width.
        dst_height (int): Target height.
        return_scale (bool): Whether to also return `x_scale` and `y_scale`.
        interpolation (str): Key looked up in the module-level
            `interp_codes` table. Documented values are "nearest",
            "bilinear", "bicubic", "area", "lanczos".

    Returns:
        tuple or ndarray: (`resized_image`, `x_scale`, `y_scale`) when
        `return_scale` is True, otherwise just `resized_image`. The scales
        are `dst / src` ratios per axis.

    Reference:
        mmcv.imresize
    """
    assert khandy.is_numpy_image(image)
    resized_image = cv2.resize(
        image, (dst_width, dst_height),
        interpolation=interp_codes[interpolation])

    if return_scale:
        src_height, src_width = image.shape[:2]
        return resized_image, dst_width / src_width, dst_height / src_height
    return resized_image
def letterbox_resize_image(image, dst_width, dst_height, border_value=0,
                           return_scale=False, interpolation='bilinear'):
    """Resize an image preserving the original aspect ratio using padding.

    The image is scaled by the largest factor that lets it fit inside
    (`dst_width`, `dst_height`) and is then centered on a canvas of exactly
    that size filled with `border_value`.

    Args:
        image (ndarray): The input image.
        dst_width (int): Width of the output canvas.
        dst_height (int): Height of the output canvas.
        border_value: Fill value for the canvas, passed to `np.full`.
        return_scale (bool): Whether to also return the scale and offsets.
        interpolation (str): Key looked up in the module-level
            `interp_codes` table.

    Returns:
        ndarray or tuple: `padded_image`, or
        (`padded_image`, `scale`, `dw`, `dh`) when `return_scale` is True,
        where `dw`/`dh` are the left/top offsets of the resized image
        inside the canvas.

    References:
        `letterbox_image` in `https://github.com/pjreddie/darknet/blob/master/src/image.c`
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    scale = min(dst_width / src_width, dst_height / src_height)

    # For very elongated inputs the shorter side can round down to 0, which
    # cv2.resize rejects; keep at least one pixel per side.
    resize_w = max(int(round(scale * src_width)), 1)
    resize_h = max(int(round(scale * src_height)), 1)
    resized_image = cv2.resize(image, (resize_w, resize_h),
                               interpolation=interp_codes[interpolation])

    # Take the canvas shape from the resized image so that any channel axis
    # matches what cv2.resize returned.
    padded_shape = list(resized_image.shape)
    padded_shape[0] = dst_height
    padded_shape[1] = dst_width
    padded_image = np.full(padded_shape, border_value, image.dtype)

    dw = (dst_width - resize_w) // 2
    dh = (dst_height - resize_h) // 2
    padded_image[dh:resize_h + dh, dw:resize_w + dw, ...] = resized_image

    if not return_scale:
        return padded_image
    return padded_image, scale, dw, dh
def translate_image(image, x_shift, y_shift, border_value=0):
    """Translate an image.

    The output has the same shape and dtype as the input. Pixels shifted
    outside the frame are dropped and the uncovered area is filled with
    `border_value`.

    Args:
        image (ndarray): Image to be translated with format
            (h, w) or (h, w, c).
        x_shift (int): The offset used for translate in horizontal
            direction. right is the positive direction.
        y_shift (int): The offset used for translate in vertical
            direction. down is the positive direction.
        border_value (int | tuple[int]): Value used in case of a
            constant border. A tuple must have one entry per channel.

    Returns:
        ndarray: The translated image.

    Raises:
        AssertionError: If the shifts are not integers or a tuple
            `border_value` does not match the channel count.
        ValueError: If `border_value` is neither a real number nor a tuple.

    See Also:
        crop_or_pad
    """
    assert khandy.is_numpy_image(image)
    assert isinstance(x_shift, numbers.Integral)
    assert isinstance(y_shift, numbers.Integral)
    image_height, image_width = image.shape[:2]
    channels = 1 if image.ndim == 2 else image.shape[2]

    # Start from a canvas that is entirely border.
    if isinstance(border_value, numbers.Real):
        dst_image = np.full_like(image, border_value)
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels ' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
        if channels == 1:
            dst_image = np.full_like(image, border_value[0])
        else:
            border_value = np.asarray(border_value, dtype=image.dtype)
            dst_image = np.empty_like(image)
            # Broadcast the per-channel value over every pixel.
            dst_image[:] = border_value
    else:
        raise ValueError('Invalid type {} for `border_value`.'.format(
            type(border_value)))

    # Shifted completely out of the frame: nothing of the source remains.
    if (abs(x_shift) >= image_width) or (abs(y_shift) >= image_height):
        return dst_image

    # Overlap between the source frame and the shifted frame, expressed in
    # source and destination coordinates respectively.
    src_x_begin = max(-x_shift, 0)
    src_x_end = min(image_width - x_shift, image_width)
    dst_x_begin = max(x_shift, 0)
    dst_x_end = min(image_width + x_shift, image_width)

    src_y_begin = max(-y_shift, 0)
    src_y_end = min(image_height - y_shift, image_height)
    dst_y_begin = max(y_shift, 0)
    dst_y_end = min(image_height + y_shift, image_height)

    dst_image[dst_y_begin:dst_y_end, dst_x_begin:dst_x_end] = \
        image[src_y_begin:src_y_end, src_x_begin:src_x_end]
    return dst_image
def crop_or_pad(image, x_min, y_min, x_max, y_max, border_value=0):
    """Extract a rectangular window from an image, padding where needed.

    The window spans columns `x_min..x_max` and rows `y_min..y_max`, both
    ends inclusive, and may extend beyond the image. Parts of the window
    that fall outside the image are filled with `border_value`.

    Args:
        image (ndarray): Image with format (h, w) or (h, w, c).
        x_min (int): Left column of the window (inclusive).
        y_min (int): Top row of the window (inclusive).
        x_max (int): Right column of the window (inclusive).
        y_max (int): Bottom row of the window (inclusive).
        border_value (int | tuple[int]): Fill value for the area outside
            the image. A tuple must have one entry per channel.

    Returns:
        ndarray: Array of shape (y_max - y_min + 1, x_max - x_min + 1[, c])
        with the dtype of `image`.

    Raises:
        AssertionError: If the coordinates are not integers, the window is
            inverted, or a tuple `border_value` does not match the channel
            count.
        ValueError: If `border_value` is neither a real number nor a tuple.

    See Also:
        translate_image

    References:
        tf.image.resize_image_with_crop_or_pad
    """
    assert khandy.is_numpy_image(image)
    assert isinstance(x_min, numbers.Integral) and isinstance(y_min, numbers.Integral)
    assert isinstance(x_max, numbers.Integral) and isinstance(y_max, numbers.Integral)
    assert (x_min <= x_max) and (y_min <= y_max)

    src_height, src_width = image.shape[:2]
    dst_height, dst_width = y_max - y_min + 1, x_max - x_min + 1
    channels = 1 if image.ndim == 2 else image.shape[2]

    if image.ndim == 2:
        dst_image_shape = (dst_height, dst_width)
    else:
        dst_image_shape = (dst_height, dst_width, channels)

    # Start from a canvas that is entirely border.
    if isinstance(border_value, numbers.Real):
        dst_image = np.full(dst_image_shape, border_value, dtype=image.dtype)
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels ' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
        if channels == 1:
            dst_image = np.full(dst_image_shape, border_value[0], dtype=image.dtype)
        else:
            border_value = np.asarray(border_value, dtype=image.dtype)
            dst_image = np.empty(dst_image_shape, dtype=image.dtype)
            # Broadcast the per-channel value over every pixel.
            dst_image[:] = border_value
    else:
        raise ValueError(
            'Invalid type {} for `border_value`.'.format(type(border_value)))

    # Intersection of the window with the image in source coordinates, then
    # the same region re-expressed relative to the window origin.
    src_x_begin = max(x_min, 0)
    src_x_end = min(x_max + 1, src_width)
    dst_x_begin = src_x_begin - x_min
    dst_x_end = src_x_end - x_min

    src_y_begin = max(y_min, 0)
    src_y_end = min(y_max + 1, src_height)
    dst_y_begin = src_y_begin - y_min
    dst_y_end = src_y_end - y_min

    # Window lies entirely outside the image: the result is all border.
    if (src_x_begin >= src_x_end) or (src_y_begin >= src_y_end):
        return dst_image

    dst_image[dst_y_begin: dst_y_end, dst_x_begin: dst_x_end, ...] = \
        image[src_y_begin: src_y_end, src_x_begin: src_x_end, ...]
    return dst_image
def calc_image_dhash(image):
    """Compute a 64-bit difference hash of an image.

    A 3-D input is converted with `cv2.COLOR_BGR2GRAY`. The image is then
    resized to 9 columns by 8 rows, and each pixel is compared with its
    right-hand neighbour.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`.

    Returns:
        int: Hash whose bit `row * 8 + col` is set when the pixel at
        (row, col) is strictly greater than the pixel at (row, col + 1).
    """
    assert khandy.is_numpy_image(image)
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(image, (9, 8))

    hashval = 0
    for bit in range(64):
        row, col = divmod(bit, 8)
        if resized[row, col] > resized[row, col + 1]:
            hashval |= 1 << bit
    return hashval
def calc_image_ahash(image):
    """Compute a 64-bit average hash of an image.

    A 3-D input is converted with `cv2.COLOR_BGR2GRAY`. The image is then
    resized to 8x8 and every pixel is compared against the mean of the
    resized image.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`.

    Returns:
        int: Hash whose bit `row * 8 + col` is set when the pixel at
        (row, col) exceeds the mean by at least 1e-5.
    """
    assert khandy.is_numpy_image(image)
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(image, (8, 8))
    # cv2.mean returns one value per channel; only the first is used.
    meanval = cv2.mean(resized)[0]

    hashval = 0
    for bit in range(64):
        row, col = divmod(bit, 8)
        if resized[row, col] - meanval >= 1e-5:
            hashval |= 1 << bit
    return hashval
def calc_image_phash(image):
    """Compute a 64-bit frequency-domain hash of an image.

    A 3-D input is converted with `cv2.COLOR_BGR2GRAY`. The image is then
    resized to 32x32, converted to float32 and transformed with `cv2.dft`.
    The top-left 8x8 block of the transform is thresholded against that
    block's own mean.

    NOTE(review): classic pHash uses a DCT; this code uses `cv2.dft`.
    Confirm that is intended before changing it, since hashes already
    stored would no longer match.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`.

    Returns:
        int: Hash whose bit `row * 8 + col` is set when the transform
        coefficient at (row, col) exceeds the block mean by at least 1e-5.
    """
    assert khandy.is_numpy_image(image)
    if image.ndim == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(image, (32, 32))
    dft = cv2.dft(resized.astype(np.float32))
    # cv2.mean returns one value per channel; only the first is used.
    meanval = cv2.mean(dft[:8, :8])[0]

    hashval = 0
    for bit in range(64):
        row, col = divmod(bit, 8)
        if dft[row, col] - meanval >= 1e-5:
            hashval |= 1 << bit
    return hashval
def scale_image(image, x_scale, y_scale, interpolation='bilinear'):
    """Scale an image by independent horizontal and vertical factors.

    Args:
        image (ndarray): The input image.
        x_scale (float): Factor applied to the width.
        y_scale (float): Factor applied to the height.
        interpolation (str): Key looked up in the module-level
            `interp_codes` table.

    Returns:
        ndarray: The resized image; the target size is the scaled source
        size rounded to the nearest integer.

    Reference:
        mmcv.imrescale
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    target_size = (int(round(x_scale * src_width)),
                   int(round(y_scale * src_height)))
    return cv2.resize(image, target_size,
                      interpolation=interp_codes[interpolation])
def rotate_image(image, angle, scale=1.0, center=None, degrees=True,
                 border_value=0, auto_bound=False):
    """Rotate an image.

    Args:
        image : ndarray
            Image to be rotated.
        angle : float
            Rotation angle, forwarded to `get_2d_rotation_matrix`. The
            original documentation states positive values mean clockwise
            rotation; the sign convention is determined by that helper.
        scale : float
            Isotropic scale factor.
        center : tuple
            Center of the rotation in the source image, by default
            it is the center of the image. Ignored when `auto_bound`
            is True.
        degrees : bool
            Forwarded to `get_2d_rotation_matrix`; presumably selects
            degrees versus radians for `angle` (confirm against helper).
        border_value : int
            Border value.
        auto_bound : bool
            Whether to adjust the image size to cover the whole
            rotated image.

    Returns:
        ndarray: The rotated image.

    References:
        mmcv.imrotate
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]

    # With auto_bound the rotation is always about the image center.
    if auto_bound or center is None:
        center = ((src_width - 1) * 0.5, (src_height - 1) * 0.5)
    assert isinstance(center, tuple)

    matrix = get_2d_rotation_matrix(angle, center[0], center[1], scale, degrees)

    out_width, out_height = src_width, src_height
    if auto_bound:
        abs_cos = np.abs(matrix[0, 0])
        abs_sin = np.abs(matrix[0, 1])
        bound_width = src_width * abs_cos + src_height * abs_sin
        bound_height = src_width * abs_sin + src_height * abs_cos
        # Shift the result so it is centered in the enlarged canvas.
        matrix[0, 2] += (bound_width - src_width) * 0.5
        matrix[1, 2] += (bound_height - src_height) * 0.5
        out_width = int(np.round(bound_width))
        out_height = int(np.round(bound_height))

    # Only the top two rows form the affine matrix warpAffine expects.
    return cv2.warpAffine(image, matrix[:2, :], (out_width, out_height),
                          borderValue=border_value)
def transpose_image(image, copy=True):
    """Swap the row and column axes of an image.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`; the
            code handles 2-D arrays and arrays with one trailing axis.
        copy (bool): When True the input is copied first, so the result
            does not share memory with the caller's array.

    Returns:
        ndarray: The transposed image; a trailing axis, if any, is kept
        in place.

    References:
        np.transpose
        cv2.transpose
        tf.image.transpose
    """
    assert khandy.is_numpy_image(image)
    source = image.copy() if copy else image
    axes = (1, 0) if source.ndim == 2 else (1, 0, 2)
    return np.transpose(source, axes)
def center_crop(image, dst_width, dst_height, strict=True):
    """Crop the central region of an image.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`.
        dst_width (int): Width of the crop.
        dst_height (int): Height of the crop.
        strict (bool): When True, an AssertionError is raised if the source
            is smaller than the target in either dimension. When False, a
            dimension that is already too small is left unchanged and only
            the larger dimension(s) are cropped.

    Returns:
        ndarray: A slice of `image` (not a copy).
    """
    assert khandy.is_numpy_image(image)
    assert isinstance(dst_width, numbers.Integral) and isinstance(dst_height, numbers.Integral)

    src_height, src_width = image.shape[:2]
    if strict:
        assert (src_height >= dst_height) and (src_width >= dst_width)

    # Clamp at 0 so an undersized dimension starts at the image edge.
    top = max((src_height - dst_height) // 2, 0)
    left = max((src_width - dst_width) // 2, 0)
    return image[top: top + dst_height, left: left + dst_width, ...]
def resize_image_to_range(image, min_length, max_length, return_scale=False,
                          interpolation='bilinear'):
    """Resizes an image so its dimensions are within the provided value.

    Rescale the shortest side of the image up to `min_length` pixels
    while keeping the largest side below `max_length` pixels without
    changing the aspect ratio. Often used in object detection
    (e.g. RCNN and SSH.)

    The output size can be described by two cases:
    1. If the image can be rescaled so its shortest side is equal to the
       `min_length` without the other side exceeding `max_length`, then
       do so.
    2. Otherwise, resize so the longest side is equal to `max_length`.

    Args:
        image (ndarray): The input image.
        min_length (int): Target length of the shorter side.
        max_length (int): Upper bound for the longer side; must be greater
            than `min_length`.
        return_scale (bool): Whether to also return the scale factor.
        interpolation (str): Key looked up in the module-level
            `interp_codes` table.

    Returns:
        resized_image: resized image so that
            min(dst_height, dst_width) == min_length or
            max(dst_height, dst_width) == max_length.
        When `return_scale` is True, the tuple (`resized_image`, `scale`).

    References:
        `resize_to_range` in `models-master/research/object_detection/core/preprocessor.py`
        `prep_im_for_blob` in `py-faster-rcnn-master/lib/utils/blob.py`
        mmcv.imrescale
    """
    assert khandy.is_numpy_image(image)
    assert min_length < max_length
    src_height, src_width = image.shape[:2]

    min_side_length = min(src_width, src_height)
    max_side_length = max(src_width, src_height)
    scale = min_length / min_side_length
    if round(scale * max_side_length) > max_length:
        scale = max_length / max_side_length

    # For very elongated inputs the shorter side can round down to 0, which
    # cv2.resize rejects; keep at least one pixel per side.
    dst_width = max(int(round(scale * src_width)), 1)
    dst_height = max(int(round(scale * src_height)), 1)
    resized_image = cv2.resize(image, (dst_width, dst_height),
                               interpolation=interp_codes[interpolation])
    if not return_scale:
        return resized_image
    return resized_image, scale
def flip_image(image, direction='h', copy=True):
    """Flip an image horizontally, vertically, or both.

    Args:
        image (ndarray): Image accepted by `khandy.is_numpy_image`.
        direction (str): 'x', 'h' or 'horizontal' flips left-right;
            'y', 'v' or 'vertical' flips up-down; 'o', 'b' or 'both'
            applies both flips.
        copy (bool): When True the input is copied first, so the result
            does not share memory with the caller's array.

    Returns:
        ndarray: The flipped image.

    References:
        np.flipud, np.fliplr, np.flip
        cv2.flip
        tf.image.flip_up_down
        tf.image.flip_left_right
    """
    horizontal_names = ('x', 'h', 'horizontal')
    vertical_names = ('y', 'v', 'vertical')
    both_names = ('o', 'b', 'both')

    assert khandy.is_numpy_image(image)
    assert direction in horizontal_names + vertical_names + both_names

    if copy:
        image = image.copy()

    flip_both = direction in both_names
    if flip_both or direction in horizontal_names:
        image = np.fliplr(image)
    if flip_both or direction in vertical_names:
        image = np.flipud(image)
    return image
def resize_image_long(image, dst_size, return_scale=False,
                      interpolation='bilinear'):
    """Resize an image so that the length of longer side is dst_size
    while preserving the original aspect ratio.

    Args:
        image (ndarray): The input image.
        dst_size (int): Target length of the longer side.
        return_scale (bool): Whether to also return the scale factor.
        interpolation (str): Key looked up in the module-level
            `interp_codes` table.

    Returns:
        ndarray or tuple: `resized_image`, or (`resized_image`, `scale`)
        when `return_scale` is True.

    References:
        `resize_max` in `https://github.com/pjreddie/darknet/blob/master/src/image.c`
    """
    assert khandy.is_numpy_image(image)
    src_height, src_width = image.shape[:2]
    # The smaller of the two ratios is the one belonging to the longer side.
    scale = min(dst_size / src_width, dst_size / src_height)

    # For very elongated inputs the shorter side can round down to 0, which
    # cv2.resize rejects; keep at least one pixel per side.
    dst_width = max(int(round(scale * src_width)), 1)
    dst_height = max(int(round(scale * src_height)), 1)
    resized_image = cv2.resize(image, (dst_width, dst_height),
                               interpolation=interp_codes[interpolation])
    if not return_scale:
        return resized_image
    return resized_image, scale