def block_TV(preliminary_result, block_size, block_stride=None):
    """Return a block-wise total-variation map resized to the input's shape.

    The input is cut into patches of ``block_size`` taken every
    ``block_stride`` pixels, one score is computed per patch, and the grid
    of scores is resized back to ``preliminary_result.shape`` with
    ``order=0``.

    Parameters
    ----------
    preliminary_result : ndarray
        Array to analyse. The patch grid is unpacked into four axes
        (grid row, grid column, patch height, patch width), so a 2-D input
        is expected.
    block_size : int
        Side length of a patch; must be greater than 1.
    block_stride : int, optional
        Step between patches; must not exceed ``block_size``. Falls back to
        ``block_size`` when omitted or falsy.

    Returns
    -------
    The value returned by ``resize`` for the per-patch score grid.
    """
    assert block_size > 1
    if not block_stride:
        block_stride = block_size
    else:
        assert block_stride <= block_size

    def patch_TV(patches):
        """Score every patch of a (rows, cols, height, width) patch grid."""
        n_rows, n_cols, height, width = patches.shape
        tv = np.zeros([n_rows, n_cols])
        for r, c in np.ndindex(n_rows, n_cols):
            patch = patches[r, c]
            # Forward differences relative to the top-left neighbour; the
            # last row and column have no forward neighbour and are skipped.
            anchor = patch[:-1, :-1]
            diff_down = patch[1:, :-1] - anchor
            diff_right = patch[:-1, 1:] - anchor
            gradient_magnitude = np.sqrt(diff_down ** 2 + diff_right ** 2)
            tv[r, c] += gradient_magnitude.mean()
        # The per-patch mean is additionally divided by the patch area.
        tv /= height * width
        return tv

    patch_grid = _extract_patches(preliminary_result,
                                  patch_shape=block_size,
                                  extraction_step=block_stride)
    block_scores = patch_TV(patch_grid)
    # order=0 keeps the map piecewise constant when it is scaled back up.
    return resize(block_scores,
                  preliminary_result.shape,
                  order=0,
                  mode='constant',
                  anti_aliasing=False)
def test_extract_patches_square():
    """An integer ``patch_shape`` yields square patches on a dense grid."""
    # A single int is used as the patch size for every image dimension.
    patch_side = 8
    image_height, image_width = downsampled_face.shape
    patches = _extract_patches(downsampled_face, patch_shape=patch_side)
    # With the default step there is one patch per valid top-left corner.
    expected_shape = (
        image_height - patch_side + 1,
        image_width - patch_side + 1,
        patch_side,
        patch_side,
    )
    assert patches.shape == expected_shape
def test_extract_patches_strided():
    """Check the patch-grid shape and the last patch of strided extraction.

    Every case is a tuple
    ``(image_shape, patch_size, patch_step, expected_view, last_patch)``
    where ``expected_view`` is the expected shape of the patch grid and
    ``last_patch`` is the start index of the last patch in the image.
    """
    cases_1d = [
        ((10,), (1,), (1,), (10,), (10,)),
        ((10,), (2,), (1,), (9,), (8,)),
        ((11,), (3,), (4,), (3,), (8,)),
        ((10,), (8,), (2,), (2,), (2,)),
    ]
    cases_2d = [
        ((10, 20), (2, 2), (5, 5), (2, 4), (5, 15)),
        ((10, 20), (10, 10), (3, 10), (1, 2), (0, 10)),
        ((10, 20), (10, 11), (3, 4), (1, 3), (0, 8)),
        ((11, 20), (6, 6), (4, 2), (2, 8), (4, 14)),
    ]
    cases_3d = [
        ((5, 4, 3), (2, 2, 3), (1, 2, 10), (4, 2, 1), (3, 2, 0)),
        ((3, 3, 3), (2, 2, 2), (1, 1, 1), (2, 2, 2), (1, 1, 1)),
        ((7, 8, 9), (1, 7, 3), (2, 1, 3), (4, 2, 3), (6, 1, 6)),
        ((7, 8, 9), (1, 3, 3), (3, 3, 4), (3, 2, 2), (6, 3, 4)),
    ]

    # NOTE(review): the first 1-D case starts its last patch at index 10 of
    # a 10-element image, so the reference slice is empty and the content
    # comparison below is vacuous for that case - confirm this is intended.
    for case in cases_1d + cases_2d + cases_3d:
        image_shape, patch_size, patch_step, expected_view, last_patch = case
        image = np.arange(np.prod(image_shape)).reshape(image_shape)
        patches = _extract_patches(
            image, patch_shape=patch_size, extraction_step=patch_step
        )

        ndim = len(image_shape)
        # The leading ``ndim`` axes index the patch grid.
        assert patches.shape[:ndim] == expected_view

        last_patch_slices = tuple(
            slice(start, start + size, None)
            for start, size in zip(last_patch, patch_size)
        )
        final_patch = patches[(-1, None, None) * ndim]
        reference = image[last_patch_slices].squeeze()
        assert (final_patch == reference).all()
def block_mean(preliminary_result, block_size, block_stride=None):
    """Replace each block of the input by its mean, keeping the input size.

    Parameters
    ----------
    preliminary_result : ndarray
        Array to smooth. Singleton axes are removed with ``squeeze`` first.
    block_size : int
        Side length of a block. For values not greater than 1 the squeezed
        input is returned unchanged.
    block_stride : int, optional
        Step between blocks; must not exceed ``block_size``. Falls back to
        ``block_size`` when omitted or falsy.

    Returns
    -------
    The squeezed input (``block_size`` not greater than 1), or the value
    returned by ``resize`` for the grid of block means, resized to the
    squeezed input's shape with ``order=0``.
    """
    squeezed = preliminary_result.squeeze()

    if not block_stride:
        block_stride = block_size
    else:
        assert block_stride <= block_size

    # Nothing to average over: hand back the squeezed input as is.
    if not block_size > 1:
        return squeezed

    patch_grid = _extract_patches(squeezed,
                                  patch_shape=block_size,
                                  extraction_step=block_stride)
    # Axes 2 and 3 are the within-patch axes of the 4-D patch grid.
    block_means = patch_grid.mean(axis=(2, 3))
    return resize(block_means,
                  squeezed.shape,
                  order=0,
                  mode='constant',
                  anti_aliasing=False)
def predict_sliding_window(self,
                           input_sequence,
                           pre_batch_callbacks=None,
                           post_batch_callbacks=None,
                           out_blob_names=None,
                           use_fit_network=False,
                           oversample=False,
                           extraction_step=(1, 1),
                           account_for_step=True,
                           interpolation_method=_cv2INTER_NEAREST,
                           pad_border=True):
    """
    Get predictions for all images in a sliding window manner.

    Similar to the :py:func:`barrista.net.Net.predict` method. See there
    for the parameter descriptions. For this method, every image must be
    at least as big as the input size. It is then sampled using sliding
    window, and for each output layer the reassembled images are returned.

    The output of the network for one patch must be of shape
    (num_layers X 1 X 1) and currently only one output layer is supported.

    :param input_sequence: iterable(3D numpy arrays)
      The 3D numpy arrays must match in their first dimension with the
      second dimension of the network input (number of channels). E.g.,
      for a network with input shape [10, 3, 24, 24], you could provide
      inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
      3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
      and different for each image. Multi-input networks are not yet
      supported by this method. The sequence is iterated twice (once for
      validation, once for prediction), so it must be re-iterable.

    :param pre_batch_callbacks: list(callable) or None.
      Each element of this list will be called with the batch id as
      argument before forward propagating a batch.

    :param post_batch_callbacks: list(callable) or None.
      See before, but after a callback.

    :param out_blob_names: list(string) or None.
      The names of the blobs of which the values are returned. If
      unspecified, uses ``self.outputs``. Must currently contain exactly
      one element.

    :param use_fit_network: bool.
      If set to ``True``, always use this very network, independent of
      whether an internal network in stage ``predict`` is available.
      For more information, see the constructor documentation.

    :param oversample: bool.
      If set to ``True``, uses oversampling and averages the results. You
      have to take care to bring them into the right shape for further
      processing yourself.

    :param extraction_step: 2-tuple(int).
      Window step size in y and x direction.

    :param account_for_step: bool.
      If set to True, the output is resized with nearest neighbor
      interpolation to get a full-sized image.

    :param interpolation_method: int in {cv2.INTER_...}.
      The interpolation strategy used, if ``account_for_step`` is set and
      the ``extraction_step`` is not ``(1, 1)``.

    :param pad_border: bool.
      Whether to return images in the original image size, by adding zero
      padded borders.

    :returns: list(numpy array).
      One array per input image. If ``account_for_step`` is set or
      ``extraction_step`` is ``(1, 1)``, the array has shape
      (num_layers, image_height, image_width), with the zero border cut
      off if ``pad_border`` is ``False``. Otherwise it is the raw grid of
      window outputs with shape (num_layers, windows_y, windows_x).

    :raises Exception: if a window output cannot be reshaped to
      (num_layers,).
    """
    if self._predict_variant is not None and not use_fit_network:
        _LOGGER.info("Using prediction network variant.")
        prednet = self._predict_variant
    else:
        prednet = self
    # Accept any 2-sequence; a list would never compare equal to (1, 1).
    extraction_step = tuple(extraction_step)
    if extraction_step != (1, 1) and pad_border is True:
        assert account_for_step, (
            "If ``extraction_step`` != (1, 1) and "
            "``padborder`` is set, , ``account_for_step`` must be set, "
            " too.")
    input_dims = prednet.blobs[prednet.inputs[0]].data.shape
    input_image_dims = _np.array(input_dims[2:])
    for im in input_sequence:
        assert im.shape[0] == input_dims[1]
        assert im.shape[1] >= input_image_dims[0]
        assert im.shape[2] >= input_image_dims[1]
    assert (out_blob_names is None or
            len(out_blob_names) == 1), "Only one output layer is supported!"
    # Width of the margin around the area that receives the predictions.
    border_y = int(_np.ceil(input_image_dims[0] / 2.0)) - 1
    border_x = int(_np.ceil(input_image_dims[1] / 2.0)) - 1
    output_images = []
    for im_id, im in enumerate(input_sequence):
        _LOGGER.info("Processing image %d...", im_id)
        image_beginpoint = _time.time()
        patches = _extract_patches(
            im,
            patch_shape=(input_dims[1],
                         input_image_dims[0],
                         input_image_dims[1]),
            extraction_step=(1, extraction_step[0], extraction_step[1]))
        # Number of window positions in y and x direction.
        sampled_shape = patches.shape[1:3]
        # Flatten the window grid into a batch of network inputs.
        patches = patches.reshape(
            _np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
        # NOTE(review): the input size is read from ``prednet`` while the
        # prediction runs through ``self.predict`` - confirm this is
        # intended.
        results = self.predict(
            patches,
            pre_batch_callbacks=pre_batch_callbacks,
            post_batch_callbacks=post_batch_callbacks,
            out_blob_names=out_blob_names,
            use_fit_network=use_fit_network,
            oversample=oversample,
            before_oversample_resize_to=None)
        n_layers = results[0].shape[0]
        # Collect the values: one output vector per window position, in
        # row-major window order.
        collected = _np.empty((n_layers,) + tuple(sampled_shape),
                              dtype=results[0].dtype)
        for val_idx, val in enumerate(results):
            try:
                val.shape = (n_layers,)
            except (AttributeError, ValueError):
                raise Exception(
                    ("The output shape of the net must be (X, 1, 1) to be "
                     "used with the `predict_sliding_window` method. It is "
                     "{}.").format(val.shape))
            collected[:,
                      val_idx // sampled_shape[1],
                      val_idx % sampled_shape[1]] = val[:]
        if account_for_step or extraction_step == (1, 1):
            out_im = _np.zeros((n_layers, im.shape[1], im.shape[2]),
                               dtype=results[0].dtype)
            # Explicit end indices: a ``b:-b`` slice would be empty for a
            # zero-width border (window side of 1 or 2).
            inner_y = slice(border_y, im.shape[1] - border_y)
            inner_x = slice(border_x, im.shape[2] - border_x)
            # Resize.
            for layer_idx in range(n_layers):
                layer_area = out_im[layer_idx, inner_y, inner_x]
                # cv2 expects the target size as (width, height).
                layer_area[...] = _cv2resize(
                    collected[layer_idx],
                    (layer_area.shape[1], layer_area.shape[0]),
                    interpolation=interpolation_method)
            if not pad_border:
                out_im = out_im[:, inner_y, inner_x]
            output_images.append(out_im)
        else:
            output_images.append(collected)
        _LOGGER.info("Processed image %d in %ds.",
                     im_id,
                     _time.time() - image_beginpoint)
    return output_images
from sklearn.feature_extraction.image import _extract_patches
import numpy as np
import cv2
from datasets import download_and_prepare


def read_file(file, grayscale=False):
    """
    Loads and normalizes image.

    The image is read with ``cv2.imread``, optionally converted from BGR
    to grayscale, cast to ``float32`` and divided by 255.

    Parameters
        file (string): image file path
        grayscale (bool): True converts the image to grayscale

    Returns:
        (ndarray): image

    Raises:
        IOError: if ``cv2.imread`` could not load ``file``.
    """
    image = cv2.imread(file)
    # cv2.imread signals failure by returning None instead of raising;
    # without this check the division below would silently produce NaN.
    if image is None:
        raise IOError("Could not read image file: {}".format(file))
    if grayscale:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return np.float32(image) / 255


files = download_and_prepare("utk-faces", "data")
Img = read_file(files[0])
print(Img.shape)
# Dense (step 1) extraction of 64x64 patches spanning all three channels.
sample = _extract_patches(Img, patch_shape=(64, 64, 3), extraction_step=1)
print(sample.shape)
def extract_patches_2d(img, patch_size, extraction_step=1, max_patches=None,
                       random_state=None):
    """Reshape a 2D image into a collection of patches

    The resulting patches are allocated in a dedicated array.

    Parameters
    ----------
    img : array, shape = (image_height, image_width) or
        (image_height, image_width, n_channels)
        The original image data. For color images, the last dimension
        specifies the channel: a RGB image would have `n_channels=3`.

    patch_size : tuple of ints (patch_height, patch_width)
        the dimensions of one patch

    extraction_step : integer or tuple of ints (step_height, step_width)
        Indicates step size at which extraction shall be performed.
        If integer is given, then the step is uniform in both image
        dimensions.

    max_patches : integer or float, optional default is None
        The maximum number of patches to extract. If max_patches is a float
        between 0 and 1, it is taken to be a proportion of the total number
        of patches.

    random_state : int, RandomState instance or None, optional (default=None)
        Determines the random number generator used for random sampling when
        `max_patches` is not None. Use an int to make the randomness
        deterministic.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    patches : array, shape = (n_patches, patch_height, patch_width) or
        (n_patches, patch_height, patch_width, n_channels)
        The collection of patches extracted from the image, where `n_patches`
        is either `max_patches` or the total number of patches that can be
        extracted.

    Raises
    ------
    ValueError
        If the patch is taller or wider than the image.
    """
    i_h, i_w = img.shape[:2]
    p_h, p_w = patch_size

    if p_h > i_h:
        raise ValueError("Height of the patch should be less than the height"
                         " of the image.")

    if p_w > i_w:
        raise ValueError("Width of the patch should be less than the width"
                         " of the image.")

    if isinstance(extraction_step, numbers.Number):
        e_h, e_w = extraction_step, extraction_step
    else:
        e_h, e_w = extraction_step

    img = check_array(img, allow_nd=True)
    # Give grayscale images an explicit channel axis of length 1.
    img = img.reshape((i_h, i_w, -1))
    n_colors = img.shape[-1]

    # The patch spans every channel, so the channel axis of the patch grid
    # has length 1.
    extracted_patches = _extract_patches(img,
                                         patch_shape=(p_h, p_w, n_colors),
                                         extraction_step=(e_h, e_w, n_colors))

    n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, e_h, e_w, max_patches)
    if max_patches:
        rng = check_random_state(random_state)
        # Draw positions on the strided patch grid. Its size only equals
        # (i_h - p_h + 1, i_w - p_w + 1) for a step of 1; using those
        # bounds with a larger step would index past the grid.
        n_rows, n_cols = extracted_patches.shape[:2]
        i_s = rng.randint(n_rows, size=n_patches)
        j_s = rng.randint(n_cols, size=n_patches)
        patches = extracted_patches[i_s, j_s, 0]
    else:
        patches = extracted_patches

    patches = patches.reshape(-1, p_h, p_w, n_colors)
    # remove the color dimension if useless
    if patches.shape[-1] == 1:
        return patches.reshape((n_patches, p_h, p_w))
    else:
        return patches
def predict_sliding_window(self,
                           input_sequence,
                           test_callbacks=None,
                           out_blob_names=None,
                           use_fit_network=False,
                           oversample=False,
                           extraction_step=(1, 1),
                           account_for_step=True,
                           interpolation_method=_cv2INTER_NEAREST,
                           pad_border=True,
                           overlap_combine_max=True):
    """
    Get predictions for all images in a sliding window manner.

    Similar to the :py:func:`barrista.net.Net.predict` method. See there
    for the parameter descriptions. For this method, every image must be
    at least as big as the input size. It is then sampled using sliding
    window, and for each output layer the reassembled images are returned.

    The output of the network for one patch must either be of shape
    (num_layers X 1 X 1) or of shape (num_layers X input_height X input_width)
    and currently only one output layer is supported. If the output is of the
    same shape as the input, `account_for_step` does not play a role, and the
    inputs are combined by using the max of the predictions per position
    (or by averaging, see ``overlap_combine_max``).

    :param input_sequence: iterable(3D numpy arrays)
      The 3D numpy arrays must match in their first dimension with the
      second dimension of the network input (number of channels). E.g.,
      for a network with input shape [10, 3, 24, 24], you could provide
      inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
      3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
      and different for each image. Multi-input networks are not yet
      supported by this method. The sequence is iterated twice (once for
      validation, once for prediction), so it must be re-iterable.

    :param test_callbacks: list(barrista.monitoring.Monitor) or None.
      List of callback callables. Will be called pre and post batch
      processing. This list will be processed sequentially, meaning that
      monitors in the sequence can provide information for later monitors
      as done with the ``ResultExtractor``.

    :param out_blob_names: list(string) or None.
      The names of the blobs of which the values are returned. If
      unspecified, uses ``self.outputs``. Must currently contain exactly
      one element.

    :param use_fit_network: bool.
      If set to ``True``, always use this very network, independent of
      whether an internal network in stage ``predict`` is available.
      For more information, see the constructor documentation.

    :param oversample: bool.
      If set to ``True``, uses oversampling and averages the results. You
      have to take care to bring them into the right shape for further
      processing yourself.

    :param extraction_step: 2-tuple(int).
      Window step size in y and x direction.

    :param account_for_step: bool.
      If set to True, the output is resized with nearest neighbor
      interpolation to get a full-sized image.

    :param interpolation_method: int in {cv2.INTER_...}.
      The interpolation strategy used, if ``account_for_step`` is set and
      the ``extraction_step`` is not ``(1, 1)``.

    :param pad_border: bool.
      Whether to return images in the original image size, by adding zero
      padded borders.

    :param overlap_combine_max: bool.
      If the network output size is equal to the input size and the
      stepsize smaller than the output, which operator to use to combine
      overlapping areas. ``True`` takes the maximum; ``False`` averages
      each new value with the value already stored at that position.
      Default: True.

    :returns: list(numpy array).
      One array per input image. For input-sized network outputs, an
      array of shape (num_layers, image_height, image_width) in which
      positions not covered by any window keep the value -1. For
      (num_layers X 1 X 1) outputs: if ``account_for_step`` is set or
      ``extraction_step`` is ``(1, 1)``, an array of shape
      (num_layers, image_height, image_width), with the zero border cut
      off if ``pad_border`` is ``False``; otherwise the raw grid of
      window outputs with shape (num_layers, windows_y, windows_x).

    :raises Exception: if a window output cannot be reshaped to
      (num_layers,).
    """
    if self._predict_variant is not None and not use_fit_network:
        _LOGGER.debug("Using prediction network variant.")
        prednet = self._predict_variant
    else:
        prednet = self
    # Accept any 2-sequence; a list would never compare equal to (1, 1).
    extraction_step = tuple(extraction_step)
    if extraction_step != (1, 1) and pad_border is True:
        assert account_for_step,\
            ("If ``extraction_step`` != (1, 1) and "
             "``padborder`` is set, , ``account_for_step`` must be set, "
             " too.")
    input_dims = prednet.blobs[prednet.inputs[0]].data.shape
    input_image_dims = _np.array(input_dims[2:])
    for im in input_sequence:
        assert im.shape[0] == input_dims[1]
        assert im.shape[1] >= input_image_dims[0]
        assert im.shape[2] >= input_image_dims[1]
    assert (out_blob_names is None or
            len(out_blob_names) == 1), "Only one output layer is supported!"
    # Width of the margin around the area that receives the predictions
    # (only used for (X, 1, 1) network outputs).
    border_y = int(_np.ceil(input_image_dims[0] / 2.)) - 1
    border_x = int(_np.ceil(input_image_dims[1] / 2.)) - 1
    output_images = []
    for im_id, im in enumerate(input_sequence):
        _LOGGER.debug("Processing image %d...", im_id)
        image_beginpoint = _time.time()
        patches = _extract_patches(
            im,
            patch_shape=(input_dims[1],
                         input_image_dims[0],
                         input_image_dims[1]),
            extraction_step=(1, extraction_step[0], extraction_step[1]))
        # Number of window positions in y and x direction.
        sampled_shape = patches.shape[1:3]
        # Flatten the window grid into a batch of network inputs.
        patches = patches.reshape(
            _np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
        results = prednet.predict(patches,
                                  test_callbacks=test_callbacks,
                                  out_blob_names=out_blob_names,
                                  use_fit_network=use_fit_network,
                                  oversample=oversample,
                                  before_oversample_resize_to=None)
        n_layers = results[0].shape[0]
        if results[0].size > n_layers:
            # The network produces one input-sized map per window.
            assert (results[0].ndim == 3 and
                    _np.all(results[0].shape[1:3] == input_image_dims)), (
                        ("The output shape of the net must be "
                         "(X, 1, 1) or (X, input_height, input_width) "
                         "to be used with the `predict_sliding_window` "
                         "method. {} vs {}.").format(
                             input_image_dims,
                             results[0].shape[1:3]))
            # -1 marks positions that no window has written to yet.
            out_im = _np.ones((n_layers, im.shape[1], im.shape[2]),
                              dtype=results[0].dtype) * -1.
            curr_y = 0
            curr_x = 0
            for val in results:
                # ``roi`` is a view, so writing to it updates ``out_im``.
                roi = out_im[:,
                             curr_y:curr_y + val.shape[1],
                             curr_x:curr_x + val.shape[2]]
                if overlap_combine_max:
                    # Write back with max.
                    roi[...] = _np.maximum(roi, val)
                else:
                    # Unwritten positions take the new value, all others
                    # the mean of the stored and the new value.
                    roi[...] = _np.where(roi == -1, val, (val + roi) / 2.)
                # Find the position in the original image: advance along
                # x until the next window would not fit, then start the
                # next row of windows.
                if (curr_x + extraction_step[1] + input_image_dims[1] >
                        out_im.shape[2]):
                    curr_y += extraction_step[0]
                    curr_x = 0
                else:
                    curr_x += extraction_step[1]
            output_images.append(out_im)
        else:
            # Collect the values: one output vector per window position,
            # in row-major window order.
            collected = _np.empty((n_layers,) + tuple(sampled_shape),
                                  dtype=results[0].dtype)
            for val_idx, val in enumerate(results):
                try:
                    val.shape = (n_layers,)
                except (AttributeError, ValueError):  # pragma: no cover
                    raise Exception(
                        ("The output shape of the net must be "
                         "(X, 1, 1) or (X, input_height, input_width) "
                         "to be used with the `predict_sliding_window` "
                         "method. It is {}.").format(val.shape))
                collected[:,
                          val_idx // sampled_shape[1],
                          val_idx % sampled_shape[1]] = val[:]
            if account_for_step or extraction_step == (1, 1):
                out_im = _np.zeros((n_layers, im.shape[1], im.shape[2]),
                                   dtype=results[0].dtype)
                # Explicit end indices: a ``b:-b`` slice would be empty
                # for a zero-width border (window side of 1 or 2).
                inner_y = slice(border_y, im.shape[1] - border_y)
                inner_x = slice(border_x, im.shape[2] - border_x)
                # Resize.
                for layer_idx in range(n_layers):
                    layer_area = out_im[layer_idx, inner_y, inner_x]
                    # cv2 expects the target size as (width, height).
                    layer_area[...] = _cv2resize(
                        collected[layer_idx],
                        (layer_area.shape[1], layer_area.shape[0]),
                        interpolation=interpolation_method)
                if not pad_border:
                    out_im = out_im[:, inner_y, inner_x]
                output_images.append(out_im)
            else:
                output_images.append(collected)
        _LOGGER.debug("Processed image %d in %03.2fs.",
                      im_id,
                      _time.time() - image_beginpoint)
    return output_images
def predict_sliding_window(self,
                           input_sequence,
                           pre_batch_callbacks=None,
                           post_batch_callbacks=None,
                           out_blob_names=None,
                           use_fit_network=False,
                           oversample=False,
                           extraction_step=(1, 1),
                           account_for_step=True,
                           interpolation_method=_cv2INTER_NEAREST,
                           pad_border=True):
    """
    Get predictions for all images in a sliding window manner.

    Similar to the :py:func:`barrista.net.Net.predict` method. See there
    for the parameter descriptions. For this method, every image must be
    at least as big as the input size. It is then sampled using sliding
    window, and for each output layer the reassembled images are returned.

    The output of the network for one patch must be of shape
    (num_layers X 1 X 1) and currently only one output layer is supported.

    :param input_sequence: iterable(3D numpy arrays)
      The 3D numpy arrays must match in their first dimension with the
      second dimension of the network input (number of channels). E.g.,
      for a network with input shape [10, 3, 24, 24], you could provide
      inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
      3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
      and different for each image. Multi-input networks are not yet
      supported by this method. The sequence is iterated twice (once for
      validation, once for prediction), so it must be re-iterable.

    :param pre_batch_callbacks: list(callable) or None.
      Each element of this list will be called with the batch id as
      argument before forward propagating a batch.

    :param post_batch_callbacks: list(callable) or None.
      See before, but after a callback.

    :param out_blob_names: list(string) or None.
      The names of the blobs of which the values are returned. If
      unspecified, uses ``self.outputs``. Must currently contain exactly
      one element.

    :param use_fit_network: bool.
      If set to ``True``, always use this very network, independent of
      whether an internal network in stage ``predict`` is available.
      For more information, see the constructor documentation.

    :param oversample: bool.
      If set to ``True``, uses oversampling and averages the results. You
      have to take care to bring them into the right shape for further
      processing yourself.

    :param extraction_step: 2-tuple(int).
      Window step size in y and x direction.

    :param account_for_step: bool.
      If set to True, the output is resized with nearest neighbor
      interpolation to get a full-sized image.

    :param interpolation_method: int in {cv2.INTER_...}.
      The interpolation strategy used, if ``account_for_step`` is set and
      the ``extraction_step`` is not ``(1, 1)``.

    :param pad_border: bool.
      Whether to return images in the original image size, by adding zero
      padded borders.

    :returns: list(numpy array).
      One array per input image. If ``account_for_step`` is set or
      ``extraction_step`` is ``(1, 1)``, the array has shape
      (num_layers, image_height, image_width), with the zero border cut
      off if ``pad_border`` is ``False``. Otherwise it is the raw grid of
      window outputs with shape (num_layers, windows_y, windows_x).

    :raises Exception: if a window output cannot be reshaped to
      (num_layers,).
    """
    if self._predict_variant is not None and not use_fit_network:
        _LOGGER.info("Using prediction network variant.")
        prednet = self._predict_variant
    else:
        prednet = self
    # Accept any 2-sequence; a list would never compare equal to (1, 1).
    extraction_step = tuple(extraction_step)
    if extraction_step != (1, 1) and pad_border is True:
        assert account_for_step,\
            ("If ``extraction_step`` != (1, 1) and "
             "``padborder`` is set, , ``account_for_step`` must be set, "
             " too.")
    input_dims = prednet.blobs[prednet.inputs[0]].data.shape
    input_image_dims = _np.array(input_dims[2:])
    for im in input_sequence:
        assert im.shape[0] == input_dims[1]
        assert im.shape[1] >= input_image_dims[0]
        assert im.shape[2] >= input_image_dims[1]
    assert (out_blob_names is None or
            len(out_blob_names) == 1), "Only one output layer is supported!"
    # Width of the margin around the area that receives the predictions.
    border_y = int(_np.ceil(input_image_dims[0] / 2.)) - 1
    border_x = int(_np.ceil(input_image_dims[1] / 2.)) - 1
    output_images = []
    for im_id, im in enumerate(input_sequence):
        _LOGGER.info("Processing image %d...", im_id)
        image_beginpoint = _time.time()
        patches = _extract_patches(
            im,
            patch_shape=(input_dims[1],
                         input_image_dims[0],
                         input_image_dims[1]),
            extraction_step=(1, extraction_step[0], extraction_step[1]))
        # Number of window positions in y and x direction.
        sampled_shape = patches.shape[1:3]
        # Flatten the window grid into a batch of network inputs.
        patches = patches.reshape(
            _np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
        results = prednet.predict(
            patches,
            pre_batch_callbacks=pre_batch_callbacks,
            post_batch_callbacks=post_batch_callbacks,
            out_blob_names=out_blob_names,
            use_fit_network=use_fit_network,
            oversample=oversample,
            before_oversample_resize_to=None)
        n_layers = results[0].shape[0]
        # Collect the values: one output vector per window position, in
        # row-major window order.
        collected = _np.empty((n_layers,) + tuple(sampled_shape),
                              dtype=results[0].dtype)
        for val_idx, val in enumerate(results):
            try:
                val.shape = (n_layers,)
            except (AttributeError, ValueError):
                raise Exception(
                    ("The output shape of the net must be (X, 1, 1) to be "
                     "used with the `predict_sliding_window` method. It is "
                     "{}.").format(val.shape))
            collected[:,
                      val_idx // sampled_shape[1],
                      val_idx % sampled_shape[1]] = val[:]
        if account_for_step or extraction_step == (1, 1):
            out_im = _np.zeros((n_layers, im.shape[1], im.shape[2]),
                               dtype=results[0].dtype)
            # Explicit end indices: a ``b:-b`` slice would be empty for a
            # zero-width border (window side of 1 or 2).
            inner_y = slice(border_y, im.shape[1] - border_y)
            inner_x = slice(border_x, im.shape[2] - border_x)
            # Resize.
            for layer_idx in range(n_layers):
                layer_area = out_im[layer_idx, inner_y, inner_x]
                # cv2 expects the target size as (width, height).
                layer_area[...] = _cv2resize(
                    collected[layer_idx],
                    (layer_area.shape[1], layer_area.shape[0]),
                    interpolation=interpolation_method)
            if not pad_border:
                out_im = out_im[:, inner_y, inner_x]
            output_images.append(out_im)
        else:
            output_images.append(collected)
        _LOGGER.info("Processed image %d in %ds.",
                     im_id,
                     _time.time() - image_beginpoint)
    return output_images
def extract_patches(images, patch_shape, stride, in_order="NHWC",
                    out_order="NHWC"):
    """Extract all patches of a given shape from one or several images.

    Parameters
    ----------
    images : numpy.ndarray or torch.Tensor
        Input with 2 to 4 dimensions. A 2-D input is treated as a single
        gray image. A 3-D input whose last axis has length 3 is treated as
        a single color image, any other 3-D input as a stack of gray
        images. A 4-D input is a batch laid out according to ``in_order``.
    patch_shape : tuple of 3 ints
        ``(patch_height, patch_width, n_channels)``.
    stride : int
        Step between neighbouring patches, used for both spatial axes.
    in_order : {"NHWC", "NCHW"}
        Axis order of a 4-D ``images`` batch. Inputs with fewer dimensions
        are expanded to NHWC regardless of this value.
    out_order : {"NHWC", "NCHW"}
        Axis order of the returned patches.

    Returns
    -------
    numpy.ndarray or torch.Tensor
        The patches, stacked along the first axis and laid out according
        to ``out_order``; same container type as ``images``.

    Raises
    ------
    ValueError
        If ``out_order`` is neither "NHWC" nor "NCHW".
    TypeError
        If ``images`` is neither a numpy array nor a torch tensor.
    """
    assert 2 <= images.ndim <= 4
    if isinstance(images, np.ndarray):
        from sklearn.feature_extraction.image import _extract_patches

        if images.ndim == 2:  # single gray image
            images = np.expand_dims(images, 0)

        if images.ndim == 3:
            if images.shape[2] == 3:  # single color image
                images = np.expand_dims(images, 0)
            else:
                # multiple gray images or single gray image with
                # first index 1
                images = np.expand_dims(images, 3)
        elif in_order == "NCHW":
            # numpy expects order NHWC
            images = images.transpose(0, 2, 3, 1)

        patches = _extract_patches(
            images,
            patch_shape=(1, *patch_shape),
            extraction_step=(1, stride, stride, 1),
        ).reshape(-1, *patch_shape)
        # now patches' shape = NHWC

        if out_order == "NHWC":
            pass
        elif out_order == "NCHW":
            # ndarrays have no ``permute``; ``transpose`` reorders the axes.
            patches = patches.transpose(0, 3, 1, 2)
        else:
            raise ValueError(
                'out_order not understood (expected "NHWC" or "NCHW")')

    elif isinstance(images, torch.Tensor):
        if images.ndim == 2:  # single gray image
            images = images.unsqueeze(0)

        if images.ndim == 3:
            if images.shape[2] == 3:  # single color image
                images = images.unsqueeze(0)
            else:  # multiple gray image
                images = images.unsqueeze(3)

        if in_order == "NHWC":
            # torch expects order NCHW
            images = images.permute(0, 3, 1, 2)

        patches = torch.nn.functional.unfold(
            images, kernel_size=patch_shape[:2], stride=stride
        )
        # unfold returns (N, C * kh * kw, L); move the patch index L next
        # to the batch axis so both can be merged into one patch axis.
        patches = patches.permute(0, 2, 1)
        nb_patches = patches.shape[0] * patches.shape[1]
        patches = patches.reshape(nb_patches, patch_shape[2],
                                  *patch_shape[:2])
        # now patches' shape = NCHW

        if out_order == "NHWC":
            patches = patches.permute(0, 2, 3, 1)
        elif out_order == "NCHW":
            pass
        else:
            raise ValueError(
                'out_order not understood (expected "NHWC" or "NCHW")')

    else:
        # Without this branch ``patches`` would be unbound at the return.
        raise TypeError(
            "images must be a numpy.ndarray or a torch.Tensor, got "
            "{}".format(type(images).__name__))

    return patches
def predict_sliding_window(self,
                           input_sequence,
                           test_callbacks=None,
                           out_blob_names=None,
                           use_fit_network=False,
                           oversample=False,
                           extraction_step=(1, 1),
                           account_for_step=True,
                           interpolation_method=_cv2INTER_NEAREST,
                           pad_border=True,
                           overlap_combine_max=True):
    """
    Get predictions for all images in a sliding window manner.

    Similar to the :py:func:`barrista.net.Net.predict` method. See there
    for the parameter descriptions. For this method, every image must be
    at least as big as the input size. It is then sampled using sliding
    window, and for each output layer the reassembled images are returned.

    The output of the network for one patch must either be of shape
    (num_layers X 1 X 1) or of shape (num_layers X input_height X input_width)
    and currently only one output layer is supported. If the output is of the
    same shape as the input, `account_for_step` does not play a role, and the
    inputs are combined by using the max of the predictions per position
    (or by averaging, see ``overlap_combine_max``).

    :param input_sequence: iterable(3D numpy arrays)
      The 3D numpy arrays must match in their first dimension with the
      second dimension of the network input (number of channels). E.g.,
      for a network with input shape [10, 3, 24, 24], you could provide
      inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
      3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
      and different for each image. Multi-input networks are not yet
      supported by this method. The sequence is iterated twice (once for
      validation, once for prediction), so it must be re-iterable.

    :param test_callbacks: list(barrista.monitoring.Monitor) or None.
      List of callback callables. Will be called pre and post batch
      processing. This list will be processed sequentially, meaning that
      monitors in the sequence can provide information for later monitors
      as done with the ``ResultExtractor``.

    :param out_blob_names: list(string) or None.
      The names of the blobs of which the values are returned. If
      unspecified, uses ``self.outputs``. Must currently contain exactly
      one element.

    :param use_fit_network: bool.
      If set to ``True``, always use this very network, independent of
      whether an internal network in stage ``predict`` is available.
      For more information, see the constructor documentation.

    :param oversample: bool.
      If set to ``True``, uses oversampling and averages the results. You
      have to take care to bring them into the right shape for further
      processing yourself.

    :param extraction_step: 2-tuple(int).
      Window step size in y and x direction.

    :param account_for_step: bool.
      If set to True, the output is resized with nearest neighbor
      interpolation to get a full-sized image.

    :param interpolation_method: int in {cv2.INTER_...}.
      The interpolation strategy used, if ``account_for_step`` is set and
      the ``extraction_step`` is not ``(1, 1)``.

    :param pad_border: bool.
      Whether to return images in the original image size, by adding zero
      padded borders.

    :param overlap_combine_max: bool.
      If the network output size is equal to the input size and the
      stepsize smaller than the output, which operator to use to combine
      overlapping areas. ``True`` takes the maximum; ``False`` averages
      each new value with the value already stored at that position.
      Default: True.

    :returns: list(numpy array).
      One array per input image. For input-sized network outputs, an
      array of shape (num_layers, image_height, image_width) in which
      positions not covered by any window keep the value -1. For
      (num_layers X 1 X 1) outputs: if ``account_for_step`` is set or
      ``extraction_step`` is ``(1, 1)``, an array of shape
      (num_layers, image_height, image_width), with the zero border cut
      off if ``pad_border`` is ``False``; otherwise the raw grid of
      window outputs with shape (num_layers, windows_y, windows_x).

    :raises Exception: if a window output cannot be reshaped to
      (num_layers,).
    """
    if self._predict_variant is not None and not use_fit_network:
        _LOGGER.debug("Using prediction network variant.")
        prednet = self._predict_variant
    else:
        prednet = self
    # Accept any 2-sequence; a list would never compare equal to (1, 1).
    extraction_step = tuple(extraction_step)
    if extraction_step != (1, 1) and pad_border is True:
        assert account_for_step,\
            ("If ``extraction_step`` != (1, 1) and "
             "``padborder`` is set, , ``account_for_step`` must be set, "
             " too.")
    input_dims = prednet.blobs[prednet.inputs[0]].data.shape
    input_image_dims = _np.array(input_dims[2:])
    for im in input_sequence:
        assert im.shape[0] == input_dims[1]
        assert im.shape[1] >= input_image_dims[0]
        assert im.shape[2] >= input_image_dims[1]
    assert (out_blob_names is None or
            len(out_blob_names) == 1), "Only one output layer is supported!"
    # Width of the margin around the area that receives the predictions
    # (only used for (X, 1, 1) network outputs).
    border_y = int(_np.ceil(input_image_dims[0] / 2.)) - 1
    border_x = int(_np.ceil(input_image_dims[1] / 2.)) - 1
    output_images = []
    for im_id, im in enumerate(input_sequence):
        _LOGGER.debug("Processing image %d...", im_id)
        image_beginpoint = _time.time()
        patches = _extract_patches(
            im,
            patch_shape=(input_dims[1],
                         input_image_dims[0],
                         input_image_dims[1]),
            extraction_step=(1, extraction_step[0], extraction_step[1]))
        # Number of window positions in y and x direction.
        sampled_shape = patches.shape[1:3]
        # Flatten the window grid into a batch of network inputs.
        patches = patches.reshape(
            _np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
        results = prednet.predict(patches,
                                  test_callbacks=test_callbacks,
                                  out_blob_names=out_blob_names,
                                  use_fit_network=use_fit_network,
                                  oversample=oversample,
                                  before_oversample_resize_to=None)
        n_layers = results[0].shape[0]
        if results[0].size > n_layers:
            # The network produces one input-sized map per window.
            assert (results[0].ndim == 3 and
                    _np.all(results[0].shape[1:3] == input_image_dims)), (
                        ("The output shape of the net must be "
                         "(X, 1, 1) or (X, input_height, input_width) "
                         "to be used with the `predict_sliding_window` "
                         "method. {} vs {}.").format(
                             input_image_dims,
                             results[0].shape[1:3]))
            # -1 marks positions that no window has written to yet.
            out_im = _np.ones((n_layers, im.shape[1], im.shape[2]),
                              dtype=results[0].dtype) * -1.
            curr_y = 0
            curr_x = 0
            for val in results:
                # ``roi`` is a view, so writing to it updates ``out_im``.
                roi = out_im[:,
                             curr_y:curr_y + val.shape[1],
                             curr_x:curr_x + val.shape[2]]
                if overlap_combine_max:
                    # Write back with max.
                    roi[...] = _np.maximum(roi, val)
                else:
                    # Unwritten positions take the new value, all others
                    # the mean of the stored and the new value.
                    roi[...] = _np.where(roi == -1, val, (val + roi) / 2.)
                # Find the position in the original image: advance along
                # x until the next window would not fit, then start the
                # next row of windows.
                if (curr_x + extraction_step[1] + input_image_dims[1] >
                        out_im.shape[2]):
                    curr_y += extraction_step[0]
                    curr_x = 0
                else:
                    curr_x += extraction_step[1]
            output_images.append(out_im)
        else:
            # Collect the values: one output vector per window position,
            # in row-major window order.
            collected = _np.empty((n_layers,) + tuple(sampled_shape),
                                  dtype=results[0].dtype)
            for val_idx, val in enumerate(results):
                try:
                    val.shape = (n_layers,)
                except (AttributeError, ValueError):  # pragma: no cover
                    raise Exception(
                        ("The output shape of the net must be "
                         "(X, 1, 1) or (X, input_height, input_width) "
                         "to be used with the `predict_sliding_window` "
                         "method. It is {}.").format(val.shape))
                collected[:,
                          val_idx // sampled_shape[1],
                          val_idx % sampled_shape[1]] = val[:]
            if account_for_step or extraction_step == (1, 1):
                out_im = _np.zeros((n_layers, im.shape[1], im.shape[2]),
                                   dtype=results[0].dtype)
                # Explicit end indices: a ``b:-b`` slice would be empty
                # for a zero-width border (window side of 1 or 2).
                inner_y = slice(border_y, im.shape[1] - border_y)
                inner_x = slice(border_x, im.shape[2] - border_x)
                # Resize.
                for layer_idx in range(n_layers):
                    layer_area = out_im[layer_idx, inner_y, inner_x]
                    # cv2 expects the target size as (width, height).
                    layer_area[...] = _cv2resize(
                        collected[layer_idx],
                        (layer_area.shape[1], layer_area.shape[0]),
                        interpolation=interpolation_method)
                if not pad_border:
                    out_im = out_im[:, inner_y, inner_x]
                output_images.append(out_im)
            else:
                output_images.append(collected)
        _LOGGER.debug("Processed image %d in %03.2fs.",
                      im_id,
                      _time.time() - image_beginpoint)
    return output_images