Example #1
def block_TV(preliminary_result, block_size, block_stride=None):
    """Compute a per-block total-variation map, resized back to the input shape."""
    assert block_size > 1
    if block_stride:
        assert block_stride <= block_size
    else:
        block_stride = block_size
    patch_post = _extract_patches(preliminary_result,
                                  patch_shape=block_size,
                                  extraction_step=block_stride)

    def patch_TV(patches):
        # ``patches`` is a (rows, cols, height, width) view; accumulate the
        # isotropic total variation from forward differences in each patch.
        row, col, height, width = patches.shape
        tv = np.zeros([row, col])
        for r in range(row):
            for c in range(col):
                tmp = np.sqrt(
                    (patches[r, c, 1:, :-1] - patches[r, c, :-1, :-1])**2 +
                    (patches[r, c, :-1, 1:] - patches[r, c, :-1, :-1])**2)
                tv[r, c] += tmp.mean()
        tv /= height * width
        return tv

    patch_post = patch_TV(patch_post)
    img_post = resize(patch_post,
                      preliminary_result.shape,
                      order=0,
                      mode='constant',
                      anti_aliasing=False)
    return img_post
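A minimal usage sketch (the imports below are assumptions about the snippet's environment, with `_extract_patches` taken from scikit-learn's private API and `resize` from scikit-image; the random image is hypothetical test data):

import numpy as np
from skimage.transform import resize
from sklearn.feature_extraction.image import _extract_patches

# Hypothetical input: a random 64x64 'preliminary result'.
rng = np.random.default_rng(0)
img = rng.random((64, 64))
# Non-overlapping 8x8 blocks; the per-block TV map is resized back to 64x64.
tv_map = block_TV(img, block_size=8)
print(tv_map.shape)  # (64, 64)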
Example #2
def test_extract_patches_square():
    # test same patch size for all dimensions
    face = downsampled_face
    i_h, i_w = face.shape
    p = 8
    expected_n_patches = ((i_h - p + 1), (i_w - p + 1))
    patches = _extract_patches(face, patch_shape=p)
    assert patches.shape == (expected_n_patches[0], expected_n_patches[1], p, p)
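For intuition, the same shape check can be run self-contained; a sketch assuming scikit-learn's private `_extract_patches`, with a synthetic 50x50 image standing in for the `downsampled_face` fixture:

import numpy as np
from sklearn.feature_extraction.image import _extract_patches

image = np.arange(50 * 50).reshape(50, 50)  # stand-in for the face fixture
p = 8
patches = _extract_patches(image, patch_shape=p)  # default extraction_step=1
# With unit step, each axis yields (dim - p + 1) patch positions.
assert patches.shape == (50 - p + 1, 50 - p + 1, p, p)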
Example #3
def test_extract_patches_strided():

    image_shapes_1D = [(10,), (10,), (11,), (10,)]
    patch_sizes_1D = [(1,), (2,), (3,), (8,)]
    patch_steps_1D = [(1,), (1,), (4,), (2,)]

    expected_views_1D = [(10,), (9,), (3,), (2,)]
    last_patch_1D = [(10,), (8,), (8,), (2,)]

    image_shapes_2D = [(10, 20), (10, 20), (10, 20), (11, 20)]
    patch_sizes_2D = [(2, 2), (10, 10), (10, 11), (6, 6)]
    patch_steps_2D = [(5, 5), (3, 10), (3, 4), (4, 2)]

    expected_views_2D = [(2, 4), (1, 2), (1, 3), (2, 8)]
    last_patch_2D = [(5, 15), (0, 10), (0, 8), (4, 14)]

    image_shapes_3D = [(5, 4, 3), (3, 3, 3), (7, 8, 9), (7, 8, 9)]
    patch_sizes_3D = [(2, 2, 3), (2, 2, 2), (1, 7, 3), (1, 3, 3)]
    patch_steps_3D = [(1, 2, 10), (1, 1, 1), (2, 1, 3), (3, 3, 4)]

    expected_views_3D = [(4, 2, 1), (2, 2, 2), (4, 2, 3), (3, 2, 2)]
    last_patch_3D = [(3, 2, 0), (1, 1, 1), (6, 1, 6), (6, 3, 4)]

    image_shapes = image_shapes_1D + image_shapes_2D + image_shapes_3D
    patch_sizes = patch_sizes_1D + patch_sizes_2D + patch_sizes_3D
    patch_steps = patch_steps_1D + patch_steps_2D + patch_steps_3D
    expected_views = expected_views_1D + expected_views_2D + expected_views_3D
    last_patches = last_patch_1D + last_patch_2D + last_patch_3D

    for (image_shape, patch_size, patch_step, expected_view, last_patch) in zip(
        image_shapes, patch_sizes, patch_steps, expected_views, last_patches
    ):
        image = np.arange(np.prod(image_shape)).reshape(image_shape)
        patches = _extract_patches(
            image, patch_shape=patch_size, extraction_step=patch_step
        )

        ndim = len(image_shape)

        assert patches.shape[:ndim] == expected_view
        last_patch_slices = tuple(
            slice(i, i + j, None) for i, j in zip(last_patch, patch_size)
        )
        assert (
            patches[(-1, None, None) * ndim] == image[last_patch_slices].squeeze()
        ).all()
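The expected view shapes above all follow from the per-axis formula (image_dim - patch_dim) // step + 1; a quick self-contained check of the first 2D case (assuming numpy and scikit-learn):

import numpy as np
from sklearn.feature_extraction.image import _extract_patches

image = np.arange(10 * 20).reshape(10, 20)
patches = _extract_patches(image, patch_shape=(2, 2), extraction_step=(5, 5))
# (10 - 2) // 5 + 1 = 2 rows and (20 - 2) // 5 + 1 = 4 columns of patches.
assert patches.shape == (2, 4, 2, 2)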
Example #4
def block_mean(preliminary_result, block_size, block_stride=None):
    """Replace each block by its mean value, resized back to the input shape."""
    preliminary_result = preliminary_result.squeeze()
    if block_stride:
        assert block_stride <= block_size
    else:
        block_stride = block_size
    if block_size > 1:
        patch_post = _extract_patches(preliminary_result,
                                      patch_shape=block_size,
                                      extraction_step=block_stride)
        patch_post = patch_post.mean(axis=(2, 3))
        img_post = resize(patch_post,
                          preliminary_result.shape,
                          order=0,
                          mode='constant',
                          anti_aliasing=False)
    else:
        img_post = preliminary_result
    return img_post
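A usage sketch analogous to the one for `block_TV` (same assumed imports; each 8x8 block is replaced by its mean and broadcast back with nearest-neighbor resizing):

import numpy as np
from skimage.transform import resize
from sklearn.feature_extraction.image import _extract_patches

rng = np.random.default_rng(0)
img = rng.random((64, 64))
smoothed = block_mean(img, block_size=8)
print(smoothed.shape)  # (64, 64)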
Example #5
    def predict_sliding_window(
        self,
        input_sequence,
        pre_batch_callbacks=None,
        post_batch_callbacks=None,
        out_blob_names=None,
        use_fit_network=False,
        oversample=False,
        extraction_step=(1, 1),
        account_for_step=True,
        interpolation_method=_cv2INTER_NEAREST,
        pad_border=True,
    ):
        """
        Get predictions for all images in a sliding window manner.

        Similar to the :py:func:`barrista.net.Net.predict` method. See there
        for the parameter descriptions. For this method, every image must be
        at least as big as the input size. It is then sampled using sliding
        window, and for each output layer the reassembled images are returned.

        The output of the network for one patch must be of shape
        (num_layers X 1 X 1) and currently only one output layer is supported.

        :param input_sequence: iterable(3D numpy arrays)
          The 3D numpy arrays must match in their first dimension with the
          second dimension of the network input (number of channels). E.g.,
          for a network with input shape [10, 3, 24, 24], you could provide
          inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
          3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
          and different for each image. Multi-input networks are not yet
          supported by this method.

        :param pre_batch_callbacks: list(callable) or None.
          Each element of this list will be called with the batch id as
          argument before forward propagating a batch.

        :param post_batch_callbacks: list(callable) or None.
          Like ``pre_batch_callbacks``, but called after a batch has been
          forward propagated.

        :param out_blob_names: list(string) or None.
          The names of the blobs of which the values are returned. If
          unspecified, uses ``self.outputs``. Must currently contain exactly
          one element.

        :param use_fit_network: bool.
          If set to ``True``, always use this very network, independent of
          whether an internal network in stage ``predict`` is available.
          For more information, see the constructor documentation.

        :param oversample: bool.
          If set to ``True``, uses oversampling and averages the results. You
          have to take care to bring them into the right shape for further
          processing yourself.

        :param extraction_step: 2-tuple(int).
          Window step size in y and x direction.

        :param account_for_step: bool.
          If set to True, the output is resized with nearest neighbor
          interpolation to get a full-sized image.

        :param interpolation_method: int in {cv2.INTER_...}.
          The interpolation strategy used, if ``account_for_step`` is set and
          the ``extraction_step`` is not ``(1, 1)``.

        :param pad_border: bool.
          Whether to return images at the original image size by adding
          zero-padded borders.
        """
        if self._predict_variant is not None and not use_fit_network:
            _LOGGER.info("Using prediction network variant.")
            prednet = self._predict_variant
        else:
            prednet = self
        if extraction_step != (1, 1) and pad_border is True:
            assert account_for_step, (
                "If ``extraction_step`` != (1, 1) and "
                "``pad_border`` is set, ``account_for_step`` must be set, "
                "too."
            )
        input_dims = prednet.blobs[prednet.inputs[0]].data.shape
        input_image_dims = _np.array(input_dims[2:])
        for im in input_sequence:
            assert im.shape[0] == input_dims[1]
            assert im.shape[1] >= input_image_dims[0]
            assert im.shape[2] >= input_image_dims[1]
        assert out_blob_names is None or len(out_blob_names) == 1, "Only one output layer is supported!"  # noqa
        output_images = []
        for im_id, im in enumerate(input_sequence):
            _LOGGER.info("Processing image %d...", im_id)
            image_beginpoint = _time.time()
            patches = _extract_patches(
                im,
                patch_shape=(input_dims[1], input_image_dims[0], input_image_dims[1]),
                extraction_step=(1, extraction_step[0], extraction_step[1]),
            )
            sampled_shape = patches.shape[1:3]
            patches = patches.reshape(_np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
            results = prednet.predict(
                patches,
                pre_batch_callbacks=pre_batch_callbacks,  # noqa
                post_batch_callbacks=post_batch_callbacks,
                out_blob_names=out_blob_names,
                use_fit_network=use_fit_network,
                oversample=oversample,
                before_oversample_resize_to=None,
            )
            if account_for_step or extraction_step == (1, 1):
                out_im = _np.zeros((results[0].shape[0], im.shape[1], im.shape[2]), dtype=results[0].dtype)
                # Collect the values.
                collected = _np.empty(_np.hstack(([results[0].shape[0]], sampled_shape)), dtype=results[0].dtype)
                for val_idx, val in enumerate(results):
                    try:
                        val.shape = (results[0].shape[0],)
                    except (ValueError, AttributeError):
                        raise Exception(
                            (
                                "The output shape of the net must be (X, 1, 1) to be "  # noqa
                                "used with the `predict_sliding_window` method. It is "  # noqa
                                "{}."
                            ).format(val.shape)
                        )
                    collected[:, val_idx // sampled_shape[1], val_idx % sampled_shape[1]] = val[:]
                # Resize.
                for layer_idx in range(results[0].shape[0]):
                    layer_area = out_im[
                        layer_idx,
                        int(_np.ceil(input_image_dims[0] / 2.0)) - 1 : -int(_np.ceil(input_image_dims[0] / 2.0)) + 1,
                        int(_np.ceil(input_image_dims[1] / 2.0)) - 1 : -int(_np.ceil(input_image_dims[1] / 2.0)) + 1,
                    ]
                    layer_area[...] = _cv2resize(
                        collected[layer_idx],
                        (layer_area.shape[1], layer_area.shape[0]),
                        interpolation=interpolation_method,
                    )
                if not pad_border:
                    out_im = out_im[
                        :,
                        int(_np.ceil(input_image_dims[0] / 2.0)) - 1 : -int(_np.ceil(input_image_dims[0] / 2.0)) + 1,
                        int(_np.ceil(input_image_dims[1] / 2.0)) - 1 : -int(_np.ceil(input_image_dims[1] / 2.0)) + 1,
                    ]
                output_images.append(out_im)
            else:
                # Collect the values.
                collected = _np.empty(_np.hstack(([results[0].shape[0]], sampled_shape)), dtype=results[0].dtype)
                for val_idx, val in enumerate(results):
                    try:
                        val.shape = (results[0].shape[0],)
                    except (ValueError, AttributeError):
                        raise Exception(
                            (
                                "The output shape of the net must be (X, 1, 1) to be "  # noqa
                                "used with the `predict_sliding_window` method. It is "  # noqa
                                "{}."
                            ).format(val.shape)
                        )
                    collected[:, val_idx // sampled_shape[1], val_idx % sampled_shape[1]] = val[:]
                output_images.append(collected)
            _LOGGER.info("Processed image %d in %ds.", im_id, _time.time() - image_beginpoint)
        return output_images
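The windowing step at the heart of this method can be illustrated in isolation; a minimal numpy sketch with hypothetical shapes (a 3-channel 40x48 image and a 24x24 network input with step (2, 2)):

import numpy as np
from sklearn.feature_extraction.image import _extract_patches

im = np.zeros((3, 40, 48))  # (channels, height, width)
patches = _extract_patches(im,
                           patch_shape=(3, 24, 24),
                           extraction_step=(1, 2, 2))
sampled_shape = patches.shape[1:3]  # (9, 13) window positions
# Flatten the 1x9x13 patch grid into a batch of 117 network inputs.
patches = patches.reshape(np.hstack((np.prod(patches.shape[:3]),
                                     patches.shape[3:])))
print(sampled_shape, patches.shape)  # (9, 13) (117, 3, 24, 24)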
Example #6
from sklearn.feature_extraction.image import _extract_patches
import numpy as np
import cv2
from datasets import download_and_prepare


def read_file(file, grayscale=False):
    """
    Loads and normalizes image.

    Parameters
        file (string): image file path
        grayscale (bool): True converts the image to grayscale

    Returns:
        (ndarray): image
    """
    image = cv2.imread(file)
    if grayscale:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image = np.float32(image) / 255
    return image


files = download_and_prepare("utk-faces", "data")
img = read_file(files[0])
print(img.shape)
sample = _extract_patches(img, patch_shape=(64, 64, 3), extraction_step=1)
print(sample.shape)
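For orientation, the printed shapes can be predicted from the sliding-window formula; a self-contained sketch with a synthetic stand-in image (the real UTK face dimensions depend on the dataset):

import numpy as np
from sklearn.feature_extraction.image import _extract_patches

img = np.zeros((200, 200, 3), dtype=np.float32)  # stand-in for a face image
sample = _extract_patches(img, patch_shape=(64, 64, 3), extraction_step=1)
# (200 - 64) + 1 = 137 positions per spatial axis; the channel axis collapses.
print(sample.shape)  # (137, 137, 1, 64, 64, 3)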
Example #7
def extract_patches_2d(img,
                       patch_size,
                       extraction_step=1,
                       max_patches=None,
                       random_state=None):
    """Reshape a 2D image into a collection of patches
    The resulting patches are allocated in a dedicated array.

    Parameters
    ----------
    img : array, shape = (image_height, image_width) or
        (image_height, image_width, n_channels)
        The original image data. For color images, the last dimension specifies
        the channel: an RGB image would have `n_channels=3`.

    patch_size : tuple of ints (patch_height, patch_width)
        The dimensions of one patch.

    extraction_step : integer or tuple of length img.ndim
        Indicates the step size at which extraction shall be performed.
        If an integer is given, the step is uniform in all dimensions.

    max_patches : integer or float, optional (default=None)
        The maximum number of patches to extract. If max_patches is a float
        between 0 and 1, it is taken to be a proportion of the total number
        of patches.

    random_state : int, RandomState instance or None, optional (default=None)
        Determines the random number generator used for random sampling when
        `max_patches` is not None. Use an int to make the randomness
        deterministic.
        See :term:`Glossary <random_state>`.

    Returns
    -------
    patches : array, shape = (n_patches, patch_height, patch_width) or
        (n_patches, patch_height, patch_width, n_channels)
        The collection of patches extracted from the image, where `n_patches`
        is either `max_patches` or the total number of patches that can be
        extracted.
    """
    i_h, i_w = img.shape[:2]
    p_h, p_w = patch_size

    if p_h > i_h:
        raise ValueError("Height of the patch should be less than the height"
                         " of the image.")

    if p_w > i_w:
        raise ValueError("Width of the patch should be less than the width"
                         " of the image.")

    if isinstance(extraction_step, numbers.Number):
        e_h, e_w = extraction_step, extraction_step
    else:
        e_h, e_w = extraction_step

    img = check_array(img, allow_nd=True)
    img = img.reshape((i_h, i_w, -1))
    n_colors = img.shape[-1]

    extracted_patches = _extract_patches(img,
                                         patch_shape=(p_h, p_w, n_colors),
                                         extraction_step=(e_h, e_w, n_colors))

    n_patches = _compute_n_patches(i_h, i_w, p_h, p_w, e_h, e_w, max_patches)
    if max_patches:
        # Note: the random offsets index the dense (unit-step) patch grid,
        # so random sampling assumes extraction_step == 1.
        rng = check_random_state(random_state)
        i_s = rng.randint(i_h - p_h + 1, size=n_patches)
        j_s = rng.randint(i_w - p_w + 1, size=n_patches)
        patches = extracted_patches[i_s, j_s, 0]
    else:
        patches = extracted_patches

    patches = patches.reshape(-1, p_h, p_w, n_colors)
    # remove the color dimension if useless
    if patches.shape[-1] == 1:
        return patches.reshape((n_patches, p_h, p_w))
    else:
        return patches
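A hedged usage sketch, assuming the module-level helpers referenced above (`numbers`, `check_array`, `check_random_state`, `_compute_n_patches`, `_extract_patches`) are in scope as in scikit-learn's `feature_extraction.image` module:

import numpy as np

rng = np.random.RandomState(0)
img = rng.rand(32, 32, 3)
# Ten randomly sampled 8x8 RGB patches.
patches = extract_patches_2d(img, patch_size=(8, 8), max_patches=10,
                             random_state=0)
print(patches.shape)  # (10, 8, 8, 3)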
Example #8
    def predict_sliding_window(self,
                               input_sequence,
                               test_callbacks=None,
                               out_blob_names=None,
                               use_fit_network=False,
                               oversample=False,
                               extraction_step=(1, 1),
                               account_for_step=True,
                               interpolation_method=_cv2INTER_NEAREST,
                               pad_border=True,
                               overlap_combine_max=True):
        """
        Get predictions for all images in a sliding window manner.

        Similar to the :py:func:`barrista.net.Net.predict` method. See there
        for the parameter descriptions. For this method, every image must be
        at least as big as the input size. It is then sampled using sliding
        window, and for each output layer the reassembled images are returned.

        The output of the network for one patch must either be of shape
        (num_layers X 1 X 1) or of shape (num_layers X input_height X input_width)
        and currently only one output layer is supported. If the output is of the
        same shape as the input, `account_for_step` does not play a role, and the
        inputs are combined by using the max of the predictions per position.

        :param input_sequence: iterable(3D numpy arrays)
          The 3D numpy arrays must match in their first dimension with the
          second dimension of the network input (number of channels). E.g.,
          for a network with input shape [10, 3, 24, 24], you could provide
          inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
          3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
          and different for each image. Multi-input networks are not yet
          supported by this method.

        :param test_callbacks: list(barrista.monitoring.Monitor) or None.
          List of callback callables. Will be called pre and post batch
          processing. This list will be processed sequentially, meaning that
          monitors in the sequence can provide information for later monitors
          as done with the ``ResultExtractor``.

        :param out_blob_names: list(string) or None.
          The names of the blobs of which the values are returned. If
          unspecified, uses ``self.outputs``. Must currently contain exactly
          one element.

        :param use_fit_network: bool.
          If set to ``True``, always use this very network, independent of
          whether an internal network in stage ``predict`` is available.
          For more information, see the constructor documentation.

        :param oversample: bool.
          If set to ``True``, uses oversampling and averages the results. You
          have to take care to bring them into the right shape for further
          processing yourself.

        :param extraction_step: 2-tuple(int).
          Window step size in y and x direction.

        :param account_for_step: bool.
          If set to True, the output is resized with nearest neighbor
          interpolation to get a full-sized image.

        :param interpolation_method: int in {cv2.INTER_...}.
          The interpolation strategy used, if ``account_for_step`` is set and
          the ``extraction_step`` is not ``(1, 1)``.

        :param pad_border: bool.
          Whether to return images at the original image size by adding
          zero-padded borders.

        :param overlap_combine_max: bool.
          Whether to combine overlapping output areas with the elementwise
          maximum (``True``) or a running average (``False``). Only relevant
          if the network output size equals the input size and the step size
          is smaller than the output. Default: True.
        """
        if self._predict_variant is not None and not use_fit_network:
            _LOGGER.debug("Using prediction network variant.")
            prednet = self._predict_variant
        else:
            prednet = self
        if extraction_step != (1, 1) and pad_border is True:
            assert account_for_step,\
                ("If ``extraction_step`` != (1, 1) and "
                 "``pad_border`` is set, ``account_for_step`` must be set, "
                 "too.")
        input_dims = prednet.blobs[prednet.inputs[0]].data.shape
        input_image_dims = _np.array(input_dims[2:])
        for im in input_sequence:
            assert im.shape[0] == input_dims[1]
            assert im.shape[1] >= input_image_dims[0]
            assert im.shape[2] >= input_image_dims[1]
        assert (out_blob_names is None or
                len(out_blob_names) == 1), "Only one output layer is supported!"  # noqa
        output_images = []
        # pylint: disable=too-many-nested-blocks
        for im_id, im in enumerate(input_sequence):
            _LOGGER.debug("Processing image %d...", im_id)
            image_beginpoint = _time.time()
            patches = _extract_patches(im,
                                       patch_shape=(input_dims[1],
                                                    input_image_dims[0],
                                                    input_image_dims[1]),
                                       extraction_step=(1,
                                                        extraction_step[0],
                                                        extraction_step[1]))
            sampled_shape = patches.shape[1:3]
            patches = patches.reshape(_np.hstack((_np.prod(patches.shape[:3]),
                                                  patches.shape[3:])))
            results = prednet.predict(patches,
                                      test_callbacks=test_callbacks,
                                      out_blob_names=out_blob_names,
                                      use_fit_network=use_fit_network,
                                      oversample=oversample,
                                      before_oversample_resize_to=None)
            if results[0].size > results[0].shape[0]:
                assert (results[0].ndim == 3 and
                        _np.all(results[0].shape[1:3] == input_image_dims)), (
                            ("The output shape of the net must be "
                             "(X, 1, 1) or (X, input_height, input_width) "
                             "to be used with the `predict_sliding_window` "
                             "method. {} vs {}.").format(
                                 input_image_dims,
                                 results[0].shape[1:3]))
                out_im = _np.ones((results[0].shape[0],
                                   im.shape[1],
                                   im.shape[2]),
                                  dtype=results[0].dtype) * -1.
                curr_y = 0
                curr_x = 0
                for val in results:
                    # Write back with max.
                    roi = out_im[:,
                                 curr_y:curr_y+val.shape[1],
                                 curr_x:curr_x+val.shape[2]]
                    if overlap_combine_max:
                        out_im[:,
                               curr_y:curr_y+val.shape[1],
                               curr_x:curr_x+val.shape[2]] =\
                                    _np.maximum(roi, val)
                    else:
                        for c_idx in range(roi.shape[0]):
                            for y_idx in range(roi.shape[1]):
                                for x_idx in range(roi.shape[2]):
                                    if roi[c_idx, y_idx, x_idx] == -1:
                                        roi[c_idx, y_idx, x_idx] = \
                                            val[c_idx, y_idx, x_idx]
                                    else:
                                        roi[c_idx, y_idx, x_idx] = \
                                            (val[c_idx, y_idx, x_idx] +
                                             roi[c_idx, y_idx, x_idx]) / 2.
                    # Find the position in the original image.
                    if (curr_x + extraction_step[1] + input_image_dims[1]
                            > out_im.shape[2]):
                        curr_y += extraction_step[0]
                        curr_x = 0
                    else:
                        curr_x += extraction_step[1]
                output_images.append(out_im)
            else:
                if account_for_step or extraction_step == (1, 1):
                    out_im = _np.zeros((results[0].shape[0],
                                        im.shape[1],
                                        im.shape[2]),
                                       dtype=results[0].dtype)
                    # Collect the values.
                    collected = _np.empty(_np.hstack(([results[0].shape[0]],
                                                      sampled_shape)),
                                          dtype=results[0].dtype)
                    for val_idx, val in enumerate(results):
                        try:
                            val.shape = (results[0].shape[0],)
                        except (ValueError, AttributeError):  # pragma: no cover
                            raise Exception(
                                ("The output shape of the net must be "
                                 "(X, 1, 1) or (X, input_height, input_width) "
                                 "to be used with the `predict_sliding_window` "
                                 "method. It is {}.").format(val.shape))
                        collected[:,
                                  val_idx // sampled_shape[1],
                                  val_idx % sampled_shape[1]] = val[:]
                    # Resize.
                    for layer_idx in range(results[0].shape[0]):
                        layer_area = out_im[
                            layer_idx,
                            int(_np.ceil(input_image_dims[0] / 2.))-1:
                            -int(_np.ceil(input_image_dims[0] / 2.))+1,
                            int(_np.ceil(input_image_dims[1] / 2.))-1:
                            -int(_np.ceil(input_image_dims[1] / 2.))+1]
                        layer_area[...] = _cv2resize(
                            collected[layer_idx],
                            (layer_area.shape[1],
                             layer_area.shape[0]),
                            interpolation=interpolation_method)
                    if not pad_border:
                        out_im = out_im[
                            :,
                            int(_np.ceil(input_image_dims[0] / 2.))-1:
                            -int(_np.ceil(input_image_dims[0] / 2.))+1,
                            int(_np.ceil(input_image_dims[1] / 2.))-1:
                            -int(_np.ceil(input_image_dims[1] / 2.))+1]
                    output_images.append(out_im)
                else:
                    # Collect the values.
                    collected = _np.empty(_np.hstack(([results[0].shape[0]],
                                                      sampled_shape)),
                                          dtype=results[0].dtype)
                    for val_idx, val in enumerate(results):
                        try:
                            val.shape = (results[0].shape[0],)
                        except (ValueError, AttributeError):  # pragma: no cover
                            raise Exception(
                                ("The output shape of the net must be (X, 1, 1) to be "  # noqa
                                 "used with the `predict_sliding_window` method. It is "  # noqa
                                 "{}.").format(val.shape))
                        collected[:,
                                  val_idx // sampled_shape[1],
                                  val_idx % sampled_shape[1]] = val[:]
                    output_images.append(collected)
            _LOGGER.debug("Processed image %d in %03.2fs.",
                          im_id,
                          _time.time() - image_beginpoint)
        return output_images
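The max write-back used for full-size outputs can be shown in isolation; a minimal numpy sketch with hypothetical values (one channel, two windows overlapping by two columns):

import numpy as np

# Canvas initialized to -1, as in the method above.
out_im = np.full((1, 4, 6), -1.0)
vals = [np.full((1, 4, 4), 0.2), np.full((1, 4, 4), 0.7)]
for curr_x, val in zip((0, 2), vals):
    roi = out_im[:, :, curr_x:curr_x + val.shape[2]]
    # overlap_combine_max=True branch: elementwise maximum per position.
    out_im[:, :, curr_x:curr_x + val.shape[2]] = np.maximum(roi, val)
print(out_im[0, 0])  # [0.2 0.2 0.7 0.7 0.7 0.7]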
Example #9
    def predict_sliding_window(self,
                               input_sequence,
                               pre_batch_callbacks=None,
                               post_batch_callbacks=None,
                               out_blob_names=None,
                               use_fit_network=False,
                               oversample=False,
                               extraction_step=(1, 1),
                               account_for_step=True,
                               interpolation_method=_cv2INTER_NEAREST,
                               pad_border=True):
        """
        Get predictions for all images in a sliding window manner.

        Similar to the :py:func:`barrista.net.Net.predict` method. See there
        for the parameter descriptions. For this method, every image must be
        at least as big as the input size. It is then sampled using sliding
        window, and for each output layer the reassembled images are returned.

        The output of the network for one patch must be of shape
        (num_layers X 1 X 1) and currently only one output layer is supported.

        :param input_sequence: iterable(3D numpy arrays)
          The 3D numpy arrays must match in their first dimension with the
          second dimension of the network input (number of channels). E.g.,
          for a network with input shape [10, 3, 24, 24], you could provide
          inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
          3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
          and different for each image. Multi-input networks are not yet
          supported by this method.

        :param pre_batch_callbacks: list(callable) or None.
          Each element of this list will be called with the batch id as
          argument before forward propagating a batch.

        :param post_batch_callbacks: list(callable) or None.
          Like ``pre_batch_callbacks``, but called after a batch has been
          forward propagated.

        :param out_blob_names: list(string) or None.
          The names of the blobs of which the values are returned. If
          unspecified, uses ``self.outputs``. Must currently contain exactly
          one element.

        :param use_fit_network: bool.
          If set to ``True``, always use this very network, independent of
          whether an internal network in stage ``predict`` is available.
          For more information, see the constructor documentation.

        :param oversample: bool.
          If set to ``True``, uses oversampling and averages the results. You
          have to take care to bring them into the right shape for further
          processing yourself.

        :param extraction_step: 2-tuple(int).
          Window step size in y and x direction.

        :param account_for_step: bool.
          If set to True, the output is resized with nearest neighbor
          interpolation to get a full-sized image.

        :param interpolation_method: int in {cv2.INTER_...}.
          The interpolation strategy used, if ``account_for_step`` is set and
          the ``extraction_step`` is not ``(1, 1)``.

        :param pad_border: bool.
          Whether to return images at the original image size by adding
          zero-padded borders.
        """
        if self._predict_variant is not None and not use_fit_network:
            _LOGGER.info("Using prediction network variant.")
            prednet = self._predict_variant
        else:
            prednet = self
        if extraction_step != (1, 1) and pad_border is True:
            assert account_for_step,\
                ("If ``extraction_step`` != (1, 1) and "
                 "``pad_border`` is set, ``account_for_step`` must be set, "
                 "too.")
        input_dims = prednet.blobs[prednet.inputs[0]].data.shape
        input_image_dims = _np.array(input_dims[2:])
        for im in input_sequence:
            assert im.shape[0] == input_dims[1]
            assert im.shape[1] >= input_image_dims[0]
            assert im.shape[2] >= input_image_dims[1]
        assert (out_blob_names is None or len(out_blob_names)
                == 1), "Only one output layer is supported!"  # noqa
        output_images = []
        for im_id, im in enumerate(input_sequence):
            _LOGGER.info("Processing image %d...", im_id)
            image_beginpoint = _time.time()
            patches = _extract_patches(im,
                                       patch_shape=(input_dims[1],
                                                    input_image_dims[0],
                                                    input_image_dims[1]),
                                       extraction_step=(1, extraction_step[0],
                                                        extraction_step[1]))
            sampled_shape = patches.shape[1:3]
            patches = patches.reshape(
                _np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
            results = prednet.predict(
                patches,
                pre_batch_callbacks=pre_batch_callbacks,  # noqa
                post_batch_callbacks=post_batch_callbacks,
                out_blob_names=out_blob_names,
                use_fit_network=use_fit_network,
                oversample=oversample,
                before_oversample_resize_to=None)
            if account_for_step or extraction_step == (1, 1):
                out_im = _np.zeros(
                    (results[0].shape[0], im.shape[1], im.shape[2]),
                    dtype=results[0].dtype)
                # Collect the values.
                collected = _np.empty(_np.hstack(
                    ([results[0].shape[0]], sampled_shape)),
                                      dtype=results[0].dtype)
                for val_idx, val in enumerate(results):
                    try:
                        val.shape = (results[0].shape[0], )
                    except (ValueError, AttributeError):
                        raise Exception((
                            "The output shape of the net must be (X, 1, 1) to be "  # noqa
                            "used with the `predict_sliding_window` method. It is "  # noqa
                            "{}.").format(val.shape))
                    collected[:, val_idx // sampled_shape[1],
                              val_idx % sampled_shape[1]] = val[:]
                # Resize.
                for layer_idx in range(results[0].shape[0]):
                    layer_area = out_im[
                        layer_idx,
                        int(_np.ceil(input_image_dims[0] / 2.)) -
                        1:-int(_np.ceil(input_image_dims[0] / 2.)) + 1,
                        int(_np.ceil(input_image_dims[1] / 2.)) -
                        1:-int(_np.ceil(input_image_dims[1] / 2.)) + 1]
                    layer_area[...] = _cv2resize(
                        collected[layer_idx],
                        (layer_area.shape[1], layer_area.shape[0]),
                        interpolation=interpolation_method)
                if not pad_border:
                    out_im = out_im[:,
                                    int(_np.ceil(input_image_dims[0] / 2.)) -
                                    1:-int(_np.ceil(input_image_dims[0] /
                                                    2.)) + 1,
                                    int(_np.ceil(input_image_dims[1] / 2.)) -
                                    1:-int(_np.ceil(input_image_dims[1] /
                                                    2.)) + 1]
                output_images.append(out_im)
            else:
                # Collect the values.
                collected = _np.empty(_np.hstack(
                    ([results[0].shape[0]], sampled_shape)),
                                      dtype=results[0].dtype)
                for val_idx, val in enumerate(results):
                    try:
                        val.shape = (results[0].shape[0], )
                    except (ValueError, AttributeError):
                        raise Exception((
                            "The output shape of the net must be (X, 1, 1) to be "  # noqa
                            "used with the `predict_sliding_window` method. It is "  # noqa
                            "{}.").format(val.shape))
                    collected[:, val_idx // sampled_shape[1],
                              val_idx % sampled_shape[1]] = val[:]
                output_images.append(collected)
            _LOGGER.info("Processed image %d in %ds.", im_id,
                         _time.time() - image_beginpoint)
        return output_images
Example #10
import numpy as np
import torch


def extract_patches(images, patch_shape, stride, in_order="NHWC", out_order="NHWC"):
    assert 2 <= images.ndim <= 4
    if isinstance(images, np.ndarray):
        from sklearn.feature_extraction.image import _extract_patches

        if images.ndim == 2:  # single gray image
            images = np.expand_dims(images, 0)

        if images.ndim == 3:
            if images.shape[2] == 3:  # single color image
                images = np.expand_dims(images, 0)
            else:  # multiple gray images or single gray image with first index 1
                images = np.expand_dims(images, 3)

        elif in_order == "NCHW":
            images = images.transpose(0, 2, 3, 1)
        # numpy expects order NHWC
        patches = _extract_patches(
            images,
            patch_shape=(1, *patch_shape),
            extraction_step=(1, stride, stride, 1),
        ).reshape(-1, *patch_shape)
        # now patches' shape = NHWC

        if out_order == "NHWC":
            pass
        elif out_order == "NCHW":
            patches = patches.permute(0, 3, 1, 2)
        else:
            raise ValueError(
                'out_order not understood (expected "NHWC" or "NCHW")')

    elif isinstance(images, torch.Tensor):
        if images.ndim == 2:  # single gray image
            images = images.unsqueeze(0)

        if images.ndim == 3:
            if images.shape[2] == 3:  # single color image
                images = images.unsqueeze(0)
            else:  # multiple gray images
                images = images.unsqueeze(3)

        if in_order == "NHWC":
            images = images.permute(0, 3, 1, 2)
        # torch expects order NCHW

        patches = torch.nn.functional.unfold(
            images, kernel_size=patch_shape[:2], stride=stride
        )

        # all these operations are done to circumvent pytorch's N,C,W,H ordering

        patches = patches.permute(0, 2, 1)
        nb_patches = patches.shape[0] * patches.shape[1]
        patches = patches.reshape(nb_patches, patch_shape[2], *patch_shape[:2])
        # now patches' shape = NCHW
        if out_order == "NHWC":
            patches = patches.permute(0, 2, 3, 1)
        elif out_order == "NCHW":
            pass
        else:
            raise ValueError(
                'out_order not understood (expected "NHWC" or "NCHW")')

    else:
        raise TypeError("images must be a numpy array or a torch.Tensor")

    return patches
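A quick parity check between the two backends (assuming numpy and torch are installed; both paths should emit the same number and layout of patches for a single gray image):

import numpy as np
import torch

img_np = np.arange(36, dtype=np.float32).reshape(6, 6)  # single gray image
img_t = torch.from_numpy(img_np)
p_np = extract_patches(img_np, patch_shape=(3, 3, 1), stride=1)
p_t = extract_patches(img_t, patch_shape=(3, 3, 1), stride=1)
print(p_np.shape, tuple(p_t.shape))  # (16, 3, 3, 1) (16, 3, 3, 1)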
Example #11
    def predict_sliding_window(self,
                               input_sequence,
                               test_callbacks=None,
                               out_blob_names=None,
                               use_fit_network=False,
                               oversample=False,
                               extraction_step=(1, 1),
                               account_for_step=True,
                               interpolation_method=_cv2INTER_NEAREST,
                               pad_border=True,
                               overlap_combine_max=True):
        """
        Get predictions for all images in a sliding window manner.

        Similar to the :py:func:`barrista.net.Net.predict` method. See there
        for the parameter descriptions. For this method, every image must be
        at least as big as the input size. It is then sampled using sliding
        window, and for each output layer the reassembled images are returned.

        The output of the network for one patch must either be of shape
        (num_layers X 1 X 1) or of shape (num_layers X input_height X input_width)
        and currently only one output layer is supported. If the output is of the
        same shape as the input, `account_for_step` does not play a role, and the
        inputs are combined by using the max of the predictions per position.

        :param input_sequence: iterable(3D numpy arrays)
          The 3D numpy arrays must match in their first dimension with the
          second dimension of the network input (number of channels). E.g.,
          for a network with input shape [10, 3, 24, 24], you could provide
          inputs as 4D numpy array of shape [100, 3, 10, 10] or a list of
          3D numpy arrays of shape [3, Y, X], where X and Y may be arbitrary
          and different for each image. Multi-input networks are not yet
          supported by this method.

        :param test_callbacks: list(barrista.monitoring.Monitor) or None.
          List of callback callables. Will be called pre and post batch
          processing. This list will be processed sequentially, meaning that
          monitors in the sequence can provide information for later monitors
          as done with the ``ResultExtractor``.

        :param out_blob_names: list(string) or None.
          The names of the blobs of which the values are returned. If
          unspecified, uses ``self.outputs``. Must currently contain exactly
          one element.

        :param use_fit_network: bool.
          If set to ``True``, always use this very network, independent of
          whether an internal network in stage ``predict`` is available.
          For more information, see the constructor documentation.

        :param oversample: bool.
          If set to ``True``, uses oversampling and averages the results. You
          have to take care to bring them into the right shape for further
          processing yourself.

        :param extraction_step: 2-tuple(int).
          Window step size in y and x direction.

        :param account_for_step: bool.
          If set to True, the output is resized with nearest neighbor
          interpolation to get a full-sized image.

        :param interpolation_method: int in {cv2.INTER_...}.
          The interpolation strategy used, if ``account_for_step`` is set and
          the ``extraction_step`` is not ``(1, 1)``.

        :param pad_border: bool.
          Whether to return images at the original image size by adding
          zero-padded borders.

        :param overlap_combine_max: bool.
          Whether to combine overlapping output areas with the elementwise
          maximum (``True``) or a running average (``False``). Only relevant
          if the network output size equals the input size and the step size
          is smaller than the output. Default: True.
        """
        if self._predict_variant is not None and not use_fit_network:
            _LOGGER.debug("Using prediction network variant.")
            prednet = self._predict_variant
        else:
            prednet = self
        if extraction_step != (1, 1) and pad_border is True:
            assert account_for_step,\
                ("If ``extraction_step`` != (1, 1) and "
                 "``pad_border`` is set, ``account_for_step`` must be set, "
                 "too.")
        input_dims = prednet.blobs[prednet.inputs[0]].data.shape
        input_image_dims = _np.array(input_dims[2:])
        for im in input_sequence:
            assert im.shape[0] == input_dims[1]
            assert im.shape[1] >= input_image_dims[0]
            assert im.shape[2] >= input_image_dims[1]
        assert (out_blob_names is None or len(out_blob_names)
                == 1), "Only one output layer is supported!"  # noqa
        output_images = []
        # pylint: disable=too-many-nested-blocks
        for im_id, im in enumerate(input_sequence):
            _LOGGER.debug("Processing image %d...", im_id)
            image_beginpoint = _time.time()
            patches = _extract_patches(im,
                                       patch_shape=(input_dims[1],
                                                    input_image_dims[0],
                                                    input_image_dims[1]),
                                       extraction_step=(1, extraction_step[0],
                                                        extraction_step[1]))
            sampled_shape = patches.shape[1:3]
            patches = patches.reshape(
                _np.hstack((_np.prod(patches.shape[:3]), patches.shape[3:])))
            results = prednet.predict(patches,
                                      test_callbacks=test_callbacks,
                                      out_blob_names=out_blob_names,
                                      use_fit_network=use_fit_network,
                                      oversample=oversample,
                                      before_oversample_resize_to=None)
            if results[0].size > results[0].shape[0]:
                assert (results[0].ndim == 3
                        and _np.all(results[0].shape[1:3] == input_image_dims)
                        ), (("The output shape of the net must be "
                             "(X, 1, 1) or (X, input_height, input_width) "
                             "to be used with the `predict_sliding_window` "
                             "method. {} vs {}.").format(
                                 input_image_dims, results[0].shape[1:3]))
                out_im = _np.ones(
                    (results[0].shape[0], im.shape[1], im.shape[2]),
                    dtype=results[0].dtype) * -1.
                curr_y = 0
                curr_x = 0
                for val in results:
                    # Write back with max.
                    roi = out_im[:, curr_y:curr_y + val.shape[1],
                                 curr_x:curr_x + val.shape[2]]
                    if overlap_combine_max:
                        out_im[:,
                               curr_y:curr_y+val.shape[1],
                               curr_x:curr_x+val.shape[2]] =\
                                    _np.maximum(roi, val)
                    else:
                        for c_idx in range(roi.shape[0]):
                            for y_idx in range(roi.shape[1]):
                                for x_idx in range(roi.shape[2]):
                                    if roi[c_idx, y_idx, x_idx] == -1:
                                        roi[c_idx, y_idx, x_idx] = \
                                            val[c_idx, y_idx, x_idx]
                                    else:
                                        roi[c_idx, y_idx, x_idx] = \
                                            (val[c_idx, y_idx, x_idx] +
                                             roi[c_idx, y_idx, x_idx]) / 2.
                    # Find the position in the original image.
                    if (curr_x + extraction_step[1] + input_image_dims[1] >
                            out_im.shape[2]):
                        curr_y += extraction_step[0]
                        curr_x = 0
                    else:
                        curr_x += extraction_step[1]
                output_images.append(out_im)
            else:
                if account_for_step or extraction_step == (1, 1):
                    out_im = _np.zeros(
                        (results[0].shape[0], im.shape[1], im.shape[2]),
                        dtype=results[0].dtype)
                    # Collect the values.
                    collected = _np.empty(_np.hstack(
                        ([results[0].shape[0]], sampled_shape)),
                                          dtype=results[0].dtype)
                    for val_idx, val in enumerate(results):
                        try:
                            val.shape = (results[0].shape[0], )
                        except (ValueError, AttributeError):  # pragma: no cover
                            raise Exception((
                                "The output shape of the net must be "
                                "(X, 1, 1) or (X, input_height, input_width) "
                                "to be used with the `predict_sliding_window` "
                                "method. It is {}.").format(val.shape))
                        collected[:, val_idx // sampled_shape[1],
                                  val_idx % sampled_shape[1]] = val[:]
                    # Resize.
                    for layer_idx in range(results[0].shape[0]):
                        layer_area = out_im[
                            layer_idx,
                            int(_np.ceil(input_image_dims[0] / 2.)) -
                            1:-int(_np.ceil(input_image_dims[0] / 2.)) + 1,
                            int(_np.ceil(input_image_dims[1] / 2.)) -
                            1:-int(_np.ceil(input_image_dims[1] / 2.)) + 1]
                        layer_area[...] = _cv2resize(
                            collected[layer_idx],
                            (layer_area.shape[1], layer_area.shape[0]),
                            interpolation=interpolation_method)
                    if not pad_border:
                        out_im = out_im[:,
                                        int(_np.ceil(input_image_dims[0] /
                                                     2.)) - 1:
                                        -int(_np.ceil(input_image_dims[0] /
                                                      2.)) + 1,
                                        int(_np.ceil(input_image_dims[1] /
                                                     2.)) - 1:
                                        -int(_np.ceil(input_image_dims[1] /
                                                      2.)) + 1]
                    output_images.append(out_im)
                else:
                    # Collect the values.
                    collected = _np.empty(_np.hstack(
                        ([results[0].shape[0]], sampled_shape)),
                                          dtype=results[0].dtype)
                    for val_idx, val in enumerate(results):
                        try:
                            val.shape = (results[0].shape[0], )
                        except (ValueError, AttributeError):  # pragma: no cover
                            raise Exception((
                                "The output shape of the net must be (X, 1, 1) to be "  # noqa
                                "used with the `predict_sliding_window` method. It is "  # noqa
                                "{}.").format(val.shape))
                        collected[:, val_idx // sampled_shape[1],
                                  val_idx % sampled_shape[1]] = val[:]
                    output_images.append(collected)
            _LOGGER.debug("Processed image %d in %03.2fs.", im_id,
                          _time.time() - image_beginpoint)
        return output_images