Beispiel #1
0
def adjust_gamma(image: torch.Tensor,
                 gamma: float = 1.0,
                 gain: float = 1.0) -> torch.Tensor:
    """Apply gamma correction (power-law transform) to an image tensor.

    Every pixel is divided by the width of the intensity range reported by
    ``dtype_limits``, raised to the power ``gamma``, multiplied back by that
    width and finally by ``gain``::

        O = gain * scale * (I / scale) ** gamma

    The result is cast back to the dtype of the input.

    Parameters
    ----------
    image : torch.Tensor
        Input image.
    gamma : float, default 1.0
        Exponent of the transform. Must be non-negative.
    gain : float, default 1.0
        Constant multiplier applied to the transformed image.

    Returns
    -------
    torch.Tensor
        Gamma corrected image, with the same dtype as ``image``.

    Raises
    ------
    ValueError
        If ``gamma`` is negative.

    See Also
    --------
    adjust_log

    Notes
    -----
    For gamma greater than 1 the histogram shifts to the left and the output
    is darker than the input. For gamma less than 1 the histogram shifts to
    the right and the output is brighter than the input.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Gamma_correction

    Examples
    --------
    >>> from skimage import data, exposure, img_as_float
    >>> import torch
    >>> image = torch.tensor(img_as_float(data.moon()))
    >>> gamma_corrected = exposure.adjust_gamma(image, 2)
    >>> # Output is darker for gamma > 1
    >>> bool(image.mean() > gamma_corrected.mean())
    True
    """
    source_dtype: torch.dtype = image.dtype

    if gamma < 0:
        raise ValueError("Gamma should be a non-negative real number.")

    # Width of the intensity range; the limits are looked up once and reused.
    limits = dtype_limits(image, True)
    scale = float(limits[1] - limits[0])

    normalised = image / scale
    corrected = normalised ** gamma * scale * gain

    # Restore the caller's dtype (the division above may have promoted it).
    return corrected.type(source_dtype)
Beispiel #2
0
def _bin_count_histogram(
    image: torch.Tensor,
    source_range: Optional[str] = 'image'
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Efficient histogram calculation for an image of integers.

    This function is significantly more efficient than np.histogram but
    works only on images of integers. It is based on np.bincount.

    Parameters
    ----------
    image : torch.Tensor
        Input image
    source_range : Optional[str], default 'image'
        image: determines the range from the input image
        dtype: determines the range from the expected range of the images
    of that data type.

    Returns
    -------
    Tuple[torch.Tensor, torch.Tensor]
        hist: The values of the histogram.
        bin_centers: The values at the center of the bins.
    """
    if source_range not in ['image', 'dtype']:
        raise ValueError(
            f'Incorrect value for `source_range` argument: {source_range}')

    if source_range == 'image':
        max_v = torch.max(image).item()
        min_v = torch.min(image).item()
    elif source_range == 'dtype':
        min_v, max_v = dtype_limits(image, clip_negative=False)

    image = _offset_array(image.flatten(), min_v, max_v)
    hist = torch.bincount(image, minlength=int(max_v - min_v + 1))
    bin_centers = torch.arange(min_v, max_v + 1)

    if source_range == 'image':
        idx: int = int(max(min_v, 0))
        hist = hist[idx:]
    return hist, bin_centers
Beispiel #3
0
def histogram(
        image: torch.Tensor,
        nbins: Optional[int] = 256,
        source_range: Optional[str] = 'image',
        normalize: Optional[bool] = False
) -> Tuple[torch.Tensor, torch.Tensor]:
    """Compute the histogram of an image tensor.

    Unlike `numpy.histogram`, this function returns the centers of the bins
    and does not rebin integer tensors: for integer input each integer value
    gets its own bin. The histogram is always computed on the flattened
    image, so for color images call this function once per channel to get
    one histogram per channel.

    Parameters
    ----------
    image : torch.Tensor
        Input image.
    nbins : Optional[int], default 256
        Number of bins used for floating point input. Ignored for integer
        tensors.
    source_range : Optional[str], default 'image'
        'image' (default) takes the range from the input image.
        'dtype' takes the range from the expected range of images of that
        data type.
    normalize : Optional[bool], default False
        If True, divide the histogram by the sum of its values.

    Returns
    -------
    Tuple[torch.Tensor, torch.Tensor]
        hist: The values of the histogram (cast to ``long`` unless
        ``normalize`` is set).
        bin_centers: The values at the center of bins.

    Raises
    ------
    ValueError
        If ``nbins`` is not an ``int``, or if ``source_range`` is not one of
        'image' and 'dtype'.

    Warns
    -----
    UserWarning
        If ``image`` is 3-dimensional with a first dimension smaller than 4,
        since it might be a color image.

    See Also
    --------
    cumulative_distribution

    Examples
    --------
    >>> from skimage import data, exposure, img_as_float
    >>> import numpy as np
    >>> import torch
    >>> image = img_as_float(data.camera())
    >>> np.histogram(image, bins=2)
    (array([107432, 154712]), array([ 0. ,  0.5,  1. ]))
    >>> image = torch.tensor(img_as_float(data.camera()))
    >>> exposure.histogram(image, nbins=2)
    (tensor([107432, 154712]), tensor([ 0.2500,  0.7500]))
    """
    if not isinstance(nbins, int):
        raise ValueError("Given bin cannot be non integer type")

    dims = image.size()
    looks_like_color = len(dims) == 3 and dims[0] < 4
    if looks_like_color:
        warnings.warn("""This might be a color image. The histogram will be
             computed on the flattened image. You can instead
             apply this function to each color channel.""")

    flat = image.flatten()
    data_min = torch.min(flat).item()
    data_max = torch.max(flat).item()

    if torch.is_floating_point(flat):
        # Floating point data is binned into `nbins` equal-width bins over
        # the selected range.
        if source_range == 'image':
            low, high = data_min, data_max
        elif source_range == 'dtype':
            low, high = dtype_limits(flat, clip_negative=False)
        else:
            raise ValueError("Wrong value for the `source_range` argument")
        hist = torch.histc(flat, nbins, min=low, max=high)
        bin_centers = _calc_bin_centers(low, high, nbins)
    else:
        # Integer data (e.g. gray scale 0-255) takes the fast path: one bin
        # per pixel value.
        hist, bin_centers = _bin_count_histogram(flat, source_range)

    if not normalize:
        return (hist.long(), bin_centers)

    total = float(torch.sum(hist).item())
    return (torch.div(hist, total), bin_centers)
 def _threshold_otsu(_image, *args):
     """Binarize ``_image`` with OpenCV's Otsu thresholding.

     The image is divided by the upper limit reported by
     ``utils.dtype_limits``, multiplied by 255 and converted to ``uint8``
     before being thresholded. Extra positional arguments are accepted and
     ignored (presumably to share a call signature with the fixed-threshold
     variant -- confirm against the caller).

     Returns the thresholded image, i.e. element 1 of the ``cv2.threshold``
     result.
     """
     upper_limit = utils.dtype_limits(_image)[1]
     eight_bit = np.uint8(_image / upper_limit * 255.)
     otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
     result = cv2.threshold(eight_bit, 0, 255, otsu_mode)
     return result[1]
def find_text_box(contour,
                  image=None,
                  bdiff=None,
                  max_rad=(32, 32),
                  default_rad=(2, 2),
                  raw=False,
                  offset=(0, 0),
                  thresh=0.015):
    """
    Locate the box around `contour` by scanning binarized difference images.

    Starting from the bounding rect of `contour`, the left/right borders are
    searched for in `bdiff[0]` and then the top/bottom borders in `bdiff[1]`,
    each within a window derived from `max_rad`. A border for which no
    non-zero pixel is found falls back to a position derived from
    `default_rad`.

    Side effect: the shrunken contour is filled with zeros in both `bdiff`
    arrays in place, including arrays supplied by the caller.

    :param contour: object exposing `rect` (with `left`, `right`, `top` and
    `bottom`); it is passed to `utils.shrink_contour`, whose result's `raw`
    attribute is drawn with `cv2.drawContours`
    :param image: source image, converted with `cv2.COLOR_BGR2GRAY`; only used
    when `bdiff` is None
    :param bdiff: tuple of differences along each axis; built from `image`
    when None
    :param max_rad: tuple of max values (relative to the bounding rect of
    `contour`) of box radius by each axis
    :param default_rad: tuple of default radius values that will be used if
    the algorithm is unable to detect a box
    :param raw: if true, return the plain tuple
    `(left, top, right - left, bottom - top)` instead of a `Rectangle`
    :param offset: `(x, y)` pair subtracted from the `contour.rect`
    coordinates when indexing `bdiff`, passed to `cv2.drawContours` as its
    `offset`, and added back to the returned coordinates
    :param thresh: factor applied to `utils.dtype_limits(...)[1]` to get the
    binarization threshold when `bdiff` is built from `image`
    :return: Rectangle object (or a 4-tuple when `raw` is true)
    :raises AssertionError: if both `image` and `bdiff` are None
    """
    assert (image is not None or bdiff is not None)
    if bdiff is None:
        # No precomputed differences: derive them from the grayscale image
        # scaled by 1/255.
        diffs = utils.differentiate(
            (cv2.cvtColor(image, cv2.COLOR_BGR2GRAY).astype(np.float32)) /
            255.,
            xkernel=5,
            ykernel=5,
            metric=utils.METRIC_SPLIT)

        # Binarize both difference images (values above the threshold
        # become 255).
        # NOTE(review): the threshold for diffs[1] is computed from the
        # limits of diffs[0] -- possibly a copy-paste slip; confirm.
        bdiff = (np.uint8(
            cv2.threshold(diffs[0],
                          thresh * utils.dtype_limits(diffs[0])[1], 255,
                          cv2.THRESH_BINARY)[1]),
                 np.uint8(
                     cv2.threshold(diffs[1],
                                   thresh * utils.dtype_limits(diffs[0])[1],
                                   255, cv2.THRESH_BINARY)[1]))
    # Fill the shrunken contour with zeros (color 0, thickness -1) in both
    # arrays, presumably so the contour's own edges are not picked up by the
    # border search below.
    # NOTE(review): cv2.drawContours shifts the drawn contour by +offset,
    # while the rect coordinates below are shifted by -offset -- confirm the
    # coordinate frame of the contour points.
    for_draw = [utils.shrink_contour(contour).raw]
    # for_draw = [contour.raw]
    cv2.drawContours(bdiff[0], for_draw, -1, 0, -1, offset=offset)
    cv2.drawContours(bdiff[1], for_draw, -1, 0, -1, offset=offset)
    # cv2.imshow(str(time.time()), bdiff[0])
    # cv2.imshow(str(time.time()), bdiff[1])

    # Search windows in bdiff coordinates (rect coordinates minus offset).
    # The left and top bounds are clamped at 0; the right and bottom bounds
    # are not clamped here.
    max_left = max(contour.rect.left + 2 - offset[0], 0)
    min_left = max(contour.rect.left - max_rad[0] - offset[0], 0)

    min_right = contour.rect.right - offset[0] - 1
    max_right = contour.rect.right + max_rad[0] - offset[0] + 1

    max_top = max(contour.rect.top + 2 - offset[1], 0)
    min_top = max(contour.rect.top - max_rad[1] - offset[1], 0)

    min_bottom = contour.rect.bottom - 1 - offset[1]
    max_bottom = contour.rect.bottom + max_rad[1] + 1 - offset[1]

    # `top` and `bottom` select the rows scanned by the left/right search.
    # NOTE(review): `left` and `right` assigned here are overwritten below
    # before they are ever read.
    left = max_left
    right = min_right
    top = max_top
    bottom = min_bottom

    # Move left border
    # Non-zero columns on either the `top` or the `bottom` row of bdiff[0].
    left_arr = (bdiff[0][top, min_left:max_left]
                | bdiff[0][bottom, min_left:max_left]).nonzero()[0]

    if left_arr.shape[0] == 0:
        # Nothing found: fall back to the default radius, clamped at 0.
        left = max(contour.rect.left + 1 - default_rad[0] - offset[0], 0)
    else:
        # Last hit in the window, i.e. the one nearest the contour.
        left = min_left + left_arr[left_arr.shape[0] - 1]

    # Move right border
    right_arr = (bdiff[0][top, min_right:max_right]
                 | bdiff[0][bottom, min_right:max_right]).nonzero()[0]

    if right_arr.shape[0] == 0:
        # Nothing found: default radius, clamped to the last column.
        right = min(contour.rect.right - 1 + default_rad[0] - offset[0],
                    bdiff[0].shape[1] - 1)
    else:
        # First hit in the window, i.e. the one nearest the contour.
        right = min_right + right_arr[0]

    # Move top border
    # Uses the `left` and `right` columns found above, scanned in bdiff[1].
    top_arr = (bdiff[1][min_top:max_top, left]
               | bdiff[1][min_top:max_top, right]).nonzero()[0]
    if top_arr.shape[0] == 0:
        top = max(contour.rect.top + 1 - default_rad[1] - offset[1], 0)
    else:
        top = min_top + top_arr[top_arr.shape[0] - 1]

    # Move bottom border
    bottom_arr = (bdiff[1][min_bottom:max_bottom, left]
                  | bdiff[1][min_bottom:max_bottom, right]).nonzero()[0]

    if bottom_arr.shape[0] == 0:
        # Nothing found: default radius, clamped to the last row.
        bottom = min(contour.rect.bottom - 1 + default_rad[1] - offset[1],
                     bdiff[1].shape[0] - 1)
    else:
        bottom = min_bottom + bottom_arr[0]

    # Translate the result back by `offset`.
    left += offset[0]
    right += offset[0]
    top += offset[1]
    bottom += offset[1]

    if raw:
        return left, top, right - left, bottom - top
    return Rectangle(left, top, right - left, bottom - top)
 def _threshold_normal(_image, _thresh):
     """Binarize ``_image`` with a fixed relative threshold.

     The cut-off is ``_thresh`` multiplied by the upper limit reported by
     ``utils.dtype_limits`` for the image. The image is thresholded with
     ``cv2.THRESH_BINARY`` (maximum value 255) and the thresholded image is
     returned converted to ``uint8``.
     """
     cutoff = _thresh * utils.dtype_limits(_image)[1]
     binary = cv2.threshold(_image, cutoff, 255, cv2.THRESH_BINARY)[1]
     return np.uint8(binary)