Code example #1
    def forward(self, input, target, mask=None):
        assert input.shape == target.shape

        if mask is not None:
            # NOTE: boolean indexing flattens the tensors to 1-D, while
            # spatial_gradient below expects a 4-D (B, 1, H, W) input, so the
            # mask must preserve that layout (e.g., a per-sample mask of shape (B,)).
            input = input[mask]
            target = target[mask]

        # Calculate spatial depth gradient
        grad_input = KF.spatial_gradient(input, mode='sobel')
        grad_target = KF.spatial_gradient(target, mode='sobel')

        # Create homogeneous column vectors
        n_input = torch.cat((-grad_input.view(-1),
                             torch.ones(1, dtype=torch.float32,
                                        device=grad_input.device)))
        n_target = torch.cat((-grad_target.view(-1),
                              torch.ones(1, dtype=torch.float32,
                                         device=grad_target.device)))

        # Inner product of prediction and target
        numerator = torch.dot(n_input, n_target)

        # Normalize by vector magnitudes
        d1 = torch.sqrt(torch.dot(n_input, n_input))
        d2 = torch.sqrt(torch.dot(n_target, n_target))
        denominator = torch.mul(d1, d2)

        losses = 1 - numerator / denominator
        return torch.mean(losses)
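For reference, a self-contained restatement of the computation above (the name normal_cosine_loss is made up here, KF is assumed to be kornia.filters, and the optional mask is omitted):

import torch
import kornia.filters as KF

def normal_cosine_loss(pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    # 1 - cosine similarity between homogeneous negative-gradient vectors.
    g_p = KF.spatial_gradient(pred, mode='sobel')      # (B, C, 2, H, W)
    g_t = KF.spatial_gradient(target, mode='sobel')
    one = torch.ones(1, dtype=pred.dtype, device=pred.device)
    n_p = torch.cat((-g_p.reshape(-1), one))
    n_t = torch.cat((-g_t.reshape(-1), one))
    return 1.0 - torch.dot(n_p, n_t) / (n_p.norm() * n_t.norm())

loss = normal_cosine_loss(torch.rand(2, 1, 16, 16), torch.rand(2, 1, 16, 16))
print(loss)  # scalar tensor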
Code example #2
File: steal.py  Project: ustczhouyu/contour
def get_normal_anlges(image, eps=EPS):
    """Calculate the normal direction of edges.

    Ref: https://github.com/nv-tlabs/STEAL/blob/master/utils/edges_nms.m
    """
    first_grads = spatial_gradient(gaussian_blur2d(image, (5, 5), (2, 2)))
    second_grad_x = spatial_gradient(first_grads[:, :, 0, :, :].squeeze_(2))
    second_grad_y = spatial_gradient(first_grads[:, :, 1, :, :].squeeze_(2))

    grad_xx = second_grad_x[:, :, 0, :, :].squeeze_()
    grad_xy = second_grad_y[:, :, 0, :, :].squeeze_()
    grad_yy = second_grad_y[:, :, 1, :, :].squeeze_()
    angle = torch.atan(grad_yy * torch.sign(-(grad_xy + eps)) /
                       (grad_xx + eps))
    return angle
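A minimal usage sketch, assuming get_normal_anlges above is in scope, the imports come from kornia.filters, and EPS is a small module-level constant:

import torch
from kornia.filters import spatial_gradient, gaussian_blur2d

EPS = 1e-6  # assumed value of the module-level constant

image = torch.rand(1, 1, 64, 64)   # Bx1xHxW edge/intensity map
angles = get_normal_anlges(image)  # (H, W) for B = C = 1, since squeeze_() drops unit dims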
Code example #3
    def forward(self, input):
        KORNIA_CHECK_SHAPE(input, ["B", "1", "H", "W"])

        B, CH, H, W = input.size()
        self.bin_pooling_kernel = self.bin_pooling_kernel.to(input.dtype).to(input.device)
        self.PoolingConv = self.PoolingConv.to(input.dtype).to(input.device)
        grads: torch.Tensor = spatial_gradient(input, 'diff')
        # unpack the edges
        gx: torch.Tensor = grads[:, :, 0]
        gy: torch.Tensor = grads[:, :, 1]
        mag: torch.Tensor = torch.sqrt(gx * gx + gy * gy + self.eps)
        ori: torch.Tensor = torch.atan2(gy, gx + self.eps) + 2.0 * pi
        o_big: torch.Tensor = float(self.num_ang_bins) * ori / (2.0 * pi)

        bo0_big_: torch.Tensor = torch.floor(o_big)
        wo1_big_: torch.Tensor = (o_big - bo0_big_)
        bo0_big: torch.Tensor = bo0_big_ % self.num_ang_bins
        bo1_big: torch.Tensor = (bo0_big + 1) % self.num_ang_bins
        wo0_big: torch.Tensor = (1.0 - wo1_big_) * mag  # type: ignore
        wo1_big: torch.Tensor = wo1_big_ * mag
        ang_bins = []
        for i in range(0, self.num_ang_bins):
            out = self.bin_pooling_kernel((bo0_big == i).to(input.dtype) * wo0_big +  # noqa
                                          (bo1_big == i).to(input.dtype) * wo1_big)
            ang_bins.append(out)
        ang_bins = torch.cat(ang_bins, dim=1)
        out_no_norm = self.PoolingConv(ang_bins)
        out = F.normalize(out_no_norm, dim=1, p=2).clamp_(0, float(self.clipval))
        out = F.normalize(out, dim=1, p=2)
        if self.rootsift:
            out = torch.sqrt(F.normalize(out, p=1) + self.eps)
        return out
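This forward() appears to come from kornia's dense SIFT module; a usage sketch under that assumption:

import torch
from kornia.feature import DenseSIFTDescriptor

desc = DenseSIFTDescriptor()      # defaults: 8 angular x 4x4 spatial bins -> 128 channels
image = torch.rand(1, 1, 64, 64)  # Bx1xHxW, as enforced by KORNIA_CHECK_SHAPE above
out = desc(image)                 # (1, 128, H', W'); the spatial size shrinks slightly
                                  # under the default pooling convolution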
Code example #4
    def forward(self, input):
        KORNIA_CHECK_SHAPE(input, ["B", "1", f"{self.patch_size}", f"{self.patch_size}"])
        B: int = input.shape[0]
        self.pk = self.pk.to(input.dtype).to(input.device)

        grads: torch.Tensor = spatial_gradient(input, 'diff')
        # unpack the edges
        gx: torch.Tensor = grads[:, :, 0]
        gy: torch.Tensor = grads[:, :, 1]

        mag: torch.Tensor = torch.sqrt(gx * gx + gy * gy + self.eps)
        ori: torch.Tensor = torch.atan2(gy, gx + self.eps) + 2.0 * pi
        mag = mag * self.gk.expand_as(mag).type_as(mag).to(mag.device)
        o_big: torch.Tensor = float(self.num_ang_bins) * ori / (2.0 * pi)

        bo0_big_: torch.Tensor = torch.floor(o_big)
        wo1_big_: torch.Tensor = o_big - bo0_big_
        bo0_big: torch.Tensor = bo0_big_ % self.num_ang_bins
        bo1_big: torch.Tensor = (bo0_big + 1) % self.num_ang_bins
        wo0_big: torch.Tensor = (1.0 - wo1_big_) * mag  # type: ignore
        wo1_big: torch.Tensor = wo1_big_ * mag

        ang_bins = []
        for i in range(0, self.num_ang_bins):
            out = self.pk((bo0_big == i).to(input.dtype) * wo0_big + (bo1_big == i).to(input.dtype) * wo1_big)
            ang_bins.append(out)
        ang_bins = torch.cat(ang_bins, dim=1)
        ang_bins = ang_bins.view(B, -1)
        ang_bins = F.normalize(ang_bins, p=2)
        ang_bins = torch.clamp(ang_bins, 0.0, float(self.clipval))
        ang_bins = F.normalize(ang_bins, p=2)
        if self.rootsift:
            ang_bins = torch.sqrt(F.normalize(ang_bins, p=1) + self.eps)
        return ang_bins
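A usage sketch, assuming this forward() belongs to kornia.feature.SIFTDescriptor:

import torch
from kornia.feature import SIFTDescriptor

sift = SIFTDescriptor(patch_size=32)  # 8 angular x 4x4 spatial bins -> 128-dim descriptors
patches = torch.rand(16, 1, 32, 32)   # B x 1 x patch_size x patch_size
descs = sift(patches)                 # torch.Size([16, 128]), L2-normalized rows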
Code example #5
def depth_to_normals(depth: torch.Tensor,
                     camera_matrix: torch.Tensor,
                     normalize_points: bool = False) -> torch.Tensor:
    """Compute the normal surface per pixel.

    Args:
        depth (torch.Tensor): image tensor containing a depth value per pixel.
        camera_matrix (torch.Tensor): tensor containing the camera intrinsics.
        normalize_points (bool): whether to normalise the pointcloud. This
            must be set to `True` when the depth is represented as the Euclidean
            ray length from the camera position. Default is `False`.

    Shape:
        - Input: :math:`(B, 1, H, W)` and :math:`(B, 3, 3)`
        - Output: :math:`(B, 3, H, W)`

    Return:
        torch.Tensor: tensor with a normal surface vector per pixel of the same resolution as the input.

    """
    if not isinstance(depth, torch.Tensor):
        raise TypeError(
            f"Input depth type is not a torch.Tensor. Got {type(depth)}.")

    if not (len(depth.shape) == 4 and depth.shape[-3] == 1):
        raise ValueError(
            f"Input depth must have a shape (B, 1, H, W). Got: {depth.shape}")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not (len(camera_matrix.shape) == 3 and
            camera_matrix.shape[-2:] == (3, 3)):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")

    # compute the 3d points from depth
    xyz: torch.Tensor = depth_to_3d(depth, camera_matrix,
                                    normalize_points)  # Bx3xHxW

    # compute the pointcloud spatial gradients
    gradients: torch.Tensor = spatial_gradient(xyz)  # Bx3x2xHxW

    # compute normals
    a, b = gradients[:, :, 0], gradients[:, :, 1]  # Bx3xHxW

    normals: torch.Tensor = torch.cross(a, b, dim=1)  # Bx3xHxW
    return F.normalize(normals, dim=1, p=2)
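Example call, following the shapes documented above:

import torch
from kornia.geometry.depth import depth_to_normals

depth = torch.rand(1, 1, 4, 4)   # Bx1xHxW depth map
K = torch.eye(3)[None]           # Bx3x3 camera intrinsics
normals = depth_to_normals(depth, K)
print(normals.shape)             # torch.Size([1, 3, 4, 4]); unit-norm along dim=1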
Code example #6
    def forward(self, input):
        if not isinstance(input, torch.Tensor):
            raise TypeError("Input type is not a torch.Tensor. Got {}".format(
                type(input)))
        if not len(input.shape) == 4:
            raise ValueError(
                "Invalid input shape, we expect Bx1xHxW. Got: {}".format(
                    input.shape))
        B, CH, H, W = input.size()
        if (W != self.patch_size) or (H != self.patch_size) or (CH != 1):
            raise TypeError("input shape must be [Bx1x{}x{}]. "
                            "Got {}".format(self.patch_size, self.patch_size,
                                            input.size()))
        self.pk = self.pk.to(input.dtype).to(input.device)

        grads: torch.Tensor = spatial_gradient(input, 'diff')
        # unpack the edges
        gx: torch.Tensor = grads[:, :, 0]
        gy: torch.Tensor = grads[:, :, 1]

        mag: torch.Tensor = torch.sqrt(gx * gx + gy * gy + self.eps)
        ori: torch.Tensor = torch.atan2(gy, gx + self.eps) + 2.0 * pi
        mag = mag * self.gk.expand_as(mag).type_as(mag).to(mag.device)
        o_big: torch.Tensor = float(self.num_ang_bins) * ori / (2.0 * pi)

        bo0_big_: torch.Tensor = torch.floor(o_big)
        wo1_big_: torch.Tensor = (o_big - bo0_big_)
        bo0_big: torch.Tensor = bo0_big_ % self.num_ang_bins
        bo1_big: torch.Tensor = (bo0_big + 1) % self.num_ang_bins
        wo0_big: torch.Tensor = (1.0 - wo1_big_) * mag  # type: ignore
        wo1_big: torch.Tensor = wo1_big_ * mag

        ang_bins = []
        for i in range(0, self.num_ang_bins):
            out = self.pk((bo0_big == i).to(input.dtype) * wo0_big +  # noqa
                          (bo1_big == i).to(input.dtype) * wo1_big)
            ang_bins.append(out)
        ang_bins = torch.cat(ang_bins, dim=1)
        ang_bins = ang_bins.view(B, -1)
        ang_bins = F.normalize(ang_bins, p=2)
        ang_bins = torch.clamp(ang_bins, 0., float(self.clipval))
        ang_bins = F.normalize(ang_bins, p=2)
        if self.rootsift:
            ang_bins = torch.sqrt(F.normalize(ang_bins, p=1) + self.eps)
        return ang_bins
Code example #7
    def __init__(self, img):
        if not img.dim() == 2 or not img.size()[0] == img.size()[1]:
            raise ValueError('Image should be a single-channel square image')
        with torch.no_grad():
            #creating dataset
            self.img = img
            self.size = img.size()[0]
            self.coords_abs = generate_coordinates(self.size)
            # better not normalize
            self.grad = spatial_gradient(img.view(1, 1, self.size, self.size),
                                         mode='sobel',
                                         normalized=False)[0][0]
            self.grad = torch.stack((self.grad[1], self.grad[0]), dim=0)
            self.grad_norm = torch.linalg.norm(self.grad, dim=0)

            self.grad = self.grad.permute(1, 2, 0)
            self.laplace = laplacian(img.view(1, 1, self.size, self.size),
                                     kernel_size=3,
                                     normalized=False)[0][0]
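A short sketch of the shapes involved (generate_coordinates is project-specific and omitted here):

import torch
from kornia.filters import spatial_gradient, laplacian

img = torch.rand(64, 64)                     # single-channel square image
grad = spatial_gradient(img.view(1, 1, 64, 64),
                        mode='sobel', normalized=False)[0][0]  # (2, 64, 64): dx, dy
lap = laplacian(img.view(1, 1, 64, 64),
                kernel_size=3, normalized=False)[0][0]         # (64, 64)
grad_norm = torch.linalg.norm(grad, dim=0)                     # per-pixel gradient magnitude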
Code example #8
File: harris.py  Project: zhenzhenxiang/kornia
    def forward(self, input: torch.Tensor) -> torch.Tensor:  # type: ignore
        if not torch.is_tensor(input):
            raise TypeError("Input type is not a torch.Tensor. Got {}".format(
                type(input)))
        if not len(input.shape) == 4:
            raise ValueError(
                "Invalid input shape, we expect BxCxHxW. Got: {}".format(
                    input.shape))
        # compute the first order gradients with sobel operator
        # TODO: implement support for kernel different than three
        gradients: torch.Tensor = spatial_gradient(input)
        dx: torch.Tensor = gradients[:, :, 0]
        dy: torch.Tensor = gradients[:, :, 1]

        # compute the structure tensor M elements
        def g(x):
            return gaussian_blur2d(x, (3, 3), (1., 1.))

        dx2: torch.Tensor = g(dx * dx)
        dy2: torch.Tensor = g(dy * dy)
        dxy: torch.Tensor = g(dx * dy)

        det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
        trace_m: torch.Tensor = dx2 + dy2

        # compute the response map
        scores: torch.Tensor = det_m - self.k * trace_m**2

        # threshold
        # TODO: add as signature parameter ?
        scores = torch.clamp(scores, min=1e-6)

        # apply non-maxima suppression
        scores = non_maxima_suppression2d(scores, kernel_size=(3, 3))

        # normalize and return
        scores_max: torch.Tensor = F.adaptive_max_pool2d(scores, output_size=1)
        return scores / scores_max
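A usage sketch, assuming this forward() belongs to kornia.feature.CornerHarris; note that the clamping/nms/normalization steps shown above are version-dependent:

import torch
from kornia.feature import CornerHarris

detector = CornerHarris(k=0.04)
img = torch.rand(1, 1, 32, 32)
scores = detector(img)   # BxCxHxW corner response map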
Code example #9
File: responses.py  Project: jeffshee/kornia
def harris_response(input: torch.Tensor,
                    k: Union[torch.Tensor, float] = 0.04,
                    grads_mode: str = 'sobel',
                    sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Computes the Harris cornerness function. Function does not do
    any normalization or nms.The response map is computed according the following formulation:

    .. math::
        R = max(0, det(M) - k \cdot trace(M)^2)

    where:

    .. math::
        M = \sum_{(x,y) \in W}
        \begin{bmatrix}
            I^{2}_x & I_x I_y \\
            I_x I_y & I^{2}_y \\
        \end{bmatrix}

    and :math:`k` is an empirically determined constant
    :math:`k \in [0.04, 0.06]`

    Args:
        input (torch.Tensor): 4d tensor
        k (torch.Tensor): the Harris detector free parameter.
        grads_mode (string): can be 'sobel' for standalone use or 'diff' for use on Gaussian pyramid
        sigmas (optional, torch.Tensor): coefficients to be multiplied by multichannel response.
                                         Should be of shape :math:`(B)`.
                                         It is necessary for performing non-maxima-suppression
                                         across different scale pyramid levels.
                                         See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_

    Return:
        torch.Tensor: the response map per channel.

    Shape:
      - Input: :math:`(B, C, H, W)`
      - Output: :math:`(B, C, H, W)`

    Examples:
        >>> input = torch.tensor([[[
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ... ]]])  # 1x1x7x7
        >>> # compute the response map
        >>> harris_response(input, 0.04)
        tensor([[[[0.0012, 0.0039, 0.0020, 0.0000, 0.0020, 0.0039, 0.0012],
                  [0.0039, 0.0065, 0.0040, 0.0000, 0.0040, 0.0065, 0.0039],
                  [0.0020, 0.0040, 0.0029, 0.0000, 0.0029, 0.0040, 0.0020],
                  [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                  [0.0020, 0.0040, 0.0029, 0.0000, 0.0029, 0.0040, 0.0020],
                  [0.0039, 0.0065, 0.0040, 0.0000, 0.0040, 0.0065, 0.0039],
                  [0.0012, 0.0039, 0.0020, 0.0000, 0.0020, 0.0039, 0.0012]]]])
    """
    # TODO: Recompute doctest
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}"
                        .format(type(input)))
    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}"
                         .format(input.shape))
    if sigmas is not None:
        if not torch.is_tensor(sigmas):
            raise TypeError("sigmas type is not a torch.Tensor. Got {}"
                            .format(type(sigmas)))
        if (not len(sigmas.shape) == 1) or (sigmas.size(0) != input.size(0)):
            raise ValueError("Invalid sigmas shape, we expect B == input.size(0). Got: {}".format(sigmas.shape))
    gradients: torch.Tensor = spatial_gradient(input, grads_mode)
    dx: torch.Tensor = gradients[:, :, 0]
    dy: torch.Tensor = gradients[:, :, 1]

    # compute the structure tensor M elements
    def g(x):
        return gaussian_blur2d(x, (7, 7), (1., 1.))

    dx2: torch.Tensor = g(dx ** 2)
    dy2: torch.Tensor = g(dy ** 2)
    dxy: torch.Tensor = g(dx * dy)
    det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
    trace_m: torch.Tensor = dx2 + dy2
    # compute the response map
    scores: torch.Tensor = det_m - k * (trace_m ** 2)
    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
    return scores
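Reproducing the docstring example programmatically:

import torch
from kornia.feature import harris_response

img = torch.zeros(1, 1, 7, 7)
img[..., 1:-1, 1:-1] = 1.0            # the 5x5 square of ones from the doctest
resp = harris_response(img, k=0.04)   # 1x1x7x7 response map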
Code example #10
File: responses.py  Project: jeffshee/kornia
def hessian_response(input: torch.Tensor,
                     grads_mode: str = 'sobel',
                     sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Computes the absolute of determinant of the Hessian matrix. Function does not do any normalization or nms.
    The response map is computed according the following formulation:

    .. math::
        R = det(H)

    where:

    .. math::
        H =
        \begin{bmatrix}
            I_{xx} & I_{xy} \\
            I_{xy} & I_{yy} \\
        \end{bmatrix}

    Args:
        input (torch.Tensor): 4d tensor
        grads_mode (string): can be 'sobel' for standalone use or 'diff' for use on Gaussian pyramid
        sigmas (optional, torch.Tensor): coefficients to be multiplied by multichannel response.
                                         Should be of shape :math:`(B)`.
                                         It is necessary for performing non-maxima-suppression
                                         across different scale pyramid levels.
                                         See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_

    Return:
         torch.Tensor: the response map per channel.

    Shape:
       - Input: :math:`(B, C, H, W)`
       - Output: :math:`(B, C, H, W)`

    Examples:
        >>> input = torch.tensor([[[
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ... ]]])  # 1x1x7x7
        >>> # compute the response map
        >>> hessian_response(input)
        tensor([[[[0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155],
                  [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
                  [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
                  [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                  [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
                  [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
                  [0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155]]]])
    """
    # TODO: Recompute doctest
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}"
                        .format(type(input)))
    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}"
                         .format(input.shape))
    if sigmas is not None:
        if not torch.is_tensor(sigmas):
            raise TypeError("sigmas type is not a torch.Tensor. Got {}"
                            .format(type(sigmas)))
        if (not len(sigmas.shape) == 1) or (sigmas.size(0) != input.size(0)):
            raise ValueError("Invalid sigmas shape, we expect B == input.size(0). Got: {}"
                             .format(sigmas.shape))
    gradients: torch.Tensor = spatial_gradient(input, grads_mode, 2)
    dxx: torch.Tensor = gradients[:, :, 0]
    dxy: torch.Tensor = gradients[:, :, 1]
    dyy: torch.Tensor = gradients[:, :, 2]

    scores: torch.Tensor = dxx * dyy - dxy ** 2
    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
    return scores
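A usage sketch, including the optional per-level sigmas scaling (the sigma value here is arbitrary):

import torch
from kornia.feature import hessian_response

img = torch.zeros(1, 1, 7, 7)
img[..., 1:-1, 1:-1] = 1.0
resp = hessian_response(img)                        # 1x1x7x7 det-of-Hessian map
sigmas = torch.tensor([1.5])                        # one sigma per batch element
resp_scaled = hessian_response(img, sigmas=sigmas)  # scaled by sigma**4 for cross-scale nms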
Code example #11
File: responses.py  Project: jeffshee/kornia
def gftt_response(input: torch.Tensor,
                  grads_mode: str = 'sobel',
                  sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Computes the Shi-Tomasi cornerness function. Function does not do any normalization or nms.
    The response map is computed according the following formulation:

    .. math::
        R = min(eig(M))

    where:

    .. math::
        M = \sum_{(x,y) \in W}
        \begin{bmatrix}
            I^{2}_x & I_x I_y \\
            I_x I_y & I^{2}_y \\
        \end{bmatrix}

    Args:
        input (torch.Tensor): 4d tensor
        grads_mode (string): can be 'sobel' for standalone use or 'diff' for use on Gaussian pyramid
        sigmas (optional, torch.Tensor): coefficients to be multiplied by multichannel response.
                                         Should be of shape :math:`(B)`.
                                         It is necessary for performing non-maxima-suppression
                                         across different scale pyramid levels.
                                         See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_

    Return:
        torch.Tensor: the response map per channel.

    Shape:
      - Input: :math:`(B, C, H, W)`
      - Output: :math:`(B, C, H, W)`

    Examples:
        >>> input = torch.tensor([[[
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ... ]]])  # 1x1x7x7
        >>> # compute the response map
        >>> gftt_response(input)
        tensor([[[[0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155],
                  [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
                  [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
                  [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                  [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
                  [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
                  [0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155]]]])
    """
    # TODO: Recompute doctest
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}"
                        .format(type(input)))
    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}"
                         .format(input.shape))
    gradients: torch.Tensor = spatial_gradient(input, grads_mode)
    dx: torch.Tensor = gradients[:, :, 0]
    dy: torch.Tensor = gradients[:, :, 1]

    # compute the structure tensor M elements
    def g(x):
        return gaussian_blur2d(x, (7, 7), (1., 1.))

    dx2: torch.Tensor = g(dx ** 2)
    dy2: torch.Tensor = g(dy ** 2)
    dxy: torch.Tensor = g(dx * dy)

    det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
    trace_m: torch.Tensor = dx2 + dy2

    e1: torch.Tensor = 0.5 * (trace_m + torch.sqrt((trace_m ** 2 - 4 * det_m).abs()))
    e2: torch.Tensor = 0.5 * (trace_m - torch.sqrt((trace_m ** 2 - 4 * det_m).abs()))

    scores: torch.Tensor = torch.min(e1, e2)
    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
    return scores
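The Shi-Tomasi response can be compared directly with the Harris response on the same input:

import torch
from kornia.feature import gftt_response, harris_response

img = torch.zeros(1, 1, 7, 7)
img[..., 1:-1, 1:-1] = 1.0
shi_tomasi = gftt_response(img)         # min eigenvalue of the structure tensor
harris = harris_response(img, k=0.04)   # det - k * trace^2 of the same tensor
print(shi_tomasi.shape, harris.shape)   # both torch.Size([1, 1, 7, 7])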
Code example #12
File: canny.py  Project: dkoguciuk/kornia
def canny(
    input: torch.Tensor,
    low_threshold: float = 0.1,
    high_threshold: float = 0.2,
    kernel_size: Tuple[int, int] = (5, 5),
    sigma: Tuple[float, float] = (1, 1),
    hysteresis: bool = True,
    eps: float = 1e-6,
) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Finds edges of the input image and filters them using the Canny algorithm.

    Args:
        input (torch.Tensor): input image tensor with shape :math:`(B,C,H,W)`.
        low_threshold (float): lower threshold for the hysteresis procedure. Default: 0.1.
        high_threshold (float): upper threshold for the hysteresis procedure. Default: 0.2.
        kernel_size (Tuple[int, int]): the size of the kernel for the gaussian blur.
        sigma (Tuple[float, float]): the standard deviation of the kernel for the gaussian blur.
        hysteresis (bool): if True, applies the hysteresis edge tracking.
            Otherwise, the edges are marked as either weak (0.5) or strong (1).
        eps (float): regularization number to avoid NaN during backprop. Default: 1e-6.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]:
        - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
        - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.

    Example:
        >>> input = torch.rand(5, 3, 4, 4)
        >>> magnitude, edges = canny(input)  # 5x3x4x4
        >>> magnitude.shape
        torch.Size([5, 1, 4, 4])
        >>> edges.shape
        torch.Size([5, 1, 4, 4])
    """
    if not isinstance(input, torch.Tensor):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))

    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    if low_threshold > high_threshold:
        raise ValueError(
            "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: {}>{}".format(
                low_threshold, high_threshold
            )
        )

    if low_threshold < 0 or low_threshold > 1:
        raise ValueError(
            "Invalid input threshold. low_threshold should be in range (0,1). Got: {}".format(low_threshold)
        )

    if high_threshold < 0 or high_threshold > 1:
        raise ValueError(
            "Invalid input threshold. high_threshold should be in range (0,1). Got: {}".format(high_threshold)
        )

    device: torch.device = input.device
    dtype: torch.dtype = input.dtype

    # To Grayscale
    if input.shape[1] == 3:
        input = rgb_to_grayscale(input)

    # Gaussian filter
    blurred: torch.Tensor = gaussian_blur2d(input, kernel_size, sigma)

    # Compute the gradients
    gradients: torch.Tensor = spatial_gradient(blurred, normalized=False)

    # Unpack the edges
    gx: torch.Tensor = gradients[:, :, 0]
    gy: torch.Tensor = gradients[:, :, 1]

    # Compute gradient magnitude and angle
    magnitude: torch.Tensor = torch.sqrt(gx * gx + gy * gy + eps)
    angle: torch.Tensor = torch.atan2(gy, gx)

    # Radians to Degrees
    angle = rad2deg(angle)

    # Round angle to the nearest 45 degree
    angle = torch.round(angle / 45) * 45

    # Non-maximal suppression
    nms_kernels: torch.Tensor = get_canny_nms_kernel(device, dtype)
    nms_magnitude: torch.Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)

    # Get the indices for both directions
    positive_idx: torch.Tensor = (angle / 45) % 8
    positive_idx = positive_idx.long()

    negative_idx: torch.Tensor = ((angle / 45) + 4) % 8
    negative_idx = negative_idx.long()

    # Apply the non-maximum suppression to the different directions
    channel_select_filtered_positive: torch.Tensor = torch.gather(nms_magnitude, 1, positive_idx)
    channel_select_filtered_negative: torch.Tensor = torch.gather(nms_magnitude, 1, negative_idx)

    channel_select_filtered: torch.Tensor = torch.stack(
        [channel_select_filtered_positive, channel_select_filtered_negative], 1
    )

    is_max: torch.Tensor = channel_select_filtered.min(dim=1)[0] > 0.0

    magnitude = magnitude * is_max

    # Threshold: mark weak (0.5) and strong (1.0) edges

    low: torch.Tensor = magnitude > low_threshold
    high: torch.Tensor = magnitude > high_threshold

    edges = low * 0.5 + high * 0.5
    edges = edges.to(dtype)

    # Hysteresis
    if hysteresis:
        edges_old: torch.Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
        hysteresis_kernels: torch.Tensor = get_hysteresis_kernel(device, dtype)

        while ((edges_old - edges).abs() != 0).any():
            weak: torch.Tensor = (edges == 0.5).float()
            strong: torch.Tensor = (edges == 1).float()

            hysteresis_magnitude: torch.Tensor = F.conv2d(
                edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
            )
            hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
            hysteresis_magnitude = hysteresis_magnitude * weak + strong

            edges_old = edges.clone()
            edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5

        edges = hysteresis_magnitude

    return magnitude, edges
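Usage matching the docstring example; RGB inputs are converted to grayscale internally:

import torch
from kornia.filters import canny

img = torch.rand(2, 3, 32, 32)
magnitude, edges = canny(img, low_threshold=0.1, high_threshold=0.2)
print(magnitude.shape, edges.shape)   # torch.Size([2, 1, 32, 32]) for both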
Code example #13
File: responses.py  Project: kornia/kornia
def gftt_response(input: torch.Tensor,
                  grads_mode: str = 'sobel',
                  sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Compute the Shi-Tomasi cornerness function.

    Function does not do any normalization or nms. The response map is computed according to the following formulation:

    .. math::
        R = min(eig(M))

    where:

    .. math::
        M = \sum_{(x,y) \in W}
        \begin{bmatrix}
            I^{2}_x & I_x I_y \\
            I_x I_y & I^{2}_y \\
        \end{bmatrix}

    Args:
        input: input image with shape :math:`(B, C, H, W)`.
        grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for use on Gaussian pyramid.
        sigmas: coefficients to be multiplied by multichannel response. Should be shape of :math:`(B)`
          It is necessary for performing non-maxima-suppression across different scale pyramid levels.
          See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.

    Return:
        the response map per channel with shape :math:`(B, C, H, W)`.

    Example:
        >>> input = torch.tensor([[[
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ... ]]])  # 1x1x7x7
        >>> # compute the response map
        >>> gftt_response(input)
        tensor([[[[0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155],
                  [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
                  [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
                  [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                  [0.0194, 0.0339, 0.0497, 0.0000, 0.0497, 0.0339, 0.0194],
                  [0.0334, 0.0575, 0.0339, 0.0000, 0.0339, 0.0575, 0.0334],
                  [0.0155, 0.0334, 0.0194, 0.0000, 0.0194, 0.0334, 0.0155]]]])
    """
    # TODO: Recompute doctest
    KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])

    gradients: torch.Tensor = spatial_gradient(input, grads_mode)
    dx: torch.Tensor = gradients[:, :, 0]
    dy: torch.Tensor = gradients[:, :, 1]

    dx2: torch.Tensor = gaussian_blur2d(dx**2, (7, 7), (1.0, 1.0))
    dy2: torch.Tensor = gaussian_blur2d(dy**2, (7, 7), (1.0, 1.0))
    dxy: torch.Tensor = gaussian_blur2d(dx * dy, (7, 7), (1.0, 1.0))

    det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
    trace_m: torch.Tensor = dx2 + dy2

    e1: torch.Tensor = 0.5 * (trace_m + torch.sqrt(
        (trace_m**2 - 4 * det_m).abs()))
    e2: torch.Tensor = 0.5 * (trace_m - torch.sqrt(
        (trace_m**2 - 4 * det_m).abs()))

    scores: torch.Tensor = torch.min(e1, e2)

    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)

    return scores
Code example #14
File: responses.py  Project: kornia/kornia
def harris_response(
    input: torch.Tensor,
    k: Union[torch.Tensor, float] = 0.04,
    grads_mode: str = 'sobel',
    sigmas: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    r"""Compute the Harris cornerness function.

    Function does not do any normalization or nms. The response map is computed according to the following formulation:

    .. math::
        R = max(0, det(M) - k \cdot trace(M)^2)

    where:

    .. math::
        M = \sum_{(x,y) \in W}
        \begin{bmatrix}
            I^{2}_x & I_x I_y \\
            I_x I_y & I^{2}_y \\
        \end{bmatrix}

    and :math:`k` is an empirically determined constant
    :math:`k \in [0.04, 0.06]`

    Args:
        input: input image with shape :math:`(B, C, H, W)`.
        k: the Harris detector free parameter.
        grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for use on Gaussian pyramid.
        sigmas: coefficients to be multiplied by multichannel response. Should be shape of :math:`(B)`
          It is necessary for performing non-maxima-suppression across different scale pyramid levels.
          See `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.

    Return:
        the response map per channel with shape :math:`(B, C, H, W)`.

    Example:
        >>> input = torch.tensor([[[
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 1., 1., 1., 1., 1., 0.],
        ...    [0., 0., 0., 0., 0., 0., 0.],
        ... ]]])  # 1x1x7x7
        >>> # compute the response map
        >>> harris_response(input, 0.04)
        tensor([[[[0.0012, 0.0039, 0.0020, 0.0000, 0.0020, 0.0039, 0.0012],
                  [0.0039, 0.0065, 0.0040, 0.0000, 0.0040, 0.0065, 0.0039],
                  [0.0020, 0.0040, 0.0029, 0.0000, 0.0029, 0.0040, 0.0020],
                  [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
                  [0.0020, 0.0040, 0.0029, 0.0000, 0.0029, 0.0040, 0.0020],
                  [0.0039, 0.0065, 0.0040, 0.0000, 0.0040, 0.0065, 0.0039],
                  [0.0012, 0.0039, 0.0020, 0.0000, 0.0020, 0.0039, 0.0012]]]])
    """
    # TODO: Recompute doctest
    KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])

    if sigmas is not None:
        if not isinstance(sigmas, torch.Tensor):
            raise TypeError(
                f"sigmas type is not a torch.Tensor. Got {type(sigmas)}")
        if (not len(sigmas.shape) == 1) or (sigmas.size(0) != input.size(0)):
            raise ValueError(
                f"Invalid sigmas shape, we expect B == input.size(0). Got: {sigmas.shape}"
            )

    gradients: torch.Tensor = spatial_gradient(input, grads_mode)
    dx: torch.Tensor = gradients[:, :, 0]
    dy: torch.Tensor = gradients[:, :, 1]

    # compute the structure tensor M elements

    dx2: torch.Tensor = gaussian_blur2d(dx**2, (7, 7), (1.0, 1.0))
    dy2: torch.Tensor = gaussian_blur2d(dy**2, (7, 7), (1.0, 1.0))
    dxy: torch.Tensor = gaussian_blur2d(dx * dy, (7, 7), (1.0, 1.0))

    det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
    trace_m: torch.Tensor = dx2 + dy2

    # compute the response map
    scores: torch.Tensor = det_m - k * (trace_m**2)

    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)

    return scores