Example 1
def gmsd(
    x: torch.Tensor,
    y: torch.Tensor,
    reduction: str = 'mean',
    data_range: Union[int, float] = 1.,
    t: float = 170 / (255.**2)
) -> torch.Tensor:
    r"""Compute Gradient Magnitude Similarity Deviation.

    Inputs are expected to be in the range [0, data_range] with RGB channel order for colour images.

    Args:
        x: Tensor with shape (H, W), (C, H, W) or (N, C, H, W).
        y: Tensor with shape (H, W), (C, H, W) or (N, C, H, W).
        reduction: Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
        data_range: The difference between the maximum and minimum of the pixel value,
            i.e., if for image x it holds min(x) = 0 and max(x) = 1, then data_range = 1.
            The pixel value interval of both input and output should remain the same.
        t: Constant from the reference paper for numerical stability of the similarity map.

    Returns:
        gmsd : Gradient Magnitude Similarity Deviation between given tensors.

    References:
        Wufeng Xue et al. Gradient Magnitude Similarity Deviation (2013)
        https://arxiv.org/pdf/1308.3052.pdf
    """

    _validate_input(input_tensors=(x, y),
                    allow_5d=False,
                    scale_weights=None,
                    data_range=data_range)
    x, y = _adjust_dimensions(input_tensors=(x, y))

    # Rescale
    x = x / data_range
    y = y / data_range

    num_channels = x.size(1)
    if num_channels == 3:
        x = rgb2yiq(x)[:, :1]
        y = rgb2yiq(y)[:, :1]
    up_pad = 0
    down_pad = max(x.shape[2] % 2, x.shape[3] % 2)
    pad_to_use = [up_pad, down_pad, up_pad, down_pad]
    x = F.pad(x, pad=pad_to_use)
    y = F.pad(y, pad=pad_to_use)

    x = F.avg_pool2d(x, kernel_size=2, stride=2, padding=0)
    y = F.avg_pool2d(y, kernel_size=2, stride=2, padding=0)

    score = _gmsd(x=x, y=y, t=t)
    if reduction == 'none':
        return score

    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)
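
The `_gmsd` helper called above is not shown. Below is a minimal sketch of what it presumably computes, following the reference paper: Prewitt gradient magnitudes, a gradient magnitude similarity (GMS) map, and its per-image standard deviation. The name `_gmsd_sketch` and the exact kernel normalisation are assumptions, not the piq source.

import torch
import torch.nn.functional as F

def _gmsd_sketch(x: torch.Tensor, y: torch.Tensor, t: float = 170 / (255. ** 2)) -> torch.Tensor:
    # Prewitt kernels for gradients along two axes; inputs are single-channel (N, 1, H, W).
    prewitt = torch.tensor([[-1., 0., 1.], [-1., 0., 1.], [-1., 0., 1.]]) / 3.
    kernels = torch.stack([prewitt, prewitt.t()]).unsqueeze(1).to(x)  # (2, 1, 3, 3)

    def gradient_magnitude(img: torch.Tensor) -> torch.Tensor:
        grads = F.conv2d(img, kernels, padding=1)              # (N, 2, H, W)
        return torch.sqrt(grads.pow(2).sum(dim=1) + 1e-12)     # (N, H, W)

    gm_x, gm_y = gradient_magnitude(x), gradient_magnitude(y)
    gms = (2. * gm_x * gm_y + t) / (gm_x ** 2 + gm_y ** 2 + t)  # similarity map
    return gms.flatten(start_dim=1).std(dim=-1)                 # deviation per image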
Example 2
def gmsd(
    prediction: torch.Tensor,
    target: torch.Tensor,
    reduction: Optional[str] = 'mean',
    data_range: Union[int, float] = 1.,
    t: float = 170 / (255.**2)
) -> torch.Tensor:
    r"""Compute Gradient Magnitude Similarity Deviation
    Both inputs supposed to be in range [0, 1] with RGB order.
    Args:
        prediction: Tensor of shape :math:`(N, C, H, W)` holding an distorted image.
        target: Tensor of shape :math:`(N, C, H, W)` holding an target image
        reduction: Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
        data_range: The difference between the maximum and minimum of the pixel value,
            i.e., if for image x it holds min(x) = 0 and max(x) = 1, then data_range = 1.
            The pixel value interval of both input and output should remain the same.
        t: Constant from the reference paper for numerical stability of the similarity map.

    Returns:
        gmsd : Gradient Magnitude Similarity Deviation between given tensors.

    References:
        https://arxiv.org/pdf/1308.3052.pdf
    """

    _validate_input(input_tensors=(prediction, target),
                    allow_5d=False,
                    scale_weights=None)
    prediction, target = _adjust_dimensions(input_tensors=(prediction, target))

    prediction = prediction / float(data_range)
    target = target / float(data_range)

    num_channels = prediction.size(1)
    if num_channels == 3:
        prediction = rgb2yiq(prediction)[:, :1]
        target = rgb2yiq(target)[:, :1]
    up_pad = 0
    down_pad = max(prediction.shape[2] % 2, prediction.shape[3] % 2)
    pad_to_use = [up_pad, down_pad, up_pad, down_pad]
    prediction = F.pad(prediction, pad=pad_to_use)
    target = F.pad(target, pad=pad_to_use)

    prediction = F.avg_pool2d(prediction, kernel_size=2, stride=2, padding=0)
    target = F.avg_pool2d(target, kernel_size=2, stride=2, padding=0)

    score = _gmsd(prediction=prediction, target=target, t=t)
    if reduction == 'none':
        return score

    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)
Example 3
def gmsd(
    x: torch.Tensor,
    y: torch.Tensor,
    reduction: str = 'mean',
    data_range: Union[int, float] = 1.,
    t: float = 170 / (255.**2)
) -> torch.Tensor:
    r"""Compute Gradient Magnitude Similarity Deviation.

    Supports greyscale and colour images with RGB channel order.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        data_range: Maximum value range of images (usually 1.0 or 255).
        t: Constant from the reference paper for numerical stability of the similarity map.

    Returns:
        Gradient Magnitude Similarity Deviation between given tensors.

    References:
        Wufeng Xue et al. Gradient Magnitude Similarity Deviation (2013)
        https://arxiv.org/pdf/1308.3052.pdf
    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))

    # Rescale
    x = x / float(data_range)
    y = y / float(data_range)

    num_channels = x.size(1)
    if num_channels == 3:
        x = rgb2yiq(x)[:, :1]
        y = rgb2yiq(y)[:, :1]
    up_pad = 0
    down_pad = max(x.shape[2] % 2, x.shape[3] % 2)
    pad_to_use = [up_pad, down_pad, up_pad, down_pad]
    x = F.pad(x, pad=pad_to_use)
    y = F.pad(y, pad=pad_to_use)

    x = F.avg_pool2d(x, kernel_size=2, stride=2, padding=0)
    y = F.avg_pool2d(y, kernel_size=2, stride=2, padding=0)

    score = _gmsd(x=x, y=y, t=t)
    return _reduce(score, reduction)
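
Examples that call `_reduce` delegate the final reduction to a helper that is not shown here. A minimal sketch of the assumed behaviour (identity for 'none', batch mean or sum otherwise); the exact signature is an assumption:

import torch

def _reduce_sketch(x: torch.Tensor, reduction: str = 'mean') -> torch.Tensor:
    # Dispatch on the reduction string; 'none' returns the per-image scores unchanged.
    if reduction == 'none':
        return x
    if reduction == 'mean':
        return x.mean(dim=0)
    if reduction == 'sum':
        return x.sum(dim=0)
    raise ValueError(f"Unknown reduction: {reduction!r}")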
Example 4
def brisque(x: torch.Tensor,
            kernel_size: int = 7,
            kernel_sigma: float = 7 / 6,
            data_range: Union[int, float] = 1.,
            reduction: str = 'mean',
            interpolation: str = 'nearest') -> torch.Tensor:
    r"""Interface of BRISQUE index.

    Args:
        x: Tensor with shape (H, W), (C, H, W) or (N, C, H, W). RGB channel order for colour images.
        kernel_size: The side-length of the sliding window used in comparison. Must be an odd value.
        kernel_sigma: Sigma of normal distribution.
        data_range: Maximum value range of input images (usually 1.0 or 255).
        reduction: Reduction over samples in batch: "mean"|"sum"|"none".
        interpolation: Interpolation to be used for scaling.

    Returns:
        Value of BRISQUE index.

    Note:
        Backpropagation is not available when using torch==1.5.0 due to a bug in argmin/argmax backpropagation.
        Update torch and torchvision to the latest versions.

    References:
        .. [1] Anish Mittal et al. "No-Reference Image Quality Assessment in the Spatial Domain",
        https://live.ece.utexas.edu/publications/2012/TIP%20BRISQUE.pdf
    """
    if '1.5.0' in torch.__version__:
        warnings.warn(
            f'BRISQUE does not support backpropagation due to a bug in torch={torch.__version__}. '
            'Update torch to the latest version to access the full functionality of BRISQUE. '
            'More info is available at https://github.com/photosynthesis-team/piq/pull/79 and '
            'https://github.com/pytorch/pytorch/issues/38869.')

    _validate_input(input_tensors=x, allow_5d=False, kernel_size=kernel_size)
    x = _adjust_dimensions(input_tensors=x)

    assert data_range >= x.max(), \
        f'Expected data range to be greater or equal to the maximum value, got {data_range} and {x.max()}.'
    x = x * 255. / data_range

    if x.size(1) == 3:
        x = rgb2yiq(x)[:, :1]
    features = []
    num_of_scales = 2
    for _ in range(num_of_scales):
        features.append(_natural_scene_statistics(x, kernel_size,
                                                  kernel_sigma))
        x = F.interpolate(x,
                          size=(x.size(2) // 2, x.size(3) // 2),
                          mode=interpolation)

    features = torch.cat(features, dim=-1)
    scaled_features = _scale_features(features)
    score = _score_svr(scaled_features)
    if reduction == 'none':
        return score

    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)
Example 5
def brisque(x: torch.Tensor,
            kernel_size: int = 7,
            kernel_sigma: float = 7 / 6,
            data_range: Union[int, float] = 1.,
            reduction: str = 'mean',
            interpolation: str = 'nearest') -> torch.Tensor:
    r"""Interface of BRISQUE index.
    Supports greyscale and colour images with RGB channel order.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        kernel_size: The side-length of the sliding window used in comparison. Must be an odd value.
        kernel_sigma: Sigma of normal distribution.
        data_range: Maximum value range of images (usually 1.0 or 255).
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'``
        interpolation: Interpolation to be used for scaling.

    Returns:
        Value of BRISQUE index.

    Note:
        Backpropagation is not available when using torch==1.5.0 due to a bug in argmin/argmax backpropagation.
        Update torch and torchvision to the latest versions.

    References:
        .. [1] Anish Mittal et al. "No-Reference Image Quality Assessment in the Spatial Domain",
           https://live.ece.utexas.edu/publications/2012/TIP%20BRISQUE.pdf
    """
    if '1.5.0' in torch.__version__:
        warnings.warn(
            f'BRISQUE does not support backpropagation due to a bug in torch={torch.__version__}. '
            'Update torch to the latest version to access the full functionality of BRISQUE. '
            'More info is available at https://github.com/photosynthesis-team/piq/pull/79 and '
            'https://github.com/pytorch/pytorch/issues/38869.')

    assert kernel_size % 2 == 1, f'Kernel size must be odd, got [{kernel_size}]'
    _validate_input([
        x,
    ], dim_range=(4, 4), data_range=(0, data_range))

    x = x / data_range * 255

    if x.size(1) == 3:
        x = rgb2yiq(x)[:, :1]
    features = []
    num_of_scales = 2
    for _ in range(num_of_scales):
        features.append(_natural_scene_statistics(x, kernel_size,
                                                  kernel_sigma))
        x = F.interpolate(x,
                          size=(x.size(2) // 2, x.size(3) // 2),
                          mode=interpolation)

    features = torch.cat(features, dim=-1)
    scaled_features = _scale_features(features)
    score = _score_svr(scaled_features)

    return _reduce(score, reduction)
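
A usage sketch, assuming the function is exposed as `piq.brisque`. BRISQUE is no-reference, so only the distorted batch is passed:

import torch
import piq

x = torch.rand(4, 3, 256, 256)                            # RGB images in [0, 1]
score = piq.brisque(x, data_range=1., reduction='none')   # one score per image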
Example 6
def haarpsi(x: torch.Tensor, y: torch.Tensor, reduction: Optional[str] = 'mean',
            data_range: Union[int, float] = 1., scales: int = 3, subsample: bool = True,
            c: float = 30.0, alpha: float = 4.2) -> torch.Tensor:
    r"""Compute Haar Wavelet-Based Perceptual Similarity
    Input can by greyscale tensor of colour image with RGB channels order.
    Args:
        x: Tensor of shape :math:`(N, C, H, W)` holding an distorted image.
        y: Tensor of shape :math:`(N, C, H, W)` holding an target image
        reduction: Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed.
        data_range: The difference between the maximum and minimum of the pixel value,
            i.e., if for image x it holds min(x) = 0 and max(x) = 1, then data_range = 1.
            The pixel value interval of both input and output should remain the same.
        scales: Number of Haar wavelets used for image decomposition.
        subsample: Flag to apply average pooling before HaarPSI computation. See [1] for details.
        c: Constant from the paper. See [1] for details.
        alpha: Exponent used for similarity map weighting. See [1] for details.

    Returns:
        HaarPSI: Haar Wavelet-Based Perceptual Similarity between two tensors.
    
    References:
        [1] R. Reisenhofer, S. Bosse, G. Kutyniok & T. Wiegand (2017)
            'A Haar Wavelet-Based Perceptual Similarity Index for Image Quality Assessment'
            http://www.math.uni-bremen.de/cda/HaarPSI/publications/HaarPSI_preprint_v4.pdf
        [2] Code from authors on MATLAB and Python
            https://github.com/rgcda/haarpsi
    """

    _validate_input(input_tensors=(x, y), allow_5d=False, scale_weights=None)
    x, y = _adjust_dimensions(input_tensors=(x, y))

    # Assert minimal image size
    kernel_size = 2 ** (scales + 1)
    if x.size(-1) < kernel_size or x.size(-2) < kernel_size:
        raise ValueError(f'Kernel size can\'t be greater than actual input size. Input size: {x.size()}. '
                         f'Kernel size: {kernel_size}')

    # Scale images to [0, 255] range as in the paper
    x = x * 255.0 / float(data_range)
    y = y * 255.0 / float(data_range)

    num_channels = x.size(1)
    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)
    else:
        x_yiq = x
        y_yiq = y

    # Downscale input to simulate the typical distance between an image and its viewer.
    if subsample:
        up_pad = 0
        down_pad = max(x.shape[2] % 2, x.shape[3] % 2)
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        x_yiq = F.pad(x_yiq, pad=pad_to_use)
        y_yiq = F.pad(y_yiq, pad=pad_to_use)

        x_yiq = F.avg_pool2d(x_yiq, kernel_size=2, stride=2, padding=0)
        y_yiq = F.avg_pool2d(y_yiq, kernel_size=2, stride=2, padding=0)
    
    # Haar wavelet decomposition
    coefficients_x, coefficients_y = [], []
    for scale in range(scales):
        kernel_size = 2 ** (scale + 1)
        kernels = torch.stack([haar_filter(kernel_size), haar_filter(kernel_size).transpose(-1, -2)])
    
        # Asymmetrical padding due to even kernel size. Matches MATLAB conv2(A, B, 'same').
        upper_pad = kernel_size // 2 - 1
        bottom_pad = kernel_size // 2
        pad_to_use = [upper_pad, bottom_pad, upper_pad, bottom_pad]
        coeff_x = torch.nn.functional.conv2d(F.pad(x_yiq[:, : 1], pad=pad_to_use, mode='constant'), kernels.to(x))
        coeff_y = torch.nn.functional.conv2d(F.pad(y_yiq[:, : 1], pad=pad_to_use, mode='constant'), kernels.to(y))
    
        coefficients_x.append(coeff_x)
        coefficients_y.append(coeff_y)

    # Shape [B x {scales * 2} x H x W]
    coefficients_x = torch.cat(coefficients_x, dim=1)
    coefficients_y = torch.cat(coefficients_y, dim=1)

    # Low-frequency coefficients used as weights
    # Shape [B x 2 x H x W]
    weights = torch.max(torch.abs(coefficients_x[:, 4:]), torch.abs(coefficients_y[:, 4:]))
    
    # High-frequency coefficients used for similarity computation in 2 orientations (horizontal and vertical)
    sim_map = []
    for orientation in range(2):
        magnitude_x = torch.abs(coefficients_x[:, (orientation, orientation + 2)])
        magnitude_y = torch.abs(coefficients_y[:, (orientation, orientation + 2)])
        sim_map.append(similarity_map(magnitude_x, magnitude_y, constant=c).sum(dim=1, keepdims=True) / 2)

    if num_channels == 3:
        pad_to_use = [0, 1, 0, 1]
        x_yiq = F.pad(x_yiq, pad=pad_to_use)
        y_yiq = F.pad(y_yiq, pad=pad_to_use)
        coefficients_x_iq = torch.abs(F.avg_pool2d(x_yiq[:, 1:], kernel_size=2, stride=1, padding=0))
        coefficients_y_iq = torch.abs(F.avg_pool2d(y_yiq[:, 1:], kernel_size=2, stride=1, padding=0))
    
        # Compute weights and similarity
        weights = torch.cat([weights, weights.mean(dim=1, keepdims=True)], dim=1)
        sim_map.append(
            similarity_map(coefficients_x_iq, coefficients_y_iq, constant=c).sum(dim=1, keepdims=True) / 2)

    sim_map = torch.cat(sim_map, dim=1)
    
    # Calculate the final score
    eps = torch.finfo(sim_map.dtype).eps
    score = (((sim_map * alpha).sigmoid() * weights).sum(dim=[1, 2, 3]) + eps) /\
        (torch.sum(weights, dim=[1, 2, 3]) + eps)
    # Logit of score
    score = (torch.log(score / (1 - score)) / alpha) ** 2

    if reduction == 'none':
        return score

    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)
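
The `similarity_map` helper shared by the GMSD, HaarPSI and FSIM examples is not shown. A minimal sketch of the assumed formula, the usual SSIM-style ratio with a stabilising constant:

import torch

def similarity_map_sketch(map_x: torch.Tensor, map_y: torch.Tensor, constant: float) -> torch.Tensor:
    # Element-wise similarity in [0, 1]; identical maps give exactly 1.
    return (2. * map_x * map_y + constant) / (map_x ** 2 + map_y ** 2 + constant)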
Example 7
def fsim(x: torch.Tensor,
         y: torch.Tensor,
         reduction: str = 'mean',
         data_range: Union[int, float] = 1.0,
         chromatic: bool = True,
         scales: int = 4,
         orientations: int = 4,
         min_length: int = 6,
         mult: int = 2,
         sigma_f: float = 0.55,
         delta_theta: float = 1.2,
         k: float = 2.0) -> torch.Tensor:
    r"""Compute Feature Similarity Index Measure for a batch of images.

    Args:
        x: A set of predicted images :math:`x`.
            Shape (H, W), (C, H, W) or (N, C, H, W).
        y: A set of target images :math:`y`.
            Shape (H, W), (C, H, W) or (N, C, H, W).
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        chromatic: Flag to compute FSIMc, which also takes into account chromatic components
        scales: Number of wavelets used for computation of phase congruency maps
        orientations: Number of filter orientations used for computation of phase congruency maps
        min_length: Wavelength of smallest scale filter
        mult: Scaling factor between successive filters
        sigma_f: Ratio of the standard deviation of the Gaussian describing the log Gabor filter's
            transfer function in the frequency domain to the filter center frequency.
        delta_theta: Ratio of angular interval between filter orientations and the standard deviation
            of the angular Gaussian function used to construct filters in the frequency plane.
        k: Number of standard deviations of the noise energy beyond the mean at which we set the noise
            threshold point, below which phase congruency values get penalized.
        
    Returns:
        FSIM: Index of similarity between two images. Usually in the [0, 1] interval.
            Can be bigger than 1 for predicted (x) images with higher contrast than the original ones.
    Note:
        This implementation is based on the original MATLAB code.
        https://www4.comp.polyu.edu.hk/~cslzhang/IQA/FSIM/FSIM.htm
        
    """

    _validate_input(input_tensors=(x, y),
                    allow_5d=False,
                    data_range=data_range)
    x, y = _adjust_dimensions(input_tensors=(x, y))

    # Rescale to the [0, 255] range because all constants are calculated for this range
    x = x / data_range * 255
    y = y / data_range * 255

    # Apply average pooling
    kernel_size = max(1, round(min(x.shape[-2:]) / 256))
    x = torch.nn.functional.avg_pool2d(x, kernel_size)
    y = torch.nn.functional.avg_pool2d(y, kernel_size)

    num_channels = x.size(1)

    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)

        x_lum = x_yiq[:, :1]
        y_lum = y_yiq[:, :1]

        x_i = x_yiq[:, 1:2]
        y_i = y_yiq[:, 1:2]
        x_q = x_yiq[:, 2:]
        y_q = y_yiq[:, 2:]

    else:
        x_lum = x
        y_lum = y

    # Compute phase congruency maps
    pc_x = _phase_congruency(x_lum,
                             scales=scales,
                             orientations=orientations,
                             min_length=min_length,
                             mult=mult,
                             sigma_f=sigma_f,
                             delta_theta=delta_theta,
                             k=k)
    pc_y = _phase_congruency(y_lum,
                             scales=scales,
                             orientations=orientations,
                             min_length=min_length,
                             mult=mult,
                             sigma_f=sigma_f,
                             delta_theta=delta_theta,
                             k=k)

    # Gradient maps
    kernels = torch.stack([scharr_filter(), scharr_filter().transpose(-1, -2)])
    grad_map_x = gradient_map(x_lum, kernels)
    grad_map_y = gradient_map(y_lum, kernels)

    # Constants from the paper
    T1, T2, T3, T4, lmbda = 0.85, 160, 200, 200, 0.03

    # Compute FSIM
    PC = similarity_map(pc_x, pc_y, T1)
    GM = similarity_map(grad_map_x, grad_map_y, T2)
    pc_max = torch.where(pc_x > pc_y, pc_x, pc_y)
    score = GM * PC * pc_max

    if chromatic:
        assert num_channels == 3, "Chromatic component can be computed only for RGB images!"
        S_I = similarity_map(x_i, y_i, T3)
        S_Q = similarity_map(x_q, y_q, T4)
        score = score * torch.abs(S_I * S_Q)**lmbda
        # Complex gradients will work in PyTorch 1.6.0
        # score = score * torch.real((S_I * S_Q).to(torch.complex64) ** lmbda)

    result = score.sum(dim=[1, 2, 3]) / pc_max.sum(dim=[1, 2, 3])

    if reduction == 'none':
        return result

    return {'mean': result.mean, 'sum': result.sum}[reduction](dim=0)
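
The gradient-map step above relies on `scharr_filter` and `gradient_map`, which are not shown. A hypothetical sketch under the assumption that the gradient magnitude is the norm of two Scharr filter responses; the kernel normalisation is an assumption:

import torch
import torch.nn.functional as F

def scharr_filter_sketch() -> torch.Tensor:
    # 3x3 Scharr kernel for gradients along one axis.
    return torch.tensor([[-3., 0., 3.], [-10., 0., 10.], [-3., 0., 3.]]) / 16.

def gradient_map_sketch(x_lum: torch.Tensor) -> torch.Tensor:
    # x_lum: single-channel luminance, shape (N, 1, H, W).
    kernels = torch.stack([scharr_filter_sketch(), scharr_filter_sketch().transpose(-1, -2)])
    grads = F.conv2d(x_lum, kernels.unsqueeze(1).to(x_lum), padding=1)  # (N, 2, H, W)
    return torch.sqrt(grads.pow(2).sum(dim=1, keepdim=True) + 1e-12)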
Example 8
def haarpsi(x: torch.Tensor, y: torch.Tensor, reduction: str = 'mean',
            data_range: Union[int, float] = 1., scales: int = 3, subsample: bool = True,
            c: float = 30.0, alpha: float = 4.2) -> torch.Tensor:
    r"""Compute Haar Wavelet-Based Perceptual Similarity
    Inputs supposed to be in range [0, data_range] with RGB channels order for colour images.
    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        data_range: Maximum value range of images (usually 1.0 or 255).
        scales: Number of Haar wavelets used for image decomposition.
        subsample: Flag to apply average pooling before HaarPSI computation. See [1] for details.
        c: Constant from the paper. See [1] for details.
        alpha: Exponent used for similarity map weighting. See [1] for details.

    Returns:
        HaarPSI: Haar Wavelet-Based Perceptual Similarity between two tensors.
    
    References:
        .. [1] R. Reisenhofer, S. Bosse, G. Kutyniok & T. Wiegand (2017)
           'A Haar Wavelet-Based Perceptual Similarity Index for Image Quality Assessment'
           http://www.math.uni-bremen.de/cda/HaarPSI/publications/HaarPSI_preprint_v4.pdf
        .. [2] Code from authors on MATLAB and Python
           https://github.com/rgcda/haarpsi
    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))

    # Assert minimal image size
    kernel_size = 2 ** (scales + 1)
    if x.size(-1) < kernel_size or x.size(-2) < kernel_size:
        raise ValueError(f'Kernel size can\'t be greater than actual input size. Input size: {x.size()}. '
                         f'Kernel size: {kernel_size}')

    # Rescale images
    x = x / data_range * 255
    y = y / data_range * 255

    num_channels = x.size(1)
    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)
    else:
        x_yiq = x
        y_yiq = y

    # Downscale input to simulate the typical distance between an image and its viewer.
    if subsample:
        up_pad = 0
        down_pad = max(x.shape[2] % 2, x.shape[3] % 2)
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        x_yiq = F.pad(x_yiq, pad=pad_to_use)
        y_yiq = F.pad(y_yiq, pad=pad_to_use)

        x_yiq = F.avg_pool2d(x_yiq, kernel_size=2, stride=2, padding=0)
        y_yiq = F.avg_pool2d(y_yiq, kernel_size=2, stride=2, padding=0)
    
    # Haar wavelet decomposition
    coefficients_x, coefficients_y = [], []
    for scale in range(scales):
        kernel_size = 2 ** (scale + 1)
        kernels = torch.stack([haar_filter(kernel_size), haar_filter(kernel_size).transpose(-1, -2)])
    
        # Asymmetrical padding due to even kernel size. Matches MATLAB conv2(A, B, 'same').
        upper_pad = kernel_size // 2 - 1
        bottom_pad = kernel_size // 2
        pad_to_use = [upper_pad, bottom_pad, upper_pad, bottom_pad]
        coeff_x = torch.nn.functional.conv2d(F.pad(x_yiq[:, : 1], pad=pad_to_use, mode='constant'), kernels.to(x))
        coeff_y = torch.nn.functional.conv2d(F.pad(y_yiq[:, : 1], pad=pad_to_use, mode='constant'), kernels.to(y))
    
        coefficients_x.append(coeff_x)
        coefficients_y.append(coeff_y)

    # Shape (N, {scales * 2}, H, W)
    coefficients_x = torch.cat(coefficients_x, dim=1)
    coefficients_y = torch.cat(coefficients_y, dim=1)

    # Low-frequency coefficients used as weights
    # Shape (N, 2, H, W)
    weights = torch.max(torch.abs(coefficients_x[:, 4:]), torch.abs(coefficients_y[:, 4:]))
    
    # High-frequency coefficients used for similarity computation in 2 orientations (horizontal and vertical)
    sim_map = []
    for orientation in range(2):
        magnitude_x = torch.abs(coefficients_x[:, (orientation, orientation + 2)])
        magnitude_y = torch.abs(coefficients_y[:, (orientation, orientation + 2)])
        sim_map.append(similarity_map(magnitude_x, magnitude_y, constant=c).sum(dim=1, keepdims=True) / 2)

    if num_channels == 3:
        pad_to_use = [0, 1, 0, 1]
        x_yiq = F.pad(x_yiq, pad=pad_to_use)
        y_yiq = F.pad(y_yiq, pad=pad_to_use)
        coefficients_x_iq = torch.abs(F.avg_pool2d(x_yiq[:, 1:], kernel_size=2, stride=1, padding=0))
        coefficients_y_iq = torch.abs(F.avg_pool2d(y_yiq[:, 1:], kernel_size=2, stride=1, padding=0))
    
        # Compute weights and similarity
        weights = torch.cat([weights, weights.mean(dim=1, keepdims=True)], dim=1)
        sim_map.append(
            similarity_map(coefficients_x_iq, coefficients_y_iq, constant=c).sum(dim=1, keepdims=True) / 2)

    sim_map = torch.cat(sim_map, dim=1)
    
    # Calculate the final score
    eps = torch.finfo(sim_map.dtype).eps
    score = (((sim_map * alpha).sigmoid() * weights).sum(dim=[1, 2, 3]) + eps) /\
        (torch.sum(weights, dim=[1, 2, 3]) + eps)
    # Logit of score
    score = (torch.log(score / (1 - score)) / alpha) ** 2

    return _reduce(score, reduction)
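
A usage sketch, assuming the function is exposed as `piq.haarpsi`:

import torch
import piq

x = torch.rand(4, 3, 256, 256)   # distorted images in [0, 1]
y = torch.rand(4, 3, 256, 256)   # reference images in [0, 1]
score = piq.haarpsi(x, y, data_range=1., reduction='mean')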
Example 9
def multi_scale_gmsd(x: torch.Tensor,
                     y: torch.Tensor,
                     data_range: Union[int, float] = 1.,
                     reduction: str = 'mean',
                     scale_weights: Optional[Union[torch.Tensor, Tuple[float,
                                                                       ...],
                                                   List[float]]] = None,
                     chromatic: bool = False,
                     alpha: float = 0.5,
                     beta1: float = 0.01,
                     beta2: float = 0.32,
                     beta3: float = 15.,
                     t: float = 170) -> torch.Tensor:
    r"""Computation of Multi scale GMSD.

    Inputs are expected to be in the range [0, data_range] with RGB channel order for colour images.
    The height and width should be at least 2 ** scales + 1.

    Args:
        x: Tensor with shape (H, W), (C, H, W) or (N, C, H, W).
        y: Tensor with shape (H, W), (C, H, W) or (N, C, H, W).
        data_range: The difference between the maximum and minimum of the pixel value,
            i.e., if for image x it holds min(x) = 0 and max(x) = 1, then data_range = 1.
            The pixel value interval of both input and output should remain the same.
        reduction: Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'``
        scale_weights: Weights for different scales. Can contain any number of floating point values.
        chromatic: Flag to use the MS-GMSDc algorithm from the paper.
            It also evaluates chromatic components of the image. Default: False
        alpha: Masking coefficient. See [1] for details.
        beta1: Algorithm parameter. Weight of chromatic component in the loss.
        beta2: Algorithm parameter. Small constant, see [1].
        beta3: Algorithm parameter. Small constant, see [1].
        t: Constant from the reference paper for numerical stability of the similarity map.

    Returns:
        Value of MS-GMSD. 0 <= GMSD loss <= 1.
    """
    _validate_input(input_tensors=(x, y),
                    allow_5d=False,
                    scale_weights=scale_weights,
                    data_range=data_range)
    x, y = _adjust_dimensions(input_tensors=(x, y))

    # Rescale
    x = x / data_range * 255
    y = y / data_range * 255

    # Values from the paper
    if scale_weights is None:
        scale_weights = torch.tensor([0.096, 0.596, 0.289, 0.019])
    else:
        # Normalize scale weights
        scale_weights = torch.tensor(scale_weights) / torch.tensor(
            scale_weights).sum()

    scale_weights = cast(torch.Tensor, scale_weights).to(x)

    # Check that input is big enough
    num_scales = scale_weights.size(0)
    min_size = 2**num_scales + 1

    if x.size(-1) < min_size or x.size(-2) < min_size:
        raise ValueError(
            f'Invalid size of the input images, expected at least {min_size}x{min_size}.'
        )

    num_channels = x.size(1)
    if num_channels == 3:
        x = rgb2yiq(x)
        y = rgb2yiq(y)

    ms_gmds = []
    for scale in range(num_scales):
        if scale > 0:

            # Average by 2x2 filter and downsample
            up_pad = 0
            down_pad = max(x.shape[2] % 2, x.shape[3] % 2)
            pad_to_use = [up_pad, down_pad, up_pad, down_pad]
            x = F.pad(x, pad=pad_to_use)
            y = F.pad(y, pad=pad_to_use)
            x = F.avg_pool2d(x, kernel_size=2, padding=0)
            y = F.avg_pool2d(y, kernel_size=2, padding=0)

        score = _gmsd(x[:, :1], y[:, :1], t=t, alpha=alpha)
        ms_gmds.append(score)

    # Stack results in different scales and multiply by weight
    ms_gmds_val = scale_weights.view(1, num_scales) * (torch.stack(ms_gmds,
                                                                   dim=1)**2)

    # Sum and take sqrt per-image
    ms_gmds_val = torch.sqrt(torch.sum(ms_gmds_val, dim=1))

    # Shape: (batch_size, )
    score = ms_gmds_val

    if chromatic:
        assert x.size(
            1) == 3, "Chromatic component can be computed only for RGB images!"

        x_iq = x[:, 1:]
        y_iq = y[:, 1:]

        rmse_iq = torch.sqrt(torch.mean((x_iq - y_iq)**2, dim=[2, 3]))
        rmse_chrome = torch.sqrt(torch.sum(rmse_iq**2, dim=1))
        gamma = 2 / (1 + beta2 * torch.exp(-beta3 * ms_gmds_val)) - 1

        score = gamma * ms_gmds_val + (1 - gamma) * beta1 * rmse_chrome

    if reduction == 'none':
        return score

    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)
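
Every colour example above relies on an `rgb2yiq` conversion that is not shown. A hypothetical sketch using the standard NTSC YIQ weights (the exact constants used by piq may differ slightly):

import torch

def rgb2yiq_sketch(x: torch.Tensor) -> torch.Tensor:
    # x: RGB images of shape (N, 3, H, W); returns YIQ with luminance Y in channel 0.
    yiq_weights = torch.tensor([
        [0.299, 0.587, 0.114],
        [0.5959, -0.2746, -0.3213],
        [0.2115, -0.5227, 0.3112],
    ]).to(x)
    return torch.einsum('nchw,kc->nkhw', x, yiq_weights)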
Example 10
def multi_scale_gmsd(
    prediction: torch.Tensor,
    target: torch.Tensor,
    data_range: Union[int, float] = 1.,
    reduction: str = 'mean',
    scale_weights: Optional[Union[torch.Tensor, Tuple[float, ...],
                                  List[float]]] = None,
    chromatic: bool = False,
    beta1: float = 0.01,
    beta2: float = 0.32,
    beta3: float = 15.,
    t: float = 170 / (255.**2)) -> torch.Tensor:
    r"""Computation of Multi scale GMSD.

    Args:
        prediction: Tensor holding the network prediction. The height and width should be at least 2 ** scales + 1.
        target: Reference tensor. The height and width should be at least 2 ** scales + 1.
        data_range: The difference between the maximum and minimum of the pixel value,
            i.e., if for image x it holds min(x) = 0 and max(x) = 1, then data_range = 1.
            The pixel value interval of both input and output should remain the same.
        reduction: Specifies the reduction to apply to the output:
            ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied,
            ``'mean'``: the sum of the output will be divided by the number of
            elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'``
        scale_weights: Weights for different scales. Can contain any number of floating point values.
        chromatic: Flag to use the MS-GMSDc algorithm from the paper.
            It also evaluates chromatic components of the image. Default: False
        beta1: Algorithm parameter. Weight of chromatic component in the loss.
        beta2: Algorithm parameter. Small constant, see [1].
        beta3: Algorithm parameter. Small constant, see [1].
        t: Constant from the reference paper for numerical stability of the similarity map.

    Returns:
        Value of MS-GMSD. 0 <= GMSD loss <= 1.
    """
    _validate_input(input_tensors=(prediction, target),
                    allow_5d=False,
                    scale_weights=scale_weights)
    prediction, target = _adjust_dimensions(input_tensors=(prediction, target))

    # Values from the paper
    if scale_weights is None:
        scale_weights = torch.tensor([0.096, 0.596, 0.289, 0.019])
    elif isinstance(scale_weights, torch.Tensor):
        scale_weights = scale_weights / scale_weights.sum()
    else:
        # Normalize scale weights
        scale_weights = torch.tensor(scale_weights) / torch.tensor(
            scale_weights).sum()

    # Check that input is big enough
    num_scales = scale_weights.size(0)
    min_size = 2**num_scales + 1

    if prediction.size(-1) < min_size or prediction.size(-2) < min_size:
        raise ValueError(
            f'Invalid size of the input images, expected at least {min_size}x{min_size}.'
        )

    prediction = prediction / float(data_range)
    target = target / float(data_range)

    num_channels = prediction.size(1)
    if num_channels == 3:
        prediction = rgb2yiq(prediction)
        target = rgb2yiq(target)

    scale_weights = scale_weights.to(prediction)
    ms_gmds = []
    for scale in range(num_scales):
        if scale > 0:
            # Average by 2x2 filter and downsample
            up_pad = 0
            down_pad = max(prediction.shape[2] % 2, prediction.shape[3] % 2)
            pad_to_use = [up_pad, down_pad, up_pad, down_pad]
            prediction = F.pad(prediction, pad=pad_to_use)
            target = F.pad(target, pad=pad_to_use)
            prediction = F.avg_pool2d(prediction, kernel_size=2, padding=0)
            target = F.avg_pool2d(target, kernel_size=2, padding=0)

        score = _gmsd(prediction[:, :1], target[:, :1], t=t)
        ms_gmds.append(score)

    # Stack results in different scales and multiply by weight
    ms_gmds_val = scale_weights.view(1, num_scales) * (torch.stack(ms_gmds,
                                                                   dim=1)**2)

    # Sum and take sqrt per-image
    ms_gmds_val = torch.sqrt(torch.sum(ms_gmds_val, dim=1))

    # Shape: (batch_size, )
    score = ms_gmds_val

    if chromatic:
        assert prediction.size(
            1) == 3, "Chromatic component can be computed only for RGB images!"

        prediction_iq = prediction[:, 1:]
        target_iq = target[:, 1:]

        rmse_iq = torch.sqrt(
            torch.mean((prediction_iq - target_iq)**2, dim=[2, 3]))
        rmse_chrome = torch.sqrt(torch.sum(rmse_iq**2, dim=1))
        gamma = 2 / (1 + beta2 * torch.exp(-beta3 * ms_gmds_val)) - 1

        score = gamma * ms_gmds_val + (1 - gamma) * beta1 * rmse_chrome

    if reduction == 'none':
        return score

    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)
Example 11
def fsim(x: torch.Tensor, y: torch.Tensor, reduction: str = 'mean',
         data_range: Union[int, float] = 1.0, chromatic: bool = True,
         scales: int = 4, orientations: int = 4, min_length: int = 6,
         mult: int = 2, sigma_f: float = 0.55, delta_theta: float = 1.2,
         k: float = 2.0) -> torch.Tensor:
    r"""Compute Feature Similarity Index Measure for a batch of images.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        data_range: Maximum value range of images (usually 1.0 or 255).
        chromatic: Flag to compute FSIMc, which also takes into account chromatic components
        scales: Number of wavelets used for computation of phase congruency maps
        orientations: Number of filter orientations used for computation of phase congruency maps
        min_length: Wavelength of smallest scale filter
        mult: Scaling factor between successive filters
        sigma_f: Ratio of the standard deviation of the Gaussian describing the log Gabor filter's
            transfer function in the frequency domain to the filter center frequency.
        delta_theta: Ratio of angular interval between filter orientations and the standard deviation
            of the angular Gaussian function used to construct filters in the frequency plane.
        k: Number of standard deviations of the noise energy beyond the mean at which we set the noise
            threshold point, below which phase congruency values get penalized.

    Returns:
        Index of similarity between two images. Usually in [0, 1] interval.
        Can be bigger than 1 for predicted :math:`x` images with higher contrast than the original ones.

    References:
        L. Zhang, L. Zhang, X. Mou and D. Zhang, "FSIM: A Feature Similarity Index for Image Quality Assessment,"
        IEEE Transactions on Image Processing, vol. 20, no. 8, pp. 2378-2386, Aug. 2011, doi: 10.1109/TIP.2011.2109730.
        https://ieeexplore.ieee.org/document/5705575

    Note:
        This implementation is based on the original MATLAB code.
        https://www4.comp.polyu.edu.hk/~cslzhang/IQA/FSIM/FSIM.htm

    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))

    # Rescale to the [0, 255] range because all constants are calculated for this range
    x = x / float(data_range) * 255
    y = y / float(data_range) * 255

    # Apply average pooling
    kernel_size = max(1, round(min(x.shape[-2:]) / 256))
    x = torch.nn.functional.avg_pool2d(x, kernel_size)
    y = torch.nn.functional.avg_pool2d(y, kernel_size)

    num_channels = x.size(1)

    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)

        x_lum = x_yiq[:, : 1]
        y_lum = y_yiq[:, : 1]

        x_i = x_yiq[:, 1:2]
        y_i = y_yiq[:, 1:2]
        x_q = x_yiq[:, 2:]
        y_q = y_yiq[:, 2:]

    else:
        x_lum = x
        y_lum = y

    # Compute phase congruency maps
    pc_x = _phase_congruency(
        x_lum, scales=scales, orientations=orientations,
        min_length=min_length, mult=mult, sigma_f=sigma_f,
        delta_theta=delta_theta, k=k
    )
    pc_y = _phase_congruency(
        y_lum, scales=scales, orientations=orientations,
        min_length=min_length, mult=mult, sigma_f=sigma_f,
        delta_theta=delta_theta, k=k
    )

    # Gradient maps
    kernels = torch.stack([scharr_filter(), scharr_filter().transpose(-1, -2)])
    grad_map_x = gradient_map(x_lum, kernels)
    grad_map_y = gradient_map(y_lum, kernels)

    # Constants from the paper
    T1, T2, T3, T4, lmbda = 0.85, 160, 200, 200, 0.03

    # Compute FSIM
    PC = similarity_map(pc_x, pc_y, T1)
    GM = similarity_map(grad_map_x, grad_map_y, T2)
    pc_max = torch.where(pc_x > pc_y, pc_x, pc_y)
    score = GM * PC * pc_max

    if chromatic:
        assert num_channels == 3, "Chromatic component can be computed only for RGB images!"
        S_I = similarity_map(x_i, y_i, T3)
        S_Q = similarity_map(x_q, y_q, T4)
        score = score * torch.abs(S_I * S_Q) ** lmbda
        # Complex gradients will work in PyTorch 1.6.0
        # score = score * torch.real((S_I * S_Q).to(torch.complex64) ** lmbda)

    result = score.sum(dim=[1, 2, 3]) / pc_max.sum(dim=[1, 2, 3])

    return _reduce(result, reduction)
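
A usage sketch, assuming the function is exposed as `piq.fsim`. Setting `chromatic=True` computes FSIMc and requires RGB input:

import torch
import piq

x = torch.rand(4, 3, 256, 256)
y = torch.rand(4, 3, 256, 256)
score = piq.fsim(x, y, data_range=1., chromatic=True)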
Example 12
def information_weighted_ssim(x: torch.Tensor, y: torch.Tensor, data_range: Union[int, float] = 1.,
                              kernel_size: int = 11, kernel_sigma: float = 1.5, k1: float = 0.01, k2: float = 0.03,
                              parent: bool = True, blk_size: int = 3, sigma_nsq: float = 0.4,
                              scale_weights: Optional[torch.Tensor] = None,
                              reduction: str = 'mean') -> torch.Tensor:
    r"""Interface of Information Content Weighted Structural Similarity (IW-SSIM) index.
    Inputs supposed to be in range ``[0, data_range]``.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        data_range: Maximum value range of images (usually 1.0 or 255).
        kernel_size: The side-length of the sliding window used in comparison. Must be an odd value.
        kernel_sigma: Sigma of normal distribution for sliding window used in comparison.
        k1: Algorithm parameter, K1 (small constant).
        k2: Algorithm parameter, K2 (small constant).
            Try a larger K2 constant (e.g. 0.4) if you get a negative or NaN results.
        parent: Flag to control dependency on previous layer of pyramid.
        blk_size: The side-length of the sliding window used in comparison for information content.
        sigma_nsq: Parameter of visual distortion model.
        scale_weights: Weights for scaling.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``

    Returns:
        Value of Information Content Weighted Structural Similarity (IW-SSIM) index.

    References:
        Wang, Zhou, and Qiang Li.
        Information content weighting for perceptual image quality assessment.
        IEEE Transactions on image processing 20.5 (2011): 1185-1198.
        https://ece.uwaterloo.ca/~z70wang/publications/IWSSIM.pdf DOI:`10.1109/TIP.2010.2092435`

    Note:
        Lack of content in target image could lead to RuntimeError due to singular information content matrix,
        which cannot be inverted.
    """
    assert kernel_size % 2 == 1, f'Kernel size must be odd, got [{kernel_size}]'

    _validate_input(tensors=[x, y], dim_range=(4, 4), data_range=(0., data_range))

    x = x / float(data_range) * 255
    y = y / float(data_range) * 255

    if x.size(1) == 3:
        x = rgb2yiq(x)[:, :1]
        y = rgb2yiq(y)[:, :1]

    if scale_weights is None:
        scale_weights = torch.tensor([0.0448, 0.2856, 0.3001, 0.2363, 0.1333], dtype=x.dtype, device=x.device)
    scale_weights = scale_weights / scale_weights.sum()
    if scale_weights.size(0) != scale_weights.numel():
        raise ValueError(f'Expected a vector of weights, got {scale_weights.dim()}D tensor')

    levels = scale_weights.size(0)

    min_size = (kernel_size - 1) * 2 ** (levels - 1) + 1
    if x.size(-1) < min_size or x.size(-2) < min_size:
        raise ValueError(f'Invalid size of the input images, expected at least {min_size}x{min_size}.')

    blur_pad = math.ceil((kernel_size - 1) / 2)  # Ceil
    iw_pad = blur_pad - math.floor((blk_size - 1) / 2)  # floor
    gauss_kernel = gaussian_filter(kernel_size, kernel_sigma).repeat(x.size(1), 1, 1, 1).to(x)

    # Size of the kernel used to build the Laplacian pyramid
    pyramid_kernel_size = 5
    bin_filter = binomial_filter1d(kernel_size=pyramid_kernel_size).to(x) * 2 ** 0.5

    lo_x, x_diff_old = _pyr_step(x, bin_filter)
    lo_y, y_diff_old = _pyr_step(y, bin_filter)

    x = lo_x
    y = lo_y
    wmcs = []

    for i in range(levels):
        if i < levels - 2:
            lo_x, x_diff = _pyr_step(x, bin_filter)
            lo_y, y_diff = _pyr_step(y, bin_filter)
            x = lo_x
            y = lo_y

        else:
            x_diff = x
            y_diff = y

        ssim_map, cs_map = _ssim_per_channel(x=x_diff_old, y=y_diff_old, kernel=gauss_kernel, data_range=255,
                                             k1=k1, k2=k2)

        if parent and i < levels - 2:
            iw_map = _information_content(x=x_diff_old, y=y_diff_old, y_parent=y_diff, kernel_size=blk_size,
                                          sigma_nsq=sigma_nsq)

            iw_map = iw_map[:, :, iw_pad:-iw_pad, iw_pad:-iw_pad]

        elif i == levels - 1:
            iw_map = torch.ones_like(cs_map)
            cs_map = ssim_map

        else:
            iw_map = _information_content(x=x_diff_old, y=y_diff_old, y_parent=None, kernel_size=blk_size,
                                          sigma_nsq=sigma_nsq)
            iw_map = iw_map[:, :, iw_pad:-iw_pad, iw_pad:-iw_pad]

        wmcs.append(torch.sum(cs_map * iw_map, dim=(-2, -1)) / torch.sum(iw_map, dim=(-2, -1)))

        x_diff_old = x_diff
        y_diff_old = y_diff

    wmcs = torch.stack(wmcs, dim=0).abs()

    score = torch.prod((wmcs ** scale_weights.view(-1, 1, 1)), dim=0)[:, 0]

    return _reduce(x=score, reduction=reduction)
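
A usage sketch, assuming the function is exposed as `piq.information_weighted_ssim`. With the defaults (kernel_size=11 and five scale weights), inputs need at least (11 - 1) * 2 ** 4 + 1 = 161 pixels per side:

import torch
import piq

x = torch.rand(2, 1, 192, 192)   # greyscale images in [0, 1]
y = torch.rand(2, 1, 192, 192)
score = piq.information_weighted_ssim(x, y, data_range=1.)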
Example 13
def multi_scale_gmsd(x: torch.Tensor, y: torch.Tensor, data_range: Union[int, float] = 1., reduction: str = 'mean',
                     scale_weights: Optional[torch.Tensor] = None,
                     chromatic: bool = False, alpha: float = 0.5, beta1: float = 0.01, beta2: float = 0.32,
                     beta3: float = 15., t: float = 170) -> torch.Tensor:
    r"""Computation of Multi scale GMSD.

    Supports greyscale and colour images with RGB channel order.
    The height and width should be at least 2 ** scales + 1.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        data_range: Maximum value range of images (usually 1.0 or 255).
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        scale_weights: Weights for different scales. Can contain any number of floating point values.
        chromatic: Flag to use the MS-GMSDc algorithm from the paper.
            It also evaluates chromatic components of the image. Default: False
        alpha: Masking coefficient. See [1] for details.
        beta1: Algorithm parameter. Weight of chromatic component in the loss.
        beta2: Algorithm parameter. Small constant, see [1].
        beta3: Algorithm parameter. Small constant, see [1].
        t: Constant from the reference paper for numerical stability of the similarity map.

    Returns:
        Value of MS-GMSD. 0 <= GMSD loss <= 1.
    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))
    
    # Rescale
    x = x / data_range * 255
    y = y / data_range * 255
    
    # Values from the paper
    if scale_weights is None:
        scale_weights = torch.tensor([0.096, 0.596, 0.289, 0.019], device=x.device)
    else:
        # Normalize scale weights
        scale_weights = (scale_weights / scale_weights.sum()).to(x)

    # Check that input is big enough
    num_scales = scale_weights.size(0)
    min_size = 2 ** num_scales + 1

    if x.size(-1) < min_size or x.size(-2) < min_size:
        raise ValueError(f'Invalid size of the input images, expected at least {min_size}x{min_size}.')

    num_channels = x.size(1)
    if num_channels == 3:
        x = rgb2yiq(x)
        y = rgb2yiq(y)

    ms_gmds = []
    for scale in range(num_scales):
        if scale > 0:

            # Average by 2x2 filter and downsample
            up_pad = 0
            down_pad = max(x.shape[2] % 2, x.shape[3] % 2)
            pad_to_use = [up_pad, down_pad, up_pad, down_pad]
            x = F.pad(x, pad=pad_to_use)
            y = F.pad(y, pad=pad_to_use)
            x = F.avg_pool2d(x, kernel_size=2, padding=0)
            y = F.avg_pool2d(y, kernel_size=2, padding=0)

        score = _gmsd(x[:, :1], y[:, :1], t=t, alpha=alpha)
        ms_gmds.append(score)

    # Stack results in different scales and multiply by weight
    ms_gmds_val = scale_weights.view(1, num_scales) * (torch.stack(ms_gmds, dim=1) ** 2)

    # Sum and take sqrt per-image
    ms_gmds_val = torch.sqrt(torch.sum(ms_gmds_val, dim=1))

    # Shape: (batch_size, )
    score = ms_gmds_val

    if chromatic:
        assert x.size(1) == 3, "Chromatic component can be computed only for RGB images!"

        x_iq = x[:, 1:]
        y_iq = y[:, 1:]

        rmse_iq = torch.sqrt(torch.mean((x_iq - y_iq) ** 2, dim=[2, 3]))
        rmse_chrome = torch.sqrt(torch.sum(rmse_iq ** 2, dim=1))
        gamma = 2 / (1 + beta2 * torch.exp(-beta3 * ms_gmds_val)) - 1

        score = gamma * ms_gmds_val + (1 - gamma) * beta1 * rmse_chrome

    return _reduce(score, reduction)
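
A usage sketch, assuming the function is exposed as `piq.multi_scale_gmsd`. With the four default scale weights, inputs need at least 2 ** 4 + 1 = 17 pixels per side:

import torch
import piq

x = torch.rand(4, 3, 128, 128)
y = torch.rand(4, 3, 128, 128)
score = piq.multi_scale_gmsd(x, y, data_range=1., chromatic=True)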
Example 14
def dss(x: torch.Tensor,
        y: torch.Tensor,
        reduction: str = 'mean',
        data_range: Union[int, float] = 1.0,
        dct_size: int = 8,
        sigma_weight: float = 1.55,
        kernel_size: int = 3,
        sigma_similarity: float = 1.5,
        percentile: float = 0.05) -> torch.Tensor:
    r"""Compute DCT Subband Similarity index for a batch of images.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        data_range: Maximum value range of images (usually 1.0 or 255).
        dct_size: Size of blocks in 2D Discrete Cosine Transform. DCT sizes must be in (0, input size].
        sigma_weight: STD of gaussian that determines the proportion of weight given to low freq and high freq.
            Default: 1.55
        kernel_size: Size of gaussian kernel for computing subband similarity. Kernels size must be in (0, input size].
            Default: 3
        sigma_similarity: STD of gaussian kernel for computing subband similarity. Default: 1.5
        percentile: Fraction in (0, 1] of the worst similarity scores which should be kept. Default: 0.05
    Returns:
        DSS: Index of similarity between two images. In [0, 1] interval.
    Note:
        This implementation is based on the original MATLAB code (see header).
        Image will be scaled to [0, 255] because all constants are computed for this range.
        Make sure you know what you are doing when changing default coefficient values.
    """
    if sigma_weight == 0 or sigma_similarity == 0:
        raise ValueError(
            f'Gaussian sigmas must not be 0, got sigma_weight: {sigma_weight} and '
            f'sigma_similarity: {sigma_similarity}')

    if percentile <= 0 or percentile > 1:
        raise ValueError(f'Percentile must be in (0,1], got {percentile}')

    _validate_input(tensors=[x, y], dim_range=(4, 4))

    for size in (dct_size, kernel_size):
        if size <= 0 or size > min(x.size(-1), x.size(-2)):
            raise ValueError(
                'DCT and kernels sizes must be included in (0, input size]')

    # Rescale to [0, 255] range, because all constant are calculated for this factor
    x = (x / float(data_range)) * 255
    y = (y / float(data_range)) * 255

    num_channels = x.size(1)

    # Use luminance channel in case of RGB images (Y from YIQ or YCrCb)
    if num_channels == 3:
        x_lum = rgb2yiq(x)[:, :1]
        y_lum = rgb2yiq(y)[:, :1]
    else:
        x_lum = x
        y_lum = y

    # Crop images to the closest multiple of `dct_size`
    rows, cols = x_lum.size()[-2:]
    rows = dct_size * (rows // dct_size)
    cols = dct_size * (cols // dct_size)
    x_lum = x_lum[:, :, 0:rows, 0:cols]
    y_lum = y_lum[:, :, 0:rows, 0:cols]

    # Channel decomposition for both images by `dct_size`x`dct_size` 2D DCT
    dct_x = _dct_decomp(x_lum, dct_size)
    dct_y = _dct_decomp(y_lum, dct_size)

    # Create a Gaussian window that will be used to weight subbands scores
    coords = torch.arange(1, dct_size + 1).to(x)
    weight = (coords - 0.5)**2
    weight = (-(weight.unsqueeze(0) + weight.unsqueeze(1)) /
              (2 * sigma_weight**2)).exp()

    # Compute similarity between each subband in img1 and img2
    subband_sim_matrix = torch.zeros((x.size(0), dct_size, dct_size),
                                     device=x.device)
    threshold = 1e-2
    for m in range(dct_size):
        for n in range(dct_size):
            first_term = (m == 0 and n == 0)  # True only for the DC (lowest-frequency) subband

            # Skip subbands with very small weight
            if weight[m, n] < threshold:
                weight[m, n] = 0
                continue

            subband_sim_matrix[:, m, n] = _subband_similarity(
                dct_x[:, :, m::dct_size, n::dct_size], dct_y[:, :, m::dct_size,
                                                             n::dct_size],
                first_term, kernel_size, sigma_similarity, percentile)

    # Weight subbands similarity scores
    eps = torch.finfo(weight.dtype).eps
    similarity_scores = torch.sum(subband_sim_matrix *
                                  (weight / (torch.sum(weight)) + eps),
                                  dim=[1, 2])
    dss_val = _reduce(similarity_scores, reduction)
    return dss_val
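
A usage sketch, assuming the function is exposed as `piq.dss`:

import torch
import piq

x = torch.rand(4, 3, 256, 256)
y = torch.rand(4, 3, 256, 256)
score = piq.dss(x, y, data_range=1., dct_size=8)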
Example 15
def srsim(x: torch.Tensor,
          y: torch.Tensor,
          reduction: str = 'mean',
          data_range: Union[int, float] = 1.0,
          chromatic: bool = False,
          scale: float = 0.25,
          kernel_size: int = 3,
          sigma: float = 3.8,
          gaussian_size: int = 10) -> torch.Tensor:
    r"""Compute Spectral Residual based Similarity for a batch of images.

    Args:
        x: Predicted images. Shape (H, W), (C, H, W) or (N, C, H, W).
        y: Target images. Shape (H, W), (C, H, W) or (N, C, H, W).
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        chromatic: Flag to compute SR-SIMc, which also takes into account chromatic components
        scale: Resizing factor used in saliency map computation
        kernel_size: Kernel size of average blur filter used in saliency map computation
        sigma: Sigma of gaussian filter applied on saliency map
        gaussian_size: Size of gaussian filter applied on saliency map
    Returns:
        SR-SIM: Index of similarity between two images. Usually in [0, 1] interval.
            Can be bigger than 1 for predicted images with higher contrast than the original ones.
    Note:
        This implementation is based on the original MATLAB code.
        https://sse.tongji.edu.cn/linzhang/IQA/SR-SIM/Files/SR_SIM.m

    """
    _validate_input(tensors=[x, y],
                    dim_range=(4, 4),
                    data_range=(0, data_range))

    # Rescale to the [0, 255] range because all constants are calculated for this range
    x = (x / float(data_range)) * 255
    y = (y / float(data_range)) * 255

    # Average the image if it is large enough
    ksize = max(1, round(min(x.size()[-2:]) / 256))
    padding = ksize // 2

    if padding:
        up_pad = (ksize - 1) // 2
        down_pad = padding
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        x = F.pad(x, pad=pad_to_use)
        y = F.pad(y, pad=pad_to_use)

    x = F.avg_pool2d(x, ksize)
    y = F.avg_pool2d(y, ksize)

    num_channels = x.size(1)

    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)

        x_lum = x_yiq[:, :1]
        y_lum = y_yiq[:, :1]

        x_i = x_yiq[:, 1:2]
        y_i = y_yiq[:, 1:2]
        x_q = x_yiq[:, 2:]
        y_q = y_yiq[:, 2:]

    else:
        if chromatic:
            raise ValueError(
                'Chromatic component can be computed only for RGB images!')
        x_lum = x
        y_lum = y

    # Compute spectral residual visual saliency maps
    svrs_x = _spectral_residual_visual_saliency(x_lum,
                                                scale=scale,
                                                kernel_size=kernel_size,
                                                sigma=sigma,
                                                gaussian_size=gaussian_size)
    svrs_y = _spectral_residual_visual_saliency(y_lum,
                                                scale=scale,
                                                kernel_size=kernel_size,
                                                sigma=sigma,
                                                gaussian_size=gaussian_size)

    # Gradient maps
    kernels = torch.stack([scharr_filter(), scharr_filter().transpose(-1, -2)])
    grad_map_x = gradient_map(x_lum, kernels)
    grad_map_y = gradient_map(y_lum, kernels)

    # Constants from the paper
    C1, C2, alpha = 0.40, 225, 0.50

    # Compute SR-SIM
    SVRS = similarity_map(svrs_x, svrs_y, C1)
    GM = similarity_map(grad_map_x, grad_map_y, C2)
    svrs_max = torch.where(svrs_x > svrs_y, svrs_x, svrs_y)
    score = SVRS * (GM**alpha) * svrs_max

    if chromatic:
        # Constants from FSIM paper, use same method for color image
        T3, T4, lmbda = 200, 200, 0.03

        S_I = similarity_map(x_i, y_i, T3)
        S_Q = similarity_map(x_q, y_q, T4)
        score = score * torch.abs(S_I * S_Q)**lmbda
        # Complex gradients will work in PyTorch 1.6.0
        # score = score * torch.real((S_I * S_Q).to(torch.complex64) ** lmbda)

    eps = torch.finfo(score.dtype).eps
    result = score.sum(dim=[1, 2, 3]) / (svrs_max.sum(dim=[1, 2, 3]) + eps)

    return _reduce(result, reduction)
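
A usage sketch, assuming the function is exposed as `piq.srsim`:

import torch
import piq

x = torch.rand(4, 3, 256, 256)
y = torch.rand(4, 3, 256, 256)
score = piq.srsim(x, y, data_range=1., chromatic=False)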