Python gradient_map 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: piq.functional

메소드/함수: gradient_map

hotexamples.com에서의 예제들: 10

Python gradient_map - 10개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 piq.functional.gradient_map에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

def _gmsd(prediction: torch.Tensor,
          target: torch.Tensor,
          t: float = 170 / (255.**2)) -> torch.Tensor:
    r"""Compute Gradient Magnitude Similarity Deviation
    Both inputs supposed to be in range [0, 1] with RGB order.
    Args:
        prediction: Tensor of shape :math:`(N, 1, H, W)` holding an distorted grayscale image.
        target: Tensor of shape :math:`(N, 1, H, W)` holding an target grayscale image
        t: Constant from the reference paper numerical stability of similarity map

    Returns:
        gmsd : Gradient Magnitude Similarity Deviation between given tensors.

    References:
        https://arxiv.org/pdf/1308.3052.pdf
    """

    # Compute grad direction
    kernels = torch.stack(
        [prewitt_filter(),
         prewitt_filter().transpose(-1, -2)])
    pred_grad = gradient_map(prediction, kernels)
    trgt_grad = gradient_map(target, kernels)

    # Compute GMS
    gms = similarity_map(pred_grad, trgt_grad, t)
    mean_gms = torch.mean(gms, dim=[1, 2, 3], keepdims=True)
    # Compute GMSD along spatial dimensions. Shape (batch_size )
    score = torch.pow(gms - mean_gms, 2).mean(dim=[1, 2, 3]).sqrt()
    return score

예제 #2

파일 보기

파일: gmsd.py 프로젝트: Linaom1214/piq

def _gmsd(x: torch.Tensor,
          y: torch.Tensor,
          t: float = 170 / (255.**2),
          alpha: float = 0.0) -> torch.Tensor:
    r"""Compute Gradient Magnitude Similarity Deviation
    Both inputs supposed to be in range [0, 1] with RGB channels order.
    Args:
        x: Tensor with shape (N, 1, H, W).
        y: Tensor with shape (N, 1, H, W).
        t: Constant from the reference paper numerical stability of similarity map
        alpha: Masking coefficient for similarity masks computation

    Returns:
        gmsd : Gradient Magnitude Similarity Deviation between given tensors.

    References:
        https://arxiv.org/pdf/1308.3052.pdf
    """

    # Compute grad direction
    kernels = torch.stack(
        [prewitt_filter(),
         prewitt_filter().transpose(-1, -2)])
    x_grad = gradient_map(x, kernels)
    y_grad = gradient_map(y, kernels)

    # Compute GMS
    gms = similarity_map(x_grad, y_grad, constant=t, alpha=alpha)
    mean_gms = torch.mean(gms, dim=[1, 2, 3], keepdims=True)

    # Compute GMSD along spatial dimensions. Shape (batch_size )
    score = torch.pow(gms - mean_gms, 2).mean(dim=[1, 2, 3]).sqrt()
    return score

예제 #3

파일 보기

def fsim(x: torch.Tensor,
         y: torch.Tensor,
         reduction: str = 'mean',
         data_range: Union[int, float] = 1.0,
         chromatic: bool = True,
         scales: int = 4,
         orientations: int = 4,
         min_length: int = 6,
         mult: int = 2,
         sigma_f: float = 0.55,
         delta_theta: float = 1.2,
         k: float = 2.0) -> torch.Tensor:
    r"""Compute Feature Similarity Index Measure for a batch of images.

    Args:
        x: Predicted images set :math:`x`.
            Shape (H, W), (C, H, W) or (N, C, H, W).
        y: Target images set :math:`y`.
            Shape (H, W), (C, H, W) or (N, C, H, W).
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        chromatic: Flag to compute FSIMc, which also takes into account chromatic components
        scales: Number of wavelets used for computation of phase congruensy maps
        orientations: Number of filter orientations used for computation of phase congruensy maps
        min_length: Wavelength of smallest scale filter
        mult: Scaling factor between successive filters
        sigma_f: Ratio of the standard deviation of the Gaussian describing the log Gabor filter's
            transfer function in the frequency domain to the filter center frequency.
        delta_theta: Ratio of angular interval between filter orientations and the standard deviation
            of the angular Gaussian function used to construct filters in the frequency plane.
        k: No of standard deviations of the noise energy beyond the mean at which we set the noise
            threshold  point, below which phase congruency values get penalized.
        
    Returns:
        FSIM: Index of similarity betwen two images. Usually in [0, 1] interval.
            Can be bigger than 1 for predicted (x) images with higher contrast than the original ones.
    Note:
        This implementation is based on the original MATLAB code.
        https://www4.comp.polyu.edu.hk/~cslzhang/IQA/FSIM/FSIM.htm
        
    """

    _validate_input(input_tensors=(x, y),
                    allow_5d=False,
                    data_range=data_range)
    x, y = _adjust_dimensions(input_tensors=(x, y))

    # Rescale to [0, 255] range, because all constant are calculated for this factor
    x = x / data_range * 255
    y = y / data_range * 255

    # Apply average pooling
    kernel_size = max(1, round(min(x.shape[-2:]) / 256))
    x = torch.nn.functional.avg_pool2d(x, kernel_size)
    y = torch.nn.functional.avg_pool2d(y, kernel_size)

    num_channels = x.size(1)

    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)

        x_lum = x_yiq[:, :1]
        y_lum = y_yiq[:, :1]

        x_i = x_yiq[:, 1:2]
        y_i = y_yiq[:, 1:2]
        x_q = x_yiq[:, 2:]
        y_q = y_yiq[:, 2:]

    else:
        x_lum = x
        y_lum = y

    # Compute phase congruency maps
    pc_x = _phase_congruency(x_lum,
                             scales=scales,
                             orientations=orientations,
                             min_length=min_length,
                             mult=mult,
                             sigma_f=sigma_f,
                             delta_theta=delta_theta,
                             k=k)
    pc_y = _phase_congruency(y_lum,
                             scales=scales,
                             orientations=orientations,
                             min_length=min_length,
                             mult=mult,
                             sigma_f=sigma_f,
                             delta_theta=delta_theta,
                             k=k)

    # Gradient maps
    kernels = torch.stack([scharr_filter(), scharr_filter().transpose(-1, -2)])
    grad_map_x = gradient_map(x_lum, kernels)
    grad_map_y = gradient_map(y_lum, kernels)

    # Constants from the paper
    T1, T2, T3, T4, lmbda = 0.85, 160, 200, 200, 0.03

    # Compute FSIM
    PC = similarity_map(pc_x, pc_y, T1)
    GM = similarity_map(grad_map_x, grad_map_y, T2)
    pc_max = torch.where(pc_x > pc_y, pc_x, pc_y)
    score = GM * PC * pc_max

    if chromatic:
        assert num_channels == 3, "Chromatic component can be computed only for RGB images!"
        S_I = similarity_map(x_i, y_i, T3)
        S_Q = similarity_map(x_q, y_q, T4)
        score = score * torch.abs(S_I * S_Q)**lmbda
        # Complex gradients will work in PyTorch 1.6.0
        # score = score * torch.real((S_I * S_Q).to(torch.complex64) ** lmbda)

    result = score.sum(dim=[1, 2, 3]) / pc_max.sum(dim=[1, 2, 3])

    if reduction == 'none':
        return result

    return {'mean': result.mean, 'sum': result.sum}[reduction](dim=0)

예제 #4

파일 보기

파일: vsi.py 프로젝트: pyd1998/piq

def vsi(prediction: torch.Tensor,
        target: torch.Tensor,
        reduction: str = 'mean',
        data_range: Union[int, float] = 1.,
        c1: float = 1.27,
        c2: float = 386.,
        c3: float = 130.,
        alpha: float = 0.4,
        beta: float = 0.02,
        omega_0: float = 0.021,
        sigma_f: float = 1.34,
        sigma_d: float = 145.,
        sigma_c: float = 0.001) -> torch.Tensor:
    r"""Compute Visual Saliency-induced Index for a batch of images.

    Both inputs are supposed to have RGB channels order in accordance with the original approach.
    Nevertheless, the method supports greyscale images, which they are converted to RGB by copying the grey
    channel 3 times.

    Args:
        prediction:  Tensor with shape (H, W), (C, H, W) or (N, C, H, W) holding a distorted image.
        target: Tensor with shape (H, W), (C, H, W) or (N, C, H, W) holding a target image.
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        c1: coefficient to calculate saliency component of VSI
        c2: coefficient to calculate gradient component of VSI
        c3: coefficient to calculate color component of VSI
        alpha: power for gradient component of VSI
        beta: power for color component of VSI
        omega_0: coefficient to get log Gabor filter at SDSP
        sigma_f: coefficient to get log Gabor filter at SDSP
        sigma_d: coefficient to get SDSP
        sigma_c: coefficient to get SDSP

    Returns:
        VSI: Index of similarity between two images. Usually in [0, 1] interval.

    Shape:
        - Input:  Required to be 2D (H, W), 3D (C, H, W) or 4D (N, C, H, W). RGB channel order for colour images.
        - Target: Required to be 2D (H, W), 3D (C, H, W) or 4D (N, C, H, W). RGB channel order for colour images.

    References:
        .. [1] Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P.
           (2004). Image quality assessment: From error visibility to
           structural similarity. IEEE Transactions on Image Processing,
           13, 600-612.
           https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
           DOI:`10.1109/TIP.2003.819861`

    Note:
        The original method supports only RGB image.
        See https://ieeexplore.ieee.org/document/6873260 for details.
    """
    _validate_input(input_tensors=(prediction, target), allow_5d=False)
    prediction, target = _adjust_dimensions(input_tensors=(prediction, target))
    if prediction.size(1) == 1:
        prediction = prediction.repeat(1, 3, 1, 1)
        target = target.repeat(1, 3, 1, 1)
        warnings.warn(
            'The original VSI supports only RGB images. The input images were converted to RGB by copying '
            'the grey channel 3 times.')

    # Scale to [0, 255] range to match scale of constant
    prediction = prediction * 255. / data_range
    target = target * 255. / data_range

    vs_prediction = sdsp(prediction,
                         data_range=255,
                         omega_0=omega_0,
                         sigma_f=sigma_f,
                         sigma_d=sigma_d,
                         sigma_c=sigma_c)
    vs_target = sdsp(target,
                     data_range=255,
                     omega_0=omega_0,
                     sigma_f=sigma_f,
                     sigma_d=sigma_d,
                     sigma_c=sigma_c)

    # Convert to LMN colour space
    prediction_lmn = rgb2lmn(prediction)
    target_lmn = rgb2lmn(target)

    # Averaging image if the size is large enough
    kernel_size = max(1, round(min(vs_prediction.size()[-2:]) / 256))
    padding = kernel_size // 2

    if padding:
        upper_pad = padding
        bottom_pad = (kernel_size - 1) // 2
        pad_to_use = [upper_pad, bottom_pad, upper_pad, bottom_pad]
        mode = 'replicate'
        vs_prediction = pad(vs_prediction, pad=pad_to_use, mode=mode)
        vs_target = pad(vs_target, pad=pad_to_use, mode=mode)
        prediction_lmn = pad(prediction_lmn, pad=pad_to_use, mode=mode)
        target_lmn = pad(target_lmn, pad=pad_to_use, mode=mode)

    vs_prediction = avg_pool2d(vs_prediction, kernel_size=kernel_size)
    vs_target = avg_pool2d(vs_target, kernel_size=kernel_size)

    prediction_lmn = avg_pool2d(prediction_lmn, kernel_size=kernel_size)
    target_lmn = avg_pool2d(target_lmn, kernel_size=kernel_size)

    # Calculate gradient map
    kernels = torch.stack([scharr_filter(),
                           scharr_filter().transpose(1, 2)]).to(prediction_lmn)
    gm_prediction = gradient_map(prediction_lmn[:, :1], kernels)
    gm_target = gradient_map(target_lmn[:, :1], kernels)

    # Calculate all similarity maps
    s_vs = similarity_map(vs_prediction, vs_target, c1)
    s_gm = similarity_map(gm_prediction, gm_target, c2)
    s_m = similarity_map(prediction_lmn[:, 1:2], target_lmn[:, 1:2], c3)
    s_n = similarity_map(prediction_lmn[:, 2:], target_lmn[:, 2:], c3)
    s_c = s_m * s_n

    s_c_complex = [s_c.abs(), torch.atan2(torch.zeros_like(s_c), s_c)]
    s_c_complex_pow = [s_c_complex[0]**beta, s_c_complex[1] * beta]
    s_c_real_pow = s_c_complex_pow[0] * torch.cos(s_c_complex_pow[1])

    s = s_vs * s_gm.pow(alpha) * s_c_real_pow
    vs_max = torch.max(vs_prediction, vs_target)

    eps = torch.finfo(vs_max.dtype).eps
    output = s * vs_max
    output = ((output.sum(dim=(-1, -2)) + eps) /
              (vs_max.sum(dim=(-1, -2)) + eps)).squeeze(-1)
    if reduction == 'none':
        return output
    return {'mean': torch.mean, 'sum': torch.sum}[reduction](output, dim=0)

예제 #5

파일 보기

def vsi(x: torch.Tensor,
        y: torch.Tensor,
        reduction: str = 'mean',
        data_range: Union[int, float] = 1.,
        c1: float = 1.27,
        c2: float = 386.,
        c3: float = 130.,
        alpha: float = 0.4,
        beta: float = 0.02,
        omega_0: float = 0.021,
        sigma_f: float = 1.34,
        sigma_d: float = 145.,
        sigma_c: float = 0.001) -> torch.Tensor:
    r"""Compute Visual Saliency-induced Index for a batch of images.

    Both inputs are supposed to have RGB channels order in accordance with the original approach.
    Nevertheless, the method supports greyscale images, which they are converted to RGB by copying the grey
    channel 3 times.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        data_range: Maximum value range of images (usually 1.0 or 255).
        c1: coefficient to calculate saliency component of VSI
        c2: coefficient to calculate gradient component of VSI
        c3: coefficient to calculate color component of VSI
        alpha: power for gradient component of VSI
        beta: power for color component of VSI
        omega_0: coefficient to get log Gabor filter at SDSP
        sigma_f: coefficient to get log Gabor filter at SDSP
        sigma_d: coefficient to get SDSP
        sigma_c: coefficient to get SDSP

    Returns:
        Index of similarity between two images. Usually in [0, 1] range.

    References:
        L. Zhang, Y. Shen and H. Li, "VSI: A Visual Saliency-Induced Index for Perceptual Image Quality Assessment,"
        IEEE Transactions on Image Processing, vol. 23, no. 10, pp. 4270-4281, Oct. 2014, doi: 10.1109/TIP.2014.2346028
        https://ieeexplore.ieee.org/document/6873260

    Note:
        The original method supports only RGB image.
        See https://ieeexplore.ieee.org/document/6873260 for details.
    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))

    if x.size(1) == 1:
        x = x.repeat(1, 3, 1, 1)
        y = y.repeat(1, 3, 1, 1)
        warnings.warn(
            'The original VSI supports only RGB images. The input images were converted to RGB by copying '
            'the grey channel 3 times.')

    # Scale to [0, 255] range to match scale of constant
    x = x * 255. / float(data_range)
    y = y * 255. / float(data_range)

    vs_x = sdsp(x,
                data_range=255,
                omega_0=omega_0,
                sigma_f=sigma_f,
                sigma_d=sigma_d,
                sigma_c=sigma_c)
    vs_y = sdsp(y,
                data_range=255,
                omega_0=omega_0,
                sigma_f=sigma_f,
                sigma_d=sigma_d,
                sigma_c=sigma_c)

    # Convert to LMN colour space
    x_lmn = rgb2lmn(x)
    y_lmn = rgb2lmn(y)

    # Averaging image if the size is large enough
    kernel_size = max(1, round(min(vs_x.size()[-2:]) / 256))
    padding = kernel_size // 2

    if padding:
        upper_pad = padding
        bottom_pad = (kernel_size - 1) // 2
        pad_to_use = [upper_pad, bottom_pad, upper_pad, bottom_pad]
        mode = 'replicate'
        vs_x = pad(vs_x, pad=pad_to_use, mode=mode)
        vs_y = pad(vs_y, pad=pad_to_use, mode=mode)
        x_lmn = pad(x_lmn, pad=pad_to_use, mode=mode)
        y_lmn = pad(y_lmn, pad=pad_to_use, mode=mode)

    vs_x = avg_pool2d(vs_x, kernel_size=kernel_size)
    vs_y = avg_pool2d(vs_y, kernel_size=kernel_size)

    x_lmn = avg_pool2d(x_lmn, kernel_size=kernel_size)
    y_lmn = avg_pool2d(y_lmn, kernel_size=kernel_size)

    # Calculate gradient map
    kernels = torch.stack([scharr_filter(),
                           scharr_filter().transpose(1, 2)]).to(x_lmn)
    gm_x = gradient_map(x_lmn[:, :1], kernels)
    gm_y = gradient_map(y_lmn[:, :1], kernels)

    # Calculate all similarity maps
    s_vs = similarity_map(vs_x, vs_y, c1)
    s_gm = similarity_map(gm_x, gm_y, c2)
    s_m = similarity_map(x_lmn[:, 1:2], y_lmn[:, 1:2], c3)
    s_n = similarity_map(x_lmn[:, 2:], y_lmn[:, 2:], c3)
    s_c = s_m * s_n

    s_c_complex = [s_c.abs(), torch.atan2(torch.zeros_like(s_c), s_c)]
    s_c_complex_pow = [s_c_complex[0]**beta, s_c_complex[1] * beta]
    s_c_real_pow = s_c_complex_pow[0] * torch.cos(s_c_complex_pow[1])

    s = s_vs * s_gm.pow(alpha) * s_c_real_pow
    vs_max = torch.max(vs_x, vs_y)

    eps = torch.finfo(vs_max.dtype).eps
    output = s * vs_max
    output = ((output.sum(dim=(-1, -2)) + eps) /
              (vs_max.sum(dim=(-1, -2)) + eps)).squeeze(-1)

    return _reduce(output, reduction)

예제 #6

파일 보기

def mdsi(x: torch.Tensor, y: torch.Tensor, data_range: Union[int, float] = 1., reduction: str = 'mean',
         c1: float = 140., c2: float = 55., c3: float = 550., combination: str = 'sum', alpha: float = 0.6,
         beta: float = 0.1, gamma: float = 0.2, rho: float = 1., q: float = 0.25, o: float = 0.25):
    r"""Compute Mean Deviation Similarity Index (MDSI) for a batch of images.
    Supports greyscale and colour images with RGB channel order.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        data_range: Maximum value range of images (usually 1.0 or 255).
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        c1: coefficient to calculate gradient similarity. Default: 140.
        c2: coefficient to calculate gradient similarity. Default: 55.
        c3: coefficient to calculate chromaticity similarity. Default: 550.
        combination: mode to combine gradient similarity and chromaticity similarity: ``'sum'`` | ``'mult'``.
        alpha: coefficient to combine gradient similarity and chromaticity similarity using summation.
        beta: power to combine gradient similarity with chromaticity similarity using multiplication.
        gamma: to combine gradient similarity and chromaticity similarity using multiplication.
        rho: order of the Minkowski distance
        q: coefficient to adjusts the emphasis of the values in image and MCT
        o: the power pooling applied on the final value of the deviation

    Returns:
        Mean Deviation Similarity Index (MDSI) between 2 tensors.

    References:
        Nafchi, Hossein Ziaei and Shahkolaei, Atena and Hedjam, Rachid and Cheriet, Mohamed (2016).
        Mean deviation similarity index: Efficient and reliable full-reference image quality evaluator.
        IEEE Ieee Access, 4, 5579--5590.
        https://arxiv.org/pdf/1608.07433.pdf,
        DOI:`10.1109/ACCESS.2016.2604042`

    Note:
        The ratio between constants is usually equal :math:`c_3 = 4c_1 = 10c_2`

    Note:
        Both inputs are supposed to have RGB channels order in accordance with the original approach.
        Nevertheless, the method supports greyscale images, which are converted to RGB by copying the grey
        channel 3 times.
    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))

    if x.size(1) == 1:
        x = x.repeat(1, 3, 1, 1)
        y = y.repeat(1, 3, 1, 1)
        warnings.warn('The original MDSI supports only RGB images. The input images were converted to RGB by copying '
                      'the grey channel 3 times.')

    x = x / float(data_range) * 255
    y = y / float(data_range) * 255

    # Averaging image if the size is large enough
    kernel_size = max(1, round(min(x.size()[-2:]) / 256))
    padding = kernel_size // 2

    if padding:
        up_pad = (kernel_size - 1) // 2
        down_pad = padding
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        x = pad(x, pad=pad_to_use)
        y = pad(y, pad=pad_to_use)

    x = avg_pool2d(x, kernel_size=kernel_size)
    y = avg_pool2d(y, kernel_size=kernel_size)

    x_lhm = rgb2lhm(x)
    y_lhm = rgb2lhm(y)

    kernels = torch.stack([prewitt_filter(), prewitt_filter().transpose(1, 2)]).to(x)
    gm_x = gradient_map(x_lhm[:, :1], kernels)
    gm_y = gradient_map(y_lhm[:, :1], kernels)
    gm_avg = gradient_map((x_lhm[:, :1] + y_lhm[:, :1]) / 2., kernels)

    gs_x_y = similarity_map(gm_x, gm_y, c1)
    gs_x_average = similarity_map(gm_x, gm_avg, c2)
    gs_y_average = similarity_map(gm_y, gm_avg, c2)

    gs_total = gs_x_y + gs_x_average - gs_y_average

    cs_total = (2 * (x_lhm[:, 1:2] * y_lhm[:, 1:2] +
                     x_lhm[:, 2:] * y_lhm[:, 2:]) + c3) / (x_lhm[:, 1:2] ** 2 +
                                                           y_lhm[:, 1:2] ** 2 +
                                                           x_lhm[:, 2:] ** 2 +
                                                           y_lhm[:, 2:] ** 2 + c3)

    if combination == 'sum':
        gcs = (alpha * gs_total + (1 - alpha) * cs_total)
    elif combination == 'mult':
        gs_total_pow = pow_for_complex(base=gs_total, exp=gamma)
        cs_total_pow = pow_for_complex(base=cs_total, exp=beta)
        gcs = torch.stack((gs_total_pow[..., 0] * cs_total_pow[..., 0],
                           gs_total_pow[..., 1] + cs_total_pow[..., 1]), dim=-1)
    else:
        raise ValueError(f'Expected combination method "sum" or "mult", got {combination}')

    mct_complex = pow_for_complex(base=gcs, exp=q)
    mct_complex = mct_complex.mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)  # split to increase precision
    score = (pow_for_complex(base=gcs, exp=q) - mct_complex).pow(2).sum(dim=-1).sqrt()
    score = ((score ** rho).mean(dim=(-1, -2)) ** (o / rho)).squeeze(1)
    return _reduce(score, reduction)

예제 #7

파일 보기

파일: mdsi.py 프로젝트: Linaom1214/piq

def mdsi(x: torch.Tensor,
         y: torch.Tensor,
         data_range: Union[int, float] = 1.,
         reduction: str = 'mean',
         c1: float = 140.,
         c2: float = 55.,
         c3: float = 550.,
         combination: str = 'sum',
         alpha: float = 0.6,
         beta: float = 0.1,
         gamma: float = 0.2,
         rho: float = 1.,
         q: float = 0.25,
         o: float = 0.25):
    r"""Compute Mean Deviation Similarity Index (MDSI) for a batch of images.

    Note:
        Both inputs are supposed to have RGB channels order.
        Greyscale images converted to RGB by copying the grey channel 3 times.

    Args:
        x: Tensor with shape (H, W), (C, H, W) or (N, C, H, W).
        y:Tensor with shape (H, W), (C, H, W) or (N, C, H, W).
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        c1: coefficient to calculate gradient similarity. Default: 140.
        c2: coefficient to calculate gradient similarity. Default: 55.
        c3: coefficient to calculate chromaticity similarity. Default: 550.
        combination: mode to combine gradient similarity and chromaticity similarity: "sum"|"mult".
        alpha: coefficient to combine gradient similarity and chromaticity similarity using summation.
        beta: power to combine gradient similarity with chromaticity similarity using multiplication.
        gamma: to combine gradient similarity and chromaticity similarity using multiplication.
        rho: order of the Minkowski distance
        q: coefficient to adjusts the emphasis of the values in image and MCT
        o: the power pooling applied on the final value of the deviation

    Returns:
        torch.Tensor: the batch of Mean Deviation Similarity Index (MDSI) score reduced accordingly

    Note:
        The ratio between constants is usually equal c3 = 4c1 = 10c2
    """
    _validate_input(input_tensors=(x, y),
                    allow_5d=False,
                    data_range=data_range)
    x, y = _adjust_dimensions(input_tensors=(x, y))

    if x.size(1) == 1:
        x = x.repeat(1, 3, 1, 1)
        y = y.repeat(1, 3, 1, 1)
        warnings.warn(
            'The original MDSI supports only RGB images. The input images were converted to RGB by copying '
            'the grey channel 3 times.')

    x = x / data_range * 255
    y = y / data_range * 255

    # Averaging image if the size is large enough
    kernel_size = max(1, round(min(x.size()[-2:]) / 256))
    padding = kernel_size // 2

    if padding:
        up_pad = (kernel_size - 1) // 2
        down_pad = padding
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        x = pad(x, pad=pad_to_use)
        y = pad(y, pad=pad_to_use)

    x = avg_pool2d(x, kernel_size=kernel_size)
    y = avg_pool2d(y, kernel_size=kernel_size)

    x_lhm = rgb2lhm(x)
    y_lhm = rgb2lhm(y)

    kernels = torch.stack([prewitt_filter(),
                           prewitt_filter().transpose(1, 2)]).to(x)
    gm_x = gradient_map(x_lhm[:, :1], kernels)
    gm_y = gradient_map(y_lhm[:, :1], kernels)
    gm_avg = gradient_map((x_lhm[:, :1] + y_lhm[:, :1]) / 2., kernels)

    gs_x_y = similarity_map(gm_x, gm_y, c1)
    gs_x_average = similarity_map(gm_x, gm_avg, c2)
    gs_y_average = similarity_map(gm_y, gm_avg, c2)

    gs_total = gs_x_y + gs_x_average - gs_y_average

    cs_total = (2 *
                (x_lhm[:, 1:2] * y_lhm[:, 1:2] + x_lhm[:, 2:] * y_lhm[:, 2:]) +
                c3) / (x_lhm[:, 1:2]**2 + y_lhm[:, 1:2]**2 + x_lhm[:, 2:]**2 +
                       y_lhm[:, 2:]**2 + c3)

    if combination == 'sum':
        gcs = (alpha * gs_total + (1 - alpha) * cs_total)
    elif combination == 'mult':
        gs_total_pow = pow_for_complex(base=gs_total, exp=gamma)
        cs_total_pow = pow_for_complex(base=cs_total, exp=beta)
        gcs = torch.stack((gs_total_pow[..., 0] * cs_total_pow[..., 0],
                           gs_total_pow[..., 1] + cs_total_pow[..., 1]),
                          dim=-1)
    else:
        raise ValueError(
            f'Expected combination method "sum" or "mult", got {combination}')

    mct_complex = pow_for_complex(base=gcs, exp=q)
    mct_complex = mct_complex.mean(dim=2, keepdim=True).mean(
        dim=3, keepdim=True)  # split to increase precision
    score = (pow_for_complex(base=gcs, exp=q) -
             mct_complex).pow(2).sum(dim=-1).sqrt()
    score = ((score**rho).mean(dim=(-1, -2))**(o / rho)).squeeze(1)
    if reduction == 'none':
        return score
    return {'mean': score.mean, 'sum': score.sum}[reduction](dim=0)

예제 #8

파일 보기

def fsim(x: torch.Tensor, y: torch.Tensor, reduction: str = 'mean',
         data_range: Union[int, float] = 1.0, chromatic: bool = True,
         scales: int = 4, orientations: int = 4, min_length: int = 6,
         mult: int = 2, sigma_f: float = 0.55, delta_theta: float = 1.2,
         k: float = 2.0) -> torch.Tensor:
    r"""Compute Feature Similarity Index Measure for a batch of images.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        reduction: Specifies the reduction type:
            ``'none'`` | ``'mean'`` | ``'sum'``. Default:``'mean'``
        data_range: Maximum value range of images (usually 1.0 or 255).
        chromatic: Flag to compute FSIMc, which also takes into account chromatic components
        scales: Number of wavelets used for computation of phase congruensy maps
        orientations: Number of filter orientations used for computation of phase congruensy maps
        min_length: Wavelength of smallest scale filter
        mult: Scaling factor between successive filters
        sigma_f: Ratio of the standard deviation of the Gaussian describing the log Gabor filter's
            transfer function in the frequency domain to the filter center frequency.
        delta_theta: Ratio of angular interval between filter orientations and the standard deviation
            of the angular Gaussian function used to construct filters in the frequency plane.
        k: No of standard deviations of the noise energy beyond the mean at which we set the noise
            threshold  point, below which phase congruency values get penalized.

    Returns:
        Index of similarity between two images. Usually in [0, 1] interval.
        Can be bigger than 1 for predicted :math:`x` images with higher contrast than the original ones.

    References:
        L. Zhang, L. Zhang, X. Mou and D. Zhang, "FSIM: A Feature Similarity Index for Image Quality Assessment,"
        IEEE Transactions on Image Processing, vol. 20, no. 8, pp. 2378-2386, Aug. 2011, doi: 10.1109/TIP.2011.2109730.
        https://ieeexplore.ieee.org/document/5705575

    Note:
        This implementation is based on the original MATLAB code.
        https://www4.comp.polyu.edu.hk/~cslzhang/IQA/FSIM/FSIM.htm

    """
    _validate_input([x, y], dim_range=(4, 4), data_range=(0, data_range))

    # Rescale to [0, 255] range, because all constant are calculated for this factor
    x = x / float(data_range) * 255
    y = y / float(data_range) * 255

    # Apply average pooling
    kernel_size = max(1, round(min(x.shape[-2:]) / 256))
    x = torch.nn.functional.avg_pool2d(x, kernel_size)
    y = torch.nn.functional.avg_pool2d(y, kernel_size)

    num_channels = x.size(1)

    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)

        x_lum = x_yiq[:, : 1]
        y_lum = y_yiq[:, : 1]

        x_i = x_yiq[:, 1:2]
        y_i = y_yiq[:, 1:2]
        x_q = x_yiq[:, 2:]
        y_q = y_yiq[:, 2:]

    else:
        x_lum = x
        y_lum = y

    # Compute phase congruency maps
    pc_x = _phase_congruency(
        x_lum, scales=scales, orientations=orientations,
        min_length=min_length, mult=mult, sigma_f=sigma_f,
        delta_theta=delta_theta, k=k
    )
    pc_y = _phase_congruency(
        y_lum, scales=scales, orientations=orientations,
        min_length=min_length, mult=mult, sigma_f=sigma_f,
        delta_theta=delta_theta, k=k
    )

    # Gradient maps
    kernels = torch.stack([scharr_filter(), scharr_filter().transpose(-1, -2)])
    grad_map_x = gradient_map(x_lum, kernels)
    grad_map_y = gradient_map(y_lum, kernels)

    # Constants from the paper
    T1, T2, T3, T4, lmbda = 0.85, 160, 200, 200, 0.03

    # Compute FSIM
    PC = similarity_map(pc_x, pc_y, T1)
    GM = similarity_map(grad_map_x, grad_map_y, T2)
    pc_max = torch.where(pc_x > pc_y, pc_x, pc_y)
    score = GM * PC * pc_max

    if chromatic:
        assert num_channels == 3, "Chromatic component can be computed only for RGB images!"
        S_I = similarity_map(x_i, y_i, T3)
        S_Q = similarity_map(x_q, y_q, T4)
        score = score * torch.abs(S_I * S_Q) ** lmbda
        # Complex gradients will work in PyTorch 1.6.0
        # score = score * torch.real((S_I * S_Q).to(torch.complex64) ** lmbda)

    result = score.sum(dim=[1, 2, 3]) / pc_max.sum(dim=[1, 2, 3])

    return _reduce(result, reduction)

예제 #9

파일 보기

def mdsi(prediction: torch.Tensor, target: torch.Tensor, data_range: Union[int, float] = 1., reduction: str = 'mean',
         c1: float = 140., c2: float = 55., c3: float = 550., combination: str = 'sum', alpha: float = 0.6,
         beta: float = 0.1, gamma: float = 0.2, rho: float = 1., q: float = 0.25, o: float = 0.25):
    r"""Compute Mean Deviation Similarity Index (MDSI) for a batch of images.

    Note:
        Both inputs are supposed to have RGB order in accordance with the original approach.
        Nevertheless, the method supports greyscale images, which are converted to RGB by copying the grey
        channel 3 times.

    Args:
        prediction: Batch of predicted (distorted) images. Required to be 2D (H,W), 3D (C,H,W), 4D (N,C,H,W),
        channels first.
        target: Batch of target (reference) images. Required to be 2D (H,W), 3D (C,H,W), 4D (N,C,H,W), channels first.
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        c1: coefficient to calculate gradient similarity. Default: 140.
        c2: coefficient to calculate gradient similarity. Default: 55.
        c3: coefficient to calculate chromaticity similarity. Default: 550.
        combination: mode to combine gradient similarity and chromaticity similarity: "sum"|"mult".
        alpha: coefficient to combine gradient similarity and chromaticity similarity using summation.
        beta: power to combine gradient similarity with chromaticity similarity using multiplication.
        gamma: to combine gradient similarity and chromaticity similarity using multiplication.
        rho: order of the Minkowski distance
        q: coefficient to adjusts the emphasis of the values in image and MCT
        o: the power pooling applied on the final value of the deviation

    Returns:
        torch.Tensor: the batch of Mean Deviation Similarity Index (MDSI) score reduced accordingly

    Note:
        The ratio between constants is usually equal c3 = 4c1 = 10c2
    """
    _validate_input(input_tensors=(prediction, target), allow_5d=False)
    prediction, target = _adjust_dimensions(input_tensors=(prediction, target))

    if prediction.size(1) == 1:
        prediction = prediction.repeat(1, 3, 1, 1)
        target = target.repeat(1, 3, 1, 1)
        warnings.warn('The original MDSI supports only RGB images. The input images were converted to RGB by copying '
                      'the grey channel 3 times.')

    prediction = prediction * 255. / data_range
    target = target * 255. / data_range

    # Averaging image if the size is large enough
    kernel_size = max(1, round(min(prediction.size()[-2:]) / 256))
    padding = kernel_size // 2

    if padding:
        up_pad = (kernel_size - 1) // 2
        down_pad = padding
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        prediction = pad(prediction, pad=pad_to_use)
        target = pad(target, pad=pad_to_use)

    prediction = avg_pool2d(prediction, kernel_size=kernel_size)
    target = avg_pool2d(target, kernel_size=kernel_size)

    prediction_lhm = rgb2lhm(prediction)
    target_lhm = rgb2lhm(target)

    kernels = torch.stack([prewitt_filter(), prewitt_filter().transpose(1, 2)]).to(prediction)
    gm_prediction = gradient_map(prediction_lhm[:, :1], kernels)
    gm_target = gradient_map(target_lhm[:, :1], kernels)
    gm_avg = gradient_map((prediction_lhm[:, :1] + target_lhm[:, :1]) / 2., kernels)

    gs_prediction_target = similarity_map(gm_prediction, gm_target, c1)
    gs_prediction_average = similarity_map(gm_prediction, gm_avg, c2)
    gs_target_average = similarity_map(gm_target, gm_avg, c2)

    gs_total = gs_prediction_target + gs_prediction_average - gs_target_average

    cs_total = (2 * (prediction_lhm[:, 1:2] * target_lhm[:, 1:2] +
                     prediction_lhm[:, 2:] * target_lhm[:, 2:]) + c3) / (prediction_lhm[:, 1:2] ** 2 +
                                                                         target_lhm[:, 1:2] ** 2 +
                                                                         prediction_lhm[:, 2:] ** 2 +
                                                                         target_lhm[:, 2:] ** 2 + c3)

    if combination == 'sum':
        gcs = (alpha * gs_total + (1 - alpha) * cs_total)
    elif combination == 'mult':
        gs_total_pow = pow_for_complex(base=gs_total, exp=gamma)
        cs_total_pow = pow_for_complex(base=cs_total, exp=beta)
        gcs = torch.stack((gs_total_pow[..., 0] * cs_total_pow[..., 0],
                           gs_total_pow[..., 1] + cs_total_pow[..., 1]), dim=-1)
    else:
        raise ValueError(f'Expected combination method "sum" or "mult", got {combination}')

    mct_complex = pow_for_complex(base=gcs, exp=q)
    mct_complex = mct_complex.mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)  # split to increase precision
    score = (pow_for_complex(base=gcs, exp=q) - mct_complex).pow(2).sum(dim=-1).sqrt()
    score = ((score ** rho).mean(dim=(-1, -2)) ** (o / rho)).squeeze(1)
    if reduction == 'none':
        return score
    return {'mean': score.mean,
            'sum': score.sum}[reduction](dim=0)

예제 #10

파일 보기

파일: srsim.py 프로젝트: photosynthesis-team/piq

def srsim(x: torch.Tensor,
          y: torch.Tensor,
          reduction: str = 'mean',
          data_range: Union[int, float] = 1.0,
          chromatic: bool = False,
          scale: float = 0.25,
          kernel_size: int = 3,
          sigma: float = 3.8,
          gaussian_size: int = 10) -> torch.Tensor:
    r"""Compute Spectral Residual based Similarity for a batch of images.

    Args:
        x: Predicted images. Shape (H, W), (C, H, W) or (N, C, H, W).
        y: Target images. Shape (H, W), (C, H, W) or (N, C, H, W).
        reduction: Reduction over samples in batch: "mean"|"sum"|"none"
        data_range: Value range of input images (usually 1.0 or 255). Default: 1.0
        chromatic: Flag to compute SR-SIMc, which also takes into account chromatic components
        scale: Resizing factor used in saliency map computation
        kernel_size: Kernel size of average blur filter used in saliency map computation
        sigma: Sigma of gaussian filter applied on saliency map
        gaussian_size: Size of gaussian filter applied on saliency map
    Returns:
        SR-SIM: Index of similarity between two images. Usually in [0, 1] interval.
            Can be bigger than 1 for predicted images with higher contrast than the original ones.
    Note:
        This implementation is based on the original MATLAB code.
        https://sse.tongji.edu.cn/linzhang/IQA/SR-SIM/Files/SR_SIM.m

    """
    _validate_input(tensors=[x, y],
                    dim_range=(4, 4),
                    data_range=(0, data_range))

    # Rescale to [0, 255] range, because all constant are calculated for this factor
    x = (x / float(data_range)) * 255
    y = (y / float(data_range)) * 255

    # Averaging image if the size is large enough
    ksize = max(1, round(min(x.size()[-2:]) / 256))
    padding = ksize // 2

    if padding:
        up_pad = (ksize - 1) // 2
        down_pad = padding
        pad_to_use = [up_pad, down_pad, up_pad, down_pad]
        x = F.pad(x, pad=pad_to_use)
        y = F.pad(y, pad=pad_to_use)

    x = F.avg_pool2d(x, ksize)
    y = F.avg_pool2d(y, ksize)

    num_channels = x.size(1)

    # Convert RGB to YIQ color space https://en.wikipedia.org/wiki/YIQ
    if num_channels == 3:
        x_yiq = rgb2yiq(x)
        y_yiq = rgb2yiq(y)

        x_lum = x_yiq[:, :1]
        y_lum = y_yiq[:, :1]

        x_i = x_yiq[:, 1:2]
        y_i = y_yiq[:, 1:2]
        x_q = x_yiq[:, 2:]
        y_q = y_yiq[:, 2:]

    else:
        if chromatic:
            raise ValueError(
                'Chromatic component can be computed only for RGB images!')
        x_lum = x
        y_lum = y

    # Compute phase congruency maps
    svrs_x = _spectral_residual_visual_saliency(x_lum,
                                                scale=scale,
                                                kernel_size=kernel_size,
                                                sigma=sigma,
                                                gaussian_size=gaussian_size)
    svrs_y = _spectral_residual_visual_saliency(y_lum,
                                                scale=scale,
                                                kernel_size=kernel_size,
                                                sigma=sigma,
                                                gaussian_size=gaussian_size)

    # Gradient maps
    kernels = torch.stack([scharr_filter(), scharr_filter().transpose(-1, -2)])
    grad_map_x = gradient_map(x_lum, kernels)
    grad_map_y = gradient_map(y_lum, kernels)

    # Constants from the paper
    C1, C2, alpha = 0.40, 225, 0.50

    # Compute SR-SIM
    SVRS = similarity_map(svrs_x, svrs_y, C1)
    GM = similarity_map(grad_map_x, grad_map_y, C2)
    svrs_max = torch.where(svrs_x > svrs_y, svrs_x, svrs_y)
    score = SVRS * (GM**alpha) * svrs_max

    if chromatic:
        # Constants from FSIM paper, use same method for color image
        T3, T4, lmbda = 200, 200, 0.03

        S_I = similarity_map(x_i, y_i, T3)
        S_Q = similarity_map(x_q, y_q, T4)
        score = score * torch.abs(S_I * S_Q)**lmbda
        # Complex gradients will work in PyTorch 1.6.0
        # score = score * torch.real((S_I * S_Q).to(torch.complex64) ** lmbda)

    eps = torch.finfo(score.dtype).eps
    result = score.sum(dim=[1, 2, 3]) / (svrs_max.sum(dim=[1, 2, 3]) + eps)

    return _reduce(result, reduction)