def test_version_tuple_doesnt_fail_valid_input(version, expected) -> None:
    """Check that ``_parse_version`` does not raise on a valid semver string.

    ``expected`` is accepted as a parameter but is not used in the body.
    Any exception raised while parsing is reported as a test failure.
    """
    try:
        _parse_version(version)
    except Exception as error:
        failure_message = f"Unexpected error occurred while parsing valid semver versions: {error}"
        pytest.fail(failure_message)
def _msid_descriptor(x: np.ndarray, ts: np.ndarray = np.logspace(-1, 1, 256), k: int = 5, m: int = 10,
                     niters: int = 100, rademacher: bool = False, normalized_laplacian: bool = True,
                     normalize: str = 'empty') -> np.ndarray:
    r"""Compute the msid descriptor for a single set of samples

    Args:
        x: Samples from data distribution. Shape (N_samples, data_dim)
        ts: Temperature values. NOTE: the default array is created once, when the
            function is defined, and is shared between calls.
        k: Number of neighbours for graph construction.
        m: Lanczos steps in SLQ.
        niters: Number of starting random vectors for SLQ.
        rademacher: True to use Rademacher distribution,
            False - standard normal for random vectors in Hutchinson.
        normalized_laplacian: if True, use normalized Laplacian
        normalize: 'empty' for average heat kernel (corresponds to the empty graph normalization of NetLSD),
            'complete' for the complete, 'er' for erdos-renyi normalization, 'none' for no normalization

    Returns:
        normed_msidx: normalized msid descriptor (scaled by the module-level ``NORMALIZATION``)

    Raises:
        ImportError: if scipy cannot be imported.
    """
    try:
        import scipy
    except ImportError as err:
        # Chain the original error so the real import failure stays visible in the traceback.
        raise ImportError(
            "Scipy is required for computation of the MSID but not installed. "
            "Please install scipy using the following command: pip install --user scipy"
        ) from err

    # Keep the human-readable version string for the message; the parsed value is used only for comparison.
    recommended_scipy_version_str = "1.3.3"
    recommended_scipy_version = _parse_version(recommended_scipy_version_str)
    scipy_version = _parse_version(scipy.__version__)
    # An empty parse result means the installed version could not be parsed; skip the check in that case.
    if len(scipy_version) != 0 and scipy_version < recommended_scipy_version:
        warn(
            f'Scipy of version {scipy.__version__} is used while version >= {recommended_scipy_version_str} is '
            f'recommended. Consider updating scipy to avoid potential long compute time with older versions.'
        )

    Lx = _build_graph(x, k, normalized_laplacian)

    nx = Lx.shape[0]
    msidx = _slq_red_var(Lx, m, niters, ts, rademacher)

    normed_msidx = _normalize_msid(msidx, normalize, nx, k, ts) * NORMALIZATION

    return normed_msidx
def sdsp(x: torch.Tensor, data_range: Union[int, float] = 255, omega_0: float = 0.021, sigma_f: float = 1.34,
         sigma_d: float = 145., sigma_c: float = 0.001) -> torch.Tensor:
    r"""SDSP algorithm for salient region detection from a given image.

    Supports only colour images with RGB channel order.

    The saliency map is the product of three priors computed on a fixed 256x256 working
    resolution (frequency, location and colour), resized back to the spatial size of ``x``
    and min-max normalised over the two spatial dimensions.

    Args:
        x: Tensor. Shape :math:`(N, 3, H, W)`.
        data_range: Maximum value range of images (usually 1.0 or 255).
        omega_0: coefficient for log Gabor filter
        sigma_f: coefficient for log Gabor filter
        sigma_d: coefficient for the central areas, which have a bias towards attention
        sigma_c: coefficient for the warm colors, which have a bias towards attention

    Returns:
        torch.Tensor: Visual saliency map
    """

    def _spatial_min_max(tensor: torch.Tensor):
        # Minimum / maximum over the last two dimensions, keeping them as singleton dims.
        lowest = tensor.min(dim=-1, keepdim=True).values.min(dim=-2, keepdim=True).values
        highest = tensor.max(dim=-1, keepdim=True).values.max(dim=-2, keepdim=True).values
        return lowest, highest

    working_size = (256, 256)

    # Bring the input to the [0, 255] range and remember its original size for the final resize.
    x = x / float(data_range) * 255
    original_size = x.size()

    x = interpolate(input=x, size=working_size, mode='bilinear', align_corners=False)
    x_lab = rgb2lab(x, data_range=255)

    log_gabor = _log_gabor(working_size, omega_0, sigma_f).to(x).view(1, 1, *working_size)

    # Newer torch exposes the torch.fft module; otherwise fall back to the legacy rfft / ifft functions.
    recommended_torch_version = _parse_version('1.8.0')
    torch_version = _parse_version(torch.__version__)
    has_fft_module = len(torch_version) != 0 and torch_version >= recommended_torch_version
    if has_fft_module:
        spectrum = torch.fft.fft2(x_lab)
        filtered_real = torch.fft.ifft2(spectrum * log_gabor).real
    else:
        spectrum = torch.rfft(x_lab, 2, onesided=False)
        filtered_real = torch.ifft(spectrum * log_gabor.unsqueeze(-1), 2)[..., 0]

    # Frequency prior: magnitude of the filtered response across channels.
    s_f = filtered_real.pow(2).sum(dim=1, keepdim=True).sqrt()

    # Location prior: Gaussian fall-off over the coordinate grid.
    grid = torch.stack(get_meshgrid(working_size), dim=0).to(x)
    grid = grid * working_size[0] + 1
    s_d = torch.exp(-torch.sum(grid ** 2, dim=0) / sigma_d ** 2).view(1, 1, *working_size)

    # Colour prior: uses channels 1: of the min-max normalised Lab image.
    eps = torch.finfo(x_lab.dtype).eps
    lab_min, lab_max = _spatial_min_max(x_lab)
    lab_normalized = (x_lab - lab_min) / (lab_max - lab_min + eps)
    chroma_energy = lab_normalized[:, 1:].pow(2).sum(dim=1, keepdim=True)
    s_c = 1 - torch.exp(-chroma_energy / sigma_c ** 2)

    saliency = s_f * s_d * s_c
    saliency = interpolate(saliency, original_size[-2:], mode='bilinear', align_corners=True)

    saliency_min, saliency_max = _spatial_min_max(saliency)
    return (saliency - saliency_min) / (saliency_max - saliency_min + eps)
def _phase_congruency(x: torch.Tensor, scales: int = 4, orientations: int = 4, min_length: int = 6,
                      mult: int = 2, sigma_f: float = 0.55, delta_theta: float = 1.2,
                      k: float = 2.0) -> torch.Tensor:
    r"""Compute Phase Congruence for a batch of greyscale images

    Args:
        x: Tensor. Shape :math:`(N, 1, H, W)`.
        scales: Number of wavelet scales
        orientations: Number of filter orientations
        min_length: Wavelength of smallest scale filter
        mult: Scaling factor between successive filters
        sigma_f: Ratio of the standard deviation of the Gaussian
            describing the log Gabor filter's transfer function
            in the frequency domain to the filter center frequency.
        delta_theta: Ratio of angular interval between filter orientations
            and the standard deviation of the angular Gaussian function
            used to construct filters in the freq. plane.
        k: No of standard deviations of the noise energy beyond the mean
            at which we set the noise threshold point, below which phase
            congruency values get penalized.

    Returns:
        Phase Congruency map with shape :math:`(N, 1, H, W)` (the reduced
        :math:`(N, H, W)` map with a singleton dimension inserted at position 1).
    """
    EPS = torch.finfo(x.dtype).eps

    N, _, H, W = x.shape
    spatial_norm = math.sqrt(H * W)
    response_shape = (N, orientations, scales, H, W, 2)

    # Frequency-domain filter bank.
    filters = _construct_filters(x, scales, orientations, min_length, mult, sigma_f, delta_theta, k)

    # Newer torch exposes the torch.fft module; otherwise use the legacy rfft / ifft functions.
    recommended_torch_version = _parse_version('1.8.0')
    torch_version = _parse_version(torch.__version__)
    has_fft_module = len(torch_version) != 0 and torch_version >= recommended_torch_version
    if has_fft_module:
        imagefft = torch.fft.fft2(x)
        filters_ifft = torch.fft.ifft2(filters)
        filters_ifft = filters_ifft.real * spatial_norm
        even_odd = torch.view_as_real(torch.fft.ifft2(imagefft * filters)).view(*response_shape)
    else:
        imagefft = torch.rfft(x, 2, onesided=False)
        filters_ifft = torch.ifft(torch.stack([filters, torch.zeros_like(filters)], dim=-1), 2)[..., 0]
        filters_ifft *= spatial_norm
        even_odd = torch.ifft(imagefft * filters.unsqueeze(-1), 2).view(*response_shape)

    # Even (real) and odd (imaginary) filter responses.
    even = even_odd[..., 0]
    odd = even_odd[..., 1]

    # Amplitude of the response: An = sqrt(real^2 + imag^2)
    an = torch.sqrt(torch.sum(even_odd ** 2, dim=-1))

    # Squared filter values at scale 0, summed spatially; used below for noise estimation.
    em_n = (filters.view(1, orientations, scales, H, W)[:, :, :1, ...] ** 2).sum(dim=[-2, -1], keepdims=True)

    # Sums of even / odd responses over the scale dimension.
    sum_e = even.sum(dim=2, keepdims=True)
    sum_o = odd.sum(dim=2, keepdims=True)

    # Weighted mean filter response vector (unit length up to EPS); it gives the weighted mean phase angle.
    x_energy = torch.sqrt(sum_e ** 2 + sum_o ** 2) + EPS
    mean_e = sum_e / x_energy
    mean_o = sum_o / x_energy

    # An(cos(phase_deviation) - |sin(phase_deviation)|), computed via dot and cross products between
    # the weighted mean response vector and the individual responses at each scale, summed over scales.
    # This quantity is phase congruency multiplied by An, which we call energy.
    aligned = even * mean_e + odd * mean_o
    deviation = torch.abs(even * mean_o - odd * mean_e)
    energy = (aligned - deviation).sum(dim=2, keepdim=True)

    # Noise compensation.
    # The noise power is estimated from the squared response at the smallest scale. If the noise is
    # Gaussian, the squared energy has a Chi-squared 2DOF pdf; the median is used as a robust statistic
    # and converted to a mean estimate, which is then divided by the squared filter value.
    abs_eo = an[:, :, :1, ...].reshape(N, orientations, 1, 1, H * W)
    median_e2n = torch.median(abs_eo ** 2, dim=-1, keepdim=True).values
    mean_e2n = - median_e2n / math.log(0.5)
    noise_power = mean_e2n / em_n

    # Total energy^2 due to noise:
    # estimate for sum(An^2) + sum(Ai.*Aj.*(cphi.*cphj + sphi.*sphj))
    filters_ifft = filters_ifft.view(1, orientations, scales, H, W)

    sum_an2 = torch.sum(filters_ifft ** 2, dim=-3, keepdim=True)
    sum_an2 = torch.sum(sum_an2, dim=[-1, -2], keepdim=True)

    # Products of each scale with every later scale.
    sum_ai_aj = torch.zeros(N, orientations, 1, H, W).to(x)
    for scale_idx in range(scales - 1):
        current = filters_ifft[:, :, scale_idx: scale_idx + 1]
        later = filters_ifft[:, :, scale_idx + 1:]
        sum_ai_aj = sum_ai_aj + (current * later).sum(dim=-3, keepdim=True)
    sum_ai_aj = torch.sum(sum_ai_aj, dim=[-1, -2], keepdim=True)

    noise_energy2 = 2 * noise_power * sum_an2 + 4 * noise_power * sum_ai_aj

    # Rayleigh parameter, expected noise energy and its spread.
    tau = torch.sqrt(noise_energy2 / 2)
    noise_energy = tau * math.sqrt(math.pi / 2)
    noise_energy_sigma = torch.sqrt((2 - math.pi / 2) * tau ** 2)

    # Noise threshold.
    T = noise_energy + k * noise_energy_sigma

    # The estimate above is only valid for the PC_1 measure. The PC_2 measure does not lend itself
    # readily to the same analysis; empirically the noise effect is overestimated roughly by a
    # factor of 1.7 for the filter parameters used here, hence the rescaling.
    T = T / 1.7

    # Apply the noise threshold (clamp at zero).
    energy = torch.max(energy - T, torch.zeros_like(T))

    # Ratio of total thresholded energy to total amplitude, reduced over orientations and scales.
    eps = torch.finfo(energy.dtype).eps
    energy_all = energy.sum(dim=[1, 2]) + eps
    an_all = an.sum(dim=[1, 2]) + eps
    result_pc = energy_all / an_all
    return result_pc.unsqueeze(1)
def test_version_tuple_warns_on_invalid_input(version) -> None:
    """Check that parsing the given version emits a ``UserWarning``."""
    expect_user_warning = pytest.warns(UserWarning)
    with expect_user_warning:
        _parse_version(version)
def test_version_tuple_parses_correctly(version, expected) -> None:
    """Check that ``_parse_version(version)`` equals the ``expected`` value."""
    failure_message = "Wrong parsing result of a valid semver version"
    assert _parse_version(version) == expected, failure_message
def _information_content(x: torch.Tensor, y: torch.Tensor, y_parent: torch.Tensor = None,
                         kernel_size: int = 3, sigma_nsq: float = 0.4) -> torch.Tensor:
    r"""Computes Information Content Map for weighting the Structural Similarity.

    Args:
        x: An input tensor. Shape :math:`(N, C, H, W)`.
        y: A target tensor. Shape :math:`(N, C, H, W)`.
        y_parent: Optional parent tensor. When given, it is upscaled with ``_image_enlarge``,
            cropped to the spatial size of ``y`` and appended as one extra coefficient
            to every neighbourhood vector.
        kernel_size: The side-length of the sliding window used in comparison for information content.
            Must be odd.
        sigma_nsq: Parameter of visual distortion model.

    Returns:
        Information Content Maps.

    Raises:
        AssertionError: if ``kernel_size`` is even.
    """
    EPS = torch.finfo(x.dtype).eps
    n_channels = x.size(1)
    kernel = average_filter2d(kernel_size=kernel_size).repeat(x.size(1), 1, 1, 1).to(x)

    # Asymmetric padding: `padding_up` before and `padding_down` after, in both spatial dimensions.
    padding_up = kernel.size(-1) // 2
    padding_down = kernel.size(-1) - padding_up
    pad = [padding_up, padding_down, padding_up, padding_down]

    # Local means.
    mu_x = F.conv2d(input=F.pad(x, pad=pad), weight=kernel, padding=0, groups=n_channels)
    mu_y = F.conv2d(input=F.pad(y, pad=pad), weight=kernel, padding=0, groups=n_channels)

    mu_xx = mu_x ** 2
    mu_yy = mu_y ** 2
    mu_xy = mu_x * mu_y

    # Local (co)variances.
    sigma_xx = F.conv2d(F.pad(x ** 2, pad=pad), weight=kernel, stride=1, padding=0, groups=n_channels) - mu_xx
    sigma_yy = F.conv2d(F.pad(y ** 2, pad=pad), weight=kernel, stride=1, padding=0, groups=n_channels) - mu_yy
    sigma_xy = F.conv2d(F.pad(x * y, pad=pad), weight=kernel, stride=1, padding=0, groups=n_channels) - mu_xy

    # Variances cannot be negative; clamp numerical noise.
    sigma_xx = F.relu(sigma_xx)
    sigma_yy = F.relu(sigma_yy)

    g = sigma_xy / (sigma_yy + EPS)
    vv = sigma_xx - g * sigma_xy

    # Degenerate windows: where the variance of y is (near) zero, the gain is zeroed and vv falls
    # back to sigma_xx; where the variance of x is (near) zero, both are zeroed.
    g = g.masked_fill(sigma_yy < EPS, 0)
    vv[sigma_yy < EPS] = sigma_xx[sigma_yy < EPS]

    g = g.masked_fill(sigma_xx < EPS, 0)
    vv = vv.masked_fill(sigma_xx < EPS, 0)

    block = [kernel_size, kernel_size]

    # Number of valid block positions vertically / horizontally, and in total.
    nblv = y.size(-2) - block[0] + 1
    nblh = y.size(-1) - block[1] + 1
    nexp = nblv * nblh
    N = block[0] * block[1]

    assert block[0] % 2 == 1 and block[1] % 2 == 1, f'Expected odd block dimensions, got {block}'

    Ly = (block[0] - 1) // 2
    Lx = (block[1] - 1) // 2

    if y_parent is not None:
        # upscale y_parent and cut to the size of y
        y_parent_up = _image_enlarge(y_parent)[:, :, :y.size(-2), :y.size(-1)]
        N = N + 1

    # Allocate on the device and with the dtype of `y`: everything derived from Y (C_u, ss) is later
    # combined with `g` and `vv`, which live on the input's device, and a default-dtype buffer would
    # silently truncate higher-precision inputs.
    Y = torch.zeros(y.size(0), y.size(1), nexp, N, dtype=y.dtype, device=y.device)

    # Fill one column of Y per offset inside the block with the correspondingly shifted copy of y.
    n = -1
    for ny in range(-Ly, Ly + 1):
        for nx in range(-Lx, Lx + 1):
            n = n + 1
            foo = _shift(y, [ny, nx])
            foo = foo[:, :, Ly:Ly + nblv, Lx:Lx + nblh]
            Y[..., n] = foo.flatten(start_dim=-2, end_dim=-1)

    if y_parent is not None:
        # The last column holds the upscaled parent coefficients.
        n = n + 1
        foo = y_parent_up
        foo = foo[:, :, Ly:Ly + nblv, Lx:Lx + nblh]
        Y[..., n] = foo.flatten(start_dim=-2, end_dim=-1)

    C_u = torch.matmul(Y.transpose(-2, -1), Y) / nexp

    # Use torch.linalg.eigh on newer torch, the legacy symeig otherwise.
    recommended_torch_version = _parse_version('1.7.0')
    torch_version = _parse_version(torch.__version__)
    if len(torch_version) != 0 and torch_version >= recommended_torch_version:
        eig_values, eig_vectors = torch.linalg.eigh(C_u)
    else:
        eig_values, eig_vectors = torch.symeig(C_u, eigenvectors=True)

    # Drop non-positive eigenvalues and rescale the rest so that their sum equals the original sum.
    # The `(sum == 0)` term avoids division by zero when no positive eigenvalue is left.
    sum_eig_values = torch.sum(eig_values, dim=-1).view(y.size(0), y.size(1), 1, 1)
    non_zero_eig_values_matrix = torch.diag_embed(eig_values * (eig_values > 0))
    sum_non_zero_eig_values = torch.sum(non_zero_eig_values_matrix, dim=(-2, -1), keepdim=True)

    L = non_zero_eig_values_matrix * sum_eig_values / (sum_non_zero_eig_values + (sum_non_zero_eig_values == 0))

    # Rebuild the covariance from the corrected spectrum.
    C_u = torch.matmul(torch.matmul(eig_vectors, L), eig_vectors.transpose(-2, -1))

    C_u_inv = torch.inverse(C_u)

    ss = torch.matmul(Y, C_u_inv) * Y / N
    ss = torch.sum(ss, dim=-1, keepdim=True)
    ss = ss.view(y.size(0), y.size(1), nblv, nblh)

    # Crop g and vv to the valid block positions so that they match `ss`.
    g = g[:, :, Ly: Ly + nblv, Lx: Lx + nblh]
    vv = vv[:, :, Ly: Ly + nblv, Lx: Lx + nblh]

    # Calculate mutual information
    scaled_eig_values = torch.diagonal(L, offset=0, dim1=-2, dim2=-1).unsqueeze(2).unsqueeze(3)

    vv_e = vv.unsqueeze(-1)
    g_e = g.unsqueeze(-1)
    iw_map = torch.sum(
        torch.log2(
            1 + ((vv_e + (1 + g_e * g_e) * sigma_nsq) * ss.unsqueeze(-1) * scaled_eig_values
                 + sigma_nsq * vv_e) / (sigma_nsq * sigma_nsq)
        ),
        dim=-1,
    )

    iw_map[iw_map < EPS] = 0

    return iw_map
def _spectral_residual_visual_saliency(
        x: torch.Tensor, scale: float = 0.25, kernel_size: int = 3,
        sigma: float = 3.8, gaussian_size: int = 10) -> torch.Tensor:
    r"""Compute Spectral Residual Visual Saliency
    Credits X. Hou and L. Zhang, CVPR 07, 2007
    Reference:
        http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.125.5641&rep=rep1&type=pdf

    Args:
        x: Tensor with shape (N, 1, H, W).
        scale: Resizing factor
        kernel_size: Kernel size of average blur filter
        sigma: Sigma of gaussian filter applied on saliency map
        gaussian_size: Size of gaussian filter applied on saliency map

    Returns:
        saliency_map: Saliency map rescaled to [0, 1] using the global minimum and maximum
            and resized to the spatial size of ``x``.

    Raises:
        ValueError: if ``kernel_size`` or ``gaussian_size`` exceeds the scaled input size.
    """
    eps = torch.finfo(x.dtype).eps

    # Both filters must fit into the downscaled image.
    for required in (kernel_size, gaussian_size):
        too_wide = x.size(-1) * scale < required
        too_tall = x.size(-2) * scale < required
        if too_wide or too_tall:
            raise ValueError(
                f'Kernel size can\'t be greater than actual input size. '
                f'Input size: {x.size()} x {scale}. Kernel size: {required}')

    # Downsize image
    in_img = imresize(x, scale=scale)

    # Fourier transform (use complex format [a,b] instead of a + ib
    # because torch<1.8.0 autograd does not support the latter)
    recommended_torch_version = _parse_version('1.8.0')
    torch_version = _parse_version(torch.__version__)
    has_fft_module = len(torch_version) != 0 and torch_version >= recommended_torch_version

    # Log-amplitude and phase of the spectrum.
    if has_fft_module:
        imagefft = torch.fft.fft2(in_img)
        log_amplitude = torch.log(imagefft.abs() + eps)
        phase = torch.angle(imagefft)
    else:
        imagefft = torch.rfft(in_img, 2, onesided=False)
        log_amplitude = torch.log(imagefft.pow(2).sum(dim=-1).sqrt() + eps)
        phase = torch.atan2(imagefft[..., 1], imagefft[..., 0] + eps)

    # Spectral residual: log-amplitude minus its local average.
    # Replicate padding is applied before average filtering whenever the kernel is wider than 1.
    half_kernel = kernel_size // 2
    if half_kernel:
        leading_pad = (kernel_size - 1) // 2
        trailing_pad = half_kernel
        padded_log_amplitude = F.pad(
            log_amplitude, pad=[leading_pad, trailing_pad, leading_pad, trailing_pad], mode='replicate')
    else:
        padded_log_amplitude = log_amplitude

    spectral_residual = log_amplitude - F.avg_pool2d(padded_log_amplitude, kernel_size=kernel_size, stride=1)

    # Saliency map
    # representation of complex exp(spectral_residual + j * phase) as a [real, imag] pair
    magnitude = torch.exp(spectral_residual)
    compx = torch.stack((magnitude * torch.cos(phase), magnitude * torch.sin(phase)), -1)

    if has_fft_module:
        saliency_map = torch.abs(torch.fft.ifft2(torch.view_as_complex(compx))) ** 2
    else:
        saliency_map = torch.sum(torch.ifft(compx, 2) ** 2, dim=-1)

    # After effect for SR-SIM
    # Apply gaussian blur
    blur_kernel = gaussian_filter(gaussian_size, sigma)
    blur_size = gaussian_size
    if blur_size % 2 == 0:
        # matlab pads upper and lower borders with 0s for even kernels
        blur_kernel = torch.cat((torch.zeros(1, 1, blur_size), blur_kernel), 1)
        blur_kernel = torch.cat((torch.zeros(1, blur_size + 1, 1), blur_kernel), 2)
        blur_size = blur_size + 1

    blur_kernel = blur_kernel.view(1, 1, blur_size, blur_size).to(saliency_map)
    saliency_map = F.conv2d(saliency_map, blur_kernel, padding=(blur_size - 1) // 2)

    # normalize between [0, 1] using the global extrema of the whole tensor
    lowest = torch.min(saliency_map)
    highest = torch.max(saliency_map)
    saliency_map = (saliency_map - lowest) / (highest - lowest + eps)

    # scale to original size
    return imresize(saliency_map, sizes=x.size()[-2:])