import numpy as np
from sklearn.decomposition import MiniBatchDictionaryLearning
from sklearn.feature_extraction.image import PatchExtractor


def learn_dictionary(X, n_filters, filter_size, n_sample=1000,
                     n_sample_patches=0, **kwargs):
    """
    Learn a dictionary of n_filters atoms from n_sample images drawn from X.

    :param X: 4D array of shape [n_images, channels, rows, cols]
    :param n_filters: number of dictionary atoms to learn
    :param filter_size: spatial size of each (square) atom
    :param n_sample: number of images sampled from X
    :param n_sample_patches: optional cap on the number of patches used
    :return: atoms of shape [n_filters, channels, filter_size, filter_size]
    """
    n_channels = X.shape[1]

    # Subsample n_sample images randomly.
    rand_idx = np.random.choice(len(X), n_sample, replace=False)

    # Extract patches. PatchExtractor expects channels-last input, so the
    # sampled images are transposed from NCHW to NHWC (a plain reshape here
    # would scramble the pixel data).
    patch_size = (filter_size, filter_size)
    patches = PatchExtractor(patch_size=patch_size).transform(
        X[rand_idx, ...].transpose(0, 2, 3, 1))
    patches = patches.reshape(patches.shape[0], -1)

    # Standardize each patch dimension (epsilon guards against zero variance).
    patches -= np.mean(patches, axis=0)
    patches /= (np.std(patches, axis=0) + 1e-8)

    if n_sample_patches > 0 and (n_sample_patches < len(patches)):
        np.random.shuffle(patches)
        patches = patches[:n_sample_patches, ...]

    # Learn the dictionary.
    print('Learning dictionary for weight initialization...')
    dico = MiniBatchDictionaryLearning(n_components=n_filters, alpha=1,
                                       n_iter=1000, batch_size=10,
                                       shuffle=True, verbose=True, **kwargs)
    W = dico.fit(patches).components_

    # Each component is a flattened (rows, cols, channels) patch, so reshape
    # channels-last first, then move channels to the front (NCHW atoms).
    W = W.reshape(n_filters, filter_size, filter_size, n_channels)
    W = W.transpose(0, 3, 1, 2)
    print('Dictionary learned.')
    return W.astype(np.float32)
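
# A minimal usage sketch: the learned atoms can seed the weights of a conv
# layer so training starts from data-driven filters. The random dataset and
# the PyTorch layer below are illustrative assumptions, not part of the
# original code.
import torch
import torch.nn as nn

X_train = np.random.rand(5000, 3, 32, 32).astype(np.float32)  # CIFAR-like NCHW data
W = learn_dictionary(X_train, n_filters=64, filter_size=5, n_sample=1000)

conv1 = nn.Conv2d(3, 64, kernel_size=5)
with torch.no_grad():
    conv1.weight.copy_(torch.from_numpy(W))  # dictionary atoms as initial filters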
import numpy as np
import torch
from sklearn.decomposition import PCA
from sklearn.feature_extraction.image import PatchExtractor
from torchvision import transforms


def __init__(self, train, patch_size=(9, 9)):
    # Fit a convolutional ZCA whitening kernel on the training set
    # (method of a conv-ZCA transform class).
    self.patch_size = patch_size

    # Collect the training images as a channels-last float array.
    X = []
    toTensor = transforms.ToTensor()
    for _input, _ in train:
        X.append(toTensor(_input).permute(1, 2, 0).numpy())
    X = np.array(X)

    # Center the data and keep the per-channel mean for later use.
    self.mean = X.mean(axis=(0, 1, 2))
    X = np.add(X, -self.mean)
    self.mean = torch.from_numpy(
        self.mean.reshape(1, self.mean.shape[0], 1, 1)
    )
    _, _, _, n_channels = X.shape

    # 1. Sample random image patches (up to 250 per training image here,
    #    not the 10M of the original recipe).
    patches = PatchExtractor(patch_size=self.patch_size,
                             max_patches=int(2.5e2)).transform(X)

    # 2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))
    dim = (-1,) + self.patch_size + (n_channels,)
    eigenvectors = torch.from_numpy(
        pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(X.dtype)
    )
    eigenvalues = torch.from_numpy(
        np.diag(1. / np.sqrt(pca.explained_variance_))
    )

    # 3. (Truncating small eigenvalues is skipped; all components are kept.)
    # 4. Construct the whitening kernel k: for each pair of colors (ci, cj),
    #    set k[j, i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :],
    #    where (x0, y0) is the center pixel location
    #    (e.g. (4, 4) for a 9x9 kernel with 0-based indexing).
    x_0 = int(np.floor(self.patch_size[0] / 2))
    y_0 = int(np.floor(self.patch_size[1] / 2))
    filter_shape = (n_channels, n_channels,
                    self.patch_size[0], self.patch_size[1])
    self.kernel = torch.zeros(filter_shape)
    eigenvectorsT = eigenvectors.permute(2, 3, 1, 0)

    # Build the kernel.
    for i in range(n_channels):
        for j in range(n_channels):
            a = torch.mm(
                eigenvectorsT[x_0, y_0, j, :].contiguous().view(1, -1),
                eigenvalues.float()
            )
            b = eigenvectors[:, i, :, :].contiguous().view(
                -1, self.patch_size[0] * self.patch_size[1]
            )
            c = torch.mm(a, b).contiguous().view(self.patch_size[0],
                                                 self.patch_size[1])
            self.kernel[j, i, :, :] = c
    self.padding = (self.patch_size[0] - 1), (self.patch_size[1] - 1)
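
# A minimal sketch of how the fitted kernel might be applied to a batch.
# The method name `forward` is an assumption (the original snippet shows
# only __init__); the crop logic mirrors the per-image __call__ further
# down.
def forward(self, batch):
    # batch: float tensor of shape [N, C, H, W]
    batch = batch - self.mean.float()             # center with the stored mean
    out = torch.nn.functional.conv2d(
        batch, self.kernel, padding=self.padding  # 'full' convolution
    )
    crop = ((self.patch_size[0] - 1) // 2,
            (self.patch_size[1] - 1) // 2)
    # Crop back to the input resolution.
    return out[:, :, crop[0]:-crop[0], crop[1]:-crop[1]]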
import numpy as np
import theano.tensor as T
from sklearn.decomposition import PCA
from sklearn.feature_extraction.image import PatchExtractor
from theano import config, function, shared
from theano.tensor.nnet import conv2d

floatX = config.floatX


def convolutional_zca(input, patch_size=(9, 9), max_patches=int(1e5)):
    """
    This is an implementation of the convolutional ZCA whitening presented
    by David Eigen in his PhD thesis
    http://www.cs.nyu.edu/~deigen/deigen-thesis.pdf
    "Predicting Images using Convolutional Networks:
     Visual Scene Understanding with Pixel Maps"

    From paragraph 8.4:
    A simple adaptation of ZCA to convolutional application is to find the
    ZCA whitening transformation for a sample of local image patches across
    the dataset, and then apply this transform to every patch in a larger
    image. We then use the center pixel of each ZCA patch to create the
    conv-ZCA output image. The operations of applying local ZCA and
    selecting the center pixel can be combined into a single convolution
    kernel, resulting in the following algorithm
    (explained using RGB inputs and a 9x9 kernel):

    1. Sample 10M random 9x9 image patches (each with 3 colors)
    2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    3. Optionally remove small eigenvalues, so V has shape
       [npca x 3 x 9 x 9].
    4. Construct the whitening kernel k: for each pair of colors (ci, cj),
       set k[j, i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :],
       where (x0, y0) is the center pixel location
       (e.g. (5, 5) for a 9x9 kernel)

    :param input: 4D tensor of shape [batch_size, rows, cols, channels]
    :param patch_size: size of the patches extracted from the dataset
    :param max_patches: max number of patches extracted from the dataset
    :return: conv-ZCA whitened dataset
    """
    # I don't know if it's correct or not... but it seems to work.
    mean = np.mean(input, axis=(0, 1, 2))
    input -= mean  # center the data
    n_imgs, h, w, n_channels = input.shape

    # Accept either an int or an explicit (rows, cols) tuple.
    if isinstance(patch_size, int):
        patch_size = (patch_size, patch_size)

    patches = PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(input)
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))

    # Transpose the components into Theano's convolution filter layout.
    dim = (-1,) + patch_size + (n_channels,)
    V = shared(pca.components_.reshape(dim)
               .transpose(0, 3, 1, 2).astype(input.dtype))
    D = T.nlinalg.diag(1. / np.sqrt(pca.explained_variance_))

    x_0 = int(np.floor(patch_size[0] / 2))
    y_0 = int(np.floor(patch_size[1] / 2))
    filter_shape = [n_channels, n_channels, patch_size[0], patch_size[1]]
    image_shape = [n_imgs, n_channels, h, w]
    kernel = T.zeros(filter_shape)
    VT = V.dimshuffle(2, 3, 1, 0)
    # V : 243 x 3 x 9 x 9 (for a full-rank PCA of 3x9x9 patches)
    # VT: 9 x 9 x 3 x 243

    # Build the kernel.
    for i in range(n_channels):
        for j in range(n_channels):
            a = T.dot(VT[x_0, y_0, j, :], D).reshape([1, -1])
            b = V[:, i, :, :].reshape([-1, patch_size[0] * patch_size[1]])
            c = T.dot(a, b).reshape([patch_size[0], patch_size[1]])
            kernel = T.set_subtensor(kernel[j, i, :, :], c)

    kernel = kernel.astype(floatX)
    input = input.astype(floatX)
    input_images = T.tensor4(dtype=floatX)

    # 'full' convolution followed by a center crop recovers the input size.
    conv_whitening = conv2d(input_images.dimshuffle((0, 3, 1, 2)),
                            kernel,
                            input_shape=image_shape,
                            filter_shape=filter_shape,
                            border_mode='full')
    s_crop = [(patch_size[0] - 1) // 2, (patch_size[1] - 1) // 2]
    conv_whitening = conv_whitening[:, :, s_crop[0]:-s_crop[0],
                                    s_crop[1]:-s_crop[1]]
    conv_whitening = conv_whitening.dimshuffle(0, 2, 3, 1)
    f_convZCA = function([input_images], conv_whitening)
    return f_convZCA(input)
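
# A minimal usage sketch for the Theano version above: whiten a small
# random dataset. The array shape is an assumption for illustration.
rng = np.random.RandomState(0)
data = rng.rand(100, 32, 32, 3).astype('float32')  # [batch, rows, cols, channels]
whitened = convolutional_zca(data, patch_size=(9, 9), max_patches=int(1e4))
print(whitened.shape)  # (100, 32, 32, 3): same shape, locally decorrelated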
def __call__(self, _input):
    # Whiten a single image; PCA is re-fit on the patches of this image
    # alone, rather than on patches sampled across the dataset.
    _input = _input.permute(1, 2, 0).numpy()
    _input = _input.reshape(1, _input.shape[0], _input.shape[1],
                            _input.shape[2])

    # Center the image.
    mean = _input.mean(axis=(0, 1, 2))
    _input = np.add(_input, -mean)
    _, _, _, n_channels = _input.shape

    # 1. Extract all patches from this image (each with 3 colors).
    patches = PatchExtractor(patch_size=self.patch_size).transform(_input)

    # 2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))
    dim = (-1,) + self.patch_size + (n_channels,)
    eigenvectors = torch.from_numpy(
        pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(
            _input.dtype)
    )
    eigenvalues = torch.from_numpy(
        np.diag(1. / np.sqrt(pca.explained_variance_))
    )

    # 3. (Truncating small eigenvalues is skipped; all components are kept.)
    # 4. Construct the whitening kernel k: for each pair of colors (ci, cj),
    #    set k[j, i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :],
    #    where (x0, y0) is the center pixel location
    #    (e.g. (4, 4) for a 9x9 kernel with 0-based indexing).
    x_0 = int(np.floor(self.patch_size[0] / 2))
    y_0 = int(np.floor(self.patch_size[1] / 2))
    filter_shape = (n_channels, n_channels,
                    self.patch_size[0], self.patch_size[1])
    kernel = torch.zeros(filter_shape)
    eigenvectorsT = eigenvectors.permute(2, 3, 1, 0)

    # Build the kernel.
    for i in range(n_channels):
        for j in range(n_channels):
            a = torch.mm(
                eigenvectorsT[x_0, y_0, j, :].contiguous().view(1, -1),
                eigenvalues.float()
            )
            b = eigenvectors[:, i, :, :].contiguous().view(
                -1, self.patch_size[0] * self.patch_size[1]
            )
            c = torch.mm(a, b).contiguous().view(self.patch_size[0],
                                                 self.patch_size[1])
            kernel[j, i, :, :] = c

    # 'full' convolution followed by a center crop recovers the input size.
    padding = (self.patch_size[0] - 1), (self.patch_size[1] - 1)
    input_tensor = torch.from_numpy(_input).permute(0, 3, 1, 2)
    conv_whitening = torch.nn.functional.conv2d(
        input=input_tensor, weight=kernel, padding=padding
    )
    s_crop = [(self.patch_size[0] - 1) // 2,
              (self.patch_size[1] - 1) // 2]
    conv_whitening = conv_whitening[
        :, :, s_crop[0]:-s_crop[0], s_crop[1]:-s_crop[1]
    ]
    # Drop the batch dimension: [1, C, H, W] -> [C, H, W].
    return conv_whitening.view(conv_whitening.shape[1],
                               conv_whitening.shape[2],
                               conv_whitening.shape[3])
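
# A minimal sketch of using the per-image __call__ above. `ConvZCA` is a
# hypothetical name for the class these methods belong to; the original
# snippet does not show the class declaration. The dataset is left
# untransformed because __init__ applies ToTensor itself.
from torch.utils.data import Subset
from torchvision import datasets, transforms

train_set = datasets.CIFAR10('.', train=True, download=True)  # yields PIL images
fit_set = Subset(train_set, range(1000))       # a small subset keeps PCA cheap
zca = ConvZCA(fit_set, patch_size=(9, 9))      # fits the whitening kernel
img, _ = train_set[0]
whitened = zca(transforms.ToTensor()(img))     # [3, 32, 32] whitened image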