def extract_patches(img, N, scale=1.0, patch_size=positive_patches[0].shape):
    """Sample up to ``N`` patches from ``img``, optionally at a scaled size.

    Patches are cut at ``scale * patch_size`` (truncated to integers) and,
    when ``scale`` differs from 1, resized back to ``patch_size`` so the
    returned patches all share one shape.

    :param img: a single image; a leading batch axis is added before
        extraction
    :param N: forwarded to ``PatchExtractor`` as ``max_patches``
    :param scale: multiplicative factor applied to ``patch_size``
    :param patch_size: target patch shape; the default is read from the
        module-level ``positive_patches`` when the function is defined
    :return: array of extracted (and possibly resized) patches
    """
    sampled_shape = tuple((scale * np.array(patch_size)).astype(int))
    sampler = PatchExtractor(patch_size=sampled_shape,
                             max_patches=N,
                             random_state=0)  # fixed seed: repeatable draws
    sampled = sampler.transform(img[np.newaxis])
    if scale == 1:
        return sampled
    # Bring every patch back to the requested size.
    resized = [transform.resize(piece, patch_size) for piece in sampled]
    return np.array(resized)
def learn_dictionary(X, n_filters, filter_size, n_sample=1000,
                     n_sample_patches=0, **kwargs):
    """Learn a dictionary of ``n_filters`` atoms from ``n_sample`` images of X.

    :param X: image array indexed as (n_images, n_channels, height, width)
    :param n_filters: number of dictionary atoms to learn
    :param filter_size: side length of the square patches / filters
    :param n_sample: number of images drawn at random without replacement
        (``np.random.choice`` raises if this exceeds ``len(X)``)
    :param n_sample_patches: if positive and smaller than the number of
        extracted patches, only this many randomly chosen patches are used
    :param kwargs: forwarded to ``MiniBatchDictionaryLearning``
    :return: float32 array of shape
        (n_filters, n_channels, filter_size, filter_size)
    """
    n_channels = X.shape[1]

    # subsample n_sample images randomly
    rand_idx = np.random.choice(len(X), n_sample, replace=False)

    # extract patches
    # The extractor is fed channels-last images. Moving the channel axis
    # requires a transpose: a reshape only reinterprets the memory layout
    # and would scramble pixels for multi-channel input.
    images = np.transpose(X[rand_idx, ...], (0, 2, 3, 1))
    patch_size = (filter_size, filter_size)
    patches = PatchExtractor(patch_size).transform(images)
    patches = patches.reshape(patches.shape[0], -1)

    # Standardise every patch feature; constant features keep a unit
    # divisor so they become zeros instead of NaN/inf.
    patches -= np.mean(patches, axis=0)
    std = np.std(patches, axis=0)
    std[std == 0] = 1.0
    patches /= std

    if n_sample_patches > 0 and (n_sample_patches < len(patches)):
        np.random.shuffle(patches)
        patches = patches[:n_sample_patches, ...]

    # learn dictionary
    print('Learning dictionary for weight initialization...')
    dico = MiniBatchDictionaryLearning(n_components=n_filters, alpha=1,
                                       n_iter=1000, batch_size=10,
                                       shuffle=True, verbose=True, **kwargs)
    W = dico.fit(patches).components_

    # Atoms are flattened in (row, col, channel) order, mirroring the
    # patches; undo that before switching to channels-first filters.
    W = W.reshape(n_filters, filter_size, filter_size, n_channels)
    W = np.ascontiguousarray(W.transpose(0, 3, 1, 2), dtype=np.float32)
    print('Dictionary learned.')
    return W
def test_patch_extractor_color():
    """Extracting every 8x8 patch of colour images keeps a trailing axis of 3."""
    faces = _make_images(orange_face)
    patch_h, patch_w = 8, 8
    img_h, img_w = faces.shape[1:3]
    # Number of distinct top-left corners per image, times number of images.
    per_image = (img_h - patch_h + 1) * (img_w - patch_w + 1)
    extractor = PatchExtractor(patch_size=(patch_h, patch_w), random_state=0)
    result = extractor.transform(faces)
    assert result.shape == (len(faces) * per_image, patch_h, patch_w, 3)
def test_patch_extractor_color():
    """Check the output shape when all 8x8 patches of colour images are taken."""
    faces = _make_images(orange_face)
    side_h, side_w = 8, 8
    height, width = faces.shape[1:3]
    rows = height - side_h + 1  # valid vertical patch positions
    cols = width - side_w + 1   # valid horizontal patch positions
    wanted_shape = (len(faces) * rows * cols, side_h, side_w, 3)
    patches = PatchExtractor(patch_size=(side_h, side_w),
                             random_state=0).transform(faces)
    assert_true(patches.shape == wanted_shape)
def test_patch_extractor_all_patches():
    """Without ``max_patches`` the test expects every 8x8 patch of each image."""
    faces = face_collection
    side_h, side_w = 8, 8
    height, width = faces.shape[1:3]
    rows = height - side_h + 1  # valid vertical patch positions
    cols = width - side_w + 1   # valid horizontal patch positions
    wanted_shape = (len(faces) * rows * cols, side_h, side_w)
    patches = PatchExtractor(patch_size=(side_h, side_w),
                             random_state=0).transform(faces)
    assert_true(patches.shape == wanted_shape)
def test_patch_extractor_all_patches():
    """Extracting with no ``max_patches`` should give all 8x8 patches."""
    faces = face_collection
    patch_h, patch_w = 8, 8
    img_h, img_w = faces.shape[1:3]
    # Number of distinct top-left corners per image, times number of images.
    per_image = (img_h - patch_h + 1) * (img_w - patch_w + 1)
    extractor = PatchExtractor(patch_size=(patch_h, patch_w), random_state=0)
    result = extractor.transform(faces)
    assert result.shape == (len(faces) * per_image, patch_h, patch_w)
def __init__(self, train, patch_size=(9, 9)):
    """Precompute a convolutional whitening kernel from a training set.

    Stores ``self.patch_size``, ``self.mean``, ``self.kernel`` and
    ``self.padding`` for later use.

    :param train: iterable of ``(image, _)`` pairs; only the first element
        of each pair is used
    :param patch_size: (rows, cols) of the whitening kernel; must be a tuple
        because it is concatenated with other tuples below
    """
    self.patch_size = patch_size
    X = []
    toTensor = transforms.ToTensor()
    for _input, _ in train:
        # permute(1, 2, 0) moves the tensor's first axis to the end
        # (presumably channels-first -> channels-last; confirm against
        # the ToTensor output layout).
        X.append(toTensor(_input).permute(1, 2, 0).numpy())
    X = np.array(X)
    # Mean over the first three axes, i.e. one value per last-axis entry.
    self.mean = (X.mean(axis=(0, 1, 2)))
    X = np.add(X, -self.mean)
    # Keep the mean as a (1, n, 1, 1) tensor.
    self.mean = torch.from_numpy(
        self.mean.reshape(1, self.mean.shape[0], 1, 1)
    )
    _, _, _, n_channels = X.shape
    # 1. Sample random image patches.
    #    NOTE(review): the referenced algorithm samples ~10M patches; this
    #    code passes max_patches=250 to PatchExtractor -- confirm intended.
    patches = PatchExtractor(patch_size=self.patch_size,
                             max_patches=int(2.5e2)).transform(X)
    # 2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))
    # Components come back flattened; restore (row, col, channel) and then
    # reorder each component to (channel, row, col).
    dim = (-1,) + self.patch_size + (n_channels,)
    eigenvectors = torch.from_numpy(
        pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(
            X.dtype)
    )
    # Diagonal matrix D^{-1/2} built from the explained variances.
    eigenvalues = torch.from_numpy(
        np.diag(1. / np.sqrt(pca.explained_variance_))
    )
    # 4. Construct the whitening kernel k:
    #    for each pair of colors (ci,cj),
    #    set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]
    #    where (x0, y0) is the center pixel location
    #    (e.g. (5,5) for a 9x9 kernel)
    x_0 = int(np.floor(self.patch_size[0] / 2))
    y_0 = int(np.floor(self.patch_size[1] / 2))
    filter_shape = (n_channels, n_channels,
                    self.patch_size[0], self.patch_size[1])
    self.kernel = torch.zeros(filter_shape)
    # Reordered copy indexed as [row, col, channel, component].
    eigenvectorsT = eigenvectors.permute(2, 3, 1, 0)
    # build the kernel
    for i in range(n_channels):
        for j in range(n_channels):
            # Row vector: centre-pixel loadings of channel j times D^{-1/2}.
            a = torch.mm(
                eigenvectorsT[x_0, y_0, j, :].contiguous().view(1, -1),
                eigenvalues.float()
            )
            # Matrix: one flattened spatial map of channel i per component.
            b = eigenvectors[:, i, :, :].contiguous().view(
                -1, self.patch_size[0] * self.patch_size[1]
            )
            c = torch.mm(a, b).contiguous().view(self.patch_size[0],
                                                 self.patch_size[1])
            self.kernel[j, i, :, :] = c
    # Padding of (kernel size - 1) per spatial dimension, stored as a tuple.
    self.padding = (self.patch_size[0] - 1), (self.patch_size[1] - 1)
def extract_patches(img, N, patch_size, scale=1.0):
    """Draw up to ``N`` patches from one image, optionally at a scaled size.

    :param img: a single image; a leading batch axis is added before
        extraction
    :param N: forwarded to ``PatchExtractor`` as ``max_patches``
    :param patch_size: shape every returned patch should have
    :param scale: factor applied to ``patch_size`` when cutting patches
    :return: array of patches; resized back to ``patch_size`` whenever
        ``scale`` is not 1
    """
    # takes the size we need from the negative pics
    cut_shape = tuple((scale * np.array(patch_size)).astype(int))
    cutter = PatchExtractor(patch_size=cut_shape,
                            max_patches=N,
                            random_state=0)  # fixed seed: repeatable draws
    cut = cutter.transform(img[np.newaxis])
    if scale == 1:
        return cut
    return np.array([transform.resize(piece, patch_size) for piece in cut])
def extract_patches(path, max_patches, patch_size):
    """
    Extract patches of the same `patch_size` from the image stored at `path`.

    Output is a 4D-Numpy array with shape
    (max_patches, *patch_size, num_channels=3).

    :param path: location of the image file, read with ``cv2.imread``
    :param max_patches: forwarded to ``PatchExtractor``
    :param patch_size: (height, width) of each patch
    :raises ValueError: if the image cannot be read
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside sklearn.
        raise ValueError("Could not read an image from {!r}".format(path))
    img = np.expand_dims(img, axis=0)  # batch of one image
    patch_extractor = PatchExtractor(max_patches=max_patches,
                                     patch_size=patch_size)
    patch_extractor.fit(img)
    return patch_extractor.transform(img)
def get_feature_patches(FV, patch_size, patch_shift, input_shape):
    """Cut a feature matrix into patches and summarise each by mean and variance.

    :param FV: 2-D feature matrix; it is standardised with
        ``StandardScaler(copy=False)`` and transposed when its second
        dimension is one of 9, 10, 21, 22 or 39
    :param patch_size: number of frames (columns) per patch
    :param patch_shift: hop used only to decide how many patches to request
    :param input_shape: ``2 * input_shape[0]`` is the expected width of the
        returned statistics
    :return: 2-D array, one row per patch, holding the per-feature means
        followed by the per-feature variances
    """
    FV = StandardScaler(copy=False).fit_transform(FV)

    # FV should be of the shape (nFeatures, nFrames)
    if np.shape(FV)[1] in (9, 10, 21, 22, 39):
        FV = FV.T

    # Too short for a single patch: tile the matrix along the frame axis.
    if np.shape(FV)[1] < patch_size:
        tile = FV.copy()
        while np.shape(FV)[1] <= patch_size:
            FV = np.concatenate((FV, tile), axis=1)

    n_patches = int(np.ceil(np.shape(FV)[1] / patch_shift))
    extractor = PatchExtractor(patch_size=(np.shape(FV)[0], patch_size),
                               max_patches=n_patches)
    patches = extractor.transform(np.expand_dims(FV, axis=0))

    # Collapse the frame axis of every patch into mean and variance.
    stats = np.concatenate(
        (np.mean(patches, axis=2), np.var(patches, axis=2)), axis=1)

    width = np.shape(stats)[1]
    if width != 2 * input_shape[0]:
        # This condition checks for 39CC
        if width == 44:
            # Drop columns 21 and 43.
            keep = [col for col in range(43) if col != 21]
            stats = stats[:, keep]
        elif width == 78:
            # Keep the first 7 entries of each group of 13 columns.
            keep = np.array([start + offset
                             for start in range(0, 78, 13)
                             for offset in range(7)])
            stats = stats[:, keep]
    return stats
def _extract_random_patches(X: np.ndarray, image_size: Tuple,
                            patch_size: Tuple,
                            n_patches: Union[int, np.integer],
                            random_state: Union[
                                None, int, np.random.RandomState] = None
                            ) -> np.ndarray:
    """
    Extract random patches from image array.

    Rows of ``X`` are drawn at random (repeats are possible), reshaped to
    images, and one patch is taken from each drawn image.

    Parameters
    ----------
    X : np.ndarray of shape (n_samples, n_image_array)
    image_size : Tuple (n_height, n_width) or (n_height, n_width, n_channels)
        Image size
    patch_size : Tuple (n_height, n_width) or (n_height, n_width, n_channels)
        Patch size, features to extract
    n_patches : Union[int, np.integer]
        Number of patches to extract
    random_state : Union[None, int, np.random.RandomState], default=None

    Returns
    -------
    ndarray of shape (n_samples, n_patch_features)
    """
    rng = check_random_state(random_state)
    # The row indices are drawn before the extractor consumes the same
    # generator, so the order of these statements matters.
    chosen_rows = rng.randint(0, high=X.shape[0], size=n_patches)
    images = Coates._reshape_arrays_to_images(X[chosen_rows], image_size)
    extractor = PatchExtractor(patch_size=(patch_size[0], patch_size[1]),
                               max_patches=1,
                               random_state=rng)
    picked = extractor.transform(images)
    return Coates._reshape_images_to_arrays(picked, patch_size)
def get_feature_patches(FV, patch_size, patch_shift, input_shape):
    """Cut a feature matrix into patches and fix their feature dimension.

    :param FV: 2-D feature matrix; it is standardised with
        ``StandardScaler(copy=False)`` and transposed when its second
        dimension is one of 9, 10, 21, 22 or 39
    :param patch_size: number of frames (columns) per patch
    :param patch_shift: hop used only to decide how many patches to request
    :param input_shape: ``input_shape[0]`` is the feature count that 9- and
        10-dimensional patches are zero-padded to
    :return: 3-D array of patches indexed (patch, feature, frame)
    """
    FV = StandardScaler(copy=False).fit_transform(FV)

    # FV should be of the shape (nFeatures, nFrames)
    if np.shape(FV)[1] in (9, 10, 21, 22, 39):
        FV = FV.T

    # Too short for a single patch: tile the matrix along the frame axis.
    if np.shape(FV)[1] < patch_size:
        tile = FV.copy()
        while np.shape(FV)[1] <= patch_size:
            FV = np.concatenate((FV, tile), axis=1)

    n_patches = int(np.ceil(np.shape(FV)[1] / patch_shift))
    extractor = PatchExtractor(patch_size=(np.shape(FV)[0], patch_size),
                               max_patches=n_patches)
    patches = extractor.transform(np.expand_dims(FV, axis=0))

    n_feat = np.shape(patches)[1]
    if n_feat in (9, 10):
        # Zero-pad the feature axis up to input_shape[0].
        filler = np.zeros((np.shape(patches)[0],
                           input_shape[0] - n_feat,
                           np.shape(patches)[2]))
        patches = np.concatenate((patches, filler), axis=1)
    elif n_feat == 22:
        patches = patches[:, :21, :]
    elif n_feat == 39:
        # Keep the first 7 entries of each of the three groups of 13.
        keep = np.array([start + offset
                         for start in (0, 13, 26)
                         for offset in range(7)])
        patches = patches[:, keep, :]
    return patches
def _extract_random_patches(X, image_size, patch_size, n_patches,
                            random_state=None):
    """
    Extracts random patches from image array.

    Rows of ``X`` are sampled at random (the same row may be picked more
    than once), turned back into images, and a single patch is cut from
    each sampled image.

    :param X: (n_samples, n_image_array)
    :param image_size: (n_height, n_width) or (n_height, n_width, n_channels)
        Image size
    :param patch_size: (n_height, n_width) or (n_height, n_width, n_channels)
        Patch size, features to extract
    :param n_patches: Number of patches to extract
    :param random_state: None or int or np.RandomState
    :return: (n_samples, n_patch_features)
    """
    generator = check_random_state(random_state)
    # Index sampling must happen before the extractor uses the generator.
    row_ids = generator.randint(0, high=X.shape[0], size=n_patches)
    sampled_images = Coates._reshape_arrays_to_images(X[row_ids], image_size)
    height_width = (patch_size[0], patch_size[1])
    sampled_patches = PatchExtractor(
        patch_size=height_width, max_patches=1,
        random_state=generator).transform(sampled_images)
    return Coates._reshape_images_to_arrays(sampled_patches, patch_size)
def test_patch_extractor_max_patches():
    """Check the patch count for an integer and a fractional ``max_patches``."""
    faces = face_collection
    img_h, img_w = faces.shape[1:3]
    patch_shape = (8, 8)
    all_positions = ((img_h - patch_shape[0] + 1)
                     * (img_w - patch_shape[1] + 1))

    # Integer budget: exactly that many patches per image.
    extractor = PatchExtractor(patch_size=patch_shape, max_patches=100,
                               random_state=0)
    result = extractor.transform(faces)
    assert result.shape == (len(faces) * 100,) + patch_shape

    # Fractional budget: that share of all possible positions, truncated.
    extractor = PatchExtractor(patch_size=patch_shape, max_patches=0.5,
                               random_state=0)
    result = extractor.transform(faces)
    wanted = len(faces) * int(all_positions * 0.5)
    assert result.shape == (wanted,) + patch_shape
def extract_patch(images, patch_side=33, max_patches=20):
    """
    Extracts patches from a collection of images.

    'patch': refer to patch generated by single image
    'images': refer to the set of all images
    'patches': refer to the patches generated by the set of all images

    Each low-/high-resolution pair is sampled with the same random seed.
    This presumably yields matching patch locations when both images have
    the same size (i.e. the low-resolution image was already upscaled) --
    confirm against the caller.

    Input: images = (image_lr, image_hr)
    Return: patches_lr, patches_hr, each of shape
        (n_patches_total, patch_side, patch_side, 3)
    Raises: IndexError if image_hr holds fewer images than image_lr
    """
    image_lr, image_hr = images
    N = len(image_lr)
    if len(image_hr) < N:
        raise IndexError(
            "image_hr has fewer images than image_lr "
            "({} < {})".format(len(image_hr), N))

    patchextractor = PatchExtractor()
    patchextractor.set_params(patch_size=(patch_side, patch_side),
                              max_patches=max_patches)

    # Collect per-image results and join them once at the end; growing an
    # array with np.append inside the loop recopies everything each time.
    # The empty seed array keeps the output shape and dtype for N == 0.
    seed_block = np.empty([0, patch_side, patch_side, 3])
    chunks_lr = [seed_block]
    chunks_hr = [seed_block]

    for lr_image, hr_image in zip(image_lr, image_hr):
        # set the random state shared by both images of the pair
        randint = np.random.randint(0, 2**16 - 1)

        # patchify the low resolution image
        # (the low resolution image needs to be bicubic-upscaled beforehand)
        patchextractor.set_params(random_state=randint)
        chunks_lr.append(
            patchextractor.transform(np.expand_dims(lr_image, axis=0)))

        # patchify the high resolution image with the same seed
        patchextractor.set_params(random_state=randint)
        chunks_hr.append(
            patchextractor.transform(np.expand_dims(hr_image, axis=0)))

    patches_lr = np.concatenate(chunks_lr, axis=0)
    patches_hr = np.concatenate(chunks_hr, axis=0)
    return patches_lr, patches_hr
def generate_data(img_folder, max_patches=0.001):
    """Yield ``(small_patches, patches)`` training pairs, one pair per image.

    For every file returned by ``get_img_filepaths(img_folder)`` the image is
    scaled to the 0..1 range, 32x32 patches are extracted, and each patch is
    provided twice: passed through ``resize`` (the network input) and
    flattened to a vector (the target).

    :param img_folder: folder handed to ``get_img_filepaths``
    :param max_patches: forwarded to ``PatchExtractor``
    """
    dump_debug_images = False  # flip to write the patches to ./debug

    for fpath in get_img_filepaths(img_folder):
        print ('Reading image', fpath)
        extractor = PatchExtractor(patch_size=(32, 32),
                                   max_patches=max_patches)
        image = imread(fpath, mode='RGB')  # shape : (row, col, channels)
        # Batch of one image, cast into 0 to 1 space which DNN models
        # learn faster.
        batch = np.array([image]) / 255.0  # shape : (1, row, col, channels)
        # shape : (n_samples, row, col, channels)
        channel_last = extractor.transform(batch)
        # shape : (n_samples, channels, row, col)
        channel_first = np.rollaxis(channel_last, axis=3, start=1)
        # shape : (n_samples, channels, max_x, max_y)
        small_patches = np.array([resize(patch) for patch in channel_first])
        # shape : (n_samples, output_vector_size)
        patches = np.array([patch.reshape(-1) for patch in channel_first])

        if dump_debug_images:
            # Print out values to debug
            print ("Shapes of tensors", small_patches.shape, patches.shape)
            for i, (small, big) in enumerate(zip(small_patches, patches)):
                small_img = np.rollaxis(small, axis=0, start=3)
                if not os.path.exists('debug'):
                    os.makedirs('debug')
                imsave('debug/small_patch_{}.jpg'.format(i), small_img)
                imsave('debug/big_patch_{}.jpg'.format(i), vec2img(big))

        yield small_patches, patches
def get_feature_patches(PARAMS, Xin, segment_duration, segment_shift):
    """Normalise a signal and cut it into fixed-length segments.

    The signal is mean-centred and divided by its value range *in place*,
    so the caller's array is modified. Signals shorter than one segment are
    repeated until they are long enough.

    :param PARAMS: unused in this function
    :param Xin: array holding the signal samples
    :param segment_duration: number of samples per segment
    :param segment_shift: hop used only to decide how many segments to
        request
    :return: array of segments produced by ``PatchExtractor``
    """
    Xin -= np.mean(Xin)
    value_range = np.max(Xin) - np.min(Xin)
    Xin /= value_range

    # Too short for one segment: keep appending copies of the signal.
    if len(Xin) < segment_duration:
        one_copy = Xin.copy()
        while len(Xin) <= segment_duration:
            Xin = np.append(Xin, one_copy)

    n_segments = int(np.ceil(len(Xin) / segment_shift))

    # Add a leading and a trailing unit axis so the signal can be treated
    # as a single image that is one pixel wide.
    as_image = np.expand_dims(Xin, axis=0)
    as_image = np.expand_dims(as_image, axis=2)

    extractor = PatchExtractor(patch_size=(segment_duration, 1),
                               max_patches=n_segments)
    return extractor.transform(as_image)
def test_patch_extractor_max_patches():
    """Verify output shapes for integer and fractional ``max_patches``."""
    faces = face_collection
    height, width = faces.shape[1:3]
    side_h, side_w = 8, 8

    # An integer asks for that many patches from every image.
    budget = 100
    patches = PatchExtractor(patch_size=(side_h, side_w),
                             max_patches=budget,
                             random_state=0).transform(faces)
    assert_true(patches.shape == (len(faces) * budget, side_h, side_w))

    # A float asks for that fraction of all possible patches per image.
    budget = 0.5
    per_image = int((height - side_h + 1) * (width - side_w + 1) * budget)
    patches = PatchExtractor(patch_size=(side_h, side_w),
                             max_patches=budget,
                             random_state=0).transform(faces)
    assert_true(patches.shape == (len(faces) * per_image, side_h, side_w))
def get_patches(img, tb_size, sb_size, mask_tb=None, mask_sb=None):
    """Extract nested patches: "tb" blocks first, then "sb" blocks inside them.

    :param img: batch of images; ``img.shape[0]`` is the batch size
    :param tb_size: patch size of the outer ("tb") extraction
    :param sb_size: patch size of the inner ("sb") extraction
    :param mask_tb: optional array; its raveled form indexes the outer
        blocks to keep
    :param mask_sb: optional array; its raveled form indexes the inner
        blocks to keep
    :return: array whose last axis is a flattened inner block
    """
    n_images = img.shape[0]

    # Outer blocks, regrouped per image: (image, block, rows, cols).
    outer = PatchExtractor(tuple(tb_size)).transform(img)
    outer_rows, outer_cols = outer.shape[-2:]
    outer = outer.reshape((n_images, -1, outer_rows, outer_cols))
    if mask_tb is not None:
        outer = outer[:, mask_tb.ravel(), :, :]

    # Move the block axis last so it plays the role of the channel axis
    # during the second extraction.
    outer = np.rollaxis(outer, 1, 4)

    # Inner blocks, regrouped per image, then bring the former block axis
    # back to position 1.
    inner = PatchExtractor(tuple(sb_size)).transform(outer)
    inner = inner.reshape((n_images, -1) + inner.shape[-3:])
    inner = np.rollaxis(inner, 4, 1)
    if mask_sb is not None:
        inner = inner[:, :, mask_sb.ravel(), :, :]

    # Flatten the two trailing (spatial) axes of every inner block.
    flat_len = np.prod(inner.shape[-2:])
    return inner.reshape(inner.shape[:-2] + (flat_len,))
def test_patch_extractor_max_patches_default():
    """With no ``patch_size`` given, 100 patches of 19x25 per image are expected."""
    faces = face_collection
    per_image = 100
    extractor = PatchExtractor(max_patches=per_image, random_state=0)
    result = extractor.transform(faces)
    wanted_shape = (len(faces) * 100, 19, 25)
    assert result.shape == wanted_shape
def test_patch_extractor_fit():
    """``fit`` is expected to return an object equal to the extractor itself."""
    faces = face_collection
    extractor = PatchExtractor(patch_size=(8, 8),
                               max_patches=100,
                               random_state=0)
    fitted = extractor.fit(faces)
    assert extractor == fitted
def train(self):
    """Sample and preprocess training patches, then run the parent's training.

    Reads ``receptive_field`` and ``training_patches_num`` from
    ``self.params`` and stores the result in ``self.trainingPatches``.
    """
    field = self.params['receptive_field']
    budget = self.params['training_patches_num']
    sampler = PatchExtractor((field, field), max_patches=budget)
    sampled = sampler.transform(self.dataset.train.images)
    # One flattened square patch per row.
    self.trainingPatches = sampled.reshape(-1, field ** 2)
    self.trainingPatches = self._img_preprocessing(self.trainingPatches)
    super().train()
def convolutional_zca(input, patch_size=(9, 9), max_patches=int(1e5)):
    """
    This is an implementation of the convolutional ZCA whitening presented by
    David Eigen in his phd thesis
    http://www.cs.nyu.edu/~deigen/deigen-thesis.pdf

    "Predicting Images using Convolutional Networks:
     Visual Scene Understanding with Pixel Maps"

    From paragraph 8.4:

    A simple adaptation of ZCA to convolutional application is to find the
    ZCA whitening transformation for a sample of local image patches across
    the dataset, and then apply this transform to every patch in a larger
    image. We then use the center pixel of each ZCA patch to create the
    conv-ZCA output image. The operations of applying local ZCA and selecting
    the center pixel can be combined into a single convolution kernel,
    resulting in the following algorithm
    (explained using RGB inputs and 9x9 kernel):

    1. Sample 10M random 9x9 image patches (each with 3 colors)
    2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    3. Optionally remove small eigenvalues, so V has shape
       [npca x 3 x 9 x 9].
    4. Construct the whitening kernel k:
        for each pair of colors (ci,cj),
        set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]

    where (x0, y0) is the center pixel location (e.g. (5,5) for a 9x9 kernel)

    Note that ``input`` is mean-centered in place.

    :param input: 4D tensor of shape [batch_size, rows, col, channels]
    :param patch_size: size of the patches extracted from the dataset;
        either one int (square patches) or a (rows, cols) pair
    :param max_patches: max number of patches extracted from the dataset
    :return: conv-zca whitened dataset
    """
    # I don't know if it's correct or not.. but it seems to work
    mean = np.mean(input, axis=(0, 1, 2))
    input -= mean  # center the data

    n_imgs, h, w, n_channels = input.shape

    # Accept both a scalar and a pair. Wrapping unconditionally turned the
    # default (9, 9) into ((9, 9), (9, 9)) and broke every later step.
    if np.isscalar(patch_size):
        patch_size = (patch_size, patch_size)
    else:
        patch_size = tuple(patch_size)

    patches = PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(input)
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))

    # Transpose the components into theano convolution filter type
    dim = (-1,) + patch_size + (n_channels,)
    V = shared(pca.components_.reshape(dim).
               transpose(0, 3, 1, 2).astype(input.dtype))
    D = T.nlinalg.diag(1. / np.sqrt(pca.explained_variance_))

    x_0 = int(np.floor(patch_size[0] / 2))
    y_0 = int(np.floor(patch_size[1] / 2))

    filter_shape = [n_channels, n_channels, patch_size[0], patch_size[1]]
    image_shape = [n_imgs, n_channels, h, w]
    kernel = T.zeros(filter_shape)
    VT = V.dimshuffle(2, 3, 1, 0)

    # V : 243 x 3 x 9 x 9
    # VT : 9 x 9 x 3 x 243

    # build the kernel
    for i in range(n_channels):
        for j in range(n_channels):
            a = T.dot(VT[x_0, y_0, j, :], D).reshape([1, -1])
            b = V[:, i, :, :].reshape([-1, patch_size[0] * patch_size[1]])
            c = T.dot(a, b).reshape([patch_size[0], patch_size[1]])
            kernel = T.set_subtensor(kernel[j, i, :, :], c)

    kernel = kernel.astype(floatX)
    input = input.astype(floatX)
    input_images = T.tensor4(dtype=floatX)
    conv_whitening = conv2d(input_images.dimshuffle((0, 3, 1, 2)),
                            kernel,
                            input_shape=image_shape,
                            filter_shape=filter_shape,
                            border_mode='full')

    # A 'full' convolution grows each spatial axis by (kernel size - 1);
    # trim the border again. Explicit end indices are used because a
    # negative-stop slice such as [0:-0] is empty when the crop is zero
    # (kernel sizes 1 and 2).
    s_crop = [(patch_size[0] - 1) // 2,
              (patch_size[1] - 1) // 2]
    row_end = h + patch_size[0] - 1 - s_crop[0]
    col_end = w + patch_size[1] - 1 - s_crop[1]
    conv_whitening = conv_whitening[:, :, s_crop[0]:row_end,
                                    s_crop[1]:col_end]
    conv_whitening = conv_whitening.dimshuffle(0, 2, 3, 1)
    f_convZCA = function([input_images], conv_whitening)

    return f_convZCA(input)
def convolutional_zca(input, patch_size=(9, 9), max_patches=int(1e5)):
    """
    This is an implementation of the convolutional ZCA whitening presented by
    David Eigen in his phd thesis
    http://www.cs.nyu.edu/~deigen/deigen-thesis.pdf

    "Predicting Images using Convolutional Networks:
     Visual Scene Understanding with Pixel Maps"

    From paragraph 8.4:

    A simple adaptation of ZCA to convolutional application is to find the
    ZCA whitening transformation for a sample of local image patches across
    the dataset, and then apply this transform to every patch in a larger
    image. We then use the center pixel of each ZCA patch to create the
    conv-ZCA output image. The operations of applying local ZCA and selecting
    the center pixel can be combined into a single convolution kernel,
    resulting in the following algorithm
    (explained using RGB inputs and 9x9 kernel):

    1. Sample 10M random 9x9 image patches (each with 3 colors)
    2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    3. Optionally remove small eigenvalues, so V has shape
       [npca x 3 x 9 x 9].
    4. Construct the whitening kernel k:
        for each pair of colors (ci,cj),
        set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]

    where (x0, y0) is the center pixel location (e.g. (5,5) for a 9x9 kernel)

    Note that ``input`` is mean-centered in place.

    :param input: 4D tensor of shape [batch_size, rows, col, channels]
    :param patch_size: size of the patches extracted from the dataset;
        either one int (square patches) or a (rows, cols) pair
    :param max_patches: max number of patches extracted from the dataset
    :return: conv-zca whitened dataset
    """
    # I don't know if it's correct or not.. but it seems to work
    mean = np.mean(input, axis=(0, 1, 2))
    input -= mean  # center the data

    n_imgs, h, w, n_channels = input.shape

    # The default is already a pair, so only a scalar may be duplicated;
    # duplicating a pair produced a nested tuple that no later step accepts.
    if np.isscalar(patch_size):
        patch_size = (patch_size, patch_size)
    else:
        patch_size = tuple(patch_size)

    patches = PatchExtractor(patch_size=patch_size,
                             max_patches=max_patches).transform(input)
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))

    # Transpose the components into theano convolution filter type
    dim = (-1, ) + patch_size + (n_channels, )
    V = shared(
        pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(input.dtype))
    D = T.nlinalg.diag(1. / np.sqrt(pca.explained_variance_))

    x_0 = int(np.floor(patch_size[0] / 2))
    y_0 = int(np.floor(patch_size[1] / 2))

    filter_shape = [n_channels, n_channels, patch_size[0], patch_size[1]]
    image_shape = [n_imgs, n_channels, h, w]
    kernel = T.zeros(filter_shape)
    VT = V.dimshuffle(2, 3, 1, 0)

    # V : 243 x 3 x 9 x 9
    # VT : 9 x 9 x 3 x 243

    # build the kernel
    for i in range(n_channels):
        for j in range(n_channels):
            a = T.dot(VT[x_0, y_0, j, :], D).reshape([1, -1])
            b = V[:, i, :, :].reshape([-1, patch_size[0] * patch_size[1]])
            c = T.dot(a, b).reshape([patch_size[0], patch_size[1]])
            kernel = T.set_subtensor(kernel[j, i, :, :], c)

    kernel = kernel.astype(floatX)
    input = input.astype(floatX)
    input_images = T.tensor4(dtype=floatX)
    conv_whitening = conv2d(input_images.dimshuffle((0, 3, 1, 2)),
                            kernel,
                            input_shape=image_shape,
                            filter_shape=filter_shape,
                            border_mode='full')

    # Trim the border added by the 'full' convolution, which enlarges each
    # spatial axis by (kernel size - 1). End indices are spelled out since
    # a slice like [0:-0] would be empty when nothing has to be cropped.
    s_crop = [(patch_size[0] - 1) // 2, (patch_size[1] - 1) // 2]
    row_end = h + patch_size[0] - 1 - s_crop[0]
    col_end = w + patch_size[1] - 1 - s_crop[1]
    conv_whitening = conv_whitening[:, :, s_crop[0]:row_end,
                                    s_crop[1]:col_end]
    conv_whitening = conv_whitening.dimshuffle(0, 2, 3, 1)
    f_convZCA = function([input_images], conv_whitening)

    return f_convZCA(input)
def __call__(self, _input):
    """Whiten a single image tensor with a kernel estimated from that image.

    The whitening kernel is recomputed from the patches of ``_input`` on
    every call.

    :param _input: 3-D torch tensor; its first axis is moved last before
        processing (presumably channels-first on entry -- confirm)
    :return: 3-D tensor obtained by dropping the batch axis of the
        convolution output
    """
    # Move the first axis last and add a leading batch axis of size 1.
    _input = _input.permute(1, 2, 0).numpy()
    _input = _input.reshape(1, _input.shape[0],
                            _input.shape[1], _input.shape[2])
    # Center the data: one mean value per last-axis entry.
    mean = (_input.mean(axis=(0, 1, 2)))
    _input = np.add(_input, -mean)
    _, _, _, n_channels = _input.shape
    # 1. Extract image patches. No max_patches is given, so presumably
    #    every patch of the image is used (confirm with PatchExtractor docs).
    patches = PatchExtractor(patch_size=self.patch_size).transform(_input)
    # 2. Perform PCA on these to get eigenvectors V and eigenvalues D.
    pca = PCA()
    pca.fit(patches.reshape(patches.shape[0], -1))
    # Components come back flattened; restore (row, col, channel) and then
    # reorder each component to (channel, row, col).
    dim = (-1,) + self.patch_size + (n_channels,)
    eigenvectors = torch.from_numpy(
        pca.components_.reshape(dim).transpose(0, 3, 1, 2).astype(
            _input.dtype)
    )
    # Diagonal matrix D^{-1/2} built from the explained variances.
    eigenvalues = torch.from_numpy(
        np.diag(1. / np.sqrt(pca.explained_variance_))
    )
    # 4. Construct the whitening kernel k:
    #    for each pair of colors (ci,cj),
    #    set k[j,i, :, :] = V[:, j, x0, y0]^T * D^{-1/2} * V[:, i, :, :]
    #    where (x0, y0) is the center pixel location
    #    (e.g. (5,5) for a 9x9 kernel)
    x_0 = int(np.floor(self.patch_size[0] / 2))
    y_0 = int(np.floor(self.patch_size[1] / 2))
    filter_shape = (n_channels, n_channels,
                    self.patch_size[0], self.patch_size[1])
    kernel = torch.zeros(filter_shape)
    # Reordered copy indexed as [row, col, channel, component].
    eigenvectorsT = eigenvectors.permute(2, 3, 1, 0)
    # build the kernel
    for i in range(n_channels):
        for j in range(n_channels):
            # Row vector: centre-pixel loadings of channel j times D^{-1/2}.
            a = torch.mm(
                eigenvectorsT[x_0, y_0, j, :].contiguous().view(1, -1),
                eigenvalues.float()
            )
            # Matrix: one flattened spatial map of channel i per component.
            b = eigenvectors[:, i, :, :].contiguous().view(
                -1, self.patch_size[0] * self.patch_size[1]
            )
            c = torch.mm(a, b).contiguous().view(self.patch_size[0],
                                                 self.patch_size[1])
            kernel[j, i, :, :] = c
    # Pad by (kernel size - 1) per spatial dimension, then crop below.
    padding = (self.patch_size[0] - 1), (self.patch_size[1] - 1)
    input_tensor = torch.from_numpy(_input).permute(0, 3, 1, 2)
    conv_whitening = torch.nn.functional.conv2d(
        input=input_tensor,
        weight=kernel,
        padding=padding
    )
    # NOTE(review): for a patch side of 1 or 2 the crop is 0 and the slice
    # [0:-0] is empty -- confirm such sizes are never used.
    s_crop = [(self.patch_size[0] - 1) // 2,
              (self.patch_size[1] - 1) // 2]
    conv_whitening = conv_whitening[
        :, :, s_crop[0]:-s_crop[0], s_crop[1]:-s_crop[1]
    ]
    # Drop the batch axis (assumes it has size 1, as constructed above).
    return conv_whitening.view(conv_whitening.shape[1],
                               conv_whitening.shape[2],
                               conv_whitening.shape[3])
def test_patch_extractor_fit():
    """The value returned by ``fit`` should compare equal to the extractor."""
    faces = face_collection
    settings = dict(patch_size=(8, 8), max_patches=100, random_state=0)
    extractor = PatchExtractor(**settings)
    same_object = extractor == extractor.fit(faces)
    assert_true(same_object)
X_train, X_test = X[train], X[test]
y_train, y_test = y[train], y[test]

###############################################################################
# Learn a dictionary from image patches of the training faces (treated as
# an unlabeled dataset): unsupervised feature extraction. The patches are
# fed to a pipeline made of a ZCA step followed by a KMeansCoder.

# TODO: Support for "non-square" numbers of atoms to display
# This number should be square i.e. 16, 25, 36, 49, 64 etc.
n_atoms = 144
patch_size = (8, 8)

# print() is called with a single pre-formatted string so the output is
# identical under Python 2 and Python 3.
print("Extracting image patches from %d faces" % len(X_train))
t0 = time()
extr = PatchExtractor(patch_size=patch_size, max_patches=100, random_state=0)
patches = extr.transform(X_train)
print("done in %0.3fs" % (time() - t0))

print("Extracting %d atoms from %d patches" % (n_atoms, len(patches)))
t0 = time()
# Flatten every 2-D patch into one row vector.
patches = patches.reshape(
    (patches.shape[0], patches.shape[1] * patches.shape[2]))

# Candidate coders; only the k-means coder is used in the pipeline below.
sc1 = KSVDCoder(n_atoms, verbose=True, n_iter=5)
rc1 = RandomDataCoder(n_atoms)
kc1 = KMeansCoder(n_atoms, verbose=True)

steps = [('pre', ZCA()), ('dict', kc1)]
p_kmeans = Pipeline(steps)
p_kmeans.fit(patches)
def extract_patches(self, patch_size, max_patches=None, random_state=None):
    """Extract patches from all images held by this object.

    :param patch_size: (height, width) of each patch
    :param max_patches: total patch budget across all images; it is split
        evenly between the images and truncated to an integer. ``None``
        is passed through to ``PatchExtractor`` unchanged (scikit-learn
        documents ``max_patches=None`` as extracting all patches).
    :param random_state: forwarded to ``PatchExtractor``
    :return: extracted patches cast to ``uint8``
    """
    if max_patches is None:
        # The documented default; dividing None would raise a TypeError.
        per_image = None
    else:
        # Built-in int replaces the np.int alias removed from NumPy.
        per_image = int(max_patches / self.num_images())
    patch_extractor = PatchExtractor(patch_size=patch_size,
                                     max_patches=per_image,
                                     random_state=random_state)
    return patch_extractor.transform(self._images).astype(np.uint8)
def test_patch_extractor_fit():
    """Fitting should hand back something equal to the extractor itself."""
    lenas = lena_collection
    extractor = PatchExtractor(patch_size=(8, 8),
                               max_patches=100,
                               random_state=0)
    is_same = extractor == extractor.fit(lenas)
    assert_true(is_same)
def test_patch_extractor_max_patches_default():
    """With no ``patch_size`` given, 100 patches of 12x12 per image are expected."""
    lenas = lena_collection
    extractor = PatchExtractor(max_patches=100, random_state=0)
    result = extractor.transform(lenas)
    wanted_shape = (len(lenas) * 100, 12, 12)
    assert_equal(result.shape, wanted_shape)
def test_patch_extractor_max_patches_default():
    """Default ``patch_size``: the test expects 100 patches of 19x25 per image."""
    faces = face_collection
    sampled = PatchExtractor(max_patches=100,
                             random_state=0).transform(faces)
    n_expected = len(faces) * 100
    assert_equal(sampled.shape, (n_expected, 19, 25))
def get_feature_patches(PARAMS, FV, patch_size, patch_shift, input_shape):
    """Clean, standardise and cut a feature matrix into patches.

    :param PARAMS: dict; ``PARAMS['39_dim_CC_feat']`` decides whether
        39-dimensional features are kept as they are
    :param FV: 2-D feature matrix. When its second dimension is one of
        9, 10, 21, 22 or 39 it is treated as (nFrames, nFeatures) and
        transposed after scaling; otherwise it is used as
        (nFeatures, nFrames)
    :param patch_size: number of frames (columns) per patch
    :param patch_shift: hop used only to decide how many patches to request
    :param input_shape: ``input_shape[0]`` is the feature count that 9- and
        10-dimensional patches are zero-padded to
    :return: 3-D array of patches indexed (patch, feature, frame)
    """
    frame_major_widths = (9, 10, 21, 22, 39)

    # Removing NaN and Inf: drop every frame containing such a value,
    # along whichever axis holds the frames.
    if np.shape(FV)[1] in frame_major_widths:
        FV = FV[~np.isnan(FV).any(axis=1), :]
        FV = FV[~np.isinf(FV).any(axis=1), :]
    else:
        FV = FV[:, ~np.isnan(FV).any(axis=0)]
        FV = FV[:, ~np.isinf(FV).any(axis=0)]

    FV = StandardScaler(copy=False).fit_transform(FV)

    # FV should be of the shape (nFeatures, nFrames)
    if np.shape(FV)[1] in frame_major_widths:
        FV = FV.T

    # Too short for a single patch: tile the matrix along the frame axis.
    if np.shape(FV)[1] < patch_size:
        FV1 = FV.copy()
        while np.shape(FV)[1] <= patch_size:
            FV = np.append(FV, FV1, axis=1)

    # The former timing call used time.clock(), which no longer exists in
    # Python 3.8+, and its result was never read; it has been removed.
    numPatches = int(np.ceil(np.shape(FV)[1] / patch_shift))
    patches = PatchExtractor(patch_size=(np.shape(FV)[0], patch_size),
                             max_patches=numPatches).transform(
        np.expand_dims(FV, axis=0))

    n_feat = np.shape(patches)[1]
    if n_feat in (9, 10):
        # Zero-pad the feature axis up to input_shape[0].
        diff_dim = input_shape[0] - n_feat
        zero_padding = np.zeros(
            (np.shape(patches)[0], diff_dim, np.shape(patches)[2]))
        patches = np.append(patches, zero_padding, axis=1)
    elif n_feat == 22:
        patches = patches[:, :21, :]
    elif n_feat == 39:
        if not PARAMS['39_dim_CC_feat']:
            # Keep the first 7 entries of each of the three groups of 13.
            first_7_cep_dim = np.array(
                list(range(0, 7)) + list(range(13, 20))
                + list(range(26, 33)))
            patches = patches[:, first_7_cep_dim, :]

    return patches