def test_pca_input(sh, m):
    """
    Build a PCA input matrix from the labeled faces data

    :param sh: The shape of the image tiles to sample
    :param m: The number of examples to sample
    :return: 2d ndarray where columns are examples
    """
    faces = load_lfwc()
    samples = sample_patches(faces, patch_shape=sh, n_samples=m, flatten=True)
    # transpose so that the examples end up as columns (see :return:)
    return samples.T
def get_input(input_pattern=lfwc_pattern, n_samples=5000, patch_shape=(12, 12),
              n_images=-1, epsilon=0.1, norm_axis=0):
    """
    Get input data from lfwc patches

    Globs ``input_pattern``, shuffles the matching paths, obtains a set of
    patches, mean-normalizes them and (unless disabled) ZCA-whitens them.

    NOTE: a second definition of ``get_input`` appears later in this file and
    replaces this one when the module is imported.

    :param input_pattern: The pattern to glob for image files
    :param n_samples: The number of samples to extract
    :param patch_shape: The shape of the patches to sample
    :param n_images: The number of images to draw samples from; a negative
        value (the default) or None means "use every matching image"
    :param epsilon: Regularization for whitening, or None to disable whitening
    :param norm_axis: Whether to mean normalize across each feature (0) or
        each patch (1)
    :return: m x n ndarray of flattened images
    """
    paths = glob.glob(input_pattern)
    random.shuffle(paths)
    # Only truncate for a real limit: a plain paths[:n_images] with the
    # default of -1 would silently drop the last shuffled image.
    if n_images is not None and n_images >= 0:
        paths = paths[:n_images]

    print("Sampling {} {}x{} patches from {} images...".format(
        n_samples, patch_shape[0], patch_shape[1], len(paths)))

    use_whole_set = (input_pattern == lfwc_pattern
                     and patch_shape == (64, 64)
                     and n_samples == lfwc_count)
    if use_whole_set:
        patches = load_matrix_2d(paths)
        # NOTE(review): only epsilon=None skips whiten_zca below, so 0 still
        # reaches it -- confirm whiten_zca treats 0 as "no whitening".
        epsilon = 0  # disable whitening... too slow
    else:
        # The images are only needed when sampling, so read them here rather
        # than up front. Flag 0 asks OpenCV for a single-channel image, giving
        # a m x img_row x img_col ndarray.
        imgs = np.asarray([cv2.imread(path, 0) for path in paths],
                          dtype=theano.config.floatX)
        imgs /= 255.  # scale to [0,1]
        # sample patches into a 3d array
        patches = sample_patches(imgs, patch_shape=patch_shape,
                                 n_samples=n_samples)

    # flatten each image to 1d
    patches = patches.reshape(patches.shape[0],
                              patch_shape[0] * patch_shape[1])

    if norm_axis == 0:
        # subtract per feature (pixel) means across the data set (m x n)
        normed = patches - patches.mean(axis=0)
    else:
        # remove mean val of each patch
        normed = patches - patches.mean(axis=1)[:, np.newaxis]

    if epsilon is None:
        return normed
    return whiten_zca(normed, epsilon=epsilon)
def main(cmd, args):
    """
    process the command

    :param cmd: One of "show", "sample", "sweep" or "alfw"; anything else
        prints "unexpected cmd"
    :param args: Extra arguments; a first element of "lfw" selects the lfw
        data set, otherwise lfwc is used
    """
    wants_lfw = len(args) > 0 and args[0] == "lfw"
    data_set = "lfw" if wants_lfw else "lfwc"

    if cmd == "show":
        image_paths = lfw_paths() if wants_lfw else lfwc_paths()
        show_images(image_paths)
        return

    n_samples = 10000
    out_shape = (8, 8)
    # the data set is loaded for every remaining command, before dispatching
    data = load_lfw() if data_set == "lfw" else load_lfwc()

    if cmd == "sample":
        tile_shape = (20, 20)
        patches = sample_patches(data, patch_shape=tile_shape,
                                 n_samples=n_samples)
        tile_images(patches, patch_shape=tile_shape, output_shape=out_shape,
                    show=True)
        return

    if cmd == "sweep":
        out_dir = "./img/"
        # square patch shapes with even side lengths from 8 through 20
        for side in range(8, 21, 2):
            shape = (side, side)
            patches = sample_patches(data, n_samples=n_samples,
                                     patch_shape=shape)
            img = tile_images(patches, shape, output_shape=out_shape,
                              show=True)
            fn = os.path.join(out_dir,
                              "{}_patches_{}.png".format(data_set, shape))
            print("saving {}...".format(fn))
            img.save(fn)
        return

    if cmd == "alfw":
        print(load_alfw())
        return

    print("unexpected cmd")
def get_input(input_pattern=lfwc_pattern, n_samples=5000, patch_shape=(12, 12),
              n_images=-1, epsilon=0.1, norm_axis=0):
    """
    Get input data from lfwc patches

    Globs ``input_pattern``, shuffles the matching paths, obtains a set of
    patches, mean-normalizes them and (unless disabled) ZCA-whitens them.

    NOTE: this redefines a ``get_input`` that appears earlier in this file;
    this later definition is the one in effect after import.

    :param input_pattern: The pattern to glob for image files
    :param n_samples: The number of samples to extract
    :param patch_shape: The shape of the patches to sample
    :param n_images: The number of images to draw samples from; a negative
        value (the default) or None means "use every matching image"
    :param epsilon: Regularization for whitening, or None to disable whitening
    :param norm_axis: Whether to mean normalize across each feature (0) or
        each patch (1)
    :return: m x n ndarray of flattened images
    """
    paths = glob.glob(input_pattern)
    random.shuffle(paths)
    # Only truncate for a real limit: a plain paths[:n_images] with the
    # default of -1 would silently drop the last shuffled image.
    if n_images is not None and n_images >= 0:
        paths = paths[:n_images]

    print("Sampling {} {}x{} patches from {} images..."
          .format(n_samples, patch_shape[0], patch_shape[1], len(paths)))

    use_whole_set = (input_pattern == lfwc_pattern
                     and patch_shape == (64, 64)
                     and n_samples == lfwc_count)
    if use_whole_set:
        patches = load_matrix_2d(paths)
        # NOTE(review): only epsilon=None skips whiten_zca below, so 0 still
        # reaches it -- confirm whiten_zca treats 0 as "no whitening".
        epsilon = 0  # disable whitening... too slow
    else:
        # The images are only needed when sampling, so read them here rather
        # than up front. Flag 0 asks OpenCV for a single-channel image, giving
        # a m x img_row x img_col ndarray.
        imgs = np.asarray([cv2.imread(path, 0) for path in paths],
                          dtype=theano.config.floatX)
        imgs /= 255.  # scale to [0,1]
        # sample patches into a 3d array
        patches = sample_patches(imgs, patch_shape=patch_shape,
                                 n_samples=n_samples)

    # flatten each image to 1d
    patches = patches.reshape(patches.shape[0],
                              patch_shape[0] * patch_shape[1])

    if norm_axis == 0:
        # subtract per feature (pixel) means across the data set (m x n)
        normed = patches - patches.mean(axis=0)
    else:
        # remove mean val of each patch
        normed = patches - patches.mean(axis=1)[:, np.newaxis]

    if epsilon is None:
        return normed
    return whiten_zca(normed, epsilon=epsilon)