def __call__(self, imgs):
    # imgs is a list of 2D image arrays
    from joblib import Parallel, delayed
    from lyssa.utils import gen_even_batches
    n_imgs = len(imgs)
    n_desc_per_image = int(self.n_descriptors / float(n_imgs))
    # Z = run_parallel(func=_sift_extract_proc, data=imgs, args=(n_desc_per_image, self.patch_shape),
    #                  result_shape=(n_features, n_imgs), n_batches=100, mmap=self.mmap,
    #                  msg="building ScSPM features", n_jobs=n_jobs)
    if self.n_jobs > 1:
        msg = "extracting dsift"
        Parallel.print_progress = joblib_print(n_imgs, msg)
    results = Parallel(n_jobs=self.n_jobs)(
        delayed(_sift_extract_proc)(imgs[i], n_desc_per_image, self.patch_shape)
        for i in range(n_imgs))
    # each result holds one descriptor per column
    n_descs = [results[i].shape[1] for i in range(len(results))]
    if self.mmap:
        Z = get_empty_mmap((results[0].shape[0], np.sum(n_descs)))
    else:
        Z = np.zeros((results[0].shape[0], np.sum(n_descs)))
    # concatenate the per-image descriptor matrices column-wise
    base = 0
    for j in range(n_imgs):
        offset = n_descs[j]
        Z[:, base:base + offset] = results[j]
        base += offset
    # normalize each SIFT descriptor
    Z = norm_cols(Z)
    return Z
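# A minimal usage sketch for the extractor above. The enclosing class name
# ("dsift_extractor") and its constructor signature are assumptions, not taken
# from the source: any object exposing n_descriptors, patch_shape, n_jobs and
# mmap attributes and this __call__ method would behave the same way.
import numpy as np

if __name__ == "__main__":
    imgs = [np.random.rand(64, 64) for _ in range(10)]  # toy 2D grayscale images
    extractor = dsift_extractor(n_descriptors=10000,    # hypothetical class/constructor
                                patch_shape=(16, 16),
                                n_jobs=4, mmap=False)
    Z = extractor(imgs)  # invokes __call__; one l2-normalized descriptor per column
    print Z.shape        # (descriptor_dim, total number of extracted descriptors)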
def class_dict_learn(X, y, n_class_atoms=None, sparse_coders=None, init_dict='data',
                     max_iter=5, approx=False, non_neg=False, eta=None, alpha=None,
                     n_cycles=1, n_jobs=1, mmap=False, verbose=True):
    n_classes = len(set(y))
    # the number of atoms in the joint dictionary
    n_total_atoms = np.sum(n_class_atoms)
    n_features = X.shape[0]
    shape = (n_features, n_total_atoms)
    if mmap:
        D = get_empty_mmap(shape)
    else:
        D = np.zeros(shape)
    base = 0
    for c in range(n_classes):
        if verbose:
            print "-------------------------------------"
            print "optimizing the dictionary of class", c
        # extract the datapoints of the c-th class
        x_c = y == c
        Xc = X[:, x_c]
        n_class_samples = Xc.shape[1]
        Dc, _ = ksvd_dict_learn(Xc, n_class_atoms[c], init_dict='data',
                                sparse_coder=sparse_coders[c], max_iter=max_iter,
                                non_neg=non_neg, approx=approx, eta=eta,
                                n_cycles=n_cycles, n_jobs=n_jobs, mmap=mmap,
                                verbose=verbose)
        # place the class dictionary in its block of the joint dictionary;
        # base accumulates the atom counts of the previous classes, which is
        # also correct when the classes have unequal numbers of atoms
        offset = n_class_atoms[c]
        D[:, base:base + offset] = Dc
        base += offset
    if alpha is not None:
        # enforce structural incoherence between the class dictionaries
        from lyssa.dict_learn.utils import replace_coherent_atoms
        if verbose:
            print "reducing structural incoherence"
        D, n_class_atoms = replace_coherent_atoms(X, y, D, n_class_atoms,
                                                  thresh=alpha, kappa=None,
                                                  unused_data=None)
    # merge the class dictionaries into one and return it
    return D, n_class_atoms
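# A hedged sketch of calling class_dict_learn on a toy two-class problem.
# The per-class sparse coder objects are placeholders: any callable accepted
# by ksvd_dict_learn's sparse_coder argument would do; "my_sparse_coder" is a
# hypothetical name, not a confirmed lyssa API.
import numpy as np

X = np.random.rand(100, 400)          # 400 samples as columns, 100 features each
y = np.array([0] * 200 + [1] * 200)   # two classes, 200 samples per class
n_class_atoms = np.array([64, 64])    # atoms allocated to each class dictionary
coders = [my_sparse_coder, my_sparse_coder]  # hypothetical per-class sparse coders

D, n_class_atoms = class_dict_learn(X, y, n_class_atoms=n_class_atoms,
                                    sparse_coders=coders, max_iter=5,
                                    alpha=0.8, n_jobs=2)
# D stacks the class dictionaries column-wise:
# columns [0, 64) belong to class 0 and [64, 128) to class 1.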
def extract_patches(imgs, step_size=None, n_patches=None, patch_size=None, mmap=False,
                    scale=False, verbose=False, mem="high", n_jobs=1):
    # extracts n_patches from a set of images by calling
    # grid_patches with a specific spacing.
    # if patch_shape = (a, a) then patch_size = a.
    # imgs is a list of 2D images
    n_imgs = len(imgs)
    # remember the sampling mode before n_patches is overwritten below;
    # otherwise the per-image branches would take the wrong path (and hit an
    # undefined patches_per_image) in the dense, step_size-driven mode
    random_sampling = n_patches is not None
    if random_sampling:
        patches_per_image = int(np.floor(float(n_patches) / float(n_imgs)))
        print "using {0} patches per image".format(patches_per_image)
    # find the number of patches each image actually contributes
    if random_sampling:
        patch_numbers = n_dataset_patches(imgs, patch_size=patch_size,
                                          patches_per_image=patches_per_image)
    else:
        patch_numbers = n_dataset_patches(imgs, patch_size=patch_size,
                                          step_size=step_size)
    print "total number of patches {0}".format(np.sum(patch_numbers))
    n_patches = np.sum(patch_numbers)
    if mem == "high":
        import multiprocessing
        pool = multiprocessing.Pool(processes=n_jobs, initializer=None)
        results = []
        for i in range(n_imgs):
            if verbose:
                sys.stdout.write("\rextracting patches:%3.2f%%" % ((i / float(n_imgs)) * 100))
                sys.stdout.flush()
            if random_sampling:
                func = partial(grid_patches, patch_size=patch_size,
                               n_patches=patches_per_image, scale=scale)
            else:
                func = partial(grid_patches, patch_size=patch_size,
                               step_size=step_size, scale=scale)
            results.append(pool.apply_async(func, (imgs[i],)))
        pool.close()  # no more tasks
        if verbose:
            sys.stdout.write("\rextracting patches:%3.2f%%" % 100)
            sys.stdout.flush()
            print ""
        if mmap:
            patches = get_empty_mmap((results[0].get().shape[0], n_patches))
            print "allocating patches as a memory-mapped array"
        else:
            patches = np.zeros((results[0].get().shape[0], n_patches))
        base = 0
        for i in range(n_imgs):
            result = results[i].get()
            results[i] = None  # release the worker result once copied
            offset = patch_numbers[i]
            patches[:, base:base + offset] = result
            base += offset
    else:
        if len(imgs[0].shape) == 2:
            patches = np.zeros((patch_size ** 2, n_patches))
        elif len(imgs[0].shape) == 3:
            patches = np.zeros((imgs[0].shape[2] * (patch_size ** 2), n_patches))
        base = 0
        for i in range(n_imgs):
            if verbose:
                sys.stdout.write("\rextracting patches:%3.2f%%" % ((i / float(n_imgs)) * 100))
                sys.stdout.flush()
            if random_sampling:
                _patches = grid_patches(imgs[i], patch_size=patch_size,
                                        n_patches=patches_per_image, scale=scale)
            else:
                _patches = grid_patches(imgs[i], patch_size=patch_size,
                                        step_size=step_size, scale=scale)
            offset = patch_numbers[i]
            patches[:, base:base + offset] = _patches
            base += offset
    if step_size is not None:
        return patches, patch_numbers
    return patches
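# A minimal sketch of both sampling modes of extract_patches. The images are
# synthetic; grid_patches and n_dataset_patches are assumed to be importable
# from the same module as the function above.
import numpy as np

imgs = [np.random.rand(32, 32) for _ in range(5)]

# random mode: sample roughly 1000 patches in total, spread evenly over images
patches = extract_patches(imgs, n_patches=1000, patch_size=8, n_jobs=2)

# dense mode: slide an 8x8 window with a stride of 4 pixels; the per-image
# patch counts are also returned, so each patch can be mapped back to its image
patches, patch_numbers = extract_patches(imgs, step_size=4, patch_size=8, n_jobs=2)
print patches.shape  # (64, total number of patches)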