def dict_learn(self, imgs, feature_extractor=None, dict_learner=None):
    # extract the feature descriptors, or load them from the
    # workspace if they have already been computed
    if not self.workspace.contains("descriptors.npy"):
        self.descriptors = feature_extractor(imgs)
        self.workspace.save("descriptors.npy", self.descriptors)
    else:
        self.descriptors = self.workspace.load("descriptors.npy")
        if self.mmap:
            self.descriptors = get_mmap(self.descriptors)
    print "descriptors extracted"
    # learn the dictionary on the descriptors, or load a
    # previously learned dictionary from the workspace
    if not self.workspace.contains("dict.npy"):
        dict_learner.fit(self.descriptors)
        self.D = dict_learner.D
        self.workspace.save("dict.npy", self.D)
    else:
        self.D = self.workspace.load("dict.npy")
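# A minimal standalone sketch of the compute-or-load caching pattern that
# dict_learn relies on. The Workspace class below is a hypothetical stand-in
# for self.workspace (not the library's actual class): it simply persists
# named numpy arrays in a directory via contains/save/load.
import os
import numpy as np

class Workspace(object):

    def __init__(self, path):
        self.path = path
        if not os.path.exists(path):
            os.makedirs(path)

    def contains(self, name):
        # True if the named array has already been saved
        return os.path.exists(os.path.join(self.path, name))

    def save(self, name, arr):
        np.save(os.path.join(self.path, name), arr)

    def load(self, name):
        return np.load(os.path.join(self.path, name))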
def ksvd_dict_learn(X, n_atoms, init_dict='data', sparse_coder=None, max_iter=20, non_neg=False,
                    approx=False, eta=None, n_cycles=1, n_jobs=1, mmap=False, verbose=True):
    """
    The K-SVD algorithm

    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    init_dict: 'data' to initialize the dictionary with randomly selected datapoints,
               or an initial dictionary matrix
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    max_iter: the maximum number of iterations
    non_neg: if set to True, it uses non-negativity constraints
    approx: if set to True, invokes the approximate K-SVD algorithm
    eta: if not None, the parameter passed to force_mi to enforce mutual incoherence of the atoms
    n_cycles: the number of updates per atom (Dictionary Update Cycles)
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory
    """
    n_features, n_samples = X.shape
    shape = (n_atoms, n_samples)
    Z = np.zeros(shape)

    # dictionary initialization;
    # track the datapoints that are not used as atoms
    unused_data = []
    if init_dict == 'data':
        from .utils import init_dictionary
        D, unused_data = init_dictionary(X, n_atoms, method=init_dict, return_unused_data=True)
    else:
        D = np.copy(init_dict)

    if mmap:
        D = get_mmap(D)
        sparse_coder.mmap = True

    print "dictionary initialized"

    max_patience = 10
    error_curr = 0
    error_prev = 0
    it = 0
    patience = 0
    approx_errors = []
    while it < max_iter and patience < max_patience:
        print "----------------------------"
        print "iteration", it
        print ""
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()

        # sparse coding
        Z = sparse_coder(X, D)

        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "sparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        # K-SVD step to learn the dictionary
        set_openblas_threads(n_jobs)
        if approx:
            D, _, unused_atoms = approx_ksvd(X, D, Z, n_cycles=n_cycles)
        elif non_neg:
            D, _, unused_atoms = nn_ksvd(X, D, Z, n_cycles=n_cycles)
        else:
            D, _, unused_atoms = ksvd(X, D, Z, n_cycles=n_cycles)
        set_openblas_threads(1)

        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "K-SVD took", t_dict_duration, "seconds"
            print ""
            print "number of unused atoms:", len(unused_atoms)

        # replace the unused atoms of the dictionary with
        # randomly selected datapoints that are not yet used as atoms
        for j in range(len(unused_atoms)):
            # no datapoint available to be used as an atom
            if len(unused_data) == 0:
                break
            idx = np.random.choice(unused_data, size=1)[0]
            D[:, unused_atoms[j]] = X[:, idx]
            D[:, unused_atoms[j]] = normalize(D[:, unused_atoms[j]])
            unused_data.remove(idx)

        if eta is not None:
            # do not force incoherence in the last iteration
            if it < max_iter - 1:
                # force Mutual Incoherence
                D, unused_data = force_mi(D, X, Z, unused_data, eta)
                if verbose:
                    amc = average_mutual_coherence(D)
                    print "average mutual coherence:", amc

        it_duration = time.time() - it_start

        # calculate the approximation error
        error_curr = approx_error(D, Z, X, n_jobs=2)
        approx_errors.append(error_curr)
        if verbose:
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            print "duration:", it_duration, "seconds"

        # lose patience when the error does not decrease by at least 10%
        if (it > 0) and (error_curr > 0.9 * error_prev):
            patience += 1
        error_prev = error_curr
        it += 1
        print ""

    return D, Z
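# A minimal sketch of the atom update that a single K-SVD pass performs, written
# with plain numpy. This is an illustration of the rank-1 update the method is
# based on, not the library's ksvd() implementation; the helper name and its
# in-place contract are assumptions made for this example only.
import numpy as np

def ksvd_atom_update_sketch(X, D, Z, k):
    """Update atom k of D (and the corresponding row of Z) in place."""
    # datapoints whose sparse code uses atom k
    idx = np.nonzero(Z[k, :])[0]
    if idx.size == 0:
        return D, Z
    # residual of those datapoints with the contribution of atom k removed
    E = X[:, idx] - np.dot(D, Z[:, idx]) + np.outer(D[:, k], Z[k, idx])
    # best rank-1 approximation of the residual gives the new atom and coefficients
    U, s, Vt = np.linalg.svd(E, full_matrices=False)
    D[:, k] = U[:, 0]
    Z[k, idx] = s[0] * Vt[0, :]
    return D, Z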
def online_dict_learn(X, n_atoms, sparse_coder=None, batch_size=None, A=None, B=None,
                      D_init=None, beta=None, n_epochs=1, verbose=False, n_jobs=1,
                      non_neg=False, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    A, B: the accumulated statistics sum(Z_batch * Z_batch.T) and sum(X_batch * Z_batch.T).
          If None, they are initialized to zero.
    D_init: the initial dictionary. If None, we initialize it with randomly selected datapoints.
    beta: the forgetting factor applied to A and B in each iteration. If None, we use a
          sequence that converges to one.
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a large batch_size implies faster execution but high memory overhead,
    while a smaller batch_size implies slower execution but low memory overhead.
    """
    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape

    # initialize the dictionary with randomly selected datapoints
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"

    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    _eps = np.finfo(float).eps

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    if A is None and B is None:
        A = np.zeros((n_atoms, n_atoms))
        B = np.zeros((n_features, n_atoms))

    if beta is None:
        # create a sequence that converges to one
        beta = np.linspace(0, 1, num=n_iter)
    else:
        beta = np.zeros(n_iter) + beta

    max_patience = 10
    error_curr = 0
    error_prev = 0
    patience = 0

    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)
            # update the accumulated statistics A and B
            A = beta[i] * A + fast_dot(Z_batch, Z_batch.T)
            B = beta[i] * B + fast_dot(X_batch, Z_batch.T)

            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            # dictionary update: one block coordinate descent pass over the atoms.
            # this part could also be parallelized w.r.t the atoms
            DA = fast_dot(D, A)
            for k in xrange(n_atoms):
                D[:, k] = (1 / (A[k, k] + _eps)) * (B[:, k] - DA[:, k]) + D[:, k]

            # enforce non-negativity constraints
            if non_neg:
                D[D < 0] = 0

            # project the atoms to the l2 unit sphere
            D = norm_cols(D)

            # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            if patience >= max_patience:
                return D, A, B
            print ""
            print "end of epoch {0}".format(e)
            # measure the approximation error over the whole dataset
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)

            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)

            # lose patience when the error does not decrease by at least 10%
            if (e > 0) and (error_curr > 0.9 * error_prev):
                patience += 1
            error_prev = error_curr

    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""

    return D, A, B
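# A small numpy sketch (illustration only, not library code) of the surrogate
# objective that the per-atom update above performs block coordinate descent on.
# With the accumulated statistics A = sum(Z*Z^T) and B = sum(X*Z^T), the summed
# reconstruction error 0.5*||X - D*Z||_F^2 equals, up to a constant in D,
# 0.5*Tr(D^T D A) - Tr(D^T B); minimizing it over column k with the other atoms
# fixed gives the D[:, k] update used in online_dict_learn.
import numpy as np

def surrogate_objective_sketch(D, A, B):
    # 0.5 * Tr(D^T D A) - Tr(D^T B)
    return 0.5 * np.trace(np.dot(np.dot(D.T, D), A)) - np.trace(np.dot(D.T, B))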
def lc_ksvd(X, y, D, Q, alpha=1, beta=1, lambda1=1, lambda2=1, sparse_coder=None,
            max_iter=2, approx=False, mmap=False, verbose=False, n_jobs=1):
    """
    X: the data matrix with shape (n_features,n_samples)
    y: the vector that contains the label of each datapoint
    Q: a matrix with shape (n_atoms,n_samples). The element Q_{k,i} is 1 if
       the ith datapoint and the kth atom belong to the same class
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    lambda1: the regularizer for the W matrix, i.e. lambda1 * ||W||_{2}
    lambda2: the regularizer for the transformation matrix G, i.e. lambda2 * ||G||_{2}
    alpha: the weight we assign to sparse code discrimination
    beta: the weight we assign to correct classification: beta * ||H - WZ||_{2}
    """
    n_classes = len(set(y))
    n_atoms = D.shape[1]
    n_features, n_samples = X.shape

    # create the class label matrix H, which has a
    # datapoint in each column, with H_{c,i} = 1 if
    # the ith datapoint belongs to the cth class
    H = np.zeros((n_classes, n_samples)).astype(int)
    for i in xrange(n_samples):
        H[y[i], i] = 1

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    # initial sparse codes, used to initialize the classifier parameters
    Z = sparse_coder(X, D)

    # classifier parameter initialization via ridge regression
    I = np.eye(n_atoms)
    # W_{c,:} are the parameters of the linear classifier for the cth class
    W = np.dot(inv(np.dot(Z, Z.T) + lambda1 * I), np.dot(Z, H.T)).T
    # the matrix G forces the sparse codes to be discriminative and to approximate
    # the matrix Q; it has shape (n_atoms,n_atoms)
    G = np.dot(inv(np.dot(Z, Z.T) + lambda2 * I), np.dot(Z, Q.T)).T

    # stack the data matrix X with the matrix Q and the class label matrix H
    _X = np.vstack((X, np.sqrt(alpha) * Q))
    _X = np.vstack((_X, np.sqrt(beta) * H))
    if mmap:
        _X = get_mmap(_X)

    # normalize the columns of D and rescale G and W accordingly
    _normalizer = np.array([np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])
    D = D / _normalizer
    G = G / _normalizer
    W = W / _normalizer

    # stack the dictionary D with the matrix G and the weight matrix W
    _D = np.vstack((D, np.sqrt(alpha) * G))
    _D = np.vstack((_D, np.sqrt(beta) * W))
    if mmap:
        _D = get_mmap(_D)

    error_curr = 0
    error_prev = 0
    for it in range(max_iter):
        print "iteration", it
        it_start = time.time()
        if verbose:
            t_sparse_start = time.time()

        # sparse coding
        Z = sparse_coder(X, D)

        if verbose:
            t_sparse_duration = time.time() - t_sparse_start
            print "\nsparse coding took", t_sparse_duration, "seconds"
            t_dict_start = time.time()

        # K-SVD step on the stacked system
        _D, _, unused_atoms = ksvd(_X, _D, Z, verbose=True)

        if verbose:
            t_dict_duration = time.time() - t_dict_start
            print "\nK-SVD took", t_dict_duration, "seconds"
            print "number of unused atoms:", len(unused_atoms)

        # unstack D, G and W
        D = _D[:n_features, :]
        G = _D[n_features:n_features + n_atoms, :]
        W = _D[n_features + n_atoms:, :]

        # re-normalize the columns of D and rescale G and W accordingly
        _normalizer = np.array([np.sqrt(np.dot(D[:, j], D[:, j])) for j in range(D.shape[1])])
        D = D / _normalizer
        G = G / _normalizer
        W = W / _normalizer

        # stack the dictionary D with the matrix G and the weight matrix W
        _D = np.vstack((D, np.sqrt(alpha) * G))
        _D = np.vstack((_D, np.sqrt(beta) * W))

        it_duration = time.time() - it_start

        if verbose:
            # calculate the approximation error
            error_curr = approx_error(D, Z, X, n_jobs=2)
            print "error:", error_curr
            print "error difference:", (error_curr - error_prev)
            # classification accuracy of the current linear classifier on the sparse codes
            n_correct = np.array([y[i] == np.argmax(np.dot(W, Z[:, i]))
                                  for i in range(Z.shape[1])]).nonzero()[0].size
            class_acc = n_correct / float(n_samples)
            print "classification accuracy", class_acc
            error_prev = error_curr
            print "duration:", it_duration, "seconds"
        print "----------------------"

    return D, Z, W
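# A minimal sketch (not part of the library) of how the discriminative matrix Q
# described in the docstring above could be built. It assumes, purely for
# illustration, that each atom has already been assigned a class label in a
# hypothetical array atom_labels of length n_atoms.
import numpy as np

def build_Q_sketch(y, atom_labels):
    y = np.asarray(y)
    atom_labels = np.asarray(atom_labels)
    n_atoms = atom_labels.shape[0]
    n_samples = y.shape[0]
    Q = np.zeros((n_atoms, n_samples), dtype=int)
    for k in range(n_atoms):
        # Q_{k,i} = 1 when atom k and datapoint i share a class
        Q[k, y == atom_labels[k]] = 1
    return Q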
def projected_grad_desc(X, n_atoms=None, sparse_coder=None, batch_size=None, D_init=None,
                        eta=None, mu=None, n_epochs=None, non_neg=False, verbose=False,
                        n_jobs=1, mmap=False):
    """
    X: the data matrix of shape (n_features,n_samples)
    n_atoms: the number of atoms in the dictionary
    sparse_coder: must be an instance of the sparse_coding.sparse_encoder class
    batch_size: the number of datapoints in each iteration
    D_init: the initial dictionary. If None, we initialize it with randomly selected datapoints.
    eta: the learning rate
    mu: the mutual coherence penalty
    n_epochs: the number of times we iterate over the dataset
    non_neg: if set to True, it uses non-negativity constraints
    n_jobs: the number of CPU threads
    mmap: if set to True, the algorithm applies memory mapping to save memory

    Note that a large batch_size implies faster execution but high memory overhead,
    while a smaller batch_size implies slower execution but low memory overhead.
    """
    if eta is None:
        raise ValueError('Must specify learning rate.')

    # don't monitor sparse coding
    sparse_coder.verbose = False
    n_features, n_samples = X.shape

    # initialize the dictionary with randomly selected datapoints
    if D_init is None:
        D, unused_data = init_dictionary(X, n_atoms, method='data', return_unused_data=True)
    else:
        D = D_init
    print "dictionary initialized"

    if mmap:
        D = get_mmap(D)

    batch_idx = gen_batches(n_samples, batch_size=batch_size)
    n_batches = len(batch_idx)
    n_iter = n_batches
    n_total_iter = n_epochs * n_iter
    I = np.eye(n_atoms)

    if n_batches > n_iter:
        print "will iterate on only {0:.2f}% of the dataset".format((float(n_iter) / n_batches) * 100)

    if n_jobs > 1:
        set_openblas_threads(n_jobs)

    max_patience = 10
    error_prev = 0
    patience = 0

    for e in range(n_epochs):
        # cycle over the batches
        for i, batch in zip(range(n_iter), cycle(batch_idx)):
            X_batch = X[:, batch]
            # sparse coding step
            Z_batch = sparse_coder(X_batch, D)

            if verbose:
                progress = float((e * n_iter) + i) / n_total_iter
                sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (progress * 100))
                sys.stdout.flush()

            # the gradient of the approximation error
            grad_approx = np.dot(np.dot(D, Z_batch) - X_batch, Z_batch.T)
            # the gradient of the incoherence penalty
            if mu is not None and mu > 0:
                grad_incoh = 2 * mu * np.dot(D, np.dot(D.T, D) - I)
            else:
                grad_incoh = 0

            # gradient step
            grad = grad_approx + grad_incoh
            D = D - eta * grad

            # enforce non-negativity
            if non_neg:
                D[D < 0] = 0

            # project the atoms to the l2 unit sphere
            D = norm_cols(D)

            # replace_unused_atoms(A,unused_data,i)

        if e < n_epochs - 1:
            print ""
            print "end of epoch {0}".format(e)
            # measure the approximation error over the whole dataset
            error_curr = 0
            for i, batch in zip(range(n_iter), cycle(batch_idx)):
                X_batch = X[:, batch]
                # sparse coding step
                Z_batch = sparse_coder(X_batch, D)
                error_curr += approx_error(D, Z_batch, X_batch, n_jobs=n_jobs)

            if verbose:
                print ""
                print "error:", error_curr
                print "error difference:", (error_curr - error_prev)

            # lose patience when the error does not decrease by at least 10%
            if (e > 0) and (error_curr > 0.9 * error_prev):
                patience += 1
            error_prev = error_curr
            if patience >= max_patience:
                return D

    if verbose:
        sys.stdout.write("\r" + "dictionary learning" + "...:%3.2f%%" % (100))
        sys.stdout.flush()
        print ""

    return D
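# A small self-contained check (illustration only, with made-up problem sizes)
# that the analytic gradient used in projected_grad_desc,
# grad_approx = (D*Z - X)*Z^T, matches a finite-difference estimate of the
# approximation error 0.5*||X - D*Z||_F^2 with respect to D.
import numpy as np

def check_grad_approx_sketch(n_features=8, n_atoms=5, n_samples=12, h=1e-6, seed=0):
    rng = np.random.RandomState(seed)
    X = rng.randn(n_features, n_samples)
    D = rng.randn(n_features, n_atoms)
    Z = rng.randn(n_atoms, n_samples)

    def f(D):
        R = np.dot(D, Z) - X
        return 0.5 * np.sum(R ** 2)

    grad_analytic = np.dot(np.dot(D, Z) - X, Z.T)
    grad_numeric = np.zeros_like(D)
    for a in range(n_features):
        for b in range(n_atoms):
            E = np.zeros_like(D)
            E[a, b] = h
            grad_numeric[a, b] = (f(D + E) - f(D - E)) / (2 * h)
    # returns the largest entrywise discrepancy (should be close to zero)
    return np.max(np.abs(grad_analytic - grad_numeric))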