def set_sampling_method(self, sampling_method):
    """Select the landmark sampler used by this object.

    Parameters
    ----------
    sampling_method : str
        ``'random'`` builds a ``RandomSampling`` and ``'farthest'`` a
        ``FarthestSampling``. Either one is constructed from
        ``self._func`` with ``k_to_all=False`` and stored on
        ``self._sampling_method``.

    Raises
    ------
    ValueError
        If ``sampling_method`` is neither ``'random'`` nor ``'farthest'``.
    """
    # Guard clause: reject unsupported names before touching any state.
    if sampling_method != 'random' and sampling_method != 'farthest':
        raise ValueError('Unknown sampling method')

    # Only the requested sampler class is looked up.
    if sampling_method == 'random':
        sampler_cls = RandomSampling
    else:
        sampler_cls = FarthestSampling
    self._sampling_method = sampler_cls(self._func, k_to_all=False)
def __init__(self, k, func=None, rcond=None, sampling_method='random'):
    """Store the configuration and build the column sampler.

    Parameters
    ----------
    k : int
        Stored as ``self._k``.
    func : callable, optional
        Stored as ``self._func`` and handed to the sampler.
    rcond : float, optional
        Stored as ``self._rcond``.
    sampling_method : str
        ``'random'`` or ``'farthest'``; the matching sampler is built
        with ``k_to_all=True``.

    Raises
    ------
    ValueError
        If ``sampling_method`` is not one of the two supported names.
    """
    self._func, self._k, self._rcond = func, k, rcond

    # The plain attributes above are assigned before validation, so they
    # are present even when the sampler name is rejected.
    if sampling_method != 'random' and sampling_method != 'farthest':
        raise ValueError('Unknown sampling method')

    if sampling_method == 'random':
        sampler_cls = RandomSampling
    else:
        sampler_cls = FarthestSampling
    self._sampling_method = sampler_cls(func, k_to_all=True)
def __init__(self, l, func, soft_p, sampling_method="farthest", interp_sq=True, **kwargs):
    """Configure the parent approximation, then install a k-to-all sampler.

    Parameters
    ----------
    l, func, soft_p, interp_sq, **kwargs
        Forwarded unchanged to the parent constructor.
    sampling_method : str
        ``'random'`` or ``'farthest'``. Previously this argument was
        validated by the parent and then silently replaced by farthest
        sampling; the requested strategy is now honoured.

    Raises
    ------
    ValueError
        If ``sampling_method`` is not one of the two supported names.
    """
    super(MF, self).__init__(l=l, func=func, soft_p=soft_p,
                             sampling_method=sampling_method,
                             interp_sq=interp_sq, **kwargs)

    # Make sure we get k-to-all sampling, whichever strategy was requested.
    if sampling_method == 'random':
        self._sampling_method = RandomSampling(func, k_to_all=True)
    elif sampling_method == 'farthest':
        self._sampling_method = FarthestSampling(func, k_to_all=True)
    else:
        raise ValueError('Unknown sampling method')
def __init__(self, k, l, distFun, applyPCA=True, seed=0, sampling_method='random'):
    """Store the scaling configuration and build the landmark sampler.

    Parameters
    ----------
    k : int
        Forwarded to the parent constructor.
    l : int
        Stored as ``self._l``.
    distFun : callable
        Stored as ``self._distance`` and handed to the sampler.
    applyPCA : bool
        Stored as ``self._applyPCA``.
    seed : int
        Stored as ``self._seed``.
    sampling_method : str
        ``'random'`` or ``'farthest'``; the matching sampler is built
        with ``k_to_all=True``.

    Raises
    ------
    ValueError
        If ``sampling_method`` is not one of the two supported names.
    """
    super(LMDS, self).__init__(k)
    self._l, self._distance = l, distFun
    self._applyPCA, self._seed = applyPCA, seed

    # Validation happens after the plain attributes have been stored.
    if sampling_method != 'random' and sampling_method != 'farthest':
        raise ValueError('Unknown sampling method')

    if sampling_method == 'random':
        sampler_cls = RandomSampling
    else:
        sampler_cls = FarthestSampling
    self._sampling_method = sampler_cls(distFun, k_to_all=True)
class Nystrom(SymMatrixApprox):
    """Approximate a symmetric matrix from ``k`` sampled columns.

    After ``fit`` the approximation is evaluated as
    ``C . pinv(W) . C^T``, where ``C`` holds the sampled columns and ``W``
    is the block of ``C`` at the sampled row indices.
    """

    def __init__(self, k, func=None, rcond=None, sampling_method='random'):
        """Store the configuration and build the column sampler.

        ``sampling_method`` must be ``'random'`` or ``'farthest'``; any
        other value raises ``ValueError``. The sampler is created with
        ``k_to_all=True``.
        """
        self._func, self._k, self._rcond = func, k, rcond

        # Plain attributes are stored before the sampler name is checked.
        if sampling_method != 'random' and sampling_method != 'farthest':
            raise ValueError('Unknown sampling method')

        if sampling_method == 'random':
            sampler_cls = RandomSampling
        else:
            sampler_cls = FarthestSampling
        self._sampling_method = sampler_cls(func, k_to_all=True)

    def fit(self, X, y=None, chosen_inds=None):
        """Sample (or evaluate) the columns and pseudo-invert the core block.

        Parameters
        ----------
        X : array-like
            Data whose first dimension gives the matrix size.
        y : ignored
        chosen_inds : sequence of int, optional
            Pre-selected indices. When omitted the sampler picks
            ``self._k`` of them and returns the columns itself; otherwise
            the columns come from
            ``self._func(X, chosen_inds, arange(n))``.

        Raises
        ------
        ValueError
            If no function was supplied at construction time.
        """
        n_samples = X.shape[0]
        if self._func is None:
            raise ValueError('function==None is not accepted.')

        if chosen_inds is not None:
            columns = self._func(X, chosen_inds, np.arange(n_samples))
        else:
            chosen_inds, columns = self._sampling_method.sample(X, self._k)

        # NOTE(review): recent SciPy releases use atol/rtol for the pinvh
        # cutoff -- confirm `rcond` is accepted by the pinned version.
        core_pinv = la.pinvh(columns[chosen_inds, :], rcond=self._rcond)

        # State is only committed once the pseudo-inverse succeeded.
        self._n = n_samples
        self._C = columns
        self._idxs = chosen_inds
        self._Wpinv = core_pinv

    def get_row(self, i):
        """Return row ``i`` of the approximated matrix."""
        left = self._C[i, :].dot(self._Wpinv)
        return left.dot(self._C.T)

    def get_rows(self, rows):
        """Return the rows of the approximated matrix selected by ``rows``."""
        left = self._C[rows, :].dot(self._Wpinv)
        return left.dot(self._C.T)

    def get_size(self):
        """Return the size ``n`` recorded by ``fit``."""
        return self._n

    def get_memory(self):
        """Return the element count of ``C`` and ``pinv(W)`` times 8."""
        c_shape = self._C.shape
        w_shape = self._Wpinv.shape
        n_entries = c_shape[0] * c_shape[1] + w_shape[0] * w_shape[1]
        return n_entries * 8

    def get_name(self):
        """Return the short identifier of this method."""
        return 'nystrom'
class LMDS(Scaling):
    """
    Landmark MultiDimensional Scaling

    References:
    [1]_ "Sparse multidimensional scaling using landmark points";
         V. de Silva and J. B. Tenenbaum
    """

    def __init__(self, k, l, distFun, applyPCA=True, seed=0, sampling_method='random'):
        """Store the configuration and build the landmark sampler.

        Parameters
        ----------
        k : int
            Forwarded to the parent constructor; ``fit`` reads it back as
            ``self._k`` (the number of eigenpairs requested).
        l : int
            Number of landmarks requested from the sampler.
        distFun : callable
            Distance function handed to the sampler.
        applyPCA : bool
            Whether ``fit`` rotates the embedding with ``_compute_pca``.
        seed : int
            Stored as ``self._seed``; not read anywhere else in this class.
        sampling_method : str
            ``'random'`` or ``'farthest'``.

        Raises
        ------
        ValueError
            If ``sampling_method`` is not one of the two supported names.
        """
        super(LMDS, self).__init__(k)
        self._l = l
        self._distance = distFun
        self._applyPCA = applyPCA
        self._seed = seed
        if sampling_method == 'random':
            self._sampling_method = RandomSampling(distFun, k_to_all=True)
        elif sampling_method == 'farthest':
            self._sampling_method = FarthestSampling(distFun, k_to_all=True)
        else:
            raise ValueError('Unknown sampling method')

    def fit(self, data):
        """Compute the landmark embedding and store it in ``self.Z``.

        ``data`` is passed to the sampler, which returns the landmark
        indices and a ``deltas`` array (indexed by landmark along its first
        axis to obtain the landmark-to-landmark block).

        Requires SciPy >= 1.5 (``subset_by_index`` keyword of ``eigh``).
        """
        # Get the number of samples
        n = data.shape[0]

        # Select the landmarks
        landmarks, deltas = self._sampling_method.sample(data, self._l)

        # Apply Classical MDS
        # Distances between the landmarks, symmetrized.
        landmark_deltas = deltas[landmarks, :]
        landmark_deltas = (landmark_deltas + landmark_deltas.T) / 2.0

        # Compute the means and double-center around them.
        landmark_mu_i = landmark_deltas.mean(axis=1)[:, np.newaxis]
        landmark_mu = landmark_mu_i.mean()
        B = -(landmark_deltas - landmark_mu_i - landmark_mu_i.T + landmark_mu) / 2

        # Top-k eigenpairs (eigh sorts ascending, so take the last k).
        # `subset_by_index` replaces the `eigvals` keyword, which was
        # deprecated in SciPy 1.5 and later removed.
        w, v = scipy.linalg.eigh(
            B, subset_by_index=[self._l - self._k, self._l - 1])

        # Only strictly positive eigenvalues yield real coordinates.
        positive = w > 0
        k_final = np.sum(positive)
        sqrt_w = np.sqrt(w[positive])
        L = v[:, positive].dot(np.diag(sqrt_w))

        # Apply distance based triangulation
        L_pinv = v[:, positive].dot(np.diag(1 / sqrt_w))
        X = -L_pinv.T.dot((deltas.T - landmark_mu_i) / 2)

        # Save the resulting model
        self._n = n
        self._k_final = k_final
        self._L = L

        # Apply PCA normalization
        if self._applyPCA:
            self.Z = self._compute_pca(X, k_final).T
        else:
            self.Z = X.T

    def transform(self, data):
        """Return the embedding computed by ``fit`` (``data`` is not used)."""
        return self.Z

    @staticmethod
    def _compute_pca(X, k):
        """Center ``X`` along axis 1 and rotate it onto ``k`` eigenvectors.

        The eigenvectors are those of ``X_bar . X_bar^T`` at indices
        ``0 .. k-1`` in eigh's ascending order.
        NOTE(review): when ``k`` is smaller than ``X.shape[0]`` this keeps
        the smallest-eigenvalue directions -- confirm that is intended.
        ``fit`` always passes ``k == X.shape[0]``, i.e. the full basis.
        """
        X_mean = X.mean(axis=1)
        X_bar = X - X_mean[:, np.newaxis]
        _, U = scipy.linalg.eigh(
            X_bar.dot(X_bar.T), subset_by_index=[0, int(k) - 1])
        return U.T.dot(X_bar)

    def get_memory(self):
        """Return ``(n*l + l*l + k + k*l + l) * 8`` for the fitted model."""
        l = self._l
        n = self._n
        return (n * l + l * l + self._k + self._k * l + l) * 8

    def get_name(self):
        """Return the short identifier of this method."""
        return 'lmds'
class BHA(SymMatrixApprox):
    """Symmetric-matrix approximation of the form ``P . W . P^T``.

    ``W`` is the symmetrized kernel between ``l`` sampled landmark points
    and ``P`` is an ``n x l`` interpolation matrix that ``compute_P``
    derives from the operator ``self._M`` built in ``preprocessing``.
    ``P`` is dense when no threshold is active and a sparse CSR matrix
    otherwise.
    """

    def __init__(self, l, func=None, nnz_row=None, threshold=None, sparse_direction='cols', fit_group=True, sampling_method='farthest', interp_sq=False, soft_p=None):
        """Store the configuration and build the landmark sampler.

        Parameters
        ----------
        l : int
            Number of landmarks requested from the sampler.
        func : callable, optional
            Kernel/distance function; passed to the sampler and called as
            ``func(X, Winds, Winds)`` by ``get_W``.
        nnz_row : int, optional
            When not None, ``compute_P`` derives ``self._threshold`` from
            it and builds a sparse ``P``.
        threshold : int, optional
            Initial value of ``self._threshold``; overwritten in
            ``compute_P`` whenever ``nnz_row`` is set.
        sparse_direction : str
            ``'cols'`` or ``'rows'``; selects how the threshold is derived
            and which thresholding helper is used.
        fit_group : bool
            In column mode, use ``thresh.fit_partition`` instead of
            ``thresh.fit``.
        sampling_method : str
            ``'random'`` or ``'farthest'``.
        interp_sq : bool
            Interpolate squared values and take the square root in
            ``reconstruct``.
        soft_p : float, optional
            When not None, the dense ``P`` is multiplied by a softening
            factor in ``compute_P``.
        """
        self._l = l
        self._func = func
        self._nnz_row = nnz_row
        self._threshold = threshold
        self._sparse_direction = sparse_direction
        self._fit_group = fit_group
        # Needs self._func, which is assigned above.
        self.set_sampling_method(sampling_method)
        self._interp_sq = interp_sq # interpolate squared distance, then sqrt?
        self._soft_p = soft_p

    def set_sampling_method(self, sampling_method):
        """Install a ``RandomSampling`` or ``FarthestSampling`` sampler.

        Both are built from ``self._func`` with ``k_to_all=False``.
        Raises ``ValueError`` for any other ``sampling_method``.
        """
        if sampling_method=='random':
            self._sampling_method = RandomSampling(self._func, k_to_all=False)
        elif sampling_method=='farthest':
            self._sampling_method = FarthestSampling(self._func, k_to_all=False)
        else:
            raise ValueError('Unknown sampling method')

    def preprocessing(self, pts, polys):
        """Build the mesh operators that ``compute_P`` relies on.

        Creates a ``Surface`` from ``pts`` and ``polys``, reads its
        ``laplace_operator`` and stores
        ``M = (lapV - lapW) . Dinv . (lapV - lapW)`` together with the
        individual factors.
        """
        self._pts = pts
        self._polys = polys
        self._surface = Surface(pts, polys)
        B, D, lapW, lapV = self._surface.laplace_operator
        npt = len(D)
        Dinv = sparse.dia_matrix((D ** -1, [0]), (npt, npt)).tocsr() # construct Dinv
        self._M = (lapV - lapW).dot(Dinv.dot(lapV - lapW))
        self._lapW = lapW
        self._lapV = lapV
        self._Dinv = Dinv

    def get_W(self, X, Winds):
        """Evaluate ``func`` between the landmarks ``Winds``.

        The result is squared element-wise when ``interp_sq`` is set.
        """
        W = self._func(X, Winds, Winds)
        if self._interp_sq:
            return W ** 2
        else:
            return W

    def fit(self, X): #(X, Winds, K, t):
        """Select landmarks (unless already present), build ``W`` and ``P``.

        ``compute_P`` reads ``self._M``, which in this class is only
        assigned by ``preprocessing`` (and cleared by ``reset``), so
        ``preprocessing`` has to run before ``fit``.
        """
        n, d = X.shape
        self._n = n

        # If not computed, then compute the Nearest Neighbor graph
        # and the Laplace-Beltrami Operator.
        # if self._M is None:
        #     self.preprocessing(X)
        # `_Winds` is never initialized in __init__, hence the dir() probe.
        if "_Winds" not in dir(self) or self._Winds is None:
            Winds, W = self._sampling_method.sample(X, self._l)
            nonWinds = np.setdiff1d(np.arange(n), Winds) # non-landmark points
            self._Winds = Winds
            self._nonWinds = nonWinds
            # square the distance matrix if we need to do that
            if self._interp_sq:
                W = W ** 2
        else:
            # Reuse the stored landmark selection.
            Winds = self._Winds
            nonWinds = self._nonWinds
            # compute kernel, W (get_W already squares when interp_sq is set)
            W = self.get_W(X, Winds)
        # Symmetrize the landmark kernel.
        self._W = (W + W.T) / 2.0
        # compute P
        self.compute_P()

    def compute_P(self):
        """Build the interpolation matrix ``P`` and store it with its nnz.

        With no threshold active, ``P`` is a dense ``n x l`` array: an
        identity block on the landmark rows and the solution of
        ``M_aa . P' = -M_ab^T`` on the remaining rows. Otherwise the same
        system is solved in chunks of landmark columns and only the
        entries chosen by the thresholding helper are kept in a CSR matrix.
        ``sksparse.cholmod`` is used when importable, else ``spsolve``.
        """
        Winds = self._Winds
        nonWinds = self._nonWinds
        n = self._M.shape[0]

        # pull out part of M for unselected points
        M_aa = self._M[nonWinds,:][:,nonWinds].tocsc()
        # pull out part of M that crosses selected and unselected points
        M_ab = self._M[Winds,:][:,nonWinds]

        # Derive the threshold from nnz_row: scaled by (n-l)/l in column
        # mode, used as-is otherwise.
        if self._nnz_row is not None:
            if self._sparse_direction == 'cols':
                self._threshold = self._nnz_row * (n-self._l) // self._l
            else:
                self._threshold = self._nnz_row

        # Optional dependency: fall back to scipy's spsolve when absent.
        try:
            from sksparse.cholmod import cholesky
            solve_method = 'cholmod'
        except ImportError:
            solve_method = 'spsolve'

        # compute Pprime, part of the dense interpolation matrix
        if self._threshold is None:
            if solve_method == 'spsolve':
                Pprime = sparse.linalg.spsolve(M_aa, -M_ab.T)
            elif solve_method == 'cholmod':
                Pprime = cholesky(M_aa).solve_A(-M_ab.T)

            # compute P, the full dense interpolation matrix
            P = np.zeros((n, self._l))
            P[nonWinds,:] = Pprime.todense()
            P[Winds,:] = np.eye(self._l)
            Pnnz = n * self._l

            if self._soft_p is not None:
                # don't force P to be exactly identity for known points,
                # allow it to fudge a little
                print("Softening P..")
                M_bb = self._M[Winds,:][:,Winds]
                soft_eye = sparse.eye(self._l) * self._soft_p
                to_invert = (M_bb + soft_eye + M_ab.dot(Pprime)).todense()
                soft_factor = np.linalg.inv(to_invert) * self._soft_p
                # .A turns the matrix-typed product back into an ndarray.
                P = P.dot(soft_factor).A
        else:
            # Compute the sparse bha
            if solve_method == 'cholmod':
                # Factorize once and reuse for every chunk.
                chol_M_aa = cholesky(M_aa)

            # Pre-allocate COO triplets: the thresholded entries followed
            # by l identity entries for the landmark rows.
            if self._sparse_direction == 'rows':
                thresh = THR_ROWS(k=self._threshold)
                Prows = np.empty(self._threshold*(n-self._l)+self._l, dtype=int)
                Pcols = np.empty(self._threshold*(n-self._l)+self._l, dtype=int)
                Pvals = np.empty(self._threshold*(n-self._l)+self._l)
            else:
                thresh = THR(k=self._threshold)
                Prows = np.empty(self._threshold*self._l+self._l, dtype=int)
                Pcols = np.empty(self._threshold*self._l+self._l, dtype=int)
                Pvals = np.empty(self._threshold*self._l+self._l)

            chunk_size = 64 # min(self._l // self._njobs, 64)
            # Ceiling division: number of chunks of landmark columns.
            chunks = self._l // chunk_size + ((self._l % chunk_size) > 0)
            for chunk in counter(range(chunks)):
                start = chunk*chunk_size
                end = min(((chunk+1)*chunk_size, self._l))
                if solve_method == 'spsolve':
                    sol = sparse.linalg.spsolve(M_aa, -M_ab.T[:, start:end].toarray())
                elif solve_method == 'cholmod':
                    sol = chol_M_aa.solve_A(-M_ab.T[:, start:end].toarray())

                if self._sparse_direction == 'rows':
                    # Row mode: the helper accumulates over chunks; results
                    # are collected after the loop via get_best_k().
                    thresh.fit(sol)
                else:
                    # Column mode: threshold each landmark column and write
                    # its kept entries into that column's slot.
                    if self._fit_group:
                        l_i = 0
                        for l in range(start,end):
                            thresh.fit_partition(sol[:, l_i])
                            Prows[l*self._threshold:(l+1)*self._threshold] = nonWinds[thresh._idxs]
                            Pvals[l*self._threshold:(l+1)*self._threshold] = thresh._vals
                            l_i += 1
                    else:
                        l_i = 0
                        for l in range(start,end):
                            thresh.fit(sol[:, l_i])
                            Prows[l*self._threshold:(l+1)*self._threshold] = nonWinds[thresh._idxs]
                            Pvals[l*self._threshold:(l+1)*self._threshold] = thresh._vals
                            l_i += 1

            if self._sparse_direction == 'rows':
                cols, vals = thresh.get_best_k()
                Prows[:(n-self._l)*self._threshold] = np.repeat(nonWinds[np.arange(n-self._l)],self._threshold)
                Pcols[:(n-self._l)*self._threshold] = cols
                Pvals[:(n-self._l)*self._threshold] = vals
                lastnonWindElement = (n-self._l)*self._threshold
            else:
                Pcols[:self._l*self._threshold] = np.repeat(np.arange(self._l),self._threshold)
                lastnonWindElement = self._l*self._threshold

            # add the identity for indices in W
            Prows[lastnonWindElement:] = Winds
            Pcols[lastnonWindElement:] = np.arange(self._l)
            Pvals[lastnonWindElement:] = 1.0

            P = sparse.csr_matrix((Pvals,(Prows, Pcols)), shape=(n,self._l))
            P.eliminate_zeros()
            Pnnz = P.nnz

        # save values
        self._nnz = Pnnz
        self._P = P

    def reconstruct(self, approx):
        """Reconstruct the data from the approximation.

        Takes the square root of the approximation if we are approximating
        the squared matrix; negative entries are clipped to zero first.
        """
        if self._interp_sq:
            return np.sqrt(np.clip(approx, 0, np.inf))
        else:
            return approx

    def transform(self):
        """Return the full reconstructed approximation ``P . W . P^T``."""
        if self._threshold is None:
            Kmanifold = self._P.dot(self._W).dot(self._P.T)
        else:
            # Sparse P: written as P . (P . W)^T; W was symmetrized in fit.
            Kmanifold = self._P.dot(self._P.dot(self._W).T)
        return self.reconstruct(Kmanifold)

    def get_row(self, i):
        """Return row ``i`` of the reconstructed approximation."""
        if self._threshold is None:
            approx = self._P[i,:].dot(self._W).dot(self._P.T)
        else:
            approx = self._P.dot(self._P[i,:].dot(self._W).T).T
        return self.reconstruct(approx)

    def get_rows(self, rows):
        """Return the rows of the reconstructed approximation in ``rows``."""
        if self._threshold is None:
            approx = self._P[rows,:].dot(self._W).dot(self._P.T)
        else:
            approx = self._P.dot(self._P[rows,:].dot(self._W).T).T
        return self.reconstruct(approx)

    def get_size(self):
        """Return the number of points ``n`` recorded by ``fit``."""
        return self._n

    def set_l(self, l):
        """Change the landmark count and discard landmarks and solution."""
        self._l = l
        # reset the solution
        self._nonWinds = None
        self._Winds = None
        self._threshold = None
        self._n = None
        self._nnz = 0
        self._P = None
        self._W = None

    def set_nnz_row(self, nnz_row=None):
        """Change ``nnz_row`` and discard the solution (landmarks are kept)."""
        self._nnz_row = nnz_row
        # reset the solution
        self._threshold = None
        self._n = None
        self._nnz = 0
        self._P = None
        self._W = None

    def get_memory(self):
        """Return a memory figure for ``W`` and ``P`` (entry count times 8).

        For the sparse case the index arrays of ``P`` are counted at half
        weight -- presumably because they are 4-byte integers; TODO confirm.
        """
        if self._nnz_row == None:
            return (self._W.shape[0] * self._W.shape[1] + self._P.shape[0] * self._P.shape[1]) * 8
        else:
            return (self._W.shape[0] * self._W.shape[1] + self._P.data.shape[0] + self._P.indices.shape[0]//2 + self._P.indptr.shape[0]//2) * 8

    def get_name(self):
        """Return ``'bha'``, or ``'sbha<nnz_row>'`` when ``nnz_row`` is set."""
        if self._nnz_row == None:
            return 'bha'
        else:
            return 'sbha' + str(self._nnz_row)

    def reset(self, preprocessing=False):
        """Forget the landmark selection; optionally also the mesh operators."""
        # reset the solution
        if preprocessing:
            self._M = None
            self._lapW = None
            self._lapV = None
            self._Dinv = None
        self._nonWinds = None
        self._Winds = None

    @classmethod
    def from_surface(cls, pts, polys, l, nnz_row=150, m=1.0, **kwargs):
        """Build, preprocess and fit an instance from a surface mesh.

        A ``MeshKLazy`` object fitted on ``pts``/``polys`` supplies the
        kernel rows through the ``geodesic`` closure; the same object is
        also what gets passed to ``fit`` as ``X``.
        """
        # create MeshKLazy object that computes geodesics
        meshk = MeshKLazy(m=m)
        meshk.fit(pts, polys)

        # create function that we'll pass to BHA object
        # (its first argument, the data passed by the caller, is ignored)
        def geodesic(_, source=None, dest=None):
            if source is None and dest is None:
                return np.vstack([meshk.get_row(i) for i in range(len(pts))])
            elif dest is None:
                return meshk.get_rows(source).T
            else:
                return meshk.get_rows(source)[:,dest].T

        # create BHA object
        bha = cls(l, geodesic, nnz_row=nnz_row, **kwargs)

        # preprocess & fit
        bha.preprocessing(pts, polys)
        bha.fit(meshk)
        return bha
class MF(BHA):
    """Variant of ``BHA`` that interpolates full landmark-to-all columns.

    Instead of the ``l x l`` landmark kernel, ``fit`` keeps the whole
    sampler output ``F`` and the approximation is the symmetrized product
    ``0.5 * (P . F + (P . F)^T)``.
    """

    def __init__(self, l, func, soft_p, sampling_method="farthest", interp_sq=True, **kwargs):
        """Configure the parent and install a k-to-all sampler.

        Parameters
        ----------
        l, func, soft_p, interp_sq, **kwargs
            Forwarded unchanged to ``BHA.__init__``.
        sampling_method : str
            ``'random'`` or ``'farthest'``. Previously this argument was
            validated and then silently replaced by farthest sampling;
            the requested strategy is now honoured.

        Raises
        ------
        ValueError
            If ``sampling_method`` is not one of the two supported names.
        """
        super(MF, self).__init__(l=l, func=func, soft_p=soft_p,
                                 sampling_method=sampling_method,
                                 interp_sq=interp_sq, **kwargs)
        # Make sure we get k-to-all sampling (explicit, so it does not
        # depend on how the parent constructor builds its sampler).
        self.set_sampling_method(sampling_method)

    def set_sampling_method(self, sampling_method):
        """Install the requested sampler with ``k_to_all=True``.

        Overrides the parent version, whose ``k_to_all=False`` samplers do
        not return the full columns that ``fit`` stores in ``self._F``.

        Raises
        ------
        ValueError
            If ``sampling_method`` is neither ``'random'`` nor ``'farthest'``.
        """
        if sampling_method == 'random':
            self._sampling_method = RandomSampling(self._func, k_to_all=True)
        elif sampling_method == 'farthest':
            self._sampling_method = FarthestSampling(self._func, k_to_all=True)
        else:
            raise ValueError('Unknown sampling method')

    def fit(self, X):
        """Sample landmarks, store their columns in ``self._F``, build ``P``.

        Adapted from ``BHA.fit``; landmarks are always re-sampled.

        Raises
        ------
        RuntimeError
            If ``preprocessing(pts, polys)`` has not been run. The former
            fallback ``self.preprocessing(X)`` did not match that signature
            and could only fail with a ``TypeError``.
        """
        n, _ = X.shape
        self._n = n

        # compute_P needs the operator built by preprocessing().
        if getattr(self, '_M', None) is None:
            raise RuntimeError(
                'preprocessing(pts, polys) must be called before fit()')

        Winds, F = self._sampling_method.sample(X, self._l)
        nonWinds = np.setdiff1d(np.arange(n), Winds)  # non-landmark points
        self._Winds = Winds
        self._nonWinds = nonWinds

        # Store entire columns in F, squared when interpolating squares.
        if self._interp_sq:
            self._F = F.T ** 2
        else:
            self._F = F.T

        # compute P
        self.compute_P()

    def transform(self):
        """Return the full symmetrized, reconstructed approximation."""
        product = self._P.dot(self._F)
        return self.reconstruct(0.5 * (product + product.T))

    def get_row(self, i):
        """Return row ``i`` of the symmetrized approximation."""
        return self.reconstruct(0.5 * self._P[i, :].dot(self._F)
                                + 0.5 * self._P.dot(self._F[:, i]))

    def get_rows(self, rows):
        """Return the rows of the symmetrized approximation in ``rows``."""
        return self.reconstruct(0.5 * self._P[rows, :].dot(self._F)
                                + 0.5 * self._P.dot(self._F[:, rows]).T)

    def get_memory(self):
        """Return a memory figure for ``F`` and ``P`` (entry count times 8).

        For a sparse ``P`` the index arrays are counted at half weight --
        presumably because they are 4-byte integers; TODO confirm.
        """
        f_entries = self._F.shape[0] * self._F.shape[1]
        if self._nnz_row is None:
            return (f_entries + self._P.shape[0] * self._P.shape[1]) * 8
        return (f_entries
                + self._P.data.shape[0]
                + self._P.indices.shape[0] // 2
                + self._P.indptr.shape[0] // 2) * 8

    def get_name(self):
        """Return the short identifier of this method."""
        return 'mf'

    def reset(self, preprocessing=False):
        """Forget the landmark selection.

        ``preprocessing`` is accepted for signature compatibility with the
        parent but is not acted upon here.
        """
        # reset the solution
        self._nonWinds = None
        self._Winds = None