Ejemplo n.º 1
0
 def set_sampling_method(self, sampling_method):
     """Choose the landmark sampling strategy by name.

     Args:
         sampling_method: ``'random'`` or ``'farthest'``.  The matching
             sampler is built around ``self._func`` with ``k_to_all=False``
             and stored in ``self._sampling_method``.

     Raises:
         ValueError: if ``sampling_method`` is any other value.
     """
     # Resolve the sampler class first, then instantiate it in one place.
     if sampling_method == 'random':
         sampler_cls = RandomSampling
     elif sampling_method == 'farthest':
         sampler_cls = FarthestSampling
     else:
         raise ValueError('Unknown sampling method')

     self._sampling_method = sampler_cls(self._func, k_to_all=False)
Ejemplo n.º 2
0
 def __init__(self, k, func=None, rcond=None, sampling_method='random'):
     """Store the approximation settings and build the sampler.

     Args:
         k: kept as ``self._k``.
         func: kept as ``self._func`` and handed to the sampler.
         rcond: kept as ``self._rcond``.
         sampling_method: ``'random'`` or ``'farthest'``; selects the
             sampler class, which is instantiated with ``k_to_all=True``.

     Raises:
         ValueError: if ``sampling_method`` is not one of the two names.
     """
     # Plain attributes are assigned before the sampler lookup so that they
     # exist even when the lookup raises.
     self._func, self._k, self._rcond = func, k, rcond

     if sampling_method == 'random':
         sampler_cls = RandomSampling
     elif sampling_method == 'farthest':
         sampler_cls = FarthestSampling
     else:
         raise ValueError('Unknown sampling method')

     self._sampling_method = sampler_cls(func, k_to_all=True)
Ejemplo n.º 3
0
 def __init__(self,
              l,
              func,
              soft_p,
              sampling_method="farthest",
              interp_sq=True,
              **kwargs):
     """Initialise the base class, then switch to k-to-all sampling.

     Args:
         l: forwarded to the base constructor as ``l``.
         func: forwarded to the base constructor and handed to the sampler.
         soft_p: forwarded to the base constructor as ``soft_p``.
         sampling_method: name of the sampling strategy, forwarded to the
             base constructor.
         interp_sq: forwarded to the base constructor as ``interp_sq``.
         **kwargs: any further keyword arguments for the base constructor.
     """
     super(MF, self).__init__(l=l,
                              func=func,
                              soft_p=soft_p,
                              sampling_method=sampling_method,
                              interp_sq=interp_sq,
                              **kwargs)
     # Make sure we get k-to-all sampling.  Re-create the sampler that the
     # base constructor selected instead of hard-coding FarthestSampling, so
     # a non-default ``sampling_method`` is no longer silently ignored.
     self._sampling_method = type(self._sampling_method)(func, k_to_all=True)
Ejemplo n.º 4
0
 def __init__(self,
              k,
              l,
              distFun,
              applyPCA=True,
              seed=0,
              sampling_method='random'):
     """Store the scaling settings and build the landmark sampler.

     Args:
         k: forwarded to the base-class constructor.
         l: kept as ``self._l``.
         distFun: kept as ``self._distance`` and handed to the sampler.
         applyPCA: kept as ``self._applyPCA``.
         seed: kept as ``self._seed``.
         sampling_method: ``'random'`` or ``'farthest'``; selects the
             sampler class, which is instantiated with ``k_to_all=True``.

     Raises:
         ValueError: if ``sampling_method`` is not one of the two names.
     """
     super(LMDS, self).__init__(k)

     # Plain attributes are assigned before the sampler lookup so that they
     # exist even when the lookup raises.
     self._l, self._distance = l, distFun
     self._applyPCA, self._seed = applyPCA, seed

     if sampling_method == 'random':
         sampler_cls = RandomSampling
     elif sampling_method == 'farthest':
         sampler_cls = FarthestSampling
     else:
         raise ValueError('Unknown sampling method')

     self._sampling_method = sampler_cls(distFun, k_to_all=True)
Ejemplo n.º 5
0
class Nystrom(SymMatrixApprox):
    """Approximation of a symmetric matrix built from a subset of its columns.

    ``fit`` keeps the sampled columns ``C`` together with the pseudo-inverse
    of the block ``W = C[chosen_inds, :]``; rows of the approximation are
    evaluated on demand as ``C[rows] . pinv(W) . C^T``.
    """

    def __init__(self, k, func=None, rcond=None, sampling_method='random'):
        """Store the settings and build the sampler.

        Args:
            k: number of indices requested from the sampler in ``fit``.
            func: callable invoked as ``func(X, chosen_inds, all_indices)``
                when ``fit`` receives explicit indices; also handed to the
                sampler.
            rcond: forwarded to ``la.pinvh`` as ``rcond``.
            sampling_method: ``'random'`` or ``'farthest'``; the sampler is
                instantiated with ``k_to_all=True``.

        Raises:
            ValueError: if ``sampling_method`` is not one of the two names.
        """
        # Plain attributes are assigned before the sampler lookup so that
        # they exist even when the lookup raises.
        self._func, self._k, self._rcond = func, k, rcond

        if sampling_method == 'random':
            sampler_cls = RandomSampling
        elif sampling_method == 'farthest':
            sampler_cls = FarthestSampling
        else:
            raise ValueError('Unknown sampling method')

        self._sampling_method = sampler_cls(func, k_to_all=True)

    def fit(self, X, y=None, chosen_inds=None):
        """Sample columns (or use the given ones) and pseudo-invert ``W``.

        Args:
            X: data object; only ``X.shape[0]`` is read here, the rest is
                passed through to the sampler or to ``func``.
            y: unused.
            chosen_inds: optional indices to use instead of sampling.

        Raises:
            ValueError: if no ``func`` was supplied at construction time.
        """
        num_points = X.shape[0]

        if self._func is None:
            raise ValueError('function==None is not accepted.')

        if chosen_inds is not None:
            columns = self._func(X, chosen_inds, np.arange(num_points))
        else:
            chosen_inds, columns = self._sampling_method.sample(X, self._k)

        # NOTE(review): ``la`` is presumably scipy.linalg; newer SciPy
        # releases replace ``rcond`` with ``atol``/``rtol`` -- confirm against
        # the SciPy version in use.
        landmark_block = columns[chosen_inds, :]
        landmark_pinv = la.pinvh(landmark_block, rcond=self._rcond)

        # State is only written once every computation above has succeeded.
        self._n = num_points
        self._C = columns
        self._idxs = chosen_inds
        self._Wpinv = landmark_pinv

    def get_row(self, i):
        """Return row ``i`` of the approximated matrix."""
        left_factor = self._C[i, :].dot(self._Wpinv)
        return left_factor.dot(self._C.T)

    def get_rows(self, rows):
        """Return the rows selected by ``rows`` of the approximated matrix."""
        left_factor = self._C[rows, :].dot(self._Wpinv)
        return left_factor.dot(self._C.T)

    def get_size(self):
        """Return the number of points seen by the last ``fit``."""
        return self._n

    def get_memory(self):
        """Return the storage of ``C`` and ``pinv(W)`` at 8 bytes per entry."""
        c_rows, c_cols = self._C.shape
        w_rows, w_cols = self._Wpinv.shape
        return (c_rows * c_cols + w_rows * w_cols) * 8

    def get_name(self):
        """Return the short identifier of this method."""
        return 'nystrom'
Ejemplo n.º 6
0
class LMDS(Scaling):
    """
    Landmark MultiDimensional Scaling

    Classical MDS is applied to the landmark-to-landmark block of the values
    returned by the sampler, then every point is placed by distance-based
    triangulation against the landmarks.

    References:
        [1]_ "Sparse multidimensional scaling using landmark points"; V. de Silva and J. B. Tenenbaum

    """
    def __init__(self,
                 k,
                 l,
                 distFun,
                 applyPCA=True,
                 seed=0,
                 sampling_method='random'):
        """Store the settings and build the landmark sampler.

        Args:
            k: forwarded to the base constructor; ``fit`` reads ``self._k``
                as the number of eigenpairs to request (presumably set by
                the base class -- confirm).
            l: number of landmarks requested from the sampler.
            distFun: kept as ``self._distance`` and handed to the sampler.
            applyPCA: whether ``fit`` passes the embedding through
                ``_compute_pca``.
            seed: kept as ``self._seed``; not read anywhere in this class.
            sampling_method: ``'random'`` or ``'farthest'``; the sampler is
                instantiated with ``k_to_all=True``.

        Raises:
            ValueError: if ``sampling_method`` is not one of the two names.
        """
        super(LMDS, self).__init__(k)
        self._l = l
        self._distance = distFun
        self._applyPCA = applyPCA
        self._seed = seed
        if sampling_method == 'random':
            self._sampling_method = RandomSampling(distFun, k_to_all=True)
        elif sampling_method == 'farthest':
            self._sampling_method = FarthestSampling(distFun, k_to_all=True)
        else:
            raise ValueError('Unknown sampling method')

    def fit(self, data):
        """Compute the embedding and store it, one row per sample, in ``self.Z``.

        Args:
            data: object with a ``shape`` attribute; passed on to the sampler.
        """
        # Get the number of samples
        n = data.shape[0]

        # Select the landmarks
        # (deltas is indexed as deltas[landmarks, :] below -- presumably one
        # row per sample and one column per landmark; confirm with the
        # sampler implementation)
        landmarks, deltas = self._sampling_method.sample(data, self._l)

        # Apply Classical MDS
        # Compute distance between the landmarks
        landmark_deltas = deltas[landmarks, :]
        landmark_deltas = (landmark_deltas + landmark_deltas.T) / 2.0

        # Compute the means and center around these
        landmark_mu_i = landmark_deltas.mean(axis=1)[:, np.newaxis]
        landmark_mu = landmark_mu_i.mean()
        B = -(landmark_deltas - landmark_mu_i - landmark_mu_i.T +
              landmark_mu) / 2

        # Request only the k largest eigenpairs (eigh returns them in
        # ascending order).  ``subset_by_index`` replaces the ``eigvals``
        # keyword, which newer SciPy releases no longer accept.
        w, v = scipy.linalg.eigh(
            B, subset_by_index=[self._l - self._k, self._l - 1])

        # Only directions with a strictly positive eigenvalue are usable.
        positive = w > 0
        k_final = np.sum(positive)
        L = v[:, positive].dot(np.diag(np.sqrt(w[positive])))

        # Apply distance based triangulation
        L_pinv = v[:, positive].dot(np.diag(1 / np.sqrt(w[positive])))
        X = -L_pinv.T.dot((deltas.T - landmark_mu_i) / 2)

        # Save the resulting model
        self._n = n
        self._k_final = k_final
        self._L = L

        # Apply PCA normalization
        if self._applyPCA:
            self.Z = self._compute_pca(X, k_final).T
        else:
            self.Z = X.T

    def transform(self, data):
        """Return the embedding computed by ``fit``; ``data`` is ignored."""
        return self.Z

    @staticmethod
    def _compute_pca(X, k):
        """Centre the columns of ``X`` and rotate onto eigenvectors of its scatter.

        Args:
            X: array with one column per sample.
            k: number of eigenvectors of ``X_bar . X_bar^T`` to use, taken
                from the low end of the ascending spectrum (indices
                ``0 .. k-1``).

        Returns:
            The centred data expressed in the basis of those eigenvectors.
        """
        X_mean = X.mean(axis=1)
        X_bar = X - X_mean[:, np.newaxis]
        _, U = scipy.linalg.eigh(X_bar.dot(X_bar.T),
                                 subset_by_index=[0, k - 1])
        return U.T.dot(X_bar)

    def get_memory(self):
        """Return a storage estimate at 8 bytes per entry."""
        n_landmarks = self._l
        n = self._n
        return (n * n_landmarks + n_landmarks * n_landmarks + self._k +
                self._k * n_landmarks + n_landmarks) * 8

    def get_name(self):
        """Return the short identifier of this method."""
        return 'lmds'
Ejemplo n.º 7
0
class BHA(SymMatrixApprox):
    """Landmark-based approximation ``P W P^T`` of a symmetric matrix.

    ``W`` is the symmetrised kernel between ``l`` landmark points and ``P``
    is an ``n x l`` interpolation matrix obtained by solving against the
    operator ``M`` that :meth:`preprocessing` assembles from a surface.
    ``P`` is a dense array when no threshold is configured and a thresholded
    CSR matrix otherwise.
    """

    def __init__(self, l, func=None, nnz_row=None, threshold=None, 
                 sparse_direction='cols', fit_group=True, 
                 sampling_method='farthest', interp_sq=False, soft_p=None):
        """Store the configuration and build the landmark sampler.

        Args:
            l: number of landmarks requested from the sampler.
            func: callable invoked as ``func(X, rows, cols)`` to evaluate the
                kernel; also handed to the sampler.
            nnz_row: if not ``None``, ``compute_P`` derives the threshold
                from it (overwriting ``threshold``).
            threshold: number of entries kept per column (``'cols'``) or per
                row (``'rows'``) of the sparse ``P``; ``None`` keeps ``P``
                dense.
            sparse_direction: ``'cols'`` or ``'rows'``.
            fit_group: in ``'cols'`` mode, use ``fit_partition`` instead of
                ``fit`` on the thresholding helper.
            sampling_method: ``'random'`` or ``'farthest'``.
            interp_sq: interpolate the squared kernel and take the square
                root when reconstructing.
            soft_p: optional softening weight applied to the dense ``P``.

        Raises:
            ValueError: if ``sampling_method`` is unknown.
        """
        self._l = l
        self._func = func
        self._nnz_row = nnz_row
        self._threshold = threshold
        self._sparse_direction = sparse_direction
        self._fit_group = fit_group
        self.set_sampling_method(sampling_method)
        self._interp_sq = interp_sq # interpolate squared distance, then sqrt?
        self._soft_p = soft_p
        # State filled in later by preprocessing() / fit().  ``None`` is the
        # same "not computed" marker that reset() and set_l() use.
        self._M = None
        self._Winds = None
        self._nonWinds = None

    def set_sampling_method(self, sampling_method):
        """Select the landmark sampler (``'random'`` or ``'farthest'``).

        Raises:
            ValueError: if ``sampling_method`` is any other value.
        """
        if sampling_method=='random':
            self._sampling_method = RandomSampling(self._func, k_to_all=False)
        elif sampling_method=='farthest':
            self._sampling_method = FarthestSampling(self._func, k_to_all=False)
        else:
            raise ValueError('Unknown sampling method')

    def preprocessing(self, pts, polys):
        """Assemble the operator ``M`` from the surface ``(pts, polys)``.

        Stores the surface, the Laplace operator pieces and
        ``M = (lapV - lapW) . Dinv . (lapV - lapW)`` on the instance.
        """
        self._pts = pts
        self._polys = polys
        self._surface = Surface(pts, polys)

        B, D, lapW, lapV = self._surface.laplace_operator
        npt = len(D)
        Dinv = sparse.dia_matrix((D ** -1, [0]), (npt, npt)).tocsr()  # construct Dinv

        self._M = (lapV - lapW).dot(Dinv.dot(lapV - lapW))

        self._lapW = lapW
        self._lapV = lapV
        self._Dinv = Dinv

    def get_W(self, X, Winds):
        """Evaluate the kernel between the landmarks (squared if ``interp_sq``)."""
        W = self._func(X, Winds, Winds)

        if self._interp_sq:
            return W ** 2
        else:
            return W

    def fit(self, X):
        """Choose landmarks (unless already set), build ``W`` and compute ``P``.

        Args:
            X: object with a two-element ``shape``; passed on to the sampler
                or to ``func``.

        Raises:
            RuntimeError: if :meth:`preprocessing` has not been run.
        """
        n, _ = X.shape
        self._n = n

        if self._Winds is None:
            Winds, W = self._sampling_method.sample(X, self._l)
            self._Winds = Winds
            # non-landmark points
            self._nonWinds = np.setdiff1d(np.arange(n), Winds)
            # square the distance matrix if we need to do that
            if self._interp_sq:
                W = W ** 2
        else:
            # landmarks were fixed earlier: only re-evaluate the kernel
            W = self.get_W(X, self._Winds)

        self._W = (W + W.T) / 2.0
        
        # compute P
        self.compute_P()

    def compute_P(self):
        """Build the interpolation matrix ``P`` and store it with its nnz count.

        Raises:
            RuntimeError: if :meth:`preprocessing` has not been run.
        """
        if self._M is None:
            # Without this check the failure is an opaque AttributeError on
            # ``None`` a few lines further down.
            raise RuntimeError(
                'preprocessing(pts, polys) must be called before fit()')

        Winds = self._Winds
        nonWinds = self._nonWinds
        n = self._M.shape[0]
        
        # pull out part of M for unselected points
        M_aa = self._M[nonWinds,:][:,nonWinds].tocsc()
        
        # pull out part of M that crosses selected and unselected points
        M_ab = self._M[Winds,:][:,nonWinds]

        if self._nnz_row is not None:
            if self._sparse_direction == 'cols':
                # same total number of non-zeros, expressed per column
                self._threshold = self._nnz_row * (n-self._l) // self._l
            else:
                self._threshold = self._nnz_row
        
        # prefer CHOLMOD when it is installed, fall back to SciPy otherwise
        try:
            from sksparse.cholmod import cholesky
            solve_method = 'cholmod'
        except ImportError:
            solve_method = 'spsolve'

        # compute Pprime, part of the dense interpolation matrix
        if self._threshold is None:
            if solve_method == 'spsolve':
                Pprime = sparse.linalg.spsolve(M_aa, -M_ab.T)
            elif solve_method == 'cholmod':
                Pprime = cholesky(M_aa).solve_A(-M_ab.T)
        
            # compute P, the full dense interpolation matrix
            P = np.zeros((n, self._l))
            P[nonWinds,:] = Pprime.todense()
            P[Winds,:] = np.eye(self._l)
            Pnnz = n * self._l

            if self._soft_p is not None:
                # don't force P to be exactly identity for known points,
                # allow it to fudge a little
                print("Softening P..")
                M_bb = self._M[Winds,:][:,Winds]
                soft_eye = sparse.eye(self._l) * self._soft_p
                to_invert = (M_bb + soft_eye + M_ab.dot(Pprime)).todense()
                soft_factor = np.linalg.inv(to_invert) * self._soft_p
                P = P.dot(soft_factor).A

        else:
            # Compute the sparse bha
            if solve_method == 'cholmod':
                chol_M_aa = cholesky(M_aa)

            # COO buffers: the thresholded entries followed by one identity
            # entry per landmark.
            if self._sparse_direction == 'rows':
                thresh = THR_ROWS(k=self._threshold)
                Prows = np.empty(self._threshold*(n-self._l)+self._l, dtype=int)
                Pcols = np.empty(self._threshold*(n-self._l)+self._l, dtype=int)
                Pvals = np.empty(self._threshold*(n-self._l)+self._l)
            else:
                thresh = THR(k=self._threshold)
                Prows = np.empty(self._threshold*self._l+self._l, dtype=int)
                Pcols = np.empty(self._threshold*self._l+self._l, dtype=int)
                Pvals = np.empty(self._threshold*self._l+self._l)
            chunk_size = 64  # min(self._l // self._njobs, 64)
            chunks = self._l // chunk_size + ((self._l % chunk_size) > 0)

            for chunk in counter(range(chunks)):
                start = chunk*chunk_size
                end = min((chunk+1)*chunk_size, self._l)
                if solve_method == 'spsolve':
                    sol = sparse.linalg.spsolve(M_aa, -M_ab.T[:, start:end].toarray())
                elif solve_method == 'cholmod':
                    sol = chol_M_aa.solve_A(-M_ab.T[:, start:end].toarray())

                # A chunk holding a single landmark may come back as a 1-D
                # vector from the solver; keep one column per landmark so
                # the column indexing below always works.
                sol = np.reshape(sol, (sol.shape[0], -1))

                if self._sparse_direction == 'rows':
                    thresh.fit(sol)
                else:
                    # fit_group only changes which thresholding routine is
                    # applied to each column; the bookkeeping is identical.
                    if self._fit_group:
                        fit_column = thresh.fit_partition
                    else:
                        fit_column = thresh.fit
                    for l_i, l in enumerate(range(start, end)):
                        fit_column(sol[:, l_i])
                        Prows[l*self._threshold:(l+1)*self._threshold] = nonWinds[thresh._idxs]
                        Pvals[l*self._threshold:(l+1)*self._threshold] = thresh._vals

            if self._sparse_direction == 'rows':
                cols, vals = thresh.get_best_k()
                Prows[:(n-self._l)*self._threshold] = np.repeat(nonWinds[np.arange(n-self._l)],self._threshold)
                Pcols[:(n-self._l)*self._threshold] = cols
                Pvals[:(n-self._l)*self._threshold] = vals
                lastnonWindElement = (n-self._l)*self._threshold
            else:
                Pcols[:self._l*self._threshold] = np.repeat(np.arange(self._l),self._threshold)
                lastnonWindElement = self._l*self._threshold

            # add the identity for indices in W
            Prows[lastnonWindElement:] = Winds
            Pcols[lastnonWindElement:] = np.arange(self._l)
            Pvals[lastnonWindElement:] = 1.0

            P = sparse.csr_matrix((Pvals,(Prows, Pcols)), shape=(n,self._l))
            P.eliminate_zeros()
            Pnnz = P.nnz

        # save values
        self._nnz = Pnnz
        self._P = P

    def reconstruct(self, approx):
        """Reconstruct the data from the approximation. Takes the square root
        of the approximation if we are approximating the squared matrix.
        """
        if self._interp_sq:
            return np.sqrt(np.clip(approx, 0, np.inf))
        else:
            return approx
    
    def transform(self):
        """Return the full reconstructed approximation ``P W P^T``."""
        if self._threshold is None:
            Kmanifold = self._P.dot(self._W).dot(self._P.T)
        else:
            # sparse P: keep P on the left of every product
            Kmanifold = self._P.dot(self._P.dot(self._W).T)

        return self.reconstruct(Kmanifold)

    def get_row(self, i):
        """Return row ``i`` of the reconstructed approximation."""
        if self._threshold is None:
            approx = self._P[i,:].dot(self._W).dot(self._P.T)
        else:
            approx = self._P.dot(self._P[i,:].dot(self._W).T).T

        return self.reconstruct(approx)

    def get_rows(self, rows):
        """Return the selected rows of the reconstructed approximation."""
        if self._threshold is None:
            approx = self._P[rows,:].dot(self._W).dot(self._P.T)
        else:
            approx = self._P.dot(self._P[rows,:].dot(self._W).T).T

        return self.reconstruct(approx)

    def get_size(self):
        """Return the number of points seen by the last ``fit``."""
        return self._n

    def set_l(self, l):
        """Change the number of landmarks and discard the fitted solution."""
        self._l = l
        # reset the solution
        self._nonWinds = None
        self._Winds = None
        self._threshold = None
        self._n = None
        self._nnz = 0
        self._P = None
        self._W = None

    def set_nnz_row(self, nnz_row=None):
        """Change ``nnz_row`` and discard the fitted solution (landmarks are kept)."""
        self._nnz_row = nnz_row
        # reset the solution
        self._threshold = None
        self._n = None
        self._nnz = 0
        self._P = None
        self._W = None

    def get_memory(self):
        """Return a storage estimate for ``W`` and ``P`` at 8 bytes per entry."""
        if self._nnz_row is None:
            return (self._W.shape[0] * self._W.shape[1] + 
                    self._P.shape[0] * self._P.shape[1]) * 8
        else:
            # CSR index arrays are counted at half weight -- presumably
            # because they are 32-bit; confirm.
            return (self._W.shape[0] * self._W.shape[1] + self._P.data.shape[0] + 
                    self._P.indices.shape[0]//2 + self._P.indptr.shape[0]//2) * 8

    def get_name(self):
        """Return ``'bha'``, or ``'sbha<nnz_row>'`` when ``nnz_row`` is set."""
        if self._nnz_row is None:
            return 'bha'
        else:
            return 'sbha' + str(self._nnz_row)

    def reset(self, preprocessing=False):
        """Forget the landmarks; with ``preprocessing=True`` also forget ``M``."""
        # reset the solution
        if preprocessing:
            self._M = None
            self._lapW = None
            self._lapV = None
            self._Dinv = None
        self._nonWinds = None
        self._Winds = None

    @classmethod
    def from_surface(cls, pts, polys, l, nnz_row=150, m=1.0, **kwargs):
        """Build, preprocess and fit an instance using geodesics on a surface.

        Args:
            pts, polys: surface description handed to ``MeshKLazy.fit`` and
                to :meth:`preprocessing`.
            l: number of landmarks.
            nnz_row: forwarded to the constructor.
            m: forwarded to ``MeshKLazy``.
            **kwargs: further constructor arguments.

        Returns:
            The fitted instance.
        """

        # create MeshKLazy object that computes geodesics
        meshk = MeshKLazy(m=m)
        meshk.fit(pts, polys)

        # create function that we'll pass to BHA object
        def geodesic(_, source=None, dest=None):
            if source is None and dest is None:
                return np.vstack([meshk.get_row(i) for i in range(len(pts))])
            elif dest is None:
                return meshk.get_rows(source).T
            else:
                return meshk.get_rows(source)[:,dest].T

        # create BHA object
        bha = cls(l, geodesic, nnz_row=nnz_row, **kwargs)

        # preprocess & fit
        bha.preprocessing(pts, polys)
        bha.fit(meshk)

        return bha
Ejemplo n.º 8
0
class MF(BHA):
    """BHA variant that interpolates whole sampled columns.

    Instead of the landmark-to-landmark kernel, ``fit`` keeps the values
    between every landmark and all points in ``F``; the approximation is the
    symmetrised product ``0.5 * (P F + (P F)^T)``.
    """

    def __init__(self,
                 l,
                 func,
                 soft_p,
                 sampling_method="farthest",
                 interp_sq=True,
                 **kwargs):
        """Initialise the base class, then switch to k-to-all sampling.

        Args:
            l: number of landmarks.
            func: kernel callable, also handed to the sampler.
            soft_p: softening weight forwarded to the base constructor.
            sampling_method: ``'random'`` or ``'farthest'``.
            interp_sq: interpolate the squared values and take the square
                root when reconstructing.
            **kwargs: further keyword arguments for the base constructor.
        """
        super(MF, self).__init__(l=l,
                                 func=func,
                                 soft_p=soft_p,
                                 sampling_method=sampling_method,
                                 interp_sq=interp_sq,
                                 **kwargs)
        # Make sure we get k-to-all sampling.  Re-create the sampler that the
        # base constructor selected instead of hard-coding FarthestSampling,
        # so a non-default ``sampling_method`` is no longer silently ignored.
        self._sampling_method = type(self._sampling_method)(func,
                                                            k_to_all=True)

    def fit(self, X):
        """Sample landmarks, store their columns in ``F`` and compute ``P``.

        Args:
            X: object with a two-element ``shape``; passed on to the sampler.

        Raises:
            RuntimeError: if ``preprocessing(pts, polys)`` has not been run.
        """
        ## The following is adapted from BHA.fit
        n, _ = X.shape
        self._n = n

        # The operator M cannot be derived from X alone: preprocessing()
        # needs both points and polygons, so fail early with a clear message
        # instead of calling it with the wrong arguments.
        if getattr(self, '_M', None) is None:
            raise RuntimeError(
                'preprocessing(pts, polys) must be called before fit()')

        Winds, F = self._sampling_method.sample(X, self._l)
        self._Winds = Winds
        # non-landmark points
        self._nonWinds = np.setdiff1d(np.arange(n), Winds)

        # store entire columns in F (one row per landmark after transposing)
        if self._interp_sq:
            self._F = F.T**2
        else:
            self._F = F.T

        # compute P
        self.compute_P()

    def transform(self):
        """Return the full reconstructed, symmetrised approximation."""
        approx = self._P.dot(self._F)
        return self.reconstruct(0.5 * (approx + approx.T))

    def get_row(self, i):
        """Return row ``i`` of the reconstructed, symmetrised approximation."""
        return self.reconstruct(0.5 * self._P[i, :].dot(self._F) +
                                0.5 * self._P.dot(self._F[:, i]))

    def get_rows(self, rows):
        """Return the selected rows of the reconstructed approximation."""
        return self.reconstruct(0.5 * self._P[rows, :].dot(self._F) +
                                0.5 * self._P.dot(self._F[:, rows]).T)

    def get_memory(self):
        """Return a storage estimate for ``F`` and ``P`` at 8 bytes per entry."""
        if self._nnz_row is None:
            return (self._F.shape[0] * self._F.shape[1] +
                    self._P.shape[0] * self._P.shape[1]) * 8
        else:
            # CSR index arrays are counted at half weight -- presumably
            # because they are 32-bit; confirm.
            return (self._F.shape[0] * self._F.shape[1] +
                    self._P.data.shape[0] + self._P.indices.shape[0] // 2 +
                    self._P.indptr.shape[0] // 2) * 8

    def get_name(self):
        """Return the short identifier of this method."""
        return 'mf'

    def reset(self, preprocessing=False):
        """Forget the sampled landmarks.

        NOTE(review): ``preprocessing`` is accepted for signature
        compatibility with the base class but is ignored here -- the
        preprocessing results are always kept.  Confirm this is intended.
        """
        # reset the solution
        self._nonWinds = None
        self._Winds = None