def _bootstrap_problem(A, k, minstepsize=1e-9):
    m, n = A.shape
    # Fixed: pymanopt's manifold class is FixedRankEmbedded.
    manifold = FixedRankEmbedded(m, n, k)
    # solver = TrustRegions(maxiter=500, minstepsize=1e-6)
    solver = SteepestDescent(maxiter=500, minstepsize=minstepsize)
    # solver = ConjugateGradient(maxiter=500, minstepsize=minstepsize)
    return manifold, solver

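# A minimal usage sketch for _bootstrap_problem, assuming the pre-1.0 pymanopt
# API used throughout this file (plain autograd-backed cost callables and
# solver.solve). The rank-k cost below is illustrative, not part of the
# original module; FixedRankEmbedded points are assumed to be (U, S, Vt)
# factor tuples.
import autograd.numpy as anp
import numpy as np
from pymanopt import Problem

A = np.random.randn(20, 15)
manifold, solver = _bootstrap_problem(A, k=5)

def low_rank_cost(point):
    u, s, vt = point  # factors of the rank-5 candidate
    return anp.linalg.norm(A - anp.dot(u * s, vt)) ** 2

problem = Problem(manifold=manifold, cost=low_rank_cost, verbosity=0)
u, s, vt = solver.solve(problem)
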
def fit(self):
    v_matrix_shape = (self.w_matrix.shape[0], self.w_matrix.shape[1])
    w_matrix = tf.convert_to_tensor(self.w_matrix, dtype=tf.float64)
    z_matrix = tf.convert_to_tensor(self.z_matrix, dtype=tf.float64)
    x_matrix = tf.convert_to_tensor(self.x_matrix, dtype=tf.float64)
    lambda_matrix = tf.convert_to_tensor(self.lambda_matrix, dtype=tf.float64)
    x = tf.Variable(
        initial_value=tf.ones(v_matrix_shape, dtype=tf.dtypes.float64))
    cost = tf.norm(
        x_matrix - tf.linalg.matmul(
            tf.linalg.matmul(x, lambda_matrix), tf.transpose(x))
    ) + self.rho / 2 * tf.norm(x - w_matrix + z_matrix)
    manifold = Stiefel(v_matrix_shape[0], v_matrix_shape[1])
    problem = Problem(manifold=manifold, cost=cost, arg=x)
    solver = SteepestDescent(logverbosity=self.verbosity)
    if self.verbosity > 2:
        v_optimal, _ = solver.solve(problem)
    else:
        v_optimal = solver.solve(problem)
    if self.verbosity > 2:
        print("==> WSubproblem ==> Showing v_optimal:")
        print(v_optimal)
    return v_optimal

def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    n = 128
    matrix = rnd.randn(n, n)
    matrix = 0.5 * (matrix + matrix.T)

    cost, egrad = create_cost_egrad(backend, matrix)
    manifold = Sphere(n)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = SteepestDescent()
    estimated_dominant_eigenvector = solver.solve(problem)

    if quiet:
        return

    # Calculate the actual solution by a conventional eigenvalue decomposition.
    eigenvalues, eigenvectors = la.eig(matrix)
    dominant_eigenvector = eigenvectors[:, np.argmax(eigenvalues)]

    # Make sure both vectors have the same direction. Both are valid
    # eigenvectors, but for comparison we need to get rid of the sign
    # ambiguity.
    if (np.sign(dominant_eigenvector[0]) !=
            np.sign(estimated_dominant_eigenvector[0])):
        estimated_dominant_eigenvector = -estimated_dominant_eigenvector

    # Print information about the solution.
    print("l2-norm of x: %f" % la.norm(dominant_eigenvector))
    print("l2-norm of xopt: %f" % la.norm(estimated_dominant_eigenvector))
    print("Solution found: %s" % np.allclose(
        dominant_eigenvector, estimated_dominant_eigenvector, rtol=1e-3))
    error_norm = la.norm(dominant_eigenvector - estimated_dominant_eigenvector)
    print("l2-error: %f" % error_norm)

def _align_H_stiefel(self, Q, G):
    """Tangent vector field alignment via optimization on the orthogonal group."""
    N, D, d = Q.shape
    indptr = G.indptr
    indices = G.indices
    K = G.data

    def cost(V):
        F = 0
        for i in range(N):
            for j, K_ij in zip(indices[indptr[i]:indptr[i + 1]],
                               K[indptr[i]:indptr[i + 1]]):
                f_i = K_ij * np.trace(
                    np.dot(np.dot(V[i].T, np.dot(Q[i].T, Q[j])), V[j]))
                F += f_i
        return F

    manifold = Rotations(d)
    problem = Problem(manifold=manifold, cost=cost)
    solver = SteepestDescent()
    # The initial point must lie on the manifold: the identity is a valid
    # rotation, whereas the all-zeros matrix is not.
    V = solver.solve(problem, np.eye(d))
    return V

def get_rotation_matrix(M, Mtilde, weights=None, dist=None):
    if dist is None:
        dist = 'euc'
    n = M[0].shape[0]

    # (1) Instantiate a manifold
    manifold = Rotations(n)

    # (2) Define cost function and a problem
    if dist == 'euc':
        cost = partial(cost_function_full, M=M, Mtilde=Mtilde,
                       weights=weights, dist=dist)
        problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    elif dist == 'rie':
        cost = partial(cost_function_full, M=M, Mtilde=Mtilde,
                       weights=weights, dist=dist)
        egrad = partial(egrad_function_full_rie, M=M, Mtilde=Mtilde,
                        weights=weights)
        problem = Problem(manifold=manifold, cost=cost, egrad=egrad,
                          verbosity=0)

    # (3) Instantiate a Pymanopt solver
    solver = SteepestDescent(mingradnorm=1e-3)

    # let Pymanopt do the rest
    Q_opt = solver.solve(problem)
    return Q_opt

def rotation_matrix(mean_source, mean_target_train):
    manifold = Rotations(mean_source[0].shape[0])
    cost = partial(cost_function_full, mean_source, mean_target_train)
    problem = Problem(manifold, cost)
    solver = SteepestDescent(mingradnorm=1e-3)
    U = solver.solve(problem)
    return U

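# Usage sketch for rotation_matrix above: align two illustrative sets of SPD
# class means. cost_function_full is the same helper this module already uses;
# the toy matrices below are placeholders.
import numpy as np

rng = np.random.RandomState(42)

def _toy_spd(size):
    a = rng.randn(size, size)
    return a.dot(a.T) + size * np.eye(size)

mean_source = [_toy_spd(4) for _ in range(3)]
mean_target_train = [_toy_spd(4) for _ in range(3)]
U = rotation_matrix(mean_source, mean_target_train)  # rotation in SO(4)
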
def __init__(self, P, k, testing=True):
    """
    P is a (d x n) matrix holding our points.
    k is the number of clusters expected.
    If testing is true then we do extra computations which are useful
    but not computationally necessary.
    """
    self.P = P  # Matrix of points
    self.nu = np.sum(P**2, axis=0)[:, None]  # Matrix of pts' norms.
    self.d = P.shape[0]  # Dimension the points lie in
    self.n = P.shape[1]  # Number of points
    self.k = k  # Number of clusters
    self.M = Y_mani(self.n, self.k)  # "Y" manifold corresponding to our problem.

    # NB it may be quicker to use "sums" and such strictly instead of the ones matrix
    self.one = np.ones((self.n, 1))  # Matrix of all ones.

    # The solver we use for gradient descent.
    # NB there may be some better way to set the settings,
    # perhaps dynamically depending on the data.
    self.solver = SteepestDescent(maxiter=MAXITER, logverbosity=1,
                                  mingradnorm=MINGRADNORM)

    if testing:
        # Computationally expensive, and not needed in final code.
        self.D = dist_mat(P)**2  # Distance squared matrix
        # Slow way which is replaced by the way below.
        # self.Dsize = la.norm(self.D)
        self.Dsize = np.sqrt(2*self.n*(self.nu**2).sum()
                             + 2*(self.nu.sum())**2
                             + 4*la.norm(P.dot(P.T))**2
                             - 8*(P.T.dot(P.dot(self.nu)).sum()))

def envelope(X_env, Y_env, u):
    p, r = X_env.shape[1], Y_env.shape[1]
    linear_model = LinearRegression().fit(X_env, Y_env)
    err = Y_env - linear_model.predict(X_env)
    Sigma_res = np.cov(err.transpose())
    Sigma_Y = np.cov(Y_env.transpose())

    def cost(Gamma):
        X = np.matmul(Gamma, Gamma.T)
        out = -np.log(
            np.linalg.det(
                np.matmul(np.matmul(X, Sigma_res), X)
                + np.matmul(np.matmul(np.eye(r) - X, Sigma_Y),
                            np.eye(r) - X)))
        return np.array(out)

    manifold = Grassmann(r, u)
    # manifold = Stiefel(r, u)
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = SteepestDescent()
    Gamma = solver.solve(problem)

    PSigma1_hat = np.matmul(Gamma, Gamma.T)
    PSigma2_hat = np.eye(r) - PSigma1_hat
    beta_hat = np.matmul(PSigma1_hat, linear_model.coef_)
    Sigma1_hat = np.matmul(np.matmul(PSigma1_hat, Sigma_res), PSigma1_hat)
    Sigma2_hat = np.matmul(np.matmul(np.eye(r) - PSigma1_hat, Sigma_res),
                           np.eye(r) - PSigma1_hat)
    alpha_hat = np.mean(Y_env - np.matmul(X_env, beta_hat.T), axis=0)
    return (alpha_hat.reshape(1, r), beta_hat.reshape(p, r))

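# Usage sketch for envelope above on toy multivariate regression data
# (illustrative shapes only: X_env is n x p, Y_env is n x r, u <= r).
import numpy as np

rng = np.random.RandomState(0)
n_samples, p_dim, r_dim, u_dim = 200, 3, 2, 1
X_toy = rng.randn(n_samples, p_dim)
beta_true = rng.randn(p_dim, r_dim)
Y_toy = X_toy.dot(beta_true) + 0.1 * rng.randn(n_samples, r_dim)

alpha_hat, beta_hat = envelope(X_toy, Y_toy, u_dim)
print(alpha_hat.shape, beta_hat.shape)  # (1, 2) and (3, 2)
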
def get_rotation_matrix(X, C):
    def cost(R):
        Z = npy.dot(X, R)
        M = npy.max(Z, axis=1, keepdims=True)
        return npy.sum((Z / M)**2)

    manifold = Stiefel(C, C)
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = SteepestDescent(logverbosity=0)
    opt = solver.solve(problem=problem, x=npy.eye(C))
    return cost(opt), opt

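# Usage sketch for get_rotation_matrix above; npy is this module's alias for
# autograd.numpy, and the score matrix below is illustrative.
import autograd.numpy as npy

scores = npy.abs(npy.random.randn(100, 4))  # 100 samples, C = 4 clusters
final_cost, R = get_rotation_matrix(scores, C=4)
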
def NG_sdr(X, y, m, v_w=5, v_b=5, verbosity=0, *args, **kwargs):
    r"""
    X: array of N points on complex Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(m, p), m < n)
    where X_hat_i = R^T X_i, R \in St(n, m),
    minimizing the projection error (using projection F-norm)
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any()  # true if X is complex-valued
    if cpx:
        gr = ComplexGrassmann(n, p)
        man = ComplexGrassmann(n, m)
    else:
        gr = Grassmann(n, p)
        man = Grassmann(n, m)

    # distance matrix
    dist_m = np.zeros((N, N))
    for i in range(N):
        for j in range(i):
            dist_m[i, j] = gr.dist(X[i], X[j])
            dist_m[j, i] = dist_m[i, j]

    # affinity matrix
    affinity = affinity_matrix(dist_m, y, v_w, v_b)

    X_ = torch.from_numpy(X)
    affinity_ = torch.from_numpy(affinity)

    @pymanopt.function.PyTorch
    def cost(A):
        dm = torch.zeros((N, N))
        for i in range(N):
            for j in range(i):
                dm[i, j] = dist_proj(torch.matmul(A.conj().t(), X_[i]),
                                     torch.matmul(A.conj().t(), X_[j]))**2
                # dm[i, j] = gr_low.dist(X_proj[i], X_proj[j])**2
                dm[j, i] = dm[i, j]
        d2 = torch.mean(affinity_*dm)
        return d2

    # solver = ConjugateGradient()
    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    A = solver.solve(problem)

    tmp = np.array([A.conj().T for i in range(N)])  # N x m x n
    X_low = multiprod(tmp, X)  # N x m x p
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])
    return X_low, A

def NG_dr1(X, verbosity=0):
    r"""
    X: array of N points on Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(n-1, p))
    where X_hat_i = A^T X_i, A \in St(n, n-1),
    minimizing the projection error (using projection F-norm)
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any()  # true if X is complex-valued
    if cpx:
        man = Product([ComplexGrassmann(n, 1), Euclidean(p, 2)])
    else:
        man = Product([Grassmann(n, 1), Euclidean(p)])

    X_ = torch.from_numpy(X)

    @pymanopt.function.PyTorch
    def cost(v, b):
        vvT = torch.matmul(v, v.conj().t())  # n x n
        if cpx:
            b_ = b[:, 0] + b[:, 1]*1j
            b_ = torch.unsqueeze(b_, axis=1)
        else:
            b_ = torch.unsqueeze(b, axis=1)
        vbt = torch.matmul(v, b_.t())  # n x p
        IvvT = torch.eye(n, dtype=X_.dtype) - vvT
        d2 = 0
        for i in range(N):
            d2 = d2 + dist_proj(X_[i], torch.matmul(IvvT, X_[i]) + vbt)**2/N
            # d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]))**2/N
        return d2

    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    theta = solver.solve(problem)
    v = theta[0]
    b_ = theta[1]

    if cpx:
        b = b_[:, 0] + b_[:, 1]*1j
        b = np.expand_dims(b, axis=1)
    else:
        b = np.expand_dims(b_, axis=1)

    R = ortho_complement(v)
    tmp = np.array([R.conj().T for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])
    return X_low, R, v, b

def NG_dr(X, m, verbosity=0, *args, **kwargs):
    r"""
    X: array of N points on Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(m, p), m < n)
    where X_hat_i = R^T X_i, R \in St(n, m),
    minimizing the projection error (using projection F-norm)
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any()  # true if X is complex-valued
    if cpx:
        man = Product([ComplexGrassmann(n, m), Euclidean(n, p, 2)])
    else:
        man = Product([Grassmann(n, m), Euclidean(n, p)])

    X_ = torch.from_numpy(X)

    @pymanopt.function.PyTorch
    def cost(A, B):
        AAT = torch.matmul(A, A.conj().t())  # n x n
        if cpx:
            B_ = B[:, :, 0] + B[:, :, 1]*1j
        else:
            B_ = B
        IAATB = torch.matmul(torch.eye(n, dtype=X_.dtype) - AAT, B_)  # n x p
        d2 = 0
        for i in range(N):
            d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]) + IAATB)**2/N
            # d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]))**2/N
        return d2

    # solver = ConjugateGradient()
    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    theta = solver.solve(problem)
    A = theta[0]
    B = theta[1]

    if cpx:
        B_ = B[:, :, 0] + B[:, :, 1]*1j
    else:
        B_ = B

    # tmp = np.array([A.T for i in range(N)])
    tmp = np.array([A.conj().T for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])
    return X_low, A, B_

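# Usage sketch for NG_dr above: reduce N = 10 illustrative points on Gr(5, 2)
# to Gr(3, 2). Orthonormal representatives are drawn via QR factorization.
import numpy as np
from numpy.linalg import qr

rng = np.random.RandomState(0)
N_pts, n_dim, p_dim, m_dim = 10, 5, 2, 3
X_pts = np.array([qr(rng.randn(n_dim, p_dim))[0] for _ in range(N_pts)])
X_low, A_map, B_off = NG_dr(X_pts, m_dim)
print(X_low.shape)  # (10, 3, 2)
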
def icf(Fhat, Z, nc=None, return_Fhat=False):
    """Independent Component Factorization (ICF) of an array of matrices Z."""
    # get centered vec(Z) such that zc = A xc = A Cxx^(1/2) xc^w
    zc = centering(Z)
    # get y = Vs^T xc^w, where A Cxx^(1/2) = Us \Sigma_s Vs^T
    y, es, Us = z2y(zc, nc)
    _, N, M = Z.shape
    s, NM = y.shape

    def cost(W):
        WTy = np.dot(W.T, y)
        return np.sum([Fhat(WTy[i]) for i in range(s)])

    # A solver that involves the hessian
    # solver = TrustRegions(mingradnorm=1e-8)
    solver = SteepestDescent(mingradnorm=1e-8)

    # O(s)
    manifold = Rotations(s, 1)

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost)

    # get What = Vs^T P S
    Wopt = solver.solve(problem)

    # get Ahat and xhat such that zc = Ahat xhat
    # get Ahat, which is actually = A Cxx^(1/2) P S
    Ahat = np.dot(Us*es, Wopt)
    # get xhat, which is actually = S^-1 P^-1 xc^w
    # xhat = np.dot(la.inv(Wopt), y)
    # Wopt is orthogonal, so Wopt.T = la.inv(Wopt)
    xhat = np.dot(Wopt.T, y)
    # assert np.allclose(zc, np.dot(Ahat, xhat)), 'Something may be wrong as zc != Ahat xhat'

    # re-order xhat and Ahat, from more non-Gaussian to more Gaussian
    Fhat_values = np.array([Fhat(xhat[i]) for i in range(s)])
    inds = np.argsort(Fhat_values)
    Ahat = Ahat[:, inds]
    xhat = xhat[inds]
    # assert np.allclose(zc, np.dot(Ahat, xhat)), 'Something may be wrong as zc != Ahat xhat'

    # reshape xhat to an array of matrices Xhat
    Xhat = xhat.reshape((s, N, M))

    if return_Fhat:
        return Ahat, Xhat, Fhat_values[inds]
    return Ahat, Xhat

def fcf(Fhat, Z, type='rectangular', nc=None, return_Fhat=False):
    """Free Component Factorization (FCF) of an array of matrices Z."""
    # get centered Z such that Zc = A Xc = A Cxx^(1/2) Xc^w
    Zc = centering(Z, type)
    # get Y = Vs^T Xc^w, where A Cxx^(1/2) = Us \Sigma_s Vs^T
    Y, es, Us = z2y(Zc, type, nc)
    s, N, M = Y.shape

    def cost(W):
        WTY = np.tensordot(W.T, Y, axes=(1, 0))
        return np.sum([Fhat(WTY[i], type) for i in range(s)])

    # A solver that involves the hessian
    # solver = TrustRegions(mingradnorm=1e-8)
    # solver = SteepestDescent(mingradnorm=1e-8)
    solver = SteepestDescent(mingradnorm=1e-8, maxtime=36000, maxiter=3000)

    # O(s)
    manifold = Rotations(s, 1)

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost)

    # get What = Vs^T P S
    Wopt = solver.solve(problem)

    # get Ahat and Xhat such that Zc = Ahat Xhat
    # get Ahat, which is actually = A Cxx^(1/2) P S
    Ahat = np.dot(Us * es, Wopt)
    # get Xhat, which is actually = S^-1 P^-1 Xc^w
    # Xhat = np.tensordot(la.inv(Wopt), Y, axes=(1, 0))
    # Wopt is orthogonal, so Wopt.T = la.inv(Wopt)
    Xhat = np.tensordot(Wopt.T, Y, axes=(1, 0))
    # assert np.allclose(Zc, np.tensordot(Ahat, Xhat, axes=(1, 0))), 'Something may be wrong as Zc != Ahat Xhat'

    # re-order Xhat and Ahat, from more non-Gaussian to more Gaussian
    Fhat_values = np.array([Fhat(Xhat[i]) for i in range(s)])
    inds = np.argsort(Fhat_values)
    Ahat = Ahat[:, inds]
    Xhat = Xhat[inds]
    # assert np.allclose(Zc, np.tensordot(Ahat, Xhat, axes=(1, 0))), 'Something may be wrong as Zc != Ahat Xhat'

    if return_Fhat:
        return Ahat, Xhat, Fhat_values[inds]
    return Ahat, Xhat

def estimate_dominant_eigenvector(matrix):
    """Returns the dominant eigenvector of the symmetric matrix A by
    minimizing the Rayleigh quotient -x' * A * x / (x' * x).
    """
    num_rows, num_columns = gs.shape(matrix)
    assert num_rows == num_columns, 'matrix must be square'
    assert gs.allclose(gs.sum(matrix - gs.transpose(matrix)), 0.0), \
        'matrix must be symmetric'

    def cost(vector):
        return -gs.dot(vector, gs.dot(matrix, vector))

    def egrad(vector):
        return -2 * gs.dot(matrix, vector)

    sphere = GeomstatsSphere(num_columns)
    problem = Problem(manifold=sphere, cost=cost, egrad=egrad)
    solver = SteepestDescent()
    return solver.solve(problem)

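# Usage sketch for estimate_dominant_eigenvector above, cross-checked against
# numpy's dense eigendecomposition (assumes the geomstats backend `gs`
# accepts plain numpy arrays here).
import numpy as np

a = np.random.randn(6, 6)
a = 0.5 * (a + a.T)
x = estimate_dominant_eigenvector(a)
_, eigenvectors = np.linalg.eigh(a)
top = eigenvectors[:, -1]  # eigh returns eigenvalues in ascending order
print(np.isclose(abs(np.dot(x, top)), 1.0, atol=1e-3))  # equal up to sign
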
def fit(self, T, Y, init, maxIter=100):
    self.init_fit(T, Y, None)
    D = self.D + self.L
    K = self.K

    # (1) Instantiate the manifold
    manifold = Product([PositiveDefinite(D + 1, k=K), Euclidean(K - 1)])

    # (2) Define the cost function and problem
    cost = self.get_cost_function(T, Y)
    problem = Problem(manifold=manifold, cost=cost, verbosity=1)

    # (3) Instantiate a Pymanopt solver
    solver = SteepestDescent(maxiter=3 * maxIter)

    # let Pymanopt do the rest
    Xopt = solver.solve(problem)
    self.Xopt_to_theta(Xopt)

def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    n = 3
    m = 10
    k = 10

    A = np.random.randn(k, n, m)
    B = np.random.randn(k, n, m)
    ABt = np.array([Ak @ Bk.T for Ak, Bk in zip(A, B)])

    cost, egrad = create_cost_egrad(backend, ABt)
    manifold = SpecialOrthogonalGroup(n, k)
    problem = pymanopt.Problem(manifold, cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = SteepestDescent()
    X = solver.solve(problem)

    if not quiet:
        Xopt = np.array([compute_optimal_solution(ABtk) for ABtk in ABt])
        print("Frobenius norm error:", np.linalg.norm(Xopt - X))

def estimate_dominant_eigenvector(matrix):
    """Returns the dominant eigenvector of the symmetric matrix A by
    minimizing the Rayleigh quotient -x' * A * x / (x' * x).
    """
    num_rows, num_columns = gs.shape(matrix)
    if num_rows != num_columns:
        raise ValueError('Matrix must be square.')
    if not gs.allclose(gs.sum(matrix - gs.transpose(matrix)), 0.0):
        raise ValueError('Matrix must be symmetric.')

    @pymanopt.function.Callable
    def cost(vector):
        return -gs.dot(vector, gs.dot(matrix, vector))

    @pymanopt.function.Callable
    def egrad(vector):
        return -2 * gs.dot(matrix, vector)

    sphere = GeomstatsSphere(num_columns)
    problem = pymanopt.Problem(manifold=sphere, cost=cost, egrad=egrad)
    solver = SteepestDescent()
    return solver.solve(problem)

def get_rotation_matrix(Mt, Ms, metric='euc'):
    Mt = Mt.reshape(-1, *Mt.shape[-2:])
    Ms = Ms.reshape(-1, *Ms.shape[-2:])
    n = Mt[0].shape[0]
    manifolds = Rotations(n)
    if metric == 'euc':
        cost = partial(_procruster_cost_function_euc, Mt=Mt, Ms=Ms)
        problem = Problem(manifold=manifolds, cost=cost, verbosity=0)
    elif metric == 'rie':
        cost = partial(_procruster_cost_function_rie, Mt=Mt, Ms=Ms)
        egrad = partial(_procruster_egrad_function_rie, Mt=Mt, Ms=Ms)
        problem = Problem(manifold=manifolds, cost=cost, egrad=egrad,
                          verbosity=0)
    solver = SteepestDescent(mingradnorm=1e-3)
    Ropt = solver.solve(problem)
    return Ropt

import autograd.numpy as np
from pymanopt import Problem
from pymanopt.solvers import SteepestDescent
from pymanopt.manifolds import Stiefel
import pprint

if __name__ == "__main__":
    # Generate random data with highest variance in first 2 dimensions
    X = np.diag([3, 2, 1]).dot(np.random.randn(3, 200))

    # Cost function is the squared reconstruction error
    def cost(w):
        return np.sum(np.sum((X - np.dot(w, np.dot(w.T, X)))**2))

    solver = SteepestDescent(logverbosity=2)

    # Projection matrices onto a two dimensional subspace
    manifold = Stiefel(3, 2)

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    wopt, optlog = solver.solve(problem)

    print('And here comes the optlog:\n')
    pp = pprint.PrettyPrinter()
    pp.pprint(optlog)
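    # A minimal sketch of reading the optlog dict returned with
    # logverbosity=2 (pre-1.0 pymanopt logging format); only keys that
    # appear elsewhere in this file are used.
    print('Final cost:', optlog['final_values']['f(x)'])
    print('Stopping reason:', optlog['stoppingreason'])
    print('Iterations logged:', len(optlog['iterations']['iteration']))
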
def run(self, x_init, max_rep=400):
    problem = Problem(manifold=self.manifold, cost=self.cost_function)
    solver = SteepestDescent()
    # Start the descent from the supplied initial point.
    self.x_opt = solver.solve(problem, x=x_init)
    return self.x_opt

def kindr(Ud, n_clusters, mansolver, init, tol_in, tol_out, max_iter_in,
          max_iter_out, disp, do_inner, post_SR, isnrm_row_U, isnrm_col_H,
          isbinary_H):
    if max_iter_out <= 0:
        raise ValueError('Number of iterations should be a positive number,'
                         ' got %d instead' % max_iter_out)
    if tol_out <= 0:
        raise ValueError('The tolerance should be a positive number,'
                         ' got %d instead' % tol_out)
    try:
        from pymanopt import Problem
        from pymanopt.manifolds import Stiefel, Rotations
        from pymanopt.solvers import SteepestDescent, ConjugateGradient, \
            TrustRegions, NelderMead
    except ImportError:
        raise ValueError(
            "KindR needs pymanopt, which is unavailable, try KindAP instead.")
        # warnings.warn("KindR solver is unavailable. Transfer to"
        #               "KindAP instead.")
    try:
        import autograd.numpy as anp
    except ImportError:
        warnings.warn(
            "Pymanopt needs autograd, which is unavailable, try KindAP instead."
        )
        idx, center, gerr, numiter = kindap(Ud, n_clusters, init, tol_in,
                                            tol_out, max_iter_in,
                                            max_iter_out, disp, True, post_SR,
                                            isnrm_row_U, isnrm_col_H,
                                            isbinary_H)
        return idx, center, gerr, numiter

    n, d = Ud.shape
    k = n_clusters
    if d != k:
        warnings.warn('Provided more features, expected %d, got %d' % (k, d))
    if isnrm_row_U:
        Ud = normalize(Ud, axis=1)

    # initialization
    if isinstance(init, string_types) and init == 'eye':
        # Z_0 = -np.identity(k)
        U = Ud[:, :k]
    elif hasattr(init, '__array__'):
        if init.shape[0] != d:
            raise ValueError(
                'The row size of init should be the same as the total'
                ' features, got %d instead.' % init.shape[0])
        if init.shape[1] != k:
            raise ValueError(
                'The column size of init should be the same as the total'
                ' clusters, got %d instead.' % init.shape[1])
        U = np.matmul(Ud, np.array(init))
    elif isinstance(init, string_types) and init == 'random':
        H = sparse.csc_matrix((np.ones((n,)),
                               (np.arange(n), np.random.randint(0, k, (n,)))),
                              shape=(n, k))
        smat, sigma, vmat = la.svd(sparse.csc_matrix.dot(Ud.T, H),
                                   full_matrices=False)
        z_init = np.matmul(smat, vmat)
        U = np.matmul(Ud, z_init)
    else:
        raise ValueError(
            "The init parameter for KindAP should be 'eye', 'random',"
            " or an array. Got a %s with type %s instead."
            % (init, type(init)))

    if isinstance(do_inner, bool) or isinstance(do_inner, int):
        do_inner = bool(do_inner)
    elif isinstance(do_inner, string_types) and \
            (do_inner in ["relu", "softmax"]):
        pass
    else:
        raise ValueError("Invalid input for do_inner")

    H, N = sparse.csc_matrix((n, k)), sparse.csc_matrix((n, k))
    dUH = float('inf')
    numiter, gerr = [], []
    idx = np.ones(n)
    crit2 = np.zeros(4)

    def cost_n(rotation):
        return 0.5 * anp.sum(anp.minimum(anp.matmul(Ud, rotation), 0)**2)

    # def cost_softmax(rotation):
    #     umat = anp.matmul(Ud, rotation)
    #     exp_umat = anp.exp(umat)
    #     mat = exp_umat / exp_umat.sum(axis=1).reshape(Ud.shape[0], 1)
    #     return 0.5 * anp.sum((umat - mat) ** 2)

    for n_iter_out in range(max_iter_out):
        idxp, Up, Np, Hp = idx, U, N, H
        optlog = {}
        itr = 0
        # inner iterations
        if do_inner:
            if d == k:
                manifold = Rotations(k)
            else:
                manifold = Stiefel(d, k)
            # if do_inner == "softmax":
            #     cost = cost_softmax
            # else:
            #     cost = cost_n
            problem = Problem(manifold=manifold, cost=cost_n, verbosity=0)
            if mansolver == "SD":
                solver = SteepestDescent(maxiter=max_iter_in,
                                         mingradnorm=tol_in, logverbosity=2)
            elif mansolver == "CG":
                solver = ConjugateGradient(maxiter=max_iter_in,
                                           mingradnorm=tol_in, logverbosity=2)
            elif mansolver == "TR":
                solver = TrustRegions(maxiter=max_iter_in,
                                      mingradnorm=tol_in, logverbosity=2)
            elif mansolver == "NM":
                solver = NelderMead(maxiter=max_iter_in,
                                    mingradnorm=tol_in, logverbosity=2)
            else:
                raise ValueError(
                    "Undefined manifold optimization method, expected SD, CG,"
                    " TR or NM, got %s instead" % mansolver)
            Z, optlog = solver.solve(problem)
            U = np.matmul(Ud, Z)
            N = np.maximum(U, 0)
            itr = len(optlog['iterations']['iteration'])
            numiter.append(itr)
            if disp:
                print(optlog['iterations']['f(x)'])
                print(optlog['stoppingreason'])
        else:
            N = U
            numiter.append(itr)

        # project onto H
        H, idx = proj_h(N, isnrm_col_H, isbinary_H)
        idxchg = sum(idx != idxp)
        # project back to Ud
        U = proj_ud(H, Ud)
        dUHp = dUH
        dUH = la.norm(U - H, 'fro')
        gerr.append(dUH)
        if disp:
            print('Outer %3d: %3d dUH: %11.8e idxchg: %6d'
                  % (n_iter_out + 1, itr, dUH, idxchg))

        # stopping criteria of outer iterations
        crit2[0] = dUH < 1e-12
        crit2[1] = abs(dUH - dUHp) < dUHp * tol_out
        crit2[2] = dUH > dUHp
        crit2[3] = idxchg == 0
        if any(crit2):
            if post_SR and do_inner:
                do_inner, isbinary_H = False, True
                continue
            if crit2[2] and not crit2[1]:
                idx, H, U, N, dUH = idxp, Hp, Up, Np, dUHp
            if disp and optlog and 'stoppingreason' in optlog:
                print('\t stop reason:', optlog['stoppingreason'])
            break

    center = sparse.csc_matrix.dot(
        normalize((H != 0), axis=0, norm='l1').T, Ud)
    return idx, center, gerr, numiter

def __init__(self, M: Manifold, Y, param, degrees, iscycle=False, P_init=None,
             verbosity=2, maxtime=100000, maxiter=100, mingradnorm=1e-6,
             minstepsize=1e-10, maxcostevals=5000):
    """Compute regression with Bézier splines for data in a manifold M using pymanopt.

    :param M: manifold
    :param Y: array containing M-valued data.
    :param param: vector with scalars between 0 and the number of intended
        segments corresponding to the data points in Y. The integer part
        determines the segment to which the data point belongs.
    :param degrees: vector of length L; the l-th entry is the degree of the
        l-th segment of the spline. All entries must be positive. For a
        closed spline, L > 1, degrees[0] > 2 and degrees[-1] > 2 must hold.
    :param iscycle: boolean that determines whether a closed C1 spline
        shall be modeled.
    :param P_init: initial guess
    :param verbosity: 0 is silent, 2 gives the most information; see
        pymanopt's problem class
    :param maxtime: maximum time for steepest descent
    :param maxiter: maximum number of iterations in steepest descent
    :param mingradnorm: stop iteration when the norm of the gradient is
        lower than mingradnorm
    :param minstepsize: stop iteration when the stepsize is smaller than
        minstepsize
    :param maxcostevals: maximum number of allowed cost evaluations

    :return P: list of control points of the optimal Bézier spline
    """
    degrees = np.atleast_1d(degrees)
    self._M = M
    self._Y = Y
    self._param = param

    pymanoptM = ManoptWrap(M)

    # Cost
    def cost(P):
        P = np.stack(P)
        control_points = self.full_set(M, P, degrees, iscycle)
        return self.sumOfSquared(
            BezierSpline(M, control_points, iscycle=iscycle), Y, param)

    # MMM = Product([M for i in range(degrees[0])])  # for conjugated gradient

    # Gradient
    def grad(P):
        P = np.stack(P)
        control_points = self.full_set(M, P, degrees, iscycle)
        grad_E = self.gradSumOfSquared(
            BezierSpline(M, control_points, iscycle=iscycle), Y, param)
        grad_E = self.indep_set(grad_E, iscycle)
        # return _ProductTangentVector([grad_E[0][i] for i in range(len(grad_E[0]))])  # for conjugated gradient
        return np.concatenate(grad_E)

    # Solve optimization problem with pymanopt by optimizing over
    # independent control points.
    if iscycle:
        N = Product([pymanoptM] * np.sum(degrees - 1))
    else:
        N = Product([pymanoptM] * (np.sum(degrees - 1) + 2))

    problem = Problem(manifold=N, cost=cost, grad=grad, verbosity=verbosity)

    # solver = ConjugateGradient(maxtime=maxtime, maxiter=maxiter,
    #                            mingradnorm=mingradnorm,
    #                            minstepsize=minstepsize,
    #                            maxcostevals=maxcostevals, logverbosity=2)
    solver = SteepestDescent(maxtime=maxtime, maxiter=maxiter,
                             mingradnorm=mingradnorm,
                             minstepsize=minstepsize,
                             maxcostevals=maxcostevals, logverbosity=2)

    if P_init is None:
        P_init = self.initControlPoints(M, Y, param, degrees, iscycle)
    P_init = self.indep_set(P_init, iscycle)

    P_opt, opt_log = solver.solve(problem, list(np.concatenate(P_init)))
    P_opt = self.full_set(M, np.stack(P_opt, axis=0), degrees, iscycle)

    self._spline = BezierSpline(M, P_opt, iscycle=iscycle)
    self._unexplained_variance = opt_log['final_values']["f(x)"] / len(Y)

def wda(X, y, p=2, reg=1, k=10, solver=None, sinkhorn_method='sinkhorn',
        maxiter=100, verbose=0, P0=None, normalize=False):
    r"""
    Wasserstein Discriminant Analysis :ref:`[11] <references-wda>`

    The function solves the following optimization problem:

    .. math::
        \mathbf{P} = \mathop{\arg \min}_\mathbf{P} \quad
        \frac{\sum\limits_i W(P \mathbf{X}^i, P \mathbf{X}^i)}{\sum\limits_{i, j \neq i} W(P \mathbf{X}^i, P \mathbf{X}^j)}

    where :

    - :math:`P` is a linear projection operator in the Stiefel(`p`, `d`) manifold
    - :math:`W` is entropic regularized Wasserstein distances
    - :math:`\mathbf{X}^i` are samples in the dataset corresponding to class i

    **Choosing a Sinkhorn solver**

    By default and when using a regularization parameter that is not too
    small the default sinkhorn solver should be enough. If you need to use a
    small regularization to get sparse cost matrices, you should use the
    :py:func:`ot.dr.sinkhorn_log` solver that will avoid numerical errors,
    but can be slow in practice.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Training samples.
    y : ndarray, shape (n,)
        Labels for training samples.
    p : int, optional
        Size of dimensionality reduction.
    reg : float, optional
        Regularization term >0 (entropic regularization)
    solver : None | str, optional
        None for steepest descent or 'TrustRegions' for trust regions
        algorithm, else should be a pymanopt.solvers
    sinkhorn_method : str
        method used for the Sinkhorn solver, either 'sinkhorn' or
        'sinkhorn_log'
    P0 : ndarray, shape (d, p)
        Initial starting point for projection.
    normalize : bool, optional
        Normalize the Wasserstein distance by the average distance on P0
        (default : False)
    verbose : int, optional
        Print information along iterations.

    Returns
    -------
    P : ndarray, shape (d, p)
        Optimal transportation matrix for the given parameters
    proj : callable
        Projection function including mean centering.

    .. _references-wda:
    References
    ----------
    .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
        Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
    """  # noqa
    if sinkhorn_method.lower() == 'sinkhorn':
        sinkhorn_solver = sinkhorn
    elif sinkhorn_method.lower() == 'sinkhorn_log':
        sinkhorn_solver = sinkhorn_log
    else:
        raise ValueError("Unknown Sinkhorn method '%s'." % sinkhorn_method)

    mx = np.mean(X)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    # compute uniform weights
    wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]

    # pre-compute reg_c,c'
    if P0 is not None and normalize:
        regmean = np.zeros((len(xc), len(xc)))
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P0)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P0)
                M = dist(xi, xj)
                regmean[i, j] = np.sum(M) / (len(xi) * len(xj))
    else:
        regmean = np.ones((len(xc), len(xc)))

    @Autograd
    def cost(P):
        # wda loss
        loss_b = 0
        loss_w = 0
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P)
                M = dist(xi, xj)
                G = sinkhorn_solver(wc[i], wc[j + i], M,
                                    reg * regmean[i, j], k)
                if j == 0:
                    loss_w += np.sum(G * M)
                else:
                    loss_b += np.sum(G * M)
        # loss inversed because minimization
        return loss_w / loss_b

    # declare manifold and problem
    manifold = Stiefel(d, p)
    problem = Problem(manifold=manifold, cost=cost)

    # declare solver and solve
    if solver is None:
        solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose)
    elif solver in ['tr', 'TrustRegions']:
        solver = TrustRegions(maxiter=maxiter, logverbosity=verbose)

    Popt = solver.solve(problem, x=P0)

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj

def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0):
    r"""
    Wasserstein Discriminant Analysis [11]_

    The function solves the following optimization problem:

    .. math::
        P = \text{arg}\min_P
        \frac{\sum_i W(PX^i, PX^i)}{\sum_{i, j \neq i} W(PX^i, PX^j)}

    where :

    - :math:`P` is a linear projection operator in the Stiefel(p, d) manifold
    - :math:`W` is entropic regularized Wasserstein distances
    - :math:`X^i` are samples in the dataset corresponding to class i

    Parameters
    ----------
    X : numpy.ndarray (n, d)
        Training samples
    y : np.ndarray (n,)
        Labels for training samples
    p : int, optional
        Size of dimensionality reduction
    reg : float, optional
        Regularization term >0 (entropic regularization)
    solver : str, optional
        None for steepest descent or 'TrustRegions' for trust regions
        algorithm, else should be a pymanopt solver
    verbose : int, optional
        Print information along iterations

    Returns
    -------
    P : (d x p) ndarray
        Optimal transportation matrix for the given parameters
    proj : fun
        Projection function including mean centering

    References
    ----------
    .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
        Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
    """
    mx = np.mean(X)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    # compute uniform weights
    wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]

    def cost(P):
        # wda loss
        loss_b = 0
        loss_w = 0
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P)
                M = dist(xi, xj)
                G = sinkhorn(wc[i], wc[j + i], M, reg, k)
                if j == 0:
                    loss_w += np.sum(G * M)
                else:
                    loss_b += np.sum(G * M)
        # loss inversed because minimization
        return loss_w / loss_b

    # declare manifold and problem
    manifold = Stiefel(d, p)
    problem = Problem(manifold=manifold, cost=cost)

    # declare solver and solve
    if solver is None:
        solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose)
    elif solver in ['tr', 'TrustRegions']:
        solver = TrustRegions(maxiter=maxiter, logverbosity=verbose)

    Popt = solver.solve(problem)

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj

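# Usage sketch for wda above: project illustrative two-class data from d = 3
# down to p = 2 (relies on this module's split_classes, dist and sinkhorn
# helpers).
import numpy as np

rng = np.random.RandomState(0)
n_per_class = 50
X0 = rng.randn(n_per_class, 3) + np.array([2.0, 0.0, 0.0])
X1 = rng.randn(n_per_class, 3) - np.array([2.0, 0.0, 0.0])
X_all = np.vstack([X0, X1])
y_all = np.repeat([0, 1], n_per_class)

Popt, proj = wda(X_all, y_all, p=2, reg=1, k=10)
print(proj(X_all).shape)  # (100, 2)
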
def transforming_A(self):
    with SuppressPrints(not self.verbose_obtimizer):
        result = SteepestDescent(maxtime=self.maxtime,
                                 maxiter=self.maxiter).solve(
                                     self.transformer, x=self.x0)
    return result

def run(self, S, F, v=None, C=None, fs=None, omega=None, maxiter=500,
        tol=1e-10, variant='bp'):
    '''
    Run MERLiN algorithm.

    Whether to run a scalar variant, i.e. S -> C -> w'F, or a timeseries
    variant, i.e. S -> C -> bp(w'F), is determined by the dimensionality
    of the input F.

    Input (default)
    - S: (m x 1) np.array that contains the samples of S
    - F: either a (d x m) np.array that contains the linear mixture samples
         or a (d x m x n) np.array that contains the linearly mixed
         timeseries of length n (d channels, m trials)
    - v: (d x 1) np.array holding the linear combination that extracts the
         middle node C from F
    - C: (m x 1) np.array that contains the samples of the middle node C
    - fs: sampling rate in Hz
    - omega: tuple of (low, high) cut-off of desired frequency band
    - maxiter (500): maximum iterations to run the optimisation algorithm for
    - tol (1e-10): terminate optimisation if step size < tol or
      grad norm < tol
    - variant ('bp'): determines which MERLiN variant to use on timeseries
      data ('bp' = MERLiNbp algorithm ([1], Algorithm 4),
      'bpicoh' = MERLiNbpicoh algorithm ([1], Algorithm 5),
      'nlbp' = MERLiNnlbp)

    Output
    - w: linear combination that was found and should extract the effect
      of C from F
    - converged: boolean that indicates whether the stopping criterion was
      met before the maximum number of iterations was performed
    - curob: objective function's value at w
    '''
    self._S = S
    self._Forig = F
    self._fs = fs
    self._omega = omega
    self._d = F.shape[0]
    self._m = F.shape[1]

    # scalar or timeseries mode
    if F.ndim == 3:
        self._mode = 'timeseries'
        self._n = F.shape[2]
        if not (fs and omega):
            raise ValueError('Both the optional arguments fs and omega '
                             'need to be provided.')
        if self._verbosity:
            print('Launching MERLiN' + variant + ' for iid sampled '
                  'timeseries chunks.')
    elif F.ndim == 2:
        self._mode = 'scalar'
        if self._verbosity:
            print('Launching MERLiN for iid sampled scalars.')
    else:
        raise ValueError('F needs to be a 2-dimensional numpy array '
                         '(iid sampled scalars) or a 3-dimensional '
                         'numpy array (iid sampled timeseries chunks).')

    self._prepare(v, C)

    # String identity checks ('is') replaced by equality checks ('==').
    if self._mode == 'scalar':
        problem = self._problem_MERLiN()
    elif variant == 'bp':
        problem = self._problem_MERLiNbp()
    elif variant == 'bpicoh':
        problem = self._problem_MERLiNbpicoh()
    elif variant == 'nlbp':
        problem = self._problem_MERLiNnlbp()
    else:
        raise NotImplementedError

    if variant != 'nlbp':
        problem.manifold = Sphere(self._d, 1)
    else:
        problem.manifold = Product(
            [Sphere(self._d, 1), Euclidean(1, 1), Euclidean(1, 1)])

    # choose best out of ten 10-step runs as initialisation
    solver = SteepestDescent(maxiter=10, logverbosity=1)
    res = [solver.solve(problem) for k in range(0, 10)]
    obs = [-r[1]['final_values']['f(x)'] for r in res]
    w0 = res[obs.index(max(obs))][0]

    solver = SteepestDescent(maxtime=float('inf'), maxiter=maxiter,
                             mingradnorm=tol, minstepsize=tol,
                             logverbosity=1)
    if self._verbosity:
        print('Running optimisation algorithm.')
    w, info = solver.solve(problem, x=w0)
    if variant == 'nlbp':
        w = w[0]

    converged = maxiter != info['final_values']['iterations']
    curob = -float(info['final_values']['f(x)'])
    if self._verbosity:
        print('DONE.')
    return self._P.T.dot(w), converged, curob

def RidgeAlternating(X, f, U0, degree=1, maxiter=100, tol=1e-10,
                     history=False, disp=False, gtol=1e-6, inner_iter=20):
    if len(f.shape) == 1:
        f = f.reshape(-1, 1)

    # Instantiate the polynomial approximation
    rs = PolynomialApproximation(N=degree)

    # Instantiate the Grassmann manifold
    m, n = U0.shape
    manifold = Grassmann(m, n)

    if history:
        hist = {}
        hist['U'] = []
        hist['residual'] = []
        hist['inner_steps'] = []

    # Alternating minimization
    i = 0
    res = 1e9
    while i < maxiter and res > tol:
        # Train the polynomial approximation with projected points
        Y = np.dot(X, U0)
        rs.train(Y, f)

        # Minimize residual with polynomial over Grassmann
        func = lambda y: _res(y, X, f, rs)
        grad = lambda y: _dres(y, X, f, rs)
        problem = Problem(manifold=manifold, cost=func, egrad=grad,
                          verbosity=0)
        if history:
            solver = SteepestDescent(logverbosity=1, mingradnorm=gtol,
                                     maxiter=inner_iter, minstepsize=tol)
            U1, log = solver.solve(problem, x=U0)
        else:
            solver = SteepestDescent(logverbosity=0, mingradnorm=gtol,
                                     maxiter=inner_iter, minstepsize=tol)
            U1 = solver.solve(problem, x=U0)

        # Evaluate and store the residual
        res = func(U1)  # This is the squared mismatch
        if history:
            hist['U'].append(U1)
            # To match the rest of the code, we define the residual as the mismatch
            r = (f - rs.predict(Y)[0]).flatten()
            hist['residual'].append(r)
            hist['inner_steps'].append(log['final_values']['iterations'])

        if disp:
            print("iter %3d\t |r| : %10.10e" % (i, np.linalg.norm(res)))

        # Update iterators
        U0 = U1
        i += 1

    # Store data
    if i == maxiter:
        exitflag = 1
    else:
        exitflag = 0

    if history:
        return U0, hist
    else:
        return U0

        Sigma_norm_B = np.dot(np.dot(B, Sigma_norm), B.transpose())
        Wcd = dlyap_iterative(Ad.transpose(), Sigma_norm_B)
        Wnum = np.dot(Wo, Wcd)
        Wden = np.dot(Wo, Wcd - Sigma_norm_B)
        num = np.linalg.det(Wnum / sigma2 + I)
        den = np.linalg.det(Wden / sigma2 + I)
        return -np.log2(num / den) / (2. * T)

    # instantiate manifold for Pymanopt
    manifold_fact = Euclidean(n, n)

    # instantiate problem for Pymanopt
    problem = Problem(manifold=manifold_fact, cost=shannon_rate, verbosity=0)

    # instantiate solver for Pymanopt
    solver = SteepestDescent()

    # let Pymanopt do the rest
    L_opt = solver.solve(problem)

    # optimal covariance Sigma^*
    Sigma_opt = np.dot(L_opt, L_opt.transpose())
    Sigma_opt_norm = Sigma_opt / np.trace(Sigma_opt)

    # value of transmission rate
    rate = -shannon_rate(L_opt)
    print("Shannon transmission rate:", rate)
    rate_vec[k] = rate
    rate_data[k, pp] = rate
    k = k + 1

def DR_geod_complex(X, m, verbosity=0):
    r"""
    X: array of N points on Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(m, p), m < n)
    where X_hat_i = Q^T X_i, Q \in St(n, m),
    minimizing the projection error (using geodesic distance)
    """
    N, n, p = X.shape
    Cgr = ComplexGrassmann(n, p, N)
    Cgr_low = Grassmann(m, p)
    Cgr_map = ComplexGrassmann(n, m)  # n x m
    XXT = multiprod(X, multihconj(X))

    @pymanopt.function.Callable
    def cost(Q):
        tmp = np.array([np.matmul(Q, Q.T) for i in range(N)])  # N x n x n
        new_X = multiprod(tmp, X)  # N x n x p
        q = np.array([qr(new_X[i])[0] for i in range(N)])
        d2 = Cgr.dist(X, q)**2
        return d2/N

    @pymanopt.function.Callable
    def egrad(Q):
        """ need to be fixed """
        QQ = np.matmul(Q, multihconj(Q))
        tmp = np.array([QQ for i in range(N)])
        XQQX = multiprod(multiprod(multihconj(X), tmp), X)
        lam, V = np.linalg.eigh(XQQX)
        theta = np.arccos(np.sqrt(lam))
        d = -2*theta/(np.cos(theta)*np.sin(theta))
        Sig = np.array([np.diag(dd) for dd in d])
        XV = multiprod(X, V)
        eg = multiprod(XV, multiprod(Sig, multitransp(XV.conj())))
        eg = np.mean(eg, axis=0)
        eg = np.matmul(eg, Q)
        return eg

    def egrad_num(R, eps=1e-8+1e-8j):
        """ compute egrad numerically """
        g = np.zeros(R.shape, dtype=np.complex128)
        for i in range(n):
            for j in range(m):
                R1 = R.copy()
                R2 = R.copy()
                R1[i, j] += eps
                R2[i, j] -= eps
                g[i, j] = (cost(R1) - cost(R2))/(2*eps)
        return g

    # solver = ConjugateGradient()
    solver = SteepestDescent()
    # The optimization runs over the n x m mapping manifold defined above.
    problem = Problem(manifold=Cgr_map, cost=cost, egrad=egrad,
                      verbosity=verbosity)
    Q_proj = solver.solve(problem)

    tmp = np.array([multihconj(Q_proj) for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = X_low/np.expand_dims(np.linalg.norm(X_low, axis=1), axis=2)

    M_hat = compute_centroid(Cgr_low, X_low)
    v_hat = var(Cgr_low, X_low, M_hat)/N
    # NB: `v` is not defined in this function; it must come from the
    # enclosing module scope.
    var_ratio = v_hat/v
    return var_ratio, X_low, Q_proj