def fit(self):
    v_matrix_shape = (self.w_matrix.shape[0], self.w_matrix.shape[1])
    w_matrix = tf.convert_to_tensor(self.w_matrix, dtype=tf.float64)
    z_matrix = tf.convert_to_tensor(self.z_matrix, dtype=tf.float64)
    x_matrix = tf.convert_to_tensor(self.x_matrix, dtype=tf.float64)
    lambda_matrix = tf.convert_to_tensor(self.lambda_matrix, dtype=tf.float64)
    x = tf.Variable(
        initial_value=tf.ones(v_matrix_shape, dtype=tf.dtypes.float64))
    cost = tf.norm(x_matrix - tf.linalg.matmul(
        tf.linalg.matmul(x, lambda_matrix), tf.transpose(x))
    ) + self.rho / 2 * tf.norm(x - w_matrix + z_matrix)
    manifold = Stiefel(v_matrix_shape[0], v_matrix_shape[1])
    problem = Problem(manifold=manifold, cost=cost, arg=x)
    solver = SteepestDescent(logverbosity=self.verbosity)
    if self.verbosity > 0:
        # solve() returns an (x, optlog) tuple whenever logverbosity > 0
        v_optimal, _ = solver.solve(problem)
    else:
        v_optimal = solver.solve(problem)
    if self.verbosity > 2:
        print("==> WSubproblem ==> Showing v_optimal:")
        print(v_optimal)
    return v_optimal
def _align_H_stiefel(self, Q, G):
    """Tangent vector field alignment via optimization on the orthogonal group."""
    N, D, d = Q.shape
    indptr = G.indptr
    indices = G.indices
    K = G.data

    def cost(V):
        F = 0
        for i in range(N):
            for j, K_ij in zip(indices[indptr[i]:indptr[i + 1]],
                               K[indptr[i]:indptr[i + 1]]):
                f_i = K_ij * np.trace(
                    np.dot(np.dot(V[i].T, np.dot(Q[i].T, Q[j])), V[j]))
                F += f_i
        return F

    # The cost indexes one d x d rotation per point, so optimize over N
    # copies of SO(d); start from identity matrices (the zero matrix is not
    # a point on the manifold).
    manifold = Rotations(d, N)
    problem = Problem(manifold=manifold, cost=cost)
    solver = SteepestDescent()
    V = solver.solve(problem, np.stack([np.eye(d)] * N))
    return V
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    n = 128
    matrix = rnd.randn(n, n)
    matrix = 0.5 * (matrix + matrix.T)

    cost, egrad = create_cost_egrad(backend, matrix)
    manifold = Sphere(n)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = SteepestDescent()
    estimated_dominant_eigenvector = solver.solve(problem)

    if quiet:
        return

    # Calculate the actual solution by a conventional eigenvalue decomposition.
    eigenvalues, eigenvectors = la.eig(matrix)
    dominant_eigenvector = eigenvectors[:, np.argmax(eigenvalues)]

    # Make sure both vectors have the same direction. Both are valid
    # eigenvectors, but for comparison we need to get rid of the sign
    # ambiguity.
    if (np.sign(dominant_eigenvector[0]) != np.sign(
            estimated_dominant_eigenvector[0])):
        estimated_dominant_eigenvector = -estimated_dominant_eigenvector

    # Print information about the solution.
    print("l2-norm of x: %f" % la.norm(dominant_eigenvector))
    print("l2-norm of xopt: %f" % la.norm(estimated_dominant_eigenvector))
    print("Solution found: %s" % np.allclose(
        dominant_eigenvector, estimated_dominant_eigenvector, rtol=1e-3))
    error_norm = la.norm(dominant_eigenvector - estimated_dominant_eigenvector)
    print("l2-error: %f" % error_norm)
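# A minimal sketch of the `create_cost_egrad` helper assumed above, covering
# only a plain-numpy backend (an assumption, not the original helper): on the
# sphere, minimizing the negated Rayleigh quotient -x' A x recovers the
# dominant eigenvector of a symmetric matrix.
def create_cost_egrad_numpy(matrix):
    def cost(x):
        return -x.T @ matrix @ x

    def egrad(x):
        return -2 * matrix @ x

    return cost, egrad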
def get_rotation_matrix(M, Mtilde, weights=None, dist=None):
    if dist is None:
        dist = 'euc'
    n = M[0].shape[0]

    # (1) Instantiate a manifold
    manifold = Rotations(n)

    # (2) Define cost function and a problem
    if dist == 'euc':
        cost = partial(cost_function_full, M=M, Mtilde=Mtilde,
                       weights=weights, dist=dist)
        problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    elif dist == 'rie':
        cost = partial(cost_function_full, M=M, Mtilde=Mtilde,
                       weights=weights, dist=dist)
        egrad = partial(egrad_function_full_rie, M=M, Mtilde=Mtilde,
                        weights=weights)
        problem = Problem(manifold=manifold, cost=cost, egrad=egrad,
                          verbosity=0)

    # (3) Instantiate a Pymanopt solver
    solver = SteepestDescent(mingradnorm=1e-3)

    # let Pymanopt do the rest
    Q_opt = solver.solve(problem)
    return Q_opt
def rotation_matrix(mean_source, mean_target_train):
    manifold = Rotations(mean_source[0].shape[0])
    cost = partial(cost_function_full, mean_source, mean_target_train)
    problem = Problem(manifold, cost)
    solver = SteepestDescent(mingradnorm=1e-3)
    U = solver.solve(problem)
    return U
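# A plausible sketch of the `cost_function_full` helper used above (an
# assumption, not the original): a Euclidean Procrustes-style cost measuring
# how well the rotation U maps each source mean onto the matching target mean
# via the congruence U M U'.
import numpy as np

def cost_function_full_sketch(mean_source, mean_target, U):
    return np.sum([
        np.linalg.norm(U @ Ms @ U.T - Mt) ** 2
        for Ms, Mt in zip(mean_source, mean_target)
    ])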
def envelope(X_env, Y_env, u):
    p, r = X_env.shape[1], Y_env.shape[1]
    linear_model = LinearRegression().fit(X_env, Y_env)
    err = Y_env - linear_model.predict(X_env)
    Sigma_res = np.cov(err.transpose())
    Sigma_Y = np.cov(Y_env.transpose())

    def cost(Gamma):
        X = np.matmul(Gamma, Gamma.T)
        out = -np.log(
            np.linalg.det(
                np.matmul(np.matmul(X, Sigma_res), X) +
                np.matmul(np.matmul(np.eye(r) - X, Sigma_Y),
                          np.eye(r) - X)))
        return np.array(out)

    manifold = Grassmann(r, u)
    # manifold = Stiefel(r, u)
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = SteepestDescent()
    Gamma = solver.solve(problem)

    PSigma1_hat = np.matmul(Gamma, Gamma.T)
    PSigma2_hat = np.eye(r) - PSigma1_hat
    beta_hat = np.matmul(PSigma1_hat, linear_model.coef_)
    Sigma1_hat = np.matmul(np.matmul(PSigma1_hat, Sigma_res), PSigma1_hat)
    Sigma2_hat = np.matmul(np.matmul(np.eye(r) - PSigma1_hat, Sigma_res),
                           np.eye(r) - PSigma1_hat)
    alpha_hat = np.mean(Y_env - np.matmul(X_env, beta_hat.T), axis=0)
    return alpha_hat.reshape(1, r), beta_hat.reshape(p, r)
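# Minimal usage sketch on synthetic data (shapes only, not a statistical
# test): n=100 samples, p=3 predictors, r=4 responses, envelope dimension u=2.
import numpy as np
from sklearn.linear_model import LinearRegression  # used inside envelope()

X_env = np.random.randn(100, 3)
Y_env = X_env @ np.random.randn(3, 4) + 0.1 * np.random.randn(100, 4)
alpha_hat, beta_hat = envelope(X_env, Y_env, u=2)  # shapes (1, 4) and (3, 4)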
def get_rotation_matrix(X, C):
    def cost(R):
        Z = npy.dot(X, R)
        M = npy.max(Z, axis=1, keepdims=True)
        return npy.sum((Z / M) ** 2)

    manifold = Stiefel(C, C)
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = SteepestDescent(logverbosity=0)
    opt = solver.solve(problem=problem, x=npy.eye(C))
    return cost(opt), opt
def NG_sdr(X, y, m, v_w=5, v_b=5, verbosity=0, *args, **kwargs):
    r"""
    X: array of N points on (complex) Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(m, p), m < n)
    where X_hat_i = A^H X_i, A \in St(n, m)
    minimizing the projection error (using projection F-norm)
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any()  # true if X is complex-valued
    if cpx:
        gr = ComplexGrassmann(n, p)
        man = ComplexGrassmann(n, m)
    else:
        gr = Grassmann(n, p)
        man = Grassmann(n, m)

    # distance matrix
    dist_m = np.zeros((N, N))
    for i in range(N):
        for j in range(i):
            dist_m[i, j] = gr.dist(X[i], X[j])
            dist_m[j, i] = dist_m[i, j]

    # affinity matrix
    affinity = affinity_matrix(dist_m, y, v_w, v_b)

    X_ = torch.from_numpy(X)
    affinity_ = torch.from_numpy(affinity)

    @pymanopt.function.PyTorch
    def cost(A):
        dm = torch.zeros((N, N))
        for i in range(N):
            for j in range(i):
                dm[i, j] = dist_proj(torch.matmul(A.conj().t(), X_[i]),
                                     torch.matmul(A.conj().t(), X_[j]))**2
                # dm[i, j] = gr_low.dist(X_proj[i], X_proj[j])**2
                dm[j, i] = dm[i, j]
        d2 = torch.mean(affinity_ * dm)
        return d2

    # solver = ConjugateGradient()
    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    A = solver.solve(problem)

    tmp = np.array([A.conj().T for i in range(N)])  # N x m x n
    X_low = multiprod(tmp, X)  # N x m x p
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])
    return X_low, A
def NG_dr1(X, verbosity=0):
    r"""
    X: array of N points on Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(n-1, p))
    where X_hat_i = R^T X_i, R \in St(n, n-1)
    minimizing the projection error (using projection F-norm)
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any()  # true if X is complex-valued
    if cpx:
        man = Product([ComplexGrassmann(n, 1), Euclidean(p, 2)])
    else:
        man = Product([Grassmann(n, 1), Euclidean(p)])

    X_ = torch.from_numpy(X)

    @pymanopt.function.PyTorch
    def cost(v, b):
        vvT = torch.matmul(v, v.conj().t())  # n x n
        if cpx:
            b_ = b[:, 0] + b[:, 1] * 1j
            b_ = torch.unsqueeze(b_, axis=1)
        else:
            b_ = torch.unsqueeze(b, axis=1)
        vbt = torch.matmul(v, b_.t())  # n x p
        IvvT = torch.eye(n, dtype=X_.dtype) - vvT
        d2 = 0
        for i in range(N):
            d2 = d2 + dist_proj(X_[i], torch.matmul(IvvT, X_[i]) + vbt)**2 / N
            # d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]))**2 / N
        return d2

    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    theta = solver.solve(problem)
    v = theta[0]
    b_ = theta[1]
    if cpx:
        b = b_[:, 0] + b_[:, 1] * 1j
        b = np.expand_dims(b, axis=1)
    else:
        b = np.expand_dims(b_, axis=1)

    R = ortho_complement(v)
    tmp = np.array([R.conj().T for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])
    return X_low, R, v, b
def NG_dr(X, m, verbosity=0, *args, **kwargs):
    r"""
    X: array of N points on Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(m, p), m < n)
    where X_hat_i = R^T X_i, R \in St(n, m)
    minimizing the projection error (using projection F-norm)
    """
    N, n, p = X.shape
    cpx = np.iscomplex(X).any()  # true if X is complex-valued
    if cpx:
        man = Product([ComplexGrassmann(n, m), Euclidean(n, p, 2)])
    else:
        man = Product([Grassmann(n, m), Euclidean(n, p)])

    X_ = torch.from_numpy(X)

    @pymanopt.function.PyTorch
    def cost(A, B):
        AAT = torch.matmul(A, A.conj().t())  # n x n
        if cpx:
            B_ = B[:, :, 0] + B[:, :, 1] * 1j
        else:
            B_ = B
        IAATB = torch.matmul(torch.eye(n, dtype=X_.dtype) - AAT, B_)  # n x p
        d2 = 0
        for i in range(N):
            d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]) + IAATB)**2 / N
            # d2 = d2 + dist_proj(X_[i], torch.matmul(AAT, X_[i]))**2 / N
        return d2

    # solver = ConjugateGradient()
    solver = SteepestDescent()
    problem = Problem(manifold=man, cost=cost, verbosity=verbosity)
    theta = solver.solve(problem)
    A = theta[0]
    B = theta[1]
    if cpx:
        B_ = B[:, :, 0] + B[:, :, 1] * 1j
    else:
        B_ = B

    # tmp = np.array([A.T for i in range(N)])
    tmp = np.array([A.conj().T for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = np.array([qr(X_low[i])[0] for i in range(N)])
    return X_low, A, B_
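# Minimal usage sketch (synthetic data): N = 10 random points on Gr(5, 2),
# reduced to Gr(3, 2). Assumes the module's own imports (torch, pymanopt) and
# helpers (dist_proj, multiprod, qr) are available.
import numpy as np

N, n, p, m = 10, 5, 2, 3
X = np.array([np.linalg.qr(np.random.randn(n, p))[0] for _ in range(N)])
X_low, A, B = NG_dr(X, m)  # X_low: N x m x p, A: n x m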
def icf(Fhat, Z, nc=None, return_Fhat=False):
    """Independent Component Factorization (ICF) of an array of matrices Z."""
    # get centered vec(Z) such that zc = A xc = A Cxx^(1/2) xc^w
    zc = centering(Z)
    # get y = Vs^T xc^w, where A Cxx^(1/2) = Us \Sigma_s Vs^T
    y, es, Us = z2y(zc, nc)
    _, N, M = Z.shape
    s, NM = y.shape

    def cost(W):
        WTy = np.dot(W.T, y)
        return np.sum([Fhat(WTy[i]) for i in range(s)])

    # A solver that involves the hessian
    # solver = TrustRegions(mingradnorm=1e-8)
    solver = SteepestDescent(mingradnorm=1e-8)

    # O(s)
    manifold = Rotations(s, 1)

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost)

    # get What = Vs^T P S
    Wopt = solver.solve(problem)

    # get Ahat and xhat such that zc = Ahat xhat
    # get Ahat, which is actually = A Cxx^(1/2) P S
    Ahat = np.dot(Us * es, Wopt)
    # get xhat, which is actually = S^-1 P^-1 xc^w
    # xhat = np.dot(la.inv(Wopt), y)
    # Wopt is orthogonal, so Wopt.T = la.inv(Wopt)
    xhat = np.dot(Wopt.T, y)
    # assert np.allclose(zc, np.dot(Ahat, xhat)), 'Something may be wrong as zc != Ahat xhat'

    # re-order xhat and Ahat, from more non-Gaussian to more Gaussian
    Fhat_values = np.array([Fhat(xhat[i]) for i in range(s)])
    inds = np.argsort(Fhat_values)
    Ahat = Ahat[:, inds]
    xhat = xhat[inds]
    # assert np.allclose(zc, np.dot(Ahat, xhat)), 'Something may be wrong as zc != Ahat xhat'

    # reshape xhat to an array of matrices Xhat
    Xhat = xhat.reshape((s, N, M))

    if return_Fhat:
        return Ahat, Xhat, Fhat_values[inds]
    return Ahat, Xhat
def fcf(Fhat, Z, type='rectangular', nc=None, return_Fhat=False):
    """Free Component Factorization (FCF) of an array of matrices Z."""
    # get centered Z such that Zc = A Xc = A Cxx^(1/2) Xc^w
    Zc = centering(Z, type)
    # get Y = Vs^T Xc^w, where A Cxx^(1/2) = Us \Sigma_s Vs^T
    Y, es, Us = z2y(Zc, type, nc)
    s, N, M = Y.shape

    def cost(W):
        WTY = np.tensordot(W.T, Y, axes=(1, 0))
        return np.sum([Fhat(WTY[i], type) for i in range(s)])

    # A solver that involves the hessian
    # solver = TrustRegions(mingradnorm=1e-8)
    # solver = SteepestDescent(mingradnorm=1e-8)
    solver = SteepestDescent(mingradnorm=1e-8, maxtime=36000, maxiter=3000)

    # O(s)
    manifold = Rotations(s, 1)

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost)

    # get What = Vs^T P S
    Wopt = solver.solve(problem)

    # get Ahat and Xhat such that Zc = Ahat Xhat
    # get Ahat, which is actually = A Cxx^(1/2) P S
    Ahat = np.dot(Us * es, Wopt)
    # get Xhat, which is actually = S^-1 P^-1 Xc^w
    # Xhat = np.tensordot(la.inv(Wopt), Y, axes=(1, 0))
    # Wopt is orthogonal, so Wopt.T = la.inv(Wopt)
    Xhat = np.tensordot(Wopt.T, Y, axes=(1, 0))
    # assert np.allclose(Zc, np.tensordot(Ahat, Xhat, axes=(1, 0))), 'Something may be wrong as Zc != Ahat Xhat'

    # re-order Xhat and Ahat, from more non-Gaussian to more Gaussian
    # (pass `type` as in cost() above)
    Fhat_values = np.array([Fhat(Xhat[i], type) for i in range(s)])
    inds = np.argsort(Fhat_values)
    Ahat = Ahat[:, inds]
    Xhat = Xhat[inds]
    # assert np.allclose(Zc, np.tensordot(Ahat, Xhat, axes=(1, 0))), 'Something may be wrong as Zc != Ahat Xhat'

    if return_Fhat:
        return Ahat, Xhat, Fhat_values[inds]
    return Ahat, Xhat
def fit(self, T, Y, init, maxIter=100):
    self.init_fit(T, Y, None)
    D = self.D + self.L
    K = self.K

    # (1) Instantiate the manifold
    manifold = Product([PositiveDefinite(D + 1, k=K), Euclidean(K - 1)])

    # (2) Define the cost function and problem
    cost = self.get_cost_function(T, Y)
    problem = Problem(manifold=manifold, cost=cost, verbosity=1)

    # (3) Instantiate a Pymanopt solver
    solver = SteepestDescent(maxiter=3 * maxIter)

    # let Pymanopt do the rest
    Xopt = solver.solve(problem)
    self.Xopt_to_theta(Xopt)
def estimate_dominant_eigenvector(matrix):
    """Returns the dominant eigenvector of the symmetric matrix A by
    minimizing the Rayleigh quotient -x' * A * x / (x' * x).
    """
    num_rows, num_columns = gs.shape(matrix)
    assert num_rows == num_columns, 'matrix must be square'
    assert gs.allclose(gs.sum(matrix - gs.transpose(matrix)), 0.0), \
        'matrix must be symmetric'

    def cost(vector):
        return -gs.dot(vector, gs.dot(matrix, vector))

    def egrad(vector):
        return -2 * gs.dot(matrix, vector)

    sphere = GeomstatsSphere(num_columns)
    problem = Problem(manifold=sphere, cost=cost, egrad=egrad)
    solver = SteepestDescent()
    return solver.solve(problem)
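# Minimal usage sketch, assuming geomstats' numpy backend is bound to `gs`:
import numpy as np

A = np.random.randn(5, 5)
A = 0.5 * (A + A.T)  # symmetrize
x = estimate_dominant_eigenvector(A)  # unit vector on the sphere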
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    n = 3
    m = 10
    k = 10

    A = np.random.randn(k, n, m)
    B = np.random.randn(k, n, m)
    ABt = np.array([Ak @ Bk.T for Ak, Bk in zip(A, B)])

    cost, egrad = create_cost_egrad(backend, ABt)
    manifold = SpecialOrthogonalGroup(n, k)
    problem = pymanopt.Problem(manifold, cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = SteepestDescent()
    X = solver.solve(problem)

    if not quiet:
        Xopt = np.array([compute_optimal_solution(ABtk) for ABtk in ABt])
        print("Frobenius norm error:", np.linalg.norm(Xopt - X))
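# A sketch of the `compute_optimal_solution` helper assumed above (an
# assumption, not the original): the classical closed-form solution of the
# special orthogonal Procrustes problem max_{X in SO(n)} trace(X^T A B^T),
# via an SVD of A B^T with a determinant correction that keeps X in SO(n).
import numpy as np

def compute_optimal_solution_sketch(ABt):
    U, _, Vt = np.linalg.svd(ABt)
    s = np.ones(ABt.shape[0])
    s[-1] = np.sign(np.linalg.det(U @ Vt))
    return U @ np.diag(s) @ Vt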
def estimate_dominant_eigenvector(matrix):
    """Returns the dominant eigenvector of the symmetric matrix A by
    minimizing the Rayleigh quotient -x' * A * x / (x' * x).
    """
    num_rows, num_columns = gs.shape(matrix)
    if num_rows != num_columns:
        raise ValueError('Matrix must be square.')
    if not gs.allclose(gs.sum(matrix - gs.transpose(matrix)), 0.0):
        raise ValueError('Matrix must be symmetric.')

    @pymanopt.function.Callable
    def cost(vector):
        return -gs.dot(vector, gs.dot(matrix, vector))

    @pymanopt.function.Callable
    def egrad(vector):
        return -2 * gs.dot(matrix, vector)

    sphere = GeomstatsSphere(num_columns)
    problem = pymanopt.Problem(manifold=sphere, cost=cost, egrad=egrad)
    solver = SteepestDescent()
    return solver.solve(problem)
def get_rotation_matrix(Mt, Ms, metric='euc'):
    Mt = Mt.reshape(-1, *Mt.shape[-2:])
    Ms = Ms.reshape(-1, *Ms.shape[-2:])

    n = Mt[0].shape[0]
    manifolds = Rotations(n)

    if metric == 'euc':
        cost = partial(_procruster_cost_function_euc, Mt=Mt, Ms=Ms)
        problem = Problem(manifold=manifolds, cost=cost, verbosity=0)
    elif metric == 'rie':
        cost = partial(_procruster_cost_function_rie, Mt=Mt, Ms=Ms)
        egrad = partial(_procruster_egrad_function_rie, Mt=Mt, Ms=Ms)
        problem = Problem(manifold=manifolds, cost=cost, egrad=egrad,
                          verbosity=0)

    solver = SteepestDescent(mingradnorm=1e-3)
    Ropt = solver.solve(problem)
    return Ropt
def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0):
    """
    Wasserstein Discriminant Analysis [11]_

    The function solves the following optimization problem:

    .. math::
        P = \\text{arg}\\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)}

    where :

    - :math:`P` is a linear projection operator in the Stiefel(p,d) manifold
    - :math:`W` is entropic regularized Wasserstein distances
    - :math:`X^i` are samples in the dataset corresponding to class i

    Parameters
    ----------
    X : numpy.ndarray (n,d)
        Training samples
    y : np.ndarray (n,)
        labels for training samples
    p : int, optional
        size of dimensionality reduction
    reg : float, optional
        Regularization term >0 (entropic regularization)
    solver : str, optional
        None for steepest descent or 'TrustRegions' for trust regions
        algorithm, else should be a pymanopt.solvers
    verbose : int, optional
        Print information along iterations

    Returns
    -------
    P : (d x p) ndarray
        Optimal transportation matrix for the given parameters
    proj : fun
        projection function including mean centering

    References
    ----------
    .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
        Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
    """
    mx = np.mean(X)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    # compute uniform weights
    wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]

    def cost(P):
        # wda loss
        loss_b = 0
        loss_w = 0
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P)
                M = dist(xi, xj)
                G = sinkhorn(wc[i], wc[j + i], M, reg, k)
                if j == 0:
                    loss_w += np.sum(G * M)
                else:
                    loss_b += np.sum(G * M)
        # loss inverted because minimization
        return loss_w / loss_b

    # declare manifold and problem
    manifold = Stiefel(d, p)
    problem = Problem(manifold=manifold, cost=cost)

    # declare solver and solve
    if solver is None:
        solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose)
    elif solver in ['tr', 'TrustRegions']:
        solver = TrustRegions(maxiter=maxiter, logverbosity=verbose)
    Popt = solver.solve(problem)

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj
import autograd.numpy as np

from pymanopt import Problem
from pymanopt.solvers import SteepestDescent
from pymanopt.manifolds import Stiefel

import pprint

if __name__ == "__main__":
    # Generate random data with highest variance in first 2 dimensions
    X = np.diag([3, 2, 1]).dot(np.random.randn(3, 200))

    # Cost function is the squared reconstruction error
    def cost(w):
        return np.sum(np.sum((X - np.dot(w, np.dot(w.T, X))) ** 2))

    solver = SteepestDescent(logverbosity=2)

    # Projection matrices onto a two dimensional subspace
    manifold = Stiefel(3, 2)

    # Solve the problem with pymanopt
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    wopt, optlog = solver.solve(problem)

    print('And here comes the optlog:\n\r')
    pp = pprint.PrettyPrinter()
    pp.pprint(optlog)
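# As used elsewhere in this section, the `optlog` dict returned when
# logverbosity > 0 exposes (at least) an 'iterations' record (e.g.
# optlog['iterations']['f(x)']), a 'stoppingreason' string, and a
# 'final_values' dict with entries such as 'f(x)' and 'iterations'.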
def run(self, x_init, max_rep=400):
    problem = Problem(manifold=self.manifold, cost=self.cost_function)
    solver = SteepestDescent(maxiter=max_rep)
    self.x_opt = solver.solve(problem, x=x_init)
    return self.x_opt
def __init__(self, M: Manifold, Y, param, degrees, iscycle=False, P_init=None,
             verbosity=2, maxtime=100000, maxiter=100, mingradnorm=1e-6,
             minstepsize=1e-10, maxcostevals=5000):
    """Compute regression with Bézier splines for data in a manifold M using pymanopt.

    :param M: manifold
    :param Y: array containing M-valued data.
    :param param: vector with scalars between 0 and the number of intended
        segments corresponding to the data points in Y. The integer part
        determines the segment to which the data point belongs.
    :param degrees: vector of length L; the l-th entry is the degree of the
        l-th segment of the spline. All entries must be positive. For a
        closed spline, L > 1, degrees[0] > 2 and degrees[-1] > 2 must hold.
    :param iscycle: boolean that determines whether a closed C1 spline shall
        be modeled.
    :param P_init: initial guess
    :param verbosity: 0 is silent, 2 gives the most information; see
        pymanopt's problem class
    :param maxtime: maximum time for steepest descent
    :param maxiter: maximum number of iterations in steepest descent
    :param mingradnorm: stop iteration when the norm of the gradient is lower
        than mingradnorm
    :param minstepsize: stop iteration when the stepsize is smaller than
        minstepsize
    :param maxcostevals: maximum number of allowed cost evaluations

    :return P: list of control points of the optimal Bézier spline
    """
    degrees = np.atleast_1d(degrees)

    self._M = M
    self._Y = Y
    self._param = param

    pymanoptM = ManoptWrap(M)

    # Cost
    def cost(P):
        P = np.stack(P)
        control_points = self.full_set(M, P, degrees, iscycle)
        return self.sumOfSquared(
            BezierSpline(M, control_points, iscycle=iscycle), Y, param)

    # MMM = Product([M for i in range(degrees[0])])  # for conjugated gradient

    # Gradient
    def grad(P):
        P = np.stack(P)
        control_points = self.full_set(M, P, degrees, iscycle)
        grad_E = self.gradSumOfSquared(
            BezierSpline(M, control_points, iscycle=iscycle), Y, param)
        grad_E = self.indep_set(grad_E, iscycle)

        # return _ProductTangentVector([grad_E[0][i] for i in range(len(grad_E[0]))])  # for conjugated gradient
        return np.concatenate(grad_E)

    # Solve optimization problem with pymanopt by optimizing over the
    # independent control points
    if iscycle:
        N = Product([pymanoptM] * np.sum(degrees - 1))
    else:
        N = Product([pymanoptM] * (np.sum(degrees - 1) + 2))

    problem = Problem(manifold=N, cost=cost, grad=grad, verbosity=verbosity)

    # solver = ConjugateGradient(maxtime=maxtime, maxiter=maxiter,
    #                            mingradnorm=mingradnorm,
    #                            minstepsize=minstepsize,
    #                            maxcostevals=maxcostevals, logverbosity=2)
    solver = SteepestDescent(maxtime=maxtime, maxiter=maxiter,
                             mingradnorm=mingradnorm, minstepsize=minstepsize,
                             maxcostevals=maxcostevals, logverbosity=2)

    if P_init is None:
        P_init = self.initControlPoints(M, Y, param, degrees, iscycle)
    P_init = self.indep_set(P_init, iscycle)

    P_opt, opt_log = solver.solve(problem, list(np.concatenate(P_init)))
    P_opt = self.full_set(M, np.stack(P_opt, axis=0), degrees, iscycle)

    self._spline = BezierSpline(M, P_opt, iscycle=iscycle)
    self._unexplained_variance = opt_log['final_values']["f(x)"] / len(Y)
def RidgeAlternating(X, f, U0, degree=1, maxiter=100, tol=1e-10,
                     history=False, disp=False, gtol=1e-6, inner_iter=20):
    if len(f.shape) == 1:
        f = f.reshape(-1, 1)

    # Instantiate the polynomial approximation
    rs = PolynomialApproximation(N=degree)

    # Instantiate the Grassmann manifold
    m, n = U0.shape
    manifold = Grassmann(m, n)

    if history:
        hist = {}
        hist['U'] = []
        hist['residual'] = []
        hist['inner_steps'] = []

    # Alternating minimization
    i = 0
    res = 1e9
    while i < maxiter and res > tol:
        # Train the polynomial approximation with projected points
        Y = np.dot(X, U0)
        rs.train(Y, f)

        # Minimize residual with polynomial over Grassmann
        func = lambda y: _res(y, X, f, rs)
        grad = lambda y: _dres(y, X, f, rs)
        problem = Problem(manifold=manifold, cost=func, egrad=grad,
                          verbosity=0)
        if history:
            solver = SteepestDescent(logverbosity=1, mingradnorm=gtol,
                                     maxiter=inner_iter, minstepsize=tol)
            U1, log = solver.solve(problem, x=U0)
        else:
            solver = SteepestDescent(logverbosity=0, mingradnorm=gtol,
                                     maxiter=inner_iter, minstepsize=tol)
            U1 = solver.solve(problem, x=U0)

        # Evaluate and store the residual
        res = func(U1)  # This is the squared mismatch
        if history:
            hist['U'].append(U1)
            # To match the rest of the code, we define the residual as the mismatch
            r = (f - rs.predict(Y)[0]).flatten()
            hist['residual'].append(r)
            hist['inner_steps'].append(log['final_values']['iterations'])

        if disp:
            print("iter %3d\t |r| : %10.10e" % (i, np.linalg.norm(res)))

        # Update iterators
        U0 = U1
        i += 1

    # Store data
    if i == maxiter:
        exitflag = 1
    else:
        exitflag = 0

    if history:
        return U0, hist
    else:
        return U0
def kindr(Ud, n_clusters, mansolver, init, tol_in, tol_out, max_iter_in,
          max_iter_out, disp, do_inner, post_SR, isnrm_row_U, isnrm_col_H,
          isbinary_H):
    if max_iter_out <= 0:
        raise ValueError('Number of iterations should be a positive number,'
                         ' got %d instead' % max_iter_out)
    if tol_out <= 0:
        raise ValueError('The tolerance should be a positive number,'
                         ' got %g instead' % tol_out)
    try:
        from pymanopt import Problem
        from pymanopt.manifolds import Stiefel, Rotations
        from pymanopt.solvers import SteepestDescent, ConjugateGradient, \
            TrustRegions, NelderMead
    except ImportError:
        raise ValueError(
            "KindR needs pymanopt, which is unavailable, try KindAP instead.")
        # warnings.warn("KindR solver is unavailable. Transfer to"
        #               " KindAP instead.")
    try:
        import autograd.numpy as anp
    except ImportError:
        warnings.warn(
            "Pymanopt needs autograd, which is unavailable, try KindAP instead.")
        idx, center, gerr, numiter = kindap(Ud, n_clusters, init, tol_in,
                                            tol_out, max_iter_in,
                                            max_iter_out, disp, True, post_SR,
                                            isnrm_row_U, isnrm_col_H,
                                            isbinary_H)
        return idx, center, gerr, numiter

    n, d = Ud.shape
    k = n_clusters
    if d != k:
        warnings.warn('Provided more features, expected %d, got %d' % (k, d))
    if isnrm_row_U:
        Ud = normalize(Ud, axis=1)

    # initialization
    if isinstance(init, string_types) and init == 'eye':
        # Z_0 = -np.identity(k)
        U = Ud[:, :k]
    elif hasattr(init, '__array__'):
        if init.shape[0] != d:
            raise ValueError(
                'The row size of init should be the same as the total'
                ' features, got %d instead.' % init.shape[0])
        if init.shape[1] != k:
            raise ValueError(
                'The column size of init should be the same as the total'
                ' clusters, got %d instead.' % init.shape[1])
        U = np.matmul(Ud, np.array(init))
    elif isinstance(init, string_types) and init == 'random':
        H = sparse.csc_matrix(
            (np.ones((n, )), (np.arange(n), np.random.randint(0, k, (n, )))),
            shape=(n, k))
        smat, sigma, vmat = la.svd(sparse.csc_matrix.dot(Ud.T, H),
                                   full_matrices=False)
        z_init = np.matmul(smat, vmat)
        U = np.matmul(Ud, z_init)
    else:
        raise ValueError(
            "The init parameter for KindAP should be 'eye', 'random',"
            " or an array. Got a %s with type %s instead."
            % (init, type(init)))

    if isinstance(do_inner, bool) or isinstance(do_inner, int):
        do_inner = bool(do_inner)
    elif isinstance(do_inner, string_types) and \
            (do_inner in ["relu", "softmax"]):
        pass
    else:
        raise ValueError("Invalid input for do_inner")

    H, N = sparse.csc_matrix((n, k)), sparse.csc_matrix((n, k))
    dUH = float('inf')
    numiter, gerr = [], []
    idx = np.ones(n)
    crit2 = np.zeros(4)

    def cost_n(rotation):
        return 0.5 * anp.sum(anp.minimum(anp.matmul(Ud, rotation), 0)**2)

    # def cost_softmax(rotation):
    #     umat = anp.matmul(Ud, rotation)
    #     exp_umat = anp.exp(umat)
    #     mat = exp_umat / exp_umat.sum(axis=1).reshape(Ud.shape[0], 1)
    #     return 0.5 * anp.sum((umat - mat) ** 2)

    for n_iter_out in range(max_iter_out):
        idxp, Up, Np, Hp = idx, U, N, H
        optlog = {}
        itr = 0
        # inner iterations
        if do_inner:
            if d == k:
                manifold = Rotations(k)
            else:
                manifold = Stiefel(d, k)
            # if do_inner == "softmax":
            #     cost = cost_softmax
            # else:
            #     cost = cost_n
            problem = Problem(manifold=manifold, cost=cost_n, verbosity=0)
            if mansolver == "SD":
                solver = SteepestDescent(maxiter=max_iter_in,
                                         mingradnorm=tol_in, logverbosity=2)
            elif mansolver == "CG":
                solver = ConjugateGradient(maxiter=max_iter_in,
                                           mingradnorm=tol_in, logverbosity=2)
            elif mansolver == "TR":
                solver = TrustRegions(maxiter=max_iter_in,
                                      mingradnorm=tol_in, logverbosity=2)
            elif mansolver == "NM":
                solver = NelderMead(maxiter=max_iter_in,
                                    mingradnorm=tol_in, logverbosity=2)
            else:
                raise ValueError(
                    "Undefined manifold optimization method, expected SD, CG,"
                    " TR or NM, got %s instead" % mansolver)
            Z, optlog = solver.solve(problem)
            U = np.matmul(Ud, Z)
            N = np.maximum(U, 0)
            itr = len(optlog['iterations']['iteration'])
            numiter.append(itr)
            if disp:
                print(optlog['iterations']['f(x)'])
                print(optlog['stoppingreason'])
        else:
            N = U
            numiter.append(itr)
        # project onto H
        H, idx = proj_h(N, isnrm_col_H, isbinary_H)
        idxchg = sum(idx != idxp)
        # project back to Ud
        U = proj_ud(H, Ud)
        dUHp = dUH
        dUH = la.norm(U - H, 'fro')
        gerr.append(dUH)
        if disp:
            print('Outer %3d: %3d dUH: %11.8e idxchg: %6d' %
                  (n_iter_out + 1, itr, dUH, idxchg))
        # stopping criteria of outer iterations
        crit2[0] = dUH < 1e-12
        crit2[1] = abs(dUH - dUHp) < dUHp * tol_out
        crit2[2] = dUH > dUHp
        crit2[3] = idxchg == 0
        if any(crit2):
            if post_SR and do_inner:
                do_inner, isbinary_H = False, True
                continue
            if crit2[2] and not crit2[1]:
                idx, H, U, N, dUH = idxp, Hp, Up, Np, dUHp
            if disp and optlog and 'stoppingreason' in optlog:
                print('\t stop reason:', optlog['stoppingreason'])
            break

    center = sparse.csc_matrix.dot(
        normalize((H != 0), axis=0, norm='l1').T, Ud)
    return idx, center, gerr, numiter
def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
    """
    Wasserstein Discriminant Analysis [11]_

    The function solves the following optimization problem:

    .. math::
        P = \\text{arg}\\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)}

    where :

    - :math:`P` is a linear projection operator in the Stiefel(p,d) manifold
    - :math:`W` is entropic regularized Wasserstein distances
    - :math:`X^i` are samples in the dataset corresponding to class i

    Parameters
    ----------
    X : numpy.ndarray (n,d)
        Training samples
    y : np.ndarray (n,)
        labels for training samples
    p : int, optional
        size of dimensionality reduction
    reg : float, optional
        Regularization term >0 (entropic regularization)
    solver : str, optional
        None for steepest descent or 'TrustRegions' for trust regions
        algorithm, else should be a pymanopt.solvers
    P0 : numpy.ndarray (d,p)
        Initial starting point for projection
    verbose : int, optional
        Print information along iterations

    Returns
    -------
    P : (d x p) ndarray
        Optimal transportation matrix for the given parameters
    proj : fun
        projection function including mean centering

    References
    ----------
    .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
        Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
    """  # noqa
    mx = np.mean(X)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    # compute uniform weights
    wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]

    def cost(P):
        # wda loss
        loss_b = 0
        loss_w = 0
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P)
                M = dist(xi, xj)
                G = sinkhorn(wc[i], wc[j + i], M, reg, k)
                if j == 0:
                    loss_w += np.sum(G * M)
                else:
                    loss_b += np.sum(G * M)
        # loss inverted because minimization
        return loss_w / loss_b

    # declare manifold and problem
    manifold = Stiefel(d, p)
    problem = Problem(manifold=manifold, cost=cost)

    # declare solver and solve
    if solver is None:
        solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose)
    elif solver in ['tr', 'TrustRegions']:
        solver = TrustRegions(maxiter=maxiter, logverbosity=verbose)
    Popt = solver.solve(problem, x=P0)

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj
def run(self, S, F, v=None, C=None, fs=None, omega=None, maxiter=500,
        tol=1e-10, variant='bp'):
    '''
    Run MERLiN algorithm.

    Whether to run a scalar variant, i.e. S -> C -> w'F, or a timeseries
    variant, i.e. S -> C -> bp(w'F), is determined by the dimensionality
    of the input F.

    Input (default)
    - S: (m x 1) np.array that contains the samples of S
    - F: either a (d x m) np.array that contains the linear mixture
      samples or a (d x m x n) np.array that contains the linearly mixed
      timeseries of length n (d channels, m trials)
    - v: (d x 1) np.array holding the linear combination that extracts
      the middle node C from F
    - C: (m x 1) np.array that contains the samples of the middle node C
    - fs: sampling rate in Hz
    - omega: tuple of (low, high) cut-off of desired frequency band
    - maxiter (500): maximum iterations to run the optimisation
      algorithm for
    - tol (1e-10): terminate optimisation if step size < tol or
      grad norm < tol
    - variant ('bp'): determines which MERLiN variant to use on
      timeseries data ('bp' = MERLiNbp algorithm ([1], Algorithm 4),
      'bpicoh' = MERLiNbpicoh algorithm ([1], Algorithm 5),
      'nlbp' = MERLiNnlbp)

    Output
    - w: linear combination that was found and should extract the effect
      of C from F
    - converged: boolean that indicates whether the stopping criterion
      was met before the maximum number of iterations was performed
    - curob: objective function's value at w
    '''
    self._S = S
    self._Forig = F
    self._fs = fs
    self._omega = omega
    self._d = F.shape[0]
    self._m = F.shape[1]

    # scalar or timeseries mode
    if F.ndim == 3:
        self._mode = 'timeseries'
        self._n = F.shape[2]
        if not (fs and omega):
            raise ValueError('Both the optional arguments fs and omega '
                             'need to be provided.')
        if self._verbosity:
            print('Launching MERLiN' + variant + ' for iid sampled '
                  'timeseries chunks.')
    elif F.ndim == 2:
        self._mode = 'scalar'
        if self._verbosity:
            print('Launching MERLiN for iid sampled scalars.')
    else:
        raise ValueError('F needs to be a 2-dimensional numpy array '
                         '(iid sampled scalars) or a 3-dimensional '
                         'numpy array (iid sampled timeseries chunks).')

    self._prepare(v, C)

    if self._mode == 'scalar':
        problem = self._problem_MERLiN()
    elif variant == 'bp':
        problem = self._problem_MERLiNbp()
    elif variant == 'bpicoh':
        problem = self._problem_MERLiNbpicoh()
    elif variant == 'nlbp':
        problem = self._problem_MERLiNnlbp()
    else:
        raise NotImplementedError

    if variant != 'nlbp':
        problem.manifold = Sphere(self._d, 1)
    else:
        problem.manifold = Product(
            [Sphere(self._d, 1), Euclidean(1, 1), Euclidean(1, 1)])

    # choose best out of ten 10-step runs as initialisation
    solver = SteepestDescent(maxiter=10, logverbosity=1)
    res = [solver.solve(problem) for k in range(0, 10)]
    obs = [-r[1]['final_values']['f(x)'] for r in res]
    w0 = res[obs.index(max(obs))][0]

    solver = SteepestDescent(maxtime=float('inf'), maxiter=maxiter,
                             mingradnorm=tol, minstepsize=tol,
                             logverbosity=1)
    if self._verbosity:
        print('Running optimisation algorithm.')
    w, info = solver.solve(problem, x=w0)
    if variant == 'nlbp':
        w = w[0]
    converged = maxiter != info['final_values']['iterations']
    curob = -float(info['final_values']['f(x)'])
    if self._verbosity:
        print('DONE.')
    return self._P.T.dot(w), converged, curob
class Cluster_Pblm:
    """
    A class which holds all of the data needed for running the clustering
    problems. Also has methods for the common functions we use, and has our
    algorithms. This is useful because we want to use P and the precomputed
    vector of the norms of points, instead of the distance matrix, for
    computing various things.
    """

    def __init__(self, P, k, testing=True):
        """
        P is a (d x n) matrix holding our points.
        k is the number of clusters expected.
        If testing is true then we do extra computations which are useful
        but not computationally necessary.
        """
        self.P = P  # Matrix of points
        self.nu = np.sum(P**2, axis=0)[:, None]  # Matrix of pts' norms.
        self.d = P.shape[0]  # Dimension the points lie in
        self.n = P.shape[1]  # number of points
        self.k = k  # number of clusters
        self.M = Y_mani(self.n, self.k)  # "Y" manifold corresponding to our problem.
        # NB it may be quicker to use "sums" and such strictly instead of the ones matrix
        self.one = np.ones((self.n, 1))  # Matrix of all ones.
        # The solver we use for gradient descent.
        # NB there may be some better way to set the settings,
        # perhaps dynamically depending on the data.
        self.solver = SteepestDescent(maxiter=MAXITER, logverbosity=1,
                                      mingradnorm=MINGRADNORM)
        if testing:
            # Computationally expensive, and not needed in final code.
            self.D = dist_mat(P)**2  # Distance squared matrix
            # Slow way which is replaced by the way below.
            # self.Dsize = la.norm(self.D)
            self.Dsize = np.sqrt(2*self.n*(self.nu**2).sum()
                                 + 2*(self.nu.sum())**2
                                 + 4*la.norm(P.dot(P.T))**2
                                 - 8*(P.T.dot(P.dot(self.nu)).sum()))

    def tr(self, Y):
        """
        Returns tr(DYY^T).
        This code uses the computation which is linear in n.
        """
        nu, P, one = self.nu, self.P, self.one
        term1 = 2*((one.T.dot(Y)).dot(Y.T.dot(nu)))[0, 0]
        term2 = -2*np.sum(P.dot(Y)**2)
        return (term1 + term2)

    def gr_tr(self, Y):
        """
        Returns the (euclidean) gradient of tr.
        This code uses the computation which is linear in n.
        """
        nu, P, one = self.nu, self.P, self.one
        return (2*(one.dot(nu.T.dot(Y)) + nu.dot(one.T.dot(Y)))
                - 4*P.T.dot(P.dot(Y)))

    def gr_tr_projected(self, Y):
        """Returns the M gradient of tr."""
        W = self.gr_tr(Y)
        return self.M.proj(Y, W)

    def neg(self, Y):
        """Returns the norm of the negative part of Y."""
        negpt = Y*(Y < 0)
        return (negpt**2).sum()

    def gr_neg(self, Y):
        """Returns the (euclidean) gradient of the negative part of Y."""
        return 2*Y*(Y < 0)

    def fn_weighted(self, a, b):
        """Returns a function which computes a*tr(Y) + b*neg(Y)."""
        return lambda Y: a*self.tr(Y) + b*self.neg(Y)

    def gr_weighted(self, a, b):
        """Returns a function which computes a*gr_tr(Y) + b*gr_neg(Y)."""
        return lambda Y: a*self.gr_tr(Y) + b*self.gr_neg(Y)

    def run_minimization(self, a, b, Y0=None, testing=True):
        """
        Optimizes the problem a*tr(Y) + b*neg(Y).
        If Y0 is given then this runs gradient descent starting from Y0,
        otherwise it starts from a random point.
        Returns the Y value, together with the log information from the
        solver.
        If testing=True then it also prints the number of iterations needed
        for convergence, which is nice for watching the algorithm run and
        understanding its difficulties.
        """
        cst = self.fn_weighted(a, b)
        grad = self.gr_weighted(a, b)
        pblm = mo.Problem(manifold=self.M, cost=cst, egrad=grad, verbosity=0)
        Y, log = self.solver.solve(pblm, x=Y0)
        if testing:
            print("Number of Iterations: "
                  + str(log['final_values']['iterations']))
        return Y, log

    def run_lloyd(self):
        """
        Run Lloyd's algorithm from sklearn.
        Return the Y matrix corresponding to the clustering given.
        """
        # Note this method expects the transpose of our P matrix.
        clustering = KMeans(n_clusters=self.k).fit(self.P.T)
        clusters = clustering.labels_
        Y = np.zeros((self.n, self.k))
        cts = collections.Counter(clusters)
        for (pt, cluster) in zip(range(self.n), clusters):
            Y[pt, cluster] = 1/np.sqrt(cts[cluster])
        return Y

    def do_path(self, As, Bs, smart_start=True, save=False):
        """
        As and Bs are lists of a and b coefficients.
        Runs the minimization for the (a,b) pairs successively, using the
        previous answer as the initial guess.
        If smart_start = True, then the first minimization is done with
        (a,b) = (1,0) and the negative part is minimized as well.
        If save = True, then the list of minimizing Y's is saved and
        returned.
        """
        record = []
        if smart_start:
            # We first run the minimization of tr alone.
            Y0, _ = self.run_minimization(1, 0)
            # Next we minimize neg(Y) over all Y with the same X = YY^T
            Y = self.minneg_in_SOk(Y0)
            record = [Y]
        else:
            Y = None
        for (a, b) in zip(As, Bs):
            print("Current (a,b): " + str((a, b)))
            Y_prev = Y
            Y, _ = self.run_minimization(a, b, Y0=Y)
            if Y_prev is not None:
                print("Clustering Change: "
                      + str(la.norm(self.M.round_clustering(Y)
                                    - self.M.round_clustering(Y_prev))))
            if save:
                record.append(Y)
        Y_last, _ = self.run_minimization(0, 1, Y0=Y)
        record.append(Y_last)
        return (Y, record)

    def minneg_in_SOk(self, Y0):
        r"""
        Minimizes the negative part of $Y_0 Q$ over $Q \in SO(k)$.
        """
        def cost(Q):
            Y = Y0.dot(Q)
            return self.neg(Y)

        def cost_grad(Q):
            Y = Y0.dot(Q)
            return Y0.transpose().dot(Y*(Y < 0))

        k = Y0.shape[1]
        SOk = Stiefel(k, k)  # Stiefel(k, k) is the full orthogonal group O(k)
        pblm = mo.Problem(manifold=SOk, cost=cost, egrad=cost_grad,
                          verbosity=0)
        Q, log = self.solver.solve(pblm)
        return Y0.dot(Q)
def DR_geod_complex(X, m, verbosity=0):
    r"""
    X: array of N points on Gr(n, p); N x n x p array
    aim to represent X by X_hat (N points on Gr(m, p), m < n)
    where X_hat_i = Q^H X_i, Q \in St(n, m)
    minimizing the projection error (using geodesic distance)
    """
    N, n, p = X.shape
    Cgr = ComplexGrassmann(n, p, N)
    Cgr_low = ComplexGrassmann(m, p)
    Cgr_map = ComplexGrassmann(n, m)  # n x m
    XXT = multiprod(X, multihconj(X))

    @pymanopt.function.Callable
    def cost(Q):
        # projector onto span(Q); use the conjugate transpose for complex Q
        QQH = np.matmul(Q, np.conjugate(Q.T))
        tmp = np.array([QQH for i in range(N)])  # N x n x n
        new_X = multiprod(tmp, X)  # N x n x p
        q = np.array([qr(new_X[i])[0] for i in range(N)])
        d2 = Cgr.dist(X, q)**2
        return d2/N

    @pymanopt.function.Callable
    def egrad(Q):
        """needs to be fixed"""
        QQ = np.matmul(Q, multihconj(Q))
        tmp = np.array([QQ for i in range(N)])
        XQQX = multiprod(multiprod(multihconj(X), tmp), X)
        lam, V = np.linalg.eigh(XQQX)
        theta = np.arccos(np.sqrt(lam))
        d = -2*theta/(np.cos(theta)*np.sin(theta))
        Sig = np.array([np.diag(dd) for dd in d])
        XV = multiprod(X, V)
        eg = multiprod(XV, multiprod(Sig, multitransp(XV.conj())))
        eg = np.mean(eg, axis=0)
        eg = np.matmul(eg, Q)
        return eg

    def egrad_num(R, eps=1e-8+1e-8j):
        """compute egrad numerically"""
        g = np.zeros(R.shape, dtype=np.complex128)
        for i in range(n):
            for j in range(m):
                R1 = R.copy()
                R2 = R.copy()
                R1[i, j] += eps
                R2[i, j] -= eps
                g[i, j] = (cost(R1) - cost(R2))/(2*eps)
        return g

    # solver = ConjugateGradient()
    solver = SteepestDescent()
    problem = Problem(manifold=Cgr_map, cost=cost, egrad=egrad,
                      verbosity=verbosity)
    Q_proj = solver.solve(problem)

    tmp = np.array([multihconj(Q_proj) for i in range(N)])
    X_low = multiprod(tmp, X)
    X_low = X_low/np.expand_dims(np.linalg.norm(X_low, axis=1), axis=2)

    M_hat = compute_centroid(Cgr_low, X_low)
    v_hat = var(Cgr_low, X_low, M_hat)/N
    # `v` was undefined in the original excerpt; it is reconstructed here
    # (an assumption) as the total variance of the input data, analogously
    # to `v_hat` on the full-dimensional manifold.
    gr_full = ComplexGrassmann(n, p)
    v = var(gr_full, X, compute_centroid(gr_full, X))/N
    var_ratio = v_hat/v
    return var_ratio, X_low, Q_proj
def spatial_envelope(X_env, Y_env, si, theta, u, thershold):
    # const:
    #   n, p, r
    #   H, G
    #   beta_MLE
    #   si: matrix of location
    n, p, r = X_env.shape[0], X_env.shape[1], Y_env.shape[1]
    H = Y_env - np.kron(
        np.mean(Y_env, axis=0).reshape(1, r),
        np.repeat(1, n).reshape(n, 1))
    G = X_env - np.kron(
        np.mean(X_env, axis=0).reshape(1, p),
        np.repeat(1, n).reshape(n, 1))
    linear_model = LinearRegression().fit(X_env, Y_env)
    err = Y_env - linear_model.predict(X_env)
    beta_MLE = linear_model.coef_

    # changeable:
    #   judge, iter_count, err_count, theta
    #   V0_hat, V1_hat, PV0_hat, PV1_hat
    #   Sigma_Y, Sigma_res
    #   rho_h_theta
    Sigma_res = np.cov(err.T)
    Sigma_Y = np.cov(Y_env.T)
    judge = True
    iter_count = 0
    cal_thershold, min_thershold = thershold, thershold
    prot = 1
    err_count = 0

    while judge:
        try:
            if err_count > 10:
                print("Start new")
                theta = np.random.uniform(0, 0.5, 2)
                err_count = 0

            # Step 1: Optimize on Gamma to get V0, V1, PV0, PV1
            def cost(Gamma):
                X = np.matmul(Gamma, Gamma.T)
                out = -np.log(
                    np.linalg.det(
                        np.matmul(np.matmul(X, Sigma_res), X) +
                        np.matmul(np.matmul(np.eye(r) - X, Sigma_Y),
                                  np.eye(r) - X)))
                return np.array(out)

            manifold = Grassmann(r, u)
            # manifold = Stiefel(r, u)
            problem = Problem(manifold=manifold, cost=cost, verbosity=0)
            solver = SteepestDescent()
            Gamma = solver.solve(problem)
            PV1_hat = np.matmul(Gamma, Gamma.T)
            PV0_hat = np.eye(r) - PV1_hat
            V1_hat = np.matmul(np.matmul(PV1_hat, Sigma_Y), PV1_hat)
            V0_hat = np.matmul(np.matmul(PV1_hat, Sigma_res), PV1_hat)

            # Step 2: Optimize on theta
            def theta_fun(theta):
                rho_h_theta = np.array(rho(si, theta))
                item1 = np.matmul(
                    sqrtm(np.linalg.inv(rho_h_theta).real).real, G)
                project = lambda x: np.eye(n) - np.matmul(
                    np.matmul(x, np.linalg.inv(np.matmul(x.T, x)).real), x.T)
                item2 = np.matmul(
                    np.matmul(project(item1),
                              sqrtm(np.linalg.inv(rho_h_theta).real).real), H)
                item3 = np.matmul(
                    np.matmul(item2, np.linalg.pinv(V1_hat).real), item2.T)
                item4 = np.matmul(
                    sqrtm(np.linalg.inv(rho_h_theta).real).real, H)
                item5 = np.matmul(
                    np.matmul(item4, np.linalg.pinv(V0_hat).real), item4.T)
                loss = r * np.linalg.det(rho_h_theta) + \
                    0.5 * np.trace(item3 + item5)
                return loss

            # print("Theta: {}".format(theta))
            opt_res = minimize(theta_fun, theta, method="BFGS")
            # print("Pass")
            weight = max(min(1, 1 / cal_thershold),
                         (thershold / prot)**(1 - 1 / (iter_count + 1)))
            theta_opt = np.abs(np.array(opt_res.x))
            theta_new = (1 - weight) * theta + weight * theta_opt
            theta = theta_new
            # theta = np.array(opt_res.x)

            # Step 3: update Sigma_Y, Sigma_res based on theta
            rho_h_theta = np.array(rho(si, theta))
            term1 = np.matmul(
                np.matmul(H.T, np.linalg.inv(rho_h_theta).real), H)
            term2 = np.matmul(
                np.matmul(G.T, np.linalg.inv(rho_h_theta).real), H)
            term3 = np.matmul(
                np.matmul(G.T, np.linalg.inv(rho_h_theta).real), G)
            Sigma_Y = term1
            Sigma_res = term1 - np.matmul(
                np.matmul(term2.T, np.linalg.inv(term3).real), term2)

            if iter_count == 0:
                iter_count += 1
                oldV0_hat, oldV1_hat, old_theta = V0_hat, V1_hat, theta
                continue

            # print("Before thershold")
            cal_thershold = np.sum((oldV1_hat - V1_hat)**2) + np.sum(
                (oldV0_hat - V0_hat)**2) + np.sum((old_theta - theta)**2)
            print("Gap: {}, Theta: {}, weight: {}".format(
                cal_thershold, theta, weight))
            if cal_thershold < thershold:
                judge = False
            min_thershold = min(min_thershold, cal_thershold)
            prot = cal_thershold / min_thershold
            oldV0_hat, oldV1_hat, old_theta = V0_hat, V1_hat, theta
            iter_count += 1
        except Exception:
            err_count += 1
            theta = theta + np.array([
                randint(-10, 10) * thershold * prot,
                randint(-10, 10) * thershold * prot
            ])
            X_env = X_env + np.random.normal(0, 1e-6, n * p).reshape(n, p)
            Y_env = Y_env + np.random.normal(0, 1e-6, n * r).reshape(n, r)
            continue

    beta_final = np.matmul(PV1_hat, beta_MLE)
    Y_bar = np.mean(Y_env, axis=0)
    X_bar = np.mean(X_env, axis=0)
    alpha_final = Y_bar - np.matmul(X_bar, beta_final.T)
    output = (alpha_final.reshape(1, r), beta_final.reshape(p, r))
    # print("stop, iter = {}".format(iter_count + err_count))
    return output
        # (excerpt: tail of the cost function `shannon_rate(L)` referenced
        # below; the lines defining Wo, Wcd, Sigma_norm_B, sigma2, I and T
        # precede this excerpt)
        Wnum = np.dot(Wo, Wcd)
        Wden = np.dot(Wo, Wcd - Sigma_norm_B)
        num = np.linalg.det(Wnum / sigma2 + I)
        den = np.linalg.det(Wden / sigma2 + I)
        return -np.log2(num / den) / (2. * T)

    # instantiate manifold for Pymanopt
    manifold_fact = Euclidean(n, n)

    # instantiate problem for Pymanopt
    problem = Problem(manifold=manifold_fact, cost=shannon_rate, verbosity=0)

    # instantiate solver for Pymanopt
    solver = SteepestDescent()

    # let Pymanopt do the rest
    L_opt = solver.solve(problem)

    # optimal covariance Sigma^*
    Sigma_opt = np.dot(L_opt, L_opt.transpose())
    Sigma_opt_norm = Sigma_opt / np.trace(Sigma_opt)

    # value of transmission rate
    rate = -shannon_rate(L_opt)
    print("Shannon transmission rate:", rate)
    rate_vec[k] = rate
    rate_data[k, pp] = rate
    k = k + 1

# plot transmission rate vs. transmission window
plt.plot(T_vec, rate_vec, 'bo')
def wda(X, y, p=2, reg=1, k=10, solver=None, sinkhorn_method='sinkhorn',
        maxiter=100, verbose=0, P0=None, normalize=False):
    r"""
    Wasserstein Discriminant Analysis :ref:`[11] <references-wda>`

    The function solves the following optimization problem:

    .. math::
        \mathbf{P} = \mathop{\arg \min}_\mathbf{P} \quad
        \frac{\sum\limits_i W(P \mathbf{X}^i, P \mathbf{X}^i)}{\sum\limits_{i, j \neq i} W(P \mathbf{X}^i, P \mathbf{X}^j)}

    where :

    - :math:`P` is a linear projection operator in the Stiefel(`p`, `d`) manifold
    - :math:`W` is entropic regularized Wasserstein distances
    - :math:`\mathbf{X}^i` are samples in the dataset corresponding to class i

    **Choosing a Sinkhorn solver**

    By default and when using a regularization parameter that is not too
    small the default sinkhorn solver should be enough. If you need to use a
    small regularization to get sparse cost matrices, you should use the
    :py:func:`ot.dr.sinkhorn_log` solver that will avoid numerical errors,
    but can be slow in practice.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Training samples.
    y : ndarray, shape (n,)
        Labels for training samples.
    p : int, optional
        Size of dimensionality reduction.
    reg : float, optional
        Regularization term >0 (entropic regularization)
    solver : None | str, optional
        None for steepest descent or 'TrustRegions' for trust regions
        algorithm, else should be a pymanopt.solvers
    sinkhorn_method : str
        method used for the Sinkhorn solver, either 'sinkhorn' or
        'sinkhorn_log'
    P0 : ndarray, shape (d, p)
        Initial starting point for projection.
    normalize : bool, optional
        Normalize the Wasserstein distance by the average distance on P0
        (default : False)
    verbose : int, optional
        Print information along iterations.

    Returns
    -------
    P : ndarray, shape (d, p)
        Optimal transportation matrix for the given parameters
    proj : callable
        Projection function including mean centering.


    .. _references-wda:
    References
    ----------
    .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
        Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
    """  # noqa
    if sinkhorn_method.lower() == 'sinkhorn':
        sinkhorn_solver = sinkhorn
    elif sinkhorn_method.lower() == 'sinkhorn_log':
        sinkhorn_solver = sinkhorn_log
    else:
        raise ValueError("Unknown Sinkhorn method '%s'." % sinkhorn_method)

    mx = np.mean(X)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    # compute uniform weights
    wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]

    # pre-compute reg_c,c'
    if P0 is not None and normalize:
        regmean = np.zeros((len(xc), len(xc)))
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P0)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P0)
                M = dist(xi, xj)
                regmean[i, j] = np.sum(M) / (len(xi) * len(xj))
    else:
        regmean = np.ones((len(xc), len(xc)))

    @Autograd
    def cost(P):
        # wda loss
        loss_b = 0
        loss_w = 0
        for i, xi in enumerate(xc):
            xi = np.dot(xi, P)
            for j, xj in enumerate(xc[i:]):
                xj = np.dot(xj, P)
                M = dist(xi, xj)
                G = sinkhorn_solver(wc[i], wc[j + i], M,
                                    reg * regmean[i, j], k)
                if j == 0:
                    loss_w += np.sum(G * M)
                else:
                    loss_b += np.sum(G * M)
        # loss inverted because minimization
        return loss_w / loss_b

    # declare manifold and problem
    manifold = Stiefel(d, p)
    problem = Problem(manifold=manifold, cost=cost)

    # declare solver and solve
    if solver is None:
        solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose)
    elif solver in ['tr', 'TrustRegions']:
        solver = TrustRegions(maxiter=maxiter, logverbosity=verbose)
    Popt = solver.solve(problem, x=P0)

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj
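# Minimal usage sketch (synthetic two-class data): learn a 2-D discriminant
# projection and apply it.
import numpy as np

X = np.vstack([np.random.randn(20, 3) + 2, np.random.randn(20, 3) - 2])
y = np.array([0] * 20 + [1] * 20)
Popt, proj = wda(X, y, p=2, reg=1, k=10, maxiter=20)
Xp = proj(X)  # (40, 2) projected, mean-centred samples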