def rand_basis_with1(n, k):
    """Return a random o.n. basis containing the ones vector."""
    one = anp.ones((n, 1))
    stief = Stiefel(n, k - 1)
    # Gives us a basis with k - 1 random orthonormal columns.
    Z = stief.rand()
    Y = sla.orth(anp.hstack((one, Z)))
    return Y
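# A minimal usage sketch (not from the original source). It assumes the
# imports this helper relies on: autograd.numpy as anp, scipy.linalg as sla,
# and a pre-2.0 pymanopt Stiefel with a .rand() method. The returned basis is
# orthonormal and (generically) its column span contains the ones direction.
import autograd.numpy as anp
import scipy.linalg as sla
from pymanopt.manifolds import Stiefel

Y = rand_basis_with1(10, 3)
assert Y.shape == (10, 3)
assert anp.allclose(Y.T @ Y, anp.eye(3), atol=1e-10)
ones_dir = anp.ones((10, 1)) / anp.sqrt(10)
# Projecting the ones direction onto span(Y) leaves it unchanged.
assert anp.allclose(Y @ (Y.T @ ones_dir), ones_dir, atol=1e-8)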
def setUp(self):
    self.m = m = 20
    self.n = n = 2
    self.k = k = 1
    self.man = Stiefel(m, n, k=k)
    self.proj = lambda x, u: u - npa.dot(
        x, npa.dot(x.T, u) + npa.dot(u.T, x)) / 2
def __init__(self, dataPtr, lambda1=1e-2, rank=10):
    """Initialize parameters.

    Args:
        dataPtr (DataPtr): Object containing the X and Z side features and the
            target matrix Y.
        lambda1 (uint): Regularization parameter.
        rank (uint): Rank of the U, B, V parametrization.
    """
    self.dataset = dataPtr
    self.X = self.dataset.get_entity("row")
    self.Z = self.dataset.get_entity("col")
    self.rank = rank
    self._loadTarget()
    self.shape = (self.X.shape[0], self.Z.shape[0])
    self.lambda1 = lambda1
    self.nSamples = self.Y.data.shape[0]
    self.W = None
    self.optima_reached = False
    self.manifold = Product([
        Stiefel(self.X.shape[1], self.rank),
        SymmetricPositiveDefinite(self.rank),
        Stiefel(self.Z.shape[1], self.rank),
    ])
def test_inferer_infer(dataPtr):
    test_data = dataPtr
    rowFeatureDim = test_data.get_entity("row").shape[1]
    colFeatureDim = test_data.get_entity("col").shape[1]
    rank = 2
    W = [
        Stiefel(rowFeatureDim, rank).rand(),
        PositiveDefinite(rank).rand(),
        Stiefel(colFeatureDim, rank).rand(),
    ]

    Inferer(method="dot").infer(test_data, W)

    inference = Inferer(method="dot", transformation="mean").infer(test_data, W)
    nOccurences = collections.Counter(inference.ravel())
    assert nOccurences[0] + nOccurences[1] == inference.size

    k = 2
    inference = Inferer(method="dot", k=k, transformation="topk").infer(
        test_data, W)
    nOccurences = collections.Counter(inference.ravel())
    assert nOccurences[0] + nOccurences[1] == inference.size
    assert np.max(np.count_nonzero(inference == 1, axis=0)) <= k
def setUp(self):
    self.m = m = 10
    self.n = n = 3
    self.k = k = 3
    self.manifold = Stiefel(m, n, k=k)
    self.manifold_polar = Stiefel(m, n, k=k, retraction="polar")
    super().setUp()
def setUp(self):
    self.m = m = 20
    self.n = n = 2
    self.k = k = 1
    self.manifold = Stiefel(m, n, k=k)
    self.manifold_polar = Stiefel(m, n, k=k, retraction="polar")
    self.projection = lambda x, u: u - x @ (x.T @ u + u.T @ x) / 2
    super().setUp()
def __init__(self, m, n, k):
    self._m = m
    self._n = n
    self._k = k
    self._name = ("Manifold of {m}-by-{n} matrices with rank {k} and "
                  "embedded geometry".format(m=m, n=n, k=k))
    self._stiefel_m = Stiefel(m, k)
    self._stiefel_n = Stiefel(n, k)
def run(quiet=True):
    dimension = 3
    num_samples = 200
    num_components = 2
    samples = np.random.randn(num_samples, dimension) @ np.diag([3, 2, 1])
    samples -= samples.mean(axis=0)
    samples_ = torch.from_numpy(samples)

    @pymanopt.function.PyTorch
    def cost(w):
        projector = torch.matmul(w, torch.transpose(w, 1, 0))
        return torch.norm(samples_ - torch.matmul(samples_, projector)) ** 2

    manifold = Stiefel(dimension, num_components)
    problem = pymanopt.Problem(manifold, cost, egrad=None, ehess=None)
    if quiet:
        problem.verbosity = 0

    solver = TrustRegions()
    # from pymanopt.solvers import ConjugateGradient
    # solver = ConjugateGradient()
    estimated_span_matrix = solver.solve(problem)

    if quiet:
        return

    estimated_projector = estimated_span_matrix @ estimated_span_matrix.T

    eigenvalues, eigenvectors = np.linalg.eig(samples.T @ samples)
    indices = np.argsort(eigenvalues)[::-1][:num_components]
    span_matrix = eigenvectors[:, indices]
    projector = span_matrix @ span_matrix.T

    print("Frobenius norm error between estimated and closed-form projection "
          "matrix:", np.linalg.norm(projector - estimated_projector))
def fit(self):
    v_matrix_shape = (self.w_matrix.shape[0], self.w_matrix.shape[1])
    w_matrix = tf.convert_to_tensor(self.w_matrix, dtype=tf.float64)
    z_matrix = tf.convert_to_tensor(self.z_matrix, dtype=tf.float64)
    x_matrix = tf.convert_to_tensor(self.x_matrix, dtype=tf.float64)
    lambda_matrix = tf.convert_to_tensor(self.lambda_matrix, dtype=tf.float64)

    x = tf.Variable(
        initial_value=tf.ones(v_matrix_shape, dtype=tf.dtypes.float64))
    cost = tf.norm(
        x_matrix - tf.linalg.matmul(
            tf.linalg.matmul(x, lambda_matrix), tf.transpose(x))
    ) + self.rho / 2 * tf.norm(x - w_matrix + z_matrix)

    manifold = Stiefel(v_matrix_shape[0], v_matrix_shape[1])
    problem = Problem(manifold=manifold, cost=cost, arg=x)
    solver = SteepestDescent(logverbosity=self.verbosity)

    if self.verbosity > 2:
        v_optimal, _ = solver.solve(problem)
    else:
        v_optimal = solver.solve(problem)

    if self.verbosity > 2:
        print("==> WSubproblem ==> Showing v_optimal:")
        print(v_optimal)

    return v_optimal
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3
    num_samples = 200
    num_components = 2
    samples = np.random.randn(num_samples, dimension) @ np.diag([3, 2, 1])
    samples -= samples.mean(axis=0)

    cost, egrad, ehess = create_cost_egrad_ehess(
        backend, samples, num_components)
    manifold = Stiefel(dimension, num_components)
    problem = pymanopt.Problem(manifold, cost, egrad=egrad, ehess=ehess)
    if quiet:
        problem.verbosity = 0

    solver = TrustRegions()
    # from pymanopt.solvers import ConjugateGradient
    # solver = ConjugateGradient()
    estimated_span_matrix = solver.solve(problem)

    if quiet:
        return

    estimated_projector = estimated_span_matrix @ estimated_span_matrix.T

    eigenvalues, eigenvectors = np.linalg.eig(samples.T @ samples)
    indices = np.argsort(eigenvalues)[::-1][:num_components]
    span_matrix = eigenvectors[:, indices]
    projector = span_matrix @ span_matrix.T

    print(
        "Frobenius norm error between estimated and closed-form projection "
        "matrix:", np.linalg.norm(projector - estimated_projector))
def optimize_AB(Cor11, Cor21, n, V11, V21, D11, D21, k):
    global D1
    global D2
    global V1
    global V2
    global Cor1
    global Cor2
    global k_
    D1 = D11  # was missing in the original; init_x0 (and cost) read the D1 global
    D2 = D21
    V1 = V11
    V2 = V21
    Cor1 = Cor11
    Cor2 = Cor21
    k_ = k

    manifold = Stiefel(k, k)
    x0 = init_x0(Cor1, Cor2, n, V1, V2, D1, D2, k)
    # x0 = np.load('zwischenspeicher/B.npy')
    problem = Problem(manifold=manifold, cost=cost)

    # (3) Instantiate a Pymanopt solver
    # solver = pymanopt.solvers.conjugate_gradient.ConjugateGradient(maxtime=10000, maxiter=10000)
    solver = pymanopt.solvers.trust_regions.TrustRegions()  # maxtime=float('inf')

    # let Pymanopt do the rest
    B = solver.solve(problem, x=x0)
    # print(B)
    # print(np.reshape(res.x[0:k*k_],(k_,k))[email protected](res.x[0:k*k_],(k_,k)))
    return B
def CGmanopt(X, objective_function, A, **kwargs):
    '''
    Minimizes the objective function subject to the constraint that
    X.T * X = I_k, using the conjugate gradient method.

    Args:
        X: Initial 2D array of shape (n, k) such that X.T * X = I_k
        objective_function: Objective function F(X, A) to minimize.
        A: Additional parameters for the objective function F(X, A)

    Keyword Args:
        None

    Returns:
        Xopt: Value of X that minimizes the objective subject to the
            constraint.
    '''
    manifold = Stiefel(X.shape[0], X.shape[1])

    def cost(X):
        c, _ = objective_function(X, A)
        return c

    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = ConjugateGradient(logverbosity=0)
    Xopt = solver.solve(problem)
    return Xopt, None
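# A minimal usage sketch for CGmanopt (not from the original source). It
# assumes a pre-2.0 pymanopt with the pymanopt.solvers API and an autodiff
# backend (autograd) available to differentiate the plain-callable cost.
# Note the objective must return a (cost, aux) pair, since cost() unpacks it;
# the quadratic form below is only an illustrative example.
import autograd.numpy as anp
import numpy as np
from pymanopt import Problem
from pymanopt.manifolds import Stiefel
from pymanopt.solvers import ConjugateGradient

def quadratic_objective(X, A):
    # Illustrative objective: tr(X' A X), with an unused auxiliary output.
    return anp.trace(X.T @ A @ X), None

A = np.random.randn(6, 6)
A = A + A.T                                   # symmetric parameter matrix
X0 = np.linalg.qr(np.random.randn(6, 2))[0]   # orthonormal starting point
Xopt, _ = CGmanopt(X0, quadratic_objective, A)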
def fit(self):
    f = self.f
    X = self.X
    tol = self.tol
    d = self.d
    n = self.n
    current_best_residual = np.inf
    for r in range(self.restarts):
        print('restart %d' % r)
        M0 = np.linalg.qr(np.random.randn(self.d, self.n))[0]

        my_params = [Parameter(order=self.order, distribution='uniform',
                               lower=-5, upper=5) for _ in range(n)]
        my_basis = Basis('total-order')
        my_poly_init = Poly(parameters=my_params, basis=my_basis,
                            method='least-squares',
                            sampling_args={'mesh': 'user-defined',
                                           'sample-points': X @ M0,
                                           'sample-outputs': f})
        my_poly_init.set_model()
        c0 = my_poly_init.coefficients.copy()

        residual = self.cost(f, X, M0, c0)
        cauchy_length = self.cauchy_length
        residual_history = []
        iter_ind = 0
        M = M0.copy()
        c = c0.copy()
        while residual > tol:
            if self.verbosity == 2:
                print(residual)
            residual_history.append(residual)

            # Minimize over M
            func_M = lambda M_var: self.cost(f, X, M_var, c)
            grad_M = lambda M_var: self.dcostdM(f, X, M_var, c)
            manifold = Stiefel(d, n)
            solver = ConjugateGradient(maxiter=self.max_M_iters)
            problem = Problem(manifold=manifold, cost=func_M, egrad=grad_M,
                              verbosity=0)
            M = solver.solve(problem, x=M)

            # Minimize over c
            func_c = lambda c_var: self.cost(f, X, M, c_var)
            grad_c = lambda c_var: self.dcostdc(f, X, M, c_var)
            res = minimize(func_c, x0=c, method='CG', jac=grad_c)
            c = res.x

            residual = self.cost(f, X, M, c)
            if iter_ind < cauchy_length:
                iter_ind += 1
            elif np.abs(np.mean(residual_history[-cauchy_length:])
                        - residual) / residual < self.cauchy_tol:
                break

        if self.verbosity > 0:
            print('final residual on training data: %f'
                  % self.cost(f, X, M, c))
        if residual < current_best_residual:
            self.M = M
            self.c = c
            current_best_residual = residual
def __init__(self, Xs, Xt, A, lbda, rank, device=-1):
    self.Xs = Xs
    self.Xt = Xt
    self.A = A
    self.rank = rank
    self.lbda = lbda
    assert isinstance(self.Xs, torch.Tensor)
    assert isinstance(self.Xt, torch.Tensor)
    assert isinstance(self.A, torch.Tensor)
    self.device = device
    d1 = self.Xs.size(1)
    d2 = self.Xt.size(1)
    assert (d1 == rank == d2), f"Found dimensions {d1}, {rank}, {d2}"
    d = d1
    self.manifold = Product(
        [Stiefel(d, d), PositiveDefinite(d), Stiefel(d, d)])
def __init__(self, Xs, Xt, device=-1):
    self.Xs = Xs
    self.Xt = Xt
    assert isinstance(self.Xs, torch.Tensor)
    assert isinstance(self.Xt, torch.Tensor)
    d1 = self.Xs.size(1)
    d2 = self.Xt.size(1)
    self.device = device
    assert d1 == d2, f"Error. Found different dims {d1}, {d2}"
    self.manifold = Product([Stiefel(d1, d2)])
def get_rotation_matrix(X, C):
    def cost(R):
        Z = npy.dot(X, R)
        M = npy.max(Z, axis=1, keepdims=True)
        return npy.sum((Z / M) ** 2)

    manifold = Stiefel(C, C)
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = SteepestDescent(logverbosity=0)
    opt = solver.solve(problem=problem, x=npy.eye(C))
    return cost(opt), opt
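# A minimal usage sketch (not from the original source). Assumptions: `npy`
# is autograd.numpy so the pre-2.0 pymanopt backend can differentiate the
# cost, X is an (N, C) score matrix, and `opt` is a C-by-C orthogonal matrix.
import autograd.numpy as npy
import numpy as np
from pymanopt import Problem
from pymanopt.manifolds import Stiefel
from pymanopt.solvers import SteepestDescent

scores = np.abs(np.random.randn(100, 4))   # hypothetical score matrix
final_cost, rotation = get_rotation_matrix(scores, 4)
# The solver keeps iterates on the Stiefel manifold, so the result is orthogonal.
assert np.allclose(rotation.T @ rotation, np.eye(4), atol=1e-6)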
def estimateR_weighted(S, W, D, R0):
    '''
    Estimates the update of the rotation matrix for the second part of the
    iterations.

    :param S: shape
    :param W: heatmap
    :param D: weight of the heatmap
    :param R0: rotation matrix
    :return: R, the new rotation matrix
    '''
    A = np.transpose(S)
    B = np.transpose(W)
    X0 = R0[0:2, :]
    store_E = Store()
    [m, n] = A.shape
    p = B.shape[1]
    At = np.transpose(A)

    # We optimize over a Stiefel manifold because R is constrained to be
    # orthogonal.
    manifold = Stiefel(n, p, 1)

    def cost(X):
        '''
        Cost function on the manifold: trace(E' * D * E) / 2 with
        E = A * X' - B (cached in store_E).

        :param X: point on the manifold
        :return f: the cost
        '''
        if store_E.stored is None:
            store_E.stored = np.dot(A, np.transpose(X)) - B
        E = store_E.stored
        f = np.trace(np.dot(np.transpose(E), np.dot(D, E))) / 2
        return f

    # Set up the problem structure with manifold M and cost.
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    # Set up the trust-region algorithm to solve the problem.
    TR = TrustRegions(maxiter=10)
    # Solve the problem.
    X = TR.solve(problem, X0)
    # print('X : ', X)

    return np.transpose(X)  # return R = X'
def accumulate_gradients(opts, lr, batchSize, net, res, mmap=None):
    for l in range(len(net['layers']) - 1, -1, -1):
        if res['dzdw'][l] is not None:
            if 'learningRate' not in net['layers'][l]:
                net['layers'][l]['learningRate'] = 1
            if 'weightDecay' not in net['layers'][l]:
                net['layers'][l]['weightDecay'] = 1
            thisLR = lr * net['layers'][l]['learningRate']
            if 'weight' in net['layers'][l]:
                if net['layers'][l]['type'] == 'bfc':
                    W1 = net['layers'][l]['weight']
                    W1grad = (1. / batchSize) * res['dzdw'][l]
                    manifold = Stiefel(W1.shape[0], W1.shape[1])
                    # Convert the Euclidean gradient to a Riemannian gradient
                    # and retract the step back onto the Stiefel manifold.
                    W1Rgrad = manifold.egrad2rgrad(W1, W1grad)
                    net['layers'][l]['weight'] = manifold.retr(
                        W1, -thisLR * W1Rgrad)
                else:
                    net['layers'][l]['weight'] = (
                        net['layers'][l]['weight']
                        - thisLR * (1. / batchSize) * res['dzdw'][l])
    return net, res
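# The 'bfc' branch above is the generic Riemannian gradient step exposed by
# pymanopt manifolds. A standalone sketch of the same pattern (hypothetical
# shapes and learning rate, pre-2.0 pymanopt API):
import numpy as np
from pymanopt.manifolds import Stiefel

manifold = Stiefel(64, 16)
W = manifold.rand()                        # current orthonormal weight matrix
euclidean_grad = np.random.randn(64, 16)   # stand-in for a backprop gradient
riemannian_grad = manifold.egrad2rgrad(W, euclidean_grad)
W_new = manifold.retr(W, -0.01 * riemannian_grad)  # step stays on the manifold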
def optimize_on_manifold(self, options, optmeth):
    if optmeth not in ['bo13', 'wen12', 'ManOpt']:
        print("Chosen optimization method", optmeth,
              "has not been implemented, using 'ManOpt'")
        optmeth = 'ManOpt'

    if optmeth == 'ManOpt':
        # This is hardcoding it to the two-dimensional case.
        manifold_one = Stiefel(
            np.shape(self.rotations[0])[0], np.shape(self.rotations[0])[1])
        manifold_two = Stiefel(
            np.shape(self.rotations[0])[0], np.shape(self.rotations[0])[1])
        manifold = Product((manifold_one, manifold_two))
        optimization_variable = tf.Variable(tf.placeholder(tf.float32))
        problem = Problem(manifold=manifold, cost=self.my_cost(),
                          arg=optimization_variable)
        solver = ConjugateGradient(problem, optimization_variable, options)
        return solver
def estimate_orth_subspaces(self, DataStruct):
    '''
    Main optimization function.
    '''
    # Grassmann point?
    if LA.norm(np.dot(self.Q.T, self.Q) - np.eye(self.Q.shape[-1]),
               ord='fro') > 1e-4:
        self._project_stiefel()

    # ------------------------------------------------------------------- #
    # eGrad = grad(cost)
    # eHess = hessian(cost)
    # Perform optimization
    # ------------------------------------------------------------------- #

    d, r = np.shape(self.Q)  # problem size
    print(d)

    manif = Stiefel(d, r)  # initialize manifold

    # instantiate problem
    problem = Problem(manifold=manif, cost=self._cost, verbosity=2)

    # initialize solver
    solver = TrustRegions(mingradnorm=1e-8, minstepsize=1e-16, logverbosity=1)

    # solve
    Xopt, optlog = solver.solve(problem)

    opt_subspaces = self._objfn(Xopt)

    # Align the axes within a subspace by variance, high to low
    for j in range(self.numSubspaces):
        Aj = DataStruct.A[j]
        Qj = opt_subspaces[2].Q[j]
        # data projected onto subspace
        Aj_proj = np.dot((Aj - np.mean(Aj, 0)), Qj)
        if np.size(np.cov(Aj_proj.T)) < 2:
            V = 1
        else:
            V = LA.svd(np.cov(Aj_proj.T))[0]
        Qj = np.dot(Qj, V)
        opt_subspaces[2].Q[j] = Qj  # ranked top to low variance

    return opt_subspaces[2]
def ManoptOptimization(A, m):
    n = A.shape[0]
    T = A.shape[2]

    manifold = Stiefel(n, m, k=1)
    mycost = lambda x: cost(A, x)
    myegrad = lambda x: egrad(A, x)

    problem = Problem(manifold=manifold, cost=mycost, egrad=myegrad)
    solver = TrustRegions()

    print('# Start optimization using solver: trustregion')
    Xopt = solver.solve(problem)

    return Xopt
def test_vararg_cost_on_product(self):
    shape = (3, 3)
    manifold = Product([Stiefel(*shape)] * 2)

    @pymanopt.function.tensorflow(manifold)
    def cost(*args):
        X, Y = args
        return tf.reduce_sum(X) + tf.reduce_sum(Y)

    problem = pymanopt.Problem(manifold, cost)
    optimizer = TrustRegions(max_iterations=1)
    Xopt, Yopt = optimizer.run(problem).point
    self.assertEqual(Xopt.shape, (3, 3))
    self.assertEqual(Yopt.shape, (3, 3))
def optimizer_R_v1(W, D, S, R0):
    """
    :param W: heatmap constant
    :param D: weight of the keypoints
    :param S: shape of the object
    :param R0: initial R
    :return: the optimal R with fixed T and s

    The cost is ||(W - RS) * sqrt(D)||; the scale factor s is already taken
    into account in S, and T is taken into account in W.
    """
    # This store object is needed because the manifold optimizer does not
    # work if the cost is not implemented like this.
    store = Store()

    # ---------------------------- COST FUNCTION ---------------------------- #
    def cost(R):
        """
        :param R: rotation matrix variable
        :return: ||(W - RS) * D^(1/2)||^2 = tr((W - RS) * D * (W - RS)')
        """
        if store.stored is None:
            store.stored = W - np.dot(R, S)
        X = store.stored
        f = np.trace(np.dot(X, np.dot(D, np.transpose(X)))) / 2
        return f
    # ------------------------------------------------------------------------ #

    # We optimize over a Stiefel manifold because R is constrained to be
    # orthogonal.
    manifold = Stiefel(3, 2, 1)
    # Set up the problem structure with manifold M and cost.
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    # Set up the trust-region algorithm to solve the problem.
    TR = TrustRegions(maxiter=15)
    # Solve the problem.
    R_opt = TR.solve(problem, R0)
    return np.transpose(R_opt)
def minneg_in_SOk(self, Y0):
    r"""
    Minimizes the negative part of $Y_0 Q$ over $Q \in SO(k)$.
    """
    def cost(Q):
        Y = Y0.dot(Q)
        return self.neg(Y)

    def cost_grad(Q):
        Y = Y0.dot(Q)
        return Y0.transpose().dot(Y * (Y < 0))

    k = Y0.shape[1]
    SOk = Stiefel(k, k)
    pblm = mo.Problem(manifold=SOk, cost=cost, egrad=cost_grad, verbosity=0)
    Q, log = self.solver.solve(pblm)
    return Y0.dot(Q)
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3
    num_samples = 200
    num_components = 2
    samples = np.random.normal(size=(num_samples, dimension)) @ np.diag(
        [3, 2, 1])
    samples -= samples.mean(axis=0)

    manifold = Stiefel(dimension, num_components)
    cost, euclidean_gradient, euclidean_hessian = create_cost_and_derivates(
        manifold, samples, backend)
    problem = pymanopt.Problem(
        manifold,
        cost,
        euclidean_gradient=euclidean_gradient,
        euclidean_hessian=euclidean_hessian,
    )

    optimizer = TrustRegions(verbosity=2 * int(not quiet))
    estimated_span_matrix = optimizer.run(problem).point

    if quiet:
        return

    estimated_projector = estimated_span_matrix @ estimated_span_matrix.T

    eigenvalues, eigenvectors = np.linalg.eig(samples.T @ samples)
    indices = np.argsort(eigenvalues)[::-1][:num_components]
    span_matrix = eigenvectors[:, indices]
    projector = span_matrix @ span_matrix.T

    print(
        "Frobenius norm error between estimated and closed-form projection "
        "matrix:",
        np.linalg.norm(projector - estimated_projector),
    )
class TestMultiStiefelManifold(unittest.TestCase):
    def setUp(self):
        self.m = m = 10
        self.n = n = 3
        self.k = k = 3
        self.man = Stiefel(m, n, k=k)

    def test_dim(self):
        assert self.man.dim == 0.5 * self.k * self.n * (2 * self.m - self.n - 1)

    def test_typicaldist(self):
        np_testing.assert_almost_equal(self.man.typicaldist,
                                       np.sqrt(self.n * self.k))

    # def test_dist(self):

    def test_inner(self):
        X = self.man.rand()
        A = self.man.randvec(X)
        B = self.man.randvec(X)
        np_testing.assert_allclose(np.sum(A * B), self.man.inner(X, A, B))

    def test_proj(self):
        # Construct a random point X on the manifold.
        X = self.man.rand()

        # Construct a vector H in the ambient space.
        H = rnd.randn(self.k, self.m, self.n)

        # Compare the projections.
        Hproj = H - multiprod(X, multiprod(multitransp(X), H) +
                              multiprod(multitransp(H), X)) / 2
        np_testing.assert_allclose(Hproj, self.man.proj(X, H))

    def test_rand(self):
        # Just make sure that things generated are on the manifold and that
        # if you generate two they are not equal.
        X = self.man.rand()
        np_testing.assert_allclose(multiprod(multitransp(X), X),
                                   multieye(self.k, self.n), atol=1e-10)
        Y = self.man.rand()
        assert np.linalg.norm(X - Y) > 1e-6

    def test_randvec(self):
        # Make sure things generated are in tangent space and if you generate
        # two then they are not equal.
        X = self.man.rand()
        U = self.man.randvec(X)
        np_testing.assert_allclose(multisym(multiprod(multitransp(X), U)),
                                   np.zeros((self.k, self.n, self.n)),
                                   atol=1e-10)
        V = self.man.randvec(X)
        assert la.norm(U - V) > 1e-6

    def test_retr(self):
        # Test that the result is on the manifold and that for small
        # tangent vectors it has little effect.
        x = self.man.rand()
        u = self.man.randvec(x)

        xretru = self.man.retr(x, u)
        np_testing.assert_allclose(multiprod(multitransp(xretru), xretru),
                                   multieye(self.k, self.n), atol=1e-10)

        u = u * 1e-6
        xretru = self.man.retr(x, u)
        np_testing.assert_allclose(xretru, x + u)

    # def test_egrad2rgrad(self):

    def test_norm(self):
        x = self.man.rand()
        u = self.man.randvec(x)
        np_testing.assert_almost_equal(self.man.norm(x, u), la.norm(u))

    # def test_transp(self):

    def test_exp(self):
        # Check that exp lies on the manifold and that exp of a small vector u
        # is close to x + u.
        s = self.man
        x = s.rand()
        u = s.randvec(x)

        xexpu = s.exp(x, u)
        np_testing.assert_allclose(multiprod(multitransp(xexpu), xexpu),
                                   multieye(self.k, self.n), atol=1e-10)

        u = u * 1e-6
        xexpu = s.exp(x, u)
        np_testing.assert_allclose(xexpu, x + u)
class TestSingleStiefelManifold(unittest.TestCase):
    def setUp(self):
        self.m = m = 20
        self.n = n = 2
        self.k = k = 1
        self.man = Stiefel(m, n, k=k)
        self.proj = lambda x, u: u - npa.dot(
            x, npa.dot(x.T, u) + npa.dot(u.T, x)) / 2

    def test_dim(self):
        assert self.man.dim == 0.5 * self.n * (2 * self.m - self.n - 1)

    # def test_typicaldist(self):

    # def test_dist(self):

    def test_inner(self):
        X = la.qr(rnd.randn(self.m, self.n))[0]
        A, B = rnd.randn(2, self.m, self.n)
        np_testing.assert_allclose(np.sum(A * B), self.man.inner(X, A, B))

    def test_proj(self):
        # Construct a random point X on the manifold.
        X = rnd.randn(self.m, self.n)
        X = la.qr(X)[0]

        # Construct a vector H in the ambient space.
        H = rnd.randn(self.m, self.n)

        # Compare the projections.
        Hproj = H - X.dot(X.T.dot(H) + H.T.dot(X)) / 2
        np_testing.assert_allclose(Hproj, self.man.proj(X, H))

    def test_rand(self):
        # Just make sure that things generated are on the manifold and that
        # if you generate two they are not equal.
        X = self.man.rand()
        np_testing.assert_allclose(X.T.dot(X), np.eye(self.n), atol=1e-10)
        Y = self.man.rand()
        assert np.linalg.norm(X - Y) > 1e-6

    def test_randvec(self):
        # Make sure things generated are in tangent space and if you generate
        # two then they are not equal.
        X = self.man.rand()
        U = self.man.randvec(X)
        np_testing.assert_allclose(multisym(X.T.dot(U)),
                                   np.zeros((self.n, self.n)), atol=1e-10)
        V = self.man.randvec(X)
        assert la.norm(U - V) > 1e-6

    def test_retr(self):
        # Test that the result is on the manifold and that for small
        # tangent vectors it has little effect.
        x = self.man.rand()
        u = self.man.randvec(x)

        xretru = self.man.retr(x, u)

        np_testing.assert_allclose(xretru.T.dot(xretru),
                                   np.eye(self.n, self.n), atol=1e-10)

        u = u * 1e-6
        xretru = self.man.retr(x, u)
        np_testing.assert_allclose(xretru, x + u)

    def test_ehess2rhess(self):
        # Test this function at some randomly generated point.
        x = self.man.rand()
        u = self.man.randvec(x)
        egrad = rnd.randn(self.m, self.n)
        ehess = rnd.randn(self.m, self.n)

        np_testing.assert_allclose(
            testing.ehess2rhess(self.proj)(x, egrad, ehess, u),
            self.man.ehess2rhess(x, egrad, ehess, u))

    # def test_egrad2rgrad(self):

    def test_norm(self):
        x = self.man.rand()
        u = self.man.randvec(x)
        np_testing.assert_almost_equal(self.man.norm(x, u), la.norm(u))

    # def test_transp(self):

    def test_exp(self):
        # Check that exp lies on the manifold and that exp of a small vector u
        # is close to x + u.
        s = self.man
        x = s.rand()
        u = s.randvec(x)

        xexpu = s.exp(x, u)
        np_testing.assert_allclose(xexpu.T.dot(xexpu),
                                   np.eye(self.n, self.n), atol=1e-10)

        u = u * 1e-6
        xexpu = s.exp(x, u)
        np_testing.assert_allclose(xexpu, x + u)
def setUp(self):
    self.m = m = 10
    self.n = n = 3
    self.k = k = 3
    self.man = Stiefel(m, n, k=k)
def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
    """
    Parameters
    ----------

    data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
        Each element in the list contains the fMRI data for alignment of
        one subject. There are n_align samples for each subject.

    data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject for
        the classification task.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    s : array, shape=[features, samples]
        The shared response.

    theta : array, shape=[classes, features]
        The MLR class plane parameters.

    bias : array, shape=[classes]
        The MLR class biases.

    Returns
    -------

    w : list of 2D array, element i has shape=[voxels_i, features]
        The updated orthogonal transforms (mappings).
    """
    subjects = len(data_align)

    s_th = S.shared(s.astype(theano.config.floatX))
    theta_th = S.shared(theta.T.astype(theano.config.floatX))
    bias_th = S.shared(bias.T.astype(theano.config.floatX),
                       broadcastable=(True, False))

    for subject in range(subjects):
        logger.info('Subject Wi %d' % subject)
        # Solve for subject i
        # Create the theano function
        w_th = T.matrix(name='W', dtype=theano.config.floatX)
        data_srm_subject = \
            S.shared(data_align[subject].astype(theano.config.floatX))
        constf1 = \
            S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1],
                     allow_downcast=True)
        f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2)

        if data_sup[subject] is not None:
            lr_samples_S = S.shared(data_sup[subject].shape[1])
            data_sup_subject = \
                S.shared(data_sup[subject].astype(theano.config.floatX))
            labels_S = S.shared(labels[subject])
            constf2 = S.shared(
                -self.alpha / self.gamma / data_sup[subject].shape[1],
                allow_downcast=True)

            log_p_y_given_x = T.log(T.nnet.softmax(
                (theta_th.dot(w_th.T.dot(data_sup_subject))).T + bias_th))
            f2 = constf2 * T.sum(
                log_p_y_given_x[T.arange(lr_samples_S), labels_S])
            f = f1 + f2
        else:
            f = f1

        # Define the problem and solve
        f_subject = self._objective_function_subject(
            data_align[subject], data_sup[subject], labels[subject],
            w[subject], s, theta, bias)
        minstep = np.amin(((10**-np.floor(np.log10(f_subject))), 1e-1))
        manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
        problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep)
        w[subject] = np.array(solver.solve(
            problem, x=w[subject].astype(theano.config.floatX)))

        if data_sup[subject] is not None:
            del f2
            del log_p_y_given_x
            del data_sup_subject
            del labels_S
        del solver
        del problem
        del manifold
        del f
        del f1
        del data_srm_subject
        del w_th

    del theta_th
    del bias_th
    del s_th

    # Run garbage collector to avoid filling up the memory
    gc.collect()

    return w
if __name__ == "__main__":
    experiment_name = 'brockett'
    n_exp = 10

    if not os.path.isdir('result'):
        os.makedirs('result')
    path = os.path.join('result', experiment_name + '.csv')

    m = 20
    n = 5

    A = make_spd_matrix(m)
    N = np.diag([i for i in range(n)])
    cost = create_cost(A, N)
    manifold = Stiefel(m, n)
    problem = pymanopt.Problem(manifold, cost, egrad=None)

    for i in range(n_exp):
        res_list = []
        for beta_type in BetaTypes:
            solver = ConjugateGradient(beta_type=beta_type, maxiter=10000)
            res = solver.solve(problem)
            res_list.append(res[1])
            res_list.append(res[2])

        with open(path, 'a') as f:
            writer = csv.writer(f)
            writer.writerow(res_list)
class FixedRankEmbedded(Manifold):
    """
    Note: Currently not compatible with the second order TrustRegions solver.
    Should be fixed soon.

    Manifold of m-by-n real matrices of fixed rank k. This follows the
    embedded geometry described in Bart Vandereycken's 2013 paper:
    "Low-rank matrix completion by Riemannian optimization".

    Paper link: http://arxiv.org/pdf/1209.3834.pdf

    For efficiency purposes, Pymanopt does not represent points on this
    manifold explicitly using m x n matrices, but instead implicitly using
    a truncated singular value decomposition. Specifically, a point is
    represented by a tuple (u, s, vt) of three numpy arrays. The arrays u,
    s and vt have shapes (m, k), (k,) and (k, n) respectively, and the low
    rank matrix which they represent can be recovered by the matrix product
    u * diag(s) * vt.

    For example, to optimize over the space of 5 by 4 matrices with rank 3,
    we would need to

    >>> import pymanopt.manifolds
    >>> manifold = pymanopt.manifolds.FixedRankEmbedded(5, 4, 3)

    Then the shapes will be as follows:

    >>> x = manifold.rand()
    >>> x[0].shape
    (5, 3)
    >>> x[1].shape
    (3,)
    >>> x[2].shape
    (3, 4)

    and the full matrix can be recovered using the matrix product
    x[0] * diag(x[1]) * x[2]:

    >>> import numpy as np
    >>> X = x[0].dot(np.diag(x[1])).dot(x[2])

    Tangent vectors are represented as a tuple (Up, M, Vp). The matrices Up
    (mxk) and Vp (nxk) obey Up'*U = 0 and Vp'*V = 0. The matrix M (kxk) is
    arbitrary. Such a structure corresponds to the following tangent vector
    in the ambient space of mxn matrices:

        Z = U*M*V' + Up*V' + U*Vp'

    where (U, S, V) is the current point and (Up, M, Vp) is the tangent
    vector at that point.

    Vectors in the ambient space are best represented as mxn matrices. If
    these are low-rank, they may also be represented as structures with
    U, S, V fields, such that Z = U*S*V'. There are no restrictions on what
    U, S and V are, as long as their product as indicated yields a real,
    mxn matrix.

    The chosen geometry yields a Riemannian submanifold of the embedding
    space R^(mxn) equipped with the usual trace (Frobenius) inner product.

    Please cite the Pymanopt paper as well as the research paper:
        @Article{vandereycken2013lowrank,
            Title   = {Low-rank matrix completion by {Riemannian} optimization},
            Author  = {Vandereycken, B.},
            Journal = {SIAM Journal on Optimization},
            Year    = {2013},
            Number  = {2},
            Pages   = {1214--1236},
            Volume  = {23},
            Doi     = {10.1137/110845768}
        }

    This file is based on fixedrankembeddedfactory from Manopt: www.manopt.org.
    Ported by: Jamie Townsend, Sebastian Weichwald
    Original author: Nicolas Boumal, Dec. 30, 2012.
    """
    def __init__(self, m, n, k):
        self._m = m
        self._n = n
        self._k = k

        self._name = ("Manifold of {m}-by-{n} matrices with rank {k} and "
                      "embedded geometry".format(m=m, n=n, k=k))

        self._stiefel_m = Stiefel(m, k)
        self._stiefel_n = Stiefel(n, k)

    def __str__(self):
        return self._name

    @property
    def dim(self):
        return (self._m + self._n - self._k) * self._k

    @property
    def typicaldist(self):
        return self.dim

    def dist(self, X, Y):
        raise NotImplementedError

    def inner(self, X, G, H):
        return np.sum(np.tensordot(a, b) for (a, b) in zip(G, H))

    def _apply_ambient(self, Z, W):
        """
        For a given ambient vector Z, given as a tuple (U, S, V) such that
        Z = U*S*V', applies it to a matrix W to calculate the matrix product
        ZW.
        """
        if isinstance(Z, tuple):
            return np.dot(Z[0], np.dot(Z[1], np.dot(Z[2].T, W)))
        else:
            return np.dot(Z, W)

    def _apply_ambient_transpose(self, Z, W):
        """
        Same as apply_ambient, but applies Z' to W.
        """
        if isinstance(Z, tuple):
            return np.dot(Z[2], np.dot(Z[1], np.dot(Z[0].T, W)))
        else:
            return np.dot(Z.T, W)

    def proj(self, X, Z):
        """
        Note that Z must either be an m x n matrix from the ambient space, or
        else a tuple (Uz, Sz, Vz), where Uz * Sz * Vz is in the ambient space
        (of low-rank matrices).

        This function then returns a tangent vector parameterized as
        (Up, M, Vp), as described in the class docstring.
        """
        ZV = self._apply_ambient(Z, X[2].T)
        UtZV = np.dot(X[0].T, ZV)
        ZtU = self._apply_ambient_transpose(Z, X[0])

        Up = ZV - np.dot(X[0], UtZV)
        M = UtZV
        Vp = ZtU - np.dot(X[2].T, UtZV.T)

        return _TangentVector((Up, M, Vp))

    def egrad2rgrad(self, x, egrad):
        """
        Assuming that the cost function being optimized has been defined
        in terms of the low-rank singular value decomposition of X, the
        gradient returned by the autodiff backends will have three components
        and will be in the form of a tuple egrad = (df/dU, df/dS, df/dV).

        This function correctly maps a gradient of this form into the tangent
        space. See https://j-towns.github.io/papers/svd-derivative.pdf for a
        derivation.
        """
        utdu = np.dot(x[0].T, egrad[0])
        uutdu = np.dot(x[0], utdu)
        Up = (egrad[0] - uutdu) / x[1]

        vtdv = np.dot(x[2], egrad[2].T)
        vvtdv = np.dot(x[2].T, vtdv)
        Vp = (egrad[2].T - vvtdv) / x[1]

        i = np.eye(self._k)

        f = 1 / (x[1][np.newaxis, :]**2 - x[1][:, np.newaxis]**2 + i)

        M = (f * (utdu - utdu.T) * x[1] +
             x[1][:, np.newaxis] * f * (vtdv - vtdv.T) +
             np.diag(egrad[1]))

        return _TangentVector((Up, M, Vp))

    def ehess2rhess(self, X, egrad, ehess, H):
        raise NotImplementedError

    # This retraction is second order, following general results from
    # Absil, Malick, "Projection-like retractions on matrix manifolds",
    # SIAM J. Optim., 22 (2012), pp. 135-158.
    def retr(self, X, Z):
        Qu, Ru = np.linalg.qr(Z[0])
        Qv, Rv = np.linalg.qr(Z[2])

        T = np.vstack((np.hstack((np.diag(X[1]) + Z[1], Rv.T)),
                       np.hstack((Ru, np.zeros((self._k, self._k))))))

        # Numpy svd outputs St as a 1d vector, not a matrix.
        Ut, St, Vt = np.linalg.svd(T, full_matrices=False)
        # Transpose because numpy outputs it the wrong way.
        Vt = Vt.T

        U = np.dot(np.hstack((X[0], Qu)), Ut[:, :self._k])
        V = np.dot(np.hstack((X[2].T, Qv)), Vt[:, :self._k])
        S = St[:self._k] + np.spacing(1)
        return (U, S, V.T)

    def norm(self, X, G):
        return np.sqrt(self.inner(X, G, G))

    def rand(self):
        u = self._stiefel_m.rand()
        s = np.sort(np.random.rand(self._k))[::-1]
        vt = self._stiefel_n.rand().T
        return (u, s, vt)

    def _tangent(self, X, Z):
        """
        Given Z in tangent vector format, projects the components Up and Vp
        such that they satisfy the tangent space constraints up to numerical
        errors. If Z was indeed a tangent vector at X, this should barely
        affect Z (it would not at all if we had infinite numerical accuracy).
        """
        Up = Z[0] - np.dot(X[0], np.dot(X[0].T, Z[0]))
        Vp = Z[2] - np.dot(X[2].T, np.dot(X[2], Z[2]))

        return _TangentVector((Up, Z[1], Vp))

    def randvec(self, X):
        Up = np.random.randn(self._m, self._k)
        Vp = np.random.randn(self._n, self._k)
        M = np.random.randn(self._k, self._k)

        Z = self._tangent(X, (Up, M, Vp))
        nrm = self.norm(X, Z)

        return _TangentVector((Z[0] / nrm, Z[1] / nrm, Z[2] / nrm))

    def tangent2ambient(self, X, Z):
        """
        Transforms a tangent vector Z represented as a structure (Up, M, Vp)
        into a structure with fields (U, S, V) that represents that same
        tangent vector in the ambient space of mxn matrices, as U*S*V'.
        This matrix is equal to X.U*Z.M*X.V' + Z.Up*X.V' + X.U*Z.Vp'. The
        latter is an mxn matrix, which could be too large to build
        explicitly, and this is why we return a low-rank representation
        instead. Note that there are no guarantees on U, S and V other than
        that USV' is the desired matrix. In particular, U and V are not (in
        general) orthonormal and S is not (in general) diagonal.
        (In this implementation, S is identity, but this might change.)
        """
        U = np.hstack((np.dot(X[0], Z[1]) + Z[0], X[0]))
        S = np.eye(2 * self._k)
        V = np.hstack((X[2].T, Z[2]))
        return (U, S, V)

    # Comment from Manopt:
    # New vector transport on June 24, 2014 (as indicated by Bart)
    # Reference: Absil, Mahony, Sepulchre 2008 section 8.1.3:
    # For Riemannian submanifolds of a Euclidean space, it is acceptable to
    # transport simply by orthogonal projection of the tangent vector
    # translated in the ambient space.
    def transp(self, X1, X2, G):
        return self.proj(X2, self.tangent2ambient(X1, G))

    def exp(self, X, U):
        raise NotImplementedError

    def log(self, X, Y):
        raise NotImplementedError

    def pairmean(self, X, Y):
        raise NotImplementedError

    def zerovec(self, X):
        return _TangentVector((np.zeros((self._m, self._k)),
                               np.zeros((self._k, self._k)),
                               np.zeros((self._n, self._k))))
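# A minimal sketch of the point/tangent representation described in the class
# docstring (assumes this class and numpy as np are in scope, as above).
import numpy as np

manifold = FixedRankEmbedded(5, 4, 3)
x = manifold.rand()          # x = (u, s, vt) with shapes (5, 3), (3,), (3, 4)
u = manifold.randvec(x)      # tangent vector in (Up, M, Vp) form
y = manifold.retr(x, u)      # retraction: another rank-3 point as (u, s, vt)
X_full = x[0].dot(np.diag(x[1])).dot(x[2])   # recover the full 5-by-4 matrix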