def fit(self):
    f = self.f
    X = self.X
    tol = self.tol
    d = self.d
    n = self.n
    current_best_residual = np.inf
    for r in range(self.restarts):
        print('restart %d' % r)
        M0 = np.linalg.qr(np.random.randn(self.d, self.n))[0]
        my_params = [Parameter(order=self.order, distribution='uniform',
                               lower=-5, upper=5) for _ in range(n)]
        my_basis = Basis('total-order')
        my_poly_init = Poly(parameters=my_params, basis=my_basis,
                            method='least-squares',
                            sampling_args={'mesh': 'user-defined',
                                           'sample-points': X @ M0,
                                           'sample-outputs': f})
        my_poly_init.set_model()
        c0 = my_poly_init.coefficients.copy()
        residual = self.cost(f, X, M0, c0)
        cauchy_length = self.cauchy_length
        residual_history = []
        iter_ind = 0
        M = M0.copy()
        c = c0.copy()
        while residual > tol:
            if self.verbosity == 2:
                print(residual)
            residual_history.append(residual)
            # Minimize over M
            func_M = lambda M_var: self.cost(f, X, M_var, c)
            grad_M = lambda M_var: self.dcostdM(f, X, M_var, c)
            manifold = Stiefel(d, n)
            solver = ConjugateGradient(maxiter=self.max_M_iters)
            problem = Problem(manifold=manifold, cost=func_M, egrad=grad_M,
                              verbosity=0)
            M = solver.solve(problem, x=M)
            # Minimize over c
            func_c = lambda c_var: self.cost(f, X, M, c_var)
            grad_c = lambda c_var: self.dcostdc(f, X, M, c_var)
            res = minimize(func_c, x0=c, method='CG', jac=grad_c)
            c = res.x
            residual = self.cost(f, X, M, c)
            if iter_ind < cauchy_length:
                iter_ind += 1
            elif np.abs(np.mean(residual_history[-cauchy_length:]) - residual) / residual < self.cauchy_tol:
                break
        if self.verbosity > 0:
            print('final residual on training data: %f' % self.cost(f, X, M, c))
        if residual < current_best_residual:
            self.M = M
            self.c = c
            current_best_residual = residual
def CGmanopt(X, objective_function, A, **kwargs):
    '''
    Minimizes the objective function subject to the constraint that
    X.T * X = I_k using the conjugate gradient method.

    Args:
        X: Initial 2D array of shape (n, k) such that X.T * X = I_k
        objective_function: Objective function F(X, A) to minimize.
        A: Additional parameters for the objective function F(X, A)

    Keyword Args:
        None

    Returns:
        Xopt: Value of X that minimizes the objective subject to the
            constraint.
    '''
    manifold = Stiefel(X.shape[0], X.shape[1])

    def cost(X):
        c, _ = objective_function(X, A)
        return c

    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = ConjugateGradient(logverbosity=0)
    Xopt = solver.solve(problem)
    return Xopt, None
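# A minimal, hypothetical usage sketch for CGmanopt above. The toy objective
# tr(X.T A X) and the symmetric matrix A_sym are illustrative only; the cost
# is written with autograd.numpy on the assumption that pymanopt's autograd
# backend differentiates the plain callable passed to Problem.
import autograd.numpy as anp
import numpy as np

def toy_objective(X, A):
    # CGmanopt only uses the first element of the returned tuple.
    return anp.trace(X.T @ A @ X), None

rng = np.random.RandomState(0)
A_sym = rng.randn(6, 6)
A_sym = 0.5 * (A_sym + A_sym.T)
X0 = np.linalg.qr(rng.randn(6, 2))[0]
Xopt, _ = CGmanopt(X0, toy_objective, A_sym)
print(np.allclose(Xopt.T @ Xopt, np.eye(2), atol=1e-6))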
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    dimension = 3    # Dimension of the embedding space, i.e. R^k
    num_points = 24  # Points on the sphere
    # This value should be as close to 0 as affordable. If it is too close to
    # zero, optimization first becomes much slower, then simply doesn't work
    # anymore because of floating point overflow errors (NaN's and Inf's start
    # to appear). If it is too large, then log-sum-exp is a poor approximation
    # of the max function, and the spread will be less uniform. An okay value
    # seems to be 0.01 or 0.001 for example. Note that a better strategy than
    # using a small epsilon straightaway is to reduce epsilon bit by bit and to
    # warm-start each subsequent optimization that way. Trust-region methods
    # are more appropriate for that kind of fine-tuning.
    epsilon = 0.0015

    cost = create_cost(backend, dimension, num_points, epsilon)
    manifold = Elliptope(num_points, dimension)
    problem = pymanopt.Problem(manifold, cost)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)
    Yopt = solver.solve(problem)

    if quiet:
        return

    Xopt = Yopt @ Yopt.T
    maxdot = np.triu(Xopt, 1).max()
    print("Maximum inner product between any two points:", maxdot)
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    m, n, rank = 5, 4, 2
    matrix = rnd.randn(m, n)

    cost, egrad = create_cost_egrad(backend, matrix, rank)
    manifold = FixedRankEmbedded(m, n, rank)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient()
    left_singular_vectors, singular_values, right_singular_vectors = \
        solver.solve(problem)
    low_rank_approximation = (left_singular_vectors @
                              np.diag(singular_values) @
                              right_singular_vectors)

    if not quiet:
        u, s, vt = la.svd(matrix, full_matrices=False)
        indices = np.argsort(s)[-rank:]
        low_rank_solution = (u[:, indices] @ np.diag(s[indices]) @ vt[indices, :])

        print("Analytic low-rank solution:")
        print()
        print(low_rank_solution)
        print()
        print("Rank-{} approximation:".format(rank))
        print()
        print(low_rank_approximation)
        print()
        print("Frobenius norm error:",
              la.norm(low_rank_approximation - low_rank_solution))
        print()
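# A hedged sketch of what the create_cost_egrad helper assumed above might
# look like for a numpy backend: the cost is half the squared Frobenius
# distance to `matrix`, and the Euclidean gradient is returned as the
# (u, s, vt) triple expected by FixedRankEmbedded. This is an illustration,
# not the example's actual implementation.
import numpy as np

def create_cost_egrad_numpy(matrix, rank):
    def cost(usv):
        u, s, vt = usv
        X = u @ np.diag(s) @ vt
        return 0.5 * np.linalg.norm(X - matrix) ** 2

    def egrad(usv):
        u, s, vt = usv
        residual = u @ np.diag(s) @ vt - matrix
        gu = residual @ vt.T @ np.diag(s)       # d cost / d u
        gs = np.diag(u.T @ residual @ vt.T)     # d cost / d s
        gvt = np.diag(s) @ u.T @ residual       # d cost / d vt
        return gu, gs, gvt

    return cost, egrad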
def fixedrank(A, YT, r):
    """Solves the AX = YT problem on the manifold of rank-r matrices with Pymanopt."""

    # Instantiate a manifold
    manifold = FixedRankEmbedded(N, r, r)

    # Define the cost function (here using autograd.numpy)
    def cost(X):
        U = X[0]
        cst = 0
        for n in range(N):
            cst = cst + huber(U[n, :])
        Mat = np.matmul(np.matmul(X[0], np.diag(X[1])), X[2])
        fidelity = LA.norm(np.subtract(np.matmul(A, Mat), YT))
        return cst + lambd * fidelity**2

    problem = Problem(manifold=manifold, cost=cost)
    solver = ConjugateGradient(maxiter=maxiter)

    # Let Pymanopt do the rest
    Xopt = solver.solve(problem)  # Solve
    Sol = np.dot(np.dot(Xopt[0], np.diag(Xopt[1])), Xopt[2])

    return Sol
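# The snippet above relies on module-level names (N, lambd, maxiter, LA and a
# `huber` penalty). A plausible pseudo-Huber stand-in for that penalty is
# sketched here purely for illustration; the real helper may differ.
import autograd.numpy as np

def huber(u, delta=1e-2):
    # Smooth approximation of the l2 norm of a row, promoting row sparsity of U.
    return np.sqrt(np.sum(u ** 2) + delta ** 2) - delta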
def solve_manopt(X, d, cost, egrad, Wo=None):
    D = X.shape[1]
    manifold = Grassmann(height=D, width=d)
    problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=0)
    solver = ConjugateGradient(mingradnorm=1e-3)
    W = solver.solve(problem, x=Wo)
    return W
def closest_unit_norm_column_approximation(A):
    """
    Returns the matrix with unit-norm columns that is closest to A w.r.t. the
    Frobenius norm.
    """
    m, n = A.shape

    manifold = Oblique(m, n)
    solver = ConjugateGradient()
    X = T.matrix()
    cost = 0.5 * T.sum((X - A)**2)

    problem = Problem(manifold=manifold, cost=cost, arg=X)
    return solver.solve(problem)
def cp_mds_reg(X, D, lam=1.0, v=1, maxiter=1000):
    """Version of MDS in which "signs" are also an optimization parameter.

    Rather than performing a full optimization and then resetting the
    sign matrix, here we treat the signs as a parameter `A = [a_ij]` and
    minimize the cost function
        F(X,A) = ||W*(X^H(A*X) - cos(D))||^2 + lambda*||A - X^HX/|X^HX| ||^2
    Lambda is a regularization parameter we can experiment with. The
    collection of data, `X`, is treated as a point on the `Oblique`
    manifold, consisting of `k*n` matrices with unit-norm columns. Since
    we are working on a sphere in complex space we require `k` to be even.
    The first `k/2` entries of each column are the real components and
    the last `k/2` entries are the imaginary parts.

    Parameters
    ----------
    X : ndarray (k, n)
        Initial guess for data.
    D : ndarray (k, k)
        Goal distance matrix.
    lam : float, optional
        Weight to give regularization term.
    v : int, optional
        Verbosity.

    Returns
    -------
    X_opt : ndarray (k, n)
        Collection of points optimizing cost.

    """

    dim = X.shape[0]
    num_points = X.shape[1]
    W = distance_to_weights(D)
    Sreal, Simag = norm_rotations(X)
    A = np.vstack(
        (np.reshape(Sreal, (1, num_points**2)),
         np.reshape(Simag, num_points**2)))
    cp_manifold = Oblique(dim, num_points)
    a_manifold = Oblique(2, num_points**2)
    manifold = Product((cp_manifold, a_manifold))
    solver = ConjugateGradient(maxiter=maxiter, maxtime=float('inf'))
    cost = setup_reg_autograd_cost(D, int(dim / 2), num_points, lam=lam)
    problem = pymanopt.Problem(cost=cost, manifold=manifold)
    Xopt, Aopt = solver.solve(problem, x=(X, A))
    Areal = np.reshape(Aopt[0, :], (num_points, num_points))
    Aimag = np.reshape(Aopt[1, :], (num_points, num_points))
    return Xopt, Areal, Aimag
def solve_manopt(X, d, cost, egrad):
    D = X.shape[1]
    manifold = Grassmann(height=D, width=d)
    problem = Problem(manifold=manifold, cost=cost, egrad=egrad, verbosity=0)
    solver = ConjugateGradient(mingradnorm=1e-3)

    M = mean_riemann(X)
    w, v = np.linalg.eig(M)
    idx = w.argsort()[::-1]
    v_ = v[:, idx]
    Wo = v_[:, :d]
    W = solver.solve(problem, x=Wo)
    return W
def packing_on_the_sphere(n, k, epsilon):
    manifold = Elliptope(n, k)
    solver = ConjugateGradient(mingradnorm=1e-8, maxiter=1e5)

    def cost(X):
        Y = np.dot(X, X.T)
        # Shift the exponentials by the maximum value to reduce numerical
        # trouble due to possible overflows.
        s = np.triu(Y, 1).max()
        expY = np.exp((Y - s) / epsilon)
        # Zero out the diagonal
        expY -= np.diag(np.diag(expY))
        u = np.triu(expY, 1).sum()
        return s + epsilon * np.log(u)

    problem = Problem(manifold, cost)
    return solver.solve(problem)
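# Hypothetical call of packing_on_the_sphere above (it assumes `np` is
# autograd.numpy, so that pymanopt can differentiate the cost automatically).
Yopt = packing_on_the_sphere(n=24, k=3, epsilon=0.0015)
gram = Yopt @ Yopt.T
print("largest off-diagonal inner product:", np.triu(gram, 1).max())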
def _optimize(self, max_opt_time, max_opt_iter, verbosity):
    """Optimize the GeoIMC optimization problem

    Args:
        The args of `solve`
    """
    residual_global = np.zeros(self.Y.data.shape)

    solver = ConjugateGradient(maxtime=max_opt_time, maxiter=max_opt_iter,
                               linesearch=LineSearchBackTracking())
    prb = Problem(manifold=self.manifold,
                  cost=lambda x: self._cost(x, residual_global),
                  egrad=lambda z: self._egrad(z, residual_global),
                  verbosity=verbosity)
    solution = solver.solve(prb, x=self.W)
    self.W = [solution[0], solution[1], solution[2]]

    return self._cost(self.W, residual_global)
def dominant_eigenvector(A):
    """
    Returns the dominant eigenvector of the symmetric matrix A.

    Note: For the same A, this should yield the same as the dominant
    invariant subspace example with p = 1.
    """
    m, n = A.shape
    assert m == n, "matrix must be square"
    assert np.allclose(np.sum(A - A.T), 0), "matrix must be symmetric"

    manifold = Sphere(n)
    solver = ConjugateGradient(maxiter=500, minstepsize=1e-6)
    x = T.matrix()
    cost = -x.T.dot(T.dot(A, x)).trace()
    problem = Problem(man=manifold, ad_cost=cost, ad_arg=x)
    xopt = solver.solve(problem)
    return xopt.squeeze()
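# Hypothetical smoke test for dominant_eigenvector above (assumes the Theano
# backend used by the snippet is available). The comparison against
# numpy.linalg.eigh is only up to sign, since eigenvectors are defined up to
# a scalar factor.
import numpy as np

A_test = np.random.randn(8, 8)
A_test = 0.5 * (A_test + A_test.T)
x = dominant_eigenvector(A_test)
v = np.linalg.eigh(A_test)[1][:, -1]
print(min(np.linalg.norm(x - v), np.linalg.norm(x + v)))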
def _bootstrap_problem(A, k, minstepsize=1e-9, man_type='fixed'):
    m, n = A.shape
    if man_type == 'fixed':
        manifold = FixedRankEmbeeded(m, n, k)
    elif man_type == 'fixed2':
        manifold = FixedRankEmbeeded2Factors(m, n, k)
    elif man_type == 'simple':
        manifold = Simple(m, n, k)
    # solver = TrustRegions(maxiter=500, minstepsize=1e-6)
    solver = ConjugateGradient(maxiter=500, minstepsize=minstepsize)
    return manifold, solver
def main():
    r"""Main entry point in the graph embedding procedure."""
    args = config_parser().parse_args()
    g_pdists = load_pdists(args)
    n = g_pdists.shape[0]
    d = args.manifold_dim

    # we are actually using only the upper diagonal part
    g_pdists = g_pdists[np.triu_indices(n, 1)]
    g_sq_pdists = g_pdists**2

    # the distortion cost
    def distortion_cost(X):
        man_sq_pdists = manifold_pdists(X, squared=True)
        return np.sum(np.abs(man_sq_pdists / g_sq_pdists - 1))

    # the manifold, problem, and solver
    manifold = PositiveDefinite(d, k=n)
    problem = Problem(manifold=manifold, cost=distortion_cost, verbosity=2)
    linesearch = ReduceLROnPlateau(start_lr=2e-2, patience=10, threshold=1e-4,
                                   factor=0.1, verbose=1)
    solver = ConjugateGradient(linesearch=linesearch, maxiter=1000)

    # solve it
    with Timer('training') as t:
        X_opt = solver.solve(problem, x=sample_init_points(n, d))

    # the distortion achieved
    man_pdists = manifold_pdists(X_opt)
    print('Average distortion: ', average_distortion(g_pdists, man_pdists))
    man_pdists_sym = pdists_vec_to_sym(man_pdists, n, 1e12)
    print('MAP: ', mean_average_precision(g, man_pdists_sym, diag_adjusted=True))
def main_mds(D, dim=3, X=None, space='real'):
    """MDS via gradient descent with the chordal metric.

    Parameters
    ----------
    D : ndarray (n, n)
        Goal distance matrix.
    dim : int, optional
        Goal dimension (of ambient Euclidean space). Default is `dim = 3`.
    X : ndarray (dim, n), optional
        Initial value for gradient descent. `n` points in dimension `dim`.
        If both a dimension and an initial condition are specified, the
        initial condition overrides the dimension.
    space : str
        Choice of real or complex version. Options 'real', 'complex'. If
        'complex', dim must be even.

    """

    n = D.shape[0]
    max_d = np.max(D)
    if max_d > 1:
        print('WARNING: maximum value in distance matrix exceeds diameter of '
              'projective space. Max distance = %2.4f.' % max_d)
    manifold = Oblique(dim, n)
    solver = ConjugateGradient()
    if space == 'real':
        cost = setup_cost(D)
    elif space == 'complex':
        cost = setup_CPn_cost(D, int(dim/2))
    problem = pymanopt.Problem(manifold=manifold, cost=cost)
    if X is None:
        X_out = solver.solve(problem)
    else:
        if X.shape[0] != dim:
            print('WARNING: initial condition does not match specified goal '
                  'dimension. Finding optimum in dimension %d' % X.shape[0])
        X_out = solver.solve(problem, x=X)
    return X_out
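# Hypothetical smoke test for main_mds above, assuming setup_cost is available
# in the same module. The synthetic "distances" are only meant to have the
# right shape and to stay below 1 (the warning threshold used by the function).
import numpy as np

rng = np.random.RandomState(1)
P = rng.randn(3, 10)
P /= np.linalg.norm(P, axis=0)                   # unit-norm columns
D_toy = np.sqrt(np.clip(1 - (P.T @ P) ** 2, 0, 1))  # chordal-style distances in [0, 1]
X_opt = main_mds(D_toy, dim=3, space='real')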
def run(backend=SUPPORTED_BACKENDS[0], quiet=True):
    m = 5
    n = 8
    matrix = rnd.randn(m, n)

    cost, egrad = create_cost_egrad(backend, matrix)
    manifold = Oblique(m, n)
    problem = pymanopt.Problem(manifold, cost=cost, egrad=egrad)
    if quiet:
        problem.verbosity = 0

    solver = ConjugateGradient()
    Xopt = solver.solve(problem)

    if quiet:
        return

    # Calculate the actual solution by normalizing the columns of A.
    X = matrix / la.norm(matrix, axis=0)[np.newaxis, :]

    # Print information about the solution.
    print("Solution found: %s" % np.allclose(X, Xopt, rtol=1e-3))
    print("Frobenius-error: %f" % la.norm(X - Xopt))
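# A hedged sketch of the create_cost_egrad helper assumed by the Oblique
# example above, for a numpy backend: half the squared Frobenius distance to
# `matrix`, together with its Euclidean gradient. Illustrative only, not the
# example's actual code.
import numpy as np

def create_cost_egrad_numpy(matrix):
    def cost(X):
        return 0.5 * np.sum((X - matrix) ** 2)

    def egrad(X):
        return X - matrix

    return cost, egrad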
def test_doublystochastic(N, M, K):
    rnd.seed(21)
    ns = [N] * K
    ms = [M] * K
    batch = len(ns)

    p = []
    q = []
    A = []
    for i in range(batch):
        n, m = ns[i], ms[i]
        p0 = np.random.rand(n)
        q0 = np.random.rand(m)
        p.append(p0 / np.sum(p0))
        q.append(q0 / np.sum(q0))
        A0 = rnd.rand(n, m)
        A0 = A0[np.newaxis, :]
        A0 = SKnopp(A0, p[i], q[i], n + m)
        A.append(A0)
    A = np.vstack([C for C in A])

    def _cost(x):
        return 0.5 * (np.linalg.norm(np.array(x) - np.array(A))**2)

    def _egrad(x):
        return x - A

    def _ehess(x, u):
        return u

    manf = DoublyStochastic(n, m, p, q)
    solver = ConjugateGradient(maxiter=3, maxtime=100000)
    prblm = Problem(manifold=manf,
                    cost=lambda x: _cost(x),
                    egrad=lambda x: _egrad(x),
                    ehess=lambda x, u: _ehess(x, u),
                    verbosity=3)

    U = manf.rand()
    Uopt = solver.solve(prblm, x=U)
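# The test above relies on a Sinkhorn-Knopp helper `SKnopp` to balance each
# random matrix so that its row sums equal p and column sums equal q. A
# minimal two-dimensional sketch of that balancing step is given below purely
# for reference; the helper actually used alongside the DoublyStochastic
# manifold also handles batched (3-D) inputs and an explicit iteration budget.
import numpy as np

def sknopp_2d(A, p, q, maxiter=100, tol=1e-10):
    A = np.array(A, dtype=float)
    for _ in range(maxiter):
        A *= (p / A.sum(axis=1))[:, None]   # match row marginals
        A *= (q / A.sum(axis=0))[None, :]   # match column marginals
        if np.allclose(A.sum(axis=1), p, atol=tol):
            break
    return A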
def optimize_on_manifold(self, options, optmeth):
    if optmeth not in ['bo13', 'wen12', 'ManOpt']:
        print("Chosen optimization method", optmeth,
              "has not been implemented, using 'ManOpt'")
        optmeth = 'ManOpt'

    if optmeth == 'ManOpt':
        # This is hardcoding it to the two-dimensional case..
        manifold_one = Stiefel(
            np.shape(self.rotations[0])[0], np.shape(self.rotations[0])[1])
        manifold_two = Stiefel(
            np.shape(self.rotations[0])[0], np.shape(self.rotations[0])[1])
        manifold = Product((manifold_one, manifold_two))
        optimization_variable = tf.Variable(tf.placeholder(tf.float32))
        problem = Problem(manifold=manifold, cost=self.my_cost(),
                          arg=optimization_variable)
        solver = ConjugateGradient(problem, optimization_variable, options)
        return solver
def main(): # Parse command line arguments parser = argparse.ArgumentParser(description='Map the source embeddings into the target embedding space') parser.add_argument('src_input', help='the input source embeddings') parser.add_argument('trg_input', help='the input target embeddings') parser.add_argument('--model_path', default=None, type=str, help='directory to save the model') parser.add_argument('--geomm_embeddings_path', default=None, type=str, help='directory to save the output GeoMM latent space embeddings. The output embeddings are normalized.') parser.add_argument('--encoding', default='utf-8', help='the character encoding for input/output (defaults to utf-8)') parser.add_argument('--max_vocab', default=0,type=int, help='Maximum vocabulary to be loaded, 0 allows complete vocabulary') parser.add_argument('--verbose', default=0,type=int, help='Verbose') mapping_group = parser.add_argument_group('mapping arguments', 'Basic embedding mapping arguments') mapping_group.add_argument('-dtrain', '--dictionary_train', default=sys.stdin.fileno(), help='the training dictionary file (defaults to stdin)') mapping_group.add_argument('-dtest', '--dictionary_test', default=sys.stdin.fileno(), help='the test dictionary file (defaults to stdin)') mapping_group.add_argument('--normalize', choices=['unit', 'center', 'unitdim', 'centeremb'], nargs='*', default=[], help='the normalization actions to perform in order') geomm_group = parser.add_argument_group('GeoMM arguments', 'Arguments for GeoMM method') geomm_group.add_argument('--l2_reg', type=float,default=1e2, help='Lambda for L2 Regularization') geomm_group.add_argument('--max_opt_time', type=int,default=5000, help='Maximum time limit for optimization in seconds') geomm_group.add_argument('--max_opt_iter', type=int,default=150, help='Maximum number of iterations for optimization') eval_group = parser.add_argument_group('evaluation arguments', 'Arguments for evaluation') eval_group.add_argument('--normalize_eval', action='store_true', help='Normalize the embeddings at test time') eval_group.add_argument('--eval_batch_size', type=int,default=1000, help='Batch size for evaluation') eval_group.add_argument('--csls_neighbourhood', type=int,default=10, help='Neighbourhood size for CSLS') args = parser.parse_args() BATCH_SIZE = args.eval_batch_size ## Logging #method_name = os.path.join('logs','geomm') #directory = os.path.join(os.path.join(os.getcwd(),method_name), datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')) #if not os.path.exists(directory): # os.makedirs(directory) #log_file_name, file_extension = os.path.splitext(os.path.basename(args.dictionary_train)) #log_file_name = log_file_name + '.log' #class Logger(object): # def __init__(self): # self.terminal = sys.stdout # self.log = open(os.path.join(directory,log_file_name), "a") # def write(self, message): # self.terminal.write(message) # self.log.write(message) # def flush(self): # #this flush method is needed for python 3 compatibility. # #this handles the flush command by doing nothing. # #you might want to specify some extra behavior here. 
# pass #sys.stdout = Logger() if args.verbose: print('Current arguments: {0}'.format(args)) dtype = 'float32' if args.verbose: print('Loading train data...') # Read input embeddings srcfile = open(args.src_input, encoding=args.encoding, errors='surrogateescape') trgfile = open(args.trg_input, encoding=args.encoding, errors='surrogateescape') src_words, x = embeddings.read(srcfile,max_voc=args.max_vocab, dtype=dtype) trg_words, z = embeddings.read(trgfile,max_voc=args.max_vocab, dtype=dtype) # Build word to index map src_word2ind = {word: i for i, word in enumerate(src_words)} trg_word2ind = {word: i for i, word in enumerate(trg_words)} # Build training dictionary noov=0 src_indices = [] trg_indices = [] f = open(args.dictionary_train, encoding=args.encoding, errors='surrogateescape') for line in f: src,trg = line.split() if args.max_vocab: src=src.lower() trg=trg.lower() try: src_ind = src_word2ind[src] trg_ind = trg_word2ind[trg] src_indices.append(src_ind) trg_indices.append(trg_ind) except KeyError: noov+=1 if args.verbose: print('WARNING: OOV dictionary entry ({0} - {1})'.format(src, trg)) #, file=sys.stderr f.close() if args.verbose: print('Number of training pairs having at least one OOV: {}'.format(noov)) src_indices = src_indices trg_indices = trg_indices if args.verbose: print('Normalizing embeddings...') # STEP 0: Normalization for action in args.normalize: if action == 'unit': x = embeddings.length_normalize(x) z = embeddings.length_normalize(z) elif action == 'center': x = embeddings.mean_center(x) z = embeddings.mean_center(z) elif action == 'unitdim': x = embeddings.length_normalize_dimensionwise(x) z = embeddings.length_normalize_dimensionwise(z) elif action == 'centeremb': x = embeddings.mean_center_embeddingwise(x) z = embeddings.mean_center_embeddingwise(z) # Step 1: Optimization if args.verbose: print('Beginning Optimization') start_time = time.time() x_count = len(set(src_indices)) z_count = len(set(trg_indices)) A = np.zeros((x_count,z_count)) # Creating dictionary matrix from training set map_dict_src={} map_dict_trg={} I=0 uniq_src=[] uniq_trg=[] for i in range(len(src_indices)): if src_indices[i] not in map_dict_src.keys(): map_dict_src[src_indices[i]]=I I+=1 uniq_src.append(src_indices[i]) J=0 for j in range(len(trg_indices)): if trg_indices[j] not in map_dict_trg.keys(): map_dict_trg[trg_indices[j]]=J J+=1 uniq_trg.append(trg_indices[j]) for i in range(len(src_indices)): A[map_dict_src[src_indices[i]],map_dict_trg[trg_indices[i]]]=1 np.random.seed(0) Lambda=args.l2_reg U1 = TT.matrix() U2 = TT.matrix() B = TT.matrix() Kx, Kz = x[uniq_src], z[uniq_trg] XtAZ = Kx.T.dot(A.dot(Kz)) XtX = Kx.T.dot(Kx) ZtZ = Kz.T.dot(Kz) # AA = np.sum(A*A) # this can be added if cost needs to be compared to original geomm W = (U1.dot(B)).dot(U2.T) regularizer = 0.5*Lambda*(TT.sum(B**2)) sXtX = shared(XtX) sZtZ = shared(ZtZ) sXtAZ = shared(XtAZ) cost = regularizer wtxtxw = W.T.dot(sXtX.dot(W)) wtxtxwztz = wtxtxw.dot(sZtZ) cost += TT.nlinalg.trace(wtxtxwztz) cost += -2 * TT.sum(W * sXtAZ) # cost += shared(AA) # this can be added if cost needs to be compared with original geomm solver = ConjugateGradient(maxtime=args.max_opt_time,maxiter=args.max_opt_iter) manifold =Product([Stiefel(x.shape[1], x.shape[1]),Stiefel(z.shape[1], x.shape[1]),PositiveDefinite(x.shape[1])]) #manifold =Product([Stiefel(x.shape[1], 200),Stiefel(z.shape[1], 200),PositiveDefinite(200)]) problem = Problem(manifold=manifold, cost=cost, arg=[U1,U2,B], verbosity=3) wopt = solver.solve(problem) w= wopt U1 = w[0] U2 = 
w[1] B = w[2] ### Save the models if requested if args.model_path is not None: os.makedirs(args.model_path,exist_ok=True) np.savetxt('{}/U_src.csv'.format(args.model_path),U1) np.savetxt('{}/U_tgt.csv'.format(args.model_path),U2) np.savetxt('{}/B.csv'.format(args.model_path),B) # Step 2: Transformation xw = x.dot(U1).dot(scipy.linalg.sqrtm(B)) zw = z.dot(U2).dot(scipy.linalg.sqrtm(B)) end_time = time.time() if args.verbose: print('Completed training in {0:.2f} seconds'.format(end_time-start_time)) gc.collect() ### Save the GeoMM embeddings if requested xw_n = embeddings.length_normalize(xw) zw_n = embeddings.length_normalize(zw) if args.geomm_embeddings_path is not None: os.makedirs(args.geomm_embeddings_path,exist_ok=True) out_emb_fname=os.path.join(args.geomm_embeddings_path,'src.vec') with open(out_emb_fname,'w',encoding=args.encoding) as outfile: embeddings.write(src_words,xw_n,outfile) out_emb_fname=os.path.join(args.geomm_embeddings_path,'trg.vec') with open(out_emb_fname,'w',encoding=args.encoding) as outfile: embeddings.write(trg_words,zw_n,outfile) # Step 3: Evaluation if args.normalize_eval: xw = xw_n zw = zw_n X = xw[src_indices] Z = zw[trg_indices] # Loading test dictionary f = open(args.dictionary_test, encoding=args.encoding, errors='surrogateescape') src2trg = collections.defaultdict(set) trg2src = collections.defaultdict(set) oov = set() vocab = set() for line in f: src, trg = line.split() if args.max_vocab: src=src.lower() trg=trg.lower() try: src_ind = src_word2ind[src] trg_ind = trg_word2ind[trg] src2trg[src_ind].add(trg_ind) trg2src[trg_ind].add(src_ind) vocab.add(src) except KeyError: oov.add(src) src = list(src2trg.keys()) trgt = list(trg2src.keys()) oov -= vocab # If one of the translation options is in the vocabulary, then the entry is not an oov coverage = len(src2trg) / (len(src2trg) + len(oov)) f.close() translation = collections.defaultdict(int) translation5 = collections.defaultdict(list) translation10 = collections.defaultdict(list) ### compute nearest neigbours of x in z t=time.time() nbrhood_x=np.zeros(xw.shape[0]) for i in range(0, len(src), BATCH_SIZE): j = min(i + BATCH_SIZE, len(src)) similarities = xw[src[i:j]].dot(zw.T) similarities_x = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1) nbrhood_x[src[i:j]]=np.mean(similarities_x[:,:args.csls_neighbourhood],axis=1) ### compute nearest neigbours of z in x (GPU version) nbrhood_z=np.zeros(zw.shape[0]) with cp.cuda.Device(0): nbrhood_z2=cp.zeros(zw.shape[0]) batch_num=1 for i in range(0, zw.shape[0], BATCH_SIZE): j = min(i + BATCH_SIZE, zw.shape[0]) similarities = -1*cp.partition(-1*cp.dot(cp.asarray(zw[i:j]),cp.transpose(cp.asarray(xw))),args.csls_neighbourhood-1 ,axis=1)[:,:args.csls_neighbourhood] nbrhood_z2[i:j]=(cp.mean(similarities[:,:args.csls_neighbourhood],axis=1)) batch_num+=1 nbrhood_z=cp.asnumpy(nbrhood_z2) #### compute nearest neigbours of z in x (CPU version) #nbrhood_z=np.zeros(zw.shape[0]) #for i in range(0, len(zw.shape[0]), BATCH_SIZE): # j = min(i + BATCH_SIZE, len(zw.shape[0])) # similarities = zw[i:j].dot(xw.T) # similarities_z = -1*np.partition(-1*similarities,args.csls_neighbourhood-1 ,axis=1) # nbrhood_z[i:j]=np.mean(similarities_z[:,:args.csls_neighbourhood],axis=1) #### find translation #for i in range(0, len(src), BATCH_SIZE): # j = min(i + BATCH_SIZE, len(src)) # similarities = xw[src[i:j]].dot(zw.T) # similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z # nn = similarities.argmax(axis=1).tolist() # similarities = 
np.argsort((similarities),axis=1) # nn5 = (similarities[:,-5:]) # nn10 = (similarities[:,-10:]) # for k in range(j-i): # translation[src[i+k]] = nn[k] # translation5[src[i+k]] = nn5[k] # translation10[src[i+k]] = nn10[k] #if args.geomm_embeddings_path is not None: # delim=',' # os.makedirs(args.geomm_embeddings_path,exist_ok=True) # translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv') # with open(translations_fname,'w',encoding=args.encoding) as translations_file: # for src_id in src: # src_word = src_words[src_id] # all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ] # trgout_words = [ trg_words[j] for j in translation10[src_id] ] # ss = list(nn10[src_id,:]) # # p1 = ':'.join(all_trg_words) # p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] ) # translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, delim=delim, p1=p1, p2=p2) ) ### find translation (and write to file if output requested) delim=',' translations_file =None if args.geomm_embeddings_path is not None: os.makedirs(args.geomm_embeddings_path,exist_ok=True) translations_fname=os.path.join(args.geomm_embeddings_path,'translations.csv') translations_file = open(translations_fname,'w',encoding=args.encoding) for i in range(0, len(src), BATCH_SIZE): j = min(i + BATCH_SIZE, len(src)) similarities = xw[src[i:j]].dot(zw.T) similarities = np.transpose(np.transpose(2*similarities) - nbrhood_x[src[i:j]]) - nbrhood_z nn = similarities.argmax(axis=1).tolist() similarities = np.argsort((similarities),axis=1) nn5 = (similarities[:,-5:]) nn10 = (similarities[:,-10:]) for k in range(j-i): translation[src[i+k]] = nn[k] translation5[src[i+k]] = nn5[k] translation10[src[i+k]] = nn10[k] if args.geomm_embeddings_path is not None: src_id=src[i+k] src_word = src_words[src_id] all_trg_words = [ trg_words[trg_id] for trg_id in src2trg[src_id] ] trgout_words = [ trg_words[j] for j in translation10[src_id] ] #ss = list(nn10[src_id,:]) p1 = ':'.join(all_trg_words) p2 = ':'.join(trgout_words) #p2 = delim.join( [ '{}{}{}'.format(w,delim,s) for w,s in zip(trgout_words,ss) ] ) translations_file.write( '{s}{delim}{p1}{delim}{p2}\n'.format(s=src_word, p1=p1, p2=p2, delim=delim) ) if args.geomm_embeddings_path is not None: translations_file.close() accuracy = np.mean([1 if translation[i] in src2trg[i] else 0 for i in src]) mean=0 for i in src: for k in translation5[i]: if k in src2trg[i]: mean+=1 break mean/=len(src) accuracy5 = mean mean=0 for i in src: for k in translation10[i]: if k in src2trg[i]: mean+=1 break mean/=len(src) accuracy10 = mean message = src_input.split(".")[-2] + "-->" + trg_input.split(".")[-2] + ":" 'Coverage:{0:7.2%} Accuracy:{1:7.2%}'.format(coverage, accuracy)
# Take a step further and use pymanopt to solve the optimization as a
# constrained problem
from pymanopt import Problem
from pymanopt.solvers import ConjugateGradient
from pymanopt.manifolds import Stiefel

# define objective function
def pca_objective(U):
    return -np.trace(np.dot(U.T, np.dot(cov, U)))

# set up Pymanopt problem and solve.
solver = ConjugateGradient(maxiter=1000)
manifold = Stiefel(D, components)
problem = Problem(manifold=manifold, cost=pca_objective, verbosity=0)
Uopt = solver.solve(problem)
print('Solution found using Pymanopt = \n', Uopt, end='\n\n')

# Find the matrix T such that UA = Uopt
T = np.linalg.lstsq(U, Uopt, rcond=None)[0]

# Assuming this is a rotation matrix, compute the angle and confirm this is
# indeed a rotation
angle = np.arccos(T[0, 0])
rotation = np.array([[np.cos(angle), np.sin(angle)],
                     [-np.sin(angle), np.cos(angle)]])
print('T matrix found as: \n', T)
print('rotation matrix associated with an angle of ', np.round(angle, 4),
      rotation, end='\n\n')
def _bootstrap_problem(self):
    self.manifold = FixedRankEmbeeded(self.num_users, self.num_items,
                                      self.num_factors + 1)
    self.solver = ConjugateGradient(maxiter=self.iterations,
                                    minstepsize=self.minstepsize)
class UsvRiemannianLogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-9):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded(self.num_users, self.num_items,
                                          self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.U = T.matrix('U')
        self.S = T.matrix('S')
        self.V = T.matrix('V')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.U, self.S, self.V])
        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                  self.num_factors + 1))
            item_vectors = np.random.normal(size=(self.num_items,
                                                  self.num_factors + 1))
            s = rnd.random(self.num_factors + 1)
            s[:-1] = np.sort(s[:-1])[::-1]
            x0 = (user_vectors, np.diag(s), item_vectors.T)
        else:
            x0 = x0
        (left, middle, right), self.loss_history = self.solver.solve(problem, x=x0)
        right = right.T

        s_mid = np.diag(np.sqrt(np.diag(middle)[:-1]))
        self.middle = s_mid

        print('U norm: {}'.format(la.norm(left[:, :-1])))
        print('V norm: {}'.format(la.norm(right[:, :-1])))
        self.user_vectors = left[:, :-1].dot(s_mid)
        self.item_vectors = right[:, :-1].dot(s_mid)
        self.user_biases = left[:, -1] * np.sqrt(middle[-1, -1])
        self.item_biases = right[:, -1] * np.sqrt(middle[-1, -1])
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print('LL: {}'.format(self._log_likelihood()))

    def _log_likelihood(self):
        loglik = 0
        A = np.dot(self.user_vectors, self.item_vectors.T)
        A += self.user_biases
        A += self.item_biases.T
        B = A * self.counts
        loglik += np.sum(B)

        A = np.exp(A)
        A += 1
        A = np.log(A)

        A = (self.counts + 1) * A
        loglik -= np.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * np.sum(np.square(np.diag(self.middle)))
        return loglik

    def log_likelihood(self):
        Users = self.U[:, :-1]
        Middle = self.S
        Items = self.V[:-1, :]
        UserBiases = self.U[:, -1].reshape((-1, 1))
        ItemBiases = self.V[-1, :].reshape((-1, 1))

        A = T.dot(T.dot(self.U[:, :-1], self.S[:-1, :-1]), self.V[:-1, :])
        A = T.inc_subtensor(A[:, :], UserBiases * T.sqrt(self.S[-1, -1]))
        A = T.inc_subtensor(A[:, :], ItemBiases.T * T.sqrt(self.S[-1, -1]))
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(T.diag(self.S)[:-1]))

        # Return negation of LogLikelihood cause we will minimize cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()

        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
class WildLogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-10):
        self.counts = counts
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users,
                                                  self.num_items,
                                                  self.num_factors + 1)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self, x0=None):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])
        if x0 is None:
            user_vectors = np.random.normal(size=(self.num_users,
                                                  self.num_factors))
            item_vectors = np.random.normal(size=(self.num_items,
                                                  self.num_factors))
            user_biases = np.random.normal(size=(self.num_users, 1)) / SCONST
            item_biases = np.random.normal(size=(self.num_items, 1)) / SCONST
            x0 = (np.hstack((user_vectors, user_biases)),
                  np.hstack((item_vectors, item_biases)))
        else:
            x0 = x0
        (left, right), self.loss_history = self.solver.solve(problem, x=x0)

        self.user_vectors = left[:, :-1]
        self.item_vectors = right[:, :-1]
        self.user_biases = left[:, -1].reshape((self.num_users, 1))
        self.item_biases = right[:, -1].reshape((self.num_items, 1))
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))

    def log_likelihood(self):
        Users = self.L[:, :-1]
        Items = self.R[:, :-1]
        UserBiases = self.L[:, -1].reshape((-1, 1))
        ItemBiases = self.R[:, -1].reshape((-1, 1))

        A = T.dot(self.L[:, :-1], (self.R[:, :-1]).T)
        A = T.inc_subtensor(A[:, :], UserBiases)
        A = T.inc_subtensor(A[:, :], ItemBiases.T)
        B = A * self.counts
        loglik = T.sum(B)

        A = T.exp(A)
        A += 1
        A = T.log(A)

        A = (self.counts + 1) * A
        loglik -= T.sum(A)

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.L[:, :-1]))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(self.R[:, :-1]))

        # Return negation of LogLikelihood cause we will minimize cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()

        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
class LogisticMF():
    def __init__(self, counts, num_factors, reg_param=0.6, gamma=1.0,
                 iterations=30, minstepsize=1e-9):
        self.counts = counts
        N = 20000
        self.counts = counts[:N, :N]
        self.num_users = self.counts.shape[0]
        self.num_items = self.counts.shape[1]
        self.num_factors = num_factors + 2
        self.iterations = iterations
        self.minstepsize = minstepsize
        self.reg_param = reg_param
        self.gamma = gamma
        self._bootstrap_problem()

    def _bootstrap_problem(self):
        self.manifold = FixedRankEmbeeded2Factors(self.num_users,
                                                  self.num_items,
                                                  self.num_factors)
        self.solver = ConjugateGradient(maxiter=self.iterations,
                                        minstepsize=self.minstepsize)

    def train_model(self):
        self.L = T.matrix('L')
        self.R = T.matrix('R')
        problem = Problem(man=self.manifold,
                          theano_cost=self.log_likelihood(),
                          theano_arg=[self.L, self.R])
        left, right = self.solver.solve(problem)
        self.user_vectors = left[:, :-2]
        self.item_vectors = right[:, :-2]
        self.user_biases = left[:, -1]
        self.item_biases = right[:, -2]
        print('U norm: {}'.format(la.norm(self.user_vectors)))
        print('V norm: {}'.format(la.norm(self.item_vectors)))
        print("how much user outer? {}".format(
            np.average(np.isclose(left[:, -2], 1))))
        print("how much item outer? {}".format(
            np.average(np.isclose(right[:, -1], 1))))
        print('user delta: {} in norm, {} in max abs'.format(
            la.norm(left[:, -2] - 1), np.max(np.abs(left[:, -2] - 1))))
        print('item delta: {} in norm, {} in max abs'.format(
            la.norm(right[:, -1] - 1), np.max(np.abs(right[:, -1] - 1))))

    def evaluate_lowrank(self, U, V, item, fast=False):
        if hasattr(item, '__len__') and len(item) == 2 and len(item[0]) == len(item[1]):
            if fast:
                rows = U[item[0], :]
                cols = V[item[1], :]
                data = (rows * cols).sum(1)
                return data
            else:
                idx_argsort = item[0].argsort()
                item = (item[0][idx_argsort], item[1][idx_argsort])

                vals, idxs, counts = [theano.shared(it) for it in
                                      np.unique(item[0], return_index=True,
                                                return_counts=True)]
                output = T.zeros(int(np.max(counts.get_value())))
                it1 = theano.shared(item[1])

                def process_partial_dot(row_idx, out, U, V, item):
                    partial_dot = T.dot(
                        U[vals[row_idx], :],
                        V[item[idxs[row_idx]:idxs[row_idx] + counts[row_idx]], :].T)
                    return T.set_subtensor(out[:counts[row_idx]], partial_dot)

                parts, updates = theano.scan(fn=process_partial_dot,
                                             outputs_info=output,
                                             sequences=T.arange(vals.size),
                                             non_sequences=[U, V, it1])
                mask = np.ones((vals.get_value().size,
                                int(np.max(counts.get_value()))))
                for i, count in enumerate(counts.get_value()):
                    mask[i, count:] = 0
                return parts[theano.shared(mask).nonzero()].ravel()
        else:
            raise ValueError('__getitem__ now supports only indices set')

    def log_likelihood(self):
        Users = self.L[:, :-2]
        Items = self.R[:, :-2]
        UserBiases = self.L[:, -1]
        ItemBiases = self.R[:, -2]
        UserOuter = self.L[:, -2]
        ItemOuter = self.R[:, -1]

        ## A = T.dot(Users, Items.T)
        ## A += UserBiases
        ## A += ItemBiases.T
        ## B = A * self.counts
        ## loglik = T.sum(B)

        # A implicitly stored as self.L @ self.R.T
        # loglik = T.sum(A * self.counts) => sum over nonzeros only
        print('nnz size: {}'.format(self.counts.nonzero()[0].size))
        loglik = T.dot(
            self.evaluate_lowrank(self.L, self.R, self.counts.nonzero(),
                                  fast=False),
            np.array(self.counts[self.counts.nonzero()]).ravel())

        ## A = T.exp(A)
        ## A += 1
        ## A = T.log(A)
        # There we use the Taylor series ln(exp(x) + 1) = ln(2) + x/2 + x^2/8 + O(x^4) at x=0
        # ln(2)
        const_term = (T.ones((self.num_users, 1)) * np.log(2),
                      T.ones((self.num_items, 1)))
        # x/2
        first_order_term = (0.5 * self.L, 0.5 * self.R)
        # x^2/8
        second_order_term = hadamard((self.L, self.R), (self.L, self.R),
                                     self.num_factors)
        second_order_term = tuple(factor / 8.0 for factor in second_order_term)

        grouped_factors = list(zip(const_term, first_order_term,
                                   second_order_term))
        A = (T.concatenate(grouped_factors[0], axis=1),
             T.concatenate(grouped_factors[1], axis=1))

        ## A = (self.counts + 1) * A
        ## loglik -= T.sum(A)
        loglik -= sum_lowrank(A)
        loglik -= T.dot(
            self.evaluate_lowrank(A[0], A[1], self.counts.nonzero(),
                                  fast=False),
            np.array(self.counts[self.counts.nonzero()]).ravel())

        # L2 regularization
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Users))
        loglik -= 0.5 * self.reg_param * T.sum(T.square(Items))

        # we need to keep UserOuter and ItemOuter strictly equal to one, so that
        # they form proper outer products with the biases
        loglik -= self.num_users * T.sum(T.square(UserOuter - 1))
        loglik -= self.num_items * T.sum(T.square(ItemOuter - 1))

        # Return negation of LogLikelihood cause we will minimize cost
        return -loglik

    def print_vectors(self):
        user_vecs_file = open('logmf-user-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_users):
            vec = ' '.join(map(str, self.user_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            user_vecs_file.write(line)
        user_vecs_file.close()

        item_vecs_file = open('logmf-item-vecs-%i' % self.num_factors, 'w')
        for i in range(self.num_items):
            vec = ' '.join(map(str, self.item_vectors[i]))
            line = '%i\t%s\n' % (i, vec)
            item_vecs_file.write(line)
        item_vecs_file.close()
# (a) Instantiation of a manifold
# points on the manifold are parameterized via their singular value
# decomposition (u, s, vt) where
# u is a 5 x 2 matrix with orthonormal columns,
# s is a vector of length 2,
# vt is a 2 x 4 matrix with orthonormal rows,
# so that u*diag(s)*vt is a 5 x 4 matrix of rank 2.
manifold = FixedRankEmbedded(A.shape[0], A.shape[1], k)

# (b) Definition of a cost function (here using autograd.numpy)
# Note that the cost must be defined in terms of u, s and vt, where
# X = u * diag(s) * vt.
def cost(usv):
    delta = .5
    u = usv[0]
    s = usv[1]
    vt = usv[2]
    X = np.dot(np.dot(u, np.diag(s)), vt)
    return np.sum(np.sqrt((X - A)**2 + delta**2) - delta)

# define the Pymanopt problem
problem = Problem(manifold=manifold, cost=cost)

# (c) Instantiation of a Pymanopt solver
solver = ConjugateGradient()

# let Pymanopt do the rest
X = solver.solve(problem)
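# Hypothetical follow-up, assuming the data matrix A and rank k were defined
# earlier in the script: rebuild the dense rank-k estimate from the returned
# (u, s, vt) triple and report the Frobenius residual of the robust fit.
low_rank_estimate = np.dot(np.dot(X[0], np.diag(X[1])), X[2])
print("Frobenius residual:", np.linalg.norm(low_rank_estimate - A))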
def RELMM(data, A_init, S0, lambda_S, lambda_S0):

    [L, N] = data.shape
    [L, P] = S0.shape

    V = P * np.eye(P) - np.outer(np.ones(P), np.transpose(np.ones(P)))

    def cost(X):
        data_fit = np.zeros(N)
        for n in np.arange(N):
            data_fit[n] = np.linalg.norm(
                S[:, :, n] - np.dot(X, np.diag(psi[:, n])), 'fro')**2
        cost = lambda_S / 2 * np.sum(data_fit, axis=0) + \
            lambda_S0 / 2 * np.trace(np.dot(np.dot(X, V), np.transpose(X)))
        return cost

    def egrad(X):
        partial_grad = np.zeros([L, P, N])
        for n in np.arange(N):
            partial_grad[:, :, n] = np.dot(X, np.diag(psi[:, n])) - \
                np.dot(S[:, :, n], np.diag(psi[:, n]))
        egrad = lambda_S * np.sum(partial_grad, axis=2) + \
            lambda_S0 * np.dot(X, V)
        return egrad

    A = A_init
    S = np.zeros([L, P, N])
    psi = np.ones([P, N])

    for n in np.arange(N):
        S[:, :, n] = S0

    maxiter = 200

    U = A                   # split variable
    D = np.zeros(A.shape)   # Lagrange multipliers

    rho = 1

    maxiter_ADMM = 100
    tol_A_ADMM = 10**-3
    tol_A = 10**-3
    tol_S = 10**-3
    tol_psi = 10**-3
    tol_S0 = 10**-3

    I = np.identity(P)

    for i in np.arange(maxiter):

        A_old = np.copy(A)
        psi_old = np.copy(psi)
        S_old = np.copy(S)
        S0_old = np.copy(S0)

        # A update
        for j in np.arange(maxiter_ADMM):

            A_old_ADMM = np.copy(A)

            for n in np.arange(N):
                A[:, n] = np.dot(
                    np.linalg.inv(
                        np.dot(np.transpose(S[:, :, n]), S[:, :, n]) + rho * I),
                    np.dot(np.transpose(S[:, :, n]), data[:, n]) +
                    rho * (U[:, n] - D[:, n]))

            U = proj_simplex(A + D)

            D = D + A - U

            if j > 0:
                rel_A_ADMM = np.abs(
                    np.linalg.norm(A, 'fro') -
                    np.linalg.norm(A_old_ADMM, 'fro')) / \
                    np.linalg.norm(A_old_ADMM, 'fro')

                print("iteration ", j, " of ", maxiter_ADMM,
                      ", rel_A_ADMM =", rel_A_ADMM)

                if rel_A_ADMM < tol_A_ADMM:
                    break

        # psi update
        for n in np.arange(N):
            for p in np.arange(P):
                psi[p, n] = np.dot(np.transpose(S0[:, p]), S[:, p, n]) / \
                    np.dot(np.transpose(S0[:, p]), S0[:, p])

        # S update
        for n in np.arange(N):
            S[:, :, n] = np.dot(
                np.outer(data[:, n], np.transpose(A[:, n])) +
                lambda_S * np.dot(S0, np.diag(psi[:, n])),
                np.linalg.inv(
                    np.outer(A[:, n], np.transpose(A[:, n])) + lambda_S * I))

        # S0 update
        manifold = Oblique(L, P)
        solver = ConjugateGradient()
        problem = Problem(manifold=manifold, cost=cost, egrad=egrad)
        S0 = solver.solve(problem)

        # termination checks
        if i > 0:
            S_vec = np.hstack(S)

            rel_A = np.abs(
                np.linalg.norm(A, 'fro') - np.linalg.norm(A_old, 'fro')) / \
                np.linalg.norm(A_old, 'fro')
            rel_psi = np.abs(
                np.linalg.norm(psi, 'fro') - np.linalg.norm(psi_old, 'fro')) / \
                np.linalg.norm(psi_old, 'fro')
            rel_S = np.abs(
                np.linalg.norm(S_vec) - np.linalg.norm(np.hstack(S_old))) / \
                np.linalg.norm(S_old)
            rel_S0 = np.abs(
                np.linalg.norm(S0, 'fro') - np.linalg.norm(S0_old, 'fro')) / \
                np.linalg.norm(S0_old, 'fro')

            print("iteration ", i, " of ", maxiter, ", rel_A =", rel_A,
                  ", rel_psi =", rel_psi, "rel_S =", rel_S, "rel_S0 =", rel_S0)

            if rel_A < tol_A and rel_psi < tol_psi and rel_S < tol_S and \
                    rel_S0 < tol_S0 and i > 1:
                break

    return A, psi, S, S0
def _update_classifier(self, data, labels, w, classes):
    """Update the classifier parameters theta and bias

    Parameters
    ----------

    data : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject
        for the classification task.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of 2D array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    classes : int
        The number of classes in the classifier.

    Returns
    -------

    theta : array, shape=[features, classes]
        The MLR parameter for the class planes.

    bias : array shape=[classes,]
        The MLR parameter for class biases.
    """

    # Stack the data and labels for training the classifier
    data_stacked, labels_stacked, weights = \
        SSSRM._stack_list(data, labels, w)

    features = w[0].shape[1]
    total_samples = weights.size

    data_th = S.shared(data_stacked.astype(theano.config.floatX))
    val_ = S.shared(labels_stacked)
    total_samples_S = S.shared(total_samples)
    theta_th = T.matrix(name='theta', dtype=theano.config.floatX)
    bias_th = T.col(name='bias', dtype=theano.config.floatX)
    constf2 = S.shared(self.alpha / self.gamma, allow_downcast=True)
    weights_th = S.shared(weights)

    log_p_y_given_x = \
        T.log(T.nnet.softmax((theta_th.T.dot(data_th.T)).T + bias_th.T))
    f = -constf2 * T.sum(
        (log_p_y_given_x[T.arange(total_samples_S), val_])
        / weights_th) + 0.5 * T.sum(theta_th ** 2)

    manifold = Product((Euclidean(features, classes),
                        Euclidean(classes, 1)))
    problem = Problem(manifold=manifold, cost=f, arg=[theta_th, bias_th],
                      verbosity=0)
    solver = ConjugateGradient(mingradnorm=1e-6)
    solution = solver.solve(problem)
    theta = solution[0]
    bias = solution[1]

    del constf2
    del theta_th
    del bias_th
    del data_th
    del val_
    del solver
    del solution

    return theta, bias
def _update_w(self, data_align, data_sup, labels, w, s, theta, bias):
    """

    Parameters
    ----------

    data_align : list of 2D arrays, element i has shape=[voxels_i, n_align]
        Each element in the list contains the fMRI data for alignment of
        one subject. There are n_align samples for each subject.

    data_sup : list of 2D arrays, element i has shape=[voxels_i, samples_i]
        Each element in the list contains the fMRI data of one subject
        for the classification task.

    labels : list of arrays of int, element i has shape=[samples_i]
        Each element in the list contains the labels for the data samples
        in data_sup.

    w : list of array, element i has shape=[voxels_i, features]
        The orthogonal transforms (mappings) :math:`W_i` for each subject.

    s : array, shape=[features, samples]
        The shared response.

    theta : array, shape=[classes, features]
        The MLR class plane parameters.

    bias : array, shape=[classes]
        The MLR class biases.

    Returns
    -------

    w : list of 2D array, element i has shape=[voxels_i, features]
        The updated orthogonal transforms (mappings).
    """
    subjects = len(data_align)

    s_th = S.shared(s.astype(theano.config.floatX))
    theta_th = S.shared(theta.T.astype(theano.config.floatX))
    bias_th = S.shared(bias.T.astype(theano.config.floatX),
                       broadcastable=(True, False))

    for subject in range(subjects):
        logger.info('Subject Wi %d' % subject)
        # Solve for subject i
        # Create the theano function
        w_th = T.matrix(name='W', dtype=theano.config.floatX)
        data_srm_subject = \
            S.shared(data_align[subject].astype(theano.config.floatX))
        constf1 = \
            S.shared((1 - self.alpha) * 0.5 / data_align[subject].shape[1],
                     allow_downcast=True)
        f1 = constf1 * T.sum((data_srm_subject - w_th.dot(s_th))**2)

        if data_sup[subject] is not None:
            lr_samples_S = S.shared(data_sup[subject].shape[1])
            data_sup_subject = \
                S.shared(data_sup[subject].astype(theano.config.floatX))
            labels_S = S.shared(labels[subject])
            constf2 = S.shared(-self.alpha / self.gamma
                               / data_sup[subject].shape[1],
                               allow_downcast=True)

            log_p_y_given_x = T.log(T.nnet.softmax((theta_th.dot(
                w_th.T.dot(data_sup_subject))).T + bias_th))
            f2 = constf2 * T.sum(
                log_p_y_given_x[T.arange(lr_samples_S), labels_S])
            f = f1 + f2
        else:
            f = f1

        # Define the problem and solve
        f_subject = self._objective_function_subject(data_align[subject],
                                                     data_sup[subject],
                                                     labels[subject],
                                                     w[subject], s, theta,
                                                     bias)
        minstep = np.amin(((10**-np.floor(np.log10(f_subject))), 1e-1))
        manifold = Stiefel(w[subject].shape[0], w[subject].shape[1])
        problem = Problem(manifold=manifold, cost=f, arg=w_th, verbosity=0)
        solver = ConjugateGradient(mingradnorm=1e-2, minstepsize=minstep)
        w[subject] = np.array(solver.solve(
            problem, x=w[subject].astype(theano.config.floatX)))

        if data_sup[subject] is not None:
            del f2
            del log_p_y_given_x
            del data_sup_subject
            del labels_S
        del solver
        del problem
        del manifold
        del f
        del f1
        del data_srm_subject
        del w_th

    del theta_th
    del bias_th
    del s_th

    # Run garbage collector to avoid filling up the memory
    gc.collect()

    return w
    loss = loss
    # compute 1,2 norm of K
    norm12 = 0.
    for i in range(K.shape[0]):
        norm12 += np.linalg.norm(K[i])
    loss = loss / len(S) + lam12 * norm12
    return loss

# create the problems, defined over the manifold
problem_L1 = Problem(manifold=manifold, cost=costL1)
problem_L12 = Problem(manifold=manifold, cost=costL12)

# Instantiate a pymanopt solver
solver = ConjugateGradient(maxiter=100)

# solve each problem:
# Lasso method:
print("Beginning test with L_1")
ts = time()
Khat_L1 = solver.solve(problem_L1)
print(np.shape(Khat_L1))
tot_time = time() - ts
emp_loss, log_loss = lossK(Khat_L1, X, S)
print("L_1 regularization: Time=%f, emp_loss=%f, log_loss=%f"
      % (tot_time, emp_loss, log_loss))

# L_{1,2} regularized method
print("Beginning test with L_{1,2}")
    XtYZ = torch.matmul(X.T, C)
    BtA = XtYZ.T
    DtC = XtYZ

    f = 0.5 * (torch.norm(AtA.flatten())**2 + normBtB**2) - \
        torch.norm(BtA.flatten())**2
    f += 0.5 * (torch.norm(CtC.flatten())**2 + normDtD**2) - \
        torch.norm(DtC.flatten())**2
    f += 0.5 * regularizer * torch.norm(Y, 'fro')**2
    return f

problem = pymanopt.Problem(manifold, cost=cost)
solver = ConjugateGradient(maxiter=200)

print("Starting optimization...\n")
Yopt = solver.solve(problem)
print("Optimization finished\n")

print(Yopt.sum(0))
print(Yopt.sum(1))

X, Z = X.numpy(), Z.numpy()
W = uf(X.T @ (Yopt @ Z))

# Alignment of rows / Node correspondences
YZ = Yopt @ Z
YX = Yopt.T @ X
print("||X - YZ||_fro", np.linalg.norm((X - YZ), 'fro'))
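# `uf` above is assumed to return the orthogonal (polar) factor of a matrix,
# i.e. the closest matrix with orthonormal columns in the Frobenius sense.
# A plausible stand-in, given here only for reference:
import numpy as np

def uf(M):
    u, _, vt = np.linalg.svd(M, full_matrices=False)
    return u @ vt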