def gen_zero_energy_guess(H, rank):
    """
    Generate psi such that <psi|H|psi> = 0

    Parameters:
    -----------
    H: tt.matrix
       Hamiltonian in the TT-matrix format
    rank: int
       Rank of the guess
    """
    v = 1.0
    while v > 1e-12:
        # Create two random TT vectors and normalize them
        psi1 = tt.rand(H.n, r=rank)
        psi2 = tt.rand(H.n, r=1)
        psi1 = psi1 * (1.0 / psi1.norm())
        psi2 = psi2 * (1.0 / psi2.norm())

        # Calculate coefficients of the quadratic equation
        h22 = tt.dot(tt.matvec(H, psi2), psi2)
        h21 = tt.dot(tt.matvec(H, psi2), psi1)
        h11 = tt.dot(tt.matvec(H, psi1), psi1)

        # Find a combination such that <psi|H|psi> = 0
        rs = np.roots([h22, 2 * h21, h11])
        v = np.linalg.norm(np.imag(rs))

    psi = psi1 + rs[0] * psi2
    psi = psi * (1.0 / psi.norm())
    return psi
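# A minimal usage sketch for the generator above (an illustrative addition, not
# part of the original snippet). It assumes numpy/tt are imported and that H is
# a real Hermitian TT matrix whose energy range straddles zero, for example the
# Heisenberg Hamiltonian built by the dyn.gen_heisenberg_hamiltonian helper used
# in the test further down; otherwise the real-root search cannot terminate.
import numpy as np
import tt

psi = gen_zero_energy_guess(H, rank=5)
print("norm:", psi.norm())                             # ~1.0 by construction
print("<psi|H|psi>:", tt.dot(tt.matvec(H, psi), psi))  # ~0 up to rounding errors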
def test_gradient_wrt_core(self):
    w = tt.rand([2, 2, 2], 3, [1, 2, 2, 1])
    x_1 = all_subsets.subset_tensor([3, 4, 5])
    x_2 = all_subsets.subset_tensor([-1, 12, 5])
    X = np.array([[3, 4, 5], [-1, 12, 5]])
    eps = 1e-8

    def loss(core):
        new_w = w.copy()
        new_w.core = copy.copy(core)
        res = (tt.dot(new_w, x_1))**2  # Quadratic.
        res += tt.dot(new_w, x_2)      # Linear.
        return res

    # Derivatives of the quadratic and linear functions in the loss.
    dfdz = [2 * tt.dot(w, x_1), 1]
    core = w.core
    value = loss(core)
    numerical_grad = np.zeros(len(core))
    for i in range(len(core)):
        new_core = copy.copy(core)
        new_core[i] += eps
        numerical_grad[i] = (loss(new_core) - value) / eps
    w_cores = tt.tensor.to_list(w)
    gradient = all_subsets.gradient_wrt_cores(w_cores, X, dfdz)
    np.testing.assert_array_almost_equal(numerical_grad, gradient, decimal=3)
def eval_post(Atts, params, P0, time_observation, observations, obs_operator,
              eps=1e-7, method='cheby', dtmax=0.1, Nmax=16):
    Att = Atts[0] * params[0]
    for i in range(1, params.size):
        Att += Atts[i] * params[i]
    Att = Att.round(1e-12)

    qtt = True
    if qtt:
        A_qtt = ttm2qttm(Att)
        integrator = ttInt(A_qtt, epsilon=eps, N_max=Nmax, dt_max=1.0,
                           method=method)
        P = tt2qtt(P0)
    else:
        integrator = ttInt(Att, epsilon=eps, N_max=Nmax, dt_max=1.0,
                           method=method)
        P = P0

    ps = []
    for i in range(1, time_observation.size):
        dt = time_observation[i] - time_observation[i - 1]
        P = integrator.solve(P, dt, intervals=int(np.ceil(dt / dtmax)), qtt=True)
        Po_tt = obs_operator(observations[i, :], P, time_observation[i])
        Po_tt = tt2qtt(Po_tt)
        ps.append(tt.dot(Po_tt, P))
        P = (P * Po_tt).round(1e-9)
        P = P * (1 / ps[-1])

    ps = np.array(ps)
    return ps
def test_guess():
    # Set up the parameters of the script
    d = 4
    r = 5
    H = dyn.gen_heisenberg_hamiltonian(d)

    # guess
    np.random.seed(10)
    psi1 = dyn.gen_rounded_gaussian_guess(H, r)
    np.random.seed(10)
    psi2 = dyn.gen_implicit_gaussian_guess(H, r)

    assert np.allclose(tt.dot(psi1, psi2), -0.18779753088022963)
def GMRES(A, u_0, b, eps=1E-6, restart=20, verb=0):
    """GMRES linear systems solver based on TT techniques.

    A = A(x[, eps]) is a function that multiplies x by the matrix.
    """
    do_restart = True
    while do_restart:
        r0 = b + A((-1) * u_0)
        r0 = r0.round(eps)
        beta = r0.norm()
        bnorm = b.norm()
        curr_beta = beta
        if verb:
            print("/ Initial residual norm: %lf; mean rank:" % beta, r0.rmean())

        m = restart
        V = np.zeros(m + 1, dtype=object)  # Krylov basis
        V[0] = r0 * (1.0 / beta)
        H = np.mat(np.zeros((m + 1, m), dtype=np.complex128, order='F'))

        j = 0
        while j < m and curr_beta / bnorm > eps:
            delta = eps / (curr_beta / beta)
            w = A(V[j], delta)
            # Orthogonalize against the current Krylov basis (modified Gram-Schmidt)
            for i in range(j + 1):
                H[i, j] = tt.dot(w, V[i])
                w = w + (-H[i, j]) * V[i]
            w = w.round(delta)
            if verb > 1:
                print("|% 3d. New Krylov vector mean rank:" % (j + 1), w.rmean())
            H[j + 1, j] = w.norm()
            V[j + 1] = w * (1 / H[j + 1, j])

            Hj = H[:j + 2, :j + 1]
            betae = np.zeros(j + 2, dtype=np.complex128)
            betae[0] = beta
            # solving Hj * y = beta e_1
            y, curr_beta, rank, s = np.linalg.lstsq(Hj, betae)
            curr_beta = curr_beta[0]
            if verb:
                print("|% 3d. LSTSQ residual norm:" % (j + 1), curr_beta)
            j += 1

        x = u_0
        for i in range(j):
            x = x + V[i] * y[i]
        x = x.round(eps)
        if verb:
            print("\\ Solution mean rank:", x.rmean())
        u_0 = x
        do_restart = (curr_beta / bnorm > eps)
    return x
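# A hedged usage sketch for the solver above. The operator and right-hand side
# are illustrative assumptions (a shifted QTT Laplacian from ttpy), not part of
# the original snippet; any callable A(x[, eps]) that applies a TT matrix works.
import numpy as np
import tt

# Illustrative, well-conditioned problem: (Laplacian + I) on 2**6 grid points.
A_tt = (tt.qlaplace_dd([6]) + tt.eye(2, 6)).round(1e-12)
b = tt.ones(2, 6)
u0 = tt.rand(b.n, r=2)

def matvec(x, eps=1e-12):
    # GMRES expects a callable A(x[, eps]) that applies the operator in TT format.
    return tt.matvec(A_tt, x).round(eps)

x = GMRES(matvec, u0, b, eps=1e-6, restart=30, verb=1)
print("relative residual:", (b - tt.matvec(A_tt, x)).norm() / b.norm())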
def goal_function(thetuta):
    L1 = np.array([lagrange(thetuta[0], i, pts1) for i in range(pts1.size)])
    L2 = np.array([lagrange(thetuta[1], i, pts2) for i in range(pts2.size)])
    L3 = np.array([lagrange(thetuta[2], i, pts3) for i in range(pts3.size)])
    L4 = np.array([lagrange(thetuta[3], i, pts4) for i in range(pts4.size)])
    L5 = np.array([lagrange(thetuta[4], i, pts5) for i in range(pts5.size)])
    val = tt.dot(
        Post,
        tt.mkron(tt.tensor(L1.flatten()), tt.tensor(L2.flatten()),
                 tt.tensor(L3.flatten()), tt.tensor(L4.flatten()),
                 tt.tensor(L5.flatten())))
    return -val
def increase_rank(w0, rank, train_x, train_y, vectorized_tt_dot_h, loss_h,
                  loss_grad_h, project_h, object_tensor_h, reg):
    """Implements the idea from the paper Riemannian Pursuit for Big Matrix Recovery

    That is, to init the tensor with the desired rank, we add the orthogonal
    component of the current gradient to our current low-rank estimate w0.
    """
    w = w0
    if rank > max(w.r):
        # Choose not too many objects, so that the rank is reasonable.
        num_objects_used = rank * 5
        w_x = vectorized_tt_dot_h(w, train_x[:num_objects_used, :])
        grad_coef = loss_grad_h(w_x, train_y[:num_objects_used])
        proj_grad = project_h(w, train_x[:num_objects_used, :], grad_coef,
                              reg=reg)
        grad = reg * w
        for i in range(0, num_objects_used):
            grad = grad + grad_coef[i] * object_tensor_h(train_x[i, :])
        orth = grad - proj_grad
        orth = orth.round(eps=0, rmax=rank - max(w0.r))

        batch_w_x = vectorized_tt_dot_h(w, train_x)
        w_w = w.norm()**2
        batch_orth_x = vectorized_tt_dot_h(orth, train_x)
        orth_orth = orth.norm()**2
        w_orth = tt.dot(w, orth)

        def w_step_objective(w_step):
            steps = np.array([w_step, 0])
            obj = _regularized_loss_step(steps, loss_h, train_y, batch_w_x,
                                         batch_orth_x, 0, 0, reg, w_w, w_orth,
                                         orth_orth)
            return obj

        step_w = minimize_scalar(w_step_objective).x
        w = (w - step_w * orth).round(eps=0)
    return w
# Generate the initial Gaussian, which is just shifted
gs = np.exp(-0.5 * (x - 2)**2)
gs = tt.tensor(gs, 1e-8)
start = None
for i in range(f):
    start = tt.kron(start, gs)
radd = 20
start = start + 0 * tt.rand(start.n, start.d, radd)
y = start.copy()
print('initial value norm:', start.norm())

cf = []
tf = 150.0
nit = 5000
tau = tf / nit
i = 0
t = 0

import time
t1 = time.time()
while t <= tf:
    print('%f/%f' % (t, tf))
    y = ksl(H, y, tau)
    cf.append(tt.dot(y, start))
    t += tau
t2 = time.time()
print("Elapsed time: %f" % (t2 - t1))

zz = np.abs(fft(np.conj(cf)))
lls = np.arange(zz.size) * pi / (0.5 * tf)
tme_total = datetime.datetime.now() - tme_total

#%% show
print()
print('Total time ', tme_total)
print('Maximum size ', tensor_size * 8 / 1e6, ' MB')

Post = Pt_fwd
Prior = Pt_prior

nburn = 20000
xs_tt = tt_meshgrid([pts1, pts2, pts3, pts4, pts5])
Pt = Pt_fwd

E = np.array([tt.dot(x_tt * Pt, WS) for x_tt in xs_tt])
V = np.array(
    [tt.dot(Pt * xs_tt[i], xs_tt[i] * WS) - E[i]**2 for i in range(5)])

import pyswarm

def goal_function(thetuta):
    L1 = np.array([lagrange(thetuta[0], i, pts1) for i in range(pts1.size)])
    L2 = np.array([lagrange(thetuta[1], i, pts2) for i in range(pts2.size)])
    L3 = np.array([lagrange(thetuta[2], i, pts3) for i in range(pts3.size)])
    L4 = np.array([lagrange(thetuta[3], i, pts4) for i in range(pts4.size)])
    L5 = np.array([lagrange(thetuta[4], i, pts5) for i in range(pts5.size)])
def parallel_worker(task_id, H, operators, guess_generator, rank, tau,
                    n_steps, callbacks=[], **kwargs):
    """
    Parallel worker for the calculation of the expectation value of an operator

    Parameters:
    -----------
    H: tt.matrix
       Hamiltonian matrix in the TT format
    operators: iterable of tt.matrix
       matrices of the operators in the TT format
    guess_generator: function
       initial vector generator
    rank: int
       TT rank of the initial vector
    tau: float
       time step
    n_steps: int
       number of steps of the dynamics
    callbacks: list, default []
       list of extra callbacks. The callback has to have a signature
       (tt.vector) -> Scalar. The callback will receive the wavefunction,
       and the result will be collected. The results of the callbacks are
       stored in the matrix along with mean values of the operators.

    Returns:
    --------
    (time, evs) : (np.array, np.array)
       time array and array of expectation values
    """
    psi = guess_generator(H, rank)
    time = []
    evs = []
    t = 0

    psi = ksl(A=-1j * H, y0=psi, tau=1e-10, **kwargs)
    for i in range(n_steps):
        ev = []
        for operator in operators:
            ev.append(tt.dot(tt.matvec(operator, psi), psi))
        for func in callbacks:
            ev.append(func(psi))
        time.append(t)
        evs.append(ev)
        # update
        psi = ksl(A=-1j * H, y0=psi, tau=tau, **kwargs)
        t += tau

    evs = np.array(evs).real
    time = np.array(time)
    return time, evs
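# A hedged usage sketch for the worker above (illustrative assumptions, not part
# of the original code): the Heisenberg Hamiltonian generator referenced in the
# test snippet of this collection builds H, and H itself is monitored as the
# observable, so the collected values are the energies along the trajectory.
import tt

H = dyn.gen_heisenberg_hamiltonian(4)
time, evs = parallel_worker(task_id=0, H=H, operators=[H],
                            guess_generator=gen_zero_energy_guess,
                            rank=5, tau=0.05, n_steps=20)
print(time.shape, evs.shape)  # (20,), (20, 1): one expectation value per step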
def GMRES(A, u_0, b, eps=1e-6, maxit=100, m=20, _iteration=0,
          callback=None, verbose=0):
    """
    Flexible TT GMRES
    :param A: matvec(x[, eps])
    :param u_0: initial vector
    :param b: right-hand side
    :param maxit: max number of iterations
    :param eps: required accuracy
    :param m: number of iterations without restart
    :param _iteration: iteration counter
    :param callback:
    :param verbose: to print debug info or not
    :return: answer, residual

    >>> from tt import GMRES
    >>> def matvec(x, eps):
    ...     return tt.matvec(S, x).round(eps)
    >>> answer, res = GMRES(matvec, u_0, b, eps=1e-8)
    """
    maxitexceeded = False
    converged = False

    if verbose:
        print('GMRES(m=%d, _iteration=%d, maxit=%d)' % (m, _iteration, maxit))

    v = np.ones((m + 1), dtype=object) * np.nan
    R = np.ones((m, m)) * np.nan
    g = np.zeros(m)
    s = np.ones(m) * np.nan
    c = np.ones(m) * np.nan

    v[0] = b - A(u_0, eps=eps)
    v[0] = v[0].round(eps)
    resnorm = v[0].norm()
    curr_beta = resnorm
    bnorm = b.norm()
    wlen = resnorm
    q = m

    for j in range(m):
        _iteration += 1

        delta = eps / (curr_beta / resnorm)
        if verbose:
            print("it = %d delta = " % _iteration, delta)

        v[j] *= 1.0 / wlen
        v[j + 1] = A(v[j], eps=delta)

        for i in range(j + 1):
            R[i, j] = tt.dot(v[j + 1], v[i])
            v[j + 1] = v[j + 1] - R[i, j] * v[i]
        v[j + 1] = v[j + 1].round(delta)

        wlen = v[j + 1].norm()
        for i in range(j):
            r1 = R[i, j]
            r2 = R[i + 1, j]
            R[i, j] = c[i] * r1 - s[i] * r2
            R[i + 1, j] = c[i] * r2 + s[i] * r1

        denom = np.hypot(wlen, R[j, j])
        s[j] = wlen / denom
        c[j] = -R[j, j] / denom
        R[j, j] = -denom

        g[j] = c[j] * curr_beta
        curr_beta *= s[j]

        if verbose:
            print("it = {}, ||r|| = {}".format(_iteration, curr_beta / bnorm))

        converged = (curr_beta / bnorm) < eps or (curr_beta / resnorm) < eps
        maxitexceeded = _iteration >= maxit
        if converged or maxitexceeded:
            q = j + 1
            break

    y = la.solve_triangular(R[:q, :q], g[:q], check_finite=False)
    for idx in range(q):
        u_0 += v[idx] * y[idx]

    u_0 = u_0.round(eps)

    if callback is not None:
        callback(u_0)

    if converged or maxitexceeded:
        return u_0, resnorm / bnorm

    return GMRES(A, u_0, b, eps, maxit, m, _iteration, callback=callback,
                 verbose=verbose)
from __future__ import print_function, absolute_import, division
import sys
sys.path.append('../')
import numpy as np
import tt

d = 30
n = 2**d
b = 1E3
h = b / (n + 1)
#x = np.arange(n)
#x = np.reshape(x, [2] * d, order='F')
#x = tt.tensor(x, 1e-12)
x = tt.xfun(2, d)
e = tt.ones(2, d)
x = x + e
x = x * h

sf = lambda x: np.sin(x) / x  # Should be rank 2

y = tt.multifuncrs([x], sf, 1e-6, y0=tt.ones(2, d))
#y1 = tt.tensor(sf(x.full()), 1e-8)

print("pi / 2 ~ ", tt.dot(y, tt.ones(2, d)) * h)
#print((y - y1).norm() / y.norm())
def collect_ev_sequential(H, operators, guess_generator, rank, n_samples, tau,
                          n_steps, filename=None, append_file=True,
                          dump_every=0, callbacks=[], **kwargs):
    """
    Generate the expectation value of a provided operator in the dynamical
    process generated by the Hamiltonian H. The dynamics starts from the
    initial vector, which is generated by the guess_generator.

    Parameters:
    -----------
    H: tt.matrix
       Hamiltonian matrix in the TT format
    operators: iterable of tt.matrix or tt.matrix
       matrices of the operators in the TT format
    guess_generator: function
       initial vector generator
    rank: int
       TT rank of the initial vector
    n_samples: int
       number of sample trajectories
    tau: float
       time step
    n_steps: int
       number of steps of the dynamics
    filename: str, default None
       filename to output results. The file is appended if it exists
    append_file: bool, default True
       if True, append to the existing file instead of replacing it
    dump_every: int, default 0
       dump current results every n rounds. Default is 0 (no intermediate dumps)
    callbacks: list, default []
       list of extra callbacks. The callback has to have a signature
       (tt.vector) -> Scalar. The callback will receive the wavefunction,
       and the result will be collected. The results of the callbacks are
       stored in the matrix along with mean values of the operators.

    Returns:
    --------
    (time, evs) : (np.array, np.array)
       time array and array of expectation values
    """
    # ensure that operators is iterable
    if not isinstance(operators, Iterable):
        operators = [operators]

    evs_all_l = []
    for s in tqdm(range(n_samples),
                  desc="guess={}, n_steps={}".format(guess_generator.__name__,
                                                     n_steps)):
        psi = guess_generator(H, rank)
        time_l = []
        evs = []
        t = 0

        psi = ksl(A=-1j * H, y0=psi, tau=1e-10, **kwargs)
        for i in range(n_steps):
            ev = []
            for operator in operators:
                ev.append(tt.dot(tt.matvec(operator, psi), psi))
            for func in callbacks:
                ev.append(func(psi))
            time_l.append(t)
            evs.append(ev)
            # update
            psi = ksl(A=-1j * H, y0=psi, tau=tau, **kwargs)
            t += tau

        evs_all_l.append(evs)

        # dump intermediate results every `dump_every` samples
        if ((dump_every > 0) and (s % dump_every == 0) and (s != 0)
                and (filename is not None)):
            evs_all = np.array(evs_all_l).real
            time = np.array(time_l)
            if (s == dump_every) and (not os.path.isfile(filename)
                                      or not append_file):
                # rewrite old file with the first batch
                np.savez(filename, t=time, evs=evs_all)
            else:
                time_old = np.load(filename)['t']
                evs_old = np.load(filename)['evs']
                assert np.allclose(time_old, time)
                evs_updated = np.vstack((evs_old, evs_all))
                np.savez(filename, t=time, evs=evs_updated)

    evs_all = np.array(evs_all_l).real
    time = np.array(time_l)

    if filename is not None:
        if not os.path.isfile(filename) or not append_file:
            np.savez(filename, t=time, evs=evs_all)
        else:
            time_old = np.load(filename)['t']
            evs_old = np.load(filename)['evs']
            assert np.allclose(time_old, time)
            evs_updated = np.vstack((evs_old, evs_all))
            np.savez(filename, t=time, evs=evs_updated)

    return time, evs_all
tme = datetime.datetime.now() - tme
print('', flush=True)
print('\tmax rank ', max(P.r))

Ppred = P
Ppost = PO * Ppred
Ppost = Ppost.round(1e-10)

print('\tmax rank (after observation) ', max(Ppost.r))

if tensor_size < tt_size(Ppost):
    tensor_size = tt_size(Ppost)

if not qtt:
    # Ppost = Ppost * (1/tt.sum(Ppost * tt.kron(tt.ones(N), WS)))
    Pt = tt.sum(tt.sum(tt.sum(tt.sum(Ppost, 0), 0), 0), 0)
    Z = tt.dot(Pt, WS)
    Pt = Pt * (1 / Z)
    Pt = Pt.round(1e-10)
else:
    # Ppost = Ppost * (1/tt.sum(Ppost * tt.kron(tt.ones(int(np.sum(np.log2(N)))*[2]), ws_qtt)))
    Pt = Ppost
    for i in range(int(np.sum(np.log2(N)))):
        Pt = tt.sum(Pt, 0)
    Z = tt.dot(Pt, ws_qtt)
    Pt = Pt * (1 / Z)
    Pt = Pt.round(1e-10)

Ppost = Ppost * (1 / tt.sum(Ppost))
for j in range(d):
    if j % 2:
        e0 = v0
    else:
        e0 = v1
    e1 = tt.kron(e1, e0)

r = [1] * (d + 1)
r[0] = 1
r[d] = 1
x0 = tt.rand(n, d, r)

tau = 1e-2
tf = 100
t = 0

start = e1
psi = start + 0 * x0
psi1 = start + 0 * x0

cf = []
while t <= tf:
    print('%f/%f' % (t, tf))
    psi = ksl(-1.0j * A, psi, tau)
    cf.append(tt.dot(psi, start))
    t += tau
def riemannian_sgd(train_x, train_y, vectorized_tt_dot_h, loss_h, loss_grad_h,
                   project_h, w0, intercept0=0, fit_intercept=True,
                   val_x=None, val_y=None, reg=0., exp_reg=1., dropout=None,
                   batch_size=-1, num_passes=30, seed=None, logger=None,
                   verbose_period=1, debug=False, beta=0.5, rho=0.1):
    """Riemannian SGD optimization for a linear model with weights in TT.

    The objective function is
        reg <w, w> + \sum_i f(d(w, x_i) + b, y_i)
    * where f(o, y) is the loss w.r.t. one object (this function is from R^2 to R);
    * d(w, x_i) is the dot product between the tensor w and the tensor built
      from the vector x_i.
    """
    num_objects, num_features = train_x.shape
    is_val_set_provided = False
    if val_x is not None and val_y is not None:
        is_val_set_provided = True
    if seed is not None:
        np.random.seed(seed)
    if batch_size == -1:
        # Full gradient learning.
        batch_size = num_objects
    # TODO: correctly process the last batch.
    num_batches = num_objects // batch_size
    w = w0
    b = intercept0
    # TODO: start not from zero in case we are resuming the learning.
    start_epoch = 0
    if logger is not None:
        logger.before_first_iter(train_x, train_y, w,
                                 lambda w, x: vectorized_tt_dot_h(w, x) + b,
                                 num_passes, num_objects)
    reg_tens = build_reg_tens(w.n, exp_reg)

    for e in range(start_epoch, num_passes):
        idx_perm = np.random.permutation(num_objects)
        for batch_idx in range(num_batches):
            start = batch_idx * batch_size
            end = (batch_idx + 1) * batch_size
            curr_idx = idx_perm[start:end]
            curr_batch = train_x[curr_idx, :]
            if dropout is not None:
                dropout_mask = np.random.binomial(1, dropout,
                                                  size=curr_batch.shape)
                # To make the expected value of <W, dropout(X)> equal to <W, X>.
                dropout_mask = dropout_mask / dropout
                curr_batch = dropout_mask * curr_batch
            batch_y = train_y[curr_idx]
            batch_w_x = vectorized_tt_dot_h(w, curr_batch)
            batch_linear_o = batch_w_x + b
            batch_loss_arr = loss_h(batch_linear_o, batch_y)
            wreg = w * reg_tens
            wregreg = w * reg_tens * reg_tens
            wreg_wreg = wreg.norm()**2
            batch_loss = np.sum(batch_loss_arr) + reg * wreg_wreg / 2.0
            batch_grad_coef = loss_grad_h(batch_linear_o, batch_y)
            batch_gradient_b = np.sum(batch_grad_coef)
            direction = project_h(w, curr_batch, batch_grad_coef, reg=0)
            direction = riemannian.project(w, [direction, reg * wregreg])
            batch_dir_x = vectorized_tt_dot_h(direction, curr_batch)
            dir_dir = direction.norm()**2
            wreg_dir = tt.dot(wreg, direction)
            if fit_intercept:
                # TODO: Use classical Newton-Raphson (with hessian).
                step_objective = lambda s: _regularized_loss_step(
                    s, loss_h, batch_y, batch_w_x, batch_dir_x, b,
                    batch_gradient_b, reg, wreg_wreg, wreg_dir, dir_dir)
                step_gradient = lambda s: _regularized_loss_step_grad(
                    s, loss_grad_h, batch_y, batch_w_x, batch_dir_x, b,
                    batch_gradient_b, reg, wreg_dir, dir_dir)
                step0_w, step0_b = fmin_bfgs(step_objective, np.ones(2),
                                             fprime=step_gradient, gtol=1e-10,
                                             disp=logger.disp())
            else:
                def w_step_objective(w_step):
                    steps = np.array([w_step, 0])
                    obj = _regularized_loss_step(steps, loss_h, batch_y,
                                                 batch_w_x, batch_dir_x, b,
                                                 batch_gradient_b, reg,
                                                 wreg_wreg, wreg_dir, dir_dir)
                    return obj
                step0_w = minimize_scalar(w_step_objective).x
            # TODO: consider using Probabilistic Line Searches for Stochastic
            # Optimization.

            # Armijo step choosing.
            step_w = step0_w
            # <gradient, direction> =
            #   = <(\sum_i coef[i] * x_i + reg * w), direction> =
            #   = \sum_i coef[i] <x_i, direction> + reg * <w, direction>
            grad_times_direction = batch_dir_x.dot(batch_grad_coef) + reg * wreg_dir
            while step_w > 1e-10:
                new_w = (w - step_w * direction).round(eps=0, rmax=max(w.r))
                new_w_x = vectorized_tt_dot_h(new_w, curr_batch)
                if fit_intercept:
                    b_objective = lambda b: np.sum(loss_h(new_w_x + b, batch_y))
                    m = minimize_scalar(b_objective)
                    b = m.x
                    new_loss = m.fun
                else:
                    new_loss = np.sum(loss_h(new_w_x + b, batch_y))
                new_wreg = new_w * reg_tens
                new_loss += reg * new_wreg.norm()**2 / 2.0
                if new_loss <= batch_loss - rho * step_w * grad_times_direction:
                    break
                step_w *= beta
            w = new_w

        if (logger is not None) and e % verbose_period == 0:
            logger.after_each_iter(e, train_x, train_y, w,
                                   lambda w, x: vectorized_tt_dot_h(w, x) + b,
                                   stage='train')
            if is_val_set_provided:
                logger.after_each_iter(e, val_x, val_y, w,
                                       lambda w, x: vectorized_tt_dot_h(w, x) + b,
                                       stage='valid')
    return w, b
if qtt:
    x1 = tt2qtt(x1)
    x2 = tt2qtt(x2)
    x3 = tt2qtt(x3)
    x4 = tt2qtt(x4)

for i in range(len(P_bck)):
    print(i)
    Pf = P_fwd[i]
    Pb = P_bck[i]
    # if qtt:
    #     Pf = qtt2tt(Pf, N)
    #     Pb = qtt2tt(Pb, N)

    Z = tt.dot(Pf, Pb)
    mean = [
        tt.dot(Pf, Pb * x1) / Z,
        tt.dot(Pf, Pb * x2) / Z,
        tt.dot(Pf, Pb * x3) / Z,
        tt.dot(Pf, Pb * x4) / Z
    ]
    var = [
        tt.dot(Pf * x1, Pb * x1) / Z - mean[0]**2,
        tt.dot(Pf * x2, Pb * x2) / Z - mean[1]**2,
        tt.dot(Pf * x3, Pb * x3) / Z - mean[2]**2,
        tt.dot(Pf * x4, Pb * x4) / Z - mean[3]**2
    ]
    Es.append(mean)