Beispiel #1
0
def implicit_euler(A, x, stepSize, n):
	"""
	Perform n implicit Euler steps for the operator A, starting at x.

	In every step the linear system (I - stepSize*A) * nextX = x is handed
	to the ALS solver, the result is divided by its one-norm (as returned by
	`one_norm`) and then becomes the x of the next step.

	Parameters
	----------
	A :
		Operator of the system; must provide `.dimensions` and support
		`stepSize*A` (presumably an xe.TTOperator -- confirm with callers).
	x :
		Initial state (presumably an xe.TTTensor -- confirm with callers).
	stepSize :
		Factor applied to A in the system operator.
	n : int
		Number of steps to perform.

	Returns
	-------
	list
		The x passed in, followed by a copy of the normalized iterate
		after each of the n steps.
	"""
	op = xe.TTOperator.identity(A.dimensions) - stepSize*A
	
	j,k = xe.indices(2)
	# NOTE(review): this binds the module-level xe.ALS_SPD object rather than
	# a copy, so the two settings below presumably stay in effect for every
	# later user of xe.ALS_SPD -- confirm that this is intended.
	ourALS = xe.ALS_SPD
	ourALS.convergenceEpsilon = 1e-4
	ourALS.numHalfSweeps = 100
	
	results = [x]
	nextX = xe.TTTensor(x)
	
	# `range` instead of `xrange`: the latter does not exist in Python 3.
	for i in range(n) :
		ourALS(op, nextX, x)
		
		# normalize
		norm = one_norm(nextX)
		nextX /= norm
		
		# The residual is evaluated with the already normalized nextX.
		print("done itr", i, \
			"residual:", xe.frob_norm(op(j/2,k/2)*nextX(k&0) - x(j&0)), \
			"one-norm:", norm)
		
		x = xe.TTTensor(nextX) # ensure it is a copy
		results.append(x)
	
	return results
Beispiel #2
0
def costs_riemannian_gradient(_tt, _measures, _values):
    """
    Evaluate the Riemannian gradient of the cost functional at `_tt`.

    Parameters
    ----------
    _tt : xe.TTTensor
        Point at which the gradient is evaluated. Its core position is
        moved while the components are processed.
    _measures, _values : np.ndarray
        Samples of the Monte-Carlo estimate; passed on unchanged to
        `costs_component_gradient`.

    Returns
    -------
    TangentVector
        The gradient, with its part along the projected Dirac tensor at
        index [0, ..., 0] removed.
    """

    # TODO: this can be done faster by storing the Us, Vs and Xs and building the TangentSpace manually.
    num_components = _tt.order()
    last_position = num_components - 1

    accumulated = xe.TTTensor(_tt.dimensions)
    _tt.move_core(0)
    for position in range(num_components):
        component = costs_component_gradient(_tt, position, _measures, _values)
        # Copy taken before the core is moved on to the next position.
        summand = xe.TTTensor(_tt)
        if position < last_position:
            _tt.move_core(position + 1)
            left = _tt.get_component(position).to_ndarray()
            # Subtract the part of the component gradient lying in the
            # range of the component at this position.
            component -= np.einsum('lex, yzx, yzr -> ler', left, left, component)
        summand.set_component(position, xe.Tensor.from_buffer(component))
        accumulated = accumulated + summand

    tangent_space = TangentSpace(_tt)
    projected = tangent_space.project(accumulated)
    # Sanity check: projecting must reproduce the accumulated tensor.
    assert (xe.frob_norm(projected.to_TTTensor() - accumulated)
            <= 1e-12 * xe.frob_norm(accumulated))

    # project out the part in direction [0, ..., 0]
    zero_index = [0] * num_components
    dirac_direction = tangent_space.project(
        xe.TTTensor.dirac(_tt.dimensions, zero_index))
    coefficient = (projected @ dirac_direction) / dirac_direction.norm()**2
    return projected - coefficient * dirac_direction
Beispiel #3
0
# build a TTOperator from A (A itself is defined before this fragment)
ttA = xe.TTOperator(A)

# and verify its rank
print("ttA ranks:", ttA.ranks())

# the right hand side of the equation both as Tensor and in (Q)TT format
b = xe.Tensor.ones([2,]*9)
ttb = xe.TTTensor.ones(b.dimensions)

# construct a random initial guess of rank 3 for the ALS algorithm
ttx = xe.TTTensor.random([2,]*9, [3,]*8)

# and solve the system with the default ALS algorithm for symmetric positive operators
# (ttx is passed in as the initial guess and is used as the solution afterwards)
xe.ALS_SPD(ttA, ttx, ttb)

# to perform arithmetic operations we need to define some indices
i,j,k = xe.indices(3)

# calculate the residual of the just solved system to evaluate its accuracy
# here i^9 denotes a multiindex named i of dimension 9 (i.e. spanning 9 indices of the respective tensors)
residual = xe.frob_norm( ttA(i^9,j^9)*ttx(j^9) - ttb(i^9) )
print("residual:", residual)

# as a comparison, solve the system exactly using the Tensor / operator
# (the result is written into the initially empty Tensor x)
x = xe.Tensor()
x(j^9) << b(i^9) / A(i^9, j^9)

# and calculate the Frobenius norm of the difference between the exact
# solution and the ALS solution converted to a full Tensor
print("error:", xe.frob_norm(x - xe.Tensor(ttx)))

Beispiel #4
0
	def calc_residual_norm(self) :
		"""
		Return the Frobenius norm of A*x - b divided by `self.solutionsNorm`.

		Reads `self.A`, `self.x`, `self.b` and `self.solutionsNorm`; nothing
		is modified.
		"""
		row, col = xe.indices(2)
		residual_norm = xe.frob_norm(self.A(row/2, col/2)*self.x(col&0) - self.b(row&0))
		return residual_norm / self.solutionsNorm
Beispiel #5
0
			
			
			# right -> left, only move core and update stack
			self.x.move_core(0, True)
			for pos in reversed(xrange(1,self.d)) :
				self.push_right_stack(pos)
				self.leftAStack.pop()
				self.leftBStack.pop()


def simpleALS(A, x, b) :
	"""
	Build an `InternalSolver` for (A, x, b) and run its `solve()` method.

	Nothing is returned; any result is whatever `InternalSolver.solve`
	leaves in the objects it was given.
	"""
	InternalSolver(A, x, b).solve()

# Demo: set up a random linear system with a known solution, run simpleALS
# on it and report the relative residual and the relative error.
if __name__ == "__main__":
	i,j,k = xe.indices(3)
	
	# Random TT operator, then overwritten by the contraction of A with
	# itself over the index group k.
	# NOTE(review): presumably this makes A symmetric positive semi-definite,
	# as an ALS for SPD systems would need -- confirm.
	A = xe.TTOperator.random([4]*16, [2]*7)
	A(i/2,j/2) << A(i/2, k/2) * A(j/2, k/2)
	
	# Random reference solution and the matching right-hand side b = A * solution.
	solution = xe.TTTensor.random([4]*8, [3]*7)
	b = xe.TTTensor()
	b(i&0) << A(i/2, j/2) * solution(j&0)
	
	# Random start value with the same dimensions and ranks as the reference
	# solution; it is handed to the solver together with A and b.
	x = xe.TTTensor.random([4]*8, [3]*7)
	simpleALS(A, x, b)
	
	# Residual relative to the norm of b; error relative to the norm of x
	# (not to the norm of the reference solution).
	print("Residual:", xe.frob_norm(A(i/2, j/2) * x(j&0) - b(i&0))/xe.frob_norm(b))
	print("Error:", xe.frob_norm(solution-x)/xe.frob_norm(x))
	
Beispiel #6
0
def compute_and_cache_solution(params, maxIter=100):
    """
    Minimize the training costs by a descent method using the Riemannian
    gradient and return the iterate with the lowest validation costs.

    The samples come from `compute_and_cache_measures_and_values` and the
    start value from `compute_and_cache_initial_guess`. The first
    `params.num_training_samples` samples are used for training, the next
    `params.num_validation_samples` samples for validation.

    Parameters
    ----------
    params :
        Object providing `num_training_samples`, `num_validation_samples`,
        `chaos_rank` and `num_exercise_dates`; it is also forwarded to the
        two `compute_and_cache_*` helpers.
    maxIter : int
        Maximum number of descent iterations; also forwarded to
        `compute_and_cache_initial_guess`.

    Returns
    -------
    xe.TTTensor
        Copy of the iterate with the lowest validation costs seen so far.

    Raises
    ------
    AssertionError
        If no iterate was recorded, i.e. the loop ended before the first
        step was taken.
    """
    # Sample index ranges: training samples first, validation samples
    # directly after them.
    trainingSet = slice(0, params.num_training_samples, 1)
    validationSet = slice(
        params.num_training_samples,
        params.num_training_samples + params.num_validation_samples, 1)

    sol = compute_and_cache_measures_and_values(params)
    measures, values = sol.value
    # NOTE(review): `time` is accumulated here and below but not read again
    # within this function -- confirm whether it is still needed.
    time = sol.time

    sol = compute_and_cache_initial_guess(params, maxIter)
    tt = sol.value
    time += sol.time

    # Rounding parameter handed to the retraction in every iteration.
    ranks = [params.chaos_rank] * (params.num_exercise_dates - 2)

    # Define convenience functions.
    # Samples are selected along the second axis of `measures` and the first
    # axis of `values`; only the first entry of the result of `costs` is used.
    def training_costs(_tt):
        return costs(_tt, measures[:, trainingSet], values[trainingSet])[0]

    def training_costs_gradient(_tt):
        return costs_riemannian_gradient(_tt, measures[:, trainingSet],
                                         values[trainingSet])

    # Validation costs are evaluated with an explicit `_alpha=1e3`.
    def validation_costs(_tt):
        return costs(_tt,
                     measures[:, validationSet],
                     values[validationSet],
                     _alpha=1e3)[0]

    # Alternative choices of the descent direction (disabled):
    # compute_descentDir = lambda curGrad, prevGrad: curGrad  # GD
    # compute_descentDir = lambda curGrad, prevGrad: 0.5*(curGrad + prevGrad)  # Momentum
    def compute_descentDir(curGrad, prevGrad):  # nonlinear CG update
        """
        Nonlinear CG update.

        The Hestenes-Stiefel (HS) update can be derived by demanding that consecutive search directions be conjugate
        with respect to the average Hessian over the line segment [x_k , x_{k+1}].
        Even though it is a natural choice it is not easy to implement on manifolds.
        The Polak-Ribiere (PR) update is similar to HS, both in terms of theoretical convergence properties and practical performance.
        For PR however, the strong Wolfe conditions do not guarantee that the computed update direction
        is always a descent direction. To guarantee this we modify PR to PR+. This choice also provides a direction reset automatically [2].
        Finally, it can be shown that global convergence can be guaranteed for every parameter that is bounded in absolute value by the Fletcher-Reeves update.
        This leads us to the final update rule min{PR+, FR}, i.e. PR+ bounded from above by FR.
        To ensure that a descent direction is returned even with Armijo updates we check that the computed update direction
        does not point in the opposite direction to the gradient.

        References:
        -----------
          - [1] Numerical optimization (Jorge Nocedal and Stephen J. Wright)
          - [2] An Introduction to the Conjugate Gradient Method Without the Agonizing Pain (Jonathan Richard Shewchuk)
        """
        gradDiff = curGrad - prevGrad
        # NOTE(review): the denominator used here is curGrad @ curGrad; the
        # textbook PR formula divides by the squared norm of the previous
        # gradient -- confirm that this is intended.
        betaPR = (curGrad @ gradDiff) / (curGrad @ curGrad
                                         )  # Polak-Ribiere update
        beta = max(betaPR, 0)  # PR+ update
        betaFR = (curGrad @ curGrad) / (prevGrad @ prevGrad
                                        )  # Fletcher-Reeves update
        beta = min(beta, betaFR)  # min{PR+, FR}: bound PR+ from above by FR
        descentDir = curGrad + beta * prevGrad
        # Fall back to the plain gradient when the cosine of the angle
        # between the direction and the gradient is below 1e-3.
        if descentDir @ curGrad < 1e-3 * descentDir.norm() * curGrad.norm():
            print("WARNING: Computed descent direction opposite to gradient.")
            descentDir = curGrad
        return descentDir

    print("=" * 80)
    print("  Perform gradient descent")
    print("=" * 80)
    # NOTE(review): `tic` is not read again within this function.
    tic = process_time()
    trnCosts = training_costs(tt)
    # Only the ten most recent validation costs are kept.
    valCosts = deque(maxlen=10)
    grad = training_costs_gradient(tt)
    # Status line for the start value; quantities that only exist after a
    # step are printed as nan.
    print(
        f"[0] Training costs: {trnCosts: .4e}  |  Validation costs: {validation_costs(tt): .4e}  |  Best validation costs: {np.nan: .4e}  |  Relative gradient norm: {grad.norm()/xe.frob_norm(tt):.2e}  |  Relative update norm: {np.nan:.2e}  |  Step size: {np.nan:.2e}  |  Relative retraction error: {np.nan:.2e}  |  Ranks: {tt.ranks()}"
    )
    # Initial step size; afterwards the step size returned by the previous
    # Armijo step is reused as the initial one.
    ss = 1
    # The first direction is the gradient itself.
    descentDir = grad
    descentDirGrad = descentDir @ grad
    bestValCosts = np.inf
    bestTT = None
    for iteration in range(maxIter):
        # Stop when the gradient norm is small relative to the norm of tt.
        if grad.norm() < 1e-6 * xe.frob_norm(tt):
            print(
                "Termination: relative norm of gradient deceeds tolerance (local minimum reached)"
            )
            break
        prev_tt = tt
        # The Armijo step yields the new iterate, the relative retraction
        # error and the step size that was used.
        tt, re, ss = armijo_step(retraction(tt,
                                            descentDir,
                                            _roundingParameter=ranks),
                                 training_costs,
                                 descentDirGrad,
                                 _initialStepSize=ss)
        trnCosts = training_costs(tt)
        valCosts.append(validation_costs(tt))
        # Keep a copy of the iterate with the lowest validation costs.
        if valCosts[-1] < bestValCosts:
            bestTT = xe.TTTensor(tt)
            bestValCosts = valCosts[-1]
        # `grad` still holds the gradient at prev_tt at this point.
        print(
            f"[{iteration+1}] Training costs: {trnCosts: .4e}  |  Validation costs: {valCosts[-1]: .4e}  |  Best validation costs: {bestValCosts: .4e}  |  Relative gradient norm: {grad.norm()/np.asarray(xe.frob_norm(prev_tt)):.2e}  |  Relative update norm: {xe.frob_norm(prev_tt-tt)/np.asarray(xe.frob_norm(prev_tt)):.2e}  |  Step size: {ss:.2e}  |  Relative retraction error: {re:.2e}  |  Ranks: {tt.ranks()}"
        )
        # Stop when, over the last ten iterations, the validation costs
        # decreased by less than 1% of the oldest stored value.
        if len(valCosts) == 10 and (valCosts[0] -
                                    valCosts[-1]) < 1e-2 * valCosts[0]:
            print("Termination: decrease of costs deceeds tolerance")
            break
        # No new direction is needed after the last iteration.
        if iteration < maxIter - 1:
            # Project the old gradient onto the tangent space at the new
            # iterate before combining it with the new gradient.
            prev_grad = TangentSpace(tt).project(grad)
            grad = training_costs_gradient(tt)
            descentDir = compute_descentDir(grad, prev_grad)
            descentDirGrad = descentDir @ grad
    else:
        # Reached only when the loop ran through without a `break`.
        print("Termination: maximum number of iterations reached")

    # NOTE(review): bestTT is still None when the loop ends before the first
    # step (maxIter == 0 or the gradient criterion holds immediately); the
    # assert then fails, and under `python -O` None would be returned.
    assert bestTT is not None
    return bestTT
Beispiel #7
0
 def step(_stepSize):
     """
     Take a step of length `_stepSize` from `basePoint` against `_tv` and round it.

     Returns the rounded trial point together with the Frobenius norm of
     the change caused by rounding, divided by `_stepSize * tv_norm`.
     `basePoint`, `_tv`, `_roundingParameter` and `tv_norm` are taken from
     the enclosing scope.
     """
     candidate = (basePoint - _stepSize * _tv).to_TTTensor()
     # Keep an unrounded copy so the effect of the rounding can be measured.
     unrounded = xe.TTTensor(candidate)
     candidate.round(_roundingParameter)
     rounding_error = xe.frob_norm(unrounded - candidate) / (_stepSize * tv_norm)
     return candidate, rounding_error