def generate(self):
    treelog.user('my message')
    with treelog.infofile('test.dat', 'w') as f:
        f.write('test1')
    with treelog.context('my context'):
        with treelog.iter.plain('iter', 'abc') as items:
            for c in items:
                treelog.info(c)
        with treelog.context('empty'):
            pass
        treelog.error('multiple..\n ..lines')
        with treelog.userfile('test.dat', 'wb') as f:
            treelog.info('generating')
            f.write(b'test2')
    self.generate_test()
    with treelog.context('context step={}', 0) as format:
        treelog.info('foo')
        format(1)
        treelog.info('bar')
    with treelog.errorfile('same.dat', 'wb') as f:
        f.write(b'test3')
    with treelog.debugfile('dbg.dat', 'wb') as f:
        f.write(b'test4')
    treelog.debug('dbg')
    treelog.warning('warn')
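# A minimal, self-contained sketch (not part of the test above) of the nesting
# behaviour that generate() exercises: contexts opened with treelog.context
# stack hierarchically, and messages logged inside them are attributed to the
# innermost open context. Only calls that already appear above are used.
import treelog

with treelog.context('outer'):
    treelog.info('logged under outer')
    with treelog.context('inner'):
        treelog.warning('logged under outer > inner')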
def main(nelems: int, degree: int, reynolds: float):
    '''
    Driven cavity benchmark problem using compatible spaces.

    .. arguments::

       nelems [12]
         Number of elements along edge.
       degree [2]
         Polynomial degree for velocity; the pressure space is one degree less.
       reynolds [1000]
         Reynolds number, taking the domain size as characteristic length.
    '''

    verts = numpy.linspace(0, 1, nelems + 1)
    domain, geom = mesh.rectilinear([verts, verts])

    ns = function.Namespace()
    ns.x = geom
    ns.Re = reynolds
    ns.ubasis = function.vectorize([
        domain.basis('spline', degree=(degree, degree - 1), removedofs=((0, -1), None)),
        domain.basis('spline', degree=(degree - 1, degree), removedofs=(None, (0, -1)))])
    ns.pbasis = domain.basis('spline', degree=degree - 1)
    ns.u_i = 'ubasis_ni ?u_n'
    ns.p = 'pbasis_n ?p_n'
    ns.stress_ij = '(d(u_i, x_j) + d(u_j, x_i)) / Re - p δ_ij'
    ns.uwall = domain.boundary.indicator('top'), 0
    ns.N = 5 * degree * nelems  # nitsche constant based on element size = 1/nelems
    ns.nitsche_ni = '(N ubasis_ni - (d(ubasis_ni, x_j) + d(ubasis_nj, x_i)) n(x_j)) / Re'

    ures = domain.integral('d(ubasis_ni, x_j) stress_ij d:x' @ ns, degree=2 * degree)
    ures += domain.boundary.integral('(nitsche_ni (u_i - uwall_i) - ubasis_ni stress_ij n(x_j)) d:x' @ ns, degree=2 * degree)
    pres = domain.integral('pbasis_n (d(u_k, x_k) + ?lm) d:x' @ ns, degree=2 * degree)
    lres = domain.integral('p d:x' @ ns, degree=2 * degree)

    with treelog.context('stokes'):
        state0 = solver.solve_linear(['u', 'p', 'lm'], [ures, pres, lres])
        postprocess(domain, ns, **state0)

    ures += domain.integral('ubasis_ni d(u_i, x_j) u_j d:x' @ ns, degree=3 * degree)

    with treelog.context('navierstokes'):
        state1 = solver.newton(('u', 'p', 'lm'), (ures, pres, lres), arguments=state0).solve(tol=1e-10)
        postprocess(domain, ns, **state1)

    return state0, state1
def main(nelems: int, degree: int, reynolds: float):
    '''
    Driven cavity benchmark problem using compatible spaces.

    .. arguments::

       nelems [12]
         Number of elements along edge.
       degree [2]
         Polynomial degree for velocity; the pressure space is one degree less.
       reynolds [1000]
         Reynolds number, taking the domain size as characteristic length.
    '''

    verts = numpy.linspace(0, 1, nelems + 1)
    domain, geom = mesh.rectilinear([verts, verts])

    ns = function.Namespace()
    ns.x = geom
    ns.Re = reynolds
    ns.uxbasis, ns.uybasis, ns.pbasis, ns.lbasis = function.chain([
        domain.basis('spline', degree=(degree, degree - 1), removedofs=((0, -1), None)),
        domain.basis('spline', degree=(degree - 1, degree), removedofs=(None, (0, -1))),
        domain.basis('spline', degree=degree - 1),
        [1],  # lagrange multiplier
    ])
    ns.ubasis_ni = '<uxbasis_n, uybasis_n>_i'
    ns.u_i = 'ubasis_ni ?lhs_n'
    ns.p = 'pbasis_n ?lhs_n'
    ns.l = 'lbasis_n ?lhs_n'
    ns.stress_ij = '(u_i,j + u_j,i) / Re - p δ_ij'
    ns.uwall = domain.boundary.indicator('top'), 0
    ns.N = 5 * degree * nelems  # nitsche constant based on element size = 1/nelems
    ns.nitsche_ni = '(N ubasis_ni - (ubasis_ni,j + ubasis_nj,i) n_j) / Re'

    res = domain.integral('(ubasis_ni,j stress_ij + pbasis_n (u_k,k + l) + lbasis_n p) d:x' @ ns, degree=2 * degree)
    res += domain.boundary.integral('(nitsche_ni (u_i - uwall_i) - ubasis_ni stress_ij n_j) d:x' @ ns, degree=2 * degree)

    with treelog.context('stokes'):
        lhs0 = solver.solve_linear('lhs', res)
        postprocess(domain, ns, lhs=lhs0)

    res += domain.integral('ubasis_ni u_i,j u_j d:x' @ ns, degree=3 * degree)

    with treelog.context('navierstokes'):
        lhs1 = solver.newton('lhs', res, lhs0=lhs0).solve(tol=1e-10)
        postprocess(domain, ns, lhs=lhs1)

    return lhs0, lhs1
def main(nelems: int, etype: str, degree: int, reynolds: float):
    '''
    Driven cavity benchmark problem.

    .. arguments::

       nelems [12]
         Number of elements along edge.
       etype [square]
         Element type (square/triangle/mixed).
       degree [2]
         Polynomial degree for velocity; the pressure space is one degree less.
       reynolds [1000]
         Reynolds number, taking the domain size as characteristic length.
    '''

    domain, geom = mesh.unitsquare(nelems, etype)

    ns = function.Namespace()
    ns.Re = reynolds
    ns.x = geom
    ns.ubasis, ns.pbasis = function.chain([
        domain.basis('std', degree=degree).vector(2),
        domain.basis('std', degree=degree - 1),
    ])
    ns.u_i = 'ubasis_ni ?lhs_n'
    ns.p = 'pbasis_n ?lhs_n'
    ns.stress_ij = '(u_i,j + u_j,i) / Re - p δ_ij'

    sqr = domain.boundary.integral('u_k u_k d:x' @ ns, degree=degree * 2)
    wallcons = solver.optimize('lhs', sqr, droptol=1e-15)

    sqr = domain.boundary['top'].integral('(u_0 - 1)^2 d:x' @ ns, degree=degree * 2)
    lidcons = solver.optimize('lhs', sqr, droptol=1e-15)

    cons = numpy.choose(numpy.isnan(lidcons), [lidcons, wallcons])
    cons[-1] = 0  # pressure point constraint

    res = domain.integral('(ubasis_ni,j stress_ij + pbasis_n u_k,k) d:x' @ ns, degree=degree * 2)
    with treelog.context('stokes'):
        lhs0 = solver.solve_linear('lhs', res, constrain=cons)
        postprocess(domain, ns, lhs=lhs0)

    res += domain.integral('.5 (ubasis_ni u_i,j - ubasis_ni,j u_i) u_j d:x' @ ns, degree=degree * 3)
    with treelog.context('navierstokes'):
        lhs1 = solver.newton('lhs', res, lhs0=lhs0, constrain=cons).solve(tol=1e-10)
        postprocess(domain, ns, lhs=lhs1)

    return lhs0, lhs1
def main(nelems: int, etype: str, degree: int, reynolds: float):
    '''
    Driven cavity benchmark problem.

    .. arguments::

       nelems [12]
         Number of elements along edge.
       etype [square]
         Element type (square/triangle/mixed).
       degree [2]
         Polynomial degree for velocity; the pressure space is one degree less.
       reynolds [1000]
         Reynolds number, taking the domain size as characteristic length.
    '''

    domain, geom = mesh.unitsquare(nelems, etype)

    ns = function.Namespace()
    ns.Re = reynolds
    ns.x = geom
    ns.ubasis = domain.basis('std', degree=degree).vector(domain.ndims)
    ns.pbasis = domain.basis('std', degree=degree - 1)
    ns.u_i = 'ubasis_ni ?u_n'
    ns.p = 'pbasis_n ?p_n'
    ns.stress_ij = '(d(u_i, x_j) + d(u_j, x_i)) / Re - p δ_ij'

    usqr = domain.boundary.integral('u_k u_k d:x' @ ns, degree=degree * 2)
    wallcons = solver.optimize('u', usqr, droptol=1e-15)

    usqr = domain.boundary['top'].integral('(u_0 - 1)^2 d:x' @ ns, degree=degree * 2)
    lidcons = solver.optimize('u', usqr, droptol=1e-15)

    ucons = numpy.choose(numpy.isnan(lidcons), [lidcons, wallcons])
    pcons = numpy.zeros(len(ns.pbasis), dtype=bool)
    pcons[-1] = True  # constrain pressure to zero in a point
    cons = dict(u=ucons, p=pcons)

    ures = domain.integral('d(ubasis_ni, x_j) stress_ij d:x' @ ns, degree=degree * 2)
    pres = domain.integral('pbasis_n d(u_k, x_k) d:x' @ ns, degree=degree * 2)
    with treelog.context('stokes'):
        state0 = solver.solve_linear(('u', 'p'), (ures, pres), constrain=cons)
        postprocess(domain, ns, **state0)

    ures += domain.integral('.5 (ubasis_ni d(u_i, x_j) - d(ubasis_ni, x_j) u_i) u_j d:x' @ ns, degree=degree * 3)
    with treelog.context('navierstokes'):
        state1 = solver.newton(('u', 'p'), (ures, pres), arguments=state0, constrain=cons).solve(tol=1e-10)
        postprocess(domain, ns, **state1)

    return state0, state1
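# Hedged sketch of an entry point: the driven cavity functions above follow
# the nutils example-script convention, where main() is launched through
# nutils.cli so that the docstring defaults (nelems [12], degree [2],
# reynolds [1000]) become command line arguments. This block is an assumption
# based on that convention, not part of the original snippets.
if __name__ == '__main__':
    from nutils import cli
    cli.run(main)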
def solve_scipy(self, rhs, solver, atol, callback=None, precon=None, **solverargs):
    rhsnorm = numpy.linalg.norm(rhs)
    solverfun = getattr(self.scipy.sparse.linalg, solver)
    myrhs = rhs / rhsnorm  # normalize right hand side vector for best control over scipy's stopping criterion
    mytol = atol / rhsnorm
    M = self.getprecon(precon) if isinstance(precon, str) else precon(self.core) if callable(precon) else precon
    with log.context(solver + ' {:.0f}%', 0) as reformat:
        def mycallback(arg):
            # some solvers provide the residual, others the left hand side vector
            res = numpy.linalg.norm(myrhs - self @ arg) if numpy.ndim(arg) == 1 else float(arg)
            if callback:
                callback(res)
            reformat(100 * numpy.log10(max(mytol, res)) / numpy.log10(mytol))
        mylhs, status = solverfun(self.core, myrhs, M=M, tol=mytol, callback=mycallback, **solverargs)
    if status != 0:
        raise Exception('status {}'.format(status))
    return mylhs * rhsnorm
def eval(*args, **arguments: argdict):
    '''Evaluate function.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    self, funcs = args

    nprocs = min(config.nprocs, self.nelems)
    zeros = parallel.shzeros if nprocs > 1 else numpy.zeros
    funcs = [function.asarray(func) for func in funcs]
    retvals = [zeros((self.npoints,) + func.shape, dtype=func.dtype) for func in funcs]
    idata = function.Tuple(function.Tuple([ifunc, function.Tuple(ind), f.simplified])
        for ifunc, func in enumerate(funcs)
        for ind, f in function.blocks(func.prepare_eval(ndims=self.ndims)))

    if config.dot:
        idata.graphviz()

    ielems = parallel.range(self.nelems)
    with parallel.fork(nprocs):
        for ielem in ielems:
            with log.context('elem', ielem, '({:.0f}%)'.format(100 * ielem / self.nelems)):
                for ifunc, inds, data in idata.eval(_transforms=tuple(t[ielem] for t in self.transforms), _points=self.points[ielem].coords, **arguments):
                    numpy.add.at(retvals[ifunc], numpy.ix_(self.index[ielem], *[ind for (ind,) in inds]), data)

    return retvals
def mycallback(arg):
    niter[...] += 1
    # some solvers provide the residual, others the left hand side vector
    res = numpy.linalg.norm(myrhs - self.matvec(arg)) if numpy.ndim(arg) == 1 else float(arg)
    if callback:
        callback(res)
    with log.context('residual {:.2e} ({:.0f}%)'.format(res, 100. * numpy.log10(res) / numpy.log10(mytol) if res > 0 else 0)):
        pass
def fork(nprocs=None):
    '''continue as ``nprocs`` parallel processes by forking ``nprocs-1`` times

    If ``nprocs`` exceeds the configured ``maxprocs`` then it will silently be
    capped. It is up to the user to prepare shared memory and/or locks for
    inter-process communication. As a safety measure nested forks are blocked
    by limiting nprocs to 1; all secondary forks will be silently ignored.
    '''

    if nprocs is None or nprocs > _maxprocs.value:
        nprocs = _maxprocs.value
    if nprocs <= 1:
        yield 0
        return
    if not hasattr(os, 'fork'):
        warnings.warn('fork is unavailable on this platform')
        yield 0
        return
    amchild = False
    try:
        child_pids = []
        for procid in builtins.range(1, nprocs):
            pid = os.fork()
            if not pid:  # pragma: no cover
                amchild = True
                signal.signal(signal.SIGINT, signal.SIG_IGN)  # disable sigint (ctrl+c) handler
                treelog.current = treelog.NullLog()  # silence treelog
                break
            child_pids.append(pid)
        else:
            procid = 0
        with maxprocs(1):
            yield procid
    except BaseException as e:
        if amchild:  # pragma: no cover
            try:
                print('[parallel.fork] exception in child process:', e)
            finally:
                os._exit(1)  # communicate failure to main process
        for pid in child_pids:  # kill all child processes
            os.kill(pid, signal.SIGKILL)
        raise
    else:
        if amchild:  # pragma: no cover
            os._exit(0)  # communicate success to main process
        with treelog.context('waiting for child processes'):
            nfails = sum(not _wait(pid) for pid in child_pids)
        if nfails:  # failure in child process: raise exception
            raise Exception('fork failed in {} out of {} processes'.format(nfails, nprocs))
    finally:
        if amchild:  # pragma: no cover
            os._exit(1)  # failsafe
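# Minimal usage sketch of fork(), modelled on how it is called elsewhere in
# this section (see the eval/integrate snippets): shared memory is allocated
# before forking, parallel.range hands out disjoint indices, and every process
# fills its own entries. compute is a hypothetical per-item worker, passed in
# only for illustration.
def parallel_fill(nelems, nprocs, compute):
    out = parallel.shzeros((nelems,))  # visible to all forked processes
    ielems = parallel.range(nelems)    # shared iterator over element indices
    with parallel.fork(nprocs):
        for ielem in ielems:
            out[ielem] = compute(ielem)
    return out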
def _solver_fgmres(self, rhs, atol, maxiter=0, restart=150, precon=None, ztol=1e-12, preconargs={}, **args):
    rci = c_int(0)
    n = c_int(len(rhs))
    b = numpy.array(rhs, dtype=numpy.float64, copy=False)
    x = numpy.zeros_like(b)
    N = min(restart, len(rhs))
    ipar = numpy.empty(128, dtype=numpy.int32)
    dpar = numpy.empty(128, dtype=numpy.float64)
    tmp = numpy.empty((2 * N + 1) * len(rhs) + (N * (N + 9)) // 2 + 1, dtype=numpy.float64)
    dfgmres_args = byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes
    itercount = c_int(0)
    libmkl.dfgmres_init(*dfgmres_args)
    ipar[7] = 0  # do not perform the stopping test for the maximum number of iterations
    ipar[8] = 0  # do not perform the residual stopping test
    ipar[9] = 1  # perform the user-defined stopping test by setting RCI_request=2
    if precon is not None:
        ipar[10] = 1  # run the preconditioned version of the FGMRES method
        precon = self.getprecon(precon, **args, **preconargs)
    ipar[11] = 0  # do not perform the automatic test for zero norm of the currently generated vector
    ipar[12] = 0  # update the solution to the vector x according to the computations done by the dfgmres routine
    ipar[14] = N  # the number of non-restarted FGMRES iterations
    libmkl.dfgmres_check(*dfgmres_args)
    if rci.value in (-1001, -1010, -1011):
        warnings.warn('dgmres ' + ' and '.join(['wrote some warnings to stdout', 'changed some parameters to make them consistent or correct'][1 if rci.value == -1010 else 0:1 if rci.value == -1001 else 2]))
    elif rci.value != 0:
        raise MatrixError('dgmres check failed with error code {}'.format(rci.value))
    with log.context('fgmres {:.0f}%', 0, 0) as format:
        while True:
            libmkl.dfgmres(*dfgmres_args)
            if rci.value == 1:  # multiply the matrix
                tmp[ipar[22] - 1:ipar[22] + n.value - 1] = self @ tmp[ipar[21] - 1:ipar[21] + n.value - 1]
            elif rci.value == 2:  # perform the stopping test
                if dpar[4] < atol:
                    libmkl.dfgmres_get(*dfgmres_args, byref(itercount))
                    if numpy.linalg.norm(self @ x - b) < atol:
                        break
                format(100 * numpy.log(dpar[2] / dpar[4]) / numpy.log(dpar[2] / atol))
                if ipar[3] > maxiter > 0:
                    break
            elif rci.value == 3:  # apply the preconditioner
                tmp[ipar[22] - 1:ipar[22] + n.value - 1] = precon(tmp[ipar[21] - 1:ipar[21] + n.value - 1])
            elif rci.value == 4:  # check if the norm of the current orthogonal vector is zero
                if dpar[6] < ztol:
                    libmkl.dfgmres_get(*dfgmres_args, byref(itercount))
                    if numpy.linalg.norm(self @ x - b) < atol:
                        break
                    raise MatrixError('singular matrix')
            else:
                raise MatrixError('this should not have occurred: rci={}'.format(rci.value))
    log.debug('performed {} fgmres iterations, {} restarts'.format(ipar[3], ipar[3] // ipar[14]))
    return x
def run(self, collector: 'ResultCollector', context: Dict, workpath: Path, logdir: Path) -> bool:
    kwargs = {
        'cwd': workpath,
        'capture_output': True,
        'shell': False,
    }

    if isinstance(self._command, str):
        kwargs['shell'] = True
        command = render(self._command, context, mode='shell')
    else:
        command = [render(arg, context) for arg in self._command]

    with log.context(self.name):
        log.debug(command if isinstance(command, str) else ' '.join(command))
        with time() as duration:
            result = subprocess.run(command, **kwargs)
        duration = duration()

        if logdir:
            stdout_path = logdir / f'{self.name}.stdout'
            with open(stdout_path, 'wb') as f:
                f.write(result.stdout)
            stderr_path = logdir / f'{self.name}.stderr'
            with open(stderr_path, 'wb') as f:
                f.write(result.stderr)

        stdout = result.stdout.decode()
        for capture in self._capture:
            capture.find_in(collector, stdout)
        if self._capture_walltime:
            collector.collect(f'walltime/{self.name}', duration)

        if result.returncode:
            log.error(f"Command returned exit status {result.returncode}")
            if logdir:
                log.error(f"stdout stored in {stdout_path}")
                log.error(f"stderr stored in {stderr_path}")
            return False
        else:
            log.info(f"Success ({duration:.3g}s)")
            return True
def getprecon(self, precon):
    if precon == self._precon:
        return self._precon_object
    if self.shape[0] != self.shape[1]:
        raise MatrixError('matrix must be square')
    precon_method, precon_name = self._method('precon', precon)
    try:
        with treelog.context('constructing {} preconditioner'.format(precon_name)):
            precon_object = precon_method()
    except MatrixError:
        raise
    except Exception as e:
        raise MatrixError('failed to create preconditioner: {}'.format(e)) from e
    self._precon = precon
    self._precon_object = precon_object
    return precon_object
def _solver_scipy(self, rhs, method, atol, callback=None, precon=None, **solverargs):
    rhsnorm = numpy.linalg.norm(rhs)
    solverfun = getattr(scipy.sparse.linalg, method)
    myrhs = rhs / rhsnorm  # normalize right hand side vector for best control over scipy's stopping criterion
    mytol = atol / rhsnorm
    if precon is not None:
        precon = scipy.sparse.linalg.LinearOperator(self.shape, self.getprecon(precon), dtype=float)
    with log.context(method + ' {:.0f}%', 0) as reformat:
        def mycallback(arg):
            # some solvers provide the residual, others the left hand side vector
            res = numpy.linalg.norm(myrhs - self @ arg) if numpy.ndim(arg) == 1 else float(arg)
            if callback:
                callback(res)
            reformat(100 * numpy.log10(max(mytol, res)) / numpy.log10(mytol))
        mylhs, status = solverfun(self.core, myrhs, M=precon, tol=mytol, callback=mycallback, **solverargs)
    if status != 0:
        raise Exception('status {}'.format(status))
    return mylhs * rhsnorm
def eval(*args, **arguments: argdict):
    '''Evaluate function.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    self, funcs = args

    nprocs = min(config.nprocs, self.nelems)
    zeros = parallel.shzeros if nprocs > 1 else numpy.zeros
    funcs = [function.asarray(func) for func in funcs]
    retvals = [zeros((self.npoints,) + func.shape, dtype=func.dtype) for func in funcs]
    idata = function.Tuple(function.Tuple([ifunc, function.Tuple(ind), f.simplified])
        for ifunc, func in enumerate(funcs)
        for ind, f in function.blocks(func.prepare_eval(ndims=self.ndims)))

    if config.dot:
        idata.graphviz()

    ielems = parallel.range(self.nelems)
    with parallel.fork(nprocs):
        for ielem in ielems:
            with log.context('elem', ielem, '({:.0f}%)'.format(100 * ielem / self.nelems)):
                for ifunc, inds, data in idata.eval(_transforms=self.transforms[ielem], _points=self.points[ielem].coords, **arguments):
                    numpy.add.at(retvals[ifunc], numpy.ix_(self.index[ielem], *[ind for (ind,) in inds]), data)

    return retvals
  return iter(title, builtins.range(*args))

def enumerate(title, iterable):
  warnings.deprecation('log.enumerate is deprecated; use log.iter.percentage instead')
  return iter(title, builtins.enumerate(iterable), length=_len(iterable))

def zip(title, *iterables):
  warnings.deprecation('log.zip is deprecated; use log.iter.percentage instead')
  return iter(title, builtins.zip(*iterables), length=min(map(_len, iterables)))

def count(title, start=0, step=1):
  warnings.deprecation('log.count is deprecated; use log.iter.percentage instead')
  return iter(title, itertools.count(start, step))

if distutils.version.StrictVersion(treelog.version) >= distutils.version.StrictVersion('1.0b5'):
  from treelog import debug, info, user, warning, error, debugfile, infofile, userfile, warningfile, errorfile, context
else:
  debug = lambda *args, **kwargs: treelog.debug(*args, **kwargs)
  info = lambda *args, **kwargs: treelog.info(*args, **kwargs)
  user = lambda *args, **kwargs: treelog.user(*args, **kwargs)
  warning = lambda *args, **kwargs: treelog.warning(*args, **kwargs)
  error = lambda *args, **kwargs: treelog.error(*args, **kwargs)
  debugfile = lambda *args, **kwargs: treelog.debugfile(*args, **kwargs)
  infofile = lambda *args, **kwargs: treelog.infofile(*args, **kwargs)
  userfile = lambda *args, **kwargs: treelog.userfile(*args, **kwargs)
  warningfile = lambda *args, **kwargs: treelog.warningfile(*args, **kwargs)
  errorfile = lambda *args, **kwargs: treelog.errorfile(*args, **kwargs)
  context = lambda *args, **kwargs: treelog.context(*args, **kwargs)

# vim:sw=2:sts=2:et
def _solver_fgmres(self, rhs, atol, maxiter=0, restart=150, precon=None, ztol=1e-12, preconargs={}, **args):
    rci = c_int(0)
    n = c_int(len(rhs))
    b = numpy.array(rhs, dtype=numpy.float64)
    x = numpy.zeros_like(b)
    ipar = numpy.zeros(128, dtype=numpy.int32)
    ipar[0] = len(rhs)  # problem size
    ipar[1] = 6  # output on screen
    ipar[2] = 1  # current stage of the RCI FGMRES computations; the initial value is 1
    ipar[3] = 0  # current iteration number; the initial value is 0
    ipar[4] = 0  # maximum number of iterations
    ipar[5] = 1  # output error messages in accordance with the parameter ipar[1]
    ipar[6] = 1  # output warning messages in accordance with the parameter ipar[1]
    ipar[7] = 0  # do not perform the stopping test for the maximum number of iterations: ipar[3] <= ipar[4]
    ipar[8] = 0  # do not perform the residual stopping test: dpar[4] <= dpar[3]
    ipar[9] = 1  # perform the user-defined stopping test by setting RCI_request=2
    if precon is None:
        ipar[10] = 0  # run the non-preconditioned version of the FGMRES method
    else:
        ipar[10] = 1  # run the preconditioned version of the FGMRES method
        precon = self.getprecon(precon, **args, **preconargs)
    ipar[11] = 0  # do not perform the automatic test for zero norm of the currently generated vector: dpar[6] <= dpar[7]
    ipar[12] = 1  # update the solution to the vector b according to the computations done by the dfgmres routine
    ipar[13] = 0  # internal iteration counter that counts the number of iterations before the restart takes place; the initial value is 0
    ipar[14] = min(restart, len(rhs))  # the number of non-restarted FGMRES iterations
    dpar = numpy.zeros(128, dtype=numpy.float64)
    tmp = numpy.zeros((2 * ipar[14] + 1) * ipar[0] + (ipar[14] * (ipar[14] + 9)) // 2 + 1, dtype=numpy.float64)
    libmkl.dfgmres_check(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes)
    if rci.value != 0:
        raise MatrixError('dgmres check failed with error code {}'.format(rci.value))
    with log.context('fgmres {:.0f}%', 0, 0) as format:
        while True:
            libmkl.dfgmres(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes)
            if rci.value == 1:  # multiply the matrix
                tmp[ipar[22] - 1:ipar[22] + n.value - 1] = self @ tmp[ipar[21] - 1:ipar[21] + n.value - 1]
            elif rci.value == 2:  # perform the stopping test
                if dpar[4] < atol:
                    libmkl.dfgmres_get(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes, byref(c_int(0)))
                    if numpy.linalg.norm(self @ b - rhs) < atol:
                        break
                    b[:] = rhs  # reset rhs vector for restart
                format(100 * numpy.log(dpar[2] / dpar[4]) / numpy.log(dpar[2] / atol))
                if ipar[3] > maxiter > 0:
                    break
            elif rci.value == 3:  # apply the preconditioner
                tmp[ipar[22] - 1:ipar[22] + n.value - 1] = precon(tmp[ipar[21] - 1:ipar[21] + n.value - 1])
            elif rci.value == 4:  # check if the norm of the current orthogonal vector is zero
                if dpar[6] < ztol:
                    libmkl.dfgmres_get(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes, byref(c_int(0)))
                    if numpy.linalg.norm(self @ b - rhs) < atol:
                        break
                    raise MatrixError('singular matrix')
            else:
                raise MatrixError('this should not have occurred: rci={}'.format(rci.value))
    log.debug('performed {} fgmres iterations, {} restarts'.format(ipar[3], ipar[3] // ipar[14]))
    return b
def optimize(target: types.strictstr, functional: sample.strictintegral, *, tol: types.strictfloat = 0., arguments: argdict = {}, droptol: float = None, constrain: types.frozenarray = None, lhs0: types.frozenarray[types.strictfloat] = None, relax0: float = 1., linesearch=None, failrelax: types.strictfloat = 1e-6, **kwargs):
    '''find the minimizer of a given functional

    Parameters
    ----------
    target : :class:`str`
        Name of the target: a :class:`nutils.function.Argument` in ``residual``.
    functional : scalar :class:`nutils.sample.Integral`
        The functional that should be minimized by varying target.
    tol : :class:`float`
        Target residual norm.
    arguments : :class:`collections.abc.Mapping`
        Defines the values for :class:`nutils.function.Argument` objects in
        `residual`. The ``target`` should not be present in ``arguments``.
        Optional.
    droptol : :class:`float`
        Threshold for leaving entries in the return value at NaN if they do
        not contribute to the value of the functional.
    constrain : :class:`numpy.ndarray` with dtype :class:`float`
        Defines the fixed entries of the coefficient vector.
    lhs0 : :class:`numpy.ndarray`
        Coefficient vector, starting point of the iterative procedure.
    relax0 : :class:`float`
        Initial relaxation value.
    linesearch : :class:`nutils.solver.LineSearch`
        Callable that defines relaxation logic.
    failrelax : :class:`float`
        Fail with exception if relaxation reaches this lower limit.

    Returns
    -------
    :class:`numpy.ndarray`
        Coefficient vector corresponding to the functional optimum.
    '''

    if linesearch is None:
        linesearch = NormBased.legacy(kwargs)
    solveargs = _strip(kwargs, 'lin')
    if kwargs:
        raise TypeError('unexpected keyword arguments: {}'.format(', '.join(kwargs)))
    residual = functional.derivative(target)
    jacobian = residual.derivative(target)
    lhs, cons = _parse_lhs_cons(lhs0, constrain, residual.shape)
    val, res, jac = sample.eval_integrals(functional, residual, jacobian, **{target: lhs}, **arguments)
    if droptol is not None:
        nan = ~(cons | jac.rowsupp(droptol))
        cons = cons | nan
    resnorm = numpy.linalg.norm(res[~cons])
    if jacobian.contains(target):
        if tol <= 0:
            raise ValueError('nonlinear optimization problem requires a nonzero "tol" argument')
        solveargs.setdefault('rtol', 1e-3)
        firstresnorm = resnorm
        relax = relax0
        accept = True
        with log.context('newton {:.0f}%', 0) as reformat:
            while not numpy.isfinite(resnorm) or resnorm > tol:
                if accept:
                    reformat(100 * numpy.log(firstresnorm / resnorm) / numpy.log(firstresnorm / tol))
                    lhs0 = lhs
                    dlhs = -jac.solve_leniently(res, constrain=cons, **solveargs)
                    res0 = res[~cons]
                    dres0 = (jac @ dlhs)[~cons]  # == -res0 if dlhs was solved to infinite precision
                    resnorm0 = resnorm
                lhs = lhs0 + relax * dlhs
                val, res, jac = sample.eval_integrals(functional, residual, jacobian, **{target: lhs}, **arguments)
                resnorm = numpy.linalg.norm(res[~cons])
                scale, accept = linesearch(res0, relax * dres0, res[~cons], relax * (jac @ dlhs)[~cons])
                relax = min(relax * scale, 1)
                if relax <= failrelax:
                    raise SolverError('stuck in local minimum')
        log.info('converged with residual {:.1e}'.format(resnorm))
    elif resnorm > tol:
        solveargs.setdefault('atol', tol)
        dlhs = -jac.solve(res, constrain=cons, **solveargs)
        lhs = lhs + dlhs
        val += (res + jac @ dlhs / 2).dot(dlhs)
    if droptol is not None:
        lhs = numpy.choose(nan, [lhs, numpy.nan])
        log.info('constrained {}/{} dofs'.format(len(lhs) - nan.sum(), len(lhs)))
    log.info('optimum value {:.2e}'.format(val))
    return lhs
def integrate(*args, **arguments: argdict):
    '''Integrate functions.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    self, funcs = args

    # Functions may consist of several blocks, such as originating from
    # chaining. Here we make a list of all blocks consisting of triplets of
    # argument id, evaluable index, and evaluable values.

    funcs = [function.asarray(func).prepare_eval(ndims=self.ndims) for func in funcs]
    blocks = [(ifunc, function.Tuple(ind), f.simplified) for ifunc, func in enumerate(funcs) for ind, f in function.blocks(func)]
    block2func, indices, values = zip(*blocks) if blocks else ([], [], [])

    log.debug('integrating {} distinct blocks'.format('+'.join(str(block2func.count(ifunc)) for ifunc in range(len(funcs)))))

    if config.dot:
        function.Tuple(values).graphviz()

    # To allocate (shared) memory for all block data we evaluate indexfunc to
    # build an nblocks x nelems+1 offset array, and nblocks index lists of
    # length nelems.

    offsets = numpy.zeros((len(blocks), self.nelems + 1), dtype=int)
    if blocks:
        sizefunc = function.stack([f.size for ifunc, ind, f in blocks]).simplified
        for ielem, transforms in enumerate(zip(*self.transforms)):
            n, = sizefunc.eval(_transforms=transforms, **arguments)
            offsets[:, ielem + 1] = offsets[:, ielem] + n

    # Since several blocks may belong to the same function, we post process the
    # offsets to form consecutive intervals in longer arrays. The length of
    # these arrays is captured in the nfuncs-array nvals.

    nvals = numpy.zeros(len(funcs), dtype=int)
    for iblock, ifunc in enumerate(block2func):
        offsets[iblock] += nvals[ifunc]
        nvals[ifunc] = offsets[iblock, -1]

    # The data_index list contains shared memory index and value arrays for
    # each function argument.

    nprocs = min(config.nprocs, self.nelems)
    empty = parallel.shempty if nprocs > 1 else numpy.empty
    data_index = [(empty(n, dtype=float), empty((funcs[ifunc].ndim, n), dtype=int)) for ifunc, n in enumerate(nvals)]

    # In a second, parallel element loop, valuefunc is evaluated to fill the
    # data part of data_index using the offsets array for location. Each
    # element has its own location so no locks are required. The index part of
    # data_index is filled in the same loop. It does not use valuefunc data but
    # benefits from parallel speedup.

    valueindexfunc = function.Tuple(function.Tuple([value] + list(index)) for value, index in zip(values, indices))
    ielems = parallel.range(self.nelems)
    with parallel.fork(nprocs):
        for ielem in ielems:
            with log.context('elem', ielem, '({:.0f}%)'.format(100 * ielem / self.nelems)):
                points = self.points[ielem]
                for iblock, (intdata, *indices) in enumerate(valueindexfunc.eval(_transforms=tuple(t[ielem] for t in self.transforms), _points=points.coords, **arguments)):
                    s = slice(*offsets[iblock, ielem:ielem + 2])
                    data, index = data_index[block2func[iblock]]
                    w_intdata = numeric.dot(points.weights, intdata)
                    data[s] = w_intdata.ravel()
                    si = (slice(None),) + (numpy.newaxis,) * (w_intdata.ndim - 1)
                    for idim, (ii,) in enumerate(indices):
                        index[idim, s].reshape(w_intdata.shape)[...] = ii[si]
                        si = si[:-1]

    retvals = []
    for i, func in enumerate(funcs):
        with log.context('assembling {}/{}'.format(i + 1, len(funcs))):
            retvals.append(matrix.assemble(*data_index[i], shape=func.shape))
    return retvals
def optimize(target: types.strictstr, functional: sample.strictintegral, *, tol: types.strictfloat = 0., arguments: argdict = {}, droptol: float = None, constrain: types.frozenarray = None, lhs0: types.frozenarray[types.strictfloat] = None, solveargs: types.frozendict = {}, searchrange: types.tuple[float] = (.01, 2 / 3), rebound: types.strictfloat = 2., failrelax: types.strictfloat = 1e-6, **linargs):
    '''find the minimizer of a given functional

    Parameters
    ----------
    target : :class:`str`
        Name of the target: a :class:`nutils.function.Argument` in ``residual``.
    functional : scalar :class:`nutils.sample.Integral`
        The functional that should be minimized by varying target.
    tol : :class:`float`
        Target residual norm.
    arguments : :class:`collections.abc.Mapping`
        Defines the values for :class:`nutils.function.Argument` objects in
        `residual`. The ``target`` should not be present in ``arguments``.
        Optional.
    droptol : :class:`float`
        Threshold for leaving entries in the return value at NaN if they do
        not contribute to the value of the functional.
    constrain : :class:`numpy.ndarray` with dtype :class:`float`
        Defines the fixed entries of the coefficient vector.
    lhs0 : :class:`numpy.ndarray`
        Coefficient vector, starting point of the iterative procedure.

    Returns
    -------
    :class:`numpy.ndarray`
        Coefficient vector corresponding to the functional optimum.
    '''

    if 'newtontol' in linargs:
        warnings.deprecation('argument "newtontol" is deprecated, use "tol" instead')
        tol = linargs.pop('newtontol')
    solveargs = _striplin(linargs, solveargs)
    residual = functional.derivative(target)
    jacobian = residual.derivative(target)
    lhs, cons = _parse_lhs_cons(lhs0, constrain, residual.shape)
    val, res, jac = sample.eval_integrals(functional, residual, jacobian, **{target: lhs}, **arguments)
    if droptol is not None:
        nan = ~(cons | jac.rowsupp(droptol))
        cons = cons | nan
    resnorm = numpy.linalg.norm(res[~cons])
    if jacobian.contains(target):
        if tol <= 0:
            raise ValueError('nonlinear optimization problem requires a nonzero "tol" argument')
        solveargs.setdefault('rtol', 1e-3)
        linesearch = LineSearch(searchrange, rebound, failrelax)
        firstresnorm = resnorm
        relax = 1
        accept = True
        with log.context('newton {:.0f}%', 0) as reformat:
            while resnorm > tol:
                if accept:
                    reformat(100 * numpy.log(firstresnorm / resnorm) / numpy.log(firstresnorm / tol))
                    dlhs = -jac.solve_leniently(res, constrain=cons, **solveargs)
                    lhs0 = lhs
                    resnorm0 = resnorm
                lhs = lhs0 + relax * dlhs
                val, res, jac = sample.eval_integrals(functional, residual, jacobian, **{target: lhs}, **arguments)
                resnorm = numpy.linalg.norm(res[~cons])
                relax, accept = linesearch(resnorm0**2, -2 * resnorm0**2, resnorm**2, 2 * (jac @ dlhs)[~cons].dot(res[~cons]), relax)
        log.info('converged with residual {:.1e}'.format(resnorm))
    elif resnorm > tol:
        solveargs.setdefault('atol', tol)
        dlhs = -jac.solve(res, constrain=cons, **solveargs)
        lhs = lhs + dlhs
        val += (res + jac @ dlhs / 2).dot(dlhs)
    if droptol is not None:
        lhs = numpy.choose(nan, [lhs, numpy.nan])
        log.info('constrained {}/{} dofs'.format(len(lhs) - nan.sum(), len(lhs)))
    log.info('optimum value {:.2e}'.format(val))
    return lhs
def optimize(target, functional: sample.strictintegral, *, tol: types.strictfloat = 0., arguments: argdict = {}, droptol: float = None, constrain: arrayordict = None, lhs0: types.frozenarray[types.strictfloat] = None, relax0: float = 1., linesearch=None, failrelax: types.strictfloat = 1e-6, **kwargs):
    '''find the minimizer of a given functional

    Parameters
    ----------
    target : :class:`str`
        Name of the target: a :class:`nutils.function.Argument` in ``residual``.
    functional : scalar :class:`nutils.sample.Integral`
        The functional that should be minimized by varying target.
    tol : :class:`float`
        Target residual norm.
    arguments : :class:`collections.abc.Mapping`
        Defines the values for :class:`nutils.function.Argument` objects in
        `residual`. The ``target`` should not be present in ``arguments``.
        Optional.
    droptol : :class:`float`
        Threshold for leaving entries in the return value at NaN if they do
        not contribute to the value of the functional.
    constrain : :class:`numpy.ndarray` with dtype :class:`float`
        Defines the fixed entries of the coefficient vector.
    lhs0 : :class:`numpy.ndarray`
        Coefficient vector, starting point of the iterative procedure.
    relax0 : :class:`float`
        Initial relaxation value.
    linesearch : :class:`nutils.solver.LineSearch`
        Callable that defines relaxation logic.
    failrelax : :class:`float`
        Fail with exception if relaxation reaches this lower limit.

    Returns
    -------
    :class:`numpy.ndarray`
        Coefficient vector corresponding to the functional optimum.
    '''

    if linesearch is None:
        linesearch = NormBased.legacy(kwargs)
    solveargs = _strip(kwargs, 'lin')
    if kwargs:
        raise TypeError('unexpected keyword arguments: {}'.format(', '.join(kwargs)))
    if any(t not in functional.argshapes for t in target):
        if not droptol:
            raise ValueError('target {} does not occur in integrand; consider setting droptol>0'.format(', '.join(t for t in target if t not in functional.argshapes)))
        target = [t for t in target if t in functional.argshapes]
        if not target:
            return {}
    residual = [functional.derivative(t) for t in target]
    jacobian = _derivative(residual, target)
    lhs0, constrain = _parse_lhs_cons(lhs0, constrain, target, functional.argshapes, arguments)
    mask, vmask = _invert(constrain, target)
    lhs, vlhs = _redict(lhs0, target)
    val, res, jac = _integrate_blocks(functional, residual, jacobian, arguments=lhs, mask=mask)
    if droptol is not None:
        supp = jac.rowsupp(droptol)
        res = res[supp]
        jac = jac.submatrix(supp, supp)
        nan = numpy.zeros_like(vmask)
        nan[vmask] = ~supp  # return value is set to nan if dof is not supported and not constrained
        vmask[vmask] = supp  # dof is computed if it is supported and not constrained
    assert vmask.sum() == len(res)
    resnorm = numpy.linalg.norm(res)
    if any(jacobian.contains(t) for jacobian in jacobian for t in target):
        if tol <= 0:
            raise ValueError('nonlinear optimization problem requires a nonzero "tol" argument')
        solveargs.setdefault('rtol', 1e-3)
        firstresnorm = resnorm
        relax = relax0
        accept = True
        with log.context('newton {:.0f}%', 0) as reformat:
            while not numpy.isfinite(resnorm) or resnorm > tol:
                if accept:
                    reformat(100 * numpy.log(firstresnorm / resnorm) / numpy.log(firstresnorm / tol))
                    dlhs = -jac.solve_leniently(res, **solveargs)
                    res0 = res
                    dres = jac @ dlhs  # == -res0 if dlhs was solved to infinite precision
                    relax0 = 0
                vlhs[vmask] += (relax - relax0) * dlhs
                relax0 = relax  # currently applied relaxation
                val, res, jac = _integrate_blocks(functional, residual, jacobian, arguments=lhs, mask=mask)
                resnorm = numpy.linalg.norm(res)
                scale, accept = linesearch(res0, relax * dres, res, relax * (jac @ dlhs))
                relax = min(relax * scale, 1)
                if relax <= failrelax:
                    raise SolverError('stuck in local minimum')
        log.info('converged with residual {:.1e}'.format(resnorm))
    elif resnorm > tol:
        solveargs.setdefault('atol', tol)
        dlhs = -jac.solve(res, **solveargs)
        vlhs[vmask] += dlhs
        val += (res + jac @ dlhs / 2).dot(dlhs)
    if droptol is not None:
        vlhs[nan] = numpy.nan
        log.info('constrained {}/{} dofs'.format(len(vlhs) - nan.sum(), len(vlhs)))
    log.info('optimum value {:.2e}'.format(val))
    return lhs
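# For reference, a hedged usage sketch of optimize() as it is called in the
# driven cavity examples earlier in this section: a quadratic boundary
# functional is minimized with droptol=1e-15, so that degrees of freedom that
# do not contribute stay NaN and remain unconstrained. The names domain, ns,
# degree and solver are assumed to be set up as in those examples.
usqr = domain.boundary['top'].integral('(u_0 - 1)^2 d:x' @ ns, degree=degree * 2)
lidcons = solver.optimize('u', usqr, droptol=1e-15)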
def integrate(*args, **arguments: argdict):
    '''Integrate functions.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    self, funcs = args

    # Functions may consist of several blocks, such as originating from
    # chaining. Here we make a list of all blocks consisting of triplets of
    # argument id, evaluable index, and evaluable values.

    funcs = [function.asarray(func).prepare_eval(ndims=self.ndims) for func in funcs]
    blocks = [(ifunc, function.Tuple(ind), f.simplified) for ifunc, func in enumerate(funcs) for ind, f in function.blocks(func)]
    block2func, indices, values = zip(*blocks) if blocks else ([], [], [])

    log.debug('integrating {} distinct blocks'.format('+'.join(str(block2func.count(ifunc)) for ifunc in range(len(funcs)))))

    if config.dot:
        function.Tuple(values).graphviz()

    # To allocate (shared) memory for all block data we evaluate indexfunc to
    # build an nblocks x nelems+1 offset array, and nblocks index lists of
    # length nelems.

    offsets = numpy.zeros((len(blocks), self.nelems + 1), dtype=int)
    if blocks:
        sizefunc = function.stack([f.size for ifunc, ind, f in blocks]).simplified
        for ielem, transforms in enumerate(self.transforms):
            n, = sizefunc.eval(_transforms=transforms, **arguments)
            offsets[:, ielem + 1] = offsets[:, ielem] + n

    # Since several blocks may belong to the same function, we post process the
    # offsets to form consecutive intervals in longer arrays. The length of
    # these arrays is captured in the nfuncs-array nvals.

    nvals = numpy.zeros(len(funcs), dtype=int)
    for iblock, ifunc in enumerate(block2func):
        offsets[iblock] += nvals[ifunc]
        nvals[ifunc] = offsets[iblock, -1]

    # The data_index list contains shared memory index and value arrays for
    # each function argument.

    nprocs = min(config.nprocs, self.nelems)
    empty = parallel.shempty if nprocs > 1 else numpy.empty
    data_index = [(empty(n, dtype=float), empty((funcs[ifunc].ndim, n), dtype=int)) for ifunc, n in enumerate(nvals)]

    # In a second, parallel element loop, valuefunc is evaluated to fill the
    # data part of data_index using the offsets array for location. Each
    # element has its own location so no locks are required. The index part of
    # data_index is filled in the same loop. It does not use valuefunc data but
    # benefits from parallel speedup.

    valueindexfunc = function.Tuple(function.Tuple([value] + list(index)) for value, index in zip(values, indices))
    ielems = parallel.range(self.nelems)
    with parallel.fork(nprocs):
        for ielem in ielems:
            with log.context('elem', ielem, '({:.0f}%)'.format(100 * ielem / self.nelems)):
                points = self.points[ielem]
                for iblock, (intdata, *indices) in enumerate(valueindexfunc.eval(_transforms=self.transforms[ielem], _points=points.coords, **arguments)):
                    s = slice(*offsets[iblock, ielem:ielem + 2])
                    data, index = data_index[block2func[iblock]]
                    w_intdata = numeric.dot(points.weights, intdata)
                    data[s] = w_intdata.ravel()
                    si = (slice(None),) + (numpy.newaxis,) * (w_intdata.ndim - 1)
                    for idim, (ii,) in enumerate(indices):
                        index[idim, s].reshape(w_intdata.shape)[...] = ii[si]
                        si = si[:-1]

    retvals = []
    for i, func in enumerate(funcs):
        with log.context('assembling {}/{}'.format(i + 1, len(funcs))):
            retvals.append(matrix.assemble(*data_index[i], shape=func.shape))
    return retvals