def _traceback(richoutput, postmortem, exit):
    try:
        yield
    except (KeyboardInterrupt, SystemExit, pdb.bdb.BdbQuit):
        treelog.error('killed by user')
        if exit:
            raise SystemExit(1) from None
        raise
    except:
        exc = traceback.TracebackException(*sys.exc_info())
        prefix = ''
        while True:
            treelog.error(prefix + ''.join(exc.format_exception_only()).rstrip())
            treelog.debug('Traceback (most recent call first):\n'
                          + ''.join(reversed(exc.stack.format())).rstrip())
            if exc.__cause__ is not None:
                exc = exc.__cause__
                prefix = '.. caused by '
            elif exc.__context__ is not None and not exc.__suppress_context__:
                exc = exc.__context__
                prefix = '.. while handling '
            else:
                break
        if postmortem:
            print(_mkbox(
                'YOUR PROGRAM HAS DIED. The Python debugger',
                'allows you to examine its post-mortem state',
                'to figure out why this happened. Type "h"',
                'for an overview of commands to get going.',
                richoutput=richoutput))
            pdb.post_mortem()
        if exit:
            raise SystemExit(2) from None
        raise
    else:
        if exit:
            raise SystemExit(0)
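# Usage sketch for _traceback above. Since the function is generator-based, the
# assumption is that it is wrapped with contextlib.contextmanager elsewhere:
#
#   import contextlib
#   with contextlib.contextmanager(_traceback)(richoutput=True, postmortem=False, exit=True):
#       main()  # uncaught exceptions are logged and converted to SystemExit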
def generate(self):
    treelog.user('my message')
    with treelog.infofile('test.dat', 'w') as f:
        f.write('test1')
    with treelog.context('my context'):
        with treelog.iter.plain('iter', 'abc') as items:
            for c in items:
                treelog.info(c)
        with treelog.context('empty'):
            pass
        treelog.error('multiple..\n ..lines')
        with treelog.userfile('test.dat', 'wb') as f:
            treelog.info('generating')
            f.write(b'test2')
    self.generate_test()
    with treelog.context('context step={}', 0) as format:
        treelog.info('foo')
        format(1)
        treelog.info('bar')
    with treelog.errorfile('same.dat', 'wb') as f:
        f.write(b'test3')
    with treelog.debugfile('dbg.dat', 'wb') as f:
        f.write(b'test4')
    treelog.debug('dbg')
    treelog.warning('warn')
def evaluate_context(self, context, verbose=True, allowed_missing=()):
    evaluator = SimpleEval(functions={
        **DEFAULT_FUNCTIONS,
        'log': np.log,
        'log2': np.log2,
        'log10': np.log10,
        'sqrt': np.sqrt,
        'abs': np.abs,
        'ord': ord,
    })
    evaluator.names.update(context)
    evaluator.names.update(self._constants)
    allowed_missing = set(allowed_missing)
    for name, code in self._evaluables.items():
        try:
            result = evaluator.eval(code) if isinstance(code, str) else code
        except NameNotDefined as error:
            if error.name in allowed_missing:
                allowed_missing.add(name)
                log.debug(f'Skipped evaluating: {name}')
                continue
            else:
                raise
        if verbose:
            log.debug(f'Evaluated: {name} = {repr(result)}')
        evaluator.names[name] = context[name] = result
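# Minimal, self-contained illustration of the simpleeval API that
# evaluate_context above relies on (SimpleEval and NameNotDefined come from the
# third-party simpleeval package; no surrounding class is needed):
from simpleeval import SimpleEval, NameNotDefined

ev = SimpleEval()
ev.names['x'] = 3              # seed the evaluation context
print(ev.eval('x * 2 + 1'))    # -> 7
try:
    ev.eval('y + 1')           # 'y' was never defined
except NameNotDefined as err:
    print(err.name)            # -> 'y', the attribute inspected above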
def copy(self, context, sourcepath, targetpath, sourcename='SRC', targetname='TGT', ignore_missing=False):
    for source, target in self.iter_paths(context, sourcepath, targetpath):
        logsrc = Path(sourcename) / source.relative_to(sourcepath)
        logtgt = Path(targetname) / target.relative_to(targetpath)
        if not source.exists():
            level = log.warning if ignore_missing else log.error
            level(f"Missing file: {logsrc}")
            if not ignore_missing:
                return
        else:
            log.debug(logsrc, '->', logtgt)
            target.parent.mkdir(parents=True, exist_ok=True)
            if not self.template:
                shutil.copyfile(source, target)
                continue
            with open(source, 'r') as f:
                text = f.read()
            with open(target, 'w') as f:
                f.write(render(text, context))
def discover_fields(reader: Reader) -> Tuple[List[Field], List[Field]]:
    geometries, fields = [], []
    for field in reader.fields():
        if field.is_geometry:
            geometries.append(field)
            continue
        if config.field_filter is not None and field.name.lower() not in config.field_filter:
            continue
        fields.append(field)

    for field in fields:
        log.debug(f"Discovered field '{field.name}' with {field.ncomps} component(s)")

    fields = sorted(fields, key=attrgetter('name'))
    fields = sorted(fields, key=attrgetter('cells'))
    fields = list(discover_decompositions(fields))

    for field in geometries:
        log.debug(f"Discovered geometry '{field.name}' with coordinates {field.coords}")

    return geometries, fields
def setdefault(self, data: Array2D, oldkey: PatchKey) -> PatchKey:
    if oldkey in self.ids:
        return self.ids[oldkey]
    bbox = bounding_box(data)
    _, seq, *_ = oldkey
    try:
        newkey = self.bboxes[bbox]
        log.debug(f"Patch {oldkey} identified with {newkey} by bounding box")
    except KeyError:
        if config.strict_id:
            newkey = oldkey
        else:
            try:
                newkey = self.seqs[seq]
                log.debug(f"Patch {oldkey} identified with {newkey} by sequence number")
            except KeyError:
                newkey = oldkey
    self.ids[oldkey] = newkey
    self.bboxes[bbox] = newkey
    self.seqs[seq] = newkey
    return newkey
def _solver_arnoldi(self, rhs, atol, precon='direct', truncate=None):
    solve = self.getprecon(precon)
    lhs = numpy.zeros_like(rhs)
    res = rhs
    resnorm = numpy.linalg.norm(res, axis=0).max()
    krylov = collections.deque(maxlen=truncate)  # unlimited if truncate is None
    while resnorm > atol:
        k = solve(res)
        v = self @ k
        # In the following we use sum rather than dot for slightly higher
        # accuracy due to partial pairwise summation, see
        # https://numpy.org/doc/stable/reference/generated/numpy.sum.html
        for k_, v_, v2_ in krylov:  # orthogonalize v (modified Gram-Schmidt)
            c = numpy.multiply(v, v_, order='F').sum(0) / v2_
            k -= k_ * c
            v -= v_ * c
        v2 = numpy.square(v, order='F').sum(0)
        c = numpy.multiply(v, res, order='F').sum(0) / v2  # min_c |res - c v| => c = res.v / v.v
        newlhs = lhs + k * c
        res = rhs - self @ newlhs  # recompute rather than update to avoid drift
        newresnorm = numpy.linalg.norm(res, axis=0).max()
        if not numpy.isfinite(newresnorm) or newresnorm >= resnorm:
            break
        treelog.debug('residual decreased by {:.1f} orders using {} krylov vectors'.format(
            numpy.log10(resnorm / newresnorm), len(krylov)))
        lhs = newlhs
        resnorm = newresnorm
        krylov.append((k, v, v2))
    return lhs
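# Standalone sketch of the modified Gram-Schmidt projection step used in the
# Krylov loop above, on plain numpy arrays (hypothetical data, not this
# class's API):
import numpy

rng = numpy.random.default_rng(0)
A = rng.standard_normal((6, 6))
v = rng.standard_normal(6)
basis = []  # pairs (v_, v2_) with v2_ = v_.v_, mirroring the krylov deque
for _ in range(3):
    w = A @ v
    for v_, v2_ in basis:
        w -= v_ * (w @ v_ / v2_)  # subtract the projection onto each earlier vector
    basis.append((w, w @ w))
    v = w
# every pair of vectors in basis is now orthogonal up to rounding error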
def __init__(self, mtype, a, ia, ja, verbose=False, iparm={}):
    self.pt = numpy.zeros(64, numpy.int64)  # handle to data structure
    self.maxfct = c_int(1)
    self.mnum = c_int(1)
    self.mtype = c_int(mtype)
    self.n = c_int(len(ia) - 1)
    self.a = a.ctypes
    self.ia = ia.ctypes
    self.ja = ja.ctypes
    self.perm = None
    self.iparm = numpy.zeros(64, dtype=numpy.int32)  # https://software.intel.com/en-us/mkl-developer-reference-c-pardiso-iparm-parameter
    self.msglvl = c_int(verbose)
    libmkl.pardisoinit(self.pt.ctypes, byref(self.mtype), self.iparm.ctypes)  # initialize iparm based on mtype
    if self.iparm[0] != 1:
        raise MatrixError('pardiso init failed')
    for n, v in iparm.items():
        self.iparm[n] = v
    self.iparm[27] = 0  # double precision data
    self.iparm[34] = 0  # one-based indexing
    self.iparm[36] = 0  # csr matrix format
    self._phase(12)  # analysis, numerical factorization
    log.debug('peak memory use {:,d}k'.format(max(self.iparm[14], self.iparm[15] + self.iparm[16])))
def update(self, key: PatchKey, data: Array2D) -> int:
    try:
        patchid = self.id_by_key(key)
    except KeyError:
        patchid = len(self.patch_keys)
        log.debug(f"New unique patch detected {key}, assigned ID {patchid}")
        self.patch_keys[key] = patchid
    return patchid
def wrapper(*args, **kwargs):
    if _cache.value is None:
        return func(*args, **kwargs)
    args, kwargs = canonicalize(*args, **kwargs)
    # Hash the function key and the canonicalized arguments and compute the
    # hexdigest. This is used to identify cache file `cachefile`.
    h = hashlib.sha1(func_key)
    for arg in args:
        h.update(types.nutils_hash(arg))
    for hkv in sorted(hashlib.sha1(k.encode()).digest() + types.nutils_hash(v) for k, v in kwargs.items()):
        h.update(hkv)
    hkey = h.hexdigest()
    cachefile = _cache.value / hkey
    # Open and lock `cachefile`. Try to read it and, if successful, unlock the
    # file (implicitly by closing the file) and return the value. If reading
    # fails, e.g. because the file did not exist, call `func`, store the
    # result, unlock and return. While not necessary per se, we lock the file
    # immediately to avoid checking twice if there is a cached value: once
    # before locking the file, and once after locking, at which point another
    # party may have written something to the cache already.
    cachefile.parent.mkdir(parents=True, exist_ok=True)
    cachefile.touch()
    with cachefile.open('r+b') as f:
        log.debug('[cache.function {}] acquiring lock'.format(hkey))
        _lock_file(f)
        log.debug('[cache.function {}] lock acquired'.format(hkey))
        try:
            data = pickle.load(f)
            if len(data) == 3:  # For old caches.
                log_, fail, value = data
                if fail:
                    raise pickle.UnpicklingError
            else:
                value, log_ = data
        except (EOFError, pickle.UnpicklingError, IndexError):
            log.debug('[cache.function {}] failed to load, cache will be rewritten'.format(hkey))
        else:
            log.debug('[cache.function {}] load'.format(hkey))
            log_.replay()
            return value
        # Seek back to the beginning, because pickle might have read garbage.
        f.seek(0)
        # Disable the cache temporarily to prevent caching subresults *in* `func`.
        log_ = log.RecordLog()
        with disable(), log.add(log_):
            value = func(*args, **kwargs)
        pickle.dump((value, log_), f)
        log.debug('[cache.function {}] store'.format(hkey))
        return value
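# Hedged usage sketch: `wrapper` above is the closure produced by a caching
# decorator (in nutils, cache.function). Assuming that decorator:
#
#   @cache.function
#   def expensive(n):
#       return sum(i * i for i in range(n))
#
#   expensive(10**6)  # computes, then pickles (value, log) under a SHA1-keyed file
#   expensive(10**6)  # hits the cache: replays the recorded log, returns the value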
def find_applicable(fmt: str) -> type:
    """Return a writer subclass that can handle the given format."""
    for cls in subclasses(Writer, invert=True):
        if isabstract(cls):
            continue
        if cls.applicable(fmt):
            log.info(f"Using writer: {cls.writer_name}")
            return cls
        else:
            log.debug(f"Rejecting writer: {cls.writer_name}")
    raise TypeError(f"Unable to find any applicable writers for {fmt}")
def find_applicable(filename: Path) -> type:
    """Return a reader subclass that can handle files of the given type."""
    for cls in subclasses(Reader, invert=True):
        if isabstract(cls):
            continue
        if cls.applicable(filename):
            log.info(f"Using reader: {cls.reader_name}")
            return cls
        else:
            log.debug(f"Rejecting reader: {cls.reader_name}")
    raise TypeError(f"Unable to find any applicable readers for {filename}")
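# Hedged usage sketch for the two find_applicable variants above (one keyed on
# an output format string, one on an input filename); how the returned classes
# are instantiated is an assumption:
#
#   ReaderClass = find_applicable(Path('case.hdf5'))  # logs rejected readers at debug level
#   WriterClass = find_applicable('vtu')              # ditto for writers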
def _solver_fgmres(self, rhs, atol, maxiter=0, restart=150, precon=None, ztol=1e-12, preconargs={}, **args):
    rci = c_int(0)
    n = c_int(len(rhs))
    b = numpy.array(rhs, dtype=numpy.float64, copy=False)
    x = numpy.zeros_like(b)
    N = min(restart, len(rhs))
    ipar = numpy.empty(128, dtype=numpy.int32)
    dpar = numpy.empty(128, dtype=numpy.float64)
    tmp = numpy.empty((2*N+1)*len(rhs) + (N*(N+9))//2 + 1, dtype=numpy.float64)
    dfgmres_args = byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes
    itercount = c_int(0)
    libmkl.dfgmres_init(*dfgmres_args)
    ipar[7] = 0  # do not perform the stopping test for the maximum number of iterations
    ipar[8] = 0  # do not perform the residual stopping test
    ipar[9] = 1  # perform the user-defined stopping test by setting RCI_request=2
    if precon is not None:
        ipar[10] = 1  # run the preconditioned version of the FGMRES method
        precon = self.getprecon(precon, **args, **preconargs)
    ipar[11] = 0  # do not perform the automatic test for zero norm of the currently generated vector
    ipar[12] = 0  # update the solution to the vector x according to the computations done by the dfgmres routine
    ipar[14] = N  # the number of non-restarted FGMRES iterations
    libmkl.dfgmres_check(*dfgmres_args)
    if rci.value in (-1001, -1010, -1011):
        warnings.warn('dgmres ' + ' and '.join(['wrote some warnings to stdout', 'changed some parameters to make them consistent or correct'][1 if rci.value == -1010 else 0:1 if rci.value == -1001 else 2]))
    elif rci.value != 0:
        raise MatrixError('dgmres check failed with error code {}'.format(rci.value))
    with log.context('fgmres {:.0f}%', 0, 0) as format:
        while True:
            libmkl.dfgmres(*dfgmres_args)
            if rci.value == 1:  # multiply the matrix
                tmp[ipar[22]-1:ipar[22]+n.value-1] = self @ tmp[ipar[21]-1:ipar[21]+n.value-1]
            elif rci.value == 2:  # perform the stopping test
                if dpar[4] < atol:
                    libmkl.dfgmres_get(*dfgmres_args, byref(itercount))
                    if numpy.linalg.norm(self @ x - b) < atol:
                        break
                format(100 * numpy.log(dpar[2]/dpar[4]) / numpy.log(dpar[2]/atol))
                if ipar[3] > maxiter > 0:
                    break
            elif rci.value == 3:  # apply the preconditioner
                tmp[ipar[22]-1:ipar[22]+n.value-1] = precon(tmp[ipar[21]-1:ipar[21]+n.value-1])
            elif rci.value == 4:  # check if the norm of the current orthogonal vector is zero
                if dpar[6] < ztol:
                    libmkl.dfgmres_get(*dfgmres_args, byref(itercount))
                    if numpy.linalg.norm(self @ x - b) < atol:
                        break
                    raise MatrixError('singular matrix')
            else:
                raise MatrixError('this should not have occurred: rci={}'.format(rci.value))
    log.debug('performed {} fgmres iterations, {} restarts'.format(ipar[3], ipar[3]//ipar[14]))
    return x
def run_single(self, num, index, namespace):
    log.user(', '.join(f'{k}={repr(v)}' for k, v in namespace.items()))
    self.evaluate_context(namespace)
    namespace['_index'] = num
    collector = ResultCollector(self._types)
    for key, value in namespace.items():
        collector.collect(key, value)
    namespace.update(self._constants)
    with TemporaryDirectory() as workpath:
        workpath = Path(workpath)
        if self._logdir:
            logdir = self.storagepath / render(self._logdir, namespace)
            logdir.mkdir(parents=True, exist_ok=True)
        else:
            logdir = None
        log.debug(f"Using SRC='{self.sourcepath}', WRK='{workpath}', LOG='{logdir}'")
        for filemap in self._pre_files:
            filemap.copy(namespace, self.sourcepath, workpath, sourcename='SRC', targetname='WRK')
        success = True
        for command in self._commands:
            if not command.run(collector, namespace, workpath, logdir):
                self.commit_result(index, collector)
                success = False
                break
        if logdir:
            for filemap in self._post_files:
                filemap.copy(namespace, workpath, logdir, sourcename='WRK', targetname='LOG', ignore_missing=not success)
        self.commit_result(index, collector)
        return success
def fields(self) -> Iterable[Field]:
    for fld in self.src.fields():
        if fld.is_geometry:
            try:
                converter = graph.path(fld.coords, self.target)
                path = ' -> '.join(str(k) for k in [fld.coords, *converter.path[1:-1], self.target])
                log.debug(f"Coordinate conversion path: {path}")
                yield CoordinateTransformGeometryField(fld, self)
            except CoordinateConversionError:
                log.warning(f"Skipping {fld.name}: {fld.coords} not convertible to {self.target}")
                continue
        else:
            yield CoordinateTransformField(fld, self)
def _returnmap(self, Δε, εe0, κ0):
    σ0 = numpy.einsum('ijkl,kl->ij', self.C, εe0)
    Δσ = numpy.einsum('ijkl,kl->ij', self.C, Δε)
    Δλ = 0
    with treelog.iter.fraction('rmap', range(self.maxiter)) as counter:
        for iiter in counter:
            n = dF(σ0 + Δσ)
            dn = d2F(σ0 + Δσ)
            κ = κ0 + Δλ * numpy.sqrt(numpy.einsum('ij,ij', n, n))
            rσ = Δσ - numpy.einsum('ijkl,kl->ij', self.C, Δε - Δλ*n)
            rF = F(σ0 + Δσ) - numpy.sqrt(2/3) * (self.σyield + self.h*κ)
            b = numpy.empty(len(self.P) + 1)
            b[:-1] = numpy.einsum('ijk,jk', self.P, rσ)
            b[-1] = rF
            error = numpy.linalg.norm(b) / self.σyield
            treelog.debug('rmap residual = {}'.format(error))
            if error < self.rtol:
                break
            drσdσ = self.Isym + Δλ * numpy.einsum('ijkl,klmn->ijmn', self.C, dn)
            drσdλ = numpy.einsum('ijkl,kl->ij', self.C, n)
            drFdσ = n
            drFdλ = -numpy.sqrt(2 * numpy.einsum('ij,ij', n, n) / 3) * self.h
            A = numpy.empty((len(self.P) + 1,) * 2)
            A[:-1, :-1] = numpy.einsum('ikl,klmn,jmn->ij', self.P, drσdσ, self.P)
            A[-1, :-1] = numpy.einsum('mn,jmn->j', drFdσ, self.P)
            A[:-1, -1] = numpy.einsum('ikl,kl->i', self.P, drσdλ)
            A[-1, -1] = drFdλ
            x = -numpy.linalg.solve(A, b)
            Δσ += numpy.einsum('ijk,i->jk', self.P, x[:-1])
            Δλ += x[-1]
        else:
            raise RuntimeError('Return mapping solver did not converge')
    return Δλ * n
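# The linear system assembled in each Newton iteration above, restated
# informally: with residuals rσ = Δσ - C : (Δε - Δλ·n) and
# rF = F(σ0+Δσ) - sqrt(2/3)·(σyield + h·κ), both projected onto the basis
# self.P, the update solves A·x = -b for the increments (δσ, δλ), where A
# collects the four partial derivatives drσ/dσ, drσ/dλ, drF/dσ and drF/dλ.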
def run(self, collector: 'ResultCollector', context: Dict, workpath: Path, logdir: Path) -> bool:
    kwargs = {
        'cwd': workpath,
        'capture_output': True,
        'shell': False,
    }
    if isinstance(self._command, str):
        kwargs['shell'] = True
        command = render(self._command, context, mode='shell')
    else:
        command = [render(arg, context) for arg in self._command]
    with log.context(self.name):
        log.debug(command if isinstance(command, str) else ' '.join(command))
        with time() as duration:
            result = subprocess.run(command, **kwargs)
        duration = duration()
        if logdir:
            stdout_path = logdir / f'{self.name}.stdout'
            with open(stdout_path, 'wb') as f:
                f.write(result.stdout)
            stderr_path = logdir / f'{self.name}.stderr'
            with open(stderr_path, 'wb') as f:
                f.write(result.stderr)
        stdout = result.stdout.decode()
        for capture in self._capture:
            capture.find_in(collector, stdout)
        if self._capture_walltime:
            collector.collect(f'walltime/{self.name}', duration)
        if result.returncode:
            log.error(f"Command returned exit status {result.returncode}")
            if logdir:
                log.error(f"stdout stored in {stdout_path}")
                log.error(f"stderr stored in {stderr_path}")
            return False
        else:
            log.info(f"Success ({duration:.3g}s)")
        return True
def wrapped(self, rhs, atol, **kwargs):
    lhs = solve(self, rhs, **kwargs)
    res = rhs - self @ lhs
    resnorm = numpy.linalg.norm(res)
    if not numpy.isfinite(resnorm) or resnorm <= atol:
        return lhs
    with log.iter.plain('refinement iteration', itertools.count(start=1)) as count:
        for iiter in count:
            newlhs = lhs + solve(self, res, **kwargs)
            newres = rhs - self @ newlhs
            newresnorm = numpy.linalg.norm(newres)
            if not numpy.isfinite(newresnorm) or newresnorm >= resnorm:
                log.debug('residual increased to {:.0e} (discarding)'.format(newresnorm))
                return lhs
            log.debug('residual decreased to {:.0e}'.format(newresnorm))
            lhs, res, resnorm = newlhs, newres, newresnorm
            if resnorm <= atol:
                return lhs
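# Self-contained illustration of the iterative-refinement idea implemented by
# wrapped() above (plain numpy, made-up data; not this module's API):
import numpy

A = numpy.array([[4.0, 1.0], [1.0, 3.0]])
b = numpy.array([1.0, 2.0])
x = numpy.linalg.solve(A, b) + 1e-8   # pretend the initial solve was inexact
res = b - A @ x
x = x + numpy.linalg.solve(A, res)    # one refinement step shrinks the residual
assert numpy.linalg.norm(b - A @ x) < numpy.linalg.norm(res)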
def init_bases(self, basisnames: Optional[Set[str]] = None, constructor: type = StandardBasis):
    """Populate the contents of self.bases.

    To allow easier subclassing, the two keyword arguments allow a subclass
    to override which bases and which basis class to use.
    """
    if basisnames is None:
        basisnames = set(chain.from_iterable(self.h5.values()))

    # Construct Basis objects for each discovered basis name
    for basisname in basisnames:
        self.bases[basisname] = constructor(basisname, self)

    # Delete timeinfo, if present
    if 'timeinfo' in self.bases:
        del self.bases['timeinfo']

    # Delete bases that don't have any patch data
    to_del = [name for name, basis in self.bases.items() if basis.num_updates == 0]
    for basisname in to_del:
        log.debug(f"Removing basis {basisname}: no updates")
        del self.bases[basisname]

    # Delete the bases we don't need
    if config.only_bases:
        keep = {b.lower() for b in config.only_bases} | {config.coords.name.lower()}
        self.bases = {name: basis for name, basis in self.bases.items() if name.lower() in keep}

    # Debug output
    for basis in self.bases.values():
        log.debug(f"Basis {basis.name} updates at {basis.num_updates} step(s) with {basis.npatches} patch(es)")
def _solver_direct(self, rhs, atol, precon='direct', history=0):
    solve = self.getprecon(precon)
    k = solve(rhs)
    v = self @ k
    v2 = numpy.square(v, order='F').sum(0)  # use sum rather than dot for higher accuracy due to pairwise summation
    c = numpy.multiply(v, rhs, order='F').sum(0) / v2  # min_c |rhs - c v| => c = rhs.v / v.v
    lhs = k * c
    res = rhs - self @ lhs
    resnorm = numpy.linalg.norm(res, axis=0).max()
    if not numpy.isfinite(resnorm) or resnorm <= atol:
        return lhs
    history = collections.deque(maxlen=history)
    with treelog.iter.plain('refinement iteration', itertools.count(start=1)) as count:
        for iiter in count:
            history.append((k, v, v2))
            k = solve(res)
            v = self @ k
            for k_, v_, v2_ in history:  # orthogonalize v (modified Gram-Schmidt)
                c = numpy.multiply(v, v_, order='F').sum(0) / v2_
                k -= k_ * c
                v -= v_ * c
            v2 = numpy.square(v, order='F').sum(0)
            c = numpy.multiply(v, res, order='F').sum(0) / v2  # min_c |res - c v| => c = res.v / v.v
            newlhs = k * c
            newlhs += lhs
            res = rhs - self @ newlhs  # recompute rather than update to avoid drift
            newresnorm = numpy.linalg.norm(res, axis=0).max()
            if not numpy.isfinite(newresnorm) or newresnorm >= resnorm:
                treelog.debug('residual increased to {:.0e} (discarding)'.format(newresnorm))
                return lhs
            lhs = newlhs
            resnorm = newresnorm
            treelog.debug('residual decreased to {:.0e}'.format(resnorm))
            if resnorm <= atol:
                return lhs
def pipeline(reader: Source, writer: Writer):
    """Main driver for moving data from reader to writer."""
    # TODO: Streamline filter application
    if config.only_final_timestep:
        reader = LastStepFilter(reader)
    elif config.timestep_slice is not None:
        reader = StepSliceFilter(reader, *map(int, config.timestep_slice.split(':')))
    reader = TesselatorFilter(reader)
    if writer.writer_name != 'VTF':
        reader = MergeTopologiesFilter(reader)
    reader = CoordinateTransformFilter(reader, config.coords)

    geometries, fields = discover_fields(reader)
    if not geometries:
        raise ValueError("Unable to find any useful geometry fields")
    geometry = geometries[0]
    log.debug(f"Using '{geometry.name}' as geometry input")

    first = True
    for stepid, stepdata in log.iter.plain('Step', reader.steps()):
        with writer.step(stepdata) as step:
            with step.geometry(geometry) as geom:
                for patch, data in geometry.patches(stepid, force=first):
                    geom(patch, data)
            for field in fields:
                with step.field(field) as fld:
                    for patch, data in field.patches(stepid, force=first, coords=geometry.coords):
                        fld(patch, data)
            first = False
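# Hedged driver sketch showing how pipeline() composes with the
# find_applicable helpers earlier in this collection; the constructor
# signatures and context-manager behaviour of reader/writer are assumptions:
#
#   filename = Path('case.hdf5')
#   with find_applicable(filename)(filename) as reader, \
#        find_applicable('vtu')(Path('out.vtu')) as writer:
#       pipeline(reader, writer)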
def solve_direct(self, rhs):
    log.debug('solving system using MKL Pardiso')
    if self._factors:
        log.debug('reusing existing factorization')
        pardiso, iparm, mtype = self._factors
        phase = 33  # solve, iterative refinement
    else:
        pardiso = Pardiso(self.libmkl)
        iparm = numpy.zeros(64, dtype=numpy.int32)  # https://software.intel.com/en-us/mkl-developer-reference-c-pardiso-iparm-parameter
        iparm[0] = 1   # supply all values in components iparm[1:64]
        iparm[1] = 2   # fill-in reducing ordering for the input matrix: nested dissection algorithm from the METIS package
        iparm[9] = 13  # pivoting perturbation threshold 1e-13 (default for nonsymmetric)
        iparm[10] = 1  # enable scaling vectors (default for nonsymmetric)
        iparm[12] = 1  # enable improved accuracy using (non-)symmetric weighted matching (default for nonsymmetric)
        iparm[34] = 0  # one-based indexing
        mtype = 11  # real and nonsymmetric
        phase = 13  # analysis, numerical factorization, solve, iterative refinement
        self._factors = pardiso, iparm, mtype
    rhsflat = numpy.ascontiguousarray(rhs.reshape(rhs.shape[0], -1).T, dtype=numpy.float64)
    lhsflat = numpy.empty((rhsflat.shape[0], self.shape[1]), dtype=numpy.float64)
    pardiso(phase=phase, mtype=mtype, iparm=iparm, n=self.shape[0], nrhs=rhsflat.shape[0],
            b=rhsflat, x=lhsflat, a=self.data, ia=self.rowptr, ja=self.colidx)
    log.debug('solver returned after {} refinement steps; peak memory use {:,d}k'.format(
        iparm[6], max(iparm[14], iparm[15] + iparm[16])))
    return lhsflat.T.reshape(lhsflat.shape[1:] + rhs.shape[1:])
def integrate(*args, **arguments: argdict):
    '''Integrate functions.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    self, funcs = args

    # Functions may consist of several blocks, such as originating from
    # chaining. Here we make a list of all blocks consisting of triplets of
    # argument id, evaluable index, and evaluable values.
    funcs = [function.asarray(func).prepare_eval(ndims=self.ndims) for func in funcs]
    blocks = [(ifunc, function.Tuple(ind), f.simplified) for ifunc, func in enumerate(funcs) for ind, f in function.blocks(func)]
    block2func, indices, values = zip(*blocks) if blocks else ([], [], [])
    log.debug('integrating {} distinct blocks'.format('+'.join(str(block2func.count(ifunc)) for ifunc in range(len(funcs)))))

    if config.dot:
        function.Tuple(values).graphviz()

    # To allocate (shared) memory for all block data we evaluate indexfunc to
    # build an nblocks x nelems+1 offset array, and nblocks index lists of
    # length nelems.
    offsets = numpy.zeros((len(blocks), self.nelems + 1), dtype=int)
    if blocks:
        sizefunc = function.stack([f.size for ifunc, ind, f in blocks]).simplified
        for ielem, transforms in enumerate(self.transforms):
            n, = sizefunc.eval(_transforms=transforms, **arguments)
            offsets[:, ielem + 1] = offsets[:, ielem] + n

    # Since several blocks may belong to the same function, we post process the
    # offsets to form consecutive intervals in longer arrays. The length of
    # these arrays is captured in the nfuncs-array nvals.
    nvals = numpy.zeros(len(funcs), dtype=int)
    for iblock, ifunc in enumerate(block2func):
        offsets[iblock] += nvals[ifunc]
        nvals[ifunc] = offsets[iblock, -1]

    # The data_index list contains shared memory index and value arrays for
    # each function argument.
    nprocs = min(config.nprocs, self.nelems)
    empty = parallel.shempty if nprocs > 1 else numpy.empty
    data_index = [(empty(n, dtype=float), empty((funcs[ifunc].ndim, n), dtype=int)) for ifunc, n in enumerate(nvals)]

    # In a second, parallel element loop, valuefunc is evaluated to fill the
    # data part of data_index using the offsets array for location. Each
    # element has its own location so no locks are required. The index part of
    # data_index is filled in the same loop. It does not use valuefunc data but
    # benefits from parallel speedup.
    valueindexfunc = function.Tuple(function.Tuple([value] + list(index)) for value, index in zip(values, indices))
    ielems = parallel.range(self.nelems)
    with parallel.fork(nprocs):
        for ielem in ielems:
            with log.context('elem', ielem, '({:.0f}%)'.format(100 * ielem / self.nelems)):
                points = self.points[ielem]
                for iblock, (intdata, *indices) in enumerate(valueindexfunc.eval(_transforms=self.transforms[ielem], _points=points.coords, **arguments)):
                    s = slice(*offsets[iblock, ielem:ielem + 2])
                    data, index = data_index[block2func[iblock]]
                    w_intdata = numeric.dot(points.weights, intdata)
                    data[s] = w_intdata.ravel()
                    si = (slice(None),) + (numpy.newaxis,) * (w_intdata.ndim - 1)
                    for idim, (ii,) in enumerate(indices):
                        index[idim, s].reshape(w_intdata.shape)[...] = ii[si]
                        si = si[:-1]

    retvals = []
    for i, func in enumerate(funcs):
        with log.context('assembling {}/{}'.format(i + 1, len(funcs))):
            retvals.append(matrix.assemble(*data_index[i], shape=func.shape))
    return retvals
    return iter(title, builtins.range(*args))

def enumerate(title, iterable):
    warnings.deprecation('log.enumerate is deprecated; use log.iter.percentage instead')
    return iter(title, builtins.enumerate(iterable), length=_len(iterable))

def zip(title, *iterables):
    warnings.deprecation('log.zip is deprecated; use log.iter.percentage instead')
    return iter(title, builtins.zip(*iterables), length=min(map(_len, iterables)))

def count(title, start=0, step=1):
    warnings.deprecation('log.count is deprecated; use log.iter.percentage instead')
    return iter(title, itertools.count(start, step))

if distutils.version.StrictVersion(treelog.version) >= distutils.version.StrictVersion('1.0b5'):
    from treelog import debug, info, user, warning, error, debugfile, infofile, userfile, warningfile, errorfile, context
else:
    debug = lambda *args, **kwargs: treelog.debug(*args, **kwargs)
    info = lambda *args, **kwargs: treelog.info(*args, **kwargs)
    user = lambda *args, **kwargs: treelog.user(*args, **kwargs)
    warning = lambda *args, **kwargs: treelog.warning(*args, **kwargs)
    error = lambda *args, **kwargs: treelog.error(*args, **kwargs)
    debugfile = lambda *args, **kwargs: treelog.debugfile(*args, **kwargs)
    infofile = lambda *args, **kwargs: treelog.infofile(*args, **kwargs)
    userfile = lambda *args, **kwargs: treelog.userfile(*args, **kwargs)
    warningfile = lambda *args, **kwargs: treelog.warningfile(*args, **kwargs)
    errorfile = lambda *args, **kwargs: treelog.errorfile(*args, **kwargs)
    context = lambda *args, **kwargs: treelog.context(*args, **kwargs)

# vim:sw=2:sts=2:et
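# Migration sketch for the deprecated wrappers above. treelog ships
# treelog.iter.plain/.fraction/.percentage; exact signatures may vary between
# versions, so this is indicative only:
#
#   for item in treelog.iter.percentage('item', items):
#       ...   # replaces the log.range/log.enumerate/log.zip/log.count wrappers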
def discover_decompositions(fields: List[Field]) -> Iterable[Field]:
    for field in fields:
        yield field
        for subfield in field.decompositions():
            log.debug(f"Discovered decomposed scalar field '{subfield.name}'")
            yield subfield
def integrate_sparse(self, funcs: types.tuple[function.asarray], arguments: types.frozendict[str, types.frozenarray] = None):
    '''Integrate functions into sparse data.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    if arguments is None:
        arguments = {}

    # Functions may consist of several blocks, such as originating from
    # chaining. Here we make a list of all blocks consisting of triplets of
    # argument id, evaluable index, and evaluable values.
    funcs = self._prepare_funcs(funcs)
    blocks = [(ifunc, function.Tuple(ind).optimized_for_numpy, f.optimized_for_numpy) for ifunc, func in enumerate(funcs) for ind, f in function.blocks(func)]
    block2func, indices, values = zip(*blocks) if blocks else ([], [], [])
    log.debug('integrating {} distinct blocks'.format('+'.join(str(block2func.count(ifunc)) for ifunc in range(len(funcs)))))

    # To allocate (shared) memory for all block data we evaluate indexfunc to
    # build an nblocks x nelems+1 offset array. In the first step the block
    # sizes are evaluated.
    offsets = numpy.empty((len(blocks), self.nelems + 1), dtype=numpy.uint64)
    sizefunc = function.Tuple([f.size for ifunc, ind, f in blocks]).optimized_for_numpy
    for ielem, transforms in enumerate(zip(*self.transforms)):
        offsets[:, ielem + 1] = sizefunc.eval(_transforms=transforms, **arguments)

    # In the second step the block sizes are accumulated to form offsets. Since
    # several blocks may belong to the same function, we post process the
    # offsets to form consecutive intervals in longer arrays. The length of
    # these arrays is captured in the nvals array.
    nvals = numpy.zeros(len(funcs), dtype=numpy.uint64)
    for iblock, ifunc in enumerate(block2func):
        v = offsets[iblock]
        v[0] = nvals[ifunc]
        numpy.cumsum(v, out=v)  # in place accumulation
        assert (v[1:] >= v[:-1]).all(), 'integer overflow'
        nvals[ifunc] = v[-1]

    # In a second, parallel element loop, value and index are evaluated and
    # stored in shared memory using the offsets array for location. Each
    # element has its own location so no locks are required.
    datas = [parallel.shempty(n, dtype=sparse.dtype(funcs[ifunc].shape)) for ifunc, n in enumerate(nvals)]
    trailingdims = [numpy.cumsum([0] + [ind.ndim for ind in index[:0:-1]])[::-1] for index in indices]  # prepare index reshapes

    with function.Tuple(function.Tuple([value, *index]) for value, index in zip(values, indices)).session(graphviz) as eval, \
         parallel.ctxrange('integrating', self.nelems) as ielems:
        for ielem in ielems:
            points = self.points[ielem]
            for iblock, (intdata, *indices) in enumerate(eval(_transforms=tuple(t[ielem] for t in self.transforms), _points=points.coords, **arguments)):
                data = datas[block2func[iblock]][offsets[iblock, ielem]:offsets[iblock, ielem + 1]].reshape(intdata.shape[1:])
                numpy.einsum('p,p...->...', points.weights, intdata, out=data['value'])
                td = trailingdims[iblock]
                for idim, ii in enumerate(indices):
                    data['index']['i' + str(idim)] = ii.reshape(ii.shape[1:] + (1,) * td[idim])  # note: this could be implemented using newaxis, but reshape appears to be faster

    return datas
def integrate_sparse(self, funcs: types.tuple[function.asarray], arguments: types.frozendict[str, types.frozenarray] = None):
    '''Integrate functions into sparse data.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    if arguments is None:
        arguments = {}

    # Functions may consist of several blocks, such as originating from
    # chaining. Here we make a list of all blocks consisting of triplets of
    # argument id, evaluable index, and evaluable values.
    funcs = self._prepare_funcs(funcs)
    blocks = [(ifunc, function.Tuple(ind), f.simplified.optimized_for_numpy) for ifunc, func in enumerate(funcs) for ind, f in function.blocks(func)]
    block2func, indices, values = zip(*blocks) if blocks else ([], [], [])
    log.debug('integrating {} distinct blocks'.format('+'.join(str(block2func.count(ifunc)) for ifunc in range(len(funcs)))))

    if graphviz:
        function.Tuple(values).graphviz(graphviz)

    # To allocate (shared) memory for all block data we evaluate indexfunc to
    # build an nblocks x nelems+1 offset array, and nblocks index lists of
    # length nelems.
    offsets = numpy.zeros((len(blocks), self.nelems + 1), dtype=int)
    if blocks:
        sizefunc = function.stack([f.size for ifunc, ind, f in blocks]).simplified
        for ielem, transforms in enumerate(zip(*self.transforms)):
            n, = sizefunc.eval(_transforms=transforms, **arguments)
            offsets[:, ielem + 1] = offsets[:, ielem] + n

    # Since several blocks may belong to the same function, we post process the
    # offsets to form consecutive intervals in longer arrays. The length of
    # these arrays is captured in the nfuncs-array nvals.
    nvals = numpy.zeros(len(funcs), dtype=int)
    for iblock, ifunc in enumerate(block2func):
        offsets[iblock] += nvals[ifunc]
        nvals[ifunc] = offsets[iblock, -1]

    # In a second, parallel element loop, value and index are evaluated and
    # stored in shared memory using the offsets array for location. Each
    # element has its own location so no locks are required.
    datas = [parallel.shempty(n, dtype=sparse.dtype(funcs[ifunc].shape)) for ifunc, n in enumerate(nvals)]
    valueindexfunc = function.Tuple(function.Tuple([value] + list(index)) for value, index in zip(values, indices))
    with parallel.ctxrange('integrating', self.nelems) as ielems:
        for ielem in ielems:
            points = self.points[ielem]
            for iblock, (intdata, *indices) in enumerate(valueindexfunc.eval(_transforms=tuple(t[ielem] for t in self.transforms), _points=points.coords, **arguments)):
                data = datas[block2func[iblock]][offsets[iblock, ielem]:offsets[iblock, ielem + 1]].reshape(intdata.shape[1:])
                numpy.einsum('p,p...->...', points.weights, intdata, out=data['value'])
                for idim, ii in enumerate(indices):
                    data['index']['i' + str(idim)] = ii.reshape([-1] + [1] * (data.ndim - 1 - idim))

    return datas
def _solver_fgmres(self, rhs, atol, maxiter=0, restart=150, precon=None, ztol=1e-12, preconargs={}, **args):
    rci = c_int(0)
    n = c_int(len(rhs))
    b = numpy.array(rhs, dtype=numpy.float64)
    x = numpy.zeros_like(b)
    ipar = numpy.zeros(128, dtype=numpy.int32)
    ipar[0] = len(rhs)  # problem size
    ipar[1] = 6  # output on screen
    ipar[2] = 1  # current stage of the RCI FGMRES computations; the initial value is 1
    ipar[3] = 0  # current iteration number; the initial value is 0
    ipar[4] = 0  # maximum number of iterations
    ipar[5] = 1  # output error messages in accordance with the parameter ipar[1]
    ipar[6] = 1  # output warning messages in accordance with the parameter ipar[1]
    ipar[7] = 0  # do not perform the stopping test for the maximum number of iterations: ipar[3] <= ipar[4]
    ipar[8] = 0  # do not perform the residual stopping test: dpar[4] <= dpar[3]
    ipar[9] = 1  # perform the user-defined stopping test by setting RCI_request=2
    if precon is None:
        ipar[10] = 0  # run the non-preconditioned version of the FGMRES method
    else:
        ipar[10] = 1  # run the preconditioned version of the FGMRES method
        precon = self.getprecon(precon, **args, **preconargs)
    ipar[11] = 0  # do not perform the automatic test for zero norm of the currently generated vector: dpar[6] <= dpar[7]
    ipar[12] = 1  # update the solution to the vector b according to the computations done by the dfgmres routine
    ipar[13] = 0  # internal iteration counter that counts the number of iterations before the restart takes place; the initial value is 0
    ipar[14] = min(restart, len(rhs))  # the number of non-restarted FGMRES iterations
    dpar = numpy.zeros(128, dtype=numpy.float64)
    tmp = numpy.zeros((2 * ipar[14] + 1) * ipar[0] + (ipar[14] * (ipar[14] + 9)) // 2 + 1, dtype=numpy.float64)
    libmkl.dfgmres_check(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes)
    if rci.value != 0:
        raise MatrixError('dgmres check failed with error code {}'.format(rci.value))
    with log.context('fgmres {:.0f}%', 0, 0) as format:
        while True:
            libmkl.dfgmres(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes)
            if rci.value == 1:  # multiply the matrix
                tmp[ipar[22] - 1:ipar[22] + n.value - 1] = self @ tmp[ipar[21] - 1:ipar[21] + n.value - 1]
            elif rci.value == 2:  # perform the stopping test
                if dpar[4] < atol:
                    libmkl.dfgmres_get(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes, byref(c_int(0)))
                    if numpy.linalg.norm(self @ b - rhs) < atol:
                        break
                    b[:] = rhs  # reset rhs vector for restart
                format(100 * numpy.log(dpar[2] / dpar[4]) / numpy.log(dpar[2] / atol))
                if ipar[3] > maxiter > 0:
                    break
            elif rci.value == 3:  # apply the preconditioner
                tmp[ipar[22] - 1:ipar[22] + n.value - 1] = precon(tmp[ipar[21] - 1:ipar[21] + n.value - 1])
            elif rci.value == 4:  # check if the norm of the current orthogonal vector is zero
                if dpar[6] < ztol:
                    libmkl.dfgmres_get(byref(n), x.ctypes, b.ctypes, byref(rci), ipar.ctypes, dpar.ctypes, tmp.ctypes, byref(c_int(0)))
                    if numpy.linalg.norm(self @ b - rhs) < atol:
                        break
                    raise MatrixError('singular matrix')
            else:
                raise MatrixError('this should not have occurred: rci={}'.format(rci.value))
    log.debug('performed {} fgmres iterations, {} restarts'.format(ipar[3], ipar[3] // ipar[14]))
    return b
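# The reverse-communication (RCI) pattern that drives both dfgmres variants
# above, distilled to commented pseudocode (not MKL's literal API surface):
#
#   while True:
#       dfgmres(...)                # library advances, then requests work via rci
#       if rci == 1:   y = A @ z              # caller performs the matvec
#       elif rci == 2: run the stopping test  # caller owns convergence control
#       elif rci == 3: y = precon(z)          # caller applies the preconditioner
#       elif rci == 4: guard against a zero-norm Krylov vector
#       else:          report an error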
def __iter__(self):
    length = type(self).length
    if _cache.value is None:
        yield from self.resume_index([], 0)
        return
    # The hash of `types.Immutable` uniquely defines this `Recursion`, so use
    # this to identify the cache directory. All iterations are stored as
    # separate files, numbered '0000', '0001', ..., in this directory.
    hkey = self.__nutils_hash__.hex()
    cachepath = _cache.value / hkey
    cachepath.mkdir(exist_ok=True, parents=True)
    log.debug('[cache.Recursion {}] start iterating'.format(hkey))
    # The `history` variable is updated while reading from the cache and
    # truncated to the required length.
    history = []
    # The `exhausted` variable controls if we are reading items from the
    # cache (`False`) or we are computing values and writing to the cache.
    # Once `exhausted` is `True` we keep it there, even if at some point
    # there are cached items available.
    exhausted = False
    # The `stop` variable indicates if an exception is raised in `resume`.
    stop = False
    for i in itertools.count():
        cachefile = cachepath / '{:04d}'.format(i)
        cachefile.touch()
        with cachefile.open('r+b') as f:
            log.debug('[cache.Recursion {}.{:04d}] acquiring lock'.format(hkey, i))
            _lock_file(f)
            log.debug('[cache.Recursion {}.{:04d}] lock acquired'.format(hkey, i))
            if not exhausted:
                try:
                    log_, stop, value = pickle.load(f)
                except (pickle.UnpicklingError, IndexError):
                    log.debug('[cache.Recursion {}.{:04d}] failed to load, cache will be rewritten from this point'.format(hkey, i))
                    exhausted = True
                except EOFError:
                    log.debug('[cache.Recursion {}.{:04d}] cache exhausted'.format(hkey, i))
                    exhausted = True
                else:
                    log.debug('[cache.Recursion {}.{:04d}] load'.format(hkey, i))
                    log_.replay()
                    if stop and value is None:
                        value = StopIteration
                    history.append(value)
                    if len(history) > length:
                        history = history[1:]
                if exhausted:
                    resume = self.resume_index(history, i)
                    f.seek(0)
                    del history
            if exhausted:
                # Disable the cache temporarily to prevent caching subresults
                # *in* `func`.
                log_ = log.RecordLog()
                with disable(), log.add(log_):
                    try:
                        value = next(resume)
                    except Exception as e:
                        stop = True
                        value = e
                log.debug('[cache.Recursion {}.{}] store'.format(hkey, i))
                pickle.dump((log_, stop, value), f)
        if not stop:
            yield value
        elif isinstance(value, StopIteration):
            return
        else:
            raise value
def integrate(*args, **arguments: argdict):
    '''Integrate functions.

    Args
    ----
    funcs : :class:`nutils.function.Array` object or :class:`tuple` thereof.
        The integrand(s).
    arguments : :class:`dict` (default: None)
        Optional arguments for function evaluation.
    '''

    self, funcs = args

    # Functions may consist of several blocks, such as originating from
    # chaining. Here we make a list of all blocks consisting of triplets of
    # argument id, evaluable index, and evaluable values.
    funcs = [function.asarray(func).prepare_eval(ndims=self.ndims) for func in funcs]
    blocks = [(ifunc, function.Tuple(ind), f.simplified) for ifunc, func in enumerate(funcs) for ind, f in function.blocks(func)]
    block2func, indices, values = zip(*blocks) if blocks else ([], [], [])
    log.debug('integrating {} distinct blocks'.format('+'.join(str(block2func.count(ifunc)) for ifunc in range(len(funcs)))))

    if config.dot:
        function.Tuple(values).graphviz()

    # To allocate (shared) memory for all block data we evaluate indexfunc to
    # build an nblocks x nelems+1 offset array, and nblocks index lists of
    # length nelems.
    offsets = numpy.zeros((len(blocks), self.nelems + 1), dtype=int)
    if blocks:
        sizefunc = function.stack([f.size for ifunc, ind, f in blocks]).simplified
        for ielem, transforms in enumerate(zip(*self.transforms)):
            n, = sizefunc.eval(_transforms=transforms, **arguments)
            offsets[:, ielem + 1] = offsets[:, ielem] + n

    # Since several blocks may belong to the same function, we post process the
    # offsets to form consecutive intervals in longer arrays. The length of
    # these arrays is captured in the nfuncs-array nvals.
    nvals = numpy.zeros(len(funcs), dtype=int)
    for iblock, ifunc in enumerate(block2func):
        offsets[iblock] += nvals[ifunc]
        nvals[ifunc] = offsets[iblock, -1]

    # The data_index list contains shared memory index and value arrays for
    # each function argument.
    nprocs = min(config.nprocs, self.nelems)
    empty = parallel.shempty if nprocs > 1 else numpy.empty
    data_index = [(empty(n, dtype=float), empty((funcs[ifunc].ndim, n), dtype=int)) for ifunc, n in enumerate(nvals)]

    # In a second, parallel element loop, valuefunc is evaluated to fill the
    # data part of data_index using the offsets array for location. Each
    # element has its own location so no locks are required. The index part of
    # data_index is filled in the same loop. It does not use valuefunc data but
    # benefits from parallel speedup.
    valueindexfunc = function.Tuple(function.Tuple([value] + list(index)) for value, index in zip(values, indices))
    ielems = parallel.range(self.nelems)
    with parallel.fork(nprocs):
        for ielem in ielems:
            with log.context('elem', ielem, '({:.0f}%)'.format(100 * ielem / self.nelems)):
                points = self.points[ielem]
                for iblock, (intdata, *indices) in enumerate(valueindexfunc.eval(_transforms=tuple(t[ielem] for t in self.transforms), _points=points.coords, **arguments)):
                    s = slice(*offsets[iblock, ielem:ielem + 2])
                    data, index = data_index[block2func[iblock]]
                    w_intdata = numeric.dot(points.weights, intdata)
                    data[s] = w_intdata.ravel()
                    si = (slice(None),) + (numpy.newaxis,) * (w_intdata.ndim - 1)
                    for idim, (ii,) in enumerate(indices):
                        index[idim, s].reshape(w_intdata.shape)[...] = ii[si]
                        si = si[:-1]

    retvals = []
    for i, func in enumerate(funcs):
        with log.context('assembling {}/{}'.format(i + 1, len(funcs))):
            retvals.append(matrix.assemble(*data_index[i], shape=func.shape))
    return retvals