def _get_flattened_sizes(self):
    """
    Collect the flattened sizes of all vars stored in our internal array.

    Entries whose accessor has ``pbo`` set are left out, and entries whose
    accessor is flagged ``remote`` are reported with a size of 0.

    Returns
    -------
    list of lists of (name, size) tuples
        Contains an entry for each process in this object's communicator.
    """
    local_sizes = [
        (var_name, 0 if accessor.remote else accessor.meta['size'])
        for var_name, accessor in iteritems(self._dat)
        if not accessor.pbo
    ]

    # Gather the local var sizes from every process sharing our comm.
    # They match across processes unless a variable belongs to a
    # multiprocess component, in which case the part of the component
    # running in a given process only has a slice of each variable.
    if trace:  # pragma: no cover
        debug("'%s': allgathering local unknown sizes: local=%s" %
              (self._sysdata.pathname, local_sizes))

    return self.comm.allgather(local_sizes)
def _get_flattened_sizes(self):
    """
    Gather the (name, size) pairs of every var kept in the internal array.

    Accessors with ``pbo`` set are skipped; accessors with ``remote`` set
    contribute a size of 0 instead of their ``meta['size']``.

    Returns
    -------
    list of lists of (name, size) tuples
        Contains an entry for each process in this object's communicator.
    """
    sizes_here = []
    for vname, entry in iteritems(self._dat):
        if entry.pbo:
            continue
        flat_size = 0 if entry.remote else entry.meta['size']
        sizes_here.append((vname, flat_size))

    # The sizes are collected from all processes that share the same comm.
    # Every process reports the same values except when a variable belongs
    # to a multiprocess component; then each process only holds a slice of
    # each of that component's variables.
    if trace:  # pragma: no cover
        debug("'%s': allgathering local unknown sizes: local=%s" %
              (self._sysdata.pathname, sizes_here))

    return self.comm.allgather(sizes_here)
def setup(self, unknowns_dict, relevance, var_of_interest=None,
          store_byobjs=False, shared_vec=None):
    """
    Create internal data storage for variables in unknowns_dict, then
    create ``self.petsc_vec`` from ``self.vec``.

    Args
    ----
    unknowns_dict : `OrderedDict`
        A dictionary of absolute variable names keyed to an associated
        metadata dictionary.

    relevance : `Relevance` object
        Object that knows what vars are relevant for each var_of_interest.

    var_of_interest : str or None
        Name of the current variable of interest.

    store_byobjs : bool, optional
        Indicates that 'pass by object' vars should be stored. This is
        only true for the unknowns vecwrapper.

    shared_vec : ndarray, optional
        If not None, create vec as a subslice of this array.
    """
    # The base class does all of the allocation; arguments pass through as is.
    super(PetscSrcVecWrapper, self).setup(
        unknowns_dict,
        relevance=relevance,
        var_of_interest=var_of_interest,
        store_byobjs=store_byobjs,
        shared_vec=shared_vec,
    )

    if trace:
        details = (self._sysdata.pathname, len(self.vec), self.keys(), self.vec)
        debug("'%s': creating src petsc_vec: size(%d) %s vec=%s" % details)

    self.petsc_vec = PETSc.Vec().createWithArray(self.vec, comm=self.comm)
def get_view(self, sys_pathname, comm, varmap):
    """
    Return the base-class view with a PETSc vector attached to it.

    Args
    ----
    sys_pathname : str
        Passed to the base class ``get_view`` and used in trace output.

    comm : an MPI communicator
        Passed to the base class and used to create the view's PETSc vector.

    varmap : object
        Passed through unchanged to the base class ``get_view``.

    Returns
    -------
    view
        The object returned by the base class, with ``petsc_vec`` set.
    """
    sub_view = super(PetscSrcVecWrapper, self).get_view(sys_pathname, comm, varmap)

    if trace:
        details = (sys_pathname, len(sub_view.vec), sub_view.keys(), sub_view.vec)
        debug("'%s': creating src petsc_vec (view): (size %d )%s: vec=%s" % details)

    sub_view.petsc_vec = PETSc.Vec().createWithArray(sub_view.vec, comm=comm)
    return sub_view
def __init__(self, src_vec, tgt_vec, src_idxs, tgt_idxs, vec_conns,
             byobj_conns, mode):
    """
    Build the PETSc scatter between the source and target PETSc vectors.

    Args
    ----
    src_vec : vec wrapper
        Source wrapper; its ``comm``, ``petsc_vec``, ``vec`` and
        ``_sysdata.pathname`` are read here.

    tgt_vec : vec wrapper
        Target wrapper; its ``petsc_vec`` and ``vec`` are read here.

    src_idxs : index array
        Indices used to build the source index set.

    tgt_idxs : index array
        Indices used to build the target index set.

    vec_conns : iterable of 2-tuples
        Connection pairs; in this method they are only used for trace output.

    byobj_conns : object
        Stored unchanged as ``self.byobj_conns``.

    mode : str
        'fwd' selects the '-->' arrow in trace output, anything else '<--'.

    Raises
    ------
    RuntimeError
        If anything inside the scatter-creation step raises; the message
        includes the indices and both vector sizes.
    """
    self.byobj_conns = byobj_conns
    self.comm = comm = src_vec.comm

    src_petsc = src_vec.petsc_vec
    tgt_petsc = tgt_vec.petsc_vec
    name = src_vec._sysdata.pathname

    if trace:
        debug("'%s': creating index sets for '%s' DataTransfer: %s %s" %
              (name, src_vec._sysdata.pathname, src_idxs, tgt_idxs))

    src_is = PETSc.IS().createGeneral(src_idxs, comm=comm)
    tgt_is = PETSc.IS().createGeneral(tgt_idxs, comm=comm)

    try:
        if trace:
            # These attributes are only kept around when tracing is on.
            self.src_idxs = src_idxs
            self.tgt_idxs = tgt_idxs
            self.vec_conns = vec_conns
            arrow = '-->' if mode == 'fwd' else '<--'
            n_src = len(src_is.indices)
            n_tgt = len(tgt_is.indices)
            second_items = [v for u, v in vec_conns]
            first_items = [u for u, v in vec_conns]
            debug("'%s': new %s scatter (sizes: %d, %d)\n %s %s %s %s %s %s" %
                  (name, mode, n_src, n_tgt,
                   second_items, arrow, first_items,
                   src_is.indices, arrow, tgt_is.indices))
        self.scatter = PETSc.Scatter().create(src_petsc, src_is,
                                              tgt_petsc, tgt_is)
    except Exception as err:
        context = (name, src_idxs, tgt_idxs,
                   src_vec.vec.size, tgt_vec.vec.size, str(err))
        raise RuntimeError("ERROR in %s (src_idxs=%s, tgt_idxs=%s, usize=%d, psize=%d): %s" %
                           context)
def setup(self, unknowns_dict, relevance, var_of_interest=None,
          store_byobjs=False, shared_vec=None):
    """
    Create internal data storage for variables in unknowns_dict, then
    create ``self.petsc_vec`` from ``self.vec``.

    Args
    ----
    unknowns_dict : `OrderedDict`
        A dictionary of absolute variable names keyed to an associated
        metadata dictionary.

    relevance : `Relevance` object
        Object that knows what vars are relevant for each var_of_interest.

    var_of_interest : str or None
        Name of the current variable of interest.

    store_byobjs : bool, optional
        Indicates that 'pass by object' vars should be stored. This is
        only true for the unknowns vecwrapper.

    shared_vec : ndarray, optional
        If not None, create vec as a subslice of this array.
    """
    parent = super(PetscSrcVecWrapper, self)
    parent.setup(unknowns_dict,
                 relevance=relevance,
                 var_of_interest=var_of_interest,
                 store_byobjs=store_byobjs,
                 shared_vec=shared_vec)

    if trace:  # pragma: no cover
        info = (self._sysdata.pathname, len(self.vec), self.keys(), self.vec)
        debug("'%s': creating src petsc_vec: size(%d) %s vec=%s" % info)

    self.petsc_vec = PETSc.Vec().createWithArray(self.vec, comm=self.comm)

    if trace:
        debug("petsc_vec creation DONE")
def get_view(self, sys_pathname, comm, varmap):
    """
    Build a view through the base class and give it its own PETSc vector.

    Args
    ----
    sys_pathname : str
        Passed to the base class ``get_view`` and used in trace output.

    comm : an MPI communicator
        Passed to the base class and used to create the view's PETSc vector.

    varmap : object
        Passed through unchanged to the base class ``get_view``.

    Returns
    -------
    view
        The object returned by the base class, with ``petsc_vec`` set.
    """
    result = super(PetscSrcVecWrapper, self).get_view(sys_pathname, comm, varmap)

    if trace:  # pragma: no cover
        info = (sys_pathname, len(result.vec), result.keys(), result.vec)
        debug("'%s': creating src petsc_vec (view): (size %d )%s: vec=%s" % info)

    result.petsc_vec = PETSc.Vec().createWithArray(result.vec, comm=comm)
    return result
def setup(self, unknowns_dict, relevance, var_of_interest=None,
          store_byobjs=False, shared_vec=None, alloc_complex=False,
          vectype="u"):
    """
    Create internal data storage for variables in unknowns_dict, then
    create the PETSc vector(s) from the allocated arrays.

    Args
    ----
    unknowns_dict : `OrderedDict`
        A dictionary of absolute variable names keyed to an associated
        metadata dictionary.

    relevance : `Relevance` object
        Object that knows what vars are relevant for each var_of_interest.

    var_of_interest : str or None
        Name of the current variable of interest.

    store_byobjs : bool, optional
        Indicates that 'pass by object' vars should be stored. This is
        only true for the unknowns vecwrapper.

    shared_vec : ndarray, optional
        If not None, create vec as a subslice of this array.

    alloc_complex : bool, optional
        If True, allocate space for the imaginary part of the vector and
        configure all functions to support complex computation.

    vectype : str('u'), optional
        Type of vector, can be 'u' (unknown), 'r' (resids), 'du' dunknowns,
        or 'dr' dresids.
    """
    parent = super(PetscSrcVecWrapper, self)
    parent.setup(unknowns_dict,
                 relevance=relevance,
                 var_of_interest=var_of_interest,
                 store_byobjs=store_byobjs,
                 shared_vec=shared_vec,
                 alloc_complex=alloc_complex,
                 vectype=vectype)

    if trace:  # pragma: no cover
        info = (self._sysdata.pathname, len(self.vec), self.keys(), self.vec)
        debug("'%s': creating src petsc_vec: size(%d) %s vec=%s" % info)

    make_vec = PETSc.Vec
    self.petsc_vec = make_vec().createWithArray(self.vec, comm=self.comm)
    if alloc_complex:
        # A second PETSc vector is created over the imaginary-part array.
        self.imag_petsc_vec = make_vec().createWithArray(self.imag_vec,
                                                         comm=self.comm)

    if trace:
        debug("petsc_vec creation DONE")
def _setup_variables(self, compute_indices=False):
    """
    Re-key our params and unknowns dictionaries to use absolute variable
    names and record the promoted-to-absolute name maps.

    The metadata dicts are not copied: each one is annotated in place with
    'pathname' and 'promoted_name' and then stored in the new dictionaries.

    Args
    ----
    compute_indices : bool, optional
        If True, call setup_distrib_idxs() to set values of
        'src_indices' metadata.

    Returns
    -------
    tuple of (OrderedDict, OrderedDict)
        The params and unknowns dictionaries keyed by absolute name.
    """
    self._to_abs_unames = self._sysdata._to_abs_unames = {}
    self._to_abs_pnames = self._sysdata._to_abs_pnames = {}

    if MPI and compute_indices and self.is_active():
        self.setup_distrib_idxs()

        # now update our distrib_size metadata for any distributed unknowns
        distributed = [(uname, len(umeta['src_indices']))
                       for uname, umeta in iteritems(self._unknowns_dict)
                       if 'src_indices' in umeta]

        if distributed:
            if trace:  # pragma: no cover
                debug("allgathering src index sizes:")
            local_counts = [count for _, count in distributed]
            # One row per process, one column per distributed unknown.
            gathered = np.zeros((self.comm.size, len(local_counts)), dtype=int)
            self.comm.Allgather(np.array(local_counts, dtype=int), gathered)
            for col, (uname, _) in enumerate(distributed):
                self._unknowns_dict[uname]['distrib_size'] = np.sum(gathered[:, col])

    # rekey with absolute path names and add promoted names
    abs_params = OrderedDict()
    for promoted, pmeta in iteritems(self._params_dict):
        abs_name = self._get_var_pathname(promoted)
        abs_params[abs_name] = pmeta
        pmeta['pathname'] = abs_name
        pmeta['promoted_name'] = promoted
        # Kept from the original; for a plain dict this repeats the line above.
        self._params_dict[promoted]['promoted_name'] = promoted
        self._to_abs_pnames[promoted] = (abs_name,)

    abs_unknowns = OrderedDict()
    for promoted, umeta in iteritems(self._unknowns_dict):
        abs_name = self._get_var_pathname(promoted)
        abs_unknowns[abs_name] = umeta
        umeta['pathname'] = abs_name
        umeta['promoted_name'] = promoted
        self._to_abs_unames[promoted] = (abs_name,)

    self._post_setup_vars = True

    self._sysdata._params_dict = abs_params
    self._sysdata._unknowns_dict = abs_unknowns

    return abs_params, abs_unknowns
def setup(self, parent_params_vec, params_dict, srcvec, my_params,
          connections, relevance, var_of_interest=None,
          store_byobjs=False, shared_vec=None, alloc_complex=False):
    """
    Configure this vector to store a flattened array of the variables
    in params_dict, then create the PETSc vector(s) from the allocated
    arrays. Variable shape and value are retrieved from srcvec.

    Args
    ----
    parent_params_vec : `VecWrapper` or None
        `VecWrapper` of parameters from the parent `System`.

    params_dict : `OrderedDict`
        Dictionary of parameter absolute name mapped to metadata dict.

    srcvec : `VecWrapper`
        Source `VecWrapper` corresponding to the target `VecWrapper` we're
        building.

    my_params : list of str
        A list of absolute names of parameters that the `VecWrapper` we're
        building will 'own'.

    connections : dict of str : str
        A dict of absolute target names mapped to the absolute name of their
        source variable.

    relevance : `Relevance` object
        Object that knows what vars are relevant for each var_of_interest.

    var_of_interest : str or None
        Name of the current variable of interest.

    store_byobjs : bool, optional
        If True, store 'pass by object' variables in the `VecWrapper` we're
        building.

    shared_vec : ndarray, optional
        If not None, create vec as a subslice of this array.

    alloc_complex : bool, optional
        If True, allocate space for the imaginary part of the vector and
        configure all functions to support complex computation.
    """
    parent = super(PetscTgtVecWrapper, self)
    parent.setup(parent_params_vec, params_dict, srcvec, my_params,
                 connections,
                 relevance=relevance,
                 var_of_interest=var_of_interest,
                 store_byobjs=store_byobjs,
                 shared_vec=shared_vec,
                 alloc_complex=alloc_complex)

    if trace:  # pragma: no cover
        info = (self._sysdata.pathname, len(self.vec), self.keys(), self.vec)
        debug("'%s': creating tgt petsc_vec: (size %d) %s: vec=%s" % info)

    make_vec = PETSc.Vec
    self.petsc_vec = make_vec().createWithArray(self.vec, comm=self.comm)
    if alloc_complex:
        # A second PETSc vector is created over the imaginary-part array.
        self.imag_petsc_vec = make_vec().createWithArray(self.imag_vec,
                                                         comm=self.comm)

    if trace:
        debug("petsc_vec creation DONE")
def setup(self, parent_params_vec, params_dict, srcvec, my_params,
          connections, relevance, var_of_interest=None,
          store_byobjs=False, shared_vec=None, alloc_complex=False):
    """
    Configure this vector to store a flattened array of the variables
    in params_dict, then create the PETSc vector(s) from the allocated
    arrays. Variable shape and value are retrieved from srcvec.

    Args
    ----
    parent_params_vec : `VecWrapper` or None
        `VecWrapper` of parameters from the parent `System`.

    params_dict : `OrderedDict`
        Dictionary of parameter absolute name mapped to metadata dict.

    srcvec : `VecWrapper`
        Source `VecWrapper` corresponding to the target `VecWrapper` we're
        building.

    my_params : list of str
        A list of absolute names of parameters that the `VecWrapper` we're
        building will 'own'.

    connections : dict of str : str
        A dict of absolute target names mapped to the absolute name of their
        source variable.

    relevance : `Relevance` object
        Object that knows what vars are relevant for each var_of_interest.

    var_of_interest : str or None
        Name of the current variable of interest.

    store_byobjs : bool, optional
        If True, store 'pass by object' variables in the `VecWrapper` we're
        building.

    shared_vec : ndarray, optional
        If not None, create vec as a subslice of this array.

    alloc_complex : bool, optional
        If True, allocate space for the imaginary part of the vector and
        configure all functions to support complex computation.
    """
    super(PetscTgtVecWrapper, self).setup(parent_params_vec, params_dict,
                                          srcvec, my_params, connections,
                                          relevance=relevance,
                                          var_of_interest=var_of_interest,
                                          store_byobjs=store_byobjs,
                                          shared_vec=shared_vec,
                                          alloc_complex=alloc_complex)

    if trace:  # pragma: no cover
        debug("'%s': creating tgt petsc_vec: (size %d) %s: vec=%s" %
              (self._sysdata.pathname, len(self.vec), self.keys(), self.vec))

    self.petsc_vec = PETSc.Vec().createWithArray(self.vec, comm=self.comm)

    # Truthiness test rather than `is True`: the flag is forwarded to the
    # base class as given, so any truthy value (e.g. 1 or a numpy bool)
    # must also get its imaginary PETSc vector here.
    if alloc_complex:
        self.imag_petsc_vec = PETSc.Vec().createWithArray(self.imag_vec,
                                                          comm=self.comm)

    if trace:
        debug("petsc_vec creation DONE")
def _distrib_lb_build_runlist(self):
    """
    Runs a load balanced version of the runlist, with the master
    rank (0) sending a new case to each worker rank as soon as it
    has finished its last case.

    NOTE(review): only the master-rank bookkeeping and the initial seeding
    of the workers are present in this body; the receive/dispatch loop and
    the worker side described above are not visible here -- confirm whether
    the definition has been truncated.
    """
    comm = self._full_comm

    if self._full_comm.rank == 0:  # master rank
        runiter = self._build_runlist()
        received = 0
        sent = 0

        # cases left for each par doe
        # (one bookkeeping record per key of self._id_map)
        cases = {
            n: {
                'count': 0,
                'terminate': 0,
                'p': {},
                'u': {},
                'r': {},
                'meta': {
                    'success': 1,
                    'msg': ''
                }
            }
            for n in self._id_map
        }

        # create a mapping of ranks to doe_ids, to handle those cases
        # where a single DOE is executed across multiple processes, i.e.,
        # for each process, we need to know which case it's working on.
        doe_ids = {}
        for doe_id, tup in self._id_map.items():
            size, offset = tup
            for i in range(size):
                doe_ids[i + offset] = doe_id

        # seed the workers
        # (starts at 1, so the entry for id 0 is not sent a seed case)
        for i in range(1, self._num_par_doe):
            try:
                # case is a generator, so must make a list to send
                case = list(next(runiter))
            except StopIteration:
                # fewer cases than workers: stop seeding
                break
            size, offset = self._id_map[i]
            # send the case to all of the subprocs that will work on it
            for j in range(size):
                if trace: debug('Sending Seed case %d, %d' % (i, j))
                comm.send(case, j + offset, tag=1)
                if trace: debug('Seed Case Sent %d, %d' % (i, j))
                cases[i]['count'] += 1
                sent += 1
def __init__(self, src_vec, tgt_vec, src_idxs, tgt_idxs, vec_conns,
             byobj_conns, mode, sysdata):
    """
    Build the PETSc scatter between the source and target PETSc vectors.

    Args
    ----
    src_vec : vec wrapper
        Source wrapper; its ``merge_idxs``, ``comm``, ``petsc_vec``,
        ``vec`` and ``_sysdata.pathname`` are used here.

    tgt_vec : vec wrapper
        Target wrapper; its ``merge_idxs``, ``petsc_vec`` and ``vec`` are
        used here.

    src_idxs : object
        Passed to ``src_vec.merge_idxs``; the result is used to build the
        source index set.

    tgt_idxs : object
        Passed to ``tgt_vec.merge_idxs``; the result is used to build the
        target index set.

    vec_conns : iterable of 2-tuples
        Connection pairs; in this method they are only used for trace output.

    byobj_conns : object
        Stored unchanged as ``self.byobj_conns``.

    mode : str
        'fwd' selects the '-->' arrow in trace output, anything else '<--'.

    sysdata : object
        Stored unchanged as ``self.sysdata``.

    Raises
    ------
    RuntimeError
        If anything inside the scatter-creation step raises; the message
        includes the merged indices and both vector sizes.
    """
    src_idxs = src_vec.merge_idxs(src_idxs)
    tgt_idxs = tgt_vec.merge_idxs(tgt_idxs)

    self.byobj_conns = byobj_conns
    self.comm = comm = src_vec.comm
    self.sysdata = sysdata

    src_petsc = src_vec.petsc_vec
    tgt_petsc = tgt_vec.petsc_vec
    name = src_vec._sysdata.pathname

    if trace:
        debug("'%s': creating index sets for '%s' DataTransfer: %s %s" %
              (name, src_vec._sysdata.pathname, src_idxs, tgt_idxs))

    src_is = PETSc.IS().createGeneral(src_idxs, comm=comm)
    if trace:
        debug("src_idx_set DONE")

    tgt_is = PETSc.IS().createGeneral(tgt_idxs, comm=comm)
    if trace:
        debug("tgt_idx_set DONE")

    try:
        if trace:  # pragma: no cover
            # These attributes are only kept around when tracing is on.
            self.src_idxs = src_idxs
            self.tgt_idxs = tgt_idxs
            self.vec_conns = vec_conns
            arrow = '-->' if mode == 'fwd' else '<--'
            n_src = len(src_is.indices)
            n_tgt = len(tgt_is.indices)
            second_items = [v for u, v in vec_conns]
            first_items = [u for u, v in vec_conns]
            debug("'%s': new %s scatter (sizes: %d, %d)\n %s %s %s %s %s %s" %
                  (name, mode, n_src, n_tgt,
                   second_items, arrow, first_items,
                   src_is.indices, arrow, tgt_is.indices))

        self.scatter = PETSc.Scatter().create(src_petsc, src_is,
                                              tgt_petsc, tgt_is)
        if trace:
            debug("scatter creation DONE")
    except Exception as err:
        context = (name, src_idxs, tgt_idxs,
                   src_vec.vec.size, tgt_vec.vec.size, str(err))
        raise RuntimeError("ERROR in %s (src_idxs=%s, tgt_idxs=%s, usize=%d, psize=%d): %s" %
                           context)
def get_view(self, system, comm, varmap):
    """
    Build a view through the base class and attach PETSc vector(s) to it.

    Args
    ----
    system : object
        Passed to the base class ``get_view``; its ``pathname`` is used in
        trace output.

    comm : an MPI communicator
        Passed to the base class and used to create the view's PETSc
        vector(s).

    varmap : object
        Passed through unchanged to the base class ``get_view``.

    Returns
    -------
    view
        The object returned by the base class, with ``petsc_vec`` set and,
        when ``self.alloc_complex`` is truthy, ``imag_petsc_vec`` as well.
    """
    result = super(PetscSrcVecWrapper, self).get_view(system, comm, varmap)

    if trace:  # pragma: no cover
        info = (system.pathname, len(result.vec), result.keys(), result.vec)
        debug("'%s': creating src petsc_vec (view): (size %d )%s: vec=%s" % info)

    make_vec = PETSc.Vec
    result.petsc_vec = make_vec().createWithArray(result.vec, comm=comm)
    if self.alloc_complex:
        result.imag_petsc_vec = make_vec().createWithArray(result.imag_vec,
                                                           comm=comm)

    if trace:
        debug("petsc_vec creation DONE")

    return result
def norm(self):
    """
    Assemble the PETSc vector and return its norm.

    Returns
    -------
    float
        The norm of the distributed vector.
    """
    if trace:  # pragma: no cover
        debug("%s: norm: petsc_vec.assemble" % self._sysdata.pathname)

    pvec = self.petsc_vec
    pvec.assemble()
    return pvec.norm()
def get_view(self, sys_pathname, comm, varmap):
    """
    Build a view through the base class and attach PETSc vector(s) to it.

    Args
    ----
    sys_pathname : str
        Passed to the base class ``get_view`` and used in trace output.

    comm : an MPI communicator
        Passed to the base class and used to create the view's PETSc
        vector(s).

    varmap : object
        Passed through unchanged to the base class ``get_view``.

    Returns
    -------
    view
        The object returned by the base class, with ``petsc_vec`` set and,
        when ``self.alloc_complex`` is truthy, ``imag_petsc_vec`` as well.
    """
    view = super(PetscSrcVecWrapper, self).get_view(sys_pathname, comm, varmap)

    if trace:  # pragma: no cover
        debug("'%s': creating src petsc_vec (view): (size %d )%s: vec=%s" %
              (sys_pathname, len(view.vec), view.keys(), view.vec))

    view.petsc_vec = PETSc.Vec().createWithArray(view.vec, comm=comm)

    # Truthiness test rather than `is True`, so that a truthy flag that is
    # not the `True` singleton (e.g. 1 or a numpy bool) still gets its
    # imaginary PETSc vector.
    if self.alloc_complex:
        view.imag_petsc_vec = PETSc.Vec().createWithArray(view.imag_vec,
                                                          comm=comm)

    if trace:
        debug("petsc_vec creation DONE")

    return view
def norm(self):
    """
    Return the norm of this vector, computed by the PETSc vector after
    it has been assembled.

    Returns
    -------
    float
        The norm of the distributed vector.
    """
    if trace:  # pragma: no cover
        debug("%s: norm: petsc_vec.assemble" % self._sysdata.pathname)

    distributed_vec = self.petsc_vec
    distributed_vec.assemble()
    result = distributed_vec.norm()
    return result
def _setup_communicators(self, comm, parent_dir): """ Assign a communicator to the root `System`. Args ---- comm : an MPI communicator (real or fake) The communicator being offered by the Problem. parent_dir : str Absolute dir of parent `System`. """ root = self.root if self._num_par_doe < 1: raise ValueError( "'%s': _num_par_doe must be >= 1 but value is %s." % (self.pathname, self._num_par_doe)) if not MPI: self._num_par_doe = 1 self._full_comm = comm # figure out which parallel DOE we are associated with if self._num_par_doe > 1: minprocs, maxprocs = root.get_req_procs() if self._load_balance: sizes, offsets = evenly_distrib_idxs(self._num_par_doe - 1, comm.size - 1) sizes = [1] + list(sizes) offsets = [0] + [o + 1 for o in offsets] else: sizes, offsets = evenly_distrib_idxs(self._num_par_doe, comm.size) # a 'color' is assigned to each subsystem, with # an entry for each processor it will be given # e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] color = [] self._id_map = {} for i in range(self._num_par_doe): color.extend([i] * sizes[i]) self._id_map[i] = (sizes[i], offsets[i]) self._par_doe_id = color[comm.rank] # create a sub-communicator for each color and # get the one assigned to our color/process if trace: debug('%s: splitting comm, doe_id=%s' % ('.'.join( (root.pathname, 'driver')), self._par_doe_id)) comm = comm.Split(self._par_doe_id) root._setup_communicators(comm, parent_dir)
def _setup_communicators(self, comm, parent_dir): """ Assign a communicator to the root `System`. Args ---- comm : an MPI communicator (real or fake) The communicator being offered by the Problem. parent_dir : str Absolute dir of parent `System`. """ root = self.root if self._num_par_doe < 1: raise ValueError("'%s': _num_par_doe must be >= 1 but value is %s." % (self.pathname, self._num_par_doe)) if not MPI: self._num_par_doe = 1 self._full_comm = comm # figure out which parallel DOE we are associated with if self._num_par_doe > 1: minprocs, maxprocs = root.get_req_procs() if self._load_balance: sizes, offsets = evenly_distrib_idxs(self._num_par_doe-1, comm.size-1) sizes = [1]+list(sizes) offsets = [0]+[o+1 for o in offsets] else: sizes, offsets = evenly_distrib_idxs(self._num_par_doe, comm.size) # a 'color' is assigned to each subsystem, with # an entry for each processor it will be given # e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] color = [] self._id_map = {} for i in range(self._num_par_doe): color.extend([i]*sizes[i]) self._id_map[i] = (sizes[i], offsets[i]) self._par_doe_id = color[comm.rank] # create a sub-communicator for each color and # get the one assigned to our color/process if trace: debug('%s: splitting comm, doe_id=%s' % ('.'.join((root.pathname, 'driver')), self._par_doe_id)) comm = comm.Split(self._par_doe_id) root._setup_communicators(comm, parent_dir)
def _setup_communicators(self, comm, parent_dir): """ Assign communicator to this `Group` and all of its subsystems. Args ---- comm : an MPI communicator (real or fake) The communicator being offered by the parent system. parent_dir : str Absolute dir of parent `System`. """ if self._num_par_fds < 1: raise ValueError( "'%s': num_par_fds must be >= 1 but value is %s." % (self.pathname, self._num_par_fds)) if not MPI: self._num_par_fds = 1 self._full_comm = comm # figure out which parallel FD we are associated with if self._num_par_fds > 1: minprocs, maxprocs = super(ParallelFDGroup, self).get_req_procs() sizes, offsets = evenly_distrib_idxs(self._num_par_fds, comm.size) # a 'color' is assigned to each subsystem, with # an entry for each processor it will be given # e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] color = [] for i in range(self._num_par_fds): color.extend([i] * sizes[i]) self._par_fd_id = color[comm.rank] # create a sub-communicator for each color and # get the one assigned to our color/process if trace: debug('%s: splitting comm, fd_id=%s' % (self.pathname, self._par_fd_id)) comm = comm.Split(self._par_fd_id) self._local_subsystems = [] self.comm = comm self._setup_dir(parent_dir) for sub in itervalues(self._subsystems): sub._setup_communicators(comm, self._sysdata.absdir) if self.is_active() and sub.is_active(): self._local_subsystems.append(sub)
def _setup_communicators(self, comm, parent_dir): """ Assign communicator to this `Group` and all of its subsystems. Args ---- comm : an MPI communicator (real or fake) The communicator being offered by the parent system. parent_dir : str Absolute dir of parent `System`. """ if self._num_par_fds < 1: raise ValueError("'%s': num_par_fds must be >= 1 but value is %s." % (self.pathname, self._num_par_fds)) if not MPI: self._num_par_fds = 1 self._full_comm = comm # figure out which parallel FD we are associated with if self._num_par_fds > 1: minprocs, maxprocs = super(ParallelFDGroup, self).get_req_procs() sizes, offsets = evenly_distrib_idxs(self._num_par_fds, comm.size) # a 'color' is assigned to each subsystem, with # an entry for each processor it will be given # e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3] color = [] for i in range(self._num_par_fds): color.extend([i]*sizes[i]) self._par_fd_id = color[comm.rank] # create a sub-communicator for each color and # get the one assigned to our color/process if trace: debug('%s: splitting comm, fd_id=%s' % (self.pathname, self._par_fd_id)) comm = comm.Split(self._par_fd_id) self._local_subsystems = [] self.comm = comm self._setup_dir(parent_dir) for sub in itervalues(self._subsystems): sub._setup_communicators(comm, self._sysdata.absdir) if self.is_active() and sub.is_active(): self._local_subsystems.append(sub)
def setup(self, system):
    """
    Setup petsc problem just once.

    Creates the Python-context matrix, the KSP solver (stored as
    ``self.ksp``) and the ``rhs_buf`` / ``sol_buf`` work arrays.

    Args
    ----
    system : `System`
        System whose ``comm``, ``pathname`` and ``_local_unknown_sizes``
        are used to size and create the PETSc objects.
    """
    comm = system.comm
    sizes_table = system._local_unknown_sizes[None]

    # Local size is the total of this rank's row; global size is the
    # total of the whole table.
    lsize = np.sum(sizes_table[comm.rank, :])
    size = np.sum(sizes_table)
    dims = (lsize, size)

    jac_mat = PETSc.Mat().createPython([dims, dims], comm=comm)
    jac_mat.setPythonContext(self)
    jac_mat.setUp()

    if trace:  # pragma: no cover
        debug("creating KSP object for system",system.pathname)

    ksp = PETSc.KSP().create(comm=comm)
    self.ksp = ksp
    ksp.setOperators(jac_mat)
    ksp.setType('fgmres')
    ksp.setGMRESRestart(1000)
    ksp.setPCSide(PETSc.PC.Side.RIGHT)
    ksp.setMonitor(Monitor(self))

    if trace:  # pragma: no cover
        debug("ksp.getPC()")
        debug("rhs_buf, sol_buf size: %d" % lsize)

    # This object also acts as the Python preconditioner context.
    precon = ksp.getPC()
    precon.setType('python')
    precon.setPythonContext(self)

    if trace:  # pragma: no cover
        debug("ksp setup done")

    self.rhs_buf = np.zeros((lsize, ))
    self.sol_buf = np.zeros((lsize, ))
def setup(self, system):
    """
    Setup petsc problem just once.

    Builds the Python-context matrix and the KSP solver for ``system`` and
    allocates the ``rhs_buf`` / ``sol_buf`` work arrays.

    Args
    ----
    system : `System`
        System whose ``comm``, ``pathname`` and ``_local_unknown_sizes``
        are used to size and create the PETSc objects.
    """
    local_sizes = system._local_unknown_sizes[None]

    # lsize sums this rank's row; size sums the whole table.
    lsize = np.sum(local_sizes[system.comm.rank, :])
    size = np.sum(local_sizes)

    shape = [(lsize, size), (lsize, size)]
    jac_mat = PETSc.Mat().createPython(shape, comm=system.comm)
    jac_mat.setPythonContext(self)
    jac_mat.setUp()

    if trace:  # pragma: no cover
        debug("creating KSP object for system", system.pathname)

    solver = PETSc.KSP().create(comm=system.comm)
    self.ksp = solver
    solver.setOperators(jac_mat)
    solver.setType('fgmres')
    solver.setGMRESRestart(1000)
    solver.setPCSide(PETSc.PC.Side.RIGHT)
    solver.setMonitor(Monitor(self))

    if trace:  # pragma: no cover
        debug("ksp.getPC()")
        debug("rhs_buf, sol_buf size: %d" % lsize)

    # This object also acts as the Python preconditioner context.
    pc_mat = solver.getPC()
    pc_mat.setType('python')
    pc_mat.setPythonContext(self)

    if trace:  # pragma: no cover
        debug("ksp setup done")

    buf_shape = (lsize, )
    self.rhs_buf = np.zeros(buf_shape)
    self.sol_buf = np.zeros(buf_shape)
def record_iteration(self, root, metadata, dummy=False):
    """
    Gathers variables for non-parallel case recorders and calls record for
    all recorders.

    NOTE(review): the body visible here ends after the case gathering and
    contains no call into the recorders -- confirm whether the definition
    has been truncated.

    Args
    ----
    root : `System`
        System containing variables.

    metadata : dict
        Metadata for iteration coordinate. A 'timestamp' entry is added
        to it in place.

    dummy : bool, optional
        If True, this is a dummy iteration, so no data will be collected
        from the model, but collective gather call will still be made.
    """
    # Nothing to do when no recorders are registered.
    if not self._recorders:
        return

    metadata['timestamp'] = time.time()

    params = root.params
    unknowns = root.unknowns
    resids = root.resids

    cases = None

    if MPI:
        if dummy and self._casecomm is not None:
            # Take part in the collective gather with a placeholder case,
            # then leave without recording anything.
            case = (None, None, None, None)
            if trace: debug("DUMMY gathering cases")
            cases = self._casecomm.gather(case, root=0)
            if trace: debug("DUMMY done gathering cases:")
            return

        pnames = self._vars_to_record['pnames']
        unames = self._vars_to_record['unames']
        rnames = self._vars_to_record['rnames']

        # get names and values of all locally owned variables
        params = {p: params[p] for p in pnames}
        unknowns = {u: unknowns[u] for u in unames}
        resids = {r: resids[r] for r in rnames}

        if self._has_serial_recorders:
            # Each category is gathered only if it is being recorded;
            # otherwise it is replaced by an empty dict.
            params = self._gather_vars(root, params) if self._record_p else {}
            unknowns = self._gather_vars(root, unknowns) if self._record_u else {}
            resids = self._gather_vars(root, resids) if self._record_r else {}

        if self._casecomm is not None:
            # our parent driver is running a parallel DOE, so we need to
            # gather all of the cases to this rank and loop over them
            case = (params, unknowns, resids, metadata)
            if trace: debug("gathering cases")
            cases = self._casecomm.gather(case, root=0)
            if trace: debug("done gathering cases")
            # NOTE(review): nesting of this check is reconstructed from
            # flattened source -- confirm against the original layout.
            if cases is None:
                cases = []
def _distrib_build_runlist(self): """ Returns an iterator over only those cases meant to execute in the current rank as part of a parallel DOE. A latin hypercube, unlike some other DOE generators, is created in one rank and then the appropriate cases are scattered to the appropriate ranks. """ comm = self._full_comm job_list = None if comm.rank == 0: debug('Parallel DOE using %d procs' % self._num_par_doe) run_list = [list(case) for case in self._build_runlist() ] # need to run iterator run_sizes, run_offsets = evenly_distrib_idxs(
def _gather_vars(self, root, local_vars):
    """
    Gather `local_vars` from every process in ``root.comm`` onto rank 0
    and merge them into a single dict.

    Args
    ----
    root : `System`
        System whose ``comm`` performs the gather and whose ``pathname``
        appears in trace output.

    local_vars : dict
        This process's variables to contribute.

    Returns
    -------
    dict or None
        On rank 0, the dict gathered from the last process, updated in
        place with the dicts from all the other processes. None on every
        other rank.
    """
    if trace:
        debug("gathering vars for recording in %s" % root.pathname)
    gathered = root.comm.gather(local_vars, root=0)
    if trace:
        debug("DONE gathering rec vars for %s" % root.pathname)

    if root.comm.rank != 0:
        return None

    # Start from the last entry and fold the earlier ones into it.
    merged = gathered[-1]
    for contribution in gathered[:-1]:
        merged.update(contribution)
    return merged
def _get_distrib_var(self, name, meta, voi_type): uvec = self.root.unknowns comm = self.root.comm nproc = comm.size iproc = comm.rank if nproc > 1: owner = self.root._owning_ranks[name] if iproc == owner: flatval = uvec._dat[name].val else: flatval = None else: owner = 0 flatval = uvec._dat[name].val if 'indices' in meta and not (nproc > 1 and owner != iproc): # Make sure our indices are valid try: flatval = flatval[meta['indices']] except IndexError: msg = "Index for {} '{}' is out of bounds. " msg += "Requested index: {}, " msg += "shape: {}." raise IndexError( msg.format(voi_type, name, meta['indices'], uvec.metadata(name)['shape'])) if nproc > 1: # TODO: use Bcast for improved performance if trace: debug("%s.driver._get_distrib_var bcast: val=%s" % (self.root.pathname, flatval)) flatval = comm.bcast(flatval, root=owner) if trace: debug("%s.driver._get_distrib_var bcast DONE" % self.root.pathname) scaler = meta['scaler'] adder = meta['adder'] if isinstance(scaler, np.ndarray) or isinstance(adder, np.ndarray) \ or scaler != 1.0 or adder != 0.0: return (flatval + adder) * scaler else: return flatval
def setup(self, unknowns_dict, relevance, var_of_interest=None,
          store_byobjs=False, shared_vec=None, alloc_complex=False,
          vectype='u'):
    """
    Create internal data storage for variables in unknowns_dict, then
    create the PETSc vector(s) from the allocated arrays.

    Args
    ----
    unknowns_dict : `OrderedDict`
        A dictionary of absolute variable names keyed to an associated
        metadata dictionary.

    relevance : `Relevance` object
        Object that knows what vars are relevant for each var_of_interest.

    var_of_interest : str or None
        Name of the current variable of interest.

    store_byobjs : bool, optional
        Indicates that 'pass by object' vars should be stored. This is
        only true for the unknowns vecwrapper.

    shared_vec : ndarray, optional
        If not None, create vec as a subslice of this array.

    alloc_complex : bool, optional
        If True, allocate space for the imaginary part of the vector and
        configure all functions to support complex computation.

    vectype : str('u'), optional
        Type of vector, can be 'u' (unknown), 'r' (resids), 'du' dunknowns,
        or 'dr' dresids.
    """
    super(PetscSrcVecWrapper, self).setup(
        unknowns_dict,
        relevance=relevance,
        var_of_interest=var_of_interest,
        store_byobjs=store_byobjs,
        shared_vec=shared_vec,
        alloc_complex=alloc_complex,
        vectype=vectype,
    )

    if trace:  # pragma: no cover
        details = (self._sysdata.pathname, len(self.vec), self.keys(), self.vec)
        debug("'%s': creating src petsc_vec: size(%d) %s vec=%s" % details)

    self.petsc_vec = PETSc.Vec().createWithArray(self.vec, comm=self.comm)

    if alloc_complex:
        # A second PETSc vector is created over the imaginary-part array.
        imag = PETSc.Vec().createWithArray(self.imag_vec, comm=self.comm)
        self.imag_petsc_vec = imag

    if trace:
        debug("petsc_vec creation DONE")
def _get_distrib_var(self, name, meta, voi_type): uvec = self.root.unknowns comm = self.root.comm nproc = comm.size iproc = comm.rank if nproc > 1: owner = self.root._owning_ranks[name] if iproc == owner: flatval = uvec._dat[name].val else: flatval = None else: owner = 0 flatval = uvec._dat[name].val if 'indices' in meta and not (nproc > 1 and owner != iproc): # Make sure our indices are valid try: flatval = flatval[meta['indices']] except IndexError: msg = "Index for {} '{}' is out of bounds. " msg += "Requested index: {}, " msg += "shape: {}." raise IndexError(msg.format(voi_type, name, meta['indices'], uvec.metadata(name)['shape'])) if nproc > 1: # TODO: use Bcast for improved performance if trace: debug("%s.driver._get_distrib_var bcast: val=%s" % (self.root.pathname, flatval)) flatval = comm.bcast(flatval, root=owner) if trace: debug("%s.driver._get_distrib_var bcast DONE" % self.root.pathname) scaler = meta['scaler'] adder = meta['adder'] if isinstance(scaler, np.ndarray) or isinstance(adder, np.ndarray) \ or scaler != 1.0 or adder != 0.0: return (flatval + adder)*scaler else: return flatval
def __init__(self, src_vec, tgt_vec, src_idxs, tgt_idxs, vec_conns,
             byobj_conns, mode, sysdata):
    """
    Create the PETSc scatter linking the source and target PETSc vectors.

    Args
    ----
    src_vec : vec wrapper
        Source wrapper; its ``merge_idxs``, ``comm``, ``petsc_vec``,
        ``vec`` and ``_sysdata.pathname`` are used here.

    tgt_vec : vec wrapper
        Target wrapper; its ``merge_idxs``, ``petsc_vec`` and ``vec`` are
        used here.

    src_idxs : object
        Passed to ``src_vec.merge_idxs``; the result is used to build the
        source index set.

    tgt_idxs : object
        Passed to ``tgt_vec.merge_idxs``; the result is used to build the
        target index set.

    vec_conns : iterable of 2-tuples
        Connection pairs; in this method they are only used for trace output.

    byobj_conns : object
        Stored unchanged as ``self.byobj_conns``.

    mode : str
        'fwd' selects the '-->' arrow in trace output, anything else '<--'.

    sysdata : object
        Stored unchanged as ``self.sysdata``.

    Raises
    ------
    RuntimeError
        If anything inside the scatter-creation step raises; the message
        includes the merged indices and both vector sizes.
    """
    src_idxs = src_vec.merge_idxs(src_idxs)
    tgt_idxs = tgt_vec.merge_idxs(tgt_idxs)

    self.byobj_conns = byobj_conns
    self.comm = comm = src_vec.comm
    self.sysdata = sysdata

    from_vec = src_vec.petsc_vec
    to_vec = tgt_vec.petsc_vec
    name = src_vec._sysdata.pathname

    if trace:
        debug("'%s': creating index sets for '%s' DataTransfer: %s %s" %
              (name, src_vec._sysdata.pathname, src_idxs, tgt_idxs))

    from_set = PETSc.IS().createGeneral(src_idxs, comm=comm)
    if trace:
        debug("src_idx_set DONE")

    to_set = PETSc.IS().createGeneral(tgt_idxs, comm=comm)
    if trace:
        debug("tgt_idx_set DONE")

    try:
        if trace:  # pragma: no cover
            # These attributes are only kept around when tracing is on.
            self.src_idxs = src_idxs
            self.tgt_idxs = tgt_idxs
            self.vec_conns = vec_conns
            arrow = '<--' if mode != 'fwd' else '-->'
            report = (name, mode,
                      len(from_set.indices), len(to_set.indices),
                      [v for u, v in vec_conns], arrow,
                      [u for u, v in vec_conns],
                      from_set.indices, arrow, to_set.indices)
            debug("'%s': new %s scatter (sizes: %d, %d)\n %s %s %s %s %s %s" %
                  report)

        self.scatter = PETSc.Scatter().create(from_vec, from_set,
                                              to_vec, to_set)
        if trace:
            debug("scatter creation DONE")
    except Exception as err:
        failure = (name, src_idxs, tgt_idxs,
                   src_vec.vec.size, tgt_vec.vec.size, str(err))
        raise RuntimeError(
            "ERROR in %s (src_idxs=%s, tgt_idxs=%s, usize=%d, psize=%d): %s" %
            failure)
def _distrib_build_runlist(self):
    """
    Yield only the cases that this rank should run in a parallel DOE.

    Rank 0 of the full communicator materializes the complete run list,
    cuts it into `self._num_par_doe` contiguous chunks and scatters to each
    rank the chunk matching that rank's `_par_doe_id`. All other ranks only
    take part in the collective calls.
    """
    comm = self._full_comm

    # Every rank reports which parallel DOE it belongs to, so rank 0 knows
    # which chunk each rank must receive.
    ids_by_rank = comm.allgather(self._par_doe_id)

    per_rank_jobs = None
    if comm.rank == 0:
        if trace:
            debug('Parallel DOE using %d procs' % self._num_par_doe)

        # The run list is an iterator of iterators, so it must be fully
        # expanded before it can be sliced and sent.
        all_cases = [list(case) for case in self._build_runlist()]

        chunk_sizes, chunk_offsets = evenly_distrib_idxs(self._num_par_doe,
                                                         len(all_cases))
        chunks = [all_cases[start:start + count]
                  for start, count in zip(chunk_offsets, chunk_sizes)]
        per_rank_jobs = [chunks[doe_id] for doe_id in ids_by_rank]

    if trace:
        debug("scattering job_list: %s" % per_rank_jobs)
    my_cases = comm.scatter(per_rank_jobs, root=0)
    if trace:
        debug('Number of DOE jobs: %s (scatter DONE)' % len(my_cases))

    for case in my_cases:
        yield case
def _distrib_lb_build_runlist(self):
    """
    Runs a load balanced version of the runlist, with the master
    rank (0) sending a new case to each worker rank as soon as it
    has finished its last case.

    NOTE(review): the body visible here only sets up the master rank's
    bookkeeping and sends the first ("seed") case to each worker group.
    `received` and `doe_ids` are built but not read in this span, and
    nothing is done on non-zero ranks -- confirm whether the rest of the
    method lives elsewhere.
    """
    comm = self._full_comm

    if self._full_comm.rank == 0:  # master rank
        runiter = self._build_runlist()
        received = 0
        sent = 0

        # cases left for each par doe
        cases = {n:{'count': 0, 'p':{}, 'u':{}, 'r':{}, 'meta':{}}
                 for n in self._id_map}

        # create a mapping of ranks to doe_ids
        # (each _id_map value unpacks to (size, offset); the ranks
        # offset .. offset+size-1 are mapped to that doe_id)
        doe_ids = {}
        for doe_id, tup in self._id_map.items():
            size, offset = tup
            for i in range(size):
                doe_ids[i+offset] = doe_id

        # seed the workers
        # (the range starts at 1, so doe id 0 is not sent a case here)
        for i in range(1, self._num_par_doe):
            try:
                # case is a generator, so must make a list to send
                case = list(next(runiter))
            except StopIteration:
                # fewer cases than worker groups: stop seeding
                break
            size, offset = self._id_map[i]
            # the same case is sent to every rank of worker group i
            for j in range(size):
                if trace:
                    debug('Sending Seed case %d, %d' % (i, j))
                comm.send(case, j+offset, tag=1)
                if trace:
                    debug('Seed Case Sent %d, %d' % (i, j))
                cases[i]['count'] += 1
                sent += 1
def _get_flattened_sizes(self):
    """
    Gather the local sizes of all owned vector variables from every rank.

    A variable whose metadata is flagged 'remote' contributes a size of 0;
    variables not flagged 'owned' are left out altogether.

    Returns
    -------
    list of lists of (name, size) tuples
        Contains an entry for each process in this object's communicator.
        Each entry lists the local size of every owned variable reported
        by `_get_vecvars()` on that process.
    """
    local_sizes = [
        (varname, 0 if varmeta.get('remote') else varmeta['size'])
        for varname, varmeta in self._get_vecvars()
        if varmeta.get('owned')
    ]

    if trace:
        template = "'%s': allgathering param sizes. local param sizes = %s"
        debug(template % (self._sysdata.pathname, local_sizes))

    return self.comm.allgather(local_sizes)
def _get_flattened_sizes(self):
    """
    Gather the local sizes of all owned, non pass-by-object variables.

    A variable whose accessor is flagged `remote` contributes a size of 0;
    accessors that are not `owned`, or that are `pbo`, are left out.

    Returns
    -------
    list of lists of (name, size) tuples
        Contains an entry for each process in this object's communicator.
        Each entry lists the local size of every qualifying variable in
        `self._dat` on that process.
    """
    local_sizes = [
        (varname, 0 if accessor.remote else accessor.meta['size'])
        for varname, accessor in iteritems(self._dat)
        if accessor.owned and not accessor.pbo
    ]

    if trace:  # pragma: no cover
        template = "'%s': allgathering param sizes. local param sizes = %s"
        debug(template % (self._sysdata.pathname, local_sizes))

    return self.comm.allgather(local_sizes)
def _get_flattened_sizes(self):
    """
    Gather the local sizes of all owned, non pass-by-object variables.

    A variable whose accessor is flagged `remote` contributes a size of 0;
    accessors that are not `owned`, or that are `pbo`, are left out.

    Returns
    -------
    list of lists of (name, size) tuples
        Contains an entry for each process in this object's communicator.
        Each entry lists the local size of every qualifying variable in
        `self._dat` on that process.
    """
    local_sizes = []
    for varname, accessor in iteritems(self._dat):
        if not accessor.owned or accessor.pbo:
            continue
        local_size = 0 if accessor.remote else accessor.meta['size']
        local_sizes.append((varname, local_size))

    if trace:  # pragma: no cover
        template = "'%s': allgathering param sizes. local param sizes = %s"
        debug(template % (self._sysdata.pathname, local_sizes))

    return self.comm.allgather(local_sizes)
def _distrib_build_runlist(self):
    """
    Yield only the cases that this rank should run in a parallel DOE.

    Rank 0 of the full communicator materializes the complete run list,
    cuts it into `self._num_par_doe` contiguous chunks and scatters one
    chunk to each rank. The other ranks only take part in the scatter.
    """
    comm = self._full_comm

    chunks = None
    if comm.rank == 0:
        debug('Parallel DOE using %d procs' % self._num_par_doe)

        # The run list is an iterator of iterators, so it must be fully
        # expanded before it can be sliced and sent.
        all_cases = [list(case) for case in self._build_runlist()]

        chunk_sizes, chunk_offsets = evenly_distrib_idxs(self._num_par_doe,
                                                         len(all_cases))
        chunks = [all_cases[start:start + count]
                  for start, count in zip(chunk_offsets, chunk_sizes)]

    my_cases = comm.scatter(chunks, root=0)
    debug('Number of DOE jobs: %s' % len(my_cases))

    for case in my_cases:
        yield case
# NOTE(review): the statements up to `yield case` are the tail of a generator
# method whose `def` line is not visible in this span; they are kept
# unchanged.
doe_ids = comm.allgather(self._par_doe_id)
job_list = None
if comm.rank == 0:
    if trace:
        debug('Parallel DOE using %d procs' % self._num_par_doe)
    run_list = [list(case) for case in self._build_runlist()
                ]  # need to run iterator
    run_sizes, run_offsets = evenly_distrib_idxs(
        self._num_par_doe, len(run_list))
    jobs = [run_list[o:o + s] for o, s in zip(run_offsets, run_sizes)]
    job_list = [jobs[i] for i in doe_ids]
if trace:
    debug("scattering job_list: %s" % job_list)
run_list = comm.scatter(job_list, root=0)
if trace:
    debug('Number of DOE jobs: %s (scatter DONE)' % len(run_list))
for case in run_list:
    yield case

def _get_lhc(self):
    """Generates a Latin Hypercube based on the number of samples and the
    number of design variables.

    Returns
    -------
    The result of `_rand_latin_hypercube(self.num_samples,
    self.num_design_vars)` converted with `.astype(int)`.
    """
    rand_lhc = _rand_latin_hypercube(self.num_samples, self.num_design_vars)
    # cast the entries to int before handing the hypercube back
    return rand_lhc.astype(int)
def fd_jacobian(self, params, unknowns, resids, total_derivs=False,
                fd_params=None, fd_unknowns=None, pass_unknowns=(),
                poi_indices=None, qoi_indices=None):
    """Finite difference across all unknowns in this system w.r.t. all
    incoming params.

    Each selected input entry is perturbed in place, the model is re-run,
    and the change in the result vector becomes one Jacobian column. The
    input entry and the result vector are restored after every column.

    Args
    ----
    params : `VecWrapper`
        `VecWrapper` containing parameters. (p)

    unknowns : `VecWrapper`
        `VecWrapper` containing outputs and states. (u)

    resids : `VecWrapper`
        `VecWrapper` containing residuals. (r)

    total_derivs : bool, optional
        Set to true to calculate total derivatives. Otherwise, partial
        derivatives are returned.

    fd_params : list of strings, optional
        List of parameter name strings with respect to which derivatives
        are desired. This is used by problem to limit the derivatives that
        are taken.

    fd_unknowns : list of strings, optional
        List of output or state name strings for derivatives to be
        calculated. This is used by problem to limit the derivatives that
        are taken.

    pass_unknowns : list of strings, optional
        List of outputs that are also finite difference inputs. OpenMDAO
        supports specifying a design variable (or slice of one) as an objective,
        so gradients of these are also required.

    poi_indices: dict of list of integers, optional
        This is a dict that contains the index values for each parameter of
        interest, so that we only finite difference those indices.

    qoi_indices: dict of list of integers, optional
        This is a dict that contains the index values for each quantity of
        interest, so that the finite difference is returned only for those
        indices.

    Returns
    -------
    dict
        Dictionary whose keys are tuples of the form ('unknown', 'param')
        and whose values are ndarrays containing the derivative for that
        tuple pair.
    """

    # Params and Unknowns that we provide at this level.
    if fd_params is None:
        fd_params = self._get_fd_params()
    if fd_unknowns is None:
        fd_unknowns = self._get_fd_unknowns()

    abs_pnames = self._sysdata.to_abs_pnames

    # Use settings in the system dict unless variables override.
    step_size = self.fd_options.get('step_size', 1.0e-6)
    form = self.fd_options.get('form', 'forward')
    step_type = self.fd_options.get('step_type', 'relative')

    jac = {}
    cache2 = None

    # Prepare for calculating partial derivatives or total derivatives
    if total_derivs:
        run_model = self.solve_nonlinear
        resultvec = unknowns
        states = ()
    else:
        run_model = self.apply_nonlinear
        resultvec = resids
        states = self.states

    # baseline copy of the result vector, used both for differencing and for
    # restoring the vector after each column
    cache1 = resultvec.vec.copy()

    gather_jac = False

    # running count of columns over all params; decides which parallel FD
    # process computes which column
    fd_count = -1

    # if doing parallel FD, we need to save results during calculation
    # and then pass them around.  fd_cols stores the
    # column data keyed by (uname, pname, col_id).
    fd_cols = {}

    to_prom_name = self._sysdata.to_prom_name

    # Compute gradient for this param or state.
    for p_name in chain(fd_params, states):

        # If our input is connected to a IndepVarComp, then we need to twiddle
        # the unknowns vector instead of the params vector.
        src = self.connections.get(p_name)
        if src is not None:
            param_src = src[0]  # just the name

            # Have to convert to promoted name to key into unknowns
            if param_src not in self.unknowns:
                param_src = to_prom_name[param_src]

            target_input = unknowns._dat[param_src].val

        else:
            # Cases where the IndepVarComp is somewhere above us.
            if p_name in states:
                inputs = unknowns
            else:
                inputs = params

            target_input = inputs._dat[p_name].val
            # no source at this level; keeps the poi_indices and
            # pass_unknowns checks below from matching anything
            param_src = None

        mydict = {}
        # since p_name is a promoted name, it could refer to multiple
        # params.  We've checked earlier to make sure that step_size,
        # step_type, and form are not defined differently for each
        # matching param.  If they differ, a warning has already been issued.
        if p_name in abs_pnames:
            mydict = self._params_dict[abs_pnames[p_name][0]]

        # Local settings for this var trump all
        fdstep = mydict.get('step_size', step_size)
        fdtype = mydict.get('step_type', step_type)
        fdform = mydict.get('form', form)

        # Size our Inputs
        if poi_indices and param_src in poi_indices:
            p_idxs = poi_indices[param_src]
            p_size = len(p_idxs)
        else:
            p_size = np.size(target_input)
            p_idxs = range(p_size)

        # Size our Outputs
        for u_name in chain(fd_unknowns, pass_unknowns):
            if qoi_indices and u_name in qoi_indices:
                u_size = len(qoi_indices[u_name])
            else:
                u_size = np.size(unknowns[u_name])

            jac[u_name, p_name] = np.zeros((u_size, p_size))

        # if a given param isn't present in this process, we need
        # to still run the model once for each entry in that param
        # in order to stay in sync with the other processes.
        if p_size == 0:
            gather_jac = True
            p_idxs = range(self._params_dict[p_name]['size'])

        # Finite Difference each index in array
        for col, idx in enumerate(p_idxs):
            fd_count += 1

            # skip the current index if its done by some other
            # parallel fd proc
            if fd_count % self._num_par_fds == self._par_fd_id:
                if p_size == 0:
                    # nothing to perturb locally; run only to stay in sync
                    run_model(params, unknowns, resids)
                    continue

                # Relative or Absolute step size
                if fdtype == 'relative':
                    step = target_input[idx] * fdstep
                    # fdstep acts as a floor, so zero, tiny or negative
                    # inputs still get a usable step
                    if step < fdstep:
                        step = fdstep
                else:
                    step = fdstep

                if fdform == 'forward':

                    target_input[idx] += step

                    run_model(params, unknowns, resids)

                    target_input[idx] -= step

                    # delta resid is delta unknown
                    resultvec.vec[:] -= cache1
                    resultvec.vec[:] *= (1.0/step)

                elif fdform == 'backward':

                    target_input[idx] -= step

                    run_model(params, unknowns, resids)

                    target_input[idx] += step

                    # delta resid is delta unknown
                    resultvec.vec[:] -= cache1
                    resultvec.vec[:] *= (-1.0/step)

                elif fdform == 'central':

                    target_input[idx] += step

                    run_model(params, unknowns, resids)
                    # result at +step
                    cache2 = resultvec.vec.copy()

                    target_input[idx] -= step
                    resultvec.vec[:] = cache1

                    target_input[idx] -= step

                    run_model(params, unknowns, resids)

                    # central difference formula
                    # (result at -step minus result at +step) * (-0.5/step)
                    resultvec.vec[:] -= cache2
                    resultvec.vec[:] *= (-0.5/step)

                    target_input[idx] += step

                # NOTE(review): any other value of fdform falls through and
                # the unmodified result vector is copied into the column.
                for u_name in fd_unknowns:
                    if qoi_indices and u_name in qoi_indices:
                        result = resultvec._dat[u_name].val[qoi_indices[u_name]]
                    else:
                        result = resultvec._dat[u_name].val
                    jac[u_name, p_name][:, col] = result
                    if self._num_par_fds > 1:  # pragma: no cover
                        fd_cols[(u_name, p_name, col)] = \
                            jac[u_name, p_name][:, col]

                # When an unknown is a parameter, it isn't calculated, so
                # we manually fill in identity by placing a 1 wherever it
                # is needed.
                for u_name in pass_unknowns:
                    if u_name == param_src:
                        if qoi_indices and u_name in qoi_indices:
                            q_idxs = qoi_indices[u_name]
                            if idx in q_idxs:
                                row = qoi_indices[u_name].index(idx)
                                jac[u_name, p_name][row][col] = 1.0
                        else:
                            # replaces the whole entry with a 1x1 identity
                            jac[u_name, p_name] = np.array([[1.0]])

                # Restore old residual
                resultvec.vec[:] = cache1

    if self._num_par_fds > 1:
        if trace:  # pragma: no cover
            debug("%s: allgathering parallel FD columns" % self.pathname)
        # every process receives the columns computed by all the others
        jacinfos = self._full_comm.allgather(fd_cols)
        for rank, jacinfo in enumerate(jacinfos):
            if rank == self._full_comm.rank:
                continue
            for key, val in iteritems(jacinfo):
                if key not in fd_cols:
                    uname, pname, col = key
                    jac[uname, pname][:, col] = val
                    fd_cols[(uname, pname, col)] = val  # to avoid setting dups
    elif MPI and gather_jac:
        # some param was absent from this process; fill the gaps from the
        # processes that do have it
        jac = self.get_combined_jac(jac)

    return jac
def get_combined_jac(self, J):
    """
    Fill in the empty entries of a distributed Jacobian dict from the
    processes that hold them.

    J is keyed by (output, param) tuples. An entry whose value has size 0
    is treated as missing locally. For each key that is missing on at
    least one process, the lowest ranked process holding a non-empty value
    supplies it, and every other process overwrites its own entry with
    that value. J is modified in place.

    Args
    ----
    J : `dict`
        Local Jacobian

    Returns
    -------
    `dict`
        Local gathered Jacobian (the same object as `J`).
    """
    if not self.is_active():
        return J

    comm = self.comm
    my_rank = comm.rank

    # TODO: calculate dist_need_tups and dist_has_tups once
    #       and cache it instead of doing every time.

    # Sort the local keys into those we lack and those we can supply.
    missing_here = []
    present_here = []
    for (of, wrt), sub_jac in iteritems(J):
        bucket = missing_here if sub_jac.size == 0 else present_here
        bucket.append((of, wrt))

    if trace:  # pragma: no cover
        debug("%s: allgather of needed tups" % self.pathname)
    missing_by_rank = comm.allgather(missing_here)

    wanted = set()
    for rank_missing in missing_by_rank:
        wanted.update(rank_missing)

    if not wanted:
        return J  # nobody needs any J entries

    if trace:  # pragma: no cover
        debug("%s: allgather of has_tups" % self.pathname)
    present_by_rank = comm.allgather(present_here)

    # Walk the ranks in ascending order so that the lowest rank holding a
    # wanted key becomes its supplier.
    claimed = set()
    to_send = []
    for rank, rank_present in enumerate(present_by_rank):
        for key in rank_present:
            if key not in wanted or key in claimed:
                continue
            claimed.add(key)
            if rank == my_rank:
                to_send.append((key, J[key]))

    if trace:  # pragma: no cover
        debug("%s: allgather of owned vals" % self.pathname)
    sent_by_rank = comm.allgather(to_send)

    for rank, entries in enumerate(sent_by_rank):
        if rank == my_rank:
            continue
        for (of, wrt), sub_jac in entries:
            J[of, wrt] = sub_jac

    return J
# NOTE(review): the statements up to `yield case` are the tail of a generator
# method whose `def` line is not visible in this span; they are kept
# unchanged.
comm = self._full_comm
job_list = None
if comm.rank == 0:
    debug('Parallel DOE using %d procs' % self._num_par_doe)
    run_list = [
        list(case) for case in self._deserialize_or_create_runlist()
    ]  # need to run iterator
    run_sizes, run_offsets = evenly_distrib_idxs(
        self._num_par_doe, len(run_list))
    job_list = [
        run_list[o:o + s] for o, s in zip(run_offsets, run_sizes)
    ]
run_list = comm.scatter(job_list, root=0)
debug('Number of DOE jobs: %s' % len(run_list))
for case in run_list:
    yield case

def _deserialize_or_create_runlist(self):
    """
    Return the run list, reusing a serialized one when restart is enabled.

    When `self.use_restart` is set, the run list is first requested from
    `RestartRecorder.deserialize_runlist(self.original_dir)`. If that
    yields nothing (or restart is off), the list is built from
    `self._build_runlist()` and, when restart is enabled, handed to
    `RestartRecorder.serialize_runlist`.

    Returns
    -------
    The deserialized run list, or a list of lists built from
    `self._build_runlist()`.
    """
    runlist = None
    if self.use_restart:
        runlist = RestartRecorder.deserialize_runlist(self.original_dir)
    # an empty or missing deserialized list falls back to a fresh build
    if not runlist:
        runlist = [list(run) for run in self._build_runlist()]
    if self.use_restart:
        # NOTE(review): this is at function level (not inside the rebuild
        # branch) in this reconstruction, so a list that was just
        # deserialized is written back as well -- confirm the nesting.
        RestartRecorder.serialize_runlist(self.original_dir, runlist,
                                          self._num_par_doe)
    return runlist
def fd_jacobian(self, params, unknowns, resids, total_derivs=False,
                fd_params=None, fd_unknowns=None,
                poi_indices=None, qoi_indices=None):
    """Finite difference across all unknowns in this system w.r.t. all
    incoming params.

    Each selected input entry is perturbed in place, the model is re-run,
    and the change in the result vector becomes one Jacobian column. The
    input entry and the result vector are restored after every column.

    Args
    ----
    params : `VecWrapper`
        `VecWrapper` containing parameters. (p)

    unknowns : `VecWrapper`
        `VecWrapper` containing outputs and states. (u)

    resids : `VecWrapper`
        `VecWrapper` containing residuals. (r)

    total_derivs : bool, optional
        Set to true to calculate total derivatives. Otherwise, partial
        derivatives are returned.

    fd_params : list of strings, optional
        List of parameter name strings with respect to which derivatives
        are desired. This is used by problem to limit the derivatives that
        are taken.

    fd_unknowns : list of strings, optional
        List of output or state name strings for derivatives to be
        calculated. This is used by problem to limit the derivatives that
        are taken.

    poi_indices: dict of list of integers, optional
        This is a dict that contains the index values for each parameter of
        interest, so that we only finite difference those indices.

    qoi_indices: dict of list of integers, optional
        This is a dict that contains the index values for each quantity of
        interest, so that the finite difference is returned only for those
        indices.

    Returns
    -------
    dict
        Dictionary whose keys are tuples of the form ('unknown', 'param')
        and whose values are ndarrays containing the derivative for that
        tuple pair.
    """

    # Params and Unknowns that we provide at this level.
    if fd_params is None:
        fd_params = self._get_fd_params()
    if fd_unknowns is None:
        fd_unknowns = self._get_fd_unknowns()

    abs_pnames = self._sysdata.to_abs_pnames

    # Use settings in the system dict unless variables override.
    step_size = self.fd_options.get('step_size', 1.0e-6)
    form = self.fd_options.get('form', 'forward')
    step_type = self.fd_options.get('step_type', 'relative')

    jac = {}
    cache2 = None

    # Prepare for calculating partial derivatives or total derivatives
    if total_derivs:
        run_model = self.solve_nonlinear
        resultvec = unknowns
        states = ()
    else:
        run_model = self.apply_nonlinear
        resultvec = resids
        states = self.states

    # baseline copy of the result vector, used both for differencing and for
    # restoring the vector after each column
    cache1 = resultvec.vec.copy()

    gather_jac = False

    # running count of columns over all params; decides which parallel FD
    # process computes which column
    fd_count = -1

    # if doing parallel FD, we need to save results during calculation
    # and then pass them around.  fd_cols stores the
    # column data keyed by (uname, pname, col_id).
    fd_cols = {}

    to_prom_name = self._sysdata.to_prom_name

    # Compute gradient for this param or state.
    for p_name in chain(fd_params, states):

        # If our input is connected to a IndepVarComp, then we need to twiddle
        # the unknowns vector instead of the params vector.
        src = self.connections.get(p_name)
        if src is not None:
            param_src = src[0]  # just the name

            # Have to convert to promoted name to key into unknowns
            if param_src not in self.unknowns:
                param_src = to_prom_name[param_src]

            target_input = unknowns._dat[param_src].val

        else:
            # Cases where the IndepVarComp is somewhere above us.
            if p_name in states:
                inputs = unknowns
            else:
                inputs = params

            target_input = inputs._dat[p_name].val

            # There is no source at this level. Without this reset the
            # poi_indices lookup below would either hit an unbound local
            # (first iteration) or silently reuse the source name of the
            # previously processed param.
            param_src = None

        mydict = {}
        # since p_name is a promoted name, it could refer to multiple
        # params.  We've checked earlier to make sure that step_size,
        # step_type, and form are not defined differently for each
        # matching param.  If they differ, a warning has already been issued.
        if p_name in abs_pnames:
            mydict = self._params_dict[abs_pnames[p_name][0]]

        # Local settings for this var trump all
        fdstep = mydict.get('step_size', step_size)
        fdtype = mydict.get('step_type', step_type)
        fdform = mydict.get('form', form)

        # Size our Inputs
        if poi_indices and param_src in poi_indices:
            p_idxs = poi_indices[param_src]
            p_size = len(p_idxs)
        else:
            p_size = np.size(target_input)
            p_idxs = range(p_size)

        # Size our Outputs
        for u_name in fd_unknowns:
            if qoi_indices and u_name in qoi_indices:
                u_size = len(qoi_indices[u_name])
            else:
                u_size = np.size(unknowns[u_name])

            jac[u_name, p_name] = np.zeros((u_size, p_size))

        # if a given param isn't present in this process, we need
        # to still run the model once for each entry in that param
        # in order to stay in sync with the other processes.
        if p_size == 0:
            gather_jac = True
            p_idxs = range(self._params_dict[p_name]['size'])

        # Finite Difference each index in array
        for col, idx in enumerate(p_idxs):
            fd_count += 1

            # skip the current index if its done by some other
            # parallel fd proc
            if fd_count % self._num_par_fds == self._par_fd_id:
                if p_size == 0:
                    # nothing to perturb locally; run only to stay in sync
                    run_model(params, unknowns, resids)
                    continue

                # Relative or Absolute step size
                if fdtype == 'relative':
                    step = target_input[idx] * fdstep
                    # fdstep acts as a floor, so zero, tiny or negative
                    # inputs still get a usable step
                    if step < fdstep:
                        step = fdstep
                else:
                    step = fdstep

                if fdform == 'forward':

                    target_input[idx] += step

                    run_model(params, unknowns, resids)

                    target_input[idx] -= step

                    # delta resid is delta unknown
                    resultvec.vec[:] -= cache1
                    resultvec.vec[:] *= (1.0 / step)

                elif fdform == 'backward':

                    target_input[idx] -= step

                    run_model(params, unknowns, resids)

                    target_input[idx] += step

                    # delta resid is delta unknown
                    resultvec.vec[:] -= cache1
                    resultvec.vec[:] *= (-1.0 / step)

                elif fdform == 'central':

                    target_input[idx] += step

                    run_model(params, unknowns, resids)
                    # result at +step
                    cache2 = resultvec.vec.copy()

                    target_input[idx] -= step
                    resultvec.vec[:] = cache1

                    target_input[idx] -= step

                    run_model(params, unknowns, resids)

                    # central difference formula
                    # (result at -step minus result at +step) * (-0.5/step)
                    resultvec.vec[:] -= cache2
                    resultvec.vec[:] *= (-0.5 / step)

                    target_input[idx] += step

                for u_name in fd_unknowns:
                    if qoi_indices and u_name in qoi_indices:
                        result = resultvec._dat[u_name].val[
                            qoi_indices[u_name]]
                    else:
                        result = resultvec._dat[u_name].val
                    jac[u_name, p_name][:, col] = result
                    if self._num_par_fds > 1:  # pragma: no cover
                        fd_cols[(u_name, p_name, col)] = \
                            jac[u_name, p_name][:, col]

                # Restore old residual
                resultvec.vec[:] = cache1

    if self._num_par_fds > 1:
        if trace:  # pragma: no cover
            debug("%s: allgathering parallel FD columns" % self.pathname)
        # every process receives the columns computed by all the others
        jacinfos = self._full_comm.allgather(fd_cols)
        for rank, jacinfo in enumerate(jacinfos):
            if rank == self._full_comm.rank:
                continue
            for key, val in iteritems(jacinfo):
                if key not in fd_cols:
                    uname, pname, col = key
                    jac[uname, pname][:, col] = val
                    fd_cols[(uname, pname, col)] = val  # to avoid setting dups
    elif MPI and gather_jac:
        # some param was absent from this process; fill the gaps from the
        # processes that do have it
        jac = self.get_combined_jac(jac)

    return jac
def record_iteration(self, root, metadata, dummy=False):
    """
    Collect the variables to record and pass them to every recorder.

    Does nothing when no recorders are registered. Otherwise the current
    time is stored in `metadata['timestamp']`. Under MPI the variables
    named in `self._vars_to_record` are extracted, optionally gathered
    for serial recorders, and, if a case communicator is set, the cases
    from all of its ranks are gathered to its rank 0.

    Args
    ----
    root : `System`
        System containing variables.

    metadata : dict
        Metadata for iteration coordinate

    dummy : bool, optional
        If True, this is a dummy iteration, so no data will be collected
        from the model, but the collective gather call will still be made
        (only when running under MPI with a case communicator).
    """
    if not self._recorders:
        return

    metadata['timestamp'] = time.time()

    params = root.params
    unknowns = root.unknowns
    resids = root.resids

    if not MPI:
        cases = [(params, unknowns, resids, metadata)]
    else:
        if dummy and self._casecomm is not None:
            # Take part in the gather with an all-None placeholder so the
            # other ranks of the case communicator are not left waiting.
            placeholder = (None, None, None, None)
            if trace:
                debug("DUMMY gathering cases")
            cases = self._casecomm.gather(placeholder, root=0)
            if trace:
                debug("DUMMY done gathering cases:")
            return

        to_record = self._vars_to_record
        pnames = to_record['pnames']
        unames = to_record['unames']
        rnames = to_record['rnames']

        # get names and values of all locally owned variables
        params = {name: params[name] for name in pnames}
        unknowns = {name: unknowns[name] for name in unames}
        resids = {name: resids[name] for name in rnames}

        if self._has_serial_recorders:
            if self._record_p:
                params = self._gather_vars(root, params)
            else:
                params = {}

            if self._record_u:
                unknowns = self._gather_vars(root, unknowns)
            else:
                unknowns = {}

            if self._record_r:
                resids = self._gather_vars(root, resids)
            else:
                resids = {}

        local_case = (params, unknowns, resids, metadata)
        if self._casecomm is None:
            cases = [local_case]
        else:
            # our parent driver is running a parallel DOE, so we need to
            # gather all of the cases to this rank and loop over them
            if trace:
                debug("gathering cases")
            cases = self._casecomm.gather(local_case, root=0)
            if trace:
                debug("done gathering cases")
            if cases is None:
                # gather returned nothing on this rank
                cases = []

    # If the recorder does not support parallel recording
    # we need to make sure we only record on rank 0.
    for case_params, case_unknowns, case_resids, case_meta in cases:
        if case_params is None:
            # dummy cases have None in place of params, etc.
            continue
        for recorder in self._recorders:
            if recorder._parallel or MPI is None or self.rank == 0:
                recorder.record_iteration(case_params, case_unknowns,
                                          case_resids, case_meta)
def setup(self, system): """ Setup petsc problem just once. Args ---- system : `System` Parent `System` object. """ if not system.is_active(): return # allocate and cache the ksp problem for each voi for voi in system.dumat: sizes = system._local_unknown_sizes[voi] lsize = np.sum(sizes[system.comm.rank, :]) size = np.sum(sizes) if trace: debug("creating petsc matrix of size (%d,%d)" % (lsize, size)) jac_mat = PETSc.Mat().createPython([(lsize, size), (lsize, size)], comm=system.comm) if trace: debug("petsc matrix creation DONE for %s" % voi) jac_mat.setPythonContext(self) jac_mat.setUp() if trace: # pragma: no cover debug("creating KSP object for system", system.pathname) ksp = self.ksp[voi] = PETSc.KSP().create(comm=system.comm) if trace: debug("KSP creation DONE") ksp.setOperators(jac_mat) ksp.setType(self.options['ksp_type']) ksp.setGMRESRestart(1000) ksp.setPCSide(PETSc.PC.Side.RIGHT) ksp.setMonitor(Monitor(self)) if trace: # pragma: no cover debug("ksp.getPC()") debug("rhs_buf, sol_buf size: %d" % lsize) pc_mat = ksp.getPC() pc_mat.setType('python') pc_mat.setPythonContext(self) if trace: # pragma: no cover debug("ksp setup done") if self.preconditioner: self.preconditioner.setup(system)
def transfer(self, srcvec, tgtvec, mode='fwd', deriv=False):
    """Performs data transfer between a distributed source vector and
    a distributed target vector.

    In forward mode, pass-by-object connections are also transferred
    (unless `deriv` is True): same-process values are assigned directly,
    and values missing locally are sent from the source's owning rank.

    Args
    ----
    srcvec : `VecWrapper`
        Variables that are the source of the transfer in fwd mode and
        the destination of the transfer in rev mode.

    tgtvec : `VecWrapper`
        Variables that are the destination of the transfer in fwd mode and
        the source of the transfer in rev mode.

    mode : 'fwd' or 'rev', optional
        Direction of the data transfer, source to target ('fwd', the default)
        or target to source ('rev').

    deriv : bool, optional
        If True, this is a derivative data transfer, so no pass_by_obj
        variables will be transferred.
    """
    if mode == 'rev':
        # in reverse mode, srcvec and tgtvec are switched. Note, we only
        # run in reverse for derivatives, and derivatives accumulate from
        # all targets. This does not involve pass_by_object.
        if trace:  # pragma: no cover
            # src_idxs, tgt_idxs and vec_conns are only read under trace
            conns = ['%s <-- %s' % (u, v) for v, u in self.vec_conns]
            debug("%s rev scatter %s %s <-- %s" %
                  (srcvec._sysdata.pathname, conns, self.src_idxs,
                   self.tgt_idxs))
            debug("%s: srcvec = %s" % (tgtvec._sysdata.pathname,
                                       tgtvec.petsc_vec.array))
        # NOTE(review): the two True flags presumably select "add values"
        # and "reverse" in petsc4py's Scatter.scatter -- confirm.
        self.scatter.scatter(tgtvec.petsc_vec, srcvec.petsc_vec, True, True)
        if trace:  # pragma: no cover
            debug("%s: tgtvec = %s (DONE)" % (srcvec._sysdata.pathname,
                                              srcvec.petsc_vec.array))
    else:
        # forward mode, source to target including pass_by_object
        if trace:  # pragma: no cover
            conns = ['%s --> %s' % (u, v) for v, u in self.vec_conns]
            debug("%s fwd scatter %s %s --> %s" %
                  (srcvec._sysdata.pathname, conns, self.src_idxs,
                   self.tgt_idxs))
            debug("%s: srcvec = %s" % (srcvec._sysdata.pathname,
                                       srcvec.petsc_vec.array))
        self.scatter.scatter(srcvec.petsc_vec, tgtvec.petsc_vec, False, False)
        # during complex step the imaginary parts are scattered separately
        if tgtvec._probdata.in_complex_step:
            self.scatter.scatter(srcvec.imag_petsc_vec, tgtvec.imag_petsc_vec,
                                 False, False)

        if trace:  # pragma: no cover
            debug("%s: tgtvec = %s (DONE)" % (tgtvec._sysdata.pathname,
                                              tgtvec.petsc_vec.array))

        if not deriv and self.byobj_conns:
            comm = self.sysdata.comm
            iproc = comm.rank
            mylocals = self.sysdata.all_locals[iproc]

            # the position of a connection in byobj_conns doubles as the
            # message tag, so each receive is matched to its own send
            for itag, (tgt, src) in enumerate(self.byobj_conns):
                # if we're the owning rank of the src, send it out to
                # systems that don't have it locally.
                if iproc == self.sysdata.owning_ranks[src]:
                    # grab local value
                    val = srcvec[src]
                    for i, localvars in enumerate(self.sysdata.all_locals):
                        if i != iproc and src not in localvars and tgt in localvars:
                            if trace:
                                debug("sending %s" % val)
                            comm.send(val, dest=i, tag=itag)
                            if trace:
                                debug("DONE sending %s" % val)

            # ensure that all src values have been sent before we receive
            # any in order to avoid possible race conditions
            if trace:
                debug("waiting on comm.barrier")
            comm.barrier()
            if trace:
                debug("comm.barrier DONE")

            for itag, (tgt, src) in enumerate(self.byobj_conns):
                # if we don't have the value locally, pull it across using MPI
                if tgt in mylocals:
                    if src in mylocals:
                        # FileRef targets are updated through _assign_to
                        # instead of being replaced
                        if isinstance(tgtvec[tgt], FileRef):
                            tgtvec[tgt]._assign_to(srcvec[src])
                        else:
                            tgtvec[tgt] = srcvec[src]
                    else:
                        if trace:
                            debug("receiving to %s" % tgtvec[tgt])
                        val = comm.recv(
                            source=self.sysdata.owning_ranks[src], tag=itag)
                        if trace:
                            debug("received %s" % val)
                        if isinstance(tgtvec[tgt], FileRef):
                            tgtvec[tgt]._assign_to(val)
                        else:
                            tgtvec[tgt] = val
def _setup_variables(self, compute_indices=False):
    """
    Builds and returns our params and unknowns dictionaries, re-keyed
    to use absolute variable names.

    The returned dictionaries are new `OrderedDict` objects, but they
    hold the same metadata dicts as `_init_params_dict` and
    `_init_unknowns_dict`; each metadata dict gains a 'pathname' entry.
    The name mappings on `self._sysdata` are rebuilt as a side effect.

    Args
    ----

    compute_indices : bool, optional
        If True (and MPI is in use and this system is active), call
        setup_distrib() -- or the deprecated setup_distrib_idxs() if it is
        defined -- and update the 'distrib_size' metadata of every unknown
        that has 'src_indices'.

    Returns
    -------
    tuple of (`OrderedDict`, `OrderedDict`)
        The params and unknowns dictionaries keyed by absolute pathname.
    """
    to_prom_name = self._sysdata.to_prom_name = {}
    to_abs_uname = self._sysdata.to_abs_uname = {}
    to_abs_pnames = self._sysdata.to_abs_pnames = OrderedDict()
    to_prom_uname = self._sysdata.to_prom_uname = OrderedDict()
    to_prom_pname = self._sysdata.to_prom_pname = OrderedDict()

    if MPI and compute_indices and self.is_active():
        if hasattr(self, 'setup_distrib_idxs'):
            # NOTE(review): these simplefilter calls change the global
            # warning filters and leave DeprecationWarning ignored
            # afterwards -- confirm that is intended.
            warnings.simplefilter('always', DeprecationWarning)
            warnings.warn(
                "setup_distrib_idxs is deprecated, use setup_distrib instead.",
                DeprecationWarning, stacklevel=2)
            warnings.simplefilter('ignore', DeprecationWarning)
            self.setup_distrib_idxs()
        else:
            self.setup_distrib()
        # now update our distrib_size metadata for any distributed
        # unknowns
        sizes = []
        names = []
        for name, meta in iteritems(self._init_unknowns_dict):
            if 'src_indices' in meta:
                sizes.append(len(meta['src_indices']))
                names.append(name)
        if sizes:
            if trace:  # pragma: no cover
                debug("allgathering src index sizes:")
            # one row per process, one column per distributed unknown
            allsizes = np.zeros((self.comm.size, len(sizes)), dtype=int)
            self.comm.Allgather(np.array(sizes, dtype=int), allsizes)
            for i, name in enumerate(names):
                # the distributed size is the sum of the local sizes over
                # all processes
                self._init_unknowns_dict[name]['distrib_size'] = np.sum(
                    allsizes[:, i])

    # key with absolute path names and add promoted names
    self._params_dict = OrderedDict()
    for name, meta in iteritems(self._init_params_dict):
        pathname = self._get_var_pathname(name)
        self._params_dict[pathname] = meta
        meta['pathname'] = pathname
        to_prom_pname[pathname] = name
        # stored as a 1-tuple of pathnames
        to_abs_pnames[name] = (pathname, )

    self._unknowns_dict = OrderedDict()
    for name, meta in iteritems(self._init_unknowns_dict):
        pathname = self._get_var_pathname(name)
        self._unknowns_dict[pathname] = meta
        meta['pathname'] = pathname
        to_prom_uname[pathname] = name
        to_abs_uname[name] = pathname

    # params are merged last, so on a pathname clash the param entry wins
    to_prom_name.update(to_prom_uname)
    to_prom_name.update(to_prom_pname)

    self._post_setup_vars = True

    self._sysdata._params_dict = self._params_dict
    self._sysdata._unknowns_dict = self._unknowns_dict

    return self._params_dict, self._unknowns_dict
def transfer(self, srcvec, tgtvec, mode='fwd', deriv=False):
    """Move data between a distributed source vector and a distributed
    target vector using the PETSc scatter built for this object.

    Pass-by-object connections are not supported here: a forward,
    non-derivative transfer raises as soon as one is present.

    Args
    ----
    srcvec : `VecWrapper`
        Variables that are the source of the transfer in fwd mode and
        the destination of the transfer in rev mode.

    tgtvec : `VecWrapper`
        Variables that are the destination of the transfer in fwd mode and
        the source of the transfer in rev mode.

    mode : 'fwd' or 'rev', optional
        Direction of the data transfer, source to target ('fwd', the default)
        or target to source ('rev').

    deriv : bool, optional
        If True, this is a derivative data transfer, so no pass_by_obj
        variables will be transferred.

    Raises
    ------
    NotImplementedError
        In forward mode with `deriv` False, if `self.byobj_conns` is not
        empty.
    """
    if mode == 'rev':
        # in reverse mode, srcvec and tgtvec are switched. Note, we only
        # run in reverse for derivatives, and derivatives accumulate from
        # all targets. This does not involve pass_by_object.
        if trace:  # pragma: no cover
            links = ['%s <-- %s' % (unknown, param)
                     for param, unknown in self.vec_conns]
            debug("%s rev scatter %s %s <-- %s" %
                  (srcvec._sysdata.pathname, links, self.src_idxs,
                   self.tgt_idxs))
            debug("%s: srcvec = %s" %
                  (tgtvec._sysdata.pathname, tgtvec.petsc_vec.array))

        self.scatter.scatter(tgtvec.petsc_vec, srcvec.petsc_vec, True, True)

        if trace:  # pragma: no cover
            debug("%s: tgtvec = %s" %
                  (srcvec._sysdata.pathname, srcvec.petsc_vec.array))
        return

    # forward mode, source to target including pass_by_object
    if trace:  # pragma: no cover
        links = ['%s --> %s' % (unknown, param)
                 for param, unknown in self.vec_conns]
        debug("%s fwd scatter %s %s --> %s" %
              (srcvec._sysdata.pathname, links, self.src_idxs,
               self.tgt_idxs))
        debug("%s: srcvec = %s" %
              (srcvec._sysdata.pathname, srcvec.petsc_vec.array))

    self.scatter.scatter(srcvec.petsc_vec, tgtvec.petsc_vec, False, False)

    if trace:  # pragma: no cover
        debug("%s: tgtvec = %s" %
              (tgtvec._sysdata.pathname, tgtvec.petsc_vec.array))

    if deriv:
        return

    # Any pass-by-object connection is an error; the first one is reported.
    for target, source in self.byobj_conns:
        raise NotImplementedError("can't transfer '%s' to '%s'" %
                                  (source, target))
def transfer(self, srcvec, tgtvec, mode='fwd', deriv=False):
    """Move data between a distributed source vector and a distributed
    target vector using the PETSc scatter built for this object.

    Pass-by-object connections are not supported here: a forward,
    non-derivative transfer raises as soon as one is present.

    Args
    ----
    srcvec : `VecWrapper`
        Variables that are the source of the transfer in fwd mode and
        the destination of the transfer in rev mode.

    tgtvec : `VecWrapper`
        Variables that are the destination of the transfer in fwd mode and
        the source of the transfer in rev mode.

    mode : 'fwd' or 'rev', optional
        Direction of the data transfer, source to target ('fwd', the default)
        or target to source ('rev').

    deriv : bool, optional
        If True, this is a derivative data transfer, so no pass_by_obj
        variables will be transferred.

    Raises
    ------
    NotImplementedError
        In forward mode with `deriv` False, if `self.byobj_conns` is not
        empty.
    """
    if mode == 'rev':
        # in reverse mode, srcvec and tgtvec are switched. Note, we only
        # run in reverse for derivatives, and derivatives accumulate from
        # all targets. This does not involve pass_by_object.
        if trace:
            links = ['%s <-- %s' % (unknown, param)
                     for param, unknown in self.vec_conns]
            debug("%s rev scatter %s %s <-- %s" %
                  (srcvec._sysdata.pathname, links, self.src_idxs,
                   self.tgt_idxs))
            debug("%s: srcvec = %s" %
                  (tgtvec._sysdata.pathname, tgtvec.petsc_vec.array))

        self.scatter.scatter(tgtvec.petsc_vec, srcvec.petsc_vec, True, True)

        if trace:
            debug("%s: tgtvec = %s" %
                  (srcvec._sysdata.pathname, srcvec.petsc_vec.array))
        return

    # forward mode, source to target including pass_by_object
    if trace:
        links = ['%s --> %s' % (unknown, param)
                 for param, unknown in self.vec_conns]
        debug("%s fwd scatter %s %s --> %s" %
              (srcvec._sysdata.pathname, links, self.src_idxs,
               self.tgt_idxs))
        debug("%s: srcvec = %s" %
              (srcvec._sysdata.pathname, srcvec.petsc_vec.array))

    self.scatter.scatter(srcvec.petsc_vec, tgtvec.petsc_vec, False, False)

    if trace:
        debug("%s: tgtvec = %s" %
              (tgtvec._sysdata.pathname, tgtvec.petsc_vec.array))

    if deriv:
        return

    # Any pass-by-object connection is an error; the first one is reported.
    for target, source in self.byobj_conns:
        raise NotImplementedError("can't transfer '%s' to '%s'" %
                                  (source, target))
def _setup_communicators(self, comm, parent_dir):
    """
    Assign a communicator to the root `System`.

    If more than one parallel DOE is requested and MPI is available, the
    offered communicator is split into one sub-communicator per DOE and
    the root receives the sub-communicator for this process. Otherwise
    the root receives `comm` unchanged.

    Args
    ----
    comm : an MPI communicator (real or fake)
        The communicator being offered by the Problem.

    parent_dir : str
        Absolute dir of parent `System`.
    """
    root = self.root

    # parallel DOE needs both MPI and more than one requested DOE
    if not (MPI and self._num_par_doe > 1):
        self._num_par_doe = 1
        self._load_balance = False

    self._full_comm = comm
    num_doe = self._num_par_doe

    if num_doe <= 1:
        self._casecomm = None
    else:
        # figure out which parallel DOE we are associated with
        _minprocs, _maxprocs = root.get_req_procs()  # values are not used

        if self._load_balance:
            # rank 0 is kept for itself; the other ranks are shared out
            # among the remaining DOEs
            sizes, offsets = evenly_distrib_idxs(num_doe - 1, comm.size - 1)
            sizes = [1] + list(sizes)
            offsets = [0] + [off + 1 for off in offsets]
        else:
            sizes, offsets = evenly_distrib_idxs(num_doe, comm.size)

        # a 'color' is assigned to each subsystem, with
        # an entry for each processor it will be given
        # e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
        color = []
        id_map = self._id_map = {}
        for doe_id in range(num_doe):
            nprocs = sizes[doe_id]
            color.extend([doe_id] * nprocs)
            id_map[doe_id] = (nprocs, offsets[doe_id])

        self._par_doe_id = color[comm.rank]

        if self._load_balance:
            self._casecomm = None
        else:
            # only the first process of each DOE gets color 1; every
            # other process is marked MPI.UNDEFINED
            casecolor = []
            for doe_id in range(num_doe):
                nprocs = sizes[doe_id]
                if nprocs > 0:
                    casecolor.append(1)
                    casecolor.extend([MPI.UNDEFINED] * (nprocs - 1))

            # we need a comm that has all the 0 ranks of the subcomms so
            # we can gather multiple cases run as part of parallel DOE.
            if trace:  # pragma: no cover
                debug('%s: splitting casecomm, doe_id=%s' %
                      ('.'.join((root.pathname, 'driver')),
                       self._par_doe_id))
            self._casecomm = comm.Split(casecolor[comm.rank])
            if trace:  # pragma: no cover
                debug('%s: casecomm split done' %
                      '.'.join((root.pathname, 'driver')))

            if self._casecomm == MPI.COMM_NULL:
                self._casecomm = None

        # create a sub-communicator for each color and
        # get the one assigned to our color/process
        if trace:  # pragma: no cover
            debug('%s: splitting comm, doe_id=%s' %
                  ('.'.join((root.pathname, 'driver')),
                   self._par_doe_id))
        comm = comm.Split(self._par_doe_id)
        if trace:  # pragma: no cover
            debug('%s: comm split done' %
                  '.'.join((root.pathname, 'driver')))

    # tell RecordingManager it needs to do a multicase gather
    self.recorders._casecomm = self._casecomm

    root._setup_communicators(comm, parent_dir)
def solve(self, rhs_mat, system, mode):
    """
    Solves the linear system for the problem in self.system. The
    full solution vector is returned.

    One KSP solve is performed per entry of `rhs_mat`; `self.voi`,
    `self.system` and `self.iter_count` are set for the duration of each
    solve.

    Args
    ----
    rhs_mat : dict of ndarray
        Dictionary containing one ndarry per top level quantity of
        interest. Each array contains the right-hand side for the linear
        solve.

    system : `System`
        Parent `System` object.

    mode : string
        Derivative mode, can be 'fwd' or 'rev'.

    Returns
    -------
    dict of ndarray : Solution vectors
    """
    options = self.options
    self.mode = mode

    self.ksp.setTolerances(max_it=options['maxiter'],
                           atol=options['atol'],
                           rtol=options['rtol'])

    unknowns_mat = OrderedDict()
    for voi, rhs in iteritems(rhs_mat):

        sol_vec = np.zeros(rhs.shape)

        # Set these in the system
        if trace:  # pragma: no cover
            debug("creating sol_buf petsc vec for voi", voi)
        self.sol_buf_petsc = PETSc.Vec().createWithArray(sol_vec,
                                                         comm=system.comm)
        if trace:  # pragma: no cover
            debug("sol_buf creation DONE")
            debug("creating rhs_buf petsc vec for voi", voi)
        self.rhs_buf_petsc = PETSc.Vec().createWithArray(rhs,
                                                         comm=system.comm)
        if trace:  # pragma: no cover
            debug("rhs_buf creation DONE")

        # Petsc can only handle one right-hand-side at a time for now
        self.voi = voi
        self.system = system
        self.iter_count = 0
        try:
            self.ksp.solve(self.rhs_buf_petsc, self.sol_buf_petsc)
        finally:
            # never keep the system reference around, even if the
            # solve raises
            self.system = None

        if options['iprint'] > 0:
            # '>=' rather than '==': a count that overshoots maxiter
            # must not be reported as converged
            if self.iter_count >= options['maxiter']:
                msg = 'FAILED to converge after hitting max iterations'
            else:
                msg = 'Converged'

            self.print_norm(self.print_name, system.pathname,
                            self.iter_count, 0, 0, msg=msg, solver='LN')

        unknowns_mat[voi] = sol_vec

    # also covers the case where rhs_mat is empty
    self.system = None
    return unknowns_mat
def transfer(self, srcvec, tgtvec, mode='fwd', deriv=False):
    """
    Performs data transfer between a distributed source vector and
    a distributed target vector.

    In fwd mode with `deriv` False, pass_by_obj values are exchanged as
    well: the owning rank of each source sends it to every rank that has
    the target locally but not the source, and local sources are assigned
    directly.

    Args
    ----
    srcvec : `VecWrapper`
        Variables that are the source of the transfer in fwd mode and
        the destination of the transfer in rev mode.

    tgtvec : `VecWrapper`
        Variables that are the destination of the transfer in fwd mode
        and the source of the transfer in rev mode.

    mode : 'fwd' or 'rev', optional
        Direction of the data transfer, source to target ('fwd', the
        default) or target to source ('rev').

    deriv : bool, optional
        If True, this is a derivative data transfer, so no pass_by_obj
        variables will be transferred.
    """
    if mode == 'rev':
        # in reverse mode, srcvec and tgtvec are switched. Note, we only
        # run in reverse for derivatives, and derivatives accumulate from
        # all targets. This does not involve pass_by_object.
        if trace:  # pragma: no cover
            links = ['%s <-- %s' % (b, a) for a, b in self.vec_conns]
            debug("%s rev scatter %s %s <-- %s" %
                  (srcvec._sysdata.pathname, links,
                   self.src_idxs, self.tgt_idxs))
            debug("%s: srcvec = %s" % (tgtvec._sysdata.pathname,
                                       tgtvec.petsc_vec.array))

        self.scatter.scatter(tgtvec.petsc_vec, srcvec.petsc_vec,
                             True, True)

        if trace:  # pragma: no cover
            debug("%s: tgtvec = %s (DONE)" % (srcvec._sysdata.pathname,
                                              srcvec.petsc_vec.array))
        return

    # forward mode, source to target including pass_by_object
    if trace:  # pragma: no cover
        links = ['%s --> %s' % (b, a) for a, b in self.vec_conns]
        debug("%s fwd scatter %s %s --> %s" %
              (srcvec._sysdata.pathname, links,
               self.src_idxs, self.tgt_idxs))
        debug("%s: srcvec = %s" % (srcvec._sysdata.pathname,
                                   srcvec.petsc_vec.array))

    self.scatter.scatter(srcvec.petsc_vec, tgtvec.petsc_vec, False, False)

    # during complex step the imaginary parts are scattered separately
    if tgtvec._probdata.in_complex_step is True:
        self.scatter.scatter(srcvec.imag_petsc_vec, tgtvec.imag_petsc_vec,
                             False, False)

    if trace:  # pragma: no cover
        debug("%s: tgtvec = %s (DONE)" % (tgtvec._sysdata.pathname,
                                          tgtvec.petsc_vec.array))

    if deriv:
        return

    sysdata = self.sysdata
    comm = sysdata.comm
    iproc = comm.rank
    mylocals = sysdata.all_locals[iproc]

    # Phase 1: the connection index doubles as the message tag.
    # NOTE(review): these are blocking sends issued before the barrier,
    # while the matching receives are only posted after it -- confirm
    # this cannot stall for large pass_by_obj values.
    for itag, (tgt, src) in enumerate(self.byobj_conns):
        # if we're the owning rank of the src, send it out to
        # systems that don't have it locally.
        if iproc != sysdata.owning_ranks[src]:
            continue

        # grab local value
        val = srcvec[src]
        for rank, localvars in enumerate(sysdata.all_locals):
            if rank == iproc or src in localvars or tgt not in localvars:
                continue
            if trace:  # pragma: no cover
                debug("sending %s" % val)
            comm.send(val, dest=rank, tag=itag)
            if trace:  # pragma: no cover
                debug("DONE sending %s" % val)

    # ensure that all src values have been sent before we receive
    # any in order to avoid possible race conditions
    comm.barrier()

    # Phase 2: fill in every target that lives on this process
    for itag, (tgt, src) in enumerate(self.byobj_conns):
        if tgt not in mylocals:
            continue

        if src in mylocals:
            val = srcvec[src]
        else:
            # if we don't have the value locally, pull it across using MPI
            if trace:  # pragma: no cover
                debug("receiving to %s" % tgtvec[tgt])
            val = comm.recv(source=sysdata.owning_ranks[src], tag=itag)
            if trace:  # pragma: no cover
                debug("received %s" % val)

        # FileRef targets are updated through _assign_to instead of
        # being replaced
        if isinstance(tgtvec[tgt], FileRef):
            tgtvec[tgt]._assign_to(val)
        else:
            tgtvec[tgt] = val
def solve(self, rhs_mat, system, mode):
    """
    Solves the linear system for the problem in self.system. The
    full solution vector is returned.

    One KSP solve is performed per entry of `rhs_mat`; `self.voi`,
    `self.system` and `self.iter_count` are set for the duration of each
    solve.

    Args
    ----
    rhs_mat : dict of ndarray
        Dictionary containing one ndarry per top level quantity of
        interest. Each array contains the right-hand side for the linear
        solve.

    system : `System`
        Parent `System` object.

    mode : string
        Derivative mode, can be 'fwd' or 'rev'.

    Returns
    -------
    dict of ndarray : Solution vectors
    """
    options = self.options
    self.mode = mode

    self.ksp.setTolerances(max_it=options['maxiter'],
                           atol=options['atol'],
                           rtol=options['rtol'])

    unknowns_mat = {}
    for voi, rhs in iteritems(rhs_mat):

        sol_vec = np.zeros(rhs.shape)

        # Set these in the system
        if trace:  # pragma: no cover
            debug("creating sol_buf petsc vec for voi", voi)
        self.sol_buf_petsc = PETSc.Vec().createWithArray(sol_vec,
                                                         comm=system.comm)
        if trace:  # pragma: no cover
            debug("creating rhs_buf petsc vec for voi", voi)
        self.rhs_buf_petsc = PETSc.Vec().createWithArray(rhs,
                                                         comm=system.comm)

        # Petsc can only handle one right-hand-side at a time for now
        self.voi = voi
        self.system = system
        self.iter_count = 0
        try:
            self.ksp.solve(self.rhs_buf_petsc, self.sol_buf_petsc)
        finally:
            # never keep the system reference around, even if the
            # solve raises
            self.system = None

        if options['iprint'] > 0:
            # '>=' rather than '==': a count that overshoots maxiter
            # must not be reported as converged
            if self.iter_count >= options['maxiter']:
                msg = 'FAILED to converge after hitting max iterations'
            else:
                msg = 'Converged'

            self.print_norm('KSP', system.pathname, self.iter_count,
                            0, 0, msg=msg, solver='LN')

        unknowns_mat[voi] = sol_vec

    # also covers the case where rhs_mat is empty
    self.system = None
    return unknowns_mat
def solve(self, rhs_mat, system, mode):
    """
    Solves the linear system for the problem in self.system. The
    full solution vector is returned.

    Each quantity of interest is solved with its own KSP object, looked
    up in `self.ksp` by the voi key.

    Args
    ----
    rhs_mat : dict of ndarray
        Dictionary containing one ndarry per top level quantity of
        interest. Each array contains the right-hand side for the linear
        solve.

    system : `System`
        Parent `System` object.

    mode : string
        Derivative mode, can be 'fwd' or 'rev'.

    Returns
    -------
    dict of ndarray : Solution vectors

    Raises
    ------
    AnalysisError
        If a solve reaches `maxiter` iterations and the 'err_on_maxiter'
        option is set.
    """
    options = self.options
    self.mode = mode

    unknowns_mat = OrderedDict()
    maxiter = options['maxiter']
    atol = options['atol']
    rtol = options['rtol']
    iprint = options['iprint']

    for voi, rhs in iteritems(rhs_mat):

        ksp = self.ksp[voi]
        ksp.setTolerances(max_it=maxiter, atol=atol, rtol=rtol)

        sol_vec = np.zeros(rhs.shape)

        # Set these in the system
        if trace:  # pragma: no cover
            debug("creating sol_buf petsc vec for voi", voi)
        self.sol_buf_petsc = PETSc.Vec().createWithArray(sol_vec,
                                                         comm=system.comm)
        if trace:  # pragma: no cover
            debug("sol_buf creation DONE")
            debug("creating rhs_buf petsc vec for voi", voi)
        self.rhs_buf_petsc = PETSc.Vec().createWithArray(rhs,
                                                         comm=system.comm)
        if trace:  # pragma: no cover
            debug("rhs_buf creation DONE")

        # Petsc can only handle one right-hand-side at a time for now
        self.voi = voi
        self.system = system
        self.iter_count = 0
        try:
            ksp.solve(self.rhs_buf_petsc, self.sol_buf_petsc)
        finally:
            # never keep the system reference around, even if the
            # solve raises
            self.system = None

        # Final residual print if you only want the last one
        if iprint == 1:
            mon = ksp.getMonitor()[0][0]
            self.print_norm(self.print_name, system, self.iter_count,
                            mon._norm, mon._norm0, indent=1, solver='LN')

        if self.iter_count >= maxiter:
            msg = 'FAILED to converge in %d iterations' % self.iter_count
            fail = True
        else:
            msg = 'Converged in %d iterations' % self.iter_count
            fail = False

        # failures are reported unless iprint is -1 or lower
        if iprint > 0 or (fail and iprint > -1):
            self.print_norm(self.print_name, system, self.iter_count,
                            0, 0, msg=msg, indent=1, solver='LN')

        unknowns_mat[voi] = sol_vec

        if fail and options['err_on_maxiter']:
            raise AnalysisError("Solve in '%s': PetscKSP %s" %
                                (system.pathname, msg))

    # also covers the case where rhs_mat is empty
    self.system = None
    return unknowns_mat
def _distrib_lb_build_runlist(self):
    """
    Runs a load balanced version of the runlist, with the master
    rank (0) sending a new case to each worker rank as soon as it
    has finished its last case.

    This is a generator whose behavior depends on the rank in
    `self._full_comm`:

    * On rank 0 (master) it hands out cases from `self._build_runlist()`
      with tag 1, collects results with tag 2, and yields the accumulated
      info dict (keys 'count', 'terminate', 'p', 'u', 'r', 'meta') of
      each case once every process working on it has reported back,
      unless that case requested termination.
    * On every other rank (worker) it yields each case received from the
      master so the caller can execute it, then sends the local case data
      back. A received `None` ends the worker loop.
    """
    comm = self._full_comm

    if self._full_comm.rank == 0:  # master rank
        runiter = self._build_runlist()
        received = 0
        sent = 0

        # cases left for each par doe
        cases = {n:{'count': 0, 'terminate': 0, 'p':{}, 'u':{}, 'r':{},
                    'meta':{'success': 1, 'msg': ''}}
                 for n in self._id_map}

        # create a mapping of ranks to doe_ids, to handle those cases
        # where a single DOE is executed across multiple processes, i.e.,
        # for each process, we need to know which case it's working on.
        doe_ids = {}
        for doe_id, tup in self._id_map.items():
            size, offset = tup
            for i in range(size):
                doe_ids[i+offset] = doe_id

        # seed the workers (doe id 0 is skipped: the loop starts at 1)
        for i in range(1, self._num_par_doe):
            try:
                # case is a generator, so must make a list to send
                case = list(next(runiter))
            except StopIteration:
                break
            size, offset = self._id_map[i]
            # send the case to all of the subprocs that will work on it
            for j in range(size):
                if trace:  # pragma: no cover
                    debug('Sending Seed case %d, %d' % (i, j))
                comm.send(case, j+offset, tag=1)
                if trace:  # pragma: no cover
                    debug('Seed Case Sent %d, %d' % (i, j))
                # 'count' and 'sent' are tracked per process, not per case
                cases[i]['count'] += 1
                sent += 1

        # send the rest of the cases
        if sent > 0:
            more_cases = True
            while True:
                if trace:  # pragma: no cover
                    debug("Waiting on case")
                worker, p, u, r, meta = comm.recv(tag=2)
                if trace:  # pragma: no cover
                    debug("Case Recieved from Worker %d" % worker )

                received += 1

                # merge this process's results into the info for its doe
                caseinfo = cases[doe_ids[worker]]
                caseinfo['count'] -= 1
                caseinfo['p'].update(p)
                caseinfo['u'].update(u)
                caseinfo['r'].update(r)

                # save certain parts of existing metadata so we don't hide failures
                oldmeta = caseinfo['meta']
                success = oldmeta['success']
                if not success:
                    msg = oldmeta['msg']
                    oldmeta.update(meta)
                    oldmeta['success'] = success
                    oldmeta['msg'] = msg
                else:
                    oldmeta.update(meta)

                caseinfo['terminate'] += meta.get('terminate', 0)

                if caseinfo['count'] == 0:
                    # we've received case from all procs with that doe_id
                    # so the case is complete.

                    # worker has experienced some critical error, so we'll
                    # stop sending new cases and start to wrap things up
                    if caseinfo['terminate'] > 0:
                        more_cases = False
                        print("Worker %d has requested termination. No more new "
                              "cases will be distributed. Worker traceback was:\n%s" %
                              (worker, meta['msg']))
                    else:
                        # Send case to recorders
                        yield caseinfo

                        if more_cases:
                            try:
                                case = list(next(runiter))
                            except StopIteration:
                                more_cases = False
                            else:
                                # send a new case to every proc that works on
                                # cases with the current worker
                                doe = doe_ids[worker]
                                size, offset = self._id_map[doe]
                                # reset the per-case bookkeeping for this doe
                                cases[doe]['terminate'] = 0
                                cases[doe]['meta'] = {'success': 1, 'msg': ''}
                                for j in range(size):
                                    if trace:  # pragma: no cover
                                        debug("Sending New Case to Worker %d" % worker )
                                    comm.send(case, j+offset, tag=1)
                                    if trace:  # pragma: no cover
                                        debug("Case Sent to Worker %d" % worker )
                                    cases[doe]['count'] += 1
                                    sent += 1

                # don't stop until we hear back from every worker process
                # we sent a case to
                if received == sent:
                    break

        # tell all workers to stop
        for rank in range(1, self._full_comm.size):
            if trace:  # pragma: no cover
                debug("Make Worker Stop on Rank %d" % rank )
            comm.send(None, rank, tag=1)
            if trace:  # pragma: no cover
                debug("Worker has Stopped on Rank %d" % rank )

    else:  # worker
        while True:
            # wait on a case from the master
            if trace: debug("Receiving Case from Master") # pragma: no cover
            case = comm.recv(source=0, tag=1)
            if trace: debug("Case Received from Master") # pragma: no cover
            if case is None:  # we're done
                break

            # yield the case so it can be executed
            yield case

            # get local vars from RecordingManager
            params, unknowns, resids = self.recorders._get_local_case_data(self.root)

            # tell the master we're done with that case and send local vars
            if trace: debug("Send Master Local Vars") # pragma: no cover
            comm.send((comm.rank, params, unknowns, resids, self._last_meta), 0, tag=2)
            if trace: debug("Local Vars Sent to Master") # pragma: no cover
def get_combined_jac(self, J):
    """
    Take a J dict that's distributed, i.e., has different values
    across different MPI processes, and return a dict that
    contains all of the values from all of the processes. If
    values are duplicated, use the value from the lowest rank
    process. Note that J has a nested dict structure.

    An entry counts as missing on a process when its value has size 0.
    `J` is updated in place and also returned.

    Args
    ----
    J : `dict`
        Local Jacobian

    Returns
    -------
    `dict`
        Local gathered Jacobian
    """
    if not self.is_active():
        return J

    comm = self.comm
    my_rank = comm.rank

    # TODO: calculate dist_need_tups and dist_has_tups once
    # and cache it instead of doing every time.

    # Split the local keys into those we lack and those we hold.
    missing = []
    present = []
    for (output, param), value in iteritems(J):
        bucket = missing if value.size == 0 else present
        bucket.append((output, param))

    if trace:  # pragma: no cover
        debug("%s: allgather of needed tups" % self.pathname)
    all_missing = comm.allgather(missing)

    needed_set = set().union(*all_missing)
    if not needed_set:
        return J  # nobody needs any J entries

    if trace:  # pragma: no cover
        debug("%s: allgather of has_tups" % self.pathname)
    all_present = comm.allgather(present)

    # Walk the ranks in increasing order so the lowest rank holding a
    # needed entry becomes its owner; keep only the entries we own.
    claimed = set()
    owned_vals = []
    for rank, keys in enumerate(all_present):
        for key in keys:
            if key not in needed_set or key in claimed:
                continue
            claimed.add(key)
            if rank == my_rank:
                owned_vals.append((key, J[key]))

    if trace:  # pragma: no cover
        debug("%s: allgather of owned vals" % self.pathname)
    all_owned = comm.allgather(owned_vals)

    # Copy in everything owned by the other processes.
    for rank, pairs in enumerate(all_owned):
        if rank == my_rank:
            continue
        for (output, param), value in pairs:
            J[output, param] = value

    return J
def _setup_communicators(self, comm, parent_dir):
    """
    Assign a communicator to the root `System`.

    If more than one parallel DOE is requested and MPI is available, the
    offered communicator is split into one sub-communicator per DOE and
    the root receives the sub-communicator for this process. Otherwise
    the root receives `comm` unchanged.

    Args
    ----
    comm : an MPI communicator (real or fake)
        The communicator being offered by the Problem.

    parent_dir : str
        Absolute dir of parent `System`.
    """
    root = self.root

    # Without MPI no parallel DOE can run, so fall back to a single
    # serial DOE in that case too. Otherwise _num_par_doe and
    # _load_balance would keep values that describe a parallel run
    # that never gets set up.
    if not MPI or self._num_par_doe <= 1:
        self._num_par_doe = 1
        self._load_balance = False

    self._full_comm = comm

    # figure out which parallel DOE we are associated with
    if self._num_par_doe > 1:
        minprocs, maxprocs = root.get_req_procs()
        if self._load_balance:
            # rank 0 is kept for itself; the other ranks are shared out
            # among the remaining DOEs
            sizes, offsets = evenly_distrib_idxs(self._num_par_doe-1,
                                                 comm.size-1)
            sizes = [1]+list(sizes)
            offsets = [0]+[o+1 for o in offsets]
        else:
            sizes, offsets = evenly_distrib_idxs(self._num_par_doe,
                                                 comm.size)

        # a 'color' is assigned to each subsystem, with
        # an entry for each processor it will be given
        # e.g. [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]
        color = []
        self._id_map = {}
        for i in range(self._num_par_doe):
            color.extend([i]*sizes[i])
            self._id_map[i] = (sizes[i], offsets[i])

        self._par_doe_id = color[comm.rank]

        if self._load_balance:
            self._casecomm = None
        else:
            # only the first process of each DOE gets color 1; every
            # other process is marked MPI.UNDEFINED
            casecolor = []
            for i in range(self._num_par_doe):
                if sizes[i] > 0:
                    casecolor.append(1)
                    casecolor.extend([MPI.UNDEFINED]*(sizes[i]-1))

            # we need a comm that has all the 0 ranks of the subcomms so
            # we can gather multiple cases run as part of parallel DOE.
            if trace:  # pragma: no cover
                debug('%s: splitting casecomm, doe_id=%s' %
                      ('.'.join((root.pathname, 'driver')),
                       self._par_doe_id))
            self._casecomm = comm.Split(casecolor[comm.rank])
            if trace:  # pragma: no cover
                debug('%s: casecomm split done' %
                      '.'.join((root.pathname, 'driver')))

            if self._casecomm == MPI.COMM_NULL:
                self._casecomm = None

        # create a sub-communicator for each color and
        # get the one assigned to our color/process
        if trace:  # pragma: no cover
            debug('%s: splitting comm, doe_id=%s' %
                  ('.'.join((root.pathname, 'driver')),
                   self._par_doe_id))
        comm = comm.Split(self._par_doe_id)
        if trace:  # pragma: no cover
            debug('%s: comm split done' %
                  '.'.join((root.pathname, 'driver')))
    else:
        self._casecomm = None

    # tell RecordingManager it needs to do a multicase gather
    self.recorders._casecomm = self._casecomm

    root._setup_communicators(comm, parent_dir)
def _setup_variables(self, compute_indices=False):
    """
    Returns our params and unknowns dictionaries,
    re-keyed to use absolute variable names.

    The returned dictionaries are new `OrderedDict` objects, but the
    metadata dictionaries stored in them are the same objects held in
    `_init_params_dict` / `_init_unknowns_dict` (each one gains a
    'pathname' entry). The name mapping tables on `self._sysdata` are
    rebuilt as a side effect.

    Args
    ----
    compute_indices : bool, optional
        If True, call setup_distrib() to set values of
        'src_indices' metadata.

    Returns
    -------
    tuple of (`OrderedDict`, `OrderedDict`)
        The params and unknowns dictionaries keyed by absolute name.
    """
    to_prom_name = self._sysdata.to_prom_name = {}
    to_abs_uname = self._sysdata.to_abs_uname = {}
    to_abs_pnames = self._sysdata.to_abs_pnames = OrderedDict()
    to_prom_uname = self._sysdata.to_prom_uname = OrderedDict()
    to_prom_pname = self._sysdata.to_prom_pname = OrderedDict()

    if MPI and compute_indices and self.is_active():
        if hasattr(self, 'setup_distrib_idxs'):
            # Force the deprecation message to be shown, but restore the
            # caller's warning filters afterwards instead of leaving a
            # process-wide 'ignore' filter behind.
            with warnings.catch_warnings():
                warnings.simplefilter('always', DeprecationWarning)
                warnings.warn("setup_distrib_idxs is deprecated, use setup_distrib instead.",
                              DeprecationWarning, stacklevel=2)
            self.setup_distrib_idxs()
        else:
            self.setup_distrib()

        # now update our distrib_size metadata for any distributed
        # unknowns
        sizes = []
        names = []
        for name, meta in iteritems(self._init_unknowns_dict):
            if 'src_indices' in meta:
                sizes.append(len(meta['src_indices']))
                names.append(name)
        if sizes:
            if trace:  # pragma: no cover
                debug("allgathering src index sizes:")
            # one row per process, one column per distributed unknown
            allsizes = np.zeros((self.comm.size, len(sizes)), dtype=int)
            self.comm.Allgather(np.array(sizes, dtype=int), allsizes)
            for i, name in enumerate(names):
                self._init_unknowns_dict[name]['distrib_size'] = np.sum(allsizes[:, i])

    # key with absolute path names and add promoted names
    self._params_dict = OrderedDict()
    for name, meta in iteritems(self._init_params_dict):
        pathname = self._get_var_pathname(name)
        self._params_dict[pathname] = meta
        meta['pathname'] = pathname
        to_prom_pname[pathname] = name
        to_abs_pnames[name] = (pathname,)

    self._unknowns_dict = OrderedDict()
    for name, meta in iteritems(self._init_unknowns_dict):
        pathname = self._get_var_pathname(name)
        self._unknowns_dict[pathname] = meta
        meta['pathname'] = pathname
        to_prom_uname[pathname] = name
        to_abs_uname[name] = pathname

    # params are merged last, so a param mapping wins over an unknown
    # mapping with the same absolute name
    to_prom_name.update(to_prom_uname)
    to_prom_name.update(to_prom_pname)

    self._post_setup_vars = True

    self._sysdata._params_dict = self._params_dict
    self._sysdata._unknowns_dict = self._unknowns_dict

    return self._params_dict, self._unknowns_dict
size, offset = self._id_map[i] # send the case to all of the subprocs that will work on it for j in range(size): if trace: debug('Sending Seed case %d, %d' % (i, j)) comm.send(case, j + offset, tag=1) if trace: debug('Seed Case Sent %d, %d' % (i, j)) cases[i]['count'] += 1 sent += 1 # send the rest of the cases if sent > 0: more_cases = True while True: if trace: debug("Waiting on case") worker, p, u, r, meta = comm.recv(tag=2) if trace: debug("Case Recieved from Worker %d" % worker) received += 1 caseinfo = cases[doe_ids[worker]] caseinfo['count'] -= 1 caseinfo['p'].update(p) caseinfo['u'].update(u) caseinfo['r'].update(r) # save certain parts of existing metadata so we don't hide failures oldmeta = caseinfo['meta'] success = oldmeta['success'] if not success: