def _gather_shared_element(mesh, mode, shared_info, ielem, kelem, attrs,
                           nverts, base, ivert, skip_adding=False):
    ld, md = shared_info
    if mode == 'face':
        imode = 2
    elif mode == 'edge':
        imode = 1
    else:
        imode = 0

    me_list = [[] for i in range(nprc)]
    mea_list = [[] for i in range(nprc)]

    for key in ld.keys():
        mid, g_in_master = key
        if mid != myid:
            for le, me in zip(ld[key][imode], md[key][imode]):
                iii = np.where(ielem == le)[0]
                if len(iii) != 0:
                    if not skip_adding:
                        kelem[iii] = False
                    me_list[mid].append(me)
                    mea_list[mid].extend(list(attrs[iii]))
                assert len(iii) < 2, "same iface (please report this error to the developer)"

    new_ivert = np.array([], dtype=ivert.dtype)
    for i in range(nprc):
        mev = gather_vector(np.atleast_1d(me_list[i]).astype(int), root=i)
        mea = gather_vector(np.atleast_1d(mea_list[i]).astype(int), root=i)
        if i == myid:
            check = np.in1d(mev, ielem, invert=True)
            missing, mii = np.unique(mev[check], return_index=True)
            missinga = mea[check][mii]
            if len(missing) != 0:
                print("adding (face)", myid, missing)
                print(len(missing), len(missinga), missinga)
                if not skip_adding:
                    nverts, base = _add_face_data(mesh, missing, nverts, base)
                attrs = np.hstack((attrs, missinga)).astype(attrs.dtype)
                kelem = np.hstack((kelem, [True] * len(missing)))

    if not skip_adding:
        new_ivert = new_ivert.astype(ivert.dtype)
        new_ivert = allgather_vector(new_ivert)
        #ivert = np.hstack((ivert, new_ivert))

    attrs = allgather_vector(attrs)
    base = allgather_vector(base)
    nverts = allgather_vector(nverts)
    kelem = allgather_vector(kelem)

    return kelem, attrs, nverts, base, ivert
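# A minimal sketch (not the Petra-M implementation) of the gather_vector
# pattern assumed above: each rank contributes a variable-length NumPy array
# and the root receives their concatenation, while the other ranks get None.
# The helper name gather_vector_sketch is hypothetical.
import numpy as np
from mpi4py import MPI

def gather_vector_sketch(data, root=0, comm=MPI.COMM_WORLD):
    # gather the per-rank arrays as Python objects and concatenate on root
    chunks = comm.gather(np.asarray(data), root=root)
    if comm.rank == root:
        return np.hstack(chunks) if chunks else np.array([])
    return None

if __name__ == "__main__":
    comm = MPI.COMM_WORLD
    local = np.arange(comm.rank + 1)          # rank r owns r+1 entries
    full = gather_vector_sketch(local, root=0)
    if comm.rank == 0:
        print("gathered:", full)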
def solve_parallel(self, A, b, x=None):
    if self.gui.write_mat:
        self.write_mat(A, b, x, "." + smyid)

    sol = []
    # solve the problem and gather the solution to the head node...
    # may not be the best approach
    from petram.helper.mpi_recipes import gather_vector
    offset = A.RowOffsets()

    for bb in b:
        rows = MPI.COMM_WORLD.allgather(np.int32(bb.Size()))
        #rowstarts = np.hstack((0, np.cumsum(rows)))
        dprint1("row offset", offset.ToList())

        if x is None:
            xx = mfem.BlockVector(offset)
            xx.Assign(0.0)
        else:
            xx = x

        if self.gui.use_ls_reducer:
            try:
                self.reducer.Mult(bb, xx, self.gui.assert_no_convergence)
            except debug.ConvergenceError:
                self.gui.set_solve_error(
                    (True, "No Convergence: " + self.gui.name()))
                assert False, "No convergence"
        else:
            self.call_mult(self.solver, bb, xx)

        s = []
        for i in range(offset.Size() - 1):
            v = xx.GetBlock(i).GetDataArray()
            if self.gui.merge_real_imag:
                w = int(len(v) // 2)
                vv1 = gather_vector(v[:w])
                vv2 = gather_vector(v[w:])
                vv = np.hstack((vv1, vv2))
            else:
                vv = gather_vector(v)
            if myid == 0:
                s.append(vv)
            else:
                pass
        if myid == 0:
            sol.append(np.hstack(s))

    if myid == 0:
        sol = np.transpose(np.vstack(sol))
        return sol
    else:
        return None
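# A small NumPy-only sketch of how the gathered block solutions are stacked in
# solve_parallel above: the gathered blocks of each right-hand side are
# concatenated, then all right-hand sides are stacked and transposed so each
# column corresponds to one RHS.  The data values here are made up.
import numpy as np

sol = []
for case in range(2):                       # pretend there are two right-hand sides
    blocks = [np.arange(3) + 10 * case,     # gathered block 0
              np.arange(2) + 10 * case]     # gathered block 1
    sol.append(np.hstack(blocks))           # one long vector per RHS
sol = np.transpose(np.vstack(sol))          # shape (ndof, n_rhs)
print(sol.shape)                            # -> (5, 2)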
def Mult(self, x, y):
    if use_parallel:
        from petram.helper.mpi_recipes import (gather_vector,
                                               scatter_vector,
                                               allgather_vector)
        from mpi4py import MPI
    else:
        from petram.helper.dummy_mpi import MPI

    myid = MPI.COMM_WORLD.rank
    nproc = MPI.COMM_WORLD.size

    if self.is_complex_operator:
        vec = x.GetDataArray()
        vec = self.real_to_complex(vec)
    else:
        vec = x.GetDataArray()

    if self.row_offset == -1:
        xx = np.atleast_2d(vec).transpose()
    else:
        if self.gui.use_dist_rhs:
            xx = np.atleast_2d(vec).transpose()
        else:
            xx = gather_vector(vec)
            if myid == 0:
                xx = np.atleast_2d(xx).transpose()

    # if myid == 0:
    #     print("xx shape (at node-0)", xx.shape)

    s = [solver.Mult(xx) for solver in self.solver]

    if self.row_offset != -1:
        # if myid == 0:
        #w = [0.8*np.exp(1j*77/180*np.pi), np.exp(-1j*60/180*np.pi)*1.2]
        w = [1] * len(s)
        s = [xx.flatten() * w[i] for i, xx in enumerate(s)]
        s = np.mean(s, 0)
        # else:
        #     s = None

        distributed_sol = use_parallel and nproc > 1
        if not distributed_sol:
            size = np.sum(self.all_block_size, 0)[myid]
            s = scatter_vector(s, rcounts=size)

        if self.is_complex_operator:
            s = self.complex_to_real(s)
    else:
        s = [xx.flatten() for xx in s]
        s = np.mean(s, 0)
        if self.is_complex_operator:
            s = self.complex_to_real(s)

    y.Assign(s.flatten().astype(float, copy=False))
def Mult(self, x, y):
    try:
        from mpi4py import MPI
    except BaseException:
        from petram.helper.dummy_mpi import MPI

    myid = MPI.COMM_WORLD.rank
    nproc = MPI.COMM_WORLD.size

    if self.is_complex_operator:
        vec = x.GetDataArray()
        vec = self.real_to_complex(vec)
    else:
        vec = x.GetDataArray()

    if self.row_offset == -1:
        xx = np.atleast_2d(vec).transpose()
    else:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        from petram.helper.mpi_recipes import gather_vector, scatter_vector
        xx = gather_vector(vec)
        if myid == 0:
            xx = np.atleast_2d(xx).transpose()

    s = [solver.Mult(xx) for solver in self.solver]

    if self.row_offset != -1:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        nprc = MPI.COMM_WORLD.size
        myid = MPI.COMM_WORLD.rank

        if myid == 0:
            #w = [0.8*np.exp(1j*77/180*np.pi), np.exp(-1j*60/180*np.pi)*1.2]
            w = [1] * len(s)
            s = [xx.flatten() * w[i] for i, xx in enumerate(s)]
            s = np.mean(s, 0)
        else:
            s = None

        size = np.sum(self.all_block_size, 0)[myid]
        s = scatter_vector(s, rcounts=size)
        if self.is_complex_operator:
            s = self.complex_to_real(s)
    else:
        s = [xx.flatten() for xx in s]
        s = np.mean(s, 0)
        if self.is_complex_operator:
            s = self.complex_to_real(s)

    y.Assign(s.flatten())
def Mult(self, x, y):
    # In the parallel environment, we need to collect x and
    # redistribute y.
    # We keep the RowPart array from opr since here y is a
    # Vector, not a ParVector, even in the parallel env.
    if use_parallel:
        from mpi4py import MPI
    else:
        from petram.helper.dummy_mpi import MPI

    myid = MPI.COMM_WORLD.rank
    nproc = MPI.COMM_WORLD.size

    if self.is_complex_operator:
        vec = x.GetDataArray()
        ll = vec.size
        vec = vec[:ll // 2] + 1j * vec[ll // 2:]
    else:
        vec = x.GetDataArray()

    if self.row_part[0] == -1:
        xx = np.atleast_2d(vec).transpose()
    else:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        from petram.helper.mpi_recipes import gather_vector
        if self.gui.use_dist_rhs:
            xx = np.atleast_2d(vec).transpose()
        else:
            xx = gather_vector(vec)
            if myid == 0:
                xx = np.atleast_2d(xx).transpose()

    # if myid == 0:
    #     print("xx shape (at node-0)", xx.shape)

    s = self.solver.Mult(xx)

    if self.row_part[0] != -1:
        distributed_sol = use_parallel and nproc > 1
        if not distributed_sol:
            comm = MPI.COMM_WORLD
            s = comm.bcast(s)
            s = s[self.row_part[0]:self.row_part[1]]

    if self.is_complex_operator:
        s = np.hstack((s.real.flatten(), s.imag.flatten()))

    #nicePrint(s.shape, y.Size())
    y.Assign(s.flatten().astype(float, copy=False))
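# A quick, self-contained check (plain NumPy, mirroring the packing used in the
# Mult methods above) that a real vector storing [real parts, imag parts]
# round-trips through the complex view handed to the external solver.
import numpy as np

packed = np.array([1.0, 2.0, 3.0, 4.0])        # [Re..., Im...]
ll = packed.size
z = packed[:ll // 2] + 1j * packed[ll // 2:]   # -> array([1.+3.j, 2.+4.j])
unpacked = np.hstack((z.real, z.imag))
assert np.allclose(packed, unpacked)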
def Mult(self, b, x=None, case_base=0):
    self._solver.Mult(b[0], x)

    if use_parallel:
        from mpi4py import MPI
        from petram.helper.mpi_recipes import (gather_vector,
                                               allgather_vector)
        myid = MPI.COMM_WORLD.rank

        xx = gather_vector(x.GetDataArray())
        if myid == 0:
            xx = np.atleast_2d(xx).transpose()
    else:
        xx = x.GetDataArray().copy().reshape(-1, 1)
    return xx
def _gather_shared_vertex(mesh, u, shared_info, *iverts):
    # u_own, iv1, iv2... = gather_shared_vertex(mesh, u, ld, md, iv1, iv2...)
    #   u_own : unique vertex ids owned by a process
    #   shared_info : shared data information
    #   iv1, iv2, ...: arrays of vertex ids after a shadow vertex is
    #                  overwritten with the real one owned by another process
    #
    # processing of shared vertices:
    #   1) a vertex in a sub-volume may be a shadow
    #   2) the real one may not be part of the sub-volume on the master node
    #   3) we always use the real vertex:
    #      1) a shadow vertex index is overwritten with the real vertex
    #      2) make sure that the real one is added to the sub-volume mesh obj.
    offset = np.hstack([0, np.cumsum(allgather(mesh.GetNV()))])
    iverts = [iv + offset[myid] for iv in iverts]
    u = u + offset[myid]    # -> global numbering

    ld, md = shared_info
    mv_list = [[] for i in range(nprc)]
    for key in ld.keys():
        mid, g_in_master = key
        if mid != myid:
            for lv, mv in zip(ld[key][0], md[key][0]):
                ic = 0
                for iv in iverts:
                    iii = np.where(iv == lv)[0]
                    ic = ic + len(iii)
                    if len(iii) > 0:
                        iv[iii] = mv
                if ic > 0:
                    mv_list[mid].append(mv)
            u = u[np.in1d(u, ld[key][0], invert=True)]

    for i in range(nprc):
        mvv = gather_vector(np.atleast_1d(mv_list[i]).astype(int), root=i)
        if i == myid:
            missing = np.unique(mvv[np.in1d(mvv, u, invert=True)])
            if len(missing) != 0:
                print("adding (vertex)", missing)
                u = np.hstack((u, missing))

    u_own = np.sort(u - offset[myid])

    return [u_own] + list(iverts)    # u_own, iv1, iv2 = ...
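# A small, self-contained illustration of the global-numbering trick used in
# _gather_shared_vertex and the find_* routines below: a prefix sum over the
# per-rank entity counts gives each rank an offset, so local indices become
# globally unique.  The counts here are invented for the example.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
local_nv = 3 + comm.rank                      # pretend each rank owns a few vertices
counts = comm.allgather(local_nv)             # like allgather(mesh.GetNV())
offset = np.hstack([0, np.cumsum(counts)])    # offset[r] = first global id on rank r
local_ids = np.arange(local_nv)
global_ids = local_ids + offset[comm.rank]    # -> global numbering
print(comm.rank, global_ids)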
def solve_parallel(self, A, b, x=None):
    if self.gui.write_mat:
        self.write_mat(A, b, x, "." + smyid)

    M = self.make_preconditioner(A, parallel=True)
    solver = self.make_solver(A, M, use_mpi=True)

    sol = []
    # solve the problem and gather the solution to the head node...
    # may not be the best approach
    from petram.helper.mpi_recipes import gather_vector
    offset = A.RowOffsets()

    for bb in b:
        rows = MPI.COMM_WORLD.allgather(np.int32(bb.Size()))
        rowstarts = np.hstack((0, np.cumsum(rows)))
        dprint1("rowstarts/offset", rowstarts, offset.ToList())

        if x is None:
            xx = mfem.BlockVector(offset)
            xx.Assign(0.0)
        else:
            xx = x
            #for j in range(cols):
            #    dprint1(x.GetBlock(j).Size())
            #    dprint1(x.GetBlock(j).GetDataArray())
            #assert False, "must implement this"

        self.call_mult(solver, bb, xx)

        s = []
        for i in range(offset.Size() - 1):
            v = xx.GetBlock(i).GetDataArray()
            vv = gather_vector(v)
            if myid == 0:
                s.append(vv)
            else:
                pass
        if myid == 0:
            sol.append(np.hstack(s))

    if myid == 0:
        sol = np.transpose(np.vstack(sol))
        return sol
    else:
        return None
def Mult(self, x, y):
    # In the parallel environment, we need to collect x and
    # redistribute y.
    # We keep the RowPart array from opr since here y is a
    # Vector, not a ParVector, even in the parallel env.
    try:
        from mpi4py import MPI
    except BaseException:
        from petram.helper.dummy_mpi import MPI

    myid = MPI.COMM_WORLD.rank
    nproc = MPI.COMM_WORLD.size

    if self.is_complex_operator:
        vec = x.GetDataArray()
        ll = vec.size
        vec = vec[:ll // 2] + 1j * vec[ll // 2:]
    else:
        vec = x.GetDataArray()

    if self.row_part[0] == -1:
        xx = np.atleast_2d(vec).transpose()
    else:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        from petram.helper.mpi_recipes import gather_vector
        xx = gather_vector(vec)
        if myid == 0:
            xx = np.atleast_2d(xx).transpose()

    s = self.solver.Mult(xx)

    if self.row_part[0] != -1:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        s = comm.bcast(s)
        s = s[self.row_part[0]:self.row_part[1]]

    if self.is_complex_operator:
        s = np.hstack((s.real.flatten(), s.imag.flatten()))

    y.Assign(s.flatten())
def gather(self, nprc, root=None, distribute=False, overwrite=True):
    # we don't take nprc from MPI.COMM_WORLD (see a comment above)
    if nprc == 1:
        dest = self if overwrite else GlobalNamedList()
        if not overwrite:
            for key in self:
                dest[key] = np.array(self[key]).copy()
        return dest

    dest = self if overwrite else GlobalNamedList()
    for j, key in enumerate(self._gkey):
        if key in self:
            data = np.array(self[key], dtype=self.dtype, copy=False)
        else:
            data = np.atleast_1d([]).astype(self.dtype)

        r = 0 if root is None else root
        r = j % nprc if distribute else r

        data = gather_vector(data, root=r)
        if myid == r:
            dest[key] = data
        else:
            if key in dest:
                del dest[key]
    return dest
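# A rough sketch (hypothetical names, not the GlobalNamedList API) of the
# round-robin gather that gather(distribute=True) performs above: the j-th
# globally known key is collected onto rank j % nprc, spreading the gathered
# data across ranks instead of concentrating it on rank 0.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
nprc = comm.size
local = {"a": np.array([comm.rank]), "b": np.array([10 * comm.rank])}
global_keys = sorted(set(sum(comm.allgather(list(local)), [])))

gathered = {}
for j, key in enumerate(global_keys):
    root = j % nprc
    data = local.get(key, np.array([], dtype=int))
    chunks = comm.gather(data, root=root)
    if comm.rank == root:
        gathered[key] = np.hstack(chunks)
print(comm.rank, {k: v.tolist() for k, v in gathered.items()})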
def find_corner(mesh):
    '''
    For a 2D geometry, find lines (boundaries between two bdr_attributes)
    and the corners of those lines.
    '''
    use_parallel = hasattr(mesh, "GroupNVertices")

    if use_parallel:
        from mpi4py import MPI
        myid = MPI.COMM_WORLD.rank
        nprc = MPI.COMM_WORLD.size
        comm = MPI.COMM_WORLD

        from mfem.common.mpi_debug import nicePrint, niceCall
        from petram.helper.mpi_recipes import allgather, allgather_vector, gather_vector
        from petram.mesh.mesh_utils import distribute_shared_entity
        if not hasattr(mesh, "shared_info"):
            mesh.shared_info = distribute_shared_entity(mesh)
    else:
        myid = 0
        nprc = 1

    ndim = mesh.Dimension()
    sdim = mesh.SpaceDimension()
    ne = mesh.GetNEdges()

    assert ndim == 2, "find_corner is for a 2D mesh"

    get_edges = mesh.GetElementEdges
    get_attr = mesh.GetAttribute
    iattr = mesh.GetAttributeArray()    # min of this array is 1
    nattr = 0 if iattr.size == 0 else np.max(iattr)
    nb = mesh.GetNE()
    nbe = mesh.GetNBE()

    if use_parallel:
        nbe = sum(allgather(nbe))
    if nbe == 0:
        return {}, {}, {}

    if use_parallel:
        offset = np.hstack([0, np.cumsum(allgather(mesh.GetNEdges()))])
        offsetf = np.hstack([0, np.cumsum(allgather(mesh.GetNFaces()))])
        offsetv = np.hstack([0, np.cumsum(allgather(mesh.GetNV()))])
        myoffset = offset[myid]
        myoffsetf = offsetf[myid]
        myoffsetv = offsetv[myid]
        nattr = max(allgather(nattr))
        ne = sum(allgather(mesh.GetNEdges()))
    else:
        myoffset = np.array(0, dtype=int)
        myoffsetf = np.array(0, dtype=int)
        myoffsetv = np.array(0, dtype=int)

    if mesh.GetNBE() == 0:
        # some parallel node may have zero boundary
        battrs = []
        iedges = np.array([], dtype=int)
    else:
        battrs = mesh.GetBdrAttributeArray()
        iedges = np.hstack([
            mesh.GetBdrElementEdgeIndex(ibdr) for ibdr in range(mesh.GetNBE())
        ]).astype(int, copy=False)

    line2edge = GlobalNamedList()
    line2edge.setlists(battrs, iedges)

    if use_parallel:
        ld, md = mesh.shared_info
        iedges = iedges + myoffset

    if use_parallel:
        for key2 in ld:
            if key2[0] == myid:
                continue
            iii = np.in1d(iedges, ld[key2][1], invert=True)
            if len(iii) == 0:
                continue
            iedges = iedges[iii]
            battrs = battrs[iii]

    line2realedge = GlobalNamedList()
    line2realedge.setlists(battrs, iedges)

    line2realvert = GlobalNamedList()
    for key in line2realedge:
        data = np.hstack([
            mesh.GetEdgeVertices(i - myoffset) + myoffsetv
            for i in line2realedge[key]
        ])
        if use_parallel:
            for key2 in ld:
                if key2[0] == myid:
                    continue
                for lv, mv in zip(ld[key2][0], md[key2][0]):
                    iii = np.where(data == lv)[0]
                    data[iii] = mv
        line2realvert[key] = data

    line2realvert.sharekeys().gather(nprc, distribute=True)

    corners = GlobalNamedList()
    for key in line2realvert:
        seen = defaultdict(int)
        for iiv in line2realvert[key]:
            seen[iiv] += 1
        corners[key] = [kk for kk in seen if seen[kk] == 1]

    sorted_key = corners.sharekeys().globalkeys

    corners.allgather()

    u_own = np.unique(
        np.hstack([corners[key] for key in corners]).astype(int, copy=False))

    if use_parallel:
        idx = np.logical_and(u_own >= offsetv[myid], u_own < offsetv[myid + 1])
        u_own = u_own[idx]

    if len(u_own) > 0:
        vtx = np.hstack([mesh.GetVertexArray(i - myoffsetv) for i in u_own])
    else:
        vtx = np.atleast_1d([])

    if use_parallel:
        vtx = gather_vector(vtx)
        u_own = gather_vector(u_own)

    # sort vertices
    if myid == 0:
        vtx = vtx.reshape(-1, sdim)
        tmp = sorted([(k, tuple(x)) for k, x in enumerate(vtx)],
                     key=lambda x: x[1])
        if len(tmp) > 0:
            vtx = np.vstack([x[1] for x in tmp])
            u_own = np.hstack([[u_own[x[0]] for x in tmp]]).astype(int)
        ivert = np.arange(len(vtx), dtype=int) + 1
    else:
        u_own = np.atleast_1d([]).astype(int)
        ivert = np.atleast_1d([]).astype(int)

    if use_parallel:
        #if myid != 0:
        #    u_own = None; vtx = None
        u_own = comm.bcast(u_own)
        ivert = np.arange(len(u_own), dtype=int) + 1
        for key in ld:
            if key[0] == myid:
                continue
            for lv, mv in zip(ld[key][0], md[key][0]):
                iii = np.where(u_own == mv)[0]
                u_own[iii] = lv
        idx = np.logical_and(u_own >= offsetv[myid], u_own < offsetv[myid + 1])
        u_own = u_own[idx]
        vtx = comm.bcast(vtx)
        vtx = comm.bcast(vtx)[idx.flatten()]
        ivert = ivert[idx]

    vert2vert = {iv: iu - myoffsetv for iv, iu in zip(ivert, u_own)}

    # mapping line index to vertex index (not MFEM vertex id)
    line2vert = {}
    #nicePrint(corners)
    corners.bcast(nprc, distributed=True)
    for j, key in enumerate(sorted_key):
        data = corners[key]
        if use_parallel:
            for key2 in ld:
                if key2[0] == myid:
                    continue
                for lv, mv in zip(ld[key2][0], md[key2][0]):
                    iii = np.where(data == mv)[0]
                    data[iii] = lv
            idx = np.logical_and(data >= offsetv[myid], data < offsetv[myid + 1])
            data = data[idx]
        data = list(data - myoffsetv)
        line2vert[j + 1] = [k for k in vert2vert if vert2vert[k] in data]

    if debug:
        g = GlobalNamedList(line2vert)
        g.sharekeys()
        gg = g.gather(nprc, overwrite=False).unique()
        if myid == 0:
            print(gg)
        for i in range(nprc):
            if use_parallel:
                comm.barrier()
            if myid == i:
                for k in vert2vert:
                    print(myid, k, mesh.GetVertexArray(vert2vert[k]))
            if use_parallel:
                comm.barrier()

    return line2vert, line2edge, vert2vert
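# A tiny sketch of the corner test used in find_corner (and again in
# find_edge_corner below): along one boundary line, each interior vertex is
# shared by two edges, so a vertex that appears exactly once among the line's
# edge endpoints is an end point, i.e. a corner.  The edge list is made up.
from collections import defaultdict

edge_vertices = [(0, 1), (1, 2), (2, 3)]      # a chain of three edges
seen = defaultdict(int)
for v0, v1 in edge_vertices:
    seen[v0] += 1
    seen[v1] += 1
corners = [v for v in seen if seen[v] == 1]
print(corners)                                 # -> [0, 3]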
def find_edge_corner(mesh):
    '''
    For a 3D geometry, find lines (boundaries between two bdr_attributes)
    and the corners of those lines.
    '''
    use_parallel = hasattr(mesh, "GroupNVertices")

    if use_parallel:
        from mpi4py import MPI
        myid = MPI.COMM_WORLD.rank
        nprc = MPI.COMM_WORLD.size
        comm = MPI.COMM_WORLD

        from mfem.common.mpi_debug import nicePrint, niceCall
        from petram.helper.mpi_recipes import allgather, allgather_vector, gather_vector
        from petram.mesh.mesh_utils import distribute_shared_entity
        if not hasattr(mesh, "shared_info"):
            mesh.shared_info = distribute_shared_entity(mesh)
    else:
        myid = 0
        nprc = 1

    ndim = mesh.Dimension()
    sdim = mesh.SpaceDimension()
    ne = mesh.GetNEdges()

    assert ndim == 3, "find_edge_corner is for 3D mesh"

    # 3D mesh
    get_edges = mesh.GetBdrElementEdges
    get_attr = mesh.GetBdrAttribute
    iattr = mesh.GetBdrAttributeArray()    # min of this array is 1
    nattr = 0 if iattr.size == 0 else np.max(iattr)
    nb = mesh.GetNBE()

    if mesh.GetNBE() == 0 and nprc == 1:
        return {}, {}, {}, {}

    if use_parallel:
        offset = np.hstack([0, np.cumsum(allgather(mesh.GetNEdges()))])
        offsetf = np.hstack([0, np.cumsum(allgather(mesh.GetNFaces()))])
        offsetv = np.hstack([0, np.cumsum(allgather(mesh.GetNV()))])
        myoffset = offset[myid]
        myoffsetf = offsetf[myid]
        myoffsetv = offsetv[myid]
        nattr = max(allgather(nattr))
        ne = sum(allgather(mesh.GetNEdges()))
    else:
        myoffset = np.array(0, dtype=int)
        myoffsetf = np.array(0, dtype=int)
        myoffsetv = np.array(0, dtype=int)

    edges = defaultdict(list)
    iedges = np.arange(nb, dtype=int)

    if use_parallel:
        # eliminate slave faces from consideration
        iface = np.array([mesh.GetBdrElementEdgeIndex(i) for i in iedges],
                         dtype=int) + myoffsetf
        mask = np.array([True] * len(iface), dtype=bool)
        ld, md = mesh.shared_info
        for key in ld.keys():
            mid, g_in_master = key
            if mid == myid:
                continue
            iii = np.in1d(iedges, ld[key][2], invert=True)
            mask = np.logical_and(mask, iii)
        iedges = iedges[mask]
        # nicePrint(len(iedges))   # np 1, 2, 4 gives 900... ok

    for i in iedges:
        ie, io = get_edges(i)
        ie += myoffset
        iattr = get_attr(i)
        edges[iattr].extend(list(ie))

    if use_parallel:
        # collect edges using the master edge number
        # and gather them to a node.
        edgesc = {}
        ld, md = mesh.shared_info
        for j in range(1, nattr + 1):
            if j in edges:
                data = np.array(edges[j], dtype=int)
                for key in ld.keys():
                    mid, g_in_master = key
                    if mid == myid:
                        continue
                    for le, me in zip(ld[key][1], md[key][1]):
                        iii = np.where(data == le)[0]
                        data[iii] = me
            else:
                data = np.atleast_1d([]).astype(int)
            data = gather_vector(data, root=j % nprc)
            if data is not None:
                edgesc[j] = data
        edges = edgesc

    # for each iattr a real edge appears only once
    for key in edges.keys():
        seen = defaultdict(int)
        for x in edges[key]:
            seen[x] += 1
        edges[key] = [k for k in seen if seen[k] == 1]

    #nicePrint('Num edges', ...)
    nedge = sum([len(edges[k]) for k in edges])
    if nedge != 0:
        N = np.hstack(
            [np.zeros(len(edges[k]), dtype=int) + k - 1 for k in edges.keys()])
        M = np.hstack([np.array(edges[k]) for k in edges.keys()])
    else:
        N = np.atleast_1d([]).astype(int)
        M = np.atleast_1d([]).astype(int)

    M = M.astype(int, copy=False)
    N = N.astype(int, copy=False)

    if use_parallel:
        # send attribute to owner of edges
        for j in range(nprc):
            idx = np.logical_and(M >= offset[j], M < offset[j + 1])
            Mpart = M[idx]
            Npart = N[idx]
            Mpart = gather_vector(Mpart, root=j)
            Npart = gather_vector(Npart, root=j)
            if j == myid:
                M2, N2 = Mpart, Npart
        M, N = M2, N2

    #nicePrint('unique edge', len(np.unique(M)))
    #nicePrint('N', len(N))

    data = M * 0 + 1
    table1 = coo_matrix((data, (M, N)), shape=(ne, nattr), dtype=int)
    csr = table1.tocsr()

    # an embedded surface only touches one iattr
    idx = np.where(np.diff(csr.indptr) >= 1)[0]
    csr = csr[idx, :]

    # these are true bdr edges.
    bb_edges = defaultdict(list)
    indptr = csr.indptr
    indices = csr.indices
    for i in range(csr.shape[0]):
        idxs = tuple(sorted(indices[indptr[i]:indptr[i + 1]] + 1))
        bb_edges[idxs].append(idx[i])
    bb_edges.default_factory = None

    # sort keys (= attribute set)
    keys = list(bb_edges)
    if use_parallel:
        keys = comm.gather(keys)
        if myid == 0:
            keys = sum(keys, [])
    sorted_key = None
    if myid == 0:
        sorted_key = list(set(keys))
        sorted_key.sort(key=lambda x: (len(x), x))
    if use_parallel:
        sorted_key = comm.bcast(sorted_key, root=0)

    bb_edgess = OrderedDict()
    for k in sorted_key:
        if k in bb_edges:
            bb_edgess[k] = bb_edges[k]
        else:
            bb_edgess[k] = []   # in parallel, put empty so that the key order is kept
    bb_edges = bb_edgess

    '''
    res = []
    for key in sorted_key:
        tmp = allgather(len(bb_edges[key]))
        if myid == 0:
            res.append((key, sum(tmp)))
    if myid == 0:
        print(res)
    '''
    # at this point each node has its own edges populated in bb_edges (no shadow)

    ivert = {}
    for k in sorted_key:
        if len(bb_edges[k]) > 0:
            ivert[k] = np.hstack([
                mesh.GetEdgeVertices(i - myoffset) + myoffsetv
                for i in np.unique(bb_edges[k])
            ]).astype(int)
        else:
            ivert[k] = np.atleast_1d([]).astype(int)

    if use_parallel:
        # convert shadow vertex to real
        for k in sorted_key:
            data = ivert[k]
            for key in ld:
                if key[0] == myid:
                    continue
                for le, me in zip(ld[key][0], md[key][0]):
                    iii = np.where(data == le)[0]
                    data[iii] = me
            ivert[k] = data

        ivertc = {}
        for j, k in enumerate(sorted_key):
            data = gather_vector(ivert[k], root=j % nprc)
            if data is not None:
                ivertc[k] = data
        ivert = ivertc

    corners = {}
    for key in ivert:
        seen = defaultdict(int)
        for iiv in ivert[key]:
            seen[iiv] += 1
        corners[key] = [kk for kk in seen if seen[kk] == 1]

    if len(corners) == 0:
        u = np.atleast_1d([]).astype(int)
    else:
        u = np.unique(np.hstack([corners[key]
                                 for key in corners])).astype(int, copy=False)

    # collect vertices on each node and gather to node 0
    u_own = u
    if use_parallel:
        u = np.unique(allgather_vector(u))
        u_own = u.copy()
        for key in ld:
            if key[0] == myid:
                continue
            for lv, mv in zip(ld[key][0], md[key][0]):
                iii = np.where(u == mv)[0]
                u[iii] = lv
        idx = np.logical_and(u >= offsetv[myid], u < offsetv[myid + 1])
        u = u[idx]                     # u includes shared vertices
        idx = np.logical_and(u_own >= offsetv[myid], u_own < offsetv[myid + 1])
        u_own = u_own[idx]             # u_own contains only owned vertices

    #nicePrint('u_own', mesh.GetNV(), ",", u_own)
    if len(u_own) > 0:
        vtx = np.vstack([mesh.GetVertexArray(i - myoffsetv) for i in u_own])
    else:
        vtx = np.atleast_1d([]).reshape(-1, sdim)

    if use_parallel:
        u_own = gather_vector(u_own)
        vtx = gather_vector(vtx.flatten())

    # sort vertices
    if myid == 0:
        vtx = vtx.reshape(-1, sdim)
        #print('vtx shape', vtx.shape)
        tmp = sorted([(k, tuple(x)) for k, x in enumerate(vtx)],
                     key=lambda x: x[1])
        if len(tmp) > 0:
            vtx = np.vstack([x[1] for x in tmp])
            u_own = np.hstack([[u_own[x[0]] for x in tmp]]).astype(int)
        ivert = np.arange(len(vtx), dtype=int) + 1
    else:
        vtx = np.atleast_1d([]).astype(float)
        u_own = np.atleast_1d([]).astype(int)
        ivert = np.atleast_1d([]).astype(int)

    if use_parallel:
        #if myid != 0:
        #    u_own = None; vtx = None
        u_own = comm.bcast(u_own)
        ivert = np.arange(len(u_own), dtype=int) + 1
        for key in ld:
            if key[0] == myid:
                continue
            for lv, mv in zip(ld[key][0], md[key][0]):
                iii = np.where(u_own == mv)[0]
                u_own[iii] = lv
        idx = np.logical_and(u_own >= offsetv[myid], u_own < offsetv[myid + 1])
        u_own = u_own[idx]
        ivert = ivert[idx]
        #vtx = comm.bcast(vtx)
        #vtx = comm.bcast(vtx)[idx.flatten()]

    vert2vert = {iv: iu - myoffsetv for iv, iu in zip(ivert, u_own)}
    #nicePrint('vert2vert', vert2vert)

    # mapping line index to vertex index (not MFEM vertex id)
    line2vert = {}
    #nicePrint(corners)
    for j, key in enumerate(sorted_key):
        data = corners[key] if key in corners else None
        if use_parallel:
            data = comm.bcast(data, root=j % nprc)
            data = np.array(data, dtype=int)
            for key2 in ld:
                if key2[0] == myid:
                    continue
                for lv, mv in zip(ld[key2][0], md[key2][0]):
                    iii = np.where(data == mv)[0]
                    data[iii] = lv
            idx = np.logical_and(data >= offsetv[myid], data < offsetv[myid + 1])
            data = data[idx]
        else:
            data = np.array(data, dtype=int)
        data = list(data - myoffsetv)
        line2vert[j + 1] = [k for k in vert2vert if vert2vert[k] in data]

    # finish-up edge data
    if use_parallel:
        # distribute edges, convert (add) from master to local number
        for attr_set in bb_edges:
            data = sum(allgather(bb_edges[attr_set]), [])
            data = np.array(data, dtype=int)
            for key in ld:
                if key[0] == myid:
                    continue
                for le, me in zip(ld[key][1], md[key][1]):
                    iii = np.where(data == me)[0]
                    data[iii] = le
            idx = np.logical_and(data >= offset[myid], data < offset[myid + 1])
            data = data[idx]
            bb_edges[attr_set] = list(data - myoffset)

        attrs = list(edges)
        attrsa = np.unique(sum(allgather(attrs), []))
        for a in attrsa:
            if a in attrs:
                data = np.array(edges[a], dtype=int)
            else:
                data = np.atleast_1d([]).astype(int)
            data = allgather_vector(data)
            for key in ld:
                if key[0] == myid:
                    continue
                for le, me in zip(ld[key][1], md[key][1]):
                    iii = np.where(data == me)[0]
                    data[iii] = le
            idx = np.logical_and(data >= offset[myid], data < offset[myid + 1])
            data = data[idx]
            edges[a] = list(data - myoffset)

    line2edge = {}
    for k, attr_set in enumerate(sorted_key):
        if attr_set in bb_edges:
            line2edge[k + 1] = bb_edges[attr_set]
        else:
            line2edge[k + 1] = []

    '''
    # debug: find true (non-shadow) edges
    line2edge_true = {}
    for k, attr_set in enumerate(sorted_key):
        if attr_set in bb_edges:
            data = np.array(bb_edges[attr_set], dtype=int)
            for key in ld:
                if key[0] == myid:
                    continue
                iii = np.in1d(data + myoffset, ld[key][1], invert=True)
                data = data[iii]
            line2edge_true[k+1] = data
        else:
            line2edge_true[k+1] = []
    nicePrint([sum(allgather(len(line2edge_true[key]))) for key in line2edge])
    '''

    surf2line = {k + 1: [] for k in range(nattr)}
    for k, attr_set in enumerate(sorted_key):
        for a in attr_set:
            surf2line[a].append(k + 1)

    if debug:
        g = GlobalNamedList(line2vert)
        g.sharekeys()
        gg = g.gather(nprc, overwrite=False).unique()
        if myid == 0:
            print("debug (gathered line2vert)", gg)

    return surf2line, line2vert, line2edge, vert2vert
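# A toy, self-contained sketch of the edge/attribute bookkeeping used in
# find_edge_corner above: build a sparse (edge x attribute) incidence table
# with scipy and group edges by the sorted tuple of boundary attributes they
# touch.  The index data here is invented for illustration.
import numpy as np
from collections import defaultdict
from scipy.sparse import coo_matrix

M = np.array([0, 0, 1, 2, 2])        # edge indices
N = np.array([0, 1, 0, 1, 2])        # attribute indices (0-based)
ne, nattr = 3, 3
table = coo_matrix((np.ones_like(M), (M, N)), shape=(ne, nattr), dtype=int).tocsr()

bb_edges = defaultdict(list)
for i in range(table.shape[0]):
    attrs = tuple(sorted(table.indices[table.indptr[i]:table.indptr[i + 1]] + 1))
    bb_edges[attrs].append(i)
print(dict(bb_edges))                 # -> {(1, 2): [0], (1,): [1], (2, 3): [2]}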
def solve_parallel(self, A, b, x=None):
    from mpi4py import MPI
    myid = MPI.COMM_WORLD.rank
    nproc = MPI.COMM_WORLD.size
    from petram.helper.mpi_recipes import gather_vector

    def get_block(Op, i, j):
        try:
            return Op._linked_op[(i, j)]
        except KeyError:
            return None

    offset = A.RowOffsets()
    rows = A.NumRowBlocks()
    cols = A.NumColBlocks()

    if self.gui.write_mat:
        for i in range(cols):
            for j in range(rows):
                m = get_block(A, i, j)
                if m is None:
                    continue
                m.Print('matrix_' + str(i) + '_' + str(j))
        for i, bb in enumerate(b):
            for j in range(rows):
                v = bb.GetBlock(j)
                v.Print('rhs_' + str(i) + '_' + str(j) + '.' + smyid)
        if x is not None:
            for j in range(rows):
                xx = x.GetBlock(j)
                xx.Print('x_' + str(i) + '_' + str(j) + '.' + smyid)

    M = mfem.BlockDiagonalPreconditioner(offset)

    prcs = dict(self.gui.preconditioners)
    name = self.Aname
    assert not self.gui.parent.is_complex(), "can not solve complex"
    if self.gui.parent.is_converted_from_complex():
        name = sum([[n, n] for n in name], [])

    for k, n in enumerate(name):
        prc = prcs[n][1]
        if prc == "None":
            continue
        name = "".join([tmp for tmp in prc if not tmp.isdigit()])
        A0 = get_block(A, k, k)
        if A0 is None and not name.startswith('schur'):
            continue

        if hasattr(mfem.HypreSmoother, prc):
            invA0 = mfem.HypreSmoother(A0)
            invA0.SetType(getattr(mfem.HypreSmoother, prc))
        elif prc == 'ams':
            depvar = self.engine.r_dep_vars[k]
            dprint1("setting up AMS for ", depvar)
            prec_fespace = self.engine.fespaces[depvar]
            invA0 = mfem.HypreAMS(A0, prec_fespace)
            invA0.SetSingularProblem()
        elif name == 'MUMPS':
            cls = SparseSmootherCls[name][0]
            invA0 = cls(A0, gui=self.gui[prc], engine=self.engine)
        elif name.startswith('schur'):
            args = name.split("(")[-1].split(")")[0].split(",")
            dprint1("setting up schur for ", args)
            if len(args) > 1:
                assert False, "not yet supported"
            for arg in args:
                r1 = self.engine.dep_var_offset(arg.strip())
                c1 = self.engine.r_dep_var_offset(arg.strip())
                B = get_block(A, k, c1)
                Bt = get_block(A, r1, k).Transpose()
                Bt = Bt.Transpose()
                B0 = get_block(A, r1, c1)
                Md = mfem.HypreParVector(MPI.COMM_WORLD,
                                         B0.GetGlobalNumRows(),
                                         B0.GetColStarts())
                B0.GetDiag(Md)
                Bt.InvScaleRows(Md)
                S = mfem.ParMult(B, Bt)
                invA0 = mfem.HypreBoomerAMG(S)
                invA0.iterative_mode = False
        else:
            cls = SparseSmootherCls[name][0]
            invA0 = cls(A0, gui=self.gui[prc])

        invA0.iterative_mode = False
        M.SetDiagonalBlock(k, invA0)

    '''
    We should support a Schur complement type preconditioner
    if offset.Size() > 2:
        B = get_block(A, 1, 0)
        MinvBt = get_block(A, 0, 1)
        #Md = mfem.HypreParVector(MPI.COMM_WORLD,
        #                         A0.GetGlobalNumRows(),
        #                         A0.GetRowStarts())
        Md = mfem.Vector()
        A0.GetDiag(Md)
        MinvBt.InvScaleRows(Md)
        S = mfem.ParMult(B, MinvBt)
        invS = mfem.HypreBoomerAMG(S)
        invS.iterative_mode = False
        M.SetDiagonalBlock(1, invS)
    '''

    maxiter = int(self.maxiter)
    atol = self.abstol
    rtol = self.reltol
    kdim = int(self.kdim)
    printit = 1

    sol = []

    solver = mfem.GMRESSolver(MPI.COMM_WORLD)
    solver.SetKDim(kdim)
    #solver = mfem.MINRESSolver(MPI.COMM_WORLD)
    #solver.SetOperator(A)
    #solver = mfem.CGSolver(MPI.COMM_WORLD)
    solver.SetOperator(A)
    solver.SetAbsTol(atol)
    solver.SetRelTol(rtol)
    solver.SetMaxIter(maxiter)
    solver.SetPreconditioner(M)
    solver.SetPrintLevel(1)

    # solve the problem and gather the solution to the head node...
    # may not be the best approach
    for bb in b:
        rows = MPI.COMM_WORLD.allgather(np.int32(bb.Size()))
        rowstarts = np.hstack((0, np.cumsum(rows)))
        dprint1("rowstarts/offset", rowstarts, offset.ToList())

        if x is None:
            xx = mfem.BlockVector(offset)
            xx.Assign(0.0)
        else:
            xx = x
            #for j in range(cols):
            #    dprint1(x.GetBlock(j).Size())
            #    dprint1(x.GetBlock(j).GetDataArray())
            #assert False, "must implement this"

        solver.Mult(bb, xx)

        s = []
        for i in range(offset.Size() - 1):
            v = xx.GetBlock(i).GetDataArray()
            vv = gather_vector(v)
            if myid == 0:
                s.append(vv)
            else:
                pass
        if myid == 0:
            sol.append(np.hstack(s))

    if myid == 0:
        sol = np.transpose(np.vstack(sol))
        return sol
    else:
        return None