def make_diagonal_mat(engine, fes1, fes2, value):
    '''
    Assemble an integrator-free form on (fes1, fes2) and return the
    resulting matrix with its diagonal set to value.

    engine : object providing new_bf/new_mixed_bf and a2A/a2Am
    fes1, fes2 : finite element spaces. A plain bilinear form is used
                 when they compare equal, a mixed form otherwise.
    value : forwarded to setDiag for every row index

    Raises AssertionError if the assembled matrix is not square.
    '''
    if fes1 == fes2:
        form = engine.new_bf(fes1)
        form.Assemble()
        form.Finalize()
        assembled = engine.a2A(form)
    else:
        # no integrator is attached; the mixed form is assembled as-is
        form = engine.new_mixed_bf(fes1, fes2)
        form.Assemble()
        assembled = engine.a2Am(form)

    from mfem.common.chypre import MfemMat2PyMat

    if use_parallel:
        assembled.CopyRowStarts()
        assembled.CopyColStarts()
        diag_mat = MfemMat2PyMat(assembled, None)
    else:
        diag_mat = MfemMat2PyMat(assembled, None)
        from petram.helper.block_matrix import convert_to_ScipyCoo
        diag_mat = convert_to_ScipyCoo(diag_mat)

    dims = diag_mat.shape
    nrows = dims[0]
    assert nrows == dims[1], "Identity Operator must be square"

    diag_mat.setDiag(range(nrows), value=value)
    return diag_mat
def convert_mat_to_operator(self, mat):
    '''
    a utility routine to convert locally assembled mat
    to linear operator

    mat : locally assembled sparse matrix (must expose .real/.imag,
          .shape and .dtype)

    Returns a CHypreMat when use_parallel is set, otherwise the result
    of convert_to_ScipyCoo.  The original body built this object but
    fell off the end of the function without returning it.
    '''
    from scipy.sparse import coo_matrix, csr_matrix

    if use_parallel:
        from mfem.common.chypre import CHypreMat

        m1 = csr_matrix(mat.real, dtype=float)
        # the imaginary part is passed only for complex input
        if np.iscomplexobj(mat):
            m2 = csr_matrix(mat.imag, dtype=float)
        else:
            m2 = None

        # column partition is taken from the true-DoF range of self.fes1
        start_col = self.fes1.GetMyTDofOffset()
        end_col = self.fes1.GetMyTDofOffset() + self.fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
def assemble(self, *args, **kwargs):
    '''
    Assemble an integrator-free form on the operator's space(s) and
    return the resulting square matrix after calling setDiag on every
    row index.

    kwargs are handed to self.process_kwargs together with the engine.
    The second space falls back to the first one when self._fes2 is None.

    Raises AssertionError if the assembled matrix is not square.
    '''
    engine = self._engine()
    self.process_kwargs(engine, kwargs)

    fes1 = self._fes1()
    if self._fes2 is None:
        fes2 = fes1
    else:
        fes2 = self._fes2()

    if fes1 == fes2:
        form = engine.new_bf(fes1)
        form.Assemble()
        form.Finalize()
        assembled = engine.a2A(form)
    else:
        # no integrator is attached; the mixed form is assembled as-is
        form = engine.new_mixed_bf(fes1, fes2)
        form.Assemble()
        assembled = engine.a2Am(form)

    from mfem.common.chypre import MfemMat2PyMat

    if use_parallel:
        assembled.CopyRowStarts()
        assembled.CopyColStarts()
        ident = MfemMat2PyMat(assembled, None)
    else:
        ident = MfemMat2PyMat(assembled, None)
        from petram.helper.block_matrix import convert_to_ScipyCoo
        ident = convert_to_ScipyCoo(ident)

    dims = ident.shape
    nrows = dims[0]
    assert nrows == dims[1], "Identity Operator must be square"

    ident.setDiag(range(nrows))
    return ident
def projection_matrix(idx1, idx2, fes, tdof1, fes2=None, tdof2=None,
                      trans1=None, trans2=None, dphase=0.0, weight=None,
                      tol=1e-7, mode='surface', filldiag=True):
    '''
    Build a DoF mapping matrix between idx2 and idx1.

    map: destinatiom mapping
    smap: source mapping

    The mapper is selected from the FE collection name of fes
    (ND / H1 / RT) and mode ('volume' / 'surface' / 'edge').

    dphase : phase shift in degree, used only when weight is None
             (0 leaves the map untouched, 180 negates it, anything else
             multiplies it by exp(-1j*pi/180*dphase))
    weight : when given, non-empty maps are multiplied by -weight
    filldiag : put 1.0 on diagonal entries whose column is not touched
               by the mapping

    Returns (M, row, col): M is a CHypreMat (parallel) or the result of
    convert_to_ScipyCoo (serial); row are the row indices of the mapped
    entries (shifted by the local true-DoF offset in parallel); col are
    the unique mapped column indices.

    Raises NotImplementedError for an unsupported collection/mode pair.
    '''
    fec_name = fes.FEColl().Name()

    # mapper names are resolved branch by branch, only when requested
    if fec_name.startswith('ND') and mode == 'volume':
        mapper = map_volume_nd
    elif fec_name.startswith('ND') and mode == 'surface':
        mapper = map_surface_nd
    elif fec_name.startswith('ND') and mode == 'edge':
        mapper = map_edge_nd
    elif fec_name.startswith('H1') and mode == 'volume':
        mapper = map_volume_h1
    elif fec_name.startswith('H1') and mode == 'surface':
        mapper = map_surface_h1
    elif fec_name.startswith('H1') and mode == 'edge':
        mapper = map_edge_h1
    elif fec_name.startswith('RT') and mode == 'volume':
        mapper = map_volume_rt
    elif fec_name.startswith('RT') and mode == 'surface':
        mapper = map_surface_rt
    else:
        raise NotImplementedError("mapping :" + fec_name + ", mode: " + mode)

    pmap = mapper(idx2, idx1, fes, fes2=fes2, trans1=trans1, trans2=trans2,
                  tdof1=tdof1, tdof2=tdof2, tol=tol)

    if weight is None:
        iscomplex = False
        if (dphase == 0.):
            pass
        elif (dphase == 180.):
            pmap = -pmap
        else:
            iscomplex = True
            pmap = pmap.astype(complex)
            pmap *= np.exp(-1j*np.pi/180*dphase)
    else:
        iscomplex = np.iscomplexobj(weight)
        if iscomplex:
            pmap = pmap.astype(complex)
        if pmap.nnz > 0:
            pmap *= -weight

    m_coo = pmap.tocoo()
    row = m_coo.row
    col = np.unique(m_coo.col)

    if use_parallel:
        start_row = fes.GetMyTDofOffset()
        col = np.unique(allgather_vector(col))
        row = row + start_row
    else:
        start_row = 0

    if filldiag:
        # membership is tested once per row; a set avoids scanning the
        # whole column array each time
        used_cols = set(col.tolist())
        for i in range(min(pmap.shape[0], pmap.shape[1])):
            r = start_row + i
            if r not in used_cols:
                pmap[i, r] = 1.0

    from scipy.sparse import coo_matrix, csr_matrix

    if use_parallel:
        m1 = csr_matrix(pmap.real, dtype=float)
        if iscomplex:
            m2 = csr_matrix(pmap.imag, dtype=float)
        else:
            m2 = None

        from mfem.common.chypre import CHypreMat

        # fes2 defaults to None; the column partition then comes from fes
        # instead of dereferencing None.
        # NOTE(review): assumes the mapper treats fes2=None as "same
        # space as fes" - confirm against the mapper implementations.
        col_fes = fes if fes2 is None else fes2
        start_col = col_fes.GetMyTDofOffset()
        end_col = col_fes.GetMyTDofOffset() + col_fes.GetTrueVSize()
        col_starts = [start_col, end_col, pmap.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(pmap, dtype=pmap.dtype))

    return M, row, col
def hcurln(fes1, fes2, coeff,
           is_complex=False, bdr='all', orderinc=1, verbose=False):
    '''
    Assemble a coupling matrix between fes1 and fes2, where the elements
    of fes2's mesh are matched to boundary elements of fes1's mesh by
    map_element.

    fes1 : trial space; its elements adjacent to the matched boundary
           elements are evaluated through map_ir
    fes2 : test space; must live on a mesh of dimension 1 or 2
    coeff : coefficient handed to map_ir
    is_complex : selects the dtype of the matrix and of the exchanged data
    bdr : 'all' or a container of fes2 element attributes to include
    orderinc : forwarded to get_rule
    verbose : print diagnostic information

    Rows are indexed by local true DoFs of fes2, columns by (global true)
    DoFs of fes1.  Returns a CHypreMat in parallel, otherwise the result
    of convert_to_ScipyCoo.

    Raises AssertionError when a matched boundary element is not exterior
    or the boundary mesh dimension is neither 1 nor 2.
    '''
    mat, rstart = get_empty_map(fes2, fes1, is_complex=is_complex)
    # NOTE(review): mat2 is never used below; the call is kept because
    # get_empty_map's side effects are not visible from here.
    mat2, rstart = get_empty_map(fes2, fes1, is_complex=is_complex)

    from petram.helper.element_map import map_element

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]

    if verbose:
        if myid == 0:
            dprint1("fes", name_fes1, name_fes2)

    mesh1 = fes1.GetMesh()
    mesh2 = fes2.GetMesh()

    # A debug dump of mesh2 to a hard-coded developer home directory used
    # to sit here; it has been removed.

    if verbose:
        if myid == 0:
            dprint1("NE", mesh1.GetNE(), mesh2.GetNE())
    elmap, elmap_r = map_element(mesh1, mesh2, bdr, map_bdr=True)

    sdim1 = mesh1.SpaceDimension()
    # NOTE(review): sdim2 is read from mesh1, not mesh2 - presumably both
    # meshes share the space dimension; confirm.
    sdim2 = mesh1.SpaceDimension()
    dim2 = mesh2.Dimension()

    shape1 = mfem.DenseMatrix()
    shape2 = mfem.Vector()
    nor = mfem.Vector(sdim1)

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P1mat = ToScipyCoo(P).tocsr()

    # invert elmap_r: mesh2 element -> owning node, and -> mesh1 element
    el2_2_node = {}
    el2_2_el1 = {}

    for d in elmap_r:
        for x in list(elmap_r[d]):
            el2_2_node[x] = d
        for x in list(elmap_r[d]):
            el2_2_el1[x] = elmap_r[d][x]

    # working for fes2
    # find boundary element on mesh1 using mesh2 boundary
    el2_arr = [list() for x in range(nprc)]
    el1_arr = [list() for x in range(nprc)]
    fe2o_arr = [list() for x in range(nprc)]
    for i_el in range(fes2.GetNE()):
        attr = fes2.GetAttribute(i_el)
        if bdr != 'all' and not attr in bdr:
            continue
        el1_arr[el2_2_node[i_el]].append(el2_2_el1[i_el])
        el2_arr[el2_2_node[i_el]].append(i_el)
        fe2 = fes2.GetFE(i_el)
        fe2o_arr[el2_2_node[i_el]].append(fe2.GetOrder())

    if USE_PARALLEL:
        el1_arr = alltoall_vector(el1_arr, int)  # transfer to mesh1 owners

    # working for fes1
    # find element order on mesh1
    fe1o_arr = [list() for x in range(nprc)]
    i_fe1_arr = [list() for x in range(nprc)]
    for rank, i_bdrs in enumerate(el1_arr):
        for i_bdr in i_bdrs:
            iface = mesh1.GetBdrElementEdgeIndex(i_bdr)
            transs = mesh1.GetFaceElementTransformations(iface)
            i_el1 = transs.Elem1No
            assert transs.Elem2No == -1, "boundary must be exterior for this operator"
            fe1 = fes1.GetFE(i_el1)
            fe1o_arr[rank].append(fe1.GetOrder())
            i_fe1_arr[rank].append(i_el1)

    if USE_PARALLEL:
        fe1o_arr = alltoall_vector(fe1o_arr, int)  # transfer to mesh2

    # working for fes2
    locnor_arr = [list() for x in range(nprc)]
    data2_arr = [list() for x in range(nprc)]
    verbose1 = verbose
    for rank, i_el2s in enumerate(el2_arr):
        for i_el2, fe1o in zip(i_el2s, fe1o_arr[rank]):
            eltrans = fes2.GetElementTransformation(i_el2)
            fe2 = fes2.GetFE(i_el2)
            nd2 = fe2.GetDof()
            ir = get_rule(fe1o, fe2, eltrans, orderinc=orderinc,
                          verbose=verbose1)
            # only the first get_rule call is allowed to be verbose
            verbose1 = False
            shape2.SetSize(nd2)
            data2 = []
            locnors = []
            for jj in range(ir.GetNPoints()):
                ip1 = ir.IntPoint(jj)
                eltrans.SetIntPoint(ip1)
                w = eltrans.Weight() * ip1.weight
                mfem.CalcOrtho(eltrans.Jacobian(), nor)
                nor2 = nor.GetDataArray() / np.linalg.norm(nor.GetDataArray())
                fe2.CalcShape(ip1, shape2)

                # each row: reference coords, physical coords, unit normal
                if dim2 == 1:
                    d = np.array([ip1.x] + list(eltrans.Transform(ip1)) +
                                 list(nor2))
                    locnors.append(d)
                elif dim2 == 2:
                    d = np.array([ip1.x, ip1.y] +
                                 list(eltrans.Transform(ip1)) + list(nor2))
                    locnors.append(d)
                else:
                    assert False, "boundary mesh must be dim=1 or 2"
                data2.append(w * shape2.GetDataArray().copy())

            # np.vstack(locnors).shape = (#Npoints, dim2+sdim2*2)
            # np.vstack(data2).shape = (#Npoints, #NDoF2)
            locnor_arr[rank].append(np.vstack(locnors))
            data2_arr[rank].append(np.vstack(data2))

    if USE_PARALLEL:
        locnor_arr = alltoall_vectorv(locnor_arr, float)  # transfer to mesh1

    vdofs1_arr = [list() for x in range(nprc)]
    data1_arr = [list() for x in range(nprc)]

    # space to compute the coefficient
    MV = [mfem.Vector(sdim1), mfem.DenseMatrix(sdim1, sdim1)]

    max_misalignment = -np.inf
    for rank, i_fe1s in enumerate(i_fe1_arr):
        locnorss = locnor_arr[rank]

        for k, i_fe1 in enumerate(i_fe1s):
            fe1 = fes1.GetFE(i_fe1)
            nd1 = fe1.GetDof()
            eltrans = fes1.GetElementTransformation(i_fe1)
            doftrans = fes1.GetElementDofTransformation(i_fe1)

            locnors2 = locnorss[k]
            shape1.SetSize(nd1, sdim1)
            vdofs1 = fes1.GetElementVDofs(i_fe1)

            # negative vdof encodes a sign flip; decode index and sign
            dof_sign1 = np.array([
                [1 if vv >= 0 else -1 for vv in vdofs1], ])
            vdofs1 = [-1 - x if x < 0 else x for x in vdofs1]

            mat_doftrans = get_inv_doftrans(doftrans, dof_sign1)

            if USE_PARALLEL:
                # After DofTransformation is introduced we can not use GetGlobalTDofNumber, because
                # element local DoF could be linked with two TrueDoFs in neighber processes
                # We construct submatrix of Prolongation to construct element matrix
                # in TrueDof space
                vv1 = [
                    P1mat.indices[P1mat.indptr[ii]:P1mat.indptr[ii + 1]]
                    for ii in vdofs1
                ]
                vv3 = [
                    P1mat.data[P1mat.indptr[ii]:P1mat.indptr[ii + 1]]
                    for ii in vdofs1
                ]
                ngtof = np.sum([len(x) for x in vv3])
                sub_p = np.zeros((nd1, ngtof))
                k1 = 0
                k2 = 0
                for gtofs, weights in zip(vv1, vv3):
                    for g, w in zip(gtofs, weights):
                        sub_p[k1, k2] = w
                        k2 = k2 + 1
                    k1 = k1 + 1

                vdofs1 = np.hstack(vv1).flatten()
                mat_doftrans = mat_doftrans.dot(sub_p)

            res, misalignment = map_ir(fe1, eltrans, coeff, shape1, dim2, sdim2,
                                       locnors2, dof_sign1, mat_doftrans, MV)

            vdofs1_arr[rank].append(np.array(vdofs1))
            data1_arr[rank].append(res)

            max_misalignment = np.max([max_misalignment, np.max(misalignment)])
            # res.shape = (#Npoints, #DoF1)

    if USE_PARALLEL:
        vdofs1_arr = alltoall_vectorv(vdofs1_arr, int)  # transfer to mesh2
        if is_complex:
            data1_arr = alltoall_vectorv(data1_arr, complex)  # transfer to mesh2
        else:
            data1_arr = alltoall_vectorv(data1_arr, float)  # transfer to mesh2
        # NOTE(review): gather returns None on non-root ranks, so the
        # value is only meaningful on rank 0 - confirm this is intended.
        max_misalignment = np.max(
            MPI.COMM_WORLD.gather(max_misalignment, root=0))
    dprint1("Max misalignment: ", max_misalignment)

    shared_data = []

    for rank, i_el2s in enumerate(el2_arr):
        for k, i_el2 in enumerate(i_el2s):
            vdofs1 = vdofs1_arr[rank][k]

            fe2 = fes2.GetFE(i_el2)
            eltrans2 = fes2.GetElementTransformation(i_el2)
            vdofs2 = fes2.GetElementVDofs(i_el2)
            vdofs2 = [-1 - x if x < 0 else x for x in vdofs2]

            d1 = data1_arr[rank][k]
            d2 = data2_arr[rank][k]

            # element matrix: (#NDoF2, #Npoints) . (#Npoints, #DoF1)
            mm = d2.transpose().dot(d1)

            if USE_PARALLEL:
                # prepare data for not-owned DoFs, which will be shared later
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [fes2.GetGlobalTDofNumber(ii) for ii in vdofs2]

                kkk = 0
                for v2, v2g in zip(vdofs22, vdofs22g):
                    if v2 < 0:
                        shared_data.append([v2g, mm[kkk, :], vdofs1])
                    kkk = kkk + 1
            else:
                vdofs22 = vdofs2

            for i, ltdof2 in enumerate(vdofs22):
                if ltdof2 < 0:
                    continue
                for j, gtdof1 in enumerate(vdofs1):
                    mat[ltdof2, gtdof1] = mat[ltdof2, gtdof1] + mm[i, j]

    if USE_PARALLEL:
        # every rank broadcasts its not-owned rows; each rank picks up the
        # rows that fall into its own true-DoF range
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    for j, gtdof1 in enumerate(vdofs1):
                        mat[i, gtdof1] = mat[i, gtdof1] + elmat[j]

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        if is_complex:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = csr_matrix(mat.imag, dtype=float)
        else:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = None
        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
def dof_mapping_matrix(src, dst, fes, tdof, engine=None, dphase=0.0,
                       map_to_u=def_map_u, map_to_v=def_map_v,
                       smap_to_u=None, smap_to_v=None, tol=1e-7):
    '''
    Build the DoF mapping matrix from src to dst on fes.

    map_to_u/map_to_v   : destination mapping
    smap_to_u/smap_to_v : source mapping (defaults to the destination one)
    dphase : phase shift in degree (0: none, 180: sign flip, otherwise
             multiplication by exp(-1j*pi/180*dphase))

    Returns (M, r, c): the operator, and the row / column indices of the
    mapped entries (columns are allgathered in parallel).

    Raises NotImplementedError unless the collection is ND or H1.
    '''
    fec_name = fes.FEColl().Name()
    is_nd = fec_name.startswith('ND')
    if not is_nd and not fec_name.startswith('H1'):
        raise NotImplementedError("mapping for " + fec_name)
    mapper = find_dof_map_nd if is_nd else find_dof_map_h1

    # the source mapping falls back to the destination mapping
    src_to_u = map_to_u if smap_to_u is None else smap_to_u
    src_to_v = map_to_v if smap_to_v is None else smap_to_v

    dofmap = mapper(src, dst, map_to_u, map_to_v, src_to_u, src_to_v,
                    fes, engine, tdof, tol=tol)

    if dphase == 180.:
        dofmap = -dofmap
    elif dphase != 0.:
        dofmap = dofmap.astype(complex)
        dofmap *= np.exp(-1j * np.pi / 180 * dphase)

    mapped = coo_matrix(dofmap)
    rows = mapped.row
    cols = mapped.col

    # unit diagonal everywhere, then cleared at every mapped column index
    dofmap.setdiag(1.0)
    for kcol in cols:
        dofmap[kcol, kcol] = 0.0

    if not use_parallel:
        from petram.helper.block_matrix import convert_to_ScipyCoo
        M = convert_to_ScipyCoo(coo_matrix(dofmap, dtype=dofmap.dtype))
        return M, rows, cols

    first_row = fes.GetMyTDofOffset()
    last_row = fes.GetMyTDofOffset() + fes.GetTrueVSize()
    cols = allgather_vector(cols)

    # the locally owned slice is taken from the transposed map and the
    # resulting CHypreMat is transposed back
    dofmap = dofmap.transpose()
    m1 = csr_matrix(dofmap.real[first_row:last_row, :], dtype=float)
    m2 = csr_matrix(dofmap.imag[first_row:last_row, :], dtype=float)
    m1.eliminate_zeros()
    m2.eliminate_zeros()
    from mfem.common.chypre import CHypreMat
    dprint1(m1.shape, m2.shape)
    M = CHypreMat(m1, m2).transpose()
    return M, rows, cols
def convolve2d(fes1, fes2, kernel=delta, support=None,
               orderinc=5, is_complex=False,
               trial_domain='all', test_domain='all',
               verbose=False, coeff=None):
    r'''
    fill linear operator for convolution
    \int phi_test(x) func(x-x') phi_trial(x') dx

    Genralized version to multi-dim
    test/trial
        ScalarFE, ScalarFE   : func is scalar
        VectorFE, ScalarFE   : func is vector (vertical)
        ScalarFE, VectorFE   : func is vector (horizontal)
        VectorFE, VectorFE   : func matrix

    fes1 : trial space, fes2 : test space
    kernel : called as kernel(x2 - x1, (x2 + x1)/2, w=w); a None result
             skips that pair of points
    support : optional callable evaluated at the mean integration point
              of each test element; pairs farther apart than the
              returned (non-negative) distance are skipped
    trial_domain/test_domain : 'all' or a container of element attributes
    coeff : optional callable evaluated at (x2 + x1)/2, multiplies kernel

    Returns a CHypreMat in parallel, otherwise the result of
    convert_to_ScipyCoo.  Raises AssertionError if fes1 has no element.
    '''
    mat, rstart = get_empty_map(fes2, fes1, is_complex=is_complex)

    if fes1.GetNE() == 0:
        assert False, "FESpace does not have element"
    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]

    sdim = fes1.GetMesh().SpaceDimension()
    if name_fes1 in ['RT', 'ND']:
        shape1 = mfem.DenseMatrix()
        vdim1 = fes1.GetMesh().SpaceDimension()
    else:
        shape1 = mfem.Vector()
        vdim1 = 1
    if name_fes2 in ['RT', 'ND']:
        shape2 = mfem.DenseMatrix()
        vdim2 = fes1.GetMesh().SpaceDimension()
    else:
        shape2 = mfem.Vector()
        # this branch used to assign vdim1, which left vdim2 undefined
        # for a scalar test space and clobbered the trial dimension
        vdim2 = 1

    # communication strategy
    #   (1) x2 (ir points on test space) is collected in each nodes
    #   (2) x2 is send to other nodes
    #   (3) each nodes compute \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and global index should be send back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(ir.GetNPoints(), sdim)

    # attrs1 filters trial (fes1) elements, attrs2 filters test (fes2)
    # elements; both used to be read from the fes2 mesh
    attrs1 = fes1.GetMesh().GetAttributeArray()
    attrs2 = fes2.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all':
            if not attrs2[i] in test_domain:
                continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy().transpose())
        i2_arr.append(i)

    # a negative support value means "no cut-off" (see su >= 0 below)
    if support is not None:
        supports = np.array([support(np.mean(xxx, 0)) for xxx in x2_arr])
    else:
        supports = -np.ones(len(x2_arr))

    if len(i2_arr) > 0:
        ptx_x2 = np.stack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[[]]])
        i2_arr = np.array([])

    if USE_PARALLEL:
        # note: we could implement more advanced alg. to reduce
        # the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
        s_all = comm.allgather(supports)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]
        s_all = [supports]

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")
    vdofs1_senddata = []
    elmats_senddata = []

    for knode1 in range(len(x2_all)):
        x2_onenode = x2_all[knode1]
        i2_onenode = i2_all[knode1]
        s_onenode = s_all[knode1]

        elmats_all = []
        vdofs1_all = []

        # collect vdofs
        for j in range(fes1.GetNE()):
            local_vdofs = fes1.GetElementVDofs(j)
            local_vdofs = [vv if vv >= 0 else -1 - vv for vv in local_vdofs]
            if USE_PARALLEL:
                subvdofs2 = [VDoFtoGTDoF1[i] for i in local_vdofs]
                vdofs1_all.append(subvdofs2)
            else:
                vdofs1_all.append(local_vdofs)

        for i, x2s, su in zip(i2_onenode, x2_onenode, s_onenode):  # loop over fes2
            nd2 = len(x2s)
            elmats = []
            for j in range(fes1.GetNE()):
                if trial_domain != 'all':
                    if not attrs1[j] in trial_domain:
                        continue

                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                eltrans = fes1.GetElementTransformation(j)
                dof_sign1 = np.array(
                    [1 if vv >= 0 else -1 for vv in fes1.GetElementVDofs(j)])

                if name_fes1 in ['RT', 'ND']:
                    shape1.SetSize(nd1, vdim1)
                else:
                    shape1.SetSize(nd1)

                elmat = np.zeros((nd2, vdim2, nd1), dtype=mat.dtype)
                tmp_int = np.zeros((vdim2, nd1), dtype=mat.dtype).squeeze()

                # tabulate (point, weight, signed shape) on this element
                dataset = []
                for jj in range(ir.GetNPoints()):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)
                    if name_fes1 in ['RT', 'ND']:
                        fe1.CalcVShape(eltrans, shape1)
                    else:
                        fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    ss = shape1.GetDataArray().copy()

                    if len(ss.shape) > 1:
                        ss = np.transpose(ss)
                    ss = ss * dof_sign1
                    dataset.append((x1, w, ss))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    tmp_int *= 0.0

                    has_contribution2 = False
                    for x1, w, shape_arr in dataset:
                        s = np.sqrt(np.sum((x1 - x2)**2))
                        if su >= 0 and s > su:
                            continue

                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if val is None:
                            continue
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)

                        tmp_int += np.dot(val, shape_arr) * w
                        has_contribution2 = True

                    if has_contribution2:
                        elmat[kkk, ...] = tmp_int
                        has_contribution = True

                if has_contribution:
                    elmats.append((j, elmat))

            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(vdofs1_all)
        elmats_senddata.append(elmats_all)

    # send this information to knodes;
    if USE_PARALLEL:
        knode1 = 0
        for vdofs1_all, elmats_all in zip(vdofs1_senddata, elmats_senddata):
            if myid == knode1:
                vdofs1_data = comm.gather(vdofs1_all, root=knode1)
                elmats_data = comm.gather(elmats_all, root=knode1)
            else:
                _ = comm.gather(vdofs1_all, root=knode1)
                _ = comm.gather(elmats_all, root=knode1)
            knode1 = knode1 + 1
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")
    shared_data = []
    mpi_rank = 0
    for vdofs1, elmats_all in zip(vdofs1_data, elmats_data):  # loop over MPI nodes

        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank/count",
                      mpi_rank, len(coupling))
            nicePrint(" Average :",
                      (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint(" Max/Min :",
                      (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))
            mpi_rank += 1

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            dof_sign2 = np.array([
                [1 if vv >= 0 else -1 for vv in vdofs2], ]).transpose()
            vdofs2 = [-1 - x if x < 0 else x for x in vdofs2]

            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()
            if name_fes2 in ['RT', 'ND']:
                shape2.SetSize(nd2, vdim2)
            else:
                shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            for j, elmat in elmats:
                mm = np.zeros((len(vdofs2), len(vdofs1[j])), dtype=float)

                for ii in range(ir.GetNPoints()):
                    ip2 = ir.IntPoint(ii)
                    eltrans.SetIntPoint(ip2)
                    ww = eltrans.Weight() * ip2.weight

                    if name_fes2 in ['RT', 'ND']:
                        fe2.CalcVShape(eltrans, shape2)
                    else:
                        fe2.CalcShape(ip2, shape2)

                    shape2 *= ww
                    ss = shape2.GetDataArray().reshape(-1, vdim2)
                    ss = ss * dof_sign2

                    # NOTE(review): elmat[ii] has vdim2 rows but is
                    # reshaped with vdim1; identical when vdim1 == vdim2 -
                    # confirm the mixed scalar/vector case.
                    tmp_int = elmat[ii, ...].reshape(vdim1, -1)
                    tmp = np.dot(ss, tmp_int)
                    mm = mm + tmp

                # prepare shared data
                if USE_PARALLEL:
                    vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                    vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                    kkk = 0
                    for v2, v2g in zip(vdofs22, vdofs22g):
                        if v2 < 0:
                            shared_data.append([v2g, mm[kkk, :], vdofs1[j]])
                        kkk = kkk + 1

                # merge contribution to final mat
                for k, vv in enumerate(vdofs1[j]):
                    try:
                        if USE_PARALLEL:
                            mmm = mm[np.where(np.array(vdofs22) >= 0)[0], :]
                            vdofs222 = [x for x in vdofs22 if x >= 0]
                        else:
                            vdofs222 = vdofs2
                            mmm = mm
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except Exception:
                        # best effort: report and keep assembling, but let
                        # KeyboardInterrupt/SystemExit propagate
                        import traceback
                        print("error", myid)
                        traceback.print_exc()

    if USE_PARALLEL:
        # every rank broadcasts its not-owned rows; each rank picks up the
        # rows that fall into its own true-DoF range
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        if is_complex:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = csr_matrix(mat.imag, dtype=float)
        else:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = None
        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
def convolve1d(fes1, fes2, kernel=delta, support=None,
               orderinc=5, is_complex=False,
               trial_domain='all', test_domain='all',
               verbose=False, coeff=None):
    r'''
    fill linear operator for convolution
    \int phi_test(x) func(x-x') phi_trial(x') dx

    fes1 : trial space, fes2 : test space (scalar shape functions)
    kernel : called as kernel(x2 - x1, (x2 + x1)/2, w=w)
    support : optional callable evaluated at (x1 + x2)/2; pairs with
              |x1 - x2| larger than the returned value are skipped
    trial_domain/test_domain : 'all' or a container of element attributes
    coeff : optional callable evaluated at (x2 + x1)/2, multiplies kernel

    Returns a CHypreMat in parallel, otherwise the result of
    convert_to_ScipyCoo.  Raises AssertionError if fes1 has no element.
    '''
    mat, rstart = get_empty_map(fes2, fes1, is_complex=is_complex)

    # element 0 is used below to build the integration rule
    if fes1.GetNE() == 0:
        assert False, "FESpace does not have element"
    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)

    shape1 = mfem.Vector()
    shape2 = mfem.Vector()

    # communication strategy
    #   (1) x2 (ir points on test space) is collected in each nodes
    #   (2) x2 is send to other nodes
    #   (3) each nodes compute \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and global index should be send back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(ir.GetNPoints(), 1)

    # attrs1 filters trial (fes1) elements, attrs2 filters test (fes2)
    # elements; both used to be read from the fes2 mesh
    attrs1 = fes1.GetMesh().GetAttributeArray()
    attrs2 = fes2.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all':
            if not attrs2[i] in test_domain:
                continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy())
        i2_arr.append(i)
    if len(i2_arr) > 0:
        ptx_x2 = np.vstack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[]])
        i2_arr = np.array([])

    if USE_PARALLEL:
        # note: we could implement more advanced alg. to reduce
        # the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")
    vdofs1_senddata = []
    elmats_senddata = []

    for knode1 in range(len(x2_all)):
        x2_onenode = x2_all[knode1]
        i2_onenode = i2_all[knode1]
        elmats_all = []
        vdofs1_all = []

        # collect vdofs
        for j in range(fes1.GetNE()):
            local_vdofs = fes1.GetElementVDofs(j)
            if USE_PARALLEL:
                subvdofs2 = [VDoFtoGTDoF1[i] for i in local_vdofs]
                vdofs1_all.append(subvdofs2)
            else:
                vdofs1_all.append(local_vdofs)

        for i, x2s in zip(i2_onenode, x2_onenode):  # loop over fes2
            nd2 = len(x2s)
            elmats = []
            for j in range(fes1.GetNE()):
                if trial_domain != 'all':
                    if not attrs1[j] in trial_domain:
                        continue

                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                shape1.SetSize(nd1)
                eltrans = fes1.GetElementTransformation(j)

                tmp_int = np.zeros(shape1.Size(), dtype=mat.dtype)
                elmat = np.zeros((nd2, nd1), dtype=mat.dtype)

                # tabulate (point, weight, shape) on this element
                dataset = []
                for jj in range(ir.GetNPoints()):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)[0]
                    fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    dataset.append((x1, w, shape1.GetDataArray().copy()))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    tmp_int *= 0.0
                    for x1, w, shape_arr in dataset:
                        if support is not None:
                            s = support((x1 + x2) / 2.0)
                            if np.abs(x1 - x2) > s:
                                continue
                        has_contribution = True
                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)
                        tmp_int += shape_arr * w * val
                    elmat[kkk, :] = tmp_int
                if has_contribution:
                    elmats.append((j, elmat))

            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(vdofs1_all)
        elmats_senddata.append(elmats_all)

    # send this information to knodes;
    if USE_PARALLEL:
        knode1 = 0
        for vdofs1_all, elmats_all in zip(vdofs1_senddata, elmats_senddata):
            if myid == knode1:
                vdofs1_data = comm.gather(vdofs1_all, root=knode1)
                elmats_data = comm.gather(elmats_all, root=knode1)
            else:
                _ = comm.gather(vdofs1_all, root=knode1)
                _ = comm.gather(elmats_all, root=knode1)
            knode1 = knode1 + 1
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")
    shared_data = []
    mpi_rank = 0
    for vdofs1, elmats_all in zip(vdofs1_data, elmats_data):  # loop over MPI nodes

        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank", mpi_rank)
            nicePrint(" Average :",
                      (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint(" Max/Min :",
                      (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))
            mpi_rank += 1

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()
            shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            for j, elmat in elmats:
                mm = np.zeros((len(vdofs2), len(vdofs1[j])), dtype=float)

                for ii in range(ir.GetNPoints()):
                    ip2 = ir.IntPoint(ii)
                    eltrans.SetIntPoint(ip2)
                    ww = eltrans.Weight() * ip2.weight
                    fe2.CalcShape(ip2, shape2)
                    shape2 *= ww

                    # outer product: test shape (column) x trial integral (row)
                    tmp_int = elmat[ii, :]
                    tmp = np.dot(
                        np.atleast_2d(shape2.GetDataArray()).transpose(),
                        np.atleast_2d(tmp_int))
                    mm = mm + tmp

                # merge contribution to final mat
                if USE_PARALLEL:
                    vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                    vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                    kkk = 0
                    for v2, v2g in zip(vdofs22, vdofs22g):
                        if v2 < 0:
                            shared_data.append([v2g, mm[kkk, :], vdofs1[j]])
                        kkk = kkk + 1

                for k, vv in enumerate(vdofs1[j]):
                    try:
                        if USE_PARALLEL:
                            mmm = mm[np.where(np.array(vdofs22) >= 0)[0], :]
                            vdofs222 = [x for x in vdofs22 if x >= 0]
                        else:
                            vdofs222 = vdofs2
                            mmm = mm
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except Exception:
                        # best effort: report and keep assembling, but let
                        # KeyboardInterrupt/SystemExit propagate
                        import traceback
                        print("error", myid)
                        traceback.print_exc()

    if USE_PARALLEL:
        # every rank broadcasts its not-owned rows; each rank picks up the
        # rows that fall into its own true-DoF range
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        if is_complex:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = csr_matrix(mat.imag, dtype=float)
        else:
            m1 = csr_matrix(mat.real, dtype=float)
            m2 = None
        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
def projection_matrix(idx1, idx2, fes, tdof1, fes2=None, tdof2=None,
                      trans1=None, trans2=None, dphase=0.0, weight=None,
                      tol=1e-7, mode='surface', filldiag=True,
                      old_mapping=True):
    '''
    Build a DoF mapping matrix between idx2 and idx1.

    map: destinatiom mapping
    smap: source mapping

    old_mapping:
       True : periodic boundary conditions are implemented this way
              x = M*y
       False: projection operator should use this flag.
              y = M*x
       the difference is only when mapping two/three DoFs sitting at
       the same location. therefore, it only matters for ND and RT cases

    The mapper is selected from the FE collection name of fes
    (ND / H1 / L2 / RT) and mode ('volume' / 'surface' / 'edge' /
    'point'); L2 reuses the H1 mappers.

    dphase : phase shift in degree, used only when weight is None
             (0 leaves the map untouched, 180 negates it, anything else
             multiplies it by exp(-1j*pi/180*dphase))
    weight : when given, non-empty maps are multiplied by -weight
    filldiag : put 1.0 on diagonal entries whose column is not touched
               by the mapping

    Returns (M, row, col): M is a CHypreMat (parallel) or the result of
    convert_to_ScipyCoo (serial); row are the row indices of the mapped
    entries (shifted by the local true-DoF offset in parallel); col are
    the unique mapped column indices.

    Raises NotImplementedError for an unsupported collection/mode pair.
    '''
    fec_name = fes.FEColl().Name()
    dprint1("constructing mapping to fec_name", fec_name, mode)

    # mapper names are resolved branch by branch, only when requested
    if fec_name.startswith('ND') and mode == 'volume':
        mapper = map_volume_nd
    elif fec_name.startswith('ND') and mode == 'surface':
        mapper = map_surface_nd
    elif fec_name.startswith('ND') and mode == 'edge':
        mapper = map_edge_nd
    elif fec_name.startswith('H1') and mode == 'volume':
        mapper = map_volume_h1
    elif fec_name.startswith('H1') and mode == 'surface':
        mapper = map_surface_h1
    elif fec_name.startswith('H1') and mode == 'edge':
        mapper = map_edge_h1
    elif fec_name.startswith('H1') and mode == 'point':
        mapper = map_point_h1
    elif fec_name.startswith('L2') and mode == 'volume':
        mapper = map_volume_h1
    elif fec_name.startswith('L2') and mode == 'surface':
        mapper = map_surface_h1
    elif fec_name.startswith('L2') and mode == 'edge':
        mapper = map_edge_h1
    elif fec_name.startswith('RT') and mode == 'volume':
        mapper = map_volume_rt
    elif fec_name.startswith('RT') and mode == 'surface':
        mapper = map_surface_rt
    else:
        raise NotImplementedError("mapping :" + fec_name + ", mode: " + mode)

    pmap = mapper(idx2, idx1, fes, fes2=fes2, trans1=trans1, trans2=trans2,
                  tdof1=tdof1, tdof2=tdof2, tol=tol, old_mapping=old_mapping)

    # scaling is done in csr format and the result converted back to lil
    if weight is None:
        iscomplex = False
        if (dphase == 0.):
            pass
        elif (dphase == 180.):
            pmap = pmap.tocsr()
            pmap *= -1
            pmap = pmap.tolil()
        else:
            iscomplex = True
            pmap = pmap.astype(complex)
            pmap = pmap.tocsr()
            pmap *= np.exp(-1j * np.pi / 180 * dphase)
            pmap = pmap.tolil()
    else:
        iscomplex = np.iscomplexobj(weight)
        if iscomplex:
            pmap = pmap.astype(complex)
        if pmap.nnz > 0:
            pmap = pmap.tocsr()
            pmap *= -weight
            pmap = pmap.tolil()

    m_coo = pmap.tocoo()
    row = m_coo.row
    col = np.unique(m_coo.col)

    if use_parallel:
        start_row = fes.GetMyTDofOffset()
        col = np.unique(allgather_vector(col))
        row = row + start_row
    else:
        start_row = 0

    if filldiag:
        # membership is tested once per row; a set avoids scanning the
        # whole column array each time
        used_cols = set(col.tolist())
        for i in range(min(pmap.shape[0], pmap.shape[1])):
            r = start_row + i
            if r not in used_cols:
                pmap[i, r] = 1.0

    from scipy.sparse import coo_matrix, csr_matrix

    if use_parallel:
        m1 = csr_matrix(pmap.real, dtype=float)
        if iscomplex:
            m2 = csr_matrix(pmap.imag, dtype=float)
        else:
            m2 = None

        from mfem.common.chypre import CHypreMat

        # fes2 defaults to None; the column partition then comes from fes
        # instead of dereferencing None.
        # NOTE(review): assumes the mapper treats fes2=None as "same
        # space as fes" - confirm against the mapper implementations.
        col_fes = fes if fes2 is None else fes2
        start_col = col_fes.GetMyTDofOffset()
        end_col = col_fes.GetMyTDofOffset() + col_fes.GetTrueVSize()
        col_starts = [start_col, end_col, pmap.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(pmap, dtype=pmap.dtype))

    return M, row, col