def convolve2d(fes1,
               fes2,
               kernel=delta,
               support=None,
               orderinc=5,
               is_complex=False,
               trial_domain='all',
               test_domain='all',
               verbose=False,
               coeff=None):
    r'''
    Fill linear operator for convolution

        \int phi_test(x) func(x-x') phi_trial(x') dx

    Generalized version to multi-dim
    test/trial
        ScalarFE, ScalarFE   : func is scalar
        VectorFE, ScalarFE   : func is vector (vertical)
        ScalarFE, VectorFE   : func is vector (horizontal)
        VectorFE, VectorFE   : func matrix

    Parameters
    ----------
    fes1 : trial finite element space (columns of the operator).
    fes2 : test finite element space (rows of the operator).
    kernel : callable invoked as ``kernel(x2 - x1, (x2 + x1) / 2.0, w=w)``
        for a test point x2 and a trial point x1. Returning None skips
        that pair of points.
    support : optional callable. It is evaluated at the mean of the
        integration points of each test element and its value is used as
        a cut-off distance: point pairs farther apart are skipped. A
        negative value disables the cut-off.
    orderinc : forwarded to get_rule when the integration rule is chosen.
    is_complex : forwarded to get_empty_map; in parallel it also selects
        whether the imaginary part is handed to CHypreMat.
    trial_domain, test_domain : 'all', or a container of element
        attributes used to restrict the trial/test elements.
    verbose : print progress and element-coupling statistics.
    coeff : optional callable evaluated at the mid point (x2 + x1) / 2.0;
        its value multiplies the kernel value.

    Returns
    -------
    CHypreMat when USE_PARALLEL is set, otherwise the object returned by
    convert_to_ScipyCoo.

    Raises
    ------
    AssertionError : fes1 has no element.
    '''
    mat, _ = get_empty_map(fes2, fes1, is_complex=is_complex)

    if fes1.GetNE() == 0:
        # explicit raise (same exception type as the former assert) so that
        # the check survives "python -O"
        raise AssertionError("FESpace does not have element")

    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)
    npoints = ir.GetNPoints()

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]
    fes1_is_vector = name_fes1 in ['RT', 'ND']
    fes2_is_vector = name_fes2 in ['RT', 'ND']

    sdim = fes1.GetMesh().SpaceDimension()

    if fes1_is_vector:
        shape1 = mfem.DenseMatrix()
        vdim1 = sdim
    else:
        shape1 = mfem.Vector()
        vdim1 = 1
    if fes2_is_vector:
        shape2 = mfem.DenseMatrix()
        vdim2 = sdim
    else:
        shape2 = mfem.Vector()
        # NOTE: this used to assign vdim1, which left vdim2 undefined for a
        # scalar test space and clobbered vdim1 of a vector trial space.
        vdim2 = 1

    # communication strategy
    #   (1) x2 (ir points on test space) is collected in each nodes
    #   (2) x2 is send to other nodes
    #   (3) each nodes compute \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and global index should be send back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(npoints, sdim)

    # element attributes: the test filter is indexed by fes2 elements and
    # the trial filter by fes1 elements, so each needs its own mesh.
    attrs_test = fes2.GetMesh().GetAttributeArray()
    attrs_trial = fes1.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all' and attrs_test[i] not in test_domain:
            continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy().transpose())
        i2_arr.append(i)

    if support is not None:
        supports = np.array([support(np.mean(xxx, 0)) for xxx in x2_arr])
    else:
        # negative value means "no cut-off"
        supports = -np.ones(len(x2_arr))

    if len(i2_arr) > 0:
        ptx_x2 = np.stack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[[]]])
        i2_arr = np.array([])

    if USE_PARALLEL:
        # note: we could implement more advanced alg. to reduce
        # the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
        s_all = comm.allgather(supports)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]
        s_all = [supports]

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")

    # DoF numbers of every trial element. They do not depend on the node
    # being processed, so they are collected once and sent to every node.
    vdofs1_all = []
    for j in range(fes1.GetNE()):
        local_vdofs = [vv if vv >= 0 else -1 - vv
                       for vv in fes1.GetElementVDofs(j)]
        if USE_PARALLEL:
            vdofs1_all.append([VDoFtoGTDoF1[i] for i in local_vdofs])
        else:
            vdofs1_all.append(local_vdofs)

    # trial elements which pass the trial_domain filter
    trial_elements = [j for j in range(fes1.GetNE())
                      if trial_domain == 'all'
                      or attrs_trial[j] in trial_domain]

    vdofs1_senddata = []
    elmats_senddata = []

    for x2_onenode, i2_onenode, s_onenode in zip(x2_all, i2_all, s_all):
        elmats_all = []

        # loop over fes2
        for i, x2s, su in zip(i2_onenode, x2_onenode, s_onenode):
            nd2 = len(x2s)   # number of integration points on test element
            elmats = []
            for j in trial_elements:
                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                eltrans = fes1.GetElementTransformation(j)
                dof_sign1 = np.array(
                    [1 if vv >= 0 else -1 for vv in fes1.GetElementVDofs(j)])

                if fes1_is_vector:
                    shape1.SetSize(nd1, vdim1)
                else:
                    shape1.SetSize(nd1)

                elmat = np.zeros((nd2, vdim2, nd1), dtype=mat.dtype)
                tmp_int = np.zeros((vdim2, nd1), dtype=mat.dtype).squeeze()

                # (position, weight, signed shape values) on trial element
                dataset = []
                for jj in range(npoints):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)
                    if fes1_is_vector:
                        fe1.CalcVShape(eltrans, shape1)
                    else:
                        fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    ss = shape1.GetDataArray().copy()
                    if len(ss.shape) > 1:
                        ss = np.transpose(ss)
                    dataset.append((x1, w, ss * dof_sign1))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    tmp_int.fill(0.0)
                    point_has_contribution = False
                    for x1, w, shape_arr in dataset:
                        if su >= 0 and np.sqrt(np.sum((x1 - x2)**2)) > su:
                            continue
                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if val is None:
                            continue
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)
                        tmp_int += np.dot(val, shape_arr) * w
                        point_has_contribution = True

                    if point_has_contribution:
                        elmat[kkk, ...] = tmp_int
                        has_contribution = True

                if has_contribution:
                    elmats.append((j, elmat))

            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(vdofs1_all)
        elmats_senddata.append(elmats_all)

    # send this information to knodes: the data computed for node "root"
    # is gathered on that node.
    if USE_PARALLEL:
        for root, (send_vdofs, send_elmats) in enumerate(
                zip(vdofs1_senddata, elmats_senddata)):
            gathered_vdofs = comm.gather(send_vdofs, root=root)
            gathered_elmats = comm.gather(send_elmats, root=root)
            if myid == root:
                vdofs1_data = gathered_vdofs
                elmats_data = gathered_elmats
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")
    shared_data = []
    mpi_rank = 0
    # loop over MPI nodes
    for vdofs1, elmats_all in zip(vdofs1_data, elmats_data):
        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank/count",
                      mpi_rank, len(coupling))
            nicePrint(" Average :",
                      (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint(" Max/Min :",
                      (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))
        mpi_rank += 1

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            raw_vdofs2 = fes2.GetElementVDofs(i)
            dof_sign2 = np.array([
                [1 if vv >= 0 else -1 for vv in raw_vdofs2],
            ]).transpose()
            vdofs2 = [-1 - x if x < 0 else x for x in raw_vdofs2]

            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()
            if fes2_is_vector:
                shape2.SetSize(nd2, vdim2)
            else:
                shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            # weighted, sign corrected test shape values at each integration
            # point. They only depend on the test element, so they are
            # evaluated once and reused for every coupled trial element.
            test_shapes = []
            for ii in range(npoints):
                ip2 = ir.IntPoint(ii)
                eltrans.SetIntPoint(ip2)
                ww = eltrans.Weight() * ip2.weight
                if fes2_is_vector:
                    fe2.CalcVShape(eltrans, shape2)
                else:
                    fe2.CalcShape(ip2, shape2)
                shape2 *= ww
                ss = shape2.GetDataArray().reshape(-1, vdim2)
                test_shapes.append(ss * dof_sign2)

            if USE_PARALLEL:
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                # rows whose local true DoF number is not negative
                owned_rows = np.where(np.array(vdofs22) >= 0)[0]
                vdofs222 = [x for x in vdofs22 if x >= 0]
            else:
                vdofs222 = vdofs2

            for j, elmat in elmats:
                trial_dofs = vdofs1[j]
                mm = np.zeros((len(vdofs2), len(trial_dofs)), dtype=float)

                for ii, ss in enumerate(test_shapes):
                    # elmat[ii] has shape (vdim2, nd1); the leading axis
                    # must match the vector dimension of the test shape.
                    tmp_int = elmat[ii, ...].reshape(vdim2, -1)
                    mm = mm + np.dot(ss, tmp_int)

                if USE_PARALLEL:
                    # prepare shared data: rows with a negative local true
                    # DoF number are added later by the rank whose offset
                    # range contains the global number
                    for row, (v2, v2g) in enumerate(zip(vdofs22, vdofs22g)):
                        if v2 < 0:
                            shared_data.append([v2g, mm[row, :], trial_dofs])
                    mmm = mm[owned_rows, :]
                else:
                    mmm = mm

                # merge contribution to final mat
                for k, vv in enumerate(trial_dofs):
                    try:
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except Exception:
                        # best effort: report and keep assembling
                        import traceback
                        print("error", myid)
                        traceback.print_exc()

    if USE_PARALLEL:
        myoffset = fes2.GetMyTDofOffset()
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        m1 = csr_matrix(mat.real, dtype=float)
        m2 = csr_matrix(mat.imag, dtype=float) if is_complex else None

        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
def hcurln(fes1,
           fes2,
           coeff,
           is_complex=False,
           bdr='all',
           orderinc=1,
           verbose=False):
    '''
    Assemble a linear operator from fes1 to fes2, where the elements of
    the fes2 mesh are mapped onto boundary elements of the fes1 mesh.

    For each element of fes2, the integration points (reference and
    physical coordinates) and the unit normal computed from the element
    Jacobian are sent to the process owning the matching boundary element
    of the fes1 mesh. There map_ir evaluates the fes1 side at these points
    and the result is contracted with the weighted fes2 shape functions.
    NOTE(review): judging from the name, this is presumably the normal
    component of an H(curl) field on the boundary - confirm with map_ir.

    Parameters
    ----------
    fes1 : finite element space on the mesh carrying the boundary
        (columns of the operator).
    fes2 : finite element space on the mapped mesh (rows of the
        operator). Its shape functions are evaluated with CalcShape.
    coeff : forwarded to map_ir.
    is_complex : forwarded to get_empty_map; in parallel it also selects
        the data type of the exchanged data and whether the imaginary
        part is handed to CHypreMat.
    bdr : 'all', or a container of attributes used to select the
        elements of fes2; also forwarded to map_element.
    orderinc : forwarded to get_rule.
    verbose : print some information about the spaces and the rule.

    Returns
    -------
    CHypreMat when USE_PARALLEL is set, otherwise the object returned by
    convert_to_ScipyCoo.
    '''
    mat, _ = get_empty_map(fes2, fes1, is_complex=is_complex)

    from petram.helper.element_map import map_element

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]

    if verbose and myid == 0:
        dprint1("fes", name_fes1, name_fes2)

    mesh1 = fes1.GetMesh()
    mesh2 = fes2.GetMesh()
    # NOTE: a debugging dump of mesh2 to a hard-coded path in a developer's
    # home directory used to be written here on every call; it was removed.

    if verbose and myid == 0:
        dprint1("NE", mesh1.GetNE(), mesh2.GetNE())
    _, elmap_r = map_element(mesh1, mesh2, bdr, map_bdr=True)

    sdim1 = mesh1.SpaceDimension()
    # NOTE(review): taken from mesh1 as before; the data layout below only
    # works if mesh2 has the same space dimension - confirm.
    sdim2 = mesh1.SpaceDimension()
    dim2 = mesh2.Dimension()

    shape1 = mfem.DenseMatrix()
    shape2 = mfem.Vector()
    nor = mfem.Vector(sdim1)

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P1mat = ToScipyCoo(P).tocsr()

    # element of mesh2 -> (process, boundary element of mesh1 on it)
    el2_2_node = {}
    el2_2_el1 = {}
    for node in elmap_r:
        for i_el2, i_el1 in elmap_r[node].items():
            el2_2_node[i_el2] = node
            el2_2_el1[i_el2] = i_el1

    # working for fes2
    # find boundary element on mesh1 using mesh2 boundary
    el2_arr = [list() for x in range(nprc)]
    el1_arr = [list() for x in range(nprc)]
    for i_el in range(fes2.GetNE()):
        attr = fes2.GetAttribute(i_el)
        if bdr != 'all' and attr not in bdr:
            continue
        node = el2_2_node[i_el]
        el1_arr[node].append(el2_2_el1[i_el])
        el2_arr[node].append(i_el)

    if USE_PARALLEL:
        el1_arr = alltoall_vector(el1_arr, int)  # transfer to mesh1 owners

    # working for fes1
    # find element order on mesh1
    fe1o_arr = [list() for x in range(nprc)]
    i_fe1_arr = [list() for x in range(nprc)]
    for rank, i_bdrs in enumerate(el1_arr):
        for i_bdr in i_bdrs:
            iface = mesh1.GetBdrElementEdgeIndex(i_bdr)
            transs = mesh1.GetFaceElementTransformations(iface)
            i_el1 = transs.Elem1No
            assert transs.Elem2No == -1, "boundary must be exterior for this operator"
            fe1 = fes1.GetFE(i_el1)
            fe1o_arr[rank].append(fe1.GetOrder())
            i_fe1_arr[rank].append(i_el1)

    if USE_PARALLEL:
        fe1o_arr = alltoall_vector(fe1o_arr, int)  # transfer to mesh2

    # working for fes2
    locnor_arr = [list() for x in range(nprc)]
    data2_arr = [list() for x in range(nprc)]
    verbose1 = verbose
    for rank, i_el2s in enumerate(el2_arr):
        for i_el2, fe1o in zip(i_el2s, fe1o_arr[rank]):
            eltrans = fes2.GetElementTransformation(i_el2)
            fe2 = fes2.GetFE(i_el2)
            nd2 = fe2.GetDof()
            ir = get_rule(fe1o,
                          fe2,
                          eltrans,
                          orderinc=orderinc,
                          verbose=verbose1)
            verbose1 = False   # report the rule only once
            shape2.SetSize(nd2)
            data2 = []
            locnors = []
            for jj in range(ir.GetNPoints()):
                ip1 = ir.IntPoint(jj)
                eltrans.SetIntPoint(ip1)
                w = eltrans.Weight() * ip1.weight
                mfem.CalcOrtho(eltrans.Jacobian(), nor)
                nor2 = nor.GetDataArray() / np.linalg.norm(nor.GetDataArray())
                fe2.CalcShape(ip1, shape2)

                if dim2 == 1:
                    ref_coords = [ip1.x]
                elif dim2 == 2:
                    ref_coords = [ip1.x, ip1.y]
                else:
                    assert False, "boundary mesh must be dim=1 or 2"
                # (reference coords, physical coords, unit normal)
                locnors.append(
                    np.array(ref_coords + list(eltrans.Transform(ip1)) +
                             list(nor2)))
                data2.append(w * shape2.GetDataArray().copy())

            # np.vstack(locnors).shape = (#Npoints, dim2+sdim2*2)
            # np.vstack(data2).shape = (#Npoints, #NDoF2)
            locnor_arr[rank].append(np.vstack(locnors))
            data2_arr[rank].append(np.vstack(data2))

    if USE_PARALLEL:
        locnor_arr = alltoall_vectorv(locnor_arr, float)  # transfer to mesh1

    vdofs1_arr = [list() for x in range(nprc)]
    data1_arr = [list() for x in range(nprc)]

    # space to compute the coefficient
    MV = [mfem.Vector(sdim1), mfem.DenseMatrix(sdim1, sdim1)]

    max_misalignment = -np.inf
    for rank, i_fe1s in enumerate(i_fe1_arr):
        locnorss = locnor_arr[rank]

        for k, i_fe1 in enumerate(i_fe1s):
            fe1 = fes1.GetFE(i_fe1)
            nd1 = fe1.GetDof()
            eltrans = fes1.GetElementTransformation(i_fe1)
            doftrans = fes1.GetElementDofTransformation(i_fe1)
            locnors2 = locnorss[k]
            shape1.SetSize(nd1, sdim1)
            vdofs1 = fes1.GetElementVDofs(i_fe1)

            dof_sign1 = np.array([
                [1 if vv >= 0 else -1 for vv in vdofs1],
            ])
            vdofs1 = [-1 - x if x < 0 else x for x in vdofs1]

            mat_doftrans = get_inv_doftrans(doftrans, dof_sign1)

            if USE_PARALLEL:
                # After DofTransformation is introduced we can not use GetGlobalTDofNumber, because
                # element local DoF could be linked with two TrueDoFs in neighbor processes
                # We construct submatrix of Prolongation to construct element matrix
                # in TrueDof space
                vv1 = [
                    P1mat.indices[P1mat.indptr[ii]:P1mat.indptr[ii + 1]]
                    for ii in vdofs1
                ]
                vv3 = [
                    P1mat.data[P1mat.indptr[ii]:P1mat.indptr[ii + 1]]
                    for ii in vdofs1
                ]
                ngtof = sum(len(x) for x in vv3)
                sub_p = np.zeros((nd1, ngtof))
                # row: element local DoF, column: running index over all
                # true DoFs linked to the element DoFs
                col = 0
                for row, weights in enumerate(vv3):
                    for w in weights:
                        sub_p[row, col] = w
                        col += 1

                vdofs1 = np.hstack(vv1).flatten()
                mat_doftrans = mat_doftrans.dot(sub_p)

            res, misalignment = map_ir(fe1, eltrans, coeff, shape1, dim2,
                                       sdim2, locnors2, dof_sign1,
                                       mat_doftrans, MV)

            vdofs1_arr[rank].append(np.array(vdofs1))
            data1_arr[rank].append(res)

            max_misalignment = np.max(
                [max_misalignment, np.max(misalignment)])
            # res.shape = (#Npoints, #DoF1)

    if USE_PARALLEL:
        vdofs1_arr = alltoall_vectorv(vdofs1_arr, int)  # transfer to mesh2
        if is_complex:
            data1_arr = alltoall_vectorv(data1_arr,
                                         complex)  # transfer to mesh2
        else:
            data1_arr = alltoall_vectorv(data1_arr, float)  # transfer to mesh2
        max_misalignment = np.max(
            MPI.COMM_WORLD.gather(max_misalignment, root=0))
    dprint1("Max misalignment: ", max_misalignment)

    shared_data = []

    for rank, i_el2s in enumerate(el2_arr):
        for k, i_el2 in enumerate(i_el2s):
            vdofs1 = vdofs1_arr[rank][k]

            fe2 = fes2.GetFE(i_el2)
            eltrans2 = fes2.GetElementTransformation(i_el2)
            vdofs2 = fes2.GetElementVDofs(i_el2)
            vdofs2 = [-1 - x if x < 0 else x for x in vdofs2]

            d1 = data1_arr[rank][k]
            d2 = data2_arr[rank][k]

            # (#NDoF2, #Npoints) x (#Npoints, #DoF1)
            mm = d2.transpose().dot(d1)

            if USE_PARALLEL:
                # prepare data for not-owned DoFs, which will be shared later
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [fes2.GetGlobalTDofNumber(ii) for ii in vdofs2]

                for row, (v2, v2g) in enumerate(zip(vdofs22, vdofs22g)):
                    if v2 < 0:
                        shared_data.append([v2g, mm[row, :], vdofs1])
            else:
                vdofs22 = vdofs2

            for i, ltdof2 in enumerate(vdofs22):
                if ltdof2 < 0:
                    continue
                for j, gtdof1 in enumerate(vdofs1):
                    mat[ltdof2, gtdof1] = mat[ltdof2, gtdof1] + mm[i, j]

    if USE_PARALLEL:
        myoffset = fes2.GetMyTDofOffset()
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    for j, gtdof1 in enumerate(vdofs1):
                        mat[i, gtdof1] = mat[i, gtdof1] + elmat[j]

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        m1 = csr_matrix(mat.real, dtype=float)
        m2 = csr_matrix(mat.imag, dtype=float) if is_complex else None

        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
def convolve1d(fes1,
               fes2,
               kernel=delta,
               support=None,
               orderinc=5,
               is_complex=False,
               trial_domain='all',
               test_domain='all',
               verbose=False,
               coeff=None):
    r'''
    Fill linear operator for convolution

        \int phi_test(x) func(x-x') phi_trial(x') dx

    Parameters
    ----------
    fes1 : trial finite element space (columns of the operator).
    fes2 : test finite element space (rows of the operator).
    kernel : callable invoked as ``kernel(x2 - x1, (x2 + x1) / 2.0, w=w)``
        for a test point x2 and a trial point x1.
    support : optional callable evaluated at the mid point
        (x1 + x2) / 2.0. Point pairs whose distance |x1 - x2| exceeds
        the returned value are skipped.
    orderinc : forwarded to get_rule when the integration rule is chosen.
    is_complex : forwarded to get_empty_map; in parallel it also selects
        whether the imaginary part is handed to CHypreMat.
    trial_domain, test_domain : 'all', or a container of element
        attributes used to restrict the trial/test elements.
    verbose : print progress and element-coupling statistics.
    coeff : optional callable evaluated at the mid point (x2 + x1) / 2.0;
        its value multiplies the kernel value.

    Returns
    -------
    CHypreMat when USE_PARALLEL is set, otherwise the object returned by
    convert_to_ScipyCoo.

    Raises
    ------
    AssertionError : fes1 has no element.
    '''
    mat, _ = get_empty_map(fes2, fes1, is_complex=is_complex)

    if fes1.GetNE() == 0:
        # same guard (and message) as convolve2d: element 0 is needed to
        # choose the integration rule
        raise AssertionError("FESpace does not have element")

    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)
    npoints = ir.GetNPoints()

    shape1 = mfem.Vector()
    shape2 = mfem.Vector()

    # communication strategy
    #   (1) x2 (ir points on test space) is collected in each nodes
    #   (2) x2 is send to other nodes
    #   (3) each nodes compute \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and global index should be send back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(npoints, 1)

    # element attributes: the test filter is indexed by fes2 elements and
    # the trial filter by fes1 elements, so each needs its own mesh.
    attrs_test = fes2.GetMesh().GetAttributeArray()
    attrs_trial = fes1.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all' and attrs_test[i] not in test_domain:
            continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy())
        i2_arr.append(i)

    if len(i2_arr) > 0:
        ptx_x2 = np.vstack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[]])
        i2_arr = np.array([])

    if USE_PARALLEL:
        # note: we could implement more advanced alg. to reduce
        # the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")

    # DoF numbers of every trial element. They do not depend on the node
    # being processed, so they are collected once and sent to every node.
    vdofs1_all = []
    for j in range(fes1.GetNE()):
        local_vdofs = fes1.GetElementVDofs(j)
        if USE_PARALLEL:
            vdofs1_all.append([VDoFtoGTDoF1[i] for i in local_vdofs])
        else:
            vdofs1_all.append(local_vdofs)

    # trial elements which pass the trial_domain filter
    trial_elements = [j for j in range(fes1.GetNE())
                      if trial_domain == 'all'
                      or attrs_trial[j] in trial_domain]

    vdofs1_senddata = []
    elmats_senddata = []

    for x2_onenode, i2_onenode in zip(x2_all, i2_all):
        elmats_all = []

        for i, x2s in zip(i2_onenode, x2_onenode):  # loop over fes2
            nd2 = len(x2s)   # number of integration points on test element
            elmats = []
            for j in trial_elements:
                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                shape1.SetSize(nd1)
                eltrans = fes1.GetElementTransformation(j)

                tmp_int = np.zeros(shape1.Size(), dtype=mat.dtype)
                elmat = np.zeros((nd2, nd1), dtype=mat.dtype)

                # (position, weight, shape values) on trial element
                dataset = []
                for jj in range(npoints):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)[0]
                    fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    dataset.append((x1, w, shape1.GetDataArray().copy()))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    tmp_int.fill(0.0)
                    for x1, w, shape_arr in dataset:
                        if support is not None:
                            s = support((x1 + x2) / 2.0)
                            if np.abs(x1 - x2) > s:
                                continue

                        has_contribution = True
                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)
                        tmp_int += shape_arr * w * val
                    elmat[kkk, :] = tmp_int

                if has_contribution:
                    elmats.append((j, elmat))

            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(vdofs1_all)
        elmats_senddata.append(elmats_all)

    # send this information to knodes: the data computed for node "root"
    # is gathered on that node.
    if USE_PARALLEL:
        for root, (send_vdofs, send_elmats) in enumerate(
                zip(vdofs1_senddata, elmats_senddata)):
            gathered_vdofs = comm.gather(send_vdofs, root=root)
            gathered_elmats = comm.gather(send_elmats, root=root)
            if myid == root:
                vdofs1_data = gathered_vdofs
                elmats_data = gathered_elmats
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")
    shared_data = []
    mpi_rank = 0
    # loop over MPI nodes
    for vdofs1, elmats_all in zip(vdofs1_data, elmats_data):
        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank", mpi_rank)
            nicePrint(" Average :",
                      (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint(" Max/Min :",
                      (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))
        mpi_rank += 1

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()
            shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            # weighted test shape values (as column vectors) at each
            # integration point. They only depend on the test element, so
            # they are evaluated once and reused for every coupled trial
            # element. A copy is needed since shape2 is overwritten.
            test_columns = []
            for ii in range(npoints):
                ip2 = ir.IntPoint(ii)
                eltrans.SetIntPoint(ip2)
                ww = eltrans.Weight() * ip2.weight
                fe2.CalcShape(ip2, shape2)
                shape2 *= ww
                test_columns.append(
                    np.atleast_2d(shape2.GetDataArray().copy()).transpose())

            if USE_PARALLEL:
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                # rows whose local true DoF number is not negative
                owned_rows = np.where(np.array(vdofs22) >= 0)[0]
                vdofs222 = [x for x in vdofs22 if x >= 0]
            else:
                vdofs222 = vdofs2

            for j, elmat in elmats:
                trial_dofs = vdofs1[j]
                mm = np.zeros((len(vdofs2), len(trial_dofs)), dtype=float)

                for ii, column in enumerate(test_columns):
                    mm = mm + np.dot(column, np.atleast_2d(elmat[ii, :]))

                if USE_PARALLEL:
                    # rows with a negative local true DoF number are added
                    # later by the rank whose offset range contains the
                    # global number
                    for row, (v2, v2g) in enumerate(zip(vdofs22, vdofs22g)):
                        if v2 < 0:
                            shared_data.append([v2g, mm[row, :], trial_dofs])
                    mmm = mm[owned_rows, :]
                else:
                    mmm = mm

                # merge contribution to final mat
                for k, vv in enumerate(trial_dofs):
                    try:
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except Exception:
                        # best effort: report and keep assembling
                        import traceback
                        print("error", myid)
                        traceback.print_exc()

    if USE_PARALLEL:
        myoffset = fes2.GetMyTDofOffset()
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        m1 = csr_matrix(mat.real, dtype=float)
        m2 = csr_matrix(mat.imag, dtype=float) if is_complex else None

        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M