예제 #1
0
def convolve2d(fes1,
               fes2,
               kernel=delta,
               support=None,
               orderinc=5,
               is_complex=False,
               trial_domain='all',
               test_domain='all',
               verbose=False,
               coeff=None):
    r'''
    Fill the linear operator for a convolution

        \int phi_test(x) func(x-x') phi_trial(x') dx

    Generalized version for multi-dimensional spaces.

    test/trial
        ScalarFE, ScalarFE   : func is scalar
        VectorFE, ScalarFE   : func is vector (vertical)
        ScalarFE, VectorFE   : func is vector (horizontal)
        VectorFE, VectorFE   : func matrix

    A space is treated as a vector space when the first two letters of
    its FE collection name are 'RT' or 'ND'.

    Parameters
    ----------
    fes1 : trial FE space (its DoFs index the columns of the operator)
    fes2 : test FE space (its DoFs index the rows of the operator)
    kernel : callable, invoked as kernel(x2 - x1, (x2 + x1)/2, w=w) where
        x2 is a test-side integration point, x1 a trial-side integration
        point and w the trial-side quadrature weight. A return value of
        None means "no contribution" for that pair of points.
    support : optional callable, evaluated once per test element at the
        mean of its integration points. A non-negative return value is
        used as a cut-off distance |x1 - x2|; without it no cut-off is
        applied.
    orderinc : forwarded to get_rule.
    is_complex : forwarded to get_empty_map.
    trial_domain : 'all' or a container of element attributes selecting
        the elements of fes1 to integrate over.
    test_domain : 'all' or a container of element attributes selecting
        the elements of fes2 to integrate over.
    verbose : print progress and element coupling statistics.
    coeff : optional callable evaluated at (x2 + x1)/2; its value
        multiplies the kernel value.

    Returns
    -------
    CHypreMat when USE_PARALLEL is set, otherwise the result of
    convert_to_ScipyCoo.
    '''
    mat, _ = get_empty_map(fes2, fes1, is_complex=is_complex)

    if fes1.GetNE() == 0:
        assert False, "FESpace does not have element"
    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)
    npoints = ir.GetNPoints()

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]
    is_vec1 = name_fes1 in ['RT', 'ND']
    is_vec2 = name_fes2 in ['RT', 'ND']

    sdim = fes1.GetMesh().SpaceDimension()
    if is_vec1:
        shape1 = mfem.DenseMatrix()
        vdim1 = sdim
    else:
        shape1 = mfem.Vector()
        vdim1 = 1
    if is_vec2:
        shape2 = mfem.DenseMatrix()
        vdim2 = sdim
    else:
        shape2 = mfem.Vector()
        # this branch used to assign vdim1, leaving vdim2 undefined
        vdim2 = 1

    # communication strategy
    #   (1) x2 (ir points on test space) is collected in each nodes
    #   (2) x2 is send to other nodes
    #   (3) each nodes compute \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and global index should be send back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(npoints, sdim)

    # attrs1 filters trial (fes1) elements, attrs2 filters test (fes2)
    # elements; each must come from the mesh of its own space
    attrs1 = fes1.GetMesh().GetAttributeArray()
    attrs2 = fes2.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all' and attrs2[i] not in test_domain:
            continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy().transpose())
        i2_arr.append(i)

    if support is not None:
        supports = np.array([support(np.mean(xxx, 0)) for xxx in x2_arr])
    else:
        # negative value means "no cut-off" (see the su >= 0 test below)
        supports = -np.ones(len(x2_arr))

    if len(i2_arr) > 0:
        ptx_x2 = np.stack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[[]]])
        i2_arr = np.array([])

    if USE_PARALLEL:
        ## note: we could implement more advanced alg. to reduce
        ## the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
        s_all = comm.allgather(supports)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]
        s_all = [supports]

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")
    vdofs1_senddata = []
    elmats_senddata = []

    # column indices of every trial element; identical for all target
    # nodes, so it is built once (the list is only read afterwards)
    trial_vdofs = []
    for j in range(fes1.GetNE()):
        local_vdofs = [vv if vv >= 0 else -1 - vv
                       for vv in fes1.GetElementVDofs(j)]
        if USE_PARALLEL:
            trial_vdofs.append([VDoFtoGTDoF1[vv] for vv in local_vdofs])
        else:
            trial_vdofs.append(local_vdofs)

    for x2_onenode, i2_onenode, s_onenode in zip(x2_all, i2_all, s_all):
        elmats_all = []

        for i, x2s, su in zip(i2_onenode, x2_onenode,
                              s_onenode):  # loop over fes2
            nd2 = len(x2s)  # number of test-side integration points
            elmats = []
            for j in range(fes1.GetNE()):
                if trial_domain != 'all' and attrs1[j] not in trial_domain:
                    continue

                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                eltrans = fes1.GetElementTransformation(j)
                dof_sign1 = np.array(
                    [1 if vv >= 0 else -1 for vv in fes1.GetElementVDofs(j)])

                if is_vec1:
                    shape1.SetSize(nd1, vdim1)
                else:
                    shape1.SetSize(nd1)
                elmat = np.zeros((nd2, vdim2, nd1), dtype=mat.dtype)
                tmp_int = np.zeros((vdim2, nd1), dtype=mat.dtype).squeeze()

                # (location, weight, signed shape) at each trial-side
                # integration point
                dataset = []
                for jj in range(npoints):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)
                    if is_vec1:
                        fe1.CalcVShape(eltrans, shape1)
                    else:
                        fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    ss = shape1.GetDataArray().copy()

                    if len(ss.shape) > 1:
                        ss = np.transpose(ss)
                    ss = ss * dof_sign1
                    dataset.append((x1, w, ss))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    # reset (not scale) so that a NaN/inf from a previous
                    # test point can not leak into this one
                    tmp_int.fill(0.0)
                    has_contribution2 = False
                    for x1, w, shape_arr in dataset:
                        if su >= 0 and np.sqrt(np.sum((x1 - x2)**2)) > su:
                            continue

                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if val is None:
                            continue
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)

                        tmp_int += np.dot(val, shape_arr) * w
                        has_contribution2 = True

                    if has_contribution2:
                        elmat[kkk, ...] = tmp_int
                        has_contribution = True
                if has_contribution:
                    elmats.append((j, elmat))

            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(trial_vdofs)
        elmats_senddata.append(elmats_all)

    # send the partial integrals back to the node owning the test elements
    if USE_PARALLEL:
        for knode1, (send_vdofs, send_elmats) in enumerate(
                zip(vdofs1_senddata, elmats_senddata)):
            recv_vdofs = comm.gather(send_vdofs, root=knode1)
            recv_elmats = comm.gather(send_elmats, root=knode1)
            if myid == knode1:
                vdofs1_data = recv_vdofs
                elmats_data = recv_elmats
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")
    shared_data = []
    for mpi_rank, (vdofs1, elmats_all) in enumerate(
            zip(vdofs1_data, elmats_data)):  # loop over MPI nodes

        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank/count", mpi_rank,
                      len(coupling))
            nicePrint("   Average :",
                      (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint("   Max/Min :",
                      (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            dof_sign2 = np.array([
                [1 if vv >= 0 else -1 for vv in vdofs2],
            ]).transpose()
            vdofs2 = [-1 - x if x < 0 else x for x in vdofs2]

            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()

            if is_vec2:
                shape2.SetSize(nd2, vdim2)
            else:
                shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            # weighted, signed test shapes; they depend on the test
            # element only, so evaluate them once for all trial elements
            test_shapes = []
            for ii in range(npoints):
                ip2 = ir.IntPoint(ii)
                eltrans.SetIntPoint(ip2)
                ww = eltrans.Weight() * ip2.weight

                if is_vec2:
                    fe2.CalcVShape(eltrans, shape2)
                else:
                    fe2.CalcShape(ip2, shape2)

                shape2 *= ww
                ss = shape2.GetDataArray().reshape(-1, vdim2)
                test_shapes.append(ss * dof_sign2)  # product is a new array

            if USE_PARALLEL:
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                owned_rows = np.where(np.array(vdofs22) >= 0)[0]
                vdofs222 = [x for x in vdofs22 if x >= 0]
            else:
                vdofs222 = vdofs2

            for j, elmat in elmats:
                mm = np.zeros((len(vdofs2), len(vdofs1[j])), dtype=float)

                for ii, ss in enumerate(test_shapes):
                    # elmat[ii] is allocated as (vdim2, nd1) and ss is
                    # (nd2, vdim2), so the block is reshaped with vdim2
                    tmp_int = elmat[ii, ...].reshape(vdim2, -1)
                    mm = mm + np.dot(ss, tmp_int)

                # prepare shared data (rows whose local true DoF number
                # is negative are handed over in the broadcast below)
                if USE_PARALLEL:
                    for kkk, (v2, v2g) in enumerate(zip(vdofs22, vdofs22g)):
                        if v2 < 0:
                            shared_data.append([v2g, mm[kkk, :], vdofs1[j]])
                    mmm = mm[owned_rows, :]
                else:
                    mmm = mm

                # merge contribution to final mat
                for k, vv in enumerate(vdofs1[j]):
                    try:
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except Exception:
                        # best effort: report and keep assembling
                        import traceback
                        print("error", myid)
                        traceback.print_exc()

    if USE_PARALLEL:
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        m1 = csr_matrix(mat.real, dtype=float)
        m2 = csr_matrix(mat.imag, dtype=float) if is_complex else None
        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
예제 #2
0
def hcurln(fes1,
           fes2,
           coeff,
           is_complex=False,
           bdr='all',
           orderinc=1,
           verbose=False):
    '''
    Assemble an operator coupling boundary elements of the mesh of fes1
    with the elements of the mesh of fes2.

    For every selected element of fes2, the matching boundary element of
    the fes1 mesh is looked up with map_element. On the fes2 side the
    weighted shape functions, the reference/physical coordinates and the
    unit normal are sampled at the integration points. On the fes1 side
    map_ir evaluates its result at those same points. The element matrix
    is the product (fes2 data)^T (fes1 data).

    NOTE(review): judging from the name this maps the normal component of
    the curl of an H(curl) field; what exactly is evaluated is decided by
    map_ir, which is not visible here -- confirm.

    Parameters
    ----------
    fes1 : FE space providing the columns. The mapped boundary elements
        must be exterior (no second neighbouring element).
    fes2 : FE space providing the rows; its mesh must have dimension
        1 or 2.
    coeff : forwarded to map_ir.
    is_complex : forwarded to get_empty_map; also selects the data type
        used when exchanging the fes1 data in parallel.
    bdr : 'all' or a container of attributes selecting elements of fes2;
        also forwarded to map_element.
    orderinc : forwarded to get_rule.
    verbose : print diagnostic information.

    Returns
    -------
    CHypreMat when USE_PARALLEL is set, otherwise the result of
    convert_to_ScipyCoo.
    '''
    from petram.helper.element_map import map_element

    mat, _ = get_empty_map(fes2, fes1, is_complex=is_complex)

    name_fes1 = fes1.FEColl().Name()[:2]
    name_fes2 = fes2.FEColl().Name()[:2]

    if verbose:
        if myid == 0:
            dprint1("fes", name_fes1, name_fes2)

    mesh1 = fes1.GetMesh()
    mesh2 = fes2.GetMesh()

    if verbose:
        if myid == 0:
            dprint1("NE", mesh1.GetNE(), mesh2.GetNE())
    _, elmap_r = map_element(mesh1, mesh2, bdr, map_bdr=True)

    sdim1 = mesh1.SpaceDimension()
    sdim2 = mesh2.SpaceDimension()
    dim2 = mesh2.Dimension()

    shape1 = mfem.DenseMatrix()
    shape2 = mfem.Vector()
    # filled from the Jacobian of mesh2 elements, hence sized with sdim2
    nor = mfem.Vector(sdim2)

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P1mat = ToScipyCoo(P).tocsr()

    # element of mesh2 -> (node holding the matching mesh1 boundary
    # element, index of that boundary element)
    el2_2_node = {}
    el2_2_el1 = {}
    for node in elmap_r:
        for el2 in list(elmap_r[node]):
            el2_2_node[el2] = node
            el2_2_el1[el2] = elmap_r[node][el2]

    # working for fes2
    # find boundary element on mesh1 using mesh2 boundary
    el2_arr = [list() for x in range(nprc)]
    el1_arr = [list() for x in range(nprc)]
    for i_el in range(fes2.GetNE()):
        attr = fes2.GetAttribute(i_el)
        if bdr != 'all' and not attr in bdr:
            continue
        el1_arr[el2_2_node[i_el]].append(el2_2_el1[i_el])
        el2_arr[el2_2_node[i_el]].append(i_el)

    if USE_PARALLEL:
        el1_arr = alltoall_vector(el1_arr, int)  # transfer to mesh1 owners

    # working for fes1
    # find element order on mesh1
    fe1o_arr = [list() for x in range(nprc)]
    i_fe1_arr = [list() for x in range(nprc)]
    for rank, i_bdrs in enumerate(el1_arr):
        for i_bdr in i_bdrs:
            iface = mesh1.GetBdrElementEdgeIndex(i_bdr)
            transs = mesh1.GetFaceElementTransformations(iface)
            i_el1 = transs.Elem1No
            assert transs.Elem2No == -1, "boundary must be exterior for this operator"
            fe1 = fes1.GetFE(i_el1)
            fe1o_arr[rank].append(fe1.GetOrder())
            i_fe1_arr[rank].append(i_el1)

    if USE_PARALLEL:
        fe1o_arr = alltoall_vector(fe1o_arr, int)  # transfer to mesh2

    # working for fes2
    locnor_arr = [list() for x in range(nprc)]
    data2_arr = [list() for x in range(nprc)]
    verbose1 = verbose  # get_rule is verbose for the first element only
    for rank, i_el2s in enumerate(el2_arr):
        for i_el2, fe1o in zip(i_el2s, fe1o_arr[rank]):
            eltrans = fes2.GetElementTransformation(i_el2)
            fe2 = fes2.GetFE(i_el2)
            nd2 = fe2.GetDof()
            ir = get_rule(fe1o,
                          fe2,
                          eltrans,
                          orderinc=orderinc,
                          verbose=verbose1)
            verbose1 = False
            shape2.SetSize(nd2)
            data2 = []
            locnors = []
            for jj in range(ir.GetNPoints()):
                ip1 = ir.IntPoint(jj)
                eltrans.SetIntPoint(ip1)
                w = eltrans.Weight() * ip1.weight
                mfem.CalcOrtho(eltrans.Jacobian(), nor)
                nor2 = nor.GetDataArray() / np.linalg.norm(nor.GetDataArray())
                fe2.CalcShape(ip1, shape2)

                if dim2 == 1:
                    ref_coords = [ip1.x]
                elif dim2 == 2:
                    ref_coords = [ip1.x, ip1.y]
                else:
                    assert False, "boundary mesh must be dim=1 or 2"
                # reference coords, physical coords, unit normal
                locnors.append(
                    np.array(ref_coords + list(eltrans.Transform(ip1)) +
                             list(nor2)))
                data2.append(w * shape2.GetDataArray().copy())

            #   np.vstack(locnors).shape = (#Npoints, dim2+sdim2*2)
            #   np.vstack(data2).shape = (#Npoints, #NDoF2)
            locnor_arr[rank].append(np.vstack(locnors))
            data2_arr[rank].append(np.vstack(data2))

    if USE_PARALLEL:
        locnor_arr = alltoall_vectorv(locnor_arr, float)  # transfer to mesh1

    vdofs1_arr = [list() for x in range(nprc)]
    data1_arr = [list() for x in range(nprc)]

    # space to compute the coefficient
    MV = [mfem.Vector(sdim1), mfem.DenseMatrix(sdim1, sdim1)]

    max_misalignment = -np.inf
    for rank, i_fe1s in enumerate(i_fe1_arr):
        locnorss = locnor_arr[rank]

        for k, i_fe1 in enumerate(i_fe1s):
            fe1 = fes1.GetFE(i_fe1)
            nd1 = fe1.GetDof()
            eltrans = fes1.GetElementTransformation(i_fe1)
            doftrans = fes1.GetElementDofTransformation(i_fe1)

            locnors2 = locnorss[k]
            shape1.SetSize(nd1, sdim1)
            vdofs1 = fes1.GetElementVDofs(i_fe1)

            dof_sign1 = np.array([
                [1 if vv >= 0 else -1 for vv in vdofs1],
            ])
            vdofs1 = [-1 - x if x < 0 else x for x in vdofs1]

            mat_doftrans = get_inv_doftrans(doftrans, dof_sign1)

            if USE_PARALLEL:
                #  After DofTransformation is introduced we can not use
                #  GetGlobalTDofNumber, because an element local DoF could
                #  be linked with two TrueDoFs in neighbor processes.
                #  We construct a submatrix of the prolongation to build
                #  the element matrix in TrueDof space.
                vv1 = [
                    P1mat.indices[P1mat.indptr[ii]:P1mat.indptr[ii + 1]]
                    for ii in vdofs1
                ]
                vv3 = [
                    P1mat.data[P1mat.indptr[ii]:P1mat.indptr[ii + 1]]
                    for ii in vdofs1
                ]
                ngtof = sum(len(x) for x in vv3)
                sub_p = np.zeros((nd1, ngtof))
                k2 = 0
                for k1, weights in enumerate(vv3):
                    for w in weights:
                        sub_p[k1, k2] = w
                        k2 = k2 + 1

                vdofs1 = np.hstack(vv1).flatten()
                mat_doftrans = mat_doftrans.dot(sub_p)

            res, misalignment = map_ir(fe1, eltrans, coeff, shape1, dim2,
                                       sdim2, locnors2, dof_sign1,
                                       mat_doftrans, MV)

            vdofs1_arr[rank].append(np.array(vdofs1))
            data1_arr[rank].append(res)

            max_misalignment = np.max([max_misalignment, np.max(misalignment)])
            # res.shape = (#Npoints, #DoF1)

    if USE_PARALLEL:
        vdofs1_arr = alltoall_vectorv(vdofs1_arr, int)  # transfer to mesh2
        if is_complex:
            data1_arr = alltoall_vectorv(data1_arr,
                                         complex)  # transfer to mesh2
        else:
            data1_arr = alltoall_vectorv(data1_arr, float)  # transfer to mesh2
        max_misalignment = np.max(
            MPI.COMM_WORLD.gather(max_misalignment, root=0))
    dprint1("Max misalignment: ", max_misalignment)

    shared_data = []

    for rank, i_el2s in enumerate(el2_arr):
        for k, i_el2 in enumerate(i_el2s):
            vdofs1 = vdofs1_arr[rank][k]

            vdofs2 = fes2.GetElementVDofs(i_el2)
            vdofs2 = [-1 - x if x < 0 else x for x in vdofs2]

            d1 = data1_arr[rank][k]
            d2 = data2_arr[rank][k]

            mm = d2.transpose().dot(d1)

            if USE_PARALLEL:
                # prepare data for not-owned DoFs, which will be shared later
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [fes2.GetGlobalTDofNumber(ii) for ii in vdofs2]

                for kkk, (v2, v2g) in enumerate(zip(vdofs22, vdofs22g)):
                    if v2 < 0:
                        shared_data.append([v2g, mm[kkk, :], vdofs1])
            else:
                vdofs22 = vdofs2

            # entries are accumulated one by one (no fancy indexing)
            for i, ltdof2 in enumerate(vdofs22):
                if ltdof2 < 0:
                    continue
                for j, gtdof1 in enumerate(vdofs1):
                    mat[ltdof2, gtdof1] = mat[ltdof2, gtdof1] + mm[i, j]

    if USE_PARALLEL:
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    for j, gtdof1 in enumerate(vdofs1):
                        mat[i, gtdof1] = mat[i, gtdof1] + elmat[j]

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        m1 = csr_matrix(mat.real, dtype=float)
        m2 = csr_matrix(mat.imag, dtype=float) if is_complex else None
        from mfem.common.chypre import CHypreMat

        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo
        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M
예제 #3
0
def convolve1d(fes1,
               fes2,
               kernel=delta,
               support=None,
               orderinc=5,
               is_complex=False,
               trial_domain='all',
               test_domain='all',
               verbose=False,
               coeff=None):
    r'''
    Fill the linear operator for a convolution (1D)

        \int phi_test(x) func(x-x') phi_trial(x') dx

    Parameters
    ----------
    fes1 : trial FE space (its DoFs index the columns of the operator)
    fes2 : test FE space (its DoFs index the rows of the operator)
    kernel : callable, invoked as kernel(x2 - x1, (x2 + x1)/2, w=w) where
        x2 is a test-side integration point, x1 a trial-side integration
        point and w the trial-side quadrature weight. A return value of
        None means "no contribution" for that pair of points.
    support : optional callable evaluated at (x1 + x2)/2; pairs of points
        with |x1 - x2| larger than the returned value are skipped.
    orderinc : forwarded to get_rule.
    is_complex : forwarded to get_empty_map.
    trial_domain : 'all' or a container of element attributes selecting
        the elements of fes1 to integrate over.
    test_domain : 'all' or a container of element attributes selecting
        the elements of fes2 to integrate over.
    verbose : print progress and element coupling statistics.
    coeff : optional callable evaluated at (x2 + x1)/2; its value
        multiplies the kernel value.

    Returns
    -------
    CHypreMat when USE_PARALLEL is set, otherwise the result of
    convert_to_ScipyCoo.
    '''
    mat, _ = get_empty_map(fes2, fes1, is_complex=is_complex)

    if fes1.GetNE() == 0:
        assert False, "FESpace does not have element"
    eltrans1 = fes1.GetElementTransformation(0)
    ir = get_rule(fes1.GetFE(0), fes2.GetFE(0), eltrans1, orderinc, verbose)
    npoints = ir.GetNPoints()

    shape1 = mfem.Vector()
    shape2 = mfem.Vector()

    # communication strategy
    #   (1) x2 (ir points on test space) is collected in each nodes
    #   (2) x2 is send to other nodes
    #   (3) each nodes compute \int f(x2-x1) phi(x1)
    #   (4) non-zero results of (3) and global index should be send back

    # Step (1, 2)
    if verbose:
        dprint1("Step 1,2")
    x2_arr = []
    i2_arr = []

    ptx = mfem.DenseMatrix(npoints, 1)

    # attrs1 filters trial (fes1) elements, attrs2 filters test (fes2)
    # elements; each must come from the mesh of its own space
    attrs1 = fes1.GetMesh().GetAttributeArray()
    attrs2 = fes2.GetMesh().GetAttributeArray()

    for i in range(fes2.GetNE()):  # scan test space
        if test_domain != 'all' and attrs2[i] not in test_domain:
            continue
        eltrans = fes2.GetElementTransformation(i)
        eltrans.Transform(ir, ptx)
        x2_arr.append(ptx.GetDataArray().copy())
        i2_arr.append(i)
    if len(i2_arr) > 0:
        ptx_x2 = np.vstack(x2_arr)
        i2_arr = np.hstack(i2_arr)
    else:
        ptx_x2 = np.array([[]])
        i2_arr = np.array([])

    if USE_PARALLEL:
        ## note: we could implement more advanced alg. to reduce
        ## the amount of data exchange..
        x2_all = comm.allgather(ptx_x2)
        i2_all = comm.allgather(i2_arr)
    else:
        x2_all = [ptx_x2]
        i2_all = [i2_arr]

    if USE_PARALLEL:
        # this is global TrueDoF (offset is not subtracted)
        P = fes1.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF1 = P.indices
        P = fes2.Dof_TrueDof_Matrix()
        P = ToScipyCoo(P).tocsr()
        VDoFtoGTDoF2 = P.indices

    # Step 3
    if verbose:
        dprint1("Step 3")
    vdofs1_senddata = []
    elmats_senddata = []

    # column indices of every trial element; identical for all target
    # nodes, so it is built once (the list is only read afterwards)
    trial_vdofs = []
    for j in range(fes1.GetNE()):
        local_vdofs = fes1.GetElementVDofs(j)
        if USE_PARALLEL:
            trial_vdofs.append([VDoFtoGTDoF1[vv] for vv in local_vdofs])
        else:
            trial_vdofs.append(local_vdofs)

    for x2_onenode, i2_onenode in zip(x2_all, i2_all):
        elmats_all = []

        for i, x2s in zip(i2_onenode, x2_onenode):  # loop over fes2
            nd2 = len(x2s)  # number of test-side integration points
            elmats = []
            for j in range(fes1.GetNE()):
                if trial_domain != 'all' and attrs1[j] not in trial_domain:
                    continue

                # collect integration
                fe1 = fes1.GetFE(j)
                nd1 = fe1.GetDof()
                shape1.SetSize(nd1)
                eltrans = fes1.GetElementTransformation(j)

                tmp_int = np.zeros(shape1.Size(), dtype=mat.dtype)
                elmat = np.zeros((nd2, nd1), dtype=mat.dtype)

                # (location, weight, shape) at each trial-side
                # integration point
                dataset = []
                for jj in range(npoints):
                    ip1 = ir.IntPoint(jj)
                    eltrans.SetIntPoint(ip1)
                    x1 = eltrans.Transform(ip1)[0]
                    fe1.CalcShape(ip1, shape1)
                    w = eltrans.Weight() * ip1.weight
                    dataset.append((x1, w, shape1.GetDataArray().copy()))

                has_contribution = False
                for kkk, x2 in enumerate(x2s):
                    # reset (not scale) so that a NaN/inf from a previous
                    # test point can not leak into this one
                    tmp_int.fill(0.0)

                    for x1, w, shape_arr in dataset:
                        if support is not None:
                            s = support((x1 + x2) / 2.0)
                            if np.abs(x1 - x2) > s:
                                continue

                        val = kernel(x2 - x1, (x2 + x1) / 2.0, w=w)
                        if val is None:
                            # same convention as convolve2d: None means
                            # the kernel has nothing to add here
                            continue
                        has_contribution = True
                        if coeff is not None:
                            val = val * coeff((x2 + x1) / 2.0)

                        tmp_int += shape_arr * w * val
                    elmat[kkk, :] = tmp_int

                if has_contribution:
                    elmats.append((j, elmat))
            if len(elmats) > 0:
                elmats_all.append((i, elmats))

        vdofs1_senddata.append(trial_vdofs)
        elmats_senddata.append(elmats_all)

    # send the partial integrals back to the node owning the test elements
    if USE_PARALLEL:
        for knode1, (send_vdofs, send_elmats) in enumerate(
                zip(vdofs1_senddata, elmats_senddata)):
            recv_vdofs = comm.gather(send_vdofs, root=knode1)
            recv_elmats = comm.gather(send_elmats, root=knode1)
            if myid == knode1:
                vdofs1_data = recv_vdofs
                elmats_data = recv_elmats
    else:
        vdofs1_data = vdofs1_senddata
        elmats_data = elmats_senddata

    # Step 4
    if verbose:
        dprint1("Step 4")
    shared_data = []
    for mpi_rank, (vdofs1, elmats_all) in enumerate(
            zip(vdofs1_data, elmats_data)):  # loop over MPI nodes

        if verbose:
            coupling = [len(elmats) for i, elmats in elmats_all]
            nicePrint("Element coupling for rank", mpi_rank)
            nicePrint("   Average :",
                      (0 if len(coupling) == 0 else np.mean(coupling)))
            nicePrint("   Max/Min :",
                      (0 if len(coupling) == 0 else np.max(coupling)),
                      (0 if len(coupling) == 0 else np.min(coupling)))

        for i, elmats in elmats_all:  # corresponds to loop over fes2
            vdofs2 = fes2.GetElementVDofs(i)
            fe2 = fes2.GetFE(i)
            nd2 = fe2.GetDof()
            shape2.SetSize(nd2)

            eltrans = fes2.GetElementTransformation(i)

            # weighted test shapes as column vectors; they depend on the
            # test element only, so evaluate them once for all trial
            # elements (copied because shape2 is reused as a buffer)
            test_shapes = []
            for ii in range(npoints):
                ip2 = ir.IntPoint(ii)
                eltrans.SetIntPoint(ip2)
                ww = eltrans.Weight() * ip2.weight
                fe2.CalcShape(ip2, shape2)
                shape2 *= ww
                test_shapes.append(
                    np.atleast_2d(shape2.GetDataArray()).transpose().copy())

            if USE_PARALLEL:
                vdofs22 = [fes2.GetLocalTDofNumber(ii) for ii in vdofs2]
                vdofs22g = [VDoFtoGTDoF2[ii] for ii in vdofs2]
                owned_rows = np.where(np.array(vdofs22) >= 0)[0]
                vdofs222 = [x for x in vdofs22 if x >= 0]
            else:
                vdofs222 = vdofs2

            for j, elmat in elmats:
                mm = np.zeros((len(vdofs2), len(vdofs1[j])), dtype=float)

                for ii, column in enumerate(test_shapes):
                    mm = mm + np.dot(column, np.atleast_2d(elmat[ii, :]))

                # rows whose local true DoF number is negative are handed
                # over in the broadcast below
                if USE_PARALLEL:
                    for kkk, (v2, v2g) in enumerate(zip(vdofs22, vdofs22g)):
                        if v2 < 0:
                            shared_data.append([v2g, mm[kkk, :], vdofs1[j]])
                    mmm = mm[owned_rows, :]
                else:
                    mmm = mm

                # merge contribution to final mat
                for k, vv in enumerate(vdofs1[j]):
                    try:
                        tmp = mat[vdofs222, vv] + mmm[:, [k]]
                        mat[vdofs222, vv] = tmp.flatten()
                    except Exception:
                        # best effort: report and keep assembling
                        import traceback
                        print("error", myid)
                        traceback.print_exc()

    if USE_PARALLEL:
        for source_id in range(nprc):
            data = comm.bcast(shared_data, root=source_id)
            myoffset = fes2.GetMyTDofOffset()
            for v2g, elmat, vdofs1 in data:
                if v2g >= myoffset and v2g < myoffset + mat.shape[0]:
                    i = v2g - myoffset
                    mat[i, vdofs1] = mat[i, vdofs1] + elmat

    from scipy.sparse import coo_matrix, csr_matrix

    if USE_PARALLEL:
        m1 = csr_matrix(mat.real, dtype=float)
        m2 = csr_matrix(mat.imag, dtype=float) if is_complex else None
        from mfem.common.chypre import CHypreMat
        start_col = fes1.GetMyTDofOffset()
        end_col = fes1.GetMyTDofOffset() + fes1.GetTrueVSize()
        col_starts = [start_col, end_col, mat.shape[1]]
        M = CHypreMat(m1, m2, col_starts=col_starts)
    else:
        from petram.helper.block_matrix import convert_to_ScipyCoo

        M = convert_to_ScipyCoo(coo_matrix(mat, dtype=mat.dtype))

    return M