Esempio n. 1
0
def get_phir(XK, XV, surface, xq, Cells, par_reac, ind_reac):
    """
    It computes the reaction potential.
    To compute this potential we need more terms in the Taylor expansion, that
    is the reason why we need fine parameters (par_reac class) and a different
    array of indices (ind_reac) than ind0.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the reaction potential.
    xq      : array, it contains the position of the charges.
    Cells   : array, it contains the tree cells.
    par_reac: class, fine parameters related to the surface.
    ind_reac: array, it contains the indices related to the treecode
                     computation.

    Returns
    --------
    phi_reac: array, reaction potential.
    AI_int  : int, counter of the amount of near singular integrals solved.
    """

    N = len(XK)
    K = par_reac.K
    w = getWeights(K)

    # Setup vector.
    # Every entry of XK/XV is expanded into K consecutive entries: expanded
    # index i reads element i // K and weight i % K. The products keep the
    # left-to-right order (X * w * area [* normal]) so the values match an
    # entry-by-entry evaluation exactly.
    idx = numpy.arange(N * K)
    elem = idx // K
    quad = idx % K

    X_V = numpy.zeros(N * K)
    X_Kx = numpy.zeros(N * K)
    X_Ky = numpy.zeros(N * K)
    X_Kz = numpy.zeros(N * K)
    X_Kc = numpy.zeros(N * K)
    X_Vc = numpy.zeros(N * K)

    XK_weighted = XK[elem] * w[quad] * surface.area[elem]
    X_V[:] = XV[elem] * w[quad] * surface.area[elem]
    X_Kx[:] = XK_weighted * surface.normal[elem, 0]
    X_Ky[:] = XK_weighted * surface.normal[elem, 1]
    X_Kz[:] = XK_weighted * surface.normal[elem, 2]
    X_Kc[:] = XK[elem]
    X_Vc[:] = XV[elem]

    # P2M (starting from cell 0)
    C = 0
    getMultipole(Cells, C, surface.xj, surface.yj, surface.zj, X_V, X_Kx, X_Ky,
                 X_Kz, ind_reac, par_reac.P, par_reac.NCRIT)

    # M2M: visit cells from the last one down to cell 1, each paired with
    # its parent.
    for C in reversed(range(1, len(Cells))):
        PC = Cells[C].parent
        upwardSweep(Cells, C, PC, par_reac.P, ind_reac.II, ind_reac.JJ,
                    ind_reac.KK, ind_reac.index, ind_reac.combII,
                    ind_reac.combJJ, ind_reac.combKK, ind_reac.IImii,
                    ind_reac.JJmjj, ind_reac.KKmkk, ind_reac.index_small,
                    ind_reac.index_ptr)

    # Evaluation
    IorE = 0  # This evaluation is on charge points, no self-operator
    # 0 means it doesn't matter if it is internal or external.
    AI_int = 0
    phi_reac = numpy.zeros(len(xq))
    # The time accumulators are threaded through the helpers, which return
    # the updated values.
    time_P2P = 0.
    time_M2P = 0.
    for i, xq_i in enumerate(xq):
        CJ = 0
        Kval = 0.
        Vval = 0.
        source = []
        Kval, Vval, source, time_M2P = M2P_nonvec(Cells, CJ, xq_i, Kval, Vval,
                                                  ind_reac.index_large,
                                                  par_reac, source, time_M2P)
        Kval, Vval, AI_int, time_P2P = P2P_nonvec(Cells, surface, X_V, X_Kx,
                                                  X_Ky, X_Kz, X_Kc, X_Vc,
                                                  xq_i, Kval, Vval, IorE,
                                                  par_reac, w, source, AI_int,
                                                  time_P2P)
        phi_reac[i] = (-Kval + Vval) / (4 * pi)

    return phi_reac, AI_int
Esempio n. 2
0
def get_phir_gpu(XK, XV, surface, field, par_reac, kernel):
    """
    It computes the reaction potential on the GPU  and it brings the data
    to the cpu.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the reaction potential.
    field   : class, information about the different regions in the molecule.
    par_reac: class, fine parameters related to the surface.
    kernel  : object exposing get_function("get_phir") (presumably a pycuda
              source module -- confirm against the caller).

    Returns
    --------
    phir_cpu: array, reaction potential brought from the GPU to the cpu.
    AI_int  : int, counter of the amount of near singular integrals solved.
    """

    REAL = par_reac.REAL
    Nq = len(field.xq)
    N = len(XK)

    # Setup vector.
    # Every entry of XK/XV is expanded into K consecutive entries: expanded
    # index i reads element i // K and weight i % K. The products keep the
    # left-to-right order (X * w * area [* normal]) so the values match an
    # entry-by-entry evaluation exactly.
    K = par_reac.K
    w = getWeights(K)
    idx = numpy.arange(N * K)
    elem = idx // K
    quad = idx % K

    X_V = numpy.zeros(N * K)
    X_Kx = numpy.zeros(N * K)
    X_Ky = numpy.zeros(N * K)
    X_Kz = numpy.zeros(N * K)
    X_Kc = numpy.zeros(N * K)
    X_Vc = numpy.zeros(N * K)

    XK_weighted = XK[elem] * w[quad] * surface.area[elem]
    X_V[:] = XV[elem] * w[quad] * surface.area[elem]
    X_Kx[:] = XK_weighted * surface.normal[elem, 0]
    X_Ky[:] = XK_weighted * surface.normal[elem, 1]
    X_Kz[:] = XK_weighted * surface.normal[elem, 2]
    X_Kc[:] = XK[elem]
    X_Vc[:] = XV[elem]

    # Reorder the source vectors with surface.sortSource, cast to REAL and
    # upload; also upload zero-initialised output buffers.
    sort = surface.sortSource
    phir = cuda.to_device(numpy.zeros(Nq, dtype=REAL))
    m_gpu = cuda.to_device(X_V[sort].astype(REAL))
    mx_gpu = cuda.to_device(X_Kx[sort].astype(REAL))
    my_gpu = cuda.to_device(X_Ky[sort].astype(REAL))
    mz_gpu = cuda.to_device(X_Kz[sort].astype(REAL))
    mKc_gpu = cuda.to_device(X_Kc[sort].astype(REAL))
    mVc_gpu = cuda.to_device(X_Vc[sort].astype(REAL))
    AI_int_gpu = cuda.to_device(numpy.zeros(Nq, dtype=numpy.int32))
    xkDev = cuda.to_device(surface.xk.astype(REAL))
    wkDev = cuda.to_device(surface.wk.astype(REAL))

    # Named *_kernel so the local does not shadow the CPU routine get_phir.
    get_phir_kernel = kernel.get_function("get_phir")
    # Enough blocks of BSZ threads to cover the Nq charges.
    GSZ = int(numpy.ceil(float(Nq) / par_reac.BSZ))

    get_phir_kernel(phir,
                    field.xq_gpu,
                    field.yq_gpu,
                    field.zq_gpu,
                    m_gpu,
                    mx_gpu,
                    my_gpu,
                    mz_gpu,
                    mKc_gpu,
                    mVc_gpu,
                    surface.xjDev,
                    surface.yjDev,
                    surface.zjDev,
                    surface.AreaDev,
                    surface.kDev,
                    surface.vertexDev,
                    numpy.int32(len(surface.xj)),
                    numpy.int32(Nq),
                    numpy.int32(par_reac.K),
                    xkDev,
                    wkDev,
                    REAL(par_reac.threshold),
                    AI_int_gpu,
                    numpy.int32(len(surface.xk)),
                    surface.XskDev,
                    surface.WskDev,
                    block=(par_reac.BSZ, 1, 1),
                    grid=(GSZ, 1))

    # Per-charge counters are summed into a single total on the host.
    AI_aux = cuda.from_device(AI_int_gpu, Nq, dtype=numpy.int32)
    AI_int = numpy.sum(AI_aux)

    phir_cpu = cuda.from_device(phir, Nq, dtype=REAL)

    return phir_cpu, AI_int
Esempio n. 3
0
def project_Kt(XKt, LorY, surfSrc, surfTar, Kt_diag, self, param, ind0, timing,
               kernel):
    """
    It computes the adjoint double layer potential.

    Arguments
    ----------
    XKt    : array, input for the adjoint double layer potential.
    LorY   : int, Laplace (1) or Yukawa (2).
    surfSrc: class, source surface, the one that contains the gauss points.
    surfTar: class, target surface, the one that contains the collocation points.
    Kt_diag: scalar or array, diagonal elements of the adjoint double layer
                    integral operator. The diagonal term is added when any
                    entry exceeds 1e-12 in magnitude.
    self   : int, position in the surface array of the source surface.
    param  : class, parameters related to the surface. param.Nround is
                    overwritten by this function.
    ind0   : array, it contains the indices related to the treecode computation.
    timing : class, it contains timing information for different parts of the
                    code. Its time_* attributes are incremented here.
    kernel : pycuda source module.

    Returns
    --------
    Kt_lyr: array, adjoint double layer potential.
    """

    if param.GPU == 1:
        tic = cuda.Event()
        toc = cuda.Event()
    else:
        tic = Event()
        toc = Event()

    REAL = param.REAL
    Ns = len(surfSrc.triangle)

    # Expand XKt: expanded index i reads element i // K and weight i % K.
    tic.record()
    K = param.K
    w = getWeights(K)
    NsK = numpy.arange(Ns * K)
    elem = NsK // K
    X_Kt = numpy.zeros(Ns * K)
    X_Ktc = numpy.zeros(Ns * K)
    X_Kt[:] = XKt[elem] * w[NsK % K] * surfSrc.area[elem]
    X_Ktc[:] = XKt[elem]

    toc.record()
    toc.synchronize()
    # NOTE(review): the 1e-3 factor presumably converts ms to s -- confirm
    # against the Event implementation.
    timing.time_mass += tic.time_till(toc) * 1e-3

    # P2M: a single zero array is passed for all three directional inputs.
    tic.record()
    C = 0
    X_aux = numpy.zeros(Ns * K)
    getMultipole(surfSrc.tree, C, surfSrc.xj, surfSrc.yj, surfSrc.zj, X_Kt,
                 X_aux, X_aux, X_aux, ind0, param.P, param.NCRIT)
    toc.record()
    toc.synchronize()
    timing.time_P2M += tic.time_till(toc) * 1e-3

    # M2M: visit cells from the last one down to cell 1, each with its parent.
    tic.record()
    for C in reversed(range(1, len(surfSrc.tree))):
        PC = surfSrc.tree[C].parent
        upwardSweep(surfSrc.tree, C, PC, param.P, ind0.II, ind0.JJ, ind0.KK,
                    ind0.index, ind0.combII, ind0.combJJ, ind0.combKK,
                    ind0.IImii, ind0.JJmjj, ind0.KKmkk, ind0.index_small,
                    ind0.index_ptr)
    toc.record()
    toc.synchronize()
    timing.time_M2M += tic.time_till(toc) * 1e-3

    tic.record()
    X_Kt = X_Kt[surfSrc.sortSource]
    X_Ktc = X_Ktc[surfSrc.sortSource]
    toc.record()
    toc.synchronize()
    timing.time_sort += tic.time_till(toc) * 1e-3

    param.Nround = len(surfTar.twig) * param.NCRIT
    Ktx_aux = numpy.zeros(param.Nround)
    Kty_aux = numpy.zeros(param.Nround)
    Ktz_aux = numpy.zeros(param.Nround)

    ### CPU code
    if param.GPU == 0:
        # The M2P pass runs only when this offsetMlt entry is positive.
        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            Ktx_aux, Kty_aux, Ktz_aux = M2PKt_sort(surfSrc, surfTar, Ktx_aux,
                                                   Kty_aux, Ktz_aux, self,
                                                   ind0.index_large, param,
                                                   LorY, timing)

        Ktx_aux, Kty_aux, Ktz_aux = P2PKt_sort(surfSrc, surfTar, X_Kt, X_Ktc,
                                               Ktx_aux, Kty_aux, Ktz_aux, self,
                                               LorY, w, param, timing)

    ### GPU code
    elif param.GPU == 1:
        Ktx_gpu = cuda.to_device(Ktx_aux.astype(REAL))
        Kty_gpu = cuda.to_device(Kty_aux.astype(REAL))
        Ktz_gpu = cuda.to_device(Ktz_aux.astype(REAL))

        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            Ktx_gpu, Kty_gpu, Ktz_gpu = M2PKt_gpu(surfSrc, surfTar, Ktx_gpu,
                                                  Kty_gpu, Ktz_gpu, self, ind0,
                                                  param, LorY, timing, kernel)

        Ktx_gpu, Kty_gpu, Ktz_gpu = P2PKt_gpu(surfSrc, surfTar, X_Kt, X_Ktc,
                                              Ktx_gpu, Kty_gpu, Ktz_gpu, self,
                                              LorY, w, param, timing, kernel)

        tic.record()
        Ktx_aux = cuda.from_device(Ktx_gpu, len(Ktx_aux), dtype=REAL)
        Kty_aux = cuda.from_device(Kty_gpu, len(Kty_aux), dtype=REAL)
        Ktz_aux = cuda.from_device(Ktz_gpu, len(Ktz_aux), dtype=REAL)
        toc.record()
        toc.synchronize()
        timing.time_trans += tic.time_till(toc) * 1e-3

    # Undo the target ordering and combine the three components with the
    # target normals.
    tic.record()
    Kt_lyr = (Ktx_aux[surfTar.unsort] * surfTar.normal[:, 0] +
              Kty_aux[surfTar.unsort] * surfTar.normal[:, 1] +
              Ktz_aux[surfTar.unsort] * surfTar.normal[:, 2])

    # numpy.any keeps the scalar behaviour and also accepts an array of
    # diagonal entries, for which a bare `if abs(...) > tol` would raise.
    if numpy.any(numpy.abs(Kt_diag) > 1e-12):  # if same surface
        Kt_lyr += Kt_diag * XKt

    toc.record()
    toc.synchronize()
    timing.time_sort += tic.time_till(toc) * 1e-3

    return Kt_lyr
Esempio n. 4
0
def project(XK, XV, LorY, surfSrc, surfTar, K_diag, V_diag, IorE, self, param,
            ind0, timing, kernel):
    """
    Evaluate the single and double layer potentials of surfSrc on surfTar.

    Arguments
    ----------
    XK     : array, input for the double layer potential.
    XV     : array, input for the single layer potential.
    LorY   : int, Laplace (1) or Yukawa (2).
    surfSrc: class, source surface, the one that contains the gauss points.
    surfTar: class, target surface, the one that contains the collocation
                    points.
    K_diag : array, diagonal elements of the double layer integral operator.
    V_diag : array, diagonal elements of the single layer integral operator
                    (forwarded to the CPU near-field routine only).
    IorE   : int, internal (1) or external (2).
    self   : int, position in the surface array of the source surface.
    param  : class, parameters related to the surface. param.Nround is
                    overwritten by this function.
    ind0   : array, it contains the indices related to the treecode computation.
    timing : class, it contains timing information for different parts of the
                    code. Its time_* attributes are incremented here.
    kernel : pycuda source module.

    Returns
    --------
    K_lyr  : array, double layer potential.
    V_lyr  : array, single layer potential.
    """

    # Pick the event type matching the execution mode.
    event_cls = cuda.Event if param.GPU == 1 else Event
    tic = event_cls()
    toc = event_cls()

    def _elapsed():
        # Close the interval opened by the last tic.record().
        toc.record()
        toc.synchronize()
        return tic.time_till(toc) * 1e-3

    REAL = param.REAL
    Ns = len(surfSrc.triangle)
    L = numpy.sqrt(2 * surfSrc.area)  # Representative length

    # --- expanded source vectors -------------------------------------------
    tic.record()
    K = param.K
    w = getWeights(K)
    n_src = Ns * K
    flat = numpy.arange(n_src)
    elem = flat // K   # element feeding each expanded entry
    quad = flat % K    # weight index of each expanded entry

    X_V = numpy.zeros(n_src)
    X_Kx = numpy.zeros(n_src)
    X_Ky = numpy.zeros(n_src)
    X_Kz = numpy.zeros(n_src)
    X_Kc = numpy.zeros(n_src)
    X_Vc = numpy.zeros(n_src)

    area_src = surfSrc.area[elem]
    # Same left-to-right product order as X * w * area * normal.
    XK_wa = XK[elem] * w[quad] * area_src
    X_V[:] = XV[elem] * w[quad] * area_src
    X_Kx[:] = XK_wa * surfSrc.normal[elem, 0]
    X_Ky[:] = XK_wa * surfSrc.normal[elem, 1]
    X_Kz[:] = XK_wa * surfSrc.normal[elem, 2]
    X_Kc[:] = XK[elem]
    X_Vc[:] = XV[elem]
    timing.time_mass += _elapsed()

    # --- P2M ----------------------------------------------------------------
    tree = surfSrc.tree
    tic.record()
    getMultipole(tree, 0, surfSrc.xj, surfSrc.yj, surfSrc.zj, X_V,
                 X_Kx, X_Ky, X_Kz, ind0, param.P, param.NCRIT)
    timing.time_P2M += _elapsed()

    # --- M2M: last cell down to cell 1, each with its parent ---------------
    tic.record()
    for child in range(len(tree) - 1, 0, -1):
        upwardSweep(tree, child, tree[child].parent, param.P, ind0.II,
                    ind0.JJ, ind0.KK, ind0.index, ind0.combII, ind0.combJJ,
                    ind0.combKK, ind0.IImii, ind0.JJmjj, ind0.KKmkk,
                    ind0.index_small, ind0.index_ptr)
    timing.time_M2M += _elapsed()

    # --- reorder sources ----------------------------------------------------
    tic.record()
    X_V, X_Kx, X_Ky, X_Kz, X_Kc, X_Vc = [
        vec[surfSrc.sortSource]
        for vec in (X_V, X_Kx, X_Ky, X_Kz, X_Kc, X_Vc)
    ]
    timing.time_sort += _elapsed()

    param.Nround = len(surfTar.twig) * param.NCRIT
    K_aux = numpy.zeros(param.Nround)
    V_aux = numpy.zeros(param.Nround)

    if param.GPU == 0:
        # CPU path: optional far field, then near field.
        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            K_aux, V_aux = M2P_sort(surfSrc, surfTar, K_aux, V_aux, self,
                                    ind0.index_large, param, LorY, timing)

        K_aux, V_aux = P2P_sort(surfSrc, surfTar, X_V, X_Kx, X_Ky, X_Kz, X_Kc,
                                X_Vc, K_aux, V_aux, self, LorY, K_diag, V_diag,
                                IorE, L, w, param, timing)

    elif param.GPU == 1:
        # GPU path: upload accumulators, run kernels, download results.
        K_gpu = cuda.to_device(K_aux.astype(REAL))
        V_gpu = cuda.to_device(V_aux.astype(REAL))

        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            K_gpu, V_gpu = M2P_gpu(surfSrc, surfTar, K_gpu, V_gpu, self, ind0,
                                   param, LorY, timing, kernel)

        K_gpu, V_gpu = P2P_gpu(surfSrc, surfTar, X_V, X_Kx, X_Ky, X_Kz, X_Kc,
                               X_Vc, K_gpu, V_gpu, self, LorY, K_diag, IorE, L,
                               w, param, timing, kernel)

        tic.record()
        K_aux = cuda.from_device(K_gpu, len(K_aux), dtype=REAL)
        V_aux = cuda.from_device(V_gpu, len(V_aux), dtype=REAL)
        timing.time_trans += _elapsed()

    # --- restore target ordering -------------------------------------------
    tic.record()
    K_lyr = K_aux[surfTar.unsort]
    V_lyr = V_aux[surfTar.unsort]
    timing.time_sort += _elapsed()

    return K_lyr, V_lyr
Esempio n. 5
0
def get_phir(XK, XV, surface, xq, Cells, par_reac, ind_reac):
    """
    It computes the reaction potential.
    To compute this potential we need more terms in the Taylor expansion, that
    is the reason why we need fine parameters (par_reac class) and a different
    array of indices (ind_reac) than ind0.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the reaction potential.
    xq      : array, it contains the position of the charges.
    Cells   : array, it contains the tree cells.
    par_reac: class, fine parameters related to the surface.
    ind_reac: array, it contains the indices related to the treecode
                     computation.

    Returns
    --------
    phi_reac: array, reaction potential.
    AI_int  : int, counter of the amount of near singular integrals solved.
    """

    N = len(XK)
    K = par_reac.K
    w = getWeights(K)

    # Setup vector.
    # Every entry of XK/XV is expanded into K consecutive entries: expanded
    # index i reads element i // K and weight i % K. The products keep the
    # left-to-right order (X * w * area [* normal]) so the values match an
    # entry-by-entry evaluation exactly.
    idx = numpy.arange(N * K)
    elem = idx // K
    quad = idx % K

    X_V = numpy.zeros(N * K)
    X_Kx = numpy.zeros(N * K)
    X_Ky = numpy.zeros(N * K)
    X_Kz = numpy.zeros(N * K)
    X_Kc = numpy.zeros(N * K)
    X_Vc = numpy.zeros(N * K)

    XK_weighted = XK[elem] * w[quad] * surface.area[elem]
    X_V[:] = XV[elem] * w[quad] * surface.area[elem]
    X_Kx[:] = XK_weighted * surface.normal[elem, 0]
    X_Ky[:] = XK_weighted * surface.normal[elem, 1]
    X_Kz[:] = XK_weighted * surface.normal[elem, 2]
    X_Kc[:] = XK[elem]
    X_Vc[:] = XV[elem]

    # P2M (starting from cell 0)
    C = 0
    getMultipole(Cells, C, surface.xj, surface.yj, surface.zj, X_V, X_Kx, X_Ky,
                 X_Kz, ind_reac, par_reac.P, par_reac.NCRIT)

    # M2M: visit cells from the last one down to cell 1, each paired with
    # its parent.
    for C in reversed(range(1, len(Cells))):
        PC = Cells[C].parent
        upwardSweep(Cells, C, PC, par_reac.P, ind_reac.II, ind_reac.JJ,
                    ind_reac.KK, ind_reac.index, ind_reac.combII,
                    ind_reac.combJJ, ind_reac.combKK, ind_reac.IImii,
                    ind_reac.JJmjj, ind_reac.KKmkk, ind_reac.index_small,
                    ind_reac.index_ptr)

    # Evaluation
    IorE = 0  # This evaluation is on charge points, no self-operator
    # 0 means it doesn't matter if it is internal or external.
    AI_int = 0
    phi_reac = numpy.zeros(len(xq))
    # The time accumulators are threaded through the helpers, which return
    # the updated values.
    time_P2P = 0.
    time_M2P = 0.
    for i, xq_i in enumerate(xq):
        CJ = 0
        Kval = 0.
        Vval = 0.
        source = []
        Kval, Vval, source, time_M2P = M2P_nonvec(Cells, CJ, xq_i, Kval, Vval,
                                                  ind_reac.index_large,
                                                  par_reac, source, time_M2P)
        Kval, Vval, AI_int, time_P2P = P2P_nonvec(
            Cells, surface, X_V, X_Kx, X_Ky, X_Kz, X_Kc, X_Vc, xq_i, Kval,
            Vval, IorE, par_reac, w, source, AI_int, time_P2P)
        phi_reac[i] = (-Kval + Vval) / (4 * pi)

    return phi_reac, AI_int
Esempio n. 6
0
def get_phir_gpu(XK, XV, surface, field, par_reac, kernel):
    """
    It computes the reaction potential on the GPU  and it brings the data
    to the cpu.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the reaction potential.
    field   : class, information about the different regions in the molecule.
    par_reac: class, fine parameters related to the surface.
    kernel  : object exposing get_function("get_phir") (presumably a pycuda
              source module -- confirm against the caller).

    Returns
    --------
    phir_cpu: array, reaction potential brought from the GPU to the cpu.
    AI_int  : int, counter of the amount of near singular integrals solved.
    """

    REAL = par_reac.REAL
    Nq = len(field.xq)
    N = len(XK)

    # Setup vector.
    # Every entry of XK/XV is expanded into K consecutive entries: expanded
    # index i reads element i // K and weight i % K. The products keep the
    # left-to-right order (X * w * area [* normal]) so the values match an
    # entry-by-entry evaluation exactly.
    K = par_reac.K
    w = getWeights(K)
    idx = numpy.arange(N * K)
    elem = idx // K
    quad = idx % K

    X_V = numpy.zeros(N * K)
    X_Kx = numpy.zeros(N * K)
    X_Ky = numpy.zeros(N * K)
    X_Kz = numpy.zeros(N * K)
    X_Kc = numpy.zeros(N * K)
    X_Vc = numpy.zeros(N * K)

    XK_weighted = XK[elem] * w[quad] * surface.area[elem]
    X_V[:] = XV[elem] * w[quad] * surface.area[elem]
    X_Kx[:] = XK_weighted * surface.normal[elem, 0]
    X_Ky[:] = XK_weighted * surface.normal[elem, 1]
    X_Kz[:] = XK_weighted * surface.normal[elem, 2]
    X_Kc[:] = XK[elem]
    X_Vc[:] = XV[elem]

    # Reorder the source vectors with surface.sortSource, cast to REAL and
    # upload; also upload zero-initialised output buffers.
    sort = surface.sortSource
    phir = cuda.to_device(numpy.zeros(Nq, dtype=REAL))
    m_gpu = cuda.to_device(X_V[sort].astype(REAL))
    mx_gpu = cuda.to_device(X_Kx[sort].astype(REAL))
    my_gpu = cuda.to_device(X_Ky[sort].astype(REAL))
    mz_gpu = cuda.to_device(X_Kz[sort].astype(REAL))
    mKc_gpu = cuda.to_device(X_Kc[sort].astype(REAL))
    mVc_gpu = cuda.to_device(X_Vc[sort].astype(REAL))
    AI_int_gpu = cuda.to_device(numpy.zeros(Nq, dtype=numpy.int32))
    xkDev = cuda.to_device(surface.xk.astype(REAL))
    wkDev = cuda.to_device(surface.wk.astype(REAL))

    # Named *_kernel so the local does not shadow the CPU routine get_phir.
    get_phir_kernel = kernel.get_function("get_phir")
    # Enough blocks of BSZ threads to cover the Nq charges.
    GSZ = int(numpy.ceil(float(Nq) / par_reac.BSZ))

    get_phir_kernel(phir,
                    field.xq_gpu,
                    field.yq_gpu,
                    field.zq_gpu,
                    m_gpu,
                    mx_gpu,
                    my_gpu,
                    mz_gpu,
                    mKc_gpu,
                    mVc_gpu,
                    surface.xjDev,
                    surface.yjDev,
                    surface.zjDev,
                    surface.AreaDev,
                    surface.kDev,
                    surface.vertexDev,
                    numpy.int32(len(surface.xj)),
                    numpy.int32(Nq),
                    numpy.int32(par_reac.K),
                    xkDev,
                    wkDev,
                    REAL(par_reac.threshold),
                    AI_int_gpu,
                    numpy.int32(len(surface.xk)),
                    surface.XskDev,
                    surface.WskDev,
                    block=(par_reac.BSZ, 1, 1),
                    grid=(GSZ, 1))

    # Per-charge counters are summed into a single total on the host.
    AI_aux = cuda.from_device(AI_int_gpu, Nq, dtype=numpy.int32)
    AI_int = numpy.sum(AI_aux)

    phir_cpu = cuda.from_device(phir, Nq, dtype=REAL)

    return phir_cpu, AI_int
Esempio n. 7
0
def project(XK, XV, LorY, surfSrc, surfTar, K_diag, V_diag, IorE, self, param,
            ind0, timing, kernel):
    """
    It computes the single and double layer potentials.

    Arguments
    ----------
    XK     : array, input for the double layer potential.
    XV     : array, input for the single layer potential.
    LorY   : int, Laplace (1) or Yukawa (2).
    surfSrc: class, source surface, the one that contains the gauss points.
    surfTar: class, target surface, the one that contains the collocation
                    points.
    K_diag : array, diagonal elements of the double layer integral operator.
    V_diag : array, diagonal elements of the single layer integral operator
                    (forwarded to the CPU near-field routine only).
    IorE   : int, internal (1) or external (2).
    self   : int, position in the surface array of the source surface.
    param  : class, parameters related to the surface. param.Nround is
                    overwritten by this function.
    ind0   : array, it contains the indices related to the treecode computation.
    timing : class, it contains timing information for different parts of the
                    code. Its time_* attributes are incremented here.
    kernel : pycuda source module.

    Returns
    --------
    K_lyr  : array, double layer potential.
    V_lyr  : array, single layer potential.
    """

    if param.GPU == 1:
        tic = cuda.Event()
        toc = cuda.Event()
    else:
        tic = Event()
        toc = Event()

    REAL = param.REAL
    Ns = len(surfSrc.triangle)
    L = numpy.sqrt(2 * surfSrc.area)  # Representative length

    # Expand XK/XV: expanded index i reads element i // K and weight i % K.
    tic.record()
    K = param.K
    w = getWeights(K)
    X_V = numpy.zeros(Ns * K)
    X_Kx = numpy.zeros(Ns * K)
    X_Ky = numpy.zeros(Ns * K)
    X_Kz = numpy.zeros(Ns * K)
    X_Kc = numpy.zeros(Ns * K)
    X_Vc = numpy.zeros(Ns * K)

    NsK = numpy.arange(Ns * K)
    elem = NsK // K
    quad = NsK % K
    # Same left-to-right product order as X * w * area * normal.
    XK_weighted = XK[elem] * w[quad] * surfSrc.area[elem]
    X_V[:] = XV[elem] * w[quad] * surfSrc.area[elem]
    X_Kx[:] = XK_weighted * surfSrc.normal[elem, 0]
    X_Ky[:] = XK_weighted * surfSrc.normal[elem, 1]
    X_Kz[:] = XK_weighted * surfSrc.normal[elem, 2]
    X_Kc[:] = XK[elem]
    X_Vc[:] = XV[elem]

    toc.record()
    toc.synchronize()
    # NOTE(review): the 1e-3 factor presumably converts ms to s -- confirm
    # against the Event implementation.
    timing.time_mass += tic.time_till(toc) * 1e-3

    # P2M
    tic.record()
    C = 0
    getMultipole(surfSrc.tree, C, surfSrc.xj, surfSrc.yj, surfSrc.zj, X_V,
                 X_Kx, X_Ky, X_Kz, ind0, param.P, param.NCRIT)
    toc.record()
    toc.synchronize()
    timing.time_P2M += tic.time_till(toc) * 1e-3

    # M2M: visit cells from the last one down to cell 1, each with its parent.
    tic.record()
    for C in reversed(range(1, len(surfSrc.tree))):
        PC = surfSrc.tree[C].parent
        upwardSweep(surfSrc.tree, C, PC, param.P, ind0.II, ind0.JJ, ind0.KK,
                    ind0.index, ind0.combII, ind0.combJJ, ind0.combKK,
                    ind0.IImii, ind0.JJmjj, ind0.KKmkk, ind0.index_small,
                    ind0.index_ptr)
    toc.record()
    toc.synchronize()
    timing.time_M2M += tic.time_till(toc) * 1e-3

    tic.record()
    X_V = X_V[surfSrc.sortSource]
    X_Kx = X_Kx[surfSrc.sortSource]
    X_Ky = X_Ky[surfSrc.sortSource]
    X_Kz = X_Kz[surfSrc.sortSource]
    X_Kc = X_Kc[surfSrc.sortSource]
    X_Vc = X_Vc[surfSrc.sortSource]
    toc.record()
    toc.synchronize()
    timing.time_sort += tic.time_till(toc) * 1e-3

    param.Nround = len(surfTar.twig) * param.NCRIT
    K_aux = numpy.zeros(param.Nround)
    V_aux = numpy.zeros(param.Nround)

    ### CPU code
    if param.GPU == 0:
        # Guard the M2P pass exactly like the GPU branch below does: it only
        # runs when this offsetMlt entry is positive.
        # NOTE(review): assumes M2P_sort has nothing to contribute when the
        # entry is zero -- confirm against M2P_sort.
        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            K_aux, V_aux = M2P_sort(surfSrc, surfTar, K_aux, V_aux, self,
                                    ind0.index_large, param, LorY, timing)

        K_aux, V_aux = P2P_sort(surfSrc, surfTar, X_V, X_Kx, X_Ky, X_Kz, X_Kc,
                                X_Vc, K_aux, V_aux, self, LorY, K_diag, V_diag,
                                IorE, L, w, param, timing)

    ### GPU code
    elif param.GPU == 1:
        K_gpu = cuda.to_device(K_aux.astype(REAL))
        V_gpu = cuda.to_device(V_aux.astype(REAL))

        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            K_gpu, V_gpu = M2P_gpu(surfSrc, surfTar, K_gpu, V_gpu, self, ind0,
                                   param, LorY, timing, kernel)

        K_gpu, V_gpu = P2P_gpu(surfSrc, surfTar, X_V, X_Kx, X_Ky, X_Kz, X_Kc,
                               X_Vc, K_gpu, V_gpu, self, LorY, K_diag, IorE, L,
                               w, param, timing, kernel)

        tic.record()
        K_aux = cuda.from_device(K_gpu, len(K_aux), dtype=REAL)
        V_aux = cuda.from_device(V_gpu, len(V_aux), dtype=REAL)
        toc.record()
        toc.synchronize()
        timing.time_trans += tic.time_till(toc) * 1e-3

    # Undo the target ordering.
    tic.record()
    K_lyr = K_aux[surfTar.unsort]
    V_lyr = V_aux[surfTar.unsort]
    toc.record()
    toc.synchronize()
    timing.time_sort += tic.time_till(toc) * 1e-3

    return K_lyr, V_lyr
Esempio n. 8
0
def project_Kt(XKt, LorY, surfSrc, surfTar, Kt_diag, self, param, ind0, timing,
               kernel):
    """
    It computes the adjoint double layer potential.

    Arguments
    ----------
    XKt    : array, input for the adjoint double layer potential.
    LorY   : int, Laplace (1) or Yukawa (2).
    surfSrc: class, source surface, the one that contains the gauss points.
    surfTar: class, target surface, the one that contains the collocation points.
    Kt_diag: scalar or array, diagonal elements of the adjoint double layer
                    integral operator. The diagonal term is added when any
                    entry exceeds 1e-12 in magnitude.
    self   : int, position in the surface array of the source surface.
    param  : class, parameters related to the surface. param.Nround is
                    overwritten by this function.
    ind0   : array, it contains the indices related to the treecode computation.
    timing : class, it contains timing information for different parts of the
                    code. Its time_* attributes are incremented here.
    kernel : pycuda source module.

    Returns
    --------
    Kt_lyr: array, adjoint double layer potential.
    """

    if param.GPU == 1:
        tic = cuda.Event()
        toc = cuda.Event()
    else:
        tic = Event()
        toc = Event()

    REAL = param.REAL
    Ns = len(surfSrc.triangle)

    # Expand XKt: expanded index i reads element i // K and weight i % K.
    tic.record()
    K = param.K
    w = getWeights(K)
    NsK = numpy.arange(Ns * K)
    elem = NsK // K
    X_Kt = numpy.zeros(Ns * K)
    X_Ktc = numpy.zeros(Ns * K)
    X_Kt[:] = XKt[elem] * w[NsK % K] * surfSrc.area[elem]
    X_Ktc[:] = XKt[elem]

    toc.record()
    toc.synchronize()
    # NOTE(review): the 1e-3 factor presumably converts ms to s -- confirm
    # against the Event implementation.
    timing.time_mass += tic.time_till(toc) * 1e-3

    # P2M: a single zero array is passed for all three directional inputs.
    tic.record()
    C = 0
    X_aux = numpy.zeros(Ns * K)
    getMultipole(surfSrc.tree, C, surfSrc.xj, surfSrc.yj, surfSrc.zj, X_Kt,
                 X_aux, X_aux, X_aux, ind0, param.P, param.NCRIT)
    toc.record()
    toc.synchronize()
    timing.time_P2M += tic.time_till(toc) * 1e-3

    # M2M: visit cells from the last one down to cell 1, each with its parent.
    tic.record()
    for C in reversed(range(1, len(surfSrc.tree))):
        PC = surfSrc.tree[C].parent
        upwardSweep(surfSrc.tree, C, PC, param.P, ind0.II, ind0.JJ, ind0.KK,
                    ind0.index, ind0.combII, ind0.combJJ, ind0.combKK,
                    ind0.IImii, ind0.JJmjj, ind0.KKmkk, ind0.index_small,
                    ind0.index_ptr)
    toc.record()
    toc.synchronize()
    timing.time_M2M += tic.time_till(toc) * 1e-3

    tic.record()
    X_Kt = X_Kt[surfSrc.sortSource]
    X_Ktc = X_Ktc[surfSrc.sortSource]
    toc.record()
    toc.synchronize()
    timing.time_sort += tic.time_till(toc) * 1e-3

    param.Nround = len(surfTar.twig) * param.NCRIT
    Ktx_aux = numpy.zeros(param.Nround)
    Kty_aux = numpy.zeros(param.Nround)
    Ktz_aux = numpy.zeros(param.Nround)

    ### CPU code
    if param.GPU == 0:
        # The M2P pass runs only when this offsetMlt entry is positive.
        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            Ktx_aux, Kty_aux, Ktz_aux = M2PKt_sort(
                surfSrc, surfTar, Ktx_aux, Kty_aux, Ktz_aux, self,
                ind0.index_large, param, LorY, timing)

        Ktx_aux, Kty_aux, Ktz_aux = P2PKt_sort(surfSrc, surfTar, X_Kt, X_Ktc,
                                               Ktx_aux, Kty_aux, Ktz_aux, self,
                                               LorY, w, param, timing)

    ### GPU code
    elif param.GPU == 1:
        Ktx_gpu = cuda.to_device(Ktx_aux.astype(REAL))
        Kty_gpu = cuda.to_device(Kty_aux.astype(REAL))
        Ktz_gpu = cuda.to_device(Ktz_aux.astype(REAL))

        if surfTar.offsetMlt[self, len(surfTar.twig)] > 0:
            Ktx_gpu, Kty_gpu, Ktz_gpu = M2PKt_gpu(surfSrc, surfTar, Ktx_gpu,
                                                  Kty_gpu, Ktz_gpu, self, ind0,
                                                  param, LorY, timing, kernel)

        Ktx_gpu, Kty_gpu, Ktz_gpu = P2PKt_gpu(surfSrc, surfTar, X_Kt, X_Ktc,
                                              Ktx_gpu, Kty_gpu, Ktz_gpu, self,
                                              LorY, w, param, timing, kernel)

        tic.record()
        Ktx_aux = cuda.from_device(Ktx_gpu, len(Ktx_aux), dtype=REAL)
        Kty_aux = cuda.from_device(Kty_gpu, len(Kty_aux), dtype=REAL)
        Ktz_aux = cuda.from_device(Ktz_gpu, len(Ktz_aux), dtype=REAL)
        toc.record()
        toc.synchronize()
        timing.time_trans += tic.time_till(toc) * 1e-3

    # Undo the target ordering and combine the three components with the
    # target normals.
    tic.record()
    Kt_lyr = (Ktx_aux[surfTar.unsort] * surfTar.normal[:, 0] +
              Kty_aux[surfTar.unsort] * surfTar.normal[:, 1] +
              Ktz_aux[surfTar.unsort] * surfTar.normal[:, 2])

    # numpy.any keeps the scalar behaviour and also accepts an array of
    # diagonal entries, for which a bare `if abs(...) > tol` would raise.
    if numpy.any(numpy.abs(Kt_diag) > 1e-12):  # if same surface
        Kt_lyr += Kt_diag * XKt

    toc.record()
    toc.synchronize()
    timing.time_sort += tic.time_till(toc) * 1e-3

    return Kt_lyr
Esempio n. 9
0
def get_dphirdr_gpu(XK, XV, surface, field, par_reac, kernel):
    """
    It computes the x, y and z derivatives of the reaction potential at the
    charge positions by launching the "get_dphirdr" GPU kernel.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the derivatives; its
                     device arrays (xjDev, AreaDev, ...) are handed to the
                     kernel as they are.
    field   : class, it provides the charge positions (xq) and their device
                     copies (xq_gpu, yq_gpu, zq_gpu).
    par_reac: class, fine parameters related to the surface (REAL, K, BSZ,
                     threshold are read here).
    kernel  : object exposing get_function(); presumably the compiled CUDA
                     module -- confirm against the caller.

    Returns
    --------
    dphir_x_cpu: array, x derivative at each charge (length Nq, dtype REAL).
    dphir_y_cpu: array, y derivative at each charge (length Nq, dtype REAL).
    dphir_z_cpu: array, z derivative at each charge (length Nq, dtype REAL).
    AI_int     : int, sum over charges of the per-charge counters written by
                      the kernel.
    """

    REAL = par_reac.REAL
    Nq = len(field.xq)
    N = len(XK)
    K = par_reac.K

    # Setup vector: entry i belongs to triangle i // K, Gauss point i % K.
    w = getWeights(K)
    w_arr = numpy.asarray(w)
    dbl = numpy.asarray(XK, dtype=numpy.float64)
    sgl = numpy.asarray(XV, dtype=numpy.float64)
    flat = numpy.arange(N * K)
    tri = flat // K
    gauss = flat % K

    # Same left-to-right multiplication order as the scalar formulation.
    X_V = sgl[tri] * w_arr[gauss] * surface.area[tri]
    dbl_weighted = dbl[tri] * w_arr[gauss] * surface.area[tri]
    X_Kx = dbl_weighted * surface.normal[tri, 0]
    X_Ky = dbl_weighted * surface.normal[tri, 1]
    X_Kz = dbl_weighted * surface.normal[tri, 2]
    X_Kc = dbl[tri]
    X_Vc = sgl[tri]

    # Upload outputs (zero-initialised) and the sorted source vectors.
    sort = surface.sortSource
    dphir_x = cuda.to_device(numpy.zeros(Nq, dtype=REAL))
    dphir_y = cuda.to_device(numpy.zeros(Nq, dtype=REAL))
    dphir_z = cuda.to_device(numpy.zeros(Nq, dtype=REAL))
    m_gpu = cuda.to_device(X_V[sort].astype(REAL))
    mx_gpu = cuda.to_device(X_Kx[sort].astype(REAL))
    my_gpu = cuda.to_device(X_Ky[sort].astype(REAL))
    mz_gpu = cuda.to_device(X_Kz[sort].astype(REAL))
    mKc_gpu = cuda.to_device(X_Kc[sort].astype(REAL))
    mVc_gpu = cuda.to_device(X_Vc[sort].astype(REAL))
    AI_int_gpu = cuda.to_device(numpy.zeros(Nq, dtype=numpy.int32))
    xkDev = cuda.to_device(surface.xk.astype(REAL))
    wkDev = cuda.to_device(surface.wk.astype(REAL))

    get_dphirdr = kernel.get_function("get_dphirdr")
    # Enough blocks of BSZ threads to cover the Nq charges.
    GSZ = int(numpy.ceil(float(Nq) / par_reac.BSZ))

    get_dphirdr(dphir_x,
                dphir_y,
                dphir_z,
                field.xq_gpu,
                field.yq_gpu,
                field.zq_gpu,
                m_gpu,
                mx_gpu,
                my_gpu,
                mz_gpu,
                mKc_gpu,
                mVc_gpu,
                surface.xjDev,
                surface.yjDev,
                surface.zjDev,
                surface.AreaDev,
                surface.kDev,
                surface.vertexDev,
                numpy.int32(len(surface.xj)),
                numpy.int32(Nq),
                numpy.int32(par_reac.K),
                xkDev,
                wkDev,
                REAL(par_reac.threshold),
                AI_int_gpu,
                numpy.int32(len(surface.xk)),
                surface.XskDev,
                surface.WskDev,
                block=(par_reac.BSZ, 1, 1),
                grid=(GSZ, 1))

    # Bring the results back to the host.
    AI_aux = cuda.from_device(AI_int_gpu, Nq, dtype=numpy.int32)
    AI_int = numpy.sum(AI_aux)

    dphir_x_cpu = cuda.from_device(dphir_x, Nq, dtype=REAL)
    dphir_y_cpu = cuda.from_device(dphir_y, Nq, dtype=REAL)
    dphir_z_cpu = cuda.from_device(dphir_z, Nq, dtype=REAL)

    return dphir_x_cpu, dphir_y_cpu, dphir_z_cpu, AI_int
Esempio n. 10
0
def get_dphirdr(XK, XV, surface, xq, Cells, par_reac, ind_reac):
    """
    It computes the x, y and z derivatives of the reaction potential at the
    charge positions on the CPU, one charge at a time, combining
    M2P_nonvec and P2P_nonvec_derivative.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the derivatives.
    xq      : array, it contains the position of the charges.
    Cells   : array, it contains the tree cells.
    par_reac: class, fine parameters related to the surface.
    ind_reac: array, it contains the indices related to the treecode
                     computation (index_large is read here).

    Returns
    --------
    dphix_reac: array, x derivative at each charge.
    dphiy_reac: array, y derivative at each charge.
    dphiz_reac: array, z derivative at each charge.
    AI_int    : int, counter threaded through P2P_nonvec_derivative.
    """

    N = len(XK)
    K = par_reac.K

    # Setup vector: entry i belongs to triangle i // K, Gauss point i % K.
    w = getWeights(K)
    w_arr = numpy.asarray(w)
    dbl = numpy.asarray(XK, dtype=numpy.float64)
    sgl = numpy.asarray(XV, dtype=numpy.float64)
    flat = numpy.arange(N * K)
    tri = flat // K
    gauss = flat % K

    # Same left-to-right multiplication order as the scalar formulation.
    X_V = sgl[tri] * w_arr[gauss] * surface.area[tri]
    dbl_weighted = dbl[tri] * w_arr[gauss] * surface.area[tri]
    X_Kx = dbl_weighted * surface.normal[tri, 0]
    X_Ky = dbl_weighted * surface.normal[tri, 1]
    X_Kz = dbl_weighted * surface.normal[tri, 2]
    X_Kc = dbl[tri]
    X_Vc = sgl[tri]

    # Evaluation
    IorE = 0  # This evaluation is on charge points, no self-operator
    AI_int = 0
    Nq = len(xq)
    dphix_reac = numpy.zeros(Nq)
    dphiy_reac = numpy.zeros(Nq)
    dphiz_reac = numpy.zeros(Nq)
    time_P2P = 0.
    time_M2P = 0.
    for i, xq_i in enumerate(xq):
        CJ = 0
        dKxval = 0.
        dKyval = 0.
        dKzval = 0.
        dVxval = 0.
        dVyval = 0.
        dVzval = 0.
        source = []

        # NOTE(review): only the x components receive the M2P_nonvec
        # contribution; the y and z accumulators reach
        # P2P_nonvec_derivative still at 0 -- confirm this is intended.
        dKxval, dVxval, source, time_M2P = M2P_nonvec(
            Cells, CJ, xq_i, dKxval, dVxval, ind_reac.index_large, par_reac,
            source, time_M2P)

        (dKxval, dKyval, dKzval, dVxval, dVyval, dVzval, AI_int,
         time_P2P) = P2P_nonvec_derivative(
             Cells, surface, X_V, X_Kx, X_Ky, X_Kz, X_Kc, X_Vc, xq_i,
             dKxval, dKyval, dKzval, dVxval, dVyval, dVzval, IorE, par_reac,
             w, source, AI_int, time_P2P)

        dphix_reac[i] = (-dKxval + dVxval) / (4 * pi)
        dphiy_reac[i] = (-dKyval + dVyval) / (4 * pi)
        dphiz_reac[i] = (-dKzval + dVzval) / (4 * pi)

    return dphix_reac, dphiy_reac, dphiz_reac, AI_int
Esempio n. 11
0
def get_phir(XK, XV, surface, xq, Cells, par_reac, ind_reac):
    """
    It computes the reaction potential using direct interaction.
    To compute this potential we need more terms in the Taylor expansion, that
    is the reason why we need fine parameters (par_reac class) and a different
    array of indices (ind_reac) than ind0.

    Arguments
    ----------
    XK      : array, input for the double layer potential.
    XV      : array, input for the single layer potential.
    surface : class, surface where we are computing the reaction potential.
    xq      : array, it contains the position of the charges.
    Cells   : array, it contains the tree cells (not used by this direct
                     version; kept for interface compatibility).
    par_reac: class, fine parameters related to the surface.
    ind_reac: array, it contains the indices related to the treecode
                     computation (not used by this direct version).

    Returns
    --------
    phi_reac: array, reaction potential.
    AI_int  : int, counter of the amount of near singular integrals solved.
    """

    N = len(XK)
    K = par_reac.K

    # Setup vector: entry i belongs to triangle i // K, Gauss point i % K.
    w = getWeights(K)
    w_arr = numpy.asarray(w)
    dbl = numpy.asarray(XK, dtype=numpy.float64)
    sgl = numpy.asarray(XV, dtype=numpy.float64)
    flat = numpy.arange(N * K)
    tri_of = flat // K
    gauss_of = flat % K

    # Same left-to-right multiplication order as the scalar formulation.
    X_V = sgl[tri_of] * w_arr[gauss_of] * surface.area[tri_of]
    dbl_weighted = dbl[tri_of] * w_arr[gauss_of] * surface.area[tri_of]
    X_Kx = dbl_weighted * surface.normal[tri_of, 0]
    X_Ky = dbl_weighted * surface.normal[tri_of, 1]
    X_Kz = dbl_weighted * surface.normal[tri_of, 2]
    X_Kc = dbl[tri_of]
    X_Vc = sgl[tri_of]

    # Evaluation
    IorE = 0  # This evaluation is on charge points, no self-operator
    # 0 means it doesn't matter if it is internal or external.

    # NOTE(review): AI_int is an immutable int passed by value and the return
    # value of direct_c is discarded, so the counter returned below is
    # always 0 -- confirm whether direct_c is meant to report it.
    AI_int = 0
    phi_reac = numpy.zeros(len(xq))

    # Direct interaction: every Gauss point of the surface is a source.
    source = numpy.arange(len(surface.xj), dtype=numpy.int32)

    LorY = 1

    s_xj = surface.xj[source]
    s_yj = surface.yj[source]
    s_zj = surface.zj[source]
    s_m = X_V[source]
    s_mx = X_Kx[source]
    s_my = X_Ky[source]
    s_mz = X_Kz[source]
    s_mKc = X_Kc[source]
    s_mVc = X_Vc[source]

    # Integer (floor) division, consistent with the i // K indexing above.
    tri = (source // par_reac.K).astype(numpy.int32)  # Triangle
    gauss = (source % par_reac.K).astype(numpy.int32)  # Gauss point

    K_diag = 0
    V_diag = 0

    xq_arr = numpy.ravel(numpy.array([xq[:, 0]]))
    yq_arr = numpy.ravel(numpy.array([xq[:, 1]]))
    zq_arr = numpy.ravel(numpy.array([xq[:, 2]]))

    # phi_reac is 1-D, so numpy.ravel hands direct_c the same buffer that is
    # returned below.
    direct_c(
        int(LorY), K_diag, V_diag, int(IorE),
        numpy.ravel(surface.vertex[surface.triangle[:]]), tri, gauss,
        surface.xi, surface.yi, surface.zi, s_xj, s_yj, s_zj,
        xq_arr, yq_arr, zq_arr, s_m, s_mx, s_my, s_mz, s_mKc, s_mVc,
        numpy.array([-1], dtype=numpy.int32), surface.area, surface.sglInt_int,
        surface.sglInt_ext, surface.xk, surface.wk, surface.Xsk, surface.Wsk,
        par_reac.kappa, par_reac.threshold, par_reac.eps, w[0], AI_int,
        numpy.ravel(phi_reac))

    return phi_reac, AI_int