Esempio n. 1
0
def sum(a, dtype=None, stream=None, allocator=None):
    """Sum the elements of ``a`` with a PyCUDA reduction kernel.

    The kernel is obtained from ``pycuda.reduction.get_sum_kernel`` using the
    requested output ``dtype`` and the input array's own ``a.dtype``, then
    invoked on ``a``. ``stream`` and ``allocator`` are forwarded to the kernel
    call unchanged, and whatever that call returns is handed back as-is.
    """
    # Resolved at call time rather than when the enclosing module is loaded.
    from pycuda.reduction import get_sum_kernel

    reduce_to_sum = get_sum_kernel(dtype, a.dtype)
    result = reduce_to_sum(a, stream=stream, allocator=allocator)
    return result
Esempio n. 2
0
def sum(a, dtype=None, stream=None, allocator=None):
    """Return the reduction-kernel sum of ``a``.

    Looks up the sum kernel for the (``dtype``, ``a.dtype``) pair via
    ``pycuda.reduction.get_sum_kernel`` and applies it to ``a`` in a single
    expression, passing ``stream`` and ``allocator`` through as keywords.
    """
    from pycuda.reduction import get_sum_kernel

    return get_sum_kernel(dtype, a.dtype)(
        a, stream=stream, allocator=allocator)
Esempio n. 3
0
    def __init__(self, mat, geom, bcs, opt=None, dtype=np.float64, TPB=128):
        """Specialize and compile the CUDA kernels, then allocate device data.

        The kernel file is used as a text template: placeholder tokens in it
        are textually replaced by values derived from the arguments, the
        result is compiled with ``SourceModule``, and the device arrays and
        kernel handles used by the solver are stored on ``self`` (and on
        ``opt`` when it is given).

        Args:
            mat: Material parameters; reads ``E``, ``nu``, ``rho``, ``tsi``
                and ``ecrit``.
            geom: Geometry; reads ``L``, ``hrad``, ``mi``, ``NB``, ``NN``,
                ``NX``, ``NY``, ``NZ``, ``L0s``, ``jadd``, ``bbox`` and
                ``Sf``.
            bcs: Boundary conditions; reads ``ntau``, ``NBCi``, ``EBCi``,
                ``NBC`` and ``EBC``.
            opt: Optional optimization settings. When given, ``opt.alpha`` is
                substituted into the kernels and ``d_kbar``, ``d_RM``,
                ``d_calcKbar`` and ``d_updateK`` are attached to it. When
                ``None``, ``0`` is substituted for the alpha token.
            dtype: Element type of the floating-point device arrays. For
                ``np.float32`` the kernel source is also rewritten from
                ``double``/``sqrt`` to ``float``/``sqrtf``.
            TPB: Value substituted for the ``TPB`` token and stored on the
                instance (presumably threads per block -- confirm against
                the kernel launch code).
        """
        # kappa = E / (3 (1 - 2 nu)) and mu = E / (2 (1 + nu)); these are the
        # usual bulk/shear modulus formulas if E and nu are Young's modulus
        # and Poisson's ratio -- TODO confirm with the material class.
        kappa = mat.E / (1 - 2 * mat.nu) / 3
        mu = mat.E / (1 + mat.nu) / 2
        cn = mat.tsi * mat.rho * np.pi / geom.L * (mat.E / mat.rho / 3 /
                                                   (1 - 2 * mat.nu))**0.5
        dt = 1
        bc = 12 * mat.E / np.pi / geom.hrad**4
        alpha = 0 if opt is None else opt.alpha

        # Read the template inside a context manager so the file handle is
        # released immediately instead of lingering until garbage collection.
        # NOTE(review): the path uses a Windows separator and is relative to
        # the current working directory.
        with open("modules\\kernels.cu", "r") as source:
            src = source.read()

        def c_initializer(values):
            """Format a NumPy array as a brace-delimited C initializer."""
            text = np.array2string(
                values, separator=',', max_line_width=np.nan)
            return text.replace('[', '{').replace(']', '}')

        # (token, value) pairs applied strictly in this order. The order is
        # part of the behavior: each replacement is a plain substring match
        # on the output of the previous one, so do not reorder or sort.
        substitutions = [
            ("NBr", geom.NB),
            ("NDr", 3),
            ("NNr", geom.NN),
            ("mur", mu),
            ("NXr", geom.NX),
            ("NYr", geom.NY),
            ("NZr", geom.NZ),
            ("Lr", geom.L),
            ("dtr", dt),
            ("ntaur", bcs.ntau),
            ("rhor", mat.rho),
            ("cnr", cn),
            ("ecritr", mat.ecrit),
            ("dlmltr",
             geom.L**6 / geom.mi / geom.mi * (9 * kappa - 15 * mu)),
            ("fmltr", 2 * 15 * mu / geom.mi * geom.L**3),
            ("mvecr",
             .25 * dt**2 * 4 / 3 * np.pi * geom.hrad**3 * bc / geom.L**2),
            ("SHr", 4 * geom.NB + 1),
            ("TPB", TPB),
            # Unsized array declarations become sized, initialized ones.
            ("L0s[]",
             "L0s[" + str(geom.NB) + "] = " + c_initializer(geom.L0s)),
            ("jadd[]",
             "jadd[" + str(geom.NB) + "] = " + c_initializer(geom.jadd)),
            ("alphar", alpha),
            ("hradr", geom.L * geom.hrad),
            ("xlr", geom.bbox[0][0]),
            ("xhr", geom.bbox[0][1]),
            ("ylr", geom.bbox[1][0]),
            ("yhr", geom.bbox[1][1]),
            ("zlr", geom.bbox[2][0]),
            ("zhr", geom.bbox[2][1]),
        ]
        for token, value in substitutions:
            src = src.replace(token, str(value))

        # dsize is the byte size of one scalar of the working precision; it
        # sizes the raw single-value device allocations below.
        dsize = 8
        if dtype == np.float32:
            dsize = 4
            src = src.replace("double", "float")
            src = src.replace("sqrt", "sqrtf")
        self.dtype = dtype
        mod = SourceModule(src, options=["--use_fast_math"])

        # Device arrays in the working precision.
        self.d_dil = gpuarray.GPUArray([geom.NN], dtype)
        self.d_u = gpuarray.GPUArray([3 * geom.NN], dtype)
        self.d_up = gpuarray.GPUArray([3 * geom.NN], dtype)
        self.d_F = gpuarray.GPUArray([3 * geom.NN], dtype)
        self.d_cd = gpuarray.GPUArray([geom.NN], dtype)
        self.d_cn = gpuarray.GPUArray([geom.NN], dtype)
        self.d_Sf = gpuarray.to_gpu(geom.Sf)
        # ceil(NB * NN / 8) one-byte elements -- presumably one bit per
        # (node, bond) pair; confirm against the kernels.
        self.d_dmg = gpuarray.GPUArray([((geom.NB) * geom.NN + 7) // 8],
                                       np.bool_)
        self.d_NBCi = gpuarray.to_gpu(bcs.NBCi)
        self.d_EBCi = gpuarray.to_gpu(bcs.EBCi.flatten())
        # Boundary-condition values are uploaded as float32 regardless of
        # ``dtype``.
        self.d_NBC = gpuarray.to_gpu(
            np.array(bcs.NBC).astype(np.float32).flatten())
        self.d_EBC = gpuarray.to_gpu(np.array(bcs.EBC).astype(np.float32))

        self.d_c = cuda.mem_alloc(dsize)

        self.d_k = gpuarray.to_gpu(np.ones(geom.NN, dtype=dtype))
        self.d_W = gpuarray.GPUArray([geom.NN], dtype)

        # Handles to the compiled kernels plus a sum-reduction kernel.
        self.d_calcForce = mod.get_function("calcForce")
        self.d_calcDilation = mod.get_function("calcDilation")
        self.d_calcDisplacement = mod.get_function("calcDisplacement")
        self.sum_reduce = reduction.get_sum_kernel(dtype, dtype)
        # self.d_calcForce.set_cache_config(cuda.func_cache.PREFER_L1)
        # self.d_calcDilation.set_cache_config(cuda.func_cache.PREFER_L1)

        if opt is not None:
            # Optimization-only buffers and kernels live on ``opt`` itself.
            opt.d_kbar = gpuarray.GPUArray([geom.NN], dtype)
            opt.d_RM = cuda.mem_alloc(dsize)
            opt.d_calcKbar = mod.get_function("calcKbar")
            opt.d_updateK = mod.get_function("updateK")

        self.TPB = TPB
        self.geom = geom
        self.opt = opt
"""
Created on Wed May  7 12:07:56 2014

Author: Oren Freifeld
Email: [email protected]
"""
import numpy as np
#from pycuda import gpuarray
from of.utils import *
from of.gpu import CpuGpuArray
from cpab.cpa2d.calcs import PAT
#from cpab.prob_and_stats.cpa_simple_mean import cpa_simple_mean

# This saves some overhead in calling gpuarray.sum
from pycuda.reduction import get_sum_kernel
sum_krnl = get_sum_kernel(np.float64, np.float64)
from pycuda import gpuarray
import cv2


class ScaleDependentLogLikelihoodGeneral(object):
    supported_interp_types = [
        'gpu_linear', cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4
    ]

    def __init__(
        self,
        ms,
        level,
        data,
        sigma_signal,
Created on Wed May  7 12:07:56 2014

Author: Oren Freifeld
Email: [email protected]
"""
import numpy as np
#from pycuda import gpuarray
from of.utils import *
from of.gpu import CpuGpuArray
from cpab.cpa2d.calcs import PAT
#from cpab.prob_and_stats.cpa_simple_mean import cpa_simple_mean 
  
  
# This saves some overhead in calling gpuarray.sum
from pycuda.reduction import get_sum_kernel
sum_krnl = get_sum_kernel(np.float64, np.float64)
from pycuda import gpuarray
import cv2

  
class ScaleDependentLogLikelihoodGeneral (object):
    supported_interp_types = ['gpu_linear',
                               cv2.INTER_LINEAR,
                               cv2.INTER_CUBIC,
                               cv2.INTER_LANCZOS4]
    def __init__(self,ms,level,data,
                 sigma_signal,
                 params_flow_int,      
                 interp_type_for_ll,
#                 src=None,dst=None,transformed=None
                 ):