def sum(a, dtype=None, stream=None, allocator=None):
    """Sum the elements of ``a`` with a PyCUDA sum-reduction kernel.

    The kernel is looked up through ``pycuda.reduction.get_sum_kernel``,
    which receives ``dtype`` as its first argument and ``a.dtype`` as its
    second, and is then called on ``a``.

    :arg a: the array to reduce; only its ``dtype`` attribute is read here
        before the array is handed to the kernel.
    :arg dtype: passed through unchanged as the first argument of
        ``get_sum_kernel`` (``None`` by default).
    :arg stream: forwarded to the kernel call as the ``stream`` keyword.
    :arg allocator: forwarded to the kernel call as the ``allocator``
        keyword.
    :returns: whatever the kernel call returns.
    """
    # Imported at call time rather than at module import time.
    from pycuda.reduction import get_sum_kernel as build_sum_kernel

    reduce_kernel = build_sum_kernel(dtype, a.dtype)
    launch_kwargs = {"stream": stream, "allocator": allocator}
    return reduce_kernel(a, **launch_kwargs)
def __init__(self, mat, geom, bcs, opt=None, dtype=np.float64, TPB=128):
    """Compile the CUDA kernels and allocate the device-side buffers.

    The kernel template is read from ``modules/kernels.cu`` (relative to
    the current working directory).  Placeholder tokens in the template
    are substituted textually with values derived from the arguments, the
    resulting source is compiled with ``SourceModule`` and the kernel
    handles and GPU arrays are stored on ``self`` (and on ``opt`` when it
    is given).

    Args:
        mat: material description; the attributes ``E``, ``nu``, ``rho``,
            ``tsi`` and ``ecrit`` are read.
        geom: geometry description; the attributes ``L``, ``hrad``,
            ``NB``, ``NN``, ``NX``, ``NY``, ``NZ``, ``mi``, ``L0s``,
            ``jadd``, ``bbox`` and ``Sf`` are read.
        bcs: boundary conditions; the attributes ``ntau``, ``NBCi``,
            ``EBCi``, ``NBC`` and ``EBC`` are read.
        opt: optional settings object.  When given, ``opt.alpha`` is read
            and the attributes ``d_kbar``, ``d_RM``, ``d_calcKbar`` and
            ``d_updateK`` are set on it.  When ``None``, ``alpha`` is 0.
        dtype: NumPy dtype used for the device arrays.  With
            ``np.float32`` the kernel source is rewritten to use ``float``
            and ``sqrtf``.
        TPB: value substituted for the ``TPB`` token in the kernel source
            and stored as ``self.TPB`` (presumably threads per block --
            confirm against the kernel launch code).

    Raises:
        OSError: if the kernel template file cannot be opened.
    """
    import os

    # E / (3 (1 - 2 nu)) and E / (2 (1 + nu)): the bulk and shear modulus
    # formulas, if E and nu are Young's modulus and Poisson's ratio.
    kappa = mat.E / (1 - 2 * mat.nu) / 3
    mu = mat.E / (1 + mat.nu) / 2
    cn = mat.tsi * mat.rho * np.pi / geom.L * (
        mat.E / mat.rho / 3 / (1 - 2 * mat.nu))**0.5
    dt = 1
    bc = 12 * mat.E / np.pi / geom.hrad**4
    alpha = 0 if opt is None else opt.alpha

    def c_initializer(values):
        # Render an array as a C brace initializer, e.g. "{1,2,3}".
        # NOTE(review): np.array2string summarises arrays longer than the
        # NumPy print threshold with "..."; confirm geom.NB stays below it.
        text = np.array2string(
            values, separator=',', max_line_width=np.nan)
        return text.replace('[', '{').replace(']', '}')

    # Read the template inside a context manager so the file handle is
    # closed even if reading fails.  os.path.join yields the same
    # "modules\\kernels.cu" on Windows and a valid path elsewhere.
    kernel_path = os.path.join("modules", "kernels.cu")
    with open(kernel_path, "r") as source:
        src = source.read()

    # (token, value) pairs, applied strictly in this order: plain textual
    # replacement means a token may also match inside longer identifiers,
    # so the sequence must not be reordered.
    substitutions = (
        ("NBr", geom.NB),
        ("NDr", 3),
        ("NNr", geom.NN),
        ("mur", mu),
        ("NXr", geom.NX),
        ("NYr", geom.NY),
        ("NZr", geom.NZ),
        ("Lr", geom.L),
        ("dtr", dt),
        ("ntaur", bcs.ntau),
        ("rhor", mat.rho),
        ("cnr", cn),
        ("ecritr", mat.ecrit),
        ("dlmltr",
         geom.L**6 / geom.mi / geom.mi * (9 * kappa - 15 * mu)),
        ("fmltr", 2 * 15 * mu / geom.mi * geom.L**3),
        ("mvecr",
         .25 * dt**2 * 4 / 3 * np.pi * geom.hrad**3 * bc / geom.L**2),
        ("SHr", 4 * geom.NB + 1),
        ("TPB", TPB),
        # Empty array declarations become sized, initialised arrays.
        ("L0s[]",
         "L0s[" + str(geom.NB) + "] = " + c_initializer(geom.L0s)),
        ("jadd[]",
         "jadd[" + str(geom.NB) + "] = " + c_initializer(geom.jadd)),
        ("alphar", alpha),
        ("hradr", geom.L * geom.hrad),
        ("xlr", geom.bbox[0][0]),
        ("xhr", geom.bbox[0][1]),
        ("ylr", geom.bbox[1][0]),
        ("yhr", geom.bbox[1][1]),
        ("zlr", geom.bbox[2][0]),
        ("zhr", geom.bbox[2][1]),
    )
    for token, value in substitutions:
        src = src.replace(token, str(value))

    # Size in bytes of one scalar of the working precision.
    dsize = 8
    if dtype == np.float32:
        dsize = 4
        # Switch the kernel source to single precision.
        src = src.replace("double", "float")
        src = src.replace("sqrt", "sqrtf")
    self.dtype = dtype

    mod = SourceModule(src, options=["--use_fast_math"])

    # Per-node arrays have geom.NN entries; u, up and F have 3 * geom.NN.
    self.d_dil = gpuarray.GPUArray([geom.NN], dtype)
    self.d_u = gpuarray.GPUArray([3 * geom.NN], dtype)
    self.d_up = gpuarray.GPUArray([3 * geom.NN], dtype)
    self.d_F = gpuarray.GPUArray([3 * geom.NN], dtype)
    self.d_cd = gpuarray.GPUArray([geom.NN], dtype)
    self.d_cn = gpuarray.GPUArray([geom.NN], dtype)
    self.d_Sf = gpuarray.to_gpu(geom.Sf)
    # ceil(NB * NN / 8) one-byte elements -- presumably one bit per
    # (node, bond) pair; confirm against the kernels.
    self.d_dmg = gpuarray.GPUArray([((geom.NB) * geom.NN + 7) // 8],
                                   np.bool_)
    self.d_NBCi = gpuarray.to_gpu(bcs.NBCi)
    self.d_EBCi = gpuarray.to_gpu(bcs.EBCi.flatten())
    # NOTE(review): NBC and EBC are uploaded as float32 regardless of
    # `dtype`; confirm the kernels expect single precision here.
    self.d_NBC = gpuarray.to_gpu(
        np.array(bcs.NBC).astype(np.float32).flatten())
    self.d_EBC = gpuarray.to_gpu(np.array(bcs.EBC).astype(np.float32))
    # Raw device allocation holding a single scalar of the working dtype.
    self.d_c = cuda.mem_alloc(dsize)
    self.d_k = gpuarray.to_gpu(np.ones(geom.NN, dtype=dtype))
    self.d_W = gpuarray.GPUArray([geom.NN], dtype)

    self.d_calcForce = mod.get_function("calcForce")
    self.d_calcDilation = mod.get_function("calcDilation")
    self.d_calcDisplacement = mod.get_function("calcDisplacement")
    self.sum_reduce = reduction.get_sum_kernel(dtype, dtype)
    # self.d_calcForce.set_cache_config(cuda.func_cache.PREFER_L1)
    # self.d_calcDilation.set_cache_config(cuda.func_cache.PREFER_L1)

    if opt is not None:
        # These buffers and kernels are only set up when `opt` is given.
        opt.d_kbar = gpuarray.GPUArray([geom.NN], dtype)
        opt.d_RM = cuda.mem_alloc(dsize)
        opt.d_calcKbar = mod.get_function("calcKbar")
        opt.d_updateK = mod.get_function("updateK")

    self.TPB = TPB
    self.geom = geom
    self.opt = opt
""" Created on Wed May 7 12:07:56 2014 Author: Oren Freifeld Email: [email protected] """ import numpy as np #from pycuda import gpuarray from of.utils import * from of.gpu import CpuGpuArray from cpab.cpa2d.calcs import PAT #from cpab.prob_and_stats.cpa_simple_mean import cpa_simple_mean # This saves some overhead in calling gpuarray.sum from pycuda.reduction import get_sum_kernel sum_krnl = get_sum_kernel(np.float64, np.float64) from pycuda import gpuarray import cv2 class ScaleDependentLogLikelihoodGeneral(object): supported_interp_types = [ 'gpu_linear', cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4 ] def __init__( self, ms, level, data, sigma_signal,
Created on Wed May 7 12:07:56 2014 Author: Oren Freifeld Email: [email protected] """ import numpy as np #from pycuda import gpuarray from of.utils import * from of.gpu import CpuGpuArray from cpab.cpa2d.calcs import PAT #from cpab.prob_and_stats.cpa_simple_mean import cpa_simple_mean # This saves some overhead in calling gpuarray.sum from pycuda.reduction import get_sum_kernel sum_krnl = get_sum_kernel(np.float64, np.float64) from pycuda import gpuarray import cv2 class ScaleDependentLogLikelihoodGeneral (object): supported_interp_types = ['gpu_linear', cv2.INTER_LINEAR, cv2.INTER_CUBIC, cv2.INTER_LANCZOS4] def __init__(self,ms,level,data, sigma_signal, params_flow_int, interp_type_for_ll, # src=None,dst=None,transformed=None ):