Python FFTShift 예제들, reikna.fft.FFTShift Python 예제들

예제 #1

0

파일 보기

파일: test_fftshift.py 프로젝트: xexo7C8/reikna

def check_performance(thr_and_double, shape_and_axes):

    thr, double = thr_and_double

    dtype = numpy.complex128 if double else numpy.complex64
    dtype = dtypes.normalize_type(dtype)

    shape, axes = shape_and_axes

    data = numpy.arange(product(shape)).reshape(shape).astype(dtype)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    data_dev = thr.to_device(data)
    res_dev = thr.empty_like(data)

    attempts = 10
    times = []
    for i in range(attempts):
        t1 = time.time()
        shiftc(res_dev, data_dev)
        thr.synchronize()
        times.append(time.time() - t1)

    res_ref = numpy.fft.fftshift(data, axes=axes)
    assert diff_is_negligible(res_dev.get(), res_ref)

    return min(times), product(shape) * dtype.itemsize

예제 #2

0

파일 보기

파일: test_fftshift.py 프로젝트: mgolub2/reikna

def check_performance(thr_and_double, shape_and_axes):

    thr, double = thr_and_double

    dtype = numpy.complex128 if double else numpy.complex64
    dtype = dtypes.normalize_type(dtype)

    shape, axes = shape_and_axes

    data = numpy.arange(product(shape)).reshape(shape).astype(dtype)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    data_dev = thr.to_device(data)
    res_dev = thr.empty_like(data)

    attempts = 10
    times = []
    for i in range(attempts):
        t1 = time.time()
        shiftc(res_dev, data_dev)
        thr.synchronize()
        times.append(time.time() - t1)

    res_ref = numpy.fft.fftshift(data, axes=axes)
    assert diff_is_negligible(res_dev.get(), res_ref)

    return min(times), product(shape) * dtype.itemsize

예제 #3

0

파일 보기

파일: gpuCore.py 프로젝트: CSymes/quickDDM

def createNormalisationKernel(thread, shape):
    footprint = thread.array(shape, dtype=numpy.complex)
    fftshift = FFTShift(footprint)

    norm = norm_const(footprint, 2)
    fftshift.parameter.output.connect(norm,
                                      norm.input,
                                      output_prime=norm.output)

    normalise = fftshift.compile(thread)
    return normalise

예제 #4

0

파일 보기

파일: test_fftshift.py 프로젝트: drtpotter/reikna

def check_errors(thr, shape_and_axes, inverse=False):

    dtype = numpy.int32

    shape, axes = shape_and_axes

    data = numpy.arange(product(shape)).reshape(shape).astype(dtype)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    ref_func = numpy.fft.ifftshift if inverse else numpy.fft.fftshift

    data_dev = thr.to_device(data)
    shiftc(data_dev, data_dev, inverse)
    res_ref = ref_func(data, axes=axes)

    assert diff_is_negligible(data_dev.get(), res_ref)

예제 #5

0

파일 보기

파일: test_fftshift.py 프로젝트: fjarri/reikna

def check_errors(thr, shape_and_axes, inverse=False):

    dtype = numpy.int32

    shape, axes = shape_and_axes

    data = numpy.arange(product(shape)).reshape(shape).astype(dtype)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    ref_func = numpy.fft.ifftshift if inverse else numpy.fft.fftshift

    data_dev = thr.to_device(data)
    shiftc(data_dev, data_dev, inverse)
    res_ref = ref_func(data, axes=axes)

    assert diff_is_negligible(data_dev.get(), res_ref)

예제 #6

0

파일 보기

파일: test_fftshift.py 프로젝트: xexo7C8/reikna

def check_errors(thr, shape_and_axes):

    dtype = numpy.int32

    shape, axes = shape_and_axes

    data = numpy.arange(product(shape)).reshape(shape).astype(dtype)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    #print(shiftc._kernel_calls[0]._kernel._program.source)
    #print(shiftc._kernel_calls[0]._kernel.global_size)

    data_dev = thr.to_device(data)
    shiftc(data_dev, data_dev)
    res_ref = numpy.fft.fftshift(data, axes=axes)
    #print(data)
    #print(res_ref)
    #print(data_dev.get())
    assert diff_is_negligible(data_dev.get(), res_ref)

예제 #7

0

파일 보기

파일: test_fftshift.py 프로젝트: mgolub2/reikna

def check_errors(thr, shape_and_axes):

    dtype = numpy.int32

    shape, axes = shape_and_axes

    data = numpy.arange(product(shape)).reshape(shape).astype(dtype)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    #print(shiftc._kernel_calls[0]._kernel._program.source)
    #print(shiftc._kernel_calls[0]._kernel.global_size)

    data_dev = thr.to_device(data)
    shiftc(data_dev, data_dev)
    res_ref = numpy.fft.fftshift(data, axes=axes)
    #print(data)
    #print(res_ref)
    #print(data_dev.get())
    assert diff_is_negligible(data_dev.get(), res_ref)

예제 #8

0

파일 보기

파일: test_fftshift.py 프로젝트: mgolub2/reikna

def test_trivial(some_thr):
    """
    Checks that even if the axes set is trivial (product of lengths == 1),
    the transformations are still attached and executed.
    """
    dtype = numpy.complex64
    shape = (128, 1, 1, 128)
    axes = (1, 2)
    param = 4

    data = get_test_array(shape, dtype)
    data_dev = some_thr.to_device(data)
    res_dev = some_thr.empty_like(data_dev)

    shift = FFTShift(data, axes=axes)
    scale = mul_param(data_dev, numpy.int32)
    shift.parameter.input.connect(scale, scale.output, input_prime=scale.input, param=scale.param)

    shiftc = shift.compile(some_thr)
    shiftc(res_dev, data_dev, param)
    assert diff_is_negligible(res_dev.get(), data * param)

예제 #9

0

파일 보기

파일: test_fftshift.py 프로젝트: xexo7C8/reikna

def test_trivial(some_thr):
    """
    Checks that even if the axes set is trivial (product of lengths == 1),
    the transformations are still attached and executed.
    """
    dtype = numpy.complex64
    shape = (128, 1, 1, 128)
    axes = (1, 2)
    param = 4

    data = get_test_array(shape, dtype)
    data_dev = some_thr.to_device(data)
    res_dev = some_thr.empty_like(data_dev)

    shift = FFTShift(data, axes=axes)
    scale = mul_param(data_dev, numpy.int32)
    shift.parameter.input.connect(scale,
                                  scale.output,
                                  input_prime=scale.input,
                                  param=scale.param)

    shiftc = shift.compile(some_thr)
    shiftc(res_dev, data_dev, param)
    assert diff_is_negligible(res_dev.get(), data * param)

예제 #10

0

파일 보기

파일: profile_reikna_opencl.py 프로젝트: maeager/Agilent2Dicom

def kspaceepanechnikov_filter_CL2(ksp, sigma):
    sz = ksp.shape
    dtype = np.complex64
    ftype = np.float32
    clear_first_arg_caches()
    fsiz = (5, 5, 5)
    print(np.ceil(sigma[0]) + 2, np.ceil(sigma[1]) + 2, np.ceil(sigma[2]) + 2)
    print sigma
    fsiz = (np.ceil(sigma) + 2).astype(int)
    for i in xrange(0, fsiz.size):
        if not fsiz[i] & 0x1:
            fsiz[i] += 1
    # Create image-domain Epanechikov kernel
    Kepa = epanechnikov_kernel(fsiz, sigma)
    # Place kernel at centre of ksp-sized matrix
    Kfilter = np.zeros(np.array(sz), dtype=np.complex64)
    szmin = np.floor(
        np.array(sz) / 2.0 - np.floor(np.array(Kepa.shape) / 2.0) - 1)
    szmax = np.floor(szmin + np.array(Kepa.shape))
    print "Epa filter size ", sz, " image filter ", Kepa.shape, " szmin ", szmin, " szmax ", szmax
    Kfilter[szmin[0]:szmax[0], szmin[1]:szmax[1], szmin[2]:szmax[2]] = Kepa
    Kfilter[szmin[0]:szmax[0], szmin[1]:szmax[1],
            szmin[2]:szmax[2]].imag = Kepa
    # Create fourier-domain Epanechnikov filter
    api = any_api()
    thr = api.Thread.create()
    data_dev = thr.to_device(Kfilter)
    rfft = FFT(data_dev)
    crfft = rfft.compile(thr)
    fftshift = FFTShift(data_dev)
    cfftshift = fftshift.compile(thr)
    crfft(data_dev, data_dev)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    Fepanechnikov = np.abs(data_dev.get())  # / np.prod(np.array(ksp.shape))
    #result2 = result2[::-1,::-1,::-1]
    thr.synchronize()
    #result = np.zeros(np.array(siz), dtype=np.complex64)
    #result.real = np.abs(result2) / np.sqrt(2)
    #result.imag = np.abs(result2) / np.sqrt(2)
    del data_dev, rfft, crfft, fftshift, cfftshift
    # Multiply Epanechnikov filter to real and imag ksp data
    program = thr.compile("""
KERNEL void multiply_them(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *a,
    GLOBAL_MEM ${ftype} *f)
{
  const SIZE_T i = get_local_id(0);
  dest[i].x = a[i].x * f[i];
  dest[i].y = a[i].y * f[i];
}""",
                          render_kwds=dict(ctype=dtypes.ctype(dtype),
                                           ftype=dtypes.ctype(ftype)))

    data_dev = thr.to_device(ksp)
    filter_dev = thr.to_device(Fepanechnikov)
    multiply_them = program.multiply_them
    multiply_them(data_dev, data_dev, filter_dev, global_size=512 * 512 * 512)
    thr.synchronize()
    del filter_dev, program
    #api = cluda.ocl_api()
    #api = any_api()
    #thr = api.Thread.create()
    # Filter
    # data_dev = thr.to_device(ksp)
    # ifft = FFT(data_dev)
    FACTOR = 1.0

    # Recon
    # thr.synchronize()
    #data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    cifft = ifft.compile(thr)
    fftshiftobj = FFTShift(data_dev)
    cfftshift = fftshiftobj.compile(thr)
    cifft(data_dev, data_dev, inverse=0)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    thr.synchronize()
    result2 = data_dev.get() / np.prod(np.array(ksp.shape))
    result2 = result2[::-1, ::-1, ::-1]
    thr.release()
    return result2

예제 #11

0

파일 보기

파일: profile_reikna_opencl.py 프로젝트: maeager/Agilent2Dicom

def fourierepanechnikov(siz, sigma):
    """
    Epanechnikov kernel in Fourier domain is
     A.(1-|x|^2)  => (3/2*w^3)(sin(w) - w*cos(w)/2)
    """

    # (uu, vv, ww) = fouriercoords(siz)
    # uu = uu + np.spacing(1)
    # vv = vv + np.spacing(1)
    # ww = ww + np.spacing(1)

    # if not hasattr(sigma, "__len__"):
    # #if type(sigma) is float or type(sigma) is numpy.float64:
    #     return ((3.0*sigma/16.0)/(np.pi*(uu + vv +
    #     ww)/(sigma))**3)*(np.sin(2*np.pi*(uu + vv + ww)/(sigma)) - np.pi*(uu
    #     + vv + ww)/(sigma)*np.cos(2*np.pi*(uu + vv + ww)/(sigma))/2)
    # else:
    # return ((3.0/16.0)/(np.pi*((uu**3)/sigma[0]**4 + (vv**3)/sigma[1]**4 +
    # (ww**3)/sigma[2]**4)))*(np.sin(2*np.pi*(uu/sigma[0] + vv/sigma[1] +
    # ww/sigma[2])) - np.pi*(uu/sigma[0] + vv/sigma[1] +
    # ww/sigma[2])*np.cos(2*np.pi*(uu/sigma[0] + vv/sigma[1] + ww/sigma[2])))

    def is_odd(num):
        return num & 0x1

    from cplxfilter import epanechnikov_kernel
    if not hasattr(sigma, "__len__"):
        Kepa = epanechnikov_kernel(
            (np.ceil(sigma) + 1, np.ceil(sigma) + 1, np.ceil(sigma) + 1),
            sigma)
    else:
        print(
            np.ceil(sigma[0]) + 2,
            np.ceil(sigma[1]) + 2,
            np.ceil(sigma[2]) + 2)
        print sigma
        fsiz = (np.ceil(sigma) + 2).astype(int)
        for i in xrange(0, fsiz.size):
            if is_odd(fsiz[i]):
                fsiz[i] += 1

        Kepa = epanechnikov_kernel(
            (np.ceil(sigma[0]) + 2, np.ceil(sigma[1]) + 2,
             np.ceil(sigma[2]) + 2), sigma)

    Kfilter = np.zeros(np.array(siz), dtype=np.complex64)
    szmin = np.floor(
        np.array(siz) / 2.0 - np.floor(np.array(Kepa.shape) / 2.0) - 1)
    szmax = np.floor(szmin + np.array(Kepa.shape))
    print "Epa filter size ", siz, " image filter ", Kepa.shape, " szmin ", szmin, " szmax ", szmax
    Kfilter[szmin[0]:szmax[0], szmin[1]:szmax[1], szmin[2]:szmax[2]] = Kepa
    Kfilter[szmin[0]:szmax[0], szmin[1]:szmax[1],
            szmin[2]:szmax[2]].imag = Kepa

    # return np.abs(fftshift(clfftn(Kfilter)))

    api = any_api()
    thr = api.Thread.create()
    data_dev = thr.to_device(Kfilter)
    fft = FFT(data_dev)
    cfft = fft.compile(thr)
    fftshift = FFTShift(data_dev)
    cfftshift = fftshift.compile(thr)
    cfft(data_dev, data_dev)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    thr.synchronize()

    result2 = data_dev.get()  # / np.prod(np.array(ksp.shape))
    #result2 = result2[::-1,::-1,::-1]
    thr.release()
    result = np.zeros(np.array(siz), dtype=np.complex64)
    result.real = np.abs(result2) / np.sqrt(2)
    result.imag = np.abs(result2) / np.sqrt(2)
    return result

예제 #12

0

파일 보기

파일: profile_reikna_opencl.py 프로젝트: maeager/Agilent2Dicom

def kspacegaussian_filter_CL2(ksp, sigma):
    """ Kspace gaussian filter and recon using GPU OpenCL

    1. GPU intialisation
    2. push KSP complex matrix to GPU
    3. declare FFT program
    4. declare Complex Gaussian GPU filter program
    5. Execute Gaussian GPU program
    6. GPU sync
    7. Execute FFT Recon
    8. Execute FFTshift
    9. Retrieve reconstruced complex image from GPU
    10. Reorganise image to standard (mimic numpy format)

    """
    sz = ksp.shape
    dtype = np.complex64
    ftype = np.float32
    ultype = np.uint64
    #api = cluda.ocl_api()
    api = any_api()
    thr = api.Thread.create()
    data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    FACTOR = 1.0
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ulong x = get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;                    
  ${ftype} sigma[3];
  sigma[0]=%f;sigma[1]=%f;sigma[2]=%f;
  ${ftype} factor = %f;            
  const double TWOPISQ = 19.739208802178716; //6.283185307179586;  //2*3.141592;
  const ${ftype} SQRT2PI = 2.5066282746;
  const double CUBEDSQRT2PI = 15.749609945722419;
  const ulong idx = x;
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0f))/(${ftype})(dim1);
  ${ftype} j = (${ftype})(x / dim3);
      if((SIZE_T)j > dim2) {j=(${ftype})fmod(j, (${ftype})dim2);};
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  // Account for large global index (stored as ulong) before performing modulus
  double pre_k=fmod((double)(x), (double)dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  // ${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  // ${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[idx].x = src[idx].x * weight;
  dest[idx].y = src[idx].y * weight; 
  
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2], FACTOR),
                          render_kwds=dict(ctype=dtypes.ctype(dtype),
                                           ftype=dtypes.ctype(ftype),
                                           exp=functions.exp(ftype)),
                          fast_math=True)
    gauss_kernel = program.gauss_kernel
    #data_dev = thr.empty_like(ksp_dev)
    gauss_kernel(data_dev, data_dev, global_size=sz[0] * sz[1] * sz[2])

    thr.synchronize()
    # Recon
    #data_dev = thr.to_device(ksp)
    ifftobj = FFT(data_dev)
    cifft = ifftobj.compile(thr)
    fftshiftobj = FFTShift(data_dev)
    cfftshift = fftshiftobj.compile(thr)
    cifft(data_dev, data_dev, inverse=0)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    thr.synchronize()
    result2 = data_dev.get() / np.prod(np.array(ksp.shape))
    result2 = result2[::-1, ::-1, ::-1]
    thr.release()
    return result2

예제 #13

0

파일 보기

파일: profile_reikna_opencl.py 프로젝트: maeager/Agilent2Dicom

thr.synchronize()
toc()
result = np.fft.fftshift(data_dev.get() / N**3)
result = result[::-1, ::-1, ::-1]
result = np.roll(np.roll(np.roll(result, 1, axis=2), 1, axis=1), 1, axis=0)
print "Reikna IFFT time and first three results:"
print "%s sec, %s" % (toc(), str(np.abs(result[:3, 0, 0])))
thr.release()
del ifft, cifft, data_dev, thr

thr = api.Thread.create()
tic()
data_dev = thr.to_device(ksp)
ifft = FFT(data_dev)
cifft = ifft.compile(thr)
fftshiftobj = FFTShift(data_dev)
cfftshift = fftshiftobj.compile(thr)
cifft(data_dev, data_dev, inverse=0)
thr.synchronize()
toc()
cfftshift(data_dev, data_dev)
thr.synchronize()
result2 = data_dev.get() / N**3
result2 = result2[::-1, ::-1, ::-1]
#result = np.roll(np.roll(np.roll(result,1,axis=2),1,axis=1),1,axis=0)
print "Reikna IFFT time and first three results:"
print "%s sec, %s" % (toc(), str(np.abs(result2[:3, 0, 0])))
thr.release()
del ifft, cifft, data_dev, fftshiftobj, cfftshift

del thr, api

예제 #14

0

파일 보기

파일: demo_fftshift_transformation.py 프로젝트: fjarri/reikna

def run_test(thr, shape, dtype, axes=None):

    data = numpy.random.normal(size=shape).astype(dtype)

    fft = FFT(data, axes=axes)
    fftc = fft.compile(thr)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    # FFT + shift as two separate computations

    data_dev = thr.to_device(data)

    t_start = time.time()
    fftc(data_dev, data_dev)
    thr.synchronize()
    t_gpu_fft = time.time() - t_start

    t_start = time.time()
    shiftc(data_dev, data_dev)
    thr.synchronize()
    t_gpu_shift = time.time() - t_start

    data_dev = thr.to_device(data)

    t_start = time.time()
    fftc(data_dev, data_dev)
    shiftc(data_dev, data_dev)
    thr.synchronize()
    t_gpu_separate = time.time() - t_start

    data_gpu = data_dev.get()

    # FFT + shift as a computation with a transformation

    data_dev = thr.to_device(data)

    # a separate output array to avoid unsafety of the shift transformation
    res_dev = thr.empty_like(data_dev)

    shift_tr = fftshift(data, axes=axes)
    fft2 = fft.parameter.output.connect(shift_tr, shift_tr.input, new_output=shift_tr.output)
    fft2c = fft2.compile(thr)

    t_start = time.time()
    fft2c(res_dev, data_dev)
    thr.synchronize()
    t_gpu_combined = time.time() - t_start

    # Reference calculation with numpy

    t_start = time.time()
    numpy.fft.fftn(data, axes=axes)
    t_cpu_fft = time.time() - t_start

    t_start = time.time()
    numpy.fft.fftshift(data, axes=axes)
    t_cpu_shift = time.time() - t_start

    t_start = time.time()
    data_ref = numpy.fft.fftn(data, axes=axes)
    data_ref = numpy.fft.fftshift(data_ref, axes=axes)
    t_cpu_all = time.time() - t_start

    data_gpu2 = res_dev.get()

    # Checking that the results are correct
    # (note: this will require relaxing the tolerances
    # if complex64 is used instead of complex128)
    assert numpy.allclose(data_ref, data_gpu)
    assert numpy.allclose(data_ref, data_gpu2)

    return dict(
        t_gpu_fft=t_gpu_fft,
        t_gpu_shift=t_gpu_shift,
        t_gpu_separate=t_gpu_separate,
        t_gpu_combined=t_gpu_combined,
        t_cpu_fft=t_cpu_fft,
        t_cpu_shift=t_cpu_shift,
        t_cpu_all=t_cpu_all)

예제 #15

0

파일 보기

def run_test(thr, shape, dtype, axes=None):

    data = numpy.random.normal(size=shape).astype(dtype)

    fft = FFT(data, axes=axes)
    fftc = fft.compile(thr)

    shift = FFTShift(data, axes=axes)
    shiftc = shift.compile(thr)

    # FFT + shift as two separate computations

    data_dev = thr.to_device(data)

    t_start = time.time()
    fftc(data_dev, data_dev)
    thr.synchronize()
    t_gpu_fft = time.time() - t_start

    t_start = time.time()
    shiftc(data_dev, data_dev)
    thr.synchronize()
    t_gpu_shift = time.time() - t_start

    data_dev = thr.to_device(data)

    t_start = time.time()
    fftc(data_dev, data_dev)
    shiftc(data_dev, data_dev)
    thr.synchronize()
    t_gpu_separate = time.time() - t_start

    data_gpu = data_dev.get()

    # FFT + shift as a computation with a transformation

    data_dev = thr.to_device(data)

    # a separate output array to avoid unsafety of the shift transformation
    res_dev = thr.empty_like(data_dev)

    shift_tr = fftshift(data, axes=axes)
    fft2 = fft.parameter.output.connect(shift_tr,
                                        shift_tr.input,
                                        new_output=shift_tr.output)
    fft2c = fft2.compile(thr)

    t_start = time.time()
    fft2c(res_dev, data_dev)
    thr.synchronize()
    t_gpu_combined = time.time() - t_start

    # Reference calculation with numpy

    t_start = time.time()
    numpy.fft.fftn(data, axes=axes)
    t_cpu_fft = time.time() - t_start

    t_start = time.time()
    numpy.fft.fftshift(data, axes=axes)
    t_cpu_shift = time.time() - t_start

    t_start = time.time()
    data_ref = numpy.fft.fftn(data, axes=axes)
    data_ref = numpy.fft.fftshift(data_ref, axes=axes)
    t_cpu_all = time.time() - t_start

    data_gpu2 = res_dev.get()

    # Checking that the results are correct
    # (note: this will require relaxing the tolerances
    # if complex64 is used instead of complex128)
    assert numpy.allclose(data_ref, data_gpu)
    assert numpy.allclose(data_ref, data_gpu2)

    return dict(t_gpu_fft=t_gpu_fft,
                t_gpu_shift=t_gpu_shift,
                t_gpu_separate=t_gpu_separate,
                t_gpu_combined=t_gpu_combined,
                t_cpu_fft=t_cpu_fft,
                t_cpu_shift=t_cpu_shift,
                t_cpu_all=t_cpu_all)

예제 #16

0

파일 보기

파일: profile_reikna_cuda.py 프로젝트: maeager/Agilent2Dicom

def kspacegaussian_filter_CL2(ksp, sigma):
    sz = ksp.shape
    dtype = np.complex64
    ftype = np.float32
    #api = cluda.ocl_api()
    api = cuda_api()
    thr = api.Thread.create()
    data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    FACTOR = 1.0
    program = thr.compile("""
KERNEL void gauss_kernel(
    GLOBAL_MEM ${ctype} *dest,
    GLOBAL_MEM ${ctype} *src)
{
  const ulong x = get_global_id(0);
  const SIZE_T dim1= %d;
  const SIZE_T dim2= %d;
  const SIZE_T dim3= %d;                    
  ${ftype} sigma[3];
  sigma[0]=%f;sigma[1]=%f;sigma[2]=%f;
  ${ftype} factor = %f;            
  const double TWOPISQ = 19.739208802178716; //6.283185307179586;  //2*3.141592;
//  const ${ftype} SQRT2PI = 2.5066282746;
//  const double CUBEDSQRT2PI = 15.749609945722419;
  const ulong idx = x;
  ${ftype} i = (${ftype})((x / dim3) / dim2);
      i = (i - (${ftype})floor((${ftype})(dim1)/2.0))/(${ftype})(dim1);
  ${ftype} j = (${ftype})(x / dim3);
      if((SIZE_T)j > dim2) {j=(${ftype})fmod(j, (${ftype})dim2);};
      j = (j - (${ftype})floor((${ftype})(dim2)/2.0f))/(${ftype})(dim2);
  //Account for large global index (stored as ulong) before performing modulus
  double pre_k=fmod((double)(x) , (double) dim3);
  ${ftype} k = (${ftype}) pre_k;
      k = (k - (${ftype})floor((${ftype})(dim3)/2.0f))/(${ftype})(dim3);

  ${ftype} weight = exp(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  //${ftype} weight = expm1(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]))+1;
  //${ftype} weight= ${exp}(-TWOPISQ*((i*i)*sigma[0]*sigma[0] + (j*j)*sigma[1]*sigma[1] + (k*k)*sigma[2]*sigma[2]));
  dest[idx].x = src[idx].x * weight * factor;
  dest[idx].y = src[idx].y * weight * factor; 
  
}
""" % (sz[0], sz[1], sz[2], sigma[0], sigma[1], sigma[2], FACTOR),
        render_kwds=dict(ctype=dtypes.ctype(dtype),
                         ftype=dtypes.ctype(ftype),
                         exp=functions.exp(ftype)), fast_math=True)
    gauss_kernel = program.gauss_kernel
    #data_dev = thr.empty_like(ksp_dev)
    gauss_kernel(data_dev, data_dev, global_size=(sz[0], sz[1], sz[2]))

    thr.synchronize()
    #data_dev = thr.to_device(ksp)
    ifft = FFT(data_dev)
    cifft = ifft.compile(thr)
    fftshift = FFTShift(data_dev)
    cfftshift = fftshift.compile(thr)
    cifft(data_dev, data_dev, inverse=0)
    thr.synchronize()
    cfftshift(data_dev, data_dev)
    thr.synchronize()
    result2 = data_dev.get() / np.prod(np.array(ksp.shape))
    result2 = result2[::-1, ::-1, ::-1]
    thr.release()
    return result2