Beispiel #1
0
    def __init__(self, volume, template, mask, wedge, stdV, gpu=True):
        """Upload the search inputs and pre-allocate all work buffers.

        Args:
            volume: Host array that is copied to the device; its ``shape``
                and ``dtype`` size the FFT plans and every work buffer.
            template: Wrapped in ``Volume`` and kept as ``self.template``.
            mask: Wrapped in ``Volume``; ``mask.shape`` is recorded as the
                original (unpadded) size ``self.sOrg``.
            wedge: Host array copied to the device as ``self.wedge``.
            stdV: Host array copied to the device as ``self.stdV``.
            gpu: Accepted for interface compatibility; not read here.
        """
        padded_shape = volume.shape
        host_dtype = volume.dtype

        self.volume = gu.to_gpu(volume)

        # Template and mask each get a float32 buffer shaped like the volume.
        self.template = Volume(template)
        self.templatePadded = gu.zeros_like(self.volume, dtype=np.float32)
        self.mask = Volume(mask)
        self.maskPadded = gu.zeros_like(self.volume, dtype=np.float32)

        self.sOrg = mask.shape
        self.sPad = padded_shape
        print(self.sPad, self.sOrg)

        # Zero-angle rotation of the mask into its padded buffer.
        # NOTE(review): presumably this just centres the mask in the padded
        # volume -- confirm against rotate(). templatePadded is only
        # allocated here; nothing in this constructor writes to it.
        rotate(self.mask, [0, 0, 0], self.maskPadded, self.sPad, self.sOrg)

        print(padded_shape, stdV.shape, wedge.shape)
        self.wedge = gu.to_gpu(wedge)
        self.stdV = gu.to_gpu(stdV)

        # Forward plan: input dtype -> complex64; inverse plan: the reverse.
        self.fwd_plan = Plan(padded_shape, host_dtype, np.complex64)
        self.inv_plan = Plan(padded_shape, np.complex64, host_dtype)

        # Complex spectra for the volume and the template.
        self.volume_fft = gu.zeros_like(self.volume, dtype=np.complex64)
        self.template_fft = gu.zeros_like(self.volume, dtype=np.complex64)

        self.ccc_map = gu.zeros_like(self.volume, dtype=np.float32)
        self.norm_volume = np.prod(padded_shape)

        # Both result maps start at the sentinel value -1000.
        self.scores = gu.ones_like(self.volume, dtype=np.float32) * -1000
        self.angles = gu.ones_like(self.volume, dtype=np.float32) * -1000

        # NOTE(review): `sum` is whatever name the module has in scope
        # (builtin or a GPU reduction) -- confirm which one is intended.
        self.p = sum(self.mask.d_data)
def scikit_gpu_fft_pipeline(filename):
    """Time repeated GPU FFT/IFFT round trips over a file of complex64 data.

    Each iteration reads ``GULP_FRAME_FFT * GULP_SIZE`` complex64 samples,
    reshapes them to ``(GULP_FRAME_FFT, GULP_SIZE)``, uploads them and runs
    ``NUMBER_FFT`` forward/inverse transform pairs, each into a freshly
    allocated output buffer.

    Args:
        filename: Path of the raw sample file to read.

    Returns:
        The difference between ``timer()`` taken after and before the
        whole read-and-transform loop.

    Raises:
        ValueError: From ``reshape`` if the file yields fewer samples than
            one full gulp.
    """
    start = timer()
    gulp_count = ((32768 * 1024 * SIZE_MULTIPLIER // GULP_SIZE) //
                  COMPLEX_MULTIPLIER) // GULP_FRAME_FFT
    # The file holds raw binary samples, so it must be opened in binary
    # mode; text mode can translate or decode bytes on some platforms.
    with open(filename, 'rb') as file_obj:
        for _ in range(gulp_count):
            data = np.fromfile(
                file_obj,
                dtype=np.complex64,
                count=GULP_SIZE * GULP_FRAME_FFT,
            ).reshape((GULP_FRAME_FFT, GULP_SIZE))
            g_data = gpuarray.to_gpu(data)

            # NOTE(review): both plans are rebuilt for every gulp and are
            # therefore part of the measured time -- presumably deliberate
            # for this benchmark; confirm before hoisting them.
            plan = Plan(data.shape[1],
                        np.complex64,
                        np.complex64,
                        batch=GULP_FRAME_FFT)
            plan_inverse = Plan(data.shape[1],
                                np.complex64,
                                np.complex64,
                                batch=GULP_FRAME_FFT)

            tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
            tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
            fft(g_data, tmp1, plan)
            ifft(tmp1, tmp2, plan_inverse)
            for _ in range(NUMBER_FFT - 1):
                # Can't do FFT in place for fairness (emulating full pipeline)
                tmp1 = gpuarray.empty(data.shape, dtype=np.complex64)
                fft(tmp2, tmp1, plan)
                tmp2 = gpuarray.empty(data.shape, dtype=np.complex64)
                ifft(tmp1, tmp2, plan_inverse)
    end = timer()
    return end - start
Beispiel #3
0
 def __init__(self, volume, template, mask, gpu):
     """Upload volume and mask, wrap the template and allocate work buffers.

     Args:
         volume: Host array copied to the device; its ``shape`` and
             ``dtype`` define both FFT plans.
         template: Wrapped in ``Volume`` and stored as ``self.template``.
         mask: Host array copied to the device as ``self.mask``.
         gpu: Stored unchanged as ``self.gpu``.
     """
     vol_shape = volume.shape
     vol_dtype = volume.dtype

     self.gpu = gpu

     # Device-side inputs.
     self.volume = gu.to_gpu(volume)
     self.template = Volume(template)
     self.mask = gu.to_gpu(mask)

     # Forward plan: input dtype -> complex64; inverse plan: the reverse.
     self.fwd_plan = Plan(vol_shape, vol_dtype, np.complex64)
     self.inv_plan = Plan(vol_shape, np.complex64, vol_dtype)

     # The volume spectrum is shaped like the volume, while the template
     # spectrum is shaped like the template's own device data.
     self.volume_fft = gu.zeros_like(self.volume, dtype=np.complex64)
     self.template_fft = gu.zeros_like(self.template.d_data,
                                       dtype=np.complex64)

     self.ccc_map = gu.zeros_like(self.volume, dtype=np.float32)
     self.norm_volume = np.prod(vol_shape)

     # Result maps, one value per volume element.
     self.scores = gu.zeros_like(self.volume, dtype=np.float32)
     self.angles = gu.zeros_like(self.volume, dtype=np.float32)
Beispiel #4
0
def ifft2c2c_cuda(x, axes=(0, 1)):
    """Run a complex-to-complex inverse FFT on the GPU and return it on host.

    Only ``len(axes)`` is used: the transform is planned over the leading
    ``len(axes)`` dimensions of ``x``, and any remaining trailing dimensions
    are handed to the plan as a batch.

    Args:
        x: Array-like input; converted to a complex64 NumPy array.
        axes: Sequence whose length gives the transform rank.

    Returns:
        The transformed data fetched from the device, divided by the
        product of the transformed dimensions.
    """
    rank = len(axes)
    host = np.array(x).astype(np.complex64)
    fft_shape = host.shape[0:rank]

    d_in = gpuarray.to_gpu(host)
    d_out = gpuarray.empty(host.shape, np.complex64)

    if len(host.shape) <= rank:
        # No trailing dimensions: a plain single transform.
        plan = Plan(fft_shape, np.complex64, np.complex64)
    else:
        batch = np.prod(host.shape[rank:])
        # NOTE(review): the positional arguments after `batch` are taken to
        # be (stream, mode, inembed, istride, idist, onembed, ostride,
        # odist) -- confirm against the installed Plan signature.
        plan = Plan(
            fft_shape, np.complex64, np.complex64, batch, None, 1,
            np.array(fft_shape).astype(np.int32), batch, 1,
            np.array(fft_shape).astype(np.int32), batch, 1,
        )

    ifft(d_in, d_out, plan)
    return d_out.get() / np.prod(fft_shape)
Beispiel #5
0
 def __init__(self, volume, template, gpu):
     """Pre-compute the volume spectrum and allocate the work buffers.

     The volume is uploaded only temporarily: its forward FFT is taken
     straight away, same-shaped buffers are allocated from it, and the
     local device copy is then released before the inverse plan is built.

     Args:
         volume: Host array; its ``shape`` and ``dtype`` define both plans.
         template: Wrapped in ``Volume`` and stored as ``self.template``.
         gpu: Stored unchanged as ``self.gpu``.
     """
     self.gpu = gpu
     vol_shape = volume.shape
     vol_dtype = volume.dtype

     d_volume = gu.to_gpu(volume)

     # Forward transform of the volume is computed once, up front.
     self.fwd_plan = Plan(vol_shape, vol_dtype, np.complex64)
     self.volume_fft = gu.zeros_like(d_volume, dtype=np.complex64)
     fft(d_volume, self.volume_fft, self.fwd_plan)

     self.template_fft = gu.zeros_like(d_volume, dtype=np.complex64)
     self.ccc_map = gu.zeros_like(d_volume, dtype=np.float32)
     # NOTE(review): other constructors in this collection use np.prod on
     # the host shape here -- confirm that `gu` really provides `prod`.
     self.norm_volume = gu.prod(d_volume.shape)
     self.padded_volume = gu.zeros_like(d_volume, dtype=np.float32)

     # Drop the local reference to the uploaded volume before creating the
     # inverse plan.
     del d_volume
     self.inv_plan = Plan(vol_shape, np.complex64, vol_dtype)
     self.template = Volume(template)
Beispiel #6
0
def get_rfft_plans(shape, double_precision=False):
    """
    Loads or computes fft plans for ffts performed on the GPU.

    Plans are cached in the module-level ``fft_plans`` / ``fft_inv_plans``
    dictionaries under a label built from ``shape[0]``, ``shape[1]`` and the
    precision, so each distinct label is only planned once.

    Args:
        shape: Transform shape; at least two entries are required because
            the cache label reads ``shape[0]`` and ``shape[1]``.
        double_precision: Use float64/complex128 instead of
            float32/complex64.

    Returns:
        Tuple ``(forward_plan, inverse_plan)``: real-to-complex and
        complex-to-real plans for ``shape``.
    """
    if double_precision:
        real_type, cplx_type = np.float64, np.complex128
    else:
        real_type, cplx_type = np.float32, np.complex64

    # NOTE(review): only the first two dimensions enter the key, so shapes
    # that differ beyond them would share a cache entry.
    lab = '%s x %s real2complex' % (shape[0], shape[1])
    if double_precision:
        lab += ' (double)'

    # Single-argument print(...) emits the same text on Python 2 and 3.
    if lab not in fft_plans:
        print("lens_GPU : building and caching fft plan %s" % lab)
        fft_plans[lab] = Plan(shape, real_type, cplx_type)
    if lab not in fft_inv_plans:
        print("lens_GPU : building and caching ifft plan %s" % lab)
        fft_inv_plans[lab] = Plan(shape, cplx_type, real_type)
    return fft_plans[lab], fft_inv_plans[lab]
Beispiel #7
0
def test3():
    """Compare a 3-D complex GPU FFT with ``np.fft.fftn`` on random data.

    Prints ``True`` when both results agree within ``atol=1e-2``.
    """
    N = 128
    cube = (N, N, N)

    x = np.asarray(np.random.rand(*cube), np.complex64)
    reference = np.fft.fftn(x, s=None, axes=(0, 1, 2))

    # Same transform on the device, written into a separate output buffer.
    d_x = gpuarray.to_gpu(x)
    d_xf = gpuarray.empty(cube, np.complex64)
    plan = Plan(x.shape, np.complex64, np.complex64)
    fft(d_x, d_xf, plan)

    agrees = np.allclose(reference[0:N, 0:N, 0:N], d_xf.get(), atol=1e-2)
    print(agrees)
Beispiel #8
0
def test1():
    """Compare a 1-D complex GPU FFT with ``np.fft.fft`` on random data.

    Prints ``True`` when both results agree within ``atol=1e-3``.
    """
    N = 128

    x = np.asarray(np.random.rand(N), np.complex64)
    reference = np.fft.fft(x)

    # Same transform on the device, written into a separate output buffer.
    d_x = gpuarray.to_gpu(x)
    d_xf = gpuarray.empty(N, np.complex64)
    plan = Plan(x.shape, np.complex64, np.complex64)
    fft(d_x, d_xf, plan)

    agrees = np.allclose(reference[0:N], d_xf.get(), atol=1e-3)
    print(agrees)
Beispiel #9
0
 def compute_inverse_plan(self):
     """Build the inverse FFT plan and store it as ``self.plan_inverse``.

     The plan goes from ``self.dtype_out`` back to ``self.dtype`` and is
     created on ``self.cufft_shape`` with the instance's batch size and
     stream.
     """
     plan_shape = self.cufft_shape  # not shape_out
     source_dtype = self.dtype_out
     target_dtype = self.dtype
     # cufft extensible plan API is only supported after 0.5.1
     # (commit 65288d28ca0b93e1234133f8d460dc6becb65121)
     # but there is still no official 0.5.2, so auto_allocate=True is
     # deliberately not passed here.
     plan_options = {
         "batch": self.cufft_batch_size,
         "stream": self.cufft_stream,
     }
     self.plan_inverse = Plan(
         plan_shape, source_dtype, target_dtype, **plan_options
     )
Beispiel #10
0
def cu_lpf(stimulus, dt, freq):
    """
    CUDA implementation of low-pass-filter.

    The input is transformed with a real-to-complex FFT, every frequency
    bin from the cut-off bin upwards is zeroed directly in device memory,
    and the spectrum is transformed back.

    stimulus: ndarray
        The input to be filtered. A ``gpuarray.GPUArray`` is used as is;
        anything else is uploaded first.
    dt: float
        The sampling interval of the input.
    freq: float
        The cut-off frequency of the low pass filter. A cut-off at or
        above the highest available bin leaves the spectrum untouched; a
        negative cut-off zeroes the whole spectrum.

    Returns the filtered signal as a host array.
    """
    num = len(stimulus)
    num_fft = num // 2 + 1  # number of bins of the real-to-complex FFT
    idtype = stimulus.dtype
    odtype = np.complex128 if idtype == np.float64 else np.complex64

    if isinstance(stimulus, gpuarray.GPUArray):
        d_stimulus = stimulus
    else:
        d_stimulus = gpuarray.to_gpu(stimulus)

    fwd_plan = Plan(stimulus.shape, idtype, odtype)
    d_fstimulus = gpuarray.empty(num_fft, odtype)
    fft(d_stimulus, d_fstimulus, fwd_plan)

    df = 1.0 / dt / num  # frequency resolution of one bin
    # Clamp the cut-off bin to the spectrum: without this, a cut-off beyond
    # the last bin produces a negative memset count and a negative cut-off
    # produces a start address in front of the buffer.
    idx = min(max(int(freq // df), 0), num_fft)

    if idx < num_fft:
        # memset_d32 works in 32-bit words, so convert bins to words.
        itemsize = d_fstimulus.dtype.itemsize
        unit = itemsize // 4
        offset = int(d_fstimulus.gpudata) + itemsize * idx
        cuda.memset_d32(offset, 0, unit * (num_fft - idx))

    inv_plan = Plan(stimulus.shape, odtype, idtype)
    d_lpf_stimulus = gpuarray.empty(num, idtype)
    # NOTE(review): the trailing False is presumably the `scale` flag, i.e.
    # the inverse result is not divided by `num` -- confirm this is wanted.
    ifft(d_fstimulus, d_lpf_stimulus, inv_plan, False)

    return d_lpf_stimulus.get()
Beispiel #11
0
import skcuda.cublas as cublas
import skcuda

# Benchmark: run one real-to-complex FFT per CUDA stream and report the
# elapsed time between two events.
s = cuda.Event()
e = cuda.Event()
s.record()

nStreams = 8
stream = [cuda.Stream() for i in range(nStreams)]
N = 8192
print(skcuda.misc.get_current_device())

# Floor division keeps the per-stream length an integer on Python 3 too;
# true division would hand a float to np.random.rand / gpuarray.empty.
chunk = N // nStreams

x = [np.asarray(np.random.rand(chunk), np.float32) for i in range(nStreams)]
x_gpu = [gpuarray.to_gpu_async(x[i], stream=stream[i]) for i in range(nStreams)]

# A real-to-complex transform of `chunk` samples yields chunk // 2 + 1 bins.
xf_gpu = [gpuarray.empty(chunk // 2 + 1, np.complex64) for i in range(nStreams)]
plan = [Plan(x[0].shape, np.float32, np.complex64, stream=stream[i]) for i in range(nStreams)]
print(skcuda.misc.get_current_device())
for i in range(nStreams):
    fft(x_gpu[i], xf_gpu[i], plan[i])
    print(skcuda.misc.get_current_device())

# Fetch every result asynchronously on the stream that produced it.
x_pin = [xf_gpu[i].get_async(stream=stream[i]) for i in range(nStreams)]

e.record()
e.synchronize()
# Formatted as one string so Python 2 and Python 3 print the same line.
print("%s ms" % s.time_till(e))