Example #1
0
def task1(cufft, d_image_complex, d_response_complex):
    ### Task1 ###
    # Implement a inplace CUDA FFT convolution
    # Pseduocode:
    #   freq_imag = fft(image)
    #   freq_resp = fft(response)
    #   freq_out = fftimag * fftresp
    #   output = ifft(freq_out)
    #
    # Use the cuFFT functions:
    #   - cufft.fft_inplace(ary)
    #   - cufft.ifft_inplace(ary)
    #
    # Call `vmult` which is our elementwise complex multiplication.
    # Do a inplace operation on `d_image_complex`.
    # Hints:
    #   - keyword argument 'out' specify the output array
    #   - length of d_image_complex and d_response_complex has the same length.


    cufft.fft_inplace(d_image_complex)
    cufft.fft_inplace(d_response_complex)

    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    cufft.ifft_inplace(d_image_complex)

    # At this point, we have applied the filter onto d_image_complex
    return  # Does not return anything
Example #2
0
def task1(cufft, d_image_complex, d_response_complex):
    ### Task1 ###
    # Implement a inplace CUDA FFT convolution
    # Pseduocode:
    #   freq_imag = fft(image)
    #   freq_resp = fft(response)
    #   freq_out = fftimag * fftresp
    #   output = ifft(freq_out)
    #
    # Use the cuFFT functions:
    #   - cufft.fft_inplace(ary)
    #   - cufft.ifft_inplace(ary)
    #
    # Call `vmult` which is our elementwise complex multiplication.
    # Do a inplace operation on `d_image_complex`.
    # Hints:
    #   - keyword argument 'out' specify the output array
    #   - length of d_image_complex and d_response_complex has the same length.

    cufft.fft_inplace(d_image_complex)
    cufft.fft_inplace(d_response_complex)

    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    cufft.ifft_inplace(d_image_complex)

    # At this point, we have applied the filter onto d_image_complex
    return  # Does not return anything
Example #3
0
def main():
    # Build Filter
    laplacian_pts = '''
    -4 -1 0 -1 -4
    -1 2 3 2 -1
    0 3 4 3 0
    -1 2 3 2 -1
    -4 -1 0 -1 -4
    '''.split()

    laplacian = np.array(laplacian_pts, dtype=np.float32).reshape(5, 5)

    # Build Image
    try:
        filename = sys.argv[1]
        image = ndimage.imread(filename, flatten=True).astype(np.float32)
    except IndexError:
        image = misc.lena().astype(np.float32)

    print("Image size: %s" % (image.shape, ))

    response = np.zeros_like(image)
    response[:5, :5] = laplacian

    # CPU
    ts = timer()
    cvimage_cpu = fftconvolve(image, laplacian, mode='same')
    te = timer()
    print('CPU: %.2fs' % (te - ts))

    # GPU
    threadperblock = 32, 8
    blockpergrid = best_grid_size(tuple(reversed(image.shape)), threadperblock)
    print('kernel config: %s x %s' % (blockpergrid, threadperblock))

    # Trigger initialization the cuFFT system.
    # This takes significant time for small dataset.
    # We should not be including the time wasted here
    cufft.FFTPlan(shape=image.shape, itype=np.complex64, otype=np.complex64)

    # Start GPU timer
    ts = timer()
    image_complex = image.astype(np.complex64)
    response_complex = response.astype(np.complex64)

    d_image_complex = cuda.to_device(image_complex)
    d_response_complex = cuda.to_device(response_complex)

    cufft.fft_inplace(d_image_complex)
    cufft.fft_inplace(d_response_complex)

    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    cufft.ifft_inplace(d_image_complex)

    cvimage_gpu = d_image_complex.copy_to_host().real / np.prod(image.shape)

    te = timer()
    print('GPU: %.2fs' % (te - ts))

    # Plot the results
    plt.subplot(1, 2, 1)
    plt.title('CPU')
    plt.imshow(cvimage_cpu, cmap=plt.cm.gray)
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.title('GPU')
    plt.imshow(cvimage_gpu, cmap=plt.cm.gray)
    plt.axis('off')

    plt.show()
Example #4
0
def main():
    # Build Filter
    laplacian_pts = '''
    -4 -1 0 -1 -4
    -1 2 3 2 -1
    0 3 4 3 0
    -1 2 3 2 -1
    -4 -1 0 -1 -4
    '''.split()

    laplacian = np.array(laplacian_pts, dtype=np.float32).reshape(5, 5)

    # Build Image
    try:
        filename = sys.argv[1]
        image = ndimage.imread(filename, flatten=True).astype(np.float32)
    except IndexError:
        image = misc.lena().astype(np.float32)

    print("Image size: %s" % (image.shape,))

    response = np.zeros_like(image)
    response[:5, :5] = laplacian

    # CPU
    ts = timer()
    cvimage_cpu = fftconvolve(image, laplacian, mode='same')
    te = timer()
    print('CPU: %.2fs' % (te - ts))

    # GPU
    threadperblock = 32, 8
    blockpergrid = best_grid_size(tuple(reversed(image.shape)), threadperblock)
    print('kernel config: %s x %s' % (blockpergrid, threadperblock))

    # Trigger initialization the cuFFT system.
    # This takes significant time for small dataset.
    # We should not be including the time wasted here
    cufft.FFTPlan(shape=image.shape, itype=np.complex64, otype=np.complex64)

    # Start GPU timer
    ts = timer()
    image_complex = image.astype(np.complex64)
    response_complex = response.astype(np.complex64)

    d_image_complex = cuda.to_device(image_complex)
    d_response_complex = cuda.to_device(response_complex)

    cufft.fft_inplace(d_image_complex)
    cufft.fft_inplace(d_response_complex)

    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    cufft.ifft_inplace(d_image_complex)

    cvimage_gpu = d_image_complex.copy_to_host().real / np.prod(image.shape)

    te = timer()
    print('GPU: %.2fs' % (te - ts))

    # Plot the results
    plt.subplot(1, 2, 1)
    plt.title('CPU')
    plt.imshow(cvimage_cpu, cmap=plt.cm.gray)
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.title('GPU')
    plt.imshow(cvimage_gpu, cmap=plt.cm.gray)
    plt.axis('off')

    plt.show()