コード例 #1
0
def task1(d_image_complex, d_response_complex):
    """Task 1: FFT-based convolution performed in place on the image array.

    Steps:
        freq_image = fft(image)
        freq_resp  = fft(response)
        freq_out   = freq_image * freq_resp
        output     = ifft(freq_out)

    ``fft_inplace`` / ``ifft_inplace`` are the cuFFT wrappers and ``vmult``
    is the element-wise complex multiplication; its ``out`` keyword names
    the array that receives the product.  Both arguments are expected to
    have the same length.

    Nothing is returned: the filtered result is left in
    ``d_image_complex``.  ``d_response_complex`` is also passed to
    ``fft_inplace``, so it does not keep its original contents either.
    """
    # Forward-transform the image first, then the response.
    for d_ary in (d_image_complex, d_response_complex):
        fft_inplace(d_ary)

    # Multiply the two spectra, storing the product over the image spectrum.
    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    # Back to the spatial domain; the filter has now been applied.
    ifft_inplace(d_image_complex)
def task1(d_image_complex, d_response_complex):
    """Apply the response filter to the image via an in-place FFT convolution.

    Pseudocode:
        freq_image = fft(image)
        freq_resp  = fft(response)
        freq_out   = freq_image * freq_resp
        output     = ifft(freq_out)

    Uses the cuFFT helpers ``fft_inplace(ary)`` and ``ifft_inplace(ary)``
    together with ``vmult``, the element-wise complex multiplication whose
    ``out`` keyword selects the destination array.  The two arguments are
    expected to have the same length.

    The function returns nothing; the result is left in
    ``d_image_complex``.  ``d_response_complex`` is handed to
    ``fft_inplace`` as well, so its contents are replaced too.
    """
    operands = (d_image_complex, d_response_complex)

    # Move both operands into the frequency domain, image first.
    for operand in operands:
        fft_inplace(operand)

    # Point-wise product of the spectra, written back into the image array.
    vmult(*operands, out=d_image_complex)

    # Inverse transform: d_image_complex now holds the filtered image.
    ifft_inplace(d_image_complex)
コード例 #3
0
 def cuFFT_v2(ary, out=None, stream=0):
     """Forward FFT of ``ary`` with ``fftshift`` applied before and after.

     Args:
         ary: Input array.  It is converted to ``numpy.complex64`` when it
             is not already of that type; the transform runs on a copy, so
             ``ary`` itself is left untouched.
         out: Optional ``numpy.complex64`` array that receives the result.
         stream: Forwarded unchanged to ``fft.fft`` / ``fft.fft_inplace``.

     Returns:
         The shifted spectrum.  When ``out`` is supplied the result is
         written into it and ``out`` itself is returned; otherwise a new
         ``complex64`` array is returned.

     Raises:
         TypeError: If ``out`` is supplied but is not ``numpy.complex64``.
     """
     # NOTE(review): the input is shifted with fftshift rather than
     # ifftshift; the two differ for odd-length axes - confirm which
     # centering convention is intended.
     if out is None:
         if ary.dtype.type is not np.complex64:
             ary = ary.astype(np.complex64)
         # fftshift returns a new array, so the in-place transform below
         # works on a scratch copy rather than on the caller's data.
         tmp = np.fft.fftshift(ary)
         fft.fft_inplace(tmp, stream=stream)
         return np.fft.fftshift(tmp)

     if out.dtype.type is not np.complex64:
         raise TypeError('Output must be type numpy.complex64')

     fft.fft(np.fft.fftshift(ary.astype(np.complex64)),
             out=out,
             stream=stream)
     # Store the shifted spectrum in the caller's buffer.  Merely rebinding
     # the local name would leave ``out`` holding the un-shifted transform
     # while a different array is returned.
     out[...] = np.fft.fftshift(out)
     return out
コード例 #4
0
def main():
    """Filter an image with a 5x5 kernel on the CPU and on the GPU, then plot.

    The image is read from the path given as ``sys.argv[1]``; when no
    argument is present, ``misc.face(gray=True)`` is used instead.  The CPU
    result comes from ``fftconvolve``; the GPU result comes from a forward
    FFT of image and kernel, an element-wise product and an inverse FFT on
    device arrays.  Wall-clock timings for both paths are printed and the
    two results are shown side by side.
    """
    # Build Filter: 25 whitespace-separated coefficients -> 5x5 float32 array.
    kernel_tokens = """
    -4 -1 0 -1 -4
    -1 2 3 2 -1
    0 3 4 3 0
    -1 2 3 2 -1
    -4 -1 0 -1 -4
    """.split()
    laplacian = np.array(kernel_tokens, dtype=np.float32).reshape(5, 5)

    # Build Image: command-line path if given, bundled sample otherwise.
    try:
        image_path = sys.argv[1]
        image = ndimage.imread(image_path, flatten=True).astype(np.float32)
    except IndexError:
        image = misc.face(gray=True).astype(np.float32)

    print("Image size: %s" % (image.shape,))

    # Image-sized response with the kernel in its top-left corner, so that
    # both FFT operands have the same shape.
    response = np.zeros_like(image)
    kernel_rows, kernel_cols = laplacian.shape
    response[:kernel_rows, :kernel_cols] = laplacian

    # CPU reference
    cpu_start = timer()
    cvimage_cpu = fftconvolve(image, laplacian, mode="same")
    cpu_elapsed = timer() - cpu_start
    print("CPU: %.2fs" % cpu_elapsed)

    # GPU launch configuration (only reported here).
    threadperblock = (32, 8)
    blockpergrid = best_grid_size(image.shape[::-1], threadperblock)
    print("kernel config: %s x %s" % (blockpergrid, threadperblock))

    # Create a plan up front to trigger cuFFT initialisation.  That takes
    # significant time for a small dataset and should not be counted in
    # the GPU timing below.
    FFTPlan(shape=image.shape, itype=np.complex64, otype=np.complex64)

    # GPU path; the timed region includes the host<->device copies.
    gpu_start = timer()
    host_image = image.astype(np.complex64)
    host_response = response.astype(np.complex64)

    d_image_complex = cuda.to_device(host_image)
    d_response_complex = cuda.to_device(host_response)

    for d_ary in (d_image_complex, d_response_complex):
        fft_inplace(d_ary)

    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    ifft_inplace(d_image_complex)

    # Keep the real part and divide by the element count (presumably to
    # normalise the unscaled inverse transform - confirm).
    gpu_real = d_image_complex.copy_to_host().real
    cvimage_gpu = gpu_real / np.prod(image.shape)

    gpu_elapsed = timer() - gpu_start
    print("GPU: %.2fs" % gpu_elapsed)

    # Plot the results side by side.
    panels = (("CPU", cvimage_cpu), ("GPU", cvimage_gpu))
    for column, (label, picture) in enumerate(panels, start=1):
        plt.subplot(1, 2, column)
        plt.title(label)
        plt.imshow(picture, cmap=plt.cm.gray)
        plt.axis("off")

    plt.show()
def main():
    """Compare CPU and GPU FFT convolution of an image with a 5x5 kernel.

    Reads the image named by ``sys.argv[1]`` (falling back to
    ``misc.face(gray=True)`` when no argument is given), convolves it once
    with ``fftconvolve`` on the CPU and once with FFT / multiply / inverse
    FFT on device arrays, prints the elapsed time of each path, and plots
    the two outputs next to each other.
    """
    # Build Filter: parse the coefficient text into a 5x5 float32 array.
    coefficients = '''
    -4 -1 0 -1 -4
    -1 2 3 2 -1
    0 3 4 3 0
    -1 2 3 2 -1
    -4 -1 0 -1 -4
    '''.split()
    laplacian = np.array(coefficients, dtype=np.float32).reshape(5, 5)

    # Build Image: use the command-line path when one is supplied.
    try:
        source_path = sys.argv[1]
        image = ndimage.imread(source_path, flatten=True).astype(np.float32)
    except IndexError:
        image = misc.face(gray=True).astype(np.float32)

    print("Image size: %s" % (image.shape, ))

    # Zero array shaped like the image with the kernel copied into its
    # top-left 5x5 corner, giving the two FFT inputs equal shapes.
    response = np.zeros_like(image)
    rows, cols = laplacian.shape
    response[:rows, :cols] = laplacian

    # CPU
    t_begin = timer()
    cvimage_cpu = fftconvolve(image, laplacian, mode='same')
    t_cpu = timer() - t_begin
    print('CPU: %.2fs' % t_cpu)

    # GPU launch configuration; it is only printed in this function.
    threadperblock = (32, 8)
    blockpergrid = best_grid_size(image.shape[::-1], threadperblock)
    print('kernel config: %s x %s' % (blockpergrid, threadperblock))

    # Building a plan here triggers cuFFT initialisation, which takes
    # significant time for a small dataset; doing it before the timer
    # starts keeps that cost out of the GPU measurement.
    FFTPlan(shape=image.shape, itype=np.complex64, otype=np.complex64)

    # Start GPU timer (transfers to and from the device are included).
    t_begin = timer()
    image_c64 = image.astype(np.complex64)
    response_c64 = response.astype(np.complex64)

    d_image_complex = cuda.to_device(image_c64)
    d_response_complex = cuda.to_device(response_c64)

    for device_array in (d_image_complex, d_response_complex):
        fft_inplace(device_array)

    vmult(d_image_complex, d_response_complex, out=d_image_complex)

    ifft_inplace(d_image_complex)

    # Real part of the result divided by the number of elements
    # (presumably compensating for an unnormalised inverse FFT - confirm).
    host_result = d_image_complex.copy_to_host()
    cvimage_gpu = host_result.real / np.prod(image.shape)

    t_gpu = timer() - t_begin
    print('GPU: %.2fs' % t_gpu)

    # Plot the results: CPU on the left, GPU on the right.
    for slot, heading, pixels in ((1, 'CPU', cvimage_cpu),
                                  (2, 'GPU', cvimage_gpu)):
        plt.subplot(1, 2, slot)
        plt.title(heading)
        plt.imshow(pixels, cmap=plt.cm.gray)
        plt.axis('off')

    plt.show()