Example #1
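Note: these snippets are thunk closures, so `inputs`, `outputs`, `plan`, and `plan_input_shape` are captured from the enclosing scope. They also assume module-level imports along the lines of `import numpy as np`, `import pygpu`, `import pycuda.driver`, and `import skcuda.fft as fft`.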
        def thunk():
            input_shape = inputs[0][0].shape
            s = inputs[1][0]

            # Since padding is not supported, assert that the last dimension
            # corresponds to the input forward transform size.
            # assert (input_shape[1:-2] == s[:-1]).all()
            # assert ((input_shape[-2] - 1) * 2 + s[-1] % 2 == s[-1]).all()

            # construct output shape
            # chop off the extra length-2 dimension for real/imag
            # output_shape = [input_shape[0]] + list(s)
            # output_shape = tuple(output_shape)
            output_shape = input_shape

            z = outputs[0]

            # only allocate if there is no previous allocation of the
            # right size.
            if z[0] is None or z[0].shape != output_shape:
                z[0] = pygpu.zeros(output_shape,
                                   context=inputs[0][0].context,
                                   dtype='float32')

            input_pycuda = inputs[0][0]
            # input_pycuda is a float32 array with an extra dimension,
            # but will be interpreted by skcuda as a complex64
            # array instead.
            output_pycuda = z[0]

            with input_pycuda.context:
                # only initialise plan if necessary
                if plan[0] is None or plan_input_shape[0] != input_shape:
                    plan_input_shape[0] = input_shape
                    plan[0] = fft.Plan(s,
                                       np.complex64,
                                       np.complex64,
                                       batch=output_shape[0])

                # Sync GPU variables before computation
                input_pycuda.sync()
                output_pycuda.sync()

                fft.ifft(input_pycuda, output_pycuda, plan[0])
                # strangely enough, enabling rescaling here makes it run
                # very, very slowly, so do this rescaling manually
                # afterwards!

                # Sync results to ensure output contains completed computation
                pycuda.driver.Context.synchronize()
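The comments leave two things implicit: the float32 buffer with a trailing length-2 axis stands in for a complex64 array, and the unscaled inverse transform must be rescaled by hand afterwards. Below is a minimal NumPy sketch of both points for data copied back to the host; the helper name `manual_rescale` is hypothetical and not part of the original code.

    import numpy as np

    # A float32 array whose last axis has length 2 (real, imag) can be viewed
    # as complex64 without copying; this is the same trick the thunk relies on
    # when it hands float32 buffers to skcuda.
    pairs = np.zeros((4, 8, 2), dtype=np.float32)  # 4 length-8 transforms
    as_complex = pairs.view(np.complex64)[..., 0]  # shape (4, 8), shares memory

    # cuFFT inverse transforms are unnormalized, so the conventional ifft
    # result is recovered by dividing by the product of the transform lengths.
    def manual_rescale(out, s):
        # out: the thunk's float32 output (on the host); s: the transform shape
        return out / np.float32(np.prod(s))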
Example #2
        def thunk():
            input_shape = inputs[0][0].shape
            s = inputs[1][0]
            output_shape = input_shape

            z = outputs[0]

            # only allocate if there is no previous allocation of the
            # right size.
            if z[0] is None or z[0].shape != output_shape:
                z[0] = pygpu.zeros(output_shape,
                                   context=inputs[0][0].context,
                                   dtype='float32')

            input_pycuda = inputs[0][0]
            # input_pycuda is a float32 array with an extra dimension,
            # but will be interpreted by skcuda as a complex64
            # array instead.
            output_pycuda = z[0]

            with input_pycuda.context:
                # only initialise plan if necessary
                if plan[0] is None or plan_input_shape[0] != input_shape:
                    plan_input_shape[0] = input_shape
                    plan[0] = fft.Plan(s,
                                       np.complex64,
                                       np.complex64,
                                       batch=np.prod(input_shape[:-3]))

                # Sync GPU variables before computation
                input_pycuda.sync()
                output_pycuda.sync()

                fft.ifft(input_pycuda, output_pycuda, plan[0])
                # strangely enough, enabling rescaling here makes it run
                # very, very slowly, so do this rescaling manually
                # afterwards!

                # Sync results to ensure output contains completed computation
                pycuda.driver.Context.synchronize()
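This variant differs from Example #1 only in how the batch count is derived: rather than taking the first axis (`output_shape[0]`), it multiplies out every axis in front of the last three, which covers inputs with several leading batch dimensions when the transform itself is 2-D (two transform axes plus the real/imag pair). A small sketch of that arithmetic, with shapes chosen purely for illustration:

    import numpy as np

    # For a 2-D complex transform stored as float32 pairs, the last three axes
    # are (rows, cols, real/imag); everything in front of them is batch.
    input_shape = (5, 3, 16, 16, 2)    # two leading batch axes
    batch = int(np.prod(input_shape[:-3]))
    assert batch == 15                 # 5 * 3 transforms per plan execution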