Ejemplo n.º 1
0
    def NumbaConv(h, M, xdtype):
        """Build a direct (time-domain) convolution filter compiled with numba.

        The returned callable reads the first ``M`` samples of its argument and
        returns an array of ``M + len(h) - 1`` samples in which every product
        ``h[l]*x[m]`` has been accumulated at index ``m + l``.  The output
        dtype is the NumPy result type of ``h.dtype`` and ``xdtype``.  The
        callable is passed through ``filter_dec(h, M)`` before being returned.
        """
        ntaps = len(h)
        nout = M + ntaps - 1

        # Look up the numba scalar types named after each dtype.  They are only
        # referenced by the explicit-signature decorator left commented out
        # below, but the lookups still raise AttributeError when numba exposes
        # no attribute with that dtype name.
        xdtype = np.dtype(xdtype)
        htype = getattr(numba, str(h.dtype))
        xtype = getattr(numba, str(xdtype))
        outdtype = np.result_type(h.dtype, xdtype)
        outtype = getattr(numba, str(outdtype))

        #@jit(restype=outtype[::1], argtypes=[htype[::1], xtype[::1]])
        @jit
        def conv(taps, sig):
            # input-sample-major accumulation: each input sample scatters its
            # contribution across ntaps consecutive output samples
            acc = np.zeros(nout, outdtype)
            for m in range(M):
                for k in range(ntaps):
                    acc[m + k] += taps[k]*sig[m]
            return acc

        def numba_conv(x):
            return conv(h, x)

        numba_conv = filter_dec(h, M)(numba_conv)

        return numba_conv
Ejemplo n.º 2
0
    def NumbaFFTW(h, M, xdtype=np.complex_, powerof2=True):
        """Build an FFT-based convolution filter backed by FFTW plans.

        The taps ``h`` are zero-padded to ``nfft`` points, transformed once
        with NumPy, and pre-divided by ``nfft`` because FFTW's inverse
        transform is unnormalized.  The returned callable writes its input into
        the first ``M`` entries of a zero-padded work buffer, runs the forward
        plan, multiplies by the cached spectrum ``H``, runs the inverse plan,
        and returns a copy of the first ``M + len(h) - 1`` output samples.

        Parameters
        ----------
        h : ndarray
            Filter taps; ``len(h)`` and ``h.dtype`` are used.
        M : int
            Number of input samples written into the work buffer on each call.
        xdtype : dtype-like
            Data type of the input; combined with ``h.dtype`` to pick the
            buffer and output types.
        powerof2 : bool
            When true, the FFT length ``M + len(h) - 1`` is passed through
            ``pow2`` (presumably rounding up to a power of two -- helper is
            defined elsewhere).

        Returns
        -------
        callable
            ``numba_fftw(x)`` wrapped by
            ``filter_dec(h, M, nfft=nfft, H=H)``.

        Notes
        -----
        The work buffers and plans are created once here and reused by every
        call of the returned function; only the returned array is a fresh
        copy.
        """
        L = len(h)
        outlen = M + L - 1
        nfft = outlen
        if powerof2:
            nfft = pow2(nfft)

        outdtype = np.result_type(h.dtype, xdtype)
        fftdtype = np.result_type(outdtype, np.complex64) # output is always complex, promote using smallest

        # speed not critical here, just use numpy fft
        # cast to outdtype so we use same type of fft as when transforming x
        hpad = zero_pad(h, nfft).astype(outdtype)
        if np.iscomplexobj(hpad):
            H = np.fft.fft(hpad)
        else:
            H = np.fft.rfft(hpad)
        H = (H / nfft).astype(fftdtype) # divide by nfft b/c FFTW's ifft does not do this

        xpad = pyfftw.n_byte_align(np.zeros(nfft, outdtype), 16) # outdtype so same type fft as h->H
        X = pyfftw.n_byte_align(np.zeros(len(H), fftdtype), 16) # len(H) b/c rfft may be used
        xfft = pyfftw.FFTW(xpad, X, threads=_THREADS)

        y = pyfftw.n_byte_align_empty(nfft, 16, outdtype)
        ifft = pyfftw.FFTW(X, y, direction='FFTW_BACKWARD', threads=_THREADS)

        # Creating an FFTW plan may overwrite the plan's input array.  Only
        # xpad[:M] is rewritten on each call, so the zero padding in the rest
        # of the buffer has to be restored after planning.
        xpad[:] = 0

        # Numba scalar types named after each dtype.  Only the commented-out
        # explicit-signature variants below reference them, but the lookups
        # still raise AttributeError for a dtype name numba does not expose.
        xtype = numba.__getattribute__(str(np.dtype(xdtype)))
        outtype = numba.__getattribute__(str(outdtype))
        ffttype = numba.__getattribute__(str(fftdtype))

        #@jit(restype=outtype[::1],
            #argtypes=[outtype[::1], ffttype[::1], ffttype[::1], outtype[::1], xtype[::1]])
        #def filt(xpad, X, H, y, x):
            #xpad[:M] = x
            #xfft.execute() # input in xpad, result in X
            #X[:] = H*X
            #ifft.execute() # input in X, result in y
            #yc = y[:outlen].copy()
            #return yc

        #@filter_dec(h, M, nfft=nfft, H=H)
        #def numba_fftw(x):
            #return filt(xpad, X, H, y, x)

        #@jit(argtypes=[xtype[::1]])
        @jit
        def numba_fftw(x):
            xpad[:M] = x
            xfft.execute() # input in xpad, result in X
            X[:] = H*X # want expression that is optimized by numba but writes into X
            ifft.execute() # input in X, result in y
            # copy so the caller's result is not clobbered by the next call
            yc = y[:outlen].copy()
            return yc

        numba_fftw = filter_dec(h, M, nfft=nfft, H=H)(numba_fftw)

        return numba_fftw
Ejemplo n.º 3
0
    def ShiftConvNumbaFFT(h, N, M, xdtype=np.complex_, powerof2=True):
        """Build a bank of ``N`` Doppler-shifted FFT convolution filters.

        A bank of ``N`` frequency-shifted copies of the taps ``h`` is built,
        zero-padded to ``nfft`` points, transformed once with NumPy, and
        pre-divided by ``nfft`` because FFTW's inverse transform is
        unnormalized.  The returned callable writes its input into the first
        ``M`` entries of a zero-padded work buffer, runs the forward plan,
        multiplies the input spectrum by every row of the bank spectrum ``H``,
        runs the inverse plan, and returns a copy of the first
        ``M + len(h) - 1`` columns of the result.

        Parameters
        ----------
        h : ndarray
            Filter taps; ``len(h)`` and ``h.dtype`` are used.
        N : int
            Number of Doppler shifts (rows of the filter bank).
        M : int
            Number of input samples written into the work buffer on each call.
        xdtype : dtype-like
            Data type of the input; promoted to a complex type for the buffers.
        powerof2 : bool
            When true, the FFT length ``M + len(h) - 1`` is passed through
            ``pow2`` (presumably rounding up to a power of two -- helper is
            defined elsewhere).

        Returns
        -------
        callable
            ``shiftconv_numba_fft(x)`` wrapped by
            ``dopplerbank_dec(h, N, M, nfft=nfft, H=H)``.

        Notes
        -----
        The work buffers and plans are created once here and reused by every
        call of the returned function; only the returned array is a fresh
        copy.
        """
        # implements Doppler filter:
        # y[n, p] = SUM_k (exp(2*pi*j*n*(k - (L-1))/N) * h[k]) * x[p - k]
        #         = SUM_k (exp(-2*pi*j*n*k/N) * s*[k]) * x[p - (L-1) + k]
        L = len(h)
        outlen = M + L - 1
        nfft = outlen
        if powerof2:
            nfft = pow2(nfft)

        dopplermat = np.exp(2*np.pi*1j*np.arange(N)[:, np.newaxis]*(np.arange(L) - (L - 1))/N)
        # cast to complex type with precision of h; astype returns a new array,
        # so the result must be assigned back for the cast to take effect
        dopplermat = dopplermat.astype(np.result_type(h.dtype, np.complex64))
        hbank = h*dopplermat
        # speed not critical here, just use numpy fft
        hbankpad = zero_pad(hbank, nfft)
        H = np.fft.fft(hbankpad) / nfft # divide by nfft b/c FFTW's ifft does not do this

        xcdtype = np.result_type(xdtype, np.complex64) # cast to complex type with precision of x
        xpad = pyfftw.n_byte_align(np.zeros(nfft, xcdtype), 16)
        X = pyfftw.n_byte_align(np.zeros(nfft, xcdtype), 16)
        xfft = pyfftw.FFTW(xpad, X, threads=_THREADS)

        ydtype = np.result_type(H.dtype, xcdtype)
        Y = pyfftw.n_byte_align_empty(H.shape, 16, ydtype)
        y = pyfftw.n_byte_align_empty(H.shape, 16, ydtype)
        ifft = pyfftw.FFTW(Y, y, direction='FFTW_BACKWARD', threads=_THREADS)

        # Creating an FFTW plan may overwrite the plan's input array.  Only
        # xpad[:M] is rewritten on each call, so the zero padding in the rest
        # of the buffer has to be restored after planning.
        xpad[:] = 0

        # Numba scalar type named after the input dtype.  Only the
        # commented-out explicit-signature variants below reference it, but the
        # lookup still raises AttributeError for a dtype name numba does not
        # expose.
        xtype = numba.__getattribute__(str(np.dtype(xdtype)))

        #htype = numba.__getattribute__(str(H.dtype))
        #xctype = numba.__getattribute__(str(X.dtype))
        #ytype = numba.__getattribute__(str(Y.dtype))
        #@jit(argtypes=[htype[:, ::1], xctype[::1], ytype[:, ::1], xtype[::1]])
        #def fun(H, X, Y, x):
            #xpad[:M] = x
            #xfft.execute() # input is xpad, output is X
            #Y[:, :] = H*X # need expression optimized by numba but that writes into Y
            #ifft.execute() # input is Y, output is y

            #yc = np.array(y)[:, :outlen] # need a copy, which np.array provides
            #return yc

        #@dopplerbank_dec(h, N, M, nfft=nfft, H=H)
        #def shiftconv_numba_fft(x):
            #return fun(H, X, Y, x)

        #@jit(argtypes=[xtype[::1]])
        @jit
        def shiftconv_numba_fft(x):
            xpad[:M] = x
            xfft.execute() # input is xpad, output is X
            Y[:, :] = X*H # need expression optimized by numba but that writes into Y
            ifft.execute() # input is Y, output is y

            yc = np.array(y[:, :outlen]) # need a copy, which np.array provides
            return yc

        shiftconv_numba_fft = dopplerbank_dec(h, N, M, nfft=nfft, H=H)(shiftconv_numba_fft)

        return shiftconv_numba_fft
Ejemplo n.º 4
0
    def SweepSpectraNumba(h, N, M, xdtype=np.complex_):
        """Build a Doppler filter bank that FFTs the demodulated input per delay.

        For every output delay ``p`` the returned callable multiplies the
        reversed taps with the matching length-``len(h)`` window of the
        zero-padded input, stores the product in one row of a work matrix, and
        transforms all rows with a single FFTW plan.  Every ``step``-th
        frequency bin is kept and the result is transposed, so the returned
        copy has ``N`` rows and ``M + len(h) - 1`` columns.

        Parameters
        ----------
        h : ndarray
            Filter taps; ``len(h)`` and ``h.dtype`` are used.
        N : int
            Number of frequency bins kept in the output.
        M : int
            Number of input samples written into the padded buffer on each
            call.
        xdtype : dtype-like
            Data type of the input buffer.

        Returns
        -------
        callable
            ``sweepspectra_numba(x)`` wrapped by ``dopplerbank_dec(h, N, M)``.

        Notes
        -----
        The work buffers and plan are created once here and reused by every
        call of the returned function; only the returned array is a fresh
        copy.
        """
        # implements Doppler filter:
        # y[n, p] = SUM_k exp(2*pi*j*n*(k - (L-1))/N) * (h[k] * x[p - k])
        #         = SUM_k exp(-2*pi*j*n*k/N) * (s*[k] * x[p - (L-1) + k])
        L = len(h)
        outlen = M + L - 1
        # when N < L, still need to take FFT with nfft >= L so we don't lose data
        # then subsample to get our N points that we desire
        step = L // N + 1
        nfft = N*step

        hrev = h[::-1]
        xpad = np.zeros(M + 2*(L - 1), xdtype) # x[0] at xpad[L - 1]

        demodpad = np.zeros((outlen, nfft), np.result_type(xdtype, h.dtype, np.complex64))
        demodpad = pyfftw.n_byte_align(demodpad, 16)
        y = pyfftw.n_byte_align(np.zeros_like(demodpad), 16)
        fft = pyfftw.FFTW(demodpad, y, threads=_THREADS)

        # Creating an FFTW plan may overwrite the plan's input array.  Only
        # the first L columns of demodpad are rewritten on each call, so the
        # zero padding in the remaining columns has to be restored after
        # planning.
        demodpad[:] = 0

        # Numba scalar type named after the input dtype.  Only the
        # commented-out explicit-signature decorator below references it, but
        # the lookup still raises AttributeError for a dtype name numba does
        # not expose.
        xtype = numba.__getattribute__(str(np.dtype(xdtype)))

        #@jit(argtypes=[xtype[::1]])
        @jit
        def sweepspectra_numba(x):
            xpad[(L - 1):outlen] = x
            for p in range(outlen):
                demodpad[p, :L] = hrev*xpad[p:(p + L)]
            fft.execute() # input is demodpad, output is y
            yc = np.array(y[:, ::step].T) # we need a copy, which np.array provides
            return yc

        sweepspectra_numba = dopplerbank_dec(h, N, M)(sweepspectra_numba)

        return sweepspectra_numba
def _make_numba_cubic_solver(dtype):
    """Build a numba-compiled solver for batches of monic cubic equations.

    The generated solver finds real roots of
    ``x**3 + b*x**2 + c*x + d = 0`` element by element for coefficient arrays
    ``b``, ``c`` and ``d``.  Quantities whose magnitude is below
    ``slippy.CUBIC_EPS`` are treated as zero when selecting the solution
    branch.

    Parameters
    ----------
    dtype
        Floating point type of the coefficient arrays.  ``str(dtype)`` must
        start with ``"float"`` and is also used as the name of the numba type
        looked up for the compiled signature.

    Returns
    -------
    callable
        ``full_func(b, c, d)`` returning the tuple ``(r1, r2, r3)`` of arrays
        shaped like ``b``, sorted so that ``r1 <= r2 <= r3`` for each element.
        In the branches that find only one real root, that root is stored in
        all three outputs.

    Raises
    ------
    ValueError
        If ``str(dtype)`` does not start with ``"float"``.
    """
    eps = slippy.CUBIC_EPS
    s_dtype = str(dtype)
    if not s_dtype.startswith("float"):
        raise ValueError("can only make cubic solver for single and double floats")

    # Kernel compiled by guvectorize below: b, c, d are the inputs and
    # r1, r2, r3 are output arrays that are filled in place.
    def solve_cubic_numba_base(b, c, d, r1, r2, r3):
        for i in range(len(b)):
            if np.abs(d[i]) < eps:
                # d ~ 0: x = 0 is a root, the cubic factors as
                # x*(x**2 + b*x + c); find the remaining roots by the
                # quadratic formula
                # NOTE(review): when b**2 < 4*c the square root below is taken
                # of a negative number, so r2 and r3 presumably become NaN
                # rather than repeating the real root as the other branches
                # do -- confirm this is intended.
                r1[i] = 0
                diff = np.sqrt(b[i] * b[i] - 4 * c[i]) / 2
                r2[i] = (-b[i]) / 2 + diff
                r3[i] = (-b[i]) / 2 - diff
            else:
                # convert to depressed cubic t**3 + p*t + q = 0 using
                # x = t - b/3; every root below is shifted back by -b/3
                p = c[i] - b[i] ** 2 / 3
                q = 2 * b[i] ** 3 / 27 - b[i] * c[i] / 3 + d[i]
                if np.abs(p) < eps:
                    # p ~ 0: t**3 = -q, take the real cube root and store it
                    # in all three outputs
                    r1[i] = np.sign(-q) * np.abs(q) ** (1 / 3) - b[i] / 3
                    r2[i] = r1[i]
                    r3[i] = r1[i]
                elif np.abs(q) < eps:
                    # q ~ 0: t*(t**2 + p) = 0, so t = 0 is always a root
                    r3[i] = - b[i] / 3
                    if p < 0:
                        # two further real roots t = +/- sqrt(-p)
                        diff = np.sqrt(-p)
                        r2[i] = diff - b[i] / 3
                        r1[i] = - diff - b[i] / 3
                    else:
                        # t**2 = -p has no real solution; repeat the real root
                        r1[i] = r3[i]
                        r2[i] = r3[i]
                else:
                    # e decides how many distinct real roots the depressed
                    # cubic has
                    e = q * q / 4 + p * p * p / 27
                    if np.abs(e) < eps:
                        # e ~ 0: one simple and one repeated root; the two
                        # candidates are t = -1.5*q/p and t = 3*q/p
                        r2[i] = -1.5 * q / p - b[i] / 3
                        r3[i] = 3 * q / p - b[i] / 3
                        # derivative of the original cubic at each candidate;
                        # r1 duplicates the candidate where it is smaller in
                        # magnitude, i.e. the repeated root
                        f_prime2 = 3 * r2[i] ** 2 + 2 * b[i] * r2[i] + c[i]
                        f_prime3 = 3 * r3[i] ** 2 + 2 * b[i] * r3[i] + c[i]
                        if np.abs(f_prime2) < np.abs(f_prime3):
                            r1[i] = r2[i]
                        else:
                            r1[i] = r3[i]
                    elif e > 0:
                        # e > 0: single real root from Cardano's formula,
                        # t = u - p/(3*u) with u**3 = -q/2 - sqrt(e); the
                        # sign/abs form takes the real cube root of a
                        # possibly negative number
                        u = -q / 2 - np.sqrt(e)
                        u = np.sign(u) * np.abs(u) ** (1 / 3)
                        r1[i] = u - p / (3 * u) - b[i] / 3
                        r2[i] = r1[i]
                        r3[i] = r1[i]
                    else:
                        # e < 0: three real roots from the trigonometric
                        # form, spaced by 2*pi/3 in the cosine argument
                        u = 2 * np.sqrt(-p / 3)
                        t = np.arccos(3 * q / p / u) / 3
                        k = 2 * np.pi / 3
                        r1[i] = u * np.cos(t) - b[i] / 3
                        r2[i] = u * np.cos(t - k) - b[i] / 3
                        r3[i] = u * np.cos(t - 2 * k) - b[i] / 3
            # sort the three roots in ascending order so r1 <= r2 <= r3
            r1[i], r2[i], r3[i] = np.sort(np.array([r1[i], r2[i], r3[i]]))

    # look up the numba scalar type with the same name as the dtype and
    # compile the kernel as a generalized ufunc: three 1-D inputs and three
    # 1-D outputs sharing the core dimension n
    numba_type = numba.__getattribute__(s_dtype)
    raw_func = numba.guvectorize([(numba_type[:], numba_type[:], numba_type[:],
                                   numba_type[:], numba_type[:], numba_type[:])],
                                 "(n),(n),(n)->(n),(n),(n)",
                                 nopython=True)(solve_cubic_numba_base)

    def full_func(b, c, d):
        """Allocate the three output arrays like ``b`` and run the compiled solver."""
        r1 = np.zeros_like(b)
        r2 = np.zeros_like(b)
        r3 = np.zeros_like(b)
        raw_func(b, c, d, r1, r2, r3)
        return r1, r2, r3

    return full_func