def NumbaConv(h, M, xdtype):
    """Build a direct (time-domain) convolution filter for length-``M`` input.

    Parameters
    ----------
    h : array
        Filter taps; ``len(h)`` and ``h.dtype`` are read.
    M : int
        Number of input samples the returned filter consumes per call.
    xdtype : dtype-like
        Data type of the input; converted with ``np.dtype``.

    Returns
    -------
    callable
        ``numba_conv(x)`` wrapped by ``filter_dec(h, M)``. Each call returns a
        new array of length ``M + len(h) - 1`` whose dtype is
        ``np.result_type(h.dtype, xdtype)``.
    """
    n_taps = len(h)
    n_out = M + n_taps - 1
    in_dtype = np.dtype(xdtype)

    # numba scalar types named after each dtype. They are not passed to @jit
    # (compilation is lazy), but the lookups raise AttributeError for a dtype
    # that has no same-named numba type.
    taps_nbtype = getattr(numba, str(h.dtype))
    in_nbtype = getattr(numba, str(in_dtype))
    res_dtype = np.result_type(h.dtype, in_dtype)
    res_nbtype = getattr(numba, str(res_dtype))

    @jit
    def conv(taps, sig):
        # acc[i + j] accumulates taps[j] * sig[i] over every sample/tap pair
        acc = np.zeros(n_out, res_dtype)
        for i in range(M):
            for j in range(n_taps):
                acc[i + j] += taps[j]*sig[i]
        return acc

    def numba_conv(x):
        return conv(h, x)

    return filter_dec(h, M)(numba_conv)
def NumbaFFTW(h, M, xdtype=np.complex128, powerof2=True):
    """Build an FFT-based convolution filter for length-``M`` input using FFTW.

    The filter's transform ``H`` is computed once with numpy; every call then
    runs a forward FFTW plan on the zero-padded input, multiplies by ``H`` and
    runs a backward FFTW plan.

    Parameters
    ----------
    h : array
        Filter taps; ``len(h)`` and ``h.dtype`` are read.
    M : int
        Number of input samples the returned filter consumes per call.
    xdtype : dtype-like, optional
        Data type of the input. Defaults to ``np.complex128`` (the type the
        removed ``np.complex_`` alias referred to).
    powerof2 : bool, optional
        If true, the FFT length is ``pow2(M + len(h) - 1)`` instead of
        ``M + len(h) - 1``.

    Returns
    -------
    callable
        ``numba_fftw(x)`` wrapped by ``filter_dec(h, M, nfft=nfft, H=H)``.
        Each call returns a copy of the first ``M + len(h) - 1`` output
        samples. The work buffers are allocated once here and reused by every
        call.
    """
    L = len(h)
    outlen = M + L - 1
    nfft = outlen
    if powerof2:
        nfft = pow2(nfft)

    outdtype = np.result_type(h.dtype, xdtype)
    # output of the FFT is always complex, promote using the smallest complex type
    fftdtype = np.result_type(outdtype, np.complex64)

    # speed not critical here, just use numpy fft
    # cast to outdtype so we use the same kind of fft as when transforming x
    hpad = zero_pad(h, nfft).astype(outdtype)
    if np.iscomplexobj(hpad):
        H = np.fft.fft(hpad)
    else:
        H = np.fft.rfft(hpad)
    # divide by nfft b/c FFTW's ifft does not do this
    H = (H / nfft).astype(fftdtype)

    # outdtype so the forward plan is the same kind of fft as h -> H
    xpad = pyfftw.n_byte_align(np.zeros(nfft, outdtype), 16)
    # len(H) b/c rfft may be used
    X = pyfftw.n_byte_align(np.zeros(len(H), fftdtype), 16)
    xfft = pyfftw.FFTW(xpad, X, threads=_THREADS)

    y = pyfftw.n_byte_align_empty(nfft, 16, outdtype)
    ifft = pyfftw.FFTW(X, y, direction='FFTW_BACKWARD', threads=_THREADS)

    # Look up the numba scalar type named after each dtype; this raises
    # AttributeError for a dtype that has no same-named numba type. The types
    # themselves are not needed because @jit is used without a signature.
    for checked_dtype in (np.dtype(xdtype), outdtype, fftdtype):
        getattr(numba, str(checked_dtype))

    # NOTE(review): the body calls methods on pyfftw plan objects captured from
    # the enclosing scope -- confirm the installed numba's bare @jit accepts that.
    @jit
    def numba_fftw(x):
        xpad[:M] = x
        xfft.execute()  # input in xpad, result in X
        X[:] = H*X  # want expression that is optimized by numba but writes into X
        ifft.execute()  # input in X, result in y
        # copy so the caller's result is not overwritten by the next call
        yc = y[:outlen].copy()
        return yc

    numba_fftw = filter_dec(h, M, nfft=nfft, H=H)(numba_fftw)

    return numba_fftw
def ShiftConvNumbaFFT(h, N, M, xdtype=np.complex128, powerof2=True):
    """Build a Doppler filter bank applied by FFT convolution using FFTW.

    Implements the Doppler filter::

        y[n, p] = SUM_k (exp(2*pi*j*n*(k - (L-1))/N) * h[k]) * x[p - k]
                = SUM_k (exp(-2*pi*j*n*k/N) * s*[k]) * x[p - (L-1) + k]

    A bank of ``N`` frequency-shifted copies of ``h`` is transformed once with
    numpy; every call then transforms the zero-padded input with FFTW,
    multiplies it against each row of the bank and inverse-transforms.

    Parameters
    ----------
    h : array
        Filter taps; ``len(h)`` (``L`` above) and ``h.dtype`` are read.
    N : int
        Number of Doppler shifts, i.e. rows of the filter bank.
    M : int
        Number of input samples the returned filter consumes per call.
    xdtype : dtype-like, optional
        Data type of the input. Defaults to ``np.complex128`` (the type the
        removed ``np.complex_`` alias referred to).
    powerof2 : bool, optional
        If true, the FFT length is ``pow2(M + len(h) - 1)`` instead of
        ``M + len(h) - 1``.

    Returns
    -------
    callable
        ``shiftconv_numba_fft(x)`` wrapped by
        ``dopplerbank_dec(h, N, M, nfft=nfft, H=H)``. Each call returns a new
        array holding the first ``M + len(h) - 1`` columns of the result. The
        work buffers are allocated once here and reused by every call.
    """
    L = len(h)
    outlen = M + L - 1
    nfft = outlen
    if powerof2:
        nfft = pow2(nfft)

    dopplermat = np.exp(2*np.pi*1j*np.arange(N)[:, np.newaxis]*(np.arange(L) - (L - 1))/N)
    # cast to complex type with precision of h; astype returns a new array, so
    # the result has to be assigned back for the cast to take effect
    dopplermat = dopplermat.astype(np.result_type(h.dtype, np.complex64))
    hbank = h*dopplermat
    # speed not critical here, just use numpy fft
    hbankpad = zero_pad(hbank, nfft)
    H = np.fft.fft(hbankpad) / nfft  # divide by nfft b/c FFTW's ifft does not do this

    xcdtype = np.result_type(xdtype, np.complex64)  # complex type with precision of x
    xpad = pyfftw.n_byte_align(np.zeros(nfft, xcdtype), 16)
    X = pyfftw.n_byte_align(np.zeros(nfft, xcdtype), 16)
    xfft = pyfftw.FFTW(xpad, X, threads=_THREADS)

    ydtype = np.result_type(H.dtype, xcdtype)
    Y = pyfftw.n_byte_align_empty(H.shape, 16, ydtype)
    y = pyfftw.n_byte_align_empty(H.shape, 16, ydtype)
    ifft = pyfftw.FFTW(Y, y, direction='FFTW_BACKWARD', threads=_THREADS)

    # Look up the numba scalar type named after the input dtype; this raises
    # AttributeError for a dtype that has no same-named numba type. The type
    # itself is not needed because @jit is used without a signature.
    getattr(numba, str(np.dtype(xdtype)))

    # NOTE(review): the body calls methods on pyfftw plan objects captured from
    # the enclosing scope -- confirm the installed numba's bare @jit accepts that.
    @jit
    def shiftconv_numba_fft(x):
        xpad[:M] = x
        xfft.execute()  # input is xpad, output is X
        Y[:, :] = X*H  # need expression optimized by numba but that writes into Y
        ifft.execute()  # input is Y, output is y
        yc = np.array(y[:, :outlen])  # need a copy, which np.array provides
        return yc

    shiftconv_numba_fft = dopplerbank_dec(h, N, M, nfft=nfft, H=H)(shiftconv_numba_fft)

    return shiftconv_numba_fft
def SweepSpectraNumba(h, N, M, xdtype=np.complex128):
    """Build a Doppler filter bank computed as an FFT over the filter taps.

    Implements the Doppler filter::

        y[n, p] = SUM_k exp(2*pi*j*n*(k - (L-1))/N) * (h[k] * x[p - k])
                = SUM_k exp(-2*pi*j*n*k/N) * (s*[k] * x[p - (L-1) + k])

    For every output sample ``p`` the reversed taps are multiplied against the
    matching window of the zero-padded input, and one FFTW plan transforms all
    of those products along the tap axis.

    Parameters
    ----------
    h : array
        Filter taps; ``len(h)`` (``L`` above) and ``h.dtype`` are read.
    N : int
        Number of Doppler frequencies returned.
    M : int
        Number of input samples the returned filter consumes per call.
    xdtype : dtype-like, optional
        Data type of the input. Defaults to ``np.complex128`` (the type the
        removed ``np.complex_`` alias referred to).

    Returns
    -------
    callable
        ``sweepspectra_numba(x)`` wrapped by ``dopplerbank_dec(h, N, M)``.
        Each call returns a new array of shape ``(N, M + len(h) - 1)``. The
        work buffers are allocated once here and reused by every call.
    """
    L = len(h)
    outlen = M + L - 1
    # when N < L, still need to take FFT with nfft >= L so we don't lose data
    # then subsample to get our N points that we desire
    step = L // N + 1
    nfft = N*step

    hrev = h[::-1]
    xpad = np.zeros(M + 2*(L - 1), xdtype)  # x[0] at xpad[L - 1]
    # only the first L columns are ever written, so columns L..nfft-1 stay zero
    demodpad = np.zeros((outlen, nfft), np.result_type(xdtype, h.dtype, np.complex64))
    demodpad = pyfftw.n_byte_align(demodpad, 16)
    y = pyfftw.n_byte_align(np.zeros_like(demodpad), 16)
    fft = pyfftw.FFTW(demodpad, y, threads=_THREADS)

    # Look up the numba scalar type named after the input dtype; this raises
    # AttributeError for a dtype that has no same-named numba type. The type
    # itself is not needed because @jit is used without a signature.
    getattr(numba, str(np.dtype(xdtype)))

    # NOTE(review): the body calls a method on a pyfftw plan object captured from
    # the enclosing scope -- confirm the installed numba's bare @jit accepts that.
    @jit
    def sweepspectra_numba(x):
        xpad[(L - 1):outlen] = x
        for p in range(outlen):
            demodpad[p, :L] = hrev*xpad[p:(p + L)]
        fft.execute()  # input is demodpad, output is y
        # keep every step-th frequency bin, giving N rows after the transpose
        yc = np.array(y[:, ::step].T)  # we need a copy, which np.array provides
        return yc

    sweepspectra_numba = dopplerbank_dec(h, N, M)(sweepspectra_numba)

    return sweepspectra_numba
def _make_numba_cubic_solver(dtype):
    """Create a vectorised solver for monic cubics of one floating-point type.

    The generated kernel finds, element by element, roots of
    ``x**3 + b*x**2 + c*x + d = 0`` and writes them, sorted with ``np.sort``,
    into three output arrays.

    Parameters
    ----------
    dtype
        Floating-point type to compile for. ``str(dtype)`` must start with
        ``"float"`` and must name an attribute of the ``numba`` module.

    Returns
    -------
    callable
        ``full_func(b, c, d)`` returning the tuple ``(r1, r2, r3)``; the
        outputs are allocated with ``np.zeros_like(b)``.

    Raises
    ------
    ValueError
        If ``str(dtype)`` does not start with ``"float"``.
    """
    # tolerance below which a coefficient or discriminant is treated as zero
    eps = slippy.CUBIC_EPS
    s_dtype = str(dtype)
    if not s_dtype.startswith("float"):
        raise ValueError("can only make cubic solver for single and double floats")

    def solve_cubic_numba_base(b, c, d, r1, r2, r3):
        # b, c, d are the input coefficient arrays; r1, r2, r3 are output
        # arrays filled in place, one cubic per index i.
        for i in range(len(b)):
            if np.abs(d[i]) < eps:
                # cancel and find remaining roots by quadratic formula
                # (x = 0 is a root; the rest solve x**2 + b*x + c = 0)
                r1[i] = 0
                diff = np.sqrt(b[i] * b[i] - 4 * c[i]) / 2
                r2[i] = (-b[i]) / 2 + diff
                r3[i] = (-b[i]) / 2 - diff
            else:
                # convert to depressed cubic t**3 + p*t + q = 0 with x = t - b/3
                p = c[i] - b[i] ** 2 / 3
                q = 2 * b[i] ** 3 / 27 - b[i] * c[i] / 3 + d[i]
                if np.abs(p) < eps:
                    # t**3 = -q: one real root, written to all three outputs
                    r1[i] = np.sign(-q) * np.abs(q) ** (1 / 3) - b[i] / 3
                    r2[i] = r1[i]
                    r3[i] = r1[i]
                elif np.abs(q) < eps:
                    # t * (t**2 + p) = 0: t = 0 always, t = +/-sqrt(-p) if p < 0
                    r3[i] = - b[i] / 3
                    if p < 0:
                        diff = np.sqrt(-p)
                        r2[i] = diff - b[i] / 3
                        r1[i] = - diff - b[i] / 3
                    else:
                        # only t = 0 is real; repeat it in every output
                        r1[i] = r3[i]
                        r2[i] = r3[i]
                else:
                    # e is the discriminant term of the depressed cubic
                    e = q * q / 4 + p * p * p / 27
                    if np.abs(e) < eps:
                        # repeated root: candidates t = -3q/(2p) and t = 3q/p
                        r2[i] = -1.5 * q / p - b[i] / 3
                        r3[i] = 3 * q / p - b[i] / 3
                        # derivative of the cubic at each candidate; the one
                        # with the smaller |f'| is stored a second time in r1
                        f_prime2 = 3 * r2[i] ** 2 + 2 * b[i] * r2[i] + c[i]
                        f_prime3 = 3 * r3[i] ** 2 + 2 * b[i] * r3[i] + c[i]
                        if np.abs(f_prime2) < np.abs(f_prime3):
                            r1[i] = r2[i]
                        else:
                            r1[i] = r3[i]
                    elif e > 0:
                        # one real root; u is a signed real cube root
                        u = -q / 2 - np.sqrt(e)
                        u = np.sign(u) * np.abs(u) ** (1 / 3)
                        r1[i] = u - p / (3 * u) - b[i] / 3
                        r2[i] = r1[i]
                        r3[i] = r1[i]
                    else:
                        # three real roots via the trigonometric form
                        u = 2 * np.sqrt(-p / 3)
                        t = np.arccos(3 * q / p / u) / 3
                        k = 2 * np.pi / 3
                        r1[i] = u * np.cos(t) - b[i] / 3
                        r2[i] = u * np.cos(t - k) - b[i] / 3
                        r3[i] = u * np.cos(t - 2 * k) - b[i] / 3
            # sort the array
            # NOTE(review): laid out at loop level so every branch is sorted;
            # confirm against the original indentation.
            r1[i], r2[i], r3[i] = np.sort(np.array([r1[i], r2[i], r3[i]]))

    # numba scalar type with the same name as the dtype string
    numba_type = numba.__getattribute__(s_dtype)
    # compile as a generalised ufunc: three 1-D inputs and three 1-D outputs
    # sharing the same core dimension n
    raw_func = numba.guvectorize(
        [(numba_type[:], numba_type[:], numba_type[:], numba_type[:], numba_type[:], numba_type[:])],
        "(n),(n),(n)->(n),(n),(n)",
        nopython=True)(solve_cubic_numba_base)

    def full_func(b, c, d):
        # allocate the outputs to match b, then let the kernel fill them
        r1 = np.zeros_like(b)
        r2 = np.zeros_like(b)
        r3 = np.zeros_like(b)
        raw_func(b, c, d, r1, r2, r3)
        return r1, r2, r3

    return full_func