def image_fftw(grids, nthread=1, wisdom=None, axes=(1, 2)):
    """Plan a pyfftw inverse FFT and run it on the input grids.

    Works on 1d (time, npix) or 2d (time, npixx, npixy) grids.  ``axes``
    selects the dimensions that are transformed, so ``(1, 2)`` does a 2d FFT
    over the last two axes of (time, npixx, npixy) data, while ``(1,)`` (or
    the bare integer ``1``) does a 1d FFT over the last axis of (time, npix)
    data.

    Parameters:
        grids: input array; the output buffer is allocated with
            ``np.zeros_like(grids)``, so it has the same shape and dtype.
        nthread: number of threads handed to the FFTW plan.
        wisdom: optional FFTW wisdom passed to ``pyfftw.import_wisdom``
            before planning; skipped when ``None``.
        axes: axes to transform (tuple of ints, or a single int).

    Returns the recentered (fftshift-ed) real part of the FFT output for
    each integration.
    """
    if wisdom is not None:
        logger.debug('Importing wisdom...')
        pyfftw.import_wisdom(wisdom)

    # The docstring has always advertised a bare int for the 1d case;
    # normalise it so the plan constructor receives a sequence of axes.
    if isinstance(axes, (int, np.integer)):
        axes = (int(axes),)

    logger.debug("Starting pyfftw ifft2")
    images = np.zeros_like(grids)

    # NOTE(review): the plan is built with pyfftw's default planner flags;
    # planning may overwrite ``grids`` when no matching wisdom has been
    # imported -- confirm that callers always supply wisdom.
    fft_obj = pyfftw.FFTW(grids, images, axes=axes,
                          direction="FFTW_BACKWARD",
                          threads=nthread)  # nthread used to be ignored
    fft_obj.execute()

    logger.debug('Recentering fft output...')
    return np.fft.fftshift(images.real, axes=axes)
def store_plan_hints(filename, locking=True, reload_first=True):
    """Store data about the best FFT plans for this computer.

    FFT planning can take quite a while. After planning, the knowledge about
    the best plan for a given computer and given transform parameters can be
    written to disk so that the next time, planning can make use of that
    knowledge.

    Parameters:
        filename: file to write hints to.
        locking: if True, attempt to acquire an exclusive lock before
            writing, which can otherwise cause problems if multiple
            processes are attempting to write to the same plan hints file.
        reload_first: if True, if the file exists, load the plan hints
            before storing them back. Safer in a multi-process setting where
            the hints may be written by a different process.
    """
    filename = pathlib.Path(filename)
    if not filename.exists():
        # can't open a file for read/write updating if it doesn't exist...
        filename.touch()
    with filename.open('r+b') as f:
        if locking:
            import fcntl
            fcntl.flock(f, fcntl.LOCK_EX)
        try:
            if reload_first:
                try:
                    pyfftw.import_wisdom(pickle.load(f))
                except Exception:
                    # Best effort: an empty or unreadable hints file simply
                    # means there is nothing to merge before writing.
                    pass
            f.seek(0)
            pickle.dump(pyfftw.export_wisdom(), f)
            # Drop any bytes left over from a previous, longer pickle.
            f.truncate()
            # Push the data to the file before the lock is released so that
            # another process never observes a partially written file.
            f.flush()
        finally:
            if locking:
                fcntl.flock(f, fcntl.LOCK_UN)
def _loadwisdom(self, infile): if infile is None: return try: pyfftw.import_wisdom(pickle.load(open(infile, "rb"))) except (IOError, TypeError) as e: self._savewisdom(infile)
def load_wisdom(self):
    """Refresh the cached wisdom blobs from disk and hand them to pyfftw.

    For each of ``wisdom32`` and ``wisdom64`` the path stored in
    ``self._<name>_path`` is read (when it is an existing file) into
    ``self._<name>``.  The 64-bit and 32-bit blobs are then imported, with
    an empty blob in the third slot.
    """
    for precision in ('wisdom32', 'wisdom64'):
        source = getattr(self, f"_{precision}_path")
        if not Path(source).is_file():
            # Keep whatever is already cached for this precision.
            continue
        with open(source, 'rb') as handle:
            blob = handle.read()
        setattr(self, f"_{precision}", blob)

    pyfftw.import_wisdom((self._wisdom64, self._wisdom32, b''))
def load_wisdom():
    """Import pickled FFTW wisdom from ``WISDOMFILE`` if that file exists.

    Loading is best effort: any error while reading or importing is logged
    with its traceback and otherwise ignored.
    """
    if not os.path.exists(WISDOMFILE):
        return
    try:
        with open(WISDOMFILE, 'rb') as f:
            pyfftw.import_wisdom(pickle.load(f))
    except Exception:
        # Deliberately broad, but no longer swallows KeyboardInterrupt or
        # SystemExit the way a bare ``except:`` did.
        logger.exception('Error loading wisdom')
def __init__(self,
             n=1364,
             N=2048,
             iter0=138000,
             nfiles=64,
             fft_threads=32,
             out_threads=4,
             src_dir='./',
             dst_dir='./',
             src_format='K{0:0>6}QNP{1:0>3}',
             dst_format='RMHD_{0}_t{1:0>4x}_z{2:0>7x}'):
    """Allocate the aligned work arrays and plan the backward 3D FFT.

    Parameters:
        n, iter0, nfiles: stored on the instance unchanged.
        N: grid size; determines the shapes of ``kdata``, ``rrdata`` and
            ``rzdata``.
        fft_threads: number of threads given to the FFTW plan.
        out_threads: stored on the instance unchanged.
        src_dir, dst_dir: stored unchanged; when ``dst_dir`` is a list,
            ``zdir`` is additionally built from its length.
        src_format, dst_format: format strings, stored unchanged.

    Side effects: imports FFTW wisdom from ``fftw_wisdom.pickle.gz`` in the
    current directory when that file exists, rewrites that file after
    planning, and loads ``libzshuffle.so`` from
    ``~/repos/RMHD_converter/C-shuffle``.
    """
    self.src_format = src_format
    self.dst_format = dst_format
    self.src_dir = src_dir
    self.dst_dir = dst_dir
    self.n = n
    self.N = N
    self.iter0 = iter0
    self.nfiles = nfiles
    self.kdata = pyfftw.n_byte_align_empty(
        (self.N // 2 + 1, 2, self.N, self.N),
        pyfftw.simd_alignment,
        dtype=np.complex64)
    self.rrdata = pyfftw.n_byte_align_empty(
        (self.N, self.N, self.N, 2),
        pyfftw.simd_alignment,
        dtype=np.float32)
    self.rzdata = pyfftw.n_byte_align_empty(
        ((self.N // 8) * (self.N // 8) * (self.N // 8), 8 * 8 * 8 * 2),
        pyfftw.simd_alignment,
        dtype=np.float32)
    if isinstance(self.dst_dir, list):
        self.zdir = np.array(
            range(0,
                  self.rzdata.shape[0],
                  self.rzdata.shape[0] // len(self.dst_dir)))
    self.cubbies_per_file = self.rzdata.shape[0] // self.nfiles
    if os.path.isfile('fftw_wisdom.pickle.gz'):
        # Close the gzip stream explicitly instead of leaking the handle.
        with gzip.open('fftw_wisdom.pickle.gz', 'rb') as wisdom_file:
            pyfftw.import_wisdom(pickle.load(wisdom_file))
    print('about to initialize the fftw plan, which can take a while')
    self.plan = pyfftw.FFTW(
        self.kdata.transpose(3, 2, 0, 1), self.rrdata,
        axes=(0, 1, 2),
        direction='FFTW_BACKWARD',
        flags=('FFTW_MEASURE', 'FFTW_DESTROY_INPUT'),
        threads=fft_threads)
    print('finalized fftw initialization')
    # The context manager guarantees the compressed stream is flushed and
    # closed, so the wisdom file is complete as soon as __init__ returns.
    with gzip.open('fftw_wisdom.pickle.gz', 'wb') as wisdom_file:
        pickle.dump(pyfftw.export_wisdom(), wisdom_file)
    self.fft_threads = fft_threads
    self.out_threads = out_threads
    self.shuffle_lib = np.ctypeslib.load_library(
        'libzshuffle.so',
        os.path.abspath(os.path.join(
            os.path.expanduser('~'),
            'repos/RMHD_converter/C-shuffle')))
def load_wisdom(filenames=default_filenames):
    """Read every file in ``filenames`` and import the contents as wisdom.

    The file contents are collected in order into a tuple and passed to
    ``pyfftw.import_wisdom`` in a single call.
    """
    allwisdom = []
    for fn in filenames:
        # ``with`` closes the handle even when read() raises.
        # NOTE(review): files are opened in text mode as before; confirm
        # that the pyfftw version in use accepts str rather than bytes.
        with open(fn, 'r') as f:
            allwisdom.append(f.read())
    pyfftw.import_wisdom(tuple(allwisdom))
def load_wisdom(filenames=default_filenames):
    """Read every file in ``filenames`` and import the contents as wisdom.

    The file contents are collected in order into a tuple and passed to
    ``pyfftw.import_wisdom`` in a single call.
    """
    allwisdom = []
    for fn in filenames:
        # ``with`` closes the handle even when read() raises.
        # NOTE(review): files are opened in text mode as before; confirm
        # that the pyfftw version in use accepts str rather than bytes.
        with open(fn, 'r') as f:
            allwisdom.append(f.read())
    pyfftw.import_wisdom(tuple(allwisdom))
def __init__(self, qs, L=15, ncol=1, low_ring=True, fourier=False,
             threads=1, import_wisdom=False,
             wisdom_file='./fftw_wisdom.npy'):
    """Set up the padded grid, the FFTW plans and the FFTLog kernels.

    Parameters:
        qs: sample points; ``Delta`` is computed from the log of the ratio
            of the last to the first entry.
        L: kernels are tabulated for ``ll`` in ``range(L)``.
        ncol: number of rows of the aligned FFT work arrays.
        low_ring: if True, shift each kernel by the phase derived from
            ``self.UK``; otherwise x_min * y_max = 1 is used.
        fourier: selects the numerical prefactor stored in ``self.sqrtpi``.
        threads: thread count for both FFTW plans (also written to
            ``pyfftw.config.NUM_THREADS``).
        import_wisdom: if True, load wisdom from ``wisdom_file`` first.
        wisdom_file: path given to ``np.load`` when importing wisdom.
    """
    # Numerical factor of sqrt(pi) in the Mellin transform; an integral in
    # Fourier space carries an additional factor of 2 pi / (2 pi)^3.
    root_pi = np.sqrt(np.pi)
    if fourier:
        self.sqrtpi = root_pi / (2*np.pi**2)
    else:
        self.sqrtpi = root_pi

    self.q = qs
    self.L = L
    self.ncol = ncol

    self.Nx = len(qs)
    self.Delta = np.log(qs[-1]/qs[0])/(self.Nx-1)

    # Zero pad the arrays to the preferred length format for FFTs, 2^N.
    self.N = 2**(int(np.ceil(np.log2(self.Nx))) + 1)
    self.Npad = self.N - self.Nx
    # Left and right indices sandwiching the padding.
    self.ii_l = self.Npad - self.Npad//2
    self.ii_r = self.N - self.Npad//2

    # FFTW objects.
    if import_wisdom:
        pyfftw.import_wisdom(tuple(np.load(wisdom_file)))

    self.fks = pyfftw.empty_aligned((self.ncol, self.N//2 + 1),
                                    dtype='complex128')
    self.fs = pyfftw.empty_aligned((self.ncol, self.N), dtype='float64')

    pyfftw.config.NUM_THREADS = threads
    self.fft_object = pyfftw.FFTW(self.fs, self.fks,
                                  direction='FFTW_FORWARD', threads=threads)
    self.ifft_object = pyfftw.FFTW(self.fks, self.fs,
                                   direction='FFTW_BACKWARD', threads=threads)

    # FFTLog kernels u_m up to, but not including, L.
    ms = np.arange(0, self.N//2+1)
    self.ydict = {}
    self.udict = {}
    self.qdict = {}

    for ll in range(L):
        q = max(0, 1.5 - ll)
        if low_ring:
            # ln(xmin*ymax)
            lnxy = self.Delta/np.pi * np.angle(
                self.UK(ll, q+1j*np.pi/self.Delta))
            ys = np.exp(lnxy - self.Delta) * qs / (qs[0]*qs[-1])
            us = self.UK(ll, q + 2j * np.pi / self.N / self.Delta * ms) \
                * np.exp(-2j * np.pi * lnxy / self.N / self.Delta * ms)
        else:
            # Without low ringing simply set x_min * y_max = 1.
            ys = np.exp(-self.Delta) * qs / (qs[0]*qs[-1])
            us = self.UK(ll, q + 2j * np.pi / self.N / self.Delta * ms)

        # Manually impose low ring on the last coefficient.
        us[self.N//2] = us[self.N//2].real

        self.ydict[ll] = ys
        self.udict[ll] = us
        self.qdict[ll] = q
def load_wisdom(self, path_wisdom=None):
    """Import FFTW wisdom from a JSON file if that file exists.

    Parameters:
        path_wisdom: optional path template; when given it replaces
            ``self.attrs['path_wisdom']``.  The template is formatted with
            ``self.Nmesh`` and ``self.attrs['nthreads']`` to obtain the
            actual file name.

    Nothing is imported (and nothing is logged) when the resulting path is
    not an existing file.
    """
    if path_wisdom is not None:
        self.attrs['path_wisdom'] = path_wisdom
    path_wisdom = self.attrs['path_wisdom'].format(self.Nmesh,
                                                   self.attrs['nthreads'])
    if os.path.isfile(path_wisdom):
        self.logger.info('Reading wisdom from {}.'.format(path_wisdom))
        # The context manager closes the file even if parsing or importing
        # raises; the original left the handle open in that case.
        with open(path_wisdom, 'r') as wisdom:
            pyfftw.import_wisdom(json.load(wisdom))
def import_wisdom(basedir):
    """Import FFTW wisdom from ``wis0.dat`` .. ``wis2.dat`` in ``basedir``.

    Each file is read as raw bytes and the three blobs are passed, in
    order, to ``pyfftw.import_wisdom``.

    Raises:
        RuntimeError: if any of the three files is missing.
    """
    w = []
    for i in range(3):  # always 3 ?
        fname = path.join(basedir, "wis%d.dat" % i)
        if not path.isfile(fname):
            # The message used to read "Could find", which said the
            # opposite of what happened.
            raise RuntimeError("Could not find wisdom file %s" % fname)
        with open(fname, "rb") as fid:
            w.append(fid.read())
    pyfftw.import_wisdom(w)
def load(cls):
    """Unpickle wisdom from ``cls.paths[0]``, print it and import it.

    A missing file (``pyfs.errors.FileNotFoundError``) is silently ignored.
    """
    import pickle

    try:
        with pyfs.aopen(cls.paths[0], 'rb') as stream:
            stored = pickle.load(stream)
        print(stored)
        pyfftw.import_wisdom(stored)
    except pyfs.errors.FileNotFoundError:
        # No wisdom has been saved yet; nothing to import.
        return
def _loadwisdom(self, infile): if infile is None: return try: pyfftw.import_wisdom(pickle.load(open(infile, 'rb'))) except (IOError, TypeError) as e: self._savewisdom(infile) except EOFError as e: pass # file exists but is empty from another FFT being run
def __init__(self,
             n=1364,
             N=2048,
             iter0=138000,
             nfiles=64,
             fft_threads=32,
             out_threads=4,
             src_dir='./',
             dst_dir='./',
             src_format='K{0:0>6}QNP{1:0>3}',
             dst_format='RMHD_{0}_t{1:0>4x}_z{2:0>7x}'):
    """Allocate the aligned work arrays and plan the backward 3D FFT.

    Parameters:
        n, iter0, nfiles: stored on the instance unchanged.
        N: grid size; determines the shapes of ``kdata``, ``rrdata`` and
            ``rzdata``.
        fft_threads: number of threads given to the FFTW plan.
        out_threads: stored on the instance unchanged.
        src_dir, dst_dir: stored unchanged; when ``dst_dir`` is a list,
            ``zdir`` is additionally built from its length.
        src_format, dst_format: format strings, stored unchanged.

    Side effects: imports FFTW wisdom from ``fftw_wisdom.pickle.gz`` in the
    current directory when that file exists, rewrites that file after
    planning, and loads ``libzshuffle.so`` from
    ``~/repos/RMHD_converter/C-shuffle``.
    """
    self.src_format = src_format
    self.dst_format = dst_format
    self.src_dir = src_dir
    self.dst_dir = dst_dir
    self.n = n
    self.N = N
    self.iter0 = iter0
    self.nfiles = nfiles
    self.kdata = pyfftw.n_byte_align_empty(
        (self.N // 2 + 1, 2, self.N, self.N),
        pyfftw.simd_alignment,
        dtype=np.complex64)
    self.rrdata = pyfftw.n_byte_align_empty((self.N, self.N, self.N, 2),
                                            pyfftw.simd_alignment,
                                            dtype=np.float32)
    self.rzdata = pyfftw.n_byte_align_empty(
        ((self.N // 8) * (self.N // 8) * (self.N // 8), 8 * 8 * 8 * 2),
        pyfftw.simd_alignment,
        dtype=np.float32)
    if isinstance(self.dst_dir, list):
        self.zdir = np.array(
            range(0, self.rzdata.shape[0],
                  self.rzdata.shape[0] // len(self.dst_dir)))
    self.cubbies_per_file = self.rzdata.shape[0] // self.nfiles
    if os.path.isfile('fftw_wisdom.pickle.gz'):
        # Close the gzip stream explicitly instead of leaking the handle.
        with gzip.open('fftw_wisdom.pickle.gz', 'rb') as wisdom_file:
            pyfftw.import_wisdom(pickle.load(wisdom_file))
    print('about to initialize the fftw plan, which can take a while')
    self.plan = pyfftw.FFTW(self.kdata.transpose(3, 2, 0, 1),
                            self.rrdata,
                            axes=(0, 1, 2),
                            direction='FFTW_BACKWARD',
                            flags=('FFTW_MEASURE', 'FFTW_DESTROY_INPUT'),
                            threads=fft_threads)
    print('finalized fftw initialization')
    # The context manager guarantees the compressed stream is flushed and
    # closed, so the wisdom file is complete as soon as __init__ returns.
    with gzip.open('fftw_wisdom.pickle.gz', 'wb') as wisdom_file:
        pickle.dump(pyfftw.export_wisdom(), wisdom_file)
    self.fft_threads = fft_threads
    self.out_threads = out_threads
    self.shuffle_lib = np.ctypeslib.load_library(
        'libzshuffle.so',
        os.path.abspath(
            os.path.join(os.path.expanduser('~'),
                         'repos/RMHD_converter/C-shuffle')))
def load_wisdom(self):
    """Import pickled wisdom from the file named by ``_wisdom_filename()``.

    Returns True when the wisdom was imported.  On IOError a warning is
    written to stderr and False is returned.
    """
    try:
        with open(_wisdom_filename(), 'rb') as stream:
            stored = pickle.load(stream)
            pyfftw.import_wisdom(stored)
    except IOError:
        template = ('WARNING: No wisdom file {}. This may take a '
                    'while ...\n')
        sys.stderr.write(template.format(_wisdom_filename()))
        return False
    return True
def load_wisdom(wisdomfile):
    """Prime FFTW with knowledge of which FFTs are best on this machine.

    Loads pickled 'wisdom' from the file ``wisdomfile``.  Does nothing when
    ``wisdomfile`` is None.  If the file cannot be opened, or the unpickled
    object is rejected with a TypeError, a warning is logged and
    ``save_wisdom(wisdomfile)`` is called instead.
    """
    if wisdomfile is None:
        return
    try:
        # The context manager closes the handle even if unpickling fails;
        # the original left it for the garbage collector.
        with open(wisdomfile, 'rb') as f:
            pyfftw.import_wisdom(pickle.load(f))
    except (IOError, TypeError):
        log.warn("No wisdom present, generating some at %r" % wisdomfile)
        save_wisdom(wisdomfile)
def load_plan_hints(filename, locking=True):
    """Load data about the best FFT plans for this computer.

    Planning an FFT can be slow; once done, the resulting knowledge can be
    kept on disk and fed back to the planner on a later run.  This function
    reads such a pickled hints file and imports it.

    Parameters:
        filename: file to read hints from.
        locking: if True, take an exclusive ``flock`` on the file while it
            is read, to avoid clashing with other processes that are
            writing the same plan hints file.

    Returns True if plan hints were successfully loaded (every entry of the
    import result is truthy).
    """
    with open(filename, 'rb') as handle:
        if not locking:
            results = pyfftw.import_wisdom(pickle.load(handle))
        else:
            import fcntl
            fcntl.flock(handle, fcntl.LOCK_EX)
            results = pyfftw.import_wisdom(pickle.load(handle))
            fcntl.flock(handle, fcntl.LOCK_UN)
    return all(results)
def image_fftw(grids, nthread=1, wisdom=None):
    """Inverse-transform uv grids (time, npixx, npixy) with pyfftw's ifft2.

    Optional ``wisdom`` is imported before the transform when it is truthy.
    Returns the real part of the images after ``recenter`` has been applied
    with a shift of half the image size along each axis.
    """
    if wisdom:
        logger.debug('Importing wisdom...')
        pyfftw.import_wisdom(wisdom)

    ifft_options = dict(auto_align_input=True,
                        auto_contiguous=True,
                        planner_effort='FFTW_MEASURE',
                        threads=nthread)
    images = pyfftw.interfaces.numpy_fft.ifft2(grids, **ifft_options)

    npixx, npixy = images[0].shape
    half_shift = (npixx//2, npixy//2)
    return recenter(images.real, half_shift)
def _load_wisdom():
    """Import the wisdom stored in ``FFTW_WISDOM_FILES``, at most once.

    A file that cannot be read contributes an empty blob.  After the
    import, the module-level flag ``_is_fftw_wisdom_loaded`` is set so that
    later calls return immediately.
    """
    global _is_fftw_wisdom_loaded
    if _is_fftw_wisdom_loaded:
        return

    def read_or_empty(filename):
        # Unreadable or missing files are treated as "no wisdom".
        try:
            with open(filename, 'rb') as stream:
                return stream.read()
        except IOError:
            return b''

    blobs = []
    for wisdom_path in FFTW_WISDOM_FILES:
        blobs.append(read_or_empty(wisdom_path))

    pyfftw.import_wisdom(blobs)
    _is_fftw_wisdom_loaded = True
def __init__(self, ellmat, filt_func=lambda ell: ell > 0, num_threads=4, flags_init=('FFTW_MEASURE', )): super(ffs_alm_pyFFTW, self).__init__(ellmat, filt_func=filt_func) # FIXME : This can be tricky in in hybrid MPI-OPENMP # Builds FFTW Wisdom : wisdom_fname = self.ell_mat.lib_dir + '/FFTW_wisdom_%s_%s.npy' % ( num_threads, ''.join(flags_init)) if not os.path.exists(wisdom_fname): print "++ ffs_alm_pyFFTW :: building and caching FFTW wisdom, this might take a little while..." if pbs.rank == 0: inpt = pyfftw.empty_aligned(self.ell_mat.shape, dtype='float64') oupt = pyfftw.empty_aligned(self.ell_mat.rshape, dtype='complex128') fft = pyfftw.FFTW(inpt, oupt, axes=(0, 1), direction='FFTW_FORWARD', flags=flags_init, threads=num_threads) ifft = pyfftw.FFTW(oupt, inpt, axes=(0, 1), direction='FFTW_BACKWARD', flags=flags_init, threads=num_threads) wisdom = pyfftw.export_wisdom() np.save(wisdom_fname, wisdom) del inpt, oupt, fft, ifft pbs.barrier() pyfftw.import_wisdom(np.load(wisdom_fname)) # print "++ ffs_alm_pyFFTW :: loaded widsom ", wisdom_fname self.flags = ( 'FFTW_WISDOM_ONLY', ) # This will make the code crash if arrays are not properly aligned. # self.flags = ('FFTW_MEASURE',) self.threads = num_threads
def test_import(self):
    """Generated wisdom can be re-imported after it has been forgotten.

    The import result must flag success exactly for the precisions listed
    in ``_supported_types`` (checked in the order '64', '32', 'ld').
    """
    # Capture freshly generated wisdom.
    forget_wisdom()
    self.generate_wisdom()
    generated = export_wisdom()

    # Capture the state after forgetting, then import the saved wisdom.
    forget_wisdom()
    blank = export_wisdom()
    outcome = import_wisdom(generated)

    self.compare(blank, generated)

    expected = tuple(kind in _supported_types
                     for kind in ('64', '32', 'ld'))
    self.assertEqual(outcome, expected)
def test_import(self):
    """Generated wisdom can be re-imported after it has been forgotten.

    The first two wisdom entries must differ between the forgotten state
    and the generated state, and the import must succeed for all three
    entries.
    """
    # Capture freshly generated wisdom.
    forget_wisdom()
    self.generate_wisdom()
    generated = export_wisdom()

    # Capture the state after forgetting, then import the saved wisdom.
    forget_wisdom()
    blank = export_wisdom()
    outcome = import_wisdom(generated)

    for index in (0, 1):
        self.assertNotEqual(blank[index], generated[index])

    self.assertEqual(outcome, (True, True, True))
def test_import(self):
    """Generated wisdom can be re-imported after it has been forgotten.

    The import result must flag success exactly for the precisions listed
    in ``_supported_types`` (checked in the order '64', '32', 'ld').
    """
    # Capture freshly generated wisdom.
    forget_wisdom()
    self.generate_wisdom()
    generated = export_wisdom()

    # Capture the state after forgetting, then import the saved wisdom.
    forget_wisdom()
    blank = export_wisdom()
    outcome = import_wisdom(generated)

    self.compare(blank, generated)

    expected = tuple(kind in _supported_types
                     for kind in ('64', '32', 'ld'))
    self.assertEqual(outcome, expected)
def load_plan(self, plan_filename, verbose=True, return_all=False):
    """Import FFTW wisdom stored under the ``"plan"`` key of a NumPy file.

    Parameters:
        plan_filename: file readable by ``np.load`` that holds a ``"plan"``
            entry.
        verbose: when True (and ``return_all`` is False), print for each of
            the double, single and long double precision plans whether it
            was imported.
        return_all: when True, return the result of
            ``pyfftw.import_wisdom`` (or ``(False, False, False)`` if the
            file could not be read) instead of printing.

    Raises:
        RuntimeError: if ``self.use_fftw`` is false.
    """
    if not self.use_fftw:
        raise RuntimeError("FFTW is not used")

    # Load the file once; the original loaded it twice and never closed
    # the first result.
    try:
        archive = np.load(plan_filename)
    except IOError:
        print(
            "Error: could not read file %s for some reason (possibly permission denied)"
            % plan_filename)
        if return_all:
            return (False, False, False)
        return

    try:
        wisdom = archive["plan"]
    finally:
        # Archive objects returned by np.load hold an open file; plain
        # arrays have no close() and need no cleanup.
        closer = getattr(archive, "close", None)
        if closer is not None:
            closer()

    res = pyfftw.import_wisdom(wisdom)
    if return_all:
        return res
    if verbose:
        # Interpret the result, one line per precision.
        for index, label in enumerate(("Double", "Single", "Long double")):
            st = "" if res[index] else "was not"
            print("%s precision plan %s imported" % (label, st))
def fftw_load_wisdom(filename=None):
    """Read accumulated FFTW wisdom previously saved in a file.

    By default this file will be in the user's astropy configuration
    directory. (Another location could be chosen - this is simple and works
    easily cross-platform.)

    Parameters
    ------------
    filename : string, optional
        Filename to use (instead of the default, poppy_fftw_wisdom.txt)

    Returns
    -------
    None if pyfftw is not installed or the file does not exist, False if
    the wisdom could not be read from the file, True otherwise.

    Notes
    -----
    astropy is only imported when the default location is needed, i.e. when
    ``filename`` is None.
    """
    import os

    try:
        import pyfftw
    except ImportError:
        return  # FFTW is not present, therefore this is a null op

    if filename is None:
        from astropy import config
        filename = os.path.join(config.get_config_dir(),
                                "poppy_fftw_wisdom.txt")

    if not os.path.exists(filename):
        return  # gracefully ignore the case of lacking wisdom yet.

    _log.debug("Trying to reload wisdom from file "+filename)
    try:
        with open(filename, 'r') as wisdom_file:
            lines = wisdom_file.readlines()
        # The first four lines are comments and should be ignored.  Each
        # wisdom line stores its newlines as the two characters "\n".
        wisdom = tuple(lines[i].replace(r'\n', '\n') for i in (4, 5, 6))
    except Exception:
        # Best effort: a short or unreadable file is reported, not raised.
        _log.debug("ERROR - wisdom tuple could not be loaded from file :"+filename)
        return False

    success = pyfftw.import_wisdom(wisdom)
    _log.debug("Reloaded double precision wisdom: "+str(success[0]))
    _log.debug("Reloaded single precision wisdom: "+str(success[1]))
    _log.debug("Reloaded longdouble precision wisdom: "+str(success[2]))
    return True
def lens_glm_GLth_sym_timed(spin, dlm, glm, lmax_target, nband=16, facres=0, clm=None, olm=None, rotpol=True):
    """
    Same as lens_alm but lenses simultaneously a North and a South colatitude band, to profit
    from the symmetries of the spherical harmonics.

    The colatitude range [0, pi/2] is split into ``nband`` bands.  A first loop collects the
    grid shapes of every band and builds (or reuses) a pyfftw wisdom cache file under
    ``pyfftw_cache/`` next to this module; a second loop does the actual work band by band.
    Timings of the individual steps are accumulated in a dict and printed at the end.

    Returns ``(glmout, clmout, ret)``; ``ret`` is the map array of the last processed band.

    NOTE(review): only the ``spin == 0`` branch is functional; the other branch starts with
    ``assert 0, 'fix this'`` and references names (``Nphi``, ``vtm``) that are not defined
    at that point.
    NOTE(review): block nesting was reconstructed from a whitespace-mangled source -- confirm
    against the original file.
    """
    assert spin >= 0, spin
    times = {}
    t0 = time.time()
    tGL, wg = gauleg.get_xgwg(lmax_target + 2)
    times['GL points and weights'] = time.time() - t0
    target_nt = 3**1 * 2**(11 + facres)  # on one hemisphere (0.87 arcmin spacing)
    th1s = np.arange(nband) * (np.pi * 0.5 / nband)
    th2s = np.concatenate((th1s[1:], [np.pi * 0.5]))
    Nt = target_nt / nband
    tGL = np.arccos(tGL)
    tGL = np.sort(tGL)
    # NOTE(review): tGL is already sorted at this point, so argsort returns the identity
    # permutation and wg keeps its original order -- confirm this is intended.
    wg = wg[np.argsort(tGL)]
    times['pol. rot.'] = 0.
    times['vtm2defl2ang'] = 0.
    times['vtmdefl'] = 0.

    def coadd_times(tim):
        # Accumulate the timings of one band into the enclosing ``times`` dict.
        for _k, _t in tim.iteritems():
            if _k not in times:
                times[_k] = _t
            else:
                times[_k] += _t

    shapes = []
    shapes_d = []
    tGLNs = []
    tGLSs = []
    wgs = []
    # Collects (Nt,Nphi) per band and prepare wisdom
    wisdomhash = str(lmax_target) + '_' + str(nband) + '_' + str(facres + 1000) + '.npy'
    assert os.path.exists(os.path.dirname(os.path.realpath(__file__)) + '/pyfftw_cache/')
    t0 = time.time()
    print "building and caching FFTW wisdom, this might take a while"
    for ib, th1, th2 in zip(range(nband), th1s, th2s):
        Np = get_Nphi(th1, th2, facres=facres, target_amin=60. * 90. / target_nt)  # same spacing as theta grid
        Np_d = min(get_Nphi(th1, th2, target_amin=180. * 60. / lmax_target), 2 * lmax_target)  # Equator point density
        pixN, = np.where((tGL >= th1) & (tGL <= th2))
        pixS, = np.where((tGL >= (np.pi - th2)) & (tGL <= (np.pi - th1)))
        assert np.all(pixN[::-1] == len(tGL) - 1 - pixS), 'symmetry of GL points'
        shapes_d.append((len(pixN), Np_d))
        shapes.append((Nt, Np))
        tGLNs.append(tGL[pixN])
        tGLSs.append(tGL[pixS])
        wgs.append(np.concatenate([wg[pixN], wg[pixS]]))
        print "BAND %s in %s. deflection (%s x %s) pts " % (ib, nband, len(pixN), Np_d)
        print " interpolation (%s x %s) pts " % (Nt, Np)
        #==== For each block we have the following ffts:
        # (Np_d) complex to complex (deflection map) BACKWARD (vtm2map)
        # (Nt,Np) complex to complex (bicubic prefiltering) BACKWARD (vt2mmap) (4 threads)
        # (Nt) complex to complex (bicubic prefiltering) FORWARD (vt2map)
        # (Np_d) complex to complex FORWARD (map2vtm)
        # Could rather do a try with FFTW_WISDOM_ONLY
        if not os.path.exists(os.path.dirname(os.path.realpath(__file__)) + '/pyfftw_cache/' + wisdomhash):
            # Creating the plans is done only for the wisdom it accumulates in pyfftw;
            # the plan objects themselves are not used.
            a = pyfftw.empty_aligned(Np_d, dtype='complex128')
            b = pyfftw.empty_aligned(Np_d, dtype='complex128')
            fft = pyfftw.FFTW(a, b, direction='FFTW_FORWARD', threads=1)
            fft = pyfftw.FFTW(a, b, direction='FFTW_BACKWARD', threads=1)
            a = pyfftw.empty_aligned(Nt, dtype='complex128')
            b = pyfftw.empty_aligned(Nt, dtype='complex128')
            fft = pyfftw.FFTW(a, b, direction='FFTW_FORWARD', threads=1)
            a = pyfftw.empty_aligned((Nt, Np), dtype='complex128')
            b = pyfftw.empty_aligned((Nt, Np), dtype='complex128')
            fft = pyfftw.FFTW(a, b, direction='FFTW_BACKWARD', axes=(0, 1), threads=4)
    # Write the cache file once all bands have been planned, then load it.
    if not os.path.exists(os.path.dirname(os.path.realpath(__file__)) + '/pyfftw_cache/' + wisdomhash):
        np.save(os.path.dirname(os.path.realpath(__file__)) + '/pyfftw_cache/' + wisdomhash, pyfftw.export_wisdom())
    pyfftw.import_wisdom(np.load(os.path.dirname(os.path.realpath(__file__)) + '/pyfftw_cache/' + wisdomhash))
    shts.PYFFTWFLAGS = ['FFTW_WISDOM_ONLY']
    times['pyfftw_caches'] = time.time() - t0
    print "Total number of interpo points: %s = %s ** 2" % (np.sum([np.prod(s) for s in shapes]), np.sqrt(1. * np.sum([np.prod(s) for s in shapes])))
    print "Total number of deflect points: %s = %s ** 2" % (np.sum([np.prod(s) for s in shapes_d]), np.sqrt(1. * np.sum([np.prod(s) for s in shapes_d])))
    glmout = np.zeros(shts.util.lmax2nlm(lmax_target), dtype=np.complex)
    clmout = np.zeros(shts.util.lmax2nlm(lmax_target), dtype=np.complex)
    for ib, th1, th2 in zip(range(nband), th1s, th2s):
        Nt_d, Np_d = shapes_d[ib]
        Nt, Np = shapes[ib]
        t0 = time.time()
        vtm_def = shts.vlm2vtm_sym(1, _th2colat(tGLNs[ib]), shts.util.alm2vlm(dlm, clm=olm))
        times['vtmdefl'] += time.time() - t0
        #==== getting deflected positions
        # NB: forward slice to keep theta -> pi - theta correspondence.
        t0 = time.time()
        dmapN = shts.vtm2map(1, vtm_def[:Nt_d, :], Np_d).flatten()
        dmapS = shts.vtm2map(1, vtm_def[slice(Nt_d, 2 * Nt_d), :], Np_d).flatten()
        told = np.outer(tGLNs[ib], np.ones(Np_d)).flatten()
        phiold = np.outer(np.ones(Nt_d), np.arange(Np_d) * (2. * np.pi / Np_d)).flatten()
        tnewN, phinewN = _buildangles((told, phiold), dmapN.real, dmapN.imag)
        tnewS, phinewS = _buildangles(((np.pi - told)[::-1], phiold), dmapS.real, dmapS.imag)
        del vtm_def
        times['vtm2defl2ang'] += time.time() - t0
        #===== Adding a 10 pixels buffer for new angles to be safely inside interval.
        # th1,th2 is mapped onto pi - th2,pi -th1 so we need to make sure to cover both buffers
        matnewN = np.max(tnewN)
        mitnewN = np.min(tnewN)
        matnewS = np.max(tnewS)
        mitnewS = np.min(tnewS)
        buffN = 10 * (matnewN - mitnewN) / (Nt - 1) / (1. - 2. * 10. / (Nt - 1))
        buffS = 10 * (matnewS - mitnewS) / (Nt - 1) / (1. - 2. * 10. / (Nt - 1))
        _thup = min(np.pi - (matnewS + buffS), mitnewN - buffN)
        _thdown = max(np.pi - (mitnewS - buffS), matnewN + buffN)
        #==== these are the theta and limits. It is ok to go negative or > 180
        dphi_patch = (2. * np.pi) / Np * max(np.sin(_thup), np.sin(_thdown))
        dth_patch = (_thdown - _thup) / (Nt - 1)
        print 'input t1,t2 %.3f %.3f in degrees' % (_thup / np.pi * 180, _thdown / np.pi * 180.)
        print 'North %.3f and South %.3f buffers in amin' % (buffN / np.pi * 180 * 60, buffS / np.pi * 180. * 60.)
        print "cell (theta,phi) in amin (%.3f,%.3f)" % (dth_patch / np.pi * 60. * 180, dphi_patch / np.pi * 60. * 180)
        if spin == 0:
            lenN, lenS, tim = lens_band_sym_timed(glm, _thup, _thdown, Nt, (tnewN, phinewN), (tnewS, phinewS), Nphi=Np)
            # North band fills the first Nt_d rows, South band the remaining ones.
            ret = np.zeros((2 * Nt_d, Np_d), dtype=complex)
            ret[:Nt_d, :] = lenN.reshape((Nt_d, Np_d))
            ret[Nt_d:, :] = lenS.reshape((Nt_d, Np_d))
            vtm = shts.map2vtm(spin, lmax_target, ret)
            glmout -= shts.vtm2tlm_sym(np.concatenate([tGLNs[ib], tGLSs[ib]]), vtm * np.outer(wgs[ib], np.ones(vtm.shape[1])))
        else:
            # Unfinished branch: the assert below always fires for spin > 0.
            assert 0, 'fix this'
            lenNR, lenNI, lenSR, lenSI, tim = gclm2lensmap_symband_timed(spin, glm, _thup, _thdown, Nt, (tnewN, phinewN), (tnewS, phinewS), Nphi=Nphi, clm=clm)
            retN = (lenNR + 1j * lenNI).reshape((len(pixN), Np_d))
            retS = (lenSR + 1j * lenSI).reshape((len(pixN), Np_d))
            glm, clm = shts.util.vlm2alm(shts.vtm2vlm(spin, tGL, vtm * np.outer(wg, np.ones(vtm.shape[1]))))
        t0 = time.time()
        if rotpol and spin > 0:
            ret[pixN, :] *= polrot(spin, retN.flatten(), tnewN, dmapN.real, dmapN.imag)
            ret[pixS, :] *= polrot(spin, retS.flatten(), tnewS, dmapS.real, dmapS.imag)
            times['pol. rot.'] += time.time() - t0
        coadd_times(tim)
    t0 = time.time()
    print "STATS for lmax tlm %s lmax dlm %s" % (hp.Alm.getlmax(glm.size), hp.Alm.getlmax(dlm.size))
    tot = 0.
    for _k, _t in times.iteritems():
        print '%20s: %.2f' % (_k, _t)
        tot += _t
    print "%20s: %2.f sec." % ('tot', tot)
    return glmout, clmout, ret
def load_wisdom(self, wisdom_file=DEFAULT_WISDOM_FILE):
    """Unpickle FFTW wisdom from ``wisdom_file`` and import it into pyfftw."""
    with open(wisdom_file, 'rb') as handle:
        stored = cPickle.load(handle)
    pyfftw.import_wisdom(stored)
def pyFFTWPlanner(realMage, fouMage=None, wisdomFile=None, effort='FFTW_MEASURE', n_threads=None, doForward=True, doReverse=True):
    """Plan forward/reverse 2D FFTs for ``realMage`` and persist the wisdom.

    Existing wisdom is imported from ``wisdomFile`` first (when the file
    exists), the requested plans are built with ``pyfftw.builders``, and the
    accumulated wisdom is then written back to the same file.

    NOTE: realMage should be typecast to 'complex64' normally.

    NOTE: planning pickle files are hardware dependent, so don't copy them
    from one machine to another.

    Parameters:
        realMage: array the forward plan is built for.
        fouMage: array the reverse plan is built for.  When None, one is
            allocated: for float32/float64 input the last axis is shortened
            to ``n // 2 + 1`` and the matching complex dtype is used;
            otherwise an array with the shape and dtype of ``realMage``.
        wisdomFile: pickle file holding the wisdom tuple.  Defaults to
            ``pyFFTW_wisdom.pkl`` in the directory of this module.  The
            file is rewritten on every call.
        effort: planner effort flag passed to the builders.
        n_threads: number of threads; defaults to ``cpu_count()``.
        doForward, doReverse: whether to build the respective plan.

    Returns ``(FFT2, IFFT2)``; an entry is None when it was not requested.

    For multiprocessing, it's important to let FFTW generate its plan from
    an ideal processor state.

    TODO: implement real, half-space fourier transforms rfft2 and irfft2 as
    built
    """
    import pyfftw
    import pickle
    import os.path
    from multiprocessing import cpu_count

    utilpath = os.path.dirname(os.path.realpath(__file__))

    # First import whatever we already have
    if wisdomFile is None:
        wisdomFile = os.path.join(utilpath, "pyFFTW_wisdom.pkl")

    if os.path.isfile(wisdomFile):
        try:
            with open(wisdomFile, 'rb') as fh:
                pyfftw.import_wisdom(pickle.load(fh))
        except (IOError, OSError):
            print("Util: pyFFTW wisdom plan file: " + str(wisdomFile) + " invalid/unreadable")
        except Exception:
            # This is not normally a problem, the file might be empty.
            print("Util: pickle failed to import FFTW wisdom")
    else:
        # Touch the file.  Everyone should be able to delete scratch files,
        # so clear the umask while creating it -- but restore it afterwards
        # instead of changing it for the rest of the process.
        previous_umask = os.umask(0)
        try:
            with open(wisdomFile, 'wb') as fh:
                pass
        finally:
            os.umask(previous_umask)

    # I think the fouMage array has to be smaller to do the real -> complex FFT?
    if fouMage is None:
        real_to_complex = {'float32': 'complex64', 'float64': 'complex128'}
        if realMage.dtype.name in real_to_complex:
            print("pyFFTW is recommended to work on purely complex data")
            # Shapes are immutable tuples, so build the shortened shape
            # rather than assigning into it (which used to raise).
            fouShape = tuple(realMage.shape[:-1]) + (realMage.shape[-1]//2 + 1,)
            fouDtype = real_to_complex[realMage.dtype.name]
            fouMage = np.empty(fouShape, dtype=fouDtype)
        else:
            # Assume dtype is complexXX
            fouDtype = realMage.dtype.name
            fouMage = np.zeros(realMage.shape, dtype=fouDtype)

    if n_threads is None:
        n_threads = cpu_count()
    print("FFTW using " + str(n_threads) + " threads")

    if bool(doForward):
        FFT2 = pyfftw.builders.fft2(realMage, planner_effort=effort,
                                    threads=n_threads, auto_align_input=True)
    else:
        FFT2 = None

    if bool(doReverse):
        IFFT2 = pyfftw.builders.ifft2(fouMage, planner_effort=effort,
                                      threads=n_threads, auto_align_input=True)
    else:
        IFFT2 = None

    # Something is different in the builders compared to FFTW directly.
    # Can also repeat this for pyfftw.builders.rfft2 and .irfft2 if desired,
    # but generally it seems slower.

    # Opening a file for writing truncates it, so the file ends up holding
    # exactly the wisdom accumulated so far.
    with open(wisdomFile, 'wb') as fh:
        pickle.dump(pyfftw.export_wisdom(), fh)

    return FFT2, IFFT2
def pyfftw_call(array_in, array_out, direction='forward', axes=None, halfcomplex=False, **kwargs):
    """Calculate the DFT with pyfftw.

    The discrete Fourier (forward) transform calculates the sum::

        f_hat[k] = sum_j( f[j] * exp(-2*pi*1j * j*k/N) )

    where the summation is taken over all indices
    ``j = (j[0], ..., j[d-1])`` in the range ``0 <= j < N``
    (component-wise), with ``N`` being the shape of the input array.

    The output indices ``k`` lie in the same range, except
    for half-complex transforms, where the last axis ``i`` in ``axes``
    is shortened to ``0 <= k[i] < floor(N[i]/2) + 1``.

    In the backward transform, the sign of the exponential argument
    is flipped.

    Parameters
    ----------
    array_in : `numpy.ndarray`
        Array to be transformed
    array_out : `numpy.ndarray`
        Output array storing the transformed values, may be aliased
        with ``array_in``.
    direction : {'forward', 'backward'}, optional
        Direction of the transform
    axes : int or sequence of ints, optional
        Dimensions along which to take the transform. ``None`` means
        using all axes and is equivalent to ``np.arange(ndim)``.
    halfcomplex : bool, optional
        If ``True``, calculate only the negative frequency part along the
        last axis. If ``False``, calculate the full complex FFT.
        This option can only be used with real input data.

    Other Parameters
    ----------------
    fftw_plan : ``pyfftw.FFTW``, optional
        Use this plan instead of calculating a new one. If specified,
        the options ``planning_effort``, ``planning_timelimit`` and
        ``threads`` have no effect.
    planning_effort : str, optional
        Flag for the amount of effort put into finding an optimal
        FFTW plan. See the `FFTW doc on planner flags
        <http://www.fftw.org/fftw3_doc/Planner-Flags.html>`_.
        Available options: {'estimate', 'measure', 'patient', 'exhaustive'}
        Default: 'estimate'
    planning_timelimit : float or ``None``, optional
        Limit planning time to roughly this many seconds.
        Default: ``None`` (no limit)
    threads : int, optional
        Number of threads to use.
        Default: Number of CPUs if the number of data points is larger
        than 4096, else 1.
    normalise_idft : bool, optional
        If ``True``, the result of the backward transform is multiplied by
        ``1 / N``, where ``N`` is the total number of points in
        ``array_in[axes]``. This ensures that the IDFT is the true
        inverse of the forward DFT.
        Default: ``False``
    import_wisdom : filename or file handle, optional
        File to load FFTW wisdom from. If the file does not exist,
        it is ignored.
    export_wisdom : filename or file handle, optional
        File to append the accumulated FFTW wisdom to

    Returns
    -------
    fftw_plan : ``pyfftw.FFTW``
        The plan object created from the input arguments. It can be
        reused for transforms of the same size with the same data types.
        Note that reuse only gives a speedup if the initial plan
        used a planner flag other than ``'estimate'``.
        If ``fftw_plan`` was specified, the returned object is a
        reference to it.

    Notes
    -----
    * The planning and direction flags can also be specified as
      capitalized and prepended by ``'FFTW_'``, i.e. in the original
      FFTW form.
    * For a ``halfcomplex`` forward transform, the arrays must fulfill
      ``array_out.shape[axes[-1]] == array_in.shape[axes[-1]] // 2 + 1``,
      and vice versa for backward transforms.
    * All planning schemes except ``'estimate'`` require an internal copy
      of the input array but are often several times faster after the
      first call (measuring results are cached). Typically,
      'measure' is a good compromise. If you cannot afford the copy,
      use ``'estimate'``.
    * If a plan is provided via the ``fftw_plan`` parameter, no copy
      is needed internally.
    """
    import pickle

    if not array_in.flags.aligned:
        raise ValueError('input array not aligned')

    if not array_out.flags.aligned:
        raise ValueError('output array not aligned')

    if axes is None:
        axes = tuple(range(array_in.ndim))

    axes = normalized_axes_tuple(axes, array_in.ndim)

    direction = _flag_pyfftw_to_odl(direction)
    fftw_plan_in = kwargs.pop('fftw_plan', None)
    planning_effort = _flag_pyfftw_to_odl(
        kwargs.pop('planning_effort', 'estimate')
    )
    planning_timelimit = kwargs.pop('planning_timelimit', None)
    threads = kwargs.pop('threads', None)
    normalise_idft = kwargs.pop('normalise_idft', False)
    wimport = kwargs.pop('import_wisdom', '')
    wexport = kwargs.pop('export_wisdom', '')

    # Cast input to complex if necessary
    array_in_copied = False
    if is_real_dtype(array_in.dtype) and not halfcomplex:
        # Need to cast array_in to complex dtype
        array_in = array_in.astype(complex_dtype(array_in.dtype))
        array_in_copied = True

    # Do consistency checks on the arguments
    _pyfftw_check_args(array_in, array_out, axes, halfcomplex, direction)

    # Import wisdom if possible
    if wimport:
        try:
            with open(wimport, 'rb') as wfile:
                wisdom = pickle.load(wfile)
        except IOError:
            # A file that cannot be opened is ignored.
            wisdom = []
        except TypeError:  # Got file handle
            wisdom = pickle.load(wimport)

        if wisdom:
            pyfftw.import_wisdom(wisdom)

    # Copy input array if it hasn't been done yet and the planner is likely
    # to destroy it. If we already have a plan, we don't have to worry.
    planner_destroys = _pyfftw_destroys_input(
        [planning_effort], direction, halfcomplex, array_in.ndim)
    must_copy_array_in = fftw_plan_in is None and planner_destroys

    if must_copy_array_in and not array_in_copied:
        # Plan on a scratch array so that planning cannot clobber the data;
        # the transform itself is executed on ``array_in`` further below.
        plan_arr_in = np.empty_like(array_in)
        flags = [_flag_odl_to_pyfftw(planning_effort), 'FFTW_DESTROY_INPUT']
    else:
        plan_arr_in = array_in
        flags = [_flag_odl_to_pyfftw(planning_effort)]

    if fftw_plan_in is None:
        if threads is None:
            if plan_arr_in.size <= 4096:  # Trade-off wrt threading overhead
                threads = 1
            else:
                threads = cpu_count()

        fftw_plan = pyfftw.FFTW(
            plan_arr_in, array_out, direction=_flag_odl_to_pyfftw(direction),
            flags=flags, planning_timelimit=planning_timelimit,
            threads=threads, axes=axes)
    else:
        fftw_plan = fftw_plan_in

    # Run the transform on the actual input and output arrays.
    fftw_plan(array_in, array_out, normalise_idft=normalise_idft)

    if wexport:
        # NOTE(review): the file is opened in append mode, so each call adds
        # another pickle, while the import above reads only the first pickle
        # in the file -- confirm this is the intended behavior.
        try:
            with open(wexport, 'ab') as wfile:
                pickle.dump(pyfftw.export_wisdom(), wfile)
        except TypeError:  # Got file handle
            pickle.dump(pyfftw.export_wisdom(), wexport)

    return fftw_plan
if(line.startswith(" ") or line.startswith(")")): wisdom_tuple[-1] += line # append to string else: wisdom_tuple.append(line) wisdom = wisdom_tuple # override return wisdom # if configured to use centuries of fftw wisdom, read the fftw oracle of # delphi (i.e. the wisdom file) - do this on import: if(wisdom_file is not None): wisdom = read_wisdom() if(wisdom is not None): pyfftw.import_wisdom(wisdom) pyfftw_simd_alignment = pyfftw.simd_alignment pyfftw.interfaces.cache.enable() pyfftw.interfaces.cache.set_keepalive_time(300) # keep cache alive for 300 sec # TODO: make this a configurable parameter? def get_num_threads(): """Get number of threads from environment variable. Returns ------- num_threads : int $TFFTW_NUM_THREADS if set, 1 otherwise. """
def __init__(self, **kwargs):
    """
    Store the supplied parameters, build the phase-space grids, pre-compute the
    propagator exponents and plan the FFTW transforms.

    The following parameters are to be specified
        X_gridDIM - the coordinate grid size (must be even)
        X_amplitude - maximum value of the coordinates
        P_gridDIM - the momentum grid size (must be even)
        P_amplitude - maximum value of the momentum
        V(self, x) - potential energy (as a function) may depend on time
        diff_V(self, x) (optional) - the derivative of the potential energy
                                     for the Ehrenfest theorem calculations
        K(self, p) - the kinetic energy (as a function) may depend on time
        diff_K(self, p) (optional) - the derivative of the kinetic energy
                                     for the Ehrenfest theorem calculations
        dt - time step
        t (optional) - initial value of time (default t = 0)
        alpha (optional) - the absorbing boundary smoothing parameter.
                           If not specified, absorbing boundary not used.

    FFTW settings (for details see https://hgomersall.github.io/pyFFTW/pyfftw/pyfftw.html#pyfftw.FFTW)
        ffw_flags (optional) - a sequence (list or tuple) of strings that is a subset of the flags
                               FFTW allows for the planners (default ('FFTW_ESTIMATE',));
                               'FFTW_DESTROY_INPUT' is always appended
        fftw_threads (optional) - how many threads to use when invoking FFTW, with a default of 1
        fftw_wisdom (optional) - a tuple of strings returned by pyfftw.export_wisdom()
                                 for efficient simulations

    Functions passed as keyword arguments are bound to the instance as methods.
    A function accepting only (self, grid) is treated as time independent and its
    values are cached; if calling it that way raises TypeError it is assumed to
    have the signature (self, grid, t) and is re-evaluated at every time.

    Raises
        AttributeError - if one of the mandatory parameters is missing
        AssertionError - if X_gridDIM or P_gridDIM is odd
    """
    # save all attributes
    for name, value in kwargs.items():
        # if the value supplied is a function, then dynamically assign it as a method;
        # otherwise bind it a property
        if isinstance(value, FunctionType):
            # The two-argument form of MethodType is accepted by both Python 2 and
            # Python 3 (the three-argument form raises TypeError on Python 3).
            setattr(self, name, MethodType(value, self))
        else:
            setattr(self, name, value)

    # Check that all attributes were specified
    try:
        self.X_gridDIM
    except AttributeError:
        raise AttributeError("Coordinate grid size (X_gridDIM) was not specified")

    assert self.X_gridDIM % 2 == 0, "Coordinate grid size (X_gridDIM) must be even"

    try:
        self.P_gridDIM
    except AttributeError:
        raise AttributeError("Momentum grid size (P_gridDIM) was not specified")

    assert self.P_gridDIM % 2 == 0, "Momentum grid size (P_gridDIM) must be even"

    try:
        self.X_amplitude
    except AttributeError:
        raise AttributeError("Coordinate grid range (X_amplitude) was not specified")

    try:
        self.P_amplitude
    except AttributeError:
        raise AttributeError("Momentum grid range (P_amplitude) was not specified")

    try:
        self.V
    except AttributeError:
        raise AttributeError("Potential energy (V) was not specified")

    try:
        self.K
    except AttributeError:
        raise AttributeError("Momentum dependence (K) was not specified")

    try:
        self.dt
    except AttributeError:
        raise AttributeError("Time-step (dt) was not specified")

    try:
        self.t
    except AttributeError:
        print("Warning: Initial time (t) was not specified, thus it is set to zero.")
        self.t = 0.

    ##########################################################################################
    #
    #   Generating grids
    #
    ##########################################################################################

    # get coordinate and momentum step sizes
    self.dX = 2. * self.X_amplitude / self.X_gridDIM
    self.dP = 2. * self.P_amplitude / self.P_gridDIM

    # coordinate grid (row vector)
    self.X = np.linspace(-self.X_amplitude, self.X_amplitude - self.dX, self.X_gridDIM)
    self.X = self.X[np.newaxis, :]

    # Lambda grid (variable conjugate to the coordinate)
    self.Lambda = np.fft.fftfreq(self.X_gridDIM, self.dX / (2 * np.pi))

    # take only first half, as required by the real fft
    self.Lambda = self.Lambda[:(1 + self.X_gridDIM // 2)]
    # row vector, so that P +/- 0.5 * Lambda broadcasts to shape (P.size, Lambda.size)
    self.Lambda = self.Lambda[np.newaxis, :]

    # momentum grid (column vector)
    self.P = np.linspace(-self.P_amplitude, self.P_amplitude - self.dP, self.P_gridDIM)
    self.P = self.P[:, np.newaxis]

    # Theta grid (variable conjugate to the momentum)
    self.Theta = np.fft.fftfreq(self.P_gridDIM, self.dP / (2 * np.pi))

    # take only first half, as required by the real fft
    self.Theta = self.Theta[:(1 + self.P_gridDIM // 2)]
    # column vector, so that X +/- 0.5 * Theta broadcasts to shape (Theta.size, X.size)
    self.Theta = self.Theta[:, np.newaxis]

    ##########################################################################################
    #
    #   Pre-calculate absorbing boundary
    #
    ##########################################################################################

    # auxiliary grids
    X_minus = self.X - 0.5 * self.Theta
    X_plus = self.X + 0.5 * self.Theta

    try:
        self.alpha

        # if user specified the absorbing boundary smoothing parameter (alpha)
        # then generate the absorbing boundary
        xmin = min(X_minus.min(), X_plus.min())
        xmax = max(X_minus.max(), X_plus.max())

        self.abs_boundary = np.sin(np.pi * (X_plus - xmin) / (xmax - xmin))
        self.abs_boundary *= np.sin(np.pi * (X_minus - xmin) / (xmax - xmin))

        np.abs(self.abs_boundary, out=self.abs_boundary)
        self.abs_boundary **= abs(self.alpha * self.dt)

    except AttributeError:
        # if the absorbing boundary smoothing parameter was not specified
        # then we should not use the absorbing boundary
        self.abs_boundary = 1

    ##########################################################################################
    #
    #   Pre-calculate exponents
    #
    ##########################################################################################

    try:
        # Cache the potential energy exponent, if the potential is time independent
        self._expV = np.exp(-self.dt * 0.5j * (self.V(X_minus) - self.V(X_plus)))

        # Apply absorbing boundary
        self._expV *= self.abs_boundary

        # Dynamically assign the method self.get_exp_v(t) to access the cached exponential
        self.get_exp_v = MethodType(lambda self, t: self._expV, self)

    except TypeError:
        # If exception is generated, then the potential is time-dependent and caching is not possible,
        # thus, dynamically assign the method self.get_exp_v(t) to recalculate the exponent for every t
        self.X_minus = X_minus
        self.X_plus = X_plus

        def get_exp_v(self, t):
            result = -self.dt * 0.5j * (self.V(self.X_minus, t) - self.V(self.X_plus, t))
            np.exp(result, out=result)

            # Apply absorbing boundary
            result *= self.abs_boundary
            return result

        self.get_exp_v = MethodType(get_exp_v, self)

    ##########################################################################################

    try:
        # Cache the kinetic energy exponent, if the kinetic term is time independent
        self._expK = np.exp(
            -self.dt * 1j * (self.K(self.P + 0.5 * self.Lambda) - self.K(self.P - 0.5 * self.Lambda))
        )

        # Dynamically assign the method self.get_exp_k(t) to access the cached exponential
        self.get_exp_k = MethodType(lambda self, t: self._expK, self)

    except TypeError:
        # If exception is generated, then the kinetic term is time-dependent and caching is not possible,
        # thus, dynamically assign the method self.get_exp_k(t) to recalculate the exponent for every t
        self.P_minus = self.P - 0.5 * self.Lambda
        self.P_plus = self.P + 0.5 * self.Lambda

        def get_exp_k(self, t):
            result = -self.dt * 1j * (self.K(self.P_plus, t) - self.K(self.P_minus, t))
            np.exp(result, out=result)
            return result

        self.get_exp_k = MethodType(get_exp_k, self)

    ##########################################################################################
    #
    #   Ehrenfest theorems (optional)
    #
    ##########################################################################################

    try:
        # Check whether the necessary terms are specified to calculate the Ehrenfest theorems

        # Pre-calculate RHS if time independent (using similar ideas as in self.get_exp_v above)
        try:
            self._diff_V = self.diff_V(self.X)
            self.get_diff_v = MethodType(lambda self, t: self._diff_V, self)
        except TypeError:
            self.get_diff_v = MethodType(lambda self, t: self.diff_V(self.X, t), self)

        # Pre-calculate RHS if time independent (using similar ideas as in self.get_exp_v above)
        try:
            self._diff_K = self.diff_K(self.P)
            self.get_diff_k = MethodType(lambda self, t: self._diff_K, self)
        except TypeError:
            self.get_diff_k = MethodType(lambda self, t: self.diff_K(self.P, t), self)

        # Pre-calculate the potential and kinetic energies for
        # calculating the expectation value of Hamiltonian
        try:
            self._V = self.V(self.X)
            self.get_v = MethodType(lambda self, t: self._V, self)
        except TypeError:
            self.get_v = MethodType(lambda self, t: self.V(self.X, t), self)

        try:
            self._K = self.K(self.P)
            self.get_k = MethodType(lambda self, t: self._K, self)
        except TypeError:
            self.get_k = MethodType(lambda self, t: self.K(self.P, t), self)

        # Lists where the expectation values of X and P
        self.X_average = []
        self.P_average = []

        # Lists where the right hand sides of the Ehrenfest theorems for X and P
        self.X_average_RHS = []
        self.P_average_RHS = []

        # List where the expectation value of the Hamiltonian will be calculated
        self.hamiltonian_average = []

        # Flag requesting the Ehrenfest theorem calculations
        self.isEhrenfest = True

    except AttributeError:
        # Since self.diff_V and self.diff_K are not specified,
        # the Ehrenfest theorem will not be calculated
        self.isEhrenfest = False

    ##########################################################################################
    #
    #   FFTW set-up
    #
    ##########################################################################################

    # Check for FFTW flags
    try:
        self.ffw_flags
    except AttributeError:
        # otherwise assign some default values
        self.ffw_flags = ('FFTW_ESTIMATE',)

    # Allow to destroy data in input arrays during FFT to speed up calculations.
    # tuple(...) lets the user pass the flags as a list as well as a tuple.
    self.ffw_flags = tuple(self.ffw_flags) + ('FFTW_DESTROY_INPUT',)

    # Number of threads used for FFTW
    try:
        self.fftw_threads
    except AttributeError:
        self.fftw_threads = 1

    # load FFTW wisdom, if provided
    try:
        pyfftw.import_wisdom(self.fftw_wisdom)
    except AttributeError:
        pass

    # allocate memory for the Wigner function
    # (np.float64 is the type the removed alias np.float used to resolve to)
    self.wignerfunction = pyfftw.empty_aligned((self.P.size, self.X.size), dtype=np.float64)

    ##########################################################################################
    #
    #   allocate memory for the wigner function in the theta x and p lambda representations
    #   by reusing the memory
    #
    ##########################################################################################

    # find sizes of each representations
    size_theta_x = self.Theta.size * self.X.size
    size_p_lambda = self.P.size * self.Lambda.size

    if size_theta_x > size_p_lambda:
        # since theta x representation requires more memory, allocate it
        self.wigner_theta_x = pyfftw.empty_aligned((self.Theta.size, self.X.size), dtype=np.complex128)

        # for p lambda representation uses a smaller subspace
        self.wigner_p_lambda = np.frombuffer(self.wigner_theta_x, dtype=np.complex128, count=size_p_lambda)
        self.wigner_p_lambda = self.wigner_p_lambda.reshape((self.P.size, self.Lambda.size))
    else:
        # since p lambda representation requires more memory, allocate it
        self.wigner_p_lambda = pyfftw.empty_aligned((self.P.size, self.Lambda.size), dtype=np.complex128)

        # for theta x representation uses a smaller subspace
        self.wigner_theta_x = np.frombuffer(self.wigner_p_lambda, dtype=np.complex128, count=size_theta_x)
        self.wigner_theta_x = self.wigner_theta_x.reshape((self.Theta.size, self.X.size))

    ##########################################################################################

    # plan the FFT for the p x -> theta x transform
    self.p2theta_transform = pyfftw.FFTW(
        self.wignerfunction, self.wigner_theta_x,
        axes=(0,),
        direction='FFTW_FORWARD',
        flags=self.ffw_flags,
        threads=self.fftw_threads
    )

    # plan the FFT for the theta x -> p x transform
    self.theta2p_transform = pyfftw.FFTW(
        self.wigner_theta_x, self.wignerfunction,
        axes=(0,),
        direction='FFTW_BACKWARD',
        flags=self.ffw_flags,
        threads=self.fftw_threads
    )

    # plan the FFT for the p x -> p lambda transform
    self.x2lambda_transform = pyfftw.FFTW(
        self.wignerfunction, self.wigner_p_lambda,
        axes=(1,),
        direction='FFTW_FORWARD',
        flags=self.ffw_flags,
        threads=self.fftw_threads
    )

    # plan the FFT for the p lambda -> p x transform
    self.lambda2x_transform = pyfftw.FFTW(
        self.wigner_p_lambda, self.wignerfunction,
        axes=(1,),
        direction='FFTW_BACKWARD',
        flags=self.ffw_flags,
        threads=self.fftw_threads
    )
def iterate(self, iloop, save_wisdom=1):
    """Run one iteration of the displacement-field estimate and update galaxy positions.

    On the first call (``iloop == 0``) the aligned work arrays are allocated,
    the randoms are gridded and smoothed, FFTW wisdom is imported from
    ``wisdom.<nbins>.<nthreads>`` (if that file exists) and the FFTW plans
    are created. On later calls all of these are taken from the attributes
    stored on ``self`` at the end of the previous call.

    Parameters
    ----------
    iloop : int
        Iteration index; ``0`` triggers the one-time set-up described above.
    save_wisdom : bool or int, optional
        If true, the FFTW wisdom is written (as JSON) after the first
        iteration, unless the wisdom file already exists.

    Notes
    -----
    Results are not returned: the new positions are written to
    ``self.cat.newx``, ``newy`` and ``newz``, and the work arrays and FFTW
    plans are stored on ``self`` for the next iteration.
    """
    cat = self.cat
    ran = self.ran
    smooth = self.smooth
    binsize = self.binsize
    beta = self.beta
    bias = self.bias
    f = self.f
    nbins = self.nbins

    wisdom_file = "wisdom." + str(nbins) + "." + str(self.nthreads)

    #-- Creating arrays for FFTW
    if iloop == 0:
        delta = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        deltak = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        psi_x = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        psi_y = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        psi_z = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')

        print('Allocating randoms in cells...')
        deltar = self.allocate_gal_cic(ran)
        print('Smoothing...')
        deltar = gaussian_filter(deltar, smooth / binsize)

        #-- Initialize FFT objects and load wisdom if available
        if os.path.isfile(wisdom_file):
            print('Reading wisdom from ', wisdom_file)
            with open(wisdom_file, 'r') as wisdom_in:
                wisd = json.load(wisdom_in)
            # JSON yields text strings; pyfftw expects a tuple of byte strings.
            pyfftw.import_wisdom(tuple(
                w if isinstance(w, bytes) else w.encode('ascii') for w in wisd))

        print('Creating FFTW objects...')
        fft_obj = pyfftw.FFTW(delta, delta, axes=[0, 1, 2],
                              threads=self.nthreads)
        ifft_obj = pyfftw.FFTW(deltak, psi_x, axes=[0, 1, 2],
                               threads=self.nthreads,
                               direction='FFTW_BACKWARD')
    else:
        delta = self.delta
        deltak = self.deltak
        deltar = self.deltar
        psi_x = self.psi_x
        psi_y = self.psi_y
        psi_z = self.psi_z
        fft_obj = self.fft_obj
        ifft_obj = self.ifft_obj

    #-- Allocate galaxies and randoms to grid with CIC method
    #-- using new positions
    print('Allocating galaxies in cells...')
    deltag = self.allocate_gal_cic(cat)
    print('Smoothing...')
    deltag = gaussian_filter(deltag, smooth / binsize)

    print('Computing fluctuations...')
    delta[:] = deltag - self.alpha * deltar
    w = N.where(deltar > self.ran_min)
    delta[w] = delta[w] / (self.alpha * deltar[w])
    w2 = N.where(deltar <= self.ran_min)
    delta[w2] = 0.
    # delta holds purely real values at this point (imaginary part is zero),
    # so the 99th-percentile clipping is done on the real part; recent NumPy
    # versions reject percentiles of complex arrays.
    w3 = N.where(delta.real > N.percentile(delta[w].real.ravel(), 99))
    delta[w3] = 0.
    del w, w2, w3, deltag

    print('Fourier transforming delta field...')
    fft_obj(input_array=delta, output_array=delta)

    #-- delta/k**2
    k = fftfreq(self.nbins, d=binsize) * 2 * N.pi
    k2 = k[:, None, None]**2 + k[None, :, None]**2 + k[None, None, :]**2
    # The k=0 mode is overwritten right below; dividing it by 1 instead of 0
    # only avoids a 0/0 and the associated runtime warning.
    k2[0, 0, 0] = 1.
    delta /= k2
    delta[0, 0, 0] = 1

    #-- Estimating the IFFT in Eq. 12 of Burden et al. 2015
    print('Inverse Fourier transforming to get psi...')
    deltak[:] = delta * -1j * k[:, None, None] / bias
    ifft_obj(input_array=deltak, output_array=psi_x)
    deltak[:] = delta * -1j * k[None, :, None] / bias
    ifft_obj(input_array=deltak, output_array=psi_y)
    deltak[:] = delta * -1j * k[None, None, :] / bias
    ifft_obj(input_array=deltak, output_array=psi_z)

    #-- removing RSD from galaxies
    shift_x, shift_y, shift_z = \
        self.get_shift(cat, psi_x.real, psi_y.real, psi_z.real,
                       use_newpos=True)

    # Debug output for the first few galaxies (at most 10, fewer if the
    # catalogue is smaller).
    for i in range(min(10, len(shift_x))):
        print(shift_x[i], shift_y[i], shift_z[i], cat.x[i])

    #-- for first loop need to approximately remove RSD component
    #-- from Psi to speed up calculation
    #-- first loop so want this on original positions (cp),
    #-- not final ones (np) - doesn't actually matter
    if iloop == 0:
        psi_dot_rhat = (shift_x * cat.x +
                        shift_y * cat.y +
                        shift_z * cat.z) / cat.dist
        shift_x -= beta / (1 + beta) * psi_dot_rhat * cat.x / cat.dist
        shift_y -= beta / (1 + beta) * psi_dot_rhat * cat.y / cat.dist
        shift_z -= beta / (1 + beta) * psi_dot_rhat * cat.z / cat.dist

    #-- remove RSD from original positions (cp) of
    #-- galaxies to give new positions (np)
    #-- these positions are then used in next determination of Psi,
    #-- assumed to not have RSD.
    #-- the iterative procedure then uses the new positions as
    #-- if they'd been read in from the start
    psi_dot_rhat = (shift_x * cat.x + shift_y * cat.y + shift_z * cat.z) / cat.dist
    cat.newx = cat.x + f * psi_dot_rhat * cat.x / cat.dist
    cat.newy = cat.y + f * psi_dot_rhat * cat.y / cat.dist
    cat.newz = cat.z + f * psi_dot_rhat * cat.z / cat.dist

    #-- keep work arrays and plans for the next iteration
    self.deltar = deltar
    self.delta = delta
    self.deltak = deltak
    self.psi_x = psi_x
    self.psi_y = psi_y
    self.psi_z = psi_z
    self.fft_obj = fft_obj
    self.ifft_obj = ifft_obj

    #-- save wisdom
    if iloop == 0 and save_wisdom and not os.path.isfile(wisdom_file):
        wisd = pyfftw.export_wisdom()
        # export_wisdom() returns byte strings on Python 3, which json cannot
        # serialise directly; FFTW wisdom is plain ASCII text.
        wisd_text = [w.decode('ascii') if isinstance(w, bytes) else w
                     for w in wisd]
        with open(wisdom_file, 'w') as wisdom_out:
            json.dump(wisd_text, wisdom_out)
        print('Wisdom saved at', wisdom_file)
def pyfftw_call(array_in, array_out, direction='forward', axes=None,
                halfcomplex=False, **kwargs):
    """Compute a discrete Fourier transform through pyfftw.

    The forward transform evaluates ::

        f_hat[k] = sum_j( f[j] * exp(-2*pi*1j * j*k/N) )

    with the sum running over all multi-indices ``j = (j[0], ..., j[d-1])``
    satisfying ``0 <= j < N`` component-wise, ``N`` being the shape of the
    input. ``k`` covers the same range, except for half-complex transforms
    where the last axis ``i`` in ``axes`` is shortened to
    ``0 <= k[i] < floor(N[i]/2) + 1``. The backward transform uses the
    opposite sign in the exponential.

    Parameters
    ----------
    array_in : `numpy.ndarray`
        Data to transform. Must be aligned.
    array_out : `numpy.ndarray`
        Aligned array receiving the result; may alias ``array_in``.
    direction : {'forward', 'backward'}
        Which transform to compute.
    axes : int or sequence of ints, optional
        Axes to transform along. ``None`` selects all axes.
    halfcomplex : bool, optional
        If ``True``, compute only the shortened (half-complex) spectrum
        along the last transformed axis; only valid for real input.
        If ``False``, real input is cast to the matching complex type and
        a full complex transform is computed.

    Other Parameters
    ----------------
    fftw_plan : ``pyfftw.FFTW``, optional
        Existing plan to execute instead of creating one. When given,
        ``planning_effort``, ``planning_timelimit`` and ``threads`` are
        not used for planning.
    planning_effort : {'estimate', 'measure', 'patient', 'exhaustive'}
        How hard FFTW should search for a fast plan, see the
        `FFTW doc on planner flags
        <http://www.fftw.org/fftw3_doc/Planner-Flags.html>`_.
        Default: 'estimate'
    planning_timelimit : float or ``None``, optional
        Approximate upper bound in seconds for planning.
        Default: ``None`` (unlimited)
    threads : int, optional
        Thread count. Default: 1 for at most 4096 data points, otherwise
        the number of CPUs.
    normalise_idft : bool, optional
        Forwarded to the plan call; if ``True`` the backward transform is
        scaled by ``1 / N`` (``N`` = number of points along ``axes``) so
        that it inverts the forward transform. Default: ``False``
    import_wisdom : filename or file handle, optional
        Pickled FFTW wisdom to load before planning. A file that cannot
        be opened is ignored.
    export_wisdom : filename or file handle, optional
        Destination to which the accumulated wisdom is appended (pickled).

    Returns
    -------
    fftw_plan : ``pyfftw.FFTW``
        The plan that was executed: the newly created one, or the object
        passed as ``fftw_plan``. It can be reused for same-sized,
        same-typed data.

    Raises
    ------
    ValueError
        If ``array_in`` or ``array_out`` is not aligned.

    Notes
    -----
    * Direction and planning flags may also be given in the upper-case
      ``'FFTW_...'`` spelling.
    * For a half-complex forward transform the shapes must satisfy
      ``array_out.shape[axes[-1]] == array_in.shape[axes[-1]] // 2 + 1``
      (and the reverse for backward transforms).
    * When the planner may overwrite its input and no plan is supplied,
      planning is carried out on a scratch array of the same shape so
      that ``array_in`` keeps its contents.
    """
    import pickle

    # pyfftw works on aligned buffers only, so reject anything else early.
    if not array_in.flags.aligned:
        raise ValueError('input array not aligned')
    if not array_out.flags.aligned:
        raise ValueError('output array not aligned')

    if axes is None:
        axes = tuple(range(array_in.ndim))
    axes = normalized_axes_tuple(axes, array_in.ndim)
    direction = _pyfftw_to_local(direction)

    # Pull the optional settings out of the keyword arguments.
    existing_plan = kwargs.pop('fftw_plan', None)
    effort = _pyfftw_to_local(kwargs.pop('planning_effort', 'estimate'))
    time_limit = kwargs.pop('planning_timelimit', None)
    num_threads = kwargs.pop('threads', None)
    normalise = kwargs.pop('normalise_idft', False)
    wisdom_source = kwargs.pop('import_wisdom', '')
    wisdom_target = kwargs.pop('export_wisdom', '')

    # A full complex transform of real data needs a complex copy of the input.
    input_was_copied = False
    if is_real_dtype(array_in.dtype) and not halfcomplex:
        array_in = array_in.astype(complex_dtype(array_in.dtype))
        input_was_copied = True

    _pyfftw_check_args(array_in, array_out, axes, halfcomplex, direction)

    if wisdom_source:
        try:
            with open(wisdom_source, 'rb') as handle:
                stored_wisdom = pickle.load(handle)
        except IOError:
            # Unreadable / missing file: carry on without wisdom.
            stored_wisdom = []
        except TypeError:
            # ``open`` rejected the argument, so treat it as an open file.
            stored_wisdom = pickle.load(wisdom_source)

        if stored_wisdom:
            pyfftw.import_wisdom(stored_wisdom)

    # Planning may clobber the input array. Protect the caller's data with a
    # scratch array unless a copy exists already or a ready plan was passed.
    planner_destroys = _pyfftw_destroys_input(
        [effort], direction, halfcomplex, array_in.ndim)
    flags = [_local_to_pyfftw(effort)]
    if existing_plan is None and planner_destroys and not input_was_copied:
        planning_input = np.empty_like(array_in)
        flags.append('FFTW_DESTROY_INPUT')
    else:
        planning_input = array_in

    if existing_plan is not None:
        plan = existing_plan
    else:
        if num_threads is None:
            # Threading overhead does not pay off for small problems.
            num_threads = 1 if planning_input.size <= 4096 else cpu_count()

        plan = pyfftw.FFTW(
            planning_input, array_out,
            direction=_local_to_pyfftw(direction),
            flags=flags,
            planning_timelimit=time_limit,
            threads=num_threads,
            axes=axes)

    plan(array_in, array_out, normalise_idft=normalise)

    if wisdom_target:
        try:
            with open(wisdom_target, 'ab') as handle:
                pickle.dump(pyfftw.export_wisdom(), handle)
        except TypeError:
            # ``open`` rejected the argument, so treat it as an open file.
            pickle.dump(pyfftw.export_wisdom(), wisdom_target)

    return plan
def __init__(self, **kwargs):
    """
    Store the supplied parameters, plan the in-place FFTs of the density matrix
    and assemble the numexpr code strings used for propagation.

    The following parameters must be specified
        X_gridDIM - specifying the grid size (must be a power of 2)
        X_amplitude - maximum value of the coordinates
        V - potential energy (as a string to be evaluated by numexpr)
        K - momentum dependent part of the hamiltonian (as a string to be evaluated by numexpr)
        dt - time step
        A (optional) - a coordinate dependent Lindblad dissipator (as a string to be evaluated by numexpr);
                       set to "0." with a warning if not specified
        RHS_P_A (optional) -- the correction to the second Ehrenfest theorem due to A
        B (optional) - a momentum dependent Lindblad dissipator (as a string to be evaluated by numexpr);
                       set to "0." with a warning if not specified
        RHS_X_B (optional) -- the correction to the first Ehrenfest theorem due to B
        diff_V (optional) -- the derivative of the potential energy for the Ehrenfest theorem calculations
        diff_K (optional) -- the derivative of the kinetic energy for the Ehrenfest theorem calculations
        t (optional) - initial value of time (default 0)
        abs_boundary (optional) -- absorbing boundary (as a string to be evaluated by numexpr);
                                   default "1." (turned off)

    The string expressions are templates: "{X}" (for V, A, diff_V, RHS_P_A, abs_boundary)
    and "{P}" (for K, B, diff_K, RHS_X_B) are substituted via str.format.

    FFTW wisdom is read from and written to the file 'fftw_wisdom' in the
    current working directory.

    Raises
        AttributeError - if one of the mandatory parameters is missing
        AssertionError - if X_gridDIM is not a power of 2
    """
    # save all attributes
    for name, value in kwargs.items():
        # if the value supplied is a function, then dynamically assign it as a method;
        # otherwise bind it a property
        if isinstance(value, FunctionType):
            # The two-argument form of MethodType is accepted by both Python 2 and
            # Python 3 (the three-argument form raises TypeError on Python 3).
            setattr(self, name, MethodType(value, self))
        else:
            setattr(self, name, value)

    # Check that all attributes were specified
    try:
        # make sure self.X_gridDIM has a value of power of 2
        assert 2 ** int(np.log2(self.X_gridDIM)) == self.X_gridDIM, \
            "A value of the grid size (X_gridDIM) must be a power of 2"
    except AttributeError:
        raise AttributeError("Grid size (X_gridDIM) was not specified")

    try:
        self.X_amplitude
    except AttributeError:
        raise AttributeError("Coordinate range (X_amplitude) was not specified")

    try:
        self.V
    except AttributeError:
        raise AttributeError("Potential energy (V) was not specified")

    try:
        self.K
    except AttributeError:
        raise AttributeError("Momentum dependence (K) was not specified")

    try:
        self.A
    except AttributeError:
        self.A = self.RHS_P_A = "0."
        print("Warning: Coordinate dependent Lindblad dissipator (A) was not specified so it is set to zero")

    try:
        self.B
    except AttributeError:
        self.B = self.RHS_X_B = "0."
        print("Warning: Momentum dependent Lindblad dissipator (B) was not specified so it is set to zero")

    try:
        self.dt
    except AttributeError:
        raise AttributeError("Time-step (dt) was not specified")

    try:
        self.t
    except AttributeError:
        print("Warning: Initial time (t) was not specified, thus it is set to zero.")
        self.t = 0.

    try:
        self.abs_boundary
    except AttributeError:
        print("Warning: Absorbing boundary (abs_boundary) was not specified, thus it is turned off")
        self.abs_boundary = "1."

    ########################################################################################
    #
    #   Initialize Fourier transform for efficient calculations
    #
    ########################################################################################

    # Load FFTW wisdom if saved before.
    # NOTE(review): pickle can execute arbitrary code while loading; the wisdom
    # file must come from a trusted location.
    try:
        with open('fftw_wisdom', 'rb') as f:
            pyfftw.import_wisdom(pickle.load(f))

        print("\nFFTW wisdom has been loaded\n")
    except (IOError, EOFError, pickle.UnpicklingError):
        # No wisdom file yet, or an empty/truncated one (e.g. left behind by an
        # interrupted write): plan from scratch; the file is rewritten below.
        pass

    # allocate the array for density matrix
    # (np.complex128 is the type the removed alias np.complex used to resolve to)
    self.rho = pyfftw.empty_aligned((self.X_gridDIM, self.X_gridDIM), dtype=np.complex128)

    # FFTW settings to achieve good performance. For details see
    # https://hgomersall.github.io/pyFFTW/pyfftw/pyfftw.html#pyfftw.FFTW
    fftw_flags = ('FFTW_MEASURE', 'FFTW_DESTROY_INPUT')

    # how many threads to use for parallelized calculation of FFT.
    # Use the same number of threads as in numexpr
    fftw_nthreads = ne.nthreads

    # Create plan to perform FFT over the zeroth axis. It is equivalent to
    #   fftpack.fft(self.rho, axis=0, overwrite_x=True)
    self.rho_fft_ax0 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(0,),
        direction='FFTW_FORWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    self.rho_fft_ax1 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(1,),
        direction='FFTW_FORWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    self.rho_ifft_ax0 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(0,),
        direction='FFTW_BACKWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    self.rho_ifft_ax1 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(1,),
        direction='FFTW_BACKWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    # Save FFTW wisdom
    with open('fftw_wisdom', 'wb') as f:
        pickle.dump(pyfftw.export_wisdom(), f)

    ########################################################################################

    # get coordinate step size
    self.dX = 2. * self.X_amplitude / self.X_gridDIM

    # generate coordinate range
    k = np.arange(self.X_gridDIM)
    self.k = k[:, np.newaxis]
    self.k_prime = k[np.newaxis, :]

    X = (k - self.X_gridDIM / 2) * self.dX
    self.X = X[:, np.newaxis]
    self.X_prime = X[np.newaxis, :]

    # generate momentum range
    self.dP = np.pi / self.X_amplitude

    P = (k - self.X_gridDIM / 2) * self.dP
    self.P = P[:, np.newaxis]
    self.P_prime = P[np.newaxis, :]

    # allocate an auxiliary array needed for propagation
    self.expV = np.zeros_like(self.rho)

    # construct the coordinate dependent phase containing the dissipator as well as coherent propagator
    phase_X = "1j * (({V_X_prime}) - ({V_X})) " \
              "+ ({A_X}) * conj({A_X_prime}) - 0.5 * abs({A_X}) ** 2 - 0.5 * abs({A_X_prime}) ** 2".format(
        V_X_prime=self.V.format(X="X_prime"),
        V_X=self.V.format(X="X"),
        A_X_prime=self.A.format(X="X_prime"),
        A_X=self.A.format(X="X"),
    )

    # numexpr code to calculate (-)**(k + k_prime) * exp(0.5 * dt * F)
    self.code_expV = "(%s) * (%s) * (-1) ** (k + k_prime) * exp(0.5 * dt * (%s))" % (
        self.abs_boundary.format(X="X"),
        self.abs_boundary.format(X="X_prime"),
        phase_X
    )

    # construct the momentum dependent phase containing the dissipator as well as coherent propagator
    phase_P = "1j * (({K_P_prime}) - ({K_P})) " \
              "+ ({B_P}) * conj({B_P_prime}) - 0.5 * abs({B_P}) ** 2 - 0.5 * abs({B_P_prime}) ** 2".format(
        K_P_prime=self.K.format(P="P_prime"),
        K_P=self.K.format(P="P"),
        B_P_prime=self.B.format(P="P_prime"),
        B_P=self.B.format(P="P"),
    )

    # numexpr code to calculate rho * exp(dt * phase_P)
    self.code_expK = "rho * exp(dt * (%s))" % phase_P

    # Check whether the necessary terms are specified to calculate the first-order Ehrenfest theorems
    try:
        # Allocate a copy of the density matrix for storing it in the momentum representation
        self.rho_p = pyfftw.empty_aligned(self.rho.shape, dtype=self.rho.dtype)

        # Create FFT plans to operate on self.rho_p
        self.rho_p_fft_ax0 = pyfftw.FFTW(
            self.rho_p, self.rho_p,
            axes=(0,),
            direction='FFTW_FORWARD',
            flags=fftw_flags,
            threads=fftw_nthreads
        )

        self.rho_p_ifft_ax1 = pyfftw.FFTW(
            self.rho_p, self.rho_p,
            axes=(1,),
            direction='FFTW_BACKWARD',
            flags=fftw_flags,
            threads=fftw_nthreads
        )

        # numexpr codes to calculate the First Ehrenfest theorems
        self.code_V_average = "sum((%s) * density)" % self.V.format(X="X")
        self.code_K_average = "sum((%s) * density)" % self.K.format(P="P")

        self.code_X_average = "sum(X * density)"
        self.code_P_average = "sum(P * density)"

        # These raise AttributeError (handled below) if diff_V, diff_K,
        # RHS_P_A or RHS_X_B were not supplied.
        self.code_P_average_RHS = "sum((-(%s) + (%s)) * density)" % (
            self.diff_V.format(X="X"),
            self.RHS_P_A.format(X="X")
        )
        self.code_X_average_RHS = "sum(((%s) + (%s)) * density)" % (
            self.diff_K.format(P="P"),
            self.RHS_X_B.format(P="P")
        )

        # Lists where the expectation values of X and P
        self.X_average = []
        self.P_average = []

        # Lists where the right hand sides of the Ehrenfest theorems for X and P
        self.X_average_RHS = []
        self.P_average_RHS = []

        # List where the expectation value of the Hamiltonian will be calculated
        self.hamiltonian_average = []

        # Flag requesting the Ehrenfest theorem calculations
        self.isEhrenfest = True

    except AttributeError:
        # Since self.diff_V and self.diff_K are not specified,
        # the first Ehrenfest theorem will not be calculated
        self.isEhrenfest = False
# load tools for creating animation import sys import matplotlib if sys.platform == 'darwin': # only for MacOS matplotlib.use('TKAgg') import matplotlib.animation import matplotlib.pyplot as plt # Load FFTW wisdom if saved before try: with open('fftw_wisdom', 'rb') as f: pyfftw.import_wisdom(pickle.load(f)) print("\nFFTW wisdom has been loaded\n") except IOError: pass ########################################################################################## # # Parameters of quantum systems # ########################################################################################## sys_params = dict( t=0., dt=0.01, X_gridDIM=512,
def import_wisdom(self):
    """Load pickled FFTW wisdom from ``MyFFTW._WISDOM_FILE`` into pyfftw.

    This is a best-effort operation: any failure (missing or unreadable
    file, invalid pickle, wisdom rejected by pyfftw) is printed and
    otherwise ignored, so the caller simply continues without wisdom.
    """
    try:
        # Pickles are binary data: open in binary mode and make sure the file
        # is closed again.
        # NOTE(review): pickle can execute arbitrary code while loading; the
        # wisdom file must come from a trusted location.
        with open(MyFFTW._WISDOM_FILE, "rb") as wisdom_file:
            wis = pickle.load(wisdom_file)
        pyfftw.import_wisdom(wis)
    except Exception as e:
        # print(...) with a single argument behaves the same on Python 2 and 3
        print(e)
def iterate(self, iloop, save_wisdom=1, verbose=1):
    """Run one iteration of the displacement-field estimate and update positions.

    On the first iteration (``iloop == 0``) the method allocates the aligned
    complex128 work arrays of shape ``(nbins, nbins, nbins)``, imports FFTW
    wisdom from disk when a matching file exists, builds the forward/backward
    FFTW plans and the Gaussian smoothing kernel, and grids + smooths the
    randoms (``self.ran``).  On later iterations all of that state is reused
    from the attributes stored at the end of the previous call.

    Every iteration then grids and smooths the data (``self.dat``) at its
    current ``newx/newy/newz`` positions, computes ``delta``, solves for the
    three components of psi in Fourier space, evaluates the shifts at the
    object positions via ``self.get_shift`` and writes updated
    ``dat.newx/newy/newz``.

    Parameters
    ----------
    iloop : int
        Iteration index; ``0`` triggers the one-off setup described above and
        the approximate RSD removal from the shifts.
    save_wisdom : int or bool, optional
        If truthy, FFTW wisdom is written to ``wisdom.<nbins>.<nthreads>.npy``
        after the first iteration unless that file already exists.
    verbose : int or bool, optional
        If truthy, progress messages and a short debug table are printed.

    Side effects
    ------------
    Mutates ``self.dat.newx/newy/newz`` and stores the work arrays, FFTW
    objects and smoothing kernel on ``self`` for the next iteration.
    """
    dat = self.dat
    ran = self.ran
    smooth = self.smooth
    binsize = self.binsize
    beta = self.beta
    bias = self.bias
    f = self.f
    nbins = self.nbins

    # The wisdom file name depends only on the grid size and thread count, so
    # build it once and use it for both loading (first loop) and saving.
    wisdom_file = "wisdom." + str(nbins) + "." + str(self.nthreads) + '.npy'

    print("Loop %d" % iloop)
    #-- Creating arrays for FFTW
    if iloop == 0:
        delta = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        deltak = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        rho = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        rhok = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        psi_x = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        psi_y = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')
        psi_z = pyfftw.empty_aligned((nbins, nbins, nbins), dtype='complex128')

        #-- Initialize FFT objects and load wisdom if available
        if os.path.isfile(wisdom_file):
            print('Reading wisdom from ', wisdom_file)
            wisd = tuple(np.load(wisdom_file))
            print('Status of importing wisdom', pyfftw.import_wisdom(wisd))
            sys.stdout.flush()
        print('Creating FFTW objects...')
        fft_obj = pyfftw.FFTW(delta, delta, axes=[0, 1, 2],
                              threads=self.nthreads)
        ifft_obj = pyfftw.FFTW(deltak, psi_x, axes=[0, 1, 2],
                               threads=self.nthreads,
                               direction='FFTW_BACKWARD')
        # Gaussian kernel in Fourier space: exp(-0.5 * |k * smooth|^2)
        kr = fftfreq(nbins, d=binsize) * 2 * np.pi * smooth
        norm = np.exp(-0.5 * (kr[:, None, None] ** 2
                              + kr[None, :, None] ** 2
                              + kr[None, None, :] ** 2))

        if verbose:
            print('Allocating randoms...')
            sys.stdout.flush()
        deltar = np.zeros((nbins, nbins, nbins), dtype='float64')
        fastmodules.allocate_gal_cic(deltar, ran.x, ran.y, ran.z, ran.we,
                                     ran.size, self.xmin, self.ymin,
                                     self.zmin, self.box, nbins, 1)
        if verbose:
            print('Smoothing...')
            sys.stdout.flush()
        # We do the smoothing via FFTs rather than scipy's gaussian_filter
        # because if using several threads for pyfftw it is much faster this way
        # (if only using 1 thread gains are negligible)
        rho = deltar + 0.0j
        fft_obj(input_array=rho, output_array=rhok)
        fastmodules.mult_norm(rhok, rhok, norm)
        ifft_obj(input_array=rhok, output_array=rho)
        deltar = rho.real
    else:
        # Reuse everything cached on self by the previous iteration.
        delta = self.delta
        deltak = self.deltak
        deltar = self.deltar
        rho = self.rho
        rhok = self.rhok
        psi_x = self.psi_x
        psi_y = self.psi_y
        psi_z = self.psi_z
        fft_obj = self.fft_obj
        ifft_obj = self.ifft_obj
        norm = self.norm

    #fft_obj = pyfftw.FFTW(delta, delta, threads=self.nthreads, axes=[0, 1, 2])
    #-- Allocate galaxies and randoms to grid with CIC method
    #-- using new positions
    if verbose:
        print('Allocating galaxies in cells...')
        sys.stdout.flush()
    deltag = np.zeros((nbins, nbins, nbins), dtype='float64')
    fastmodules.allocate_gal_cic(deltag, dat.newx, dat.newy, dat.newz,
                                 dat.we, dat.size, self.xmin, self.ymin,
                                 self.zmin, self.box, nbins, 1)
    #deltag = self.allocate_gal_cic(dat)
    if verbose:
        print('Smoothing...')
        sys.stdout.flush()
    #deltag = gaussian_filter(deltag, smooth/binsize)
    ##-- Smoothing via FFTs
    rho = deltag + 0.0j
    fft_obj(input_array=rho, output_array=rhok)
    fastmodules.mult_norm(rhok, rhok, norm)
    ifft_obj(input_array=rhok, output_array=rho)
    deltag = rho.real

    if verbose:
        print('Computing density fluctuations, delta...')
        sys.stdout.flush()
    # normalize using the randoms, avoiding possible divide-by-zero errors
    fastmodules.normalize_delta_survey(delta, deltag, deltar, self.alpha,
                                       self.ran_min)
    del (deltag)  # deltag no longer required anywhere

    if verbose:
        print('Fourier transforming delta field...')
        sys.stdout.flush()
    fft_obj(input_array=delta, output_array=delta)

    ## -- delta/k**2
    k = fftfreq(self.nbins, d=binsize) * 2 * np.pi
    fastmodules.divide_k2(delta, delta, k)

    # now solve the basic building block: IFFT[-i k delta(k)/(b k^2)]
    if verbose:
        print('Inverse Fourier transforming to get psi...')
        sys.stdout.flush()
    fastmodules.mult_kx(deltak, delta, k, bias)
    ifft_obj(input_array=deltak, output_array=psi_x)
    fastmodules.mult_ky(deltak, delta, k, bias)
    ifft_obj(input_array=deltak, output_array=psi_y)
    fastmodules.mult_kz(deltak, delta, k, bias)
    ifft_obj(input_array=deltak, output_array=psi_z)

    # from grid values of Psi_est = IFFT[-i k delta(k)/(b k^2)], compute the
    # values at the galaxy positions
    if verbose:
        print('Calculating shifts...')
        sys.stdout.flush()
    shift_x, shift_y, shift_z = self.get_shift(dat.newx, dat.newy, dat.newz,
                                               psi_x.real, psi_y.real,
                                               psi_z.real)

    #-- for first loop need to approximately remove RSD component
    #-- from Psi to speed up calculation
    #-- first loop so want this on original positions (cp),
    #-- not final ones (np) - doesn't actually matter
    if iloop == 0:
        psi_dot_rhat = (shift_x * dat.x
                        + shift_y * dat.y
                        + shift_z * dat.z) / dat.dist
        shift_x -= beta / (1 + beta) * psi_dot_rhat * dat.x / dat.dist
        shift_y -= beta / (1 + beta) * psi_dot_rhat * dat.y / dat.dist
        shift_z -= beta / (1 + beta) * psi_dot_rhat * dat.z / dat.dist

    #-- remove RSD from original positions (cp) of
    #-- galaxies to give new positions (np)
    #-- these positions are then used in next determination of Psi,
    #-- assumed to not have RSD.
    #-- the iterative procedure then uses the new positions as
    #-- if they'd been read in from the start
    psi_dot_rhat = (shift_x * dat.x + shift_y * dat.y
                    + shift_z * dat.z) / dat.dist
    dat.newx = dat.x + f * psi_dot_rhat * dat.x / dat.dist
    dat.newy = dat.y + f * psi_dot_rhat * dat.y / dat.dist
    dat.newz = dat.z + f * psi_dot_rhat * dat.z / dat.dist

    if verbose:
        print(
            'Debug: first 10 x,y,z shifts and old and new observer distances'
        )
        # Cap at the catalogue length so tiny inputs do not raise IndexError.
        for i in range(min(10, len(dat.x))):
            oldr = np.sqrt(dat.x[i]**2 + dat.y[i]**2 + dat.z[i]**2)
            newr = np.sqrt(dat.newx[i]**2 + dat.newy[i]**2 + dat.newz[i]**2)
            print('%.3f %.3f %.3f %.3f %.3f' %
                  (shift_x[i], shift_y[i], shift_z[i], oldr, newr))

    # Cache the work arrays, plans and kernel for the next iteration.
    self.deltar = deltar
    self.delta = delta
    self.deltak = deltak
    self.rho = rho
    self.rhok = rhok
    self.psi_x = psi_x
    self.psi_y = psi_y
    self.psi_z = psi_z
    self.fft_obj = fft_obj
    self.ifft_obj = ifft_obj
    self.norm = norm

    #-- save wisdom
    if iloop == 0 and save_wisdom and not os.path.isfile(wisdom_file):
        wisd = pyfftw.export_wisdom()
        np.save(wisdom_file, wisd)
        print('Wisdom saved at', wisdom_file)
dampingFunction = UtilityFunctions.TanhDamping(max_XY).unscaledFunction()
damping = dampingFunction(x, y)

# Set up arrays to store the density
# First two dimensions are spatial, third is time
# density = np.zeros(x.shape + tuple([N_TIMESTEPS]))

# Set up fft and inverse fft
# NOTE: psi must be initialised to psi0 *after* the plan is created. Creation of
# the plan may erase the contents of psi!
#
# When we call fft_object(), psi will be replaced with the fft of psi. The same
# is true for ifft_object

# Optional first command-line argument: path to a JSON file of FFTW wisdom.
if len(sys.argv) > 1:
    # Use a context manager so the wisdom file is closed even if parsing or
    # importing raises.
    with open(sys.argv[1]) as f:
        importStatus = pyfftw.import_wisdom(json.load(f))
    if not np.array(importStatus).all():
        raise IOError("Wisdom not correctly loaded")

# Optional second command-line argument: number of FFTW threads (default 2).
if len(sys.argv) > 2:
    N_THREADS = int(sys.argv[2])
else:
    N_THREADS = 2
print("N threads: %d" % N_THREADS)

# Optimal alignment for the CPU
al = pyfftw.simd_alignment
psi = pyfftw.n_byte_align_empty((N, N), al, 'complex128')
flag = 'FFTW_PATIENT'
# Both plans operate in place on psi.
fft_object = pyfftw.FFTW(psi, psi, flags=[flag], axes=(0, 1),
                         threads=N_THREADS)
ifft_object = pyfftw.FFTW(psi, psi, flags=[flag], axes=(0, 1),
                          threads=N_THREADS, direction='FFTW_BACKWARD')

# copy psi0 into psi. To be safe about keeping the alignment of psi, set all the
if (line.startswith(" ") or line.startswith(")")): wisdom_tuple[-1] += line # append to string else: wisdom_tuple.append(line) wisdom = wisdom_tuple # override return wisdom # if configured to use centuries of fftw wisdom, read the fftw oracle of # delphi (i.e. the wisdom file) - do this on import: if (wisdom_file is not None): wisdom = read_wisdom() if (wisdom is not None): pyfftw.import_wisdom(wisdom) pyfftw_simd_alignment = pyfftw.simd_alignment pyfftw.interfaces.cache.enable() pyfftw.interfaces.cache.set_keepalive_time(300) # keep cache alive for 300 sec # TODO: make this a configurable parameter? def get_num_threads(): """Get number of threads from environment variable. Returns ------- num_threads : int $TFFTW_NUM_THREADS if set, 1 otherwise. """
def __init__(self, **kwargs):
    """
    Store the supplied parameters as attributes, validate them, create the
    FFTW plans and build the numexpr code strings used for propagation.

    The following parameters must be specified
        X_gridDIM - specifying the grid size (must be a power of 2)
        X_amplitude - maximum value of the coordinates
        V - potential energy (as a string to be evaluated by numexpr)
        K - momentum dependent part of the hamiltonian (as a string to be evaluated by numexpr)
        dt - time step

    The following parameters are optional
        A - a coordinate dependent Lindblad dissipator (as a string to be evaluated by numexpr);
            defaults to "0." (with a warning)
        RHS_P_A -- the correction to the second Ehrenfest theorem due to A
        B - a momentum dependent Lindblad dissipator (as a string to be evaluated by numexpr);
            defaults to "0." (with a warning)
        RHS_X_B -- the correction to the first Ehrenfest theorem due to B
        diff_V -- the derivative of the potential energy for the Ehrenfest theorem calculations
        diff_K -- the derivative of the kinetic energy for the Ehrenfest theorem calculations
        t - initial value of time; defaults to 0. (with a warning)
        abs_boundary -- absorbing boundary (as a string to be evaluated by numexpr);
            defaults to "1." (with a warning)

    Raises
        AttributeError - if X_gridDIM, X_amplitude, V, K or dt is missing
        AssertionError - if X_gridDIM is not a power of 2

    Side effects
        Reads FFTW wisdom from, and writes it back to, the file 'fftw_wisdom'
        in the current working directory.
    """

    # save all attributes
    for name, value in kwargs.items():
        # if the value supplied is a function, then dynamically assign it as a method;
        if isinstance(value, FunctionType):
            setattr(self, name, MethodType(value, self))
        # otherwise bind it as a property
        else:
            setattr(self, name, value)

    # Check that all attributes were specified
    try:
        # make sure self.X_gridDIM has a value of power of 2
        assert 2 ** int(np.log2(self.X_gridDIM)) == self.X_gridDIM, \
            "A value of the grid size (X_gridDIM) must be a power of 2"
    except AttributeError:
        raise AttributeError("Grid size (X_gridDIM) was not specified")

    try:
        self.X_amplitude
    except AttributeError:
        raise AttributeError("Coordinate range (X_amplitude) was not specified")

    try:
        self.V
    except AttributeError:
        raise AttributeError("Potential energy (V) was not specified")

    try:
        self.K
    except AttributeError:
        raise AttributeError("Momentum dependence (K) was not specified")

    try:
        self.A
    except AttributeError:
        self.A = self.RHS_P_A = "0."
        warnings.warn("coordinate dependent Lindblad dissipator (A) was not specified so it is set to zero")

    try:
        self.B
    except AttributeError:
        self.B = self.RHS_X_B = "0."
        warnings.warn("momentum dependent Lindblad dissipator (B) was not specified so it is set to zero")

    try:
        self.dt
    except AttributeError:
        raise AttributeError("Time-step (dt) was not specified")

    try:
        self.t
    except AttributeError:
        warnings.warn("initial time (t) was not specified, thus it is set to zero.")
        self.t = 0.

    try:
        self.abs_boundary
    except AttributeError:
        warnings.warn("absorbing boundary (abs_boundary) was not specified, thus it is turned off")
        self.abs_boundary = "1."

    ########################################################################################
    #
    #   Initialize Fourier transform for efficient calculations
    #
    ########################################################################################

    # Load FFTW wisdom if saved before
    # NOTE(review): pickle.load can execute arbitrary code; 'fftw_wisdom' is
    # expected to be a file previously written by this class.
    try:
        with open('fftw_wisdom', 'rb') as f:
            pyfftw.import_wisdom(pickle.load(f))

        print("\nFFTW wisdom has been loaded\n")
    except IOError:
        pass

    # allocate the array for density matrix
    # (np.complex was only an alias of the builtin complex and has been removed
    # from NumPy; np.complex128 is the identical dtype.)
    self.rho = pyfftw.empty_aligned((self.X_gridDIM, self.X_gridDIM), dtype=np.complex128)

    # FFTW settings to achieve good performance. For details see
    # https://hgomersall.github.io/pyFFTW/pyfftw/pyfftw.html#pyfftw.FFTW
    fftw_flags = ('FFTW_MEASURE', 'FFTW_DESTROY_INPUT')

    # how many threads to use for parallelized calculation of FFT.
    # Use the same number of threads as in numexpr
    fftw_nthreads = ne.nthreads

    # Create plan to perform FFT over the zeroth axis. It is equivalent to
    #   fftpack.fft(self.rho, axis=0, overwrite_x=True)
    self.rho_fft_ax0 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(0,),
        direction='FFTW_FORWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    self.rho_fft_ax1 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(1,),
        direction='FFTW_FORWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    self.rho_ifft_ax0 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(0,),
        direction='FFTW_BACKWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    self.rho_ifft_ax1 = pyfftw.FFTW(
        self.rho, self.rho,
        axes=(1,),
        direction='FFTW_BACKWARD',
        flags=fftw_flags,
        threads=fftw_nthreads
    )

    # Save FFTW wisdom
    with open('fftw_wisdom', 'wb') as f:
        pickle.dump(pyfftw.export_wisdom(), f)

    ########################################################################################

    # get coordinate step size
    self.dX = 2. * self.X_amplitude / self.X_gridDIM

    # generate coordinate range
    k = np.arange(self.X_gridDIM)
    self.k = k[:, np.newaxis]
    self.k_prime = k[np.newaxis, :]

    X = (k - self.X_gridDIM / 2) * self.dX
    self.X = X[:, np.newaxis]
    self.X_prime = X[np.newaxis, :]

    # generate momentum range
    self.dP = np.pi / self.X_amplitude

    P = (k - self.X_gridDIM / 2) * self.dP
    self.P = P[:, np.newaxis]
    self.P_prime = P[np.newaxis, :]

    # allocate an auxiliary array needed for propagation
    self.expV = np.zeros_like(self.rho)

    # construct the coordinate dependent phase containing the dissipator as well as coherent propagator
    phase_X = "1j * (({V_X_prime}) - ({V_X})) " \
              "+ ({A_X}) * conj({A_X_prime}) - 0.5 * abs({A_X}) ** 2 - 0.5 * abs({A_X_prime}) ** 2".format(
        V_X_prime=self.V.format(X="X_prime"),
        V_X=self.V.format(X="X"),
        A_X_prime=self.A.format(X="X_prime"),
        A_X=self.A.format(X="X"),
    )

    # numexpr code to calculate
    #   abs_boundary(X) * abs_boundary(X_prime) * (-1)**(k + k_prime) * exp(0.5 * dt * phase_X)
    self.code_expV = "(%s) * (%s) * (-1) ** (k + k_prime) * exp(0.5 * dt * (%s))" % (
        self.abs_boundary.format(X="X"),
        self.abs_boundary.format(X="X_prime"),
        phase_X
    )

    # construct the momentum dependent phase containing the dissipator as well as coherent propagator
    phase_P = "1j * (({K_P_prime}) - ({K_P})) " \
              "+ ({B_P}) * conj({B_P_prime}) - 0.5 * abs({B_P}) ** 2 - 0.5 * abs({B_P_prime}) ** 2".format(
        K_P_prime=self.K.format(P="P_prime"),
        K_P=self.K.format(P="P"),
        B_P_prime=self.B.format(P="P_prime"),
        B_P=self.B.format(P="P"),
    )

    # numexpr code to calculate rho * exp(dt * phase_P)
    self.code_expK = "rho * exp(dt * (%s))" % phase_P

    # Check whether the necessary terms are specified to calculate the first-order Ehrenfest theorems
    try:
        # Allocate a copy of the wavefunction for storing the density matrix in the momentum representation
        self.rho_p = pyfftw.empty_aligned(self.rho.shape, dtype=self.rho.dtype)

        # Create FFT plans to operate on self.rho_p
        self.rho_p_fft_ax0 = pyfftw.FFTW(
            self.rho_p, self.rho_p,
            axes=(0,),
            direction='FFTW_FORWARD',
            flags=fftw_flags,
            threads=fftw_nthreads
        )

        self.rho_p_ifft_ax1 = pyfftw.FFTW(
            self.rho_p, self.rho_p,
            axes=(1,),
            direction='FFTW_BACKWARD',
            flags=fftw_flags,
            threads=fftw_nthreads
        )

        # numexpr codes to calculate the First Ehrenfest theorems
        self.code_V_average = "sum((%s) * density)" % self.V.format(X="X")
        self.code_K_average = "sum((%s) * density)" % self.K.format(P="P")

        self.code_X_average = "sum(X * density)"
        self.code_P_average = "sum(P * density)"

        # These two lines raise AttributeError when diff_V / diff_K (or the
        # RHS_* corrections) were not supplied, which disables the checks below.
        self.code_P_average_RHS = "sum((-(%s) + (%s)) * density)" % (
            self.diff_V.format(X="X"), self.RHS_P_A.format(X="X")
        )
        self.code_X_average_RHS = "sum(((%s) + (%s)) * density)" % (
            self.diff_K.format(P="P"), self.RHS_X_B.format(P="P")
        )

        # Lists where the expectation values of X and P are stored
        self.X_average = []
        self.P_average = []

        # Lists where the right hand sides of the Ehrenfest theorems for X and P are stored
        self.X_average_RHS = []
        self.P_average_RHS = []

        # List where the expectation value of the Hamiltonian will be stored
        self.hamiltonian_average = []

        # Flag requesting the Ehrenfest theorem calculations
        self.isEhrenfest = True

    except AttributeError:
        # Since self.diff_V and self.diff_K are not specified,
        # the first Ehrenfest theorem will not be calculated
        self.isEhrenfest = False
def hilbert_fftw(s, debug=False, dtype='complex64'):
    """Hilbert transform of a 1-D sequence computed with pyFFTW.

    Intended as an FFTW-based drop-in replacement for
    ``scipy.fftpack.hilbert``.  Beware of the sign of the returned sequence:
    the function returns ``-imag`` of the analytic signal.

    Algorithm (see http://au.mathworks.com/help/signal/ref/hilbert.html):
    the analytic signal of a sequence has a one-sided Fourier transform, so

    1. compute the FFT ``S`` of the input;
    2. build a weight vector ``h`` that is 1 at DC (and at the Nyquist bin
       when ``n`` is even), 2 on the strictly positive frequencies and 0 on
       the negative frequencies;
    3. multiply ``S`` element-wise by ``h``;
    4. inverse-FFT the product.

    Parameters
    ----------
    s : sequence
        Input samples; ``len(s)`` defines the transform length.
    debug : bool, optional
        If true, print the wall-clock time of the FFT, setup and IFFT stages.
    dtype : str, optional
        Complex dtype of the FFTW work buffers.

    Returns
    -------
    numpy.ndarray
        ``-imag`` of the analytic signal, length ``len(s)``.

    Notes
    -----
    FFTW wisdom is read from ``wisdom_hilbert.wis`` in the current working
    directory if it can be loaded, and written there otherwise.
    """
    n = len(s)
    half = n // 2  # integer division: valid as an index on Python 2 and 3

    pyfftw.interfaces.cache.enable()
    pyfftw.interfaces.cache.set_keepalive_time(50.0)
    align = pyfftw.simd_alignment

    write_wisdom = False
    try:
        # NOTE(review): pickle.load can execute arbitrary code; the wisdom
        # file is expected to be one previously written by this function.
        with open("wisdom_hilbert.wis", "rb") as wisdom_file:
            wisdom = pickle.load(wisdom_file)
        pyfftw.import_wisdom(wisdom)
    except Exception:
        # Best-effort: a missing or unreadable wisdom file just means we
        # plan from scratch and (re)write the file below.
        write_wisdom = True
        print('no wisdom file')

    fft_in = pyfftw.empty_aligned(n, dtype=dtype, n=align)
    fft_out = pyfftw.empty_aligned(n, dtype=dtype, n=align)
    ifft_in = pyfftw.empty_aligned(n, dtype=dtype, n=align)
    ifft_out = pyfftw.empty_aligned(n, dtype=dtype, n=align)

    fft_machine = pyfftw.FFTW(fft_in, fft_out, direction='FFTW_FORWARD',
                              flags=('FFTW_ESTIMATE',), threads=8)
    ifft_machine = pyfftw.FFTW(ifft_in, ifft_out, direction='FFTW_BACKWARD',
                               flags=('FFTW_ESTIMATE',), threads=8)

    if write_wisdom:
        with open("wisdom_hilbert.wis", "wb") as wisdom_file:
            pickle.dump(pyfftw.export_wisdom(), wisdom_file)

    s_0 = time.time()
    fft_in[:] = s
    S = fft_machine()
    if debug:
        print('fft %s' % (time.time() - s_0))

    s_1 = time.time()
    h = np.zeros(n)
    h[0] = 1.
    if n % 2 == 0:
        # Even length: the Nyquist bin is shared by +/- frequencies -> weight 1.
        h[half] = 1.
        h[1:half] = 2.
    else:
        # Odd length: there is no Nyquist bin, so every positive frequency
        # (bins 1 .. (n-1)/2) gets weight 2.
        h[1:(n + 1) // 2] = 2.
    if debug:
        print('setup %s' % (time.time() - s_1))

    s_2 = time.time()
    ifft_in[:] = h * S
    ret = ifft_machine()
    if debug:
        print('ifft %s' % (time.time() - s_2))

    return -ret[:n].imag
fourier method """ from __future__ import division from UtilityFunctions import TanhDamping import numpy as np import pyfftw import json import os from copy import deepcopy WISDOM_LOCATION = os.path.join(os.path.expanduser('~'), '.wisdom', 'wisdom') FLAG = 'FFTW_PATIENT' # Load wisdom? try: wisdomFile = open(WISDOM_LOCATION, 'r+') importStatus = pyfftw.import_wisdom(json.load(wisdomFile)) print "Wisdom found" if not np.array(importStatus).all(): print "Wisdom not loaded correctly" # raise Warning("Wisdom not loaded correctly.") wisdomFile.close() except IOError: print "Wisdom not present" # raise Warning("Wisdom not present.") # TODO: Equations should be defined without the i # TODO: Allow us to specify whether fields should be complex # TODO: Account for discrepancy between D(k^2) and D(\nabla^2) class Equation(object):
def suns_online_track(filename_video, filename_CNN, Params_pre, Params_post, dims, \ frames_init, merge_every, batch_size_init=1, useSF=True, useTF=True, useSNR=True, \ med_subtract=False, update_baseline=False, \ useWT=False, prealloc=True, display=True, useMP=True, p=None): '''The complete SUNS online procedure with tracking. It uses the trained CNN model from "filename_CNN" and the optimized hyper-parameters in "Params_post" to process the video "Exp_ID" in "dir_video" Inputs: filename_video (str): The path of the file of the input raw video. The file must be a ".h5" file, with dataset "mov" being the input video (shape = (T0,Lx0,Ly0)). filename_CNN (str): The path of the trained CNN model. Params_pre (dict): Parameters for pre-processing. Params_pre['gauss_filt_size'] (float): The standard deviation of the spatial Gaussian filter in pixels Params_pre['Poisson_filt'] (1D numpy.ndarray of float32): The temporal filter kernel Params_pre['num_median_approx'] (int): Number of frames used to compute the median and median-based standard deviation Params_pre['nn'] (int): Number of frames at the beginning of the video to be processed. The remaining video is not considered a part of the input video. Params_post (dict): Parameters for post-processing. Params_post['minArea']: Minimum area of a valid neuron mask (unit: pixels). Params_post['avgArea']: The typical neuron area (unit: pixels). Params_post['thresh_pmap']: The probablity threshold. Values higher than thresh_pmap are active pixels. It is stored in uint8, so it should be converted to float32 before using. Params_post['thresh_mask']: Threashold to binarize the real-number mask. Params_post['thresh_COM0']: Threshold of COM distance (unit: pixels) used for the first COM-based merging. Params_post['thresh_COM']: Threshold of COM distance (unit: pixels) used for the second COM-based merging. Params_post['thresh_IOU']: Threshold of IOU used for merging neurons. 
Params_post['thresh_consume']: Threshold of consume ratio used for merging neurons. Params_post['cons']: Minimum number of consecutive frames that a neuron should be active for. dims (tuplel of int, shape = (2,)): lateral dimension of the raw video. frames_init (int): Number of frames used for initialization. merge_every (int): SUNS online merge the newly segmented frames every "merge_every" frames. batch_size_init (int, default to 1): batch size of CNN inference for initialization frames. useSF (bool, default to True): True if spatial filtering is used. useTF (bool, default to True): True if temporal filtering is used. useSNR (bool, default to True): True if pixel-by-pixel SNR normalization filtering is used. med_subtract (bool, default to False): True if the spatial median of every frame is subtracted before temporal filtering. Can only be used when spatial filtering is not used. update_baseline (bool, default to False): True if the median and median-based std is updated every "frames_init" frames. useWT (bool, default to False): Indicator of whether watershed is used. prealloc (bool, default to True): True if pre-allocate memory space for large variables. Achieve faster speed at the cost of higher memory occupation. display (bool, default to True): Indicator of whether to show intermediate information useMP (bool, defaut to True): indicator of whether multiprocessing is used to speed up. p (multiprocessing.Pool, default to None): Outputs: Masks (3D numpy.ndarray of bool, shape = (n,Lx0,Ly0)): the final segmented masks. Masks_2 (scipy.csr_matrix of bool, shape = (n,Lx0*Ly0)): the final segmented masks in the form of sparse matrix. 
time_total (list of float, shape = (3,)): the total time spent for initalization, online processing, and total processing time_frame (list of float, shape = (3,)): the average time spent on every frame for initalization, online processing, and total processing ''' if display: start = time.time() (Lx, Ly) = dims # zero-pad the lateral dimensions to multiples of 8, suitable for CNN rowspad = math.ceil(Lx / 8) * 8 colspad = math.ceil(Ly / 8) * 8 dimspad = (rowspad, colspad) Poisson_filt = Params_pre['Poisson_filt'] gauss_filt_size = Params_pre['gauss_filt_size'] nn = Params_pre['nn'] leng_tf = Poisson_filt.size leng_past = 2 * leng_tf # number of past frames stored for temporal filtering list_time_per = np.zeros(nn) # Load CNN model fff = get_shallow_unet() fff.load_weights(filename_CNN) # run CNN inference once to warm up init_imgs = np.zeros((batch_size_init, rowspad, colspad, 1), dtype='float32') init_masks = np.zeros((batch_size_init, rowspad, colspad, 1), dtype='uint8') fff.evaluate(init_imgs, init_masks, batch_size=batch_size_init) del init_imgs, init_masks # load optimal post-processing parameters minArea = Params_post['minArea'] avgArea = Params_post['avgArea'] # thresh_pmap = Params_post['thresh_pmap'] thresh_mask = Params_post['thresh_mask'] # thresh_COM0 = Params_post['thresh_COM0'] # thresh_COM = Params_post['thresh_COM'] thresh_IOU = Params_post['thresh_IOU'] thresh_consume = Params_post['thresh_consume'] # cons = Params_post['cons'] # thresh_pmap_float = (Params_post['thresh_pmap']+1.5)/256 thresh_pmap_float = (Params_post['thresh_pmap'] + 1) / 256 # for published version # Spatial filtering preparation if useSF == True: # lateral dimensions slightly larger than the raw video but faster for FFT rows1 = cv2.getOptimalDFTSize(rowspad) cols1 = cv2.getOptimalDFTSize(colspad) # if the learned 2D and 3D wisdom files have been saved, load them. 
# Otherwise, learn wisdom later Length_data2 = str((rows1, cols1)) cc2 = load_wisdom_txt('wisdom\\' + Length_data2) Length_data3 = str((frames_init, rows1, cols1)) cc3 = load_wisdom_txt('wisdom\\' + Length_data3) if cc3: pyfftw.import_wisdom(cc3) # mask for spatial filter mask2 = plan_mask2(dims, (rows1, cols1), gauss_filt_size) # FFT planning (bb, bf, fft_object_b, fft_object_c) = plan_fft(frames_init, (rows1, cols1), prealloc) else: (mask2, bf, fft_object_b, fft_object_c) = (None, None, None, None) bb = np.zeros((frames_init, rowspad, colspad), dtype='float32') # Temporal filtering preparation frames_initf = frames_init - leng_tf + 1 if useTF == True: if prealloc: # past frames stored for temporal filtering past_frames = np.ones((leng_past, rowspad, colspad), dtype='float32') else: past_frames = np.zeros((leng_past, rowspad, colspad), dtype='float32') else: past_frames = None if prealloc: # Pre-allocate memory for some future variables med_frame2 = np.ones((rowspad, colspad, 2), dtype='float32') video_input = np.ones((frames_initf, rowspad, colspad), dtype='float32') pmaps_b_init = np.ones((frames_initf, Lx, Ly), dtype='uint8') frame_SNR = np.ones(dimspad, dtype='float32') pmaps_b = np.ones(dims, dtype='uint8') if update_baseline: video_tf_past = np.ones((frames_init, rowspad, colspad), dtype='float32') else: med_frame2 = np.zeros((rowspad, colspad, 2), dtype='float32') video_input = np.zeros((frames_initf, rowspad, colspad), dtype='float32') pmaps_b_init = np.zeros((frames_initf, Lx, Ly), dtype='uint8') frame_SNR = np.zeros(dimspad, dtype='float32') pmaps_b = np.zeros(dims, dtype='uint8') if update_baseline: video_tf_past = np.zeros((frames_init, rowspad, colspad), dtype='float32') if display: time_init = time.time() print('Parameter initialization time: {} s'.format(time_init - start)) # %% Load raw video h5_img = h5py.File(filename_video, 'r') video_raw = np.array(h5_img['mov']) h5_img.close() nframes = video_raw.shape[0] nframesf = nframes - leng_tf + 1 bb[:, 
:Lx, :Ly] = video_raw[:frames_init] if display: time_load = time.time() print('Load data: {} s'.format(time_load - time_init)) # %% Actual processing starts after the video is loaded into memory # Initialization using the first "frames_init" frames print('Initialization of algorithms using the first {} frames'.format( frames_init)) if display: start_init = time.time() med_frame3, segs_all, recent_frames = init_online( bb, dims, video_input, pmaps_b_init, fff, thresh_pmap_float, Params_post, \ med_frame2, mask2, bf, fft_object_b, fft_object_c, Poisson_filt, \ useSF=useSF, useTF=useTF, useSNR=useSNR, med_subtract=med_subtract, \ useWT=useWT, batch_size_init=batch_size_init, p=p) if useTF == True: past_frames[:leng_tf] = recent_frames tuple_temp = merge_complete(segs_all[:frames_initf], dims, Params_post) # Initialize Online track variables (Masksb_temp, masks_temp, times_temp, area_temp, have_cons_temp) = tuple_temp # list of previously found neurons that satisfy consecutive frame requirement Masks_cons = select_cons(tuple_temp) # sparse matrix of previously found neurons that satisfy consecutive frame requirement Masks_cons_2D = sparse.vstack(Masks_cons) # indices of previously found neurons that satisfy consecutive frame requirement ind_cons = have_cons_temp.nonzero()[0] segs0 = segs_all[0] # segs of initialization frames # segs if no neurons are found segs_empty = (segs0[0][0:0], segs0[1][0:0], segs0[2][0:0], segs0[3][0:0]) # Number of previously found neurons that satisfy consecutive frame requirement N1 = len(Masks_cons) # list of "segs" for neurons that are not previously found list_segs_new = [] # list of newly segmented masks for old neurons (segmented in previous frames) list_masks_old = [[] for _ in range(N1)] # list of the newly active indices of frames of old neurons times_active_old = [[] for _ in range(N1)] # True if the old neurons are active in the previous frame active_old_previous = np.zeros(N1, dtype='bool') if display: end_init = time.time() 
time_init = end_init - start_init time_frame_init = time_init / (frames_initf) * 1000 print('Initialization time: {:6f} s, {:6f} ms/frame'.format( time_init, time_frame_init)) if display: start_online = time.time() # Spatial filtering preparation for online processing. # Attention: this part counts to the total time if useSF: if cc2: pyfftw.import_wisdom(cc2) (bb, bf, fft_object_b, fft_object_c) = plan_fft2((rows1, cols1)) else: (bf, fft_object_b, fft_object_c) = (None, None, None) bb = np.zeros(dimspad, dtype='float32') print('Start frame by frame processing') # %% Online processing for the following frames current_frame = leng_tf + 1 t_merge = frames_initf for t in range(frames_initf, nframesf): if display: start_frame = time.time() # load the current frame bb[:Lx, :Ly] = video_raw[t + leng_tf - 1] bb[Lx:, :] = 0 bb[:, Ly:] = 0 # PreProcessing frame_SNR, frame_tf = preprocess_online(bb, dimspad, med_frame3, frame_SNR, \ past_frames[current_frame-leng_tf:current_frame], mask2, bf, fft_object_b, fft_object_c, \ Poisson_filt, useSF=useSF, useTF=useTF, useSNR=useSNR, \ med_subtract=med_subtract, update_baseline=update_baseline) if update_baseline: t_past = (t - frames_initf) % frames_init video_tf_past[t_past] = frame_tf if t_past == frames_init - 1: # update median and median-based standard deviation every "frames_init" frames if useSNR: med_frame3 = SNR_normalization(video_tf_past, med_frame2, (rowspad, colspad), 1, display=False) else: med_frame3 = median_normalization(video_tf_past, med_frame2, (rowspad, colspad), 1, display=False) # CNN inference frame_prob = CNN_online(frame_SNR, fff, dims) # first step of post-processing segs = separate_neuron_online(frame_prob, pmaps_b, thresh_pmap_float, minArea, avgArea, useWT) active_old = np.zeros( N1, dtype='bool' ) # True if the old neurons are active in the current frame masks_t, neuronstate_t, cents_t, areas_t = segs N2 = neuronstate_t.size if N2: # Try to merge the new masks to old neurons new_found = np.zeros(N2, 
dtype='bool') for n2 in range(N2): masks_t2 = masks_t[n2] cents_t2 = np.round(cents_t[n2, 1]) * Ly + np.round(cents_t[n2, 0]) # If a new masks belongs to an old neuron, the COM of the new mask must be inside the old neuron area. # Select possible old neurons that the new mask can merge to possible_masks1 = Masks_cons_2D[:, cents_t2].nonzero()[0] IOUs = np.zeros(len(possible_masks1)) areas_t2 = areas_t[n2] for (ind, n1) in enumerate(possible_masks1): # Calculate IoU and consume ratio to determine merged neurons area_i = Masks_cons[n1].multiply(masks_t2).nnz area_temp1 = area_temp[n1] area_u = area_temp1 + areas_t2 - area_i IOU = area_i / area_u consume = area_i / min(area_temp1, areas_t2) contain = (IOU >= thresh_IOU) or (consume >= thresh_consume) if contain: # merging criterion satisfied IOUs[ind] = IOU num_contains = IOUs.nonzero()[0].size if num_contains: # The new mask can merge to one of the old neurons. # If there are multiple candicates, choose the one with the highest IoU belongs = possible_masks1[IOUs.argmax()] # merge the mask and active frame index list_masks_old[belongs].append(masks_t2) times_active_old[belongs].append(t + frames_initf) # This old neurons is active in the current frame active_old[belongs] = True else: # The new mask can not merge to any old neuron. new_found[n2] = True if np.any( new_found ): # There are some new masks that can not merge to old neurons segs_new = (masks_t[new_found], neuronstate_t[new_found], cents_t[new_found], areas_t[new_found]) else: # All masks already merged to old neurons segs_new = segs_empty else: # No neurons fould in the current frame segs_new = segs list_segs_new.append(segs_new) if (t + 1 - t_merge) != merge_every or t == (nframesf - 1): # Update the old neurons with new appearances in the current frame. 
if t < (nframesf - 1): # True if the neurons are active in the previous frame but not active in the current frame inactive = np.logical_and( active_old_previous, np.logical_not(active_old)).nonzero()[0] else: # last frame # All active neurons should be updated, so they are treated as inactive in the next frame inactive = active_old_previous.nonzero()[0] # Update the indicators of the previous frame using the current frame active_old_previous = active_old.copy() for n1 in inactive: # merge new active frames to existing active frames for already found neurons # n1 is the index in the old neurons that satisfy consecutive frame requirement. # n10 is the index in all old neurons. n10 = ind_cons[n1] # Add all the new masks to the overall real-number masks mask_update = masks_temp[n10] + sum(list_masks_old[n1]) masks_temp[n10] = mask_update # Add indices of active frames times_add = np.unique(np.array(times_active_old[n1])) times_temp[n10] = np.hstack([times_temp[n10], times_add]) # reset lists used to store the information from new frames related to old neurons list_masks_old[n1] = [] times_active_old[n1] = [] # update the binary masks and areas Maskb_update = mask_update >= mask_update.max() * thresh_mask Masksb_temp[n10] = Maskb_update Masks_cons[n1] = Maskb_update area_temp[n10] = Maskb_update.nnz if inactive.size: Masks_cons_2D = sparse.vstack(Masks_cons) if (t + 1 - t_merge) == merge_every or t == (nframesf - 1): if t < (nframesf - 1): # delay merging new frame to next frame by assuming all the neurons active in the previous frame # are still active in the current frame, to reserve merging time for new neurons active_old_previous = np.logical_or(active_old_previous, active_old) # merge new neurons with old masks that do not satisfy consecutive frame requirement tuple_temp = (Masksb_temp, masks_temp, times_temp, area_temp, have_cons_temp) # merge the remaining new masks from the most recent "merge_every" frames tuple_add = merge_complete(list_segs_new, dims, 
Params_post) (Masksb_add, masks_add, times_add, area_add, have_cons_add) = tuple_add # update the indices of active frames times_add = [x + merge_every for x in times_add] tuple_add = (Masksb_add, masks_add, times_add, area_add, have_cons_add) # merge the remaining new masks with the existing masks that do not satisfy consecutive frame requirement tuple_temp = merge_2_nocons(tuple_temp, tuple_add, dims, Params_post) (Masksb_temp, masks_temp, times_temp, area_temp, have_cons_temp) = tuple_temp # Update the indices of old neurons that satisfy consecutive frame requirement ind_cons_new = have_cons_temp.nonzero()[0] for (ind, ind_cons_0) in enumerate(ind_cons_new): if ind_cons_0 not in ind_cons: # update lists used to store the information from new frames related to old neurons if ind_cons_0 > ind_cons.max(): list_masks_old.append([]) times_active_old.append([]) else: list_masks_old.insert(ind, []) times_active_old.insert(ind, []) # Update the list of previously found neurons that satisfy consecutive frame requirement Masks_cons = select_cons(tuple_temp) Masks_cons_2D = sparse.vstack(Masks_cons) N1 = len(Masks_cons) list_segs_new = [] # Update whether the old neurons are active in the previous frame active_old_previous = np.zeros_like(have_cons_temp) active_old_previous[ind_cons] = active_old active_old_previous = active_old_previous[ind_cons_new] ind_cons = ind_cons_new t_merge += merge_every current_frame += 1 # Update the stored latest frames when it runs out: move them "leng_tf" ahead if current_frame > leng_past: current_frame = leng_tf + 1 past_frames[:leng_tf] = past_frames[-leng_tf:] if display: end_frame = time.time() list_time_per[t] = end_frame - start_frame if t % 1000 == 0: print('{} frames has been processed'.format(t)) Masks_cons = select_cons(tuple_temp) # final result. 
Masks_2 is a 2D sparse matrix of the segmented neurons if len(Masks_cons): Masks_2 = sparse.vstack(Masks_cons) else: Masks_2 = sparse.csc_matrix((0, dims[0] * dims[1])) if display: end_online = time.time() time_online = end_online - start_online time_frame_online = time_online / (nframesf - frames_initf) * 1000 print('Online time: {:6f} s, {:6f} ms/frame'.format( time_online, time_frame_online)) # Save total processing time, and average processing time per frame if display: end_final = time.time() time_all = end_final - start_init time_frame_all = time_all / nframes * 1000 print('Total time: {:6f} s, {:6f} ms/frame'.format( time_all, time_frame_all)) time_total = np.array([time_init, time_online, time_all]) time_frame = np.array( [time_frame_init, time_frame_online, time_frame_all]) else: time_total = np.zeros((3, )) time_frame = np.zeros((3, )) # convert to a 3D array of the segmented neurons Masks = np.reshape(Masks_2.toarray(), (Masks_2.shape[0], Lx, Ly)).astype('bool') return Masks, Masks_2, time_total, time_frame
def __init__(self, spatialGrid, kGrid, damping='default',
             FFTW_METHOD='FFTW_PATIENT', N_THREADS=6):
    """
    Initialise an instance of a GPESolver.

    Besides storing the grids, this loads any FFTW wisdom cached at
    WISDOM_LOCATION, plans the in-place forward and backward 2D transforms
    on ``self.psi``, and writes the (possibly updated) wisdom back to
    WISDOM_LOCATION.

    Parameters:

        spatialGrid: A tuple (x, y) representing the spatial grid that the
        simulation will be performed on. This grid should be scaled to
        units of the characteristic length defined in ParameterContainer.
        Both arrays must be square (N, N) and of identical shape.

        kGrid: A tuple (k_x, k_y) representing the k-space grid
        corresponding to the (x, y) grid. The scaling of this grid should
        correspond to that of the (x, y) grid. That is, it should be scaled
        to units of the inverse of the characteristic length defined in
        ParameterContainer. Must have the same shape as the spatial grid.

        damping: The damping method to use in order to suppress the
        implicit periodic boundary conditions. Default is a tanh function
        that drops from 1.0 to 0 over the last 10% of each dimension.
        Presently, the only other option is "None", which means no damping,
        and hence periodic boundary conditions.

        FFTW_METHOD: The method for FFTW to use when planning the
        transforms. FFTW_PATIENT, FFTW_EXHAUSTIVE and FFTW_MEASURE will
        result in faster transforms, but may take a significant amount of
        time to plan.

        N_THREADS: The number of threads for FFTW to use (default 6).
        Increasing this may greatly increase the time that FFTW requires
        to plan the transforms.

    Raises:

        AssertionError: if the grids do not share one square shape.
    """
    # Load any existing wisdom. Wisdom is only a planning cache, so a
    # missing or unreadable file is reported and otherwise ignored; the
    # plans are then computed from scratch and the file is rewritten below.
    try:
        with open(WISDOM_LOCATION, 'r') as wisdomFile:
            storedWisdom = json.load(wisdomFile)
    except IOError:
        print("Wisdom not present")
    except ValueError:
        # json.load raises ValueError (or a subclass) on malformed content.
        print("Wisdom not loaded correctly")
    else:
        # JSON hands back text strings; pass byte strings to pyfftw so the
        # same file works regardless of the interpreter's string type.
        # FFTW wisdom is expected to be plain ASCII text.
        wisdomBytes = tuple(
            w if isinstance(w, bytes) else w.encode('ascii')
            for w in storedWisdom)
        importStatus = pyfftw.import_wisdom(wisdomBytes)
        print("Wisdom found")
        if not np.array(importStatus).all():
            print("Wisdom not loaded correctly")

    self.x, self.y = spatialGrid
    self.kx, self.ky = kGrid
    self.K = self.kx ** 2 + self.ky ** 2
    # This is already scaled because we obtained it from the scaled grid.
    self.max_XY = np.abs(self.x[-1, -1])
    self.N = self.x.shape[0]
    self.N_THREADS = N_THREADS
    self.time = 0

    # TODO: Allow for rectangular grids.
    assert self.x.shape == self.y.shape, (
        "Spatial grids are not the same shape")
    assert self.kx.shape == self.ky.shape, "k grids are not the same shape"
    assert self.x.shape == self.kx.shape, (
        "Spatial grids are not the same shape as k grid")
    assert self.x.shape == (self.N, self.N), 'Grid must be square.'

    # NOTE(review): self.damping is only assigned for damping == 'default';
    # for any other value the attribute is left unset here - confirm that
    # the rest of the class copes with that.
    if damping == 'default':
        tanhDamping = UtilityFunctions.RadialTanhDamping(self.max_XY)
        # We can use the unscaled function here because max_XY is already
        # scaled.
        # A damping mask
        self.damping = tanhDamping.unscaledFunction()(self.x, self.y)

    # Set up fftw objects
    # Optimal alignment for this CPU
    self.al = pyfftw.simd_alignment
    self.psi = pyfftw.n_byte_align_empty((self.N, self.N), self.al,
                                         'complex128')
    # Both plans transform self.psi in place over both axes.
    self.fft_object = pyfftw.FFTW(self.psi, self.psi,
                                  flags=[FFTW_METHOD],
                                  axes=(0, 1), threads=N_THREADS)
    self.ifft_object = pyfftw.FFTW(self.psi, self.psi,
                                   flags=[FFTW_METHOD],
                                   axes=(0, 1), threads=N_THREADS,
                                   direction='FFTW_BACKWARD')

    # Save pyfftw's newfound wisdom. Entries are converted to text first
    # because JSON cannot serialise byte strings on every interpreter.
    wisdomText = [
        w.decode('ascii') if isinstance(w, bytes) else w
        for w in pyfftw.export_wisdom()]
    with open(WISDOM_LOCATION, 'w+') as f:
        json.dump(wisdomText, f)