def set_numexpr_threads(n=None):
    # If we are using numexpr, set the number of threads to n;
    # otherwise reset to the detected number of cores.
    if _NUMEXPR_INSTALLED and _USE_NUMEXPR:
        if n is None:
            n = ne.detect_number_of_cores()
        ne.set_num_threads(n)
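
# --- usage sketch (not part of the original module) ----------------------
# A minimal, self-contained demonstration of the helper above; the two
# module-level flags are assumptions standing in for the real ones, which
# are normally set at import time.
import numexpr as ne

_NUMEXPR_INSTALLED = True  # assumed flag
_USE_NUMEXPR = True        # assumed flag

set_numexpr_threads(4)     # pin numexpr to 4 threads
set_numexpr_threads()      # reset to the detected core count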
def calc_energy(data):
    """Calculate the energy of the entire system.

    :Parameters:
        **data** -- the standard python data dictionary
    """
    # name relevant variables
    x = data['x']
    y = data['y']
    nv = data['nv']
    rho = 1000  # density of water

    # initialize the per-triangle area array to zero
    l = nv.shape[0]
    area = np.zeros(l)
    for i in range(l):
        # first, calculate the area of the triangle
        xCoords = x[nv[i, :]]
        yCoords = y[nv[i, :]]
        # Compute two vectors for the area calculation.
        v1x = xCoords[1] - xCoords[0]
        v2x = xCoords[2] - xCoords[0]
        v1y = yCoords[1] - yCoords[0]
        v2y = yCoords[2] - yCoords[0]
        # |v1 x v2| is twice the triangle area; the missing factor of 1/2
        # (and the 1/2 from kinetic energy) is absorbed in the /4 below
        area[i] = abs(v1x * v2y - v2x * v1y)

    # get a vector of speeds
    sdata = calc_speed(data)
    speed = sdata['speed']

    # calculate EK; use numexpr for speed (saves ~15 seconds)
    ne.set_num_threads(ne.detect_number_of_cores())
    ek = ne.evaluate("sum(rho * area * speed * speed, 1)")
    ek = ek / 4
    data['ek'] = ek
    return data
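
# --- optional vectorized variant (a sketch, not from the original) -------
# The per-triangle loop above can be replaced by one vectorized numpy pass;
# `x`, `y` and `nv` have the same meaning as in calc_energy, and nv is
# assumed to be an (n, 3) integer index array.
def calc_triangle_areas(x, y, nv):
    """Return twice the area of every triangle, matching the loop above."""
    xc = x[nv]  # (n, 3) triangle x-coordinates
    yc = y[nv]  # (n, 3) triangle y-coordinates
    v1x = xc[:, 1] - xc[:, 0]
    v2x = xc[:, 2] - xc[:, 0]
    v1y = yc[:, 1] - yc[:, 0]
    v2y = yc[:, 2] - yc[:, 0]
    return np.abs(v1x * v2y - v2x * v1y)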
def initSysVar(fgView=False):
    '''Initialize system environment parameters and some global variables.'''
    # ---------- set plotting & data-output formats
    mpl.style.use('seaborn-whitegrid')
    pd.set_option('display.width', 450)
    #
    zsys.tim0_sys = arrow.now()
    zsys.tim0_str = zsys.tim0_sys.format('YYYY-MM-DD HH:mm:ss')
    # --------------
    zsys.cpu_num_core = psu.cpu_count(logical=False)
    zsys.cpu_num9 = psu.cpu_count()
    zsys.cpu_num = round(zsys.cpu_num9 * 0.8)
    ne.set_num_threads(zsys.cpu_num)
    #
    if fgView:
        print('cpu_num_core:', zsys.cpu_num_core)
        print('cpu_num9:', zsys.cpu_num9)
        print('cpu_num:', zsys.cpu_num)
        #
        print('tim0_str:', zsys.tim0_str)
        print('tim0_sys:', zsys.tim0_sys)
        #
        print('tim0.year:', zsys.tim0_sys.year)
        print('tim0.month:', zsys.tim0_sys.month)
        print('tim0.day:', zsys.tim0_sys.day)
        #
        print('tim0.shift(-2):', zsys.tim0_sys.shift(days=-2))
        print('tim0.shift(2):', zsys.tim0_sys.shift(days=2))
def numexpr_darts(nd=200000, nprocs=1):
    """'nd' is the number of darts. 'nprocs' is the number of processors."""
    ne.set_num_threads(nprocs)

    # A numexpr version of a throw_dart helper function
    def throw_dart():
        '''
        Throw "nd" darts at a square of side length 1.0, and return how
        many times they landed in a concentric circle of radius 0.5.
        '''
        x = np.random.uniform(size=nd)
        y = np.random.uniform(size=nd)
        expr1 = ne.evaluate('(x - 0.5)**2 + (y - 0.5)**2')
        radius = np.sqrt(expr1)
        in_or_out = ne.evaluate('radius <= 0.5')
        in_or_out = in_or_out.astype(int)  # np.int is removed in modern numpy
        total_inside = ne.evaluate('sum(in_or_out)')
        return total_inside

    number_of_darts_in_circle = 0
    # Record the simulation time
    start_time = time()
    number_of_darts_in_circle = throw_dart()
    end_time = time()
    execution_time = end_time - start_time

    number_of_darts = nd
    pi_approx = 4 * number_of_darts_in_circle / float(number_of_darts)
    # Return a tuple containing:
    #   (0) Number of darts
    #   (1) Execution time
    #   (2) Darts thrown per second
    return (number_of_darts, execution_time, number_of_darts / execution_time)
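
# --- usage sketch (assumes the same module-level imports as the function
# above: numpy as np, numexpr as ne, and time.time imported as time) ------
if __name__ == '__main__':
    for nprocs in (1, 2, 4):
        darts, secs, rate = numexpr_darts(nd=2_000_000, nprocs=nprocs)
        print("{} thread(s): {} darts in {:.3f}s ({:,.0f} darts/s)".format(
            nprocs, darts, secs, rate))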
def start_kernel(self):
    self.DTYPE = self._dtype
    self._G = getattr(np, self.DTYPE)(self._G)
    ne.set_num_threads(ne.detect_number_of_cores())

    # Get const values
    self.MASS_LEN = len(self)
    self.SIM_DIM = len(self._mass_list[0]._r)

    # Allocate memory: object parameters
    self.mass_r_array = np.zeros((self.MASS_LEN, self.SIM_DIM), dtype=self.DTYPE)
    self.mass_a_array = np.zeros((self.MASS_LEN, self.SIM_DIM), dtype=self.DTYPE)
    self.mass_m_array = np.zeros((self.MASS_LEN,), dtype=self.DTYPE)

    # Copy const data into numpy infrastructure
    for pm_index, pm in enumerate(self._mass_list):
        self.mass_m_array[pm_index] = pm._m

    # Allocate memory: temporary variables
    self.relative_r = np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE)
    self.distance_sq = np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE)
    self.distance_sqv = np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE)
    self.distance_inv = np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE)
    self.a_factor = np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE)
    self.a1 = np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE)
    self.a1r = np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE)
    self.a1v = np.zeros((self.SIM_DIM,), dtype=self.DTYPE)
    self.a2 = np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE)
    self.a2r = np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE)
def compute(x, nt):
    if what == "numpy":
        y = compute_parallel(expr, x, nt)
    else:
        ne.set_num_threads(nt)
        y = ne.evaluate(expr)
    return y
def apply_spacetime_smoothing(self):
    """
    Apply spacetime smoothing to linear models.
    """
    if self.log_results:
        self.warnings.time_stamp("Apply spacetime smoothing")
    self.st_models.apply_smoothing(
        self.mod_inputs.data_frame,
        self.mod_inputs.knockouts,
        self.mod_inputs.omega_age_smooth,
        self.mod_inputs.lambda_time_smooth,
        self.mod_inputs.lambda_time_smooth_nodata,
        self.mod_inputs.zeta_space_smooth,
        self.mod_inputs.zeta_space_smooth_nodata)
    if self.log_results:
        self.warnings.time_stamp("Reset residuals")
    self.st_models.reset_residuals(self.mod_inputs.data_frame,
                                   self.mod_inputs.knockouts,
                                   self.mod_inputs.response_list)
    if self.debug_mode:
        self.st_models.pred_mat_st = \
            self.st_models.all_models[-1].st_smooth_mat.copy()
    ne.set_num_threads(1)
def test_changing_nthreads_01_dec(self):
    a = linspace(-1, 1, 10**6)  # the element count must be an integer
    b = ((.25 * a + .75) * a - 1.5) * a - 2
    for nthreads in range(6, 1, -1):
        numexpr.set_num_threads(nthreads)
        c = evaluate("((.25*a + .75)*a - 1.5)*a - 2")
        assert_array_almost_equal(b, c)
def test_profiling_disables_threadpools(tmpdir):
    """
    Memory profiling disables thread pools, then restores them when done.
    """
    cwd = os.getcwd()
    os.chdir(tmpdir)
    import numexpr
    import blosc

    numexpr.set_num_threads(3)
    blosc.set_nthreads(3)
    with threadpoolctl.threadpool_limits(3, "blas"):
        with run_with_profile():
            assert numexpr.set_num_threads(2) == 1
            assert blosc.set_nthreads(2) == 1
            for d in threadpoolctl.threadpool_info():
                assert d["num_threads"] == 1, d

        # Resets when done:
        assert numexpr.set_num_threads(2) == 3
        assert blosc.set_nthreads(2) == 3
        for d in threadpoolctl.threadpool_info():
            if d["user_api"] == "blas":
                assert d["num_threads"] == 3, d
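
# --- aside (a sketch, not from the original test) -------------------------
# The assertions above rely on numexpr.set_num_threads() returning the
# *previous* thread count, which makes save/restore patterns simple:
import numexpr

previous = numexpr.set_num_threads(1)  # returns the old setting
# ... run a section that must stay single-threaded ...
numexpr.set_num_threads(previous)      # restore the old setting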
def payoff(self, central=None, i=0.001, n=1000):
    if central is None:
        central = self.strike
    # `i` is the price increment (default 0.001)
    start = -n
    end = n + 1
    s = np.arange(start, end, 1, float) * float(i) + float(central)
    k = np.ones(end - start, float) * float(self.strike)
    order = (np.ones(end - start, float) * (-1.)
             if self.order.lower() == "sell"
             else np.ones(end - start, float))
    p = np.ones(end - start, float) * float(self.price)
    amount = np.ones(end - start) * self.amount
    print(s)
    # call: (|s-k| + |s+k|)/2 - k  ==  max(s - k, 0) for s, k >= 0
    if self.product.lower() == "call":
        f = "amount*order*((abs(s-k)+abs(s+k))/2-k-p)"
    # put: (|s-k| + |s+k|)/2 - s  ==  max(k - s, 0) for s, k >= 0
    else:
        f = "amount*order*((abs(s-k)+abs(s+k))/2-s-p)"
    ne.set_num_threads(4)
    # return np.column_stack((s, ne.evaluate(f)))
    return s, ne.evaluate(f)
def _start_worker(object, proc_id, affinity, worker_queue, pause_on_start=False):
    """
    Helper method for worker process startup. Sets up affinity, and calls
    the _run_worker method on object with the specified worker queue.

    Args:
        object:
        proc_id:
        affinity:
        worker_queue:

    Returns:

    """
    if pause_on_start:
        os.kill(os.getpid(), signal.SIGSTOP)
    numexpr.set_num_threads(1)  # no sub-threads in workers, as it messes with everything
    _pyArrays.pySetOMPNumThreads(1)
    _pyArrays.pySetOMPDynamicNumThreads(1)
    AsyncProcessPool.proc_id = proc_id
    MyLogger.subprocess_id = proc_id
    if affinity:
        psutil.Process().cpu_affinity(affinity)
    object._run_worker(worker_queue)
    if object.verbose:
        print(ModColor.Str("exiting worker pid %d" % os.getpid()), file=log)
def loop_3(loops, threads):
    # using numexpr and multiple threads
    start_time = time.time()
    ne.set_num_threads(threads)
    a = np.arange(1, loops)
    f = '3 * log(a) + cos(a) ** 2'
    r = ne.evaluate(f)
    print("Execution time is %5.3fs" % (time.time() - start_time))
def _great_circle_distance_fast(ra1, dec1, ra2, dec2, nthreads):
    """
    Computes the `great circle distance` between two points.

    Parameters
    ----------
    ra1, dec1 : array_like
        Right Ascension and Declination of the 1st point. Units in `degrees`.

    ra2, dec2 : array_like
        Right Ascension and Declination of the 2nd point. Units in `degrees`.

    nthreads : int
        Number of threads to use for the calculation.

    Returns
    ----------
    great_circle_dist : array_like
        Great circle distance between the 1st and 2nd locations, in degrees.

    Notes
    ----------
    This function uses the Vincenty distance. For more information see:
    https://en.wikipedia.org/wiki/Vincenty%27s_formulae
    It is a bit slower than others, but numerically stable.
    """
    import numexpr as ne
    ##
    ## Terminology from the Vincenty formula - `lambda` and `phi` and
    ## `standpoint` and `forepoint`
    lambs = np.radians(ra1)
    phis = np.radians(dec1)
    lambf = np.radians(ra2)
    phif = np.radians(dec2)
    # Calculations
    dlamb = lambf - lambs
    ## Number of threads
    ne.set_num_threads(nthreads)
    ## Constants for evaluation
    # Calculate these once instead of several times!
    hold1 = ne.evaluate('sin(phif)')
    hold2 = ne.evaluate('sin(phis)')
    hold3 = ne.evaluate('cos(phif)')
    hold4 = ne.evaluate('cos(dlamb)')
    hold5 = ne.evaluate('cos(phis)')
    numera = ne.evaluate('hold3 * sin(dlamb)')
    numerb = ne.evaluate('hold5 * hold1 - hold2 * hold3 * hold4')
    numer = ne.evaluate('sqrt(numera**2 + numerb**2)')
    denom = ne.evaluate('hold2 * hold1 + hold5 * hold3 * hold4')
    pi = math.pi
    # Great circle distance
    great_circle_dist = ne.evaluate('(arctan2(numer, denom))*180.0/pi')

    return great_circle_dist
def nExpr(np_a, thread):
    ne.set_num_threads(thread)
    start = time.time()
    f = getExpr()
    r = ne.evaluate(f)
    end = time.time()
    print("Time elapsed with numexpression [{} threaded] is:{}".format(
        thread, end - start))
    return [round(elem, 8) for elem in r]
def asarray(self, binpoint=13):
    ne.set_num_threads(32)
    try:
        data = self.data.reshape(-1, self._nCh).numpy()
    except AttributeError:
        # self.data is already a numpy array rather than a torch tensor
        data = self.data.reshape(-1, self._nCh)
    _scale = np.float32(2**binpoint)
    self.data = torch.from_numpy(ne.evaluate('data/_scale'))
    return self.data.numpy()
def __init__(self, endog, exog, **kwargs):
    super(NLRQ, self).__init__(endog, exog, **kwargs)

    ne.set_num_threads(8)

    self._initialize()
    self.PreEstimation = EventHook()
    self.PostVarianceCalculation = EventHook()
    self.PostEstimation = EventHook()
    self.PostInnerStep = EventHook()
    self.PostOuterStep = EventHook()
def execute(threadCount):
    n = 100000000  # 10 times fewer than C due to speed issues.
    delta = 1.0 / n
    startTime = time()
    set_num_threads(threadCount)
    value = arange(n, dtype=double)
    pi = 4.0 * delta * evaluate('1.0 / (1.0 + ((value - 0.5) * delta) ** 2)').sum()
    elapseTime = time() - startTime
    out(__file__, pi, n, elapseTime, threadCount)
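
# --- standalone sketch of the quadrature above (not from the original) ----
# The expression approximates pi via the midpoint rule applied to the
# integral of 4 / (1 + x^2) over [0, 1]; a smaller n is used here so the
# check runs quickly, and the estimate tightens as n grows.
import math
import numpy as np
import numexpr as ne

n = 10**6
delta = 1.0 / n
value = np.arange(n, dtype=np.double)
pi_est = 4.0 * delta * ne.evaluate('1.0 / (1.0 + ((value - 0.5) * delta) ** 2)').sum()
assert abs(pi_est - math.pi) < 1e-5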
def __init__(self, Precision="D", NCPU=6):
    self.NCPU = NCPU
    ne.set_num_threads(self.NCPU)
    if Precision == "D":
        self.CType = np.complex128
        self.FType = np.float64
    if Precision == "S":
        self.CType = np.complex64
        self.FType = np.float32
def single_threaded(A_np, expression):
    """
    Args:
        A_np: <np.Array> to evaluate expression on
        expression: <str> expression to evaluate

    Returns:
        Result of single-threaded evaluation of the given expression
    """
    ne.set_num_threads(1)
    return ne.evaluate(expression)
def main() -> None:
    """
    Parse arguments and call the function depending on whether the oneshot
    or fullmodel subcommand was used, passing the parsed arguments.
    """
    parser = _setup_parser()
    args = parser.parse_args()
    if args.cores:
        numexpr.set_num_threads(args.cores)
    args.func(args)
def p_haversine_pairwise(t1, t2, threads=4):
    """
    Calculate the great circle distance between two sets of points on the
    earth (specified in decimal degrees) in a pairwise fashion.
    """
    import numexpr as ne

    lon1, lat1, lon2, lat2 = map(
        np.radians, [t1.v('lon'), t1.v('lat'), t2.v('lon'), t2.v('lat')])

    n1 = lon1.size
    n2 = lon2.size

    lon1g = np.tile(lon1, (n2, 1))
    lat1g = np.tile(lat1, (n2, 1))
    lon2g = np.tile(np.array([lon2]).T, (1, n1))
    lat2g = np.tile(np.array([lat2]).T, (1, n1))

    lon1v = lon1g.reshape(n2 * n1)
    lat1v = lat1g.reshape(n2 * n1)
    lon2v = lon2g.reshape(n2 * n1)
    lat2v = lat2g.reshape(n2 * n1)

    dlon = lon2v - lon1v
    dlat = lat2v - lat1v

    # Haversine formula, evaluated in a single numexpr expression:
    #   a  = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    #   km = 6367 * 2 * arcsin(sqrt(a))
    ne.set_num_threads(threads)
    km = ne.evaluate(
        '6367 * 2 * arcsin( sqrt( (sin(dlat/2.0)**2 + cos(lat1v) * cos(lat2v) * sin(dlon/2.0)**2) ) )'
    )

    ret = km.reshape(n2, n1)
    return ret
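
# --- usage sketch (assumption: t1/t2 expose a .v(name) accessor like the
# container used above; a tiny stand-in class and numpy as np are assumed
# here for illustration only) ----------------------------------------------
class _Pts:
    def __init__(self, lon, lat):
        self._d = {'lon': np.asarray(lon), 'lat': np.asarray(lat)}

    def v(self, name):
        return self._d[name]

a = _Pts([0.0], [0.0])
b = _Pts([0.0, 90.0], [0.0, 0.0])
# ~[[0], [10001]] km: a quarter of the earth's circumference for the
# second pair (90 degrees apart along the equator)
print(p_haversine_pairwise(a, b))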
def __init__(self, complexity_threshold=2, multicore=True):
    if numexpr_ver is None or numexpr_ver < (2, 0, 0):
        raise ImportError("numexpr version 2.0.0 or better required.")
    self.complexity_threshold = complexity_threshold
    nc = numexpr.detect_number_of_cores()
    if multicore is True:
        multicore = nc
    elif multicore is False:
        multicore = 1
    elif multicore <= 0:
        multicore += nc
    numexpr.set_num_threads(multicore)
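
# --- semantics sketch for the `multicore` argument above (standalone and
# illustrative; it mirrors the branch logic without the class) -------------
import numexpr

def _resolve_multicore(multicore):
    nc = numexpr.detect_number_of_cores()
    if multicore is True:
        return nc              # all detected cores
    if multicore is False:
        return 1               # single-threaded
    if multicore <= 0:
        return multicore + nc  # e.g. -1 -> all cores but one
    return multicore

print(_resolve_multicore(True), _resolve_multicore(False), _resolve_multicore(-1))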
def test_multiprocess(self):
    import multiprocessing as mp

    # Check for two threads at least
    numexpr.set_num_threads(2)
    # print "**** Running from main process:"
    _worker()
    # print "**** Running from subprocess:"
    qout = mp.Queue()
    ps = mp.Process(target=_worker, args=(qout,))
    ps.daemon = True
    ps.start()
    result = qout.get()
def initLayer(self):
    ne.set_num_threads(mp.cpu_count())  # 1 thread per core
    rlayer = self.dlg.comboBox.currentLayer()
    sensorHt = self.dlg.spinBox_sensorHt.value()
    # get list of sun vectors
    vectors = self.skyVectors()
    self.dlg.progressBar.setMaximum(len(vectors))
    scale = rlayer.rasterUnitsPerPixelX()  # assumes square pixels
    bandNum = self.dlg.spinBox_bands.value()
    maxVal = rlayer.dataProvider().bandStatistics(bandNum).maximumValue
    # QgsMessageLog.logMessage("maxVal = %s" % str(maxVal), "Plugins", 0)
    maxUsrHeight = self.dlg.spinBox_maxHt.value()
    # QgsMessageLog.logMessage("maxUsrHeight = %s" % str(maxUsrHeight), "Plugins", 0)
    unitZ = maxVal / maxUsrHeight
    # QgsMessageLog.logMessage("unitZ = %s" % str(unitZ), "Plugins", 0)
    bandCnt = rlayer.bandCount()
    data = self.shaDEM.rasterToArray(rlayer, bandNum)

    # t = time.time()
    a = data["array"].copy()
    adjSensorHt = sensorHt / unitZ
    a = ne.evaluate("a + adjSensorHt")
    # QgsMessageLog.logMessage("Adjusted Sensor Height= %s" % str(adjSensorHt), "Plugins", 0)

    svfArr = np.zeros(a.shape)
    i = 0
    for vector in vectors:
        # debug - print solar altitude angles
        # QgsMessageLog.logMessage("Vector[%i] solar alt angle: %.2f" % (i+1, math.degrees(math.atan(vector[2]/math.sqrt(vector[0]**2+vector[1]**2)))), "Profile", 0)
        result = self.shaDEM.ShadowCalc(data, vector, scale, unitZ, maxVal)
        b = result[0]
        dz = result[1]
        # count the sky vectors that are not blocked at each cell
        svfArr = ne.evaluate("where((b-a) <= 0, svfArr + 1, svfArr)")
        self.dlg.progressBar.setValue(i)
        i += 1
    # t = time.time() - t
    # QgsMessageLog.logMessage("SVF main loop : " + str(t), "Profile", 0)

    data["array"] = svfArr / self.dlg.spinBox_vectors.value()
    self.saveToFile(data)
def multi_threaded(A_np, expression, num_threads):
    """
    Args:
        A_np: <np.Array> to evaluate expression on
        expression: <str> expression to evaluate
        num_threads: <int> number of threads to use

    Returns:
        Result of multi-threaded evaluation of the given expression
    """
    # the number of threads must be an integer
    assert isinstance(num_threads, int)
    ne.set_num_threads(num_threads)
    return ne.evaluate(expression)
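
# --- benchmark sketch (standalone; the array size and expression are
# illustrative, and both helpers above are assumed to be in scope) ---------
import time
import numpy as np
import numexpr as ne

A_np = np.random.rand(10_000_000)
expression = 'sin(A_np)**2 + cos(A_np)**2'

for fn, args in ((single_threaded, (A_np, expression)),
                 (multi_threaded, (A_np, expression, 4))):
    t0 = time.time()
    fn(*args)
    print(fn.__name__, time.time() - t0)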
def main():
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['GPRDraws'])
    ne.set_num_threads(1)

    logger.info("Initiating GPR draws.")
    t = GPRDraws(model_version_id=args.model_version_id,
                 db_connection=args.db_connection,
                 debug_mode=args.debug_mode,
                 old_covariates_mvid=args.old_covariates_mvid,
                 cores=args.cores)
    t.alerts.alert("Creating GPR draws.")
    t.make_draws()
    t.save_outputs()
    t.alerts.alert("Done creating GPR draws.")
def _abx(features_path, temp_dir, task, task_type, load_fun, distance,
         normalized, njobs, log):
    """Runs the ABX pipeline"""
    dist2fun = {
        'cosine': default_distance,
        'KL': dtw_kl_distance,
        'levenshtein': edit_distance,
    }

    # convert
    log.debug('loading features ...')
    features = os.path.join(temp_dir, 'features.h5')
    if not os.path.isfile(features):
        convert(features_path, h5_filename=features, load=load_fun)

    # avoid an annoying log message
    numexpr.set_num_threads(njobs)

    log.debug('computing %s distances ...', distance)
    # ABX Distances prints some messages we do not want to display
    sys.stdout = open(os.devnull, 'w')
    distance_file = os.path.join(temp_dir, 'distance_{}.h5'.format(task_type))
    with warnings.catch_warnings():
        # inhibit some useless warnings about complex to float conversion
        warnings.filterwarnings("ignore", category=np.ComplexWarning)
        # compute the distances
        ABXpy.distances.distances.compute_distances(
            features, 'features', task, distance_file, dist2fun[distance],
            normalized, n_cpu=njobs)
    sys.stdout = sys.__stdout__

    log.debug('computing abx score ...')
    # score
    score_file = os.path.join(temp_dir, 'score_{}.h5'.format(task_type))
    score(task, distance_file, score_file)

    # analyze
    analyze_file = os.path.join(temp_dir, 'analyze_{}.csv'.format(task_type))
    analyze(task, score_file, analyze_file)

    # average
    abx_score = _average(analyze_file, task_type)
    return abx_score
def __init__(
    self,
    psi,
    potential,
    variables={},
    diag=False,
    num_of_threads=1,
    FFTWflags=(
        "FFTW_ESTIMATE",
        "FFTW_DESTROY_INPUT",
    ),
):
    """Initialize the propagator."""
    self.psi = psi
    self.v = self.Potential(potential, variables, diag)
    assert isinstance(psi, pytalises.wavefunction.Wavefunction)
    assert self.v.num_int_dim == self.psi.num_int_dim
    assert self.psi._amp.shape[-1] == self.psi.num_int_dim
    self.V_eval_array = np.zeros(
        psi.number_of_grid_points + (psi.num_int_dim, psi.num_int_dim),
        order="C",
        dtype="complex128",
    )
    self.V_eval_eigval_array = np.zeros(
        psi.number_of_grid_points + (psi.num_int_dim,),
        order="C",
        dtype="complex128",
    )
    self.num_of_threads = num_of_threads
    set_num_threads(num_of_threads)
    ne.set_num_threads(num_of_threads)
    self.psi.construct_FFT(num_of_threads, FFTWflags)
    # Choose the method for calculating the time propagation.
    # If the potential is nondiagonal, an additional numeric
    # diagonalization will be performed.
    if self.v.diag is True:
        self.prop_method = self.diag_potential_prop
    else:
        self.prop_method = self.nondiag_potential_prop
    # Check if the potential is static; if that is the case, precompute
    # the potential grid V(x,y,z) to use it for all following calculations.
    if self.v.static is True:
        if self.v.diag is True:
            self.eval_diag_V()
        if self.v.diag is False:
            self.eval_V()
            get_eig(self.V_eval_array, self.V_eval_eigval_array)
def worker_remove_interior_points(points, data, prog_dec):
    ne.set_num_threads(1)  # parallelized at the process level, not here
    for k in range(data.shape[0]):
        sphere_r_sq = (0.99 * data[k, 3])**2.
        sphere_x = data[k, 0]
        sphere_y = data[k, 1]
        sphere_z = data[k, 2]
        points_x = points[:, 0]
        points_y = points[:, 1]
        points_z = points[:, 2]
        dist_sq = ne.evaluate(
            "(points_x-sphere_x)**2 + (points_y-sphere_y)**2 + (points_z-sphere_z)**2"
        )
        points = points[dist_sq > sphere_r_sq, :]
        if k % prog_dec == 0:
            print(".", end='')
    return points
def main():
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['ApplyGPSmoothing'])
    logger.info("Initiating GPR smoothing.")
    ne.set_num_threads(1)

    t = ApplyGPSmoothing(model_version_id=args.model_version_id,
                         db_connection=args.db_connection,
                         debug_mode=args.debug_mode,
                         old_covariates_mvid=args.old_covariates_mvid,
                         cores=args.cores)
    t.alerts.alert("Applying GP Smoothing")
    t.apply_gp_smoothing()
    t.save_outputs()
    t.alerts.alert("Done with GP Smoothing")
def __init__(self, step_id, **kwargs):
    """
    General class for calculating predictive validity on either linear
    models or space-time models (they both go through the same process).
    This is sub-classed for both the linear- and spacetime-specific tasks.

    First make predictions in a uniform space, then calculate RMSE and trend.

    :param kwargs:
    """
    super().__init__(**kwargs, step_id=step_id)
    ne.set_num_threads(1)
def __init__(self, **kwargs):
    '''
    All parameters listed in _params are mandatory. In addition, optional
    arguments are:

        Nthread:  the number of threads to use in numexpr computations.
        CacheDir: the directory in which to store cached files.
    '''
    self.Nthread = kwargs.get('Nthread', 1)
    self.CacheDir = kwargs.get('CacheDir', "tmp")
    self.FDDir = environ.get('FD_DIR', ".")
    ParametricObject.__init__(self, **kwargs)
    ne.set_num_threads(self.Nthread)
    self.TDot = self.OpMatrix(self["Nxfrm"])
    self['Factory'] = self
def __init__(self, threads):
    self.totally_radical = True
    _ = ne.set_num_threads(threads)
    self.threads = threads
    self.Z_loss_series = []
    self.U_loss_series = []
    self.Z_grad_series = []
    self.U_grad_series = []
def run_experiment(num_iterations):
    previous_threads = set_num_threads(1)
    scratch = zeros(grid_shape)
    grid = zeros(grid_shape)

    block_low = int(grid_shape[0] * 0.4)
    block_high = int(grid_shape[0] * 0.5)
    grid[block_low:block_high, block_low:block_high] = 0.005

    start = time.time()
    for i in range(num_iterations):
        evolve(grid, 0.1, scratch)
        grid, scratch = scratch, grid
    set_num_threads(previous_threads)
    return time.time() - start
def __init__(self, expression='in0',
             in_sig=(numpy.complex64,), out_sig=(numpy.complex64,),
             nthreads=None):
    """
    Args:
        expression: either a NumExpr string (in0, in1, ... are the inputs)
            or a callable (in0, in1 as args) to be used in work()
        in_sig: a list of numpy dtypes as the input signature
        out_sig: a list of numpy dtypes as the output signature
        nthreads: how many threads NumExpr should use
    """
    gr.sync_block.__init__(self, "numexpr_evaluate", in_sig, out_sig)
    self._expression = None
    if numexpr and nthreads:
        numexpr.set_num_threads(nthreads)
    self.expression = expression
def calc_speed(data):
    """
    Calculates the speed from ua and va

    :Parameters:
        **data** -- the standard python data dictionary

    .. note:: We use numexpr here because, with multiple threads, it is
        about 30 times faster than direct calculation.
    """
    # name required variables
    ua = data['ua']
    va = data['va']

    # we can take advantage of multiple cores to do this calculation
    ne.set_num_threads(ne.detect_number_of_cores())
    # calculate the speed at each point
    data['speed'] = ne.evaluate("sqrt(ua*ua + va*va)")
    return data
def spherical_to_cartesian(ra, dec, threads=1):
    """
    Inputs in degrees. Outputs x, y, z.
    """
    import numexpr as ne
    import math

    ne.set_num_threads(threads)
    pi = math.pi
    rar = ne.evaluate('ra*pi/180.0')
    decr = ne.evaluate('dec*pi/180.0')
    hold1 = ne.evaluate('cos(decr)')

    x = ne.evaluate('cos(rar) * hold1')
    y = ne.evaluate('sin(rar) * hold1')
    z = ne.evaluate('sin(decr)')

    return x, y, z
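
# --- usage sketch (numpy as np is assumed, as elsewhere in this file) -----
ra = np.array([0.0, 90.0, 0.0])
dec = np.array([0.0, 0.0, 90.0])
x, y, z = spherical_to_cartesian(ra, dec, threads=2)
# expected unit vectors: (1,0,0), (0,1,0), (0,0,1) up to rounding
print(np.round(np.c_[x, y, z], 12))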
def _great_circle_distance_fast(ra1, dec1, ra2, dec2, threads):
    """
    (Private internal function)

    Returns the great circle distance. Inputs in degrees.

    Uses the Vincenty distance formula - a bit slower than others, but
    numerically stable. A faster version than the function above.
    """
    import numexpr as ne

    # terminology from the Vincenty formula - lambda and phi and
    # "standpoint" and "forepoint"
    lambs = np.radians(ra1)
    phis = np.radians(dec1)
    lambf = np.radians(ra2)
    phif = np.radians(dec2)

    dlamb = lambf - lambs

    # using numexpr
    # nthreads = ne.detect_number_of_cores()
    nthreads = threads
    ne.set_num_threads(nthreads)

    # calculate these once instead of a few times!
    hold1 = ne.evaluate('sin(phif)')
    hold2 = ne.evaluate('sin(phis)')
    hold3 = ne.evaluate('cos(phif)')
    hold4 = ne.evaluate('cos(dlamb)')
    hold5 = ne.evaluate('cos(phis)')

    numera = ne.evaluate('hold3 * sin(dlamb)')
    numerb = ne.evaluate('hold5 * hold1 - hold2 * hold3 * hold4')
    numer = ne.evaluate('sqrt(numera**2 + numerb**2)')
    denom = ne.evaluate('hold2 * hold1 + hold5 * hold3 * hold4')
    pi = math.pi
    return ne.evaluate('(arctan2(numer, denom))*180.0/pi')
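
# --- sanity-check sketch (np and math assumed imported at module level) ---
# Two points 90 degrees apart along the equator should be 90 degrees apart
# on the great circle:
d = _great_circle_distance_fast(np.array([0.0]), np.array([0.0]),
                                np.array([90.0]), np.array([0.0]), threads=1)
assert np.allclose(d, 90.0)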
def _spherical_to_cartesian_fast(ra, dec, threads):
    """
    (Private internal function)

    Inputs in degrees. Outputs x, y, z.

    A faster version than the function above.
    """
    import numexpr as ne

    # nthreads = ne.detect_number_of_cores()
    nthreads = threads
    ne.set_num_threads(nthreads)

    pi = math.pi
    rar = ne.evaluate('ra*pi/180.0')
    decr = ne.evaluate('dec*pi/180.0')
    hold1 = ne.evaluate('cos(decr)')

    x = ne.evaluate('cos(rar) * hold1')
    y = ne.evaluate('sin(rar) * hold1')
    z = ne.evaluate('sin(decr)')

    return x, y, z
""" from __future__ import absolute_import import os import sys sys.setrecursionlimit(8192) import time import re import shutil import zlib import numpy as np import bcolz import numexpr as ne bcolz.blosc_set_nthreads(2) ne.set_num_threads(2) import sqlalchemy as sql from . import database from . import compression from .gemini_utils import get_gt_cols def get_samples(metadata): return [x['name'] for x in metadata.tables['samples'].select().order_by("sample_id").execute()] def get_n_variants(cur): return next(iter(cur.execute(sql.text("select count(*) from variants"))))[0] def get_bcolz_dir(db): if not "://" in db: return db + ".gts"
blockxsize = args['--blockxsize']
if blockxsize.lower() == 'none':
    blockxsize = None
else:
    blockxsize = int(blockxsize)

blockysize = args['--blockysize']
if blockysize.lower() == 'none':
    blockysize = None
else:
    blockysize = int(blockysize)

ncpu = int(args['--ncpu'])
if ncpu > 1:
    logger.warning('NCPU only supported for `numexpr` so far...')
    ne.set_num_threads(ncpu)

logger.debug('Finding pairs of MODIS data')
pairs = find_MODIS_pairs(location, pattern)
output_names = get_output_names(pairs, outdir)
logger.info('Found {n} pairs of M[OY]D09GQ and M[OY]D09GA'.format(
    n=len(pairs)))

if resume:
    pairs, output_names = check_resume(pairs, output_names)
    logger.info('Resuming calculation for {n} files'.format(n=len(pairs)))

failed = 0
for i, (p, o) in enumerate(zip(pairs, output_names)):
    ret = []
    for view in views:
        if isinstance(view, View):
            ret.append(view.ndarray)
        else:
            ret.append(view)
    return ret


def get_data_pointer(ary, allocate=False, nullify=False):
    return target_numpy.get_data_pointer(ary, allocate, nullify)


def set_bhc_data_from_ary(self, ary):
    return target_numpy.set_bhc_data_from_ary(self, ary)


# Setup numexpr
_nthreads = int(os.getenv('OMP_NUM_THREADS', 1))
numexpr.set_num_threads(_nthreads)
print("using numexpr target with %d threads" % _nthreads)

UFUNC_CMDS = {
    'identity': "i1",
    'add': "i1 + i2",
    'subtract': "i1 - i2",
    'multiply': "i1 * i2",
    'divide': "i1 / i2",
    'power': "i1**i2",
    'absolute': "abs(i1)",
    'sqrt': "sqrt(i1)",
}


def ufunc(op, *args):
    from bcolz.py2help import check_output

    # make an absolute path if required, for example when running in a clone
    if not path.isabs(path_):
        path_ = path.join(os.getcwd(), path_)
    # look up the commit using subprocess and git describe
    try:
        # redirect stderr to stdout to make sure the git error message in case
        # we are not in a git repo doesn't appear on the screen and confuse the
        # user.
        label = check_output(["git", "describe"], cwd=path_,
                             stderr=subprocess.STDOUT).strip()
        return label
    except OSError:
        # in case git wasn't found
        pass
    except subprocess.CalledProcessError:
        # not in git repo
        pass


git_description = _get_git_descrtiption(__path__[0])

# Initialization code for the Blosc and numexpr libraries
_blosc_init()
ncores = detect_number_of_cores()
# Benchmarks show that using several threads can be an advantage in bcolz
blosc_set_nthreads(ncores)
if numexpr_here:
    numexpr.set_num_threads(ncores)
import atexit
atexit.register(_blosc_destroy)
def backpropagate_3d_tilted(uSin, angles, res, nm, lD=0,
                            tilted_axis=[0, 1, 0],
                            coords=None, weight_angles=True, onlyreal=False,
                            offset_alpha=0, offset_beta=0,
                            padding=(True, True), padfac=1.75, padval=None,
                            intp_order=2, dtype=_np_float64,
                            num_cores=_ncores, save_memory=False,
                            copy=True, jmc=None, jmm=None, verbose=_verbose):
    u"""3D backpropagation with the Fourier diffraction theorem

    Three-dimensional diffraction tomography reconstruction algorithm for
    scattering of a plane wave :math:`u_0(\mathbf{r}) = u_0(x,y,z)` by a
    dielectric object with refractive index :math:`n(x,y,z)`.

    This method implements the 3D backpropagation algorithm with a
    rotational axis that is tilted by :math:`\\theta_\mathrm{tilt}`
    w.r.t. the imaging plane [3]_

    .. math::
        f(\mathbf{r}) =
            -\\frac{i k_\mathrm{m}}{2\pi}
            \\sum_{j=1}^{N} \! \Delta \phi_0 D_{-\phi_j}^\mathrm{tilt} \!\!
            \\left \{
            \\text{FFT}^{-1}_{\mathrm{2D}}
            \\left \{
            \\left| k_\mathrm{Dx} \cdot \cos \\theta_\mathrm{tilt}\\right|
            \\frac{\\text{FFT}_{\mathrm{2D}} \\left \{
            u_{\mathrm{B},\phi_j}(x_\mathrm{D}, y_\mathrm{D}) \\right \}}
            {u_0(l_\mathrm{D})}
            \exp \! \\left[i k_\mathrm{m}(M - 1) \cdot
            (z_{\phi_j}-l_\mathrm{D}) \\right]
            \\right \}
            \\right \}

    with a modified rotational operator :math:`D_{-\phi_j}^\mathrm{tilt}`
    and a different filter in Fourier space
    :math:`|k_\mathrm{Dx} \cdot \cos \\theta_\mathrm{tilt}|` when compared
    to :func:`backpropagate_3d`.

    .. versionadded:: 0.1.2

    Parameters
    ----------
    uSin : (A, Ny, Nx) ndarray
        Three-dimensional sinogram of plane recordings
        :math:`u_{\mathrm{B}, \phi_j}(x_\mathrm{D}, y_\mathrm{D})`
        divided by the incident plane wave :math:`u_0(l_\mathrm{D})`
        measured at the detector.
    angles : ndarray of shape (A,3) or 1D array of length A
        If the shape is (A,3), then ``angles`` consists of vectors on the
        unit sphere that correspond to the direction of illumination and
        acquisition (s₀). If the shape is (A,), then ``angles`` is a
        one-dimensional array of angles in radians that determines the
        angular position :math:`\phi_j`. In both cases, ``tilted_axis``
        must be set according to the tilt of the rotational axis.
    res : float
        Vacuum wavelength of the light :math:`\lambda` in pixels.
    nm : float
        Refractive index of the surrounding medium :math:`n_\mathrm{m}`.
    lD : float
        Distance from center of rotation to detector plane
        :math:`l_\mathrm{D}` in pixels.
    tilted_axis : list of floats
        The coordinates [x, y, z] on a unit sphere representing the tilted
        axis of rotation. The default is (0,1,0), which corresponds to a
        rotation about the y-axis and follows the behavior of
        :func:`odtbrain.backpropagate_3d`.
    coords : None [(3, M) ndarray]
        Only compute the output image at these coordinates. This keyword
        is reserved for future versions and is not implemented yet.
    weight_angles : bool
        If ``True``, weights each backpropagated projection with a factor
        proportional to the angular distance between the neighboring
        projections.

        .. math::
            \Delta \phi_0 \\longmapsto \Delta \phi_j =
                \\frac{\phi_{j+1} - \phi_{j-1}}{2}

        This currently only works when `angles` has the shape (A,).
    onlyreal : bool
        If ``True``, only the real part of the reconstructed image will be
        returned. This saves computation time.
    padding : tuple of bool
        Pad the input data to the second next power of 2 before Fourier
        transforming. This reduces artifacts and speeds up the process for
        input image sizes that are not powers of 2. The default is padding
        in x and y: ``padding=(True, True)``. For padding only in
        x-direction (e.g. for cylindrical symmetries), set ``padding`` to
        ``(True, False)``. To turn off padding, set it to
        ``(False, False)``.
    padfac : float
        Increase padding size of the input data. A value greater than one
        will trigger padding to the second-next power of two. For example,
        a value of 1.75 will lead to a padded size of 256 for an initial
        size of 144, whereas it will lead to a padded size of 512 for an
        initial size of 150. Values greater than 2 are allowed. This
        parameter may greatly increase memory usage!
    padval : float
        The value used for padding. This is important for the Rytov
        approximation, where an approximate zero in the phase might
        translate to 2πi due to the unwrapping algorithm. In that case,
        this value should be a multiple of 2πi. If ``padval`` is ``None``,
        then the edge values are used for padding (see documentation of
        :func:`numpy.pad`).
    intp_order : int between 0 and 5
        Order of the interpolation for rotation. See
        :func:`scipy.ndimage.interpolation.affine_transform` for details.
    dtype : dtype object or argument for :func:`numpy.dtype`
        The data type that is used for calculations (float or double).
        Defaults to ``numpy.float64``.
    num_cores : int
        The number of cores to use for parallel operations. This value
        defaults to the number of cores on the system.
    save_memory : bool
        Saves memory at the cost of longer computation time.

        .. versionadded:: 0.1.5

    copy : bool
        Copy input sinogram ``uSin`` for data processing. If ``copy`` is
        set to ``False``, then ``uSin`` will be overridden.

        .. versionadded:: 0.1.5

    jmc, jmm : instance of :func:`multiprocessing.Value` or ``None``
        The progress of this function can be monitored with the
        :mod:`jobmanager` package. The current step ``jmc.value`` is
        incremented ``jmm.value`` times. ``jmm.value`` is set at the
        beginning.
    verbose : int
        Increment to increase verbosity.

    Returns
    -------
    f : ndarray of shape (Nx, Ny, Nx), complex if ``onlyreal==False``
        Reconstructed object function :math:`f(\mathbf{r})` as defined by
        the Helmholtz equation.
        :math:`f(x,z) =
        k_m^2 \\left(\\left(\\frac{n(x,z)}{n_m}\\right)^2 -1\\right)`

    See Also
    --------
    odt_to_ri : conversion of the object function :math:`f(\mathbf{r})`
        to refractive index :math:`n(\mathbf{r})`.

    Notes
    -----
    This implementation can deal with projection angles that are not
    distributed along a circle about the rotational axis. If there are
    slight deviations from this circle, simply pass the 3D rotational
    positions instead of the 1D angles to the `angles` argument. In
    principle, this should improve the reconstruction. The general problem
    here is that the backpropagation algorithm requires a ramp filter in
    Fourier space that is oriented perpendicular to the rotational axis.
    If the sample does not rotate about a single axis, then a 1D
    parametric representation of this rotation must be found to correctly
    determine the filter in Fourier space. Such a parametric
    representation could e.g. be a spiral between the poles of the unit
    sphere (but this kind of rotation is probably difficult to implement
    experimentally).

    Do not use the parameter `lD` in combination with the Rytov
    approximation - the propagation is not correctly described. Instead,
    numerically refocus the sinogram prior to converting it to Rytov data
    (using e.g. :func:`odtbrain.sinogram_as_rytov`) with a numerical
    focusing algorithm (available in the Python package :py:mod:`nrefocus`).
    """
    ne.set_num_threads(num_cores)

    if copy:
        uSin = uSin.copy()
        angles = angles.copy()

    # `tilted_axis` is required for several things:
    #    1. the filter |kDx*v + kDy*u| with (u,v,w)==tilted_axis
    #    2. the alignment of the rotational axis with the y-axis
    #    3. the determination of the point coordinates if only
    #       angles in radians are given.
    #
    # For (1) we need the exact axis that corresponds to our input data.
    # For (2) and (3) we need `tilted_axis_yz` (see below) which is the
    # axis `tilted_axis` rotated in the detector plane such that its
    # projection onto the detector coincides with the y-axis.

    # Normalize input axis
    tilted_axis = norm_vec(tilted_axis)

    # `tilted_axis_yz` is computed by performing the inverse rotation in
    # the x-y plane with `angz`. We will again use `angz` in the transform
    # within the for-loop to rotate each projection according to its
    # acquisition angle.
    angz = np.arctan2(tilted_axis[0], tilted_axis[1])
    rotmat = np.array([
        [np.cos(angz), -np.sin(angz), 0],
        [np.sin(angz), np.cos(angz), 0],
        [0, 0, 1],
    ])
    # rotate `tilted_axis` onto the y-z plane.
    tilted_axis_yz = norm_vec(np.dot(rotmat, tilted_axis))

    A = angles.shape[0]
    angles = np.squeeze(angles)  # Allow shapes (A,1)
    assert angles.shape in [(A,), (A, 3)], \
        "`angles` must have shape (A,) or (A,3)!"

    # jobmanager
    if jmm is not None:
        jmm.value = A + 2

    if len(angles.shape) == 1:
        if weight_angles:
            weights = util.compute_angle_weights_1d(angles).reshape(-1, 1, 1)
        # compute the 3D points from tilted axis
        angles = sphere_points_from_angles_and_tilt(angles, tilted_axis_yz)
    else:
        if weight_angles:
            warnings.warn("3D angular weighting not yet supported!")
            weights = 1
        # normalize and rotate angles
        for ii in range(angles.shape[0]):
            # angles[ii] = norm_vec(angles[ii])  #-> not correct
            # instead rotate like `tilted_axis` onto the y-z plane.
            angles[ii] = norm_vec(np.dot(rotmat, angles[ii]))

    # check for dtype
    dtype = np.dtype(dtype)
    assert dtype.name in ["float32", "float64"], \
        "dtype must be float32 or float64!"

    assert num_cores <= _ncores, "`num_cores` must not exceed number " +\
                                 "of physical cores: {}".format(_ncores)

    assert uSin.dtype == np.complex128, "uSin dtype must be complex128."

    dtype_complex = np.dtype("complex{}".format(
        2 * int(dtype.name.strip("float"))))

    # set ctype
    ct_dt_map = {np.dtype(np.float32): ctypes.c_float,
                 np.dtype(np.float64): ctypes.c_double
                 }

    assert len(uSin.shape) == 3, "Input data `uSin` must have shape (A,Ny,Nx)."
    assert len(uSin) == A, "`len(angles)` must be equal to `len(uSin)`."
    assert len(list(padding)) == 2, \
        "Parameter `padding` must be boolean tuple of length 2!"
    assert np.array(padding).dtype is np.dtype(bool), \
        "Parameter `padding` must be boolean tuple."
    assert coords is None, "Setting coordinates is not yet supported."

    # Cut-Off frequency
    # km [1/px]
    km = (2 * np.pi * nm) / res

    # The notation in our optical tomography script for
    # a wave propagating to the right is:
    #
    #    u0(x) = exp(ikx)
    #
    # However, in physics usually the other sign convention is used:
    #
    #    u0(x) = exp(-ikx)
    #
    # In order to be consistent with programs like Meep or our scattering
    # script for a dielectric cylinder, we want to use the latter sign
    # convention. This is not a big problem. We only need to multiply the
    # imaginary part of the scattered wave by -1.

    sinogram = uSin

    if weight_angles:
        sinogram *= weights

    # lengths of the input data
    (la, lny, lnx) = sinogram.shape
    ln = max(lnx, lny)

    # We do a zero-padding before performing the Fourier transform.
    # This gets rid of artifacts due to false periodicity and also
    # speeds up Fourier transforms if the input image size is not
    # a power of 2.
    orderx = max(64., 2**np.ceil(np.log(lnx * padfac) / np.log(2)))
    ordery = max(64., 2**np.ceil(np.log(lny * padfac) / np.log(2)))

    if padding[0]:
        padx = orderx - lnx
    else:
        padx = 0
    if padding[1]:
        pady = ordery - lny
    else:
        pady = 0

    # Apply a Fourier filter before projecting the sinogram slices.
    # Resize image to next power of two for fourier analysis
    # (reduces artifacts).
    padyl = int(np.ceil(pady / 2))
    padyr = int(pady - padyl)
    padxl = int(np.ceil(padx / 2))
    padxr = int(padx - padxl)  # was `padx - padyl`, an apparent typo

    # TODO: This padding takes up a lot of memory. Move it to a separate
    # for loop or to the main for-loop.
    if padval is None:
        sino = np.pad(sinogram, ((0, 0), (padyl, padyr), (padxl, padxr)),
                      mode="edge")
        if verbose > 0:
            print("......Padding with edge values.")
    else:
        sino = np.pad(sinogram, ((0, 0), (padyl, padyr), (padxl, padxr)),
                      mode="linear_ramp", end_values=(padval,))
        if verbose > 0:
            print("......Verifying padding value: {}".format(padval))

    # save memory
    del sinogram
    if verbose > 0:
        print("......Image size (x,y): {}x{}, padded: {}x{}".format(
            lnx, lny, sino.shape[2], sino.shape[1]))

    # zero-padded length of sinogram.
    (lA, lNy, lNx) = sino.shape  # @UnusedVariable
    lNz = ln

    # Ask for the filter. Do not include zero (first element).
    #
    # Integrals over ϕ₀ [0,2π]; kx [-kₘ,kₘ]
    #   - double coverage factor 1/2 already included
    #   - unitary angular frequency to unitary ordinary frequency
    #     conversion performed in calculation of UB=FT(uB).
    #
    # f(r) = -i kₘ / ((2π)² a₀)                 (prefactor)
    #      * iiint dϕ₀ dkx dky                  (prefactor)
    #      * |kx|                               (prefactor)
    #      * exp(-i kₘ M lD )                   (prefactor)
    #      * UBϕ₀(kx)                           (dependent on ϕ₀)
    #      * exp( i (kx t⊥ + kₘ (M - 1) s₀) r ) (dependent on ϕ₀ and r)
    # (r and s₀ are vectors. The last term contains a dot-product)
    #
    # kₘM = sqrt( kₘ² - kx² - ky² )
    # t⊥  = (  cos(ϕ₀), ky/kx, sin(ϕ₀) )
    # s₀  = ( -sin(ϕ₀), 0   , cos(ϕ₀) )
    #
    # The filter can be split into two parts
    #
    # 1) part without dependence on the z-coordinate
    #
    #        -i kₘ / ((2π)² a₀)
    #      * iiint dϕ₀ dkx dky
    #      * |kx|
    #      * exp(-i kₘ M lD )
    #
    # 2) part with dependence on the z-coordinate
    #
    #        exp( i (kx t⊥ + kₘ (M - 1) s₀) r )
    #
    # The filter (1) can be performed using the classical filter process
    # as in the backprojection algorithm.
    #
    # if lNx != lNy:
    #     raise NotImplementedError("Input data must be square shaped!")

    # Corresponding sample frequencies
    fx = np.fft.fftfreq(lNx)  # 1D array
    fy = np.fft.fftfreq(lNy)  # 1D array
    # kx is a 1D array.
    kx = 2 * np.pi * fx
    ky = 2 * np.pi * fy
    # Differentials for integral
    dphi0 = 2 * np.pi / A
    # We will later multiply with phi0.
    #   a, y, x
    kx = kx.reshape(1, 1, -1)
    ky = ky.reshape(1, -1, 1)
    # Low-pass filter:
    # less-than-or-equal would give us zero division error.
    filter_klp = (kx**2 + ky**2 < km**2)

    # Filter M so there are no nans from the root
    M = 1. / km * np.sqrt((km**2 - kx**2 - ky**2) * filter_klp)

    prefactor = -1j * km / (2 * np.pi)
    prefactor *= dphi0
    # Also filter the prefactor, so nothing outside the required
    # low-pass contributes to the sum.
    # The filter is now dependent on the rotational position of the
    # specimen. We have to include information from the angles.
    # We want to estimate the rotational axis for every frame. We do that
    # by computing the cross-product of the vectors in angles from the
    # current and previous image.
    u, v, _w = tilted_axis
    filterabs = np.abs(kx * v + ky * u) * filter_klp
    # new in version 0.1.4:
    # We multiply by the factor (M-1) instead of just (M)
    # to take into account that we have a scattered
    # wave that is normalized by u0.
    prefactor *= np.exp(-1j * km * (M - 1) * lD)

    # Perform filtering of the sinogram,
    # save memory by in-place operations
    # projection = np.fft.fft2(sino, axes=(-1,-2)) * prefactor
    # FFTW flag is "estimate":
    #   specifies that, instead of actual measurements of different
    #   algorithms, a simple heuristic is used to pick a (probably
    #   sub-optimal) plan quickly. With this flag, the input/output
    #   arrays are not overwritten during planning.

    # Byte-aligned arrays
    temp_array = pyfftw.n_byte_align_empty(sino[0].shape, 16, dtype_complex)

    myfftw_plan = pyfftw.FFTW(temp_array, temp_array, threads=num_cores,
                              flags=["FFTW_ESTIMATE"], axes=(0, 1))

    if jmc is not None:
        jmc.value += 1

    for p in range(len(sino)):
        # this overwrites sino
        temp_array[:] = sino[p, :, :]
        myfftw_plan.execute()
        sino[p, :, :] = temp_array[:]

    del temp_array, myfftw_plan  # free the FFT buffers

    projection = sino
    projection[:] *= prefactor / (lNx * lNy)
    projection[:] *= filterabs

    # save memory
    del prefactor, filter_klp

    # filter (2) must be applied before rotation as well
    #
    #   exp( i (kx t⊥ + kₘ (M - 1) s₀) r )
    #
    # kₘM = sqrt( kₘ² - kx² - ky² )
    # t⊥  = (  cos(ϕ₀), ky/kx, sin(ϕ₀) )
    # s₀  = ( -sin(ϕ₀), 0   , cos(ϕ₀) )
    #
    # This filter is effectively an inverse Fourier transform
    #
    #   exp(i kx xD) exp(i ky yD) exp(i kₘ (M - 1) zD )
    #
    # xD =   x cos(ϕ₀) + z sin(ϕ₀)
    # zD = - x sin(ϕ₀) + z cos(ϕ₀)

    # Everything is in pixels
    center = lNz / 2.0
    # x = np.linspace(-centerx, centerx, lNx, endpoint=False)
    # x = np.arange(lNx) - center + .5
    # Meshgrid for output array
    # zv, yv, xv = np.meshgrid(x,x,x)
    #    z, y, x
    # xv = x.reshape( 1, 1,-1)
    # yv = x.reshape( 1,-1, 1)
    # z = np.arange(ln) - center + .5
    z = np.linspace(-center, center, lNz, endpoint=False)
    zv = z.reshape(-1, 1, 1)

    #  z, y, x
    Mp = M.reshape(lNy, lNx)

    # filter2 = np.exp(1j * zv * km * (Mp - 1))
    f2_exp_fac = 1j * km * (Mp - 1)
    if save_memory:
        # compute filter2 later
        pass
    else:
        # compute filter2 now
        filter2 = ne.evaluate("exp(factor * zv)",
                              local_dict={"factor": f2_exp_fac, "zv": zv})
        # occupies some amount of ram, but yields faster computation later
        # filter2[0].size*len(filter2)*128/8/1024**3

    if jmc is not None:
        jmc.value += 1

    #    a, z, y, x
    # projection = projection.reshape(la, 1, lNy, lNx)
    projection = projection.reshape(la, lNy, lNx)

    # This frees comparatively little data
    del M
    # del Mp

    # Prepare complex output image
    if onlyreal:
        outarr = np.zeros((ln, lny, lnx), dtype=dtype)
    else:
        outarr = np.zeros((ln, lny, lnx), dtype=dtype_complex)

    # Create plan for fftw:
    inarr = pyfftw.n_byte_align_empty((lNy, lNx), 16, dtype_complex)
    # inarr[:] = (projection[0]*filter2)[0,:,:]
    # plan is "patient":
    #   FFTW_PATIENT is like FFTW_MEASURE, but considers a wider range of
    #   algorithms and often produces a "more optimal" plan (especially
    #   for large transforms), but at the expense of several times longer
    #   planning time (especially for large transforms).
    # print(inarr.flags)
    myifftw_plan = pyfftw.FFTW(inarr, inarr, threads=num_cores,
                               axes=(0, 1), direction="FFTW_BACKWARD",
                               flags=["FFTW_MEASURE"])

    # assert shared_array.base.base is shared_array_base.get_obj()
    shared_array_base = mp.Array(ct_dt_map[dtype], ln * lny * lnx)
    _shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
    _shared_array = _shared_array.reshape(ln, lny, lnx)

    # Initialize the pool with the shared array
    odtbrain._shared_array = _shared_array
    pool4loop = mp.Pool(processes=num_cores)

    # filtered projections in loop
    filtered_proj = np.zeros((ln, lny, lnx), dtype=dtype_complex)

    # Rotate all points such that we are effectively rotating everything
    # about the y-axis.
    angles = rotate_points_to_axis(points=angles, axis=tilted_axis_yz)

    for aa in np.arange(A):
        # A == la
        # projection.shape == (A, lNx, lNy)
        # filter2.shape == (ln, lNx, lNy)
        for p in range(len(zv)):
            if save_memory:
                # compute filter2 here;
                # this is comparatively slower than the other case
                ne.evaluate("exp(factor * zvp) * projectioni",
                            local_dict={"zvp": zv[p],
                                        "projectioni": projection[aa],
                                        "factor": f2_exp_fac},
                            out=inarr)
            else:
                # use universal functions
                np.multiply(filter2[p], projection[aa], out=inarr)
            myifftw_plan.execute()
            filtered_proj[p, :, :] = inarr[padyl:padyl + lny,
                                           padxl:padxl + lnx]

        # The Cartesian axes in our array are ordered like this: [z,y,x].
        # However, the rotation matrix requires [x,y,z]. Therefore, we
        # need to np.transpose the first and last axis and also invert
        # the y-axis.
        fil_p_t = filtered_proj.transpose(2, 1, 0)[:, ::-1, :]

        # get rotation matrix for this point and also rotate in plane
        _drot, drotinv = rotation_matrix_from_point_planerot(
            angles[aa], plane_angle=angz, ret_inv=True)

        # apply offset required by affine_transform
        # The offset is only required for the rotation in the x-z-plane.
        # The offset "-.5" assures that we are rotating about the center
        # of the image and not the value at the center of the array (this
        # is also what `scipy.ndimage.rotate` does).
        c = 0.5 * np.array(fil_p_t.shape) - .5
        offset = c - np.dot(drotinv, c)

        # Perform rotation
        # We cannot split the inplace-rotation into multiple subrotations
        # as we did in _Back_3d_tilted.backpropagate_3d, because the
        # rotation axis is arbitrarily placed in the 3d array. Rotating
        # single slices does not yield the same result as rotating the
        # entire array. Instead of using affine_transform,
        # map_coordinates might be faster for multiple cores.
        # Also undo the axis transposition that we performed previously.
        outarr.real += scipy.ndimage.interpolation.affine_transform(
            fil_p_t.real, drotinv, offset=offset, mode="constant",
            cval=0, order=intp_order).transpose(2, 1, 0)[:, ::-1, :]

        if not onlyreal:
            outarr.imag += scipy.ndimage.interpolation.affine_transform(
                fil_p_t.imag, drotinv, offset=offset, mode="constant",
                cval=0, order=intp_order).transpose(2, 1, 0)[:, ::-1, :]

        if jmc is not None:
            jmc.value += 1

    pool4loop.terminate()
    pool4loop.join()

    del _shared_array, inarr, odtbrain._shared_array
    del shared_array_base

    gc.collect()

    return outarr
import os
import numpy as np
import numexpr as ne
ne.set_num_threads(ne.ncores)  # including HyperThreading cores
import sys
sys.path.append(os.path.expanduser('~/devel/mapalign/mapalign'))
sys.path.append(os.path.expanduser('~/devel/hcp_corr'))
import embed
import hcp_util


def fisher_r2z(R):
    return ne.evaluate('arctanh(R)')


def fisher_z2r(Z):
    X = ne.evaluate('exp(2*Z)')
    return ne.evaluate('(X - 1) / (X + 1)')


# here we go ...

## parse command line arguments
# first arg is the output prefix, e.g. /ptmp/sbayrak/fisher/fisher_
cliarg_out_prfx = sys.argv[1]
# the rest of the args are the subject path(s), e.g. /ptmp/sbayrak/hcp/*
cliarg_rest = sys.argv[2:]

# list of all subjects as a numpy array, e.g. /ptmp/sbayrak/hcp/*
subject_list = np.array(cliarg_rest)

cnt_files = 4
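
# --- sanity-check sketch (standalone; illustrative values) ----------------
# fisher_z2r inverts fisher_r2z:
#   z = arctanh(r),  r = tanh(z) = (exp(2z) - 1) / (exp(2z) + 1)
R = np.array([-0.5, 0.0, 0.5, 0.9])
assert np.allclose(fisher_z2r(fisher_r2z(R)), R)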
def backpropagate_3d(uSin, angles, res, nm, lD=0, coords=None, weight_angles=True, onlyreal=False, padding=(True, True), padfac=1.75, padval=None, intp_order=2, dtype=_np_float64, num_cores=_ncores, save_memory=False, copy=True, jmc=None, jmm=None, verbose=_verbose): u""" 3D backpropagation with the Fourier diffraction theorem Three-dimensional diffraction tomography reconstruction algorithm for scattering of a plane wave :math:`u_0(\mathbf{r}) = u_0(x,y,z)` by a dielectric object with refractive index :math:`n(x,y,z)`. This method implements the 3D backpropagation algorithm [1]_ .. math:: f(\mathbf{r}) = -\\frac{i k_\mathrm{m}}{2\pi} \\sum_{j=1}^{N} \! \Delta \phi_0 D_{-\phi_j} \!\! \\left \{ \\text{FFT}^{-1}_{\mathrm{2D}} \\left \{ \\left| k_\mathrm{Dx} \\right| \\frac{\\text{FFT}_{\mathrm{2D}} \\left \{ u_{\mathrm{B},\phi_j}(x_\mathrm{D}, y_\mathrm{D}) \\right \}} {u_0(l_\mathrm{D})} \exp \! \\left[i k_\mathrm{m}(M - 1) \cdot (z_{\phi_j}-l_\mathrm{D}) \\right] \\right \} \\right \} with the forward :math:`\\text{FFT}_{\mathrm{2D}}` and inverse :math:`\\text{FFT}^{-1}_{\mathrm{2D}}` 2D fast Fourier transform, the rotational operator :math:`D_{-\phi_j}`, the angular distance between the projections :math:`\Delta \phi_0`, the ramp filter in Fourier space :math:`|k_\mathrm{Dx}|`, and the propagation distance :math:`(z_{\phi_j}-l_\mathrm{D})`. Parameters ---------- uSin : (A, Ny, Nx) ndarray Three-dimensional sinogram of plane recordings :math:`u_{\mathrm{B}, \phi_j}(x_\mathrm{D}, y_\mathrm{D})` divided by the incident plane wave :math:`u_0(l_\mathrm{D})` measured at the detector. angles : (A,) ndarray Angular positions :math:`\phi_j` of ``uSin`` in radians. res : float Vacuum wavelength of the light :math:`\lambda` in pixels. nm : float Refractive index of the surrounding medium :math:`n_\mathrm{m}`. lD : float Distance from center of rotation to detector plane :math:`l_\mathrm{D}` in pixels. coords : None [(3, M) ndarray] Only compute the output image at these coordinates. This keyword is reserved for future versions and is not implemented yet. weight_angles : bool If ``True``, weights each backpropagated projection with a factor proportional to the angular distance between the neighboring projections. .. math:: \Delta \phi_0 \\longmapsto \Delta \phi_j = \\frac{\phi_{j+1} - \phi_{j-1}}{2} .. versionadded:: 0.1.1 onlyreal : bool If ``True``, only the real part of the reconstructed image will be returned. This saves computation time. padding : tuple of bool Pad the input data to the second next power of 2 before Fourier transforming. This reduces artifacts and speeds up the process for input image sizes that are not powers of 2. The default is padding in x and y: ``padding=(True, True)``. For padding only in x-direction (e.g. for cylindrical symmetries), set ``padding`` to ``(True, False)``. To turn off padding, set it to ``(False, False)``. padfac : float Increase padding size of the input data. A value greater than one will trigger padding to the second-next power of two. For example, a value of 1.75 will lead to a padded size of 256 for an initial size of 144, whereas it will lead to a padded size of 512 for an initial size of 150. Values geater than 2 are allowed. This parameter may greatly increase memory usage! padval : float The value used for padding. This is important for the Rytov approximation, where an approximat zero in the phase might translate to 2πi due to the unwrapping algorithm. In that case, this value should be a multiple of 2πi. 
If ``padval`` is ``None``, then the edge values are used for padding (see documentation of :func:`numpy.pad`). order : int between 0 and 5 Order of the interpolation for rotation. See :func:`scipy.ndimage.interpolation.rotate` for details. dtype : dtype object or argument for :func:`numpy.dtype` The data type that is used for calculations (float or double). Defaults to ``numpy.float``. num_cores : int The number of cores to use for parallel operations. This value defaults to the number of cores on the system. save_memory : bool Saves memory at the cost of longer computation time. .. versionadded:: 0.1.5 copy : bool Copy input sinogram ``uSin`` for data processing. If ``copy`` is set to ``False``, then ``uSin`` will be overridden. .. versionadded:: 0.1.5 jmc, jmm : instance of :func:`multiprocessing.Value` or ``None`` The progress of this function can be monitored with the :mod:`jobmanager` package. The current step ``jmc.value`` is incremented ``jmm.value`` times. ``jmm.value`` is set at the beginning. verbose : int Increment to increase verbosity. Returns ------- f : ndarray of shape (Nx, Ny, Nx), complex if ``onlyreal==False`` Reconstructed object function :math:`f(\mathbf{r})` as defined by the Helmholtz equation. :math:`f(x,z) = k_m^2 \\left(\\left(\\frac{n(x,z)}{n_m}\\right)^2 -1\\right)` See Also -------- odt_to_ri : conversion of the object function :math:`f(\mathbf{r})` to refractive index :math:`n(\mathbf{r})`. Notes ----- Do not use the parameter `lD` in combination with the Rytov approximation - the propagation is not correctly described. Instead, numerically refocus the sinogram prior to converting it to Rytov data (using e.g. :func:`odtbrain.sinogram_as_rytov`) with a numerical focusing algorithm (available in the Python package :py:mod:`nrefocus`). """ ne.set_num_threads(num_cores) if copy: uSin = uSin.copy() A = angles.shape[0] # jobmanager if jmm is not None: jmm.value = A + 2 # check for dtype dtype = np.dtype(dtype) assert dtype.name in ["float32", "float64"], "dtype must be float32 or float64!" assert num_cores <= _ncores, "`num_cores` must not exceed number " +\ "of physical cores: {}".format(_ncores) assert uSin.dtype == np.complex128, "uSin dtype must be complex128." dtype_complex = np.dtype("complex{}".format( 2 * np.int(dtype.name.strip("float")))) # set ctype ct_dt_map = {np.dtype(np.float32): ctypes.c_float, np.dtype(np.float64): ctypes.c_double } assert len(uSin.shape) == 3, "Input data `uSin` must have shape (A,Ny,Nx)." assert len(uSin) == A, "`len(angles)` must be equal to `len(uSin)`." assert len(list(padding)) == 2, "Parameter `padding` must be boolean tuple of length 2!" assert np.array(padding).dtype is np.dtype(bool), "Parameter `padding` must be boolean tuple." assert coords is None, "Setting coordinates is not yet supported." # Cut-Off frequency # km [1/px] km = (2 * np.pi * nm) / res # Here, the notation for # a wave propagating to the right is: # # u0(x) = exp(ikx) # # However, in physics usually we use the other sign convention: # # u0(x) = exp(-ikx) # # In order to be consistent with programs like Meep or our # scattering script for a dielectric cylinder, we want to use the # latter sign convention. # This is not a big problem. We only need to multiply the imaginary # part of the scattered wave by -1. 
sinogram = uSin # Perform weighting if weight_angles: weights = util.compute_angle_weights_1d(angles).reshape(-1,1,1) sinogram *= weights # lengths of the input data (la, lny, lnx) = sinogram.shape ln = max(lnx, lny) # We perform padding before performing the Fourier transform. # This gets rid of artifacts due to false periodicity and also # speeds up Fourier transforms of the input image size is not # a power of 2. # transpose so we can call resize correctly orderx = max(64., 2**np.ceil(np.log(lnx * padfac) / np.log(2))) ordery = max(64., 2**np.ceil(np.log(lny * padfac) / np.log(2))) if padding[0]: padx = orderx - lnx else: padx = 0 if padding[1]: pady = ordery - lny else: pady = 0 # Apply a Fourier filter before projecting the sinogram slices. # Resize image to next power of two for fourier analysis # Reduces artifacts padyl = np.int(np.ceil(pady / 2)) padyr = np.int(pady - padyl) padxl = np.int(np.ceil(padx / 2)) padxr = np.int(padx - padyl) #TODO: This padding takes up a lot of memory. Move it to a separate # for loop or to the main for-loop. if padval is None: sino = np.pad(sinogram, ((0, 0), (padyl, padyr), (padxl, padxr)), mode="edge") if verbose > 0: print("......Padding with edge values.") else: sino = np.pad(sinogram, ((0, 0), (padyl, padyr), (padxl, padxr)), mode="linear_ramp", end_values=(padval,)) if verbose > 0: print("......Verifying padding value: {}".format(padval)) # save memory del sinogram if verbose > 0: print("......Image size (x,y): {}x{}, padded: {}x{}".format( lnx, lny, sino.shape[2], sino.shape[1])) # zero-padded length of sinogram. (lA, lNy, lNx) = sino.shape # @UnusedVariable lNz = ln # Ask for the filter. Do not include zero (first element). # # Integrals over ϕ₀ [0,2π]; kx [-kₘ,kₘ] # - double coverage factor 1/2 already included # - unitary angular frequency to unitary ordinary frequency # conversion performed in calculation of UB=FT(uB). # # f(r) = -i kₘ / ((2π)² a₀) (prefactor) # * iiint dϕ₀ dkx dky (prefactor) # * |kx| (prefactor) # * exp(-i kₘ M lD ) (prefactor) # * UBϕ₀(kx) (dependent on ϕ₀) # * exp( i (kx t⊥ + kₘ (M - 1) s₀) r ) (dependent on ϕ₀ and r) # (r and s₀ are vectors. The last term contains a dot-product) # # kₘM = sqrt( kₘ² - kx² - ky² ) # t⊥ = ( cos(ϕ₀), ky/kx, sin(ϕ₀) ) # s₀ = ( -sin(ϕ₀), 0 , cos(ϕ₀) ) # # The filter can be split into two parts # # 1) part without dependence on the z-coordinate # # -i kₘ / ((2π)² a₀) # * iiint dϕ₀ dkx dky # * |kx| # * exp(-i kₘ M lD ) # # 2) part with dependence of the z-coordinate # # exp( i (kx t⊥ + kₘ (M - 1) s₀) r ) # # The filter (1) can be performed using the classical filter process # as in the backprojection algorithm. # # # Corresponding sample frequencies fx = np.fft.fftfreq(lNx) # 1D array fy = np.fft.fftfreq(lNy) # 1D array # kx is a 1D array. kx = 2 * np.pi * fx ky = 2 * np.pi * fy # Differentials for integral dphi0 = 2 * np.pi / A # We will later multiply with phi0. # a, y, x kx = kx.reshape(1, 1, -1) ky = ky.reshape(1, -1, 1) # Low-pass filter: # less-than-or-equal would give us zero division error. filter_klp = (kx**2 + ky**2 < km**2) # Filter M so there are no nans from the root M = 1. / km * np.sqrt((km**2 - kx**2 - ky**2) * filter_klp) prefactor = -1j * km / (2 * np.pi) prefactor *= dphi0 # Also filter the prefactor, so nothing outside the required # low-pass contributes to the sum. 
    prefactor *= np.abs(kx) * filter_klp
    # prefactor *= np.sqrt(((kx**2+ky**2)) * filter_klp )
    # new in version 0.1.4:
    # We multiply by the factor (M-1) instead of just (M)
    # to take into account that we have a scattered
    # wave that is normalized by u0.
    prefactor *= np.exp(-1j * km * (M - 1) * lD)

    # Perform filtering of the sinogram,
    # save memory by in-place operations
    # projection = np.fft.fft2(sino, axes=(-1,-2)) * prefactor
    # FFTW-flag is "estimate":
    #   specifies that, instead of actual measurements of different
    #   algorithms, a simple heuristic is used to pick a (probably
    #   sub-optimal) plan quickly. With this flag, the input/output
    #   arrays are not overwritten during planning.

    # Byte-aligned arrays
    temp_array = pyfftw.n_byte_align_empty(sino[0].shape, 16, dtype_complex)

    myfftw_plan = pyfftw.FFTW(temp_array, temp_array, threads=num_cores,
                              flags=["FFTW_ESTIMATE"], axes=(0, 1))

    if jmc is not None:
        jmc.value += 1

    for p in range(len(sino)):
        # this overwrites sino
        temp_array[:] = sino[p, :, :]
        myfftw_plan.execute()
        sino[p, :, :] = temp_array[:]

    # release the planning objects
    del temp_array, myfftw_plan

    projection = sino
    # normalize to (lNx * lNy) for FFTW
    projection[:] *= prefactor / (lNx * lNy)

    # save memory
    del prefactor, filter_klp

    # Filter (2) must be applied before rotation as well:
    #
    #    exp( i (kx t⊥ + kₘ (M - 1) s₀) r )
    #
    # kₘM = sqrt( kₘ² - kx² - ky² )
    # t⊥  = (  cos(ϕ₀), ky/kx, sin(ϕ₀) )
    # s₀  = ( -sin(ϕ₀), 0    , cos(ϕ₀) )
    #
    # This filter is effectively an inverse Fourier transform:
    #
    #    exp(i kx xD) exp(i ky yD) exp(i kₘ (M - 1) zD )
    #
    # with
    #
    #    xD =   x cos(ϕ₀) + z sin(ϕ₀)
    #    zD = - x sin(ϕ₀) + z cos(ϕ₀)

    # Everything is in pixels
    center = lNz / 2.0
    z = np.linspace(-center, center, lNz, endpoint=False)
    zv = z.reshape(-1, 1, 1)

    # z, y, x
    Mp = M.reshape(lNy, lNx)

    # filter2 = np.exp(1j * zv * km * (Mp - 1))
    f2_exp_fac = 1j * km * (Mp - 1)
    if save_memory:
        # compute filter2 later, slice by slice inside the angle loop
        pass
    else:
        # compute filter2 now
        filter2 = ne.evaluate("exp(factor * zv)",
                              local_dict={"factor": f2_exp_fac, "zv": zv})
        # Occupies a sizable amount of RAM, but yields faster computation
        # later:
        # filter2[0].size * len(filter2) * 128 / 8 / 1024**3  [GiB]

    if jmc is not None:
        jmc.value += 1

    # a, z, y, x
    # projection = projection.reshape(la, 1, lNy, lNx)
    projection = projection.reshape(la, lNy, lNx)

    # This frees comparatively little data
    del M
    # del Mp

    # Prepare complex output image
    if onlyreal:
        outarr = np.zeros((ln, lny, lnx), dtype=dtype)
    else:
        outarr = np.zeros((ln, lny, lnx), dtype=dtype_complex)

    # Create plan for fftw:
    inarr = pyfftw.n_byte_align_empty((lNy, lNx), 16, dtype_complex)
    # inarr[:] = (projection[0]*filter2)[0,:,:]
    # The backward plan below uses "FFTW_MEASURE": FFTW finds an optimized
    # plan by actually computing several transforms and measuring their
    # execution time, at the expense of a longer planning stage (unlike
    # "FFTW_ESTIMATE" above, which only uses a heuristic).
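    # Illustration (commented out; example numbers only): for a padded
    # stack with lNz = lNy = lNx = 256 and complex128 values, the
    # precomputed filter2 above alone occupies
    #
    #     256**3 * 16 / 1024**3  ==  0.25 GiB,
    #
    # which is the allocation that save_memory=True avoids by evaluating
    # one z-slice at a time.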
    # print(inarr.flags)
    myifftw_plan = pyfftw.FFTW(inarr, inarr, threads=num_cores,
                               axes=(0, 1), direction="FFTW_BACKWARD",
                               flags=["FFTW_MEASURE"])

    # Create a shared array that worker processes can write to in place.
    shared_array_base = mp.Array(ct_dt_map[dtype], ln * lny * lnx)
    _shared_array = np.ctypeslib.as_array(shared_array_base.get_obj())
    _shared_array = _shared_array.reshape(ln, lny, lnx)

    # Initialize the pool with the shared array
    odtbrain._shared_array = _shared_array
    pool4loop = mp.Pool(processes=num_cores)

    # filtered projections in loop
    filtered_proj = np.zeros((ln, lny, lnx), dtype=dtype_complex)

    for aa in np.arange(A):
        # 14x speedup with fftw3 compared to numpy fft and
        # memory reduction by a factor of 2!
        # ifft will be computed in-place.
        # A == la
        # projection.shape == (A, lNx, lNy)
        # filter2.shape == (ln, lNx, lNy)
        for p in range(len(zv)):
            if save_memory:
                # compute filter2 for this z-slice on the fly;
                # this is comparatively slower than the precomputed case
                ne.evaluate("exp(factor * zvp) * projectioni",
                            local_dict={"zvp": zv[p],
                                        "projectioni": projection[aa],
                                        "factor": f2_exp_fac},
                            out=inarr)
            else:
                # use universal functions
                np.multiply(filter2[p], projection[aa], out=inarr)
            myifftw_plan.execute()
            # resize image to original size;
            # the copy is necessary to prevent memory leakage.
            filtered_proj[p, :, :] = inarr[padyl:padyl + lny,
                                           padxl:padxl + lnx]

        # The fftw did not normalize the data; the division by
        # (lNx * lNy) was already folded into `prefactor` above, which
        # uses less memory and gains speed.
        _shared_array[:] = filtered_proj.real

        phi0 = np.rad2deg(angles[aa])

        if not onlyreal:
            filtered_proj_imag = filtered_proj.imag

        _mprotate(phi0, lny, pool4loop, intp_order)

        outarr.real += _shared_array

        if not onlyreal:
            _shared_array[:] = filtered_proj_imag
            del filtered_proj_imag
            _mprotate(phi0, lny, pool4loop, intp_order)
            outarr.imag += _shared_array

        if jmc is not None:
            jmc.value += 1

    pool4loop.terminate()
    pool4loop.join()

    del _shared_array, inarr, odtbrain._shared_array
    del shared_array_base

    gc.collect()

    return outarr
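# A minimal, self-contained sketch of the shared-array pattern used in the
# loop above, with hypothetical names (demo_shared_array and _fill_row are
# not part of odtbrain): mp.Array allocates a process-safe buffer, and
# np.ctypeslib.as_array wraps it as a NumPy array that forked workers can
# mutate in place without pickling the data. Assumes a fork-based start
# method (e.g. Linux).
import multiprocessing as mp

import numpy as np


def _fill_row(i):
    # each worker writes directly into the inherited shared buffer
    _shared[i, :] = i


def demo_shared_array(n=4, m=5):
    global _shared
    base = mp.Array('d', n * m)
    _shared = np.ctypeslib.as_array(base.get_obj()).reshape(n, m)
    pool = mp.Pool(processes=2)
    pool.map(_fill_row, range(n))
    pool.close()
    pool.join()
    return _shared.copy()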
def bayesian_blocks(tt, ttstart, ttstop, p0, bkg_integral_distribution=None):
    """
    Divide a series of events characterized by their arrival time into
    blocks of perceptibly constant count rate. If the background integral
    distribution is given, divide the series into blocks where the
    difference with respect to the background is perceptibly constant.

    :param tt: arrival times of the events
    :param ttstart: the start of the interval
    :param ttstop: the stop of the interval
    :param p0: the false positive probability. This is used to decide the
           penalization on the likelihood, so this parameter affects the
           number of blocks
    :param bkg_integral_distribution: (default: None) If given, the
           algorithm accounts for the presence of the background and finds
           changes in rate with respect to the background
    :return: the np.array containing the edges of the blocks
    """

    # Verify that the input array is one-dimensional
    tt = np.asarray(tt, dtype=float)

    assert tt.ndim == 1

    if bkg_integral_distribution is not None:
        # Transform the inhomogeneous Poisson process into a homogeneous
        # one with rate 1, by changing the time axis according to the
        # background rate
        logger.debug("Transforming the inhomogeneous Poisson process to a homogeneous one with rate 1...")
        t = np.array(bkg_integral_distribution(tt))
        logger.debug("done")

        # Now compute the start and stop time in the new system
        tstart = bkg_integral_distribution(ttstart)
        tstop = bkg_integral_distribution(ttstop)
    else:
        t = tt
        tstart = ttstart
        tstop = ttstop

    # Create initial cell edges (Voronoi tessellation)
    edges = np.concatenate([[t[0]], 0.5 * (t[1:] + t[:-1]), [t[-1]]])

    # Create the edges also in the original time system
    edges_ = np.concatenate([[tt[0]], 0.5 * (tt[1:] + tt[:-1]), [tt[-1]]])

    # Create a lookup table to be able to transform back from the
    # transformed system to the original one
    lookup_table = {key: value for (key, value) in zip(edges, edges_)}

    # The last block length is 0 by definition
    block_length = tstop - edges

    if np.sum((block_length <= 0)) > 1:
        raise RuntimeError("Events appear to be out of order! Check the ordering, or look for duplicated events.")

    N = t.shape[0]

    # arrays to store the best configuration
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    # eq. 21 from Scargle 2012
    prior = 4 - np.log(73.53 * p0 * (N**-0.478))

    logger.debug("Finding blocks...")

    # This is where the computation happens. Following Scargle et al. 2012.
    # This loop has been optimized for speed:
    # * the expression for the fitness function has been rewritten to
    #   avoid multiple log computations, and to avoid power computations
    # * the use of scipy.weave and numexpr has been evaluated. The latter
    #   gives a big gain (~40%) if used for the fitness function.
    #   No other gain is obtained by using it anywhere else.

    # Set numexpr precision to low (more than enough for us), which is
    # faster than high
    oldaccuracy = numexpr.set_vml_accuracy_mode('low')
    numexpr.set_num_threads(1)
    numexpr.set_vml_num_threads(1)

    # Speed tricks: resolve once and for all the functions which will be
    # used in the loop
    numexpr_evaluate = numexpr.evaluate
    numexpr_re_evaluate = numexpr.re_evaluate

    # Pre-compute this
    aranges = np.arange(N + 1, 0, -1)

    for R in range(N):
        br = block_length[R + 1]
        T_k = block_length[:R + 1] - br

        # N_k: number of elements in each block.
        # This expression has been simplified for the case of unbinned
        # events (i.e., one element in each block).
        # It was:
        # N_k = cumsum(x[:R + 1][::-1])[::-1]
        # Now it is:
        N_k = aranges[N - R:]
        # where aranges has been pre-computed

        # Evaluate fitness function.
        # This is the slowest part, which I'm speeding up by using
        # numexpr. It provides a ~40% gain in execution speed.
        # The first time we need to "compile" the expression in numexpr;
        # all the other times we can reuse it. N_k and T_k are passed in
        # through local_dict.
        if R == 0:
            fit_vec = numexpr_evaluate('''N_k * log(N_k / T_k)''',
                                       optimization='aggressive',
                                       local_dict={'N_k': N_k, 'T_k': T_k})
        else:
            fit_vec = numexpr_re_evaluate(local_dict={'N_k': N_k,
                                                      'T_k': T_k})

        A_R = fit_vec - prior  # type: np.ndarray

        A_R[1:] += best[:R]

        i_max = A_R.argmax()

        last[R] = i_max

        best[R] = A_R[i_max]

    numexpr.set_vml_accuracy_mode(oldaccuracy)

    logger.debug("Done\n")

    # Now peel off and find the blocks (see the algorithm in Scargle et al.)
    change_points = np.zeros(N, dtype=int)
    i_cp = N
    ind = N

    while True:
        i_cp -= 1
        change_points[i_cp] = ind

        if ind == 0:
            break

        ind = last[ind - 1]

    change_points = change_points[i_cp:]

    edg = edges[change_points]

    # Transform the found edges back into the original time system
    if bkg_integral_distribution is not None:
        # Use a list comprehension instead of map() so that the result is
        # indexable under both Python 2 and Python 3 (map() returns an
        # iterator in Python 3)
        final_edges = [lookup_table[x] for x in edg]
    else:
        final_edges = edg

    # Now fix the first and last edge so that they are tstart and tstop
    final_edges[0] = ttstart
    final_edges[-1] = ttstop

    return np.asarray(final_edges)
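# A minimal stand-alone sketch of the evaluate/re_evaluate caching trick
# used in the loop above (toy data, not part of the function): ne.evaluate
# compiles the expression on the first call; ne.re_evaluate re-runs the
# most recently compiled expression with fresh operands, skipping the
# parsing overhead on every later iteration.
import numexpr as ne
import numpy as np

N_k = np.arange(1.0, 11.0)
T_k = np.linspace(0.5, 5.0, 10)

first = ne.evaluate("N_k * log(N_k / T_k)",
                    local_dict={'N_k': N_k, 'T_k': T_k})
# same compiled expression, new operand values
second = ne.re_evaluate(local_dict={'N_k': N_k * 2, 'T_k': T_k + 1.0})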
import array

import alsaaudio
import numexpr as ne
from scipy.optimize import leastsq
from numpy import sin, pi
# from scipy.io import wavfile

MEASUREMENT_TIMEFRAME = 1  # second
BUFFERMAXSIZE = 10  # seconds
LOG_SIZE = 100  # measurements
MEASUREMENTS_FILE = "measurements.csv"

INFORMAT = alsaaudio.PCM_FORMAT_FLOAT_LE
INPUT_CHANNEL = 2
CHANNELS = 1
RATE = 24000
FRAMESIZE = 512

ne.set_num_threads(3)

SANITY_MAX_FREQUENCYCHANGE = 0.03
SANITY_UPPER_BOUND = 50.4
SANITY_LOWER_BOUND = 49.6


# A multithreading-compatible buffer, tuned for maximum write_in
# performance. According to
# https://stackoverflow.com/questions/7133885/fastest-way-to-grow-a-numpy-numeric-array
# appending to Python arrays is much faster than appending to numpy
# arrays; see the sketch after this class fragment.
class Buffer():
    def __init__(self, minSize, maxSize):
        self.data = array.array('f')
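# A small stand-alone sketch of the claim above (hypothetical helper
# names): growing an array.array and converting to NumPy once at the end
# avoids the reallocate-and-copy that np.append performs on every call.
import array

import numpy as np


def grow_pyarray(n):
    # append to a Python array, convert to numpy once at the end
    buf = array.array('f')
    for i in range(n):
        buf.append(float(i))
    return np.frombuffer(buf, dtype=np.float32)


def grow_nparray(n):
    # np.append copies the whole array on every call: O(n**2) overall
    buf = np.empty(0, dtype=np.float32)
    for i in range(n):
        buf = np.append(buf, np.float32(i))
    return buf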
def setUp(self): numexpr.set_num_threads(self.nthreads)
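# A minimal sketch of the surrounding test case this setUp implies (class
# name and test body are hypothetical): each subclass pins numexpr to a
# fixed thread count before its tests run.
import unittest

import numexpr
import numpy as np


class EvaluateWithThreads(unittest.TestCase):
    nthreads = 2

    def setUp(self):
        numexpr.set_num_threads(self.nthreads)

    def test_evaluate(self):
        a = np.arange(10.0)
        np.testing.assert_allclose(numexpr.evaluate("2 * a"), 2 * a)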
def __exit__(self, exc_type, exc_value, traceback): ne.set_num_threads(self.oldn)
import telescope import receiver from .. import constants from ..constants import DTOR, RTOD from .. import float_type from ..util import tools, math, fits, hdf, time from ..util.tools import struct from ..data import c_interface from copy import deepcopy from mpl_toolkits.axes_grid1 import make_axes_locatable """ DTOR = np.pi/180. RTOD = 180./np.pi ne.set_num_threads(int(os.environ.get('OMP_NUM_THREADS',4))) # Don't use all the processors! tqu=['T','Q','U'] teb=['T','E','B'] #pol_index = {'T':0, 'Q':1, 'U':2, 'I':0, 'V':3, 'T':0, 'E':1, 'B':2, 'I':0} # For translating letters into array indices proj_name_to_index = {'Sanson-Flamsteed':0, 'CAR':1, 'SIN':2, 'Healpix':3, 'Sterographic':4, 'Lambert':5, 'CAR00':7, 'rot00':8, 'BICEP':9} __proj_name_to_index = dict( [(key.lower(), value) for key, value in proj_name_to_index.iteritems()]) # Lower-case version for comparisons. # Projections tested in testAllProj: proj_index_in_idl = [0,1,2,4,5] proj_to_test = [0,1,2,4,5,7] ########################################################################################## def read(filename, file_type=None, verbose=True): """
Created on Thu Jul 3 14:25:12 2014 @author: sbramlett """ import os import sys import time import io import profile import numexpr as ne sys.path.append("/home/sbramlett/workspace/PythonPWA/bdemello/pythonPWA/pythonPWA/pythonPWA") from fileHandlers.gampReader import gampReader ne.set_num_threads(4) class gampSlist(object): def __init__(self, indir, gfile): self.indir = indir self.gfile = gfile print time.time() igreader=gampReader(gampFile = open(os.path.join(indir,gfile),'r')) self.events = igreader.readGamp() print time.time() self.eventslist = [] def generate(self): for event in self.events: for particles in event.particles: if particles.particleID == "14": #proton prE = float(particles.particleE) prpx = float(particles.particleXMomentum)
N = 10*1000*1000   # the number of points at which the expression is evaluated

x = np.linspace(-1, 1, N)   # the x in range [-1, 1]

# what = "numpy"     # uses numpy for computations
what = "numexpr"     # uses numexpr for computations


def compute():
    """Compute the polynomial."""
    if what == "numpy":
        y = eval(expr)
    else:
        y = ne.evaluate(expr)
    return len(y)


if __name__ == '__main__':
    if len(sys.argv) > 1:  # first arg is the package to use
        what = sys.argv[1]
    if len(sys.argv) > 2:  # second arg is the number of threads to use
        nthreads = int(sys.argv[2])
        if "ncores" in dir(ne):
            ne.set_num_threads(nthreads)
    if what not in ("numpy", "numexpr"):
        print "Unrecognized module:", what
        sys.exit(1)  # exit with a non-zero status on error
    print "Computing: '%s' using %s with %d points" % (expr, what, N)

    t0 = time()
    result = compute()
    ts = round(time() - t0, 3)
    print "*** Time elapsed:", ts
def __enter__(self):
    # set_num_threads returns the previous setting; keep it so that
    # __exit__ can restore it
    self.oldn = ne.set_num_threads(self.n)
    return self
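# The __enter__/__exit__ pair shown in these fragments amounts to a
# thread-count context manager. A self-contained sketch with a hypothetical
# class name:
import numexpr as ne
import numpy as np


class num_threads(object):
    def __init__(self, n):
        self.n = n

    def __enter__(self):
        # set_num_threads returns the previous setting
        self.oldn = ne.set_num_threads(self.n)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        ne.set_num_threads(self.oldn)


# usage: temporarily run numexpr single-threaded
a = np.arange(5.0)
with num_threads(1):
    ne.evaluate("2 * a")
# the previous thread count is restored here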