def check_mask(self, expected, result):
    # There's no guarantee that TBB will use a full mask worth of
    # threads if it deems it inefficient to do so
    if threading_layer() == 'tbb':
        self.assertTrue(np.all(result <= expected))
    elif threading_layer() in ('omp', 'workqueue'):
        np.testing.assert_equal(expected, result)
    else:
        assert 0, 'unreachable'
def numba_environment() -> Dict[str, Any]:
    """return information about the numba setup used

    Returns:
        (dict) information about the numba setup
    """
    # determine whether Nvidia Cuda is available
    try:
        from numba import cuda

        cuda_available = cuda.is_available()
    except ImportError:
        cuda_available = False

    # determine whether AMD ROC is available
    try:
        from numba import roc

        roc_available = roc.is_available()
    except ImportError:
        roc_available = False

    # determine threading layer
    try:
        threading_layer = nb.threading_layer()
    except ValueError:
        # threading layer was not initialized, so compile a mock function
        @nb.jit("i8()", parallel=True)
        def f():
            s = 0
            for i in nb.prange(4):
                s += i
            return s

        f()
        try:
            threading_layer = nb.threading_layer()
        except ValueError:  # cannot initialize threading
            threading_layer = None
    except AttributeError:  # old numba version
        threading_layer = None

    return {
        "version": nb.__version__,
        "parallel": NUMBA_PARALLEL,
        "fastmath": NUMBA_FASTMATH,
        "debug": NUMBA_DEBUG,
        "using_svml": nb.config.USING_SVML,
        "threading_layer": threading_layer,
        "omp_num_threads": os.environ.get("OMP_NUM_THREADS"),
        "mkl_num_threads": os.environ.get("MKL_NUM_THREADS"),
        "num_threads": nb.config.NUMBA_NUM_THREADS,
        "num_threads_default": nb.config.NUMBA_DEFAULT_NUM_THREADS,
        "cuda_available": cuda_available,
        "roc_available": roc_available,
    }
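# A minimal usage sketch for the helper above, assuming it is importable in the
# current module; the `json.dumps` pretty-printing is illustrative and not part
# of the original code.
import json

if __name__ == "__main__":
    # `default=str` guards against values that are not JSON-serializable
    print(json.dumps(numba_environment(), indent=2, default=str))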
def __init__(self, *, options: Options, n_dims: (int, None) = None,
             non_unit_g_factor: bool = False, grid: (tuple, None) = None,
             n_threads: (int, None) = None):
    self.options = options

    if n_dims is not None and grid is not None:
        raise ValueError("specify either n_dims or grid, not both")
    if n_dims is None and grid is None:
        raise ValueError("either n_dims or grid must be given")
    if grid is None:
        grid = tuple([-1] * n_dims)
    if n_dims is None:
        n_dims = len(grid)

    if n_threads is None:
        n_threads = numba.get_num_threads()
    self.n_threads = 1 if n_dims == 1 else n_threads
    if self.n_threads > 1 and numba.threading_layer() == 'workqueue':
        warnings.warn(
            "Numba is using the ``workqueue'' threading layer, switch"
            " to ``omp'' or ``tbb'' for higher parallel performance"
            " (see https://numba.pydata.org/numba-doc/latest/user/threading-layer.html)"
        )

    self.n_dims = n_dims
    self.__call = make_step_impl(options, non_unit_g_factor, grid, self.n_threads)
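# If the warning above fires, the threading layer can be requested explicitly
# before the first parallel function is compiled. A minimal sketch; the choice
# of 'omp' is only an example and its availability depends on how numba was
# installed ('tbb' or the NUMBA_THREADING_LAYER environment variable work the
# same way).
import numba

numba.config.THREADING_LAYER = 'omp'  # must be set before any @njit(parallel=True) compiles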
def test_numba_info():
    ni = d.numba_info()
    if numba.config.DISABLE_JIT:
        assert ni is None
    else:
        assert ni is not None
        assert ni.threading == numba.threading_layer()
        assert ni.threads == numba.get_num_threads()
def _test_nested_parallelism_3(self):
    if threading_layer() == 'workqueue':
        self.skipTest("workqueue is not threadsafe")

    # check that the right number of threads are present in nesting
    # this relies on there being a load of cores present
    BIG = 1000000

    @njit(parallel=True)
    def work(local_nt):  # arg is value 3
        tid = np.zeros(BIG)
        acc = 0
        set_num_threads(local_nt)  # set to 3 threads
        for i in prange(BIG):
            acc += 1
            tid[i] = _get_thread_id()
        return acc, np.unique(tid)

    @njit(parallel=True)
    def test_func_jit(nthreads):
        set_num_threads(nthreads)  # set to 2 threads
        lens = np.zeros(nthreads)
        total = 0
        for i in prange(nthreads):
            my_acc, tids = work(nthreads + 1)  # call with value 3
            lens[i] = len(tids)
            total += my_acc
        return total, np.unique(lens)

    NT = 2
    expected_acc = BIG * NT
    expected_thread_count = NT + 1

    got_acc, got_tc = test_func_jit(NT)
    self.assertEqual(expected_acc, got_acc)
    self.check_mask(expected_thread_count, got_tc)

    def test_guvectorize(nthreads):
        @guvectorize(['int64[:], int64[:]'],
                     '(n), (n)',
                     nopython=True,
                     target='parallel')
        def test_func_guvectorize(total, lens):
            my_acc, tids = work(nthreads + 1)
            lens[0] = len(tids)
            total[0] += my_acc

        total = np.zeros((nthreads, 1), dtype=np.int64)
        lens = np.zeros(nthreads, dtype=np.int64).reshape((nthreads, 1))

        test_func_guvectorize(total, lens)
        # vectorize does not reduce, so total is summed
        return total.sum(), np.unique(lens)

    got_acc, got_tc = test_guvectorize(NT)
    self.assertEqual(expected_acc, got_acc)
    self.check_mask(expected_thread_count, got_tc)
def time_script():
    res_file_name_quad = 'quad.csv'    # output data file name
    res_file_name_total = 'total.csv'  # output data file name
    max_input_size_mag = 6             # max number of input point (power of 10)
    num_points = 200                   # number of runs collected
    trial = 5                          # For each run, the number of trials run.
    num_device = 1                     # number of GPUs used

    config.THREADING_LAYER = 'threadsafe'
    set_num_threads(12)  # numba: number of concurrent CPU threads
    print("Threading layer chosen: %s" % threading_layer())

    ## Header:
    # [num input, cpu_result (ms), gpu_result (ms)]
    result_quad = np.zeros((num_points, 4))
    result_total = np.zeros((num_points, 4))

    this_result_cpu_total = np.zeros(trial)
    this_result_gpu_total = np.zeros(trial)
    this_result_cpu_quad = np.zeros(trial)
    this_result_gpu_quad = np.zeros(trial)

    # generate a set of input data size, linear in log space between 1 and maximum
    for idx, in_size in enumerate(np.logspace(1, max_input_size_mag, num=num_points)):
        # for idx, in_size in enumerate(np.linspace(1e5, 1e6, num_points)):
        result_quad[idx, 0] = idx
        result_quad[idx, 1] = int(in_size)
        result_total[idx, 0] = idx
        result_total[idx, 1] = int(in_size)

        state, spacing = initialize(int(in_size))

        # output from GPU
        for i in range(0, trial, 1):
            # GPU time
            rhs, total_time, quad_time = single_advance_gpu(state, int(in_size), spacing)
            this_result_gpu_total[i] = total_time
            this_result_gpu_quad[i] = quad_time

            # numba time:
            rhs, total_time, quad_time = single_advance_cpu(state, int(in_size), spacing)
            this_result_cpu_total[i] = total_time
            this_result_cpu_quad[i] = quad_time

        result_quad[idx, 2] = np.min(this_result_cpu_quad)
        result_quad[idx, 3] = np.min(this_result_gpu_quad)
        result_total[idx, 2] = np.min(this_result_cpu_total)
        result_total[idx, 3] = np.min(this_result_gpu_total)

        print("[{}/{}] running on {} inputs, CPU: {:4f}, GPU: {:4f}".format(
            idx, num_points, int(in_size), result_quad[idx, 2], result_quad[idx, 3]))

    np.savetxt(res_file_name_quad, result_quad, delimiter=',')
    np.savetxt(res_file_name_total, result_total, delimiter=',')
def numba_info():
    x = _par_test(100)
    _log.debug('sum: %d', x)

    try:
        layer = numba.threading_layer()
    except ValueError:
        _log.info('Numba threading not initialized')
        return None
    _log.info('numba threading layer: %s', layer)
    nth = numba.get_num_threads()
    return NumbaInfo(layer, nth)
def check_threading_layer():
    """Check which numba threading_layer is active, and warn if it is "workqueue"."""
    _dummy_numba(np.ones(1))
    try:
        if threading_layer() == "workqueue":
            warn(
                'Using `numba.threading_layer()=="workqueue"` can be devastatingly slow!'
                " See https://numba.pydata.org/numba-doc/latest/user/threading-layer.html"
                " for alternatives.",
                SliseWarning,
            )
    except ValueError as e:
        warn(f"Numba: {e}", SliseWarning)
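# `_dummy_numba` is not shown above; `threading_layer()` raises ValueError until
# some parallel-jitted function has actually run, so the helper only needs to be
# a trivial warm-up. A plausible sketch (name, signature, and body are assumptions):
import numba
import numpy as np


@numba.njit(parallel=True)
def _dummy_numba(x):  # hypothetical warm-up helper
    s = 0.0
    for i in numba.prange(x.size):
        s += x[i]
    return s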
def time_script():
    res_file_name = 'chyqmom4_res_2.csv'  # output data file name
    max_input_size_mag = 6                # max number of input points (power of 10)
    num_points = 200                      # number of runs collected
    trial = 5                             # for each run, the number of trials run
    num_device = 1                        # number of GPUs used

    config.THREADING_LAYER = 'threadsafe'
    set_num_threads(12)  # numba: number of concurrent CPU threads
    print("Threading layer chosen: %s" % threading_layer())

    ## Header:
    # [idx, num input, cpu_result (ms), gpu_result (ms)]
    result = np.zeros((num_points, 4))
    this_result_cpu = np.zeros(trial)
    this_result_gpu = np.zeros(trial)

    # generate a set of input data sizes, linear in log space between 10 and the maximum
    for idx, in_size in enumerate(np.logspace(1, max_input_size_mag, num=num_points)):
        # for idx, in_size in enumerate(np.linspace(1e5, 1e6, num_points)):
        result[idx, 0] = idx
        result[idx, 1] = int(in_size)

        this_moment = init_moment_27(int(in_size))

        # output from GPU
        for i in range(0, trial, 1):
            # GPU time
            try:
                this_result_gpu[i], w, x, y, z = chyqmom27(this_moment, int(in_size))
            except Exception:
                pass
                # chyqmom27(this_moment, int(in_size))

            # numba time
            start_time = time.perf_counter()
            chyqmom27_cpu(this_moment.transpose(), int(in_size))
            stop_time = time.perf_counter()
            this_result_cpu[i] = (stop_time - start_time) * 1e3  # ms

            w.free()
            x.free()
            y.free()
            z.free()

        # keep the best (minimum) time over the trials; store CPU/GPU results in
        # columns 2 and 3 so column 1 retains the input size (matching the header)
        result[idx, 2] = np.min(this_result_cpu)
        result[idx, 3] = np.min(this_result_gpu)

        print("[{}/{}] running on {} inputs, CPU: {:4f}, GPU: {:4f}".format(
            idx, num_points, int(in_size), result[idx, 2], result[idx, 3]))

    np.savetxt(res_file_name, result, delimiter=',')
def _test_nested_parallelism_1(self):
    if threading_layer() == 'workqueue':
        self.skipTest("workqueue is not threadsafe")

    # check that get_num_threads is ok in nesting
    mask = config.NUMBA_NUM_THREADS - 1

    N = config.NUMBA_NUM_THREADS
    M = 2 * config.NUMBA_NUM_THREADS

    @njit(parallel=True)
    def child_func(buf, fid):
        M, N = buf.shape
        for i in prange(N):
            buf[fid, i] = get_num_threads()

    def get_test(test_type):
        if test_type == 'njit':
            def test_func(nthreads, py_func=False):
                @njit(parallel=True)
                def _test_func(nthreads):
                    acc = 0
                    buf = np.zeros((M, N))
                    set_num_threads(nthreads)
                    for i in prange(M):
                        local_mask = 1 + i % mask
                        # set threads in parent function
                        set_num_threads(local_mask)
                        if local_mask < N:
                            child_func(buf, local_mask)
                        acc += get_num_threads()
                    return acc, buf

                if py_func:
                    return _test_func.py_func(nthreads)
                else:
                    return _test_func(nthreads)

        elif test_type == 'guvectorize':
            def test_func(nthreads, py_func=False):
                def _test_func(acc, buf, local_mask):
                    set_num_threads(nthreads)
                    # set threads in parent function
                    set_num_threads(local_mask[0])
                    if local_mask[0] < N:
                        child_func(buf, local_mask[0])
                    acc[0] += get_num_threads()

                buf = np.zeros((M, N), dtype=np.int64)
                acc = np.zeros((M, 1), dtype=np.int64)
                local_mask = (1 + np.arange(M) % mask).reshape((M, 1))

                sig = ['void(int64[:], int64[:, :], int64[:])']
                layout = '(p), (n, m), (p)'
                if not py_func:
                    _test_func = guvectorize(sig, layout, nopython=True,
                                             target='parallel')(_test_func)
                else:
                    _test_func = guvectorize(sig, layout, forceobj=True)(_test_func)
                _test_func(acc, buf, local_mask)
                return acc, buf

        return test_func

    for test_type in ['njit', 'guvectorize']:
        test_func = get_test(test_type)

        got_acc, got_arr = test_func(mask)
        exp_acc, exp_arr = test_func(mask, py_func=True)
        np.testing.assert_equal(exp_acc, got_acc)
        np.testing.assert_equal(exp_arr, got_arr)

        # check the maths reconciles, guvectorize does not reduce, njit does
        math_acc_exp = 1 + np.arange(M) % mask
        if test_type == 'guvectorize':
            math_acc = math_acc_exp.reshape((M, 1))
        else:
            math_acc = np.sum(math_acc_exp)

        np.testing.assert_equal(math_acc, got_acc)

        math_arr = np.zeros((M, N))
        for i in range(1, N):
            # there's branches on 1, ..., num_threads - 1
            math_arr[i, :] = i
        np.testing.assert_equal(math_arr, got_arr)
tic = time()
pygbm_model = GradientBoostingClassifier(loss='binary_crossentropy',
                                         learning_rate=lr,
                                         max_iter=n_trees,
                                         max_bins=max_bins,
                                         max_leaf_nodes=n_leaf_nodes,
                                         random_state=0,
                                         scoring=None,
                                         verbose=1,
                                         validation_split=None)
pygbm_model.fit(data_train, target_train)
toc = time()
predicted_test = pygbm_model.predict(data_test)
roc_auc = roc_auc_score(target_test, predicted_test)
acc = accuracy_score(target_test, predicted_test)
print(f"done in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc:.4f}")

if hasattr(numba, 'threading_layer'):
    print("Threading layer chosen: %s" % numba.threading_layer())

if not args.no_lightgbm:
    print("Fitting a LightGBM model...")
    tic = time()
    lightgbm_model = get_lightgbm_estimator(pygbm_model)
    lightgbm_model.fit(data_train, target_train)
    toc = time()
    predicted_test = lightgbm_model.predict(data_test)
    roc_auc = roc_auc_score(target_test, predicted_test)
    acc = accuracy_score(target_test, predicted_test)
    print(f"done in {toc - tic:.3f}s, ROC AUC: {roc_auc:.4f}, ACC: {acc:.4f}")
    rhof = p / (pc_r_d * t * (1.0 + pc_rvd_o * qv - qc - qi))
    return rhof


if __name__ == '__main__':
    shapes = [128 * 128 * 80]
    t = np.ones(shapes, dtype=np.float64)
    p = np.ones(shapes, dtype=np.float64)
    qv = np.ones(shapes, dtype=np.float64)
    qc = np.ones(shapes, dtype=np.float64)
    qi = np.ones(shapes, dtype=np.float64)
    rhof = np.ones(shapes, dtype=np.float64)

    start = time.time()
    rhof = rho(t, p, qv, qc, qi)
    # rho.parallel_diagnostics(level=4)
    end = time.time()
    print("Elapsed (with compilation) = %s" % (end - start))

    times = np.empty(100)
    for count in range(100):
        start = time.time()
        rho(t, p, qv, qc, qi)
        end = time.time()
        times[count] = end - start

    print(times)
    print("Elapsed time = {0}, {1}".format(np.mean(times), np.std(times)))
    print("Threading layer chosen: %s" % threading_layer())
    print("Num threads: %s" % numba.get_num_threads())
def _test_nested_parallelism_2(self):
    if threading_layer() == 'workqueue':
        self.skipTest("workqueue is not threadsafe")

    # check that get_num_threads is ok in nesting

    N = config.NUMBA_NUM_THREADS + 1
    M = 4 * config.NUMBA_NUM_THREADS + 1

    def get_impl(child_type, test_type):

        if child_type == 'parallel':
            child_dec = njit(parallel=True)
        elif child_type == 'njit':
            child_dec = njit(parallel=False)
        elif child_type == 'none':
            def child_dec(x):
                return x

        @child_dec
        def child(buf, fid):
            M, N = buf.shape
            set_num_threads(fid)  # set threads in child function
            for i in prange(N):
                buf[fid, i] = get_num_threads()

        if test_type in ['parallel', 'njit', 'none']:
            if test_type == 'parallel':
                test_dec = njit(parallel=True)
            elif test_type == 'njit':
                test_dec = njit(parallel=False)
            elif test_type == 'none':
                def test_dec(x):
                    return x

            @test_dec
            def test_func(nthreads):
                buf = np.zeros((M, N))
                set_num_threads(nthreads)
                for i in prange(M):
                    local_mask = 1 + i % mask
                    # when the threads exit the child functions they should
                    # have a TLS slot value of the local mask as it was set
                    # in child
                    if local_mask < config.NUMBA_NUM_THREADS:
                        child(buf, local_mask)
                        assert get_num_threads() == local_mask
                return buf
        else:
            if test_type == 'guvectorize':
                test_dec = guvectorize(['int64[:,:], int64[:]'],
                                       '(n, m), (k)', nopython=True,
                                       target='parallel')
            elif test_type == 'guvectorize-obj':
                test_dec = guvectorize(['int64[:,:], int64[:]'],
                                       '(n, m), (k)', forceobj=True)

            def test_func(nthreads):
                @test_dec
                def _test_func(buf, local_mask):
                    set_num_threads(nthreads)
                    # when the threads exit the child functions they should
                    # have a TLS slot value of the local mask as it was set
                    # in child
                    if local_mask[0] < config.NUMBA_NUM_THREADS:
                        child(buf, local_mask[0])
                        assert get_num_threads() == local_mask[0]

                buf = np.zeros((M, N), dtype=np.int64)
                local_mask = (1 + np.arange(M) % mask).reshape((M, 1))
                _test_func(buf, local_mask)
                return buf

        return test_func

    mask = config.NUMBA_NUM_THREADS - 1

    res_arrays = {}
    for test_type in ['parallel', 'njit', 'none',
                      'guvectorize', 'guvectorize-obj']:
        for child_type in ['parallel', 'njit', 'none']:
            if child_type == 'none' and test_type != 'none':
                continue
            set_num_threads(mask)
            res_arrays[test_type, child_type] = get_impl(
                child_type, test_type)(mask)

    py_arr = res_arrays['none', 'none']
    for arr in res_arrays.values():
        np.testing.assert_equal(arr, py_arr)

    # check the maths reconciles
    math_arr = np.zeros((M, N))
    # there's branches on modulo mask but only NUMBA_NUM_THREADS funcs
    for i in range(1, config.NUMBA_NUM_THREADS):
        math_arr[i, :] = i
    np.testing.assert_equal(math_arr, py_arr)
def set_numba_threading():
    """Set the numba threading layer.

    For parallel numba jit blocks, the backend threading layer is selected
    at runtime based on an order set inside the numba package.  We would
    like to change the order of selection to prefer one of the thread-based
    backends (omp or tbb).  We also set the maximum number of threads used
    by numba to be the same as the number of threads used by TOAST.  Since
    TOAST does not use numba, it means that there will be a consistent
    maximum number of threads in use at all times and no oversubscription.

    Args:
        None

    Returns:
        None

    """
    global numba_threading_layer
    if numba_threading_layer is not None:
        # Already set.
        return

    # Get the number of threads used by TOAST at runtime.
    env = Environment.get()
    log = Logger.get()
    toastthreads = env.max_threads()
    print("max toast threads = ", toastthreads, flush=True)

    rank = 0
    if env.use_mpi4py():
        from .mpi import MPI

        rank = MPI.COMM_WORLD.rank

    threading = "default"

    have_numba_omp = False
    try:
        # New style package layout
        from numba.np.ufunc import omppool

        have_numba_omp = True
        if rank == 0:
            log.debug("Numba has OpenMP threading support")
    except ImportError:
        try:
            # Old style
            from numba.npyufunc import omppool

            have_numba_omp = True
            if rank == 0:
                log.debug("Numba has OpenMP threading support")
        except ImportError:
            # no OpenMP support
            if rank == 0:
                log.debug("Numba does not support OpenMP")

    have_numba_tbb = False
    try:
        # New style package layout
        from numba.np.ufunc import tbbpool

        have_numba_tbb = True
        if rank == 0:
            log.debug("Numba has TBB threading support")
    except ImportError:
        try:
            # Old style
            from numba.npyufunc import tbbpool

            have_numba_tbb = True
            if rank == 0:
                log.debug("Numba has TBB threading support")
        except ImportError:
            # no TBB
            if rank == 0:
                log.debug("Numba does not support TBB")

    # Prefer OMP backend
    if have_numba_omp:
        threading = "omp"
    elif have_numba_tbb:
        threading = "tbb"

    try:
        from numba import vectorize, config, threading_layer

        # Set threading layer and number of threads.  Note that this still
        # does not always work.  The conf structure is repopulated from the
        # environment on every compilation if any of the NUMBA_* variables
        # have changed.
        config.THREADING_LAYER = threading
        config.NUMBA_DEFAULT_NUM_THREADS = toastthreads
        config.NUMBA_NUM_THREADS = toastthreads
        os.environ["NUMBA_THREADING_LAYER"] = threading
        os.environ["NUMBA_DEFAULT_NUM_THREADS"] = "{:d}".format(toastthreads)
        os.environ["NUMBA_NUM_THREADS"] = "{:d}".format(toastthreads)

        # In order to get numba to actually select a threading layer, we must
        # trigger compilation of a parallel function.
        @vectorize("float64(float64)", target="parallel")
        def force_thread_launch(x):
            return x + 1

        force_thread_launch(np.zeros(1))

        # Log the layer that was selected
        numba_threading_layer = threading_layer()
        if rank == 0:
            log.debug("Numba threading layer set to {}".format(numba_threading_layer))
            log.debug(
                "Numba max threads now forced to {}".format(config.NUMBA_NUM_THREADS)
            )
    except ImportError:
        # Numba not available at all
        if rank == 0:
            log.debug("Cannot import numba- ignoring threading layer.")
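# Illustrative usage sketch only (not part of the original module): call the
# setup function once at startup, before the first parallel numba function is
# compiled, then inspect the module-level global it populates.
set_numba_threading()
print("selected numba threading layer:", numba_threading_layer)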
        z = T * sig_sig_two
        c = 0.25 * z
        y = 1. / sqrt(z)

        w1 = (a - b + c) * y
        w2 = (a - b - c) * y

        d1 = 0.5 + 0.5 * erf(w1)
        d2 = 0.5 + 0.5 * erf(w2)

        Se = exp(b) * S

        r = P * d1 - Se * d2
        call[i] = r
        put[i] = r - P + Se


@nb.guvectorize('(f8[::1],f8[::1],f8[::1],f8[:],f8[:],f8[::1],f8[::1])',
                '(a),(a),(a),(),()->(a),(a)',
                nopython=True, target="parallel")
def black_scholes_numba_vec(price, strike, t, mr, sig_sig_two, call, put):
    black_scholes_jit(price, strike, t, mr[0], sig_sig_two[0], call, put)


@nb.jit
def black_scholes(nopt, price, strike, t, rate, vol, call, put):
    sig_sig_two = vol * vol * 2
    mr = -rate
    black_scholes_numba_vec(price.reshape((-1, 512)),
                            strike.reshape((-1, 512)),
                            t.reshape((-1, 512)),
                            mr, sig_sig_two,
                            call.reshape((-1, 512)),
                            put.reshape((-1, 512)))


base_bs_erf.run("Numba@guvec-par-simd", black_scholes, pass_args=True)
print("Threading layer:", nb.threading_layer())
    )
    exit()

# read input
C, dim = Q3AP_instance.read_input(sys.argv[1])
moves = generateNhood(dim)

tstart = time.time()

# call once to trigger compilation (Warmup)
sol = solution(np.arange(dim, dtype=np.int64), np.arange(dim, dtype=np.int64), 0)
sol, nhood_evals = runILS(sol, 1)

elapsed_time = time.time() - tstart
print("Time (First LS/Compilation):\t", elapsed_time)
print("Using Threading layer: %s\n" % threading_layer())

# initial solutions (random)
sol = solution(np.random.permutation(dim), np.random.permutation(dim), 0)
sol.cost = eval(sol)

# nb iterations (ILS outer loop)
ils_iter = 100
ils_iter = int(sys.argv[2])

tstart = time.time()
sol, nhood_evals = runILS(sol, ils_iter)
elapsed_time = time.time() - tstart

print("Best Solution:\n\t", sol.perm1, sol.perm2, "\n\t Cost:\t", sol.cost)
print('nhood-eval:\t', nhood_evals)
def main(nqubits, circuit_name, backend="custom", precision="double",
         nreps=1, nshots=None, transfer=False, fuse=False, device=None,
         accelerators=None, threadsafe=False, compile=False, get_branch=True,
         nlayers=None, gate_type=None, params={}, filename=None):
    """Runs circuit simulation benchmarks for different circuits.

    See benchmark documentation for a description of arguments.
    """
    qibo.set_backend(backend)
    qibo.set_precision(precision)
    if device is not None:
        qibo.set_device(device)

    logs = BenchmarkLogger(filename)
    # Create log dict
    logs.append({
        "nqubits": nqubits, "circuit_name": circuit_name,
        "threading": "",
        "backend": qibo.get_backend(),
        "precision": qibo.get_precision(),
        "device": qibo.get_device(),
        "accelerators": accelerators,
        "nshots": nshots,
        "transfer": transfer,
        "fuse": fuse,
        "compile": compile,
    })
    if get_branch:
        logs[-1]["branch"] = get_active_branch_name()

    params = {k: v for k, v in params.items() if v is not None}
    kwargs = {"nqubits": nqubits, "circuit_name": circuit_name}
    if params:
        kwargs["params"] = params
    if nlayers is not None:
        kwargs["nlayers"] = nlayers
    if gate_type is not None:
        kwargs["gate_type"] = gate_type
    if accelerators is not None:
        kwargs["accelerators"] = accelerators
    logs[-1].update(kwargs)

    start_time = time.time()
    circuit = circuits.CircuitFactory(**kwargs)
    if nshots is not None:
        # add measurement gates
        circuit.add(qibo.gates.M(*range(nqubits)))
    if fuse:
        circuit = circuit.fuse()
    logs[-1]["creation_time"] = time.time() - start_time

    start_time = time.time()
    if compile:
        circuit.compile()
        # Try executing here so that compile time is not included
        # in the simulation time
        result = circuit(nshots=nshots)
        del(result)
    logs[-1]["compile_time"] = time.time() - start_time

    start_time = time.time()
    result = circuit(nshots=nshots)
    logs[-1]["dry_run_time"] = time.time() - start_time
    start_time = time.time()
    if transfer:
        result = result.numpy()
    logs[-1]["dry_run_transfer_time"] = time.time() - start_time
    del(result)

    simulation_times, transfer_times = [], []
    for _ in range(nreps):
        start_time = time.time()
        result = circuit(nshots=nshots)
        simulation_times.append(time.time() - start_time)
        start_time = time.time()
        if transfer:
            result = result.numpy()
        transfer_times.append(time.time() - start_time)
        logs[-1]["dtype"] = str(result.dtype)
        if nshots is None:
            del(result)

    logs[-1]["simulation_times"] = simulation_times
    logs[-1]["transfer_times"] = transfer_times
    logs[-1]["simulation_times_mean"] = np.mean(simulation_times)
    logs[-1]["simulation_times_std"] = np.std(simulation_times)
    logs[-1]["transfer_times_mean"] = np.mean(transfer_times)
    logs[-1]["transfer_times_std"] = np.std(transfer_times)

    start_time = time.time()
    if nshots is not None:
        freqs = result.frequencies()
    logs[-1]["measurement_time"] = time.time() - start_time

    if logs[-1]["backend"] == "qibojit" and qibo.K.get_platform() == "numba":
        from numba import threading_layer
        logs[-1]["threading"] = threading_layer()

    print()
    print(logs)
    print()
    logs.dump()