def simple_device(verbose=False):
    """Smoke-test ArrayFire's device-management helpers on every device.

    Prints general device information, then switches to each device in turn
    and checks that allocating one array raises both the allocated-buffer
    and the locked-buffer counters by exactly one.

    Parameters
    ----------
    verbose : bool
        Forwarded to the ``_util`` helpers that build the print/display
        callables.
    """
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    # Remember the device that was active on entry under its own name.
    # The loop below rebinds `dev`, so restoring from `dev` would leave the
    # last enumerated device selected instead of the original one.
    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert(k == dev)

        print_func(af.is_dbl_supported(k))

        # Collect garbage first so the buffer counts form a clean baseline.
        af.device_gc()

        mem_info_old = af.device_mem_info()

        # `a` must stay referenced until the counters are read.
        a = af.randu(100, 100)
        af.sync(dev)
        mem_info = af.device_mem_info()
        assert(mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers'])
        assert(mem_info['lock']['buffers'] == 1 + mem_info_old['lock']['buffers'])

    # Restore the device that was active when the test started.
    af.set_device(curr_dev)
def main():
    """Price a plain vanilla call with the Heston model and print timings.

    A small warm-up simulation is run first; the second, large simulation
    is the one that is timed and priced.
    """
    # Time horizon, number of time steps and number of simulated paths
    horizon = 1
    n_steps = 20 * horizon
    n_paths_warmup = 1000
    n_paths = 5000000

    log_price_0 = 0               # initial log stock price
    variance_0 = 0.087**2         # initial volatility
    rate = math.log(1.0319)       # risk-free rate
    correlation = -0.82           # instantaneous correlation between Brownian motions
    vol_of_variance = 0.14        # variance of volatility
    reversion_speed = 3.46        # mean reversion speed
    mean_variance = 0.008         # mean variance
    log_strike = math.log(0.95)   # strike price

    # first run
    log_prices, variances = simulateHestonModel(
        horizon, n_steps, n_paths_warmup, rate, reversion_speed,
        mean_variance, vol_of_variance, correlation, log_price_0, variance_0)

    # Price plain vanilla call option
    started = time.time()
    log_prices, variances = simulateHestonModel(
        horizon, n_steps, n_paths, rate, reversion_speed,
        mean_variance, vol_of_variance, correlation, log_price_0, variance_0)
    af.sync()
    elapsed = time.time() - started

    strike = math.exp(log_strike)
    zeros = af.constant(0, n_paths, dtype=af.Dtype.f32)
    discount = math.exp(-rate * horizon)
    call_price = discount * af.mean(af.maxof(af.exp(log_prices) - strike, zeros))

    print("Time elapsed = {} secs".format(elapsed))
    print("Call price = {}".format(call_price))
    print(af.mean(variances))
def strang_step(self, dt):
    """
    Advances the system by one step using a Strang-split scheme.
    This scheme is 2nd order accurate in time.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    self.dt = dt

    if self.performance_test_flag == True:
        t_start = af.time()

    params = self.physical_system.params

    if params.solver_method_in_q == 'FVM':
        # FVM in q-space: split against the field step only when p-space is
        # semi-Lagrangian and EM fields are switched on.
        if params.solver_method_in_p == 'ASL' and params.EM_fields_enabled == True:
            split.strang(self, op_fvm, op_fields, dt)
        else:
            op_fvm(self, dt)

    # Advective Semi-lagrangian method
    elif params.solver_method_in_q == 'ASL':
        if params.EM_fields_enabled == True:

            def op_advect_q_and_solve_src(self, dt):
                return split.strang(self,
                                    op1=op_advect_q,
                                    op2=op_solve_src,
                                    dt=dt)

            # ASL in p-space uses the field step; otherwise FVM in p-space.
            op_in_p = op_fields if params.solver_method_in_p == 'ASL' else op_fvm
            split.strang(self, op_advect_q_and_solve_src, op_in_p, dt)
        else:
            split.strang(self, op_advect_q, op_solve_src, dt)

    check_divergence(self)
    self.time_elapsed += dt

    if self.performance_test_flag == True:
        af.sync()
        self.time_ts += af.time() - t_start

    return
def jia_step(self, dt):
    """
    Advances the system by one step using the Jia split scheme.

    reference:<https://www.sciencedirect.com/science/article/pii/S089571771000436X>

    NOTE: This scheme is computationally expensive, and should only be used
          for testing/debugging

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    self.dt = dt

    if self.performance_test_flag == True:
        t_start = af.time()

    params = self.physical_system.params

    if params.solver_method_in_q == 'FVM':
        if params.solver_method_in_p == 'ASL' and params.fields_enabled == True:
            split.jia(self, op_fvm, op_fields, dt)
        else:
            op_fvm(self, dt)

    # Advective Semi-lagrangian method
    elif params.solver_method_in_q == 'ASL':
        if params.fields_enabled == True:

            def op_advect_q_and_solve_src(self, dt):
                return split.jia(self, op1=op_advect_q, op2=op_solve_src, dt=dt)

            # ASL in p-space uses the field step; otherwise FVM in p-space.
            op_in_p = op_fields if params.solver_method_in_p == 'ASL' else op_fvm
            split.jia(self, op_advect_q_and_solve_src, op_in_p, dt)
        else:
            split.jia(self, op_advect_q, op_solve_src, dt)

    af.eval(self.f)
    check_divergence(self)
    self.time_elapsed += dt

    if self.performance_test_flag == True:
        af.sync()
        self.time_ts += af.time() - t_start

    return
def communicate_fields(self, on_fdtd_grid=False):
    """
    Communicates the boundary-zone values of the EM field arrays among all
    procs (this also takes care of periodic B.C's).

    By default the cell-centered field array is communicated. When
    ``on_fdtd_grid`` is True the Yee-grid array used by the FDTD solver is
    communicated instead.
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    # Start coordinates and size of the local zone
    ((_q1_start, _q2_start),
     (n_q1_local, n_q2_local)) = self._da_fields.getCorners()

    N_g = self.N_g
    use_yee_grid = on_fdtd_grid is True

    # Copy the non-ghost region of the chosen af.Array into the buffer
    # backing the global PETSc.Vec:
    if use_yee_grid:
        source_fields = self.yee_grid_EM_fields
    else:
        source_fields = self.cell_centered_EM_fields

    interior = af.flat(source_fields[:, :, N_g:-N_g, N_g:-N_g])
    interior.to_ndarray(self._glob_fields_array)

    # Takes care of boundary conditions and interzonal communications:
    self._da_fields.globalToLocal(self._glob_fields, self._local_fields)

    # Converting back to af.Array
    communicated = af.moddims(af.to_array(self._local_fields_array),
                              6, 1,
                              n_q1_local + 2 * N_g,
                              n_q2_local + 2 * N_g)

    if use_yee_grid:
        self.yee_grid_EM_fields = communicated
        af.eval(self.yee_grid_EM_fields)
    else:
        self.cell_centered_EM_fields = communicated
        af.eval(self.cell_centered_EM_fields)

    if self.performance_test_flag == True:
        af.sync()
        self.time_communicate_fields += af.time() - t_start

    return
def simple_device(verbose=False):
    """Exercise ArrayFire's device, memory and evaluation-control helpers.

    Covers device enumeration and per-device buffer accounting, wrapping a
    raw device pointer in a new array, array locking, explicit evaluation
    and the manual-eval flag.

    Parameters
    ----------
    verbose : bool
        Forwarded to the ``_util`` helpers that build the print/display
        callables.
    """
    show = _util.display_func(verbose)
    report = _util.print_func(verbose)

    report(af.device_info())
    report(af.get_device_count())
    report(af.is_dbl_supported())
    af.sync()

    original_device = af.get_device()
    report(original_device)

    # Each device must report exactly one extra buffer after one allocation.
    for device_id in range(af.get_device_count()):
        af.set_device(device_id)
        active = af.get_device()
        assert device_id == active

        report(af.is_dbl_supported(device_id))
        af.device_gc()

        before = af.device_mem_info()
        arr = af.randu(100, 100)
        af.sync(active)
        after = af.device_mem_info()

        assert after['alloc']['buffers'] == 1 + before['alloc']['buffers']
        assert after['lock']['buffers'] == 1 + before['lock']['buffers']

    af.set_device(original_device)

    # Wrap the raw device pointer of one array in a second array.
    arr = af.randu(10, 10)
    show(arr)
    raw_ptr = af.get_device_ptr(arr)
    report(raw_ptr)
    other = af.Array(src=raw_ptr, dims=arr.dims(), dtype=arr.dtype(),
                     is_device=True)
    show(other)

    # Lock / unlock round trip.
    extra = af.randu(10, 10)
    af.lock_array(extra)
    af.unlock_array(extra)

    # Explicit evaluation of single and multiple arrays.
    arr = af.constant(1, 3, 3)
    other = af.constant(2, 3, 3)
    af.eval(arr)
    af.eval(other)
    report(arr)
    report(other)

    extra = arr + other
    diff = arr - other
    af.eval(extra, diff)
    report(extra)
    report(diff)

    # The manual-eval flag must read back whatever was just set.
    for flag in (True, False):
        report(af.set_manual_eval_flag(flag))
        assert af.get_manual_eval_flag() == flag

    show(af.is_locked_array(arr))
def arrayfire_perceptron_demo(dataset, num_classes=None):
    """Train and benchmark the ArrayFire perceptron classifier.

    Parameters
    ----------
    dataset : sequence
        Indexed as ``(train_feats, train_labels, test_feats, test_labels)``.
    num_classes : int, optional
        Number of classes; derived from the training labels when omitted.

    Returns
    -------
    tuple
        ``(clf, dt_train)``: the trained classifier and the measured
        training time in seconds.
    """
    # Largest label + 1 when the caller did not supply the class count
    if num_classes is None:
        num_classes = np.amax(dataset[1] + 1)

    # numpy -> af arrays; integer labels become one-hot targets
    train_feats = af.from_ndarray(dataset[0])
    train_targets = af.from_ndarray(ints_to_onehots(dataset[1], num_classes))
    test_feats = af.from_ndarray(dataset[2])
    test_targets = af.from_ndarray(ints_to_onehots(dataset[3], num_classes))

    # Prepend a column of ones (bias term) to both feature matrices
    n_train = train_feats.dims()[0]
    n_test = test_feats.dims()[0]
    ones_train = af.constant(1, n_train, 1)
    ones_test = af.constant(1, n_test, 1)
    train_feats = af.join(1, ones_train, train_feats)
    test_feats = af.join(1, ones_test, test_feats)

    print('arrayfire perceptron classifier implementation')

    clf = AfPerceptron(alpha=0.1, maxerr=0.01, maxiter=1000, verbose=False)

    # Warm-up run so one-off overhead is not part of the measurement,
    # followed by a weight reset
    clf.train(train_feats, train_targets)
    clf.init_weights(train_feats, train_targets)

    # Benchmark training
    started = time.time()
    clf.train(train_feats, train_targets)
    clf.eval()
    dt_train = time.time() - started
    print('Training time: {0:4.4f} s'.format(dt_train))

    # Benchmark prediction
    iters = 100
    test_outputs = None
    started = time.time()
    for _ in range(iters):
        test_outputs = clf.predict_proba(test_feats)
        af.eval(test_outputs)
    af.sync()
    dt = time.time() - started
    print('Prediction time: {0:4.4f} s'.format(dt / iters))
    print('Accuracy (test data): {0:2.2f}'.format(
        accuracy(test_outputs, test_targets)))

    return clf, dt_train
def calc_arrayfire(n):
    """Return a closure that runs ``iters`` matrix products of an n x n array.

    The random operand is created and synchronised once, up front, so the
    closure only exercises ``af.matmul``.
    """
    matrix = af.randu(n, n)
    af.sync()

    def run(iters):
        # Queue all products, then wait once for the device to finish.
        for _ in range(iters):
            product = af.matmul(matrix, matrix)
        af.sync()

    return run
def fdtd_evolve_B(self, dt):
    """
    Evolves magnetic fields from B^{n + 1/2} --> B^{n + 3/2}

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    dq1 = self.dq1
    dq2 = self.dq2

    E1 = self.yee_grid_EM_fields[0]  # (i, j + 1/2)
    E2 = self.yee_grid_EM_fields[1]  # (i + 1/2, j)
    E3 = self.yee_grid_EM_fields[2]  # (i + 1/2, j + 1/2)

    # Copies shifted by one cell along q1 (3rd axis) or q2 (4th axis)
    E1_prev_q2 = af.shift(E1, 0, 0, 0, 1)
    E2_prev_q1 = af.shift(E2, 0, 0, 1, 0)
    E3_prev_q1 = af.shift(E3, 0, 0, 1, 0)
    E3_prev_q2 = af.shift(E3, 0, 0, 0, 1)

    # dB/dt = -(∇ x E)
    # dB1/dt = - dE3/dq2
    # dB2/dt = + dE3/dq1
    # dB3/dt = - (dE2/dq1 - dE1/dq2)

    # curl_1 =  dE3/dq2                  --> (i + 1/2, j)
    curl_1 = (E3 - E3_prev_q2) / dq2
    # curl_2 = -dE3/dq1                  --> (i, j + 1/2)
    curl_2 = -(E3 - E3_prev_q1) / dq1
    # curl_3 = (dE2/dq1 - dE1/dq2)       --> (i, j)
    curl_3 = (E2 - E2_prev_q1) / dq1 - (E1 - E1_prev_q2) / dq2

    # B1 --> (i + 1/2, j)
    self.yee_grid_EM_fields[3] += -dt * curl_1
    # B2 --> (i, j + 1/2)
    self.yee_grid_EM_fields[4] += -dt * curl_2
    # B3 --> (i, j)
    self.yee_grid_EM_fields[5] += -dt * curl_3

    af.eval(self.yee_grid_EM_fields)

    if self.performance_test_flag == True:
        af.sync()
        self.time_fieldsolver += af.time() - t_start

    return
def sync(compute_device: tp.Optional[tp.Union[int, ComputeDevice]] = None):
    """Validate the device argument and forward it to ``af.sync``.

    Parameters
    ----------
    compute_device : ComputeDevice, int or None
        A ``ComputeDevice`` is reduced to its ``id``. An ``int`` or ``None``
        is passed through unchanged.

    Raises
    ------
    TypeError
        If ``compute_device`` is none of the accepted types.
    """
    if isinstance(compute_device, ComputeDevice):
        device = compute_device.id
    # Test for None explicitly. A truthiness check (`not compute_device`)
    # would also let unrelated falsy values such as "" or [] bypass the
    # type validation and reach af.sync.
    elif compute_device is None or isinstance(compute_device, int):
        device = compute_device
    else:
        raise TypeError(f"The argument compute_device must be of "
                        f"type ComputeDevice or of type int. The argument "
                        f"provided is of type {type(compute_device)}")
    af.sync(device=device)
def calc_arrayfire(n):
    """Return a closure that runs ``iters`` 2D FFTs of an n x n random array.

    The input is created and synchronised once, up front, so the closure
    only exercises ``af.fft2``.
    """
    signal = af.randu(n, n)
    af.sync()

    def run(iters):
        # Queue all transforms, then wait once for the device to finish.
        for _ in range(iters):
            spectrum = af.fft2(signal)
        af.sync()

    return run
def communicate_f(self):
    """
    Communicates the boundary-zone values of the distribution function
    array among all procs (this also takes care of periodic B.C's).
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    # Start coordinates and size of the local zone
    ((_q1_start, _q2_start),
     (n_q1_local, n_q2_local)) = self._da_f.getCorners()

    N_g_q = self.N_ghost_q
    N_g_p = self.N_ghost_p

    # The local array is only assigned when a Dirichlet boundary is in use,
    # since only inflowing characteristics are to be changed by the
    # apply-boundary-conditions function.
    boundary_names = ('in_q1_left', 'in_q1_right',
                      'in_q2_bottom', 'in_q2_top')
    if any(getattr(self.boundary_conditions, name) == 'dirichlet'
           for name in boundary_names):
        af.flat(self.f).to_ndarray(self._local_f_array)

    # Global value is non-inclusive of the ghost-zones:
    interior = self.f[:, :, N_g_q:-N_g_q, N_g_q:-N_g_q]
    af.flat(interior).to_ndarray(self._glob_f_array)

    # Interzonal communication; periodic BCs are applied automatically
    # when necessary
    self._da_f.globalToLocal(self._glob_f, self._local_f)

    # Converting back from PETSc.Vec to af.Array:
    flat_f = af.to_array(self._local_f_array)
    dof = ((self.N_p1 + 2 * N_g_p)
           * (self.N_p2 + 2 * N_g_p)
           * (self.N_p3 + 2 * N_g_p))
    self.f = af.moddims(flat_f,
                        dof,
                        self.N_species,
                        n_q1_local + 2 * N_g_q,
                        n_q2_local + 2 * N_g_q)
    af.eval(self.f)

    if self.performance_test_flag == True:
        af.sync()
        self.time_communicate_f += af.time() - t_start

    return
def jia_step(self, dt):
    """
    Advances the system using the Jia split scheme.

    NOTE: This scheme is computationally expensive, and should only be used
          for testing/debugging

    Parameters
    ----------
    dt : float
         Time-step size to evolve the system
    """
    self.dt = dt

    if (self.performance_test_flag == True):
        tic = af.time()

    if (self.physical_system.params.solver_method_in_q == 'FVM'):
        if (self.physical_system.params.solver_method_in_p == 'ASL'
                and self.physical_system.params.charge_electron != 0):
            # Compose with the Jia splitting here too. This branch used to
            # call split.strang, so an FVM run silently used another scheme.
            split.jia(self, op_fvm_q, op_fields, dt)
        else:
            op_fvm_q(self, dt)

    # Advective Semi-lagrangian method
    elif (self.physical_system.params.solver_method_in_q == 'ASL'):
        if (self.physical_system.params.charge_electron == 0):
            # No charge, so no field step: split advection against sources.
            split.jia(self, op_advect_q, op_solve_src, dt)
        else:
            def op_advect_q_and_solve_src(self, dt):
                return (split.jia(self,
                                  op1=op_advect_q,
                                  op2=op_solve_src,
                                  dt=dt))

            split.jia(self, op_advect_q_and_solve_src, op_fields, dt)

    check_divergence(self)
    self.time_elapsed += dt

    if (self.performance_test_flag == True):
        af.sync()
        toc = af.time()
        self.time_ts += toc - tic

    return
def fft_poisson(self, f=None):
    """
    Solves the Poisson Equation using FFTs.

    Used as a backup solver in case of low resolution runs (ie. used on a
    single node) with periodic boundary conditions.

    Raises
    ------
    Exception
        When more than one process is in the communicator.
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    # Guard: the FFT solver is serial-only.
    if self._comm.size != 1:
        raise Exception('FFT solver can only be used when run in serial')

    N_g = self.N_ghost

    # Charge density on the non-ghost cells, with the leading axis moved last
    density = self.compute_moments('density', f)[:, N_g:-N_g, N_g:-N_g]
    rho = af.reorder(self.physical_system.params.charge_electron * density,
                     1, 2, 0)

    # Wavenumber grids along q1 and q2
    freq_q1 = fftfreq(rho.shape[0], self.dq1)
    freq_q2 = fftfreq(rho.shape[1], self.dq2)
    freq_q2, freq_q1 = np.meshgrid(freq_q2, freq_q1)

    k_q1 = af.to_array(freq_q1)
    k_q2 = af.to_array(freq_q2)

    rho_hat = af.fft2(rho)

    potential_hat = rho_hat / (4 * np.pi**2 * (k_q1**2 + k_q2**2))
    # The k = 0 entry is a division by zero above; it is set to zero here.
    potential_hat[0, 0] = 0

    gradient_factor = -1j * 2 * np.pi
    E1_hat = gradient_factor * k_q1 * potential_hat
    E2_hat = gradient_factor * k_q2 * potential_hat

    # Non-inclusive of ghost-zones:
    E1_physical = af.reorder(af.real(af.ifft2(E1_hat)), 2, 0, 1)
    E2_physical = af.reorder(af.real(af.ifft2(E2_hat)), 2, 0, 1)

    self.cell_centered_EM_fields[0, N_g:-N_g, N_g:-N_g] = E1_physical
    self.cell_centered_EM_fields[1, N_g:-N_g, N_g:-N_g] = E2_physical

    af.eval(self.cell_centered_EM_fields)

    if self.performance_test_flag == True:
        af.sync()
        self.time_fieldsolver += af.time() - t_start

    return
def swss_step(self, dt):
    """
    Advances the system using a SWSS-split scheme. This scheme is
    2nd order accurate in time.

    Parameters
    ----------
    dt : float
         Time-step size to evolve the system
    """
    self.dt = dt

    if (self.performance_test_flag == True):
        tic = af.time()

    if (self.physical_system.params.solver_method_in_q == 'FVM'):
        if (self.physical_system.params.solver_method_in_p == 'ASL'
                and self.physical_system.params.charge_electron != 0):
            # Compose with the SWSS splitting here too. This branch used to
            # call split.strang, so an FVM run silently used another scheme.
            split.swss(self, op_fvm_q, op_fields, dt)
        else:
            op_fvm_q(self, dt)

    # Advective Semi-lagrangian method
    elif (self.physical_system.params.solver_method_in_q == 'ASL'):
        if (self.physical_system.params.charge_electron == 0):
            # No charge, so no field step: split advection against sources.
            split.swss(self, op_advect_q, op_solve_src, dt)
        else:
            def op_advect_q_and_solve_src(self, dt):
                return (split.swss(self,
                                   op1=op_advect_q,
                                   op2=op_solve_src,
                                   dt=dt))

            split.swss(self, op_advect_q_and_solve_src, op_fields, dt)

    check_divergence(self)
    self.time_elapsed += dt

    if (self.performance_test_flag == True):
        af.sync()
        toc = af.time()
        self.time_ts += toc - tic

    return
def f_interp_2d(self, dt):
    """
    Performs 2D interpolation in the q-space to solve for the equation:

    df/dt + A_q1 df/dq1 + A_q2 df/dq2 = 0

    This is done by backtracing the characteristic curves
    and interpolating at the origin of the characteristics.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    def q_axes_first(array):
        # (dof, N_s, N_q1, N_q2) --> (N_q1, N_q2, N_s, dof)
        return af.reorder(array, 2, 3, 1, 0)

    A_q1, A_q2 = af.broadcast(self._A_q, self.f, self.time_elapsed,
                              self.q1_center, self.q2_center,
                              self.p1_center, self.p2_center, self.p3_center,
                              self.physical_system.params)

    # Origins of the characteristics, using the add method wrapped with
    # af.broadcast
    q1_origin = add(self.q1_center, -A_q1 * dt)
    q2_origin = add(self.q2_center, -A_q2 * dt)

    # NOTE: The reordering is to be changed after the implementation of
    # axes specific interpolation operators gets completed from
    # ArrayFire's end.
    # Ref:https://github.com/arrayfire/arrayfire/issues/1955
    self.f = af.approx2(q_axes_first(self.f),
                        q_axes_first(q1_origin),
                        q_axes_first(q2_origin),
                        af.INTERP.BICUBIC_SPLINE,
                        xp=q_axes_first(self.q1_center),
                        yp=q_axes_first(self.q2_center))

    # Reordering from (N_q1, N_q2, N_s, dof) --> (dof, N_s, N_q1, N_q2)
    self.f = af.reorder(self.f, 3, 2, 0, 1)
    af.eval(self.f)

    if self.performance_test_flag == True:
        af.sync()
        self.time_interp2 += af.time() - t_start

    return
def op_fvm(self, dt):
    """
    Runs one finite-volume timestep and forces evaluation of ``self.f``.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    timestep_fvm(self, dt)
    af.eval(self.f)

    if self.performance_test_flag == True:
        af.sync()
        self.time_fvm_solver += af.time() - t_start

    return
def reconstruct(self, input_array, axis, reconstruction_method):
    """
    Reconstructs the variation within a cell using the
    reconstruction method specified.

    Parameters
    ----------
    input_array: af.Array
                 Array holding the cells data.

    axis: int
          Axis along which the reconstruction method is to be applied.

    reconstruction_method: str
                           Reconstruction method which needs to be applied.

    Returns
    -------
    tuple
        ``(left_face_value, right_face_value)``.

    Raises
    ------
    SystemExit
        For 'weno5', which is deliberately disabled.
    NotImplementedError
        For any other unrecognised method name.
    """
    if (self.performance_test_flag == True):
        tic = af.time()

    if (reconstruction_method == 'piecewise-constant'):
        # Both faces simply take the cell value.
        left_face_value = input_array
        right_face_value = input_array

    elif (reconstruction_method == 'minmod'):
        left_face_value, right_face_value = reconstruct_minmod(
            input_array, axis)

    elif (reconstruction_method == 'ppm'):
        left_face_value, right_face_value = reconstruct_ppm(input_array, axis)

    elif (reconstruction_method == 'weno5'):
        # WENO5 is disabled. The call to reconstruct_weno5 that used to
        # follow this raise was unreachable and has been removed.
        raise SystemExit('WENO5 is currently buggy. Avoid Using!!!')

    else:
        raise NotImplementedError(
            'Reconstruction method invalid/not-implemented')

    if (self.performance_test_flag == True):
        af.sync()
        toc = af.time()
        self.time_reconstruct += toc - tic

    return (left_face_value, right_face_value)
def riemann_solver(self, left_state, right_state, velocity):
    """
    Returns the upwinded state, using the 1st order upwind Riemann solver.

    Parameters
    ----------
    left_state : af.Array
                 Array holding the values for the state at the left edge of
                 the cells.

    right_state : af.Array
                  Array holding the values for the state at the right edge
                  of the cells.

    velocity : af.Array
               Velocity array whose sign will be used to determine whether
               the left or right state is chosen.

    Returns
    -------
    af.Array
        ``left_state`` where ``velocity > 0`` and ``right_state`` elsewhere.
    """
    if (self.performance_test_flag == True):
        tic = af.time()

    # Axes missing from the shape tuple count as size 1. Explicit length
    # checks replace the former bare `except:` clauses, which also hid
    # unrelated errors.
    state_shape = left_state.shape
    size_axis_2 = state_shape[2] if len(state_shape) > 2 else 1
    size_axis_3 = state_shape[3] if len(state_shape) > 3 else 1

    # Tiling to get to appropriate shape. This used to be driven by an
    # `assert` inside try/except; asserts are stripped under `python -O`,
    # which silently disabled the tiling.
    velocity_shape = velocity.shape
    needs_tiling = (len(velocity_shape) < 3
                    or len(state_shape) < 3
                    or velocity_shape[2] != state_shape[2])
    if needs_tiling:
        velocity = af.tile(velocity, 1, 1, size_axis_2, size_axis_3)

    upwind_state = af.select(velocity > 0, left_state, right_state)

    af.eval(upwind_state)

    if (self.performance_test_flag == True):
        af.sync()
        toc = af.time()
        self.time_riemann += toc - tic

    return (upwind_state)
def arrayfire_knn_demo(dataset, num_classes=None):
    """Train and benchmark the ArrayFire k-nearest-neighbours classifier.

    Parameters
    ----------
    dataset : sequence
        Indexed as ``(train_feats, train_labels, test_feats, test_labels)``.
    num_classes : int, optional
        Number of classes; derived from the training labels when omitted.

    Returns
    -------
    tuple
        ``(clf, dt_train)``: the trained classifier and the measured
        training time in seconds.
    """
    # Largest label + 1 when the caller did not supply the class count
    if num_classes is None:
        num_classes = np.amax(dataset[1] + 1)

    # numpy -> af arrays; labels are cast to int32
    train_feats = af.from_ndarray(dataset[0])
    train_targets = af.from_ndarray(dataset[1].astype('int32'))
    test_feats = af.from_ndarray(dataset[2])
    test_targets = af.from_ndarray(dataset[3].astype('int32'))

    n_train = train_feats.dims()[0]
    n_test = test_feats.dims()[0]

    print('arrayfire knn classifier implementation')

    clf = AfKNearestNeighbors(weight_by_dist=True)

    # Benchmark training
    started = time.time()
    clf.train(train_feats, train_targets)
    dt_train = time.time() - started
    print('Training time: {0:4.4f} s'.format(dt_train))

    # Benchmark prediction
    iters = 5
    test_outputs = None
    started = time.time()
    for _ in range(iters):
        test_outputs = clf.predict(test_feats)
        af.eval(test_outputs)
    af.sync()
    dt = time.time() - started
    print('Prediction time: {0:4.4f} s'.format(dt / iters))
    print('Accuracy (test data): {0:2.2f}'.format(
        accuracy2(test_outputs, test_targets)))

    return clf, dt_train
def riemann_solver(self, left_flux, right_flux, left_f, right_f, dim):
    """
    Returns the interface flux from the Riemann solver selected in
    ``self.physical_system.params.riemann_solver``.

    Parameters
    ----------
    left_flux, right_flux :
        Fluxes on the two sides of the interface.

    left_f, right_f :
        States on the two sides; only passed to the Lax-Friedrichs flux.

    dim : str
        Either 'q1' or 'q2'.

    Raises
    ------
    NotImplementedError
        For an unknown ``dim`` or an unknown solver name.
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    solver_name = self.physical_system.params.riemann_solver

    if solver_name == 'upwind-flux':
        if dim == 'q1':
            advection_speed = self._C_q1
        elif dim == 'q2':
            advection_speed = self._C_q2
        else:
            raise NotImplementedError('Invalid Option!')

        # Tile the speed to the extents of axes 1 and 2 of the flux
        velocity = af.tile(advection_speed, 1,
                           left_flux.shape[1], left_flux.shape[2])
        flux = upwind_flux(left_flux, right_flux, velocity)

    elif solver_name == 'lax-friedrichs':
        if dim == 'q1':
            c_lax = self.dt / self.dq1
        elif dim == 'q2':
            c_lax = self.dt / self.dq2
        else:
            raise NotImplementedError('Invalid Option!')

        flux = lax_friedrichs_flux(left_flux, right_flux,
                                   left_f, right_f, c_lax)

    else:
        raise NotImplementedError(
            'Riemann solver passed is invalid/not-implemented')

    if self.performance_test_flag == True:
        af.sync()
        self.time_riemann += af.time() - t_start

    return flux
def op_solve_src(self, dt):
    """
    Evolves the source term of the equations specified:

    df/dt = source

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    params = self.physical_system.params
    instantaneous = params.instantaneous_collisions == True

    # Solving for tau = 0 systems: the source routine is called directly
    # with a trailing True flag
    if instantaneous:
        self.f = self._source(self.f, self.time_elapsed,
                              self.q1_center, self.q2_center,
                              self.p1_center, self.p2_center, self.p3_center,
                              self.compute_moments,
                              self.physical_system.params,
                              True)

    # Otherwise integrate the source with RK2, when sources are enabled
    if params.source_enabled == True and not instantaneous:
        self.f = integrators.RK2(self._source, self.f, dt,
                                 self.time_elapsed,
                                 self.q1_center, self.q2_center,
                                 self.p1_center, self.p2_center,
                                 self.p3_center,
                                 self.compute_moments,
                                 self.physical_system.params)

    af.eval(self.f)

    if self.performance_test_flag == True:
        af.sync()
        self.time_sourcets += af.time() - t_start

    return
def op_fields(self, dt):
    """
    Evolves the following part of the equations specified:

    df/dt + A_p1 df/dp1 + A_p2 df/dp2 + A_p3 df/dp3 = 0

    The fields are updated first (electrostatic solve or FDTD evolution,
    depending on ``params.fields_solver``), then ``f`` is interpolated in
    p-space.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    params = self.physical_system.params

    if params.fields_solver == 'electrostatic':
        charge_density = multiply(params.charge,
                                  self.compute_moments('density'))
        self.fields_solver.compute_electrostatic_fields(charge_density)

    # Evolving fields:
    if params.fields_solver == 'fdtd':
        # Current components, each at (i + 1/2, j + 1/2)
        J1, J2, J3 = (
            multiply(params.charge, self.compute_moments(moment_name))
            for moment_name in ('mom_v1_bulk', 'mom_v2_bulk', 'mom_v3_bulk')
        )
        self.fields_solver.evolve_electrodynamic_fields(J1, J2, J3, dt)

    f_interp_p_3d(self, dt)
    af.eval(self.f)

    if self.performance_test_flag == True:
        af.sync()
        self.time_fieldstep += af.time() - t_start

    return
def bandwidth_test(n_evals):
    """Time ``n_evals`` array additions and return the resulting bandwidth.

    One warm-up addition is evaluated before timing starts. The figure is
    computed by ``memory_bandwidth`` from the element count, the constants
    2 and 1, the number of evaluations and the elapsed time.
    """
    lhs = af.randu(32, 32, 32**3, dtype=af.Dtype.f64)
    rhs = af.randu(32, 32, 32**3, dtype=af.Dtype.f64)

    # Warm-up evaluation, excluded from the timing
    total = lhs + rhs
    af.eval(total)
    af.sync()

    t_start = af.time()
    for _ in range(n_evals):
        total = lhs + rhs
        af.eval(total)
    af.sync()
    elapsed = af.time() - t_start

    return memory_bandwidth(lhs.elements(), 2, 1, n_evals, elapsed)
def op_solve_src(self, dt):
    """
    Evolves the source term of the equations specified:

    df/dt = source

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    # Solving for tau = 0 systems:
    tau = self.physical_system.params.tau(self.q1_center, self.q2_center,
                                          self.p1_center, self.p2_center,
                                          self.p3_center)

    if af.any_true(tau == 0):
        # Where tau == 0 take the directly evaluated source (called with a
        # trailing True flag); everywhere else keep f as it is.
        direct_source = self._source(self.f, self.time_elapsed,
                                     self.q1_center, self.q2_center,
                                     self.p1_center, self.p2_center,
                                     self.p3_center,
                                     self.compute_moments,
                                     self.physical_system.params,
                                     True)
        self.f = af.select(tau == 0, direct_source, self.f)

    self.f = integrators.RK2(self._source, self.f, dt,
                             self.time_elapsed,
                             self.q1_center, self.q2_center,
                             self.p1_center, self.p2_center, self.p3_center,
                             self.compute_moments,
                             self.physical_system.params)

    if self.performance_test_flag == True:
        af.sync()
        self.time_sourcets += af.time() - t_start

    return
def simple_device(verbose=False):
    """Exercise ArrayFire's device-management and device-pointer helpers.

    Checks per-device buffer accounting, then wraps a raw device pointer in
    a second array and runs a lock/unlock round trip on it.

    Parameters
    ----------
    verbose : bool
        Forwarded to the ``_util`` helpers that build the print/display
        callables.
    """
    show = _util.display_func(verbose)
    report = _util.print_func(verbose)

    report(af.device_info())
    report(af.get_device_count())
    report(af.is_dbl_supported())
    af.sync()

    original_device = af.get_device()
    report(original_device)

    # Each device must report exactly one extra buffer after one allocation.
    for device_id in range(af.get_device_count()):
        af.set_device(device_id)
        active = af.get_device()
        assert device_id == active

        report(af.is_dbl_supported(device_id))
        af.device_gc()

        before = af.device_mem_info()
        arr = af.randu(100, 100)
        af.sync(active)
        after = af.device_mem_info()

        assert after['alloc']['buffers'] == 1 + before['alloc']['buffers']
        assert after['lock']['buffers'] == 1 + before['lock']['buffers']

    af.set_device(original_device)

    # Wrap the raw device pointer of one array in a second array.
    arr = af.randu(10, 10)
    show(arr)
    raw_ptr = af.get_device_ptr(arr)
    report(raw_ptr)
    wrapped = af.Array(src=raw_ptr, dims=arr.dims(), dtype=arr.dtype(),
                       is_device=True)
    show(wrapped)

    af.lock_device_ptr(wrapped)
    af.unlock_device_ptr(wrapped)
def fdtd_evolve_E(self, dt):
    """
    Advances the electric field components stored in
    ``self.yee_grid_EM_fields[0:3]`` by one step of size ``dt``.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    dq1 = self.dq1
    dq2 = self.dq2

    B1 = self.yee_grid_EM_fields[3]
    B2 = self.yee_grid_EM_fields[4]
    B3 = self.yee_grid_EM_fields[5]

    # dE1/dt = + dB3/dq2
    # dE2/dt = - dB3/dq1
    # dE3/dt = dB2/dq1 - dB1/dq2

    # Differences against copies shifted by one cell along q1 (3rd axis)
    # or q2 (4th axis)
    dB1_q2 = B1 - af.shift(B1, 0, 0, 0, 1)
    dB2_q1 = B2 - af.shift(B2, 0, 0, 1, 0)
    dB3_q1 = B3 - af.shift(B3, 0, 0, 1, 0)
    dB3_q2 = B3 - af.shift(B3, 0, 0, 0, 1)

    self.yee_grid_EM_fields[0] += (dt / dq2) * dB3_q2 - self.J1 * dt
    self.yee_grid_EM_fields[1] += -(dt / dq1) * dB3_q1 - self.J2 * dt
    self.yee_grid_EM_fields[2] += (dt / dq1) * dB2_q1 \
                                  - (dt / dq2) * dB1_q2 \
                                  - dt * self.J3

    af.eval(self.yee_grid_EM_fields)

    if self.performance_test_flag == True:
        af.sync()
        self.time_fieldsolver += af.time() - t_start

    return
def communicate_fields(self, on_fdtd_grid = False):
    """
    Communicates the boundary-zone values of the EM field arrays among all
    procs (this also takes care of periodic B.C's).

    By default the cell-centered field array is communicated. When
    ``on_fdtd_grid`` is True the Yee-grid array used by the FDTD solver is
    communicated instead.
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    # Name of the attribute holding the field array to communicate
    if on_fdtd_grid is True:
        field_attr = 'yee_grid_EM_fields'
    else:
        field_attr = 'cell_centered_EM_fields'

    # Assigning the values of the af.Array fields quantities
    # to the PETSc.Vec:
    af_to_petsc_glob_array(self, getattr(self, field_attr),
                           self._glob_fields_array)

    # Takes care of boundary conditions and interzonal communications:
    self._da_fields.globalToLocal(self._glob_fields, self._local_fields)

    # Converting back to af.Array
    communicated = petsc_local_array_to_af(self, 6, 1,
                                           self._local_fields_array)
    setattr(self, field_attr, communicated)

    if self.performance_test_flag == True:
        af.sync()
        self.time_communicate_fields += af.time() - t_start

    return
def simple_device(verbose=False):
    """Check device switching, buffer accounting and device-pointer locking.

    Parameters
    ----------
    verbose : bool
        Forwarded to the ``_util`` helpers that build the print/display
        callables.
    """
    display = _util.display_func(verbose)
    log = _util.print_func(verbose)

    # General device information
    for info in (af.device_info, af.get_device_count, af.is_dbl_supported):
        log(info())
    af.sync()

    starting_device = af.get_device()
    log(starting_device)

    n_devices = af.get_device_count()
    for index in range(n_devices):
        af.set_device(index)
        selected = af.get_device()
        assert index == selected

        log(af.is_dbl_supported(index))

        # Baseline buffer counts after garbage collection
        af.device_gc()
        baseline = af.device_mem_info()

        # One allocation must add exactly one allocated and one locked buffer
        data = af.randu(100, 100)
        af.sync(selected)
        current = af.device_mem_info()
        assert current['alloc']['buffers'] == 1 + baseline['alloc']['buffers']
        assert current['lock']['buffers'] == 1 + baseline['lock']['buffers']

    af.set_device(starting_device)

    # Build a second array on top of the first one's device pointer
    data = af.randu(10, 10)
    display(data)
    pointer = af.get_device_ptr(data)
    log(pointer)
    alias = af.Array(src=pointer, dims=data.dims(), dtype=data.dtype(),
                     is_device=True)
    display(alias)

    af.lock_device_ptr(alias)
    af.unlock_device_ptr(alias)
def op_solve_src(self, dt):
    """
    Evolves the source term over a step of size ``dt``.

    If ``tau`` is zero anywhere, the source routine is called directly
    with a trailing True flag. Otherwise the source is integrated with RK2.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    # Solving for tau = 0 systems
    tau = self.physical_system.params.tau(self.q1_center, self.q2_center,
                                          self.p1, self.p2, self.p3)
    has_zero_tau = af.any_true(tau == 0)

    if has_zero_tau:
        self.f = self._source(self.f,
                              self.q1_center, self.q2_center,
                              self.p1, self.p2, self.p3,
                              self.compute_moments,
                              self.physical_system.params,
                              True)
    else:
        self.f = integrators.RK2(self._source, self.f, dt,
                                 self.q1_center, self.q2_center,
                                 self.p1, self.p2, self.p3,
                                 self.compute_moments,
                                 self.physical_system.params)

    if self.performance_test_flag == True:
        af.sync()
        self.time_sourcets += af.time() - t_start

    return
def fdtd_evolve_B(self, dt):
    """
    Advances the magnetic field components stored in
    ``self.yee_grid_EM_fields[3:6]`` by one step of size ``dt``.

    Parameters
    ----------
    dt : double
         Time-step size to evolve the system
    """
    if self.performance_test_flag == True:
        t_start = af.time()

    dq1 = self.dq1
    dq2 = self.dq2

    E1 = self.yee_grid_EM_fields[0]
    E2 = self.yee_grid_EM_fields[1]
    E3 = self.yee_grid_EM_fields[2]

    # dB1/dt = - dE3/dq2
    # dB2/dt = + dE3/dq1
    # dB3/dt = - (dE2/dq1 - dE1/dq2)

    # Differences of copies shifted by -1 along q1 (3rd axis) or q2
    # (4th axis) against the unshifted field
    dE1_q2 = af.shift(E1, 0, 0, 0, -1) - E1
    dE2_q1 = af.shift(E2, 0, 0, -1, 0) - E2
    dE3_q1 = af.shift(E3, 0, 0, -1, 0) - E3
    dE3_q2 = af.shift(E3, 0, 0, 0, -1) - E3

    self.yee_grid_EM_fields[3] += -(dt / dq2) * dE3_q2
    self.yee_grid_EM_fields[4] += (dt / dq1) * dE3_q1
    self.yee_grid_EM_fields[5] += - (dt / dq1) * dE2_q1 \
                                  + (dt / dq2) * dE1_q2

    af.eval(self.yee_grid_EM_fields)

    if self.performance_test_flag == True:
        af.sync()
        self.time_fieldsolver += af.time() - t_start

    return
def run(iters):
    """Queue ``iters`` 2D FFTs of ``A`` and wait once for them to finish.

    ``A`` is not a parameter; it is taken from the enclosing scope.
    """
    for _ in range(iters):
        spectrum = af.fft2(A)
    af.sync()
return af.mean(payoff) * math.exp(-r * t)  # NOTE(review): tail of a function whose header lies above this chunk


def monte_carlo_simulate(N, use_barrier, num_iter = 10):
    """Return the mean wall-clock time of one ``monte_carlo_options`` call.

    Parameters
    ----------
    N :
        Passed to ``monte_carlo_options`` as its first argument (the number
        of paths, going by the report printed by the script below).
    use_barrier : bool
        Passed through to ``monte_carlo_options``.
    num_iter : int
        Number of repetitions the timing is averaged over.

    Returns
    -------
    float
        Elapsed seconds divided by ``num_iter``.
    """
    # Fixed option parameters used for every timed call
    steps = 180
    stock_price = 100.0
    maturity = 0.5
    volatility = 0.3
    rate = 0.01
    strike = 100
    barrier = 115.0

    start = time()
    for i in range(num_iter):
        monte_carlo_options(N, stock_price, maturity, volatility, rate, strike, steps, use_barrier, barrier)

    return (time() - start) / num_iter


if __name__ == "__main__":
    # Optional first command-line argument: index of the device to use
    if (len(sys.argv) > 1):
        af.set_device(int(sys.argv[1]))
    af.info()

    # Untimed calls with a small N before the reported measurements
    monte_carlo_simulate(1000, use_barrier = False)
    monte_carlo_simulate(1000, use_barrier = True )
    af.sync()

    # Report mean time per call (converted to ms) for 10k..100k paths
    for n in range(10000, 100001, 10000):
        print("Time for %7d paths - vanilla method: %4.3f ms, barrier method: % 4.3f ms\n" %
              (n, 1000 * monte_carlo_simulate(n, False, 100), 1000 * monte_carlo_simulate(n, True, 100)))
# NOTE(review): the first statement appears to be the tail of a
# `from ... \ import nonlinear_solver` continuation whose first line lies
# outside this excerpt - confirm before relying on it.
import nonlinear_solver

# Input files describing the problem that is used for the performance test
from bolt.lib.nonlinear_solver.tests.performance.input_files \
    import domain
from bolt.lib.nonlinear_solver.tests.performance.input_files \
    import boundary_conditions
from bolt.lib.nonlinear_solver.tests.performance.input_files \
    import params
from bolt.lib.nonlinear_solver.tests.performance.input_files \
    import initialize

# Physics modules that are passed to the physical system below
import bolt.src.nonrelativistic_boltzmann.advection_terms as advection_terms
import bolt.src.nonrelativistic_boltzmann.collision_operator \
    as collision_operator
import bolt.src.nonrelativistic_boltzmann.moment_defs as moment_defs

# Defining the physical system to be solved:
system = physical_system(domain,
                         boundary_conditions,
                         params,
                         initialize,
                         advection_terms,
                         collision_operator.BGK,
                         moment_defs)

# Defining a nonlinear solver object:
# NOTE(review): the second argument (True) presumably enables the
# performance-timing mode of the solver - confirm against its constructor.
nls = nonlinear_solver(system, True)

# A single timestep followed by a device sync before the main loop
nls.strang_timestep(0.001)
af.sync()

# 100 further timesteps of the same size
for i in range(100):
    nls.strang_timestep(0.001)
# NOTE(review): the sync is assumed to follow the loop - confirm.
af.sync()

# 101 = the 1 timestep before the loop + the 100 timesteps inside it
nls.print_performance_timings(101)
def bench(A, iters=100):
    """
    Measures the average time taken by ``af.fft2`` on ``A``.

    Parameters
    ----------

    A : input handed to ``af.fft2`` on every iteration.

    iters : int
            Number of transforms to issue before synchronizing.

    Returns
    -------

    The ``time()`` difference across all iterations (including the final
    device sync) divided by ``iters``.
    """
    t_begin = time()

    for _ in range(iters):
        transformed = af.fft2(A)

    # A single barrier after the loop so that all queued transforms are
    # complete before the clock is read.
    af.sync()

    elapsed = time() - t_begin
    return elapsed / iters
def _apply_bcs_fields_on_boundary(self, bc_type, boundary, on_fdtd_grid):
    """
    Applies the boundary condition named by ``bc_type`` to the EM fields
    on a single physical boundary.

    Parameters
    ----------

    bc_type : str
              One of 'dirichlet', 'mirror', 'mirror+dirichlet',
              'periodic' or 'shearing-box'.

    boundary : str
               Boundary identifier forwarded to the individual
               boundary-condition routines ('left', 'right', 'bottom'
               or 'top').

    on_fdtd_grid : bool
                   Forwarded unchanged to the individual
                   boundary-condition routines.

    Raises
    ------

    NotImplementedError
        If ``bc_type`` is not one of the supported names.
    """
    if bc_type == 'dirichlet':
        apply_dirichlet_bcs_fields(self, boundary, on_fdtd_grid)

    elif bc_type == 'mirror':
        apply_mirror_bcs_fields(self, boundary, on_fdtd_grid)

    elif bc_type == 'mirror+dirichlet':
        # Order matters: mirror first, then dirichlet
        apply_mirror_bcs_fields(self, boundary, on_fdtd_grid)
        apply_dirichlet_bcs_fields(self, boundary, on_fdtd_grid)

    # This is automatically handled by the PETSc function globalToLocal()
    elif bc_type == 'periodic':
        pass

    elif bc_type == 'shearing-box':
        apply_shearing_box_bcs_fields(self, boundary, on_fdtd_grid)

    else:
        raise NotImplementedError('Unavailable/Invalid boundary condition')


def apply_bcs_fields(self, on_fdtd_grid=False):
    """
    Applies boundary conditions to the EM fields as specified by
    the user in params.

    Only the physical boundaries that the local zone touches are
    processed. When ``self.performance_test_flag`` is set, the elapsed
    time is accumulated into ``self.time_apply_bcs_fields``.

    Parameters
    ----------

    on_fdtd_grid : bool
                   Forwarded unchanged to the individual
                   boundary-condition routines.

    Raises
    ------

    NotImplementedError
        If a boundary touched by the local zone has an unsupported
        boundary condition.
    """
    if self.performance_test_flag == True:
        tic = af.time()

    # Obtaining start coordinates for the local zone
    # Additionally, we also obtain the size of the local zone
    ((i_q1_start, i_q2_start), (N_q1_local, N_q2_local)) = \
        self._da_fields.getCorners()

    # Obtaining the end coordinates for the local zone
    i_q1_end = i_q1_start + N_q1_local - 1
    i_q2_end = i_q2_start + N_q2_local - 1

    # The boundary-condition attributes are read only for the boundaries
    # that the local zone actually includes.

    # If local zone includes the left physical boundary:
    if i_q1_start == 0:
        _apply_bcs_fields_on_boundary(self,
                                      self.boundary_conditions.in_q1_left,
                                      'left', on_fdtd_grid
                                     )

    # If local zone includes the right physical boundary:
    if i_q1_end == self.N_q1 - 1:
        _apply_bcs_fields_on_boundary(self,
                                      self.boundary_conditions.in_q1_right,
                                      'right', on_fdtd_grid
                                     )

    # If local zone includes the bottom physical boundary:
    if i_q2_start == 0:
        _apply_bcs_fields_on_boundary(self,
                                      self.boundary_conditions.in_q2_bottom,
                                      'bottom', on_fdtd_grid
                                     )

    # If local zone includes the top physical boundary:
    if i_q2_end == self.N_q2 - 1:
        _apply_bcs_fields_on_boundary(self,
                                      self.boundary_conditions.in_q2_top,
                                      'top', on_fdtd_grid
                                     )

    af.eval(self.cell_centered_EM_fields)

    if self.performance_test_flag == True:
        # Wait for the device before reading the clock so that the
        # measured interval includes the work queued above.
        af.sync()
        toc = af.time()
        self.time_apply_bcs_fields += toc - tic

    return
#######################################################
# Copyright (c) 2015, ArrayFire
# All rights reserved.
#
# This file is distributed under 3-clause BSD license.
# The complete license agreement can be obtained at:
# http://arrayfire.com/licenses/BSD-3-Clause
########################################################

import arrayfire as af

# Smoke test for the device API: prints device information, then cycles
# through every device and checks that allocating one array adds exactly
# one allocated and one locked buffer on that device.

af.info()
print(af.device_info())
print(af.get_device_count())
print(af.is_dbl_supported())
af.sync()

# Remember the active device so that it can be restored after the loop
initial_dev = af.get_device()

print('starting the loop')
for k in range(af.get_device_count()):
    af.set_device(k)
    dev = af.get_device()
    assert(k == dev)

    print(af.is_dbl_supported(k))

    # Take a baseline after garbage collection instead of assuming that
    # the memory manager holds no buffers; the checks below are relative
    # to this snapshot.
    af.device_gc()
    mem_info_old = af.device_mem_info()

    a = af.randu(100, 100)
    af.sync(dev)

    mem_info = af.device_mem_info()
    assert(mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers'])
    assert(mem_info[ 'lock']['buffers'] == 1 + mem_info_old[ 'lock']['buffers'])

# Leave the device selection as it was found
af.set_device(initial_dev)
def bench(A, iters = 100):
    """
    Measures the average time taken by ``af.matmul(A, A)``.

    Parameters
    ----------

    A : operand multiplied with itself on every iteration.

    iters : int
            Number of products to issue before synchronizing.

    Returns
    -------

    The ``time()`` difference across all iterations (including the final
    device sync) divided by ``iters``.
    """
    t_begin = time()

    for _ in range(iters):
        product = af.matmul(A, A)

    # A single barrier after the loop so that all queued products are
    # complete before the clock is read.
    af.sync()

    elapsed = time() - t_begin
    return elapsed / iters