def simple_device(verbose=False):
    """Smoke-test device selection and per-device memory accounting.

    Reports device info, device count and double-precision support, then
    visits every device and checks that allocating a single array raises
    both the ``alloc`` and the ``lock`` buffer counters by exactly one.
    The device that was active on entry is re-selected before returning.

    :param verbose: forwarded to ``_util.display_func`` / ``_util.print_func``.
    :raises AssertionError: if a device cannot be selected or the buffer
        counters do not change as expected.
    """
    display_func = _util.display_func(verbose)  # not used by this test
    print_func = _util.print_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    # Remember the initially active device under its own name: the loop
    # below rebinds ``dev``, so reusing ``dev`` for the final restore would
    # leave the last visited device selected instead of the original one.
    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert k == dev

        print_func(af.is_dbl_supported(k))

        # Collect garbage first so the counters below start from a clean state.
        af.device_gc()

        mem_info_old = af.device_mem_info()

        # ``a`` is only allocated to move the buffer counters.
        a = af.randu(100, 100)
        af.sync(dev)
        mem_info = af.device_mem_info()
        assert mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers']
        assert mem_info['lock']['buffers'] == 1 + mem_info_old['lock']['buffers']

    af.set_device(curr_dev)
def simple_device(verbose=False):
    """Exercise device enumeration, selection and memory bookkeeping.

    For every available device the test selects it, runs a device garbage
    collection, allocates one 100x100 random array and asserts that the
    ``alloc`` and ``lock`` buffer counts each grew by one. On exit the
    device that was active when the test started is selected again.

    :param verbose: forwarded to ``_util.display_func`` / ``_util.print_func``.
    :raises AssertionError: on a device-selection or buffer-count mismatch.
    """
    display_func = _util.display_func(verbose)  # not used by this test
    print_func = _util.print_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    # Keep the starting device separate from the loop variable ``dev``;
    # otherwise the restore at the end would pick the last device visited.
    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert (k == dev)

        print_func(af.is_dbl_supported(k))

        af.device_gc()

        mem_info_old = af.device_mem_info()

        # The array itself is unused; it exists to change the counters.
        a = af.randu(100, 100)
        af.sync(dev)
        mem_info = af.device_mem_info()
        assert (mem_info['alloc']['buffers'] == 1 + mem_info_old['alloc']['buffers'])
        assert (mem_info['lock']['buffers'] == 1 + mem_info_old['lock']['buffers'])

    af.set_device(curr_dev)
def deconvolve_gpu_chunked(vol_a: Volume, vol_b: Volume, n: int, psf_a: np.ndarray, psf_b: np.ndarray,
                           nchunks: int, blind: bool = False) -> Volume:
    """
    Perform joint Richardson-Lucy deconvolution on two volumes using two specified PSFs on the GPU in chunks along
    the z-axis

    Each chunk is extended on both sides by four times the z-extent of ``psf_a`` (clamped to the volume bounds),
    deconvolved, and only the un-padded part is written to the result. The result is then clipped to its
    0.002 / 99.998 percentiles and rescaled to the full ``uint16`` range.

    :param vol_a: The first volume
    :param vol_b: The second volume
    :param n: The number of Richardson-Lucy iterations
    :param psf_a: The PSF for the first volume
    :param psf_b: The PSF for the second volume
    :param blind: Whether to perform blind RL deconvolution using the given PSFs as initial estimates
    :param nchunks: The number of chunks to subdivide the volume into
    :return: The fused RL deconvolution
    :raises ValueError: If ``nchunks`` is smaller than 1
    """
    import arrayfire as af

    if nchunks < 1:
        raise ValueError(f'nchunks must be at least 1, got {nchunks}')

    depth = vol_a.shape[2]
    result = np.zeros(vol_a.shape, np.float32)
    chunk_size = depth // nchunks

    # The padding only depends on the PSF, so compute it once.
    lpad = int(psf_a.shape[2] * 4)
    rpad = int(psf_a.shape[2] * 4)

    for i in range(nchunks):
        start = i * chunk_size
        # The last chunk absorbs the remainder of the integer division.
        end = (i + 1) * chunk_size if i < nchunks - 1 else depth

        start_exp = max(0, start - lpad)
        end_exp = min(depth, end + rpad)

        with metrack.Context(f'Chunk {i}'):
            if not blind:
                chunk_est = deconvolve_gpu(
                    Volume(vol_a[:, :, start_exp:end_exp], False, (1, 1, 1)),
                    Volume(vol_b[:, :, start_exp:end_exp], False, (1, 1, 1)),
                    n, psf_a, psf_b)
            else:
                chunk_est = deconvolve_gpu_blind(
                    Volume(vol_a[:, :, start_exp:end_exp], False, (1, 1, 1)),
                    Volume(vol_b[:, :, start_exp:end_exp], False, (1, 1, 1)),
                    n, 5, psf_a, psf_b)

        # Release device memory held by the finished chunk.
        af.device_gc()

        if end != end_exp:
            # ``end - end_exp`` is negative here: it trims the right padding.
            result[:, :, start:end] = chunk_est[:, :, start - start_exp:end - end_exp]
        else:
            result[:, :, start:end] = chunk_est[:, :, start - start_exp:]

    # FIXME: Proper outlier clipping!
    e_min, e_max = np.percentile(result, [0.002, 99.998])
    span = e_max - e_min
    if span > 0:
        scaled = (np.clip(result, e_min, e_max) - e_min) / span * (2**16 - 1)
    else:
        # Constant (or NaN-percentile) data: avoid dividing by zero and
        # casting NaN to uint16; emit an all-zero volume instead.
        scaled = np.zeros_like(result)
    result = scaled.astype(np.uint16)

    return Volume(result, inverted=False, spacing=(1, 1, 1))
def run_cases(q_dim, p_dim, charge_electron, tau):
    """Run the nonlinear solver at every resolution in ``N`` and dump ``f``.

    The module-level ``params`` and ``domain`` objects are mutated to
    describe the requested case before each run.

    :param q_dim: configuration-space dimensionality; 2 enables the q2 axis.
    :param p_dim: velocity-space dimensionality (p2 is configured for
        ``p_dim >= 2``, p3 for ``p_dim == 3``).
    :param charge_electron: stored in ``params.charge[0]``; when non-zero the
        velocity axes are resolved with ``N[i]`` points instead of 32.
    :param tau: stored in ``params.tau``.
    """
    params.tau = tau
    params.charge[0] = charge_electron

    use_p2 = p_dim >= 2
    use_p3 = p_dim == 3

    # One full simulation per resolution.
    for idx in range(N.size):
        af.device_gc()
        n_points = int(N[idx])

        domain.N_q1 = n_points
        if q_dim == 2:
            domain.N_q2 = n_points
            params.k_q2 = 4 * np.pi

        # Default velocity grids: 32 points on [-10, 10].
        if use_p2:
            domain.N_p2 = 32
            domain.p2_start = -10
            domain.p2_end = 10
        if use_p3:
            domain.N_p3 = 32
            domain.p3_start = -10
            domain.p3_end = 10

        # With a non-zero charge the velocity axes follow the sweep resolution.
        if charge_electron != 0:
            domain.N_p1 = n_points
            if use_p2:
                domain.N_p2 = n_points
            if use_p3:
                domain.N_p3 = n_points

        params.p_dim = p_dim

        # Build the physical system and its nonlinear solver.
        system = physical_system(domain, boundary_conditions, params,
                                 initialize, advection_terms,
                                 collision_operator.BGK, moments)
        nls = nonlinear_solver(system)

        # Time step from the CFL condition.
        dt = params.N_cfl * min(nls.dq1, nls.dq2) \
            / max(domain.p1_end, domain.p2_end, domain.p3_end)

        time_array = np.arange(dt, params.t_final + dt, dt)
        # Drop the last entry if it overshoots the final time.
        if time_array[-1] > params.t_final:
            time_array = np.delete(time_array, -1)

        for step, _ in enumerate(time_array):
            nls.strang_timestep(dt)
            # Filter the distribution function after every 25th step
            # (including the first).
            if step % 25 == 0:
                nls.f = lowpass_filter(nls.f)

        nls.dump_distribution_function('dump_files/nlsf_' + str(N[idx]))
def simple_device(verbose=False):
    """Run the device API checks: selection, memory info, pointers, eval.

    Steps, in order: report device info; for each device verify selection
    and that one allocation bumps the ``alloc``/``lock`` buffer counters by
    one; restore the starting device; wrap a raw device pointer in a new
    array; lock/unlock an array; evaluate arrays singly and jointly; toggle
    the manual-eval flag; display the lock state of an array.

    :param verbose: forwarded to ``_util.display_func`` / ``_util.print_func``.
    """
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    for report in (af.device_info, af.get_device_count, af.is_dbl_supported):
        print_func(report())
    af.sync()

    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert dev == k

        print_func(af.is_dbl_supported(k))

        af.device_gc()
        mem_before = af.device_mem_info()

        # Allocation whose only purpose is to move the counters.
        a = af.randu(100, 100)
        af.sync(dev)
        mem_after = af.device_mem_info()
        for counter in ('alloc', 'lock'):
            assert mem_after[counter]['buffers'] == 1 + mem_before[counter]['buffers']

    af.set_device(curr_dev)

    # Build a second array on top of an existing device pointer.
    a = af.randu(10,10)
    display_func(a)
    dev_ptr = af.get_device_ptr(a)
    print_func(dev_ptr)
    b = af.Array(src=dev_ptr, dims=a.dims(), dtype=a.dtype(), is_device=True)
    display_func(b)

    # Lock / unlock round trip.
    c = af.randu(10,10)
    af.lock_array(c)
    af.unlock_array(c)

    # Explicit evaluation, one array at a time and then two together.
    a = af.constant(1, 3, 3)
    b = af.constant(2, 3, 3)
    af.eval(a)
    af.eval(b)
    print_func(a)
    print_func(b)
    c = a + b
    d = a - b
    af.eval(c, d)
    print_func(c)
    print_func(d)

    # The manual-eval flag must read back what was just set.
    for flag in (True, False):
        print_func(af.set_manual_eval_flag(flag))
        assert af.get_manual_eval_flag() == flag

    display_func(af.is_locked_array(a))
def test_df_dp_background():
    """Check that the background df/dp computation converges at 4th order.

    For resolutions ``32 * [1..9]`` the three momentum derivatives computed
    by ``calculate_dfdp_background`` are compared against
    ``-2 * p_k * exp(-p1^2) * exp(-p2^2) * exp(-p3^2)``. The slope of
    log10(error) against log10(N) must be within 0.2 of -4 for each
    component.
    """
    N = 32 * np.arange(1, 10)
    # Row k holds the mean absolute error of the (k+1)-th derivative.
    errors = np.zeros((3, N.size))

    for idx in range(N.size):
        af.device_gc()

        obj = test(N[idx])
        calculate_dfdp_background(obj)

        expected = [
            -2 * p
            * af.exp(-obj.p1**2)
            * af.exp(-obj.p2**2)
            * af.exp(-obj.p3**2)
            for p in (obj.p1, obj.p2, obj.p3)
        ]

        af.eval(obj.dfdp1_background,
                obj.dfdp2_background,
                obj.dfdp3_background)
        computed = (obj.dfdp1_background,
                    obj.dfdp2_background,
                    obj.dfdp3_background)

        for axis in range(3):
            abs_diff = af.abs(expected[axis] - computed[axis])
            errors[axis, idx] = af.sum(abs_diff) / expected[axis].elements()

    log_N = np.log10(N)
    slopes = [np.polyfit(log_N, np.log10(errors[axis]), 1)[0]
              for axis in range(3)]

    for slope in slopes:
        assert abs(slope + 4) < 0.2
def check_error(params):
    """Return the mean absolute error of the solver at each resolution in ``N``.

    For every ``N[i]`` the module-level ``domain`` is resized in q1 and p1,
    the system is evolved to ``t_final = 0.1`` with Strang-split steps, and
    the result is compared (ghost zones excluded) with the initial
    condition evaluated at ``q1 - p1 * t_final``.

    :param params: parameter object passed to ``physical_system`` and to
        ``initialize.initialize_f``.
    :return: array of errors, one entry per resolution.
    """
    t_final = 0.1
    error = np.zeros(N.size)

    for idx in range(N.size):
        af.device_gc()

        resolution = int(N[idx])
        domain.N_q1 = resolution
        domain.N_p1 = resolution

        # Physical system and the nonlinear solver that evolves it.
        system = physical_system(domain, boundary_conditions, params,
                                 initialize, advection_terms,
                                 collision_operator.BGK, moments)
        nls = nonlinear_solver(system)
        N_g = nls.N_ghost

        # The time step shrinks in proportion to the q1 resolution.
        dt = 0.001 * 32/nls.N_q1
        time_array = np.arange(dt, t_final + dt, dt)

        # Reference: initial condition evaluated at q1 - p1 * t_final.
        q1_shifted = af.broadcast(lambda a, b: a + b,
                                  nls.q1_center, - nls.p1_center * t_final)
        f_reference = af.broadcast(initialize.initialize_f,
                                   q1_shifted, nls.q2_center,
                                   nls.p1_center, nls.p2_center, nls.p3_center,
                                   params)

        for _ in time_array:
            nls.strang_timestep(dt)

        # Compare only the interior, skipping N_g ghost cells on each side.
        f_interior = nls.f[:, :, N_g:-N_g, N_g:-N_g]
        ref_interior = f_reference[:, :, N_g:-N_g, N_g:-N_g]
        error[idx] = af.mean(af.abs(f_interior - ref_interior))

    return error
def test_f_interp_p_3d():
    """Check that ``f_interp_p_3d`` converges at second order.

    For N = 32, 64, 128, 256 the interpolated ``obj.f`` is compared with
    ``exp(-(p1 - s)^2 - 2 (p2 - s)^2 - 3 (p3 - s)^2)`` for ``s = 1e-5``; the
    slope of log10(error) against log10(N) must be within 0.2 of -2.
    """
    N = 2**np.arange(5, 9)
    error = np.zeros(N.size)

    for idx in range(N.size):
        af.device_gc()

        obj = test(int(N[idx]))
        f_interp_p_3d(obj, 1e-5)

        exponent = - (obj.p1 - 1e-5)**2 \
                   - 2*(obj.p2 - 1e-5)**2 \
                   - 3*(obj.p3 - 1e-5)**2
        f_analytic = af.exp(exponent)

        abs_diff = af.abs(obj.f - f_analytic)
        error[idx] = af.sum(abs_diff) / f_analytic.elements()

    slope = np.polyfit(np.log10(N), np.log10(error), 1)[0]
    assert abs(slope + 2) < 0.2
def simple_device(verbose=False):
    """Check device selection, memory counters and device-pointer wrapping.

    Every device is selected in turn and one allocation must raise the
    ``alloc`` and ``lock`` buffer counters by one. The starting device is
    then restored, an array is rebuilt from a raw device pointer, and that
    array is passed through ``lock_device_ptr`` / ``unlock_device_ptr``.

    :param verbose: forwarded to ``_util.display_func`` / ``_util.print_func``.
    """
    display_func = _util.display_func(verbose)
    print_func = _util.print_func(verbose)

    for report in (af.device_info, af.get_device_count, af.is_dbl_supported):
        print_func(report())
    af.sync()

    curr_dev = af.get_device()
    print_func(curr_dev)

    for k in range(af.get_device_count()):
        af.set_device(k)
        dev = af.get_device()
        assert dev == k

        print_func(af.is_dbl_supported(k))

        af.device_gc()
        mem_before = af.device_mem_info()

        # Allocated only so the buffer counters change.
        a = af.randu(100, 100)
        af.sync(dev)
        mem_after = af.device_mem_info()
        for counter in ('alloc', 'lock'):
            assert mem_after[counter]['buffers'] == 1 + mem_before[counter]['buffers']

    af.set_device(curr_dev)

    # Wrap the device pointer of ``a`` in a second array.
    a = af.randu(10, 10)
    display_func(a)
    dev_ptr = af.get_device_ptr(a)
    print_func(dev_ptr)
    b = af.Array(src=dev_ptr, dims=a.dims(), dtype=a.dtype(), is_device=True)
    display_func(b)

    af.lock_device_ptr(b)
    af.unlock_device_ptr(b)
def simple_device(verbose=False):
    """Device test: enumerate devices, verify memory counters, wrap a pointer.

    The per-device loop asserts that selecting device ``k`` takes effect
    and that a single allocation increases both the ``alloc`` and ``lock``
    buffer counts by one. Afterwards the original device is re-selected and
    an array built from a raw device pointer is locked and unlocked.

    :param verbose: forwarded to ``_util.display_func`` / ``_util.print_func``.
    """
    print_func = _util.print_func(verbose)
    display_func = _util.display_func(verbose)

    print_func(af.device_info())
    print_func(af.get_device_count())
    print_func(af.is_dbl_supported())
    af.sync()

    curr_dev = af.get_device()
    print_func(curr_dev)

    device_count = af.get_device_count()
    for k in range(device_count):
        af.set_device(k)
        dev = af.get_device()
        assert k == dev

        print_func(af.is_dbl_supported(k))

        af.device_gc()
        counters_old = af.device_mem_info()

        # This array exists only to be counted.
        a = af.randu(100, 100)
        af.sync(dev)
        counters_new = af.device_mem_info()

        alloc_old = counters_old['alloc']['buffers']
        assert counters_new['alloc']['buffers'] == 1 + alloc_old
        lock_old = counters_old['lock']['buffers']
        assert counters_new['lock']['buffers'] == 1 + lock_old

    af.set_device(curr_dev)

    # Rebuild an array from the raw device pointer of ``a``.
    a = af.randu(10,10)
    display_func(a)
    dev_ptr = af.get_device_ptr(a)
    print_func(dev_ptr)
    b = af.Array(src=dev_ptr, dims=a.dims(), dtype=a.dtype(), is_device=True)
    display_func(b)

    af.lock_device_ptr(b)
    af.unlock_device_ptr(b)
from bolt.lib.nonlinear.nonlinear_solver import nonlinear_solver

import domain
import boundary_conditions
import params
import initialize

import bolt.src.nonrelativistic_boltzmann.advection_terms as advection_terms
import bolt.src.nonrelativistic_boltzmann.collision_operator as collision_operator
import bolt.src.nonrelativistic_boltzmann.moments as moments

# NOTE(review): ``np``, ``af`` and ``physical_system`` are used below but are
# not imported in the visible part of this script — presumably imported
# earlier in the file; confirm.

# Resolutions to sweep; the larger sizes are currently commented out.
N = np.array([32, 48, 64, 96, 112]) #, 128, 144, 160])
# One error slot per resolution (not filled in within the visible code).
error = np.zeros(N.size)

for i in range(N.size):
    # Run the device garbage collector before setting up the next resolution.
    af.device_gc()

    # Use the same number of points along q1, q2, p1 and p2.
    domain.N_q1 = int(N[i])
    domain.N_q2 = int(N[i])
    domain.N_p1 = int(N[i])
    domain.N_p2 = int(N[i])

    # Defining the physical system to be solved:
    system = physical_system(domain, boundary_conditions, params, initialize, advection_terms, collision_operator.BGK, moments )
    # NOTE(review): the visible fragment ends here; the loop body likely
    # continues in the original file.
def check_error(params):
    """Compare dumped polar-grid solutions against the reference solution.

    For each resolution ``N[i]`` a cell-centred (r, theta, rdot, thetadot,
    phidot) grid is built from the bounds in the module-level ``domain``,
    converted to Cartesian positions and velocities, and the initial
    condition is evaluated at ``q - p * params.t_final``. The result is
    compared with the distribution function stored in
    ``dump/<N, zero-padded to 4 digits>.h5``.

    :param params: parameter object; ``params.t_final`` is read and the
        object is forwarded to ``initialize.initialize_f``.
    :return: array with the mean absolute error for every resolution.
    """
    error = np.zeros(N.size)

    for i in range(N.size):
        af.device_gc()
        print(N[i])

        N_r = int(N[i])
        N_theta = int(N[i])
        N_rdot = int(N[i])
        N_thetadot = int(N[i])
        N_phidot = 1
        N_p = N_rdot * N_thetadot * N_phidot

        # Cell widths along each axis.
        dr = (domain.q1_end - domain.q1_start) / N_r
        dtheta = (domain.q2_end - domain.q2_start) / N_theta
        drdot = (domain.p1_end[0] - domain.p1_start[0]) / N_rdot
        dthetadot = (domain.p2_end[0] - domain.p2_start[0]) / N_thetadot
        dphidot = (domain.p3_end[0] - domain.p3_start[0]) / N_phidot

        # Cell-centred coordinates.
        r = domain.q1_start + (0.5 + np.arange(N_r)) * dr
        theta = domain.q2_start + (0.5 + np.arange(N_theta)) * dtheta
        rdot = domain.p1_start[0] + (0.5 + np.arange(N_rdot)) * drdot
        thetadot = domain.p2_start[0] + (0.5 + np.arange(N_thetadot)) * dthetadot
        phidot = domain.p3_start[0] + (0.5 + np.arange(N_phidot)) * dphidot

        thetadot, rdot, phidot = np.meshgrid(thetadot, rdot, phidot)
        theta, r = np.meshgrid(theta, r)

        # Positions vary along axes 1 and 2, velocities along axis 0.
        r = r.reshape(1, N_r, N_theta)
        theta = theta.reshape(1, N_r, N_theta)
        rdot = rdot.flatten('F').reshape(N_p, 1, 1)
        thetadot = thetadot.flatten('F').reshape(N_p, 1, 1)
        phidot = phidot.flatten('F').reshape(N_p, 1, 1)

        # Polar -> Cartesian positions and velocities.
        q1 = r * np.cos(theta)
        q2 = r * np.sin(theta)
        p1 = rdot * np.cos(theta) - r * np.sin(theta) * thetadot
        p2 = rdot * np.sin(theta) + r * np.cos(theta) * thetadot
        p3 = phidot

        f_reference = af.broadcast(initialize.initialize_f,
                                   af.to_array(q1 - p1 * params.t_final),
                                   af.to_array(q2 - p2 * params.t_final),
                                   af.to_array(p1),
                                   af.to_array(p2),
                                   af.to_array(p3),
                                   params)

        # The context manager closes the file even if the read fails.
        with h5py.File('dump/%04d' % (int(N[i])) + '.h5', 'r') as h5f:
            f = np.swapaxes(h5f['distribution_function'][:], 0, 2)

        error[i] = np.mean(abs(f - np.array(f_reference)))

    return error
def solve(self,
          initialization=None,
          iteration_count=10,
          display_iteration_delta=None,
          **kwargs):
    """Run the iterative solver and return the final iterate ``self.x``.

    :param initialization: passed to ``self._initialize`` when the solver
        has not been initialized yet.
    :param iteration_count: maximum number of iterations to perform.
    :param display_iteration_delta: for ``display_type == 'text'``, print
        an update every this many iterations. Defaults to a tenth of
        ``iteration_count``, but never less than 1.
    :param kwargs: forwarded to ``self._initialize``.
    :return: ``self.x`` after the last iteration, or earlier on convergence
        or (unless ``self.let_diverge``) divergence.
    :raises ValueError: if ``self.display_type`` is not ``'text'``,
        ``'plot'`` or ``None``.
    """
    # Process display iteration delta. Clamp the default to 1: for
    # iteration_count < 10 the integer division yields 0, which would make
    # the modulo in the text display below raise ZeroDivisionError.
    if display_iteration_delta is None:
        display_iteration_delta = max(1, iteration_count // 10)

    # Try to import arrayfire and call garbage collection to free memory
    # (best effort: arrayfire is optional).
    try:
        import arrayfire
        arrayfire.device_gc()
    except ImportError:
        pass

    # Initialize solver if it hasn't been already
    if not self.initialized:
        self._initialize(initialization, **kwargs)

    cost = []

    # Run Algorithm
    for iteration in range(iteration_count):

        # Norm of the iterate before the update
        if self.multi_objective:
            x_prev_norm = sum([yp.norm(x) for x in self.x])
        else:
            x_prev_norm = yp.norm(self.x)

        # Perform iteration
        self.x = self._iteration_function(self.x, iteration, self.step_size)

        # Apply nesterov acceleration if desired
        if self.use_nesterov_acceleration:
            self.x = self.nesterov.iterate(self.x)

        # Store cost (multi-objective runs track the first objective only)
        if not self.multi_objective:
            objective_value = self.objective(self.x)
        else:
            objective_value = self.objective[0](self.x[0])
        cost.append(abs(yp.scalar(objective_value)))

        # Change in norm caused by this step
        if self.multi_objective:
            step_norm = abs(sum([yp.norm(x) for x in self.x]) - x_prev_norm)
        else:
            step_norm = abs(yp.norm(self.x) - x_prev_norm)

        # Show update
        if self.display_type == 'text':
            if (iteration + 1) % display_iteration_delta == 0:
                self.plot.update(iteration + 1, cost[-1],
                                 time.time() - self.t0, step_norm)
        elif self.display_type == 'plot':
            self.plot.update(iteration, new_cost=cost[-1])
            self.fig.canvas.draw()
        elif self.display_type is not None:
            raise ValueError('display_type %s is not defined!' %
                             self.display_type)

        # Check if converged or diverged (only once more than two costs exist)
        if len(cost) > 2:
            if self.convergence_tol is not None and (
                    abs(cost[-1] - cost[-2]) / max(cost[-1], 1e-10) <
                    self.convergence_tol or cost[-1] < 1e-20):
                print(
                    "Met convergence requirement (delta < %.2E) at iteration %d"
                    % (self.convergence_tol, iteration + 1))
                return self.x
            elif cost[-1] > cost[-2] and not self.let_diverge:
                print("Diverged at iteration %d" % (iteration + 1))
                return self.x

    return self.x
def test_fdtd_mode1_periodic():
    """Evolve the mode-1 FDTD fields on a periodic domain and check convergence.

    For each resolution the fields are advanced with zero currents from
    ``dt`` up to time 10 using ``dt = 1 / (2 N)``. At every step the
    ghost-free sum of ``E3^{n+1} * shift(B1^{n-1/2})`` times the cell area is
    recorded and later written to ``data/Bi+n-_Ei+n+.h5``. After the
    evolution B1, B2 and E3 are compared with their initial values; the
    errors are plotted to ``convergenceplot.png`` and the fitted slopes of
    log10(error) against log10(N) must be within 0.2 of -2.

    NOTE(review): ``N`` currently holds the single value 128, so the linear
    fit has only one data point — the slope assertions look unreliable
    until the sweep ``2**np.arange(5, 8)`` is restored; confirm intent.
    """
    import h5py

    N = np.array([128])  # 2**np.arange(5, 8)

    error_B1 = np.zeros(N.size)
    error_B2 = np.zeros(N.size)
    error_E3 = np.zeros(N.size)

    for i in range(N.size):
        af.device_gc()

        dt = (1 / int(N[i])) * 1 / 2
        time = np.arange(dt, 10 * 1 + dt, dt)
        params.dt = dt

        obj = test_periodic(int(N[i]), initialize_fdtd_mode1, params)
        solver = obj.fields_solver
        N_g = solver.N_g

        # Initial fields used for the final error measurement.
        E3_initial = solver.yee_grid_EM_fields[2].copy()
        B1_initial = solver.yee_grid_EM_fields[3].copy()
        B2_initial = solver.yee_grid_EM_fields[4].copy()

        energy = np.zeros([time.size])

        # B1 from before the upcoming update, and the same field shifted by
        # one cell along the last axis.
        B1_at_n_minus_half_i = solver.yee_grid_EM_fields[3].copy()
        B1_at_n_minus_half_i_plus_1 = af.shift(B1_at_n_minus_half_i, 0, 0, 0, -1)

        for time_index in range(time.size):
            B1_at_n_plus_half_i = solver.yee_grid_EM_fields[3].copy()

            # Zero currents with the shape of the grid.
            J1 = J2 = J3 = 0 * solver.q1_center**0
            solver.evolve_electrodynamic_fields(J1, J2, J3, dt)

            E3_n_plus_1 = solver.yee_grid_EM_fields[2].copy()

            energy[time_index] = af.sum(
                (E3_n_plus_1 * B1_at_n_minus_half_i_plus_1)[:, :, N_g:-N_g, N_g:-N_g]
            ) * solver.dq1 * solver.dq2

            # The field saved before this update becomes the "previous"
            # field of the next step.
            B1_at_n_minus_half_i = B1_at_n_plus_half_i.copy()
            B1_at_n_minus_half_i_plus_1 = af.shift(B1_at_n_minus_half_i, 0, 0, 0, -1)

        # The context manager closes the file even if a write fails.
        with h5py.File('data/Bi+n-_Ei+n+.h5', 'w') as h5f:
            h5f.create_dataset('data', data=energy[1:])
            h5f.create_dataset('time', data=time[1:])

        # Sum of absolute deviations in the interior, normalised by the
        # element count of the full (ghost-inclusive) initial array.
        error_B1[i] = af.sum(
            af.abs(solver.yee_grid_EM_fields[3, :, N_g:-N_g, N_g:-N_g]
                   - B1_initial[:, :, N_g:-N_g, N_g:-N_g])
        ) / (B1_initial.elements())

        error_B2[i] = af.sum(
            af.abs(solver.yee_grid_EM_fields[4, :, N_g:-N_g, N_g:-N_g]
                   - B2_initial[:, :, N_g:-N_g, N_g:-N_g])
        ) / (B2_initial.elements())

        error_E3[i] = af.sum(
            af.abs(solver.yee_grid_EM_fields[2, :, N_g:-N_g, N_g:-N_g]
                   - E3_initial[:, :, N_g:-N_g, N_g:-N_g])
        ) / (E3_initial.elements())

    poly_B1 = np.polyfit(np.log10(N), np.log10(error_B1), 1)
    poly_B2 = np.polyfit(np.log10(N), np.log10(error_B2), 1)
    poly_E3 = np.polyfit(np.log10(N), np.log10(error_E3), 1)

    print(error_B1)
    print(error_B2)
    print(error_E3)

    print(poly_B1)
    print(poly_B2)
    print(poly_E3)

    pl.loglog(N, error_B1, '-o', label=r'$B_x$')
    pl.loglog(N, error_B2, '-o', label=r'$B_y$')
    pl.loglog(N, error_E3, '-o', label=r'$E_z$')
    # Reference line proportional to N^-2, anchored on the first B2 error.
    pl.loglog(N, error_B2[0] * 32**2 / N**2, '--', color='black',
              label=r'$O(N^{-2})$')
    pl.xlabel(r'$N$')
    pl.ylabel('Error')
    pl.legend()
    pl.savefig('convergenceplot.png')

    assert (abs(poly_B1[0] + 2) < 0.2)
    assert (abs(poly_B2[0] + 2) < 0.2)
    assert (abs(poly_E3[0] + 2) < 0.2)
def _solveFirstOrderGradient(self, measurements, verbose, callback=None):
    """
    MAIN part of the solver, runs the FISTA algorithm (or plain gradient
    descent when ``self.configs.method`` is not "FISTA").

    configs:        configs object from class AlgorithmConfigs (read from self.configs)
    measurements:   all measurements, indexed by illumination along the last axis
                    self.configs.recon_from_field == True: field
                    self.configs.recon_from_field == False: amplitude measurement
    verbose:        boolean variable to print verbosely
    callback:       optional callable invoked as callback(self._x, self.configs)
                    after every completed iteration and after a FISTA restart

    Returns self._x. The per-iteration cost is stored in self.configs.error
    and the elapsed time in self.configs.time_elapsed.

    Raises ValueError if amplitude-based reconstruction is requested with a
    cost_criterion other than "intensity" or "amplitude".
    """
    flag_FISTA = self.configs.method == "FISTA"

    # update multiple angles at a time
    batch_update = False
    if self.configs.fista_global_update or self.configs.batch_size != 1:
        gradient_batch = af.constant(0.0, self.phase_obj_3d.shape[0],
                                     self.phase_obj_3d.shape[1],
                                     self.phase_obj_3d.shape[2], dtype = af_complex_datatype)
        batch_update = True
        if self.configs.fista_global_update:
            # batch_size == 0 means "all illuminations in one batch"
            self.configs.batch_size = 0

    if self.configs.batch_size == 0:
        num_batch = 1
    else:
        # Ceiling division: a trailing partial batch is processed too, so
        # no illumination is dropped when batch_size does not divide
        # number_illum (the slice below clips at the end of illu_order).
        num_batch = -(-self.number_illum // self.configs.batch_size)

    stepsize = self.configs.stepsize
    max_iter = self.configs.max_iter
    reg_term = self.configs.reg_term
    self.configs.error = []
    obj_gpu = af.constant(0.0, self.phase_obj_3d.shape[0],
                          self.phase_obj_3d.shape[1],
                          self.phase_obj_3d.shape[2], dtype = af_complex_datatype)

    #Initialization for FISTA update
    if flag_FISTA:
        restart = self.configs.restart
        y_k = self._x.copy()
        t_k = 1.0

    #Set Callback flag
    run_callback = callback is not None

    #Start of iterative algorithm
    with contexttimer.Timer() as timer:
        if verbose:
            print("---- Start of the %5s algorithm ----" %(self.scat_model))
        for iteration in range(max_iter):
            illu_counter = 0
            cost = 0.0
            obj_gpu[:] = af.to_array(self._x)
            if self.configs.random_order:
                illu_order = np.random.permutation(range(self.number_illum))
            else:
                illu_order = range(self.number_illum)

            for batch_idx in range(num_batch):
                if batch_update:
                    gradient_batch[:,:,:] = 0.0

                if self.configs.batch_size == 0:
                    illu_indices = illu_order
                else:
                    illu_indices = illu_order[batch_idx * self.configs.batch_size :
                                              (batch_idx+1) * self.configs.batch_size]

                for illu_idx in illu_indices:
                    #forward scattering
                    fx_illu = self.fx_illu_list[illu_idx]
                    fy_illu = self.fy_illu_list[illu_idx]
                    fields = self._forwardMeasure(fx_illu, fy_illu, obj = obj_gpu)

                    #calculate error
                    measurement = af.to_array(measurements[:,:,:,illu_idx].astype(np_complex_datatype))
                    if self.configs.recon_from_field:
                        residual = fields["forward_scattered_field"] - measurement
                    elif self.configs.cost_criterion == "intensity":
                        residual = af.abs(fields["forward_scattered_field"])**2 - measurement**2
                    elif self.configs.cost_criterion == "amplitude":
                        residual = af.abs(fields["forward_scattered_field"]) - measurement
                    else:
                        # Previously this fell through with ``residual``
                        # unset (or None) and failed obscurely below.
                        raise ValueError("cost_criterion %s is not defined!"
                                         % self.configs.cost_criterion)
                    cost += af.sum(residual*af.conjg(residual)).real

                    #calculate gradient
                    if batch_update:
                        gradient_batch[:, :, :] += self._computeGradient(fields, measurement)[0]
                    else:
                        gradient = self._computeGradient(fields, measurement)
                        obj_gpu[:, :, :] -= stepsize * gradient

                    if verbose:
                        if self.number_illum > 1:
                            print("gradient update of illumination {:03d}/{:03d}.".format(illu_counter, self.number_illum), end="\r")
                    illu_counter += 1

                    # Drop references before collecting so the device
                    # buffers of this illumination can be freed.
                    fields = None
                    residual = None
                    gradient = None
                    measurement = None
                    pupil = None
                    af.device_gc()

                if batch_update:
                    obj_gpu[:, :, :] -= stepsize * gradient_batch

            # Divergence guard: shrink the step and redo the iteration from
            # the unchanged self._x.
            if np.isnan(obj_gpu).sum() > 0:
                stepsize *= 0.1
                self.configs.time_elapsed = timer.elapsed
                print("WARNING: Gradient update diverges! Resetting stepsize to %3.2f" %(stepsize))
                t_k = 1.0
                continue

            # L2 regularizer
            obj_gpu[:, :, :] -= stepsize * reg_term * obj_gpu

            #record total error
            self.configs.error.append(cost + reg_term * af.sum(obj_gpu*af.conjg(obj_gpu)).real)

            #Prox operators
            af.device_gc()
            obj_gpu = self._regularizer_obj.applyRegularizer(obj_gpu)

            if flag_FISTA:
                #check convergence
                if iteration > 0:
                    if self.configs.error[-1] > self.configs.error[-2]:
                        if restart:
                            # Restart: reset momentum and fall back to the
                            # previous auxiliary variable.
                            t_k = 1.0
                            self._x[:, :, :] = y_k
                            # stepsize *= 0.8
                            print("WARNING: FISTA Restart! Error: %5.5f" %(np.log10(self.configs.error[-1])))
                            if run_callback:
                                callback(self._x, self.configs)
                            continue
                        else:
                            print("WARNING: Error increased! Error: %5.5f" %(np.log10(self.configs.error[-1])))

                #FISTA auxiliary variable
                y_k1 = np.array(obj_gpu)
                if len(y_k1.shape) < 3:
                    y_k1 = y_k1[:,:,np.newaxis]

                #FISTA update
                t_k1 = 0.5*(1.0 + (1.0 + 4.0*t_k**2)**0.5)
                beta = (t_k - 1.0) / t_k1
                self._x[:, :, :] = y_k1 + beta * (y_k1 - y_k)
                t_k = t_k1
                y_k = y_k1.copy()
            else:
                temp = np.array(obj_gpu)
                if len(temp.shape) < 3:
                    temp = temp[:,:,np.newaxis]
                self._x[:, :, :] = temp

                #check convergence
                if iteration > 0:
                    if self.configs.error[-1] > self.configs.error[-2]:
                        print("WARNING: Error increased! Error: %5.5f" %(np.log10(self.configs.error[-1])))
                        stepsize *= 0.8

            if verbose:
                print("iteration: %d/%d, error: %5.5f, elapsed time: %5.2f seconds" %(iteration+1, max_iter, np.log10(self.configs.error[-1]), timer.elapsed))

            if run_callback:
                callback(self._x, self.configs)

    self.configs.time_elapsed = timer.elapsed
    return self._x
def check_error(params):
    """Compare dumped solutions against the reference on a periodic domain.

    For each resolution ``N[i]`` a cell-centred (q1, q2, p1, p2, p3) grid is
    built from the bounds in the module-level ``domain``. The positions are
    moved back by ``p * params.t_final``, wrapped by repeatedly adding or
    subtracting 1, and the initial condition is evaluated there. The result
    is compared with the distribution function stored in
    ``dump/<N, zero-padded to 4 digits>.h5``.

    :param params: parameter object; ``params.t_final`` is read and the
        object is forwarded to ``initialize.initialize_f``.
    :return: array with the mean absolute error for every resolution.
    """
    error = np.zeros(N.size)

    for i in range(N.size):
        af.device_gc()
        print(N[i])

        N_q1 = int(N[i])
        N_q2 = int(N[i])
        N_p1 = int(N[i])
        N_p2 = int(N[i])
        N_p3 = 1
        N_p = N_p1 * N_p2 * N_p3

        # Cell widths along each axis.
        dq1 = (domain.q1_end - domain.q1_start) / N_q1
        dq2 = (domain.q2_end - domain.q2_start) / N_q2
        dp1 = (domain.p1_end[0] - domain.p1_start[0]) / N_p1
        dp2 = (domain.p2_end[0] - domain.p2_start[0]) / N_p2
        dp3 = (domain.p3_end[0] - domain.p3_start[0]) / N_p3

        # Cell-centred coordinates.
        q1 = domain.q1_start + (0.5 + np.arange(N_q1)) * dq1
        q2 = domain.q2_start + (0.5 + np.arange(N_q2)) * dq2
        p1 = domain.p1_start[0] + (0.5 + np.arange(N_p1)) * dp1
        p2 = domain.p2_start[0] + (0.5 + np.arange(N_p2)) * dp2
        p3 = domain.p3_start[0] + (0.5 + np.arange(N_p3)) * dp3

        p2, p1, p3 = np.meshgrid(p2, p1, p3)
        q2, q1 = np.meshgrid(q2, q1)

        # Positions vary along axes 1 and 2, velocities along axis 0.
        q1 = q1.reshape(1, N_q1, N_q2)
        q2 = q2.reshape(1, N_q1, N_q2)
        p1 = p1.reshape(N_p, 1, 1)
        p2 = p2.reshape(N_p, 1, 1)
        p3 = p3.reshape(N_p, 1, 1)

        # The context manager closes the file even if the read fails.
        with h5py.File('dump/%04d' % (int(N[i])) + '.h5', 'r') as h5f:
            f = np.swapaxes(np.swapaxes(h5f['distribution_function'][:], 0, 2), 1, 2)

        q1_new = af.to_array(q1 - p1 * params.t_final)
        q2_new = af.to_array(q2 - p2 * params.t_final)

        # Periodic B.Cs: each pass shifts out-of-range points by one unit,
        # so five passes handle points up to five units away.
        for _ in range(5):
            q1_new = af.select(q1_new < 0, q1_new + 1, q1_new)
            q2_new = af.select(q2_new < 0, q2_new + 1, q2_new)

            q1_new = af.select(q1_new > 1, q1_new - 1, q1_new)
            q2_new = af.select(q2_new > 1, q2_new - 1, q2_new)

        f_reference = af.broadcast(initialize.initialize_f,
                                   q1_new, q2_new,
                                   af.to_array(p1),
                                   af.to_array(p2),
                                   af.to_array(p3),
                                   params)

        error[i] = np.mean(abs(f - np.array(f_reference)))

    return error
def run_cases(q_dim, p_dim, charge_electron, tau):
    """Run the nonlinear and linear solvers at every resolution in ``N``.

    The module-level ``params`` and ``domain`` objects are mutated to
    describe the requested case. Both solvers are stepped with the same
    ``dt = 1e-3 / 2**i`` up to ``t_final`` and their final distribution
    functions are dumped to ``dump_files/nlsf_<N>`` and
    ``dump_files/lsf_<N>``.

    :param q_dim: configuration-space dimensionality; 2 enables the q2 axis.
    :param p_dim: velocity-space dimensionality (p2 is configured for
        ``p_dim >= 2``, p3 for ``p_dim == 3``).
    :param charge_electron: stored in ``params.charge[0]``; when non-zero the
        velocity axes are resolved with ``N[i]`` points instead of 32.
    :param tau: stored in ``params.tau``.
    """
    params.charge[0] = charge_electron
    params.tau = tau

    # Running the setup for all resolutions:
    for i in range(N.size):
        af.device_gc()
        domain.N_q1 = int(N[i])

        if q_dim == 2:
            domain.N_q2 = int(N[i])
            params.k_q2 = 4 * np.pi

        # ``>= 2`` (not ``== 2``): a 3-dimensional velocity space also needs
        # its p2 axis configured.
        if p_dim >= 2:
            domain.N_p2 = 32
            domain.p2_start = -10
            domain.p2_end = 10

        if p_dim == 3:
            domain.N_p3 = 32
            domain.p3_start = -10
            domain.p3_end = 10

        if charge_electron != 0:
            domain.N_p1 = int(N[i])

            if p_dim >= 2:
                domain.N_p2 = int(N[i])

            if p_dim == 3:
                domain.N_p3 = int(N[i])

        params.p_dim = p_dim

        # The time step is halved with every successive resolution.
        dt = 1e-3/(2**i)

        # Defining the physical system to be solved:
        system = physical_system(domain,
                                 boundary_conditions,
                                 params,
                                 initialize,
                                 advection_terms,
                                 collision_operator.BGK,
                                 moments
                                )

        # Declaring the nonlinear and linear solver objects which will
        # evolve the defined physical system:
        nls = nonlinear_solver(system)
        ls = linear_solver(system)

        # NOTE(review): ``t_final`` is not defined in this function —
        # presumably a module-level name; confirm.
        time_array = np.arange(dt, t_final + dt, dt)

        for _ in time_array:
            nls.strang_timestep(dt)
            ls.RK4_timestep(dt)

        nls.dump_distribution_function('dump_files/nlsf_' + str(N[i]))
        ls.dump_distribution_function('dump_files/lsf_' + str(N[i]))