def sim_health_index(n_runs):
    """Simulate ``n_runs`` health-index paths on an OpenCL device and time it.

    Each path has 4160 steps. Standard-normal shocks are generated on the
    device and combined by a segmented scan whose per-step update is
    ``rho * previous + (1 - rho) * mu + shock``; the first element of every
    path uses ``mu + shock`` instead. The result is copied back to the host
    and the elapsed wall-clock time is printed.

    Args:
        n_runs: Number of independent paths to simulate.

    Returns:
        None. The simulated values are only fetched so that the device-to-host
        transfer is included in the reported time.
    """
    # Set up OpenCL context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mem_pool = cltools.MemoryPool(cltools.ImmediateAllocator(queue))

    t0 = time.time()

    rho = 0.5
    mu = 3.0
    sigma = 1.0
    n_steps = 4160

    # One flat device array holding the shocks of every path back to back.
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (n_runs * n_steps), np.float32,
                          mu=0, sigma=sigma)

    # Flag the first element of each path so the scan restarts there and
    # never carries a value from one path into the next.
    seg_boundaries = np.zeros(n_steps, dtype=np.uint8)
    seg_boundaries[0] = 1
    seg_boundary_flags = cl_array.to_device(
        queue, np.tile(seg_boundaries, int(n_runs)))

    # NOTE(review): "rho*a+b" is not an associative operation, whereas
    # GenericScanKernel documents scan_expr as associative -- confirm the
    # output against a sequential CPU reference.
    prefix_sum = GenericScanKernel(
        ctx, np.float32,
        arguments="__global float *ary, __global char *segflags, "
                  "__global float *out, float rho, float mu",
        input_expr="segflags[i] ? (ary[i]+mu):(ary[i]+(1-rho)*mu)",
        scan_expr="across_seg_boundary ? (b):(rho*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    # The kernel writes every out[i], so an uninitialised buffer is enough;
    # filling it first (as arange did) is wasted work.
    dev_result = cl_array.empty(queue, ran.shape, dtype=np.float32,
                                allocator=mem_pool)

    # Enqueue and run the scan kernel
    prefix_sum(ran, seg_boundary_flags, dev_result, rho, mu)

    # Copy back to the host as an (n_steps, n_runs) array. The value is not
    # used further; the copy is kept so it counts towards the timing below.
    health_index_all = dev_result.get().reshape(n_runs, n_steps).transpose()

    time_elapsed = time.time() - t0
    print("Simulated %d Health Index in: %f seconds"
          % (n_runs, time_elapsed))
    return
def normal(shape: Union[tuple[int, ...], int] = (1, 1), gpu=False) -> Tensor:
    """Return a ``Tensor`` of float32 samples from a normal distribution.

    No mean or scale is passed to either backend, so each backend's own
    defaults apply.

    Args:
        shape: Shape of the sample array, as an int or a tuple of ints.
        gpu: When true, sample on the OpenCL device through the module-level
            ``CONTEXT`` and ``QUEUE``; otherwise sample with NumPy on the host.

    Returns:
        A ``Tensor`` wrapping the samples; it is built with ``gpu=True`` when
        the samples were generated on the device.
    """
    if not gpu:
        host_samples = np.random.normal(size=shape)
        return Tensor(host_samples.astype(np.float32))

    generator = clrandom.PhiloxGenerator(CONTEXT)
    device_samples = generator.normal(cq=QUEUE, shape=shape, dtype=np.float32)
    return Tensor(device_samples, gpu=True)
def sim_lifetime(S, T):
    """Sweep 200 values of the persistence ``r`` and report the best one.

    For every ``r`` in ``np.linspace(-0.95, 0.95, 200)`` a segmented scan is
    run on the OpenCL device over ``S`` paths of ``T`` steps each, using the
    same seeded shocks each time. Each result is passed to
    ``avg_first_negative`` and the ``[r, score]`` pairs are then handed to
    ``find_best_rho``. Both helpers are defined outside this block.

    Args:
        S: Number of simulated paths.
        T: Number of steps per path.

    Returns:
        None. The elapsed time and the two leading entries of the
        ``find_best_rho`` result are printed.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    t0 = time.time()

    # Fixed seed: every r is evaluated on the same shocks.
    rand_gen = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)

    # Per-path base values: 3 in the first step, 0 afterwards.
    z_row = np.array([3] + [0] * (T - 1), dtype=np.float32)
    z_mat = cl_array.to_device(queue, np.tile(z_row, int(S)))

    # Flag the first element of each path so the scan restarts there.
    seg_row = np.array([1] + [0] * (T - 1), dtype=np.uint8)
    seg_boundary_flags = cl_array.to_device(queue, np.tile(seg_row, int(S)))

    # r is a by-value scalar, so it is declared as plain "float r"; an
    # address-space qualifier only applies to pointer arguments.
    # NOTE(review): "r*a+b" is not an associative operation, whereas
    # GenericScanKernel documents scan_expr as associative -- confirm the
    # output against a sequential CPU reference.
    prefix_sum = GenericScanKernel(
        ctx, np.float32,
        arguments="__global float *ary, __global char *segflags, "
                  "__global float *eps, __global float *out, float r",
        input_expr="ary[i] + eps[i] + 3*(1-r)",
        scan_expr="across_seg_boundary ? b : (r*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    # The kernel overwrites every out[i], so a single output buffer can be
    # reused for all 200 runs instead of allocating a new one per iteration.
    dev_result = cl_array.empty_like(eps_mat)

    rho_neg_tracker = []
    for r in np.linspace(-0.95, 0.95, 200):
        prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result, r)
        simulation_all = dev_result.get().reshape(S, T)
        neg_mean = avg_first_negative(simulation_all)
        rho_neg_tracker.append([r, neg_mean])

    best_rho = find_best_rho(rho_neg_tracker)
    time_elapsed = time.time() - t0

    print('Time taken to run: {}'.format(time_elapsed))
    print('Best Rho Value: {}'.format(best_rho[0]))
    print('Max period: {}'.format(best_rho[1]))
    return
def sim_lifetime(S, T):
    """Run one segmented-scan simulation of ``S`` paths with ``T`` steps.

    Seeded normal shocks are generated on the OpenCL device and scanned with
    a fixed factor of 0.5. The mean and standard deviation of the last step
    across all paths are printed together with the elapsed time.

    Args:
        S: Number of simulated paths.
        T: Number of steps per path.

    Returns:
        None. Results are printed.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    start = time.time()

    generator = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = generator.normal(queue, (S * T), np.float32, mu=0, sigma=1)

    n_paths = int(S)

    # Per-path base values: 3 in the first step, 0 afterwards.
    base_row = np.zeros(T, dtype=np.float32)
    base_row[0] = 3
    z_mat = cl_array.to_device(queue, np.tile(base_row, n_paths))

    # Flag the first element of each path so the scan restarts there.
    flag_row = np.zeros(T, dtype=np.uint8)
    flag_row[0] = 1
    seg_boundary_flags = cl_array.to_device(queue, np.tile(flag_row, n_paths))

    # NOTE(review): "0.5*a+b" is not an associative operation, whereas
    # GenericScanKernel documents scan_expr as associative -- confirm the
    # output against a sequential CPU reference.
    prefix_sum = GenericScanKernel(
        ctx, np.float32,
        arguments="__global float *ary, __global char *segflags, "
                  "__global float *eps, __global float *out",
        input_expr="ary[i] + eps[i] + 3*(1-0.5)",
        scan_expr="across_seg_boundary ? b : (0.5*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    dev_result = cl_array.empty_like(eps_mat)
    prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result)

    # Host copy laid out as (T, S): one row per step, one column per path.
    simulation_all = dev_result.get().reshape(S, T).transpose()
    last_step = simulation_all[-1]
    average_finish = np.mean(last_step)
    std_finish = np.std(last_step)

    time_elapsed = time.time() - start

    print("Simulated %d lifetimes in: %f seconds" % (S, time_elapsed))
    print("Average final health score: %f, Standard Deviation: %f"
          % (average_finish, std_finish))
    return
def main():
    """Minimise ``get_neg_indx`` over one bounded parameter and time the run.

    Normal shocks for 2 * 50 values are generated on the OpenCL device and
    passed as the extra argument to ``get_neg_indx`` (defined outside this
    block). The search uses L-BFGS-B, starts at 0.1 and is bounded to
    [-0.95, 0.95]. The optimiser result and the elapsed time are printed.
    """
    started = time.time()

    S = 2
    T = 50

    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    generator = clrand.PhiloxGenerator(ctx)
    ran = generator.normal(queue, (S * T), np.float32, mu=0, sigma=1)

    result = optimize.minimize(
        get_neg_indx,
        x0=0.1,
        args=ran,
        method='L-BFGS-B',
        bounds=((-0.95, 0.95), ),
        # 'eps' is the finite-difference step used for the gradient estimate.
        options={'eps': 0.001},
    )
    print(result)

    time_elapsed = time.time() - started
    # %d prints whole seconds only.
    print("Time used: %d" % (time_elapsed))
def sim_lifetime(rho):
    """Return the negated ``avg_first_negative`` score for persistence ``rho``.

    Simulates 1000 paths of 4160 steps on the OpenCL device with seeded
    normal shocks, runs a segmented scan with ``rho`` as the scan factor and
    passes the (steps, paths) result to ``avg_first_negative``, which is
    defined outside this block.

    Args:
        rho: Scan factor forwarded to the kernel as the scalar ``r``.

    Returns:
        The negative of the ``avg_first_negative`` result, so that a
        minimiser maximises the underlying score.
    """
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    S = 1000
    T = 4160

    # Fixed seed: repeated calls with different rho see the same shocks.
    rand_gen = clrand.PhiloxGenerator(ctx, seed=25)
    eps_mat = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1)

    # Per-path base values: 3 in the first step, 0 afterwards.
    z_row = np.array([3] + [0] * (T - 1), dtype=np.float32)
    z_mat = cl_array.to_device(queue, np.tile(z_row, int(S)))

    # Flag the first element of each path so the scan restarts there.
    seg_row = np.array([1] + [0] * (T - 1), dtype=np.uint8)
    seg_boundary_flags = cl_array.to_device(queue, np.tile(seg_row, int(S)))

    # r is a by-value scalar, so it is declared as plain "float r"; an
    # address-space qualifier only applies to pointer arguments.
    # NOTE(review): "r*a+b" is not an associative operation, whereas
    # GenericScanKernel documents scan_expr as associative -- confirm the
    # output against a sequential CPU reference.
    prefix_sum = GenericScanKernel(
        ctx, np.float32,
        arguments="__global float *ary, __global char *segflags, "
                  "__global float *eps, __global float *out, float r",
        input_expr="ary[i] + eps[i] + 3*(1-r)",
        scan_expr="across_seg_boundary ? b : (r*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item",
        options=[])

    dev_result = cl_array.empty_like(eps_mat)
    prefix_sum(z_mat, seg_boundary_flags, eps_mat, dev_result, rho)

    # Host copy laid out as (T, S): one row per step, one column per path.
    simulation_all = dev_result.get().reshape(S, T).transpose()
    avg_first_neg = avg_first_negative(simulation_all)
    return -avg_first_neg  # turned negative for minimization
def uniform(
    shape: Union[tuple[int, ...], int] = (1, 1),
    min: float = 0.0,
    max: float = 1.0,
    gpu=False,
) -> Tensor:
    """Return a ``Tensor`` of float32 samples from a uniform distribution.

    Args:
        shape: Shape of the sample array, as an int or a tuple of ints.
        min: Lower bound handed to the sampler.
        max: Upper bound handed to the sampler.
        gpu: When true, sample on the OpenCL device through the module-level
            ``CONTEXT`` and ``QUEUE``; otherwise sample with NumPy on the host.

    Returns:
        A ``Tensor`` wrapping the samples; it is built with ``gpu=True`` when
        the samples were generated on the device.
    """
    if not gpu:
        host_samples = np.random.uniform(min, max, size=shape)
        return Tensor(host_samples.astype(np.float32))

    generator = clrandom.PhiloxGenerator(CONTEXT)
    device_samples = generator.uniform(
        cq=QUEUE, shape=shape, dtype=np.float32, a=min, b=max)
    return Tensor(device_samples, gpu=True)
def sim_rand_walks(n_runs):
    """Simulate ``n_runs`` random walks of 100 steps on an OpenCL device.

    Standard-normal increments are generated on the device and accumulated
    with a segmented prefix sum; the kernel adds 100 to every stored value.
    The mean and standard deviation of the final step are printed along with
    the elapsed time, and all paths are plotted and saved to
    ``r_walk_nruns<n_runs>_gpu.png``.

    Args:
        n_runs: Number of independent walks to simulate.

    Returns:
        None. Results are printed and written to the image file.
    """
    # Set up context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    start = time.time()

    # One flat device array holding the increments of every walk back to back.
    n_steps = 100
    generator = clrand.PhiloxGenerator(ctx)
    ran = generator.normal(queue, (n_runs * n_steps), np.float32,
                           mu=0, sigma=1)

    # Flag the first element of each walk so the scan restarts there and
    # never sums across two different walks.
    walk_flags = np.zeros(n_steps, dtype=np.uint8)
    walk_flags[0] = 1
    seg_boundary_flags = cl_array.to_device(
        queue, np.tile(walk_flags, int(n_runs)))

    # Segmented inclusive sum; the +100 offset is applied when writing out.
    prefix_sum = GenericScanKernel(
        ctx, np.float32,
        arguments="__global float *ary, __global char *segflags, "
                  "__global float *out",
        input_expr="ary[i]",
        scan_expr="across_seg_boundary ? b : (a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] = item + 100",
        options=[])

    # Device buffer for the kernel output. If the kernel were invoked many
    # times, allocating through a pyopencl MemoryPool would avoid creating a
    # brand new memory area on every call:
    # https://documen.tician.de/pyopencl/tools.html
    dev_result = cl_array.empty_like(ran)

    # Enqueue and run the scan kernel
    prefix_sum(ran, seg_boundary_flags, dev_result)

    # Host copy laid out as (n_steps, n_runs): one row per step.
    r_walks_all = dev_result.get().reshape(n_runs, n_steps).transpose()
    final_positions = r_walks_all[-1]
    average_finish = np.mean(final_positions)
    std_finish = np.std(final_positions)

    time_elapsed = time.time() - start

    print("Simulated %d Random Walks in: %f seconds"
          % (n_runs, time_elapsed))
    print("Average final position: %f, Standard Deviation: %f"
          % (average_finish, std_finish))

    # For the plot only, pin every walk's first row to the starting
    # position of 100 before drawing the paths.
    r_walks_all[0, :] = 100
    plt.plot(r_walks_all)
    plt.savefig("r_walk_nruns%d_gpu.png" % n_runs)
    return
from pyopencl.scan import GenericScanKernel import matplotlib.pyplot as plt import time ctx = cl.create_some_context() queue = cl.CommandQueue(ctx) t0 = time.time() np.random.seed(25) rho = 0.5 sigma = 1.0 z_0 = 3 S = 1000 T = int(4160) rand_gen = clrand.PhiloxGenerator(ctx) ran = rand_gen.normal(queue, (S * T), np.float32, mu=0, sigma=1) seg_boundaries = [1] + [0] * (T - 1) seg_boundaries = np.array(seg_boundaries, dtype=np.uint8) seg_boundary_flags = np.tile(seg_boundaries, int(S)) seg_boundary_flags = cl_array.to_device(queue, seg_boundary_flags) prefix_sum = GenericScanKernel( ctx, np.float32, arguments="__global float *ary, __global char *segflags, " "__global float *out", input_expr="segflags[i] == 1 ? ary[i]: ary[i]", scan_expr="across_seg_boundary ? b : (a*0.5+b+1.5)", neutral="0",
def _first_flag_index(flags, n_steps):
    """Return per row the index of the first True entry, or n_steps if none."""
    # argmax on a boolean row yields the first True position; rows without
    # any True entry would yield 0, so they are mapped to n_steps instead.
    return np.where(flags.any(axis=1), flags.argmax(axis=1), n_steps)


def sim_health_index(n_runs):
    """Sweep 200 rho values and plot the mean first flagged period.

    For each rho in ``np.linspace(-0.95, 0.95, 200)`` a segmented scan is run
    on the OpenCL device over ``n_runs`` paths of 4160 steps, reusing the
    same shocks. The kernel stores 1 where the scanned value is not greater
    than 0 and 0 otherwise. For each path the index of the first 1 is taken
    (4160 when there is none) and averaged over paths. The averages are
    plotted against rho, saved to ``GPU_rho_avgt_nruns<n_runs>.png``, and the
    maximum is printed with its rho.

    Args:
        n_runs: Number of independent paths to simulate.

    Returns:
        None. Results are printed and written to the image file.
    """
    # Set up OpenCL context and command queue
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)
    mem_pool = cltools.MemoryPool(cltools.ImmediateAllocator(queue))

    t0 = time.time()

    mu = 3.0
    sigma = 1.0
    n_steps = 4160

    # One flat device array holding the shocks of every path back to back.
    rand_gen = clrand.PhiloxGenerator(ctx)
    ran = rand_gen.normal(queue, (n_runs * n_steps), np.float32,
                          mu=0, sigma=sigma)

    # Flag the first element of each path so the scan restarts there and
    # never carries a value from one path into the next.
    seg_boundaries = np.zeros(n_steps, dtype=np.uint8)
    seg_boundaries[0] = 1
    seg_boundary_flags = cl_array.to_device(
        queue, np.tile(seg_boundaries, int(n_runs)))

    # Per-step update is rho*previous + (1-rho)*mu + shock; the output is a
    # 0/1 flag rather than the value itself.
    # NOTE(review): "rho*a+b" is not an associative operation, whereas
    # GenericScanKernel documents scan_expr as associative -- confirm the
    # output against a sequential CPU reference.
    prefix_sum = GenericScanKernel(
        ctx, np.float32,
        arguments="__global float *ary, __global char *segflags, "
                  "__global float *out, float rho, float mu",
        input_expr="segflags[i] ? (ary[i]+mu):(ary[i]+(1-rho)*mu)",
        scan_expr="across_seg_boundary ? (b):(rho*a+b)",
        neutral="0",
        is_segment_start_expr="segflags[i]",
        output_statement="out[i] =(item>0) ? (0):(1)",
        options=[])

    # The kernel writes every out[i], so an uninitialised buffer is enough
    # and can be reused for every rho.
    dev_result = cl_array.empty(queue, ran.shape, dtype=np.float32,
                                allocator=mem_pool)

    rho_set = np.linspace(-0.95, 0.95, 200)
    rho_avgt_t = []
    for rho in rho_set:
        # Enqueue and run the scan kernel
        prefix_sum(ran, seg_boundary_flags, dev_result, rho, mu)

        # One row per path; True where the kernel stored 1.
        flags = dev_result.get().reshape(n_runs, n_steps) == 1

        # Mean, across paths, of the first flagged step index.
        first_idx = _first_flag_index(flags, n_steps)
        rho_avgt_t.append(float(first_idx.mean()))

    time_elapsed = time.time() - t0
    print("Simulated %d Health Index for 200 rho values in: %f seconds"
          % (n_runs, time_elapsed))

    plt.plot(rho_set, rho_avgt_t)
    plt.title('Averaged periods of first negative index across Rho')
    plt.xlabel('Rho')
    plt.ylabel('Avged Period of first negative index')
    plt.savefig("GPU_rho_avgt_nruns%d.png" % (n_runs))

    # argmax returns the first maximum, matching list.index(max(...)).
    best = int(np.argmax(rho_avgt_t))
    max_period = rho_avgt_t[best]
    max_rho = rho_set[best]
    print("Max Period: %f; Max Rho: %f." % (max_period, max_rho))
    return
jt_sym = sym.make_sym_vector("jt", 2) rho_sym = sym.var("rho") from pytential.symbolic.pde.maxwell import ( PECChargeCurrentMFIEOperator, get_sym_maxwell_point_source, get_sym_maxwell_plane_wave) mfie = PECChargeCurrentMFIEOperator() test_source = case.get_source(actx) calc_patch = CalculusPatch(np.array([-3, 0, 0]), h=0.01) calc_patch_tgt = PointsTarget(actx.from_numpy(calc_patch.points)) import pyopencl.clrandom as clrandom rng = clrandom.PhiloxGenerator(actx.context, seed=12) from pytools.obj_array import make_obj_array src_j = make_obj_array([ rng.normal(actx.queue, (test_source.ndofs), dtype=np.float64) for _ in range(3)]) def eval_inc_field_at(places, source=None, target=None): if source is None: source = "test_source" if use_plane_wave: # plane wave return bind(places, get_sym_maxwell_plane_wave( amplitude_vec=np.array([1, 1, 1]),