def test_profiling(self):
    """Run two back-to-back ``cuda.profiling()`` contexts.

    Inside each context a device array is allocated (10 elements the
    first time, 100 the second) and its only reference is dropped again
    before the context exits. The test makes no assertions; it passes
    as long as none of these calls raises.
    """
    for n_elems in (10, 100):
        with cuda.profiling():
            dev_buf = cuda.device_array(n_elems)
            # Drop the reference while the profiling context is still open.
            del dev_buf
if flat_slice[j] < flat_slice[j + 1]: flat_slice[j], flat_slice[j + 1] = flat_slice[j + 1], flat_slice[j] if slice.size % 2 == 0: upper = int(slice.size / 2) lower = upper - 1 out[x, y, z] = (flat_slice[upper] + flat_slice[lower]) / 2 else: out[x, y, z] = flat_slice[slice.size // 2] im_array = np.array(Image.open("lab02img.bmp")).astype(np.uint8) with allocated_gpu() as gpu: threadsperblock = ( math.floor(math.sqrt(gpu.MAX_THREADS_PER_BLOCK / (2 * im_array.shape[2]))), math.floor(math.sqrt(gpu.MAX_THREADS_PER_BLOCK / (2 * im_array.shape[2]))), im_array.shape[2] ) blockspergrid = ( math.ceil(im_array.shape[0] / threadsperblock[0]), math.ceil(im_array.shape[1] / threadsperblock[1]) ) result = np.zeros(shape=(im_array.shape[0], im_array.shape[1], im_array.shape[2])) with cuda.profiling(), timed("on GPU"): calculate_gpu[blockspergrid, threadsperblock](im_array, result) Image.fromarray(result.astype(np.uint8), 'RGB').save('lab02result.bmp')
N_runs = int(T // t_run) #Adjust t_run: t_run = T / N_runs else: N_runs = 1 t_run = T #print('N_runs =', N_runs) #Mean number of encounters in time t_run N_mean = t_run * encounterRate(n_p, v_rel, b_min, b_max_max, v_min, v_max) print('Evolving population, start time:', datetime.datetime.now()) for i in range(N_runs): #Number of encounters N_enc = np.random.poisson(N_mean, size=N_bin) rng_states = create_xoroshiro128p_states(threadsperblock * blockspergrid, seed=1) with cuda.profiling(): run_encounters[blockspergrid, threadsperblock](a, e, m1, m2, M_p, v_rel, a_T, b_max_max, v_min, v_max, N_enc, delta, G_new, N_broken, rng_states) e = e[np.where(a > 0.0)] a = a[np.where(a > 0.0)] #N_broken = N_bin - np.size(a) #print('a_final/au =', a) #print('e_final =', e) print('Finish time:', datetime.datetime.now()) print('N_broken =', N_broken) print('Saving') np.savez('simulation_GPU_{}Msol_{}e{}.npz'.format( int(M_p * mass_unit / (2.0 * 10.0**30.0)),