def main(nx, ny, nz, num_iter, num_halo=2, plot_result=False):
    """Driver for apply_diffusion that sets up fields and does timings.

    Builds a global field of shape (nz, ny + 2 * num_halo, nx + 2 * num_halo)
    on rank 0, distributes it through a ``Partitioner``, runs
    ``apply_diffusion`` once as a warm-up and once timed, and stores the
    gathered input and output fields on rank 0.

    Parameters:
        nx, ny: Domain size in x and y (halo excluded); 1 .. 1024 * 1024.
        nz: Number of levels along the first array axis; 1 .. 1024.
        num_iter: Value forwarded as ``num_iter`` to the timed
            ``apply_diffusion`` call; 1 .. 1024 * 1024.
        num_halo: Number of halo points added on each side in x and y;
            1 .. 256.
        plot_result: If true, rank 0 also saves 'in_field.png' and
            'out_field.png'.

    Raises:
        AssertionError: If an argument is outside its accepted range.
            NOTE(review): these checks are ``assert`` statements, so they
            are skipped when Python runs with ``-O``.

    Side effects (rank 0 only): ``np.save('in_field', ...)``,
    ``np.save('out_field', ...)``, optional PNG files, and one line printed
    with the elapsed time.
    """
    assert 0 < nx <= 1024 * 1024, 'You have to specify a reasonable value for nx'
    assert 0 < ny <= 1024 * 1024, 'You have to specify a reasonable value for ny'
    assert 0 < nz <= 1024, 'You have to specify a reasonable value for nz'
    assert 0 < num_iter <= 1024 * 1024, 'You have to specify a reasonable value for num_iter'
    # The message used to read "Your have to specify ..."; typo corrected.
    assert 0 < num_halo <= 256, 'You have to specify a reasonable number of halo points'

    # Coefficient handed to apply_diffusion as its `alpha` argument.
    alpha = 1. / 32.

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    p = Partitioner(comm, [nz, ny, nx], num_halo)

    if rank == 0:
        # Only rank 0 builds the full field, including the halo frame in y/x.
        f = np.zeros((nz, ny + 2 * num_halo, nx + 2 * num_halo))
        # Alternative initial conditions, kept for reference:
        # Option 1: Original stencil2d-mpi during HPC4WC course:
        # f[nz // 4:3 * nz // 4, num_halo + ny // 4:num_halo + 3 * ny // 4, num_halo + nx // 4:num_halo + 3 * nx // 4] = 1.0
        # Option 2: Similar to option 1, but positive region extended towards tile edges:
        # f[nz // 10:9 * nz // 10, num_halo + ny // 10:num_halo + 9 * ny // 10, num_halo + nx // 10:num_halo + 9 * nx // 10] = 1.0
        # Option 3: One positive region in bottom-left (0-0) corner, one positive region in top-right (ny-nx) corner
        # f[nz // 4:3 * nz // 4, num_halo:num_halo + ny // 4, num_halo:num_halo + nx // 4] = 1.0
        # f[nz // 4:3 * nz // 4, num_halo + 3 * ny // 4:-num_halo, num_halo + 3 * nx // 4:-num_halo] = 1.0
        # Option 4 (active): Positive region line prime number fraction off-center across tile:
        # a band between ny/7 and 2*ny/7 in y that spans the whole interior in x.
        f[
            nz // 4:3 * nz // 4,
            num_halo + ny // 7:num_halo + 2 * ny // 7,
            num_halo:-num_halo
        ] = 1.0
    else:
        # Placeholder on the other ranks; presumably ignored by scatter -- TODO confirm.
        f = np.empty(1)

    in_field = p.scatter(f)
    out_field = np.copy(in_field)

    # Gather the initial state again so rank 0 can store (and plot) it.
    f = p.gather(in_field)
    if rank == 0:
        np.save('in_field', f)
        if plot_result:
            plt.ioff()
            # Plot the slice at the middle index of the first axis.
            plt.imshow(f[in_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('in_field.png')
            plt.close()

    # warmup caches
    apply_diffusion(in_field, out_field, alpha, num_halo, p=p)

    comm.Barrier()

    # time the actual work
    # NOTE(review): `toc` is read before the barrier below, so the printed
    # value is rank 0's own wall-clock time for the call.
    tic = time.time()
    apply_diffusion(in_field, out_field, alpha, num_halo, num_iter=num_iter, p=p)
    toc = time.time()

    comm.Barrier()

    if rank == 0:
        print("Elapsed time for work = {} s".format(toc - tic))

    update_halo(out_field, num_halo, p)

    f = p.gather(out_field)
    if rank == 0:
        np.save('out_field', f)
        if plot_result:
            plt.imshow(f[out_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('out_field.png')
            plt.close()
result = _nanmean(data_work[:, :, :], mask[:, :, :]) #result = _nanmean(data.values[1,:,:,:], mask.values[1,:,:,:]) #result = _nanmean(data.values[2,:,:,:], mask.values[2,:,:,:]) toc = datetime.now() print(f'this filter function took {toc-tic}') """ tic = datetime.now() # slightly slower result = nanmean(data.values[0,:,:,:], mask.values[0,:,:,:]) result = nanmean(data.values[1,:,:,:], mask.values[1,:,:,:]) result = nanmean(data.values[2,:,:,:], mask.values[2,:,:,:]) toc = datetime.now() print(f'this filter function took {toc-tic}') """ # wait here until each process has finished before getting the results and testing (which breaks everything when an error occurs comm.Barrier() data = p.gather(result) if rank == 0: data_out = data_orig.fillna(data) # test if results are the same as in "ground truth" from unittest_simple import test_simple res = xr.open_dataarray('baseline_result.nc') test_simple(data_out, res) # test fails bec one dim missing # my PhD Project goes on with: # gapfill each variable by regressing over all the others # in an iterative EM-like fashion # with spatiotemporal gapfill as initial guess # until estimates for missing values converge
def main(nx, ny, nz, num_iter, num_halo=2, plot_result=False):
    """Driver for apply_diffusion that sets up fields and does timings.

    Builds a global field of shape (nz, ny + 2 * num_halo, nx + 2 * num_halo)
    on rank 0 with a box of ones covering the middle half of each dimension,
    distributes it through a ``Partitioner``, runs ``apply_diffusion`` once
    as a warm-up and once timed, and stores the gathered input and output
    fields on rank 0.

    Parameters:
        nx, ny: Domain size in x and y (halo excluded); 1 .. 1024 * 1024.
        nz: Number of levels along the first array axis; 1 .. 1024.
        num_iter: Value forwarded as ``num_iter`` to the timed
            ``apply_diffusion`` call; 1 .. 1024 * 1024.
        num_halo: Number of halo points added on each side in x and y;
            1 .. 256.
        plot_result: If true, rank 0 also saves 'in_field.png' and
            'out_field.png'.

    Raises:
        AssertionError: If an argument is outside its accepted range.
            NOTE(review): these checks are ``assert`` statements, so they
            are skipped when Python runs with ``-O``.

    Side effects (rank 0 only): ``np.save('in_field', ...)``,
    ``np.save('out_field', ...)``, optional PNG files, and one line printed
    with the elapsed time.
    """
    assert 0 < nx <= 1024 * 1024, 'You have to specify a reasonable value for nx'
    assert 0 < ny <= 1024 * 1024, 'You have to specify a reasonable value for ny'
    assert 0 < nz <= 1024, 'You have to specify a reasonable value for nz'
    assert 0 < num_iter <= 1024 * 1024, 'You have to specify a reasonable value for num_iter'
    # The message used to read "Your have to specify ..."; typo corrected.
    assert 0 < num_halo <= 256, 'You have to specify a reasonable number of halo points'

    # Coefficient handed to apply_diffusion as its `alpha` argument.
    alpha = 1. / 32.

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    p = Partitioner(comm, [nz, ny, nx], num_halo)

    if rank == 0:
        # Only rank 0 builds the full field, including the halo frame in y/x.
        f = np.zeros((nz, ny + 2 * num_halo, nx + 2 * num_halo))
        # Initial condition: ones between 1/4 and 3/4 of the extent along
        # every axis (y and x indices are shifted by the halo width).
        f[
            nz // 4:3 * nz // 4,
            num_halo + ny // 4:num_halo + 3 * ny // 4,
            num_halo + nx // 4:num_halo + 3 * nx // 4
        ] = 1.0
    else:
        # Placeholder on the other ranks; presumably ignored by scatter -- TODO confirm.
        f = np.empty(1)

    in_field = p.scatter(f)
    out_field = np.copy(in_field)

    # Gather the initial state again so rank 0 can store (and plot) it.
    f = p.gather(in_field)
    if rank == 0:
        np.save('in_field', f)
        if plot_result:
            plt.ioff()
            # Plot the slice at the middle index of the first axis.
            plt.imshow(f[in_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('in_field.png')
            plt.close()

    # warmup caches
    apply_diffusion(in_field, out_field, alpha, num_halo, p=p)

    comm.Barrier()

    # time the actual work
    # NOTE(review): `toc` is read before the barrier below, so the printed
    # value is rank 0's own wall-clock time for the call.
    tic = time.time()
    apply_diffusion(in_field, out_field, alpha, num_halo, num_iter=num_iter, p=p)
    toc = time.time()

    comm.Barrier()

    if rank == 0:
        print("Elapsed time for work = {} s".format(toc - tic))

    update_halo(out_field, num_halo, p)

    f = p.gather(out_field)
    if rank == 0:
        np.save('out_field', f)
        if plot_result:
            plt.imshow(f[out_field.shape[0] // 2, :, :], origin='lower')
            plt.colorbar()
            plt.savefig('out_field.png')
            plt.close()