PHI [i ,j ,k ], PHI [i ,j ,k+1], PHI [i ,j ,k-1], PHIVB [i ,j ,k ], PHIVB [i ,j ,k+1], KHEAT [i ,j ,k ], KHEAT [i ,j ,k+1], RHO [i ,j ,k ], RHOVB [i ,j ,k ], RHOVB [i ,j ,k+1], SLHFLX [i ,j ,0 ], A[i ,j ,0], dsigma[0 ,0 ,k], moist_dif_coef[0 ,0 ,k], k)


# Build the CUDA kernel only when GPU execution is enabled. The value returned
# by cuda_kernel_decorator(launch_cuda_kernel) is passed to cuda.jit, and the
# decorator that cuda.jit returns is then applied to launch_cuda_kernel.
# NOTE(review): cuda_kernel_decorator presumably produces the kernel signature
# from the function's argument list - confirm against its definition.
if gpu_enable:
    moist_tendency_gpu = cuda.jit(cuda_kernel_decorator(launch_cuda_kernel))\
                            (launch_cuda_kernel)


###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# Each *_py function is wrapped with njit and bound to a module-level name.
# Note that num_dif wraps num_dif_pw_py (not a function called num_dif_py).
hor_adv = njit(hor_adv_py)
comp_VARVB_log = njit(comp_VARVB_log_py)
vert_adv = njit(vert_adv_py)
num_dif = njit(num_dif_pw_py)
turb_flux_tendency = njit(turb_flux_tendency_py)
add_up_tendencies = njit(add_up_tendencies_py)


def launch_numba_cpu(A, dsigma, moist_dif_coef,
# 2D COLP [i ,j ,0 ], COLP [i ,j-1,0 ], # GR horizontal corf [i ,j ,0 ], corf [i ,j-1,0 ], lat_rad [i ,j ,0 ], lat_rad [i ,j-1,0 ], dlon_rad [i ,j ,0 ], dlat_rad [i ,j ,0 ], dxjs [i ,j ,0 ], # GR vertical dsigma [0 ,0 ,k ], sigma_vb [0 ,0 ,k ], sigma_vb [0 ,0 ,k+1], UVFLX_dif_coef[0,0,k], k, i) if gpu_enable: VFLX_tendency_gpu = cuda.jit(cuda_kernel_decorator( launch_cuda_main_kernel))(launch_cuda_main_kernel) #################################################################### ### SPECIALIZE FOR CPU #################################################################### UVFLX_hor_adv = njit(UVFLX_hor_adv_py) interp_VAR_ds = njit(interp_VAR_ds_py) coriolis_and_spherical_VWIND = njit(coriolis_and_spherical_VWIND_py) pre_grad = njit(pre_grad_py) num_dif = njit(num_dif_py) add_up_tendencies = njit(add_up_tendencies_py) def launch_numba_cpu_main(dVFLXdt, VFLX, UWIND, VWIND, RFLX_3D, SFLX_3D, TFLX_3D, QFLX_3D, PHI, PHIVB, COLP, POTT, PVTF, PVTFVB, WWIND_VWIND, KMOM_dVWINDdz, RHO,
k], RHO[i, j, k], RAIN[i, j, 0], RAINRATE[i, j, 0], ACCRAIN[i, j, 0], dt, k, nz, reset_accum) kiter += 1 cuda.syncthreads()


# Build the CUDA kernel only when GPU execution is enabled. The value returned
# by cuda_kernel_decorator(...) is passed to cuda.jit, and the decorator that
# cuda.jit returns is applied to launch_cuda_kernel.
# non_3D supplies explicit entries for the arguments dt, k, nz and reset_accum.
# NOTE(review): presumably these are the arguments that are scalars rather
# than 3D fields, and wp_str / wp_int / wp_bool are their type descriptors -
# confirm against cuda_kernel_decorator.
if gpu_enable:
    compute_microphysics_gpu = cuda.jit(
        cuda_kernel_decorator(launch_cuda_kernel,
                              non_3D={
                                  'dt': wp_str,
                                  'k': wp_int,
                                  'nz': wp_int,
                                  'reset_accum': wp_bool
                              }))(launch_cuda_kernel)


###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# NOTE(review): every CPU specialization below is commented out, so this
# section currently binds no njit-compiled names.
#tendency_SOILTEMP = njit(tendency_SOILTEMP_py)
#timestep_SOILTEMP = njit(timestep_SOILTEMP_py)
#calc_albedo = njit(calc_albedo_py)
#calc_specific_humidity = njit(calc_specific_humidity_py)
#calc_srfc_fluxes = njit(calc_srfc_fluxes_py)
#run_full_timestep = njit(run_full_timestep_py)
#
#
QV_OLD[i, j, k], dQVdt[i, j, k], QC_OLD[i, j, k], dQCdt[i, j, k], A[i, j, 0], A[i - 1, j, 0], A[i + 1, j, 0], A[i, j - 1, 0], A[i, j + 1, 0], A[i - 1, j - 1, 0], A[i - 1, j + 1, 0], A[i + 1, j - 1, 0], A[i + 1, j + 1, 0], dt, i, j)


# Build the CUDA kernel only when GPU execution is enabled. The name
# make_timestep_gpu is rebound: the existing object is passed both to
# cuda_kernel_decorator and to the decorator returned by cuda.jit, and the
# result replaces it.
# non_3D supplies an explicit entry for the argument dt.
# NOTE(review): presumably dt is a scalar and wp_str is its type descriptor -
# confirm against cuda_kernel_decorator.
if gpu_enable:
    make_timestep_gpu = cuda.jit(
        cuda_kernel_decorator(make_timestep_gpu,
                              non_3D={'dt': wp_str}))(make_timestep_gpu)


###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# Each *_py function is wrapped with njit and bound to the same name without
# the _py suffix.
euler_forward_pw = njit(euler_forward_pw_py)
interp_COLPA_js = njit(interp_COLPA_js_py)
interp_COLPA_is = njit(interp_COLPA_is_py)


def make_timestep_cpu(COLP, COLP_OLD, UWIND, UWIND_OLD, dUFLXdt, VWIND, VWIND_OLD, dVFLXdt, POTT, POTT_OLD, dPOTTdt, QV, QV_OLD, dQVdt, QC, QC_OLD, dQCdt, A, dt): for i in prange(nb, nxs + nb): for j in range(nb, nys + nb):
while kt < nzs - 1: if kt == k - 1: vert_sum[k] = vert_sum[k] + vert_sum[k - 1] fluxdivsum = vert_sum[kt] kt = kt + 1 cuda.syncthreads() WWIND[i, j, k] = (-fluxdivsum / COLP_NEW[i, j, 0] - sigma_vb[0, 0, k] * dCOLPdt[i, j, 0] / COLP_NEW[i, j, 0]) cuda.syncthreads()


# Build the CUDA kernel only when GPU execution is enabled. The value returned
# by cuda_kernel_decorator(...) is passed to cuda.jit, and the decorator that
# cuda.jit returns is applied to launch_cuda_main_kernel.
# non_3D supplies an explicit entry for the argument dt.
# NOTE(review): presumably dt is a scalar and wp_str is its type descriptor -
# confirm against cuda_kernel_decorator.
if gpu_enable:
    continuity_gpu = cuda.jit(
        cuda_kernel_decorator(launch_cuda_main_kernel,
                              non_3D={'dt': wp_str}))(launch_cuda_main_kernel)


###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# Each *_py function is wrapped and bound to the same name without the _py
# suffix. The first three use njit; euler_forward is built with vectorize()
# (called without arguments) instead.
calc_UFLX = njit(calc_UFLX_py)
calc_VFLX = njit(calc_VFLX_py)
calc_FLXDIV = njit(calc_FLXDIV_py)
euler_forward = vectorize()(euler_forward_py)


def launch_numba_cpu(UFLX, VFLX, FLXDIV, UWIND, VWIND, WWIND, COLP, dCOLPdt, COLP_NEW, COLP_OLD, dyis, dxjs, dsigma, sigma_vb, A, dt): for i in prange(nb, nx + nb): for j in range(nb, ny + nb):
SSHFLX[i, j, 0], SLHFLX[i, j, 0]) = run_full_timestep( SOILTEMP[i, j, 0], SOILMOIST[i, j, 0], LWFLXNET[i, j, nzs - 1], SWFLXNET[i, j, nzs - 1], SOILCP[i, j, 0], SOILRHO[i, j, 0], SOILDEPTH[i, j, 0], OCEANMASK[i, j, 0], TAIR[i, j, nz - 1], QV[i, j, nz - 1], WIND[i, j, nz - 1], RHO[i, j, nz - 1], PSURF[i, j, 0], COLP[i, j, 0], WINDX[i, j, nz - 1], WINDY[i, j, nz - 1], RAIN[i, j, 0], DRAGCM, DRAGCH, A[i, j, 0], dt)


# Build the CUDA kernel only when GPU execution is enabled. The value returned
# by cuda_kernel_decorator(...) is passed to cuda.jit, and the decorator that
# cuda.jit returns is applied to launch_cuda_kernel.
# non_3D supplies explicit entries for dt, OCEANMASK, DRAGCM and DRAGCH;
# OCEANMASK is given the literal 'int32[:,:,:]', the others use wp_str.
# NOTE(review): presumably these are the arguments whose type differs from the
# decorator's default - confirm against cuda_kernel_decorator.
if gpu_enable:
    advance_timestep_srfc_gpu = cuda.jit(
        cuda_kernel_decorator(launch_cuda_kernel,
                              non_3D={
                                  'dt': wp_str,
                                  'OCEANMASK': 'int32[:,:,:]',
                                  'DRAGCM': wp_str,
                                  'DRAGCH': wp_str
                              }))(launch_cuda_kernel)


###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# Each *_py function is wrapped with njit and bound to the same name without
# the _py suffix.
tendency_SOILTEMP = njit(tendency_SOILTEMP_py)
timestep_SOILTEMP = njit(timestep_SOILTEMP_py)
calc_albedo = njit(calc_albedo_py)
calc_specific_humidity = njit(calc_specific_humidity_py)
calc_srfc_fluxes = njit(calc_srfc_fluxes_py)
run_full_timestep = njit(run_full_timestep_py)
if i >= nb and i < nx+nb and j >= nb and j < ny+nb: BFLX[i,j,k],RFLX[i,j,k] = calc_momentum_fluxes_ij( UFLX, UFLX_im1, UFLX_im1_jm1, UFLX_im1_jp1, UFLX_ip1, UFLX_ip1_jm1, UFLX_ip1_jp1, UFLX_jm1, UFLX_jp1, VFLX, VFLX_im1, VFLX_im1_jm1, VFLX_im1_jp1, VFLX_ip1, VFLX_ip1_jm1, VFLX_ip1_jp1, VFLX_jm1, VFLX_jp1)


# Build the CUDA kernel only when GPU execution is enabled. The value returned
# by cuda_kernel_decorator(launch_cuda_prep_adv_kernel) is passed to cuda.jit,
# and the decorator that cuda.jit returns is applied to
# launch_cuda_prep_adv_kernel.
# NOTE(review): cuda_kernel_decorator presumably produces the kernel signature
# from the function's argument list - confirm against its definition.
if gpu_enable:
    UVFLX_prep_adv_gpu = cuda.jit(cuda_kernel_decorator(
                            launch_cuda_prep_adv_kernel))(
                            launch_cuda_prep_adv_kernel)


###############################################################################
###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# Each *_py function is wrapped with njit and bound to the same name without
# the _py suffix.
interp_WWIND_UVWIND = njit(interp_WWIND_UVWIND_py)
interp_KMOM_dUVWINDdz = njit(interp_KMOM_dUVWINDdz_py)
calc_momentum_fluxes_isjs = njit(calc_momentum_fluxes_isjs_py)
calc_momentum_fluxes_ijs = njit(calc_momentum_fluxes_ijs_py)
calc_momentum_fluxes_isj = njit(calc_momentum_fluxes_isj_py)
calc_momentum_fluxes_ij = njit(calc_momentum_fluxes_ij_py)
i, j, k = cuda.grid(3) if i < nx+2*nb and j < ny+2*nb and k < nz: pairvb_km12 = pair_top + sigma_vb[0,0,k ] * COLP[i,j,0] pairvb_kp12 = pair_top + sigma_vb[0,0,k+1] * COLP[i,j,0] PVTF[i,j,k] = wp(1.)/(wp(1.)+con_kappa) * ( pow( pairvb_kp12/wp(100000.) , con_kappa ) * pairvb_kp12 - pow( pairvb_km12/wp(100000.) , con_kappa ) * pairvb_km12 ) /( pairvb_kp12 - pairvb_km12 ) PVTFVB[i,j,k] = pow( pairvb_km12/wp(100000.) , con_kappa ) if k == nz-1: PVTFVB[i,j,k+1] = pow( pairvb_kp12/wp(100000.) , con_kappa )


# Build the CUDA kernel only when GPU execution is enabled. The name
# diag_PVTF_gpu is rebound: the existing object is passed both to
# cuda_kernel_decorator and to the decorator returned by cuda.jit, and the
# result replaces it.
# NOTE(review): cuda_kernel_decorator presumably produces the kernel signature
# from the function's argument list - confirm against its definition.
if gpu_enable:
    diag_PVTF_gpu = cuda.jit(cuda_kernel_decorator(
                        diag_PVTF_gpu))(diag_PVTF_gpu)


def diag_PHI_gpu(PHI, PHIVB, PVTF, PVTFVB, POTT, HSURF): i, j, k = cuda.grid(3) if i < nx+2*nb and j < ny+2*nb and k < nzs: kiter = nzs-1 if k == kiter: PHIVB[i,j,k] = HSURF[i,j,0]*con_g kiter = kiter - 1 cuda.syncthreads() while kiter >= 0: if k == kiter: PHI [i,j,k] = PHIVB[i,j,k+1] - con_cp* (
if i < nx + 2 * nb and j < ny + 2 * nb and k > 0 and k < nzs - 1: KMOM[i, j, k], KHEAT[i, j, k] = run_all(PHIVB[i, j, k], HSURF[i, j, 0], PHI[i, j, k - 1], PHI[i, j, k], QV[i, j, k - 1], QV[i, j, k], WINDX[i, j, k - 1], WINDX[i, j, k], WINDY[i, j, k - 1], WINDY[i, j, k], POTTVB[i, j, k], POTT[i, j, k - 1], POTT[i, j, k], k)


# Build the CUDA kernel only when GPU execution is enabled. The value returned
# by cuda_kernel_decorator(launch_cuda_kernel) is passed to cuda.jit, and the
# decorator that cuda.jit returns is applied to launch_cuda_kernel.
# NOTE(review): cuda_kernel_decorator presumably produces the kernel signature
# from the function's argument list - confirm against its definition.
if gpu_enable:
    compute_turbulence_gpu = cuda.jit(
        cuda_kernel_decorator(launch_cuda_kernel))(launch_cuda_kernel)


###############################################################################
### SPECIALIZE FOR CPU
###############################################################################
# Each *_py function is wrapped with njit and bound to the same name without
# the _py suffix.
calc_virtual_temperature = njit(calc_virtual_temperature_py)
comp_VARVB_log = njit(comp_VARVB_log_py)
bulk_richardson = njit(bulk_richardson_py)
compute_K_coefs = njit(compute_K_coefs_py)
run_all = njit(run_all_py)


def launch_numba_cpu(KMOM, KHEAT, PHIVB, HSURF, PHI, QV, WINDX, WINDY, POTTVB, POTT): for i in prange(0, nx + 2 * nb):
## zonal boundaries if fnx == nxs + 2 * nb: # staggered in x if i == 0: FIELD[i, j, k] = FIELD[nxs - 1, j, k] elif i == nxs: FIELD[i, j, k] = FIELD[1, j, k] elif i == nxs + 1: FIELD[i, j, k] = FIELD[2, j, k] else: # unstaggered in x if i == 0: FIELD[i, j, k] = FIELD[nx, j, k] elif i == nx + 1: FIELD[i, j, k] = FIELD[1, j, k] # meridional boundaries if fny == nys + 2 * nb: # staggered in y if j == 0 or j == 1 or j == nys or j == nys + 1: FIELD[i, j, k] = 0. else: # unstaggered in y if j == 0: FIELD[i, j, k] = FIELD[i, 1, k] elif j == ny + 1: FIELD[i, j, k] = FIELD[i, ny, k] if gpu_enable: exchange_BC_gpu = cuda.jit( cuda_kernel_decorator(exchange_BC_gpu))(exchange_BC_gpu)