def add_to_Cfunction_dict_MoL_free_memory(MoL_method, which_gfs):
    """Register the C function ``MoL_free_memory_<which_gfs>``.

    The generated body holds one ``free(gridfuncs-><name>);`` statement for
    each gridfunction array selected by ``which_gfs``.

    :param MoL_method: Method of Lines scheme name; forwarded to
        generate_gridfunction_names() and quoted in the C description.
    :param which_gfs: "y_n_gfs" or "non_y_n_gfs". Any other value prints an
        error message and terminates the process via sys.exit(1).
    """
    desc = "Method of Lines (MoL) for \"" + MoL_method + "\" method: Free memory for \"" + which_gfs + "\" gridfunctions\n"
    desc += " - y_n_gfs are used to store data for the vector of gridfunctions y_i at t_n, at the start of each MoL timestep\n"
    desc += " - non_y_n_gfs are needed for intermediate (e.g., k_i) storage in chosen MoL method\n"

    # The third returned name (diagnostic pointer target) is not needed here.
    y_n_name, non_y_n_names, _diagnostic_target = \
        generate_gridfunction_names(MoL_method=MoL_method)

    if which_gfs == "y_n_gfs":
        gfs_to_free = [y_n_name]
    elif which_gfs == "non_y_n_gfs":
        gfs_to_free = non_y_n_names
    else:
        print("ERROR: which_gfs = \"" + which_gfs + "\" unrecognized.")
        sys.exit(1)

    free_statements = "".join(
        " free(gridfuncs->" + gf_name + ");\n" for gf_name in gfs_to_free)

    add_to_Cfunction_dict(
        includes=["NRPy_basic_defines.h", "NRPy_function_prototypes.h"],
        desc=desc,
        c_type="void",
        name="MoL_free_memory_" + which_gfs,
        params="const paramstruct *restrict params, MoL_gridfunctions_struct *restrict gridfuncs",
        body=indent_Ccode(free_statements, " "),
        rel_path_to_Cparams=os.path.join("."))
def add_to_Cfunction_dict_MoL_malloc(MoL_method, which_gfs):
    """Register the C function ``MoL_malloc_<which_gfs>``.

    The generated body computes the total number of grid points, emits one
    ``malloc`` per selected gridfunction array (sized with NUM_AUXEVOL_GFS for
    "auxevol_gfs" and NUM_EVOL_GFS otherwise), and finally points
    ``gridfuncs->diagnostic_output_gfs`` at the array named by
    generate_gridfunction_names().

    :param MoL_method: Method of Lines scheme name; forwarded to
        generate_gridfunction_names() and quoted in the C description.
    :param which_gfs: "y_n_gfs" or "non_y_n_gfs". Any other value prints an
        error message and terminates the process via sys.exit(1).
    """
    desc = "Method of Lines (MoL) for \"" + MoL_method + "\" method: Allocate memory for \"" + which_gfs + "\" gridfunctions\n"
    desc += " * y_n_gfs are used to store data for the vector of gridfunctions y_i at t_n, at the start of each MoL timestep\n"
    desc += " * non_y_n_gfs are needed for intermediate (e.g., k_i) storage in chosen MoL method\n"

    y_n_name, non_y_n_names, diagnostic_target = \
        generate_gridfunction_names(MoL_method=MoL_method)

    if which_gfs == "y_n_gfs":
        gfs_to_allocate = [y_n_name]
    elif which_gfs == "non_y_n_gfs":
        gfs_to_allocate = non_y_n_names
    else:
        print("ERROR: which_gfs = \"" + which_gfs + "\" unrecognized.")
        sys.exit(1)

    pieces = ["const int Nxx_plus_2NGHOSTS_tot = Nxx_plus_2NGHOSTS0*Nxx_plus_2NGHOSTS1*Nxx_plus_2NGHOSTS2;\n"]
    for gf_name in gfs_to_allocate:
        # auxevol_gfs has its own gridfunction count; everything else uses
        # the evolved-gridfunction count.
        gf_count = "NUM_AUXEVOL_GFS" if gf_name == "auxevol_gfs" else "NUM_EVOL_GFS"
        pieces.append("gridfuncs->" + gf_name + " = (REAL *restrict)malloc(sizeof(REAL) * " + gf_count + " * Nxx_plus_2NGHOSTS_tot);\n")
    pieces.append("\ngridfuncs->diagnostic_output_gfs = gridfuncs->" + diagnostic_target + ";\n")

    add_to_Cfunction_dict(
        includes=["NRPy_basic_defines.h", "NRPy_function_prototypes.h"],
        desc=desc,
        c_type="void",
        name="MoL_malloc_" + which_gfs,
        params="const paramstruct *restrict params, MoL_gridfunctions_struct *restrict gridfuncs",
        body=indent_Ccode("".join(pieces), " "),
        rel_path_to_Cparams=os.path.join("."))
def add_to_Cfunction_dict__GiRaFFE_NRPy_FCVAL(includes=None):
    """Register the C function ``interpolate_metric_gfs_to_cell_faces``.

    The generated C loops over the metric gridfunction list and, at each
    point, feeds the four values at offsets -2, -1, 0, +1 along ``flux_dirn``
    to COMPUTE_FCVAL, storing the result in the matching face gridfunction.

    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    # NOTE(review): `prefunc` is not defined in this function; it is expected
    # to exist at module scope -- confirm.
    declarations = " int in_gf,out_gf; REAL Qm2,Qm1,Qp0,Qp1; "
    interpolation_loop = (
        " for(int gf = 0;gf < num_metric_gfs;gf++) {"
        " in_gf = metric_gfs_list[gf];"
        " out_gf = metric_gfs_face_list[gf];"
        " for (int i2 = 2;i2 < Nxx_plus_2NGHOSTS2-1;i2++) {"
        " for (int i1 = 2;i1 < Nxx_plus_2NGHOSTS1-1;i1++) {"
        " for (int i0 = 2;i0 < Nxx_plus_2NGHOSTS0-1;i0++) {"
        " Qm2 = auxevol_gfs[IDX4S(in_gf,i0-2*kronecker_delta[flux_dirn][0],i1-2*kronecker_delta[flux_dirn][1],i2-2*kronecker_delta[flux_dirn][2])];"
        " Qm1 = auxevol_gfs[IDX4S(in_gf,i0-kronecker_delta[flux_dirn][0],i1-kronecker_delta[flux_dirn][1],i2-kronecker_delta[flux_dirn][2])];"
        " Qp0 = auxevol_gfs[IDX4S(in_gf,i0,i1,i2)];"
        " Qp1 = auxevol_gfs[IDX4S(in_gf,i0+kronecker_delta[flux_dirn][0],i1+kronecker_delta[flux_dirn][1],i2+kronecker_delta[flux_dirn][2])];"
        " auxevol_gfs[IDX4S(out_gf,i0,i1,i2)] = COMPUTE_FCVAL(Qm2,Qm1,Qp0,Qp1);"
        " } } } } "
    )
    add_to_Cfunction_dict(
        includes=includes,
        desc="Interpolate metric gridfunctions to cell faces",
        name="interpolate_metric_gfs_to_cell_faces",
        params="const paramstruct *params,REAL *auxevol_gfs,const int flux_dirn",
        prefunc=prefunc,
        preloop=declarations,
        body=interpolation_loop)
def Cfunc_free_memory():
    """Register the C function ``NRPyEOS_free_memory``.

    The generated C prints status messages to stderr and frees the five
    arrays held by ``eos_params`` (logrho, logtemp, yes, alltables, epstable).
    """
    includes = ["NRPy_basic_defines.h"]
    desc = "(c) 2022 Leo Werneck"
    c_type = "void"
    name = "NRPyEOS_free_memory"
    params = "NRPyEOS_params *restrict eos_params"
    # Each C statement sits on its own line: a `//` comment extends to the end
    # of the line, so code placed after it on the same line is never compiled.
    body = r"""
  fprintf(stderr,"(NRPyEOS) *******************************\n");
  fprintf(stderr,"(NRPyEOS) Freeing up memory.\n");

  // Free memory allocated for the table
  free(eos_params->logrho);
  free(eos_params->logtemp);
  free(eos_params->yes);
  free(eos_params->alltables);
  free(eos_params->epstable);

  fprintf(stderr,"(NRPyEOS) All done!\n");
  fprintf(stderr,"(NRPyEOS) *******************************\n");
"""
    outC.add_to_Cfunction_dict(includes=includes, desc=desc, c_type=c_type,
                               name=name, params=params, body=body,
                               enableCparameters=False)
def ID_scalarfield(Ccodesdir=".", new_way=False):
    """Generate the C driver ``ID_scalarfield``.

    The generated loop body computes the point index, packs (xx0,xx1,xx2)
    into an array and calls ID_scalarfield_xx0xx1xx2_to_BSSN_xx0xx1xx2 with
    pointers to the SFGF and SFMGF entries of ``in_gfs``.

    :param Ccodesdir: Directory receiving ``ID_scalarfield.h`` when
        ``new_way`` is false.
    :param new_way: If truthy, register the function through
        outC.add_to_Cfunction_dict(); otherwise write a header file with
        outC.outCfunction() (which is passed ``includes=None``).
    """
    includes = ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"]
    desc = "(c) 2021 Leo Werneck This is the scalar field initial data driver function. "
    c_type = "void"
    name = "ID_scalarfield"
    params = "const paramstruct *restrict params,REAL *restrict xx[3], ID_inputs other_inputs,REAL *restrict in_gfs"
    body = (
        " const int idx = IDX3S(i0,i1,i2);"
        " const REAL xx0xx1xx2[3] = {xx0,xx1,xx2};"
        " ID_scalarfield_xx0xx1xx2_to_BSSN_xx0xx1xx2(params,xx0xx1xx2,other_inputs,"
        " &in_gfs[IDX4ptS(SFGF,idx)],"
        " &in_gfs[IDX4ptS(SFMGF,idx)]); "
    )
    loopopts = "AllPoints,Read_xxs"

    if new_way:
        outC.add_to_Cfunction_dict(includes=includes, desc=desc, c_type=c_type,
                                   name=name, params=params, body=body,
                                   loopopts=loopopts)
    else:
        outfile = os.path.join(Ccodesdir, "ID_scalarfield.h")
        outC.outCfunction(outfile=outfile, includes=None, desc=desc,
                          c_type=c_type, name=name, params=params, body=body,
                          loopopts=loopopts)
def add_to_Cfunction_dict__GiRaFFE_NRPy_A2B(gammaDD, AD, BU, includes=None):
    """Register the C function ``driver_A_to_B``.

    Builds B^i = LeviCivita^{ijk} * AD_dD[k][j] symbolically, emits a
    finite-difference loop over interior points for BU0..BU2, and appends
    post-loop C code that calls compute_A2B_in_ghostzones() face by face.
    The registered function text is then patched with a chain of string
    replacements (NGHOSTS -> NGHOSTS_A2B loop bounds, Cparameters path).

    :param gammaDD: Three-metric passed to compute_sqrtgammaDET().
    :param AD: Not referenced in this function.
    :param BU: Immediately replaced by a fresh zero rank-1 list; the value
        passed in is not used.
    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    # Set spatial dimension (must be 3 for BSSN)
    DIM = 3
    par.set_parval_from_str("grid::DIM", DIM)

    # Compute the sqrt of the three metric determinant.
    import GRHD.equations as gh
    gh.compute_sqrtgammaDET(gammaDD)

    # Build the Levi-Civita tensor from the symbol defined in indexedexp.py.
    LeviCivitaUUU = ixp.LeviCivitaTensorUUU_dim3_rank3(gh.sqrtgammaDET)
    AD_dD = ixp.declarerank2("AD_dD", "nosym")
    BU = ixp.zerorank1()
    for i in range(DIM):
        for j in range(DIM):
            for k in range(DIM):
                BU[i] += LeviCivitaUUU[i][j][k] * AD_dD[k][j]

    # The loop body computes the magnetic field on the interior; the postloop
    # code below handles the ghostzones.
    desc = "Compute the magnetic field from the vector potential everywhere, including ghostzones"
    name = "driver_A_to_B"
    params = "const paramstruct *restrict params,REAL *restrict in_gfs,REAL *restrict auxevol_gfs"
    body = fin.FD_outputC("returnstring", [
        lhrh(lhs=gri.gfaccess("out_gfs", "BU0"), rhs=BU[0]),
        lhrh(lhs=gri.gfaccess("out_gfs", "BU1"), rhs=BU[1]),
        lhrh(lhs=gri.gfaccess("out_gfs", "BU2"), rhs=BU[2]),
    ])
    # Each `//` comment is on its own line so that it cannot comment out the
    # `for` header or the calls that follow it.
    postloop = """
int imin[3] = { NGHOSTS_A2B, NGHOSTS_A2B, NGHOSTS_A2B };
int imax[3] = { NGHOSTS+Nxx0, NGHOSTS+Nxx1, NGHOSTS+Nxx2 };
// Now, we loop over the ghostzones to calculate the magnetic field there.
for(int which_gz = 0; which_gz < NGHOSTS_A2B; which_gz++) {
  // After updating each face, adjust imin[] and imax[]
  // to reflect the newly-updated face extents.
  compute_A2B_in_ghostzones(params,in_gfs,auxevol_gfs,imin[0]-1,imin[0], imin[1],imax[1], imin[2],imax[2]); imin[0]--;
  compute_A2B_in_ghostzones(params,in_gfs,auxevol_gfs,imax[0],imax[0]+1, imin[1],imax[1], imin[2],imax[2]); imax[0]++;
  compute_A2B_in_ghostzones(params,in_gfs,auxevol_gfs,imin[0],imax[0], imin[1]-1,imin[1], imin[2],imax[2]); imin[1]--;
  compute_A2B_in_ghostzones(params,in_gfs,auxevol_gfs,imin[0],imax[0], imax[1],imax[1]+1, imin[2],imax[2]); imax[1]++;
  compute_A2B_in_ghostzones(params,in_gfs,auxevol_gfs,imin[0],imax[0], imin[1],imax[1], imin[2]-1,imin[2]); imin[2]--;
  compute_A2B_in_ghostzones(params,in_gfs,auxevol_gfs,imin[0],imax[0], imin[1],imax[1], imax[2],imax[2]+1); imax[2]++;
}
"""
    loopopts = "InteriorPoints"
    # NOTE(review): `prefunc` is not defined in this function; it is expected
    # to exist at module scope -- confirm.
    add_to_Cfunction_dict(includes=includes, desc=desc, name=name,
                          prefunc=prefunc, params=params, body=body,
                          loopopts=loopopts, postloop=postloop)
    # Patch the registered C text. The replacements apply to the whole
    # function string, including the postloop code above.
    outC_function_dict[name] = outC_function_dict[name].replace(
        "= NGHOSTS", "= NGHOSTS_A2B").replace(
        "NGHOSTS+Nxx0", "Nxx_plus_2NGHOSTS0-NGHOSTS_A2B").replace(
        "NGHOSTS+Nxx1", "Nxx_plus_2NGHOSTS1-NGHOSTS_A2B").replace(
        "NGHOSTS+Nxx2", "Nxx_plus_2NGHOSTS2-NGHOSTS_A2B").replace(
        "../set_Cparameters.h", "set_Cparameters.h")
def BSSN_ID_function_string(cf, hDD, lambdaU, aDD, trK, alpha, vetU, betU,
                            include_NRPy_basic_defines=False):
    """Register the C function ``initial_data``.

    Pairs each input expression with the ``in_gfs[IDX4S(...GF,i0,i1,i2)]``
    location it is written to, sorts the pairs alphabetically by that
    location string, and hands them to outputC() to produce the loop body.

    :param cf, trK, alpha: Scalar expressions.
    :param lambdaU, vetU, betU: Rank-1 expressions indexed 0..2.
    :param hDD, aDD: Rank-2 expressions; only entries with j >= i are used.
    :param include_NRPy_basic_defines: If truthy, the generated function
        includes "NRPy_basic_defines.h"; otherwise it has no includes.
    """
    def gf_location(gf_prefix):
        # C lvalue for gridfunction <gf_prefix>GF at the current point.
        return "in_gfs[IDX4S(" + gf_prefix + "GF,i0,i1,i2)]"

    assignments = [
        (gf_location("TRK"), trK),
        (gf_location("ALPHA"), alpha),
        (gf_location("CF"), cf),
    ]
    for i in range(3):
        assignments.append((gf_location("LAMBDAU" + str(i)), lambdaU[i]))
        assignments.append((gf_location("VETU" + str(i)), vetU[i]))
        assignments.append((gf_location("BETU" + str(i)), betU[i]))
        for j in range(i, 3):
            assignments.append((gf_location("HDD" + str(i) + str(j)), hDD[i][j]))
            assignments.append((gf_location("ADD" + str(i) + str(j)), aDD[i][j]))

    # Sort alphabetically by the left-hand side; right-hand sides follow.
    assignments.sort(key=lambda pair: pair[0])
    lhss = [pair[0] for pair in assignments]
    rhss = [pair[1] for pair in assignments]

    # outCverbose=False to prevent enormous output files.
    body = outputC(rhss, lhss, filename="returnstring",
                   params="preindent=1,CSE_enable=True,outCverbose=False",
                   prestring="", poststring="")

    add_to_Cfunction_dict(
        includes=["NRPy_basic_defines.h"] if include_NRPy_basic_defines else [],
        desc="Set up the initial data at all points on the numerical grid.",
        name="initial_data",
        params="const paramstruct *restrict params,REAL *restrict xx[3], REAL *restrict in_gfs",
        body=body,
        loopopts="AllPoints,Read_xxs")
def ID_scalarfield_xx0xx1xx2_to_BSSN_xx0xx1xx2(Ccodesdir=".", pointer_to_ID_inputs=False, new_way=False):
    """Generate the C function ``ID_scalarfield_xx0xx1xx2_to_BSSN_xx0xx1xx2``.

    The generated C unpacks (xx0,xx1,xx2), evaluates rfm.xxSph[0:3] into
    ``rthph`` and calls ID_scalarfield_spherical() with it.

    :param Ccodesdir: Directory receiving the ``.h`` file when ``new_way`` is
        false.
    :param pointer_to_ID_inputs: If truthy, the generated function takes
        ``ID_inputs *other_inputs``; otherwise it takes ``ID_inputs`` by value.
    :param new_way: If truthy, register through outC.add_to_Cfunction_dict();
        otherwise write a header file with outC.outCfunction() (which is
        passed ``includes=None``).
    """
    rfm.reference_metric()
    rthph = outC.outputC(rfm.xxSph[0:3], ["rthph[0]", "rthph[1]", "rthph[2]"],
                         "returnstring",
                         "includebraces=False,outCverbose=False,preindent=1")

    includes = ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"]
    desc = "(c) 2021 Leo Werneck This function takes as input either (x,y,z) or (r,th,ph) and outputs all scalar field quantities in the Cartesian or Spherical basis, respectively. "
    c_type = "void"
    name = "ID_scalarfield_xx0xx1xx2_to_BSSN_xx0xx1xx2"

    params = "const paramstruct *restrict params,const REAL xx0xx1xx2[3],\n"
    if pointer_to_ID_inputs:
        params += "ID_inputs *other_inputs,\n"
        # ID_scalarfield_spherical is declared with `const ID_inputs
        # other_inputs` (by value), so a pointer must be dereferenced before
        # it is passed on.
        inputs_argument = "*other_inputs"
    else:
        params += "ID_inputs other_inputs,\n"
        inputs_argument = "other_inputs"
    params += "REAL *restrict sf, REAL *restrict sfM"

    body = (
        " const REAL xx0 = xx0xx1xx2[0];"
        " const REAL xx1 = xx0xx1xx2[1];"
        " const REAL xx2 = xx0xx1xx2[2];"
        " REAL rthph[3]; "
        + rthph
        + " ID_scalarfield_spherical(rthph," + inputs_argument + ",sf,sfM); "
    )

    if new_way:
        outC.add_to_Cfunction_dict(includes=includes, desc=desc, c_type=c_type,
                                   name=name, params=params, body=body)
    else:
        outfile = os.path.join(Ccodesdir, "ID_scalarfield_xx0xx1xx2_to_BSSN_xx0xx1xx2.h")
        outC.outCfunction(outfile=outfile, includes=None, desc=desc,
                          c_type=c_type, name=name, params=params, body=body)
def Cfunc_unknown_T(auxvar, eos_params_in):
    """Register a void C function for ``auxvar`` with unknown temperature.

    The function name, parameter list and body all come from the helpers
    func_name(), func_params() (called with ``unknown_T=True``) and
    func_body(), using a sorted copy of ``eos_params_in``.

    :param auxvar: Object whose ``.var`` attribute is passed to func_name()
        and func_params(); the object itself is passed to func_body().
    :param eos_params_in: Iterable of EOS parameters; sorted before use.
    """
    sorted_eos_params = sorted(eos_params_in)
    return_type = "void"
    c_name = func_name(sorted_eos_params, auxvar.var)
    c_params = func_params(return_type, c_name, auxvar.var,
                           sorted_eos_params, unknown_T=True)
    c_body = func_body(c_name, sorted_eos_params, auxvar)

    registration = {
        "includes": ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"],
        "desc": "(c) 2022 Leo Werneck",
        "c_type": return_type,
        "name": c_name,
        "params": c_params,
        "body": c_body,
        "enableCparameters": False,
    }
    outC.add_to_Cfunction_dict(**registration)
def ID_scalarfield_spherical(Ccodesdir=".", new_way=False):
    """Generate the C function ``ID_scalarfield_spherical``.

    The generated C calls scalarfield_interpolate_1D() at radius
    ``xyz_or_rthph[0]``, stores the interpolated scalar field in ``*sf`` and
    sets ``*sfM`` to zero.

    :param Ccodesdir: Directory receiving ``ID_scalarfield_spherical.h`` when
        ``new_way`` is false.
    :param new_way: If truthy, register through outC.add_to_Cfunction_dict();
        otherwise write a header file with outC.outCfunction() (which is
        passed ``includes=None``).
    """
    includes = ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"]
    desc = "(c) 2021 Leo Werneck This function takes as input either (x,y,z) or (r,th,ph) and outputs all scalar field quantities in the Cartesian or Spherical basis, respectively. "
    c_type = "void"
    name = "ID_scalarfield_spherical"
    params = "const REAL xyz_or_rthph[3],const ID_inputs other_inputs,REAL *restrict sf,REAL *restrict sfM"
    # Each `//` comment is on its own line so that it cannot comment out the
    # assignments to *sf and *sfM.
    body = """
  const REAL r  = xyz_or_rthph[0];
  const REAL th = xyz_or_rthph[1];
  const REAL ph = xyz_or_rthph[2];

  REAL sf_star,psi4_star,alpha_star;

  scalarfield_interpolate_1D(r,
                             other_inputs.interp_stencil_size,
                             other_inputs.numlines_in_file,
                             other_inputs.r_arr,
                             other_inputs.sf_arr,
                             other_inputs.psi4_arr,
                             other_inputs.alpha_arr,
                             &sf_star,&psi4_star,&alpha_star);

  // Update varphi
  *sf  = sf_star;
  // Update Pi
  *sfM = 0;
"""
    if new_way:
        outC.add_to_Cfunction_dict(includes=includes, desc=desc, c_type=c_type,
                                   name=name, params=params, body=body,
                                   enableCparameters=False)
    else:
        outfile = os.path.join(Ccodesdir, "ID_scalarfield_spherical.h")
        outC.outCfunction(outfile=outfile, includes=None, desc=desc,
                          c_type=c_type, name=name, params=params, body=body,
                          enableCparameters=False)
def add_to_Cfunction_dict_freemem_bcstruct():
    """Register the C function ``freemem_bcstruct``.

    The generated C frees ``bcstruct->outer[i]`` and ``bcstruct->inner[i]``
    for i in [0, NGHOSTS), then the ``outer`` and ``inner`` arrays themselves
    and the ``num_ob_gz_pts`` / ``num_ib_gz_pts`` arrays.
    """
    free_code = (
        " for(int i=0;i<NGHOSTS;i++) {"
        " free(bcstruct->outer[i]);"
        " free(bcstruct->inner[i]);"
        " }"
        " free(bcstruct->outer);"
        " free(bcstruct->inner);"
        " free(bcstruct->num_ob_gz_pts);"
        " free(bcstruct->num_ib_gz_pts); "
    )
    add_to_Cfunction_dict(
        includes=["NRPy_basic_defines.h", "NRPy_function_prototypes.h"],
        desc="Free memory allocated within bcstruct",
        c_type="void",
        name="freemem_bcstruct",
        params="const paramstruct *restrict params, const bc_struct *restrict bcstruct",
        body=free_code,
        rel_path_to_Cparams=os.path.join("."))
def add_to_Cfunction_dict__functions_for_StildeD_source_term(
        outCparams, gammaDD, betaU, alpha, ValenciavU, BU, sqrt4pi,
        includes=None):
    """Register ``calculate_StildeD{0,1,2}_source_term``.

    Recomputes the GRHD/GRFFE symbolic quantities from the given inputs and
    placeholder derivative tensors, then registers one interior-points C
    function per component of GRHD.S_tilde_source_termD.

    :param outCparams: Parameter string passed to outputC().
    :param gammaDD, betaU, alpha, ValenciavU, BU, sqrt4pi: Symbolic inputs
        forwarded to the GRHD / GRFFE routines.
    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    # NOTE(review): general_access, metric_deriv_access and
    # write_final_quantity are not defined in this function; presumably
    # generate_memory_access_code() sets them at module scope -- confirm.
    generate_memory_access_code()

    # Placeholder tensors standing in for the metric derivatives in codegen.
    gammaDDdD = ixp.declarerank3("gammaDDdD", "sym01", DIM=3)
    betaUdD = ixp.declarerank2("betaUdD", "nosym", DIM=3)
    alphadD = ixp.declarerank1("alphadD", DIM=3)

    # Rerun these with the reset lists so that they do not reuse analytic
    # expressions.
    GRHD.compute_sqrtgammaDET(gammaDD)
    GRHD.u4U_in_terms_of_ValenciavU__rescale_ValenciavU_by_applying_speed_limit(
        alpha, betaU, gammaDD, ValenciavU)
    GRFFE.compute_smallb4U(gammaDD, betaU, alpha, GRHD.u4U_ito_ValenciavU,
                           BU, sqrt4pi)
    GRFFE.compute_smallbsquared(gammaDD, betaU, alpha, GRFFE.smallb4U)
    GRFFE.compute_TEM4UU(gammaDD, betaU, alpha, GRFFE.smallb4U,
                         GRFFE.smallbsquared, GRHD.u4U_ito_ValenciavU)
    GRHD.compute_g4DD_zerotimederiv_dD(gammaDD, betaU, alpha, gammaDDdD,
                                       betaUdD, alphadD)
    GRHD.compute_S_tilde_source_termD(alpha, GRHD.sqrtgammaDET,
                                      GRHD.g4DD_zerotimederiv_dD, GRFFE.TEM4UU)

    c_params = "const paramstruct *params,const REAL *auxevol_gfs, REAL *rhs_gfs"
    for dirn in range(3):
        component = str(dirn)
        # Assemble: shared reads, per-direction derivative reads, the source
        # term expression, then the final write-back.
        c_body = general_access
        c_body += metric_deriv_access[dirn]
        c_body += outputC(GRHD.S_tilde_source_termD[dirn],
                          "Stilde_rhsD" + component, "returnstring",
                          params=outCparams)
        c_body += write_final_quantity[dirn]
        add_to_Cfunction_dict(
            includes=includes,
            desc="Adds the source term to StildeD" + component + ".",
            name="calculate_StildeD" + component + "_source_term",
            params=c_params,
            body=c_body,
            loopopts="InteriorPoints")
def add_to_Cfunction_dict__prims_to_cons(gammaDD, betaU, alpha, ValenciavU, BU, sqrt4pi, includes=None):
    """Register the C function ``GiRaFFE_NRPy_prims_to_cons``.

    Runs C2P_P2C.GiRaFFE_NRPy_P2C() on the inputs and emits an all-points
    loop that writes C2P_P2C.StildeD[0..2] into the StildeD gridfunctions.

    :param gammaDD, betaU, alpha, ValenciavU, BU, sqrt4pi: Symbolic inputs
        forwarded to GiRaFFE_NRPy_P2C().
    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    C2P_P2C.GiRaFFE_NRPy_P2C(gammaDD, betaU, alpha, ValenciavU, BU, sqrt4pi)

    conservatives = [
        lhrh(lhs=gri.gfaccess("in_gfs", "StildeD" + str(comp)),
             rhs=C2P_P2C.StildeD[comp])
        for comp in range(3)
    ]

    # NOTE(review): outCparams is not a parameter here; it is expected to
    # exist at module scope -- confirm.
    loop_body = fin.FD_outputC("returnstring", conservatives, params=outCparams)
    add_to_Cfunction_dict(
        includes=includes,
        desc="Recompute StildeD after current sheet fix to Valencia 3-velocity to ensure consistency between conservative & primitive variables.",
        name="GiRaFFE_NRPy_prims_to_cons",
        params="const paramstruct *params,REAL *auxevol_gfs,REAL *in_gfs",
        body=loop_body,
        loopopts="AllPoints")
def add_to_Cfunction_dict__AD_gauge_term_psi6Phi_fin_diff(includes=None):
    """Register the C function ``calculate_AD_gauge_psi6Phi_RHSs``.

    The A_i right-hand sides are minus the derivative of AevolParen; the
    psi6Phi right-hand side is the damping term minus the trace of the
    derivative of PhievolParenU. After registration the function text is
    patched so its loop bounds use NGHOSTS_A2B.

    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    # NOTE(review): thismodule, alpha, psi6Phi and outCparams are not defined
    # in this function; they are expected at module scope -- confirm.
    xi_damping = par.Cparameters("REAL", thismodule, "xi_damping", 0.1)
    GRFFE.compute_psi6Phi_rhs_damping_term(alpha, psi6Phi, xi_damping)

    AevolParen_dD = ixp.declarerank1("AevolParen_dD", DIM=3)
    PhievolParenU_dD = ixp.declarerank2("PhievolParenU_dD", "nosym", DIM=3)

    A_rhsD = ixp.zerorank1()
    psi6Phi_rhs = GRFFE.psi6Phi_damping
    for dirn in range(3):
        A_rhsD[dirn] += -AevolParen_dD[dirn]
        psi6Phi_rhs += -PhievolParenU_dD[dirn][dirn]

    # Kreiss-Oliger dissipation for the GRFFE RHSs is currently disabled:
    # psi6Phi_dKOD = ixp.declarerank1("psi6Phi_dKOD")
    # AD_dKOD = ixp.declarerank2("AD_dKOD","nosym")
    # for i in range(3):
    #     psi6Phi_rhs += diss_strength*psi6Phi_dKOD[i]*rfm.ReU[i] # ReU[i] = 1/scalefactor_orthog_funcform[i]
    #     for j in range(3):
    #         A_rhsD[j] += diss_strength*AD_dKOD[j][i]*rfm.ReU[i] # ReU[i] = 1/scalefactor_orthog_funcform[i]

    rhs_assignments = [
        lhrh(lhs=gri.gfaccess("rhs_gfs", "AD" + str(comp)), rhs=A_rhsD[comp])
        for comp in range(3)
    ]
    rhs_assignments.append(
        lhrh(lhs=gri.gfaccess("rhs_gfs", "psi6Phi"), rhs=psi6Phi_rhs))

    name = "calculate_AD_gauge_psi6Phi_RHSs"
    add_to_Cfunction_dict(
        includes=includes,
        desc="Calculate AD gauge term and psi6Phi RHSs",
        name=name,
        params="const paramstruct *params,const REAL *in_gfs,const REAL *auxevol_gfs,REAL *rhs_gfs",
        body=fin.FD_outputC("returnstring", rhs_assignments, params=outCparams),
        loopopts="InteriorPoints")

    # Apply the textual substitutions in this fixed order.
    substitutions = (
        ("= NGHOSTS", "= NGHOSTS_A2B"),
        ("NGHOSTS+Nxx0", "Nxx_plus_2NGHOSTS0-NGHOSTS_A2B"),
        ("NGHOSTS+Nxx1", "Nxx_plus_2NGHOSTS1-NGHOSTS_A2B"),
        ("NGHOSTS+Nxx2", "Nxx_plus_2NGHOSTS2-NGHOSTS_A2B"),
    )
    patched_source = outC_function_dict[name]
    for old_text, new_text in substitutions:
        patched_source = patched_source.replace(old_text, new_text)
    outC_function_dict[name] = patched_source
def add_to_Cfunction_dict__cons_to_prims(StildeD, BU, gammaDD, betaU, alpha, includes=None):
    """Register the C function ``GiRaFFE_NRPy_cons_to_prims``.

    Runs C2P_P2C.GiRaFFE_NRPy_C2P() on the inputs and emits an all-points
    loop that writes C2P_P2C.outStildeD[0..2] into the StildeD gridfunctions
    and C2P_P2C.ValenciavU[0..2] into the ValenciavU gridfunctions.

    :param StildeD, BU, gammaDD, betaU, alpha: Symbolic inputs forwarded to
        GiRaFFE_NRPy_C2P().
    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    C2P_P2C.GiRaFFE_NRPy_C2P(StildeD, BU, gammaDD, betaU, alpha)

    values_to_print = [
        lhrh(lhs=gri.gfaccess("in_gfs", "StildeD0"), rhs=C2P_P2C.outStildeD[0]),
        lhrh(lhs=gri.gfaccess("in_gfs", "StildeD1"), rhs=C2P_P2C.outStildeD[1]),
        lhrh(lhs=gri.gfaccess("in_gfs", "StildeD2"), rhs=C2P_P2C.outStildeD[2]),
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "ValenciavU0"), rhs=C2P_P2C.ValenciavU[0]),
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "ValenciavU1"), rhs=C2P_P2C.ValenciavU[1]),
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "ValenciavU2"), rhs=C2P_P2C.ValenciavU[2]),
    ]

    # The backslash is escaped: in a normal string literal a backslash
    # followed by "t" is a TAB character, which would corrupt the LaTeX
    # "tilde" in the emitted description.
    desc = "Apply fixes to \\tilde{S}_i and recompute the velocity to match with current sheet prescription."
    name = "GiRaFFE_NRPy_cons_to_prims"
    params = "const paramstruct *params,REAL *xx[3],REAL *auxevol_gfs,REAL *in_gfs"
    # NOTE(review): outCparams is not a parameter here; it is expected to
    # exist at module scope -- confirm.
    body = fin.FD_outputC("returnstring", values_to_print, params=outCparams)
    loopopts = "AllPoints,Read_xxs"
    add_to_Cfunction_dict(
        includes=includes,
        desc=desc,
        name=name, params=params,
        body=body, loopopts=loopopts)
def add_to_Cfunction_dict__AD_gauge_term_psi6Phi_flux_term(includes=None):
    """Register ``calculate_AD_gauge_term_psi6Phi_flux_term_for_RHSs``.

    Computes GRFFE.AevolParen and GRFFE.PhievolParenU[0..2] symbolically and
    emits an all-points loop that stores them in the auxevol gridfunctions of
    the same names.

    :param includes: Passed through unchanged to add_to_Cfunction_dict().
    """
    # NOTE(review): gammaDD, betaU, alpha, psi6Phi, AD and outCparams are not
    # defined in this function; they are expected at module scope -- confirm.
    GRHD.compute_sqrtgammaDET(gammaDD)
    GRFFE.compute_AD_source_term_operand_for_FD(GRHD.sqrtgammaDET, betaU, alpha, psi6Phi, AD)
    GRFFE.compute_psi6Phi_rhs_flux_term_operand(gammaDD, GRHD.sqrtgammaDET, betaU, alpha, AD, psi6Phi)

    parens_to_print = [
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "AevolParen"), rhs=GRFFE.AevolParen),
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "PhievolParenU0"), rhs=GRFFE.PhievolParenU[0]),
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "PhievolParenU1"), rhs=GRFFE.PhievolParenU[1]),
        lhrh(lhs=gri.gfaccess("auxevol_gfs", "PhievolParenU2"), rhs=GRFFE.PhievolParenU[2]),
    ]

    desc = "Calculate quantities to be finite-differenced for the GRFFE RHSs"
    name = "calculate_AD_gauge_term_psi6Phi_flux_term_for_RHSs"
    params = "const paramstruct *restrict params,const REAL *restrict in_gfs,REAL *restrict auxevol_gfs"
    body = fin.FD_outputC("returnstring", parens_to_print, params=outCparams)
    loopopts = "AllPoints"
    # NOTE(review): no rel_path_to_Cparams is passed, so the callee's default
    # applies. A local value of "../" used to be computed here but was never
    # forwarded; confirm the default path is the intended one.
    add_to_Cfunction_dict(
        includes=includes,
        desc=desc,
        name=name, params=params,
        body=body, loopopts=loopopts)
def Cfunc_read_table_set_EOS_params():
    """Register the C function ``NRPyEOS_readtable_set_EOS_params``.

    The generated C opens the HDF5 file ``nuceos_table_name``, reads the table
    sizes and the individual table keys into a temporary array, copies them
    into ``eos_params->alltables`` with the table key as the fastest index,
    converts units and base-10 logs to natural logs, fills ``epstable`` and
    sets the derived spacing and range fields of ``eos_params``.
    """
    includes = ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"]
    desc = "(c) 2022 Leo Werneck"
    c_type = "void"
    name = "NRPyEOS_readtable_set_EOS_params"
    params = "const char *nuceos_table_name, NRPyEOS_params *restrict eos_params"
    # The C text below must keep its line structure: preprocessor directives
    # and `//` comments are line-based, and multi-line macros rely on a
    # backslash immediately before each newline.
    prefunc = r"""
// mini NoMPI
#ifdef HAVE_CAPABILITY_MPI
#include <mpi.h>
#define BCAST(buffer, size) MPI_Bcast(buffer, size, MPI_BYTE, my_reader_process, MPI_COMM_WORLD)
#else
#define BCAST(buffer, size) do { /* do nothing */ } while(0)
#endif

// If on the IO proc (doIO == True) actually perform HDF5 IO, catch possible
// HDF5 errors
#define HDF5_DO_IO(fn_call) \
  { \
    int _error_code = fn_call; \
    if (_error_code < 0) { \
      fprintf(stderr,"(NRPyEOS) HDF5 call '%s' returned error code %d", \
              #fn_call, _error_code); \
    } \
  }
"""
    body = r"""
  fprintf(stderr,"(NRPyEOS) *******************************\n");
  fprintf(stderr,"(NRPyEOS) Reading EOS table from file:\n");
  fprintf(stderr,"(NRPyEOS) %s\n",nuceos_table_name);
  fprintf(stderr,"(NRPyEOS) *******************************\n");

  hid_t file;
  HDF5_DO_IO(file = H5Fopen(nuceos_table_name, H5F_ACC_RDONLY, H5P_DEFAULT));

  // Use these two defines to easily read in a lot of variables in the same way
  // The first reads in one variable of a given type completely
#define READ_BCAST_EOS_HDF5(NAME,VAR,TYPE,MEM,NELEMS) \
  do { \
    hid_t dataset; \
    HDF5_DO_IO(dataset = H5Dopen(file, NAME, H5P_DEFAULT)); \
    HDF5_DO_IO(H5Dread(dataset, TYPE, MEM, H5S_ALL, H5P_DEFAULT, VAR)); \
    BCAST (VAR, sizeof(*(VAR))*(NELEMS)); \
    HDF5_DO_IO(H5Dclose(dataset)); \
  } while (0)
  // The second reads a given variable into a hyperslab of the alltables_temp array
#define READ_BCAST_EOSTABLE_HDF5(NAME,OFF,DIMS) \
  do { \
    READ_BCAST_EOS_HDF5(NAME,&alltables_temp[(OFF)*(DIMS)[1]],H5T_NATIVE_DOUBLE,H5S_ALL,(DIMS)[1]); \
  } while (0)

  // Read size of tables
  READ_BCAST_EOS_HDF5("pointsrho", &eos_params->nrho, H5T_NATIVE_INT, H5S_ALL, 1);
  READ_BCAST_EOS_HDF5("pointstemp", &eos_params->ntemp, H5T_NATIVE_INT, H5S_ALL, 1);
  READ_BCAST_EOS_HDF5("pointsye", &eos_params->nye, H5T_NATIVE_INT, H5S_ALL, 1);

  // Allocate memory for tables
  double* alltables_temp;
  if (!(alltables_temp = (double*)malloc(eos_params->nrho * eos_params->ntemp * eos_params->nye * NRPyEOS_ntablekeys * sizeof(double)))) {
    fprintf(stderr,"(NRPyEOS) Cannot allocate memory for EOS table");
  }
  if (!(eos_params->logrho = (double*)malloc(eos_params->nrho * sizeof(double)))) {
    fprintf(stderr,"(NRPyEOS) Cannot allocate memory for EOS table");
  }
  if (!(eos_params->logtemp = (double*)malloc(eos_params->ntemp * sizeof(double)))) {
    fprintf(stderr,"(NRPyEOS) Cannot allocate memory for EOS table");
  }
  if (!(eos_params->yes = (double*)malloc(eos_params->nye * sizeof(double)))) {
    fprintf(stderr,"(NRPyEOS) Cannot allocate memory for EOS table");
  }

  // Prepare HDF5 to read hyperslabs into alltables_temp
  hsize_t table_dims[2] = {NRPyEOS_ntablekeys, (hsize_t)eos_params->nrho * eos_params->ntemp * eos_params->nye};
  hid_t mem3 = H5Screate_simple(2, table_dims, NULL);

  // Read alltables_temp
  READ_BCAST_EOSTABLE_HDF5("logpress", 0, table_dims);
  READ_BCAST_EOSTABLE_HDF5("logenergy", 1, table_dims);
  READ_BCAST_EOSTABLE_HDF5("entropy", 2, table_dims);
  READ_BCAST_EOSTABLE_HDF5("munu", 3, table_dims);
  READ_BCAST_EOSTABLE_HDF5("cs2", 4, table_dims);
  READ_BCAST_EOSTABLE_HDF5("dedt", 5, table_dims);
  READ_BCAST_EOSTABLE_HDF5("dpdrhoe", 6, table_dims);
  READ_BCAST_EOSTABLE_HDF5("dpderho", 7, table_dims);
  // chemical potentials
  READ_BCAST_EOSTABLE_HDF5("muhat", 8, table_dims);
  READ_BCAST_EOSTABLE_HDF5("mu_e", 9, table_dims);
  READ_BCAST_EOSTABLE_HDF5("mu_p", 10, table_dims);
  READ_BCAST_EOSTABLE_HDF5("mu_n", 11, table_dims);
  // compositions
  READ_BCAST_EOSTABLE_HDF5("Xa", 12, table_dims);
  READ_BCAST_EOSTABLE_HDF5("Xh", 13, table_dims);
  READ_BCAST_EOSTABLE_HDF5("Xn", 14, table_dims);
  READ_BCAST_EOSTABLE_HDF5("Xp", 15, table_dims);
  // average nucleus
  READ_BCAST_EOSTABLE_HDF5("Abar", 16, table_dims);
  READ_BCAST_EOSTABLE_HDF5("Zbar", 17, table_dims);
  // Gamma
  READ_BCAST_EOSTABLE_HDF5("gamma", 18, table_dims);

  // Read additional tables and variables
  READ_BCAST_EOS_HDF5("logrho", eos_params->logrho, H5T_NATIVE_DOUBLE, H5S_ALL, eos_params->nrho);
  READ_BCAST_EOS_HDF5("logtemp", eos_params->logtemp, H5T_NATIVE_DOUBLE, H5S_ALL, eos_params->ntemp);
  READ_BCAST_EOS_HDF5("ye", eos_params->yes, H5T_NATIVE_DOUBLE, H5S_ALL, eos_params->nye);
  READ_BCAST_EOS_HDF5("energy_shift", &eos_params->energy_shift, H5T_NATIVE_DOUBLE, H5S_ALL, 1);

  HDF5_DO_IO(H5Sclose(mem3));
  HDF5_DO_IO(H5Fclose(file));

  // change ordering of alltables array so that
  // the table kind is the fastest changing index
  if (!(eos_params->alltables = (double*)malloc(eos_params->nrho * eos_params->ntemp * eos_params->nye * NRPyEOS_ntablekeys * sizeof(double)))) {
    fprintf(stderr,"(NRPyEOS) Cannot allocate memory for EOS table");
  }

  for(int iv = 0;iv<NRPyEOS_ntablekeys;iv++)
    for(int k = 0; k<eos_params->nye;k++)
      for(int j = 0; j<eos_params->ntemp; j++)
        for(int i = 0; i<eos_params->nrho; i++) {
          int indold = i + eos_params->nrho*(j + eos_params->ntemp*(k + eos_params->nye*iv));
          int indnew = iv + NRPyEOS_ntablekeys*(i + eos_params->nrho*(j + eos_params->ntemp*k));
          eos_params->alltables[indnew] = alltables_temp[indold];
        }

  // free memory of temporary array
  free(alltables_temp);

  // convert units, convert logs to natural log
  // The latter is great, because exp() is way faster than pow()
  // pressure
  eos_params->energy_shift = eos_params->energy_shift * EPSGF;
  for(int i=0;i<eos_params->nrho;i++) {
    // rewrite:
    //logrho[i] = log(pow(10.0,logrho[i]) * RHOGF);
    // by using log(a^b*c) = b*log(a)+log(c)
    eos_params->logrho[i] = eos_params->logrho[i] * log(10.) + log(RHOGF);
  }

  for(int i=0;i<eos_params->ntemp;i++) {
    //logtemp[i] = log(pow(10.0,logtemp[i]));
    eos_params->logtemp[i] = eos_params->logtemp[i]*log(10.0);
  }

  // allocate epstable; a linear-scale eps table
  // that allows us to extrapolate to negative eps
  if (!(eos_params->epstable = (double*)malloc(eos_params->nrho * eos_params->ntemp * eos_params->nye * sizeof(double)))) {
    fprintf(stderr,"(NRPyEOS) Cannot allocate memory for eps table\n");
  }

  // convert units
  for(int i=0;i<eos_params->nrho*eos_params->ntemp*eos_params->nye;i++) {

    { // pressure
      int idx = 0 + NRPyEOS_ntablekeys*i;
      eos_params->alltables[idx] = eos_params->alltables[idx] * log(10.0) + log(PRESSGF);
    }

    { // eps
      int idx = 1 + NRPyEOS_ntablekeys*i;
      eos_params->alltables[idx] = eos_params->alltables[idx] * log(10.0) + log(EPSGF);
      eos_params->epstable[i] = exp(eos_params->alltables[idx]);
    }

    { // cs2
      int idx = 4 + NRPyEOS_ntablekeys*i;
      eos_params->alltables[idx] *= LENGTHGF*LENGTHGF/TIMEGF/TIMEGF;
    }

    { // dedT
      int idx = 5 + NRPyEOS_ntablekeys*i;
      eos_params->alltables[idx] *= EPSGF;
    }

    { // dpdrhoe
      int idx = 6 + NRPyEOS_ntablekeys*i;
      eos_params->alltables[idx] *= PRESSGF/RHOGF;
    }

    { // dpderho
      int idx = 7 + NRPyEOS_ntablekeys*i;
      eos_params->alltables[idx] *= PRESSGF/EPSGF;
    }

  }

  eos_params->temp0 = exp(eos_params->logtemp[0]);
  eos_params->temp1 = exp(eos_params->logtemp[1]);

  // set up some vars
  eos_params->dtemp = (eos_params->logtemp[eos_params->ntemp-1] - eos_params->logtemp[0]) / (1.0*(eos_params->ntemp-1));
  eos_params->dtempi = 1.0/eos_params->dtemp;

  eos_params->dlintemp = eos_params->temp1-eos_params->temp0;
  eos_params->dlintempi = 1.0/eos_params->dlintemp;

  eos_params->drho = (eos_params->logrho[eos_params->nrho-1] - eos_params->logrho[0]) / (1.0*(eos_params->nrho-1));
  eos_params->drhoi = 1.0/eos_params->drho;

  eos_params->dye = (eos_params->yes[eos_params->nye-1] - eos_params->yes[0]) / (1.0*(eos_params->nye-1));
  eos_params->dyei = 1.0/eos_params->dye;

  eos_params->drhotempi = eos_params->drhoi * eos_params->dtempi;
  eos_params->drholintempi = eos_params->drhoi * eos_params->dlintempi;
  eos_params->drhoyei = eos_params->drhoi * eos_params->dyei;
  eos_params->dtempyei = eos_params->dtempi * eos_params->dyei;
  eos_params->dlintempyei = eos_params->dlintempi * eos_params->dyei;
  eos_params->drhotempyei = eos_params->drhoi * eos_params->dtempi * eos_params->dyei;
  eos_params->drholintempyei = eos_params->drhoi * eos_params->dlintempi * eos_params->dyei;

  eos_params->eos_rhomax = exp(eos_params->logrho[eos_params->nrho-1]);
  eos_params->eos_rhomin = exp(eos_params->logrho[0]);

  eos_params->eos_tempmax = exp(eos_params->logtemp[eos_params->ntemp-1]);
  eos_params->eos_tempmin = exp(eos_params->logtemp[0]);

  eos_params->eos_yemax = eos_params->yes[eos_params->nye-1];
  eos_params->eos_yemin = eos_params->yes[0];
"""
    outC.add_to_Cfunction_dict(includes=includes, desc=desc, c_type=c_type,
                               name=name, params=params, body=body,
                               enableCparameters=False, prefunc=prefunc)
def add_enforce_detgammahat_constraint_to_Cfunction_dict(
        includes=None, rel_path_to_Cparams=os.path.join("."),
        enable_rfm_precompute=True, enable_golden_kernels=False,
        OMP_pragma_on="i2", func_name_suffix=""):
    """Register ``enforce_detgammahat_constraint<func_name_suffix>``.

    Generates an all-points C loop from
    EGC.Enforce_Detgammahat_Constraint_symb_expressions() with SIMD disabled,
    and returns the result of pickle_NRPy_env().

    :param includes: List of headers for the generated function; None is
        treated as an empty list.
    :param rel_path_to_Cparams: Passed through to add_to_Cfunction_dict().
    :param enable_rfm_precompute: Selects the ``rfmstruct`` parameter instead
        of ``xx[3]`` and is forwarded to get_loopopts().
    :param enable_golden_kernels: Written into the FD_outputC parameter
        string as GoldenKernelsEnable.
    :param OMP_pragma_on: Forwarded to get_loopopts().
    :param func_name_suffix: Appended to the C function name.
    :return: Whatever pickle_NRPy_env() returns.
    """
    # SIMD is disabled for this function, as it includes cbrt() and abs()
    # functions. It also does not use finite differences, so
    # finite_difference_functions.h is never added to the includes.
    use_SIMD = False
    if includes is None:
        includes = []

    c_name = "enforce_detgammahat_constraint" + func_name_suffix
    if enable_rfm_precompute:
        coord_param = "const rfm_struct *restrict rfmstruct, "
    else:
        coord_param = "REAL *xx[3], "
    c_params = "const paramstruct *restrict params, " + coord_param + "REAL *restrict in_gfs"

    symb_expressions = EGC.Enforce_Detgammahat_Constraint_symb_expressions()

    c_preloop = ""
    use_Cparameters = True
    # For the Einstein Toolkit (ETK), the parameter list and preloop come
    # from a dedicated helper and Cparameters are switched off.
    if par.parval_from_str("grid::GridFuncMemAccess") == "ETK":
        c_params, c_preloop = set_ETK_func_params_preloop(func_name_suffix,
                                                          enable_SIMD=False)
        use_Cparameters = False

    codegen_options = ("outCverbose=False,enable_SIMD=False"
                       + ",GoldenKernelsEnable=" + str(enable_golden_kernels))

    timing_label = "Enforcing det(gammabar)=det(gammahat) constraint"
    start = print_msg_with_timing(timing_label, msg="Ccodegen",
                                  startstop="start")
    c_body = fin.FD_outputC("returnstring", symb_expressions,
                            params=codegen_options)
    print_msg_with_timing(timing_label, msg="Ccodegen", startstop="stop",
                          starttime=start)

    add_to_Cfunction_dict(
        includes=includes,
        desc="Enforce the det(gammabar) = det(gammahat) (algebraic) constraint",
        name=c_name,
        params=c_params,
        preloop=c_preloop,
        body=c_body,
        loopopts=get_loopopts("AllPoints", use_SIMD, enable_rfm_precompute,
                              OMP_pragma_on),
        rel_path_to_Cparams=rel_path_to_Cparams,
        enableCparameters=use_Cparameters)
    return pickle_NRPy_env()
def add_FD_func_to_outC_function_dict(
        list_of_deriv_vars, list_of_base_gridfunction_names_in_derivs,
        list_of_deriv_operators, fdcoeffs, fdstencl):
    """Register one C helper per unique finite-difference operator; return the calls to them.

    For every unique operator in list_of_deriv_operators, the stencil sum
    (coefficients times stencil points, scaled by the needed invdx factors)
    is emitted as a small C function, unless a function of the same name is
    already in outC_function_dict. For every entry of list_of_deriv_operators
    a C statement assigning the result of that helper is generated.

    :param list_of_deriv_vars: derivative variables; entry i is the LHS of call i.
    :param list_of_base_gridfunction_names_in_derivs: gridfunction name used to
           build the stencil-point arguments of call i.
    :param list_of_deriv_operators: operator string for each derivative
           (e.g. containing "dD", "dDD", "dKOD", "dupD" or "ddnD", with
           trailing direction digits).
    :param fdcoeffs: per-operator list of stencil coefficients.
    :param fdstencl: per-operator list of stencil points.
    :return: list of C function-call strings, grouped by unique operator.
    """
    # Step 5.a.ii.A: First construct a list of all the unique finite difference functions
    unique_ops = superfast_uniq(list_of_deriv_operators)

    Ctype = "REAL"
    if par.parval_from_str("grid::GridFuncMemAccess") == "ETK":
        Ctype = "CCTK_REAL"
    func_prefix = "order_" + str(FDparams.FD_CD_order) + "_"
    if FDparams.SIMD_enable == "True":
        Ctype = "REAL_SIMD_ARRAY"
        func_prefix = "SIMD_" + func_prefix

    # Stores the needed calls to the functions we're adding to outC_function_dict:
    FDfunccall_list = []
    for op in unique_ops:
        op_idx = find_which_op_idx(op, list_of_deriv_operators)
        num_pts = len(fdcoeffs[op_idx])
        # Suffix identifying each stencil point, and the matching "f<suffix>" symbol.
        suffixes = [varsuffix(fdstencl[op_idx][pt], FDparams) for pt in range(num_pts)]
        point_syms = [sp.sympify("f" + suffix) for suffix in suffixes]

        rhs_expr = sp.sympify(0)
        for pt in range(num_pts):
            rhs_expr += fdcoeffs[op_idx][pt] * point_syms[pt]

        # Multiply each expression by the appropriate power of 1/dx[i]
        invdx = [sp.sympify("invdx" + str(d)) for d in range(FDparams.DIM)]
        used_invdx = [False, False, False, False]
        op_len = len(op)
        single_direction = (
            (op_len == 3 and "dD" in op) or
            (op_len == 5 and ("dKOD" in op or "dupD" in op or "ddnD" in op)))
        if single_direction:
            # First-order or Kreiss-Oliger derivatives: direction is the last character.
            dirn = int(op[-1])
            rhs_expr *= invdx[dirn]
            used_invdx[dirn] = True
        elif op_len == 5 and "dDD" in op:
            # Second-order derivs: directions are the last two characters.
            dirn1 = int(op[-2])
            dirn2 = int(op[-1])
            used_invdx[dirn1] = used_invdx[dirn2] = True
            rhs_expr *= invdx[dirn1] * invdx[dirn2]
        else:
            print("Error: was unable to parse derivative operator: ", op)
            sys.exit(1)

        # invdx arguments actually used by this operator, in direction order.
        used_dirs = [d for d in range(FDparams.DIM) if used_invdx[d]]

        outfunc_params = "".join(
            "const " + Ctype + " invdx" + str(d) + "," for d in used_dirs)
        outfunc_params += ",".join(
            "const " + Ctype + " " + str(sym) for sym in point_syms)

        func_name = func_prefix + "f_" + str(op)
        invdx_args = "".join("invdx" + str(d) + "," for d in used_dirs)
        for i, candidate_op in enumerate(list_of_deriv_operators):
            if candidate_op != op:
                continue
            gfname = list_of_base_gridfunction_names_in_derivs[i]
            funccall = type__var(list_of_deriv_vars[i], FDparams) + " = " + func_name + "("
            funccall += invdx_args
            funccall += ",".join(gfname + suffix for suffix in suffixes)
            funccall += ");"
            FDfunccall_list.append(funccall)

        # If the function already exists in the outC_function_dict, then do not add it; move to the next op.
        if func_name in outC_function_dict:
            continue

        p = "preindent=1,SIMD_enable=" + FDparams.SIMD_enable + ",outCverbose=False,CSE_preprocess=True,includebraces=False"
        outFDstr = outputC(rhs_expr, "retval", "returnstring", params=p)
        # Turn the generated assignment into a return statement.
        outFDstr = outFDstr.replace("retval = ", "return ")
        op_description = str(op)
        for tag, meaning in (("dDD", "second derivative: "),
                             ("dD", "first derivative: "),
                             ("dKOD", "Kreiss-Oliger derivative: "),
                             ("dupD", "upwinded derivative: "),
                             ("ddnD", "downwinded derivative: ")):
            op_description = op_description.replace(tag, meaning)
        add_to_Cfunction_dict(
            desc=" * (__FD_OPERATOR_FUNC__) Finite difference operator for " + op_description,
            type="static " + Ctype + " _NOINLINE _UNUSED",
            name=func_name,
            opts="DisableCparameters",
            params=outfunc_params,
            preloop="",
            body=outFDstr)
    return FDfunccall_list
def add_to_Cfunction_dict__Stilde_flux(
        includes=None, inputs_provided=False, alpha_face=None,
        gamma_faceDD=None, beta_faceU=None, Valenciav_rU=None, B_rU=None,
        Valenciav_lU=None, B_lU=None, sqrt4pi=None,
        outCparams="outCverbose=False,CSE_sorting=none",
        write_cmax_cmin=False):
    """Register the C functions for the Stilde flux and its contribution to the RHS.

    Four C functions are added via add_to_Cfunction_dict:
    calculate_Stilde_flux_D0/D1/D2 (one per flux direction) and
    calculate_Stilde_rhsD.

    :param includes: forwarded unchanged to add_to_Cfunction_dict.
    :param inputs_provided: if False, the face-valued inputs below are
           registered here as AUXEVOL gridfunctions (and sqrt4pi as a C
           parameter), overriding whatever was passed in.
    :param alpha_face, gamma_faceDD, beta_faceU, Valenciav_rU, B_rU,
           Valenciav_lU, B_lU, sqrt4pi: inputs forwarded to
           calculate_Stilde_flux.
    :param outCparams: option string forwarded to fin.FD_outputC.
    :param write_cmax_cmin: if True, chsp.cmax and chsp.cmin are also written
           to gridfunctions named cmax_x/_y/_z and cmin_x/_y/_z.
    """
    if not inputs_provided:
        # We will pass values of the gridfunction on the cell faces into the function. This requires us
        # to declare them as C parameters in NRPy+. We will denote this with the _face infix/suffix.
        alpha_face = gri.register_gridfunctions("AUXEVOL", "alpha_face")
        gamma_faceDD = ixp.register_gridfunctions_for_single_rank2(
            "AUXEVOL", "gamma_faceDD", "sym01")
        beta_faceU = ixp.register_gridfunctions_for_single_rank1(
            "AUXEVOL", "beta_faceU")
        # We'll need some more gridfunctions, now, to represent the reconstructions of BU and ValenciavU
        # on the right and left faces
        Valenciav_rU = ixp.register_gridfunctions_for_single_rank1(
            "AUXEVOL", "Valenciav_rU", DIM=3)
        B_rU = ixp.register_gridfunctions_for_single_rank1("AUXEVOL", "B_rU", DIM=3)
        Valenciav_lU = ixp.register_gridfunctions_for_single_rank1(
            "AUXEVOL", "Valenciav_lU", DIM=3)
        B_lU = ixp.register_gridfunctions_for_single_rank1("AUXEVOL", "B_lU", DIM=3)
        sqrt4pi = par.Cparameters("REAL", thismodule, "sqrt4pi", "sqrt(4.0*M_PI)")
        # We'll also need to store the results of the HLLE step between functions.
        # NOTE(review): the nesting of this registration under `if not inputs_provided`
        # was reconstructed from a whitespace-mangled source -- confirm against the original.
        ixp.register_gridfunctions_for_single_rank1("AUXEVOL", "Stilde_flux_HLLED")
    input_params_for_Stilde_flux = "const paramstruct *params,REAL *auxevol_gfs,REAL *rhs_gfs"
    if write_cmax_cmin:
        # Only defined (and only used) when cmax/cmin output is requested.
        name_suffixes = ["_x", "_y", "_z"]
    for flux_dirn in range(3):
        # Stilde_fluxD is not assigned in this function; presumably
        # calculate_Stilde_flux sets it as a module-level global -- TODO confirm.
        calculate_Stilde_flux(flux_dirn,alpha_face,gamma_faceDD,beta_faceU,\
                              Valenciav_rU,B_rU,Valenciav_lU,B_lU,sqrt4pi)
        Stilde_flux_to_print = [
            lhrh(lhs=gri.gfaccess("out_gfs", "Stilde_flux_HLLED0"), rhs=Stilde_fluxD[0]),
            lhrh(lhs=gri.gfaccess("out_gfs", "Stilde_flux_HLLED1"), rhs=Stilde_fluxD[1]),
            lhrh(lhs=gri.gfaccess("out_gfs", "Stilde_flux_HLLED2"), rhs=Stilde_fluxD[2])
        ]
        if write_cmax_cmin:
            Stilde_flux_to_print = Stilde_flux_to_print \
                                  +[
                                    lhrh(lhs=gri.gfaccess("out_gfs","cmax"+name_suffixes[flux_dirn]),rhs=chsp.cmax),
                                    lhrh(lhs=gri.gfaccess("out_gfs","cmin"+name_suffixes[flux_dirn]),rhs=chsp.cmin)
                                   ]
        desc = "Compute the flux term of all 3 components of tilde{S}_i on the left face in the " + str(
            flux_dirn) + "direction for all components."
        name = "calculate_Stilde_flux_D" + str(flux_dirn)
        body = fin.FD_outputC("returnstring", Stilde_flux_to_print, params=outCparams)
        loopopts = "InteriorPoints"
        add_to_Cfunction_dict(includes=includes, desc=desc, name=name,
                              params=input_params_for_Stilde_flux, body=body,
                              loopopts=loopopts)
        # Post-process the just-registered C function: extend each loop's upper
        # bound by one point by rewriting "NGHOSTS+NxxN" as "NGHOSTS+NxxN+1".
        outC_function_dict[name] = outC_function_dict[name].replace(
            "NGHOSTS+Nxx0", "NGHOSTS+Nxx0+1").replace("NGHOSTS+Nxx1", "NGHOSTS+Nxx1+1").replace(
                "NGHOSTS+Nxx2", "NGHOSTS+Nxx2+1")
    # C code placed before the loop of calculate_Stilde_rhsD: selects invdx for flux_dirn.
    pre_body = """// Notice in the loop below that we go from 3 to cctk_lsh-3 for i, j, AND k, even though
// we are only computing the flux in one direction. This is because in the end,
// we only need the rhs's from 3 to cctk_lsh-3 for i, j, and k.
const REAL invdxi[4] = {1e100,invdx0,invdx1,invdx2};
const REAL invdx = invdxi[flux_dirn];"""
    # C loop body: adds (flux at index - flux at neighbor in flux_dirn) * invdx to each StildeD RHS.
    FD_body = """const int index = IDX3S(i0,i1,i2);
const int indexp1 = IDX3S(i0+kronecker_delta[flux_dirn][0],i1+kronecker_delta[flux_dirn][1],i2+kronecker_delta[flux_dirn][2]);
rhs_gfs[IDX4ptS(STILDED0GF,index)] += (auxevol_gfs[IDX4ptS(STILDE_FLUX_HLLED0GF,index)] - auxevol_gfs[IDX4ptS(STILDE_FLUX_HLLED0GF,indexp1)] ) * invdx;
rhs_gfs[IDX4ptS(STILDED1GF,index)] += (auxevol_gfs[IDX4ptS(STILDE_FLUX_HLLED1GF,index)] - auxevol_gfs[IDX4ptS(STILDE_FLUX_HLLED1GF,indexp1)] ) * invdx;
rhs_gfs[IDX4ptS(STILDED2GF,index)] += (auxevol_gfs[IDX4ptS(STILDE_FLUX_HLLED2GF,index)] - auxevol_gfs[IDX4ptS(STILDE_FLUX_HLLED2GF,indexp1)] ) * invdx;"""
    desc = "Compute the difference in the flux of StildeD on the opposite faces in flux_dirn for all components."
    name = "calculate_Stilde_rhsD"
    params = "const int flux_dirn,const paramstruct *params,const REAL *auxevol_gfs,REAL *rhs_gfs"
    # NOTE: `preloop` is assigned but the call below passes pre_body directly.
    preloop = pre_body
    body = FD_body
    loopopts = "InteriorPoints"
    add_to_Cfunction_dict(includes=includes, desc=desc, name=name,
                          params=params, preloop=pre_body, body=body,
                          loopopts=loopopts)
def add_HI_func_to_outC_function_dict(
        list_of_interp_vars, list_of_base_gridfunction_names_in_interps,
        list_of_interp_operators, hicoeffs, histencl):
    """Register one C helper per unique Hermite interpolator operator; return the calls to them.

    For every unique operator in list_of_interp_operators, the stencil sum
    (coefficients times stencil points, scaled by the needed invdx factors)
    is emitted as a small C function, unless a function of the same name is
    already in outC_function_dict. For every entry of
    list_of_interp_operators a C statement assigning the result of that
    helper is generated.

    :param list_of_interp_vars: interpolated variables; entry i is the LHS of call i.
    :param list_of_base_gridfunction_names_in_interps: gridfunction name used
           to build the stencil-point arguments of call i.
    :param list_of_interp_operators: operator string for each interpolation.
    :param hicoeffs: per-operator list of stencil coefficients.
    :param histencl: per-operator list of stencil points.
    :return: list of C function-call strings, grouped by unique operator.
    """
    # Step 5.a.ii.A: First construct a list of all the unique Hermite interpolator functions
    distinct_operators = superfast_uniq(list_of_interp_operators)

    c_type = "REAL"
    if par.parval_from_str("grid::GridFuncMemAccess") == "ETK":
        c_type = "CCTK_REAL"
    func_prefix = "order_" + str(HIparams.HI_DM_order) + "_"
    if HIparams.enable_SIMD == "True":
        c_type = "REAL_SIMD_ARRAY"
        func_prefix = "SIMD_" + func_prefix

    # Stores the needed calls to the functions we're adding to outC_function_dict:
    HIfunccall_list = []
    for op in distinct_operators:
        slot = find_which_op_idx(op, list_of_interp_operators)
        stencil_size = len(hicoeffs[slot])
        # One suffix (and one "f<suffix>" symbol) per stencil point.
        pt_suffixes = [varsuffix(histencl[slot][k], HIparams) for k in range(stencil_size)]
        pt_symbols = [sp.sympify("f" + sfx) for sfx in pt_suffixes]

        rhs_expr = sp.sympify(0)
        for k in range(stencil_size):
            rhs_expr += hicoeffs[slot][k] * pt_symbols[k]

        # Multiply each expression by the appropriate power of 1/dx[i]
        invdx = [sp.sympify("invdx" + str(d)) for d in range(HIparams.DIM)]
        used_invdx = [False, False, False, False]
        n_chars = len(op)
        if ((n_chars == 5 and "dKOD" in op) or
                (n_chars == 3 and "dD" in op) or
                (n_chars == 5 and ("dupD" in op or "ddnD" in op))):
            # First-order or Kreiss-Oliger interpolators: direction is the last character.
            dirn = int(op[-1])
            rhs_expr *= invdx[dirn]
            used_invdx[dirn] = True
        elif n_chars == 5 and "dDD" in op:
            # Second-order interps: directions are the last two characters.
            dirn1 = int(op[-2])
            dirn2 = int(op[-1])
            used_invdx[dirn1] = used_invdx[dirn2] = True
            rhs_expr *= invdx[dirn1] * invdx[dirn2]
        else:
            print("Error: was unable to parse interpolator operator: ", op)
            sys.exit(1)

        # Directions whose invdx factor is passed to the helper, in ascending order.
        active_dirs = [d for d in range(HIparams.DIM) if used_invdx[d]]

        # C parameter list: used invdx's (each followed by a comma), then the stencil points.
        param_pieces = ["const " + c_type + " invdx" + str(d) + "," for d in active_dirs]
        param_pieces.append(
            ",".join("const " + c_type + " " + str(sym) for sym in pt_symbols))
        outfunc_params = "".join(param_pieces)

        helper_name = func_prefix + "f_" + str(op)
        leading_args = "".join("invdx" + str(d) + "," for d in active_dirs)
        for i, this_op in enumerate(list_of_interp_operators):
            if this_op == op:
                gfname = list_of_base_gridfunction_names_in_interps[i]
                stencil_args = ",".join(gfname + sfx for sfx in pt_suffixes)
                HIfunccall_list.append(
                    type__var(list_of_interp_vars[i], HIparams) + " = " + helper_name
                    + "(" + leading_args + stencil_args + ");")

        # If the function already exists in the outC_function_dict, then do not add it; move to the next op.
        if helper_name not in outC_function_dict:
            p = "preindent=1,enable_SIMD=" + HIparams.enable_SIMD + ",outCverbose=False,CSE_preprocess=True,includebraces=False"
            # Generate "retval = <expr>" and convert it to a return statement.
            outHIstr = outputC(rhs_expr, "retval", "returnstring", params=p).replace(
                "retval = ", "return ")
            readable_op = str(op)
            readable_op = readable_op.replace("dDD", "second interpolator: ")
            readable_op = readable_op.replace("dD", "first interpolator: ")
            readable_op = readable_op.replace("dKOD", "Kreiss-Oliger interpolator: ")
            readable_op = readable_op.replace("dupD", "upwinded interpolator: ")
            readable_op = readable_op.replace("ddnD", "downwinded interpolator: ")
            add_to_Cfunction_dict(
                desc=" * (__HI_OPERATOR_FUNC__) Hermite interpolator operator for "
                     + readable_op
                     + " direction. In Cartesian coordinates, directions 0,1,2 correspond to x,y,z directions, respectively.",
                c_type="static " + c_type + " _NOINLINE _UNUSED",
                name=helper_name,
                enableCparameters=False,
                params=outfunc_params,
                preloop="",
                body=outHIstr)
    return HIfunccall_list
def add_to_Cfunction_dict_MoL_step_forward_in_time(MoL_method, RHS_string = "", post_RHS_string = "",
                                                   enable_rfm=False, enable_curviBCs=False):
    """Register the C function MoL_step_forward_in_time for the chosen Method of Lines scheme.

    The C body is assembled from one single_RK_substep() block per
    Runge-Kutta substep, using the Butcher table Butcher_dict[MoL_method][0].
    Three code paths exist:
      * diagonal methods whose name contains "RK3" (three gridfunctions only),
      * non-diagonal methods (one k_i gridfunction per substep),
      * other diagonal methods (Euler, or alternating k_odd/k_even storage).

    :param MoL_method: key into Butcher_dict; also embedded in comments.
    :param RHS_string: C code for the RHS evaluation, forwarded to single_RK_substep.
    :param post_RHS_string: C code applied after each RK update, forwarded
           to single_RK_substep.
    :param enable_rfm: if True the C function takes an rfmstruct argument,
           otherwise it takes xx[3].
    :param enable_curviBCs: if True the C function also takes a bcstruct argument.
    """
    includes = ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"]
    desc = "Method of Lines (MoL) for \"" + MoL_method + "\" method: Step forward one full timestep.\n"
    c_type = "void"
    name = "MoL_step_forward_in_time"
    params = "const paramstruct *restrict params, "
    if enable_rfm:
        params += "const rfm_struct *restrict rfmstruct, "
    else:
        params += "REAL *xx[3], "
    if enable_curviBCs:
        params += "const bc_struct *restrict bcstruct, "
    params += "MoL_gridfunctions_struct *restrict gridfuncs, const REAL dt"

    indent = ""  # We don't bother with an indent here.

    body = indent + "// C code implementation of -={ " + MoL_method + " }=- Method of Lines timestepping.\n\n"

    y_n_gridfunctions, non_y_n_gridfunctions_list, _throwaway = generate_gridfunction_names(MoL_method)
    body += "// First set gridfunction aliases from gridfuncs struct\n\n"
    body += "// y_n gridfunctions:\n"
    body += "REAL *restrict " + y_n_gridfunctions + " = gridfuncs->" + y_n_gridfunctions + ";\n"
    body += "\n"
    body += "// Temporary timelevel & AUXEVOL gridfunctions:\n"
    for gf in non_y_n_gridfunctions_list:
        body += "REAL *restrict " + gf + " = gridfuncs->" + gf + ";\n"
    body += "\n"
    body += "// Next perform a full step forward in time\n"

    # Implement Method of Lines (MoL) Timestepping
    Butcher = Butcher_dict[MoL_method][0]  # Get the desired Butcher table from the dictionary
    num_steps = len(Butcher)-1  # Specify the number of required steps to update solution

    # Note: every sp.ccode(...) result below has "L" stripped, which removes
    # the long-double suffix from generated numeric literals.

    # Diagonal RK3 only!!!
    if diagonal(MoL_method) and "RK3" in MoL_method:
        # In a diagonal RK3 method, only 3 gridfunctions need be defined. Below implements this approach.
        # k_1
        body += """
// In a diagonal RK3 method like this one, only 3 gridfunctions need be defined. Below implements this approach.
// Using y_n_gfs as input, k1 and apply boundary conditions\n"""
        body += single_RK_substep(
            commentblock = """// -={ START k1 substep }=-
// RHS evaluation:
// 1. We will store k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs now as
// ... the update for the next rhs evaluation y_n + a21*k1*dt
// Post-RHS evaluation:
// 1. Apply post-RHS to y_n + a21*k1*dt""",
            RHS_str = RHS_string,
            RHS_input_str = "y_n_gfs", RHS_output_str = "k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs",
            RK_lhss_list = ["k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs"],
            RK_rhss_list = ["("+sp.ccode(Butcher[1][1]).replace("L","")+")*k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs[i]*dt + y_n_gfs[i]"],
            post_RHS_list = [post_RHS_string], post_RHS_output_list = ["k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs"]) + "// -={ END k1 substep }=-\n\n"

        # k_2
        body += single_RK_substep(
            commentblock="""// -={ START k2 substep }=-
// RHS evaluation:
// 1. Reassign k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs to be the running total y_{n+1}; a32*k2*dt to the running total
// 2. Store k2_or_y_nplus_a32_k2_gfs now as y_n + a32*k2*dt
// Post-RHS evaluation:
// 1. Apply post-RHS to both y_n + a32*k2 (stored in k2_or_y_nplus_a32_k2_gfs)
// ... and the y_{n+1} running total, as they have not been applied yet to k2-related gridfunctions""",
            RHS_str=RHS_string,
            RHS_input_str="k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs", RHS_output_str="k2_or_y_nplus_a32_k2_gfs",
            RK_lhss_list=["k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs","k2_or_y_nplus_a32_k2_gfs"],
            RK_rhss_list=["("+sp.ccode(Butcher[3][1]).replace("L","")+")*(k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs[i] - y_n_gfs[i])/("+sp.ccode(Butcher[1][1]).replace("L","")+") + y_n_gfs[i] + ("+sp.ccode(Butcher[3][2]).replace("L","")+")*k2_or_y_nplus_a32_k2_gfs[i]*dt",
                          "("+sp.ccode(Butcher[2][2]).replace("L","")+")*k2_or_y_nplus_a32_k2_gfs[i]*dt + y_n_gfs[i]"],
            post_RHS_list=[post_RHS_string,post_RHS_string],
            post_RHS_output_list=["k2_or_y_nplus_a32_k2_gfs","k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs"]) + "// -={ END k2 substep }=-\n\n"

        # k_3
        # FIX: RK_lhss_list previously also listed "k2_or_y_nplus_a32_k2_gfs" although
        # only one RHS expression is supplied; the LHS/RHS lists now have equal length.
        body += single_RK_substep(
            commentblock="""// -={ START k3 substep }=-
// RHS evaluation:
// 1. Add k3 to the running total and save to y_n
// Post-RHS evaluation:
// 1. Apply post-RHS to y_n""",
            RHS_str=RHS_string,
            RHS_input_str="k2_or_y_nplus_a32_k2_gfs", RHS_output_str="y_n_gfs",
            RK_lhss_list=["y_n_gfs"],
            RK_rhss_list=["k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs[i] + ("+sp.ccode(Butcher[3][3]).replace("L","")+")*y_n_gfs[i]*dt"],
            post_RHS_list=[post_RHS_string],
            post_RHS_output_list=["y_n_gfs"]) + "// -={ END k3 substep }=-\n\n"
    else:
        y_n = "y_n_gfs"
        if not diagonal(MoL_method):
            for s in range(num_steps):
                next_y_input = "next_y_input_gfs"

                # If we're on the first step (s=0), we use y_n gridfunction as input.
                # Otherwise next_y_input is input. Output is just the reverse.
                if s == 0:  # If on first step:
                    RHS_input = y_n
                else:  # If on second step or later:
                    RHS_input = next_y_input
                RHS_output = "k" + str(s + 1) + "_gfs"
                if s == num_steps-1:  # If on final step:
                    RK_lhs = y_n
                    RK_rhs = y_n + "[i] + dt*("
                else:  # If on anything but the final step:
                    RK_lhs = next_y_input
                    RK_rhs = y_n + "[i] + dt*("
                # Accumulate the nonzero Butcher-row entries; a coefficient of
                # exactly 1 is emitted without an explicit multiplication.
                for m in range(s+1):
                    if Butcher[s+1][m+1] != 0:
                        if Butcher[s+1][m+1] != 1:
                            RK_rhs += " + k"+str(m+1)+"_gfs[i]*("+sp.ccode(Butcher[s+1][m+1]).replace("L","")+")"
                        else:
                            RK_rhs += " + k"+str(m+1)+"_gfs[i]"
                RK_rhs += " )"

                post_RHS = post_RHS_string
                if s == num_steps-1:  # If on final step:
                    post_RHS_output = y_n
                else:  # If on anything but the final step:
                    post_RHS_output = next_y_input

                body += single_RK_substep(
                    commentblock="// -={ START k" + str(s + 1) + " substep }=-",
                    RHS_str=RHS_string,
                    RHS_input_str=RHS_input, RHS_output_str=RHS_output,
                    RK_lhss_list=[RK_lhs], RK_rhss_list=[RK_rhs],
                    post_RHS_list=[post_RHS],
                    post_RHS_output_list=[post_RHS_output]) + "// -={ END k" + str(s + 1) + " substep }=-\n\n"
        else:  # diagonal case:
            y_nplus1_running_total = "y_nplus1_running_total_gfs"
            if MoL_method == 'Euler':  # Euler's method doesn't require any k_i, and gets its own unique algorithm
                body += single_RK_substep(
                    commentblock=indent + "// ***Euler timestepping only requires one RHS evaluation***",
                    RHS_str=RHS_string,
                    RHS_input_str=y_n, RHS_output_str=y_nplus1_running_total,
                    RK_lhss_list=[y_n], RK_rhss_list=[y_n+"[i] + "+y_nplus1_running_total+"[i]*dt"],
                    post_RHS_list=[post_RHS_string],
                    post_RHS_output_list=[y_n])
            else:
                for s in range(num_steps):
                    # If we're on the first step (s=0), we use y_n gridfunction as input.
                    # and k_odd as output.
                    if s == 0:
                        RHS_input = "y_n_gfs"
                        RHS_output = "k_odd_gfs"
                    # For the remaining steps the inputs and outputs alternate between k_odd and k_even
                    elif s % 2 == 0:
                        RHS_input = "k_even_gfs"
                        RHS_output = "k_odd_gfs"
                    else:
                        RHS_input = "k_odd_gfs"
                        RHS_output = "k_even_gfs"

                    RK_lhs_list = []
                    RK_rhs_list = []
                    if s != num_steps-1:  # For anything besides the final step
                        if s == 0:  # The first RK step
                            RK_lhs_list.append(y_nplus1_running_total)
                            RK_rhs_list.append(RHS_output+"[i]*dt*("+sp.ccode(Butcher[num_steps][s+1]).replace("L","")+")")

                            RK_lhs_list.append(RHS_output)
                            RK_rhs_list.append(y_n+"[i] + "+RHS_output+"[i]*dt*("+sp.ccode(Butcher[s+1][s+1]).replace("L","")+")")
                        else:
                            if Butcher[num_steps][s+1] != 0:
                                RK_lhs_list.append(y_nplus1_running_total)
                                if Butcher[num_steps][s+1] != 1:
                                    RK_rhs_list.append(y_nplus1_running_total+"[i] + "+RHS_output+"[i]*dt*("+sp.ccode(Butcher[num_steps][s+1]).replace("L","")+")")
                                else:
                                    RK_rhs_list.append(y_nplus1_running_total+"[i] + "+RHS_output+"[i]*dt")
                            if Butcher[s+1][s+1] != 0:
                                RK_lhs_list.append(RHS_output)
                                if Butcher[s+1][s+1] != 1:
                                    RK_rhs_list.append(y_n+"[i] + "+RHS_output+"[i]*dt*("+sp.ccode(Butcher[s+1][s+1]).replace("L","")+")")
                                else:
                                    RK_rhs_list.append(y_n+"[i] + "+RHS_output+"[i]*dt")
                    post_RHS_output = RHS_output
                    if s == num_steps-1:  # If on the final step
                        if Butcher[num_steps][s+1] != 0:
                            RK_lhs_list.append(y_n)
                            if Butcher[num_steps][s+1] != 1:
                                RK_rhs_list.append(y_n+"[i] + "+y_nplus1_running_total+"[i] + "+RHS_output+"[i]*dt*("+sp.ccode(Butcher[num_steps][s+1]).replace("L","")+")")
                            else:
                                # FIX: the expression previously ended in "[i]*dt)", which
                                # put an unmatched ")" into the generated C statement.
                                RK_rhs_list.append(y_n+"[i] + "+y_nplus1_running_total+"[i] + "+RHS_output+"[i]*dt")
                        post_RHS_output = y_n
                    body += single_RK_substep(
                        commentblock=indent + "// -={ START k" + str(s + 1) + " substep }=-",
                        RHS_str=RHS_string,
                        RHS_input_str=RHS_input, RHS_output_str=RHS_output,
                        RK_lhss_list=RK_lhs_list, RK_rhss_list=RK_rhs_list,
                        post_RHS_list=[post_RHS_string],
                        post_RHS_output_list=[post_RHS_output]) + "// -={ END k" + str(s + 1) + " substep }=-\n\n"

    add_to_Cfunction_dict(
        includes=includes,
        desc=desc,
        c_type=c_type, name=name, params=params,
        body=indent_Ccode(body, " "),
        rel_path_to_Cparams=os.path.join("."))
def add_Ricci_eval_to_Cfunction_dict(
        includes=None, rel_path_to_Cparams=os.path.join("."),
        enable_rfm_precompute=True, enable_golden_kernels=False,
        enable_SIMD=True, enable_split_for_optimizations_doesnt_help=False,
        OMP_pragma_on="i2", func_name_suffix=""):
    """Register the C function that evaluates the 3-Ricci tensor.

    :param includes: header names for the generated C function. The list is
           copied, so the caller's list is never modified.
    :param rel_path_to_Cparams: forwarded to add_to_Cfunction_dict.
    :param enable_rfm_precompute: if True the C function takes an rfmstruct
           argument, otherwise it takes xx[3].
    :param enable_golden_kernels: value written into the GoldenKernelsEnable
           code-generation option.
    :param enable_SIMD: enables SIMD code generation and adds the SIMD
           intrinsics header to the includes.
    :param enable_split_for_optimizations_doesnt_help: if True and the
           finite-difference order is >= 8, the expressions are split into
           two loops inside one "#pragma omp parallel" region.
    :param OMP_pragma_on: forwarded to get_loopopts.
    :param func_name_suffix: appended to the C function name.
    :return: whatever pickle_NRPy_env() returns.
    """
    # Work on a private copy: the original used `includes += [...]` directly on
    # the argument, which appended headers to the caller's list on every call.
    includes = [] if includes is None else list(includes)
    if enable_SIMD:
        includes += [os.path.join("SIMD", "SIMD_intrinsics.h")]
    enable_FD_functions = bool(
        par.parval_from_str("finite_difference::enable_FD_functions"))
    if enable_FD_functions:
        includes += ["finite_difference_functions.h"]

    # Set up the C function for the 3-Ricci tensor
    desc = "Evaluate the 3-Ricci tensor"
    name = "Ricci_eval" + func_name_suffix
    params = "const paramstruct *restrict params, "
    if enable_rfm_precompute:
        params += "const rfm_struct *restrict rfmstruct, "
    else:
        params += "REAL *xx[3], "
    params += "const REAL *restrict in_gfs, REAL *restrict auxevol_gfs"

    # Construct body:
    Ricci_SymbExpressions = Ricci__generate_symbolic_expressions()
    FD_outCparams = "outCverbose=False,enable_SIMD=" + str(enable_SIMD)
    FD_outCparams += ",GoldenKernelsEnable=" + str(enable_golden_kernels)
    loopopts = get_loopopts("InteriorPoints", enable_SIMD,
                            enable_rfm_precompute, OMP_pragma_on)

    FDorder = par.parval_from_str("finite_difference::FD_CENTDERIVS_ORDER")
    starttime = print_msg_with_timing("3-Ricci tensor (FD order=" + str(FDorder) + ")",
                                      msg="Ccodegen", startstop="start")

    # Construct body:
    preloop = ""
    enableCparameters = True
    # Set up preloop in case we're outputting code for the Einstein Toolkit (ETK)
    if par.parval_from_str("grid::GridFuncMemAccess") == "ETK":
        params, preloop = set_ETK_func_params_preloop(func_name_suffix)
        enableCparameters = False

    if enable_split_for_optimizations_doesnt_help and FDorder >= 8:
        loopopts += ",DisableOpenMP"
        # Part 1: the diagonal components RBARDD00/11/22; part 2: everything else.
        Ricci_SymbExpressions_pt1 = []
        Ricci_SymbExpressions_pt2 = []
        for lhsrhs in Ricci_SymbExpressions:
            if "RBARDD00" in lhsrhs.lhs or "RBARDD11" in lhsrhs.lhs or "RBARDD22" in lhsrhs.lhs:
                Ricci_SymbExpressions_pt1.append(
                    lhrh(lhs=lhsrhs.lhs, rhs=lhsrhs.rhs))
            else:
                Ricci_SymbExpressions_pt2.append(
                    lhrh(lhs=lhsrhs.lhs, rhs=lhsrhs.rhs))
        # NOTE(review): this assignment discards any ETK preloop set above --
        # confirm the split path is never combined with ETK output.
        preloop = """#pragma omp parallel
    {
#pragma omp for
"""
        preloopbody = fin.FD_outputC("returnstring", Ricci_SymbExpressions_pt1,
                                     params=FD_outCparams)
        preloop += lp.simple_loop(loopopts, preloopbody)
        preloop += "#pragma omp for\n"
        body = fin.FD_outputC("returnstring", Ricci_SymbExpressions_pt2,
                              params=FD_outCparams)
        postloop = "\n } // END #pragma omp parallel\n"
    else:
        body = fin.FD_outputC("returnstring", Ricci_SymbExpressions,
                              params=FD_outCparams)
        postloop = ""
    print_msg_with_timing("3-Ricci tensor (FD order=" + str(FDorder) + ")",
                          msg="Ccodegen", startstop="stop", starttime=starttime)

    add_to_Cfunction_dict(includes=includes, desc=desc, name=name,
                          params=params, preloop=preloop, body=body,
                          loopopts=loopopts, postloop=postloop,
                          rel_path_to_Cparams=rel_path_to_Cparams,
                          enableCparameters=enableCparameters)
    return pickle_NRPy_env()
def add_psi4_tetrad_to_Cfunction_dict(includes=None,
                                      rel_path_to_Cparams=os.path.join("."),
                                      setPsi4tozero=False):
    """Register the C function psi4_tetrad, which computes the tetrad for psi4.

    The C function receives cf and the six hDD components by value, twelve
    output pointers (mre4U0-3, mim4U0-3, n4U0-3), the coordinate arrays and
    the point indices i0,i1,i2.

    :param includes: forwarded unchanged to add_to_Cfunction_dict.
    :param rel_path_to_Cparams: forwarded to add_to_Cfunction_dict.
    :param setPsi4tozero: if truthy, the C body is just "return;".
    :return: whatever pickle_NRPy_env() returns.
    """
    timer_begin = print_msg_with_timing("psi4 tetrads", msg="Ccodegen", startstop="start")

    # First set up the symbolic expressions (RHSs) and their names (LHSs)
    psi4tet.Psi4_tetrads()
    tetrad_names = []
    tetrad_exprs = []
    for ptr_prefix, vector in (("*mre4U", psi4tet.mre4U),
                               ("*mim4U", psi4tet.mim4U),
                               ("*n4U", psi4tet.n4U)):
        for comp in range(4):
            tetrad_names.append(ptr_prefix + str(comp))
            tetrad_exprs.append(vector[comp])

    # C parameter list: params struct, metric inputs, tetrad outputs, coordinates/indices.
    paramsindent = " "
    metric_inputs = ["cf"] + ["hDD" + str(row) + str(col)
                              for row in range(3) for col in range(row, 3)]
    params = "const paramstruct *restrict params,\n" + paramsindent
    params += "".join("const REAL " + metric + "," for metric in metric_inputs)
    params += "\n" + paramsindent
    params += "".join("REAL " + out_ptr + "," for out_ptr in tetrad_names)
    params += "\n" + paramsindent + "REAL *restrict xx[3], const int i0,const int i1,const int i2"

    # Set the body of the function
    outCparams = "includebraces=False,outCverbose=False,enable_SIMD=False,preindent=1"
    if setPsi4tozero:
        body = "return;\n"
    else:
        body = "".join(
            " const REAL xx" + str(d) + " = xx[" + str(d) + "][i" + str(d) + "];\n"
            for d in range(3))
        body += " // Compute tetrads:\n"
        body += " {\n"
        # Sort the lhss list alphabetically, and rhss to match:
        ordered = sorted(zip(tetrad_names, tetrad_exprs), key=lambda pair: pair[0])
        lhss = [pair[0] for pair in ordered]
        rhss = [pair[1] for pair in ordered]
        body += outputC(rhss, lhss, filename="returnstring", params=outCparams)
        body += " }\n"

    print_msg_with_timing("psi4 tetrads", msg="Ccodegen", startstop="stop", starttime=timer_begin)
    add_to_Cfunction_dict(
        includes=includes,
        desc="Compute tetrad for psi4",
        name="psi4_tetrad",
        params=params,
        body=body,
        loopopts="",
        rel_path_to_Cparams=rel_path_to_Cparams)
    return pickle_NRPy_env()
def add_psi4_part_to_Cfunction_dict(includes=None,
                                    rel_path_to_Cparams=os.path.join("."),
                                    whichpart=0, setPsi4tozero=False,
                                    OMP_pragma_on="i2"):
    """Register the C function psi4_part<whichpart>, computing one part of psi4.

    Registers the AUX gridfunctions psi4_part<whichpart>re / ...im and a C
    function that writes them at all interior gridpoints.

    :param includes: header names for the generated C function. The list is
           copied, so the caller's list is never modified;
           "NRPy_function_prototypes.h" is always appended to the copy.
    :param rel_path_to_Cparams: forwarded to add_to_Cfunction_dict.
    :param whichpart: index into psi4.psi4_re_pt / psi4.psi4_im_pt; also
           part of the C function and gridfunction names.
    :param setPsi4tozero: if truthy, both gridfunctions are simply set to zero.
    :param OMP_pragma_on: forwarded to get_loopopts.
    :return: whatever pickle_NRPy_env() returns.
    """
    starttime = print_msg_with_timing("psi4, part " + str(whichpart),
                                      msg="Ccodegen", startstop="start")

    # Set up the C function for psi4
    # Work on a private copy: the original used `includes += [...]` directly on
    # the argument, which appended the header to the caller's list on every call.
    includes = [] if includes is None else list(includes)
    includes += ["NRPy_function_prototypes.h"]
    desc = "Compute psi4 at all interior gridpoints, part " + str(whichpart)
    name = "psi4_part" + str(whichpart)
    params = """const paramstruct *restrict params, const REAL *restrict in_gfs, REAL *restrict xx[3], REAL *restrict aux_gfs"""

    body = ""
    gri.register_gridfunctions("AUX", [
        "psi4_part" + str(whichpart) + "re",
        "psi4_part" + str(whichpart) + "im"
    ])
    FD_outCparams = "outCverbose=False,enable_SIMD=False,CSE_sorting=none"
    if not setPsi4tozero:
        # Set the body of the function
        # First compute the symbolic expressions
        psi4.Psi4(specify_tetrad=False)

        # We really don't want to store these "Cparameters" permanently; they'll be set via function call...
        # so we make a copy of the original par.glb_Cparams_list (sans tetrad vectors) and restore it below
        Cparams_list_orig = par.glb_Cparams_list.copy()
        try:
            par.Cparameters("REAL", __name__,
                            ["mre4U0", "mre4U1", "mre4U2", "mre4U3"], [0, 0, 0, 0])
            par.Cparameters("REAL", __name__,
                            ["mim4U0", "mim4U1", "mim4U2", "mim4U3"], [0, 0, 0, 0])
            par.Cparameters("REAL", __name__,
                            ["n4U0", "n4U1", "n4U2", "n4U3"], [0, 0, 0, 0])

            body += """
REAL mre4U0,mre4U1,mre4U2,mre4U3,mim4U0,mim4U1,mim4U2,mim4U3,n4U0,n4U1,n4U2,n4U3;
psi4_tetrad(params,
 in_gfs[IDX4S(CFGF, i0,i1,i2)],
 in_gfs[IDX4S(HDD00GF, i0,i1,i2)],
 in_gfs[IDX4S(HDD01GF, i0,i1,i2)],
 in_gfs[IDX4S(HDD02GF, i0,i1,i2)],
 in_gfs[IDX4S(HDD11GF, i0,i1,i2)],
 in_gfs[IDX4S(HDD12GF, i0,i1,i2)],
 in_gfs[IDX4S(HDD22GF, i0,i1,i2)],
 &mre4U0,&mre4U1,&mre4U2,&mre4U3,&mim4U0,&mim4U1,&mim4U2,&mim4U3,&n4U0,&n4U1,&n4U2,&n4U3,
 xx, i0,i1,i2);
"""
            body += "REAL xCart_rel_to_globalgrid_center[3];\n"
            body += "xx_to_Cart(params, xx, i0, i1, i2, xCart_rel_to_globalgrid_center);\n"
            body += "int ignore_Cart_to_i0i1i2[3]; REAL xx_rel_to_globalgridorigin[3];\n"
            body += "Cart_to_xx_and_nearest_i0i1i2_global_grid_center(params, xCart_rel_to_globalgrid_center,xx_rel_to_globalgridorigin,ignore_Cart_to_i0i1i2);\n"
            for i in range(3):
                body += "const REAL xx" + str(
                    i) + "=xx_rel_to_globalgridorigin[" + str(i) + "];\n"
            body += fin.FD_outputC("returnstring", [
                lhrh(lhs=gri.gfaccess("in_gfs", "psi4_part" + str(whichpart) + "re"),
                     rhs=psi4.psi4_re_pt[whichpart]),
                lhrh(lhs=gri.gfaccess("in_gfs", "psi4_part" + str(whichpart) + "im"),
                     rhs=psi4.psi4_im_pt[whichpart])
            ], params=FD_outCparams)
        finally:
            # Restore the global C-parameter list even if code generation raised,
            # so the temporary tetrad parameters never leak into later codegen.
            par.glb_Cparams_list = Cparams_list_orig.copy()
    else:
        body += fin.FD_outputC("returnstring", [
            lhrh(lhs=gri.gfaccess("in_gfs", "psi4_part" + str(whichpart) + "re"),
                 rhs=sp.sympify(0)),
            lhrh(lhs=gri.gfaccess("in_gfs", "psi4_part" + str(whichpart) + "im"),
                 rhs=sp.sympify(0))
        ], params=FD_outCparams)

    # The loop is always generated without SIMD and without rfm precompute.
    enable_SIMD = False
    enable_rfm_precompute = False
    print_msg_with_timing("psi4, part " + str(whichpart), msg="Ccodegen",
                          startstop="stop", starttime=starttime)
    add_to_Cfunction_dict(includes=includes, desc=desc, name=name,
                          params=params, body=body,
                          loopopts=get_loopopts("InteriorPoints", enable_SIMD,
                                                enable_rfm_precompute,
                                                OMP_pragma_on, enable_xxs=False),
                          rel_path_to_Cparams=rel_path_to_Cparams)
    return pickle_NRPy_env()
def Cfunc_general_wrapper_known_T():
    """Register the C function NRPyEOS_from_rho_Ye_T_interpolate_n_quantities.

    The generated C function interpolates n table quantities from
    (rho, Y_e, T) and is registered through outC.add_to_Cfunction_dict with
    C parameters disabled.
    """
    c_type = "void"
    name = "NRPyEOS_from_rho_Ye_T_interpolate_n_quantities"

    # Each argument goes on its own line; continuation lines are prefixed by
    # the indentation returned by param_indentation().
    indent = param_indentation(c_type, name)
    argument_decls = [
        "const NRPyEOS_params *restrict eos_params",
        "const int n",
        "const double rho",
        "const double Y_e",
        "const double T",
        "const int *restrict tablevars_keys",
        "double *restrict tablevars",
        "NRPyEOS_error_report *restrict report",
    ]
    params = (",\n" + indent).join(argument_decls)

    body = r"""
 // This function will interpolate n table quantities from
 // (rho,Ye,T). It replaces EOS_Omni calls with keytemp = 1
 if( n > NRPyEOS_ntablekeys ) {
 fprintf(stderr,"(NRPyEOS) from_rho_Ye_T_interpolate_n_quantities: number of quantities exceed maximum allowed: %d > %d. ABORTING.",
 n,NRPyEOS_ntablekeys);
 }

 // Start by assuming no errors
 report->error = false;

 // Check table bounds for input variables
 report->error_key = NRPyEOS_checkbounds(eos_params,rho,T,Y_e);
 if( report->error_key != 0 ) {
 // This should never happen, because we enforce
 // limits before calling this function
 sprintf(report->message,"from_rho_Ye_T_interpolate_n_quantities: problem with checkbounds");
 report->error = true;
 return;
 }

 // Get interpolation spots
 int idx[8];
 double delx,dely,delz;
 const double lr = log(rho);
 const double lt = log(T);
 NRPyEOS_get_interp_spots(eos_params,lr,lt,Y_e,&delx,&dely,&delz,idx);

 for(int i=0;i<n;i++) {
 // Now perform the interpolations
 int key = tablevars_keys[i];
 double tablevar_out;
 NRPyEOS_linterp_one(eos_params,idx,delx,dely,delz,&tablevar_out,key);

 // We have the result, but we must convert appropriately.
 // The only edge cases are P and eps, for which we obtain
 // log(P) and log(eps+eps0). We must check for them here
 if( key == NRPyEOS_press_key ) {
 tablevar_out = exp(tablevar_out);
 }
 else if( key == NRPyEOS_eps_key ) {
 tablevar_out = exp(tablevar_out) - eos_params->energy_shift;
 }

 // Then update tablevars
 tablevars[i] = tablevar_out;
 }
"""
    outC.add_to_Cfunction_dict(
        includes=[
            "NRPy_basic_defines.h",
            "NRPy_function_prototypes.h",
            "NRPyEOS_tabulated_helpers.h",
        ],
        desc="(c) 2022 Leo Werneck",
        c_type=c_type,
        name=name,
        params=params,
        body=body,
        enableCparameters=False)
def Cfunc_general_wrapper_unknown_T():
    """Register the C function NRPyEOS_from_rho_Ye_aux_find_T_and_interpolate_n_quantities.

    The generated C function first recovers the temperature from
    (rho, Y_e, aux) via NRPyEOS_findtemp_from_any, stores it in *T, and then
    interpolates n table quantities by calling
    NRPyEOS_from_rho_Ye_T_interpolate_n_quantities. It is registered through
    outC.add_to_Cfunction_dict with C parameters disabled.
    """
    includes = [
        "NRPy_basic_defines.h", "NRPy_function_prototypes.h",
        "NRPyEOS_tabulated_helpers.h"
    ]
    desc = "(c) 2022 Leo Werneck"
    c_type = "void"
    name = "NRPyEOS_from_rho_Ye_aux_find_T_and_interpolate_n_quantities"
    # Continuation lines of the C parameter list are prefixed by the
    # indentation returned by param_indentation().
    indent = param_indentation(c_type, name)
    params = "const NRPyEOS_params *restrict eos_params,\n"
    params += indent + "const int n,\n"
    params += indent + "const double prec,\n"
    params += indent + "const double rho,\n"
    params += indent + "const double Y_e,\n"
    params += indent + "const double tablevar_in,\n"
    params += indent + "const int tablevar_in_key,\n"
    params += indent + "const int *restrict tablevars_keys,\n"
    params += indent + "double *restrict tablevars,\n"
    params += indent + "double *restrict T,\n"
    params += indent + "NRPyEOS_error_report *restrict report"
    # FIX: the findtemp call below previously read "prec,<,&keyerr" -- the
    # "&lt" argument (address of the local `lt`) had been decoded as an HTML
    # entity into "<", which is not valid C. *T = exp(lt) right after the call
    # requires lt to be written through that pointer.
    body = r"""
 // This function will interpolate n table quantities from
 // (rho,Ye,aux). It replaces EOS_Omni calls with keytemp != 1
 if( n > NRPyEOS_ntablekeys ) {
 fprintf(stderr,"(NRPyEOS) NRPyEOS_from_rho_Ye_aux_find_T_and_interpolate_n_quantities: number of quantities exceed maximum allowed: %d > %d. ABORTING.",
 n,NRPyEOS_ntablekeys);
 }

 // Check table bounds for input variables
 report->error_key = NRPyEOS_checkbounds_kt0_noTcheck(eos_params,rho,Y_e);
 if( report->error_key != 0 ) {
 // This should never happen, because we enforce
 // limits before calling this function
 sprintf(report->message,"NRPyEOS_from_rho_Ye_aux_find_T_and_interpolate_n_quantities: problem with checkbounds_kt0_noTcheck");
 report->error = true;
 return;
 }

 // First step is to recover the temperature. The variable
 // tablevar_in is the one used in the temperature recovery.
 // For example, if tablevar_in = eps, then we recover T
 // using (rho,Ye,eps).
 double aux = tablevar_in;

 if( tablevar_in_key == NRPyEOS_press_key ) {
 // If aux = P, then we need log(P).
 aux = log(aux);
 }
 else if( tablevar_in_key == NRPyEOS_eps_key ) {
 // If aux = eps, then we need log(eps+eps0).
 // Compute eps+eps0
 aux += eos_params->energy_shift;
 // At this point, aux *must* be positive. If not, error out.
 if( aux < 0.0 ) {
 fprintf(stderr,"(NRPyEOS) NRPyEOS_from_rho_Ye_aux_find_T_and_interpolate_n_quantities: found eps+energy_shift < 0.0 (%e). ABORTING.",
 aux);
 }
 // Compute log(eps+eps0)
 aux = log(aux);
 }

 // Now compute the temperature
 const double lr = log(rho);
 const double lt0 = log(*T);
 double lt = 0.0;
 int keyerr=0;
 NRPyEOS_findtemp_from_any(eos_params,tablevar_in_key,lr,lt0,Y_e,aux,prec,&lt,&keyerr);

 // Now set the temperature
 *T = exp(lt);

 // Then interpolate the quantities we want from (rho,Ye,T)
 int anyerr=0;
 NRPyEOS_from_rho_Ye_T_interpolate_n_quantities(eos_params,n,rho,Y_e,*T,tablevars_keys,tablevars,report);
 report->error_key = keyerr;
 report->error = anyerr;
"""
    outC.add_to_Cfunction_dict(includes=includes,
                               desc=desc,
                               c_type=c_type,
                               name=name,
                               params=params,
                               body=body,
                               enableCparameters=False)
def add_to_Cfunction_dict_MoL_step_forward_in_time(MoL_method,
                                                   RHS_string="", post_RHS_string="", post_post_RHS_string="",
                                                   enable_rfm=False, enable_curviBCs=False, enable_SIMD=False,
                                                   enable_griddata=False):
    """Register the C function MoL_step_forward_in_time for the chosen MoL method.

    The C body is assembled from one call to single_RK_substep_input_symbolic
    per substep. Four code paths exist:
      1. diagonal method whose name contains "RK3": three-gridfunction scheme;
      2. non-diagonal method: one k_i gridfunction per substep;
      3. 'Euler': a single RHS evaluation;
      4. any other diagonal method: k_odd/k_even alternation plus a running
         total for y_{n+1}.

    :param MoL_method: key into Butcher_dict; also embedded in generated comments.
    :param RHS_string: forwarded to every substep as RHS_str.
    :param post_RHS_string: forwarded to every substep inside post_RHS_list.
    :param post_post_RHS_string: forwarded unchanged to every substep.
    :param enable_rfm: selects an rfm_struct argument/alias instead of xx[3].
    :param enable_curviBCs: adds a bc_struct argument (non-griddata signature only).
    :param enable_SIMD: adds the SIMD intrinsics header; forwarded to every substep.
    :param enable_griddata: use the single griddata_struct signature; the
        gridfunction aliases are then only passed to the substep generator
        (not prepended to the body) and enableCparameters is switched off.
    :return: None; the function is registered through add_to_Cfunction_dict.
    """
    includes = ["NRPy_basic_defines.h", "NRPy_function_prototypes.h"]
    if enable_SIMD:
        includes += [os.path.join("SIMD", "SIMD_intrinsics.h")]
    desc = "Method of Lines (MoL) for \"" + MoL_method + "\" method: Step forward one full timestep.\n"
    c_type = "void"
    name = "MoL_step_forward_in_time"

    # C function signature.
    if enable_griddata:
        params = "griddata_struct *restrict griddata, const REAL dt"
    else:
        params = "const paramstruct *restrict params, "
        if enable_rfm:
            params += "const rfm_struct *restrict rfmstruct, "
        else:
            params += "REAL *restrict xx[3], "
        if enable_curviBCs:
            params += "const bc_struct *restrict bcstruct, "
        params += "MoL_gridfunctions_struct *restrict gridfuncs, const REAL dt"

    body = "// C code implementation of -={ " + MoL_method + " }=- Method of Lines timestepping.\n\n"

    y_n_gridfunctions, non_y_n_gridfunctions_list, _throwaway = generate_gridfunction_names(MoL_method)
    gf_prefix = "griddata->gridfuncs." if enable_griddata else "gridfuncs->"

    # Local pointer aliases for the gridfunction arrays. The header comment
    # must end at a newline so it does not comment out the first declaration.
    gf_aliases = ("// Set gridfunction aliases from gridfuncs struct\n"
                  + "REAL *restrict " + y_n_gridfunctions + " = " + gf_prefix + y_n_gridfunctions
                  + ";  // y_n gridfunctions\n"
                  + "// Temporary timelevel & AUXEVOL gridfunctions:\n")
    for gf in non_y_n_gridfunctions_list:
        gf_aliases += "REAL *restrict " + gf + " = " + gf_prefix + gf + ";\n"
    if enable_griddata:
        gf_aliases += "paramstruct *restrict params = &griddata->params;\n"
        if enable_rfm:
            gf_aliases += "const rfm_struct *restrict rfmstruct = &griddata->rfmstruct;\n"
        else:
            gf_aliases += "REAL * xx[3]; for(int ww=0;ww<3;ww++) xx[ww] = griddata->xx[ww];\n"
        gf_aliases += "const bc_struct *restrict bcstruct = &griddata->bcstruct;\n"
        for i in ["0", "1", "2"]:
            gf_aliases += "const int Nxx_plus_2NGHOSTS" + i + " = griddata->params.Nxx_plus_2NGHOSTS" + i + ";\n"

    if not enable_griddata:
        body += gf_aliases

    # Implement Method of Lines (MoL) Timestepping
    Butcher = Butcher_dict[MoL_method][0]  # Get the desired Butcher table from the dictionary
    num_steps = len(Butcher) - 1  # Specify the number of required steps to update solution

    # Keyword arguments that are identical for every substep.
    substep_opts = {
        "RHS_str": RHS_string,
        "enable_SIMD": enable_SIMD,
        "enable_griddata": enable_griddata,
        "gf_aliases": gf_aliases,
        "post_post_RHS_string": post_post_RHS_string,
    }

    dt = sp.Symbol("dt", real=True)
    # Diagonal RK3 only!!!
    if diagonal(MoL_method) and "RK3" in MoL_method:
        # In a diagonal RK3 method, only 3 gridfunctions need be defined. Below implements this approach.
        y_n_gfs = sp.Symbol("y_n_gfsL", real=True)
        k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs = sp.Symbol(
            "k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfsL", real=True)
        k2_or_y_nplus_a32_k2_gfs = sp.Symbol("k2_or_y_nplus_a32_k2_gfsL", real=True)

        # k_1
        body += ("\n"
                 "// In a diagonal RK3 method like this one, only 3 gridfunctions need be defined."
                 " Below implements this approach.\n"
                 "// Using y_n_gfs as input, k1 and apply boundary conditions\n")
        body += single_RK_substep_input_symbolic(
            commentblock=("// -={ START k1 substep }=-\n"
                          "// RHS evaluation:\n"
                          "// 1. We will store k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs now as\n"
                          "// ... the update for the next rhs evaluation y_n + a21*k1*dt\n"
                          "// Post-RHS evaluation:\n"
                          "// 1. Apply post-RHS to y_n + a21*k1*dt"),
            RHS_input_str=y_n_gfs,
            RHS_output_str=k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs,
            RK_lhss_list=[k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs],
            RK_rhss_list=[Butcher[1][1] * k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs * dt + y_n_gfs],
            post_RHS_list=[post_RHS_string],
            post_RHS_output_list=[k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs],
            **substep_opts) + "// -={ END k1 substep }=-\n\n"

        # k_2
        body += single_RK_substep_input_symbolic(
            commentblock=("// -={ START k2 substep }=-\n"
                          "// RHS evaluation:\n"
                          "// 1. Reassign k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs to be the running total"
                          " y_{n+1}; a32*k2*dt to the running total\n"
                          "// 2. Store k2_or_y_nplus_a32_k2_gfs now as y_n + a32*k2*dt\n"
                          "// Post-RHS evaluation:\n"
                          "// 1. Apply post-RHS to both y_n + a32*k2 (stored in k2_or_y_nplus_a32_k2_gfs)\n"
                          "// ... and the y_{n+1} running total, as they have not been applied yet to"
                          " k2-related gridfunctions"),
            RHS_input_str=k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs,
            RHS_output_str=k2_or_y_nplus_a32_k2_gfs,
            RK_lhss_list=[k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs, k2_or_y_nplus_a32_k2_gfs],
            RK_rhss_list=[
                Butcher[3][1] * (k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs - y_n_gfs) / Butcher[1][1]
                + y_n_gfs + Butcher[3][2] * k2_or_y_nplus_a32_k2_gfs * dt,
                Butcher[2][2] * k2_or_y_nplus_a32_k2_gfs * dt + y_n_gfs,
            ],
            post_RHS_list=[post_RHS_string, post_RHS_string],
            post_RHS_output_list=[k2_or_y_nplus_a32_k2_gfs,
                                  k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs],
            **substep_opts) + "// -={ END k2 substep }=-\n\n"

        # k_3
        body += single_RK_substep_input_symbolic(
            commentblock=("// -={ START k3 substep }=-\n"
                          "// RHS evaluation:\n"
                          "// 1. Add k3 to the running total and save to y_n\n"
                          "// Post-RHS evaluation:\n"
                          "// 1. Apply post-RHS to y_n"),
            RHS_input_str=k2_or_y_nplus_a32_k2_gfs,
            RHS_output_str=y_n_gfs,
            RK_lhss_list=[y_n_gfs],
            RK_rhss_list=[k1_or_y_nplus_a21_k1_or_y_nplus1_running_total_gfs + Butcher[3][3] * y_n_gfs * dt],
            post_RHS_list=[post_RHS_string],
            post_RHS_output_list=[y_n_gfs],
            **substep_opts) + "// -={ END k3 substep }=-\n\n"
    else:
        y_n = sp.Symbol("y_n_gfsL", real=True)
        if not diagonal(MoL_method):
            next_y_input = sp.Symbol("next_y_input_gfsL", real=True)
            for s in range(num_steps):
                final_step = (s == num_steps - 1)
                # On the first step (s=0) the y_n gridfunction is the input;
                # afterwards next_y_input is. The output target is the reverse:
                # y_n on the final step, next_y_input otherwise.
                RHS_input = y_n if s == 0 else next_y_input
                RHS_output = sp.Symbol("k" + str(s + 1) + "_gfs", real=True)
                RK_lhs = y_n if final_step else next_y_input
                RK_rhs = y_n
                for m in range(s + 1):
                    k_mp1_gfs = sp.Symbol("k" + str(m + 1) + "_gfsL")
                    if Butcher[s + 1][m + 1] != 0:
                        if Butcher[s + 1][m + 1] != 1:
                            RK_rhs += dt * k_mp1_gfs * Butcher[s + 1][m + 1]
                        else:
                            RK_rhs += dt * k_mp1_gfs
                post_RHS_output = y_n if final_step else next_y_input

                body += single_RK_substep_input_symbolic(
                    commentblock="// -={ START k" + str(s + 1) + " substep }=-",
                    RHS_input_str=RHS_input, RHS_output_str=RHS_output,
                    RK_lhss_list=[RK_lhs], RK_rhss_list=[RK_rhs],
                    post_RHS_list=[post_RHS_string],
                    post_RHS_output_list=[post_RHS_output],
                    **substep_opts) + "// -={ END k" + str(s + 1) + " substep }=-\n\n"
        else:
            y_nplus1_running_total = sp.Symbol("y_nplus1_running_total_gfsL", real=True)
            if MoL_method == 'Euler':
                # Euler's method doesn't require any k_i, and gets its own unique algorithm
                body += single_RK_substep_input_symbolic(
                    commentblock="// ***Euler timestepping only requires one RHS evaluation***",
                    RHS_input_str=y_n, RHS_output_str=y_nplus1_running_total,
                    RK_lhss_list=[y_n], RK_rhss_list=[y_n + y_nplus1_running_total * dt],
                    post_RHS_list=[post_RHS_string], post_RHS_output_list=[y_n],
                    **substep_opts)
            else:
                k_odd = sp.Symbol("k_odd_gfsL", real=True)
                k_even = sp.Symbol("k_even_gfsL", real=True)
                for s in range(num_steps):
                    # First step: y_n is the input and k_odd the output.
                    # Afterwards inputs and outputs alternate between k_odd and k_even.
                    if s == 0:
                        RHS_input, RHS_output = y_n, k_odd
                    elif s % 2 == 0:
                        RHS_input, RHS_output = k_even, k_odd
                    else:
                        RHS_input, RHS_output = k_odd, k_even

                    RK_lhs_list = []
                    RK_rhs_list = []
                    if s != num_steps - 1:  # For anything besides the final step
                        if s == 0:  # The first RK step
                            RK_lhs_list.append(y_nplus1_running_total)
                            RK_rhs_list.append(RHS_output * dt * Butcher[num_steps][s + 1])

                            RK_lhs_list.append(RHS_output)
                            RK_rhs_list.append(y_n + RHS_output * dt * Butcher[s + 1][s + 1])
                        else:
                            if Butcher[num_steps][s + 1] != 0:
                                RK_lhs_list.append(y_nplus1_running_total)
                                if Butcher[num_steps][s + 1] != 1:
                                    RK_rhs_list.append(
                                        y_nplus1_running_total + RHS_output * dt * Butcher[num_steps][s + 1])
                                else:
                                    RK_rhs_list.append(y_nplus1_running_total + RHS_output * dt)
                            if Butcher[s + 1][s + 1] != 0:
                                RK_lhs_list.append(RHS_output)
                                if Butcher[s + 1][s + 1] != 1:
                                    RK_rhs_list.append(y_n + RHS_output * dt * Butcher[s + 1][s + 1])
                                else:
                                    RK_rhs_list.append(y_n + RHS_output * dt)
                        post_RHS_output = RHS_output
                    else:  # If on the final step
                        if Butcher[num_steps][s + 1] != 0:
                            RK_lhs_list.append(y_n)
                            if Butcher[num_steps][s + 1] != 1:
                                RK_rhs_list.append(
                                    y_n + y_nplus1_running_total + RHS_output * dt * Butcher[num_steps][s + 1])
                            else:
                                RK_rhs_list.append(y_n + y_nplus1_running_total + RHS_output * dt)
                        post_RHS_output = y_n

                    body += single_RK_substep_input_symbolic(
                        commentblock="// -={ START k" + str(s + 1) + " substep }=-",
                        RHS_input_str=RHS_input, RHS_output_str=RHS_output,
                        RK_lhss_list=RK_lhs_list, RK_rhss_list=RK_rhs_list,
                        post_RHS_list=[post_RHS_string],
                        post_RHS_output_list=[post_RHS_output],
                        **substep_opts) + "// -={ END k" + str(s + 1) + " substep }=-\n\n"

    # With griddata, `params` is aliased by hand inside gf_aliases, so the
    # automatic C-parameter handling is switched off.
    enableCparameters = not enable_griddata
    add_to_Cfunction_dict(
        includes=includes,
        desc=desc,
        c_type=c_type, name=name, params=params,
        body=indent_Ccode(body, "  "),
        enableCparameters=enableCparameters, rel_path_to_Cparams=os.path.join("."))
def add_SpinWeight_minus2_SphHarmonics_to_Cfunction_dict(includes=None, rel_path_to_Cparams=os.path.join("."),
                                                         maximum_l=8):
    """Register the psi4 spin-weight -2 spherical-harmonic decomposition driver.

    Three C functions are generated:
      * SpinWeight_minus2_SphHarmonics (in prefunc): nested switch(l)/switch(m)
        evaluating Y_{s=-2,l,m}(th,ph) for every l in [0, maximum_l];
      * lowlevel_decompose_psi4_into_swm2_modes (in prefunc): integrates psi4
        against each (l,m) mode for l in [2, maximum_l] and writes one line
        per call to outpsi4_l<l>_m<m>-r<R_ext>.txt;
      * driver__spherlikegrids__psi4_spinweightm2_decomposition (the registered
        function): samples psi4 at each extraction index and calls the above.

    :param includes: passed through unchanged to add_to_Cfunction_dict.
    :param rel_path_to_Cparams: passed through unchanged to add_to_Cfunction_dict.
    :param maximum_l: largest l for which harmonics are generated and decomposed.
    :return: the value of pickle_NRPy_env().
    """
    starttime = print_msg_with_timing("Spin-weight s=-2 Spherical Harmonics", msg="Ccodegen", startstop="start")

    # Set up the C function for computing the spin-weight -2 spherical harmonic at theta,phi: Y_{s=-2, l,m}(theta,phi)
    prefunc = r"""// Compute at a single point (th,ph) the spin-weight -2 spherical harmonic Y_{s=-2, l,m}(th,ph)
// Manual "inline void" of this function results in compilation error with clang.
void SpinWeight_minus2_SphHarmonics(const int l, const int m, const REAL th, const REAL ph,
                                    REAL *reYlmswm2_l_m, REAL *imYlmswm2_l_m) {
"""
    # Construct prefunc:
    outCparams = "preindent=1,outCfileaccess=a,outCverbose=False,includebraces=False"
    prefunc += "  switch(l) {\n"
    for l in range(maximum_l + 1):  # Output values up to and including l=maximum_l.
        prefunc += "  case " + str(l) + ":\n"
        prefunc += "    switch(m) {\n"
        for m in range(-l, l + 1):
            prefunc += "    case " + str(m) + ":\n"
            prefunc += "      {\n"
            Y_m2_lm = SWm2SH.Y(-2, l, m, SWm2SH.th, SWm2SH.ph)
            prefunc += outputC([sp.re(Y_m2_lm), sp.im(Y_m2_lm)], ["*reYlmswm2_l_m", "*imYlmswm2_l_m"],
                               "returnstring", outCparams)
            prefunc += "      }\n"
            prefunc += "      return;\n"
        prefunc += "    }  // END switch(m)\n"
        # Without this break an out-of-range m would fall through into the
        # next l's switch(m) and could silently return the wrong harmonic
        # instead of reaching the error exit below.
        prefunc += "    break;\n"
    prefunc += "  } // END switch(l)\n"

    prefunc += r"""
  fprintf(stderr, "ERROR: SpinWeight_minus2_SphHarmonics handles only l=[0,""" + str(maximum_l) + r"""] and only m=[-l,+l] is defined.\n");
  fprintf(stderr, " You chose l=%d and m=%d, which is out of these bounds.\n",l,m);
  exit(1);
}

void lowlevel_decompose_psi4_into_swm2_modes(const int Nxx_plus_2NGHOSTS1,const int Nxx_plus_2NGHOSTS2,
                                             const REAL dxx1, const REAL dxx2,
                                             const REAL curr_time, const REAL R_ext,
                                             const REAL *restrict th_array, const REAL *restrict sinth_array, const REAL *restrict ph_array,
                                             const REAL *restrict psi4r_at_R_ext, const REAL *restrict psi4i_at_R_ext) {
  for(int l=2;l<=""" + str(maximum_l) + r""";l++) {  // The maximum l here is set in Python.
    for(int m=-l;m<=l;m++) {
      // Parallelize the integration loop:
      REAL psi4r_l_m = 0.0;
      REAL psi4i_l_m = 0.0;
#pragma omp parallel for reduction(+:psi4r_l_m,psi4i_l_m)
      for(int i1=0;i1<Nxx_plus_2NGHOSTS1-2*NGHOSTS;i1++) {
        const REAL th    = th_array[i1];
        const REAL sinth = sinth_array[i1];
        for(int i2=0;i2<Nxx_plus_2NGHOSTS2-2*NGHOSTS;i2++) {
          const REAL ph = ph_array[i2];
          // Construct integrand for psi4 spin-weight s=-2,l=2,m=0 spherical harmonic
          REAL ReY_sm2_l_m,ImY_sm2_l_m;
          SpinWeight_minus2_SphHarmonics(l,m, th,ph, &ReY_sm2_l_m,&ImY_sm2_l_m);

          const int idx2d = i1*(Nxx_plus_2NGHOSTS2-2*NGHOSTS)+i2;
          const REAL a = psi4r_at_R_ext[idx2d];
          const REAL b = psi4i_at_R_ext[idx2d];
          const REAL c = ReY_sm2_l_m;
          const REAL d = ImY_sm2_l_m;
          psi4r_l_m += (a*c + b*d) * dxx2 * sinth*dxx1;
          psi4i_l_m += (b*c - a*d) * dxx2 * sinth*dxx1;
        }
      }
      // Step 4: Output the result of the integration to file.
      char filename[100];
      sprintf(filename,"outpsi4_l%d_m%d-r%.2f.txt",l,m, (double)R_ext);
      // If you love "+"'s in filenames by all means enable this (ugh):
      //if(m>=0) sprintf(filename,"outpsi4_l%d_m+%d-r%.2f.txt",l,m, (double)R_ext);
      FILE *outpsi4_l_m;
      // 0 = n*dt when n=0 is exactly represented in double/long double precision,
      // so no worries about the result being ~1e-16 in double/ld precision
      if(curr_time==0) outpsi4_l_m = fopen(filename, "w");
      else             outpsi4_l_m = fopen(filename, "a");
      if(outpsi4_l_m == NULL) {
        fprintf(stderr, "ERROR: could not open file %s for output.\n", filename);
        exit(1);
      }
      fprintf(outpsi4_l_m,"%e %.15e %.15e\n", (double)(curr_time),
              (double)psi4r_l_m,(double)psi4i_l_m);
      fclose(outpsi4_l_m);
    }
  }
}
"""

    desc = ""
    name = "driver__spherlikegrids__psi4_spinweightm2_decomposition"
    params = (r"""const paramstruct *restrict params, REAL *restrict diagnostic_output_gfs, """
              r"""const int *restrict list_of_R_ext_idxs, const int num_of_R_ext_idxs, """
              r"""const REAL time, REAL *restrict xx[3],"""
              r"""void xx_to_Cart(const paramstruct *restrict params, REAL *restrict xx[3],"""
              r"""const int i0,const int i1,const int i2, REAL xCart[3])""")

    body = r"""
  // Step 1: Allocate memory for 2D arrays used to store psi4, theta, sin(theta), and phi.
  const int sizeof_2Darray = sizeof(REAL)*(Nxx_plus_2NGHOSTS1-2*NGHOSTS)*(Nxx_plus_2NGHOSTS2-2*NGHOSTS);
  REAL *restrict psi4r_at_R_ext = (REAL *restrict)malloc(sizeof_2Darray);
  REAL *restrict psi4i_at_R_ext = (REAL *restrict)malloc(sizeof_2Darray);
  //         ... also store theta, sin(theta), and phi to corresponding 1D arrays.
  REAL *restrict sinth_array = (REAL *restrict)malloc(sizeof(REAL)*(Nxx_plus_2NGHOSTS1-2*NGHOSTS));
  REAL *restrict th_array    = (REAL *restrict)malloc(sizeof(REAL)*(Nxx_plus_2NGHOSTS1-2*NGHOSTS));
  REAL *restrict ph_array    = (REAL *restrict)malloc(sizeof(REAL)*(Nxx_plus_2NGHOSTS2-2*NGHOSTS));

  // Step 2: Loop over all extraction indices:
  for(int ii0=0;ii0<num_of_R_ext_idxs;ii0++) {
    // Step 2.a: Set the extraction radius R_ext based on the radial index R_ext_idx
    REAL R_ext;
    {
      REAL xCart[3];
      xx_to_Cart(params,xx,list_of_R_ext_idxs[ii0],1,1,xCart); // values for itheta and iphi don't matter.
      R_ext = sqrt(xCart[0]*xCart[0] + xCart[1]*xCart[1] + xCart[2]*xCart[2]);
    }

    // Step 2.b: Compute psi_4 at this extraction radius and store to a local 2D array.
    const int i0=list_of_R_ext_idxs[ii0];
#pragma omp parallel for
    for(int i1=NGHOSTS;i1<Nxx_plus_2NGHOSTS1-NGHOSTS;i1++) {
      th_array[i1-NGHOSTS]    =     xx[1][i1];
      sinth_array[i1-NGHOSTS] = sin(xx[1][i1]);
      for(int i2=NGHOSTS;i2<Nxx_plus_2NGHOSTS2-NGHOSTS;i2++) {
        ph_array[i2-NGHOSTS] = xx[2][i2];

        // Compute real & imaginary parts of psi_4, output to diagnostic_output_gfs
        const REAL psi4r = (diagnostic_output_gfs[IDX4S(PSI4_PART0REGF, i0,i1,i2)] +
                            diagnostic_output_gfs[IDX4S(PSI4_PART1REGF, i0,i1,i2)] +
                            diagnostic_output_gfs[IDX4S(PSI4_PART2REGF, i0,i1,i2)]);
        const REAL psi4i = (diagnostic_output_gfs[IDX4S(PSI4_PART0IMGF, i0,i1,i2)] +
                            diagnostic_output_gfs[IDX4S(PSI4_PART1IMGF, i0,i1,i2)] +
                            diagnostic_output_gfs[IDX4S(PSI4_PART2IMGF, i0,i1,i2)]);

        // Store result to "2D" array (actually 1D array with 2D storage):
        const int idx2d = (i1-NGHOSTS)*(Nxx_plus_2NGHOSTS2-2*NGHOSTS)+(i2-NGHOSTS);
        psi4r_at_R_ext[idx2d] = psi4r;
        psi4i_at_R_ext[idx2d] = psi4i;
      }
    }
    // Step 3: Perform integrations across all l,m modes from l=2 up to and including L_MAX (global variable):
    lowlevel_decompose_psi4_into_swm2_modes(Nxx_plus_2NGHOSTS1,Nxx_plus_2NGHOSTS2,
                                            dxx1,dxx2,
                                            time, R_ext, th_array, sinth_array, ph_array,
                                            psi4r_at_R_ext,psi4i_at_R_ext);
  }

  // Step 4: Free all allocated memory:
  free(psi4r_at_R_ext); free(psi4i_at_R_ext);
  free(sinth_array); free(th_array); free(ph_array);
"""

    print_msg_with_timing("Spin-weight s=-2 Spherical Harmonics", msg="Ccodegen", startstop="stop",
                          starttime=starttime)

    add_to_Cfunction_dict(
        includes=includes,
        prefunc=prefunc,
        desc=desc,
        name=name, params=params,
        body=body,
        rel_path_to_Cparams=rel_path_to_Cparams)
    return pickle_NRPy_env()
def add_BSSN_constraints_to_Cfunction_dict(includes=None, rel_path_to_Cparams=os.path.join("."),
                                           enable_rfm_precompute=True, enable_golden_kernels=False,
                                           enable_SIMD=True, enable_stress_energy_source_terms=False,
                                           leave_Ricci_symbolic=True, output_H_only=False,
                                           OMP_pragma_on="i2", func_name_suffix=""):
    """Register the C function that evaluates the BSSN constraints.

    :param includes: optional iterable of header names; it is copied, so the
        caller's object is never modified. The SIMD intrinsics header and
        finite_difference_functions.h are appended when the corresponding
        options are active.
    :param rel_path_to_Cparams: passed through to add_to_Cfunction_dict.
    :param enable_rfm_precompute: use an rfm_struct argument instead of xx[3];
        also forwarded to get_loopopts.
    :param enable_golden_kernels: sets GoldenKernelsEnable in the FD_outputC options.
    :param enable_SIMD: sets enable_SIMD in the FD_outputC options and loop options.
    :param enable_stress_energy_source_terms: forwarded to the symbolic-expression generator.
    :param leave_Ricci_symbolic: forwarded to the symbolic-expression generator.
    :param output_H_only: forwarded to the symbolic-expression generator.
    :param OMP_pragma_on: forwarded to get_loopopts.
    :param func_name_suffix: appended to the C function name "BSSN_constraints".
    :return: the value of pickle_NRPy_env().
    """
    # Work on a private copy: the original `includes += [...]` extended the
    # caller's list in place.
    includes = [] if includes is None else list(includes)
    if enable_SIMD:
        includes.append(os.path.join("SIMD", "SIMD_intrinsics.h"))
    enable_FD_functions = bool(par.parval_from_str("finite_difference::enable_FD_functions"))
    if enable_FD_functions:
        includes.append("finite_difference_functions.h")

    # Set up the C function for the BSSN constraints
    desc = "Evaluate the BSSN constraints"
    name = "BSSN_constraints" + func_name_suffix
    params = "const paramstruct *restrict params, "
    if enable_rfm_precompute:
        params += "const rfm_struct *restrict rfmstruct, "
    else:
        params += "REAL *xx[3], "
    params += " const REAL *restrict in_gfs, const REAL *restrict auxevol_gfs, REAL *restrict aux_gfs"

    # Construct body:
    BSSN_constraints_SymbExpressions = BSSN_constraints__generate_symbolic_expressions(
        enable_stress_energy_source_terms,
        leave_Ricci_symbolic=leave_Ricci_symbolic,
        output_H_only=output_H_only)

    preloop = ""
    enableCparameters = True
    # Set up preloop in case we're outputting code for the Einstein Toolkit (ETK)
    if par.parval_from_str("grid::GridFuncMemAccess") == "ETK":
        params, preloop = set_ETK_func_params_preloop(func_name_suffix)
        enableCparameters = False

    FD_outCparams = "outCverbose=False,enable_SIMD=" + str(enable_SIMD)
    FD_outCparams += ",GoldenKernelsEnable=" + str(enable_golden_kernels)

    FDorder = par.parval_from_str("finite_difference::FD_CENTDERIVS_ORDER")
    timing_label = "BSSN constraints (FD order=" + str(FDorder) + ")"
    starttime = print_msg_with_timing(timing_label, msg="Ccodegen", startstop="start")
    body = fin.FD_outputC("returnstring", BSSN_constraints_SymbExpressions, params=FD_outCparams)
    print_msg_with_timing(timing_label, msg="Ccodegen", startstop="stop", starttime=starttime)

    add_to_Cfunction_dict(
        includes=includes,
        desc=desc,
        name=name, params=params,
        preloop=preloop,
        body=body,
        loopopts=get_loopopts("InteriorPoints", enable_SIMD, enable_rfm_precompute, OMP_pragma_on),
        rel_path_to_Cparams=rel_path_to_Cparams,
        enableCparameters=enableCparameters)
    return pickle_NRPy_env()