def create_table_for_jet(fpath):
    '''Convert the 2D bulk evolution stored in bulkinfo.h5 to a text table.

    For every proper time listed in ``coord/tau`` the energy density and
    the transverse velocity components are read from the ``bulk2d`` group.
    Temperature and QGP fraction are derived from the energy density and
    all five fields are passed through ``interp_2d``.  One row

        tau x y ed T vx vy qgp_fraction 0.0

    is produced per point of the grid returned by ``interp_2d`` and the
    whole table is written to ``<fpath>/bulk.dat``.

    Params:
        :param fpath: directory that holds bulkinfo.h5; bulk.dat is
            written into the same directory
    '''
    pce = Eos(1)
    fname = os.path.join(fpath, 'bulkinfo.h5')

    # Rows are collected in memory and written once after the hdf5 file
    # has been read, so a failing read does not leave a partial bulk.dat.
    rows = []
    with h5py.File(fname, 'r') as h5:
        tau_list = h5['coord/tau'][...]
        x_list = h5['coord/x'][...]
        y_list = h5['coord/y'][...]

        for tau in tau_list:
            # dataset names encode tau with 'p' instead of '.', e.g. 0p6
            tau_str = ('%s' % tau).replace('.', 'p')
            ed = h5['bulk2d/exy_tau%s' % tau_str][...]
            vx = h5['bulk2d/vx_xy_tau%s' % tau_str][...]
            vy = h5['bulk2d/vy_xy_tau%s' % tau_str][...]
            T = pce.f_T(ed)
            QGP_fraction = qgp_fraction(T)

            x, y, ed_new = interp_2d(ed, x_list, y_list)
            x, y, vx_new = interp_2d(vx, x_list, y_list)
            x, y, vy_new = interp_2d(vy, x_list, y_list)
            x, y, T_new = interp_2d(T, x_list, y_list)
            x, y, frac_new = interp_2d(QGP_fraction, x_list, y_list)

            for i, xi in enumerate(x):
                for j, yj in enumerate(y):
                    fields = (tau, xi, yj, ed_new[i, j], T_new[i, j],
                              vx_new[i, j], vy_new[i, j], frac_new[i, j],
                              0.0)
                    # str() + single spaces reproduces what the former
                    # Python 2 print statement emitted, without relying
                    # on ``print >> stream`` or cStringIO.
                    rows.append(' '.join(str(field) for field in fields))

    with open(os.path.join(fpath, 'bulk.dat'), 'w') as f:
        f.writelines(row + '\n' for row in rows)
def ppcollision(eostype='SU3', outdir='../results/event0'):
    '''Run a 2+1D ideal hydro evolution for a proton-proton like profile.

    The global ``cfg`` is updated in place, written out with
    ``write_config`` and handed to ``CLIdeal``.  The initial energy
    density is ``cfg.Edmax * pp_energydensity(x, y, b)`` evaluated on the
    transverse grid; the initial velocities are zero.

    Params:
        :param eostype: 'SU3' -> pure_gauge, 'QCD' -> lattice_wb,
            'EOSI' -> ideal_gas; any other value leaves cfg.eos_type as
            it already is
        :param outdir: output directory, created if it does not exist
    '''
    print('start ...')
    t0 = time()

    # makedirs (as used by CLIdeal.__init__) also creates missing parents
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    eos_names = {
        'SU3': 'pure_gauge',
        'QCD': 'lattice_wb',
        # the original used ``==`` here, so 'EOSI' never took effect
        'EOSI': 'ideal_gas',
    }
    if eostype in eos_names:
        cfg.eos_type = eos_names[eostype]

    # update the configuration
    cfg.Edmax = 50.0
    cfg.fPathOut = outdir

    cfg.NX = 301
    cfg.NY = 301
    cfg.NZ = 1
    cfg.DT = 0.01
    cfg.DX = 0.08
    cfg.DY = 0.08
    cfg.ntskip = 50

    cfg.NumOfNucleons = 1
    cfg.Ra = 0.8
    cfg.Eta = 0.6
    cfg.Si0 = 6.4
    cfg.TAU0 = 0.6
    cfg.ImpactParameter = 0.0
    cfg.ETAOS = 0.0
    cfg.SQRTS = 2760
    cfg.Hwn = 1.0

    write_config(cfg)

    # Floor division keeps the linspace spacing equal to DX/DY for an odd
    # number of cells; true division (Python 3) would give NX/2 = 150.5
    # and a grid that is slightly wider than the one hydro evolves on.
    xmax = (cfg.NX // 2) * cfg.DX
    ymax = (cfg.NY // 2) * cfg.DY
    x = np.linspace(-xmax, xmax, cfg.NX)
    y = np.linspace(-ymax, ymax, cfg.NY)
    x, y = np.meshgrid(x, y)
    ed = cfg.Edmax * pp_energydensity(x, y, b=cfg.ImpactParameter)

    # CLIdeal builds its own Eos from cfg.eos_type
    ideal = CLIdeal(cfg, gpu_id=1)

    # columns are (ed, vx, vy, vz); only the energy density is non-zero
    edv = np.zeros((ideal.size, 4), ideal.cfg.real)
    print(edv.shape)
    # meshgrid returns arrays indexed [y, x]; transpose before flattening
    edv[:, 0] = ed.T.flatten()
    ideal.load_ini(edv)

    ideal.evolve(max_loops=1000, save_hypersf=False, to_maxloop=True)
    t1 = time()
    print('finished. Total time: {dtime}'.format(dtime=t1 - t0))
def glueball(Tmax=0.6, outdir='../results/event0', eos_type='pure_gauge'):
    '''Run an ideal hydro event with Glauber initial conditions and then
    call spec.py on the output directory.

    Params:
        :param Tmax: accepted for interface compatibility; not used by
            the body
        :param outdir: output directory, created with os.mkdir if missing
        :param eos_type: stored in cfg.eos_type; 'pure_gauge' additionally
            sets cfg.TFRZ to 0.2
    '''
    print('start ...')
    started = time()

    if not os.path.exists(outdir):
        os.mkdir(outdir)

    cfg.eos_type = eos_type
    eos = Eos(cfg.eos_type)

    # configuration overrides, applied in this order
    overrides = (
        ('Edmax', 55.0),
        ('fPathOut', outdir),
        ('NX', 501),
        ('NY', 501),
        ('NZ', 1),
        ('DT', 0.01),
        ('DX', 0.08),
        ('DY', 0.08),
        ('NumOfNucleons', 197),
        ('Ra', 6.4),
        ('Eta', 0.546),
        ('Si0', 4.0),
        ('TAU0', 0.4),
        ('ImpactParameter', 7.74),
        ('ETAOS', 0.0),
        ('save_to_hdf5', False),
    )
    for attr_name, attr_value in overrides:
        setattr(cfg, attr_name, attr_value)

    if eos_type == 'pure_gauge':
        cfg.TFRZ = 0.2

    cfg.Hwn = 0.95

    write_config(cfg)

    ideal = CLIdeal(cfg, gpu_id=2)

    # Glauber receives the device buffer ideal.d_ev[1]
    from glauber import Glauber
    Glauber(cfg, ideal.ctx, ideal.queue, ideal.compile_options,
            ideal.d_ev[1])

    ideal.evolve(max_loops=3000, save_hypersf=False, to_maxloop=True)

    finished = time()
    print('finished. Total time: {dtime}'.format(dtime=finished - started))

    from subprocess import call
    call(['python', './spec.py', cfg.fPathOut])
def __init__(self, cfg, ctx, queue, eos_table, compile_options):
    '''Store the OpenCL handles, allocate host buffers and the per-step
    record lists, build the slicing kernels and set up the x, y, z axes.

    Params:
        :param cfg: configuration providing NX, NY, NZ, DX, DY, DZ,
            real and eos_type
        :param ctx: OpenCL context
        :param queue: OpenCL command queue
        :param eos_table: stored as self.eos_table
        :param compile_options: iterable of build options; a list copy
            is kept
    '''
    self.cfg = cfg
    self.ctx = ctx
    self.queue = queue
    self.eos_table = eos_table
    self.compile_options = list(compile_options)

    NX, NY, NZ = cfg.NX, cfg.NY, cfg.NZ
    n_cells = NX * NY * NZ
    # host copies: 4 numbers per cell for ev, 10 per cell for pi
    self.h_ev = np.zeros((n_cells, 4), cfg.real)
    self.h_pi = np.zeros(10 * n_cells, self.cfg.real)

    # every record below starts out as its own empty list
    record_names = (
        # one dimensional
        'ex', 'ey', 'ez', 'vx', 'vy', 'vz',
        # in transverse plane (z==0)
        'exy', 'vx_xy', 'vy_xy', 'vz_xy',
        'pixx_xy', 'piyy_xy', 'pitx_xy',
        # in reaction plane
        'exz', 'vx_xz', 'vy_xz', 'vz_xz',
        'ecc2_vs_rapidity', 'ecc1_vs_rapidity',
        'time', 'edmax',
    )
    for record in record_names:
        setattr(self, record, [])

    self.__loadAndBuildCLPrg()

    self.eos = Eos(cfg.eos_type)

    def symmetric_axis(npoints, spacing):
        # npoints values from -floor(npoints/2)*spacing to the mirror value
        edge = floor(npoints / 2) * spacing
        return np.linspace(-edge, edge, npoints, endpoint=True)

    self.x = symmetric_axis(NX, cfg.DX)
    self.y = symmetric_axis(NY, cfg.DY)
    self.z = symmetric_axis(NZ, cfg.DZ)
class BulkInfo(object):
    '''The bulk information like:
       ed(x), ed(y), ed(eta), T(x), T(y), T(eta)
       vx, vy, veta, ecc_x, ecc_p'''

    def __init__(self, cfg, ctx, queue, eos_table, compile_options):
        '''Store the OpenCL handles, allocate host buffers, build the
        slicing kernels and create the x, y, z coordinate axes.

        Params:
            :param cfg: configuration providing NX, NY, NZ, DX, DY, DZ,
                real, eos_type and fPathOut
            :param ctx: OpenCL context
            :param queue: OpenCL command queue
            :param eos_table: stored as self.eos_table
            :param compile_options: iterable of build options
        '''
        self.cfg = cfg
        self.ctx = ctx
        self.queue = queue
        self.eos_table = eos_table
        self.compile_options = list(compile_options)

        NX, NY, NZ = cfg.NX, cfg.NY, cfg.NZ
        self.h_ev = np.zeros((NX * NY * NZ, 4), cfg.real)
        self.h_pi = np.zeros(10 * NX * NY * NZ, self.cfg.real)

        # one dimensional
        self.ex, self.ey, self.ez = [], [], []
        self.vx, self.vy, self.vz = [], [], []

        # in transverse plane (z==0)
        self.exy, self.vx_xy, self.vy_xy, self.vz_xy = [], [], [], []
        self.pixx_xy, self.piyy_xy, self.pitx_xy = [], [], []

        # in reaction plane
        self.exz, self.vx_xz, self.vy_xz, self.vz_xz = [], [], [], []

        self.ecc2_vs_rapidity = []
        self.ecc1_vs_rapidity = []
        self.time = []
        self.edmax = []
        self.__loadAndBuildCLPrg()
        self.eos = Eos(cfg.eos_type)

        self.x = np.linspace(-floor(NX / 2) * cfg.DX,
                             floor(NX / 2) * cfg.DX, NX, endpoint=True)
        self.y = np.linspace(-floor(NY / 2) * cfg.DY,
                             floor(NY / 2) * cfg.DY, NY, endpoint=True)
        self.z = np.linspace(-floor(NZ / 2) * cfg.DZ,
                             floor(NZ / 2) * cfg.DZ, NZ, endpoint=True)

    def __loadAndBuildCLPrg(self):
        '''Build the inline OpenCL program that extracts 1D and 2D slices
        with self.compile_options (which must define NX, NY, NZ).'''
        edslice_src = '''#include"real_type.h"
            __kernel void get_ed(__global real4 * d_ev,
                                 __global real4 * d_ev_x0,
                                 __global real4 * d_ev_y0,
                                 __global real4 * d_ev_z0,
                                 __global real4 * d_ev_xy,
                                 __global real4 * d_ev_xz,
                                 __global real4 * d_ev_yz) {
                int gid = get_global_id(0);
                if ( gid < NX ) {
                    int j = NY/2;
                    int k = NZ/2;
                    d_ev_x0[gid] = d_ev[gid*NY*NZ + j*NZ + k];

                    int i = gid;
                    for ( j = 0; j< NY; j ++ ) {
                        d_ev_xy[i*NY+j] = d_ev[i*NY*NZ + j*NZ + k];
                    }

                    j = NY/2;
                    for ( k = 0; k < NZ; k ++ ) {
                        d_ev_xz[i*NZ+k] = d_ev[i*NY*NZ + j*NZ + k];
                    }
                }

                if ( gid < NY ) {
                    int i = NX/2;
                    int k = NZ/2;
                    d_ev_y0[gid] = d_ev[i*NY*NZ + gid*NZ + k];
                    int j = gid;
                    for ( k = 0; k < NZ; k ++ ) {
                        d_ev_yz[j*NZ+k] = d_ev[i*NY*NZ + j*NZ + k];
                    }
                }

                if ( gid < NZ ) {
                    int i = NX/2;
                    int j = NY/2;
                    d_ev_z0[gid] = d_ev[i*NY*NZ + j*NZ + gid];
                }
            }

            __kernel void get_pimn(__global real * d_pi,
                                   __global real * d_pixx_xy,
                                   __global real * d_piyy_xy,
                                   __global real * d_pitx_xy)
            {
                int gid_x = get_global_id(0);
                int gid_y = get_global_id(1);

                // Cell (gid_x, gid_y, NZ/2), using the same
                // i*NY*NZ + j*NZ + k layout as get_ed.  The stride used
                // to be NZ/2, which maps every thread to cell 0 when
                // NZ == 1.
                // NOTE(review): assumes d_pi stores 10 components per
                // cell in d_ev cell order -- confirm against the
                // viscous kernels.
                int oid = gid_x*NY*NZ + gid_y*NZ + NZ/2;

                int nid = gid_x*NY + gid_y;

                d_pixx_xy[nid] = d_pi[10*oid + 4];
                d_piyy_xy[nid] = d_pi[10*oid + 7];
                d_pitx_xy[nid] = d_pi[10*oid + 1];
            }
            '''
        self.kernel_edslice = cl.Program(
            self.ctx, edslice_src).build(
                options=' '.join(self.compile_options))

    def get(self, tau, d_ev1, edmax, d_pi=None):
        '''Record 1D and 2D slices of the current state.

        Params:
            :param tau: current time, appended to self.time
            :param d_ev1: device buffer with 4 values (ed, vx, vy, vz)
                per cell
            :param edmax: maximum energy density, appended to self.edmax
            :param d_pi: optional device buffer read by get_pimn; when
                given, pixx, piyy and pitx in the z = NZ/2 plane are
                recorded too
        '''
        self.time.append(tau)
        self.edmax.append(edmax)
        mf = cl.mem_flags
        real = self.cfg.real
        NX, NY, NZ = self.cfg.NX, self.cfg.NY, self.cfg.NZ

        self.ecc_vs_rapidity(d_ev1)

        # One work item per index along the longest axis; the kernel
        # guards each axis with gid < NX / NY / NZ.  (A fixed size of
        # 2000 was used before, which truncated longer grids.)
        n_line = max(NX, NY, NZ)
        line_nbytes = np.zeros((n_line, 4), real).nbytes

        h_evxy = np.zeros((NX * NY, 4), real)
        h_evxz = np.zeros((NX * NZ, 4), real)
        yz_nbytes = np.zeros((NY * NZ, 4), real).nbytes

        d_evx0 = cl.Buffer(self.ctx, mf.READ_WRITE, size=line_nbytes)
        d_evy0 = cl.Buffer(self.ctx, mf.READ_WRITE, size=line_nbytes)
        d_evz0 = cl.Buffer(self.ctx, mf.READ_WRITE, size=line_nbytes)

        d_evxy = cl.Buffer(self.ctx, mf.READ_WRITE, size=h_evxy.nbytes)
        d_evxz = cl.Buffer(self.ctx, mf.READ_WRITE, size=h_evxz.nbytes)
        # the kernel writes the yz slice, but it is not recorded on host
        d_evyz = cl.Buffer(self.ctx, mf.READ_WRITE, size=yz_nbytes)

        self.kernel_edslice.get_ed(self.queue, (n_line, ), None, d_ev1,
                                   d_evx0, d_evy0, d_evz0, d_evxy, d_evxz,
                                   d_evyz).wait()

        h_evx0 = np.zeros((NX, 4), real)
        h_evy0 = np.zeros((NY, 4), real)
        h_evz0 = np.zeros((NZ, 4), real)
        cl.enqueue_copy(self.queue, h_evx0, d_evx0).wait()
        cl.enqueue_copy(self.queue, h_evy0, d_evy0).wait()
        cl.enqueue_copy(self.queue, h_evz0, d_evz0).wait()

        # column 0 is ed, columns 1..3 are vx, vy, vz
        self.ex.append(h_evx0[:, 0])
        self.ey.append(h_evy0[:, 0])
        self.ez.append(h_evz0[:, 0])

        self.vx.append(h_evx0[:, 1])
        self.vy.append(h_evy0[:, 2])
        self.vz.append(h_evz0[:, 3])

        cl.enqueue_copy(self.queue, h_evxy, d_evxy).wait()
        cl.enqueue_copy(self.queue, h_evxz, d_evxz).wait()

        self.exy.append(h_evxy[:, 0].reshape(NX, NY))
        self.vx_xy.append(h_evxy[:, 1].reshape(NX, NY))
        self.vy_xy.append(h_evxy[:, 2].reshape(NX, NY))

        self.exz.append(h_evxz[:, 0].reshape(NX, NZ))
        self.vx_xz.append(h_evxz[:, 1].reshape(NX, NZ))
        self.vy_xz.append(h_evxz[:, 2].reshape(NX, NZ))
        self.vz_xz.append(h_evxz[:, 3].reshape(NX, NZ))

        if d_pi is not None:
            h_pixx = np.zeros(NX * NY, real)
            h_piyy = np.zeros(NX * NY, real)
            h_pitx = np.zeros(NX * NY, real)
            d_pixx = cl.Buffer(self.ctx, mf.READ_WRITE, size=h_pixx.nbytes)
            d_piyy = cl.Buffer(self.ctx, mf.READ_WRITE, size=h_pixx.nbytes)
            d_pitx = cl.Buffer(self.ctx, mf.READ_WRITE, size=h_pixx.nbytes)
            self.kernel_edslice.get_pimn(self.queue, (NX, NY), None, d_pi,
                                         d_pixx, d_piyy, d_pitx).wait()

            cl.enqueue_copy(self.queue, h_pixx, d_pixx).wait()
            self.pixx_xy.append(h_pixx.reshape(NX, NY))

            cl.enqueue_copy(self.queue, h_piyy, d_piyy).wait()
            self.piyy_xy.append(h_piyy.reshape(NX, NY))

            cl.enqueue_copy(self.queue, h_pitx, d_pitx).wait()
            self.pitx_xy.append(h_pitx.reshape(NX, NY))

    @staticmethod
    def _clamped_ed_and_gamma(ed, vx, vy, vz):
        '''Return (ed clamped from below at 1e-10, gamma factor u0).

        The inputs are left untouched; vr2 > 1 is replaced by 0.999999
        before the gamma factor is computed.'''
        ed = np.maximum(ed, 1.0E-10)
        vr2 = vx * vx + vy * vy + vz * vz
        vr2 = np.where(vr2 > 1.0, 0.999999, vr2)
        u0 = 1.0 / np.sqrt(1.0 - vr2)
        return ed, u0

    def eccp(self, ed, vx, vy, vz=0.0, pixx=None, piyy=None, pitx=None):
        ''' eccx = <y*y-x*x>/<y*y+x*x> where <> are averaged
            eccp = <Txx-Tyy>/<Txx+Tyy>

        Returns (v1, v2) with v1 = sum(T0x) / sum(Txx + Tyy) and
        v2 = sum(Txx - Tyy) / sum(Txx + Tyy); when pixx is given the sums
        of (pixx - piyy) and (pixx + piyy) are added to numerator and
        denominator of v2.  pitx is accepted but not used.  The input
        arrays are not modified.
        '''
        ed, u0 = self._clamped_ed_and_gamma(ed, vx, vy, vz)
        pre = self.eos.f_P(ed)
        enthalpy_u0sq = (ed + pre) * u0 * u0
        Tyy = enthalpy_u0sq * vy * vy + pre
        Txx = enthalpy_u0sq * vx * vx + pre
        T0x = enthalpy_u0sq * vx
        if pixx is not None:
            pi_sum = (pixx + piyy).sum()
            pi_dif = (pixx - piyy).sum()
            v2 = ((Txx - Tyy).sum() + pi_dif) / ((Txx + Tyy).sum() + pi_sum)
        else:
            v2 = (Txx - Tyy).sum() / (Txx + Tyy).sum()

        v1 = T0x.sum() / (Txx + Tyy).sum()
        return v1, v2

    def mean_vr(self, ed, vx, vy, vz=0.0):
        ''' <vr> = <gamma * ed * sqrt(vx*vx + vy*vy)>/<gamma*ed>
        where <> are averaged over whole transverse plane.
        The input arrays are not modified.'''
        ed, u0 = self._clamped_ed_and_gamma(ed, vx, vy, vz)
        vr = (u0 * ed * np.sqrt(vx * vx + vy * vy)).sum() / (u0 * ed).sum()
        return vr

    def total_entropy(self, tau, ed, vx, vy, vz=0.0):
        '''get the total entropy as a function of time:
        sum(gamma * s(ed)) * tau * DX * DY.
        The input arrays are not modified.'''
        ed, u0 = self._clamped_ed_and_gamma(ed, vx, vy, vz)
        return (u0 * self.eos.f_S(ed)).sum() * tau * self.cfg.DX * self.cfg.DY

    def ecc_vs_rapidity(self, d_ev):
        '''Copy d_ev to the host and append eccp (v1, v2) of every
        z-slice to ecc1_vs_rapidity / ecc2_vs_rapidity.'''
        NX, NY, NZ = self.cfg.NX, self.cfg.NY, self.cfg.NZ
        cl.enqueue_copy(self.queue, self.h_ev, d_ev).wait()
        bulk = self.h_ev.reshape(NX, NY, NZ, 4)
        ecc1 = np.empty(NZ)
        ecc2 = np.empty(NZ)
        for k in range(NZ):
            ed = bulk[:, :, k, 0]
            vx = bulk[:, :, k, 1]
            vy = bulk[:, :, k, 2]
            vz = bulk[:, :, k, 3]
            ecc1[k], ecc2[k] = self.eccp(ed, vx, vy, vz)
        self.ecc1_vs_rapidity.append(ecc1)
        self.ecc2_vs_rapidity.append(ecc2)

    def save(self, viscous_on=False):
        '''Write all recorded slices and time series as text files into
        cfg.fPathOut.

        Params:
            :param viscous_on: also write eccp_visc.dat; requires that
                get() was called with d_pi for every recorded step
        '''
        # use absolute path incase call bulkinfo.save() from other directory
        path_out = os.path.abspath(self.cfg.fPathOut)

        def out(name):
            return os.path.join(path_out, name)

        np.savetxt(out('ex.dat'), np.array(self.ex).T)
        np.savetxt(out('ey.dat'), np.array(self.ey).T)
        np.savetxt(out('ez.dat'), np.array(self.ez).T)

        np.savetxt(out('Tx.dat'), self.eos.f_T(self.ex).T)
        np.savetxt(out('Ty.dat'), self.eos.f_T(self.ey).T)
        np.savetxt(out('Tz.dat'), self.eos.f_T(self.ez).T)

        np.savetxt(out('vx.dat'), np.array(self.vx).T)
        np.savetxt(out('vy.dat'), np.array(self.vy).T)
        np.savetxt(out('vz.dat'), np.array(self.vz).T)

        if len(self.ecc2_vs_rapidity) != 0:
            np.savetxt(out('ecc2.dat'), np.array(self.ecc2_vs_rapidity).T)
            np.savetxt(out('ecc1.dat'), np.array(self.ecc1_vs_rapidity).T)

        entropy = []
        vr = []
        ecc2 = []
        ecc2_visc = []
        for idx, exy in enumerate(self.exy):
            vx = self.vx_xy[idx]
            vy = self.vy_xy[idx]
            np.savetxt(out('edxy%d.dat' % idx), exy)
            np.savetxt(out('Txy%d.dat' % idx), self.eos.f_T(exy))
            np.savetxt(out('vx_xy%d.dat' % idx), vx)
            np.savetxt(out('vy_xy%d.dat' % idx), vy)

            # only the second return value (eccp) is written out
            _, eccp_ideal = self.eccp(exy, vx, vy)
            ecc2.append(eccp_ideal)
            vr.append(self.mean_vr(exy, vx, vy))
            tau = self.time[idx]
            entropy.append(self.total_entropy(tau, exy, vx, vy))

            if viscous_on:
                pixx = self.pixx_xy[idx]
                piyy = self.piyy_xy[idx]
                pitx = self.pitx_xy[idx]
                _, ecc_visc2 = self.eccp(exy, vx, vy,
                                         pixx=pixx, piyy=piyy, pitx=pitx)
                ecc2_visc.append(ecc_visc2)

        for idx, exz in enumerate(self.exz):
            np.savetxt(out('ed_xz%d.dat' % idx), exz)
            np.savetxt(out('vx_xz%d.dat' % idx), self.vx_xz[idx])
            np.savetxt(out('vy_xz%d.dat' % idx), self.vy_xz[idx])
            np.savetxt(out('vz_xz%d.dat' % idx), self.vz_xz[idx])
            np.savetxt(out('T_xz%d.dat' % idx), self.eos.f_T(exz))

        np.savetxt(out('eccp.dat'),
                   np.array(list(zip(self.time, ecc2))),
                   header='tau  eccp')

        if viscous_on:
            np.savetxt(out('eccp_visc.dat'),
                       np.array(list(zip(self.time, ecc2_visc))),
                       header='tau  eccp_visc')

        np.savetxt(out('Tmax.dat'),
                   np.array(list(zip(self.time, self.eos.f_T(self.edmax)))),
                   header='tau, Tmax')

        np.savetxt(out('edmax.dat'),
                   np.array(list(zip(self.time, self.edmax))),
                   header='tau, edmax')

        np.savetxt(out('vr.dat'),
                   np.array(list(zip(self.time, vr))),
                   header='tau <vr>')

        np.savetxt(out('entropy.dat'),
                   np.array(list(zip(self.time, entropy))),
                   header='tau  entropy')
def __init__(self, configs, handcrafted_eos=None, gpu_id=0):
    '''Set up the OpenCL backend, the equation of state table, the
    BulkInfo recorder, the kernels and all device buffers.

    Params:
        :param configs: hydrodynamic configurations, from configs import cfg
        :param handcrafted_eos: optional Eos-like object used instead of
            Eos(configs.eos_type)
        :param gpu_id: use which gpu for the calculation if there are
            many per node
    '''
    self.cfg = configs
    self.cwd, cwf = os.path.split(__file__)

    # make sure the output directory exists
    out_dir = self.cfg.fPathOut
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # choose proper real, real4, real8 sizes
    self.determine_float_size(self.cfg)

    # create opencl environment
    from backend_opencl import OpenCLBackend
    self.backend = OpenCLBackend(self.cfg, gpu_id)
    self.ctx = self.backend.ctx
    self.queue = self.backend.default_queue

    self.size = self.cfg.NX * self.cfg.NY * self.cfg.NZ
    self.tau = self.cfg.real(self.cfg.TAU0)

    self.compile_options = self.__compile_options()

    # The eos table has to exist before __loadAndBuildCLPrg() so that
    # the table information reaches the kernel definitions.
    eos_is_custom = handcrafted_eos is not None
    self.eos = handcrafted_eos if eos_is_custom else Eos(self.cfg.eos_type)

    chemical_potential_on_hypersf(self.cfg.TFRZ, out_dir,
                                  eos_type=self.cfg.eos_type)

    # eos types that need an explicit (nrow, ncol) table shape
    table_shapes = {
        'lattice_pce165': (100, 1555),
        'lattice_pce150': (100, 1555),
        'hotqcd2014': (100, 1555),
        'lattice_wb': (4, 1001),
    }
    if not eos_is_custom and self.cfg.eos_type in table_shapes:
        n_rows, n_cols = table_shapes[self.cfg.eos_type]
        self.eos_table = self.eos.create_table(
            self.ctx, self.compile_options, nrow=n_rows, ncol=n_cols)
    else:
        self.eos_table = self.eos.create_table(self.ctx,
                                               self.compile_options)

    self.efrz = self.eos.f_ed(self.cfg.TFRZ)

    # store 1D and 2d bulk info at each time step
    if self.cfg.save_to_hdf5:
        from bulkinfo_h5 import BulkInfo
    else:
        from bulkinfo import BulkInfo

    self.bulkinfo = BulkInfo(self.cfg, self.ctx, self.queue,
                             self.eos_table, self.compile_options)

    self.__loadAndBuildCLPrg()

    # device buffers; each d_ev entry holds ed, vx, vy, vz per cell
    flags = cl.mem_flags
    rw_copy = flags.READ_WRITE | flags.COPY_HOST_PTR
    self.h_ev1 = np.zeros((self.size, 4), self.cfg.real)

    self.d_ev = [cl.Buffer(self.ctx, rw_copy, hostbuf=self.h_ev1)
                 for _ in range(3)]
    self.d_Src = cl.Buffer(self.ctx, rw_copy, hostbuf=self.h_ev1)

    self.submax = np.empty(64, self.cfg.real)
    self.d_submax = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE,
                              self.submax.nbytes)

    # d_ev_old: for hypersf calculation
    self.d_ev_old = cl.Buffer(self.ctx, flags.READ_WRITE,
                              size=self.h_ev1.nbytes)
    # d_hypersf: store the dSigma^{mu}, vx, vy, veta, tau, x, y, eta
    # on freeze out hyper surface
    self.d_hypersf = cl.Buffer(self.ctx, flags.READ_WRITE,
                               size=1500000 * self.cfg.sz_real8)

    # the position of the hyper surface in cartesian coordinates
    self.d_sf_txyz = cl.Buffer(self.ctx, flags.READ_WRITE,
                               size=1500000 * self.cfg.sz_real4)

    # counter of hyper surface elements, starts at zero
    sf_counter = np.zeros(1, np.int32)
    self.d_num_of_sf = cl.Buffer(self.ctx, rw_copy, hostbuf=sf_counter)

    self.history = []
class CLIdeal(object):
    '''The pyopencl version for 3+1D ideal hydro dynamic simulation'''

    # (nrow, ncol) handed to Eos.create_table for eos types that need an
    # explicit table shape; all other types use create_table's defaults
    _EOS_TABLE_SHAPES = {
        'lattice_pce165': (100, 1555),
        'lattice_pce150': (100, 1555),
        'hotqcd2014': (100, 1555),
        'lattice_wb': (4, 1001),
    }

    def __init__(self, configs, handcrafted_eos=None, gpu_id=0):
        '''Params:
        :param configs: hydrodynamic configurations, from configs import cfg
        :param handcrafted_eos: optional Eos-like object used instead of
            Eos(configs.eos_type)
        :param gpu_id: use which gpu for the calculation if there are many
            per node
        '''
        # create opencl environment
        self.cfg = configs
        self.cwd, cwf = os.path.split(__file__)
        # create the fPathOut directory if not exists
        path = self.cfg.fPathOut
        if not os.path.exists(path):
            os.makedirs(path)

        # choose proper real, real4, real8 sizes
        self.determine_float_size(self.cfg)

        from backend_opencl import OpenCLBackend
        self.backend = OpenCLBackend(self.cfg, gpu_id)

        self.ctx = self.backend.ctx
        self.queue = self.backend.default_queue

        self.size = self.cfg.NX * self.cfg.NY * self.cfg.NZ
        self.tau = self.cfg.real(self.cfg.TAU0)

        self.compile_options = self.__compile_options()

        # set eos, create eos table for interpolation
        # self.eos_table must be before __loadAndBuildCLPrg() to pass
        # table information to definitions
        if handcrafted_eos is None:
            self.eos = Eos(self.cfg.eos_type)
        else:
            self.eos = handcrafted_eos

        chemical_potential_on_hypersf(self.cfg.TFRZ, path,
                                      eos_type=self.cfg.eos_type)

        table_shape = None
        if handcrafted_eos is None:
            table_shape = self._EOS_TABLE_SHAPES.get(self.cfg.eos_type)
        if table_shape is None:
            self.eos_table = self.eos.create_table(self.ctx,
                                                   self.compile_options)
        else:
            self.eos_table = self.eos.create_table(
                self.ctx, self.compile_options,
                nrow=table_shape[0], ncol=table_shape[1])

        self.efrz = self.eos.f_ed(self.cfg.TFRZ)

        # store 1D and 2d bulk info at each time step
        if self.cfg.save_to_hdf5:
            from bulkinfo_h5 import BulkInfo
        else:
            from bulkinfo import BulkInfo

        self.bulkinfo = BulkInfo(self.cfg, self.ctx, self.queue,
                                 self.eos_table, self.compile_options)

        self.__loadAndBuildCLPrg()

        # define buffer on device side, d_ev1 stores ed, vx, vy, vz
        mf = cl.mem_flags
        self.h_ev1 = np.zeros((self.size, 4), self.cfg.real)

        self.d_ev = [
            cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                      hostbuf=self.h_ev1),
            cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                      hostbuf=self.h_ev1),
            cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                      hostbuf=self.h_ev1)
        ]
        self.d_Src = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                               hostbuf=self.h_ev1)

        # one partial maximum per work group of reduction_stage1
        self.submax = np.empty(64, self.cfg.real)
        self.d_submax = cl.Buffer(self.ctx, cl.mem_flags.READ_WRITE,
                                  self.submax.nbytes)
        # d_ev_old: for hypersf calculation;
        self.d_ev_old = cl.Buffer(self.ctx, mf.READ_WRITE,
                                  size=self.h_ev1.nbytes)
        # d_hypersf: store the dSigma^{mu}, vx, vy, veta, tau, x, y, eta
        # on freeze out hyper surface
        self.d_hypersf = cl.Buffer(self.ctx, mf.READ_WRITE,
                                   size=1500000 * self.cfg.sz_real8)

        # the position of the hyper surface in cartesian coordinates
        self.d_sf_txyz = cl.Buffer(self.ctx, mf.READ_WRITE,
                                   size=1500000 * self.cfg.sz_real4)

        h_num_of_sf = np.zeros(1, np.int32)
        self.d_num_of_sf = cl.Buffer(self.ctx,
                                     mf.READ_WRITE | mf.COPY_HOST_PTR,
                                     hostbuf=h_num_of_sf)

        self.history = []

    def determine_float_size(self, cfg):
        '''Attach the scalar/vector types and their byte sizes to cfg,
        single precision if cfg.use_float32 is true, double otherwise.'''
        cfg.sz_int = np.dtype('int32').itemsize  # ==sizeof(int) in c
        # truthiness test, consistent with __compile_options
        if cfg.use_float32:
            cfg.real = np.float32
            cfg.real4 = array.vec.float4
            cfg.real8 = array.vec.float8
            cfg.sz_real = np.dtype('float32').itemsize  # ==sizeof(float) in c
            cfg.sz_real4 = array.vec.float4.itemsize
            cfg.sz_real8 = array.vec.float8.itemsize
        else:
            cfg.real = np.float64
            cfg.real4 = array.vec.double4
            cfg.real8 = array.vec.double8
            cfg.sz_real = np.dtype('float64').itemsize  # ==sizeof(double) in c
            cfg.sz_real4 = array.vec.double4.itemsize
            cfg.sz_real8 = array.vec.double8.itemsize

    def load_ini(self, dat):
        '''load initial condition stored in np array whose 4 columns
           are (Ed, vx, vy, vz) and  num_of_rows = NX*NY*NZ'''
        print('start to load ini data')
        self.h_ev1 = dat.astype(self.cfg.real)
        cl.enqueue_copy(self.queue, self.d_ev[1], self.h_ev1).wait()
        print('end of loading ini data')

    def __compile_options(self):
        '''Return the list of -D / -I options used to build the kernels.'''
        optlist = ['TAU0', 'DT', 'DX', 'DY', 'DZ', 'ETAOS_XMIN',
                   'ETAOS_YMIN', 'ETAOS_LEFT_SLOP', 'ETAOS_RIGHT_SLOP',
                   'LAM1']
        # these values get an 'f' suffix in the generated define
        gpu_defines = [
            '-D %s=%sf' % (key, value)
            for (key, value) in list(self.cfg.__dict__.items())
            if key in optlist
        ]
        gpu_defines.append('-D {key}={value}'.format(key='NX',
                                                     value=self.cfg.NX))
        gpu_defines.append('-D {key}={value}'.format(key='NY',
                                                     value=self.cfg.NY))
        gpu_defines.append('-D {key}={value}'.format(key='NZ',
                                                     value=self.cfg.NZ))
        gpu_defines.append('-D {key}={value}'.format(
            key='SIZE', value=self.cfg.NX * self.cfg.NY * self.cfg.NZ))

        # local memory size along x,y,z direction with 4 boundary cells
        gpu_defines.append('-D {key}={value}'.format(key='BSZ',
                                                     value=self.cfg.BSZ))

        # determine float32 or double data type in *.cl file
        if self.cfg.use_float32:
            gpu_defines.append('-D USE_SINGLE_PRECISION')

        # choose EOS by ifdef in *.cl file
        if self.cfg.riemann_test:
            gpu_defines.append('-D RIEMANN_TEST')

        gpu_defines.append('-D EOS_TABLE')
        # set the include path for the header file
        gpu_defines.append('-I ' + os.path.join(self.cwd, 'kernel/'))
        return gpu_defines

    def __loadAndBuildCLPrg(self):
        '''Load and build the *.cl programs with self.compile_options;
        the hypersurface kernel additionally gets nxskip, nyskip, nzskip
        and EFRZ.'''
        options = ' '.join(self.compile_options)

        with open(os.path.join(self.cwd, 'kernel', 'kernel_ideal.cl'),
                  'r') as f:
            prg_src = f.read()
            self.kernel_ideal = cl.Program(
                self.ctx, prg_src).build(options=options)

        with open(os.path.join(self.cwd, 'kernel', 'kernel_reduction.cl'),
                  'r') as f:
            src_maxEd = f.read()
            self.kernel_reduction = cl.Program(
                self.ctx, src_maxEd).build(options=options)

        with open(os.path.join(self.cwd, 'kernel', 'kernel_jet_eloss.cl'),
                  'r') as f:
            src = f.read()
            self.kernel_jet_eloss = cl.Program(
                self.ctx, src).build(options=options)

        hypersf_defines = list(self.compile_options)
        hypersf_defines.append('-D {key}={value}'.format(
            key='nxskip', value=self.cfg.nxskip))
        hypersf_defines.append('-D {key}={value}'.format(
            key='nyskip', value=self.cfg.nyskip))
        hypersf_defines.append('-D {key}={value}'.format(
            key='nzskip', value=self.cfg.nzskip))
        hypersf_defines.append('-D {key}={value}f'.format(key='EFRZ',
                                                          value=self.efrz))
        with open(os.path.join(self.cwd, 'kernel', 'kernel_hypersf.cl'),
                  'r') as f:
            src_hypersf = f.read()
            self.kernel_hypersf = cl.Program(
                self.ctx, src_hypersf).build(
                    options=' '.join(hypersf_defines))

    @classmethod
    def roundUp(cls, value, multiple):
        '''This function rounds one integer up to the nearest multiple
        of another integer, to get the global work size (which are
        multiples of local work size) from NX, NY, NZ.
        '''
        remainder = value % multiple
        if remainder != 0:
            value += multiple - remainder
        return value

    def stepUpdate(self, step, jet_eloss_src=None):
        ''' Do step update in kernel with KT algorithm
            Args:
                step: the 1st or the 2nd step in runge-kutta; the source
                    terms are computed from self.d_ev[step]
                jet_eloss_src: dict with keys 'switch_on',
                    'start_pos_index' and 'direction'; None (default)
                    means jet energy loss is switched off
        '''
        if jet_eloss_src is None:
            # fresh dict per call instead of a shared mutable default
            jet_eloss_src = {
                'switch_on': False,
                'start_pos_index': 0,
                'direction': 0
            }

        # update d_Src by KT time splitting, along=1,2,3 for 'x','y','z'
        # input: gpu_ev_old, tau, size, along_axis
        # output: self.d_Src
        NX, NY, NZ, BSZ = self.cfg.NX, self.cfg.NY, self.cfg.NZ, self.cfg.BSZ

        self.kernel_ideal.kt_src_christoffel(
            self.queue, (NX * NY * NZ, ), None, self.d_Src,
            self.d_ev[step], self.eos_table, self.tau,
            np.int32(step)).wait()

        self.kernel_ideal.kt_src_alongx(
            self.queue, (BSZ, NY, NZ), (BSZ, 1, 1), self.d_Src,
            self.d_ev[step], self.eos_table, self.tau).wait()

        self.kernel_ideal.kt_src_alongy(
            self.queue, (NX, BSZ, NZ), (1, BSZ, 1), self.d_Src,
            self.d_ev[step], self.eos_table, self.tau).wait()

        self.kernel_ideal.kt_src_alongz(
            self.queue, (NX, NY, BSZ), (1, 1, BSZ), self.d_Src,
            self.d_ev[step], self.eos_table, self.tau).wait()

        # update src term with external jet_eloss_src
        if jet_eloss_src['switch_on']:
            jet_start_pos_index = jet_eloss_src['start_pos_index']
            jet_start_angle = jet_eloss_src['direction']

            self.kernel_jet_eloss.jet_eloss_src(
                self.queue, (NX, NY, NZ), None, self.d_Src,
                self.d_ev[step], self.tau,
                self.cfg.real(jet_start_angle),
                np.int32(jet_start_pos_index), self.eos_table).wait()

        # if step=1, T0m' = T0m + d_Src*dt, update d_ev[2]
        # if step=2, T0m = T0m + 0.5*dt*d_Src, update d_ev[1]
        # Notice that d_Src=f(t,x) at step1 and
        # d_Src=(f(t,x)+f(t+dt, x(t+dt))) at step2
        # output: d_ev[] where need_update=2 for step 1 and 1 for step 2
        self.kernel_ideal.update_ev(
            self.queue, (NX * NY * NZ, ), None, self.d_ev[3 - step],
            self.d_ev[1], self.d_Src, self.eos_table, self.tau,
            np.int32(step)).wait()

    def max_energy_density(self):
        '''Calc the maximum energy density on GPU and output the value '''
        # 64 work groups of 256 items; each writes one entry of d_submax
        self.kernel_reduction.reduction_stage1(
            self.queue, (256 * 64, ), (256, ), self.d_ev[1],
            self.d_submax, np.int32(self.size)).wait()
        cl.enqueue_copy(self.queue, self.submax, self.d_submax).wait()
        return self.submax.max()

    def ev_to_host(self):
        '''copy energy density and fluid velocity from device to host'''
        cl.enqueue_copy(self.queue, self.h_ev1, self.d_ev[1]).wait()

    def get_hypersf(self, n, ntskip):
        '''get the freeze out hyper surface from d_ev_old and d_ev_new
        global_size=(NX//nxskip, NY//nyskip, NZ//nzskip}

        Returns True when self.edmax has dropped below self.efrz.
        '''
        is_finished = self.edmax < self.efrz

        if n == 0:
            # first call: only remember the starting state
            cl.enqueue_copy(self.queue, self.d_ev_old,
                            self.d_ev[1]).wait()
            self.tau_old = self.cfg.TAU0
        elif (n % ntskip == 0) or is_finished:
            nx = (self.cfg.NX - 1) // self.cfg.nxskip + 1
            ny = (self.cfg.NY - 1) // self.cfg.nyskip + 1
            nz = (self.cfg.NZ - 1) // self.cfg.nzskip + 1
            tau_new = self.tau
            # get dSigma, vx, vy, veta, etas on hypersf
            self.kernel_hypersf.get_hypersf(
                self.queue, (nx, ny, nz), None, self.d_hypersf,
                self.d_sf_txyz, self.d_num_of_sf, self.d_ev_old,
                self.d_ev[1], self.cfg.real(self.tau_old),
                self.cfg.real(tau_new)).wait()

            # update with current tau and d_ev[1]
            cl.enqueue_copy(self.queue, self.d_ev_old,
                            self.d_ev[1]).wait()
            self.tau_old = tau_new

        return is_finished

    def save(self, save_hypersf=True, save_bulk=False, viscous_on=False):
        '''Read back the number of hypersurface elements and optionally
        write hypersf.dat / sf_txyz.dat and the bulk information.'''
        self.num_of_sf = np.zeros(1, dtype=np.int32)
        cl.enqueue_copy(self.queue, self.num_of_sf,
                        self.d_num_of_sf).wait()
        # convert the single value array [num_of_sf] to num_of_sf.
        self.num_of_sf = np.squeeze(self.num_of_sf)
        print("num of sf=", self.num_of_sf)
        if save_hypersf:
            hypersf = np.empty(self.num_of_sf, dtype=self.cfg.real8)
            cl.enqueue_copy(self.queue, hypersf, self.d_hypersf).wait()
            out_path = os.path.join(self.cfg.fPathOut, 'hypersf.dat')
            np.savetxt(
                out_path,
                hypersf,
                fmt='%.6e',
                header=
                'Tfrz=%.6e ; other rows: dS0, dS1, dS2, dS3, vx, vy, veta, etas'
                % self.cfg.TFRZ)

            sf_txyz = np.empty(self.num_of_sf, dtype=self.cfg.real4)
            cl.enqueue_copy(self.queue, sf_txyz, self.d_sf_txyz).wait()
            out_path = os.path.join(self.cfg.fPathOut, 'sf_txyz.dat')
            np.savetxt(
                out_path,
                sf_txyz,
                fmt='%.6e',
                header=
                '(t, x, y, z) the time-space coordinates of hypersf elements')

        if save_bulk:
            self.bulkinfo.save(viscous_on=viscous_on)

    def update_time(self, loop):
        '''update time with TAU0 and loop, convert its type to
        np.float32 or float64 which can be used directly as parameter
        in kernel functions'''
        self.tau = self.cfg.real(self.cfg.TAU0 + (loop + 1) * self.cfg.DT)

    def evolve(self, max_loops=2000, save_hypersf=True, save_bulk=True,
               plot_bulk=True, to_maxloop=False, jet_eloss_src=None):
        '''The main loop of hydrodynamic evolution

        Params:
            :param max_loops: maximum number of time steps
            :param save_hypersf: extract and save the freeze out surface
            :param save_bulk: pass on to self.save
            :param plot_bulk: together with save_bulk decides whether
                bulk info is recorded every cfg.ntskip steps
            :param to_maxloop: keep evolving after freeze out
            :param jet_eloss_src: see stepUpdate; only used in the 2nd
                runge-kutta step; None means switched off
        '''
        if jet_eloss_src is None:
            jet_eloss_src = {
                'switch_on': False,
                'start_pos_index': 0,
                'direction': 0.0
            }

        for n in range(max_loops):
            t0 = time()
            self.edmax = self.max_energy_density()
            self.history.append([self.tau, self.edmax])
            print('tau=', self.tau, ' EdMax= ', self.edmax)

            is_finished = False

            if save_hypersf:
                is_finished = self.get_hypersf(n, self.cfg.ntskip)
            if is_finished and not to_maxloop:
                break

            if (save_bulk or plot_bulk) and n % self.cfg.ntskip == 0:
                self.bulkinfo.get(self.tau, self.d_ev[1], self.edmax)

            self.stepUpdate(step=1)
            # update tau=tau+dtau for the 2nd step in RungeKutta
            self.update_time(loop=n)
            self.stepUpdate(step=2, jet_eloss_src=jet_eloss_src)
            t1 = time()
            print('one step: {dtime}'.format(dtime=t1 - t0))

        self.save(save_hypersf=save_hypersf, save_bulk=save_bulk)
from time import time
from glob import glob
import pyopencl as cl
import matplotlib.pyplot as plt
import h5py
from scipy.interpolate import interp2d

import os, sys
cwd, cwf = os.path.split(__file__)
print('cwd=', cwd)

sys.path.append(os.path.join(cwd, '../pyvisc'))
from eos.eos import Eos

pce = Eos(1)


def qgp_fraction(T):
    '''Map temperature to a QGP fraction between 0 and 1.

    The result has the shape and dtype of T: 0 for T < 0.165, 1 for
    T > 0.22 and the linear ramp (T - 0.165) / (0.22 - 0.165) for
    0.165 <= T <= 0.22.
    '''
    t_low, t_high = 0.165, 0.22

    # start from zero everywhere, which already covers T < t_low
    fraction = np.zeros_like(T)
    fraction[T > t_high] = 1.0

    ramp = (T >= t_low) & (T <= t_high)
    fraction[ramp] = (T[ramp] - t_low) / (t_high - t_low)
    return fraction
def __init__(self, cfg, ctx, queue, eos_table, compile_options):
    '''Store the OpenCL handles, create the coordinate axes, allocate
    host/device work arrays, build the bulk kernels and initialise the
    per-step record lists.

    Params:
        :param cfg: configuration providing NX, NY, NZ, DX, DY, DZ,
            real and eos_type
        :param ctx: OpenCL context
        :param queue: OpenCL command queue
        :param eos_table: stored as self.eos_table
        :param compile_options: iterable of build options; a list copy
            is kept
    '''
    self.cfg = cfg
    self.ctx = ctx
    self.queue = queue
    self.eos_table = eos_table
    self.compile_options = list(compile_options)

    NX, NY, NZ = cfg.NX, cfg.NY, cfg.NZ

    def centered_axis(n, step):
        # n points with spacing `step`, symmetric about 0.  For odd n
        # (n - 1) / 2 equals floor(n / 2), so grid point 0 is included;
        # for even n the points straddle 0 (the trento2D layout).
        half = (n - 1) / 2.0 * step
        return np.linspace(-half, half, n, endpoint=True)

    # x and y each follow their own parity.  Previously the parity of NX
    # also selected the formula for y, which gave a y spacing different
    # from DY for odd NX combined with even NY.
    self.x = centered_axis(NX, cfg.DX)
    self.y = centered_axis(NY, cfg.DY)
    # z uses the floor(NZ / 2) formula for either parity, as before
    z_edge = floor(NZ / 2) * cfg.DZ
    self.z = np.linspace(-z_edge, z_edge, NZ, endpoint=True)

    self.h_ev = np.zeros((NX * NY * NZ, 4), cfg.real)

    self.a_ed = cl_array.empty(self.queue, NX * NY * NZ, cfg.real)
    self.a_entropy = cl_array.empty(self.queue, NX * NY * NZ, cfg.real)

    # the momentum eccentricity as a function of rapidity
    self.a_eccp1 = cl_array.empty(self.queue, NZ, cfg.real)
    self.a_eccp2 = cl_array.empty(self.queue, NZ, cfg.real)

    self.eos = Eos(cfg.eos_type)

    self.__load_and_build_cl_prg()

    # time evolution for edmax and ed, T at (x=0,y=0,etas=0)
    self.time = []
    self.edmax = []
    self.edcent = []
    self.Tcent = []

    # time evolution for total_entropy, eccp, eccx and <vr>
    self.energy = []
    self.entropy = []
    self.eccp_vs_tau = []
    self.eccx = []
    self.vr = []

    # time evolution for bulk3D
    self.Tau_tijk = []
    self.X_tijk = []
    self.Y_tijk = []
    self.Z_tijk = []
    self.ED_tijk = []
    self.Tp_tijk = []
    self.Vx_tijk = []
    self.Vy_tijk = []
    self.Vz_tijk = []

    # time evolution for bulk2D
    self.Tau_2d = []
    self.X_2d = []
    self.Y_2d = []
    self.ED_2d = []
    self.Tp_2d = []
    self.Vx_2d = []
    self.Vy_2d = []
    self.Vz_2d = []
    self.Frc_2d = []
class BulkInfo(object):
    '''The bulk information like:
       ed(x), ed(y), ed(eta), T(x), T(y), T(eta)
       vx, vy, veta, ecc_x, ecc_p

    get() is called with the device field at a given tau and appends to the
    history lists held on the instance; save() writes those lists to
    avg.dat and bulk2D.dat under cfg.fPathOut.
    '''

    def __init__(self, cfg, ctx, queue, eos_table, compile_options):
        '''Set up the coordinate axes, buffers, EOS, kernels and histories.

        Args:
            cfg: configuration object; NX, NY, NZ, DX, DY, DZ, real,
                eos_type and fPathOut are read by this class.
            ctx: OpenCL context used to build the kernel program.
            queue: command queue used for device arrays, copies and kernels.
            eos_table: passed unchanged to the kernels as an argument.
            compile_options: iterable of compiler options; copied to a list.
        '''
        self.cfg = cfg
        self.ctx = ctx
        self.queue = queue
        self.eos_table = eos_table
        self.compile_options = list(compile_options)

        NX, NY, NZ = cfg.NX, cfg.NY, cfg.NZ

        # Each transverse axis is symmetric about 0 with N points and spacing
        # D, i.e. half-width (N - 1) / 2 * D.  For odd N this equals
        # floor(N / 2) * D (grid point 0 included); for even N, 0 falls
        # between two points (the trento2D layout).  The half-width is
        # computed per axis so that y no longer inherits the parity of NX.
        x_half = (NX - 1) / 2.0 * cfg.DX
        y_half = (NY - 1) / 2.0 * cfg.DY
        # The longitudinal axis always uses floor(NZ / 2), as before.
        z_half = (NZ // 2) * cfg.DZ
        self.x = np.linspace(-x_half, x_half, NX, endpoint=True)
        self.y = np.linspace(-y_half, y_half, NY, endpoint=True)
        self.z = np.linspace(-z_half, z_half, NZ, endpoint=True)

        n_cells = NX * NY * NZ
        # host copy of the 4-component field (ed, vx, vy, vz), one row per cell
        self.h_ev = np.zeros((n_cells, 4), cfg.real)

        # per-cell device arrays filled by the energy/entropy kernel
        self.a_ed = cl_array.empty(self.queue, n_cells, cfg.real)
        self.a_entropy = cl_array.empty(self.queue, n_cells, cfg.real)

        # the momentum eccentricity as a function of rapidity
        self.a_eccp1 = cl_array.empty(self.queue, NZ, cfg.real)
        self.a_eccp2 = cl_array.empty(self.queue, NZ, cfg.real)

        self.eos = Eos(cfg.eos_type)

        self.__load_and_build_cl_prg()

        # time evolution of edmax and of ed, T at (x=0, y=0, etas=0)
        self.time = []
        self.edmax = []
        self.edcent = []
        self.Tcent = []

        # time evolution of total energy/entropy, eccp, eccx and <vr>
        self.energy = []
        self.entropy = []
        self.eccp_vs_tau = []
        self.eccx = []
        self.vr = []

        # time evolution for bulk3D
        self.Tau_tijk = []
        self.X_tijk = []
        self.Y_tijk = []
        self.Z_tijk = []
        self.ED_tijk = []
        self.Tp_tijk = []
        self.Vx_tijk = []
        self.Vy_tijk = []
        self.Vz_tijk = []

        # time evolution for bulk2D
        self.Tau_2d = []
        self.X_2d = []
        self.Y_2d = []
        self.ED_2d = []
        self.Tp_2d = []
        self.Vx_2d = []
        self.Vy_2d = []
        self.Vz_2d = []
        self.Frc_2d = []

    def __load_and_build_cl_prg(self):
        '''Read kernel/kernel_bulkinfo.cl and build it with compile_options.'''
        with open(os.path.join(cwd, 'kernel', 'kernel_bulkinfo.cl')) as f:
            prg_src = f.read()
        self.kernel_bulk = cl.Program(
            self.ctx, prg_src).build(options=' '.join(self.compile_options))

    #@profile
    def get(self, tau, d_ev, edmax, d_pi=None):
        '''Copy the field from the device and append it to the histories.

        Args:
            tau: time of this snapshot.
            d_ev: device buffer copied into self.h_ev; it is read as an
                (NX, NY, NZ, 4) array holding (ed, vx, vy, vz) per cell.
            edmax: value appended to self.edmax.
            d_pi: unused.

        NOTE: self.energy and self.entropy are not filled here (the call to
        get_total_energy_and_entropy_on_gpu is disabled); save() pads them.
        '''
        NX, NY, NZ = self.cfg.NX, self.cfg.NY, self.cfg.NZ
        self.time.append(tau)
        self.edmax.append(edmax)

        cl.enqueue_copy(self.queue, self.h_ev, d_ev).wait()
        bulk = self.h_ev.reshape(NX, NY, NZ, 4)

        i0, j0, k0 = NX // 2, NY // 2, NZ // 2

        # Views into self.h_ev on the central z slice.
        exy = bulk[:, :, k0, 0]
        vx = bulk[:, :, k0, 1]
        vy = bulk[:, :, k0, 2]

        # flatten() copies, so these snapshots are taken before eccp() and
        # mean_vr() clamp exy in place further down.
        exy2d = exy.flatten()
        vx2d = vx.flatten()
        vy2d = vy.flatten()
        vz2d = bulk[:, :, k0, 3].flatten()
        Tp2d = self.eos.f_T(exy2d)

        ed_ijk = bulk[:, :, :, 0].flatten()
        vx_ijk = bulk[:, :, :, 1].flatten()
        vy_ijk = bulk[:, :, :, 2].flatten()
        vz_ijk = bulk[:, :, :, 3].flatten()
        Tp_ijk = self.eos.f_T(ed_ijk)

        # Coordinates in the same C order as the flattened fields:
        # x varies slowest, then y, then z.
        x2d = np.repeat(self.x, NY)
        y2d = np.tile(self.y, NX)
        self.X_2d.extend(x2d)
        self.Y_2d.extend(y2d)
        self.X_tijk.extend(np.repeat(self.x, NY * NZ))
        self.Y_tijk.extend(np.repeat(y2d, NZ))
        self.Z_tijk.extend(np.tile(self.z, NX * NY))

        self.Tau_tijk.extend(np.repeat(tau, NX * NY * NZ))
        self.ED_tijk.extend(ed_ijk)
        self.Tp_tijk.extend(Tp_ijk)
        self.Vx_tijk.extend(vx_ijk)
        self.Vy_tijk.extend(vy_ijk)
        self.Vz_tijk.extend(vz_ijk)

        self.Tau_2d.extend(np.repeat(tau, NX * NY))
        self.ED_2d.extend(exy2d)
        self.Tp_2d.extend(Tp2d)
        self.Vx_2d.extend(vx2d)
        self.Vy_2d.extend(vy2d)
        self.Vz_2d.extend(vz2d)
        # the fraction column is always written as 0
        self.Frc_2d.extend(np.repeat(0, NX * NY))

        self.eccp_vs_tau.append(self.eccp(exy, vx, vy)[1])
        self.vr.append(self.mean_vr(exy, vx, vy))

        # Read after eccp()/mean_vr(), i.e. with the 1e-10 floor applied.
        ed_cent = exy[i0, j0]
        self.edcent.append(ed_cent)
        self.Tcent.append(self.eos.f_T(ed_cent))
        # NOTE: the per-step HDF5 output (bulk1d/..., bulk2d/... datasets)
        # is disabled; nothing is written to disk here.

    def eccp(self, ed, vx, vy, vz=0.0):
        ''' eccx = <y*y-x*x>/<y*y+x*x> where <> are averaged
            eccp = <Txx-Tyy>/<Txx+Tyy>

        NOTE: ed is clamped in place to be >= 1e-10.

        Returns:
            (v1, v2) with v1 = sum(T0x) / sum(Txx + Tyy) and
            v2 = sum(Txx - Tyy) / sum(Txx + Tyy).
        '''
        ed[ed < 1.0E-10] = 1.0E-10
        pre = self.eos.f_P(ed)

        vr2 = vx * vx + vy * vy + vz * vz
        # keep the Lorentz factor finite
        vr2[vr2 > 1.0] = 0.999999

        u0 = 1.0 / np.sqrt(1.0 - vr2)
        Tyy = (ed + pre) * u0 * u0 * vy * vy + pre
        Txx = (ed + pre) * u0 * u0 * vx * vx + pre
        T0x = (ed + pre) * u0 * u0 * vx
        v2 = (Txx - Tyy).sum() / (Txx + Tyy).sum()
        v1 = T0x.sum() / (Txx + Tyy).sum()
        return v1, v2

    def mean_vr(self, ed, vx, vy, vz=0.0):
        ''' <vr> = <gamma * ed * sqrt(vx*vx + vy*vy)>/<gamma*ed>
        where <> are averaged over whole transverse plane

        NOTE: ed is clamped in place to be >= 1e-10.
        '''
        ed[ed < 1.0E-10] = 1.0E-10
        vr2 = vx * vx + vy * vy + vz * vz
        # keep the Lorentz factor finite
        vr2[vr2 > 1.0] = 0.999999
        u0 = 1.0 / np.sqrt(1.0 - vr2)
        vr = (u0 * ed * np.sqrt(vx * vx + vy * vy)).sum() / (u0 * ed).sum()
        return vr

    def total_entropy(self, tau, ed, vx, vy, vz=0.0):
        '''get the total entropy (at mid rapidity ) as a function of time

        NOTE: ed is clamped in place to be >= 1e-10.
        '''
        ed[ed < 1.0E-10] = 1.0E-10
        vr2 = vx * vx + vy * vy + vz * vz
        # keep the Lorentz factor finite
        vr2[vr2 > 1.0] = 0.999999
        u0 = 1.0 / np.sqrt(1.0 - vr2)
        return (u0 * self.eos.f_S(ed)).sum() * tau * self.cfg.DX * self.cfg.DY

    def get_total_energy_and_entropy_on_gpu(self, tau, d_ev):
        '''Run the total_energy_and_entropy kernel and append both totals.

        The per-cell kernel outputs are summed on the device and multiplied
        by tau * DX * DY * DZ before being appended to self.energy and
        self.entropy.
        '''
        NX, NY, NZ = self.cfg.NX, self.cfg.NY, self.cfg.NZ
        self.kernel_bulk.total_energy_and_entropy(
            self.queue, (NX, NY, NZ), None,
            self.a_ed.data, self.a_entropy.data, d_ev,
            self.eos_table, np.float32(tau)).wait()

        volum = tau * self.cfg.DX * self.cfg.DY * self.cfg.DZ

        e_total = cl_array.sum(self.a_ed).get() * volum
        s_total = cl_array.sum(self.a_entropy).get() * volum

        self.energy.append(e_total)
        self.entropy.append(s_total)

    def ecc_vs_rapidity(self, bulk):
        ''' bulk = self.h_ev.reshape(NX, NY, NZ, 4)

        Returns two arrays of length NZ holding the (v1, v2) pair from
        eccp() for every z slice.  As eccp() clamps ed in place, bulk is
        modified.
        '''
        NZ = self.cfg.NZ
        ecc1 = np.empty(NZ)
        ecc2 = np.empty(NZ)
        for k in range(NZ):
            ed = bulk[:, :, k, 0]
            vx = bulk[:, :, k, 1]
            vy = bulk[:, :, k, 2]
            vz = bulk[:, :, k, 3]
            ecc1[k], ecc2[k] = self.eccp(ed, vx, vy, vz)
        return ecc1, ecc2

    def ecc_vs_rapidity_on_gpu(self, tau, d_ev):
        '''Run the eccp_vs_rapidity kernel and return both result arrays.

        The kernel is launched with NZ work-groups of 256 work-items.
        '''
        NZ = self.cfg.NZ
        self.kernel_bulk.eccp_vs_rapidity(
            self.queue, (NZ * 256, ), (256, ),
            self.a_eccp1.data, self.a_eccp2.data, d_ev,
            self.eos_table, np.float32(tau)).wait()

        return self.a_eccp1.get(), self.a_eccp2.get()

    def save(self, viscous_on=False):
        '''Write avg.dat and bulk2D.dat into cfg.fPathOut.

        avg.dat has one row per recorded time step with the columns
        tau, eccp, ed(0,0,0), stotal, Etotal, <vr>.  A series shorter than
        self.time (e.g. entropy/energy when they were never computed) is
        padded with NaN instead of truncating every column to the shortest.

        bulk2D.dat has one row per transverse cell and time step with the
        columns tau x y Ed T vx vy veta frc.

        Args:
            viscous_on: unused.
        '''
        # use absolute path incase call bulkinfo.save() from other directory
        path_out = os.path.abspath(self.cfg.fPathOut)
        n_steps = len(self.time)

        def as_column(series):
            # NaN-padded float column of length n_steps
            column = np.full(n_steps, np.nan)
            values = np.asarray(series, dtype=float).ravel()[:n_steps]
            column[:values.size] = values
            return column

        avg = np.column_stack([
            as_column(series)
            for series in (self.time, self.eccp_vs_tau, self.edcent,
                           self.entropy, self.energy, self.vr)
        ])
        np.savetxt(os.path.join(path_out, 'avg.dat'), avg,
                   header='tau, eccp, ed(0,0,0), stotal, Etotal, <vr>')

        # column_stack builds a real 2D array on both Python 2 and 3;
        # np.array(zip(...)) gives a 0-d object array on Python 3.
        bulk2d = np.column_stack((self.Tau_2d, self.X_2d, self.Y_2d,
                                  self.ED_2d, self.Tp_2d, self.Vx_2d,
                                  self.Vy_2d, self.Vz_2d, self.Frc_2d))
        np.savetxt(os.path.join(path_out, 'bulk2D.dat'), bulk2d,
                   fmt='%.2f %.2f %.2f %.8e %.8e %.8e %.8e %.8e %.1f',
                   header='tau x y Ed T vx vy veta frc')
def __init__(self, path):
    '''Read bulk3D.dat found in path and set up the default state.

    The table is reshaped into self.Block_txyz with axes
    (time, x, y, z, component); components 0..3 are taken from the first
    four columns of the file and are labelled Ed, Vx, Vy, Vz.  The grid of
    the loaded data is hard-coded (70 x 70 x 41 cells, spacing 0.3).
    '''
    print('Loading data file,please wait for a minuts!')
    self.data_t = np.loadtxt(os.path.join(path, 'bulk3D.dat'))
    print('Data file loading complete!')

    # grid of the loaded data
    self.NX0, self.NY0, self.NZ0 = 70, 70, 41
    self.TAU0 = 0.6
    self.DX0, self.DY0, self.DZ0 = 0.3, 0.3, 0.3
    self.DT = 0.3
    cells_per_step = self.NX0 * self.NY0 * self.NZ0
    self.NT = self.data_t.shape[0] // cells_per_step
    print("steps of Time is %i" % self.NT)

    # output grid, initially identical to the loaded one
    self.NX, self.NY, self.NZ = self.NX0, self.NY0, self.NZ0
    self.DX, self.DY, self.DZ = self.DX0, self.DY0, self.DZ0

    # Switches
    self.Dim_Switch = 3
    self.Grids_Switch = False
    self.sEd = True
    self.sT = False
    self.sVt = True
    self.sVz = True
    self.sFrac = False

    self.IEOS = 1
    self.eos = Eos(self.IEOS)
    self.OutPutPath = None

    grid_shape = (self.NT, self.NX0, self.NY0, self.NZ0)
    self.Block_txyz = np.zeros(grid_shape + (4, ))
    # columns 0..3 of the file: Ed, Vx, Vy, Vz
    for comp in range(4):
        self.Block_txyz[:, :, :, :, comp] = \
            self.data_t[:, comp].reshape(grid_shape)

    # containers filled later
    self.Ed_newGrids = []
    self.T_newGrids = []
    self.Frac_newGrids = []
    self.Vt_newGrids = []
    self.Vz_newGrids = []
    self.Grids = []
    self.Hotel = []
    self.todo = []
class HPP(object):
    '''Post-processing of the hydro output stored in bulk3D.dat.

    The loaded table is kept in self.Block_txyz with axes
    (time, x, y, z, component).  loc()/loc2D() interpolate it at a given
    position, and save() writes the rows held in self.todo (plus optional
    coordinate, temperature and fraction columns) to a text file.
    '''

    def __init__(self, path):
        '''Read bulk3D.dat found in path and set up the default state.

        The grid of the loaded data is hard-coded (70 x 70 x 41 cells,
        spacing 0.3, TAU0 = 0.6, DT = 0.3).  Components 0..3 of Block_txyz
        are taken from the first four columns of the file and are labelled
        Ed, Vx, Vy, Vz.
        '''
        data_path = path
        print('Loading data file,please wait for a minuts!')
        datafile = os.path.join(data_path, 'bulk3D.dat')
        self.data_t = np.loadtxt(datafile)
        print('Data file loading complete!')

        # grid of the loaded data
        self.NX0 = 70
        self.NY0 = 70
        self.NZ0 = 41
        self.TAU0 = 0.6
        self.DX0 = 0.3
        self.DY0 = 0.3
        self.DZ0 = 0.3
        self.DT = 0.3
        self.NT = self.data_t.shape[0] // (self.NX0 * self.NY0 * self.NZ0)
        print("steps of Time is %i" % self.NT)

        # output grid, initially identical to the loaded one
        self.NX = self.NX0
        self.NY = self.NY0
        self.NZ = self.NZ0
        self.DX = self.DX0
        self.DY = self.DY0
        self.DZ = self.DZ0

        # Switches
        self.Dim_Switch = 3
        self.Grids_Switch = False
        self.sEd = True
        self.sT = False
        self.sVt = True
        self.sVz = True
        self.sFrac = False

        self.IEOS = 1
        self.eos = Eos(self.IEOS)
        self.OutPutPath = None

        grid_shape = (self.NT, self.NX0, self.NY0, self.NZ0)
        self.Block_txyz = np.zeros(grid_shape + (4, ))
        # columns 0..3 of the file: Ed, Vx, Vy, Vz
        for comp in range(4):
            self.Block_txyz[:, :, :, :, comp] = \
                self.data_t[:, comp].reshape(grid_shape)

        self.Ed_newGrids = []
        self.T_newGrids = []
        self.Frac_newGrids = []
        self.Vt_newGrids = []
        self.Vz_newGrids = []
        self.Grids = []
        self.Hotel = []
        self.todo = []

    # load customized grids
    def FormatCFG(self, NX=200, NY=200, NZ=200, DeltX=0.3, DeltY=0.3,
                  DeltZ=0.3, Dim_Switch=3, Grids_Switch=False, sEd=True,
                  sT=False, sFrac=False, sV=True, Outputpath=None):
        '''Configure the output grid, the column switches and the output path.

        Allocates self.Hotel with 10 columns and NT*NX*NY*NZ rows for
        Dim_Switch == 3 or NT*NX*NY rows for Dim_Switch == 2 (which also
        switches the Vz column off).  sV controls both sVt and sVz.
        '''
        self.NX = NX
        self.NY = NY
        self.NZ = NZ
        self.DX = DeltX
        self.DY = DeltY
        self.DZ = DeltZ

        self.Dim_Switch = Dim_Switch
        self.Grids_Switch = Grids_Switch
        self.sEd = sEd
        self.sT = sT
        self.sFrac = sFrac
        self.sVt = sV
        self.sVz = sV
        self.OutPutPath = Outputpath

        if Dim_Switch == 3:
            self.Hotel = np.zeros((self.NT * self.NX * self.NY * self.NZ, 10))
        elif Dim_Switch == 2:
            self.sVz = False
            self.Hotel = np.zeros((self.NT * self.NX * self.NY, 10))

    # give the final time
    def Finaltimestep(self):
        '''Return (NT, TAU0, DT).'''
        return self.NT, self.TAU0, self.DT

    # give the fraction of QGP and Hadron
    def Frac(self, Temp):
        '''Return 1 above 0.22, 0 below 0.18 and a linear ramp in between.

        Temp is a scalar temperature (units presumably GeV - TODO confirm).
        '''
        if Temp > 0.22:
            frac = 1.0
        elif Temp < 0.18:
            frac = 0.0
        else:
            frac = (Temp - 0.18) / (0.22 - 0.18)
        return frac

    # change 3D to 2D directly
    def change3Dto2D(self):
        '''Return components 0..2 on the central z slice as (NT*NX0*NY0, 3).'''
        OutPut2D = self.Block_txyz[:, :, :, self.NZ0 // 2, 0:3].reshape(
            self.NT * self.NX0 * self.NY0, 3)
        return OutPut2D

    def _time_index(self, t):
        '''Map time t to an index along the time axis of Block_txyz.

        A time on a stored step (within 1e-6 of a step) returns that step;
        any other time returns the following step, so there is no
        interpolation in time.
        '''
        steps = (t - self.TAU0) / self.DT
        L_NT_L = int(steps)
        # abs() guards against a tiny negative remainder on grid points
        rt = abs(steps - L_NT_L)
        if rt < 1e-6:
            return L_NT_L
        return L_NT_L + 1

    # return the hydro information of the input location
    def loc(self, t=0.6, x=0, y=0, z=0):
        '''Trilinearly interpolate the 4 stored components at (t, x, y, z).

        Returns np.zeros(4) when (x, y, z) lies outside the loaded grid.
        The cell lookup uses the spacing of the loaded data (DX0, DY0, DZ0),
        so it stays valid after FormatCFG() changed the output spacing.
        '''
        # The transverse grid is centred so that 0 falls between two points
        # (the trento grid has no point at 0 in the transverse plane).
        X_min = -self.DX0 * (self.NX0 - 1) / 2.0
        X_max = -X_min
        Y_min = -self.DY0 * (self.NY0 - 1) / 2.0
        Y_max = -Y_min
        Z_min = -(self.NZ0 // 2) * self.DZ0
        Z_max = -Z_min

        if (x > X_max or x < X_min or y > Y_max or y < Y_min
                or z > Z_max or z < Z_min):
            return np.zeros(4)

        L_NX_L = int((x - X_min) / self.DX0)
        L_NY_L = int((y - Y_min) / self.DY0)
        L_NZ_L = int((z - Z_min) / self.DZ0)
        L_NT = self._time_index(t)

        # abs() guards against a tiny negative remainder on grid points
        rx = abs((x - X_min) / self.DX0 - L_NX_L)
        ry = abs((y - Y_min) / self.DY0 - L_NY_L)
        rz = abs((z - Z_min) / self.DZ0 - L_NZ_L)

        return self.Int3D(rx, ry, rz, L_NX_L, L_NY_L, L_NZ_L, L_NT)

    # 2D
    def loc2D(self, t=0.6, x=0, y=0):
        '''Bilinearly interpolate components 0..2 on the central z slice.

        Returns np.zeros(3) when (x, y) lies outside the loaded grid.
        '''
        X_min = -self.DX0 * (self.NX0 - 1) / 2.0
        X_max = -X_min
        Y_min = -self.DY0 * (self.NY0 - 1) / 2.0
        Y_max = -Y_min

        if x > X_max or x < X_min or y > Y_max or y < Y_min:
            return np.zeros(3)

        L_NX_L = int((x - X_min) / self.DX0)
        L_NY_L = int((y - Y_min) / self.DY0)
        L_NT = self._time_index(t)

        # abs() guards against a tiny negative remainder on grid points
        rx = abs((x - X_min) / self.DX0 - L_NX_L)
        ry = abs((y - Y_min) / self.DY0 - L_NY_L)

        # Int2D keeps an (ignored) rz slot in its signature
        return self.Int2D(rx, ry, 0.0, L_NX_L, L_NY_L, L_NT)

    # 3D interpolation
    def Int3D(self, rx, ry, rz, L_NX_L, L_NY_L, L_NZ_L, L_NT):
        '''Trilinear interpolation inside the cell with lower corner L_*.

        rx, ry, rz are the fractional offsets from the lower corner.  The
        upper neighbour index is clamped to the last grid point so that a
        query exactly on the upper boundary (offset 0) does not index out
        of range.

        Returns the 4 interpolated components: [0]=Ed, [1]=Vx, ...
        '''
        block = self.Block_txyz[L_NT]
        x0, y0, z0 = L_NX_L, L_NY_L, L_NZ_L
        x1 = min(x0 + 1, block.shape[0] - 1)
        y1 = min(y0 + 1, block.shape[1] - 1)
        z1 = min(z0 + 1, block.shape[2] - 1)

        # along x on the four cell edges
        int100 = block[x0, y0, z0, :] * (1 - rx) + block[x1, y0, z0, :] * rx
        int101 = block[x0, y0, z1, :] * (1 - rx) + block[x1, y0, z1, :] * rx
        int110 = block[x0, y1, z0, :] * (1 - rx) + block[x1, y1, z0, :] * rx
        int111 = block[x0, y1, z1, :] * (1 - rx) + block[x1, y1, z1, :] * rx

        # then along z, finally along y
        intA = int101 * rz + int100 * (1 - rz)
        intB = int111 * rz + int110 * (1 - rz)
        intF = intB * ry + intA * (1 - ry)
        return intF

    def Int2D(self, rx, ry, rz, L_NX_L, L_NY_L, L_NT):
        '''Bilinear interpolation on the central z slice.

        rz is ignored; it is kept so that existing callers keep working.
        The upper neighbour index is clamped as in Int3D.

        Returns the first 3 interpolated components.
        '''
        plane = self.Block_txyz[L_NT, :, :, self.NZ0 // 2, :]
        x0, y0 = L_NX_L, L_NY_L
        x1 = min(x0 + 1, plane.shape[0] - 1)
        y1 = min(y0 + 1, plane.shape[1] - 1)

        int10 = plane[x0, y0, :] * (1 - rx) + plane[x1, y0, :] * rx
        int11 = plane[x0, y1, :] * (1 - rx) + plane[x1, y1, :] * rx
        intF2D = int11 * ry + int10 * (1 - ry)
        return intF2D[0:3]

    # save data file with required format
    def save(self):
        '''Fill self.Hotel from self.todo and write the selected columns.

        The 10 columns of Hotel are (t, x, y, z, ed, T, frac, vx, vy, vz);
        the switches decide which ones are written.  self.todo must be an
        array whose columns 0..3 hold ed, vx, vy, vz for every output row.
        The file is new_bulk3D.dat or new_bulk2D.dat depending on
        Dim_Switch.

        NOTE: this changes the working directory to self.OutPutPath (kept
        for backward compatibility).
        '''
        write_coords = bool(self.Grids_Switch)
        write_z = write_coords
        if self.Dim_Switch == 2:
            write_z = False
            self.sVz = False

        # (t, x, y, z, ed, T, frac, vx, vy, vz)
        qmark = np.array([write_coords, write_coords, write_coords, write_z,
                          self.sEd, self.sT, self.sFrac,
                          self.sVt, self.sVt, self.sVz], dtype=bool)

        if self.Grids_Switch:
            # Half-width (N - 1) / 2 * D per axis: identical to the
            # floor(N / 2) * D form for odd N, and correct when NX and NY
            # have different parity.
            x_half = (self.NX - 1) / 2.0 * self.DX
            y_half = (self.NY - 1) / 2.0 * self.DY
            xline = np.linspace(-x_half, x_half, self.NX, endpoint=True)
            yline = np.linspace(-y_half, y_half, self.NY, endpoint=True)

            tau = np.linspace(self.TAU0, self.TAU0 + (self.NT - 1) * self.DT,
                              self.NT, endpoint=True)
            print(tau.shape)
            x_t = np.tile(xline, self.NT)
            y_tx = np.tile(yline, self.NT * self.NX)

            if self.Dim_Switch == 2:
                self.Hotel[:, 0] = np.repeat(tau, self.NX * self.NY)
                self.Hotel[:, 1] = np.repeat(x_t, self.NY)
                self.Hotel[:, 2] = y_tx

            if self.Dim_Switch == 3:
                z_half = (self.NZ // 2) * self.DZ
                zline = np.linspace(-z_half, z_half, self.NZ, endpoint=True)
                # integer repeat counts (true division yields floats on
                # Python 3, which numpy rejects)
                self.Hotel[:, 0] = np.repeat(tau,
                                             self.NX * self.NY * self.NZ)
                self.Hotel[:, 1] = np.repeat(x_t, self.NY * self.NZ)
                self.Hotel[:, 2] = np.repeat(y_tx, self.NZ)
                self.Hotel[:, 3] = np.tile(zline,
                                           self.NT * self.NX * self.NY)

        if self.sEd:
            self.Hotel[:, 4] = self.todo[:, 0]
        if self.sT:
            self.Hotel[:, 5] = self.eos.f_T(self.Hotel[:, 4])  # T
        if self.sFrac:
            # list comprehension: map() is lazy on Python 3
            self.Hotel[:, 6] = np.array(
                [self.Frac(temp) for temp in self.Hotel[:, 5]])  # Frac
        if self.sVt:
            self.Hotel[:, 7] = self.todo[:, 1]
            self.Hotel[:, 8] = self.todo[:, 2]
        if self.sVz:
            self.Hotel[:, 9] = self.todo[:, 3]

        OutPutData = self.Hotel[:, qmark]

        os.chdir(self.OutPutPath)
        if self.Dim_Switch == 3:
            np.savetxt('new_bulk3D.dat', OutPutData,
                       header='ED,VX,VY,VEta' + ' NT=%i ' % self.NT +
                       ' NX=%i' % self.NX + ' NY=%i' % self.NY +
                       ' NZ=%i' % self.NZ)
            print(OutPutData.shape)
            print("new_bulk3D.dat Finished")
        elif self.Dim_Switch == 2:
            np.savetxt('new_bulk2D.dat', OutPutData,
                       header='ED,VX,VY' + ' NT=%i' % self.NT +
                       ' NX=%i' % self.NX + ' NY=%i' % self.NY)
            print("new_bulk2D.dat Finished")
def test_rootfinding(self):
    '''Round-trip check of the OpenCL rootFinding routine.

    Random (ed, vx, vy, vz) cells are generated on the host.  The kernel
    computes T00 and the momentum magnitude M from each cell and calls
    rootFinding to recover the energy density, which must agree with the
    input to 2 decimals.
    '''
    cwd, cwf = os.path.split(__file__)

    kernel_src = """
    #include "helper.h"

    __kernel void rootfinding_test(
             global real4 * d_edv,
             global real * result,
             const int size,
             read_only image2d_t eos_table) {
      int gid = (int) get_global_id(0);
      if ( gid < size ) {
           real4 edv = d_edv[gid];
           real eps = edv.s0;
           real pre = P(eps, eos_table);
           real4 umu = (real4)(1.0f, edv.s1, edv.s2, edv.s3);
           real u0 = gamma(umu.s1, umu.s2, umu.s3);
           umu = u0*umu;
           real4 T0m = (eps+pre)*u0*umu - pre*gm[0];
           real M = sqrt(T0m.s1*T0m.s1 + T0m.s2*T0m.s2 + T0m.s3*T0m.s3);
           real T00 = T0m.s0;
           real ed_found;
           rootFinding(&ed_found, T00, M, eos_table);
           result[gid] = ed_found;
      }
    }
    """

    # the same list object is handed to create_table() and to build()
    compile_options = [
        '-I %s' % os.path.join(cwd, '..', 'kernel'),
        '-D USE_SINGLE_PRECISION',
        '-D EOSI',
    ]

    eos_table = Eos(cfg).create_table(self.ctx, compile_options)

    prg = cl.Program(self.ctx, kernel_src).build(compile_options)

    size = 205 * 205 * 85
    edv = np.empty((size, 4), cfg.real)

    # random energy density and a velocity with |v| < 1
    edv[:, 0] = np.random.uniform(1.0, 10.0, size)
    v_mag = np.random.uniform(0.0, 0.999, size)
    theta = np.random.uniform(0.0, np.pi, size)
    phi = np.random.uniform(-np.pi, np.pi, size)
    cos_theta = np.cos(theta)
    edv[:, 1] = v_mag * cos_theta * np.cos(phi)
    edv[:, 2] = v_mag * cos_theta * np.sin(phi)
    edv[:, 3] = v_mag * np.sin(theta)

    # host buffer receiving the energy density found by the kernel
    final = np.empty(size, dtype=np.float32)

    mf = cl.mem_flags
    final_gpu = cl.Buffer(self.ctx, mf.READ_WRITE, final.nbytes)
    edv_gpu = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                        hostbuf=edv)

    prg.rootfinding_test(self.queue, (size, ), None, edv_gpu, final_gpu,
                         np.int32(size), eos_table)

    cl.enqueue_copy(self.queue, final, final_gpu).wait()

    np.testing.assert_almost_equal(final, edv[:, 0], 2)

    print('rootfinding test pass')