def minimum(ary, backend=None):
    """Return the smallest element of a backend-wrapped array.

    Parameters
    ----------
    ary
        Wrapper object exposing ``dev`` (the underlying array) and, when
        *backend* is not given, ``backend``.
    backend : str, optional
        One of ``'cython'``, ``'opencl'`` or ``'cuda'``. Defaults to
        ``ary.backend``.

    Returns
    -------
    The minimum of ``ary.dev``. For ``'cython'`` this is ``ary.dev.min()``;
    for the GPU backends the reduction result is fetched with ``.get()``.

    Raises
    ------
    ValueError
        If the backend name is not one of the three supported values
        (previously this case silently returned ``None``).
    """
    if backend is None:
        backend = ary.backend

    if backend == 'cython':
        return ary.dev.min()

    # GPU libraries are imported lazily so that only the one actually
    # requested has to be installed.
    if backend == 'opencl':
        import pyopencl.array as gpuarray
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
    else:
        raise ValueError(
            "Unsupported backend %r; expected 'cython', 'opencl' or 'cuda'"
            % (backend,)
        )
    return gpuarray.min(ary.dev).get()
def ndt_min(params, G, ctop):
    """Run the per-zone ``ndt`` kernel and return the minimum of the result.

    The kernel and the ``ndt`` output buffer are created on first use and
    cached in the module-level globals ``knl_ndt`` and ``ndt``.

    Parameters
    ----------
    params
        Mapping providing ``'queue'`` and ``'cour'`` (the latter is baked
        into the kernel when it is first built).
    G
        Grid object; ``G.shapes``, ``G.NG`` and ``G.dx_d`` are used.
    ctop
        Array passed to the kernel; it is indexed with ``+ng`` offsets, i.e.
        it already includes ghost zones, while ``ndt`` covers the bulk only.

    Returns
    -------
    The value of ``cl_array.min(ndt)``, returned as-is (no ``.get()``).
    """
    global knl_ndt, ndt
    shapes = G.shapes

    if knl_ndt is None:
        # Ghost zones are already present where needed; ndt stays bulk-only
        # because that is all that is needed / makes sense.
        kernel_src = """ ndt[i,j,k] = 1 / ( 1/(cour * dx[1] / ctop[1,i+ng,j+ng,k+ng]) + 1/(cour * dx[2] / ctop[2,i+ng,j+ng,k+ng]) + 1/(cour * dx[3] / ctop[3,i+ng,j+ng,k+ng]) ) """
        knl_ndt = lp.make_kernel(
            shapes.isl_grid_scalar,
            kernel_src,
            [*vecArrayArgs("ctop"), ...],
        )
        knl_ndt = lp.fix_parameters(knl_ndt, cour=params['cour'])
        knl_ndt = lp.fix_parameters(knl_ndt, ndim=4)
        knl_ndt = tune_grid_kernel(knl_ndt, shapes.bulk_scalar, ng=G.NG)
        print("Compiled ndt min")

    if ndt is None:
        ndt = cl_array.zeros(
            params['queue'], shapes.bulk_scalar, dtype=np.float64
        )

    # TODO if debug print/record argmin?
    evt, _ = knl_ndt(params['queue'], ctop=ctop, dx=G.dx_d, ndt=ndt)

    # TODO manual reduce this? Loopy doesn't like reductions...
    return cl_array.min(ndt)
def minimum_cl(a, b=None):
    """Minimum of one GPU array, or element-wise minimum of two.

    Parameters
    ----------
    a : gpuarray
        First GPU array.
    b : gpuarray, optional
        Second GPU array. When omitted, *a* alone is reduced.

    Returns
    -------
    gpuarray
        ``cl_array.minimum(a, b)`` when *b* is given, otherwise
        ``cl_array.min(a)``.

    Examples
    --------
    >>> a = minimum_cl(give_cl(queue, [1, 2, 3]), give_cl(queue, [3, 2, 1]))
    >>> a
    [1, 2, 1]
    >>> type(a)
    <class 'pyopencl.array.Array'>
    """
    if b is None:
        # Single-array form: reduce to the overall minimum.
        return cl_array.min(a)
    return cl_array.minimum(a, b)
def min(t: Tensor) -> np.float32:
    """Return the smallest value stored in a tensor.

    Host tensors are reduced with ``np.min``. GPU tensors (``t.gpu`` truthy)
    are reduced with ``clarray.min``, transferred with ``.get()`` and
    unwrapped through ``.flat[0]``.
    """
    if not t.gpu:
        return np.min(t._data)

    device_min = clarray.min(t._data)
    return device_min.get().flat[0]
def min(*args, **kwargs):
    """Minimum of a device array, optionally along an axis.

    The first positional argument is the array. When it is not 0-d and an
    ``axis`` keyword is supplied, the call is forwarded to ``_sum`` with
    ``prg2load`` set to ``programs.min``. Otherwise the whole array is
    reduced with ``clarray.min`` on the module-level ``queue`` and the
    result is returned as a ``myclArray`` (re-classed in place if needed).
    """
    first = args[0]

    if first.ndim != 0 and 'axis' in kwargs:
        # Axis-wise reduction goes through the shared reduction driver.
        kwargs['prg2load'] = programs.min
        return _sum(*args, **kwargs)

    reduced = clarray.min(first, queue=queue)
    if not isinstance(reduced, myclArray):
        # Promote the plain result to the project's array subclass.
        reduced.__class__ = myclArray
        reduced.reinit()
    return reduced
def _get_min_max(self):
    """Return ``(minimum, maximum)`` of the current data.

    np.amax is too slow for big arrays, so the reduction is first attempted
    on the GPU using the renderer's data image. If anything in that attempt
    raises, the exception is printed and the values are computed with numpy
    from ``self.dataModel[0]`` instead.
    """
    if self.dataModel:
        try:
            image = self.renderer.dataImg
            staging = OCLArray.empty(image.shape, image.dtype)
            staging.copy_image(image)
            lowest = float(cl_array.min(staging).get())
            highest = float(cl_array.max(staging).get())
        except Exception as exc:
            # Best-effort fallback: report the problem and use the host path.
            print(exc)
            lowest = np.amin(self.dataModel[0])
            highest = np.amax(self.dataModel[0])
    # NOTE(review): when self.dataModel is falsy nothing is assigned above,
    # so this return appears to raise UnboundLocalError - confirm callers
    # never reach that case.
    return lowest, highest
def U_to_P(params, G, U, P, Pout=None, iter_max=None):
    """Recover primitive variables from conserved variables (U -> P).

    The B-field primitives are always written to ``Pout``. The remaining
    primitives are obtained from an iterative solve for ``Wp`` (one
    derivative-based step followed by up to ``iter_max`` secant steps) and
    are written by the final kernel under an ``if=set`` predicate, where
    ``set = not(eflag)``.

    All kernels are built on first use and cached in module-level globals
    (``knl_set_eflag``, ``knl_utop_prep``, ``knl_utop_iter``,
    ``knl_utop_set``), as are the geometry quantities ``ncov``, ``ncon`` and
    ``lgdet``.

    Parameters
    ----------
    params
        Mapping. Reads ``'queue'``, ``'gam'`` and ``'n_prim'``; optionally
        ``'invert_iter_max'`` (default 8), ``'invert_err_tol'`` (default
        1.e-8), ``'invert_iter_delta'`` (default 1.e-5), ``'debug'`` and
        ``'electrons'``. ``'gamma_max'`` is set to 25 *in place* when absent.
    G
        Grid object. Uses ``G.slices``, ``G.shapes``, ``G.NG``, ``G.gdet_d``,
        ``G.lapse_d`` and the methods ``vecdivbygeom``, ``timesgeom``,
        ``vectimesgeom``, ``raise_grid``, ``lower_grid`` and ``dot``.
    U
        Conserved variables, indexed with ``G.slices`` entries
        (``RHO``, ``UU``, ``U3VEC``, ``B3VEC``).
    P
        Primitives used for the initial ``Wp`` guess (via ``Wp_func``) and
        copied to create ``Pout`` when none is supplied.
    Pout : optional
        Destination for the primitives. Defaults to ``P.copy()``; when the
        caller passes memory, it must already hold sensible values for any
        zone that ends up not being updated.
    iter_max : optional
        Maximum number of secant iterations. Falls back to
        ``params['invert_iter_max']`` and then to 8.

    Returns
    -------
    tuple
        ``(Pout, eflag)``. ``eflag`` is an int32 array of shape
        ``sh.grid_scalar``. Values set in this function: ``-100`` where
        ``U[s.RHO] < 0``; ``+1`` added where the iteration did not converge;
        ``6`` / ``7`` / ``8`` from the final kernel for ``rho0 < 0`` /
        ``u < 0`` / both. ``Wp_func``, ``err_eqn`` and ``gamma_func`` also
        receive ``eflag`` and may modify it (not visible here).

    Raises
    ------
    ValueError
        Only when ``params['debug']`` is truthy: if any bulk ``eflag`` entry
        is nonzero after the initial guess, or if any bulk ``gamma`` is < 1.
    """
    s = G.slices
    sh = G.shapes

    # Set some default parameters, but allow overrides
    if iter_max is None:
        if 'invert_iter_max' in params:
            iter_max = params['invert_iter_max']
        else:
            iter_max = 8

    if 'invert_err_tol' in params:
        err_tol = params['invert_err_tol']
    else:
        err_tol = 1.e-8

    if 'invert_iter_delta' in params:
        delta = params['invert_iter_delta']
    else:
        delta = 1.e-5

    # Note: this writes the default back into the caller's params mapping.
    if 'gamma_max' not in params:
        params['gamma_max'] = 25

    # Don't overwrite old memory by default, just allow using old values
    # Caller can pass new/old memory but is responsible that it contain logical values if we don't update it
    if Pout is None:
        Pout = P.copy()

    # Update the primitive B-fields
    G.vecdivbygeom(params['queue'], u=U[s.B3VEC], g=G.gdet_d[Loci.CENT.value], out=Pout[s.B3VEC])

    # Cached constant quantities
    global ncov, ncon, lgdet
    if ncov is None:
        # For later on
        ncov = cl_array.zeros(params['queue'], sh.grid_vector, dtype=np.float64)
        G.timesgeom(params['queue'], u=cl_array.empty_like(ncov[0]).fill(1.0), g=-G.lapse_d[Loci.CENT.value], out=ncov[0])
        ncon = G.raise_grid(ncov)
        lgdet = G.lapse_d[Loci.CENT.value] / G.gdet_d[Loci.CENT.value]

    # Eflag will indicate inversion failures
    # Define a generic kernel so we can split out flagging in the future
    # Use it to catch negative density early on
    eflag = cl_array.zeros(params['queue'], sh.grid_scalar, dtype=np.int32)

    global knl_set_eflag
    if knl_set_eflag is None:
        code = add_ghosts(
            """eflag[i,j,k] = if(var[i,j,k] < 0, flag, eflag[i,j,k])""")
        knl_set_eflag = lp.make_kernel(
            sh.isl_grid_scalar, code,
            [
                *scalarArrayArgs("eflag", dtype=np.int32),
                *scalarArrayArgs("var"),
                lp.ValueArg("flag", dtype=np.int32),
                ...
            ],
            default_offset=lp.auto)
        knl_set_eflag = tune_grid_kernel(knl_set_eflag, sh.bulk_scalar, ng=G.NG)

    # Flag zones with negative conserved density as -100
    evt, _ = knl_set_eflag(params['queue'], var=U[s.RHO], eflag=eflag, flag=-100)

    # Convert from conserved variables to four-vectors
    Bcon = cl_array.zeros(params['queue'], sh.grid_vector, dtype=np.float64)
    G.vectimesgeom(params['queue'], u=U[s.B3VEC], g=lgdet, out=Bcon[1:])

    Qcov = cl_array.empty_like(Bcon)
    G.timesgeom(params['queue'], u=(U[s.UU] - U[s.RHO]), g=lgdet, out=Qcov[0])
    G.vectimesgeom(params['queue'], u=U[s.U3VEC], g=lgdet, out=Qcov[1:])

    Bcov = G.lower_grid(Bcon)
    Qcon = G.raise_grid(Qcov)

    # This will have fringes of zeros still!
    Bsq = G.dot(Bcon, Bcov)
    QdB = G.dot(Bcon, Qcov)
    Qdotn = G.dot(Qcon, ncov)
    Qsq = G.dot(Qcon, Qcov)

    Qtsq = Qsq + Qdotn**2

    Qtcon = cl_array.empty_like(Qcon)
    for i in range(4):
        Qtcon[i] = Qcon[i] + ncon[i] * Qdotn

    # Set up eqn for W', the energy density
    D = cl_array.zeros_like(Qsq)
    G.timesgeom(params['queue'], u=U[s.RHO], g=lgdet, out=D)
    Ep = -Qdotn - D

    # Drop references to intermediates that are no longer needed
    del Bcov, Qcon, Qcov

    # Numerical rootfinding
    # Take guesses from primitives
    Wp = Wp_func(params, G, P, Loci.CENT, eflag)

    # Trap on any failures so far if debugging. They're very rare.
    if 'debug' in params and params['debug']:
        if np.any(eflag.get()[s.bulk] != 0):
            raise ValueError("Unexpected flag set!")

    # Step around the guess & evaluate errors
    h = delta * Wp  # TODO stable enough? Need fancy subtraction from iharm3d?
    errp = err_eqn(params, G, Bsq, D, Ep, QdB, Qtsq, Wp + h, eflag)
    err = err_eqn(params, G, Bsq, D, Ep, QdB, Qtsq, Wp, eflag)
    errm = err_eqn(params, G, Bsq, D, Ep, QdB, Qtsq, Wp - h, eflag)

    # Preserve Wp/err before updating them below
    Wp1 = Wp.copy()
    err1 = err.copy()

    global knl_utop_prep
    if knl_utop_prep is None:
        # TODO keep an accumulator here to avoid that costly any() call?
        code = add_ghosts("""
        # TODO put error/prep in here, not calling below
        # Attempt a Halley/Muller/Bailey/Press step
        dedW := (errp[i,j,k] - errm[i,j,k]) / (2 * h[i,j,k])
        dedW2 := (errp[i,j,k] - 2.*err[i,j,k] + errm[i,j,k]) / (h[i,j,k]**2)
        # Limit size of 2nd derivative correction
        # Loopy trick common in HARM: define intermediate variables (xt, xt2, xt3...) to impose clipping
        # This allows assignments or substitutions while keeping dependencies straight
        ft := 0.5*err[i,j,k]*dedW2/(dedW**2)
        ft2 := if(ft > 0.3, 0.3, ft)
        f := if(ft2 < -0.3, -0.3, ft2)
        # Limit size of step
        dWt := -err[i,j,k] / dedW / (1. - f)
        dWt2 := if(dWt < -0.5*Wp[i,j,k], -0.5*Wp[i,j,k], dWt)
        dW := if(dWt2 > 2.0*Wp[i,j,k], 2.0*Wp[i,j,k], dWt2)
        Wp[i,j,k] = Wp[i,j,k] + dW {id=wp}
        # Guarantee we take one step in every bulk zone
        stop_flag[i,j,k] = 0
        # This would avoid the step where there's convergence, but would require taking 2*dW above or similar
        #stop_flag[i,j,k] = (fabs(dW / Wp[i,j,k]) < err_tol) + \
        # (fabs(err[i,j,k] / Wp[i,j,k]) < err_tol) {dep=wp,nosync=wp}
        """)
        knl_utop_prep = lp.make_kernel(
            sh.isl_grid_scalar, code,
            [
                *scalarArrayArgs("err", "errp", "errm", "h", "Wp"),
                *scalarArrayArgs("stop_flag", dtype=np.int8),
                ...
            ],
            assumptions=sh.assume_grid, seq_dependencies=True)
        knl_utop_prep = lp.fix_parameters(knl_utop_prep, err_tol=err_tol)
        knl_utop_prep = tune_grid_kernel(knl_utop_prep, sh.bulk_scalar, ng=G.NG)
        print("Compiled utop_prep")

    # Fill stop_flag with 1 so we don't have to worry about ghost zones taking steps
    stop_flag = cl_array.empty(params['queue'], sh.grid_scalar, dtype=np.int8).fill(1)
    evt, _ = knl_utop_prep(params['queue'], err=err, errp=errp, errm=errm, h=h, Wp=Wp, stop_flag=stop_flag)
    evt.wait()
    # Re-evaluate the error at the stepped Wp, writing into the existing err array
    err_eqn(params, G, Bsq, D, Ep, QdB, Qtsq, Wp, eflag, out=err)

    # Iteration kernel for 1Dw solver
    global knl_utop_iter
    if knl_utop_iter is None:
        code = add_ghosts("""
        # Evaluate whether we need to do any of this
        <> go = not(stop_flag[i,j,k]) {id=insn_go}
        # Normal secant increment is dW. Limit guess to between 0.5 and 2 times current value
        dWt := (Wp1[i,j,k] - Wp[i,j,k]) * err[i,j,k] / (err[i,j,k] - err1[i,j,k])
        dWt2 := if(dWt < -0.5*Wp[i,j,k], -0.5*Wp[i,j,k], dWt)
        <> dW = if(dWt2 > 2.0*Wp[i,j,k], 2.0*Wp[i,j,k], dWt2) {id=dw,if=go}
        # Preserve last values, after use but before any changes
        Wp1[i,j,k] = Wp[i,j,k] {id=wp1,nosync=dw,if=go}
        err1[i,j,k] = err[i,j,k] {nosync=dw,if=go}
        # Update Wp. Err will be updated outside kernel
        Wp[i,j,k] = Wp[i,j,k] + dW {id=wp,nosync=dw:wp1,if=go}
        # Set flag not to continue in zones that have converged
        stop_flag[i,j,k] = if(fabs(dW / Wp[i,j,k]) < err_tol, 1, stop_flag[i,j,k]) {nosync=dw:wp:insn_go,if=go}
        # For the future, when we've defined err_eqn for loopy kernels
        # err = err_eqn(Bsq, D, Ep, QdB, Qtsq, Wp, gam, gamma_max, eflag) {if=go}
        # stop_flag[i,j,k] = stop_flag[i,j,k] + (fabs(err[] / Wp[]) < err_tol) {if=go}
        """)
        knl_utop_iter = lp.make_kernel(
            sh.isl_grid_scalar, code,
            [
                *scalarArrayArgs("Wp", "Wp1", "err", "err1"),
                *scalarArrayArgs("stop_flag", dtype=np.int8),
                ...
            ],
            assumptions=sh.assume_grid, default_offset=lp.auto, seq_dependencies=True)
        knl_utop_iter = lp.fix_parameters(knl_utop_iter, err_tol=err_tol)
        knl_utop_iter = tune_grid_kernel(knl_utop_iter, sh.bulk_scalar, ng=G.NG)
        print("Compiled utop_iter")

    # Iterate at least once to set new values from first step
    # TODO Needed now we set Wp, err1 right?
    for niter in range(iter_max):
        #print("U_to_P iter")
        evt, _ = knl_utop_iter(params['queue'], Wp=Wp, Wp1=Wp1, err=err, err1=err1, stop_flag=stop_flag)
        err = err_eqn(params, G, Bsq, D, Ep, QdB, Qtsq, Wp, eflag)
        # TODO there may be better/faster if/reduction statements here...
        stop_flag |= (clm.fabs(err / Wp) < err_tol)
        # Leave the loop once no zone has stop_flag == 0
        if cl_array.min(stop_flag) >= 1:
            break

    # If secant method failed to converge, do not set primitives other than B
    # (this adds 1 to whatever eflag already holds in those zones)
    eflag += (stop_flag == 0)

    del Wp1, err, err1, stop_flag

    # Find utsq, gamma, rho0 from Wp
    gamma = gamma_func(params, G, Bsq, D, QdB, Qtsq, Wp, eflag)

    if 'debug' in params and params['debug']:
        if np.any(gamma.get()[s.bulk] < 1.):
            raise ValueError("gamma < 1 failure!")

    # Find the scalars
    global knl_utop_set
    if knl_utop_set is None:
        code = add_ghosts(
            replace_prim_names("""
        rho0 := D[i,j,k] / gamma[i,j,k]
        W := Wp[i,j,k] + D[i,j,k]
        w := W / (gamma[i,j,k]**2)
        pres := (w - rho0) * (gam - 1.) / gam
        u := w - (rho0 + pres)
        # Set flag if prims are < 0
        eflag[i,j,k] = if((u < 0)*(rho0 < 0), 8, if(u < 0, 7, if(rho0 < 0, 6, eflag[i,j,k]))) {id=ef}
        # Don't update flagged primitives (necessary? Could skip the branch if fixup does ok)
        <> set = not(eflag[i,j,k]) {dep=ef,nosync=ef}
        P[RHO,i,j,k] = rho0 {if=set}
        P[UU,i,j,k] = u {if=set}
        P[U1,i,j,k] = (gamma[i,j,k] / (W + Bsq[i,j,k])) * (Qtcon[1,i,j,k] + QdB[i,j,k] * Bcon[1,i,j,k] / W) {if=set}
        P[U2,i,j,k] = (gamma[i,j,k] / (W + Bsq[i,j,k])) * (Qtcon[2,i,j,k] + QdB[i,j,k] * Bcon[2,i,j,k] / W) {if=set}
        P[U3,i,j,k] = (gamma[i,j,k] / (W + Bsq[i,j,k])) * (Qtcon[3,i,j,k] + QdB[i,j,k] * Bcon[3,i,j,k] / W) {if=set}
        """))
        # NOTE(review): the electrons snippet below reads U[...] and uses the
        # names KEL/KTOT/RHO, but it is not passed through replace_prim_names
        # and the knl_utop_set call further down passes no U argument -
        # confirm this path actually works when params['electrons'] is set.
        if 'electrons' in params and params['electrons']:
            code += add_ghosts("""
            P[KEL,i,j,k] = U[KEL,i,j,k]/U[RHO,i,j,k]
            P[KTOT,i,j,k] = U[KTOT,i,j,k]/U[RHO,i,j,k]
            """)
        knl_utop_set = lp.make_kernel(
            sh.isl_grid_scalar, code,
            [
                *primsArrayArgs("P"),
                *vecArrayArgs("Qtcon", "Bcon"),
                *scalarArrayArgs("D", "gamma", "Wp", "Bsq", "QdB"),
                *scalarArrayArgs("eflag", dtype=np.int32),
                ...
            ],
            assumptions=sh.assume_grid, default_offset=lp.auto)
        knl_utop_set = lp.fix_parameters(knl_utop_set, gam=params['gam'], nprim=params['n_prim'], ndim=4)
        knl_utop_set = tune_grid_kernel(knl_utop_set, sh.bulk_scalar, ng=G.NG)
        print("Compiled utop_set")

    evt, _ = knl_utop_set(params['queue'], P=Pout, Qtcon=Qtcon, Bcon=Bcon, D=D, gamma=gamma, Wp=Wp, Bsq=Bsq, QdB=QdB, eflag=eflag)
    evt.wait()

    del Qtcon, Bcon, D, gamma, Wp, Bsq, QdB

    # Trap on flags early in test problems
    # NOTE(review): the message says "in bulk" but the count and argwhere run
    # over the whole eflag array (no [s.bulk] slice) - confirm which is meant.
    if 'debug' in params and params['debug']:
        n_nonzero = np.count_nonzero(eflag.get())
        if n_nonzero > 0:
            print("Nonzero eflag in bulk: {}\nFlags: {}".format(
                n_nonzero, np.argwhere(eflag.get() != 0)))

    return Pout, eflag
def min(self, a):
    """Return the minimum of device array *a* as a host scalar.

    The reduction runs on this object's array-context queue; the result is
    fetched with ``.get()`` and unwrapped by indexing with an empty tuple.
    """
    import pyopencl.array as cl_array

    queue = self._array_context.queue
    device_min = cl_array.min(a, queue=queue)
    host_min = device_min.get()
    return host_min[()]