Example #1
def maximum(ary, backend=None):
    if backend is None:
        backend = ary.backend
    if backend == 'cython':
        return ary.dev.max()
    elif backend == 'opencl':
        import pyopencl.array as gpuarray
        return gpuarray.max(ary.dev).get()
    elif backend == 'cuda':
        import pycuda.gpuarray as gpuarray
        return gpuarray.max(ary.dev).get()
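For reference, a minimal standalone sketch of the pyopencl call that the 'opencl' branch above relies on; the context and queue setup here is illustrative and not part of the original example.

import numpy as np
import pyopencl as cl
import pyopencl.array as cl_array

ctx = cl.create_some_context()   # pick any available OpenCL device
queue = cl.CommandQueue(ctx)

# cl_array.max runs the reduction on the device and returns a
# single-element device array; .get() copies that value to the host.
a_dev = cl_array.to_device(queue, np.arange(10, dtype=np.float32))
print(cl_array.max(a_dev).get())   # 9.0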
Example #2
    def get_divergence_error(vector):
        for mu in range(3):
            fft.idft(vector[mu], vector_x[mu])

        derivs.divergence(queue, vector_x, div)

        derivs(queue, fx=vector_x[0], pdx=pdx[0])
        derivs(queue, fx=vector_x[1], pdy=pdx[1])
        derivs(queue, fx=vector_x[2], pdz=pdx[2])
        norm = sum([clm.fabs(pdx[mu]) for mu in range(3)])

        max_err = cla.max(clm.fabs(div)) / cla.max(norm)
        avg_err = cla.sum(clm.fabs(div)) / cla.sum(norm)
        return max_err, avg_err
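The max_err/avg_err computation above is a device-side relative error norm: the fabs, max, and sum calls all run on the GPU (clm and cla are presumably pyopencl.clmath and pyopencl.array), so only the final scalars ever leave the device. A minimal self-contained sketch of the same pattern, with made-up array contents for illustration:

import numpy as np
import pyopencl as cl
import pyopencl.array as cla
import pyopencl.clmath as clm

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

exact = cla.to_device(queue, np.linspace(0., 1., 64))
approx = cla.to_device(queue, np.linspace(0., 1., 64) + 1e-3)

diff = clm.fabs(approx - exact)
max_err = (cla.max(diff) / cla.max(clm.fabs(exact))).get()   # relative L-infinity error
avg_err = (cla.sum(diff) / cla.sum(clm.fabs(exact))).get()   # relative L1 error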
Example #3
File: math_.py Project: sehlstrom/compas
def maximum_cl(a, b=None):

    """ Maximum values of two GPUArrays.

    Parameters
    ----------
    a : gpuarray
        First GPUArray.
    b : gpuarray
        Second GPUArray.

    Returns
    -------
    gpuarray
        Elementwise maximum values from both GPUArrays, or the single maximum value if only one GPUArray is given.

    Examples
    --------
    >>> a = maximum_cl(give_cl(queue, [1, 2, 3]), give_cl(queue, [3, 2, 1]))
    [3, 2, 3]

    >>> type(a)
    <class 'pyopencl.array.Array'>

    """

    if b is not None:
        return cl_array.maximum(a, b)
    return cl_array.max(a)
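Note the asymmetry between the two branches: cl_array.maximum(a, b) is an elementwise comparison and returns an array with the same shape as its inputs, whereas cl_array.max(a) is a reduction that returns a single-element device array, so callers that want a host number still need to call .get() on the result.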
Example #4
    def max(t: Tensor) -> np.float32:
        """The maximum of the values in a tensor."""

        if t.gpu:
            return clarray.max(t._data).get().flat[0]

        return np.max(t._data)
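The .get().flat[0] chain above reflects the fact that clarray.max returns a zero-dimensional device array: .get() copies it back as a 0-d NumPy array, and .flat[0] then extracts a plain NumPy scalar so the GPU and CPU branches return the same kind of value.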
Example #5
    def get_divergence_errors(hij):
        max_errors = []
        avg_errors = []
        for i in range(1, 4):
            for mu in range(3):
                fft.idft(hij[tensor_id(i, mu + 1)], vector_x[mu])

            derivs.divergence(queue, vector_x, div)

            derivs(queue, fx=vector_x[0], pdx=pdx[0])
            derivs(queue, fx=vector_x[1], pdy=pdx[1])
            derivs(queue, fx=vector_x[2], pdz=pdx[2])
            norm = sum([clm.fabs(pdx[mu]) for mu in range(3)])

            max_errors.append(cla.max(clm.fabs(div)) / cla.max(norm))
            avg_errors.append(cla.sum(clm.fabs(div)) / cla.sum(norm))

        return np.array(max_errors), np.array(avg_errors)
Example #6
File: mynp.py Project: ixtel/neurolabcl
def max(*args, **kwargs):
    a = args[0]
    if a.ndim == 0 or 'axis' not in kwargs:
        res = clarray.max(a, queue=queue)  # np.sum(*args, **kwargs)
        if not isinstance(res, myclArray):
            res.__class__ = myclArray
            res.reinit()
        return res
    else:
        kwargs['prg2load'] = programs.max
        return _sum(*args, **kwargs)
Example #7
    def _get_min_max(self):
        # as np.amax is too slow for big arrays, do it on the GPU

        if self.dataModel:
            try:
                im = self.renderer.dataImg
                tmp_buf = OCLArray.empty(im.shape, im.dtype)
                tmp_buf.copy_image(im)
                mi = float(cl_array.min(tmp_buf).get())
                ma = float(cl_array.max(tmp_buf).get())

            except Exception as e:
                print(e)
                mi = np.amin(self.dataModel[0])
                ma = np.amax(self.dataModel[0])
        return mi, ma
Example #8
def max(q, a, axis=None, keepdims=False):
    assert a.ndim < 3

    if axis is None or a.ndim <= 1:
        out_shape = (1, ) * a.ndim
        return clarray.max(a).reshape(out_shape)
    elif axis < 0:
        axis += 2
    assert axis in (0, 1)

    # TODO generate & cache kernel elsewhere
    prg = cl.Program(
        clplatf.ctx, _cmp_by_axis_kernel_template % {
            'cmp_op': '>',
            'dtype': dtype_to_ctype(a.dtype),
            'init_val': str(np.finfo(a.dtype).min)
        }).build()
    col_max = prg._max_by_cols
    row_max = prg._max_by_rows
    # TODO calculate workgroup size given the array and axis
    element_size = 4 if a.dtype == np.float32 else 8
    n, m = a.shape if a.flags.c_contiguous else (a.shape[1], a.shape[0])
    if (axis == 0 and a.flags.c_contiguous) or (axis == 1
                                                and a.flags.f_contiguous):
        if keepdims:
            out_shape = (1, m) if axis == 0 else (m, 1)
        else:
            out_shape = (m, )
        maxes = clarray.empty(q, out_shape, dtype=a.dtype)
        indices = clarray.empty(q, out_shape, dtype=np.int32)
        ev = col_max(q, (m * 64, ), (64, ), a.data, maxes.data, indices.data,
                     np.int32(m), np.int32(n),
                     cl.LocalMemory(64 * element_size), cl.LocalMemory(64 * 4))
    else:
        if keepdims:
            out_shape = (1, n) if axis == 0 else (n, 1)
        else:
            out_shape = (n, )
        maxes = clarray.empty(q, out_shape, dtype=a.dtype)
        indices = clarray.empty(q, out_shape, dtype=np.int32)
        ev = row_max(q, (n * 64, ), (64, ), a.data, maxes.data, indices.data,
                     np.int32(m), np.int32(n),
                     cl.LocalMemory(64 * element_size), cl.LocalMemory(64 * 4))
    if ev is not None:
        ev.wait()
    return maxes, indices
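A hypothetical call for the function above, assuming a queue q from the project's clplatf module and a 2-D device array a (the kernel template and clplatf are project internals not shown here): maxes, argmaxes = max(q, a, axis=0) returns the per-column maxima along with what appear to be the corresponding argmax indices, both as device arrays.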
Example #9
    def minZerrKernSHG_gpu(self):
        krn = self.progs.progs["minZerrSHG"].minZerrSHG
        krn.set_scalar_arg_dtypes((None, None, None, None, None, None, None, None, np.int32))
        krn.set_args(
            self.Esig_t_tau_p_cla.data,
            self.Et_cla.data,
            self.dZ_cla.data,
            self.X0_cla.data,
            self.X1_cla.data,
            self.X2_cla.data,
            self.X3_cla.data,
            self.X4_cla.data,
            self.N,
        )
        ev = cl.enqueue_nd_range_kernel(self.q, krn, self.Et.shape, None)
        ev.wait()

        krn = self.progs.progs["normEsig"].normEsig
        krn.set_scalar_arg_dtypes((None, None, np.int32))
        krn.set_args(self.Esig_t_tau_p_cla.data, self.Esig_t_tau_norm_cla.data, self.N)
        ev = cl.enqueue_nd_range_kernel(self.q, krn, self.Esig_t_tau_p.shape, None)
        ev.wait()
        mx = cla.max(self.Esig_t_tau_norm_cla).get() * self.N * self.N

        #         Esig_t_tau = self.Esig_t_tau_p_cla.get()
        #         mx = ((Esig_t_tau*Esig_t_tau.conj()).real).max() * self.N*self.N

        X0 = cla.sum(self.X0_cla, queue=self.q).get() / mx
        X1 = cla.sum(self.X1_cla, queue=self.q).get() / mx
        X2 = cla.sum(self.X2_cla, queue=self.q).get() / mx
        X3 = cla.sum(self.X3_cla, queue=self.q).get() / mx
        X4 = cla.sum(self.X4_cla, queue=self.q).get() / mx

        root.debug("".join(("X0=", str(X0), ", type ", str(type(X0)))))

        root.debug(
            "".join(("Poly: ", str(X4), " x^4 + ", str(X3), " x^3 + ", str(X2), " x^2 + ", str(X1), " x + ", str(X0)))
        )
        # Polynomial in dZ (expansion of differential)
        X = np.array([X0, X1, X2, X3, X4]).astype(np.double)

        root.debug("".join(("Esig_t_tau_p norm max: ", str(mx / (self.N * self.N)))))

        return X
Example #10
File: fixup.py Project: bprather/clHARM
def fixup_floor(params, fflag, G, P):
    s = G.slices
    sh = G.shapes
    # Then apply floors:
    # 1. Precalculate geometric hard floors, not based on fluid relationships
    global rhoflr_geom, uflr_geom, U, D, Padd, Uadd, Dadd, dzero
    if rhoflr_geom is None:
        if "mks" in params['coordinates']:
            # New, steeper floor in rho
            # Previously raw r^-2 or r^-1.5
            r = G.coords.r(G.coord_all())
            rhoscal = 1/(r**2) * 1 / (1 + r/params['floor_char_r'])
            # Impose minimum rho with scaling above, minimum u as rho**gam
            rhoflr_geom = cl_array.to_device(params['queue'],
                                             np.maximum(params['rho_min'] * rhoscal, params['rho_min_limit']))
            uflr_geom = cl_array.to_device(params['queue'],
                                           np.maximum(params['u_min']*(rhoscal**params['gam']), params['u_min_limit']))
        elif "minkowski" in params['coordinates']:
            rhoflr_geom = cl_array.empty(params['queue'], sh.grid_scalar, dtype=np.float64).fill(params['rho_min']*1.e-2)
            uflr_geom = cl_array.empty(params['queue'], sh.grid_scalar, dtype=np.float64).fill(params['u_min']*1.e-2)
        # Arrays we should keep on hand: derived values and values for additional fluid packet
        U = cl_array.zeros_like(P)
        D = get_state(params, G, P, Loci.CENT)
        Padd = cl_array.zeros_like(P)
        Uadd = cl_array.zeros_like(P)
        Dadd = get_state(params, G, P, Loci.CENT)
        dzero = cl_array.zeros_like(P)

    global knl_floors
    if knl_floors is None:
        code = add_ghosts(replace_prim_names("""
        # 2. Magnetic floors: impose maximum magnetization sigma = bsq/rho, inverse beta prop. to bsq/U
        rhoflr_b := bsq[i,j,k] / sig_max
        uflr_b := bsq[i,j,k] / (sig_max * temp_max)
        
        # Maximum U floor
        uflr_max := if(uflr_b > uflr_geom[i,j,k], uflr_b, uflr_geom[i,j,k])
        
        # 3. Temperature ceiling: impose maximum temperature
        temp_real := P[UU,i,j,k] / temp_max
        temp_floor := uflr_max / temp_max
        rhoflr_temp := if(temp_real > temp_floor, temp_real, temp_floor)
        
        # Maximum rho floor
        rhoflr_max1 := if(rhoflr_geom[i,j,k] > rhoflr_b, rhoflr_geom[i,j,k], rhoflr_b)
        rhoflr_max := if(rhoflr_max1 > rhoflr_temp, rhoflr_max1, rhoflr_temp)
        
        # Initialize a dummy fluid parcel with any missing mass and internal energy, but not velocity
        rho_add := rhoflr_max - P[RHO,i,j,k]
        u_add := uflr_max - P[UU,i,j,k]
        Padd[RHO,i,j,k] = if(rho_add > 0, rho_add, 0) {id=p1,nosync=p2}
        Padd[UU,i,j,k] = if(u_add > 0, u_add, 0)      {id=p2,nosync=p1}
        """))
        knl_floors = lp.make_kernel(sh.isl_grid_scalar, code,
                                                [*primsArrayArgs("P", "Padd"),
                                                 *scalarArrayArgs("bsq", "rhoflr_geom", "uflr_geom"),
                                                 ...])
        knl_floors = lp.fix_parameters(knl_floors, sig_max=params['sigma_max'], temp_max=params['u_over_rho_max'],
                                       nprim=params['n_prim'])
        knl_floors = tune_grid_kernel(knl_floors, shape=sh.bulk_scalar, ng=G.NG,
                                      prefetch_args=["bsq", "rhoflr_geom", "uflr_geom"])
        print("Compiled fixup_floors")

    # Bulk call before bsq calculation below
    get_state(params, G, P, Loci.CENT, out=D)  # Reused below!
    bsq = G.dot(D['bcon'], D['bcov'])

    Padd.fill(0)
    evt, _ = knl_floors(params['queue'], P=P, bsq=bsq, rhoflr_geom=rhoflr_geom, uflr_geom=uflr_geom, Padd=Padd)
    evt.wait()

    if params['debug']:
        rhits = np.count_nonzero(Padd[s.RHO].get())
        uhits = np.count_nonzero(Padd[s.UU].get())
        print("Rho floors hit {} times".format(rhits))
        print("U floors hit {} times".format(uhits))

    if cl_array.max(Padd) > 0.:
        print("Fixing up")
        # Get conserved variables for the parcel
        get_state(params, G, Padd, Loci.CENT, out=Dadd)
        prim_to_flux(params, G, Padd, Dadd, 0, Loci.CENT, out=Uadd)

        # And for the current state
        #get_state(G, S, i, j, k, CENT, out=D) # Called just above
        prim_to_flux(params, G, P, D, 0, Loci.CENT, out=U)

        # Recover primitive variables.  Don't touch what we don't have to
        Ptmp, pflag = U_to_P(params, G, U+Uadd, P+Padd)
        P = cl_array.if_positive(Padd, Ptmp, P)
        del Ptmp

    # TODO Record specific floor hits/U_to_P fails
    #fflag |= cl_array.if_positive(rhoflr_geom - P[s.RHO], temp.fill(HIT_FLOOR_GEOM_RHO), zero)
    #fflag |= cl_array.if_positive(uflr_geom - P[s.UU], temp.fill(HIT_FLOOR_GEOM_U), zero)
    #fflag |= cl_array.if_positive(rhoflr_b - P[s.RHO], temp.fill(HIT_FLOOR_B_RHO), zero)
    #fflag |= cl_array.if_positive(uflr_b - P[s.UU], temp.fill(HIT_FLOOR_B_U), zero)
    #fflag |= cl_array.if_positive(rhoflr_temp - P[s.RHO], temp.fill(HIT_FLOOR_TEMP), zero)
    #fflag |= cl_array.if_positive(pflag, temp.fill(FLOOR_UTOP_FAIL), zero)

    if params['electrons']:
        # Reset the entropy after floor application
        P[s.KTOT] = (params['gam'] - 1.) * P[s.UU] / (P[s.RHO]**params['gam'])

    return P, fflag
Example #11
def test_spectral_poisson(ctx_factory, grid_shape, proc_shape, h, dtype,
                          timing=False):
    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)
    mpi = ps.DomainDecomposition(proc_shape, h, grid_shape=grid_shape)
    rank_shape, _ = mpi.get_rank_shape_start(grid_shape)
    fft = ps.DFT(mpi, ctx, queue, grid_shape, dtype)

    L = (3, 5, 7)
    dx = tuple(Li / Ni for Li, Ni in zip(L, grid_shape))
    dk = tuple(2 * np.pi / Li for Li in L)

    if h == 0:
        def get_evals_2(k, dx):
            return - k**2

        derivs = ps.SpectralCollocator(fft, dk)
    else:
        from pystella.derivs import SecondCenteredDifference
        get_evals_2 = SecondCenteredDifference(h).get_eigenvalues
        derivs = ps.FiniteDifferencer(mpi, h, dx, stream=False)

    solver = ps.SpectralPoissonSolver(fft, dk, dx, get_evals_2)

    pencil_shape = tuple(ni + 2*h for ni in rank_shape)

    statistics = ps.FieldStatistics(mpi, 0, rank_shape=rank_shape,
                                    grid_size=np.product(grid_shape))

    fx = cla.empty(queue, pencil_shape, dtype)
    rho = clr.rand(queue, rank_shape, dtype)
    rho -= statistics(rho)["mean"]
    lap = cla.empty(queue, rank_shape, dtype)
    rho_h = rho.get()

    for m_squared in (0, 1.2, 19.2):
        solver(queue, fx, rho, m_squared=m_squared)
        fx_h = fx.get()
        if h > 0:
            fx_h = fx_h[h:-h, h:-h, h:-h]

        derivs(queue, fx=fx, lap=lap)

        diff = np.fabs(lap.get() - rho_h - m_squared * fx_h)
        max_err = np.max(diff) / cla.max(clm.fabs(rho))
        avg_err = np.sum(diff) / cla.sum(clm.fabs(rho))

        max_rtol = 1e-12 if dtype == np.float64 else 1e-4
        avg_rtol = 1e-13 if dtype == np.float64 else 1e-5

        assert max_err < max_rtol and avg_err < avg_rtol, \
            f"solution inaccurate for {h=}, {grid_shape=}, {proc_shape=}"

    if timing:
        from common import timer
        time = timer(lambda: solver(queue, fx, rho, m_squared=m_squared), ntime=10)

        if mpi.rank == 0:
            print(f"poisson took {time:.3f} ms for {grid_shape=}, {proc_shape=}")
Example #12
def test_step(ctx_factory, proc_shape, dtype, Stepper):
    if proc_shape != (1, 1, 1):
        pytest.skip("test step only on one rank")

    if ctx_factory:
        ctx = ctx_factory()
    else:
        ctx = ps.choose_device_and_make_context()

    queue = cl.CommandQueue(ctx)

    from pystella.step import LowStorageRKStepper
    is_low_storage = LowStorageRKStepper in Stepper.__bases__

    rank_shape = (1, 1, 8)
    init_vals = np.linspace(1, 3, 8)
    if is_low_storage:
        y = cla.zeros(queue, rank_shape, dtype)
        y[0, 0, :] = init_vals
        y0 = y.copy()
    else:
        num_copies = Stepper.num_copies
        y = cla.zeros(queue, (num_copies, ) + rank_shape, dtype)
        y[0, 0, 0, :] = init_vals
        y0 = y[0].copy()

    dtlist = [.1, .05, .025]

    for n in [-1., -2., -3., -4.]:
        max_errs = {}
        for dt in dtlist:

            def sol(y0, t):
                return ((-1 + n) * (-t + y0**(1 - n) / (-1 + n)))**(1 /
                                                                    (1 - n))

            _y = ps.Field("y")
            rhs_dict = {_y: _y**n}

            stepper = Stepper(rhs_dict,
                              dt=dt,
                              halo_shape=0,
                              rank_shape=rank_shape)

            if is_low_storage:
                y[0, 0, :] = init_vals
            else:
                y[0, 0, 0, :] = init_vals

            t = 0
            errs = []
            while t < .1:
                for s in range(stepper.num_stages):
                    stepper(s, queue=queue, y=y, filter_args=True)
                t += dt

                if is_low_storage:
                    errs.append(cla.max(clm.fabs(1. - sol(y0, t) / y)).get())
                else:
                    errs.append(
                        cla.max(clm.fabs(1. - sol(y0, t) / y[0])).get())

            max_errs[dt] = np.max(errs)

        order = stepper.expected_order
        print(f"{order=}, {n=}")
        print(max_errs)
        print([
            max_errs[a] / max_errs[b] for a, b in zip(dtlist[:-1], dtlist[1:])
        ])

        order = stepper.expected_order
        rtol = dtlist[-1]**order if dtype == np.float64 else 1e-1
        assert list(max_errs.values())[-1] < rtol, \
            f"Stepper solution inaccurate for {n=}"

        for a, b in zip(dtlist[:-1], dtlist[1:]):
            assert max_errs[a] / max_errs[b] > .9 * (a/b)**order, \
                f"Stepper convergence failing for {n=}"
Example #13
    def _im_convert(norm='local'):
        
        """ Image conversion from np array to hsv PIL """

        assert norm in ('global', 'local')

        if gpu_info != None:
            
            context, device, queue, platform = gpu_info
            import gpu, string
            import pyopencl.array as cla
                
            # build the additional kernels
            hsv_convert = build('complex_to_rgb.cl')
            cl_abs = build('common_abs_f2_f.cl')
            
            # allocate new memory (frames, rows, columns, 3); uchar -> uint8?
            new_shape = gpu_store.shape+(3,)
            image_buffer = cla.empty(queue, new_shape, np.uint8)
            debug_buffer = cla.empty(queue, gpu_store.shape, np.float32)
            
            # each frame must be normalized by SOMETHING. if norm=='local',
            # that something is the max value of the abs of each frame. if
            # norm == 'global', that something is the max value of the entire
            # array. in either case, we make a list of maxvals which will
            # correspond to each frame so that we can use the same hsv
            # conversion kernel.
            
            if norm == 'global':
                # calculate the max for the entire array
                abs_store = cla.empty(queue, gpu_store.shape, np.float32)
                mv_gpu = cla.empty(queue, (gpu_store.shape[0],), np.float32)
                cl_abs.execute(queue, (gpu_store.size,), None, gpu_store.data,
                               abs_store.data)
                mv_gpu.fill(np.float32(cla.max(abs_store).get()))

            if norm == 'local':
                # calculate the max for one frame at a time. pull this value
                # and save it in the max_vals list. when finished, move
                # max_vals to the gpu.
                nz = len(distances)
                max_vals = np.zeros(nz, np.float32)
                slice_frame = build('common_slice_frame_f2.cl')
                slice_store = cla.empty(queue, (rows, cols), np.complex64)
                abs_store = cla.empty(queue, (rows, cols), np.float32)
                for n in xrange(nz):
                    slice_frame.execute(queue, (rows, cols), None,
                                        slice_store.data, gpu_store.data,
                                        np.int32(n))
                    cl_abs.execute(queue, (slice_store.size,), None,
                                   slice_store.data, abs_store.data)
                    max_vals[n] = np.float32(cla.max(abs_store).get())
                mv_gpu = cla.to_device(queue, max_vals.astype(np.float32))

            # run the complex -> hsv -> rgb converter on each pixel
            hsv_convert.execute(queue, gpu_store.shape, None, gpu_store.data,
                                image_buffer.data, mv_gpu.data).wait()

            # now convert to pil objects
            import scipy.misc as smp
            image_buffer = image_buffer.get()
            print image_buffer.shape
            images = [smp.toimage(i) for i in image_buffer]
            return gpu_store.get(), images
        
        if gpu_info == None:
            
            # now convert frames to pil objects
            import scipy.misc as smp
            import io
            images = [smp.toimage(io.complex_hsv_image(x)) for x in store]
            return store, images
Example #14
def _max_OCL(a, queue=None):
    return cl_array.max(a=a, queue=queue)
Example #15
    def max(self, a):
        import pyopencl.array as cl_array
        return cl_array.max(a, queue=self._array_context.queue).get()[()]
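The trailing .get()[()] converts the reduction result to a host value: .get() yields a zero-dimensional NumPy array, and indexing it with an empty tuple extracts that single entry as a NumPy scalar.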