コード例 #1
0
ファイル: cudaCGH.py プロジェクト: mal858/pyfab
 def updateGeometry(self):
     """Reallocate the device buffers and recompute the coordinate tables.

     Host-side copies of the phase buffer, the linear phase factors, the
     polar angle and the scaled radius are refreshed as well, after which
     ``sigUpdateGeometry`` is emitted.
     """
     shape = self.shape
     ncols, nrows = self.width, self.height

     # Device-side work buffers
     self._psi = gpuarray.zeros(shape, dtype=np.complex64)
     self._phi = gpuarray.zeros(shape, dtype=np.uint8)
     self._theta = gpuarray.zeros(shape, dtype=np.float32)
     self._rho = gpuarray.zeros(shape, dtype=np.float32)
     self._ex = gpuarray.zeros(ncols, dtype=np.complex64)
     self._ey = gpuarray.zeros(nrows, dtype=np.complex64)

     # Pixel coordinates, built as float32 and then promoted to complex64
     cols = gpuarray.arange(ncols, dtype=np.float32).astype(np.complex64)
     rows = gpuarray.arange(nrows, dtype=np.float32).astype(np.complex64)

     # Shift to the origin (xs, ys); the x axis is also scaled by cos(phis),
     # with phis given in degrees
     alpha = np.cos(np.radians(self.phis)).astype(np.float32)
     cols = alpha * (cols - self.xs)
     rows = rows - self.ys

     # Linear and quadratic phase factors along each axis
     self._iqx = 1j * self.qprp * cols
     self._iqy = 1j * self.qprp * rows
     self._iqxz = 1j * self.qpar * cols * cols
     self._iqyz = 1j * self.qpar * rows * rows

     # Polar coordinates written into the preallocated buffers
     # (presumably outer-product kernels -- confirm against their definition)
     self.outeratan2f(rows.real, cols.real, self._theta)
     self.outerhypot(rows.real, cols.real, self._rho)
     self._rho = self.qprp * self._rho

     # Host-side mirrors
     self.phi = np.zeros(shape, dtype=np.uint8)
     self.iqx = self._iqx.get()
     self.iqy = self._iqy.get()
     self.theta = self._theta.get()
     self.qr = self._rho.get()
     self.sigUpdateGeometry.emit()
コード例 #2
0
ファイル: test_gpuarray.py プロジェクト: rutsky/pycuda
    def test_take(self):
        """gpuarray.take must agree with NumPy fancy indexing."""
        indices = gpuarray.arange(0, 10000, 2, dtype=np.uint32)
        for dtype in (np.float32, np.complex64):
            source = gpuarray.arange(0, 600000, dtype=np.uint32).astype(dtype)
            expected = source.get()[indices.get()]
            taken = gpuarray.take(source, indices).get()

            assert (expected == taken).all()
コード例 #3
0
ファイル: test_gpuarray.py プロジェクト: inducer/pycuda
    def test_take(self):
        """Gather every second element on the GPU and compare with the host."""
        picks = gpuarray.arange(0, 10000, 2, dtype=np.uint32)
        for dtype in (np.float32, np.complex64):
            data = gpuarray.arange(0, 600000, dtype=np.uint32).astype(dtype)
            data_host = data.get()
            gathered = gpuarray.take(data, picks).get()

            reference = data_host[picks.get()]
            assert (reference == gathered).all()
コード例 #4
0
ファイル: test_cumath.py プロジェクト: DirkHaehnel/pycuda
    def test_ldexp(self):
        """Compare cumath.ldexp element by element with math.ldexp."""
        for size in sizes:
            mantissas = gpuarray.arange(size, dtype=np.float32)
            exponents = gpuarray.arange(size, dtype=np.float32)*1e-3
            gpu_out = cumath.ldexp(mantissas, exponents).get()

            triples = zip(mantissas.get(), exponents.get(), gpu_out)
            for mantissa, exponent, got in triples:
                # the host reference needs an integer exponent
                assert math.ldexp(mantissa, int(exponent)) == got
コード例 #5
0
ファイル: test_cumath.py プロジェクト: DirkHaehnel/pycuda
    def test_fmod(self):
        """Compare cumath.fmod element by element with math.fmod."""
        for size in sizes:
            numer = gpuarray.arange(size, dtype=np.float32)/10
            denom = gpuarray.arange(size, dtype=np.float32)/45.2 + 0.1
            gpu_out = cumath.fmod(numer, denom).get()

            triples = zip(numer.get(), denom.get(), gpu_out)
            for top, bottom, got in triples:
                assert math.fmod(top, bottom) == got
コード例 #6
0
    def test_fmod(self):
        """Check the GPU floating-point remainder against the host result."""
        for length in sizes:
            dividend = gpuarray.arange(length, dtype=np.float32) / 10
            divisor = gpuarray.arange(length, dtype=np.float32) / 45.2 + 0.1
            remainder = cumath.fmod(dividend, divisor)

            dividend_host = dividend.get()
            divisor_host = divisor.get()
            remainder_host = remainder.get()

            for idx, got in enumerate(remainder_host):
                want = math.fmod(dividend_host[idx], divisor_host[idx])
                assert want == got
コード例 #7
0
    def test_ldexp(self):
        """Check the GPU ldexp against the host math.ldexp."""
        for length in sizes:
            base = gpuarray.arange(length, dtype=np.float32)
            power = gpuarray.arange(length, dtype=np.float32) * 1e-3
            scaled = cumath.ldexp(base, power)

            base_host = base.get()
            power_host = power.get()
            scaled_host = scaled.get()

            for idx, got in enumerate(scaled_host):
                # math.ldexp only accepts an integer exponent
                want = math.ldexp(base_host[idx], int(power_host[idx]))
                assert want == got
コード例 #8
0
 def updateGeometry(self):
     """Allocate (w, h) buffers and precompute i*q and i*q*q per axis."""
     w, h = self.w, self.h
     shape = (w, h)

     # Field and phase buffers on the device, plus the host phase array
     self._psi = gpuarray.zeros(shape, dtype=np.complex64)
     self._phi = gpuarray.zeros(shape, dtype=np.uint8)
     self.phi = np.zeros(shape, dtype=np.uint8)
     self._ex = gpuarray.zeros(w, dtype=np.complex64)
     self._ey = gpuarray.zeros(h, dtype=np.complex64)

     # Pixel indices along each axis as complex64
     cols = gpuarray.arange(w, dtype=np.float32).astype(np.complex64)
     rows = gpuarray.arange(h, dtype=np.float32).astype(np.complex64)

     # Coordinates relative to rs, scaled by qpp (and by alpha along y)
     qx = self.qpp * (cols - self.rs.x())
     qy = self.alpha * self.qpp * (rows - self.rs.y())

     self.iqx = 1j * qx
     self.iqy = 1j * qy
     self.iqxsq = 1j * qx * qx
     self.iqysq = 1j * qy * qy
コード例 #9
0
    def add_ref_image(self, ref_image):
        """Store ``ref_image`` as one row of the reference matrix on the GPU.

        The first call triggers ``self._init(ref_image)``. An image whose
        byte content hashes to an already stored value is ignored. Once
        ``max_ref_images`` images are held, the slot at
        ``next_ref_image_index`` is overwritten; that index wraps around.
        """
        if not self.initialised:
            self._init(ref_image)

        # Content hash used to detect duplicates
        digest = hash(ref_image.tobytes())
        if digest in self.ref_image_hashes:
            return

        slot = self.next_ref_image_index
        if self.n_ref_images >= self.max_ref_images:
            # Storage is full: replace the hash held in the reused slot
            self.ref_image_hashes[slot] = digest
        else:
            self.ref_image_hashes.append(digest)
        self.n_ref_images = len(self.ref_image_hashes)

        # Upload the flattened image as double precision
        flat_image_gpu = gpuarray.to_gpu(ref_image.flatten().astype(float))

        # 1D index range of the destination row inside BT
        first = self.n_pixels * slot
        last = first + self.n_pixels
        target_indices = gpuarray.arange(first, last, 1, dtype=int)
        skcuda.misc.set_by_index(self.BT_gpu, target_indices, flat_image_gpu)

        # Advance to the next slot, wrapping so the oldest is overwritten
        self.next_ref_image_index = (slot + 1) % self.max_ref_images
コード例 #10
0
ファイル: test_cublas.py プロジェクト: lvaleriu/scikit-cuda
def bptrs(a):
    """
    Return a GPU array of pointers, one per matrix in the batch ``a``.

    The addresses start at ``a.ptr`` and advance by ``a.strides[0]`` for
    each of the ``a.shape[0]`` entries along the leading axis.
    """

    base = a.ptr
    step = a.strides[0]
    end = base + a.shape[0]*step
    return gpuarray.arange(base, end, step, dtype=cublas.ctypes.c_void_p)
コード例 #11
0
 def particle_indices_of_slice(self, slice_index):
     '''Return the int32 GPU array of particle indices belonging to the
     slice ``slice_index``: every index from its lower bound up to and
     including its upper bound.
     '''
     first = self.lower_bounds[slice_index]
     stop = self.upper_bounds[slice_index] + 1
     return gpuarray.arange(first, stop, dtype=np.int32)
コード例 #12
0
 def particles_within_cuts(self):
     '''Return the indices of all particles inside the slicing region
     [z_cut_tail, z_cut_head), i.e. from the first slice's lower bound
     up to and including the last slice's upper bound.'''
     first = self.lower_bounds[0]
     stop = self.upper_bounds[-1] + 1
     return gpuarray.arange(first, stop, dtype=np.int32)
コード例 #13
0
ファイル: test_gpuarray.py プロジェクト: inducer/pycuda
    def test_sum_allocator(self):
        """Check gpuarray.sum results and allocator propagation.

        Currently skipped unconditionally; the statements after the skip
        are kept so the test can be re-enabled once the issue is resolved.
        """
        # FIXME
        from pytest import skip

        skip("https://github.com/inducer/pycuda/issues/163")
        # crashes with  terminate called after throwing an instance of 'pycuda::error'
        # what():  explicit_context_dependent failed: invalid device context - no currently active context?

        import pycuda.tools

        pool = pycuda.tools.DeviceMemoryPool()

        length = np.random.randint(low=512, high=1024)
        expected_total = length * (length - 1) // 2

        values = gpuarray.arange(length, dtype=np.int32)
        default_sum = gpuarray.sum(values)
        pooled_sum = gpuarray.sum(values, allocator=pool.allocate)

        # Both reductions must produce 0 + 1 + ... + (length - 1)
        assert default_sum.get() == expected_total
        assert pooled_sum.get() == expected_total

        # The result must come from the allocator that was requested
        assert default_sum.allocator == values.allocator
        assert pooled_sum.allocator == pool.allocate
コード例 #14
0
    def test_arange(self):
        """Check that gpuarray.arange(12) yields 0, 1, ..., 11 in order."""
        a = gpuarray.arange(12)

        res = a.get()

        for i in range(12):
            # assertEqual replaces the deprecated assert_ alias, which was
            # removed in Python 3.12, and reports both values on failure.
            self.assertEqual(res[i], i)
コード例 #15
0
 def set_buffer(self, buf):
     """Attach the 3D buffer ``buf`` and build pointers to its slices.

     One pointer is generated per index along the first axis. The spacing
     is computed with a hard-coded 4 bytes per element.
     """
     depth, rows, cols = buf.shape
     slice_bytes = rows * cols * 4
     base = buf.ptr

     self.data = buf
     pointer_array = gpuarray.arange(base,
                                     base + depth * slice_bytes,
                                     slice_bytes,
                                     dtype=cublas.ctypes.c_void_p)
     self.bptrs = pointer_array.gpudata
     self.data_ptr = gpu_ptr(buf)
コード例 #16
0
    def test_abs(self):
        """Test that abs() on a GPU array restores the non-negative values.

        Also checks, for a range of larger sizes, that the last element of
        an arange equals its index.
        """
        a = gpuarray.arange(111)
        a = a * -1

        res = a.get()

        # The specific assert* methods replace the deprecated assert_ alias
        # (removed in Python 3.12) and give informative failure messages.
        for i in range(111):
            self.assertLessEqual(res[i], 0)

        a = abs(a)

        res = a.get()

        for i in range(111):
            self.assertGreaterEqual(res[i], 0)
            self.assertEqual(res[i], i)

        for i in range(100, 200):
            a = gpuarray.arange(500 * i)
            # Kept as a truthiness check: the left side is a device-side
            # element, so the comparison result is evaluated as a boolean.
            self.assertTrue(a[len(a)-1] == len(a)-1)
コード例 #17
0
    def test():
        """Compare the cumath function ``name`` with its NumPy counterpart.

        For every size ``s`` in ``sizes`` and every dtype in ``dtypes`` the
        function is evaluated on points starting at ``a`` with step
        ``(b - a) / s``; the largest absolute deviation between the GPU
        and CPU results must not exceed ``threshold``.
        """
        gpu_func = getattr(cumath, name)
        cpu_func = getattr(np, numpy_func_names.get(name, name))

        for s in sizes:
            for dtype in dtypes:
                # Build the input in the dtype under test. It used to be
                # fixed to float32, so the other dtypes were never
                # exercised and the failure message named the wrong one.
                args = gpuarray.arange(a, b, (b - a) / s, dtype=dtype)
                gpu_results = gpu_func(args).get()
                cpu_results = cpu_func(args.get())

                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), \
                        (max_err, name, dtype)
コード例 #18
0
ファイル: test_cumath.py プロジェクト: abergeron/pycuda
    def test():
        """Check the cumath function ``name`` against NumPy for all dtypes.

        Inputs start at ``a`` and advance by ``(b-a)/s`` for each size
        ``s`` in ``sizes``. The maximum absolute error between the GPU and
        CPU results must stay within ``threshold``.
        """
        gpu_func = getattr(cumath, name)
        cpu_func = getattr(np, numpy_func_names.get(name, name))

        for s in sizes:
            for dtype in dtypes:
                # Use the loop's dtype rather than a fixed float32, so each
                # entry of ``dtypes`` is tested and the assertion message
                # reports the dtype that was actually used.
                args = gpuarray.arange(a, b, (b-a)/s, dtype=dtype)
                gpu_results = gpu_func(args).get()
                cpu_results = cpu_func(args.get())

                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), \
                        (max_err, name, dtype)
コード例 #19
0
ファイル: test_gpuarray.py プロジェクト: rutsky/pycuda
    def test_abs(self):
        """abs() of a negated float32 arange must give back 0..110."""
        negated = -gpuarray.arange(111, dtype=np.float32)

        for value in negated.get():
            assert value <= 0

        restored = abs(negated).get()

        for expected, value in enumerate(restored):
            assert abs(value) >= 0
            assert value == expected
コード例 #20
0
ファイル: test_gpuarray.py プロジェクト: inducer/pycuda
    def test_abs(self):
        """Negate an arange on the GPU, then verify abs() undoes it."""
        count = 111
        device_values = -gpuarray.arange(count, dtype=np.float32)
        host_values = device_values.get()

        for position in range(count):
            assert host_values[position] <= 0

        device_values = abs(device_values)
        host_values = device_values.get()

        for position in range(count):
            entry = host_values[position]
            assert abs(entry) >= 0
            assert entry == position
コード例 #21
0
    def test_frexp(self):
        """Compare cumath.frexp element by element with math.frexp."""
        for size in sizes:
            values = gpuarray.arange(size, dtype=np.float32) / 10
            gpu_sig, gpu_exp = cumath.frexp(values)

            triples = zip(values.get(), gpu_sig.get(), gpu_exp.get())
            for value, sig, exp in triples:
                sig_true, ex_true = math.frexp(value)

                assert sig_true == sig
                assert ex_true == exp
コード例 #22
0
ファイル: test_cumath.py プロジェクト: DirkHaehnel/pycuda
    def test_frexp(self):
        """Check the GPU mantissa/exponent split against the host result."""
        for length in sizes:
            device_in = gpuarray.arange(length, dtype=np.float32)/10
            device_sig, device_exp = cumath.frexp(device_in)

            host_in = device_in.get()
            host_sig = device_sig.get()
            host_exp = device_exp.get()

            for idx, value in enumerate(host_in):
                reference = math.frexp(value)

                assert reference[0] == host_sig[idx]
                assert reference[1] == host_exp[idx]
コード例 #23
0
    def test_modf(self):
        """Compare cumath.modf element by element with math.modf.

        Integer parts must match exactly; fractional parts may differ by
        less than 1e-4.
        """
        for size in sizes:
            values = gpuarray.arange(size, dtype=np.float32) / 10
            gpu_frac, gpu_int = cumath.modf(values)

            triples = zip(values.get(), gpu_frac.get(), gpu_int.get())
            for value, frac, whole in triples:
                fracpart_true, intpart_true = math.modf(value)

                assert intpart_true == whole
                assert abs(fracpart_true - frac) < 1e-4
コード例 #24
0
ファイル: test_cumath.py プロジェクト: DirkHaehnel/pycuda
    def test_modf(self):
        """Check the GPU fractional/integer split against the host result."""
        tolerance = 1e-4
        for length in sizes:
            device_in = gpuarray.arange(length, dtype=np.float32)/10
            device_frac, device_int = cumath.modf(device_in)

            host_in = device_in.get()
            host_int = device_int.get()
            host_frac = device_frac.get()

            for idx, value in enumerate(host_in):
                frac_ref, int_ref = math.modf(value)

                # exact match for the integer part, tolerance for the rest
                assert int_ref == host_int[idx]
                assert abs(frac_ref - host_frac[idx]) < tolerance
コード例 #25
0
ファイル: test_gpuarray.py プロジェクト: anair13/mujoco-torch
    def test_sum_allocator(self):
        """gpuarray.sum must give the right total and honour ``allocator``."""
        import pycuda.tools
        pool = pycuda.tools.DeviceMemoryPool()

        length = np.random.randint(low=512, high=1024)
        expected_total = length * (length - 1) // 2

        values = gpuarray.arange(length, dtype=np.int32)
        default_sum = gpuarray.sum(values)
        pooled_sum = gpuarray.sum(values, allocator=pool.allocate)

        # Sum of 0 .. length-1, whichever allocator was used
        assert default_sum.get() == expected_total
        assert pooled_sum.get() == expected_total

        # Without an explicit allocator the input's allocator is inherited
        assert default_sum.allocator == values.allocator
        assert pooled_sum.allocator == pool.allocate
コード例 #26
0
ファイル: test_gpuarray.py プロジェクト: rutsky/pycuda
    def test_sum_allocator(self):
        """Sum a random-length arange with the default and a pool allocator."""
        import pycuda.tools
        pool = pycuda.tools.DeviceMemoryPool()

        n_items = np.random.randint(low=512, high=1024)
        data = gpuarray.arange(n_items, dtype=np.int32)

        sums = {
            "default": gpuarray.sum(data),
            "pooled": gpuarray.sum(data, allocator=pool.allocate),
        }

        # Closed form of 0 + 1 + ... + (n_items - 1)
        triangular = n_items*(n_items-1)//2
        assert sums["default"].get() == triangular
        assert sums["pooled"].get() == triangular

        # Each result must report the allocator it was created with
        assert sums["default"].allocator == data.allocator
        assert sums["pooled"].allocator == pool.allocate
コード例 #27
0
    def slice(self, beam, *args, **kwargs):
        '''Return a SliceSet object according to the saved
        configuration. Generate it using the keywords of the
        self.compute_sliceset_kwargs(beam) method.
        Defines interface to create SliceSet instances
        (factory method).

        Sort beam attributes by slice indices.

        Arguments:
        - statistics=True attaches mean values, standard deviations
        and emittances to the SliceSet for all planes.
        - statistics=['mean_x', 'sigma_dp', 'epsn_z'] only adds the
        listed statistics values (can be used to save time).
        Valid list entries are all statistics functions of Particles.

        Note: positional ``*args`` are accepted but not used here, and
        ``beam`` is reordered in place via ``beam.reorder``.
        '''
        sliceset_kwargs = self.compute_sliceset_kwargs(beam)
        slice_index_of_particle = sliceset_kwargs['slice_index_of_particle']

        # Output buffer for the sorting permutation, one int32 per
        # macroparticle.
        sorting_permutation = gpuarray.zeros(beam.macroparticlenumber,
                                             dtype=np.int32)
        # also resorts slice_index_of_particle:
        get_sort_perm_int(slice_index_of_particle, sorting_permutation)
        beam.reorder(sorting_permutation)
        # Drop the reference to the permutation once the beam is reordered.
        del sorting_permutation
        # Per-slice bounds are written into these buffers; this must run
        # after the sort above because it reads the resorted
        # slice_index_of_particle.
        # NOTE(review): presumably a search of each slice index 0..n_slices-1
        # in the sorted array -- confirm against lower/upper_bound_int.
        lower_bounds = gpuarray.empty(self.n_slices, dtype=np.int32)
        upper_bounds = gpuarray.empty(self.n_slices, dtype=np.int32)
        seq = gpuarray.arange(self.n_slices, dtype=np.int32)
        lower_bound_int(slice_index_of_particle, seq, lower_bounds)
        upper_bound_int(slice_index_of_particle, seq, upper_bounds)
        del seq

        # Beam parameters are extended with the bounds computed above.
        sliceset_kwargs['beam_parameters'] = (
            self.extract_beam_parameters(beam))
        sliceset_kwargs['beam_parameters'].update({
            'lower_bounds': lower_bounds,
            'upper_bounds': upper_bounds
        })

        sliceset = MeshSliceSet(**sliceset_kwargs)

        # Statistics are only attached when the caller asked for them.
        if 'statistics' in kwargs:
            self.add_statistics(sliceset, beam, kwargs['statistics'],
                                lower_bounds, upper_bounds)

        return sliceset
コード例 #28
0
    def compute_sliceset_kwargs(self, beam):
        '''Build the keyword arguments for a uniformly binned SliceSet.

        The longitudinal cuts from ``self.get_long_cuts(beam)`` are split
        into ``self.n_slices`` bins of equal width, and every particle is
        assigned a slice index through the mesh node ids of ``beam.z``.
        '''
        tail, head = self.get_long_cuts(beam)
        width = (head - tail) / float(self.n_slices)

        # The stop value lies a tiny fraction of a bin beyond the head cut,
        # presumably so that the head edge itself is generated by arange.
        stop = head + 1e-7 * width
        bin_edges = gpuarray.arange(tail, stop, width, dtype=np.float64)

        node_ids = self.mesh.get_node_ids(beam.z)

        arguments = {
            'z_bins': bin_edges,
            'slice_index_of_particle': node_ids,
            'mode': self.mode,
            'mesh': self.mesh,
            'context': self._context,
        }
        return arguments
コード例 #29
0
ファイル: time_test3_cuda.py プロジェクト: tdunn19/sunpy
def run_tests(timer, scale_factor):
    """PyCUDA port of time_test3.pro"""
    # Initialize linear algebra extensions to PyCUDA
    scikits.cuda.linalg.init()

    # Start timing from zero
    timer.reset()

    #
    # khughitt (2011/04/04): Non-CUDA tests from above will go here...
    #

    #
    # Begin CUDA tests
    #
    side = int(384 * math.sqrt(scale_factor))
    data = curandom.rand((side, side))

    timer.reset()

    # Test 17 - Transpose byte array, TRANSPOSE function
    for _ in range(100):
        out = scikits.cuda.linalg.transpose(data, pycuda.autoinit.device)
    timer.log('Transpose %d^2 byte, TRANSPOSE function x 100' % side)

    npoints = 2**(17 * scale_factor)
    data = gpuarray.arange(npoints, dtype=np.float32)
    timer.reset()

    # Test 20 - Forward and inverse FFT
    out = scikits.cuda.fft.fft(data)
    out = scikits.cuda.fft.ifft(out)
    timer.log('%d point forward plus inverse FFT' % npoints)
コード例 #30
0
ファイル: nccllib_test.py プロジェクト: apark263/nccl_wrapper
# Number of devices taking part in the all-reduce
ndev = 2
# A real list, so it behaves the same on Python 2 and Python 3
devlist = list(range(ndev))

# Setup the pycuda side
drv.init()
ctxs = [drv.Device(i).retain_primary_context() for i in devlist]

# Setup the communicator object
nc = NCCLComm(devlist)

# Now create gpuarrays for sending/recv buffers
srcs, dsts, size = [], [], 10

# Create some test arrays: 100, 110, ..., 190 as source and zeros as
# destination, one pair per device context
for ctx in ctxs:
    ctx.push()
    try:
        srcs.append(gpuarray.arange(100, 200, size, dtype='<f4'))
        dsts.append(gpuarray.zeros((size,), dtype='<f4'))
    finally:
        # Always restore the context stack, even if an allocation fails
        ctx.pop()

# Perform the reduction
nc.all_reduce(size, srcs, dsts)
nc.sync()

# Look at the results
for c, i, o in zip(ctxs, srcs, dsts):
    c.push()
    try:
        # print() with one argument works on both Python 2 and Python 3
        print(i.get())
        print(o.get())
    finally:
        c.pop()
コード例 #31
0
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
from pycuda.elementwise import ElementwiseKernel
import pycuda as cuda
import pycuda.autoinit
import numpy as np
from time import time

# is_equal = ElementwiseKernel("unsigned int *x, unsigned int *y, bool *z", "z[i] = x[i] == y[i]", "is_equal")
# is_equal = ElementwiseKernel("bool *z", "z[i] = 0", "equality_checker")
# Element-wise equality: z[i] receives the result of x[i] == y[i]
eq_checker = ElementwiseKernel("int *x, int *y, int *z", "z[i] = x[i] == y[i]",
                               "equality_checker")
# modulo = ElementwiseKernel("int *x, int *y, int *z", "z[i] = x[i] % y[i]", "modulo")

a_gpu = gpuarray.arange(5, dtype=np.int32)
b_gpu = gpuarray.arange(1, 6, dtype=np.int32)
# The output buffer is sized and typed like the inputs. It was previously
# sized by an undefined name, which raised NameError at import time.
eq_gpu = gpuarray.empty_like(a_gpu)
eq_checker(a_gpu, b_gpu, eq_gpu)

limit = 100  # limit = int(input('Limit: '))
start_time = time()
# primes1.txt holds comma-separated integers
with open('primes1.txt') as f:
    primes = np.fromiter(map(int,
                             f.read().strip().split(',')),
                         dtype=np.uint32)
p_gpu = gpuarray.to_gpu(primes)

antiprimes = []
most = 0
for x in gpuarray.arange(2, limit + 1, dtype=np.uint32):
    print(x)
コード例 #32
0
# Number of devices taking part in the all-reduce
ndev = 2
# Materialized as a list so it is identical on Python 2 and Python 3
devlist = list(range(ndev))

# Setup the pycuda side
drv.init()
ctxs = [drv.Device(i).retain_primary_context() for i in devlist]

# Setup the communicator object
nc = NCCLComm(devlist)

# Now create gpuarrays for sending/recv buffers
srcs, dsts, size = [], [], 10

# Create some test arrays inside each device's context
for ctx in ctxs:
    ctx.push()
    try:
        srcs.append(gpuarray.arange(100, 200, size, dtype='<f4'))
        dsts.append(gpuarray.zeros((size, ), dtype='<f4'))
    finally:
        # Pop even on failure so the context stack stays balanced
        ctx.pop()

# Perform the reduction
nc.all_reduce(size, srcs, dsts)
nc.sync()

# Look at the results
for c, i, o in zip(ctxs, srcs, dsts):
    c.push()
    try:
        # Function-call form of print: valid on Python 2 and Python 3
        print(i.get())
        print(o.get())
    finally:
        c.pop()
コード例 #33
0
ファイル: test_gpuarray.py プロジェクト: inducer/pycuda
 def test_arange(self):
     """gpuarray.arange must match numpy.arange for 12 float32 values."""
     device_values = gpuarray.arange(12, dtype=np.float32).get()
     host_values = np.arange(12, dtype=np.float32)
     assert (host_values == device_values).all()
コード例 #34
0
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
from pycuda.elementwise import ElementwiseKernel
from pycuda.curandom import rand as curand
import numpy

n = 1000

# For every index i > 1 the kernel zeroes the entries at the multiples
# i*2, i*3, ... The ``n`` in the kernel source is not a declared argument;
# presumably it is the element count supplied by ElementwiseKernel --
# confirm against the PyCUDA documentation.
sieveKernel = ElementwiseKernel(
    "int *a", "if(i > 1){for(int j = 2; i * j < n; j++){a[i*j] = 0;}}",
    "sieve")

a_gpu = gpuarray.arange(n, dtype=numpy.int32)
sieveKernel(a_gpu)

# Entries still greater than 1 were never zeroed
survivors = [value for value in a_gpu.get() if value > 1]

print('[ ', end="")
for value in survivors:
    print(value, end=" ")
print("\b]")
コード例 #35
0
ファイル: test_gpuarray.py プロジェクト: rutsky/pycuda
 def test_arange(self):
     """Compare a float32 arange built on the GPU with the NumPy one."""
     count = 12
     on_device = gpuarray.arange(count, dtype=np.float32)
     reference = np.arange(count, dtype=np.float32)
     matches = reference == on_device.get()
     assert matches.all()
コード例 #36
0
c = 5*a+6*b
e = time()
# print() with a single argument behaves the same on Python 2 and 3
print('cpu elapsed time: %f \n' % (e-s))

###################
# 4) map/reduce kernel

print('\n map/reduce kernel\n')
print('--------------------\n')

from pycuda.reduction import ReductionKernel

sz = 7000

# on device
a = gpuarray.arange(sz, dtype=numpy.float32)
b = gpuarray.arange(sz, dtype=numpy.float32)

# Map step: x[i] + y[i]; reduce step combines partial results a, b with
# a+2*b+b*b.
# NOTE(review): this reduce expression is not associative, so the result
# may depend on how the reduction is partitioned -- confirm it is intended.
krnl = ReductionKernel(numpy.float32, neutral="0",
        reduce_expr="a+2*b+b*b", map_expr="x[i] + y[i]",
        arguments="float *x, float *y")

# device perf
s = time()
my_dot_prod = krnl(a, b).get()
e = time()
print('kernel time: %f' % (e-s))

# on host
a2 = arange(sz, dtype=numpy.float32)
b2 = arange(sz, dtype=numpy.float32)
コード例 #37
0
                   dtype=dtype)

# Synchronous host-to-device copy
a = gpuarray.to_gpu(h_array)
print('a:\n{0}\nshape={1}\n'.format(a.get(), a.shape))

# Same copy, issued on an explicit stream
stream = drv.Stream()
b = gpuarray.to_gpu_async(h_array, stream=stream)
print('b:\n{0}\nshape={1}\n'.format(b.get(), b.shape))

# Uninitialized and zero-filled 100x100 device arrays
square = (100, 100)
c = gpuarray.empty(square, dtype=dtype)
print('c:\n{0}\nshape={1}\n'.format(c, c.shape))

d = gpuarray.zeros(square, dtype=dtype)
print('d:\n{0}\nshape={1}\n'.format(d, d.shape))

# 0.0, 1.0, ..., 99.0
e = gpuarray.arange(0.0, 100.0, 1.0, dtype=dtype)
print('e:\n{0}\nshape={1}\n'.format(e, e.shape))

# Element-wise selection driven by the condition e < 50
shifted_down = e - 100
shifted_up = e + 100
f = gpuarray.if_positive(e < 50, shifted_down, shifted_up)
print('f:\n{0}\nshape={1}\n'.format(f, f.shape))

ones = gpuarray.ones_like(e)
zeros = gpuarray.zeros_like(e)
g = gpuarray.if_positive(e < 50, ones, zeros)
print('g:\n{0}\nshape={1}\n'.format(g, g.shape))

# Element-wise maximum and minimum of e and f
h = gpuarray.maximum(e, f)
print('h:\n{0}\nshape={1}\n'.format(h, h.shape))

i = gpuarray.minimum(e, f)
print('i:\n{0}\nshape={1}\n'.format(i, i.shape))

# g is rebound here to the sum over a
g = gpuarray.sum(a)
コード例 #38
0
ファイル: test_gpuarray.py プロジェクト: spatel81/pycuda
 def test_take(self):
     """take() of every second multiple of 3 must equal 3 * index."""
     indices = gpuarray.arange(0, 200000, 2, dtype=np.uint32)
     multiples_of_three = gpuarray.arange(0, 600000, 3, dtype=np.float32)
     taken = gpuarray.take(multiples_of_three, indices).get()
     expected = (3 * indices).get()
     assert (expected == taken).all()
コード例 #39
0
time = int(round(22.0 * xres / yres))
#print time
xlen = time / float(size[0])
#print xlen

#initialize out
out = np.zeros(0)

#rgb aliases
r=0
g=1
b=2

for x in range(xres):
    #float32 degrades quality, but float64 not support by gpus
    # Sample times for column x: xlen seconds at 44100 samples per second
    t_gpu = gpuarray.arange(x*xlen, x*xlen + xlen, 1./44100, dtype=np.float32)
    tone_gpu = gpuarray.zeros(t_gpu.size, dtype=np.float32)
    # print() with a single argument is valid on both Python 2 and 3
    print("{0}%".format(round(100.0 * x / xres, 2)))
    for y in range(yres):
        p = d[x+xres*y]
        #keep playing with these values
        amplitude = 10**(1-5.25+4.25*(p[r]+p[g]+p[b])/(255*3))
#        print amplitude, math.log(amplitude+1)
#        amplitude = math.log(amplitude+1)# / math.log(255)
#        print x, y, amplitude
        # NOTE(review): ``and`` binds tighter than ``or``; the parentheses
        # only make the existing evaluation explicit. Confirm whether
        # "any channel > 10" was intended instead.
        if p[r] > 10 or (p[g] > 10 and p[b] > 10):
            tone_gpu += oscillator(t_gpu,
                                   amp = amplitude,
                                   #amp=(p[r]+p[g]+p[b]),
                                   freq=yscale * (yres - y))
    tone_gpu = tone_gpu + 1
コード例 #40
0
# @copyright: https://gitee.com/weili_yzzcq/C-and-C-plus-plus/CUDA_CPlusPlus/
# @copyright: https://github.com/2694048168/C-and-C-plus-plus/CUDA_CPlusPlus/
# @function: pycuda 高级内核函数之 归约内核函数。

import pycuda.gpuarray as gpuarray
import pycuda.driver as drv
import numpy
from pycuda.reduction import ReductionKernel
import pycuda.autoinit

n = 5
# Events bracket the GPU work so the elapsed time can be measured
start = drv.Event()
end = drv.Event()
start.record()

d_a = gpuarray.arange(n,dtype= numpy.uint32)
d_b = gpuarray.arange(n,dtype= numpy.uint32)

# Dot product: multiply element-wise (map), then add (reduce).
# The kernel arguments are declared "unsigned int" so the C types match
# the uint32 arrays and the uint32 output; they were declared "int".
kernel = ReductionKernel(numpy.uint32,neutral="0",reduce_expr="a+b",map_expr="d_a[i]*d_b[i]",arguments="unsigned int *d_a,unsigned int *d_b")
d_result = kernel(d_a,d_b).get()

end.record()
end.synchronize()
# time_till() reports milliseconds; convert to seconds
secs = start.time_till(end)*1e-3

print("Vector A")
print(d_a)

print("Vector B")
print(d_b)
コード例 #41
0
def gpu_rfftfreq(n, d=1.0, result=None):
    """Return the frequency bins k / (d * n) for k = 0 .. n // 2.

    The bin indices are generated on the GPU and copied back with
    ``.get()`` before scaling. The incoming ``result`` argument is not
    used: the name is simply rebound to the newly computed array.
    """
    scale = 1/(d*n)
    bin_indices = gpuarray.arange(0, n//2 + 1, dtype=bm.precision.real_t)
    result = scale*bin_indices.get()
    return result
コード例 #42
0
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import numpy
from pycuda.reduction import ReductionKernel

vector_length = 400

# int32 matches the kernel's C "int *" arguments. The numpy.int alias used
# before has been removed from NumPy, and it denoted the platform integer
# (64-bit on most systems), which the 32-bit kernel would misread.
input_vector_a = gpuarray.arange(vector_length, dtype=numpy.int32)
input_vector_b = gpuarray.arange(vector_length, dtype=numpy.int32)
# Dot product: multiply element-wise (map), then add (reduce)
dot_product = ReductionKernel(numpy.int32,
                       arguments="int *x, int *y",
                       map_expr="x[i]*y[i]",
                       reduce_expr="a+b", neutral="0")

# The name is rebound from the kernel to its result
dot_product = dot_product(input_vector_a, input_vector_b).get()

# All output uses the print() function, valid on Python 2 and Python 3
print("INPUT MATRIX A")
print(input_vector_a)

print("INPUT MATRIX B")
print(input_vector_b)

print("RESULT DOT PRODUCT OF A * B")
print(dot_product)

コード例 #43
0
import pycuda.reduction as rd
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import numpy as np

a = gpuarray.arange(400, dtype=np.float32)
b = gpuarray.arange(400, dtype=np.float32)

# Dot product: multiply element-wise (map), then add (reduce)
krnl = rd.ReductionKernel(np.float32,
                          neutral='0',
                          reduce_expr='a+b',
                          map_expr='x[i]*y[i]',
                          arguments='float *x, float *y')

my_dot_prod = krnl(a, b).get()

# print() with a single argument works on both Python 2 and Python 3
print(my_dot_prod)

# Host reference: sum of squares of 0..399
print(np.sum(np.arange(400)**2))
コード例 #44
0
def compute_pi(n):
    """Approximate pi by midpoint-rule integration of 4 / (1 + x^2) on [0, 1].

    ``n`` is the number of equal subintervals. The integrand is evaluated
    on the GPU in float32 and the result is returned as a host value.
    """
    h = 1.0 / n
    # Midpoints are h * (i + 0.5) for i = 0 .. n-1. The range must start at
    # 0: starting at 1 skips the first subinterval and makes the result too
    # small by roughly 4 / n.
    x = h * (ga.arange(0, n, dtype=np.float32) + 0.5)
    s = ga.sum(4.0 / (1.0 + x**2), dtype=np.float32)
    return s.get() * h
コード例 #45
0
ファイル: test_gpuarray.py プロジェクト: leifdenby/pycuda
 def test_take(self):
     """Gather from an array of multiples of 3 and compare with 3 * idx."""
     positions = gpuarray.arange(0, 200000, 2, dtype=np.uint32)
     source = gpuarray.arange(0, 600000, 3, dtype=np.float32)
     gathered = gpuarray.take(source, positions)
     reference = 3*positions
     assert (reference.get() == gathered.get()).all()
コード例 #46
0
# Operações de MapReduce em Paralelo na GPU

# Pacotes
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import numpy
from pycuda.reduction import ReductionKernel

# Vector length
vector_length = 400

# Vectors A and B. int32 matches the kernel's C "int *" arguments; the
# numpy.int alias used before has been removed from NumPy and denoted the
# platform integer (64-bit on most systems).
input_vector_a = gpuarray.arange(vector_length, dtype=numpy.int32)
input_vector_b = gpuarray.arange(vector_length, dtype=numpy.int32)

# Parallel reduction: multiply element-wise (map), then add (reduce)
dot_product = ReductionKernel(numpy.int32,
                              arguments="int *x, int *y",
                              map_expr="x[i]*y[i]",
                              reduce_expr="a+b",
                              neutral="0")

# Run the kernel; the name is rebound from the kernel to its result
dot_product = dot_product(input_vector_a, input_vector_b).get()

# Print the results
print("Matriz A")
print(input_vector_a)

print("Matriz B")
print(input_vector_b)
コード例 #47
0
ファイル: vector_expr.py プロジェクト: gimac/hedge
        if stats_callback is not None:
            stats_callback(size,  self,
                    kernel_rec.kernel.prepared_timed_call(vectors[0]._grid, results[0]._block, *args))
        else:
            kernel_rec.kernel.prepared_async_call(vectors[0]._grid, results[0]._block, self.stream, *args)

        return results




if __name__ == "__main__":
    # Small demo: compile 2*x+3*y+4*z and evaluate it on three GPU vectors
    test_dtype = numpy.float32

    import pycuda.autoinit
    from pymbolic import parse
    expr = parse("2*x+3*y+4*z")
    # print() with a single argument works on both Python 2 and Python 3
    print(expr)
    cexpr = CompiledVectorExpression(expr,
            lambda expr: (True, test_dtype),
            test_dtype)

    from pymbolic import var
    # Each variable of the expression is bound to a five-element vector
    ctx = {
        var("x"): gpuarray.arange(5, dtype=test_dtype),
        var("y"): gpuarray.arange(5, dtype=test_dtype),
        var("z"): gpuarray.arange(5, dtype=test_dtype),
        }

    print(cexpr(lambda expr: ctx[expr]))
コード例 #48
0
ファイル: pycudareduction.py プロジェクト: benlansdell/hydra
from pycuda.reduction import ReductionKernel
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
import numpy

# Two identical vectors 0..399 on the device
a = gpuarray.arange(400, dtype=numpy.float32)
b = gpuarray.arange(400, dtype=numpy.float32)

# Dot product: multiply element-wise (map), then add (reduce)
dot = ReductionKernel(
    dtype_out=numpy.float32,
    neutral="0",
    reduce_expr="a+b",
    map_expr="x[i]*y[i]",
    arguments="const float *x, const float *y",
)

# GPU result, followed by the host reference from numpy.dot
a_dot_b = dot(a, b).get()
a_host = a.get()
b_host = b.get()
a_dot_b_cpu = numpy.dot(a_host, b_host)
コード例 #49
0
ファイル: Context.py プロジェクト: EelcoHoogendoorn/ThreadPy
 def arange(self, *args, **kwargs):
     """Forward all arguments unchanged to ``gpuarray.arange``."""
     device_range = gpuarray.arange(*args, **kwargs)
     return device_range