Example #1
    def _gen_func(self):
        n_dim = self.system.n_dim
        nb_float = float64
        if self.system.dtype == np.dtype(np.float32):
            nb_float = float32

        @cuda.jit(int32(nb_float[:], nb_float[:], int32[:]), device=True)
        def cu_cell_index(x, box, ibox):
            ret = 0
            n_cell = 1
            for i in range(0, x.shape[0]):
                tmp = x[i] / box[i]
                if tmp < -0.5 or tmp > 0.5:
                    return -1
                ret = ret + floor((tmp + 0.5) * ibox[i]) * n_cell
                n_cell = n_cell * ibox[i]
            return ret

        @cuda.jit(void(int32[:], int32[:], int32[:, :]))
        def cu_cell_map(ibox, dim, ret):
            cell_i = cuda.grid(1)
            if cell_i >= ret.shape[0]:
                return
            cell_vec_i = cuda.local.array(n_dim, int32)
            cell_vec_j = cuda.local.array(n_dim, int32)
            cu_unravel_index_f(cell_i, ibox, cell_vec_i)
            for j in range(ret.shape[1]):
                cu_unravel_index_f(j, dim, cell_vec_j)
                for k in range(n_dim):
                    cell_vec_j[k] = cell_vec_i[k] + cell_vec_j[k] - 1
                cell_j = cu_ravel_index_f_pbc(cell_vec_j, ibox)
                ret[cell_i, j] = cell_j

        @cuda.jit(
            void(nb_float[:, :], nb_float[:], int32[:], int32[:, :], int32[:],
                 int32[:], int32[:], nb_float[:]))
        def cu_cell_list(x, box, ibox, cell_list, cell_counts, cells, cell_max,
                         out_of_box):
            pi = cuda.grid(1)
            if pi >= x.shape[0]:
                return
            # xi = cuda.local.array(ndim, dtype=float64)
            # for k in range(ndim):
            # xi[k] = x[pi, k]
            xi = x[pi]
            ic = cu_cell_index(xi, box, ibox)
            if ic == -1:
                out_of_box[0] = nb_float(pi)
                for d in range(n_dim):
                    out_of_box[d + 1] = x[pi, d]
            cells[pi] = ic
            index = cuda.atomic.add(cell_counts, ic, 1)
            if index < cell_list.shape[0]:
                cell_list[ic, index] = pi
            else:
                cuda.atomic.max(cell_max, 0, index + 1)

        return cu_cell_map, cu_cell_list
Example #2
    def compile(cls):
        if cls.is_compiled():
            return cls._compiled

        if cls.state_dtype is None:
            cls.state_ntype = numba.void
        else:
            cls.state_ntype = numba.typeof(cls.state_dtype).dtype

        solution_state_ntype = numba.types.Array(
            cls.Optimizer.Problem.state_ntype, 2, 'C')
        solution_losses_ntype = numba.types.Array(Compiler.loss_ntype, 2, 'C')
        query_vector_ntype = numba.types.Array(numba.int32, 1, 'C')

        # Should match the signature of cls.schedule_work(...)
        schedule_work_ret_type = numba.types.Tuple((numba.int32, numba.int32))
        schedule_work_signature = schedule_work_ret_type(
            query_vector_ntype, cls.state_ntype, solution_state_ntype,
            solution_losses_ntype, numba.int32)

        # Should match the signature of cls.shuffle(...)
        shuffle_signature = numba.void(cls.state_ntype, solution_state_ntype,
                                       solution_losses_ntype)

        init_signature = numba.void(cls.state_ntype, query_vector_ntype)

        final_result_signature = numba.int32(cls.state_ntype,
                                             solution_state_ntype,
                                             solution_losses_ntype,
                                             numba.int32)

        allocator = Compiler.generate_allocator(cls.__name__, cls.state_dtype)

        compiled_schedule_work = Compiler.jit(cls.__name__, 'schedule_work',
                                              schedule_work_signature,
                                              cls.schedule_work)

        compiled_shuffle = Compiler.jit(cls.__name__, 'shuffle',
                                        shuffle_signature, cls.shuffle)

        compiled_init = Compiler.jit(cls.__name__, 'init', init_signature,
                                     cls.init)

        compiled_final_result = Compiler.jit(cls.__name__, 'final_result',
                                             final_result_signature,
                                             cls.final_result)

        cls._compiled = SimpleNamespace(schedule_work=compiled_schedule_work,
                                        shuffle=compiled_shuffle,
                                        allocator=allocator,
                                        init=compiled_init,
                                        final_result=compiled_final_result)

        return cls._compiled
Example #3
def _gen_func(dtype, n_dim):
    float = float64
    if dtype == np.dtype(np.float32):
        float = float32

    @cuda.jit(int32(float[:], float[:], int32[:]), device=True)
    def cu_cell_index(x, box, ibox):
        ret = floor((x[0] / box[0] + 0.5) * ibox[0])
        n_cell = ibox[0]
        for i in range(1, x.shape[0]):
            ret = ret + floor((x[i] / box[i] + 0.5) * ibox[i]) * n_cell
            n_cell = n_cell * ibox[i]
        return ret

    @cuda.jit(void(int32[:], int32[:], int32[:, :]))
    def cu_cell_map(ibox, dim, ret):
        cell_i = cuda.grid(1)
        if cell_i >= ret.shape[0]:
            return
        cell_vec_i = cuda.local.array(n_dim, int32)
        cell_vec_j = cuda.local.array(n_dim, int32)
        cu_unravel_index_f(cell_i, ibox, cell_vec_i)
        for j in range(ret.shape[1]):
            cu_unravel_index_f(j, dim, cell_vec_j)
            for k in range(n_dim):
                cell_vec_j[k] = cell_vec_i[k] + cell_vec_j[k] - 1
            cell_j = cu_ravel_index_f_pbc(cell_vec_j, ibox)
            ret[cell_i, j] = cell_j

    @cuda.jit(
        void(float[:, :], float[:], int32[:], float[:, :, :], int32[:],
             int32[:], int32[:]))
    def cu_cell_list(x, box, ibox, cell_list, cell_counts, cells, cell_max):
        pi = cuda.grid(1)
        if pi >= x.shape[0]:
            return
        # xi = cuda.local.array(ndim, dtype=float64)
        # for k in range(ndim):
        # xi[k] = x[pi, k]
        xi = x[pi]
        ic = cu_cell_index(xi, box, ibox)
        cells[pi] = ic
        index = cuda.atomic.add(cell_counts, ic, 1)
        if index < cell_list.shape[0]:
            for k in range(n_dim):
                cell_list[ic, index, k] = xi[k]
            cell_list[ic, index, n_dim] = float(pi)
            #cell_list_index[ic, index] = pi
        else:
            cuda.atomic.max(cell_max, 0, index + 1)

    return cu_cell_index, cu_cell_map, cu_cell_list
Example #4
    def test_nopython_flag(self):

        def foo(A, B):
            pass

        # nopython = True is fine
        guvectorize([void(float32[:], float32[:])], '(x)->(x)', target='cuda',
                    nopython=True)(foo)

        # nopython = False is bad
        with self.assertRaises(TypeError) as raises:
            guvectorize([void(float32[:], float32[:])], '(x)->(x)',
                        target='cuda', nopython=False)(foo)
        self.assertEqual("nopython flag must be True", str(raises.exception))
Example #5
    def test_nopython_flag(self):

        def foo(A, B):
            pass

        # nopython = True is fine
        guvectorize([void(float32[:], float32[:])], '(x)->(x)', target='cuda',
                    nopython=True)(foo)

        # nopython = False is bad
        with self.assertRaises(TypeError) as raises:
            guvectorize([void(float32[:], float32[:])], '(x)->(x)',
                        target='cuda', nopython=False)(foo)
        self.assertEqual("nopython flag must be True", str(raises.exception))
Example #6
    def test_get_regs_per_thread_unspecialized(self):
        # A kernel where the register usage per thread is likely to differ
        # between different specializations
        @cuda.jit
        def pi_sin_array(x, n):
            i = cuda.grid(1)
            if i < n:
                x[i] = 3.14 * math.sin(x[i])

        # Call the kernel with different arguments to create two different
        # definitions within the Dispatcher object
        N = 10
        arr_f32 = np.zeros(N, dtype=np.float32)
        arr_f64 = np.zeros(N, dtype=np.float64)

        pi_sin_array[1, N](arr_f32, N)
        pi_sin_array[1, N](arr_f64, N)

        # Check we get a positive integer for the two different variations
        sig_f32 = void(float32[::1], int64)
        sig_f64 = void(float64[::1], int64)
        regs_per_thread_f32 = pi_sin_array.get_regs_per_thread(sig_f32)
        regs_per_thread_f64 = pi_sin_array.get_regs_per_thread(sig_f64)

        self.assertIsInstance(regs_per_thread_f32, int)
        self.assertIsInstance(regs_per_thread_f64, int)

        self.assertGreater(regs_per_thread_f32, 0)
        self.assertGreater(regs_per_thread_f64, 0)

        # Check that getting the registers per thread for all signatures
        # provides the same values as getting the registers per thread for
        # individual signatures. Note that the returned dict is indexed by
        # (cc, argtypes) pairs (in keeping with definitions, ptx, LLVM IR,
        # etc.)
        regs_per_thread_all = pi_sin_array.get_regs_per_thread()
        cc = cuda.current_context().device.compute_capability
        self.assertEqual(regs_per_thread_all[cc, sig_f32.args],
                         regs_per_thread_f32)
        self.assertEqual(regs_per_thread_all[cc, sig_f64.args],
                         regs_per_thread_f64)

        if regs_per_thread_f32 == regs_per_thread_f64:
            # If the register usage is the same for both variants, there may be
            # a bug, but this may also be an artifact of the compiler / driver
            # / device combination, so produce an informational message only.
            print('f32 and f64 variant thread usages are equal.')
            print('This may warrant some investigation. Devices:')
            cuda.detect()
Example #7
class MathsMethods:
    @staticmethod
    @numba.njit([void(float64[:], float64[:]),
                 void(float64[:, :], float64[:, :]),
                 void(int64[:, :], int64[:, :]),
                 void(int64[:], int64[:]),
                 void(float64[:, :], int64[:, :]),
                 void(float64[:], float64)],
                **{**conf.JIT_FLAGS, **{'parallel': False}})
    def add(output, addend):
        output += addend

    @staticmethod
    @numba.njit(void(int64[:, :], int64[:]), **conf.JIT_FLAGS)
    def row_modulo(output, divisor):
        for d in range(len(divisor)):
            for i in prange(output.shape[1]):
                output[d, i] %= divisor[d]

    @staticmethod
    @numba.njit(void(float64[:]), **conf.JIT_FLAGS)
    def floor(output):
        output[:] = np.floor(output)

    @staticmethod
    @numba.njit(void(int64[:, :], float64[:, :]), **conf.JIT_FLAGS)
    def floor_out_of_place(output, input_data):
        output[:] = np.floor(input_data)

    @staticmethod
    @numba.njit(**{**conf.JIT_FLAGS, **{'parallel': False}})
    def multiply(output, multiplier):
        output *= multiplier

    @staticmethod
    @numba.njit(**conf.JIT_FLAGS)
    def multiply_out_of_place(output, multiplicand, multiplier):
        output[:] = multiplicand * multiplier

    @staticmethod
    @numba.njit(**conf.JIT_FLAGS)
    def power(output, exponent):
        output[:] = np.power(output, exponent)

    @staticmethod
    @numba.njit(**{**conf.JIT_FLAGS, **{'parallel': False}})
    def subtract(output, subtrahend):
        output[:] -= subtrahend[:]

    @staticmethod
    # @numba.njit(void(float64[:]), **conf.JIT_FLAGS)
    def urand(output, seed=None):
        np.random.seed(seed)
        output.data[:] = np.random.uniform(0, 1, output.shape)
Example #8
class Arm(ComponentBase):

    def __init__(
            self, name, **kwargs):
        """
        Initialize this Arm component.

        Parameters:
        name (str): the name of this component
        """
        super().__init__(__class__, **kwargs)

        self.name = name
        self.propagate_params = ()

        # Aim a neutron at this arm to cause JIT compilation.
        import mcni
        neutrons = mcni.neutron_buffer(1)
        neutrons[0] = mcni.neutron(r=(0, 0, -1), v=(0, 0, 1), prob=1, time=0)
        self.process(neutrons)

    @cuda.jit(
        void(
            NB_FLOAT[:]
        ), device=True
    )
    def propagate(
            in_neutron
    ):
        pass
Example #9
 def default_signature(self, method, ext_type):
     if method.name == "__init__":
         argtypes = [numba.object_] * (method.py_func.__code__.co_argcount - 1)
         default_signature = numba.void(*argtypes)
         return default_signature
     else:
         return super(JitMethodMaker, self).default_signature(method, ext_type)
Example #10
 def guvectorize_compute(target: str, *, cache: bool = True):
     return nb.guvectorize(
         [nb.void(_nb_float[:, :], _nb_float[:], _nb_float, _nb_float[:])],
         '(m, p),(p),()->(m)',
         nopython=True,
         target=target,
         cache=cache)
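
Example #10 only builds and returns the configured decorator; the following is a hedged usage sketch, not code from the original project. It assumes `_nb_float` is a module-level alias for `nb.float64`, and `weighted_row_sums` is a hypothetical kernel matching the '(m, p),(p),()->(m)' layout.

import numba as nb
import numpy as np

_nb_float = nb.float64  # assumption: the module-level dtype alias used by the factory


@guvectorize_compute('cpu')                      # factory defined above
def weighted_row_sums(mat, vec, scale, out):     # hypothetical kernel
    # (m, p) matrix, (p) vector, () scalar -> (m) output
    for i in range(mat.shape[0]):
        acc = 0.0
        for j in range(mat.shape[1]):
            acc += mat[i, j] * vec[j]
        out[i] = acc * scale


mat = np.random.rand(4, 3)
vec = np.random.rand(3)
print(weighted_row_sums(mat, vec, 2.0))  # the gufunc allocates and returns the (4,) output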
Example #11
def makeWorker():
    savethread = pythonapi.PyEval_SaveThread
    savethread.argtypes = []
    savethread.restype = c_void_p

    restorethread = pythonapi.PyEval_RestoreThread
    restorethread.argtypes = [c_void_p]
    restorethread.restype = None

    def worker(p, q, R, job):
        threadstate = savethread()
    
        nQ = q.shape[1]
        for i in xrange(job[0], job[1]):
            for j in xrange(nQ):
                rx = p[i, 0] - q[0, j]
                ry = p[i, 1] - q[1, j]
                rz = p[i, 2] - q[2, j]

                R[i, j] = 1 / (1 + sqrt(rx * rx + ry * ry + rz * rz))
    
        restorethread(threadstate)
        
    signature = void(double[:, :], double[:, :], double[:, :], int64[:])
    worker_ext = jit(signature, nopython=True)(worker)
    
    return worker_ext
Example #12
def test_vector():
    def fun(s, x, p, out):
        out[0] = s[0] + x[0]
        out[1] = s[1] + x[1]

    s = numpy.random.random((2, ))
    x = numpy.random.random((2, ))
    p = numpy.random.random((2, ))
    out = numpy.zeros((2, ))
    out1 = numpy.zeros((2, ))

    from numba import guvectorize, float64, void
    gfun = guvectorize(
        ftylist=[void(float64[:], float64[:], float64[:], float64[:])],
        signature='(n),(n),(n)->(n)')(fun)

    sfun = standard_function(gfun, 2)

    fun(s, x, p, out)
    sfun(s, x, p, out=out1)

    out2 = sfun(s, x, p)

    out = sfun(s, x, p, diff=True)

    print("OUT")
    print(out)
Example #13
    def force_functions(self, funcs):  # general pair cases
        # @cuda.jit("void(float64[:], float64[:], float64[:], float64[:], float64[:,:])", device=True)
        # def func(a, b, param, forces):
        #    pass
        nb_float = float64
        if self.system.dtype == np.dtype(np.float32):
            nb_float = float32
        kernels = []
        cu_pbc_dist2 = self.nlist.dist_funcs['cu_pbc_dist2']
        for f in funcs:

            @cuda.jit(
                void(nb_float[:, :], nb_float[:], int32[:], int32[:],
                     nb_float[:], int32[:], int32, nb_float[:]))
            def _f(x, box, nl, nc, params, typeid, n_types, forces):
                i = cuda.grid(1)
                if i >= x.shape[0]:
                    return
                xi = x[i]
                ti = typeid[i]
                for k in range(nc[i]):
                    j = nl[i, k]
                    tj = typeid[j]
                    dij2 = cu_pbc_dist2(xi, x[j], box)
                    f(dij2, box, params[ti * n_types + tj], forces)

            kernels.append(_f)
        return kernels
Example #14
    def test_gufunc(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example #15
    def test_invalid_array_type(self):
        rgx = ".*Cannot infer the type of variable 'arr'.*"

        def unsupported_type():
            arr = cuda.shared.array(10, dtype=np.dtype('O'))  # noqa: F841

        with self.assertRaisesRegex(TypingError, rgx):
            cuda.jit(void())(unsupported_type)

        rgx = ".*Invalid NumPy dtype specified: 'int33'.*"

        def invalid_string_type():
            arr = cuda.shared.array(10, dtype='int33')  # noqa: F841

        with self.assertRaisesRegex(TypingError, rgx):
            cuda.jit(void())(invalid_string_type)
Example #16
    def test_for_pre(self):
        """Test issue with loop not running due to bad sign-extension at the for loop
        precondition.
        """
        @cuda.jit(void(float32[:, :], float32[:, :], float32[:]))
        def diagproduct(c, a, b):
            startX, startY = cuda.grid(2)
            gridX = cuda.gridDim.x * cuda.blockDim.x
            gridY = cuda.gridDim.y * cuda.blockDim.y
            height = c.shape[0]
            width = c.shape[1]

            for x in range(startX, width, (gridX)):
                for y in range(startY, height, (gridY)):
                    c[y, x] = a[y, x] * b[x]

        N = 8

        A, B = generate_input(N)

        F = np.empty(A.shape, dtype=A.dtype)

        blockdim = (32, 8)
        griddim = (1, 1)

        dA = cuda.to_device(A)
        dB = cuda.to_device(B)
        dF = cuda.to_device(F, copy=False)
        diagproduct[griddim, blockdim](dF, dA, dB)

        E = np.dot(A, np.diag(B))
        np.testing.assert_array_almost_equal(dF.copy_to_host(), E)
Example #17
class Wavelength_monitor(base):

    def __init__(
            self, name,
            xmin=0., xmax=0., ymin=0., ymax=0.,
            xwidth=0., yheight=0.,
            Lmin=0., Lmax=10., nchan=200,
            filename = "IL.h5",
            **kwargs
    ):
        self.name = name
        self.filename = filename
        if xwidth > 0:
            xmax = xwidth/2; xmin = -xmax
        if yheight > 0:
            ymax = yheight/2; ymin = -ymax
        assert xmin < xmax
        assert ymin < ymax
        dL = (Lmax-Lmin)/nchan
        self.L_centers = np.arange(Lmin+dL/2, Lmax, dL)
        self.out = np.zeros((3,nchan))
        self.out_N = self.out[0]
        self.out_p = self.out[1]
        self.out_p2 = self.out[2]
        self.propagate_params = (
            np.array([xmin, xmax, ymin, ymax, Lmin, Lmax]),
            nchan, self.out
        )

    def getHistogram(self, scale_factor=1.):
        import histogram as H
        axes = [('wavelength', self.L_centers, 'angstrom')]
        return H.histogram(
            'I(wavelength)', axes,
            data=self.out_p*scale_factor,
            errors=self.out_p2*scale_factor*scale_factor)

    @cuda.jit(
        void(NB_FLOAT[:], NB_FLOAT[:], int64, NB_FLOAT[:, :]),
        device=True)
    def propagate(neutron, limits, nchan, out):
        xmin, xmax, ymin, ymax, Lmin, Lmax = limits
        t0 = neutron[-2]
        x,y,z, t = prop_z0(neutron)
        if t0>t:
            return
        p = neutron[-1]
        vx,vy,vz = neutron[3:6]
        #
        if x<=xmin or x>=xmax or y<=ymin or y>=ymax:
            return
        v = math.sqrt(vx*vx+vy*vy+vz*vz)
        L = 2*math.pi/(v*V2K)
        if L<=Lmin or L>=Lmax:
            return
        iL = int(math.floor( (L-Lmin)/(Lmax-Lmin)*nchan ))
        cuda.atomic.add(out, ( 0, iL ), 1)
        cuda.atomic.add(out, ( 1, iL ), p)
        cuda.atomic.add(out, ( 2, iL ), p*p)
        return
Example #18
    def test_gufunc(self):
        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 1001  # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example #19
    def test_guvectorize_decor(self):
        gufunc = guvectorize(
            [void(float32[:, :], float32[:, :], float32[:, :])],
            '(m,n),(n,p)->(m,p)',
            target=self.target)(matmulcore)

        self.check_matmul_gufunc(gufunc)
Example #20
def apply_factory(fit_intercept, val):

    jitdec = jit(
        void(get_type(State)),
        nopython=NOPYTHON,
        nogil=NOGIL,
        boundscheck=BOUNDSCHECK,
        fastmath=FASTMATH,
    )

    if fit_intercept:

        @jitdec
        def apply(state):
            weights = state.weights
            for i in range(weights.shape[0]):
                weights[i] = val

    else:

        @jitdec
        def apply(state):
            weights = state.weights
            for i in range(1, weights.shape[0]):
                weights[i] = val

    return apply
Example #21
 def default_signature(self, method, ext_type):
     if method.name == '__init__':
         default_signature = numba.void(*self.argtypes)
         return default_signature
     else:
         return super(AutojitMethodMaker, self).default_signature(
             method, ext_type)
Example #22
    def find_active_loop(self):
        """ Build a numba compiled loop to find active nodes """

        if self.__p_find_active_loop is None:

            def __find_active_loop(active, active_nod, nod):
                """ Compiled loop to find active nodes """

                for i in range(active.shape[0]):
                    old_found = 0
                    nodi = nod[i]
                    activei = active[i]
                    active_nodi = active_nod[i]

                    # Iterate while there are changes
                    while True:
                        new_active = nodi[active_nodi].reshape(-1)
                        found = new_active.shape[0]

                        if found == old_found:
                            break

                        activei[new_active] = True
                        old_found = found

            # Compile
            self.__p_find_active_loop = nb.njit(
                [nb.void(nb.b1[:, :], nb.b1[:, :], nb.i8[:, :, :])],
                nogil=True)(__find_active_loop)

        return self.__p_find_active_loop
Example #23
    def test_gufunc_small(self):
        @guvectorize(
            [void(float32[:, :], float32[:, :], float32[:, :])],
            "(m,n),(n,p)->(m,p)",
            target="cuda",
        )
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example #24
def test_vector():

    def fun(s,x,p,out):
        out[0] = s[0] + x[0]
        out[1] = s[1] + x[1]

    s = numpy.random.random((2,))
    x = numpy.random.random((2,))
    p = numpy.random.random((2,))
    out = numpy.zeros((2,))
    out1 = numpy.zeros((2,))



    from numba import guvectorize, float64, void
    gfun = guvectorize(ftylist=[void(float64[:],float64[:],float64[:],float64[:])],
                        signature='(n),(n),(n)->(n)')(fun)

    sfun = standard_function(gfun,2)


    fun(s,x,p,out)
    sfun(s,x,p,out=out1)

    out2 = sfun(s,x,p)

    out = sfun(s,x,p,diff=True)

    print("OUT")
    print(out)
Example #25
def make_traverse_graph_via_bfs(callback, result_nb_type):
    """
        callback(vertex, incident_edge, result) - callback function
    """
    @jit(void(int32, planar_graph_nb_type, boolean[:], result_nb_type),
         nopython=True)
    def traverse_graph_via_bfs(start_vertex, graph, used_vertex_flags, result):

        queue = Queue()
        queue.append(start_vertex)

        used_vertex_flags[start_vertex] = True

        while not queue.is_empty():

            vertex = queue.popleft()

            for incident_edge_index in graph.get_incident_edge_indices(vertex):

                adjacent_vertex = graph.edges.get_opposite_vertex(
                    incident_edge_index, vertex)

                if not used_vertex_flags[adjacent_vertex]:

                    callback(vertex, graph.edges, incident_edge_index, result)
                    used_vertex_flags[adjacent_vertex] = True
                    queue.append(adjacent_vertex)

    return traverse_graph_via_bfs
Example #26
def _process_method_signatures(class_dict, ext_type):
    """
    Process all method signatures:

        * Verify signatures
        * Populate ext_type with method signatures (ExtMethodType)
    """
    for method_name, method in class_dict.iteritems():
        default_signature = None
        if (method_name == '__init__' and
                isinstance(method, types.FunctionType)):
            if inspect.getargspec(method).args:
                warnings.warn(
                    "Constructor for class '%s' has no signature, "
                    "assuming arguments have type 'object'" %
                                        ext_type.py_class.__name__)
            argtypes = [numba.object_] * (method.__code__.co_argcount - 1)
            default_signature = numba.void(*argtypes)

        method, restype, argtypes = _process_signature(ext_type, method,
                                                       default_signature)
        if method is None:
            continue

        signature = typesystem.ExtMethodType(
                    return_type=restype, args=argtypes, name=method.name,
                    is_class=method.is_class, is_static=method.is_static)
        ext_type.add_method(method_name, signature)
        class_dict[method_name] = method
Example #27
    def test_gufunc_auto_transfer(self):
        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        dB = cuda.to_device(B)

        C = gufunc(A, dB).copy_to_host()
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example #28
    def test_struct_model_type_static(self):
        nthreads = 64

        @cuda.jit(void(int32[::1], int32[::1]))
        def write_then_reverse_read_static(outx, outy):
            # Test creation
            arr = cuda.shared.array(nthreads, dtype=test_struct_model_type)

            i = cuda.grid(1)
            ri = nthreads - i - 1

            if i < len(outx) and i < len(outy):
                # Test set to arr
                obj = TestStruct(int32(i), int32(i * 2))
                arr[i] = obj

                cuda.syncthreads()
                # Test get from arr
                outx[i] = arr[ri].x
                outy[i] = arr[ri].y

        arrx = np.zeros((nthreads, ), dtype="int32")
        arry = np.zeros((nthreads, ), dtype="int32")

        write_then_reverse_read_static[1, nthreads](arrx, arry)

        for i, x in enumerate(arrx):
            self.assertEqual(x, nthreads - i - 1)
        for i, y in enumerate(arry):
            self.assertEqual(y, (nthreads - i - 1) * 2)
Example #29
    def inner(func):
        func_name = func.__name__
        sig = numba.void(
            numba.types.CPointer(numba.boolean)
            if result_type is types.BOOL
            else numba.types.CPointer(arg_type.numba_t),
            numba.types.CPointer(arg_type.numba_t),
            numba.types.CPointer(arg_type.numba_t),
        )
        jitfunc = numba.jit(func, nopython=True)

        @numba.cfunc(sig, nopython=True)
        def wrapper(z, x, y):  # pragma: no cover
            result = jitfunc(x[0], y[0])
            z[0] = result

        out = ffi.new("GrB_BinaryOp*")
        lib.GrB_BinaryOp_new(
            out,
            ffi.cast("GxB_binary_function", wrapper.address),
            result_type.gb_type,
            arg_type.gb_type,
            arg_type.gb_type,
        )

        return BinaryOp(func_name, arg_type.__name__, out[0])
Example #30
    def test_gufunc_new_axis(self):
        @guvectorize([void(float64[:, :], float64[:, :], float64[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore

        X = np.random.randn(10, 3, 3)
        Y = np.random.randn(3, 3)

        gold = ut.matrix_multiply(X, Y)

        res1 = gufunc(X, Y)
        np.testing.assert_allclose(gold, res1)

        res2 = gufunc(X, np.tile(Y, (10, 1, 1)))
        np.testing.assert_allclose(gold, res2)
Example #31
    def test_gufunc_auto_transfer(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        dB = cuda.to_device(B)

        C = gufunc(A, dB).copy_to_host()
        Gold = ut.matrix_multiply(A, B)
        self.assertTrue(np.allclose(C, Gold))
Example #32
    def test_gufunc_small(self):
        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4,
                      dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5,
                      dtype=np.float32).reshape(matrix_ct, 4, 5)

        ts = time()
        C = gufunc(A, B)
        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        non_stream_speedups.append(tcpu / tcuda)

        print(C, Gold)

        self.assertTrue(np.allclose(C, Gold))
Example #33
    def test_gufunc_new_axis(self):

        @guvectorize([void(float64[:, :], float64[:, :], float64[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore

        X = np.random.randn(10, 3, 3)
        Y = np.random.randn(3, 3)

        gold = ut.matrix_multiply(X, Y)

        res1 = gufunc(X, Y)
        np.testing.assert_allclose(gold, res1)

        res2 = gufunc(X, np.tile(Y, (10, 1, 1)))
        np.testing.assert_allclose(gold, res2)
Example #34
def _numba_lombscargle_signature(ty):
    return void(
        ty[::1],  # x
        ty[::1],  # y
        ty[::1],  # freqs
        ty[::1],  # pgram
        ty[::1],  # y_dot
    )
Example #35
    def test_type_with_struct_data_model(self):
        @cuda.jit(void(test_struct_model_type[::1]))
        def f(x):
            l = cuda.local.array(10, dtype=test_struct_model_type)
            l[0] = x[0]
            x[0] = l[0]

        self.check_dtype(f, test_struct_model_type)
Example #36
 def default_signature(self, method, ext_type):
     if method.name == '__init__':
         argtypes = [numba.object_] * (method.py_func.__code__.co_argcount - 1)
         default_signature = numba.void(*argtypes)
         return default_signature
     else:
         return super(JitMethodMaker, self).default_signature(
             method, ext_type)
Example #37
    def test_cuda_kernel(self):
        arr = cuda.to_device(np.array([1.]))

        @cuda.jit(void(f8[:]))
        def k1(a):
            a[0] += 2

        @cuda.jit(void(f8[:]))
        def k2(a):
            a[0] *= 3

        n1 = KernelNode(k1, [arr], [])
        n2 = KernelNode(k2, [arr], [n1])
        n2.build().launch()
        cuda.synchronize()

        self.assertTrue(np.all(arr.copy_to_host() == [9]))
Example #38
def make_inner_func( fn, *args ):
    signature = void( *args )
    @jit(signature)
    def inner_func( result, x ):
        threadstate = savethread()
        for i in range(len(result)):
            result[i] = fn( x[i] )
        restorethread(threadstate)
    return inner_func
Example #39
    def test_duplicated_output(self):
        @guvectorize([void(float32[:], float32[:])], '(x)->(x)', target='cuda')
        def foo(inp, out):
            pass  # intentionally empty; never executed

        inp = out = np.zeros(10, dtype=np.float32)
        with self.assertRaises(ValueError) as raises:
            foo(inp, out, out=out)
        self.assertEqual(str(raises.exception),
            "cannot specify 'out' as both a positional and keyword argument")
Example #40
def cf_from_source(source, func_globals):
    "Render the SSA graph given python source code"
    from numba import pipeline
    from numba import environment

    mod = ast.parse(source)
    func_ast = mod.body[0]

    env = environment.NumbaEnvironment.get_environment()
    func_env, _ = pipeline.run_pipeline2(
        env, None, func_ast, void(), pipeline_name="cf", function_globals=dict(func_globals)
    )
    return func_env.symtab, func_env.flow  # func_env.cfg
Example #41
    def test_copy_odd(self):

        @guvectorize([void(float32[:], float32[:])],
                     '(x)->(x)',
                     target='cuda')
        def copy(A, B):
            for i in range(B.size):
                B[i] = A[i]

        A = np.arange(11, dtype=np.float32) + 1
        B = np.zeros_like(A)
        copy(A, out=B)
        self.assertTrue(np.allclose(A, B))
Example #42
    def test_copy2d(self):

        @guvectorize([void(float32[:, :], float32[:, :])],
                     '(x, y)->(x, y)',
                     target='cuda')
        def copy2d(A, B):
            for x in range(B.shape[0]):
                for y in range(B.shape[1]):
                    B[x, y] = A[x, y]

        A = np.arange(30, dtype=np.float32).reshape(5, 6) + 1
        B = np.zeros_like(A)
        copy2d(A, out=B)
        self.assertTrue(np.allclose(A, B))
Example #43
    def test_invalid_flags(self):
        # Check invalid flags
        def foo(A, B):
            pass

        with self.assertRaises(TypeError) as raises:
            guvectorize([void(float32[:], float32[:])], '(x)->(x)',
                        target='cuda', what1=True, ever2=False)(foo)
        head = "The following target options are not supported:"
        msg = str(raises.exception)
        self.assertEqual(msg[:len(head)], head)
        items = msg[len(head):].strip().split(',')
        items = [i.strip("'\" ") for i in items]
        self.assertEqual(set(['what1', 'ever2']), set(items))
Example #44
def test_uint_int_div_ary():
    NPATHS = 10
    normdist = np.empty(1000) #np.random.normal(0., 1., 1000)
    seed = np.arange(0x80000000, 0x80000000 + NPATHS, dtype=np.uint32)

    gold = np.empty(NPATHS, dtype=np.int32)
    got = gold.copy()
    uint_int_div_ary(gold, normdist, seed)
    print('expect %s' % gold)
    sig = void(uint32[:], double[:], uint32[:])
    numba_func = jit(sig)(uint_int_div_ary)
    numba_func(got, normdist, seed)
    print('got %s' % got)

    assert all(gold == got)
Example #45
    def test_cpu_guvectorize(self):
        target = 'cpu'

        gufunc = guvectorize([void(float32[:,:], float32[:,:], float32[:,:])],
                             '(m,n),(n,p)->(m,p)',
                             target=target)(matmulcore)

        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2, 4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4, 5)

        C = gufunc(A, B)
        Gold = ut.matrix_multiply(A, B)

        self.assertTrue(np.allclose(C, Gold))
Example #46
    def test_issue_734(self):
        from numba import jit, void, int32, double

        @jit(void(int32, double[:]), forceobj=True)
        def forloop_with_if(u, a):
            if u == 0:
                for i in range(a.shape[0]):
                    a[i] = a[i] * 2.0
            else:
                for i in range(a.shape[0]):
                    a[i] = a[i] + 1.0

        for u in (0, 1):
            nb_a = np.arange(10, dtype='int32')
            np_a = np.arange(10, dtype='int32')
            forloop_with_if(u, nb_a)
            forloop_with_if.py_func(u, np_a)
            self.assertTrue(np.all(nb_a == np_a))
Example #47
    def cls_wrapper(cls):
        class_type = cls.class_type
        instance_type = class_type.instance_type
        jitmethods = class_type.jitmethods

        # don't make decisions about what to do with NULL values for users
        step_signature = void(instance_type, *signature.args)
        jitmethods['step'].compile(step_signature)

        # aggregates can always return a NULL value
        finalize_signature = signature.return_type(instance_type)
        jitmethods['finalize'].compile(finalize_signature)

        func_name = camel_to_snake(cls.__name__)
        step_name = '{}_step'.format(func_name)
        finalize_name = '{}_finalize'.format(func_name)

        step_mod = gen_step(cls, step_name)
        finalize_mod = gen_finalize(cls, finalize_name)

        genmod = ast.Module(body=step_mod.body + finalize_mod.body)

        mod = ast.fix_missing_locations(genmod)

        code = compile(mod, __file__, 'exec')
        scope = {
            cls.__name__: cls,
            'sqlite3_aggregate_context': sqlite3_aggregate_context,
            'sqlite3_result_null': sqlite3_result_null,
            'unsafe_cast': unsafe_cast,
            'sizeof': sizeof,
            'not_null': not_null,
            'SQLITE_NULL': SQLITE_NULL,
        }
        scope.update(CONVERTERS)
        scope.update((func.__name__, func) for func in RESULT_SETTERS.values())
        exec(code, scope)

        step = scope[step_name]
        finalize = scope[finalize_name]

        cls.step.address = step.address
        cls.finalize.address = finalize.address
        return cls
Example #48
    def test_gufunc_stream(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]

        gufunc = matmulcore
        gufunc.max_blocksize = 512

        #cuda.driver.flush_pending_free()
        matrix_ct = 1001 # an odd number to test thread/block division in CUDA
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        ts = time()
        stream = cuda.stream()
        dA = cuda.to_device(A, stream)
        dB = cuda.to_device(B, stream)

        dC = cuda.device_array(shape=(1001, 2, 5), dtype=A.dtype, stream=stream)
        dC = gufunc(dA, dB, out=dC, stream=stream)
        C = dC.copy_to_host(stream=stream)
        stream.synchronize()

        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        stream_speedups.append(tcpu / tcuda)

        self.assertTrue(np.allclose(C, Gold))
Example #49
def test_columns():

    def fun(s,x,out):
        out[0] = s[0] + x[0]
        out[1] = s[1] + x[1]

    from numba import guvectorize, float64, void
    gfun = guvectorize(ftylist=[void(float64[:],float64[:],float64[:])],
                        signature='(n),(n)->(n)')(fun)


    N = 5

    s = numpy.random.random((N,2,))
    x = numpy.random.random((2,))

    out = numpy.zeros((N,2,))
    out1 = numpy.zeros((N,2,))

    sfun = standard_function(gfun,2)

    for n in range(N):
        fun(s[n,:],x,out[n,:])

    sfun(s,x,out=out1)

    out2 = sfun(s,x)



    # print(out2)
    # print(s+x)
    print(s+x)
    # assert( (abs(out2-s-x).max())<1e-8 )
    print(out)
    print(out1)
    print(out2)

    out, out_s,  = sfun(s,x,diff=True)
Example #50
    def test_gufunc_small(self):

        @guvectorize([void(float32[:, :], float32[:, :], float32[:, :])],
                     '(m,n),(n,p)->(m,p)',
                     target='cuda')
        def matmulcore(A, B, C):
            m, n = A.shape
            n, p = B.shape
            for i in range(m):
                for j in range(p):
                    C[i, j] = 0
                    for k in range(n):
                        C[i, j] += A[i, k] * B[k, j]


        gufunc = matmulcore
        gufunc.max_blocksize = 512

        matrix_ct = 2
        A = np.arange(matrix_ct * 2 * 4, dtype=np.float32).reshape(matrix_ct, 2,
                                                                   4)
        B = np.arange(matrix_ct * 4 * 5, dtype=np.float32).reshape(matrix_ct, 4,
                                                                   5)

        ts = time()
        C = gufunc(A, B)
        tcuda = time() - ts

        ts = time()
        Gold = ut.matrix_multiply(A, B)
        tcpu = time() - ts

        non_stream_speedups.append(tcpu / tcuda)

        print(C, Gold)

        self.assertTrue(np.allclose(C, Gold))
Example #51
'''
Only added decorators to the linregr_python.py implementation.
'''

import numbapro
from numba import autojit, jit, f8, int32, void

@jit(void(f8[:], f8[:], f8[:], f8, int32))
def gradient_descent(X, Y, theta, alpha, num_iters):
    m = Y.shape[0]

    theta_x = 0.0
    theta_y = 0.0

    for i in range(num_iters):
        predict = theta_x + theta_y * X
        err_x = (predict - Y)
        err_y = (predict - Y) * X
        theta_x = theta_x - alpha * (1.0 / m) * err_x.sum()
        theta_y = theta_y - alpha * (1.0 / m) * err_y.sum()

    theta[0] = theta_x
    theta[1] = theta_y
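
A hedged driver sketch for the decorated function above; the synthetic data, learning rate, and iteration count are illustrative assumptions, not part of the original linregr example.

import numpy as np

N = 1000
X = np.random.rand(N)
Y = 3.0 + 2.0 * X + 0.1 * np.random.randn(N)   # intercept ~3, slope ~2
theta = np.zeros(2)
gradient_descent(X, Y, theta, 0.5, 2000)
print(theta)   # theta[0] -> ~3.0 (intercept), theta[1] -> ~2.0 (slope)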


Example #52
Where
	`a` is a scalar
	`x` and `y` are vectors

Prefix 'S' indicates single-precision float32 operations
"""
from __future__ import print_function
import sys
import numpy
from numba import cuda, vectorize, float32, void

# GPU code
# ---------


@cuda.jit(void(float32, float32[:], float32[:], float32[:]))
def saxpy(a, x, y, out):
    # Short for cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
    i = cuda.grid(1)
    # Map i to array elements
    if i >= out.size:
        # Out of range?
        return
    # Do actual work
    out[i] = a * x[i] + y[i]


"""
Vectorize turns a scalar function into an
elementwise operation over the input arrays.
"""
Example #53
import logging; logging.getLogger().setLevel(0)
import numba

def init(): print("init")

def deinit(): print("deinit")

def do_something(): raise Exception('Life is pain')

def handle(exn): print("handled %r" % exn)

@numba.jit(numba.void())
def testfn0():
    init()
    deinit()

'''
This generates the following:

define void @__numba_specialized_0___main___2E_testfn0() {
entry:
  %tuple_result = tail call { i32, i32* }* (i32, ...)* @PyTuple_Pack(i32 0)
  %0 = icmp eq { i32, i32* }* %tuple_result, null
  br i1 %0, label %cleanup_label, label %"no_error_13:0"

cleanup_label:                                    ; preds = %"no_error_13:05", %entry, %"no_error_13:0", %"no_error_13:01"
  %1 = phi { i32, i32* }* [ %tuple_result4, %"no_error_13:05" ], [ null, %entry ], [ null, %"no_error_13:0" ], [ null, %"no_error_13:01" ]
  %2 = phi { i32, i32* }* [ %7, %"no_error_13:05" ], [ null, %entry ], [ null, %"no_error_13:0" ], [ null, %"no_error_13:01" ]
  %3 = phi { i32, i32* }* [ %4, %"no_error_13:05" ], [ null, %entry ], [ null, %"no_error_13:0" ], [ %4, %"no_error_13:01" ]
  tail call void @Py_XDECREF({ i32, i32* }* %tuple_result)
  tail call void @Py_XDECREF({ i32, i32* }* %3)
Example #54
    def test_guvectorize_decor(self):
        gufunc = guvectorize([void(float32[:,:], float32[:,:], float32[:,:])],
                             '(m,n),(n,p)->(m,p)',
                             target=self.target)(matmulcore)

        self.check_matmul_gufunc(gufunc)
Example #55
        pmt_gain = c.get('pmt_reference_gain', 2e6)
    if pmt_gain == 0:
        return 0
    return adc_to_e / pmt_gain


def get_detector_by_channel(config):
    """Return a channel -> detector lookup dictionary from a configuration"""
    detector_by_channel = {}
    for name, chs in config['channels_in_detector'].items():
        for ch in chs:
            detector_by_channel[ch] = name
    return detector_by_channel


@numba.jit(numba.void(numba.float64[:], numba.int64[:, :], numba.int64, numba.int64),
           nopython=True)
def extend_intervals(w, intervals, left_extension, right_extension):
    """Extends intervals on w by left_extension to left and right_extension to right, never exceeding w's bounds
    :param w: Waveform intervals live on. Only used for edges (kind of pointless to pass...)
    :param intervals: numpy N*2 array of ints of interval bounds
    :param left_extension: Extend intervals left by this number of samples,
                           or as far as possible until the end of another interval / the end of w.
    :param right_extension: Same, extend to right.
    :return: None, modifies intervals in place
    When two intervals' extension claims compete, right extension has priority.
    Boundary indices are inclusive, i.e. without any extension settings, the right boundary is the last index
    which was still above low_threshold
    """
    n_intervals = len(intervals)
    last_index_in_w = len(w) - 1
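    # --- The scraped example is truncated here. What follows is a hedged sketch of a
    # --- completion consistent with the docstring above (clamp to w's bounds, do not
    # --- cross neighbouring intervals, right extension wins); it is NOT the original code.
    for i in range(n_intervals):
        # Right extensions first, so they take priority over the left extension of the
        # following interval; never run past the next interval or the end of w.
        right_limit = intervals[i + 1, 0] - 1 if i < n_intervals - 1 else last_index_in_w
        intervals[i, 1] = min(intervals[i, 1] + right_extension, right_limit)
    for i in range(n_intervals):
        # Then extend left, never crossing the (possibly already extended) previous interval.
        left_limit = intervals[i - 1, 1] + 1 if i > 0 else 0
        intervals[i, 0] = max(intervals[i, 0] - left_extension, left_limit)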
Example #56
def precompile(ex, signature=(), context={}):
    """Compile the expression to an intermediate form.
    """
    if isinstance(ex, (str, unicode)):
        #XXX: we might want to work directly with (python's) AST
        # and do numexpr transformations directly at that level instead of going
        # str -> Expression -> ast -> ...
        types = dict(signature)
        ex = stringToExpression(ex, types, context)

    if signature:
        argnames = [name for (name, type_) in signature]
    else:
        # this can only occur when NumExpr() is called directly without a
        # signature, and in that case we have no choice but to use
        # the default type for all arguments (double)
        argnames = get_argnames(ex)
        signature = [(name, double) for name in argnames]

    if ex.value in ('sum', 'prod'):
        reduction_func = getattr(np, ex.value)
        args = ex.children
        # axis is second arg
        assert len(args) == 2
        ex, axis = args
        axis = axis.value
    else:
        reduction_func = None
        axis = None

    ast_expr = ex.toPython()
    # print ast.dump(ast_expr, annotate_fields=False)
    ast_func = ast_expr_to_ast_func(ast_expr, argnames)
    inner_func = ast_func_to_func(ast_func)
    # print ast.dump(ast_func, annotate_fields=False)
    res_type = kind_to_type[ex.astKind]
    full_sig = [('__result__', res_type)] + signature
    arg_types = [type_to_numba[type_] for name, type_ in full_sig]
    jit_signature = void(*[t[:] for t in arg_types])
    inner_func_nb = jit(jit_signature, nopython=True)(inner_func)

    if reduction_func is not None:
        # this is a hack. To do it (more) correctly (with multithreading),
        # I would need to use a different template_func:

        # for i in range(len(__result__)):
            # __result__[0] += __expr_placeholder__
          
        def func(*args, **kwargs):
            # order, casting, ex_uses_vml
            out = kwargs.pop('out', None)
            if out is not None:
                raise NotImplementedError()

            shape = args[0].shape
            args = [a.ravel() for a in args]
            tmp_out = np.empty(shape, dtype=res_type)
            inner_func_nb(tmp_out.ravel(), *args)
            return reduction_func(tmp_out, axis=axis)
    else:
        def func(*args, **kwargs):
            numthreads = utils.num_threads
            # only scalars
            if any(isinstance(arg, np.ndarray) for arg in args):
                shape = args[0].shape
                if any(arg.shape != shape for arg in args[1:]):
                    args = np.broadcast_arrays(*args) 
                    shape = args[0].shape
            
                out = kwargs.pop('out', None)
                if out is None:
                    out = np.empty(shape, dtype=res_type)

                # "flatten" arguments

                # we cannot use order="K" which is most efficient, in case arguments
                # have not the same in-memory layout, because we need the same
                # target memory layout for all arguments.
                #XXX: can't we test for that and convert only if really necessary?
                args = [out.ravel()] + [a.ravel() for a in args]
                length = len(args[0])
                # TODO: it might be better to make sure the starting bounds
                #       are aligned to X bytes
                # TODO: it might be better to not multithread at all if
                #       length < THRESHOLD
                chunklen = (length + numthreads - 1) // numthreads
                bounds = [(i * chunklen, min((i + 1) * chunklen, length))
                          for i in range(numthreads)]
                assert bounds[-1][1] == length
                chunks = [[arg[start:stop] for arg in args]
                           for start, stop in bounds]
                threads = [threading.Thread(target=inner_func_nb, args=chunk)
                           for chunk in chunks[:-1]]
                for thread in threads:
                    thread.start()

                # the main thread handles the last chunk
                inner_func_nb(*chunks[-1])

                for thread in threads:
                    thread.join()
                return out
            else:
                # all arguments are scalar
                out = np.empty(1, dtype=res_type)
                args = [out] + [np.array([a]) for a in args]
                inner_func_nb(*args)
                return out[0]

    def run(*args, **kwargs):
        return func(*args, **kwargs)
    func.run = run
    return func
Example #57
        if np.sqrt((x[i]-x0)**2+(y[i]-y0)**2) < r:
                exposure_indices = np.vstack((exposure_indices,np.array([x[i],y[i]],dtype=np.int32)))

    return exposure_indices



outfilename = 'test.txt'



@jit(float32(float32,float32,float32,float32),nopython=True)
def dist(x0,y0,x,y):
    return math.sqrt( (x0-x)*(x0-x)+(y0-y)*(y0-y) )

@jit(void(float32[:,:],int32[:,:],float32[:]),nopython=True,parallel= True)
def set_doses_field(field, exposure_indices, doses):
    for i in prange(doses.shape[0]):
        field[exposure_indices[i,0],exposure_indices[i,1]] = doses[i]

@jit(void(float32[:,:],int32[:,:],float32),nopython=True)
def set_target(target, exposure_indices, dose):
    for i in range(exposure_indices.shape[0]):
        target[exposure_indices[i,0],exposure_indices[i,1]] = dose

@njit(void(float32[:,:],float32[:,:],float32[:],float32[:]),parallel=True)
def convolve_with_vector(field,exposure,v,h):
    buf = np.zeros(field.shape,dtype=np.float32)

    for j in prange(field.shape[1]):
        for i in range(field.shape[0]):
Example #58
  
savethread = pythonapi.PyEval_SaveThread
savethread.argtypes = []
savethread.restype = c_void_p

restorethread = pythonapi.PyEval_RestoreThread
restorethread.argtypes = [c_void_p]
restorethread.restype = None

def inner_func(result, a, b):
    threadstate = savethread()
    for i in range(len(result)):
        result[i] = exp(2.1 * a[i] + 3.2 * b[i])
    restorethread(threadstate)

signature = void(double[:], double[:], double[:])
inner_func_nb = jit(signature, nopython=True)(inner_func)
func_nb = make_singlethread(inner_func_nb)
func_nb_mt = make_multithread(inner_func_nb, nthreads)
            
def func_np(a, b):
    return np.exp(2.1 * a + 3.2 * b)

a = np.random.rand(size)
b = np.random.rand(size)
c = np.random.rand(size)

correct = timefunc(None, "numpy (1 thread)", func_np, a, b)
timefunc(correct, "numba (1 thread)", func_nb, a, b)
timefunc(correct, "numba (%d threads)" % nthreads, func_nb_mt, a, b)
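
The helpers make_singlethread and make_multithread are not shown in this fragment; a plausible sketch, modelled on the classic Numba no-GIL multithreading example (the float64 result dtype is an assumption), is:

import threading
import numpy as np

def make_singlethread(inner_func):
    # Run the nogil kernel over the whole input in the calling thread.
    def func(*args):
        length = len(args[0])
        result = np.empty(length, dtype=np.float64)
        inner_func(result, *args)
        return result
    return func

def make_multithread(inner_func, numthreads):
    # Split every argument into numthreads chunks and run the nogil kernel concurrently.
    def func_mt(*args):
        length = len(args[0])
        result = np.empty(length, dtype=np.float64)
        args = (result,) + args
        chunklen = (length + numthreads - 1) // numthreads
        chunks = [[arg[i * chunklen:(i + 1) * chunklen] for arg in args]
                  for i in range(numthreads)]
        threads = [threading.Thread(target=inner_func, args=chunk)
                   for chunk in chunks]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        return result
    return func_mt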
Example #59
import numpy as np
import unittest

from numba import void, int32, uint32, jit, int64

@jit(void(uint32[:], uint32, uint32))
def prng(X, A, C):
    for i in range(X.shape[0]):
        for j in range(100):
            v = (A * X[i] + C)
            X[i] = v & 0xffffffff

@jit(uint32())
def unsigned_literal():
    return abs(0xFFFFFFFF)

@jit(int64())
def unsigned_literal_64():
    return 0x100000000

@jit(int64(int32))
def constant_int_add(a):
    return 0xffffffff + a

class Test(unittest.TestCase):
    def test_prng(self):
        N = 100
        A = 1664525
        C = 1013904223
        X0 = np.arange(N, dtype=np.uint32)
        X1 = X0.copy()