Python cu_expr Beispiele, tvb.simulator._numba.util.cu_expr Python Beispiele

Beispiel #1

0

Datei anzeigen

def make_kernel(delays, n_thread_per_block, n_inner):
    """Build a CUDA kernel that integrates a delay-coupled phase network.

    The coupling is assembled from two compiled expressions: a pre-summation
    term ``sin(xj - xi)`` and a post-summation scaling by
    ``1 / delays.shape[0]``.

    Parameters
    ----------
    delays : array
        Connection delays. Only ``delays.max()`` and ``delays.shape[0]`` are
        read here (presumably integer step counts -- TODO confirm).
    n_thread_per_block : int
        Cast to ``int32``; used as the last dimension of the per-block shared
        afferent array and forwarded to ``cu_delay_cfun``.
    n_inner : int
        Cast to ``int32``; number of integration steps done per kernel call.

    Returns
    -------
    tuple
        ``(horizon, kernel)`` where ``horizon`` is
        ``next_pow_of_2(delays.max() + 1)`` and ``kernel`` is the
        ``cuda.jit``-compiled function.
    """
    horizon = next_pow_of_2(delays.max() + 1)
    cfpre = cu_expr('sin(xj - xi)', ('xi', 'xj'), {})
    cfpost = cu_expr('rcp_n * gx', ('gx', ), {'rcp_n': 1.0 / delays.shape[0]})
    n_thread_per_block = int32(n_thread_per_block)
    n_inner = int32(n_inner)
    dcf = cu_delay_cfun(horizon, cfpre, cfpost, 1, n_thread_per_block)

    @cuda.jit
    def kernel(step, state, update, buf, dt, omega, cvars, weights, delays,
               a_values, s_values, Z):
        i_t = cuda.threadIdx.x
        i_thread = cuda.blockIdx.x * cuda.blockDim.x + i_t
        # Shared scratch array with one slot per thread of the block; each
        # thread reads back its own slot at index i_t below.
        aff = cuda.shared.array((1, 1, 1, n_thread_per_block), float32)
        a = a_values[i_thread]
        # sqrt(dt) is evaluated once and reused for the Z scaling factor.
        sqrt_dt = math.sqrt(dt)
        s = sqrt_dt * math.sqrt(2.0 * s_values[i_thread])
        for i_step in range(n_inner):
            # First pass: compute every node's increment from the current
            # state before any node is modified.
            for i_post in range(weights.shape[0]):
                dcf(aff, delays, weights, state, i_post, i_thread, step[0],
                    cvars, buf)
                update[i_post, i_thread] = (
                    dt * (omega + a * aff[0, 0, 0, i_t])
                    + s * Z[i_step, i_post, i_thread])
            # Second pass: apply the increments.
            for i_post in range(weights.shape[0]):
                state[0, i_post, 0, i_thread] += update[i_post, i_thread]
            # Only the thread with global index 0 advances the step counter.
            # NOTE(review): syncthreads() is a block-level barrier, so threads
            # in other blocks are not ordered against this increment --
            # confirm the intended launch configuration.
            if i_thread == 0:
                step[0] += 1
            cuda.syncthreads()

    return horizon, kernel

Beispiel #2

0

Datei anzeigen

    def test_math_functions(self):
        """Check a compiled ``exp(x) + sin(y)`` expression against NumPy."""
        device_fn = cu_expr('exp(x) + sin(y)', ['x', 'y'], {})
        samples = numpy.random.randn(2, self.n_thread).astype(numpy.float32)
        lhs, rhs = samples
        result = numpy.zeros((self.n_thread,), numpy.float32)

        @self.jit_and_run(result, lhs, rhs)
        def kernel(out, x, y):
            # Each thread evaluates the expression for one element.
            tid = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
            out[tid] = device_fn(x[tid], y[tid])

        expected = numpy.exp(lhs) + numpy.sin(rhs)
        numpy.testing.assert_allclose(result, expected, rtol=1e-5, atol=1e-6)

Beispiel #3

0

Datei anzeigen

    def test_linear_constant_slopes(self):
        """Check a linear expression with constant slopes ``ai`` and ``aj``.

        The device function is compared with the second callable returned by
        ``cu_expr(..., return_fn=True)`` (presumably a host-side equivalent --
        TODO confirm).
        """
        slopes = {'ai': 0.3, 'aj': -0.84}
        arg_names = ['xi', 'xj', 'offset']
        device_fn, host_fn = cu_expr(
            'ai * xi + aj * xj + offset', arg_names, slopes, return_fn=True)

        n_rows = 10
        inputs = numpy.random.randn(
            3, n_rows, self.n_thread).astype(numpy.float32)
        out = numpy.zeros((n_rows, self.n_thread), numpy.float32)

        @self.jit_and_run(out, *inputs)
        def kernel(out, xi, xj, offset):
            # Each thread walks down its own column of the arrays.
            col = cuda.threadIdx.x + cuda.blockIdx.x * cuda.blockDim.x
            for row in range(out.shape[0]):
                out[row, col] = device_fn(
                    xi[row, col], xj[row, col], offset[row, col])

        numpy.testing.assert_allclose(
            out, host_fn(*inputs), rtol=1e-5, atol=1e-6)

Beispiel #4

0

Datei anzeigen

Datei: _numba_tests.py Projekt: maedoc/tvb-library

    def test_kuramoto(self):
        """Compare a CUDA Kuramoto kernel with reference Python simulations.

        One Python simulator is built per thread, each with a different
        coupling strength ``a``. The same network is then stepped by a CUDA
        kernel in which thread ``i`` uses ``a_values[i]``, and the resulting
        trajectories are compared with a loose tolerance.
        """
        # build & run Python simulations
        numpy.random.seed(42)
        n = 5

        # Only the first super-diagonal is non-zero: entry (i, i + 1) gets
        # both weight and delay i + 1.
        weights = numpy.zeros((n, n), numpy.float32)
        idelays = numpy.zeros((n, n), numpy.int32)
        for i in range(n - 1):
            idelays[i, i + 1] = i + 1
            weights[i, i + 1] = i + 1

        def gen_sim(a):
            """Return a configured simulator with coupling strength ``a``."""
            dt = 0.1
            conn = connectivity.Connectivity()
            conn.weights = weights
            # With speed 1.0, tract lengths of idelays * dt are meant to give
            # back idelays as delays in steps (read below as conn.idelays).
            conn.tract_lengths = idelays * dt
            conn.speed = 1.0
            sim = simulator.Simulator(
                coupling=py_coupling.Kuramoto(a=a),
                connectivity=conn,
                model=models.Kuramoto(omega=100 * 2 * numpy.pi / 1e3),
                monitors=monitors.Raw(),
                integrator=integrators.EulerDeterministic(dt=dt))
            sim.configure()
            # Same constant initial history as the CUDA buffers below.
            sim.history[:] = 0.1
            return sim

        a_values = numpy.r_[:self.n_thread].astype(numpy.float32)
        sims = [gen_sim(a) for a in a_values]

        py_data = []
        for sim in sims:
            ys = []
            # Each iteration yields a 1-tuple holding the (time, data) pair
            # of the single monitor.
            for (_, y), in sim(simulation_length=10.0):
                ys.append(y[0, :, 0])
            py_data.append(numpy.array(ys))
        py_data = numpy.array(py_data)

        # build CUDA kernels
        cfpre = cu_expr('sin(xj - xi)', ('xi', 'xj'), {})
        cfpost = cu_expr('rcp_n * gx', ('gx', ), {'rcp_n': 1.0 / n})
        horiz2 = next_pow_of_2(sims[0].horizon)
        dcf = cu_delay_cfun(horiz2,
                            cfpre,
                            cfpost,
                            1,
                            self.block_dim[0],
                            aff_node_stride=1)

        # build kernel
        dt = numba.float32(sims[0].integrator.dt)
        omega = numba.float32(sims[0].model.omega[0])
        cvars = numpy.array([0], numpy.int32)
        weights = sims[0].connectivity.weights.astype(numpy.float32)
        delays = sims[0].connectivity.idelays.astype(numpy.int32)

        @cuda.jit
        def kernel(step, state, coupling, aff, buf, dt, omega, cvars, weights,
                   delays, a_values):
            i_thread = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
            a = a_values[i_thread]
            for i_post in range(weights.shape[0]):
                dcf(aff, delays, weights, state, i_post, i_thread, step[0],
                    cvars, buf)
                coupling[i_post, i_thread] = a * aff[0, i_post, 0, i_thread]
                # Forward Euler step, applied to each node in turn.
                state[0, i_post, 0,
                      i_thread] += dt * (omega +
                                         a * aff[0, i_post, 0, i_thread])

        step = numpy.array([0], numpy.int32)
        state = (numpy.zeros(
            (1, n, 1, self.n_thread)) + 0.1).astype(numpy.float32)
        coupling0 = numpy.zeros((n, self.n_thread), numpy.float32)
        aff = numpy.zeros((1, n, 1, self.n_thread), numpy.float32)
        buf = numpy.zeros((n, horiz2, 1, self.n_thread), numpy.float32)
        buf += 0.1

        cu_data = numpy.zeros(py_data.shape, numpy.float32)
        # Per-step coupling values are recorded but not asserted on.
        cu_coupling0 = numpy.zeros((cu_data.shape[1], ) + coupling0.shape)
        for step_ in range(cu_data.shape[1]):
            step[0] = step_
            # NOTE(review): Numba documents the launch configuration as
            # kernel[griddim, blockdim]; block_dim is passed first here --
            # confirm against how block_dim / grid_dim are defined.
            kernel[self.block_dim,
                   self.grid_dim](step, state, coupling0, aff, buf, dt, omega,
                                  cvars, weights, delays, a_values)
            cu_data[:, step_] = state[0, :, 0].T
            cu_coupling0[step_] = coupling0

        # accept higher error because it accumulates over time
        # TODO test error proportional to time
        numpy.testing.assert_allclose(cu_data, py_data, 1e-2, 1e-2)