예제 #1
0
def test_to_batched_temp(ctx_factory):
    """Check that lp.to_batched leaves a private scalar temporary unbatched."""
    ctx = ctx_factory()

    # Kernel that reads the constant factor out of a private temporary.
    knl = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''', ''' cnst = 2.0
         out[i] = sum(j, cnst*a[i,j]*x[j])''',
        [
            lp.TemporaryVariable(
                "cnst", dtype=np.float32, shape=(),
                scope=lp.temp_var_scope.PRIVATE),
            '...',
        ])
    knl = lp.add_and_infer_dtypes(
        knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    # Reference kernel with the constant folded directly into the expression.
    ref_knl = lp.make_kernel(''' { [i,j]: 0<=i,j<n } ''',
                             '''out[i] = sum(j, 2.0*a[i,j]*x[j])''')
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    bknl = lp.to_batched(knl, "nbatches", "out,x")
    bref_knl = lp.to_batched(ref_knl, "nbatches", "out,x")

    # The scalar temporary must keep its () shape, i.e. not be batched.
    assert bknl.temporary_variables['cnst'].shape == ()

    a = np.random.randn(5, 5)
    x = np.random.randn(7, 5)

    # Compile both and verify the batched kernel against the reference.
    lp.auto_test_vs_ref(bref_knl, ctx, bknl,
                        parameters=dict(a=a, x=x, n=5, nbatches=7))
예제 #2
0
def test_to_batched_temp(ctx_factory):
    """Check that lp.to_batched leaves a private scalar temporary unbatched."""
    ctx = ctx_factory()

    # Kernel that reads the constant factor out of a private temporary.
    knl = lp.make_kernel(
        """ { [i,j]: 0<=i,j<n } """, """ cnst = 2.0
         out[i] = sum(j, cnst*a[i,j]*x[j])""",
        [
            lp.TemporaryVariable(
                "cnst", dtype=np.float32, shape=(),
                address_space=lp.AddressSpace.PRIVATE),
            "...",
        ])
    knl = lp.add_and_infer_dtypes(
        knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    # Reference kernel with the constant folded directly into the expression.
    ref_knl = lp.make_kernel(""" { [i,j]: 0<=i,j<n } """,
                             """out[i] = sum(j, 2.0*a[i,j]*x[j])""")
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    bknl = lp.to_batched(knl, "nbatches", "out,x")
    bref_knl = lp.to_batched(ref_knl, "nbatches", "out,x")

    # The scalar temporary must keep its () shape, i.e. not be batched.
    assert bknl["loopy_kernel"].temporary_variables["cnst"].shape == ()

    a = np.random.randn(5, 5)
    x = np.random.randn(7, 5)

    # Compile both and verify the batched kernel against the reference.
    lp.auto_test_vs_ref(bref_knl, ctx, bknl,
                        parameters=dict(a=a, x=x, n=5, nbatches=7))
예제 #3
0
def test_to_batched_temp(ctx_factory):
    """Check that lp.to_batched leaves a private scalar temporary unbatched."""
    ctx = ctx_factory()

    # Kernel that reads the constant factor out of a private temporary.
    knl = lp.make_kernel(
         ''' { [i,j]: 0<=i,j<n } ''',
         ''' cnst = 2.0
         out[i] = sum(j, cnst*a[i,j]*x[j])''',
         [
             lp.TemporaryVariable(
                 "cnst", dtype=np.float32, shape=(),
                 scope=lp.temp_var_scope.PRIVATE),
             '...',
         ])
    knl = lp.add_and_infer_dtypes(
        knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    # Reference kernel with the constant folded directly into the expression.
    ref_knl = lp.make_kernel(
         ''' { [i,j]: 0<=i,j<n } ''',
         '''out[i] = sum(j, 2.0*a[i,j]*x[j])''')
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    bknl = lp.to_batched(knl, "nbatches", "out,x")
    bref_knl = lp.to_batched(ref_knl, "nbatches", "out,x")

    # The scalar temporary must keep its () shape, i.e. not be batched.
    assert bknl.temporary_variables['cnst'].shape == ()

    a = np.random.randn(5, 5)
    x = np.random.randn(7, 5)

    # Compile both and verify the batched kernel against the reference.
    lp.auto_test_vs_ref(
            bref_knl, ctx, bknl,
            parameters=dict(a=a, x=x, n=5, nbatches=7))
예제 #4
0
def test_to_batched(ctx_factory):
    """Compare lp.to_batched output against a hand-written batched kernel."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Unbatched matvec, then batch it over "nbatches".
    knl = lp.make_kernel(''' { [i,j]: 0<=i,j<n } ''',
                         ''' out[i] = sum(j, a[i,j]*x[j])''')
    knl = lp.add_and_infer_dtypes(
        knl, {"out": np.float32, "x": np.float32, "a": np.float32})
    bknl = lp.to_batched(knl, "nbatches", "out,x")

    # Reference: the batch loop written out by hand.
    ref_knl = lp.make_kernel(''' { [i,j,k]: 0<=i,j<n and 0<=k<nbatches} ''',
                             '''out[k, i] = sum(j, a[i,j]*x[k, j])''')
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    a = np.random.randn(5, 5).astype(np.float32)
    x = np.random.randn(7, 5).astype(np.float32)

    # Run both kernels on the same data.
    evt, (out1, ) = bknl(queue, a=a, x=x, n=5, nbatches=7)
    evt, (out2, ) = ref_knl(queue, a=a, x=x, n=5, nbatches=7)

    # Both must produce the same result.
    assert np.linalg.norm(out1 - out2) < 1e-15
예제 #5
0
def network_time_step(
        model: model.BaseKernel,
        coupling: coupling.BaseCoupling,
        scheme: scheme.TimeStepScheme,
        # NOTE(review): annotation should be Optional[TargetBase] since the
        # default is None — confirm before tightening under a type checker.
        target: lp.target.TargetBase = None,
        ):
    """Build a fused, time-stepped network kernel.

    Fuses the model kernel, the network-coupling kernel and the
    time-step-scheme kernel, then wraps the result in a sequential
    ``i_step`` batch loop of length ``nstep``.  The time index
    ``i_time`` is rewritten to ``(i_step + i_step_0) % ntime`` so
    stepping can resume at an arbitrary offset, exposed as a new
    unsigned-int kernel argument ``i_step_0``.

    :param model: provides the model (drift/diffusion) kernel.
    :param coupling: coupling used to build the network kernel.
    :param scheme: time-stepping scheme; its ``nsvar`` parameter is
        fixed to the model's number of state variables.
    :param target: loopy code-generation target; defaults to
        ``utils.default_target()`` when ``None``.
    :returns: the fused, batched loopy kernel.
    """
    target = target or utils.default_target()
    # fuse kernels: indices below refer to positions in this list
    # (0 = model, 1 = network, 2 = scheme)
    kernels = [
        model.kernel(target),
        network.Network(model, coupling).kernel(target),
        lp.fix_parameters(scheme.kernel(target), nsvar=len(model.state_sym)),
    ]
    # data_flow entries are (variable, producer index, consumer index)
    data_flow = [
        ('input', 1, 0),
        ('diffs', 0, 2),
        ('drift', 0, 2),
        ('state', 2, 0)
    ]
    knl = lp.fuse_kernels(kernels, data_flow=data_flow)
    # time step: sequential outer loop over i_step, no batched variables
    knl = lp.to_batched(knl, 'nstep', [], 'i_step', sequential=True)
    new_i_time = pm.parse('(i_step + i_step_0) % ntime')
    knl = lp.fix_parameters(knl, i_time=new_i_time)
    # expose the step offset as an extra scalar kernel argument
    knl.args.append(lp.ValueArg('i_step_0', np.uintc))
    knl = lp.add_dtypes(knl, {'i_step_0': np.uintc})
    return knl
예제 #6
0
def make_knl():
    """Build the fused Kuramoto time-stepping kernel for the Numba target.

    Returns the fused/batched kernel together with the configured
    oscillator model instance.
    """
    target = NumbaTarget()

    # oscillator model kernel
    osc = model.Kuramoto()
    osc.dt = 1.0
    osc.const['omega'] = 10.0 * 2.0 * np.pi / 1e3
    osc_knl = osc.kernel(target)

    # network coupling kernel with symbolic coupling strength 'a'
    cfun = coupling.Kuramoto(osc)
    cfun.param['a'] = pm.parse('a')
    net_knl = network.Network(osc, cfun).kernel(target)

    # Euler time-step kernel, with nsvar fixed to the model's state size
    scm = scheme.EulerStep(osc.dt)
    scm_knl = lp.fix_parameters(scm.kernel(target),
                                nsvar=len(osc.state_sym))

    # fuse: data_flow entries are (variable, producer index, consumer index)
    data_flow = [
        ('input', 1, 0),
        ('diffs', 0, 2),
        ('drift', 0, 2),
        ('state', 2, 0),
    ]
    knl = lp.fuse_kernels((osc_knl, net_knl, scm_knl), data_flow=data_flow)

    # wrap in a sequential time-step loop and make i_time wrap at ntime,
    # offset by a new unsigned-int argument i_step_0
    knl = lp.to_batched(knl, 'nstep', [], 'i_step', sequential=True)
    knl = lp.fix_parameters(knl,
                            i_time=pm.parse('(i_step + i_step_0) % ntime'))
    knl.args.append(lp.ValueArg('i_step_0', np.uintc))
    knl = lp.add_dtypes(knl, {'i_step_0': np.uintc})

    return knl, osc
예제 #7
0
def test_to_batched(ctx_factory):
    """Compare lp.to_batched output against a hand-written batched kernel."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    # Unbatched matvec, then batch it over "nbatches".
    knl = lp.make_kernel(
         ''' { [i,j]: 0<=i,j<n } ''',
         ''' out[i] = sum(j, a[i,j]*x[j])''')
    knl = lp.add_and_infer_dtypes(
        knl, {"out": np.float32, "x": np.float32, "a": np.float32})
    bknl = lp.to_batched(knl, "nbatches", "out,x")

    # Reference: the batch loop written out by hand.
    ref_knl = lp.make_kernel(
         ''' { [i,j,k]: 0<=i,j<n and 0<=k<nbatches} ''',
         '''out[k, i] = sum(j, a[i,j]*x[k, j])''')
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, {"out": np.float32, "x": np.float32, "a": np.float32})

    a = np.random.randn(5, 5).astype(np.float32)
    x = np.random.randn(7, 5).astype(np.float32)

    # Run both kernels on the same data.
    evt, (out1, ) = bknl(queue, a=a, x=x, n=5, nbatches=7)
    evt, (out2, ) = ref_knl(queue, a=a, x=x, n=5, nbatches=7)

    # Both must produce the same result.
    assert np.linalg.norm(out1-out2) < 1e-15
예제 #8
0
 def test_wrap_loop(self):
     """Wrap a kernel in a larger loop, offsetting selected variables."""
     knl = lp.make_kernel("{[i,j]:0<=i,j<n}",
                          "out[i] = sum(j, (i/j)*in[i, j])",
                          target=CTarget())
     # 'in' picks up a dependence on the new batch index t
     wrapped = lp.to_batched(knl, 'T', ['in'], 't')
     print(self._dtype_and_code(wrapped))
예제 #9
0
 def test_wrap_loop_with_param(self):
     """Wrap a kernel with a per-i parameter in a sequential batch loop."""
     knl = lp.make_kernel("{[i,j]:0<=i,j<n}",
                          """
                          <> a = a_values[i]
                          out[i] = a * sum(j, (i/j)*in[i, j])
                          """,
                          target=CTarget())
     # 'in' picks up a dependence on the new batch index t
     wrapped = lp.to_batched(knl, 'T', ['in'], 't', sequential=True)
     print(self._dtype_and_code(wrapped, a_values=np.float32))
예제 #10
0
def test_to_batched(ctx_factory):
    """Smoke test: a batched matvec kernel compiles and runs."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(''' { [i,j]: 0<=i,j<n } ''',
                         ''' out[i] = sum(j, a[i,j]*x[j])''')
    bknl = lp.to_batched(knl, "nbatches", "out,x")

    # n = 5, nbatches = 7 — both inferred from the argument shapes
    mat = np.random.randn(5, 5)
    vecs = np.random.randn(7, 5)
    bknl(queue, a=mat, x=vecs)
예제 #11
0
def test_to_batched(ctx_factory):
    """Smoke test: a batched matvec kernel compiles and runs."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
         ''' { [i,j]: 0<=i,j<n } ''',
         ''' out[i] = sum(j, a[i,j]*x[j])''')
    bknl = lp.to_batched(knl, "nbatches", "out,x")

    # n = 5, nbatches = 7 — both inferred from the argument shapes
    mat = np.random.randn(5, 5)
    vecs = np.random.randn(7, 5)
    bknl(queue, a=mat, x=vecs)
예제 #12
0
def batch_knl(knl):
    """Batch the kernel over subjects with a sequential 'i_subj' loop."""
    # NOTE: 'delays' stays in the varying list pending an upstream bug fix;
    # once fixed it may need to be excluded — see original "wait for bug
    # fix" note.
    varying = ['weights', 'delays', 'state', 'input',
               'obsrv', 'drift', 'diffs', 'next']
    return lp.to_batched(knl, 'nsubj', varying, 'i_subj', sequential=True)