def test_to_batched_temp(ctx_factory):
    """Batch a kernel holding a private scalar temporary; the temporary
    must stay un-batched and the result must agree with a reference
    kernel that inlines the constant."""
    ctx = ctx_factory()

    dtypes = dict(out=np.float32, x=np.float32, a=np.float32)

    kernel = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        ''' cnst = 2.0
        out[i] = sum(j, cnst*a[i,j]*x[j])''',
        [
            lp.TemporaryVariable(
                "cnst", dtype=np.float32, shape=(),
                scope=lp.temp_var_scope.PRIVATE),
            '...',
        ])
    kernel = lp.add_and_infer_dtypes(kernel, dtypes)

    reference = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        '''out[i] = sum(j, 2.0*a[i,j]*x[j])''')
    reference = lp.add_and_infer_dtypes(reference, dtypes)

    batched = lp.to_batched(kernel, "nbatches", "out,x")
    batched_ref = lp.to_batched(reference, "nbatches", "out,x")

    # The scalar temporary must not have gained a batch axis.
    assert batched.temporary_variables['cnst'].shape == ()

    mat = np.random.randn(5, 5)
    vecs = np.random.randn(7, 5)

    # Compile both versions and check that their outputs agree.
    lp.auto_test_vs_ref(
        batched_ref, ctx, batched,
        parameters=dict(a=mat, x=vecs, n=5, nbatches=7))
def test_to_batched_temp(ctx_factory):
    """Check that :func:`loopy.to_batched` leaves a private scalar
    temporary un-batched, and that the batched kernel matches an
    equivalent reference kernel with the constant inlined."""
    ctx = ctx_factory()

    knl = lp.make_kernel(
        """ { [i,j]: 0<=i,j<n } """,
        """ cnst = 2.0
        out[i] = sum(j, cnst*a[i,j]*x[j])""",
        [
            lp.TemporaryVariable(
                "cnst",
                dtype=np.float32,
                shape=(),
                address_space=lp.AddressSpace.PRIVATE),
            "..."])
    knl = lp.add_and_infer_dtypes(
        knl, dict(out=np.float32, x=np.float32, a=np.float32))

    # Reference version with the constant 2.0 written directly into the
    # expression instead of going through a temporary.
    ref_knl = lp.make_kernel(
        """ { [i,j]: 0<=i,j<n } """,
        """out[i] = sum(j, 2.0*a[i,j]*x[j])""")
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, dict(out=np.float32, x=np.float32, a=np.float32))

    bknl = lp.to_batched(knl, "nbatches", "out,x")
    bref_knl = lp.to_batched(ref_knl, "nbatches", "out,x")

    # checking that cnst is not being batched (shape must stay scalar)
    assert bknl["loopy_kernel"].temporary_variables["cnst"].shape == ()

    a = np.random.randn(5, 5)
    x = np.random.randn(7, 5)

    # Checking that the program compiles and the logic is correct
    lp.auto_test_vs_ref(
        bref_knl, ctx, bknl,
        parameters=dict(a=a, x=x, n=5, nbatches=7))
def test_to_batched_temp(ctx_factory):
    """to_batched must not add a batch axis to a private scalar temporary."""
    ctx = ctx_factory()

    common_dtypes = dict(out=np.float32, x=np.float32, a=np.float32)

    cnst_temp = lp.TemporaryVariable(
        "cnst", dtype=np.float32, shape=(),
        scope=lp.temp_var_scope.PRIVATE)

    knl = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        ''' cnst = 2.0
        out[i] = sum(j, cnst*a[i,j]*x[j])''',
        [cnst_temp, '...'])
    knl = lp.add_and_infer_dtypes(knl, common_dtypes)

    ref_knl = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        '''out[i] = sum(j, 2.0*a[i,j]*x[j])''')
    ref_knl = lp.add_and_infer_dtypes(ref_knl, common_dtypes)

    bknl = lp.to_batched(knl, "nbatches", "out,x")
    bref_knl = lp.to_batched(ref_knl, "nbatches", "out,x")

    # 'cnst' must remain a scalar after batching.
    assert bknl.temporary_variables['cnst'].shape == ()

    a = np.random.randn(5, 5)
    x = np.random.randn(7, 5)

    # Compile both kernels and verify that they agree.
    lp.auto_test_vs_ref(
        bref_knl, ctx, bknl,
        parameters=dict(a=a, x=x, n=5, nbatches=7))
def test_to_batched(ctx_factory):
    """Compare a to_batched-transformed kernel against a hand-written
    kernel with an explicit batch loop."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    dtypes = dict(out=np.float32, x=np.float32, a=np.float32)

    base = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        ''' out[i] = sum(j, a[i,j]*x[j])''')
    base = lp.add_and_infer_dtypes(base, dtypes)
    bknl = lp.to_batched(base, "nbatches", "out,x")

    # Reference kernel: the same computation with an explicit k loop.
    ref_knl = lp.make_kernel(
        ''' { [i,j,k]: 0<=i,j<n and 0<=k<nbatches} ''',
        '''out[k, i] = sum(j, a[i,j]*x[k, j])''')
    ref_knl = lp.add_and_infer_dtypes(ref_knl, dtypes)

    a = np.random.randn(5, 5).astype(np.float32)
    x = np.random.randn(7, 5).astype(np.float32)

    # Run both kernels on the same data.
    evt, (batched_out, ) = bknl(queue, a=a, x=x, n=5, nbatches=7)
    evt, (ref_out, ) = ref_knl(queue, a=a, x=x, n=5, nbatches=7)

    # checking that the outputs are same
    assert np.linalg.norm(batched_out - ref_out) < 1e-15
def network_time_step(
        model: model.BaseKernel,
        coupling: coupling.BaseCoupling,
        scheme: scheme.TimeStepScheme,
        target: lp.target.TargetBase=None,
        ):
    """Fuse the model, network and scheme kernels and wrap the result in
    a sequential time-stepping loop with a circular time index."""
    target = target or utils.default_target()

    # Build the three component kernels to be fused.
    model_knl = model.kernel(target)
    net_knl = network.Network(model, coupling).kernel(target)
    scheme_knl = lp.fix_parameters(
        scheme.kernel(target), nsvar=len(model.state_sym))

    # (array, producer kernel index, consumer kernel index)
    data_flow = [
        ('input', 1, 0),
        ('diffs', 0, 2),
        ('drift', 0, 2),
        ('state', 2, 0),
    ]
    knl = lp.fuse_kernels(
        [model_knl, net_knl, scheme_knl], data_flow=data_flow)

    # Wrap the fused kernel in a sequential i_step loop; the time index
    # wraps modulo ntime, offset by the i_step_0 starting step.
    knl = lp.to_batched(knl, 'nstep', [], 'i_step', sequential=True)
    knl = lp.fix_parameters(
        knl, i_time=pm.parse('(i_step + i_step_0) % ntime'))
    knl.args.append(lp.ValueArg('i_step_0', np.uintc))
    knl = lp.add_dtypes(knl, {'i_step_0': np.uintc})
    return knl
def make_knl():
    """Build the fused, time-stepped Kuramoto kernel for the Numba target.

    Returns the kernel along with the configured oscillator model.
    """
    target = NumbaTarget()

    # Configure the oscillator model.
    osc = model.Kuramoto()
    osc.dt = 1.0
    osc.const['omega'] = 10.0 * 2.0 * np.pi / 1e3
    osc_knl = osc.kernel(target)

    # Coupling with a symbolic strength parameter 'a'.
    cfun = coupling.Kuramoto(osc)
    cfun.param['a'] = pm.parse('a')

    net = network.Network(osc, cfun)
    net_knl = net.kernel(target)

    scm = scheme.EulerStep(osc.dt)
    scm_knl = lp.fix_parameters(scm.kernel(target), nsvar=len(osc.state_sym))

    # Fuse model, network and scheme kernels.
    # (array, producer kernel index, consumer kernel index)
    data_flow = [
        ('input', 1, 0),
        ('diffs', 0, 2),
        ('drift', 0, 2),
        ('state', 2, 0),
    ]
    knl = lp.fuse_kernels((osc_knl, net_knl, scm_knl), data_flow=data_flow)

    # Wrap in a sequential time loop with a circular time index.
    knl = lp.to_batched(knl, 'nstep', [], 'i_step', sequential=True)
    knl = lp.fix_parameters(knl, i_time=pm.parse('(i_step + i_step_0) % ntime'))
    knl.args.append(lp.ValueArg('i_step_0', np.uintc))
    knl = lp.add_dtypes(knl, {'i_step_0': np.uintc})
    return knl, osc
def test_to_batched(ctx_factory):
    """Check lp.to_batched against a hand-written batched reference.

    Both kernels compute out[k, i] = sum_j a[i, j] * x[k, j] in float32;
    their outputs must agree.
    """
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        ''' out[i] = sum(j, a[i,j]*x[j])''')
    knl = lp.add_and_infer_dtypes(
        knl, dict(out=np.float32, x=np.float32, a=np.float32))

    bknl = lp.to_batched(knl, "nbatches", "out,x")

    # Reference: the same computation with an explicit batch loop k.
    ref_knl = lp.make_kernel(
        ''' { [i,j,k]: 0<=i,j<n and 0<=k<nbatches} ''',
        '''out[k, i] = sum(j, a[i,j]*x[k, j])''')
    ref_knl = lp.add_and_infer_dtypes(
        ref_knl, dict(out=np.float32, x=np.float32, a=np.float32))

    a = np.random.randn(5, 5).astype(np.float32)
    x = np.random.randn(7, 5).astype(np.float32)

    # Running both the kernels
    evt, (out1, ) = bknl(queue, a=a, x=x, n=5, nbatches=7)
    evt, (out2, ) = ref_knl(queue, a=a, x=x, n=5, nbatches=7)

    # FIX: the previous check `np.linalg.norm(out1-out2) < 1e-15` demanded
    # near-bitwise equality, which is fragile for float32 results whose
    # summation order may differ between the two kernels.  Compare with a
    # tolerance appropriate for single precision instead.
    np.testing.assert_allclose(out1, out2, rtol=1e-6, atol=1e-6)
def test_wrap_loop(self):
    """Take kernel, place in larger loop, offsetting certain vars.

    Batches the kernel over a new axis 't' of length 'T'; only 'in' is
    listed as varying, so only it gains the batch dimension.
    """
    knl = lp.make_kernel(
        "{[i,j]:0<=i,j<n}",
        "out[i] = sum(j, (i/j)*in[i, j])",
        target=CTarget())
    # in will depend on t
    knl2 = lp.to_batched(knl, 'T', ['in'], 't')
    # Print the generated dtype/code for visual inspection.
    print(self._dtype_and_code(knl2))
def test_wrap_loop_with_param(self): knl = lp.make_kernel("{[i,j]:0<=i,j<n}", """ <> a = a_values[i] out[i] = a * sum(j, (i/j)*in[i, j]) """, target=CTarget()) # in will depend on t knl2 = lp.to_batched(knl, 'T', ['in'], 't', sequential=True) print(self._dtype_and_code(knl2, a_values=np.float32))
def test_to_batched(ctx_factory):
    """Smoke test: a to_batched-transformed kernel must build and run."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    knl = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        ''' out[i] = sum(j, a[i,j]*x[j])''')
    batched = lp.to_batched(knl, "nbatches", "out,x")

    mat = np.random.randn(5, 5)
    vecs = np.random.randn(7, 5)

    # Just execute; no reference comparison in this variant.
    batched(queue, a=mat, x=vecs)
def test_to_batched(ctx_factory):
    """The kernel produced by lp.to_batched should compile and execute."""
    ctx = ctx_factory()
    queue = cl.CommandQueue(ctx)

    base_knl = lp.make_kernel(
        ''' { [i,j]: 0<=i,j<n } ''',
        ''' out[i] = sum(j, a[i,j]*x[j])''')

    # Add a batch axis of size "nbatches" to out and x.
    bknl = lp.to_batched(base_knl, "nbatches", "out,x")

    a = np.random.randn(5, 5)
    x = np.random.randn(7, 5)
    bknl(queue, a=a, x=x)
def batch_knl(knl):
    """Batch *knl* over subjects via a sequential 'i_subj' axis of size
    'nsubj', adding a leading per-subject dimension to each varying array."""
    varying = 'weights delays state input obsrv drift diffs next'.split()
    # NOTE: 'delays' should be removed from the varying list once the
    # upstream bug is fixed:
    # varying.remove('delays')
    batched = lp.to_batched(knl, 'nsubj', varying, 'i_subj', sequential=True)
    return batched