def test_get_split_shape():
    """
    Check ``array_splitter.split_shape`` against the numpy splitter.

    Two splitters are exercised: one built from
    ``dummy_loopy_opts(depth=8, order='F')`` and one from
    ``dummy_loopy_opts(width=8, order='C')``.  For each of them, a set of
    fixed 2-D / 3-D shapes plus 50 randomly sized shapes is tested.
    """

    def __test(splitter, shape):
        """Assert the split shape and grow / split axes for one *shape*."""
        arr = np.zeros(shape)
        if splitter.data_order == 'F':
            grow = 1
            split = 0
        else:
            grow = 0
            split = -1

        # always query the splitter that was passed in, rather than relying
        # on whatever the enclosing scope's variable is bound to at call time
        sh, gr, sp = splitter.split_shape(arr)
        assert gr == grow
        assert sp == split
        # the reported shape must match what the numpy splitter produces
        assert sh == splitter.split_numpy_arrays(arr)[0].shape

    def __run_all(splitter):
        """Run the full set of shapes through *splitter*."""
        # small square, evenly sized square, then the 3d equivalents
        for shape in ((10, 10), (16, 16), (10, 10, 10), (16, 16, 16)):
            __test(splitter, shape)

        # and finally test with some randomly sized arrays (2 to 4 axes,
        # each axis of size 1 to 11)
        for _ in range(50):
            shape = np.random.randint(1, 12, size=np.random.randint(2, 5))
            __test(splitter, shape)

    # first the depth=8 / 'F' configuration, then repeat with width=8 / 'C'
    for opt_kwargs in ({'depth': 8, 'order': 'F'},
                       {'width': 8, 'order': 'C'}):
        opts = dummy_loopy_opts(**opt_kwargs)
        __run_all(array_splitter(opts))
Exemple #2
0
def test_indexer(opts):
    """
    Verify that ``indexer`` maps each index of an unsplit array onto the
    element of the split array holding the same value.
    """
    asplit = array_splitter(opts)

    def __test(splitter, shape):
        # reference array in which every element has a distinct value
        reference = np.arange(np.prod(shape)).reshape(shape)
        index = indexer(splitter, shape)

        # the split counterpart of the reference array
        split_arr = splitter.split_numpy_arrays(reference)[0]

        # every axis takes part in the index conversion
        all_axes = tuple(range(len(shape)))
        walker = np.nditer(reference,
                           flags=['multi_index'],
                           order=opts.order)
        for _ in walker:
            position = walker.multi_index
            # the indexer is handed one single-entry tuple per axis
            wrapped = tuple((x, ) for x in position)
            mapped = index(wrapped, all_axes)
            # the converted index must address the original value
            assert split_arr[mapped] == reference[position]

    # small square, evenly sized square, and the 3d versions of both
    for shape in ((10, 10), (16, 16), (10, 10, 10), (16, 16, 16)):
        __test(asplit, shape)
def test_lpy_array_splitter_f_deep():
    """
    Split the arrays of the ``_create('F')`` kernel with a depth=8,
    'F'-ordered splitter and check the resulting shapes and indexing.
    """
    from pymbolic.primitives import Subscript, Variable

    opts = dummy_loopy_opts(depth=8, order='F')
    asplit = array_splitter(opts)

    # split the iname first, then the arrays
    knl = lp.split_iname(_create('F'), 'i', 8)
    knl = asplit.split_loopy_arrays(knl)

    # test that it runs
    knl()

    # both assignments are expected to use the same index after the split
    expected_index = (Variable('i_inner'), 0, Variable('i_outer'))

    # 'a1' is the unevenly sized array, 'a2' the evenly sized one
    for name, expected_shape in (('a1', (8, 10, 2)), ('a2', (8, 16, 2))):
        # check dim
        arg = next(x for x in knl.args if x.name == name)
        assert arg.shape == expected_shape
        # and indexing
        assign = next(insn.assignee for insn in knl.instructions
                      if insn.id == name)
        assert isinstance(assign, Subscript) and \
            assign.index == expected_index
Exemple #4
0
def test_lpy_wide_array_splitter(opts):
    """
    Split the arrays of a kernel with an explicit ``j_outer`` / ``j_inner``
    iname pair and check the split shapes and assignee indices.
    """
    from pymbolic.primitives import Subscript, Variable
    asplit = array_splitter(opts)

    # kernel arguments: one unevenly and one evenly sized array
    arg1 = lp.GlobalArg('a1', shape=(10, 10), order=opts.order)
    arg2 = lp.GlobalArg('a2', shape=(16, 16), order=opts.order)

    domains = [
        '{[i]: 0 <= i < 10}',
        '{{[j_outer]: 0 <= j_outer < {}}}'.format(
            int(np.ceil(10 / VECTOR_WIDTH))),
        '{{[j_inner]: 0 <= j_inner < {}}}'.format(VECTOR_WIDTH),
    ]
    instructions = """
        for i, j_outer, j_inner
            a1[j_outer, i] = 1 {id=a1}
            a2[j_outer, i] = 1 {id=a2}
        end
        """
    knl = lp.make_kernel(
        domains,
        instructions,
        [arg1, arg2],
        silenced_warnings=['no_device_in_pre_codegen_checks'],
        target=lp.OpenCLTarget())

    names = ('a1', 'a2')
    # keep copies of the unsplit arguments to compute the expected shapes
    unsplit = {name: knl.arg_dict[name].copy() for name in names}
    knl = asplit.split_loopy_arrays(knl)
    inner_tag = 'vec' if opts.is_simd else 'l.0'
    knl = lp.tag_inames(knl, {'j_inner': inner_tag})

    # ensure there's no loopy errors
    lp.generate_code_v2(knl).device_code()

    # expected index of the assignees after splitting
    if opts.order == 'C':
        expected = (Variable('j_outer'), Variable('i'), Variable('j_inner'))
    else:
        expected = (Variable('j_inner'), Variable('j_outer'), Variable('i'))

    for name in names:
        # check dim
        split_arg = knl.arg_dict[name]
        assert split_arg.shape == asplit.split_shape(unsplit[name])[0]
        # and indexing
        assign = next(insn.assignee for insn in knl.instructions
                      if insn.id == name)
        assert isinstance(assign, Subscript) and assign.index == expected
Exemple #5
0
def test_lpy_deep_array_splitter(opts):
    """
    Split the arrays of a kernel whose ``i`` iname was split by
    ``VECTOR_WIDTH`` and check the split shapes and assignee indices.
    """
    from pymbolic.primitives import Subscript, Variable
    asplit = array_splitter(opts)

    # 'a1' has 3 * VECTOR_WIDTH entries per axis, while the loop only
    # covers the first 2 * VECTOR_WIDTH of them
    size = VECTOR_WIDTH * 3
    loop_bound = VECTOR_WIDTH * 2
    arg1 = lp.GlobalArg('a1', shape=(size, size), order=opts.order)
    arg2 = lp.GlobalArg('a2', shape=(16, 16), order=opts.order)

    domain = '{{[i]: 0 <= i < {}}}'.format(loop_bound)
    instructions = """
            a1[0, i] = 1 {id=a1}
            a2[0, i] = 1 {id=a2}
        """
    knl = lp.make_kernel(
        domain,
        instructions,
        [arg1, arg2],
        silenced_warnings=['no_device_in_pre_codegen_checks'],
        target=lp.OpenCLTarget())

    inner_tag = 'vec' if opts.is_simd else 'l.0'
    knl = lp.split_iname(knl, 'i', VECTOR_WIDTH, inner_tag=inner_tag)

    names = ('a1', 'a2')
    # keep copies of the unsplit arguments to compute the expected shapes
    unsplit = {name: knl.arg_dict[name].copy() for name in names}
    knl = asplit.split_loopy_arrays(knl)

    # ensure there's no loopy errors
    lp.generate_code_v2(knl).device_code()

    # expected index of the assignees after splitting
    if opts.order == 'C':
        expected = (0, Variable('i_outer'), Variable('i_inner'))
    else:
        expected = (Variable('i_inner'), 0, Variable('i_outer'))

    for name in names:
        # check dim
        split_arg = knl.arg_dict[name]
        assert split_arg.shape == asplit.split_shape(unsplit[name])[0]
        # and indexing
        assign = next(insn.assignee for insn in knl.instructions
                      if insn.id == name)
        assert isinstance(assign, Subscript) and assign.index == expected
Exemple #6
0
def test_select_elements(shape, mask, axes, tiling=True):
    """
    Compare ``select_elements`` with ``get_split_elements`` on an array of
    the given *shape*, for every option set produced by ``opts_loop`` with
    no width, no depth and SIMD disabled.
    """
    # array holding the values 1 .. prod(shape)
    arr = np.arange(1, np.prod(shape) + 1).reshape(shape)

    for opts in opts_loop(width=[None], depth=[None], simd=False):
        asplit = array_splitter(opts)
        selected = select_elements(arr, mask, axes,
                                   tiling=tiling).flatten(order=opts.order)
        # despite the name, this can actually be used for both split &
        # non-split elements and forms a nice test-case answer here
        expected = get_split_elements(arr,
                                      asplit,
                                      arr.shape,
                                      mask,
                                      axes,
                                      tiling=tiling)
        assert np.array_equal(selected, expected)
def test_npy_array_splitter_f_deep():
    """
    Run the shared ``__internal`` numpy-splitting check with a depth=8,
    'F'-ordered splitter over a set of 2-D and 3-D shapes.
    """
    opts = dummy_loopy_opts(depth=8, order='F')
    asplit = array_splitter(opts)

    # small square, evenly sized square, and the 3d versions of both
    for shape in ((10, 10), (16, 16), (10, 10, 10), (16, 16, 16)):
        __internal(asplit, shape, order='F', wide=opts.depth)
Exemple #8
0
def test_lpy_iname_presplit(opts):
    """
    Test that accesses through pre-split inames are handled correctly for
    loopy arrays that are excluded from splitting.
    """
    from pymbolic.primitives import Subscript, Variable
    asplit = array_splitter(opts)

    # kernel arguments
    arg1 = lp.GlobalArg('a1', shape=(20, 10), order=opts.order)
    arg2 = lp.GlobalArg('a2', shape=(16, 16), order=opts.order)

    domains = [
        '{[i]: 0 <= i < 10}',
        '{{[j_outer]: 0 <= j_outer < {}}}'.format(
            int(np.ceil(10 / VECTOR_WIDTH))),
        '{{[j_inner]: 0 <= j_inner < {}}}'.format(VECTOR_WIDTH),
    ]
    instructions = """
            a1[j_outer, i] = 1 {id=a1}
            a2[j_outer, i] = 1 {id=a2}
        """
    knl = lp.make_kernel(
        domains,
        instructions,
        [arg1, arg2],
        silenced_warnings=['no_device_in_pre_codegen_checks'],
        target=lp.OpenCLTarget())

    # neither array may be split
    knl = asplit.split_loopy_arrays(knl, dont_split=['a1', 'a2'])

    # ensure there's no loopy errors
    lp.generate_code_v2(knl).device_code()

    # expected index of both assignees
    expected = (Variable('j_outer') * VECTOR_WIDTH + Variable('j_inner'),
                Variable('i'))

    # check indexing of each assignment in turn
    for insn_id in ('a1', 'a2'):
        assign = next(insn.assignee for insn in knl.instructions
                      if insn.id == insn_id)
        assert isinstance(assign, Subscript) and assign.index == expected
Exemple #9
0
def test_npy_array_splitter(opts):
    """
    Run the shared ``__internal`` numpy-splitting check for the supplied
    options over a set of 2-D and 3-D shapes.
    """
    asplit = array_splitter(opts)

    # small square, evenly sized square, and the 3d versions of both
    for shape in ((10, 10), (16, 16), (10, 10, 10), (16, 16, 16)):
        __internal(asplit,
                   shape,
                   order=opts.order,
                   width=opts.width,
                   depth=opts.depth)
Exemple #10
0
def test_get_split_shape(opts):
    """
    Check the shape and the grow / vector / split axes reported by
    ``array_splitter.split_shape`` for the supplied options.
    """
    asplit = array_splitter(opts)

    def __test(splitter, shape):
        # dummy array of the requested shape
        dummy = np.zeros(shape)
        new_shape, grow_axis, vec_axis, split_axis = \
            splitter.split_shape(dummy)

        # the shape must agree with what the numpy splitter produces
        assert new_shape == splitter.split_numpy_arrays(dummy)[0].shape

        # the "grow" axis is axis 0 for "C" ordering and axis 1 for "F"
        assert grow_axis == (opts.order == 'F')

        # the vector axis is placed in front for "F" and at the back for "C"
        expected_vec = len(shape) if opts.order == 'C' else 0
        assert vec_axis == expected_vec

        # the split axis is the first axis when a width is set, and the
        # last axis otherwise
        expected_split = 0 if opts.width else len(shape) - 1
        assert split_axis == expected_split

    # small square, evenly sized square, and the 3d versions of both
    for shape in ((10, 10), (16, 16), (10, 10, 10), (16, 16, 16)):
        __test(asplit, shape)

    # and finally test with some randomly sized arrays
    for _ in range(50):
        shape = np.random.randint(1, 12, size=np.random.randint(2, 5))
        __test(asplit, shape)
Exemple #11
0
def test_stride_limiter(dtype):
    """
    Check that the integer-limited problem size keeps the largest array
    index below the maximum value of *dtype*.

    This tests an issue, particularly for the Intel OpenCL runtime, where
    integers in array indexing that overflow the int32 max result in
    segfaults in the kernel.  The long term fix is probably to allow the
    user to specify the dtype via command line or platform file, but for
    now the maximum number of conditions per run is simply limited.
    """
    from pymbolic import parse
    arry_name = 'a'
    # captures the index expression of the generated assignment
    extractor = re.compile(r'{}\[(.+)\] = i'.format(arry_name))
    dim_size = 1000000
    int_max = np.iinfo(dtype).max

    for opt in loopy_opts():
        split = array_splitter(opt)
        # create a really big loopy array
        big_array = lp.GlobalArg(arry_name,
                                 shape=(problem_size.name, dim_size),
                                 dtype=dtype)
        # make a dummy kernel with this argument to populate dim tags
        domains = [
            '{{[i]: 0 <= i < {}}}'.format(dim_size),
            '{{[j]: 0 <= j < {}}}'.format(problem_size.name),
        ]
        knl = lp.make_kernel(domains,
                             '{}[j, i] = i'.format(arry_name),
                             [big_array, problem_size])
        # split said array, and pick up the split version of the argument
        knl = split.split_loopy_arrays(knl)
        big_array = knl.args[0]

        # limits file whose memory limits are too large to be the
        # limiting factor
        huge = str(int_max * 10)
        limits_text = """
                       alloc:
                          # some huge number such that this isn't the limiting factor
                          {0}
                       global:
                          {0}
                       """.format(huge)
        with NamedTemporaryFile(suffix='.yaml', mode='w') as temp:
            temp.write(limits_text)
            temp.seek(0)
            limits = memory_limits.get_limits(
                opt, {memory_type.m_global: [big_array]},
                temp.name,
                memory_manager.get_string_strides()[0],
                dtype=dtype,
                limit_int_overflow=True)

        # and feed through stride limiter
        limit = limits.integer_limited_problem_size(big_array, dtype=dtype)

        # generate the code and regex the array indexing out of it
        device_code = lp.generate_code_v2(knl).device_code()
        index = extractor.search(device_code).group(1)

        # substitute the largest values for 'i', 'j' and 'problem_size'
        repl = {
            'i': str(dim_size - 1),
            'j': str(limit - 1),
            'problem_size': str(limit)
        }
        pattern = re.compile(r'\b(' + '|'.join(repl) + r')\b')
        index = pattern.sub(lambda match: repl[match.group()], index)
        # every '/' in the generated code becomes python floor division
        index = index.replace('/', '//')

        max_index = parse(index)
        assert isinstance(max_index, (int, float))
        assert max_index < int_max

        # finally, test that we get the same limit from can_fit
        assert limit == limits.can_fit(mtype=memory_type.m_global)
Exemple #12
0
def test_get_comparable_nosplit(ndim, sparse):
    """
    For every comparison pattern and option set, check that the comparable
    built by ``get_comparable`` gives the same result for the split output
    array as for the kernel call's transformed reference answer.
    """
    axis_size = 10
    # array with `ndim` axes of equal size and a distinct value per element
    arr = np.arange(axis_size**ndim).reshape((axis_size, ) * ndim)

    if sparse:
        # set some array elements to zero to sparsify it
        choice = np.sort(np.random.choice(axis_size, 3, replace=False))
        choice1 = np.sort(np.random.choice(axis_size, 3, replace=False))
        for x1 in choice:
            arr[:, x1, choice1] = 0

    for masks, axes, tiling in compare_patterns(arr.shape):
        # create comparable object
        comparable = get_comparable([masks], [arr],
                                    compare_axis=axes,
                                    tiling=tiling)

        # NOTE: reset once per pattern only -- a namestore built for a
        # sparse option set stays in place for the option sets after it
        namestore = None
        for opts in opts_loop(sparse=sparse):
            kc = kernel_call('', arr, axes, masks)
            outv = arr.copy()
            if sparse and opts.jac_format == JacobianFormat.sparse:
                if csc_matrix is None:
                    raise SkipTest(
                        'Scipy required for sparse Jacobian testing')
                first_slice = arr[0, :, :]
                # get the sparse indices from the appropriate matrix type
                if opts.order == 'C':
                    compressed = csr_matrix(first_slice)
                    row, col = compressed.indptr, compressed.indices
                else:
                    compressed = csc_matrix(first_slice)
                    row, col = compressed.indices, compressed.indptr
                # and get the sparse indices in flat form
                coordinate = coo_matrix(first_slice)
                flat_row, flat_col = coordinate.row, coordinate.col

                kc.input_args = {}
                kc.input_args['jac'] = arr.copy()
                namestore = type(
                    '', (object, ), {
                        'jac_row_inds': dummy_init(row),
                        'jac_col_inds': dummy_init(col),
                        'flat_jac_row_inds': dummy_init(flat_row),
                        'flat_jac_col_inds': dummy_init(flat_col)
                    })

                # and finally, sparsify array
                outv = sparsify(outv, col, row, opts.order)

            asplit = array_splitter(opts)
            kc.set_state(asplit,
                         order=opts.order,
                         namestore=namestore,
                         jac_format=opts.jac_format)

            # compare the split output against the reference answer
            outv = asplit.split_numpy_arrays(outv.copy())[0]
            outv = comparable(kc, outv, 0, False)
            ansv = comparable(kc, kc.transformed_ref_ans[0].copy(), 0, True)

            assert np.array_equal(outv, ansv)
Exemple #13
0
def test_get_split_elements(opts):
    """
    Check that ``get_split_elements`` pulls the same values out of a split
    array as are selected directly from the unsplit array.
    """
    asplit = array_splitter(opts)

    def __test(shape, check_inds=None, check_axes=None, tiling=True):
        # dummy array holding 1 .. prod(shape), and its split counterpart
        arr = np.arange(1, np.prod(shape) + 1).reshape(shape)
        split_arr = asplit.split_numpy_arrays(arr)[0]

        if check_inds is None:
            assert tiling
            # no indices given: check every element along every axis
            check_inds = tuple(np.arange(x) for x in shape)
            check_axes = tuple(range(len(shape)))
            ans = arr.flatten(opts.order)
        elif tiling:
            assert check_axes is not None
            assert check_inds is not None
            reference_call = kernel_call('', arr, check_axes, [check_inds])
            ans = reference_call._get_comparable(arr, 0,
                                                 True).flatten(opts.order)
        else:
            # non-tiled: index the array directly with the index arrays,
            # which must therefore all be of the same size
            slicer = [slice(None)] * arr.ndim
            assert all(check_inds[0].size == ci.size for ci in check_inds[1:])
            for i, ax in enumerate(check_axes):
                slicer[ax] = check_inds[i]
            ans = arr[tuple(slicer)].flatten(opts.order)

        # and compare to the old (unsplit) matrix
        extracted = get_split_elements(split_arr,
                                       asplit,
                                       arr.shape,
                                       check_inds,
                                       check_axes,
                                       tiling=tiling)
        assert np.allclose(extracted, ans)

    def partial_inds():
        # fresh index arrays for a check over two axes
        return [np.arange(3, 7), np.arange(2, 4)]

    def untiled_inds():
        # fresh, equally sized index arrays for a non-tiled check
        return [np.arange(0, 4), np.arange(3, 7), np.arange(2, 6)]

    # small square, evenly sized square, then a 3d array
    for shape in ((10, 10), (16, 16), (10, 10, 10)):
        __test(shape)
    # and some non-full check-inds / axes
    for axes in ((0, 1), (1, 2), (0, 2), (0, 1)):
        __test((10, 10, 10), partial_inds(), axes)
    __test((16, 16, 16))
    for axes in ((1, 2), (0, 2), (0, 1)):
        __test((16, 16, 16), partial_inds(), axes)
    # and some non-tiled axes
    for shape in ((10, 10, 10), (16, 16, 16), (16, 16, 16)):
        __test(shape, untiled_inds(), (0, 1, 2), tiling=False)

    # try with a really large array
    __test((100000, 16, 16),
           [np.arange(3, 50000),
            np.arange(2, 10),
            np.array([7])], (0, 1, 2))
    __test((100000, 16, 16), untiled_inds(), (0, 1, 2), tiling=False)
Exemple #14
0
def test_strided_copy(state):
    """
    Generate, build and run a small host program that copies arrays through
    the ``memory_manager``'s transfer code, and asserts (in the generated
    program) that the host buffers match their saved values afterwards.

    Parameters
    ----------
    state : dict-like
        Must provide the keys 'lang', 'order', 'depth', 'width' and
        'device_type', all of which are read below.
    """
    lang = state['lang']
    order = state['order']
    depth = state['depth']
    width = state['width']

    # cleanup
    clean_dir(build_dir)
    clean_dir(obj_dir)
    clean_dir(lib_dir)

    # create
    utils.create_dir(build_dir)
    utils.create_dir(obj_dir)
    utils.create_dir(lib_dir)

    # the vector size is the depth if set, else the width, else zero
    vec_size = depth if depth else (width if width else 0)
    # ics is eight full runs of max_per_run plus vec_size extra conditions,
    # so the last run is non-full unless vec_size is a multiple of
    # max_per_run (or zero)
    # this should also be evenly divisible by depth and width
    # (as should the non full run)
    max_per_run = 16
    # number of ics should be divisibly by depth and width
    ics = max_per_run * 8 + vec_size
    if vec_size:
        assert ics % vec_size == 0
        assert max_per_run % vec_size == 0
        assert int(np.floor(ics / max_per_run) * max_per_run) % vec_size == 0
    dtype = np.dtype('float64')

    # create test arrays
    def __create(shape):
        """
        Return an array of shape ``(ics,) + shape`` filled with increasing
        values via its flat iterator.
        """
        if not isinstance(shape, tuple):
            shape = (shape, )
        shape = (ics, ) + shape
        arr = np.zeros(shape, dtype=dtype, order=order)
        arr.flat[:] = np.arange(np.prod(shape))
        return arr

    arrays = [
        __create(16),
        __create(10),
        __create(20),
        __create((20, 20)),
        __create(())
    ]
    const = [np.arange(10, dtype=dtype)]
    # loopy arguments a0..a4 mirror `arrays` (with a symbolic leading
    # 'problem_size' axis), followed by one read-only temporary per constant
    lp_arrays = [lp.GlobalArg('a{}'.format(i), shape=('problem_size',) + a.shape[1:],
                              order=order, dtype=(arrays + const)[i].dtype)
                 for i, a in enumerate(arrays)] + \
                [lp.TemporaryVariable('a{}'.format(i + len(arrays)), dtype=dtype,
                 order=order, initializer=const[i], read_only=True,
                 shape=const[i].shape) for i in range(len(const))]
    # from here on `const` refers to the loopy temporaries, not numpy arrays
    const = lp_arrays[len(arrays):]

    # from here on `dtype` is the C type name used in the generated source
    dtype = 'double'

    # create array splitter
    # (opts is a bare class used as an attribute holder; it is not
    # instantiated)
    opts = type('', (object, ), {
        'width': width,
        'depth': depth,
        'order': order,
        'lang': lang
    })
    asplit = array_splitter(opts)

    # split numpy
    arrays = asplit.split_numpy_arrays(arrays)
    # make dummy knl
    # (`i` only takes the values 0 and 1, so the `if i > 1` guard is never
    # true; presumably the kernel exists only so that loopy knows about the
    # arrays -- TODO confirm)
    knl = lp.make_kernel(
        '{[i]: 0 <= i <= 1}', """
                            if i > 1
                                a0[i, i] = 0
                                a1[i, i] = 0
                                a2[i, i] = 0
                                a3[i, i, i] = 0
                                a4[i] = 0
                                <> k = a5[i]
                            end
                         """, lp_arrays)
    # split loopy
    lp_arrays = asplit.split_loopy_arrays(knl).args

    # now create a simple library
    mem = memory_manager(opts.lang,
                         opts.order,
                         asplit._have_split(),
                         dev_type=state['device_type'],
                         strided_c_copy=lang == 'c')
    # every non-constant array is registered as both an input and an output
    mem.add_arrays([x for x in lp_arrays],
                   in_arrays=[x.name for x in lp_arrays if x not in const],
                   out_arrays=[x.name for x in lp_arrays if x not in const],
                   host_constants=const)

    # create "kernel"
    size_type = 'int'
    lang_headers = []
    if lang == 'opencl':
        lang_headers.extend([
            '#include "memcpy_2d.oclh"', '#include "vectorization.oclh"',
            '#include <CL/cl.h>', '#include "ocl_errorcheck.oclh"'
        ])
        size_type = 'cl_uint'
    elif lang == 'c':
        lang_headers.extend(
            ['#include "memcpy_2d.h"', '#include "error_check.h"'])

    # kernel must copy in and out, using the mem_manager's format
    # (from here on `knl` is the source text of the copy loop, no longer a
    # loopy kernel)
    knl = Template("""
    for (size_t offset = 0; offset < problem_size; offset += per_run)
    {
        ${type} this_run = problem_size - offset < per_run ? \
            problem_size - offset : per_run;
        /* Memory Transfers into the kernel, if any */
        ${mem_transfers_in}

        /* Memory Transfers out */
        ${mem_transfers_out}
    }
    """).safe_substitute(type=size_type,
                         mem_transfers_in=mem._mem_transfers(
                             to_device=True, host_postfix='_save'),
                         mem_transfers_out=mem.get_mem_transfers_out(),
                         problem_size=ics)

    # create the host memory allocations
    host_names = ['h_' + arr.name for arr in lp_arrays]
    host_allocs = mem.get_mem_allocs(True, host_postfix='')

    # device memory allocations
    device_allocs = mem.get_mem_allocs(False)

    # copy to save for test
    host_name_saves = ['h_' + a.name + '_save' for a in lp_arrays]
    host_const_allocs = mem.get_host_constants()
    # one "_save" array initialized with the (split) numpy values per test
    # array, plus a memset zeroing the corresponding host buffer
    host_copies = [
        Template("""
        ${type} ${save} [${size}] = {${vals}};
        memset(${host}, 0, ${size} * sizeof(${type}));
        """).safe_substitute(save='h_' + lp_arrays[i].name + '_save',
                             host='h_' + lp_arrays[i].name,
                             size=arrays[i].size,
                             vals=', '.join(
                                 [str(x) for x in arrays[i].flatten()]),
                             type=dtype) for i in range(len(arrays))
    ]

    # and finally checks
    # (element-wise comparison of each host buffer with its "_save" copy)
    check_template = Template("""
        for(int i = 0; i < ${size}; ++i)
        {
            assert(${host}[i] == ${save}[i]);
        }
    """)
    checks = [
        check_template.safe_substitute(host=host_names[i],
                                       save=host_name_saves[i],
                                       size=arrays[i].size)
        for i in range(len(arrays))
    ]

    # and preambles
    ocl_preamble = """
    double* temp_d;
    int* temp_i;
    // create a context / queue
    int lim = 10;
    cl_uint num_platforms;
    cl_uint num_devices;
    cl_platform_id platform [lim];
    cl_device_id device [lim];
    cl_int return_code;
    cl_context context;
    cl_command_queue queue;
    check_err(clGetPlatformIDs(lim, platform, &num_platforms));
    for (int i = 0; i < num_platforms; ++i)
    {
        check_err(clGetDeviceIDs(platform[i], CL_DEVICE_TYPE_ALL, lim, device,
            &num_devices));
        if(num_devices > 0)
            break;
    }
    context = clCreateContext(NULL, 1, &device[0], NULL, NULL, &return_code);
    check_err(return_code);

    //create queue
    queue = clCreateCommandQueue(context, device[0], 0, &return_code);
    check_err(return_code);
    """
    # the preamble and the closing cleanup code are only emitted for OpenCL
    preamble = ''
    if lang == 'opencl':
        preamble = ocl_preamble

    end = ''
    if lang == 'opencl':
        end = """
        check_err(clFlush(queue));
        check_err(clReleaseCommandQueue(queue));
        check_err(clReleaseContext(context));
    """

    # assemble the complete source file of the test program
    file_src = Template("""
${lang_headers}
#include <stdlib.h>
#include <string.h>
#include <assert.h>


void main()
{
    ${preamble}

    double zero [${max_dim}] = {0};

    ${size_type} problem_size = ${problem_size};
    ${size_type} per_run = ${max_per_run};

    ${host_allocs}
    ${host_const_allocs}
    ${mem_declares}
    ${device_allocs}

    ${mem_saves}

    ${host_constant_copy}

    ${knl}

    ${checks}

    ${end}

    exit(0);
}
    """).safe_substitute(lang_headers='\n'.join(lang_headers),
                         mem_declares=mem.get_defns(),
                         host_allocs=host_allocs,
                         host_const_allocs=host_const_allocs,
                         device_allocs=device_allocs,
                         mem_saves='\n'.join(host_copies),
                         host_constant_copy=mem.get_host_constants_in(),
                         checks='\n'.join(checks),
                         knl=knl,
                         preamble=preamble,
                         end=end,
                         size_type=size_type,
                         max_per_run=max_per_run,
                         problem_size=ics,
                         max_dim=max([x.size for x in arrays]))

    # write file
    fname = os.path.join(build_dir, 'test' + utils.file_ext[lang])
    with open(fname, 'w') as file:
        file.write(file_src)
    files = [fname]

    # write aux
    write_aux(build_dir, opts, [], [])

    # copy any deps
    def __copy_deps(lang,
                    scan_path,
                    out_path,
                    change_extension=True,
                    ffilt=None):
        """
        Copy the regular files in `scan_path` (except those ending in '.in')
        to `out_path`, and return the destination paths of the non-header
        files.

        If `ffilt` is given, only file names containing it are copied.  If
        `change_extension` is set, a file that does not already end in the
        source (or, for headers, header) extension of `lang` has its
        extension replaced by it.
        """
        deps = [
            x for x in os.listdir(scan_path)
            if os.path.isfile(os.path.join(scan_path, x))
            and not x.endswith('.in')
        ]
        if ffilt is not None:
            deps = [x for x in deps if ffilt in x]
        files = []
        for dep in deps:
            dep_dest = dep
            dep_is_header = dep.endswith(utils.header_ext[lang])
            ext = (utils.file_ext[lang]
                   if not dep_is_header else utils.header_ext[lang])
            if change_extension and not dep.endswith(ext):
                dep_dest = dep[:dep.rfind('.')] + ext
            shutil.copyfile(os.path.join(scan_path, dep),
                            os.path.join(out_path, dep_dest))
            # only non-header files are returned to the caller
            if not dep_is_header:
                files.append(os.path.join(out_path, dep_dest))
        return files

    # language specific utilities first, then the common 'memcpy_2d' files
    scan = os.path.join(script_dir, os.pardir, 'kernel_utils', lang)
    files += __copy_deps(lang, scan, build_dir)
    scan = os.path.join(script_dir, os.pardir, 'kernel_utils', 'common')
    files += __copy_deps(host_langs[lang],
                         scan,
                         build_dir,
                         change_extension=False,
                         ffilt='memcpy_2d')

    # build
    # (file names are passed on without their extension)
    files = [
        file_struct(lang, lang, f[:f.rindex('.')], [build_dir], [], build_dir,
                    obj_dir, True, True) for f in files
    ]
    # every compiler call must return a falsy value
    assert not any(compiler(x) for x in files)
    lib = libgen(lang, obj_dir, lib_dir, [x.filename for x in files], True,
                 False, True)
    lib = os.path.join(lib_dir, lib)
    # and run
    # (check_call raises if the program exits with a non-zero status)
    subprocess.check_call(lib)
Exemple #15
0
    def test_read_initial_conditions(self):
        """
        Build the initial-condition reader as a python extension for each
        tested combination of data order and vector depth / width, then run
        ``ric_tester.py`` against data files written from the stored state.
        """
        build_dir = self.store.build_dir
        obj_dir = self.store.obj_dir
        lib_dir = self.store.lib_dir
        setup = test_utils.get_read_ics_source()
        for directory in (build_dir, obj_dir, lib_dir):
            utils.create_dir(directory)

        option_grid = OrderedDict([
            # no need to test conv
            ('conp', [True]),
            ('order', ['C', 'F']),
            ('depth', [4, None]),
            ('width', [4, None]),
            ('lang', ['c'])
        ])
        for state in OptionLoop(option_grid):
            # a depth and a width are never tested together
            if state['depth'] and state['width']:
                continue
            self.__cleanup(False)
            # create dummy loopy opts
            opts = type('', (object, ), state)()
            asplit = array_splitter(opts)

            common_dir = os.path.join(self.store.script_dir, os.pardir,
                                      'kernel_utils', 'common')
            helper_dir = os.path.join(self.store.script_dir, 'test_utils')

            # get source
            source_path = os.path.realpath(
                os.path.join(common_dir, 'read_initial_conditions.c.in'))
            with open(source_path, 'r') as source_file:
                source_template = Template(source_file.read())
            # subs
            source_text = source_template.safe_substitute(
                mechanism='mechanism.h', vectorization='vectorization.h')
            # write
            target_path = os.path.join(build_dir,
                                       'read_initial_conditions.c')
            with open(target_path, 'w') as target_file:
                target_file.write(source_text)

            # write header
            write_aux(build_dir, opts, self.store.specs, self.store.reacs)
            # write setup
            with open(os.path.join(build_dir, 'setup.py'), 'w') as setup_file:
                setup_file.write(setup.safe_substitute(buildpath=build_dir))
            # copy read ics header to final dest
            shutil.copyfile(
                os.path.join(common_dir, 'read_initial_conditions.h'),
                os.path.join(build_dir, 'read_initial_conditions.h'))
            # copy wrapper
            shutil.copyfile(
                os.path.join(helper_dir, 'read_ic_wrapper.pyx'),
                os.path.join(build_dir, 'read_ic_wrapper.pyx'))

            # build the extension with an interpreter named after the
            # running major.minor version
            python_str = 'python{}.{}'.format(*sys.version_info[:2])
            build_call = [
                python_str,
                os.path.join(build_dir, 'setup.py'),
                'build_ext',
                '--build-lib',
                lib_dir,
            ]
            subprocess.check_call(build_call)
            # copy in tester
            shutil.copyfile(os.path.join(helper_dir, 'ric_tester.py'),
                            os.path.join(lib_dir, 'ric_tester.py'))

            # For simplicity (and really, lack of need) we test CONP only
            # hence, the extra variable is the volume, while the fixed
            # parameter is the pressure

            # save phi, param in correct order
            if opts.conp:
                phi = self.store.phi_cp
            else:
                phi = self.store.phi_cv
            save_phi, = asplit.split_numpy_arrays(phi)
            save_phi = save_phi.flatten(opts.order)
            param = self.store.P if opts.conp else self.store.V
            save_phi.tofile(os.path.join(lib_dir, 'phi_test.npy'))
            param.tofile(os.path.join(lib_dir, 'param_test.npy'))

            # save bin file: temperature column, then the parameter, then
            # the remaining columns of phi (species)
            columns = (
                np.reshape(phi[:, 0], (-1, 1)),
                np.reshape(param, (-1, 1)),
                phi[:, 1:],
            )
            out_file = np.concatenate(columns, axis=1).flatten('K')
            with open(os.path.join(lib_dir, 'data.bin'), 'wb') as data_file:
                out_file.tofile(data_file)

            # and run
            run_call = [
                python_str,
                os.path.join(lib_dir, 'ric_tester.py'),
                opts.order,
                str(self.store.test_size),
            ]
            subprocess.check_call(run_call)