コード例 #1
0
ファイル: transformer.py プロジェクト: sujathakestur/devito
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    it_range = []
    for tree in trees:
        if isinstance(tree, IterationTree):
            for bounds in [it.bounds() for it in tree]:
                it_range.extend(bounds)

    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(DefaultDimension(
                            name='range', default_value=len(it_range)), ),
                        dtype=np.int32,
                        scope='stack')

    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))

    ops_par_loop_call = Call(namespace['ops_par_loop'], [
        Literal(ops_kernel.name),
        Literal('"%s"' % ops_kernel.name), block, dims, range_array, *[
            create_ops_arg(p, accessible_origin, name_to_ops_dat,
                           par_to_ops_stencil) for p in parameters
        ]
    ])

    return [range_array_init], ops_par_loop_call
コード例 #2
0
ファイル: test_ops.py プロジェクト: gyc567/devito
    def test_create_ops_dat_function(self):
        grid = Grid(shape=(4))

        u = Function(name='u', grid=grid, space_order=2)

        block = OpsBlock('block')

        name_to_ops_dat = {}

        result = create_ops_dat(u, name_to_ops_dat, block)

        assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
        assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

        assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
        assert result[0].expr.rhs.params == (Integer(4), )

        assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
        assert result[1].expr.rhs.params == (Zero(), )

        assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
        assert result[2].expr.rhs.params == (Integer(2), )

        assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
        assert result[3].expr.rhs.params == (Integer(-2), )

        assert result[4].expr.lhs == name_to_ops_dat['u']
        assert type(result[4].expr.rhs) == namespace['ops_decl_dat']
        assert result[4].expr.rhs.args == (
            block, 1, Symbol(namespace['ops_dat_dim'](u.name)),
            Symbol(namespace['ops_dat_base'](u.name)),
            Symbol(namespace['ops_dat_d_m'](u.name)),
            Symbol(namespace['ops_dat_d_p'](u.name)), Byref(u.indexify(
                (0, ))), Literal('"%s"' % u._C_typedata), Literal('"u"'))
コード例 #3
0
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    it_range = []
    devito_to_ops_indexer = 1
    for tree in trees:
        if isinstance(tree, IterationTree):
            for i in tree:
                it_range.extend(
                    [i.symbolic_min, i.symbolic_max + devito_to_ops_indexer])

    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(DefaultDimension(
                            name='range', default_value=len(it_range)), ),
                        dtype=np.int32,
                        scope='stack')

    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))

    ops_args = []
    for p in parameters:
        ops_arg = create_ops_arg(p, accessible_origin, name_to_ops_dat,
                                 par_to_ops_stencil)

        ops_args.append(
            ops_arg.ops_type(ops_arg.ops_name, ops_arg.elements_per_point,
                             ops_arg.dtype, ops_arg.rw_flag))

    ops_par_loop_call = Call(namespace['ops_par_loop'], [
        Literal(ops_kernel.name),
        Literal('"%s"' % ops_kernel.name), block, dims, range_array, *ops_args
    ])

    return [range_array_init], ops_par_loop_call
コード例 #4
0
ファイル: transformer.py プロジェクト: smartalecH/devito
def create_ops_arg(p, name_to_ops_dat, par_to_ops_stencil):
    if p.is_Constant:
        return namespace['ops_arg_gbl'](
            Byref(Constant(name=p.name[1:])), 1,
            Literal('"%s"' % dtype_to_cstr(p.dtype)), namespace['ops_read'])
    else:
        return namespace['ops_arg_dat'](
            name_to_ops_dat[p.name], 1, par_to_ops_stencil[p],
            Literal('"%s"' % dtype_to_cstr(p.dtype)),
            namespace['ops_read'] if p.read_only else namespace['ops_write'])
コード例 #5
0
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        ops_block = OpsBlock('block')

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for section, trees in find_affine_trees(iet).items():
            dims.append(len(trees[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(trees[0].root))
            symbols -= set(FindSymbols('defines').visit(trees[0].root))
            to_dat |= symbols

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

        for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
            pre_loop, ops_kernel = opsit(trees, n)

            pre_time_loop.extend(pre_loop)
            self._ops_kernels.append(ops_kernel)

        assert (d == dims[0] for d in dims), \
            "The OPS backend currently assumes that all kernels \
            have the same number of dimensions"

        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.append('stdio.h')

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            ops_exit
        ]

        return List(body=body)
コード例 #6
0
def create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil):
    elements_per_point = 1
    dtype = Literal('"%s"' % dtype_to_cstr(p.dtype))

    if p.is_Constant:
        ops_type = namespace['ops_arg_gbl']
        ops_name = Byref(Constant(name=p.name[1:]))
        rw_flag = namespace['ops_read']
    else:
        ops_type = namespace['ops_arg_dat']
        accessible_info = accessible_origin[p.name]
        ops_name = name_to_ops_dat[p.name] \
            if accessible_info.time is None \
            else name_to_ops_dat[accessible_info.origin_name].\
            indexify([Add(accessible_info.time, accessible_info.shift)])
        rw_flag = namespace['ops_read'] if p.read_only else namespace[
            'ops_write']

    ops_arg = OpsArgDecl(ops_type=ops_type,
                         ops_name=ops_name,
                         elements_per_point=elements_per_point,
                         dtype=dtype,
                         rw_flag=rw_flag)

    return ops_arg
コード例 #7
0
def to_ops_stencil(param, accesses):
    dims = len(accesses[0])
    pts = len(accesses)
    stencil_name = namespace['ops_stencil_name'](dims, param.name, pts)

    stencil_array = Array(
        name=stencil_name,
        dimensions=(DefaultDimension(name='len', default_value=dims * pts), ),
        dtype=np.int32,
    )

    ops_stencil = OpsStencil(stencil_name.upper())

    return ops_stencil, [
        Expression(
            ClusterizedEq(
                Eq(stencil_array,
                   ListInitializer(list(itertools.chain(*accesses)))))),
        Expression(
            ClusterizedEq(
                Eq(
                    ops_stencil, namespace['ops_decl_stencil'](
                        dims, pts, Symbol(stencil_array.name),
                        Literal('"%s"' % stencil_name.upper())))))
    ]
コード例 #8
0
ファイル: transformer.py プロジェクト: sujathakestur/devito
def create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil):
    if p.is_Constant:
        return namespace['ops_arg_gbl'](
            Byref(Constant(name=p.name[1:])), 1,
            Literal('"%s"' % dtype_to_cstr(p.dtype)), namespace['ops_read'])
    else:
        accessible_info = accessible_origin[p.name]

        dat_name = name_to_ops_dat[p.name] \
            if accessible_info.time is None \
            else name_to_ops_dat[accessible_info.origin_name].\
            indexify([accessible_info.time])

        return namespace['ops_arg_dat'](
            dat_name, 1, par_to_ops_stencil[p],
            Literal('"%s"' % dtype_to_cstr(p.dtype)),
            namespace['ops_read'] if p.read_only else namespace['ops_write'])
コード例 #9
0
ファイル: test_ops.py プロジェクト: smartalecH/devito
        def test_create_ops_arg_constant(self):
            a = Constant(name='*a')

            res = create_ops_arg(a, {}, {})

            assert res.name == namespace['ops_arg_gbl'].name
            assert res.args == [
                Byref(Constant(name='a')), 1,
                Literal('"%s"' % dtype_to_cstr(a.dtype)), namespace['ops_read']
            ]
コード例 #10
0
    def test_create_ops_arg_constant(self):
        a = Constant(name='*a')

        ops_arg = create_ops_arg(a, {}, {}, {})

        assert ops_arg.ops_type == namespace['ops_arg_gbl']
        assert str(ops_arg.ops_name) == str(Byref(Constant(name='a')))
        assert ops_arg.elements_per_point == 1
        assert ops_arg.dtype == Literal('"%s"' % dtype_to_cstr(a.dtype))
        assert ops_arg.rw_flag == namespace['ops_read']
コード例 #11
0
ファイル: test_ops.py プロジェクト: gyc567/devito
    def test_create_ops_arg_constant(self):
        a = Constant(name='*a')

        res = create_ops_arg(a, {}, {}, {})

        assert type(res) == namespace['ops_arg_gbl']
        assert str(res.args[0]) == str(Byref(Constant(name='a')))
        assert res.args[1] == 1
        assert res.args[2] == Literal('"%s"' % dtype_to_cstr(a.dtype))
        assert res.args[3] == namespace['ops_read']
コード例 #12
0
ファイル: test_ops.py プロジェクト: gyc567/devito
    def test_create_ops_arg_function(self, read):

        u = OpsAccessible('u', np.float32, read)
        dat = OpsDat('u_dat')
        stencil = OpsStencil('stencil')
        info = AccessibleInfo(u, None, None)

        res = create_ops_arg(u, {'u': info}, {'u': dat}, {u: stencil})

        assert type(res) == namespace['ops_arg_dat']
        assert res.args == (dat, 1, stencil,
                            Literal('"%s"' % dtype_to_cstr(u.dtype)),
                            namespace['ops_read']
                            if read else namespace['ops_write'])
コード例 #13
0
    def test_create_ops_arg_function(self, read):

        u = OpsAccessible('u', dtype=np.float32, read_only=read)
        dat = OpsDat('u_dat')
        stencil = OpsStencil('stencil')
        info = AccessibleInfo(u, None, None)

        ops_arg = create_ops_arg(u, {'u': info}, {'u': dat}, {u: stencil})

        assert ops_arg.ops_type == namespace['ops_arg_dat']
        assert ops_arg.ops_name == OpsDat('u_dat')
        assert ops_arg.elements_per_point == 1
        assert ops_arg.dtype == Literal('"%s"' % dtype_to_cstr(u.dtype))
        assert ops_arg.rw_flag == \
            namespace['ops_read'] if read else namespace['ops_write']
コード例 #14
0
ファイル: test_ops.py プロジェクト: gyc567/devito
    def test_to_ops_stencil(self, _accesses):
        param = Symbol('foo')
        accesses = eval(_accesses)

        stencil_name = 's2d_foo_%spt' % len(accesses)

        stencil, result = to_ops_stencil(param, accesses)

        assert stencil.name == stencil_name.upper()

        assert result[0].expr.lhs.name == stencil_name
        assert result[0].expr.rhs.params == tuple(itertools.chain(*accesses))

        assert result[1].expr.lhs == stencil
        assert type(result[1].expr.rhs) == namespace['ops_decl_stencil']
        assert result[1].expr.rhs.args == (2, len(accesses),
                                           Symbol(stencil_name),
                                           Literal('"%s"' %
                                                   stencil_name.upper()))
コード例 #15
0
ファイル: test_ops.py プロジェクト: smartalecH/devito
    def test_create_ops_dat_function(self):
        grid = Grid(shape=(4))

        u = Function(name='u', grid=grid, space_order=2)

        block = OpsBlock('block')

        name_to_ops_dat = {}

        result = create_ops_dat(u, name_to_ops_dat, block)

        assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
        assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

        assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
        assert result[0].expr.rhs.params == (Integer(4), )

        assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
        assert result[1].expr.rhs.params == (Zero(), )

        assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
        assert result[2].expr.rhs.params == (Integer(2), )

        assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
        assert result[3].expr.rhs.params == (Integer(-2), )

        assert result[4].expr.lhs == name_to_ops_dat['u']
        assert result[4].expr.rhs.name == namespace['ops_decl_dat'].name
        assert result[4].expr.rhs.args == (
            block, 1, Symbol(namespace['ops_dat_dim'](u.name)),
            Symbol(namespace['ops_dat_base'](u.name)),
            Symbol(namespace['ops_dat_d_m'](u.name)),
            Symbol(namespace['ops_dat_d_p'](u.name)), Byref(u.indexify(
                (0, ))), Literal('"%s"' % u._C_typedata), Literal('"u"'))

        def test_create_ops_arg_constant(self):
            a = Constant(name='*a')

            res = create_ops_arg(a, {}, {})

            assert res.name == namespace['ops_arg_gbl'].name
            assert res.args == [
                Byref(Constant(name='a')), 1,
                Literal('"%s"' % dtype_to_cstr(a.dtype)), namespace['ops_read']
            ]

        @pytest.mark.parametrize('read', [True, False])
        def test_create_ops_arg_function(self, read):
            u = OpsAccessible('u', np.float32, read)

            dat = OpsDat('u_dat')
            stencil = OpsStencil('stencil')

            res = create_ops_arg(u, {'u': dat}, {u: stencil})

            assert res.name == namespace['ops_arg_dat'].name
            assert res.args == [
                dat, 1, stencil,
                Literal('"%s"' % dtype_to_cstr(u.dtype)),
                namespace['ops_read'] if read else namespace['ops_write']
            ]
コード例 #16
0
ファイル: transformer.py プロジェクト: smartalecH/devito
def create_ops_dat(f, name_to_ops_dat, block):
    ndim = f.ndim - (1 if f.is_TimeFunction else 0)

    dim = Array(name=namespace['ops_dat_dim'](f.name),
                dimensions=(DefaultDimension(name='dim',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    base = Array(name=namespace['ops_dat_base'](f.name),
                 dimensions=(DefaultDimension(name='base',
                                              default_value=ndim), ),
                 dtype=np.int32,
                 scope='stack')
    d_p = Array(name=namespace['ops_dat_d_p'](f.name),
                dimensions=(DefaultDimension(name='d_p',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    d_m = Array(name=namespace['ops_dat_d_m'](f.name),
                dimensions=(DefaultDimension(name='d_m',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')

    res = []
    base_val = [Zero() for i in range(ndim)]

    # If f is a TimeFunction we need to create a ops_dat for each time stepping
    # variable (eg: t1, t2)
    if f.is_TimeFunction:
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]

        dim_shape = sympify(f.shape[:time_pos] + f.shape[time_pos + 1:])
        padding = f.padding[:time_pos] + f.padding[time_pos + 1:]
        halo = f.halo[:time_pos] + f.halo[time_pos + 1:]
        d_p_val = tuple(sympify([p[0] + h[0] for p, h in zip(padding, halo)]))
        d_m_val = tuple(
            sympify([-(p[1] + h[1]) for p, h in zip(padding, halo)]))

        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype='ops_dat',
                              scope='stack')

        dat_decls = []
        for i in range(time_dims):
            name = '%s%s%s' % (f.name, time_index, i)
            name_to_ops_dat[name] = ops_dat_array.indexify(
                [Symbol('%s%s' % (time_index, i))])
            dat_decls.append(namespace['ops_decl_dat'](block, 1,
                                                       Symbol(dim.name),
                                                       Symbol(base.name),
                                                       Symbol(d_m.name),
                                                       Symbol(d_p.name),
                                                       Byref(f.indexify([i])),
                                                       Literal('"%s"' %
                                                               f._C_typedata),
                                                       Literal('"%s"' % name)))

        ops_decl_dat = Expression(
            ClusterizedEq(Eq(ops_dat_array, ListInitializer(dat_decls))))
    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat

        d_p_val = tuple(
            sympify([p[0] + h[0] for p, h in zip(f.padding, f.halo)]))
        d_m_val = tuple(
            sympify([-(p[1] + h[1]) for p, h in zip(f.padding, f.halo)]))
        dim_shape = sympify(f.shape)

        ops_decl_dat = Expression(
            ClusterizedEq(
                Eq(
                    ops_dat,
                    namespace['ops_decl_dat'](block, 1, Symbol(dim.name),
                                              Symbol(base.name),
                                              Symbol(d_m.name),
                                              Symbol(d_p.name),
                                              Byref(f.indexify([0])),
                                              Literal('"%s"' % f._C_typedata),
                                              Literal('"%s"' % f.name)))))

    res.append(Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_shape)))))
    res.append(Expression(ClusterizedEq(Eq(base, ListInitializer(base_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val)))))
    res.append(ops_decl_dat)

    return res
コード例 #17
0
def create_ops_dat(f, name_to_ops_dat, block):
    ndim = f.ndim - (1 if f.is_TimeFunction else 0)

    dim = Array(name=namespace['ops_dat_dim'](f.name),
                dimensions=(DefaultDimension(name='dim',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    base = Array(name=namespace['ops_dat_base'](f.name),
                 dimensions=(DefaultDimension(name='base',
                                              default_value=ndim), ),
                 dtype=np.int32,
                 scope='stack')
    d_p = Array(name=namespace['ops_dat_d_p'](f.name),
                dimensions=(DefaultDimension(name='d_p',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    d_m = Array(name=namespace['ops_dat_d_m'](f.name),
                dimensions=(DefaultDimension(name='d_m',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')

    base_val = [Zero() for i in range(ndim)]

    # If f is a TimeFunction we need to create a ops_dat for each time stepping
    # variable (eg: t1, t2)
    if f.is_TimeFunction:
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]

        dim_val = f.shape[:time_pos] + f.shape[time_pos + 1:]
        d_p_val = f._size_nodomain.left[time_pos + 1:]
        d_m_val = [-i for i in f._size_nodomain.right[time_pos + 1:]]

        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype=namespace['ops_dat_type'],
                              scope='stack')

        dat_decls = []
        for i in range(time_dims):
            name = '%s%s%s' % (f.name, time_index, i)

            dat_decls.append(namespace['ops_decl_dat'](block, 1,
                                                       Symbol(dim.name),
                                                       Symbol(base.name),
                                                       Symbol(d_m.name),
                                                       Symbol(d_p.name),
                                                       Byref(f.indexify([i])),
                                                       Literal('"%s"' %
                                                               f._C_typedata),
                                                       Literal('"%s"' % name)))

        ops_decl_dat = Expression(
            ClusterizedEq(Eq(ops_dat_array, ListInitializer(dat_decls))))

        # Inserting the ops_dat array in case of TimeFunction.
        name_to_ops_dat[f.name] = ops_dat_array

    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat

        dim_val = f.shape
        d_p_val = f._size_nodomain.left
        d_m_val = [-i for i in f._size_nodomain.right]

        ops_decl_dat = Expression(
            ClusterizedEq(
                Eq(
                    ops_dat,
                    namespace['ops_decl_dat'](block, 1, Symbol(dim.name),
                                              Symbol(base.name),
                                              Symbol(d_m.name),
                                              Symbol(d_p.name),
                                              Byref(f.indexify([0])),
                                              Literal('"%s"' % f._C_typedata),
                                              Literal('"%s"' % f.name)))))

    dim_val = Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_val))))
    base_val = Expression(ClusterizedEq(Eq(base, ListInitializer(base_val))))
    d_p_val = Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val))))
    d_m_val = Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val))))

    return OpsDatDecl(dim_val=dim_val,
                      base_val=base_val,
                      d_p_val=d_p_val,
                      d_m_val=d_m_val,
                      ops_decl_dat=ops_decl_dat)
コード例 #18
0
def make_ops_kernels(iet):
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized using OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block, namespace['ops_decl_block'](dims[0],
                                                      Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat, f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    assert (d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels \
        have the same number of dimensions"

    iet = iet._rebuild(body=flatten([
        ops_init, ops_block_init, pre_time_loop, ops_partition, iet.body,
        after_time_loop, ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }
コード例 #19
0
ファイル: operator.py プロジェクト: BrunoMot/devito
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        affine_trees = find_affine_trees(iet).items()

        # If there is no affine trees, then there is no loop to be optimized using OPS.
        if not affine_trees:
            return iet

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for _, tree in affine_trees:
            dims.append(len(tree[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(tree[0].root))
            symbols -= set(FindSymbols('defines').visit(tree[0].root))
            to_dat |= symbols

        # Create the OPS block for this problem
        ops_block = OpsBlock('block')
        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        after_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(
                list(create_ops_dat(f, name_to_ops_dat, ops_block)))
            # To return the result to Devito, it is necessary to copy the data
            # from the dat object back to the CPU memory.
            after_time_loop.extend(
                create_ops_fetch(f, name_to_ops_dat,
                                 self.time_dimension.extreme_max))

        # Generate ops kernels for each offloadable iteration tree
        mapper = {}
        for n, (_, tree) in enumerate(affine_trees):
            pre_loop, ops_kernel, ops_par_loop_call = opsit(
                tree, n, name_to_ops_dat, ops_block, dims[0])

            pre_time_loop.extend(pre_loop)
            self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
                MetaCall(ops_kernel, False)
            mapper[tree[0].root] = ops_par_loop_call
            mapper.update({i.root: mapper.get(i.root)
                           for i in tree})  # Drop trees

        iet = Transformer(mapper).visit(iet)

        assert (d == dims[0] for d in dims), \
            "The OPS backend currently assumes that all kernels \
            have the same number of dimensions"

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.extend(['stdio.h', 'ops_seq.h'])

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            *after_time_loop, ops_exit
        ]

        return List(body=body)