Example 1
 def _getkey(self, grid, dtype, dimensions=None):
     base = (configuration['platform'].isa, dtype)
     if grid is not None:
         dims = filter_sorted((grid.time_dim, grid.stepping_dim) + grid.dimensions)
         return base + (tuple(dims),)
     elif dimensions:
         dims = filter_sorted([i for i in dimensions if i.is_Space])
         return base + (tuple(dims),)
     else:
         return base + ((),)
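
Every example on this page funnels through the same helper. Below is a minimal sketch of what filter_sorted plausibly does, assuming the devito.tools semantics of duplicate removal (filter_ordered) followed by a deterministic sort; the real implementation may differ in its default-key handling.

from operator import attrgetter

def filter_ordered(elements, key=None):
    # Drop duplicates while preserving the original ordering
    key = key or (lambda i: i)
    seen, result = set(), []
    for i in elements:
        k = key(i)
        if k not in seen:
            seen.add(k)
            result.append(i)
    return result

def filter_sorted(elements, key=None):
    # Deduplicate, then sort; passing key=attrgetter('name') makes the
    # ordering deterministic for objects with no natural order
    return sorted(filter_ordered(elements), key=key)

class D:
    # Stand-in for a Devito symbol carrying a `name`
    def __init__(self, name):
        self.name = name

assert filter_sorted([3, 1, 2, 1]) == [1, 2, 3]
assert [d.name for d in filter_sorted([D('y'), D('x')],
                                      key=attrgetter('name'))] == ['x', 'y']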
Example 2
def detect_io(exprs, relax=False):
    """
    ``{exprs} -> ({reads}, {writes})``

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        The searched expressions.
    relax : bool, optional
        If False (the default), collect only Constants and Functions.
        Otherwise, collect any Basic object.
    """
    exprs = as_tuple(exprs)
    if relax is False:
        rule = lambda i: i.is_Input
    else:
        rule = lambda i: i.is_Scalar or i.is_Tensor

    # Don't forget this nasty case, with indirections on the LHS:
    # >>> u[t, a[x]] = f[x]  -> (reads={a, f}, writes={u})

    roots = []
    for i in exprs:
        try:
            roots.append(i.rhs)
            roots.extend(list(i.lhs.indices))
        except AttributeError:
            # E.g., FunctionFromPointer
            roots.append(i)

    reads = []
    terminals = flatten(retrieve_terminals(i, deep=True) for i in roots)
    for i in terminals:
        candidates = set(i.free_symbols)
        try:
            candidates.update({i.function})
        except AttributeError:
            pass
        for j in candidates:
            try:
                if rule(j):
                    reads.append(j)
            except AttributeError:
                pass

    writes = []
    for i in exprs:
        try:
            f = i.lhs.function
        except AttributeError:
            continue
        if rule(f):
            writes.append(f)

    return filter_sorted(reads), filter_sorted(writes)
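
A toy rendition of the split performed above, with plain stand-ins in place of Devito's symbolics (the class names and attributes here are illustrative, not the real API); it mirrors how the LHS indices join the RHS among the read roots:

class F:
    # Stand-in for a Function/Constant; is_Input marks collectable objects
    def __init__(self, name, is_Input=True):
        self.name, self.is_Input = name, is_Input

class Assign:
    # Stand-in for an Eq: one written function, plus everything read
    def __init__(self, lhs, lhs_indices, rhs_terminals):
        self.lhs, self.lhs_indices, self.rhs_terminals = lhs, lhs_indices, rhs_terminals

u, a, f = F('u'), F('a'), F('f')
eq = Assign(u, [a], [f])  # models the indirection case u[t, a[x]] = f[x]
reads = sorted((t for t in eq.lhs_indices + eq.rhs_terminals if t.is_Input),
               key=lambda i: i.name)
writes = [eq.lhs] if eq.lhs.is_Input else []
assert [i.name for i in reads] == ['a', 'f'] and writes == [u]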
Example 3
 def __repr__(self):
     tracked = filter_sorted(set(self.reads) | set(self.writes), key=lambda i: i.name)
     maxlen = max([1] + [len(i.name) for i in tracked])
     out = "{:>%d} =>  W : {}\n{:>%d}     R : {}" % (maxlen, maxlen)
     pad = " "*(maxlen + 9)
     reads = [self.getreads(i) for i in tracked]
     for i, r in enumerate(list(reads)):
         if not r:
             reads[i] = ''
             continue
         first = "%s" % tuple.__repr__(r[0])
         shifted = "\n".join("%s%s" % (pad, tuple.__repr__(j)) for j in r[1:])
         shifted = "%s%s" % ("\n" if shifted else "", shifted)
         reads[i] = first + shifted
     writes = [self.getwrites(i) for i in tracked]
     for i, w in enumerate(list(writes)):
         if not w:
             writes[i] = ''
             continue
         first = "%s" % tuple.__repr__(w[0])
         shifted = "\n".join("%s%s" % (pad, tuple.__repr__(j)) for j in w[1:])
         shifted = "%s%s" % ("\n" if shifted else "", shifted)
         writes[i] = '\033[1;37;31m%s\033[0m' % (first + shifted)
     return "\n".join([out.format(i.name, w, '', r)
                       for i, r, w in zip(tracked, reads, writes)])
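
The '\033[1;37;31m%s\033[0m' wrapper above is an ordinary ANSI escape sequence, used so that writes stand out from reads; in isolation:

RED, RESET = '\033[1;37;31m', '\033[0m'
# On an ANSI-capable terminal, this prints the access in bold red, exactly
# how __repr__ above highlights the write accesses
print('%su(t + 1, x, y)%s' % (RED, RESET))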
Example 4
    def visit_Operator(self, o):
        # Kernel signature and body
        body = flatten(self._visit(i) for i in o.children)
        decls = self._args_decl(o.parameters)
        signature = c.FunctionDeclaration(c.Value(o.retval, o.name), decls)
        retval = [c.Statement("return 0")]
        kernel = c.FunctionBody(signature, c.Block(body + retval))

        # Elemental functions
        esigns = []
        efuncs = [blankline]
        for i in o._func_table.values():
            if i.local:
                esigns.append(c.FunctionDeclaration(c.Value(i.root.retval, i.root.name),
                                                    self._args_decl(i.root.parameters)))
                efuncs.extend([i.root.ccode, blankline])

        # Header files, extra definitions, ...
        header = [c.Line(i) for i in o._headers]
        includes = [c.Include(i, system=False) for i in o._includes]
        includes += [blankline]
        cdefs = [i._C_typedecl for i in o.parameters if i._C_typedecl is not None]
        cdefs = filter_sorted(cdefs, key=lambda i: i.tpname)
        if o._compiler.src_ext == 'cpp':
            cdefs += [c.Extern('C', signature)]
        cdefs = [i for j in cdefs for i in (j, blankline)]

        return c.Module(header + includes + cdefs +
                        esigns + [blankline, kernel] + efuncs)
Example 5
def dimension_sort(expr):
    """
    Topologically sort the :class:`Dimension`s in ``expr``, based on the order
    in which they appear within :class:`Indexed`s.
    """

    def handle_indexed(indexed):
        relation = []
        for i in indexed.indices:
            try:
                maybe_dim = split_affine(i).var
                if isinstance(maybe_dim, Dimension):
                    relation.append(maybe_dim)
            except ValueError:
                # Maybe there are some nested Indexeds (e.g., the situation is A[B[i]])
                nested = flatten(handle_indexed(n) for n in retrieve_indexed(i))
                if nested:
                    relation.extend(nested)
                else:
                    # Fallback: Just insert all the Dimensions we find, regardless of
                    # what the user is attempting to do
                    relation.extend([d for d in filter_sorted(i.free_symbols)
                                     if isinstance(d, Dimension)])
        return tuple(relation)

    relations = {handle_indexed(i) for i in retrieve_indexed(expr, mode='all')}

    # Add in any implicit dimension (typical of scalar temporaries, or Step)
    relations.add(expr.implicit_dims)

    # Add in leftover free dimensions (not an Indexed's index)
    extra = set([i for i in expr.free_symbols if isinstance(i, Dimension)])

    # Add in pure data dimensions (e.g., those accessed only via explicit values,
    # such as A[3])
    indexeds = retrieve_indexed(expr, deep=True)
    extra.update(set().union(*[set(i.function.indices) for i in indexeds]))

    # Enforce determinism
    extra = filter_sorted(extra, key=attrgetter('name'))

    # Add in implicit relations for parent dimensions
    # -----------------------------------------------
    # 1) Note that (d.parent, d) is what we want, while (d, d.parent) would be
    # wrong; for example, in `((t, time), (t, x, y), (x, y))`, `x` could now
    # precede `time`, while `t`, and therefore `time`, *must* appear before `x`,
    # as indicated by the second relation
    implicit_relations = {(d.parent, d) for d in extra if d.is_Derived}
    # 2) To handle cases such as `((time, xi), (x,))`, where `xi` is a SubDimension
    # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
    # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
    # `(x, time, xi)` might be returned instead, which would be nonsense
    implicit_relations.update({tuple(d.root for d in i) for i in relations})

    ordering = PartialOrderTuple(extra, relations=(relations | implicit_relations))

    return ordering
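
How pairwise relations induce the final ordering can be seen with the standard library's toposort as a stand-in for PartialOrderTuple (an assumption: PartialOrderTuple performs a deterministic topological sort over these relations):

from graphlib import TopologicalSorter  # Python >= 3.9

relations = [('t', 'time'), ('t', 'x'), ('x', 'y')]
ts = TopologicalSorter()
for before, after in relations:
    ts.add(after, before)  # `before` must precede `after`
# One valid order, e.g. ['t', 'time', 'x', 'y']
print(list(ts.static_order()))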
Example 6
def detect_io(exprs, relax=False):
    """``{exprs} -> ({reads}, {writes})

    :param exprs: The expressions inspected.
    :param relax: (Optional) If False (the default), collect only
                  :class:`Constant`s and :class:`Function`s. Otherwise,
                  collect any :class:`Basic`s.
    """
    exprs = as_tuple(exprs)
    if relax is False:
        rule = lambda i: i.is_Input
    else:
        rule = lambda i: i.is_Scalar or i.is_Tensor

    reads = []
    for i in flatten(retrieve_terminals(i, deep=True) for i in exprs):
        candidates = set(i.free_symbols)
        try:
            candidates.update({i.base.function})
        except AttributeError:
            pass
        for j in candidates:
            try:
                if rule(j):
                    reads.append(j)
            except AttributeError:
                pass

    writes = []
    for i in exprs:
        try:
            f = i.lhs.base.function
        except AttributeError:
            continue
        if rule(f):
            writes.append(f)

    return filter_sorted(reads), filter_sorted(writes)
Example 7
    def place_ondevice(self, iet, **kwargs):
        efuncs = kwargs['efuncs']

        storage = Storage()

        if iet.is_ElementalFunction:
            return iet, {}

        # Collect written and read-only symbols
        writes = set()
        reads = set()
        for efunc in efuncs:
            for i, v in MapExprStmts().visit(efunc).items():
                if not i.is_Expression:
                    # No-op
                    continue
                if not any(
                        isinstance(j, self._Parallelizer._Iteration)
                        for j in v):
                    # Not an offloaded Iteration tree
                    continue
                if i.write.is_DiscreteFunction:
                    writes.add(i.write)
                reads = (reads | {r
                                  for r in i.reads
                                  if r.is_DiscreteFunction}) - writes

        # Update `storage`
        for i in filter_sorted(writes):
            self._map_function_on_high_bw_mem(i, storage)
        for i in filter_sorted(reads):
            self._map_function_on_high_bw_mem(i, storage, read_only=True)

        iet = self._dump_storage(iet, storage)

        return iet, {}
Example 8
 def handle_indexed(indexed):
     relation = []
     for i in indexed.indices:
         try:
             maybe_dim = split_affine(i).var
             if isinstance(maybe_dim, Dimension):
                 relation.append(maybe_dim)
         except ValueError:
             # Maybe there are some nested Indexeds (e.g., the situation is A[B[i]])
             nested = flatten(handle_indexed(n) for n in retrieve_indexed(i))
             if nested:
                 relation.extend(nested)
             else:
                 # Fallback: Just insert all the Dimensions we find, regardless of
                 # what the user is attempting to do
                 relation.extend([d for d in filter_sorted(i.free_symbols)
                                  if isinstance(d, Dimension)])
     return tuple(relation)
Example 9
    def visit_Operator(self, o):
        blankline = c.Line("")

        # Kernel signature and body
        body = flatten(self._visit(i) for i in o.children)
        decls = self._args_decl(o.parameters)
        signature = c.FunctionDeclaration(c.Value(o.retval, o.name), decls)
        retval = [c.Line(), c.Statement("return 0")]
        kernel = c.FunctionBody(signature, c.Block(body + retval))

        # Elemental functions
        esigns = []
        efuncs = [blankline]
        for i in o._func_table.values():
            if i.local:
                esigns.append(
                    c.FunctionDeclaration(c.Value(i.root.retval, i.root.name),
                                          self._args_decl(i.root.parameters)))
                efuncs.extend([i.root.ccode, blankline])

        # Header files, extra definitions, ...
        header = [c.Define(*i) for i in o._headers] + [blankline]
        includes = [
            c.Include(i, system=not i.endswith('.h'))
            for i in o._includes
        ]
        includes += [blankline]
        cdefs = [
            i._C_typedecl for i in o.parameters if i._C_typedecl is not None
        ]
        for i in o._func_table.values():
            if i.local:
                cdefs.extend([
                    j._C_typedecl for j in i.root.parameters
                    if j._C_typedecl is not None
                ])
        cdefs = filter_sorted(cdefs, key=lambda i: i.tpname)
        if o._compiler.src_ext == 'cpp':
            cdefs += [c.Extern('C', signature)]
        cdefs = [i for j in cdefs for i in (j, blankline)]

        return c.Module(header + includes + cdefs + esigns +
                        [blankline, kernel] + efuncs)
Example 10
    def __init__(self,
                 nodes,
                 dimension,
                 limits,
                 index=None,
                 offsets=None,
                 properties=None,
                 pragmas=None,
                 uindices=None):
        # Ensure we deal with a list of Expression objects internally
        nodes = as_tuple(nodes)
        self.nodes = as_tuple(
            [n if isinstance(n, Node) else Expression(n) for n in nodes])
        assert all(isinstance(i, Node) for i in self.nodes)

        self.dim = dimension
        self.index = index or self.dim.name
        # Store direction, as it might change on the dimension
        # before we use it during code generation.
        self.reverse = self.dim.reverse

        # Generate loop limits
        if isinstance(limits, Iterable):
            assert (len(limits) == 3)
            self.limits = list(limits)
        else:
            self.limits = list((0, limits, 1))

        # Record offsets to later adjust loop limits accordingly
        self.offsets = [0, 0]
        for off in (offsets or {}):
            self.offsets[0] = min(self.offsets[0], int(off))
            self.offsets[1] = max(self.offsets[1], int(off))

        # Track this Iteration's properties, pragmas and unbounded indices
        properties = as_tuple(properties)
        assert all(i in IterationProperty._KNOWN for i in properties)
        self.properties = as_tuple(
            filter_sorted(properties, key=lambda i: i.name))
        self.pragmas = as_tuple(pragmas)
        self.uindices = as_tuple(uindices)
        assert all(isinstance(i, UnboundedIndex) for i in self.uindices)
Example 11
File: mpi.py Project: ofmla/devito
def mpiize(iet, **kwargs):
    """
    Add MPI routines performing halo exchanges to emit distributed-memory
    parallel code.
    """
    mode = kwargs.pop('mode')
    language = kwargs.pop('language')
    sregistry = kwargs.pop('sregistry')

    # To produce unique object names
    generators = {'msg': generator(), 'comm': generator(), 'comp': generator()}

    sync_heb = HaloExchangeBuilder('basic', language, sregistry, **generators)
    user_heb = HaloExchangeBuilder(mode, language, sregistry, **generators)
    mapper = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        heb = user_heb if isinstance(hs, OverlappableHaloSpot) else sync_heb
        mapper[hs] = heb.make(hs)

    efuncs = sync_heb.efuncs + user_heb.efuncs
    objs = filter_sorted(sync_heb.objs + user_heb.objs)
    iet = Transformer(mapper, nested=True).visit(iet)

    # Must drop the PARALLEL tag from the Iterations within which halo
    # exchanges are performed
    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        for i in reversed(tree):
            if i in mapper:
                # Already seen this subtree, skip
                break
            if FindNodes(Call).visit(i):
                mapper.update({
                    n: n._rebuild(properties=set(n.properties) - {PARALLEL})
                    for n in tree[:tree.index(i) + 1]
                })
                break
    iet = Transformer(mapper, nested=True).visit(iet)

    return iet, {'includes': ['mpi.h'], 'efuncs': efuncs, 'args': objs}
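
The property-stripping step at the end of mpiize, reduced to its core with toy stand-ins (the real code walks the IET with FindNodes and Transformer): every Iteration from the tree root down to the one hosting a halo-exchange Call is rebuilt without PARALLEL.

PARALLEL = 'PARALLEL'

class Iter:
    # Toy immutable node: rebuilding yields a fresh copy with new properties
    def __init__(self, name, properties):
        self.name, self.properties = name, set(properties)
    def _rebuild(self, properties):
        return Iter(self.name, properties)

tree = [Iter('time', {PARALLEL}), Iter('x', {PARALLEL, 'AFFINE'})]
call_at = 1  # index of the Iteration that hosts the halo-exchange Call
mapper = {n: n._rebuild(properties=n.properties - {PARALLEL})
          for n in tree[:call_at + 1]}
assert all(PARALLEL not in m.properties for m in mapper.values())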
Example 12
    def __init__(self, nodes, dimension, limits, direction=None, properties=None,
                 pragmas=None, uindices=None):
        self.nodes = as_tuple(nodes)
        self.dim = dimension
        self.index = self.dim.name
        self.direction = direction or Forward

        # Generate loop limits
        if isinstance(limits, Iterable):
            assert(len(limits) == 3)
            self.limits = tuple(limits)
        elif self.dim.is_Incr:
            self.limits = (self.dim.symbolic_min, limits, self.dim.step)
        else:
            self.limits = (0, limits, 1)

        # Track this Iteration's properties, pragmas and unbounded indices
        properties = as_tuple(properties)
        assert all(i in Property._KNOWN for i in properties)
        self.properties = as_tuple(filter_sorted(properties))
        self.pragmas = as_tuple(pragmas)
        self.uindices = as_tuple(uindices)
        assert all(i.is_Derived and self.dim in i._defines for i in self.uindices)
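
The loop-limits normalization above, in isolation (the is_Incr branch is omitted here, as it needs a real Dimension): an iterable must already be a (min, max, step) triple, while a bare scalar N is read as range(0, N, 1).

from collections.abc import Iterable

def normalize_limits(limits):
    # Iterable -> explicit triple; scalar N -> (0, N, 1)
    if isinstance(limits, Iterable):
        limits = tuple(limits)
        assert len(limits) == 3
        return limits
    return (0, limits, 1)

assert normalize_limits(7) == (0, 7, 1)
assert normalize_limits([1, 10, 2]) == (1, 10, 2)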
Example 13
    def __init__(self,
                 nodes,
                 dimension,
                 limits,
                 index=None,
                 offsets=None,
                 direction=None,
                 properties=None,
                 pragmas=None,
                 uindices=None):
        # Ensure we deal with a list of Expression objects internally
        self.nodes = as_tuple(nodes)

        self.dim = dimension
        self.index = index or self.dim.name
        self.direction = direction or Forward

        # Generate loop limits
        if isinstance(limits, Iterable):
            assert (len(limits) == 3)
            self.limits = tuple(limits)
        else:
            self.limits = (0, limits, 1)

        # Record offsets to later adjust loop limits accordingly
        self.offsets = (0, 0) if offsets is None else as_tuple(offsets)
        assert len(self.offsets) == 2

        # Track this Iteration's properties, pragmas and unbounded indices
        properties = as_tuple(properties)
        assert all(i in IterationProperty._KNOWN for i in properties)
        self.properties = as_tuple(
            filter_sorted(properties, key=lambda i: i.name))
        self.pragmas = as_tuple(pragmas)
        self.uindices = as_tuple(uindices)
        assert all(isinstance(i, UnboundedIndex) for i in self.uindices)
Example 14
    def _operator_typedecls(self, o, mode='all'):
        if mode == 'all':
            xfilter = lambda i: True
        else:
            public_types = (AbstractFunction, CompositeObject)
            if mode == 'public':
                xfilter = lambda i: isinstance(i, public_types)
            else:
                xfilter = lambda i: not isinstance(i, public_types)

        typedecls = [
            i._C_typedecl for i in o.parameters
            if xfilter(i) and i._C_typedecl is not None
        ]
        for i in o._func_table.values():
            if not i.local:
                continue
            typedecls.extend([
                j._C_typedecl for j in i.root.parameters
                if xfilter(j) and j._C_typedecl is not None
            ])
        typedecls = filter_sorted(typedecls, key=lambda i: i.tpname)

        return typedecls
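
The mode-to-predicate dispatch above, standalone; public_types here is a placeholder for (AbstractFunction, CompositeObject):

def make_xfilter(mode, public_types=(int, float)):
    # 'all' keeps everything; 'public' keeps instances of public_types;
    # anything else keeps the complement
    if mode == 'all':
        return lambda i: True
    elif mode == 'public':
        return lambda i: isinstance(i, public_types)
    else:
        return lambda i: not isinstance(i, public_types)

assert [i for i in (1, 'a', 2.0) if make_xfilter('public')(i)] == [1, 2.0]
assert [i for i in (1, 'a', 2.0) if make_xfilter('private')(i)] == ['a']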
Example 15
File: basic.py Project: nw0/devito
 def cause(self):
     ret = [i.cause for i in self if i.cause is not None]
     return tuple(filter_sorted(ret, key=lambda i: i.name))
Example 16
    def _build(cls, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Eq) for i in expressions):
            raise InvalidOperator("Only `devito.Eq` expressions are allowed.")

        name = kwargs.get("name", "Kernel")
        dse = kwargs.get("dse", configuration['dse'])

        # Python-level (i.e., compile time) and C-level (i.e., run time) performance profiling
        profiler = create_profile('timers')

        # Lower input expressions to internal expressions (e.g., attaching metadata)
        expressions = cls._lower_exprs(expressions, **kwargs)

        # Group expressions based on their iteration space and data dependences
        # Several optimizations are applied (fusion, lifting, flop reduction via DSE, ...)
        clusters = clusterize(expressions, dse_mode=set_dse_mode(dse))

        # Lower Clusters to a Schedule tree
        stree = st_build(clusters)

        # Lower Schedule tree to an Iteration/Expression tree (IET)
        iet = iet_build(stree)

        # Instrument the IET for C-level profiling
        iet = profiler.instrument(iet)

        # Wrap the IET with a Callable
        parameters = derive_parameters(iet, True)
        op = Callable(name, iet, 'int', parameters, ())

        # Lower IET to a Target-specific IET
        op, target_state = cls._specialize_iet(op, **kwargs)

        # Make it an actual Operator
        op = Callable.__new__(cls, **op.args)
        Callable.__init__(op, **op.args)

        # Header files, etc.
        op._headers = list(cls._default_headers)
        op._headers.extend(target_state.headers)
        op._globals = list(cls._default_globals)
        op._includes = list(cls._default_includes)
        op._includes.extend(profiler._default_includes)
        op._includes.extend(target_state.includes)

        # Required for the jit-compilation
        op._compiler = configuration['compiler']
        op._lib = None
        op._cfunction = None

        # References to local or external routines
        op._func_table = OrderedDict()
        op._func_table.update(
            OrderedDict([(i, MetaCall(None, False))
                         for i in profiler._ext_calls]))
        op._func_table.update(
            OrderedDict([(i.root.name, i) for i in target_state.funcs]))

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        op._state = cls._initialize_state(**kwargs)

        # Produced by the various compilation passes
        op._input = filter_sorted(
            flatten(e.reads + e.writes for e in expressions))
        op._output = filter_sorted(flatten(e.writes for e in expressions))
        op._dimensions = filter_sorted(
            flatten(e.dimensions for e in expressions))
        op._dimensions.extend(target_state.dimensions)
        op._dtype, op._dspace = clusters.meta
        op._profiler = profiler

        return op
Example 17
    def place_definitions(self, iet, **kwargs):
        """
        Create a new IET with symbols allocated/deallocated in some memory space.

        Parameters
        ----------
        iet : Callable
            The input Iteration/Expression tree.
        """
        storage = Storage()

        # Collect and declare symbols
        for k, v in MapExprStmts().visit(iet).items():
            if k.is_Expression:
                if k.is_definition:
                    site = v[-1] if v else iet
                    self._alloc_scalar_on_low_lat_mem(site, k, storage)
                    continue
                objs = [k.write]
            elif k.is_Call:
                objs = k.arguments

            for i in objs:
                try:
                    if i.is_LocalObject:
                        site = v[-1] if v else iet
                        self._alloc_object_on_low_lat_mem(site, i, storage)
                    elif i.is_Array:
                        if i in iet.parameters:
                            # The Array is passed as a Callable argument
                            continue
                        elif i._mem_stack:
                            self._alloc_array_on_low_lat_mem(iet, i, storage)
                        else:
                            self._alloc_array_on_high_bw_mem(i, storage)
                except AttributeError:
                    # E.g., a generic SymPy expression
                    pass

        # Place symbols in a memory space
        if not iet.is_ElementalFunction:
            writes = set()
            reads = set()
            for efunc in kwargs.get('efuncs', []):
                for i in FindNodes(Expression).visit(efunc):
                    if i.write.is_Function:
                        writes.add(i.write)
                    reads = (reads
                             | {r
                                for r in i.reads if r.is_Function}) - writes

            for i in filter_sorted(writes):
                self._map_function_on_high_bw_mem(i, storage)
            for i in filter_sorted(reads):
                self._map_function_on_high_bw_mem(i, storage, read_only=True)

        # Introduce symbol definitions going in the low latency memory
        mapper = dict(storage._on_low_lat_mem)
        iet = Transformer(mapper, nested=True).visit(iet)

        # Introduce symbol definitions going in the high bandwidth memory
        header = []
        footer = []
        for decl, alloc, free in storage._on_high_bw_mem:
            if decl is None:
                header.append(alloc)
            else:
                header.extend([decl, alloc])
            footer.append(free)
        if header or footer:
            body = List(header=header, body=iet.body, footer=footer)
            iet = iet._rebuild(body=body)

        return iet, {}
Example 18
def dimension_sort(expr):
    """
    Topologically sort the Dimensions in ``expr``, based on the order in which they
    appear within Indexeds.
    """
    def handle_indexed(indexed):
        relation = []
        for i in indexed.indices:
            try:
                maybe_dim = split_affine(i).var
                if isinstance(maybe_dim, Dimension):
                    relation.append(maybe_dim)
            except ValueError:
                # Maybe there are some nested Indexeds (e.g., the situation is A[B[i]])
                nested = flatten(
                    handle_indexed(n) for n in retrieve_indexed(i))
                if nested:
                    relation.extend(nested)
                else:
                    # Fallback: Just insert all the Dimensions we find, regardless of
                    # what the user is attempting to do
                    relation.extend([
                        d for d in filter_sorted(i.free_symbols)
                        if isinstance(d, Dimension)
                    ])
        return tuple(relation)

    if isinstance(expr.implicit_dims, IgnoreDimSort):
        relations = set()
    else:
        relations = {handle_indexed(i) for i in retrieve_indexed(expr)}

    # Add in any implicit dimension (typical of scalar temporaries, or Step)
    relations.add(expr.implicit_dims)

    # Add in leftover free dimensions (not an Indexed's index)
    extra = set([i for i in expr.free_symbols if isinstance(i, Dimension)])

    # Add in pure data dimensions (e.g., those accessed only via explicit values,
    # such as A[3])
    indexeds = retrieve_indexed(expr, deep=True)
    extra.update(set().union(*[set(i.function.dimensions) for i in indexeds]))

    # Enforce determinism
    extra = filter_sorted(extra, key=attrgetter('name'))

    # Add in implicit relations for parent dimensions
    # -----------------------------------------------
    # 1) Note that (d.parent, d) is what we want, while (d, d.parent) would be
    # wrong; for example, in `((t, time), (t, x, y), (x, y))`, `x` could now
    # precede `time`, while `t`, and therefore `time`, *must* appear before `x`,
    # as indicated by the second relation
    implicit_relations = {(d.parent, d) for d in extra if d.is_Derived}
    # 2) To handle cases such as `((time, xi), (x,))`, where `xi` is a SubDimension
    # of `x`, besides `(x, xi)`, we also have to add `(time, x)` so that we
    # obtain the desired ordering `(time, x, xi)`. W/o `(time, x)`, the ordering
    # `(x, time, xi)` might be returned instead, which would be nonsense
    implicit_relations.update({tuple(d.root for d in i) for i in relations})

    ordering = PartialOrderTuple(extra,
                                 relations=(relations | implicit_relations))

    return ordering
Example 19
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        affine_trees = find_affine_trees(iet).items()

        # If there are no affine trees, then there are no loops to be optimized using OPS.
        if not affine_trees:
            return iet

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for _, tree in affine_trees:
            dims.append(len(tree[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(tree[0].root))
            symbols -= set(FindSymbols('defines').visit(tree[0].root))
            to_dat |= symbols

        # Create the OPS block for this problem
        ops_block = OpsBlock('block')
        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        after_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(
                list(create_ops_dat(f, name_to_ops_dat, ops_block)))
            # To return the result to Devito, it is necessary to copy the data
            # from the dat object back to the CPU memory.
            after_time_loop.extend(
                create_ops_fetch(f, name_to_ops_dat,
                                 self.time_dimension.extreme_max))

        # Generate ops kernels for each offloadable iteration tree
        mapper = {}
        for n, (_, tree) in enumerate(affine_trees):
            pre_loop, ops_kernel, ops_par_loop_call = opsit(
                tree, n, name_to_ops_dat, ops_block, dims[0])

            pre_time_loop.extend(pre_loop)
            self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
                MetaCall(ops_kernel, False)
            mapper[tree[0].root] = ops_par_loop_call
            mapper.update({i.root: mapper.get(i.root)
                           for i in tree})  # Drop trees

        iet = Transformer(mapper).visit(iet)

        assert all(d == dims[0] for d in dims), \
            ("The OPS backend currently assumes that all kernels "
             "have the same number of dimensions")

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.extend(['stdio.h', 'ops_seq.h'])

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            *after_time_loop, ops_exit
        ]

        return List(body=body)
Example 20
    def _make_fetchprefetch(self, iet, sync_ops, pieces, root):
        fid = SharedData._field_id

        fetches = []
        prefetches = []
        presents = []
        for s in sync_ops:
            f = s.function
            dimensions = s.dimensions
            fc = s.fetch
            ifc = s.ifetch
            pfc = s.pfetch
            fcond = s.fcond
            pcond = s.pcond

            # Construct init IET
            imask = [(ifc, s.size) if d.root is s.dim.root else FULL
                     for d in dimensions]
            fetch = PragmaTransfer(self.lang._map_to, f, imask=imask)
            fetches.append(Conditional(fcond, fetch))

            # Construct present clauses
            imask = [(fc, s.size) if d.root is s.dim.root else FULL
                     for d in dimensions]
            presents.append(
                PragmaTransfer(self.lang._map_present, f, imask=imask))

            # Construct prefetch IET
            imask = [(pfc, s.size) if d.root is s.dim.root else FULL
                     for d in dimensions]
            prefetch = PragmaTransfer(self.lang._map_to_wait,
                                      f,
                                      imask=imask,
                                      queueid=fid)
            prefetches.append(Conditional(pcond, prefetch))

        # Turn init IET into a Callable
        functions = filter_ordered(s.function for s in sync_ops)
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=fetches)
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        pieces.init.append(
            List(header=c.Comment("Initialize data stream"),
                 body=[Call(name, parameters), BlankLine]))

        # Turn prefetch IET into a ThreadFunction
        name = self.sregistry.make_name(prefix='prefetch_host_to_device')
        body = List(header=c.Line(), body=prefetches)
        tctx = make_thread_ctx(name, body, root, None, sync_ops,
                               self.sregistry)
        pieces.funcs.extend(tctx.funcs)

        # Glue together all the IET pieces, including the activation logic
        sdata = tctx.sdata
        threads = tctx.threads
        iet = List(body=[
            BlankLine,
            BusyWait(
                CondNe(
                    FieldFromComposite(sdata._field_flag, sdata[
                        threads.index]), 1))
        ] + presents + [iet, tctx.activate])

        # Fire up the threads
        pieces.init.append(tctx.init)

        # Final wait before jumping back to Python land
        pieces.finalize.append(tctx.finalize)

        # Keep track of created objects
        pieces.objs.add(sync_ops, sdata, threads)

        return iet
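
The handshake modelled by BusyWait(CondNe(..., 1)) above, as a pure-Python toy (the generated C code instead polls a field of the SharedData struct):

import threading
import time

flag = {'value': 0}

def worker():
    time.sleep(0.01)   # stand-in for the asynchronous prefetch work
    flag['value'] = 1  # signal: data is ready

t = threading.Thread(target=worker)
t.start()
while flag['value'] != 1:  # BusyWait(CondNe(flag, 1))
    pass
t.join()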
Example 21
 def visit_Expression(self, o):
     return filter_sorted([f for f in self.rule(o)], key=attrgetter('name'))
Example 22
 def visit_tuple(self, o):
     symbols = flatten([self._visit(i) for i in o])
     return filter_sorted(symbols, key=attrgetter('name'))
Example 23
 def visit_Iteration(self, o):
     symbols = flatten([self._visit(i) for i in o.children])
     symbols += self.rule(o)
     return filter_sorted(symbols, key=attrgetter('name'))
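
The visit_* methods above compose as follows; a toy version with stand-in nodes, where dedup-and-sort plays the role of the filter_sorted sketch after Example 1:

from operator import attrgetter

class Node:
    def __init__(self, *children, symbols=()):
        self.children, self.symbols = children, symbols

class Sym:
    def __init__(self, name):
        self.name = name

def find_symbols(node):
    # Visit children recursively, add this node's own symbols, then
    # deduplicate (by name) and sort, as the visitors above do
    found = [s for c in node.children for s in find_symbols(c)]
    found.extend(node.symbols)
    return sorted({s.name: s for s in found}.values(), key=attrgetter('name'))

x, y = Sym('x'), Sym('y')
tree = Node(Node(symbols=(y,)), Node(symbols=(x, y)))
assert [s.name for s in find_symbols(tree)] == ['x', 'y']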
Example 24
    def _make_fetchwaitprefetch(self, iet, sync_ops, pieces, root):
        threads = self.__make_threads()

        fetches = []
        prefetches = []
        presents = []
        for s in sync_ops:
            if s.direction is Forward:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_min)
                fsize = s.function._C_get_field(FULL, s.dim).size
                fc_cond = fc + (s.size - 1) < fsize
                pfc = s.fetch + 1
                pfc_cond = pfc + (s.size - 1) < fsize
            else:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_max)
                fc_cond = fc >= 0
                pfc = s.fetch - 1
                pfc_cond = pfc >= 0

            # Construct fetch IET
            imask = [(fc, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            fetch = List(header=self._P._map_to(s.function, imask))
            fetches.append(Conditional(fc_cond, fetch))

            # Construct present clauses
            imask = [(s.fetch, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            presents.extend(as_list(self._P._map_present(s.function, imask)))

            # Construct prefetch IET
            imask = [(pfc, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            prefetch = List(header=self._P._map_to_wait(
                s.function, imask, SharedData._field_id))
            prefetches.append(Conditional(pfc_cond, prefetch))

        functions = filter_ordered(s.function for s in sync_ops)
        casts = [PointerCast(f) for f in functions]

        # Turn init IET into a Callable
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=casts + fetches)
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        pieces.init.append(
            List(header=c.Comment("Initialize data stream for `%s`" %
                                  threads.name),
                 body=[Call(name, func.parameters), BlankLine]))

        # Turn prefetch IET into a threaded Callable
        name = self.sregistry.make_name(prefix='prefetch_host_to_device')
        body = List(header=c.Line(), body=casts + prefetches)
        tfunc, sdata = self.__make_tfunc(name, body, root, threads)
        pieces.funcs.append(tfunc)

        # Glue together all the IET pieces, including the activation bits
        iet = List(body=[
            BlankLine,
            BusyWait(
                CondNe(
                    FieldFromComposite(sdata._field_flag, sdata[
                        threads.index]), 1)),
            List(header=presents), iet,
            self.__make_activate_thread(threads, sdata, sync_ops)
        ])

        # Fire up the threads
        pieces.init.append(
            self.__make_init_threads(threads, sdata, tfunc, pieces))
        pieces.threads.append(threads)

        # Final wait before jumping back to Python land
        pieces.finalize.append(self.__make_finalize_threads(threads, sdata))

        return iet
Example 25
def make_ops_kernels(iet):
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there are no affine trees, then there are no loops to be optimized using OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block, namespace['ops_decl_block'](dims[0],
                                                      Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue

        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat, f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])

        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    assert all(d == dims[0] for d in dims), \
        ("The OPS backend currently assumes that all kernels "
         "have the same number of dimensions")

    iet = iet._rebuild(body=flatten([
        ops_init, ops_block_init, pre_time_loop, ops_partition, iet.body,
        after_time_loop, ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }
Example 26
    def _build(cls, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Evaluable) for i in expressions):
            raise InvalidOperator("Only `devito.Evaluable` are allowed.")

        # Python-level (i.e., compile time) and C-level (i.e., run time) performance profiling
        profiler = create_profile('timers')

        # Lower input expressions
        expressions = cls._lower_exprs(expressions, **kwargs)

        # Group expressions based on iteration spaces and data dependences
        clusters = cls._lower_clusters(expressions, profiler, **kwargs)

        # Lower Clusters to a ScheduleTree
        stree = cls._lower_stree(clusters, **kwargs)

        # Lower ScheduleTree to an Iteration/Expression Tree
        iet, byproduct = cls._lower_iet(stree, profiler, **kwargs)

        # Make it an actual Operator
        op = Callable.__new__(cls, **iet.args)
        Callable.__init__(op, **op.args)

        # Header files, etc.
        op._headers = list(cls._default_headers)
        op._headers.extend(byproduct.headers)
        op._globals = list(cls._default_globals)
        op._includes = list(cls._default_includes)
        op._includes.extend(profiler._default_includes)
        op._includes.extend(byproduct.includes)

        # Required for the jit-compilation
        op._compiler = kwargs['compiler']
        op._lib = None
        op._cfunction = None

        # References to local or external routines
        op._func_table = OrderedDict()
        op._func_table.update(
            OrderedDict([(i, MetaCall(None, False))
                         for i in profiler._ext_calls]))
        op._func_table.update(
            OrderedDict([(i.root.name, i) for i in byproduct.funcs]))

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        op._state = cls._initialize_state(**kwargs)

        # Produced by the various compilation passes
        op._input = filter_sorted(
            flatten(e.reads + e.writes for e in expressions))
        op._output = filter_sorted(flatten(e.writes for e in expressions))
        op._dimensions = flatten(c.dimensions
                                 for c in clusters) + byproduct.dimensions
        op._dimensions = sorted(set(op._dimensions), key=attrgetter('name'))
        op._dtype, op._dspace = clusters.meta
        op._profiler = profiler

        return op
Example 27
def detect_io(exprs, relax=False):
    """
    ``{exprs} -> ({reads}, {writes})``

    Parameters
    ----------
    exprs : expr-like or list of expr-like
        The searched expressions.
    relax : bool, optional
        If False (the default), collect only Constants and Functions.
        Otherwise, collect any Basic object.
    """
    exprs = as_tuple(exprs)
    if relax is False:
        rule = lambda i: i.is_Input
    else:
        rule = lambda i: i.is_Scalar or i.is_Tensor

    # Don't forget the nasty case with indirections on the LHS:
    # >>> u[t, a[x]] = f[x]  -> (reads={a, f}, writes={u})

    roots = []
    for i in exprs:
        try:
            roots.append(i.rhs)
            roots.extend(list(i.lhs.indices))
            roots.extend(list(i.conditionals.values()))
        except AttributeError:
            # E.g., FunctionFromPointer
            roots.append(i)

    reads = []
    terminals = flatten(retrieve_terminals(i, deep=True) for i in roots)
    for i in terminals:
        candidates = set(i.free_symbols)
        try:
            candidates.update({i.function})
        except AttributeError:
            pass
        for j in candidates:
            try:
                if rule(j):
                    reads.append(j)
            except AttributeError:
                pass

    writes = []
    for i in exprs:
        try:
            f = i.lhs.function
        except AttributeError:
            continue
        try:
            if rule(f):
                writes.append(f)
        except AttributeError:
            # We only end up here after complex IET transformations which make
            # use of composite types
            assert isinstance(i.lhs, FunctionFromPointer)
            f = i.lhs.base.function
            if rule(f):
                writes.append(f)

    return filter_sorted(reads), filter_sorted(writes)
Example 28
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Eq) for i in expressions):
            raise InvalidOperator("Only `devito.Eq` expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self._func_table = OrderedDict()

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        self._state = {}

        # Form and gather any required implicit expressions
        expressions = self._add_implicit(expressions)

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = self._apply_substitutions(expressions, subs)
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self._input = filter_sorted(flatten(e.reads + e.writes for e in expressions))
        self._output = filter_sorted(flatten(e.writes for e in expressions))
        self._dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to a Schedule tree
        stree = st_build(clusters)

        # Lower Schedule tree to an Iteration/Expression tree (IET)
        iet = iet_build(stree)
        iet, self._profiler = self._profile_sections(iet)
        iet = self._specialize_iet(iet, **kwargs)

        # Derive all Operator parameters based on the IET
        parameters = derive_parameters(iet, True)

        # Finalization: introduce declarations, type casts, etc
        iet = self._finalize(iet, parameters)

        super(Operator, self).__init__(self.name, iet, 'int', parameters, ())
Example 29
    def _make_fetchwaitprefetch(self, iet, sync_ops, pieces, root):
        fetches = []
        prefetches = []
        presents = []
        for s in sync_ops:
            if s.direction is Forward:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_min)
                pfc = s.fetch + 1
                fc_cond = s.next_cbk(s.dim.symbolic_min)
                pfc_cond = s.next_cbk(s.dim + 1)
            else:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_max)
                pfc = s.fetch - 1
                fc_cond = s.next_cbk(s.dim.symbolic_max)
                pfc_cond = s.next_cbk(s.dim - 1)

            # Construct init IET
            imask = [(fc, s.size) if d.root is s.dim.root else FULL for d in s.dimensions]
            fetch = PragmaList(self.lang._map_to(s.function, imask),
                               {s.function} | fc.free_symbols)
            fetches.append(Conditional(fc_cond, fetch))

            # Construct present clauses
            imask = [(s.fetch, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            presents.extend(as_list(self.lang._map_present(s.function, imask)))

            # Construct prefetch IET
            imask = [(pfc, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            prefetch = PragmaList(self.lang._map_to_wait(s.function, imask,
                                                         SharedData._field_id),
                                  {s.function} | pfc.free_symbols)
            prefetches.append(Conditional(pfc_cond, prefetch))

        # Turn init IET into a Callable
        functions = filter_ordered(s.function for s in sync_ops)
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=fetches)
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        pieces.init.append(List(
            header=c.Comment("Initialize data stream"),
            body=[Call(name, parameters), BlankLine]
        ))

        # Turn prefetch IET into a ThreadFunction
        name = self.sregistry.make_name(prefix='prefetch_host_to_device')
        body = List(header=c.Line(), body=prefetches)
        tctx = make_thread_ctx(name, body, root, None, sync_ops, self.sregistry)
        pieces.funcs.extend(tctx.funcs)

        # Glue together all the IET pieces, including the activation logic
        sdata = tctx.sdata
        threads = tctx.threads
        iet = List(body=[
            BlankLine,
            BusyWait(CondNe(FieldFromComposite(sdata._field_flag,
                                               sdata[threads.index]), 1)),
            List(header=presents),
            iet,
            tctx.activate
        ])

        # Fire up the threads
        pieces.init.append(tctx.init)
        pieces.threads.append(threads)

        # Final wait before jumping back to Python land
        pieces.finalize.append(tctx.finalize)

        return iet
Example 30
File: basic.py Project: nw0/devito
    def _create_elemental_functions(self, nodes, state):
        """
        Extract :class:`Iteration` sub-trees and move them into :class:`Callable`s.

        Currently, only tagged, elementizable Iteration objects are targeted.
        """
        noinline = self._compiler_decoration('noinline',
                                             c.Comment('noinline?'))

        functions = OrderedDict()
        mapper = {}
        for tree in retrieve_iteration_tree(nodes, mode='superset'):
            # Search an elementizable sub-tree (if any)
            tagged = filter_iterations(tree, lambda i: i.tag is not None,
                                       'asap')
            if not tagged:
                continue
            root = tagged[0]
            if not root.is_Elementizable:
                continue
            target = tree[tree.index(root):]

            # Elemental function arguments
            args = []  # Found so far (scalars, tensors)
            maybe_required = set()  # Scalars that *may* have to be passed in
            not_required = set()  # Elemental function locally declared scalars

            # Build a new Iteration/Expression tree with free bounds
            free = []
            for i in target:
                name, bounds = i.dim.name, i.bounds_symbolic
                # Iteration bounds
                start = Scalar(name='%s_start' % name, dtype=np.int32)
                finish = Scalar(name='%s_finish' % name, dtype=np.int32)
                args.extend(zip([ccode(j) for j in bounds], (start, finish)))
                # Iteration unbounded indices
                ufunc = [
                    Scalar(name='%s_ub%d' % (name, j), dtype=np.int32)
                    for j in range(len(i.uindices))
                ]
                args.extend(zip([ccode(j.start) for j in i.uindices], ufunc))
                limits = [Symbol(start.name), Symbol(finish.name), 1]
                uindices = [
                    UnboundedIndex(j.index, i.dim + as_symbol(k))
                    for j, k in zip(i.uindices, ufunc)
                ]
                free.append(
                    i._rebuild(limits=limits, offsets=None, uindices=uindices))
                not_required.update({i.dim}, set(j.index for j in i.uindices))

            # Construct elemental function body, and inspect it
            free = NestedTransformer(dict(zip(target, free))).visit(root)
            expressions = FindNodes(Expression).visit(free)
            fsymbols = FindSymbols('symbolics').visit(free)

            # Add all definitely-required arguments
            not_required.update({i.output for i in expressions if i.is_scalar})
            for i in fsymbols:
                if i in not_required:
                    continue
                elif i.is_Array:
                    args.append(
                        ("(%s*)%s" % (c.dtype_to_ctype(i.dtype), i.name), i))
                elif i.is_TensorFunction:
                    args.append(("%s_vec" % i.name, i))
                elif i.is_Scalar:
                    args.append((i.name, i))

            # Add all maybe-required arguments that turn out to be required
            maybe_required.update(
                set(FindSymbols(mode='free-symbols').visit(free)))
            for i in fsymbols:
                not_required.update({as_symbol(i), i.indexify()})
                for j in i.symbolic_shape:
                    maybe_required.update(j.free_symbols)
            required = filter_sorted(maybe_required - not_required,
                                     key=attrgetter('name'))
            args.extend([(i.name, Scalar(name=i.name, dtype=i.dtype))
                         for i in required])

            call, params = zip(*args)
            handle = flatten([p.rtargs for p in params])
            name = "f_%d" % root.tag

            # Produce the new Call
            mapper[root] = List(header=noinline, body=Call(name, call))

            # Produce the new Callable
            functions.setdefault(
                name, Callable(name, free, 'void', handle, ('static', )))

        # Transform the main tree
        processed = Transformer(mapper).visit(nodes)

        return processed, {'elemental_functions': functions.values()}
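
The argument-derivation set algebra above, in miniature: candidates that may be needed, minus symbols declared locally, ordered by name so the generated signature is deterministic (the names below are illustrative):

maybe_required = {'x_start', 'x_finish', 'u_vec', 'i0'}
not_required = {'i0'}  # scalars declared inside the elemental function
required = sorted(maybe_required - not_required)
assert required == ['u_vec', 'x_finish', 'x_start']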
Example 31
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = [i.xreplace(subs) for i in expressions]
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to a Schedule tree
        stree = schedule(clusters)
        stree = section(stree)

        # Lower Sections to an Iteration/Expression tree (IET)
        iet = iet_build(stree)

        # Insert code for C-level performance profiling
        iet, self.profiler = self._profile_sections(iet)

        # Translate into backend-specific representation
        iet = self._specialize_iet(iet, **kwargs)

        # Insert the required symbol declarations
        iet = iet_insert_C_decls(iet, self.func_table)

        # Insert data and pointer casts for array parameters and profiling structs
        iet = self._build_casts(iet)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(iet)

        # Finish instantiation
        super(Operator, self).__init__(self.name, iet, 'int', parameters, ())
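For context, a hedged usage sketch of the Operator constructed above, assuming the standard public Devito API (Grid, TimeFunction, Eq); the exact apply() keyword names vary across Devito versions:

# Hedged usage sketch: build and run an Operator (standard Devito API).
from devito import Grid, TimeFunction, Eq, Operator

grid = Grid(shape=(4, 4))
u = TimeFunction(name='u', grid=grid)

op = Operator(Eq(u.forward, u + 1), name='Kernel')
op.apply(time=10)  # JIT-compile the generated C and run 10 timesteps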
Example #35
0
    def visit_Call(self, o):
        symbols = self._visit(o.children)
        symbols.extend(self.rule(o))
        return filter_sorted(symbols, key=attrgetter('name'))
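The rule applied in visit_Call is whatever extractor the visitor was parameterised with. A hedged sketch of that pattern; the mode names mirror those used elsewhere in these snippets ('symbolics', 'free-symbols'), but the node attributes accessed are illustrative assumptions, not the actual Devito internals:

# Sketch of a mode-parameterised symbol collector; the attribute names
# ('functions', 'free_symbols') are illustrative assumptions.
from operator import attrgetter

class FindSymbols:

    rules = {
        'symbolics': attrgetter('functions'),
        'free-symbols': attrgetter('free_symbols'),
    }

    def __init__(self, mode='symbolics'):
        self.rule = self.rules[mode]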
Example #36
0
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for section, trees in find_affine_trees(iet).items():
            dims.append(len(trees[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(trees[0].root))
            symbols -= set(FindSymbols('defines').visit(trees[0].root))
            to_dat |= symbols

        # Create the OPS block for this problem
        ops_block = OpsBlock('block')
        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

        # Generate ops kernels for each offloadable iteration tree
        mapper = {}
        for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
            pre_loop, ops_kernel, ops_par_loop_call = opsit(
                trees, n, name_to_ops_dat, ops_block, dims[0])

            pre_time_loop.extend(pre_loop)
            self._ops_kernels.append(ops_kernel)
            mapper[trees[0].root] = ops_par_loop_call
            # Map the remaining tree roots to None so Transformer drops them
            mapper.update({i.root: mapper.get(i.root) for i in trees})

        iet = Transformer(mapper).visit(iet)

        assert all(d == dims[0] for d in dims), \
            "The OPS backend currently assumes that all kernels " \
            "have the same number of dimensions"

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.append('stdio.h')

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            ops_exit
        ]

        return List(body=body)
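The dimensionality check above relies on all(): asserting a bare generator expression is always truthy, so without the wrapper the assert would silently pass no matter what the dims contain. A standalone illustration of the pitfall:

# A generator expression is always truthy, so this assert can never fire
dims = [2, 3]
assert (d == dims[0] for d in dims)  # passes even though 3 != 2

# Wrapping it in all() actually evaluates the condition
try:
    assert all(d == dims[0] for d in dims)
except AssertionError:
    print("all(...) correctly rejects mismatched dimensions")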
Example #37
0
    def visit_Iteration(self, o):
        symbols = flatten([self._visit(i) for i in o.children])
        symbols += self.rule(o)
        return filter_sorted(symbols, key=attrgetter('name'))
Example #38
0
    def cause(self):
        ret = [i.cause for i in self if i.cause is not None]
        ret.extend([i.parent for i in ret if i.is_Derived])
        return tuple(filter_sorted(ret, key=lambda i: i.name))
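cause also climbs from derived dimensions to their parents. With the standard Devito API, a TimeFunction's stepping dimension is an example of such a derived dimension, its parent being the grid's time dimension:

# The stepping dimension of a TimeFunction is derived from `time`
# (standard Devito API; printed names may vary across versions).
from devito import Grid, TimeFunction

grid = Grid(shape=(4, 4))
u = TimeFunction(name='u', grid=grid)

t = u.dimensions[0]
print(t.is_Derived)  # True
print(t.parent)      # the grid's `time` dimension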
Example #39
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Eq) for i in expressions):
            raise InvalidOperator("Only `devito.Eq` expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self._func_table = OrderedDict()

        # Internal state. May be used to store information about previous
        # runs, autotuning reports, etc.
        self._state = self._initialize_state(**kwargs)

        # Form and gather any required implicit expressions
        expressions = self._add_implicit(expressions)

        # Expression lowering: evaluation of derivatives, indexification,
        # substitution rules, specialization
        expressions = [i.evaluate for i in expressions]
        expressions = [indexify(i) for i in expressions]
        expressions = self._apply_substitutions(expressions, subs)
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self._input = filter_sorted(
            flatten(e.reads + e.writes for e in expressions))
        self._output = filter_sorted(flatten(e.writes for e in expressions))
        self._dimensions = filter_sorted(
            flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to a Schedule tree
        stree = st_build(clusters)

        # Lower Schedule tree to an Iteration/Expression tree (IET)
        iet = iet_build(stree)
        iet, self._profiler = self._profile_sections(iet)
        iet = self._specialize_iet(iet, **kwargs)

        # Derive all Operator parameters based on the IET
        parameters = derive_parameters(iet, True)

        # Finalization: introduce declarations, type casts, etc
        iet = self._finalize(iet, parameters)

        super(Operator, self).__init__(self.name, iet, 'int', parameters, ())
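A hedged sketch of the `subs` lowering step in action: with the standard Devito API, grid.spacing_map binds the symbolic spacings (h_x, h_y) appearing in derivative stencils to numeric values before specialization:

# Hedged sketch: `subs` replaces symbolic grid spacings with numbers
# (standard Devito API).
from devito import Grid, TimeFunction, Eq, Operator

grid = Grid(shape=(4, 4), extent=(1., 1.))
u = TimeFunction(name='u', grid=grid, space_order=2)

op = Operator(Eq(u.forward, u.laplace), subs=grid.spacing_map)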
Example #40
0
    def __init__(self, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, sympy.Eq) for i in expressions):
            raise InvalidOperator("Only SymPy expressions are allowed.")

        self.name = kwargs.get("name", "Kernel")
        subs = kwargs.get("subs", {})
        dse = kwargs.get("dse", configuration['dse'])
        dle = kwargs.get("dle", configuration['dle'])

        # Header files, etc.
        self._headers = list(self._default_headers)
        self._includes = list(self._default_includes)
        self._globals = list(self._default_globals)

        # Required for compilation
        self._compiler = configuration['compiler']
        self._lib = None
        self._cfunction = None

        # References to local or external routines
        self.func_table = OrderedDict()

        # Expression lowering: indexification, substitution rules, specialization
        expressions = [indexify(i) for i in expressions]
        expressions = [i.xreplace(subs) for i in expressions]
        expressions = self._specialize_exprs(expressions)

        # Expression analysis
        self.input = filter_sorted(flatten(e.reads for e in expressions))
        self.output = filter_sorted(flatten(e.writes for e in expressions))
        self.dimensions = filter_sorted(flatten(e.dimensions for e in expressions))

        # Group expressions based on their iteration space and data dependences,
        # and apply the Devito Symbolic Engine (DSE) for flop optimization
        clusters = clusterize(expressions)
        clusters = rewrite(clusters, mode=set_dse_mode(dse))
        self._dtype, self._dspace = clusters.meta

        # Lower Clusters to an Iteration/Expression tree (IET)
        nodes = iet_build(clusters)

        # Introduce C-level profiling infrastructure
        nodes, self.profiler = self._profile_sections(nodes)

        # Translate into backend-specific representation (e.g., GPU, Yask)
        nodes = self._specialize_iet(nodes)

        # Apply the Devito Loop Engine (DLE) for loop optimization
        dle_state = transform(nodes, *set_dle_mode(dle))

        # Update the Operator state based on the DLE
        self.dle_args = dle_state.arguments
        self.dle_flags = dle_state.flags
        self.func_table.update(OrderedDict([(i.name, MetaCall(i, True))
                                            for i in dle_state.elemental_functions]))
        self.dimensions.extend([i.argument for i in self.dle_args
                                if isinstance(i.argument, Dimension)])
        self._includes.extend(list(dle_state.includes))

        # Introduce the required symbol declarations
        nodes = iet_insert_C_decls(dle_state.nodes, self.func_table)

        # Insert data and pointer casts for array parameters and profiling structs
        nodes = self._build_casts(nodes)

        # Derive parameters as symbols not defined in the kernel itself
        parameters = self._build_parameters(nodes)

        # Finish instantiation
        super(Operator, self).__init__(self.name, nodes, 'int', parameters, ())
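A minimal sketch of what set_dle_mode is assumed to do here: normalise the user-facing `dle` value into the (mode, options) pair unpacked into transform() above (the real helper may differ):

# Assumed normalisation performed by set_dle_mode: the `dle` kwarg may be
# falsy, a plain mode string, or a (mode, options) tuple.
def set_dle_mode(mode):
    if not mode:
        return 'noop', {}
    elif isinstance(mode, str):
        return mode, {}
    else:
        # e.g. ('advanced', {'openmp': True})
        return mode[0], dict(mode[1])

assert set_dle_mode('advanced') == ('advanced', {})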