Example #1
def simple_function_fissionable(a, b, exprs, iters):
    # void foo(a, b)
    #   for i
    #     for j
    #       for k
    #         expr0
    #         expr2
    symbols = [i.base.function for i in [a, b]]
    body = iters[0](iters[1](iters[2]([exprs[0], exprs[2]])))
    return Callable('foo', body, 'void', symbols, ())
Example #2
def update_halo(f, fixed):
    """
    Construct an IET performing a halo exchange for a :class:`TensorFunction`.
    """
    # Requirements
    assert f.is_Function
    assert f.grid is not None

    distributor = f.grid.distributor
    nb = distributor._C_neighbours.obj
    comm = distributor._C_comm

    fixed = {d: Symbol(name="o%s" % d.root) for d in fixed}

    mapper = get_views(f, fixed)

    body = []
    masks = []
    for d in f.dimensions:
        if d in fixed:
            continue

        rpeer = FieldFromPointer("%sright" % d, nb)
        lpeer = FieldFromPointer("%sleft" % d, nb)

        # Sending to left, receiving from right
        lsizes, loffsets = mapper[(d, LEFT, OWNED)]
        rsizes, roffsets = mapper[(d, RIGHT, HALO)]
        assert lsizes == rsizes
        sizes = lsizes
        parameters = ([f] + list(f.symbolic_shape) + sizes + loffsets +
                      roffsets + [rpeer, lpeer, comm])
        call = Call('sendrecv_%s' % f.name, parameters)
        mask = Symbol(name='m%sl' % d)
        body.append(Conditional(mask, call))
        masks.append(mask)

        # Sending to right, receiving from left
        rsizes, roffsets = mapper[(d, RIGHT, OWNED)]
        lsizes, loffsets = mapper[(d, LEFT, HALO)]
        assert rsizes == lsizes
        sizes = rsizes
        parameters = ([f] + list(f.symbolic_shape) + sizes + roffsets +
                      loffsets + [lpeer, rpeer, comm])
        call = Call('sendrecv_%s' % f.name, parameters)
        mask = Symbol(name='m%sr' % d)
        body.append(Conditional(mask, call))
        masks.append(mask)

    iet = List(body=body)
    parameters = ([f] + masks + [comm, nb] + list(fixed.values()) +
                  [d.symbolic_size for d in f.dimensions])
    return Callable('halo_exchange_%s' % f.name, iet, 'void', parameters,
                    ('static', ))
Example #3
def simple_function_with_paddable_arrays(a_dense, b_dense, exprs, iters):
    # void foo(a_dense, b_dense)
    #   for i
    #     for j
    #       for k
    #         expr0
    symbols = [i.base.function for i in [a_dense, b_dense]]
    body = iters[0](iters[1](iters[2](exprs[6])))
    f = Callable('foo', body, 'void', symbols, ())
    f, subs = ResolveTimeStepping().visit(f)
    f = SubstituteExpression(subs=subs).visit(f)
    return f
Example #4
def simple_function_fissionable(a, b, exprs, iters):
    # void foo(a, b)
    #   for i
    #     for j
    #       for k
    #         expr0
    #         expr2
    symbols = [i.base.function for i in [a, b]]
    body = iters[0](iters[1](iters[2]([exprs[0], exprs[2]])))
    f = Callable('foo', body, 'void', symbols, ())
    f, subs = ResolveTimeStepping().visit(f)
    f = SubstituteExpression(subs=subs).visit(f)
    return f
Example #5
    def _make_haloupdate(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufg = FieldFromComposite(msg._C_field_bufg, msgi)
        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)
        torank = FieldFromComposite(msg._C_field_to, msgi)

        sizes = [
            FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [
            FieldFromComposite('%s[%d]' % (msg._C_field_ofsg, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [fixed.get(d) or ofsg.pop(0) for d in f.dimensions]

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Call('gather_%s' % key, [bufg] + sizes + [f] + ofsg)
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        # Make Irecv/Isend
        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([recv, gather, send], dim, ncomms - 1)
        parameters = ([f, comm, msg, ncomms]) + list(fixed.values())
        return Callable('haloupdate%d' % key, iet, 'void', parameters,
                        ('static', ))
Example #6
def sendrecv(f, fixed):
    """Construct an IET performing a halo exchange along arbitrary
    dimension and side."""
    assert f.is_Function
    assert f.grid is not None

    comm = f.grid.distributor._C_comm

    buf_dims = [Dimension(name='buf_%s' % d.root) for d in f.dimensions if d not in fixed]
    bufg = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype, scope='heap')
    bufs = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype, scope='heap')

    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype, scope='external')

    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    parameters = [bufg] + list(bufg.shape) + [dat] + list(dat.shape) + ofsg
    gather = Call('gather_%s' % f.name, parameters)
    parameters = [bufs] + list(bufs.shape) + [dat] + list(dat.shape) + ofss
    scatter = Call('scatter_%s' % f.name, parameters)

    # The scatter must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    srecv = MPIStatusObject(name='srecv')
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')

    count = reduce(mul, bufs.shape, 1)
    recv = Call('MPI_Irecv', [bufs, count, Macro(numpy_to_mpitypes(f.dtype)),
                              fromrank, '13', comm, rrecv])
    send = Call('MPI_Isend', [bufg, count, Macro(numpy_to_mpitypes(f.dtype)),
                              torank, '13', comm, rsend])

    waitrecv = Call('MPI_Wait', [rrecv, srecv])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
    iet = List(body=[ArrayCast(dat), iet_insert_C_decls(iet)])
    parameters = ([dat] + list(dat.shape) + list(bufs.shape) +
                  ofsg + ofss + [fromrank, torank, comm])
    return Callable('sendrecv_%s' % f.name, iet, 'void', parameters, ('static',))
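For comparison, here is a minimal hand-written mpi4py sketch of the exchange that the generated sendrecv routine performs at runtime: post the receive, gather the OWNED values into `bufg`, post the send, wait on both requests, then scatter `bufs` into the halo, skipping the scatter when the neighbour is MPI.PROC_NULL. This is an illustration only, not Devito's generated C; the 1D decomposition, buffer sizes, and variable names are assumptions chosen to mirror the example above.

# Run with e.g.: mpirun -n 2 python sendrecv_sketch.py
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

# Neighbours along a 1D decomposition; the ends talk to MPI.PROC_NULL
torank = rank + 1 if rank + 1 < comm.Get_size() else MPI.PROC_NULL
fromrank = rank - 1 if rank - 1 >= 0 else MPI.PROC_NULL

dat = np.full(10, float(rank))   # local data; dat[0] is a one-point halo
bufg = dat[-2:-1].copy()         # gather: copy the OWNED point into bufg
bufs = np.empty(1)               # receive buffer

rrecv = comm.Irecv(bufs, source=fromrank, tag=13)
rsend = comm.Isend(bufg, dest=torank, tag=13)
rsend.Wait()
rrecv.Wait()

# scatter: guarded, so the halo is untouched on the domain boundary
if fromrank != MPI.PROC_NULL:
    dat[0] = bufs[0]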
Example #7
def complex_function(a, b, c, d, exprs, iters):
    # void foo(a, b, c, d)
    #   for i
    #     for s
    #       expr0
    #     for j
    #       for k
    #         expr1
    #         expr2
    #     for p
    #       expr3
    symbols = [i.base.function for i in [a, b, c, d]]
    body = iters[0]([iters[3](exprs[2]),
                     iters[1](iters[2]([exprs[3], exprs[4]])),
                     iters[4](exprs[5])])
    return Callable('foo', body, 'void', symbols, ())
Example #8
    def __make_tfunc(self, name, iet, root, threads):
        # Create the SharedData
        required = derive_parameters(iet)
        known = (root.parameters +
                 tuple(i for i in required if i.is_Array and i._mem_shared))
        parameters, dynamic_parameters = split(required, lambda i: i in known)

        sdata = SharedData(name=self.sregistry.make_name(prefix='sdata'),
                           nthreads_std=threads.size,
                           fields=dynamic_parameters)
        parameters.append(sdata)

        # Prepend the unpacked SharedData fields, available upon thread activation
        preactions = [
            DummyExpr(i, FieldFromPointer(i.name, sdata.symbolic_base))
            for i in dynamic_parameters
        ]
        preactions.append(
            DummyExpr(sdata.symbolic_id,
                      FieldFromPointer(sdata._field_id, sdata.symbolic_base)))

        # Append the flag reset
        postactions = [
            List(body=[
                BlankLine,
                DummyExpr(
                    FieldFromPointer(sdata._field_flag, sdata.symbolic_base),
                    1)
            ])
        ]

        iet = List(body=preactions + [iet] + postactions)

        # The thread has work to do when it receives the signal that all locks have
        # been set to 0 by the main thread
        iet = Conditional(
            CondEq(FieldFromPointer(sdata._field_flag, sdata.symbolic_base),
                   2), iet)

        # The thread keeps spinning until the alive flag is set to 0 by the main thread
        iet = While(
            CondNe(FieldFromPointer(sdata._field_flag, sdata.symbolic_base),
                   0), iet)

        return Callable(name, iet, 'void', parameters, 'static'), sdata
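To make the control flow concrete, here is a plain-Python threading sketch of the same flag protocol (illustration only, not the Devito runtime), assuming flag == 2 means "work available", 1 "idle", and 0 "terminate", as the While/Conditional/reset above encode:

import threading

class SharedData:
    flag = 1          # 1: idle, 2: work available, 0: terminate
    payload = None

def tfunc(sdata, results):
    # While(CondNe(flag, 0)): spin until the main thread asks us to terminate
    while sdata.flag != 0:
        # Conditional(CondEq(flag, 2)): only act once work has been published
        if sdata.flag == 2:
            results.append(sdata.payload * 2)   # stand-in for the `iet` body
            sdata.flag = 1                      # postaction: reset the flag

results = []
sdata = SharedData()
t = threading.Thread(target=tfunc, args=(sdata, results))
t.start()
for i in range(3):
    sdata.payload = i
    sdata.flag = 2          # activate the thread
    while sdata.flag != 1:  # wait until it has gone idle again
        pass
sdata.flag = 0              # shut the thread down
t.join()
print(results)              # [0, 2, 4]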
Example #9
    def _make_haloupdate(self, f, hse, key, **kwargs):
        distributor = f.grid.distributor
        nb = distributor._obj_neighborhood
        comm = distributor._obj_comm
        sendrecv = self._cache_dims[f.dimensions][0]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        # Only retain the halos required by the Diag scheme
        # Note: `sorted` is only for deterministic code generation
        halos = sorted(i for i in hse.halos if isinstance(i.dim, tuple))

        body = []
        for dims, tosides in halos:
            mapper = OrderedDict(zip(dims, tosides))

            sizes = [
                f._C_get_field(OWNED, d, s).size for d, s in mapper.items()
            ]

            torank = FieldFromPointer(
                ''.join(i.name[0] for i in mapper.values()), nb)
            ofsg = [
                fixed.get(d,
                          f._C_get_field(OWNED, d, mapper.get(d)).offset)
                for d in f.dimensions
            ]

            mapper = OrderedDict(zip(dims, [i.flip() for i in tosides]))
            fromrank = FieldFromPointer(
                ''.join(i.name[0] for i in mapper.values()), nb)
            ofss = [
                fixed.get(d,
                          f._C_get_field(HALO, d, mapper.get(d)).offset)
                for d in f.dimensions
            ]

            kwargs['haloid'] = len(body)

            body.append(
                self._call_sendrecv(sendrecv.name, f, sizes, ofsg, ofss,
                                    fromrank, torank, comm, **kwargs))

        iet = List(body=body)
        parameters = [f, comm, nb] + list(fixed.values())
        return Callable('haloupdate%d' % key, iet, 'void', parameters,
                        ('static', ))
Example #10
    def _make_copy(self, f, fixed, swap=False):
        """
        Construct a Callable performing a copy of:

            * an arbitrary convex region of ``f`` into a contiguous Array, OR
            * if ``swap=True``, a contiguous Array into an arbitrary convex
              region of ``f``.
        """
        buf_dims = []
        buf_indices = []
        for d in f.dimensions:
            if d not in fixed:
                buf_dims.append(Dimension(name='buf_%s' % d.root))
                buf_indices.append(d.root)
        buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

        f_offsets = []
        f_indices = []
        for d in f.dimensions:
            offset = Symbol(name='o%s' % d.root)
            f_offsets.append(offset)
            f_indices.append(offset + (d.root if d not in fixed else 0))

        if swap is False:
            eq = DummyEq(buf[buf_indices], f[f_indices])
            name = 'gather%dd' % f.ndim
        else:
            eq = DummyEq(f[f_indices], buf[buf_indices])
            name = 'scatter%dd' % f.ndim

        iet = Expression(eq)
        for i, d in reversed(list(zip(buf_indices, buf_dims))):
            # The -1 below is because an Iteration, by default, generates <=
            iet = Iteration(iet, i, d.symbolic_size - 1, properties=PARALLEL)
        iet = List(body=[ArrayCast(f), ArrayCast(buf), iet])

        # Optimize the memory copy with the DLE
        from devito.dle import transform
        state = transform(iet, 'simd', {'openmp': self._threaded})

        parameters = [buf] + list(buf.shape) + [f] + f_offsets + state.input
        return Callable(name, state.nodes, 'void', parameters,
                        ('static', )), state.input
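In NumPy terms (illustration only; the `fixed` dimensions and the DLE pass are ignored here), the gather/scatter pair built above amounts to copying a convex region of `f`, selected by per-dimension offsets, into a contiguous buffer, and back when `swap=True`:

import numpy as np

def copy_region(f, offsets, buf, swap=False):
    # Convex region of `f` starting at `offsets`, with the shape of `buf`
    region = tuple(slice(o, o + n) for o, n in zip(offsets, buf.shape))
    if not swap:
        buf[...] = f[region]   # gather: f -> contiguous buffer
    else:
        f[region] = buf        # scatter: contiguous buffer -> f

f = np.arange(16.0).reshape(4, 4)
buf = np.empty((2, 2))
copy_region(f, (1, 1), buf)    # gather the 2x2 block at offset (1, 1)
print(buf)                     # [[ 5.  6.]
                               #  [ 9. 10.]]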
Example #11
def test_transformer_replace_function_body(block1, block2):
    """Create a Function and replace its body with another."""
    args = FindSymbols().visit(block1)
    f = Callable('foo', block1, 'void', args)
    assert str(f.ccode) == """void foo()
{
  for (int i = 0; i < 3; i += 1)
  {
    for (int j = 0; j < 5; j += 1)
    {
      for (int k = 0; k < 7; k += 1)
      {
        a[i] = a[i] + b[i] + 5.0F;
      }
    }
  }
}"""

    f = Transformer({block1: block2}).visit(f)
    assert str(f.ccode) == """void foo()
Example #12
def complex_function(a, b, c, d, exprs, iters):
    # void foo(a, b, c, d)
    #   for i
    #     for s
    #       expr0
    #     for j
    #       for k
    #         expr1
    #         expr2
    #     for p
    #       expr3
    symbols = [i.base.function for i in [a, b, c, d]]
    body = iters[0]([
        iters[3](exprs[2]), iters[1](iters[2]([exprs[3], exprs[4]])),
        iters[4](exprs[5])
    ])
    f = Callable('foo', body, 'void', symbols, ())
    f, subs = ResolveTimeStepping().visit(f)
    f = SubstituteExpression(subs=subs).visit(f)
    return f
Example #13
def copy(f, fixed, swap=False):
    """
    Construct a :class:`Callable` capable of copying: ::

        * an arbitrary convex region of ``f`` into a contiguous :class:`Array`, OR
        * if ``swap=True``, a contiguous :class:`Array` into an arbitrary convex
          region of ``f``.
    """
    buf_dims = []
    buf_indices = []
    for d in f.dimensions:
        if d not in fixed:
            buf_dims.append(Dimension(name='buf_%s' % d.root))
            buf_indices.append(d.root)
    buf = Array(name='buf', dimensions=buf_dims, dtype=f.dtype)

    dat_dims = []
    dat_offsets = []
    dat_indices = []
    for d in f.dimensions:
        dat_dims.append(Dimension(name='dat_%s' % d.root))
        offset = Symbol(name='o%s' % d.root)
        dat_offsets.append(offset)
        dat_indices.append(offset + (d.root if d not in fixed else 0))
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype)

    if swap is False:
        eq = DummyEq(buf[buf_indices], dat[dat_indices])
        name = 'gather_%s' % f.name
    else:
        eq = DummyEq(dat[dat_indices], buf[buf_indices])
        name = 'scatter_%s' % f.name

    iet = Expression(eq)
    for i, d in reversed(list(zip(buf_indices, buf_dims))):
        iet = Iteration(iet, i,
                        d.symbolic_size - 1)  # -1 as Iteration generates <=
    iet = List(body=[ArrayCast(dat), ArrayCast(buf), iet])
    parameters = [buf] + list(buf.shape) + [dat] + list(dat.shape) + dat_offsets
    return Callable(name, iet, 'void', parameters, ('static', ))
Example #14
    def _make_sendrecv(self, f, hse, key='', msg=None):
        comm = f.grid.distributor._obj_comm

        bufg = FieldFromPointer(msg._C_field_bufg, msg)
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        sizes = [
            FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
            for i in range(len(f._dist_dimensions))
        ]
        gather = Call('gather%s' % key, [bufg] + sizes + [f] + ofsg)

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        iet = List(body=[recv, gather, send])
        iet = List(body=iet_insert_C_decls(iet))
        parameters = ([f] + ofsg + [fromrank, torank, comm, msg])
        return Callable('sendrecv%s' % key, iet, 'void', parameters,
                        ('static', ))
Example #15
    def _make_halowait(self, f, hse, key, msg=None):
        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)

        sizes = [
            FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofss = [
            FieldFromComposite('%s[%d]' % (msg._C_field_ofss, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofss = [fixed.get(d) or ofss.pop(0) for d in f.dimensions]

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Call('scatter_%s' % key, [bufs] + sizes + [f] + ofss)
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([waitsend, waitrecv, scatter], dim, ncomms - 1)
        parameters = ([f] + list(fixed.values()) + [msg, ncomms])
        return Callable('halowait%d' % key, iet, 'void', parameters,
                        ('static', ))
Example #16
    def _make_wait(self, f, hse, key, msg=None):
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')

        sizes = [FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
                 for i in range(len(f._dist_dimensions))]
        scatter = Call('scatter%s' % key, [bufs] + sizes + [f] + ofss)

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[waitsend, waitrecv, scatter])
        parameters = ([f] + ofss + [fromrank, msg])
        return Callable('wait_%s' % key, iet, 'void', parameters, ('static',))
Example #17
    def _make_halowait(self, f, hse, key, msg=None):
        nb = f.grid.distributor._obj_neighborhood
        wait = self._cache_dims[f.dimensions][2]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        # Only retain the halos required by the Diag scheme
        # Note: `sorted` is only for deterministic code generation
        halos = sorted(i for i in hse.halos if isinstance(i.dim, tuple))

        body = []
        for dims, tosides in halos:
            mapper = OrderedDict(zip(dims, [i.flip() for i in tosides]))
            fromrank = FieldFromPointer(''.join(i.name[0] for i in mapper.values()), nb)
            ofss = [fixed.get(d, f._C_get_field(HALO, d, mapper.get(d)).offset)
                    for d in f.dimensions]

            msgi = Byref(IndexedPointer(msg, len(body)))

            body.append(Call(wait.name, [f] + ofss + [fromrank, msgi]))

        iet = List(body=body)
        parameters = [f] + list(fixed.values()) + [nb, msg]
        return Callable('halowait%d' % key, iet, 'void', parameters, ('static',))
Example #18
    def _make_haloupdate(self, f, fixed, mask, extra=None, uniquekey=None):
        extra = extra or []
        distributor = f.grid.distributor
        nb = distributor._obj_neighborhood
        comm = distributor._obj_comm

        fixed = {d: Symbol(name="o%s" % d.root) for d in fixed}

        # Build a mapper `(dim, side, region) -> (size, ofs)` for `f`. `size` and
        # `ofs` are symbolic objects. This mapper tells what data values should be
        # sent (OWNED) or received (HALO) given dimension and side
        mapper = {}
        for d0, side, region in product(f.dimensions, (LEFT, RIGHT),
                                        (OWNED, HALO)):
            if d0 in fixed:
                continue
            sizes = []
            offsets = []
            for d1 in f.dimensions:
                if d1 in fixed:
                    offsets.append(fixed[d1])
                else:
                    meta = f._C_get_field(region if d0 is d1 else NOPAD, d1,
                                          side)
                    offsets.append(meta.offset)
                    sizes.append(meta.size)
            mapper[(d0, side, region)] = (sizes, offsets)

        body = []
        for d in f.dimensions:
            if d in fixed:
                continue

            name = ''.join('r' if i is d else 'c'
                           for i in distributor.dimensions)
            rpeer = FieldFromPointer(name, nb)
            name = ''.join('l' if i is d else 'c'
                           for i in distributor.dimensions)
            lpeer = FieldFromPointer(name, nb)

            if mask[(d, LEFT)]:
                # Sending to left, receiving from right
                lsizes, loffsets = mapper[(d, LEFT, OWNED)]
                rsizes, roffsets = mapper[(d, RIGHT, HALO)]
                args = [f] + lsizes + loffsets + roffsets + [
                    rpeer, lpeer, comm
                ] + extra
                body.append(Call('sendrecv%dd' % f.ndim, args))

            if mask[(d, RIGHT)]:
                # Sending to right, receiving from left
                rsizes, roffsets = mapper[(d, RIGHT, OWNED)]
                lsizes, loffsets = mapper[(d, LEFT, HALO)]
                args = [f] + rsizes + roffsets + loffsets + [
                    lpeer, rpeer, comm
                ] + extra
                body.append(Call('sendrecv%dd' % f.ndim, args))

        if uniquekey is None:
            uniquekey = ''.join(str(int(i)) for i in mask.values())
        name = 'haloupdate%dd%s' % (f.ndim, uniquekey)
        iet = List(body=body)
        parameters = [f, comm, nb] + list(fixed.values()) + extra
        return Callable(name, iet, 'void', parameters, ('static', ))
Example #19
    def _create_efuncs(self, nodes, state):
        """
        Extract Iteration sub-trees and turn them into Calls+Callables.

        Currently, only tagged, elementizable Iteration objects are targeted.
        """
        noinline = self._compiler_decoration('noinline',
                                             c.Comment('noinline?'))

        efuncs = OrderedDict()
        mapper = {}
        for tree in retrieve_iteration_tree(nodes, mode='superset'):
            # Search an elementizable sub-tree (if any)
            tagged = filter_iterations(tree, lambda i: i.tag is not None,
                                       'asap')
            if not tagged:
                continue
            root = tagged[0]
            if not root.is_Elementizable:
                continue
            target = tree[tree.index(root):]

            # Build a new Iteration/Expression tree with free bounds
            free = []
            defined_args = {}  # Map of argument values defined by loop bounds
            for i in target:
                name, bounds = i.dim.name, i.symbolic_bounds
                # Iteration bounds
                _min = Scalar(name='%sf_m' % name,
                              dtype=np.int32,
                              is_const=True)
                _max = Scalar(name='%sf_M' % name,
                              dtype=np.int32,
                              is_const=True)
                defined_args[_min.name] = bounds[0]
                defined_args[_max.name] = bounds[1]

                # Iteration unbounded indices
                ufunc = [
                    Scalar(name='%s_ub%d' % (name, j), dtype=np.int32)
                    for j in range(len(i.uindices))
                ]
                defined_args.update({
                    uf.name: j.symbolic_min
                    for uf, j in zip(ufunc, i.uindices)
                })
                uindices = [
                    IncrDimension(j.parent, i.dim + as_symbol(k), 1, j.name)
                    for j, k in zip(i.uindices, ufunc)
                ]
                free.append(
                    i._rebuild(limits=(_min, _max, 1),
                               offsets=None,
                               uindices=uindices))

            # Construct elemental function body
            free = Transformer(dict(zip(target, free)), nested=True).visit(root)
            items = FindSymbols().visit(free)

            # Insert array casts
            casts = [ArrayCast(i) for i in items if i.is_Tensor]
            free = List(body=casts + [free])

            # Insert declarations
            external = [i for i in items if i.is_Array]
            free = iet_insert_C_decls(free, external)

            # Create the Callable
            name = "f_%d" % root.tag
            params = derive_parameters(free)
            efuncs.setdefault(name,
                              Callable(name, free, 'void', params, 'static'))

            # Create the Call
            args = [defined_args.get(i.name, i) for i in params]
            mapper[root] = List(header=noinline, body=Call(name, args))

        # Transform the main tree
        processed = Transformer(mapper).visit(nodes)

        return processed, {'efuncs': efuncs.values()}
Example #20
File: basic.py Project: nw0/devito
    def _create_elemental_functions(self, nodes, state):
        """
        Extract :class:`Iteration` sub-trees and move them into :class:`Callable`s.

        Currently, only tagged, elementizable Iteration objects are targeted.
        """
        noinline = self._compiler_decoration('noinline',
                                             c.Comment('noinline?'))

        functions = OrderedDict()
        mapper = {}
        for tree in retrieve_iteration_tree(nodes, mode='superset'):
            # Search an elementizable sub-tree (if any)
            tagged = filter_iterations(tree, lambda i: i.tag is not None,
                                       'asap')
            if not tagged:
                continue
            root = tagged[0]
            if not root.is_Elementizable:
                continue
            target = tree[tree.index(root):]

            # Elemental function arguments
            args = []  # Found so far (scalars, tensors)
            maybe_required = set()  # Scalars that *may* have to be passed in
            not_required = set()  # Elemental function locally declared scalars

            # Build a new Iteration/Expression tree with free bounds
            free = []
            for i in target:
                name, bounds = i.dim.name, i.bounds_symbolic
                # Iteration bounds
                start = Scalar(name='%s_start' % name, dtype=np.int32)
                finish = Scalar(name='%s_finish' % name, dtype=np.int32)
                args.extend(zip([ccode(j) for j in bounds], (start, finish)))
                # Iteration unbounded indices
                ufunc = [
                    Scalar(name='%s_ub%d' % (name, j), dtype=np.int32)
                    for j in range(len(i.uindices))
                ]
                args.extend(zip([ccode(j.start) for j in i.uindices], ufunc))
                limits = [Symbol(start.name), Symbol(finish.name), 1]
                uindices = [
                    UnboundedIndex(j.index, i.dim + as_symbol(k))
                    for j, k in zip(i.uindices, ufunc)
                ]
                free.append(
                    i._rebuild(limits=limits, offsets=None, uindices=uindices))
                not_required.update({i.dim}, set(j.index for j in i.uindices))

            # Construct elemental function body, and inspect it
            free = NestedTransformer(dict(zip(target, free))).visit(root)
            expressions = FindNodes(Expression).visit(free)
            fsymbols = FindSymbols('symbolics').visit(free)

            # Add all definitely-required arguments
            not_required.update({i.output for i in expressions if i.is_scalar})
            for i in fsymbols:
                if i in not_required:
                    continue
                elif i.is_Array:
                    args.append(
                        ("(%s*)%s" % (c.dtype_to_ctype(i.dtype), i.name), i))
                elif i.is_TensorFunction:
                    args.append(("%s_vec" % i.name, i))
                elif i.is_Scalar:
                    args.append((i.name, i))

            # Add all maybe-required arguments that turn out to be required
            maybe_required.update(
                set(FindSymbols(mode='free-symbols').visit(free)))
            for i in fsymbols:
                not_required.update({as_symbol(i), i.indexify()})
                for j in i.symbolic_shape:
                    maybe_required.update(j.free_symbols)
            required = filter_sorted(maybe_required - not_required,
                                     key=attrgetter('name'))
            args.extend([(i.name, Scalar(name=i.name, dtype=i.dtype))
                         for i in required])

            call, params = zip(*args)
            handle = flatten([p.rtargs for p in params])
            name = "f_%d" % root.tag

            # Produce the new Call
            mapper[root] = List(header=noinline, body=Call(name, call))

            # Produce the new Callable
            functions.setdefault(
                name, Callable(name, free, 'void', handle, ('static', )))

        # Transform the main tree
        processed = Transformer(mapper).visit(nodes)

        return processed, {'elemental_functions': functions.values()}
Example #21
def opsit(trees, count):
    node_factory = OPSNodeFactory()
    expressions = []
    for tree in trees:
        expressions.extend(FindNodes(Expression).visit(tree.inner))

    it_range = []
    it_dims = 0
    for tree in trees:
        if isinstance(tree, IterationTree):
            it_range = [it.bounds() for it in tree]
            it_dims = len(tree)

    block = OPSBlock(namespace['ops_block'](count))
    block_init = Element(
        cgen.Initializer(
            block, Call("ops_decl_block",
                        [it_dims, String(block.name)], False)))

    ops_expressions = []
    accesses = defaultdict(set)

    for i in reversed(expressions):
        extend_accesses(accesses, get_accesses(i.expr))
        ops_expressions.insert(0,
                               Expression(make_ops_ast(i.expr, node_factory)))

    ops_stencils_initializers, ops_stencils = generate_ops_stencils(accesses)

    to_remove = [
        f.name for f in FindSymbols('defines').visit(List(body=expressions))
    ]

    parameters = FindSymbols('symbolics').visit(List(body=ops_expressions))
    parameters = [
        p for p in parameters
        if p.name != 'OPS_ACC_size' and p.name not in to_remove
    ]
    parameters = sorted(parameters, key=lambda i: (i.is_Constant, i.name))

    arguments = FindSymbols('symbolics').visit(List(body=expressions))
    arguments = [a for a in arguments if a.name not in to_remove]
    arguments = sorted(arguments, key=lambda i: (i.is_Constant, i.name))

    ops_expressions = [
        Expression(fix_ops_acc(e.expr, [p.name for p in parameters]))
        for e in ops_expressions
    ]

    callable_kernel = Callable(namespace['ops_kernel'](count), ops_expressions,
                               "void", parameters)

    dat_declarations = []
    argname_to_dat = {}

    for a in arguments:
        if a.is_Constant:
            continue

        dat_dec, dat_sym = to_ops_dat(a, block)
        dat_declarations.extend(dat_dec)

        argname_to_dat.update(dat_sym)

    par_loop_range_arr = SymbolicArray(name=namespace['ops_range'](count),
                                       dimensions=(len(it_range) * 2, ),
                                       dtype=np.int32)
    range_vals = []
    for mn, mx in it_range:
        range_vals.append(mn)
        range_vals.append(mx)
    par_loop_range_init = Expression(
        ClusterizedEq(Eq(par_loop_range_arr, ListInitializer(range_vals))))

    ops_args = get_ops_args(list(parameters), ops_stencils, argname_to_dat)

    par_loop = Call("ops_par_loop", [
        FunctionPointer(callable_kernel.name),
        String(callable_kernel.name), block, it_dims, par_loop_range_arr,
        *ops_args
    ])

    return (callable_kernel,
            [par_loop_range_init, block_init] + ops_stencils_initializers +
            dat_declarations + [Call("ops_partition", [String("")])],
            List(body=[par_loop]), it_dims)
Example #22
    def _build(cls, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Evaluable) for i in expressions):
            raise InvalidOperator("Only `devito.Evaluable` are allowed.")

        # Python-level (i.e., compile time) and C-level (i.e., run time) performance profiling
        profiler = create_profile('timers')

        # Lower input expressions
        expressions = cls._lower_exprs(expressions, **kwargs)

        # Group expressions based on iteration spaces and data dependences
        clusters = cls._lower_clusters(expressions, profiler, **kwargs)

        # Lower Clusters to a ScheduleTree
        stree = cls._lower_stree(clusters, **kwargs)

        # Lower ScheduleTree to an Iteration/Expression Tree
        iet, byproduct = cls._lower_iet(stree, profiler, **kwargs)

        # Make it an actual Operator
        op = Callable.__new__(cls, **iet.args)
        Callable.__init__(op, **op.args)

        # Header files, etc.
        op._headers = list(cls._default_headers)
        op._headers.extend(byproduct.headers)
        op._globals = list(cls._default_globals)
        op._includes = list(cls._default_includes)
        op._includes.extend(profiler._default_includes)
        op._includes.extend(byproduct.includes)

        # Required for the jit-compilation
        op._compiler = kwargs['compiler']
        op._lib = None
        op._cfunction = None

        # References to local or external routines
        op._func_table = OrderedDict()
        op._func_table.update(
            OrderedDict([(i, MetaCall(None, False))
                         for i in profiler._ext_calls]))
        op._func_table.update(
            OrderedDict([(i.root.name, i) for i in byproduct.funcs]))

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        op._state = cls._initialize_state(**kwargs)

        # Produced by the various compilation passes
        op._input = filter_sorted(
            flatten(e.reads + e.writes for e in expressions))
        op._output = filter_sorted(flatten(e.writes for e in expressions))
        op._dimensions = flatten(c.dimensions
                                 for c in clusters) + byproduct.dimensions
        op._dimensions = sorted(set(op._dimensions), key=attrgetter('name'))
        op._dtype, op._dspace = clusters.meta
        op._profiler = profiler

        return op
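For context, the whole cascade above (expressions -> Clusters -> ScheduleTree -> IET -> Operator) is driven from the public API; a minimal, standard Devito usage looks like this (user-level sketch, not part of `_build` itself):

from devito import Grid, TimeFunction, Eq, Operator

grid = Grid(shape=(10, 10))
u = TimeFunction(name='u', grid=grid)

op = Operator([Eq(u.forward, u + 1)])   # _build runs here
op.apply(time_M=5)                      # jit-compile and run the generated Callable
print(u.data.shape)                     # data updated by the generated C code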
Example #23
    def _build(cls, expressions, **kwargs):
        expressions = as_tuple(expressions)

        # Input check
        if any(not isinstance(i, Eq) for i in expressions):
            raise InvalidOperator("Only `devito.Eq` expressions are allowed.")

        name = kwargs.get("name", "Kernel")
        dse = kwargs.get("dse", configuration['dse'])

        # Python-level (i.e., compile time) and C-level (i.e., run time) performance profiling
        profiler = create_profile('timers')

        # Lower input expressions to internal expressions (e.g., attaching metadata)
        expressions = cls._lower_exprs(expressions, **kwargs)

        # Group expressions based on their iteration space and data dependences
        # Several optimizations are applied (fusion, lifting, flop reduction via DSE, ...)
        clusters = clusterize(expressions, dse_mode=set_dse_mode(dse))

        # Lower Clusters to a Schedule tree
        stree = st_build(clusters)

        # Lower Schedule tree to an Iteration/Expression tree (IET)
        iet = iet_build(stree)

        # Instrument the IET for C-level profiling
        iet = profiler.instrument(iet)

        # Wrap the IET with a Callable
        parameters = derive_parameters(iet, True)
        op = Callable(name, iet, 'int', parameters, ())

        # Lower IET to a Target-specific IET
        op, target_state = cls._specialize_iet(op, **kwargs)

        # Make it an actual Operator
        op = Callable.__new__(cls, **op.args)
        Callable.__init__(op, **op.args)

        # Header files, etc.
        op._headers = list(cls._default_headers)
        op._headers.extend(target_state.headers)
        op._globals = list(cls._default_globals)
        op._includes = list(cls._default_includes)
        op._includes.extend(profiler._default_includes)
        op._includes.extend(target_state.includes)

        # Required for the jit-compilation
        op._compiler = configuration['compiler']
        op._lib = None
        op._cfunction = None

        # References to local or external routines
        op._func_table = OrderedDict()
        op._func_table.update(
            OrderedDict([(i, MetaCall(None, False))
                         for i in profiler._ext_calls]))
        op._func_table.update(
            OrderedDict([(i.root.name, i) for i in target_state.funcs]))

        # Internal state. May be used to store information about previous runs,
        # autotuning reports, etc
        op._state = cls._initialize_state(**kwargs)

        # Produced by the various compilation passes
        op._input = filter_sorted(
            flatten(e.reads + e.writes for e in expressions))
        op._output = filter_sorted(flatten(e.writes for e in expressions))
        op._dimensions = filter_sorted(
            flatten(e.dimensions for e in expressions))
        op._dimensions.extend(target_state.dimensions)
        op._dtype, op._dspace = clusters.meta
        op._profiler = profiler

        return op
Example #24
    def _make_fetchprefetch(self, iet, sync_ops, pieces, root):
        fid = SharedData._field_id

        fetches = []
        prefetches = []
        presents = []
        for s in sync_ops:
            f = s.function
            dimensions = s.dimensions
            fc = s.fetch
            ifc = s.ifetch
            pfc = s.pfetch
            fcond = s.fcond
            pcond = s.pcond

            # Construct init IET
            imask = [(ifc, s.size) if d.root is s.dim.root else FULL
                     for d in dimensions]
            fetch = PragmaTransfer(self.lang._map_to, f, imask=imask)
            fetches.append(Conditional(fcond, fetch))

            # Construct present clauses
            imask = [(fc, s.size) if d.root is s.dim.root else FULL
                     for d in dimensions]
            presents.append(
                PragmaTransfer(self.lang._map_present, f, imask=imask))

            # Construct prefetch IET
            imask = [(pfc, s.size) if d.root is s.dim.root else FULL
                     for d in dimensions]
            prefetch = PragmaTransfer(self.lang._map_to_wait,
                                      f,
                                      imask=imask,
                                      queueid=fid)
            prefetches.append(Conditional(pcond, prefetch))

        # Turn init IET into a Callable
        functions = filter_ordered(s.function for s in sync_ops)
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=fetches)
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        pieces.init.append(
            List(header=c.Comment("Initialize data stream"),
                 body=[Call(name, parameters), BlankLine]))

        # Turn prefetch IET into a ThreadFunction
        name = self.sregistry.make_name(prefix='prefetch_host_to_device')
        body = List(header=c.Line(), body=prefetches)
        tctx = make_thread_ctx(name, body, root, None, sync_ops,
                               self.sregistry)
        pieces.funcs.extend(tctx.funcs)

        # Glue together all the IET pieces, including the activation logic
        sdata = tctx.sdata
        threads = tctx.threads
        iet = List(body=[
            BlankLine,
            BusyWait(
                CondNe(
                    FieldFromComposite(sdata._field_flag, sdata[
                        threads.index]), 1))
        ] + presents + [iet, tctx.activate])

        # Fire up the threads
        pieces.init.append(tctx.init)

        # Final wait before jumping back to Python land
        pieces.finalize.append(tctx.finalize)

        # Keep track of created objects
        pieces.objs.add(sync_ops, sdata, threads)

        return iet
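The host side of this fetch/prefetch machinery boils down to double buffering: compute on the chunk that was fetched last time while a helper thread brings in the next one. Below is a self-contained plain-Python sketch of that pattern (illustration only; no Devito objects or device transfers involved):

import threading
import numpy as np

def prefetching_sweep(data, chunk):
    nchunks = len(data) // chunk
    buffers = [None, None]
    ready = [threading.Event(), threading.Event()]

    def fetch(i, slot):
        # Stand-in for the host-to-device transfer done by the prefetch thread
        buffers[slot] = data[i * chunk:(i + 1) * chunk].copy()
        ready[slot].set()

    fetch(0, 0)                     # initial fetch by the main thread
    total = 0.0
    for i in range(nchunks):
        slot = i % 2
        ready[slot].wait()          # stands in for the BusyWait on the flag
        if i + 1 < nchunks:         # kick off the prefetch of the next chunk
            nxt = (i + 1) % 2
            ready[nxt].clear()
            threading.Thread(target=fetch, args=(i + 1, nxt)).start()
        total += buffers[slot].sum()   # compute on the chunk that is present
    return total

print(prefetching_sweep(np.arange(32.0), 8))   # 496.0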
Example #25
    def _make_fetchwaitprefetch(self, iet, sync_ops, pieces, root):
        fetches = []
        prefetches = []
        presents = []
        for s in sync_ops:
            if s.direction is Forward:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_min)
                pfc = s.fetch + 1
                fc_cond = s.next_cbk(s.dim.symbolic_min)
                pfc_cond = s.next_cbk(s.dim + 1)
            else:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_max)
                pfc = s.fetch - 1
                fc_cond = s.next_cbk(s.dim.symbolic_max)
                pfc_cond = s.next_cbk(s.dim - 1)

            # Construct init IET
            imask = [(fc, s.size) if d.root is s.dim.root else FULL for d in s.dimensions]
            fetch = PragmaList(self.lang._map_to(s.function, imask),
                               {s.function} | fc.free_symbols)
            fetches.append(Conditional(fc_cond, fetch))

            # Construct present clauses
            imask = [(s.fetch, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            presents.extend(as_list(self.lang._map_present(s.function, imask)))

            # Construct prefetch IET
            imask = [(pfc, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            prefetch = PragmaList(self.lang._map_to_wait(s.function, imask,
                                                         SharedData._field_id),
                                  {s.function} | pfc.free_symbols)
            prefetches.append(Conditional(pfc_cond, prefetch))

        # Turn init IET into a Callable
        functions = filter_ordered(s.function for s in sync_ops)
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=fetches)
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        pieces.init.append(List(
            header=c.Comment("Initialize data stream"),
            body=[Call(name, parameters), BlankLine]
        ))

        # Turn prefetch IET into a ThreadFunction
        name = self.sregistry.make_name(prefix='prefetch_host_to_device')
        body = List(header=c.Line(), body=prefetches)
        tctx = make_thread_ctx(name, body, root, None, sync_ops, self.sregistry)
        pieces.funcs.extend(tctx.funcs)

        # Glue together all the IET pieces, including the activation logic
        sdata = tctx.sdata
        threads = tctx.threads
        iet = List(body=[
            BlankLine,
            BusyWait(CondNe(FieldFromComposite(sdata._field_flag,
                                               sdata[threads.index]), 1)),
            List(header=presents),
            iet,
            tctx.activate
        ])

        # Fire up the threads
        pieces.init.append(tctx.init)
        pieces.threads.append(threads)

        # Final wait before jumping back to Python land
        pieces.finalize.append(tctx.finalize)

        return iet
Example #26
    def _make_fetchwaitprefetch(self, iet, sync_ops, pieces, root):
        threads = self.__make_threads()

        fetches = []
        prefetches = []
        presents = []
        for s in sync_ops:
            if s.direction is Forward:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_min)
                fsize = s.function._C_get_field(FULL, s.dim).size
                fc_cond = fc + (s.size - 1) < fsize
                pfc = s.fetch + 1
                pfc_cond = pfc + (s.size - 1) < fsize
            else:
                fc = s.fetch.subs(s.dim, s.dim.symbolic_max)
                fc_cond = fc >= 0
                pfc = s.fetch - 1
                pfc_cond = pfc >= 0

            # Construct fetch IET
            imask = [(fc, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            fetch = List(header=self._P._map_to(s.function, imask))
            fetches.append(Conditional(fc_cond, fetch))

            # Construct present clauses
            imask = [(s.fetch, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            presents.extend(as_list(self._P._map_present(s.function, imask)))

            # Construct prefetch IET
            imask = [(pfc, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            prefetch = List(header=self._P._map_to_wait(
                s.function, imask, SharedData._field_id))
            prefetches.append(Conditional(pfc_cond, prefetch))

        functions = filter_ordered(s.function for s in sync_ops)
        casts = [PointerCast(f) for f in functions]

        # Turn init IET into a Callable
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=casts + fetches)
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        pieces.init.append(
            List(header=c.Comment("Initialize data stream for `%s`" %
                                  threads.name),
                 body=[Call(name, func.parameters), BlankLine]))

        # Turn prefetch IET into a threaded Callable
        name = self.sregistry.make_name(prefix='prefetch_host_to_device')
        body = List(header=c.Line(), body=casts + prefetches)
        tfunc, sdata = self.__make_tfunc(name, body, root, threads)
        pieces.funcs.append(tfunc)

        # Glue together all the IET pieces, including the activation bits
        iet = List(body=[
            BlankLine,
            BusyWait(
                CondNe(
                    FieldFromComposite(sdata._field_flag, sdata[
                        threads.index]), 1)),
            List(header=presents), iet,
            self.__make_activate_thread(threads, sdata, sync_ops)
        ])

        # Fire up the threads
        pieces.init.append(
            self.__make_init_threads(threads, sdata, tfunc, pieces))
        pieces.threads.append(threads)

        # Final wait before jumping back to Python land
        pieces.finalize.append(self.__make_finalize_threads(threads, sdata))

        return iet
Example #27
    def _create_elemental_functions(self, nodes, state):
        """
        Extract :class:`Iteration` sub-trees and move them into :class:`Callable`s.

        Currently, only tagged, elementizable Iteration objects are targeted.
        """
        noinline = self._compiler_decoration('noinline',
                                             c.Comment('noinline?'))

        functions = OrderedDict()
        mapper = {}
        for tree in retrieve_iteration_tree(nodes, mode='superset'):
            # Search an elementizable sub-tree (if any)
            tagged = filter_iterations(tree, lambda i: i.tag is not None,
                                       'asap')
            if not tagged:
                continue
            root = tagged[0]
            if not root.is_Elementizable:
                continue
            target = tree[tree.index(root):]

            # Elemental function arguments
            args = []  # Found so far (scalars, tensors)
            defined_args = {}  # Map of argument values defined by loop bounds

            # Build a new Iteration/Expression tree with free bounds
            free = []
            for i in target:
                name, bounds = i.dim.name, i.bounds_symbolic
                # Iteration bounds
                start = Scalar(name='%s_start' % name, dtype=np.int32)
                finish = Scalar(name='%s_finish' % name, dtype=np.int32)
                defined_args[start.name] = bounds[0]
                defined_args[finish.name] = bounds[1]

                # Iteration unbounded indices
                ufunc = [
                    Scalar(name='%s_ub%d' % (name, j), dtype=np.int32)
                    for j in range(len(i.uindices))
                ]
                defined_args.update(
                    {uf.name: j.start
                     for uf, j in zip(ufunc, i.uindices)})
                limits = [
                    Scalar(name=start.name, dtype=np.int32),
                    Scalar(name=finish.name, dtype=np.int32), 1
                ]
                uindices = [
                    UnboundedIndex(j.index, i.dim + as_symbol(k))
                    for j, k in zip(i.uindices, ufunc)
                ]
                free.append(
                    i._rebuild(limits=limits, offsets=None, uindices=uindices))

            # Construct elemental function body, and inspect it
            free = NestedTransformer(dict(zip(target, free))).visit(root)

            # Insert array casts for all non-defined
            f_symbols = FindSymbols('symbolics').visit(free)
            defines = [s.name for s in FindSymbols('defines').visit(free)]
            casts = [
                ArrayCast(f) for f in f_symbols
                if f.is_Tensor and f.name not in defines
            ]
            free = (List(body=casts), free)

            for i in derive_parameters(free):
                if i.name in defined_args:
                    args.append((defined_args[i.name], i))
                elif i.is_Dimension:
                    d = Scalar(name=i.name, dtype=i.dtype)
                    args.append((d, d))
                else:
                    args.append((i, i))

            call, params = zip(*args)
            name = "f_%d" % root.tag

            # Produce the new Call
            mapper[root] = List(header=noinline, body=Call(name, call))

            # Produce the new Callable
            functions.setdefault(
                name,
                Callable(name, free, 'void', flatten(params), ('static', )))

        # Transform the main tree
        processed = Transformer(mapper).visit(nodes)

        return processed, {'elemental_functions': functions.values()}
Example #28
    def _make_sendrecv(self, f, fixed, extra=None):
        extra = extra or []
        comm = f.grid.distributor._obj_comm

        buf_dims = [
            Dimension(name='buf_%s' % d.root) for d in f.dimensions
            if d not in fixed
        ]
        bufg = Array(name='bufg',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     scope='heap')
        bufs = Array(name='bufs',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     scope='heap')

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        args = [bufg] + list(bufg.shape) + [f] + ofsg + extra
        gather = Call('gather%dd' % f.ndim, args)
        args = [bufs] + list(bufs.shape) + [f] + ofss + extra
        scatter = Call('scatter%dd' % f.ndim, args)

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)
        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        srecv = MPIStatusObject(name='srecv')
        ssend = MPIStatusObject(name='ssend')
        rrecv = MPIRequestObject(name='rrecv')
        rsend = MPIRequestObject(name='rsend')

        count = reduce(mul, bufs.shape, 1)
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        waitrecv = Call('MPI_Wait', [rrecv, srecv])
        waitsend = Call('MPI_Wait', [rsend, ssend])

        iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
        iet = List(body=iet_insert_C_decls(iet))
        parameters = ([f] + list(bufs.shape) + ofsg + ofss +
                      [fromrank, torank, comm] + extra)
        return Callable('sendrecv%dd' % f.ndim, iet, 'void', parameters,
                        ('static', ))