コード例 #1
0
ファイル: rewriters.py プロジェクト: varinic/devito
 def _loop_wrapping(self, iet):
     """
     Issue one performance advisory for every Iteration in `iet` that is
     flagged as wrappable; such Iterations hint that more memory than
     necessary is being allocated.

     The IET is returned unmodified, together with an empty metadata dict.
     """
     for iteration in FindNodes(Iteration).visit(iet):
         if iteration.is_Wrappable:
             perf_adv("Functions using modulo iteration along Dimension `%s` "
                      "may safely allocate a one slot smaller buffer"
                      % iteration.dim)
     return iet, {}
コード例 #2
0
ファイル: profiling.py プロジェクト: speglich/devito
    def analyze(self, iet):
        """
        Gather performance metadata for every Section found in `iet` and
        record it in `self._sections`, keyed by Section name. Sections whose
        name has already been recorded are skipped.
        """
        for section in FindNodes(Section).visit(iet):
            if section.name in self._sections:
                continue

            bundles = FindNodes(ExpressionBundle).visit(section)

            # Operation counts: accumulated over the whole iteration space
            # first, then per single section iteration
            ops = sum(b.ops*b.ispace.size for b in bundles)
            sops = sum(b.ops for b in bundles)

            # Memory traffic: group the per-bundle traffic entries by key,
            # then try to merge each group through a 'union'
            grouped = {}
            for b in bundles:
                for key, value in b.traffic.items():
                    grouped.setdefault(key, []).append(value)
            traffic = 0
            for group in grouped.values():
                try:
                    traffic += IntervalGroup.generate('union', *group).size
                except (ValueError, TypeError):
                    # The union is not possible (entries over different
                    # iteration spaces), so the sizes are simply added up
                    traffic += sum(j.size for j in group)

            # One dimension map per ExpressionBundle performing some operations
            itermaps = [b.ispace.dimension_map for b in bundles if b.ops != 0]

            # Distinct sizes of the bundles writing to at least one
            # TimeFunction, summed up symbolically
            written = {b.size for b in bundles
                       if any(e.write.is_TimeFunction for e in b.exprs)}
            points = sum(written, S.Zero)

            data = SectionData(ops, sops, points, traffic, itermaps)
            self._sections[section.name] = data
コード例 #3
0
    def test_no_fusion_simple(self):
        """
        Clusters carrying ConditionalDimensions must not be fused, so that the
        affected Eqs are eventually scheduled to distinct loop nests.
        """
        grid = Grid(shape=(4, 4, 4))
        time = grid.time_dim

        f = TimeFunction(name='f', grid=grid)
        g = Function(name='g', grid=grid)
        h = Function(name='h', grid=grid)

        def expressions(operator, efunc):
            # All Expressions under the root of the named `_func_table` entry
            return FindNodes(Expression).visit(operator._func_table[efunc].root)

        # Baseline without ConditionalDimensions: fusion and optimization apply
        op = Operator([Eq(f.forward, f + 1), Eq(h, f + 1), Eq(g, f + 1)])

        found = expressions(op, 'bf0')
        assert len(found) == 4
        assert all(e.expr.rhs is found[0].output for e in found[1:])

        # Same equations, but the last one now depends on a
        # ConditionalDimension: neither fusion nor optimization is expected
        ctime = ConditionalDimension(name='ctime',
                                     parent=time,
                                     condition=time > 4)

        op = Operator([Eq(f.forward, f + 1),
                       Eq(h, f + 1),
                       Eq(g, f + 1, implicit_dims=[ctime])])

        found = expressions(op, 'bf0')
        assert len(found) == 3
        assert all(e.expr.rhs is found[0].output for e in found[1:])

        found = expressions(op, 'bf1')
        assert len(found) == 1
コード例 #4
0
    def test_topofusion_w_subdims_conddims_v3(self):
        """
        Variant of `test_topofusion_w_subdims_conddims_v2` featuring one more
        anti-dependence, which results in scheduling over more loop nests.
        """
        grid = Grid(shape=(4, 4, 4))
        time = grid.time_dim
        interior = grid.interior

        f = TimeFunction(name='f', grid=grid, time_order=2)
        g = TimeFunction(name='g', grid=grid, time_order=2)
        h = TimeFunction(name='h', grid=grid, time_order=2)
        fsave = TimeFunction(name='fsave', grid=grid, time_order=2, save=5)
        gsave = TimeFunction(name='gsave', grid=grid, time_order=2, save=5)

        ctime = ConditionalDimension(name='ctime', parent=time, condition=time > 4)

        op = Operator([
            Eq(f.forward, f + 1, subdomain=interior),
            Eq(g.forward, g + 1, subdomain=interior),
            Eq(fsave, f.dt2, implicit_dims=[ctime]),
            Eq(h, f.dt2.dx + g, subdomain=interior),
            Eq(gsave, g.dt2, implicit_dims=[ctime]),
        ])

        def expressions(efunc):
            # All Expressions under the root of the named `_func_table` entry
            return FindNodes(Expression).visit(op._func_table[efunc].root)

        # The generated code is expected to schedule the gsave equation in the
        # same loop nest as the fsave equation
        assert len(op._func_table) == 3

        bf0 = expressions('bf0')
        assert len(bf0) == 2
        assert bf0[0].write is f
        assert bf0[1].write is g

        bf1 = expressions('bf1')
        assert len(bf1) == 3
        assert bf1[1].write is fsave
        assert bf1[2].write is gsave

        bf2 = expressions('bf2')
        assert len(bf2) == 2
        assert bf2[1].write is h
コード例 #5
0
    def test_stencil_nowrite_implies_haloupdate(self):
        """
        An Operator whose only Eq reads `f` at `x - 1` and `x + 1`, and never
        writes to `f`, is expected to contain exactly one Call.
        """
        grid = Grid(shape=(12, ))
        x = grid.dimensions[0]
        t = grid.stepping_dim

        f = TimeFunction(name='f', grid=grid)
        g = Function(name='g', grid=grid)

        stencil = f[t, x - 1] + f[t, x + 1] + 1.
        op = Operator(Eq(g, stencil))

        assert len(FindNodes(Call).visit(op)) == 1
コード例 #6
0
ファイル: orchestration.py プロジェクト: kenhester/devito
    def process(self, iet):
        """
        Lower the SyncSpots found in `iet` by dispatching each group of
        SyncOps to the `_make_*` callback registered for its type.

        Parameters
        ----------
        iet : Node
            The input Iteration/Expression tree.

        Returns
        -------
        tuple
            The transformed IET and a metadata dict with the keys 'efuncs',
            'includes' and 'args'. If `iet` contains no SyncSpots, `iet` is
            returned as is, along with an empty dict.
        """
        sync_spots = FindNodes(SyncSpot).visit(iet)
        if not sync_spots:
            return iet, {}

        # The SyncOps are to be processed in the following order
        ordering = [
            WaitLock, WithLock, Delete, FetchUpdate, FetchPrefetch,
            PrefetchUpdate, WaitPrefetch
        ]

        callbacks = {
            WaitLock: self._make_waitlock,
            WithLock: self._make_withlock,
            Delete: self._make_delete,
            FetchUpdate: self._make_fetchupdate,
            FetchPrefetch: self._make_fetchprefetch,
            PrefetchUpdate: self._make_prefetchupdate
        }
        postponed_callbacks = {WaitPrefetch: self._make_waitprefetch}
        all_callbacks = [callbacks, postponed_callbacks]

        # Accumulators handed to, and filled in by, the callbacks
        pieces = namedtuple('Pieces', 'init finalize funcs objs')([], [], [],
                                                                  Objs())

        # The processing is a two-step procedure; first, we apply the `callbacks`;
        # then, the `postponed_callbacks`, as these depend on objects produced by the
        # `callbacks`
        subs = {}
        for cbks in all_callbacks:
            for n in sync_spots:
                mapper = as_mapper(n.sync_ops, type)
                for _type in sorted(mapper, key=ordering.index):
                    # A type without a callback in this step belongs to the
                    # other step. The lookup is deliberately kept apart from
                    # the call, so that a KeyError raised *inside* a callback
                    # surfaces instead of being silently discarded
                    callback = cbks.get(_type)
                    if callback is None:
                        continue
                    subs[n] = callback(subs.get(n, n), mapper[_type],
                                       pieces, iet)

        iet = Transformer(subs).visit(iet)

        # Add initialization and finalization code
        init = List(body=pieces.init, footer=c.Line())
        finalize = List(header=c.Line(), body=pieces.finalize)
        iet = iet._rebuild(body=(init, ) + iet.body + (finalize, ))

        return iet, {
            'efuncs': pieces.funcs,
            'includes': ['pthread.h'],
            # Only the non-integer thread sizes are exposed as arguments
            'args':
            [i.size for i in pieces.objs.threads if not is_integer(i.size)]
        }
コード例 #7
0
def fold_blockable_tree(node, exclude_innermost=False):
    """
    Create IterationFolds from sequences of nested Iterations.

    Parameters
    ----------
    node : Node
        The Iteration/Expression tree searched for adjacent Iterations.
    exclude_innermost : bool, optional
        If True, the innermost Iterations are left out of the folding.
        Defaults to False.

    Returns
    -------
    Node
        The tree obtained by visiting `node` with a nested Transformer that
        replaces the first Iteration of each foldable group with an
        IterationFold, and maps the other Iterations of the group to None.
    """
    found = FindAdjacent(Iteration).visit(node)

    mapper = {}
    for k, v in found.items():
        for i in v:
            # Pre-condition: they all must be perfect iterations
            assert len(i) > 1
            if any(not IsPerfectIteration().visit(j) for j in i):
                continue
            # Only retain consecutive trees having same depth; the scan stops
            # at the first tree whose depth differs from that of the first one
            trees = [retrieve_iteration_tree(j)[0] for j in i]
            handle = []
            for j in trees:
                if len(j) != len(trees[0]):
                    break
                handle.append(j)
            trees = handle
            if not trees:
                continue
            # Check foldability. Each entry of `pairwise_folds` groups the
            # Iterations sitting at the same depth across the (reversed) trees
            pairwise_folds = list(zip(*reversed(trees)))
            if any(not is_foldable(j) for j in pairwise_folds):
                continue
            # Maybe heuristically exclude innermost Iteration
            if exclude_innermost is True:
                pairwise_folds = pairwise_folds[:-1]
            # Perhaps there's nothing to fold
            if len(pairwise_folds) == 1:
                continue
            # TODO: we do not currently support blocking if any of the foldable
            # iterations writes to user data (need min/max loop bounds?)
            # NOTE(review): only `trees[:-1]` is inspected here, that is the
            # last retained tree is not checked -- confirm this is intended
            exprs = flatten(
                FindNodes(Expression).visit(j.root) for j in trees[:-1])
            if any(j.write.is_Input for j in exprs):
                continue
            # Perform folding: the first Iteration of each group becomes the
            # IterationFold root; each fold is a pair made of the offsets of
            # the root relative to a remaining Iteration and that Iteration's
            # nodes
            for j in pairwise_folds:
                root, remainder = j[0], j[1:]
                folds = [(tuple(y - x
                                for x, y in zip(i.offsets, root.offsets)),
                          i.nodes) for i in remainder]
                mapper[root] = IterationFold(folds=folds, **root.args)
                # NOTE: `k` rebinds the (otherwise unused) key of the
                # outermost loop
                for k in remainder:
                    mapper[k] = None

    # Insert the IterationFolds in the Iteration/Expression tree
    processed = Transformer(mapper, nested=True).visit(node)

    return processed
コード例 #8
0
ファイル: test_mpi.py プロジェクト: ponykid/SNIST
    def test_avoid_haloupdate_if_distr_but_sequential(self):
        """
        Two Eqs writing to `f` are expected to produce three Iterations and
        no Call at all.
        """
        grid = Grid(shape=(12, ))
        x = grid.dimensions[0]
        t = grid.stepping_dim

        f = TimeFunction(name='f', grid=grid)

        # An anti-dependence links the two Eqs, hence the compiler schedules
        # them to different x-loops. Neither loop should be preceded by a
        # halo exchange, albeit for different reasons:
        # * first loop: the equation has no stencil
        # * second loop: the equation is inherently sequential, and the
        #   compiler is expected to be smart enough to realize that a halo
        #   exchange would be pointless
        no_stencil = Eq(f, f + 1)
        sequential = Eq(f, f[t, x - 1] + f[t, x + 1] + 1.)
        op = Operator([no_stencil, sequential])

        assert len(FindNodes(Iteration).visit(op)) == 3
        assert len(FindNodes(Call).visit(op)) == 0
コード例 #9
0
ファイル: rewriters.py プロジェクト: BrunoMot/devito
    def _simdize(self, iet):
        """
        Rebuild every vectorizable Iteration in `iet` without the VECTOR
        property, and return the transformed IET with an empty metadata dict.
        """
        # Devices get no SIMD-ization. VECTOR is therefore dropped, which
        # lets later passes apply more aggressive transformations
        def strip(iteration):
            kept = [p for p in iteration.properties if p is not VECTOR]
            return iteration._rebuild(properties=kept)

        mapper = {i: strip(i)
                  for i in FindNodes(Iteration).visit(iet)
                  if i.is_Vectorizable}

        return Transformer(mapper).visit(iet), {}
コード例 #10
0
ファイル: mpi.py プロジェクト: ofmla/devito
def mpiize(iet, **kwargs):
    """
    Replace the HaloSpots in `iet` with MPI halo exchange routines, thus
    producing distributed-memory parallel code.

    The keyword arguments 'mode', 'language' and 'sregistry' are mandatory.
    Returns the transformed IET along with a metadata dict carrying the keys
    'includes', 'efuncs' and 'args'.
    """
    mode = kwargs.pop('mode')
    language = kwargs.pop('language')
    sregistry = kwargs.pop('sregistry')

    # Shared by both builders, so that the generated object names are unique
    generators = {'msg': generator(), 'comm': generator(), 'comp': generator()}

    sync_heb = HaloExchangeBuilder('basic', language, sregistry, **generators)
    user_heb = HaloExchangeBuilder(mode, language, sregistry, **generators)

    # Overlappable HaloSpots go through the user-selected builder, all of
    # the others through the 'basic' one
    replacements = {}
    for hs in FindNodes(HaloSpot).visit(iet):
        if isinstance(hs, OverlappableHaloSpot):
            replacements[hs] = user_heb.make(hs)
        else:
            replacements[hs] = sync_heb.make(hs)

    efuncs = sync_heb.efuncs + user_heb.efuncs
    objs = filter_sorted(sync_heb.objs + user_heb.objs)
    iet = Transformer(replacements, nested=True).visit(iet)

    # The Iterations enclosing halo exchanges must lose the PARALLEL tag
    untagged = {}
    for tree in retrieve_iteration_tree(iet):
        for i in reversed(tree):
            if i in untagged:
                # This subtree was already handled
                break
            if not FindNodes(Call).visit(i):
                continue
            # Innermost Iteration containing a Call: rebuild it, as well as
            # all of the Iterations enclosing it
            for n in tree[:tree.index(i) + 1]:
                untagged[n] = n._rebuild(properties=set(n.properties) - {PARALLEL})
            break
    iet = Transformer(untagged, nested=True).visit(iet)

    metadata = {'includes': ['mpi.h'], 'efuncs': efuncs, 'args': objs}
    return iet, metadata
コード例 #11
0
def test_cache_blocking_structure(blockinner, exp_calls, exp_iters):
    """
    Check the structure of the code generated with the 'blocking'
    optimization, first without and then with OpenMP enabled.
    """
    shape = (10, 31, 45)

    # Code structure
    _, op = _new_operator2(shape, time_order=2,
                           opt=('blocking', {'blockinner': blockinner}))
    assert len(FindNodes(Call).visit(op)) == exp_calls
    trees = retrieve_iteration_tree(op._func_table['bf0'].root)
    assert len(trees) == 1
    tree = trees[0]
    assert len(tree) == exp_iters
    if blockinner:
        assert all(i.dim.is_Incr for i in tree[:exp_iters])
    else:
        assert all(i.dim.is_Incr for i in tree[:exp_iters - 1])
        assert not tree[-1].dim.is_Incr

    # OpenMP pragmas must appear at the right place
    _, op = _new_operator2(shape, time_order=2,
                           opt=('blocking', {'openmp': True,
                                             'blockinner': blockinner}))
    bf0_root = op._func_table['bf0'].root
    trees = retrieve_iteration_tree(bf0_root)
    assert len(trees) == 1
    tree = trees[0]
    assert len(tree.root.pragmas) == 1
    assert 'omp for' in tree.root.pragmas[0].value
    # Besides, when omp parallelism is enabled, the step increment must be
    # != 0, otherwise some omp implementations (Intel's among them) segfault
    # at scheduling time
    conditionals = FindNodes(Conditional).visit(bf0_root)
    assert len(conditionals) == 1
    conds = conditionals[0].condition.args
    expected_guarded = tree[:2 + blockinner]
    assert len(conds) == len(expected_guarded)
    assert all(cond.lhs == iteration.step
               for cond, iteration in zip(conds, expected_guarded))
コード例 #12
0
    def test_subsampling(self):
        """
        Save `f` into a time-subsampled `fsave`, then check both the computed
        data and the number of Calls found in the Operator.
        """
        grid = Grid(shape=(40, ))
        x = grid.dimensions[0]
        t = grid.stepping_dim
        time = grid.time_dim

        nt = 9

        f = TimeFunction(name='f', grid=grid)
        f.data_with_halo[:] = 1.

        # The subsampled function stores one snapshot every `factor` timesteps
        factor = 4
        nsamples = (nt + factor - 1) // factor
        times = ConditionalDimension('t_sub', parent=time, factor=factor)
        fsave = TimeFunction(name='fsave', grid=grid, save=nsamples,
                             time_dim=times)

        op = Operator([Eq(f.forward, f[t, x - 1] + f[t, x + 1]), Eq(fsave, f)])
        op.apply(time=nt - 1)

        last = nsamples - 1
        assert np.all(f.data_ro_domain[0] == fsave.data_ro_domain[last])
        glb_pos_map = f.grid.distributor.glb_pos_map
        if LEFT in glb_pos_map[x]:
            assert np.all(fsave.data_ro_domain[last, nt - 1:] == 256.)
        else:
            assert np.all(fsave.data_ro_domain[last, :-(nt - 1)] == 256.)

        # No redundant halo exchanges are expected
        assert len(FindNodes(Call).visit(op)) == 1
        # Specifically, no halo exchange is needed within the conditional
        conditional = FindNodes(Conditional).visit(op)
        assert len(conditional) == 1
        assert len(FindNodes(Call).visit(conditional[0])) == 0
コード例 #13
0
def mpi_gpu_direct(iet, **kwargs):
    """
    Wrap every IsendCall and IrecvCall found in `iet` within a Block headed
    by an `omp target data use_device_ptr(...)` pragma naming the Call's
    first argument, so as to enable GPU-Direct communication across GPUs.

    Returns the transformed IET along with an empty metadata dict.
    """
    def wrap(call):
        pragma = c.Pragma('omp target data use_device_ptr(%s)' %
                          call.arguments[0].name)
        return Block(header=pragma, body=call)

    mapper = {call: wrap(call)
              for call in FindNodes((IsendCall, IrecvCall)).visit(iet)}

    return Transformer(mapper).visit(iet), {}
コード例 #14
0
    def _dist_parallelize(self, iet):
        """
        Emit distributed-memory parallel code by introducing MPI routines
        that perform the halo exchanges.

        Returns the transformed IET and a metadata dict with the keys
        'includes' and 'call_trees'.
        """
        # The builder turns the HaloSpots into send/recv Callables and Calls
        builder = HaloExchangeBuilder(self.params['mpi'])
        halo_spots = FindNodes(HaloSpot).visit(iet)
        call_trees, calls = builder.make(halo_spots)

        # Plug the `haloupdate` Calls into the IET
        transformed = Transformer(calls, nested=True).visit(iet)

        metadata = {'includes': ['mpi.h'], 'call_trees': call_trees}
        return transformed, metadata
コード例 #15
0
    def instrument(self, iet):
        """
        Enclose every Section in `iet` between a Call to `self._api_resume`
        and a Call to `self._api_pause`, and return the transformed IET.
        """
        # The Advisor calls resume the data collection right before a
        # Section and stop it right after
        mapper = {}
        for section in FindNodes(Section).visit(iet):
            resume = Call(self._api_resume)
            pause = Call(self._api_pause)
            mapper[section] = List(body=[resume, section, pause])

        return Transformer(mapper).visit(iet)
コード例 #16
0
    def test_no_fusion_convoluted(self):
        """
        Same idea as `test_no_fusion_simple`, but with a larger number of
        expressions and a less trivial data flow.
        """
        grid = Grid(shape=(4, 4, 4))
        time = grid.time_dim

        f = TimeFunction(name='f', grid=grid)
        g = Function(name='g', grid=grid)
        h = Function(name='h', grid=grid)

        ctime = ConditionalDimension(name='ctime',
                                     parent=time,
                                     condition=time > 4)

        op = Operator([
            Eq(f.forward, f + 1),
            Eq(h, f + 1),
            Eq(g, f + 1, implicit_dims=[ctime]),
            Eq(f.forward, f + 1, implicit_dims=[ctime]),
            Eq(f.forward, f + 1),
            Eq(g, f + 1),
        ])

        def expressions(efunc):
            # All Expressions under the root of the named `_func_table` entry
            return FindNodes(Expression).visit(op._func_table[efunc].root)

        bf0 = expressions('bf0')
        assert len(bf0) == 3
        assert bf0[1].expr.rhs is bf0[0].output
        assert bf0[2].expr.rhs is bf0[0].output

        bf1 = expressions('bf1')
        assert len(bf1) == 3

        bf2 = expressions('bf2')
        assert len(bf2) == 3
        assert bf2[1].expr.rhs is bf2[0].output
        assert bf2[2].expr.rhs is bf2[0].output
コード例 #17
0
def iet_insert_casts(iet, parameters):
    """
    Place the necessary type casts at the top of the input IET.

    Parameters
    ----------
    iet : Node
        The input Iteration/Expression tree.
    parameters : iterable
        The symbols that might require casting.

    Returns
    -------
    List
        A List whose body consists of the ArrayCasts followed by `iet`.
    """
    # To keep the generated code terse, a non-Array parameter is only cast
    # if it appears in some Expression or Call; Arrays are always cast

    # Tensors accessed by the Expressions
    need_cast = set()
    for expr in FindNodes(Expression).visit(iet):
        need_cast.update(f for f in expr.functions if f.is_Tensor)

    # Tensors, and functions underlying array accesses, passed to the Calls;
    # arguments that are Calls, ints or Byrefs are disregarded
    arguments = set()
    for call in FindNodes(Call).visit(iet):
        arguments.update(call.arguments)
    for arg in arguments:
        if isinstance(arg, (Call, int, Byref)):
            continue
        if arg.is_Tensor:
            need_cast.add(arg)
        if arg.is_ArrayAccess:
            need_cast.add(arg.function)

    need_cast.update(p for p in parameters if p.is_Array)

    # The casts follow the order of `parameters`
    casts = [ArrayCast(p) for p in parameters if p in need_cast]
    return List(body=casts + [iet])
コード例 #18
0
    def test_subdimmiddle_notparallel(self):
        """
        Apply an Operator defined over sub-regions that are explicitly
        created through the use of :class:`SubDimension`s.

        Unlike ``test_subdimmiddle_parallel``, here an interior dimension
        cannot be evaluated in parallel.
        """
        grid = Grid(shape=(20, 20))
        x, y = grid.dimensions
        t = grid.stepping_dim
        thickness = 4

        u = TimeFunction(name='u', save=None, grid=grid, space_order=0,
                         time_order=1)

        xi = SubDimension.middle(name='xi', parent=x,
                                 thickness_left=thickness,
                                 thickness_right=thickness)
        yi = SubDimension.middle(name='yi', parent=y,
                                 thickness_left=thickness,
                                 thickness_right=thickness)

        # The flow dependencies in x and y should force serial execution
        # in reverse direction
        centre = Eq(u[t + 1, xi, yi], u[t, xi, yi] + u[t + 1, xi + 1, yi + 1])
        u.data[0, 10, 10] = 1.0

        op = Operator([centre])

        iterations = FindNodes(Iteration).visit(op)
        x_iters = [it for it in iterations if it.dim == xi]
        assert all(it.is_Affine and it.is_Sequential for it in x_iters)
        y_iters = [it for it in iterations if it.dim == yi]
        assert all(it.is_Affine and it.is_Parallel for it in y_iters)

        op.apply(time_m=0, time_M=0)

        # The value is expected along the diagonal; each checked entry is
        # zeroed out so that the final assertion covers everything else
        for k in range(4, 11):
            assert u.data[1, k, k] == 1.0
            u.data[1, k, k] = 0.0

        assert np.all(u.data[1, :] == 0)
コード例 #19
0
    def instrument(self, iet, timer):
        """
        Enclose the first time Iteration found in `iet` between statements
        calling `self._api_resume` and `self._api_pause`. If `iet` has no
        time loop, it is returned intact. `timer` is not used.
        """
        # The calls to Advisor's Collection Control API are only emitted for
        # Operators featuring a time loop
        time_loop = next((i for i in FindNodes(Iteration).visit(iet)
                          if i.dim.is_Time), None)
        if time_loop is None:
            return iet

        wrapped = List(header=c.Statement('%s()' % self._api_resume),
                       body=time_loop,
                       footer=c.Statement('%s()' % self._api_pause))
        return Transformer({time_loop: wrapped}).visit(iet)
コード例 #20
0
 def instrument(self, iet, timer):
     """
     Enable C-level performance profiling by rebuilding every Section in
     `iet` with its body enclosed in a TimedList bound to `timer`.

     If `iet` has no Sections, it is returned as is.
     """
     sections = FindNodes(Section).visit(iet)
     if not sections:
         return iet

     mapper = {}
     for section in sections:
         name = section.name
         # Each Section must have a matching field in the timer
         assert name in timer.fields
         timed = TimedList(timer=timer, lname=name, body=section.body)
         mapper[section] = section._rebuild(body=timed)
     return Transformer(mapper, nested=True).visit(iet)
コード例 #21
0
 def test_consistency_anti_dependences(self, exprs, axis, expected, visit,
                                       ti0, ti1, ti3, tu, tv, tw):
     """
     Anti dependences must result in multiple loop nests, as opposed to one
     loop nest wrapping all of the equations.
     """
     bases = [fn.base for fn in (ti0, ti1, ti3, tu, tv, tw)]
     eq1, eq2, eq3 = EVAL(exprs, *bases)
     op = Operator([eq1, eq2, eq3], dse='noop', dle='noop', time_axis=axis)

     # One string of Dimension names per iteration tree
     trees = retrieve_iteration_tree(op)
     assert len(trees) == len(expected)
     names = ["".join(i.dim.name for i in tree) for tree in trees]
     assert names == expected

     # The Dimension names in the order the Iterations are visited
     iterations = FindNodes(Iteration).visit(op)
     assert "".join(i.dim.name for i in iterations) == visit
コード例 #22
0
    def test_gpu_direct(self):
        """
        With 'gpu-direct' enabled, each Block whose first child is an
        IrecvCall or an IsendCall must carry the expected
        `omp target data use_device_ptr(...)` header.
        """
        grid = Grid(shape=(3, 3, 3))

        u = TimeFunction(name='u', grid=grid)

        op = Operator(Eq(u.forward, u.dx+1), opt=('advanced', {'gpu-direct': True}),
                      language='openmp')

        for efunc in op._func_table.values():
            for block in FindNodes(Block).visit(efunc.root):
                first = block.children[0][0]
                if type(first) not in (IrecvCall, IsendCall):
                    continue
                pragma = block.header[0].value
                assert pragma == ('omp target data use_device_ptr(%s)' %
                                  first.arguments[0].name)
コード例 #23
0
    def _generate_mpi(self, iet, **kwargs):
        """
        Replace the HaloSpots in `iet` with Calls performing the halo update,
        registering the required routines, globals and includes on `self`.

        If `configuration['mpi']` is False, `iet` is returned untouched.
        """
        if configuration['mpi'] is False:
            return iet

        halo_spots = FindNodes(HaloSpot).visit(iet)

        # All of the C-level routines necessary to perform a halo update, one
        # group per MPI-distributed TensorFunction. A function appearing in
        # several HaloSpots retains the group built last
        routines = OrderedDict()
        for hs in halo_spots:
            for f, v in hs.fmapper.items():
                routines[f] = [update_halo(f, v.loc_indices),
                               sendrecv(f, v.loc_indices),
                               copy(f, v.loc_indices),
                               copy(f, v.loc_indices, True)]
        callables = flatten(routines.values())

        # The Calls performing the halo update, grouped by HaloSpot
        calls = {}
        for hs in halo_spots:
            for f, v in hs.fmapper.items():
                parameters = [f]
                parameters += [int(i) for i in hs.mask[f].values()]
                parameters += [f.grid.distributor._C_comm,
                               f.grid.distributor._C_neighbours.obj]
                parameters += list(v.loc_indices.values())
                parameters += [d.symbolic_size for d in f.dimensions]
                call = Call('halo_exchange_%s' % f.name, parameters)
                calls.setdefault(hs, []).append(call)

        # `cstructs` is sorted to make code generation deterministic. In
        # practice, though, more than one element is not expected, since
        # there should always be one grid per Operator (this is not really
        # enforced)
        cstructs = set()
        for f in flatten(hs.fmapper for hs in halo_spots):
            cstructs.add(f.grid.distributor._C_neighbours.cdef)
        self._globals.extend(sorted(cstructs, key=lambda i: i.tpname))

        self._includes.append('mpi.h')

        self._func_table.update(
            OrderedDict([(i.name, MetaCall(i, True)) for i in callables]))

        # Each HaloSpot turns into its halo update Calls followed by its body
        mapper = {hs: List(body=hs_calls + list(hs.body))
                  for hs, hs_calls in calls.items()}
        return Transformer(mapper, nested=True).visit(iet)
コード例 #24
0
    def test_subdimmiddle_parallel(self):
        """
        Apply an Operator defined over sub-regions that are explicitly
        created through the use of :class:`SubDimension`s.
        """
        grid = Grid(shape=(20, 20))
        x, y = grid.dimensions
        t = grid.stepping_dim
        thickness = 4

        u = TimeFunction(name='u', save=None, grid=grid, space_order=0,
                         time_order=1)

        xi = SubDimension.middle(name='xi', parent=x,
                                 thickness_left=thickness,
                                 thickness_right=thickness)
        yi = SubDimension.middle(name='yi', parent=y,
                                 thickness_left=thickness,
                                 thickness_right=thickness)

        # A 5 point stencil, which can be computed in parallel
        stencil = (u[t, xi, yi] + u[t, xi - 1, yi] +
                   u[t, xi + 1, yi] + u[t, xi, yi - 1] + u[t, xi, yi + 1])
        centre = Eq(u[t + 1, xi, yi], stencil)

        u.data[0, 10, 10] = 1.0

        op = Operator([centre])

        iterations = FindNodes(Iteration).visit(op)
        sub_iters = [it for it in iterations if it.dim in [xi, yi]]
        assert all(it.is_Affine and it.is_Parallel for it in sub_iters)

        op.apply(time_m=0, time_M=0)

        assert np.all(u.data[1, 9:12, 10] == 1.0)
        assert np.all(u.data[1, 10, 9:12] == 1.0)

        # Any other entry is expected to be 0; the entries checked above are
        # zeroed out first so that a single assertion suffices
        u.data[1, 9:12, 10] = 0.0
        u.data[1, 10, 9:12] = 0.0
        assert np.all(u.data[1, :] == 0)
コード例 #25
0
def make_grid_accesses(node, yk_grid_objs):
    """
    Build a new Iteration/Expression tree out of ``node`` by turning all
    :class:`types.Indexed` accesses to YASK-backed functions into YASK grid
    accesses.

    ``yk_grid_objs`` is indexed by function name to retrieve the object on
    which the grid get/put/add calls are made.
    """
    def make_grid_gets(expr):
        # Replace each Indexed carrying YASK data with a 'code-grid-get'
        # call; the indices are translated recursively
        gets = {}
        for indexed in retrieve_indexed(expr):
            if not indexed.base.function.from_YASK:
                continue
            indices = [INT(make_grid_gets(j)) for j in indexed.indices]
            gets[indexed] = make_sharedptr_funcall(
                namespace['code-grid-get'], [ListInitializer(indices)],
                yk_grid_objs[indexed.base.function.name])
        return expr.xreplace(gets)

    mapper = {}
    for e in FindNodes(Expression).visit(node):
        if e.is_ForeignExpression:
            continue

        lhs, rhs = e.expr.args

        # The RHS is translated first
        rhs = make_grid_gets(rhs)

        if not e.write.from_YASK:
            # A scalar temporary is being written: only the RHS changes
            mapper[e] = e._rebuild(expr=e.expr.func(lhs, rhs))
            continue

        # LHS translation: the write becomes a grid 'add' or 'put' call,
        # depending on whether the Expression is an increment
        indices = [INT(make_grid_gets(j)) for j in lhs.indices]
        if e.is_Increment:
            call = namespace['code-grid-add']
        else:
            call = namespace['code-grid-put']
        handle = make_sharedptr_funcall(call, [rhs, ListInitializer(indices)],
                                        yk_grid_objs[e.write.name])
        mapper[e] = ForeignExpression(handle, e.dtype,
                                      is_Increment=e.is_Increment)

    return Transformer(mapper).visit(node)
コード例 #26
0
    def test_multiple_steppers(self, expr, exp_uindices, exp_mods):
        """
        Tests generation of multiple, mixed time stepping indices.

        `expr` is a string evaluated in the scope of this test to build the
        input of the Operator; `exp_uindices` holds the expected uindices
        signatures; `exp_mods` maps function names to the expected modulo of
        the time index.
        """
        grid = Grid(shape=(3, 3, 3))
        # Unused locally, but kept in scope as `expr` may refer to them
        x, y, z = grid.dimensions  # noqa

        u = TimeFunction(name='u', grid=grid)  # noqa
        v = TimeFunction(name='v', grid=grid, time_order=4)  # noqa

        op = Operator(eval(expr), dle='noop')

        iters = FindNodes(Iteration).visit(op)
        time_iter = [i for i in iters if i.dim.is_Time]
        assert len(time_iter) == 1
        time_iter = time_iter[0]

        # Check uindices in Iteration header
        signatures = [i._properties for i in time_iter.uindices]
        assert len(signatures) == len(exp_uindices)
        assert all(i in signatures for i in exp_uindices)

        # Check uindices within each TimeFunction. The condition must go
        # through `all`: asserting a bare generator expression always
        # succeeds, since a generator object is truthy
        exprs = [i.expr for i in FindNodes(Expression).visit(op)]
        indexeds = flatten(retrieve_indexed(i) for i in exprs)
        assert all(i.indices[i.function._time_position].modulo ==
                   exp_mods[i.function.name] for i in indexeds)
コード例 #27
0
 def test_issue_1592(self):
     """
     Derivatives of a TimeFunction saved along a subsampled time dimension:
     the Operators must build and run, and the LHS time index of the last
     Expression must be `IntDiv(time, 2) + 1`.
     """
     grid = Grid(shape=(11, 11))
     time = grid.time_dim
     time_sub = ConditionalDimension('t_sub', parent=time, factor=2)

     v = TimeFunction(name="v", grid=grid, space_order=4, time_dim=time_sub,
                      save=5)
     w = Function(name="w", grid=grid, space_order=4)

     # First Operator: built and immediately run
     Operator(Eq(w, v.dx))(time=6)

     # Second Operator: run, then inspected
     op = Operator(Eq(v.forward, v.dx))
     op.apply(time=6)

     last = FindNodes(Expression).visit(op)[-1]
     assert last.expr.lhs.indices[0] == IntDiv(time, 2) + 1
コード例 #28
0
ファイル: test_mpi.py プロジェクト: ponykid/SNIST
    def test_stencil_nowrite_implies_haloupdate_anyway(self):
        """
        A stencil reading `f`, which is never written, is still expected to
        result in exactly one Call.
        """
        grid = Grid(shape=(12, ))
        x = grid.dimensions[0]
        t = grid.stepping_dim

        f = TimeFunction(name='f', grid=grid)
        g = Function(name='g', grid=grid)

        # Despite the lack of a data dependence, a halo update is performed:
        # at the time the halo updates are placed, the compiler
        # conservatively assumes that some previous equation might have
        # written to `f`.
        stencil = f[t, x - 1] + f[t, x + 1] + 1.
        op = Operator(Eq(g, stencil))

        assert len(FindNodes(Call).visit(op)) == 1
コード例 #29
0
def find_affine_trees(iet):
    """
    Collect, Section by Section, the affine Iteration trees nested in the
    time Iterations of ``iet``.

    A tree is affine when all of the array accesses are constant/affine
    functions of the Iteration variables and the Iteration bounds are fixed
    (but possibly symbolic). Here a tree is accepted when every Iteration in
    it is flagged ``is_Affine`` and the root Dimensions of its Iterations
    match, in order, the Dimensions of the grid shared by its expressions.

    Parameters
    ----------
    iet : `Node`
        The searched tree

    Returns
    -------
    OrderedDict
        Maps each Section to the list of its accepted trees. The scan of a
        Section stops at the first tree that is rejected, so trees following
        it are not recorded; Sections without accepted trees are absent.
    """
    found = OrderedDict()
    time_loops = [it for it in FindNodes(Iteration).visit(iet) if it.dim.is_Time]

    for time_loop in time_loops:
        for section in FindNodes(Section).visit(time_loop):
            for tree in retrieve_iteration_tree(section):
                if any(not it.is_Affine for it in tree):
                    # Non-affine array accesses not supported
                    break

                # Reduce the grids of all expressions under the tree root to
                # a single one
                exprs = [node.expr for node in FindNodes(Expression).visit(tree.root)]
                grids = [('', e.grid) for e in exprs if e.grid]
                grid = ReducerMap(grids).unique('')

                # The tree must iterate exactly over the grid Dimensions
                writeto_dimensions = tuple(it.dim.root for it in tree)
                if grid.dimensions == writeto_dimensions:
                    found.setdefault(section, []).append(tree)
                else:
                    break

    return found
コード例 #30
0
ファイル: engine.py プロジェクト: vkrGitHub/devito_fork_vkr
    def apply(self, func, **kwargs):
        """
        Apply ``func`` to all nodes in the Graph. This changes the state of the Graph.

        ``func`` is invoked as ``func(node, **kwargs)`` and must return a
        ``(node, metadata)`` pair. The returned node replaces the visited one,
        while the following optional ``metadata`` entries are folded into the
        Graph: 'dimensions', 'includes', 'headers', 'ffuncs', 'efuncs', 'args'.
        The elemental functions are visited in the topological order of the
        call graph; the external functions are visited last, and the metadata
        they produce is discarded.
        """
        call_graph = self._create_call_graph()

        # Apply `func`
        for key in call_graph.topological_sort():
            rebuilt, metadata = func(self.efuncs[key], **kwargs)
            self.efuncs[key] = rebuilt

            # Track any new Dimensions introduced by `func`
            new_dimensions = metadata.get('dimensions', [])
            self.dimensions.extend(list(new_dimensions))

            # Track any new #include and #define required by `func`
            new_includes = metadata.get('includes', [])
            self.includes.extend(list(new_includes))
            self.includes = filter_ordered(self.includes)
            new_headers = metadata.get('headers', [])
            self.headers.extend(list(new_headers))
            self.headers = filter_ordered(self.headers)

            # Track any new external function
            new_ffuncs = metadata.get('ffuncs', [])
            self.ffuncs.extend(list(new_ffuncs))
            self.ffuncs = filter_ordered(self.ffuncs)

            # Track any new ElementalFunctions
            new_efuncs = OrderedDict((f.name, f) for f in metadata.get('efuncs', []))
            self.efuncs.update(new_efuncs)

            # If there's a change to the `args` and the `iet` is an efunc, then
            # we must update the call sites as well, as the arguments dropped down
            # to the efunc have just increased
            args = as_tuple(metadata.get('args'))
            if not args:
                continue

            def extend_unique(items):
                # Avoid redundant updates to the parameters list, due to
                # multiple children wanting to add the same input argument
                return list(items) + [a for a in args if a not in items]

            stack = [key] + call_graph.all_downstreams(key)
            for name in stack:
                efunc = self.efuncs[name]
                mapper = {}
                for call in FindNodes(Call).visit(efunc):
                    if call.name in stack:
                        mapper[call] = call._rebuild(
                            arguments=extend_unique(call.arguments)
                        )
                efunc = Transformer(mapper).visit(efunc)
                if efunc.is_Callable:
                    efunc = efunc._rebuild(parameters=extend_unique(efunc.parameters))
                self.efuncs[name] = efunc

        # Apply `func` to the external functions
        for pos in range(len(self.ffuncs)):
            rebuilt, _ = func(self.ffuncs[pos], **kwargs)
            self.ffuncs[pos] = rebuilt