예제 #1
0
파일: mpi.py 프로젝트: scdlresearch/devito
def mpiize(iet, **kwargs):
    """
    Replace the HaloSpots found in ``iet`` with MPI routines performing halo
    exchanges, so that distributed-memory parallel code is emitted.

    The mandatory keyword argument ``mode`` selects the HaloExchangeBuilder
    used for OverlappableHaloSpots; every other HaloSpot is handled by the
    'basic' builder.

    Returns the transformed IET and a dict with the entries 'includes',
    'efuncs' and 'args'.
    """
    mode = kwargs.pop('mode')

    # The same generators are handed to both builders; they are used to
    # produce unique object names
    generators = {'msg': generator(), 'comm': generator(), 'comp': generator()}
    sync_heb = HaloExchangeBuilder('basic', **generators)
    user_heb = HaloExchangeBuilder(mode, **generators)

    def pick_builder(hs):
        # Only OverlappableHaloSpots go through the user-selected mode
        return user_heb if isinstance(hs, OverlappableHaloSpot) else sync_heb

    halo_subs = {hs: pick_builder(hs).make(hs)
                 for hs in FindNodes(HaloSpot).visit(iet)}

    efuncs = sync_heb.efuncs + user_heb.efuncs
    objs = filter_sorted(sync_heb.objs + user_heb.objs)
    iet = Transformer(halo_subs, nested=True).visit(iet)

    # The PARALLEL tag must be dropped from the Iterations within which halo
    # exchanges are performed
    untagged = {}
    for tree in retrieve_iteration_tree(iet):
        # Walk from the innermost Iteration outwards
        for node in reversed(tree):
            if node in untagged:
                # This subtree was already processed
                break
            if not FindNodes(Call).visit(node):
                continue
            # `node` contains a Call: rebuild it, and all of the Iterations
            # enclosing it, without the PARALLEL property
            depth = tree.index(node) + 1
            for n in tree[:depth]:
                untagged[n] = n._rebuild(properties=set(n.properties) - {PARALLEL})
            break
    iet = Transformer(untagged, nested=True).visit(iet)

    return iet, {'includes': ['mpi.h'], 'efuncs': efuncs, 'args': objs}
예제 #2
0
파일: routines.py 프로젝트: rhodrin/devito
    def __new__(cls, mode, **generators):
        """
        Create the HaloExchangeBuilder specialization selected by ``mode``.

        ``mode`` may be ``True`` or ``'basic'``, ``'diag'``, ``'overlap'``,
        ``'overlap2'`` or ``'full'``. The optional keyword arguments ``msg``,
        ``comm`` and ``comp`` are stored as the unique name generators of the
        new object; a fresh ``generator()`` is used for each missing one.

        Raises an AssertionError if ``mode`` is none of the supported values.
        """
        if mode is True or mode == 'basic':
            builder = BasicHaloExchangeBuilder
        elif mode == 'diag':
            builder = DiagHaloExchangeBuilder
        elif mode == 'overlap':
            builder = OverlapHaloExchangeBuilder
        elif mode == 'overlap2':
            builder = Overlap2HaloExchangeBuilder
        elif mode == 'full':
            builder = FullHaloExchangeBuilder
        else:
            # Raised explicitly instead of via `assert False`: assert
            # statements are removed under `python -O`, in which case an
            # unsupported `mode` would fall through and fail later with an
            # unrelated UnboundLocalError. The exception type is unchanged.
            # `mode` is wrapped in a tuple so that a tuple-valued `mode`
            # cannot break the string formatting itself.
            raise AssertionError("unexpected value `mode=%s`" % (mode,))

        obj = object.__new__(builder)

        # Unique name generators
        obj._gen_msgkey = generators.get('msg', generator())
        obj._gen_commkey = generators.get('comm', generator())
        obj._gen_compkey = generators.get('comp', generator())

        # Per-instance state, empty upon creation
        obj._cache_halo = OrderedDict()
        obj._cache_dims = OrderedDict()
        obj._objs = OrderedSet()
        obj._regions = OrderedDict()
        obj._msgs = OrderedDict()
        obj._efuncs = []

        return obj
예제 #3
0
def test_common_subexprs_elimination(tu, tv, tw, ti0, ti1, t0, t1, exprs, expected):
    """
    Run ``common_subexprs_elimination`` on the evaluated ``exprs`` and check
    that the stringified right-hand sides of the output match ``expected``.
    """
    next_id = generator()

    def make():
        # Build a fresh temporary, named after the next counter value
        return Scalar(name='r%d' % next_id()).indexify()

    evaluated = EVAL(exprs, tu, tv, tw, ti0, ti1, t0, t1)
    processed = common_subexprs_elimination(evaluated, make)

    assert len(processed) == len(expected)
    for eq, rhs in zip(processed, expected):
        assert str(eq.rhs) == rhs
예제 #4
0
def test_cse(exprs, expected):
    """
    Test common subexpressions elimination.

    Each entry of ``exprs`` is passed to ``eval`` inside this function, so it
    can refer by name to the local symbols created below (``tu``, ``tv``,
    ``tw``, ``tz``, ``ti0``, ``ti1``, ``t0``, ``t1``, ``t2``). The entries of
    ``exprs`` are overwritten in place with the resulting equations, which
    are then handed to ``_cse``. The test passes if ``_cse`` returns
    ``len(expected)`` expressions whose stringified right-hand sides match
    ``expected`` pairwise, in order.
    """
    grid = Grid((3, 3, 3))
    dims = grid.dimensions

    # Symbols made available to the `eval`-ed entries of `exprs`.
    # NOTE(review): the `# noqa` markers are presumably there because these
    # names look unused to a linter -- do not rename or remove them
    tu = TimeFunction(name="tu", grid=grid, space_order=2)  # noqa
    tv = TimeFunction(name="tv", grid=grid, space_order=2)  # noqa
    tw = TimeFunction(name="tw", grid=grid, space_order=2)  # noqa
    tz = TimeFunction(name="tz", grid=grid, space_order=2)  # noqa
    ti0 = Array(name='ti0', shape=(3, 5, 7),
                dimensions=dims).indexify()  # noqa
    ti1 = Array(name='ti1', shape=(3, 5, 7),
                dimensions=dims).indexify()  # noqa
    t0 = Scalar(name='t0')  # noqa
    t1 = Scalar(name='t1')  # noqa
    t2 = Scalar(name='t2')  # noqa

    # List comprehension would need explicit locals/globals mappings to eval
    # (iterate over a copy, since `exprs` itself is updated in place)
    for i, e in enumerate(list(exprs)):
        exprs[i] = DummyEq(indexify(eval(e).evaluate))

    # `make` builds a new temporary at each call, named after the next value
    # produced by `counter`
    counter = generator()
    make = lambda: Scalar(name='r%d' % counter()).indexify()
    processed = _cse(exprs, make)
    assert len(processed) == len(expected)
    assert all(str(i.rhs) == j for i, j in zip(processed, expected))
예제 #5
0
파일: aliases.py 프로젝트: ofmla/devito
    def _do_generate(self, exprs, exclude, cbk_search, cbk_compose=None):
        """
        Carry out the bulk of the work of ``_generate``.

        For each expression in ``exprs``, the candidates returned by
        ``cbk_search`` are visited. A candidate is discarded if it is not
        commutative, or if any of the relevant free symbols belongs to a
        function in ``exclude``. The relevant free symbols are those of the
        terms returned by ``cbk_compose``, if any, otherwise those of the
        candidate itself. Surviving candidates are added to the returned
        Uxmapper.
        """
        next_id = generator()

        def make():
            return Symbol(name='dummy%d' % next_id())

        if cbk_compose is None:
            # Without a composer, no candidate is decomposed into terms
            cbk_compose = lambda *args: None

        mapper = Uxmapper()
        for expr in exprs:
            for candidate in cbk_search(expr):
                if not candidate.is_commutative:
                    continue

                terms = cbk_compose(candidate)

                # Make sure we won't break any data dependencies
                if terms:
                    symbols = set().union(*[t.free_symbols for t in terms])
                else:
                    symbols = candidate.free_symbols
                functions = {s.function for s in symbols}
                if functions & exclude:
                    continue

                mapper.add(candidate, make, terms)

        return mapper
예제 #6
0
파일: test_dse.py 프로젝트: opesci/devito
def test_common_subexprs_elimination(tu, tv, tw, ti0, ti1, t0, t1, exprs, expected):
    """
    Check that ``common_subexprs_elimination`` returns ``len(expected)``
    expressions whose stringified right-hand sides equal ``expected``.
    """
    ids = generator()

    def make():
        # A new temporary at each call, named after the next counter value
        return Scalar(name='r%d' % ids()).indexify()

    inputs = EVAL(exprs, tu, tv, tw, ti0, ti1, t0, t1)
    outputs = common_subexprs_elimination(inputs, make)

    assert len(outputs) == len(expected)
    assert all(str(out.rhs) == want for out, want in zip(outputs, expected))
예제 #7
0
def optimize(clusters, dse_mode):
    """
    Optimize a topologically-ordered sequence of Clusters.

    The following steps are applied, in this order:

        * [cross-cluster] Fusion
        * [intra-cluster] Several flop-reduction passes via the DSE, driven
          by ``dse_mode``
        * [cross-cluster] Lifting
        * [cross-cluster] Fusion (again)
        * [cross-cluster] Arrays Elimination
        * [cross-cluster] Scalarization

    The result is returned wrapped in a ClusterGroup.
    """
    # Names of the temporaries are built from a single shared counter
    next_id = generator()

    def template():
        return "r%d" % next_id()

    fused = fuse(clusters)

    # Flop reduction
    from devito.dse import rewrite
    rewritten = rewrite(fused, template, mode=dse_mode)

    lifted = Lift().process(rewritten)

    # Lifting may have created new fusion opportunities
    refused = fuse(lifted)

    # Fusion may in turn allow to eliminate Arrays storing identical
    # expressions (thus shrinking the working set) ...
    pruned = eliminate_arrays(refused, template)

    # ... as well as to scalarize
    scalarized = scalarize(pruned, template)

    return ClusterGroup(scalarized)
예제 #8
0
    def _specialize_iet(cls, graph, **kwargs):
        """
        Apply the IET-level passes to ``graph``, then return ``graph``.

        ``kwargs`` must provide 'options' (whose 'mpi' entry decides whether,
        and in which mode, ``mpiize`` is applied) and 'platform' (whose
        ``simd_reg_size`` is forwarded to the SIMD pass).
        """
        options, platform = kwargs['options'], kwargs['platform']

        # Flush denormal numbers
        avoid_denormals(graph)

        # Distributed-memory parallelism (MPI only if requested)
        optimize_halospots(graph)
        mpi_mode = options['mpi']
        if mpi_mode:
            mpiize(graph, mode=mpi_mode)

        # Lower IncrDimensions so that blocks of arbitrary shape may be used
        relax_incr_dimensions(graph, counter=generator())

        # SIMD-level first, then shared-memory parallelism
        omp = Ompizer()
        omp.make_simd(graph, simd_reg_size=platform.simd_reg_size)
        omp.make_parallel(graph)

        # Misc optimizations
        hoist_prodders(graph)

        # Symbol definitions and casts
        manager = DataManager()
        manager.place_definitions(graph)
        manager.place_casts(graph)

        return graph
예제 #9
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Apply the Cluster-level optimization passes, in sequence, and return
        the resulting Clusters.

        ``kwargs`` must provide 'options' and 'platform', which are forwarded
        to the ``cire`` passes ('options' also to ``Blocking``).
        """
        options, platform = kwargs['options'], kwargs['platform']

        # All temporaries are named after a single shared counter
        next_id = generator()

        def template():
            return "r%d" % next_id()

        # Toposort+Fusion (the former to expose more fusion opportunities)
        fused = fuse(Toposort().process(clusters))

        # Hoist and optimize Dimension-invariant sub-expressions
        hoisted = cire(fused, template, 'invariants', options, platform)
        lifted = Lift().process(hoisted)

        # Reduce flops (potential arithmetic alterations)
        incremented = extract_increments(lifted, template)
        reduced = cire(incremented, template, 'sops', options, platform)
        factorized = factorize(reduced)
        powered = optimize_pows(factorized)

        # Reduce flops (no arithmetic alterations)
        deduplicated = cse(powered, template)

        # The previous passes may have created fusion opportunities, which in
        # turn may enable further optimizations
        refused = fuse(deduplicated)
        pruned = eliminate_arrays(refused, template)

        # Blocking to improve data locality
        return Blocking(options).process(pruned)
예제 #10
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Optimize Clusters for better runtime performance.

        The passes applied are, in order: toposort, fusion, DSE rewrite (which
        receives ``kwargs``), lifting, fusion again, arrays elimination and
        scalarization.
        """
        # All temporaries are named after a single shared counter
        next_id = generator()

        def template():
            return "r%d" % next_id()

        # Toposort+Fusion (the former to expose more fusion opportunities)
        fused = fuse(Toposort().process(clusters))

        # Flop reduction via the DSE
        rewritten = rewrite(fused, template, **kwargs)

        # Lifting
        lifted = Lift().process(rewritten)

        # Lifting may create fusion opportunities, which in turn may enable
        # further optimizations
        refused = fuse(lifted)
        pruned = eliminate_arrays(refused, template)

        return scalarize(pruned, template)
예제 #11
0
    def _specialize_clusters(cls, clusters, **kwargs):
        """
        Run the Cluster-level passes: toposort, fusion, DSE rewrite (which
        receives ``kwargs``), lifting, fusion again, arrays elimination and
        scalarization. The resulting Clusters are returned.
        """
        # TODO: this is currently identical to CPU64NoopOperator._specialize_clusters,
        # but it will have to change

        # Provider of names for the temporaries
        ids = generator()

        def template():
            return "r%d" % ids()

        # Toposort first, to expose more fusion opportunities
        sorted_clusters = Toposort().process(clusters)
        fused = fuse(sorted_clusters)

        # Flop reduction via the DSE, then lifting
        rewritten = rewrite(fused, template, **kwargs)
        lifted = Lift().process(rewritten)

        # Lifting may create fusion opportunities, which in turn may enable
        # further optimizations
        refused = fuse(lifted)
        without_arrays = eliminate_arrays(refused, template)
        scalarized = scalarize(without_arrays, template)

        return scalarized
예제 #12
0
    def __new__(cls, mode, **generators):
        """
        Create an instance of the class registered under ``mode`` in
        ``mpi_registry`` and initialize its internal state.

        The optional keyword arguments ``msg``, ``comm`` and ``comp`` are
        stored as the unique name generators of the new object; a fresh
        ``generator()`` is used for each missing one.
        """
        obj = object.__new__(mpi_registry[mode])

        # Unique name generators: (attribute, keyword) pairs
        for attr, key in (('_gen_msgkey', 'msg'),
                          ('_gen_commkey', 'comm'),
                          ('_gen_compkey', 'comp')):
            setattr(obj, attr, generators.get(key, generator()))

        # Per-instance state, empty upon creation
        obj._cache_halo, obj._cache_dims = OrderedDict(), OrderedDict()
        obj._objs = OrderedSet()
        obj._regions, obj._msgs = OrderedDict(), OrderedDict()
        obj._efuncs = []

        return obj
예제 #13
0
def test_yreplace_time_invariants(tu, tv, tw, ti0, ti1, t0, t1, exprs, expected):
    """
    Run ``yreplace`` on the evaluated ``exprs`` and check that the
    stringified right-hand sides of the found expressions match ``expected``.
    """
    exprs = EVAL(exprs, tu, tv, tw, ti0, ti1, t0, t1)
    next_id = generator()

    def make():
        # A new temporary at each call, named after the next counter value
        return Scalar(name='r%d' % next_id()).indexify()

    def costly(expr):
        return estimate_cost(expr) > 0

    rule = make_is_time_invariant(exprs)
    _, found = yreplace(exprs, make, rule, costly)

    assert len(found) == len(expected)
    for eq, rhs in zip(found, expected):
        assert str(eq.rhs) == rhs
예제 #14
0
    def __init__(self, profile=True, template=None):
        """
        Store ``profile`` and set up ``self.template``, the callable used to
        build globally-unique temporaries.

        If ``template`` is given it must be callable and is used as is;
        otherwise a default one is created, producing
        ``AbstractRewriter.tempname`` followed by the next value of a new
        counter.
        """
        self.profile = profile

        if template is not None:
            assert callable(template)
            self.template = template
            return

        next_id = generator()

        def default_template():
            return "%s%d" % (AbstractRewriter.tempname, next_id())

        self.template = default_template
예제 #15
0
def test_xreplace_constrained_time_varying(tu, tv, tw, ti0, ti1, t0, t1, exprs,
                                           expected):
    """
    Run ``xreplace_constrained`` on the evaluated ``exprs`` and check that
    the stringified right-hand sides of the found expressions match
    ``expected``.
    """
    exprs = EVAL(exprs, tu, tv, tw, ti0, ti1, t0, t1)
    next_id = generator()

    def make():
        # A new temporary at each call, named after the next counter value
        return Scalar(name='r%d' % next_id()).indexify()

    def costly(expr):
        return estimate_cost(expr) > 0

    rule = iq_timevarying(FlowGraph(exprs))
    _, found = xreplace_constrained(exprs, make, rule, costly)

    assert len(found) == len(expected)
    for eq, rhs in zip(found, expected):
        assert str(eq.rhs) == rhs
예제 #16
0
파일: test_dse.py 프로젝트: opesci/devito
def test_xreplace_constrained_time_varying(tu, tv, tw, ti0, ti1, t0, t1,
                                           exprs, expected):
    """
    Check that ``xreplace_constrained`` finds ``len(expected)`` expressions
    whose stringified right-hand sides equal ``expected``.
    """
    exprs = EVAL(exprs, tu, tv, tw, ti0, ti1, t0, t1)
    ids = generator()

    def make():
        return Scalar(name='r%d' % ids()).indexify()

    graph = FlowGraph(exprs)
    outcome = xreplace_constrained(exprs, make, iq_timevarying(graph),
                                   lambda e: estimate_cost(e) > 0)
    # The first item (the processed expressions) is not inspected
    _, found = outcome

    assert len(found) == len(expected)
    assert all(str(eq.rhs) == want for eq, want in zip(found, expected))
예제 #17
0
    def make_name(self, prefix=None):
        """
        Return a new name: ``prefix`` followed by the next value of the
        counter associated with it.

        ``prefix`` defaults to ``self._symbol_prefix``. Counters are kept in
        ``self.counters``, one per prefix, and are created with
        ``generator()`` the first time a prefix is seen.
        """
        # By default we're creating a new symbol
        name_prefix = self._symbol_prefix if prefix is None else prefix

        try:
            next_id = self.counters[name_prefix]
        except KeyError:
            # First use of this prefix
            next_id = self.counters.setdefault(name_prefix, generator())

        return "%s%d" % (name_prefix, next_id())
예제 #18
0
파일: common.py 프로젝트: opesci/devito
    def __init__(self, profile=True, template=None):
        """
        Store ``profile``, set up ``self.template`` (the callable used to
        build globally-unique temporaries) and start with an empty
        ``self.run_summary``.

        If ``template`` is given it must be callable and is used as is;
        otherwise a default one is created, producing
        ``AbstractRewriter.tempname`` followed by the next value of a new
        counter.
        """
        self.profile = profile

        if template is not None:
            assert callable(template)
            self.template = template
        else:
            next_id = generator()

            def default_template():
                return "%s%d" % (AbstractRewriter.tempname, next_id())

            self.template = default_template

        # Track performance of each cluster
        self.run_summary = []
예제 #19
0
    def _make_iet_passes_mapper(cls, **kwargs):
        """
        Return a dict mapping the names of the IET passes to the callables
        implementing them.

        ``kwargs`` must provide 'options' (whose 'mpi' entry is bound as the
        ``mode`` of ``mpiize``) and 'platform' (whose ``simd_reg_size`` is
        bound to the SIMD pass).
        """
        options, platform = kwargs['options'], kwargs['platform']

        # The 'openmp' and 'simd' passes share the same Ompizer
        ompizer = Ompizer()

        passes = [
            ('denormals', avoid_denormals),
            ('optcomms', optimize_halospots),
            ('wrapping', loop_wrapping),
            ('blocking', partial(relax_incr_dimensions, counter=generator())),
            ('openmp', ompizer.make_parallel),
            ('mpi', partial(mpiize, mode=options['mpi'])),
            ('simd', partial(ompizer.make_simd,
                             simd_reg_size=platform.simd_reg_size)),
            ('prodders', hoist_prodders),
        ]
        return dict(passes)
예제 #20
0
def test_yreplace_time_invariants(exprs, expected):
    """
    Build a small set of symbols on a 3x3x3 Grid, evaluate ``exprs`` against
    them, run ``yreplace`` and check that the stringified right-hand sides
    of the found expressions match ``expected``.
    """
    grid = Grid((3, 3, 3))
    dims = grid.dimensions

    # Symbols handed to EVAL, created in the order in which they are passed
    tu, tv, tw = [TimeFunction(name=name, grid=grid, space_order=4).indexify()
                  for name in ("tu", "tv", "tw")]
    ti0, ti1 = [Array(name=name, shape=(3, 5, 7), dimensions=dims).indexify()
                for name in ('ti0', 'ti1')]
    t0, t1 = [Scalar(name=name).indexify() for name in ('t0', 't1')]

    exprs = EVAL(exprs, tu, tv, tw, ti0, ti1, t0, t1)

    next_id = generator()

    def make():
        # A new temporary at each call, named after the next counter value
        return Scalar(name='r%d' % next_id()).indexify()

    def costly(expr):
        return estimate_cost(expr) > 0

    _, found = yreplace(exprs, make, make_is_time_invariant(exprs), costly)

    assert len(found) == len(expected)
    for eq, rhs in zip(found, expected):
        assert str(eq.rhs) == rhs
예제 #21
0
def optimize(clusters, dse_mode):
    """
    Optimize a topologically-ordered sequence of Clusters.

    The following steps are applied, in this order:

        * [cross-cluster] Toposort and Fusion
        * [intra-cluster] Several flop-reduction passes via the DSE, driven
          by ``dse_mode``
        * [cross-cluster] Lifting
        * [cross-cluster] Fusion (again)
        * [cross-cluster] Arrays Elimination
        * [cross-cluster] Scalarization
        * Analysis of the computational properties

    The result is returned wrapped in a ClusterGroup.
    """
    # Names of the temporaries are built from a single shared counter
    next_id = generator()

    def template():
        return "r%d" % next_id()

    # Toposort+Fusion (the former to expose more fusion opportunities)
    fused = fuse(Toposort().process(clusters))

    # Flop reduction via the DSE
    from devito.dse import rewrite
    rewritten = rewrite(fused, template, mode=dse_mode)

    lifted = Lift().process(rewritten)

    # Lifting may have created new fusion opportunities
    refused = fuse(lifted)

    # Fusion may in turn allow to eliminate Arrays storing identical
    # expressions (thus shrinking the working set) ...
    pruned = eliminate_arrays(refused, template)

    # ... as well as to scalarize
    scalarized = scalarize(pruned, template)

    # Determine computational properties (e.g., parallelism) that will be
    # necessary for the later passes
    analyzed = analyze(scalarized)

    return ClusterGroup(analyzed)