Beispiel #1
0
def test_transformer_add_replace(exprs, block2, block3):
    """Basic transformer test that adds one expression and replaces another"""
    line1 = '// Replaced expression'
    line2 = '// Adding a simple line'
    replacer = Block(c.Line(line1))
    adder = lambda n: Block(c.Line(line2), n)
    transformer = Transformer({exprs[0]: replacer, exprs[1]: adder(exprs[1])})

    for block in [block2, block3]:
        newblock = transformer.visit(block)
        newcode = str(newblock.ccode)
        oldnumlines = len(str(block.ccode).split('\n'))
        newnumlines = len(newcode.split('\n'))
        assert newnumlines >= oldnumlines + 1
        assert line1 in newcode
        assert line2 in newcode
        assert "a[i0] = a[i0] + b[i0] + 5.0F;" not in newcode
Beispiel #2
0
def test_transformer_replace(exprs, block1, block2, block3):
    """Basic transformer test that replaces an expression"""
    line1 = '// Replaced expression'
    replacer = Block(c.Line(line1))
    transformer = Transformer({exprs[0]: replacer})

    for block in [block1, block2, block3]:
        newblock = transformer.visit(block)
        newcode = str(newblock.ccode)
        oldnumlines = len(str(block.ccode).split('\n'))
        newnumlines = len(newcode.split('\n'))
        assert newnumlines >= oldnumlines
        assert line1 in newcode
        assert "a[i0] = a[i0] + b[i0] + 5.0F;" not in newcode
Beispiel #3
0
def test_transformer_wrap(exprs, block1, block2, block3):
    """Basic transformer test that wraps an expression in comments"""
    line1 = '// This is the opening comment'
    line2 = '// This is the closing comment'
    wrapper = lambda n: Block(c.Line(line1), n, c.Line(line2))
    transformer = Transformer({exprs[0]: wrapper(exprs[0])})

    for block in [block1, block2, block3]:
        newblock = transformer.visit(block)
        newcode = str(newblock.ccode)
        oldnumlines = len(str(block.ccode).split('\n'))
        newnumlines = len(newcode.split('\n'))
        assert newnumlines >= oldnumlines + 2
        assert line1 in newcode
        assert line2 in newcode
        assert "a[i] = a[i] + b[i] + 5.0F;" in newcode
Beispiel #4
0
    def _ompize(self, state, **kwargs):
        """
        Add OpenMP pragmas to the Iteration/Expression tree to emit parallel code
        """

        processed = []
        for node in state.nodes:

            # Reset denormals flag each time a parallel region is entered
            denormals = FindNodes(Denormals).visit(state.nodes)
            mapper = {
                i: List(c.Comment('DLE: moved denormals flag'))
                for i in denormals
            }

            # Handle parallelizable loops
            for tree in retrieve_iteration_tree(node):
                # Determine the number of consecutive parallelizable Iterations
                key = lambda i: i.is_Parallel and not i.is_Vectorizable
                candidates = filter_iterations(tree,
                                               key=key,
                                               stop='consecutive')
                if not candidates:
                    continue

                # Heuristic: if at least two parallel loops are available and the
                # physical core count is greater than self.thresholds['collapse'],
                # then omp-collapse the loops
                nparallel = len(candidates)
                if psutil.cpu_count(logical=False) < self.thresholds['collapse'] or\
                        nparallel < 2:
                    parallelism = omplang['for']
                else:
                    parallelism = omplang['collapse'](nparallel)

                root = candidates[0]
                mapper[root] = Block(header=omplang['par-region'],
                                     body=denormals +
                                     [Element(parallelism), root])

            processed.append(Transformer(mapper).visit(node))

        return {'nodes': processed}
Beispiel #5
0
    def _ompize(self, state, **kwargs):
        """
        Add OpenMP pragmas to the Iteration/Expression tree to emit parallel code
        """

        processed = []
        for node in state.nodes:

            # Reset denormals flag each time a parallel region is entered
            denormals = FindNodes(Denormals).visit(state.nodes)
            mapper = OrderedDict([(i, None) for i in denormals])

            # Group by outer loop so that we can embed within the same parallel region
            was_tagged = False
            groups = OrderedDict()
            for tree in retrieve_iteration_tree(node):
                # Determine the number of consecutive parallelizable Iterations
                key = lambda i: i.is_Parallel and\
                    not (i.is_Elementizable or i.is_Vectorizable)
                candidates = filter_iterations(tree, key=key, stop='asap')
                if not candidates:
                    was_tagged = False
                    continue
                # Consecutive tagged Iteration go in the same group
                is_tagged = any(i.tag is not None for i in tree)
                key = len(groups) - (is_tagged & was_tagged)
                handle = groups.setdefault(key, OrderedDict())
                handle[candidates[0]] = candidates
                was_tagged = is_tagged

            # Handle parallelizable loops
            for group in groups.values():
                private = []
                for root, tree in group.items():
                    # Heuristic: if at least two parallel loops are available and the
                    # physical core count is greater than self.thresholds['collapse'],
                    # then omp-collapse the loops
                    nparallel = len(tree)
                    if psutil.cpu_count(logical=False) < self.thresholds['collapse'] or\
                            nparallel < 2:
                        parallel = omplang['for']
                    else:
                        parallel = omplang['collapse'](nparallel)

                    mapper[root] = root._rebuild(pragmas=root.pragmas +
                                                 (parallel, ))

                    # Track the thread-private and thread-shared variables
                    private.extend([
                        i for i in FindSymbols('symbolics').visit(root)
                        if i.is_TensorFunction and i._mem_stack
                    ])

                # Build the parallel region
                private = sorted(set([i.name for i in private]))
                private = ('private(%s)' %
                           ','.join(private)) if private else ''
                rebuilt = [v for k, v in mapper.items() if k in group]
                par_region = Block(header=omplang['par-region'](private),
                                   body=denormals + rebuilt)
                for k, v in list(mapper.items()):
                    if isinstance(v, Iteration):
                        mapper[k] = None if v.is_Remainder else par_region

            handle = Transformer(mapper).visit(node)
            if handle is not None:
                processed.append(handle)

        return {'nodes': processed}