def _make_partree(self, candidates, omp_pragma):
        """Parallelize `root` attaching a suitable OpenMP pragma."""
        assert candidates
        root = candidates[0]

        # Get the collapsable Iterations
        collapsable = []
        if ncores() >= Ompizer.COLLAPSE and IsPerfectIteration().visit(root):
            for n, i in enumerate(candidates[1:], 1):
                # The OpenMP specification forbids collapsed loops to use iteration
                # variables in initializer expressions. E.g., the following is forbidden:
                #
                # #pragma omp ... collapse(2)
                # for (i = ... )
                #   for (j = i ...)
                #     ...
                #
                # Here, we make sure this won't happen
                if any(j.dim in i.symbolic_min.free_symbols for j in candidates[:n]):
                    break

                # Also, we do not want to collapse vectorizable Iterations
                if i.is_Vectorizable:
                    break

                collapsable.append(i)

        # Attach an OpenMP pragma-for with a collapse clause
        ncollapse = 1 + len(collapsable)
        partree = root._rebuild(pragmas=root.pragmas + (omp_pragma(ncollapse),),
                                properties=root.properties + (COLLAPSED(ncollapse),))

        collapsed = [partree] + collapsable

        return root, partree, collapsed
예제 #2
0
    def _make_partree(self, candidates, omp_pragma=None):
        """Parallelize `root` attaching a suitable OpenMP pragma."""
        assert candidates
        root = candidates[0]

        # Pick up an omp-pragma template
        # Caller-provided -> stick to it
        # Affine -> ... schedule(static,1) ...
        # Non-affine -> ... schedule(static) ...
        if omp_pragma is None:
            if all(i.is_Affine for i in candidates):
                omp_pragma = self.lang['for-static-1']
            else:
                omp_pragma = self.lang['for-static']

        # Get the collapsable Iterations
        collapsable = []
        if ncores() >= Ompizer.COLLAPSE_NCORES and IsPerfectIteration().visit(
                root):
            for n, i in enumerate(candidates[1:], 1):
                # The OpenMP specification forbids collapsed loops to use iteration
                # variables in initializer expressions. E.g., the following is forbidden:
                #
                # #pragma omp ... collapse(2)
                # for (i = ... )
                #   for (j = i ...)
                #     ...
                #
                # Here, we make sure this won't happen
                if any(j.dim in i.symbolic_min.free_symbols
                       for j in candidates[:n]):
                    break

                # Also, we do not want to collapse vectorizable Iterations
                if i.is_Vectorizable:
                    break

                # Would there be enough work per parallel iteration?
                try:
                    work = prod(
                        [int(j.dim.symbolic_size) for j in candidates[n + 1:]])
                    if work < Ompizer.COLLAPSE_WORK:
                        break
                except TypeError:
                    pass

                collapsable.append(i)

        # Attach an OpenMP pragma-for with a collapse clause
        ncollapse = 1 + len(collapsable)
        partree = root._rebuild(
            pragmas=root.pragmas + (omp_pragma(ncollapse), ),
            properties=root.properties + (COLLAPSED(ncollapse), ))

        collapsed = [partree] + collapsable

        return root, partree, collapsed
예제 #3
0
    def __init__(self, *args, **kwargs):
        kwargs.pop('pragmas', None)
        pragma = self._make_header(**kwargs)

        properties = as_tuple(kwargs.pop('properties', None))
        properties += (COLLAPSED(kwargs.get('ncollapse', 1)),)

        self.parallel = kwargs.pop('parallel', False)
        self.ncollapse = kwargs.pop('ncollapse', None)
        self.chunk_size = kwargs.pop('chunk_size', None)
        self.nthreads = kwargs.pop('nthreads', None)
        self.reduction = kwargs.pop('reduction', None)

        super(ParallelIteration, self).__init__(*args, pragmas=[pragma],
                                                properties=properties, **kwargs)
예제 #4
0
    def _make_partree(self, candidates, nthreads=None):
        """Parallelize the `candidates` Iterations attaching suitable OpenMP pragmas."""
        assert candidates
        root = candidates[0]

        # Get the collapsable Iterations
        collapsable = self._find_collapsable(root, candidates)
        ncollapse = 1 + len(collapsable)

        # Prepare to build a ParallelTree
        prefix = []
        if all(i.is_Affine for i in candidates):
            if nthreads is None:
                # pragma omp for ... schedule(..., 1)
                nthreads = self.nthreads
                omp_pragma = self.lang['for'](ncollapse, 1)
            else:
                # pragma omp parallel for ... schedule(..., 1)
                omp_pragma = self.lang['par-for'](ncollapse, 1, nthreads)
        else:
            # pragma omp for ... schedule(..., expr)
            assert nthreads is None
            nthreads = self.nthreads_nonaffine

            chunk_size = Symbol(name='chunk_size')
            omp_pragma = self.lang['for'](ncollapse, chunk_size)

            niters = prod([root.symbolic_size] +
                          [j.symbolic_size for j in collapsable])
            value = INT(Max(niters / (nthreads * self.CHUNKSIZE_NONAFFINE), 1))
            prefix.append(
                Expression(DummyEq(chunk_size, value, dtype=np.int32)))

        # Create a ParallelTree
        body = root._rebuild(pragmas=root.pragmas + (omp_pragma, ),
                             properties=root.properties +
                             (COLLAPSED(ncollapse), ))
        partree = ParallelTree(prefix, body, nthreads=nthreads)

        collapsed = [partree] + collapsable

        return root, partree, collapsed
예제 #5
0
    def _make_partree(self, candidates, nthreads=None):
        """Parallelize `root` attaching a suitable OpenMP pragma."""
        assert candidates
        root = candidates[0]

        # Get the collapsable Iterations
        collapsable = []
        if ncores() >= Ompizer.COLLAPSE_NCORES and IsPerfectIteration().visit(
                root):
            for n, i in enumerate(candidates[1:], 1):
                # The OpenMP specification forbids collapsed loops to use iteration
                # variables in initializer expressions. E.g., the following is forbidden:
                #
                # #pragma omp ... collapse(2)
                # for (i = ... )
                #   for (j = i ...)
                #     ...
                #
                # Here, we make sure this won't happen
                if any(j.dim in i.symbolic_min.free_symbols
                       for j in candidates[:n]):
                    break

                # Also, we do not want to collapse vectorizable Iterations
                if i.is_Vectorizable:
                    break

                # Would there be enough work per parallel iteration?
                try:
                    work = prod(
                        [int(j.dim.symbolic_size) for j in candidates[n + 1:]])
                    if work < Ompizer.COLLAPSE_WORK:
                        break
                except TypeError:
                    pass

                collapsable.append(i)
        ncollapse = 1 + len(collapsable)

        # Prepare to build a ParallelTree
        prefix = []
        if all(i.is_Affine for i in candidates):
            if nthreads is None:
                # pragma omp for ... schedule(..., 1)
                nthreads = self.nthreads
                omp_pragma = self.lang['for'](ncollapse, 1)
            else:
                # pragma omp parallel for ... schedule(..., 1)
                omp_pragma = self.lang['par-for'](ncollapse, 1, nthreads)
        else:
            # pragma omp for ... schedule(..., expr)
            assert nthreads is None
            nthreads = self.nthreads_nonaffine

            chunk_size = Symbol(name='chunk_size')
            omp_pragma = self.lang['for'](ncollapse, chunk_size)

            niters = prod([root.symbolic_size] +
                          [j.symbolic_size for j in collapsable])
            value = INT(Max(niters / (nthreads * self.CHUNKSIZE_NONAFFINE), 1))
            prefix.append(
                Expression(DummyEq(chunk_size, value, dtype=np.int32)))

        # Create a ParallelTree
        body = root._rebuild(pragmas=root.pragmas + (omp_pragma, ),
                             properties=root.properties +
                             (COLLAPSED(ncollapse), ))
        partree = ParallelTree(prefix, body, nthreads=nthreads)

        collapsed = [partree] + collapsable

        return root, partree, collapsed