Example #1
0
def create_profile(name, iet):
    """
    Instrument the Iteration/Expression tree ``iet`` for C-level performance
    profiling.

    Each :class:`Section` found in ``iet`` is summarised into a
    :class:`SectionData` record, registered in a freshly created
    :class:`Profiler` called ``name``, and finally replaced in the tree by a
    :class:`TimedList` wrapping the Section's body.

    Returns the transformed tree and the :class:`Profiler`, in this order.
    """
    sections = FindNodes(Section).visit(iet)
    profiler = Profiler(name)

    for section in sections:
        # Everything below is derived from the ExpressionBundles in `section`
        bundles = FindNodes(ExpressionBundle).visit(section)

        # Total operation count, as reported by the bundles themselves
        ops = sum(bundle.ops for bundle in bundles)

        # Operation count at each section iteration, estimated expression
        # by expression
        sops = sum(estimate_cost(e.expr)
                   for e in flatten(bundle.exprs for bundle in bundles))

        # Total memory traffic: group the traffic entries of all bundles by key,
        # merge each group, then add up the extents of the merged groups
        grouped = {}
        for bundle in bundles:
            for key, value in bundle.traffic.items():
                if key not in grouped:
                    grouped[key] = []
                grouped[key].append(value)
        traffic = sum(IntervalGroup.generate('merge', *group).extent
                      for group in grouped.values())

        # One iteration shape per ExpressionBundle, since each bundle lives in
        # its own iteration space
        itershapes = [bundle.shape for bundle in bundles]

        # Grid points written within `section`: for each bundle, the volume of
        # its shape times the number of distinct TimeFunctions it writes to
        points = 0
        for bundle in bundles:
            written = set()
            for expr in bundle.exprs:
                if expr.is_tensor and expr.write.is_TimeFunction:
                    written.add(expr.write)
            points = points + reduce(mul, bundle.shape) * len(written)

        data = SectionData(ops, sops, points, traffic, itershapes)
        profiler.add(section, data)

    # Swap each Section for a TimedList, thus introducing the C-level timers
    timed = {}
    for section in sections:
        timed[section] = TimedList(gname=name, lname=section.name,
                                   body=section.body)

    return Transformer(timed).visit(iet), profiler
Example #2
0
    def instrument(self, iet):
        """
        Add C-level performance profiling nodes to the Iteration/Expression
        tree ``iet``.

        For every Section in ``iet``, a SectionData summary is stored in
        ``self._sections`` (keyed by the Section itself), and the Section gets
        wrapped into a TimedList bound to ``self.timer``.

        Returns the transformed tree.
        """
        sections = FindNodes(Section).visit(iet)

        for section in sections:
            bundles = FindNodes(ExpressionBundle).visit(section)

            # Total operation count, as reported by the bundles themselves
            ops = sum(bundle.ops for bundle in bundles)

            # Operation count at each section iteration
            sops = sum(estimate_cost(e.expr)
                       for e in flatten(bundle.exprs for bundle in bundles))

            # Total memory traffic; start by grouping the traffic entries of
            # all bundles by key
            grouped = {}
            for bundle in bundles:
                for key, value in bundle.traffic.items():
                    if key not in grouped:
                        grouped[key] = []
                    grouped[key].append(value)
            traffic = 0
            for group in grouped.values():
                try:
                    traffic += IntervalGroup.generate('union', *group).size
                except ValueError:
                    # The entries span different iteration spaces, so their
                    # sizes are added up one by one instead
                    traffic += sum(entry.size for entry in group)

            # One dimension map per ExpressionBundle, since each bundle lives
            # in its own iteration space
            itermaps = [bundle.ispace.dimension_map for bundle in bundles]

            # Grid points written within `section`: for each bundle, its size
            # times the number of distinct TimeFunctions it writes to
            points = 0
            for bundle in bundles:
                written = set()
                for expr in bundle.exprs:
                    if expr.is_tensor and expr.write.is_TimeFunction:
                        written.add(expr.write)
                points = points + bundle.size * len(written)

            summary = SectionData(ops, sops, points, traffic, itermaps)
            self._sections[section] = summary

        # Wrap each Section into a TimedList, thus introducing the C-level
        # timers. Note the whole Section, not just its body, becomes the
        # TimedList body
        timed = {}
        for section in sections:
            timed[section] = TimedList(timer=self.timer, lname=section.name,
                                       body=section)

        return Transformer(timed).visit(iet)
Example #3
0
    def analyze(self, iet):
        """
        Collect profiling metadata for the Sections in the given IET.

        A SectionData summary is stored in `self._sections` under the name of
        each Section; Sections whose name is already registered are skipped.
        Nothing is returned.
        """
        for section in FindNodes(Section).visit(iet):
            # Checked at each iteration (rather than filtering upfront) so that
            # a name registered by a previous iteration is skipped as well
            if section.name in self._sections:
                continue

            bundles = FindNodes(ExpressionBundle).visit(section)

            # Total operation count: per-bundle operations scaled by the size
            # of the bundle's iteration space
            ops = sum(bundle.ops*bundle.ispace.size for bundle in bundles)

            # Operation count at each section iteration
            sops = sum(bundle.ops for bundle in bundles)

            # Total memory traffic; start by grouping the traffic entries of
            # all bundles by key
            grouped = {}
            for bundle in bundles:
                for key, value in bundle.traffic.items():
                    if key not in grouped:
                        grouped[key] = []
                    grouped[key].append(value)
            traffic = 0
            for group in grouped.values():
                try:
                    traffic += IntervalGroup.generate('union', *group).size
                except ValueError:
                    # The entries span different iteration spaces, so their
                    # sizes are added up one by one instead
                    traffic += sum(entry.size for entry in group)

            # One dimension map per ExpressionBundle, since each bundle lives
            # in its own iteration space
            itermaps = [bundle.ispace.dimension_map for bundle in bundles]

            # Grid points written within `section`: for each bundle, its size
            # times the number of distinct TimeFunctions it writes to
            points = 0
            for bundle in bundles:
                written = set()
                for expr in bundle.exprs:
                    if expr.is_tensor and expr.write.is_TimeFunction:
                        written.add(expr.write)
                points = points + bundle.size*len(written)

            summary = SectionData(ops, sops, points, traffic, itermaps)
            self._sections[section.name] = summary
Example #4
0
    def instrument(self, iet):
        """
        Add C-level performance profiling nodes to the Iteration/Expression
        tree ``iet``.

        For every :class:`Section` in ``iet``, a :class:`SectionData` summary
        is stored in ``self._sections`` (keyed by the Section itself), and the
        Section gets wrapped into a :class:`TimedList` bound to ``self.timer``.

        Returns the transformed tree.
        """
        sections = FindNodes(Section).visit(iet)

        for section in sections:
            bundles = FindNodes(ExpressionBundle).visit(section)

            # Total operation count, as reported by the bundles themselves
            ops = sum(bundle.ops for bundle in bundles)

            # Operation count at each section iteration
            sops = sum(estimate_cost(e.expr)
                       for e in flatten(bundle.exprs for bundle in bundles))

            # Total memory traffic: group the traffic entries of all bundles by
            # key, merge each group, then add up the sizes of the merged groups
            grouped = {}
            for bundle in bundles:
                for key, value in bundle.traffic.items():
                    if key not in grouped:
                        grouped[key] = []
                    grouped[key].append(value)
            traffic = sum(IntervalGroup.generate('merge', *group).size
                          for group in grouped.values())

            # One iteration shape per ExpressionBundle, since each bundle lives
            # in its own iteration space
            itershapes = [bundle.shape for bundle in bundles]

            # Grid points written within `section`: for each bundle, the volume
            # of its shape times the number of distinct TimeFunctions written
            points = 0
            for bundle in bundles:
                written = set()
                for expr in bundle.exprs:
                    if expr.is_tensor and expr.write.is_TimeFunction:
                        written.add(expr.write)
                points = points + reduce(mul, bundle.shape)*len(written)

            summary = SectionData(ops, sops, points, traffic, itershapes)
            self._sections[section] = summary

        # Wrap each Section into a TimedList, thus introducing the C-level
        # timers. Note the whole Section, not just its body, becomes the
        # TimedList body
        timed = {}
        for section in sections:
            timed[section] = TimedList(timer=self.timer, lname=section.name,
                                       body=section)

        return Transformer(timed).visit(iet)
Example #5
0
    def dspace(self):
        """
        Build the DataSpace of the Cluster out of its expressions, its
        IterationSpace, and its Guards.

        The DataSpace consists of per-Function ``parts`` (one IntervalGroup for
        each Function accessed in ``self.exprs``) plus a global ``intervals``
        view obtained as the union of all parts.
        """
        accesses = detect_accesses(self.exprs)

        # First, the `parts` of the DataSpace: a projection of the data space
        # for each Function appearing in `self.exprs`
        parts = {}
        for f, offsets in accesses.items():
            if f is None:
                continue

            # Along each Dimension, the interval goes from the smallest to the
            # largest access offset
            bounds = []
            for dim, offs in offsets.items():
                bounds.append(Interval(dim, min(offs), max(offs)))
            intervals = IntervalGroup(bounds)

            # Factor in the IterationSpace -- if the min/max points aren't zero,
            # the data intervals must shrink/expand accordingly
            intervals = intervals.promote(lambda i: i.is_Block).add(
                self.ispace.intervals.promote(lambda i: i.is_Block)
            )

            # Map SubIterators to the corresponding data space Dimension
            # E.g., `xs -> x -> x0_blk0 -> x` or `t0 -> t -> time`
            intervals = intervals.promote(lambda i: i.is_SubIterator)

            # An explicitly guarded Dimension bound shrinks the `parts`
            for dim, guard in self.guards.items():
                found = guard.find(BaseGuardBoundNext)
                assert len(found) <= 1
                if len(found) == 1:
                    if found.pop().direction is Forward:
                        intervals = intervals.translate(dim, v1=-1)
                    else:
                        intervals = intervals.translate(dim, 1)

            # Special case: a ConditionalDimension whose factor is 1 can safely
            # resort to the parent's Interval
            intervals = intervals.promote(
                lambda i: i.is_Conditional and i.factor == 1
            )

            parts[f] = intervals

        # Next, the Dimensions that need shifted min/max points to avoid
        # out-of-bounds (OOB) accesses
        oobs = set()
        for f, group in parts.items():
            for interval in group:
                dim = interval.dim.parent if interval.dim.is_Sub else interval.dim
                try:
                    # Either a point before the left halo or a point after the
                    # right halo would be accessed. The upper limit is only
                    # computed if the lower-bound check didn't already succeed
                    if interval.lower < 0 or interval.upper > (
                            f._size_nodomain[dim].left + f._size_halo[dim].right):
                        oobs.update(dim._defines)
                except (KeyError, TypeError):
                    # Cannot tell whether there are OOB accesses (e.g., `dim`
                    # not in `f._size_halo`, which is typical of indirect
                    # accesses such as `A[B[i]]`)
                    pass

        # Last, the `intervals` of the DataSpace: a global, Dimension-centric
        # view of the data space. The promotion acts on non-Sub Dimensions only,
        # e.g., `db0 -> time`, but `xi NOT-> x`
        merged = IntervalGroup.generate('union', *parts.values())
        merged = merged.promote(lambda i: not i.is_Sub)
        merged = merged.zero(set(merged.dimensions) - oobs)

        return DataSpace(merged, parts)