Ejemplo n.º 1
0
def instrument_sections(iet, **kwargs):
    """
    Add profiling timers to the Sections of the input IET.

    The instrumentation itself is delegated to
    ``profiler.instrument(iet, timer)``, where ``profiler`` and ``timer`` are
    mandatory entries of ``kwargs``.

    Returns a 2-tuple ``(iet, metadata)``. ``metadata`` is an empty dict when
    the profiler handed back the very same ``iet`` object; otherwise it carries
    the ``timer`` under ``'args'`` and the start/stop timer headers under
    ``'headers'``.
    """
    profiler, timer = kwargs['profiler'], kwargs['timer']

    instrumented = profiler.instrument(iet, timer)

    if instrumented is not iet:
        # Something was instrumented: the generated code needs the timer
        # argument and the headers used to start/stop it
        metadata = {
            'args': timer,
            'headers': [TimedList._start_timer_header(),
                        TimedList._stop_timer_header()],
        }
        return instrumented, metadata

    # Nothing changed, so there is no extra metadata to propagate
    return instrumented, {}
Ejemplo n.º 2
0
def create_profile(name, iet):
    """
    Add C-level performance profiling to the Iteration/Expression tree ``iet``.

    Every :class:`Section` found in ``iet`` is registered, together with a
    :class:`SectionData` summary (operation counts, written grid points,
    memory traffic, iteration shapes), in a freshly created :class:`Profiler`
    called ``name``. Each :class:`Section` is then mapped to a
    :class:`TimedList` holding the Section's body.

    Returns the transformed tree and the :class:`Profiler`, in this order.
    """

    def written_points(bundle):
        # Grid points written by `bundle`: its iteration volume times the
        # number of distinct TimeFunctions written by tensor expressions
        targets = {
            e.write
            for e in bundle.exprs if e.is_tensor and e.write.is_TimeFunction
        }
        return reduce(mul, bundle.shape) * len(targets)

    sections = FindNodes(Section).visit(iet)

    profiler = Profiler(name)
    for section in sections:
        # All ExpressionBundles within `section`
        bundles = FindNodes(ExpressionBundle).visit(section)

        # Operation count, summed over all bundles
        ops = sum(bundle.ops for bundle in bundles)

        # Operation count of a single section iteration
        exprs = flatten(bundle.exprs for bundle in bundles)
        sops = sum(estimate_cost(e.expr) for e in exprs)

        # Memory traffic: entries sharing a key across bundles are merged
        # before their extents are summed up
        grouped = {}
        for bundle in bundles:
            for key, value in bundle.traffic.items():
                grouped.setdefault(key, []).append(value)
        merged = [
            IntervalGroup.generate('merge', *values)
            for values in grouped.values()
        ]
        traffic = sum(group.extent for group in merged)

        # One iteration shape per bundle
        itershapes = [bundle.shape for bundle in bundles]

        # Number of grid points written within `section`
        points = sum([written_points(bundle) for bundle in bundles])

        summary = SectionData(ops, sops, points, traffic, itershapes)
        profiler.add(section, summary)

    # Introduce the C-level timers into the Iteration/Expression tree
    timed = {}
    for section in sections:
        timed[section] = TimedList(gname=name, lname=section.name,
                                   body=section.body)
    iet = Transformer(timed).visit(iet)

    return iet, profiler
Ejemplo n.º 3
0
    def instrument(self, iet):
        """
        Add C-level performance profiling to the Iteration/Expression tree
        ``iet``.

        For each Section in ``iet``, a SectionData summary (operation counts,
        written grid points, memory traffic, iteration maps) is stored in
        ``self._sections``. Each Section is then mapped to a TimedList that is
        bound to ``self.timer`` and has the Section itself as body.

        Returns the transformed tree.
        """
        sections = FindNodes(Section).visit(iet)
        for section in sections:
            bundles = FindNodes(ExpressionBundle).visit(section)

            # Operation count, summed over all bundles
            ops = sum(b.ops for b in bundles)

            # Operation count of a single section iteration
            exprs = flatten(b.exprs for b in bundles)
            sops = sum(estimate_cost(e.expr) for e in exprs)

            # Memory traffic: collect, per key, the entries of all bundles
            accesses = {}
            for b in bundles:
                for key, value in b.traffic.items():
                    accesses.setdefault(key, []).append(value)
            traffic = 0
            for values in accesses.values():
                try:
                    traffic += IntervalGroup.generate('union', *values).size
                except ValueError:
                    # The union cannot be built (entries over different
                    # iteration spaces): fall back to the plain sum of sizes
                    traffic += sum(v.size for v in values)

            # One iteration map per bundle
            itermaps = [b.ispace.dimension_map for b in bundles]

            # Grid points written within `section`: per bundle, its size times
            # the number of distinct TimeFunctions written by tensor expressions
            written = [
                {e.write
                 for e in b.exprs if e.is_tensor and e.write.is_TimeFunction}
                for b in bundles
            ]
            points = sum([b.size * len(w) for b, w in zip(bundles, written)])

            summary = SectionData(ops, sops, points, traffic, itermaps)
            self._sections[section] = summary

        # Introduce the C-level timers into the Iteration/Expression tree
        timed = {}
        for section in sections:
            timed[section] = TimedList(timer=self.timer, lname=section.name,
                                       body=section)

        return Transformer(timed).visit(iet)
Ejemplo n.º 4
0
 def instrument(self, iet, timer):
     """
     Instrument the given IET for C-level performance profiling.

     Each Section found in ``iet`` is rebuilt so that its body becomes a
     TimedList bound to ``timer`` and labelled with the Section's name; the
     name is asserted to be one of ``timer.fields``. If ``iet`` contains no
     Sections, the very same ``iet`` object is returned.
     """
     sections = FindNodes(Section).visit(iet)
     if not sections:
         # Nothing to time: hand back the input untouched
         return iet

     rebuilt = {}
     for section in sections:
         lname = section.name
         assert lname in timer.fields
         timed_body = TimedList(timer=timer, lname=lname, body=section.body)
         rebuilt[section] = section._rebuild(body=timed_body)

     return Transformer(rebuilt, nested=True).visit(iet)
Ejemplo n.º 5
0
def create_profile(name, node):
    """
    Create a :class:`Profiler` for the Iteration/Expression tree ``node``.

    The iteration trees of ``node`` are partitioned into timing sections. Two
    consecutive trees end up in the same section if they define the same set
    of dimensions and their first differing :class:`Iteration`s sit right next
    to each other in one of the adjacency groups found in ``node``. For
    example: ::

        for x = 0 to N
          ..
        for x = 1 to N-1
          ..

    Both Iterations have dimension ``x``, and will be profiled as a single
    section, though their extent is different.

    Each section is timed at the outermost level whose root Iteration is not
    over a time dimension, by wrapping the Iterations at that level in a
    :class:`TimedList` named ``section_<n>``.

    Returns a 2-tuple ``(processed, profiler)``, where ``processed`` is the
    transformed tree. If ``node`` contains no iteration trees, ``node`` itself
    is returned together with an empty :class:`Profiler`.
    """
    profiler = Profiler(name)

    trees = retrieve_iteration_tree(node)
    if not trees:
        return node, profiler

    adjacents = [
        flatten(i) for i in FindAdjacentIterations().visit(node).values() if i
    ]

    def are_neighbours(group, i, j):
        # True if `i` and `j` both belong to `group`, one right after the other
        try:
            return abs(group.index(j) - group.index(i)) == 1
        except ValueError:
            # At least one of the two is not part of this adjacency group
            return False

    def are_adjacent(tree, last):
        for i, j in zip(tree, last):
            if i == j:
                continue
            # Only the first differing pair matters. A failed lookup is handled
            # per group, so a group lacking `i` or `j` does not stop the
            # remaining groups from being inspected
            return any(are_neighbours(a, i, j) for a in adjacents)
        return False

    # Group Iterations based on timing region
    key, groups = lambda itspace: {i.defines for i in itspace}, []
    handle = [trees[0]]
    for tree in trees[1:]:
        last = handle[-1]
        if key(tree) == key(last) and are_adjacent(tree, last):
            handle.append(tree)
        else:
            groups.append(tuple(handle))
            handle = [tree]
    groups.append(tuple(handle))

    # Create and track C-level timers
    mapper = OrderedDict()
    for group in groups:
        # We time at the single timestep level: walk the trees of the group
        # level by level and stop at the first non-time root. `root` and
        # `remainder` are deliberately read after the loop
        for i in zip(*group):
            root = i[0]
            remainder = tuple(j for j in i if j is not root)
            if not root.dim.is_Time:
                break
        if root in mapper:
            # Already timed as part of a previous group
            continue

        # Prepare to transform the Iteration/Expression tree. The section
        # number is the current size of `mapper`, which also counts the
        # `None` entries added for the remainders of previous sections
        body = (root, ) + remainder
        lname = 'section_%d' % len(mapper)
        mapper[root] = TimedList(gname=name, lname=lname, body=body)
        # The remaining Iterations now live inside the TimedList
        mapper.update(OrderedDict([(j, None) for j in remainder]))

        # Estimate computational properties of the profiled section
        expressions = FindNodes(Expression).visit(body)
        ops = estimate_cost([e.expr for e in expressions])
        memory = estimate_memory([e.expr for e in expressions])

        # Keep track of the new profiled section
        profiler.add(lname, group[0], ops, memory)

    # Transform the Iteration/Expression tree introducing the C-level timers
    processed = Transformer(mapper).visit(node)

    return processed, profiler
Ejemplo n.º 6
0
def create_profile(node):
    """
    Create a :class:`Profiler` for the Iteration/Expression tree ``node``.

    The iteration spaces found in ``node`` are first grouped by their root
    :class:`Iteration`. Within each group, consecutive iteration spaces having
    the same set of dimensions form one timing section, regardless of their
    extent. For example: ::

        for x = 0 to N
          ..
        for x = 1 to N-1
          ..

    Both Iterations have dimension ``x``, and will be profiled as a single
    section, though their extent is different.

    Each section is timed at the outermost level whose root Iteration is
    neither over a time nor over a stepping dimension, by wrapping the
    Iterations at that level in a :class:`TimedList` named ``section_<n>``.

    Returns a 2-tuple ``(processed, profiler)``, where ``processed`` is the
    transformed tree.
    """
    profiler = Profiler()

    def dims_of(itspace):
        return set([it.dim for it in itspace])

    # Group by root Iteration
    by_root = OrderedDict()
    for itspace in FindSections().visit(node):
        by_root.setdefault(itspace[0], []).append(itspace)

    # Group sections if their iteration spaces overlap
    found = []
    for itspaces in by_root.values():
        current = []
        for itspace in itspaces:
            if current and dims_of(itspace) != dims_of(current[0]):
                # Found a timing section
                found.append(tuple(current))
                current = [itspace]
            else:
                current.append(itspace)
        if current:
            found.append(tuple(current))

    # Create and track C-level timers
    mapper = OrderedDict()
    for index, group in enumerate(found):
        lname = 'section_%d' % index

        # We time at the single timestep level: walk the iteration spaces of
        # the group level by level and stop at the first root that is neither
        # time nor stepping. `root` and `remainder` are read after the loop
        for level in zip(*group):
            root = level[0]
            remainder = tuple(it for it in level if it is not root)
            if root.dim.is_Time or root.dim.is_Stepping:
                continue
            break

        # Prepare to transform the Iteration/Expression tree
        body = (root, ) + remainder
        mapper[root] = TimedList(gname=profiler.varname, lname=lname,
                                 body=body)
        # The remaining Iterations now live inside the TimedList
        for it in remainder:
            mapper[it] = None

        # Estimate computational properties of the profiled section
        expressions = FindNodes(Expression).visit(body)
        ops = estimate_cost([e.expr for e in expressions])
        memory = estimate_memory([e.expr for e in expressions])

        # Keep track of the new profiled section
        profiler.add(lname, group[0], ops, memory)

    # Transform the Iteration/Expression tree introducing the C-level timers
    processed = Transformer(mapper).visit(node)

    return processed, profiler