Example #1
    def _make_parallel_tree(self, root, candidates):
        ncollapse = self._ncollapse(root, candidates)
        parallel = self.lang['for'](ncollapse)

        yask_add = namespace['code-grid-add']

        # Introduce the `omp for` pragma
        mapper = OrderedDict()
        if root.is_ParallelAtomic:
            # Turn increments into atomic increments
            subs = {}
            for e in FindNodes(Expression).visit(root):
                if not e.is_Increment:
                    continue
                # Try getting the increment components
                try:
                    target, value, indices = split_increment(e.expr)
                except (AttributeError, ValueError):
                    warning(
                        "Found a parallelizable tree, but couldn't ompize it "
                        "because couldn't understand the increment %s" %
                        e.expr)
                    return mapper
                # All good, can atomicize the increment
                subs[e] = e._rebuild(
                    expr=e.expr.func(yask_add, target, (value, indices)))
            handle = Transformer(subs).visit(root)
            mapper[root] = handle._rebuild(pragmas=root.pragmas + (parallel, ))
        else:
            mapper[root] = root._rebuild(pragmas=root.pragmas + (parallel, ))

        return mapper
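
The fragile step above is `split_increment`, which must decompose an increment such as `u[x] = u[x] + v` into its target, value and indices; the except clause catches anything it cannot recognize. Below is a minimal sketch of such a decomposition using SymPy. The name `split_increment_sketch` and the exact return layout are illustrative assumptions, not Devito's actual helper.

import sympy

def split_increment_sketch(eq):
    # Decompose `target[indices] = target[indices] + value` into
    # (target, value, indices); raise ValueError otherwise
    lhs, rhs = eq.lhs, eq.rhs
    if not isinstance(lhs, sympy.Indexed):
        raise ValueError("lhs is not an indexed access")
    value = rhs - lhs  # what is being added to the target
    if value.has(lhs):
        raise ValueError("rhs is not a plain increment of lhs")
    return lhs.base, value, lhs.indices

u = sympy.IndexedBase('u')
x, v = sympy.symbols('x v')
print(split_increment_sketch(sympy.Eq(u[x], u[x] + v)))  # (u, v, (x,))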
Example #2
def process(func, state):
    """
    Apply ``func`` to the IETs in ``state._efuncs``, and update ``state`` accordingly.
    """
    # Create a Call graph. `func` will be applied to each node in the Call graph.
    # `func` might change an `efunc` signature; the Call graph will be used to
    # propagate such change through the `efunc` callers
    dag = DAG(nodes=['root'])
    queue = ['root']
    while queue:
        caller = queue.pop(0)
        callees = FindNodes(Call).visit(state._efuncs[caller])
        for callee in filter_ordered([i.name for i in callees]):
            if callee in state._efuncs:  # Exclude foreign Calls, e.g., MPI calls
                try:
                    dag.add_node(callee)
                    queue.append(callee)
                except KeyError:
                    # `callee` already in `dag`
                    pass
                dag.add_edge(callee, caller)
    assert dag.size == len(state._efuncs)

    # Apply `func`
    for i in dag.topological_sort():
        state._efuncs[i], metadata = func(state._efuncs[i])

        # Track any new Dimensions introduced by `func`
        state._dimensions.extend(list(metadata.get('dimensions', [])))

        # Track any new #include required by `func`
        state._includes.extend(list(metadata.get('includes', [])))
        state._includes = filter_ordered(state._includes)

        # Track any new ElementalFunctions
        state._efuncs.update(
            OrderedDict([(i.name, i) for i in metadata.get('efuncs', [])]))

        # If there's a change to the `args` and the `iet` is an efunc, then
        # we must update the call sites as well, as the arguments dropped down
        # to the efunc have just increased
        args = as_tuple(metadata.get('args'))
        if args:
            # `extif` avoids redundant updates to the parameters list, due
            # to multiple children wanting to add the same input argument
            extif = lambda v: list(v) + [e for e in args if e not in v]
            stack = [i] + dag.all_downstreams(i)
            for n in stack:
                efunc = state._efuncs[n]
                calls = [
                    c for c in FindNodes(Call).visit(efunc) if c.name in stack
                ]
                mapper = {
                    c: c._rebuild(arguments=extif(c.arguments))
                    for c in calls
                }
                efunc = Transformer(mapper).visit(efunc)
                if efunc.is_Callable:
                    efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                state._efuncs[n] = efunc
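
The try/except around `dag.add_node` relies on two contracts: the DAG rejects duplicate nodes by raising KeyError, and `topological_sort` yields callees before their callers, so signature changes propagate upward through the call graph. A toy stand-in illustrating both (the `ToyDAG` class is an illustrative assumption, not Devito's DAG):

from collections import defaultdict, deque

class ToyDAG:
    # Duplicate nodes raise KeyError; edges run callee -> caller, so a
    # topological sort yields callees before the functions calling them
    def __init__(self, nodes=()):
        self.nodes = set()
        self.edges = defaultdict(set)
        for n in nodes:
            self.add_node(n)

    def add_node(self, n):
        if n in self.nodes:
            raise KeyError(n)
        self.nodes.add(n)

    def add_edge(self, src, dst):
        self.edges[src].add(dst)

    def topological_sort(self):
        indeg = {n: 0 for n in self.nodes}
        for src in self.edges:
            for dst in self.edges[src]:
                indeg[dst] += 1
        queue = deque(n for n in self.nodes if indeg[n] == 0)
        order = []
        while queue:
            n = queue.popleft()
            order.append(n)
            for dst in self.edges[n]:
                indeg[dst] -= 1
                if indeg[dst] == 0:
                    queue.append(dst)
        return order

dag = ToyDAG(nodes=['root'])
dag.add_node('kernel')
dag.add_edge('kernel', 'root')  # `root` calls `kernel`
try:
    dag.add_node('kernel')      # second discovery of the same callee
except KeyError:
    pass                        # mirrors the loop above
print(dag.topological_sort())   # ['kernel', 'root']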
Example #3
    def _make_omp_parallel_tree(self, root, candidates):
        """
        Return a mapper to parallelize the :class:`Iteration`s within /root/.
        """
        mapper = OrderedDict()

        # Heuristic: if at least two parallel loops are available and the
        # physical core count is at least COLLAPSE, then omp-collapse them
        nparallel = len(candidates)
        if psutil.cpu_count(logical=False) < Ompizer.COLLAPSE or\
                nparallel < 2:
            parallel = self.lang['for']
        else:
            parallel = self.lang['collapse'](nparallel)

        # Introduce the `omp parallel` pragma
        if root.is_ParallelAtomic:
            # Introduce the `omp atomic` pragmas
            exprs = FindNodes(Expression).visit(root)
            subs = {i: List(header=self.lang['atomic'], body=i)
                    for i in exprs if i.is_increment}
            handle = Transformer(subs).visit(root)
            mapper[root] = handle._rebuild(pragmas=root.pragmas + (parallel,))
        else:
            mapper[root] = root._rebuild(pragmas=root.pragmas + (parallel,))

        return mapper
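
For reference, here is a plausible reconstruction of the `lang` entries this heuristic consumes, with the pragma selection logic in isolation. The pragma strings and the COLLAPSE value are assumptions; only the shape of the table is implied by the code above.

import psutil

COLLAPSE = 32  # assumed threshold standing in for Ompizer.COLLAPSE

lang = {
    'for': '#pragma omp for schedule(static)',
    'collapse': lambda n: '#pragma omp for collapse(%d) schedule(static)' % n,
    'atomic': '#pragma omp atomic',
}

def pick_pragma(nparallel):
    # Collapsing pays off only with at least two parallel loops *and*
    # a large physical core count to keep busy
    ncores = psutil.cpu_count(logical=False) or 1
    if ncores < COLLAPSE or nparallel < 2:
        return lang['for']
    return lang['collapse'](nparallel)

print(pick_pragma(1))  # always the plain `omp for`
print(pick_pragma(3))  # collapsed only on machines with >= 32 physical cores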
Example #4
    def _make_parallel_tree(self, root, candidates):
        """Parallelize the IET rooted in `root`."""
        ncollapse = self._ncollapse(root, candidates)
        parallel = self.lang['for'](ncollapse)

        pragmas = root.pragmas + (parallel, )
        properties = root.properties + (COLLAPSED(ncollapse), )

        # Introduce the `omp for` pragma
        mapper = OrderedDict()
        if root.is_ParallelAtomic:
            # Introduce the `omp atomic` pragmas
            exprs = FindNodes(Expression).visit(root)
            subs = {
                i: List(header=self.lang['atomic'], body=i)
                for i in exprs if i.is_Increment
            }
            handle = Transformer(subs).visit(root)
            mapper[root] = handle._rebuild(pragmas=pragmas,
                                           properties=properties)
        else:
            mapper[root] = root._rebuild(pragmas=pragmas,
                                         properties=properties)

        root = Transformer(mapper).visit(root)

        return root
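
Every variant of `_make_parallel_tree` shown here leans on `_rebuild`, because IET nodes are immutable: a transformation never mutates a node in place, it returns a fresh copy with selected attributes replaced. A minimal sketch of the idiom with a hypothetical `Node` class:

class Node:
    # Illustrative immutable IET-style node: `_rebuild` returns a copy
    # with selected attributes overridden, leaving `self` untouched
    __slots__ = ('pragmas', 'properties')

    def __init__(self, pragmas=(), properties=()):
        self.pragmas = tuple(pragmas)
        self.properties = tuple(properties)

    def _rebuild(self, **kwargs):
        attrs = {k: kwargs.get(k, getattr(self, k)) for k in self.__slots__}
        return Node(**attrs)

root = Node()
new = root._rebuild(pragmas=root.pragmas + ('#pragma omp for collapse(2)',))
assert root.pragmas == () and new.pragmas != ()  # the original is unchanged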
Example #5
    def apply(self, func, **kwargs):
        """
        Apply ``func`` to all nodes in the Graph. This changes the state of the Graph.
        """
        dag = self._create_call_graph()

        # Apply `func`
        for i in dag.topological_sort():
            self.efuncs[i], metadata = func(self.efuncs[i], **kwargs)

            # Track any new Dimensions introduced by `func`
            self.dimensions.extend(list(metadata.get('dimensions', [])))

            # Track any new #include and #define required by `func`
            self.includes.extend(list(metadata.get('includes', [])))
            self.includes = filter_ordered(self.includes)
            self.headers.extend(list(metadata.get('headers', [])))
            self.headers = filter_ordered(self.headers)

            # Track any new external functions
            self.ffuncs.extend(list(metadata.get('ffuncs', [])))
            self.ffuncs = filter_ordered(self.ffuncs)

            # Track any new ElementalFunctions
            self.efuncs.update(
                OrderedDict([(i.name, i) for i in metadata.get('efuncs', [])]))

            # If there's a change to the `args` and the `iet` is an efunc, then
            # we must update the call sites as well, as the arguments dropped down
            # to the efunc have just increased
            args = as_tuple(metadata.get('args'))
            if args:
                # `extif` avoids redundant updates to the parameters list, due
                # to multiple children wanting to add the same input argument
                extif = lambda v: list(v) + [e for e in args if e not in v]
                stack = [i] + dag.all_downstreams(i)
                for n in stack:
                    efunc = self.efuncs[n]
                    calls = [
                        c for c in FindNodes(Call).visit(efunc)
                        if c.name in stack
                    ]
                    mapper = {
                        c: c._rebuild(arguments=extif(c.arguments))
                        for c in calls
                    }
                    efunc = Transformer(mapper).visit(efunc)
                    if efunc.is_Callable:
                        efunc = efunc._rebuild(
                            parameters=extif(efunc.parameters))
                    self.efuncs[n] = efunc

        # Apply `func` to the external functions
        for i in range(len(self.ffuncs)):
            self.ffuncs[i], _ = func(self.ffuncs[i], **kwargs)
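
The `extif` lambda is what keeps the propagation idempotent: it appends only the arguments not already present, so when several callees each push the same new argument up the call chain, the caller's parameter list grows exactly once. A quick demonstration:

args = ('nthreads', 'time_M')  # hypothetical new arguments from `metadata`
extif = lambda v: list(v) + [e for e in args if e not in v]

params = ['u_vec', 'x_m', 'x_M']
params = extif(params)  # first callee adds both new arguments
params = extif(params)  # a sibling callee adds nothing further
print(params)           # ['u_vec', 'x_m', 'x_M', 'nthreads', 'time_M']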
Example #6
    def process(self, iet):
        sync_spots = FindNodes(SyncSpot).visit(iet)
        if not sync_spots:
            return iet, {}

        def key(s):
            # The SyncOps are to be processed in the following order
            return [
                WaitLock, WithLock, Delete, FetchUpdate, FetchPrefetch,
                PrefetchUpdate, WaitPrefetch
            ].index(s)

        callbacks = {
            WaitLock: self._make_waitlock,
            WithLock: self._make_withlock,
            Delete: self._make_delete,
            FetchUpdate: self._make_fetchupdate,
            FetchPrefetch: self._make_fetchprefetch,
            PrefetchUpdate: self._make_prefetchupdate
        }
        postponed_callbacks = {WaitPrefetch: self._make_waitprefetch}
        all_callbacks = [callbacks, postponed_callbacks]

        pieces = namedtuple('Pieces', 'init finalize funcs objs')([], [], [],
                                                                  Objs())

        # The processing is a two-step procedure; first, we apply the `callbacks`;
        # then, the `postponed_callbacks`, as these depend on objects produced by the
        # `callbacks`
        subs = {}
        for cbks in all_callbacks:
            for n in sync_spots:
                mapper = as_mapper(n.sync_ops, lambda i: type(i))
                for _type in sorted(mapper, key=key):
                    try:
                        subs[n] = cbks[_type](subs.get(n, n), mapper[_type],
                                              pieces, iet)
                    except KeyError:
                        pass

        iet = Transformer(subs).visit(iet)

        # Add initialization and finalization code
        init = List(body=pieces.init, footer=c.Line())
        finalize = List(header=c.Line(), body=pieces.finalize)
        body = iet.body._rebuild(body=(init, ) + iet.body.body + (finalize, ))
        iet = iet._rebuild(body=body)

        return iet, {
            'efuncs': pieces.funcs,
            'includes': ['pthread.h'],
            'args':
            [i.size for i in pieces.objs.threads if not is_integer(i.size)]
        }
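
The except KeyError inside the double loop is dispatch, not error recovery: on the first pass the postponed SyncOp types are simply absent from `callbacks`, so they get skipped and are handled on the second pass, once the objects they depend on exist. A stripped-down sketch of the same control flow, with toy SyncOp stand-ins:

class WaitLock: pass          # handled in the first pass
class WaitPrefetch: pass      # postponed to the second pass

callbacks = {WaitLock: lambda node: node + ['waitlock']}
postponed_callbacks = {WaitPrefetch: lambda node: node + ['waitprefetch']}

sync_ops = [WaitPrefetch(), WaitLock()]
node = []
for cbks in (callbacks, postponed_callbacks):
    for op in sync_ops:
        try:
            node = cbks[type(op)](node)
        except KeyError:
            pass  # this SyncOp belongs to the other pass
print(node)  # ['waitlock', 'waitprefetch'] -- postponed ops always run last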
Example #7
    def _make_parallel_tree(self, root, candidates):
        """
        Return a mapper to parallelize the :class:`Iteration`s within /root/.
        """
        parallel = self._pragma_for(root, candidates)

        # Introduce the `omp for` pragma
        mapper = OrderedDict()
        if root.is_ParallelAtomic:
            # Introduce the `omp atomic` pragmas
            exprs = FindNodes(Expression).visit(root)
            subs = {i: List(header=self.lang['atomic'], body=i)
                    for i in exprs if i.is_increment}
            handle = Transformer(subs).visit(root)
            mapper[root] = handle._rebuild(pragmas=root.pragmas + (parallel,))
        else:
            mapper[root] = root._rebuild(pragmas=root.pragmas + (parallel,))

        return mapper
Example #8
    def process(self, iet):
        def key(s):
            # The SyncOps are to be processed in the following order
            return [WaitLock, WithLock, Delete, FetchWait,
                    FetchWaitPrefetch].index(s)

        callbacks = {
            WaitLock: self._make_waitlock,
            WithLock: self._make_withlock,
            FetchWait: self._make_fetchwait,
            FetchWaitPrefetch: self._make_fetchwaitprefetch,
            Delete: self._make_delete
        }

        sync_spots = FindNodes(SyncSpot).visit(iet)

        if not sync_spots:
            return iet, {}

        pieces = namedtuple('Pieces', 'init finalize funcs threads')([], [],
                                                                     [], [])

        subs = {}
        for n in sync_spots:
            mapper = as_mapper(n.sync_ops, lambda i: type(i))
            for _type in sorted(mapper, key=key):
                subs[n] = callbacks[_type](subs.get(n, n), mapper[_type],
                                           pieces, iet)

        iet = Transformer(subs).visit(iet)

        # Add initialization and finalization code
        init = List(body=pieces.init, footer=c.Line())
        finalize = List(header=c.Line(), body=pieces.finalize)
        iet = iet._rebuild(body=(init, ) + iet.body + (finalize, ))

        return iet, {
            'efuncs': pieces.funcs,
            'includes': ['pthread.h'],
            'args': [i.size for i in pieces.threads if not is_integer(i.size)]
        }
Example #9
    def _process(self, func):
        """Apply ``func`` to all tracked ``IETs``."""

        for i in self._call_graph.topological_sort():
            self._efuncs[i], metadata = func(self._efuncs[i])

            # Track any new Dimensions and includes introduced by `func`
            self._dimensions.extend(list(metadata.get('dimensions', [])))
            self._includes.extend(list(metadata.get('includes', [])))

            # If there's a change to the `input` and the `iet` is an efunc, then
            # we must update the call sites as well, as the arguments dropped down
            # to the efunc have just increased
            _input = as_tuple(metadata.get('input'))
            if _input:
                # `extif` avoids redundant updates to the parameters list, due
                # to multiple children wanting to add the same input argument
                extif = lambda v: list(v) + [e for e in _input if e not in v]
                stack = [i] + self._call_graph.all_downstreams(i)
                for n in stack:
                    efunc = self._efuncs[n]
                    calls = [c for c in FindNodes(Call).visit(efunc) if c.name in stack]
                    mapper = {c: c._rebuild(arguments=extif(c.arguments)) for c in calls}
                    efunc = Transformer(mapper).visit(efunc)
                    if efunc.is_Callable:
                        efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                    self._efuncs[n] = efunc
                self._input.extend(list(_input))

            for k, v in metadata.get('efuncs', {}).items():
                # Update the efuncs
                if k.is_Callable:
                    self._efuncs[k.name] = k
                # Update the call graph
                self._call_graph.add_node(k.name, ignore_existing=True)
                for target in (v or [None]):
                    self._call_graph.add_edge(k.name, target or 'main', force_add=True)
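
`all_downstreams(i)` is what scopes the update: in a callee-to-caller graph it returns every transitive caller of `i`, which is exactly the set of functions whose call sites may now need extra arguments. A minimal reachability sketch over a toy graph (the function names are illustrative):

from collections import deque

# Edges run callee -> callers: `leaf` is called by `mid`, `mid` by `main`
edges = {'leaf': ['mid'], 'mid': ['main'], 'main': []}

def all_downstreams(node):
    # Every function transitively reachable from `node`, i.e. all callers
    # that must be revisited once `node` grows its signature
    seen, queue = [], deque(edges[node])
    while queue:
        n = queue.popleft()
        if n not in seen:
            seen.append(n)
            queue.extend(edges[n])
    return seen

print(all_downstreams('leaf'))  # ['mid', 'main']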
Example #10
    def apply(self, func, **kwargs):
        """
        Apply ``func`` to all nodes in the Graph. This changes the state of the Graph.
        """
        dag = self._create_call_graph()

        # Apply `func`
        for i in dag.topological_sort():
            self.efuncs[i], metadata = func(self.efuncs[i], **kwargs)

            # Track any new Dimensions introduced by `func`
            self.dimensions.extend(list(metadata.get('dimensions', [])))

            # Track any new #include and #define required by `func`
            self.includes.extend(list(metadata.get('includes', [])))
            self.includes = filter_ordered(self.includes)
            self.headers.extend(list(metadata.get('headers', [])))
            self.headers = filter_ordered(self.headers, key=str)

            # Track any new external functions
            self.ffuncs.extend(list(metadata.get('ffuncs', [])))
            self.ffuncs = filter_ordered(self.ffuncs)

            # Track any new ElementalFunctions
            self.efuncs.update(
                OrderedDict([(i.name, i) for i in metadata.get('efuncs', [])]))

            # If there's a change to the `args` and the `iet` is an efunc, then
            # we must update the call sites as well, as the arguments dropped down
            # to the efunc have just increased
            args = as_tuple(metadata.get('args'))
            if not args:
                continue

            def filter_args(v, efunc=None):
                processed = list(v)
                for _a in args:
                    try:
                        # Should the arg actually be dropped?
                        a, drop = _a
                        if drop:
                            if a in processed:
                                processed.remove(a)
                            continue
                    except (TypeError, ValueError):
                        a = _a

                    if a in processed:
                        # A child efunc trying to add a symbol already added by a
                        # sibling efunc
                        continue

                    if efunc is self.root and not (a.is_Input or a.is_Object):
                        # Temporaries (i.e., Symbols, Arrays) *cannot* be args in `root`
                        continue

                    processed.append(a)

                return processed

            stack = [i] + dag.all_downstreams(i)
            for n in stack:
                efunc = self.efuncs[n]

                mapper = {}
                for c in FindNodes(Call).visit(efunc):
                    if c.name not in stack:
                        continue
                    mapper[c] = c._rebuild(arguments=filter_args(c.arguments))

                parameters = filter_args(efunc.parameters, efunc)
                efunc = Transformer(mapper).visit(efunc)
                efunc = efunc._rebuild(parameters=parameters)

                self.efuncs[n] = efunc

        # Apply `func` to the external functions
        for i in range(len(self.ffuncs)):
            self.ffuncs[i], _ = func(self.ffuncs[i], **kwargs)
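
Unlike the `extif` of the earlier variants, `filter_args` also supports removal: an entry of `args` may be either a plain symbol to add or an `(arg, drop)` pair, and the try/except around the tuple unpacking is what tells the two shapes apart. A condensed demonstration of the convention:

def filter_args_demo(v, args):
    processed = list(v)
    for _a in args:
        try:
            a, drop = _a            # the `(arg, drop)` form
            if drop:
                if a in processed:
                    processed.remove(a)
                continue
        except (TypeError, ValueError):
            a = _a                  # the plain-symbol form
        if a not in processed:
            processed.append(a)
    return processed

print(filter_args_demo(['x_m', 'x_M'], ['nthreads', ('x_M', True)]))
# ['x_m', 'nthreads'] -- `nthreads` added once, `x_M` dropped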
Example #11
def make_yask_kernels(iet, **kwargs):
    yk_solns = kwargs.pop('yk_solns')

    mapper = {}
    for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
        dimensions = tuple(filter_ordered(i.dim.root for i in flatten(trees)))

        # Retrieve the section dtype
        exprs = FindNodes(Expression).visit(section)
        dtypes = {e.dtype for e in exprs}
        if len(dtypes) != 1:
            log("Unable to offload in presence of mixed-precision arithmetic")
            continue
        dtype = dtypes.pop()

        context = contexts.fetch(dimensions, dtype)

        # A unique name for the 'real' compiler and kernel solutions
        name = namespace['jit-soln'](Signer._digest(configuration,
                                                    *[i.root for i in trees]))

        # Create a YASK compiler solution for this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Generate YASK vars and populate `yc_soln` with equations
            local_vars = yaskit(trees, yc_soln)

            # Build the new IET nodes
            yk_soln_obj = YASKSolnObject(namespace['code-soln-name'](n))
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'], yk_soln_obj)
            funcall = Offloaded(funcall, dtype)
            mapper[trees[0].root] = funcall
            mapper.update({i.root: mapper.get(i.root)
                           for i in trees})  # Drop trees

            # JIT-compile the newly-created YASK kernel
            yk_soln = context.make_yk_solution(name, yc_soln, local_vars)
            yk_solns[(dimensions, yk_soln_obj)] = yk_soln

            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d var(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_vars(),
                 yc_soln.get_num_equations()))
        except NotImplementedError as e:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))
    iet = Transformer(mapper).visit(iet)

    if not yk_solns:
        log("No offloadable trees found")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed through YASK, due to the
    # different storage layout
    yk_var_objs = {
        i.name: YASKVarObject(i.name)
        for i in FindSymbols().visit(iet) if i.from_YASK
    }
    yk_var_objs.update({i: YASKVarObject(i) for i in get_local_vars(yk_solns)})
    iet = make_var_accesses(iet, yk_var_objs)

    # The signature needs to be updated
    # TODO: this could be done automagically through the iet pass engine, but
    # currently it only supports *appending* to the parameters list. While here
    # we actually need to change it as some parameters may disappear (x_m, x_M, ...)
    parameters = derive_parameters(iet, True)
    iet = iet._rebuild(parameters=parameters)

    return iet, {}
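
One subtlety above: `mapper.update({i.root: mapper.get(i.root) for i in trees})` maps the first tree's root to the YASK call and every other offloaded root to None, and Devito's Transformer drops any node mapped to None. A tiny illustration of the dictionary being built:

trees = ['root0', 'root1', 'root2']  # stand-ins for the offloaded tree roots
funcall = '<call to yk_soln->run()>'

mapper = {trees[0]: funcall}
mapper.update({t: mapper.get(t) for t in trees})
print(mapper)
# {'root0': '<call to yk_soln->run()>', 'root1': None, 'root2': None}
# A None value tells the subsequent Transformer to drop that subtree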
Example #12
    def _specialize_iet(cls, iet, **kwargs):
        """
        Transform the Iteration/Expression tree to offload the computation of
        one or more loop nests onto YASK. This involves calling the YASK compiler
        to generate YASK code. Such YASK code is then called from within the
        transformed Iteration/Expression tree.
        """
        mapper = {}
        yk_solns = kwargs.pop('yk_solns')
        for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
            dimensions = tuple(
                filter_ordered(i.dim.root for i in flatten(trees)))

            # Retrieve the section dtype
            exprs = FindNodes(Expression).visit(section)
            dtypes = {e.dtype for e in exprs}
            if len(dtypes) != 1:
                log("Unable to offload in presence of mixed-precision arithmetic"
                    )
                continue
            dtype = dtypes.pop()

            context = contexts.fetch(dimensions, dtype)

            # A unique name for the 'real' compiler and kernel solutions
            name = namespace['jit-soln'](Signer._digest(
                configuration, *[i.root for i in trees]))

            # Create a YASK compiler solution for this Operator
            yc_soln = context.make_yc_solution(name)

            try:
                # Generate YASK vars and populate `yc_soln` with equations
                local_vars = yaskit(trees, yc_soln)

                # Build the new IET nodes
                yk_soln_obj = YaskSolnObject(namespace['code-soln-name'](n))
                funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                                 ['time'], yk_soln_obj)
                funcall = Offloaded(funcall, dtype)
                mapper[trees[0].root] = funcall
                mapper.update({i.root: mapper.get(i.root)
                               for i in trees})  # Drop trees

                # JIT-compile the newly-created YASK kernel
                yk_soln = context.make_yk_solution(name, yc_soln, local_vars)
                yk_solns[(dimensions, yk_soln_obj)] = yk_soln

                # Print some useful information about the newly constructed solution
                log("Solution '%s' contains %d var(s) and %d equation(s)." %
                    (yc_soln.get_name(), yc_soln.get_num_vars(),
                     yc_soln.get_num_equations()))
            except NotImplementedError as e:
                log("Unable to offload a candidate tree. Reason: [%s]" %
                    str(e))
        iet = Transformer(mapper).visit(iet)

        if not yk_solns:
            log("No offloadable trees found")

        # Some Iteration/Expression trees are not offloaded to YASK and may
        # require further processing to be executed through YASK, due to the
        # different storage layout
        yk_var_objs = {
            i.name: YaskVarObject(i.name)
            for i in FindSymbols().visit(iet) if i.from_YASK
        }
        yk_var_objs.update(
            {i: YaskVarObject(i)
             for i in cls._get_local_vars(yk_solns)})
        iet = make_var_accesses(iet, yk_var_objs)

        # The signature needs to be updated
        parameters = derive_parameters(iet, True)
        iet = iet._rebuild(parameters=parameters)

        return super(OperatorYASK, cls)._specialize_iet(iet, **kwargs)