Example #1
    def map_product(self, expr, derivatives):
        from grudge.symbolic.tools import is_scalar
        from pytools import partition
        scalars, nonscalars = partition(is_scalar, expr.children)

        if len(nonscalars) != 1:
            return DerivativeJoiner()(expr)
        else:
            from pymbolic import flattened_product
            factor = flattened_product(scalars)
            nonscalar, = nonscalars

            sub_derivatives = {}
            nonscalar = self.rec(nonscalar, sub_derivatives)

            def do_map(expr):
                if is_scalar(expr):
                    return expr
                else:
                    return self.rec(expr, derivatives)

            for operator, operands in sub_derivatives.items():
                for operand in operands:
                    # recurse into each operand (via do_map) so nested
                    # non-scalar factors are folded into `derivatives`
                    derivatives.setdefault(operator, []).append(
                            factor * do_map(operand))

            return factor * nonscalar
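
All six examples on this page center on pytools.partition, which splits a sequence into two lists by a predicate: matches first, non-matches second. A minimal sketch of just that call, independent of the grudge machinery above:

from pytools import partition

# partition(pred, seq) returns (matches, non_matches) -- the same call
# map_product uses to separate scalar factors from non-scalar ones.
evens, odds = partition(lambda n: n % 2 == 0, range(10))
print(evens)  # [0, 2, 4, 6, 8]
print(odds)   # [1, 3, 5, 7, 9]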
Example #2
    def __init__(self, vec_expr_info_list, result_dtype_getter):
        self.result_dtype_getter = result_dtype_getter

        from hedge.optemplate.primitives import ScalarParameter
        from hedge.optemplate.mappers import (DependencyMapper,
                                              GeometricFactorCollector)

        from functools import reduce  # not a builtin in Python 3
        from operator import or_

        import numpy

        from pymbolic import var

        dep_mapper = DependencyMapper(include_subscripts=True,
                                      include_lookups=True,
                                      include_calls="descend_args")
        gfc = GeometricFactorCollector()

        # gather all dependencies

        deps = (reduce(or_,
                       (dep_mapper(vei.expr) for vei in vec_expr_info_list))
                | reduce(or_, (gfc(vei.expr) for vei in vec_expr_info_list)))

        # We're compiling a batch of vector expressions, some of which may
        # depend on results generated in this same batch. These dependencies
        # are also captured above, but they're not genuine external dependencies.
        # Hence we remove them here:

        deps -= set(var(vei.name) for vei in vec_expr_info_list)

        from pytools import partition

        def is_vector_pred(dep):
            return not isinstance(dep, ScalarParameter)

        vdeps, sdeps = partition(is_vector_pred, deps)

        vdeps = [(str(vdep), vdep) for vdep in vdeps]
        sdeps = [(str(sdep), sdep) for sdep in sdeps]
        vdeps.sort()
        sdeps.sort()
        self.vector_deps = [vdep for key, vdep in vdeps]
        self.scalar_deps = [sdep for key, sdep in sdeps]

        self.vector_dep_names = [
            "hedge_v%d" % i for i in range(len(self.vector_deps))
        ]
        self.scalar_dep_names = [
            "hedge_s%d" % i for i in range(len(self.scalar_deps))
        ]

        self.constant_dtypes = [
            numpy.array(const).dtype for vei in vec_expr_info_list
            for const in ConstantGatherMapper()(vei.expr)
        ]

        var_i = var("i")
        subst_map = dict(
            list(
                zip(self.vector_deps,
                    [var(vecname)[var_i]
                     for vecname in self.vector_dep_names])) +
            list(
                zip(self.scalar_deps,
                    [var(scaname) for scaname in self.scalar_dep_names])) +
            [(var(vei.name), var(vei.name)[var_i])
             for vei in vec_expr_info_list if not vei.do_not_return])

        def subst_func(expr):
            try:
                return subst_map[expr]
            except KeyError:
                return None

        self.vec_expr_info_list = [
            vei.copy(expr=DefaultingSubstitutionMapper(subst_func)(vei.expr))
            for vei in vec_expr_info_list
        ]

        self.result_vec_expr_info_list = [
            vei for vei in vec_expr_info_list if not vei.do_not_return
        ]
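
The core of this constructor is subst_map: each external vector dependency is rewritten to an indexed access hedge_vN[i] inside the kernel body, while scalar dependencies stay plain names. A toy sketch of the same rewriting using pymbolic's stock substitute helper (the names u, s, and hedge_v0 are illustrative, not taken from hedge):

from pymbolic import substitute, var

# Rewrite the vector dependency u as an element access hedge_v0[i],
# leaving the scalar s untouched -- the same idea as subst_map above.
u, s, i = var("u"), var("s"), var("i")
expr = u * s + u
print(substitute(expr, {"u": var("hedge_v0")[i]}))
# -> hedge_v0[i]*s + hedge_v0[i]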
Example #3
def aggregate_assignments(inf_mapper, instructions, result,
        max_vectors_in_batch_expr):
    import numpy as np

    from pymbolic.primitives import Variable

    function_registry = inf_mapper.function_registry

    # {{{ aggregation helpers

    def get_complete_origins_set(insn, skip_levels=0):
        try:
            return insn_to_origins_cache[insn]
        except KeyError:
            pass

        if skip_levels < 0:
            skip_levels = 0

        result = set()
        for dep in insn.get_dependencies():
            if isinstance(dep, Variable):
                dep_origin = origins_map.get(dep.name, None)
                if dep_origin is not None:
                    if skip_levels <= 0:
                        result.add(dep_origin)
                    result |= get_complete_origins_set(
                            dep_origin, skip_levels-1)

        insn_to_origins_cache[insn] = result

        return result

    var_assignees_cache = {}

    def get_var_assignees(insn):
        try:
            return var_assignees_cache[insn]
        except KeyError:
            result = {Variable(assignee) for assignee in insn.get_assignees()}
            var_assignees_cache[insn] = result
            return result

    def aggregate_two_assignments(ass_1, ass_2):
        names = ass_1.names + ass_2.names

        from pymbolic.primitives import Variable
        deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                - {Variable(name) for name in names}

        return Assign(
                names=names, exprs=ass_1.exprs + ass_2.exprs,
                _dependencies=deps,
                priority=max(ass_1.priority, ass_2.priority))

    # }}}

    # {{{ main aggregation pass

    insn_to_origins_cache = {}

    origins_map = {
                assignee: insn
                for insn in instructions
                for assignee in insn.get_assignees()}

    from pytools import partition
    from grudge.symbolic.primitives import DTAG_SCALAR

    unprocessed_assigns, other_insns = partition(
            lambda insn: (
                isinstance(insn, Assign)
                and not isinstance(insn, ToDiscretizationScopedAssign)
                and not isinstance(insn, FromDiscretizationScopedAssign)
                and not is_external_call(insn.exprs[0], function_registry)
                and not any(
                    inf_mapper.infer_for_name(n).domain_tag == DTAG_SCALAR
                    for n in insn.names)),
            instructions)

    # filter out zero-flop-count assigns--no need to bother with those
    processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0,
            unprocessed_assigns)

    # filter out zero assignments
    from grudge.tools import is_zero

    i = 0

    while i < len(unprocessed_assigns):
        my_assign = unprocessed_assigns[i]
        if any(is_zero(expr) for expr in my_assign.exprs):
            processed_assigns.append(unprocessed_assigns.pop(i))
        else:
            i += 1

    # greedy aggregation
    while unprocessed_assigns:
        my_assign = unprocessed_assigns.pop()

        my_deps = my_assign.get_dependencies()
        my_assignees = get_var_assignees(my_assign)

        agg_candidates = []
        for i, other_assign in enumerate(unprocessed_assigns):
            other_deps = other_assign.get_dependencies()
            other_assignees = get_var_assignees(other_assign)

            if ((my_deps & other_deps
                    or my_deps & other_assignees
                    or other_deps & my_assignees)
                    and my_assign.priority == other_assign.priority):
                agg_candidates.append((i, other_assign))

        did_work = False

        if agg_candidates:
            my_indirect_origins = get_complete_origins_set(
                    my_assign, skip_levels=1)

            for other_assign_index, other_assign in agg_candidates:
                if max_vectors_in_batch_expr is not None:
                    new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                    new_dep_count = len(
                            my_assign.get_dependencies(
                                each_vector=True)
                            | other_assign.get_dependencies(
                                each_vector=True))

                    if (new_assignee_count + new_dep_count
                            > max_vectors_in_batch_expr):
                        continue

                other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                if (my_assign not in other_indirect_origins
                        and other_assign not in my_indirect_origins):
                    did_work = True

                    # aggregate the two assignments
                    new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                    del unprocessed_assigns[other_assign_index]
                    unprocessed_assigns.append(new_assignment)
                    for assignee in new_assignment.get_assignees():
                        origins_map[assignee] = new_assignment

                    break

        if not did_work:
            processed_assigns.append(my_assign)

    externally_used_names = {
            expr
            for insn in processed_assigns + other_insns
            for expr in insn.get_dependencies()}

    if isinstance(result, np.ndarray) and result.dtype.char == "O":
        externally_used_names |= {expr for expr in result}
    else:
        externally_used_names |= {result}

    def schedule_and_finalize_assignment(ass):
        dep_mapper = _make_dep_mapper(include_subscripts=False)

        names_exprs = list(zip(ass.names, ass.exprs))

        my_assignees = {name for name, expr in names_exprs}
        names_exprs_deps = [
                (name, expr,
                    {dep.name for dep in dep_mapper(expr) if
                        isinstance(dep, Variable)} & my_assignees)
                for name, expr in names_exprs]

        ordered_names_exprs = []
        available_names = set()

        while names_exprs_deps:
            schedulable = []

            i = 0
            while i < len(names_exprs_deps):
                name, expr, deps = names_exprs_deps[i]

                unsatisfied_deps = deps - available_names

                if not unsatisfied_deps:
                    schedulable.append((str(expr), name, expr))
                    del names_exprs_deps[i]
                else:
                    i += 1

            # make sure these come out in a constant order
            schedulable.sort()

            if schedulable:
                for key, name, expr in schedulable:
                    ordered_names_exprs.append((name, expr))
                    available_names.add(name)
            else:
                raise RuntimeError("aggregation resulted in an "
                        "impossible assignment")

        return Assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs],
                priority=ass.priority)

    return [schedule_and_finalize_assignment(ass)
        for ass in processed_assigns] + other_insns
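
schedule_and_finalize_assignment orders the names inside one aggregated Assign so that every expression is emitted only after the in-batch names it reads, in deterministic waves. A stripped-down, runnable version of that loop (the schedule function and its triple layout are illustrative, not part of grudge):

def schedule(names_exprs_deps):
    """Order (name, expr, deps) triples so a name is emitted only after
    its in-batch dependencies; deps is a set of names from the batch."""
    pending = list(names_exprs_deps)
    ordered, available = [], set()
    while pending:
        # everything whose in-batch dependencies are already satisfied,
        # sorted by stringified expression for a constant order
        wave = sorted((str(expr), name, expr)
                      for name, expr, deps in pending if deps <= available)
        if not wave:
            raise RuntimeError("aggregation resulted in an "
                    "impossible assignment")
        emitted = {name for _, name, _ in wave}
        ordered.extend((name, expr) for _, name, expr in wave)
        available |= emitted
        pending = [t for t in pending if t[0] not in emitted]
    return ordered

print(schedule([("b", "a+1", {"a"}), ("a", "5", set()),
                ("c", "a+b", {"a", "b"})]))
# [('a', '5'), ('b', 'a+1'), ('c', 'a+b')]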
Example #4
    def __init__(self, vec_expr_info_list, result_dtype_getter):
        self.result_dtype_getter = result_dtype_getter

        from hedge.optemplate.primitives import ScalarParameter
        from hedge.optemplate.mappers import (
                DependencyMapper, GeometricFactorCollector)

        from functools import reduce  # not a builtin in Python 3
        from operator import or_

        import numpy

        from pymbolic import var

        dep_mapper = DependencyMapper(
                include_subscripts=True,
                include_lookups=True,
                include_calls="descend_args")
        gfc = GeometricFactorCollector()

        # gather all dependencies

        deps = (reduce(or_, (dep_mapper(vei.expr) for vei in vec_expr_info_list))
                | reduce(or_, (gfc(vei.expr) for vei in vec_expr_info_list)))

        # We're compiling a batch of vector expressions, some of which may
        # depend on results generated in this same batch. These dependencies
        # are also captured above, but they're not genuine external dependencies.
        # Hence we remove them here:

        deps -= set(var(vei.name) for vei in vec_expr_info_list)

        from pytools import partition

        def is_vector_pred(dep):
            return not isinstance(dep, ScalarParameter)

        vdeps, sdeps = partition(is_vector_pred, deps)

        vdeps = [(str(vdep), vdep) for vdep in vdeps]
        sdeps = [(str(sdep), sdep) for sdep in sdeps]
        vdeps.sort()
        sdeps.sort()
        self.vector_deps = [vdep for key, vdep in vdeps]
        self.scalar_deps = [sdep for key, sdep in sdeps]

        self.vector_dep_names = ["hedge_v%d" % i for i in range(len(self.vector_deps))]
        self.scalar_dep_names = ["hedge_s%d" % i for i in range(len(self.scalar_deps))]

        self.constant_dtypes = [
                numpy.array(const).dtype
                for vei in vec_expr_info_list
                for const in ConstantGatherMapper()(vei.expr)]

        var_i = var("i")
        subst_map = dict(
                list(zip(self.vector_deps, [var(vecname).index(var_i)
                    for vecname in self.vector_dep_names]))
                +list(zip(self.scalar_deps,
                    [var(scaname) for scaname in self.scalar_dep_names]))
                +[(var(vei.name), var(vei.name).index(var_i))
                    for vei in vec_expr_info_list
                    if not vei.do_not_return])

        def subst_func(expr):
            try:
                return subst_map[expr]
            except KeyError:
                return None

        self.vec_expr_info_list = [
                vei.copy(expr=DefaultingSubstitutionMapper(subst_func)(vei.expr))
                for vei in vec_expr_info_list]

        self.result_vec_expr_info_list = [
                vei for vei in vec_expr_info_list if not vei.do_not_return]
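
Up to auto-formatting this is the same constructor as Example #2; the one visible difference is var(name).index(var_i) here versus var(name)[var_i] there. In pymbolic both spellings should build the same Subscript node:

from pymbolic import var
from pymbolic.primitives import Subscript

i = var("i")
# .index() (used in this example) and [] (used in Example #2) agree:
assert var("x").index(i) == Subscript(var("x"), i)
assert var("x")[i] == var("x").index(i)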
Example #5
File: compiler.py Project: felipeh/hedge
    def aggregate_assignments(self, instructions, result):
        from pymbolic.primitives import Variable

        # aggregation helpers -------------------------------------------------
        def get_complete_origins_set(insn, skip_levels=0):
            if skip_levels < 0:
                skip_levels = 0

            result = set()
            for dep in insn.get_dependencies():
                if isinstance(dep, Variable):
                    dep_origin = origins_map.get(dep.name, None)
                    if dep_origin is not None:
                        if skip_levels <= 0:
                            result.add(dep_origin)
                        result |= get_complete_origins_set(
                                dep_origin, skip_levels-1)

            return result

        var_assignees_cache = {}
        def get_var_assignees(insn):
            try:
                return var_assignees_cache[insn]
            except KeyError:
                result = set(Variable(assignee)
                        for assignee in insn.get_assignees())
                var_assignees_cache[insn] = result
                return result

        def aggregate_two_assignments(ass_1, ass_2):
            names = ass_1.names + ass_2.names

            from pymbolic.primitives import Variable
            deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                    - set(Variable(name) for name in names)

            return Assign(
                    names=names, exprs=ass_1.exprs + ass_2.exprs,
                    _dependencies=deps,
                    dep_mapper_factory=self.dep_mapper_factory,
                    priority=max(ass_1.priority, ass_2.priority))

        # main aggregation pass -----------------------------------------------
        origins_map = dict(
                    (assignee, insn)
                    for insn in instructions
                    for assignee in insn.get_assignees())

        from pytools import partition
        unprocessed_assigns, other_insns = partition(
                lambda insn: isinstance(insn, Assign),
                instructions)

        # filter out zero-flop-count assigns--no need to bother with those
        processed_assigns, unprocessed_assigns = partition(
                lambda ass: ass.flop_count() == 0,
                unprocessed_assigns)

        # filter out zero assignments
        from hedge.tools import is_zero

        i = 0

        while i < len(unprocessed_assigns):
            my_assign = unprocessed_assigns[i]
            if any(is_zero(expr) for expr in my_assign.exprs):
                # pop the element under inspection, not the last one
                processed_assigns.append(unprocessed_assigns.pop(i))
            else:
                i += 1

        # greedy aggregation
        while unprocessed_assigns:
            my_assign = unprocessed_assigns.pop()

            my_deps = my_assign.get_dependencies()
            my_assignees = get_var_assignees(my_assign)

            agg_candidates = []
            for i, other_assign in enumerate(unprocessed_assigns):
                other_deps = other_assign.get_dependencies()
                other_assignees = get_var_assignees(other_assign)

                if ((my_deps & other_deps
                        or my_deps & other_assignees
                        or other_deps & my_assignees)
                        and my_assign.priority == other_assign.priority):
                    agg_candidates.append((i, other_assign))

            did_work = False

            if agg_candidates:
                my_indirect_origins = get_complete_origins_set(
                        my_assign, skip_levels=1)

                for other_assign_index, other_assign in agg_candidates:
                    if self.max_vectors_in_batch_expr is not None:
                        new_assignee_count = len(
                                set(my_assign.get_assignees())
                                | set(other_assign.get_assignees()))
                        new_dep_count = len(
                                my_assign.get_dependencies(
                                    each_vector=True)
                                | other_assign.get_dependencies(
                                    each_vector=True))

                        if (new_assignee_count + new_dep_count
                                > self.max_vectors_in_batch_expr):
                            continue

                    other_indirect_origins = get_complete_origins_set(
                            other_assign, skip_levels=1)

                    if (my_assign not in other_indirect_origins and
                            other_assign not in my_indirect_origins):
                        did_work = True

                        # aggregate the two assignments
                        new_assignment = aggregate_two_assignments(
                                my_assign, other_assign)
                        del unprocessed_assigns[other_assign_index]
                        unprocessed_assigns.append(new_assignment)
                        for assignee in new_assignment.get_assignees():
                            origins_map[assignee] = new_assignment

                        break

            if not did_work:
                processed_assigns.append(my_assign)

        externally_used_names = set(
                expr
                for insn in processed_assigns + other_insns
                for expr in insn.get_dependencies())

        from hedge.tools import is_obj_array
        if is_obj_array(result):
            externally_used_names |= set(expr for expr in result)
        else:
            externally_used_names |= set([result])

        def schedule_and_finalize_assignment(ass):
            dep_mapper = self.dep_mapper_factory()

            # materialize: this pairing is iterated more than once below
            names_exprs = list(zip(ass.names, ass.exprs))

            my_assignees = set(name for name, expr in names_exprs)
            names_exprs_deps = [
                    (name, expr,
                        set(dep.name for dep in dep_mapper(expr) if
                            isinstance(dep, Variable)) & my_assignees)
                    for name, expr in names_exprs]

            ordered_names_exprs = []
            available_names = set()

            while names_exprs_deps:
                schedulable = []

                i = 0
                while i < len(names_exprs_deps):
                    name, expr, deps = names_exprs_deps[i]

                    unsatisfied_deps = deps - available_names

                    if not unsatisfied_deps:
                        schedulable.append((str(expr), name, expr))
                        del names_exprs_deps[i]
                    else:
                        i += 1

                # make sure these come out in a constant order
                schedulable.sort()

                if schedulable:
                    for key, name, expr in schedulable:
                        ordered_names_exprs.append((name, expr))
                        available_names.add(name)
                else:
                    raise RuntimeError("aggregation resulted in an "
                            "impossible assignment")

            return self.finalize_multi_assign(
                    names=[name for name, expr in ordered_names_exprs],
                    exprs=[expr for name, expr in ordered_names_exprs],
                    do_not_return=[Variable(name) not in externally_used_names
                        for name, expr in ordered_names_exprs],
                    priority=ass.priority)

        return [schedule_and_finalize_assignment(ass)
            for ass in processed_assigns] + other_insns
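
aggregate_two_assignments computes the merged statement's external dependencies as the union of both dependency sets minus everything the merged statement itself assigns, so intra-merge reads stop counting as external. The same bookkeeping with plain sets (all names invented for illustration):

# Union of both assignments' reads, minus the merged assignment's own
# writes -- mirroring the `deps` computation in aggregate_two_assignments.
deps_1, deps_2 = {"a", "b"}, {"b", "x"}
names = ["x", "y"]  # x is produced within the merged assignment itself
merged_deps = (deps_1 | deps_2) - set(names)
print(sorted(merged_deps))  # ['a', 'b']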
Example #6
    def aggregate_assignments(self, instructions, result):
        from pymbolic.primitives import Variable

        # {{{ aggregation helpers

        def get_complete_origins_set(insn, skip_levels=0):
            if skip_levels < 0:
                skip_levels = 0

            result = set()
            for dep in insn.get_dependencies():
                if isinstance(dep, Variable):
                    dep_origin = origins_map.get(dep.name, None)
                    if dep_origin is not None:
                        if skip_levels <= 0:
                            result.add(dep_origin)
                        result |= get_complete_origins_set(
                            dep_origin, skip_levels - 1)

            return result

        var_assignees_cache = {}

        def get_var_assignees(insn):
            try:
                return var_assignees_cache[insn]
            except KeyError:
                result = set(
                    Variable(assignee) for assignee in insn.get_assignees())
                var_assignees_cache[insn] = result
                return result

        def aggregate_two_assignments(ass_1, ass_2):
            names = ass_1.names + ass_2.names

            from pymbolic.primitives import Variable
            deps = (ass_1.get_dependencies() | ass_2.get_dependencies()) \
                    - set(Variable(name) for name in names)

            return Assign(names=names,
                          exprs=ass_1.exprs + ass_2.exprs,
                          _dependencies=deps,
                          dep_mapper_factory=self.dep_mapper_factory,
                          priority=max(ass_1.priority, ass_2.priority))

        # }}}

        # {{{ main aggregation pass

        origins_map = dict((assignee, insn) for insn in instructions
                           for assignee in insn.get_assignees())

        from pytools import partition
        unprocessed_assigns, other_insns = partition(
            lambda insn: isinstance(insn, Assign) and not insn.
            is_scalar_valued, instructions)

        # filter out zero-flop-count assigns--no need to bother with those
        processed_assigns, unprocessed_assigns = partition(
            lambda ass: ass.flop_count() == 0, unprocessed_assigns)

        # filter out zero assignments
        from hedge.tools import is_zero

        i = 0

        while i < len(unprocessed_assigns):
            my_assign = unprocessed_assigns[i]
            if any(is_zero(expr) for expr in my_assign.exprs):
                # pop the element under inspection, not the last one
                processed_assigns.append(unprocessed_assigns.pop(i))
            else:
                i += 1

        # greedy aggregation
        while unprocessed_assigns:
            my_assign = unprocessed_assigns.pop()

            my_deps = my_assign.get_dependencies()
            my_assignees = get_var_assignees(my_assign)

            agg_candidates = []
            for i, other_assign in enumerate(unprocessed_assigns):
                other_deps = other_assign.get_dependencies()
                other_assignees = get_var_assignees(other_assign)

                if ((my_deps & other_deps or my_deps & other_assignees
                     or other_deps & my_assignees)
                        and my_assign.priority == other_assign.priority):
                    agg_candidates.append((i, other_assign))

            did_work = False

            if agg_candidates:
                my_indirect_origins = get_complete_origins_set(my_assign,
                                                               skip_levels=1)

                for other_assign_index, other_assign in agg_candidates:
                    if self.max_vectors_in_batch_expr is not None:
                        new_assignee_count = len(
                            set(my_assign.get_assignees())
                            | set(other_assign.get_assignees()))
                        new_dep_count = len(
                            my_assign.get_dependencies(each_vector=True)
                            | other_assign.get_dependencies(each_vector=True))

                        if (new_assignee_count + new_dep_count >
                                self.max_vectors_in_batch_expr):
                            continue

                    other_indirect_origins = get_complete_origins_set(
                        other_assign, skip_levels=1)

                    if (my_assign not in other_indirect_origins
                            and other_assign not in my_indirect_origins):
                        did_work = True

                        # aggregate the two assignments
                        new_assignment = aggregate_two_assignments(
                            my_assign, other_assign)
                        del unprocessed_assigns[other_assign_index]
                        unprocessed_assigns.append(new_assignment)
                        for assignee in new_assignment.get_assignees():
                            origins_map[assignee] = new_assignment

                        break

            if not did_work:
                processed_assigns.append(my_assign)

        externally_used_names = set(expr
                                    for insn in processed_assigns + other_insns
                                    for expr in insn.get_dependencies())

        from hedge.tools import is_obj_array
        if is_obj_array(result):
            externally_used_names |= set(expr for expr in result)
        else:
            externally_used_names |= set([result])

        def schedule_and_finalize_assignment(ass):
            dep_mapper = self.dep_mapper_factory()

            # materialize: this pairing is iterated more than once below
            names_exprs = list(zip(ass.names, ass.exprs))

            my_assignees = set(name for name, expr in names_exprs)
            names_exprs_deps = [
                (name, expr,
                 set(dep.name
                     for dep in dep_mapper(expr) if isinstance(dep, Variable))
                 & my_assignees) for name, expr in names_exprs
            ]

            ordered_names_exprs = []
            available_names = set()

            while names_exprs_deps:
                schedulable = []

                i = 0
                while i < len(names_exprs_deps):
                    name, expr, deps = names_exprs_deps[i]

                    unsatisfied_deps = deps - available_names

                    if not unsatisfied_deps:
                        schedulable.append((str(expr), name, expr))
                        del names_exprs_deps[i]
                    else:
                        i += 1

                # make sure these come out in a constant order
                schedulable.sort()

                if schedulable:
                    for key, name, expr in schedulable:
                        ordered_names_exprs.append((name, expr))
                        available_names.add(name)
                else:
                    raise RuntimeError("aggregation resulted in an "
                                       "impossible assignment")

            return self.finalize_multi_assign(
                names=[name for name, expr in ordered_names_exprs],
                exprs=[expr for name, expr in ordered_names_exprs],
                do_not_return=[
                    Variable(name) not in externally_used_names
                    for name, expr in ordered_names_exprs
                ],
                priority=ass.priority)

        return [
            schedule_and_finalize_assignment(ass) for ass in processed_assigns
        ] + other_insns
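
Across Examples #3, #5, and #6 the merge-candidate test is identical: two assignments qualify when they share a read, or one reads what the other writes, and their priorities match. Distilled into a standalone predicate (may_merge is a hypothetical helper, not part of hedge or grudge):

def may_merge(deps_a, assignees_a, prio_a, deps_b, assignees_b, prio_b):
    # shared read, or a read/write overlap in either direction
    overlap = ((deps_a & deps_b) or (deps_a & assignees_b)
               or (deps_b & assignees_a))
    return bool(overlap) and prio_a == prio_b

print(may_merge({"u"}, {"r1"}, 0, {"u", "v"}, {"r2"}, 0))  # True: shared read u
print(may_merge({"u"}, {"r1"}, 0, {"v"}, {"r2"}, 1))       # False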