Beispiel #1
0
def get_usable_inames_for_conditional(kernel, sched_index):
    """Return the inames a conditional at *sched_index* may refer to.

    The result starts out as the inames reported active at *sched_index* by
    ``find_active_inames_at``. If the schedule items before *sched_index*
    leave us inside a subkernel (a ``CallKernel`` that has not been followed
    by a ``ReturnFromKernel``), the inames of the instructions in that
    subkernel's block are considered as well. Such an iname is added when it
    carries a ``ConcurrentTag``, unless

    - it carries a ``VectorizeTag`` or an ``IlpBaseTag``, or
    - it carries a ``LocalIndexTagBase`` tag and ``has_barrier_within``
      reports a barrier for *sched_index*.

    :arg kernel: the kernel whose ``schedule`` is inspected.
    :arg sched_index: index into ``kernel.schedule``.
    :returns: a :class:`frozenset` of inames.
    """
    from loopy.schedule import (
        CallKernel, ReturnFromKernel,
        find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within)
    from loopy.kernel.data import (
        ConcurrentTag, LocalIndexTagBase, VectorizeTag, IlpBaseTag)

    usable = find_active_inames_at(kernel, sched_index)
    barrier_inside = has_barrier_within(kernel, sched_index)

    # Index of the CallKernel opening the subkernel we are currently in, or
    # None as long as we are outside of every subkernel.
    enclosing_call = None
    for idx, item in enumerate(kernel.schedule[:sched_index]):
        if isinstance(item, CallKernel):
            enclosing_call = idx
        elif isinstance(item, ReturnFromKernel):
            enclosing_call = None

    if enclosing_call is None:
        # Not inside a subkernel: only the active inames are usable.
        return frozenset(usable)

    for insn_id in get_insn_ids_for_block_at(kernel.schedule, enclosing_call):
        for iname in kernel.insn_inames(insn_id):
            # Only concurrent inames are candidates ...
            if not kernel.iname_tags_of_type(iname, ConcurrentTag):
                continue

            # ... but vector lane indices only get defined at the innermost
            # level of nesting,
            if kernel.iname_tags_of_type(iname, VectorizeTag):
                continue

            # ... local indices may not be used in conditionals that cross
            # barriers,
            if (kernel.iname_tags_of_type(iname, LocalIndexTagBase)
                    and barrier_inside):
                continue

            # ... and ILP indices are likewise only defined at the innermost
            # level of nesting.
            if kernel.iname_tags_of_type(iname, IlpBaseTag):
                continue

            usable.add(iname)

    return frozenset(usable)
Beispiel #2
0
def get_usable_inames_for_conditional(kernel, sched_index):
    """Return the inames a conditional at *sched_index* may refer to.

    The result starts out as the inames reported active at *sched_index* by
    ``find_active_inames_at``. If the schedule items up to and including
    *sched_index* leave us inside a subkernel (a ``CallKernel`` that has not
    been followed by a ``ReturnFromKernel``), the inames of the instructions
    in that subkernel's block are considered as well. Such an iname is added
    when it carries a ``ConcurrentTag``, unless

    - it carries an ``IlpBaseTag``, or
    - it carries a ``LocalIndexTagBase`` tag and ``has_barrier_within``
      reports a barrier for *sched_index*.

    :arg kernel: the kernel whose ``schedule`` is inspected.
    :arg sched_index: index into ``kernel.schedule``.
    :returns: a :class:`frozenset` of inames.
    """
    from loopy.schedule import (
        CallKernel, ReturnFromKernel,
        find_active_inames_at, get_insn_ids_for_block_at, has_barrier_within)
    from loopy.kernel.data import (
        ConcurrentTag, LocalIndexTagBase, IlpBaseTag)

    usable = find_active_inames_at(kernel, sched_index)
    barrier_inside = has_barrier_within(kernel, sched_index)

    # Index of the CallKernel opening the subkernel we are currently in, or
    # None as long as we are outside of every subkernel. The item at
    # sched_index itself takes part in this search.
    enclosing_call = None
    for idx, item in enumerate(kernel.schedule[:sched_index+1]):
        if isinstance(item, CallKernel):
            enclosing_call = idx
        elif isinstance(item, ReturnFromKernel):
            enclosing_call = None

    if enclosing_call is None:
        # Not inside a subkernel: only the active inames are usable.
        return frozenset(usable)

    for insn_id in get_insn_ids_for_block_at(kernel.schedule, enclosing_call):
        for iname in kernel.insn_inames(insn_id):
            # Only concurrent inames are candidates ...
            if not kernel.iname_tags_of_type(iname, ConcurrentTag):
                continue

            # ... but local indices may not be used in conditionals that
            # cross barriers,
            if (kernel.iname_tags_of_type(iname, LocalIndexTagBase)
                    and barrier_inside):
                continue

            # ... and ILP indices only get defined at the innermost level of
            # nesting, so they are not available in loop bounds.
            if kernel.iname_tags_of_type(iname, IlpBaseTag):
                continue

            usable.add(iname)

    return frozenset(usable)
Beispiel #3
0
def get_admissible_conditional_inames_for(kernel, sched_index):
    """Return the inames on which a conditional at *sched_index* may depend.

    These are the inames reported active at *sched_index* by
    ``find_active_inames_at``, plus every iname in ``kernel.iname_to_tag``
    whose tag is a ``HardwareParallelTag``. Inames tagged with a
    ``LocalIndexTag`` are left out of that second group when
    ``has_barrier_within`` reports a barrier for *sched_index*.

    :returns: a :class:`frozenset` of inames.
    """
    from loopy.kernel.data import LocalIndexTag, HardwareParallelTag
    from loopy.schedule import find_active_inames_at, has_barrier_within

    admissible = find_active_inames_at(kernel, sched_index)
    barrier_inside = has_barrier_within(kernel, sched_index)

    for iname, tag in six.iteritems(kernel.iname_to_tag):
        if not isinstance(tag, HardwareParallelTag):
            continue

        # Local indices must not appear in conditionals around a barrier.
        if barrier_inside and isinstance(tag, LocalIndexTag):
            continue

        admissible.add(iname)

    return frozenset(admissible)
Beispiel #4
0
def get_admissible_conditional_inames_for(codegen_state, sched_index):
    """Return the inames on which a conditional at *sched_index* may depend.

    These are the inames reported active at *sched_index* by
    ``find_active_inames_at`` for ``codegen_state.kernel``. While
    ``codegen_state.is_generating_device_code`` is true, every iname in
    ``kernel.iname_to_tag`` whose tag is a ``HardwareParallelTag`` is added
    as well, except that inames tagged with a ``LocalIndexTag`` are left out
    when ``has_barrier_within`` reports a barrier for *sched_index*.

    :returns: a :class:`frozenset` of inames.
    """
    from loopy.kernel.data import LocalIndexTag, HardwareParallelTag
    from loopy.schedule import find_active_inames_at, has_barrier_within

    knl = codegen_state.kernel

    admissible = find_active_inames_at(knl, sched_index)
    barrier_inside = has_barrier_within(knl, sched_index)

    for iname, tag in six.iteritems(knl.iname_to_tag):
        # Hardware-parallel inames only count while emitting device code.
        if not (isinstance(tag, HardwareParallelTag)
                and codegen_state.is_generating_device_code):
            continue

        # Local indices must not appear in conditionals around a barrier.
        if barrier_inside and isinstance(tag, LocalIndexTag):
            continue

        admissible.add(iname)

    return frozenset(admissible)
Beispiel #5
0
def get_admissible_conditional_inames_for(codegen_state, sched_index):
    """Return the inames on which a conditional at *sched_index* may depend.

    These are the inames reported active at *sched_index* by
    ``find_active_inames_at`` for ``codegen_state.kernel``. While
    ``codegen_state.is_generating_device_code`` is true, every iname in
    ``kernel.iname_to_tags`` for which ``filter_iname_tags_by_type`` finds a
    ``HardwareConcurrentTag`` is added as well, except that inames for which
    it finds a ``LocalIndexTag`` are left out when ``has_barrier_within``
    reports a barrier for *sched_index*.

    :returns: a :class:`frozenset` of inames.
    """
    from loopy.kernel.data import (LocalIndexTag, HardwareConcurrentTag,
                                   filter_iname_tags_by_type)
    from loopy.schedule import find_active_inames_at, has_barrier_within

    knl = codegen_state.kernel

    admissible = find_active_inames_at(knl, sched_index)
    barrier_inside = has_barrier_within(knl, sched_index)

    for iname, tags in six.iteritems(knl.iname_to_tags):
        # Hardware-concurrent inames only count while emitting device code.
        hw_tags = filter_iname_tags_by_type(tags, HardwareConcurrentTag)
        if not (hw_tags and codegen_state.is_generating_device_code):
            continue

        # Local indices must not appear in conditionals around a barrier.
        if barrier_inside and filter_iname_tags_by_type(tags, LocalIndexTag):
            continue

        admissible.add(iname)

    return frozenset(admissible)
Beispiel #6
0
def get_usable_inames_for_conditional(kernel, sched_index):
    """Return the inames a conditional at *sched_index* may refer to.

    The result starts out as the inames reported active at *sched_index* by
    ``find_active_inames_at``. The schedule is then searched backwards from
    *sched_index* for the nearest ``CallKernel``; the inames of the
    instructions in that subkernel's block are considered as well. Such an
    iname is added when its tag (from ``kernel.iname_to_tag``) is a
    ``ParallelTag``, unless

    - the tag is an ``IlpBaseTag``, or
    - the tag is a ``LocalIndexTagBase`` and ``has_barrier_within`` reports
      a barrier for *sched_index*.

    If no ``CallKernel`` is found at or before *sched_index*, only the
    active inames are returned.

    :arg kernel: the kernel whose ``schedule`` is inspected.
    :arg sched_index: index into ``kernel.schedule``.
    :returns: a :class:`frozenset` of inames.
    """
    from loopy.schedule import (find_active_inames_at,
                                get_insn_ids_for_block_at, has_barrier_within)
    from loopy.kernel.data import ParallelTag, LocalIndexTagBase, IlpBaseTag
    from loopy.schedule import CallKernel

    result = find_active_inames_at(kernel, sched_index)
    crosses_barrier = has_barrier_within(kernel, sched_index)

    # Find our containing subkernel, grab inames for all insns from there.
    #
    # The search must stop at index 0: letting the index go negative would
    # make Python index from the *end* of the schedule and either pick up an
    # unrelated, later CallKernel or run off the list with an IndexError.
    subkernel_index = sched_index
    while (subkernel_index >= 0
            and not isinstance(kernel.schedule[subkernel_index], CallKernel)):
        subkernel_index -= 1

    if subkernel_index < 0:
        # No enclosing subkernel: only the active inames are usable.
        return frozenset(result)

    insn_ids_for_subkernel = get_insn_ids_for_block_at(kernel.schedule,
                                                       subkernel_index)

    for insn in insn_ids_for_subkernel:
        for iname in kernel.insn_inames(insn):
            tag = kernel.iname_to_tag.get(iname)

            # Parallel inames are defined within a subkernel, BUT:
            #
            # - local indices may not be used in conditionals that cross
            #   barriers.
            #
            # - ILP indices are not available in loop bounds, they only get
            #   defined at the innermost level of nesting.

            if not isinstance(tag, ParallelTag):
                continue
            if isinstance(tag, LocalIndexTagBase) and crosses_barrier:
                continue
            if isinstance(tag, IlpBaseTag):
                continue

            result.add(iname)

    return frozenset(result)