Beispiel #1
0
def _nvcc_gencode_options(cuda_version: int) -> List[str]:
    """Returns NVCC GPU code generation options.

    The options are selected in the following order:

    1. When the process was started exactly as ``setup.py develop``, no
       option is generated.
    2. When the ``CUPY_NVCC_GENERATE_CODE`` environment variable is set to
       anything other than ``current``, it is split on ``;`` and every
       non-empty piece is passed through as ``--generate-code=<piece>``.
    3. When the variable is ``current`` and
       ``build.get_compute_capabilities()`` returns a value, one entry is
       generated per reported compute capability.
    4. Otherwise a built-in default list chosen by ``cuda_version`` is used.

    Args:
        cuda_version: CUDA version as an integer. It is compared against
            the thresholds 10000, 11000, 11010 and 11040.

    Returns:
        A list of ``--generate-code=...`` command line options for NVCC.

    Raises:
        AssertionError: If the default list is needed but ``cuda_version``
            is lower than 10000.
    """

    if sys.argv == ['setup.py', 'develop']:
        return []

    envcfg = os.getenv('CUPY_NVCC_GENERATE_CODE', None)
    if envcfg is not None and envcfg != 'current':
        return [
            f'--generate-code={arch}' for arch in envcfg.split(';') if arch
        ]

    # Query the compute capabilities only when requested, and only once.
    ccs = build.get_compute_capabilities() if envcfg == 'current' else None

    # The arch_list specifies virtual architectures, such as 'compute_61',
    # and real architectures, such as 'sm_61', for which the CUDA
    # input files are to be compiled.
    #
    # The syntax of an entry of the list is
    #
    #     entry ::= virtual_arch | (virtual_arch, real_arch)
    #
    # where virtual_arch is a string which means a virtual architecture and
    # real_arch is a string which means a real architecture.
    #
    # If a virtual architecture is supplied, NVCC generates a PTX code for
    # the virtual architecture. If a pair of a virtual architecture and a
    # real architecture is supplied, NVCC generates a PTX code for the
    # virtual architecture as well as a cubin code for the real one.
    #
    # For example, making NVCC generate a PTX code for 'compute_60' virtual
    # architecture, the arch_list has an entry of 'compute_60'.
    #
    #     arch_list = ['compute_60']
    #
    # For another, making NVCC generate a PTX code for 'compute_61' virtual
    # architecture and a cubin code for 'sm_61' real architecture, the
    # arch_list has an entry of ('compute_61', 'sm_61').
    #
    #     arch_list = [('compute_61', 'sm_61')]
    #
    # See the documentation of each CUDA version for the list of supported
    # architectures:
    #
    #   https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
    if ccs is not None:
        arch_list = [
            f'compute_{cc}' if cc < 60 else (f'compute_{cc}', f'sm_{cc}')
            for cc in ccs
        ]
    elif cuda_version >= 11040:
        arch_list = [
            'compute_35', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), ('compute_80', 'sm_80'),
            ('compute_86', 'sm_86'), ('compute_87', 'sm_87'), 'compute_87'
        ]
    elif cuda_version >= 11010:
        arch_list = [
            'compute_35', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), ('compute_80', 'sm_80'),
            ('compute_86', 'sm_86'), 'compute_86'
        ]
    elif cuda_version >= 11000:
        arch_list = [
            'compute_35', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), ('compute_80', 'sm_80'), 'compute_80'
        ]
    elif cuda_version >= 10000:
        arch_list = [
            'compute_30', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), 'compute_70'
        ]
    else:
        # This should not happen. Raise explicitly instead of using an
        # ``assert`` statement, which is removed under ``python -O`` and
        # would leave ``arch_list`` unbound.
        raise AssertionError(
            f'Unsupported CUDA version: {cuda_version}')

    options = []
    for arch in arch_list:
        if isinstance(arch, tuple):
            virtual_arch, real_arch = arch
        else:
            # A bare virtual architecture is used for both arch and code.
            virtual_arch = real_arch = arch
        options.append(
            f'--generate-code=arch={virtual_arch},code={real_arch}')

    return options
Beispiel #2
0
def _nvcc_gencode_options(cuda_version: int) -> List[str]:
    """Returns NVCC GPU code generation options.

    The options are selected in the following order:

    1. When the process was started exactly as ``setup.py develop``, no
       option is generated.
    2. When the ``CUPY_NVCC_GENERATE_CODE`` environment variable is set to
       anything other than ``current``, it is split on ``;`` and every
       non-empty piece is passed through as ``--generate-code=<piece>``.
    3. When the variable is ``current`` and
       ``build.get_compute_capabilities()`` returns a value, one entry is
       generated per reported compute capability.
    4. Otherwise a built-in default list chosen by ``cuda_version`` is
       used. For ``cuda_version >= 11040`` the list is extended with
       Jetson architectures when ``platform.machine()`` is ``aarch64``.

    Args:
        cuda_version: CUDA version as an integer. It is compared against
            the thresholds 10000, 11000, 11010 and 11040.

    Returns:
        A list of ``--generate-code=...`` command line options for NVCC.

    Raises:
        AssertionError: If the default list is needed but ``cuda_version``
            is lower than 10000.
    """

    if sys.argv == ['setup.py', 'develop']:
        return []

    envcfg = os.getenv('CUPY_NVCC_GENERATE_CODE', None)
    if envcfg is not None and envcfg != 'current':
        return [
            f'--generate-code={arch}' for arch in envcfg.split(';') if arch
        ]

    # Query the compute capabilities only when requested, and only once.
    ccs = build.get_compute_capabilities() if envcfg == 'current' else None

    # The arch_list specifies virtual architectures, such as 'compute_61',
    # and real architectures, such as 'sm_61', for which the CUDA
    # input files are to be compiled.
    #
    # The syntax of an entry of the list is
    #
    #     entry ::= virtual_arch | (virtual_arch, real_arch)
    #
    # where virtual_arch is a string which means a virtual architecture and
    # real_arch is a string which means a real architecture.
    #
    # If a virtual architecture is supplied, NVCC generates a PTX code for
    # the virtual architecture. If a pair of a virtual architecture and a
    # real architecture is supplied, NVCC generates a PTX code for the
    # virtual architecture as well as a cubin code for the real one.
    #
    # For example, making NVCC generate a PTX code for 'compute_60' virtual
    # architecture, the arch_list has an entry of 'compute_60'.
    #
    #     arch_list = ['compute_60']
    #
    # For another, making NVCC generate a PTX code for 'compute_61' virtual
    # architecture and a cubin code for 'sm_61' real architecture, the
    # arch_list has an entry of ('compute_61', 'sm_61').
    #
    #     arch_list = [('compute_61', 'sm_61')]
    #
    # See the documentation of each CUDA version for the list of supported
    # architectures:
    #
    #   https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#options-for-steering-gpu-code-generation
    if ccs is not None:
        arch_list = [
            f'compute_{cc}' if cc < 60 else (f'compute_{cc}', f'sm_{cc}')
            for cc in ccs
        ]
    elif cuda_version >= 11040:
        # To utilize CUDA Minor Version Compatibility (`cupy-cuda11x`),
        # CUBIN must be generated for all supported compute capabilities
        # instead of PTX:
        # https://docs.nvidia.com/deploy/cuda-compatibility/index.html#application-considerations
        arch_list = [
            ('compute_35', 'sm_35'), ('compute_37', 'sm_37'),
            ('compute_50', 'sm_50'), ('compute_52', 'sm_52'),
            ('compute_60', 'sm_60'), ('compute_61', 'sm_61'),
            ('compute_70', 'sm_70'), ('compute_75', 'sm_75'),
            ('compute_80', 'sm_80'), ('compute_86', 'sm_86'),
            'compute_86',
        ]
        if platform.machine() == 'aarch64':
            # Jetson TX1/TX2 (compute_53 / compute_62) are excluded as
            # they don't support JetPack 5 (CUDA 11.4).
            arch_list += [
                ('compute_72', 'sm_72'),  # Jetson (Xavier)
                ('compute_87', 'sm_87'),  # Jetson (Orin)
            ]
    elif cuda_version >= 11010:
        arch_list = [
            'compute_35', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), ('compute_80', 'sm_80'),
            ('compute_86', 'sm_86'), 'compute_86'
        ]
    elif cuda_version >= 11000:
        arch_list = [
            'compute_35', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), ('compute_80', 'sm_80'), 'compute_80'
        ]
    elif cuda_version >= 10000:
        arch_list = [
            'compute_30', 'compute_50', ('compute_60', 'sm_60'),
            ('compute_61', 'sm_61'), ('compute_70', 'sm_70'),
            ('compute_75', 'sm_75'), 'compute_70'
        ]
    else:
        # This should not happen. Raise explicitly instead of using an
        # ``assert`` statement, which is removed under ``python -O`` and
        # would leave ``arch_list`` unbound.
        raise AssertionError(
            f'Unsupported CUDA version: {cuda_version}')

    options = []
    for arch in arch_list:
        if isinstance(arch, tuple):
            virtual_arch, real_arch = arch
        else:
            # A bare virtual architecture is used for both arch and code.
            virtual_arch = real_arch = arch
        options.append(
            f'--generate-code=arch={virtual_arch},code={real_arch}')

    return options