Ejemplo n.º 1
0
    def run(self):
        """Cythonize the extension modules and run the ``build_ext`` command.

        When ``build.get_nvcc_path()`` or ``build.get_hipcc_path()`` returns
        a path, ``ccompiler.new_compiler`` is replaced by a wrapper that
        falls back to the platform C compiler class on
        ``DistutilsPlatformError``, and ``self.compiler`` is set to
        ``'nvidia'`` so that this fallback is triggered.
        """
        if (build.get_nvcc_path() is not None
                or build.get_hipcc_path() is not None):

            def wrap_new_compiler(func):
                def _wrap_new_compiler(*args, **kwargs):
                    try:
                        return func(*args, **kwargs)
                    except errors.DistutilsPlatformError:
                        if not PLATFORM_WIN32:
                            CCompiler = _UnixCCompiler
                        else:
                            CCompiler = _MSVCCompiler
                        # `dry_run` and `force` are optional arguments of
                        # new_compiler(plat, compiler, verbose, dry_run,
                        # force) and may be given positionally, by keyword
                        # or not at all.  Resolve them without raising
                        # KeyError from inside this handler.
                        options = dict(zip(
                            ('plat', 'compiler', 'verbose', 'dry_run',
                             'force'),
                            args))
                        options.update(kwargs)
                        return CCompiler(None, options.get('dry_run', 0),
                                         options.get('force', 0))

                return _wrap_new_compiler

            ccompiler.new_compiler = wrap_new_compiler(ccompiler.new_compiler)
            # Intentionally causes DistutilsPlatformError in
            # ccompiler.new_compiler() function to hook.
            self.compiler = 'nvidia'
        ctx = cupy_builder.get_context()
        ext_modules = get_ext_modules(True, ctx)  # get .pyx modules
        cythonize(ext_modules, ctx)
        check_extensions(self.extensions)
        build_ext.build_ext.run(self)
Ejemplo n.º 2
0
    def build_extensions(self) -> None:
        """Cythonize all extensions, then build them with the parent class.

        The number of parallel build jobs is read from the
        ``CUPY_NUM_BUILD_JOBS`` environment variable (``'4'`` when unset).
        After cythonizing, every ``*.pyx`` source of each extension is
        replaced by the matching ``*.cpp`` path.

        Raises:
            RuntimeError: If a source file of an extension does not exist.
        """
        jobs = int(os.environ.get('CUPY_NUM_BUILD_JOBS', '4'))
        if jobs > 1:
            self.parallel = jobs
            # Workaround for a setuptools/distutils bug on Windows:
            # MSVCCompiler normally initializes itself lazily in the first
            # compilation, but that init code is neither locked nor
            # idempotent, so running it from several build threads corrupts
            # its internal state.  Initialize once before threads start.
            # https://github.com/pypa/setuptools/blob/v60.0.0/setuptools/_distutils/_msvccompiler.py#L322-L327
            if hasattr(self.compiler, 'initialize'):
                self.compiler.initialize()

        # Translate "*.pyx" files into "*.cpp" files.
        print('Cythonizing...')
        cythonize(self.extensions, cupy_builder.get_context())

        # Point each extension at the generated "*.cpp" files instead of the
        # "*.pyx" files (c.f. `Cython.Distutils.old_build_ext`).
        for extension in self.extensions:
            pyx_files, other_files = filter_files_by_extension(
                extension.sources, '.pyx')
            generated = [
                os.path.splitext(path)[0] + '.cpp' for path in pyx_files
            ]
            extension.sources = generated + other_files

            # Fail on the first source that is not present on disk.
            missing = next(
                (path for path in extension.sources
                 if not os.path.isfile(path)),
                None)
            if missing is not None:
                raise RuntimeError(f'Fatal error: missing file: {missing}')
        super().build_extensions()
Ejemplo n.º 3
0
    def build_extension(self, ext: setuptools.Extension) -> None:
        """Build one extension, compiling its device code separately.

        ``compile_device_code`` handles the "*.cu" files; its first result
        becomes the extension's source list and its second result is
        appended to ``ext.extra_objects``.  The parent class then compiles
        the remaining sources and links everything.
        """
        # Compile "*.cu" files into object files.
        host_sources, device_objects = compile_device_code(
            cupy_builder.get_context(), ext)

        # Device code is dropped from the sources; the object files produced
        # from it are linked in instead.
        ext.sources = host_sources  # type: ignore[attr-defined]
        ext.extra_objects += device_objects  # type: ignore[attr-defined]

        # setuptools takes over from here: compile "*.cpp" files and link
        # them with the object files generated from "*.cu".
        super().build_extension(ext)
Ejemplo n.º 4
0
def cythonize(extensions, ctx: Context):
    """Run ``Cython.Build.cythonize`` on ``extensions`` for the given context.

    Args:
        extensions: Extension modules forwarded to
            ``Cython.Build.cythonize``.
        ctx: Build context.  Its ``linetrace``, ``profile``, ``annotate``,
            ``use_cuda_python``, ``use_stub`` and ``use_hip`` attributes are
            read.

    Returns:
        The value returned by ``Cython.Build.cythonize``.
    """
    # Delay importing Cython as it may be installed via setup_requires if
    # the user does not have Cython installed.
    import Cython
    import Cython.Build
    cython_version = pkg_resources.parse_version(Cython.__version__)

    directives = {
        'linetrace': ctx.linetrace,
        'profile': ctx.profile,
        # Embed signatures for Sphinx documentation.
        'embedsignature': True,
    }

    # Enable CUDA Python.
    # TODO: add `cuda` to `setup_requires` only when this flag is set
    # Read from the context passed in, like every other option here.
    use_cuda_python = ctx.use_cuda_python
    if use_cuda_python:
        print('Using CUDA Python')

    # Exactly one backend reports a non-zero version; stub builds report
    # zero for both.  The backend is selected from `ctx`, consistently with
    # the other context-driven options of this function.
    if ctx.use_stub:  # on RTD
        cuda_version = 0
        hip_version = 0
    elif ctx.use_hip:  # on ROCm/HIP
        cuda_version = 0
        hip_version = build.get_hip_version()
    else:  # on CUDA
        cuda_version = build.get_cuda_version()
        hip_version = 0

    # Compile-time constants to be used in Cython code
    compile_time_env = {
        'CUPY_USE_CUDA_PYTHON': use_cuda_python,
        'CUPY_CUFFT_STATIC': False,
        'CUPY_CYTHON_VERSION': str(cython_version),
        'CUPY_CUDA_VERSION': cuda_version,
        'CUPY_HIP_VERSION': hip_version,
    }

    return Cython.Build.cythonize(extensions,
                                  verbose=True,
                                  language_level=3,
                                  compiler_directives=directives,
                                  annotate=ctx.annotate,
                                  compile_time_env=compile_time_env)
Ejemplo n.º 5
0
def get_compiler_setting(use_hip):
    """Assemble the compiler settings shared by the extension modules.

    As a side effect the module globals ``_jitify_path`` and ``_cub_path``
    are set.

    Args:
        use_hip: If true, the toolkit root comes from ``get_rocm_path()``;
            otherwise it comes from ``get_cuda_path()``.

    Returns:
        dict: A mapping with the keys ``include_dirs``, ``library_dirs``,
        ``define_macros``, ``language`` and ``extra_compile_args``.

    Raises:
        Exception: If ``use_hip`` is true and no ``include/hipcub``
            directory exists under the ROCm root.
    """
    global _jitify_path, _cub_path

    if use_hip:
        cuda_path, rocm_path = None, get_rocm_path()
    else:
        cuda_path, rocm_path = get_cuda_path(), None

    include_dirs = []
    library_dirs = []
    define_macros = []
    extra_compile_args = ['-std=c++11'] if use_hip else []

    if cuda_path:
        include_dirs.append(os.path.join(cuda_path, 'include'))
        if PLATFORM_WIN32:
            lib_subdirs = (('bin',), ('lib', 'x64'))
        else:
            lib_subdirs = (('lib64',), ('lib',))
        library_dirs.extend(
            os.path.join(cuda_path, *parts) for parts in lib_subdirs)

    if rocm_path:
        rocm_include = os.path.join(rocm_path, 'include')
        include_dirs.append(rocm_include)
        include_dirs.extend(
            os.path.join(rocm_include, component)
            for component in ('hip', 'rocrand', 'hiprand', 'roctracer'))
        library_dirs.append(os.path.join(rocm_path, 'lib'))

    if PLATFORM_WIN32:
        nvtx_root = os.environ.get('NVTOOLSEXT_PATH', '')
        if not os.path.exists(nvtx_root):
            define_macros.append(('CUPY_NO_NVTX', '1'))
        else:
            include_dirs.append(os.path.join(nvtx_root, 'include'))
            library_dirs.append(os.path.join(nvtx_root, 'lib', 'x64'))

    # For CUB, we need the complex and CUB headers. The search precedence for
    # the latter is:
    #   1. built-in CUB (for CUDA 11+ and ROCm)
    #   2. CuPy's CUB bundle
    # Note that starting CuPy v8 we no longer use CUB_PATH

    # for <cupy/complex.cuh>
    cupy_header = os.path.join(
        cupy_builder.get_context().source_root, 'cupy/_core/include')
    _jitify_path = os.path.join(cupy_header, 'cupy/jitify')

    # Locate the CUB headers shipped with the toolkit, if there are any.
    if cuda_path:
        builtin_cub = os.path.join(cuda_path, 'include', 'cub')
    elif rocm_path:
        builtin_cub = os.path.join(rocm_path, 'include', 'hipcub')
    else:
        builtin_cub = None
    if builtin_cub is not None and not os.path.exists(builtin_cub):
        builtin_cub = None

    if builtin_cub:
        _cub_path = builtin_cub
    elif use_hip:  # CuPy's bundle doesn't work for ROCm
        raise Exception('Please install hipCUB and retry')
    else:
        _cub_path = os.path.join(cupy_header, 'cupy', 'cub')

    # The CUB directory and CuPy's own headers take precedence over the
    # toolkit directories collected above.
    return {
        'include_dirs': [_cub_path, cupy_header] + include_dirs,
        'library_dirs': library_dirs,
        'define_macros': define_macros,
        'language': 'c++',
        'extra_compile_args': extra_compile_args,
    }
Ejemplo n.º 6
0
def make_extensions(ctx: Context, compiler, use_cython):
    """Produce a list of Extension instances to be passed to cythonize().

    Args:
        ctx: Build context.  Its ``use_stub``, ``use_hip``, ``wheel_libs``,
            ``linetrace`` and ``no_rpath`` attributes are read.
        compiler: Compiler object; it is passed to ``preconfigure_modules``
            and its ``compiler_type`` selects per-module flags.
        use_cython: Forwarded to ``module_extension_sources``.

    Returns:
        list: One ``setuptools.Extension`` per file of every available
        module.

    Raises:
        Exception: If a required module is not available.
        AssertionError: If the platform is neither Windows nor Linux.
    """

    MODULES = cupy_builder.get_modules(cupy_builder.get_context())

    no_cuda = ctx.use_stub
    use_hip = not no_cuda and ctx.use_hip
    settings = build.get_compiler_setting(use_hip)

    # Keep only the search directories that actually exist.
    settings['include_dirs'] = [
        x for x in settings['include_dirs'] if os.path.exists(x)
    ]
    settings['library_dirs'] = [
        x for x in settings['library_dirs'] if os.path.exists(x)
    ]

    # Adjust rpath to use CUDA libraries in `cupy/.data/lib/*.so`) from CuPy.
    use_wheel_libs_rpath = (0 < len(ctx.wheel_libs) and not PLATFORM_WIN32)

    # In the environment with CUDA 7.5 on Ubuntu 16.04, gcc5.3 does not
    # automatically deal with memcpy because string.h header file has
    # been changed. This is a workaround for that environment.
    # See details in the below discussions:
    # https://github.com/BVLC/caffe/issues/4046
    # https://groups.google.com/forum/#!topic/theano-users/3ihQYiTRG4E
    settings['define_macros'].append(('_FORCE_INLINES', '1'))

    if ctx.linetrace:
        settings['define_macros'].append(('CYTHON_TRACE', '1'))
        settings['define_macros'].append(('CYTHON_TRACE_NOGIL', '1'))
    if no_cuda:
        settings['define_macros'].append(('CUPY_NO_CUDA', '1'))
    if use_hip:
        settings['define_macros'].append(('CUPY_USE_HIP', '1'))
        # introduced since ROCm 4.2.0
        settings['define_macros'].append(('__HIP_PLATFORM_AMD__', '1'))
        # deprecated since ROCm 4.2.0
        settings['define_macros'].append(('__HIP_PLATFORM_HCC__', '1'))

    if no_cuda:
        # Stub builds treat every module as available.
        available_modules = [m['name'] for m in MODULES]
    else:
        available_modules, settings = preconfigure_modules(
            MODULES, compiler, settings)
        required_modules = get_required_modules(MODULES)
        if not (set(required_modules) <= set(available_modules)):
            raise Exception('Your CUDA environment is invalid. '
                            'Please check above error log.')

    ret = []
    for module in MODULES:
        if module['name'] not in available_modules:
            continue

        # Per-module copy so that module-specific flags do not leak into
        # the shared settings.
        s = copy.deepcopy(settings)
        if not no_cuda:
            s['libraries'] = module['libraries']

        compile_args = s.setdefault('extra_compile_args', [])
        link_args = s.setdefault('extra_link_args', [])

        if module['name'] == 'cusolver':
            # cupy_backends/cupy_lapack.h has C++ template code
            compile_args.append('--std=c++11')
            # openmp is required for cusolver
            if use_hip:
                pass
            elif compiler.compiler_type == 'unix':
                compile_args.append('-fopenmp')
                link_args.append('-fopenmp')
            elif compiler.compiler_type == 'msvc':
                compile_args.append('/openmp')

        if module['name'] == 'random':
            if compiler.compiler_type == 'msvc':
                compile_args.append('-D_USE_MATH_DEFINES')

        if module['name'] == 'jitify':
            # this fixes RTD (no_cuda) builds...
            compile_args.append('--std=c++11')
            # if any change is made to the Jitify header, we force recompiling
            s['depends'] = ['./cupy/_core/include/cupy/jitify/jitify.hpp']

        if module['name'] == 'dlpack':
            # if any change is made to the DLPack header, we force recompiling
            s['depends'] = ['./cupy/_core/include/cupy/dlpack/dlpack.h']

        for f in module['file']:
            # Per-file copy: the rpath link flag below depends on the
            # extension name.
            s_file = copy.deepcopy(s)
            name = module_extension_name(f)

            if name.endswith('fft._callback') and not PLATFORM_LINUX:
                continue

            rpath = []
            if not ctx.no_rpath:
                # Add library directories (e.g., `/usr/local/cuda/lib64`) to
                # RPATH.
                rpath += s_file['library_dirs']

            if use_wheel_libs_rpath:
                # Add `cupy/.data/lib` (where shared libraries included in
                # wheels reside) to RPATH.
                # The path is resolved relative to the module, e.g., use
                # `$ORIGIN/../cupy/.data/lib` for `cupy/cudnn.so` and
                # `$ORIGIN/../../../cupy/.data/lib` for
                # `cupy_backends/cuda/libs/cudnn.so`.
                depth = name.count('.')
                rpath.append('{}{}/cupy/.data/lib'.format(
                    _rpath_base(), '/..' * depth))

            if not PLATFORM_WIN32 and not PLATFORM_LINUX:
                # Raised explicitly instead of `assert False` so that the
                # guard is not stripped when Python runs with -O.
                raise AssertionError("macOS is no longer supported")
            if PLATFORM_LINUX and rpath:
                ldflag = '-Wl,--disable-new-dtags,'
                ldflag += ','.join('-rpath,' + p for p in rpath)
                # 'extra_link_args' is always present: it was created on `s`
                # via setdefault() before the copy was made.
                s_file['extra_link_args'].append(ldflag)

            sources = module_extension_sources(f, use_cython, no_cuda)
            extension = setuptools.Extension(name, sources, **s_file)
            ret.append(extension)

    return ret