def run(self):
    """Cythonize the extension modules and run the stock ``build_ext``.

    When ``nvcc`` or ``hipcc`` is found, ``ccompiler.new_compiler`` is
    replaced by a wrapper that falls back to the project's own compiler
    class whenever the wrapped factory raises ``DistutilsPlatformError``,
    and ``self.compiler`` is set to ``'nvidia'`` to trigger that fallback.
    """
    gpu_compiler_found = (
        build.get_nvcc_path() is not None
        or build.get_hipcc_path() is not None
    )
    if gpu_compiler_found:
        stock_new_compiler = ccompiler.new_compiler

        def _new_compiler_with_fallback(*args, **kwargs):
            try:
                return stock_new_compiler(*args, **kwargs)
            except errors.DistutilsPlatformError:
                fallback_cls = (
                    _MSVCCompiler if PLATFORM_WIN32 else _UnixCCompiler)
                return fallback_cls(
                    None, kwargs['dry_run'], kwargs['force'])

        ccompiler.new_compiler = _new_compiler_with_fallback
        # 'nvidia' is deliberately a compiler name that makes
        # ccompiler.new_compiler() raise DistutilsPlatformError, so the
        # wrapper installed above gets to pick the compiler class.
        self.compiler = 'nvidia'

    context = cupy_builder.get_context()
    pyx_modules = get_ext_modules(True, context)  # get .pyx modules
    cythonize(pyx_modules, context)
    check_extensions(self.extensions)
    build_ext.build_ext.run(self)
def build_extensions(self) -> None:
    """Cythonize all extensions, retarget their sources, then build them.

    The number of parallel jobs is read from the ``CUPY_NUM_BUILD_JOBS``
    environment variable (``'4'`` when unset).

    Raises:
        RuntimeError: If a source file of an extension does not exist once
            the ``*.pyx`` entries have been renamed to ``*.cpp``.
    """
    job_count = int(os.environ.get('CUPY_NUM_BUILD_JOBS', '4'))
    if job_count > 1:
        self.parallel = job_count
        if hasattr(self.compiler, 'initialize'):
            # Works around a bug in setuptools/distutils on Windows:
            # MSVCCompiler normally initializes itself lazily on the first
            # compilation. In parallel mode that init code would run in
            # every thread, and since it is neither locked nor idempotent
            # it corrupts the internal state. Initialize eagerly, before
            # any thread is started.
            # https://github.com/pypa/setuptools/blob/v60.0.0/setuptools/_distutils/_msvccompiler.py#L322-L327
            self.compiler.initialize()

    # Turn "*.pyx" files into "*.cpp" files.
    print('Cythonizing...')
    cythonize(self.extensions, cupy_builder.get_context())

    # Point every extension at the generated "*.cpp" instead of the "*.pyx".
    # c.f. `Cython.Distutils.old_build_ext`
    for extension in self.extensions:
        pyx_files, other_files = filter_files_by_extension(
            extension.sources, '.pyx')
        generated_cpp = [
            f'{os.path.splitext(path)[0]}.cpp' for path in pyx_files
        ]
        extension.sources = generated_cpp + other_files

        missing = next(
            (path for path in extension.sources
             if not os.path.isfile(path)),
            None,
        )
        if missing is not None:
            raise RuntimeError(f'Fatal error: missing file: {missing}')

    super().build_extensions()
def build_extension(self, ext: setuptools.Extension) -> None:
    """Build a single extension, handling its device code separately.

    Args:
        ext: The extension to build. Its ``sources`` attribute is replaced
            and its ``extra_objects`` attribute is extended in place.
    """
    # "*.cu" files are compiled into object files first; the call hands
    # back the remaining sources together with those object files.
    host_sources, device_objects = compile_device_code(
        cupy_builder.get_context(), ext)

    # Drop the device code from the source list and link the precompiled
    # objects instead.
    ext.sources = host_sources  # type: ignore[attr-defined]
    ext.extra_objects += device_objects  # type: ignore[attr-defined]

    # The parent class takes over from here: it compiles the "*.cpp" files
    # and links them with the objects produced from "*.cu".
    super().build_extension(ext)
def cythonize(extensions, ctx: Context):
    """Run Cython over ``extensions`` with CuPy's directives and constants.

    Args:
        extensions: Passed through unchanged as the first argument of
            ``Cython.Build.cythonize``.
        ctx: Build context. The attributes ``linetrace``, ``profile``,
            ``annotate``, ``use_cuda_python``, ``use_stub`` and ``use_hip``
            are read from it.

    Returns:
        The return value of ``Cython.Build.cythonize``.
    """
    # Delay importing Cython as it may be installed via setup_requires if
    # the user does not have Cython installed.
    import Cython
    import Cython.Build
    cython_version = pkg_resources.parse_version(Cython.__version__)

    directives = {
        'linetrace': ctx.linetrace,
        'profile': ctx.profile,
        # Embed signatures for Sphinx documentation.
        'embedsignature': True,
    }

    # Enable CUDA Python.
    # TODO: add `cuda` to `setup_requires` only when this flag is set
    # NOTE: read from the `ctx` argument rather than re-fetching the global
    # context, so the function honours the context it was handed.
    use_cuda_python = ctx.use_cuda_python
    if use_cuda_python:
        print('Using CUDA Python')

    # Compile-time constants to be used in Cython code
    compile_time_env = {
        'CUPY_USE_CUDA_PYTHON': use_cuda_python,
        'CUPY_CUFFT_STATIC': False,
        'CUPY_CYTHON_VERSION': str(cython_version),
    }
    if ctx.use_stub:  # on RTD
        compile_time_env['CUPY_CUDA_VERSION'] = 0
        compile_time_env['CUPY_HIP_VERSION'] = 0
    elif ctx.use_hip:  # on ROCm/HIP
        # `use_hip` comes from the context (as in make_extensions) instead of
        # a free variable that is not defined in this function.
        compile_time_env['CUPY_CUDA_VERSION'] = 0
        compile_time_env['CUPY_HIP_VERSION'] = build.get_hip_version()
    else:  # on CUDA
        compile_time_env['CUPY_CUDA_VERSION'] = build.get_cuda_version()
        compile_time_env['CUPY_HIP_VERSION'] = 0

    cythonize_options = {
        'annotate': ctx.annotate,
        'compile_time_env': compile_time_env,
    }

    return Cython.Build.cythonize(
        extensions, verbose=True, language_level=3,
        compiler_directives=directives, **cythonize_options)
def get_compiler_setting(use_hip):
    """Assemble the C++ build settings for the CUDA or ROCm toolchain.

    As a side effect the module globals ``_jitify_path`` and ``_cub_path``
    are assigned.

    Args:
        use_hip: When truthy the ROCm installation is looked up with
            ``get_rocm_path()``; otherwise the CUDA installation is looked
            up with ``get_cuda_path()``.

    Returns:
        dict: The keys ``include_dirs``, ``library_dirs``,
        ``define_macros``, ``language`` and ``extra_compile_args``.

    Raises:
        Exception: If ``use_hip`` is truthy and no ``hipcub`` include
            directory could be located.
    """
    global _jitify_path, _cub_path

    cuda_path = None if use_hip else get_cuda_path()
    rocm_path = get_rocm_path() if use_hip else None

    include_dirs = []
    library_dirs = []
    define_macros = []
    extra_compile_args = []

    if cuda_path:
        include_dirs.append(os.path.join(cuda_path, 'include'))
        if PLATFORM_WIN32:
            cuda_lib_subdirs = [('bin',), ('lib', 'x64')]
        else:
            cuda_lib_subdirs = [('lib64',), ('lib',)]
        library_dirs.extend(
            os.path.join(cuda_path, *parts) for parts in cuda_lib_subdirs)

    if rocm_path:
        include_dirs.append(os.path.join(rocm_path, 'include'))
        include_dirs.extend(
            os.path.join(rocm_path, 'include', component)
            for component in ('hip', 'rocrand', 'hiprand', 'roctracer'))
        library_dirs.append(os.path.join(rocm_path, 'lib'))

    if use_hip:
        extra_compile_args.append('-std=c++11')

    if PLATFORM_WIN32:
        nvtx_root = os.environ.get('NVTOOLSEXT_PATH', '')
        if not os.path.exists(nvtx_root):
            define_macros.append(('CUPY_NO_NVTX', '1'))
        else:
            include_dirs.append(os.path.join(nvtx_root, 'include'))
            library_dirs.append(os.path.join(nvtx_root, 'lib', 'x64'))

    # For CUB, we need the complex and CUB headers. The search precedence for
    # the latter is:
    #   1. built-in CUB (for CUDA 11+ and ROCm)
    #   2. CuPy's CUB bundle
    # Note that starting CuPy v8 we no longer use CUB_PATH

    # for <cupy/complex.cuh>
    cupy_header = os.path.join(
        cupy_builder.get_context().source_root, 'cupy/_core/include')
    _jitify_path = os.path.join(cupy_header, 'cupy/jitify')

    # Look for the CUB headers shipped with the toolkit itself.
    if cuda_path:
        builtin_cub = os.path.join(cuda_path, 'include', 'cub')
    elif rocm_path:
        builtin_cub = os.path.join(rocm_path, 'include', 'hipcub')
    else:
        builtin_cub = None
    if builtin_cub is not None and not os.path.exists(builtin_cub):
        builtin_cub = None

    if builtin_cub:
        _cub_path = builtin_cub
    elif use_hip:
        # CuPy's bundle doesn't work for ROCm
        raise Exception('Please install hipCUB and retry')
    else:
        _cub_path = os.path.join(cupy_header, 'cupy', 'cub')

    # CUB first, then CuPy's own headers, ahead of everything else.
    include_dirs[:0] = [_cub_path, cupy_header]

    return {
        'include_dirs': include_dirs,
        'library_dirs': library_dirs,
        'define_macros': define_macros,
        'language': 'c++',
        'extra_compile_args': extra_compile_args,
    }
def make_extensions(ctx: Context, compiler, use_cython):
    """Produce the list of Extension instances that is passed to cythonize().

    Args:
        ctx: Build context. The attributes ``use_stub``, ``use_hip``,
            ``wheel_libs``, ``linetrace`` and ``no_rpath`` are read from it.
        compiler: Compiler object; its ``compiler_type`` is inspected and it
            is forwarded to ``preconfigure_modules``.
        use_cython: Forwarded to ``module_extension_sources``.

    Returns:
        list: One ``setuptools.Extension`` per file of every available
        module.

    Raises:
        Exception: If a required module is not among the available ones.
        AssertionError: On a platform that is neither Windows nor Linux.
    """
    MODULES = cupy_builder.get_modules(cupy_builder.get_context())

    no_cuda = ctx.use_stub
    use_hip = not no_cuda and ctx.use_hip
    settings = build.get_compiler_setting(use_hip)

    # Keep only the directories that actually exist.
    include_dirs = settings['include_dirs']
    settings['include_dirs'] = [
        x for x in include_dirs if os.path.exists(x)]
    settings['library_dirs'] = [
        x for x in settings['library_dirs'] if os.path.exists(x)]

    # Adjust rpath to use CUDA libraries in `cupy/.data/lib/*.so`) from CuPy.
    use_wheel_libs_rpath = (
        0 < len(ctx.wheel_libs) and not PLATFORM_WIN32)

    # In the environment with CUDA 7.5 on Ubuntu 16.04, gcc5.3 does not
    # automatically deal with memcpy because string.h header file has
    # been changed. This is a workaround for that environment.
    # See details in the below discussions:
    # https://github.com/BVLC/caffe/issues/4046
    # https://groups.google.com/forum/#!topic/theano-users/3ihQYiTRG4E
    settings['define_macros'].append(('_FORCE_INLINES', '1'))

    if ctx.linetrace:
        settings['define_macros'].append(('CYTHON_TRACE', '1'))
        settings['define_macros'].append(('CYTHON_TRACE_NOGIL', '1'))
    if no_cuda:
        settings['define_macros'].append(('CUPY_NO_CUDA', '1'))
    if use_hip:
        settings['define_macros'].append(('CUPY_USE_HIP', '1'))
        # introduced since ROCm 4.2.0
        settings['define_macros'].append(('__HIP_PLATFORM_AMD__', '1'))
        # deprecated since ROCm 4.2.0
        settings['define_macros'].append(('__HIP_PLATFORM_HCC__', '1'))

    if no_cuda:
        available_modules = [m['name'] for m in MODULES]
    else:
        available_modules, settings = preconfigure_modules(
            MODULES, compiler, settings)
        required_modules = get_required_modules(MODULES)
        if not (set(required_modules) <= set(available_modules)):
            raise Exception('Your CUDA environment is invalid. '
                            'Please check above error log.')

    ret = []
    for module in MODULES:
        if module['name'] not in available_modules:
            continue

        s = copy.deepcopy(settings)
        if not no_cuda:
            s['libraries'] = module['libraries']

        compile_args = s.setdefault('extra_compile_args', [])
        link_args = s.setdefault('extra_link_args', [])

        if module['name'] == 'cusolver':
            # cupy_backends/cupy_lapack.h has C++ template code
            compile_args.append('--std=c++11')
            # openmp is required for cusolver
            if use_hip:
                pass
            elif compiler.compiler_type == 'unix':
                compile_args.append('-fopenmp')
                link_args.append('-fopenmp')
            elif compiler.compiler_type == 'msvc':
                compile_args.append('/openmp')

        if module['name'] == 'random':
            if compiler.compiler_type == 'msvc':
                compile_args.append('-D_USE_MATH_DEFINES')

        if module['name'] == 'jitify':
            # this fixes RTD (no_cuda) builds...
            compile_args.append('--std=c++11')
            # if any change is made to the Jitify header, we force recompiling
            s['depends'] = ['./cupy/_core/include/cupy/jitify/jitify.hpp']

        if module['name'] == 'dlpack':
            # if any change is made to the DLPack header, we force recompiling
            s['depends'] = ['./cupy/_core/include/cupy/dlpack/dlpack.h']

        for f in module['file']:
            s_file = copy.deepcopy(s)
            name = module_extension_name(f)

            if name.endswith('fft._callback') and not PLATFORM_LINUX:
                continue

            rpath = []
            if not ctx.no_rpath:
                # Add library directories (e.g., `/usr/local/cuda/lib64`) to
                # RPATH.
                rpath += s_file['library_dirs']

            if use_wheel_libs_rpath:
                # Add `cupy/.data/lib` (where shared libraries included in
                # wheels reside) to RPATH.
                # The path is resolved relative to the module, e.g., use
                # `$ORIGIN/../cupy/.data/lib` for `cupy/cudnn.so` and
                # `$ORIGIN/../../../cupy/.data/lib` for
                # `cupy_backends/cuda/libs/cudnn.so`.
                depth = name.count('.')
                rpath.append('{}{}/cupy/.data/lib'.format(
                    _rpath_base(), '/..' * depth))

            if not PLATFORM_WIN32 and not PLATFORM_LINUX:
                # Raised explicitly instead of `assert False` so that the
                # check is not stripped when Python runs with -O.
                raise AssertionError("macOS is no longer supported")

            if PLATFORM_LINUX and rpath:
                ldflag = '-Wl,--disable-new-dtags,' + ','.join(
                    '-rpath,' + p for p in rpath)
                s_file.setdefault('extra_link_args', []).append(ldflag)

            sources = module_extension_sources(f, use_cython, no_cuda)
            ret.append(setuptools.Extension(name, sources, **s_file))

    return ret