def build_torch_extension(build_ext, options, torch_version):
    pytorch_compile_flags = ["-std=c++14" if flag == "-std=c++11" else flag
                             for flag in options['COMPILE_FLAGS']]
    have_cuda = is_torch_cuda(build_ext, include_dirs=options['INCLUDES'],
                              extra_compile_args=pytorch_compile_flags)
    if not have_cuda and check_macro(options['MACROS'], 'HAVE_CUDA'):
        raise DistutilsPlatformError(
            'byteps build with GPU support was requested, but this PyTorch '
            'installation does not support CUDA.')

    # Update HAVE_CUDA to mean that PyTorch supports CUDA.
    updated_macros = set_macro(options['MACROS'], 'HAVE_CUDA',
                               str(int(have_cuda)))

    # Export TORCH_VERSION equal to our representation of torch.__version__.
    # Internally it's used for backwards compatibility checks.
    updated_macros = set_macro(updated_macros, 'TORCH_VERSION',
                               str(torch_version))

    # Always set _GLIBCXX_USE_CXX11_ABI, since PyTorch can only detect whether it was set to 1.
    import torch
    updated_macros = set_macro(updated_macros, '_GLIBCXX_USE_CXX11_ABI',
                               str(int(torch.compiled_with_cxx11_abi())))

    # PyTorch requires -DTORCH_API_INCLUDE_EXTENSION_H
    updated_macros = set_macro(updated_macros, 'TORCH_API_INCLUDE_EXTENSION_H', '1')

    if have_cuda:
        from torch.utils.cpp_extension import CUDAExtension as TorchExtension
    else:
        # CUDAExtension fails with `ld: library not found for -lcudart` if CUDA is not present
        from torch.utils.cpp_extension import CppExtension as TorchExtension

    ext = TorchExtension(
        pytorch_lib.name,
        define_macros=updated_macros,
        include_dirs=options['INCLUDES'],
        sources=options['SOURCES'] + [
            'byteps/torch/ops.cc',
            'byteps/torch/ready_event.cc',
            'byteps/torch/cuda_util.cc',
            'byteps/torch/adapter.cc',
            'byteps/torch/handle_manager.cc'
        ],
        extra_compile_args=pytorch_compile_flags,
        extra_link_args=options['LINK_FLAGS'],
        extra_objects=options['EXTRA_OBJECTS'],
        library_dirs=options['LIBRARY_DIRS'],
        libraries=options['LIBRARIES'])

    # Patch an existing pytorch_lib extension object.
    for k, v in ext.__dict__.items():
        pytorch_lib.__dict__[k] = v

    build_ext.build_extension(pytorch_lib)
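# The check_macro/set_macro helpers used above are defined elsewhere in this
# setup.py. Below is a minimal sketch of what they might look like, assuming
# macros are the usual distutils list of (name, value) pairs; this is an
# illustration, not the verbatim definitions.
def check_macro(macros, key):
    # True if the macro is present and set to a truthy '1'.
    return any(k == key and (v == '1' or v == 1) for k, v in macros)


def set_macro(macros, key, new_value):
    # Replace the macro in place if it exists, otherwise append it.
    if any(k == key for k, _ in macros):
        return [(k, new_value if k == key else v) for k, v in macros]
    return macros + [(key, new_value)]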
def build_torch_extension_v2(build_ext, options, torch_version):
    have_cuda = is_torch_cuda_v2(build_ext, include_dirs=options['INCLUDES'],
                                 extra_compile_args=options['COMPILE_FLAGS'])
    if not have_cuda and check_macro(options['MACROS'], 'HAVE_CUDA'):
        raise DistutilsPlatformError(
            'Horovod build with GPU support was requested, but this PyTorch '
            'installation does not support CUDA.')

    # Update HAVE_CUDA to mean that PyTorch supports CUDA. Internally, we will be checking
    # HOROVOD_GPU_(ALLREDUCE|ALLGATHER|BROADCAST) to decide whether we should use GPU
    # version or transfer tensors to CPU memory for those operations.
    updated_macros = set_macro(options['MACROS'], 'HAVE_CUDA',
                               str(int(have_cuda)))

    # Export TORCH_VERSION equal to our representation of torch.__version__.
    # Internally it's used for backwards compatibility checks.
    updated_macros = set_macro(updated_macros, 'TORCH_VERSION',
                               str(torch_version))

    # Always set _GLIBCXX_USE_CXX11_ABI, since PyTorch can only detect whether it was set to 1.
    import torch
    updated_macros = set_macro(updated_macros, '_GLIBCXX_USE_CXX11_ABI',
                               str(int(torch.compiled_with_cxx11_abi())))

    # PyTorch requires -DTORCH_API_INCLUDE_EXTENSION_H
    updated_macros = set_macro(updated_macros, 'TORCH_API_INCLUDE_EXTENSION_H', '1')

    if have_cuda:
        from torch.utils.cpp_extension import CUDAExtension as TorchExtension
    else:
        # CUDAExtension fails with `ld: library not found for -lcudart` if CUDA is not present
        from torch.utils.cpp_extension import CppExtension as TorchExtension

    ext = TorchExtension(
        torch_mpi_lib_v2.name,
        define_macros=updated_macros,
        include_dirs=options['INCLUDES'],
        sources=options['SOURCES'] + [
            'horovod/torch/mpi_ops_v2.cc',
            'horovod/torch/handle_manager.cc',
            'horovod/torch/ready_event.cc',
            'horovod/torch/cuda_util.cc',
            'horovod/torch/adapter_v2.cc'
        ],
        extra_compile_args=options['COMPILE_FLAGS'],
        extra_link_args=options['LINK_FLAGS'],
        library_dirs=options['LIBRARY_DIRS'],
        libraries=options['LIBRARIES'])

    # Patch an existing torch_mpi_lib_v2 extension object.
    for k, v in ext.__dict__.items():
        torch_mpi_lib_v2.__dict__[k] = v

    build_ext.build_extension(torch_mpi_lib_v2)
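# is_torch_cuda/is_torch_cuda_v2 above compile-test a small CUDA-aware source
# file against the PyTorch headers. A much simpler (hypothetical) probe that
# only inspects the installed wheel is sketched here for illustration; it does
# not replace the compile test performed by the real helpers.
def _torch_wheel_has_cuda():
    try:
        import torch
        # torch.version.cuda is None for CPU-only builds of PyTorch.
        return torch.version.cuda is not None
    except ImportError:
        return False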
def build_extensions(self):
    pre_setup.setup()
    make_option = ""

    # To resolve tf-gcc incompatibility
    has_cxx_flag = False
    glibcxx_flag = False
    if not int(os.environ.get('BYTEPS_WITHOUT_TENSORFLOW', 0)):
        try:
            import tensorflow as tf
            make_option += 'ADD_CFLAGS="'
            for flag in tf.sysconfig.get_compile_flags():
                if 'D_GLIBCXX_USE_CXX11_ABI' in flag:
                    has_cxx_flag = True
                    glibcxx_flag = False if (flag[-1] == '0') else True
                    make_option += flag + ' '
                    break
            make_option += '" '
        except:
            pass

    # To resolve torch-gcc incompatibility
    if not int(os.environ.get('BYTEPS_WITHOUT_PYTORCH', 0)):
        try:
            import torch
            torch_flag = torch.compiled_with_cxx11_abi()
            if has_cxx_flag:
                if glibcxx_flag != torch_flag:
                    raise DistutilsError(
                        '-D_GLIBCXX_USE_CXX11_ABI is not consistent between TensorFlow '
                        'and PyTorch, consider installing them separately.')
            else:
                make_option += 'ADD_CFLAGS=-D_GLIBCXX_USE_CXX11_ABI=' + \
                    str(int(torch_flag)) + ' '
                has_cxx_flag = True
                glibcxx_flag = torch_flag
        except:
            pass

    print("build_ucx is", build_ucx())
    if build_ucx():
        ucx_path = pre_setup.ucx_path.strip()
        if not ucx_path:
            ucx_path = "https://codeload.github.com/openucx/ucx/zip/824c9f03"
        print("ucx_path is", ucx_path)
        cmd = "sudo apt install -y build-essential libtool autoconf automake libnuma-dev unzip;" + \
              "rm -rf ucx*;" + \
              "curl " + ucx_path + " -o ucx.zip; " + \
              "unzip -o ./ucx.zip -d tmp; " + \
              "rm -rf ucx-build; mkdir -p ucx-build; mv tmp/ucx-*/* ucx-build/;" + \
              "cd ucx-build; pwd; which libtoolize; " + \
              "./autogen.sh; ./autogen.sh && ./contrib/configure-release --enable-mt && make -j && sudo make install -j"
        make_process = subprocess.Popen(cmd,
                                        cwd='3rdparty',
                                        stdout=sys.stdout,
                                        stderr=sys.stderr,
                                        shell=True)
        make_process.communicate()
        if make_process.returncode:
            raise DistutilsSetupError('An ERROR occurred while running the '
                                      'Makefile for the ucx library. '
                                      'Exit code: {0}'.format(make_process.returncode))

    if not os.path.exists("3rdparty/ps-lite/build/libps.a") or \
            not os.path.exists("3rdparty/ps-lite/deps/lib"):
        if os.environ.get('CI', 'false') == 'false':
            make_option += "-j "
        if has_rdma_header():
            make_option += "USE_RDMA=1 "
        if build_ucx():
            make_option += 'USE_UCX=1 '
        make_option += pre_setup.extra_make_option()
        make_process = subprocess.Popen('make ' + make_option,
                                        cwd='3rdparty/ps-lite',
                                        stdout=sys.stdout,
                                        stderr=sys.stderr,
                                        shell=True)
        make_process.communicate()
        if make_process.returncode:
            raise DistutilsSetupError('An ERROR occurred while running the '
                                      'Makefile for the ps-lite library. '
                                      'Exit code: {0}'.format(make_process.returncode))

    options = get_common_options(self)
    if has_cxx_flag:
        options['COMPILE_FLAGS'] += ['-D_GLIBCXX_USE_CXX11_ABI=' + str(int(glibcxx_flag))]

    built_plugins = []
    try:
        build_server(self, options)
    except:
        raise DistutilsSetupError('An ERROR occurred while building the server module.\n\n'
                                  '%s' % traceback.format_exc())

    # If PyTorch is installed, it must be imported before others, otherwise
    # we may get an error: dlopen: cannot load any more object with static TLS
    if not int(os.environ.get('BYTEPS_WITHOUT_PYTORCH', 0)):
        dummy_import_torch()

    if not int(os.environ.get('BYTEPS_WITHOUT_TENSORFLOW', 0)):
        try:
            build_tf_extension(self, options)
            built_plugins.append(True)
            print('INFO: TensorFlow extension is built successfully.')
        except:
            if not int(os.environ.get('BYTEPS_WITH_TENSORFLOW', 0)):
                print('INFO: Unable to build TensorFlow plugin, will skip it.\n\n'
                      '%s' % traceback.format_exc())
                built_plugins.append(False)
            else:
                raise

    if not int(os.environ.get('BYTEPS_WITHOUT_PYTORCH', 0)):
        try:
            torch_version = check_torch_version()
            build_torch_extension(self, options, torch_version)
            built_plugins.append(True)
            print('INFO: PyTorch extension is built successfully.')
        except:
            if not int(os.environ.get('BYTEPS_WITH_PYTORCH', 0)):
                print('INFO: Unable to build PyTorch plugin, will skip it.\n\n'
                      '%s' % traceback.format_exc())
                built_plugins.append(False)
            else:
                raise

    if not int(os.environ.get('BYTEPS_WITHOUT_MXNET', 0)):
        # fix "libcuda.so.1 not found" issue
        cuda_home = os.environ.get('BYTEPS_CUDA_HOME', '/usr/local/cuda')
        cuda_stub_path = cuda_home + '/lib64/stubs'
        ln_command = "cd " + cuda_stub_path + "; ln -sf libcuda.so libcuda.so.1"
        os.system(ln_command)
        try:
            build_mx_extension(self, options)
            built_plugins.append(True)
            print('INFO: MXNet extension is built successfully.')
        except:
            if not int(os.environ.get('BYTEPS_WITH_MXNET', 0)):
                print('INFO: Unable to build MXNet plugin, will skip it.\n\n'
                      '%s' % traceback.format_exc())
                built_plugins.append(False)
            else:
                raise
        finally:
            os.system("rm -rf " + cuda_stub_path + "/libcuda.so.1")

    if not built_plugins:
        print('INFO: Only server module is built.')
        return

    if not any(built_plugins):
        raise DistutilsError(
            'None of TensorFlow, MXNet, PyTorch plugins were built. See errors above.')
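# check_torch_version() above returns the integer that ends up in the
# TORCH_VERSION macro. A plausible sketch, assuming torch.__version__ is packed
# into a single long integer (the exact encoding lives in the real helper and
# may differ):
import re


def parse_torch_version(version_str):
    m = re.match(r'^(\d+)(?:\.(\d+))?(?:\.(\d+))?', version_str)
    if m is None:
        return None
    major, minor, patch = (int(g) if g is not None else 0 for g in m.groups())
    # e.g. '1.4.0' -> 1_004_000_000 under this hypothetical packing.
    return major * 10 ** 9 + minor * 10 ** 6 + patch * 10 ** 3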
- haskell-torch-matio
- matplotlib-haskell
{2}
extra-include-dirs:
- {1}/include
- {1}/include/TH
- {1}/pytorch/
extra-lib-dirs:
- {0}/lib/
- {1}/lib/
"""

with_cxx11_abi = 0
if torch.compiled_with_cxx11_abi():
    with_cxx11_abi = 1
    print("Found installed PyTorch with built C++ bindings")

with open("config.yaml", "w") as f:
    f.write(
        defaults_yaml.format((1 if withCuda else 0), os.environ['CONDA_PREFIX'],
                             pytorch_root, with_cxx11_abi))
with open("stack.yaml", "w") as f:
    f.write(
        stack_yaml.format(os.environ['CONDA_PREFIX'], pytorch_root,
                          ("- ihaskell" if withJupyter else "")))

if withCuda:
    print('CUDA is enabled')
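# pytorch_root and CONDA_PREFIX above are substituted into the YAML templates.
# A hedged sketch of how they could be resolved (an assumption -- the real
# haskell-torch setup script may obtain them differently, e.g. from arguments):
def resolve_template_paths():
    import os
    import torch
    pytorch_root = os.path.dirname(torch.__file__)  # install dir of the torch package
    conda_prefix = os.environ['CONDA_PREFIX']        # set inside an activated conda env
    return conda_prefix, pytorch_root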
def build_torch_extension(build_ext, global_options, torch_version):
    # Back up the options to prevent other plugins from linking against libs
    # compiled with this plugin's compiler.
    import torch
    is_cxx11_abi = torch.compiled_with_cxx11_abi()

    options = copy.deepcopy(global_options)
    have_cuda = is_torch_cuda(build_ext, include_dirs=options['INCLUDES'],
                              extra_compile_args=options['COMPILE_FLAGS'])
    have_cuda = have_cuda and torch.cuda.is_available()
    if have_cuda:
        cuda_include_dirs, cuda_lib_dirs = get_cuda_dirs(
            build_ext, options['COMPILE_FLAGS'])
        nvcc_cmd = get_nvcc_cmd()
        cuda_extra_objects = build_nvcc_extra_objects(nvcc_cmd, is_cxx11_abi)
        options['EXTRA_OBJECTS'] += cuda_extra_objects

        options['INCLUDES'] += cuda_include_dirs
        options['LIBRARY_DIRS'] += cuda_lib_dirs
        options['LIBRARIES'] += ['cudart']
        print('INFO: Try PyTorch extension with CUDA.')

    # Update HAVE_CUDA to mean that PyTorch supports CUDA.
    updated_macros = set_macro(options['MACROS'], 'HAVE_CUDA',
                               str(int(have_cuda)))

    # TODO(ybc) make this into common options?
    have_nccl = os.getenv('BLUEFOG_WITH_NCCL', '0')
    assert have_nccl in ['0', '1'], "BLUEFOG_WITH_NCCL has to be either 0 or 1"
    if have_cuda and have_nccl == '1':
        nccl_include_dirs, nccl_lib_dirs, nccl_lib = get_nccl_dirs(
            build_ext, cuda_include_dirs, cuda_lib_dirs,
            options['COMPILE_FLAGS'])
        options['INCLUDES'] += nccl_include_dirs
        options['LIBRARY_DIRS'] += nccl_lib_dirs
        options['LIBRARIES'] += nccl_lib
        options['SOURCES'] += [
            "bluefog/common/nccl_controller.cc",
            "bluefog/common/nccl_win.cc"
        ]
        print('INFO: Try PyTorch extension with NCCL.')
    updated_macros = set_macro(updated_macros, 'HAVE_NCCL', str(int(have_nccl)))

    updated_macros = set_macro(updated_macros, 'TORCH_VERSION', str(torch_version))

    # Always set _GLIBCXX_USE_CXX11_ABI, since PyTorch can only detect whether it was set to 1.
    updated_macros = set_macro(updated_macros, '_GLIBCXX_USE_CXX11_ABI',
                               str(int(is_cxx11_abi)))

    # PyTorch requires -DTORCH_API_INCLUDE_EXTENSION_H
    updated_macros = set_macro(updated_macros, 'TORCH_API_INCLUDE_EXTENSION_H', '1')

    if have_cuda:
        from torch.utils.cpp_extension import CUDAExtension as TorchExtension
    else:
        # CUDAExtension fails with `ld: library not found for -lcudart` if CUDA is not present
        from torch.utils.cpp_extension import CppExtension as TorchExtension

    ext = TorchExtension(
        bluefog_torch_mpi_lib.name,
        define_macros=updated_macros,
        include_dirs=options['INCLUDES'],
        sources=options['SOURCES'] + [
            "bluefog/torch/adapter.cc",
            "bluefog/torch/handle_manager.cc",
            "bluefog/torch/mpi_ops.cc",
            "bluefog/torch/mpi_win_ops.cc"
        ],
        extra_compile_args=options['COMPILE_FLAGS'],
        extra_link_args=options['LINK_FLAGS'],
        library_dirs=options['LIBRARY_DIRS'],
        extra_objects=options['EXTRA_OBJECTS'],
        libraries=options['LIBRARIES'])

    # Patch an existing bluefog_torch_mpi_lib extension object.
    for k, v in ext.__dict__.items():
        bluefog_torch_mpi_lib.__dict__[k] = v

    build_ext.build_extension(bluefog_torch_mpi_lib)
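# get_nvcc_cmd() above resolves the nvcc binary used to pre-compile the extra
# CUDA objects. A minimal sketch, assuming nvcc is discoverable on PATH or under
# CUDA_HOME (the real helper may search differently); find_nvcc is an
# illustrative name, not the actual function:
import os
from shutil import which
from distutils.errors import DistutilsPlatformError


def find_nvcc():
    nvcc = which('nvcc')
    if nvcc is None:
        cuda_home = os.environ.get('CUDA_HOME', '/usr/local/cuda')
        candidate = os.path.join(cuda_home, 'bin', 'nvcc')
        nvcc = candidate if os.path.exists(candidate) else None
    if nvcc is None:
        raise DistutilsPlatformError('Unable to find the nvcc compiler')
    return nvcc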