Beispiel #1
0
def build_torch_extension(build_ext, options, torch_version):
    """Configure the byteps PyTorch extension and build it.

    Args:
        build_ext: Build command object. It is handed to ``is_torch_cuda`` and
            its ``build_extension`` method performs the actual build.
        options: Dict of shared build options. Keys read here are
            ``COMPILE_FLAGS``, ``INCLUDES``, ``MACROS``, ``SOURCES``,
            ``LINK_FLAGS``, ``EXTRA_OBJECTS``, ``LIBRARY_DIRS`` and
            ``LIBRARIES``.
        torch_version: Value exported, stringified, as the ``TORCH_VERSION``
            macro.

    Raises:
        DistutilsPlatformError: ``is_torch_cuda`` reports no CUDA support while
            ``check_macro`` finds ``HAVE_CUDA`` in ``options['MACROS']``.
    """
    # Swap any -std=c++11 flag for -std=c++14; every other flag passes through.
    torch_compile_args = []
    for compile_flag in options['COMPILE_FLAGS']:
        if compile_flag == "-std=c++11":
            torch_compile_args.append("-std=c++14")
        else:
            torch_compile_args.append(compile_flag)

    cuda_supported = is_torch_cuda(build_ext,
                                   include_dirs=options['INCLUDES'],
                                   extra_compile_args=torch_compile_args)
    if not cuda_supported:
        if check_macro(options['MACROS'], 'HAVE_CUDA'):
            raise DistutilsPlatformError(
                'byteps build with GPU support was requested, but this PyTorch '
                'installation does not support CUDA.')

    # From here on HAVE_CUDA means "this PyTorch supports CUDA".
    macros = set_macro(options['MACROS'], 'HAVE_CUDA',
                       str(int(cuda_supported)))

    # TORCH_VERSION carries our representation of torch.__version__; it is
    # used internally for backwards compatibility checks.
    macros = set_macro(macros, 'TORCH_VERSION', str(torch_version))

    # Always set _GLIBCXX_USE_CXX11_ABI, since PyTorch can only detect whether
    # it was set to 1.
    import torch
    cxx11_abi = str(int(torch.compiled_with_cxx11_abi()))
    macros = set_macro(macros, '_GLIBCXX_USE_CXX11_ABI', cxx11_abi)

    # PyTorch requires -DTORCH_API_INCLUDE_EXTENSION_H
    macros = set_macro(macros, 'TORCH_API_INCLUDE_EXTENSION_H', '1')

    if cuda_supported:
        from torch.utils.cpp_extension import CUDAExtension as extension_cls
    else:
        # CUDAExtension fails with `ld: library not found for -lcudart` if
        # CUDA is not present, so fall back to the plain C++ extension.
        from torch.utils.cpp_extension import CppExtension as extension_cls

    torch_sources = [
        'byteps/torch/ops.cc',
        'byteps/torch/ready_event.cc',
        'byteps/torch/cuda_util.cc',
        'byteps/torch/adapter.cc',
        'byteps/torch/handle_manager.cc',
    ]
    configured = extension_cls(
        pytorch_lib.name,
        define_macros=macros,
        include_dirs=options['INCLUDES'],
        sources=options['SOURCES'] + torch_sources,
        extra_compile_args=torch_compile_args,
        extra_link_args=options['LINK_FLAGS'],
        extra_objects=options['EXTRA_OBJECTS'],
        library_dirs=options['LIBRARY_DIRS'],
        libraries=options['LIBRARIES'])

    # Copy every attribute of the freshly configured extension onto the
    # existing pytorch_lib extension object, then build that object.
    vars(pytorch_lib).update(vars(configured))
    build_ext.build_extension(pytorch_lib)
Beispiel #2
0
def build_torch_extension_v2(build_ext, options, torch_version):
    """Configure the Horovod PyTorch (v2) extension and build it.

    Args:
        build_ext: Build command object. It is handed to ``is_torch_cuda_v2``
            and its ``build_extension`` method performs the actual build.
        options: Dict of shared build options. Keys read here are
            ``INCLUDES``, ``COMPILE_FLAGS``, ``MACROS``, ``SOURCES``,
            ``LINK_FLAGS``, ``LIBRARY_DIRS`` and ``LIBRARIES``.
        torch_version: Value exported, stringified, as the ``TORCH_VERSION``
            macro.

    Raises:
        DistutilsPlatformError: ``is_torch_cuda_v2`` reports no CUDA support
            while ``check_macro`` finds ``HAVE_CUDA`` in ``options['MACROS']``.
    """
    include_dirs = options['INCLUDES']
    compile_args = options['COMPILE_FLAGS']

    cuda_supported = is_torch_cuda_v2(build_ext,
                                      include_dirs=include_dirs,
                                      extra_compile_args=compile_args)
    if not cuda_supported:
        if check_macro(options['MACROS'], 'HAVE_CUDA'):
            raise DistutilsPlatformError(
                'Horovod build with GPU support was requested, but this PyTorch '
                'installation does not support CUDA.')

    # HAVE_CUDA now means that PyTorch supports CUDA. Internally, we will be
    # checking HOROVOD_GPU_(ALLREDUCE|ALLGATHER|BROADCAST) to decide whether
    # to use the GPU version or transfer tensors to CPU memory for those
    # operations.
    macros = set_macro(options['MACROS'], 'HAVE_CUDA',
                       str(int(cuda_supported)))

    # TORCH_VERSION carries our representation of torch.__version__; it is
    # used internally for backwards compatibility checks.
    macros = set_macro(macros, 'TORCH_VERSION', str(torch_version))

    # Always set _GLIBCXX_USE_CXX11_ABI, since PyTorch can only detect whether
    # it was set to 1.
    import torch
    cxx11_abi = str(int(torch.compiled_with_cxx11_abi()))
    macros = set_macro(macros, '_GLIBCXX_USE_CXX11_ABI', cxx11_abi)

    # PyTorch requires -DTORCH_API_INCLUDE_EXTENSION_H
    macros = set_macro(macros, 'TORCH_API_INCLUDE_EXTENSION_H', '1')

    if cuda_supported:
        from torch.utils.cpp_extension import CUDAExtension as extension_cls
    else:
        # CUDAExtension fails with `ld: library not found for -lcudart` if
        # CUDA is not present, so fall back to the plain C++ extension.
        from torch.utils.cpp_extension import CppExtension as extension_cls

    torch_sources = [
        'horovod/torch/mpi_ops_v2.cc',
        'horovod/torch/handle_manager.cc',
        'horovod/torch/ready_event.cc',
        'horovod/torch/cuda_util.cc',
        'horovod/torch/adapter_v2.cc',
    ]
    configured = extension_cls(
        torch_mpi_lib_v2.name,
        define_macros=macros,
        include_dirs=options['INCLUDES'],
        sources=options['SOURCES'] + torch_sources,
        extra_compile_args=options['COMPILE_FLAGS'],
        extra_link_args=options['LINK_FLAGS'],
        library_dirs=options['LIBRARY_DIRS'],
        libraries=options['LIBRARIES'])

    # Copy every attribute of the freshly configured extension onto the
    # existing torch_mpi_lib_v2 extension object, then build that object.
    vars(torch_mpi_lib_v2).update(vars(configured))
    build_ext.build_extension(torch_mpi_lib_v2)
Beispiel #3
0
    def build_extensions(self):
        """Build the native dependencies, the server module and the plugins.

        Steps, in order:

        1. Run ``pre_setup.setup()``.
        2. Probe TensorFlow and PyTorch (unless disabled through
           ``BYTEPS_WITHOUT_TENSORFLOW`` / ``BYTEPS_WITHOUT_PYTORCH``) for the
           ``_GLIBCXX_USE_CXX11_ABI`` setting they were compiled with, and
           forward it to the ps-lite make invocation and to the common
           compile flags.
        3. Optionally download and build UCX when ``build_ucx()`` is truthy.
        4. Run ``make`` in ``3rdparty/ps-lite`` when its build outputs are
           missing.
        5. Build the server module, then the TensorFlow, PyTorch and MXNet
           plugins. A plugin failure is only fatal when the matching
           ``BYTEPS_WITH_<FRAMEWORK>`` variable is set.

        Raises:
            DistutilsError: TensorFlow and PyTorch disagree on
                ``_GLIBCXX_USE_CXX11_ABI``, or plugins were attempted and none
                of them could be built.
            DistutilsSetupError: The UCX build, the ps-lite build or the
                server module build failed.
        """
        pre_setup.setup()

        make_option = ""
        # To resolve tf-gcc incompatibility
        has_cxx_flag = False
        glibcxx_flag = False
        if not int(os.environ.get('BYTEPS_WITHOUT_TENSORFLOW', 0)):
            # Only the probing is best-effort. Fetching the flags before
            # touching make_option guarantees the ADD_CFLAGS="..." quoting is
            # never left half-open if TensorFlow is missing or broken.
            try:
                import tensorflow as tf
                tf_compile_flags = tf.sysconfig.get_compile_flags()
            except Exception:
                tf_compile_flags = None

            if tf_compile_flags is not None:
                make_option += 'ADD_CFLAGS="'
                for flag in tf_compile_flags:
                    if 'D_GLIBCXX_USE_CXX11_ABI' in flag:
                        has_cxx_flag = True
                        # The flag ends in the ABI value, '0' or '1'.
                        glibcxx_flag = flag[-1] != '0'
                        make_option += flag + ' '
                        break
                make_option += '" '

        # To resolve torch-gcc incompatibility
        if not int(os.environ.get('BYTEPS_WITHOUT_PYTORCH', 0)):
            # Only the probing is best-effort. The consistency check below
            # must live outside the try block, otherwise its DistutilsError
            # would be swallowed together with import failures.
            try:
                import torch
                torch_flag = torch.compiled_with_cxx11_abi()
            except Exception:
                torch_flag = None

            if torch_flag is not None:
                if has_cxx_flag:
                    if glibcxx_flag != torch_flag:
                        raise DistutilsError(
                            '-D_GLIBCXX_USE_CXX11_ABI is not consistent between TensorFlow and PyTorch, '
                            'consider install them separately.')
                else:
                    make_option += 'ADD_CFLAGS=-D_GLIBCXX_USE_CXX11_ABI=' + \
                                    str(int(torch_flag)) + ' '
                    has_cxx_flag = True
                    glibcxx_flag = torch_flag

        print("build_ucx is", build_ucx())
        if build_ucx():
            ucx_path = pre_setup.ucx_path.strip()
            if not ucx_path:
                ucx_path = "https://codeload.github.com/openucx/ucx/zip/824c9f03"
            print("ucx_path is", ucx_path)
            # NOTE(review): ucx_path is interpolated into a shell command
            # line; it is assumed to come from trusted build configuration.
            cmd = "sudo apt install -y build-essential libtool autoconf automake libnuma-dev unzip;" +\
            "rm -rf ucx*;" +\
            "curl " + ucx_path + " -o ucx.zip; " + \
                "unzip -o ./ucx.zip -d tmp; " + \
                "rm -rf ucx-build; mkdir -p ucx-build; mv tmp/ucx-*/* ucx-build/;" +\
                "cd ucx-build; pwd; which libtoolize; " + \
                "./autogen.sh; ./autogen.sh && ./contrib/configure-release --enable-mt && make -j && sudo make install -j"
            make_process = subprocess.Popen(cmd,
                                            cwd='3rdparty',
                                            stdout=sys.stdout,
                                            stderr=sys.stderr,
                                            shell=True)
            make_process.communicate()
            if make_process.returncode:
                raise DistutilsSetupError('An ERROR occured while running the '
                                          'Makefile for the ucx library. '
                                          'Exit code: {0}'.format(make_process.returncode))

        # Run the ps-lite Makefile only when its outputs are not there yet.
        if not os.path.exists("3rdparty/ps-lite/build/libps.a") or \
           not os.path.exists("3rdparty/ps-lite/deps/lib"):
            # Parallel make is used unless the CI environment variable is set
            # to something other than 'false'.
            if os.environ.get('CI', 'false') == 'false':
                make_option += "-j "
            if has_rdma_header():
                make_option += "USE_RDMA=1 "
            if build_ucx():
                make_option += 'USE_UCX=1 '

            make_option += pre_setup.extra_make_option()

            make_process = subprocess.Popen('make ' + make_option,
                                            cwd='3rdparty/ps-lite',
                                            stdout=sys.stdout,
                                            stderr=sys.stderr,
                                            shell=True)
            make_process.communicate()
            if make_process.returncode:
                raise DistutilsSetupError('An ERROR occured while running the '
                                          'Makefile for the ps-lite library. '
                                          'Exit code: {0}'.format(make_process.returncode))

        options = get_common_options(self)
        if has_cxx_flag:
            options['COMPILE_FLAGS'] += ['-D_GLIBCXX_USE_CXX11_ABI=' + str(int(glibcxx_flag))]

        # One entry per attempted plugin: True if it built, False if skipped.
        built_plugins = []
        try:
            build_server(self, options)
        except Exception:
            raise DistutilsSetupError('An ERROR occured while building the server module.\n\n'
                                      '%s' % traceback.format_exc())

        # If PyTorch is installed, it must be imported before others, otherwise
        # we may get an error: dlopen: cannot load any more object with static TLS
        if not int(os.environ.get('BYTEPS_WITHOUT_PYTORCH', 0)):
            dummy_import_torch()

        if not int(os.environ.get('BYTEPS_WITHOUT_TENSORFLOW', 0)):
            try:
                build_tf_extension(self, options)
                built_plugins.append(True)
                print('INFO: Tensorflow extension is built successfully.')
            except Exception:
                # Fatal only when the plugin was explicitly requested.
                if not int(os.environ.get('BYTEPS_WITH_TENSORFLOW', 0)):
                    print('INFO: Unable to build TensorFlow plugin, will skip it.\n\n'
                          '%s' % traceback.format_exc())
                    built_plugins.append(False)
                else:
                    raise
        if not int(os.environ.get('BYTEPS_WITHOUT_PYTORCH', 0)):
            try:
                torch_version = check_torch_version()
                build_torch_extension(self, options, torch_version)
                built_plugins.append(True)
                print('INFO: PyTorch extension is built successfully.')
            except Exception:
                # Fatal only when the plugin was explicitly requested.
                if not int(os.environ.get('BYTEPS_WITH_PYTORCH', 0)):
                    print('INFO: Unable to build PyTorch plugin, will skip it.\n\n'
                          '%s' % traceback.format_exc())
                    built_plugins.append(False)
                else:
                    raise
        if not int(os.environ.get('BYTEPS_WITHOUT_MXNET', 0)):
            # fix "libcuda.so.1 not found" issue: temporarily expose the CUDA
            # stub library under the libcuda.so.1 name; removed in `finally`.
            cuda_home = os.environ.get('BYTEPS_CUDA_HOME', '/usr/local/cuda')
            cuda_stub_path = cuda_home + '/lib64/stubs'
            ln_command = "cd " + cuda_stub_path + "; ln -sf libcuda.so libcuda.so.1"
            os.system(ln_command)
            try:
                build_mx_extension(self, options)
                built_plugins.append(True)
                print('INFO: MXNet extension is built successfully.')
            except Exception:
                # Fatal only when the plugin was explicitly requested.
                if not int(os.environ.get('BYTEPS_WITH_MXNET', 0)):
                    print('INFO: Unable to build MXNet plugin, will skip it.\n\n'
                          '%s' % traceback.format_exc())
                    built_plugins.append(False)
                else:
                    raise
            finally:
                os.system("rm -rf " + cuda_stub_path + "/libcuda.so.1")

        # No plugin was even attempted (all disabled through environment).
        if not built_plugins:
            print('INFO: Only server module is built.')
            return

        if not any(built_plugins):
            raise DistutilsError(
                'None of TensorFlow, MXNet, PyTorch plugins were built. See errors above.')
Beispiel #4
0
- haskell-torch-matio
- matplotlib-haskell
{2}

extra-include-dirs:
  - {1}/include
  - {1}/include/TH
  - {1}/pytorch/

extra-lib-dirs:
  - {0}/lib/
  - {1}/lib/
"""

# Record, as 0/1, whether the installed PyTorch was compiled with the C++11 ABI.
with_cxx11_abi = 1 if torch.compiled_with_cxx11_abi() else 0

print("Found installed PyTorch with built C++ bindings")

# Fill in the config.yaml template: CUDA switch (0/1), conda prefix, PyTorch
# root and the C++11 ABI switch, in that positional order.
with open("config.yaml", "w") as f:
    f.write(defaults_yaml.format(
        int(bool(withCuda)),
        os.environ['CONDA_PREFIX'],
        pytorch_root,
        with_cxx11_abi,
    ))

# Fill in the stack.yaml template: conda prefix, PyTorch root and the optional
# ihaskell package entry (empty when Jupyter support is off).
with open("stack.yaml", "w") as f:
    f.write(stack_yaml.format(
        os.environ['CONDA_PREFIX'],
        pytorch_root,
        "- ihaskell" if withJupyter else "",
    ))

if withCuda:
    print('CUDA is enabled')
Beispiel #5
0
def build_torch_extension(build_ext, global_options, torch_version):
    """Configure the bluefog PyTorch extension and build it.

    Works on a deep copy of ``global_options`` so that the CUDA/NCCL include
    dirs, library dirs, libraries, sources and extra objects added here do not
    leak into the options used by other plugins.

    Args:
        build_ext: Build command object. It is handed to the probing helpers
            and its ``build_extension`` method performs the actual build.
        global_options: Dict of shared build options. Keys read here are
            ``INCLUDES``, ``COMPILE_FLAGS``, ``MACROS``, ``SOURCES``,
            ``LINK_FLAGS``, ``LIBRARY_DIRS``, ``LIBRARIES`` and
            ``EXTRA_OBJECTS``. The dict itself is not modified.
        torch_version: Value exported, stringified, as the ``TORCH_VERSION``
            macro.

    Raises:
        AssertionError: ``BLUEFOG_WITH_NCCL`` is set to something other than
            ``'0'`` or ``'1'``.
    """
    # Backup the options, preventing other plugins access libs that
    # compiled with compiler of this plugin
    import torch
    is_cxx11_abi = torch.compiled_with_cxx11_abi()

    options = copy.deepcopy(global_options)
    have_cuda = is_torch_cuda(build_ext,
                              include_dirs=options['INCLUDES'],
                              extra_compile_args=options['COMPILE_FLAGS'])
    # CUDA is used only when a CUDA device is also available at build time.
    have_cuda = have_cuda and torch.cuda.is_available()
    if have_cuda:
        cuda_include_dirs, cuda_lib_dirs = get_cuda_dirs(
            build_ext, options['COMPILE_FLAGS'])
        nvcc_cmd = get_nvcc_cmd()
        cuda_extra_objects = build_nvcc_extra_objects(nvcc_cmd, is_cxx11_abi)
        options['EXTRA_OBJECTS'] += cuda_extra_objects

        options['INCLUDES'] += cuda_include_dirs
        options['LIBRARY_DIRS'] += cuda_lib_dirs
        options['LIBRARIES'] += ['cudart']
        print('INFO: Try PyTorch extension with CUDA.')
    # Update HAVE_CUDA to mean that PyTorch supports CUDA.
    updated_macros = set_macro(options['MACROS'], 'HAVE_CUDA',
                               str(int(have_cuda)))

    # TODO(ybc) make this into common options?
    have_nccl = os.getenv('BLUEFOG_WITH_NCCL', '0')
    assert have_nccl in ['0', '1'], "BLUEFOG_WITH_NCCL has to be either 0 or 1"

    # NCCL is only compiled in when CUDA is usable. HAVE_NCCL must reflect
    # what is actually built, not merely what was requested; otherwise the
    # macro is 1 while the NCCL sources, headers and libraries are missing.
    use_nccl = bool(have_cuda) and have_nccl == '1'
    if use_nccl:
        nccl_include_dirs, nccl_lib_dirs, nccl_lib = get_nccl_dirs(
            build_ext, cuda_include_dirs, cuda_lib_dirs,
            options['COMPILE_FLAGS'])
        options['INCLUDES'] += nccl_include_dirs
        options['LIBRARY_DIRS'] += nccl_lib_dirs
        options['LIBRARIES'] += nccl_lib
        options['SOURCES'] += [
            "bluefog/common/nccl_controller.cc", "bluefog/common/nccl_win.cc"
        ]
        print('INFO: Try PyTorch extension with NCCL.')
    elif have_nccl == '1':
        print('WARNING: BLUEFOG_WITH_NCCL=1 was requested but CUDA is not '
              'available; building the PyTorch extension without NCCL.')

    updated_macros = set_macro(updated_macros, 'HAVE_NCCL',
                               str(int(use_nccl)))

    updated_macros = set_macro(updated_macros, 'TORCH_VERSION',
                               str(torch_version))

    # Always set _GLIBCXX_USE_CXX11_ABI, since PyTorch can only detect whether it was set to 1.
    updated_macros = set_macro(updated_macros, '_GLIBCXX_USE_CXX11_ABI',
                               str(int(is_cxx11_abi)))

    # PyTorch requires -DTORCH_API_INCLUDE_EXTENSION_H
    updated_macros = set_macro(updated_macros, 'TORCH_API_INCLUDE_EXTENSION_H',
                               '1')

    if have_cuda:
        from torch.utils.cpp_extension import CUDAExtension as TorchExtension
    else:
        # CUDAExtension fails with `ld: library not found for -lcudart` if CUDA is not present
        from torch.utils.cpp_extension import CppExtension as TorchExtension

    ext = TorchExtension(
        bluefog_torch_mpi_lib.name,
        define_macros=updated_macros,
        include_dirs=options['INCLUDES'],
        sources=options['SOURCES'] + [
            "bluefog/torch/adapter.cc", "bluefog/torch/handle_manager.cc",
            "bluefog/torch/mpi_ops.cc", "bluefog/torch/mpi_win_ops.cc"
        ],
        extra_compile_args=options['COMPILE_FLAGS'],
        extra_link_args=options['LINK_FLAGS'],
        library_dirs=options['LIBRARY_DIRS'],
        extra_objects=options['EXTRA_OBJECTS'],
        libraries=options['LIBRARIES'])

    # Patch an existing bluefog_torch_mpi_lib extension object.
    for k, v in ext.__dict__.items():
        bluefog_torch_mpi_lib.__dict__[k] = v

    build_ext.build_extension(bluefog_torch_mpi_lib)