def setup_cuda_lib(lib_name, link=True, extra_flags=""):
    globals()[lib_name+"_ops"] = None
    globals()[lib_name] = None
    if not has_cuda: return
    LOG.v(f"setup {lib_name}...")

    culib_path = os.path.join(cuda_lib, f"lib{lib_name}.so")
    jt_cuda_include = os.path.join(jittor_path, "extern", "cuda", "inc")
    jt_culib_include = os.path.join(jittor_path, "extern", "cuda", lib_name, "inc")

    link_flags = ""
    if link:
        extra_include_path = os.path.abspath(os.path.join(cuda_include, "..", "targets/x86_64-linux/include"))
        extra_lib_path = os.path.abspath(os.path.join(cuda_lib, "..", "targets/x86_64-linux/lib"))
        cuda_include_name = search_file([cuda_include, extra_include_path, "/usr/include"], lib_name+".h")
        # CUDA 11 prefers cuDNN 8
        nvcc_version = get_int_version(nvcc_path)
        prefer_version = ()
        if nvcc_version[0] == 11:
            prefer_version = ("8",)
        culib_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu"], f"lib{lib_name}.so", prefer_version)

        if lib_name == "cudnn":
            # cuDNN cannot find libcudnn_cnn_train.so.8 on its own, so we dlopen it manually.
            if nvcc_version >= (11,0,0):
                libs = ["libcudnn_ops_infer.so", "libcudnn_ops_train.so",
                        "libcudnn_cnn_infer.so", "libcudnn_cnn_train.so"]
                for l in libs:
                    ex_cudnn_path = search_file([cuda_lib, extra_lib_path, "/usr/lib/x86_64-linux-gnu"], l, prefer_version)
                    ctypes.CDLL(ex_cudnn_path, dlopen_flags)

        # dynamically link the CUDA library
        ctypes.CDLL(culib_path, dlopen_flags)
        link_flags = f"-l{lib_name} -L'{cuda_lib}'"

    # find all source files
    culib_src_dir = os.path.join(jittor_path, "extern", "cuda", lib_name)
    culib_src_files = []
    for r, _, f in os.walk(culib_src_dir):
        for fname in f:
            culib_src_files.append(os.path.join(r, fname))
    if len(culib_src_files) == 0:
        return

    # compile and get operators
    culib = compile_custom_ops(culib_src_files, return_module=True,
        extra_flags=f" -I'{jt_cuda_include}' -I'{jt_culib_include}' {link_flags} {extra_flags} ")
    culib_ops = culib.ops
    globals()[lib_name+"_ops"] = culib_ops
    globals()[lib_name] = culib
    LOG.vv(f"Get {lib_name}_ops: "+str(dir(culib_ops)))
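# Illustrative usage sketch (added; not part of the original source): each CUDA
# wrapper library is registered by a single setup_cuda_lib() call, which fills
# the module-level "<lib>_ops" and "<lib>" globals on success. The helper name
# and the list of libraries below are assumptions for illustration.
def _setup_cuda_libs_sketch():
    for lib_name in ["cublas", "cudnn", "curand"]:
        try:
            setup_cuda_lib(lib_name, link=True)
        except Exception as e:
            # a missing optional library should not abort the whole setup
            LOG.w(f"setup {lib_name} failed: {e}")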
cc_flags += f" -l\"jit_utils_core{lib_suffix}\" " compile(cc_path, cc_flags + opt_flags, files, 'jittor_core' + extension_suffix) cc_flags += f" -l\"jittor_core{lib_suffix}\" " # TODO: move to compile_extern.py # compile_extern() with jit_utils.import_scope(import_flags): import jittor_core as core flags = core.Flags() if has_cuda: nvcc_flags = convert_nvcc_flags(cc_flags) nvcc_version = list(jit_utils.get_int_version(nvcc_path)) max_arch = 1000 if nvcc_version < [ 11, ]: max_arch = 75 elif nvcc_version < [11, 1]: max_arch = 80 if len(flags.cuda_archs): min_arch = 30 archs = [] for arch in flags.cuda_archs: if arch < min_arch: LOG.w(f"CUDA arch({arch})<{min_arch} is not supported") continue if arch > max_arch:
def setup_cuda_lib(lib_name, link=True, extra_flags=""):
    arch_key = "x86_64"
    if platform.machine() not in ["x86_64", "AMD64"]:
        arch_key = "aarch64"
    globals()[lib_name+"_ops"] = None
    globals()[lib_name] = None
    if not has_cuda: return
    LOG.v(f"setup {lib_name}...")

    culib_path = os.path.join(cuda_lib, f"lib{lib_name}.so")
    jt_cuda_include = os.path.join(jittor_path, "extern", "cuda", "inc")
    jt_culib_include = os.path.join(jittor_path, "extern", "cuda", lib_name, "inc")

    link_flags = ""
    if link:
        extra_include_path = os.path.abspath(os.path.join(cuda_include, "..", f"targets/{arch_key}-linux/include"))
        extra_lib_path = os.path.abspath(os.path.join(cuda_lib, "..", f"targets/{arch_key}-linux/lib"))
        cuda_include_name = search_file([cuda_include, extra_include_path, "/usr/include"], lib_name+".h")
        # CUDA 11 prefers cuDNN 8
        nvcc_version = get_int_version(nvcc_path)
        prefer_version = ()
        if nvcc_version[0] == 11:
            prefer_version = ("8",)
        culib_path = search_file([cuda_bin, cuda_lib, extra_lib_path, f"/usr/lib/{arch_key}-linux-gnu", "/usr/lib"], f"lib{lib_name}.so", prefer_version)

        if lib_name == "cublas" and nvcc_version[0] >= 10:
            # manually link libcublasLt.so
            try:
                cublas_lt_lib_path = search_file([cuda_bin, cuda_lib, extra_lib_path, f"/usr/lib/{arch_key}-linux-gnu", "/usr/lib"], "libcublasLt.so", nvcc_version)
                ctypes.CDLL(cublas_lt_lib_path, dlopen_flags)
            except:
                # Some aarch64 systems, such as UOS on an FT2000 CPU,
                # ship CUDA 10 without libcublasLt.so.
                pass
        if lib_name == "cudnn":
            # cuDNN cannot find libcudnn_cnn_train.so.8 on its own, so we dlopen it manually.
            if nvcc_version >= (11,0,0):
                libs = ["libcudnn_ops_infer.so", "libcudnn_ops_train.so",
                        "libcudnn_cnn_infer.so", "libcudnn_cnn_train.so"]
                for l in libs:
                    ex_cudnn_path = search_file([cuda_bin, cuda_lib, extra_lib_path, f"/usr/lib/{arch_key}-linux-gnu", "/usr/lib"], l, prefer_version)
                    ctypes.CDLL(ex_cudnn_path, dlopen_flags)

        # dynamically link the CUDA library
        # ctypes.CDLL(culib_path, dlopen_flags)
        # link_flags = f"-l{lib_name} -L\"{cuda_lib}\""
        link_flags = f"-l{lib_name} -L\"{os.path.dirname(culib_path)}\""
        # print("link_flags", link_flags, culib_path)

    # find all source files
    culib_src_dir = os.path.join(jittor_path, "extern", "cuda", lib_name)
    culib_src_files = []
    for r, _, f in os.walk(culib_src_dir):
        for fname in f:
            culib_src_files.append(os.path.join(r, fname))
    if len(culib_src_files) == 0:
        return

    # compile and get operators
    culib = compile_custom_ops(culib_src_files, return_module=True,
        extra_flags=f" -I\"{jt_cuda_include}\" -I\"{jt_culib_include}\" {link_flags} {extra_flags} ")
    culib_ops = culib.ops
    globals()[lib_name+"_ops"] = culib_ops
    globals()[lib_name] = culib
    LOG.vv(f"Get {lib_name}_ops: "+str(dir(culib_ops)))
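# Illustration only (added; not in the original source): the ctypes.CDLL(path,
# dlopen_flags) calls above pre-load shared libraries so the operator modules
# built later by compile_custom_ops can resolve their symbols at load time.
# The exact value of dlopen_flags is an assumption here; RTLD_GLOBAL is the key
# property, since it exports the loaded symbols to libraries opened afterwards.
def _preload_shared_lib_sketch(path):
    # hypothetical stand-in for ctypes.CDLL(path, dlopen_flags)
    return ctypes.CDLL(path, os.RTLD_NOW | os.RTLD_GLOBAL)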