def compile_extern():
    """Build the LLVM pass plugins found under ``extern/llvm``.

    Does nothing unless the configured compiler type is ``clang``.
    Every file in the plugin source directory is compiled into a shared
    object inside the ``llvm`` cache directory. For the first plugin,
    each candidate flag set is tried in turn and verified by loading the
    resulting plugin while compiling a trivial program; the first flag
    set that works is reused for all remaining plugins. Each accepted
    plugin is appended to the global ``kernel_opt_flags`` as a
    ``-Xclang -load`` option. If no flag set works, a warning is logged
    and the remaining plugins are skipped.
    """
    global kernel_opt_flags
    if cc_type != "clang":
        return

    llvm_cache_dir = os.path.join(cache_path, "llvm")
    llvm_src_dir = os.path.join(jittor_path, "extern", "llvm")

    # The clang binary must live in an LLVM "bin" directory so that the
    # matching headers can be located next to it.
    clang_dir = os.path.dirname(get_full_path_of_executable(cc_path))
    assert clang_dir.endswith(
        "bin") and "llvm" in clang_dir, f"Wrong clang_dir: {clang_dir}"
    llvm_include = os.path.abspath(os.path.join(clang_dir, "..", "include"))
    assert os.path.isdir(llvm_include), "LLVM include path not found"

    make_cache_dir(llvm_cache_dir)
    pass_sources = os.listdir(llvm_src_dir)

    # test_pass.cc is a trivial program used to detect link problems of
    # the llvm pass plugin.
    probe_source = os.path.join(llvm_cache_dir, "test_pass.cc")
    with open(probe_source, 'w') as probe_file:
        probe_file.write("int main() {return 0;}")

    # -fno-rtti fix link error
    # -Wl,-znodelete fix segfault
    #   https://github.com/sampsyo/llvm-pass-skeleton/issues/7#issuecomment-401834287
    # -D_GLIBCXX_USE_CXX11_ABI=0 fix undefined symbol: createPrinterPass
    #   https://stackoverflow.com/questions/37366291/undefined-symbol-for-self-built-llvm-opt
    # Candidate flag sets, tried in order.
    candidate_flags = [
        " -Wl,-znodelete -D_GLIBCXX_USE_CXX11_ABI=0 ",
        " -Wl,-znodelete ",
    ]

    chosen = -1  # index of the flag set known to work, -1 while unknown
    for source_name in pass_sources:
        stem = os.path.splitext(source_name)[0]
        source_path = os.path.join(llvm_src_dir, source_name)
        for idx, flag in enumerate(candidate_flags):
            # Once a working flag set is known, skip all the others.
            if chosen not in (-1, idx):
                continue
            plugin_path = os.path.join(llvm_cache_dir, stem + f".{idx}.so")
            compile(cc_path,
                    f"{cc_flags} {opt_flags} {flag} -I'{llvm_include}'",
                    [source_path], plugin_path)
            if chosen == -1:
                # No flag set verified yet: check the plugin can be loaded.
                try:
                    run_cmd(
                        f"{cc_path} {cc_flags} -Xclang -load -Xclang '{plugin_path}' {probe_source}",
                        llvm_cache_dir,
                        print_error=False)
                except Exception as err:
                    LOG.v(f"Try flag {flag} failed: {err}")
                    continue
            chosen = idx
            kernel_opt_flags += f" -Xclang -load -Xclang '{plugin_path}' "
            break
        else:
            # Every candidate failed for this plugin; give up entirely.
            LOG.w("Clang is used, but LLVM pass plugin is unable to link.")
            break
    LOG.vv(f"Compile extern llvm passes: {str(pass_sources)}")
def check_pybt(gdb_path, python_path):
    """Report whether gdb provides the ``py-bt`` command.

    gdb_path: gdb executable; an empty string yields False.
    python_path: python executable handed to gdb; an empty string
        yields False.
    return: True when the output of ``help py-bt`` contains
        'python frame', otherwise False.
    """
    if '' in (gdb_path, python_path):
        return False
    probe_cmd = f"{gdb_path} --batch {python_path} -ex 'help py-bt'"
    output = sp.getoutput(probe_cmd)
    if 'python frame' not in output:
        return False
    LOG.v("py-bt found in gdb.")
    return True
def calculate_md5(file_path, chunk_size=1024 * 1024):
    """Compute the MD5 hex digest of a file, reading it in chunks.

    file_path: path of the file to hash (opened in binary mode).
    chunk_size: number of bytes requested per read.
    return: hexadecimal digest string; it is also logged.
    """
    hasher = hashlib.md5()
    with open(file_path, 'rb') as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b'':
                # An empty read marks the end of the file.
                break
            hasher.update(block)
    digest = hasher.hexdigest()
    LOG.v(f"file {file_path} md5: {digest}")
    return digest
def try_find_exe(*args):
    """Best-effort wrapper around ``find_exe``.

    args: forwarded unchanged to ``find_exe``.
    return: whatever ``find_exe`` returns, or "" when it raises.

    Only ``Exception`` subclasses are treated as "not found", so
    KeyboardInterrupt and SystemExit still propagate to the caller.
    """
    try:
        return find_exe(*args)
    except Exception:
        # Guard against being called without arguments so that the
        # log line itself cannot raise IndexError.
        target = args[0] if args else "<unspecified executable>"
        LOG.v(f"{target} not found.")
        return ""
pybind_include = run_cmd(python_path + " -m pybind11 --includes") LOG.i(f"pybind_include: {pybind_include}") extension_suffix = run_cmd(py3_config_path + " --extension-suffix") LOG.i(f"extension_suffix: {extension_suffix}") make_cache_dir(cache_path) make_cache_dir(os.path.join(cache_path, "jit")) make_cache_dir(os.path.join(cache_path, "obj_files")) make_cache_dir(os.path.join(cache_path, "gen")) # build cache_compile cc_flags += pybind_include cc_flags += f" -I{jittor_path}/src " check_cache_compile() LOG.v(f"Get cache_compile: {jit_utils.cc}") # check cuda has_cuda = 0 check_cuda() nvcc_flags = os.environ.get("nvcc_flags", "") if has_cuda: nvcc_flags += cc_flags + link_flags def convert_nvcc_flags(nvcc_flags): # nvcc don't support -Wall option nvcc_flags = nvcc_flags.replace("-Wall", "") nvcc_flags = nvcc_flags.replace("-Wno-unknown-pragmas", "") nvcc_flags = nvcc_flags.replace("-fopenmp", "") nvcc_flags = nvcc_flags.replace("-march", "-Xcompiler -march") nvcc_flags = nvcc_flags.replace("-Werror", "")
def compile_custom_ops(filenames,
                       extra_flags="",
                       return_module=False,
                       dlopen_flags=None,
                       gen_name_=""):
    """Compile custom ops
    filenames: path of op source files, filenames must be
        pairs of xxx_xxx_op.cc and xxx_xxx_op.h, and the
        type name of op must be XxxXxxOp.
    extra_flags: extra compile flags
    return_module: return module rather than ops(default: False)
    dlopen_flags: flags used while importing the compiled library;
        when None, RTLD_GLOBAL | RTLD_NOW is used, plus RTLD_DEEPBIND
        on Linux.
    gen_name_: when not empty, overrides the generated module name.
    return: compiled ops
    """
    # Function-scope imports keep this block self-contained.
    import importlib
    import sys

    if dlopen_flags is None:
        dlopen_flags = os.RTLD_GLOBAL | os.RTLD_NOW
        if platform.system() == 'Linux':
            dlopen_flags |= os.RTLD_DEEPBIND

    srcs = {}
    headers = {}
    builds = []
    includes = []
    pyjt_includes = []
    for name in filenames:
        name = os.path.realpath(name)
        if name.endswith((".cc", ".cpp", ".cu")):
            builds.append(name)
        if name.endswith(".h"):
            dirname = os.path.dirname(name)
            if dirname.endswith("inc"):
                includes.append(dirname)
            # Headers carrying the @pyjt marker get their own bindings.
            with open(name, "r") as f:
                if "@pyjt" in f.read():
                    pyjt_includes.append(name)
        bname = os.path.splitext(os.path.basename(name))[0]
        if bname.endswith("_op"):
            # Strip the "_op" suffix to obtain the op name.
            bname = bname[:-3]
            if name.endswith(".cc"):
                srcs[bname] = name
            elif name.endswith(".h"):
                includes.append(os.path.dirname(name))
                headers[bname] = name
    assert len(srcs) == len(headers), "Source and header names not match"
    for name in srcs:
        assert name in headers, f"Header of op {name} not found"

    gen_name = "gen_ops_" + "_".join(headers.keys())
    if gen_name_ != "":
        gen_name = gen_name_
    if len(gen_name) > 50:
        # Keep the module name short; the md5 suffix keeps distinct
        # long names distinct after truncation.
        gen_name = gen_name[:50] + "___hash" + hashlib.md5(
            gen_name.encode()).hexdigest()[:6]

    includes = "".join(f" -I\"{x}\" " for x in sorted(set(includes)))
    LOG.vvvv(f"Include flags:{includes}")

    op_extra_flags = includes + extra_flags

    lib_path = os.path.join(cache_path, "custom_ops")
    make_cache_dir(lib_path)
    gen_src_fname = os.path.join(lib_path, gen_name + ".cc")
    gen_head_fname = os.path.join(lib_path, gen_name + ".h")
    gen_lib = os.path.join(lib_path, gen_name + extension_suffix)
    libname = gen_name + lib_suffix
    op_extra_flags += f" -L\"{lib_path}\" -l\"{libname}\" "

    gen_src = gen_jit_op_maker(headers.values(),
                               export=gen_name,
                               extra_flags=op_extra_flags)
    pyjt_compiler.compile_single(gen_head_fname, gen_src_fname, src=gen_src)
    # gen src initialize first
    builds.insert(0, gen_src_fname)

    def insert_anchor(gen_src, anchor_str, insert_str):
        # insert insert_str after the first anchor_str in gen_src
        return gen_src.replace(anchor_str, anchor_str + insert_str, 1)

    for name in pyjt_includes:
        LOG.v("handle pyjt_include ", name)
        bname = os.path.basename(name).split(".")[0]
        gen_src_fname = os.path.join(cache_path, "custom_ops",
                                     gen_name + "_" + bname + ".cc")
        pyjt_compiler.compile_single(name, gen_src_fname)
        builds.insert(1, gen_src_fname)
        # Declare and call the per-header pyjt definition function.
        gen_src = insert_anchor(gen_src, "namespace jittor {",
                                f"extern void pyjt_def_{bname}(PyObject* m);")
        gen_src = insert_anchor(
            gen_src, "init_module(PyModuleDef* mdef, PyObject* m) {",
            f"jittor::pyjt_def_{bname}(m);")

    with open(gen_head_fname, "w") as f:
        f.write(gen_src)

    LOG.vvv(f"Build custum ops lib:{gen_lib}")
    LOG.vvvv(f"Build sources:{builds}")
    compile(cc_path, extra_flags + cc_flags + opt_flags + includes, builds,
            gen_lib)

    # add python path and import
    LOG.vvv(f"Import custum ops lib:{gen_lib}")
    if lib_path not in sys.path:
        sys.path.append(lib_path)
    # unlock scope when initialize
    with lock.unlock_scope():
        with jit_utils.import_scope(dlopen_flags):
            # import_module returns the module directly. Binding it via
            # exec() and reading it back from locals() stops working on
            # Python 3.13+, where locals() yields independent snapshots.
            mod = importlib.import_module(gen_name)
    if return_module:
        return mod
    return mod.ops
cc_flags += " -I/opt/homebrew/include " # 3. User specified flags if "cc_flags" in os.environ: cc_flags += os.environ["cc_flags"] + ' ' cc_flags += " -lstdc++ -ldl -shared " if platform.system() == 'Darwin': # TODO: if not using apple clang, there is no need to add -lomp cc_flags += "-undefined dynamic_lookup -lomp " if platform.machine() == "arm64": cc_flags += " -L/opt/homebrew/lib " opt_flags = "" py_include = jit_utils.get_py3_include_path() LOG.v(f"py_include: {py_include}") extension_suffix = jit_utils.get_py3_extension_suffix() lib_suffix = extension_suffix.rsplit(".", 1)[0] LOG.v(f"extension_suffix: {extension_suffix}") so = ".so" if os.name != 'nt' else ".dll" kernel_opt_flags = os.environ.get("kernel_flags", "") + opt_flags if platform.system() == 'Darwin': # TODO: if not using apple clang, cannot add -Xpreprocessor kernel_opt_flags += " -Xpreprocessor -fopenmp " elif cc_type != 'cl': kernel_opt_flags += " -fopenmp " def fix_cl_flags(cmd): output = shsplit(cmd)
def __init__(self, filename):
    """Open the lock file and start in the unlocked state.

    filename: path of the lock file; it is opened in 'w' mode, which
        creates the file or truncates an existing one.
    """
    lock_file = open(filename, 'w')
    self.handle = lock_file
    pid = os.getpid()
    LOG.v(f'OPEN LOCK path: {filename} PID: {pid}')
    self.is_locked = False