class LinkHelper:
    """
    Helps with creating and running a custom linking command for YugabyteDB outside of the build
    system.

    The new command is accumulated in new_args. consume_original_link_cmd must run before
    add_final_args, because it is the method that populates final_output_name.
    """

    dep_graph: DependencyGraph
    initial_node: Node

    # Arguments to the linker in the original command that produces the initial_node target.
    # Does not include the compiler driver executable.
    original_link_args: List[str]

    build_root: str
    build_paths: BuildPaths
    llvm_path: str
    thirdparty_path: str
    clang_cpp_path: str

    # Dependency graph nodes corresponding to the object files present in the original linker
    # command. Used for deduplication.
    obj_file_graph_nodes: Set[Node]

    new_args: LinkCommand

    # Build directory of the Postgres backend.
    pg_backend_build_dir: str

    # The command for linking the yb_pgbackend library.
    yb_pgbackend_link_cmd: List[str]

    lto_output_suffix: Optional[str]

    # Populated by consume_original_link_cmd.
    final_output_name: str

    def __init__(
            self,
            dep_graph: DependencyGraph,
            initial_node: Node,
            lto_output_suffix: Optional[str]) -> None:
        """
        :param dep_graph: dependency graph; its configuration supplies the build root.
        :param initial_node: node whose link command is used as the starting point. It must have
            a link command.
        :param lto_output_suffix: if not None, appended to the final output file name.
        """
        self.dep_graph = dep_graph
        self.initial_node = initial_node

        self.build_root = self.dep_graph.conf.build_root
        self.build_paths = BuildPaths(self.build_root)

        self.llvm_path = self.build_paths.get_llvm_path()
        self.thirdparty_path = self.build_paths.get_thirdparty_path()
        self.clang_cpp_path = self.build_paths.get_llvm_tool_path('clang++')

        assert initial_node.link_cmd
        self.original_link_args = process_original_link_cmd(initial_node.link_cmd)

        # Static libraries found under the third-party directory; used to resolve -l... arguments.
        self.static_lib_paths = get_static_lib_paths(self.thirdparty_path)

        # The new command starts with the clang++ driver as its only element.
        self.new_args = LinkCommand([self.clang_cpp_path])
        self.obj_file_graph_nodes = set()
        self.pg_backend_build_dir, self.yb_pgbackend_link_cmd = get_yb_pgbackend_link_cmd(
            self.build_root)

        self.lto_output_suffix = lto_output_suffix

    def convert_to_static_lib(self, arg: str) -> Optional[str]:
        """
        Given an argument to the original linker command, try to interpret it as a library, either
        specified as a shared library path, or using -l... syntax, and return the corresponding
        static library path if available.

        :param arg: a single argument of the original linker command.
        :return: the static library path, or None if the argument is not a library or no static
            counterpart was found.
        """
        if arg.startswith('/') and arg.endswith('.so'):
            arg_static_prefix = arg[:-3]
            # Look for libfoo.a first and then libfoo-s.a next to the shared library.
            for suffix in ('.a', '-s.a'):
                arg_static = arg_static_prefix + suffix
                if os.path.exists(arg_static):
                    logging.info(
                        "Using static library %s instead of shared library %s", arg_static, arg)
                    return arg_static
            logging.info("Did not find static library corresponding to %s", arg)

        if arg.startswith('-l'):
            logging.info("Looking for static lib for: %s", arg)
            lib_name = arg[2:]
            # The acceptable file names do not depend on the loop variable, so build them once.
            candidate_basenames = ('lib' + lib_name + '.a', 'lib' + lib_name + '-s.a')
            for static_lib_path in self.static_lib_paths:
                if os.path.basename(static_lib_path) in candidate_basenames:
                    logging.info("Found static lib for %s: %s", lib_name, static_lib_path)
                    return static_lib_path
            logging.info("Did not find a static lib for %s", lib_name)

        if arg.endswith('.so') or '.so.' in arg:
            logging.info("Still using a shared library: %s", arg)

        return None

    def process_arg(self, arg: str) -> None:
        """
        Adds one argument to the new command, replacing a library reference with its static
        library when one is found. Arguments listed in SKIPPED_ARGS are dropped.

        :param arg: a single linker argument.
        """
        if arg in SKIPPED_ARGS:
            logging.info("Skipping argument: %s", arg)
            return

        new_arg = self.convert_to_static_lib(arg)
        if new_arg:
            # Different arguments may map to the same static library; add it only once.
            if not self.new_args.contains(new_arg):
                self.new_args.add_new_arg(new_arg)
        else:
            self.new_args.add_new_arg(arg)

    def consume_original_link_cmd(self) -> None:
        """
        Goes over the original linker command and reuses some of its arguments for the new
        command. Also determines the output file name, stores it in final_output_name and appends
        the corresponding -o option to the new command.

        :raises ValueError: if the original command specifies more than one output name, or none.
        """
        with WorkDirContext(self.build_root):
            expect_output_name = False
            output_name: Optional[str] = None
            for arg in self.original_link_args:
                if arg == '-o':
                    expect_output_name = True
                    continue
                if expect_output_name:
                    if output_name:
                        raise ValueError(
                            "Found multiple output names in the original link command: "
                            "%s and %s" % (output_name, arg))
                    output_name = arg
                    expect_output_name = False
                    continue
                expect_output_name = False

                if is_yb_library(arg):
                    logging.info("Skipping YB library: %s", arg)
                    continue

                if arg.endswith('.cc.o'):
                    # E.g. tablet_server_main.cc.o.
                    # Remember this node for later deduplication.
                    self.obj_file_graph_nodes.add(
                        self.dep_graph.find_node(os.path.realpath(arg)))

                self.process_arg(arg)

            if not output_name:
                raise ValueError("Did not find an output name in the original link command")
            self.final_output_name = os.path.abspath(output_name)
            logging.info("Final output file name: %s", self.final_output_name)
            if self.lto_output_suffix is not None:
                self.final_output_name += self.lto_output_suffix
            self.new_args.extend(['-o', self.final_output_name])

    def add_leaf_object_files(self) -> None:
        """
        Goes over all the object files that the original node transitively depends on, and adds
        them to the link command if they have not already been added. Then adds the object files
        and the non-YB -l... libraries from the yb_pgbackend link command.
        """
        transitive_deps = self.initial_node.get_recursive_deps(
            skip_node_types={NodeType.EXECUTABLE})
        with WorkDirContext(self.build_root):
            # Sort nodes by path for determinism.
            for node in sorted(transitive_deps, key=lambda dep: dep.path):
                if node in self.obj_file_graph_nodes:
                    # Dedup .cc.o files already existing on the command line.
                    continue

                if node.node_type == NodeType.OBJECT:
                    self.new_args.add_new_arg(node.path)

            for arg in self.yb_pgbackend_link_cmd:
                if arg.endswith('.o'):
                    if os.path.basename(arg) == 'main_cpp_wrapper.cc.o':
                        # TODO: why is this file even linked into libyb_pgbackend?
                        continue
                    # Object file arguments are joined with the Postgres backend build directory.
                    self.new_args.append(os.path.join(self.pg_backend_build_dir, arg))
                    continue
                if (arg.startswith('-l') and
                        not self.new_args.contains(arg) and
                        not arg.startswith('-lyb_')):
                    self.process_arg(arg)

    def add_final_args(self, lto_type: str) -> None:
        """
        Appends the trailing arguments of the new command (Postgres static libraries, LTO flags
        and the static libc++ / libc++abi libraries) and writes the resulting command line to
        <final_output_name>_lto_link_cmd_args.txt.

        :param lto_type: either 'full' or 'thin'; passed to the compiler driver as -flto=<type>.
        """
        assert lto_type in ['full', 'thin']
        self.new_args.extend([
            '-L%s' % os.path.join(self.build_root, 'postgres', 'lib'),
            '-l:libpgcommon.a',
            '-l:libpgport.a',
            '-l:libpq.a',
            '-fwhole-program',
            '-Wl,-v',
            '-nostdlib++',
            '-flto=' + lto_type,
        ])

        for lib_name in ['libc++.a', 'libc++abi.a']:
            self.new_args.append(
                os.path.join(
                    self.thirdparty_path, 'installed', 'uninstrumented', 'libcxx', 'lib',
                    lib_name))

        with WorkDirContext(self.build_root):
            self.write_link_cmd_file(self.final_output_name + '_lto_link_cmd_args.txt')

    def run_linker(self) -> None:
        """
        Runs the accumulated link command from the build root and logs how long it took.

        A non-zero linker exit code is logged as an error and is not re-raised.
        """
        # NOTE(review): callers cannot tell from this method whether linking succeeded -- confirm
        # that this is intended.
        with WorkDirContext(self.build_root):
            start_time_sec = time.time()
            logging.info("Running linker")
            try:
                subprocess.check_call(self.new_args.args)
            except subprocess.CalledProcessError as ex:
                # Avoid printing the extremely long command line.
                logging.error("Linker returned exit code %d", ex.returncode)
            elapsed_time_sec = time.time() - start_time_sec
            logging.info("Linking finished in %.1f sec", elapsed_time_sec)

    def write_link_cmd_file(self, out_path: str) -> None:
        """
        Writes the arguments of the new command to the given file, one argument per line.

        :param out_path: path of the file to write.
        """
        logging.info(
            "Writing the linker command line (one argument per line) to %s",
            os.path.abspath(out_path))
        write_file('\n'.join(self.new_args.args), out_path)
class LinkHelper:
    """
    Helps with creating and running a custom linking command for YugabyteDB outside of the build
    system.

    The new command is accumulated in new_args. consume_original_link_cmd must run before
    add_final_args, because it is the method that populates final_output_name.
    """

    dep_graph: DependencyGraph
    initial_node: Node

    # Arguments to the linker in the original command that produces the initial_node target.
    # Does not include the compiler driver executable.
    original_link_args: List[str]

    build_root: str
    build_paths: BuildPaths
    llvm_path: str
    thirdparty_path: str
    clang_cpp_path: str

    # Dependency graph nodes corresponding to the object files present in the original linker
    # command. Used for deduplication.
    obj_file_graph_nodes: Set[Node]

    new_args: LinkCommand

    # Build directory of the Postgres backend.
    pg_backend_build_dir: str

    # The command for linking the yb_pgbackend library.
    yb_pgbackend_link_cmd: List[str]

    lto_output_suffix: Optional[str]

    # Populated by consume_original_link_cmd.
    final_output_name: str

    # We look at shared library dependencies (detected using ldd) of the libraries we add, and for
    # those dependencies that fall within the third-party directory, we determine the corresponding
    # static libraries and add them to the list below so we can link with them. This is necessary
    # because in some cases, e.g. for libgssapi_krb5, the static libraries we need to add cannot be
    # determined in any other way. The dictionary below maps the static library to the list of
    # shared libraries that necessitated its addition.
    static_libs_from_ldd: Dict[str, Set[str]]

    # Set of shared library file paths for which we have already examined dependencies using ldd
    # as described above.
    processed_shared_lib_deps_for: Set[str]

    def __init__(
            self,
            dep_graph: DependencyGraph,
            initial_node: Node,
            lto_output_suffix: Optional[str]) -> None:
        """
        :param dep_graph: dependency graph; its configuration supplies the build root.
        :param initial_node: node whose link command is used as the starting point. It must have
            a link command.
        :param lto_output_suffix: if not None, appended to the final output file name.
        """
        self.dep_graph = dep_graph
        self.initial_node = initial_node

        self.build_root = self.dep_graph.conf.build_root
        self.build_paths = BuildPaths(self.build_root)

        self.llvm_path = self.build_paths.get_llvm_path()
        self.thirdparty_path = self.build_paths.get_thirdparty_path()
        self.clang_cpp_path = self.build_paths.get_llvm_tool_path('clang++')

        assert initial_node.link_cmd
        self.original_link_args = process_original_link_cmd(initial_node.link_cmd)

        # Static libraries found under the third-party directory; used to resolve -l... arguments.
        self.static_lib_paths = get_static_lib_paths(self.thirdparty_path)

        # The new command starts with the clang++ driver as its only element.
        self.new_args = LinkCommand([self.clang_cpp_path])
        self.obj_file_graph_nodes = set()
        self.pg_backend_build_dir, self.yb_pgbackend_link_cmd = get_yb_pgbackend_link_cmd(
            self.build_root)

        self.lto_output_suffix = lto_output_suffix
        self.static_libs_from_ldd = {}
        self.processed_shared_lib_deps_for = set()

    def convert_to_static_lib(self, arg: str) -> Optional[str]:
        """
        Given an argument to the original linker command, try to interpret it as a library, either
        specified as a shared library path, or using -l... syntax, and return the corresponding
        static library path if available.

        :param arg: a single linker argument; must not be a system library.
        :return: the static library path, or None if the argument is not recognized as a library
            or no static library was found for a -l... argument.
        :raises ValueError: if the argument is a shared library for which no static library
            could be found.
        """
        assert not is_system_lib(arg)
        if os.path.isabs(arg):
            lib_path_prefix, _ = split_shared_lib_ext(arg)
            if lib_path_prefix is not None:
                # Try the prefix as is, and also with its dash-numeric suffix removed when
                # remove_dash_numeric_suffix produces a value.
                lib_path_prefixes: List[str] = [
                    item
                    for item in [lib_path_prefix, remove_dash_numeric_suffix(lib_path_prefix)]
                    if item is not None
                ]
                static_lib_candidates = [
                    prefix + suffix
                    for prefix in lib_path_prefixes
                    for suffix in STATIC_LIBRARY_SUFFIXES
                ]
                for static_lib_path in static_lib_candidates:
                    if os.path.exists(static_lib_path):
                        logging.info(
                            "Using static library %s instead of shared library %s",
                            static_lib_path, arg)
                        return static_lib_path
                raise ValueError("Did not find static library corresponding to %s" % arg)

        if arg.startswith('-l'):
            logging.info("Looking for static lib for: %s", arg)
            lib_name = arg[2:]
            # The acceptable file names do not depend on the loop variable, so build them once.
            candidate_basenames = [
                'lib' + lib_name + suffix for suffix in STATIC_LIBRARY_SUFFIXES
            ]
            for static_lib_path in self.static_lib_paths:
                if os.path.basename(static_lib_path) in candidate_basenames:
                    logging.info("Found static lib for %s: %s", lib_name, static_lib_path)
                    # Also record the static counterparts of the third-party shared libraries
                    # that the shared version of this library depends on.
                    self.add_shared_library_dependencies(
                        find_shared_lib_from_static(static_lib_path))
                    return static_lib_path
            logging.info("Did not find a static lib for %s", lib_name)

        if is_shared_lib(arg):
            raise ValueError("Still using a shared library: %s" % arg)

        return None

    def add_shared_library_dependencies(self, shared_library_path: str) -> None:
        """
        Runs ldd on the given shared library and, for every dependency located inside the
        third-party directory, records the corresponding static library in static_libs_from_ldd.
        Each shared library path is examined at most once; a path that does not exist is marked
        as processed and otherwise skipped.

        :param shared_library_path: path to the shared library to examine.
        """
        if shared_library_path in self.processed_shared_lib_deps_for:
            return
        self.processed_shared_lib_deps_for.add(shared_library_path)
        if not os.path.exists(shared_library_path):
            logging.info("File does ont exist, not running ldd: %s", shared_library_path)
            return

        thirdparty_dir_prefix = self.thirdparty_path + '/'
        ldd_output = subprocess.check_output(['ldd', shared_library_path]).decode('utf-8')
        for line in ldd_output.split('\n'):
            ldd_output_line_match = LDD_OUTPUT_LINE_RE.match(line.strip())
            if not ldd_output_line_match:
                continue
            so_path = ldd_output_line_match.group(2)
            if not so_path.startswith(thirdparty_dir_prefix):
                continue
            static_lib_path = self.convert_to_static_lib(so_path)
            if not static_lib_path:
                continue
            if os.path.basename(static_lib_path) in LIBCXX_STATIC_LIB_NAMES:
                # Skip libc++ and libc++abi, we will add them explicitly later.
                # All third-party libraries written in C++ will depend on these and it
                # is not very useful to include that in the output.
                continue
            self.static_libs_from_ldd.setdefault(static_lib_path, set()).add(
                os.path.realpath(shared_library_path))

    def process_arg(self, arg: str) -> None:
        """
        Adds one argument to the new command. Arguments listed in SKIPPED_ARGS are dropped.
        Non-system libraries are replaced with their static counterparts when found; the system
        shared library librt.so is replaced with -lrt.

        :param arg: a single linker argument.
        :raises ValueError: for a system static library, or for a system shared library other
            than librt.so.
        """
        assert arg is not None
        if arg in SKIPPED_ARGS:
            logging.info("Skipping argument: %s", arg)
            return

        if is_system_lib(arg):
            if is_static_lib(arg):
                raise ValueError(
                    "Linking with a system static library is not allowed: %s" % arg)
            if is_shared_lib(arg):
                name = os.path.basename(arg)
                if name == 'librt.so':
                    arg = '-lrt'
                else:
                    raise ValueError("System shared library: %s" % arg)
        else:
            if is_shared_lib(arg):
                self.add_shared_library_dependencies(arg)
            # Keep the original argument when there is no static library to substitute.
            arg = self.convert_to_static_lib(arg) or arg

        self.new_args.add_new_arg(arg)

    def consume_original_link_cmd(self) -> None:
        """
        Goes over the original linker command and reuses some of its arguments for the new
        command. Also determines the output file name, stores it in final_output_name and appends
        the corresponding -o option to the new command.

        :raises ValueError: if the original command specifies more than one output name, or none.
        """
        with WorkDirContext(self.build_root):
            expect_output_name = False
            output_name: Optional[str] = None
            for arg in self.original_link_args:
                if arg == '-o':
                    expect_output_name = True
                    continue
                if expect_output_name:
                    if output_name:
                        raise ValueError(
                            "Found multiple output names in the original link command: "
                            "%s and %s" % (output_name, arg))
                    output_name = arg
                    expect_output_name = False
                    continue
                expect_output_name = False

                if is_yb_library(arg):
                    logging.info("Skipping YB library: %s", arg)
                    continue

                if arg.endswith('.cc.o'):
                    # E.g. tablet_server_main.cc.o.
                    # Remember this node for later deduplication.
                    self.obj_file_graph_nodes.add(
                        self.dep_graph.find_node(os.path.realpath(arg)))

                self.process_arg(arg)

            if not output_name:
                raise ValueError("Did not find an output name in the original link command")
            self.final_output_name = os.path.abspath(output_name)
            logging.info("Final output file name: %s", self.final_output_name)
            if self.lto_output_suffix is not None:
                self.final_output_name += self.lto_output_suffix
            self.new_args.extend(['-o', self.final_output_name])

    def add_leaf_object_files(self) -> None:
        """
        Goes over all the object files that the original node transitively depends on, and adds
        them to the link command if they have not already been added. Then adds the object files
        and the non-YB -l... libraries from the yb_pgbackend link command.
        """
        transitive_deps = self.initial_node.get_recursive_deps(
            skip_node_types={NodeType.EXECUTABLE})
        with WorkDirContext(self.build_root):
            # Sort nodes by path for determinism.
            for node in sorted(transitive_deps, key=lambda dep: dep.path):
                if node in self.obj_file_graph_nodes:
                    # Dedup .cc.o files already existing on the command line.
                    continue

                if node.node_type == NodeType.OBJECT:
                    self.new_args.add_new_arg(node.path)

            for arg in self.yb_pgbackend_link_cmd:
                if arg.endswith('.o'):
                    if os.path.basename(arg) == 'main_cpp_wrapper.cc.o':
                        # TODO: why is this file even linked into libyb_pgbackend?
                        continue
                    # Object file arguments are joined with the Postgres backend build directory.
                    self.new_args.append(os.path.join(self.pg_backend_build_dir, arg))
                    continue
                if (arg.startswith('-l') and
                        not self.new_args.contains(arg) and
                        not arg.startswith('-lyb_')):
                    self.process_arg(arg)

    def add_final_args(self, lto_type: str) -> None:
        """
        Appends the trailing arguments of the new command: the static libraries collected in
        static_libs_from_ldd that are not on the command line yet, the Postgres static libraries,
        LTO flags, and the static libc++ / libc++abi libraries. Then writes the resulting command
        line to <final_output_name>_lto_link_cmd_args.txt.

        :param lto_type: either 'full' or 'thin'; passed to the compiler driver as -flto=<type>.
        """
        assert lto_type in ['full', 'thin']

        # Iterate in sorted order so the resulting command line is deterministic.
        for static_lib_path in sorted(self.static_libs_from_ldd):
            if not self.new_args.contains(static_lib_path):
                logging.info(
                    "Adding a static library determined using shared library dependencies: %s "
                    "(needed by: %s)",
                    static_lib_path,
                    # The static_libs_from_ldd dictionary stores the set of shared library paths
                    # that caused us to add a particular static library dependency as the value
                    # corresponding to that static library's path in the key.
                    ', '.join(sorted(self.static_libs_from_ldd[static_lib_path])))
                self.new_args.append(static_lib_path)

        self.new_args.extend([
            '-L%s' % os.path.join(self.build_root, 'postgres', 'lib'),
            '-l:libpgcommon.a',
            '-l:libpgport.a',
            '-l:libpq.a',
            '-fwhole-program',
            '-Wl,-v',
            '-nostdlib++',
            # For __res_nsearch, ns_initparse, ns_parserr, ns_name_uncompress.
            # See https://github.com/yugabyte/yugabyte-db/issues/12738 for details.
            '-lresolv',
            '-flto=' + lto_type,
        ])

        for lib_name in LIBCXX_STATIC_LIB_NAMES:
            self.new_args.append(
                os.path.join(
                    self.thirdparty_path, 'installed', 'uninstrumented', 'libcxx', 'lib',
                    lib_name))

        with WorkDirContext(self.build_root):
            self.write_link_cmd_file(self.final_output_name + '_lto_link_cmd_args.txt')

    def run_linker(self) -> None:
        """
        Runs the accumulated link command from the build root and logs how long it took.

        A non-zero linker exit code is logged as an error and is not re-raised.
        """
        # NOTE(review): callers cannot tell from this method whether linking succeeded -- confirm
        # that this is intended.
        with WorkDirContext(self.build_root):
            start_time_sec = time.time()
            logging.info("Running linker")
            try:
                subprocess.check_call(self.new_args.as_list())
            except subprocess.CalledProcessError as ex:
                # Avoid printing the extremely long command line.
                logging.error("Linker returned exit code %d", ex.returncode)
            elapsed_time_sec = time.time() - start_time_sec
            logging.info("Linking finished in %.1f sec", elapsed_time_sec)

    def write_link_cmd_file(self, out_path: str) -> None:
        """
        Writes the arguments of the new command to the given file, one argument per line.

        :param out_path: path of the file to write.
        """
        logging.info(
            "Writing the linker command line (one argument per line) to %s",
            os.path.abspath(out_path))
        write_file('\n'.join(self.new_args.as_list()), out_path)