def main():
    init_logging()
    if get_bool_env_var('YB_SKIP_POSTGRES_BUILD'):
        logging.info("Skipping PostgreSQL build (YB_SKIP_POSTGRES_BUILD is set)")
        return
    PostgresBuilder().run()
def run(self):
    if get_bool_env_var('YB_SKIP_POSTGRES_BUILD'):
        logging.info("Skipping PostgreSQL build (YB_SKIP_POSTGRES_BUILD is set)")
        return
    self.parse_args()
    self.build_postgres()
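# get_bool_env_var(), used above, comes from YugabyteDB's internal build utilities and
# is not defined in this section. A minimal sketch of what such a helper might look
# like, assuming it treats the usual truthy strings as True (the real implementation
# may differ):
import os


def get_bool_env_var(env_var_name):
    """Interpret an environment variable as a boolean flag (sketch, not the real helper)."""
    value = os.environ.get(env_var_name)
    if value is None:
        return False
    return value.strip().lower() in ('1', 't', 'true', 'y', 'yes', 'on')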
def configure_postgres(self):
    if is_verbose_mode():
        logging.info("Running configure in the postgres build directory")
    # Don't enable -Werror when running configure -- that can affect the resulting
    # configuration.
    configure_cmd_line = [
        './configure',
        '--prefix', self.pg_prefix,
        '--with-extra-version=-YB-' + self.get_yb_version(),
        '--enable-depend',
        '--with-openssl',
        '--with-libedit-preferred',
        '--with-includes=' + self.openssl_include_dir,
        '--with-libraries=' + self.openssl_lib_dir,
        # We're enabling debug symbols for all types of builds.
        '--enable-debug']
    if not get_bool_env_var('YB_NO_PG_CONFIG_CACHE'):
        configure_cmd_line.append('--config-cache')

    # We get readline-related errors in ASAN/TSAN, so let's disable readline there.
    if self.build_type in ['asan', 'tsan']:
        # TODO: do we still need this limitation?
        configure_cmd_line += ['--without-readline']

    if self.build_type != 'release':
        configure_cmd_line += ['--enable-cassert']
    configure_result = run_program(configure_cmd_line, error_ok=True)
    if configure_result.failure():
        rerun_configure = False
        for line in configure_result.stderr.splitlines():
            if REMOVE_CONFIG_CACHE_MSG_RE.search(line.strip()):
                logging.info("Configure failed because of stale config.cache, re-running.")
                run_program('rm -f config.cache')
                rerun_configure = True
                break
        if not rerun_configure:
            logging.error("Standard error from configure:\n" + configure_result.stderr)
            raise RuntimeError("configure failed")
        configure_result = run_program(
            configure_cmd_line, shell=True, stdout_stderr_prefix='configure', error_ok=True)
    if is_verbose_mode() and configure_result.success():
        configure_result.print_output_to_stdout()
    configure_result.print_output_and_raise_error_if_failed()
    logging.info("Successfully ran configure in the postgres build directory")
def configure_postgres(self):
    if is_verbose_mode():
        logging.info("Running configure in the postgres build directory")
    # Don't enable -Werror when running configure -- that can affect the resulting
    # configuration.
    self.set_env_vars('configure')
    configure_cmd_line = [
        './configure',
        '--prefix', self.pg_prefix,
        '--enable-depend',
        # We're enabling debug symbols for all types of builds.
        '--enable-debug']
    if not get_bool_env_var('YB_NO_PG_CONFIG_CACHE'):
        configure_cmd_line.append('--config-cache')

    # We get readline-related errors in ASAN/TSAN, so let's disable readline there.
    if self.build_type in ['asan', 'tsan']:
        configure_cmd_line += ['--without-readline']

    if self.build_type != 'release':
        configure_cmd_line += ['--enable-cassert']
    configure_result = run_program(configure_cmd_line, error_ok=True)
    if configure_result.failure():
        rerun_configure = False
        for line in configure_result.stderr.splitlines():
            if REMOVE_CONFIG_CACHE_MSG_RE.search(line.strip()):
                logging.info("Configure failed because of stale config.cache, re-running.")
                run_program('rm -f config.cache')
                rerun_configure = True
                break
        if not rerun_configure:
            logging.error("Standard error from configure:\n" + configure_result.stderr)
            raise RuntimeError("configure failed")
        configure_result = run_program(configure_cmd_line)
    if is_verbose_mode():
        configure_result.print_output_to_stdout()
    write_program_output_to_file('configure', configure_result, self.pg_build_root)
    logging.info("Successfully ran configure in the postgres build directory")
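# REMOVE_CONFIG_CACHE_MSG_RE, used by configure_postgres() above, is not defined in this
# section. A plausible sketch, assuming it matches autoconf's standard advice to run
# "rm config.cache ... and start over" when a stale config.cache is detected (the exact
# pattern in the real code may differ):
import re

REMOVE_CONFIG_CACHE_MSG_RE = re.compile(r"rm\s+config\.cache.*start over")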
def main() -> None:
    parser = argparse.ArgumentParser(
        description='A tool for working with the dependency graph')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable debug output')
    parser.add_argument('-r', '--rebuild-graph',
                        action='store_true',
                        help='Rebuild the dependency graph and save it to a file')
    parser.add_argument('--node-type',
                        help='Node type to look for',
                        type=NodeType,
                        choices=list(NodeType),
                        default=NodeType.ANY)
    parser.add_argument(
        '--file-regex',
        help='Regular expression for file names to select as initial nodes for '
             'querying the dependency graph.')
    parser.add_argument(
        '--file-name-glob',
        help='Like file-regex, but applies only to file name and uses the glob '
             'syntax instead of regex.')
    parser.add_argument(
        '--git-diff',
        help='Figure out the list of files to use as starting points in the '
             'dependency graph traversal by diffing the current state of the code '
             'against this commit. This could also be anything that could be '
             'passed to "git diff" as a single argument.')
    parser.add_argument(
        '--git-commit',
        help='Similar to --git-diff, but takes a git commit ref (e.g. sha1 or '
             'branch) and uses the set of files from that commit.')
    parser.add_argument(
        '--build-root', required=True,
        help='E.g. <some_root>/build/debug-gcc-dynamic-community')
    parser.add_argument('command',
                        type=Command,
                        choices=list(Command),
                        help='Command to perform')
    parser.add_argument(
        '--output-test-config',
        help='Output a "test configuration file", which is a JSON containing the '
             'resulting list of C++ tests to run to this file, a flag indicating '
             'whether to run Java tests or not, etc.')
    parser.add_argument(
        '--incomplete-build',
        action='store_true',
        help='Skip checking for file existence. Allows using the tool after '
             'build artifacts have been deleted.')
    parser.add_argument(
        '--build-args',
        help='Extra arguments to pass to yb_build.sh. The build is invoked e.g. '
             'if the compilation database file is missing.')
    parser.add_argument(
        '--link-cmd-out-file',
        help='For the %s command, write the linker arguments (one per line) '
             'to the given file.' % Command.LINK_WHOLE_PROGRAM.value)
    parser.add_argument(
        '--lto-output-suffix',
        default="-lto",
        help='The suffix to append to LTO-enabled binaries produced by '
             'the %s command' % Command.LINK_WHOLE_PROGRAM.value)
    parser.add_argument(
        '--run-linker',
        help='Whether to actually run the linker. Setting this to false might be useful when '
             'debugging, combined with --link-cmd-out-file.',
        type=arg_str_to_bool,
        default=True)

    args = parser.parse_args()

    if args.file_regex and args.file_name_glob:
        raise RuntimeError('--file-regex and --file-name-glob are incompatible')

    cmd = args.command
    if (not args.file_regex and
            not args.file_name_glob and
            not args.rebuild_graph and
            not args.git_diff and
            not args.git_commit and
            cmd not in COMMANDS_NOT_NEEDING_TARGET_SET):
        raise RuntimeError(
            "Neither of --file-regex, --file-name-glob, --git-{diff,commit}, or "
            "--rebuild-graph are specified, and the command is not one of: " +
            ", ".join([cmd.value for cmd in COMMANDS_NOT_NEEDING_TARGET_SET]))

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    conf = DepGraphConf(
        verbose=args.verbose,
        build_root=args.build_root,
        incomplete_build=args.incomplete_build,
        file_regex=args.file_regex,
        file_name_glob=args.file_name_glob,
        build_args=args.build_args)
    if conf.file_regex and args.git_diff:
        raise RuntimeError("--git-diff is incompatible with --file-{regex,name-glob}")

    if args.git_diff and args.git_commit:
        raise RuntimeError('--git-diff and --git-commit are incompatible')

    if args.git_commit:
        args.git_diff = "{}^..{}".format(args.git_commit, args.git_commit)

    graph_cache_path = os.path.join(args.build_root, 'dependency_graph.json')
    if args.rebuild_graph or not os.path.isfile(graph_cache_path):
        logging.info("Generating a dependency graph at '{}'".format(graph_cache_path))
        dep_graph_builder = DependencyGraphBuilder(conf)
        dep_graph = dep_graph_builder.build()
        dep_graph.save_as_json(graph_cache_path)
    else:
        start_time = datetime.now()
        with open(graph_cache_path) as graph_input_file:
            dep_graph = DependencyGraph(conf, json_data=json.load(graph_input_file))
        logging.info("Loaded dependency graph from '%s' in %.2f sec" %
                     (graph_cache_path, (datetime.now() - start_time).total_seconds()))
        dep_graph.validate_node_existence()

    # ---------------------------------------------------------------------------------------------
    # Commands that do not require an "initial set of targets"
    # ---------------------------------------------------------------------------------------------

    if cmd == Command.SELF_TEST:
        run_self_test(dep_graph)
        return
    if cmd == Command.DEBUG_DUMP:
        dep_graph.dump_debug_info()
        return

    # ---------------------------------------------------------------------------------------------
    # Figure out the initial set of targets based on a git commit, a regex, etc.
    # ---------------------------------------------------------------------------------------------

    updated_categories: Set[SourceFileCategory] = set()
    file_changes = []
    initial_nodes: Iterable[Node]
    if args.git_diff:
        old_working_dir = os.getcwd()
        with WorkDirContext(conf.yb_src_root):
            git_diff_output = subprocess.check_output(
                ['git', 'diff', args.git_diff, '--name-only']).decode('utf-8')

            initial_nodes = set()
            file_paths = set()
            for file_path in git_diff_output.split("\n"):
                file_path = file_path.strip()
                if not file_path:
                    continue
                file_changes.append(file_path)
                # It is important that we invoke os.path.realpath with the current directory set to
                # the git repository root.
                file_path = os.path.realpath(file_path)
                file_paths.add(file_path)
                node = dep_graph.node_by_path.get(file_path)
                if node:
                    initial_nodes.add(node)

        if not initial_nodes:
            logging.warning("Did not find any graph nodes for this set of files: %s", file_paths)
            for basename in set([os.path.basename(file_path) for file_path in file_paths]):
                logging.warning("Nodes for basename '{}': {}".format(
                    basename, dep_graph.find_nodes_by_basename(basename)))
    elif conf.file_regex:
        logging.info("Using file name regex: {}".format(conf.file_regex))
        initial_nodes = dep_graph.find_nodes_by_regex(conf.file_regex)
        if not initial_nodes:
            logging.warning("Did not find any graph nodes for this pattern: %s", conf.file_regex)
        for node in initial_nodes:
            file_changes.append(node.path)
    else:
        raise RuntimeError("Could not figure out how to generate the initial set of files")

    file_changes = [
        (os.path.relpath(file_path, conf.yb_src_root) if os.path.isabs(file_path) else file_path)
        for file_path in file_changes
    ]

    if cmd == Command.LINK_WHOLE_PROGRAM:
        link_whole_program(
            dep_graph=dep_graph,
            initial_nodes=initial_nodes,
            link_cmd_out_file=args.link_cmd_out_file,
            run_linker=args.run_linker,
            lto_output_suffix=args.lto_output_suffix)
        return

    file_changes_by_category: Dict[SourceFileCategory, List[str]] = group_by(
        file_changes, get_file_category)

    # Same as file_changes_by_category, but with string values of categories instead of enum
    # elements.
    file_changes_by_category_str: Dict[str, List[str]] = {}
    for category, changes in file_changes_by_category.items():
        logging.info("File changes in category '%s':", category)
        for change in sorted(changes):
            logging.info("    %s", change)
        file_changes_by_category_str[category.value] = changes

    updated_categories = set(file_changes_by_category.keys())

    results: Set[Node] = set()
    if cmd == Command.AFFECTED:
        results = dep_graph.find_affected_nodes(set(initial_nodes), args.node_type)
    elif cmd == Command.DEPS:
        for node in initial_nodes:
            results.update(node.deps)
    elif cmd == Command.REVERSE_DEPS:
        for node in initial_nodes:
            results.update(node.reverse_deps)
    else:
        raise ValueError("Unimplemented command '{}'".format(cmd))

    if args.output_test_config:
        test_basename_list = sorted(
            [os.path.basename(node.path) for node in results
             if node.node_type == NodeType.TEST])
        affected_basenames = set([os.path.basename(node.path) for node in results])

        # These are ALL tests, not just tests affected by the changes in question, used mostly
        # for logging.
        all_test_programs = [
            node for node in dep_graph.get_nodes() if node.node_type == NodeType.TEST]
        all_test_basenames = set([os.path.basename(node.path) for node in all_test_programs])

        # A very conservative way to decide whether to run all tests. If there are changes in any
        # categories (meaning the changeset is non-empty), and there are changes in categories
        # other than C++ / Java / files known not to affect unit tests, we force re-running all
        # tests.
        unsafe_categories = updated_categories - CATEGORIES_NOT_CAUSING_RERUN_OF_ALL_TESTS
        user_said_all_tests = get_bool_env_var('YB_RUN_ALL_TESTS')

        test_filter_re = os.getenv('YB_TEST_EXECUTION_FILTER_RE')
        manual_test_filtering_with_regex = bool(test_filter_re)

        select_all_tests_for_now = (
            bool(unsafe_categories) or user_said_all_tests or manual_test_filtering_with_regex)

        user_said_all_cpp_tests = get_bool_env_var('YB_RUN_ALL_CPP_TESTS')
        user_said_all_java_tests = get_bool_env_var('YB_RUN_ALL_JAVA_TESTS')
        cpp_files_changed = SourceFileCategory.CPP in updated_categories
        java_files_changed = SourceFileCategory.JAVA in updated_categories
        yb_master_or_tserver_changed = bool(affected_basenames & set(['yb-master', 'yb-tserver']))

        run_cpp_tests = select_all_tests_for_now or cpp_files_changed or user_said_all_cpp_tests

        run_java_tests = (
            select_all_tests_for_now or java_files_changed or yb_master_or_tserver_changed or
            user_said_all_java_tests)

        if select_all_tests_for_now:
            if user_said_all_tests:
                logging.info("User explicitly specified that all tests should be run")
            elif manual_test_filtering_with_regex:
                logging.info(
                    "YB_TEST_EXECUTION_FILTER_RE specified: %s, will filter tests at a later step",
                    test_filter_re)
            else:
                logging.info(
                    "All tests should be run based on file changes in these categories: {}".format(
                        ', '.join(sorted([category.value for category in unsafe_categories]))))
        else:
            if run_cpp_tests:
                if user_said_all_cpp_tests:
                    logging.info("User explicitly specified that all C++ tests should be run")
                else:
                    logging.info('Will run some C++ tests, some C++ files changed')

            if run_java_tests:
                if user_said_all_java_tests:
                    logging.info("User explicitly specified that all Java tests should be run")
                else:
                    logging.info('Will run all Java tests, ' + ' and '.join(
                        (['some Java files changed'] if java_files_changed else []) +
                        (['yb-{master,tserver} binaries changed']
                         if yb_master_or_tserver_changed else [])))

        if run_cpp_tests and not test_basename_list and not select_all_tests_for_now:
            logging.info('There are no C++ test programs affected by the changes, '
                         'will skip running C++ tests.')
            run_cpp_tests = False

        test_conf = dict(
            run_cpp_tests=run_cpp_tests,
            run_java_tests=run_java_tests,
            file_changes_by_category=file_changes_by_category_str)
        if test_filter_re:
            test_conf.update(test_filter_re=test_filter_re)

        if not select_all_tests_for_now:
            # We only have this kind of fine-grained filtering for C++ test programs, and for Java
            # tests we either run all of them or none.
            test_conf['cpp_test_programs'] = test_basename_list
            if len(all_test_basenames) > 0:
                logging.info(
                    "{} C++ test programs should be run (out of {} possible, {}%)".format(
                        len(test_basename_list),
                        len(all_test_basenames),
                        "%.1f" % (100.0 * len(test_basename_list) / len(all_test_basenames))))
            if len(test_basename_list) != len(all_test_basenames):
                logging.info("The following C++ test programs will be run: {}".format(
                    ", ".join(sorted(test_basename_list))))

        with open(args.output_test_config, 'w') as output_file:
            output_file.write(json.dumps(test_conf, indent=2) + "\n")
        logging.info("Wrote a test configuration to {}".format(args.output_test_config))
    else:
        # For ad-hoc command-line use, mostly for testing and sanity-checking.
        for node in sorted(results, key=lambda node: [node.node_type.value, node.path]):
            print(node)
        logging.info("Found {} results".format(len(results)))
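# arg_str_to_bool is used above as the argparse "type" callable for --run-linker, but it
# is not defined in this section. A minimal sketch, assuming the usual idiom of accepting
# true/false-like strings (the real helper in the YugabyteDB codebase may differ):
import argparse


def arg_str_to_bool(value):
    """Parse a boolean command-line argument such as --run-linker=false."""
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value, got: '%s'" % value)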
def configure_postgres(self) -> None:
    if is_verbose_mode():
        logging.info("Running configure in the postgres build directory")
    # Don't enable -Werror when running configure -- that can affect the resulting
    # configuration.
    configure_cmd_line = [
        './configure',
        '--prefix', self.pg_prefix,
        '--with-extra-version=-YB-' + self.get_yb_version(),
        '--enable-depend',
        '--with-icu',
        '--with-ldap',
        '--with-openssl',
        '--with-gssapi',
        # Options are ossp (original/old implementation), bsd (BSD) and e2fs
        # (libuuid-based for Unix/Mac).
        '--with-uuid=e2fs',
        '--with-libedit-preferred',
        '--with-includes=' + self.openssl_include_dir,
        '--with-libraries=' + self.openssl_lib_dir,
        # We're enabling debug symbols for all types of builds.
        '--enable-debug']
    if is_macos_arm64():
        configure_cmd_line.insert(0, '/opt/homebrew/bin/bash')

    if not get_bool_env_var('YB_NO_PG_CONFIG_CACHE'):
        configure_cmd_line.append('--config-cache')

    # We get readline-related errors in ASAN/TSAN, so let's disable readline there.
    if self.build_type in ['asan', 'tsan']:
        # TODO: do we still need this limitation?
        configure_cmd_line += ['--without-readline']

    if self.build_type != 'release':
        configure_cmd_line += ['--enable-cassert']
    # Unset YB_SHOW_COMPILER_COMMAND_LINE when configuring postgres to avoid unintended side
    # effects from additional compiler output.
    with EnvVarContext(YB_SHOW_COMPILER_COMMAND_LINE=None):
        configure_result = run_program(configure_cmd_line, error_ok=True)
    if configure_result.failure():
        rerun_configure = False
        for line in configure_result.stderr.splitlines():
            if REMOVE_CONFIG_CACHE_MSG_RE.search(line.strip()):
                logging.info("Configure failed because of stale config.cache, re-running.")
                run_program('rm -f config.cache')
                rerun_configure = True
                break
        if not rerun_configure:
            logging.error("Standard error from configure:\n" + configure_result.stderr)
            config_log_path = os.path.join(self.pg_build_root, "config.log")
            if os.path.exists(config_log_path):
                with open(config_log_path) as config_log_file:
                    config_log_str = config_log_file.read()
                logging.info(f"Contents of {config_log_path}:")
                sys.stderr.write(config_log_str + "\n")
            else:
                logging.warning(f"File not found: {config_log_path}")
            raise RuntimeError("configure failed")
        configure_result = run_program(
            configure_cmd_line, shell=True, stdout_stderr_prefix='configure', error_ok=True)
    if is_verbose_mode() and configure_result.success():
        configure_result.print_output_to_stdout()
    configure_result.print_output_and_raise_error_if_failed()
    logging.info("Successfully ran configure in the postgres build directory")
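# EnvVarContext is used above to temporarily unset YB_SHOW_COMPILER_COMMAND_LINE while
# running configure; it is not defined in this section. A minimal sketch of such a
# context manager, assuming a value of None means "remove the variable" and that the
# prior values are restored on exit (the real implementation may differ):
import os
from contextlib import contextmanager


@contextmanager
def EnvVarContext(**env_vars):
    """Temporarily set (or unset, for None values) environment variables."""
    saved = {name: os.environ.get(name) for name in env_vars}
    try:
        for name, value in env_vars.items():
            if value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = str(value)
        yield
    finally:
        for name, old_value in saved.items():
            if old_value is None:
                os.environ.pop(name, None)
            else:
                os.environ[name] = old_value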
def main():
    parser = argparse.ArgumentParser(
        description='A tool for working with the dependency graph')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable debug output')
    parser.add_argument('-r', '--rebuild-graph',
                        action='store_true',
                        help='Rebuild the dependency graph and save it to a file')
    parser.add_argument('--node-type',
                        help='Node type to look for',
                        default='any',
                        choices=['test', 'object', 'library', 'source', 'any'])
    parser.add_argument(
        '--file-regex',
        help='Regular expression for file names to select as initial nodes for '
             'querying the dependency graph.')
    parser.add_argument(
        '--file-name-glob',
        help='Like file-regex, but applies only to file name and uses the glob '
             'syntax instead of regex.')
    parser.add_argument(
        '--git-diff',
        help='Figure out the list of files to use as starting points in the '
             'dependency graph traversal by diffing the current state of the code '
             'against this commit. This could also be anything that could be '
             'passed to "git diff" as a single argument.')
    parser.add_argument(
        '--git-commit',
        help='Similar to --git-diff, but takes a git commit ref (e.g. sha1 or '
             'branch) and uses the set of files from that commit.')
    parser.add_argument(
        '--build-root', required=True,
        help='E.g. <some_root>/build/debug-gcc-dynamic-community')
    parser.add_argument('command', choices=COMMANDS, help='Command to perform')
    parser.add_argument(
        '--output-test-config',
        help='Output a "test configuration file", which is a JSON containing the '
             'resulting list of C++ tests to run to this file, a flag indicating '
             'whether to run Java tests or not, etc.')
    parser.add_argument(
        '--incomplete-build',
        action='store_true',
        help='Skip checking for file existence. Allows using the tool after '
             'build artifacts have been deleted.')
    args = parser.parse_args()

    if args.file_regex and args.file_name_glob:
        raise RuntimeError('--file-regex and --file-name-glob are incompatible')

    cmd = args.command
    if (not args.file_regex and
            not args.file_name_glob and
            not args.rebuild_graph and
            not args.git_diff and
            not args.git_commit and
            cmd != SELF_TEST_CMD):
        raise RuntimeError(
            "Neither of --file-regex, --file-name-glob, --git-{diff,commit}, or "
            "--rebuild-graph are specified, and the command is not " + SELF_TEST_CMD)

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    conf = Configuration(args)
    if conf.file_regex and args.git_diff:
        raise RuntimeError("--git-diff is incompatible with --file-{regex,name-glob}")

    if args.git_diff and args.git_commit:
        raise RuntimeError('--git-diff and --git-commit are incompatible')

    if args.git_commit:
        args.git_diff = "{}^..{}".format(args.git_commit, args.git_commit)

    graph_cache_path = os.path.join(args.build_root, 'dependency_graph.json')
    if args.rebuild_graph or not os.path.isfile(graph_cache_path):
        logging.info("Generating a dependency graph at '{}'".format(graph_cache_path))
        dep_graph_builder = DependencyGraphBuilder(conf)
        dep_graph = dep_graph_builder.build()
        dep_graph.save_as_json(graph_cache_path)
    else:
        start_time = datetime.now()
        with open(graph_cache_path) as graph_input_file:
            dep_graph = DependencyGraph(conf, json_data=json.load(graph_input_file))
        logging.info("Loaded dependency graph from '%s' in %.2f sec" %
                     (graph_cache_path, (datetime.now() - start_time).total_seconds()))
        dep_graph.validate_node_existence()

    if cmd == SELF_TEST_CMD:
        run_self_test(dep_graph)
        return

    updated_categories = None
    file_changes = []
    if args.git_diff:
        old_working_dir = os.getcwd()
        with WorkDirContext(conf.yb_src_root):
            # Decode the output: subprocess.check_output returns bytes on Python 3.
            git_diff_output = subprocess.check_output(
                ['git', 'diff', args.git_diff, '--name-only']).decode('utf-8')

            initial_nodes = set()
            file_paths = set()
            for file_path in git_diff_output.split("\n"):
                file_path = file_path.strip()
                if not file_path:
                    continue
                file_changes.append(file_path)
                # It is important that we invoke os.path.realpath with the current directory set to
                # the git repository root.
                file_path = os.path.realpath(file_path)
                file_paths.add(file_path)
                node = dep_graph.node_by_path.get(file_path)
                if node:
                    initial_nodes.add(node)

        if not initial_nodes:
            logging.warning("Did not find any graph nodes for this set of files: {}".format(
                file_paths))
            for basename in set([os.path.basename(file_path) for file_path in file_paths]):
                logging.warning("Nodes for basename '{}': {}".format(
                    basename, dep_graph.find_nodes_by_basename(basename)))

        file_changes_by_category = group_by(file_changes, get_file_category)
        for category, changes in file_changes_by_category.items():
            logging.info("File changes in category '{}':".format(category))
            for change in sorted(changes):
                logging.info("    {}".format(change))
        updated_categories = set(file_changes_by_category.keys())
    elif conf.file_regex:
        logging.info("Using file name regex: {}".format(conf.file_regex))
        initial_nodes = dep_graph.find_nodes_by_regex(conf.file_regex)
    else:
        raise RuntimeError("Could not figure out how to generate the initial set of files")

    results = set()
    if cmd == LIST_AFFECTED_CMD:
        results = dep_graph.find_affected_nodes(initial_nodes, args.node_type)
    elif cmd == LIST_DEPS_CMD:
        for node in initial_nodes:
            results.update(node.deps)
    elif cmd == LIST_REVERSE_DEPS_CMD:
        for node in initial_nodes:
            results.update(node.reverse_deps)
    else:
        # Use "cmd" here: "command" is not defined in this scope.
        raise RuntimeError("Unimplemented command '{}'".format(cmd))

    if args.output_test_config:
        test_basename_list = sorted(
            [os.path.basename(node.path) for node in results if node.node_type == 'test'])
        affected_basenames = set([os.path.basename(node.path) for node in results])

        # These are ALL tests, not just tests affected by the changes in question, used mostly
        # for logging.
        all_test_programs = [node for node in dep_graph.get_nodes() if node.node_type == 'test']
        all_test_basenames = set([os.path.basename(node.path) for node in all_test_programs])

        # A very conservative way to decide whether to run all tests. If there are changes in any
        # categories (meaning the changeset is non-empty), and there are changes in categories
        # other than C++ / Java / files known not to affect unit tests, we force re-running all
        # tests.
        unsafe_categories = updated_categories - CATEGORIES_NOT_CAUSING_RERUN_OF_ALL_TESTS
        user_said_all_tests = get_bool_env_var('YB_RUN_ALL_TESTS')
        run_all_tests = bool(unsafe_categories) or user_said_all_tests

        user_said_all_cpp_tests = get_bool_env_var('YB_RUN_ALL_CPP_TESTS')
        user_said_all_java_tests = get_bool_env_var('YB_RUN_ALL_JAVA_TESTS')
        cpp_files_changed = 'c++' in updated_categories
        java_files_changed = 'java' in updated_categories
        yb_master_or_tserver_changed = bool(affected_basenames & set(['yb-master', 'yb-tserver']))

        run_cpp_tests = run_all_tests or cpp_files_changed or user_said_all_cpp_tests
        run_java_tests = (run_all_tests or java_files_changed or yb_master_or_tserver_changed or
                          user_said_all_java_tests)

        if run_all_tests:
            if user_said_all_tests:
                logging.info("User explicitly specified that all tests should be run")
            else:
                logging.info(
                    "All tests should be run based on file changes in these categories: {}".format(
                        ', '.join(sorted(unsafe_categories))))
        else:
            if run_cpp_tests:
                if user_said_all_cpp_tests:
                    logging.info("User explicitly specified that all C++ tests should be run")
                else:
                    logging.info('Will run some C++ tests, some C++ files changed')

            if run_java_tests:
                if user_said_all_java_tests:
                    logging.info("User explicitly specified that all Java tests should be run")
                else:
                    logging.info('Will run all Java tests, ' + ' and '.join(
                        (['some Java files changed'] if java_files_changed else []) +
                        (['yb-{master,tserver} binaries changed']
                         if yb_master_or_tserver_changed else [])))

        if run_cpp_tests and not test_basename_list and not run_all_tests:
            logging.info('There are no C++ test programs affected by the changes, '
                         'will skip running C++ tests.')
            run_cpp_tests = False

        test_conf = dict(
            run_cpp_tests=run_cpp_tests,
            run_java_tests=run_java_tests,
            file_changes_by_category=file_changes_by_category)
        if not run_all_tests:
            test_conf['cpp_test_programs'] = test_basename_list
            logging.info(
                "{} C++ test programs should be run (out of {} possible, {}%)".format(
                    len(test_basename_list),
                    len(all_test_basenames),
                    "%.1f" % (100.0 * len(test_basename_list) / len(all_test_basenames))))
            if len(test_basename_list) != len(all_test_basenames):
                logging.info("The following C++ test programs will be run: {}".format(
                    ", ".join(sorted(test_basename_list))))

        with open(args.output_test_config, 'w') as output_file:
            output_file.write(json.dumps(test_conf, indent=2) + "\n")
        logging.info("Wrote a test configuration to {}".format(args.output_test_config))
    else:
        # For ad-hoc command-line use, mostly for testing and sanity-checking.
        for node in sorted(results, key=lambda node: [node.node_type, node.path]):
            print(node)
        logging.info("Found {} results".format(len(results)))
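# WorkDirContext is used above to run "git diff" from the repository root; it is not
# defined in this section. A minimal sketch, assuming it is a simple chdir-and-restore
# context manager -- this mirrors the manual os.chdir(...) / os.chdir(old_working_dir)
# pattern visible in the older version of main() below:
import os
from contextlib import contextmanager


@contextmanager
def WorkDirContext(work_dir):
    """Temporarily change the current working directory."""
    old_dir = os.getcwd()
    os.chdir(work_dir)
    try:
        yield
    finally:
        os.chdir(old_dir)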
def main():
    parser = argparse.ArgumentParser(
        description='A tool for working with the dependency graph')
    parser.add_argument('--verbose', action='store_true',
                        help='Enable debug output')
    parser.add_argument('-r', '--rebuild-graph',
                        action='store_true',
                        help='Rebuild the dependency graph and save it to a file')
    parser.add_argument('--node-type',
                        help='Node type to look for',
                        default='any',
                        choices=['test', 'object', 'library', 'source', 'any'])
    parser.add_argument(
        '--file-regex',
        help='Regular expression for file names to select as initial nodes for '
             'querying the dependency graph.')
    parser.add_argument(
        '--file-name-glob',
        help='Like file-regex, but applies only to file name and uses the glob '
             'syntax instead of regex.')
    parser.add_argument(
        '--git-diff',
        help='Figure out the list of files to use as starting points in the '
             'dependency graph traversal by diffing the current state of the code '
             'against this commit. This could also be anything that could be '
             'passed to "git diff" as a single argument.')
    parser.add_argument(
        '--git-commit',
        help='Similar to --git-diff, but takes a git commit ref (e.g. sha1 or '
             'branch) and uses the set of files from that commit.')
    parser.add_argument(
        '--build-root', required=True,
        help='E.g. <some_root>/build/debug-gcc-dynamic-community')
    parser.add_argument('command', choices=COMMANDS, help='Command to perform')
    parser.add_argument(
        '--output-test-config',
        help='Output a "test configuration file", which is a JSON containing the '
             'resulting list of C++ tests to run to this file, a flag indicating '
             'whether to run Java tests or not, etc.')
    parser.add_argument(
        '--incomplete-build',
        action='store_true',
        help='Skip checking for file existence. Allows using the tool after '
             'build artifacts have been deleted.')
    args = parser.parse_args()

    if args.file_regex and args.file_name_glob:
        raise RuntimeError('--file-regex and --file-name-glob are incompatible')

    cmd = args.command
    if (not args.file_regex and
            not args.file_name_glob and
            not args.rebuild_graph and
            not args.git_diff and
            not args.git_commit and
            cmd != SELF_TEST_CMD):
        raise RuntimeError(
            "Neither of --file-regex, --file-name-glob, --git-{diff,commit}, or "
            "--rebuild-graph are specified, and the command is not " + SELF_TEST_CMD)

    log_level = logging.INFO
    logging.basicConfig(
        level=log_level,
        format="[%(filename)s:%(lineno)d] %(asctime)s %(levelname)s: %(message)s")

    conf = Configuration(args)
    if conf.file_regex and args.git_diff:
        raise RuntimeError("--git-diff is incompatible with --file-{regex,name-glob}")

    if args.git_diff and args.git_commit:
        raise RuntimeError('--git-diff and --git-commit are incompatible')

    if args.git_commit:
        args.git_diff = "{}^..{}".format(args.git_commit, args.git_commit)

    graph_cache_path = os.path.join(args.build_root, 'dependency_graph.json')
    if args.rebuild_graph or not os.path.isfile(graph_cache_path):
        logging.info("Generating a dependency graph at '{}'".format(graph_cache_path))
        dep_graph_builder = DependencyGraphBuilder(conf)
        dep_graph = dep_graph_builder.build()
        dep_graph.save_as_json(graph_cache_path)
    else:
        start_time = datetime.now()
        with open(graph_cache_path) as graph_input_file:
            dep_graph = DependencyGraph(conf, json_data=json.load(graph_input_file))
        logging.info("Loaded dependency graph from '%s' in %.2f sec" %
                     (graph_cache_path, (datetime.now() - start_time).total_seconds()))
        dep_graph.validate_node_existence()

    if cmd == SELF_TEST_CMD:
        run_self_test(dep_graph)
        return

    updated_categories = None
    file_changes = []
    if args.git_diff:
        old_working_dir = os.getcwd()
        os.chdir(conf.yb_src_root)
        # Decode the output: subprocess.check_output returns bytes on Python 3.
        git_diff_output = subprocess.check_output(
            ['git', 'diff', args.git_diff, '--name-only']).decode('utf-8')

        initial_nodes = set()
        file_paths = set()
        for file_path in git_diff_output.split("\n"):
            file_path = file_path.strip()
            if not file_path:
                continue
            file_changes.append(file_path)
            # It is important that we invoke os.path.realpath with the current directory set to
            # the git repository root.
            file_path = os.path.realpath(file_path)
            file_paths.add(file_path)
            node = dep_graph.node_by_path.get(file_path)
            if node:
                initial_nodes.add(node)
        os.chdir(old_working_dir)

        if not initial_nodes:
            logging.warning("Did not find any graph nodes for this set of files: {}".format(
                file_paths))
            for basename in set([os.path.basename(file_path) for file_path in file_paths]):
                logging.warning("Nodes for basename '{}': {}".format(
                    basename, dep_graph.find_nodes_by_basename(basename)))

        file_changes_by_category = group_by(file_changes, get_file_category)
        for category, changes in file_changes_by_category.items():
            logging.info("File changes in category '{}':".format(category))
            for change in sorted(changes):
                logging.info("    {}".format(change))
        updated_categories = set(file_changes_by_category.keys())
    elif conf.file_regex:
        logging.info("Using file name regex: {}".format(conf.file_regex))
        initial_nodes = dep_graph.find_nodes_by_regex(conf.file_regex)
    else:
        raise RuntimeError("Could not figure out how to generate the initial set of files")

    results = set()
    if cmd == LIST_AFFECTED_CMD:
        results = dep_graph.find_affected_nodes(initial_nodes, args.node_type)
    elif cmd == LIST_DEPS_CMD:
        for node in initial_nodes:
            results.update(node.deps)
    elif cmd == LIST_REVERSE_DEPS_CMD:
        for node in initial_nodes:
            results.update(node.reverse_deps)
    else:
        # Use "cmd" here: "command" is not defined in this scope.
        raise RuntimeError("Unimplemented command '{}'".format(cmd))

    if args.output_test_config:
        test_basename_list = sorted(
            [os.path.basename(node.path) for node in results if node.node_type == 'test'])
        affected_basenames = set([os.path.basename(node.path) for node in results])

        # These are ALL tests, not just tests affected by the changes in question, used mostly
        # for logging.
        all_test_programs = [node for node in dep_graph.get_nodes() if node.node_type == 'test']
        all_test_basenames = set([os.path.basename(node.path) for node in all_test_programs])

        # A very conservative way to decide whether to run all tests. If there are changes in any
        # categories (meaning the changeset is non-empty), and there are changes in categories
        # other than C++ / Java / files known not to affect unit tests, we force re-running all
        # tests.
        unsafe_categories = updated_categories - CATEGORIES_NOT_CAUSING_RERUN_OF_ALL_TESTS
        user_said_all_tests = get_bool_env_var('YB_RUN_ALL_TESTS')
        run_all_tests = bool(unsafe_categories) or user_said_all_tests

        user_said_all_cpp_tests = get_bool_env_var('YB_RUN_ALL_CPP_TESTS')
        user_said_all_java_tests = get_bool_env_var('YB_RUN_ALL_JAVA_TESTS')
        cpp_files_changed = 'c++' in updated_categories
        java_files_changed = 'java' in updated_categories
        yb_master_or_tserver_changed = bool(affected_basenames & set(['yb-master', 'yb-tserver']))

        run_cpp_tests = run_all_tests or cpp_files_changed or user_said_all_cpp_tests
        run_java_tests = (run_all_tests or java_files_changed or yb_master_or_tserver_changed or
                          user_said_all_java_tests)

        if run_all_tests:
            if user_said_all_tests:
                logging.info("User explicitly specified that all tests should be run")
            else:
                logging.info(
                    "All tests should be run based on file changes in these categories: {}".format(
                        ', '.join(sorted(unsafe_categories))))
        else:
            if run_cpp_tests:
                if user_said_all_cpp_tests:
                    logging.info("User explicitly specified that all C++ tests should be run")
                else:
                    logging.info('Will run some C++ tests, some C++ files changed')

            if run_java_tests:
                if user_said_all_java_tests:
                    logging.info("User explicitly specified that all Java tests should be run")
                else:
                    logging.info('Will run all Java tests, ' + ' and '.join(
                        (['some Java files changed'] if java_files_changed else []) +
                        (['yb-{master,tserver} binaries changed']
                         if yb_master_or_tserver_changed else [])))

        if run_cpp_tests and not test_basename_list and not run_all_tests:
            logging.info('There are no C++ test programs affected by the changes, '
                         'will skip running C++ tests.')
            run_cpp_tests = False

        test_conf = dict(
            run_cpp_tests=run_cpp_tests,
            run_java_tests=run_java_tests,
            file_changes_by_category=file_changes_by_category)
        if not run_all_tests:
            test_conf['cpp_test_programs'] = test_basename_list
            logging.info(
                "{} C++ test programs should be run (out of {} possible, {}%)".format(
                    len(test_basename_list),
                    len(all_test_basenames),
                    "%.1f" % (100.0 * len(test_basename_list) / len(all_test_basenames))))
            if len(test_basename_list) != len(all_test_basenames):
                logging.info("The following C++ test programs will be run: {}".format(
                    ", ".join(sorted(test_basename_list))))

        with open(args.output_test_config, 'w') as output_file:
            output_file.write(json.dumps(test_conf, indent=2) + "\n")
        logging.info("Wrote a test configuration to {}".format(args.output_test_config))
    else:
        # For ad-hoc command-line use, mostly for testing and sanity-checking.
        for node in sorted(results, key=lambda node: [node.node_type, node.path]):
            print(node)
        logging.info("Found {} results".format(len(results)))
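# group_by is used above to bucket changed files by source-file category; it is not
# defined in this section. A minimal sketch, assuming it maps a key function over the
# items and groups the results (the actual helper lives in YugabyteDB's build utilities
# and may differ):
from collections import defaultdict


def group_by(items, key_fn):
    """Group items into a dict keyed by key_fn(item)."""
    result = defaultdict(list)
    for item in items:
        result[key_fn(item)].append(item)
    return dict(result)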