def collect_cpp_tests(max_tests, cpp_test_program_filter, cpp_test_program_re_str):
    """
    Collect C++ test programs to run and expand them into test descriptors.

    The candidate test programs are obtained by running ctest in the build root with
    YB_LIST_CTEST_TESTS_ONLY=1 and matching each output line against CTEST_TEST_PROGRAM_RE.
    The list is then narrowed by the optional regular expression, the optional basename filter
    and the optional max_tests cap. Programs for which is_one_shot_test() is true are passed
    through as-is; the remaining programs are expanded via a Spark job that maps
    parallel_list_test_descriptors over them.

    @param max_tests: maximum number of test programs to keep. If more programs are available, a
                      random subset of this size is used. Used in debugging. A false value
                      disables the cap.
    @param cpp_test_program_filter: a collection of C++ test program names (basenames) to be
                                    used as a filter. A false value disables the filter.
    @param cpp_test_program_re_str: a regular expression string to be used as a filter for the
                                    set of C++ test programs (matched with re.search against the
                                    path relative to the build root).
    @return: a list of yb_dist_tests.TestDescriptor objects, or an empty list if no test programs
             remain after filtering.
    """
    # NOTE(review): another definition of collect_cpp_tests appears later in this file and will
    # replace this one when the module is loaded -- confirm which copy is intended to stay.

    global_conf = yb_dist_tests.global_conf
    logging.info("Collecting the list of C++ test programs")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program([
        '/bin/bash',
        '-c',
        'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(
            global_conf.build_root)
    ])

    test_programs = []
    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            rel_ctest_prog_path = os.path.relpath(re_match.group(1), global_conf.build_root)
            test_programs.append(rel_ctest_prog_path)

    # De-duplicate and put the programs in a deterministic order.
    test_programs = sorted(set(test_programs))
    elapsed_time_sec = time.time() - start_time_sec
    logging.info("Collected %d test programs in %.2f sec" % (
        len(test_programs), elapsed_time_sec))

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [
            test_program for test_program in test_programs
            if cpp_test_program_re.search(test_program)
        ]
        logging.info("Filtered down to %d test programs using regular expression '%s'" %
                     (len(test_programs), cpp_test_program_re_str))

    if cpp_test_program_filter:
        cpp_test_program_filter = set(cpp_test_program_filter)
        unfiltered_test_programs = test_programs

        # test_program contains test paths relative to the root directory (including directory
        # names), and cpp_test_program_filter contains basenames only.
        test_programs = sorted(set([
            test_program for test_program in test_programs
            if os.path.basename(test_program) in cpp_test_program_filter
        ]))

        logging.info("Filtered down to %d test programs using the list from test conf file" %
                     len(test_programs))
        if unfiltered_test_programs and not test_programs:
            # This means we've filtered the list of C++ test programs down to an empty set.
            logging.info(
                ("NO MATCHING C++ TEST PROGRAMS FOUND! Test programs from conf file: {}, "
                 "collected from ctest before filtering: {}").format(
                    set_to_comma_sep_str(cpp_test_program_filter),
                    set_to_comma_sep_str(unfiltered_test_programs)))

    if max_tests and len(test_programs) > max_tests:
        logging.info("Randomly selecting {} test programs out of {} possible".format(
            max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    # Split the programs into those run as a single unit and those whose individual tests are
    # listed and run separately.
    fine_granularity_gtest_programs = []
    one_shot_test_programs = []
    for test_program in test_programs:
        if is_one_shot_test(test_program):
            one_shot_test_programs.append(test_program)
        else:
            fine_granularity_gtest_programs.append(test_program)

    logging.info(("Found {} gtest test programs where tests will be run separately, "
                  "{} test programs to be run on one shot").format(
                    len(fine_granularity_gtest_programs),
                    len(one_shot_test_programs)))

    test_programs = fine_granularity_gtest_programs
    logging.info(
        "Collecting gtest tests for {} test programs where tests will be run separately".format(
            len(test_programs)))

    start_time_sec = time.time()

    all_test_programs = fine_granularity_gtest_programs + one_shot_test_programs
    if len(all_test_programs) <= 5:
        app_name_details = ['test programs: [{}]'.format(', '.join(all_test_programs))]
    else:
        app_name_details = ['{} test programs'.format(len(all_test_programs))]

    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in hope to get some batching.
    # Floor division keeps the slice count an integer regardless of the Python version, and the
    # lower bound of 1 avoids requesting zero slices when every selected program is one-shot
    # (test_programs is empty in that case).
    num_slices = max(1, (len(test_programs) + 1) // 2)
    all_test_descriptor_lists = run_spark_action(
        lambda: spark_context.parallelize(
            test_programs, numSlices=num_slices).map(parallel_list_test_descriptors).collect()
    )
    elapsed_time_sec = time.time() - start_time_sec

    # One-shot programs are used as descriptor strings directly; the Spark job returns one list
    # of descriptor strings per program, which is flattened here.
    test_descriptor_strs = one_shot_test_programs + [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list
    ]
    logging.info("Collected the list of %d gtest tests in %.2f sec" % (
        len(test_descriptor_strs), elapsed_time_sec))

    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]
def collect_cpp_tests(max_tests, cpp_test_program_filter, cpp_test_program_re_str):
    """
    Collect C++ test programs to run and turn them into a list of test descriptors.

    Steps:
      1. Run ctest (with YB_LIST_CTEST_TESTS_ONLY=1) in the build root and extract test program
         paths from its output using CTEST_TEST_PROGRAM_RE; paths are made relative to the
         build root, de-duplicated and sorted.
      2. Apply the optional regular expression filter, then the optional basename filter, then
         the optional max_tests cap (random subset).
      3. Programs for which is_one_shot_test() is true are kept as-is. For all other programs a
         Spark job maps parallel_list_test_descriptors over them to obtain the descriptor
         strings.

    @param max_tests: maximum number of test programs to keep; when exceeded, a random subset of
                      this size is selected. Used in debugging. A false value means no limit.
    @param cpp_test_program_filter: a collection of C++ test program names (basenames) to be
                                    used as a filter. A false value means no filtering.
    @param cpp_test_program_re_str: a regular expression string to be used as a filter for the
                                    set of C++ test programs; it is applied with re.search to
                                    the path relative to the build root.
    @return: a list of yb_dist_tests.TestDescriptor objects; empty if no test programs are left
             after filtering.
    """
    # NOTE(review): an earlier definition of collect_cpp_tests exists in this file; this later
    # one is the definition that ends up bound to the name -- confirm the duplicate is intended.

    build_root = yb_dist_tests.global_conf.build_root
    logging.info("Collecting the list of C++ test programs")
    start_time_sec = time.time()
    ctest_cmd_result = command_util.run_program([
        '/bin/bash',
        '-c',
        'cd "{}" && YB_LIST_CTEST_TESTS_ONLY=1 ctest -j8 --verbose'.format(build_root)
    ])

    # Collect into a set right away: the same program may be reported more than once.
    program_set = set()
    for line in ctest_cmd_result.stdout.split("\n"):
        re_match = CTEST_TEST_PROGRAM_RE.match(line)
        if re_match:
            program_set.add(os.path.relpath(re_match.group(1), build_root))

    test_programs = sorted(program_set)
    logging.info("Collected %d test programs in %.2f sec",
                 len(test_programs), time.time() - start_time_sec)

    if cpp_test_program_re_str:
        cpp_test_program_re = re.compile(cpp_test_program_re_str)
        test_programs = [
            test_program for test_program in test_programs
            if cpp_test_program_re.search(test_program)
        ]
        logging.info("Filtered down to %d test programs using regular expression '%s'",
                     len(test_programs), cpp_test_program_re_str)

    if cpp_test_program_filter:
        cpp_test_program_filter = set(cpp_test_program_filter)
        unfiltered_test_programs = test_programs

        # test_program contains test paths relative to the root directory (including directory
        # names), and cpp_test_program_filter contains basenames only.
        test_programs = sorted({
            test_program for test_program in test_programs
            if os.path.basename(test_program) in cpp_test_program_filter
        })

        logging.info("Filtered down to %d test programs using the list from test conf file",
                     len(test_programs))
        if unfiltered_test_programs and not test_programs:
            # This means we've filtered the list of C++ test programs down to an empty set.
            logging.info(
                ("NO MATCHING C++ TEST PROGRAMS FOUND! Test programs from conf file: {}, "
                 "collected from ctest before filtering: {}").format(
                    set_to_comma_sep_str(cpp_test_program_filter),
                    set_to_comma_sep_str(unfiltered_test_programs)))

    if max_tests and len(test_programs) > max_tests:
        logging.info("Randomly selecting {} test programs out of {} possible".format(
            max_tests, len(test_programs)))
        random.shuffle(test_programs)
        test_programs = test_programs[:max_tests]

    if not test_programs:
        logging.info("Found no test programs")
        return []

    # Partition into programs that run as a whole ("one shot") and programs whose individual
    # gtest tests are listed and run separately.
    fine_granularity_gtest_programs = []
    one_shot_test_programs = []
    for test_program in test_programs:
        if is_one_shot_test(test_program):
            one_shot_test_programs.append(test_program)
        else:
            fine_granularity_gtest_programs.append(test_program)

    logging.info(("Found {} gtest test programs where tests will be run separately, "
                  "{} test programs to be run on one shot").format(
                    len(fine_granularity_gtest_programs),
                    len(one_shot_test_programs)))

    test_programs = fine_granularity_gtest_programs
    logging.info(
        "Collecting gtest tests for {} test programs where tests will be run separately".format(
            len(test_programs)))

    start_time_sec = time.time()

    all_test_programs = fine_granularity_gtest_programs + one_shot_test_programs
    if len(all_test_programs) <= 5:
        app_name_details = ['test programs: [{}]'.format(', '.join(all_test_programs))]
    else:
        app_name_details = ['{} test programs'.format(len(all_test_programs))]

    init_spark_context(app_name_details)
    set_global_conf_for_spark_jobs()

    # Use fewer "slices" (tasks) than there are test programs, in hope to get some batching.
    # "//" keeps the result an int on both Python 2 and 3 (plain "/" yields a float on Python 3),
    # and max(1, ...) prevents a slice count of zero when there are only one-shot programs.
    num_slices = max(1, (len(test_programs) + 1) // 2)
    all_test_descriptor_lists = run_spark_action(
        lambda: spark_context.parallelize(
            test_programs, numSlices=num_slices).map(parallel_list_test_descriptors).collect()
    )
    elapsed_time_sec = time.time() - start_time_sec

    # Flatten the per-program descriptor lists and put the one-shot programs first.
    test_descriptor_strs = one_shot_test_programs + [
        test_descriptor_str
        for test_descriptor_str_list in all_test_descriptor_lists
        for test_descriptor_str in test_descriptor_str_list
    ]
    logging.info("Collected the list of %d gtest tests in %.2f sec",
                 len(test_descriptor_strs), elapsed_time_sec)

    return [yb_dist_tests.TestDescriptor(s) for s in test_descriptor_strs]