def run_all_combos(self, dirs):
    """Chunk the generated tests for every possible chunk count and verify
    that runtime-based chunking beats a naive round robin split."""
    tests = list(self.generate_tests(dirs))
    runtimes = self.get_runtimes(tests)

    def spread(chunked):
        # Gap between the slowest and the fastest chunk, summing runtimes
        # per unique manifest in each chunk.
        per_chunk = []
        for group in chunked:
            manifests = {t['manifest_relpath'] for t in group}
            per_chunk.append(sum(runtimes[m] for m in manifests))
        return max(per_chunk) - min(per_chunk)

    def assert_complete(chunked):
        # Chained chunks must contain every original test and be the same
        # length, even though chunk_by_runtime reorders them.
        flattened = list(chain.from_iterable(chunked))
        self.assertEqual(len(flattened), len(tests))
        for t in tests:
            self.assertIn(t, flattened)

    for total in range(1, len(dirs) + 1):
        chunks = [
            list(chunk_by_runtime(this, total, runtimes)(tests, {}))
            for this in range(1, total + 1)
        ]
        assert_complete(chunks)
        delta = spread(chunks)

        # Redo the chunking using a round robin style algorithm and sanity
        # check it the same way.
        rr_chunks = self.chunk_by_round_robin(tests, total, runtimes)
        assert_complete(rr_chunks)

        # Chunks never have exactly equal runtimes, so optimality is hard to
        # assert; require only that the algorithm beats naive round robin.
        self.assertLessEqual(delta, spread(rr_chunks))
def test_chunk_by_runtime(self):
    """Exercise chunk_by_runtime over a range of directory layouts."""
    random.seed(42)

    # Degenerate case: one chunk of zero tests yields nothing.
    chunker = chunk_by_runtime(1, 1, {})
    self.assertEqual(list(chunker([], {})), [])

    # Each entry maps a directory to the number of tests it contains.
    layouts = [
        {'a': 2},
        {'': 1, 'foo': 1, 'bar': 0, '/foobar': 1},
        {'a': 1, 'b': 1, 'a/b': 2, 'a/c': 1},
        {
            'a': 5,
            'a/b': 4,
            'a/b/c': 7,
            'a/b/c/d': 1,
            'a/b/c/e': 3,
            'b/c': 2,
            'b/d': 5,
            'b/d/e': 6,
            'c': 8,
            'c/d/e/f/g/h/i/j/k/l': 5,
            'c/d/e/f/g/i/j/k/l/m/n': 2,
            'c/e': 1,
        },
    ]
    for dirs in layouts:
        self.run_all_combos(dirs)
def test_chunk_by_runtime(self):
    """Run chunk_by_runtime through several directory layouts of
    increasing complexity."""
    random.seed(42)

    # Empty input with a single chunk produces an empty chunk.
    chunk = chunk_by_runtime(1, 1, {})
    self.assertEqual(list(chunk([], {})), [])

    # Trivial layout: a single directory.
    self.run_all_combos({"a": 2})

    # Edge-case directory names: empty, zero tests, leading slash.
    self.run_all_combos({"": 1, "foo": 1, "bar": 0, "/foobar": 1})

    # Nested directories.
    self.run_all_combos({"a": 1, "b": 1, "a/b": 2, "a/c": 1})

    # A deeper, more realistic tree.
    self.run_all_combos(
        {
            "a": 5,
            "a/b": 4,
            "a/b/c": 7,
            "a/b/c/d": 1,
            "a/b/c/e": 3,
            "b/c": 2,
            "b/d": 5,
            "b/d/e": 6,
            "c": 8,
            "c/d/e/f/g/h/i/j/k/l": 5,
            "c/d/e/f/g/i/j/k/l/m/n": 2,
            "c/e": 1,
        }
    )
def get_chunked_manifests(flavor, subsuite, chunks, mozinfo):
    """Compute which manifests should run in which chunks with the given
    category of tests.

    Args:
        flavor (str): The suite to run. Values are defined by the
            'build_flavor' key in `moztest.resolve.TEST_SUITES`.
        subsuite (str): The subsuite to run or 'undefined' to denote no
            subsuite.
        chunks (int): Number of chunks to split manifests across.
        mozinfo (frozenset): Set of data in the form of (<key>, <value>) used
            for filtering.

    Returns:
        A list of manifests where each item contains the manifest that should
        run in the corresponding chunk.
    """
    info = dict(mozinfo)

    # All tests for the given suite/subsuite, and their manifests.
    tests = get_tests(flavor, subsuite)
    all_manifests = {t['manifest_relpath'] for t in tests}

    # Filter down to just the active tests and their manifests.
    manifest = TestManifest()
    manifest.tests = tests
    active = manifest.active_tests(disabled=False, exists=False, **info)
    active_manifests = {t['manifest_relpath'] for t in active}

    # Run the chunking algorithm over the active tests.
    chunker = chunk_by_runtime(None, chunks, get_runtimes(info['os']))
    chunked_manifests = [c[1] for c in chunker.get_chunked_manifests(active)]

    # Skipped manifests go in the first chunk so they still show up in the
    # logs. They won't impact runtime much.
    chunked_manifests[0].extend(all_manifests - active_manifests)
    return chunked_manifests
def chunk_manifests(suite, platform, chunks, manifests):
    """Run the chunking algorithm.

    Args:
        suite (str): Name of the test suite; selects the chunking strategy.
        platform (str): Platform used to find runtime info.
        chunks (int): Number of chunks to split manifests into.
        manifests (list): Manifests to chunk.

    Returns:
        A list of length `chunks` where each item contains a list of
        manifests that run in that chunk.
    """
    manifests = set(manifests)

    if "web-platform-tests" not in suite:
        # Chunk by runtime, restricting the runtime data to the requested
        # manifests.
        runtimes = {
            m: rt
            for m, rt in get_runtimes(platform, suite).items()
            if m in manifests
        }
        chunker = chunk_by_runtime(None, chunks, runtimes)
        return [c[1] for c in chunker.get_chunked_manifests(manifests)]

    # web-platform-tests: spread the manifests evenly across all chunks in a
    # simple round robin over the sorted names.
    chunked_manifests = [[] for _ in range(chunks)]
    for index, name in enumerate(sorted(manifests)):
        chunked_manifests[index % chunks].append(name)

    # One last sort by the number of manifests so chunk sizes come out more
    # or less equal.
    chunked_manifests.sort(key=len)
    return chunked_manifests
def run_all_combos(self, dirs):
    """Chunk the generated tests for every possible chunk count and check
    the result beats a naive round robin split."""
    tests = list(self.generate_tests(dirs))
    runtimes = self.get_runtimes(tests)

    def spread(chunked):
        # Gap between the slowest and fastest chunk, counting only enabled
        # tests.
        totals = [
            sum(runtimes[t['relpath']] for t in group if 'disabled' not in t)
            for group in chunked
        ]
        return max(totals) - min(totals)

    for total in range(1, len(dirs) + 1):
        chunks = [
            list(chunk_by_runtime(this, total, runtimes)(tests, {}))
            for this in range(1, total + 1)
        ]

        # chunk_by_runtime reorders tests, but chained together the chunks
        # must contain every original test and be the same length.
        flattened = list(chain.from_iterable(chunks))
        self.assertEqual(len(flattened), len(tests))
        for t in tests:
            self.assertIn(t, flattened)

        delta = spread(chunks)

        # Redo the chunking using a round robin style algorithm. Chunks
        # never have exactly equal runtimes, so optimality is hard to
        # assert; require only that chunk_by_runtime beats round robin.
        rr_chunks = self.chunk_by_round_robin(tests, runtimes)
        self.assertLessEqual(delta, spread(rr_chunks))
def chunk_manifests(flavor, subsuite, platform, chunks, manifests):
    """Run the chunking algorithm.

    Args:
        flavor (str): The suite to run. Values are defined by the
            'build_flavor' key in `moztest.resolve.TEST_SUITES`.
        subsuite (str): The subsuite to run or 'undefined' to denote no
            subsuite.
        platform (str): Platform used to find runtime info.
        chunks (int): Number of chunks to split manifests into.
        manifests (list): Manifests to chunk.

    Returns:
        A list of length `chunks` where each item contains a list of
        manifests that run in that chunk.
    """
    # Obtain the suite definition given the flavor and subsuite which often
    # do not perfectly map onto the actual suite name in taskgraph.
    # This value will be used to retrieve runtime information for that suite.
    suite_name, _ = get_suite_definition(flavor, subsuite)
    runtimes = get_runtimes(platform, suite_name)

    if flavor != "web-platform-tests":
        return [
            c[1] for c in chunk_by_runtime(
                None, chunks, runtimes).get_chunked_manifests(manifests)
        ]

    # Map each manifest key to the wpt test paths that live under it.
    paths = {k: v for k, v in wpt_group_translation.items() if k in manifests}

    # Python2 does not support native dictionary sorting, so use an OrderedDict
    # instead, appending in order of highest to lowest runtime.
    runtimes = OrderedDict(
        sorted(runtimes.items(), key=lambda x: x[1], reverse=True))

    # Keep track of test paths for each chunk, and the runtime information.
    chunked_manifests = [[[], 0] for _ in range(chunks)]

    # Begin chunking the test paths in order from highest running time to
    # lowest. Iterate over a snapshot because `paths` is mutated in the loop
    # body; the original code also popped from `runtimes` mid-iteration,
    # which raises RuntimeError in Python 3.
    for key, rt in list(runtimes.items()):
        # Sort the chunks from fastest to slowest, based on runtime info
        # at x[1], then the number of test paths.
        chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))

        # Runtime data can reference keys that are not part of the requested
        # manifests; skip those instead of raising KeyError.
        test_paths = paths.get(key)
        if test_paths:
            # Add the full set of paths that live under the key and increase
            # the total runtime counter by the value reported in runtimes.
            chunked_manifests[0][0].extend(test_paths)
            chunked_manifests[0][1] += rt
            # Remove the key since it has been scheduled, so the spread-out
            # pass below does not schedule it a second time.
            paths.pop(key)

    # Sort again prior to the next step.
    chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))

    # Spread out the remaining test paths that were not scheduled in the
    # previous step. Such paths did not have runtime information associated,
    # likely due to implementation status.
    for index, key in enumerate(paths.keys()):
        # Append both the key and value in case the value is empty.
        chunked_manifests[index % chunks][0].append(key)

    # One last sort by the runtime, then number of test paths.
    chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))

    # Return just the chunked test paths.
    return [c[0] for c in chunked_manifests]
def get_chunked_manifests(flavor, subsuite, chunks, mozinfo):
    """Compute which manifests should run in which chunks with the given
    category of tests.

    Args:
        flavor (str): The suite to run. Values are defined by the
            'build_flavor' key in `moztest.resolve.TEST_SUITES`.
        subsuite (str): The subsuite to run or 'undefined' to denote no
            subsuite.
        chunks (int): Number of chunks to split manifests across.
        mozinfo (frozenset): Set of data in the form of (<key>, <value>) used
            for filtering.

    Returns:
        A list of manifests where each item contains the manifest that should
        run in the corresponding chunk.
    """
    mozinfo = dict(mozinfo)

    # Compute all tests for the given suite/subsuite.
    tests = get_tests(flavor, subsuite)

    if flavor == 'web-platform-tests':
        paths = tests_by_top_directory(tests, 3)

        # Filter out non-web-platform-test runtime information: wpt runtime
        # keys start with '/' and carry no file extension.
        runtimes = get_runtimes(mozinfo['os'])
        runtimes = [(k, v) for k, v in runtimes.items()
                    if k.startswith('/') and not os.path.splitext(k)[-1]]

        # Keep track of test paths for each chunk, and the runtime information.
        chunked_manifests = [[[], 0] for _ in range(chunks)]

        # First, chunk tests that have runtime information available.
        for key, rt in sorted(runtimes, key=lambda x: x[1], reverse=True):
            # Sort the chunks from fastest to slowest, based on runtime info
            # at x[1], then the number of test paths.
            chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))

            # Runtime data may reference paths that are absent from the
            # current test set; skip those instead of raising KeyError.
            test_paths = set(paths.get(key, []))
            if test_paths:
                # The full set of paths that live under the key must be added
                # to the chunks.
                # The smart scheduling algorithm uses paths of up to depth of 3
                # as generated by the WPT harness and adding the set of paths
                # will enable the scheduling algorithm to evaluate if the given
                # path should be scheduled or not.
                chunked_manifests[0][0].extend(test_paths)
                chunked_manifests[0][1] += rt
                # Remove from the list of paths that need scheduling.
                paths.pop(key)

        # Chunk remaining test paths that were not chunked in the previous
        # step. These are test paths that did not have runtime information
        # available.
        for test_paths in paths.values():
            chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))
            chunked_manifests[0][0].extend(set(test_paths))

        # One last sort by the runtime, then number of test paths.
        chunked_manifests.sort(key=lambda x: (x[1], len(x[0])))

        # Reassign variable to contain just the chunked test paths.
        chunked_manifests = [c[0] for c in chunked_manifests]
    else:
        chunker = chunk_by_runtime(None, chunks, get_runtimes(mozinfo['os']))
        all_manifests = set(chunker.get_manifest(t) for t in tests)

        # Compute only the active tests.
        m = TestManifest()
        m.tests = tests
        tests = m.active_tests(disabled=False, exists=False, **mozinfo)
        active_manifests = set(chunker.get_manifest(t) for t in tests)

        # Run the chunking algorithm.
        chunked_manifests = [
            c[1] for c in chunker.get_chunked_manifests(active_manifests)
        ]

        # Add all skipped manifests to the first chunk so they still show up
        # in the logs. They won't impact runtime much.
        skipped_manifests = all_manifests - active_manifests
        chunked_manifests[0].extend(skipped_manifests)

    return chunked_manifests