def _grouped_atoms(self, atoms, max_executors, timing_file_path, project_directory):
    """
    Return atoms that are grouped for optimal CI performance. If a timing file exists, then use
    the TimeBasedAtomGrouper. If not, use the default AtomGrouper (groups each atom into its own
    subjob).

    :param atoms: all of the atoms to be run this time
    :type atoms: list[app.master.atom.Atom]
    :param max_executors: the maximum number of executors for this build
    :type max_executors: int
    :param timing_file_path: path to where the timing data file would be stored (if it exists) for this job
    :type timing_file_path: str
    :param project_directory: the project directory for this job, passed through to TimeBasedAtomGrouper
    :type project_directory: str
    :return: the grouped atoms
    :rtype: list[list[app.master.atom.Atom]]
    """
    atom_time_map = None

    if os.path.isfile(timing_file_path):
        with open(timing_file_path, 'r') as json_file:
            try:
                atom_time_map = json.load(json_file)
            except ValueError:
                # Corrupt/unparseable timing data is non-fatal; fall back to the default grouper.
                self._logger.warning('Failed to load timing data from file that exists {}', timing_file_path)

    # A missing or empty timing map means there is no historical data to group on.
    if atom_time_map:
        atom_grouper = TimeBasedAtomGrouper(atoms, max_executors, atom_time_map, project_directory)
    else:
        atom_grouper = AtomGrouper(atoms, max_executors)

    return atom_grouper.groupings()
def test_groupings_data_set_1(self):
    """Ten atoms with timing data and 2 executors should pack into the expected subjob groups."""
    new_atoms = self._mock_atoms(['atom_{}'.format(i) for i in range(1, 11)])
    old_atoms_with_times = {
        'atom_1': 1.0,
        'atom_2': 10.0,
        'atom_3': 11.0,
        'atom_4': 2.0,
        'atom_5': 10.0,
        'atom_6': 5.0,
        'atom_7': 2.0,
        'atom_8': 8.0,
        'atom_9': 10.0,
        'atom_10': 3.0,
    }
    expected_groupings = [
        ['atom_2', 'atom_3', 'atom_10'],
        ['atom_4', 'atom_5', 'atom_7', 'atom_9'],
        ['atom_8'],
        ['atom_6'],
        ['atom_1'],
    ]

    grouper = TimeBasedAtomGrouper(new_atoms, 2, old_atoms_with_times, 'some_project_directory')
    subjobs = grouper.groupings()

    self._assert_subjobs_match_expected_groupings(subjobs, expected_groupings)
def test_groupings_data_set_1(self):
    """Ten atoms with timing data and 2 executors should pack into the expected subjob groups."""
    new_atoms = [
        'atom_1', 'atom_2', 'atom_3', 'atom_4', 'atom_5',
        'atom_6', 'atom_7', 'atom_8', 'atom_9', 'atom_10',
    ]
    # Plain dict literal; the dict({...}) wrapper was a redundant copy of an already-literal dict.
    old_atoms_with_times = {
        'atom_1': 1.0,
        'atom_2': 10.0,
        'atom_3': 11.0,
        'atom_4': 2.0,
        'atom_5': 10.0,
        'atom_6': 5.0,
        'atom_7': 2.0,
        'atom_8': 8.0,
        'atom_9': 10.0,
        'atom_10': 3.0,
    }
    expected_groupings = [
        ['atom_2', 'atom_3', 'atom_10'],
        ['atom_4', 'atom_5', 'atom_7', 'atom_9'],
        ['atom_8'],
        ['atom_6'],
        ['atom_1'],
    ]

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 2, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    self._assert_groupings_equal(expected_groupings, subjobs)
def _grouped_atoms(self, atoms, max_executors, timing_file_path, project_directory):
    """
    Return atoms that are grouped for optimal CI performance. If a timing file exists, then use
    the TimeBasedAtomGrouper. If not, use the default AtomGrouper (groups each atom into its own
    subjob).

    :param atoms: all of the atoms to be run this time
    :type atoms: list[app.master.atom.Atom]
    :param max_executors: the maximum number of executors for this build
    :type max_executors: int
    :param timing_file_path: path to where the timing data file would be stored (if it exists) for this job
    :type timing_file_path: str
    :param project_directory: the project directory for this job, passed through to TimeBasedAtomGrouper
    :type project_directory: str
    :return: the grouped atoms
    :rtype: list[list[app.master.atom.Atom]]
    """
    atom_time_map = None

    if os.path.isfile(timing_file_path):
        with open(timing_file_path, 'r') as json_file:
            try:
                atom_time_map = json.load(json_file)
            except ValueError:
                # Corrupt/unparseable timing data is non-fatal; fall back to the default grouper.
                self._logger.warning('Failed to load timing data from file that exists {}', timing_file_path)

    # A missing or empty timing map means there is no historical data to group on.
    if atom_time_map:
        atom_grouper = TimeBasedAtomGrouper(atoms, max_executors, atom_time_map, project_directory)
    else:
        atom_grouper = AtomGrouper(atoms, max_executors)

    return atom_grouper.groupings()
def test_coalesce_new_atoms_with_no_atom_times(self):
    """Setting expected atom times should raise when there is no historic timing data at all."""
    project_directory = 'some_project_directory'
    new_atoms = self._mock_atoms(['atom_1', 'atom_2', 'atom_3'])
    empty_times = {}

    grouper = TimeBasedAtomGrouper(new_atoms, 3, empty_times, project_directory)

    with self.assertRaises(_AtomTimingDataError):
        grouper._set_expected_atom_times(new_atoms, empty_times, project_directory)
def test_coalesce_new_atoms_with_no_atom_times(self):
    """Coalescing should raise when there is no historic timing data at all."""
    new_atoms = ['atom_1', 'atom_2', 'atom_3']
    old_atoms_with_times = {}  # literal instead of dict(): same value, idiomatic and faster
    project_directory = 'some_project_directory'

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, project_directory)

    with self.assertRaises(_AtomTimingDataError):
        atom_grouper._coalesce_new_atoms_with_historic_times(new_atoms, old_atoms_with_times, project_directory)
def test_grouping_makes_atoms_with_no_timing_as_separate_subjobs(self):
    """Without timing data, each atom should be grouped into its own subjob."""
    new_atoms = self._mock_atoms(['atom_0', 'atom_1'])
    old_atoms_with_times = {}
    expected_number_of_subjobs = 2

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 2, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(expected_number_of_subjobs, len(subjobs))
def test_coalesce_new_atoms_with_some_atom_times(self):
    """Atoms missing from the historic data should inherit the maximum known atom time."""
    new_atoms = ['atom_2', 'atom_3', 'atom_4', 'atom_5']
    # Plain dict literal; the dict({...}) wrapper was a redundant copy.
    old_atoms_with_times = {'atom_1': 1.0, 'atom_2': 2.0, 'atom_3': 3.0}
    expected_contents = {'atom_2': 2.0, 'atom_3': 3.0, 'atom_4': 3.0, 'atom_5': 3.0}

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, 'some_project_directory')
    groups, total_time = atom_grouper._coalesce_new_atoms_with_historic_times(new_atoms, old_atoms_with_times, 'some_project_directory')

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(total_time, 11.0)
    self._assert_coalesced_contents(groups, expected_contents)
def test_coalesce_new_atoms_with_some_atom_times(self):
    """Atoms missing from the historic data should inherit the maximum known atom time."""
    new_atoms = self._mock_atoms(['atom_2', 'atom_3', 'atom_4', 'atom_5'])
    old_atoms_with_times = {'atom_1': 1.0, 'atom_2': 2.0, 'atom_3': 3.0}
    expected_contents = {'atom_2': 2.0, 'atom_3': 3.0, 'atom_4': 3.0, 'atom_5': 3.0}

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, 'some_project_directory')
    total_time = atom_grouper._set_expected_atom_times(new_atoms, old_atoms_with_times, 'some_project_directory')

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(total_time, 11.0)
    self._assert_coalesced_contents(new_atoms, expected_contents)
def test_grouping_makes_atoms_with_no_timing_as_separate_subjobs(self):
    """Without timing data, each atom should be grouped into its own subjob."""
    new_atoms = ['atom_0', 'atom_1']
    old_atoms_with_times = {}
    expected_number_of_subjobs = 2

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 2, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(expected_number_of_subjobs, len(subjobs))
def test_grouping_defaults_to_atom_grouper_when_no_timing_data_exists(self):
    """With no historic times, every one of the atoms should land in its own subjob."""
    num_atoms = 1000
    max_executors = 2
    new_atoms = self._mock_atoms(['atom_{}'.format(i) for i in range(num_atoms)])
    old_atoms_with_times = {}

    atom_grouper = TimeBasedAtomGrouper(new_atoms, max_executors, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(num_atoms, len(subjobs))
def test_coalesce_new_atoms_with_no_atom_times(self):
    """Coalescing should raise when there is no historic timing data at all."""
    new_atoms = ['atom_1', 'atom_2', 'atom_3']
    old_atoms_with_times = {}  # literal instead of dict(): same value, idiomatic and faster
    project_directory = 'some_project_directory'

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, project_directory)

    with self.assertRaises(_AtomTimingDataError):
        atom_grouper._coalesce_new_atoms_with_historic_times(
            new_atoms, old_atoms_with_times, project_directory)
def test_groupings_data_set_3(self):
    """Two large atoms plus a medium and two small ones should split across 4 subjobs."""
    new_atoms = self._mock_atoms(['atom_{}'.format(i) for i in range(1, 6)])
    old_atoms_with_times = {
        'atom_1': 100.0,
        'atom_2': 100.0,
        'atom_3': 50.0,
        'atom_4': 2.0,
        'atom_5': 2.0,
    }
    expected_groupings = [
        ['atom_1'],
        ['atom_2'],
        ['atom_3'],
        ['atom_4', 'atom_5'],
    ]

    grouper = TimeBasedAtomGrouper(new_atoms, 2, old_atoms_with_times, 'some_project_directory')
    subjobs = grouper.groupings()

    self._assert_subjobs_match_expected_groupings(subjobs, expected_groupings)
def test_groupings_data_set_3(self):
    """Two large atoms plus a medium and two small ones should split across 4 subjobs."""
    new_atoms = ['atom_1', 'atom_2', 'atom_3', 'atom_4', 'atom_5']
    # Plain dict literal; the dict({...}) wrapper was a redundant copy.
    old_atoms_with_times = {
        'atom_1': 100.0,
        'atom_2': 100.0,
        'atom_3': 50.0,
        'atom_4': 2.0,
        'atom_5': 2.0,
    }
    expected_groupings = [['atom_1'], ['atom_2'], ['atom_3'], ['atom_4', 'atom_5']]

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 2, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    self._assert_groupings_equal(expected_groupings, subjobs)
def test_coalesce_new_atoms_with_all_atom_times(self):
    """When every atom has historic timing data, expected times should match exactly."""
    new_atoms = self._mock_atoms(['atom_1', 'atom_2', 'atom_3'])
    old_atoms_with_times = {'atom_1': 1.0, 'atom_2': 2.0, 'atom_3': 3.0}
    expected_contents = {'atom_1': 1.0, 'atom_2': 2.0, 'atom_3': 3.0}

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, 'some_project_directory')
    total_time = atom_grouper._set_expected_atom_times(
        new_atoms, old_atoms_with_times, 'some_project_directory')

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(total_time, 6.0)
    self._assert_coalesced_contents(new_atoms, expected_contents)
def test_coalesce_new_atoms_with_no_atom_times(self):
    """Setting expected atom times should raise when there is no historic timing data at all."""
    project_directory = 'some_project_directory'
    atoms = self._mock_atoms(['atom_1', 'atom_2', 'atom_3'])
    no_times = {}

    grouper = TimeBasedAtomGrouper(atoms, 3, no_times, project_directory)

    with self.assertRaises(_AtomTimingDataError):
        grouper._set_expected_atom_times(atoms, no_times, project_directory)
def test_grouping_defaults_to_atom_grouper_when_no_timing_data_exists(self):
    """With no historic times, every one of the atoms should land in its own subjob."""
    num_atoms = 1000
    max_executors = 2
    new_atoms = self._mock_atoms(['atom_{}'.format(i) for i in range(num_atoms)])
    old_atoms_with_times = {}

    atom_grouper = TimeBasedAtomGrouper(
        new_atoms, max_executors, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(num_atoms, len(subjobs))
def test_coalesce_new_atoms_with_all_atom_times(self):
    """When every atom has historic timing data, coalesced times should match exactly."""
    new_atoms = ['atom_1', 'atom_2', 'atom_3']
    # Plain dict literal; the dict({...}) wrapper was a redundant copy.
    old_atoms_with_times = {'atom_1': 1.0, 'atom_2': 2.0, 'atom_3': 3.0}
    expected_contents = {'atom_1': 1.0, 'atom_2': 2.0, 'atom_3': 3.0}

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, 'some_project_directory')
    groups, total_time = atom_grouper._coalesce_new_atoms_with_historic_times(
        new_atoms, old_atoms_with_times, 'some_project_directory')

    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(total_time, 6.0)
    self._assert_coalesced_contents(groups, expected_contents)
def test_groupings_maintains_project_directory_in_returned_atoms(self):
    """Atoms whose paths include the project directory should keep the full path after grouping."""
    atom_paths = ['/var/clusterrunner/repos/scm/atom_{}'.format(i) for i in (1, 2, 3)]
    new_atoms = self._mock_atoms(atom_paths)
    old_atoms_with_times = {'atom_1': 100.0, 'atom_2': 100.0, 'atom_3': 100.0}
    expected_groupings = [[path] for path in atom_paths]

    grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, '/var/clusterrunner/repos')
    subjobs = grouper.groupings()

    self._assert_subjobs_match_expected_groupings(subjobs, expected_groupings)
def test_groupings_maintains_project_directory_in_returned_atoms(self):
    """Atoms whose paths include the project directory should keep the full path after grouping."""
    new_atoms = [
        '/var/clusterrunner/repos/scm/atom_1',
        '/var/clusterrunner/repos/scm/atom_2',
        '/var/clusterrunner/repos/scm/atom_3',
    ]
    # Plain dict literal; the dict({...}) wrapper was a redundant copy.
    old_atoms_with_times = {'atom_1': 100.0, 'atom_2': 100.0, 'atom_3': 100.0}
    expected_groupings = [
        ['/var/clusterrunner/repos/scm/atom_1'],
        ['/var/clusterrunner/repos/scm/atom_2'],
        ['/var/clusterrunner/repos/scm/atom_3'],
    ]

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, '/var/clusterrunner/repos')
    subjobs = atom_grouper.groupings()

    self._assert_groupings_equal(expected_groupings, subjobs)
def test_groupings_data_set_4(self):
    """With executors >= atoms, each of the atoms should end up in its own subjob."""
    new_atoms = ['atom_1', 'atom_2', 'atom_3', 'atom_4', 'atom_5']
    # Plain dict literal; the dict({...}) wrapper was a redundant copy.
    old_atoms_with_times = {
        'atom_1': 100.0,
        'atom_2': 100.0,
        'atom_3': 50.0,
        'atom_4': 2.0,
        'atom_5': 2.0,
    }
    expected_groupings = [['atom_1'], ['atom_2'], ['atom_3'], ['atom_4'], ['atom_5']]

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 5, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    self._assert_groupings_equal(expected_groupings, subjobs)
def test_groupings_maintains_project_directory_in_returned_atoms(self):
    """Atoms whose paths include the project directory should keep the full path after grouping."""
    new_atoms = [
        '/var/clusterrunner/repos/scm/atom_1',
        '/var/clusterrunner/repos/scm/atom_2',
        '/var/clusterrunner/repos/scm/atom_3',
    ]
    # Plain dict literal; the dict({...}) wrapper was a redundant copy.
    old_atoms_with_times = {
        'atom_1': 100.0,
        'atom_2': 100.0,
        'atom_3': 100.0,
    }
    expected_groupings = [
        ['/var/clusterrunner/repos/scm/atom_1'],
        ['/var/clusterrunner/repos/scm/atom_2'],
        ['/var/clusterrunner/repos/scm/atom_3'],
    ]

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, '/var/clusterrunner/repos')
    subjobs = atom_grouper.groupings()

    self._assert_groupings_equal(expected_groupings, subjobs)
def test_groupings_data_set_4(self):
    """With executors >= atoms, each of the atoms should end up in its own subjob."""
    new_atoms = self._mock_atoms(['atom_1', 'atom_2', 'atom_3', 'atom_4', 'atom_5'])
    old_atoms_with_times = {
        'atom_1': 100.0,
        'atom_2': 100.0,
        'atom_3': 50.0,
        'atom_4': 2.0,
        'atom_5': 2.0,
    }
    expected_groupings = [['atom_1'], ['atom_2'], ['atom_3'], ['atom_4'], ['atom_5']]

    atom_grouper = TimeBasedAtomGrouper(new_atoms, 5, old_atoms_with_times, 'some_project_directory')
    subjobs = atom_grouper.groupings()

    self._assert_subjobs_match_expected_groupings(subjobs, expected_groupings)
def test_groupings_maintains_project_directory_in_returned_atoms(self):
    """Atoms whose paths include the project directory should keep the full path after grouping."""
    paths = ['/var/clusterrunner/repos/scm/atom_{}'.format(i) for i in (1, 2, 3)]
    new_atoms = self._mock_atoms(paths)
    old_atoms_with_times = {
        'atom_1': 100.0,
        'atom_2': 100.0,
        'atom_3': 100.0,
    }
    expected_groupings = [[path] for path in paths]

    grouper = TimeBasedAtomGrouper(new_atoms, 3, old_atoms_with_times, '/var/clusterrunner/repos')
    subjobs = grouper.groupings()

    self._assert_subjobs_match_expected_groupings(subjobs, expected_groupings)