def _LockAllMachines(self, experiment):
  """Attempt to globally lock all of the machines requested for run.

  This method will use the AFE server to globally lock all of the
  machines requested for this crosperf run, to prevent any other
  crosperf runs from being able to update/use the machines while
  this experiment is running.

  In test mode no real locking happens; the machine list is simply
  recorded as "locked".

  Args:
    experiment: The experiment whose label chromeos_root is used to
      reach the AFE server.

  Raises:
    RuntimeError: If not a single machine could be locked.
  """
  if test_flag.GetTestMode():
    self.locked_machines = self._GetMachineList()
    self._experiment.locked_machines = self.locked_machines
  else:
    lock_mgr = afe_lock_machine.AFELockManager(
        self._GetMachineList(),
        '',
        experiment.labels[0].chromeos_root,
        None,
        log=self.l,
    )
    # Register any machines the AFE server does not know about yet as
    # local machines so they can be locked.
    for m in lock_mgr.machines:
      if not lock_mgr.MachineIsKnown(m):
        lock_mgr.AddLocalMachine(m)
    machine_states = lock_mgr.GetMachineStates('lock')
    lock_mgr.CheckMachineLocks(machine_states, 'lock')
    self.locked_machines = lock_mgr.UpdateMachines(True)
    self._experiment.locked_machines = self.locked_machines
    self._UpdateMachineList(self.locked_machines)
    # Drop machines we failed to lock from the machine manager so the
    # experiment only runs on machines we actually own.
    self._experiment.machine_manager.RemoveNonLockedMachines(
        self.locked_machines)
    # Idiomatic emptiness test (was: len(self.locked_machines) == 0).
    if not self.locked_machines:
      raise RuntimeError('Unable to lock any machines.')
def Run(self, chromeos_root, xbuddy_label, autotest_path):
  """Fetch the official test image (and autotest files) for a label.

  Resolves xbuddy_label to a build id, verifies the image exists in the
  archive bucket, downloads and uncompresses it, and downloads autotest
  files when no autotest_path was supplied.

  Returns:
    Tuple (image_path, autotest_path).

  Raises:
    MissingImage: If the official image cannot be found on the server.
  """
  release = self.GetBuildID(chromeos_root, xbuddy_label)
  gs_image = (
      'gs://chromeos-image-archive/%s/chromiumos_test_image.tar.xz' %
      release)

  # Verify that image exists for the build id before attempting to
  # download it (skipped entirely in test mode).
  ls_status = 0
  if not test_flag.GetTestMode():
    gs_util = os.path.join(chromeos_root, GS_UTIL)
    ls_status = self._ce.RunCommand('%s ls %s' % (gs_util, gs_image))
  if ls_status != 0:
    raise MissingImage('Cannot find official image: %s.' % gs_image)

  local_image = self.DownloadImage(chromeos_root, release, gs_image)
  self.UncompressImage(chromeos_root, release)

  if self.log_level != 'quiet':
    self._logger.LogOutput('Using image from %s.' % local_image)

  # Only fetch autotest files when the caller did not provide a path.
  if autotest_path == '':
    autotest_path = self.DownloadAutotestFiles(chromeos_root, release)

  return local_image, autotest_path
def test_test_flag(self):
  """Exercise the get/set round trip of the global test-mode flag."""
  # Verify that test_flag.is_test exists, that it is a list,
  # and that it contains 1 element.
  # isinstance is the idiomatic type check (was: type(...) is list).
  self.assertTrue(isinstance(test_flag.is_test, list))
  self.assertEqual(len(test_flag.is_test), 1)

  # Verify that getting the flag works and that the flag
  # contains False, its starting value.
  save_flag = test_flag.GetTestMode()
  self.assertFalse(save_flag)

  # Verify that setting the flag to True, then getting it, works.
  test_flag.SetTestMode(True)
  self.assertTrue(test_flag.GetTestMode())

  # Verify that setting the flag to False, then getting it, works.
  test_flag.SetTestMode(save_flag)
  self.assertFalse(test_flag.GetTestMode())

  # Verify that test_flag.is_test still exists, that it still is a
  # list, and that it still contains 1 element.
  self.assertTrue(isinstance(test_flag.is_test, list))
  self.assertEqual(len(test_flag.is_test), 1)
def StoreToCacheDir(self, cache_dir, machine_manager, key_list):
  """Persist this result into cache_dir.

  Writes the run's stdout/stderr/retval (pickled), the cache keys, an
  optional tarball of the autotest results directory, and the machine
  checksum into a temp directory, then atomically moves that directory
  into place as cache_dir.

  Args:
    cache_dir: Final destination directory for the cached result.
    machine_manager: Supplies the per-label machine checksum string.
    key_list: Iterable of cache-key strings to record.

  Raises:
    RuntimeError: If the autotest tarball cannot be created, or the
      temp directory cannot be moved to cache_dir.
  """
  # Create the dir if it doesn't exist.
  temp_dir = tempfile.mkdtemp()

  # Store to the temp directory.
  # NOTE: pickled to a text-mode file ('w') — Python-2-era convention.
  with open(os.path.join(temp_dir, RESULTS_FILE), 'w') as f:
    pickle.dump(self.out, f)
    pickle.dump(self.err, f)
    pickle.dump(self.retval, f)

  # Cache keys are only written in real runs, not in test mode.
  if not test_flag.GetTestMode():
    with open(os.path.join(temp_dir, CACHE_KEYS_FILE), 'w') as f:
      f.write('%s\n' % self.label.name)
      f.write('%s\n' % self.label.chrome_version)
      f.write('%s\n' % self.machine.checksum_string)
      for k in key_list:
        f.write(k)
        f.write('\n')

  if self.results_dir:
    # Tar up the autotest output, skipping volatile spool/log dirs.
    tarball = os.path.join(temp_dir, AUTOTEST_TARBALL)
    command = ('cd %s && '
               'tar '
               '--exclude=var/spool '
               '--exclude=var/log '
               '-cjf %s .' % (self.results_dir, tarball))
    ret = self.ce.RunCommand(command)
    if ret:
      raise RuntimeError("Couldn't store autotest output directory.")

  # Store machine info.
  # TODO(asharif): Make machine_manager a singleton, and don't pass it into
  # this function.
  with open(os.path.join(temp_dir, MACHINE_FILE), 'w') as f:
    f.write(machine_manager.machine_checksum_string[self.label.name])

  # Replace any stale cache entry before moving the new one into place.
  if os.path.exists(cache_dir):
    command = 'rm -rf {0}'.format(cache_dir)
    self.ce.RunCommand(command)

  # mv (rename) is effectively atomic on the same filesystem, so readers
  # never see a half-written cache entry.
  command = 'mkdir -p {0} && '.format(os.path.dirname(cache_dir))
  command += 'chmod g+x {0} && '.format(temp_dir)
  command += 'mv {0} {1}'.format(temp_dir, cache_dir)
  ret = self.ce.RunCommand(command)
  if ret:
    # Move failed: clean up the temp dir before reporting the error.
    command = 'rm -rf {0}'.format(temp_dir)
    self.ce.RunCommand(command)
    raise RuntimeError('Could not move dir %s to dir %s' % (temp_dir,
                                                            cache_dir))
def VerifyAutotestFilesExist(self, chromeos_root, build_id, package_file):
  """Quickly check whether an autotest package file is on the server.

  Returns:
    0 if the package exists on the server (or we are in test mode),
    1 if it could not be found.
  """
  remote_file = ('gs://chromeos-image-archive/%s/%s' %
                 (build_id, package_file))
  gs_tool = os.path.join(chromeos_root, GS_UTIL)

  if test_flag.GetTestMode():
    # Test mode skips the server round trip and assumes success.
    return 0

  ls_cmd = '%s ls %s' % (gs_tool, remote_file)
  if self.log_level != 'verbose':
    self._logger.LogOutput('CMD: %s' % ls_cmd)
  if self._ce.RunCommand(ls_cmd) != 0:
    print('(Warning: Could not find file %s )' % remote_file)
    return 1

  # Package exists on server
  return 0
def _UnlockAllMachines(self, experiment):
  """Attempt to globally unlock all of the machines requested for run.

  The method will use the AFE server to globally unlock all of the
  machines requested for this crosperf run.
  """
  # Nothing to do when no machines were locked, or in test mode.
  if not self.locked_machines:
    return
  if test_flag.GetTestMode():
    return

  manager = afe_lock_machine.AFELockManager(
      self.locked_machines,
      '',
      experiment.labels[0].chromeos_root,
      None,
      log=self.l,
  )
  states = manager.GetMachineStates('unlock')
  manager.CheckMachineLocks(states, 'unlock')
  manager.UpdateMachines(False)
def Cleanup(self):
  """Make sure all machines are unlocked.

  Uses the file-lock mechanism when locks_dir is set; otherwise releases
  any AFE-server locks taken earlier (no-op in test mode or when no
  machines were locked).
  """
  if self.locks_dir:
    # We are using the file locks mechanism, so call machine_manager.Cleanup
    # to unlock everything.
    self.machine_manager.Cleanup()
  else:
    if test_flag.GetTestMode():
      return

    all_machines = self.locked_machines
    if not all_machines:
      return

    # If we locked any machines earlier, make sure we unlock them now.
    lock_mgr = afe_lock_machine.AFELockManager(
        all_machines, '', self.labels[0].chromeos_root, None)
    machine_states = lock_mgr.GetMachineStates('unlock')
    # items() works on both Python 2 and 3; iteritems() is Python-2-only.
    for k, state in machine_states.items():
      if state['locked']:
        lock_mgr.UpdateLockInAFE(False, k)
def Telemetry_Run(self, machine, label, benchmark, profiler_args):
  """Run a telemetry benchmark on a remote machine via run_measurement.

  Builds and executes the run_measurement command from the label's chrome
  source tree, targeting `machine` over ssh with the testing RSA key.

  Returns:
    The result of RunCommandWOutput for the telemetry command.
  """
  perf_dir = ''
  if not os.path.isdir(label.chrome_src):
    # LogFatal reports a missing chrome source tree.
    self.logger.LogFatal('Cannot find chrome src dir to run telemetry.')
  else:
    perf_dir = os.path.join(label.chrome_src, 'src/tools/perf')
    if not os.path.exists(perf_dir):
      self.logger.LogFatal('Cannot find %s directory.' % perf_dir)

  if profiler_args:
    self.logger.LogFatal('Telemetry does not support the perf profiler.')

  # Check for and remove temporary file that may have been left by
  # previous telemetry runs (and which might prevent this run from
  # working).
  if not test_flag.GetTestMode():
    self.RemoveTelemetryTempFile(machine, label.chromeos_root)

  identity_file = os.path.join(
      label.chromeos_root,
      'src/scripts/mod_for_test_scripts/ssh_keys/testing_rsa')
  run_cmd = ('cd {0} && '
             './run_measurement '
             '--browser=cros-chrome '
             '--output-format=csv '
             '--remote={1} '
             '--identity {2} '
             '{3} {4}'.format(perf_dir, machine, identity_file,
                              benchmark.test_name, benchmark.test_args))
  if self.log_level != 'verbose':
    self.logger.LogOutput('Running test.')
    self.logger.LogOutput('CMD: %s' % run_cmd)
  return self._ce.RunCommandWOutput(run_cmd, print_to_console=False)
def __init__(self, experiment):
  """Set up the scheduler state for one experiment.

  Reads the benchmark-run cache, maps labels to their uncached
  benchmark runs, computes the initial label-to-DUT allocation, and
  creates one worker per DUT (workers are created, not yet started).

  Args:
    experiment: The Experiment object whose benchmark runs will be
      scheduled across the locked machines.
  """
  self._experiment = experiment
  self._logger = logger.GetLogger(experiment.log_dir)

  # Create shortcuts to nested data structure. "_duts" points to a list of
  # locked machines. _labels points to a list of all labels.
  self._duts = self._experiment.machine_manager.GetMachines()
  self._labels = self._experiment.labels

  # Bookkeeping for synchronization.
  self._workers_lock = Lock()
  # One lazily-created lock per key; used to serialize per-resource work.
  # pylint: disable=unnecessary-lambda
  self._lock_map = defaultdict(lambda: Lock())

  # Test mode flag
  self._in_test_mode = test_flag.GetTestMode()

  # Read benchmarkrun cache (populates self._cached_br_list).
  self._read_br_cache()

  # Mapping from label to a list of benchmark_runs.
  self._label_brl_map = dict((l, []) for l in self._labels)
  for br in self._experiment.benchmark_runs:
    assert br.label in self._label_brl_map
    # Only put no-cache-hit br into the map.
    if br not in self._cached_br_list:
      self._label_brl_map[br.label].append(br)

  # Use machine image manager to calculate initial label allocation.
  self._mim = MachineImageManager(self._labels, self._duts)
  self._mim.compute_initial_allocation()

  # Create worker thread, 1 per dut.
  self._active_workers = [DutWorker(dut, self) for dut in self._duts]
  self._finished_workers = []

  # Termination flag.
  self._terminated = False
def DownloadSingleAutotestFile(self, chromeos_root, build_id,
                               package_file_name):
  """Download one autotest package file into the chroot's tmp area.

  Verifies the package exists on the server (skipped in test mode),
  then downloads it into chroot/tmp/<build_id> unless a copy is
  already present.

  Raises:
    MissingFile: If the package is absent on the server, or the
      download fails / leaves no file behind.
  """
  remote_file = ('gs://chromeos-image-archive/%s/%s' %
                 (build_id, package_file_name))
  gs_tool = os.path.join(chromeos_root, GS_UTIL)

  # Verify the package file exists on the server first.
  ret = 0
  if not test_flag.GetTestMode():
    ret = self._ce.RunCommand('%s ls %s' % (gs_tool, remote_file))
  if ret != 0:
    raise MissingFile('Cannot find autotest package file: %s.' %
                      package_file_name)

  if self.log_level == 'average':
    self._logger.LogOutput('Preparing to download %s package to local '
                           'directory.' % package_file_name)

  # Make sure the directory for downloading the package exists.
  dest_dir = os.path.join(chromeos_root, 'chroot/tmp', build_id)
  local_file = os.path.join(dest_dir, package_file_name)
  if not os.path.exists(dest_dir):
    os.makedirs(dest_dir)

  # Skip the download when a previous run already fetched the package.
  if not os.path.exists(local_file):
    cp_cmd = '%s cp %s %s' % (gs_tool, remote_file, dest_dir)
    if self.log_level != 'verbose':
      self._logger.LogOutput('CMD: %s' % cp_cmd)
    ret = self._ce.RunCommand(cp_cmd)
    if ret != 0 or not os.path.exists(local_file):
      raise MissingFile('Cannot download package: %s .' % local_file)
def ImageMachine(self, machine, label):
  """Install label's ChromeOS image on machine, if not already present.

  Skips imaging when the machine's checksum already matches the label's.
  On a failed first attempt the machine is rebooted and imaging is
  retried once; a second failure raises.

  Args:
    machine: The machine object to image (name/checksum are used).
    label: The label whose chromeos_image should be installed.

  Returns:
    The retval of the last image_chromeos.DoImage call (or None when
    imaging was skipped because checksums matched).

  Raises:
    RuntimeError: If the machine could not be imaged after the retry.
  """
  checksum = label.checksum

  # Machine already runs the desired image; nothing to do.
  if checksum and (machine.checksum == checksum):
    return
  chromeos_root = label.chromeos_root
  if not chromeos_root:
    chromeos_root = self.chromeos_root
  image_chromeos_args = [
      image_chromeos.__file__, '--no_lock',
      '--chromeos_root=%s' % chromeos_root,
      '--image=%s' % label.chromeos_image,
      '--image_args=%s' % label.image_args, '--remote=%s' % machine.name,
      '--logging_level=%s' % self.log_level
  ]
  if label.board:
    image_chromeos_args.append('--board=%s' % label.board)

  # Currently can't image two machines at once.
  # So have to serialized on this lock.
  save_ce_log_level = self.ce.log_level
  if self.log_level != 'verbose':
    self.ce.log_level = 'average'

  with self.image_lock:
    if self.log_level != 'verbose':
      self.logger.LogOutput('Pushing image onto machine.')
      self.logger.LogOutput('Running image_chromeos.DoImage with %s' %
                            ' '.join(image_chromeos_args))
    retval = 0
    # Test mode never actually images; retval stays 0 (success).
    if not test_flag.GetTestMode():
      retval = image_chromeos.DoImage(image_chromeos_args)
    if retval:
      # First attempt failed: reboot the machine and retry once.
      cmd = 'reboot && exit'
      if self.log_level != 'verbose':
        self.logger.LogOutput('reboot & exit.')
      self.ce.CrosRunCommand(cmd,
                             machine=machine.name,
                             chromeos_root=self.chromeos_root)
      # Give the machine time to come back up before retrying.
      time.sleep(60)
      if self.log_level != 'verbose':
        self.logger.LogOutput('Pushing image onto machine.')
        self.logger.LogOutput('Running image_chromeos.DoImage with %s' %
                              ' '.join(image_chromeos_args))
      retval = image_chromeos.DoImage(image_chromeos_args)
    if retval:
      raise RuntimeError("Could not image machine: '%s'." % machine.name)
    else:
      self.num_reimages += 1
    # Record what is now on the machine so future calls can skip imaging.
    machine.checksum = checksum
    machine.image = label.chromeos_image
    machine.label = label

  if not label.chrome_version:
    label.chrome_version = self.GetChromeVersion(machine)

  self.ce.log_level = save_ce_log_level
  return retval
def __init__(self, name, remote, working_directory, chromeos_root,
             cache_conditions, labels, benchmarks, experiment_file, email_to,
             acquire_timeout, log_dir, log_level, share_cache,
             results_directory, locks_directory):
  """Initialize an Experiment.

  Validates inputs, picks a chromeos_root (falling back to the labels'
  roots), creates the machine manager (mocked in test mode), registers
  the reachable remote machines, filters each label's remotes down to
  the reachable set, and computes per-label machine checksums before
  generating the benchmark runs.

  Raises:
    RuntimeError: If remotes/benchmarks/labels are missing, no
      chromeos_root can be determined, or no machine is reachable.
  """
  self.name = name
  self.working_directory = working_directory
  self.remote = remote
  self.chromeos_root = chromeos_root
  self.cache_conditions = cache_conditions
  self.experiment_file = experiment_file
  self.email_to = email_to
  if not results_directory:
    # Default results dir lives next to the working directory.
    self.results_directory = os.path.join(self.working_directory,
                                          self.name + '_results')
  else:
    self.results_directory = misc.CanonicalizePath(results_directory)
  self.log_dir = log_dir
  self.log_level = log_level
  self.labels = labels
  self.benchmarks = benchmarks
  self.num_complete = 0
  self.num_run_complete = 0
  self.share_cache = share_cache
  self.active_threads = []
  # If locks_directory (self.locks_dir) not blank, we will use the file
  # locking mechanism; if it is blank then we will use the AFE server
  # locking mechanism.
  self.locks_dir = locks_directory
  self.locked_machines = []

  if not remote:
    raise RuntimeError('No remote hosts specified')
  if not self.benchmarks:
    raise RuntimeError('No benchmarks specified')
  if not self.labels:
    raise RuntimeError('No labels specified')

  # We need one chromeos_root to run the benchmarks in, but it doesn't
  # matter where it is, unless the ABIs are different.
  if not chromeos_root:
    for label in self.labels:
      if label.chromeos_root:
        chromeos_root = label.chromeos_root
        break
  if not chromeos_root:
    raise RuntimeError('No chromeos_root given and could not determine '
                       'one from the image path.')

  # Test mode swaps in a mock machine manager that does no real work.
  machine_manager_fn = MachineManager
  if test_flag.GetTestMode():
    machine_manager_fn = MockMachineManager
  self.machine_manager = machine_manager_fn(chromeos_root, acquire_timeout,
                                            log_level, locks_directory)
  self.l = logger.GetLogger(log_dir)

  for machine in self.remote:
    # machine_manager.AddMachine only adds reachable machines.
    self.machine_manager.AddMachine(machine)
  # Now machine_manager._all_machines contains a list of reachable
  # machines. This is a subset of self.remote. We make both lists the same.
  self.remote = [m.name for m in self.machine_manager.GetAllMachines()]
  if not self.remote:
    raise RuntimeError('No machine available for running experiment.')

  for label in labels:
    # We filter out label remotes that are not reachable (not in
    # self.remote). So each label.remote is a sublist of experiment.remote.
    label.remote = [r for r in label.remote if r in self.remote]
    try:
      self.machine_manager.ComputeCommonCheckSum(label)
    except BadChecksum:
      # Force same image on all machines, then we do checksum again. No
      # bailout if checksums still do not match.
      self.machine_manager.ForceSameImageToAllMachines(label)
      self.machine_manager.ComputeCommonCheckSum(label)

    self.machine_manager.ComputeCommonCheckSumString(label)

  self.start_time = None
  self.benchmark_runs = self._GenerateBenchmarkRuns()

  self._schedv2 = None
  self._internal_counter_lock = Lock()
def GetExperiment(self, experiment_file, working_directory, log_dir):
  """Construct an experiment from an experiment file.

  Reads the global settings, builds the cache-hit conditions, expands
  benchmark settings (including the special 'all_*' benchmark-set
  names), constructs labels (mocked in test mode), and assembles the
  final Experiment object.

  Args:
    experiment_file: Parsed experiment file providing settings.
    working_directory: Directory in which the experiment runs.
    log_dir: Directory for experiment logs.

  Returns:
    A fully-constructed Experiment (or one using MockLabels in test
    mode).

  Raises:
    RuntimeError: For invalid settings (empty build field, multiple
      remotes with same_machine, no benchmarks, no labels).
  """
  global_settings = experiment_file.GetGlobalSettings()
  experiment_name = global_settings.GetField('name')
  board = global_settings.GetField('board')
  remote = global_settings.GetField('remote')
  # This is used to remove the ",' from the remote if user
  # add them to the remote string.
  new_remote = []
  if remote:
    for i in remote:
      c = re.sub('["\']', '', i)
      new_remote.append(c)
  remote = new_remote
  chromeos_root = global_settings.GetField('chromeos_root')
  rm_chroot_tmp = global_settings.GetField('rm_chroot_tmp')
  perf_args = global_settings.GetField('perf_args')
  acquire_timeout = global_settings.GetField('acquire_timeout')
  cache_dir = global_settings.GetField('cache_dir')
  cache_only = global_settings.GetField('cache_only')
  config.AddConfig('no_email', global_settings.GetField('no_email'))
  share_cache = global_settings.GetField('share_cache')
  results_dir = global_settings.GetField('results_dir')
  use_file_locks = global_settings.GetField('use_file_locks')
  locks_dir = global_settings.GetField('locks_dir')
  # If we pass a blank locks_dir to the Experiment, it will use the AFE server
  # lock mechanism. So if the user specified use_file_locks, but did not
  # specify a locks dir, set the locks dir to the default locks dir in
  # file_lock_machine.
  if use_file_locks and not locks_dir:
    locks_dir = file_lock_machine.Machine.LOCKS_DIR
  chrome_src = global_settings.GetField('chrome_src')
  show_all_results = global_settings.GetField('show_all_results')
  log_level = global_settings.GetField('logging_level')
  if log_level not in ('quiet', 'average', 'verbose'):
    log_level = 'verbose'
  # Default cache hit conditions. The image checksum in the cache and the
  # computed checksum of the image must match. Also a cache file must exist.
  cache_conditions = [
      CacheConditions.CACHE_FILE_EXISTS, CacheConditions.CHECKSUMS_MATCH
  ]
  if global_settings.GetField('rerun_if_failed'):
    cache_conditions.append(CacheConditions.RUN_SUCCEEDED)
  if global_settings.GetField('rerun'):
    cache_conditions.append(CacheConditions.FALSE)
  if global_settings.GetField('same_machine'):
    cache_conditions.append(CacheConditions.SAME_MACHINE_MATCH)
  if global_settings.GetField('same_specs'):
    cache_conditions.append(CacheConditions.MACHINES_MATCH)

  # Construct benchmarks.
  # Some fields are common with global settings. The values are
  # inherited and/or merged with the global settings values.
  benchmarks = []
  all_benchmark_settings = experiment_file.GetSettings('benchmark')
  for benchmark_settings in all_benchmark_settings:
    benchmark_name = benchmark_settings.name
    test_name = benchmark_settings.GetField('test_name')
    if not test_name:
      # Benchmark name doubles as the test name when none is given.
      test_name = benchmark_name
    test_args = benchmark_settings.GetField('test_args')
    iterations = benchmark_settings.GetField('iterations')
    suite = benchmark_settings.GetField('suite')
    retries = benchmark_settings.GetField('retries')
    run_local = benchmark_settings.GetField('run_local')

    if suite == 'telemetry_Crosperf':
      # Special 'all_*' names expand into predefined benchmark sets.
      if test_name == 'all_perfv2':
        self.AppendBenchmarkSet(benchmarks, telemetry_perfv2_tests,
                                test_args, iterations, rm_chroot_tmp,
                                perf_args, suite, show_all_results, retries,
                                run_local)
      elif test_name == 'all_pagecyclers':
        self.AppendBenchmarkSet(benchmarks, telemetry_pagecycler_tests,
                                test_args, iterations, rm_chroot_tmp,
                                perf_args, suite, show_all_results, retries,
                                run_local)
      elif test_name == 'all_toolchain_perf':
        self.AppendBenchmarkSet(benchmarks, telemetry_toolchain_perf_tests,
                                test_args, iterations, rm_chroot_tmp,
                                perf_args, suite, show_all_results, retries,
                                run_local)
        # Add non-telemetry toolchain-perf benchmarks:
        benchmarks.append(
            Benchmark('graphics_WebGLAquarium', 'graphics_WebGLAquarium',
                      '', iterations, rm_chroot_tmp, perf_args, '',
                      show_all_results, retries, run_local=False))
      elif test_name == 'all_toolchain_perf_old':
        self.AppendBenchmarkSet(
            benchmarks, telemetry_toolchain_old_perf_tests, test_args,
            iterations, rm_chroot_tmp, perf_args, suite, show_all_results,
            retries, run_local)
      else:
        benchmark = Benchmark(test_name, test_name, test_args, iterations,
                              rm_chroot_tmp, perf_args, suite,
                              show_all_results, retries, run_local)
        benchmarks.append(benchmark)
    else:
      if test_name == 'all_graphics_perf':
        self.AppendBenchmarkSet(benchmarks, graphics_perf_tests, '',
                                iterations, rm_chroot_tmp, perf_args, '',
                                show_all_results, retries, run_local=False)
      elif test_name == 'all_crosbolt_perf':
        self.AppendBenchmarkSet(benchmarks, telemetry_crosbolt_perf_tests,
                                test_args, iterations, rm_chroot_tmp,
                                perf_args, 'telemetry_Crosperf',
                                show_all_results, retries, run_local)
        self.AppendBenchmarkSet(benchmarks, crosbolt_perf_tests, '',
                                iterations, rm_chroot_tmp, perf_args, '',
                                show_all_results, retries, run_local=False)
      else:
        # Add the single benchmark.
        benchmark = Benchmark(benchmark_name, test_name, test_args,
                              iterations, rm_chroot_tmp, perf_args, suite,
                              show_all_results, retries, run_local=False)
        benchmarks.append(benchmark)

  if not benchmarks:
    raise RuntimeError('No benchmarks specified')

  # Construct labels.
  # Some fields are common with global settings. The values are
  # inherited and/or merged with the global settings values.
  labels = []
  all_label_settings = experiment_file.GetSettings('label')
  all_remote = list(remote)
  for label_settings in all_label_settings:
    label_name = label_settings.name
    image = label_settings.GetField('chromeos_image')
    autotest_path = label_settings.GetField('autotest_path')
    chromeos_root = label_settings.GetField('chromeos_root')
    my_remote = label_settings.GetField('remote')
    compiler = label_settings.GetField('compiler')
    new_remote = []
    if my_remote:
      for i in my_remote:
        c = re.sub('["\']', '', i)
        new_remote.append(c)
    my_remote = new_remote
    if image == '':
      # No explicit image: resolve one from the xbuddy 'build' field.
      build = label_settings.GetField('build')
      if len(build) == 0:
        raise RuntimeError("Can not have empty 'build' field!")
      image, autotest_path = label_settings.GetXbuddyPath(
          build, autotest_path, board, chromeos_root, log_level)

    cache_dir = label_settings.GetField('cache_dir')
    chrome_src = label_settings.GetField('chrome_src')

    # TODO(yunlian): We should consolidate code in machine_manager.py
    # to determine whether we are running from within google or not
    if ('corp.google.com' in socket.gethostname() and
        (not my_remote or
         my_remote == remote and global_settings.GetField('board') != board)):
      my_remote = self.GetDefaultRemotes(board)
    if global_settings.GetField('same_machine') and len(my_remote) > 1:
      raise RuntimeError('Only one remote is allowed when same_machine '
                         'is turned on')
    all_remote += my_remote
    image_args = label_settings.GetField('image_args')
    if test_flag.GetTestMode():
      # Test mode builds a MockLabel that performs no real work.
      # pylint: disable=too-many-function-args
      label = MockLabel(label_name, image, autotest_path, chromeos_root,
                        board, my_remote, image_args, cache_dir, cache_only,
                        log_level, compiler, chrome_src)
    else:
      label = Label(label_name, image, autotest_path, chromeos_root, board,
                    my_remote, image_args, cache_dir, cache_only, log_level,
                    compiler, chrome_src)
    labels.append(label)

  if not labels:
    raise RuntimeError('No labels specified')

  email = global_settings.GetField('email')
  # De-duplicate the accumulated remote list before building the
  # experiment.
  all_remote += list(set(my_remote))
  all_remote = list(set(all_remote))
  experiment = Experiment(experiment_name, all_remote, working_directory,
                          chromeos_root, cache_conditions, labels, benchmarks,
                          experiment_file.Canonicalize(), email,
                          acquire_timeout, log_dir, log_level, share_cache,
                          results_dir, locks_dir)

  return experiment