def test_status_command_with_sched(self):
    """Run the status command against a test whose state is 'SCHEDULED'.

    The test is built and scheduled with its configured ('raw') scheduler,
    then the status command is run twice: once with just the test id, and
    once with the '-j' flag. Both runs must return 0.
    """

    raw_cfg = {
        'scheduler': 'raw',
        'run': {
            'env': {
                'foo': 'bar',
            },
            'cmds': ['sleep 1'],
        },
    }
    cfg = file_format.TestConfigLoader().validate(raw_cfg)
    cfg['name'] = 'testytest'

    test = self._quick_test(cfg, build=False, finalize=False)
    test.build()

    sched = schedulers.get_plugin(test.scheduler)
    sched.schedule_test(self.pav_cfg, test)

    status_cmd = commands.get_command('status')
    status_cmd.outfile = io.StringIO()

    for flags in ([], ['-j']):
        # Each invocation gets a fresh parser, and the state is forced back
        # to SCHEDULED right before the command runs.
        parser = argparse.ArgumentParser()
        status_cmd._setup_arguments(parser)
        args = parser.parse_args(flags + [str(test.id)])
        test.status.set(status_file.STATES.SCHEDULED, "faker")
        self.assertEqual(status_cmd.run(self.pav_cfg, args), 0)
def test_node_range(self):
    """Make sure tests that ask for a range of nodes run to completion.

    Each range asks for a maximum node count we don't expect to be fully
    granted; the test must still complete and pass within TEST_TIMEOUT.
    """

    slurm = schedulers.get_plugin('slurm')

    cfg = self._quick_test_cfg()
    cfg['scheduler'] = 'slurm'

    for node_range in ('1-10000000', '1-all'):
        # We're testing that everything works when we ask for a max number
        # of nodes and don't get them all.
        cfg['slurm']['num_nodes'] = node_range

        test = self._quick_test(cfg=cfg, name='slurm_test')
        test.build()

        slurm.schedule_test(self.pav_cfg, test)
        deadline = time.time() + self.TEST_TIMEOUT

        completed = False
        while time.time() < deadline:
            job_state = slurm.job_status(self.pav_cfg, test).state
            if job_state == STATES.COMPLETE:
                completed = True
                break
            time.sleep(.5)

        if not completed:
            # We timed out.
            slurm.cancel_job(test)
            self.fail(
                "Test {} at {} did not complete within {} secs with "
                "num_nodes of {}.".format(
                    test.id, test.path, self.TEST_TIMEOUT, node_range))

        results = test.load_results()
        self.assertEqual(results['result'], result_parsers.PASS)
def is_done(self):
    """Report whether every test in this set has finished.

    As soon as all tests are found to be finished, ``self.done`` is set and
    ``self.all_pass`` records whether each of them had a 'PASS' result.

    :returns: True if all the tests in the set are completed, False if any
        test object is missing or lacks its RUN_COMPLETE file.
    """

    if self.done:
        return True

    everything_passed = True

    for test in self.series_obj.tests.values():
        # A missing test object means the set can't be finished yet.
        if test is None:
            return False

        # Ask the scheduler about the job; this updates the test status.
        if test.job_id:
            sched = schedulers.get_plugin(test.scheduler)
            sched.job_status(self.pav_cfg, test)

        # No RUN_COMPLETE file means the test is still in progress.
        if not (test.path / 'RUN_COMPLETE').exists():
            return False

        # A missing result counts as a failure.
        try:
            this_passed = test.results['result'] == 'PASS'
        except KeyError:
            this_passed = False

        if not this_passed:
            everything_passed = False

    self.all_pass = everything_passed
    self.done = True

    return True
def test_job_status(self):
    """Make sure we can get a slurm job status.

    The test never really runs; instead it borrows the job id of an existing
    slurm job in a given state and checks how that state is reported.
    """

    cfg = self._quick_test_cfg()
    cfg['scheduler'] = 'slurm'
    test = self._quick_test(cfg, name='slurm_job_status', finalize=False)

    slurm = schedulers.get_plugin('slurm')

    def steal_job(job_state):
        """Reset the test to SCHEDULED, take over the id of a job in the
        given slurm state, and return the scheduler's status for it."""
        test.status.set(STATES.SCHEDULED, "not really though.")
        test.job_id = self._get_job('JobState=' + job_state)
        return slurm.job_status(self.pav_cfg, test)

    # Steal a running job's ID, and then check our status.
    running = steal_job('RUNNING')
    self.assertEqual(running.state, STATES.SCHEDULED)
    self.assertIn('RUNNING', running.note)

    # Steal a canceled job's id. The test's own status should change too.
    cancelled = steal_job('CANCELLED')
    self.assertEqual(cancelled.state, STATES.SCHED_CANCELLED)
    current = test.status.current()
    self.assertEqual(current.state, STATES.SCHED_CANCELLED)

    # Check another random state. In this case, all pavilion will
    # just consider the test still scheduled.
    completed = steal_job('COMPLETED')
    self.assertEqual(completed.state, STATES.SCHEDULED)
    self.assertIn('COMPLETED', completed.note)
def test_status_history(self):
    """Check that the status command works with the '--history' flag.

    After skipping the first four output lines, each remaining line must
    contain the state of the matching entry in the test's status history.
    """

    status_cmd = commands.get_command('status')
    captured = io.StringIO()
    status_cmd.outfile = captured

    parser = argparse.ArgumentParser()
    status_cmd._setup_arguments(parser)

    test = self._quick_test()
    schedulers.get_plugin('raw').schedule_test(self.pav_cfg, test)

    # Give the test up to five seconds to finish.
    deadline = time.time() + 5
    while test.check_run_complete() is None and time.time() < deadline:
        time.sleep(.1)

    args = parser.parse_args(['--history', str(test.id)])
    self.assertEqual(status_cmd.run(self.pav_cfg, args), 0)

    captured.seek(0)
    # NOTE(review): the first four lines are presumably the table
    # title/header - confirm against the status command's output format.
    rows = captured.readlines()[4:]
    history = test.status.history()
    self.assertEqual(len(rows), len(history))

    for entry, row in zip(history, rows):
        self.assertTrue(entry.state in row)
def test_cancel_job(self):
    """Start a long-sleeping test under the raw scheduler, then cancel it
    once it is running."""

    # This test will just sleep for a bit.
    cfg = self._quick_test_cfg()
    cfg['run']['cmds'] = ['sleep 100']

    test = self._quick_test(cfg=cfg)
    test.build()

    raw = schedulers.get_plugin('raw')
    raw.schedule_test(self.pav_cfg, test)

    # Wait up to a second for the job to leave the SCHEDULED state.
    deadline = time.time() + 1
    while True:
        if raw.job_status(self.pav_cfg, test).state != STATES.SCHEDULED:
            break
        if not time.time() < deadline:
            break
        time.sleep(.1)

    # The test should be running
    self.assertEqual(test.status.current().state, STATES.RUNNING)

    # The pid itself is unused, but the unpacking fails if the job id
    # doesn't split into exactly two parts around '_'.
    _, pid = test.job_id.split('_')

    cancel_status = raw.cancel_job(test)
    self.assertEqual(cancel_status.state, STATES.SCHED_CANCELLED)
def status_from_test_obj(pav_cfg, test_obj):
    """Takes a test object or list of test objects and creates the
    dictionaries expected by the print_status function.

    For tests whose current state is SCHEDULED, the status is taken from the
    test's scheduler plugin rather than from the status file.

    :param dict pav_cfg: Pavilion base configuration.
    :param Union[TestRun,[TestRun]] test_obj: Pavilion test object, or a
        list of them.
    :return: List of dictionary objects containing the test ID, name, state,
        time of state update, and note associated with that state. The list
        is sorted by test ID.
    :rtype: list(dict)
    """

    tests = test_obj if isinstance(test_obj, list) else [test_obj]

    def _status_row(test):
        """Build the status dictionary for a single test."""
        status = test.status.current()

        if status.state == STATES.SCHEDULED:
            sched = schedulers.get_plugin(test.scheduler)
            status = sched.job_status(pav_cfg, test)

        return {
            'test_id': test.id,
            'name': test.name,
            'state': status.state,
            'time': status.when,
            'note': status.note,
        }

    rows = [_status_row(test) for test in tests]
    return sorted(rows, key=lambda row: row['test_id'])
def test_log_arguments(self):
    """Exercise 'pav log' for the run, build and kickoff logs of a test.

    A simple test is kicked off with the raw scheduler. The run log must
    contain the test's output, while the build and kickoff logs are expected
    to be empty. The log command's output streams are always restored
    afterwards, even when an assertion fails.
    """

    log_cmd = commands.get_command('log')

    parser = argparse.ArgumentParser()
    log_cmd._setup_arguments(parser)

    # run a simple test
    test = self._quick_test(finalize=False)
    raw = schedulers.get_plugin('raw')

    raw.schedule_test(self.pav_cfg, test)

    # Wait (for up to a second) for the test to finish one way or another.
    # The state has to be re-read on every pass; otherwise the loop can
    # never notice that the test finished and always sleeps the full second.
    end = time.time() + 1
    state = test.status.current().state
    while ('ERROR' not in state
           and 'FAIL' not in state
           and state != STATES.COMPLETE
           and time.time() < end):
        time.sleep(.1)
        state = test.status.current().state

    # test `pav log run test`
    args = parser.parse_args(['run', str(test.id)])
    self.assertEqual(args.test, test.id)

    out = io.StringIO()
    err = io.StringIO()

    def reset_streams():
        """Empty both capture buffers. truncate() doesn't move the stream
        position, so rewind first to keep later writes at offset 0."""
        for stream in (out, err):
            stream.seek(0)
            stream.truncate(0)

    log_cmd.outfile = out
    log_cmd.errfile = err

    try:
        result = log_cmd.run(self.pav_cfg, args)
        err.seek(0)
        out.seek(0)
        self.assertEqual(err.read(), '')
        self.assertEqual(out.read(), 'Hello World.\n')
        self.assertEqual(result, 0)

        # test `pav log build test`
        # note: echo-ing hello world should not require anything to be built
        reset_streams()
        args = parser.parse_args(['build', str(test.id)])
        log_cmd.run(self.pav_cfg, args)
        out.seek(0)
        err.seek(0)
        self.assertEqual(out.read(), '')

        # test `pav log kickoff test`
        # note: in general, kickoff.log should be an empty file
        reset_streams()
        args = parser.parse_args(['kickoff', str(test.id)])
        result = log_cmd.run(self.pav_cfg, args)
        out.seek(0)
        err.seek(0)
        self.assertEqual(out.read(), '')
        self.assertEqual(err.read(), '')
        self.assertEqual(result, 0)
    finally:
        # Put both streams back (the error stream goes to errfile).
        log_cmd.outfile = sys.stdout
        log_cmd.errfile = sys.stderr
def _cancel_all(tests_by_sched): """Cancel each of the given tests using the appropriate scheduler.""" for sched_name, tests in tests_by_sched.items(): sched = schedulers.get_plugin(sched_name) for test in tests: sched.cancel_job(test)
def cancel_series(self):
    """Cancel every test in the series that hasn't completed.

    A test counts as completed when its RUN_COMPLETE file exists. Each
    unfinished test is cancelled through its scheduler, given a COMPLETE
    status with a "Killed by SIGTERM." note, and marked as run-complete.
    """

    for test in self.tests.values():
        if (test.path / 'RUN_COMPLETE').exists():
            # Already finished; nothing to cancel.
            continue

        schedulers.get_plugin(test.scheduler).cancel_job(test)
        test.status.set(STATES.COMPLETE, "Killed by SIGTERM.")
        test.set_run_complete()
def test_sched_vars(self):
    """Make sure every 'raw' scheduler variable converts to a non-zero
    integer when not on a node."""

    raw = schedulers.get_plugin('raw')
    test = self._quick_test()
    sched_vars = raw.get_vars(test)

    for _name, var_value in sched_vars.items():
        self.assertNotEqual(int(var_value), 0)
def test_sched_vars(self):
    """Make sure the scheduler variable class works as expected.

    Every key the 'raw' scheduler advertises must be retrievable without
    raising an error.
    """

    test = self._quick_test()
    raw_sched = schedulers.get_plugin('raw')

    sched_vars = raw_sched.get_vars(test.config['raw'])

    for var_name in sched_vars.keys():
        # Only the lookup matters; the value itself is discarded.
        _ = sched_vars[var_name]
def _get_sched(test):
    """Look up the scheduler plugin for the given test.

    If the lookup fails for any reason, the test is given a BUILD_ERROR
    status and the original exception is re-raised.

    :param TestRun test: The test.
    :returns: The scheduler plugin named by ``test.scheduler``.
    """

    try:
        sched = schedulers.get_plugin(test.scheduler)
    except Exception:
        test.status.set(
            STATES.BUILD_ERROR,
            "Unknown error getting the scheduler. Refer to "
            "the kickoff log.")
        raise

    return sched
def test_kickoff_env(self):
    """Check that the kickoff script written by the 'dummy' scheduler
    contains every 'env_setup' line from the pavilion config, and that its
    last line is the 'pav _run' command."""

    pav_cfg = self.pav_cfg
    pav_cfg['env_setup'] = ['test1', 'test2', 'test3']

    config = {'name': 'sched-vars', 'scheduler': 'dummy'}
    test = self._quick_test(config)

    dummy_sched = schedulers.get_plugin('dummy')
    script_path = dummy_sched._create_kickoff_script(pav_cfg, test)

    with script_path.open() as script:
        lines = [line.strip() for line in script.readlines()]

    expected = pav_cfg['env_setup']
    self.assertTrue(set(expected).issubset(lines))
    self.assertTrue(re.match(r'pav _run.*', lines[-1]))
def test_schedule_test(self):
    """Try to schedule a test with slurm, then cancel it.

    The job must report as SCHEDULED right after scheduling, and as
    SCHED_CANCELLED after cancellation.
    """

    slurm = schedulers.get_plugin('slurm')

    cfg = self._quick_test_cfg()
    cfg['scheduler'] = 'slurm'
    test = self._quick_test(cfg=cfg, name='slurm_test')

    slurm.schedule_test(self.pav_cfg, test)

    sched_status = slurm.job_status(self.pav_cfg, test)
    self.assertEqual(sched_status.state, STATES.SCHEDULED)

    cancel_status = slurm.cancel_job(test)
    self.assertEqual(cancel_status.state, STATES.SCHED_CANCELLED)
def test_schedule_test(self):
    """Make sure the raw scheduler can run a test through to COMPLETE."""

    raw = schedulers.get_plugin('raw')

    test = self._quick_test(build=False, finalize=False)

    built = test.build()
    self.assertTrue(built, msg=test)

    raw.schedule_tests(self.pav_cfg, [test])

    # Any error while waiting (including a timeout) fails the test.
    try:
        test.wait(2)
    except Exception:
        self.fail()

    final_state = test.status.current().state
    self.assertEqual(final_state, STATES.COMPLETE)
def test_sched_vars(self):
    """Make sure the scheduler vars are reasonable when not on a node.

    First every slurm scheduler variable is checked for having some value
    outside of an allocation. Then a test that echoes each of those
    variables is scheduled, and must complete within TEST_TIMEOUT.
    """

    slurm = schedulers.get_plugin('slurm')

    cfg = self._quick_test_cfg()
    cfg['scheduler'] = 'slurm'
    test = self._quick_test(cfg, name='slurm_vars', finalize=False)

    sched_conf = test.config['slurm']

    # Check all the variables to make sure they work outside an allocation,
    # or at least return a DeferredVariable
    var_list = []
    for k, v in slurm.get_vars(sched_conf).items():
        # Make sure everything has a value of some sort.
        self.assertNotIn(v, ['None', ''])
        var_list.append(k)

    # Now check all the vars for real, when a test is running.
    cfg = self._quick_test_cfg()
    cfg['scheduler'] = 'slurm'
    # Ask for each var in our test comands.
    cfg['run']['cmds'] = [
        'echo "{var}={{{{sched.{var}}}}}"'.format(var=var)
        for var in var_list
    ]
    sched_vars = slurm.get_vars(sched_conf)
    test = self._quick_test(cfg, name='slurm_vars2', finalize=False,
                            sched_vars=sched_vars)

    slurm.schedule_test(self.pav_cfg, test)

    timeout = time.time() + self.TEST_TIMEOUT
    state = test.status.current()
    while time.time() < timeout:
        state = test.status.current()
        if state.state == STATES.COMPLETE:
            # Leave via break rather than returning a value; unittest
            # deprecates test methods that return something other than None.
            break
        # Pause between polls instead of spinning on the status file.
        time.sleep(.5)
    else:
        self.fail("Test never completed. Has state: {}".format(state))
def test_check_job(self):
    """Make sure we can get the test's scheduler status.

    The raw scheduler's job ids have the form '<hostname>_<pid>'; a series
    of faked ids are used to exercise each way that lookup can turn out.
    """

    cfg = self._quick_test_cfg()
    cfg['run']['cmds'] = ['sleep 2']
    test = self._quick_test(cfg=cfg)

    test.status.set('SCHEDULED', 'but not really')

    max_pid = int(Path('/proc/sys/kernel/pid_max').read_text())

    this_host = socket.gethostname()

    raw = schedulers.get_plugin('raw')

    def expect_job_state(expected):
        """Assert on the state the scheduler reports for the test's job."""
        reported = raw.job_status(self.pav_cfg, test)
        self.assertEqual(reported.state, expected)

    # Make a test from another host.
    test.job_id = 'garbledhostnameasldfkjasd_{}'.format(os.getpid())
    expect_job_state(STATES.SCHEDULED)

    # Make a test with a non-existent pid.
    test.job_id = '{}_{}'.format(this_host, max_pid + 1)
    expect_job_state(STATES.SCHED_ERROR)

    # Check the 'race condition' case of check_job
    test.status.set(STATES.COMPLETE, 'not really this either.')
    expect_job_state(STATES.COMPLETE)
    test.status.set(STATES.SCHEDULED, "reseting.")

    # Make a test with a re-used pid.
    test.job_id = '{}_{}'.format(this_host, os.getpid())
    expect_job_state(STATES.SCHED_ERROR)

    # Finally, schedule the test for real.
    raw.schedule_test(self.pav_cfg, test)
    expect_job_state(STATES.SCHEDULED)
def build_variable_manager(self, raw_test_cfg):
    """Get all of the different kinds of Pavilion variables into a single
    variable set manager for this test.

    The manager starts as a deep copy of ``self.base_var_man``, to which the
    test's own variables ('var') and its scheduler's variables ('sched') are
    added.

    :param raw_test_cfg: A raw test configuration. It should be from before
        any variables are resolved.
    :rtype: variables.VariableSetManager
    :raises TestConfigError: When the variables section is invalid, the
        scheduler can't be found, or the scheduler variables can't be
        retrieved or added.
    """

    var_man = copy.deepcopy(self.base_var_man)

    # Since per vars are the highest in resolution order, we can make things
    # a bit faster by adding these after we find the used per vars.
    try:
        var_man.add_var_set('var', raw_test_cfg.get('variables', {}))
    except variables.VariableError as err:
        raise TestConfigError("Error in variables section: {}".format(err))

    sched_name = raw_test_cfg.get('scheduler', '<undefined>')
    try:
        sched = schedulers.get_plugin(sched_name)
    except schedulers.SchedulerPluginError:
        raise TestConfigError(
            "Could not find scheduler '{}'".format(sched_name))

    try:
        sched_cfg = raw_test_cfg.get(sched_name, {})
        var_man.add_var_set('sched', sched.get_vars(sched_cfg))
    except schedulers.SchedulerPluginError as err:
        raise TestConfigError(
            "Could not get variables for scheduler {}: {}"
            .format(sched_name, err))
    except variables.VariableError as err:
        raise TestConfigError(
            "Error in scheduler variables: {}".format(err))

    return var_man
def test_log_tail(self): log_cmd = commands.get_command('log') parser = argparse.ArgumentParser() log_cmd._setup_arguments(parser) out = io.StringIO() err = io.StringIO() log_cmd.outfile = out log_cmd.errfile = err # test 'pav log --tail X run test' test_cfg = self._quick_test_cfg() test_cfg['run']['cmds'] = [ 'echo "this"', 'echo "is"', 'echo "some"', 'echo "crazy"', 'echo "long"', 'echo "output"' ] test = self._quick_test(cfg=test_cfg) raw = schedulers.get_plugin('raw') raw.schedule_test(self.pav_cfg, test) end = time.time() + 5 while test.check_run_complete() is None and time.time() < end: time.sleep(.1) args = parser.parse_args(['--tail', '2', 'run', str(test.id)]) out.truncate(0) err.truncate(0) result = log_cmd.run(self.pav_cfg, args) self.assertEqual(result, 0) out.seek(0) err.seek(0) self.assertEqual(err.read(), '') self.assertEqual(out.read(), 'long\noutput\n') log_cmd.outfile = sys.stdout log_cmd.outfile = sys.stderr
def test_node_list_parsing(self):
    """Make sure the node list regex matches what it's supposed to.

    Valid node list strings must expand to the expected node names (in
    order), and malformed ones must raise a ValueError.
    """

    slurm = schedulers.get_plugin('slurm')  # type: Slurm

    two_digit_3_to_10 = ['ab_bc{:02d}'.format(d) for d in range(3, 11)]
    three_digit_3_to_143 = ['n{:03d}'.format(d) for d in range(3, 144)]
    nid_12_to_33 = ['nid{:02d}'.format(d) for d in range(12, 34)]

    good_cases = [
        (None, []),
        ('', []),
        ('ab03', ['ab03']),
        ('ab-bc[3-004]', ['ab-bc3', 'ab-bc4']),
        ('ab_bc[03-10]', two_digit_3_to_10),
        ('n[003-143]', three_digit_3_to_143),
        # Duplicates are accepted
        ('nid03,nid[03-04]', ['nid03', 'nid03', 'nid04']),
        ('nid03,nid[04-06],nid[12-33]',
         ['nid03', 'nid04', 'nid05', 'nid06'] + nid_12_to_33),
    ]

    for node_str, expected in good_cases:
        parsed = slurm.parse_node_list(node_str)
        self.assertEqual(parsed, expected)

    bad_cases = [
        ('n03d', "Trailing characters"),
        ('nid03!@#', "Trailing junk (whole string match)."),
        ('n03.n04', "Not comma separated"),
        ('n[03', "No closing bracket"),
        ('n03]', "No open bracket"),
        ('nid[12-03]', "Out of order range"),
    ]

    for node_str, problem in bad_cases:
        fail_msg = "Did not throw error for {}".format(problem)
        with self.assertRaises(ValueError, msg=fail_msg):
            slurm.parse_node_list(node_str)
def test_include_exclude(self):
    """Test that we can schedule tests that require or exclude nodes.

    The nodes to include and exclude are picked from the scheduler's
    'node_up_list' variable. The job is cancelled if it doesn't finish
    within the wait timeout.
    """

    slurm = schedulers.get_plugin('slurm')

    # A throwaway test, used only to get at a slurm config section.
    probe = self._quick_test(build=False, finalize=False)
    up_nodes = slurm.get_vars(probe.config['slurm'])['node_up_list'].split()

    cfg = self._quick_test_cfg()
    cfg['scheduler'] = 'slurm'
    slurm_cfg = cfg['slurm']
    slurm_cfg['num_nodes'] = '2'
    slurm_cfg['include_nodes'] = up_nodes[1]
    slurm_cfg['exclude_nodes'] = up_nodes[2]

    test = self._quick_test(cfg, finalize=False)

    # We mainly care if this step completes successfully.
    slurm.schedule_test(self.pav_cfg, test)
    try:
        test.wait(timeout=5)
    except TimeoutError:
        slurm.cancel_job(test)
def status_from_test_obj(pav_cfg: dict, test: TestRun):
    """Takes a test object and creates the dictionary expected by the
    print_status function.

    SCHEDULED tests report the status given by their scheduler. BUILDING and
    RUNNING tests get a 'Last updated' time appended to their status note,
    taken from the builder log and the test's run.log respectively.

    :param pav_cfg: Pavilion base configuration.
    :param test: Pavilion test object.
    :return: A dictionary containing the test ID, name, state, time of state
        update, and note associated with that state.
    :rtype: dict
    """

    status = test.status.current()
    state = status.state

    if state == STATES.SCHEDULED:
        status = schedulers.get_plugin(test.scheduler).job_status(
            pav_cfg, test)
    elif state == STATES.BUILDING:
        updated = test.builder.log_updated()
        stamp = '<unknown>' if updated is None else str(updated)
        status.note = ' '.join([status.note, '\nLast updated: ', stamp])
    elif state == STATES.RUNNING:
        updated = get_last_ctime(test.path / 'run.log')
        stamp = '<unknown>' if updated is None else str(updated)
        status.note = ' '.join([status.note, '\nLast updated:', stamp])

    return {
        'test_id': test.id,
        'name': test.name,
        'state': status.state,
        'time': status.when,
        'note': status.note,
    }
def _get_tests(self, pav_cfg, host, test_files, tests, modes, overrides,
               sys_vars):
    """Translate a general set of pavilion test configs into the final,
    resolved configurations. These objects will be organized in a
    dictionary by scheduler.

    :param pav_cfg: The pavilion config
    :param str host: The host config to target these tests with. When None,
        the 'sys_name' system variable is used instead.
    :param list(Path) test_files: Files containing a newline separated
        list of tests. Blank lines and lines starting with '#' are skipped.
    :param list(str) tests: The tests to run.
    :param list(str) modes: The mode configs to use.
    :param list(str) overrides: Overrides to apply to the configurations.
    :param system_variables.SysVarDict sys_vars: The system variables dict.
    :returns: A dictionary (by scheduler type name) of lists of tuples of
        test configs and their variable managers.
    :raises commands.CommandError: When a test file can't be read, the
        configs can't be loaded, overrides or permutations can't be
        resolved, a scheduler can't be found, or variables fail to resolve.
    """
    self.logger.debug("Finding Configs")

    # Use the sys_host if a host isn't specified.
    if host is None:
        host = sys_vars.get('sys_name')

    tests = list(tests)
    for file_path in test_files:
        try:
            with pathlib.PosixPath(file_path).open() as test_file:
                for line in test_file:
                    line = line.strip()
                    if line and not line.startswith('#'):
                        tests.append(line)
        except (OSError, IOError) as err:
            msg = "Could not read test file {}: {}".format(file_path, err)
            self.logger.error(msg)
            raise commands.CommandError(msg)

    try:
        raw_tests = test_config.load_test_configs(pav_cfg, host, modes,
                                                  tests)
    except test_config.TestConfigError as err:
        self.logger.error(str(err))
        raise commands.CommandError(str(err))

    raw_tests_by_sched = defaultdict(list)
    tests_by_scheduler = defaultdict(list)

    # Apply config overrides.
    for test_cfg in raw_tests:
        # Apply the overrides to each of the config values.
        try:
            test_config.apply_overrides(test_cfg, overrides)
        except test_config.TestConfigError as err:
            msg = 'Error applying overrides to test {} from {}: {}' \
                .format(test_cfg['name'], test_cfg['suite_path'], err)
            self.logger.error(msg)
            raise commands.CommandError(msg)

        # Resolve all configuration permutations.
        try:
            p_cfg, permutes = test_config.resolve_permutations(
                test_cfg, pav_cfg.pav_vars, sys_vars)
            for p_var_man in permutes:
                # Get the scheduler from the config.
                sched = p_cfg['scheduler']
                sched = test_config.resolve_section_vars(
                    component=sched,
                    var_man=p_var_man,
                    allow_deferred=False,
                    deferred_only=False,
                )
                raw_tests_by_sched[sched].append((p_cfg, p_var_man))
        except test_config.TestConfigError as err:
            msg = 'Error resolving permutations for test {} from {}: {}' \
                .format(test_cfg['name'], test_cfg['suite_path'], err)
            self.logger.error(msg)
            raise commands.CommandError(msg)

    # Get the schedulers for the tests, and the scheduler variables.
    for sched_name, sched_tests in raw_tests_by_sched.items():
        try:
            sched = schedulers.get_plugin(sched_name)
        except (KeyError, schedulers.SchedulerPluginError):
            # An unknown scheduler is reported by get_plugin through
            # SchedulerPluginError elsewhere in this code; KeyError is
            # still caught for backwards compatibility.
            msg = "Could not find scheduler '{}'.".format(sched_name)
            self.logger.error(msg)
            raise commands.CommandError(msg)

        nondeferred_cfg_sctns = schedulers.list_plugins()

        # Builds must have the values of all their variables now.
        nondeferred_cfg_sctns.append('build')

        # Set the scheduler variables for each test.
        for test_cfg, test_var_man in sched_tests:

            sched_config = test_config.resolve_section_vars(
                component=test_cfg[sched_name],
                var_man=test_var_man,
                allow_deferred=False,
                deferred_only=False,
            )

            test_var_man.add_var_set('sched', sched.get_vars(sched_config))

            # Resolve all variables for the test (that aren't deferred).
            try:
                resolved_config = test_config.resolve_config(
                    test_cfg,
                    test_var_man,
                    no_deferred_allowed=nondeferred_cfg_sctns)

            except (ResolveError, KeyError) as err:
                msg = "Error resolving variables in config at '{}': {}" \
                    .format(test_cfg['suite_path'].resolve(test_var_man),
                            err)
                self.logger.error(msg)
                raise commands.CommandError(msg)

            tests_by_scheduler[sched.name].append(
                (resolved_config, test_var_man))

    return tests_by_scheduler
def run(self, pav_cfg, args):
    """Resolve the test configurations into individual tests and assign to
    schedulers. Have those schedulers kick off jobs to run the individual
    tests themselves.

    :param pav_cfg: The pavilion configuration.
    :param args: The parsed command line argument object.
    :returns: 0 on success (or the result of printing the test statuses
        when ``args.status`` is set), ``errno.EINVAL`` on any error.
    """

    # 1. Resolve the test configs
    #   - Get sched vars from scheduler.
    #   - Compile variables.
    #

    overrides = {}
    for ovr in args.overrides:
        if '=' not in ovr:
            fprint(
                "Invalid override value. Must be in the form: "
                "<key>=<value>. Ex. -c run.modules=['gcc'] ",
                file=self.errfile)
            return errno.EINVAL

        key, value = ovr.split('=', 1)
        overrides[key] = value

    sys_vars = system_variables.get_vars(True)

    try:
        configs_by_sched = self._get_tests(
            pav_cfg=pav_cfg,
            host=args.host,
            test_files=args.files,
            tests=args.tests,
            modes=args.modes,
            overrides=overrides,
            sys_vars=sys_vars,
        )

        tests_by_sched = self._configs_to_tests(
            pav_cfg=pav_cfg,
            configs_by_sched=configs_by_sched,
        )

    except commands.CommandError as err:
        # Our error messages get escaped to a silly degree
        err = codecs.decode(str(err), 'unicode-escape')
        fprint(err, file=self.errfile)
        return errno.EINVAL

    all_tests = sum(tests_by_sched.values(), [])

    if not all_tests:
        fprint("You must specify at least one test.", file=self.errfile)
        return errno.EINVAL

    series = TestSeries(pav_cfg, all_tests)

    rp_errors = []
    for test in all_tests:
        # Make sure the result parsers have reasonable arguments.
        try:
            result_parsers.check_args(test.config['results'])
        except TestRunError as err:
            rp_errors.append(str(err))

    if rp_errors:
        fprint("Result Parser configurations had errors:",
               file=self.errfile, color=output.RED)
        for msg in rp_errors:
            fprint(msg, bullet=' - ', file=self.errfile)
        return errno.EINVAL

    failed_build = None
    # Build, right now, every test that isn't set to build on nodes.
    for test in all_tests:
        if test.config['build']['on_nodes'] not in ['true', 'True']:
            if not test.build():
                fprint("Error building test: ", file=self.errfile,
                       color=output.RED)
                fprint("status {status.state} - {status.note}".format(
                    status=test.status.current()), file=self.errfile)
                fprint(
                    "For more information, run 'pav log build {}'".format(
                        test.id), file=self.errfile)
                failed_build = test
                break

    if failed_build is not None:
        for test in all_tests:
            if test is not failed_build:
                test.status.set(
                    STATES.ABORTED,
                    "Canceled due to problems with other tests in run")
        return errno.EINVAL

    for sched_name, tests in tests_by_sched.items():
        sched = schedulers.get_plugin(sched_name)

        try:
            sched.schedule_tests(pav_cfg, tests)
        except schedulers.SchedulerPluginError as err:
            fprint('Error scheduling tests:', file=self.errfile,
                   color=output.RED)
            fprint(err, bullet='  ', file=self.errfile)
            fprint('Cancelling already kicked off tests.',
                   file=self.errfile)
            self._cancel_all(tests_by_sched)
            # Everything was just cancelled; stop here rather than
            # scheduling more tests and reporting success.
            return errno.EINVAL

    # Tests should all be scheduled now, and have the SCHEDULED state
    # (at some point, at least). Wait until something isn't scheduled
    # anymore (either running or dead), or our timeout expires.
    if args.wait is not None:
        end_time = time.time() + args.wait
        all_scheduled = True
        while all_scheduled and time.time() < end_time:
            last_time = time.time()
            for sched_name, tests in tests_by_sched.items():
                sched = schedulers.get_plugin(sched_name)
                for test in tests:
                    # Statuses are objects; the state name is in .state.
                    status = test.status.current()
                    if status.state == STATES.SCHEDULED:
                        status = sched.job_status(pav_cfg, test)

                    if status.state != STATES.SCHEDULED:
                        # The test has moved past the scheduled state.
                        all_scheduled = False
                        break

                if not all_scheduled:
                    break

            if all_scheduled:
                # Sleep at most SLEEP INTERVAL seconds, minus the time
                # we spent checking our jobs. (Never a negative time,
                # which time.sleep rejects.)
                time.sleep(max(
                    0, self.SLEEP_INTERVAL - (time.time() - last_time)))

    fprint("{} test{} started as test series {}.".format(
        len(all_tests), 's' if len(all_tests) > 1 else '', series.id),
           file=self.outfile, color=output.GREEN)

    if args.status:
        tests = list(series.tests.keys())
        tests, _ = test_obj_from_id(pav_cfg, tests)
        return print_from_test_obj(pav_cfg, tests, self.outfile, args.json)

    return 0
def _scheduler_cmd(self, _, args):
    """Show information about scheduler plugins.

    Depending on the arguments, this prints a table of a scheduler's
    variables (``args.vars``), dumps a scheduler's default configuration
    (``args.config``), or lists the available scheduler plugins.

    :param argparse.Namespace args:
    :returns: ``errno.EINVAL`` when the named scheduler plugin is invalid,
        None otherwise.
    """

    wants_vars = args.vars is not None
    wants_config = args.config is not None

    sched = None  # type : schedulers.SchedulerPlugin
    sched_name = None

    if wants_vars or wants_config:
        sched_name = args.vars if wants_vars else args.config

        try:
            sched = schedulers.get_plugin(sched_name)
        except schedulers.SchedulerPluginError:
            output.fprint(
                "Invalid scheduler plugin '{}'.".format(sched_name),
                color=output.RED,
            )
            return errno.EINVAL

    if wants_vars:
        empty_config = file_format.TestConfigLoader().load_empty()
        svars = sched.get_vars(empty_config[sched_name])

        var_rows = [svars.info(key) for key in sorted(list(svars.keys()))]

        output.draw_table(
            self.outfile,
            fields=['name', 'deferred', 'example', 'help'],
            rows=var_rows,
            title="Variables for the {} scheduler plugin.".format(
                args.vars))

    elif wants_config:
        sched_config = sched.get_conf()

        class Loader(yaml_config.YamlConfigLoader):
            """Loader for just a scheduler's config."""
            ELEMENTS = [sched_config]

        defaults = Loader().load_empty()

        Loader().dump(self.outfile, values=defaults)

    else:
        # Assuming --list was given
        plugin_rows = []
        for plugin_name in schedulers.list_plugins():
            plugin = schedulers.get_plugin(plugin_name)
            plugin_rows.append({
                'name': plugin_name,
                'description': plugin.description,
                'path': plugin.path,
            })

        fields = ['name', 'description']
        if args.verbose:
            fields.append('path')

        output.draw_table(
            self.outfile,
            fields=fields,
            rows=plugin_rows,
            title="Available Scheduler Plugins")
def run(self, pav_cfg, args): """Cancel the given tests.""" user_id = os.geteuid() # gets unique user id if not args.tests: # user wants to cancel all current tests if args.all: tests_dir = pav_cfg.working_dir / 'test_runs' # iterate through all the tests in the tests directory for test in tests_dir.iterdir(): test_owner_id = test.stat().st_uid if test_owner_id == user_id: if not (test / 'RUN_COMPLETE').exists(): test_id = test.name args.tests.append(test_id) else: # Get the last series ran by this user. series_id = series.TestSeries.load_user_series_id(pav_cfg) if series_id is not None: args.tests.append(series_id) test_list = [] for test_id in args.tests: if test_id.startswith('s'): try: test_list.extend( series.TestSeries.from_id(pav_cfg, test_id).tests) except series.TestSeriesError as err: output.fprint("Series {} could not be found.\n{}".format( test_id, err), file=self.errfile, color=output.RED) return errno.EINVAL except ValueError as err: output.fprint( "Series {} is not a valid series.\n{}".format( test_id, err), color=output.RED, file=self.errfile) return errno.EINVAL else: try: test_list.append(int(test_id)) except ValueError as err: output.fprint("Test {} is not a valid test.\n{}".format( test_id, err), file=self.errfile, color=output.RED) return errno.EINVAL cancel_failed = False test_object_list = [] for test_id in test_list: try: test = TestRun.load(pav_cfg, test_id) sched = schedulers.get_plugin(test.scheduler) test_object_list.append(test) status = test.status.current() # Won't try to cancel a completed job or a job that was # previously cancelled. if status.state not in (STATES.COMPLETE, STATES.SCHED_CANCELLED): # Sets status based on the result of sched.cancel_job. # Ran into trouble when 'cancelling' jobs that never # actually started, ie. build errors/created job states. 
cancel_status = sched.cancel_job(test) test.status.set(cancel_status.state, cancel_status.note) test.set_run_complete() output.fprint("Test {} cancelled.".format(test_id), file=self.outfile, color=output.GREEN) else: output.fprint( "Test {} could not be cancelled has state: {}.".format( test_id, status.state), file=self.outfile, color=output.RED) except TestRunError as err: output.fprint( "Test {} could not be cancelled, cannot be found. \n{}". format(test_id, err), file=self.errfile, color=output.RED) return errno.EINVAL # Only prints statuses of tests if option is selected # and test_list is not empty if args.status and test_object_list: print_from_test_obj(pav_cfg, test_object_list, self.outfile, args.json) return cancel_failed return cancel_failed
def _run(self, pav_cfg, test):
    """Run an already prepped test in the current environment.

    The test's variables are re-resolved, the test is finalized, built (when
    configured to build on nodes), run, and its results are gathered and
    saved. For unexpected errors the test status is set to a matching error
    state and the exception is re-raised.

    :param pav_cfg: The pavilion configuration.
    :param test: The test to run.
    :returns: 1 when the run or the results handling fails in an expected
        way; nothing (None) on success.
    """

    try:
        sched = schedulers.get_plugin(test.scheduler)
    except Exception:
        test.status.set(STATES.BUILD_ERROR,
                        "Unknown error getting the scheduler. Refer to "
                        "the kickoff log.")
        raise

    # Re-add var sets that may have had deferred variables.
    try:
        var_man = VariableSetManager()
        var_man.add_var_set('sys', system_variables.get_vars(defer=False))
        sched_config = test.config[test.scheduler]
        var_man.add_var_set('sched', sched.get_vars(sched_config))
    except Exception:
        test.status.set(STATES.RUN_ERROR,
                        "Unknown error getting pavilion variables at "
                        "run time.")
        raise

    try:
        test.finalize(var_man)
    except Exception:
        test.status.set(STATES.RUN_ERROR,
                        "Unknown error finalizing test.")
        raise

    try:
        if test.config['build']['on_nodes'] in ['true', 'True']:
            if not test.build():
                # The test id has to be passed in to be logged; the message
                # was previously an unformatted '{t.id}' template.
                self.logger.warning("Test %s failed to build.", test.id)
    except Exception:
        test.status.set(STATES.BUILD_ERROR,
                        "Unknown build error. Refer to the kickoff log.")
        raise

    # Optionally wait on other tests running under the same scheduler.
    # This depends on the scheduler and the test configuration.
    lock = sched.lock_concurrency(pav_cfg, test)

    try:
        run_result = test.run()
    except TestRunError as err:
        test.status.set(STATES.RUN_ERROR, err)
        return 1
    except TimeoutError:
        return 1
    except Exception:
        test.status.set(
            STATES.RUN_ERROR,
            "Unknown error while running test. Refer to the kickoff log.")
        raise
    finally:
        # Always release the concurrency lock, however the run went.
        sched.unlock_concurrency(lock)

    try:
        rp_errors = []
        # Make sure the result parsers have reasonable arguments.
        # We check here because the parser code itself will likely assume
        # the args are valid form _check_args, but those might not be
        # checkable before kickoff due to deferred variables.
        try:
            result_parsers.check_args(test.config['results'])
        except TestRunError as err:
            rp_errors.append(str(err))

        if rp_errors:
            for msg in rp_errors:
                test.status.set(STATES.RESULTS_ERROR, msg)
            test.set_run_complete()
            return 1

        results = test.gather_results(run_result)
    except result_parsers.ResultParserError as err:
        self.logger.error("Unexpected error gathering results: %s", err)
        test.status.set(STATES.RESULTS_ERROR,
                        "Error parsing results: {}".format(err))
        return 1

    try:
        test.save_results(results)

        result_logger = logging.getLogger('results')
        result_logger.info(output.json_dumps(results))
    except Exception:
        test.status.set(
            STATES.RESULTS_ERROR,
            "Unknown error while saving results. Refer to the kickoff log.")
        raise

    try:
        test.status.set(STATES.COMPLETE,
                        "The test completed with result: {}"
                        .format(results.get('result', '<unknown>')))
    except Exception:
        test.status.set(
            STATES.UNKNOWN,
            "Unknown error while setting test completion. Refer to the "
            "kickoff log.")
        raise
def run_tests(self, pav_cfg, tests_by_sched, series, wait, report_status):
    """Schedule the given tests and optionally wait for one to start.

    Every scheduler named in ``tests_by_sched`` must be available before
    anything is scheduled. Skipped tests, and build-only tests that do
    not need to be built on nodes, are not handed to a scheduler.

    :param pav_cfg: The pavilion configuration, passed through to the
        scheduler plugins and the status helpers.
    :param dict[str,[TestRun]] tests_by_sched: A dict by scheduler name
        of the tests (in a list).
    :param series: The test series.
    :param int wait: Wait at most this long for a scheduled test to leave
        the SCHEDULED state before continuing. None disables waiting.
    :param bool report_status: Do a 'pav status' after tests have started.
    :return: ``errno.EINVAL`` if a scheduler is unavailable or scheduling
        fails. Otherwise the result of ``print_from_test_obj`` when
        ``report_status`` is set, or 0 when it is not.
    """

    all_tests = [test
                 for sched_tests in tests_by_sched.values()
                 for test in sched_tests]

    for sched_name, sched_tests in tests_by_sched.items():
        sched = schedulers.get_plugin(sched_name)

        if not sched.available():
            fprint("{} tests started with the {} scheduler, but "
                   "that scheduler isn't available on this system."
                   .format(len(sched_tests), sched_name),
                   file=self.errfile, color=output.RED)
            return errno.EINVAL

    # (scheduler plugin, test) pairs that were actually handed to a
    # scheduler; only these can meaningfully be waited on.
    scheduled = []

    for sched_name, sched_tests in tests_by_sched.items():
        sched = schedulers.get_plugin(sched_name)

        # Drop skipped tests, then filter out any 'build_only' tests
        # (it should be all or none) that shouldn't be scheduled.
        runnable = [
            test for test in sched_tests
            if not test.skipped and (
                # The non-build only tests
                (not test.build_only) or
                # The build only tests that are built on nodes
                (not test.build_local and
                 # As long they need to be built.
                 (test.rebuild or not test.builder.exists())))]

        # Skip this scheduler if it doesn't have tests that need to run.
        if not runnable:
            continue

        try:
            sched.schedule_tests(pav_cfg, runnable)
        except schedulers.SchedulerPluginError as err:
            fprint('Error scheduling tests:', file=self.errfile,
                   color=output.RED)
            fprint(err, bullet=' ', file=self.errfile)
            fprint('Cancelling already kicked off tests.',
                   file=self.errfile)
            self._cancel_all(tests_by_sched)
            # return so the rest of the tests don't actually run
            return errno.EINVAL

        scheduled.extend((sched, test) for test in runnable)

    def left_scheduled_state(sched, test):
        """Return True if the test is no longer in the SCHEDULED state."""

        # Status values may be objects carrying a '.state' attribute (as
        # job_status results are used elsewhere in this file) or bare
        # state names; handle both rather than comparing an object
        # against a state name.
        status = test.status.current()
        if getattr(status, 'state', status) == STATES.SCHEDULED:
            # Ask the scheduler, which may know the job has moved on.
            status = sched.job_status(pav_cfg, test)

        return getattr(status, 'state', status) != STATES.SCHEDULED

    # Tests should all be scheduled now, and have the SCHEDULED state
    # (at some point, at least). Wait until something isn't scheduled
    # anymore (either running or dead), or our timeout expires.
    if wait is not None and scheduled:
        end_time = time.time() + wait
        started = False
        while not started and time.time() < end_time:
            last_time = time.time()

            started = any(left_scheduled_state(sched, test)
                          for sched, test in scheduled)

            if not started:
                # Sleep at most SLEEP INTERVAL seconds, minus the time
                # we spent checking our jobs. Never pass a negative
                # value, which time.sleep rejects.
                time.sleep(max(
                    0, self.SLEEP_INTERVAL - (time.time() - last_time)))

    fprint("{} test{} started as test series {}."
           .format(len(all_tests),
                   's' if len(all_tests) != 1 else '',
                   series.id),
           file=self.outfile,
           color=output.GREEN)

    if report_status:
        test_ids = list(series.tests.keys())
        test_objs, _ = test_obj_from_id(pav_cfg, test_ids)
        return print_from_test_obj(
            pav_cfg=pav_cfg,
            test_obj=test_objs,
            outfile=self.outfile,
            json=False)

    return 0
def run_tests(self, wait: Union[None, int] = None,
              tests: List[TestRun] = None) -> int:
    """Run the tests for this test series.

    Each test's scheduler must be available before anything is
    scheduled. Tests flagged as skipped or already complete are not
    scheduled. If scheduling a test fails, that test and every test
    scheduled earlier in this call are cancelled.

    :param int wait: Wait at most this long for a scheduled test to leave
        the SCHEDULED state before returning. None disables waiting.
    :param tests: Manually specified list of tests to run. Defaults to
        the series' test list.
    :return: A return code based on the success of this action:
        ``errno.EINVAL`` if a scheduler is unavailable or scheduling
        fails, 0 otherwise.
    """

    if tests is None:
        tests = list(self.tests.values())

    all_tests = tests

    for test in tests:
        sched_name = test.scheduler
        sched = schedulers.get_plugin(sched_name)

        if not sched.available():
            fprint("1 test started with the {} scheduler, but "
                   "that scheduler isn't available on this system."
                   .format(sched_name),
                   file=self.errfile, color=output.RED)
            return errno.EINVAL

    # (scheduler plugin, test) pairs that were successfully scheduled.
    scheduled = []

    for test in tests:
        # don't run this test if it was meant to be skipped
        if test.skipped:
            continue

        # tests that are build-only or build-local should
        # already be completed, therefore don't run these
        if test.complete:
            continue

        sched = schedulers.get_plugin(test.scheduler)
        try:
            sched.schedule_tests(self.pav_cfg, [test])
        except schedulers.SchedulerPluginError as err:
            fprint('Error scheduling test: ', file=self.errfile,
                   color=output.RED)
            fprint(err, bullet=' ', file=self.errfile)
            fprint('Cancelling already kicked off tests.',
                   file=self.errfile)
            sched.cancel_job(test)
            # Also cancel what was kicked off before the failure, as
            # the message above promises.
            for prior_sched, prior_test in scheduled:
                prior_sched.cancel_job(prior_test)
            return errno.EINVAL

        scheduled.append((sched, test))

    def left_scheduled_state(sched, test):
        """Return True if the test is no longer in the SCHEDULED state."""

        # Status values may be objects carrying a '.state' attribute (as
        # job_status results are used elsewhere in this file) or bare
        # state names; handle both rather than comparing an object
        # against a state name.
        status = test.status.current()
        if getattr(status, 'state', status) == STATES.SCHEDULED:
            # Ask the scheduler, which may know the job has moved on.
            status = sched.job_status(self.pav_cfg, test)

        return getattr(status, 'state', status) != STATES.SCHEDULED

    # Tests should all be scheduled now, and have the SCHEDULED state
    # (at some point, at least). Wait until something isn't scheduled
    # anymore (either running or dead), or our timeout expires.
    if wait is not None and scheduled:
        end_time = time.time() + wait
        started = False
        while not started and time.time() < end_time:
            last_time = time.time()

            started = any(left_scheduled_state(sched, test)
                          for sched, test in scheduled)

            if not started:
                # Sleep at most WAIT INTERVAL seconds, minus the time
                # we spent checking our jobs. Never pass a negative
                # value, which time.sleep rejects.
                time.sleep(max(
                    0, self.WAIT_INTERVAL - (time.time() - last_time)))

    fprint("{} test{} started as test series {}."
           .format(len(all_tests),
                   's' if len(all_tests) != 1 else '',
                   self.sid),
           file=self.outfile,
           color=output.GREEN)

    return 0