class Runner(RunnerInterface):

    name = 'nrunner'
    description = 'nrunner based implementation of job compliant runner'

    def _save_to_file(self, filename, buff, mode='wb'):
        with open(filename, mode) as fp:
            fp.write(buff)

    def _populate_task_logdir(self, base_path, task, statuses, debug=False):
        # We are copying here to avoid printing duplicated information
        local_statuses = copy(statuses)
        last = local_statuses[-1]
        try:
            stdout = last.pop('stdout')
        except KeyError:
            stdout = None
        try:
            stderr = last.pop('stderr')
        except KeyError:
            stderr = None

        # Create task dir
        task_path = os.path.join(base_path, task.identifier.str_filesystem)
        os.makedirs(task_path, exist_ok=True)

        # Save stdout and stderr
        if stdout is not None:
            stdout_file = os.path.join(task_path, 'stdout')
            self._save_to_file(stdout_file, stdout)
        if stderr is not None:
            stderr_file = os.path.join(task_path, 'stderr')
            self._save_to_file(stderr_file, stderr)

        # Save debug
        if debug:
            debug = os.path.join(task_path, 'debug')
            with open(debug, 'w') as fp:
                json.dump(local_statuses, fp)

        data_file = os.path.join(task_path, 'data')
        with open(data_file, 'w') as fp:
            fp.write("{}\n".format(task.output_dir))

    def _get_all_runtime_tasks(self, test_suite):
        result = []
        no_digits = len(str(len(test_suite)))
        for index, task in enumerate(test_suite.tests, start=1):
            task.known_runners = nrunner.RUNNERS_REGISTRY_PYTHON_CLASS
            # this is all rubbish data
            if test_suite.name:
                prefix = "{}-{}".format(test_suite.name, index)
            else:
                prefix = index
            test_id = TestID(prefix, task.runnable.uri, None, no_digits)
            task.identifier = test_id
            result.append(RuntimeTask(task))
        return result

    def _start_status_server(self, status_server_listen):
        # pylint: disable=W0201
        self.status_repo = StatusRepo()
        # pylint: disable=W0201
        self.status_server = StatusServer(status_server_listen,
                                          self.status_repo)
        asyncio.ensure_future(self.status_server.serve_forever())

    async def _update_status(self, job):
        tasks_by_id = {str(runtime_task.task.identifier): runtime_task.task
                       for runtime_task in self.tasks}
        while True:
            try:
                (task_id, status, _) = \
                    self.status_repo.status_journal_summary.pop(0)
            except IndexError:
                await asyncio.sleep(0.05)
                continue

            task = tasks_by_id.get(task_id)
            early_state = {'name': task.identifier,
                           'job_logdir': job.logdir,
                           'job_unique_id': job.unique_id}
            if status == 'started':
                job.result.start_test(early_state)
                job.result_events_dispatcher.map_method('start_test',
                                                        job.result,
                                                        early_state)
            elif status == 'finished':
                this_task_data = self.status_repo.get_task_data(task_id)
                last_task_status = this_task_data[-1]
                test_state = {'status': last_task_status.get('result').upper()}
                test_state.update(early_state)

                time_start = this_task_data[0]['time']
                time_end = last_task_status['time']
                time_elapsed = time_end - time_start
                test_state['time_start'] = time_start
                test_state['time_end'] = time_end
                test_state['time_elapsed'] = time_elapsed

                # fake log dir, needed by some result plugins such as HTML
                test_state['logdir'] = ''

                base_path = os.path.join(job.logdir, 'test-results')
                self._populate_task_logdir(base_path,
                                           task,
                                           this_task_data,
                                           job.config.get('core.debug'))

                job.result.check_test(test_state)
                job.result_events_dispatcher.map_method('end_test',
                                                        job.result,
                                                        test_state)

                if not mapping[test_state['status']]:
                    self.summary.add("FAIL")

    def run_suite(self, job, test_suite):
        # pylint: disable=W0201
        self.summary = set()

        test_suite.tests, _ = nrunner.check_tasks_requirements(
            test_suite.tests)
        job.result.tests_total = test_suite.size  # no support for variants yet

        listen = test_suite.config.get('nrunner.status_server_listen')
        self._start_status_server(listen)

        # pylint: disable=W0201
        self.tasks = self._get_all_runtime_tasks(test_suite)
        if test_suite.config.get('nrunner.shuffle'):
            random.shuffle(self.tasks)
        tsm = TaskStateMachine(self.tasks)
        spawner_name = test_suite.config.get('nrunner.spawner')
        spawner = SpawnerDispatcher(test_suite.config)[spawner_name].obj
        max_running = min(test_suite.config.get('nrunner.max_parallel_tasks'),
                          len(self.tasks))
        workers = [Worker(tsm, spawner, max_running=max_running).run()
                   for _ in range(max_running)]
        asyncio.ensure_future(self._update_status(job))
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.wait_for(asyncio.gather(*workers),
                                                     job.timeout or None))
        except (KeyboardInterrupt, asyncio.TimeoutError):
            self.summary.add("INTERRUPTED")

        # Wait until all messages may have been processed by the
        # status_updater. This should be replaced by a mechanism
        # that only waits if there are missing status messages to
        # be processed, and, only for a given amount of time.
        # Tests with non received status will always show as SKIP
        # because of result reconciliation.
        loop.run_until_complete(asyncio.sleep(0.05))

        job.result.end_tests()
        self.status_server.close()
        return self.summary

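
# The following is an illustrative, self-contained sketch (not part of the
# plugin above) of the polling pattern that _update_status() relies on: a
# consumer coroutine pops entries from a shared journal list and backs off
# with a short sleep whenever the journal is empty. All names here
# (journal, produce, consume) are hypothetical and exist only for this demo.
import asyncio


async def consume(journal, handled):
    # drain the journal, sleeping briefly while it is empty
    while len(handled) < 3:
        try:
            entry = journal.pop(0)
        except IndexError:
            await asyncio.sleep(0.05)
            continue
        handled.append(entry)


async def produce(journal):
    # simulate status messages arriving over time
    for status in ('started', 'running', 'finished'):
        await asyncio.sleep(0.1)
        journal.append(('task-1', status))


async def main():
    journal, handled = [], []
    await asyncio.gather(produce(journal), consume(journal, handled))
    print(handled)


if __name__ == '__main__':
    asyncio.run(main())
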
class Runner(RunnerInterface):

    name = 'nrunner'
    description = 'nrunner based implementation of job compliant runner'

    @staticmethod
    def _get_requirements_runtime_tasks(runnable, prefix):
        if runnable.requirements is None:
            return

        # creates the runnables for the requirements
        requirements_runnables = RequirementsResolver.resolve(runnable)
        requirements_runtime_tasks = []
        # creates the tasks and runtime tasks for the requirements
        for requirement_runnable in requirements_runnables:
            name = '%s-%s' % (requirement_runnable.kind,
                              requirement_runnable.kwargs.get('name'))
            # the human UI works with TestID objects, so we need to
            # use it to name other tasks
            task_id = TestID(prefix, name, None)
            # with --dry-run we don't want to run requirement
            if runnable.kind == 'dry-run':
                requirement_runnable.kind = 'noop'
            # creates the requirement task
            requirement_task = nrunner.Task(requirement_runnable,
                                            identifier=task_id,
                                            category='requirement')
            # make sure we track the dependencies of a task
            # runtime_task.task.dependencies.add(requirement_task)
            # created the requirement runtime task
            requirements_runtime_tasks.append(RuntimeTask(requirement_task))

        return requirements_runtime_tasks

    @staticmethod
    def _create_runtime_tasks_for_test(test_suite, runnable, no_digits, index,
                                       variant):
        """Creates runtime tasks for both tests, and for its requirements."""
        result = []

        # test related operations
        # create test ID
        if test_suite.name:
            prefix = "{}-{}".format(test_suite.name, index)
        else:
            prefix = index
        test_id = TestID(prefix, runnable.uri, variant, no_digits)
        # inject variant on runnable
        runnable.variant = dump_variant(variant)

        # handles the test task
        task = nrunner.Task(
            runnable,
            identifier=test_id,
            known_runners=nrunner.RUNNERS_REGISTRY_PYTHON_CLASS)
        runtime_task = RuntimeTask(task)
        result.append(runtime_task)

        # handles the requirements
        requirements_runtime_tasks = (Runner._get_requirements_runtime_tasks(
            runnable, prefix))
        # extend the list of tasks with the requirements runtime tasks
        if requirements_runtime_tasks is not None:
            for requirement_runtime_task in requirements_runtime_tasks:
                # make sure we track the dependencies of a task
                runtime_task.task.dependencies.add(
                    requirement_runtime_task.task)
            result.extend(requirements_runtime_tasks)

        return result

    @staticmethod
    def _get_all_runtime_tasks(test_suite):
        runtime_tasks = []
        test_result_total = test_suite.variants.get_number_of_tests(
            test_suite.tests)
        no_digits = len(str(test_result_total))

        # define execution order
        execution_order = test_suite.config.get('run.execution_order')
        if execution_order == "variants-per-test":
            test_variant = [(test, variant)
                            for test in test_suite.tests
                            for variant in test_suite.variants.itertests()]
        elif execution_order == "tests-per-variant":
            test_variant = [(test, variant)
                            for variant in test_suite.variants.itertests()
                            for test in test_suite.tests]

        # decide if a copy of the runnable is needed, in case of more
        # variants than tests
        copy_runnable = len(test_variant) > len(test_suite.tests)
        # create runtime tasks
        for index, (runnable, variant) in enumerate(test_variant, start=1):
            if copy_runnable:
                runnable = deepcopy(runnable)
            runtime_tasks.extend(
                Runner._create_runtime_tasks_for_test(test_suite, runnable,
                                                      no_digits, index,
                                                      variant))
        return runtime_tasks

    def _start_status_server(self, status_server_listen):
        # pylint: disable=W0201
        self.status_repo = StatusRepo()
        # pylint: disable=W0201
        self.status_server = StatusServer(status_server_listen,
                                          self.status_repo)
        asyncio.ensure_future(self.status_server.serve_forever())

    async def _update_status(self, job):
        tasks_by_id = {str(runtime_task.task.identifier): runtime_task.task
                       for runtime_task in self.runtime_tasks}
        message_handler = MessageHandler()
        while True:
            try:
                (task_id, _, _, index) = \
                    self.status_repo.status_journal_summary.pop(0)
            except IndexError:
                await asyncio.sleep(0.05)
                continue

            message = self.status_repo.get_task_data(task_id, index)
            task = tasks_by_id.get(task_id)
            message_handler.process_message(message, task, job)

    def run_suite(self, job, test_suite):
        summary = set()

        test_suite.tests, _ = nrunner.check_runnables_runner_requirements(
            test_suite.tests)
        job.result.tests_total = test_suite.variants.get_number_of_tests(
            test_suite.tests)

        listen = test_suite.config.get('nrunner.status_server_listen')
        self._start_status_server(listen)

        # pylint: disable=W0201
        self.runtime_tasks = self._get_all_runtime_tasks(test_suite)
        if test_suite.config.get('nrunner.shuffle'):
            random.shuffle(self.runtime_tasks)

        test_ids = [rt.task.identifier for rt in self.runtime_tasks
                    if rt.task.category == 'test']

        tsm = TaskStateMachine(self.runtime_tasks, self.status_repo)
        spawner_name = test_suite.config.get('nrunner.spawner')
        spawner = SpawnerDispatcher(test_suite.config)[spawner_name].obj
        max_running = min(test_suite.config.get('nrunner.max_parallel_tasks'),
                          len(self.runtime_tasks))
        timeout = test_suite.config.get('task.timeout.running')
        workers = [Worker(state_machine=tsm,
                          spawner=spawner,
                          max_running=max_running,
                          task_timeout=timeout).run()
                   for _ in range(max_running)]
        asyncio.ensure_future(self._update_status(job))
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.wait_for(asyncio.gather(*workers),
                                                     job.timeout or None))
        except (KeyboardInterrupt, asyncio.TimeoutError, TestFailFast) as ex:
            LOG_JOB.info(str(ex))
            job.interrupted_reason = str(ex)
            summary.add("INTERRUPTED")

        # Wait until all messages may have been processed by the
        # status_updater. This should be replaced by a mechanism
        # that only waits if there are missing status messages to
        # be processed, and, only for a given amount of time.
        # Tests with non received status will always show as SKIP
        # because of result reconciliation.
        loop.run_until_complete(asyncio.sleep(0.05))

        job.result.end_tests()
        self.status_server.close()

        # Update the overall summary with found test statuses, which will
        # determine the Avocado command line exit status
        summary.update([
            status.upper()
            for status in self.status_repo.get_result_set_for_tasks(test_ids)])
        return summary

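
# Illustrative, self-contained sketch (not part of the plugin above) of how
# the 'run.execution_order' setting changes the (test, variant) pairing built
# in _get_all_runtime_tasks(). The test and variant values below are made up
# stand-ins for real runnables and variant objects.
def pair_tests_and_variants(tests, variants, execution_order):
    if execution_order == "variants-per-test":
        # all variants of the first test, then all variants of the second, ...
        return [(test, variant) for test in tests for variant in variants]
    elif execution_order == "tests-per-variant":
        # all tests under the first variant, then all tests under the second, ...
        return [(test, variant) for variant in variants for test in tests]
    raise ValueError(f"unknown execution order: {execution_order}")


if __name__ == '__main__':
    tests = ['test-a', 'test-b']
    variants = ['variant-1', 'variant-2']
    print(pair_tests_and_variants(tests, variants, "variants-per-test"))
    # [('test-a', 'variant-1'), ('test-a', 'variant-2'),
    #  ('test-b', 'variant-1'), ('test-b', 'variant-2')]
    print(pair_tests_and_variants(tests, variants, "tests-per-variant"))
    # [('test-a', 'variant-1'), ('test-b', 'variant-1'),
    #  ('test-a', 'variant-2'), ('test-b', 'variant-2')]
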
class Runner(RunnerInterface):

    name = 'nrunner'
    description = 'nrunner based implementation of job compliant runner'

    def _determine_status_server_uri(self, test_suite):
        # pylint: disable=W0201
        self.status_server_dir = None
        if test_suite.config.get('nrunner.status_server_auto'):
            # no UNIX domain sockets on Windows
            if platform.system() != 'Windows':
                self.status_server_dir = tempfile.TemporaryDirectory(
                    prefix='avocado_')
                return os.path.join(self.status_server_dir.name,
                                    '.status_server.sock')
        return test_suite.config.get('nrunner.status_server_listen')

    def _create_status_server(self, test_suite, job):
        listen = self._determine_status_server_uri(test_suite)
        # pylint: disable=W0201
        self.status_repo = StatusRepo(job.unique_id)
        # pylint: disable=W0201
        self.status_server = StatusServer(listen, self.status_repo)

    async def _update_status(self, job):
        tasks_by_id = {str(runtime_task.task.identifier): runtime_task.task
                       for runtime_task in self.runtime_tasks}
        message_handler = MessageHandler()
        while True:
            try:
                (task_id, _, _, index) = \
                    self.status_repo.status_journal_summary.pop(0)
            except IndexError:
                await asyncio.sleep(0.05)
                continue

            message = self.status_repo.get_task_data(task_id, index)
            task = tasks_by_id.get(task_id)
            message_handler.process_message(message, task, job)

    @staticmethod
    def _abort_if_missing_runners(runnables):
        if runnables:
            missing_kinds = set([runnable.kind for runnable in runnables])
            msg = ("Could not find runners for runnable(s) of kind(s): %s"
                   % ", ".join(missing_kinds))
            raise JobError(msg)

    def run_suite(self, job, test_suite):
        summary = set()

        if not test_suite.enabled:
            job.interrupted_reason = f"Suite {test_suite.name} is disabled."
            return summary

        test_suite.tests, missing_requirements = \
            nrunner.check_runnables_runner_requirements(test_suite.tests)
        self._abort_if_missing_runners(missing_requirements)

        job.result.tests_total = test_suite.variants.get_number_of_tests(
            test_suite.tests)

        self._create_status_server(test_suite, job)

        graph = RuntimeTaskGraph(test_suite.get_test_variants(),
                                 test_suite.name,
                                 self.status_server.uri,
                                 job.unique_id)
        # pylint: disable=W0201
        self.runtime_tasks = graph.get_tasks_in_topological_order()

        # Start the status server
        asyncio.ensure_future(self.status_server.serve_forever())

        if test_suite.config.get('nrunner.shuffle'):
            random.shuffle(self.runtime_tasks)

        test_ids = [rt.task.identifier for rt in self.runtime_tasks
                    if rt.task.category == 'test']

        tsm = TaskStateMachine(self.runtime_tasks, self.status_repo)
        spawner_name = test_suite.config.get('nrunner.spawner')
        spawner = SpawnerDispatcher(test_suite.config, job)[spawner_name].obj
        max_running = min(test_suite.config.get('nrunner.max_parallel_tasks'),
                          len(self.runtime_tasks))
        timeout = test_suite.config.get('task.timeout.running')
        failfast = test_suite.config.get('run.failfast')
        workers = [Worker(state_machine=tsm,
                          spawner=spawner,
                          max_running=max_running,
                          task_timeout=timeout,
                          failfast=failfast).run()
                   for _ in range(max_running)]
        asyncio.ensure_future(self._update_status(job))
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.wait_for(asyncio.gather(*workers),
                                                     job.timeout or None))
        except (KeyboardInterrupt, asyncio.TimeoutError, TestFailFast) as ex:
            LOG_JOB.info(str(ex))
            job.interrupted_reason = str(ex)
            summary.add("INTERRUPTED")

        # Wait until all messages may have been processed by the
        # status_updater. This should be replaced by a mechanism
        # that only waits if there are missing status messages to
        # be processed, and, only for a given amount of time.
        # Tests with non received status will always show as SKIP
        # because of result reconciliation.
        loop.run_until_complete(asyncio.sleep(0.05))

        job.result.end_tests()
        self.status_server.close()
        if self.status_server_dir is not None:
            self.status_server_dir.cleanup()

        # Update the overall summary with found test statuses, which will
        # determine the Avocado command line exit status
        summary.update([
            status.upper()
            for status in self.status_repo.get_result_set_for_tasks(test_ids)])
        return summary

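
# Illustrative, self-contained sketch (not part of the plugin above) of the
# URI selection done by _determine_status_server_uri(): when the "auto"
# option is on and the platform supports UNIX domain sockets, a throwaway
# socket path inside a temporary directory is used; otherwise the configured
# listen address is kept. The config dict keys and values below are made up.
import os
import platform
import tempfile


def pick_status_server_uri(config):
    status_server_dir = None
    if config.get('status_server_auto'):
        # no UNIX domain sockets on Windows
        if platform.system() != 'Windows':
            status_server_dir = tempfile.TemporaryDirectory(prefix='demo_')
            uri = os.path.join(status_server_dir.name, '.status_server.sock')
            return uri, status_server_dir
    return config.get('status_server_listen'), status_server_dir


if __name__ == '__main__':
    uri, tmpdir = pick_status_server_uri({'status_server_auto': True,
                                          'status_server_listen': '127.0.0.1:8888'})
    print(uri)
    if tmpdir is not None:
        tmpdir.cleanup()  # the runner above does the same after the suite finishes
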
class Runner(RunnerInterface):

    name = 'nrunner'
    description = 'nrunner based implementation of job compliant runner'

    def _get_requirements_runtime_tasks(self, runnable, prefix, job_id):
        if runnable.requirements is None:
            return

        # creates the runnables for the requirements
        requirements_runnables = RequirementsResolver.resolve(runnable)
        requirements_runtime_tasks = []
        # creates the tasks and runtime tasks for the requirements
        for requirement_runnable in requirements_runnables:
            name = '%s-%s' % (requirement_runnable.kind,
                              requirement_runnable.kwargs.get('name'))
            # the human UI works with TestID objects, so we need to
            # use it to name other tasks
            task_id = TestID(prefix, name, None)
            # with --dry-run we don't want to run requirement
            if runnable.kind == 'dry-run':
                requirement_runnable.kind = 'noop'
            # creates the requirement task
            requirement_task = nrunner.Task(
                requirement_runnable,
                identifier=task_id,
                status_uris=[self.status_server.uri],
                category='requirement',
                job_id=job_id)
            # make sure we track the dependencies of a task
            # runtime_task.task.dependencies.add(requirement_task)
            # created the requirement runtime task
            requirements_runtime_tasks.append(RuntimeTask(requirement_task))

        return requirements_runtime_tasks

    def _create_runtime_tasks_for_test(self, test_suite, runnable, no_digits,
                                       index, variant, job_id):
        """Creates runtime tasks for both tests, and for its requirements."""
        result = []

        # test related operations
        # create test ID
        if test_suite.name:
            prefix = "{}-{}".format(test_suite.name, index)
        else:
            prefix = index
        test_id = TestID(prefix, runnable.identifier, variant, no_digits)
        # inject variant on runnable
        runnable.variant = dump_variant(variant)

        # handles the test task
        task = nrunner.Task(
            runnable,
            identifier=test_id,
            known_runners=nrunner.RUNNERS_REGISTRY_PYTHON_CLASS,
            status_uris=[self.status_server.uri],
            job_id=job_id)
        runtime_task = RuntimeTask(task)
        result.append(runtime_task)

        # handles the requirements
        requirements_runtime_tasks = (
            self._get_requirements_runtime_tasks(runnable, prefix, job_id))
        # extend the list of tasks with the requirements runtime tasks
        if requirements_runtime_tasks is not None:
            for requirement_runtime_task in requirements_runtime_tasks:
                # make sure we track the dependencies of a task
                runtime_task.task.dependencies.add(
                    requirement_runtime_task.task)
            result.extend(requirements_runtime_tasks)

        return result

    def _get_all_runtime_tasks(self, test_suite, job_id):
        runtime_tasks = []
        test_result_total = test_suite.variants.get_number_of_tests(
            test_suite.tests)
        no_digits = len(str(test_result_total))

        if test_suite.test_parameters:
            paths = ['/']
            tree_nodes = TreeNode().get_node(paths[0], True)
            tree_nodes.value = test_suite.test_parameters
            variant = {"variant": tree_nodes,
                       "variant_id": None,
                       "paths": paths}
            test_variant = [(test, variant) for test in test_suite.tests]
        else:
            # let's use variants when parameters are not available
            # define execution order
            execution_order = test_suite.config.get('run.execution_order')
            if execution_order == "variants-per-test":
                test_variant = [(test, variant)
                                for test in test_suite.tests
                                for variant in test_suite.variants.itertests()]
            elif execution_order == "tests-per-variant":
                test_variant = [(test, variant)
                                for variant in test_suite.variants.itertests()
                                for test in test_suite.tests]

        # decide if a copy of the runnable is needed, in case of more
        # variants than tests
        copy_runnable = len(test_variant) > len(test_suite.tests)
        # create runtime tasks
        for index, (runnable, variant) in enumerate(test_variant, start=1):
            if copy_runnable:
                runnable = deepcopy(runnable)
            runtime_tasks.extend(self._create_runtime_tasks_for_test(
                test_suite, runnable, no_digits, index, variant, job_id))
        return runtime_tasks

    def _determine_status_server_uri(self, test_suite):
        # pylint: disable=W0201
        self.status_server_dir = None
        if test_suite.config.get('nrunner.status_server_auto'):
            # no UNIX domain sockets on Windows
            if platform.system() != 'Windows':
                self.status_server_dir = tempfile.TemporaryDirectory(
                    prefix='avocado_')
                return os.path.join(self.status_server_dir.name,
                                    '.status_server.sock')
        return test_suite.config.get('nrunner.status_server_listen')

    def _create_status_server(self, test_suite, job):
        listen = self._determine_status_server_uri(test_suite)
        # pylint: disable=W0201
        self.status_repo = StatusRepo(job.unique_id)
        # pylint: disable=W0201
        self.status_server = StatusServer(listen, self.status_repo)

    async def _update_status(self, job):
        tasks_by_id = {str(runtime_task.task.identifier): runtime_task.task
                       for runtime_task in self.runtime_tasks}
        message_handler = MessageHandler()
        while True:
            try:
                (task_id, _, _, index) = \
                    self.status_repo.status_journal_summary.pop(0)
            except IndexError:
                await asyncio.sleep(0.05)
                continue

            message = self.status_repo.get_task_data(task_id, index)
            task = tasks_by_id.get(task_id)
            message_handler.process_message(message, task, job)

    @staticmethod
    def _abort_if_missing_runners(runnables):
        if runnables:
            missing_kinds = set([runnable.kind for runnable in runnables])
            msg = ("Could not find runners for runnable(s) of kind(s): %s"
                   % ", ".join(missing_kinds))
            raise JobError(msg)

    def run_suite(self, job, test_suite):
        summary = set()

        if not test_suite.enabled:
            job.interrupted_reason = f"Suite {test_suite.name} is disabled."
            return summary

        test_suite.tests, missing_requirements = \
            nrunner.check_runnables_runner_requirements(test_suite.tests)
        self._abort_if_missing_runners(missing_requirements)

        job.result.tests_total = test_suite.variants.get_number_of_tests(
            test_suite.tests)

        self._create_status_server(test_suite, job)

        # pylint: disable=W0201
        self.runtime_tasks = self._get_all_runtime_tasks(test_suite,
                                                         job.unique_id)

        # Start the status server
        asyncio.ensure_future(self.status_server.serve_forever())

        if test_suite.config.get('nrunner.shuffle'):
            random.shuffle(self.runtime_tasks)

        test_ids = [rt.task.identifier for rt in self.runtime_tasks
                    if rt.task.category == 'test']

        tsm = TaskStateMachine(self.runtime_tasks, self.status_repo)
        spawner_name = test_suite.config.get('nrunner.spawner')
        spawner = SpawnerDispatcher(test_suite.config)[spawner_name].obj
        spawner.job_output_dir = job.test_results_path
        max_running = min(test_suite.config.get('nrunner.max_parallel_tasks'),
                          len(self.runtime_tasks))
        timeout = test_suite.config.get('task.timeout.running')
        failfast = test_suite.config.get('run.failfast')
        workers = [Worker(state_machine=tsm,
                          spawner=spawner,
                          max_running=max_running,
                          task_timeout=timeout,
                          failfast=failfast).run()
                   for _ in range(max_running)]
        asyncio.ensure_future(self._update_status(job))
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.wait_for(asyncio.gather(*workers),
                                                     job.timeout or None))
        except (KeyboardInterrupt, asyncio.TimeoutError, TestFailFast) as ex:
            LOG_JOB.info(str(ex))
            job.interrupted_reason = str(ex)
            summary.add("INTERRUPTED")

        # Wait until all messages may have been processed by the
        # status_updater. This should be replaced by a mechanism
        # that only waits if there are missing status messages to
        # be processed, and, only for a given amount of time.
        # Tests with non received status will always show as SKIP
        # because of result reconciliation.
        loop.run_until_complete(asyncio.sleep(0.05))

        job.result.end_tests()
        self.status_server.close()
        if self.status_server_dir is not None:
            self.status_server_dir.cleanup()

        # Update the overall summary with found test statuses, which will
        # determine the Avocado command line exit status
        summary.update([
            status.upper()
            for status in self.status_repo.get_result_set_for_tasks(test_ids)])
        return summary

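
# Illustrative, self-contained sketch (not part of the plugin above) of the
# index-width computation used when building test IDs: no_digits is the
# number of digits of the total test count, so indexes can be padded to a
# uniform width in the human-readable identifiers. The zero-padded format
# below is a simplified stand-in for avocado's TestID, not its real output.
def format_index(index, total):
    no_digits = len(str(total))
    return str(index).zfill(no_digits)


if __name__ == '__main__':
    total = 120
    print([format_index(i, total) for i in (1, 25, 120)])
    # ['001', '025', '120']
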
class CartesianRunner(RunnerInterface):
    """Test runner for Cartesian graph traversal."""

    name = 'traverser'
    description = 'Runs tests through a Cartesian graph traversal'

    @property
    def all_tests_ok(self):
        """
        Evaluate if all tests run under this runner have an ok status.

        :returns: whether all tests ended with acceptable status
        :rtype: bool
        """
        mapped_status = {STATUSES_MAPPING[t["status"]]
                         for t in self.job.result.tests}
        return all(mapped_status)

    def __init__(self):
        """Construct minimal attributes for the Cartesian runner."""
        self.tasks = []
        self.slots = []

        self.status_repo = None
        self.status_server = None

    """running functionality"""
    async def _update_status(self, job):
        message_handler = MessageHandler()
        while True:
            try:
                (task_id, _, _, index) = \
                    self.status_repo.status_journal_summary.pop(0)
            except IndexError:
                await asyncio.sleep(0.05)
                continue

            message = self.status_repo.get_task_data(task_id, index)
            tasks_by_id = {str(runtime_task.task.identifier): runtime_task.task
                           for runtime_task in self.tasks}
            task = tasks_by_id.get(task_id)
            message_handler.process_message(message, task, job)

    async def run_test(self, job, node):
        """
        Run a test instance inside a subprocess.

        :param job: job that includes the test suite
        :type job: :py:class:`avocado.core.job.Job`
        :param node: test node to run
        :type node: :py:class:`TestNode`
        """
        if node.spawner is None:
            default_slot = self.slots[0] if len(self.slots) > 0 else ""
            node.set_environment(job, default_slot)
        # once the slot is set (here or earlier), the hostname reflects it
        hostname = node.params["hostname"]
        hostname = "localhost" if not hostname else hostname
        logging.debug(f"Running {node.id} on {hostname}")

        if not self.status_repo:
            self.status_repo = StatusRepo(job.unique_id)
            self.status_server = StatusServer(
                job.config.get('nrunner.status_server_listen'),
                self.status_repo)
            asyncio.ensure_future(self.status_server.serve_forever())
            # TODO: this needs more customization
            asyncio.ensure_future(self._update_status(job))

        raw_task = nrunner.Task(node.get_runnable(), node.id_test,
                                [job.config.get('nrunner.status_server_uri')],
                                nrunner.RUNNERS_REGISTRY_PYTHON_CLASS,
                                job_id=self.job.unique_id)
        task = RuntimeTask(raw_task)
        self.tasks += [task]

        # TODO: use a single state machine for all test nodes when we are able
        # to at least add requested tasks to it safely (using its locks)
        await Worker(state_machine=TaskStateMachine([task], self.status_repo),
                     spawner=node.spawner,
                     max_running=1,
                     task_timeout=job.config.get('task.timeout.running')).run()

    async def run_test_node(self, node, can_retry=False):
        """
        Run a node once, and optionally re-run it depending on the parameters.

        :param node: test node to run
        :type node: :py:class:`TestNode`
        :param bool can_retry: whether this node can be re-run
        :returns: run status of :py:meth:`run_test`
        :rtype: bool
        :raises: :py:class:`AssertionError` if the run test node contains no objects

        The retry parameters are `retry_attempts` and `retry_stop`. The first
        is the maximum number of retries, and the second indicates when to
        stop retrying. The possible combinations of these values are:

        - `retry_stop = error`: retry until error or a maximum of
          `retry_attempts` number of times
        - `retry_stop = success`: retry until success or a maximum of
          `retry_attempts` number of times
        - `retry_stop = none`: retry a maximum of `retry_attempts` number of times

        Only tests with the status of pass, warning, error or failure will be
        retried. Other statuses will be ignored and the test will run only once.

        This method also works as a convenience wrapper around :py:meth:`run_test`,
        providing some default arguments.
        """
        if node.is_objectless():
            raise AssertionError("Cannot run test nodes not using any test "
                                 "objects, here %s" % node)

        retry_stop = node.params.get("retry_stop", "none")
        # ignore the retry parameters for nodes that cannot be re-run (need to run at least once)
        runs_left = 1 + node.params.get_numeric("retry_attempts", 0) if can_retry else 1
        # do not log when the user is not using the retry feature
        if runs_left > 1:
            logging.debug(f"Running test with retry_stop={retry_stop} "
                          f"and retry_attempts={runs_left}")
        assert runs_left >= 1, "retry_attempts cannot be less than zero"
        assert retry_stop in ["none", "error", "success"], \
            "retry_stop must be one of 'none', 'error' or 'success'"

        original_prefix = node.prefix
        for r in range(runs_left):
            # appending a suffix to retries so we can tell them apart
            if r > 0:
                node.prefix = original_prefix + f"r{r}"
            uid = node.long_prefix
            name = node.params["name"]

            await self.run_test(self.job, node)

            try:
                test_result = next((x for x in self.job.result.tests
                                    if x["name"].name == name
                                    and x["name"].uid == uid))
                test_status = test_result["status"]
            except StopIteration:
                test_status = "ERROR"
                logging.info("Test result wasn't found and cannot be extracted")
            if test_status not in ["PASS", "WARN", "ERROR", "FAIL"]:
                # it doesn't make sense to retry with other status
                logging.info(f"Will not attempt to retry test with status {test_status}")
                break
            if retry_stop == "success" and test_status in ["PASS", "WARN"]:
                logging.info("Stopping after first successful run")
                break
            if retry_stop == "error" and test_status in ["ERROR", "FAIL"]:
                logging.info("Stopping after first failed run")
                break
        node.prefix = original_prefix
        logging.info(f"Finished running test with status {test_status}")
        # no need to log when test was not repeated
        if runs_left > 1:
            logging.info(f"Finished running test {r+1} times")

        # FIX: as VT's retval is broken (always True), we fix its handling here
        if test_status in ["ERROR", "FAIL"]:
            return False
        else:
            return True

    def _run_available_children(self, node, graph, params):
        loop = asyncio.get_event_loop()
        # TODO: parallelize only leaf nodes with just this setup node as parent for now
        # but later on run together also internal nodes if they don't modify the same vm
        run_children = [n for n in node.cleanup_nodes
                        if len(n.setup_nodes) == 1
                        and len(n.cleanup_nodes) == 0 and n.should_run]
        while len(run_children) > 0:
            current_nodes = run_children[:len(self.slots)]
            if len(current_nodes) == 0:
                raise ValueError("Not enough container run slots")
            logging.debug("Traversal advance running in parallel the tests:\n%s",
                          "\n".join([n.id for n in current_nodes]))
            for i, n in enumerate(current_nodes):
                current_nodes[i].set_environment(self.job, self.slots[i])
                run_children.remove(current_nodes[i])
            to_traverse = [self._traverse_test_node(graph, n, params)
                           for n in current_nodes]
            loop.run_until_complete(
                asyncio.wait_for(asyncio.gather(*to_traverse),
                                 self.job.timeout or None))

    def run_traversal(self, graph, params):
        """
        Run all user and system defined tests optimizing the setup reuse and
        minimizing the repetition of demanded tests.

        :param graph: test graph to traverse
        :type graph: :py:class:`TestGraph`
        :param params: runtime parameters used for extra customization
        :type params: {str, str}
        :raises: :py:class:`AssertionError` if some traversal assertions are violated

        The highest priority is at the setup tests (parents) since the test
        cannot be run without the required setup, then the current test, then
        a single child of its children (DFS), and finally the other children
        (tests that can benefit from the fact that this test/setup was done)
        followed by the other siblings (tests benefiting from its parent/setup).

        Of course all possible children are restricted by the user-defined
        "only" and the number of internal test nodes is minimized for
        achieving this goal.
        """
        shared_roots = graph.get_nodes_by("shared_root", "yes")
        assert len(shared_roots) == 1, \
            "There can be only exactly one starting node (shared root)"
        root = shared_roots[0]

        if log.getLogger('graph').level <= log.DEBUG:
            traverse_dir = os.path.join(self.job.logdir, "graph_traverse")
            if not os.path.exists(traverse_dir):
                os.makedirs(traverse_dir)
            step = 0

        loop = asyncio.get_event_loop()

        traverse_path = [root]
        while not root.is_cleanup_ready():
            next = traverse_path[-1]
            if len(traverse_path) > 1:
                previous = traverse_path[-2]
            else:
                # since the loop is discontinued if len(traverse_path) == 0 or root.is_cleanup_ready()
                # a valid current node with at least one child is guaranteed
                traverse_path.append(next.pick_next_child())
                continue

            logging.debug("At test node %s which is %sready with setup, %sready with cleanup,"
                          " should %srun, should %sbe cleaned, and %sbe scanned",
                          next.params["shortname"],
                          "not " if not next.is_setup_ready() else "",
                          "not " if not next.is_cleanup_ready() else "",
                          "not " if not next.should_run else "",
                          "not " if not next.should_clean else "",
                          "not " if not next.should_scan else "")
            logging.debug("Current traverse path/stack:\n%s",
                          "\n".join([n.params["shortname"] for n in traverse_path]))
            # if previous in path is the child of the next, then the path is reversed
            # looking for setup so if the next is setup ready and already run, remove
            # the previous' reference to it and pop the current next from the path
            if previous in next.cleanup_nodes or previous in next.visited_cleanup_nodes:

                if next.is_setup_ready():
                    loop.run_until_complete(
                        self._traverse_test_node(graph, next, params))
                    previous.visit_node(next)
                    traverse_path.pop()
                else:
                    # inverse DFS
                    traverse_path.append(next.pick_next_parent())
            elif previous in next.setup_nodes or previous in next.visited_setup_nodes:

                # stop if test is not a setup leaf since parents have higher priority than children
                if not next.is_setup_ready():
                    traverse_path.append(next.pick_next_parent())
                    continue
                else:
                    loop.run_until_complete(
                        self._traverse_test_node(graph, next, params))

                if next.is_cleanup_ready():
                    loop.run_until_complete(
                        self._reverse_test_node(graph, next, params))
                    for setup in next.visited_setup_nodes:
                        setup.visit_node(next)
                    traverse_path.pop()
                    graph.report_progress()
                else:
                    # parallel pocket lookahead
                    if next != root and len(self.slots) > 1:
                        self._run_available_children(next, graph, params)
                        graph.report_progress()
                    # normal DFS
                    traverse_path.append(next.pick_next_child())
            else:
                raise AssertionError("Discontinuous path in the test dependency graph detected")

            if log.getLogger('graph').level <= log.DEBUG:
                step += 1
                graph.visualize(traverse_dir, step)

    def run_suite(self, job, test_suite):
        """
        Run one or more tests and report with test result.

        :param job: job that includes the test suite
        :type job: :py:class:`avocado.core.job.Job`
        :param test_suite: test suite with some tests to run
        :type test_suite: :py:class:`avocado.core.suite.TestSuite`
        :returns: a set with types of test failures
        :rtype: :py:class:`set`
        """
        self.job = job

        self.status_repo = StatusRepo(job.unique_id)
        self.status_server = StatusServer(
            job.config.get('nrunner.status_server_listen'),
            self.status_repo)
        asyncio.ensure_future(self.status_server.serve_forever())
        # TODO: this needs more customization
        asyncio.ensure_future(self._update_status(job))

        graph = self._graph_from_suite(test_suite)
        summary = set()
        params = self.job.config["param_dict"]

        self.tasks = []
        self.slots = params.get("slots", "").split(" ")

        # TODO: fix other run_traversal calls
        try:
            graph.visualize(self.job.logdir)
            self.run_traversal(graph, params)
            if not self.all_tests_ok:
                # the summary is a set so only a single failed test is enough
                summary.add('FAIL')
        except KeyboardInterrupt:
            summary.add('INTERRUPTED')

        # TODO: the avocado implementation needs a workaround here:
        # Wait until all messages may have been processed by the
        # status_updater. This should be replaced by a mechanism
        # that only waits if there are missing status messages to
        # be processed, and, only for a given amount of time.
        # Tests with non received status will always show as SKIP
        # because of result reconciliation.
        time.sleep(0.05)

        self.job.result.end_tests()
        self.job.funcatexit.run()
        self.status_server.close()
        signal.signal(signal.SIGTSTP, signal.SIG_IGN)
        return summary

    """custom nodes"""
    async def run_terminal_node(self, graph, object_name, params):
        """
        Run the set of tests necessary for creating a given test object.

        :param graph: test graph to run create node from
        :type graph: :py:class:`TestGraph`
        :param str object_name: name of the test object to be created
        :param params: runtime parameters used for extra customization
        :type params: {str, str}
        :raises: :py:class:`NotImplementedError` if using incompatible installation variant

        The current implementation, with implicit knowledge on the types of
        test objects, internally spawns an original (otherwise unmodified)
        install test.
        """
        object_suffix, object_variant = (object_name.split("-")[:1][0],
                                         "-".join(object_name.split("-")[1:]))
        object_image, object_vm = object_suffix.split("_")
        objects = graph.get_objects_by(param_val="^" + object_variant + "$",
                                       subset=graph.get_objects_by(
                                           "images",
                                           object_suffix.split("_")[0]))
        vms = [o for o in objects if o.key == "vms"]
        assert len(vms) == 1, \
            "Test object %s's vm not existing or unique in: %s" % (object_name, objects)
        test_object = objects[0]
        nodes = graph.get_nodes_by("object_root", object_name)
        assert len(nodes) == 1, \
            "There should exist one unique root for %s" % object_name
        test_node = nodes[0]

        if test_object.is_permanent() and not test_node.params.get_boolean("create_permanent_vm"):
            raise AssertionError("Reached a permanent object root for %s "
                                 "due to incorrect setup" % test_object.suffix)

        logging.info("Configuring creation/installation for %s on %s",
                     object_vm, object_image)
        setup_dict = test_node.params.copy()
        setup_dict.update({} if params is None else params.copy())
        setup_dict.update({
            "type": "shared_configure_install",
            "check_mode": "rr",  # explicit root handling
            # overwrite some params inherited from the modified install node
            f"set_state_images_{object_image}_{object_vm}": "root",
            "start_vm": "no"})
        install_config = test_object.config.get_copy()
        install_config.parse_next_batch(base_file="sets.cfg",
                                        ovrwrt_file=param.tests_ovrwrt_file(),
                                        ovrwrt_str=param.re_str("all..noop"),
                                        ovrwrt_dict=setup_dict)
        status = await self.run_test_node(
            TestNode("0t", install_config, test_node.objects[0]))
        if not status:
            logging.error("Could not configure the installation for %s on %s",
                          object_vm, object_image)
            return status

        logging.info("Installing virtual machine %s", test_object.suffix)
        test_node.params["type"] = test_node.params["configure_install"]
        return await self.run_test_node(test_node)

    """internals"""
    async def _traverse_test_node(self, graph, test_node, params):
        """Run a single test according to user defined policy and state availability."""
        if test_node.should_scan:
            test_node.scan_states()
            test_node.should_scan = False

        if test_node.should_run:

            # the primary setup nodes need special treatment
            if params.get("dry_run", "no") == "yes":
                logging.info("Running a dry %s", test_node.params["shortname"])
            elif test_node.is_scan_node():
                logging.debug("Test run started from the shared root")
            elif test_node.is_object_root():
                status = await self.run_terminal_node(
                    graph, test_node.params["object_root"], params)
                if not status:
                    logging.error("Could not perform the installation from %s",
                                  test_node)
            else:
                # finally, good old running of an actual test
                status = await self.run_test_node(test_node, can_retry=True)
                if not status:
                    logging.error("Got nonzero status from the test %s", test_node)

            for test_object in test_node.objects:
                object_params = test_object.object_typed_params(test_node.params)
                # if a state was set it is final and the retrieved state was overwritten
                object_state = object_params.get("set_state",
                                                 object_params.get("get_state"))
                if object_state is not None and object_state != "":
                    test_object.current_state = object_state
            test_node.should_run = False
        else:
            logging.debug("Skipping test %s", test_node.params["shortname"])

    async def _reverse_test_node(self, graph, test_node, params):
        """
        Clean up any states that could be created by this node (will be skipped
        by default but the states can be removed with "unset_mode=f.").
        """
        if test_node.should_clean:
            if params.get("dry_run", "no") == "yes":
                logging.info("Cleaning a dry %s", test_node.params["shortname"])
            elif test_node.is_shared_root():
                logging.debug("Test run ended at the shared root")
            elif test_node.produces_setup():
                setup_dict = {} if params is None else params.copy()
                setup_dict["vm_action"] = "unset"
                setup_dict["vms"] = test_node.params["vms"]

                # the cleanup will be performed if at least one selected object has a cleanable state
                has_selected_object_setup = False
                for test_object in test_node.objects:
                    object_params = test_object.object_typed_params(test_node.params)
                    object_state = object_params.get("set_state")
                    if not object_state:
                        continue

                    # avoid running any test unless the user really requires cleanup and such is needed
                    if object_params.get("unset_mode", "ri")[0] != "f":
                        continue
                    # avoid running any test for unselected vms
                    if test_object.key == "nets":
                        logging.warning("Net state cleanup is not supported")
                        continue
                    vm_name = (test_object.suffix if test_object.key == "vms"
                               else test_object.composites[0].suffix)
                    if vm_name in params.get("vms", param.all_objects("vms")):
                        has_selected_object_setup = True
                    else:
                        continue

                    # TODO: cannot remove ad-hoc root states, is this even needed?
                    if test_object.key == "vms":
                        vm_params = object_params
                        setup_dict["images_" + vm_name] = vm_params["images"]
                        for image_name in vm_params.objects("images"):
                            image_params = vm_params.object_params(image_name)
                            setup_dict[f"image_name_{image_name}_{vm_name}"] = \
                                image_params["image_name"]
                            setup_dict[f"image_format_{image_name}_{vm_name}"] = \
                                image_params["image_format"]
                            if image_params.get_boolean("create_image", False):
                                setup_dict[f"remove_image_{image_name}_{vm_name}"] = "yes"
                                setup_dict["skip_image_processing"] = "no"

                    # reverse the state setup for the given test object
                    unset_suffixes = f"_{test_object.key}_{test_object.suffix}"
                    unset_suffixes += f"_{vm_name}" if test_object.key == "images" else ""
                    # NOTE: we are forcing the unset_mode to be the one defined for the test node because
                    # the unset manual step behaves differently now (all this extra complexity starts from
                    # the fact that it has different default value which is noninvasive)
                    setup_dict.update({
                        f"unset_state{unset_suffixes}": object_state,
                        f"unset_mode{unset_suffixes}": object_params.get("unset_mode", "ri")})

                if has_selected_object_setup:
                    logging.info("Cleaning up %s", test_node)
                    setup_str = param.re_str("all..internal..manage.unchanged")
                    net = test_node.objects[0]
                    forward_config = net.config.get_copy()
                    forward_config.parse_next_batch(base_file="sets.cfg",
                                                    ovrwrt_file=param.tests_ovrwrt_file(),
                                                    ovrwrt_str=setup_str,
                                                    ovrwrt_dict=setup_dict)
                    await self.run_test_node(
                        TestNode(test_node.prefix + "c", forward_config, net))
                else:
                    logging.info("No need to clean up %s", test_node)
        else:
            logging.debug("The test %s should not be cleaned up",
                          test_node.params["shortname"])

    def _graph_from_suite(self, test_suite):
        """
        Restore a Cartesian graph from the digested list of test object factories.
        """
        # HACK: pass the constructed graph to the runner using static attribute hack
        # since the currently digested test suite contains factory arguments obtained
        # from an irreversible (information destructive) approach
        graph = TestGraph.REFERENCE

        # validate the test suite refers to the same test graph
        assert len(test_suite) == len(graph.nodes)
        for node1, node2 in zip(test_suite.tests, graph.nodes):
            assert node1.uri == node2.get_runnable().uri

        return graph

class Runner(RunnerInterface):

    name = 'nrunner'
    description = 'nrunner based implementation of job compliant runner'

    @staticmethod
    def _get_all_runtime_tasks(test_suite):
        runtime_tasks = []
        no_digits = len(str(len(test_suite)))
        status_uris = [test_suite.config.get('nrunner.status_server_uri')]
        for index, runnable in enumerate(test_suite.tests, start=1):
            # this is all rubbish data
            if test_suite.name:
                prefix = "{}-{}".format(test_suite.name, index)
            else:
                prefix = index
            test_id = TestID(prefix, runnable.uri, None, no_digits)
            task = nrunner.Task(runnable, test_id, status_uris,
                                nrunner.RUNNERS_REGISTRY_PYTHON_CLASS)
            runtime_tasks.append(RuntimeTask(task))
        return runtime_tasks

    def _start_status_server(self, status_server_listen):
        # pylint: disable=W0201
        self.status_repo = StatusRepo()
        # pylint: disable=W0201
        self.status_server = StatusServer(status_server_listen,
                                          self.status_repo)
        asyncio.ensure_future(self.status_server.serve_forever())

    async def _update_status(self, job):
        tasks_by_id = {str(runtime_task.task.identifier): runtime_task.task
                       for runtime_task in self.tasks}
        message_handler = MessageHandler()
        while True:
            try:
                (task_id, _, _, index) = \
                    self.status_repo.status_journal_summary.pop(0)
            except IndexError:
                await asyncio.sleep(0.05)
                continue

            message = self.status_repo.get_task_data(task_id, index)
            task = tasks_by_id.get(task_id)
            message_handler.process_message(message, task, job)

    def run_suite(self, job, test_suite):
        # pylint: disable=W0201
        self.summary = set()

        test_suite.tests, _ = nrunner.check_runnables_runner_requirements(
            test_suite.tests)
        job.result.tests_total = test_suite.size  # no support for variants yet

        listen = test_suite.config.get('nrunner.status_server_listen')
        self._start_status_server(listen)

        # pylint: disable=W0201
        self.tasks = self._get_all_runtime_tasks(test_suite)
        if test_suite.config.get('nrunner.shuffle'):
            random.shuffle(self.tasks)
        tsm = TaskStateMachine(self.tasks)
        spawner_name = test_suite.config.get('nrunner.spawner')
        spawner = SpawnerDispatcher(test_suite.config)[spawner_name].obj
        max_running = min(test_suite.config.get('nrunner.max_parallel_tasks'),
                          len(self.tasks))
        timeout = test_suite.config.get('task.timeout.running')
        workers = [Worker(state_machine=tsm,
                          spawner=spawner,
                          max_running=max_running,
                          task_timeout=timeout).run()
                   for _ in range(max_running)]
        asyncio.ensure_future(self._update_status(job))
        loop = asyncio.get_event_loop()
        try:
            loop.run_until_complete(asyncio.wait_for(asyncio.gather(*workers),
                                                     job.timeout or None))
        except (KeyboardInterrupt, asyncio.TimeoutError):
            self.summary.add("INTERRUPTED")

        # Wait until all messages may have been processed by the
        # status_updater. This should be replaced by a mechanism
        # that only waits if there are missing status messages to
        # be processed, and, only for a given amount of time.
        # Tests with non received status will always show as SKIP
        # because of result reconciliation.
        loop.run_until_complete(asyncio.sleep(0.05))

        job.result.end_tests()
        self.status_server.close()
        return self.summary

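
# Illustrative, self-contained sketch (not part of the plugin above) of the
# worker fan-out in run_suite(): the number of concurrently running workers
# is capped at min(max_parallel_tasks, number of tasks) and all workers are
# awaited together with asyncio.gather(). The worker body below is a dummy
# stand-in for avocado's Worker/TaskStateMachine machinery.
import asyncio


async def worker(name, queue, done):
    # keep pulling tasks until the shared queue is exhausted
    while True:
        try:
            task = queue.pop(0)
        except IndexError:
            return
        await asyncio.sleep(0.01)   # pretend to run the task
        done.append((name, task))


async def main():
    tasks = [f"task-{i}" for i in range(5)]
    max_parallel_tasks = 8
    max_running = min(max_parallel_tasks, len(tasks))  # never more workers than tasks
    done = []
    workers = [worker(f"worker-{i}", tasks, done) for i in range(max_running)]
    await asyncio.gather(*workers)
    print(done)


if __name__ == '__main__':
    asyncio.run(main())
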