def _report_unschedulable(self, unschedulable, err_msg=None):
    if not unschedulable:
        return
    self._log(
        logging.ERROR,
        f"There are {len(unschedulable)} tests which cannot be run due to insufficient cluster resources")
    for tc in unschedulable:
        if err_msg:
            msg = err_msg
        else:
            msg = f"Test {tc.test_id} requires more resources than are available in the whole cluster. " \
                  f"{self.cluster.all().nodes.attempt_remove_spec(tc.expected_cluster_spec)}"
        self._log(logging.ERROR, msg)
        result = TestResult(tc,
                            self.test_counter,
                            self.session_context,
                            test_status=FAIL,
                            summary=msg,
                            start_time=time.time(),
                            stop_time=time.time())
        self.results.append(result)
        result.report()
        self.test_counter += 1

def run_all_tests(self): self.results.start_time = time.time() self.log(logging.INFO, "starting test run with session id %s..." % self.session_context.session_id) self.log(logging.INFO, "running %d tests..." % len(self.tests)) for test_num, test_context in enumerate(self.tests, 1): # Create single testable unit and corresponding test result object self.current_test_context = test_context self.current_test = test_context.cls(test_context) result = TestResult(self.current_test_context) # Run the test unit result.start_time = time.time() self.log(logging.INFO, "running test %d of %d" % (test_num, len(self.tests))) try: self.log(logging.INFO, "setting up") self.setup_single_test() self.log(logging.INFO, "running") result.data = self.run_single_test() self.log(logging.INFO, "PASS") except BaseException as e: self.log(logging.INFO, "FAIL") result.success = False result.summary += str(e.message) + "\n" + traceback.format_exc(limit=16) self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt) finally: if not self.session_context.no_teardown: self.log(logging.INFO, "tearing down") self.teardown_single_test() result.stop_time = time.time() self.results.append(result) test_reporter = SingleResultFileReporter(result) test_reporter.report() test_reporter = SingleResultStdoutReporter(result) test_reporter.report() self.current_test_context, self.current_test = None, None if self.stop_testing: break self.results.stop_time = time.time() return self.results
def run_all_tests(self):
    self.results.start_time = time.time()

    for test in self.tests:
        # Create single testable unit and corresponding test result object
        self.current_test_context, self.current_test = create_test_case(test, self.session_context)
        result = TestResult(self.current_test_context, self.current_test_context.test_name)

        # Run the test unit
        try:
            self.log(logging.INFO, "setting up")
            self.setup_single_test()

            self.log(logging.INFO, "running")
            result.start_time = time.time()
            result.data = self.run_single_test()
            self.log(logging.INFO, "PASS")
        except BaseException as e:
            self.log(logging.INFO, "FAIL")
            result.success = False
            result.summary += e.message + "\n" + traceback.format_exc(limit=16)
            self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt)
        finally:
            self.log(logging.INFO, "tearing down")
            self.teardown_single_test()

            result.stop_time = time.time()
            self.results.append(result)

            self.current_test_context, self.current_test = None, None

        if self.stop_testing:
            break

    self.results.stop_time = time.time()
    return self.results

def run_all_tests(self):
    self.results.start_time = time.time()
    self.log(logging.INFO,
             "starting test run with session id %s..." % self.session_context.session_id)
    self.log(logging.INFO, "running %d tests..." % len(self.tests))

    for test_num, test_context in enumerate(self.tests, 1):
        # Create single testable unit and corresponding test result object
        self.current_test_context = test_context
        self.current_test = test_context.cls(test_context)
        result = TestResult(self.current_test_context)

        # Run the test unit
        result.start_time = time.time()
        self.log(logging.INFO, "running test %d of %d" % (test_num, len(self.tests)))
        try:
            self.log(logging.INFO, "setting up")
            self.setup_single_test()

            self.log(logging.INFO, "running")
            result.data = self.run_single_test()
            self.log(logging.INFO, "PASS")
        except BaseException as e:
            self.log(logging.INFO, "FAIL")
            result.success = False
            result.summary += e.message + "\n" + traceback.format_exc(limit=16)
            self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt)
        finally:
            if not self.session_context.no_teardown:
                self.log(logging.INFO, "tearing down")
                self.teardown_single_test()

            result.stop_time = time.time()
            self.results.append(result)

            test_reporter = SingleResultFileReporter(result)
            test_reporter.report()
            test_reporter = SingleResultStdoutReporter(result)
            test_reporter.report()

            self.current_test_context, self.current_test = None, None

        if self.stop_testing:
            break

    self.results.stop_time = time.time()
    return self.results

def run_all_tests(self):
    self.results.start_time = time.time()

    for test in self.tests:
        # Create single testable unit and corresponding test result object
        self.current_test_context, self.current_test = create_test_case(test, self.session_context)
        result = TestResult(self.current_test_context, self.current_test_context.test_name)

        # Run the test unit
        try:
            self.log(logging.INFO, "setting up")
            self.setup_single_test()

            self.log(logging.INFO, "running")
            result.start_time = time.time()
            result.data = self.run_single_test()
            self.log(logging.INFO, "PASS")
        except BaseException as e:
            self.log(logging.INFO, "FAIL")
            result.success = False
            result.summary += e.message + "\n" + traceback.format_exc(limit=16)
            self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt)
        finally:
            self.log(logging.INFO, "tearing down")
            self.teardown_single_test()

            result.stop_time = time.time()
            self.results.append(result)

            self.current_test_context, self.current_test = None, None

        if self.stop_testing:
            break

    self.results.stop_time = time.time()
    return self.results

def run_all_tests(self):
    self.receiver.start()
    self.results.start_time = time.time()

    # Report tests which cannot be run
    if len(self.scheduler.unschedulable) > 0:
        self._log(logging.ERROR,
                  "There are %d tests which cannot be run due to insufficient cluster resources"
                  % len(self.scheduler.unschedulable))
        for tc in self.scheduler.unschedulable:
            msg = "Test %s expects more nodes than are available in the entire cluster: " % tc.test_id
            msg += "expected_num_nodes: %s, " % str(tc.expected_node_spec)
            msg += "cluster size: %s." % str(self.cluster.node_spec)
            self._log(logging.ERROR, msg)
            result = TestResult(tc,
                                self.test_counter,
                                self.session_context,
                                test_status=FAIL,
                                summary=msg,
                                start_time=time.time(),
                                stop_time=time.time())
            self.results.append(result)
            result.report()
            self.test_counter += 1

    # Run the tests!
    self._log(logging.INFO, "starting test run with session id %s..." % self.session_context.session_id)
    self._log(logging.INFO, "running %d tests..." % len(self.scheduler))

    while self._ready_to_trigger_more_tests or self._expect_client_requests:
        try:
            while self._ready_to_trigger_more_tests:
                next_test_context = self.scheduler.next()
                self._preallocate_subcluster(next_test_context)
                self._run_single_test(next_test_context)

            if self._expect_client_requests:
                try:
                    event = self.receiver.recv()
                    self._handle(event)
                except Exception as e:
                    err_str = "Exception receiving message: %s: %s" % (str(type(e)), str(e))
                    err_str += "\n" + traceback.format_exc(limit=16)
                    self._log(logging.ERROR, err_str)
                    # All processes are on the same machine, so treat communication failure as a fatal error
                    raise
        except KeyboardInterrupt:
            # If SIGINT is received, stop triggering new tests, and let the currently running tests finish
            self._log(logging.INFO,
                      "Received KeyboardInterrupt. Now waiting for currently running tests to finish...")
            self.stop_testing = True

    for proc in self._client_procs.values():
        proc.join()
    self.receiver.close()
    return self.results

def run(self):
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index

    self.send(self.message.running())
    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    # Results from this test, as well as logs will be dumped here
    mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

    start_time = -1
    stop_time = -1
    test_status = PASS
    summary = ""
    data = None

    try:
        # Instantiate test
        self.test = self.test_context.cls(self.test_context)

        self.log(logging.DEBUG, "Checking if there are enough nodes...")
        min_cluster_spec = self.test.min_cluster_spec()
        # Check test resource
        msg = self.cluster.all().nodes.attempt_remove_spec(min_cluster_spec)
        if len(msg) > 0:
            raise RuntimeError("There are not enough nodes available in the cluster to run this test. " + msg)

        # Run the test unit
        start_time = time.time()
        self.setup_test()
        data = self.run_test()
        test_status = PASS
        self.log(logging.INFO, "PASS")
    except BaseException as e:
        err_trace = str(e) + "\n" + traceback.format_exc(limit=16)
        self.log(logging.INFO, "FAIL: " + err_trace)
        test_status = FAIL
        summary += err_trace
    finally:
        self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
        stop_time = time.time()

        if hasattr(self, "services"):
            service_errors = self.test_context.services.errors()
            if service_errors:
                summary += "\n\n" + service_errors

    result = TestResult(self.test_context,
                        self.test_index,
                        self.session_context,
                        test_status,
                        summary,
                        data,
                        start_time,
                        stop_time)

    self.log(logging.INFO, "Summary: %s" % str(result.summary))
    self.log(logging.INFO, "Data: %s" % str(result.data))

    result.report()
    # Tell the server we are finished
    self._do_safely(lambda: self.send(self.message.finished(result=result)),
                    "Problem sending FINISHED message:")
    # Release test_context resources only after creating the result and finishing logging activity
    # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
    self.test_context.close()
    self.test_context = None
    self.test = None

def run_all_tests(self):
    self.results.start_time = time.time()
    self.log(logging.INFO,
             "starting test run with session id %s..." % self.session_context.session_id)
    self.log(logging.INFO, "running %d tests..." % len(self.tests))

    for test_num, test_context in enumerate(self.tests, 1):
        if len(self.cluster) != self.cluster.num_available_nodes():
            # Sanity check - are we leaking cluster nodes?
            raise RuntimeError(
                "Expected all nodes to be available. Instead, %d of %d are available" %
                (self.cluster.num_available_nodes(), len(self.cluster)))

        self.current_test_context = test_context
        result = TestResult(self.current_test_context)

        if self.current_test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result.test_status = IGNORE
            result.start_time = time.time()
            result.stop_time = result.start_time
            self.results.append(result)
            self.log(logging.INFO, "Ignoring, and moving to next test...")
            continue

        # Results from this test, as well as logs will be dumped here
        mkdir_p(self.current_test_context.results_dir)

        try:
            # Instantiate test
            self.current_test = test_context.cls(test_context)

            # Run the test unit
            result.start_time = time.time()
            self.log(logging.INFO, "test %d of %d" % (test_num, len(self.tests)))
            self.log(logging.INFO, "setting up")
            self.setup_single_test()

            self.log(logging.INFO, "running")
            result.data = self.run_single_test()
            result.test_status = PASS
            self.log(logging.INFO, "PASS")
        except BaseException as e:
            self.log(logging.INFO, "FAIL")
            result.test_status = FAIL
            result.summary += str(e.message) + "\n" + traceback.format_exc(limit=16)
            self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt)
        finally:
            self.teardown_single_test(teardown_services=not self.session_context.no_teardown)

            result.stop_time = time.time()
            self.results.append(result)

            self.log(logging.INFO, "Summary: %s" % str(result.summary))
            self.log(logging.INFO, "Data: %s" % str(result.data))

            if test_num < len(self.tests):
                terminal_width, y = get_terminal_size()
                print "~" * int(2 * terminal_width / 3)

            test_reporter = SingleResultFileReporter(result)
            test_reporter.report()

            self.current_test_context, self.current_test = None, None

        if self.stop_testing:
            break

    self.results.stop_time = time.time()
    return self.results

def run(self):
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index

    self.send(self.message.running())
    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    # Results from this test, as well as logs will be dumped here
    mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

    start_time = -1
    stop_time = -1
    test_status = PASS
    summary = ""
    data = None

    try:
        # Instantiate test
        self.test = self.test_context.cls(self.test_context)

        self.log(logging.DEBUG, "Checking if there are enough nodes...")
        for (operating_system, node_count) in self.test.min_cluster_size().iteritems():
            if node_count > self.cluster.num_nodes_for_operating_system(operating_system):
                raise RuntimeError(
                    "There are not enough nodes available in the cluster to run this test. "
                    "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
                    (operating_system,
                     self.cluster.num_nodes_for_operating_system(operating_system),
                     node_count,
                     self.test_context.services))

        # Run the test unit
        start_time = time.time()
        self.setup_test()
        data = self.run_test()
        test_status = PASS
        self.log(logging.INFO, "PASS")
    except BaseException as e:
        err_trace = str(e.message) + "\n" + traceback.format_exc(limit=16)
        self.log(logging.INFO, "FAIL: " + err_trace)
        test_status = FAIL
        summary += err_trace
    finally:
        self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
        stop_time = time.time()

    result = TestResult(self.test_context,
                        self.test_index,
                        self.session_context,
                        test_status,
                        summary,
                        data,
                        start_time,
                        stop_time)

    self.log(logging.INFO, "Summary: %s" % str(result.summary))
    self.log(logging.INFO, "Data: %s" % str(result.data))

    result.report()
    # Tell the server we are finished
    self._do_safely(lambda: self.send(self.message.finished(result=result)),
                    "Problem sending FINISHED message:")
    # Release test_context resources only after creating the result and finishing logging activity
    # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
    self.test_context.close()
    self.test_context = None
    self.test = None

def run_all_tests(self):
    self.receiver.start()
    self.results.start_time = time.time()

    # Report tests which cannot be run
    if len(self.scheduler.unschedulable) > 0:
        self._log(logging.ERROR,
                  "There are %d tests which cannot be run due to insufficient cluster resources"
                  % len(self.scheduler.unschedulable))
        for tc in self.scheduler.unschedulable:
            msg = "Test %s requires more resources than are available in the whole cluster. " % tc.test_id
            msg += self.cluster.all().nodes.attempt_remove_spec(tc.expected_cluster_spec)
            self._log(logging.ERROR, msg)
            result = TestResult(tc,
                                self.test_counter,
                                self.session_context,
                                test_status=FAIL,
                                summary=msg,
                                start_time=time.time(),
                                stop_time=time.time())
            self.results.append(result)
            result.report()
            self.test_counter += 1

    # Run the tests!
    self._log(logging.INFO, "starting test run with session id %s..." % self.session_context.session_id)
    self._log(logging.INFO, "running %d tests..." % len(self.scheduler))

    while self._ready_to_trigger_more_tests or self._expect_client_requests:
        try:
            while self._ready_to_trigger_more_tests:
                next_test_context = self.scheduler.next()
                self._preallocate_subcluster(next_test_context)
                self._run_single_test(next_test_context)

            if self._expect_client_requests:
                try:
                    event = self.receiver.recv()
                    self._handle(event)
                except Exception as e:
                    err_str = "Exception receiving message: %s: %s" % (str(type(e)), str(e))
                    err_str += "\n" + traceback.format_exc(limit=16)
                    self._log(logging.ERROR, err_str)
                    # All processes are on the same machine, so treat communication failure as a fatal error
                    raise
        except KeyboardInterrupt:
            # If SIGINT is received, stop triggering new tests, and let the currently running tests finish
            self._log(logging.INFO,
                      "Received KeyboardInterrupt. Now waiting for currently running tests to finish...")
            self.stop_testing = True

    for proc in self._client_procs.values():
        proc.join()
    self.receiver.close()
    return self.results

def run(self):
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index

    self.send(self.message.running())
    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    # Results from this test, as well as logs will be dumped here
    mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

    start_time = -1
    stop_time = -1
    test_status = PASS
    summary = ""
    data = None

    try:
        # Instantiate test
        self.test = self.test_context.cls(self.test_context)

        self.log(logging.DEBUG, "Checking if there are enough nodes...")
        min_cluster_spec = self.test.min_cluster_spec()
        os_to_num_nodes = {}
        for node_spec in min_cluster_spec:
            if not os_to_num_nodes.get(node_spec.operating_system):
                os_to_num_nodes[node_spec.operating_system] = 1
            else:
                os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
        for (operating_system, node_count) in os_to_num_nodes.iteritems():
            num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
            if node_count > num_avail:
                raise RuntimeError(
                    "There are not enough nodes available in the cluster to run this test. "
                    "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
                    (operating_system, num_avail, node_count, self.test_context.services))

        # Run the test unit
        start_time = time.time()
        self.setup_test()
        data = self.run_test()
        test_status = PASS
        self.log(logging.INFO, "PASS")
    except BaseException as e:
        err_trace = str(e.message) + "\n" + traceback.format_exc(limit=16)
        self.log(logging.INFO, "FAIL: " + err_trace)
        test_status = FAIL
        summary += err_trace
    finally:
        self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
        stop_time = time.time()

        service_errors = self.test_context.services.errors()
        if service_errors:
            summary += "\n\n" + service_errors

    result = TestResult(self.test_context,
                        self.test_index,
                        self.session_context,
                        test_status,
                        summary,
                        data,
                        start_time,
                        stop_time)

    self.log(logging.INFO, "Summary: %s" % str(result.summary))
    self.log(logging.INFO, "Data: %s" % str(result.data))

    result.report()
    # Tell the server we are finished
    self._do_safely(lambda: self.send(self.message.finished(result=result)),
                    "Problem sending FINISHED message:")
    # Release test_context resources only after creating the result and finishing logging activity
    # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
    self.test_context.close()
    self.test_context = None
    self.test = None

def run(self):
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index

    self.send(self.message.running())
    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    start_time = -1
    stop_time = -1
    test_status = FAIL
    summary = []
    data = None

    self.all_services = ServiceRegistry()

    num_runs = 0
    try:
        while test_status == FAIL and num_runs < self.deflake_num:
            num_runs += 1
            self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
            start_time = time.time()
            test_status, summary, data = self._do_run(num_runs)
            if test_status == PASS and num_runs > 1:
                test_status = FLAKY
            msg = str(test_status.to_json())
            if summary:
                msg += ": {}".format(summary)
            if num_runs != self.deflake_num:
                msg += "\n" + "~" * max(len(line) for line in summary.split('\n'))
            self.log(logging.INFO, msg)
    finally:
        stop_time = time.time()
        test_status, summary = self._check_cluster_utilization(test_status, summary)

        if num_runs > 1:
            # for reporting purposes report all services
            self.test_context.services = self.all_services

        # for flaky tests, we report the start and end time of the successful run, and not the whole run period
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status,
                            summary,
                            data,
                            start_time,
                            stop_time)

        self.log(logging.INFO, "Data: %s" % str(result.data))

        result.report()
        # Tell the server we are finished
        self._do_safely(lambda: self.send(self.message.finished(result=result)),
                        "Problem sending FINISHED message for " + str(self.test_metadata) + ":\n")
        # Release test_context resources only after creating the result and finishing logging activity
        # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
        self.test_context.close()
        self.all_services = None
        self.test_context = None
        self.test = None

def run_all_tests(self): self.results.start_time = time.time() self.log(logging.INFO, "starting test run with session id %s..." % self.session_context.session_id) self.log(logging.INFO, "running %d tests..." % len(self.tests)) for test_num, test_context in enumerate(self.tests, 1): if len(self.cluster) != self.cluster.num_available_nodes(): # Sanity check - are we leaking cluster nodes? raise RuntimeError( "Expected all nodes to be available. Instead, %d of %d are available" % (self.cluster.num_available_nodes(), len(self.cluster))) self.current_test_context = test_context result = TestResult(self.current_test_context) if self.current_test_context.ignore: # Skip running this test, but keep track of the fact that we ignored it result.test_status = IGNORE result.start_time = time.time() result.stop_time = result.start_time self.results.append(result) self.log(logging.INFO, "Ignoring, and moving to next test...") continue # Results from this test, as well as logs will be dumped here mkdir_p(self.current_test_context.results_dir) try: # Instantiate test self.current_test = test_context.cls(test_context) # Run the test unit result.start_time = time.time() self.log(logging.INFO, "test %d of %d" % (test_num, len(self.tests))) self.log(logging.INFO, "setting up") self.setup_single_test() self.log(logging.INFO, "running") result.data = self.run_single_test() result.test_status = PASS self.log(logging.INFO, "PASS") except BaseException as e: self.log(logging.INFO, "FAIL") result.test_status = FAIL result.summary += str(e.message) + "\n" + traceback.format_exc(limit=16) self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt) finally: self.teardown_single_test(teardown_services=not self.session_context.no_teardown) result.stop_time = time.time() self.results.append(result) self.log(logging.INFO, "Summary: %s" % str(result.summary)) self.log(logging.INFO, "Data: %s" % str(result.data)) if test_num < len(self.tests): terminal_width, y = get_terminal_size() print "~" * int(2 * terminal_width / 3) test_reporter = SingleResultFileReporter(result) test_reporter.report() self.current_test_context, self.current_test = None, None if self.stop_testing: break self.results.stop_time = time.time() return self.results