def configure_logger(self):
    """Set up the logger to log to stdout and files.

    This creates a directory and a few files as a side-effect. It is
    intended to be idempotent: calls after the first are no-ops.
    """
    if self.configured:
        # Already set up - adding handlers again would duplicate log lines.
        return

    self._logger.setLevel(logging.DEBUG)
    mkdir_p(self.log_dir)

    # Create info and debug level handlers to pipe to log files
    info_fh = logging.FileHandler(
        os.path.join(self.log_dir, "test_log.info"))
    debug_fh = logging.FileHandler(
        os.path.join(self.log_dir, "test_log.debug"))
    info_fh.setLevel(logging.INFO)
    debug_fh.setLevel(logging.DEBUG)

    formatter = logging.Formatter(ConsoleDefaults.TEST_LOG_FORMATTER)
    info_fh.setFormatter(formatter)
    debug_fh.setFormatter(formatter)

    self._logger.addHandler(info_fh)
    self._logger.addHandler(debug_fh)

    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    if self.debug:
        # If debug flag is set, pipe debug logs to stdout
        ch.setLevel(logging.DEBUG)
    else:
        # default - pipe warning level logging to stdout
        ch.setLevel(logging.WARNING)
    self._logger.addHandler(ch)

    # Bug fix: record that configuration happened so the guard above works;
    # previously the flag was checked but never set, so every call re-added
    # handlers. (Assumes self.configured is a plain attribute - confirm it
    # is not a read-only property elsewhere.)
    self.configured = True
def setup_results_directory(new_results_dir):
    """Create the directory in which test results will be stored.

    Refuses to proceed if anything already exists at the target path.
    """
    already_there = os.path.exists(new_results_dir)
    if already_there:
        raise Exception(
            "A file or directory at %s already exists. Exiting without overwriting." % new_results_dir)
    mkdir_p(new_results_dir)
def __init__(self, session_context, module=None, cls=None, function=None, injected_args=None):
    """Context object describing a single test and its environment.

    :type session_context: ducktape.tests.session.SessionContext
    :param module: module containing the test, if any
    :param cls: class containing the test method, if any
    :param function: the test function/method itself, if any
    :param injected_args: extra arguments injected into the test, if any
    """
    self.module = module
    self.cls = cls
    self.function = function
    self.injected_args = injected_args
    self.session_context = session_context
    self.services = ServiceRegistry()

    # dict for toggling service log collection on/off
    self.log_collect = {}

    # Individual test results go here
    # NOTE(review): self.results_dir is not assigned in this constructor -
    # presumably a property defined on the class; confirm before relying on it.
    mkdir_p(self.results_dir)

    self._logger_configured = False
    self.configure_logger()
def copy_service_logs(self):
    """Copy logs from service nodes to the results directory.

    For each registered service with logs, the selected log paths are scp'd
    from every node into <results_dir>/<ServiceClassName>/<hostname>.
    Copy failures are logged as warnings and do not abort collection.
    """
    for service in self.test_context.services:
        if not hasattr(service, 'logs') or len(service.logs) == 0:
            self.test_context.logger.debug(
                "Won't collect service logs from %s - no logs to collect." %
                service.__class__.__name__)
            # Bug fix: this was 'return', which aborted log collection for
            # every remaining service as soon as one service had no logs.
            # We only want to skip this one service.
            continue

        log_dirs = service.logs

        for node in service.nodes:
            # Gather locations of logs to collect
            node_logs = []
            for log_name in log_dirs.keys():
                if self.should_collect_log(log_name, service):
                    node_logs.append(log_dirs[log_name]["path"])

            if len(node_logs) > 0:
                # Create directory into which service logs will be copied
                dest = os.path.join(
                    self.test_context.results_dir, service.__class__.__name__, node.account.hostname)
                if not os.path.isdir(dest):
                    mkdir_p(dest)

                # Try to copy the service logs; a failure should not fail
                # the test run, so only log a warning.
                try:
                    node.account.scp_from(node_logs, dest, recursive=True)
                except Exception as e:
                    # Use the exception object directly instead of the
                    # Python 2-only e.message (consistent with the other
                    # copy_service_logs variants).
                    self.test_context.logger.warn(
                        "Error copying log %(log_name)s from %(source)s to %(dest)s. \
                        service %(service)s: %(message)s" %
                        {'log_name': log_name,
                         'source': log_dirs[log_name],
                         'dest': dest,
                         'service': service,
                         'message': e})
def configure_logger(self):
    """Attach file (and optionally stdout) handlers to this test's logger.

    May only be invoked once; a second call raises RuntimeError. Creates
    the results directory and two log files as a side effect.
    """
    if self._logger_configured:
        raise RuntimeError("test logger should only be configured once.")

    self.logger.setLevel(logging.DEBUG)
    mkdir_p(self.results_dir)

    # One file handler per verbosity level, both under results_dir,
    # sharing a single formatter.
    shared_formatter = logging.Formatter(ConsoleConfig.TEST_LOG_FORMATTER)
    for level, filename in ((logging.INFO, "test_log.info"),
                            (logging.DEBUG, "test_log.debug")):
        handler = logging.FileHandler(os.path.join(self.results_dir, filename))
        handler.setLevel(level)
        handler.setFormatter(shared_formatter)
        self.logger.addHandler(handler)

    # If debug flag is set, pipe verbose test logging to stdout
    if self.session_context.debug:
        console = logging.StreamHandler(sys.stdout)
        console.setLevel(logging.DEBUG)
        console.setFormatter(shared_formatter)
        self.logger.addHandler(console)

    self._logger_configured = True
def report(self):
    """Persist this result: make sure the results directory exists, dump
    the JSON representation, then delegate to the file reporter."""
    if not os.path.exists(self.results_dir):
        mkdir_p(self.results_dir)
    self.dump_json()
    SingleResultFileReporter(self).report()
def configure_logger(self):
    """Set up the logger to log to stdout and files.

    This creates a directory and a few files as a side-effect.
    Raises RuntimeError if called more than once.
    """
    if self._logger_configured:
        raise RuntimeError("test logger should only be configured once.")

    self._logger.setLevel(logging.DEBUG)
    mkdir_p(self.results_dir)

    # File handlers: one info-level, one debug-level, both in results_dir
    info_handler = logging.FileHandler(
        os.path.join(self.results_dir, "test_log.info"))
    debug_handler = logging.FileHandler(
        os.path.join(self.results_dir, "test_log.debug"))
    info_handler.setLevel(logging.INFO)
    debug_handler.setLevel(logging.DEBUG)

    fmt = logging.Formatter(ConsoleConfig.TEST_LOG_FORMATTER)
    info_handler.setFormatter(fmt)
    debug_handler.setFormatter(fmt)
    self._logger.addHandler(info_handler)
    self._logger.addHandler(debug_handler)

    # Console handler: verbose when the session debug flag is set,
    # warnings-only otherwise.
    console = logging.StreamHandler(sys.stdout)
    console.setFormatter(fmt)
    console.setLevel(logging.DEBUG if self.session_context.debug else logging.WARNING)
    self._logger.addHandler(console)
def configure_logger(self):
    """Set up the logger to log to stdout and files.

    This creates a directory and a few files as a side-effect.

    :raises RuntimeError: if this method is called a second time
    """
    if self._logger_configured:
        raise RuntimeError("test logger should only be configured once.")

    # Let all records reach the logger; the handlers below filter by level.
    self._logger.setLevel(logging.DEBUG)
    mkdir_p(self.results_dir)

    # Create info and debug level handlers to pipe to log files
    info_fh = logging.FileHandler(os.path.join(self.results_dir, "test_log.info"))
    debug_fh = logging.FileHandler(os.path.join(self.results_dir, "test_log.debug"))
    info_fh.setLevel(logging.INFO)
    debug_fh.setLevel(logging.DEBUG)

    formatter = logging.Formatter(ConsoleDefaults.TEST_LOG_FORMATTER)
    info_fh.setFormatter(formatter)
    debug_fh.setFormatter(formatter)

    self._logger.addHandler(info_fh)
    self._logger.addHandler(debug_fh)

    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(formatter)
    if self.session_context.debug:
        # If debug flag is set, pipe verbose test logging to stdout
        ch.setLevel(logging.DEBUG)
    else:
        # default - pipe warning level logging to stdout
        ch.setLevel(logging.WARNING)
    self._logger.addHandler(ch)
    # NOTE(review): this variant never sets self._logger_configured, so the
    # guard above only works if the caller sets the flag - confirm.
def copy_service_logs(self):
    """Copy logs from service nodes to the results directory.

    Each service's selected log paths are scp'd from every node into
    <results_dir>/<service_id>/<hostname>. Copy failures are logged as
    warnings and do not abort collection.
    """
    for service in self.test_context.services:
        if not hasattr(service, 'logs') or len(service.logs) == 0:
            self.test_context.logger.debug(
                "Won't collect service logs from %s - no logs to collect." % service.service_id)
            continue

        log_dirs = service.logs

        for node in service.nodes:
            # Gather locations of logs to collect
            node_logs = []
            for log_name in log_dirs.keys():
                if self.should_collect_log(log_name, service):
                    node_logs.append(log_dirs[log_name]["path"])

            if len(node_logs) > 0:
                # Create directory into which service logs will be copied
                dest = os.path.join(
                    self.test_context.results_dir, service.service_id, node.account.hostname)
                if not os.path.isdir(dest):
                    mkdir_p(dest)

                # Try to copy the service logs
                try:
                    node.account.scp_from(node_logs, dest, recursive=True)
                except Exception as e:
                    # NOTE(review): log_name here is whatever the last loop
                    # iteration left behind, not necessarily the log that
                    # failed to copy; also e.message is Python 2 only.
                    self.test_context.logger.warn(
                        "Error copying log %(log_name)s from %(source)s to %(dest)s. \
                        service %(service)s: %(message)s" %
                        {'log_name': log_name,
                         'source': log_dirs[log_name],
                         'dest': dest,
                         'service': service,
                         'message': e.message})
def copy_service_logs(self, test_status):
    """
    Copy logs from service nodes to the results directory.

    If the test passed, only the default set will be collected.
    If the test failed, all logs will be collected.

    :param test_status: outcome of the test run; FAIL forces collection of
        every log a service declares, not just the default set.
    """
    for service in self.test_context.services:
        if not hasattr(service, 'logs') or len(service.logs) == 0:
            self.test_context.logger.debug(
                "Won't collect service logs from %s - no logs to collect." %
                service.service_id)
            continue

        log_dirs = service.logs

        for node in service.nodes:
            # Gather locations of logs to collect
            node_logs = []
            for log_name in log_dirs.keys():
                # On failure every declared log is collected.
                if test_status == FAIL or self.should_collect_log(
                        log_name, service):
                    node_logs.append(log_dirs[log_name]["path"])

            self.test_context.logger.debug(
                "Preparing to copy logs from %s: %s" %
                (node.account.hostname, node_logs))

            # Optionally compress logs on the node before copying them over.
            if self.test_context.session_context.compress:
                self.test_context.logger.debug("Compressing logs...")
                node_logs = self.compress_service_logs(
                    node, service, node_logs)

            if len(node_logs) > 0:
                # Create directory into which service logs will be copied
                dest = os.path.join(
                    TestContext.results_dir(self.test_context, self.test_context.test_index),
                    service.service_id, node.account.hostname)
                if not os.path.isdir(dest):
                    mkdir_p(dest)

                # Try to copy the service logs
                self.test_context.logger.debug("Copying logs...")
                try:
                    for log in node_logs:
                        node.account.copy_from(log, dest)
                except Exception as e:
                    # NOTE(review): log_name is the last key iterated above,
                    # not necessarily the log whose copy failed.
                    self.test_context.logger.warn(
                        "Error copying log %(log_name)s from %(source)s to %(dest)s. \
                        service %(service)s: %(message)s" % {
                            'log_name': log_name,
                            'source': log_dirs[log_name],
                            'dest': dest,
                            'service': service,
                            'message': e
                        })
def setup_results_directory(results_dir, session_id):
    """Make the directory in which results will be stored and point the
    "latest" symlink at it.

    :param results_dir: path of the new results directory
    :param session_id: id of this test session (used in the error message)
    :raises Exception: if a results directory for this session already exists
    """
    if os.path.isdir(results_dir):
        raise Exception(
            "A test results directory with session id %s already exists. Exiting without overwriting..." % session_id)
    mkdir_p(results_dir)

    # Create or update the "latest" symlink to point at the new directory.
    latest_test_dir = os.path.join(ConsoleConfig.RESULTS_ROOT_DIRECTORY, "latest")
    # Bug fix: os.path.exists() follows symlinks and returns False for a
    # *broken* link, which left the stale link in place and made os.symlink
    # below fail. os.path.islink() detects the link itself regardless of
    # whether its target still exists.
    if os.path.islink(latest_test_dir):
        os.unlink(latest_test_dir)
    os.symlink(results_dir, latest_test_dir)
def setup_results_directory(results_root, new_results_dir):
    """Create the directory where results will be stored and refresh the
    "latest" symlink under results_root so it points at the new directory."""
    if os.path.exists(new_results_dir):
        raise Exception(
            "A file or directory at %s already exists. Exiting without overwriting." % new_results_dir)
    mkdir_p(new_results_dir)

    # Create or update symlink "latest" which points to the new test results directory
    link_path = os.path.join(results_root, "latest")
    if os.path.islink(link_path):
        os.unlink(link_path)
    os.symlink(new_results_dir, link_path)
def copy_service_logs(self, test_status):
    """
    Copy logs from service nodes to the results directory.

    If the test passed, only the default set will be collected.
    If the test failed, all logs will be collected.
    """
    for service in self.test_context.services:
        if not hasattr(service, 'logs') or len(service.logs) == 0:
            self.test_context.logger.debug(
                "Won't collect service logs from %s - no logs to collect." % service.service_id)
            continue

        log_dirs = service.logs

        for node in service.nodes:
            # Gather locations of logs to collect
            node_logs = []
            for log_name in log_dirs.keys():
                # FAIL forces collection of every declared log.
                if test_status == FAIL or self.should_collect_log(log_name, service):
                    node_logs.append(log_dirs[log_name]["path"])

            self.test_context.logger.debug(
                "Preparing to copy logs from %s: %s" % (node.account.hostname, node_logs))

            # Optionally compress logs on the node before copying them over.
            if self.test_context.session_context.compress:
                self.test_context.logger.debug("Compressing logs...")
                node_logs = self.compress_service_logs(node, service, node_logs)

            if len(node_logs) > 0:
                # Create directory into which service logs will be copied
                dest = os.path.join(
                    TestContext.results_dir(self.test_context, self.test_context.test_index),
                    service.service_id, node.account.hostname)
                if not os.path.isdir(dest):
                    mkdir_p(dest)

                # Try to copy the service logs
                self.test_context.logger.debug("Copying logs...")
                try:
                    for log in node_logs:
                        node.account.copy_from(log, dest)
                except Exception as e:
                    # NOTE(review): log_name is the last key from the loop
                    # above, not necessarily the failing log; e.message is
                    # Python 2 only.
                    self.test_context.logger.warn(
                        "Error copying log %(log_name)s from %(source)s to %(dest)s. \
                        service %(service)s: %(message)s" %
                        {'log_name': log_name,
                         'source': log_dirs[log_name],
                         'dest': dest,
                         'service': service,
                         'message': e.message})
def _do_run(self, num_runs):
    """Execute one attempt of the test and collect its outcome.

    :param num_runs: which attempt this is; used to namespace service ids
        when deflaking (self.deflake_num > 1) so logs from different
        attempts do not collide
    :return: tuple of (test_status, summary string, data returned by the test)
    """
    test_status = FAIL
    summary = []
    data = None
    # Pick the service-id factory: a per-run one when deflaking, otherwise
    # the default module-level factory.
    sid_factory = MultiRunServiceIdFactory(num_runs) if self.deflake_num > 1 else service_id_factory
    try:
        # Results from this test, as well as logs will be dumped here
        mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

        # Instantiate test
        self.test = self.test_context.cls(self.test_context)
        # Check if there are enough nodes
        self._check_min_cluster_spec()
        # Run the test unit
        self.setup_test()
        data = self.run_test()
        test_status = PASS
    except BaseException as e:
        # mark the test as failed before doing anything else
        test_status = FAIL
        err_trace = self._exc_msg(e)
        summary.append(err_trace)
    finally:
        # Tag every registered service with this run's id factory and keep a
        # record of it before tearing anything down.
        for service in self.test_context.services:
            service.service_id_factory = sid_factory
            self.all_services.append(service)
        self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)

        # Append any errors the services themselves reported.
        if hasattr(self.test_context, "services"):
            service_errors = self.test_context.services.errors()
            if service_errors:
                summary.extend(["\n\n", service_errors])

        # free nodes
        if self.test:
            self.log(logging.DEBUG, "Freeing nodes...")
            self._do_safely(self.test.free_nodes, "Error freeing nodes:")

    return test_status, "".join(summary), data
def __init__(self, session_context, module=None, cls=None, function=None, config=None):
    """Context object for a single test.

    :type session_context: ducktape.tests.session.SessionContext
    """
    self.module = module
    self.cls = cls
    self.function = function
    self.config = config
    self.session_context = session_context
    self.cluster = session_context.cluster
    self.services = ServiceRegistry()

    # dict for toggling service log collection on/off
    self.log_collect = {}

    # Results land under <session results dir>/<class name>/<function name>,
    # omitting whichever components were not supplied.
    path = self.session_context.results_dir
    for part in (self.cls, self.function):
        if part is not None:
            path = os.path.join(path, part.__name__)
    self.results_dir = path
    mkdir_p(self.results_dir)

    self._logger_configured = False
    self.configure_logger()
def run(self):
    """Run a single test end to end: load it, verify the cluster has enough
    nodes per operating system, execute it, and report the result back to
    the server."""
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index
    self.send(self.message.running())

    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    # Results from this test, as well as logs will be dumped here
    mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

    start_time = -1
    stop_time = -1
    test_status = PASS
    summary = ""
    data = None

    try:
        # Instantiate test
        self.test = self.test_context.cls(self.test_context)

        self.log(logging.DEBUG, "Checking if there are enough nodes...")
        min_cluster_spec = self.test.min_cluster_spec()
        # Count how many nodes of each operating system the test needs.
        os_to_num_nodes = {}
        for node_spec in min_cluster_spec:
            if not os_to_num_nodes.get(node_spec.operating_system):
                os_to_num_nodes[node_spec.operating_system] = 1
            else:
                os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
        # NOTE(review): iteritems() is Python 2 only.
        for (operating_system, node_count) in os_to_num_nodes.iteritems():
            num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
            if node_count > num_avail:
                raise RuntimeError(
                    "There are not enough nodes available in the cluster to run this test. "
                    "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
                    (operating_system, num_avail, node_count, self.test_context.services))

        # Run the test unit
        start_time = time.time()
        self.setup_test()
        data = self.run_test()
        test_status = PASS
        self.log(logging.INFO, "PASS")
    except BaseException as e:
        # NOTE(review): e.message is Python 2 only.
        err_trace = str(e.message) + "\n" + traceback.format_exc(limit=16)
        self.log(logging.INFO, "FAIL: " + err_trace)
        test_status = FAIL
        summary += err_trace
    finally:
        self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
        stop_time = time.time()

        # Append any errors the services themselves reported.
        service_errors = self.test_context.services.errors()
        if service_errors:
            summary += "\n\n" + service_errors

    result = TestResult(
        self.test_context,
        self.test_index,
        self.session_context,
        test_status,
        summary,
        data,
        start_time,
        stop_time)

    self.log(logging.INFO, "Summary: %s" % str(result.summary))
    self.log(logging.INFO, "Data: %s" % str(result.data))
    result.report()

    # Tell the server we are finished
    self._do_safely(lambda: self.send(self.message.finished(result=result)),
                    "Problem sending FINISHED message:")

    # Release test_context resources only after creating the result and finishing logging activity
    # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
    self.test_context.close()
    self.test_context = None
    self.test = None
def run_all_tests(self):
    """Run every loaded test sequentially and return the aggregate results.

    Sanity-checks cluster node availability before each test, honors the
    per-test ignore flag, and stops early when exit_first is set or a
    KeyboardInterrupt occurs.
    """
    self.results.start_time = time.time()
    self.log(logging.INFO, "starting test run with session id %s..." % self.session_context.session_id)
    self.log(logging.INFO, "running %d tests..." % len(self.tests))

    for test_num, test_context in enumerate(self.tests, 1):
        if len(self.cluster) != self.cluster.num_available_nodes():
            # Sanity check - are we leaking cluster nodes?
            raise RuntimeError(
                "Expected all nodes to be available. Instead, %d of %d are available" %
                (self.cluster.num_available_nodes(), len(self.cluster)))

        self.current_test_context = test_context

        if self.current_test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result = TestResult(self.current_test_context,
                                test_status=IGNORE,
                                start_time=time.time(),
                                stop_time=time.time())
            self.results.append(result)
            self.log(logging.INFO, "Ignoring, and moving to next test...")
            continue

        # Results from this test, as well as logs will be dumped here
        mkdir_p(self.current_test_context.results_dir)

        start_time = -1
        stop_time = -1
        test_status = PASS
        summary = ""
        data = None

        try:
            # Instantiate test
            self.current_test = test_context.cls(test_context)

            # Run the test unit
            start_time = time.time()
            self.log(logging.INFO, "test %d of %d" % (test_num, len(self.tests)))
            self.log(logging.INFO, "setting up")
            self.setup_single_test()

            self.log(logging.INFO, "running")
            data = self.run_single_test()
            test_status = PASS
            self.log(logging.INFO, "PASS")

        except BaseException as e:
            # NOTE(review): e.message is Python 2 only.
            err_trace = str(e.message) + "\n" + traceback.format_exc(limit=16)
            self.log(logging.INFO, "FAIL: " + err_trace)
            test_status = FAIL
            summary += err_trace
            # Stop the run early on first failure (if requested) or Ctrl-C.
            self.stop_testing = self.session_context.exit_first or isinstance(e, KeyboardInterrupt)

        finally:
            self.teardown_single_test(teardown_services=not self.session_context.no_teardown)
            stop_time = time.time()

            result = TestResult(self.current_test_context, test_status, summary, data, start_time, stop_time)
            self.results.append(result)

            self.log(logging.INFO, "Summary: %s" % str(result.summary))
            self.log(logging.INFO, "Data: %s" % str(result.data))

            if test_num < len(self.tests):
                # Visual separator between tests on the console.
                terminal_width, y = get_terminal_size()
                # NOTE(review): print statement - Python 2 only syntax.
                print "~" * int(2 * terminal_width / 3)

            test_reporter = SingleResultFileReporter(result)
            test_reporter.report()

            self.current_test_context, self.current_test = None, None

        if self.stop_testing:
            break

    self.results.stop_time = time.time()
    return self.results
def run(self):
    """Run a single test end to end: load it, verify the cluster can satisfy
    the test's minimum node spec, execute it, and report the result back to
    the server."""
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index
    self.send(self.message.running())

    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    # Results from this test, as well as logs will be dumped here
    mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

    start_time = -1
    stop_time = -1
    test_status = PASS
    summary = ""
    data = None

    try:
        # Instantiate test
        self.test = self.test_context.cls(self.test_context)

        self.log(logging.DEBUG, "Checking if there are enough nodes...")
        min_cluster_spec = self.test.min_cluster_spec()
        # Check test resource: attempt_remove_spec returns a non-empty
        # message when the cluster cannot satisfy the spec.
        msg = self.cluster.all().nodes.attempt_remove_spec(min_cluster_spec)
        if len(msg) > 0:
            raise RuntimeError("There are not enough nodes available in the cluster to run this test. " + msg)

        # Run the test unit
        start_time = time.time()
        self.setup_test()
        data = self.run_test()
        test_status = PASS
        self.log(logging.INFO, "PASS")
    except BaseException as e:
        err_trace = str(e) + "\n" + traceback.format_exc(limit=16)
        self.log(logging.INFO, "FAIL: " + err_trace)
        test_status = FAIL
        summary += err_trace
    finally:
        self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
        stop_time = time.time()

        # NOTE(review): this guards on hasattr(self, "services") but then
        # reads self.test_context.services - the guard probably should be
        # hasattr(self.test_context, "services"); confirm before changing.
        if hasattr(self, "services"):
            service_errors = self.test_context.services.errors()
            if service_errors:
                summary += "\n\n" + service_errors

    result = TestResult(
        self.test_context,
        self.test_index,
        self.session_context,
        test_status,
        summary,
        data,
        start_time,
        stop_time)

    self.log(logging.INFO, "Summary: %s" % str(result.summary))
    self.log(logging.INFO, "Data: %s" % str(result.data))
    result.report()

    # Tell the server we are finished
    self._do_safely(lambda: self.send(self.message.finished(result=result)),
                    "Problem sending FINISHED message:")

    # Release test_context resources only after creating the result and finishing logging activity
    # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
    self.test_context.close()
    self.test_context = None
    self.test = None
def run(self):
    """Run a single test end to end: load it, check per-operating-system
    node availability against min_cluster_size(), execute it, and report
    the result back to the server."""
    self.log(logging.INFO, "Loading test %s" % str(self.test_metadata))
    self.test_context = self._collect_test_context(**self.test_metadata)
    self.test_context.test_index = self.test_index
    self.send(self.message.running())

    if self.test_context.ignore:
        # Skip running this test, but keep track of the fact that we ignored it
        result = TestResult(self.test_context,
                            self.test_index,
                            self.session_context,
                            test_status=IGNORE,
                            start_time=time.time(),
                            stop_time=time.time())
        result.report()
        # Tell the server we are finished
        self.send(self.message.finished(result=result))
        return

    # Results from this test, as well as logs will be dumped here
    mkdir_p(TestContext.results_dir(self.test_context, self.test_index))

    start_time = -1
    stop_time = -1
    test_status = PASS
    summary = ""
    data = None

    try:
        # Instantiate test
        self.test = self.test_context.cls(self.test_context)

        self.log(logging.DEBUG, "Checking if there are enough nodes...")
        # min_cluster_size() maps operating system -> required node count.
        # NOTE(review): iteritems() is Python 2 only.
        for (operating_system, node_count) in self.test.min_cluster_size().iteritems():
            if node_count > self.cluster.num_nodes_for_operating_system(operating_system):
                raise RuntimeError(
                    "There are not enough nodes available in the cluster to run this test. "
                    "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
                    (operating_system,
                     self.cluster.num_nodes_for_operating_system(operating_system),
                     node_count,
                     self.test_context.services))

        # Run the test unit
        start_time = time.time()
        self.setup_test()
        data = self.run_test()
        test_status = PASS
        self.log(logging.INFO, "PASS")
    except BaseException as e:
        # NOTE(review): e.message is Python 2 only.
        err_trace = str(e.message) + "\n" + traceback.format_exc(limit=16)
        self.log(logging.INFO, "FAIL: " + err_trace)
        test_status = FAIL
        summary += err_trace
    finally:
        self.teardown_test(
            teardown_services=not self.session_context.no_teardown,
            test_status=test_status)
        stop_time = time.time()

    result = TestResult(self.test_context,
                        self.test_index,
                        self.session_context,
                        test_status,
                        summary,
                        data,
                        start_time,
                        stop_time)

    self.log(logging.INFO, "Summary: %s" % str(result.summary))
    self.log(logging.INFO, "Data: %s" % str(result.data))
    result.report()

    # Tell the server we are finished
    self._do_safely(
        lambda: self.send(self.message.finished(result=result)),
        "Problem sending FINISHED message:")

    # Release test_context resources only after creating the result and finishing logging activity
    # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
    self.test_context.close()
    self.test_context = None
    self.test = None
def run_all_tests(self):
    """Run every loaded test sequentially and return the aggregate results.

    A single TestResult per test is created up front and mutated in place
    as the test progresses; stops early when exit_first is set or a
    KeyboardInterrupt occurs.
    """
    self.results.start_time = time.time()
    self.log(
        logging.INFO,
        "starting test run with session id %s..." % self.session_context.session_id)
    self.log(logging.INFO, "running %d tests..." % len(self.tests))

    for test_num, test_context in enumerate(self.tests, 1):
        if len(self.cluster) != self.cluster.num_available_nodes():
            # Sanity check - are we leaking cluster nodes?
            raise RuntimeError(
                "Expected all nodes to be available. Instead, %d of %d are available" %
                (self.cluster.num_available_nodes(), len(self.cluster)))

        self.current_test_context = test_context
        result = TestResult(self.current_test_context)

        if self.current_test_context.ignore:
            # Skip running this test, but keep track of the fact that we ignored it
            result.test_status = IGNORE
            result.start_time = time.time()
            result.stop_time = result.start_time
            self.results.append(result)
            self.log(logging.INFO, "Ignoring, and moving to next test...")
            continue

        # Results from this test, as well as logs will be dumped here
        mkdir_p(self.current_test_context.results_dir)

        try:
            # Instantiate test
            self.current_test = test_context.cls(test_context)

            # Run the test unit
            result.start_time = time.time()
            self.log(logging.INFO, "test %d of %d" % (test_num, len(self.tests)))
            self.log(logging.INFO, "setting up")
            self.setup_single_test()

            self.log(logging.INFO, "running")
            result.data = self.run_single_test()
            result.test_status = PASS
            self.log(logging.INFO, "PASS")

        except BaseException as e:
            self.log(logging.INFO, "FAIL")
            result.test_status = FAIL
            # NOTE(review): e.message is Python 2 only.
            result.summary += str(
                e.message) + "\n" + traceback.format_exc(limit=16)
            # Stop early on first failure (if requested) or Ctrl-C.
            self.stop_testing = self.session_context.exit_first or isinstance(
                e, KeyboardInterrupt)

        finally:
            self.teardown_single_test(
                teardown_services=not self.session_context.no_teardown)
            result.stop_time = time.time()

            self.results.append(result)

            self.log(logging.INFO, "Summary: %s" % str(result.summary))
            self.log(logging.INFO, "Data: %s" % str(result.data))

            if test_num < len(self.tests):
                # Visual separator between tests on the console.
                terminal_width, y = get_terminal_size()
                # NOTE(review): print statement - Python 2 only syntax.
                print "~" * int(2 * terminal_width / 3)

            test_reporter = SingleResultFileReporter(result)
            test_reporter.report()

            self.current_test_context, self.current_test = None, None

        if self.stop_testing:
            break

    self.results.stop_time = time.time()
    return self.results