def store_raw_results_stage(cfg, ctx):
    """Append the current run's results to the raw-results YAML file.

    Loads the existing list from ``cfg.raw_results`` (if the file exists),
    extends it with the (key, value) pairs of ``ctx.results`` converted to a
    YAML-safe form, and rewrites the whole file.

    Parameters:
        cfg: run configuration; ``cfg.raw_results`` is the target file path.
        ctx: run context; ``ctx.results`` is a mapping of test results.
    """
    if os.path.exists(cfg.raw_results):
        # Use a context manager so the read handle is closed deterministically
        # (the original leaked an open file object).
        with open(cfg.raw_results) as fd:
            cont = yaml_load(fd.read())
    else:
        cont = []

    cont.extend(utils.yamable(ctx.results).items())
    raw_data = pretty_yaml.dumps(cont)

    with open(cfg.raw_results, "w") as fd:
        fd.write(raw_data)
def save_nodes_stage(cfg, ctx):
    """Write a YAML report mapping each node's conn_url to its roles.

    One occurrence of the synthetic "testnode" role is stripped from every
    node; nodes left with no roles at all are omitted from the report.
    """
    cluster = {}
    for node in ctx.nodes:
        node_roles = list(node.roles)  # copy — don't mutate the node itself
        if "testnode" in node_roles:
            node_roles.remove("testnode")
        if node_roles:
            cluster[node.conn_url] = node_roles

    report = pretty_yaml.dumps(cluster)
    with open(cfg.nodes_report_file, "w") as fd:
        fd.write(report)
def save_nodes_stage(cfg, ctx):
    """Dump non-testnode roles of all nodes to cfg.nodes_report_file as YAML.

    NOTE(review): this duplicates an identical save_nodes_stage defined
    earlier in this file — consider removing one copy.
    """
    cluster = {}
    for node in ctx.nodes:
        remaining = node.roles[:]
        # EAFP: drop a single 'testnode' marker if present
        try:
            remaining.remove('testnode')
        except ValueError:
            pass
        if remaining:
            cluster[node.conn_url] = remaining

    with open(cfg.nodes_report_file, "w") as fd:
        fd.write(pretty_yaml.dumps(cluster))
def main(argv):
    """Entry point: dispatch a subcommand and drive its stage pipeline.

    Subcommands: 'test' (run a test session), 'ls' (list stored results),
    'report' (rebuild reports from stored data), 'compare' (diff two runs).
    Returns 0 on success, 1 when a stage failed.
    """
    if faulthandler is not None:
        # Dump all thread stacks on SIGUSR1 — helps diagnosing hangs.
        faulthandler.register(signal.SIGUSR1, all_threads=True)

    opts = parse_args(argv)
    stages = []
    report_stages = []

    ctx = Context()
    ctx.results = {}
    ctx.sensors_data = SensorDatastore()

    if opts.subparser_name == 'test':
        cfg = load_config(opts.config_file)
        make_storage_dir_struct(cfg)
        cfg.comment = opts.comment
        save_run_params(cfg)

        with open(cfg.saved_config_file, 'w') as fd:
            fd.write(pretty_yaml.dumps(cfg.__dict__))

        stages = [run_test.discover_stage]
        stages.extend([
            run_test.reuse_vms_stage,
            log_nodes_statistic_stage,
            run_test.save_nodes_stage,
            run_test.connect_stage,
        ])

        if cfg.settings.get('collect_info', True):
            stages.append(run_test.collect_hw_info_stage)

        stages.extend([
            # deploy_sensors_stage,
            run_test.run_tests_stage,
            run_test.store_raw_results_stage,
            # gather_sensors_stage
        ])

        cfg.keep_vm = opts.keep_vm
        cfg.no_tests = opts.no_tests
        cfg.dont_discover_nodes = opts.dont_discover_nodes

        ctx.build_meta['build_id'] = opts.build_id
        # NOTE(review): key 'build_descrption' is misspelled, but it is a
        # runtime key consumers may rely on — kept as-is.
        ctx.build_meta['build_descrption'] = opts.build_description
        ctx.build_meta['build_type'] = opts.build_type

    elif opts.subparser_name == 'ls':
        list_results(opts.result_storage)
        return 0

    elif opts.subparser_name == 'report':
        cfg = load_config(get_test_files(opts.data_dir)['saved_config_file'])
        stages.append(run_test.load_data_from(opts.data_dir))
        opts.no_report = False
        # load build meta

    elif opts.subparser_name == 'compare':
        x = run_test.load_data_from_path(opts.data_path1)
        y = run_test.load_data_from_path(opts.data_path2)
        print(run_test.IOPerfTest.format_diff_for_console(
            [x['io'][0], y['io'][0]]))
        return 0

    if not opts.no_report:
        report_stages.append(run_test.console_report_stage)
        if opts.load_report:
            report_stages.append(run_test.test_load_report_stage)
        report_stages.append(run_test.html_report_stage)

    if opts.log_level is not None:
        str_level = opts.log_level
    else:
        str_level = cfg.settings.get('log_level', 'INFO')

    setup_loggers(getattr(logging, str_level), cfg.log_file)
    logger.info("All info would be stored into " + cfg.results_dir)

    for stage in stages:
        ok = False
        # log_stage is presumed to log and swallow the stage's exception;
        # 'ok' stays False in that case and the pipeline stops.
        with log_stage(stage):
            stage(cfg, ctx)
            ok = True
        if not ok:
            break

    # The swallowed exception is still visible via sys.exc_info().
    # NOTE(review): the first element is the exception *type*, not the
    # instance — the 'exc'/'cls' names are swapped, but only the
    # None/not-None distinction is used below, so behavior is unaffected.
    exc, cls, tb = sys.exc_info()

    for stage in ctx.clear_calls_stack[::-1]:
        with log_stage(stage):
            stage(cfg, ctx)

    logger.debug("Start utils.cleanup")
    for clean_func, args, kwargs in utils.iter_clean_func():
        with log_stage(clean_func):
            clean_func(*args, **kwargs)

    if exc is None:
        for report_stage in report_stages:
            with log_stage(report_stage):
                report_stage(cfg, ctx)

    logger.info("All info stored into " + cfg.results_dir)

    if exc is None:
        logger.info("Tests finished successfully")
        return 0

    logger.error("Tests are failed. See detailed error above")
    return 1
def run(self):
    """Run every prepared fio config on all cluster nodes, with reconnects.

    Skips configs whose operation/mode/blocksize combination already
    exceeded the latency/bandwidth limits at a lower thread count, retries
    a failed run up to three times after reconnecting SSH, and persists the
    task file and parameters for each run.

    Returns an IOTestResults aggregating all collected results.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    tname = os.path.basename(self.config_fname)
    if tname.endswith('.cfg'):
        tname = tname[:-4]

    barrier = Barrier(len(self.config.nodes))
    results = []

    # set of Operation_Mode_BlockSize str's which should not be tested
    # anymore, as they already too slow with previous thread count
    lat_bw_limit_reached = set()

    with ThreadPoolExecutor(len(self.config.nodes)) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue

            logger.info("Will run {0} test".format(fio_cfg.name))

            time_msg = "Test should takes about {0}." + \
                       " Should finish at {1}," + \
                       " will wait at most till {2}"
            exec_time = execution_time(fio_cfg)
            exec_time_str = sec_to_str(exec_time)
            # hard timeout: expected duration plus at least 5 minutes slack
            timeout = int(exec_time + max(300, exec_time))

            now_dt = datetime.datetime.now()
            end_dt = now_dt + datetime.timedelta(0, exec_time)
            wait_till = now_dt + datetime.timedelta(0, timeout)
            logger.info(time_msg.format(exec_time_str,
                                        end_dt.strftime("%H:%M:%S"),
                                        wait_till.strftime("%H:%M:%S")))

            run_one = functools.partial(self.do_run,
                                        barrier=barrier,
                                        fio_cfg=fio_cfg,
                                        pos=pos)

            attempts = 3
            for attempt in range(attempts):
                try:
                    intervals = list(pool.map(run_one, self.config.nodes))
                    if None not in intervals:
                        break
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    if attempt == attempts - 1:
                        raise StopTestError("Fio failed", exc)

                # Either an exception was caught or some node returned None:
                # drop all connections, wait, and reconnect before retrying.
                logger.info("Reconnectiong, sleeping %ss and retrying",
                            self.retry_time)
                wait([pool.submit(node.connection.close)
                      for node in self.config.nodes])
                time.sleep(self.retry_time)
                wait([pool.submit(reconnect, node.connection, node.conn_url)
                      for node in self.config.nodes])

            # Persist the exact fio task that was executed.
            task_fname = "{0}_task.fio".format(pos)
            with open(os.path.join(self.config.log_directory,
                                   task_fname), "w") as fd:
                fd.write(str(fio_cfg))

            params = {'vm_count': len(self.config.nodes)}
            params['name'] = fio_cfg.name
            params['vals'] = dict(fio_cfg.vals.items())
            params['intervals'] = intervals
            params['nodes'] = [node.get_conn_id()
                               for node in self.config.nodes]

            params_fname = "{0}_params.yaml".format(pos)
            with open(os.path.join(self.config.log_directory,
                                   params_fname), "w") as fd:
                fd.write(dumps(params))

            res = load_test_results(self.config.log_directory, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()
                # conver us to ms
                if self.max_latency < lat_50:
                    logger.info(("Will skip all subsequent tests of {0} " +
                                 "due to lat/bw limits").format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if self.min_bw_per_thread is not None and \
                    self.min_bw_per_thread > average(test_res['bw']):
                lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results, self.config.log_directory)
def run(self):
    """Run every prepared fio config on all cluster nodes.

    Configs whose operation/mode/blocksize combination already hit the
    latency/bandwidth limits at a lower thread count are skipped; a failed
    run is retried up to three times with a 30s pause. For each executed
    config the fio task file and run parameters are written to the log
    directory and the parsed results are collected.

    Returns an IOTestResults with all gathered results.
    """
    logger.debug("Run preparation")
    self.pre_run()
    self.show_test_execution_time()

    tname = os.path.basename(self.config_fname)
    if tname.endswith('.cfg'):
        tname = tname[:-4]

    barrier = Barrier(len(self.config.nodes))
    results = []

    # set of Operation_Mode_BlockSize str's which should not be tested
    # anymore, as they already too slow with previous thread count
    lat_bw_limit_reached = set()

    with ThreadPoolExecutor(len(self.config.nodes)) as pool:
        for pos, fio_cfg in enumerate(self.fio_configs):
            test_descr = get_test_summary(fio_cfg.vals).split("th")[0]
            if test_descr in lat_bw_limit_reached:
                continue
            else:
                logger.info("Will run {0} test".format(fio_cfg.name))
                templ = "Test should takes about {0}." + \
                        " Should finish at {1}," + \
                        " will wait at most till {2}"
                exec_time = execution_time(fio_cfg)
                exec_time_str = sec_to_str(exec_time)
                # expected runtime plus at least five minutes of slack
                timeout = int(exec_time + max(300, exec_time))

                now_dt = datetime.datetime.now()
                end_dt = now_dt + datetime.timedelta(0, exec_time)
                wait_till = now_dt + datetime.timedelta(0, timeout)
                logger.info(templ.format(exec_time_str,
                                         end_dt.strftime("%H:%M:%S"),
                                         wait_till.strftime("%H:%M:%S")))

            func = functools.partial(self.do_run,
                                     barrier=barrier,
                                     fio_cfg=fio_cfg,
                                     pos=pos)

            max_retr = 3
            for idx in range(max_retr):
                try:
                    intervals = list(pool.map(func, self.config.nodes))
                    break
                except (EnvironmentError, SSHException) as exc:
                    logger.exception("During fio run")
                    # give up after the final attempt
                    if idx == max_retr - 1:
                        raise StopTestError("Fio failed", exc)

                logger.info("Sleeping 30s and retrying")
                time.sleep(30)

            fname = "{0}_task.fio".format(pos)
            with open(os.path.join(self.config.log_directory,
                                   fname), "w") as fd:
                fd.write(str(fio_cfg))

            params = {
                'vm_count': len(self.config.nodes),
                'name': fio_cfg.name,
                'vals': dict(fio_cfg.vals.items()),
                'intervals': intervals,
                'nodes': [node.get_conn_id() for node in self.config.nodes],
            }

            fname = "{0}_params.yaml".format(pos)
            with open(os.path.join(self.config.log_directory,
                                   fname), "w") as fd:
                fd.write(dumps(params))

            res = load_test_results(self.config.log_directory, pos)
            results.append(res)

            if self.max_latency is not None:
                lat_50, _ = res.get_lat_perc_50_95_multy()
                # conver us to ms
                if self.max_latency < lat_50:
                    logger.info(("Will skip all subsequent tests of {0} " +
                                 "due to lat/bw limits").format(fio_cfg.name))
                    lat_bw_limit_reached.add(test_descr)

            test_res = res.get_params_from_fio_report()
            if self.min_bw_per_thread is not None:
                if self.min_bw_per_thread > average(test_res['bw']):
                    lat_bw_limit_reached.add(test_descr)

    return IOTestResults(self.config.params['cfg'],
                         results, self.config.log_directory)
def main(argv):
    """Top-level CLI driver.

    Dispatches on the chosen subcommand ('test', 'ls', 'report', 'compare'),
    builds the stage pipeline, executes it, runs cleanup, and finally the
    report stages. Returns process exit code: 0 on success, 1 on failure.
    """
    if faulthandler is not None:
        # SIGUSR1 dumps all thread stacks — useful for stuck runs.
        faulthandler.register(signal.SIGUSR1, all_threads=True)

    opts = parse_args(argv)

    stages = []
    report_stages = []

    ctx = Context()
    ctx.results = {}
    ctx.sensors_data = SensorDatastore()

    if opts.subparser_name == 'test':
        cfg = load_config(opts.config_file)
        make_storage_dir_struct(cfg)
        cfg.comment = opts.comment
        save_run_params(cfg)

        # Snapshot the effective configuration alongside the results.
        with open(cfg.saved_config_file, 'w') as fd:
            fd.write(pretty_yaml.dumps(cfg.__dict__))

        stages = [run_test.discover_stage]
        stages += [
            run_test.reuse_vms_stage,
            log_nodes_statistic_stage,
            run_test.save_nodes_stage,
            run_test.connect_stage,
        ]

        if cfg.settings.get('collect_info', True):
            stages.append(run_test.collect_hw_info_stage)

        stages += [
            # deploy_sensors_stage,
            run_test.run_tests_stage,
            run_test.store_raw_results_stage,
            # gather_sensors_stage
        ]

        cfg.keep_vm = opts.keep_vm
        cfg.no_tests = opts.no_tests
        cfg.dont_discover_nodes = opts.dont_discover_nodes

        # NOTE(review): 'build_descrption' is a misspelled runtime key;
        # preserved because downstream consumers may read it verbatim.
        ctx.build_meta['build_id'] = opts.build_id
        ctx.build_meta['build_descrption'] = opts.build_description
        ctx.build_meta['build_type'] = opts.build_type

    elif opts.subparser_name == 'ls':
        list_results(opts.result_storage)
        return 0

    elif opts.subparser_name == 'report':
        saved_cfg = get_test_files(opts.data_dir)['saved_config_file']
        cfg = load_config(saved_cfg)
        stages.append(run_test.load_data_from(opts.data_dir))
        opts.no_report = False
        # load build meta

    elif opts.subparser_name == 'compare':
        x = run_test.load_data_from_path(opts.data_path1)
        y = run_test.load_data_from_path(opts.data_path2)
        print(
            run_test.IOPerfTest.format_diff_for_console(
                [x['io'][0], y['io'][0]]))
        return 0

    if not opts.no_report:
        report_stages.append(run_test.console_report_stage)
        if opts.load_report:
            report_stages.append(run_test.test_load_report_stage)
        report_stages.append(run_test.html_report_stage)

    str_level = (opts.log_level
                 if opts.log_level is not None
                 else cfg.settings.get('log_level', 'INFO'))

    setup_loggers(getattr(logging, str_level), cfg.log_file)
    logger.info("All info would be stored into " + cfg.results_dir)

    for stage in stages:
        ok = False
        # log_stage is assumed to log and suppress stage exceptions, so a
        # failed stage leaves ok == False and aborts the pipeline.
        with log_stage(stage):
            stage(cfg, ctx)
            ok = True
        if not ok:
            break

    # Pull the suppressed exception (if any) out of sys.exc_info().
    # NOTE(review): sys.exc_info() yields (type, value, traceback), so 'exc'
    # actually holds the type — harmless here, only the None test is used.
    exc, cls, tb = sys.exc_info()

    for stage in reversed(ctx.clear_calls_stack):
        with log_stage(stage):
            stage(cfg, ctx)

    logger.debug("Start utils.cleanup")
    for clean_func, args, kwargs in utils.iter_clean_func():
        with log_stage(clean_func):
            clean_func(*args, **kwargs)

    if exc is None:
        for report_stage in report_stages:
            with log_stage(report_stage):
                report_stage(cfg, ctx)

    logger.info("All info stored into " + cfg.results_dir)

    if exc is None:
        logger.info("Tests finished successfully")
        return 0
    else:
        logger.error("Tests are failed. See detailed error above")
        return 1