def test_without_errors(self):
    # Failure event created without an `errors` argument: verify the stored
    # attributes, the rendered text and a pickle round-trip.
    failure_event = KclStressEvent.failure(node=[], stress_cmd="c-s", log_file_name="1.log")

    self.assertEqual(failure_event.severity, Severity.ERROR)
    # The empty list passed as `node` is expected back as the string "[]".
    self.assertEqual(failure_event.node, "[]")
    self.assertEqual(failure_event.stress_cmd, "c-s")
    self.assertEqual(failure_event.log_file_name, "1.log")
    self.assertIsNone(failure_event.errors)

    expected_text = "(KclStressEvent Severity.ERROR): type=failure node=[]\nstress_cmd=c-s"
    self.assertEqual(str(failure_event), expected_text)

    # The event must compare equal to its own pickled-and-restored copy.
    restored_event = pickle.loads(pickle.dumps(failure_event))
    self.assertEqual(failure_event, restored_event)
def test_with_errors(self):
    # Failure event created with only `node` and `errors`: the omitted
    # stress_cmd / log_file_name must be None and the errors must be rendered.
    reported_errors = ["e1", "e2"]
    failure_event = KclStressEvent.failure(node="node1", errors=reported_errors)

    self.assertEqual(failure_event.severity, Severity.ERROR)
    self.assertEqual(failure_event.node, "node1")
    self.assertIsNone(failure_event.stress_cmd)
    self.assertIsNone(failure_event.log_file_name)
    self.assertEqual(failure_event.errors, ["e1", "e2"])

    expected_text = "(KclStressEvent Severity.ERROR): type=failure node=node1\nstress_cmd=None\nerrors:\n\ne1\ne2"
    self.assertEqual(str(failure_event), expected_text)

    # The event must compare equal to its own pickled-and-restored copy.
    restored_event = pickle.loads(pickle.dumps(failure_event))
    self.assertEqual(failure_event, restored_event)
def test_without_errors(self):
    # Failure event created without an `errors` argument: verify the stored
    # attributes, the rendered text (with a pinned event_id) and a pickle
    # round-trip.
    failure_event = KclStressEvent.failure(node=[], stress_cmd="c-s", log_file_name="1.log")

    self.assertEqual(failure_event.severity, Severity.ERROR)
    # The empty list passed as `node` is expected back as the string "[]".
    self.assertEqual(failure_event.node, "[]")
    self.assertEqual(failure_event.stress_cmd, "c-s")
    self.assertEqual(failure_event.log_file_name, "1.log")
    self.assertIsNone(failure_event.errors)

    # Pin the id so the rendered text can be compared literally.
    failure_event.event_id = "1acd4202-3a38-4b0d-9464-62f4825ee148"
    expected_text = (
        "(KclStressEvent Severity.ERROR) period_type=not-set "
        "event_id=1acd4202-3a38-4b0d-9464-62f4825ee148: type=failure node=[]\nstress_cmd=c-s"
    )
    self.assertEqual(str(failure_event), expected_text)

    # The event must compare equal to its own pickled-and-restored copy.
    restored_event = pickle.loads(pickle.dumps(failure_event))
    self.assertEqual(failure_event, restored_event)
def _run_stress(self, loader, loader_idx, cpu_idx):
    """Poll source/destination table sizes until the destination catches up.

    Options come from ``self.stress_cmd``: the ``table_compare`` keyword is
    removed and the remainder is parsed as ``key=value`` pairs separated by
    ``;``.  The keys read are ``interval`` (default 20, passed to
    ``time.sleep``), ``src_table`` and ``dst_table``.

    Every iteration flushes the queried node, reads the
    ``Number of partitions (estimate)`` cfstats value of both tables and
    reports them.  The loop ends when ``self._stop_event`` is set or when the
    destination holds at least as many partitions as a non-empty source.

    :param loader: loader node; used to select the DB node to query and as
        the ``node`` of the published events.
    :param loader_idx: not used by this implementation.
    :param cpu_idx: not used by this implementation.
    :return: always ``None``.
    :raises Exception: any error raised while parsing or polling is re-raised
        after a ``KclStressEvent.failure`` has been published.
    """
    KclStressEvent.start(node=loader, stress_cmd=self.stress_cmd).publish()
    try:
        options_str = self.stress_cmd.replace('table_compare', '').strip()
        # maxsplit=1 keeps values containing "=" intact; blank items (for
        # example after a trailing ";") are skipped instead of breaking dict().
        options = dict(
            item.strip().split("=", 1)
            for item in options_str.split(";")
            if item.strip()
        )
        interval = int(options.get('interval', 20))
        src_table = options.get('src_table')
        dst_table = options.get('dst_table')

        while not self._stop_event.is_set():
            node: BaseNode = self.db_node_to_query(loader)
            node.run_nodetool('flush')
            src_size = node.get_cfstats(src_table)['Number of partitions (estimate)']
            dst_size = node.get_cfstats(dst_table)['Number of partitions (estimate)']

            status = f"== CompareTablesSizesThread: dst table/src table number of partitions: {dst_size}/{src_size} =="
            LOGGER.info(status)
            # NOTE(review): published explicitly, matching the KclStressEvent
            # calls in this method - confirm InfoEvent does not self-publish.
            InfoEvent(status).publish()

            # An empty source table means there is nothing to compare yet, so
            # it must never count as "done".
            if src_size != 0 and dst_size >= src_size:
                InfoEvent("== CompareTablesSizesThread: Done ==").publish()
                break

            # Sleep on every unfinished iteration, including the empty-source
            # case, so the node is not flushed in a tight loop.
            time.sleep(interval)
        return None
    except Exception as exc:  # pylint: disable=broad-except
        errors_str = format_stress_cmd_error(exc)
        KclStressEvent.failure(node=loader, stress_cmd=self.stress_cmd, errors=[errors_str, ]).publish()
        raise
    finally:
        KclStressEvent.finish(node=loader).publish()
def test_with_errors(self):
    # Failure event created with only `node` and `errors`: the omitted
    # stress_cmd / log_file_name must be None and the errors must be rendered
    # (with a pinned event_id).
    reported_errors = ["e1", "e2"]
    failure_event = KclStressEvent.failure(node="node1", errors=reported_errors)

    self.assertEqual(failure_event.severity, Severity.ERROR)
    self.assertEqual(failure_event.node, "node1")
    self.assertIsNone(failure_event.stress_cmd)
    self.assertIsNone(failure_event.log_file_name)
    self.assertEqual(failure_event.errors, ["e1", "e2"])

    # Pin the id so the rendered text can be compared literally.
    failure_event.event_id = "d169ca02-c119-49f2-9eb7-23f152098cb7"
    expected_text = (
        "(KclStressEvent Severity.ERROR) period_type=not-set event_id=d169ca02-c119-49f2-9eb7-23f152098cb7: "
        "type=failure node=node1\nerrors:\n\ne1\ne2"
    )
    self.assertEqual(str(failure_event), expected_text)

    # The event must compare equal to its own pickled-and-restored copy.
    restored_event = pickle.loads(pickle.dumps(failure_event))
    self.assertEqual(failure_event, restored_event)
def _run_stress(self, loader, loader_idx, cpu_idx):
    """Run the built KCL stress command through a ``RemoteDocker`` on ``loader``.

    The command from ``self.build_stress_cmd()`` is prefixed with
    ``cd /hydra-kcl &&`` and, when ``self.stress_num > 1``, wrapped in
    ``taskset -c <cpu_idx>``.  Output is logged to a uniquely named file under
    ``loader.logdir``.  ``KclStressEvent`` start / failure / finish events are
    published around the run.

    :param loader: loader node the docker container is created for.
    :param loader_idx: loader index, used in the log file name.
    :param cpu_idx: CPU index, used in the log file name and for ``taskset``.
    :return: whatever ``docker.run`` returns.
    :raises Exception: any error from ``docker.run`` is re-raised after the
        failure event has been published.
    """
    kcl_docker = RemoteDocker(
        loader,
        "scylladb/hydra-loaders:kcl-jdk8-20210215",
        extra_docker_opts=f'--label shell_marker={self.shell_marker}',
    )
    stress_cmd = self.build_stress_cmd()

    if not os.path.exists(loader.logdir):
        os.makedirs(loader.logdir, exist_ok=True)
    log_path = os.path.join(loader.logdir, 'kcl-l%s-c%s-%s.log' % (loader_idx, cpu_idx, uuid.uuid4()))
    LOGGER.debug('kcl-stress local log: %s', log_path)
    LOGGER.debug("'running: %s", stress_cmd)

    # With more than one stress process, bind this one to its CPU.
    pinned_cmd = (
        'taskset -c %s bash -c "%s"' % (cpu_idx, stress_cmd)
        if self.stress_num > 1
        else stress_cmd
    )
    shell_cmd = 'cd /hydra-kcl && {}'.format(pinned_cmd)

    KclStressEvent.start(node=loader, stress_cmd=stress_cmd).publish()
    try:
        return kcl_docker.run(
            cmd=shell_cmd,
            timeout=self.timeout + self.shutdown_timeout,
            log_file=log_path,
        )
    except Exception as exc:  # pylint: disable=broad-except
        # NOTE(review): the failure event carries self.stress_cmd while the
        # start/finish events carry the built command - confirm this is intended.
        KclStressEvent.failure(
            node=loader,
            stress_cmd=self.stress_cmd,
            log_file_name=log_path,
            errors=[format_stress_cmd_error(exc)],
        ).publish()
        raise
    finally:
        KclStressEvent.finish(node=loader, stress_cmd=stress_cmd, log_file_name=log_path).publish()
def _run_stress(self, loader, loader_idx, cpu_idx):
    """Periodically report source/destination table sizes until a timeout.

    Options come from ``self.stress_cmd``: the ``table_compare`` keyword is
    removed and the remainder is parsed as ``key=value`` pairs separated by
    ``;``.  The keys read are ``interval`` (default 20, passed to
    ``time.sleep``), ``timeout`` (default 28800, compared against elapsed
    ``time.time()`` seconds), ``src_table`` and ``dst_table``.

    Every iteration marks the queried node with a ``running_nemesis`` label,
    flushes it, reads the ``Number of partitions (estimate)`` cfstats value
    of both tables, clears the label and publishes the sizes.  The loop ends
    when ``self._stop_event`` is set or the timeout has elapsed.

    :param loader: loader node; used to select the DB node to query and as
        the ``node`` of the published events.
    :param loader_idx: not used by this implementation.
    :param cpu_idx: not used by this implementation.
    :return: always ``None``.
    :raises Exception: any error raised while parsing or polling is re-raised
        after a ``KclStressEvent.failure`` has been published.
    """
    KclStressEvent.start(node=loader, stress_cmd=self.stress_cmd).publish()
    try:
        options_str = self.stress_cmd.replace('table_compare', '').strip()
        # maxsplit=1 keeps values containing "=" intact; blank items (for
        # example after a trailing ";") are skipped instead of breaking dict().
        options = dict(
            item.strip().split("=", 1)
            for item in options_str.split(";")
            if item.strip()
        )
        interval = int(options.get('interval', 20))
        timeout = int(options.get('timeout', 28800))
        src_table = options.get('src_table')
        dst_table = options.get('dst_table')
        start_time = time.time()

        while not self._stop_event.is_set():
            node: BaseNode = self.db_node_to_query(loader)
            node.running_nemesis = "Compare tables size by cf-stats"
            try:
                node.run_nodetool('flush')
                dst_size = node.get_cfstats(dst_table)['Number of partitions (estimate)']
                src_size = node.get_cfstats(src_table)['Number of partitions (estimate)']
            finally:
                # Always clear the label, even when flush/cfstats raises, so
                # the node is not left marked after a failure.
                node.running_nemesis = None

            elapsed_time = time.time() - start_time
            status = f"== CompareTablesSizesThread: dst table/src table number of partitions: {dst_size}/{src_size} =="
            LOGGER.info(status)
            status_msg = f'[{elapsed_time}/{timeout}] {status}'
            InfoEvent(status_msg).publish()

            # The timeout check and the sleep run on every iteration, also
            # while the source table is still empty; otherwise an empty source
            # would spin without pause and never time out.
            if elapsed_time > timeout:
                InfoEvent(f"== CompareTablesSizesThread: exiting on timeout of {timeout}").publish()
                break
            time.sleep(interval)
        return None
    except Exception as exc:  # pylint: disable=broad-except
        errors_str = format_stress_cmd_error(exc)
        KclStressEvent.failure(node=loader, stress_cmd=self.stress_cmd, errors=[errors_str, ]).publish()
        raise
    finally:
        KclStressEvent.finish(node=loader).publish()
def _run_stress(self, loader, loader_idx, cpu_idx):
    """Run the built KCL stress command through a ``RemoteDocker`` on ``loader``.

    When the ``alternator_use_dns_routing`` parameter is set, an additional
    ``RemoteDocker`` running ``/dns_server.py`` is created first and its
    internal IP address is handed to the KCL container through ``--dns``
    docker options.

    The command from ``self.build_stress_cmd()`` is prefixed with
    ``cd /hydra-kcl &&`` and, when ``self.stress_num > 1``, wrapped in
    ``taskset -c <cpu_idx>``.  Output is logged to a uniquely named file under
    ``loader.logdir``.  ``KclStressEvent`` start / failure / finish events are
    published around the run.

    :param loader: loader node the docker containers are created for.
    :param loader_idx: loader index, used in the log file name.
    :param cpu_idx: CPU index, used in the log file name and for ``taskset``.
    :return: whatever ``docker.run`` returns.
    :raises Exception: any error from ``docker.run`` is re-raised after the
        failure event has been published.
    """
    dns_options = ""
    if self.params.get('alternator_use_dns_routing'):
        dns_docker = RemoteDocker(
            loader,
            "scylladb/hydra-loaders:alternator-dns-0.2",
            command_line=f'python3 /dns_server.py {self.db_node_to_query(loader)} '
                         f'{self.params.get("alternator_port")}',
            extra_docker_opts=f'--label shell_marker={self.shell_marker}',
        )
        dns_options = f'--dns {dns_docker.internal_ip_address} --dns-option use-vc'

    kcl_docker = RemoteDocker(
        loader,
        "scylladb/hydra-loaders:kcl-jdk8-20201104",
        extra_docker_opts=f'{dns_options} --label shell_marker={self.shell_marker}',
    )
    stress_cmd = self.build_stress_cmd()

    if not os.path.exists(loader.logdir):
        os.makedirs(loader.logdir, exist_ok=True)
    log_path = os.path.join(loader.logdir, 'kcl-l%s-c%s-%s.log' % (loader_idx, cpu_idx, uuid.uuid4()))
    LOGGER.debug('kcl-stress local log: %s', log_path)
    LOGGER.debug("'running: %s", stress_cmd)

    # With more than one stress process, bind this one to its CPU.
    pinned_cmd = (
        'taskset -c %s bash -c "%s"' % (cpu_idx, stress_cmd)
        if self.stress_num > 1
        else stress_cmd
    )
    shell_cmd = 'cd /hydra-kcl && {}'.format(pinned_cmd)

    KclStressEvent.start(node=loader, stress_cmd=stress_cmd).publish()
    try:
        return kcl_docker.run(
            cmd=shell_cmd,
            timeout=self.timeout + self.shutdown_timeout,
            log_file=log_path,
        )
    except Exception as exc:  # pylint: disable=broad-except
        # NOTE(review): the failure event carries self.stress_cmd while the
        # start/finish events carry the built command - confirm this is intended.
        KclStressEvent.failure(
            node=loader,
            stress_cmd=self.stress_cmd,
            log_file_name=log_path,
            errors=[format_stress_cmd_error(exc)],
        ).publish()
        raise
    finally:
        KclStressEvent.finish(node=loader, stress_cmd=stress_cmd, log_file_name=log_path).publish()
def _run_stress(self, loader, loader_idx, cpu_idx):
    """Periodically report source/destination table sizes until a timeout.

    Table names are read from ``self._options`` (``src_table`` and
    ``dst_table``); ``self._timeout`` bounds the total run time (added to
    ``time.time()``) and ``self._interval`` is the ``time.sleep`` pause
    between iterations.

    Every iteration marks the queried node with a ``running_nemesis`` label,
    flushes it, reads the ``Number of partitions (estimate)`` cfstats value
    of both tables, clears the label and publishes the sizes.  The loop ends
    when ``self._stop_event`` is set or the deadline has passed.

    :param loader: loader node; used to select the DB node to query and as
        the ``node`` of the published events.
    :param loader_idx: not used by this implementation.
    :param cpu_idx: not used by this implementation.
    :return: always ``None``.
    :raises Exception: any error raised while polling is re-raised after a
        ``KclStressEvent.failure`` has been published.
    """
    KclStressEvent.start(node=loader, stress_cmd=self.stress_cmd).publish()
    try:
        src_table = self._options.get('src_table')
        dst_table = self._options.get('dst_table')
        end_time = time.time() + self._timeout

        while not self._stop_event.is_set():
            node: BaseNode = self.db_node_to_query(loader)
            node.running_nemesis = "Compare tables size by cf-stats"
            try:
                node.run_nodetool('flush')
                dst_size = node.get_cfstats(dst_table)['Number of partitions (estimate)']
                src_size = node.get_cfstats(src_table)['Number of partitions (estimate)']
            finally:
                # Always clear the label, even when flush/cfstats raises, so
                # the node is not left marked after a failure.
                node.running_nemesis = None

            status = f"== CompareTablesSizesThread: dst table/src table number of partitions: {dst_size}/{src_size} =="
            LOGGER.info(status)
            InfoEvent(f'[{time.time()}/{end_time}] {status}').publish()

            # The deadline check and the sleep run on every iteration, also
            # while the source table is still empty; otherwise an empty source
            # would spin without pause and never time out.
            if time.time() > end_time:
                InfoEvent(f"== CompareTablesSizesThread: exiting on timeout of {self._timeout}").publish()
                break
            time.sleep(self._interval)
        return None
    except Exception as exc:  # pylint: disable=broad-except
        KclStressEvent.failure(
            node=loader,
            stress_cmd=self.stress_cmd,
            errors=[format_stress_cmd_error(exc), ],
        ).publish()
        raise
    finally:
        KclStressEvent.finish(node=loader).publish()