def kill_running_monitoring_stack_services():
    """Force-remove every docker container that belongs to the monitoring stack."""
    runner = LocalCmdRunner()
    for service_info in get_monitoring_stack_services():
        LOGGER.info("Killing %s", service_info['service'])
        remove_cmd = 'docker rm -f {name}-{port}'.format(name=service_info['name'],
                                                         port=service_info['port'])
        runner.run(remove_cmd, ignore_status=True)
def start_auto_ssh(docker_name, node, local_port, remote_port, ssh_mode="-R"): """ Starts a reverse port forwarding with autossh inside a docker container :param docker_name: prefix of the docker name (cluster.Setup.test_id() usually would be used) :param node: an instance of a class derived from BaseNode that has ssh_login_info :param local_port: the destination port on local machine :param remote_port: the source port on the remote :param ssh_mode: define tunnel mode: -R reverse mode, -L direct mode :return: None """ # pylint: disable=protected-access host_name = node.ssh_login_info['hostname'] user_name = node.ssh_login_info['user'] key_path = node.ssh_login_info['key_file'] local_runner = LocalCmdRunner() res = local_runner.run(''' docker run -d --network=host \ -e SSH_HOSTNAME={host_name} \ -e SSH_HOSTUSER={user_name} \ -e SSH_TUNNEL_HOST=127.0.0.1 \ -e SSH_MODE={ssh_mode} \ -e SSH_TUNNEL_LOCAL={local_port} \ -e SSH_TUNNEL_REMOTE={remote_port} \ -e AUTOSSH_GATETIME=0 \ -v {key_path}:/id_rsa \ --restart always \ --name {docker_name}-{host_name}-autossh jnovack/autossh '''.format(host_name=host_name, user_name=user_name, ssh_mode=ssh_mode, local_port=local_port, remote_port=remote_port, key_path=key_path, docker_name=docker_name)) atexit.register(stop_auto_ssh, docker_name, node) LOGGER.debug('{docker_name}-{host_name}-autossh {res.stdout}'.format(docker_name=docker_name, host_name=host_name, res=res))
def stop_rsyslog():
    """Stop the rsyslog docker container and remove its generated config file.

    Best-effort cleanup (registered via atexit by start_rsyslog): container kill
    errors are ignored via ignore_status, and a failed config-file removal is
    swallowed so teardown never breaks the caller.
    """
    global RSYSLOG_DOCKER_ID, RSYSLOG_CONF_PATH  # pylint: disable=global-statement
    if RSYSLOG_DOCKER_ID:
        local_runner = LocalCmdRunner()
        local_runner.run("docker kill {id}".format(id=RSYSLOG_DOCKER_ID), ignore_status=True)
    if RSYSLOG_CONF_PATH:
        try:
            os.remove(RSYSLOG_CONF_PATH)
        except OSError:
            # Fix: narrowed from a bare `except Exception: pass` — only
            # file-removal errors (missing file, permissions) are expected here.
            pass
    RSYSLOG_CONF_PATH = None
    RSYSLOG_DOCKER_ID = None
def start_rsyslog(docker_name, log_dir, port="514"):
    """ Start rsyslog in a docker, for getting logs from db-nodes

    :param docker_name: name of the docker instance
    :param log_dir: directory where to store the logs
    :param port: [Optional] the port binding for the docker run
    :return: the listening port
    """
    global RSYSLOG_DOCKER_ID, RSYSLOG_CONF_PATH  # pylint: disable=global-statement
    log_dir = os.path.abspath(log_dir)

    # cause of docker-in-docker, we need to capture the host log dir for mounting it
    # _SCT_BASE_DIR is set in hydra.sh
    base_dir = os.environ.get("_SCT_BASE_DIR", None)
    if base_dir:
        mount_log_dir = os.path.join(base_dir, os.path.basename(log_dir))
    else:
        mount_log_dir = log_dir
    conf_path = generate_conf_file()
    RSYSLOG_CONF_PATH = conf_path
    local_runner = LocalCmdRunner()
    # `-p {port}` publishes the container port; the actual host-side port is
    # resolved further down via `docker port`.
    res = local_runner.run('''
        mkdir -p {log_dir};
        docker run --rm -d \
        -v /etc/passwd:/etc/passwd:ro \
        -v /etc/group:/etc/group:ro \
        -v {mount_log_dir}:/logs \
        -v {conf_path}:/etc/rsyslog.conf \
        -p {port} \
        --name {docker_name}-rsyslogd rsyslog/syslog_appliance_alpine
        '''.format(log_dir=log_dir, mount_log_dir=mount_log_dir, conf_path=conf_path,
                   port=port, docker_name=docker_name))
    RSYSLOG_DOCKER_ID = res.stdout.strip()
    LOGGER.info("Rsyslog started. Container id: %s", RSYSLOG_DOCKER_ID)
    atexit.register(stop_rsyslog)
    # Ask docker which host address:port got bound to the container's 514.
    res = local_runner.run('docker port {0} 514'.format(RSYSLOG_DOCKER_ID))
    listening_port = res.stdout.strip().split(':')[1]
    return listening_port
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):  # pylint: disable=unused-argument
    """Kill any running monitoring-stack dockers, then start a fresh stack.

    Uses the checked-out scylla-monitoring scripts (kill-all.sh / start-all.sh)
    with the port constants configured for this module.
    """
    graf_port = GRAFANA_DOCKER_PORT
    alert_port = ALERT_DOCKER_PORT
    prom_port = PROMETHEUS_DOCKER_PORT
    runner = LocalCmdRunner()
    kill_cmd = ('cd {monitoring_dockers_dir}; '
                './kill-all.sh -g {graf_port} -m {alert_port} -p {prom_port}').format(
        monitoring_dockers_dir=monitoring_dockers_dir, graf_port=graf_port,
        alert_port=alert_port, prom_port=prom_port)
    runner.run(kill_cmd, ignore_status=True, verbose=False)
    start_cmd = dedent("""cd {monitoring_dockers_dir};
        ./start-all.sh \
        -g {graf_port} -m {alert_port} -p {prom_port} \
        -s {monitoring_dockers_dir}/config/scylla_servers.yml \
        -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
        -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        graf_port=graf_port, alert_port=alert_port, prom_port=prom_port,
        monitoring_stack_data_dir=monitoring_stack_data_dir,
        scylla_version=scylla_version))
    res = runner.run(start_cmd)
    if res.ok:
        LOGGER.info("Docker containers for monitoring stack are started")
def __init__(self, name, ssh_login_info=None, instance=None, global_ip=None,
             grafana_ip=None, tags=None, logdir=None):  # pylint: disable=too-many-arguments
    """Node wrapper used for collection: runs commands locally when no SSH info is given."""
    if logdir:
        self.logdir = logdir
    self._containers = {}
    self.name = name
    # Without SSH credentials all commands are executed on the local machine.
    self.remoter = LocalCmdRunner() if ssh_login_info is None else RemoteCmdRunner(**ssh_login_info)
    self.ssh_login_info = ssh_login_info
    self._instance = instance
    self.external_address = global_ip
    # Grafana may live on a different address (e.g. a docker bridge gateway).
    self.grafana_address = global_ip if grafana_ip is None else grafana_ip
    self.tags = {**(tags or {}), "Name": self.name}
def test_run_in_mainthread(  # pylint: disable=too-many-arguments
        self, remoter_type, host: str, stmt: str, verbose: bool, ignore_status: bool,
        new_session: bool, retry: int, timeout: Union[float, None]):
    """Run *stmt* remotely in the main thread and compare against a local run.

    The local run produces the expected outcome (either a result object or the
    raised exception). The remote runner is then exercised twice — once on its
    initial connection and once after a forced reconnect — and both outcomes
    must match the local one.
    """
    kwargs = {
        'verbose': verbose, 'ignore_status': ignore_status, 'new_session': new_session,
        'retry': retry, 'timeout': timeout}
    try:
        expected = LocalCmdRunner().run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        expected = exc
    if issubclass(remoter_type, (RemoteCmdRunner, RemoteLibSSH2CmdRunner)):
        remoter = remoter_type(hostname=host, user=getpass.getuser(), key_file=self.key_file)
    else:
        # Any other runner type is exercised through the Kubernetes runner
        # against a local kubectl-proxy endpoint.
        remoter = KubernetesCmdRunner(
            FakeKluster('http://127.0.0.1:8001'),
            pod='sct-cluster-gce-minikube-0', container="scylla", namespace="scylla")
    try:
        result = remoter.run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        result = exc
    # Force a reconnect to verify the runner also behaves after a connection reset.
    remoter._reconnect()  # pylint: disable=protected-access
    try:
        result2 = remoter.run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        result2 = exc
    remoter.stop()
    self._compare_results(expected, result, stmt=stmt, kwargs=kwargs)
    self._compare_results(expected, result2, stmt=stmt, kwargs=kwargs)
def test_create_and_run_in_same_thread(  # pylint: disable=too-many-arguments,too-many-locals
        self, host: str, stmt: str, verbose: bool, ignore_status: bool, new_session: bool,
        retry: int, timeout: Union[float, None]):
    """Run *stmt* locally, then via paramiko and libssh2 (3 threads each) and compare.

    Each worker thread creates its own remoter; every remote outcome must match
    the local result (or the local exception).
    """
    kwargs = dict(verbose=verbose, ignore_status=ignore_status, new_session=new_session,
                  retry=retry, timeout=timeout)
    self.log.info(repr({stmt: stmt, **kwargs}))
    try:
        expected = LocalCmdRunner().run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        expected = exc
    # Keep the backends in a fixed order: paramiko first, then libssh2.
    results_per_backend = {RemoteCmdRunner: [], RemoteLibSSH2CmdRunner: []}
    for remoter_type, backend_results in results_per_backend.items():
        self._run_parallel(
            3,
            thread_body=self._create_and_run_in_same_thread,
            args=(remoter_type, host, self.key_file, stmt, kwargs, backend_results),
            kwargs={})
    for backend_results in results_per_backend.values():
        for outcome in backend_results:
            self._compare_results(expected, outcome, stmt=stmt, kwargs=kwargs)
def test_create_and_run_in_separate_thread(  # pylint: disable=too-many-arguments
        self, remoter_type, host: str, stmt: str, verbose: bool, ignore_status: bool,
        new_session: bool, retry: int, timeout: Union[float, None]):
    """Create one remoter in the main thread, then run *stmt* from 3 worker threads.

    All worker outcomes must match the local run of the same statement.
    """
    kwargs = dict(verbose=verbose, ignore_status=ignore_status, new_session=new_session,
                  retry=retry, timeout=timeout)
    self.log.info(repr({stmt: stmt, **kwargs}))
    try:
        expected = LocalCmdRunner().run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        expected = exc
    # Paramiko fails too often when it is invoked like that, that is why it is not in the test
    if issubclass(remoter_type, (RemoteCmdRunner, RemoteLibSSH2CmdRunner)):
        remoter = remoter_type(hostname=host, user=getpass.getuser(), key_file=self.key_file)
    else:
        remoter = KubernetesCmdRunner(
            FakeKluster('http://127.0.0.1:8001'),
            pod='sct-cluster-gce-minikube-0', container="scylla", namespace="scylla")
    thread_results = []
    self._run_parallel(3,
                       thread_body=self._create_and_run_in_separate_thread,
                       args=(remoter, stmt, kwargs, thread_results),
                       kwargs={})
    for outcome in thread_results:
        self.log.error(str(outcome))
        self._compare_results(expected, outcome, stmt=stmt, kwargs=kwargs)
def test_create_and_run_in_separate_thread(  # pylint: disable=too-many-arguments
        self, host: str, stmt: str, verbose: bool, ignore_status: bool, new_session: bool,
        retry: int, timeout: Union[float, None]):
    """Create a libssh2 remoter in the main thread, then run *stmt* from 3 worker threads."""
    kwargs = dict(verbose=verbose, ignore_status=ignore_status, new_session=new_session,
                  retry=retry, timeout=timeout)
    self.log.info(repr({stmt: stmt, **kwargs}))
    try:
        expected = LocalCmdRunner().run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        expected = exc
    # Paramiko fails too often when it is invoked like that, that is why it is not in the test
    remoter = RemoteLibSSH2CmdRunner(hostname=host, user=getpass.getuser(), key_file=self.key_file)
    thread_results = []
    self._run_parallel(3,
                       thread_body=self._create_and_run_in_separate_thread,
                       args=(remoter, stmt, kwargs, thread_results),
                       kwargs={})
    for outcome in thread_results:
        self.log.error(str(outcome))
        self._compare_results(expected, outcome, stmt=stmt, kwargs=kwargs)
def test_load_1000_threads(self, remoter_type, stmt: str):
    """Stress test: run *stmt* from 1000 threads, each creating its own remoter."""
    kwargs = dict(verbose=True, ignore_status=False, new_session=True, retry=2)
    self.log.info(repr({stmt: stmt, **kwargs}))
    try:
        expected = LocalCmdRunner().run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        expected = exc
    thread_results = []
    self._run_parallel(1000,
                       thread_body=self._create_and_run_in_same_thread,
                       args=(remoter_type, self.key_file, stmt, kwargs, thread_results),
                       kwargs={})
    for outcome in thread_results:
        self.log.error(str(outcome))
        self._compare_results(expected, outcome, stmt=stmt, kwargs=kwargs)
def get_docker_instances_by_testid(self):
    """Populate db/monitor/loader node sets from GCE instances tagged with this test id."""
    instances = list_instances_gce({"TestId": self.test_id}, running=True)
    filtered_instances = filter_gce_instances_by_type(instances)

    def node_from_instance(instance):
        # db and loader nodes share the same SSH credentials layout.
        return CollectingNode(name=instance.name,
                              ssh_login_info={
                                  "hostname": instance.public_ips[0],
                                  "user": '******',
                                  "key_file": self.params['user_credentials_path']},
                              instance=instance,
                              global_ip=instance.public_ips[0])

    for instance in filtered_instances['db_nodes']:
        self.db_cluster.append(node_from_instance(instance))
    # The monitor runs locally; Grafana is reached through the docker bridge gateway.
    self.monitor_set.append(
        CollectingNode(name=f"monitor-node-{self.test_id}-0",
                       global_ip='127.0.0.1',
                       grafana_ip=get_docker_bridge_gateway(LocalCmdRunner())))
    for instance in filtered_instances['loader_nodes']:
        self.loader_set.append(node_from_instance(instance))
def _get_screenshot_link(self, grafana_url, screenshot_path, resolution="1920x1280"):
    """Render *grafana_url* into *screenshot_path* using the bundled phantomjs."""
    screenshot_cmd = ("cd {0.phantomjs_dir} && bin/phantomjs "
                      "{0.sct_base_path}/data_dir/make_screenshot.js "
                      "\"{1}\" \"{2}\" {3}").format(self, grafana_url, screenshot_path, resolution)
    LocalCmdRunner().run(screenshot_cmd, ignore_status=True)
def __init__(self):
    """Local pseudo-node: every command and file transfer happens on this machine."""
    self.ip_address = "127.0.0.1"
    self.logdir = os.path.dirname(__file__)
    runner = LocalCmdRunner()
    # Locally, "sending" and "receiving" files are the same copy operation,
    # so both directions are patched to the same helper.
    runner.receive_files = types.MethodType(send_receive_files, runner)
    runner.send_files = types.MethodType(send_receive_files, runner)
    self.remoter = runner
def is_docker_available():
    """Return True when the local docker daemon answers ``docker ps``."""
    LOGGER.info("Checking that docker is available...")
    result = LocalCmdRunner().run('docker ps', ignore_status=True, verbose=False)
    if not result.ok:
        LOGGER.warning('Docker is not available on your computer. Please install docker software before continue')
        return False
    LOGGER.info('Docker is available')
    return True
def is_auto_ssh_running(docker_name, node):
    """Check whether the autossh tunnel container for *node* is running.

    :param docker_name: docker name prefix
    :type docker_name: str
    :param node: Node instance where tunnel was up
    :type node: BaseNode
    :returns: True if the container shows up in ``docker ps``, False otherwise
    :rtype: {boolean}
    """
    container_name = f"{docker_name}-{node.ssh_login_info['hostname']}-autossh"
    ps_output = LocalCmdRunner().run('docker ps', ignore_status=True).stdout
    return container_name in ps_output
def verify_dockers_are_running():
    """Return True when both the Grafana and Prometheus monitoring containers are up."""
    result = LocalCmdRunner().run("docker ps --format '{{.Names}}'", ignore_status=True)  # pylint: disable=invalid-name
    docker_names = result.stdout.strip().split()
    required = (f"{GRAFANA_DOCKER_NAME}-{GRAFANA_DOCKER_PORT}",
                f"{PROMETHEUS_DOCKER_NAME}-{PROMETHEUS_DOCKER_PORT}")
    if result.ok and docker_names and all(name in docker_names for name in required):
        LOGGER.info("Monitoring stack docker containers are running.\n%s", result.stdout)
        return True
    LOGGER.error("Monitoring stack containers are not running\nStdout:\n%s\nstderr:%s",
                 result.stdout, result.stderr)
    return False
def __init__(self, name, parent_cluster, ssh_login_info=None, base_logdir=None, node_prefix=None, dc_idx=0):
    # Dummy local node: the ssh/logdir/prefix arguments are accepted for interface
    # compatibility but not used — all commands run on the local machine.
    super().__init__(name, parent_cluster)
    self.logdir = os.path.dirname(__file__)
    self.remoter = LocalCmdRunner()
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):  # pylint: disable=unused-argument
    """Start the scylla-monitoring docker stack with SCT-specific pieces stripped out.

    Kills any running stack first, clears the scylla targets and the SCT scrape
    jobs from the prometheus template, then launches start-all.sh.
    """
    graf_port = GRAFANA_DOCKER_PORT
    alert_port = ALERT_DOCKER_PORT
    prom_port = PROMETHEUS_DOCKER_PORT
    lr = LocalCmdRunner()  # pylint: disable=invalid-name
    lr.run(
        'cd {monitoring_dockers_dir}; ./kill-all.sh -g {graf_port} -m {alert_port} -p {prom_port}'
        .format(**locals()),
        ignore_status=True, verbose=False)

    # clear scylla nodes from configuration
    servers_yaml = Path(
        monitoring_dockers_dir) / 'config' / 'scylla_servers.yml'
    servers_yaml.write_text("- targets: []")

    # clear SCT scrape configurations
    prom_tmpl_file = Path(
        monitoring_dockers_dir) / 'prometheus' / 'prometheus.yml.template'
    templ_yaml = yaml.safe_load(prom_tmpl_file.read_text())

    def remove_sct_metrics(metric):
        # SCT-injected scrape jobs all carry "_metrics" in their job name.
        return '_metrics' not in metric['job_name']

    templ_yaml["scrape_configs"] = list(
        filter(remove_sct_metrics, templ_yaml["scrape_configs"]))
    prom_tmpl_file.write_text(yaml.safe_dump(templ_yaml))

    # `echo "" > UA.sh` neutralizes the UA.sh script before start-all.sh sources it
    # (presumably usage analytics — confirm against the monitoring branch).
    # The $(grep ...) substitutions pass --no-renderer/--no-loki only when this
    # monitoring branch's start-all.sh actually supports those flags.
    cmd = dedent("""cd {monitoring_dockers_dir};
        echo "" > UA.sh
        ./start-all.sh \
        $(grep -q -- --no-renderer ./start-all.sh && echo "--no-renderer") \
        $(grep -q -- --no-loki ./start-all.sh && echo "--no-loki") \
        -g {graf_port} -m {alert_port} -p {prom_port} \
        -s {monitoring_dockers_dir}/config/scylla_servers.yml \
        -d {monitoring_stack_data_dir} -v {scylla_version} \
        -b '-storage.tsdb.retention.time=100y' \
        -c 'GF_USERS_DEFAULT_THEME=dark'""".format(**locals()))
    res = lr.run(cmd)
    if res.ok:
        LOGGER.info("Docker containers for monitoring stack are started")
def install_phantom_js(self):
    """Install phantom_js to sct root dir

    If sct runs outside the docker container,
    sct will install the phantomjs to sct root dir
    """
    local_runner = LocalCmdRunner()
    if self.phantomjs_installed:
        LOGGER.debug("PhantomJS is already installed!")
        return
    LOGGER.debug("Installing phantomjs to sct root dir")
    phantomjs_tar = "{0.phantomjs_base}.tar.bz2".format(self)
    phantomjs_url = "https://bitbucket.org/ariya/phantomjs/downloads/{phantomjs_tar}".format(
        phantomjs_tar=phantomjs_tar)
    install_script = dedent("""
        curl {phantomjs_url} -o {phantomjs_tar} -L
        tar xvfj {phantomjs_tar}
    """.format(phantomjs_url=phantomjs_url, phantomjs_tar=phantomjs_tar))
    local_runner.run("bash -ce '%s'" % install_script)
    self.phantomjs_dir = os.path.join(self.sct_base_path, self.phantomjs_base)
def create_monitoring_stack_dir(base_dir, archive):
    """Copy and untar the monitoring-stack archive under *base_dir*.

    :return: the extracted monitoring stack directory, or False on extraction failure
    """
    archive_name = os.path.basename(archive)
    extract_cmd = dedent("""
        cd {data_dir}
        cp {archive} ./
        tar -xvf {archive_name}
        chmod -R 777 {data_dir}
        """.format(data_dir=base_dir, archive_name=archive_name, archive=archive))
    result = LocalCmdRunner().run(extract_cmd, ignore_status=True)
    if result.exited > 0:
        LOGGER.error("Error during extracting monitoring stack")
        return False
    return get_monitoring_stack_dir(base_dir)
def create_monitoring_data_dir(base_dir, archive):
    """Extract a prometheus snapshot archive into <base_dir>/monitoring_data_dir.

    :return: the prometheus data directory, or False if extraction failed
    """
    data_dir = os.path.join(base_dir, 'monitoring_data_dir')
    extract_cmd = dedent("""
        mkdir -p {data_dir}
        cd {data_dir}
        cp {archive} ./
        tar -xvf {archive_name}
        chmod -R 777 {data_dir}
        """.format(data_dir=data_dir, archive=archive, archive_name=os.path.basename(archive)))
    result = LocalCmdRunner().run(extract_cmd, timeout=COMMAND_TIMEOUT, ignore_status=True)
    if result.exited > 0:
        LOGGER.error("Error during extracting prometheus snapshot. Switch to next archive")
        return False
    return get_monitoring_data_dir(data_dir)
def _get_shared_snapshot_link(self, grafana_url):
    """Share a Grafana snapshot via phantomjs and return its public URL.

    :param grafana_url: URL of the Grafana dashboard to share
    :return: the snapshot.raintank.io URL, or "" when sharing failed
    """
    result = LocalCmdRunner().run(
        "cd {0.phantomjs_dir} && bin/phantomjs {0.sct_base_path}/data_dir/share_snapshot.js \"{1}\""
        .format(self, grafana_url),
        ignore_status=True)
    # since there is only one monitoring node returning here
    output = result.stdout.strip()
    if "Error" in output:
        LOGGER.error(output)
        return ""
    # Dots escaped so the host name must match literally.
    matched = re.search(r"https://snapshot\.raintank\.io/dashboard/snapshot/\w+", output)
    if matched is None:
        # Bug fix: previously this crashed with AttributeError when the output
        # contained neither an "Error" marker nor a snapshot URL.
        LOGGER.error("Couldn't find shared snapshot link in phantomjs output:\n%s", output)
        return ""
    LOGGER.info("Shared grafana snapshot: %s", matched.group())
    return matched.group()
def __init__(self, name, ssh_login_info=None, instance=None, global_ip=None, grafana_ip=None):  # pylint: disable=too-many-arguments
    """Node wrapper for log collection; commands run locally when no SSH info is given."""
    self.name = name
    self.remoter = LocalCmdRunner() if ssh_login_info is None else RemoteCmdRunner(**ssh_login_info)
    self.ssh_login_info = ssh_login_info
    self._instance = instance
    self.external_address = global_ip
    # Grafana may be reachable on a different address than the node itself.
    self.grafana_address = global_ip if grafana_ip is None else grafana_ip
def test_run_in_mainthread(  # pylint: disable=too-many-arguments
        self, host: str, stmt: str, verbose: bool, ignore_status: bool, new_session: bool,
        retry: int, timeout: Union[float, None]):
    """Compare paramiko and libssh2 remote runs of *stmt* against a local run.

    Each backend is exercised twice — once on the initial connection and once
    after a forced reconnect — and all four outcomes must match the local one
    (result object or raised exception).
    """
    kwargs = {
        'verbose': verbose, 'ignore_status': ignore_status, 'new_session': new_session,
        'retry': retry, 'timeout': timeout}
    try:
        expected = LocalCmdRunner().run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        expected = exc
    remoter = RemoteCmdRunner(
        hostname=host, user=getpass.getuser(), key_file=self.key_file)
    try:
        paramiko_result = remoter.run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        paramiko_result = exc
    # Exercise the runner again after a forced reconnect.
    remoter._reconnect()  # pylint: disable=protected-access
    try:
        paramiko_result2 = remoter.run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        paramiko_result2 = exc
    remoter.stop()
    # NOTE(review): the libssh2 runner targets 127.0.0.1 while paramiko uses
    # `host` — presumably both point at the same machine here; confirm at call site.
    remoter = RemoteLibSSH2CmdRunner(
        hostname='127.0.0.1', user=getpass.getuser(), key_file=self.key_file)
    try:
        lib2ssh_result = remoter.run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        lib2ssh_result = exc
    remoter._reconnect()  # pylint: disable=protected-access
    try:
        lib2ssh_result2 = remoter.run(stmt, **kwargs)
    except Exception as exc:  # pylint: disable=broad-except
        lib2ssh_result2 = exc
    remoter.stop()
    self._compare_results(expected, paramiko_result, stmt=stmt, kwargs=kwargs)
    self._compare_results(expected, paramiko_result2, stmt=stmt, kwargs=kwargs)
    self._compare_results(expected, lib2ssh_result, stmt=stmt, kwargs=kwargs)
    self._compare_results(expected, lib2ssh_result2, stmt=stmt, kwargs=kwargs)
def stop_auto_ssh(docker_name, node):
    """
    stops an autossh docker instance
    :param docker_name: prefix of the docker name (cluster.Setup.test_id() usually would be used)
    :param node: an instance of a class derived from BaseNode that has ssh_login_info
    :return: None
    """
    # pylint: disable=protected-access
    host_name = node.ssh_login_info['hostname']
    container_name = f"{docker_name}-{host_name}-autossh"
    local_runner = LocalCmdRunner()
    LOGGER.debug("Saving autossh container logs")
    # NOTE(review): `&>` is a bash-ism — assumes LocalCmdRunner executes via bash; confirm.
    local_runner.run(f"docker logs {container_name} &> {node.logdir}/autossh.log", ignore_status=True)
    # Fix: use lazy %-style logger arguments (file convention, avoids W1203)
    # instead of an eager f-string; the emitted message is unchanged.
    LOGGER.debug("Killing %s", container_name)
    local_runner.run(f"docker rm -f {container_name}", ignore_status=True)
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):  # pylint: disable=unused-argument
    """Kill and restart the scylla-monitoring docker stack.

    :return: True when start-all.sh succeeded
    :raises Exception: when start-all.sh exits with an error
    """
    lr = LocalCmdRunner()  # pylint: disable=invalid-name
    lr.run('cd {}; ./kill-all.sh'.format(monitoring_dockers_dir))
    cmd = dedent("""cd {monitoring_dockers_dir};
        ./start-all.sh \
        -s {monitoring_dockers_dir}/config/scylla_servers.yml \
        -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
        -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
        **locals()))
    res = lr.run(cmd, ignore_status=True)
    if res.ok:
        r = lr.run('docker ps')  # pylint: disable=invalid-name
        # Bug fix: was `LOGGER.info(r.stdout.encode('utf-8'))` — a Python 2
        # leftover that logged a bytes repr like b'...'; log the text directly.
        LOGGER.info(r.stdout)
        return True
    raise Exception('dockers start failed. {}'.format(res))
class TestHydraSh(unittest.TestCase):
    """Dry-run tests for docker/env/hydra.sh across backend/runner/credential combinations."""

    # Commands run on the machine executing the tests.
    cmd_runner = LocalCmdRunner()

    @staticmethod
    def prepare_environment(env):
        """Strip all SCT_/AWS_/GOOGLE_ vars from the environment, then apply *env*."""
        # Bug fix: iterate over a snapshot of the keys — deleting from os.environ
        # while iterating it raises "RuntimeError: dictionary changed size during
        # iteration" as soon as any matching variable exists.
        for name in list(os.environ):
            if any(name.startswith(prefix) for prefix in ['SCT_', 'AWS_', 'GOOGLE_']):
                del os.environ[name]
        for name, value in env.items():
            os.environ[name] = value

    @staticmethod
    @contextlib.contextmanager
    def environ():
        """Context manager that restores the full process environment on exit."""
        old_environment = os.environ.copy()
        yield
        os.environ.clear()
        for name, value in old_environment.items():
            os.environ[name] = value

    @staticmethod
    def validate_result(
            result,
            expected_status: int,
            expected: Sequence[Union[str, re.Pattern]],
            not_expected: Sequence[Union[str, re.Pattern]],
    ):
        """Check *result* against status and (not-)expected stdout patterns.

        :return: list of human-readable error strings; empty when everything matched
        """
        errors = []
        if expected_status is not None:
            if result.return_code != expected_status:
                errors.append(
                    f'Returned status {result.return_code}, while expected {expected_status}')
        for pattern_expected in expected:
            if isinstance(pattern_expected, re.Pattern):
                if not pattern_expected.search(result.stdout):
                    errors.append(f"Can't find regex {pattern_expected.pattern}")
            elif isinstance(pattern_expected, str):
                if pattern_expected not in result.stdout:
                    errors.append(f"Can't find {pattern_expected}")
        for pattern_not_expected in not_expected:
            if isinstance(pattern_not_expected, re.Pattern):
                if pattern_not_expected.search(result.stdout):
                    errors.append(
                        f"Found pattern that should not be there: {pattern_not_expected}")
            elif isinstance(pattern_not_expected, str):
                if pattern_not_expected in result.stdout:
                    errors.append(
                        f"Found pattern that should not be there: {pattern_not_expected}")
        return errors

    @parameterized.expand(
        LongevityPipelineTest(backend='aws', runner=False, aws_creds=True, gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(backend='aws', runner=True, aws_creds=True, gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(backend='gce', runner=False, aws_creds=True, gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(backend='gce', runner=True, aws_creds=True, gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(backend='gce-siren', runner=False, aws_creds=True, gce_creds=True).hydra_test_cases +
        LongevityPipelineTest(backend='gce-siren', runner=True, aws_creds=True, gce_creds=True).hydra_test_cases)
    def test_run_test(self, test_case_params: HydraTestCaseParams, tmp_dir: HydraTestCaseTmpDir):
        """Run hydra.sh in dry-run mode for one case and validate its output."""
        with tmp_dir, self.environ():
            cmd, expected, not_expected, expected_status, env = test_case_params.as_tuple
            self.prepare_environment(env)
            result = self.cmd_runner.run(
                sct_abs_path('docker/env/hydra.sh') + ' --dry-run-hydra ' + cmd,
                ignore_status=True)
            errors = self.validate_result(
                result=result,
                expected_status=expected_status,
                expected=expected,
                not_expected=not_expected,
            )
            assert not errors, f'Case: {cmd}\nReturned:\n{result}\nFound following errors:\n' + ('\n'.join(errors))
from sdcm.sct_events.system import SpotTerminationEvent from sdcm.utils.aws_utils import tags_as_ec2_tags, ec2_instance_wait_public_ip from sdcm.utils.common import list_instances_aws, get_ami_tags, MAX_SPOT_DURATION_TIME from sdcm.utils.decorators import retrying from sdcm.wait import exponential_retry LOGGER = logging.getLogger(__name__) INSTANCE_PROVISION_ON_DEMAND = 'on_demand' INSTANCE_PROVISION_SPOT_FLEET = 'spot_fleet' INSTANCE_PROVISION_SPOT_LOW_PRICE = 'spot_low_price' INSTANCE_PROVISION_SPOT_DURATION = 'spot_duration' SPOT_CNT_LIMIT = 20 SPOT_FLEET_LIMIT = 50 SPOT_TERMINATION_CHECK_OVERHEAD = 15 LOCAL_CMD_RUNNER = LocalCmdRunner() EBS_VOLUME = "attached" INSTANCE_STORE = "instance_store" P = ParamSpec("P") # pylint: disable=invalid-name R = TypeVar("R") # pylint: disable=invalid-name # pylint: disable=too-many-lines class AWSCluster(cluster.BaseCluster): # pylint: disable=too-many-instance-attributes,abstract-method, """ Cluster of Node objects, started on Amazon EC2. """
def restore_monitoring_stack(test_id): from sdcm.remote import LocalCmdRunner lr = LocalCmdRunner() logger.info("Checking that docker is available...") result = lr.run('docker ps', ignore_status=True, verbose=False) if result.ok: logger.info('Docker is available') else: logger.warning( 'Docker is not available on your computer. Please install docker software before continue' ) return False monitor_stack_base_dir = tempfile.mkdtemp() stored_files_by_test_id = list_logs_by_test_id(test_id) monitor_stack_archives = [] for f in stored_files_by_test_id: if f['type'] in ['monitoring_data_stack', 'prometheus']: monitor_stack_archives.append(f) if not monitor_stack_archives or len(monitor_stack_archives) < 2: logger.warning( 'There is no available archive files for monitoring data stack restoring for test id : {}' .format(test_id)) return False for arch in monitor_stack_archives: logger.info('Download file {} to directory {}'.format( arch['link'], monitor_stack_base_dir)) local_path_monitor_stack = S3Storage().download_file( arch['link'], dst_dir=monitor_stack_base_dir) monitor_stack_workdir = os.path.dirname(local_path_monitor_stack) monitoring_stack_archive_file = os.path.basename( local_path_monitor_stack) logger.info('Extracting data from archive {}'.format( arch['file_path'])) if arch['type'] == 'prometheus': monitoring_stack_data_dir = os.path.join(monitor_stack_workdir, 'monitor_data_dir') cmd = dedent(""" mkdir -p {data_dir} cd {data_dir} cp ../{archive} ./ tar -xvf {archive} chmod -R 777 {data_dir} """.format(data_dir=monitoring_stack_data_dir, archive=monitoring_stack_archive_file)) result = lr.run(cmd, ignore_status=True) else: branches = re.search( '(?P<monitoring_branch>branch-[\d]+\.[\d]+?)_(?P<scylla_version>[\d]+\.[\d]+?)', monitoring_stack_archive_file) monitoring_branch = branches.group('monitoring_branch') scylla_version = branches.group('scylla_version') cmd = dedent(""" cd {workdir} tar -xvf {archive} """.format(workdir=monitor_stack_workdir, 
archive=monitoring_stack_archive_file)) result = lr.run(cmd, ignore_status=True) if not result.ok: logger.warning( "During restoring file {} next errors occured:\n {}".format( arch['link'], result)) return False logger.info("Extracting data finished") logger.info( 'Monitoring stack files available {}'.format(monitor_stack_workdir)) monitoring_dockers_dir = os.path.join( monitor_stack_workdir, 'scylla-monitoring-{}'.format(monitoring_branch)) def upload_sct_dashboards(): sct_dashboard_file_name = "scylla-dash-per-server-nemesis.{}.json".format( scylla_version) sct_dashboard_file = os.path.join(monitoring_dockers_dir, 'sct_monitoring_addons', sct_dashboard_file_name) if not os.path.exists(sct_dashboard_file): logger.info('There is no dashboard {}. Skip load dashboard'.format( sct_dashboard_file_name)) return False dashboard_url = 'http://*****:*****@retrying(n=3, sleep_time=1, message='Start docker containers') def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version): lr.run('cd {}; ./kill-all.sh'.format(monitoring_dockers_dir)) cmd = dedent("""cd {monitoring_dockers_dir}; ./start-all.sh \ -s {monitoring_dockers_dir}/config/scylla_servers.yml \ -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \ -d {monitoring_stack_data_dir} -v {scylla_version}""".format( **locals())) res = lr.run(cmd, ignore_status=True) if res.ok: r = lr.run('docker ps') logger.info(r.stdout.encode('utf-8')) return True else: raise Exception('dockers start failed. {}'.format(res)) status = False status = start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version) upload_sct_dashboards() upload_annotations() return status