def kill_running_monitoring_stack_services():
    """Force-remove every docker container that belongs to the monitoring stack."""
    runner = LocalCmdRunner()
    for service in get_monitoring_stack_services():
        LOGGER.info("Killing %s", service['service'])
        remove_cmd = 'docker rm -f {name}-{port}'.format(name=service['name'],
                                                         port=service['port'])
        runner.run(remove_cmd, ignore_status=True)
def stop_rsyslog():
    """Stop the rsyslog docker container started by start_rsyslog().

    Best-effort cleanup: kills the container (ignoring failures), removes the
    generated rsyslog config file, and resets the module-level state so a
    subsequent start_rsyslog() starts from scratch.

    :return: None
    """
    global RSYSLOG_DOCKER_ID, RSYSLOG_CONF_PATH  # pylint: disable=global-statement
    if RSYSLOG_DOCKER_ID:
        local_runner = LocalCmdRunner()
        local_runner.run("docker kill {id}".format(id=RSYSLOG_DOCKER_ID), ignore_status=True)
    if RSYSLOG_CONF_PATH:
        try:
            os.remove(RSYSLOG_CONF_PATH)
        except OSError:
            # Narrowed from a bare `except Exception`: only filesystem errors
            # (file already gone, permissions) are expected and safe to ignore.
            pass
    RSYSLOG_CONF_PATH = None
    RSYSLOG_DOCKER_ID = None
def start_rsyslog(docker_name, log_dir, port="514"):
    """
    Start rsyslog in a docker, for getting logs from db-nodes

    Records the container id and config path in module globals so that
    stop_rsyslog() (registered via atexit below) can clean up.

    :param docker_name: name of the docker instance
    :param log_dir: directory where to store the logs
    :param port: [Optional] the port binding for the docker run
    :return: the listening port
    """
    global RSYSLOG_DOCKER_ID, RSYSLOG_CONF_PATH  # pylint: disable=global-statement
    log_dir = os.path.abspath(log_dir)

    # cause of docker-in-docker, we need to capture the host log dir for mounting it
    # _SCT_BASE_DIR is set in hydra.sh
    base_dir = os.environ.get("_SCT_BASE_DIR", None)
    if base_dir:
        mount_log_dir = os.path.join(base_dir, os.path.basename(log_dir))
    else:
        mount_log_dir = log_dir

    # Generate the rsyslog configuration the container mounts as /etc/rsyslog.conf.
    conf_path = generate_conf_file()
    RSYSLOG_CONF_PATH = conf_path

    local_runner = LocalCmdRunner()
    # NOTE(review): /etc/passwd and /etc/group are mounted read-only —
    # presumably so files written under /logs get host-matching ownership; confirm.
    res = local_runner.run('''
        mkdir -p {log_dir};
        docker run --rm -d \
        -v /etc/passwd:/etc/passwd:ro \
        -v /etc/group:/etc/group:ro \
        -v {mount_log_dir}:/logs \
        -v {conf_path}:/etc/rsyslog.conf \
        -p {port} \
        --name {docker_name}-rsyslogd rsyslog/syslog_appliance_alpine
    '''.format(log_dir=log_dir, mount_log_dir=mount_log_dir, conf_path=conf_path,
               port=port, docker_name=docker_name))
    # `docker run -d` prints the new container id on stdout.
    RSYSLOG_DOCKER_ID = res.stdout.strip()
    LOGGER.info("Rsyslog started. Container id: %s", RSYSLOG_DOCKER_ID)
    atexit.register(stop_rsyslog)

    # `-p {port}` without a host part lets docker choose the host port;
    # `docker port` reports it as "0.0.0.0:<port>", so take the part after ':'.
    res = local_runner.run('docker port {0} 514'.format(RSYSLOG_DOCKER_ID))
    listening_port = res.stdout.strip().split(':')[1]
    return listening_port
def stop_auto_ssh(docker_name, node):
    """
    stops an autossh docker instance

    Saves the container's logs to the node's logdir before removing it.

    :param docker_name: prefix of the docker name (cluster.Setup.test_id() usually would be used)
    :param node: an instance of a class derived from BaseNode that has ssh_login_info
    :return: None
    """
    # pylint: disable=protected-access
    host_name = node.ssh_login_info['hostname']
    container_name = f"{docker_name}-{host_name}-autossh"
    local_runner = LocalCmdRunner()
    LOGGER.debug("Saving autossh container logs")
    local_runner.run(f"docker logs {container_name} &> {node.logdir}/autossh.log", ignore_status=True)
    # Lazy %-formatting (not an f-string) so the message is only built when
    # DEBUG is enabled — consistent with the other LOGGER calls in this module.
    LOGGER.debug("Killing %s", container_name)
    local_runner.run(f"docker rm -f {container_name}", ignore_status=True)
def start_auto_ssh(docker_name, node, local_port, remote_port, ssh_mode="-R"): """ Starts a reverse port forwarding with autossh inside a docker container :param docker_name: prefix of the docker name (cluster.Setup.test_id() usually would be used) :param node: an instance of a class derived from BaseNode that has ssh_login_info :param local_port: the destination port on local machine :param remote_port: the source port on the remote :param ssh_mode: define tunnel mode: -R reverse mode, -L direct mode :return: None """ # pylint: disable=protected-access host_name = node.ssh_login_info['hostname'] user_name = node.ssh_login_info['user'] key_path = node.ssh_login_info['key_file'] local_runner = LocalCmdRunner() res = local_runner.run(''' docker run -d --network=host \ -e SSH_HOSTNAME={host_name} \ -e SSH_HOSTUSER={user_name} \ -e SSH_TUNNEL_HOST=127.0.0.1 \ -e SSH_MODE={ssh_mode} \ -e SSH_TUNNEL_LOCAL={local_port} \ -e SSH_TUNNEL_REMOTE={remote_port} \ -e AUTOSSH_GATETIME=0 \ -v {key_path}:/id_rsa \ --restart always \ --name {docker_name}-{host_name}-autossh jnovack/autossh '''.format(host_name=host_name, user_name=user_name, ssh_mode=ssh_mode, local_port=local_port, remote_port=remote_port, key_path=key_path, docker_name=docker_name)) atexit.register(stop_auto_ssh, docker_name, node) LOGGER.debug('{docker_name}-{host_name}-autossh {res.stdout}'.format(docker_name=docker_name, host_name=host_name, res=res))
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):
    """Restart the scylla-monitoring docker containers.

    Kills any previously running stack (ignoring errors when nothing is up),
    then runs start-all.sh with the module-level grafana/alertmanager/prometheus
    ports and the given data directory.

    :param monitoring_dockers_dir: scylla-monitoring checkout containing the *-all.sh scripts
    :param monitoring_stack_data_dir: prometheus data directory passed via -d
    :param scylla_version: version string passed via -v
    :return: None
    """
    graf_port = GRAFANA_DOCKER_PORT
    alert_port = ALERT_DOCKER_PORT
    prom_port = PROMETHEUS_DOCKER_PORT
    runner = LocalCmdRunner()
    # Best-effort teardown of a previous stack; explicit keyword arguments
    # replace the fragile `.format(**locals())` idiom.
    kill_cmd = ('cd {monitoring_dockers_dir}; '
                './kill-all.sh -g {graf_port} -m {alert_port} -p {prom_port}').format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        graf_port=graf_port, alert_port=alert_port, prom_port=prom_port)
    runner.run(kill_cmd, ignore_status=True, verbose=False)

    cmd = dedent("""cd {monitoring_dockers_dir};
        ./start-all.sh \
        -g {graf_port} -m {alert_port} -p {prom_port} \
        -s {monitoring_dockers_dir}/config/scylla_servers.yml \
        -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
        -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        graf_port=graf_port, alert_port=alert_port, prom_port=prom_port,
        monitoring_stack_data_dir=monitoring_stack_data_dir,
        scylla_version=scylla_version))
    res = runner.run(cmd)
    if res.ok:
        LOGGER.info("Docker containers for monitoring stack are started")
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):
    """Start the scylla-monitoring docker containers, tearing down any running stack first.

    :param monitoring_dockers_dir: scylla-monitoring checkout containing the *-all.sh scripts
    :param monitoring_stack_data_dir: prometheus data directory passed via -d
    :param scylla_version: version string passed via -v
    :return: True when start-all.sh succeeded
    :raises Exception: when start-all.sh failed
    """
    runner = LocalCmdRunner()
    # Kill any stack that is already up before starting a fresh one.
    runner.run('cd {}; ./kill-all.sh'.format(monitoring_dockers_dir))
    # Explicit keyword arguments replace the fragile `.format(**locals())` idiom.
    cmd = dedent("""cd {monitoring_dockers_dir};
        ./start-all.sh \
        -s {monitoring_dockers_dir}/config/scylla_servers.yml \
        -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \
        -d {monitoring_stack_data_dir} -v {scylla_version}""".format(
        monitoring_dockers_dir=monitoring_dockers_dir,
        monitoring_stack_data_dir=monitoring_stack_data_dir,
        scylla_version=scylla_version))
    res = runner.run(cmd, ignore_status=True)
    if not res.ok:
        raise Exception('dockers start failed. {}'.format(res))
    ps_result = runner.run('docker ps')
    # stdout is already str in Python 3 — the former .encode('utf-8') logged
    # a bytes repr (b'...') instead of readable output.
    LOGGER.info(ps_result.stdout)
    return True
def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version):  # pylint: disable=unused-argument
    """Start the scylla-monitoring docker stack with SCT-specific config stripped.

    Kills any running stack, empties the scylla server targets, removes
    SCT-added prometheus scrape jobs from the template, then runs start-all.sh
    with long data retention and the dark grafana theme.
    """
    graf_port = GRAFANA_DOCKER_PORT
    alert_port = ALERT_DOCKER_PORT
    prom_port = PROMETHEUS_DOCKER_PORT
    lr = LocalCmdRunner()  # pylint: disable=invalid-name
    # Best-effort teardown of a previously running stack.
    lr.run(
        'cd {monitoring_dockers_dir}; ./kill-all.sh -g {graf_port} -m {alert_port} -p {prom_port}'
        .format(**locals()),
        ignore_status=True, verbose=False)

    # clear scylla nodes from configuration
    servers_yaml = Path(monitoring_dockers_dir) / 'config' / 'scylla_servers.yml'
    servers_yaml.write_text("- targets: []")

    # clear SCT scrape configurations
    prom_tmpl_file = Path(monitoring_dockers_dir) / 'prometheus' / 'prometheus.yml.template'
    templ_yaml = yaml.safe_load(prom_tmpl_file.read_text())

    def remove_sct_metrics(metric):
        # SCT-added scrape jobs carry a '_metrics' suffix in their job name.
        return '_metrics' not in metric['job_name']

    templ_yaml["scrape_configs"] = list(filter(remove_sct_metrics, templ_yaml["scrape_configs"]))
    prom_tmpl_file.write_text(yaml.safe_dump(templ_yaml))

    # `echo "" > UA.sh` — presumably disables the user-agent/analytics hook
    # sourced by start-all.sh; confirm against the scylla-monitoring scripts.
    # The $(grep ...) subshells pass --no-renderer/--no-loki only when the
    # installed start-all.sh version actually supports those flags.
    cmd = dedent("""cd {monitoring_dockers_dir};
        echo "" > UA.sh
        ./start-all.sh \
        $(grep -q -- --no-renderer ./start-all.sh && echo "--no-renderer") \
        $(grep -q -- --no-loki ./start-all.sh && echo "--no-loki") \
        -g {graf_port} -m {alert_port} -p {prom_port} \
        -s {monitoring_dockers_dir}/config/scylla_servers.yml \
        -d {monitoring_stack_data_dir} -v {scylla_version} \
        -b '-storage.tsdb.retention.time=100y' \
        -c 'GF_USERS_DEFAULT_THEME=dark'""".format(**locals()))
    res = lr.run(cmd)
    if res.ok:
        LOGGER.info("Docker containers for monitoring stack are started")
def install_phantom_js(self):
    """Install phantom_js to sct root dir

    If sct runs outside the docker container,
    sct will install the phantomjs to sct root dir

    Sets ``self.phantomjs_dir`` to the extracted directory on success.
    """
    localrunner = LocalCmdRunner()
    # Guard clause: nothing to do when a previous run already unpacked it.
    if self.phantomjs_installed:
        LOGGER.debug("PhantomJS is already installed!")
        return
    LOGGER.debug("Installing phantomjs to sct root dir")
    phantomjs_tar = "{0.phantomjs_base}.tar.bz2".format(self)
    # Explicit keyword arguments replace the fragile `.format(**locals())`
    # idiom (and make the former `unused-variable` pylint disable unnecessary).
    phantomjs_url = "https://bitbucket.org/ariya/phantomjs/downloads/{phantomjs_tar}".format(
        phantomjs_tar=phantomjs_tar)
    install_phantom_js_script = dedent("""
        curl {phantomjs_url} -o {phantomjs_tar} -L
        tar xvfj {phantomjs_tar}
    """.format(phantomjs_url=phantomjs_url, phantomjs_tar=phantomjs_tar))
    localrunner.run("bash -ce '%s'" % install_phantom_js_script)
    self.phantomjs_dir = os.path.join(self.sct_base_path, self.phantomjs_base)
def is_auto_ssh_running(docker_name, node):
    """
    check it auto_ssh docker running

    :param docker_name: docker name
    :type docker_name: str
    :param node: Node instance where tunnel was up
    :type node: BaseNode
    :returns: true if running, false otherwise
    :rtype: {boolean}
    """
    host_name = node.ssh_login_info['hostname']
    expected_name = f"{docker_name}-{host_name}-autossh"
    ps_output = LocalCmdRunner().run('docker ps', ignore_status=True).stdout
    return expected_name in ps_output
def restore_monitoring_stack(test_id): from sdcm.remote import LocalCmdRunner lr = LocalCmdRunner() logger.info("Checking that docker is available...") result = lr.run('docker ps', ignore_status=True, verbose=False) if result.ok: logger.info('Docker is available') else: logger.warning( 'Docker is not available on your computer. Please install docker software before continue' ) return False monitor_stack_base_dir = tempfile.mkdtemp() stored_files_by_test_id = list_logs_by_test_id(test_id) monitor_stack_archives = [] for f in stored_files_by_test_id: if f['type'] in ['monitoring_data_stack', 'prometheus']: monitor_stack_archives.append(f) if not monitor_stack_archives or len(monitor_stack_archives) < 2: logger.warning( 'There is no available archive files for monitoring data stack restoring for test id : {}' .format(test_id)) return False for arch in monitor_stack_archives: logger.info('Download file {} to directory {}'.format( arch['link'], monitor_stack_base_dir)) local_path_monitor_stack = S3Storage().download_file( arch['link'], dst_dir=monitor_stack_base_dir) monitor_stack_workdir = os.path.dirname(local_path_monitor_stack) monitoring_stack_archive_file = os.path.basename( local_path_monitor_stack) logger.info('Extracting data from archive {}'.format( arch['file_path'])) if arch['type'] == 'prometheus': monitoring_stack_data_dir = os.path.join(monitor_stack_workdir, 'monitor_data_dir') cmd = dedent(""" mkdir -p {data_dir} cd {data_dir} cp ../{archive} ./ tar -xvf {archive} chmod -R 777 {data_dir} """.format(data_dir=monitoring_stack_data_dir, archive=monitoring_stack_archive_file)) result = lr.run(cmd, ignore_status=True) else: branches = re.search( '(?P<monitoring_branch>branch-[\d]+\.[\d]+?)_(?P<scylla_version>[\d]+\.[\d]+?)', monitoring_stack_archive_file) monitoring_branch = branches.group('monitoring_branch') scylla_version = branches.group('scylla_version') cmd = dedent(""" cd {workdir} tar -xvf {archive} """.format(workdir=monitor_stack_workdir, 
archive=monitoring_stack_archive_file)) result = lr.run(cmd, ignore_status=True) if not result.ok: logger.warning( "During restoring file {} next errors occured:\n {}".format( arch['link'], result)) return False logger.info("Extracting data finished") logger.info( 'Monitoring stack files available {}'.format(monitor_stack_workdir)) monitoring_dockers_dir = os.path.join( monitor_stack_workdir, 'scylla-monitoring-{}'.format(monitoring_branch)) def upload_sct_dashboards(): sct_dashboard_file_name = "scylla-dash-per-server-nemesis.{}.json".format( scylla_version) sct_dashboard_file = os.path.join(monitoring_dockers_dir, 'sct_monitoring_addons', sct_dashboard_file_name) if not os.path.exists(sct_dashboard_file): logger.info('There is no dashboard {}. Skip load dashboard'.format( sct_dashboard_file_name)) return False dashboard_url = 'http://*****:*****@retrying(n=3, sleep_time=1, message='Start docker containers') def start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version): lr.run('cd {}; ./kill-all.sh'.format(monitoring_dockers_dir)) cmd = dedent("""cd {monitoring_dockers_dir}; ./start-all.sh \ -s {monitoring_dockers_dir}/config/scylla_servers.yml \ -n {monitoring_dockers_dir}/config/node_exporter_servers.yml \ -d {monitoring_stack_data_dir} -v {scylla_version}""".format( **locals())) res = lr.run(cmd, ignore_status=True) if res.ok: r = lr.run('docker ps') logger.info(r.stdout.encode('utf-8')) return True else: raise Exception('dockers start failed. {}'.format(res)) status = False status = start_dockers(monitoring_dockers_dir, monitoring_stack_data_dir, scylla_version) upload_sct_dashboards() upload_annotations() return status