def test_helm(k8s_cluster):
    with k8s_cluster.create_resources([NGINX_YAML_PATH]), tiller_rbac_resources(
        k8s_cluster
    ), fake_backend.start() as backend:
        init_helm(k8s_cluster)

        with k8s_cluster.run_tunnels(backend) as proxy_pod_ip:
            with release_values_yaml(k8s_cluster, proxy_pod_ip, backend) as values_path:
                install_helm_chart(k8s_cluster, values_path)
            try:
                assert wait_for(p(has_datapoint, backend, dimensions={"plugin": "nginx"}))
                assert wait_for(p(has_datapoint, backend, dimensions={"plugin": "signalfx-metadata"}))
            finally:
                for pod in get_pods_by_labels("app=signalfx-agent", namespace=k8s_cluster.test_namespace):
                    print("pod/%s:" % pod.metadata.name)
                    status = exec_pod_command(
                        pod.metadata.name, AGENT_STATUS_COMMAND, namespace=k8s_cluster.test_namespace
                    )
                    print("Agent Status:\n%s" % status)
                    logs = get_pod_logs(pod.metadata.name, namespace=k8s_cluster.test_namespace)
                    print("Agent Logs:\n%s" % logs)

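These tests lean on two small helpers throughout: `p`, which by its usage is assumed to be an alias for `functools.partial`, and `wait_for`, a polling assertion helper. A minimal sketch of what `wait_for` is assumed to look like (the real implementation may differ in defaults and error reporting):

import time
from functools import partial as p  # assumed alias, inferred from usage


def wait_for(predicate, timeout_seconds=30, interval_seconds=0.5):
    """Poll `predicate` until it returns truthy or the timeout elapses."""
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        if predicate():
            return True
        time.sleep(interval_seconds)
    return False
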
def run_init_system_image(
    base_image,
    with_socat=True,
    path=DOCKERFILES_DIR,
    dockerfile=None,
    ingest_host="ingest.us0.signalfx.com",  # Whatever value is used here needs a self-signed cert in ./images/certs/
    api_host="api.us0.signalfx.com",  # Whatever value is used here needs a self-signed cert in ./images/certs/
    command=None,
    buildargs=None,
):  # pylint: disable=too-many-arguments
    image_id = retry(lambda: build_base_image(base_image, path, dockerfile, buildargs), docker.errors.BuildError)
    print("Image ID: %s" % image_id)
    if with_socat:
        backend_ip = "127.0.0.1"
    else:
        backend_ip = get_host_ip()
    with fake_backend.start(ip_addr=backend_ip) as backend:
        container_options = {
            # Init systems running in the container want permissions
            "privileged": True,
            "volumes": {
                "/sys/fs/cgroup": {"bind": "/sys/fs/cgroup", "mode": "ro"},
                "/tmp/scratch": {"bind": "/tmp/scratch", "mode": "rw"},
            },
            "extra_hosts": {
                # Socat will be running on localhost to forward requests to
                # these hosts to the fake backend
                ingest_host: backend.ingest_host,
                api_host: backend.api_host,
            },
        }
        if command:
            container_options["command"] = command

        with run_container(image_id, wait_for_ip=True, **container_options) as cont:
            if with_socat:
                # Proxy the backend calls through a fake HTTPS endpoint so that we
                # don't have to change the default configuration included by the
                # package.  The base_image used should trust the self-signed certs
                # included in the images dir so that the agent doesn't throw TLS
                # verification errors.
                with socat_https_proxy(
                    cont, backend.ingest_host, backend.ingest_port, ingest_host, "127.0.0.1"
                ), socat_https_proxy(cont, backend.api_host, backend.api_port, api_host, "127.0.0.2"):
                    yield [cont, backend]
            else:
                yield [cont, backend]

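For context, `socat_https_proxy` is assumed to run a socat TLS terminator inside the container: it listens on the loopback alias that `extra_hosts` maps the real hostname to, presents the matching self-signed cert, and forwards decrypted traffic to the fake backend. A rough sketch under those assumptions (the cert path and cleanup are hypothetical):

from contextlib import contextmanager


@contextmanager
def socat_https_proxy(cont, backend_host, backend_port, hostname, local_ip):
    # Hypothetical sketch: terminate TLS for `hostname` on `local_ip:443`
    # inside the container and forward plaintext to the fake backend.
    cont.exec_run(
        "socat OPENSSL-LISTEN:443,bind=%s,fork,reuseaddr,cert=/etc/certs/%s.pem,verify=0 TCP4:%s:%d"
        % (local_ip, hostname, backend_host, backend_port),
        detach=True,
    )
    try:
        yield
    finally:
        # Kill the forwarder; container teardown would also clean it up.
        cont.exec_run("pkill -f OPENSSL-LISTEN", detach=True)
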
def test_collectd_dogstatsd():
    with fake_backend.start() as fake_services:
        # configure the dogstatsd plugin to send to fake ingest
        config = DOGSTATSD_CONFIG.substitute(ingestEndpoint=fake_services.ingest_url)

        # start the agent with the dogstatsd plugin config
        with Agent.run(config, fake_services=fake_services) as agent:
            # wait until the dogstatsd plugin logs the address and port it is listening on
            assert wait_for(p(regex_search_matches_output, agent.get_output, DOGSTATSD_RE.search))

            # scrape the host and port that the dogstatsd plugin is listening on
            regex_results = DOGSTATSD_RE.search(agent.output)
            host = regex_results.groups()[1]
            port = int(regex_results.groups()[2])

            # wait for dogstatsd port to open
            assert wait_for(p(udp_port_open_locally, port))

            # send datapoints to the dogstatsd listener
            for _ in range(0, 10):
                send_udp_message(host, port, "dogstatsd.test.metric:55555|g|#dimension1:value1,dimension2:value2")
                time.sleep(1)

            # wait for fake ingest to receive the dogstatsd metrics
            assert wait_for(p(has_datapoint_with_metric_name, agent.fake_services, "dogstatsd.test.metric"))

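`send_udp_message` is assumed to be a one-shot UDP datagram helper along these lines (a sketch, not necessarily the repo's exact implementation):

import socket


def send_udp_message(host, port, msg):
    """Fire a single UDP datagram at host:port (dogstatsd traffic is fire-and-forget)."""
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as sock:
        sock.sendto(msg.encode("utf-8"), (host, port))
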
def run_init_system_image(base_image):
    image_id = build_base_image(base_image)
    print("Image ID: %s" % image_id)
    with fake_backend.start() as backend:
        container_options = {
            # Init systems running in the container want permissions
            "privileged": True,
            "volumes": {
                "/sys/fs/cgroup": {"bind": "/sys/fs/cgroup", "mode": "ro"},
                "/tmp/scratch": {"bind": "/tmp/scratch", "mode": "rw"},
            },
            "extra_hosts": {
                # Socat will be running on localhost to forward requests to
                # these hosts to the fake backend
                "ingest.signalfx.com": "127.0.0.1",
                "api.signalfx.com": "127.0.0.2",
            },
        }
        with run_container(image_id, wait_for_ip=False, **container_options) as cont:
            # Proxy the backend calls through a fake HTTPS endpoint so that we
            # don't have to change the default configuration included by the
            # package.  The base_image used should trust the self-signed certs
            # included in the images dir so that the agent doesn't throw TLS
            # verification errors.
            with socat_https_proxy(
                cont, backend.ingest_host, backend.ingest_port, "ingest.signalfx.com", "127.0.0.1"
            ), socat_https_proxy(cont, backend.api_host, backend.api_port, "api.signalfx.com", "127.0.0.2"):
                yield [cont, backend]

def ensure_fake_backend(host=None, backend_options=None, fake_services=None):
    if host is None:
        host = get_unique_localhost()

    if fake_services is None:
        with fake_backend.start(host, **(backend_options or {})) as started_fake_services:
            yield started_fake_services
    else:
        yield fake_services

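Since `ensure_fake_backend` is a generator, it is presumably consumed either as a pytest fixture or via `contextlib.contextmanager`; a usage sketch assuming the latter:

from contextlib import contextmanager

ensure_fake_backend_ctx = contextmanager(ensure_fake_backend)

# Starts a backend on a unique localhost address when none is supplied,
# or passes a pre-started one straight through.
with ensure_fake_backend_ctx() as fake_services:
    assert fake_services.ingest_url
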
def run_splunk(version, ingest_host="ingest.us0.signalfx.com", buildargs=None, print_logs=True, **runargs):
    if buildargs is None:
        buildargs = {}
    buildargs["SPLUNK_VERSION"] = version
    if runargs.get("environment") is None:
        runargs["environment"] = {"SPLUNK_START_ARGS": "--accept-license", "SPLUNK_PASSWORD": "******"}
    client = get_docker_client()
    image, _ = retry(
        lambda: client.images.build(
            path=str(REPO_ROOT_DIR),
            dockerfile=str(SPLUNK_DOCKER_DIR / "Dockerfile"),
            rm=True,
            forcerm=True,
            buildargs=buildargs,
        ),
        docker.errors.BuildError,
    )
    with fake_backend.start() as backend:
        runargs["volumes"] = {"/tmp/scratch": {"bind": "/tmp/scratch", "mode": "rw"}}
        runargs["extra_hosts"] = {
            # Socat will be running on localhost to forward requests to
            # these hosts to the fake backend
            ingest_host: backend.ingest_host,
        }
        with run_container(image.id, print_logs=print_logs, **runargs) as cont:
            # Workaround for https://bugzilla.redhat.com/show_bug.cgi?id=1769831 which
            # causes yum/dnf to exit with error code 141 when importing GPG keys.
            cont.exec_run("mkdir -p /run/user/0", user="******")

            # Proxy the backend calls through a fake HTTPS endpoint so that we
            # don't have to change the default configuration included by the
            # package.  The base_image used should trust the self-signed certs
            # included in the images dir so that the agent doesn't throw TLS
            # verification errors.
            with socat_https_proxy(cont, backend.ingest_host, backend.ingest_port, ingest_host, "127.0.0.1"):
                yield [cont, backend]

def test_helm(k8s_cluster, helm_version):
    helm_major_version = int(helm_version.split(".")[0])
    with run_helm_image(k8s_cluster, helm_version) as cont:
        with k8s_cluster.create_resources([APP_YAML_PATH]), tiller_rbac_resources(
            k8s_cluster, helm_major_version
        ), fake_backend.start() as backend:
            if helm_major_version < 3:
                init_helm(k8s_cluster, cont, helm_major_version)

            with k8s_cluster.run_tunnels(backend) as proxy_pod_ip:
                with release_values_yaml(k8s_cluster, proxy_pod_ip, backend) as values_path:
                    copy_file_into_container(values_path, cont, values_path)
                    install_helm_chart(k8s_cluster, values_path, cont, helm_major_version)
                try:
                    assert wait_for(
                        p(
                            has_datapoint,
                            backend,
                            dimensions={"container_name": "prometheus", "application": "helm-test"},
                        ),
                        timeout_seconds=60,
                    )
                    assert wait_for(
                        p(has_datapoint, backend, dimensions={"plugin": "signalfx-metadata"}), timeout_seconds=60
                    )
                finally:
                    for pod in get_pods_by_labels("app=signalfx-agent", namespace=k8s_cluster.test_namespace):
                        print("pod/%s:" % pod.metadata.name)
                        status = exec_pod_command(
                            pod.metadata.name, AGENT_STATUS_COMMAND, namespace=k8s_cluster.test_namespace
                        )
                        print("Agent Status:\n%s" % status)
                        logs = get_pod_logs(pod.metadata.name, namespace=k8s_cluster.test_namespace)
                        print("Agent Logs:\n%s" % logs)
                    print("\nDatapoints received:")
                    for dp in backend.datapoints:
                        print_dp_or_event(dp)
                    print("\nEvents received:")
                    for event in backend.events:
                        print_dp_or_event(event)
                    print(f"\nDimensions set: {backend.dims}")

def test_helm(minikube, k8s_namespace):
    with minikube.create_resources([NGINX_YAML_PATH], namespace=k8s_namespace):
        with fake_backend.start(ip_addr=get_host_ip()) as backend:
            create_cluster_admin_rolebinding(minikube)
            init_helm(minikube)
            update_values_yaml(minikube, backend, k8s_namespace)
            install_helm_chart(minikube, k8s_namespace)
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "nginx"))
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "signalfx-metadata"))

def test_monitor_with_endpoints(k8s_monitor_with_endpoints, k8s_observer, k8s_test_timeout, agent_image, minikube):
    monitor, yamls = k8s_monitor_with_endpoints
    monitor_doc = os.path.join(DOCS_DIR, "monitors", monitor["type"].replace("/", "-") + ".md")
    observer_doc = os.path.join(DOCS_DIR, "observers", k8s_observer + ".md")
    metrics_txt = os.path.join(CUR_DIR, monitor["type"].replace("/", "-") + "-metrics.txt")
    expected_metrics = get_metrics_from_doc(monitor_doc)
    expected_dims = get_dims_from_doc(monitor_doc).union(get_dims_from_doc(observer_doc), {"kubernetes_cluster"})
    if len(expected_metrics) == 0 and os.path.isfile(metrics_txt):
        with open(metrics_txt, "r") as fd:
            expected_metrics = {m.strip() for m in fd.readlines() if len(m.strip()) > 0}
    assert (
        len(expected_metrics) > 0 and len(expected_dims) > 0
    ), "expected metrics and dimensions lists are both empty!"
    assert len(yamls) > 0, "yamls list is empty!"
    with fake_backend.start(ip=get_host_ip()) as backend:
        with minikube.deploy_yamls(yamls=yamls):
            with minikube.deploy_agent(
                AGENT_CONFIGMAP_PATH,
                AGENT_DAEMONSET_PATH,
                AGENT_SERVICEACCOUNT_PATH,
                observer=k8s_observer,
                monitors=[monitor],
                cluster_name="minikube",
                backend=backend,
                image_name=agent_image["name"],
                image_tag=agent_image["tag"],
                namespace="default",
            ) as agent:
                print(
                    "\nCollected %d metric(s) and %d dimension(s) to test for %s."
                    % (len(expected_metrics), len(expected_dims), monitor["type"])
                )
                if len(expected_metrics) > 0 and len(expected_dims) > 0:
                    assert wait_for(p(any_metric_has_any_dim_key, backend, expected_metrics, expected_dims)), (
                        "timed out waiting for any metric in %s with any dimension key in %s!\n\n"
                        "AGENT STATUS:\n%s\n\nAGENT CONTAINER LOGS:\n%s\n"
                        % (expected_metrics, expected_dims, agent.get_status(), agent.get_container_logs())
                    )
                elif len(expected_metrics) > 0:
                    assert wait_for(p(any_metric_found, backend, expected_metrics)), (
                        "timed out waiting for any metric in %s!\n\nAGENT STATUS:\n%s\n\nAGENT CONTAINER LOGS:\n%s\n"
                        % (expected_metrics, agent.get_status(), agent.get_container_logs())
                    )
                else:
                    assert wait_for(p(any_dim_key_found, backend, expected_dims)), (
                        "timed out waiting for any dimension key in %s!\n\nAGENT STATUS:\n%s\n\nAGENT CONTAINER LOGS:\n%s\n"
                        % (expected_dims, agent.get_status(), agent.get_container_logs())
                    )

def test_plaintext_passwords(agent_image, minikube):
    with fake_backend.start(ip=get_host_ip()) as backend:
        with minikube.deploy_agent(
            AGENT_CONFIGMAP_PATH,
            AGENT_DAEMONSET_PATH,
            AGENT_SERVICEACCOUNT_PATH,
            observer="k8s-api",
            monitors=MONITORS_WITHOUT_ENDPOINTS + [m[0] for m in MONITORS_WITH_ENDPOINTS],
            cluster_name="minikube",
            backend=backend,
            image_name=agent_image["name"],
            image_tag=agent_image["tag"],
            namespace="default",
        ) as agent:
            agent_status = agent.get_status()
            container_logs = agent.get_container_logs()
            assert "testing123" not in agent_status, (
                "plaintext password(s) found in agent-status output!\n\n%s\n" % agent_status
            )
            assert "testing123" not in container_logs, (
                "plaintext password(s) found in agent container logs!\n\n%s\n" % container_logs
            )

def test_monitor_without_endpoints(k8s_monitor_without_endpoints, k8s_test_timeout, agent_image, minikube):
    monitor = k8s_monitor_without_endpoints
    monitors = [monitor]
    if monitor["type"] == "collectd/cpu":
        monitors.append({"type": "collectd/signalfx-metadata"})
    elif monitor["type"] == "collectd/signalfx-metadata":
        monitors.append({"type": "collectd/cpu"})
    if monitor["type"] == "collectd/docker":
        expected_metrics = get_metrics_from_doc(os.path.join(DOCS_DIR, "monitors", "docker-container-stats.md"))
        expected_dims = get_dims_from_doc(os.path.join(DOCS_DIR, "monitors", "docker-container-stats.md"))
    elif monitor["type"] == "collectd/statsd":
        expected_metrics = {"gauge.statsd.test"}
        expected_dims = {"foo", "dim"}
    else:
        monitor_doc = os.path.join(DOCS_DIR, "monitors", monitor["type"].replace("/", "-") + ".md")
        expected_metrics = get_metrics_from_doc(monitor_doc)
        expected_dims = get_dims_from_doc(monitor_doc)
    observer_doc = os.path.join(DOCS_DIR, "observers", "k8s-api.md")
    expected_dims = expected_dims.union(get_dims_from_doc(observer_doc), {"kubernetes_cluster"})
    metrics_txt = os.path.join(CUR_DIR, monitor["type"].replace("/", "-") + "-metrics.txt")
    if len(expected_metrics) == 0 and os.path.isfile(metrics_txt):
        with open(metrics_txt, "r") as fd:
            expected_metrics = {m.strip() for m in fd.readlines() if len(m.strip()) > 0}
    with fake_backend.start(ip=get_host_ip()) as backend:
        with minikube.deploy_agent(
            AGENT_CONFIGMAP_PATH,
            AGENT_DAEMONSET_PATH,
            AGENT_SERVICEACCOUNT_PATH,
            observer="k8s-api",
            monitors=monitors,
            cluster_name="minikube",
            backend=backend,
            image_name=agent_image["name"],
            image_tag=agent_image["tag"],
            namespace="default",
        ) as agent:
            if monitor["type"] == "collectd/statsd":
                agent.container.exec_run(
                    [
                        "/bin/bash",
                        "-c",
                        'while true; do echo "statsd.[foo=bar,dim=val]test:1|g" | nc -w 1 -u 127.0.0.1 8125; sleep 1; done',
                    ],
                    detach=True,
                )
            if monitor["type"] not in ["collectd/cpufreq", "collectd/custom", "kubernetes-events"]:
                print(
                    "\nCollected %d metric(s) and %d dimension(s) to test for %s."
                    % (len(expected_metrics), len(expected_dims), monitor["type"])
                )
                if len(expected_metrics) > 0 and len(expected_dims) > 0:
                    assert wait_for(p(any_metric_has_any_dim_key, backend, expected_metrics, expected_dims)), (
                        "timed out waiting for any metric in %s with any dimension key in %s!\n\n"
                        "AGENT STATUS:\n%s\n\nAGENT CONTAINER LOGS:\n%s\n"
                        % (expected_metrics, expected_dims, agent.get_status(), agent.get_container_logs())
                    )
                elif len(expected_metrics) > 0:
                    assert wait_for(p(any_metric_found, backend, expected_metrics)), (
                        "timed out waiting for any metric in %s!\n\nAGENT STATUS:\n%s\n\nAGENT CONTAINER LOGS:\n%s\n"
                        % (expected_metrics, agent.get_status(), agent.get_container_logs())
                    )
                elif len(expected_dims) > 0:
                    assert wait_for(p(any_dim_key_found, backend, expected_dims)), (
                        "timed out waiting for any dimension key in %s!\n\nAGENT STATUS:\n%s\n\nAGENT CONTAINER LOGS:\n%s\n"
                        % (expected_dims, agent.get_status(), agent.get_container_logs())
                    )

def test_signalfx_forwarder_app(splunk_version):
    with fake_backend.start(ip_addr=get_host_ip()) as backend:
        with run_splunk(splunk_version) as cont:
            splunk_host = container_ip(cont)
            assert wait_for(
                p(http_status, url=f"http://{splunk_host}:8000", status=[200]), 120
            ), "service didn't start"
            time.sleep(5)
            assert container_cmd_exit_0(
                cont, "/test/install-app.sh", environment={"INGEST_HOST": backend.ingest_url}, user="******"
            ), "failed to install app"
            assert wait_for(
                p(http_status, url=f"http://{splunk_host}:8000", status=[200]), 120
            ), "service didn't start"
            assert wait_for(
                p(has_series_data, cont), timeout_seconds=60, interval_seconds=2
            ), "timed out waiting for series data"
            try:
                # test tosfx query with time
                cmd = (
                    "search 'index=_internal series=* | table _time kb ev max_age | `gauge(kb)` "
                    "| `counter(ev)` | `cumulative_counter(max_age)` | tosfx'"
                )
                code, output = run_splunk_cmd(cont, cmd)
                assert code == 0, output.decode("utf-8")
                assert wait_for(p(has_datapoint, backend, metric="kb", metric_type="gauge", has_timestamp=True))
                assert wait_for(p(has_datapoint, backend, metric="ev", metric_type="counter", has_timestamp=True))
                assert wait_for(
                    p(has_datapoint, backend, metric="max_age", metric_type="cumulative_counter", has_timestamp=True)
                )

                # check that datapoints are not streaming
                num_datapoints = len(backend.datapoints)
                assert ensure_always(lambda: len(backend.datapoints) == num_datapoints, timeout_seconds=60)

                # test tosfx query without time
                backend.reset_datapoints()
                cmd = (
                    "search 'index=_internal series=* | table kb ev max_age | `gauge(kb)` "
                    "| `counter(ev)` | `cumulative_counter(max_age)` | tosfx'"
                )
                code, output = run_splunk_cmd(cont, cmd)
                assert code == 0, output.decode("utf-8")
                assert wait_for(p(has_datapoint, backend, metric="kb", metric_type="gauge", has_timestamp=False))
                assert wait_for(p(has_datapoint, backend, metric="ev", metric_type="counter", has_timestamp=False))
                assert wait_for(
                    p(has_datapoint, backend, metric="max_age", metric_type="cumulative_counter", has_timestamp=False)
                )
            finally:
                print_datapoints(backend)
                code, output = cont.exec_run("cat /opt/splunk/var/log/splunk/python.log")
                if code == 0 and output:
                    print("/opt/splunk/var/log/splunk/python.log:")
                    print(output.decode("utf-8"))
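
`ensure_always` is assumed to be the inverse of `wait_for`: instead of waiting for a condition to become true, it asserts the condition holds for the entire window (used above to verify datapoints are not still streaming in). A sketch under that assumption:

import time


def ensure_always(predicate, timeout_seconds=30, interval_seconds=0.5):
    """Return True only if `predicate` stays truthy for the full window."""
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        if not predicate():
            return False
        time.sleep(interval_seconds)
    return True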