def run_elasticsearch(**kwargs):
    """Run an Elasticsearch 6.4.2 container, seed one index/document, and yield the container.

    Extra keyword args are forwarded to run_service (e.g. environment overrides).
    """
    with run_service("elasticsearch/6.4.2", **kwargs) as container:
        base_url = f"http://{container_ip(container)}:9200"
        # Block until the local node API answers before touching the cluster.
        assert wait_for(p(http_status, url=f"{base_url}/_nodes/_local", status=[200]), 180), "service didn't start"
        # Seed an index with a single document so monitors have data to report on.
        index_resp = requests.put(f"{base_url}/twitter")
        index_resp.raise_for_status()
        doc_resp = requests.put(
            f"{base_url}/twitter/tweet/1", json={"user": "******", "message": "tweet tweet"}
        )
        doc_resp.raise_for_status()
        yield container
def create_daemonset(body, namespace=None, timeout=K8S_CREATE_TIMEOUT, wait_for_ready=True):
    """Create a daemonset from a manifest dict; optionally block until it reports ready.

    Returns the created daemonset API object.
    """
    ds_name = body["metadata"]["name"]
    client = api_client_from_version(body["apiVersion"])
    daemonset = create_resource(body, client, namespace=namespace, timeout=timeout)
    if not wait_for_ready:
        return daemonset
    became_ready = wait_for(p(daemonset_is_ready, ds_name, namespace=namespace), timeout_seconds=timeout)
    assert became_ready, 'timed out waiting for daemonset "%s" to be ready!' % ds_name
    return daemonset
def test_min_interval():
    """The ntp monitor should emit datapoints carrying the configured host dimension
    and, with the default (minimum) interval enforced, exactly one datapoint per
    default metric.
    """
    with Agent.run(
        f"""
monitors:
- type: ntp
  host: {HOST}
"""
    ) as agent:
        # The configured host should appear as a dimension on the metric.
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, MONITOR, HOST)
        ), "Didn't get ntp datapoints with {}:{} dimension".format(MONITOR, HOST)
        # Assert directly instead of `if ...: assert False` so a failure reports
        # the actual counts instead of a bare AssertionError.
        assert len(METADATA.default_metrics) == len(agent.fake_services.datapoints), (
            "expected %d datapoints (one per default metric), got %d"
            % (len(METADATA.default_metrics), len(agent.fake_services.datapoints))
        )
def test_chrony():
    """Chronyd is very hard to run in a test environment without letting it change
    the system time, which we don't want — so instead of metrics, verify the
    monitor was actually configured by waiting for its characteristic error log.
    """
    with Agent.run(CHRONY_CONFIG) as agent:
        error_logged = lambda: has_log_message(
            agent.output,
            level="error",
            message="chrony plugin: chrony_query (REQ_TRACKING) failed",
        )
        assert wait_for(error_logged), "Didn't get chrony error message"
def test_k8s_api_observer_basic(k8s_cluster):
    """The k8s-api observer should discover the nginx pod and feed it to collectd/nginx."""
    with k8s_cluster.create_resources([TEST_SERVICES_DIR / "nginx/nginx-k8s.yaml"]):
        # {{{{...}}}} renders to {{...}} — the Go template placeholders the agent expands.
        agent_config = f"""
observers:
- type: k8s-api
monitors:
- type: collectd/nginx
  discoveryRule: 'discovered_by == "k8s-api" && kubernetes_namespace == "{k8s_cluster.test_namespace}" && port == 80 && container_spec_name == "nginx"'
  url: "http://{{{{.Host}}}}:{{{{.Port}}}}/nginx_status"
"""
        with k8s_cluster.run_agent(agent_config) as agent:
            assert wait_for(p(has_datapoint, agent.fake_services, dimensions={"plugin": "nginx"}))
def test_k8s_annotations_in_discovery(k8s_cluster):
    """Pod annotations should be usable in discovery rules via Get(kubernetes_annotations, ...)."""
    with k8s_cluster.create_resources([TEST_SERVICES_DIR / "nginx/nginx-k8s.yaml"]):
        agent_config = """
observers:
- type: k8s-api
monitors:
- type: collectd/nginx
  discoveryRule: 'Get(kubernetes_annotations, "allowScraping") == "true" && port == 80'
"""
        with k8s_cluster.run_agent(agent_config) as agent:
            assert wait_for(p(has_datapoint, agent.fake_services, dimensions={"plugin": "nginx"}))
def test_python_monitor_basics(script):
    """A python-monitor script should emit my.gauge with its config key as a dimension."""
    monitor_config = dedent(
        f"""
        monitors:
        - type: python-monitor
          scriptFilePath: {script}
          intervalSeconds: 1
          a: test
        """
    )
    with Agent.run(monitor_config) as agent:
        # count=5 ensures the monitor keeps emitting on its 1s interval, not just once.
        got_datapoints = wait_for(
            p(
                has_datapoint,
                agent.fake_services,
                metric_name="my.gauge",
                dimensions={"a": "test"},
                count=5,
            )
        )
        assert got_datapoints, "Didn't get datapoints"
def delete_resource(name, kind, api_client, namespace="default", timeout=K8S_DELETE_TIMEOUT):
    """Delete the named k8s resource (no-op if it doesn't exist) and wait until it is gone.

    Raises AssertionError if the resource is still present after `timeout` seconds.
    """
    if not has_resource(name, kind, api_client, namespace=namespace):
        return
    # Force immediate deletion and reap dependents in the background.
    getattr(api_client, "delete_namespaced_" + camel_case_to_snake_case(kind))(
        name=name,
        body=kube_client.V1DeleteOptions(grace_period_seconds=0, propagation_policy="Background"),
        namespace=namespace,
    )
    # Pass the timeout by keyword for consistency with every other wait_for call site.
    assert wait_for(
        lambda: not has_resource(name, kind, api_client, namespace=namespace),
        timeout_seconds=timeout,
    ), 'timed out waiting for %s "%s" to be deleted!' % (kind, name)
def test_installer_mode(distro, version, mode):
    """End-to-end install/uninstall of the collector installer script in a distro container.

    Builds the install command from REALM/TOTAL_MEMORY/mode, optionally pins a
    collector version and a pre-release stage, then runs the script inside the
    container and verifies the env file, service status, td-agent (non-opensuse),
    and the support-bundle script before uninstalling.
    """
    install_cmd = f"sh -x /test/install.sh -- testing123 --realm {REALM} --memory {TOTAL_MEMORY} --mode {mode}"
    if version != "latest":
        # "v1.2.3" -> "1.2.3" for the --collector-version flag.
        install_cmd = f"{install_cmd} --collector-version {version.lstrip('v')}"
    if STAGE != "release":
        # Only test/beta are valid non-release stages; they map to an extra flag.
        assert STAGE in ("test", "beta"), f"Unsupported stage '{STAGE}'!"
        install_cmd = f"{install_cmd} --{STAGE}"
    print(f"Testing installation on {distro} from {STAGE} stage ...")
    with run_distro_container(distro) as container:
        # run installer script
        copy_file_into_container(container, INSTALLER_PATH, "/test/install.sh")
        try:
            run_container_cmd(container, install_cmd, env={"VERIFY_ACCESS_TOKEN": "false"})
            # Give services a moment to settle before inspecting them.
            time.sleep(5)
            # verify env file created with configured parameters
            verify_env_file(container, mode=mode)
            # verify collector service status
            assert wait_for(lambda: service_is_running(
                container, service_owner=SERVICE_OWNER))
            # td-agent is not installed on opensuse, so skip the systemd check there.
            if "opensuse" not in distro:
                assert container.exec_run(
                    "systemctl status td-agent").exit_code == 0
            # test support bundle script
            verify_support_bundle(container)
            run_container_cmd(container, "sh -x /test/install.sh --uninstall")
        finally:
            # Always dump service logs for debugging, even on failure.
            if "opensuse" not in distro:
                run_container_cmd(container, "journalctl -u td-agent --no-pager")
                if container.exec_run("test -f /var/log/td-agent/td-agent.log"
                                      ).exit_code == 0:
                    run_container_cmd(container, "cat /var/log/td-agent/td-agent.log")
            run_container_cmd(container, f"journalctl -u {SERVICE_NAME} --no-pager")
def install_helm_chart(k8s_cluster, values_path, cont, helm_major_version):
    """Install the agent Helm chart into the test namespace and wait for its daemonset.

    Handles the Helm 2 vs Helm 3 CLI difference (--generate-name) and always dumps
    the daemonset description on exit so failures are debuggable.
    """
    install_opts = f"--values {values_path} --namespace={k8s_cluster.test_namespace} --debug {CONTAINER_CHART_DIR}"
    if helm_major_version >= 3:
        # Helm 3 requires an explicit name or --generate-name.
        install_opts = f"--generate-name {install_opts}"
    install_cmd = helm_command_prefix(k8s_cluster, helm_major_version) + f" install {install_opts}"
    print(f"Running Helm install: {install_cmd}")
    install_output = exec_helm(cont, install_cmd)
    print(f"Helm chart install output:\n{install_output}")
    daemonset_name = get_daemonset_name(k8s_cluster, cont, helm_major_version)
    print("Waiting for daemonset %s to be ready ..." % daemonset_name)
    try:
        ready = wait_for(
            p(daemonset_is_ready, daemonset_name, k8s_cluster.test_namespace),
            timeout_seconds=120,
            interval_seconds=2,
        )
        assert ready, "timed out waiting for %s daemonset to be ready!" % daemonset_name
    finally:
        print(
            k8s_cluster.exec_kubectl(
                f"describe daemonset {daemonset_name}", namespace=k8s_cluster.test_namespace
            )
        )
def test_kubelet_stats_extra_pod_metric(k8s_cluster):
    """A metric listed in extraMetrics should be reported by kubelet-stats."""
    _skip_if_1_18_or_newer(k8s_cluster)
    agent_config = f"""
monitors:
- type: kubelet-stats
  kubeletAPI:
    skipVerify: true
    authType: serviceAccount
  extraMetrics:
  - {CUSTOM_METRIC_POD_METRIC}
"""
    with k8s_cluster.run_agent(agent_yaml=agent_config) as agent:
        assert wait_for(
            p(has_datapoint, agent.fake_services, metric_name=CUSTOM_METRIC_POD_METRIC)
        )
def test_omitting_kafka_metrics(version="1.0.1"):
    """Metrics for mBeans listed under mBeansToOmit must never be reported.

    Note: the assertion is negative — wait_for must time out without ever seeing
    the omitted metric.
    """
    with run_kafka(version) as kafka:
        kafkahost = container_ip(kafka)
        with run_agent(
            textwrap.dedent("""
            monitors:
            - type: collectd/kafka
              host: {0}
              port: 7099
              clusterName: testCluster
              mBeansToOmit:
              - kafka-active-controllers
            """.format(kafkahost))
        ) as [backend, _, _]:
            # Fixed message: the failure here means the omitted metric WAS received.
            assert not wait_for(
                p(has_datapoint_with_metric_name, backend, "gauge.kafka-active-controllers"),
                timeout_seconds=60,
            ), "Got kafka-active-controllers metric despite it being configured as omitted"
def test_protocols():
    """Smoke test: collectd/protocols should report some documented metric or
    dimension without logging any errors.
    """
    monitor_type = "collectd/protocols"
    expected_metrics = get_monitor_metrics_from_selfdescribe(monitor_type)
    expected_dims = get_monitor_dims_from_selfdescribe(monitor_type)
    with Agent.run("""
monitors:
  - type: collectd/protocols
""") as agent:
        found = wait_for(
            p(has_any_metric_or_dim, agent.fake_services, expected_metrics, expected_dims),
            timeout_seconds=60,
        )
        assert found, "timed out waiting for metrics and/or dimensions!"
        assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"
def create_serviceaccount(body, namespace=None, timeout=K8S_CREATE_TIMEOUT):
    """Create a v1 ServiceAccount from a manifest dict, creating its namespace if needed.

    An explicit `namespace` argument overrides the one in the body; otherwise the
    body's namespace (or "default") is used. Returns the created API object.
    """
    core_api = kube_client.CoreV1Api()
    sa_name = body["metadata"]["name"]
    body["apiVersion"] = "v1"
    if namespace:
        body["metadata"]["namespace"] = namespace
    else:
        namespace = body["metadata"].get("namespace", "default")
    # Ensure the target namespace exists before creating into it.
    if not has_namespace(namespace):
        create_namespace(namespace)
    serviceaccount = core_api.create_namespaced_service_account(body=body, namespace=namespace)
    created = wait_for(p(has_serviceaccount, sa_name, namespace=namespace), timeout_seconds=timeout)
    assert created, 'timed out waiting for service account "%s" to be created!' % sa_name
    return serviceaccount
def test_health_checker_http_windows():
    """The health-checker monitor should emit datapoints for a local HTTP endpoint."""
    host = "localhost"
    # Renders to the same YAML the original string.Template produced.
    config = dedent(
        f"""
        monitors:
          - type: collectd/health-checker
            host: {host}
            port: 80
            path: /
        """
    )
    with run_agent(config) as [backend, _, _]:
        assert wait_for(
            p(has_datapoint_with_dim, backend, "plugin", "health_checker")
        ), "Didn't get health_checker datapoints"
def start_registry(self):
    """Start a registry:2.7 container inside minikube and wait for its port to open."""
    self.get_client()
    port = self.registry_port
    print("\nStarting registry container localhost:%d in minikube ..." % port)
    # Retried because the docker daemon inside minikube can be briefly flaky on startup.
    retry(
        p(
            self.client.containers.run,
            image="registry:2.7",
            name="registry",
            detach=True,
            environment={"REGISTRY_HTTP_ADDR": "0.0.0.0:%d" % port},
            ports={"%d/tcp" % port: port},
        ),
        docker.errors.DockerException,
    )
    listening = wait_for(
        p(tcp_socket_open, self.container_ip, port), timeout_seconds=30, interval_seconds=2
    )
    assert listening, "timed out waiting for registry to start!"
def test_kubernetes_cluster_namespace_scope(k8s_cluster):
    """When scoped to one namespace, the monitor must report pods from that
    namespace only and never from any other.
    """
    pod_yamls = [SCRIPT_DIR / "good-pod.yaml", SCRIPT_DIR / "bad-pod.yaml"]
    with k8s_cluster.create_resources(pod_yamls):
        agent_config = """
monitors:
- type: kubernetes-cluster
  kubernetesAPI:
    authType: serviceAccount
  namespace: good
"""
        with k8s_cluster.run_agent(agent_yaml=agent_config) as agent:
            assert wait_for(
                p(has_datapoint, agent.fake_services, dimensions={"kubernetes_namespace": "good"})
            ), "timed out waiting for good pod metrics"
            # ensure_always: the bad namespace must stay absent for the whole window.
            never_bad = ensure_always(
                lambda: not has_datapoint(
                    agent.fake_services, dimensions={"kubernetes_namespace": "bad"}
                )
            )
            assert never_bad, "got pod metrics from unspecified namespace"
def test_conviva_single_metric():
    """With a single metricConfig, the conviva monitor should report that metric and nothing else."""
    conviva_config = dedent(
        f"""
        monitors:
        - type: conviva
          pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
          pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
          metricConfigs:
          - metricParameter: concurrent_plays
        """
    )
    with run_agent(conviva_config) as [backend, _, _]:
        assert wait_for(lambda: len(backend.datapoints) > 0), "Didn't get conviva datapoints"
        only_expected = ensure_always(
            p(all_datapoints_have_metric_name, backend, "conviva.concurrent_plays")
        )
        assert only_expected, "Received conviva datapoints for other metrics"
def test_omitting_kafka_metrics(version="1.0.1"):
    """Metrics for mBeans listed under mBeansToOmit must never be reported.

    Note: the assertion is negative — wait_for must time out without ever seeing
    the omitted metric.
    """
    with run_kafka(version) as kafka:
        kafka_host = container_ip(kafka)
        with Agent.run(f"""
monitors:
- type: collectd/kafka
  host: {kafka_host}
  port: 7099
  clusterName: testCluster
  mBeansToOmit:
  - kafka-active-controllers
""") as agent:
            # Fixed message: the failure here means the omitted metric WAS received.
            assert not wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services, "gauge.kafka-active-controllers"),
                timeout_seconds=60,
            ), "Got kafka-active-controllers metric despite it being configured as omitted"
def test_k8s_portless_pods_with_declared_port(k8s_cluster):
    """A pod discovered without exposed ports should still be monitorable when the
    monitor config declares the port explicitly.
    """
    redis_yaml = TEST_SERVICES_DIR / "redis" / "redis-k8s.yaml"
    with k8s_cluster.create_resources([redis_yaml]):
        agent_config = """
observers:
- type: k8s-api
monitors:
- type: collectd/redis
  discoveryRule: target == "pod" && kubernetes_pod_name =~ "redis-deployment"
  port: 6379
"""
        with k8s_cluster.run_agent(agent_config) as agent:
            assert wait_for(
                p(has_datapoint, agent.fake_services, metric_name="bytes.used_memory_rss")
            )
def test_netio_filter():
    """With every interface excluded ("!*"), net-io should still report
    network.total but none of the other included metrics.
    """
    blocked_metrics = METADATA.included_metrics - {"network.total"}
    with Agent.run(
        """
procPath: /proc
monitors:
- type: net-io
  interfaces:
  - "!*"
"""
    ) as agent:
        got_total = wait_for(
            p(has_datapoint, agent.fake_services, metric_name="network.total"),
            timeout_seconds=60,
        )
        assert got_total, "timed out waiting for metrics and/or dimensions!"
        # None of the per-interface metrics may ever appear.
        assert ensure_never(p(any_metric_found, agent.fake_services, blocked_metrics))
        assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"
def patch_resource(body, api_client, namespace=None, timeout=K8S_CREATE_TIMEOUT):
    """Patch an existing namespaced resource from a manifest dict and wait for it to exist.

    Returns the patched resource API object.
    """
    name = body["metadata"]["name"]
    kind = body["kind"]
    # A namespace embedded in the resource body always takes precedence over the argument.
    namespace = body.get("metadata", {}).get("namespace", namespace)
    patch_method = getattr(api_client, "patch_namespaced_" + camel_case_to_snake_case(kind))
    resource = patch_method(name=name, body=body, namespace=namespace)
    patched = wait_for(
        p(has_resource, name, kind, api_client, namespace=namespace), timeout_seconds=timeout
    )
    assert patched, 'timed out waiting for %s "%s" to be patched!' % (kind, name)
    return resource
def test_haproxy_basic(version):
    """collectd/haproxy should report the expected default metrics after some traffic."""
    with run_service("haproxy", buildargs={"HAPROXY_VERSION": version}) as service_container:
        haproxy_host = container_ip(service_container)
        assert wait_for(p(tcp_socket_open, haproxy_host, 9000)), "haproxy not listening on port"
        monitor_config = f"""
monitors:
- type: collectd/haproxy
  host: {haproxy_host}
  port: 9000
  enhancedMetrics: false
"""
        with Agent.run(monitor_config) as agent:
            # Generate a little traffic so the stats counters move.
            for _ in range(2):
                requests.get(f"http://{haproxy_host}:80", timeout=5)
            verify(agent, EXPECTED_DEFAULTS, 10)
def test_kubernetes_cluster_in_k8s(k8s_cluster):
    """Every default kubernetes-cluster metric (except replication_controller ones,
    which these fixtures don't create) should be reported.
    """
    agent_config = """
monitors:
- type: kubernetes-cluster
"""
    fixtures = [
        SCRIPT_DIR / "resource_quota.yaml",
        TEST_SERVICES_DIR / "nginx/nginx-k8s.yaml",
    ]
    with k8s_cluster.create_resources(fixtures):
        with k8s_cluster.run_agent(agent_yaml=agent_config) as agent:
            for metric in get_default_monitor_metrics_from_selfdescribe("kubernetes-cluster"):
                if "replication_controller" not in metric:
                    assert wait_for(p(has_datapoint, agent.fake_services, metric_name=metric))
def test_with_discovery_rule():
    """A python-monitor attached via docker-observer discovery should start emitting
    datapoints once a matching container appears.
    """
    agent_config = f"""
observers:
- type: docker
monitors:
- type: python-monitor
  discoveryRule: container_name =~ "nginx-python-monitor" && port == 80
  scriptFilePath: {script_path("monitor1.py")}
  intervalSeconds: 1
  a: test
"""
    with Agent.run(agent_config) as agent:
        # Start the target container only after the agent is watching.
        with run_service("nginx", name="nginx-python-monitor"):
            got = wait_for(
                p(has_datapoint, agent.fake_services, metric_name="my.gauge", dimensions={"a": "test"})
            )
            assert got, "Didn't get datapoints"
def test_kubelet_stats_extra_pod_metric_group(k8s_cluster):
    """Every metric in the podEphemeralStats group should be reported when that
    group is enabled via extraGroups.
    """
    _skip_if_1_18_or_newer(k8s_cluster)
    agent_config = """
monitors:
- type: kubelet-stats
  kubeletAPI:
    skipVerify: true
    authType: serviceAccount
  extraGroups: [podEphemeralStats]
"""
    with k8s_cluster.run_agent(agent_yaml=agent_config) as agent:
        for group_metric in METADATA.metrics_by_group.get("podEphemeralStats", []):
            assert wait_for(
                p(has_datapoint, agent.fake_services, metric_name=group_metric),
                timeout_seconds=100,
            )
def test_filtering_by_dimensions():
    """datapointsToExclude with a negated dimension value ('!nginx') must let the
    nginx datapoints through while excluding everything else.
    """
    with run_service("nginx") as nginx_cont:
        agent_config = f"""
monitors:
- type: collectd/nginx
  host: {container_ip(nginx_cont)}
  port: 80
  intervalSeconds: 1
  datapointsToExclude:
  - dimensions:
      plugin: ['*', '!nginx']
"""
        with Agent.run(agent_config) as agent:
            got_nginx = wait_for(
                p(has_datapoint, agent.fake_services, dimensions={"plugin": "nginx"})
            )
            assert got_nginx, "Didn't get nginx datapoints"
def test_cassandra_included():
    """collectd/cassandra should report all of its included metrics."""
    with run_service("cassandra") as cassandra_cont:
        jmx_host = container_ip(cassandra_cont)
        # Wait for the JMX port to be open in the container before configuring the monitor.
        assert wait_for(p(tcp_socket_open, jmx_host, 7199)), "Cassandra JMX didn't start"
        monitor_config = f"""
monitors:
- type: collectd/cassandra
  host: {jmx_host}
  port: 7199
  username: cassandra
  password: cassandra
"""
        run_agent_verify_included_metrics(monitor_config, METADATA)
def load_kubeconfig(self, kubeconfig_path="/kubeconfig", timeout=300):
    """Copy the kubeconfig out of the minikube container and load it into the
    kubernetes client library.

    Waits up to `timeout` seconds for the file to exist in the container (i.e.
    for the cluster to be ready), then copies it to a temp file under
    /tmp/scratch (presumably a path shared with the container — TODO confirm),
    records it on self.kubeconfig, and calls kube_config.load_kube_config.
    """
    with tempfile.NamedTemporaryFile(dir="/tmp/scratch") as fd:
        kubeconfig = fd.name
        # The kubeconfig file appearing in the container signals cluster readiness.
        assert wait_for(
            p(container_cmd_exit_0, self.container, "test -f %s" % kubeconfig_path),
            timeout_seconds=timeout,
            interval_seconds=2,
        ), ("timed out waiting for the minikube cluster to be ready!\n\n%s\n\n" % self.get_logs())
        # Brief grace period after the file appears before copying it.
        time.sleep(2)
        exit_code, output = self.container.exec_run(
            "cp -f %s %s" % (kubeconfig_path, kubeconfig))
        assert exit_code == 0, "failed to get %s from minikube!\n%s" % (
            kubeconfig_path, output.decode("utf-8"))
        self.kubeconfig = kubeconfig
        kube_config.load_kube_config(config_file=self.kubeconfig)
def test_prometheus_exporter():
    """The prometheus-exporter monitor should scrape the dpgen service and attach
    the configured extraDimensions to its datapoints.
    """
    with run_service("dpgen", environment={"NUM_METRICS": 3}) as dpgen_cont:
        monitor_config = dedent(
            f"""
            monitors:
            - type: prometheus-exporter
              host: {container_ip(dpgen_cont)}
              port: 3000
              intervalSeconds: 2
              extraDimensions:
                source: prometheus
            """
        )
        with Agent.run(monitor_config) as agent:
            got = wait_for(
                p(has_datapoint, agent.fake_services, dimensions={"source": "prometheus"})
            )
            assert got, "didn't get prometheus datapoint"