def test_node_uid_host_dim_kubernetes_cluster_config(k8s_cluster):
    config = """
        monitors:
          - type: kubelet-stats
          - type: cpu
          - type: kubernetes-cluster
        # This will make it fail
        kubernetesAPI:
          authType: none
    """
    yamls = [TEST_SERVICES_DIR / "nginx/nginx-k8s.yaml"]
    with k8s_cluster.create_resources(yamls):
        with k8s_cluster.run_agent(agent_yaml=config, wait_for_ready=False) as agent:
            # If it works for one node it should work for all of them
            node = k8s_cluster.client.CoreV1Api().list_node().items[0]

            def no_node_uid_dim():
                return not has_datapoint(agent.fake_services, dimensions={"kubernetes_node_uid": node.metadata.uid})

            def no_host_dim():
                return not has_datapoint_with_dim_key(agent.fake_services, "host")

            assert ensure_always(no_node_uid_dim)
            assert ensure_always(no_host_dim), "no metrics should come through if we cannot get the node uid"

            # We should get this error since we aren't using in-cluster auth.
            assert "certificate signed by unknown authority" in agent.get_logs()
def test_filter_with_restart():
    with run_agent("""
        monitors:
          - type: collectd/signalfx-metadata
          - type: collectd/df
          - type: collectd/memory
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - memory.*
            monitorType: collectd/memory
    """) as [backend, _, update_config]:
        assert wait_for(lambda: has_datapoint_with_metric_name(backend, "df_complex.free"))
        assert ensure_always(lambda: not has_datapoint_with_metric_name(backend, "memory.used"))
        assert ensure_always(lambda: not has_datapoint_with_metric_name(backend, "memory.free"))

        update_config("""
            monitors:
              - type: collectd/signalfx-metadata
              - type: collectd/df
              - type: collectd/memory
              - type: collectd/uptime
            metricsToExclude:
              - metricNames:
                  - memory.used
                monitorType: collectd/memory
        """)
        assert wait_for(lambda: has_datapoint_with_metric_name(backend, "memory.free"))
def test_negated_filter_with_monitor_type():
    """
    Having monitorType in a filter should make that filter only apply to a
    specific monitor type and not to other metrics.
    """
    with run_agent("""
        monitors:
          - type: collectd/signalfx-metadata
          - type: collectd/memory
          - type: collectd/df
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - memory.used
              - memory.free
            monitorType: collectd/memory
            negated: true
          - metricName: uptime
    """) as [backend, _, _]:
        assert wait_for(lambda: has_datapoint_with_metric_name(backend, "memory.used"))
        assert wait_for(lambda: has_datapoint_with_metric_name(backend, "memory.free"))
        assert wait_for(lambda: has_datapoint_with_metric_name(backend, "df_complex.free"))
        assert ensure_always(lambda: not has_datapoint_with_metric_name(backend, "memory.cached"), 10)
        assert ensure_always(lambda: not has_datapoint_with_metric_name(backend, "uptime"), 5)
def test_negated_filter_with_monitor_type():
    """
    Having monitorType in a filter should make that filter only apply to a
    specific monitor type and not to other metrics.
    """
    with Agent.run(
        """
        monitors:
          - type: collectd/signalfx-metadata
          - type: collectd/memory
          - type: collectd/df
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - memory.used
              - memory.free
            monitorType: collectd/memory
            negated: true
          - metricName: uptime
        """
    ) as agent:
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.used"))
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.free"))
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="df_complex.free"))
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="memory.cached"), 10)
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="uptime"), 5)
def test_combined_filter_with_monitor_type():
    with Agent.run("""
        monitors:
          - type: memory
          - type: filesystems
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - memory.used
            monitorType: memory
            negated: true
          - metricName: uptime
          - metricNames:
              - memory.free
            monitorType: memory
            negated: true
    """) as agent:
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.used"))
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.free"))
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="memory.cached"), 10)
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="uptime"), 5)
def test_conviva_single_metriclens_dimension(conviva_metriclens_dimensions):
    with run_agent(
        dedent(
            f"""
            intervalSeconds: 5
            monitors:
              - type: conviva
                pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
                pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
                metricConfigs:
                  - metricParameter: quality_metriclens
                    metricLensDimensions:
                      - {conviva_metriclens_dimensions[0]}
            """
        ),
        debug=CONVIVA_DEBUG,
    ) as [backend, _, _]:
        assert wait_for(lambda: len(backend.datapoints) > 0), "Didn't get conviva datapoints"
        pattern = re.compile(r"^conviva\.quality_metriclens\..*")
        assert ensure_always(
            p(all_datapoints_have_metric_name, backend, pattern)
        ), "Received conviva datapoints for other metrics"
        assert ensure_always(
            p(all_datapoints_have_dim_key, backend, get_dim_key(conviva_metriclens_dimensions[0]))
        ), "Received conviva datapoints without %s dimension" % conviva_metriclens_dimensions[0]
def test_filter_with_restart():
    """
    Ensure the filters get updated properly when the agent reloads a new config
    """
    with Agent.run(
        """
        monitors:
          - type: collectd/signalfx-metadata
          - type: collectd/df
          - type: collectd/memory
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - memory.*
            monitorType: collectd/memory
        """
    ) as agent:
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="df_complex.free"))
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="memory.used"))
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="memory.free"))

        agent.update_config(
            """
            monitors:
              - type: collectd/signalfx-metadata
              - type: collectd/df
              - type: collectd/memory
              - type: collectd/uptime
            metricsToExclude:
              - metricNames:
                  - memory.used
                monitorType: collectd/memory
            """
        )
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.free"))
def test_combined_filter_with_monitor_type():
    with Agent.run("""
        monitors:
          - type: collectd/signalfx-metadata
          - type: collectd/memory
          - type: collectd/df
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - memory.used
            monitorType: collectd/memory
            negated: true
          - metricName: uptime
          - metricNames:
              - memory.free
            monitorType: collectd/memory
            negated: true
    """) as agent:
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "memory.used"))
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "memory.free"))
        assert ensure_always(lambda: not has_datapoint_with_metric_name(agent.fake_services, "memory.cached"), 10)
        assert ensure_always(lambda: not has_datapoint_with_metric_name(agent.fake_services, "uptime"), 5)
def test_docker_observer_labels():
    """
    Test that docker observer picks up a fully configured endpoint from
    container labels
    """
    with run_agent(
        dedent("""
            observers:
              - type: docker
        """)
    ) as [backend, _, _]:
        with run_service(
            "nginx",
            name="nginx-disco-full",
            labels={
                "agent.signalfx.com.monitorType.80": "collectd/nginx",
                "agent.signalfx.com.config.80.intervalSeconds": "1",
            },
        ):
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "nginx")), "Didn't get nginx datapoints"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.reset_datapoints()
        assert ensure_always(lambda: not has_datapoint_with_dim(backend, "container_name", "nginx-disco-full"), 10)
def test_docker_observer_labels_partial():
    """
    Test that docker observer picks up a partially configured endpoint from
    container labels
    """
    with run_agent(
        dedent("""
            observers:
              - type: docker
            monitors:
              - type: collectd/nginx
                discoveryRule: container_name =~ "nginx-disco-partial" && port == 80
        """)
    ) as [backend, _, _]:
        with run_service(
            "nginx",
            name="nginx-disco-partial",
            labels={"agent.signalfx.com.config.80.extraDimensions": "{mydim: myvalue}"},
        ):
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "nginx")), "Didn't get nginx datapoints"
            assert wait_for(p(has_datapoint_with_dim, backend, "mydim", "myvalue")), "Didn't get extra dimension"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.reset_datapoints()
        assert ensure_always(lambda: not has_datapoint_with_dim(backend, "container_name", "nginx-disco-partial"), 10)
def test_negated_filtering():
    with Agent.run(NEGATIVE_FILTERING_CONFIG) as agent:
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "memory.used"))
        assert ensure_always(lambda: not has_datapoint_with_metric_name(agent.fake_services, "uptime"), 10)
def test_conviva_basic():
    with run_agent(
        dedent(
            f"""
            intervalSeconds: 5
            monitors:
              - type: conviva
                pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
                pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
            """
        ),
        debug=False,
    ) as [backend, get_output, agent_config]:
        assert wait_for(lambda: len(backend.datapoints) > 0), "Didn't get conviva datapoints"
        pattern = re.compile(r"^conviva\.quality_metriclens\..*")
        assert ensure_always(
            p(all_datapoints_have_metric_name_and_dims, backend, pattern, {"filter": "All Traffic"})
        ), "Received datapoints without metric quality_metriclens or {filter: All Traffic} dimension"

        config_path = agent_config(None)
        agent_status = get_agent_status(config_path)
        assert CONVIVA_PULSE_PASSWORD not in agent_status, (
            "cleartext password(s) found in agent status output!\n\n%s\n" % agent_status
        )
        agent_output = get_output()
        assert CONVIVA_PULSE_PASSWORD not in agent_output, (
            "cleartext password(s) found in agent output!\n\n%s\n" % agent_output
        )
def test_monitor_filter():
    """
    Ensure the filters on monitors get applied
    """
    with Agent.run(
        """
        monitors:
          - type: collectd/signalfx-metadata
          - type: collectd/df
          - type: collectd/memory
            metricsToExclude:
              - metricName: memory.used
          - type: collectd/uptime
        """
    ) as agent:
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="df_complex.free"))
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.free"))
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="memory.used"))

        agent.update_config(
            """
            monitors:
              - type: collectd/signalfx-metadata
              - type: collectd/df
              - type: collectd/memory
              - type: collectd/uptime
            """
        )
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.used"))
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="memory.free"))
def test_include_filter_with_monitor_type():
    """
    Test that include filters will override exclude filters
    """
    with Agent.run(
        """
        enableBuiltInFiltering: false
        monitors:
          - type: collectd/disk
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - disk_time.read
            monitorType: collectd/disk
          - metricNames:
              - disk_ops.read
              - disk_ops.write
            monitorType: collectd/disk
            negated: true
        metricsToInclude:
          - metricNames:
              - disk_time.read
        """
    ) as agent:
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="disk_ops.read"))
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="disk_ops.write"), 5)
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="disk_time.read"), 5)
        assert ensure_always(p(has_no_datapoint, agent.fake_services, metric_name="disk_time.write"), 5)
        assert wait_for(p(has_datapoint, agent.fake_services, metric_name="uptime"), 5)
def test_conviva_metric_account(conviva_accounts):
    with run_agent(
        dedent(
            f"""
            monitors:
              - type: conviva
                pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
                pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
                metricConfigs:
                  - metricParameter: concurrent_plays
                    account: {conviva_accounts[0]}
            """
        )
    ) as [backend, _, _]:
        assert wait_for(lambda: len(backend.datapoints) > 0), "Didn't get conviva datapoints"
        assert ensure_always(
            p(
                all_datapoints_have_metric_name_and_dims,
                backend,
                "conviva.concurrent_plays",
                {"account": conviva_accounts[0]},
            )
        ), (
            "Received conviva datapoints without metric conviva.concurrent_plays or {account: %s} dimension"
            % conviva_accounts[0]
        )
def test_signalfx_metadata():
    with run_agent("""
        procPath: /proc
        etcPath: /etc
        monitors:
          - type: collectd/signalfx-metadata
            persistencePath: /var/run/signalfx-agent
          - type: collectd/cpu
          - type: collectd/disk
          - type: collectd/memory
    """) as [backend, get_output, _]:
        assert wait_for(p(has_datapoint, backend, "cpu.utilization", {"plugin": "signalfx-metadata"}))
        assert wait_for(p(has_datapoint, backend, "disk_ops.total", {"plugin": "signalfx-metadata"}))
        assert wait_for(p(has_datapoint, backend, "memory.utilization", {"plugin": "signalfx-metadata"}))
        assert ensure_always(
            lambda: not has_datapoint(backend, "cpu.utilization_per_core", {"plugin": "signalfx-metadata"})
        )
        assert not has_log_message(get_output().lower(), "error"), "error found in agent output!"
def test_prometheus_exporter_basic_auth():
    # The dpgen service just checks that basic auth is present, not correct
    with run_service("dpgen", environment={"NUM_METRICS": 3, "REQUIRE_BASIC_AUTH": "yes"}) as dpgen_cont:
        with Agent.run(
            dedent(f"""
                monitors:
                  - type: prometheus-exporter
                    host: {container_ip(dpgen_cont)}
                    port: 3000
                    intervalSeconds: 2
                    extraDimensions:
                      source: prometheus
            """)
        ) as agent:
            assert ensure_always(
                lambda: not has_datapoint(agent.fake_services, dimensions={"source": "prometheus"}),
                timeout_seconds=5,
            ), "got prometheus datapoint without basic auth (test setup is wrong)"

            agent.config["monitors"][0]["username"] = "******"
            agent.config["monitors"][0]["password"] = "******"
            agent.write_config()

            assert wait_for(
                p(has_datapoint, agent.fake_services, dimensions={"source": "prometheus"})
            ), "didn't get prometheus datapoint"
def test_ecs_observer_multi_containers():
    with run_service("ecsmeta") as ecsmeta:
        with run_container("redis:4-alpine") as redis, run_container("mongo:4") as mongo:
            with Agent.run(
                CONFIG.substitute(
                    host=container_ip(ecsmeta),
                    redis_ip=container_ip(redis),
                    mongo_ip=container_ip(mongo),
                    case="metadata_multi_containers",
                )
            ) as agent:
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "container_image", "redis:latest")
                ), "Didn't get redis datapoints"
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "container_image", "mongo:latest")
                ), "Didn't get mongo datapoints"

                # Let redis be removed by docker observer and collectd restart
                time.sleep(5)
                agent.fake_services.datapoints.clear()
                assert ensure_always(
                    lambda: not has_datapoint_with_dim(agent.fake_services, "ClusterName", "seon-fargate-test"), 10
                )
def test_haproxy_default_metrics_from_stats_page_proxies_to_monitor_frontend_200s(version):
    with run_service("haproxy", buildargs={"HAPROXY_VERSION": version}) as service_container:
        host = container_ip(service_container)
        with Agent.run(f"""
            monitors:
              - type: haproxy
                url: http://{host}:8080/stats?stats;csv
                proxies: ["FRONTEND", "200s"]
        """) as agent:
            assert ensure_always(
                p(
                    datapoints_have_some_or_all_dims,
                    agent.fake_services,
                    {"proxy_name": "200s", "service_name": "FRONTEND"},
                ),
                10,
            )
            assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"
            assert any_metric_found(agent.fake_services, ["haproxy_response_2xx"])
def test_conviva_single_filter(conviva_filters):
    with run_agent(
        dedent(
            f"""
            intervalSeconds: 5
            monitors:
              - type: conviva
                pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
                pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
                metricConfigs:
                  - metricParameter: concurrent_plays
                    filters:
                      - {conviva_filters[0]}
            """
        ),
        debug=CONVIVA_DEBUG,
    ) as [backend, _, _]:
        assert wait_for(lambda: len(backend.datapoints) > 0), "Didn't get conviva datapoints"
        assert ensure_always(
            p(
                all_datapoints_have_metric_name_and_dims,
                backend,
                "conviva.concurrent_plays",
                {"filter": conviva_filters[0]},
            )
        ), (
            "Received conviva datapoints without metric conviva.concurrent_plays or {filter: %s} dimension"
            % conviva_filters[0]
        )
def test_haproxy_default_metrics_from_stats_page_basic_auth_wrong_password(version):
    with run_service("haproxy", buildargs={"HAPROXY_VERSION": version}) as service_container:
        host = container_ip(service_container)
        url = f"http://{host}:8081/stats?stats;csv"
        with Agent.run(f"""
            monitors:
              - type: haproxy
                username: a_username
                password: a_wrong_password
                url: {url}
                proxies: ["FRONTEND", "200s"]
        """) as agent:
            assert ensure_always(
                p(
                    datapoints_have_some_or_all_dims,
                    agent.fake_services,
                    {"proxy_name": "200s", "service_name": "FRONTEND"},
                ),
                10,
            )
            assert has_log_message(agent.output.lower(), "error"), "error not found in agent output!"
def test_basic_filtering():
    with run_agent(basic_config) as [backend, _, _]:
        assert wait_for(lambda: has_datapoint_with_metric_name(backend, "uptime"))
        assert ensure_always(lambda: not has_datapoint_with_metric_name(backend, "cpu.utilization"), 10)
def create_agent_daemonset(self, daemonset_path):
    daemonset_yaml = yaml.safe_load(open(daemonset_path).read())
    self.daemonset_name = daemonset_yaml["metadata"]["name"]
    daemonset_labels = daemonset_yaml["spec"]["selector"]["matchLabels"]
    self.delete_agent_daemonset()
    daemonset_yaml["spec"]["template"]["spec"]["containers"][0]["resources"] = {"requests": {"cpu": "100m"}}
    if self.image_name and self.image_tag:
        print(
            'Creating daemonset "%s" for %s:%s from %s ...'
            % (self.daemonset_name, self.image_name, self.image_tag, daemonset_path)
        )
        daemonset_yaml["spec"]["template"]["spec"]["containers"][0]["image"] = self.image_name + ":" + self.image_tag
    else:
        print('Creating daemonset "%s" from %s ...' % (self.daemonset_name, daemonset_path))
    k8s.create_daemonset(body=daemonset_yaml, namespace=self.namespace)
    assert ensure_always(lambda: k8s.daemonset_is_ready(self.daemonset_name, namespace=self.namespace), 5)
    labels = ",".join(["%s=%s" % keyval for keyval in daemonset_labels.items()])
    self.pods = k8s.get_pods_by_labels(labels, namespace=self.namespace)
    assert self.pods, "no agent pods found"
    assert all(k8s.pod_is_ready(pod.metadata.name, namespace=self.namespace) for pod in self.pods)
def test_postgresql_database_filter():
    with run_service(
        "postgres", buildargs={"POSTGRES_VERSION": "11-alpine"}, environment=ENV, print_logs=False
    ) as postgres_cont:
        host = container_ip(postgres_cont)
        assert wait_for(p(tcp_socket_open, host, 5432), 60), "service didn't start"

        with Agent.run(
            dedent(f"""
                monitors:
                  - type: postgresql
                    host: {host}
                    port: 5432
                    connectionString: "user=test_user password=test_pwd dbname=postgres sslmode=disable"
                    databases: ['*', '!postgres']
            """)
        ) as agent:
            for metric in METADATA.default_metrics:
                assert wait_for(
                    p(has_datapoint, agent.fake_services, metric_name=metric, dimensions={"database": "dvdrental"})
                ), f"Didn't get default postgresql metric {metric} for database dvdrental"

            assert ensure_always(
                lambda: not has_datapoint(agent.fake_services, dimensions={"database": "postgres"})
            ), "Should not get metrics for the postgres default database"
def test_include_filter_with_monitor_type():
    """
    Test that include filters will override exclude filters
    """
    with Agent.run("""
        monitors:
          - type: collectd/disk
          - type: collectd/uptime
        metricsToExclude:
          - metricNames:
              - disk_time.read
            monitorType: collectd/disk
          - metricNames:
              - disk_ops.read
              - disk_ops.write
            monitorType: collectd/disk
            negated: true
        metricsToInclude:
          - metricNames:
              - disk_time.read
    """) as agent:
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "disk_ops.read"))
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "disk_ops.write"), 5)
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "disk_time.read"), 5)
        assert ensure_always(lambda: not has_datapoint_with_metric_name(agent.fake_services, "disk_time.write"), 5)
        assert wait_for(lambda: has_datapoint_with_metric_name(agent.fake_services, "uptime"), 5)
def test_haproxy_default_metrics_from_stats_page_basic_auth(version):
    with run_service("haproxy", buildargs={"HAPROXY_VERSION": version}) as service_container:
        host = container_ip(service_container)
        with Agent.run(f"""
            monitors:
              - type: haproxy
                username: a_username
                password: a_password
                url: http://{host}:8081/stats?stats;csv
                proxies: ["FRONTEND", "200s"]
        """) as agent:
            assert ensure_always(
                p(
                    datapoints_have_some_or_all_dims,
                    agent.fake_services,
                    {"proxy_name": "200s", "service_name": "FRONTEND"},
                ),
                10,
            )
            assert any_metric_found(agent.fake_services, ["haproxy_response_2xx"])
def test_does_not_set_hostname_if_not_host_specific():
    with run_agent("""
        hostname: acmeinc.com
        disableHostDimensions: true
        monitors:
          - type: collectd/signalfx-metadata
            persistencePath: /dev/null
          - type: collectd/cpu
          - type: collectd/uptime
    """) as [backend, _, _]:
        assert ensure_always(
            lambda: not has_datapoint_with_dim(backend, "host", "acmeinc.com")
        ), "Got overridden hostname in datapoint"
        assert ensure_always(
            lambda: not has_event_with_dim(backend, "host", "acmeinc.com")
        ), "Got overridden hostname in event"
def test_new_monitor_filtering():
    with Agent.run("""
        monitors:
          - type: internal-metrics
            intervalSeconds: 1
            datapointsToExclude:
              # '*' excludes every metric; the '!'-prefixed entries negate the
              # match and let those metrics back through
              - metricNames:
                  - '*'
                  - '!sfxagent.go_heap_*'
                  - '!sfxagent.go_frees'
    """) as agent:
        is_expected = lambda dp: dp.metric.startswith("sfxagent.go_heap") or dp.metric == "sfxagent.go_frees"

        def no_filtered_metrics():
            for dp in agent.fake_services.datapoints:
                assert is_expected(dp), f"Got unexpected metric name {dp.metric}"
            return True

        assert wait_for(lambda: agent.fake_services.datapoints), "No datapoints received"
        assert ensure_always(no_filtered_metrics, interval_seconds=2, timeout_seconds=5)

        metrics_received = agent.fake_services.datapoints_by_metric.keys()
        assert "sfxagent.go_frees" in metrics_received
        assert "sfxagent.go_heap_inuse" in metrics_received
        assert "sfxagent.go_heap_released" in metrics_received
def test_signalfx_metadata():
    with Agent.run("""
        procPath: /proc
        etcPath: /etc
        monitors:
          - type: collectd/signalfx-metadata
            persistencePath: /var/run/signalfx-agent
          - type: collectd/cpu
          - type: collectd/disk
          - type: collectd/memory
    """) as agent:
        assert wait_for(p(has_datapoint, agent.fake_services, "cpu.utilization", {"plugin": "signalfx-metadata"}))
        assert wait_for(p(has_datapoint, agent.fake_services, "disk_ops.total", {"plugin": "signalfx-metadata"}))
        assert wait_for(p(has_datapoint, agent.fake_services, "memory.utilization", {"plugin": "signalfx-metadata"}))
        assert ensure_always(
            lambda: not has_datapoint(agent.fake_services, "cpu.utilization_per_core", {"plugin": "signalfx-metadata"}),
            timeout_seconds=5,
        )
        assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"