def test_tracing_load():
    """
    Send 100 traces, each with uniquely suffixed service names, through the
    trace forwarder and check that a host correlation datapoint shows up for
    every one of those services.  Afterwards, verify that the correlation
    datapoints stop being emitted.
    """
    port = random.randint(5001, 20000)
    trace_count = 100
    trace_url = f"http://localhost:{port}/v1/trace"
    heartbeat_metric = "sf.int.service.heartbeat"

    config = dedent(
        f"""
        hostname: "testhost"
        writer:
            sendTraceHostCorrelationMetrics: true
            traceHostCorrelationMetricsInterval: 1s
            staleServiceTimeout: 7s
        monitors:
          - type: trace-forwarder
            listenAddress: localhost:{port}
        """
    )

    with Agent.run(config) as agent:
        assert wait_for(p(tcp_port_open_locally, port)), "trace forwarder port never opened!"

        # Phase 1: post one trace per index, tagging the first two spans'
        # service names with that index so every trace introduces new services.
        for idx in range(trace_count):
            spans = _test_trace()
            for span in (spans[0], spans[1]):
                span["localEndpoint"]["serviceName"] += f"-{idx}"

            # The lambda is invoked immediately by the trailing "()", so
            # closing over the loop-local `spans` is safe here.
            post_trace = retry_on_ebadf(
                lambda: requests.post(
                    trace_url,
                    headers={"Content-Type": "application/json"},
                    data=json.dumps(spans),  # pylint:disable=cell-var-from-loop
                )
            )
            resp = post_trace()
            assert resp.status_code == 200

        # Phase 2: every suffixed service must produce a heartbeat datapoint
        # carrying the configured hostname.
        for idx in range(trace_count):
            for service in ("myapp", "file-server"):
                assert wait_for(
                    p(
                        has_datapoint,
                        agent.fake_services,
                        metric_name=heartbeat_metric,
                        dimensions={"sf_hasService": f"{service}-{idx}", "host": "testhost"},
                    )
                ), "Didn't get host correlation datapoint"

        # Phase 3: sleep longer than the 7s staleServiceTimeout configured
        # above, clear what was already received, and make sure no further
        # heartbeat datapoints arrive.
        time.sleep(10)
        agent.fake_services.reset_datapoints()

        assert ensure_never(
            p(has_datapoint, agent.fake_services, metric_name=heartbeat_metric), timeout_seconds=5
        ), "Got infra correlation metric when it should have been expired"
def test_filesystems_fstype_filter():
    """
    Run the filesystems monitor with an fsTypes filter of "!*" and check that
    disk.summary_utilization is still reported while none of the metrics
    listed below are ever seen, and that the agent output contains no "error".
    """
    # Metrics that must NOT show up with the filter in place.
    filtered_metrics = [
        "df_complex.free",
        "df_complex.used",
        "percent_bytes.free",
        "percent_bytes.used",
        "disk.utilization",
    ]
    # The inode metrics are only added to the check on Linux.
    if sys.platform == "linux":
        filtered_metrics += [
            "df_inodes.free",
            "df_inodes.used",
            "percent_inodes.free",
            "percent_inodes.used",
        ]

    config = """
    procPath: /proc
    monitors:
      - type: filesystems
        fsTypes:
          - "!*"
    """

    with Agent.run(config) as agent:
        summary_seen = wait_for(
            p(has_any_metric_or_dim, agent.fake_services, ["disk.summary_utilization"], []),
            timeout_seconds=60,
        )
        assert summary_seen, "timed out waiting for metrics and/or dimensions!"

        assert ensure_never(p(has_any_metric_or_dim, agent.fake_services, filtered_metrics, []))

        assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"
def test_tracing_output():
    """
    Post a single trace to the trace forwarder and check that its spans and
    the matching host correlation datapoint reach the backend, then that the
    correlation datapoint stops being sent.
    """
    port = random.randint(5001, 20000)
    trace_url = f"http://localhost:{port}/v1/trace"
    heartbeat_metric = "sf.int.service.heartbeat"

    config = dedent(
        f"""
        hostname: "testhost"
        writer:
            traceHostCorrelationMetricsInterval: 1s
            staleServiceTimeout: 5s
        monitors:
          - type: trace-forwarder
            listenAddress: localhost:{port}
        """
    )

    with Agent.run(config) as agent:
        assert wait_for(p(tcp_port_open_locally, port)), "trace forwarder port never opened!"

        payload = json.dumps(_test_trace())
        resp = requests.post(
            trace_url,
            headers={"Content-Type": "application/json"},
            data=payload,
        )
        assert resp.status_code == 200

        # The forwarded spans should be visible by tag and by name.
        assert wait_for(p(has_trace_span, agent.fake_services, tags={"env": "prod"})), "Didn't get span tag"
        assert wait_for(p(has_trace_span, agent.fake_services, name="fetch")), "Didn't get span name"

        # The service seen in the trace should be correlated with the host.
        assert wait_for(
            p(
                has_datapoint,
                agent.fake_services,
                metric_name=heartbeat_metric,
                dimensions={"sf_hasService": "myapp", "host": "testhost"},
            )
        ), "Didn't get host correlation datapoint"

        # staleServiceTimeout is 5s in the config above, so wait past it,
        # discard anything already received and expect no new heartbeats.
        time.sleep(8)
        agent.fake_services.reset_datapoints()

        assert ensure_never(
            p(has_datapoint, agent.fake_services, metric_name=heartbeat_metric), timeout_seconds=5
        ), "Got infra correlation metric when it should have been expired"
def test_netio_filter():
    """
    Run the net-io monitor with an interfaces filter of "!*" and check that
    network.total is still reported while no other included metric is ever
    seen, and that the agent output contains no "error".
    """
    # NOTE(review): a second function with this same name is defined in the
    # visible source; if both live in one module the later one replaces this
    # one and only that one would be collected -- confirm.

    # Every included metric except network.total must stay absent.
    unwanted_metrics = METADATA.included_metrics - {"network.total"}

    config = """
    procPath: /proc
    monitors:
      - type: net-io
        interfaces:
          - "!*"
    """

    with Agent.run(config) as agent:
        total_seen = wait_for(
            p(has_datapoint, agent.fake_services, metric_name="network.total"),
            timeout_seconds=60,
        )
        assert total_seen, "timed out waiting for metrics and/or dimensions!"

        assert ensure_never(p(any_metric_found, agent.fake_services, unwanted_metrics))

        assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"
def test_netio_filter():
    """
    Run the net-io monitor with an interfaces filter of "!*" and check that
    network.total is still reported while none of the monitor's other
    self-described metrics are ever seen, and that the agent output contains
    no "error".
    """
    # NOTE(review): another function with this same name is defined earlier
    # in the visible source; if both live in one module this definition
    # replaces the earlier one -- confirm which variant is intended.

    # Start from everything the monitor describes, minus network.total.
    unwanted_metrics = get_monitor_metrics_from_selfdescribe("net-io")
    try:
        unwanted_metrics.remove("network.total")
    except KeyError:
        # Nothing to drop if network.total was not in the collection.
        pass

    config = """
    procPath: /proc
    monitors:
      - type: net-io
        interfaces:
          - "!*"
    """

    with run_agent(config) as (backend, get_output, _):
        total_seen = wait_for(
            p(has_any_metric_or_dim, backend, ["network.total"], []),
            timeout_seconds=60,
        )
        assert total_seen, "timed out waiting for metrics and/or dimensions!"

        assert ensure_never(p(has_any_metric_or_dim, backend, unwanted_metrics, []))

        assert not has_log_message(get_output().lower(), "error"), "error found in agent output!"