def test_hadoop(version):
    """
    Any new versions of hadoop should be manually built, tagged, and pushed to quay.io, i.e.
    docker build \
        -t quay.io/signalfx/hadoop-test:<version> \
        --build-arg HADOOP_VER=<version> \
        <repo_root>/test-services/hadoop
    docker push quay.io/signalfx/hadoop-test:<version>
    """
    with run_container("quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-master") as hadoop_master:
        with run_container("quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-worker1") as hadoop_worker1:
            host = start_hadoop(hadoop_master, hadoop_worker1)

            # start the agent with hadoop config
            config = HADOOP_CONFIG.substitute(host=host, port=8088)
            with Agent.run(config) as agent:
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "plugin", "apache_hadoop")
                ), "Didn't get hadoop datapoints"
                assert wait_for(
                    p(has_datapoint, agent.fake_services, "gauge.hadoop.cluster.metrics.active_nodes", {}, 1)
                ), "expected 1 hadoop worker node"

def run_node(node_type, version):
    """
    Any new versions of hadoop should be manually built, tagged, and pushed to quay.io, i.e.
    docker build \
        -t quay.io/signalfx/hadoop-test:<version> \
        --build-arg HADOOP_VER=<version> \
        <repo_root>/test-services/hadoop
    docker push quay.io/signalfx/hadoop-test:<version>
    """
    with run_container(
        f"quay.io/signalfx/hadoop-test:{version}", hostname="hadoop-master"
    ) as hadoop_master, run_container(
        f"quay.io/signalfx/hadoop-test:{version}", hostname="hadoop-worker1"
    ) as hadoop_worker1:
        if node_type in ["nameNode", "resourceManager"]:
            container = hadoop_master
        else:
            container = hadoop_worker1
        host = container_ip(container)
        port = NODETYPE_PORT[node_type]
        if node_type in ["resourceManager", "nodeManager"]:
            yarn_var = YARN_VAR[node_type]
            yarn_opts = YARN_OPTS % (yarn_var, port, yarn_var)
            cmd = ["/bin/bash", "-c", f"echo 'export {yarn_opts}' >> {YARN_ENV_PATH}"]
            container.exec_run(cmd)

        start_hadoop(hadoop_master, hadoop_worker1)

        # wait for jmx to be available
        assert wait_for(p(tcp_socket_open, host, port)), f"JMX service not listening on port {port}"

        yield host, port

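# Illustrative usage sketch (an assumption, not part of the original suite):
# run_node is consumed as a context manager that yields the JMX host/port for the
# requested node type. The test body and HADOOPJMX_CONFIG template are borrowed
# from test_hadoopjmx below.
def example_namenode_jmx_test(version):
    with run_node("nameNode", version) as (host, port):
        config = HADOOPJMX_CONFIG.substitute(host=host, port=port, nodeType="nameNode")
        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "nodeType", "nameNode")
            ), "Didn't get hadoopjmx datapoints"
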
def test_hadoop_default(version):
    """
    Any new versions of hadoop should be manually built, tagged, and pushed to quay.io, i.e.
    docker build \
        -t quay.io/signalfx/hadoop-test:<version> \
        --build-arg HADOOP_VER=<version> \
        <repo_root>/test-services/hadoop
    docker push quay.io/signalfx/hadoop-test:<version>
    """
    with run_container(
        "quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-master"
    ) as hadoop_master, run_container(
        "quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-worker1"
    ) as hadoop_worker1:
        host = start_hadoop(hadoop_master, hadoop_worker1)

        # start the agent with hadoop config
        config = f"""
            monitors:
              - type: collectd/hadoop
                host: {host}
                port: 8088
                verbose: true
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics - EXCLUDED)
            # Need to run the agent manually because we want to wait for this
            # metric to become 1 but it may be 0 at first.
            assert wait_for(
                p(has_datapoint, agent.fake_services, "gauge.hadoop.cluster.metrics.active_nodes", {}, 1)
            ), "expected 1 hadoop worker node"
            assert has_datapoint_with_dim(
                agent.fake_services, "plugin", "apache_hadoop"
            ), "Didn't get hadoop datapoints"

def test_marathon(marathon_image):
    with run_container("zookeeper:3.5") as zookeeper:
        zkhost = container_ip(zookeeper)
        assert wait_for(p(tcp_socket_open, zkhost, 2181), 60), "zookeeper didn't start"
        with run_container(
            marathon_image, command=["--master", "localhost:5050", "--zk", "zk://{0}:2181/marathon".format(zkhost)]
        ) as service_container:
            host = container_ip(service_container)
            config = dedent(
                f"""
                monitors:
                  - type: collectd/marathon
                    host: {host}
                    port: 8080
                """
            )
            assert wait_for(p(tcp_socket_open, host, 8080), 120), "marathon not listening on port"
            assert wait_for(
                p(http_status, url="http://{0}:8080/v2/info".format(host), status=[200]), 120
            ), "service didn't start"
            with Agent.run(config) as agent:
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "plugin", "marathon")
                ), "didn't get datapoints"

def test_all_kafka_monitors(version):
    with run_kafka(version) as kafka:
        kafka_host = container_ip(kafka)
        with run_container(
            kafka.image.id,
            environment={"JMX_PORT": "8099", "START_AS": "producer", "KAFKA_BROKER": "%s:9092" % (kafka_host,)},
        ) as kafka_producer:
            kafkaproducerhost = container_ip(kafka_producer)
            assert wait_for(p(tcp_socket_open, kafkaproducerhost, 8099), 60), "kafka producer jmx didn't start"
            with run_container(
                kafka.image.id,
                environment={"JMX_PORT": "9099", "START_AS": "consumer", "KAFKA_BROKER": "%s:9092" % (kafka_host,)},
            ) as kafka_consumer:
                kafkaconsumerhost = container_ip(kafka_consumer)
                assert wait_for(p(tcp_socket_open, kafkaconsumerhost, 9099), 60), "kafka consumer jmx didn't start"
                with Agent.run(
                    textwrap.dedent("""
                    monitors:
                      - type: collectd/kafka
                        host: {0}
                        port: 7099
                        clusterName: testCluster
                      - type: collectd/kafka_producer
                        host: {1}
                        port: 8099
                      - type: collectd/kafka_consumer
                        host: {2}
                        port: 9099
                    """.format(kafka_host, kafkaproducerhost, kafkaconsumerhost))
                ) as agent:
                    assert wait_for(
                        p(has_datapoint_with_metric_name, agent.fake_services, "gauge.kafka-active-controllers"),
                        timeout_seconds=60,
                    ), "Didn't get kafka datapoints"
                    assert wait_for(
                        p(has_datapoint_with_dim, agent.fake_services, "cluster", "testCluster"), timeout_seconds=60
                    ), "Didn't get cluster dimension from kafka datapoints"
                    assert wait_for(
                        p(has_datapoint_with_dim, agent.fake_services, "client-id", "console-producer"),
                        timeout_seconds=60,
                    ), "Didn't get client-id dimension from kafka_producer datapoints"
                    assert wait_for(
                        p(has_datapoint_with_dim, agent.fake_services, "client-id", "consumer-1"), timeout_seconds=60
                    ), "Didn't get client-id dimension from kafka_consumer datapoints"

def test_ecs_observer_multi_containers():
    with run_service("ecsmeta") as ecsmeta:
        with run_container("redis:4-alpine") as redis, run_container("mongo:4") as mongo:
            with Agent.run(
                CONFIG.substitute(
                    host=container_ip(ecsmeta),
                    redis_ip=container_ip(redis),
                    mongo_ip=container_ip(mongo),
                    case="metadata_multi_containers",
                )
            ) as agent:
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "container_image", "redis:latest")
                ), "Didn't get redis datapoints"
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "container_image", "mongo:latest")
                ), "Didn't get mongo datapoints"

                # Let redis be removed by docker observer and collectd restart
                time.sleep(5)
                agent.fake_services.datapoints.clear()
                assert ensure_always(
                    lambda: not has_datapoint_with_dim(agent.fake_services, "ClusterName", "seon-fargate-test"), 10
                )

def run_kafka(version, **kwargs):
    """
    Runs a kafka container with zookeeper
    """
    args = dict(kwargs)
    args.setdefault("name", f"kafka-broker-{random_hex()}")
    with run_container("zookeeper:3.5") as zookeeper:
        zkhost = container_ip(zookeeper)
        assert wait_for(p(tcp_socket_open, zkhost, 2181), 60), "zookeeper didn't start"
        with run_service(
            "kafka",
            environment={"KAFKA_ZOOKEEPER_CONNECT": f"{zkhost}:2181", "START_AS": "broker"},
            buildargs={"KAFKA_VERSION": version},
            **args,
        ) as kafka_container:
            kafka_host = container_ip(kafka_container)
            assert wait_for(p(tcp_socket_open, kafka_host, 9092), 60), "kafka broker didn't start"
            assert wait_for(p(tcp_socket_open, kafka_host, 7099), 60), "kafka broker jmx didn't start"
            with run_container(
                kafka_container.image.id,
                environment={"START_AS": "create-topic", "KAFKA_ZOOKEEPER_CONNECT": f"{zkhost}:2181"},
            ) as kafka_topic:
                assert kafka_topic.wait(timeout=DEFAULT_TIMEOUT)["StatusCode"] == 0, "unable to create kafka topic"
                yield kafka_container

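# Illustrative usage sketch (an assumption, not from the original suite): callers
# treat run_kafka as a context manager that yields the broker container once the
# broker, its JMX port (7099), and the test topic are all up, as in
# test_all_kafka_monitors above.
def example_kafka_broker_smoke_test(version):
    with run_kafka(version) as kafka:
        kafka_host = container_ip(kafka)
        assert wait_for(p(tcp_socket_open, kafka_host, 7099), 60), "broker jmx not reachable"
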
def run(config, metrics):
    cadvisor_opts = dict(
        volumes={
            "/": {"bind": "/rootfs", "mode": "ro"},
            "/var/run": {"bind": "/var/run", "mode": "ro"},
            "/sys": {"bind": "/sys", "mode": "ro"},
            "/var/lib/docker": {"bind": "/var/lib/docker", "mode": "ro"},
            "/dev/disk": {"bind": "/dev/disk", "mode": "ro"},
        }
    )
    with run_container("google/cadvisor:latest", **cadvisor_opts) as cadvisor_container, run_container(
        # Run container to generate memory limit metric.
        "alpine",
        command=["tail", "-f", "/dev/null"],
        mem_limit="64m",
    ):
        host = container_ip(cadvisor_container)
        assert wait_for(p(tcp_socket_open, host, 8080), 60), "service didn't start"
        run_agent_verify(config.format(host=host), metrics)

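# Illustrative caller sketch (an assumption): the config string passed to run()
# must carry a {host} placeholder, which run() fills with the cAdvisor container
# IP via str.format before handing it to run_agent_verify. The monitor block
# below is a guess at a minimal cadvisor config, not taken from the original file.
def example_cadvisor_verify():
    config = dedent("""
        monitors:
          - type: cadvisor
            cadvisorURL: http://{host}:8080
    """)
    run(config, METADATA.default_metrics)
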
def test_kong(kong_image):  # pylint: disable=redefined-outer-name
    kong_env = dict(
        KONG_ADMIN_LISTEN="0.0.0.0:8001", KONG_LOG_LEVEL="warn", KONG_DATABASE="postgres", KONG_PG_DATABASE="kong"
    )

    with run_container("postgres:9.5", environment=dict(POSTGRES_USER="******", POSTGRES_DB="kong")) as db:
        db_ip = container_ip(db)
        kong_env["KONG_PG_HOST"] = db_ip

        def db_is_ready():
            return db.exec_run("pg_isready -U kong").exit_code == 0

        assert wait_for(db_is_ready)

        with run_container(kong_image, environment=kong_env, command="sleep inf") as migrations:

            def db_is_reachable():
                return migrations.exec_run("psql -h {} -U kong".format(db_ip)).exit_code == 0

            assert wait_for(db_is_reachable)
            assert migrations.exec_run("kong migrations up --v").exit_code == 0

        with run_container(kong_image, environment=kong_env) as kong:
            kong_ip = container_ip(kong)

            def kong_is_listening():
                try:
                    return get("http://{}:8001/signalfx".format(kong_ip)).status_code == 200
                except RequestException:
                    return False

            assert wait_for(kong_is_listening)

            config = string.Template(
                dedent("""
                monitors:
                  - type: collectd/kong
                    host: $host
                    port: 8001
                    metrics:
                      - metric: connections_handled
                        report: true
                """)
            ).substitute(host=container_ip(kong))

            with run_agent(config) as [backend, _, _]:
                assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "kong")), "Didn't get Kong data point"

def test_postgresql_enhanced():
    with run_container("postgres:10", environment=ENV) as cont:
        host = container_ip(cont)
        assert wait_for(p(tcp_socket_open, host, 5432), 60), "service didn't start"

        target_metric = "pg_blks.toast_hit"
        assert target_metric in METADATA.nondefault_metrics

        with Agent.run(
            f"""
            monitors:
              - type: collectd/postgresql
                host: {host}
                port: 5432
                extraMetrics:
                  - "{target_metric}"
                username: "******"
                password: "******"
                databases:
                  - name: test
                    username: "******"
                    password: "******"
                    interval: 5
                    expireDelay: 10
                    sslMode: disable
            """
        ) as agent:
            assert wait_for(p(has_datapoint, agent.fake_services, metric_name="pg_blks.toast_hit"))

def run_init_system_image(
    base_image,
    with_socat=True,
    path=DOCKERFILES_DIR,
    dockerfile=None,
    ingest_host="ingest.us0.signalfx.com",  # Whatever value is used here needs a self-signed cert in ./images/certs/
    api_host="api.us0.signalfx.com",  # Whatever value is used here needs a self-signed cert in ./images/certs/
    command=None,
    buildargs=None,
):  # pylint: disable=too-many-arguments
    image_id = retry(lambda: build_base_image(base_image, path, dockerfile, buildargs), docker.errors.BuildError)
    print("Image ID: %s" % image_id)
    if with_socat:
        backend_ip = "127.0.0.1"
    else:
        backend_ip = get_host_ip()
    with fake_backend.start(ip_addr=backend_ip) as backend:
        container_options = {
            # Init systems running in the container want permissions
            "privileged": True,
            "volumes": {
                "/sys/fs/cgroup": {"bind": "/sys/fs/cgroup", "mode": "ro"},
                "/tmp/scratch": {"bind": "/tmp/scratch", "mode": "rw"},
            },
            "extra_hosts": {
                # Socat will be running on localhost to forward requests to
                # these hosts to the fake backend
                ingest_host: backend.ingest_host,
                api_host: backend.api_host,
            },
        }

        if command:
            container_options["command"] = command

        with run_container(image_id, wait_for_ip=True, **container_options) as cont:
            if with_socat:
                # Proxy the backend calls through a fake HTTPS endpoint so that we
                # don't have to change the default configuration included by the
                # package. The base_image used should trust the self-signed certs
                # included in the images dir so that the agent doesn't throw TLS
                # verification errors.
                with socat_https_proxy(cont, backend.ingest_host, backend.ingest_port, ingest_host, "127.0.0.1"), \
                     socat_https_proxy(cont, backend.api_host, backend.api_port, api_host, "127.0.0.2"):
                    yield [cont, backend]
            else:
                yield [cont, backend]

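# Illustrative usage sketch (an assumption): tests unpack the container and fake
# backend, start the packaged agent inside the init-system container, and assert
# that datapoints reach the backend. INIT_START_COMMAND is a hypothetical
# per-init-system command constant, not defined in this file.
def example_init_system_test(base_image):
    with run_init_system_image(base_image) as [cont, backend]:
        code, output = cont.exec_run(INIT_START_COMMAND)  # hypothetical constant
        assert code == 0, "agent failed to start: %s" % output.decode("utf-8")
        assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "signalfx-metadata"))
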
def test_rabbitmq():
    with run_container("rabbitmq:3.6-management") as rabbitmq_cont:
        host = container_ip(rabbitmq_cont)
        config = dedent(f"""
            monitors:
              - type: collectd/rabbitmq
                host: {host}
                port: 15672
                username: guest
                password: guest
                collectNodes: true
                collectChannels: true
        """)

        assert wait_for(p(tcp_socket_open, host, 15672), 60), "service didn't start"

        with run_agent(config) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "rabbitmq")
            ), "Didn't get rabbitmq datapoints"
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin_instance", "%s-15672" % host)
            ), "Didn't get expected plugin_instance dimension"

def test_postgresql_defaults():
    with run_container("postgres:10", environment=ENV) as cont:
        host = container_ip(cont)
        assert wait_for(p(tcp_socket_open, host, 5432), 60), "service didn't start"

        with Agent.run(
            f"""
            monitors:
              - type: collectd/postgresql
                host: {host}
                port: 5432
                username: "******"
                password: "******"
                queries:
                  - name: "exampleQuery"
                    minVersion: 60203
                    maxVersion: 200203
                    statement: |
                      SELECT coalesce(sum(n_live_tup), 0) AS live, coalesce(sum(n_dead_tup), 0) AS dead
                      FROM pg_stat_user_tables;
                    results:
                      - type: gauge
                        instancePrefix: live
                        valuesFrom:
                          - live
                databases:
                  - name: test
                    username: "******"
                    password: "******"
                    interval: 5
                    expireDelay: 10
                    sslMode: disable
            """
        ) as agent:
            verify(agent, METADATA.default_metrics)

def test_ecs_container_stats():
    with run_service("ecsmeta") as ecsmeta, run_container("redis:4-alpine") as redis:
        ecsmeta_ip = container_ip(ecsmeta)
        redis_ip = container_ip(redis)
        with Agent.run(
            """
            monitors:
              - type: ecs-metadata
                enableExtraCPUMetrics: true
                enableExtraMemoryMetrics: true
                metadataEndpoint: http://%s/metadata_single?redis_ip=%s
                statsEndpoint: http://%s/stats
            """
            % (ecsmeta_ip, redis_ip, ecsmeta_ip)
        ) as agent:
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services, "cpu.percent")
            ), "Didn't get docker cpu datapoints"
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services, "memory.percent")
            ), "Didn't get docker memory datapoints"
            assert wait_for(
                # container_id is included in the stats.json file in the ecsmeta app
                # because the stats data come from the ECS metadata API rather than
                # directly from the docker container.
                p(
                    has_datapoint_with_dim,
                    agent.fake_services,
                    "container_id",
                    "c42fa5a73634bcb6e301dfb7b13ac7ead2af473210be6a15da75a290c283b66c",
                )
            ), "Didn't get redis datapoints"

def test_mongo_enhanced_metrics():
    with run_container("mongo:3.6") as mongo_cont:
        host = container_ip(mongo_cont)
        config = dedent(f"""
            monitors:
              - type: collectd/mongodb
                host: {host}
                port: 27017
                databases: [admin]
                sendCollectionMetrics: true
                sendCollectionTopMetrics: true
        """)

        assert wait_for(p(tcp_socket_open, host, 27017), 60), "service didn't start"

        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint, agent.fake_services, metric_name="gauge.collection.size"), 60
            ), "Did not get datapoint from SendCollectionMetrics config"
            assert wait_for(
                p(has_datapoint, agent.fake_services, metric_name="counter.collection.commandsTime"), 60
            ), "Did not get datapoint from SendCollectionTopMetrics config"

def test_postgresql():
    with run_container("postgres:10", environment=env) as cont:
        config = config_temp.substitute(host=cont.attrs["NetworkSettings"]["IPAddress"])
        with run_agent(config) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "postgresql")
            ), "Didn't get postgresql datapoints"
            assert wait_for(p(has_datapoint_with_metric_name, backend, "pg_blks.toast_hit"))

def run_etcd(tls=False, **kwargs):
    if tls:
        cmd = """
        --listen-client-urls https://0.0.0.0:2379
        --advertise-client-urls https://0.0.0.0:2379
        --trusted-ca-file /opt/testing/certs/server.crt
        --cert-file /opt/testing/certs/server.crt
        --key-file /opt/testing/certs/server.key
        --client-cert-auth
        """
        with run_service("etcd", command=cmd, **kwargs) as container:
            host = container_ip(container)
            assert wait_for(p(tcp_socket_open, host, 2379), 60), "service didn't start"
            yield container
    else:
        cmd = """
        --listen-client-urls http://0.0.0.0:2379,http://0.0.0.0:4001
        --advertise-client-urls http://0.0.0.0:2379,http://0.0.0.0:4001
        """
        with run_container("quay.io/coreos/etcd:v2.3.8", command=cmd) as container:
            host = container_ip(container)
            assert wait_for(p(tcp_socket_open, host, 2379), 60), "service didn't start"
            yield container

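# Illustrative usage sketch (an assumption): both branches of run_etcd yield the
# running container once port 2379 is open, so callers only differ in whether
# they pass tls=True and present client certs when scraping.
def example_etcd_plaintext_test():
    with run_etcd() as etcd_cont:
        host = container_ip(etcd_cont)
        assert wait_for(p(tcp_socket_open, host, 2379), 60), "etcd not reachable"
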
def test_python_runner_with_redis():
    with run_container("redis:4-alpine") as test_container:
        host = container_ip(test_container)
        config = MONITOR_CONFIG.substitute(host=host, bundle_root=BUNDLE_DIR)
        assert wait_for(p(tcp_socket_open, host, 6379), 60), "redis is not listening on port"

        redis_client = redis.StrictRedis(host=host, port=6379, db=0)
        assert wait_for(redis_client.ping, 60), "service didn't start"

        with run_agent(config) as [backend, get_output, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "redis_info")), "didn't get datapoints"

            assert wait_for(p(regex_search_matches_output, get_output, PID_RE.search))
            pid = int(PID_RE.search(get_output()).groups()[0])

            os.kill(pid, signal.SIGTERM)

            time.sleep(3)
            backend.reset_datapoints()

            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "redis_info")
            ), "didn't get datapoints after Python process was killed"

            assert wait_for(
                p(has_datapoint, backend, metric_name="counter.lru_clock", metric_type=sf_pbuf.CUMULATIVE_COUNTER),
                timeout_seconds=3,
            ), "metric type was wrong"

def run_init_system_image(base_image):
    image_id = build_base_image(base_image)
    print("Image ID: %s" % image_id)
    with fake_backend.start() as backend:
        container_options = {
            # Init systems running in the container want permissions
            "privileged": True,
            "volumes": {
                "/sys/fs/cgroup": {"bind": "/sys/fs/cgroup", "mode": "ro"},
                "/tmp/scratch": {"bind": "/tmp/scratch", "mode": "rw"},
            },
            "extra_hosts": {
                # Socat will be running on localhost to forward requests to
                # these hosts to the fake backend
                "ingest.signalfx.com": "127.0.0.1",
                "api.signalfx.com": "127.0.0.2",
            },
        }
        with run_container(image_id, wait_for_ip=False, **container_options) as cont:
            # Proxy the backend calls through a fake HTTPS endpoint so that we
            # don't have to change the default configuration included by the
            # package. The base_image used should trust the self-signed certs
            # included in the images dir so that the agent doesn't throw TLS
            # verification errors.
            with socat_https_proxy(cont, backend.ingest_host, backend.ingest_port, "ingest.signalfx.com", "127.0.0.1"), \
                 socat_https_proxy(cont, backend.api_host, backend.api_port, "api.signalfx.com", "127.0.0.2"):
                yield [cont, backend]

def test_bad_globbing():
    with run_container("zookeeper:3.4") as zk:
        assert wait_for(p(container_cmd_exit_0, zk, "nc -z localhost 2181"), 5)
        create_znode(zk, "/env", "prod")

        final_conf = bad_glob_config.substitute(zk_endpoint="%s:2181" % container_ip(zk))
        with run_agent(final_conf) as [backend, get_output, _]:
            assert wait_for(lambda: "Zookeeper only supports globs" in get_output())

def run_kong(kong_version):
    pg_env = dict(POSTGRES_USER="******", POSTGRES_PASSWORD="******", POSTGRES_DB="kong")
    kong_env = dict(
        KONG_ADMIN_LISTEN="0.0.0.0:8001",
        KONG_LOG_LEVEL="warn",
        KONG_DATABASE="postgres",
        KONG_PG_DATABASE=pg_env["POSTGRES_DB"],
        KONG_PG_PASSWORD=pg_env["POSTGRES_PASSWORD"],
    )

    with run_container("postgres:9.5", environment=pg_env) as db:
        db_ip = container_ip(db)
        kong_env["KONG_PG_HOST"] = db_ip

        assert wait_for(p(tcp_socket_open, db_ip, 5432))

        with run_service(
            "kong", buildargs={"KONG_VERSION": kong_version}, environment=kong_env, command="sleep inf"
        ) as migrations:
            if kong_version in ["0.15-centos", "1.0.0-centos"]:
                assert container_cmd_exit_0(migrations, "kong migrations bootstrap")
            else:
                assert container_cmd_exit_0(migrations, "kong migrations up")

        with run_service(
            "kong", buildargs={"KONG_VERSION": kong_version}, environment=kong_env
        ) as kong, run_container(
            "openresty/openresty:1.15.8.1-4-centos",
            files=[(SCRIPT_DIR / "echo.conf", "/etc/nginx/conf.d/echo.conf")],
        ) as echo:
            kong_ip = container_ip(kong)
            kong_admin = f"http://{kong_ip}:8001"
            assert wait_for(p(http_status, url=f"{kong_admin}/signalfx", status=[200]))

            paths, _ = configure_kong(kong_admin, kong_version, container_ip(echo))

            # Needs time to settle after creating routes.
            retry(lambda: run_traffic(paths, f"http://{kong_ip}:8000"), AssertionError, interval_seconds=2)

            yield kong_ip

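# Illustrative usage sketch (an assumption): run_kong yields only the Kong
# container IP, after migrations have completed and sample traffic has been
# pushed through the proxy, so a test just points at the admin port.
def example_kong_admin_check(kong_version):
    with run_kong(kong_version) as kong_ip:
        assert wait_for(p(http_status, url=f"http://{kong_ip}:8001/signalfx", status=[200]))
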
def run_redis(image="redis:4-alpine"):
    with run_container(image) as redis_container:
        host = container_ip(redis_container)
        assert wait_for(p(tcp_socket_open, host, 6379), 60), "service not listening on port"

        redis_client = redis.StrictRedis(host=host, port=6379, db=0)
        assert wait_for(redis_client.ping, 60), "service didn't start"

        yield [host, redis_client]

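# Illustrative usage sketch (an assumption): the yielded StrictRedis client lets
# a test seed data before pointing a monitor at the yielded host. The key and
# value below are hypothetical.
def example_redis_seed():
    with run_redis() as [host, redis_client]:
        redis_client.set("example-key", "example-value")  # hypothetical key/value
        assert redis_client.get("example-key") == b"example-value"
        assert wait_for(p(tcp_socket_open, host, 6379), 60)
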
def test_logstash_tcp_server(version):
    with run_container(
        f"docker.elastic.co/logstash/logstash:{version}",
        environment={"XPACK_MONITORING_ENABLED": "false", "CONFIG_RELOAD_AUTOMATIC": "true"},
    ) as logstash_cont:
        agent_host = get_host_ip()
        copy_file_content_into_container(SAMPLE_EVENTS, logstash_cont, "/tmp/events.log")

        config = dedent("""
            monitors:
              - type: logstash-tcp
                mode: server
                host: 0.0.0.0
                port: 0
        """)
        with Agent.run(config) as agent:
            log_match = wait_for_value(lambda: LISTEN_LOG_RE.search(agent.output))
            assert log_match is not None
            listen_port = int(log_match.groups()[0])

            copy_file_content_into_container(
                # The pipeline conf is written for server mode so patch it to
                # act as a client.
                PIPELINE_CONF.read_text(encoding="utf-8")
                .replace('mode => "server"', 'mode => "client"')
                .replace('host => "0.0.0.0"', f'host => "{agent_host}"')
                .replace("port => 8900", f"port => {listen_port}"),
                logstash_cont,
                "/usr/share/logstash/pipeline/test.conf",
            )

            assert wait_for(
                p(has_datapoint, agent.fake_services, "logins.count", value=7, dimensions={}), timeout_seconds=180
            )
            assert wait_for(p(has_datapoint, agent.fake_services, "process_time.count", value=7, dimensions={}))
            assert wait_for(p(has_datapoint, agent.fake_services, "process_time.mean", value=4, dimensions={}))

def test_bad_globbing():
    with run_container("zookeeper:3.4") as zk_cont:
        zkhost = container_ip(zk_cont)
        assert wait_for(p(tcp_socket_open, zkhost, 2181), 30)
        create_znode(zk_cont, "/env", "prod")

        final_conf = BAD_GLOB_CONFIG.substitute(zk_endpoint="%s:2181" % zkhost)
        with run_agent(final_conf) as [_, get_output, _]:
            assert wait_for(lambda: "Zookeeper only supports globs" in get_output())

def test_kong(kong_image):
    kong_env = dict(
        KONG_ADMIN_LISTEN="0.0.0.0:8001", KONG_LOG_LEVEL="warn", KONG_DATABASE="postgres", KONG_PG_DATABASE="kong"
    )

    with run_container("postgres:9.5", environment=dict(POSTGRES_USER="******", POSTGRES_DB="kong")) as db:
        db_ip = container_ip(db)
        kong_env["KONG_PG_HOST"] = db_ip

        def db_is_ready():
            return db.exec_run("pg_isready -U postgres").exit_code == 0

        assert wait_for(db_is_ready)

        with run_container(kong_image, environment=kong_env, command="sleep inf") as migrations:

            def db_is_reachable():
                return migrations.exec_run("psql -h {} -U postgres".format(db_ip)).exit_code == 0

            assert wait_for(db_is_reachable)
            assert migrations.exec_run("kong migrations up --v").exit_code == 0

        with run_container(kong_image, environment=kong_env) as kong:
            kong_ip = container_ip(kong)

            def kong_is_listening():
                try:
                    return get("http://{}:8001/signalfx".format(kong_ip)).status_code == 200
                except RequestException:
                    return False

            assert wait_for(kong_is_listening)

            config = string.Template(
                dedent("""
                monitors:
                  - type: collectd/kong
                    host: $host
                    port: 8001
                    metrics:
                      - metric: connections_handled
                        report: true
                """)
            ).substitute(host=container_ip(kong))

            with run_agent(config) as [backend, _, _]:
                assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "kong")), "Didn't get Kong data point"

def test_hadoopjmx(version, nodeType):
    """
    Any new versions of hadoop should be manually built, tagged, and pushed to quay.io, i.e.
    docker build \
        -t quay.io/signalfx/hadoop-test:<version> \
        --build-arg HADOOP_VER=<version> \
        <repo_root>/test-services/hadoop
    docker push quay.io/signalfx/hadoop-test:<version>
    """
    with run_container("quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-master") as hadoop_master:
        with run_container("quay.io/signalfx/hadoop-test:%s" % version, hostname="hadoop-worker1") as hadoop_worker1:
            if nodeType in ["nameNode", "resourceManager"]:
                container = hadoop_master
            else:
                container = hadoop_worker1
            host = container_ip(container)
            port = NODETYPE_PORT[nodeType]
            if nodeType in ["resourceManager", "nodeManager"]:
                yarn_var = YARN_VAR[nodeType]
                yarn_opts = YARN_OPTS % (yarn_var, port, yarn_var)
                cmd = ["/bin/bash", "-c", "echo 'export %s' >> %s" % (yarn_opts, YARN_ENV_PATH)]
                container.exec_run(cmd)

            start_hadoop(hadoop_master, hadoop_worker1)

            # wait for jmx to be available
            assert wait_for(p(tcp_socket_open, host, port), 60), "jmx service not listening on port %d" % port

            # start the agent with hadoopjmx config
            config = HADOOPJMX_CONFIG.substitute(host=host, port=port, nodeType=nodeType)
            with run_agent(config) as [backend, _, _]:
                assert wait_for(
                    p(has_datapoint_with_dim, backend, "nodeType", nodeType)
                ), "Didn't get hadoopjmx datapoints for nodeType %s" % nodeType

def run_consumer(image, kafka_host, **kwargs):
    with run_container(
        image,
        name=f"kafka-consumer-{random_hex()}",
        environment={"JMX_PORT": "9099", "START_AS": "consumer", "KAFKA_BROKER": f"{kafka_host}:9092"},
        **kwargs,
    ) as kafka_consumer:
        host = container_ip(kafka_consumer)
        assert wait_for(p(tcp_socket_open, host, 9099), 60), "kafka consumer jmx didn't start"
        yield host

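# Illustrative usage sketch (an assumption): run_consumer is layered on top of
# run_kafka, reusing the broker image and yielding the consumer IP with JMX on
# 9099 for a collectd/kafka_consumer monitor to scrape.
def example_kafka_consumer_smoke_test(version):
    with run_kafka(version) as kafka:
        with run_consumer(kafka.image.id, container_ip(kafka)) as consumer_host:
            assert wait_for(p(tcp_socket_open, consumer_host, 9099), 60)
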
def test_etcd_monitor():
    with run_container("quay.io/coreos/etcd:v2.3.8", command=ETCD_COMMAND) as etcd_cont:
        config = etcd_config.substitute(host=etcd_cont.attrs["NetworkSettings"]["IPAddress"])
        with run_agent(config) as [backend, _, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "etcd")), "Didn't get etcd datapoints"

def test_rabbitmq():
    with run_container("rabbitmq:3.6-management") as rabbitmq_cont:
        host = rabbitmq_cont.attrs["NetworkSettings"]["IPAddress"]
        config = rabbitmq_config.substitute(host=host)
        wait_for_rabbit_to_start(rabbitmq_cont)
        with run_agent(config) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "rabbitmq")
            ), "Didn't get rabbitmq datapoints"
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin_instance", "%s-15672" % host)
            ), "Didn't get expected plugin_instance dimension"

def test_consul():
    with run_container("consul:0.9.3") as consul_cont:
        host = container_ip(consul_cont)
        config = CONSUL_CONFIG.substitute(host=host)
        assert wait_for(p(tcp_socket_open, host, 8500), 60), "consul service didn't start"
        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services, "gauge.consul.catalog.services.total"), 60
            ), "Didn't get consul datapoints"