def test_hadoopjmx(version, nodeType):
    """
    Any new versions of hadoop should be manually built, tagged, and pushed to quay.io, i.e.
    docker build \
        -t quay.io/signalfx/hadoop-test:<version> \
        --build-arg HADOOP_VER=<version> \
        <repo_root>/test-services/hadoop
    docker push quay.io/signalfx/hadoop-test:<version>
    """
    image = "quay.io/signalfx/hadoop-test:%s" % version
    with run_container(image, hostname="hadoop-master") as master:
        with run_container(image, hostname="hadoop-worker1") as worker:
            # nameNode and resourceManager run on the master; the other node
            # types are monitored on the worker.
            target = master if nodeType in ["nameNode", "resourceManager"] else worker
            host = container_ip(target)
            port = NODETYPE_PORT[nodeType]
            if nodeType in ["resourceManager", "nodeManager"]:
                # Inject the JMX options for this yarn role into yarn-env.sh
                # before the cluster is started.
                var = YARN_VAR[nodeType]
                opts = YARN_OPTS % (var, port, var)
                target.exec_run(["/bin/bash", "-c", "echo 'export %s' >> %s" % (opts, YARN_ENV_PATH)])

            start_hadoop(master, worker)

            # wait for jmx to be available
            assert wait_for(p(tcp_socket_open, host, port), 60), "jmx service not listening on port %d" % port

            # start the agent with hadoopjmx config
            config = HADOOPJMX_CONFIG.substitute(host=host, port=port, nodeType=nodeType)
            with run_agent(config) as [backend, _, _]:
                assert wait_for(p(has_datapoint_with_dim, backend, "nodeType", nodeType)), (
                    "Didn't get hadoopjmx datapoints for nodeType %s" % nodeType
                )
Example #2
0
def test_kong(kong_image):  # pylint: disable=redefined-outer-name
    """Run Kong against a postgres backend and verify the collectd/kong monitor reports data."""
    kong_env = {
        "KONG_ADMIN_LISTEN": "0.0.0.0:8001",
        "KONG_LOG_LEVEL": "warn",
        "KONG_DATABASE": "postgres",
        "KONG_PG_DATABASE": "kong",
    }

    pg_env = dict(POSTGRES_USER="******", POSTGRES_DB="kong")
    with run_container("postgres:9.5", environment=pg_env) as db:
        db_ip = container_ip(db)
        kong_env["KONG_PG_HOST"] = db_ip

        # Postgres must accept connections before migrations can run.
        assert wait_for(lambda: db.exec_run("pg_isready -U kong").exit_code == 0)

        # One-shot container that only applies the Kong schema migrations.
        with run_container(kong_image, environment=kong_env, command="sleep inf") as migrations:
            psql_cmd = "psql -h {} -U kong".format(db_ip)

            assert wait_for(lambda: migrations.exec_run(psql_cmd).exit_code == 0)
            assert migrations.exec_run("kong migrations up --v").exit_code == 0

        with run_container(kong_image, environment=kong_env) as kong:
            kong_ip = container_ip(kong)
            admin_url = "http://{}:8001/signalfx".format(kong_ip)

            def admin_api_up():
                try:
                    return get(admin_url).status_code == 200
                except RequestException:
                    return False

            assert wait_for(admin_api_up)

            config = string.Template(
                dedent("""
            monitors:
              - type: collectd/kong
                host: $host
                port: 8001
                metrics:
                  - metric: connections_handled
                    report: true
            """)).substitute(host=container_ip(kong))

            with run_agent(config) as [backend, _, _]:
                assert wait_for(
                    p(has_datapoint_with_dim, backend, "plugin", "kong")
                ), "Didn't get Kong data point"
def test_python_runner_with_redis():
    """Monitor redis via the python runner and verify it recovers after its Python subprocess is killed."""
    with run_container("redis:4-alpine") as redis_cont:
        host = container_ip(redis_cont)
        config = MONITOR_CONFIG.substitute(host=host, bundle_root=BUNDLE_DIR)
        assert wait_for(p(tcp_socket_open, host, 6379), 60), "redis is not listening on port"

        client = redis.StrictRedis(host=host, port=6379, db=0)
        assert wait_for(client.ping, 60), "service didn't start"

        with run_agent(config) as [backend, get_output, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "redis_info")), "didn't get datapoints"

            # Pull the Python subprocess pid out of the agent's log output.
            assert wait_for(p(regex_search_matches_output, get_output, PID_RE.search))
            pid = int(PID_RE.search(get_output()).groups()[0])

            os.kill(pid, signal.SIGTERM)

            # Give the runner a moment, then drop old datapoints so the next
            # assertions only see data produced after the kill.
            time.sleep(3)
            backend.datapoints.clear()

            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "redis_info")
            ), "didn't get datapoints after Python process was killed"

            assert wait_for(
                p(has_datapoint, backend, metric_name="counter.lru_clock", metric_type=sf_pbuf.CUMULATIVE_COUNTER),
                timeout_seconds=3,
            ), "metric type was wrong"
Example #4
0
def run_kafka(version):
    """
    Runs a kafka container with zookeeper

    Builds the kafka service image for *version* (via KAFKA_VERSION buildarg),
    starts it as a broker pointed at a fresh zookeeper, asks a second service
    instance to create a topic, and yields the broker container.
    """
    with run_container("zookeeper:3.5") as zookeeper:
        zkhost = container_ip(zookeeper)
        # The broker cannot register until zookeeper is accepting connections.
        assert wait_for(p(tcp_socket_open, zkhost, 2181),
                        60), "zookeeper didn't start"
        with run_service(
                "kafka",
                environment={
                    "JMX_PORT": "7099",
                    "KAFKA_ZOOKEEPER_CONNECT": "%s:2181" % (zkhost, ),
                    "START_AS": "broker"
                },
                buildargs={"KAFKA_VERSION": version},
        ) as kafka_container:
            # NOTE(review): unlike the broker above, this run_service call is
            # not entered with `with`.  If run_service is a context manager,
            # the create-topic container may never actually run or get cleaned
            # up -- confirm the intended semantics.
            run_service(
                "kafka",
                environment={
                    "START_AS": "create-topic",
                    "KAFKA_ZOOKEEPER_CONNECT": "%s:2181" % (zkhost, )
                },
                buildargs={"KAFKA_VERSION": version},
            )
            yield kafka_container
Example #5
0
def test_etcd_monitor():
    """Sanity check that the etcd monitor emits datapoints for a running etcd."""
    with run_container("quay.io/coreos/etcd:v2.3.8", command=ETCD_COMMAND) as etcd_cont:
        etcd_host = container_ip(etcd_cont)
        assert wait_for(p(tcp_socket_open, etcd_host, 2379), 60), "service didn't start"

        agent_conf = ETCD_CONFIG.substitute(host=etcd_host)
        with run_agent(agent_conf) as [backend, _, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "etcd")), "Didn't get etcd datapoints"
Example #6
0
def test_bad_globbing():
    """The agent should log an error when given an unsupported zookeeper glob."""
    with run_container("zookeeper:3.4") as zk:
        host = container_ip(zk)
        assert wait_for(p(tcp_socket_open, host, 2181), 30)
        create_znode(zk, "/env", "prod")

        conf = BAD_GLOB_CONFIG.substitute(zk_endpoint="%s:2181" % host)
        with run_agent(conf) as [_, get_output, _]:
            assert wait_for(lambda: "Zookeeper only supports globs" in get_output())
def run_redis(image="redis:4-alpine"):
    """Start a redis container and yield [host_ip, redis_client] once it answers PING."""
    with run_container(image) as cont:
        host = container_ip(cont)
        assert wait_for(p(tcp_socket_open, host, 6379), 60), "service not listening on port"

        client = redis.StrictRedis(host=host, port=6379, db=0)
        assert wait_for(client.ping, 60), "service didn't start"

        yield [host, client]
Example #8
0
def test_postgresql():
    """The postgresql monitor should emit plugin datapoints including pg_blks.toast_hit."""
    with run_container("postgres:10", environment=ENV) as pg_cont:
        pg_host = container_ip(pg_cont)
        assert wait_for(p(tcp_socket_open, pg_host, 5432), 60), "service didn't start"

        agent_conf = CONFIG_TEMP.substitute(host=pg_host)
        with run_agent(agent_conf) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "postgresql")
            ), "Didn't get postgresql datapoints"
            assert wait_for(p(has_datapoint_with_metric_name, backend, "pg_blks.toast_hit"))
def test_bad_globbing():
    """The agent should log an error when given an unsupported zookeeper glob."""
    with run_container("zookeeper:3.4") as zk:
        # Readiness is checked from inside the container with netcat here.
        assert wait_for(p(container_cmd_exit_0, zk, "nc -z localhost 2181"), 5)
        create_znode(zk, "/env", "prod")

        endpoint = "%s:2181" % container_ip(zk)
        conf = BAD_GLOB_CONFIG.substitute(zk_endpoint=endpoint)
        with run_agent(conf) as [_, get_output, _]:
            assert wait_for(lambda: "Zookeeper only supports globs" in get_output())
Example #10
0
def test_redis(image):
    """The redis_info monitor should emit datapoints for the given redis image."""
    with run_container(image) as redis_cont:
        host = container_ip(redis_cont)
        assert wait_for(p(tcp_socket_open, host, 6379), 60), "service not listening on port"

        client = redis.StrictRedis(host=host, port=6379, db=0)
        assert wait_for(client.ping, 60), "service didn't start"

        conf = MONITOR_CONFIG.substitute(host=host)
        with run_agent(conf) as [backend, _, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "redis_info")), "didn't get datapoints"
Example #11
0
def test_basic_etcd2_config():
    """Agent config sourced from etcd2 should produce datapoints and the env dimension."""
    with run_container(ETCD2_IMAGE, command=ETCD_COMMAND) as etcd:
        assert wait_for(p(container_cmd_exit_0, etcd, "/etcdctl ls"), 5), "etcd didn't start"
        # Seed the config tree the agent will read its monitors from.
        for path, value in [
            ("/env", "prod"),
            ("/monitors/cpu", "- type: collectd/cpu"),
            ("/monitors/signalfx-metadata", "- type: collectd/signalfx-metadata"),
        ]:
            create_path(etcd, path, value)

        endpoint = "%s:2379" % container_ip(etcd)
        with run_agent(CONFIG.substitute(endpoint=endpoint)) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "signalfx-metadata")
            ), "Datapoints didn't come through"
            assert wait_for(p(has_datapoint_with_dim, backend, "env", "prod")), "dimension wasn't set"
Example #12
0
def devstack():
    """Start a privileged devstack container (systemd as entrypoint) and yield it once started."""
    with run_container(
        "quay.io/signalfx/devstack:latest",
        entrypoint="/lib/systemd/systemd",
        privileged=True,
        volumes={
            "/lib/modules": {"bind": "/lib/modules", "mode": "ro"},
            "/sys/fs/cgroup": {"bind": "/sys/fs/cgroup", "mode": "ro"},
        },
        environment={"container": "docker"},
    ) as container:
        code, output = container.exec_run("start-devstack.sh")
        assert code == 0, "devstack failed to start:\n%s" % output.decode("utf-8")
        yield container
Example #13
0
def run_vault():
    """Start a dev-mode vault container; yield [hvac_client, get_audit_events_callable]."""
    with run_container("vault:1.0.2") as vault_cont:
        vault_ip = container_ip(vault_cont)
        assert wait_for(p(tcp_socket_open, vault_ip, 8200), 30)
        # The dev server prints the root token to its logs on startup.
        assert wait_for(lambda: "Root Token:" in vault_cont.logs().decode("utf-8"), 10)

        match = re.search(r"Root Token: (.*)$", vault_cont.logs().decode("utf-8"), re.MULTILINE)
        token = match.group(1)
        assert token, "Could not get root token of vault server"
        client = hvac.Client(url=f"http://{vault_ip}:8200", token=token)
        # Raw audit log to stdout so audit events can be parsed from container logs.
        client.sys.enable_audit_device(
            device_type="file", options={"log_raw": True, "prefix": AUDIT_PREFIX, "file_path": "stdout"}
        )
        yield [client, lambda: parse_audit_events_from_logs(vault_cont)]
def test_marathon(marathon_image):
    """The collectd/marathon monitor should collect datapoints from a marathon master."""
    with run_container("zookeeper:3.5") as zookeeper:
        zkhost = container_ip(zookeeper)
        assert wait_for(p(tcp_socket_open, zkhost, 2181), 60), "zookeeper didn't start"

        marathon_cmd = ["--master", "localhost:5050", "--zk", "zk://{0}:2181/marathon".format(zkhost)]
        with run_container(marathon_image, command=marathon_cmd) as service_container:
            host = container_ip(service_container)
            config = dedent(
                f"""
                monitors:
                - type: collectd/marathon
                  host: {host}
                  port: 8080
                """
            )

            assert wait_for(p(tcp_socket_open, host, 8080), 120), "marathon not listening on port"
            info_url = "http://{0}:8080/v2/info".format(host)
            assert wait_for(p(http_status, url=info_url, status=[200]), 120), "service didn't start"

            with run_agent(config) as [backend, _, _]:
                assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "marathon")), "didn't get datapoints"
Example #15
0
def test_interior_globbing():
    """Monitors matched by an interior glob in etcd should be loaded, including ones added later."""
    with run_container(ETCD2_IMAGE, command=ETCD_COMMAND) as etcd:
        assert wait_for(p(container_cmd_exit_0, etcd, "/etcdctl ls"), 5), "etcd didn't start"
        create_path(etcd, "/env", "prod")
        create_path(etcd, "/services/cpu/monitor", "- type: collectd/cpu")
        create_path(etcd, "/services/signalfx/monitor", "- type: collectd/signalfx-metadata")

        endpoint = "%s:2379" % container_ip(etcd)
        with run_agent(INTERNAL_GLOB_CONFIG.substitute(endpoint=endpoint)) as [backend, _, _]:
            assert wait_for(
                p(has_event_with_dim, backend, "plugin", "signalfx-metadata")
            ), "Datapoints didn't come through"

            # A path created after startup should be picked up by the glob too.
            create_path(etcd, "/services/uptime/monitor", "- type: collectd/uptime")
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "uptime")), "didn't get uptime datapoints"
Example #16
0
def test_basic_zk_config():
    """Agent config sourced from zookeeper should produce datapoints and the env dimension."""
    with run_container("zookeeper:3.4") as zk_cont:
        zkhost = container_ip(zk_cont)
        assert wait_for(p(tcp_socket_open, zkhost, 2181), 30)
        # Seed the znode tree the agent reads its monitor list from.
        for znode, value in [
            ("/env", "prod"),
            ("/monitors", ""),
            ("/monitors/cpu", "- type: collectd/cpu"),
            ("/monitors/signalfx-metadata", "- type: collectd/signalfx-metadata"),
        ]:
            create_znode(zk_cont, znode, value)

        with run_agent(CONFIG.substitute(zk_endpoint="%s:2181" % zkhost)) as [backend, _, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "signalfx-metadata"))
            assert wait_for(p(has_datapoint_with_dim, backend, "env", "prod"))
Example #17
0
def run_init_system_image(base_image, with_socat=True):
    """
    Build an init-system test image from *base_image*, start it against a
    fake backend, and yield [container, backend].

    With *with_socat* true the fake backend binds 127.0.0.1 and in-container
    requests to ingest/api.signalfx.com are forwarded to it through socat
    HTTPS proxies; otherwise the backend binds the host IP and is reached
    directly via the extra_hosts entries.
    """
    image_id = build_base_image(base_image)
    print("Image ID: %s" % image_id)
    if with_socat:
        backend_ip = "127.0.0.1"
    else:
        backend_ip = get_host_ip()
    with fake_backend.start(ip_addr=backend_ip) as backend:
        container_options = {
            # Init systems running in the container want permissions
            "privileged": True,
            "volumes": {
                "/sys/fs/cgroup": {
                    "bind": "/sys/fs/cgroup",
                    "mode": "ro"
                },
                "/tmp/scratch": {
                    "bind": "/tmp/scratch",
                    "mode": "rw"
                },
            },
            "extra_hosts": {
                # Socat will be running on localhost to forward requests to
                # these hosts to the fake backend
                "ingest.signalfx.com": backend.ingest_host,
                "api.signalfx.com": backend.api_host,
            },
        }
        with run_container(image_id, wait_for_ip=True,
                           **container_options) as cont:
            if with_socat:
                # Proxy the backend calls through a fake HTTPS endpoint so that we
                # don't have to change the default configuration included by the
                # package.  The base_image used should trust the self-signed certs
                # included in the images dir so that the agent doesn't throw TLS
                # verification errors.
                with socat_https_proxy(cont, backend.ingest_host,
                                       backend.ingest_port,
                                       "ingest.signalfx.com",
                                       "127.0.0.1"), socat_https_proxy(
                                           cont, backend.api_host,
                                           backend.api_port,
                                           "api.signalfx.com", "127.0.0.2"):
                    yield [cont, backend]
            else:
                yield [cont, backend]
def test_basic_zk_config():
    """Agent config sourced from zookeeper should produce datapoints and the env dimension."""
    with run_container("zookeeper:3.4") as zk:
        # Readiness is checked from inside the container with netcat here.
        assert wait_for(p(container_cmd_exit_0, zk, "nc -z localhost 2181"), 5)
        for znode, value in [
            ("/env", "prod"),
            ("/monitors", ""),
            ("/monitors/cpu", "- type: collectd/cpu"),
            ("/monitors/signalfx-metadata", "- type: collectd/signalfx-metadata"),
        ]:
            create_znode(zk, znode, value)

        endpoint = "%s:2181" % container_ip(zk)
        with run_agent(CONFIG.substitute(zk_endpoint=endpoint)) as [backend, _, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin", "signalfx-metadata"))
            assert wait_for(p(has_datapoint_with_dim, backend, "env", "prod"))
Example #19
0
def test_mongo():
    """The collectd/mongodb monitor should emit datapoints for a stock mongo container."""
    with run_container("mongo:3.6") as mongo_cont:
        host = container_ip(mongo_cont)
        assert wait_for(p(tcp_socket_open, host, 27017), 60), "service didn't start"

        config = dedent(f"""
            monitors:
              - type: collectd/mongodb
                host: {host}
                port: 27017
                databases: [admin]
            """)
        with run_agent(config) as [backend, _, _]:
            assert wait_for(
                p(has_datapoint_with_dim, backend, "plugin", "mongo")
            ), "Didn't get mongo datapoints"
def test_hadoop(version):
    """
    Bring up a two-node hadoop cluster (master + one worker) for *version*,
    start HDFS and YARN, and verify the apache_hadoop monitor reports
    datapoints including exactly one active worker node.
    """
    with run_service("hadoop",
                     buildargs={"HADOOP_VER": version},
                     hostname="hadoop-master") as hadoop_master:
        # The worker reuses the image built for the master.
        with run_container(hadoop_master.image,
                           hostname="hadoop-worker1") as hadoop_worker1:
            containers = {
                "hadoop-master": hadoop_master,
                "hadoop-worker1": hadoop_worker1
            }

            # distribute the ip and hostnames for each container
            distribute_hostnames(containers)

            # format hdfs
            print_lines(
                hadoop_master.exec_run(
                    ["/usr/local/hadoop/bin/hdfs", "namenode", "-format"])[1])

            # start hadoop and yarn
            print_lines(hadoop_master.exec_run("start-dfs.sh")[1])
            print_lines(hadoop_master.exec_run("start-yarn.sh")[1])

            # wait for yarn api to be available
            host = container_ip(hadoop_master)
            assert wait_for(p(tcp_socket_open, host, 8088),
                            60), "service not listening on port"
            assert wait_for(
                p(http_status,
                  url="http://{0}:8088".format(host),
                  status=[200]), 120), "service didn't start"

            # start the agent with hadoop config
            config = HADOOP_CONFIG.substitute(host=host, port=8088)
            with run_agent(config) as [backend, _, _]:
                assert wait_for(
                    p(has_datapoint_with_dim, backend, "plugin",
                      "apache_hadoop")), "Didn't get hadoop datapoints"
                # One worker container should show up as one active node.
                assert wait_for(
                    p(has_datapoint, backend,
                      "gauge.hadoop.cluster.metrics.active_nodes", {},
                      1)), "expected 1 hadoop worker node"
Example #21
0
def test_cadvisor():
    """The cadvisor monitor should report at least one of its self-described metrics."""
    # Read-only host mounts that cadvisor needs to observe the host/docker state.
    mounts = {
        "/": {"bind": "/rootfs", "mode": "ro"},
        "/var/run": {"bind": "/var/run", "mode": "ro"},
        "/sys": {"bind": "/sys", "mode": "ro"},
        "/var/lib/docker": {"bind": "/var/lib/docker", "mode": "ro"},
        "/dev/disk": {"bind": "/dev/disk", "mode": "ro"},
    }
    with run_container("google/cadvisor:latest", volumes=mounts) as cadvisor_container:
        host = container_ip(cadvisor_container)
        config = dedent(f"""
            monitors:
              - type: cadvisor
                cadvisorURL: http://{host}:8080
        """)
        assert wait_for(p(tcp_socket_open, host, 8080), 60), "service didn't start"
        with run_agent(config) as [backend, _, _]:
            expected_metrics = get_monitor_metrics_from_selfdescribe("cadvisor")
            assert wait_for(
                p(any_metric_found, backend, expected_metrics)
            ), "Didn't get cadvisor datapoints"
Example #22
0
def test_vault_renewable_secret_refresh():
    """
    Use the Mongo database secret engine to get renewable Mongo credentials to
    use in the Mongo collectd plugin.  Make sure the secret gets renewed as
    expected.
    """
    with run_container("mongo:3.6") as mongo_cont, run_vault() as [vault_client, get_audit_events]:
        assert wait_for(p(tcp_socket_open, container_ip(mongo_cont), 27017), 30), "mongo service didn't start"

        # Configure vault's database secret engine to issue Mongo credentials.
        vault_client.sys.enable_secrets_engine(backend_type="database")

        vault_client.write(
            "database/config/my-mongodb-database",
            plugin_name="mongodb-database-plugin",
            allowed_roles="my-role",
            connection_url=f"mongodb://{container_ip(mongo_cont)}:27017/admin",
            username="******",
            password="",
        )

        vault_client.write(
            "database/roles/my-role",
            db_name="my-mongodb-database",
            creation_statements='{ "db": "admin", "roles": [{ "role": "readWrite" }, {"role": "read", "db": "foo"}] }',
            # Short default TTL so a renewal must happen within the test run.
            default_ttl="13s",
            max_ttl="24h",
        )

        with run_agent(
            dedent(
                f"""
            intervalSeconds: 1
            configSources:
              vault:
                vaultToken: {vault_client.token}
                vaultAddr: {vault_client.url}
            monitors:
             - type: collectd/mongodb
               host: {container_ip(mongo_cont)}
               port: 27017
               databases:
                - admin
               username: {{"#from": "vault:database/creds/my-role[username]"}}
               password: {{"#from": "vault:database/creds/my-role[password]"}}
               metricsToExclude:
                - metricName: "!gauge.objects"
        """
            )
        ) as [backend, _, _]:
            assert wait_for(p(has_datapoint, backend, dimensions={"plugin": "mongo"}))
            assert audit_read_paths(get_audit_events()) == ["database/creds/my-role"], "expected one read"

            # Waiting within the TTL window must not trigger a second read --
            # the agent should renew the existing lease instead.
            time.sleep(10)
            assert audit_read_paths(get_audit_events()) == ["database/creds/my-role"], "expected still one read"

            renewals = audit_secret_renewals(get_audit_events())
            # The secret gets renewed immediately by the renewer and then again
            # within its lease duration period.
            assert len(renewals) == 2, "expected two renewal ops"
            for ren in renewals:
                assert "database/creds/my-role" in ren, "expected renewal of right secret"

            # Fresh datapoints after clearing prove the renewed credentials
            # still authenticate against mongo.
            backend.datapoints.clear()
            assert wait_for(p(has_datapoint, backend, dimensions={"plugin": "mongo"})), "plugin lost access to mongo"