예제 #1
0
def run(config, metrics):
    """
    Start a Spark master, a Spark worker and the SPARK_APP job, then run the
    agent against them and verify the reported metrics.

    `config` is a template that gets formatted with `master_ip` and
    `worker_ip`; `metrics` is passed to `verify` as the expected metric set.
    """
    master_command = "bin/spark-class org.apache.spark.deploy.master.Master"
    with run_service("spark", command=master_command) as spark_master:
        master_ip = container_ip(spark_master)
        master_url = f"spark://{master_ip}:7077"

        assert wait_for(p(tcp_socket_open, master_ip, 7077), 60), "master service didn't start"
        assert wait_for(p(tcp_socket_open, master_ip, 8080), 60), "master webui service didn't start"
        history_server = spark_master.exec_run("./sbin/start-history-server.sh")
        assert history_server.exit_code == 0, "history service didn't start"

        worker_command = f"bin/spark-class org.apache.spark.deploy.worker.Worker {master_url}"
        with run_service("spark", command=worker_command) as spark_worker:
            worker_ip = container_ip(spark_worker)
            assert wait_for(p(tcp_socket_open, worker_ip, 8081), 60), "worker webui service didn't start"

            # Both commands are launched detached inside the master container.
            # NOTE(review): the netcat listener on 9999 is presumably the input
            # source for SPARK_APP -- confirm against the application.
            spark_master.exec_run("nc -lk 9999", detach=True)
            submit_command = (
                f"bin/spark-submit --master {master_url} "
                f"--conf spark.driver.host={master_ip} {SPARK_APP}"
            )
            spark_master.exec_run(submit_command, detach=True)
            assert wait_for(p(tcp_socket_open, master_ip, 4040), 60), "application service didn't start"

            agent_config = config.format(master_ip=master_ip, worker_ip=worker_ip)
            with Agent.run(agent_config) as agent:
                verify(agent, metrics, timeout=60)
                got_spark_datapoints = has_datapoint_with_dim(agent.fake_services, "plugin", "apache_spark")
                assert got_spark_datapoints, "Didn't get spark datapoints"
예제 #2
0
def test_postgresql_defaults():
    """
    Run the collectd/postgresql monitor against a postgres:10 container and
    verify that the default metrics are reported.
    """
    with run_container("postgres:10", environment=ENV) as postgres:
        host = container_ip(postgres)
        port_is_open = wait_for(p(tcp_socket_open, host, 5432), 60)
        assert port_is_open, "service didn't start"

        # Monitor config with one custom query and one explicit database entry.
        config = f"""
                monitors:
                  - type: collectd/postgresql
                    host: {host}
                    port: 5432
                    username: "******"
                    password: "******"
                    queries:
                    - name: "exampleQuery"
                      minVersion: 60203
                      maxVersion: 200203
                      statement: |
                        SELECT coalesce(sum(n_live_tup), 0) AS live, coalesce(sum(n_dead_tup), 0)
                        AS dead FROM pg_stat_user_tables;
                      results:
                      - type: gauge
                        instancePrefix: live
                        valuesFrom:
                        - live
                    databases:
                    - name: test
                      username: "******"
                      password: "******"
                      interval: 5
                      expireDelay: 10
                      sslMode: disable
                """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics)
예제 #3
0
def test_cgroup_monitor():
    """
    Run the cgroups monitor next to an nginx container started with explicit
    CPU/memory limits and check that those limits show up as datapoints on the
    container's cgroup.
    """
    limits = {
        "cpu_period": 100_000,
        "cpu_quota": 10000,
        "cpu_shares": 50,
        "mem_limit": 20 * 1024 * 1024,
    }
    config = """
    monitors:
      - type: cgroups
        extraMetrics: ['*']
    """
    with run_service("nginx", **limits) as nginx_container:
        with Agent.run(config) as agent:
            verify(agent, METADATA.all_metrics)

            expected_cgroup = "/docker/" + nginx_container.id

            # Each pair is a metric name and the value configured above.
            expected_datapoints = (
                ("cgroup.cpu_shares", 50),
                ("cgroup.cpu_cfs_period_us", 100_000),
            )
            for metric_name, value in expected_datapoints:
                assert wait_for(
                    p(
                        has_datapoint,
                        agent.fake_services,
                        metric_name=metric_name,
                        value=value,
                        dimensions={"cgroup": expected_cgroup},
                    )
                )
예제 #4
0
def test_hadoop_default(version):
    r"""
    Run the collectd/hadoop monitor against a master + worker pair of hadoop
    containers and verify the default metrics (minus EXCLUDED).

    Images for new hadoop versions have to be built, tagged and pushed to
    quay.io by hand, i.e.

        docker build \
            -t quay.io/signalfx/hadoop-test:<version> \
            --build-arg HADOOP_VER=<version> \
            <repo_root>/test-services/hadoop
        docker push quay.io/signalfx/hadoop-test:<version>
    """
    image = f"quay.io/signalfx/hadoop-test:{version}"
    with run_container(image, hostname="hadoop-master") as hadoop_master:
        with run_container(image, hostname="hadoop-worker1") as hadoop_worker1:
            host = start_hadoop(hadoop_master, hadoop_worker1)

            # Agent config pointing the hadoop monitor at the started cluster.
            config = f"""
            monitors:
              - type: collectd/hadoop
                host: {host}
                port: 8088
                verbose: true
            """
            with Agent.run(config) as agent:
                verify(agent, METADATA.default_metrics - EXCLUDED)

                # active_nodes may be reported as 0 at first, so wait until a
                # datapoint with value 1 shows up instead of checking once.
                one_active_node = p(
                    has_datapoint,
                    agent.fake_services,
                    "gauge.hadoop.cluster.metrics.active_nodes",
                    {},
                    1,
                )
                assert wait_for(one_active_node), "expected 1 hadoop worker node"

                got_hadoop_datapoints = has_datapoint_with_dim(agent.fake_services, "plugin", "apache_hadoop")
                assert got_hadoop_datapoints, "Didn't get hadoop datapoints"
예제 #5
0
def test_load_default():
    """
    Run the collectd/load monitor, verify its default metrics and make sure the
    agent output contains no "error" message.
    """
    config = """
        monitors:
        - type: collectd/load
        """
    with Agent.run(config) as agent:
        verify(agent, METADATA.default_metrics)

    # The output is inspected only after the agent context has been exited.
    agent_log = agent.output.lower()
    assert not has_log_message(agent_log, "error"), "error found in agent output!"
예제 #6
0
def test_netio_defaults():
    """
    Run the net-io monitor, verify its included metrics and make sure the agent
    output contains no "error" message.
    """
    config = """
    monitors:
      - type: net-io
    """
    with Agent.run(config) as agent:
        verify(agent, METADATA.included_metrics)
        agent_log = agent.output.lower()
        assert not has_log_message(agent_log, "error"), "error found in agent output!"
예제 #7
0
def run(config, metrics):
    """
    Start the apache service, run the agent with `config` (a template formatted
    with the container's `host`) and verify that `metrics` are reported.
    """
    with run_service("apache") as apache_container:
        host = container_ip(apache_container)
        agent_config = config.format(host=host)
        port_is_open = wait_for(p(tcp_socket_open, host, 80), 60)
        assert port_is_open, "service didn't start"

        with Agent.run(agent_config) as agent:
            verify(agent, metrics)
            got_apache_datapoints = has_datapoint_with_dim(agent.fake_services, "plugin", "apache")
            assert got_apache_datapoints, "Didn't get apache datapoints"
예제 #8
0
def test_nginx_included():
    """
    Run the collectd/nginx monitor against an nginx instance and verify the
    included metrics plus the "plugin" dimension.
    """
    with run_nginx() as host:
        config = f"""
        monitors:
        - type: collectd/nginx
          host: {host}
          port: 80
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.included_metrics)
            got_nginx_datapoints = has_datapoint_with_dim(agent.fake_services, "plugin", "nginx")
            assert got_nginx_datapoints, "Didn't get nginx datapoints"
예제 #9
0
def run(version, node_type, metrics, extra_metrics=""):
    """
    Start a hadoop node of `node_type` for `version`, run the agent with the
    HADOOPJMX_CONFIG template and verify that `metrics` are reported with the
    matching `nodeType` dimension.
    """
    with run_node(node_type, version) as endpoint:
        host, port = endpoint
        # Fill the hadoopjmx config template for this node.
        agent_config = HADOOPJMX_CONFIG.format(
            host=host,
            port=port,
            nodeType=node_type,
            extraMetrics=extra_metrics,
        )
        with Agent.run(agent_config) as agent:
            verify(agent, metrics)
            # Datapoints must carry the node type as a dimension.
            got_node_type = has_datapoint_with_dim(agent.fake_services, "nodeType", node_type)
            assert got_node_type, f"Didn't get hadoopjmx datapoints for nodeType {node_type}"
예제 #10
0
def test_haproxy_default_metrics_from_stats_page(version):
    """
    Point the haproxy monitor at the CSV stats page of an haproxy container
    and verify the default metrics, excluding EXPECTED_DEFAULTS_FROM_SOCKET.
    """
    build_args = {"HAPROXY_VERSION": version}
    with run_service("haproxy", buildargs=build_args) as service_container:
        host = container_ip(service_container)
        config = f"""
           monitors:
           - type: haproxy
             url: http://{host}:8080/stats?stats;csv
           """
        with Agent.run(config) as agent:
            expected = EXPECTED_DEFAULTS - EXPECTED_DEFAULTS_FROM_SOCKET
            verify(agent, expected, 10)
예제 #11
0
def test_kong_default(kong_version):
    """
    Run the collectd/kong monitor against Kong `kong_version` and verify the
    default metrics plus the "plugin" dimension.
    """
    with run_kong(kong_version) as kong_ip:
        with Agent.run(
            f"""
        monitors:
        - type: collectd/kong
          host: {kong_ip}
          port: 8001
        """
        ) as agent:
            verify(agent, METADATA.default_metrics)
            got_kong_dimension = has_datapoint_with_dim(agent.fake_services, "plugin", "kong")
            assert got_kong_dimension, "Didn't get Kong dimension"
예제 #12
0
def test_kubernetes_scheduler(k8s_cluster):
    """
    Run the agent in `k8s_cluster` with the kubernetes-scheduler monitor
    (discovered through the k8s-api observer) and verify all of its metrics.
    """
    agent_yaml = """
        observers:
        - type: k8s-api

        monitors:
        - type: kubernetes-scheduler
          discoveryRule: kubernetes_pod_name =~ "kube-scheduler"
          port: 10251
          extraMetrics: ["*"]
     """
    with k8s_cluster.run_agent(agent_yaml) as agent:
        expected = METADATA.all_metrics
        verify(agent, expected)
예제 #13
0
def test_supervisor_default():
    """
    Run the supervisor monitor against a supervisor-managed fpm instance and
    verify the default metrics plus the process "name" dimension.
    """
    with run_supervisor_fpm() as host:
        config = f"""
        monitors:
        - type: supervisor
          host: {host}
          port: {PORT}
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics)
            got_process_name = has_datapoint_with_dim(agent.fake_services, "name", PROCESS)
            assert got_process_name, f"Didn't get process name dimension {PROCESS}"
예제 #14
0
def test_haproxy_default_metrics_from_stats_page_by_discovery_rule(version):
    """
    Let the docker observer discover a container named "haproxy" and verify
    the haproxy monitor's default metrics, excluding
    EXPECTED_DEFAULTS_FROM_SOCKET.
    """
    config = """
           observers:
           - type: docker
           monitors:
           - type: haproxy
             discoveryRule: 'container_name == "haproxy"'
           """
    build_args = {"HAPROXY_VERSION": version}
    # The container name has to match the discovery rule in the config above.
    with run_service("haproxy", buildargs=build_args, name="haproxy"):
        with Agent.run(config) as agent:
            expected = EXPECTED_DEFAULTS - EXPECTED_DEFAULTS_FROM_SOCKET
            verify(agent, expected, 10)
예제 #15
0
def test_php_default():
    """
    Run the collectd/php-fpm monitor against a php-fpm status endpoint and
    verify the default metrics plus the plugin and instance dimensions.
    """
    with run_php_fpm() as host:
        config = f"""
        monitors:
        - type: collectd/php-fpm
          url: "http://{host}/status?json"
          name: {INSTANCE}
        """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics)

            # (dimension key, expected value, failure message)
            expected_dimensions = (
                ("plugin", "curl_json", "Didn't get php-fpm datapoints"),
                ("plugin_instance", INSTANCE, "Didn't get right instance dimension on datapoints"),
            )
            for dim_key, dim_value, failure in expected_dimensions:
                assert has_datapoint_with_dim(agent.fake_services, dim_key, dim_value), failure
예제 #16
0
def test_process_monitor_process_name_filter():
    """
    Configure the process monitor to match the test's own process by name and
    check for a datapoint whose "command" dimension is that name.
    """
    self_proc_name = psutil.Process(os.getpid()).name()

    config = f"""
monitors:
  - type: process
    processes:
     - {self_proc_name}
"""
    with Agent.run(config) as agent:
        verify(agent, METADATA.all_metrics)
        expected_dimensions = {"command": self_proc_name}
        assert has_datapoint(agent.fake_services, dimensions=expected_dimensions)
예제 #17
0
def test_process_monitor_executable_filter():
    """
    Configure the process monitor to match the test's own process by its
    executable path and check for a datapoint whose "executable" dimension is
    that path.
    """
    self_proc_exec = psutil.Process(os.getpid()).exe()

    config = f"""
monitors:
  - type: process
    executables:
     - {self_proc_exec}
"""
    with Agent.run(config) as agent:
        verify(agent, METADATA.all_metrics)
        expected_dimensions = {"executable": self_proc_exec}
        assert has_datapoint(agent.fake_services, dimensions=expected_dimensions)
예제 #18
0
def test_consul_defaults():
    """
    Run the collectd/consul monitor (enhancedMetrics off) against a
    consul:1.4.4 container and verify EXPECTED_DEFAULTS.
    """
    with run_container("consul:1.4.4") as consul_cont:
        host = container_ip(consul_cont)
        port_is_open = wait_for(p(tcp_socket_open, host, 8500), 60)
        assert port_is_open, "consul service didn't start"

        config = f"""
         monitors:
           - type: collectd/consul
             host: {host}
             port: 8500
             enhancedMetrics: false
         """
        with Agent.run(config) as agent:
            verify(agent, EXPECTED_DEFAULTS)
예제 #19
0
def test_memory():
    """
    Run the memory monitor and verify the expected metrics, after asserting
    that each of them is listed in METADATA.default_metrics.

    On Linux a handful of additional metrics are expected.
    """
    linux_only_metrics = {
        "memory.buffered",
        "memory.cached",
        "memory.free",
        "memory.slab_recl",
        "memory.slab_unrecl",
    }
    expected_metrics = {"memory.used", "memory.utilization"}
    if sys.platform == "linux":
        expected_metrics |= linux_only_metrics

    config = """
        monitors:
          - type: memory
        """
    with Agent.run(config) as agent:
        # Guard against the expectations drifting away from the metadata.
        for metric in expected_metrics:
            assert metric in METADATA.default_metrics

        verify(agent, expected_metrics)
예제 #20
0
def test_kong_metric_config():
    """Enabling a metric through the `metrics` config lets it pass the filter."""
    with run_kong(LATEST) as kong_ip:
        with Agent.run(
            f"""
        monitors:
        - type: collectd/kong
          host: {kong_ip}
          port: 8001
          metrics:
          - metric: connections_accepted
            report: true
        """
        ) as agent:
            expected = METADATA.default_metrics | {"counter.kong.connections.accepted"}
            verify(agent, expected)
            got_kong_dimension = has_datapoint_with_dim(agent.fake_services, "plugin", "kong")
            assert got_kong_dimension, "Didn't get Kong dimension"
예제 #21
0
def test_mongo_basic():
    """
    Run the collectd/mongodb monitor against a mongo:3.6 container and verify
    EXPECTED_DEFAULTS.
    """
    with run_container("mongo:3.6") as mongo_cont:
        host = container_ip(mongo_cont)
        agent_config = dedent(
            f"""
            monitors:
              - type: collectd/mongodb
                host: {host}
                port: 27017
                databases: [admin]
            """
        )
        port_is_open = wait_for(p(tcp_socket_open, host, 27017), 60)
        assert port_is_open, "service didn't start"

        with Agent.run(agent_config) as agent:
            verify(agent, EXPECTED_DEFAULTS)
예제 #22
0
def test_haproxy_default_and_status_metrics_from_stats_page(version):
    """
    Point the haproxy monitor at the CSV stats page with `haproxy_status` as an
    extra metric, verify defaults plus that metric (minus
    EXPECTED_DEFAULTS_FROM_SOCKET) and check the agent output for errors.
    """
    status_metric = "haproxy_status"
    build_args = {"HAPROXY_VERSION": version}
    with run_service("haproxy", buildargs=build_args) as service_container:
        host = container_ip(service_container)
        config = f"""
           monitors:
           - type: haproxy
             url: http://{host}:8080/stats?stats;csv
             extraMetrics: [{status_metric}]
           """
        with Agent.run(config) as agent:
            expected = (EXPECTED_DEFAULTS | {status_metric}) - EXPECTED_DEFAULTS_FROM_SOCKET
            verify(agent, expected, 10)
            agent_log = agent.output.lower()
            assert not has_log_message(agent_log, "error"), "error found in agent output!"
예제 #23
0
def test_kong_extra_metric():
    """Listing a metric under extraMetrics enables the underlying config metric."""
    # counter.kong.connections.handled is used because the monitor does not
    # report it by default and it is not a default metric.
    extra_metric = "counter.kong.connections.handled"
    with run_kong(LATEST) as kong_ip:
        with Agent.run(
            f"""
        monitors:
        - type: collectd/kong
          host: {kong_ip}
          port: 8001
          extraMetrics:
          - counter.kong.connections.handled
        """
        ) as agent:
            verify(agent, METADATA.default_metrics | {extra_metric})
            got_kong_dimension = has_datapoint_with_dim(agent.fake_services, "plugin", "kong")
            assert got_kong_dimension, "Didn't get Kong dimension"
예제 #24
0
def test_haproxy_basic(version):
    """
    Run the collectd/haproxy monitor (enhancedMetrics off) against an haproxy
    container, send two HTTP requests to it and verify EXPECTED_DEFAULTS.
    """
    build_args = {"HAPROXY_VERSION": version}
    with run_service("haproxy", buildargs=build_args) as service_container:
        host = container_ip(service_container)
        port_is_open = wait_for(p(tcp_socket_open, host, 9000))
        assert port_is_open, "haproxy not listening on port"

        config = f"""
           monitors:
           - type: collectd/haproxy
             host: {host}
             port: 9000
             enhancedMetrics: false
           """
        with Agent.run(config) as agent:
            # Two requests against the proxied port before verifying metrics.
            for _ in range(2):
                requests.get(f"http://{host}:80", timeout=5)
            verify(agent, EXPECTED_DEFAULTS, 10)
예제 #25
0
def test_couchbase_included(tag):
    """
    Run the collectd/couchbase monitor with collectTarget NODE against
    couchbase `tag` and verify the included "nodes" group metrics (minus
    EXCLUDED) plus the "plugin" dimension.
    """
    with run_couchbase(tag) as host:
        config = f"""
        monitors:
          - type: collectd/couchbase
            host: {host}
            port: 8091
            collectTarget: NODE
            username: administrator
            password: password
        """
        with Agent.run(config) as agent:
            included_node_metrics = METADATA.metrics_by_group["nodes"] & METADATA.included_metrics
            verify(agent, included_node_metrics - EXCLUDED)
            got_couchbase_datapoints = has_datapoint_with_dim(agent.fake_services, "plugin", "couchbase")
            assert got_couchbase_datapoints, "Didn't get couchbase datapoints"
예제 #26
0
def run_all(version, metrics, extra_metrics=""):
    """
    Start a Kafka broker plus a producer and a consumer built from the same
    image, run the three kafka monitors against them and verify `metrics`.

    `extra_metrics` is substituted into the `extraMetrics` option of every
    monitor.
    """
    with run_kafka(version) as kafka:
        kafka_ip = container_ip(kafka)
        kafka_host = container_hostname(kafka)
        image = kafka.image.id

        # The broker advertises itself by hostname by default, so the producer
        # and consumer get a host -> ip entry for it; otherwise they could not
        # resolve the broker hostname.
        with run_producer(image, kafka_host, extra_hosts={kafka_host: kafka_ip}) as kafkaproducerhost:
            with run_consumer(image, kafka_host, extra_hosts={kafka_host: kafka_ip}) as kafkaconsumerhost:
                config = f"""
            monitors:
             - type: collectd/kafka
               host: {kafka_ip}
               port: 7099
               clusterName: testCluster
               extraMetrics: {extra_metrics}
             - type: collectd/kafka_producer
               host: {kafkaproducerhost}
               port: 8099
               extraMetrics: {extra_metrics}
             - type: collectd/kafka_consumer
               host: {kafkaconsumerhost}
               port: 9099
               extraMetrics: {extra_metrics}
            """
                with Agent.run(config) as agent:
                    verify(agent, metrics)

                    # (dimension key, expected value, failure message)
                    expected_dimensions = (
                        ("cluster", "testCluster", "Didn't get cluster dimension from kafka datapoints"),
                        (
                            "client-id",
                            "console-producer",
                            "Didn't get client-id dimension from kafka_producer datapoints",
                        ),
                        (
                            "client-id",
                            "consumer-1",
                            "Didn't get client-id dimension from kafka_consumer datapoints",
                        ),
                    )
                    for dim_key, dim_value, failure in expected_dimensions:
                        assert has_datapoint_with_dim(agent.fake_services, dim_key, dim_value), failure
예제 #27
0
def test_extra_metrics_passthrough():
    """
    Metrics listed under extraMetrics must be let through even though they are
    not included by default.
    """
    extra_metric = "memstats.by_size.mallocs"
    metadata = Metadata.from_package("expvar")

    with run_expvar() as expvar_container_ip:
        config = f"""
               monitors:
                 - type: expvar
                   host: {expvar_container_ip}
                   port: 8080
                   intervalSeconds: 1
                   extraMetrics:
                    - memstats.by_size.mallocs
               """
        with Agent.run(config) as agent:
            # The test only makes sense if the metric is not included by default.
            assert extra_metric in metadata.nonincluded_metrics
            verify(agent, metadata.included_metrics | {extra_metric})
예제 #28
0
def test_built_in_filtering_disabled_no_whitelist_for_monitor():
    """
    With built-in filtering disabled, a monitor that has no entries in
    whitelist.json should report all of its metrics.
    """
    metadata = Metadata.from_package("expvar")

    with run_expvar() as expvar_container_ip:
        config = f"""
               enableBuiltInFiltering: false
               monitors:
                 - type: expvar
                   host: {expvar_container_ip}
                   port: 8080
                   intervalSeconds: 1
                   enhancedMetrics: true
                   # This should be ignored
                   extraMetrics:
                    - memstats.by_size.mallocs
               metricsToExclude:
                - {{"#from": "{REPO_ROOT_DIR}/whitelist.json", flatten: true}}
               """
        with Agent.run(config) as agent:
            expected = metadata.all_metrics
            verify(agent, expected)
예제 #29
0
def test_mongo_enhanced_metrics():
    """
    Run the collectd/mongodb monitor with collection and collection-top
    metrics enabled and verify those groups together with EXPECTED_DEFAULTS.
    """
    with run_container("mongo:3.6") as mongo_cont:
        host = container_ip(mongo_cont)
        agent_config = dedent(
            f"""
            monitors:
              - type: collectd/mongodb
                host: {host}
                port: 27017
                databases: [admin]
                sendCollectionMetrics: true
                sendCollectionTopMetrics: true
            """
        )
        port_is_open = wait_for(p(tcp_socket_open, host, 27017), 60)
        assert port_is_open, "service didn't start"

        with Agent.run(agent_config) as agent:
            groups = METADATA.metrics_by_group
            expected = groups["collection"] | groups["collection-top"] | EXPECTED_DEFAULTS
            verify(agent, expected)
def test_elasticsearch_included():
    """
    Run the collectd/elasticsearch monitor against an elasticsearch container
    whose cluster name is "testCluster"; verify the default metrics (minus
    EXCLUDED), the plugin and cluster dimensions, and an error-free agent log.
    """
    es_environment = {"cluster.name": "testCluster"}
    with run_elasticsearch(environment=es_environment) as es_container:
        host = container_ip(es_container)
        config = f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
            """
        with Agent.run(config) as agent:
            verify(agent, METADATA.default_metrics - EXCLUDED)

            # (dimension key, expected value, failure message)
            expected_dimensions = (
                ("plugin", "elasticsearch", "Didn't get elasticsearch datapoints"),
                ("plugin_instance", "testCluster", "Cluster name not picked from read callback"),
            )
            for dim_key, dim_value, failure in expected_dimensions:
                assert has_datapoint_with_dim(agent.fake_services, dim_key, dim_value), failure

            agent_log = agent.output.lower()
            assert not has_log_message(agent_log, "error"), "error found in agent output!"