Beispiel #1
0
def run_kafka(version):
    """
    Runs a kafka broker container together with the zookeeper container it
    connects to, and yields the broker container.

    While the broker is up, a second "kafka" service is run with
    ``START_AS=create-topic``.

    NOTE(review): callers use this as ``with run_kafka(version) as kafka``, so
    it presumably is wrapped with ``contextlib.contextmanager`` where it is
    defined -- the decorator is not visible here; confirm.

    :param version: value passed to the image build as ``KAFKA_VERSION``
    """
    with run_container("zookeeper:3.5") as zookeeper:
        zkhost = container_ip(zookeeper)
        assert wait_for(p(tcp_socket_open, zkhost, 2181),
                        60), "zookeeper didn't start"
        zk_connect = "%s:2181" % (zkhost, )
        with run_service(
                "kafka",
                environment={
                    "JMX_PORT": "7099",
                    "KAFKA_ZOOKEEPER_CONNECT": zk_connect,
                    "START_AS": "broker"
                },
                buildargs={"KAFKA_VERSION": version},
        ) as kafka_container:
            # run_service is used as a context manager everywhere else; a bare
            # call presumably only builds the manager object without starting
            # the container, so it has to be entered for the topic creation
            # to actually run.
            with run_service(
                    "kafka",
                    environment={
                        "START_AS": "create-topic",
                        "KAFKA_ZOOKEEPER_CONNECT": zk_connect
                    },
                    buildargs={"KAFKA_VERSION": version},
            ):
                yield kafka_container
Beispiel #2
0
def test_all_kafka_monitors(version):
    """
    Start a broker, a producer and a consumer and check that the kafka,
    kafka_producer and kafka_consumer monitors each report the expected
    metric / dimensions.
    """
    with run_kafka(version) as broker:
        kafkahost = container_ip(broker)
        producer_env = {
            "JMX_PORT": "8099",
            "START_AS": "producer",
            "KAFKA_BROKER": "%s:9092" % (kafkahost, )
        }
        with run_service(
                "kafka",
                environment=producer_env,
                buildargs={"KAFKA_VERSION": version},
        ) as producer:
            producer_host = container_ip(producer)
            consumer_env = {
                "JMX_PORT": "9099",
                "START_AS": "consumer",
                "KAFKA_BROKER": "%s:9092" % (kafkahost, )
            }
            with run_service(
                    "kafka",
                    environment=consumer_env,
                    buildargs={"KAFKA_VERSION": version},
            ) as consumer:
                consumer_host = container_ip(consumer)
                # One monitor per container, each pointed at that
                # container's JMX port.
                agent_config = textwrap.dedent("""
                monitors:
                 - type: collectd/kafka
                   host: {0}
                   port: 7099
                   clusterName: testCluster
                 - type: collectd/kafka_producer
                   host: {1}
                   port: 8099
                 - type: collectd/kafka_consumer
                   host: {2}
                   port: 9099
                """.format(kafkahost, producer_host, consumer_host))
                with run_agent(agent_config) as (backend, _, _):
                    assert wait_for(
                        p(has_datapoint_with_metric_name, backend,
                          "gauge.kafka-active-controllers"),
                        timeout_seconds=60), "Didn't get kafka datapoints"

                    # (dimension key, dimension value, failure message)
                    dimension_checks = (
                        ("cluster", "testCluster",
                         "Didn't get cluster dimension from kafka datapoints"),
                        ("client-id", "console-producer",
                         "Didn't get client-id dimension from kafka_producer datapoints"),
                        ("client-id", "consumer-1",
                         "Didn't get client-id dimension from kafka_consumer datapoints"),
                    )
                    for dim_key, dim_value, failure in dimension_checks:
                        assert wait_for(
                            p(has_datapoint_with_dim, backend, dim_key,
                              dim_value),
                            timeout_seconds=60), failure
Beispiel #3
0
def test_docker_observer_labels_partial():
    """
    Check that the docker observer completes an endpoint's configuration from
    container labels: the nginx monitor is declared in the agent config and
    the extra dimension is supplied by a label on the container.
    """
    agent_config = dedent("""
        observers:
          - type: docker
        monitors:
          - type: collectd/nginx
            discoveryRule: container_name =~ "nginx-disco-partial" && port == 80
    """)
    container_labels = {
        "agent.signalfx.com.config.80.extraDimensions": "{mydim: myvalue}"
    }
    with run_agent(agent_config) as (backend, _, _):

        def no_container_datapoints():
            return not has_datapoint_with_dim(backend, "container_name",
                                              "nginx-disco-partial")

        with run_service("nginx",
                         name="nginx-disco-partial",
                         labels=container_labels):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "nginx")
            assert wait_for(got_plugin), "Didn't get nginx datapoints"
            got_extra_dim = p(has_datapoint_with_dim, backend, "mydim",
                              "myvalue")
            assert wait_for(got_extra_dim), "Didn't get extra dimension"
        # Give the docker observer time to drop nginx and collectd time to
        # restart before checking that datapoints have stopped.
        time.sleep(5)
        backend.datapoints.clear()
        assert ensure_always(no_container_datapoints, 10)
Beispiel #4
0
def test_elasticsearch_without_cluster():
    """
    Check that no elasticsearch datapoints arrive while the service is not
    running, and that they do arrive once it has been started.
    """
    # The entrypoint is overridden so the container runs without the ES
    # service.
    with run_service("elasticsearch/6.4.2",
                     environment={"cluster.name": "testCluster"},
                     entrypoint="sleep inf") as es_container:
        host = container_ip(es_container)
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
            """)
        with run_agent(config) as (backend, _, _):
            got_es_datapoints = p(has_datapoint_with_dim, backend, "plugin",
                                  "elasticsearch")
            assert not wait_for(
                got_es_datapoints), "datapoints found without service"

            # Launch the ES service inside the running container and make
            # sure the monitor starts reporting.
            es_container.exec_run(
                "/usr/local/bin/docker-entrypoint.sh eswrapper", detach=True)
            es_is_up = p(http_status,
                         url=f"http://{host}:9200/_nodes/_local",
                         status=[200])
            assert wait_for(es_is_up, 180), "service didn't start"
            assert wait_for(
                got_es_datapoints), "Didn't get elasticsearch datapoints"
Beispiel #5
0
def test_elasticsearch_with_cluster_option():
    """
    Check that the ``cluster`` monitor option replaces the service's own
    cluster name in the ``plugin_instance`` dimension.
    """
    es_env = {"cluster.name": "testCluster"}
    with run_service("elasticsearch/6.4.2",
                     environment=es_env) as es_container:
        host = container_ip(es_container)
        es_is_up = p(http_status,
                     url=f"http://{host}:9200/_nodes/_local",
                     status=[200])
        assert wait_for(es_is_up, 180), "service didn't start"
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
              cluster: testCluster1
            """)
        with run_agent(config) as (backend, get_output, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "elasticsearch")
            assert wait_for(got_plugin), "Didn't get elasticsearch datapoints"

            got_override = p(has_datapoint_with_dim, backend,
                             "plugin_instance", "testCluster1")
            assert wait_for(
                got_override), "Cluster name not picked from read callback"

            # The name configured on the service itself must never show up as
            # plugin_instance once the cluster option is set.
            got_original = p(has_datapoint_with_dim, backend,
                             "plugin_instance", "testCluster")
            assert not wait_for(
                got_original, 10
            ), "plugin_instance dimension not overridden by cluster option"

            agent_output = get_output().lower()
            assert not has_log_message(
                agent_output, "error"), "error found in agent output!"
Beispiel #6
0
def test_elasticsearch_with_additional_metrics():
    """
    Check that metrics listed under ``additionalMetrics`` are reported by the
    elasticsearch monitor.
    """
    es_env = {"cluster.name": "testCluster"}
    with run_service("elasticsearch/6.2.0",
                     environment=es_env) as es_container:
        host = container_ip(es_container)
        es_is_up = p(http_status,
                     url=f"http://{host}:9200/_nodes/_local",
                     status=[200])
        assert wait_for(es_is_up, 180), "service didn't start"
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
              additionalMetrics:
               - cluster.initializing-shards
               - thread_pool.threads
            """)
        with run_agent(config) as (backend, get_output, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "elasticsearch")
            assert wait_for(got_plugin), "Didn't get elasticsearch datapoints"

            # (metric name, failure message)
            expected_metrics = (
                ("gauge.cluster.initializing-shards",
                 "Didn't get gauge.cluster.initializing-shards metric"),
                ("gauge.thread_pool.threads",
                 "Didn't get gauge.thread_pool.threads metric"),
            )
            for metric_name, failure in expected_metrics:
                assert wait_for(
                    p(has_datapoint_with_metric_name, backend,
                      metric_name)), failure

            agent_output = get_output().lower()
            assert not has_log_message(
                agent_output, "error"), "error found in agent output!"
Beispiel #7
0
def test_elasticsearch_with_threadpool():
    """
    Check that configuring ``threadPools`` yields datapoints with a
    ``thread_pool`` dimension (the ``bulk`` pool is the one asserted on).
    """
    es_env = {"cluster.name": "testCluster"}
    with run_service("elasticsearch/6.2.0",
                     environment=es_env) as es_container:
        host = container_ip(es_container)
        es_is_up = p(http_status,
                     url=f"http://{host}:9200/_nodes/_local",
                     status=[200])
        assert wait_for(es_is_up, 180), "service didn't start"
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
              threadPools:
               - bulk
               - index
               - search
            """)
        with run_agent(config) as (backend, get_output, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "elasticsearch")
            assert wait_for(got_plugin), "Didn't get elasticsearch datapoints"

            got_bulk_pool = p(has_datapoint_with_dim, backend, "thread_pool",
                              "bulk")
            assert wait_for(
                got_bulk_pool), "Didn't get bulk thread pool metrics"

            agent_output = get_output().lower()
            assert not has_log_message(
                agent_output, "error"), "error found in agent output!"
Beispiel #8
0
def test_jenkins(version):
    """
    Build and run jenkins at the given version, wait for its metrics ping
    endpoint to answer, then check that the jenkins monitor reports
    datapoints.
    """
    jenkins_buildargs = {
        "JENKINS_VERSION": version,
        "JENKINS_PORT": "8080"
    }
    with run_service("jenkins",
                     buildargs=jenkins_buildargs) as jenkins_container:
        host = container_ip(jenkins_container)
        config = dedent(f"""
            monitors:
              - type: collectd/jenkins
                host: {host}
                port: 8080
                metricsKey: {METRICS_KEY}
            """)
        port_is_open = p(tcp_socket_open, host, 8080)
        assert wait_for(port_is_open, 60), "service not listening on port"

        # An open port is not enough: wait until the metrics ping URL
        # returns 200 as well.
        ping_ok = p(http_status,
                    url=f"http://{host}:8080/metrics/{METRICS_KEY}/ping/",
                    status=[200])
        assert wait_for(ping_ok, 120), "service didn't start"

        with run_agent(config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "jenkins")
            assert wait_for(got_plugin), "Didn't get jenkins datapoints"
Beispiel #9
0
def test_docker_observer_labels():
    """
    Check that the docker observer configures an endpoint entirely from
    container labels: the agent config declares no monitors, the labels name
    the monitor type and its interval.
    """
    agent_config = dedent("""
        observers:
          - type: docker
    """)
    container_labels = {
        "agent.signalfx.com.monitorType.80": "collectd/nginx",
        "agent.signalfx.com.config.80.intervalSeconds": "1",
    }
    with run_agent(agent_config) as (backend, _, _):

        def no_container_datapoints():
            return not has_datapoint_with_dim(backend, "container_name",
                                              "nginx-disco-full")

        with run_service("nginx",
                         name="nginx-disco-full",
                         labels=container_labels):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "nginx")
            assert wait_for(got_plugin), "Didn't get nginx datapoints"
        # Give the docker observer time to drop nginx and collectd time to
        # restart before checking that datapoints have stopped.
        time.sleep(5)
        backend.datapoints.clear()
        assert ensure_always(no_container_datapoints, 10)
Beispiel #10
0
def test_health_checker_tcp():
    """
    Run nginx, point the agent at it using the module-level ``CONFIG``
    template and check that health_checker datapoints arrive.
    """
    with run_service("nginx") as nginx_container:
        host = container_ip(nginx_container)
        port_is_open = p(tcp_socket_open, host, 80)
        assert wait_for(port_is_open, 60), "service didn't start"

        agent_config = CONFIG.substitute(host=host)
        with run_agent(agent_config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "health_checker")
            assert wait_for(got_plugin), "Didn't get health_checker datapoints"
Beispiel #11
0
def test_haproxy(version):
    """
    Build and run haproxy at the given version and check that the haproxy
    monitor reports datapoints once port 9000 is open.
    """
    haproxy_buildargs = {"HAPROXY_VERSION": version}
    with run_service("haproxy",
                     buildargs=haproxy_buildargs) as service_container:
        host = container_ip(service_container)
        config = MONITOR_CONFIG.substitute(host=host)
        port_is_open = p(tcp_socket_open, host, 9000)
        assert wait_for(port_is_open, 120), "haproxy not listening on port"
        with run_agent(config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "haproxy")
            assert wait_for(got_plugin), "didn't get datapoints"
Beispiel #12
0
def test_apache():
    """
    Run apache and check that the agent, configured from ``APACHE_CONFIG``,
    reports datapoints for the apache plugin.
    """
    with run_service("apache") as apache_container:
        host = container_ip(apache_container)
        config = APACHE_CONFIG.substitute(host=host)
        port_is_open = p(tcp_socket_open, host, 80)
        assert wait_for(port_is_open, 60), "service didn't start"

        with run_agent(config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "apache")
            assert wait_for(got_plugin), "Didn't get apache datapoints"
Beispiel #13
0
def test_nginx():
    """
    Run nginx and check that the agent, configured from ``NGINX_CONFIG``,
    reports datapoints for the nginx plugin.
    """
    with run_service("nginx") as nginx_container:
        host = container_ip(nginx_container)
        config = NGINX_CONFIG.substitute(host=host)
        port_is_open = p(tcp_socket_open, host, 80)
        assert wait_for(port_is_open, 60), "service didn't start"

        with run_agent(config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "nginx")
            assert wait_for(got_plugin), "Didn't get nginx datapoints"
Beispiel #14
0
def test_docker_observer_use_host_bindings():
    """
    Run two nginx containers, only one of which publishes port 80 on the
    host, and check that only the host-bound container produces datapoints
    carrying the ``mydim`` dimension.
    """
    with run_service("nginx",
                     name="nginx-non-host-binding",
                     labels={"mylabel": "non-host-binding"}):
        with run_service(
                "nginx",
                name="nginx-with-host-binding",
                labels={"mylabel": "with-host-binding"},
                ports={"80/tcp": ("127.0.0.1", 0)},
        ) as container_bind:
            # Host port 0 is requested above (presumably letting docker pick
            # a free port), so the port actually bound is read back from the
            # container attributes.
            port_bindings = container_bind.attrs["NetworkSettings"]["Ports"]
            host_port = port_bindings["80/tcp"][0]["HostPort"]
            with run_agent(HOST_BINDING_CONFIG.substitute(
                    port=host_port)) as [backend, _, _]:
                # Negated check: this fails when the dimension IS seen, so
                # the message must say so.
                assert not wait_for(
                    p(has_datapoint_with_dim, backend, "mydim",
                      "non-host-binding")
                ), "Got custom label dimension from container without host binding"
                assert wait_for(
                    p(has_datapoint_with_dim, backend, "mydim",
                      "with-host-binding")
                ), "Didn't get custom label dimension"
Beispiel #15
0
def test_basic_service_discovery():
    """
    Start nginx while the agent is running and check that it is monitored,
    then check that nginx datapoints stop after the container is gone and
    that the agent output has no "error" log message.
    """
    with run_agent(CONFIG) as (backend, get_output, _):

        def no_nginx_datapoints():
            return not has_datapoint_with_dim(backend, "plugin", "nginx")

        with run_service("nginx", name="nginx-discovery"):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "nginx")
            assert wait_for(got_plugin), "Didn't get nginx datapoints"
        # Give the docker observer time to drop nginx and collectd time to
        # restart before checking that datapoints have stopped.
        time.sleep(5)
        backend.datapoints.clear()
        assert ensure_always(no_nginx_datapoints, 10)
        agent_output = get_output()
        assert not has_log_message(agent_output, "error")
def test_docker_detects_new_containers():
    """
    Check that docker-container-stats reports a container that is started
    after the agent is already running.
    """
    agent_config = """
    monitors:
      - type: docker-container-stats

    """
    with run_agent(agent_config) as (backend, _, _):
        # Pause before starting the container, so it comes up while the
        # agent is already running.
        time.sleep(5)
        with run_service("nginx") as nginx_container:
            got_container = p(has_datapoint_with_dim, backend, "container_id",
                              nginx_container.id)
            assert wait_for(got_container), "Didn't get nginx datapoints"
def test_docker_image_filtering():
    """
    Check that a container whose image is listed under ``excludedImages``
    never produces docker-container-stats datapoints.
    """
    config_template = """
    monitors:
      - type: docker-container-stats
        excludedImages:
         - "%s"

    """
    with run_service("nginx") as nginx_container:
        agent_config = config_template % nginx_container.attrs["Image"]
        with run_agent(agent_config) as (backend, _, _):

            def no_container_datapoints():
                return not has_datapoint_with_dim(backend, "container_id",
                                                  nginx_container.id)

            assert ensure_always(no_container_datapoints)
def test_docker_envvar_dimensions():
    """
    Check that ``envToDimensions`` turns the container's ``APP`` environment
    variable into an ``app`` dimension on the datapoints.
    """
    agent_config = """
    monitors:
      - type: docker-container-stats
        envToDimensions:
          APP: app

    """
    with run_service("nginx", environment={"APP": "myserver"}):
        with run_agent(agent_config) as (backend, _, _):
            got_app_dim = p(has_datapoint_with_dim, backend, "app", "myserver")
            assert wait_for(
                got_app_dim), "Didn't get datapoint with service app"
def test_docker_label_dimensions():
    """
    Check that ``labelsToDimensions`` turns the container's ``app`` label
    into a ``service`` dimension on the datapoints.
    """
    agent_config = """
    monitors:
      - type: docker-container-stats
        labelsToDimensions:
          app: service

    """
    with run_service("nginx", labels={"app": "myserver"}):
        with run_agent(agent_config) as (backend, _, _):
            got_service_dim = p(has_datapoint_with_dim, backend, "service",
                                "myserver")
            assert wait_for(
                got_service_dim), "Didn't get datapoint with service dim"
def test_cassandra():
    """
    Run cassandra, wait for its JMX port to open and check that the agent,
    configured from ``CASSANDRA_CONFIG``, reports the read latency counter.
    """
    with run_service("cassandra") as cassandra_cont:
        cassandra_host = container_ip(cassandra_cont)
        config = CASSANDRA_CONFIG.substitute(host=cassandra_host)

        # The JMX port is detected from inside the container by grepping
        # /proc/net/tcp for 1C1F (hex for 7199).
        jmx_is_open = p(container_cmd_exit_0, cassandra_cont,
                        "sh -c 'cat /proc/net/tcp | grep 1C1F'")
        assert wait_for(jmx_is_open), "Cassandra JMX didn't start"

        with run_agent(config) as (backend, _, _):
            got_read_latency = p(
                has_datapoint_with_metric_name, backend,
                "counter.cassandra.ClientRequest.Read.Latency.Count")
            assert wait_for(got_read_latency,
                            30), "Didn't get Cassandra datapoints"
Beispiel #21
0
def test_docker_observer():
    """
    Start a labelled nginx container while the agent runs with ``CONFIG`` and
    check that nginx datapoints and the ``mydim`` dimension arrive, then that
    datapoints for the container stop once it is gone.
    """
    with run_agent(CONFIG) as (backend, _, _):

        def no_container_datapoints():
            return not has_datapoint_with_dim(backend, "container_name",
                                              "nginx-discovery")

        with run_service("nginx",
                         name="nginx-discovery",
                         labels={"mylabel": "abc"}):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "nginx")
            assert wait_for(got_plugin), "Didn't get nginx datapoints"
            got_label_dim = p(has_datapoint_with_dim, backend, "mydim", "abc")
            assert wait_for(got_label_dim), "Didn't get custom label dimension"
        # Give the docker observer time to drop nginx and collectd time to
        # restart before checking that datapoints have stopped.
        time.sleep(5)
        backend.datapoints.clear()
        assert ensure_always(no_container_datapoints, 10)
def test_docker_stops_watching_paused_containers():
    """
    Check that docker-container-stats reports a running container and stops
    reporting it after the container has been paused.
    """
    agent_config = """
        monitors:
          - type: docker-container-stats

        """
    with run_service("nginx") as nginx_container:
        with run_agent(agent_config) as (backend, _, _):

            def no_container_datapoints():
                return not has_datapoint_with_dim(backend, "container_id",
                                                  nginx_container.id)

            got_container = p(has_datapoint_with_dim, backend, "container_id",
                              nginx_container.id)
            assert wait_for(got_container), "Didn't get nginx datapoints"

            nginx_container.pause()
            # Wait after pausing, then discard what was already collected so
            # only datapoints arriving from now on are checked.
            time.sleep(5)
            backend.datapoints.clear()
            assert ensure_always(no_container_datapoints)
def test_couchbase(tag):
    """
    Build and run couchbase at the given version, wait for its REST port to
    respond, then check that the couchbase monitor reports datapoints.
    """
    with run_service("couchbase",
                     buildargs={"COUCHBASE_VERSION": tag},
                     hostname="node1.cluster") as couchbase_container:
        host = container_ip(couchbase_container)
        config = COUCHBASE_CONFIG.substitute(host=host)
        port_is_open = p(tcp_socket_open, host, 8091)
        assert wait_for(port_is_open, 60), "service not listening on port"

        # The readiness probe expects a 401 from /pools.
        pools_url = "http://{0}:8091/pools".format(host)
        pools_responding = p(http_status, url=pools_url, status=[401])
        assert wait_for(pools_responding, 120), "service didn't start"

        with run_agent(config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "couchbase")
            assert wait_for(got_plugin), "Didn't get couchbase datapoints"
Beispiel #24
0
def test_health_checker_http():
    """
    Run nginx and check that the health-checker monitor reports datapoints
    when pointed at a path (``/nonexistent``) on port 80.
    """
    config_template = string.Template(
        dedent("""
        monitors:
          - type: collectd/health-checker
            host: $host
            port: 80
            path: /nonexistent
        """))
    with run_service("nginx") as nginx_container:
        host = container_ip(nginx_container)
        port_is_open = p(tcp_socket_open, host, 80)
        assert wait_for(port_is_open, 60), "service didn't start"

        agent_config = config_template.substitute(host=host)
        with run_agent(agent_config) as (backend, _, _):
            got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                           "health_checker")
            assert wait_for(got_plugin), "Didn't get health_checker datapoints"
def test_docker_container_stats():
    """
    Check that docker-container-stats reports cpu and memory percent metrics
    and datapoints tagged with the nginx container's id.
    """
    agent_config = """
    monitors:
      - type: docker-container-stats

    """
    with run_service("nginx") as nginx_container:
        with run_agent(agent_config) as (backend, _, _):
            # (metric name, failure message)
            expected_metrics = (
                ("cpu.percent", "Didn't get docker cpu datapoints"),
                ("memory.percent", "Didn't get docker memory datapoints"),
            )
            for metric_name, failure in expected_metrics:
                assert wait_for(
                    p(has_datapoint_with_metric_name, backend,
                      metric_name)), failure

            got_container = p(has_datapoint_with_dim, backend, "container_id",
                              nginx_container.id)
            assert wait_for(got_container), "Didn't get nginx datapoints"
Beispiel #26
0
def test_solr_monitor():
    """
    Run solr and check that the solr monitor reports datapoints, that the
    5xx response counter is present, and that the agent output has no
    "error" log message.
    """
    with run_service("solr") as solr_container:
        host = container_ip(solr_container)
        config = dedent(f"""
        monitors:
        - type: collectd/solr
          host: {host}
          port: 8983
        """)
        port_is_open = p(tcp_socket_open, host, 8983)
        assert wait_for(port_is_open, 60), "service not listening on port"
        with run_agent(config) as (backend, get_output, _):

            def has_5xx_counter():
                return has_datapoint_with_metric_name(
                    backend, "counter.solr.http_5xx_responses")

            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "solr")
            assert wait_for(got_plugin), "Didn't get solr datapoints"
            assert ensure_always(has_5xx_counter)

            agent_output = get_output().lower()
            assert not has_log_message(
                agent_output, "error"), "error found in agent output!"
def test_activemq():
    """
    Run activemq and check that the agent reports at least one of the
    metrics that selfdescribe lists for the collectd/activemq monitor.
    """
    with run_service("activemq") as activemq_container:
        host = container_ip(activemq_container)
        config = dedent(f"""
            monitors:
              - type: collectd/activemq
                host: {host}
                port: 1099
                serviceURL: service:jmx:rmi:///jndi/rmi://{host}:1099/jmxrmi
                username: testuser
                password: testing123
        """)
        jmx_is_open = p(tcp_socket_open, host, 1099)
        assert wait_for(jmx_is_open, 60), "service didn't start"
        with run_agent(config) as (backend, _, _):
            expected_metrics = get_monitor_metrics_from_selfdescribe(
                "collectd/activemq")
            got_any_metric = p(any_metric_found, backend, expected_metrics)
            assert wait_for(got_any_metric), "Didn't get activemq datapoints"
def test_hadoop(version):
    """
    Bring up a hadoop master and one worker from the same image, start HDFS
    and YARN on the master, and check that the hadoop monitor reports
    datapoints including the active node count.
    """
    with run_service("hadoop",
                     buildargs={"HADOOP_VER": version},
                     hostname="hadoop-master") as hadoop_master:
        # The worker reuses the image that was built for the master.
        with run_container(hadoop_master.image,
                           hostname="hadoop-worker1") as hadoop_worker1:
            # Share each container's ip and hostname with the others.
            distribute_hostnames({
                "hadoop-master": hadoop_master,
                "hadoop-worker1": hadoop_worker1
            })

            # Format HDFS on the master.
            format_result = hadoop_master.exec_run(
                ["/usr/local/hadoop/bin/hdfs", "namenode", "-format"])
            print_lines(format_result[1])

            # Start hadoop first, then yarn.
            for start_script in ("start-dfs.sh", "start-yarn.sh"):
                print_lines(hadoop_master.exec_run(start_script)[1])

            # Wait for the yarn API to become available.
            host = container_ip(hadoop_master)
            port_is_open = p(tcp_socket_open, host, 8088)
            assert wait_for(port_is_open, 60), "service not listening on port"
            yarn_is_up = p(http_status,
                           url="http://{0}:8088".format(host),
                           status=[200])
            assert wait_for(yarn_is_up, 120), "service didn't start"

            # Run the agent against the yarn API.
            config = HADOOP_CONFIG.substitute(host=host, port=8088)
            with run_agent(config) as (backend, _, _):
                got_plugin = p(has_datapoint_with_dim, backend, "plugin",
                               "apache_hadoop")
                assert wait_for(got_plugin), "Didn't get hadoop datapoints"
                one_active_node = p(
                    has_datapoint, backend,
                    "gauge.hadoop.cluster.metrics.active_nodes", {}, 1)
                assert wait_for(
                    one_active_node), "expected 1 hadoop worker node"
Beispiel #29
0
def test_docker_observer_labels_multiple_monitors_per_port():
    """
    Check that container labels can configure two monitors on the same port,
    each contributing its own ``app`` extra dimension.
    """
    agent_config = dedent("""
        observers:
          - type: docker
    """)
    # Two label groups for port 80: the plain "80" keys and the "80-nginx2"
    # keys, each with its own extraDimensions.
    container_labels = {
        "agent.signalfx.com.monitorType.80": "collectd/nginx",
        "agent.signalfx.com.config.80.intervalSeconds": "1",
        "agent.signalfx.com.config.80.extraDimensions": "{app: nginx}",
        "agent.signalfx.com.monitorType.80-nginx2": "collectd/nginx",
        "agent.signalfx.com.config.80-nginx2.intervalSeconds": "1",
        "agent.signalfx.com.config.80-nginx2.extraDimensions": "{app: other}",
    }
    with run_agent(agent_config) as (backend, _, _):

        def no_container_datapoints():
            return not has_datapoint_with_dim(backend, "container_name",
                                              "nginx-multi-monitors")

        with run_service("nginx",
                         name="nginx-multi-monitors",
                         labels=container_labels):
            # (dimension key, dimension value, failure message)
            dimension_checks = (
                ("plugin", "nginx", "Didn't get nginx datapoints"),
                ("app", "nginx", "Didn't get extra dims"),
                ("app", "other", "Didn't get extra dims"),
            )
            for dim_key, dim_value, failure in dimension_checks:
                assert wait_for(
                    p(has_datapoint_with_dim, backend, dim_key,
                      dim_value)), failure
        # Give the docker observer time to drop nginx and collectd time to
        # restart before checking that datapoints have stopped.
        time.sleep(5)
        backend.datapoints.clear()
        assert ensure_always(no_container_datapoints, 10)