Exemple #1
0
def test_mongo():
    """Start a ``mongo:3.6`` container and check that the collectd/mongodb
    monitor reports datapoints carrying the ``plugin=mongo`` dimension."""
    with run_container("mongo:3.6") as mongo_cont:
        host = container_ip(mongo_cont)

        # Do not start the agent until the database port accepts connections.
        port_is_open = p(tcp_socket_open, host, 27017)
        assert wait_for(port_is_open, 60), "service didn't start"

        agent_yaml = dedent(
            f"""
            monitors:
              - type: collectd/mongodb
                host: {host}
                port: 27017
                databases: [admin]
            """
        )
        with run_agent(agent_yaml) as (backend, _, _):
            got_mongo = p(has_datapoint_with_dim, backend, "plugin", "mongo")
            assert wait_for(got_mongo), "Didn't get mongo datapoints"
Exemple #2
0
def test_chrony():
    """
    Verify that the chrony monitor gets configured by looking for its failure
    log line.

    chronyd is very hard to run in a test environment without giving it the
    ability to change the time, which we don't want.  So instead of expecting
    metrics, just check for the error message that shows the monitor actually
    did configure the plugin.
    """
    expected_message = "chrony plugin: chrony_query (REQ_TRACKING) failed"

    with run_agent(chrony_config) as (_, get_output, _):
        # Re-read the agent output on every poll; the line may show up late.
        assert wait_for(
            lambda: has_log_message(
                get_output(), level="error", message=expected_message
            )
        ), "Didn't get chrony error message"
def test_docker_observer():
    """Check that a labelled nginx container is picked up while it runs and
    that no datapoints for it are seen once it has been stopped."""
    with run_agent(CONFIG) as (backend, _, _):
        nginx_labels = {"mylabel": "abc"}
        with run_service("nginx", name="nginx-discovery", labels=nginx_labels):
            got_nginx = p(has_datapoint_with_dim, backend, "plugin", "nginx")
            assert wait_for(got_nginx), "Didn't get nginx datapoints"

            got_label_dim = p(has_datapoint_with_dim, backend, "mydim", "abc")
            assert wait_for(got_label_dim), "Didn't get custom label dimension"

        # Let nginx be removed by docker observer and collectd restart
        time.sleep(5)
        backend.reset_datapoints()

        def no_nginx_container_datapoints():
            return not has_datapoint_with_dim(
                backend, "container_name", "nginx-discovery"
            )

        assert ensure_always(no_nginx_container_datapoints, 10)
Exemple #4
0
def test_basic_etcd2_config():
    """Seed an etcd2 container with values, point the agent at it through
    ``CONFIG`` and wait for the expected datapoints and ``env`` dimension."""
    with run_container(ETCD2_IMAGE, command=ETCD_COMMAND) as etcd:
        etcd_is_up = p(container_cmd_exit_0, etcd, "/etcdctl ls")
        assert wait_for(etcd_is_up, 5), "etcd didn't start"

        seed_values = (
            ("/env", "prod"),
            ("/monitors/cpu", "- type: collectd/cpu"),
            ("/monitors/signalfx-metadata", "- type: collectd/signalfx-metadata"),
        )
        for path, value in seed_values:
            create_path(etcd, path, value)

        endpoint = "%s:2379" % container_ip(etcd)
        final_conf = CONFIG.substitute(endpoint=endpoint)
        with run_agent(final_conf) as (backend, _, _):
            got_metadata = p(
                has_datapoint_with_dim, backend, "plugin", "signalfx-metadata"
            )
            assert wait_for(got_metadata), "Datapoints didn't come through"

            got_env = p(has_datapoint_with_dim, backend, "env", "prod")
            assert wait_for(got_env), "dimension wasn't set"
def test_protocols():
    """
    Run the collectd/protocols monitor and check that at least one of its
    self-described metrics or dimensions arrives, without any errors in the
    agent output.
    """
    agent_config = """
    monitors:
      - type: collectd/protocols
    """
    metrics = get_monitor_metrics_from_selfdescribe("collectd/protocols")
    dims = get_monitor_dims_from_selfdescribe("collectd/protocols")

    with run_agent(agent_config) as (backend, get_output, _):
        got_something = p(has_any_metric_or_dim, backend, metrics, dims)
        assert wait_for(
            got_something, timeout_seconds=60
        ), "timed out waiting for metrics and/or dimensions!"

        # Lower-case the output so the search for "error" ignores case.
        agent_output = get_output().lower()
        assert not has_log_message(agent_output, "error"), "error found in agent output!"
Exemple #6
0
def test_docker_container_stats():
    """Run the docker-container-stats monitor next to an nginx container and
    wait for cpu/memory metrics plus a datapoint tagged with its id."""
    agent_config = """
    monitors:
      - type: docker-container-stats

    """
    expected_metrics = (
        ("cpu.percent", "Didn't get docker cpu datapoints"),
        ("memory.percent", "Didn't get docker memory datapoints"),
    )

    with run_service("nginx") as nginx_container:
        with run_agent(agent_config) as (backend, _, _):
            for metric_name, failure in expected_metrics:
                assert wait_for(
                    p(has_datapoint_with_metric_name, backend, metric_name)
                ), failure

            got_container = p(
                has_datapoint_with_dim, backend, "container_id", nginx_container.id
            )
            assert wait_for(got_container), "Didn't get nginx datapoints"
def test_does_not_set_hostname_if_not_host_specific():
    """With ``disableHostDimensions`` set at the top level, the overridden
    hostname must never appear as a ``host`` dimension on datapoints or
    events."""
    agent_config = """
hostname: acmeinc.com
disableHostDimensions: true
monitors:
  - type: collectd/signalfx-metadata
    persistencePath: /dev/null
  - type: collectd/cpu
  - type: collectd/uptime
    """
    with run_agent(agent_config) as (backend, _, _):

        def no_host_on_datapoints():
            return not has_datapoint_with_dim(backend, "host", "acmeinc.com")

        def no_host_on_events():
            return not has_event_with_dim(backend, "host", "acmeinc.com")

        assert ensure_always(no_host_on_datapoints), "Got overridden hostname in datapoint"
        assert ensure_always(no_host_on_events), "Got overridden hostname in event"
def test_win_perf_counters(monitor_config):
    """Run the agent with one win_perf_counters configuration and check the
    plugin dimension, the optional ``_Total`` instance and every metric.

    ``monitor_config`` is unpacked as
    ``(measurement, config, include_total, metrics)``.
    """
    measurement, config, include_total, metrics = monitor_config

    with run_agent(config) as (backend, get_output, _):
        got_plugin = p(
            has_datapoint_with_dim, backend, "plugin", "telegraf-win_perf_counters"
        )
        assert wait_for(got_plugin), "Didn't get %s datapoints" % measurement

        if include_total:
            got_total = p(has_datapoint_with_dim, backend, "instance", "_Total")
            assert wait_for(got_total), "Didn't get _Total datapoints"

        for metric in metrics:
            got_metric = p(has_datapoint_with_metric_name, backend, metric)
            assert wait_for(got_metric), "Didn't get metric %s" % metric

        # Lower-case the output so the search for "error" ignores case.
        agent_output = get_output().lower()
        assert not has_log_message(agent_output, "error"), "error found in agent output!"
Exemple #9
0
def test_health_checker_http():
    """Point collectd/health-checker at an nginx container (path
    ``/nonexistent``) and wait for ``plugin=health_checker`` datapoints."""
    with run_service("nginx") as nginx_container:
        host = container_ip(nginx_container)
        port_is_open = p(tcp_socket_open, host, 80)
        assert wait_for(port_is_open, 60), "service didn't start"

        config_template = string.Template(
            dedent("""
        monitors:
          - type: collectd/health-checker
            host: $host
            port: 80
            path: /nonexistent
        """))
        agent_config = config_template.substitute(host=host)

        with run_agent(agent_config) as (backend, _, _):
            got_health_checker = p(
                has_datapoint_with_dim, backend, "plugin", "health_checker"
            )
            assert wait_for(got_health_checker), "Didn't get health_checker datapoints"
Exemple #10
0
def test_health_checker_http_windows():
    """Point collectd/health-checker at ``localhost:80`` and wait for
    ``plugin=health_checker`` datapoints."""
    config_template = string.Template(
        dedent(
            """
    monitors:
      - type: collectd/health-checker
        host: $host
        port: 80
        path: /
    """
        )
    )
    agent_config = config_template.substitute(host="localhost")

    with run_agent(agent_config) as (backend, _, _):
        got_health_checker = p(
            has_datapoint_with_dim, backend, "plugin", "health_checker"
        )
        assert wait_for(got_health_checker), "Didn't get health_checker datapoints"
Exemple #11
0
def test_couchbase(tag):
    """Build and run the couchbase service at version ``tag`` and wait for
    datapoints with the ``plugin=couchbase`` dimension."""
    build_args = {"COUCHBASE_VERSION": tag}
    with run_service(
        "couchbase", buildargs=build_args, hostname="node1.cluster"
    ) as couchbase_container:
        host = container_ip(couchbase_container)
        config = COUCHBASE_CONFIG.substitute(host=host)

        port_is_open = p(tcp_socket_open, host, 8091)
        assert wait_for(port_is_open, 60), "service not listening on port"

        # The service counts as started once /pools answers with a 401.
        pools_url = "http://{0}:8091/pools".format(host)
        pools_answers = p(http_status, url=pools_url, status=[401])
        assert wait_for(pools_answers, 120), "service didn't start"

        with run_agent(config) as (backend, _, _):
            got_couchbase = p(has_datapoint_with_dim, backend, "plugin", "couchbase")
            assert wait_for(got_couchbase), "Didn't get couchbase datapoints"
def test_conviva_single_metric():
    """Configure the conviva monitor with only ``concurrent_plays`` and check
    that every datapoint received is ``conviva.concurrent_plays``."""
    agent_config = dedent(
        f"""
        monitors:
        - type: conviva
          pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
          pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
          metricConfigs:
          - metricParameter: concurrent_plays
    """
    )
    with run_agent(agent_config) as (backend, _, _):

        def got_any_datapoint():
            return len(backend.datapoints) > 0

        assert wait_for(got_any_datapoint), "Didn't get conviva datapoints"

        only_concurrent_plays = p(
            all_datapoints_have_metric_name, backend, "conviva.concurrent_plays"
        )
        assert ensure_always(
            only_concurrent_plays
        ), "Received conviva datapoints for other metrics"
Exemple #13
0
def test_omitting_kafka_metrics(version="1.0.1"):
    """Check that an MBean listed in ``mBeansToOmit`` produces no datapoints.

    Runs Kafka at ``version``, configures collectd/kafka to omit
    ``kafka-active-controllers`` and expects that
    ``gauge.kafka-active-controllers`` does NOT arrive within 60 seconds.
    """
    with run_kafka(version) as kafka:
        kafkahost = container_ip(kafka)
        with run_agent(
                textwrap.dedent("""
        monitors:
         - type: collectd/kafka
           host: {0}
           port: 7099
           clusterName: testCluster
           mBeansToOmit:
             - kafka-active-controllers
        """.format(kafkahost))) as [backend, _, _]:
            # Negative check: the assertion fails when the omitted metric
            # DOES show up, so the message has to say exactly that.
            assert not wait_for(
                p(has_datapoint_with_metric_name, backend,
                  "gauge.kafka-active-controllers"),
                timeout_seconds=60
            ), "Got gauge.kafka-active-controllers datapoints even though the MBean is omitted"
Exemple #14
0
def test_all_kafka_monitors(version):
    """Run a Kafka broker plus a producer and a consumer container and check
    that the kafka, kafka_producer and kafka_consumer monitors all report."""

    def client_env(jmx_port, role, broker_host):
        # Environment for a container started from the broker image.
        return {
            "JMX_PORT": jmx_port,
            "START_AS": role,
            "KAFKA_BROKER": "%s:9092" % (broker_host,),
        }

    with run_kafka(version) as kafka:
        kafka_host = container_ip(kafka)
        producer_env = client_env("8099", "producer", kafka_host)
        with run_container(kafka.image.id, environment=producer_env) as kafka_producer:
            kafkaproducerhost = container_ip(kafka_producer)
            producer_jmx_open = p(tcp_socket_open, kafkaproducerhost, 8099)
            assert wait_for(producer_jmx_open, 60), "kafka producer jmx didn't start"

            consumer_env = client_env("9099", "consumer", kafka_host)
            with run_container(kafka.image.id, environment=consumer_env) as kafka_consumer:
                kafkaconsumerhost = container_ip(kafka_consumer)
                consumer_jmx_open = p(tcp_socket_open, kafkaconsumerhost, 9099)
                assert wait_for(consumer_jmx_open, 60), "kafka consumer jmx didn't start"

                agent_config = textwrap.dedent(
                    """
                monitors:
                 - type: collectd/kafka
                   host: {0}
                   port: 7099
                   clusterName: testCluster
                 - type: collectd/kafka_producer
                   host: {1}
                   port: 8099
                 - type: collectd/kafka_consumer
                   host: {2}
                   port: 9099
                """.format(kafka_host, kafkaproducerhost, kafkaconsumerhost)
                )
                with run_agent(agent_config) as (backend, _, _):
                    expectations = (
                        (
                            p(has_datapoint_with_metric_name, backend, "gauge.kafka-active-controllers"),
                            "Didn't get kafka datapoints",
                        ),
                        (
                            p(has_datapoint_with_dim, backend, "cluster", "testCluster"),
                            "Didn't get cluster dimension from kafka datapoints",
                        ),
                        (
                            p(has_datapoint_with_dim, backend, "client-id", "console-producer"),
                            "Didn't get client-id dimension from kafka_producer datapoints",
                        ),
                        (
                            p(has_datapoint_with_dim, backend, "client-id", "consumer-1"),
                            "Didn't get client-id dimension from kafka_consumer datapoints",
                        ),
                    )
                    for check, failure in expectations:
                        assert wait_for(check, timeout_seconds=60), failure
def test_ecs_container_label_dimension():
    """Run ecs-metadata with a ``labelsToDimensions`` mapping against the
    ``ecsmeta`` test service and assert on the ``container_title`` dimension.

    NOTE(review): the assertion requires the mapped dimension value to be
    *absent* for the whole observation window - confirm that this is the
    intended expectation for this mapping.
    """
    with run_service("ecsmeta") as ecsmeta:
        with run_container("redis:4-alpine") as redis:
            ecsmeta_ip = container_ip(ecsmeta)
            redis_ip = container_ip(redis)
            agent_config = """
    monitors:
      - type: ecs-metadata
        metadataEndpoint: http://%s/metadata_single?redis_ip=%s
        statsEndpoint: http://%s/stats
        labelsToDimensions:
          container_name: container_title

    """ % (ecsmeta_ip, redis_ip, ecsmeta_ip)

            with run_agent(agent_config) as (backend, _, _):

                def no_container_title():
                    return not has_datapoint_with_dim(
                        backend,
                        "container_title",
                        "ecs-seon-fargate-test-3-redis-baf2cfda88f8d8ee4900",
                    )

                assert ensure_always(no_container_title)
Exemple #16
0
def test_tail():
    """Write lines to a temporary file before and after the agent starts and
    check that both are reported, along with the ``telegraf-tail`` plugin
    dimension."""
    with tempfile.NamedTemporaryFile("w+b") as log_file:
        config = monitor_config.substitute(file=log_file.name)

        def append_line(line):
            # Flush right away so the line is on disk before we wait on it.
            log_file.write(line)
            log_file.flush()

        append_line(b"disk,customtag1=foo bytes=1024\n")
        with run_agent(config) as (backend, _, _):
            got_first = p(has_datapoint_with_dim, backend, "customtag1", "foo")
            assert wait_for(got_first), "didn't get datapoint written before startup"

            append_line(b"mem,customtag2=foo2 bytes=1024\n")
            got_second = p(has_datapoint_with_dim, backend, "customtag2", "foo2")
            assert wait_for(got_second), "didn't get datapoint written after startup"

            got_plugin = p(has_datapoint_with_dim, backend, "plugin", "telegraf-tail")
            assert wait_for(got_plugin), "didn't get datapoint with expected plugin dimension"
Exemple #17
0
def test_signalfx_metadata():
    """Run collectd/signalfx-metadata (together with collectd/cpu) and check
    that one of its self-described metrics or dimensions arrives and that the
    agent output contains no errors."""
    monitor_type = "collectd/signalfx-metadata"
    metrics = get_monitor_metrics_from_selfdescribe(monitor_type)
    dims = get_monitor_dims_from_selfdescribe(monitor_type)

    agent_config = """
    monitors:
      - type: collectd/signalfx-metadata
        procFSPath: /proc
        etcPath: /etc
        persistencePath: /var/run/signalfx-agent
      - type: collectd/cpu
    """
    with run_agent(agent_config) as (backend, get_output, _):
        assert has_any_metric_or_dim(
            backend, metrics, dims, timeout=60
        ), "timed out waiting for metrics and/or dimensions!"

        # Lower-case the output so the search for "error" ignores case.
        agent_output = get_output().lower()
        assert not has_log_message(agent_output, "error"), "error found in agent output!"
Exemple #18
0
def test_nginx():
    """Run the expvar monitor against the ``expvar`` test service and wait
    for every metric listed in ``METADATA.included_metrics``."""
    with run_service("expvar") as expvar_container:
        host = container_ip(expvar_container)
        port_is_open = p(tcp_socket_open, host, 8080)
        assert wait_for(port_is_open, 60), "service didn't start"

        agent_config = dedent(f"""
          monitors:
           - type: expvar
             host: {host}
             port: 8080
         """)
        with run_agent(agent_config) as (backend, _, _):
            for metric in METADATA.included_metrics:
                # Progress output makes it obvious which metric is missing.
                print("Waiting for %s" % metric)
                got_metric = p(has_datapoint, backend, metric_name=metric)
                assert wait_for(got_metric), "Didn't get included datapoints"
def test_endpoint_config_mapping():
    """Start Kafka with ``com.signalfx.*`` container labels and wait for
    datapoints carrying the dimensions ``a=1`` and ``cluster=prod``."""
    kafka_labels = {
        "com.signalfx.extraDimensions": "{a: 1}",
        "com.signalfx.cluster": "prod",
    }
    expected_dimensions = {"a": "1", "cluster": "prod"}

    with run_agent(CONFIG) as (backend, _, _):
        with run_kafka("1.1.1", name="kafka-discovery", labels=kafka_labels):
            got_mapped = p(has_datapoint, backend, dimensions=expected_dimensions)
            assert wait_for(
                got_mapped
            ), "Didn't get kafka datapoints with properly mapped config"
def test_conviva_multi_filter(conviva_filters):
    """Configure ``concurrent_plays`` with several filters and wait for a
    datapoint with a matching ``filter`` dimension for each of them."""
    agent_config = dedent(
        f"""
        monitors:
        - type: conviva
          pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
          pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
          metricConfigs:
          - metricParameter: concurrent_plays
            filters: {conviva_filters}
    """
    )
    with run_agent(agent_config) as (backend, _, _):
        for cf in conviva_filters:
            got_filtered = p(
                has_datapoint, backend, "conviva.concurrent_plays", {"filter": cf}
            )
            assert wait_for(got_filtered), (
                "Didn't get conviva datapoints for metric concurrent_plays with dimension {filter: %s}" % cf
            )
def test_conviva_extra_dimensions():
    """Configure ``extraDimensions`` on the conviva monitor and check that
    every datapoint received carries them."""
    agent_config = dedent(
        f"""
        monitors:
        - type: conviva
          pulseUsername: {{"#from": "env:CONVIVA_PULSE_USERNAME"}}
          pulsePassword: {{"#from": "env:CONVIVA_PULSE_PASSWORD"}}
          extraDimensions:
            metric_source: conviva
            mydim: foo
    """
    )
    extra_dims = {"metric_source": "conviva", "mydim": "foo"}

    with run_agent(agent_config) as (backend, _, _):

        def got_any_datapoint():
            return len(backend.datapoints) > 0

        assert wait_for(got_any_datapoint), "Didn't get conviva datapoints"

        all_tagged = p(all_datapoints_have_dims, backend, extra_dims)
        assert ensure_always(all_tagged), "Received conviva datapoints without extra dimensions"
Exemple #22
0
def test_does_not_set_hostname_on_monitor_if_not_host_specific():
    """Check that ``disableHostDimensions`` on a single monitor is honoured.

    The hostname is overridden to ``acmeinc.com``.  Datapoints from
    signalfx-metadata must still carry it as ``host``, while datapoints from
    the uptime monitor, which sets ``disableHostDimensions: true``, must
    not.
    """
    with run_agent("""
hostname: acmeinc.com
monitors:
  - type: collectd/signalfx-metadata
    persistencePath: /dev/null
  - type: collectd/cpu
  - type: collectd/uptime
    disableHostDimensions: true
    """) as [backend, _, _]:
        assert wait_for(
            p(has_datapoint_with_all_dims, backend,
              dict(host="acmeinc.com", plugin="signalfx-metadata"))
        ), "Didn't get overridden hostname in datapoint"

        # Match on host AND plugin together: looking for a dimension that is
        # literally named "uptime" would pass no matter what the agent sends.
        assert ensure_always(
            lambda: not has_datapoint_with_all_dims(
                backend, dict(host="acmeinc.com", plugin="uptime"))
        ), "Got overridden hostname in datapoint"
def test_ecs_container_image_filtering():
    """Run ecs-metadata with ``redis:latest`` in ``excludedImages`` and
    assert that no datapoint for the given container id is ever reported."""
    with run_service("ecsmeta") as ecsmeta:
        with run_container("redis:4-alpine") as redis:
            ecsmeta_ip = container_ip(ecsmeta)
            redis_ip = container_ip(redis)
            agent_config = """
    monitors:
      - type: ecs-metadata
        metadataEndpoint: http://%s/metadata_single?redis_ip=%s
        statsEndpoint: http://%s/stats
        excludedImages:
          - redis:latest

    """ % (ecsmeta_ip, redis_ip, ecsmeta_ip)

            with run_agent(agent_config) as (backend, _, _):

                def no_excluded_container():
                    return not has_datapoint_with_dim(
                        backend,
                        "container_id",
                        "c42fa5a73634bcb6e301dfb7b13ac7ead2af473210be6a15da75a290c283b66c",
                    )

                assert ensure_always(no_excluded_container)
def test_custom_collectd_shutdown():
    """Remove a collectd/custom ping monitor through reconfiguration, check
    that its datapoints stop, then add it back and check they resume."""
    # Both configs are dedented up front; the ping variant is used twice.
    config_with_ping = dedent("""
        monitors:
          - type: collectd/df
          - type: collectd/custom
            template: |
              LoadPlugin "ping"
              <Plugin ping>
                Host "google.com"
              </Plugin>
    """)
    config_df_only = dedent("""
            monitors:
              - type: collectd/df
        """)

    with run_agent(config_with_ping) as (backend, _, configure):
        got_ping = p(has_datapoint_with_dim, backend, "plugin", "ping")
        got_df = p(has_datapoint_with_dim, backend, "plugin", "df")

        assert wait_for(got_ping), "Didn't get ping datapoints"
        assert wait_for(got_df), "Didn't get df datapoints"

        configure(config_df_only)

        # Give the reconfiguration a moment, then start from a clean slate.
        time.sleep(3)
        backend.reset_datapoints()

        def no_ping_datapoints():
            return not has_datapoint_with_dim(backend, "plugin", "ping")

        assert ensure_always(no_ping_datapoints), "Got ping datapoint when we shouldn't have"

        configure(config_with_ping)

        assert wait_for(got_ping), "Didn't get ping datapoints"
Exemple #25
0
def test_kong(kong_image):
    """Bring up Postgres, run the Kong migrations in a throwaway container,
    start Kong from ``kong_image`` and wait for ``plugin=kong`` datapoints."""
    postgres_env = {"POSTGRES_USER": "******", "POSTGRES_DB": "kong"}

    with run_container("postgres:9.5", environment=postgres_env) as db:
        db_ip = container_ip(db)
        kong_env = {
            "KONG_ADMIN_LISTEN": "0.0.0.0:8001",
            "KONG_LOG_LEVEL": "warn",
            "KONG_DATABASE": "postgres",
            "KONG_PG_DATABASE": "kong",
            "KONG_PG_HOST": db_ip,
        }

        assert wait_for(lambda: db.exec_run("pg_isready -U postgres").exit_code == 0)

        # The migrations container only sleeps; commands are exec'd into it.
        with run_container(kong_image, environment=kong_env, command="sleep inf") as migrations:
            psql_probe = "psql -h {} -U postgres".format(db_ip)
            assert wait_for(lambda: migrations.exec_run(psql_probe).exit_code == 0)
            assert migrations.exec_run("kong migrations up --v").exit_code == 0

        with run_container(kong_image, environment=kong_env) as kong:
            kong_ip = container_ip(kong)
            status_url = "http://{}:8001/signalfx".format(kong_ip)

            def kong_is_listening():
                try:
                    response = get(status_url)
                except RequestException:
                    return False
                return response.status_code == 200

            assert wait_for(kong_is_listening)

            config_template = string.Template(dedent("""
            monitors:
              - type: collectd/kong
                host: $host
                port: 8001
                metrics:
                  - metric: connections_handled
                    report: true
            """))
            config = config_template.substitute(host=container_ip(kong))

            with run_agent(config) as (backend, _, _):
                got_kong = p(has_datapoint_with_dim, backend, "plugin", "kong")
                assert wait_for(got_kong), "Didn't get Kong data point"
Exemple #26
0
def test_hadoopjmx(version, nodeType):
    r"""
    Start a hadoop master and worker at ``version`` and wait for hadoopjmx
    datapoints carrying the ``nodeType`` dimension.

    Any new versions of hadoop should be manually built, tagged, and pushed
    to quay.io, i.e.

        docker build \
            -t quay.io/signalfx/hadoop-test:<version> \
            --build-arg HADOOP_VER=<version> \
            <repo_root>/test-services/hadoop
        docker push quay.io/signalfx/hadoop-test:<version>
    """
    image = "quay.io/signalfx/hadoop-test:%s" % version
    with run_container(image, hostname="hadoop-master") as hadoop_master, run_container(
        image, hostname="hadoop-worker1"
    ) as hadoop_worker1:
        # nameNode and resourceManager are looked up on the master container,
        # every other node type on the worker.
        on_master = nodeType in ("nameNode", "resourceManager")
        container = hadoop_master if on_master else hadoop_worker1
        host = container_ip(container)
        port = NODETYPE_PORT[nodeType]

        if nodeType in ("resourceManager", "nodeManager"):
            # Append the yarn options to the env file before hadoop starts.
            yarn_var = YARN_VAR[nodeType]
            yarn_opts = YARN_OPTS % (yarn_var, port, yarn_var)
            append_export = "echo 'export %s' >> %s" % (yarn_opts, YARN_ENV_PATH)
            container.exec_run(["/bin/bash", "-c", append_export])

        start_hadoop(hadoop_master, hadoop_worker1)

        # wait for jmx to be available
        jmx_is_open = p(tcp_socket_open, host, port)
        assert wait_for(jmx_is_open, 60), "jmx service not listening on port %d" % port

        # start the agent with hadoopjmx config
        config = HADOOPJMX_CONFIG.substitute(host=host, port=port, nodeType=nodeType)
        with run_agent(config) as (backend, _, _):
            got_node_type = p(has_datapoint_with_dim, backend, "nodeType", nodeType)
            assert wait_for(got_node_type), (
                "Didn't get hadoopjmx datapoints for nodeType %s" % nodeType
            )
Exemple #27
0
def test_cpu_utilization_per_core():
    """Enable ``perCoreCPUUtil`` on signalfx-metadata and wait for a
    ``cpu.utilization_per_core`` datapoint, with no errors in the output."""
    agent_config = """
    monitors:
      - type: collectd/signalfx-metadata
        procFSPath: /proc
        etcPath: /etc
        persistencePath: /var/run/signalfx-agent
        perCoreCPUUtil: true
      - type: collectd/cpu
    metricsToInclude:
      - metricNames:
        - cpu.utilization_per_core
        monitorType: collectd/signalfx-metadata
        """
    with run_agent(agent_config) as (backend, get_output, _):
        got_per_core = p(
            has_datapoint,
            backend,
            "cpu.utilization_per_core",
            {"plugin": "signalfx-metadata"},
        )
        assert wait_for(got_per_core)

        # Lower-case the output so the search for "error" ignores case.
        agent_output = get_output().lower()
        assert not has_log_message(agent_output, "error"), "error found in agent output!"
def test_rabbitmq_broker_name():
    """Check that the ``brokerName`` template of collectd/rabbitmq is
    rendered into the ``plugin_instance`` dimension.

    With ``brokerName: '{{.host}}-{{.username}}'`` and user ``guest`` the
    test expects ``plugin_instance`` to equal ``<host>-guest``.
    """
    with run_container("rabbitmq:3.6-management") as rabbitmq_cont:
        host = rabbitmq_cont.attrs["NetworkSettings"]["IPAddress"]
        wait_for_rabbit_to_start(rabbitmq_cont)

        # The config is written inline because this test needs its own
        # brokerName setting.
        with run_agent("""
monitors:
  - type: collectd/rabbitmq
    host: %s
    brokerName: '{{.host}}-{{.username}}'
    port: 15672
    username: guest
    password: guest
    collectNodes: true
    collectChannels: true
        """ % (host, )) as [backend, _, _]:
            assert wait_for(p(has_datapoint_with_dim, backend, "plugin_instance", "%s-guest" % host)), \
                "Didn't get expected plugin_instance dimension"
def test_vault_token_renewal():
    """
    Test the token renewal feature.

    A renewable token with a 12s TTL is handed to the agent; the Vault audit
    log is then inspected right away, after 10 seconds and after another 10
    seconds to count the renewals.
    """
    with run_vault() as (vault_client, get_audit_events):
        new_token = vault_client.create_token(policies=["root"], renewable=True, ttl="12s")
        accessor = new_token["auth"]["accessor"]

        vault_client.write("secret/data/appinfo", data={"env": "prod"})

        agent_config = dedent(f"""
            intervalSeconds: 2
            globalDimensions:
              env: {{"#from": "vault:secret/data/appinfo[data.env]"}}
            configSources:
              vault:
                vaultToken: {new_token['auth']['client_token']}
                vaultAddr: {vault_client.url}
            monitors:
             - type: collectd/uptime
        """)
        with run_agent(agent_config) as (backend, _, _):
            got_env = p(has_datapoint, backend, dimensions={"env": "prod"})
            assert wait_for(got_env)

            read_paths = audit_read_paths(get_audit_events())
            assert read_paths == ["secret/data/appinfo"], "expected one reads"

            renewals = audit_token_renewals(get_audit_events())
            assert renewals == [accessor], "token immediately renews"

            time.sleep(10)

            renewals = audit_token_renewals(get_audit_events())
            assert renewals == [accessor, accessor], "token has renewed twice now"

            time.sleep(10)

            renewals = audit_token_renewals(get_audit_events())
            assert len(renewals) >= 3, "token has renewed three times now"
Exemple #30
0
def test_solr_monitor():
    """Run collectd/solr against the ``solr`` test service and check the
    plugin dimension, the 5xx counter metric and an error-free agent log."""
    with run_service("solr") as solr_container:
        host = container_ip(solr_container)

        port_is_open = p(tcp_socket_open, host, 8983)
        config = dedent(f"""
        monitors:
        - type: collectd/solr
          host: {host}
          port: 8983
        """)
        assert wait_for(port_is_open, 60), "service not listening on port"

        with run_agent(config) as (backend, get_output, _):
            got_solr = p(has_datapoint_with_dim, backend, "plugin", "solr")
            assert wait_for(got_solr), "Didn't get solr datapoints"

            has_5xx_counter = p(
                has_datapoint_with_metric_name,
                backend,
                "counter.solr.http_5xx_responses",
            )
            assert ensure_always(has_5xx_counter)

            # Lower-case the output so the search for "error" ignores case.
            agent_output = get_output().lower()
            assert not has_log_message(agent_output, "error"), "error found in agent output!"