Example #1
def test_cgroup_monitor():
    with run_service("nginx",
                     cpu_period=100_000,
                     cpu_quota=10000,
                     cpu_shares=50,
                     mem_limit=20 * 1024 * 1024) as nginx_container:
        with Agent.run("""
    monitors:
      - type: cgroups
        extraMetrics: ['*']
    """) as agent:
            verify(agent, METADATA.all_metrics)

            expected_cgroup = "/docker/" + nginx_container.id

            assert wait_for(
                p(
                    has_datapoint,
                    agent.fake_services,
                    metric_name="cgroup.cpu_shares",
                    value=50,
                    dimensions={"cgroup": expected_cgroup},
                ))

            assert wait_for(
                p(
                    has_datapoint,
                    agent.fake_services,
                    metric_name="cgroup.cpu_cfs_period_us",
                    value=100_000,
                    dimensions={"cgroup": expected_cgroup},
                ))
Example #2
def test_jenkins_enhanced(version):
    with run_service("jenkins",
                     buildargs={
                         "JENKINS_REPO": version[0],
                         "JENKINS_VERSION": version[1],
                         "JENKINS_PORT": "8080"
                     }) as jenkins_container:
        host = container_ip(jenkins_container)
        config = dedent(f"""
            monitors:
              - type: collectd/jenkins
                host: {host}
                port: 8080
                metricsKey: {METRICS_KEY}
                enhancedMetrics: true
            """)
        assert wait_for(p(tcp_socket_open, host, 8080),
                        60), "service not listening on port"
        assert wait_for(
            p(http_status,
              url=f"http://{host}:8080/metrics/{METRICS_KEY}/ping/",
              status=[200]), 120), "service didn't start"

        with Agent.run(config) as agent:
            verify_expected_is_subset(agent, ENHANCED_METRICS[version])
Example #3
def test_new_monitor_filtering():
    with Agent.run("""
           monitors:
             - type: internal-metrics
               intervalSeconds: 1
               datapointsToExclude:
                - metricNames:
                  - '*'
                  - '!sfxagent.go_heap_*'
                  - '!sfxagent.go_frees'
           """) as agent:
        is_expected = lambda dp: dp.metric.startswith(
            "sfxagent.go_heap") or dp.metric == "sfxagent.go_frees"

        def no_filtered_metrics():
            for dp in agent.fake_services.datapoints:
                assert is_expected(
                    dp), f"Got unexpected metric name {dp.metric}"
            return True

        assert wait_for(
            lambda: agent.fake_services.datapoints), "No datapoints received"
        assert ensure_always(no_filtered_metrics,
                             interval_seconds=2,
                             timeout_seconds=5)

        metrics_received = agent.fake_services.datapoints_by_metric.keys()
        assert "sfxagent.go_frees" in metrics_received
        assert "sfxagent.go_heap_inuse" in metrics_received
        assert "sfxagent.go_heap_released" in metrics_received
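
The datapointsToExclude block above mixes a wildcard with "!"-negated patterns: a metric is dropped if it matches the list, and a "!" pattern re-includes what an earlier pattern excluded. Here is a rough sketch of that matching, assuming later patterns override earlier ones (an assumption; the agent's actual filter is implemented in Go):

from fnmatch import fnmatchcase

EXCLUDE_PATTERNS = ["*", "!sfxagent.go_heap_*", "!sfxagent.go_frees"]


def is_excluded(metric, patterns):
    # Walk the list in order; a "!" pattern that matches re-includes the metric.
    excluded = False
    for pattern in patterns:
        if pattern.startswith("!"):
            if fnmatchcase(metric, pattern[1:]):
                excluded = False
        elif fnmatchcase(metric, pattern):
            excluded = True
    return excluded


assert is_excluded("sfxagent.go_total_alloc", EXCLUDE_PATTERNS)
assert not is_excluded("sfxagent.go_heap_inuse", EXCLUDE_PATTERNS)
assert not is_excluded("sfxagent.go_frees", EXCLUDE_PATTERNS)
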
Example #4
def test_sql_mysql_db():
    with run_mysql_replication() as [_, slave_ip]:
        with Agent.run(
            dedent(
                f"""
                monitors:
                  - type: sql
                    host: {slave_ip}
                    port: 3306
                    dbDriver: mysql
                    params:
                      username: root
                      password: master_root_password
                    connectionString: '{{{{.username}}}}:{{{{.password}}}}@tcp({{{{.host}}}})/mysql'
                    queries:
                     - query: 'SHOW SLAVE STATUS'
                       datapointExpressions:
                         - |
                             GAUGE("mysql.slave_sql_running",
                                   {{master_uuid: Master_UUID, channel: Channel_name}},
                                   Slave_SQL_Running == "Yes" ? 1 : 0)
                """
            )
        ) as agent:
            assert wait_for(
                p(has_datapoint, agent.fake_services, metric_name="mysql.slave_sql_running", value=1),
                timeout_seconds=120,
            ), "Didn't get mysql.slave_sql_running metric"
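
One subtlety in the config above: the monitor's connectionString is a Go template, so its {{.username}}-style placeholders have to survive Python's f-string interpolation, which is why they are written with doubled braces (each {{{{ renders as {{). A quick illustration, using a hypothetical host value standing in for slave_ip:

host = "10.0.0.5"  # hypothetical value standing in for the slave_ip variable
rendered = f"host: {host} conn: '{{{{.username}}}}@tcp({{{{.host}}}})/mysql'"
assert rendered == "host: 10.0.0.5 conn: '{{.username}}@tcp({{.host}})/mysql'"
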
Example #5
def test_endpoint_config_mapping():
    with run_service("postgres",
                     environment=[
                         "POSTGRES_USER=test_user",
                         "POSTGRES_PASSWORD=test_pwd", "POSTGRES_DB=postgres"
                     ]) as postgres_container:
        with Agent.run(f"""
          observers:
            - type: docker
          monitors:
            - type: postgresql
              host: {container_ip(postgres_container)}
              connectionString: "user=test_user password=test_pwd dbname=postgres sslmode=disable"
              port: 5432
              dimensionTransformations:
                database: db
          """) as agent:
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={
                      "db": "dvdrental"
                  })), "Didn't get properly transformed dimension name"

            assert not has_datapoint(agent.fake_services,
                                     dimensions={"database": "dvdrental"})
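
dimensionTransformations is just a key-rename map applied to each emitted datapoint's dimensions, which is why the test expects db and asserts that the original database key is gone. A toy illustration of the renaming (not the agent's Go implementation):

def transform_dims(dims, transformations):
    # Rename any dimension key that has a mapping; leave the rest untouched.
    return {transformations.get(key, key): value for key, value in dims.items()}


assert transform_dims({"database": "dvdrental"}, {"database": "db"}) == {"db": "dvdrental"}
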
Example #6
def test_postgresql_enhanced():
    with run_container("postgres:10", environment=ENV) as cont:
        host = container_ip(cont)
        assert wait_for(p(tcp_socket_open, host, 5432),
                        60), "service didn't start"

        target_metric = "pg_blks.toast_hit"
        assert target_metric in METADATA.nondefault_metrics

        with Agent.run(f"""
                monitors:
                  - type: collectd/postgresql
                    host: {host}
                    port: 5432
                    extraMetrics:
                     - "{target_metric}"
                    username: "******"
                    password: "******"
                    databases:
                    - name: test
                      username: "******"
                      password: "******"
                      interval: 5
                      expireDelay: 10
                      sslMode: disable
                """) as agent:
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  metric_name="pg_blks.toast_hit"))
Example #7
def test_haproxy_default_metrics_from_stats_page_basic_auth(version):
    with run_service("haproxy", buildargs={"HAPROXY_VERSION":
                                           version}) as service_container:
        host = container_ip(service_container)
        with Agent.run(f"""
           monitors:
           - type: haproxy
             username: a_username
             password: a_password
             url: http://{host}:8081/stats?stats;csv
             proxies: ["FRONTEND", "200s"]
           """) as agent:
            assert ensure_always(
                p(
                    datapoints_have_some_or_all_dims,
                    agent.fake_services,
                    {
                        "proxy_name": "200s",
                        "service_name": "FRONTEND"
                    },
                ),
                10,
            )
            assert any_metric_found(agent.fake_services,
                                    ["haproxy_response_2xx"])
Example #8
def test_rabbitmq():
    with run_container("rabbitmq:3.6-management") as rabbitmq_cont:
        host = container_ip(rabbitmq_cont)
        config = dedent(f"""
            monitors:
              - type: collectd/rabbitmq
                host: {host}
                port: 15672
                username: guest
                password: guest
                collectNodes: true
                collectChannels: true
            """)

        assert wait_for(p(tcp_socket_open, host, 15672),
                        60), "service didn't start"

        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "rabbitmq")), "Didn't get rabbitmq datapoints"
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services,
                  "plugin_instance", "%s-15672" %
                  host)), "Didn't get expected plugin_instance dimension"
Example #9
def test_signalfx_metadata():
    with Agent.run("""
    procPath: /proc
    etcPath: /etc
    monitors:
      - type: collectd/signalfx-metadata
        persistencePath: /var/run/signalfx-agent
      - type: collectd/cpu
      - type: collectd/disk
      - type: collectd/memory
    """) as agent:
        assert wait_for(
            p(has_datapoint, agent.fake_services, "cpu.utilization",
              {"plugin": "signalfx-metadata"}))
        assert wait_for(
            p(has_datapoint, agent.fake_services, "disk_ops.total",
              {"plugin": "signalfx-metadata"}))
        assert wait_for(
            p(has_datapoint, agent.fake_services, "memory.utilization",
              {"plugin": "signalfx-metadata"}))
        assert ensure_always(
            lambda:
            not has_datapoint(agent.fake_services, "cpu.utilization_per_core",
                              {"plugin": "signalfx-metadata"}),
            timeout_seconds=5,
        )
        assert not has_log_message(agent.output.lower(),
                                   "error"), "error found in agent output!"
Example #10
def test_postgresql_database_filter():
    with run_service("postgres",
                     buildargs={"POSTGRES_VERSION": "11-alpine"},
                     environment=ENV,
                     print_logs=False) as postgres_cont:
        host = container_ip(postgres_cont)
        assert wait_for(p(tcp_socket_open, host, 5432),
                        60), "service didn't start"

        with Agent.run(
                dedent(f"""
                monitors:
                  - type: postgresql
                    host: {host}
                    port: 5432
                    connectionString: "user=test_user password=test_pwd dbname=postgres sslmode=disable"
                    databases: ['*', '!postgres']
                """)) as agent:
            for metric in METADATA.default_metrics:
                assert wait_for(
                    p(has_datapoint,
                      agent.fake_services,
                      metric_name=metric,
                      dimensions={"database": "dvdrental"})
                ), f"Didn't get default postgresql metric {metric} for database dvdrental"

            assert ensure_always(lambda: not has_datapoint(
                agent.fake_services, dimensions={"database": "postgres"}
            )), "Should not get metric for postgres default database"
Example #11
def test_writer_no_skipped_datapoints():
    """
    See if we get every datapoint that we expect
    """
    num_metrics = 1000
    with run_service("dpgen", environment={"NUM_METRICS":
                                           num_metrics}) as dpgen_cont:
        with Agent.run(
                dedent(f"""
             writer:
               maxRequests: 1
               datapointMaxBatchSize: 100
               maxDatapointsBuffered: 1947
             monitors:
             - type: prometheus-exporter
               host: {container_ip(dpgen_cont)}
               port: 3000
               intervalSeconds: 1
        """)) as agent:
            time.sleep(10)
            dpgen_cont.remove(force=True, v=True)
            time.sleep(2)

            assert agent.fake_services.datapoints, "Didn't get any datapoints"
            assert len(agent.fake_services.datapoints) % num_metrics == 0, \
                "Didn't get 1000n datapoints"
            expected_per_metric = len(agent.fake_services.datapoints) // num_metrics
            for i in range(0, num_metrics):
                count = len([
                    dp for dp in agent.fake_services.datapoints
                    if any(dim.key == "index" and dim.value == str(i)
                           for dim in dp.dimensions)
                ])
                assert count == expected_per_metric, \
                    "Didn't get each datapoint n times"
Example #12
def test_basic_vault_config():
    with run_vault() as [vault_client, get_audit_events]:
        vault_client.sys.enable_secrets_engine(backend_type="kv",
                                               options={"version": "1"})

        vault_client.write("secret/data/appinfo", data={"env": "prod"})
        vault_client.write("kv/usernames", app="me")
        with Agent.run(
                dedent(f"""
            intervalSeconds: 2
            globalDimensions:
              env: {{"#from": "vault:secret/data/appinfo[data.env]"}}
              user: {{"#from": "vault:kv/usernames[app]"}}
            configSources:
              vault:
                vaultToken: {vault_client.token}
                vaultAddr: {vault_client.url}
            monitors:
             - type: collectd/uptime
        """)) as agent:
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"env": "prod"}))
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"user": "******"}))
            assert sorted(audit_read_paths(get_audit_events())) == [
                "kv/usernames",
                "secret/data/appinfo",
            ], "expected two reads"
Example #13
def test_vault_kv_poll_refetch():
    """
    Test the KV v2 secret refetch operation
    """
    with run_vault() as [vault_client, get_audit_events]:
        vault_client.write("secret/data/app", data={"env": "dev"})
        with Agent.run(
                dedent(f"""
            intervalSeconds: 2
            globalDimensions:
               env: {{"#from": "vault:secret/data/app[data.env]"}}
            configSources:
              vault:
                vaultToken: {vault_client.token}
                vaultAddr: {vault_client.url}
                kvV2PollInterval: 10s
            monitors:
             - type: collectd/uptime
        """)) as agent:
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"env": "dev"}))

            assert audit_read_paths(
                get_audit_events()) == ["secret/data/app"], "expected one read"

            vault_client.write("secret/data/app", data={"env": "prod"})
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"env": "prod"}))

            assert "secret/metadata/app" in audit_read_paths(
                get_audit_events())
Example #14
def test_vault_nonrenewable_secret_refresh():
    with run_vault() as [vault_client, get_audit_events]:
        vault_client.sys.enable_secrets_engine(backend_type="kv",
                                               options={"version": "1"})

        vault_client.write("kv/passwords", app="s3cr3t", ttl="10s")
        with Agent.run(
                dedent(f"""
            intervalSeconds: 1
            globalDimensions:
              password: {{"#from": "vault:kv/passwords[app]"}}
            configSources:
              vault:
                vaultToken: {vault_client.token}
                vaultAddr: {vault_client.url}
            monitors:
             - type: internal-metrics
               datapointsToExclude:
                - metricNames:
                  - "*"
                  - "!sfxagent.go_num_goroutine"
        """)) as agent:
            assert wait_for(
                p(has_datapoint,
                  agent.fake_services,
                  dimensions={"password": "******"}))
            assert audit_read_paths(
                get_audit_events()) == ["kv/passwords"], "expected one read"

            # Renew time is 1/2 of the lease time of 10s
            time.sleep(5)
            assert audit_read_paths(
                get_audit_events()) == ["kv/passwords",
                                        "kv/passwords"], "expected two reads"
Example #15
def test_trace_forwarder_monitor():
    """
    Test basic functionality
    """
    port = random.randint(5001, 20000)
    with Agent.run(
            dedent(f"""
        hostname: "testhost"
        monitors:
          - type: trace-forwarder
            listenAddress: localhost:{port}
    """)) as agent:
        assert wait_for(p(tcp_port_open_locally,
                          port)), "trace forwarder port never opened!"
        for i, path in enumerate(FORWARDER_PATHS):
            test_trace = TEST_TRACE.copy()
            test_trace[0]["traceId"] = test_trace[0]["traceId"] + str(i)
            resp = requests.post(
                f"http://localhost:{port}{path}",
                headers={"Content-Type": "application/json"},
                data=json.dumps(test_trace),
            )
            assert resp.status_code == 200

            assert wait_for(
                p(has_trace_span,
                  agent.fake_services,
                  trace_id=test_trace[0]["traceId"],
                  tags={"env": "prod"})), "Didn't get span tag"
Example #16
def test_filesystems_fstype_filter():
    expected_metrics = [
        "df_complex.free",
        "df_complex.used",
        "percent_bytes.free",
        "percent_bytes.used",
        "disk.utilization",
    ]
    if sys.platform == "linux":
        expected_metrics.extend(["df_inodes.free", "df_inodes.used", "percent_inodes.free", "percent_inodes.used"])

    with Agent.run(
        """
    procPath: /proc
    monitors:
      - type: filesystems
        fsTypes:
         - "!*"
    """
    ) as agent:
        assert wait_for(
            p(has_any_metric_or_dim, agent.fake_services, ["disk.summary_utilization"], []), timeout_seconds=60
        ), "timed out waiting for metrics and/or dimensions!"
        assert ensure_never(lambda: has_any_metric_or_dim(agent.fake_services, expected_metrics, []))
        assert not has_log_message(agent.output.lower(), "error"), "error found in agent output!"
Example #17
def test_postgresql_defaults():
    with run_container("postgres:10", environment=ENV) as cont:
        host = container_ip(cont)
        assert wait_for(p(tcp_socket_open, host, 5432),
                        60), "service didn't start"

        with Agent.run(f"""
                monitors:
                  - type: collectd/postgresql
                    host: {host}
                    port: 5432
                    username: "******"
                    password: "******"
                    queries:
                    - name: "exampleQuery"
                      minVersion: 60203
                      maxVersion: 200203
                      statement: |
                        SELECT coalesce(sum(n_live_tup), 0) AS live, coalesce(sum(n_dead_tup), 0)
                        AS dead FROM pg_stat_user_tables;
                      results:
                      - type: gauge
                        instancePrefix: live
                        valuesFrom:
                        - live
                    databases:
                    - name: test
                      username: "******"
                      password: "******"
                      interval: 5
                      expireDelay: 10
                      sslMode: disable
                """) as agent:
            verify(agent, METADATA.default_metrics)
Example #18
def test_marathon(marathon_image):
    with run_container("zookeeper:3.5") as zookeeper:
        zkhost = container_ip(zookeeper)
        assert wait_for(p(tcp_socket_open, zkhost, 2181), 60), "zookeeper didn't start"
        with run_container(
            marathon_image, command=["--master", "localhost:5050", "--zk", "zk://{0}:2181/marathon".format(zkhost)]
        ) as service_container:
            host = container_ip(service_container)
            config = dedent(
                f"""
                monitors:
                - type: collectd/marathon
                  host: {host}
                  port: 8080
                """
            )

            assert wait_for(p(tcp_socket_open, host, 8080), 120), "marathon not listening on port"
            assert wait_for(
                p(http_status, url="http://{0}:8080/v2/info".format(host), status=[200]), 120
            ), "service didn't start"

            with Agent.run(config) as agent:
                assert wait_for(
                    p(has_datapoint_with_dim, agent.fake_services, "plugin", "marathon")
                ), "didn't get datapoints"
Example #19
def test_haproxy_default_metrics_from_stats_page_basic_auth_wrong_password(
        version):
    with run_service("haproxy", buildargs={"HAPROXY_VERSION":
                                           version}) as service_container:
        host = container_ip(service_container)
        url = f"http://{host}:8081/stats?stats;csv"
        with Agent.run(f"""
           monitors:
           - type: haproxy
             username: a_username
             password: a_wrong_password
             url: {url}
             proxies: ["FRONTEND", "200s"]
           """) as agent:
            assert ensure_always(
                p(
                    datapoints_have_some_or_all_dims,
                    agent.fake_services,
                    {
                        "proxy_name": "200s",
                        "service_name": "FRONTEND"
                    },
                ),
                10,
            )
            assert has_log_message(agent.output.lower(),
                                   "error"), "expected error in agent output!"
Example #20
def test_multiple_templates():
    with Agent.run("""
monitors:
  - type: collectd/df
  - type: collectd/custom
    templates:
     - |
       LoadPlugin "cpu"
     - |
      LoadPlugin "filecount"
      <Plugin filecount>
        <Directory "/bin">
          Instance "bin"
        </Directory>
      </Plugin>
collectd:
  logLevel: debug
""") as agent:
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, "plugin",
              "df")), "Didn't get df datapoints"
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, "plugin",
              "cpu")), "Didn't get cpu datapoints"
        assert wait_for(
            p(has_datapoint_with_dim, agent.fake_services, "plugin",
              "filecount")), "Didn't get filecount datapoints"
Example #21
def test_all_kafka_monitors(version):
    with run_kafka(version) as kafka:
        kafka_host = container_ip(kafka)
        with run_container(
                kafka.image.id,
                environment={
                    "JMX_PORT": "8099",
                    "START_AS": "producer",
                    "KAFKA_BROKER": "%s:9092" % (kafka_host, )
                },
        ) as kafka_producer:
            kafkaproducerhost = container_ip(kafka_producer)
            assert wait_for(p(tcp_socket_open, kafkaproducerhost, 8099),
                            60), "kafka producer jmx didn't start"
            with run_container(
                    kafka.image.id,
                    environment={
                        "JMX_PORT": "9099",
                        "START_AS": "consumer",
                        "KAFKA_BROKER": "%s:9092" % (kafka_host, )
                    },
            ) as kafka_consumer:
                kafkaconsumerhost = container_ip(kafka_consumer)
                assert wait_for(p(tcp_socket_open, kafkaconsumerhost, 9099),
                                60), "kafka consumer jmx didn't start"
                with Agent.run(
                        textwrap.dedent("""
                monitors:
                 - type: collectd/kafka
                   host: {0}
                   port: 7099
                   clusterName: testCluster
                 - type: collectd/kafka_producer
                   host: {1}
                   port: 8099
                 - type: collectd/kafka_consumer
                   host: {2}
                   port: 9099
                """.format(kafka_host, kafkaproducerhost,
                           kafkaconsumerhost))) as agent:
                    assert wait_for(
                        p(has_datapoint_with_metric_name, agent.fake_services,
                          "gauge.kafka-active-controllers"),
                        timeout_seconds=60,
                    ), "Didn't get kafka datapoints"
                    assert wait_for(
                        p(has_datapoint_with_dim, agent.fake_services,
                          "cluster", "testCluster"),
                        timeout_seconds=60
                    ), "Didn't get cluster dimension from kafka datapoints"
                    assert wait_for(
                        p(has_datapoint_with_dim, agent.fake_services,
                          "client-id", "console-producer"),
                        timeout_seconds=60,
                    ), "Didn't get client-id dimension from kafka_producer datapoints"
                    assert wait_for(
                        p(has_datapoint_with_dim, agent.fake_services,
                          "client-id", "consumer-1"),
                        timeout_seconds=60
                    ), "Didn't get client-id dimension from kafka_consumer datapoints"
Example #22
def test_elasticsearch_with_threadpool():
    with run_service("elasticsearch/6.2.0",
                     environment={"cluster.name":
                                  "testCluster"}) as es_container:
        host = container_ip(es_container)
        assert wait_for(
            p(http_status,
              url=f"http://{host}:9200/_nodes/_local",
              status=[200]), 180), "service didn't start"
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
              threadPools:
               - bulk
               - index
               - search
            """)
        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "elasticsearch")), "Didn't get elasticsearch datapoints"
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "thread_pool",
                  "bulk")), "Didn't get bulk thread pool metrics"
            assert not has_log_message(agent.output.lower(),
                                       "error"), "error found in agent output!"
Example #23
def test_redis_key_lengths():
    with run_redis() as [hostname, redis_client]:
        redis_client.lpush("queue-1", *["a", "b", "c"])
        redis_client.lpush("queue-2", *["x", "y"])

        config = dedent(f"""
          monitors:
           - type: collectd/redis
             host: {hostname}
             port: 6379
             sendListLengths:
              - databaseIndex: 0
                keyPattern: queue-*
        """)
        with Agent.run(config) as agent:
            assert wait_for(
                p(
                    has_datapoint,
                    agent.fake_services,
                    metric_name="gauge.key_llen",
                    dimensions={"key_name": "queue-1"},
                    value=3,
                )), "didn't get datapoints"
            assert wait_for(
                p(
                    has_datapoint,
                    agent.fake_services,
                    metric_name="gauge.key_llen",
                    dimensions={"key_name": "queue-2"},
                    value=2,
                )), "didn't get datapoints"
Example #24
def test_elasticsearch_with_additional_metrics():
    with run_service("elasticsearch/6.2.0",
                     environment={"cluster.name":
                                  "testCluster"}) as es_container:
        host = container_ip(es_container)
        assert wait_for(
            p(http_status,
              url=f"http://{host}:9200/_nodes/_local",
              status=[200]), 180), "service didn't start"
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
              additionalMetrics:
               - cluster.initializing-shards
               - thread_pool.threads
            """)
        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "elasticsearch")), "Didn't get elasticsearch datapoints"
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services,
                  "gauge.cluster.initializing-shards")
            ), "Didn't get gauge.cluster.initializing-shards metric"
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services,
                  "gauge.thread_pool.threads")
            ), "Didn't get gauge.thread_pool.threads metric"
            assert not has_log_message(agent.output.lower(),
                                       "error"), "error found in agent output!"
Example #25
def test_ecs_container_stats():
    with run_service("ecsmeta") as ecsmeta, run_container("redis:4-alpine") as redis:
        ecsmeta_ip = container_ip(ecsmeta)
        redis_ip = container_ip(redis)
        with Agent.run(
            """
    monitors:
      - type: ecs-metadata
        enableExtraCPUMetrics: true
        enableExtraMemoryMetrics: true
        metadataEndpoint: http://%s/metadata_single?redis_ip=%s
        statsEndpoint: http://%s/stats

    """
            % (ecsmeta_ip, redis_ip, ecsmeta_ip)
        ) as agent:
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services, "cpu.percent")
            ), "Didn't get docker cpu datapoints"
            assert wait_for(
                p(has_datapoint_with_metric_name, agent.fake_services, "memory.percent")
            ), "Didn't get docker memory datapoints"
            assert wait_for(
                # container_id is included in the stats.json file in the ecsmeta app
                # because the stats data come from the ECS metadata API rather than
                # directly from the docker container
                p(
                    has_datapoint_with_dim,
                    agent.fake_services,
                    "container_id",
                    "c42fa5a73634bcb6e301dfb7b13ac7ead2af473210be6a15da75a290c283b66c",
                )
            ), "Didn't get redis datapoints"
Example #26
def test_elasticsearch_with_cluster_option():
    with run_service("elasticsearch/6.4.2",
                     environment={"cluster.name":
                                  "testCluster"}) as es_container:
        host = container_ip(es_container)
        assert wait_for(
            p(http_status,
              url=f"http://{host}:9200/_nodes/_local",
              status=[200]), 180), "service didn't start"
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
              cluster: testCluster1
            """)
        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "elasticsearch")), "Didn't get elasticsearch datapoints"
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services,
                  "plugin_instance", "testCluster1")
            ), "Cluster name not picked from read callback"
            # make sure all plugin_instance dimensions were overridden by the cluster option
            assert not wait_for(
                p(has_datapoint_with_dim, agent.fake_services,
                  "plugin_instance", "testCluster"), 10
            ), "plugin_instance dimension not overridden by cluster option"
            assert not has_log_message(agent.output.lower(),
                                       "error"), "error found in agent output!"
Example #27
def test_jenkins(version):
    with run_service("jenkins",
                     buildargs={
                         "JENKINS_REPO": version[0],
                         "JENKINS_VERSION": version[1],
                         "JENKINS_PORT": "8080"
                     }) as jenkins_container:
        host = container_ip(jenkins_container)
        config = dedent(f"""
            monitors:
              - type: collectd/jenkins
                host: {host}
                port: 8080
                metricsKey: {METRICS_KEY}
            """)
        assert wait_for(p(tcp_socket_open, host, 8080),
                        60), "service not listening on port"
        assert wait_for(
            p(http_status,
              url=f"http://{host}:8080/metrics/{METRICS_KEY}/ping/",
              status=[200]), 120), "service didn't start"

        with Agent.run(config) as agent:
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "jenkins")), "Didn't get jenkins datapoints"
Example #28
def test_elasticsearch_without_cluster():
    # start the ES container without the service
    with run_service("elasticsearch/6.4.2",
                     environment={"cluster.name": "testCluster"},
                     entrypoint="sleep inf") as es_container:
        host = container_ip(es_container)
        config = dedent(f"""
            monitors:
            - type: collectd/elasticsearch
              host: {host}
              port: 9200
              username: elastic
              password: testing123
            """)
        with Agent.run(config) as agent:
            assert not wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "elasticsearch")), "datapoints found without service"
            # start ES service and make sure it gets discovered
            es_container.exec_run(
                "/usr/local/bin/docker-entrypoint.sh eswrapper", detach=True)
            assert wait_for(
                p(http_status,
                  url=f"http://{host}:9200/_nodes/_local",
                  status=[200]), 180), "service didn't start"
            assert wait_for(
                p(has_datapoint_with_dim, agent.fake_services, "plugin",
                  "elasticsearch")), "Didn't get elasticsearch datapoints"
Example #29
def test_tracing_load():
    """
    Test that all of the traces sent through the agent get the proper service
    correlation datapoint.
    """
    port = random.randint(5001, 20000)
    with Agent.run(
        dedent(
            f"""
        hostname: "testhost"
        writer:
            sendTraceHostCorrelationMetrics: true
            traceHostCorrelationMetricsInterval: 1s
            staleServiceTimeout: 7s
        monitors:
          - type: trace-forwarder
            listenAddress: localhost:{port}
    """
        )
    ) as agent:
        assert wait_for(p(tcp_port_open_locally, port)), "trace forwarder port never opened!"
        for i in range(0, 100):
            spans = _test_trace()
            spans[0]["localEndpoint"]["serviceName"] += f"-{i}"
            spans[1]["localEndpoint"]["serviceName"] += f"-{i}"
            resp = retry_on_ebadf(
                lambda: requests.post(
                    f"http://localhost:{port}/v1/trace",
                    headers={"Content-Type": "application/json"},
                    data=json.dumps(spans),  # pylint:disable=cell-var-from-loop
                )
            )()

            assert resp.status_code == 200

        for i in range(0, 100):
            assert wait_for(
                p(
                    has_datapoint,
                    agent.fake_services,
                    metric_name="sf.int.service.heartbeat",
                    dimensions={"sf_hasService": f"myapp-{i}", "host": "testhost"},
                )
            ), "Didn't get host correlation datapoint"

            assert wait_for(
                p(
                    has_datapoint,
                    agent.fake_services,
                    metric_name="sf.int.service.heartbeat",
                    dimensions={"sf_hasService": f"file-server-{i}", "host": "testhost"},
                )
            ), "Didn't get host correlation datapoint"

        time.sleep(10)
        agent.fake_services.reset_datapoints()

        assert ensure_never(
            p(has_datapoint, agent.fake_services, metric_name="sf.int.service.heartbeat"), timeout_seconds=5
        ), "Got infra correlation metric when it should have been expired"
Example #30
def test_statsd_monitor_conversion():
    """
    Test statsd metric name conversion
    """
    with Agent.run("""
monitors:
  - type: statsd
    listenPort: 0
    converters:
    - pattern: 'cluster.cds_{traffic}_{mesh}_{service}-vn_{}.{action}'
      metricName: '{traffic}.{action}'
""") as agent:
        port = get_statsd_port(agent)

        assert wait_for(p(udp_port_open_locally_netstat,
                          port)), "statsd port never opened!"
        send_udp_message(
            "localhost", port,
            "cluster.cds_egress_ecommerce-demo-mesh_gateway-vn_tcp_8080.update_success:8|c"
        )

        assert wait_for(
            p(
                has_datapoint,
                agent.fake_services,
                metric_name="egress.update_success",
                dimensions={
                    "traffic": "egress",
                    "mesh": "ecommerce-demo-mesh",
                    "service": "gateway",
                    "action": "update_success",
                },
            )), "Didn't get metric"
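
The converter in this test splits the incoming statsd metric name against the pattern: each {name} segment becomes a dimension, a bare {} matches and discards its segment, and metricName is reassembled from the captures. Below is a regex sketch of that extraction, assuming each field matches as little as possible (an assumption; the agent implements its own tokenizer):

import re

# Hypothetical regex equivalent of the converter pattern
# 'cluster.cds_{traffic}_{mesh}_{service}-vn_{}.{action}'.
PATTERN = re.compile(
    r"cluster\.cds_(?P<traffic>.*?)_(?P<mesh>.*?)_(?P<service>.*?)-vn_.*?\.(?P<action>.*)")

fields = PATTERN.match(
    "cluster.cds_egress_ecommerce-demo-mesh_gateway-vn_tcp_8080.update_success").groupdict()
assert fields == {
    "traffic": "egress",
    "mesh": "ecommerce-demo-mesh",
    "service": "gateway",
    "action": "update_success",
}
metric_name = "{traffic}.{action}".format(**fields)
assert metric_name == "egress.update_success"
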