Example 1
def test_parse():
    input_ = StringIO(
        "Results:"
        "Op rate                   :   14,997 op/s  [WRITE: 14,997 op/s]"
        "Partition rate            :   14,997 pk/s  [WRITE: 14,997 pk/s]"
        "Row rate                  :   14,997 row/s [WRITE: 14,997 row/s]"
        "Latency mean              :    1.9 ms [WRITE: 1.9 ms]"
        "Latency median            :    0.3 ms [WRITE: 0.3 ms]"
        "Latency 95th percentile   :    0.4 ms [WRITE: 0.4 ms]"
        "Latency 99th percentile   :   74.0 ms [WRITE: 74.0 ms]"
        "Latency 99.9th percentile :  146.8 ms [WRITE: 146.8 ms]"
        "Latency max               :  160.2 ms [WRITE: 160.2 ms]"
        "Total partitions          :  1,350,028 [WRITE: 1,350,028]"
        "Total errors              :          0 [WRITE: 0]"
        "Total GC count            : 0"
        "Total GC memory           : 0.000 KiB"
        "Total GC time             :    0.0 seconds"
        "Avg GC time               :    NaN ms"
        "StdDev GC time            :    0.0 ms"
        "Total operation time      : 00:01:30")
    expected = [
        Metric('cassandra_qps', value=14997, type=MetricType.GAUGE,
               help="QPS"),
        Metric('cassandra_p99',
               value=74.0,
               type=MetricType.GAUGE,
               help="99th percentile")
    ]
    assert expected == parse(input_, None, None, {}, 'cassandra_')
Example 2
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for YCSB.
    Parses lines similar to (newline characters added to improve readability):
        2018-08-22 17:33:25:811 581 sec: 581117 operations;
        975 current ops/sec;
        est completion in 2 hours 36 minutes
        [READ: Count=462, Max=554, Min=273, Avg=393.39, 90=457,
        99=525, 99.9=554, 99.99=554] [UPDATE: Count=513, Max=699,
        Min=254, Avg=383.83, 90=441, 99=512, 99.9=589, 99.99=699] # noqa
    """
    new_metrics = []

    new_line = readline_with_check(input)

    if "operations" in new_line:
        operations_and_ops = \
            re.search(r'(?P<operations>\d+) operations;', new_line).groupdict()
        operations = float(operations_and_ops['operations'])
        new_metrics.append(
            Metric(metric_name_prefix + 'operations',
                   operations,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Done operations in Cassandra"))

    if "current ops" in new_line:
        operations_and_ops = \
            re.search(r'(?P<ops_per_sec>\d+(\.\d+)?) current ops\/sec', new_line).groupdict()
        ops_per_sec = float(operations_and_ops['ops_per_sec'])
        new_metrics.append(
            Metric(metric_name_prefix + 'ops_per_sec',
                   ops_per_sec,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Ops per sec Cassandra"))

    if "READ" in new_line:
        read = re.search(r'\[READ.*?99\.99=(\d+).*?\]', new_line)
        p9999 = float(read.group(1))
        new_metrics.append(
            Metric(metric_name_prefix + 'read_p9999',
                   p9999,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99.99th percentile of read latency in Cassandra"))
    if "UPDATE" in new_line:
        update = re.search(r'\[UPDATE.*?99\.99=(\d+).*?\]', new_line)
        p9999 = float(update.group(1))
        new_metrics.append(
            Metric(metric_name_prefix + 'update_p9999',
                   p9999,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99.99th percentile of update latency in Cassandra"))

    return new_metrics
Example 3
def test_parse():
    data = """
        stress-ng: info:  [99] Time 1546433449, counter=173
        stress-ng: info:  [96] Time 1546433449, counter=210
        stress-ng: info:  [103] Time 1546433449, counter=191
        stress-ng: info:  [104] Time 1546433449, counter=195
    """

    number_of_reads = len(data.splitlines())
    input_ = StringIO(data)

    got = []
    for _ in range(number_of_reads):
        got.extend(parse(input_, '', None, {}, 'stress_ng_'))
    expected = [
        Metric('stress_ng_bogo_ops_counter', value=173, labels={'id_proc_stress_ng': '99'},
               type=MetricType.COUNTER,
               help="Counter bogo ops per proc stress-ng, updated per 1 sec"),
        Metric('stress_ng_bogo_ops_counter', value=210, labels={'id_proc_stress_ng': '96'},
               type=MetricType.COUNTER,
               help="Counter bogo ops per proc stress-ng, updated per 1 sec"),
        Metric('stress_ng_bogo_ops_counter', value=191, labels={'id_proc_stress_ng': '103'},
               type=MetricType.COUNTER,
               help="Counter bogo ops per proc stress-ng, updated per 1 sec"),
        Metric('stress_ng_bogo_ops_counter', value=195, labels={'id_proc_stress_ng': '104'},
               type=MetricType.COUNTER,
               help="Counter bogo ops per proc stress-ng, updated per 1 sec")
    ]

    assert expected == got
Example 4
def test_parse():
    input_ = StringIO(
        "2018-08-22 17:33:25:811 581 sec: 581117 operations; "
        "975 current ops/sec; "
        "est completion in 2 hours 36 minutes "
        "[READ: Count=462, Max=554, Min=273, Avg=393.39, 90=457, "
        "99=525, 99.9=554, 99.99=554] [UPDATE: Count=513, Max=699, "
        "Min=254, Avg=383.83, 90=441, 99=512, 99.9=589, 99.99=699] # noq")
    expected = [
        Metric("cassandra_operations",
               value=581117,
               type=MetricType.GAUGE,
               help="Done operations in Cassandra"),
        Metric("cassandra_ops_per_sec",
               value=975,
               type=MetricType.GAUGE,
               help="Ops per sec Cassandra"),
        Metric("cassandra_read_p9999",
               value=554.0,
               type=MetricType.GAUGE,
               help="99.99th percentile of read latency in Cassandra"),
        Metric("cassandra_update_p9999",
               value=699.0,
               type=MetricType.GAUGE,
               help="99.99th percentile of update latency in Cassandra"),
    ]
    assert expected == parse(input_, None, None, {}, 'cassandra_')
Example 5
def test_parse():
    input_ = StringIO(
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] -----"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Window: 155"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Connections: Ok: 0 Error: 0 Timeout: 0 Open: 80"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Sockets: Create: 0 Close: 0 Read: 31601 Write: "
        + "15795 Flush: 0"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Requests: Sent: 15795 Prepared: 16384 "
        + "In-Flight: 40"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Responses: Ok: 15793 Error: 0 Timeout: 0 Hit: "
        + "3144 Miss: 6960"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Rate: 15823.74 rps Success: 100.00 % Hit Rate:"
        + " 31.12 %"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Percentiles: Response OK (us): min: 47 p50: 389"
        + " p90: 775 p99: 86436 p999: 89120 p9999: 89657 max: 89657")
    expected = [
        Metric("rpcperf_p9999",
               value=89657,
               type=MetricType.GAUGE,
               help="99.99th percentile of latency in rpc-perf"),
        Metric("rpcperf_p999",
               value=89120,
               type=MetricType.GAUGE,
               help="99.9th percentile of latency in rpc-perf"),
        Metric("rpcperf_p99",
               value=86436,
               type=MetricType.GAUGE,
               help="99th percentile of latency in rpc-perf"),
        Metric("rpcperf_p90",
               value=775,
               type=MetricType.GAUGE,
               help="90th percentile of latency in rpc-perf"),
        Metric("rpcperf_p50",
               value=389,
               type=MetricType.GAUGE,
               help="50th percentile of latency in rpc-perf"),
        Metric("rpcperf_min",
               value=47,
               type=MetricType.GAUGE,
               help="min of latency in rpc-perf"),
        Metric("rpcperf_max",
               value=89657,
               type=MetricType.GAUGE,
               help="max of latency in rpc-perf"),
        Metric("rpcperf_hit_rate",
               value=31.12,
               type=MetricType.GAUGE,
               help="Hit rate in rpc-perf"),
        Metric("rpcperf_success",
               value=100.00,
               type=MetricType.GAUGE,
               help="Success responses in rpc-perf"),
        Metric("rpcperf_rate",
               value=15823.74,
               type=MetricType.GAUGE,
               help="Rate in rpc-perf"),
    ]
    assert expected == parse(input_, None, None, {}, 'rpcperf_')
Example 6
def sample_metrics():
    return [
        Metric(name='average_latency_miliseconds', value=8,
               labels={'user': '******', 'node': 'slave_1'},
               type=MetricType.COUNTER, help='latency measured in miliseconds'),
        Metric(name='percentile_99th_miliseconds', value=89,
               labels={'user': '******', 'node': 'slave_1'},
               type=MetricType.COUNTER, help='99th percentile in miliseconds')
    ]
Example 7
def sample_metrics_mixed():
    return [
        Metric(name='bar', value=89, type=None, help='bar-help'),
        Metric(name='foo', value=1, labels=dict(a='3'),
               type=MetricType.COUNTER, help='foo-help'),
        Metric(name='foo', value=1, labels=dict(a='20'),
               type=MetricType.COUNTER, help='foo-help'),
        Metric(name='foo', value=1, labels=dict(a='1'),
               type=MetricType.COUNTER, help='foo-help'),
        Metric(name='bar2', value=89),
    ]
Example 8
def append_service_level_metrics(service_level_args: ServiceLevelArgs,
                                 labels: Dict[str, str], metrics: List[Metric]):
    """Append service level metrics based on choosen matric from parsed metrics.
    :param metrics: list of metrics, additional service level metrics will
        be appended to that list.
    """
    for metric in metrics:
        if (service_level_args.sli_metric_name is not None and
                service_level_args.sli_metric_name == metric.name):
            if service_level_args.inverse_sli_metric_value:
                value = 1.0/float(metric.value)
            else:
                value = float(metric.value)
            log.debug(metric)
            # Send `slo` metric only if SLI was found.
            metrics.append(Metric(
                "slo",
                float(service_level_args.slo),
                labels=labels,
                type=MetricType.GAUGE,
                help='Service Level Objective based on %s metric' %
                     service_level_args.sli_metric_name,
            ))
            metrics.append(Metric(
                "sli",
                value,
                labels=labels,
                type=MetricType.GAUGE,
                help='Service Level Indicator based on %s metric' %
                     service_level_args.sli_metric_name,
            ))
            metrics.append(Metric(
                "sli_normalized",
                value/service_level_args.slo,
                labels=labels,
                type=MetricType.GAUGE,
                help='Normalized Service Level Indicator based on %s metric and SLO' %
                     service_level_args.sli_metric_name,

            ))

        if (service_level_args.load_metric_name not in (None, "const") and
                service_level_args.load_metric_name == metric.name):
            value = float(metric.value)
            peak_load = float(service_level_args.peak_load)
            metrics.append(Metric("peak_load", float(service_level_args.peak_load), labels=labels))
            metrics.append(Metric("load", value, labels=labels))
            metrics.append(Metric("load_normalized", value/peak_load,
                                  labels=labels))

    # If set to `const`, the behaviour is slightly different:
    #   act as if the real load was always equal to peak_load
    #   (so load_normalized == 1).
    if service_level_args.load_metric_name == "const":
        metrics.append(Metric("peak_load", float(service_level_args.peak_load), labels=labels))
        metrics.append(Metric("load", float(service_level_args.peak_load), labels=labels))
        metrics.append(Metric("load_normalized", 1.0, labels=labels))
Example 9
def create_metrics(platform: Platform) -> List[Metric]:
    """Creates a list of Metric objects from data in Platform object"""
    platform_metrics = list()
    platform_metrics.append(
        Metric.create_metric_with_metadata(name=MetricName.MEM_USAGE,
                                           value=platform.total_memory_used))
    for cpu_id, cpu_usage in platform.cpus_usage.items():
        platform_metrics.append(
            Metric.create_metric_with_metadata(
                name=MetricName.CPU_USAGE_PER_CPU,
                value=cpu_usage,
                labels={"cpu": str(cpu_id)}))
    return platform_metrics
Example 10
def test_parse():
    data = """#type       avg     std     min     5th    10th    90th    95th    99th
               read      801.9   155.0   304.5   643.7   661.1  1017.8  1128.2  1386.5
               update    804.6   157.8   539.4   643.4   661.2  1026.1  1136.1  1404.3
               op_q        1.0     0.0     1.0     1.0     1.0     1.1     1.1     1.1

               Total QPS = 159578.5 (1595835 / 10.0s)

               Misses = 0 (0.0%)
               Skipped TXs = 0 (0.0%)

               RX  382849511 bytes :   36.5 MB/s
               TX   67524708 bytes :    6.4 MB/s
"""

    number_of_reads = len(data.splitlines())
    input_ = StringIO(data)

    got = []
    for _ in range(number_of_reads):
        got.extend(parse(input_, '', None, {}, 'twemcache_'))
    expected = [
        Metric('twemcache_read_avg',
               value=801.9,
               labels={},
               type=MetricType.GAUGE,
               help="Average"),
        Metric('twemcache_read_p90',
               value=1017.8,
               labels={},
               type=MetricType.GAUGE,
               help="90th percentile of read latency"),
        Metric('twemcache_read_p95',
               value=1128.2,
               labels={},
               type=MetricType.GAUGE,
               help="95th percentile of read latency"),
        Metric('twemcache_read_p99',
               value=1386.5,
               labels={},
               type=MetricType.GAUGE,
               help="99th percentile of read latency"),
        Metric('twemcache_qps',
               value=159578.5,
               labels={},
               type=MetricType.GAUGE,
               help="QPS")
    ]

    assert expected == got
Example 11
def sample_metrics_unconvertable_to_PEF():
    """for testing for proper float representation"""
    return [
        Metric(name='latency-miliseconds', value=8.223,
               labels={'user': '******', 'node': 'slave_1'},
               type=MetricType.COUNTER, help='latency measured in miliseconds'),
    ]
Example 12
def sample_metrics_with_quote():
    """for testing for proper escaping"""
    return [
        Metric(name='average_latency_miliseconds', value=8,
               labels={'user': '******', 'node': 'slave_1 called "brave heart"'},  # quoted
               type=MetricType.COUNTER, help='latency measured in miliseconds'),
    ]
Example 13
def test_parse():
    input_ = StringIO("580	248.7 examples/sec")
    expected = [
        Metric('tensorflow_benchmark_prediction_speed',
               value=248.7,
               type=MetricType.GAUGE,
               help="tensorflow benchmark prediction speed")
    ]
    assert expected == parse(input_, None, None, {}, 'tensorflow_benchmark_')
Example 14
def test_parse():
    input_ = StringIO("180	images/sec: 74.9 +/- 0.5 (jitter = 8.9)    2.409")
    expected = [
        Metric('tensorflow_benchmark_training_speed',
               value=74.9,
               type=MetricType.GAUGE,
               help="tensorflow benchmark training speed")
    ]
    assert expected == parse(input_, None, None, {}, 'tensorflow_benchmark_')
Example 15
def create_metrics(task_measurements: Measurements) -> List[Metric]:
    """Prepare a list of metrics for a mesos tasks based on provided measurements
    applying common_labels.
    :param task_measurements: use values of measurements to create metrics
    """
    metrics = []
    for metric_name, metric_value in task_measurements.items():
        metric = Metric.create_metric_with_metadata(name=metric_name, value=metric_value)
        metrics.append(metric)
    return metrics
Example 16
def test_collect_platform_information(*mocks):
    assert collect_platform_information() == (Platform(1, 1, 2, {
        0: 100,
        1: 200
    }, 1337, 1536071557.123456), [
        Metric.create_metric_with_metadata(name=MetricName.MEM_USAGE,
                                           value=1337),
        Metric.create_metric_with_metadata(name=MetricName.CPU_USAGE_PER_CPU,
                                           value=100,
                                           labels={"cpu": "0"}),
        Metric.create_metric_with_metadata(name=MetricName.CPU_USAGE_PER_CPU,
                                           value=200,
                                           labels={"cpu": "1"}),
    ], {
        "sockets": "1",
        "cores": "1",
        "cpus": "2",
        "host": "test_host",
        "owca_version": "0.1"
    })
Example 17
def test_parse_scan_mode():
    data = """
    #type       avg     min     1st     5th    10th    90th    95th    99th      QPS   target
    read       76.3   346.3    21.1    23.5    24.5    34.3    38.7  2056.6   1002.0   1000"""

    number_of_reads = len(data.splitlines())
    input_ = StringIO(data)

    got = []

    for _ in range(number_of_reads):
        got.extend(parse(input_, '', None, {}, 'twemcache_'))

    expected = [
        Metric('twemcache_scan_qps',
               value=1002.0,
               labels={},
               type=MetricType.GAUGE,
               help="QPS"),
        Metric('twemcache_scan_read_avg',
               value=76.3,
               labels={},
               type=MetricType.GAUGE,
               help="Average"),
        Metric('twemcache_scan_read_p90',
               value=34.3,
               labels={},
               type=MetricType.GAUGE,
               help="90th percentile of read latency"),
        Metric('twemcache_scan_read_p95',
               value=38.7,
               labels={},
               type=MetricType.GAUGE,
               help="95th percentile of read latency"),
        Metric('twemcache_scan_read_p99',
               value=2056.6,
               labels={},
               type=MetricType.GAUGE,
               help="99th percentile of read latency")
    ]
    assert expected == got
Example 18
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    return [
        Metric(name="example",
               value=1.337,
               labels={"test": "label"},
               type=MetricType.GAUGE,
               help="Empty example metric")
    ]
Example 19
def test_grouping_metrics_by_metadata(sample_metrics_mixed):

    got_grouped = storage.group_metrics_by_name(sample_metrics_mixed)

    expected_grouped = [
        ('bar', [
            Metric(name='bar', value=89, type=None, help='bar-help'),
        ]),
        ('bar2', [
            Metric(name='bar2', value=89),
        ]),
        ('foo', [
            Metric(name='foo', value=1, labels=dict(a='1'),
                   type=MetricType.COUNTER, help='foo-help'),
            Metric(name='foo', value=1, labels=dict(a='3'),
                   type=MetricType.COUNTER, help='foo-help'),
            Metric(name='foo', value=1, labels=dict(a='20'),
                   type=MetricType.COUNTER, help='foo-help'),
        ]),
    ]

    assert got_grouped == expected_grouped
Example 20
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """
    Custom parse function for specjbb.
    For sample output from specjbb see file:
    ./specjbb_sample_stdout.txt

    Discards lines until it finds
    >>Response times:<<
    and then reads until an empty line.

    The lines read represent a table.
    In the code the table representation is named data_frame.
    """
    new_metrics = []
    input_lines = []

    # discarding lines
    new_line = readline_with_check(input)
    while not re.match(r"^\s*Response times:\s*$", new_line):
        new_line = readline_with_check(input)
    new_line = readline_with_check(input)

    # reading until empty line
    while not re.match(EMPTY_LINE, new_line):
        input_lines.append(new_line)
        new_line = readline_with_check(input)
    log.debug("Found separator in {0}".format(new_line))

    # Two-dimensional list; the first row contains the column names. Almost like a data frame.
    data_frame = [[el.strip() for el in line.split(",")]
                  for line in input_lines]

    # For now we need only one metric: TotalPurchase, p99.
    metric_name = metric_name_prefix + 'p99_total_purchase'
    metric_value = float(data_frame[1][-3])  # total purchase, p99
    new_metrics.append(
        Metric(metric_name,
               metric_value,
               type=MetricType.GAUGE,
               labels=labels,
               help="Specjbb2015 metric, Total Purchase, percentile 99"))

    return new_metrics
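To make the data_frame indexing above concrete, here is a minimal sketch over a hypothetical (not real specjbb) two-row table, laid out so that the p99 column is third from the right, as data_frame[1][-3] assumes:

input_lines = [
    "Name, min, p50, p90, p95, p99, max, samples\n",
    "TotalPurchase, 1200, 2600, 3100, 3400, 3800000, 4100000, 1000\n",
]
data_frame = [[el.strip() for el in line.split(",")]
              for line in input_lines]
p99_total_purchase = float(data_frame[1][-3])  # -> 3800000.0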
Example 21
def test_parse():
    """Reads textfile with sample output from specjbb."""
    expected = [
        Metric("specjbb_p99_total_purchase",
               value=0,
               type=MetricType.GAUGE,
               help="Specjbb2015 metric, Total Purchase, percentile 99")
    ]

    path = os.path.dirname(os.path.abspath(__file__))
    with open(path + '/specjbb_sample_stdout.txt', 'r') as fin:
        expected[0].value = 3800000.0
        assert expected == parse(fin, None, None, {}, 'specjbb_')
        expected[0].value = 581000.0
        assert expected == parse(fin, None, None, {}, 'specjbb_')
        expected[0].value = 6800000.0
        assert expected == parse(fin, None, None, {}, 'specjbb_')
Example 22
def parse(input: TextIOWrapper, regexp: str, separator: str = None,
          labels: Dict[str, str] = {}, metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for tensorflow benchmark predition
        580    248.7 examples/sec
    """

    new_metrics = []
    new_line = readline_with_check(input)

    if "examples/sec" in new_line:
        read = re.search(r'[0-9]*\t([0-9]*\.[0-9]*)[ ]*examples\/sec', new_line)
        p99 = float(read.group(1))
        new_metrics.append(Metric(metric_name_prefix + 'prediction_speed', p99,
                                  type=MetricType.GAUGE, labels=labels,
                                  help="tensorflow benchmark prediction speed"))

    return new_metrics
Example 23
def anomaly_metrics(contended_task_id: TaskId,
                    contending_task_ids: List[TaskId]):
    """Helper method to create metric based on anomaly.
    uuid is used if provided.
    """
    metrics = []
    for task_id in contending_task_ids:
        uuid = _create_uuid_from_tasks_ids(contending_task_ids +
                                           [contended_task_id])
        metrics.append(
            Metric(name='anomaly',
                   value=1,
                   labels=dict(contended_task_id=contended_task_id,
                               contending_task_id=task_id,
                               resource=ContendedResource.MEMORY_BW,
                               uuid=uuid,
                               type='contention'),
                   type=MetricType.COUNTER))
    return metrics
Example 24
def anomaly_metrics(contended_task_id: TaskId, contending_task_ids: List[TaskId],
                    contending_workload_instances: Dict[TaskId, str] = {},
                    labels: Dict[TaskId, Dict[str, str]] = {}):
    """Helper method to create metric based on anomaly.
    uuid is used if provided.
    """
    metrics = []
    for task_id in contending_task_ids:
        uuid = _create_uuid_from_tasks_ids(contending_task_ids + [contended_task_id])
        metric = Metric(name='anomaly', value=1,
                        labels=dict(contended_task_id=contended_task_id, contending_task_id=task_id,
                                    resource=ContendedResource.MEMORY_BW, uuid=uuid,
                                    type='contention',
                                    contending_workload_instance=contending_workload_instances[
                                        task_id], workload_instance=contending_workload_instances[
                                            contended_task_id]), type=MetricType.COUNTER)
        if contended_task_id in labels:
            metric.labels.update(labels[contended_task_id])
        metrics.append(metric)
    return metrics
Example 25
def default_parse(input: TextIOWrapper, regexp: str, separator: str = None,
                  labels: Dict[str, str] = {}, metric_name_prefix: str = '') -> List[Metric]:
    """
    Parses workload output. If no separator is provided, parses only one line at a time.
    With a separator, it appends lines to a list until the line with the separator appears and
    then applies the regexp to the collected output. The separator should be sent in a separate
    line, without metrics. If there are no new lines in the input from the workload, the function
    blocks at the input.readline() call. After new data arrives, previously read values will be
    overwritten.
    :param input: output of the workload process
    :param regexp: regexp used for finding metric names and their values.
           Needs to contain 2 named groups "name" and "value".
    :param separator: string that separates blocks of measurements. If none is passed,
           only one line of input will be parsed
    :param labels: dictionary of labels like workload name, workload parameters etc.
           Used for labeling the metrics in prometheus format.
    :return: List of Metrics
    """
    new_metrics = []
    input_lines = []

    new_line = readline_with_check(input)
    if separator is not None:
        # With separator, first we read the whole block of output until the separator appears

        while not re.match(separator, new_line) and not new_line == '':
            input_lines.append(new_line)
            new_line = readline_with_check(input)
        log.debug("Found separator in {0}".format(new_line))
    else:
        # Without separator only one line is processed at a time
        input_lines.append(new_line)
    found_metrics = re.finditer(regexp, '\n'.join(input_lines))
    for metric in list(found_metrics):
        metric = metric.groupdict()
        new_metrics.append(Metric(metric_name_prefix+metric['name'], float(metric['value']),
                                  labels=labels))
    return new_metrics
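As a reference for the regexp contract described above (two named groups, "name" and "value"), a minimal self-contained sketch; both the sample output and the pattern are hypothetical:

import re

sample_output = "qps=1500\nlatency_p99=12.5\n"

# The regexp must expose the named groups "name" and "value".
pattern = r'(?P<name>\w+)=(?P<value>[0-9.]+)'

for match in re.finditer(pattern, sample_output):
    group = match.groupdict()
    print(group['name'], float(group['value']))  # qps 1500.0, then latency_p99 12.5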
Example 26
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for tensorflow benchmark training
        180    images/sec: 74.9 +/- 0.5 (jitter = 8.9)    2.409
    """

    new_metrics = []
    new_line = readline_with_check(input)

    if "images/sec" in new_line:
        read = re.search(r'[0-9]*\timages\/sec:[ ]*([0-9]*\.[0-9]*)', new_line)
        p99 = float(read.group(1))

        new_metrics.append(
            Metric(metric_name_prefix + 'training_speed',
                   p99,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="tensorflow benchmark training speed"))

    return new_metrics
Example 27
    def generate_metrics(self):
        """Encodes contention anomaly as list of metrics.

        Anomaly of multiple tasks together, will be encode as many metrics.
        Note, it can return more metrics that provided anomalies because it is necessary
        to encode relation in this way.
        For example:
        anomaly = ContentionAnomaly(
            resource=ContendedResource.LLC,
            contended_task_id='task1',
            contending_task_ids=['task2', 'task3'],
            metrics=[Metric(name='cpi', type='gauge', value=10)],
        )

        will be encoded as two metrics of type counter:

        metrics = [
            Metric(name='anomaly', type='counter', value=1,
                labels=dict(
                    uuid="1234",
                    contended_task_id="task1",
                    contending_task_id='task2',
                    type="contention"))
            Metric(name='anomaly', type='counter', value=1,
                labels=dict(
                    uuid="1234",
                    contended_task_id="task1",
                    contending_task_id='task3',
                    type="contention"))
            Metrics(name='cpi', type='gauge', value=10,
                labels=dict(type='anomaly', uuid="1234", contended_task_id="task1"))
        ]

        Note that contention-related metrics will get additional labels (type and uuid).

        Effectively, this is encoded in the Prometheus format as:

        # HELP anomaly ...
        # TYPE anomaly counter
        anomaly{type="contention", contended_task_id="task1", contending_task_ids="task2",  resource="cache", uuid="1234"} 1 # noqa
        anomaly{type="contention", contended_task_id="task1", contending_task_ids="task3", resource="cache", uuid="1234"} 1 # noqa
        cpi{contended_task_id="task1", uuid="1234", type="anomaly"} 10
        """
        metrics = []
        for task_id in self.contending_task_ids:
            metrics.append(
                Metric(name='anomaly',
                       value=1,
                       type=MetricType.COUNTER,
                       labels=dict(
                           contended_task_id=self.contended_task_id,
                           contending_task_id=task_id,
                           resource=self.resource,
                           uuid=self._uuid(),
                           type=self.anomaly_type,
                       )))

        # Mark contention related metrics with two labels: uuid and type='anomaly'.
        for metric in self.metrics:
            metric.labels.update(uuid=self._uuid(), type='anomaly')

        return metrics + self.metrics
Example 28
    def run(self):
        if self.rdt_enabled and not check_resctrl():
            return
        elif not self.rdt_enabled:
            log.warning('Rdt disabled. Skipping measurements collection '
                        'and resctrl synchronization')
        else:
            # Resctrl is enabled and available - cleanup previous runs.
            cleanup_resctrl()

        if not are_privileges_sufficient():
            log.critical(
                "Impossible to use perf_event_open. You need to: adjust "
                "/proc/sys/kernel/perf_event_paranoid; or has CAP_DAC_OVERRIDE capability"
                " set. You can run process as root too. See man 2 perf_event_open for "
                "details.")
            return

        while True:
            # Collect information about tasks running on node.
            tasks = self.node.get_tasks()

            # Keep sync of found tasks and internally managed containers.
            self._sync_containers_state(tasks)

            # Owca internal metrics.
            internal_metrics = [
                Metric(name='owca_up',
                       type=MetricType.COUNTER,
                       value=time.time()),
                Metric(name='owca_tasks',
                       type=MetricType.GAUGE,
                       value=len(tasks)),
            ]

            # Platform information
            platform, platform_metrics, platform_labels = platforms.collect_platform_information(
            )

            # Common labels
            common_labels = dict(platform_labels, **self.extra_labels)

            # Update platform_metrics with common labels.
            for metric in platform_metrics + internal_metrics:
                metric.labels.update(common_labels)

            # Build labeled tasks_metrics and per-task measurements, resources and labels.
            tasks_measurements: TasksMeasurements = {}
            tasks_resources: TasksResources = {}
            tasks_labels: TasksLabels = {}
            tasks_metrics: List[Metric] = []
            for task, container in self.containers.items():
                # Single task data
                task_measurements = container.get_measurements()
                task_metrics = create_metrics(task_measurements)
                # Prepare tasks labels based on Mesos tasks metadata labels and task id.
                task_labels = {
                    sanitize_mesos_label(label_key): label_value
                    for label_key, label_value in task.labels.items()
                }
                task_labels['task_id'] = task.task_id

                # Task scoped label decoration.
                for task_metric in task_metrics:
                    task_metric.labels.update(common_labels)
                    task_metric.labels.update(task_labels)

                # Aggregate over all tasks.
                tasks_labels[task.task_id] = task_labels
                tasks_measurements[task.task_id] = task_measurements
                tasks_resources[task.task_id] = task.resources
                tasks_metrics += task_metrics

            self.metrics_storage.store(platform_metrics + tasks_metrics +
                                       internal_metrics)

            anomalies, extra_metrics = self.detector.detect(
                platform, tasks_measurements, tasks_resources, tasks_labels)

            log.debug('Anomalies detected: %d', len(anomalies))

            # Note: anomaly metrics include metrics found in ContentionAnomaly.metrics.
            anomaly_metrics = convert_anomalies_to_metrics(
                anomalies, tasks_labels)

            # Extra anomaly statistics
            if len(anomalies):
                self.anomaly_last_occurence = time.time()
                self.anomaly_counter += len(anomalies)

            statistics_metrics = [
                Metric(name='anomaly_count',
                       type=MetricType.COUNTER,
                       value=self.anomaly_counter),
            ]
            if self.anomaly_last_occurence:
                statistics_metrics.extend([
                    Metric(name='anomaly_last_occurence',
                           type=MetricType.COUNTER,
                           value=self.anomaly_last_occurence),
                ])

            # Update anomaly & extra metrics with common labels.
            for metric in anomaly_metrics + extra_metrics + statistics_metrics:
                metric.labels.update(common_labels)

            self.anomalies_storage.store(anomaly_metrics + extra_metrics +
                                         statistics_metrics)

            if not self.wait_or_finish():
                break

        # cleanup
        for container in self.containers.values():
            container.cleanup()
Example 29

import pytest

from owca.mesos import create_metrics, sanitize_mesos_label
from owca.metrics import Metric


@pytest.mark.parametrize('label_key,expected_label_key', (
    ('org.apache.ble', 'ble'),
    ('org.apache.aurora.metadata.foo', 'foo'),
    ('some.dots.found', 'some_dots_found'),
))
def test_sanitize_labels(label_key, expected_label_key):
    assert sanitize_mesos_label(label_key) == expected_label_key


@pytest.mark.parametrize('task_measurements,expected_metrics', (
        ({}, []),
        ({'cpu': 15},
         [Metric(name='cpu', value=15)]),
        ({'cpu': 15, 'ram': 30},
         [
             Metric(name='cpu', value=15),
             Metric(name='ram', value=30)
         ]),
))
def test_create_metrics(task_measurements, expected_metrics):
    got_metrics = create_metrics(task_measurements)
    assert expected_metrics == got_metrics
Example 30
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for rpc-perf.
        2018-09-13 08:15:43.404 INFO  [rpc-perf] -----
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Window: 155
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Connections: Ok: 0 Error: 0 Timeout: 0 Open: 80
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Sockets: Create: 0 Close: 0 Read: 31601 Write:
        15795 Flush: 0
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Requests: Sent: 15795 Prepared: 16384 In-Flight: 40
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Responses: Ok: 15793 Error: 0 Timeout: 0 Hit: 3144
        Miss: 6960
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Rate: 15823.74 rps Success: 100.00 % Hit Rate:
        31.12 %
        2018-09-13 08:15:43.404 INFO  [rpc-perf] Percentiles: Response OK (us): min: 47 p50: 389
        p90: 775 p99: 86436 p999: 89120 p9999: 89657 max: 89657
    """

    new_metrics = []

    new_line = readline_with_check(input)

    if "[rpc-perf] Percentiles:" in new_line:
        percentiles = dict(
            re.findall(r'(?P<name>min|max|p\d*): (?P<value>\d+)', new_line))
        p9999 = float(percentiles['p9999'])
        p999 = float(percentiles['p999'])
        p99 = float(percentiles['p99'])
        p90 = float(percentiles['p90'])
        p50 = float(percentiles['p50'])
        min_latency = float(percentiles['min'])
        max_latency = float(percentiles['max'])
        new_metrics.append(
            Metric(metric_name_prefix + 'p9999',
                   p9999,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99.99th percentile of latency in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'p999',
                   p999,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99.9th percentile of latency in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'p99',
                   p99,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99th percentile of latency in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'p90',
                   p90,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="90th percentile of latency in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'p50',
                   p50,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="50th percentile of latency in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'min',
                   min_latency,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="min of latency in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'max',
                   max_latency,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="max of latency in rpc-perf"))

    if "[rpc-perf] Rate:" in new_line:
        statistic = \
            dict(re.findall(r'(?P<name>Hit Rate|Success|Rate): (?P<value>\d+\.\d+)', new_line))
        hit_rate = float(statistic['Hit Rate'])
        success = float(statistic['Success'])
        rate = float(statistic['Rate'])
        new_metrics.append(
            Metric(metric_name_prefix + 'hit_rate',
                   hit_rate,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Hit rate in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'success',
                   success,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Success responses in rpc-perf"))
        new_metrics.append(
            Metric(metric_name_prefix + 'rate',
                   rate,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Rate in rpc-perf"))

    return new_metrics