Example no. 1
def create_metrics(platform: Platform) -> List[Metric]:
    """Creates a list of Metric objects from data in Platform object"""
    platform_metrics = []

    platform_metrics.extend([
        Metric.create_metric_with_metadata(MetricName.PLATFORM_TOPOLOGY_CORES,
                                           value=platform.cores),
        Metric.create_metric_with_metadata(MetricName.PLATFORM_TOPOLOGY_CPUS,
                                           value=platform.cpus),
        Metric.create_metric_with_metadata(MetricName.PLATFORM_TOPOLOGY_SOCKETS,
                                           value=platform.sockets),
        Metric.create_metric_with_metadata(MetricName.PLATFORM_LAST_SEEN,
                                           value=time.time()),
    ])

    # Exporting measurements into metrics.
    platform_metrics.extend(export_metrics_from_measurements(platform.measurements))

    platform_metrics.append(
        Metric.create_metric_with_metadata(
            MetricName.WCA_INFORMATION,
            value=1,
            labels=dict(
                sockets=str(platform.sockets),
                cores=str(platform.cores),
                cpus=str(platform.cpus),
                cpu_model=platform.cpu_model,
                wca_version=get_wca_version(),
            )
        )
    )

    return platform_metrics
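
The examples on this page all build on the wca Metric model (Metric, MetricType, MetricName, Metric.create_metric_with_metadata). For readers who want to run the snippets standalone, a minimal stand-in could look like the sketch below; this is an assumption for illustration, not the project's actual definitions, and the MetricName constants can be treated as plain metric-name strings for these sketches.

from dataclasses import dataclass, field
from typing import Dict, Optional, Union


class MetricType:
    # Assumed string constants; the real project may define these differently.
    GAUGE = 'gauge'
    COUNTER = 'counter'


@dataclass
class Metric:
    # Field order matches how the examples pass name and value positionally.
    name: str
    value: Union[float, int]
    labels: Dict[str, str] = field(default_factory=dict)
    type: Optional[str] = None
    help: Optional[str] = None

    @classmethod
    def create_metric_with_metadata(cls, name, value, labels=None):
        # Assumed behavior: the real helper also attaches per-name type/help
        # metadata; here it only builds a plain Metric.
        return cls(name=name, value=value, labels=labels or {})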
Example no. 2
def test_parse(mock_print):
    input_ = StringIO(
        "2018-08-22 17:33:25:811 581 sec: 581117 operations; "
        "975 current ops/sec; "
        "est completion in 2 hours 36 minutes "
        "[READ: Count=462, Max=554, Min=273, Avg=393.39, 90=457, "
        "99=525, 99.9=554, 99.99=554] [UPDATE: Count=513, Max=699, "
        "Min=254, Avg=383.83, 90=441, 99=512, 99.9=589, 99.99=699] # noq")
    expected = [
        Metric("cassandra_operations",
               value=581117,
               type=MetricType.GAUGE,
               help="Done operations in Cassandra"),
        Metric("cassandra_ops_per_sec",
               value=975,
               type=MetricType.GAUGE,
               help="Ops per sec Cassandra"),
        Metric("cassandra_read_p9999",
               value=554.0,
               type=MetricType.GAUGE,
               help="99.99th percentile of read latency in Cassandra"),
        Metric("cassandra_update_p9999",
               value=699.0,
               type=MetricType.GAUGE,
               help="99.99th percentile of update latency in Cassandra"),
    ]
    assert expected == parse(input_, None, None, {}, 'cassandra_')
def test_parse_scan_mode():
    data = """
    #type       avg     min     1st     5th    10th    90th    95th    99th      QPS   target
    read       76.3   346.3    21.1    23.5    24.5    34.3    38.7  2056.6   1002.0   1000"""

    number_of_reads = len(data.splitlines())
    input_ = StringIO(data)

    got = []

    for _ in range(number_of_reads):
        got.extend(parse(input_, '', None, {}, 'twemcache_'))

    expected = [
        Metric('twemcache_scan_qps', value=1002.0, labels={},
               type=MetricType.GAUGE, help="QPS"),
        Metric('twemcache_scan_read_avg', value=76.3, labels={},
               type=MetricType.GAUGE, help="Average"),
        Metric('twemcache_scan_read_p90', value=34.3, labels={},
               type=MetricType.GAUGE, help="90th percentile of read latency"),
        Metric('twemcache_scan_read_p95', value=38.7, labels={},
               type=MetricType.GAUGE, help="95th percentile of read latency"),
        Metric('twemcache_scan_read_p99', value=2056.6, labels={},
               type=MetricType.GAUGE, help="99th percentile of read latency")
    ]
    assert expected == got
Example no. 4
def calculate_variance(app_name, node_name: str, requested_fraction: Dict[ResourceType, float],
                       bar_weights: Dict[ResourceType, float]) -> \
        Tuple[float, List[Metric]]:
    """Return variance using bar_weights"""
    # Mean - priority according to variance of dimensions
    metrics = []
    mean = sum([v for v in requested_fraction.values()]) / len(requested_fraction)
    log.log(TRACE, "[Prioritize][app=%s][node=%s][bar] Mean: %s", app_name, node_name, mean)
    metrics.append(
        Metric(name=MetricName.BAR_MEAN,
               value=mean, labels=dict(app=app_name, node=node_name),
               type=MetricType.GAUGE))
    # Variance
    if len(requested_fraction) > 2:
        variance = sum([((fraction - mean) * (fraction - mean)) * bar_weights.get(rt, 1)
                        for rt, fraction in requested_fraction.items()]) \
                   / len(requested_fraction)
    elif len(requested_fraction) == 2:
        values = list(requested_fraction.values())
        variance = abs(values[0] - values[1])
    else:
        variance = 0
    log.log(TRACE,
            "[Prioritize][app=%s][node=%s][bar] "
            "Variance(weighted quadratic sum of requested_fraction-mean): %s",
            app_name, node_name, variance)
    metrics.append(
        Metric(name=MetricName.BAR_VARIANCE,
               value=variance, labels=dict(app=app_name, node=node_name),
               type=MetricType.GAUGE))
    return variance, metrics
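
A quick numeric check of the branch logic above, using hypothetical fractions for three dimensions and default weights (any key missing from bar_weights falls back to 1):

requested_fraction = {'cpu': 0.5, 'mem': 0.7, 'membw_read': 0.6}   # hypothetical values
mean = sum(requested_fraction.values()) / len(requested_fraction)  # 0.6
variance = sum((f - mean) ** 2 for f in requested_fraction.values()) / len(requested_fraction)
# -> (0.01 + 0.01 + 0.0) / 3 ≈ 0.0067; with exactly two dimensions the function
#    instead returns abs(values[0] - values[1]), and with one dimension it returns 0.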
def test_parse():
    data = """#type       avg     std     min     5th    10th    90th    95th    99th
               read      801.9   155.0   304.5   643.7   661.1  1017.8  1128.2  1386.5
               update    804.6   157.8   539.4   643.4   661.2  1026.1  1136.1  1404.3
               op_q        1.0     0.0     1.0     1.0     1.0     1.1     1.1     1.1

               Total QPS = 159578.5 (1595835 / 10.0s)

               Misses = 0 (0.0%)
               Skipped TXs = 0 (0.0%)

               RX  382849511 bytes :   36.5 MB/s
               TX   67524708 bytes :    6.4 MB/s
"""

    number_of_reads = len(data.splitlines())
    input_ = StringIO(data)

    got = []
    for _ in range(number_of_reads):
        got.extend(parse(input_, '', None, {}, 'twemcache_'))
    expected = [
        Metric('twemcache_read_avg', value=801.9, labels={},
               type=MetricType.GAUGE, help="Average"),
        Metric('twemcache_read_p90', value=1017.8, labels={},
               type=MetricType.GAUGE, help="90th percentile of read latency"),
        Metric('twemcache_read_p95', value=1128.2, labels={},
               type=MetricType.GAUGE, help="95th percentile of read latency"),
        Metric('twemcache_read_p99', value=1386.5, labels={},
               type=MetricType.GAUGE, help="99th percentile of read latency"),
        Metric('twemcache_qps', value=159578.5, labels={},
               type=MetricType.GAUGE, help="QPS")]

    assert expected == got
def test_grouping_metrics_by_metadata(sample_metrics_mixed):

    got_grouped = storage.group_metrics_by_name(sample_metrics_mixed)

    expected_grouped = [
        ('bar', [
            Metric(name='bar', value=89, type=None, help='bar-help'),
        ]),
        ('bar2', [
            Metric(name='bar2', value=89),
        ]),
        ('foo', [
            Metric(name='foo',
                   value=1,
                   labels=dict(a='1'),
                   type=MetricType.COUNTER,
                   help='foo-help'),
            Metric(name='foo',
                   value=1,
                   labels=dict(a='3'),
                   type=MetricType.COUNTER,
                   help='foo-help'),
            Metric(name='foo',
                   value=1,
                   labels=dict(a='20'),
                   type=MetricType.COUNTER,
                   help='foo-help'),
        ]),
    ]

    assert got_grouped == expected_grouped
Example no. 7
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for YCSB.
    Parses lines similar to (newline characters added to improve readability):
        2018-08-22 17:33:25:811 581 sec: 581117 operations;
        975 current ops/sec;
        est completion in 2 hours 36 minutes
        [READ: Count=462, Max=554, Min=273, Avg=393.39, 90=457,
        99=525, 99.9=554, 99.99=554] [UPDATE: Count=513, Max=699,
        Min=254, Avg=383.83, 90=441, 99=512, 99.9=589, 99.99=699] # noqa
    """
    new_metrics = []

    new_line = readline_with_check(input)

    if "operations" in new_line:
        operations_and_ops = \
            re.search(r'(?P<operations>\d+) operations;', new_line).groupdict()
        operations = float(operations_and_ops['operations'])
        new_metrics.append(
            Metric(metric_name_prefix + 'operations',
                   operations,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Done operations in Cassandra"))

    if "current ops" in new_line:
        operations_and_ops = \
            re.search(r'(?P<ops_per_sec>\d+(\.\d+)?) current ops\/sec', new_line).groupdict()
        ops_per_sec = float(operations_and_ops['ops_per_sec'])
        new_metrics.append(
            Metric(metric_name_prefix + 'ops_per_sec',
                   ops_per_sec,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="Ops per sec Cassandra"))

    if "READ" in new_line:
        read = re.search(r'\[READ.*?99\.99=(\d+).*?\]', new_line)
        p9999 = float(read.group(1))
        new_metrics.append(
            Metric(metric_name_prefix + 'read_p9999',
                   p9999,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99.99th percentile of read latency in Cassandra"))
    if "UPDATE" in new_line:
        update = re.search(r'\[UPDATE.*?99\.99=(\d+).*?\]', new_line)
        p9999 = float(update.group(1))
        new_metrics.append(
            Metric(metric_name_prefix + 'update_p9999',
                   p9999,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99.99th percentile of update latency in Cassandra"))

    return new_metrics
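
readline_with_check is shared by the parse functions on this page but is not shown here; based on how it is called (sometimes with an EOF_line sentinel), a plausible sketch is the following assumption (the exception name is hypothetical):

class InputStreamFinished(Exception):
    """Assumed signal that the parsed stream is exhausted (hypothetical name)."""


def readline_with_check(input, EOF_line=None):
    # Assumed behavior: return the next line, raising when the stream ends or
    # when an explicit end-of-input sentinel line is reached.
    new_line = input.readline()
    if new_line == '' or (EOF_line is not None and new_line.strip() == EOF_line):
        raise InputStreamFinished('Reached end of parsed input.')
    return new_line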
Example no. 8
def _log_initial_state(tasks_data, balanced_memory, extra_metrics):
    log.log(
        TRACE, "Current state of the system, balanced_memory=%s[bytes]" %
        balanced_memory)
    log.debug(
        "Current task assignments to nodes, expressed "
        "as the sum of memory limits of pinned tasks: %s[KiB]" % {
            node: sum(t[1] for t in tasks) / 2**10
            for node, tasks in balanced_memory.items()
        })
    log.debug("Current task assigments: %s" %
              {node: len(tasks)
               for node, tasks in balanced_memory.items()})
    log.debug(
        "Current task assignments (task ids per node): %s" % {
            node: [task[0] for task in tasks]
            for node, tasks in balanced_memory.items()
        })

    for node, tasks_with_memory in balanced_memory.items():
        extra_metrics.extend([
            Metric('numa__balanced_memory_tasks',
                   value=len(tasks_with_memory),
                   labels=dict(numa_node=str(node))),
            Metric('numa__balanced_memory_size',
                   value=sum([m for t, m in tasks_with_memory]),
                   labels=dict(numa_node=str(node))),
            Metric('numa__task_tasks_count', value=len(tasks_data)),
        ])
def test_parse(mock_print):
    input_ = StringIO(
        "Results:"
        "Op rate                   :   14,997 op/s  [WRITE: 14,997 op/s]"
        "Partition rate            :   14,997 pk/s  [WRITE: 14,997 pk/s]"
        "Row rate                  :   14,997 row/s [WRITE: 14,997 row/s]"
        "Latency mean              :    1.9 ms [WRITE: 1.9 ms]"
        "Latency median            :    0.3 ms [WRITE: 0.3 ms]"
        "Latency 95th percentile   :    0.4 ms [WRITE: 0.4 ms]"
        "Latency 99th percentile   :   74.0 ms [WRITE: 74.0 ms]"
        "Latency 99.9th percentile :  146.8 ms [WRITE: 146.8 ms]"
        "Latency max               :  160.2 ms [WRITE: 160.2 ms]"
        "Total partitions          :  1,350,028 [WRITE: 1,350,028]"
        "Total errors              :          0 [WRITE: 0]"
        "Total GC count            : 0"
        "Total GC memory           : 0.000 KiB"
        "Total GC time             :    0.0 seconds"
        "Avg GC time               :    NaN ms"
        "StdDev GC time            :    0.0 ms"
        "Total operation time      : 00:01:30"
    )
    expected = [
        Metric('cassandra_qps', value=14997, type=MetricType.GAUGE,
               help="QPS"),
        Metric('cassandra_p99', value=74.0, type=MetricType.GAUGE,
               help="99th percentile")
    ]
    assert expected == parse(input_, None, None, {}, 'cassandra_')
Example no. 10
def test_parse(mock_print):
    input_ = StringIO(
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] -----"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Window: 155"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Connections: Ok: 0 Error: 0 Timeout: 0 Open: 80"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Sockets: Create: 0 Close: 0 Read: 31601 Write: "
        + "15795 Flush: 0"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Requests: Sent: 15795 Prepared: 16384 "
        + "In-Flight: 40"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Responses: Ok: 15793 Error: 0 Timeout: 0 Hit: "
        + "3144 Miss: 6960"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Rate: 15823.74 rps Success: 100.00 % Hit Rate:"
        + " 31.12 %"
        "2018-09-13 08:15:43.404 INFO  [rpc-perf] Percentiles: Response OK (us): min: 47 p50: 389"
        + " p90: 775 p99: 86436 p999: 89120 p9999: 89657 max: 89657")
    expected = [
        Metric("rpcperf_p9999",
               value=89657,
               type=MetricType.GAUGE,
               help="99.99th percentile of latency in rpc-perf"),
        Metric("rpcperf_p999",
               value=89120,
               type=MetricType.GAUGE,
               help="99.9th percentile of latency in rpc-perf"),
        Metric("rpcperf_p99",
               value=86436,
               type=MetricType.GAUGE,
               help="99th percentile of latency in rpc-perf"),
        Metric("rpcperf_p90",
               value=775,
               type=MetricType.GAUGE,
               help="90th percentile of latency in rpc-perf"),
        Metric("rpcperf_p50",
               value=389,
               type=MetricType.GAUGE,
               help="50th percentile of latency in rpc-perf"),
        Metric("rpcperf_min",
               value=47,
               type=MetricType.GAUGE,
               help="min of latency in rpc-perf"),
        Metric("rpcperf_max",
               value=89657,
               type=MetricType.GAUGE,
               help="max of latency in rpc-perf"),
        Metric("rpcperf_hit_rate",
               value=31.12,
               type=MetricType.GAUGE,
               help="Hit rate in rpc-perf"),
        Metric("rpcperf_success",
               value=100.00,
               type=MetricType.GAUGE,
               help="Success responses in rpc-perf"),
        Metric("rpcperf_rate",
               value=15823.74,
               type=MetricType.GAUGE,
               help="Rate in rpc-perf"),
    ]
    assert expected == parse(input_, None, None, {}, 'rpcperf_')
Example no. 11
def sample_metrics():
    return [
        Metric(name='average_latency_miliseconds', value=8,
               labels={'user': '******', 'node': 'slave_1'},
               type=MetricType.COUNTER, help='latency measured in miliseconds'),
        Metric(name='percentile_99th_miliseconds', value=89,
               labels={'user': '******', 'node': 'slave_1'},
               type=MetricType.COUNTER, help='99th percentile in miliseconds')
    ]
Example no. 12
    def generate_metrics(self) -> List[Metric]:
        """Encode RDT Allocation as metrics.
        Note:
        - cache allocation: generated two metrics, with number of cache ways and
                            mask of bits (encoded as int)
        - memory bandwidth: is encoded as int, representing MB/s or percentage
        """
        # Empty object generate no metric.
        if not self.rdt_allocation.l3 and not self.rdt_allocation.mb:
            return []

        group_name = self.get_resgroup_name()

        metrics = []
        if self.rdt_allocation.l3:
            domains = _parse_schemata_file_row(self.rdt_allocation.l3)
            for domain_id, raw_value in domains.items():
                metrics.extend([
                    Metric(name='allocation_rdt_l3_cache_ways',
                           value=_count_enabled_bits(raw_value),
                           type=MetricType.GAUGE,
                           labels=dict(
                               allocation_type='rdt_l3_cache_ways',
                               group_name=group_name,
                               domain_id=domain_id,
                               container_name=self.container_name,
                           )),
                    Metric(name='allocation_rdt_l3_mask',
                           value=int(raw_value, 16),
                           type=MetricType.GAUGE,
                           labels=dict(
                               allocation_type='rdt_l3_mask',
                               group_name=group_name,
                               domain_id=domain_id,
                               container_name=self.container_name,
                           ))
                ])

        if self.rdt_allocation.mb:
            domains = _parse_schemata_file_row(self.rdt_allocation.mb)
            for domain_id, raw_value in domains.items():
                # NOTE: raw_value is treated as int, ignoring unit used (MB or %)
                value = int(raw_value)
                metrics.append(
                    Metric(name='allocation_rdt_mb',
                           value=value,
                           type=MetricType.GAUGE,
                           labels=dict(
                               allocation_type='rdt_mb',
                               group_name=group_name,
                               domain_id=domain_id,
                               container_name=self.container_name,
                           )))

        self.labels_updater.update_labels(metrics)

        return metrics
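
_parse_schemata_file_row and _count_enabled_bits are referenced above but not shown on this page. Assuming resctrl schemata rows of the form L3:0=fffff;1=fffff or MB:0=50, plausible sketches (assumptions, not the project's code) are:

from typing import Dict


def _parse_schemata_file_row(line: str) -> Dict[str, str]:
    # Assumed format: '<resource>:<domain>=<value>;<domain>=<value>...'
    _, domains_part = line.split(':', 1)
    return dict(domain.split('=') for domain in domains_part.split(';'))


def _count_enabled_bits(hex_mask: str) -> int:
    # Number of set bits in a hexadecimal cache-ways mask.
    return bin(int(hex_mask, 16)).count('1')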
Example no. 13
def sample_metrics_mixed():
    return [
        Metric(name='bar', value=89, type=None, help='bar-help'),
        Metric(name='foo', value=1, labels=dict(a='3'),
               type=MetricType.COUNTER, help='foo-help'),
        Metric(name='foo', value=1, labels=dict(a='20'),
               type=MetricType.COUNTER, help='foo-help'),
        Metric(name='foo', value=1, labels=dict(a='1'),
               type=MetricType.COUNTER, help='foo-help'),
        Metric(name='bar2', value=89),
    ]
def create_metrics(platform: Platform) -> List[Metric]:
    """Creates a list of Metric objects from data in Platform object"""
    platform_metrics = list()
    platform_metrics.append(
        Metric.create_metric_with_metadata(name=MetricName.MEM_USAGE,
                                           value=platform.total_memory_used))
    for cpu_id, cpu_usage in platform.cpus_usage.items():
        platform_metrics.append(
            Metric.create_metric_with_metadata(
                name=MetricName.CPU_USAGE_PER_CPU,
                value=cpu_usage,
                labels={"cpu": str(cpu_id)}))
    return platform_metrics
Example no. 15
    def _log_moved_pages(self, extra_metrics: List[Metric],
                         tasks_data: TasksData):
        """modify extra_metrics"""
        for task, page_to_move in self._pages_to_move.items():
            data: TaskData = tasks_data[task]
            extra_metrics.append(
                Metric('numa__task_pages_to_move',
                       value=page_to_move,
                       labels=data.labels))
        total_pages_to_move = sum(p for p in self._pages_to_move.values())
        extra_metrics.append(
            Metric('numa__total_pages_to_move', value=total_pages_to_move))
        log.log(TRACE, 'Pages to move: %r', self._pages_to_move)
Example no. 16
def get_logging_metrics() -> List[Metric]:
    metrics = []
    for logger_name, counter in _module_record_counters.items():
        metrics.extend([
            Metric(name="wca_warning_count",
                   type=MetricType.COUNTER,
                   value=counter[logging.WARNING],
                   labels={"module": logger_name}),
            Metric(name="wca_error_count",
                   type=MetricType.COUNTER,
                   value=counter[logging.ERROR],
                   labels={"module": logger_name}),
        ])
    return metrics
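
_module_record_counters is populated elsewhere; one way it could be wired (an illustrative assumption, not necessarily how wca does it) is a logging handler that counts warnings and errors per logger name:

import logging
from collections import Counter, defaultdict

_module_record_counters = defaultdict(Counter)


class CountingHandler(logging.Handler):
    def emit(self, record: logging.LogRecord) -> None:
        # Count only the severities exported by get_logging_metrics above.
        if record.levelno in (logging.WARNING, logging.ERROR):
            _module_record_counters[record.name][record.levelno] += 1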
Example no. 17
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for cassandra-stress.
        Results:
        Op rate                   :   14,997 op/s  [WRITE: 14,997 op/s]
        Partition rate            :   14,997 pk/s  [WRITE: 14,997 pk/s]
        Row rate                  :   14,997 row/s [WRITE: 14,997 row/s]
        Latency mean              :    1.9 ms [WRITE: 1.9 ms]
        Latency median            :    0.3 ms [WRITE: 0.3 ms]
        Latency 95th percentile   :    0.4 ms [WRITE: 0.4 ms]
        Latency 99th percentile   :   74.0 ms [WRITE: 74.0 ms]
        Latency 99.9th percentile :  146.8 ms [WRITE: 146.8 ms]
        Latency max               :  160.2 ms [WRITE: 160.2 ms]
        Total partitions          :  1,350,028 [WRITE: 1,350,028]
        Total errors              :          0 [WRITE: 0]
        Total GC count            : 0
        Total GC memory           : 0.000 KiB
        Total GC time             :    0.0 seconds
        Avg GC time               :    NaN ms
        StdDev GC time            :    0.0 ms
        Total operation time      : 00:01:30
    """

    new_metrics = []
    new_line = readline_with_check(input, EOF_line)
    if "Op rate" in new_line:
        read_op_rate = re.search(r'Op rate[ ]*:[ ]*([0-9,]*) op/s', new_line)
        op_rate = float(''.join(read_op_rate.group(1).split(',')))
        new_metrics.append(
            Metric(metric_name_prefix + 'qps',
                   op_rate,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="QPS"))
    if "Latency 99th percentile" in new_line:
        read = re.search(
            r'Latency 99th percentile[ ]*:[ ]*([0-9]*\.[0-9]*) ms', new_line)
        p99 = float(read.group(1))
        new_metrics.append(
            Metric(metric_name_prefix + 'p99',
                   p99,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="99th percentile"))
    return new_metrics
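
The op-rate capture group keeps the thousands separators (e.g. 14,997), which is why the value is rebuilt with ''.join(...split(',')) before converting; a small illustration using a value from the sample output above:

raw = '1,350,028'
value = float(''.join(raw.split(',')))     # 1350028.0
# raw.replace(',', '') would be an equivalent, arguably clearer, spelling.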
Example no. 18
def sample_metrics_unconvertable_to_PEF():
    """for testing for proper float representation"""
    return [
        Metric(name='latency-miliseconds', value=8.223,
               labels={'user': '******', 'node': 'slave_1'},
               type=MetricType.COUNTER, help='latency measured in miliseconds'),
    ]
Example no. 19
def parse(input: TextIOWrapper,
          regexp: str,
          separator: str = None,
          labels: Dict[str, str] = {},
          metric_name_prefix: str = '') -> List[Metric]:
    """Custom parse function for gauge tpm from mysql.
        TPM: 87060.0
        TPM: 95220.0
        TPM: 93600.0
        TPM: 90000.0
    """
    new_metrics = []

    new_line = readline_with_check(input, EOF_line='end')

    if "TPM:" in new_line:
        regex = re.findall(r'TPM: (?P<tpm>\d+(?:\.\d+)?)', new_line)
        tpm = float(regex[0])

        new_metrics.append(
            Metric(metric_name_prefix + 'tpm',
                   tpm,
                   type=MetricType.GAUGE,
                   labels=labels,
                   help="TPM (transaction per minute) from mysql"))

    return new_metrics
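
A minimal way to exercise this parser on one line of the documented output (the prefix and arguments are illustrative):

from io import StringIO

metrics = parse(StringIO("TPM: 87060.0\n"), '', None, {}, 'mysql_')
# -> [Metric('mysql_tpm', 87060.0, type=MetricType.GAUGE, labels={},
#            help="TPM (transaction per minute) from mysql")]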
def _get_internal_metrics(tasks: List[Task]) -> List[Metric]:
    """Internal wca metrics e.g. memory usage, profiling information."""

    # Memory usage.
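    # Note: on Linux, resource.getrusage() reports ru_maxrss in kilobytes,
    # which is why the value is multiplied by 1024 below to export bytes.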
    memory_usage_rss_self = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    memory_usage_rss_children = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss
    memory_usage_rss = memory_usage_rss_self + memory_usage_rss_children

    metrics = [
        Metric(name='wca_up', type=MetricType.COUNTER, value=time.time()),
        Metric(name='wca_tasks', type=MetricType.GAUGE, value=len(tasks)),
        Metric(name='wca_memory_usage_bytes', type=MetricType.GAUGE,
               value=int(memory_usage_rss * 1024)),
    ]

    return metrics
Example no. 21
def sample_metrics_with_quote():
    """for testing for proper escaping"""
    return [
        Metric(name='average_latency_miliseconds', value=8,
               labels={'user': '******', 'node': 'slave_1 called "brave heart"'},  # quoted
               type=MetricType.COUNTER, help='latency measured in miliseconds'),
    ]
def get_requested_fraction(app_name, apps_spec, assigned_apps, node_name,
                           nodes_capacities,
                           dimensions) -> Tuple[Resources, List[Metric]]:
    """
    returns requested_fraction, metrics
    """
    # Current node context: used and free currently
    used, free, requested, capacity, membw_read_write_ratio, metrics = \
        used_free_requested(node_name, app_name, dimensions,
                            nodes_capacities, assigned_apps, apps_spec)

    # SUM requested by app and already used on node
    try:
        requested_and_used = sum_resources(requested, used)
    except ValueError as e:
        msg = 'cannot sum app=%s requested=%s and node=%s used=%s: %s' % (
            app_name, requested, node_name, used, e)
        log.error(msg)
        raise DataMissingException(msg) from e

    # FRACTION
    requested_fraction = divide_resources(requested_and_used, capacity,
                                          membw_read_write_ratio)
    for resource, fraction in requested_fraction.items():
        metrics.append(
            Metric(name=MetricName.BAR_REQUESTED_FRACTION,
                   value=fraction,
                   labels=dict(app=app_name, resource=resource),
                   type=MetricType.GAUGE))
    return requested_fraction, metrics
def _get_internal_metrics(tasks: List[Task]) -> List[Metric]:
    """Internal wca metrics e.g. memory usage, profiling information."""

    # Memory usage.
    memory_usage_rss_self = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
    memory_usage_rss_children = resource.getrusage(resource.RUSAGE_CHILDREN).ru_maxrss
    memory_usage_rss = memory_usage_rss_self + memory_usage_rss_children

    metrics = [
        Metric.create_metric_with_metadata(MetricName.WCA_UP, value=time.time()),
        Metric.create_metric_with_metadata(MetricName.WCA_TASKS, value=len(tasks)),
        Metric.create_metric_with_metadata(MetricName.WCA_MEM_USAGE_BYTES,
                                           value=int(memory_usage_rss * 1024)),
    ]

    return metrics
Example no. 24
def anomaly_metrics(contended_task_id: TaskId,
                    contending_task_ids: List[TaskId],
                    contending_workload_instances: Dict[TaskId, str] = {},
                    labels: Dict[TaskId, Dict[str, str]] = {}):
    """Helper method to create metric based on anomaly.
    uuid is used if provided.
    """
    metrics = []
    for task_id in contending_task_ids:
        uuid = _create_uuid_from_tasks_ids(contending_task_ids +
                                           [contended_task_id])
        metric = Metric(
            name='anomaly',
            value=1,
            labels=dict(
                contended_task_id=contended_task_id,
                contending_task_id=task_id,
                resource=ContendedResource.MEMORY_BW,
                uuid=uuid,
                type='contention',
                contending_workload_instance=contending_workload_instances[
                    task_id],
                workload_instance=contending_workload_instances[
                    contended_task_id]),
            type=MetricType.COUNTER)
        if contended_task_id in labels:
            metric.labels.update(labels[contended_task_id])
        metrics.append(metric)
    return metrics
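
_create_uuid_from_tasks_ids is not shown on this page; an assumed sketch that derives a stable uuid from the set of involved task ids (so the same group always yields the same anomaly uuid) could be:

import hashlib
import uuid
from typing import List


def _create_uuid_from_tasks_ids(task_ids: List[str]) -> str:
    # Assumed helper: hash the sorted ids and reinterpret the digest as a UUID.
    joined = ','.join(sorted(task_ids))
    return str(uuid.UUID(hashlib.sha256(joined.encode('utf-8')).hexdigest()[:32]))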
Example no. 25
def test_log_storage(*mocks):
    open_mock = Mock()
    with patch('builtins.open', open_mock):
        metric = Metric(name='foo', value=8)
        log_storage = LogStorage(output_filename='mocked_file_name.log')
        log_storage.store([metric])
    assert open_mock.return_value.write.call_count == 2
    assert open_mock.return_value.method_calls[0] == call.write('foo 8 1\n')
Example no. 26
def test_log_storage_overwrite_mode(logfile_mock, get_current_time_mock,
                                    pathlib_rename_mock):
    metric = Metric(name='foo', value=8)
    log_storage = LogStorage(output_filename='mocked_file_name.log',
                             overwrite=True)
    log_storage.store([metric])
    logfile_mock.assert_has_calls([call().__enter__().write('foo 8\n')])
    pathlib_rename_mock.assert_called_once()
Example no. 27
def _get_allocations_statistics_metrics(allocations_count, allocations_errors,
                                        allocation_duration):
    """Extra external plugin allocations statistics."""

    metrics = [
        Metric(name='allocations_count', type=MetricType.COUNTER,
               value=allocations_count),
        Metric(name='allocations_errors', type=MetricType.COUNTER,
               value=allocations_errors),
    ]

    if allocation_duration is not None:
        metrics.extend([
            Metric(name='allocation_duration', type=MetricType.GAUGE,
                   value=allocation_duration)
        ])

    return metrics
def test_parse():
    input_ = StringIO("180	images/sec: 74.9 +/- 0.5 (jitter = 8.9)    2.409")
    expected = [
        Metric('tensorflow_benchmark_training_speed',
               value=74.9,
               type=MetricType.GAUGE,
               help="tensorflow benchmark training speed")
    ]
    assert expected == parse(input_, None, None, {}, 'tensorflow_benchmark_')
Example no. 29
    def get_metrics(self, anomalies: List[Anomaly]) -> List[Metric]:
        """Extra external plugin anomaly statistics."""

        self.validate(anomalies)

        if len(anomalies):
            self._anomaly_last_occurrence = time.time()
            self._anomaly_counter += len(anomalies)

        statistics_metrics = [
            Metric(name='anomaly_count', type=MetricType.COUNTER, value=self._anomaly_counter),
        ]
        if self._anomaly_last_occurrence:
            statistics_metrics.extend([
                Metric(name='anomaly_last_occurrence', type=MetricType.COUNTER,
                       value=self._anomaly_last_occurrence),
            ])
        return statistics_metrics
def test_parse():
    input_ = StringIO("580	248.7 examples/sec")
    expected = [
        Metric('tensorflow_benchmark_prediction_speed',
               value=248.7,
               type=MetricType.GAUGE,
               help="tensorflow benchmark prediction speed")
    ]
    assert expected == parse(input_, None, None, {}, 'tensorflow_benchmark_')