Example #1
0
def collect_latency(monitor_node, start, end, load_type, cluster, nodes_list):
    """Collect latency figures from Prometheus for the ``start``..``end`` window.

    Two groups of entries are written into the returned dict:

    * ``'c-s P<pct>'`` and ``'c-s P<pct> max'`` -- the average and the maximum
      of all samples returned for the
      ``collectd_cassandra_stress_<load_type>_gauge`` query. Series that are
      empty, contain only ``nan`` samples, or never change value are ignored.
    * ``'Scylla P<pct>_<load> - node-<idx>'`` -- the average of the
      ``histogram_quantile`` query result for each instance, divided by 1000.
      ``<idx>`` is the part of the node name after its last ``-``.

    :param monitor_node: object whose ``external_address`` is passed to
        ``PrometheusDBStats`` as the host.
    :param start: start of the query window; ``end - start`` must be
        convertible with ``int()``.
    :param end: end of the query window.
    :param load_type: load name used in the metric names; ``'mixed'`` is
        expanded to ``'read'`` and ``'write'`` for the Scylla queries.
    :param cluster: object providing ``get_node_by_ip(ip)``.
    :param nodes_list: fallback nodes, matched on ``ip_address`` when the
        cluster lookup returns nothing.
    :return: dict mapping metric names to floats rounded to two decimals.
    """
    def numeric_samples(points):
        # The last item of every point is the sample as a string; "nan" samples are dropped.
        return [float(point[-1]) for point in points if point[-1].lower() != 'nan']

    res = {}
    prometheus = PrometheusDBStats(host=monitor_node.external_address)
    duration = int(end - start)
    cassandra_stress_precision = ['99', '95']  # in the future should include also 'max'
    scylla_precision = ['99']  # in the future should include also '95', '5'

    for percentile in cassandra_stress_precision:
        if percentile == 'max':
            metric, latency_type = f'c-s {percentile}', percentile
        else:
            metric, latency_type = f'c-s P{percentile}', f'perc_{percentile}'
        query = f'collectd_cassandra_stress_{load_type}_gauge{{type="lat_{latency_type}"}}'

        samples = []
        for series in prometheus.query(query, start, end):
            if not series['values']:
                continue
            points = numeric_samples(series['values'])
            # A series whose value never changes (this includes a single sample) is skipped.
            if points and any(value != points[0] for value in points):
                samples.extend(points)

        if samples:
            res[metric] = float(f'{avg(samples):.2f}')
            res[f'{metric} max'] = float(f'{max(samples):.2f}')

    loads = ['read', 'write'] if load_type == 'mixed' else [load_type]

    for load in loads:
        for precision in scylla_precision:
            query = (
                f'histogram_quantile(0.{precision},sum(rate(scylla_storage_proxy_coordinator_{load}_'
                f'latency_bucket{{}}[{duration}s])) by (instance, le))'
            )
            for series in prometheus.query(query, start, end):
                # Square brackets are removed from the instance label before the lookup.
                node_ip = series['metric']['instance'].replace('[', '').replace(']', '')
                node = cluster.get_node_by_ip(node_ip)
                if not node:
                    # Fall back to the explicit node list; the last matching entry wins.
                    candidates = [db_node for db_node in nodes_list if db_node.ip_address == node_ip]
                    if candidates:
                        node = candidates[-1]
                if not node:
                    continue
                node_idx = node.name.rsplit('-', 1)[-1]
                metric = f"Scylla P{precision}_{load} - node-{node_idx}"
                if not series['values']:
                    continue
                points = numeric_samples(series['values'])
                if points:
                    res[metric] = float(f'{avg(points) / 1000:.2f}')

    return res
    def check_prometheus_metrics(self, start_time, now):
        """Tell whether the blocked-requests metric is positive for any series.

        Queries ``scylla_transport_requests_blocked_memory`` through a
        ``PrometheusDBStats`` built from the first monitor node's
        ``public_ip_address``, over the ``start_time``..``now`` window. Only
        the first sample of each returned series is inspected; an info message
        is logged for every series whose first sample is greater than zero.

        :param start_time: start of the query window.
        :param now: end of the query window.
        :return: ``True`` if at least one series had a positive first sample,
            otherwise ``False``.
        """
        stats = PrometheusDBStats(self.monitors.nodes[0].public_ip_address)
        blocked_metric = 'scylla_transport_requests_blocked_memory'
        all_series = stats.query(blocked_metric, start_time, now)

        triggered = False
        for series in all_series:
            # values[0] is the first sample; index 1 of that sample is read as the value.
            if int(series['values'][0][1]) <= 0:
                continue
            self.log.info('Admission control was triggered')
            triggered = True

        return triggered