def collect_latency(monitor_node, start, end, load_type, cluster, nodes_list):
    """Gather client-side and per-node latency figures from Prometheus.

    Two groups of queries are issued against the Prometheus instance reached
    through ``monitor_node.external_address``:

    * cassandra-stress gauges (``collectd_cassandra_stress_<load_type>_gauge``),
      one query per client-side percentile. The samples of all returned series
      are pooled and reported as an average (``c-s P<NN>``) and as a maximum
      (``c-s P<NN> max``).
    * Scylla coordinator latency histograms, one ``histogram_quantile`` query
      per load kind and percentile, reported per node under
      ``Scylla P<NN>_<load> - node-<idx>`` where ``<idx>`` is the last
      ``-``-separated component of the node name.

    Args:
        monitor_node: object whose ``external_address`` is used as the
            Prometheus host.
        start: range start, forwarded unchanged to ``prometheus.query``.
        end: range end, forwarded unchanged to ``prometheus.query``;
            ``int(end - start)`` is also used as the ``rate()`` window in
            seconds.
        load_type: workload name interpolated into the collectd metric name.
            ``'mixed'`` is expanded to ``read`` and ``write`` for the Scylla
            queries; any other value is used as is.
        cluster: object providing ``get_node_by_ip(ip)``.
        nodes_list: nodes (exposing ``ip_address`` and ``name``) scanned when
            ``cluster`` does not return a node for the reporting instance.

    Returns:
        dict mapping the labels described above to floats rounded to two
        decimal places. Labels for which no usable samples exist are absent.
    """

    def usable_samples(raw_values):
        # The last item of every raw sample is the value as a string;
        # 'NaN' markers (any letter case) are dropped before conversion.
        return [float(sample[-1]) for sample in raw_values if sample[-1].lower() != 'nan']

    def two_decimals(number):
        # Round through a fixed-point string, then convert back to float.
        return float(f'{number:.2f}')

    stats = {}
    prometheus = PrometheusDBStats(host=monitor_node.external_address)
    window_seconds = int(end - start)
    stress_percentiles = ['99', '95']  # in the future should include also 'max'
    scylla_percentiles = ['99']  # in the future should include also '95', '5'

    # --- cassandra-stress (client side) latencies -------------------------
    for percentile in stress_percentiles:
        if percentile == 'max':
            label, lat_type = f'c-s {percentile}', percentile
        else:
            label, lat_type = f'c-s P{percentile}', f'perc_{percentile}'
        stress_query = f'collectd_cassandra_stress_{load_type}_gauge{{type="lat_{lat_type}"}}'

        pooled = []
        for series in prometheus.query(stress_query, start, end):
            if not series['values']:
                continue
            samples = usable_samples(series['values'])
            # Series with no numeric samples, or whose samples are all
            # identical, do not contribute to the pooled statistics.
            if not samples or all(item == samples[0] for item in samples):
                continue
            pooled.extend(samples)

        if pooled:
            stats[label] = two_decimals(avg(pooled))
            stats[f'{label} max'] = two_decimals(max(pooled))

    # --- Scylla (server side) per-node latencies --------------------------
    loads = ['read', 'write'] if load_type == 'mixed' else [load_type]
    for load in loads:
        for percentile in scylla_percentiles:
            scylla_query = (
                f'histogram_quantile(0.{percentile},sum(rate(scylla_storage_proxy_coordinator_{load}_'
                f'latency_bucket{{}}[{window_seconds}s])) by (instance, le))'
            )
            for series in prometheus.query(scylla_query, start, end):
                # Strip any square brackets from the instance label before
                # using it as an IP address.
                node_ip = series['metric']['instance'].replace('[', '').replace(']', '')
                node = cluster.get_node_by_ip(node_ip)
                if not node:
                    # Fall back to scanning nodes_list; the scan is not cut
                    # short, so the last node with a matching address wins.
                    for candidate in nodes_list:
                        if candidate.ip_address == node_ip:
                            node = candidate
                if not node:
                    continue
                node_idx = node.name.split('-')[-1]

                if not series['values']:
                    continue
                samples = usable_samples(series['values'])
                if samples:
                    # NOTE(review): the division by 1000 presumably converts
                    # microseconds to milliseconds - confirm against the metric.
                    stats[f"Scylla P{percentile}_{load} - node-{node_idx}"] = two_decimals(avg(samples) / 1000)

    return stats
def check_prometheus_metrics(self, start_time, now):
    """Report whether any series of the blocked-requests metric is positive.

    Queries ``scylla_transport_requests_blocked_memory`` from the Prometheus
    instance at ``self.monitors.nodes[0].public_ip_address`` for the range
    ``start_time`` .. ``now`` and inspects the first sample of every returned
    series.

    Args:
        start_time: range start, forwarded unchanged to ``prometheus.query``.
        now: range end, forwarded unchanged to ``prometheus.query``.

    Returns:
        True if the first sample of at least one series is greater than zero,
        False otherwise (including when no series or no samples come back).
    """
    prometheus = PrometheusDBStats(self.monitors.nodes[0].public_ip_address)
    blocked_requests_metric = 'scylla_transport_requests_blocked_memory'
    series_list = prometheus.query(blocked_requests_metric, start_time, now)

    is_admission_control_triggered = False
    for series in series_list:
        samples = series['values']
        if not samples:
            # A series without samples carries no evidence either way;
            # indexing it would raise IndexError.
            continue
        # float() instead of int(): a 'NaN' sample compares False rather than
        # raising ValueError, and decimal strings are accepted as well.
        if float(samples[0][1]) > 0:
            self.log.info('Admission control was triggered')
            is_admission_control_triggered = True

    return is_admission_control_triggered