def prometheus_log(info, alert_list):
    """
    Log all alerts from Prometheus API every 3 seconds.

    Args:
        info (dict): Contains run key attribute that controls thread.
            If `info['run'] == False` then thread will stop
        alert_list (list): List to be populated with alerts

    Raises:
        AssertionError: If a request for alerts to the Prometheus API
            does not return an OK status

    """
    prometheus = PrometheusAPI()
    logger.info('Logging of all prometheus alerts started')
    while info.get('run'):
        alerts_response = prometheus.get(
            'alerts',
            payload={
                'silenced': False,
                'inhibited': False
            }
        )
        # Explicit raise instead of `assert` so the response check is not
        # stripped away when Python runs with optimizations (-O). The
        # exception type is kept as AssertionError for backward
        # compatibility with any caller handling it.
        if not alerts_response.ok:
            raise AssertionError(
                f"Request {alerts_response.request.url} failed"
            )
        # Only append alerts we have not seen yet, so alert_list acts as a
        # growing set of unique alerts shared with the controlling thread.
        for alert in alerts_response.json().get('data').get('alerts'):
            if alert not in alert_list:
                logger.info(f"Adding {alert} to alert list")
                alert_list.append(alert)
        time.sleep(3)
    logger.info('Logging of all prometheus alerts stopped')
def collect_prometheus_metrics(
    metrics,
    dir_name,
    start,
    stop,
    step=1.0,
):
    """
    Collects metrics from Prometheus and saves them in file in json format.
    Metrics can be found in OCP Console in Monitoring -> Metrics.

    Args:
        metrics (list): list of metrics to get from Prometheus
            (E.g. ceph_cluster_total_used_bytes, cluster:cpu_usage_cores:sum,
            cluster:memory_usage_bytes:sum)
        dir_name (str): directory name to store metrics. Metrics will be
            stored in dir_name suffix with _ocs_metrics.
        start (str): start timestamp of required datapoints
        stop (str): stop timestamp of required datapoints
        step (float): step of required datapoints

    """
    api = PrometheusAPI()
    log_dir_path = os.path.join(
        os.path.expanduser(ocsci_config.RUN['log_dir']),
        f"failed_testcase_ocs_logs_{ocsci_config.RUN['run_id']}",
        f"{dir_name}_ocs_metrics"
    )
    if not os.path.exists(log_dir_path):
        log.info(f'Creating directory {log_dir_path}')
    # exist_ok=True avoids a race between the existence check above and the
    # directory creation when multiple collectors run concurrently (the
    # exists() check is kept only to decide whether to log the creation).
    os.makedirs(log_dir_path, exist_ok=True)
    for metric in metrics:
        # query_range returns datapoints between start and stop at the
        # requested step resolution; one JSON dump per metric.
        datapoints = api.get(
            'query_range',
            {
                'query': metric,
                'start': start,
                'end': stop,
                'step': step
            }
        )
        file_name = os.path.join(log_dir_path, f'{metric}.json')
        log.info(f'Saving {metric} data into {file_name}')
        with open(file_name, 'w') as outfile:
            json.dump(datapoints.json(), outfile)