import pyslurm


def stats():
    try:
        stats = pyslurm.statistics()
        s = stats.get()
        # display(s)
        print(s)
    except ValueError as e:
        print("Error - {0}".format(e.args[0]))
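As a usage sketch (assuming pyslurm is installed and slurmctld is reachable), the dictionary returned by pyslurm.statistics().get() can be inspected key by key; the key names below are the ones used throughout these snippets, though which keys are present can vary with the Slurm/pyslurm version:

import pyslurm

try:
    sdiag = pyslurm.statistics().get()
except ValueError as e:
    raise SystemExit("Error - {0}".format(e.args[0]))

# Show a few scalar counters from the sdiag-style dictionary.
for key in ("jobs_submitted", "jobs_started", "server_thread_count"):
    print("{0}: {1}".format(key, sdiag.get(key)))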
def display(stats_dict):
    # Emit Graphite plaintext-protocol lines ("path value timestamp").
    # 'prefix' (the metric root) is assumed to be defined at module level.
    if stats_dict:
        req_time = stats_dict['req_time']
        for key, value in stats_dict.items():
            if key in ['rpc_user_stats', 'rpc_type_stats']:
                label = 'rpc_user_id'
                if key == 'rpc_type_stats':
                    label = 'rpc_type_id'
                for rpc_key, rpc_val in value.items():
                    if label == 'rpc_user_id':
                        # Resolve the numeric UID to a user name.
                        rpc_key = pwd.getpwuid(rpc_key)[0]
                    for rpc_val_key, rpc_value in rpc_val.items():
                        print("{0}.slurmctld.stats.{1}.{2}.{3} {4} {5}".format(
                            prefix, key, rpc_key, rpc_val_key, rpc_value,
                            req_time))
            else:
                print("{0}.slurmctld.stats.{1} {2} {3}".format(
                    prefix, key, value, req_time))


if __name__ == "__main__":
    import pwd
    import time
    import pyslurm

    try:
        stats = pyslurm.statistics()
        s = stats.get()
        display(s)
    except ValueError as e:
        print("Error - {0}".format(e.args[0]))
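A quick sketch of the output format, using a hypothetical prefix and a hand-built stats dictionary (note that display() also prints the req_time entry itself, since it iterates every key):

prefix = "cluster"  # hypothetical metric root; the real script defines this

fake_stats = {'req_time': 1700000000, 'jobs_submitted': 42}
display(fake_stats)
# Prints:
#   cluster.slurmctld.stats.req_time 1700000000 1700000000
#   cluster.slurmctld.stats.jobs_submitted 42 1700000000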
import pyslurm
from nose.tools import assert_true


def test_get_statistics():
    """Statistics: Test get_statistics() return type"""
    test_statistics = pyslurm.statistics().get()
    assert_true(isinstance(test_statistics, dict))
def get(self):
    s = pyslurm.statistics()
    data = s.get()
    return data
import time

import pyslurm


def get_stats(debug=False):
    stats = {}
    time_before = time.time()
    try:
        sdiag = pyslurm.statistics().get()
    except Exception:
        return None
    time_after = time.time()

    # Plugin Stats
    stats["stats_get_time"] = time_after - time_before

    # Slurmctld Stats
    stats["server_thread_count"] = sdiag.get("server_thread_count")
    stats["agent_queue_size"] = sdiag.get("agent_queue_size")
    stats["dbd_agent_queue_size"] = sdiag.get("dbd_agent_queue_size")

    # Jobs Stats
    stats["jobs_submitted"] = sdiag.get("jobs_submitted")
    stats["jobs_started"] = sdiag.get("jobs_started")
    stats["jobs_completed"] = sdiag.get("jobs_completed")
    stats["jobs_canceled"] = sdiag.get("jobs_canceled")
    stats["jobs_failed"] = sdiag.get("jobs_failed")

    # Main Scheduler Stats
    stats["main_last_cycle"] = sdiag.get("schedule_cycle_last")
    stats["main_max_cycle"] = sdiag.get("schedule_cycle_max")
    stats["main_total_cycles"] = sdiag.get("schedule_cycle_counter")
    if sdiag.get("schedule_cycle_counter") > 0:
        stats["main_mean_cycle"] = \
            sdiag.get("schedule_cycle_sum") / \
            sdiag.get("schedule_cycle_counter")
        stats["main_mean_depth_cycle"] = \
            sdiag.get("schedule_cycle_depth") / \
            sdiag.get("schedule_cycle_counter")
    if (sdiag.get("req_time") - sdiag.get("req_time_start")) > 60:
        stats["main_cycles_per_minute"] = \
            sdiag.get("schedule_cycle_counter") / \
            ((sdiag.get("req_time") - sdiag.get("req_time_start")) / 60)
    stats["main_last_queue_length"] = sdiag.get("schedule_queue_len")

    # Backfilling stats
    stats["bf_total_jobs_since_slurm_start"] = \
        sdiag.get("bf_backfilled_jobs")
    stats["bf_total_jobs_since_cycle_start"] = \
        sdiag.get("bf_last_backfilled_jobs")
    stats["bf_total_cycles"] = sdiag.get("bf_cycle_counter")
    stats["bf_last_cycle"] = sdiag.get("bf_cycle_last")
    stats["bf_max_cycle"] = sdiag.get("bf_cycle_max")
    stats["bf_queue_length"] = sdiag.get("bf_queue_len")
    if sdiag.get("bf_cycle_counter") > 0:
        stats["bf_mean_cycle"] = (sdiag.get("bf_cycle_sum") /
                                  sdiag.get("bf_cycle_counter"))
        stats["bf_depth_mean"] = (sdiag.get("bf_depth_sum") /
                                  sdiag.get("bf_cycle_counter"))
        stats["bf_depth_mean_try"] = (sdiag.get("bf_depth_try_sum") /
                                      sdiag.get("bf_cycle_counter"))
        stats["bf_queue_length_mean"] = (sdiag.get("bf_queue_len_sum") /
                                         sdiag.get("bf_cycle_counter"))
    stats["bf_last_depth_cycle"] = sdiag.get("bf_last_depth")
    stats["bf_last_depth_cycle_try"] = sdiag.get("bf_last_depth_try")

    # RPC users stats
    rpc_user_stats = sdiag.get('rpc_user_stats')
    if rpc_user_stats is None:
        rpc_user_stats = {}
    for user, u_metrics in rpc_user_stats.items():
        metric_prefixes = ['rpc_user_' + user + '_']
        # Aggregate all non-system users under a shared prefix as well.
        if user not in ['root', 'slurm']:
            metric_prefixes += ['rpc_user_users_']
        for metric_prefix in metric_prefixes:
            if metric_prefix + 'count' not in stats:
                stats[metric_prefix + 'count'] = 0
                stats[metric_prefix + 'total_time'] = 0
            stats[metric_prefix + 'count'] += u_metrics[u'count']
            stats[metric_prefix + 'total_time'] += u_metrics[u'total_time']
            stats[metric_prefix + 'ave_time'] = \
                stats[metric_prefix + 'total_time'] / \
                stats[metric_prefix + 'count']

    # RPC types stats
    rpc_type_stats = sdiag.get('rpc_type_stats')
    if rpc_type_stats is None:
        rpc_type_stats = {}
    for rpc_type, rpc_metrics in rpc_type_stats.items():
        for m_name, m_value in rpc_metrics.items():
            if m_name != 'id':
                metric = 'rpc_type_' + str(rpc_type) + '-' + m_name
                stats[metric] = m_value

    # Pending RPC by type, plus a summed global pending RPC counter
    metric_global = 'rpc_pending_global'
    rpc_queue_stats = sdiag.get('rpc_queue_stats')
    if rpc_queue_stats is None:
        rpc_queue_stats = {}
    stats[metric_global] = 0
    for rpc_type, rpc_metrics in rpc_queue_stats.items():
        stats[metric_global] += rpc_metrics[u'count']
        stats['rpc_pending_' + rpc_type] = rpc_metrics[u'count']

    return stats
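A possible command-line harness for get_stats() (a sketch; it assumes a reachable slurmctld and the imports above, and relies on get_stats() returning None on failure):

if __name__ == "__main__":
    metrics = get_stats()
    if metrics is None:
        raise SystemExit("could not retrieve statistics from slurmctld")
    for name in sorted(metrics):
        print("{0} = {1}".format(name, metrics[name]))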
import pyslurm


def collect(self):
    try:
        sdiag = pyslurm.statistics().get()
    except Exception:
        return
    else:
        # Slurmctld Stats
        self.publish('server_thread_count',
                     sdiag.get("server_thread_count"))
        self.publish('agent_queue_size', sdiag.get("agent_queue_size"))

        # Jobs Stats
        self.publish('jobs_submitted', sdiag.get("jobs_submitted"))
        self.publish('jobs_started', sdiag.get("jobs_started"))
        self.publish('jobs_completed', sdiag.get("jobs_completed"))
        self.publish('jobs_canceled', sdiag.get("jobs_canceled"))
        self.publish('jobs_failed', sdiag.get("jobs_failed"))

        # Main Scheduler Stats
        self.publish('main_last_cycle', sdiag.get("schedule_cycle_last"))
        self.publish('main_max_cycle', sdiag.get("schedule_cycle_max"))
        self.publish('main_total_cycles',
                     sdiag.get("schedule_cycle_counter"))
        if sdiag.get("schedule_cycle_counter") > 0:
            self.publish('main_mean_cycle',
                         sdiag.get("schedule_cycle_sum") /
                         sdiag.get("schedule_cycle_counter"))
            self.publish('main_mean_depth_cycle',
                         sdiag.get("schedule_cycle_depth") /
                         sdiag.get("schedule_cycle_counter"))
        if (sdiag.get("req_time") - sdiag.get("req_time_start")) > 60:
            self.publish('main_cycles_per_minute',
                         sdiag.get("schedule_cycle_counter") /
                         ((sdiag.get("req_time") -
                           sdiag.get("req_time_start")) / 60))
        self.publish('main_last_queue_length',
                     sdiag.get("schedule_queue_len"))

        # Backfilling stats
        self.publish('bf_total_jobs_since_slurm_start',
                     sdiag.get("bf_backfilled_jobs"))
        self.publish('bf_total_jobs_since_cycle_start',
                     sdiag.get("bf_last_backfilled_jobs"))
        self.publish('bf_total_cycles', sdiag.get("bf_cycle_counter"))
        self.publish('bf_last_cycle', sdiag.get("bf_cycle_last"))
        self.publish('bf_max_cycle', sdiag.get("bf_cycle_max"))
        self.publish('bf_queue_length', sdiag.get("bf_queue_len"))
        if sdiag.get("bf_cycle_counter") > 0:
            self.publish('bf_mean_cycle',
                         sdiag.get("bf_cycle_sum") /
                         sdiag.get("bf_cycle_counter"))
            self.publish('bf_depth_mean',
                         sdiag.get("bf_depth_sum") /
                         sdiag.get("bf_cycle_counter"))
            self.publish('bf_depth_mean_try',
                         sdiag.get("bf_depth_try_sum") /
                         sdiag.get("bf_cycle_counter"))
            self.publish('bf_queue_length_mean',
                         sdiag.get("bf_queue_len_sum") /
                         sdiag.get("bf_cycle_counter"))
        self.publish('bf_last_depth_cycle', sdiag.get("bf_last_depth"))
        self.publish('bf_last_depth_cycle_try',
                     sdiag.get("bf_last_depth_try"))
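Since collect() is written against Diamond's collector API (self.publish), a hypothetical stand-in class lets it run outside Diamond for smoke-testing; this harness is illustrative, not part of the original collector:

class _PrintCollector(object):
    # Minimal stand-in for a Diamond collector: publish() just prints
    # the metric instead of shipping it to a handler.
    def publish(self, name, value):
        print("{0} = {1}".format(name, value))


_PrintCollector.collect = collect  # attach the function above as a method

if __name__ == "__main__":
    _PrintCollector().collect()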