def get_xml_FactoryStats_total(self, indent_tab=xmlFormat.DEFAULT_TAB, leading_tab="", total=None):
    """Serialize the given factory-stats totals object as a "total" XML element.

    Thin wrapper around xmlFormat.class2string; the object passed in *total*
    is rendered as-is (NOTE(review): a None *total* is forwarded unchanged —
    callers are presumably expected to always supply one; verify).
    """
    rendered = xmlFormat.class2string(total,
                                      inst_name="total",
                                      indent_tab=indent_tab,
                                      leading_tab=leading_tab)
    return rendered
def get_xml(self):
    """Return the XML representation of self.data.

    Temporarily overrides the xmlFormat module-level defaults
    (DEFAULT_IGNORE_NONES, DEFAULT_LISTS_PARAMS, DEFAULT_DICTS_PARAMS) while
    serializing.  The overrides are restored in a ``finally`` block so that a
    formatting error can no longer leak the modified defaults to other users
    of xmlFormat (the original restored them only on the success path).

    Returns:
        str: XML string produced by xmlFormat.class2string.
    """
    old_default_ignore_nones = xmlFormat.DEFAULT_IGNORE_NONES
    old_default_lists_params = xmlFormat.DEFAULT_LISTS_PARAMS
    old_default_dicts_params = xmlFormat.DEFAULT_DICTS_PARAMS
    try:
        xmlFormat.DEFAULT_IGNORE_NONES = True
        # these are used internally, do not need to be ordered
        xml_format = self.get_xml_format()
        xmlFormat.DEFAULT_LISTS_PARAMS = xml_format['lists_params']
        xmlFormat.DEFAULT_DICTS_PARAMS = xml_format['dicts_params']
        # hack needed to make xmlFormat properly do the formatting,
        # using override_dictionary_type
        dict_override = type(xmlParse.OrderedDict())
        out = xmlFormat.class2string(self.data,
                                     self.get_top_element(),
                                     override_dictionary_type=dict_override)
    finally:
        # always restore the module defaults, even if class2string raised
        xmlFormat.DEFAULT_IGNORE_NONES = old_default_ignore_nones
        xmlFormat.DEFAULT_LISTS_PARAMS = old_default_lists_params
        xmlFormat.DEFAULT_DICTS_PARAMS = old_default_dicts_params
    return out
def aggregateStatus():
    """Aggregate the per-group status XML files of the frontend.

    Reads the status file of every monitored group, sums the per-factory and
    per-state counters into global totals, writes the aggregate XML file and
    the total/per-factory/per-state RRDs, and returns the aggregate status.

    Fixes vs. the previous version: key-deletion loops now iterate over a
    copy of the keys (deleting inside a ``for k in d.keys()`` loop raises
    RuntimeError on Python 3 dict views), and the bare ``except:`` around the
    int() conversion is narrowed to the exceptions int() can raise.

    Returns:
        dict: {'groups': {group: {...}}, 'total': {...}} aggregate status.
    """
    global monitorAggregatorConfig

    # maps an attribute class name to the prefix used in value names
    type_strings = {'Jobs': 'Jobs', 'Glideins': 'Glidein', 'MatchedJobs': 'MatchJob',
                    'MatchedGlideins': 'MatchGlidein', 'MatchedCores': 'MatchCore',
                    'Requested': 'Req'}

    global_total = {'Jobs': None, 'Glideins': None, 'MatchedJobs': None,
                    'Requested': None, 'MatchedGlideins': None, 'MatchedCores': None}
    status = {'groups': {}, 'total': global_total}
    global_fact_totals = {}
    for fos in ('factories', 'states'):
        global_fact_totals[fos] = {}

    nr_groups = 0
    for group in monitorAggregatorConfig.groups:
        # load group status file
        status_fname = os.path.join(os.path.join(monitorAggregatorConfig.monitor_dir, 'group_' + group),
                                    monitorAggregatorConfig.status_relname)
        try:
            group_data = xmlParse.xmlfile2dict(status_fname)
        except xmlParse.CorruptXML:
            logSupport.log.error("Corrupt XML in %s; deleting (it will be recreated)." % (status_fname))
            os.unlink(status_fname)
            continue
        except IOError:
            continue  # file not found, ignore

        # update group
        status['groups'][group] = {}
        for fos in ('factories', 'states'):
            try:
                status['groups'][group][fos] = group_data[fos]
            except KeyError:
                # first time after upgrade, factories may not be defined
                status['groups'][group][fos] = {}

        this_group = status['groups'][group]
        for fos in ('factories', 'states'):
            for fact in this_group[fos].keys():
                this_fact = this_group[fos][fact]
                if fact not in global_fact_totals[fos]:
                    # first iteration through, set fact totals equal to the
                    # first group's fact totals
                    global_fact_totals[fos][fact] = {}
                    for attribute in type_strings.keys():
                        global_fact_totals[fos][fact][attribute] = {}
                        if attribute in this_fact.keys():
                            for type_attribute in this_fact[attribute].keys():
                                this_type_attribute = this_fact[attribute][type_attribute]
                                try:
                                    global_fact_totals[fos][fact][attribute][type_attribute] = int(this_type_attribute)
                                except (ValueError, TypeError):
                                    pass  # not numeric (e.g. a nested dict), skip
                else:
                    # next iterations, factory already present in global fact
                    # totals, add the new factory values to the previous ones
                    for attribute in type_strings.keys():
                        if attribute in this_fact.keys():
                            for type_attribute in this_fact[attribute].keys():
                                this_type_attribute = this_fact[attribute][type_attribute]
                                if isinstance(this_type_attribute, type(global_fact_totals[fos])):
                                    # dict, do nothing
                                    pass
                                else:
                                    if attribute in global_fact_totals[fos][fact].keys() and \
                                            type_attribute in global_fact_totals[fos][fact][attribute].keys():
                                        global_fact_totals[fos][fact][attribute][type_attribute] += int(this_type_attribute)
                                    else:
                                        global_fact_totals[fos][fact][attribute][type_attribute] = int(this_type_attribute)

        if 'total' in group_data:
            nr_groups += 1
            status['groups'][group]['total'] = group_data['total']

            for w in global_total.keys():
                tel = global_total[w]
                if w not in group_data['total']:
                    continue
                el = group_data['total'][w]
                if tel is None:
                    # new one, just copy over
                    global_total[w] = {}
                    tel = global_total[w]
                    for a in el.keys():
                        tel[a] = int(el[a])  # coming from XML, everything is a string
                else:
                    # successive, sum
                    for a in el.keys():
                        if a in tel:
                            tel[a] += int(el[a])
                    # if any attribute from prev. factories is not in the current
                    # one, remove from total (iterate over a copy of the keys:
                    # the dict is modified inside the loop)
                    for a in list(tel):
                        if a not in el:
                            del tel[a]

    # remove categories no group ever reported
    # (iterate over a copy of the keys: the dict is modified inside the loop)
    for w in list(global_total):
        if global_total[w] is None:
            del global_total[w]  # remove group if not defined

    # Write xml files
    updated = time.time()
    # the "Requested" formatting parameters are identical in every section
    requested_params = {"Requested": {"dicts_params": {"Parameters": {"el_name": "Parameter",
                                                                      "subtypes_params": {"class": {}}}}}}
    fos_params = {"class": {"subclass_params": requested_params}}
    xml_str = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n' +
               '<VOFrontendStats>\n' +
               xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB,
                                  leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.dict2string(status["groups"], dict_name="groups", el_name="group",
                                     subtypes_params={"class": {"dicts_params": {
                                         "factories": {"el_name": "factory", "subtypes_params": fos_params},
                                         "states": {"el_name": "state", "subtypes_params": fos_params}}}},
                                     leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.class2string(status["total"], inst_name="total",
                                      leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.dict2string(global_fact_totals['factories'], dict_name="factories", el_name="factory",
                                     subtypes_params=fos_params, leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.dict2string(global_fact_totals['states'], dict_name="states", el_name="state",
                                     subtypes_params=fos_params, leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               "</VOFrontendStats>\n")
    glideinFrontendMonitoring.monitoringConfig.write_file(monitorAggregatorConfig.status_relname, xml_str)

    # Write rrds
    glideinFrontendMonitoring.monitoringConfig.establish_dir("total")
    write_one_rrd("total/Status_Attributes", updated, global_total, 0)

    for fact in global_fact_totals['factories'].keys():
        fe_dir = "total/factory_%s" % glideinFrontendMonitoring.sanitize(fact)
        glideinFrontendMonitoring.monitoringConfig.establish_dir(fe_dir)
        write_one_rrd("%s/Status_Attributes" % fe_dir, updated, global_fact_totals['factories'][fact], 1)
    for fact in global_fact_totals['states'].keys():
        fe_dir = "total/state_%s" % glideinFrontendMonitoring.sanitize(fact)
        glideinFrontendMonitoring.monitoringConfig.establish_dir(fe_dir)
        write_one_rrd("%s/Status_Attributes" % fe_dir, updated, global_fact_totals['states'][fact], 1)

    return status
def write_aggregation(self, global_fact_totals, updated, global_total, status):
    """Assemble the aggregate VOFrontendStats XML document and write it out.

    The document contains the update timestamp, per-group status, the overall
    totals, and the per-factory / per-state aggregated totals.  Output goes
    to the aggregate status file named by the global aggregator config.
    """
    # The "Requested" subclass formatting is identical everywhere it appears,
    # so build it once and reuse it for groups, factories and states.
    requested_params = {
        "Requested": {
            "dicts_params": {
                "Parameters": {
                    "el_name": "Parameter",
                    "subtypes_params": {"class": {}},
                }
            }
        }
    }
    fos_params = {"class": {"subclass_params": requested_params}}
    groups_params = {
        "class": {
            "dicts_params": {
                "factories": {"el_name": "factory", "subtypes_params": fos_params},
                "states": {"el_name": "state", "subtypes_params": fos_params},
            }
        }
    }
    tab = xmlFormat.DEFAULT_TAB
    pieces = [
        '<?xml version="1.0" encoding="ISO-8859-1"?>\n\n',
        '<VOFrontendStats>\n',
        xmlFormat.time2xml(updated, "updated", indent_tab=tab, leading_tab=tab) + "\n",
        xmlFormat.dict2string(status["groups"],
                              dict_name="groups",
                              el_name="group",
                              subtypes_params=groups_params,
                              leading_tab=tab) + "\n",
        xmlFormat.class2string(status["total"], inst_name="total", leading_tab=tab) + "\n",
        xmlFormat.dict2string(global_fact_totals['factories'],
                              dict_name="factories",
                              el_name="factory",
                              subtypes_params=fos_params,
                              leading_tab=tab) + "\n",
        xmlFormat.dict2string(global_fact_totals['states'],
                              dict_name="states",
                              el_name="state",
                              subtypes_params=fos_params,
                              leading_tab=tab) + "\n",
        "</VOFrontendStats>\n",
    ]
    xml_str = "".join(pieces)
    Monitoring_Output.write_file(
        Monitoring_Output.global_config_aggr["status_relname"], xml_str)
def get_xml_total(self, indent_tab=xmlFormat.DEFAULT_TAB, leading_tab=""):
    """Return this object's totals serialized as a "total" XML element."""
    totals = self.get_total()
    return xmlFormat.class2string(totals,
                                  inst_name="total",
                                  indent_tab=indent_tab,
                                  leading_tab=leading_tab)
def aggregateLogSummary():
    """
    Create an aggregate of log summary files, write it in an aggregate
    log summary file and in the end return the values.

    Fix vs. the previous version: ``dict.has_key`` (removed in Python 3) is
    replaced by the ``in`` operator, matching the style already used by the
    sibling aggregation functions in this file.

    Returns:
        dict: {'entries': {entry: {...}}, 'total': {...}} aggregate summary.
    """
    global monitorAggregatorConfig

    # initialize global counters
    global_total = {'Current': {}, 'Entered': {}, 'Exited': {},
                    'CompletedCounts': {'Sum': {}, 'Waste': {}, 'WasteTime': {},
                                        'Lasted': {}, 'JobsNr': {}, 'JobsDuration': {}}}
    for s in ('Wait', 'Idle', 'Running', 'Held'):
        for k in ['Current', 'Entered', 'Exited']:
            global_total[k][s] = 0
    for s in ('Completed', 'Removed'):
        for k in ['Entered']:
            global_total[k][s] = 0
    for k in glideFactoryMonitoring.getAllJobTypes():
        for w in ("Waste", "WasteTime"):
            el = {}
            for t in glideFactoryMonitoring.getAllMillRanges():
                el[t] = 0
            global_total['CompletedCounts'][w][k] = el
    el = {}
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['Lasted'] = el
    el = {}
    for t in glideFactoryMonitoring.getAllJobRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsNr'] = el
    el = {}  # KEL - why is the same el used twice (see above)
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsDuration'] = el
    global_total['CompletedCounts']['Sum'] = {'Glideins': 0, 'Lasted': 0, 'FailedNr': 0,
                                              'JobsNr': 0, 'JobsLasted': 0, 'JobsGoodput': 0,
                                              'JobsTerminated': 0, 'CondorLasted': 0}
    # NOTE(review): fe_total is computed but never read in this function;
    # kept for parity with the per-frontend totals of the sibling versions.
    fe_total = copy.deepcopy(global_total)  # same as above but for frontend totals

    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals
    nr_entries = 0
    nr_feentries = {}  # dictionary for nr entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry log summary file
        status_fname = os.path.join(os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
                                    monitorAggregatorConfig.logsummary_relname)
        try:
            entry_data = xmlParse.xmlfile2dict(status_fname,
                                               always_singular_list=['Fraction', 'TimeRange', 'Range'])
        except IOError:
            continue  # file not found, ignore

        # update entry: convert all XML string counters to ints
        out_data = {}
        for frontend in entry_data['frontends'].keys():
            fe_el = entry_data['frontends'][frontend]
            out_fe_el = {}
            for k in ['Current', 'Entered', 'Exited']:
                out_fe_el[k] = {}
                for s in fe_el[k].keys():
                    out_fe_el[k][s] = int(fe_el[k][s])
            out_fe_el['CompletedCounts'] = {'Waste': {}, 'WasteTime': {}, 'Lasted': {},
                                            'JobsNr': {}, 'JobsDuration': {}, 'Sum': {}}
            for tkey in fe_el['CompletedCounts']['Sum'].keys():
                out_fe_el['CompletedCounts']['Sum'][tkey] = int(fe_el['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    out_fe_el['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        out_fe_el['CompletedCounts'][w][k][t] = int(fe_el['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['Lasted'][t] = int(fe_el['CompletedCounts']['Lasted'][t]['val'])
            out_fe_el['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['JobsDuration'][t] = int(fe_el['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                out_fe_el['CompletedCounts']['JobsNr'][t] = int(fe_el['CompletedCounts']['JobsNr'][t]['val'])
            out_data[frontend] = out_fe_el
        status['entries'][entry] = {'frontends': out_data}

        # update total
        if 'total' in entry_data:  # was entry_data.has_key('total') (Python 2 only)
            nr_entries += 1
            local_total = {}
            for k in ['Current', 'Entered', 'Exited']:
                local_total[k] = {}
                for s in global_total[k].keys():
                    local_total[k][s] = int(entry_data['total'][k][s])
                    global_total[k][s] += int(entry_data['total'][k][s])
            local_total['CompletedCounts'] = {'Sum': {}, 'Waste': {}, 'WasteTime': {},
                                              'Lasted': {}, 'JobsNr': {}, 'JobsDuration': {}}
            for tkey in entry_data['total']['CompletedCounts']['Sum'].keys():
                local_total['CompletedCounts']['Sum'][tkey] = int(entry_data['total']['CompletedCounts']['Sum'][tkey])
                global_total['CompletedCounts']['Sum'][tkey] += int(entry_data['total']['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    local_total['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        local_total['CompletedCounts'][w][k][t] = int(entry_data['total']['CompletedCounts'][w][k][t]['val'])
                        global_total['CompletedCounts'][w][k][t] += int(entry_data['total']['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['Lasted'][t] = int(entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
                global_total['CompletedCounts']['Lasted'][t] += int(entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
            local_total['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['JobsDuration'][t] = int(entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
                global_total['CompletedCounts']['JobsDuration'][t] += int(entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                local_total['CompletedCounts']['JobsNr'][t] = int(entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
                global_total['CompletedCounts']['JobsNr'][t] += int(entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
            status['entries'][entry]['total'] = local_total

        # update frontends
        for fe in out_data:
            # compare each to the list of fe's accumulated so far
            if not (fe in status_fe['frontends']):
                status_fe['frontends'][fe] = {}
            if not (fe in nr_feentries):
                nr_feentries[fe] = 1  # already found one
            else:
                nr_feentries[fe] += 1
            # sum them up
            sumDictInt(out_data[fe], status_fe['frontends'][fe])

    # Write xml files
    # To do - Igor: Consider adding status_fe to the XML file
    updated = time.time()
    xml_str = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n' +
               '<glideFactoryLogSummary>\n' +
               xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB,
                                  leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.dict2string(status["entries"], dict_name="entries", el_name="entry",
                                     subtypes_params={"class": {"dicts_params": {"frontends": {"el_name": "frontend",
                                                                                               "subtypes_params": {"class": {'subclass_params': {'CompletedCounts': glideFactoryMonitoring.get_completed_stats_xml_desc()}}}}},
                                                                "subclass_params": {"total": {"subclass_params": {'CompletedCounts': glideFactoryMonitoring.get_completed_stats_xml_desc()}}}}},
                                     leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.class2string(status["total"], inst_name="total",
                                      subclass_params={'CompletedCounts': glideFactoryMonitoring.get_completed_stats_xml_desc()},
                                      leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               "</glideFactoryLogSummary>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(monitorAggregatorConfig.logsummary_relname, xml_str)

    # Write rrds
    writeLogSummaryRRDs("total", status["total"])
    # Frontend total rrds across all factories
    for fe in status_fe['frontends']:
        writeLogSummaryRRDs("total/%s" % ("frontend_" + fe), status_fe['frontends'][fe])

    return status
def aggregateStatus(in_downtime):
    """
    Create an aggregate of status files, write it in an aggregate status file
    and in the end return the values.

    Fixes vs. the previous version: ``dict.has_key`` (Python 2 only) replaced
    with the ``in`` operator; key-deletion loops iterate over a copy of the
    keys (deleting inside a ``for k in d.keys()`` loop raises RuntimeError on
    Python 3 dict views); bare ``except:`` narrowed to ``except Exception``.

    @type in_downtime: boolean
    @param in_downtime: Entry downtime information

    @rtype: dict
    @return: Dictionary of status information
    """
    global monitorAggregatorConfig

    avgEntries = ('InfoAge',)  # attributes averaged (instead of summed) over entries

    global_total = {'Status': None, 'Requested': None, 'ClientMonitor': None}
    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals

    # initialize the RRD dictionary, so it gets created properly
    val_dict = {}
    for tp in global_total.keys():
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        for a in attributes_tp:
            val_dict["%s%s" % (tp_str, a)] = None

    nr_entries = 0
    nr_feentries = {}  # dictionary for nr entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry status file
        status_fname = os.path.join(os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
                                    monitorAggregatorConfig.status_relname)
        try:
            entry_data = xmlParse.xmlfile2dict(status_fname)
        except IOError:
            continue  # file not found, ignore

        # update entry
        status['entries'][entry] = {'downtime': entry_data['downtime'],
                                    'frontends': entry_data['frontends']}

        # update total
        if 'total' in entry_data:
            nr_entries += 1
            status['entries'][entry]['total'] = entry_data['total']

            for w in global_total.keys():
                tel = global_total[w]
                if w not in entry_data['total']:
                    continue
                el = entry_data['total'][w]
                if tel is None:
                    # new one, just copy over
                    global_total[w] = {}
                    tel = global_total[w]
                    for a in el.keys():
                        tel[a] = int(el[a])  # coming from XML, everything is a string
                else:
                    # successive, sum
                    for a in el.keys():
                        if a in tel:
                            tel[a] += int(el[a])
                    # if any attribute from prev. frontends is not in the current
                    # one, remove from total (copy of keys: dict modified in loop)
                    for a in list(tel):
                        if a not in el:
                            del tel[a]

        # update frontends
        if 'frontends' in entry_data:
            # loop on fe's in this entry
            for fe in entry_data['frontends'].keys():
                # compare each to the list of fe's accumulated so far
                if fe not in status_fe['frontends']:
                    status_fe['frontends'][fe] = {}
                if fe not in nr_feentries:
                    nr_feentries[fe] = 1  # already found one
                else:
                    nr_feentries[fe] += 1
                for w in entry_data['frontends'][fe].keys():
                    if w not in status_fe['frontends'][fe]:
                        status_fe['frontends'][fe][w] = {}
                    tela = status_fe['frontends'][fe][w]
                    ela = entry_data['frontends'][fe][w]
                    for a in ela.keys():
                        # for the 'Downtime' field (only bool), do logical AND of all site downtimes
                        # 'w' is frontend attribute name, ie 'ClientMonitor' or 'Downtime'
                        # 'a' is sub-field, such as 'GlideIdle' or 'status'
                        if w == 'Downtime' and a == 'status':
                            ela_val = (ela[a] != 'False')  # Check if 'True' or 'False' but default to True if neither
                            if a in tela:
                                try:
                                    tela[a] = tela[a] and ela_val
                                except Exception:
                                    pass  # just protect
                            else:
                                tela[a] = ela_val
                        else:
                            try:
                                # if there already, sum
                                if a in tela:
                                    tela[a] += int(ela[a])
                                else:
                                    tela[a] = int(ela[a])
                            except Exception:
                                pass  # not an int, not Downtime, so do nothing
                    # if any attribute from prev. frontends is not in the current
                    # one, remove from total (copy of keys: dict modified in loop)
                    for a in list(tela):
                        if a not in ela:
                            del tela[a]

    # copy of keys: entries may be deleted while iterating
    for w in list(global_total):
        if global_total[w] is None:
            del global_total[w]  # remove entry if not defined
        else:
            tel = global_total[w]
            for a in tel.keys():
                if a in avgEntries:
                    # since all entries must have this attr to be here,
                    # just divide by nr of entries
                    tel[a] = tel[a] / nr_entries

    # do average for per-fe stat--'InfoAge' only
    for fe in status_fe['frontends'].keys():
        for w in status_fe['frontends'][fe].keys():
            tel = status_fe['frontends'][fe][w]
            for a in tel.keys():
                if a in avgEntries and fe in nr_feentries:
                    tel[a] = tel[a] / nr_feentries[fe]  # divide per fe

    xml_downtime = xmlFormat.dict2string({}, dict_name='downtime', el_name='',
                                         params={'status': str(in_downtime)},
                                         leading_tab=xmlFormat.DEFAULT_TAB)

    # Write xml files
    updated = time.time()
    xml_str = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n' +
               '<glideFactoryQStats>\n' +
               xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB,
                                  leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xml_downtime + "\n" +
               xmlFormat.dict2string(status["entries"], dict_name="entries", el_name="entry",
                                     subtypes_params={"class": {"dicts_params": {"frontends": {"el_name": "frontend",
                                                                                               "subtypes_params": {"class": {"subclass_params": {"Requested": {"dicts_params": {"Parameters": {"el_name": "Parameter",
                                                                                                                                                                                               "subtypes_params": {"class": {}}}}}}}}}}}},
                                     leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.class2string(status["total"], inst_name="total",
                                      leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.dict2string(status_fe["frontends"], dict_name="frontends", el_name="frontend",
                                     subtypes_params={"class": {"subclass_params": {"Requested": {"dicts_params": {"Parameters": {"el_name": "Parameter",
                                                                                                                                  "subtypes_params": {"class": {}}}}}}}},
                                     leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               "</glideFactoryQStats>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(monitorAggregatorConfig.status_relname, xml_str)

    # Write rrds
    glideFactoryMonitoring.monitoringConfig.establish_dir("total")
    # Total rrd across all frontends and factories
    for tp in global_total.keys():
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        tp_el = global_total[tp]
        for a in tp_el.keys():
            if a in attributes_tp:
                a_el = int(tp_el[a])
                val_dict["%s%s" % (tp_str, a)] = a_el
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi("total/Status_Attributes",
                                                            "GAUGE", updated, val_dict)
    # Frontend total rrds across all factories
    for fe in status_fe['frontends'].keys():
        glideFactoryMonitoring.monitoringConfig.establish_dir("total/%s" % ("frontend_" + fe))
        for tp in status_fe['frontends'][fe].keys():
            # type - status or requested
            if not (tp in type_strings.keys()):
                continue
            tp_str = type_strings[tp]
            attributes_tp = status_attributes[tp]
            tp_el = status_fe['frontends'][fe][tp]
            for a in tp_el.keys():
                if a in attributes_tp:
                    a_el = int(tp_el[a])
                    val_dict["%s%s" % (tp_str, a)] = a_el
        glideFactoryMonitoring.monitoringConfig.write_rrd_multi("total/%s/Status_Attributes" % ("frontend_" + fe),
                                                                "GAUGE", updated, val_dict)
    return status
def aggregateStatus(in_downtime):
    """
    Create an aggregate of status files, write it in an aggregate status file
    and in the end return the values

    @type in_downtime: boolean
    @param in_downtime: Entry downtime information

    @rtype: dict
    @return: Dictionary of status information
    """
    global monitorAggregatorConfig

    # attributes averaged (instead of summed) across entries
    avgEntries = ('InfoAge',)

    global_total = {'Status': None, 'Requested': None, 'ClientMonitor': None}
    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals
    completed_data_tot = {'entries': {}}  # per-entry completed-jobs JSON stats

    # initialize the RRD dictionary, so it gets created properly
    val_dict = {}
    for tp in global_total.keys():
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        for a in attributes_tp:
            val_dict["%s%s" % (tp_str, a)] = None

    nr_entries = 0
    nr_feentries = {}  # dictionary for nr entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry status file
        status_fname = os.path.join(os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_'+entry),
                                    monitorAggregatorConfig.status_relname)
        # load entry completed data file
        completed_data_fname = os.path.join(os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_'+entry),
                                            monitorAggregatorConfig.completed_data_relname)
        completed_data_fp = None
        try:
            # entry_data is a regular dictionary of nested dictionaries/lists returned form the XML parsed
            entry_data = xmlParse.xmlfile2dict(status_fname)
            completed_data_fp = open(completed_data_fname)
            completed_data = json.load(completed_data_fp)
        except IOError:
            # NOTE(review): a missing completed-data file skips the whole
            # entry, even if the status XML parsed fine — confirm intended
            continue  # file not found, ignore
        finally:
            if completed_data_fp:
                completed_data_fp.close()

        # update entry
        status['entries'][entry] = {'downtime': entry_data['downtime'],
                                    'frontends': entry_data['frontends']}

        # update completed data
        completed_data_tot['entries'][entry] = completed_data['stats']

        # to log when total dictionary is modified (in update total/frontend)
        tmp_list_removed = []

        # update total
        if 'total' in entry_data:
            nr_entries += 1
            status['entries'][entry]['total'] = entry_data['total']

            for w in global_total:
                tel = global_total[w]
                if w not in entry_data['total']:
                    continue
                el = entry_data['total'][w]
                if tel is None:
                    # new one, just copy over
                    global_total[w] = {}
                    tel = global_total[w]
                    for a in el:
                        tel[a] = int(el[a])  # coming from XML, everything is a string
                else:
                    # successive, sum
                    for a in el:
                        if a in tel:
                            tel[a] += int(el[a])
                    # if any attribute from prev. frontends is not in the current one, remove from total
                    for a in list(tel):
                        # making a copy of the keys because the dict is being modified
                        if a not in el:
                            del tel[a]
                            tmp_list_removed.append(a)
                    if tmp_list_removed:
                        logSupport.log.debug("Elements removed from total status (%s: %s) because of %s: %s" %
                                             (w, len(tel), entry, tmp_list_removed))
                        tmp_list_removed = []

        # update frontends
        if 'frontends' in entry_data:
            # loop on fe's in this entry
            for fe in entry_data['frontends']:
                # compare each to the list of fe's accumulated so far
                if fe not in status_fe['frontends']:
                    status_fe['frontends'][fe] = {}
                    fe_first = True  # first entry that mentions this frontend
                else:
                    fe_first = False
                # number of entries with this frontend
                if fe not in nr_feentries:
                    nr_feentries[fe] = 1  # first occurrence of frontend
                else:
                    nr_feentries[fe] += 1  # already found one
                for w in entry_data['frontends'][fe]:
                    # w is the entry name of the entry using the frontend
                    if w not in status_fe['frontends'][fe]:
                        status_fe['frontends'][fe][w] = {}
                    tela = status_fe['frontends'][fe][w]  # accumulated totals so far
                    ela = entry_data['frontends'][fe][w]  # this entry's values
                    for a in ela:
                        # for the 'Downtime' field (only bool), do logical AND of all site downtimes
                        # 'w' is frontend attribute name, ie 'ClientMonitor' or 'Downtime'
                        # 'a' is sub-field, such as 'GlideIdle' or 'status'
                        if w == 'Downtime' and a == 'status':
                            ela_val = (ela[a] != 'False')  # Check if 'True' or 'False' but default to True if neither
                            try:
                                tela[a] = tela[a] and ela_val
                            except KeyError:
                                # first time this sub-field is seen for the frontend
                                tela[a] = ela_val
                            except:
                                pass  # just protect
                        else:
                            # All other fields could be numbers or something else
                            try:
                                # if there already, sum
                                if a in tela:
                                    tela[a] += int(ela[a])
                                else:
                                    if fe_first:
                                        # to avoid adding back attributes that were not in other frontends
                                        tela[a] = int(ela[a])
                            except:
                                pass  # not an int, not Downtime, so do nothing
                    # if any attribute from prev. frontends is not in the current one, remove from total
                    if not fe_first and w != 'Downtime':
                        for a in list(tela):
                            # making a copy of the keys because the dict is being modified
                            if a not in ela:
                                del tela[a]
                                tmp_list_removed.append(a)
                        if tmp_list_removed:
                            logSupport.log.debug("Elements removed from Frontend %s total status (%s: %s) because of %s: %s" %
                                                 (fe, w, len(tela), entry, tmp_list_removed))
                            tmp_list_removed = []

    for w in list(global_total):
        # making a copy of the keys because the dict is being modified
        if global_total[w] is None:
            del global_total[w]  # remove entry if not defined
        else:
            tel = global_total[w]
            for a in tel:
                if a in avgEntries:
                    # since all entries must have this attr to be here, just divide by nr of entries
                    # NOTE(review): '/' is float division on Python 3 — confirm
                    # whether integer division ('//') was intended here
                    tel[a] = tel[a]/nr_entries

    # do average for per-fe stat--'InfoAge' only
    for fe in status_fe['frontends'].keys():
        for w in status_fe['frontends'][fe].keys():
            tel = status_fe['frontends'][fe][w]
            for a in tel.keys():
                if a in avgEntries and fe in nr_feentries:
                    tel[a] = tel[a]/nr_feentries[fe]  # divide per fe

    # one-element XML fragment carrying the factory downtime flag
    xml_downtime = xmlFormat.dict2string({}, dict_name='downtime', el_name='',
                                         params={'status': str(in_downtime)},
                                         leading_tab=xmlFormat.DEFAULT_TAB)

    # Write xml files
    updated = time.time()
    xml_str=('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n'+
             '<glideFactoryQStats>\n'+
             xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB, leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+
             xml_downtime + "\n" +
             xmlFormat.dict2string(status["entries"], dict_name="entries", el_name="entry",
                                   subtypes_params={"class":{"dicts_params":{"frontends":{"el_name":"frontend",
                                                                                          "subtypes_params":{"class":{"subclass_params":{"Requested":{"dicts_params":{"Parameters":{"el_name":"Parameter",
                                                                                                                                                                                    "subtypes_params":{"class":{}}}}}}}}}}}},
                                   leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+
             xmlFormat.class2string(status["total"], inst_name="total", leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+
             xmlFormat.dict2string(status_fe["frontends"], dict_name="frontends", el_name="frontend",
                                   subtypes_params={"class":{"subclass_params":{"Requested":{"dicts_params":{"Parameters":{"el_name":"Parameter",
                                                                                                                           "subtypes_params":{"class":{}}}}}}}},
                                   leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+
             "</glideFactoryQStats>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(monitorAggregatorConfig.status_relname, xml_str)

    # write json
    glideFactoryMonitoring.monitoringConfig.write_completed_json(monitorAggregatorConfig.completed_data_relname.split('.')[0], updated, completed_data_tot)

    # Write rrds
    glideFactoryMonitoring.monitoringConfig.establish_dir("total")
    # Total rrd across all frontends and factories
    for tp in global_total:
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        tp_el = global_total[tp]
        for a in tp_el.keys():
            if a in attributes_tp:
                a_el = int(tp_el[a])
                val_dict["%s%s" % (tp_str, a)] = a_el
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi("total/Status_Attributes",
                                                            "GAUGE", updated, val_dict)
    # Frontend total rrds across all factories
    for fe in status_fe['frontends'].keys():
        glideFactoryMonitoring.monitoringConfig.establish_dir("total/%s" % ("frontend_"+fe))
        for tp in status_fe['frontends'][fe].keys():
            # type - status or requested
            if not (tp in type_strings.keys()):
                continue
            tp_str = type_strings[tp]
            attributes_tp = status_attributes[tp]
            tp_el = status_fe['frontends'][fe][tp]
            for a in tp_el.keys():
                if a in attributes_tp:
                    a_el = int(tp_el[a])
                    val_dict["%s%s" % (tp_str, a)] = a_el
        glideFactoryMonitoring.monitoringConfig.write_rrd_multi("total/%s/Status_Attributes" % ("frontend_"+fe),
                                                                "GAUGE", updated, val_dict)
    return status
def aggregateLogSummary():
    """Create an aggregate of the per-entry log summary files.

    Reads each entry's log-summary XML, sums the counters into a global
    total and into per-frontend totals, writes the aggregate XML file and
    the log-summary RRDs, and returns the aggregate.

    @rtype: dict
    @return: {'entries': {entry: {'frontends': ..., 'total': ...}},
              'total': global counters}

    Fix: the initialization of ``status`` had been commented out while the
    variable is still used below, which raised NameError on the first entry;
    it is restored here.
    """
    global monitorAggregatorConfig

    # initialize global counters
    global_total = {
        'Current': {},
        'Entered': {},
        'Exited': {},
        'CompletedCounts': {
            'Sum': {},
            'Waste': {},
            'WasteTime': {},
            'Lasted': {},
            'JobsNr': {},
            'JobsDuration': {}
        }
    }
    for s in ('Wait', 'Idle', 'Running', 'Held'):
        for k in ['Current', 'Entered', 'Exited']:
            global_total[k][s] = 0
    for s in ('Completed', 'Removed'):
        # only 'Entered' is known for terminal states (no numbers from inactive logs)
        for k in ['Entered']:
            global_total[k][s] = 0
    for k in glideFactoryMonitoring.getAllJobTypes():
        for w in ('Waste', 'WasteTime'):
            el = {}
            for t in glideFactoryMonitoring.getAllMillRanges():
                el[t] = 0
            global_total['CompletedCounts'][w][k] = el
    el = {}
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['Lasted'] = el
    el = {}
    for t in glideFactoryMonitoring.getAllJobRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsNr'] = el
    el = {}  # fresh dict each time; reusing the previous one would alias the counters
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsDuration'] = el
    global_total['CompletedCounts']['Sum'] = {
        'Glideins': 0,
        'Lasted': 0,
        'FailedNr': 0,
        'JobsNr': 0,
        'JobsLasted': 0,
        'JobsGoodput': 0,
        'JobsTerminated': 0,
        'CondorLasted': 0
    }

    # NOTE(review): fe_total is assigned but never used in this function —
    # kept for backward compatibility, looks like a leftover.
    fe_total = copy.deepcopy(global_total)  # same shape as above, for frontend totals

    # FIX: this line was commented out although 'status' is used everywhere below.
    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals

    nr_entries = 0
    nr_feentries = {}  # dictionary of nr of entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry log summary file
        status_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.logsummary_relname)
        try:
            entry_data = xmlParse.xmlfile2dict(
                status_fname,
                always_singular_list=['Fraction', 'TimeRange', 'Range'])
        except IOError:
            continue  # file not found, ignore

        # update entry: convert the XML string values to ints, per frontend
        out_data = {}
        for frontend in entry_data['frontends'].keys():
            fe_el = entry_data['frontends'][frontend]
            out_fe_el = {}
            for k in ['Current', 'Entered', 'Exited']:
                out_fe_el[k] = {}
                for s in fe_el[k].keys():
                    out_fe_el[k][s] = int(fe_el[k][s])
            out_fe_el['CompletedCounts'] = {
                'Waste': {},
                'WasteTime': {},
                'Lasted': {},
                'JobsNr': {},
                'JobsDuration': {},
                'Sum': {}
            }
            for tkey in fe_el['CompletedCounts']['Sum'].keys():
                out_fe_el['CompletedCounts']['Sum'][tkey] = int(
                    fe_el['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    out_fe_el['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        out_fe_el['CompletedCounts'][w][k][t] = int(
                            fe_el['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['Lasted'][t] = int(
                    fe_el['CompletedCounts']['Lasted'][t]['val'])
            out_fe_el['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['JobsDuration'][t] = int(
                    fe_el['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                out_fe_el['CompletedCounts']['JobsNr'][t] = int(
                    fe_el['CompletedCounts']['JobsNr'][t]['val'])
            out_data[frontend] = out_fe_el
        status['entries'][entry] = {'frontends': out_data}

        # update total: accumulate this entry's totals into the global counters
        if 'total' in entry_data:
            nr_entries += 1
            local_total = {}
            for k in ['Current', 'Entered', 'Exited']:
                local_total[k] = {}
                for s in global_total[k].keys():
                    local_total[k][s] = int(entry_data['total'][k][s])
                    global_total[k][s] += int(entry_data['total'][k][s])
            local_total['CompletedCounts'] = {
                'Sum': {},
                'Waste': {},
                'WasteTime': {},
                'Lasted': {},
                'JobsNr': {},
                'JobsDuration': {}
            }
            for tkey in entry_data['total']['CompletedCounts']['Sum'].keys():
                local_total['CompletedCounts']['Sum'][tkey] = int(
                    entry_data['total']['CompletedCounts']['Sum'][tkey])
                global_total['CompletedCounts']['Sum'][tkey] += int(
                    entry_data['total']['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ('Waste', 'WasteTime'):
                    local_total['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        local_total['CompletedCounts'][w][k][t] = int(
                            entry_data['total']['CompletedCounts'][w][k][t]['val'])
                        global_total['CompletedCounts'][w][k][t] += int(
                            entry_data['total']['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['Lasted'][t] = int(
                    entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
                global_total['CompletedCounts']['Lasted'][t] += int(
                    entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
            local_total['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['JobsDuration'][t] = int(
                    entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
                global_total['CompletedCounts']['JobsDuration'][t] += int(
                    entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                local_total['CompletedCounts']['JobsNr'][t] = int(
                    entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
                global_total['CompletedCounts']['JobsNr'][t] += int(
                    entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
            status['entries'][entry]['total'] = local_total

        # update frontends: accumulate this entry's per-frontend data
        for fe in out_data:
            # compare each to the list of fe's accumulated so far
            if not (fe in status_fe['frontends']):
                status_fe['frontends'][fe] = {}
            if not (fe in nr_feentries):
                nr_feentries[fe] = 1  # first entry seen for this frontend
            else:
                nr_feentries[fe] += 1  # already found one
            # sum them up
            sumDictInt(out_data[fe], status_fe['frontends'][fe])

    # Write xml files
    # To do - Igor: Consider adding status_fe to the XML file
    updated = time.time()
    xml_str = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n'
               + '<glideFactoryLogSummary>\n'
               + xmlFormat.time2xml(updated, "updated",
                                    indent_tab=xmlFormat.DEFAULT_TAB,
                                    leading_tab=xmlFormat.DEFAULT_TAB) + '\n'
               + xmlFormat.dict2string(
                   status["entries"],
                   dict_name="entries",
                   el_name="entry",
                   subtypes_params={
                       "class": {
                           "dicts_params": {
                               "frontends": {
                                   "el_name": "frontend",
                                   "subtypes_params": {
                                       "class": {
                                           'subclass_params': {
                                               'CompletedCounts':
                                               glideFactoryMonitoring.get_completed_stats_xml_desc()
                                           }
                                       }
                                   }
                               }
                           },
                           "subclass_params": {
                               "total": {
                                   "subclass_params": {
                                       'CompletedCounts':
                                       glideFactoryMonitoring.get_completed_stats_xml_desc()
                                   }
                               }
                           }
                       }
                   },
                   leading_tab=xmlFormat.DEFAULT_TAB) + '\n'
               + xmlFormat.class2string(
                   status["total"],
                   inst_name="total",
                   subclass_params={
                       'CompletedCounts':
                       glideFactoryMonitoring.get_completed_stats_xml_desc()
                   },
                   leading_tab=xmlFormat.DEFAULT_TAB) + '\n'
               + "</glideFactoryLogSummary>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(
        monitorAggregatorConfig.logsummary_relname, xml_str)

    # Write rrds
    writeLogSummaryRRDs("total", status["total"])

    # Frontend total rrds across all factories
    for fe in status_fe['frontends']:
        writeLogSummaryRRDs("total/%s" % ("frontend_" + fe),
                            status_fe['frontends'][fe])

    return status
def aggregateStatus(in_downtime):
    """ Create an aggregate of status files, write it in an aggregate status file
    and in the end return the values

    @type in_downtime: boolean
    @param in_downtime: Entry downtime information

    @rtype: dict
    @return: Dictionary of status information
    """
    global monitorAggregatorConfig

    # attributes that are averaged over entries instead of summed
    avgEntries = ('InfoAge', )

    global_total = {'Status': None, 'Requested': None, 'ClientMonitor': None}
    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals
    completed_data_tot = {'entries': {}}

    # initialize the RRD dictionary, so it gets created properly
    val_dict = {}
    for tp in global_total.keys():
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        for a in attributes_tp:
            val_dict["%s%s" % (tp_str, a)] = None

    nr_entries = 0
    nr_feentries = {}  # dictionary for nr entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry status file
        status_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.status_relname)
        # load entry completed data file
        completed_data_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.completed_data_relname)
        completed_data_fp = None
        try:
            # entry_data is a regular dictionary of nested dictionaries/lists returned from the XML parsed
            entry_data = xmlParse.xmlfile2dict(status_fname)
            completed_data_fp = open(completed_data_fname)
            completed_data = json.load(completed_data_fp)
        except IOError:
            continue  # file not found, ignore
        finally:
            # close the json file even when the XML parse/open/load failed
            if completed_data_fp:
                completed_data_fp.close()

        # update entry
        status['entries'][entry] = {
            'downtime': entry_data['downtime'],
            'frontends': entry_data['frontends']
        }

        # update completed data
        completed_data_tot['entries'][entry] = completed_data['stats']

        # to log when total dictionary is modified (in update total/frontend)
        tmp_list_removed = []

        # update total
        if 'total' in entry_data:
            nr_entries += 1
            status['entries'][entry]['total'] = entry_data['total']

            for w in global_total:
                tel = global_total[w]
                if w not in entry_data['total']:
                    continue
                el = entry_data['total'][w]
                if tel is None:
                    # new one, just copy over
                    global_total[w] = {}
                    tel = global_total[w]
                    for a in el:
                        tel[a] = int(
                            el[a])  # coming from XML, everything is a string
                else:
                    # successive, sum
                    for a in el:
                        if a in tel:
                            tel[a] += int(el[a])
                    # if any attribute from prev. frontends is not in the current one, remove from total
                    for a in list(
                            tel
                    ):  # making a copy of the keys because the dict is being modified
                        if a not in el:
                            del tel[a]
                            tmp_list_removed.append(a)
                    if tmp_list_removed:
                        logSupport.log.debug(
                            "Elements removed from total status (%s: %s) because of %s: %s"
                            % (w, len(tel), entry, tmp_list_removed))
                        tmp_list_removed = []

        # update frontends
        if 'frontends' in entry_data:
            # loop on fe's in this entry
            for fe in entry_data['frontends']:
                # compare each to the list of fe's accumulated so far
                if fe not in status_fe['frontends']:
                    status_fe['frontends'][fe] = {}
                    fe_first = True
                else:
                    fe_first = False
                # number of entries with this frontend
                if fe not in nr_feentries:
                    nr_feentries[fe] = 1  # first occurrence of frontend
                else:
                    nr_feentries[fe] += 1  # already found one
                # 'w' is a frontend attribute name, ie 'ClientMonitor' or 'Downtime'
                for w in entry_data['frontends'][fe]:
                    if w not in status_fe['frontends'][fe]:
                        status_fe['frontends'][fe][w] = {}
                    tela = status_fe['frontends'][fe][w]
                    ela = entry_data['frontends'][fe][w]
                    for a in ela:
                        # for the 'Downtime' field (only bool), do logical AND of all site downtimes
                        # 'w' is frontend attribute name, ie 'ClientMonitor' or 'Downtime'
                        # 'a' is sub-field, such as 'GlideIdle' or 'status'
                        if w == 'Downtime' and a == 'status':
                            ela_val = (
                                ela[a] != 'False'
                            )  # Check if 'True' or 'False' but default to True if neither
                            try:
                                tela[a] = tela[a] and ela_val
                            except KeyError:
                                # first time this attribute is seen for this fe
                                tela[a] = ela_val
                            except:
                                pass  # just protect
                        else:
                            # All other fields could be numbers or something else
                            try:
                                # if there already, sum
                                if a in tela:
                                    tela[a] += int(ela[a])
                                else:
                                    if fe_first:  # to avoid adding back attributes that were not in other frontends
                                        tela[a] = int(ela[a])
                            except:
                                pass  # not an int, not Downtime, so do nothing
                    # if any attribute from prev. frontends is not in the current one, remove from total
                    if not fe_first and w != 'Downtime':
                        for a in list(
                                tela
                        ):  # making a copy of the keys because the dict is being modified
                            if a not in ela:
                                del tela[a]
                                tmp_list_removed.append(a)
                        if tmp_list_removed:
                            logSupport.log.debug(
                                "Elements removed from Frontend %s total status (%s: %s) because of %s: %s"
                                % (fe, w, len(tela), entry, tmp_list_removed))
                            tmp_list_removed = []

    for w in list(
            global_total
    ):  # making a copy of the keys because the dict is being modified
        if global_total[w] is None:
            del global_total[w]  # remove entry if not defined
        else:
            tel = global_total[w]
            for a in tel:
                if a in avgEntries:
                    # since all entries must have this attr to be here, just divide by nr of entries
                    # NOTE(review): true division — yields a float on Python 3; confirm RRD layer accepts it
                    tel[a] = tel[a] / nr_entries

    # do average for per-fe stat--'InfoAge' only
    for fe in status_fe['frontends'].keys():
        for w in status_fe['frontends'][fe].keys():
            tel = status_fe['frontends'][fe][w]
            for a in tel.keys():
                if a in avgEntries and fe in nr_feentries:
                    tel[a] = tel[a] / nr_feentries[fe]  # divide per fe

    xml_downtime = xmlFormat.dict2string({},
                                         dict_name='downtime',
                                         el_name='',
                                         params={'status': str(in_downtime)},
                                         leading_tab=xmlFormat.DEFAULT_TAB)

    # Write xml files
    updated = time.time()
    xml_str = (
        '<?xml version="1.0" encoding="ISO-8859-1"?>\n\n' +
        '<glideFactoryQStats>\n' +
        xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB,
                           leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        xml_downtime + "\n" + xmlFormat.dict2string(
            status["entries"],
            dict_name="entries",
            el_name="entry",
            subtypes_params={
                "class": {
                    "dicts_params": {
                        "frontends": {
                            "el_name": "frontend",
                            "subtypes_params": {
                                "class": {
                                    "subclass_params": {
                                        "Requested": {
                                            "dicts_params": {
                                                "Parameters": {
                                                    "el_name": "Parameter",
                                                    "subtypes_params": {
                                                        "class": {}
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            },
            leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        xmlFormat.class2string(status["total"],
                               inst_name="total",
                               leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        xmlFormat.dict2string(status_fe["frontends"],
                              dict_name="frontends",
                              el_name="frontend",
                              subtypes_params={
                                  "class": {
                                      "subclass_params": {
                                          "Requested": {
                                              "dicts_params": {
                                                  "Parameters": {
                                                      "el_name": "Parameter",
                                                      "subtypes_params": {
                                                          "class": {}
                                                      }
                                                  }
                                              }
                                          }
                                      }
                                  }
                              },
                              leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        "</glideFactoryQStats>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(
        monitorAggregatorConfig.status_relname, xml_str)

    # write json
    glideFactoryMonitoring.monitoringConfig.write_completed_json(
        monitorAggregatorConfig.completed_data_relname.split('.')[0], updated,
        completed_data_tot)

    # Write rrds
    glideFactoryMonitoring.monitoringConfig.establish_dir("total")
    # Total rrd across all frontends and factories
    for tp in global_total:
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        tp_el = global_total[tp]
        for a in tp_el.keys():
            if a in attributes_tp:
                a_el = int(tp_el[a])
                val_dict["%s%s" % (tp_str, a)] = a_el
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "total/Status_Attributes", "GAUGE", updated, val_dict)

    # Frontend total rrds across all factories
    for fe in status_fe['frontends'].keys():
        glideFactoryMonitoring.monitoringConfig.establish_dir(
            "total/%s" % ("frontend_" + fe))
        for tp in status_fe['frontends'][fe].keys():
            # type - status or requested
            if not (tp in type_strings.keys()):
                continue
            tp_str = type_strings[tp]
            attributes_tp = status_attributes[tp]
            tp_el = status_fe['frontends'][fe][tp]
            for a in tp_el.keys():
                if a in attributes_tp:
                    a_el = int(tp_el[a])
                    val_dict["%s%s" % (tp_str, a)] = a_el
        glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
            "total/%s/Status_Attributes" % ("frontend_" + fe), "GAUGE",
            updated, val_dict)

    return status
def get_xml_total(self, indent_tab=xmlFormat.DEFAULT_TAB, leading_tab=""):
    """Serialize the current totals as an XML "total" element.

    @param indent_tab: tab string used for nested indentation
    @param leading_tab: tab string prepended to every output line
    @return: XML string produced by xmlFormat.class2string
    """
    return xmlFormat.class2string(self.get_total(),
                                  inst_name="total",
                                  indent_tab=indent_tab,
                                  leading_tab=leading_tab)
updated=time.time() xml_str=('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n'+ '<VOFrontendStats>\n'+ xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB,leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+ xmlFormat.dict2string(status["groups"],dict_name="groups",el_name="group", subtypes_params={"class":{"dicts_params":{"factories":{"el_name":"factory", "subtypes_params":{"class":{"subclass_params":{"Requested":{"dicts_params":{"Parameters":{"el_name":"Parameter", "subtypes_params":{"class":{}}}}}}}}}, "states":{"el_name":"state", "subtypes_params":{"class":{"subclass_params":{"Requested":{"dicts_params":{"Parameters":{"el_name":"Parameter", "subtypes_params":{"class":{}}}}}}}}} }}}, leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+ xmlFormat.class2string(status["total"],inst_name="total",leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+ xmlFormat.dict2string(global_fact_totals['factories'],dict_name="factories",el_name="factory", subtypes_params={"class":{"subclass_params":{"Requested":{"dicts_params":{"Parameters":{"el_name":"Parameter", "subtypes_params":{"class":{}}}}}}}}, leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+ xmlFormat.dict2string(global_fact_totals['states'],dict_name="states",el_name="state", subtypes_params={"class":{"subclass_params":{"Requested":{"dicts_params":{"Parameters":{"el_name":"Parameter", "subtypes_params":{"class":{}}}}}}}}, leading_tab=xmlFormat.DEFAULT_TAB)+"\n"+ "</VOFrontendStats>\n") glideinFrontendMonitoring.monitoringConfig.write_file(monitorAggregatorConfig.status_relname,xml_str)
def aggregateStatus(in_downtime):
    """ Create an aggregate of status files, write it in an aggregate status file
    and in the end return the values

    @type in_downtime: boolean
    @param in_downtime: Entry downtime information

    @rtype: dict
    @return: Dictionary of status information

    Fixes: the three "remove stale attributes" loops deleted dict keys while
    iterating ``.keys()``, which raises RuntimeError on Python 3; they now
    iterate over a ``list(...)`` snapshot (same approach used by the sibling
    monitoringConfig-based aggregateStatus).
    """
    global monitorAggregatorConfig

    # attributes averaged over entries instead of summed
    avgEntries = ('InfoAge', )

    global_total = {'Status': None, 'Requested': None, 'ClientMonitor': None}
    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals
    completed_data_tot = {'entries': {}}

    # initialize the RRD dictionary, so it gets created properly
    val_dict = {}
    for tp in global_total.keys():
        # type - status or requested
        if not (tp in status_attributes.keys()):
            continue
        tp_str = type_strings[tp]
        attributes_tp = status_attributes[tp]
        for a in attributes_tp:
            val_dict["%s%s" % (tp_str, a)] = None

    nr_entries = 0
    nr_feentries = {}  # dictionary for nr entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry status file
        status_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.status_relname)
        # load entry completed data file
        completed_data_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.completed_data_relname)
        # defined before the try so the finally clause is always safe
        completed_data_fp = None
        try:
            entry_data = xmlParse.xmlfile2dict(status_fname)
            completed_data_fp = open(completed_data_fname)
            completed_data = json.load(completed_data_fp)
        except IOError:
            continue  # file not found, ignore
        finally:
            if completed_data_fp:
                completed_data_fp.close()

        # update entry
        status['entries'][entry] = {
            'downtime': entry_data['downtime'],
            'frontends': entry_data['frontends']
        }

        # update completed data
        completed_data_tot['entries'][entry] = completed_data['stats']

        # update total
        if 'total' in entry_data:
            nr_entries += 1
            status['entries'][entry]['total'] = entry_data['total']

            for w in global_total.keys():
                tel = global_total[w]
                if w not in entry_data['total']:
                    continue
                el = entry_data['total'][w]
                if tel is None:
                    # new one, just copy over
                    global_total[w] = {}
                    tel = global_total[w]
                    for a in el.keys():
                        tel[a] = int(
                            el[a])  # coming from XML, everything is a string
                else:
                    # successive, sum
                    for a in el.keys():
                        if a in tel:
                            tel[a] += int(el[a])

                    # if any attribute from prev. frontends are not in the current one, remove from total
                    # iterate over a snapshot: deleting while iterating .keys() raises on Python 3
                    for a in list(tel):
                        if a not in el:
                            del tel[a]

        # update frontends
        if 'frontends' in entry_data:
            # loop on fe's in this entry
            for fe in entry_data['frontends'].keys():
                # compare each to the list of fe's accumulated so far
                if fe not in status_fe['frontends']:
                    status_fe['frontends'][fe] = {}
                if fe not in nr_feentries:
                    nr_feentries[fe] = 1  # first occurrence of this frontend
                else:
                    nr_feentries[fe] += 1
                for w in entry_data['frontends'][fe].keys():
                    if w not in status_fe['frontends'][fe]:
                        status_fe['frontends'][fe][w] = {}
                    tela = status_fe['frontends'][fe][w]
                    ela = entry_data['frontends'][fe][w]
                    for a in ela.keys():
                        # for the 'Downtime' field (only bool), do logical AND of all site downtimes
                        # 'w' is frontend attribute name, ie 'ClientMonitor' or 'Downtime'
                        # 'a' is sub-field, such as 'GlideIdle' or 'status'
                        if w == 'Downtime' and a == 'status':
                            ela_val = (
                                ela[a] != 'False'
                            )  # Check if 'True' or 'False' but default to True if neither
                            if a in tela:
                                try:
                                    tela[a] = tela[a] and ela_val
                                except:
                                    pass  # just protect
                            else:
                                tela[a] = ela_val
                        else:
                            try:
                                # if there already, sum
                                if a in tela:
                                    tela[a] += int(ela[a])
                                else:
                                    tela[a] = int(ela[a])
                            except:
                                pass  # not an int, not Downtime, so do nothing

                    # if any attribute from prev. frontends are not in the current one, remove from total
                    # iterate over a snapshot: deleting while iterating .keys() raises on Python 3
                    for a in list(tela):
                        if a not in ela:
                            del tela[a]

    # iterate over a snapshot: entries may be deleted below
    for w in list(global_total):
        if global_total[w] is None:
            del global_total[w]  # remove entry if not defined
        else:
            tel = global_total[w]
            for a in tel.keys():
                if a in avgEntries:
                    # since all entries must have this attr to be here, just divide by nr of entries
                    tel[a] = tel[a] / nr_entries

    # do average for per-fe stat--'InfoAge' only
    for fe in status_fe['frontends'].keys():
        for w in status_fe['frontends'][fe].keys():
            tel = status_fe['frontends'][fe][w]
            for a in tel.keys():
                if a in avgEntries and fe in nr_feentries:
                    tel[a] = tel[a] / nr_feentries[fe]  # divide per fe

    xml_downtime = xmlFormat.dict2string({},
                                         dict_name='downtime',
                                         el_name='',
                                         params={'status': str(in_downtime)},
                                         leading_tab=xmlFormat.DEFAULT_TAB)

    # Write xml files
    updated = time.time()
    xml_str = (
        '<?xml version="1.0" encoding="ISO-8859-1"?>\n\n' +
        '<glideFactoryQStats>\n' +
        xmlFormat.time2xml(updated, "updated", indent_tab=xmlFormat.DEFAULT_TAB,
                           leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        xml_downtime + "\n" + xmlFormat.dict2string(
            status["entries"],
            dict_name="entries",
            el_name="entry",
            subtypes_params={
                "class": {
                    "dicts_params": {
                        "frontends": {
                            "el_name": "frontend",
                            "subtypes_params": {
                                "class": {
                                    "subclass_params": {
                                        "Requested": {
                                            "dicts_params": {
                                                "Parameters": {
                                                    "el_name": "Parameter",
                                                    "subtypes_params": {
                                                        "class": {}
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            },
            leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        xmlFormat.class2string(status["total"],
                               inst_name="total",
                               leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        xmlFormat.dict2string(status_fe["frontends"],
                              dict_name="frontends",
                              el_name="frontend",
                              subtypes_params={
                                  "class": {
                                      "subclass_params": {
                                          "Requested": {
                                              "dicts_params": {
                                                  "Parameters": {
                                                      "el_name": "Parameter",
                                                      "subtypes_params": {
                                                          "class": {}
                                                      }
                                                  }
                                              }
                                          }
                                      }
                                  }
                              },
                              leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
        "</glideFactoryQStats>\n")
    glideFactoryMonitoring.Monitoring_Output.write_file(
        monitorAggregatorConfig.status_relname, xml_str)

    # write json
    glideFactoryMonitoring.Monitoring_Output.write_completed_json(
        monitorAggregatorConfig.completed_data_relname.split('.')[0], updated,
        completed_data_tot)

    # Write rrds HERE
    for out in glideFactoryMonitoring.Monitoring_Output.out_list:
        out.write_aggregateStatus()

    return status
def aggregateLogSummary():
    """Create an aggregate of the per-entry log summary files.

    Reads each entry's log-summary XML, sums the counters into a global
    total, writes the aggregate XML file and the log RRDs, and returns
    the aggregate.

    @rtype: dict
    @return: {'entries': {entry: {'frontends': ..., 'total': ...}},
              'total': global counters}

    Fixes: (a) the initialization of ``status`` had been commented out while
    the variable is still used below (NameError); (b) ``dict.has_key`` was
    removed in Python 3, replaced with the ``in`` operator.
    """
    global monitorAggregatorConfig

    # initialize global counters
    global_total = {'Current': {}, 'Entered': {}, 'Exited': {},
                    'CompletedCounts': {'Sum': {}, 'Waste': {}, 'WasteTime': {},
                                        'Lasted': {}, 'JobsNr': {}, 'JobsDuration': {}}}
    for s in ('Wait', 'Idle', 'Running', 'Held'):
        for k in ['Current', 'Entered', 'Exited']:
            global_total[k][s] = 0
    for s in ('Completed', 'Removed'):
        # only 'Entered' is known for terminal states
        for k in ['Entered']:
            global_total[k][s] = 0
    for k in glideFactoryMonitoring.getAllJobTypes():
        for w in ("Waste", "WasteTime"):
            el = {}
            for t in glideFactoryMonitoring.getAllMillRanges():
                el[t] = 0
            global_total['CompletedCounts'][w][k] = el
    el = {}
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['Lasted'] = el
    el = {}
    for t in glideFactoryMonitoring.getAllJobRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsNr'] = el
    el = {}  # fresh dict; reusing the previous one would alias the counters
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsDuration'] = el
    global_total['CompletedCounts']['Sum'] = {'Glideins': 0,
                                              'Lasted': 0,
                                              'FailedNr': 0,
                                              'JobsNr': 0,
                                              'JobsLasted': 0,
                                              'JobsGoodput': 0,
                                              'JobsTerminated': 0,
                                              'CondorLasted': 0}

    # FIX: this line was commented out although 'status' is used everywhere below.
    status = {'entries': {}, 'total': global_total}

    nr_entries = 0
    for entry in monitorAggregatorConfig.entries:
        # load entry log summary file
        status_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.logsummary_relname)
        try:
            entry_data = xmlParse.xmlfile2dict(
                status_fname,
                always_singular_list=['Fraction', 'TimeRange', 'Range'])
        except IOError:
            continue  # file not found, ignore

        # update entry: convert the XML string values to ints, per frontend
        out_data = {}
        for frontend in entry_data['frontends'].keys():
            fe_el = entry_data['frontends'][frontend]
            out_fe_el = {}
            for k in ['Current', 'Entered', 'Exited']:
                out_fe_el[k] = {}
                for s in fe_el[k].keys():
                    out_fe_el[k][s] = int(fe_el[k][s])
            out_fe_el['CompletedCounts'] = {'Waste': {}, 'WasteTime': {},
                                            'Lasted': {}, 'JobsNr': {},
                                            'JobsDuration': {}, 'Sum': {}}
            for tkey in fe_el['CompletedCounts']['Sum'].keys():
                out_fe_el['CompletedCounts']['Sum'][tkey] = int(
                    fe_el['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    out_fe_el['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        out_fe_el['CompletedCounts'][w][k][t] = int(
                            fe_el['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['Lasted'][t] = int(
                    fe_el['CompletedCounts']['Lasted'][t]['val'])
            out_fe_el['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['JobsDuration'][t] = int(
                    fe_el['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                out_fe_el['CompletedCounts']['JobsNr'][t] = int(
                    fe_el['CompletedCounts']['JobsNr'][t]['val'])
            out_data[frontend] = out_fe_el
        status['entries'][entry] = {'frontends': out_data}

        # update total
        # FIX: has_key() was removed in Python 3
        if 'total' in entry_data:
            nr_entries += 1
            local_total = {}
            for k in ['Current', 'Entered', 'Exited']:
                local_total[k] = {}
                for s in global_total[k].keys():
                    local_total[k][s] = int(entry_data['total'][k][s])
                    global_total[k][s] += int(entry_data['total'][k][s])
            local_total['CompletedCounts'] = {'Sum': {}, 'Waste': {},
                                              'WasteTime': {}, 'Lasted': {},
                                              'JobsNr': {}, 'JobsDuration': {}}
            for tkey in entry_data['total']['CompletedCounts']['Sum'].keys():
                local_total['CompletedCounts']['Sum'][tkey] = int(
                    entry_data['total']['CompletedCounts']['Sum'][tkey])
                global_total['CompletedCounts']['Sum'][tkey] += int(
                    entry_data['total']['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    local_total['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        local_total['CompletedCounts'][w][k][t] = int(
                            entry_data['total']['CompletedCounts'][w][k][t]['val'])
                        global_total['CompletedCounts'][w][k][t] += int(
                            entry_data['total']['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['Lasted'][t] = int(
                    entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
                global_total['CompletedCounts']['Lasted'][t] += int(
                    entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
            local_total['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['JobsDuration'][t] = int(
                    entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
                global_total['CompletedCounts']['JobsDuration'][t] += int(
                    entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                local_total['CompletedCounts']['JobsNr'][t] = int(
                    entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
                global_total['CompletedCounts']['JobsNr'][t] += int(
                    entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
            status['entries'][entry]['total'] = local_total

    # Write xml files
    updated = time.time()
    xml_str = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n'
               + '<glideFactoryLogSummary>\n'
               + xmlFormat.time2xml(updated, "updated",
                                    indent_tab=xmlFormat.DEFAULT_TAB,
                                    leading_tab=xmlFormat.DEFAULT_TAB) + "\n"
               + xmlFormat.dict2string(
                   status["entries"],
                   dict_name="entries",
                   el_name="entry",
                   subtypes_params={
                       "class": {
                           "dicts_params": {
                               "frontends": {
                                   "el_name": "frontend",
                                   "subtypes_params": {
                                       "class": {
                                           'subclass_params': {
                                               'CompletedCounts':
                                               glideFactoryMonitoring.get_completed_stats_xml_desc()
                                           }
                                       }
                                   }
                               }
                           },
                           "subclass_params": {
                               "total": {
                                   "subclass_params": {
                                       'CompletedCounts':
                                       glideFactoryMonitoring.get_completed_stats_xml_desc()
                                   }
                               }
                           }
                       }
                   },
                   leading_tab=xmlFormat.DEFAULT_TAB) + "\n"
               + xmlFormat.class2string(
                   status["total"],
                   inst_name="total",
                   subclass_params={
                       'CompletedCounts':
                       glideFactoryMonitoring.get_completed_stats_xml_desc()
                   },
                   leading_tab=xmlFormat.DEFAULT_TAB) + "\n"
               + "</glideFactoryLogSummary>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(
        monitorAggregatorConfig.logsummary_relname, xml_str)

    # Write rrds
    fe_dir = "total"
    sdata = status["total"]['Current']

    glideFactoryMonitoring.monitoringConfig.establish_dir(fe_dir)
    val_dict_counts = {}
    val_dict_counts_desc = {}
    val_dict_completed = {}
    val_dict_stats = {}
    val_dict_waste = {}
    val_dict_wastetime = {}
    for s in ('Wait', 'Idle', 'Running', 'Held', 'Completed', 'Removed'):
        if not (s in ('Completed', 'Removed')):  # I don't have their numbers from inactive logs
            count = sdata[s]
            val_dict_counts["Status%s" % s] = count
            val_dict_counts_desc["Status%s" % s] = {'ds_type': 'GAUGE'}

            # negated so 'Exited' shows as outgoing flow in the RRD graphs
            exited = -status["total"]['Exited'][s]
            val_dict_counts["Exited%s" % s] = exited
            val_dict_counts_desc["Exited%s" % s] = {'ds_type': 'ABSOLUTE'}

        entered = status["total"]['Entered'][s]
        val_dict_counts["Entered%s" % s] = entered
        val_dict_counts_desc["Entered%s" % s] = {'ds_type': 'ABSOLUTE'}

        if s == 'Completed':
            completed_counts = status["total"]['CompletedCounts']
            count_entered_times = completed_counts['Lasted']
            count_jobnrs = completed_counts['JobsNr']
            count_jobs_duration = completed_counts['JobsDuration']
            count_waste_mill = completed_counts['Waste']
            time_waste_mill = completed_counts['WasteTime']
            # save run times
            for timerange in count_entered_times.keys():
                val_dict_stats['Lasted_%s' % timerange] = count_entered_times[timerange]
                # they all use the same indexes
                val_dict_stats['JobsLasted_%s' % timerange] = count_jobs_duration[timerange]
            # save jobsnr
            for jobrange in count_jobnrs.keys():
                val_dict_stats['JobsNr_%s' % jobrange] = count_jobnrs[jobrange]
            # save simple vals
            for tkey in completed_counts['Sum'].keys():
                val_dict_completed[tkey] = completed_counts['Sum'][tkey]
            # save waste_mill
            for w in count_waste_mill.keys():
                count_waste_mill_w = count_waste_mill[w]
                for p in count_waste_mill_w.keys():
                    val_dict_waste['%s_%s' % (w, p)] = count_waste_mill_w[p]
            for w in time_waste_mill.keys():
                time_waste_mill_w = time_waste_mill[w]
                for p in time_waste_mill_w.keys():
                    val_dict_wastetime['%s_%s' % (w, p)] = time_waste_mill_w[p]

    # write the data to disk
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi_hetero(
        "%s/Log_Counts" % fe_dir, val_dict_counts_desc, updated, val_dict_counts)
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "%s/Log_Completed" % fe_dir, "ABSOLUTE", updated, val_dict_completed)
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "%s/Log_Completed_Stats" % fe_dir, "ABSOLUTE", updated, val_dict_stats)
    # Disable Waste RRDs... WasteTime much more useful
    #glideFactoryMonitoring.monitoringConfig.write_rrd_multi("%s/Log_Completed_Waste"%fe_dir,
    #                                                        "ABSOLUTE",updated,val_dict_waste)
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "%s/Log_Completed_WasteTime" % fe_dir, "ABSOLUTE", updated,
        val_dict_wastetime)

    return status