def aggregateLogSummary():
    """
    Create an aggregate of log summary files, write it in an
    aggregate log summary file and in the end return the values
    """
    global monitorAggregatorConfig

    # initialize global counters
    global_total = {'Current': {}, 'Entered': {}, 'Exited': {},
                    'CompletedCounts': {'Sum': {}, 'Waste': {},
                                        'WasteTime': {}, 'Lasted': {},
                                        'JobsNr': {}, 'JobsDuration': {}}}
    for s in ('Wait', 'Idle', 'Running', 'Held'):
        for k in ['Current', 'Entered', 'Exited']:
            global_total[k][s] = 0
    for s in ('Completed', 'Removed'):
        for k in ['Entered']:
            global_total[k][s] = 0

    for k in glideFactoryMonitoring.getAllJobTypes():
        for w in ("Waste", "WasteTime"):
            el = {}
            for t in glideFactoryMonitoring.getAllMillRanges():
                el[t] = 0
            global_total['CompletedCounts'][w][k] = el

    el = {}
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['Lasted'] = el

    el = {}
    for t in glideFactoryMonitoring.getAllJobRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsNr'] = el

    el = {}  # KEL - why is the same el used twice (see above)
    for t in glideFactoryMonitoring.getAllTimeRanges():
        el[t] = 0
    global_total['CompletedCounts']['JobsDuration'] = el

    global_total['CompletedCounts']['Sum'] = {'Glideins': 0,
                                              'Lasted': 0,
                                              'FailedNr': 0,
                                              'JobsNr': 0,
                                              'JobsLasted': 0,
                                              'JobsGoodput': 0,
                                              'JobsTerminated': 0,
                                              'CondorLasted': 0}

    fe_total = copy.deepcopy(global_total)  # same as above but for frontend totals

    status = {'entries': {}, 'total': global_total}
    status_fe = {'frontends': {}}  # analogous to above but for frontend totals

    nr_entries = 0
    nr_feentries = {}  # dictionary for nr entries per fe
    for entry in monitorAggregatorConfig.entries:
        # load entry log summary file
        status_fname = os.path.join(
            os.path.join(monitorAggregatorConfig.monitor_dir, 'entry_' + entry),
            monitorAggregatorConfig.logsummary_relname)
        try:
            entry_data = xmlParse.xmlfile2dict(
                status_fname,
                always_singular_list=['Fraction', 'TimeRange', 'Range'])
        except IOError:
            continue  # file not found, ignore

        # update entry
        out_data = {}
        for frontend in entry_data['frontends'].keys():
            fe_el = entry_data['frontends'][frontend]
            out_fe_el = {}
            for k in ['Current', 'Entered', 'Exited']:
                out_fe_el[k] = {}
                for s in fe_el[k].keys():
                    out_fe_el[k][s] = int(fe_el[k][s])
            out_fe_el['CompletedCounts'] = {'Waste': {}, 'WasteTime': {},
                                            'Lasted': {}, 'JobsNr': {},
                                            'JobsDuration': {}, 'Sum': {}}
            for tkey in fe_el['CompletedCounts']['Sum'].keys():
                out_fe_el['CompletedCounts']['Sum'][tkey] = int(fe_el['CompletedCounts']['Sum'][tkey])
            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    out_fe_el['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        out_fe_el['CompletedCounts'][w][k][t] = int(fe_el['CompletedCounts'][w][k][t]['val'])
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['Lasted'][t] = int(fe_el['CompletedCounts']['Lasted'][t]['val'])
            out_fe_el['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                out_fe_el['CompletedCounts']['JobsDuration'][t] = int(fe_el['CompletedCounts']['JobsDuration'][t]['val'])
            for t in glideFactoryMonitoring.getAllJobRanges():
                out_fe_el['CompletedCounts']['JobsNr'][t] = int(fe_el['CompletedCounts']['JobsNr'][t]['val'])
            out_data[frontend] = out_fe_el
        status['entries'][entry] = {'frontends': out_data}

        # update total
        if 'total' in entry_data:
            nr_entries += 1
            local_total = {}

            for k in ['Current', 'Entered', 'Exited']:
                local_total[k] = {}
                for s in global_total[k].keys():
                    local_total[k][s] = int(entry_data['total'][k][s])
                    global_total[k][s] += int(entry_data['total'][k][s])

            local_total['CompletedCounts'] = {'Sum': {}, 'Waste': {},
                                              'WasteTime': {}, 'Lasted': {},
                                              'JobsNr': {}, 'JobsDuration': {}}
            for tkey in entry_data['total']['CompletedCounts']['Sum'].keys():
                local_total['CompletedCounts']['Sum'][tkey] = int(entry_data['total']['CompletedCounts']['Sum'][tkey])
                global_total['CompletedCounts']['Sum'][tkey] += int(entry_data['total']['CompletedCounts']['Sum'][tkey])

            for k in glideFactoryMonitoring.getAllJobTypes():
                for w in ("Waste", "WasteTime"):
                    local_total['CompletedCounts'][w][k] = {}
                    for t in glideFactoryMonitoring.getAllMillRanges():
                        local_total['CompletedCounts'][w][k][t] = int(entry_data['total']['CompletedCounts'][w][k][t]['val'])
                        global_total['CompletedCounts'][w][k][t] += int(entry_data['total']['CompletedCounts'][w][k][t]['val'])

            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['Lasted'][t] = int(entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
                global_total['CompletedCounts']['Lasted'][t] += int(entry_data['total']['CompletedCounts']['Lasted'][t]['val'])
            local_total['CompletedCounts']['JobsDuration'] = {}
            for t in glideFactoryMonitoring.getAllTimeRanges():
                local_total['CompletedCounts']['JobsDuration'][t] = int(entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])
                global_total['CompletedCounts']['JobsDuration'][t] += int(entry_data['total']['CompletedCounts']['JobsDuration'][t]['val'])

            for t in glideFactoryMonitoring.getAllJobRanges():
                local_total['CompletedCounts']['JobsNr'][t] = int(entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])
                global_total['CompletedCounts']['JobsNr'][t] += int(entry_data['total']['CompletedCounts']['JobsNr'][t]['val'])

            status['entries'][entry]['total'] = local_total

        # update frontends
        for fe in out_data:
            # compare each to the list of fe's accumulated so far
            if not (fe in status_fe['frontends']):
                status_fe['frontends'][fe] = {}
            if not (fe in nr_feentries):
                nr_feentries[fe] = 1  # already found one
            else:
                nr_feentries[fe] += 1
            # sum them up
            sumDictInt(out_data[fe], status_fe['frontends'][fe])

    # Write xml files
    # To do - Igor: Consider adding status_fe to the XML file
    updated = time.time()
    xml_str = ('<?xml version="1.0" encoding="ISO-8859-1"?>\n\n' +
               '<glideFactoryLogSummary>\n' +
               xmlFormat.time2xml(updated, "updated",
                                  indent_tab=xmlFormat.DEFAULT_TAB,
                                  leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.dict2string(
                   status["entries"], dict_name="entries", el_name="entry",
                   subtypes_params={"class": {
                       "dicts_params": {"frontends": {
                           "el_name": "frontend",
                           "subtypes_params": {"class": {'subclass_params': {'CompletedCounts': glideFactoryMonitoring.get_completed_stats_xml_desc()}}}}},
                       "subclass_params": {"total": {"subclass_params": {'CompletedCounts': glideFactoryMonitoring.get_completed_stats_xml_desc()}}}}},
                   leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               xmlFormat.class2string(
                   status["total"], inst_name="total",
                   subclass_params={'CompletedCounts': glideFactoryMonitoring.get_completed_stats_xml_desc()},
                   leading_tab=xmlFormat.DEFAULT_TAB) + "\n" +
               "</glideFactoryLogSummary>\n")
    glideFactoryMonitoring.monitoringConfig.write_file(monitorAggregatorConfig.logsummary_relname, xml_str)

    # Write rrds
    writeLogSummaryRRDs("total", status["total"])

    # Frontend total rrds across all factories
    for fe in status_fe['frontends']:
        writeLogSummaryRRDs("total/%s" % ("frontend_" + fe), status_fe['frontends'][fe])

    return status
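# Editor's sketch of the sumDictInt() contract assumed by aggregateLogSummary()
# above: recursively add the integer leaves of src_dict into sum_dict, creating
# nested dictionaries as needed. The name below (_sumDictInt_sketch) is
# hypothetical; the real sumDictInt is defined elsewhere in this module.
def _sumDictInt_sketch(src_dict, sum_dict):
    for k in src_dict:
        if isinstance(src_dict[k], dict):
            # descend into nested counter trees (e.g. the CompletedCounts subtrees)
            sum_dict.setdefault(k, {})
            _sumDictInt_sketch(src_dict[k], sum_dict[k])
        else:
            # leaves are already ints by the time out_data is built
            sum_dict[k] = sum_dict.get(k, 0) + int(src_dict[k])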
def verifyRRD(fix_rrd=False):
    """
    Go through all known monitoring rrds and verify that they
    match existing schema (could be different if an upgrade happened)
    If fix_rrd is true, then also attempt to add any missing attributes
    """
    global rrd_problems_found
    global monitorAggregatorConfig
    mon_dir = monitorAggregatorConfig.monitor_dir

    status_dict = {}
    completed_stats_dict = {}
    completed_waste_dict = {}
    counts_dict = {}

    # initialize the RRD dictionaries to match the current schema for verification
    for tp in status_attributes.keys():
        if tp in type_strings.keys():
            tp_str = type_strings[tp]
            attributes_tp = status_attributes[tp]
            for a in attributes_tp:
                status_dict["%s%s" % (tp_str, a)] = None

    for jobrange in glideFactoryMonitoring.getAllJobRanges():
        completed_stats_dict["JobsNr_%s" % (jobrange,)] = None
    for timerange in glideFactoryMonitoring.getAllTimeRanges():
        completed_stats_dict["Lasted_%s" % (timerange,)] = None
        completed_stats_dict["JobsLasted_%s" % (timerange,)] = None

    for jobtype in glideFactoryMonitoring.getAllJobTypes():
        for timerange in glideFactoryMonitoring.getAllMillRanges():
            completed_waste_dict["%s_%s" % (jobtype, timerange)] = None

    for jobtype in ('Entered', 'Exited', 'Status'):
        for jobstatus in ('Wait', 'Idle', 'Running', 'Held'):
            counts_dict["%s%s" % (jobtype, jobstatus)] = None
    for jobstatus in ('Completed', 'Removed'):
        counts_dict["%s%s" % ('Entered', jobstatus)] = None

    completed_dict = glideFactoryMonitoring.getLogCompletedDefaults()
    rrdict = {"Status_Attributes.rrd": status_dict,
              "Log_Completed.rrd": completed_dict,
              "Log_Completed_Stats.rrd": completed_stats_dict,
              "Log_Completed_WasteTime.rrd": completed_waste_dict,
              "Log_Counts.rrd": counts_dict,
              }

    for dir_name, sdir_name, f_list in os.walk(mon_dir):
        for file_name in f_list:
            if file_name in rrdict.keys():
                verifyHelper(os.path.join(dir_name, file_name),
                             rrdict[file_name], fix_rrd)
    return not rrd_problems_found
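# Editor's sketch of the verifyHelper() contract assumed by verifyRRD() above:
# compare the data sources present in an existing RRD file against the expected
# schema dict and flag any mismatch via the global rrd_problems_found. The name
# (_verifyHelper_sketch) is hypothetical, and the rrdtool.info() call assumes
# the python-rrdtool bindings are installed; the real verifyHelper lives
# elsewhere in this module and, with fix_rrd=True, also tries to add the
# missing data sources, which this sketch only detects.
def _verifyHelper_sketch(filename, data_dict, fix_rrd=False):
    global rrd_problems_found
    if not os.path.exists(filename):
        return  # nothing to verify yet; the RRD writers will create it
    import rrdtool  # assumption: python-rrdtool bindings available
    info = rrdtool.info(filename)
    existing = set()
    for k in info.keys():
        # DS entries are reported as keys of the form "ds[<name>].type"
        if k.startswith("ds[") and k.endswith("].type"):
            existing.add(k[3:-6])
    missing = set(data_dict.keys()) - existing
    if missing:
        rrd_problems_found = True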
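# Editor's sketch: aggregateLogSummary() delegates the RRD updates to a
# writeLogSummaryRRDs() helper that is not shown in this section. An earlier
# revision of aggregateLogSummary() performed the same work inline for the
# "total" tree only; the function below generalizes that inline code to any
# (fe_dir, status_el) pair. It is an assumption about the helper's shape
# (hypothetical name _writeLogSummaryRRDs_sketch), not the shipped code.
def _writeLogSummaryRRDs_sketch(fe_dir, status_el):
    updated = time.time()
    sdata = status_el['Current']
    glideFactoryMonitoring.monitoringConfig.establish_dir(fe_dir)
    val_dict_counts = {}
    val_dict_counts_desc = {}
    val_dict_completed = {}
    val_dict_stats = {}
    val_dict_wastetime = {}
    for s in ('Wait', 'Idle', 'Running', 'Held', 'Completed', 'Removed'):
        if not (s in ('Completed', 'Removed')):
            # no Status/Exited numbers for these; inactive logs don't report them
            val_dict_counts["Status%s" % s] = sdata[s]
            val_dict_counts_desc["Status%s" % s] = {'ds_type': 'GAUGE'}
            val_dict_counts["Exited%s" % s] = -status_el['Exited'][s]
            val_dict_counts_desc["Exited%s" % s] = {'ds_type': 'ABSOLUTE'}
        val_dict_counts["Entered%s" % s] = status_el['Entered'][s]
        val_dict_counts_desc["Entered%s" % s] = {'ds_type': 'ABSOLUTE'}
        if s == 'Completed':
            completed_counts = status_el['CompletedCounts']
            # run times and job durations share the same time-range indexes
            for timerange in completed_counts['Lasted'].keys():
                val_dict_stats['Lasted_%s' % timerange] = completed_counts['Lasted'][timerange]
                val_dict_stats['JobsLasted_%s' % timerange] = completed_counts['JobsDuration'][timerange]
            # jobs-per-glidein histogram
            for jobrange in completed_counts['JobsNr'].keys():
                val_dict_stats['JobsNr_%s' % jobrange] = completed_counts['JobsNr'][jobrange]
            # simple summary values
            for tkey in completed_counts['Sum'].keys():
                val_dict_completed[tkey] = completed_counts['Sum'][tkey]
            # only WasteTime is written out; the plain Waste RRD was disabled
            # upstream because WasteTime is much more useful
            for w in completed_counts['WasteTime'].keys():
                time_waste_mill_w = completed_counts['WasteTime'][w]
                for p in time_waste_mill_w.keys():
                    val_dict_wastetime['%s_%s' % (w, p)] = time_waste_mill_w[p]
    # write the data to disk
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi_hetero(
        "%s/Log_Counts" % fe_dir, val_dict_counts_desc, updated, val_dict_counts)
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "%s/Log_Completed" % fe_dir, "ABSOLUTE", updated, val_dict_completed)
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "%s/Log_Completed_Stats" % fe_dir, "ABSOLUTE", updated, val_dict_stats)
    glideFactoryMonitoring.monitoringConfig.write_rrd_multi(
        "%s/Log_Completed_WasteTime" % fe_dir, "ABSOLUTE", updated, val_dict_wastetime)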