def print_VOViewLocal(cp):
    """
    Print the GLUE VOView entity; shows the VO's view of the Condor batch
    system.

    Config options used:
       * ce.name.  The human-readable name of the CE.
       * condor.status.  The status of Condor; defaults to "Production".
       * osg_dirs.app.  The $OSG_APP directory; defaults to "/Unknown".
       * osg_dirs.data.  The $OSG_DATA directory; defaults to "/Unknown".
       * se.name.  The human-readable name of the closest SE.

    @param cp: The GIP configuration object
    @type cp: ConfigParser.ConfigParser
    """
    # NOTE: the helpers used below (getTemplate, cp_get, cp_getInt,
    # parseNodes, VoMapper, getJobsInfo, getGroupInfo, voList,
    # responseTimes, buildCEUniqueID, getDefaultSE, defaultGroupIsExcluded,
    # printTemplate, log) and the Python 2 'sets' module are provided by
    # the surrounding GIP code.
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    ce_name = cp_get(cp, "ce", "name", "")

    #status = cp_get(cp, "condor", "status", "Production")
    #condorVersion = getLrmsInfo(cp)
    total_nodes, _, unclaimed = parseNodes(cp)

    vo_map = VoMapper(cp)
    jobs_info = getJobsInfo(vo_map, cp)
    groupInfo = getGroupInfo(vo_map, cp)

    # Add in the default group: it gets every VO not claimed by a named
    # group, plus whatever slots the named groups' quotas leave over.
    all_group_vos = []
    total_assigned = 0
    for key, val in groupInfo.items():
        if key == 'default':
            continue
        all_group_vos.extend(val['vos'])
        total_assigned += val.get('quota', 0)
    all_vos = sets.Set(voList(cp))
    defaultVoList = [i for i in all_vos if i not in all_group_vos]
    if 'default' not in groupInfo:
        groupInfo['default'] = {}
        groupInfo['default']['vos'] = defaultVoList

    if total_nodes > total_assigned:
        log.info("There are %i assigned job slots out of %i total; assigning"
                 " the rest to the default group." % (total_assigned,
                 total_nodes))
        groupInfo['default']['quota'] = total_nodes - total_assigned
    else:
        log.warning("More assigned nodes (%i) than actual nodes (%i)!"
                    " Assigning all slots also to the default group." %
                    (total_assigned, total_nodes))
        # NB: If you sum up the GlueCEInfoTotalCPUs for every queue, you
        # will calculate more slots than the batch system actually has.
        # That seems fair, since the group quotas are oversubscribed.
        groupInfo['default']['quota'] = 0 # transformed to total_nodes below
        all_vos = voList(cp)
        defaultVoList = [i for i in all_vos if i not in all_group_vos]
        groupInfo['default']['vos'] = defaultVoList

    #acbr = '\n'.join(['GlueCEAccessControlBaseRule: VO:%s' % i for i in \
    #    defaultVoList])
    #groupInfo['default']['acbr'] = acbr

    if not groupInfo['default']['vos'] or defaultGroupIsExcluded(cp):
        if groupInfo.has_key('default'):
            del groupInfo['default']

    max_wall = cp_getInt(cp, "condor", "max_wall", 1440)

    for group, ginfo in groupInfo.items():
        jinfo = jobs_info.get(group, {})
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'condor', group)
        vos = ginfo['vos']
        if not isinstance(vos, sets.Set):
            vos = sets.Set(vos)
        vos.update(jinfo.keys())
        vos.intersection_update(sets.Set(all_vos))
        log.debug("CE %s, post-filtering VOs: %s." % (ce_unique_id,
                  ", ".join(vos)))
        if not vos:
            continue

        # Enforce the invariants:
        #   VO_FREE_SLOTS <= CE_FREE_SLOTS
        #   VO_FREE_SLOTS <= CE_ASSIGNED - VO_RUNNING
        # This code determines CE_ASSIGNED.
        if ginfo.get("quota", 0) > 0:
            assigned = ginfo.get("quota", 0)
        else:
            assigned = total_nodes
        myrunning = sum([i.get('running', 0) for i in jinfo.values()], 0)
        assigned = max(assigned, myrunning)

        for vo in vos:
            acbr = 'VO:%s' % vo
            info = jinfo.get(vo.lower(), {"running": 0, "idle": 0,
                                          "held": 0})
            ert, wrt = responseTimes(cp, info["running"],
                                     info["idle"] + info["held"],
                                     max_job_time=max_wall*60)

            free = min(unclaimed, assigned - myrunning,
                       assigned - int(info['running']))
            free = int(free)

            # With too deep an idle backlog, advertise no free slots.
            waiting = int(info["idle"]) + int(info["held"])
            if waiting > cp_getInt(cp, 'condor', 'idle_slack', '10'):
                free = 0

            info = {"vo"          : vo,
                    "acbr"        : acbr,
                    "ceUniqueID"  : ce_unique_id,
                    "voLocalID"   : vo,
                    "ce_name"     : ce_name,
                    "job_manager" : 'condor',
                    "queue"       : vo,
                    "running"     : info["running"],
                    # Held jobs count as "waiting", since the GLUE
                    # definition is: number of jobs in a state different
                    # than running.
                    "waiting"     : waiting,
                    "total"       : info["running"] + info["idle"] +
                                    info["held"],
                    "free_slots"  : free,
                    "job_slots"   : int(total_nodes),
                    "ert"         : ert,
                    "wrt"         : wrt,
                    "default_se"  : getDefaultSE(cp),
                    "app"         : cp_get(cp, "osg_dirs", "app",
                                           "/Unknown"),
                    "data"        : cp_get(cp, "osg_dirs", "data",
                                           "/Unknown"),
                   }
            printTemplate(VOView, info)
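# --- Illustration (not part of the provider) -----------------------------
# A minimal, self-contained sketch of the free-slot arithmetic enforced in
# the inner VO loop above; the function name and signature are made up for
# illustration.  It mirrors the invariants
#   VO_FREE_SLOTS <= CE_FREE_SLOTS
#   VO_FREE_SLOTS <= CE_ASSIGNED - VO_RUNNING
def _vo_free_slots_sketch(unclaimed, assigned, group_running, vo_running,
                          waiting, idle_slack=10):
    # The provider first bumps the assignment up to at least what the
    # group already has running.
    assigned = max(assigned, group_running)
    # Never advertise more than the unclaimed CE-wide slots, nor more than
    # the group's assignment minus what is already running.
    free = min(unclaimed, assigned - group_running, assigned - vo_running)
    # With too deep an idle backlog, advertise zero free slots.
    if waiting > idle_slack:
        free = 0
    return int(free)

# e.g. _vo_free_slots_sketch(5, 10, 8, 3, 0) == 2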
else: log.warning("More assigned nodes (%i) than actual nodes (%i)! Assigning" \ " all slots also to the default group." % (total_assigned, total_nodes)) # NB: If you sum up the GlueCEInfoTotalCPUs for every queue, you will calculate # more slots than the batch system actually has. That seems fair, since the group # quotas are oversubscribed. groupInfo['default'][ 'quota'] = 0 # will get transformed to total_nodes below all_vos = voList(cp) defaultVoList = [i for i in all_vos if i not in all_group_vos] groupInfo['default']['vos'] = defaultVoList #acbr = '\n'.join(['GlueCEAccessControlBaseRule: VO:%s' % i for i in \ # defaultVoList]) #groupInfo['default']['acbr'] = acbr if not groupInfo['default']['vos'] or defaultGroupIsExcluded(cp): if groupInfo.has_key('default'): del groupInfo['default'] for group, ginfo in groupInfo.items(): jinfo = jobs_info.get(group, {}) ce_unique_id = buildCEUniqueID(cp, ce_name, 'condor', group) vos = ginfo['vos'] if not isinstance(vos, sets.Set): vos = sets.Set(vos) vos.update(jinfo.keys()) vos.intersection_update(sets.Set(all_vos)) log.debug("CE %s, post-filtering VOs: %s." % (ce_unique_id, ", ".join(\ vos))) if not vos: continue
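# --- Example usage (hypothetical) -----------------------------------------
# Driving the provider by hand with a minimal configuration.  This assumes
# a working Condor installation and the GIP helper modules on the Python
# path; the host names below are placeholders, and the section/option
# names mirror the docstring above.
if __name__ == '__main__':
    import ConfigParser
    cp = ConfigParser.ConfigParser()
    for section in ('ce', 'condor', 'osg_dirs', 'se'):
        cp.add_section(section)
    cp.set('ce', 'name', 'example-ce.local')
    cp.set('condor', 'max_wall', '1440')       # minutes
    cp.set('condor', 'idle_slack', '10')       # queued-job threshold
    cp.set('osg_dirs', 'app', '/osg/app')
    cp.set('osg_dirs', 'data', '/osg/data')
    cp.set('se', 'name', 'example-se.local')
    print_VOViewLocal(cp)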