def print_VOViewLocal(cp):
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    vo_queues = getVoQueues(cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    for vo, queue in vo_queues:
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'sge', queue)
        info = {
            'ceUniqueID'  : ce_unique_id,
            'voLocalID'   : vo,
            'acbr'        : 'VO:%s' % vo,
            'running'     : queue_jobs.get(queue, {}).get(vo, {}).\
                get('running', 0),
            'waiting'     : queue_jobs.get(queue, {}).get(vo, {}).\
                get('waiting', 0),
            #'free_slots' : vo.get(queue, {}).get('free_slots', 0),
            'free_slots'  : 0, # TODO: fix
            'ert'         : 3600,
            'wrt'         : 3600,
            'default_se'  : getDefaultSE(cp),
            'app'         : cp_get(cp, "osg_dirs", "app", "/OSG_APP_UNKNOWN"),
            'data'        : cp_get(cp, "osg_dirs", "data", "/OSG_DATA_UNKNOWN"),
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
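# Driver sketch for the SGE VOView provider above (assumes the usual GIP
# imports resolve, e.g. config() from gip_common, as in the other providers
# in this section):
if __name__ == '__main__':
    cp = config()
    print_VOViewLocal(cp)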
def main():
    try:
        # Load up the site configuration
        cp = config()
        se_only = cp_getBoolean(cp, "gip", "se_only", False)
        if not se_only:
            # Load up the template for GlueLocationLocalID
            # To view its contents, see $VDT_LOCATION/gip/templates/GlueCluster
            template = getTemplate("GlueCluster", "GlueLocationLocalID")
            cluster_id = getClusterID(cp)
            osg_grid = cp_get(cp, "osg_dirs", "grid_dir", None)
            if not osg_grid:
                raise RuntimeError('grid_dir ($OSG_GRID) not defined!')
            for subClusterId in getSubClusterIDs(cp):
                # Dictionary of data to fill in for GlueLocationLocalID
                info = {'locationId'   : 'OSG_GRID',
                        'subClusterId' : subClusterId,
                        'clusterId'    : cluster_id,
                        'locationName' : 'OSG_GRID',
                        'version'      : 1.0,
                        'path'         : osg_grid,
                       }
                # Spit out our template, fill it with the appropriate info.
                printTemplate(template, info)
    except Exception, e:
        # Log error, then report it via stderr.
        log.error(e)
        sys.stdout = sys.stderr
        raise
def print_single_SA(info, se, cp): #pylint: disable-msg=W0613
    """
    Print out the GLUE for a single SA.
    """
    se_unique_id = se.getUniqueID()
    saTemplate = getTemplate("GlueSE", "GlueSALocalID")
    info.setdefault('seUniqueID', se_unique_id)
    info.setdefault('saLocalID', 'UNKNOWN_SA')
    info.setdefault('root', '/')
    info.setdefault('path', '/UNKNOWN')
    info.setdefault('filetype', 'permanent')
    info.setdefault('saName', info['saLocalID'])
    info.setdefault('installedOnlineCapacity', info.get('totalOnline', 0))
    info.setdefault('installedNearlineCapacity', info.get('totalNearline', 0))
    info.setdefault('totalOnline', 0)
    info.setdefault('usedOnline', 0)
    info.setdefault('freeOnline', 0)
    info.setdefault('reservedOnline', 0)
    info.setdefault('allocatedOnline', 0)
    info.setdefault('totalNearline', 0)
    info.setdefault('usedNearline', 0)
    info.setdefault('freeNearline', 0)
    info.setdefault('reservedNearline', 0)
    info.setdefault('retention', 'replica')
    info.setdefault('accessLatency', 'online')
    info.setdefault('expiration', 'neverExpire')
    info.setdefault('availableSpace', 0)
    info.setdefault('usedSpace', 0)
    printTemplate(saTemplate, info)
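# Usage sketch for print_single_SA (the SA name and path below are
# hypothetical): only the identifiers need to be supplied; every other GLUE
# attribute falls back to the defaults set above.
def _print_default_sa(se, cp):
    info = {'saLocalID': 'default', 'path': '/mnt/storage'}
    print_single_SA(info, se, cp)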
def print_clusters(cp):
    cluster_name = cp_get(cp, "cluster", "name", None)
    if not cluster_name:
        cluster_name = cp_get(cp, "ce", "hosting_cluster", None)
    if not cluster_name:
        cluster_name = cp_get(cp, "ce", "unique_name", None)
    if not cluster_name:
        # The return value was previously discarded, leaving cluster_name
        # unset; assign it so the fallback actually takes effect.
        cluster_name = getClusterName(cp)
        #raise Exception("Could not determine cluster name.")
    #clusterUniqueID = cp_get(cp, 'ce', 'unique_name', cluster_name)
    clusterUniqueID = getClusterID(cp)
    siteUniqueID = cp_get(cp, "site", "unique_name", "UNKNOWN_SITE")
    extraCEs = cp_get(cp, "cluster", "other_ces", [])
    if extraCEs:
        extraCEs = [x.strip() for x in extraCEs.split(",")]
    ces = getCEList(cp, extraCEs)
    glueClusters = ""
    for ce in ces:
        glueClusters += "GlueForeignKey: GlueCEUniqueID=%s\n" % ce
    bdii = cp_get(cp, "gip", "bdii", "ldap://is.grid.iu.edu:2170")
    info = {
        "cluster"         : cluster_name,
        "clusterUniqueID" : clusterUniqueID,
        "tmp"             : cp_get(cp, "osg_dirs", "tmp",
                                   cp_get(cp, "osg_dirs", "data", "/tmp")),
        "wn_tmp"          : cp_get(cp, "osg_dirs", "wn_tmp", "/tmp"),
        "siteUniqueID"    : siteUniqueID,
        "glueClusters"    : glueClusters,
        "bdii"            : bdii,
    }
    template = getTemplate("GlueCluster", "GlueClusterUniqueID")
    printTemplate(template, info)
def main():
    try:
        # Load up the site configuration
        cp = config()
        se_only = cp_getBoolean(cp, "gip", "se_only", False)
        if not se_only:
            # Get the timestamp in the two formats we wanted
            epoch = str(time.time())
            now = time.strftime("%a %b %d %T UTC %Y", time.gmtime())
            # Load up the template for GlueLocationLocalID
            # To view its contents, see $VDT_LOCATION/gip/templates/GlueCluster
            template = getTemplate("GlueCluster", "GlueLocationLocalID")
            cluster_id = getClusterID(cp)
            for subClusterId in getSubClusterIDs(cp):
                # Dictionary of data to fill in for GlueLocationLocalID
                info = {'locationId'   : 'TIMESTAMP',
                        'subClusterId' : subClusterId,
                        'clusterId'    : cluster_id,
                        'locationName' : 'TIMESTAMP',
                        'version'      : epoch,
                        'path'         : now,
                       }
                # Spit out our template, fill it with the appropriate info.
                printTemplate(template, info)
    except Exception, e:
        # Log error, then report it via stderr.
        log.exception(e)
        sys.stdout = sys.stderr
        raise
def print_VOViewLocal(queue_info, cp):
    """
    Print out the VOView objects for the LSF batch system.

    One VOView per VO per queue, for each VO which has access to the queue.
    """
    ce_name = cp.get(ce, "name")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(queue_info, cp)
    for vo, queue in vo_queues:
        vo = vo.lower()
        vo_info = queue_jobs.get(queue, {})
        info2 = vo_info.get(vo, {})
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'lsf', queue)
        my_queue_info = queue_info.setdefault(queue, {})
        if cp.has_option("lsf", "max_wall"):
            my_queue_info["max_wall"] = cp_getInt(cp, "lsf", "max_wall", 1440)
        else:
            if "max_wall" not in my_queue_info:
                my_queue_info["max_wall"] = 1440
        ert, wrt = responseTimes(cp, info2.get("running", 0),
                                 info2.get("waiting", 0),
                                 max_job_time=my_queue_info.get("max_wall", 0))
        free_slots = my_queue_info.get('free_slots', 0)
        waiting = info2.get('waiting', 0)
        if waiting > cp_getInt(cp, 'lsf', 'idle_slack', '10'):
            free_slots = 0
        info = {
            'ceUniqueID'  : ce_unique_id,
            'job_slots'   : my_queue_info.get('job_slots', 0),
            'free_slots'  : free_slots,
            'ce_name'     : ce_name,
            'queue'       : queue,
            'vo'          : vo,
            'voLocalID'   : vo,
            'job_manager' : 'lsf',
            'running'     : info2.get('running', 0),
            'max_running' : info2.get('max_running', 0),
            'priority'    : queue_info.get(queue, {}).get('priority', 0),
            'waiting'     : waiting,
            'data'        : cp.get("osg_dirs", "data"),
            'app'         : cp.get("osg_dirs", "app"),
            'default_se'  : getDefaultSE(cp),
            'ert'         : ert,
            'wrt'         : wrt,
            'acbr'        : 'VO:%s' % vo,
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def print_SE(se, cp):
    """
    Emit the GLUE entities for the SE, based upon the StorageElement class.
    """
    # if the unique ID is UNKNOWN, a real SE does not exist, the classic SE
    # will probably be invoked
    seUniqueID = se.getUniqueID()
    if seUniqueID == "UNKNOWN" or seUniqueID == "UNAVAILABLE":
        return
    status = se.getStatus()
    version = se.getVersion()

    # Determine space information
    try:
        used, available, total = se.getSESpace(total=True, gb=True)
    except:
        used, available, total = 0, 0, 0

    # Tape information, if we have it...
    nu, _, nt = se.getSETape()

    siteUniqueID = cp.get("site", "unique_name")
    implementation = se.getImplementation()

    # Try to guess the appropriate architecture
    arch = se.getSEArch()

    # port number was hard coded to 8443, get from cp now
    # NOTE: this field is deprecated by the schema so it should not be used
    port = se.getPort()

    # Fill in the information for the template
    info = {
        'seName'         : se.getName(),
        'seUniqueID'     : se.getUniqueID(),
        'implementation' : implementation,
        "version"        : version,
        "status"         : status,
        "port"           : port,
        "onlineTotal"    : total,
        "nearlineTotal"  : nt,
        "onlineUsed"     : used,
        "nearlineUsed"   : nu,
        "architecture"   : arch,
        "free"           : available,
        "total"          : total,
        "bdii"           : cp_get(cp, "bdii", "endpoint", "Unknown"),
        "siteUniqueID"   : siteUniqueID,
        "arch"           : arch,
    }
    seTemplate = getTemplate("GlueSE", "GlueSEUniqueID")
    log.info(str(info))
    printTemplate(seTemplate, info)

    try:
        print_SA(se, cp, se.getSection())
    except Exception, e:
        log.exception(e)
def print_Locations(cp):
    template = getTemplate("GlueCluster", "GlueLocationLocalID")
    cluster_id = getClusterID(cp)
    for subClusterId in getSubClusterIDs(cp):
        for entry in getApplications(cp):
            entry['subClusterId'] = subClusterId
            entry['clusterId'] = cluster_id
            printTemplate(template, entry)
def publish_gums(cp, template):
    hostname = cp_get(cp, "ce", 'name', gethostname())
    siteID = cp_get(cp, "site", "unique_name", gethostname())
    gumsConfig = vdtDir(os.path.expandvars(
                            '$VDT_LOCATION/gums/config/gums-client.properties'),
                        '/etc/gums/gums-client.properties')
    gumsConfigFile = open(gumsConfig, 'r')
    gums_re = re.compile(r'gums.authz\s*=\s*(https://(.*):.*?/(.*))')
    lines = gumsConfigFile.readlines()
    for line in lines:
        m = gums_re.match(line)
        if m:
            (gums_uri, gums_host) = m.groups()[0:2]

    os.putenv('X509_USER_CERT', '/etc/grid-security/http/httpcert.pem')
    os.putenv('X509_USER_KEY', '/etc/grid-security/http/httpkey.pem')
    mapping_subject_dn = '/GIP-GUMS-Probe-Identity'
    mapping_subject_name = '`grid-cert-info -subject` '
    gums_command = vdtDir(os.path.expandvars(
                              '$VDT_LOCATION/gums/scripts/gums-service'),
                          '/usr/bin/gums-service')
    gums_command += ' mapUser -s ' + mapping_subject_name + mapping_subject_dn
    (gums_output, pin) = popen2.popen4(gums_command)
    gums_id_re = re.compile(r'.*\[userName: (.*)\].*')
    status = "Warning"
    statusInfo = "Test mapping failed: if GUMS was not down, check logs" + \
                 " at " + gums_host + ':' + '$VDT_LOCATION/tomcat/v55/logs'
    lines = gums_output.readlines()
    for line in lines:
        m = gums_id_re.match(line)
        if m:
            # m.groups([0]) returned the whole group tuple; index the first
            # group instead so the user name is reported correctly.
            uidMapping = m.groups()[0]
            status = "OK"
            statusInfo = "Test mapping successful: user id = %s" % uidMapping
            break
    info = {'serviceID': gums_uri,
            'serviceType': 'GUMS',
            'serviceName': 'Authorization',
            'version': 'UNDEFINED',
            'endpoint': gums_uri,
            'semantics': 'UNDEFINED',
            'owner': '',
            'url': gums_uri,
            'uri': gums_uri,
            'status': status,
            'statusInfo': statusInfo,
            'wsdl': 'Not Applicable',
            'startTime': 'Not Applicable',
            'siteID': siteID,
            'acbr': '__GIP_DELETEME'
           }
    printTemplate(template, info)
def main():
    log.info('Starting CREAM service provider')
    try:
        cp = config()
        serviceID = buildServiceID(cp)
        siteID = cp_get(cp, "site", "unique_name", 'UNKNOWN_SITE')
        serviceName = '%s-CREAM' % siteID
        creamVersion = getCreamVersion(cp)
        endpoint = 'https://%s:8443/ce-cream/services' % \
                   cp_get(cp, "ce", "name", 'UNKNOWN_CE')
        allVOs = voList(cp)
        acbr = ''
        owner = ''
        log.debug('CREAM VOs are %s' % allVOs)
        if not allVOs:
            log.error("No VOs supported!")
            acbr = '__GIP_DELETEME'
        else:
            acbr = '\n'.join(['GlueServiceAccessControlBaseRule: %s\n' \
                              'GlueServiceAccessControlBaseRule: VO:%s' % (vo, vo)
                              for vo in allVOs])
            # owner needs an extra prepended newline
            owner = '\n' + '\n'.join(['GlueServiceOwner: %s' % vo
                                      for vo in allVOs])

        pid = -1
        startTime = 'Not Applicable'
        serviceStatus = 'Not OK'
        serviceStatusInfo = 'Could not find tomcat process'
        try:
            (startTime, pid) = getStartTimeAndPid(cp)
            serviceStatus = 'OK'
            serviceStatusInfo = 'Tomcat (%d) is running' % pid
        except:
            log.error('Could not locate tomcat process (pgrep -f '
                      '"org.apache.catalina.startup.Bootstrap start"'
                      ' probably failed to return any output!)')

        info = {'serviceID': serviceID,
                'serviceType': 'org.glite.ce.CREAM',
                'serviceName': serviceName,
                'version': creamVersion,
                'endpoint': endpoint,
                'semantics': 'https://edms.cern.ch/document/595770',
                'owner': owner,
                'url': '__GIP_DELETEME',  # deprecated
                'uri': '__GIP_DELETEME',  # deprecated
                'status': serviceStatus,
                'statusInfo': serviceStatusInfo,
                'wsdl': 'http://grid.pd.infn.it/cream/wsdl/org.glite.ce-cream_service.wsdl',
                'startTime': startTime,
                'siteID': siteID,
                'acbr': acbr
               }
        template = getTemplate("GlueService", "GlueServiceUniqueID")
        printTemplate(template, info)
    except Exception, e:
        sys.stdout = sys.stderr
        log.error(e)
        raise
def print_single_VOInfo(voinfo, se, cp): #pylint: disable-msg=W0613
    """
    Emit the GLUE entity for a single VOInfo dictionary.
    """
    voinfoTemplate = getTemplate('GlueSE', 'GlueVOInfoLocalID')
    voinfo.setdefault('acbr', 'GlueVOInfoAccessControlBaseRule: UNKNOWN')
    voinfo.setdefault('path', '/UNKNOWN')
    voinfo.setdefault('tag', '__GIP_DELETEME')
    voinfo.setdefault('seUniqueID', se.getUniqueID())
    printTemplate(voinfoTemplate, voinfo)
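# Usage sketch for print_single_VOInfo (the VO and path are illustrative
# values only): as with print_single_SA, any attribute left unset falls back
# to the defaults above.
def _print_cms_voinfo(se, cp):
    voinfo = {'acbr': 'GlueVOInfoAccessControlBaseRule: VO:cms',
              'path': '/mnt/storage/cms'}
    print_single_VOInfo(voinfo, se, cp)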
def print_CESEBind(cp):
    group_template = getTemplate("GlueCESEBind", "GlueCESEBindGroupCEUniqueID")
    se_template = getTemplate("GlueCESEBind", "GlueCESEBindSEUniqueID")
    bind_info = getCESEBindInfo(cp)
    cegroups = {}
    for info in bind_info:
        printTemplate(se_template, info)
        ses = cegroups.setdefault(info['ceUniqueID'], sets.Set())
        ses.add(info['seUniqueID'])
    for ce, ses in cegroups.items():
        ses = '\n'.join(['GlueCESEBindGroupSEUniqueID: %s' % i for i in ses])
        info = {'ceUniqueID': ce, 'se_groups': ses}
        printTemplate(group_template, info)
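# Shape of the records consumed above (values are illustrative, inferred
# from the keys referenced): each bind entry pairs one CE with one SE, e.g.
#   {'ceUniqueID': 'ce.example.com:2119/jobmanager-pbs-default',
#    'seUniqueID': 'se.example.com'}
# print_CESEBind regroups them so each CE lists all of its bound SEs in a
# single group entry.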
def print_subclusters(cp):
    subclusters = gip_cluster.generateSubClusters(cp)
    template = getTemplate("GlueCluster", "GlueSubClusterUniqueID")
    for subcluster_info in subclusters:
        if 'hepspec' in subcluster_info and subcluster_info['hepspec']:
            desc = 'GlueHostProcessorOtherDescription: ' \
                   'Cores=%s, Benchmark=%s-HEP-SPEC06' % \
                   (str(subcluster_info['cores']),
                    str(subcluster_info['hepspec']))
        else:
            desc = 'GlueHostProcessorOtherDescription: Cores=%s' % \
                   str(subcluster_info['cores'])
        subcluster_info['otherDesc'] = desc
        printTemplate(template, subcluster_info)
def print_VOViewLocal(queue_info, cp):
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(cp)
    for vo, queue in vo_queues:
        vo_info = queue_jobs.get(queue, {})
        info2 = vo_info.get(vo, {})
        port = getPort(cp)
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'pbs', queue)
        my_queue_info = queue_info.setdefault(queue, {})
        max_job_time = my_queue_info.get("max_wall", 0)
        if cp.has_option("pbs", "max_wall"):
            max_job_time = cp_getInt(cp, "pbs", "max_wall", 1440)
        ert, wrt = responseTimes(cp, info2.get("running", 0),
                                 info2.get("wait", 0), max_job_time)
        free_slots = my_queue_info.get('free_slots', 0)
        waiting = info2.get('wait', 0)
        if waiting > cp_getInt(cp, 'pbs', 'idle_slack', '10'):
            free_slots = 0
        info = {
            'ceUniqueID'  : ce_unique_id,
            'job_slots'   : my_queue_info.get('job_slots', 0),
            'free_slots'  : free_slots,
            'ce_name'     : ce_name,
            'queue'       : queue,
            'vo'          : vo,
            'voLocalID'   : vo,
            'job_manager' : 'pbs',
            'running'     : info2.get('running', 0),
            'max_running' : info2.get('max_running', 0),
            'priority'    : queue_info.get(queue, {}).get('priority', 0),
            'waiting'     : waiting,
            'data'        : cp_get(cp, "osg_dirs", "data", "UNKNOWN_DATA"),
            'app'         : cp_get(cp, "osg_dirs", "app", "UNKNOWN_APP"),
            'default_se'  : getDefaultSE(cp),
            # Use the computed response times; they were previously
            # calculated above but shadowed by hardcoded 3600s values.
            'ert'         : ert,
            'wrt'         : wrt,
            'acbr'        : 'VO:%s' % vo
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def print_single_SRM(info, se, cp):
    """
    Print out the GLUE service and CP entities for a single SRM dictionary.
    """
    sitename = cp.get("site", "unique_name")
    sename = se.getUniqueID()
    version = info.setdefault('version', '2.2.0')
    info.setdefault('siteID', sitename)
    info.setdefault('seUniqueID', sename)
    info.setdefault('startTime', '1970-01-01T00:00:00Z')
    info.setdefault('statusInfo', 'OK')
    endpoint = info.get('endpoint', 'httpg://example.org:8443/srm/managerv2')

    # Filter endpoint to make it acceptable!
    # (str.replace returns a new string; the result must be assigned back,
    # which the original code failed to do.)
    endpoint = endpoint.replace('srm://', 'httpg://')
    sfn_loc = endpoint.find('?SFN=')
    if sfn_loc >= 0:
        endpoint = endpoint[:sfn_loc]

    info['protocolType'] = 'SRM'
    info['serviceType'] = 'SRM'
    info['capability'] = 'control'
    info['semantics'] = 'UNDEFINED'
    info['owner'] = ''
    if version.find('2') >= 0:
        info['version'] = "2.2.0"
        info['endpoint'] = endpoint
        info['serviceID'] = endpoint
        info['uri'] = endpoint
        info['url'] = endpoint
        info['serviceName'] = endpoint
        info["wsdl"] = "http://sdm.lbl.gov/srm-wg/srm.v2.2.wsdl"
        info["semantics"] = "http://sdm.lbl.gov/srm-wg/doc/SRM.v2.2.pdf"
        # Bugfix: Make the control protocol unique ID unique between the SRM
        # versions
        info['cpLocalID'] = info.get('name', sename) + '_srmv2'
    else:
        info['version'] = '1.1.0'
        info['endpoint'] = endpoint
        info['serviceID'] = endpoint
        info['uri'] = endpoint
        info['url'] = endpoint
        info['serviceName'] = endpoint
        info["wsdl"] = "http://sdm.lbl.gov/srm-wg/srm.v1.1.wsdl"
        info["semantics"] = "http://sdm.lbl.gov/srm-wg/srm.v1.1.wsdl"
        info['cpLocalID'] = info.get('name', sename) + '_srmv1'
    ServiceTemplate = getTemplate("GlueService", "GlueServiceUniqueID")
    ControlTemplate = getTemplate("GlueSE", "GlueSEControlProtocolLocalID")
    printTemplate(ControlTemplate, info)
    printTemplate(ServiceTemplate, info)
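# The endpoint filtering above, restated as a standalone sketch: srm://
# becomes httpg:// and any '?SFN=' suffix is dropped, so a (hypothetical)
# 'srm://se.example.com:8443/srm/managerv2?SFN=/data' normalizes to
# 'httpg://se.example.com:8443/srm/managerv2'.
def _normalize_srm_endpoint(endpoint):
    endpoint = endpoint.replace('srm://', 'httpg://')
    sfn_loc = endpoint.find('?SFN=')
    if sfn_loc >= 0:
        endpoint = endpoint[:sfn_loc]
    return endpoint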
def main():
    try:
        # Load up the site configuration
        cp = config()
        se_only = cp_getBoolean(cp, "gip", "se_only", False)
        if not se_only and 'VDT_LOCATION' in os.environ:
            # get the VDT version
            vdt_version_cmd = os.path.expandvars("$VDT_LOCATION/vdt/bin/") + \
                              'vdt-version --no-wget'
            vdt_version_out = runCommand(vdt_version_cmd).readlines()
            gip_re = re.compile(r'Generic Information Provider\s+(.*?)\s*-.*')
            gip_version = 'UNKNOWN'
            for line in vdt_version_out:
                m = gip_re.match(line)
                if m:
                    gip_version = m.groups()[0]
                    break
            gip_version += '; $Revision$'
            # Get the timestamp in the two formats we wanted
            now = time.strftime("%a %b %d %T UTC %Y", time.gmtime())
            # Load up the template for GlueLocationLocalID
            # To view its contents, see $VDT_LOCATION/gip/templates/GlueCluster
            template = getTemplate("GlueCluster", "GlueLocationLocalID")
            cluster_id = getClusterID(cp)
            for subClusterId in getSubClusterIDs(cp):
                # Dictionary of data to fill in for GlueLocationLocalID
                info = {'locationId'   : 'GIP_VERSION',
                        'subClusterId' : subClusterId,
                        'clusterId'    : cluster_id,
                        'locationName' : 'GIP_VERSION',
                        'version'      : gip_version,
                        'path'         : now,
                       }
                # Spit out our template, fill it with the appropriate info.
                printTemplate(template, info)
    except Exception, e:
        # Log error, then report it via stderr.
        log.exception(e)
        sys.stdout = sys.stderr
        raise
def print_VOViewLocal(queue_info, cp):
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    vo_map = VoMapper(cp)
    queue_jobs = getJobsInfo(vo_map, cp)
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    vo_queues = getVoQueues(cp)
    for vo, queue in vo_queues:
        vo_info = queue_jobs.get(queue, {})
        info2 = vo_info.get(vo, {})
        port = getPort(cp)
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'pbs', queue)
        my_queue_info = queue_info.setdefault(queue, {})
        ert, wrt = responseTimes(cp, info2.get("running", 0),
                                 info2.get("wait", 0),
                                 max_job_time=my_queue_info.get("max_wall", 0))
        free_slots = my_queue_info.get('free_slots', 0)
        waiting = info2.get('wait', 0)
        if waiting > 0:
            free_slots = 0
        info = {
            'ceUniqueID'  : ce_unique_id,
            'job_slots'   : my_queue_info.get('job_slots', 0),
            'free_slots'  : free_slots,
            'ce_name'     : ce_name,
            'queue'       : queue,
            'vo'          : vo,
            'voLocalID'   : vo,
            'job_manager' : 'pbs',
            'running'     : info2.get('running', 0),
            'max_running' : info2.get('max_running', 0),
            'priority'    : queue_info.get(queue, {}).get('priority', 0),
            'waiting'     : waiting,
            'data'        : cp_get(cp, "osg_dirs", "data", "UNKNOWN_DATA"),
            'app'         : cp_get(cp, "osg_dirs", "app", "UNKNOWN_APP"),
            'default_se'  : getDefaultSE(cp),
            # Use the computed response times; they were previously
            # calculated above but shadowed by hardcoded 3600s values.
            'ert'         : ert,
            'wrt'         : wrt,
            'acbr'        : 'VO:%s' % vo
        }
        info['total'] = info['waiting'] + info['running']
        printTemplate(VOView, info)
def publish_gridmap_file(cp, template):
    hostname = cp_get(cp, "ce", 'name', gethostname())
    siteID = cp_get(cp, "site", "unique_name", gethostname())
    info = {'serviceID': '%s:gridmap-file' % hostname,
            'serviceType': 'gridmap-file',
            'serviceName': 'Authorization',
            'version': 'UNDEFINED',
            'endpoint': 'Not Applicable',
            'semantics': 'UNDEFINED',
            'owner': '',
            'url': 'localhost://etc/grid-security/gridmap-file',
            'uri': 'localhost://etc/grid-security/gridmap-file',
            'status': 'OK',
            'statusInfo': 'Node is configured to use gridmap-file. ' +
                          'Did not check if gridmap-file is properly '
                          'configured.',
            'wsdl': 'Not Applicable',
            'startTime': 'Not Applicable',
            'siteID': siteID,
            'acbr': '__GIP_DELETEME'
           }
    # Spit out our template, fill it with the appropriate info.
    printTemplate(template, info)
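# Dispatch sketch tying the two authorization publishers together; the
# 'authorization_method' option name is an assumption for illustration, not
# a documented GIP setting. A site advertises either GUMS or a gridmap file.
def publish_authorization(cp):
    template = getTemplate("GlueService", "GlueServiceUniqueID")
    if cp_get(cp, "ce", "authorization_method", "gridmap-file") == "gums":
        publish_gums(cp, template)
    else:
        publish_gridmap_file(cp, template)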
def main():
    try:
        # Load up the site configuration
        cp = config()
        se_only = cp_getBoolean(cp, "gip", "se_only", False)
        if not se_only and 'VDT_LOCATION' in os.environ:
            # get the VDT version
            vdt_version_cmd = os.path.expandvars("$VDT_LOCATION/vdt/bin/") + \
                              'vdt-version --brief'
            vdt_version = runCommand(vdt_version_cmd).readlines()[0].strip()
            if vdt_version == "":
                vdt_version = "OLD_VDT"
            # Get the timestamp in the two formats we wanted
            now = time.strftime("%a %b %d %T UTC %Y", time.gmtime())
            # Load up the template for GlueLocationLocalID
            # To view its contents, see $VDT_LOCATION/gip/templates/GlueCluster
            template = getTemplate("GlueCluster", "GlueLocationLocalID")
            cluster_id = getClusterID(cp)
            for subClusterId in getSubClusterIDs(cp):
                # Dictionary of data to fill in for GlueLocationLocalID
                info = {'locationId'   : 'VDT_VERSION',
                        'subClusterId' : subClusterId,
                        'clusterId'    : cluster_id,
                        'locationName' : 'VDT_VERSION',
                        'version'      : vdt_version,
                        'path'         : now,
                       }
                # Spit out our template, fill it with the appropriate info.
                printTemplate(template, info)
    except Exception, e:
        # Log error, then report it via stderr.
        log.exception(e)
        sys.stdout = sys.stderr
        raise
def print_CE(cp):
    """
    Print out the GlueCE objects for LSF; one GlueCE per grid queue.
    """
    try:
        lsfVersion = getLrmsInfo(cp)
    except:
        lsfVersion = 'Unknown'
    log.debug('Using LSF version %s' % lsfVersion)
    queueInfo = getQueueInfo(cp)
    try:
        totalCpu, freeCpu, queueCpus = parseNodes(queueInfo, cp)
    except:
        #raise
        totalCpu, freeCpu, queueCpus = 0, 0, {}
    log.debug('Total, Free CPU: (%s, %s)' % (totalCpu, freeCpu))
    ce_name = cp.get(ce, "name")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in \
                         cp.get("lsf", "queue_exclude").split(',')]
    except:
        excludeQueues = []
    vo_queues = getVoQueues(queueInfo, cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        log.debug('Processing queue %s' % queue)
        if 'running' not in info:
            info['running'] = 0
        if 'status' not in info:
            # There really should be an unknown status...
            info['status'] = 'Closed'
        if 'total' not in info:
            info['total'] = 0
        info["lrmsVersion"] = lsfVersion
        info["job_manager"] = "lsf"
        if int(info.get("wait", 0)) > 0:
            info["free_slots"] = 0
        else:
            if queue in queueCpus and 'max' in queueCpus[queue] and \
                    'njobs' in queueCpus[queue]:
                info["free_slots"] = queueCpus[queue]['max'] - \
                                     queueCpus[queue]['njobs']
            else:
                info["free_slots"] = freeCpu
        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, 'lsf', queue)
        info['ceUniqueID'] = unique_id
        if "job_slots" not in info:
            if queue in queueCpus and 'max' in queueCpus[queue]:
                log.debug('queue %s, info is %s' % (queue, queueCpus[queue]))
                info['job_slots'] = queueCpus[queue]['max']
            else:
                info["job_slots"] = totalCpu
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            info["max_running"] = info["job_slots"]
        elif not info['max_running'] or info['max_running'] == '-':
            info['max_running'] = 999999
        if cp.has_option("lsf", "max_wall"):
            info["max_wall"] = cp_getInt(cp, "lsf", "max_wall", 1440)
        else:
            if "max_wall" not in info:
                info["max_wall"] = 1440
        info["max_wall"] = int(info["max_wall"])  # glue proscribes ints
        info["job_slots"] = min(totalCpu, info["job_slots"])
        # Guard against a missing 'wait' entry (the rest of this function
        # consistently uses info.get("wait", 0)).
        ert, wrt = responseTimes(cp, info["running"], info.get("wait", 0),
                                 max_job_time=info["max_wall"])
        contact_string = buildContactString(cp, 'lsf', queue, unique_id, log)
        ceImpl, ceImplVersion = getCEImpl(cp)
        info['ert'] = ert
        info['wrt'] = wrt
        info['hostingCluster'] = cp_get(cp, ce, 'hosting_cluster', ce_name)
        info['hostName'] = cp_get(cp, ce, 'host_name', ce_name)
        info['ceImpl'] = ceImpl
        info['ceImplVersion'] = ceImplVersion
        info['contact_string'] = contact_string
        info['app_dir'] = cp.get('osg_dirs', 'app')
        info['data_dir'] = cp.get('osg_dirs', 'data')
        info['default_se'] = getDefaultSE(cp)
        info['max_waiting'] = 999999
        #info['max_total'] = info['max_running']
        info['max_total'] = info['max_waiting'] + info['max_running']
        info['assigned'] = info['job_slots']
        info['lrmsType'] = 'lsf'
        info['preemption'] = str(cp_getInt(cp, 'lsf', 'preemption', '0'))
        acbr = ''
        for vo, queue2 in vo_queues:
            if queue == queue2:
                acbr += 'GlueCEAccessControlBaseRule: VO:%s\n' % vo.lower()
        if not acbr:
            continue
        #print info
        info['acbr'] = acbr[:-1]
        info['bdii'] = cp.get('bdii', 'endpoint')
        gramVersion = getGramVersion(cp)
        port = getPort(cp)
        info['gramVersion'] = gramVersion
        info['port'] = port
        info['waiting'] = info.get('wait', 0)
        info['referenceSI00'] = gip_cluster.getReferenceSI00(cp)
        info['clusterUniqueID'] = getClusterID(cp)
        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + \
                                'GlueCECapability: glexec'
        htpcRSL, maxSlots = getHTPCInfo(cp, 'lsf', queue, log)
        info['max_slots'] = maxSlots
        info['htpc'] = htpcRSL
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + \
                                'GlueCECapability: htpc'
        info['extraCapabilities'] = extraCapabilities
        printTemplate(CE, info)
    return queueInfo, totalCpu, freeCpu, queueCpus
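# Driver sketch for the LSF provider (assumes the usual GIP imports, as in
# the other providers): print_CE returns the queue data that
# print_VOViewLocal consumes for the per-VO views.
if __name__ == '__main__':
    cp = config()
    queue_info, _, _, _ = print_CE(cp)
    print_VOViewLocal(queue_info, cp)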
"version" : version, "status" : status, "port" : 2811, "onlineTotal" : 0, "nearlineTotal" : nt, "onlineUsed" : used, "nearlineUsed" : nu, "architecture" : arch, "free" : available, "total" : total, "bdii" : cp_get(cp, "bdii", "endpoint", "Unknown"), "siteUniqueID" : siteUniqueID, "arch" : arch, } seTemplate = getTemplate("GlueSE", "GlueSEUniqueID") printTemplate(seTemplate, info) vos = voListStorage(cp) try: used, available, total = getClassicSESpace(cp, total=True) except Exception, e: used = 0 available = 0 total = 0 acbr = [] for vo in vos: acbr.append("GlueSAAccessControlBaseRule: VO:%s" % vo) acbr = '\n'.join(acbr) path = cp_get(cp, "osg_dirs", "data", "/UNKNOWN") info = {"saLocalID" : seUniqueID, "seUniqueID" : seUniqueID,
def main():
    cp = gip_common.config()
    myosg_url = gip_common.cp_get(cp, "MyOSG", "url", default_url)
    MyOSG = myosg.MyOSG()
    MyOSG.query(myosg_url)
    domainTemplate = gip_common.getTemplate("Glue2Site", "GLUE2DomainID")
    groupTemplate = gip_common.getTemplate("Glue2Site", "GLUE2GroupID")
    locationTemplate = gip_common.getTemplate("Glue2Site", "GLUE2LocationID")
    storageServiceTemplate = gip_common.getTemplate("Glue2Storage",
                                                    "GLUE2ServiceID")
    shareTemplate = gip_common.getTemplate("Glue2Storage", "GLUE2ShareID")
    srmTemplate = gip_common.getTemplate("Glue2Storage", "GLUE2EndpointID")
    srmPolicyTemplate = gip_common.getTemplate("Glue2Storage", "GLUE2PolicyID")
    srmServiceTemplate = gip_common.getTemplate("Glue2Service",
                                                "GLUE2ServiceID")
    srmServiceEndpointTemplate = gip_common.getTemplate("Glue2Service",
                                                        "GLUE2EndpointID")
    srmServicePolicyTemplate = gip_common.getTemplate("Glue2Service",
                                                      "GLUE2PolicyID")
    for resource_group in MyOSG.getResourceGroups():
        found_srm = False
        for resource in resource_group["resources"]:
            if 'SRMv2' in resource['services'].keys():
                resource_info = formatStorage(resource, resource_group)
                found_srm = True
                gip_common.printTemplate(storageServiceTemplate, resource_info)
                for vo in resource['ownership']:
                    if vo == "(Other)":
                        continue
                    vo = vo.lower()
                    vo_info = formatVOShare(vo, resource, resource_group)
                    gip_common.printTemplate(shareTemplate, vo_info)
                srm_info = formatSRMPolicy(resource['services']['SRMv2'],
                                           resource, resource_group)
                gip_common.printTemplate(srmPolicyTemplate, srm_info)
                gip_common.printTemplate(srmTemplate, srm_info)
                gip_common.printTemplate(srmServiceTemplate, srm_info)
                gip_common.printTemplate(srmServiceEndpointTemplate, srm_info)
                gip_common.printTemplate(srmServicePolicyTemplate, srm_info)
        if found_srm:
            # Only print these once.
            gip_common.printTemplate(domainTemplate, resource_info)
            gip_common.printTemplate(groupTemplate, resource_info)
def print_CE(cp):
    SGEVersion = getLrmsInfo(cp)
    queueInfo, _ = getQueueInfo(cp)
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    ce_template = getTemplate("GlueCE", "GlueCEUniqueID")
    queueList = getQueueList(cp)
    vo_queues = getVoQueues(cp)
    default_max_waiting = 999999
    for queue in queueInfo.values():
        if 'name' not in queue or queue['name'] not in queueList:
            continue
        if queue['name'] == 'waiting':
            continue
        unique_id = buildCEUniqueID(cp, ce_name, 'sge', queue['name'])
        acbr = ''
        for vo, queue2 in vo_queues:
            if queue['name'] == queue2:
                acbr += 'GlueCEAccessControlBaseRule: VO:%s\n' % vo
        referenceSI00 = gip_cluster.getReferenceSI00(cp)
        contact_string = buildContactString(cp, 'sge', queue['name'],
                                            unique_id, log)
        extraCapabilities = ''
        if cp_getBoolean(cp, 'site', 'glexec_enabled', False):
            extraCapabilities = extraCapabilities + '\n' + \
                                'GlueCECapability: glexec'
        htpcRSL, maxSlots = getHTPCInfo(cp, 'sge', queue, log)
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + '\n' + \
                                'GlueCECapability: htpc'
        gramVersion = getGramVersion(cp)
        port = getPort(cp)
        ceImpl, ceImplVersion = getCEImpl(cp)
        max_wall = queue["max_wall"]
        if cp.has_option("sge", "max_wall"):
            max_wall = cp_getInt(cp, "sge", "max_wall", 1440)
        info = {
            "ceUniqueID"      : unique_id,
            "ceName"          : ce_name,
            "ceImpl"          : ceImpl,
            "ceImplVersion"   : ceImplVersion,
            "clusterUniqueID" : getClusterID(cp),
            "queue"           : queue['name'],
            "priority"        : queue['priority'],
            "lrmsType"        : 'sge',
            "lrmsVersion"     : SGEVersion,
            "job_manager"     : "sge",
            "job_slots"       : queue["slots_total"],
            "free_slots"      : queue["slots_free"],
            "running"         : queue["slots_used"],
            "status"          : queue['status'],
            "total"           : queue['slots_used'] + queue['waiting'],
            "ert"             : 3600,
            "wrt"             : 3600,
            "hostingCluster"  : cp_get(cp, ce, 'hosting_cluster', ce_name),
            "hostName"        : cp_get(cp, ce, 'host_name', ce_name),
            "contact_string"  : contact_string,
            "app_dir"         : cp_get(cp, 'osg_dirs', 'app',
                                       "/OSG_APP_UNKNOWN"),
            "data_dir"        : cp_get(cp, 'osg_dirs', 'data',
                                       "/OSG_DATA_UNKNOWN"),
            "default_se"      : getDefaultSE(cp),
            "max_running"     : queue["slots_total"],
            "max_wall"        : max_wall,
            "max_waiting"     : default_max_waiting,
            "max_slots"       : maxSlots,
            "max_total"       : default_max_waiting + queue["slots_total"],
            "assigned"        : queue["slots_used"],
            "preemption"      : cp_get(cp, 'sge', 'preemption', '0'),
            "acbr"            : acbr[:-1],
            "bdii"            : cp.get('bdii', 'endpoint'),
            "gramVersion"     : gramVersion,
            "port"            : port,
            "waiting"         : queue['waiting'],
            "referenceSI00"   : referenceSI00,
            'extraCapabilities' : extraCapabilities,
            "htpc"            : htpcRSL
        }
        printTemplate(ce_template, info)
    return queueInfo
def print_VOViewLocal(cp):
    """
    Print the GLUE VOView entity; shows the VO's view of the condor batch
    system.

    Config options used:
       * ce.name.  The human-readable name of the ce.
       * condor.status.  The status of condor; defaults to "Production"
       * osg_dirs.app.  The $OSG_APP directory; defaults to "/Unknown"
       * osg_dirs.data.  The $OSG_DATA directory; defaults to "/Unknown"
       * se.name.  The human-readable name of the closest SE.

    @param cp: The GIP configuration object
    @type cp: ConfigParser.ConfigParser
    """
    VOView = getTemplate("GlueCE", "GlueVOViewLocalID")
    ce_name = cp_get(cp, "ce", "name", "")
    #status = cp_get(cp, "condor", "status", "Production")
    #condorVersion = getLrmsInfo(cp)
    total_nodes, _, unclaimed = parseNodes(cp)
    vo_map = VoMapper(cp)
    jobs_info = getJobsInfo(vo_map, cp)
    groupInfo = getGroupInfo(vo_map, cp)

    # Add in the default group
    all_group_vos = []
    total_assigned = 0
    for key, val in groupInfo.items():
        if key == 'default':
            continue
        all_group_vos.extend(val['vos'])
        total_assigned += val.get('quota', 0)
    all_vos = sets.Set(voList(cp))
    defaultVoList = [i for i in all_vos if i not in all_group_vos]
    if 'default' not in groupInfo:
        groupInfo['default'] = {}
        groupInfo['default']['vos'] = defaultVoList
    if total_nodes > total_assigned:
        log.info("There are %i assigned job slots out of %i total; assigning"
                 " the rest to the default group." % (total_assigned,
                                                      total_nodes))
        groupInfo['default']['quota'] = total_nodes - total_assigned
    else:
        log.warning("More assigned nodes (%i) than actual nodes (%i)!" %
                    (total_assigned, total_nodes))

    if defaultGroupIsExcluded(cp):
        if groupInfo.has_key('default'):
            del groupInfo['default']

    for group in groupInfo:
        jinfo = jobs_info.get(group, {})
        vos = sets.Set(groupInfo[group].get('vos', [group]))
        vos.update(jinfo.keys())
        vos.intersection_update(all_vos)

        # Enforce invariants
        #   VO_FREE_SLOTS <= CE_FREE_SLOTS
        #   VO_FREE_SLOTS <= CE_ASSIGNED - VO_RUNNING
        # This code determines CE_ASSIGNED
        ginfo = groupInfo[group]
        if ginfo.get("quota", 0) > 0:
            assigned = ginfo.get("quota", 0)
        else:
            assigned = total_nodes

        log.debug("All VOs for %s: %s" % (group, ", ".join(vos)))
        ce_unique_id = buildCEUniqueID(cp, ce_name, 'condor', group)
        max_wall = cp_getInt(cp, "condor", "max_wall", 1440)

        myrunning = sum([i.get('running', 0) for i in jinfo.values()], 0)
        assigned = max(assigned, myrunning)

        for vo in vos:
            acbr = 'VO:%s' % vo
            info = jinfo.get(vo.lower(), {"running": 0, "idle": 0, "held": 0})
            ert, wrt = responseTimes(cp, info["running"],
                                     info["idle"] + info["held"],
                                     max_job_time=max_wall*60)
            free = min(unclaimed, assigned - myrunning,
                       assigned - int(info['running']))
            free = int(free)
            waiting = int(info["idle"]) + int(info["held"])
            if waiting > cp_getInt(cp, 'condor', 'idle_slack', '10'):
                free = 0
            info = {"vo"          : vo,
                    "acbr"        : acbr,
                    "ceUniqueID"  : ce_unique_id,
                    "voLocalID"   : vo,
                    "ce_name"     : ce_name,
                    "job_manager" : 'condor',
                    "queue"       : vo,
                    "running"     : info["running"],
                    # Held jobs are included as "waiting" since the
                    # definition is: Number of jobs that are in a state
                    # different than running
                    "waiting"     : waiting,
                    "total"       : info["running"] + info["idle"] + \
                                    info["held"],
                    "free_slots"  : free,
                    "job_slots"   : int(total_nodes),
                    "ert"         : ert,
                    "wrt"         : wrt,
                    "default_se"  : getDefaultSE(cp),
                    'app'         : cp_get(cp, 'osg_dirs', 'app', '/Unknown'),
                    "data"        : cp_get(cp, "osg_dirs", "data", "/Unknown"),
                   }
            printTemplate(VOView, info)
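# Worked example of the VO free-slot computation above (hypothetical
# numbers): with unclaimed=10, a group quota of assigned=8, myrunning=5
# across the group, and this VO running 3 of those jobs,
#   free = min(10, 8 - 5, 8 - 3) = 3
# slots are advertised for the VO, and free is forced to 0 if the VO's
# waiting jobs exceed the condor.idle_slack threshold.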
"max_running" : max_running, "max_waiting" : 99999, "max_total" : 99999, "max_wall" : cp_getInt(cp, "condor", "max_wall", 1440), "status" : status, 'app_dir' : cp_get(cp, 'osg_dirs', 'app', '/Unknown'), "data_dir" : cp_get(cp, "osg_dirs", "data", "/Unknown"), "default_se" : getDefaultSE(cp), "acbr" : ginfo['acbr'], "referenceSI00" : referenceSI00, "clusterUniqueID": getClusterID(cp), "bdii" : cp_get(cp, "bdii", "endpoint", "Unknown"), 'extraCapabilities' : extraCapabilities, "htpc" : htpcRSL } printTemplate(ce_template, info) return total_nodes, claimed, unclaimed def print_VOViewLocal(cp): """ Print the GLUE VOView entity; shows the VO's view of the condor batch system. Config options used: * ce.name. The human-readable name of the ce. * condor.status. The status of condor; defaults to "Production" * osg_dirs.app. The $OSG_APP directory; defaults to "/Unknown" * osg_dirs.data. The $OSG_DATA directory; defaults to "/Unknown" * se.name. The human-readable name of the closest SE. @param cp: The GIP configuration object
"version": version, "status": status, "port": 2811, "onlineTotal": 0, "nearlineTotal": nt, "onlineUsed": used, "nearlineUsed": nu, "architecture": arch, "free": available, "total": total, "bdii": cp_get(cp, "bdii", "endpoint", "Unknown"), "siteUniqueID": siteUniqueID, "arch": arch, } seTemplate = getTemplate("GlueSE", "GlueSEUniqueID") printTemplate(seTemplate, info) vos = voListStorage(cp) try: used, available, total = getClassicSESpace(cp, total=True) except Exception, e: used = 0 available = 0 total = 0 acbr = [] for vo in vos: acbr.append("GlueSAAccessControlBaseRule: VO:%s" % vo) acbr = '\n'.join(acbr) path = cp_get(cp, "osg_dirs", "data", "/UNKNOWN") info = { "saLocalID": seUniqueID,
def print_site(cp):
    info = generateGlueSite(cp)
    siteTemplate = getTemplate("GlueSite", "GlueSiteUniqueID")
    printTemplate(siteTemplate, info)
def print_CE(cp):
    slurmVersion = getLrmsInfo(cp)
    queueInfo = getQueueInfo(cp)
    ce_name = cp_get(cp, ce, "name", "UNKNOWN_CE")
    CE = getTemplate("GlueCE", "GlueCEUniqueID")
    try:
        excludeQueues = [i.strip() for i in
                         cp_get(cp, "slurm", "queue_exclude", "").split(",")]
    except:
        excludeQueues = []
    vo_queues = getVoQueues(cp)
    for queue, info in queueInfo.items():
        if queue in excludeQueues:
            continue
        info["lrmsVersion"] = slurmVersion
        info["job_manager"] = "slurm"
        info["queue"] = queue
        info["ceName"] = ce_name
        unique_id = buildCEUniqueID(cp, ce_name, "slurm", queue)
        ceImpl, ceImplVersion = getCEImpl(cp)
        port = getPort(cp)
        info["ceUniqueID"] = unique_id
        if "job_slots" not in info:
            log.error("no job_slots found for %s!" % queue)
        if "priority" not in info:
            info["priority"] = 0
        if "max_running" not in info:
            log.error("no max_running found for %s!" % queue)
        if "max_wall" not in info:
            info["max_wall"] = 1440
        # if no jobs are waiting in the queue, set the number of free slots
        # to (job_slots - running), or the total number of free slots on the
        # cluster, whichever is less.
        info["free_slots"] = 0
        if info["wait"] == 0:
            freeSlots = info["job_slots"] - info["running"]
            if freeSlots > 0:
                info["free_slots"] = freeSlots
        ert, wrt = responseTimes(cp, info.get("running", 0),
                                 info.get("wait", 0),
                                 max_job_time=info["max_wall"])
        info["ert"] = ert
        info["wrt"] = wrt
        info["hostingCluster"] = cp_get(cp, ce, "hosting_cluster", ce_name)
        info["hostName"] = cp_get(cp, ce, "host_name", ce_name)
        info["ceImpl"] = ceImpl
        info["ceImplVersion"] = ceImplVersion
        contact_string = buildContactString(cp, "slurm", queue, unique_id, log)
        info["contact_string"] = contact_string
        info["app_dir"] = cp_get(cp, "osg_dirs", "app", "/UNKNOWN_APP")
        info["data_dir"] = cp_get(cp, "osg_dirs", "data", "/UNKNOWN_DATA")
        info["default_se"] = getDefaultSE(cp)
        if "max_waiting" not in info:
            info["max_waiting"] = 999999
        if "max_queuable" in info:
            info["max_total"] = info["max_queuable"]
            info["free_slots"] = min(info["free_slots"], info["max_queuable"])
        else:
            info["max_total"] = info["max_waiting"] + info["max_running"]
            info["free_slots"] = min(info["free_slots"], info["max_total"])
        # Enforce invariants:
        #   max_total <= max_running
        #   free_slots <= max_running
        info["max_total"] = min(info["max_total"], info["max_running"])
        info["free_slots"] = min(info["free_slots"], info["max_running"])
        info["assigned"] = info["job_slots"]
        # Enforce invariants:
        #   assigned <= max_running
        info["assigned"] = min(info["assigned"], info["max_running"])
        info["lrmsType"] = "slurm"
        info["preemption"] = cp_get(cp, "slurm", "preemption", "0")
        acbr = ""
        has_vo = False
        for vo, queue2 in vo_queues:
            if queue == queue2:
                acbr += "GlueCEAccessControlBaseRule: VO:%s\n" % vo
                has_vo = True
        if not has_vo:
            continue
        info["acbr"] = acbr[:-1]
        info["bdii"] = cp.get("bdii", "endpoint")
        gramVersion = getGramVersion(cp)
        info["gramVersion"] = gramVersion
        info["port"] = port
        info["waiting"] = info["wait"]
        info["referenceSI00"] = gip_cluster.getReferenceSI00(cp)
        info["clusterUniqueID"] = getClusterID(cp)
        extraCapabilities = ""
        if cp_getBoolean(cp, "site", "glexec_enabled", False):
            extraCapabilities = extraCapabilities + "\n" + \
                                "GlueCECapability: glexec"
        htpcRSL, maxSlots = getHTPCInfo(cp, "slurm", queue, log)
        info["max_slots"] = maxSlots
        if maxSlots > 1:
            extraCapabilities = extraCapabilities + "\n" + \
                                "GlueCECapability: htpc"
        info["extraCapabilities"] = extraCapabilities
        info["htpc"] = htpcRSL
        printTemplate(CE, info)
    return queueInfo
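# Worked example of the invariants above (hypothetical numbers): with
# job_slots=100, running=40, wait=0, max_running=50 and the default
# max_waiting=999999, free_slots starts at 100-40=60 and max_total at
# 999999+50; the clamps then give max_total = min(1000049, 50) = 50,
# free_slots = min(60, 50) = 50, and assigned = min(100, 50) = 50.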
"max_running" : max_running, "max_waiting" : 99999, "max_total" : 99999, "max_wall" : cp_getInt(cp, "condor", "max_wall", 1440), "status" : status, 'app_dir' : cp_get(cp, 'osg_dirs', 'app', '/Unknown'), "data_dir" : cp_get(cp, "osg_dirs", "data", "/Unknown"), "default_se" : getDefaultSE(cp), "acbr" : ginfo['acbr'], "referenceSI00" : referenceSI00, "clusterUniqueID": getClusterID(cp), "bdii" : cp_get(cp, "bdii", "endpoint", "Unknown"), 'extraCapabilities' : extraCapabilities, "htpc" : htpcRSL } printTemplate(ce_template, info) return total_nodes, claimed, unclaimed def print_VOViewLocal(cp): """ Print the GLUE VOView entity; shows the VO's view of the condor batch system. Config options used: * ce.name. The human-readable name of the ce. * condor.status. The status of condor; defaults to "Production" * osg_dirs.app. The $OSG_APP directory; defaults to "/Unknown" * osg_dirs.data. The $OSG_DATA directory; defaults to "/Unknown" * se.name. The human-readable name of the closest SE.