def _unbanStorageElement( self, storageElement ):
  """ Call ``fts3.unban_se`` for a storage element on every configured FTS3 server.

  :param storageElement: storage element handed over to ``fts3.unban_se``
  :return: S_OK( dict ) mapping each FTS3 endpoint to the decoded answer of its
           ``ban/se`` resource, or the error structure of the first failing lookup
  """
  endpoints = getFTS3Servers()
  if not endpoints['OK']:
    # Without this check a failed lookup ended in a KeyError on 'Value'
    return endpoints
  endpoints = endpoints['Value']

  blacklist = {}
  for endpoint in endpoints:
    # e.g. endpoint = 'https://fts3-pilot.cern.ch:8446'
    # TODO: maybe proxyPath is not needed since it is picked from the environment by the REST API
    proxyPath = getProxyInfo()
    if not proxyPath['OK']:
      return proxyPath
    try:
      # Plain indexing so that a missing 'path' is reported instead of passing None on
      proxyPath = proxyPath['Value']['path']
    except Exception as e:
      # Exceptions have no 'message' attribute on Python 3, repr() works everywhere
      return S_ERROR( repr( e ).replace( ',)', ')' ) )

    context = fts3.Context( endpoint, proxyPath )
    fts3.unban_se( context, storageElement )
    blacklist[endpoint] = json.loads( context.get( "ban/se" ) )

  return S_OK( blacklist )
def _unbanStorageElement(self, storageElement):
    """Call ``fts3.unban_se`` for a storage element on each configured FTS3 server.

    :param storageElement: storage element handed over to ``fts3.unban_se``
    :return: S_OK(dict) keyed by FTS3 endpoint, holding the decoded answer of the
             ``ban/se`` resource; the failing result is returned as is when the
             server list or the proxy information cannot be obtained
    """
    serversResult = getFTS3Servers()
    if not serversResult['OK']:
        return serversResult

    bannedPerServer = {}
    for ftsServer in serversResult['Value']:
        # TODO: maybe the proxy path is not needed since it is picked from the
        # environment by the REST API
        proxyInfo = getProxyInfo()
        if not proxyInfo['OK']:
            return proxyInfo

        try:
            proxyFile = proxyInfo['Value']['path']
        except Exception as err:
            return S_ERROR(repr(err).replace(',)', ')'))

        ftsContext = fts3.Context(ftsServer, proxyFile)
        fts3.unban_se(ftsContext, storageElement)
        # Read back what this server reports under ban/se after the call
        bannedPerServer[ftsServer] = json.loads(ftsContext.get("ban/se"))

    return S_OK(bannedPerServer)
def _banStorageElement( self, storageElement ):
  """ Call ``fts3.ban_se`` for a storage element on each configured FTS3 server.

  :param storageElement: storage element handed over to ``fts3.ban_se``
  :return: S_OK( dict ) keyed by FTS3 endpoint, holding the decoded answer of the
           ``ban/se`` resource; the failing result is returned as is when the
           server list or the proxy information cannot be obtained
  """
  serversResult = getFTS3Servers()
  if not serversResult['OK']:
    return serversResult

  # This status leaves the jobs queued. The only alternative is "cancel"
  banStatus = 'wait'

  bannedPerServer = {}
  for ftsServer in serversResult['Value']:
    # TODO: maybe the proxy path is not needed since it is picked from the
    # environment by the REST API
    proxyInfo = getProxyInfo()
    if not proxyInfo['OK']:
      return proxyInfo

    try:
      proxyFile = proxyInfo['Value']['path']
    except Exception as err:
      return S_ERROR( repr( err ).replace( ',)', ')' ) )

    ftsContext = fts3.Context( ftsServer, proxyFile )
    jobIDs = fts3.ban_se( ftsContext, storageElement, banStatus, timeout = 3600, allow_submit = False )
    self.log.info( "fts3.ban_se: paused jobs: %s" % ','.join( jobIDs ) )
    bannedPerServer[ftsServer] = json.loads( ftsContext.get( "ban/se" ) )

  return S_OK( bannedPerServer )
def _banStorageElement(self, storageElement):
    """Call ``fts3.ban_se`` for a storage element on every configured FTS3 server.

    :param storageElement: storage element handed over to ``fts3.ban_se``
    :return: S_OK(dict) mapping each FTS3 endpoint to the decoded answer of its
             ``ban/se`` resource, or the error structure of the first failing lookup
    """
    endpoints = getFTS3Servers()
    if not endpoints['OK']:
        # Without this check a failed lookup ended in a KeyError on 'Value'
        return endpoints
    endpoints = endpoints['Value']

    blacklist = {}
    for endpoint in endpoints:
        # e.g. endpoint = 'https://fts3-pilot.cern.ch:8446'
        # TODO: maybe proxyPath is not needed since it is picked from the environment by the REST API
        proxyPath = getProxyInfo()
        if not proxyPath['OK']:
            return proxyPath
        try:
            # Plain indexing so that a missing 'path' is reported instead of passing None on
            proxyPath = proxyPath['Value']['path']
        except Exception as e:
            # Exceptions have no 'message' attribute on Python 3, repr() works everywhere
            return S_ERROR(repr(e).replace(',)', ')'))

        context = fts3.Context(endpoint, proxyPath)

        # TODO: confirm these ban parameters are the wanted ones
        # TODO: ban_se returns the list of jobIDs interrupted by the banning,
        # which is currently discarded
        fts3.ban_se(context, storageElement, 'wait', timeout=3600, allow_submit=False)
        blacklist[endpoint] = json.loads(context.get("ban/se"))

    return S_OK(blacklist)

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def _unbanStorageElement(self, storageElement):
    """Call ``fts3.unban_se`` for a storage element on each configured FTS3 server.

    :param storageElement: storage element handed over to ``fts3.unban_se``
    :return: S_OK(dict) keyed by FTS3 endpoint, holding the decoded answer of the
             ``ban/se`` resource; the failing result is returned as is when the
             server list or the proxy information cannot be obtained
    """
    lookup = getFTS3Servers()
    if not lookup["OK"]:
        return lookup

    answers = {}
    for server in lookup["Value"]:
        # TODO: maybe the proxy path is not needed since it is picked from the
        # environment by the REST API
        proxyResult = getProxyInfo()
        if not proxyResult["OK"]:
            return proxyResult

        try:
            proxyLocation = proxyResult["Value"]["path"]
        except Exception as exc:
            return S_ERROR(repr(exc).replace(",)", ")"))

        restContext = fts3.Context(server, proxyLocation)
        fts3.unban_se(restContext, storageElement)
        # Read back what this server reports under ban/se after the call
        answers[server] = json.loads(restContext.get("ban/se"))

    return S_OK(answers)
def _unbanStorageElement(self, storageElement):
    """Call ``fts3.unban_se`` for a storage element on every configured FTS3 server.

    :param storageElement: storage element handed over to ``fts3.unban_se``
    :return: S_OK(dict) mapping each FTS3 endpoint to the decoded answer of its
             ``ban/se`` resource, or the error structure of the first failing lookup
    """
    endpoints = getFTS3Servers()
    if not endpoints['OK']:
        # Without this check a failed lookup ended in a KeyError on 'Value'
        return endpoints
    endpoints = endpoints['Value']

    blacklist = {}
    for endpoint in endpoints:
        # e.g. endpoint = 'https://fts3-pilot.cern.ch:8446'
        # TODO: maybe proxyPath is not needed since it is picked from the environment by the REST API
        proxyPath = getProxyInfo()
        if not proxyPath['OK']:
            return proxyPath
        try:
            # Plain indexing so that a missing 'path' is reported instead of passing None on
            proxyPath = proxyPath['Value']['path']
        except Exception as e:
            # Exceptions have no 'message' attribute on Python 3, repr() works everywhere
            return S_ERROR(repr(e).replace(',)', ')'))

        context = fts3.Context(endpoint, proxyPath)
        fts3.unban_se(context, storageElement)
        blacklist[endpoint] = json.loads(context.get("ban/se"))

    return S_OK(blacklist)
def _banStorageElement( self, storageElement ):
  """ Call ``fts3.ban_se`` for a storage element on every configured FTS3 server.

  :param storageElement: storage element handed over to ``fts3.ban_se``
  :return: S_OK( dict ) mapping each FTS3 endpoint to the decoded answer of its
           ``ban/se`` resource, or the error structure of the first failing lookup
  """
  endpoints = getFTS3Servers()
  if not endpoints['OK']:
    # Without this check a failed lookup ended in a KeyError on 'Value'
    return endpoints
  endpoints = endpoints['Value']

  blacklist = {}
  for endpoint in endpoints:
    # e.g. endpoint = 'https://fts3-pilot.cern.ch:8446'
    # TODO: maybe proxyPath is not needed since it is picked from the environment by the REST API
    proxyPath = getProxyInfo()
    if not proxyPath['OK']:
      return proxyPath
    try:
      proxyPath = proxyPath['Value']['path']
    except Exception as e:
      return S_ERROR( repr( e ).replace( ',)', ')' ) )

    context = fts3.Context( endpoint, proxyPath )

    # TODO: confirm these ban parameters are the wanted ones
    timeout = 3600
    status = 'wait'
    allow_submit = False

    # TODO: ban_se returns the list of jobIDs interrupted by the banning
    pausedJobIDs = fts3.ban_se( context, storageElement, status, timeout, allow_submit )
    self.log.info( "fts3.ban_se: %s" % pausedJobIDs )

    blacklist[endpoint] = json.loads( context.get( "ban/se" ) )

  return S_OK( blacklist )
def _banStorageElement(self, storageElement):
    """Call ``fts3.ban_se`` for a storage element on every configured FTS3 server.

    :param storageElement: storage element handed over to ``fts3.ban_se``
    :return: S_OK(dict) mapping each FTS3 endpoint to the decoded answer of its
             ``ban/se`` resource, or the error structure of the first failing lookup
    """
    endpoints = getFTS3Servers()
    if not endpoints['OK']:
        # Without this check a failed lookup ended in a KeyError on 'Value'
        return endpoints
    endpoints = endpoints['Value']

    blacklist = {}
    for endpoint in endpoints:
        # e.g. endpoint = 'https://fts3-pilot.cern.ch:8446'
        # TODO: maybe proxyPath is not needed since it is picked from the environment by the REST API
        proxyPath = getProxyInfo()
        if not proxyPath['OK']:
            return proxyPath
        try:
            proxyPath = proxyPath['Value']['path']
        except Exception as e:
            return S_ERROR(repr(e).replace(',)', ')'))

        context = fts3.Context(endpoint, proxyPath)

        # TODO: confirm these ban parameters are the wanted ones
        timeout = 3600
        status = 'wait'
        allow_submit = False

        # TODO: ban_se returns the list of jobIDs interrupted by the banning
        pausedJobIDs = fts3.ban_se(context, storageElement, status, timeout, allow_submit)
        self.log.info("fts3.ban_se: %s" % pausedJobIDs)

        blacklist[endpoint] = json.loads(context.get("ban/se"))

    return S_OK(blacklist)
def _banStorageElement( self, storageElement ):
  """ Call ``fts3.ban_se`` for a storage element on every configured FTS3 server.

  :param storageElement: storage element handed over to ``fts3.ban_se``
  :return: S_OK( dict ) mapping each FTS3 endpoint to the decoded answer of its
           ``ban/se`` resource, or the error structure of the first failing lookup
  """
  endpoints = getFTS3Servers()
  if not endpoints['OK']:
    # Without this check a failed lookup ended in a KeyError on 'Value'
    return endpoints
  endpoints = endpoints['Value']

  blacklist = {}
  for endpoint in endpoints:
    # e.g. endpoint = 'https://fts3-pilot.cern.ch:8446'
    # TODO: maybe proxyPath is not needed since it is picked from the environment by the REST API
    proxyPath = getProxyInfo()
    if not proxyPath['OK']:
      return proxyPath
    try:
      # Plain indexing so that a missing 'path' is reported instead of passing None on
      proxyPath = proxyPath['Value']['path']
    except Exception as e:
      # Exceptions have no 'message' attribute on Python 3, repr() works everywhere
      return S_ERROR( repr( e ).replace( ',)', ')' ) )

    context = fts3.Context( endpoint, proxyPath )

    # TODO: confirm these ban parameters are the wanted ones
    # TODO: ban_se returns the list of jobIDs interrupted by the banning,
    # which is currently discarded
    fts3.ban_se( context, storageElement, 'wait', timeout = 3600, allow_submit = False )
    blacklist[endpoint] = json.loads( context.get( "ban/se" ) )

  return S_OK( blacklist )

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def __init__( self, csPath = None, ftsHistoryViews = None ):
  """ Create the sub-logger, run the parent constructor and store the FTS3 server list.

  :param csPath: forwarded unchanged to the parent constructor
  :param ftsHistoryViews: forwarded unchanged to the parent constructor
  """
  self.log = gLogger.getSubLogger( "FTS3Placement" )
  super( FTS3Placement, self ).__init__( csPath = csPath, ftsHistoryViews = ftsHistoryViews )

  serversResult = getFTS3Servers()
  if not serversResult['OK']:
    # Only reported: construction carries on with whatever the result holds
    self.log.error( serversResult['Message'] )

  # Falls back to an empty list when the result carries no 'Value'
  knownServers = serversResult.get( 'Value', [] )
  self.__serverList = knownServers
  self.maxAttempts = len( knownServers )
def __init__(self, csPath=None, ftsHistoryViews=None):
    """Create the sub-logger, run the parent constructor and store the FTS3 server list.

    :param csPath: forwarded unchanged to the parent constructor
    :param ftsHistoryViews: forwarded unchanged to the parent constructor
    """
    self.log = gLogger.getSubLogger("FTS3Placement")
    super(FTS3Placement, self).__init__(csPath=csPath, ftsHistoryViews=ftsHistoryViews)

    lookup = getFTS3Servers()
    if not lookup['OK']:
        # Only reported: construction carries on with whatever the result holds
        self.log.error(lookup['Message'])

    # Falls back to an empty list when the result carries no 'Value'
    servers = lookup.get('Value', [])
    self.__serverList = servers
    self.maxAttempts = len(servers)
def doMaster( self ):
  ''' Master method: gather the sites and resources and hand them to doNew.

      - It gets all sites and transforms them into gocSites.
      - It gets all the storage elements and transforms them into their hosts.
      - It adds the FTS3 servers and the CEs (file catalogs will come).

      :return: S_OK( self.metrics ); the failing result is returned as is when
               the GOC sites or the storage element hosts cannot be obtained
  '''

  gocSites = CSHelpers.getGOCSites()
  if not gocSites[ 'OK' ]:
    return gocSites
  gocSites = gocSites[ 'Value' ]

  sesHosts = CSHelpers.getStorageElementsHosts()
  if not sesHosts[ 'OK' ]:
    return sesHosts

  # Work on a copy: extending the helper's own list in place would leak the
  # FTS and CE names into any other holder of that list. An empty / None
  # value is turned into an empty list.
  resources = list( sesHosts[ 'Value' ] or [] )

  ftsServer = getFTS3Servers()
  if ftsServer[ 'OK' ] and ftsServer[ 'Value' ]:
    resources.extend( ftsServer[ 'Value' ] )

  # TODO: file catalogs need also to use their hosts
  # fc = CSHelpers.getFileCatalogs()
  # if fc[ 'OK' ]:
  #   resources = resources + fc[ 'Value' ]

  ce = CSHelpers.getComputingElements()
  if ce[ 'OK' ] and ce[ 'Value' ]:
    resources.extend( ce[ 'Value' ] )

  # join() would raise on None, so log a placeholder when nothing was found
  gLogger.verbose( 'Processing Sites: %s' % ', '.join( gocSites or [ 'NONE' ] ) )

  siteRes = self.doNew( ( 'Site', gocSites ) )
  if not siteRes[ 'OK' ]:
    self.metrics[ 'failed' ].append( siteRes[ 'Message' ] )

  gLogger.verbose( 'Processing Resources: %s' % ', '.join( resources or [ 'NONE' ] ) )

  resourceRes = self.doNew( ( 'Resource', resources ) )
  if not resourceRes[ 'OK' ]:
    self.metrics[ 'failed' ].append( resourceRes[ 'Message' ] )

  return S_OK( self.metrics )

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def doMaster(self):
    """Collect the sites and the resources, then hand both sets to doNew.

    Sites come from getGOCSites; resources are the storage element hosts,
    followed by the FTS3 server hosts and the computing elements (file
    catalogs will come).

    :return: S_OK(self.metrics); the failing result is returned as is when the
             GOC sites or the storage element hosts cannot be obtained
    """
    sitesResult = getGOCSites()
    if not sitesResult['OK']:
        return sitesResult
    siteNames = sitesResult['Value']

    hostsResult = getStorageElementsHosts()
    if not hostsResult['OK']:
        return hostsResult
    # A non-empty value is used directly (not copied) and grown below
    allResources = hostsResult['Value'] or []

    ftsResult = getFTS3Servers(hostOnly=True)
    if ftsResult['OK'] and ftsResult['Value']:
        allResources.extend(ftsResult['Value'])

    # TODO: file catalogs need also to use their hosts
    # fc = CSHelpers.getFileCatalogs()
    # if fc[ 'OK' ]:
    #   resources = resources + fc[ 'Value' ]

    ceResult = getComputingElements()
    if ceResult['OK'] and ceResult['Value']:
        allResources.extend(ceResult['Value'])

    # Sites first, then resources; a failure is only recorded in the metrics
    workList = (
        ('Processing Sites', 'Site', siteNames),
        ('Processing Resources', 'Resource', allResources),
    )
    for logMessage, elementType, names in workList:
        self.log.verbose(logMessage, ', '.join(names or ['NONE']))
        outcome = self.doNew((elementType, names))
        if not outcome['OK']:
            self.metrics['failed'].append(outcome['Message'])

    return S_OK(self.metrics)
def doMaster(self):
    """Collect the sites and the resources, then hand both sets to doNew.

    Sites come from getGOCSites; resources are the storage element hosts,
    followed by the FTS3 server hosts and the entries of the CE/site mapping
    (file catalogs will come).

    :return: S_OK(self.metrics); the failing result is returned as is when the
             GOC sites or the storage element hosts cannot be obtained
    """
    sitesLookup = getGOCSites()
    if not sitesLookup["OK"]:
        return sitesLookup
    sites = sitesLookup["Value"]

    hostsLookup = getStorageElementsHosts()
    if not hostsLookup["OK"]:
        return hostsLookup
    # A non-empty value is used directly (not copied) and grown below
    resourceNames = hostsLookup["Value"] or []

    ftsLookup = getFTS3Servers(hostOnly=True)
    if ftsLookup["OK"] and ftsLookup["Value"]:
        resourceNames.extend(ftsLookup["Value"])

    # TODO: file catalogs need also to use their hosts
    # fc = CSHelpers.getFileCatalogs()
    # if fc[ 'OK' ]:
    #   resources = resources + fc[ 'Value' ]

    ceLookup = getCESiteMapping()
    if ceLookup["OK"] and ceLookup["Value"]:
        resourceNames.extend(list(ceLookup["Value"]))

    self.log.verbose("Processing Sites", ", ".join(sites or ["NONE"]))
    sitesOutcome = self.doNew(("Site", sites))
    if not sitesOutcome["OK"]:
        self.metrics["failed"].append(sitesOutcome["Message"])

    self.log.verbose("Processing Resources", ", ".join(resourceNames or ["NONE"]))
    resourcesOutcome = self.doNew(("Resource", resourceNames))
    if not resourcesOutcome["OK"]:
        self.metrics["failed"].append(resourcesOutcome["Message"])

    return S_OK(self.metrics)
def doMaster(self):
    """Master method: gather the sites and resources and hand them to doNew.

    - It gets all sites and transforms them into gocSites.
    - It gets all the storage elements and transforms them into their hosts.
    - It adds the FTS3 servers and the CEs (file catalogs will come).

    :return: S_OK(self.metrics); the failing result is returned as is when the
             GOC sites or the storage element hosts cannot be obtained
    """
    gocSites = CSHelpers.getGOCSites()
    if not gocSites["OK"]:
        return gocSites
    gocSites = gocSites["Value"]

    sesHosts = CSHelpers.getStorageElementsHosts()
    if not sesHosts["OK"]:
        return sesHosts

    # Work on a copy: extending the helper's own list in place would leak the
    # FTS and CE names into any other holder of that list. An empty / None
    # value is turned into an empty list.
    resources = list(sesHosts["Value"] or [])

    ftsServer = getFTS3Servers()
    if ftsServer["OK"] and ftsServer["Value"]:
        resources.extend(ftsServer["Value"])

    # TODO: file catalogs need also to use their hosts
    # fc = CSHelpers.getFileCatalogs()
    # if fc[ 'OK' ]:
    #   resources = resources + fc[ 'Value' ]

    ce = CSHelpers.getComputingElements()
    if ce["OK"] and ce["Value"]:
        resources.extend(ce["Value"])

    # join() would raise on None, so log a placeholder when nothing was found
    self.log.verbose("Processing Sites: %s" % ", ".join(gocSites or ["NONE"]))

    siteRes = self.doNew(("Site", gocSites))
    if not siteRes["OK"]:
        self.metrics["failed"].append(siteRes["Message"])

    self.log.verbose("Processing Resources: %s" % ", ".join(resources or ["NONE"]))

    resourceRes = self.doNew(("Resource", resources))
    if not resourceRes["OK"]:
        self.metrics["failed"].append(resourceRes["Message"])

    return S_OK(self.metrics)
def __resolveFTSServer( self ):
  """ resolve FTS server to use, it should be the closest one from target SE

  For FTS2 the sites returned by getSitesForSE for the target SE are tried in
  order and the first non-empty server entry is used; for FTS3 the first entry
  of the list returned by getFTS3Servers is used.

  :param self: self reference
  :return: S_OK( ftsServer ), the value is also stored in self.ftsServer,
           or S_ERROR when no server could be resolved
  """
  ftsVersion = self.ftsVersion.upper()

  if ftsVersion == 'FTS2':
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS2ServersForSites

    if not self.targetSE:
      return S_ERROR( "Target SE not set" )
    res = getSitesForSE( self.targetSE )
    if not res['OK'] or not res['Value']:
      return S_ERROR( "Could not determine target site" )

    targetSite = ''
    for targetSite in res['Value']:
      targetFTS = getFTS2ServersForSites( [targetSite] )
      if not targetFTS['OK']:
        return targetFTS
      ftsTarget = targetFTS['Value'][targetSite]
      if ftsTarget:
        self.ftsServer = ftsTarget
        return S_OK( self.ftsServer )
    # None of the sites had a server; the message names the last site tried
    return S_ERROR( 'No FTS server found for %s' % targetSite )

  if ftsVersion == 'FTS3':
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS3Servers

    res = getFTS3Servers()
    if not res['OK']:
      return res
    ftsServerList = res['Value']
    if ftsServerList:
      # Here we take the first one, regardless of the policy...
      # Unclean but all this will disappear after refactoring the fts code
      self.ftsServer = ftsServerList[0]
      return S_OK( self.ftsServer )
    # An empty list used to fall through to the FTS2 message, which formats
    # 'targetSite' - a name never bound on this branch (UnboundLocalError)
    return S_ERROR( 'No FTS3 server found' )

  return S_ERROR( 'Unknown FTS version %s' % self.ftsVersion )
def __resolveFTSServer( self ):
  """ resolve FTS server to use, it should be the closest one from target SE

  For FTS2 the sites returned by getSitesForSE for the target SE are tried in
  order and the first non-empty server entry is used; for FTS3 the first entry
  of the list returned by getFTS3Servers is used.

  :param self: self reference
  :return: S_OK( ftsServer ), the value is also stored in self.ftsServer,
           or S_ERROR when no server could be resolved
  """
  ftsVersion = self.ftsVersion.upper()

  if ftsVersion == 'FTS2':
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS2ServersForSites

    if not self.targetSE:
      return S_ERROR( "Target SE not set" )
    res = getSitesForSE( self.targetSE )
    if not res['OK'] or not res['Value']:
      return S_ERROR( "Could not determine target site" )

    targetSite = ''
    for targetSite in res['Value']:
      targetFTS = getFTS2ServersForSites( [targetSite] )
      if not targetFTS['OK']:
        return targetFTS
      ftsTarget = targetFTS['Value'][targetSite]
      if ftsTarget:
        self.ftsServer = ftsTarget
        return S_OK( self.ftsServer )
    # None of the sites had a server; the message names the last site tried
    return S_ERROR( 'No FTS server found for %s' % targetSite )

  if ftsVersion == 'FTS3':
    from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS3Servers

    res = getFTS3Servers()
    if not res['OK']:
      return res
    ftsServerList = res['Value']
    if ftsServerList:
      # Here we take the first one, regardless of the policy...
      # Unclean but all this will disappear after refactoring the fts code
      self.ftsServer = ftsServerList[0]
      return S_OK( self.ftsServer )
    # An empty list used to fall through to the FTS2 message, which formats
    # 'targetSite' - a name never bound on this branch (UnboundLocalError)
    return S_ERROR( 'No FTS3 server found' )

  return S_ERROR( 'Unknown FTS version %s' % self.ftsVersion )
def __removeNonExistingResourcesFromRM(self):
    """Remove resources from DowntimeCache table that no longer exist in the CS.

    The known resources are the storage element hosts, the FTS3 server hosts
    and the entries of the CE/site mapping.

    :return: S_OK() once all stale entries are deleted, or when the
             ResourceManagement service URL cannot be resolved; otherwise the
             failing result of the lookup / delete call
    """
    if not getServiceURL("ResourceStatus/ResourceManagement"):
        gLogger.verbose("ResourceManagement is not installed, skipping removal of non existing resources...")
        return S_OK()

    sesHosts = getStorageElementsHosts()
    if not sesHosts["OK"]:
        return sesHosts

    # A set gives constant-time membership tests in the loop below and, being
    # a fresh container, leaves the list handed back by the helper untouched.
    resources = set(sesHosts["Value"] or [])

    # NOTE(review): a failed FTS3 / CE lookup is ignored here, so the entries of
    # those resources are then treated as stale below - confirm this is intended.
    ftsServer = getFTS3Servers(hostOnly=True)
    if ftsServer["OK"] and ftsServer["Value"]:
        resources.update(ftsServer["Value"])

    res = getCESiteMapping()
    if res["OK"] and res["Value"]:
        resources.update(res["Value"])

    downtimes = self.rManagement.selectDowntimeCache()
    if not downtimes["OK"]:
        return downtimes

    # Remove hosts that no longer exist in the CS
    for host in downtimes["Value"]:
        hostName = host[0]
        gLogger.verbose("Checking if %s is still in the CS" % hostName)
        if hostName in resources:
            continue
        gLogger.verbose("%s is no longer in CS, removing entry..." % hostName)
        result = self.rManagement.deleteDowntimeCache(name=hostName)
        if not result["OK"]:
            return result

    return S_OK()
def __removeNonExistingResourcesFromRM(self):
    '''
    Remove resources from DowntimeCache table that no longer exist in the CS.

    The known resources are the storage element hosts, the FTS3 server hosts
    and the computing elements.

    :return: S_OK() once all stale entries are deleted, or when the
             ResourceManagement service URL cannot be resolved; otherwise the
             failing result of the lookup / delete call
    '''
    if not getServiceURL("ResourceStatus/ResourceManagement"):
        gLogger.verbose('ResourceManagement is not installed, skipping removal of non existing resources...')
        return S_OK()

    sesHosts = getStorageElementsHosts()
    if not sesHosts['OK']:
        return sesHosts

    # A set gives constant-time membership tests in the loop below and, being
    # a fresh container, leaves the list handed back by the helper untouched.
    resources = set(sesHosts['Value'] or [])

    # NOTE(review): a failed FTS3 / CE lookup is ignored here, so the entries of
    # those resources are then treated as stale below - confirm this is intended.
    ftsServer = getFTS3Servers(hostOnly=True)
    if ftsServer['OK'] and ftsServer['Value']:
        resources.update(ftsServer['Value'])

    ce = CSHelpers.getComputingElements()
    if ce['OK'] and ce['Value']:
        resources.update(ce['Value'])

    downtimes = self.rManagement.selectDowntimeCache()
    if not downtimes['OK']:
        return downtimes

    # Remove hosts that no longer exist in the CS
    for host in downtimes['Value']:
        hostName = host[0]
        gLogger.verbose('Checking if %s is still in the CS' % hostName)
        if hostName in resources:
            continue
        gLogger.verbose('%s is no longer in CS, removing entry...' % hostName)
        result = self.rManagement.deleteDowntimeCache(name=hostName)
        if not result['OK']:
            return result

    return S_OK()
def __removeNonExistingResourcesFromRM(self):
    '''
    Remove resources from DowntimeCache table that no longer exist in the CS.

    The known resources are the storage element hosts, the FTS3 servers and
    the computing elements.

    :return: S_OK() once all stale entries are deleted, or when the
             ResourceManagement service URL cannot be resolved; otherwise the
             failing result of the lookup / delete call
    '''
    if not getServiceURL("ResourceStatus/ResourceManagement"):
        gLogger.verbose('ResourceManagement is not installed, skipping removal of non existing resources...')
        return S_OK()

    sesHosts = CSHelpers.getStorageElementsHosts()
    if not sesHosts['OK']:
        return sesHosts

    # A set gives constant-time membership tests in the loop below and, being
    # a fresh container, leaves the list handed back by the helper untouched.
    resources = set(sesHosts['Value'] or [])

    # NOTE(review): a failed FTS3 / CE lookup is ignored here, so the entries of
    # those resources are then treated as stale below - confirm this is intended.
    ftsServer = getFTS3Servers()
    if ftsServer['OK'] and ftsServer['Value']:
        resources.update(ftsServer['Value'])

    ce = CSHelpers.getComputingElements()
    if ce['OK'] and ce['Value']:
        resources.update(ce['Value'])

    downtimes = self.rManagement.selectDowntimeCache()
    if not downtimes['OK']:
        return downtimes

    # Remove hosts that no longer exist in the CS
    for host in downtimes['Value']:
        hostName = host[0]
        gLogger.verbose('Checking if %s is still in the CS' % hostName)
        if hostName in resources:
            continue
        gLogger.verbose('%s is no longer in CS, removing entry...' % hostName)
        result = self.rManagement.deleteDowntimeCache(name=hostName)
        if not result['OK']:
            return result

    return S_OK()
def _banStorageElement(self, storageElement):
    """Call ``fts3.ban_se`` for a storage element on each configured FTS3 server.

    :param storageElement: storage element handed over to ``fts3.ban_se``
    :return: S_OK(dict) keyed by FTS3 endpoint, holding the decoded answer of the
             ``ban/se`` resource; the failing result is returned as is when the
             server list or the proxy information cannot be obtained
    """
    lookup = getFTS3Servers()
    if not lookup['OK']:
        return lookup

    # This status leaves the jobs queued. The only alternative is "cancel"
    jobPolicy = 'wait'

    answers = {}
    for server in lookup['Value']:
        # TODO: maybe the proxy path is not needed since it is picked from the
        # environment by the REST API
        proxyResult = getProxyInfo()
        if not proxyResult['OK']:
            return proxyResult

        try:
            proxyLocation = proxyResult['Value']['path']
        except Exception as exc:
            return S_ERROR(repr(exc).replace(',)', ')'))

        restContext = fts3.Context(server, proxyLocation)
        paused = fts3.ban_se(restContext, storageElement, jobPolicy, timeout=3600, allow_submit=False)
        self.log.info("fts3.ban_se: paused jobs: %s" % ','.join(paused))
        answers[server] = json.loads(restContext.get("ban/se"))

    return S_OK(answers)