def initialize(self):
    """
    Prepare the clients used by the agent.

    Runs at agent initialization (per the original note, normally every 500
    cycles). It creates the GOCDB client, re-reads the ``DryRun`` option
    (keeping the current ``self.dryRun`` as the default) and creates the CS
    API object.

    :return: whatever ``CSAPI.initialize()`` returns
    """
    # Client used to connect to GOCDB.
    self.GOCDBClient = GOCDBClient()

    currentDryRun = self.dryRun
    self.dryRun = self.am_getOption("DryRun", currentDryRun)

    # API needed to update the configuration stored by the CS.
    csAPI = CSAPI()
    self.csAPI = csAPI
    return csAPI.initialize()
def __init__(self, rsDBin=None):
    """
    Store the ResourceStatus DB handle and create a GOCDB client.

    :params:
      :attr:`rsDBin`: DB object to use; when None, a ``ResourceStatusDB`` is
      imported and instantiated here.
    """
    self.rsDB = rsDBin

    # Identity test: ``== None`` would call a custom ``__eq__`` on the DB object.
    if self.rsDB is None:
        from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
        self.rsDB = ResourceStatusDB()

    self.GOCDBClient = GOCDBClient()
def doCommand(self, resources=None):
    """
    Return downtime information for the resources given in input.

    :params:
      :attr:`resources`: list of resource names (when not given, the list is
      requested from the "ResourceStatus/ResourceStatus" service)

    :returns:
      a dictionary keyed by downtime ID:
      {dt_ID: {'ID': dt_ID, 'StartDate': ..., 'EndDate': ..., 'Severity': ...,
               'Description': ..., 'Link': ...}, ...}
      (the original documentation quotes 'OUTAGE' | 'AT_RISK' as severities).
      An empty dictionary is returned when the GOCDB call raises or when it
      reports no downtimes (``None``).

    :raises:
      RSSException when the resources list or the GOCDB status query comes
      back with a not-OK result.
    """
    if self.client is None:
        from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
        self.client = GOCDBClient()

    if resources is None:
        # from DIRAC.Core.DISET.RPCClient import RPCClient
        RPC = RPCClient("ResourceStatus/ResourceStatus")
        resources = RPC.getResourcesList()
        if not resources['OK']:
            raise RSSException(where(self, self.doCommand) + " " + resources['Message'])
        resources = resources['Value']

    try:
        res = self.client.getStatus('Resource', resources, None, 120)
    except Exception:
        # Best effort: a failing GOCDB call is logged and reported as "no
        # downtimes". KeyboardInterrupt / SystemExit are no longer swallowed.
        gLogger.exception("Exception when calling GOCDBClient.")
        return {}

    if not res['OK']:
        raise RSSException(where(self, self.doCommand) + " " + res['Message'])
    res = res['Value']

    if res is None:
        return {}

    resToReturn = {}
    for dt_ID in res:
        info = res[dt_ID]
        resToReturn[dt_ID] = {
            'ID': dt_ID,
            'StartDate': info['FORMATED_START_DATE'],
            'EndDate': info['FORMATED_END_DATE'],
            'Severity': info['SEVERITY'],
            # single quotes are stripped from the free-text description
            'Description': info['DESCRIPTION'].replace('\'', ''),
            'Link': info['GOCDB_PORTAL_URL'],
        }

    return resToReturn
def __init__(self, rsClient=None, rmClient=None):
    """
    Create the GOCDB client and bind the RSS clients.

    :params:
      :attr:`rsClient`: client to use instead of a new ``ResourceStatusClient``
      :attr:`rmClient`: client to use instead of a new ``ResourceManagementClient``
    """
    self.GOCDBClient = GOCDBClient()

    # ``is None`` (identity) rather than ``== None``, which would go through
    # any ``__eq__`` defined by the injected client.
    self.rsClient = ResourceStatusClient() if rsClient is None else rsClient
    self.rmClient = ResourceManagementClient() if rmClient is None else rmClient

    # Names of the things this object synchronizes.
    self.synclist = [
        'Sites',
        'Resources',
        'StorageElements',
        'Services',
        'RegistryUsers',
    ]
def __init__(self, args=None, clients=None):
    """
    Initialise the command and pick the clients it works with.

    A client found in ``self.apis`` under 'GOCDBClient' or
    'ResourceManagementClient' is reused; a missing one is instantiated here.
    """
    super(DowntimeCommand, self).__init__(args, clients)

    apis = self.apis
    self.gClient = apis['GOCDBClient'] if 'GOCDBClient' in apis else GOCDBClient()
    self.rmClient = (apis['ResourceManagementClient']
                     if 'ResourceManagementClient' in apis
                     else ResourceManagementClient())
def __init__(self, args=None, clients=None):
    """
    Initialise the command and pick the clients it works with.

    A client found in ``self.apis`` under 'GOCDBClient' or
    'ResourceManagementClient' is reused; a missing one is instantiated here.
    ``self.seenHostnames`` starts as an empty set.
    """
    super(GOCDBSyncCommand, self).__init__(args, clients)

    apis = self.apis
    self.gClient = apis['GOCDBClient'] if 'GOCDBClient' in apis else GOCDBClient()
    self.rmClient = (apis['ResourceManagementClient']
                     if 'ResourceManagementClient' in apis
                     else ResourceManagementClient())

    self.seenHostnames = set()
def __init__( self, rsClient = None, rmClient = None ):
  """
  Create the GOCDB client and bind the RSS clients.

  :params:
    :attr:`rsClient`: client to use instead of a new ``ResourceStatusClient``
    :attr:`rmClient`: client to use instead of a new ``ResourceManagementClient``
  """
  self.GOCDBClient = GOCDBClient()

  # ``is None`` (identity) rather than ``== None``, which would go through
  # any ``__eq__`` defined by the injected client.
  self.rsClient = ResourceStatusClient() if rsClient is None else rsClient
  self.rmClient = ResourceManagementClient() if rmClient is None else rmClient

  # Names of the things this object synchronizes.
  self.synclist = [ 'Sites', 'Resources', 'StorageElements', 'Services', 'RegistryUsers' ]
def setUp(self):
    """
    Build the fixtures shared by the tests: a MagicMock standing in for RSS
    plus GOCDB and GGUS tickets clients.
    """
    rssMock = mock.MagicMock()
    self.mockRSS = rssMock

    gocdbClient = GOCDBClient()
    self.GOCCli = gocdbClient

    # self.SAMCli = SAMResultsClient()

    ggusClient = GGUSTicketsClient()
    self.GGUSCli = ggusClient
def initialize(self):
    """
    Register the commands to run and build the clients they share.

    The ResourceStatusClient and ResourceManagementClient classes are resolved
    through ObjectLoader; when either lookup fails the error is logged and the
    failed result is returned unchanged.

    :return: S_OK() on success, otherwise the not-OK ObjectLoader result
    """
    rsLoad = ObjectLoader().loadObject(
        'DIRAC.ResourceStatusSystem.Client.ResourceStatusClient',
        'ResourceStatusClient')
    if not rsLoad['OK']:
        self.log.error('Failed to load ResourceStatusClient class: %s' % rsLoad['Message'])
        return rsLoad
    rsClass = rsLoad['Value']

    rmLoad = ObjectLoader().loadObject(
        'DIRAC.ResourceStatusSystem.Client.ResourceManagementClient',
        'ResourceManagementClient')
    if not rmLoad['OK']:
        self.log.error('Failed to load ResourceManagementClient class: %s' % rmLoad['Message'])
        return rmLoad
    rmClass = rmLoad['Value']

    # Each active command is registered under its own name with no arguments.
    for commandName in ('Downtime', 'GOCDBSync', 'FreeDiskSpace'):
        self.commands[commandName] = [{commandName: {}}]

    # PilotsCommand
    # self.commands[ 'Pilots' ] = [
    #   { 'PilotsWMS' : { 'element' : 'Site', 'siteName' : None } },
    #   { 'PilotsWMS' : { 'element' : 'Resource', 'siteName' : None } }
    # ]

    # FIXME: do not forget about hourly vs Always ...etc
    # AccountingCacheCommand
    # self.commands[ 'AccountingCache' ] = [
    #   {'SuccessfullJobsBySiteSplitted' :{'hours' :24, 'plotType' :'Job' }},
    #   {'FailedJobsBySiteSplitted' :{'hours' :24, 'plotType' :'Job' }},
    #   {'SuccessfullPilotsBySiteSplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'FailedPilotsBySiteSplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'SuccessfullPilotsByCESplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'FailedPilotsByCESplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'RunningJobsBySiteSplitted' :{'hours' :24, 'plotType' :'Job' }},
    # # {'RunningJobsBySiteSplitted' :{'hours' :168, 'plotType' :'Job' }},
    # # {'RunningJobsBySiteSplitted' :{'hours' :720, 'plotType' :'Job' }},
    # # {'RunningJobsBySiteSplitted' :{'hours' :8760, 'plotType' :'Job' }},
    # ]

    # VOBOXAvailability
    # self.commands[ 'VOBOXAvailability' ] = [
    #   { 'VOBOXAvailability' : {} }
    #

    # Reuse clients for the commands
    sharedClients = self.clients
    sharedClients['GOCDBClient'] = GOCDBClient()
    sharedClients['ReportsClient'] = ReportsClient()
    sharedClients['ResourceStatusClient'] = rsClass()
    sharedClients['ResourceManagementClient'] = rmClass()
    sharedClients['WMSAdministrator'] = WMSAdministratorClient()
    sharedClients['Pilots'] = PilotManagerClient()

    self.cCaller = CommandCaller

    return S_OK()
def initialize( self ):
  """
  Create the GOCDB client and the CS API object.

  :return: whatever ``CSAPI.initialize()`` returns
  """
  # Client used to connect to GOCDB.
  gocdbClient = GOCDBClient()
  self.GOCDBClient = gocdbClient

  # API needed to update the configuration stored by the CS.
  csAPI = CSAPI()
  self.csAPI = csAPI

  return csAPI.initialize()
def __init__( self, rsDBin = None ):
  """
  Store the ResourceStatus DB handle and create a GOCDB client.

  :params:
    :attr:`rsDBin`: DB object to use; when None, a ``ResourceStatusDB`` is
    imported and instantiated here.
  """
  self.rsDB = rsDBin

  # Identity test: ``== None`` would call a custom ``__eq__`` on the DB object.
  if self.rsDB is None:
    from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
    self.rsDB = ResourceStatusDB()

  self.GOCDBClient = GOCDBClient()
class DTEveryResources_Command( Command ):
  """ Command collecting the GOCDB downtimes of a list of resources. """

  def doCommand( self, resources = None ):
    """
    Return downtime information for the resources given in input.

    :params:
      :attr:`resources`: list of resource names (when not given, the list is
      requested from the "ResourceStatus/ResourceStatus" service)

    :returns:
      a dictionary keyed by downtime ID:
      {dt_ID: {'ID': dt_ID, 'StartDate': ..., 'EndDate': ..., 'Severity': ...,
               'Description': ..., 'Link': ...}, ...}
      (the original documentation quotes 'OUTAGE' | 'AT_RISK' as severities).
      An empty dictionary is returned when the GOCDB call raises or when it
      reports no downtimes (``None``).

    :raises:
      RSSException when the resources list or the GOCDB status query comes
      back with a not-OK result.
    """
    if self.client is None:
      from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
      self.client = GOCDBClient()

    if resources is None:
#      from DIRAC.Core.DISET.RPCClient import RPCClient
      RPC = RPCClient( "ResourceStatus/ResourceStatus" )
      resources = RPC.getResourcesList()
      if not resources['OK']:
        raise RSSException( where( self, self.doCommand ) + " " + resources['Message'] )
      resources = resources['Value']

    try:
      res = self.client.getStatus( 'Resource', resources, None, 120 )
    except Exception:
      # Best effort: a failing GOCDB call is logged and reported as "no
      # downtimes". KeyboardInterrupt / SystemExit are no longer swallowed.
      gLogger.exception( "Exception when calling GOCDBClient." )
      return {}

    if not res['OK']:
      raise RSSException( where( self, self.doCommand ) + " " + res['Message'] )
    res = res['Value']

    if res is None:
      return {}

    resToReturn = {}
    for dt_ID in res:
      info = res[dt_ID]
      resToReturn[dt_ID] = {
        'ID'          : dt_ID,
        'StartDate'   : info['FORMATED_START_DATE'],
        'EndDate'     : info['FORMATED_END_DATE'],
        'Severity'    : info['SEVERITY'],
        # single quotes are stripped from the free-text description
        'Description' : info['DESCRIPTION'].replace( '\'', '' ),
        'Link'        : info['GOCDB_PORTAL_URL'],
      }

    return resToReturn

  # The base-class documentation is prepended to the one written above.
  doCommand.__doc__ = Command.doCommand.__doc__ + doCommand.__doc__
def initialize(self):
    """
    Prepare the clients used by the agent.

    Runs at agent initialization (per the original note, normally every 500
    cycles). It creates the GOCDB client, re-reads the ``DryRun`` option
    (keeping the current ``self.dryRun`` as the default) and creates the CS
    API object.

    :return: whatever ``CSAPI.initialize()`` returns
    """
    # Client used to connect to GOCDB.
    self.GOCDBClient = GOCDBClient()

    currentDryRun = self.dryRun
    self.dryRun = self.am_getOption('DryRun', currentDryRun)

    # API needed to update the configuration stored by the CS.
    csAPI = CSAPI()
    self.csAPI = csAPI
    return csAPI.initialize()
def setUp(self):
    """
    Parse the DIRAC command line, then build the fixtures shared by the
    tests: a Mock standing in for RSS plus GOCDB, SLS, SAM results and GGUS
    tickets clients.
    """
    # Imported locally and run first, before any client is instantiated.
    from DIRAC.Core.Base.Script import parseCommandLine
    parseCommandLine()

    rssMock = Mock()
    self.mockRSS = rssMock

    gocdbClient = GOCDBClient()
    self.GOCCli = gocdbClient

    slsClient = SLSClient()
    self.SLSCli = slsClient

    samClient = SAMResultsClient()
    self.SAMCli = samClient

    ggusClient = GGUSTicketsClient()
    self.GGUSCli = ggusClient
def doCommand(self, resources=None):
    """
    Return downtime information for the resources given in input.

    :params:
      :attr:`resources`: list of resource names (when not given, the list is
      requested from the "ResourceStatus/ResourceStatus" service)

    :returns:
      a dictionary keyed by downtime ID:
      {dt_ID: {'ID': dt_ID, 'StartDate': ..., 'EndDate': ..., 'Severity': ...,
               'Description': ..., 'Link': ...}, ...}
      (the original documentation quotes 'OUTAGE' | 'AT_RISK' as severities).
      An empty dictionary is returned when the GOCDB call raises or when it
      reports no downtimes (``None``).

    :raises:
      RSSException when the resources list or the GOCDB status query comes
      back with a not-OK result.
    """
    if self.client is None:
        from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
        self.client = GOCDBClient()

    if resources is None:
        # from DIRAC.Core.DISET.RPCClient import RPCClient
        RPC = RPCClient("ResourceStatus/ResourceStatus")
        resources = RPC.getResourcesList()
        if not resources["OK"]:
            raise RSSException(where(self, self.doCommand) + " " + resources["Message"])
        resources = resources["Value"]

    try:
        res = self.client.getStatus("Resource", resources, None, 120)
    except Exception:
        # Best effort: a failing GOCDB call is logged and reported as "no
        # downtimes". KeyboardInterrupt / SystemExit are no longer swallowed.
        gLogger.exception("Exception when calling GOCDBClient.")
        return {}

    if not res["OK"]:
        raise RSSException(where(self, self.doCommand) + " " + res["Message"])
    res = res["Value"]

    if res is None:
        return {}

    resToReturn = {}
    for dt_ID in res:
        info = res[dt_ID]
        resToReturn[dt_ID] = {
            "ID": dt_ID,
            "StartDate": info["FORMATED_START_DATE"],
            "EndDate": info["FORMATED_END_DATE"],
            "Severity": info["SEVERITY"],
            # single quotes are stripped from the free-text description
            "Description": info["DESCRIPTION"].replace("'", ""),
            "Link": info["GOCDB_PORTAL_URL"],
        }

    return resToReturn
def initialize(self, *args, **kwargs):
    """
    Initialize.

    Reads the extra configuration options and creates the GOCDB client:

      VOKeys           - list of VO identifiers (default: ['GridPP'])
      RemovalThreshold - default: 5

    :return: S_OK()
    """
    defaultVOKeys = ['GridPP']
    self.vokeys = self.am_getOption('VOKeys', defaultVOKeys)

    defaultThreshold = 5
    self.removal_threshold = self.am_getOption('RemovalThreshold', defaultThreshold)

    self.gocdb_client = GOCDBClient()

    return S_OK()
def initialize(self):
    """
    Register the commands to run and build the clients they share.

    The 'shifterProxy' option is set to 'DataManager' before anything else.

    :return: S_OK()
    """
    self.am_setOption('shifterProxy', 'DataManager')

    self.rmClient = ResourceManagementClient()

    # Each active command is registered under its own name with no arguments.
    for commandName in ('Downtime', 'SpaceTokenOccupancy', 'GOCDBSync', 'FreeDiskSpace'):
        self.commands[commandName] = [{commandName: {}}]

    # PilotsCommand
    # self.commands[ 'Pilots' ] = [
    #   { 'PilotsWMS' : { 'element' : 'Site', 'siteName' : None } },
    #   { 'PilotsWMS' : { 'element' : 'Resource', 'siteName' : None } }
    # ]

    # FIXME: do not forget about hourly vs Always ...etc
    # AccountingCacheCommand
    # self.commands[ 'AccountingCache' ] = [
    #   {'SuccessfullJobsBySiteSplitted' :{'hours' :24, 'plotType' :'Job' }},
    #   {'FailedJobsBySiteSplitted' :{'hours' :24, 'plotType' :'Job' }},
    #   {'SuccessfullPilotsBySiteSplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'FailedPilotsBySiteSplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'SuccessfullPilotsByCESplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'FailedPilotsByCESplitted' :{'hours' :24, 'plotType' :'Pilot' }},
    #   {'RunningJobsBySiteSplitted' :{'hours' :24, 'plotType' :'Job' }},
    # # {'RunningJobsBySiteSplitted' :{'hours' :168, 'plotType' :'Job' }},
    # # {'RunningJobsBySiteSplitted' :{'hours' :720, 'plotType' :'Job' }},
    # # {'RunningJobsBySiteSplitted' :{'hours' :8760, 'plotType' :'Job' }},
    # ]

    # VOBOXAvailability
    # self.commands[ 'VOBOXAvailability' ] = [
    #   { 'VOBOXAvailability' : {} }
    #

    # Reuse clients for the commands
    sharedClients = self.clients
    sharedClients['GOCDBClient'] = GOCDBClient()
    sharedClients['ReportGenerator'] = RPCClient('Accounting/ReportGenerator')
    sharedClients['ReportsClient'] = ReportsClient()
    sharedClients['ResourceStatusClient'] = ResourceStatusClient()
    sharedClients['ResourceManagementClient'] = ResourceManagementClient()
    sharedClients['WMSAdministrator'] = RPCClient('WorkloadManagement/WMSAdministrator')

    self.cCaller = CommandCaller

    return S_OK()
def __init__( self, args = None, clients = None ):
  """
  Initialise the command and pick the clients it works with.

  A client found in ``self.apis`` under 'GOCDBClient' or
  'ResourceManagementClient' is reused; a missing one is instantiated here.
  """
  super( DowntimeCommand, self ).__init__( args, clients )

  apis = self.apis
  self.gClient = apis[ 'GOCDBClient' ] if 'GOCDBClient' in apis else GOCDBClient()
  self.rmClient = ( apis[ 'ResourceManagementClient' ]
                    if 'ResourceManagementClient' in apis
                    else ResourceManagementClient() )
def initialize(self):
    """
    Register the IHEP commands and build the clients they share.

    The 'shifterProxy' option is set to 'DataManager' before anything else.

    :return: S_OK()
    """
    self.am_setOption('shifterProxy', 'DataManager')

    self.rmClient = ResourceManagementIHEPClient()

    # Each command is registered under its own name with no arguments.
    for commandName in ('JobIHEP', 'StorageIHEP', 'WorkNodeIHEP'):
        self.commands[commandName] = [{commandName: {}}]

    # Reuse clients for the commands
    sharedClients = self.clients
    sharedClients['GOCDBClient'] = GOCDBClient()
    sharedClients['ReportGenerator'] = RPCClient('Accounting/ReportGenerator')
    sharedClients['ReportsClient'] = ReportsClient()
    sharedClients['ResourceStatusClient'] = ResourceStatusClient()
    sharedClients['ResourceManagementIHEPClient'] = ResourceManagementIHEPClient()
    sharedClients['WMSAdministrator'] = RPCClient('WorkloadManagement/WMSAdministrator')

    self.cCaller = CommandCaller

    return S_OK()
class Synchronizer:
    """
    Aligns the content of the ResourceStatus / ResourceManagement databases
    with what is currently defined in the Configuration Service (CS), using
    GOCDB to resolve the grid site of each host.

    Every ``_syncXyz`` method handles one family of entities; :meth:`sync`
    runs them all in a fixed order.
    """

    #############################################################################

    def __init__(self, rsDBin=None, rmDBin=None):
        """
        :params:
          :attr:`rsDBin`: ResourceStatus DB object (optional)

          :attr:`rmDBin`: ResourceManagement DB object (optional)

        The default DB objects are created only when *both* arguments are
        None; if just one is given, the other attribute stays None.
        """
        self.rsDB = rsDBin
        self.rmDB = rmDBin

        if self.rsDB is None and self.rmDB is None:
            from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
            from DIRAC.ResourceStatusSystem.DB.ResourceManagementDB import ResourceManagementDB
            self.rsDB = ResourceStatusDB()
            self.rmDB = ResourceManagementDB()

        self.GOCDBClient = GOCDBClient()

    #############################################################################

    # def sync(self, thingsToSync = None, fake_param = None):
    def sync(self, _a, _b):
        """
        Run every synchronization step, in order.

        :params:
          :attr:`_a`, :attr:`_b`: ignored; the list of things to sync is
          fixed inside the method.

        :returns: S_OK()
        """
        thingsToSync = ['Utils', 'Sites', 'VOBOX', 'Resources',
                        'StorageElements', 'RegistryUsers']

        gLogger.info("!!! Sync DB content with CS content for %s !!!" % (' '.join(thingsToSync)))

        for thing in thingsToSync:
            getattr(self, '_sync' + thing)()

        return S_OK()

    #############################################################################
    # private helpers

    def _tierToInt(self, tier):
        """ Map a CS tier value (0, 1, 3 as int or str) to an int; anything else counts as 2. """
        for level in (0, 1, 3):
            if tier == level or tier == str(level):
                return level
        return 2

    def _validityWindow(self):
        """ Return the two timestamps given to every insert: current UTC time (no microseconds) and 9999-12-31 23:59:59. """
        return (datetime.datetime.utcnow().replace(microsecond=0),
                datetime.datetime(9999, 12, 31, 23, 59, 59))

    def _addService(self, service, serviceType, site):
        """ Insert (or modify) a service as 'Active' with reason 'init' and token owner 'RS_SVC'. """
        start, end = self._validityWindow()
        self.rsDB.addOrModifyService(service, serviceType, site, 'Active', 'init',
                                     start, 'RS_SVC', end)

    def _addResource(self, resource, resourceType, serviceType, site, gridSite):
        """ Insert (or modify) a resource as 'Active' with reason 'init' and token owner 'RS_SVC'. """
        start, end = self._validityWindow()
        self.rsDB.addOrModifyResource(resource, resourceType, serviceType, site, gridSite,
                                      'Active', 'init', start, 'RS_SVC', end)

    def _registerService(self, serviceType, site, servicesIn, servicesList):
        """ Record '<serviceType>@<site>' as present in the CS and add it to the DB when missing. """
        service = serviceType + '@' + site
        if service not in servicesList:
            servicesList.append(service)
        if service not in servicesIn:
            self._addService(service, serviceType, site)
            servicesIn.append(service)

    def _getGOCDBSiteName(self, hostname):
        """
        Return the GOCDB SITENAME of `hostname`, or None when GOCDB has no
        endpoint for it.

        When the name as given is unknown, the lookup is retried with the
        canonical name returned by ``socket.gethostbyname_ex``.

        :raises: RSSException when a GOCDB query returns a not-OK result
        """
        endpoint = self.GOCDBClient.getServiceEndpointInfo('hostname', hostname)
        if not endpoint['OK']:
            raise RSSException(endpoint['Message'])

        if endpoint['Value'] == []:
            trueName = socket.gethostbyname_ex(hostname)[0]
            endpoint = self.GOCDBClient.getServiceEndpointInfo('hostname', trueName)
            # The retry was previously used unchecked: a not-OK answer ended
            # in a KeyError on 'Value' instead of a meaningful error.
            if not endpoint['OK']:
                raise RSSException(endpoint['Message'])

        try:
            return endpoint['Value'][0]['SITENAME']
        except IndexError:
            return None

    def _registerNodes(self, nodes, resourceType, serviceType,
                       resourcesIn, servicesIn, servicesList):
        """
        Add to the DB every node of `nodes` (and the '<serviceType>@<site>'
        services of the DIRAC sites behind it) that is not there yet.
        Nodes unknown to GOCDB are skipped.
        """
        for node in nodes:
            siteInGOCDB = self._getGOCDBSiteName(node)
            if siteInGOCDB is None:
                continue

            siteInDIRAC = getDIRACSiteName(siteInGOCDB)
            if not siteInDIRAC['OK']:
                raise RSSException(siteInDIRAC['Message'])

            for site in siteInDIRAC['Value']:
                self._registerService(serviceType, site, servicesIn, servicesList)

            if node not in resourcesIn and node is not None:
                self._addResource(node, resourceType, serviceType, 'NULL', siteInGOCDB)
                resourcesIn.append(node)

    #############################################################################

    def _syncUtils(self):
        """
        Sync DB content with what is in :mod:`DIRAC.ResourceStatusSystem.Utilities.Utils`
        """
        statusIn = self.rsDB.getStatusList()

        # delete statuses that are no longer in Utils
        for stIn in statusIn:
            if stIn not in ValidStatus:
                self.rsDB.removeStatus(stIn)

        # add new statuses
        for s in ValidStatus:
            if s not in statusIn:
                self.rsDB.addStatus(s)

        for g in ('Site', 'Service', 'Resource'):
            typeIn = self.rsDB.getTypesList(g)

            if g == 'Site':
                typesList = ValidSiteType
            elif g == 'Service':
                typesList = ValidServiceType
            elif g == 'Resource':
                typesList = ValidResourceType

            # delete types that are no longer in Utils
            for tIn in typeIn:
                if tIn not in typesList:
                    self.rsDB.removeType(g, tIn)

            # add new types
            for t in typesList:
                if t not in typeIn:
                    self.rsDB.addType(g, t)

    #############################################################################

    def _syncSites(self):
        """
        Sync DB content with sites that are in the CS
        """
        # sites in the DB now
        sitesIn = [s[0] for s in self.rsDB.getMonitoredsList('Site', paramsList=['SiteName'])]

        # sites in CS now ('LCG.Dummy.ch' is never synchronized)
        sitesList = getSites()['Value']
        try:
            sitesList.remove('LCG.Dummy.ch')
        except ValueError:
            pass

        # remove sites from the DB that are no longer in the CS
        for site in sitesIn:
            if site not in sitesList:
                self.rsDB.removeSite(site)

        # add to DB what is in CS now and wasn't before
        for site in sitesList:
            if site in sitesIn:
                continue

            # DIRAC Tier
            t = 'T%d' % self._tierToInt(getSiteTier(site)['Value'][0])

            # Grid Name of the site
            gridSiteName = getGOCSiteName(site)
            if not gridSiteName['OK']:
                raise RSSException(gridSiteName['Message'])
            gridSiteName = gridSiteName['Value']

            # Grid Tier (with a workaround!): when several DIRAC sites share
            # the grid site, the best (lowest) tier among them is used.
            DIRACSitesOfGridSites = getDIRACSiteName(gridSiteName)
            if not DIRACSitesOfGridSites['OK']:
                raise RSSException(DIRACSitesOfGridSites['Message'])
            DIRACSitesOfGridSites = DIRACSitesOfGridSites['Value']

            if len(DIRACSitesOfGridSites) == 1:
                gt = t
            else:
                gt = self.__getGOCTier(DIRACSitesOfGridSites)

            start, end = self._validityWindow()
            self.rsDB.addOrModifySite(site, t, gridSiteName, 'Active', 'init',
                                      start, 'RS_SVC', end)
            self.rsDB.addOrModifyGridSite(gridSiteName, gt)

            sitesIn.append(site)

    #############################################################################

    def _syncVOBOX(self):
        """
        Sync DB content with VOBoxes
        """
        # services in the DB now
        servicesIn = [s[0] for s in self.rsDB.getMonitoredsList('Service',
                                                                paramsList=['ServiceName'])]

        for site in ['LCG.CNAF.it', 'LCG.IN2P3.fr', 'LCG.PIC.es',
                     'LCG.RAL.uk', 'LCG.GRIDKA.de', 'LCG.NIKHEF.nl']:
            service = 'VO-BOX@' + site
            if service not in servicesIn:
                self._addService(service, 'VO-BOX', site)

    #############################################################################

    def _syncResources(self):
        """
        Sync DB content with the resources (CEs, SE nodes, LFC, FTS and VOMS
        nodes) that are in the CS, together with the services they imply.
        """
        # resources in the DB now
        resourcesIn = [r[0] for r in self.rsDB.getMonitoredsList('Resource',
                                                                 paramsList=['ResourceName'])]

        # services in the DB now
        servicesIn = [s[0] for s in self.rsDB.getMonitoredsList('Service',
                                                                paramsList=['ServiceName'])]

        # Site-CE mapping in CS now
        siteCE = getSiteCEMapping('LCG')['Value']
        # Site-SE mapping in CS now
        siteSE = getSiteSEMapping('LCG')['Value']

        # CEs in CS now
        CEList = [ce for ces in siteCE.values() for ce in ces if ce is not None]

        # SEs in CS now
        SEList = [se for ses in siteSE.values() for se in ses]

        # SE Nodes in CS now
        SENodeList = []
        for SE in SEList:
            node = getSENodes(SE)['Value'][0]
            if node is not None and node not in SENodeList:
                SENodeList.append(node)

        # LFC Nodes in CS now ('ReadWrite' -> LFC_C, 'ReadOnly' -> LFC_L)
        LFCNodeList_L = []
        LFCNodeList_C = []
        for site in getLFCSites()['Value']:
            for readable in ('ReadOnly', 'ReadWrite'):
                LFCNode = getLFCNode(site, readable)['Value']
                if LFCNode is None or LFCNode == []:
                    continue
                LFCNode = LFCNode[0]
                target = LFCNodeList_C if readable == 'ReadWrite' else LFCNodeList_L
                if LFCNode not in target:
                    target.append(LFCNode)

        # FTS Nodes in CS now
        FTSNodeList = []
        sitesWithFTS = getFTSSites()
        for site in sitesWithFTS['Value']:
            fts = getFTSEndpoint(site)['Value']
            if fts is None or fts == []:
                continue
            fts = fts[0]
            if fts not in FTSNodeList:
                FTSNodeList.append(fts)

        # VOMS Nodes in CS now
        VOMSNodeList = getVOMSEndpoints()['Value']

        # complete list of resources in CS now
        resourcesList = (CEList + SENodeList + LFCNodeList_L + LFCNodeList_C
                         + FTSNodeList + VOMSNodeList)

        # list of services in CS now (filled while resources are registered)
        servicesList = []

        # remove resources that are no longer in the CS
        for res in resourcesIn:
            if res not in resourcesList:
                self.rsDB.removeResource(res)
                self.rsDB.removeStorageElement(resourceName=res)

        # add to DB what is in CS now and wasn't before

        # CEs
        for site in siteCE.keys():
            if site == 'LCG.Dummy.ch':
                continue
            for ce in siteCE[site]:
                if ce is None:
                    continue

                siteInGOCDB = self._getGOCDBSiteName(ce)
                if siteInGOCDB is None:
                    continue

                serviceType = 'Computing'
                self._registerService(serviceType, site, servicesIn, servicesList)

                if ce not in resourcesIn:
                    ceType = 'CREAMCE' if getCEType(site, ce)['Value'] == 'CREAM' else 'CE'
                    self._addResource(ce, ceType, serviceType, site, siteInGOCDB)
                    resourcesIn.append(ce)

        # SRMs, LFC_C, LFC_L and FTSs are all registered under 'Storage'
        # services (as in the original code); VOMSs under 'VOMS' services.
        # The VOMS step used to iterate the `sites` variable left behind by
        # the FTS step instead of the sites of the VOMS node itself.
        for nodes, resourceType, serviceType in ((SENodeList, 'SE', 'Storage'),
                                                 (LFCNodeList_C, 'LFC_C', 'Storage'),
                                                 (LFCNodeList_L, 'LFC_L', 'Storage'),
                                                 (FTSNodeList, 'FTS', 'Storage'),
                                                 (VOMSNodeList, 'VOMS', 'VOMS')):
            self._registerNodes(nodes, resourceType, serviceType,
                                resourcesIn, servicesIn, servicesList)

        # remove services that are no longer in the CS (VO-BOXes are handled
        # by _syncVOBOX and are never removed here)
        for ser in servicesIn:
            if ser in servicesList:
                continue
            serType = ser.split('@')[0]
            if serType != 'VO-BOX':
                self.rsDB.removeService(ser)
                self.rsDB.removeResource(serviceName=ser)
                site = ser.split('@')[1]
                if serType == 'Storage':
                    self.rsDB.removeStorageElement(siteName=site)

    #############################################################################

    def _syncStorageElements(self):
        """
        Sync DB content with the storage elements that are in the CS, once
        for 'Read' and once for 'Write' access.
        """
        # Get StorageElements from the CS
        SEs = getStorageElements()
        if not SEs['OK']:
            raise RSSException(SEs['Message'])
        SEs = SEs['Value']

        for access in ('Read', 'Write'):

            storageElementsIn = self.rsDB.getMonitoredsList('StorageElement' + access,
                                                            paramsList=['StorageElementName'])
            try:
                storageElementsIn = [x[0] for x in storageElementsIn]
            except IndexError:
                pass

            # remove storageElements that are no longer in the CS
            for se in storageElementsIn:
                if se not in SEs:
                    self.rsDB.removeStorageElement(storageElementName=se,
                                                   resourceName=None,
                                                   access=access)

            # add new storage elements
            for SE in SEs:
                srm = getSENodes(SE)['Value'][0]
                if srm is None:
                    continue

                siteInGOCDB = self.GOCDBClient.getServiceEndpointInfo('hostname', srm)
                if not siteInGOCDB['OK']:
                    raise RSSException(siteInGOCDB['Message'])
                if siteInGOCDB['Value'] == []:
                    continue
                siteInGOCDB = siteInGOCDB['Value'][0]['SITENAME']

                if SE not in storageElementsIn:
                    start, end = self._validityWindow()
                    self.rsDB.addOrModifyStorageElement(SE, srm, siteInGOCDB, 'Active', 'init',
                                                        start, 'RS_SVC', end, access=access)
                    storageElementsIn.append(SE)

    #############################################################################

    def __getGOCTier(self, sitesList):
        """
        Return the best (lowest) tier among `sitesList` as 'T0'..'T3';
        'T3' when the list is empty.
        """
        gridTier = 3

        for site in sitesList:
            tn = self._tierToInt(getSiteTier(site)['Value'][0])
            if tn < gridTier:
                gridTier = tn

        return 'T%d' % gridTier

    #############################################################################

    def _syncRegistryUsers(self):
        """
        Register in the ResourceManagement DB every user found under
        /Registry/Users, with a lower-cased name (taken from the text after
        the last '=' of the DN) and a lower-cased e-mail.
        """
        from DIRAC.ResourceStatusSystem.Utilities import CS

        users = CS.getTypedDictRootedAt("Users", root="/Registry")

        for u in users:
            dn = users[u]['DN']
            email = users[u]['Email']

            # multi-valued options come back as lists: the first value is used
            if isinstance(dn, list):
                dn = dn[0]
            if isinstance(email, list):
                email = email[0]

            dn = dn.split('=')[-1]
            self.rmDB.registryAddUser(u, dn.lower(), email.lower())
""" Few unit tests for LCG clients
"""

__RCSID__ = "$Id$"

import mock
from datetime import datetime, timedelta

from DIRAC.Core.LCG.GOCDBClient import GOCDBClient

mockRSS = mock.MagicMock()
GOCCli = GOCDBClient()

# data

# One single clock reading, truncated to the minute: every other date is
# derived from it, so they stay consistent with each other even when the
# module is imported right at a minute boundary.
now = datetime.utcnow().replace(microsecond=0, second=0)
tomorrow = now + timedelta(hours=24)
inAWeek = now + timedelta(days=7)

# 'YYYY-MM-DD HH:MM' strings ([:-3] drops the ':SS' part of str(datetime))
nowLess12h = str(now - timedelta(hours=12))[:-3]
nowPlus8h = str(now + timedelta(hours=8))[:-3]
nowPlus24h = str(now + timedelta(hours=24))[:-3]
nowPlus40h = str(now + timedelta(hours=40))[:-3]
nowPlus50h = str(now + timedelta(hours=50))[:-3]
nowPlus60h = str(now + timedelta(hours=60))[:-3]

# XML fixture, built up piece by piece
XML_site_ongoing = '<?xml version="1.0"?>\n<ROOT><DOWNTIME ID="78505456" PRIMARY_KEY="28490G0"'
XML_site_ongoing += ' CLASSIFICATION="SCHEDULED"><SITENAME>GRISU-ENEA-GRID</SITENAME>'
XML_site_ongoing += '<SEVERITY>OUTAGE</SEVERITY>'
XML_site_ongoing += '<DESCRIPTION>Software problems SITE</DESCRIPTION>'
XML_site_ongoing += '<INSERT_DATE>1276273965</INSERT_DATE>'
class DowntimeCommand(Command):
    '''
    Downtime "master" Command.

    Queries GOCDB for downtimes of a Site or Resource, purges expired entries
    from the downtime cache and stores the downtimes found.
    '''

    def __init__(self, args=None, clients=None):
        '''
        Set up the GOCDB and ResourceManagement clients.

        Clients found in self.apis (populated by the base class from
        `clients`) are reused; otherwise default ones are instantiated.
        '''
        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: iterable of downtime dictionaries as built by doNew
        :return: the first failing S_ERROR structure, or S_OK() when every
                 downtime was stored (including when `result` is empty)
        '''
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                gocdbServiceType=dt['GOCDBServiceType'])
            # Stop at the first failure instead of silently overwriting it
            # with the status of a later, successful store.
            if not resQuery['OK']:
                return resQuery

        return S_OK()

    def _cleanCommand(self, element, elementNames):
        '''
        Clear Cache from expired DT.

        :param element: element kind passed to the cache query
        :param elementNames: names whose cached downtimes are inspected
        :return: S_ERROR from the first failing select, otherwise S_OK with
                 the list of results of the delete calls that were issued
        '''
        resQuery = []
        currentDate = datetime.utcnow()

        for elementName in elementNames:
            # reading all the cache entries
            result = self.rmClient.selectDowntimeCache(element=element,
                                                       name=elementName)
            if not result['OK']:
                return result

            uniformResult = [dict(zip(result['Columns'], row))
                             for row in result['Value']]

            # An element without cached downtimes simply contributes nothing;
            # the remaining elements must still be cleaned.
            for dt in uniformResult:
                if dt['EndDate'] < currentDate:
                    result = self.rmClient.deleteDowntimeCache(
                        downtimeID=dt['DowntimeID'])
                    resQuery.append(result)

        return S_OK(resQuery)

    def _prepareCommand(self):
        '''
        DowntimeCommand requires three arguments in self.args:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>

        An optional 'hours' argument is also read.

        If the elements are Site(s), we need to get their GOCDB names. They
        may not have, so we ignore them if they do not have.

        :return: S_ERROR on a missing/invalid argument or a failed lookup,
                 otherwise S_OK( ( element, elementName, hours,
                 gocdbServiceType ) )
        '''
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is neither Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        gocdbServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            if not gocSite['OK']:
                return gocSite
            elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # We need to distinguish if it's tape or disk.
            # Query the options once and propagate a failed lookup instead of
            # failing with a KeyError on the missing 'Value'.
            seOptions = getStorageElementOptions(elementName)
            if not seOptions['OK']:
                return seOptions
            seOptions = seOptions['Value']
            if seOptions['TapeSE']:
                gocdbServiceType = "srm.nearline"
            elif seOptions['DiskSE']:
                gocdbServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        elif elementType == 'FTS' or elementType == 'FTS3':
            gocdbServiceType = 'FTS'
            try:
                # WARNING: this method presupposes that the server is an FTS3 type
                elementName = getGOCFTSName(elementName)
            except Exception:
                # Exception (not a bare except) so that KeyboardInterrupt and
                # SystemExit are not swallowed.
                return S_ERROR('No FTS3 server specified in dirac.cfg (see Resources/FTSEndpoints)')

        return S_OK((element, elementName, hours, gocdbServiceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method or from its
        own arguments.

        For every elementName, unless it is given a list, in which case it
        contacts the gocdb client. The server is not very stable, so in case
        of failure tries a second time.

        If there are downtimes, they are recorded in the downtime cache.

        :param masterParams: optional ( element, elementNames ) pair; when
                             given, self.args is not consulted
        :return: S_ERROR on failure; S_OK( None ) when GOCDB reports nothing;
                 S_OK() once the downtimes have been stored
        '''
        if masterParams is not None:
            element, elementNames = masterParams
            # translate DIRAC CS elementNames into GOCDB elementNames
            elementNames = [CSHelpers.getSEHost(e) for e in elementNames]
            hours = None
            gocdbServiceType = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gocdbServiceType = params['Value']
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given
        # <hours> from now
        startDate = None
        if hours is not None:
            startDate = datetime.utcnow() + timedelta(hours=hours)

        try:
            results = self.gClient.getStatus(element, elementNames, startDate)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, elementNames, startDate)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes['OK']:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}

            if 'HOSTNAME' in downDic:
                dt['Name'] = downDic['HOSTNAME']
            elif 'SITENAME' in downDic:
                dt['Name'] = downDic['SITENAME']
            else:
                return S_ERROR("SITENAME or HOSTNAME are missing")

            if 'SERVICE_TYPE' in downDic:
                dt['GOCDBServiceType'] = downDic['SERVICE_TYPE']
                if gocdbServiceType:
                    # Case-insensitive comparison of the service type expected
                    # from the CS with the one reported by GOCDB.
                    gocdbST = gocdbServiceType.lower()
                    csST = downDic['SERVICE_TYPE'].lower()
                    if gocdbST != csST:
                        return S_ERROR("SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s" % (gocdbST, csST, dt['Name']))
            else:
                # WARNING: do we want None as default value?
                dt['GOCDBServiceType'] = None

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            # single quotes are stripped from the description before storing
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK()
class GOCDB2CSAgent(AgentModule):
    """ Class to retrieve information about service endpoints
        from GOCDB and update configuration stored by CS
    """

    def __init__(self, *args, **kwargs):
        """ c'tor
        """
        super(GOCDB2CSAgent, self).__init__(*args, **kwargs)
        # Both clients are created in initialize().
        self.GOCDBClient = None
        self.csAPI = None
        # When True, changes are only shown as a diff and never committed.
        self.dryRun = False

    def initialize(self):
        """ Run at the agent initialization (normally every 500 cycles)

        :return: result of CSAPI.initialize()
        """
        # client to connect to GOCDB
        self.GOCDBClient = GOCDBClient()
        self.dryRun = self.am_getOption('DryRun', self.dryRun)

        # API needed to update configuration stored by CS
        self.csAPI = CSAPI()
        return self.csAPI.initialize()

    def execute(self):
        """
        Execute GOCDB queries according to the function map
        and user request (options in configuration).

        A failing function is logged but does not abort the cycle.

        :return: always S_OK()
        """
        # __functionMap is at the end of the class definition
        for option, functionCall in GOCDB2CSAgent.__functionMap.items():
            optionValue = self.am_getOption(option, True)
            if optionValue:
                result = functionCall(self)
                if not result['OK']:
                    self.log.error("%s() failed with message: %s" %
                                   (functionCall.__name__, result['Message']))
                else:
                    self.log.info("Successfully executed %s" % functionCall.__name__)

        return S_OK()

    def updatePerfSONARConfiguration(self):
        """
        Get current status of perfSONAR endpoints from GOCDB
        and update CS configuration accordingly.

        :return: S_OK() on success, S_ERROR with a summary message when any
                 step (fetch, site-name mapping, preparation, CS update) fails
        """
        log = self.log.getSubLogger('updatePerfSONAREndpoints')
        log.debug('Begin function ...')

        # get endpoints
        result = self.__getPerfSONAREndpoints()
        if not result['OK']:
            log.error("__getPerfSONAREndpoints() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to fetch perfSONAR endpoints from GOCDB.')
        endpointList = result['Value']

        # add DIRAC site name
        result = self.__addDIRACSiteName(endpointList)
        if not result['OK']:
            log.error("__addDIRACSiteName() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to extend the list with DIRAC site names.')
        extendedEndpointList = result['Value']

        # prepare dictionary with new configuration
        result = self.__preparePerfSONARConfiguration(extendedEndpointList)
        if not result['OK']:
            log.error("__preparePerfSONARConfiguration() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to prepare a new perfSONAR configuration.')
        finalConfiguration = result['Value']

        # update configuration according to the final status of endpoints.
        # The outcome must be checked: a failed commit used to be reported
        # as a successful update.
        result = self.__updateConfiguration(finalConfiguration)
        if not result['OK']:
            log.error("__updateConfiguration() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to update the perfSONAR configuration in CS.')
        log.debug("Configuration updated succesfully")

        log.debug('End function.')
        return S_OK()

    def __getPerfSONAREndpoints(self):
        """
        Retrieve perfSONAR endpoint information directly from GOCDB.

        :return: List of perfSONAR endpoints (dictionaries) as stored by GOCDB.
        """
        log = self.log.getSubLogger('__getPerfSONAREndpoints')
        log.debug('Begin function ...')

        # get perfSONAR endpoints (latency and bandwidth) from GOCDB
        endpointList = []
        for endpointType in ['Latency', 'Bandwidth']:
            result = self.GOCDBClient.getServiceEndpointInfo('service_type',
                                                             'net.perfSONAR.%s' % endpointType)

            if not result['OK']:
                log.error("getServiceEndpointInfo() failed with message: %s" % result['Message'])
                return S_ERROR('Could not fetch %s endpoints from GOCDB' % endpointType.lower())

            log.debug('Number of %s endpoints: %s' % (endpointType.lower(), len(result['Value'])))
            endpointList.extend(result['Value'])

        log.debug('Number of perfSONAR endpoints: %s' % len(endpointList))
        log.debug('End function.')
        return S_OK(endpointList)

    def __preparePerfSONARConfiguration(self, endpointList):
        """
        Prepare a dictionary with a new CS configuration of perfSONAR endpoints.

        :param endpointList: endpoint dictionaries carrying 'DIRACSITENAME'
                             and 'HOSTNAME' keys
        :return: Dictionary where keys are configuration paths (options and sections)
                 and values are values of corresponding options
                 or None in case of a path pointing to a section.
        """
        log = self.log.getSubLogger('__preparePerfSONARConfiguration')
        log.debug('Begin function ...')

        # static elements of a path
        rootPath = '/Resources/Sites'
        extPath = 'Network'
        baseOptionName = 'Enabled'
        options = {baseOptionName: 'True', 'ServiceType': 'perfSONAR'}

        # enable GOCDB endpoints in configuration
        newConfiguration = {}
        for endpoint in endpointList:
            # endpoints without a known DIRAC site cannot be placed in the CS tree
            if endpoint['DIRACSITENAME'] is None:
                continue

            # the first dot-separated component of the DIRAC site name is used
            # as the section directly below rootPath
            split = endpoint['DIRACSITENAME'].split('.')
            path = cfgPath(rootPath, split[0], endpoint['DIRACSITENAME'],
                           extPath, endpoint['HOSTNAME'])
            for name, defaultValue in options.items():
                newConfiguration[cfgPath(path, name)] = defaultValue

        # get current configuration
        currentConfiguration = {}
        for option in options:
            result = gConfig.getConfigurationTree(rootPath, extPath + '/', '/' + option)
            if not result['OK']:
                log.error("getConfigurationTree() failed with message: %s" % result['Message'])
                return S_ERROR('Unable to fetch perfSONAR endpoints from CS.')
            currentConfiguration.update(result['Value'])

        # disable endpoints that disappeared in GOCDB
        removedElements = set(currentConfiguration) - set(newConfiguration)
        newElements = set(newConfiguration) - set(currentConfiguration)

        # every endpoint contributes len(options) paths; floor division keeps
        # the count an integer
        addedEndpoints = len(newElements) // len(options)
        disabledEndpoints = 0
        for path in removedElements:
            if baseOptionName in path:
                newConfiguration[path] = 'False'
                # only count endpoints that were not already disabled
                if currentConfiguration[path] != 'False':
                    disabledEndpoints = disabledEndpoints + 1

        # inform what will be changed
        if addedEndpoints > 0:
            self.log.info("%s new perfSONAR endpoints will be added to the configuration" % addedEndpoints)

        if disabledEndpoints > 0:
            self.log.info("%s old perfSONAR endpoints will be disable in the configuration" % disabledEndpoints)

        if addedEndpoints == 0 and disabledEndpoints == 0:
            self.log.info("perfSONAR configuration is up-to-date")

        log.debug('End function.')
        return S_OK(newConfiguration)

    def __addDIRACSiteName(self, inputList):
        """
        Extend given list of GOCDB endpoints with DIRAC site name, i.e.
        add an entry "DIRACSITENAME" in dictionaries that describe endpoints.
        If given site name could not be found "DIRACSITENAME" is set to 'None'.

        The input dictionaries are modified in place.

        :return: List of perfSONAR endpoints (dictionaries).
        """
        log = self.log.getSubLogger('__addDIRACSiteName')
        log.debug('Begin function ...')

        # get site name dictionary
        result = getDIRACGOCDictionary()
        if not result['OK']:
            log.error("getDIRACGOCDictionary() failed with message: %s" % result['Message'])
            return S_ERROR('Could not get site name dictionary')

        # reverse the dictionary (assume 1 to 1 relation)
        DIRACGOCDict = result['Value']
        GOCDIRACDict = dict(zip(DIRACGOCDict.values(), DIRACGOCDict.keys()))

        # add DIRAC site names
        outputList = []
        for entry in inputList:
            try:
                entry['DIRACSITENAME'] = GOCDIRACDict[entry['SITENAME']]
            except KeyError:
                self.log.warn("No dictionary entry for %s. " % entry['SITENAME'])
                entry['DIRACSITENAME'] = None
            outputList.append(entry)

        log.debug('End function.')
        return S_OK(outputList)

    def __updateConfiguration(self, setElements=None, delElements=None):
        """
        Update configuration stored by CS.

        :param setElements: dictionary path -> value; a value of None marks
                            the path as a section, anything else as an option
        :param delElements: list of paths; both delOption and delSection are
                            attempted on each
        :return: S_ERROR when the commit fails, S_OK() otherwise (including
                 dry-run mode, where only a diff is shown)
        """
        if setElements is None:
            setElements = {}
        if delElements is None:
            delElements = []

        log = self.log.getSubLogger('__updateConfiguration')
        log.debug('Begin function ...')

        # assure existence and proper value of a section or an option
        for path, value in setElements.items():

            if value is None:
                section = path
            else:
                # an option lives in the section named by everything before
                # the last '/'
                split = path.rsplit('/', 1)
                section = split[0]

            try:
                result = self.csAPI.createSection(section)
                if not result['OK']:
                    log.error("createSection() failed with message: %s" % result['Message'])
            except Exception as e:
                log.error("Exception in createSection(): %s" % repr(e).replace(',)', ')'))

            if value is not None:
                try:
                    result = self.csAPI.setOption(path, value)
                    if not result['OK']:
                        log.error("setOption() failed with message: %s" % result['Message'])
                except Exception as e:
                    log.error("Exception in setOption(): %s" % repr(e).replace(',)', ')'))

        # delete elements in the configuration
        for path in delElements:
            result = self.csAPI.delOption(path)
            if not result['OK']:
                log.warn("delOption() failed with message: %s" % result['Message'])

                result = self.csAPI.delSection(path)
                if not result['OK']:
                    log.warn("delSection() failed with message: %s" % result['Message'])

        if self.dryRun:
            log.info("Dry Run: CS won't be updated")
            self.csAPI.showDiff()
        else:
            # update configuration stored by CS
            result = self.csAPI.commit()
            if not result['OK']:
                log.error("commit() failed with message: %s" % result['Message'])
                return S_ERROR("Could not commit changes to CS.")
            else:
                log.info("Committed changes to CS")

        log.debug('End function.')
        return S_OK()

    # define mapping between an agent option in the configuration and a function call
    __functionMap = {
        'UpdatePerfSONARS': updatePerfSONARConfiguration,
    }
class Synchronizer:
    """
    Keeps the ResourceStatus / ResourceManagement databases aligned with the
    content of the Configuration Service (CS), using GOCDB to resolve the grid
    site a host belongs to.
    """

    #############################################################################

    def __init__(self, rsDBin=None, rmDBin=None):
        """
        :param rsDBin: ResourceStatus DB object; a ResourceStatusDB is created
                       when not given
        :param rmDBin: ResourceManagement DB object; a ResourceManagementDB is
                       created when not given
        """
        self.rsDB = rsDBin
        self.rmDB = rmDBin

        # Each DB is defaulted on its own: sync() needs both, so supplying
        # only one of them must not leave the other as None.
        if self.rsDB is None:
            from DIRAC.ResourceStatusSystem.DB.ResourceStatusDB import ResourceStatusDB
            self.rsDB = ResourceStatusDB()
        if self.rmDB is None:
            from DIRAC.ResourceStatusSystem.DB.ResourceManagementDB import ResourceManagementDB
            self.rmDB = ResourceManagementDB()

        self.GOCDBClient = GOCDBClient()

    #############################################################################

    # def sync(self, thingsToSync = None, fake_param = None):
    def sync(self, _a, _b):
        """
        Run every _sync<Thing> method in a fixed order.

        Both positional parameters are ignored; the list of things to sync is
        hard-coded.

        :return: S_OK()
        """
        thingsToSync = ['Utils', 'Sites', 'VOBOX', 'Resources', 'StorageElements', 'RegistryUsers']

        gLogger.info("!!! Sync DB content with CS content for %s !!!" % (' '.join(thingsToSync)))

        for thing in thingsToSync:
            getattr(self, '_sync' + thing)()

        return S_OK()

    #############################################################################
    # private helpers

    def __initialStatusArgs(self):
        """
        Trailing positional arguments shared by every addOrModify* call:
        status, reason, date (UTC now, no microseconds), token owner and a
        far-future date.
        """
        return ('Active', 'init',
                datetime.datetime.utcnow().replace(microsecond=0),
                'RS_SVC',
                datetime.datetime(9999, 12, 31, 23, 59, 59))

    def __tierNumber(self, tier):
        """
        Map a CS tier value (int or str) to 0, 1 or 3; anything else is 2.
        """
        if tier == 0 or tier == '0':
            return 0
        if tier == 1 or tier == '1':
            return 1
        if tier == 3 or tier == '3':
            return 3
        return 2

    def __getGOCSiteOfHost(self, hostname):
        """
        Return the GOCDB SITENAME of `hostname`, or None when GOCDB does not
        know the host.

        When the first lookup returns an empty list, the lookup is retried
        with the primary host name returned by socket.gethostbyname_ex.

        :raises RSSException: when a GOCDB query fails
        """
        result = self.GOCDBClient.getServiceEndpointInfo('hostname', hostname)
        if not result['OK']:
            raise RSSException(result['Message'])

        if result['Value'] == []:
            trueName = socket.gethostbyname_ex(hostname)[0]
            result = self.GOCDBClient.getServiceEndpointInfo('hostname', trueName)
            # The retry can fail as well; report it like the first query
            # rather than tripping over the missing 'Value' key.
            if not result['OK']:
                raise RSSException(result['Message'])

        try:
            return result['Value'][0]['SITENAME']
        except IndexError:
            return None

    def __registerService(self, service, serviceType, site, servicesIn, servicesList):
        """
        Record `service` as present in the CS (servicesList) and add it to the
        DB when it is not there yet (servicesIn). Both lists are updated in
        place.
        """
        if service not in servicesList:
            servicesList.append(service)
        if service not in servicesIn:
            self.rsDB.addOrModifyService(service, serviceType, site,
                                         *self.__initialStatusArgs())
            servicesIn.append(service)

    def __syncNodes(self, nodes, resourceType, serviceType,
                    servicesIn, servicesList, resourcesIn):
        """
        Add to the DB the services and resources behind a list of host names.

        For each node known to GOCDB: one '<serviceType>@<site>' service per
        DIRAC site mapped to its grid site, then the node itself as a resource
        of type `resourceType` (with 'NULL' as site name).

        :raises RSSException: when a GOCDB or site-name lookup fails
        """
        for node in nodes:
            siteInGOCDB = self.__getGOCSiteOfHost(node)
            if siteInGOCDB is None:
                continue

            siteInDIRAC = getDIRACSiteName(siteInGOCDB)
            if not siteInDIRAC['OK']:
                raise RSSException(siteInDIRAC['Message'])

            for site in siteInDIRAC['Value']:
                self.__registerService(serviceType + '@' + site, serviceType, site,
                                       servicesIn, servicesList)

            if node not in resourcesIn and node is not None:
                self.rsDB.addOrModifyResource(node, resourceType, serviceType, 'NULL',
                                              siteInGOCDB, *self.__initialStatusArgs())
                resourcesIn.append(node)

    #############################################################################

    def _syncUtils(self):
        """
        Sync DB content with what is in
        :mod:`DIRAC.ResourceStatusSystem.Utilities.Utils`
        """
        statusIn = self.rsDB.getStatusList()
        # delete status not more in Utils
        for stIn in statusIn:
            if stIn not in ValidStatus:
                self.rsDB.removeStatus(stIn)
        # Add new status
        for s in ValidStatus:
            if s not in statusIn:
                self.rsDB.addStatus(s)

        validTypes = (('Site', ValidSiteType),
                      ('Service', ValidServiceType),
                      ('Resource', ValidResourceType))

        for g, typesList in validTypes:
            typeIn = self.rsDB.getTypesList(g)
            # delete types not more in Utils
            for tIn in typeIn:
                if tIn not in typesList:
                    self.rsDB.removeType(g, tIn)
            # Add new types
            for t in typesList:
                if t not in typeIn:
                    self.rsDB.addType(g, t)

    #############################################################################

    def _syncSites(self):
        """
        Sync DB content with sites that are in the CS

        :raises RSSException: when a site-name lookup fails
        """
        # sites in the DB now
        sitesIn = self.rsDB.getMonitoredsList('Site', paramsList=['SiteName'])
        sitesIn = [s[0] for s in sitesIn]

        # sites in CS now
        sitesList = getSites()['Value']

        try:
            sitesList.remove('LCG.Dummy.ch')
        except ValueError:
            pass

        # remove sites from the DB not more in the CS
        for site in sitesIn:
            if site not in sitesList:
                self.rsDB.removeSite(site)

        # add to DB what is in CS now and wasn't before
        for site in sitesList:
            if site in sitesIn:
                continue

            # DIRAC Tier
            tier = getSiteTier(site)['Value'][0]
            t = 'T%d' % self.__tierNumber(tier)

            # Grid Name of the site
            gridSiteName = getGOCSiteName(site)
            if not gridSiteName['OK']:
                raise RSSException(gridSiteName['Message'])
            gridSiteName = gridSiteName['Value']

            # Grid Tier (with a workaround!)
            DIRACSitesOfGridSites = getDIRACSiteName(gridSiteName)
            if not DIRACSitesOfGridSites['OK']:
                raise RSSException(DIRACSitesOfGridSites['Message'])
            DIRACSitesOfGridSites = DIRACSitesOfGridSites['Value']
            if len(DIRACSitesOfGridSites) == 1:
                gt = t
            else:
                gt = self.__getGOCTier(DIRACSitesOfGridSites)

            self.rsDB.addOrModifySite(site, t, gridSiteName, *self.__initialStatusArgs())
            self.rsDB.addOrModifyGridSite(gridSiteName, gt)

            sitesIn.append(site)

    #############################################################################

    def _syncVOBOX(self):
        """
        Sync DB content with VOBoxes
        """
        # services in the DB now
        servicesIn = self.rsDB.getMonitoredsList('Service', paramsList=['ServiceName'])
        servicesIn = [s[0] for s in servicesIn]

        for site in ['LCG.CNAF.it', 'LCG.IN2P3.fr', 'LCG.PIC.es',
                     'LCG.RAL.uk', 'LCG.GRIDKA.de', 'LCG.NIKHEF.nl']:
            service = 'VO-BOX@' + site
            if service not in servicesIn:
                self.rsDB.addOrModifyService(service, 'VO-BOX', site,
                                             *self.__initialStatusArgs())

    #############################################################################

    def _syncResources(self):
        """
        Sync DB resources and services with the CEs, SE nodes, LFC, FTS and
        VOMS nodes found in the CS.

        :raises RSSException: when a GOCDB or site-name lookup fails
        """
        # resources in the DB now
        resourcesIn = self.rsDB.getMonitoredsList('Resource', paramsList=['ResourceName'])
        resourcesIn = [r[0] for r in resourcesIn]

        # services in the DB now
        servicesIn = self.rsDB.getMonitoredsList('Service', paramsList=['ServiceName'])
        servicesIn = [s[0] for s in servicesIn]

        # Site-CE mapping in CS now
        siteCE = getSiteCEMapping('LCG')['Value']
        # Site-SE mapping in CS now
        siteSE = getSiteSEMapping('LCG')['Value']

        # CEs in CS now
        CEList = []
        for ces in siteCE.values():
            CEList.extend(ce for ce in ces if ce is not None)

        # SEs in CS now
        SEList = []
        for ses in siteSE.values():
            SEList.extend(ses)

        # SE Nodes in CS now
        SENodeList = []
        for SE in SEList:
            node = getSENodes(SE)['Value'][0]
            if node is None:
                continue
            if node not in SENodeList:
                SENodeList.append(node)

        # LFC Nodes in CS now ('_C' collects ReadWrite nodes, '_L' ReadOnly ones)
        LFCNodeList_L = []
        LFCNodeList_C = []
        for site in getLFCSites()['Value']:
            for readable in ('ReadOnly', 'ReadWrite'):
                LFCNode = getLFCNode(site, readable)['Value']
                if LFCNode is None or LFCNode == []:
                    continue
                LFCNode = LFCNode[0]
                if readable == 'ReadWrite':
                    if LFCNode not in LFCNodeList_C:
                        LFCNodeList_C.append(LFCNode)
                elif readable == 'ReadOnly':
                    if LFCNode not in LFCNodeList_L:
                        LFCNodeList_L.append(LFCNode)

        # FTS Nodes in CS now
        FTSNodeList = []
        sitesWithFTS = getFTSSites()
        for site in sitesWithFTS['Value']:
            fts = getFTSEndpoint(site)['Value']
            if fts is None or fts == []:
                continue
            fts = fts[0]
            if fts not in FTSNodeList:
                FTSNodeList.append(fts)

        # VOMS Nodes in CS now
        VOMSNodeList = getVOMSEndpoints()['Value']

        # complete list of resources in CS now
        resourcesList = CEList + SENodeList + LFCNodeList_L + LFCNodeList_C + FTSNodeList + VOMSNodeList

        # list of services in CS now (filled while adding, below)
        servicesList = []

        # remove resources no more in the CS
        for res in resourcesIn:
            if res not in resourcesList:
                self.rsDB.removeResource(res)
                self.rsDB.removeStorageElement(resourceName=res)

        # add to DB what is in CS now and wasn't before

        # CEs
        serviceType = 'Computing'
        for site in siteCE.keys():
            if site == 'LCG.Dummy.ch':
                continue
            for ce in siteCE[site]:
                if ce is None:
                    continue
                siteInGOCDB = self.__getGOCSiteOfHost(ce)
                if siteInGOCDB is None:
                    continue

                self.__registerService(serviceType + '@' + site, serviceType, site,
                                       servicesIn, servicesList)

                if ce not in resourcesIn:
                    CEType = getCEType(site, ce)['Value']
                    ceType = 'CE'
                    if CEType == 'CREAM':
                        ceType = 'CREAMCE'
                    self.rsDB.addOrModifyResource(ce, ceType, serviceType, site, siteInGOCDB,
                                                  *self.__initialStatusArgs())
                    resourcesIn.append(ce)

        # SRMs, LFC_C, LFC_L, FTSs, VOMSs: ( nodes, resource type, service type ).
        # NOTE(review): LFC and FTS nodes are registered under the 'Storage'
        # service type, as in the original code - confirm this is intended.
        nodeGroups = ((SENodeList, 'SE', 'Storage'),
                      (LFCNodeList_C, 'LFC_C', 'Storage'),
                      (LFCNodeList_L, 'LFC_L', 'Storage'),
                      (FTSNodeList, 'FTS', 'Storage'),
                      (VOMSNodeList, 'VOMS', 'VOMS'))
        for nodes, resourceType, nodeServiceType in nodeGroups:
            self.__syncNodes(nodes, resourceType, nodeServiceType,
                             servicesIn, servicesList, resourcesIn)

        # remove services no more in the CS
        for ser in servicesIn:
            if ser not in servicesList:
                serType = ser.split('@')[0]
                # VO-BOX services are handled by _syncVOBOX and never removed here
                if serType != 'VO-BOX':
                    self.rsDB.removeService(ser)
                    self.rsDB.removeResource(serviceName=ser)
                    site = ser.split('@')[1]
                    if serType == 'Storage':
                        self.rsDB.removeStorageElement(siteName=site)

    #############################################################################

    def _syncStorageElements(self):
        """
        Sync DB storage elements (Read and Write access) with the CS.

        :raises RSSException: when the CS or GOCDB query fails
        """
        # Get StorageElements from the CS
        SEs = getStorageElements()
        if not SEs['OK']:
            raise RSSException(SEs['Message'])
        SEs = SEs['Value']

        for access in ('Read', 'Write'):

            storageElementsIn = self.rsDB.getMonitoredsList('StorageElement' + access,
                                                            paramsList=['StorageElementName'])
            try:
                storageElementsIn = [x[0] for x in storageElementsIn]
            except IndexError:
                pass

            # remove storageElements no more in the CS
            for se in storageElementsIn:
                if se not in SEs:
                    self.rsDB.removeStorageElement(storageElementName=se,
                                                   resourceName=None,
                                                   access=access)

            # Add new storage Elements
            for SE in SEs:
                srm = getSENodes(SE)['Value'][0]
                if srm is None:
                    continue
                siteInGOCDB = self.GOCDBClient.getServiceEndpointInfo('hostname', srm)
                if not siteInGOCDB['OK']:
                    raise RSSException(siteInGOCDB['Message'])
                # unlike _syncResources, no DNS fallback is attempted here
                if siteInGOCDB['Value'] == []:
                    continue
                siteInGOCDB = siteInGOCDB['Value'][0]['SITENAME']

                if SE not in storageElementsIn:
                    statusArgs = self.__initialStatusArgs()
                    self.rsDB.addOrModifyStorageElement(SE, srm, siteInGOCDB,
                                                        statusArgs[0], statusArgs[1],
                                                        statusArgs[2], statusArgs[3],
                                                        statusArgs[4], access=access)
                    storageElementsIn.append(SE)

    #############################################################################

    def __getGOCTier(self, sitesList):
        """
        Return the best (lowest) tier among `sitesList` as 'T0'..'T3';
        'T3' when the list is empty.
        """
        gridTier = 3

        for site in sitesList:
            tier = getSiteTier(site)['Value'][0]
            tn = self.__tierNumber(tier)
            if tn < gridTier:
                gridTier = tn

        return 'T%d' % gridTier

    #############################################################################

    def _syncRegistryUsers(self):
        """
        Add every user found under /Registry/Users to the management DB.

        When 'DN' or 'Email' is a list only its first entry is used. Only the
        text after the last '=' of the DN is stored, lower-cased, together
        with the lower-cased e-mail address.
        """
        from DIRAC.ResourceStatusSystem.Utilities import CS
        users = CS.getTypedDictRootedAt("Users", root="/Registry")
        for u in users:
            dn = users[u]['DN']
            if isinstance(dn, list):
                dn = dn[0]
            email = users[u]['Email']
            if isinstance(email, list):
                email = email[0]
            dn = dn.split('=')[-1]
            self.rmDB.registryAddUser(u, dn.lower(), email.lower())
class DowntimeCommand(Command):
    '''
    Downtime "master" Command.

    Fetches downtimes from GOCDB through ``self.gClient``, stores them in the
    downtime cache through ``self.rmClient`` and returns the downtime that is
    ongoing at the requested time, if any.
    '''

    def __init__(self, args=None, clients=None):
        super(DowntimeCommand, self).__init__(args, clients)

        # Use the clients found in self.apis when present, defaults otherwise.
        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries as built by doNew
        :return: S_OK() when every record was stored, otherwise the first
                 failing result of addOrModifyDowntimeCache
        '''
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                # GOCDB does not always publish a service type
                gocdbServiceType=dt.get('GOCDBServiceType'))
            if not resQuery['OK']:
                return resQuery
        return S_OK()

    def _prepareCommand(self):
        '''
        DowntimeCommand requires three arguments in self.args:
          - name : <str>
          - element : Site / Resource
          - elementType: <str>
        and accepts an optional one:
          - hours : offset, in hours, of the moment the downtime is checked at

        If the elements are Site(s), we need to get their GOCDB names. They
        may not have one, in which case the error is returned.

        :return: S_OK( ( element, elementName, hours, gocdbServiceType ) )
                 or S_ERROR
        '''
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is not Site nor Resource')

        hours = self.args.get('hours') if 'hours' in self.args else None
        gocdbServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            if not gocSite['OK']:
                return gocSite
            elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            seOptions = getStorageElementOptions(elementName)
            if not seOptions['OK']:
                return seOptions
            # We need to distinguish if it's tape or disk: tape storage is
            # published as "srm.nearline", disk storage as "srm".
            if seOptions['Value'].get('TapeSE'):
                gocdbServiceType = "srm.nearline"
            elif seOptions['Value'].get('DiskSE'):
                gocdbServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        return S_OK((element, elementName, hours, gocdbServiceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method or from its
        own arguments, and contacts the gocdb client. The server is not very
        stable, so in case of failure it tries a second time.

        If there are downtimes, they are recorded and then the one ongoing at
        the requested time (now, or now + hours) is returned.

        :param masterParams: optional ( element, elementNames ) tuple
        :return: S_OK( downtime dictionary or None ) or S_ERROR
        '''
        if masterParams is not None:
            element, elementNames = masterParams
            hours = None
            elementName = None
            gocdbServiceType = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gocdbServiceType = params['Value']
            elementNames = [elementName]

        startDate = datetime.utcnow() - timedelta(days=14)

        try:
            results = self.gClient.getStatus(element, elementName, startDate, 120)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, elementName, startDate, 120)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:
            return S_OK(None)

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():
            # SERVICE_TYPE is not always defined
            serviceType = downDic.get('SERVICE_TYPE')
            if gocdbServiceType and serviceType:
                if gocdbServiceType.lower() != serviceType.lower():
                    continue

            if element == 'Resource':
                dtName = downDic['HOSTNAME']
            else:
                dtName = downDic['SITENAME']
            if dtName not in elementNames:
                continue

            uniformResult.append({
                'Name': dtName,
                'DowntimeID': downtime,
                'Element': element,
                'StartDate': downDic['FORMATED_START_DATE'],
                'EndDate': downDic['FORMATED_END_DATE'],
                'Severity': downDic['SEVERITY'],
                'Description': downDic['DESCRIPTION'].replace('\'', ''),
                'Link': downDic['GOCDB_PORTAL_URL'],
                # always present, so that _storeCommand can rely on it
                'GOCDBServiceType': serviceType,
            })

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        # We return only one downtime, if its ongoing at dtDate
        dtDate = datetime.utcnow()
        if hours:
            dtDate = dtDate + timedelta(hours=hours)
        # Dates are compared as strings, as in the cached records
        dtDateStr = str(dtDate)

        dtOutages = []
        dtWarnings = []
        for dt in uniformResult:
            if dt['StartDate'] < dtDateStr and dt['EndDate'] > dtDateStr:
                if dt['Severity'] == 'Outage':
                    dtOutages.append(dt)
                else:
                    dtWarnings.append(dt)

        # In case many overlapping downtimes have been declared, the first one in
        # severity and then time order will be selected. We want to get the latest one
        # ( they are sorted by insertion time )
        result = None
        if dtOutages:
            result = dtOutages[-1]
        elif dtWarnings:
            result = dtWarnings[-1]

        return S_OK(result)
class AutoVac2CSAgent(AgentModule):
    """
    AutoVac2CSAgent.

    Updates the CS with the VAC and CLOUD (vcycle) service endpoints
    published in GOCDB and removes CEs / hosts not seen for a while.
    """

    max_cputime_map = {'VAC': 400000, 'CLOUD': 24000000}
    cc_regex = re.compile(r'\.([a-zA-Z]{2})$')
    cc_mappings = {'.gov': 'us',
                   '.edu': 'us',
                   'efda.org': 'uk',
                   'atlas-swt2.org': 'us'}

    def initialize(self, *args, **kwargs):
        """
        Initialize.

        Initialise method pulls in some extra configuration options
        These include:
        VOKeys            - List of VO identifiers
        RemovalThreshold  - Age in days after which entries are removed
        """
        self.vokeys = self.am_getOption('VOKeys', ['GridPP'])
        self.removal_threshold = self.am_getOption('RemovalThreshold', 5)
        self.gocdb_client = GOCDBClient()
        return S_OK()

    def execute(self):
        """General agent execution method."""
        cfg_system = ConfigurationSystem()
        cfg_system.initialize()

        # Get VAC sites.
        # ##############
        result = self.gocdb_client.getServiceEndpointInfo('service_type', "uk.ac.gridpp.vac")
        if not result['OK']:
            self.log.error("Problem getting GOCDB VAC information")
            return result

        try:
            self.process_gocdb_results(result['Value'], 'VAC', cfg_system)
        except Exception:
            self.log.exception("Problem processing GOCDB VAC information")
            return S_ERROR("Problem processing GOCDB VAC information")

        # Get CLOUD (vcycle) sites.
        # #########################
        result = self.gocdb_client.getServiceEndpointInfo('service_type', "uk.ac.gridpp.vcycle")
        if not result['OK']:
            self.log.error("Problem getting GOCDB CLOUD (vcycle) information")
            return result

        try:
            self.process_gocdb_results(result['Value'], 'CLOUD', cfg_system)
        except Exception:
            self.log.exception("Problem processing GOCDB CLOUD (vcycle) information")
            return S_ERROR("Problem processing GOCDB CLOUD (vcycle) information")

        cfg_system.commit()

        # Remove old hosts/sites
        # ######################
        try:
            self.remove_old(self.removal_threshold)
        except Exception:
            self.log.exception("Problem removing old hosts/sites.")
            return S_ERROR("Problem removing old hosts/sites.")

        return S_OK()

    def process_gocdb_results(self, services, site_path_prefix, cfg_system, country_default='xx'):
        """
        Process GOCDB results.

        Args:
            services (list): List of services returned from GOCDB query.
            site_path_prefix (str): The CS path prefix (VAC or CLOUD) for the type of
                                    service that we are processing.
            cfg_system (ConfigurationSystem): A ConfigurationSystem instance used to update
                                              the CS.
            country_default (str): Country code used when none can be extracted
                                   from the hostname.
        """
        today = date.today().strftime('%d/%m/%Y')
        for service in services:
            # Resources
            sitename = service.get('SITENAME')
            hostname = service.get('HOSTNAME')
            # Validate before touching hostname: extract_cc cannot handle None.
            if sitename is None or hostname is None:
                self.log.warn("Missing sitename or hostname for service:\n%s" % pformat(service))
                continue

            country_code = AutoVac2CSAgent.extract_cc(hostname) or country_default

            site_path = cfgPath(SITES_BASE, site_path_prefix, "%s.%s.%s"
                                % (site_path_prefix, sitename, country_code))
            ce_path = cfgPath(site_path, 'CEs', hostname)
            queue_path = cfgPath(ce_path, 'Queues', 'default')
            cfg_system.add(site_path, 'Name', sitename)
            cfg_system.append_unique(site_path, 'CE', hostname)
            cfg_system.add(ce_path, 'CEType', site_path_prefix.capitalize())
            cfg_system.add(ce_path, 'Architecture', 'x86_64')
            cfg_system.add(ce_path, 'OS', 'EL6')
            cfg_system.add(ce_path, 'LastSeen', today)
            cfg_system.add(queue_path, 'maxCPUTime',
                           AutoVac2CSAgent.max_cputime_map.get(site_path_prefix, 'Unknown'))

            for extension in service.get('EXTENSIONS', []):
                match = VOKEY_EXTENSION_REGEX.match(extension.get('KEY', ''))
                if match is None:
                    continue

                extension_key = match.group()
                k, vokey = match.groups()
                if vokey not in self.vokeys:
                    self.log.warn("Extension KEY %s for %s with vokey %s does not belong "
                                  "to a valid vokey: %s"
                                  % (extension_key, sitename, vokey, self.vokeys))
                    continue

                if k == 'SE':
                    se = extension.get('VALUE')
                    if se is None:
                        self.log.warn("No SE value for extension KEY %s" % extension_key)
                        continue
                    cfg_system.append_unique(site_path, 'SE', se)

                # Registry
                elif k == 'DN':
                    dn = extension.get('VALUE', '')
                    cn_candidates = CN_REGEX.findall(dn) if "CN=" in dn else []
                    if not cn_candidates:
                        self.log.warn("For extension KEY %s, Could not find the CN component "
                                      "of DN: %s" % (extension_key, dn))
                        continue

                    # the longest CN component identifies the host
                    cn = max(cn_candidates, key=len)
                    host_path = cfgPath(HOSTS_BASE, cn)
                    cfg_system.add(host_path, 'DN', dn)
                    cfg_system.add(host_path, 'LastSeen', today)
                    cfg_system.add(host_path, 'Properties', ['GenericPilot', 'LimitedDelegation'])
        return S_OK()

    def remove_old(self, removal_threshold=5):
        """
        Remove old hosts/sites.

        Args:
            removal_threshold (int): Number of days since LastSeen after which
                                     a CE or registry host is removed.

        Raises:
            RuntimeError: when the current configuration cannot be read.
        """
        cfg_system = ConfigurationSystem()
        result = cfg_system.getCurrentCFG()
        if not result['OK']:
            self.log.error('Could not get current config from the CS')
            raise RuntimeError("Error removing old Resources/Registry.")
        current_cfg = result['Value']

        today = date.today()
        removal_threshold = timedelta(days=removal_threshold)

        old_ces = set()
        base_path = '/Resources/Sites'
        for site_type in ('VAC', 'CLOUD'):
            site_type_path = cfgPath(base_path, site_type)
            # Sites live below the per-type section, not directly below base_path.
            for site, site_info in current_cfg.getAsDict(site_type_path).items():
                site_path = cfgPath(site_type_path, site)
                for ce, ce_info in site_info.get('CEs', {}).items():
                    ce_path = cfgPath(site_path, 'CEs', ce)

                    if 'LastSeen' not in ce_info:
                        self.log.warn("No LastSeen info for CE: %s at site: %s" % (ce, site))
                        continue

                    last_seen = datetime.strptime(ce_info['LastSeen'], '%d/%m/%Y').date()
                    delta = today - last_seen
                    if delta > removal_threshold:
                        self.log.warn("Last seen %s:%s %s days ago...removing"
                                      % (site, ce, delta.days))
                        cfg_system.remove(section=ce_path)
                        old_ces.add(ce)

                if old_ces:
                    # also drop the removed CEs from the site's CE list
                    cfg_system.remove(section=site_path, option='CE', value=old_ces)
                    old_ces.clear()

        host_base = '/Registry/Hosts'
        for host, host_info in current_cfg.getAsDict(host_base).items():
            host_path = cfgPath(host_base, host)
            if 'LastSeen' not in host_info:
                self.log.warn("No LastSeen info for host: %s" % host)
                continue

            last_seen = datetime.strptime(host_info['LastSeen'], '%d/%m/%Y').date()
            delta = today - last_seen
            if delta > removal_threshold:
                self.log.warn("Last seen host %s %s days ago...removing"
                              % (host, delta.days))
                cfg_system.remove(section=host_path)
        cfg_system.commit()
        return S_OK()

    @classmethod
    def extract_cc(cls, ce, cc_mappings=None, cc_regex=None):
        """
        Extract the 2 character country code from the CE name.

        Known suffixes from cc_mappings take precedence; otherwise the code is
        taken from a trailing two-letter domain. Returns None when neither
        applies.
        """
        if cc_mappings is None:
            cc_mappings = cls.cc_mappings
        if cc_regex is None:
            cc_regex = cls.cc_regex

        ce = ce.strip().lower()
        for key, value in cc_mappings.items():
            if ce.endswith(key):
                return value

        cc = cc_regex.search(ce)
        if cc is not None:
            cc = cc.groups()[0]
        return cc
class DowntimeCommand(Command):
    '''
    Downtime "master" Command or removed DTs.

    Fetches downtimes from GOCDB through ``self.gClient`` and keeps the
    downtime cache, accessed through ``self.rmClient``, in sync with them.
    '''

    def __init__(self, args=None, clients=None):
        super(DowntimeCommand, self).__init__(args, clients)

        # Use the clients found in self.apis when present, defaults otherwise.
        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries as built by doNew
        :return: S_OK() when every record was stored (or there was nothing to
                 store), otherwise the first failing result of
                 addOrModifyDowntimeCache
        '''
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                gOCDBServiceType=dt['gOCDBServiceType'])
            if not resQuery['OK']:
                return resQuery
        return S_OK()

    def _cleanCommand(self, element, elementNames):
        '''
        Clear Cache from expired DT.

        :param element: element kind passed to selectDowntimeCache
        :param elementNames: names whose cached downtimes are checked
        :return: S_OK( list of deleteDowntimeCache results ) or the first
                 failing query result
        '''
        resQuery = []
        # Ongoing downtimes known to GOCDB; fetched at most once, on first need.
        currentDTLinks = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element, name=elementName)
            if not result['OK']:
                return result

            cachedDTs = [dict(zip(result['Columns'], res)) for res in result['Value']]
            if not cachedDTs:
                continue

            if currentDTLinks is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList['OK']:
                    return gDTLinkList
                currentDTLinks = gDTLinkList['Value']

            currentDate = datetime.utcnow()
            for dt in cachedDTs:
                # if DT expired or DT not in the list of current DTs, then we
                # remove it from the cache
                if dt['EndDate'] < currentDate or dt['Link'] not in currentDTLinks:
                    result = self.rmClient.deleteDowntimeCache(downtimeID=dt['DowntimeID'])
                    resQuery.append(result)

        return S_OK(resQuery)

    def _prepareCommand(self):
        '''
        DowntimeCommand requires three arguments in self.args:
          - name : <str>
          - element : Site / Resource
          - elementType: <str>
        and accepts an optional one:
          - hours : passed to GOCDB as "starting in hours"

        If the elements are Site(s), we need to get their GOCDB names. They
        may not have one, so the DIRAC name is kept if they do not.

        :return: S_OK( ( element, elementName, hours, gOCDBServiceType ) )
                 or S_ERROR
        '''
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is neither Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        gOCDBServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            # On failure the site is most probably not a grid site - not an
            # issue, of course - so elementName remains unchanged.
            if gocSite['OK']:
                elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # We need to distinguish if it's tape or disk
            seOptions = getStorageElementOptions(elementName)
            if not seOptions['OK']:
                return seOptions
            if seOptions['Value'].get('TapeSE'):
                gOCDBServiceType = "srm.nearline"
            elif seOptions['Value'].get('DiskSE'):
                gOCDBServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost['OK']:
                return seHost
            seHost = seHost['Value']
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        elif elementType in ['FTS', 'FTS3']:
            gOCDBServiceType = 'FTS'
            try:
                # WARNING: this method presupposes that the server is an FTS3 type
                elementName = getGOCFTSName(elementName)
            except Exception:
                return S_ERROR('No FTS3 server specified in dirac.cfg (see Resources/FTSEndpoints)')

        return S_OK((element, elementName, hours, gOCDBServiceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method or from its
        own arguments, and contacts the gocdb client. The server is not very
        stable, so in case of failure it tries a second time.

        Expired downtimes are removed from the cache, and the downtimes
        returned by GOCDB are recorded.

        :param masterParams: optional ( element, elementNames ) tuple
        :return: S_OK() / S_OK( None ) or S_ERROR
        '''
        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            elementName = None
            gOCDBServiceType = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gOCDBServiceType = params['Value']
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
        try:
            results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes['OK']:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():
            dt = {}

            if 'HOSTNAME' in downDic:
                dt['Name'] = downDic['HOSTNAME']
            elif 'SITENAME' in downDic:
                dt['Name'] = downDic['SITENAME']
            else:
                return S_ERROR("SITENAME or HOSTNAME are missing")

            if 'SERVICE_TYPE' in downDic:
                dt['gOCDBServiceType'] = downDic['SERVICE_TYPE']
                if gOCDBServiceType:
                    # expected type comes from the CS, published one from GOCDB
                    csST = gOCDBServiceType.lower()
                    gocdbST = downDic['SERVICE_TYPE'].lower()
                    if gocdbST != csST:
                        return S_ERROR("SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s"
                                       % (gocdbST, csST, dt['Name']))
            else:
                # WARNING: do we want None as default value?
                dt['gOCDBServiceType'] = None

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK()
class DTEverySites_Command(Command):

    def doCommand(self, sites=None):
        """
        Returns downtimes information for all the sites in input.

        :params:
          :attr:`sites`: list of site names (when not given, take every site)

        :returns:
          {'SiteName': {'SEVERITY': 'OUTAGE'|'AT_RISK', 'StartDate': 'aDate', ...} ... }

        Raises RSSException when the list of grid sites, the GOCDB status or a
        DIRAC site name cannot be obtained. Returns {} when the call to the
        GOCDB client raises or when there are no downtimes.
        """
        if self.client is None:
            from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
            self.client = GOCDBClient()

        if sites is None:
            # NOTE(review): RPCClient is expected to be imported at module
            # level; the local import was commented out in the original.
            RPC = RPCClient("ResourceStatus/ResourceStatus")
            GOC_sites = RPC.getGridSitesList()
            if not GOC_sites["OK"]:
                # report the failed call's message; `sites` is None here
                raise RSSException(where(self, self.doCommand) + " " + GOC_sites["Message"])
            GOC_sites = GOC_sites["Value"]
        else:
            GOC_sites = [getGOCSiteName(x)["Value"] for x in sites]

        try:
            res = self.client.getStatus("Site", GOC_sites, None, 120)
        except Exception:
            gLogger.exception("Exception when calling GOCDBClient.")
            return {}

        if not res["OK"]:
            raise RSSException(where(self, self.doCommand) + " " + res["Message"])
        res = res["Value"]

        if res is None:
            return {}

        resToReturn = {}

        for dt_ID in res:
            try:
                downtime = res[dt_ID]
                dt = {
                    "ID": dt_ID,
                    "StartDate": downtime["FORMATED_START_DATE"],
                    "EndDate": downtime["FORMATED_END_DATE"],
                    "Severity": downtime["SEVERITY"],
                    "Description": downtime["DESCRIPTION"].replace("'", ""),
                    "Link": downtime["GOCDB_PORTAL_URL"],
                }

                DIRACnames = getDIRACSiteName(downtime["SITENAME"])
                if not DIRACnames["OK"]:
                    raise RSSException(DIRACnames["Message"])
                DIRACnames = DIRACnames["Value"]

                # one entry per DIRAC site mapped to the GOCDB site
                for DIRACname in DIRACnames:
                    resToReturn[dt_ID.split()[0] + " " + DIRACname] = dt
            except KeyError:
                # incomplete downtime records are skipped
                continue

        return resToReturn

    doCommand.__doc__ = Command.doCommand.__doc__ + doCommand.__doc__
class Synchronizer(object):
    """
    Keeps the ResourceStatus / ResourceManagement databases aligned with the
    content of the CS (sites, resources, storage elements, services and
    registry users), using GOCDB to find the grid site of each node.
    """

    def __init__(self, rsClient=None, rmClient=None):
        self.GOCDBClient = GOCDBClient()
        self.rsClient = ResourceStatusClient() if rsClient is None else rsClient
        self.rmClient = ResourceManagementClient() if rmClient is None else rmClient

        # each entry X is handled by the method _syncX
        self.synclist = ['Sites', 'Resources', 'StorageElements', 'Services', 'RegistryUsers']

################################################################################

    def sync(self, _a, _b):
        """
        Run every ``_sync<thing>`` method listed in ``self.synclist``.

        The two positional arguments are ignored. Always returns S_OK();
        failures of the individual steps are logged by the steps themselves.
        """
        gLogger.info("!!! Sync DB content with CS content for %s !!!" % (", ".join(self.synclist)))

        for thing in self.synclist:
            getattr(self, '_sync' + thing)()

        return S_OK()

################################################################################

    def __purge_resource(self, resourceName):
        """
        Remove a resource and the storage elements attached to it.

        Returns the first failing result, or the result of the last removal.
        """
        # Maybe remove attached SEs
        SEs = self.rsClient.getStorageElement(resourceName=resourceName)
        if not SEs['OK']:
            gLogger.error(SEs['Message'])
            return SEs

        seNames = [se[0] for se in SEs['Value']]
        res = self.rsClient.removeElement('StorageElement', seNames)
        if not res['OK']:
            gLogger.error(res['Message'])
            return res

        # Remove resource itself.
        res = self.rsClient.removeElement('Resource', resourceName)
        if not res['OK']:
            gLogger.error(res['Message'])
        return res

    def __purge_site(self, siteName):
        """
        Remove a site together with its resources and services.

        Returns the first failing result, or the result of the last removal.
        """
        # Remove associated resources and services
        resources = self.rsClient.getResource(siteName=siteName)
        if not resources['OK']:
            gLogger.error(resources['Message'])
            return resources

        services = self.rsClient.getService(siteName=siteName)
        if not services['OK']:
            gLogger.error(services['Message'])
            return services

        for resource in resources['Value']:
            res = self.__purge_resource(resource[0])
            if not res['OK']:
                gLogger.error(res['Message'])
                return res

        serviceNames = [service[0] for service in services['Value']]
        res = self.rsClient.removeElement('Service', serviceNames)
        if not res['OK']:
            gLogger.error(res['Message'])
            return res

        # Remove site itself
        res = self.rsClient.removeElement('Site', siteName)
        if not res['OK']:
            gLogger.error(res['Message'])
        return res

    def _syncSites(self):
        """
        Sync DB content with sites that are in the CS
        """

        def getGOCTier(sitesList):
            # lowest tier number among the DIRAC sites of one grid site
            return "T" + str(min([int(v) for v in CS.getSiteTiers(sitesList)]))

        # sites in the DB now
        sites = self.rsClient.getSite()
        if not sites['OK']:
            gLogger.error(sites['Message'])
            return sites
        sitesDB = set([site[0] for site in sites['Value']])

        # sites in CS now
        sitesCS = set(CS.getSites())

        gLogger.info("Syncing Sites from CS: %d sites in CS, %d sites in DB"
                     % (len(sitesCS), len(sitesDB)))

        # remove sites and associated resources, services, and storage
        # elements from the DB that are not in the CS:
        for s in sitesDB - sitesCS:
            gLogger.info("Purging Site %s (not in CS anymore)" % s)
            self.__purge_site(s)

        # add to DB what is missing
        sitesToAdd = sitesCS - sitesDB
        gLogger.info("Updating %d Sites in DB" % len(sitesToAdd))
        for site in sitesToAdd:
            siteType = site.split(".")[0]
            # DIRAC Tier
            tier = "T" + str(CS.getSiteTier(site))

            if siteType == "LCG":
                # Grid Name of the site
                gridSiteName = getGOCSiteName(site)
                if not gridSiteName['OK']:
                    gLogger.error(gridSiteName['Message'])
                    return gridSiteName
                gridSiteName = gridSiteName['Value']

                # Grid Tier (with a workaround!)
                DIRACSitesOfGridSites = getDIRACSiteName(gridSiteName)
                if not DIRACSitesOfGridSites['OK']:
                    gLogger.error(DIRACSitesOfGridSites['Message'])
                    return DIRACSitesOfGridSites
                DIRACSitesOfGridSites = DIRACSitesOfGridSites['Value']

                if len(DIRACSitesOfGridSites) == 1:
                    gt = tier
                else:
                    gt = getGOCTier(DIRACSitesOfGridSites)

                res = self.rsClient.addOrModifyGridSite(gridSiteName, gt)
                if not res['OK']:
                    gLogger.error(res['Message'])
                    return res

                res = self.rsClient.addOrModifySite(site, tier, gridSiteName)
                if not res['OK']:
                    gLogger.error(res['Message'])
                    return res

            elif siteType == "DIRAC":
                res = self.rsClient.addOrModifySite(site, tier, "NULL")
                if not res['OK']:
                    gLogger.error(res['Message'])
                    return res

        return S_OK()

################################################################################
# _syncResources HELPER functions

    def __updateService(self, site, type_):
        """Add or modify the service ``<type_>@<site>``."""
        service = type_ + '@' + site
        res = self.rsClient.addOrModifyService(service, type_, site)
        if not res['OK']:
            gLogger.error(res['Message'])
        return res

    def __getServiceEndpointInfo(self, node):
        """
        Return the GOCDB service endpoint records of ``node``.

        When nothing is found for the name as given, the lookup is retried
        with ``Utils.canonicalURL(node)``. Returns [] on error.
        """
        res = self.GOCDBClient.getServiceEndpointInfo('hostname', node)
        if not res['OK']:
            gLogger.warn('Error getting hostname info for %s' % node)
            return []
        res = res['Value']

        if res == []:
            url = Utils.canonicalURL(node)
            res = self.GOCDBClient.getServiceEndpointInfo('hostname', url)
            if res['OK']:
                res = res['Value']
            else:
                gLogger.warn('Error getting canonical hostname info for %s' % node)
                res = []

        return res

    def __syncNode(self, NodeInCS, resourcesInDB, resourceType, serviceType, site="NULL"):
        """
        Add to the DB the nodes of ``NodeInCS`` missing from ``resourcesInDB``.

        The services of the sites owning the new nodes are updated first, then
        each new node is registered as a resource. ``resourcesInDB`` is updated
        in place with the nodes that were added.
        """
        nodesToUpdate = NodeInCS - resourcesInDB
        if len(nodesToUpdate) > 0:
            gLogger.debug(str(NodeInCS))
            gLogger.debug(str(nodesToUpdate))

        # Update Service table
        siteInGOCDB = [self.__getServiceEndpointInfo(node) for node in nodesToUpdate]
        siteInGOCDB = Utils.list_sanitize(siteInGOCDB)

        sites = []
        for sInGOCDB in siteInGOCDB:
            siteName = getDIRACSiteName(sInGOCDB[0]['SITENAME'])
            if not siteName['OK']:
                gLogger.error(siteName['Message'])
                return siteName
            sites.append(siteName['Value'])

        sites = Utils.list_sanitize(Utils.list_flatten(sites))
        for s in sites:
            self.__updateService(s, serviceType)

        # Update Resource table
        for node in NodeInCS:
            if serviceType == "Computing":
                resourceType = CS.getCEType(site, node)
            if node not in resourcesInDB and node is not None:
                try:
                    siteInGOCDB = self.__getServiceEndpointInfo(node)[0]['SITENAME']
                except IndexError:
                    # No INFO in GOCDB: Node does not exist
                    gLogger.warn("Node %s is not in GOCDB!! Considering that it does not exists!"
                                 % node)
                    continue

                assert (type(siteInGOCDB) == str)
                res = self.rsClient.addOrModifyResource(node, resourceType, serviceType,
                                                        site, siteInGOCDB)
                if not res['OK']:
                    gLogger.error(res['Message'])
                    return res
                resourcesInDB.add(node)

        return S_OK()

################################################################################

    def _syncResources(self):
        """Sync DB content with the resources (CE, SE, LFC, FTS, VOMS nodes) in the CS."""
        gLogger.info("Starting sync of Resources")

        # resources in the DB now
        resources = self.rsClient.getResource()
        if not resources['OK']:
            gLogger.error(resources['Message'])
            return resources
        resourcesInDB = set([resource[0] for resource in resources['Value']])

        # Site-CE / Site-SE mapping in CS now
        siteCEMapping = getSiteCEMapping('LCG')
        if not siteCEMapping['OK']:
            gLogger.error(siteCEMapping['Message'])
            return siteCEMapping
        siteCEMapping = siteCEMapping['Value']

        # All CEs in CS now
        CEInCS = Utils.set_sanitize([CE for celist in siteCEMapping.values() for CE in celist])

        # All SE Nodes in CS now
        SENodeInCS = set(CS.getSENodes())

        # LFC Nodes in CS now
        LFCNodeInCS_L = set(CS.getLFCNode(readable="ReadOnly"))
        LFCNodeInCS_C = set(CS.getLFCNode(readable="ReadWrite"))

        # FTS Nodes in CS now
        # NOTE(review): presumably endpoints look like scheme://host:port/...,
        # and the slice drops a 5-character ":port" suffix - confirm.
        FTSNodeInCS = set([v.split("/")[2][0:-5] for v
                           in CS.getTypedDictRootedAt(root="/Resources/FTSEndpoints").values()])

        # VOMS Nodes in CS now
        VOMSNodeInCS = set(CS.getVOMSEndpoints())

        # complete list of resources in CS now
        resourcesInCS = (CEInCS | SENodeInCS | LFCNodeInCS_L | LFCNodeInCS_C
                         | FTSNodeInCS | VOMSNodeInCS)

        gLogger.info(" %d resources in CS, %s resources in DB, updating %d resources"
                     % (len(resourcesInCS), len(resourcesInDB),
                        len(resourcesInCS) - len(resourcesInDB)))

        # Remove resources that are not in the CS anymore
        for res in resourcesInDB - resourcesInCS:
            gLogger.info("Purging resource %s. Reason: not in CS anywore." % res)
            self.__purge_resource(res)

        # Add to DB what is in CS now and wasn't before

        # CEs
        for site in siteCEMapping:
            self.__syncNode(set(siteCEMapping[site]), resourcesInDB, "", "Computing", site)

        # SRMs
        self.__syncNode(SENodeInCS, resourcesInDB, "SE", "Storage")

        # LFC_C
        self.__syncNode(LFCNodeInCS_C, resourcesInDB, "LFC_C", "Storage")

        # LFC_L
        self.__syncNode(LFCNodeInCS_L, resourcesInDB, "LFC_L", "Storage")

        # FTSs
        self.__syncNode(FTSNodeInCS, resourcesInDB, "FTS", "Storage")

        # VOMSs
        self.__syncNode(VOMSNodeInCS, resourcesInDB, "VOMS", "VOMS")

        return S_OK()

################################################################################

    def _syncStorageElements(self):
        """Sync DB content with the storage elements in the CS."""
        # Get StorageElements from the CS and the DB
        CSSEs = set(CS.getSEs())
        ses = self.rsClient.getStorageElement()
        if not ses['OK']:
            gLogger.error(ses['Message'])
            return ses
        DBSEs = set([se[0] for se in ses['Value']])

        # Remove storageElements that are in DB but not in CS
        for se in DBSEs - CSSEs:
            res = self.rsClient.removeElement('StorageElement', se)
            if not res['OK']:
                gLogger.error(res['Message'])
                return res

        # Add new storage elements
        gLogger.info("Updating %d StorageElements in DB (%d on CS vs %d on DB)"
                     % (len(CSSEs - DBSEs), len(CSSEs), len(DBSEs)))
        for SE in CSSEs - DBSEs:
            srm = CS.getSEHost(SE)
            if not srm:
                gLogger.warn("%s has no srm URL in CS!!!" % SE)
                continue

            siteInGOCDB = self.GOCDBClient.getServiceEndpointInfo('hostname', srm)
            if not siteInGOCDB['OK']:
                gLogger.error("Error getting hostname for %s from GOCDB!!!" % srm)
                continue
            siteInGOCDB = siteInGOCDB['Value']

            if siteInGOCDB == []:
                gLogger.warn("%s is not in GOCDB!!!" % srm)
                continue

            siteInGOCDB = siteInGOCDB[0]['SITENAME']
            res = self.rsClient.addOrModifyStorageElement(SE, srm, siteInGOCDB)
            if not res['OK']:
                gLogger.error(res['Message'])
                return res

        return S_OK()

################################################################################

    def _syncServices(self):
        """This function is in charge of cleaning the Service table in DB
        in case of obsolescence."""
        # services in the DB now
        servicesInDB = self.rsClient.getService()
        if not servicesInDB['OK']:
            gLogger.error(servicesInDB['Message'])
            return servicesInDB
        servicesInDB = servicesInDB['Value']

        for service_name, service_type, site_name in servicesInDB:
            obsolete = False

            if service_type not in ["VO-BOX", "CondDB", "VOMS", "Storage"]:
                resource = self.rsClient.getResource(siteName=site_name,
                                                     serviceType=service_type)
                if not resource['OK']:
                    gLogger.error(resource['Message'])
                    return resource
                obsolete = resource['Value'] == []

            elif service_type == "Storage":
                res = self.rsClient.getSite(siteName=site_name,
                                            meta={'columns': 'GridSiteName'})
                gridSites = res['Value'] if res['OK'] else []
                if gridSites:
                    # NOTE(review): the first returned row is passed as
                    # gridSiteName, as in the original - confirm its shape.
                    resource = self.rsClient.getResource(gridSiteName=gridSites[0],
                                                         serviceType=service_type)
                    if not resource['OK']:
                        gLogger.error(resource['Message'])
                        return resource
                    # compare the payload, not the result dictionary
                    obsolete = resource['Value'] == []

            if obsolete:
                gLogger.info("Deleting Service %s since it has no corresponding resources."
                             % service_name)
                res = self.rsClient.removeElement("Service", service_name)
                if not res['OK']:
                    gLogger.error(res['Message'])
                    return res

        return S_OK()

    def _syncRegistryUsers(self):
        """Sync the user registry cache with the users under /Registry/Users in the CS."""
        users = CS.getTypedDictRootedAt("Users", root="/Registry")
        usersInCS = set(users.keys())

        usersInCache = self.rmClient.getUserRegistryCache()
        if not usersInCache['OK']:
            gLogger.error(usersInCache['Message'])
            return usersInCache
        usersInDB = set([userInCache[0] for userInCache in usersInCache['Value']])

        usersToAdd = usersInCS - usersInDB
        usersToDel = usersInDB - usersInCS

        gLogger.info("Updating Registry Users: + %d, - %d" % (len(usersToAdd), len(usersToDel)))
        if len(usersToAdd) > 0:
            gLogger.debug(str(usersToAdd))
        if len(usersToDel) > 0:
            gLogger.debug(str(usersToDel))

        for u in usersToAdd:
            user = users[u]
            # multi-valued options: keep the first value only
            if isinstance(user['DN'], list):
                user['DN'] = user['DN'][0]
            if isinstance(user['Email'], list):
                user['Email'] = user['Email'][0]
            # keep only what follows the last '=' of the DN
            user['DN'] = user['DN'].split('=')[-1]

            res = self.rmClient.addOrModifyUserRegistryCache(u, user['DN'],
                                                             user['Email'].lower())
            if not res['OK']:
                gLogger.error(res['Message'])
                return res

        for u in usersToDel:
            res = self.rmClient.deleteUserRegistryCache(u)
            if not res['OK']:
                gLogger.error(res['Message'])
                return res

        return S_OK()

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
def doCommand(self):
  """
  Return the downtime status reported by the GOC DB client for one element.

  :attr:`args`:
    - args[0]: string: should be a ValidRes
    - args[1]: string: should be the name of the ValidRes
    - args[2]: string: optional, number of hours in which the down time is starting

  :returns:
    - {'Result': 'Unknown'} when the client reports a failure or raises
    - {'Result': {'DT': None}} when the client returns no downtime
    - {'Result': {'DT': <severity>, 'EndDate': <end date>}} otherwise; the
      severity gets the suffix " in <n> hours" when the selected downtime
      has not started yet

  :raises: RSSException when a site name cannot be translated to its GOC name
  """
  super(GOCDBStatus_Command, self).doCommand()

  if self.client is None:
    # use standard GOC DB Client
    from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
    self.client = GOCDBClient()

  granularity = self.args[0]
  name = self.args[1]
  try:
    hours = self.args[2]
  except IndexError:
    hours = None

  if granularity in ('Site', 'Sites'):
    name = getGOCSiteName(name)
    if not name['OK']:
      raise RSSException(name['Message'])
    name = name['Value']

  def startOf(downtime):
    # Start dates are parsed with the "%Y-%m-%d %H:%M" layout (no seconds).
    return datetime.datetime(
        *time.strptime(downtime['FORMATED_START_DATE'], "%Y-%m-%d %H:%M")[0:5])

  try:
    res = self.client.getStatus(granularity, name, None, hours, self.timeout)
    if not res['OK']:
      return {'Result': 'Unknown'}

    res = res['Value']
    if res is None or res == {}:
      return {'Result': {'DT': None}}

    # `now` is truncated to the minute to match the parsed start dates.
    now = datetime.datetime.utcnow().replace(microsecond=0, second=0)

    downtimes = list(res.values())
    # Default to the first downtime; prefer one that has already started
    # when several are returned.
    selected = downtimes[0]
    if len(downtimes) > 1:
      for downtime in downtimes:
        if startOf(downtime) < now:
          selected = downtime
          break

    DT_dict_result = {}
    DT_dict_result['DT'] = selected['SEVERITY']
    DT_dict_result['EndDate'] = selected['FORMATED_END_DATE']

    start_datetime = startOf(selected)
    if start_datetime > now:
      # Downtime is scheduled but not started: report how far away it is.
      diff = convertTime(start_datetime - now, 'hours')
      DT_dict_result['DT'] = DT_dict_result['DT'] + " in " + str(diff) + ' hours'

    return {'Result': DT_dict_result}

  except urllib2.URLError:
    gLogger.error("GOCDB timed out for %s %s" % (granularity, name))
    return {'Result': 'Unknown'}
  except Exception:
    # Best effort: any other failure is logged and reported as 'Unknown'.
    gLogger.exception("Exception when calling GOCDBClient for %s %s" % (granularity, name))
    return {'Result': 'Unknown'}
def doCommand(self):
  """
  Return the downtime status reported by the GOC DB client for one element.

  :attr:`args`:
    - args[0]: string: should be a ValidRes
    - args[1]: string: should be the name of the ValidRes
    - args[2]: string: optional, number of hours in which the down time is starting

  :returns:
    - {'Result': 'Unknown'} when the client reports a failure or raises
    - {'Result': {'DT': None}} when the client returns no downtime
    - {'Result': {'DT': <severity>, 'EndDate': <end date>}} otherwise; the
      severity gets the suffix " in <n> hours" when the selected downtime
      has not started yet

  :raises: RSSException when a site name cannot be translated to its GOC name
  """
  super(GOCDBStatus_Command, self).doCommand()

  if self.client is None:
    # use standard GOC DB Client
    from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
    self.client = GOCDBClient()

  granularity = self.args[0]
  name = self.args[1]
  try:
    hours = self.args[2]
  except IndexError:
    hours = None

  if granularity in ('Site', 'Sites'):
    name = getGOCSiteName(name)
    if not name['OK']:
      raise RSSException(name['Message'])
    name = name['Value']

  def startOf(downtime):
    # Start dates are parsed with the "%Y-%m-%d %H:%M" layout (no seconds).
    return datetime.datetime(
        *time.strptime(downtime['FORMATED_START_DATE'], "%Y-%m-%d %H:%M")[0:5])

  try:
    res = self.client.getStatus(granularity, name, None, hours, self.timeout)
    if not res['OK']:
      return {'Result': 'Unknown'}

    res = res['Value']
    if res is None or res == {}:
      return {'Result': {'DT': None}}

    # `now` is truncated to the minute to match the parsed start dates.
    now = datetime.datetime.utcnow().replace(microsecond=0, second=0)

    downtimes = list(res.values())
    # Default to the first downtime; prefer one that has already started
    # when several are returned.
    selected = downtimes[0]
    if len(downtimes) > 1:
      for downtime in downtimes:
        if startOf(downtime) < now:
          selected = downtime
          break

    DT_dict_result = {}
    DT_dict_result['DT'] = selected['SEVERITY']
    DT_dict_result['EndDate'] = selected['FORMATED_END_DATE']

    start_datetime = startOf(selected)
    if start_datetime > now:
      # Downtime is scheduled but not started: report how far away it is.
      diff = convertTime(start_datetime - now, 'hours')
      DT_dict_result['DT'] = DT_dict_result['DT'] + " in " + str(diff) + ' hours'

    return {'Result': DT_dict_result}

  except urllib2.URLError:
    gLogger.error("GOCDB timed out for %s %s" % (granularity, name))
    return {'Result': 'Unknown'}
  except Exception:
    # Best effort: any other failure is logged and reported as 'Unknown'.
    gLogger.exception("Exception when calling GOCDBClient for %s %s" % (granularity, name))
    return {'Result': 'Unknown'}
def doCommand(self, sites=None):
  """
  Return downtime information for all the sites in input.

  :params:
    :attr:`sites`: list of site names (when not given, the grid sites list
    is fetched from the "ResourceStatus/ResourceStatus" RPC service)

  :returns:
    a dictionary keyed by "<first token of the downtime ID> <DIRAC site name>"
    whose values hold 'ID', 'StartDate', 'EndDate', 'Severity', 'Description'
    and 'Link'; {} when the GOCDB client raises or returns None

  :raises: RSSException when the sites list, the downtime query or a
    GOC-to-DIRAC site name translation reports a failure
  """
  if self.client is None:
    from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
    self.client = GOCDBClient()

  if sites is None:
    RPC = RPCClient("ResourceStatus/ResourceStatus")
    GOC_sites = RPC.getGridSitesList()
    if not GOC_sites['OK']:
      # `sites` is None on this branch: the message lives in the RPC result.
      raise RSSException(where(self, self.doCommand) + " " + GOC_sites['Message'])
    GOC_sites = GOC_sites['Value']
  else:
    # NOTE(review): a failed translation has no 'Value' key and surfaces here
    # as a KeyError - confirm whether callers rely on that.
    GOC_sites = [getGOCSiteName(x)['Value'] for x in sites]

  try:
    res = self.client.getStatus('Site', GOC_sites, None, 120)
  except Exception:
    gLogger.exception("Exception when calling GOCDBClient.")
    return {}

  if not res['OK']:
    raise RSSException(where(self, self.doCommand) + " " + res['Message'])
  res = res['Value']

  if res is None:
    return {}

  resToReturn = {}

  for dt_ID in res:
    try:
      dt = {}
      dt['ID'] = dt_ID
      dt['StartDate'] = res[dt_ID]['FORMATED_START_DATE']
      dt['EndDate'] = res[dt_ID]['FORMATED_END_DATE']
      dt['Severity'] = res[dt_ID]['SEVERITY']
      # single quotes are stripped from the description
      dt['Description'] = res[dt_ID]['DESCRIPTION'].replace('\'', '')
      dt['Link'] = res[dt_ID]['GOCDB_PORTAL_URL']

      DIRACnames = getDIRACSiteName(res[dt_ID]['SITENAME'])
      if not DIRACnames['OK']:
        raise RSSException(DIRACnames['Message'])
      DIRACnames = DIRACnames['Value']

      # One grid site may map to several DIRAC sites: publish the same
      # downtime record under each of them.
      for DIRACname in DIRACnames:
        resToReturn[dt_ID.split()[0] + ' ' + DIRACname] = dt
    except KeyError:
      # Downtimes missing one of the expected fields are skipped.
      continue

  return resToReturn
class DowntimeCommand(Command):
  """
  Downtime "master" Command.

  Queries the GOCDB client for downtimes, stores them through the
  ResourceManagementClient and reports the one that is active at the
  requested date.
  """

  def __init__(self, args=None, clients=None):
    """
    Pick the GOCDB and ResourceManagement clients from `self.apis` when they
    were provided, otherwise instantiate default ones.
    """
    super(DowntimeCommand, self).__init__(args, clients)

    self.gClient = self.apis['GOCDBClient'] if 'GOCDBClient' in self.apis else GOCDBClient()
    self.rmClient = (self.apis['ResourceManagementClient']
                     if 'ResourceManagementClient' in self.apis
                     else ResourceManagementClient())

  def _storeCommand(self, result):
    """
    Store the downtimes produced by doNew on the database.

    :param result: list of downtime dictionaries
    :return: the first failing query result, or S_OK() when all succeeded
    """
    columns = ('DowntimeID', 'Element', 'Name', 'StartDate',
               'EndDate', 'Severity', 'Description', 'Link')

    for downtime in result:
      stored = self.rmClient.addOrModifyDowntimeCache(*[downtime[column] for column in columns])
      if not stored['OK']:
        return stored

    return S_OK()

  def _prepareCommand(self):
    """
    DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>

    'hours' is read as well when present in self.args.

    Site names are translated to their GOCDB names; for StorageElement
    resources the SE host is used instead of the DIRAC SE name.

    :return: S_OK( ( element, elementName, hours ) ) or S_ERROR
    """
    requirements = (('name', '"name" not found in self.args'),
                    ('element', '"element" not found in self.args'),
                    ('elementType', '"elementType" not found in self.args'))
    for key, complaint in requirements:
      if key not in self.args:
        return S_ERROR(complaint)

    elementName = self.args['name']
    element = self.args['element']
    elementType = self.args['elementType']

    if element not in ('Site', 'Resource'):
      return S_ERROR('element is not Site nor Resource')

    hours = self.args['hours'] if 'hours' in self.args else None

    if element == 'Site':
      # Transform DIRAC site names into GOCDB topics
      translated = getGOCSiteName(elementName)
      if not translated['OK']:
        return translated
      elementName = translated['Value']
    elif elementType == 'StorageElement':
      # The DIRAC se names mean nothing on the grid, but their hosts do mean.
      host = CSHelpers.getSEHost(elementName)
      if not host:
        return S_ERROR('No seHost for %s' % elementName)
      elementName = host

    return S_OK((element, elementName, hours))

  def doNew(self, masterParams=None):
    """
    Get the parameters to run, either from the master method or from the
    command's own arguments, and query the GOCDB client. The server is not
    very stable, so a failed request is tried a second time. Downtimes that
    are found are recorded, and the one active at the requested date (now,
    shifted by `hours` when given) is returned.

    :param masterParams: optional ( element, elementNames ) pair
    :return: S_OK( downtime dictionary or None ) or S_ERROR
    """
    if masterParams is None:
      prepared = self._prepareCommand()
      if not prepared['OK']:
        return prepared
      element, elementName, hours = prepared['Value']
      elementNames = [elementName]
    else:
      element, elementNames = masterParams
      hours = None
      elementName = None

    startDate = datetime.utcnow() - timedelta(days=2)

    # Two attempts at most: the second URLError is returned to the caller.
    attemptsLeft = 2
    while True:
      try:
        results = self.gClient.getStatus(element, elementName, startDate, 120)
        break
      except urllib2.URLError as e:
        attemptsLeft -= 1
        if not attemptsLeft:
          return S_ERROR(e)

    if not results['OK']:
      return results
    results = results['Value']

    if results is None:
      return S_OK(None)

    # Humanize the results into a dictionary, not the most optimal, but readable
    uniformResult = []
    for downtimeID, rawDowntime in results.items():
      ownerName = rawDowntime['HOSTNAME'] if element == 'Resource' else rawDowntime['SITENAME']
      if ownerName not in elementNames:
        continue

      uniformResult.append({
          'Name': ownerName,
          'DowntimeID': downtimeID,
          'Element': element,
          'StartDate': rawDowntime['FORMATED_START_DATE'],
          'EndDate': rawDowntime['FORMATED_END_DATE'],
          'Severity': rawDowntime['SEVERITY'],
          'Description': rawDowntime['DESCRIPTION'].replace('\'', ''),
          'Link': rawDowntime['GOCDB_PORTAL_URL'],
      })

    stored = self._storeCommand(uniformResult)
    if not stored['OK']:
      return stored

    # Only one downtime is returned: the first one ongoing at the target date.
    targetDate = datetime.now()
    if hours:
      targetDate = targetDate + timedelta(hours=hours)
    target = str(targetDate)

    for candidate in uniformResult:
      if candidate['StartDate'] < target and candidate['EndDate'] > target:
        return S_OK(candidate)

    return S_OK(None)
class DowntimeCommand(Command):
  """
  Downtime "master" Command.

  Stores the downtimes reported by the GOCDB client in the downtime cache and
  removes cached downtimes that expired or are no longer listed by GOCDB.
  """

  def __init__(self, args=None, clients=None):
    """
    Pick the GOCDB and ResourceManagement clients from `self.apis` when they
    were provided, otherwise instantiate default ones.
    """
    super(DowntimeCommand, self).__init__(args, clients)

    if "GOCDBClient" in self.apis:
      self.gClient = self.apis["GOCDBClient"]
    else:
      self.gClient = GOCDBClient()

    if "ResourceManagementClient" in self.apis:
      self.rmClient = self.apis["ResourceManagementClient"]
    else:
      self.rmClient = ResourceManagementClient()

  def _storeCommand(self, result):
    """
    Store the results of the doNew method on the database.

    :param result: list of downtime dictionaries (may be empty)
    :return: the first failing query result, or S_OK() when every downtime
             was stored (including the case where there was nothing to store)
    """
    for dt in result:
      resQuery = self.rmClient.addOrModifyDowntimeCache(
          downtimeID=dt["DowntimeID"],
          element=dt["Element"],
          name=dt["Name"],
          startDate=dt["StartDate"],
          endDate=dt["EndDate"],
          severity=dt["Severity"],
          description=dt["Description"],
          link=dt["Link"],
          gocdbServiceType=dt["GOCDBServiceType"],
      )
      # Report a failed write instead of letting a later success hide it.
      if not resQuery["OK"]:
        return resQuery

    return S_OK()

  def _cleanCommand(self, element, elementNames):
    """
    Clear the cache from expired downtimes.

    A cached downtime is deleted when its end date is in the past (UTC) or
    when its link is not in the list of current downtimes given by GOCDB.

    :param element: element kind used to select the cache rows
    :param elementNames: iterable of element names to clean
    :return: S_OK( list of deletion results ) or the first failing query
    """
    resQuery = []

    for elementName in elementNames:
      # get the list of all DTs stored in the cache
      result = self.rmClient.selectDowntimeCache(element=element, name=elementName)
      if not result["OK"]:
        return result

      uniformResult = [dict(zip(result["Columns"], res)) for res in result["Value"]]
      currentDate = datetime.utcnow()

      if not uniformResult:
        continue

      # get the list of all ongoing DTs from GocDB
      gDTLinkList = self.gClient.getCurrentDTLinkList()
      if not gDTLinkList["OK"]:
        return gDTLinkList

      for dt in uniformResult:
        # if DT expired or DT not in the list of current DTs, then we remove it from the cache
        if dt["EndDate"] < currentDate or dt["Link"] not in gDTLinkList["Value"]:
          result = self.rmClient.deleteDowntimeCache(downtimeID=dt["DowntimeID"])
          resQuery.append(result)

    return S_OK(resQuery)

  def _prepareCommand(self):
    """
    DowntimeCommand requires three arguments:
      - name : <str>
      - element : Site / Resource
      - elementType: <str>

    'hours' is read as well when present in self.args.

    If the elements are Site(s), we need to get their GOCDB names. They may
    not have one, in which case the failed lookup is returned.

    :return: S_OK( ( element, elementName, hours, gocdbServiceType ) ) or S_ERROR
    """
    if "name" not in self.args:
      return S_ERROR('"name" not found in self.args')
    elementName = self.args["name"]

    if "element" not in self.args:
      return S_ERROR('"element" not found in self.args')
    element = self.args["element"]

    if "elementType" not in self.args:
      return S_ERROR('"elementType" not found in self.args')
    elementType = self.args["elementType"]

    if element not in ["Site", "Resource"]:
      return S_ERROR("element is neither Site nor Resource")

    hours = None
    if "hours" in self.args:
      hours = self.args["hours"]

    gocdbServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == "Site":
      gocSite = getGOCSiteName(elementName)
      if not gocSite["OK"]:
        return gocSite
      elementName = gocSite["Value"]

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == "StorageElement":
      # We need to distinguish if it's tape or disk
      seOptions = getStorageElementOptions(elementName)
      if not seOptions["OK"]:
        return seOptions
      if seOptions["Value"].get("TapeSE"):
        gocdbServiceType = "srm.nearline"
      elif seOptions["Value"].get("DiskSE"):
        gocdbServiceType = "srm"

      seHost = CSHelpers.getSEHost(elementName)
      if not seHost["OK"]:
        return seHost
      seHost = seHost["Value"]

      if not seHost:
        return S_ERROR("No seHost for %s" % elementName)
      elementName = seHost

    elif elementType in ["FTS", "FTS3"]:
      gocdbServiceType = "FTS"
      try:
        # WARNING: this method presupposes that the server is an FTS3 type
        elementName = getGOCFTSName(elementName)
      except Exception:
        return S_ERROR("No FTS3 server specified in dirac.cfg (see Resources/FTSEndpoints)")

    return S_OK((element, elementName, hours, gocdbServiceType))

  def doNew(self, masterParams=None):
    """
    Get the parameters to run, either from the master method or from the
    command's own arguments, and query the GOCDB client for the downtimes
    ongoing or starting within `hours` (120 for master runs). The server is
    not very stable, so a failed request is tried a second time. The cache is
    cleaned, then the downtimes found are recorded.

    :param masterParams: optional ( element, elementNames ) pair
    :return: S_OK() (S_OK( None ) when GOCDB returns None) or S_ERROR
    """
    if masterParams is not None:
      element, elementNames = masterParams
      hours = 120
      elementName = None
      gocdbServiceType = None
    else:
      params = self._prepareCommand()
      if not params["OK"]:
        return params
      element, elementName, hours, gocdbServiceType = params["Value"]
      elementNames = [elementName]

    # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
    try:
      results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
    except urllib2.URLError:
      try:
        # Let's give it a second chance..
        results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
      except urllib2.URLError as e:
        return S_ERROR(e)

    if not results["OK"]:
      return results
    results = results["Value"]

    if results is None:
      return S_OK(None)

    # cleaning the Cache
    cleanRes = self._cleanCommand(element, elementNames)
    if not cleanRes["OK"]:
      return cleanRes

    uniformResult = []

    # Humanize the results into a dictionary, not the most optimal, but readable
    for downtime, downDic in results.items():
      dt = {}

      if "HOSTNAME" in downDic:
        dt["Name"] = downDic["HOSTNAME"]
      elif "SITENAME" in downDic:
        dt["Name"] = downDic["SITENAME"]
      else:
        return S_ERROR("SITENAME or HOSTNAME are missing")

      if "SERVICE_TYPE" in downDic:
        dt["GOCDBServiceType"] = downDic["SERVICE_TYPE"]
        if gocdbServiceType:
          # gocdbServiceType was derived from the CS in _prepareCommand,
          # SERVICE_TYPE comes from the GOCDB answer: label them accordingly.
          csServiceType = gocdbServiceType.lower()
          gocdbReported = downDic["SERVICE_TYPE"].lower()
          if csServiceType != gocdbReported:
            return S_ERROR(
                "SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s"
                % (gocdbReported, csServiceType, dt["Name"])
            )
      else:
        # WARNING: do we want None as default value?
        dt["GOCDBServiceType"] = None

      dt["DowntimeID"] = downtime
      dt["Element"] = element
      dt["StartDate"] = downDic["FORMATED_START_DATE"]
      dt["EndDate"] = downDic["FORMATED_END_DATE"]
      dt["Severity"] = downDic["SEVERITY"]
      # single quotes are stripped from the description
      dt["Description"] = downDic["DESCRIPTION"].replace("'", "")
      dt["Link"] = downDic["GOCDB_PORTAL_URL"]

      uniformResult.append(dt)

    storeRes = self._storeCommand(uniformResult)
    if not storeRes["OK"]:
      return storeRes

    return S_OK()
class Synchronizer(object):
  """
  Synchronizes the content of the ResourceStatus / ResourceManagement
  databases with what is found in the CS (and GOCDB): sites, resources,
  storage elements, services and registry users.

  Client calls return dictionaries with an 'OK' flag and either a 'Value' or
  a 'Message'; failures are logged and handed back to the caller.
  """

  def __init__(self, rsClient=None, rmClient=None):
    """
    :param rsClient: ResourceStatusClient to use (a default one is built when None)
    :param rmClient: ResourceManagementClient to use (a default one is built when None)
    """
    self.GOCDBClient = GOCDBClient()
    self.rsClient = ResourceStatusClient() if rsClient is None else rsClient
    self.rmClient = ResourceManagementClient() if rmClient is None else rmClient

    # Suffixes of the `_sync<Thing>` methods run by sync(), in this order.
    self.synclist = ['Sites', 'Resources', 'StorageElements', 'Services', 'RegistryUsers']

################################################################################

  def sync(self, _a, _b):
    """
    Run every `_sync<Thing>` method listed in self.synclist.
    Both positional arguments are ignored.

    :return: S_OK()
    """
    gLogger.info("!!! Sync DB content with CS content for %s !!!" % (", ".join(self.synclist)))

    for thing in self.synclist:
      getattr(self, '_sync' + thing)()

    return S_OK()

################################################################################

  def __purge_resource(self, resourceName):
    """
    Remove a resource and the storage elements attached to it.

    :return: the failing query result, or the result of the final removal
    """
    # Maybe remove attached SEs
    SEs = self.rsClient.getStorageElement(resourceName=resourceName)
    if not SEs['OK']:
      gLogger.error(SEs['Message'])
      return SEs

    # The rows are in the 'Value' payload; the SE name is their first column.
    SEs = [se[0] for se in SEs['Value']]
    res = self.rsClient.removeElement('StorageElement', SEs)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res

    # Remove resource itself.
    res = self.rsClient.removeElement('Resource', resourceName)
    if not res['OK']:
      gLogger.error(res['Message'])
    # Always hand back a result dictionary: __purge_site reads res['OK'].
    return res

  def __purge_site(self, siteName):
    """
    Remove a site together with its resources and services.

    :return: the failing query result, or the result of the final removal
    """
    # Remove associated resources and services
    resources = self.rsClient.getResource(siteName=siteName)
    if not resources['OK']:
      gLogger.error(resources['Message'])
      return resources

    services = self.rsClient.getService(siteName=siteName)
    if not services['OK']:
      gLogger.error(services['Message'])
      return services

    for resource in resources['Value']:
      res = self.__purge_resource(resource[0])
      if not res['OK']:
        gLogger.error(res['Message'])
        return res

    services = [service[0] for service in services['Value']]
    res = self.rsClient.removeElement('Service', services)
    if not res['OK']:
      gLogger.error(res['Message'])
      return res

    # Remove site itself
    res = self.rsClient.removeElement('Site', siteName)
    if not res['OK']:
      gLogger.info(res['Message'])
    return res

  def _syncSites(self):
    """
    Sync DB content with sites that are in the CS
    """
    def getGOCTier(sitesList):
      # Lowest tier number among the given DIRAC sites.
      return "T" + str(min([int(v) for v in CS.getSiteTiers(sitesList)]))

    # sites in the DB now
    sites = self.rsClient.getSite()
    if not sites['OK']:
      gLogger.error(sites['Message'])
      return sites
    sitesDB = set([site[0] for site in sites['Value']])

    # sites in CS now
    sitesCS = set(CS.getSites())

    gLogger.info("Syncing Sites from CS: %d sites in CS, %d sites in DB" % (len(sitesCS), len(sitesDB)))

    # remove sites and associated resources, services, and storage
    # elements from the DB that are not in the CS:
    for s in sitesDB - sitesCS:
      gLogger.info("Purging Site %s (not in CS anymore)" % s)
      self.__purge_site(s)

    # add to DB what is missing
    gLogger.info("Updating %d Sites in DB" % len(sitesCS - sitesDB))
    for site in sitesCS - sitesDB:
      siteType = site.split(".")[0]
      # DIRAC Tier
      tier = "T" + str(CS.getSiteTier(site))

      if siteType == "LCG":
        # Grid Name of the site
        gridSiteName = getGOCSiteName(site)
        if not gridSiteName['OK']:
          gLogger.error(gridSiteName['Message'])
          return gridSiteName
        gridSiteName = gridSiteName['Value']

        # Grid Tier (with a workaround!)
        DIRACSitesOfGridSites = getDIRACSiteName(gridSiteName)
        if not DIRACSitesOfGridSites['OK']:
          gLogger.error(DIRACSitesOfGridSites['Message'])
          return DIRACSitesOfGridSites
        DIRACSitesOfGridSites = DIRACSitesOfGridSites['Value']

        if len(DIRACSitesOfGridSites) == 1:
          gt = tier
        else:
          gt = getGOCTier(DIRACSitesOfGridSites)

        res = self.rsClient.addOrModifyGridSite(gridSiteName, gt)
        if not res['OK']:
          gLogger.error(res['Message'])
          return res

        res = self.rsClient.addOrModifySite(site, tier, gridSiteName)
        if not res['OK']:
          gLogger.error(res['Message'])
          return res

      elif siteType == "DIRAC":
        res = self.rsClient.addOrModifySite(site, tier, "NULL")
        if not res['OK']:
          gLogger.error(res['Message'])
          return res

################################################################################
# _syncResources HELPER functions

  def __updateService(self, site, type_):
    """
    Add or modify the service named "<type_>@<site>".

    :return: the result of the addOrModifyService call
    """
    service = type_ + '@' + site
    res = self.rsClient.addOrModifyService(service, type_, site)
    if not res['OK']:
      gLogger.error(res['Message'])
    return res

  def __getServiceEndpointInfo(self, node):
    """
    Ask GOCDB for the endpoint info of a hostname, retrying with the
    canonical URL of the node when the first answer is empty.

    :return: the endpoint info, [] when a query fails
    """
    res = self.GOCDBClient.getServiceEndpointInfo('hostname', node)
    if res['OK']:
      res = res['Value']
    else:
      gLogger.warn('Error getting hostname info for %s' % node)
      return []

    if res == []:
      url = Utils.canonicalURL(node)
      res = self.GOCDBClient.getServiceEndpointInfo('hostname', url)
      if res['OK']:
        res = res['Value']
      else:
        gLogger.warn('Error getting canonical hostname info for %s' % node)
        res = []

    return res

  def __syncNode(self, NodeInCS, resourcesInDB, resourceType, serviceType, site="NULL"):
    """
    Add to the DB the services and resources of the CS nodes that are not in
    the DB yet. `resourcesInDB` is updated in place with every node added.

    :param NodeInCS: set of node names found in the CS
    :param resourcesInDB: set of resource names already in the DB
    :param resourceType: resource type (recomputed per node for "Computing")
    :param serviceType: service type of the nodes
    :param site: site name, "NULL" by default
    :return: the failing query result, None otherwise
    """
    nodesToUpdate = NodeInCS - resourcesInDB
    if len(nodesToUpdate) > 0:
      gLogger.debug(str(NodeInCS))
      gLogger.debug(str(nodesToUpdate))

    # Update Service table
    siteInGOCDB = [self.__getServiceEndpointInfo(node) for node in nodesToUpdate]
    siteInGOCDB = Utils.list_sanitize(siteInGOCDB)

    sites = []
    for sInGOCDB in siteInGOCDB:
      siteName = getDIRACSiteName(sInGOCDB[0]['SITENAME'])
      if not siteName['OK']:
        gLogger.error(siteName['Message'])
        return siteName
      sites.append(siteName['Value'])

    sites = Utils.list_sanitize(Utils.list_flatten(sites))
    for s in sites:
      self.__updateService(s, serviceType)

    # Update Resource table
    for node in NodeInCS:
      if serviceType == "Computing":
        resourceType = CS.getCEType(site, node)
      if node not in resourcesInDB and node is not None:
        try:
          siteInGOCDB = self.__getServiceEndpointInfo(node)[0]['SITENAME']
        except IndexError:
          # No INFO in GOCDB: Node does not exist
          gLogger.warn("Node %s is not in GOCDB!! Considering that it does not exists!" % node)
          continue

        assert type(siteInGOCDB) == str
        res = self.rsClient.addOrModifyResource(node, resourceType, serviceType, site, siteInGOCDB)
        if not res['OK']:
          gLogger.error(res['Message'])
          return res
        resourcesInDB.add(node)

################################################################################

  def _syncResources(self):
    """
    Sync the resources in the DB with the CEs, SE nodes, LFC, FTS and VOMS
    nodes found in the CS.
    """
    gLogger.info("Starting sync of Resources")

    # resources in the DB now
    resources = self.rsClient.getResource()
    if not resources['OK']:
      gLogger.error(resources['Message'])
      return resources
    resourcesInDB = set([resource[0] for resource in resources['Value']])

    # Site-CE / Site-SE mapping in CS now
    CEinCS = getSiteCEMapping('LCG')
    if not CEinCS['OK']:
      gLogger.error(CEinCS['Message'])
      return CEinCS
    CEinCS = CEinCS['Value']

    # All CEs in CS now
    CEInCS = Utils.set_sanitize([CE for celist in CEinCS.values() for CE in celist])

    # All SE Nodes in CS now
    SENodeInCS = set(CS.getSENodes())

    # LFC Nodes in CS now
    LFCNodeInCS_L = set(CS.getLFCNode(readable="ReadOnly"))
    LFCNodeInCS_C = set(CS.getLFCNode(readable="ReadWrite"))

    # FTS Nodes in CS now
    FTSNodeInCS = set([v.split("/")[2][0:-5] for v
                       in CS.getTypedDictRootedAt(root="/Resources/FTSEndpoints").values()])

    # VOMS Nodes in CS now
    VOMSNodeInCS = set(CS.getVOMSEndpoints())

    # complete list of resources in CS now
    resourcesInCS = CEInCS | SENodeInCS | LFCNodeInCS_L | LFCNodeInCS_C | FTSNodeInCS | VOMSNodeInCS

    gLogger.info(" %d resources in CS, %s resources in DB, updating %d resources" %
                 (len(resourcesInCS), len(resourcesInDB), len(resourcesInCS) - len(resourcesInDB)))

    # Remove resources that are not in the CS anymore
    for res in resourcesInDB - resourcesInCS:
      gLogger.info("Purging resource %s. Reason: not in CS anywore." % res)
      self.__purge_resource(res)

    # Add to DB what is in CS now and wasn't before

    # CEs
    for site in CEinCS:
      self.__syncNode(set(CEinCS[site]), resourcesInDB, "", "Computing", site)

    # SRMs
    self.__syncNode(SENodeInCS, resourcesInDB, "SE", "Storage")

    # LFC_C
    self.__syncNode(LFCNodeInCS_C, resourcesInDB, "LFC_C", "Storage")

    # LFC_L
    self.__syncNode(LFCNodeInCS_L, resourcesInDB, "LFC_L", "Storage")

    # FTSs
    self.__syncNode(FTSNodeInCS, resourcesInDB, "FTS", "Storage")

    # VOMSs
    self.__syncNode(VOMSNodeInCS, resourcesInDB, "VOMS", "VOMS")

################################################################################

  def _syncStorageElements(self):
    """
    Remove from the DB the storage elements that left the CS and add the ones
    that are in the CS only (when their host is known to GOCDB).
    """
    # Get StorageElements from the CS and the DB
    CSSEs = set(CS.getSEs())
    ses = self.rsClient.getStorageElement()
    if not ses['OK']:
      gLogger.error(ses['Message'])
      return ses
    DBSEs = set([se[0] for se in ses['Value']])

    # Remove storageElements that are in DB but not in CS
    for se in DBSEs - CSSEs:
      res = self.rsClient.removeElement('StorageElement', se)
      if not res['OK']:
        gLogger.error(res['Message'])
        return res

    # Add new storage elements
    gLogger.info("Updating %d StorageElements in DB (%d on CS vs %d on DB)" %
                 (len(CSSEs - DBSEs), len(CSSEs), len(DBSEs)))
    for SE in CSSEs - DBSEs:
      srm = CS.getSEHost(SE)
      if not srm:
        gLogger.warn("%s has no srm URL in CS!!!" % SE)
        continue

      siteInGOCDB = self.GOCDBClient.getServiceEndpointInfo('hostname', srm)
      if siteInGOCDB['OK']:
        siteInGOCDB = siteInGOCDB['Value']
      else:
        gLogger.error("Error getting hostname for %s from GOCDB!!!" % srm)
        continue

      if siteInGOCDB == []:
        gLogger.warn("%s is not in GOCDB!!!" % srm)
        continue

      siteInGOCDB = siteInGOCDB[0]['SITENAME']
      res = self.rsClient.addOrModifyStorageElement(SE, srm, siteInGOCDB)
      if not res['OK']:
        gLogger.error(res['Message'])
        return res

################################################################################

  def _syncServices(self):
    """This function is in charge of cleaning the Service table in DB
    in case of obsolescence."""
    # services in the DB now
    servicesInDB = self.rsClient.getService()
    if not servicesInDB['OK']:
      gLogger.error(servicesInDB['Message'])
      return servicesInDB
    servicesInDB = servicesInDB['Value']

    for service_name, service_type, site_name in servicesInDB:

      if service_type not in ["VO-BOX", "CondDB", "VOMS", "Storage"]:
        resource = self.rsClient.getResource(siteName=site_name, serviceType=service_type)
        if not resource['OK']:
          gLogger.error(resource['Message'])
          return resource
        if resource['Value'] == []:
          gLogger.info("Deleting Service %s since it has no corresponding resources." % service_name)
          res = self.rsClient.removeElement("Service", service_name)
          if not res['OK']:
            gLogger.error(res['Message'])
            return res

      elif service_type == "Storage":
        res = self.rsClient.getSite(siteName=site_name, meta={'columns': 'GridSiteName'})
        if res['OK']:
          res = res['Value']
        else:
          res = []

        if res:
          # NOTE(review): res[0] is passed as returned by getSite - confirm it
          # is the grid site name and not a one-column row.
          resource = self.rsClient.getResource(gridSiteName=res[0], serviceType=service_type)
          if not resource['OK']:
            gLogger.error(resource['Message'])
            return resource
          # Compare the payload, not the result dictionary, with the empty list.
          if resource['Value'] == []:
            gLogger.info("Deleting Service %s since it has no corresponding resources." % service_name)
            res = self.rsClient.removeElement("Service", service_name)
            if not res['OK']:
              gLogger.error(res['Message'])
              return res

  def _syncRegistryUsers(self):
    """
    Sync the user registry cache with the users found under /Registry/Users
    in the CS: add the missing users, delete the ones that left the CS.
    """
    users = CS.getTypedDictRootedAt("Users", root="/Registry")
    usersInCS = set(users.keys())
    usersInCache = self.rmClient.getUserRegistryCache()
    if not usersInCache['OK']:
      gLogger.error(usersInCache['Message'])
      return usersInCache

    usersInDB = set([userInCache[0] for userInCache in usersInCache['Value']])

    usersToAdd = usersInCS - usersInDB
    usersToDel = usersInDB - usersInCS

    gLogger.info("Updating Registry Users: + %d, - %d" % (len(usersToAdd), len(usersToDel)))
    if len(usersToAdd) > 0:
      gLogger.debug(str(usersToAdd))
    if len(usersToDel) > 0:
      gLogger.debug(str(usersToDel))

    for u in usersToAdd:
      # Multi-valued options: only the first DN / Email is kept.
      if isinstance(users[u]['DN'], list):
        users[u]['DN'] = users[u]['DN'][0]
      if isinstance(users[u]['Email'], list):
        users[u]['Email'] = users[u]['Email'][0]
      # Keep what follows the last '=' of the DN.
      users[u]['DN'] = users[u]['DN'].split('=')[-1]

      res = self.rmClient.addOrModifyUserRegistryCache(u, users[u]['DN'], users[u]['Email'].lower())
      if not res['OK']:
        gLogger.error(res['Message'])
        return res

    for u in usersToDel:
      res = self.rmClient.deleteUserRegistryCache(u)
      if not res['OK']:
        gLogger.error(res['Message'])
        return res

################################################################################
#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF#EOF
class GOCDBSyncCommand(Command):
    """Command that keeps the dates stored in the local DowntimeCache aligned
    with the downtimes published by GOCDB.
    """

    def __init__(self, args=None, clients=None):
        """Set up the GOCDB and ResourceManagement clients.

        Clients found in ``self.apis`` are reused, otherwise new ones are created.
        """
        super(GOCDBSyncCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

        # hostnames already handled by doMaster
        self.seenHostnames = set()

    def doNew(self, masterParams=None):
        """
        Gets the downtime IDs and dates of a given hostname from the local
        database and compares the results with the remote database of GOCDB.
        If the downtime dates have been changed it updates the local database.

        :param: `masterParams` - string (the hostname)
        :return: S_OK / S_ERROR
        """
        if masterParams:
            hostname = masterParams
        else:
            return S_ERROR(errno.EINVAL, 'masterParams is not provided')

        result = self.rmClient.selectDowntimeCache(name=hostname)
        if not result['OK']:
            return result

        localDowntimes = result['Value']
        if not localDowntimes:
            # Nothing cached for this host: no need to contact GOCDB at all.
            return S_OK()

        # The remote answer only depends on the hostname, so it is fetched and
        # parsed once instead of once per cached downtime.
        response = self.gClient.getHostnameDowntime(hostname, ongoing=True)
        if not response['OK']:
            return response

        doc = minidom.parseString(response['Value'])
        remoteDowntimes = []
        for dtElement in doc.getElementsByTagName("DOWNTIME"):
            GOCDBdict = _parseSingleElement(dtElement, ['PRIMARY_KEY', 'ENDPOINT',
                                                        'FORMATED_START_DATE',
                                                        'FORMATED_END_DATE'])
            # Local downtime IDs are stored as "<PRIMARY_KEY> <ENDPOINT>".
            remoteID = GOCDBdict['PRIMARY_KEY'] + ' ' + GOCDBdict['ENDPOINT']
            remoteDowntimes.append((remoteID, GOCDBdict))

        for downtimes in localDowntimes:
            # Columns used: 3 -> downtime ID, 6 -> start date, 7 -> end date.
            localID = downtimes[3]
            localStart = downtimes[6].strftime('%Y-%m-%d %H:%M')
            localEnd = downtimes[7].strftime('%Y-%m-%d %H:%M')

            for remoteID, GOCDBdict in remoteDowntimes:
                if localID != remoteID:
                    continue

                if localStart != GOCDBdict['FORMATED_START_DATE']:
                    result = self.rmClient.addOrModifyDowntimeCache(downtimeID=localID,
                                                                    startDate=GOCDBdict['FORMATED_START_DATE'])
                    if not result['OK']:
                        return result
                    # Logged only once the update has actually succeeded.
                    gLogger.verbose("The start date of %s has been changed!" % localID)

                if localEnd != GOCDBdict['FORMATED_END_DATE']:
                    result = self.rmClient.addOrModifyDowntimeCache(downtimeID=localID,
                                                                    endDate=GOCDBdict['FORMATED_END_DATE'])
                    if not result['OK']:
                        return result
                    gLogger.verbose("The end date of %s has been changed!" % localID)

        return S_OK()

    def doCache(self):
        """Nothing is read from a cache for this command; always S_OK."""
        return S_OK()

    def doMaster(self):
        """
        This method calls the doNew method for each hostname that exists
        in the DowntimeCache table of the local database.

        :return: S_OK / S_ERROR
        """
        # Query DB for all downtimes
        result = self.rmClient.selectDowntimeCache()
        if not result['OK']:
            return result

        for data in result['Value']:
            # data[0] contains the hostname
            hostname = data[0]

            # If already processed don't do it again
            if hostname in self.seenHostnames:
                continue

            gLogger.verbose("Checking if the downtime of %s has been changed" % hostname)
            result = self.doNew(hostname)
            if not result['OK']:
                return result

            self.seenHostnames.add(hostname)

        return S_OK()
class DowntimeCommand(Command):
    """Downtime "master" Command: caches the downtimes published by GOCDB and
    removes the expired or withdrawn ones.
    """

    def __init__(self, args=None, clients=None):
        """Set up the GOCDB and ResourceManagement clients.

        Clients found in ``self.apis`` are reused, otherwise new ones are created.
        """
        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        """Store the downtimes produced by doNew in the DowntimeCache.

        Every downtime is attempted even if an earlier one fails.

        :param result: list of downtime dictionaries
        :return: the first failing client result if any write failed, otherwise
                 the result of the last write (S_OK() when the list is empty)
        """
        # Pre-set so that an empty list no longer raises UnboundLocalError.
        resQuery = S_OK()
        firstFailure = None

        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                gOCDBServiceType=dt['gOCDBServiceType'])
            # Keep the first failure; previously only the last status survived.
            if firstFailure is None and not resQuery['OK']:
                firstFailure = resQuery

        if firstFailure is not None:
            return firstFailure
        return resQuery

    def _cleanCommand(self, element, elementNames):
        """Clear from the cache the downtimes that expired or disappeared from GOCDB.

        :param element: element kind used to select cached downtimes
        :param elementNames: iterable of element names to clean
        :return: S_OK( list of deleteDowntimeCache results ) / S_ERROR
        """
        resQuery = []
        # Links of the downtimes currently known to GOCDB. Fetched lazily and
        # only once: the answer does not depend on the element name.
        currentLinks = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element, name=elementName)
            if not result['OK']:
                return result

            uniformResult = [dict(zip(result['Columns'], res)) for res in result['Value']]
            currentDate = datetime.utcnow()

            if not uniformResult:
                continue

            if currentLinks is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList['OK']:
                    return gDTLinkList
                currentLinks = gDTLinkList['Value']

            for dt in uniformResult:
                # if DT expired or DT not in the list of current DTs, then we remove it from the cache
                if dt['EndDate'] < currentDate or dt['Link'] not in currentLinks:
                    result = self.rmClient.deleteDowntimeCache(downtimeID=dt['DowntimeID'])
                    resQuery.append(result)

        return S_OK(resQuery)

    def _prepareCommand(self):
        """Read and translate the command arguments.

        DowntimeCommand requires three arguments and accepts an optional one:
          - name : <str>
          - element : Site / Resource
          - elementType: <str>
          - hours (optional)

        If the elements are Site(s), we need to get their GOCDB names. They may
        not have one, in which case the name is left unchanged.

        :return: S_OK( (element, elementName, hours, gOCDBServiceType) ) / S_ERROR
        """
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is neither Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        gOCDBServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            # On failure the site is most probably not a grid site - not an
            # issue: elementName simply remains unchanged.
            if gocSite['OK']:
                elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # We need to distinguish if it's tape or disk
            try:
                seOptions = StorageElement(elementName).options
            except AttributeError:  # Sometimes the SE can't be instantiated properly
                self.log.error("Failure instantiating StorageElement object for %s" % elementName)
                return S_ERROR("Failure instantiating StorageElement")

            if 'SEType' in seOptions:
                # Type should follow the convention TXDY
                seType = seOptions['SEType']
                diskSE = re.search('D[1-9]', seType) is not None
                tapeSE = re.search('T[1-9]', seType) is not None
                if tapeSE:
                    gOCDBServiceType = "srm.nearline"
                elif diskSE:
                    gOCDBServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost['OK']:
                return seHost
            seHost = seHost['Value']

            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        elif elementType in ['FTS', 'FTS3']:
            gOCDBServiceType = 'FTS'
            # WARNING: this method presupposes that the server is an FTS3 type
            gocSite = getGOCFTSName(elementName)
            if not gocSite['OK']:
                self.log.warn("%s not in Resources/FTSEndpoints/FTS3 ?" % elementName)
            else:
                elementName = gocSite['Value']

        return S_OK((element, elementName, hours, gOCDBServiceType))

    def doNew(self, masterParams=None):
        """Query GOCDB for downtimes and record them in the cache.

        Gets the parameters to run, either from the master method (a tuple
        ``(element, elementNames)``) or from its own arguments. The server is
        not very stable, so in case of a URLError the query is tried a second
        time. Before storing, the cache is cleaned for the queried names.

        :return: S_OK() / S_OK(None) when GOCDB reports no downtime / S_ERROR
        """
        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            elementName = None
            gOCDBServiceType = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gOCDBServiceType = params['Value']
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
        try:
            results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes['OK']:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}

            if 'HOSTNAME' in downDic:
                dt['Name'] = downDic['HOSTNAME']
            elif 'SITENAME' in downDic:
                dt['Name'] = downDic['SITENAME']
            else:
                return S_ERROR("SITENAME or HOSTNAME are missing")

            if 'SERVICE_TYPE' in downDic:
                dt['gOCDBServiceType'] = downDic['SERVICE_TYPE']
                if gOCDBServiceType:
                    # downDic comes from GOCDB, gOCDBServiceType was derived
                    # from the CS by _prepareCommand. The two values used to be
                    # reported under each other's label.
                    gocdbST = downDic['SERVICE_TYPE'].lower()
                    csST = gOCDBServiceType.lower()
                    if gocdbST != csST:
                        return S_ERROR("SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s" %
                                       (gocdbST, csST, dt['Name']))
            else:
                # WARNING: do we want None as default value?
                dt['gOCDBServiceType'] = None

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK()

    def doCache(self):
        """Read the cache table and pick the single relevant downtime.

        :return: S_OK( downtime dictionary ), or S_OK(None) when no cached
                 downtime covers the target date / S_ERROR
        """
        params = self._prepareCommand()
        if not params['OK']:
            return params
        element, elementName, hours, gOCDBServiceType = params['Value']

        result = self.rmClient.selectDowntimeCache(element=element, name=elementName,
                                                   gOCDBServiceType=gOCDBServiceType)
        if not result['OK']:
            return result

        uniformResult = [dict(zip(result['Columns'], res)) for res in result['Value']]

        # 'targetDate' can be either now or some 'hours' later in the future
        targetDate = datetime.utcnow()

        # dtOverlapping is a buffer to assure only one dt is returned
        # when there are overlapping outage/warning dt for same element
        # on top of the buffer we put the most recent outages
        # while at the bottom the most recent warnings
        dtOverlapping = []

        if hours is not None:
            # IN THE FUTURE
            targetDate = targetDate + timedelta(hours=hours)
            # sorting by 'StartDate' b/c if we look for DTs in the future
            # then we are interested in the earliest DTs
            uniformResult.sort(key=itemgetter('Name', 'Severity', 'StartDate'))

            for dt in uniformResult:
                if (dt['StartDate'] < targetDate) and (dt['EndDate'] > targetDate):
                    # after sorting, outages come before warnings and the
                    # earliest come first within each severity, so the first
                    # match is the one we are looking for
                    dtOverlapping = [dt]
                    break
        else:
            # IN THE PRESENT
            # sorting by 'EndDate' b/c if we look for DTs in the present
            # then we are interested in those DTs that last longer
            uniformResult.sort(key=itemgetter('Name', 'Severity', 'EndDate'))

            for dt in uniformResult:
                if (dt['StartDate'] < targetDate) and (dt['EndDate'] > targetDate):
                    severity = dt['Severity'].upper()
                    # if outage, we put it on top of the overlapping buffer
                    # i.e. the latest ending outage is on top
                    if severity == 'OUTAGE':
                        dtOverlapping = [dt] + dtOverlapping
                    # if warning, we put it at the bottom of the overlapping buffer
                    # i.e. the latest ending warning is at the bottom
                    elif severity == 'WARNING':
                        dtOverlapping.append(dt)

        result = None
        if dtOverlapping:
            dtTop = dtOverlapping[0]
            dtBottom = dtOverlapping[-1]
            if dtTop['Severity'].upper() == 'OUTAGE':
                result = dtTop
            else:
                result = dtBottom

        return S_OK(result)

    def doMaster(self):
        """Master method: refresh the cache for every known site and resource.

        - It gets all sites and transforms them into gocSites.
        - It gets all the storage elements and transforms them into their hosts
        - It gets the FTS3 servers and the CEs (file catalogs will come).

        Failures of the per-kind doNew calls are appended to
        ``self.metrics['failed']``.

        :return: S_OK( self.metrics ) / S_ERROR
        """
        gocSites = CSHelpers.getGOCSites()
        if not gocSites['OK']:
            return gocSites
        gocSites = gocSites['Value']

        sesHosts = CSHelpers.getStorageElementsHosts()
        if not sesHosts['OK']:
            return sesHosts
        sesHosts = sesHosts['Value']

        # Work on a copy: extending the helper's own list in place would also
        # modify whatever object the helper handed out.
        resources = list(sesHosts) if sesHosts else []

        ftsServer = getFTS3Servers()
        if ftsServer['OK']:
            resources.extend(ftsServer['Value'])

        # TODO: file catalogs need also to use their hosts

        ce = CSHelpers.getComputingElements()
        if ce['OK']:
            resources.extend(ce['Value'])

        self.log.verbose('Processing Sites: %s' % ', '.join(gocSites))

        siteRes = self.doNew(('Site', gocSites))
        if not siteRes['OK']:
            self.metrics['failed'].append(siteRes['Message'])

        self.log.verbose('Processing Resources: %s' % ', '.join(resources))

        resourceRes = self.doNew(('Resource', resources))
        if not resourceRes['OK']:
            self.metrics['failed'].append(resourceRes['Message'])

        return S_OK(self.metrics)
class GOCDB2CSAgent(AgentModule):
    """Class to retrieve information about service endpoints
    from GOCDB and update configuration stored by CS
    """

    def __init__(self, *args, **kwargs):
        """c'tor: clients are created later, in initialize()."""
        super(GOCDB2CSAgent, self).__init__(*args, **kwargs)
        self.GOCDBClient = None
        self.csAPI = None
        self.dryRun = False

    def initialize(self):
        """Run at the agent initialization (normally every 500 cycles)

        :return: result of ``CSAPI.initialize()``
        """
        # client to connect to GOCDB
        self.GOCDBClient = GOCDBClient()
        self.dryRun = self.am_getOption('DryRun', self.dryRun)

        # API needed to update configuration stored by CS
        self.csAPI = CSAPI()
        return self.csAPI.initialize()

    def execute(self):
        """Execute GOCDB queries according to the function map
        and user request (options in configuration).

        A failing function is logged and does not prevent the others from
        running.

        :return: S_OK() always
        """
        # __functionMap is at the end of the class definition
        for option, functionCall in GOCDB2CSAgent.__functionMap.items():
            optionValue = self.am_getOption(option, True)
            if optionValue:
                result = functionCall(self)
                if not result['OK']:
                    self.log.error("%s() failed with message: %s" % (functionCall.__name__, result['Message']))
                else:
                    self.log.info("Successfully executed %s" % functionCall.__name__)

        return S_OK()

    def updatePerfSONARConfiguration(self):
        """Get current status of perfSONAR endpoints from GOCDB
        and update CS configuration accordingly.

        :return: S_OK() / S_ERROR
        """
        log = self.log.getSubLogger('updatePerfSONAREndpoints')
        log.debug('Begin function ...')

        # get endpoints
        result = self.__getPerfSONAREndpoints()
        if not result['OK']:
            log.error("__getPerfSONAREndpoints() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to fetch perfSONAR endpoints from GOCDB.')
        endpointList = result['Value']

        # add DIRAC site name
        result = self.__addDIRACSiteName(endpointList)
        if not result['OK']:
            log.error("__addDIRACSiteName() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to extend the list with DIRAC site names.')
        extendedEndpointList = result['Value']

        # prepare dictionary with new configuration
        result = self.__preparePerfSONARConfiguration(extendedEndpointList)
        if not result['OK']:
            log.error("__preparePerfSONARConfiguration() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to prepare a new perfSONAR configuration.')
        finalConfiguration = result['Value']

        # update configuration according to the final status of endpoints.
        # The outcome is checked: a failed commit used to be reported as success.
        result = self.__updateConfiguration(finalConfiguration)
        if not result['OK']:
            log.error("__updateConfiguration() failed with message: %s" % result['Message'])
            return S_ERROR('Unable to update the perfSONAR configuration in CS.')
        log.debug("Configuration updated succesfully")

        log.debug('End function.')
        return S_OK()

    def __getPerfSONAREndpoints(self):
        """Retrieve perfSONAR endpoint information directly from GOCDB.

        :return: S_OK( list of perfSONAR endpoints (dictionaries) as stored by GOCDB ) / S_ERROR
        """
        log = self.log.getSubLogger('__getPerfSONAREndpoints')
        log.debug('Begin function ...')

        # get perfSONAR endpoints (latency and bandwidth) from GOCDB
        endpointList = []
        for endpointType in ['Latency', 'Bandwidth']:
            result = self.GOCDBClient.getServiceEndpointInfo('service_type', 'net.perfSONAR.%s' % endpointType)

            if not result['OK']:
                log.error("getServiceEndpointInfo() failed with message: %s" % result['Message'])
                return S_ERROR('Could not fetch %s endpoints from GOCDB' % endpointType.lower())

            log.debug('Number of %s endpoints: %s' % (endpointType.lower(), len(result['Value'])))
            endpointList.extend(result['Value'])

        log.debug('Number of perfSONAR endpoints: %s' % len(endpointList))
        log.debug('End function.')
        return S_OK(endpointList)

    def __preparePerfSONARConfiguration(self, endpointList):
        """Prepare a dictionary with a new CS configuration of perfSONAR endpoints.

        Endpoints without a DIRAC site name are skipped. Endpoints found in
        the current CS configuration but not in ``endpointList`` get their
        'Enabled' option set to 'False'.

        :param endpointList: endpoint dictionaries holding at least the
                             'DIRACSITENAME' and 'HOSTNAME' keys
        :return: S_OK( dictionary where keys are configuration paths (options
                 and sections) and values are values of corresponding options
                 or None in case of a path pointing to a section ) / S_ERROR
        """
        log = self.log.getSubLogger('__preparePerfSONARConfiguration')
        log.debug('Begin function ...')

        # static elements of a path
        rootPath = '/Resources/Sites'
        extPath = 'Network'
        baseOptionName = 'Enabled'
        options = {baseOptionName: 'True', 'ServiceType': 'perfSONAR'}

        # enable GOCDB endpoints in configuration
        newConfiguration = {}
        for endpoint in endpointList:
            if endpoint['DIRACSITENAME'] is None:
                continue

            # the first dot-separated token of the site name is used as the
            # section directly under rootPath
            split = endpoint['DIRACSITENAME'].split('.')
            path = cfgPath(rootPath, split[0], endpoint['DIRACSITENAME'], extPath, endpoint['HOSTNAME'])
            for name, defaultValue in options.items():
                newConfiguration[cfgPath(path, name)] = defaultValue

        # get current configuration
        currentConfiguration = {}
        for option in options:
            result = gConfig.getConfigurationTree(rootPath, extPath + '/', '/' + option)
            if not result['OK']:
                log.error("getConfigurationTree() failed with message: %s" % result['Message'])
                return S_ERROR('Unable to fetch perfSONAR endpoints from CS.')
            currentConfiguration.update(result['Value'])

        # disable endpoints that disappeared in GOCDB
        removedElements = set(currentConfiguration) - set(newConfiguration)
        newElements = set(newConfiguration) - set(currentConfiguration)

        # Each endpoint contributes len(options) paths. Floor division keeps
        # the count an integer on Python 3 as well.
        addedEndpoints = len(newElements) // len(options)
        disabledEndpoints = 0
        for path in removedElements:
            if baseOptionName in path:
                newConfiguration[path] = 'False'
                if currentConfiguration[path] != 'False':
                    disabledEndpoints = disabledEndpoints + 1

        # inform what will be changed
        if addedEndpoints > 0:
            self.log.info("%s new perfSONAR endpoints will be added to the configuration" % addedEndpoints)

        if disabledEndpoints > 0:
            self.log.info("%s old perfSONAR endpoints will be disable in the configuration" % disabledEndpoints)

        if addedEndpoints == 0 and disabledEndpoints == 0:
            self.log.info("perfSONAR configuration is up-to-date")

        log.debug('End function.')
        return S_OK(newConfiguration)

    def __addDIRACSiteName(self, inputList):
        """Extend given list of GOCDB endpoints with DIRAC site name, i.e.
        add an entry "DIRACSITENAME" in dictionaries that describe endpoints.
        If given site name could not be found "DIRACSITENAME" is set to None.

        The dictionaries of ``inputList`` are modified in place.

        :return: S_OK( list of perfSONAR endpoints (dictionaries) ) / S_ERROR
        """
        log = self.log.getSubLogger('__addDIRACSiteName')
        log.debug('Begin function ...')

        # get site name dictionary
        result = getDIRACGOCDictionary()
        if not result['OK']:
            log.error("getDIRACGOCDictionary() failed with message: %s" % result['Message'])
            return S_ERROR('Could not get site name dictionary')

        # reverse the dictionary (assume 1 to 1 relation)
        DIRACGOCDict = result['Value']
        GOCDIRACDict = dict(zip(DIRACGOCDict.values(), DIRACGOCDict.keys()))

        # add DIRAC site names
        outputList = []
        for entry in inputList:
            try:
                entry['DIRACSITENAME'] = GOCDIRACDict[entry['SITENAME']]
            except KeyError:
                self.log.warn("No dictionary entry for %s. " % entry['SITENAME'])
                entry['DIRACSITENAME'] = None
            outputList.append(entry)

        log.debug('End function.')
        return S_OK(outputList)

    def __updateConfiguration(self, setElements=None, delElements=None):
        """Update configuration stored by CS.

        :param setElements: dictionary path -> value; a None value means the
                            path is a section that only has to exist
        :param delElements: list of paths to delete (tried both as option and
                            as section)
        :return: S_OK() / S_ERROR when the commit fails
        """
        if setElements is None:
            setElements = {}
        if delElements is None:
            delElements = []

        log = self.log.getSubLogger('__updateConfiguration')
        log.debug('Begin function ...')

        # assure existence and proper value of a section or an option
        for path, value in setElements.items():

            if value is None:
                section = path
            else:
                split = path.rsplit('/', 1)
                section = split[0]

            # Failures below are logged only; the remaining paths are still processed.
            try:
                result = self.csAPI.createSection(section)
                if not result['OK']:
                    log.error("createSection() failed with message: %s" % result['Message'])
            except Exception as e:
                log.error("Exception in createSection(): %s" % repr(e).replace(',)', ')'))

            if value is not None:
                try:
                    result = self.csAPI.setOption(path, value)
                    if not result['OK']:
                        log.error("setOption() failed with message: %s" % result['Message'])
                except Exception as e:
                    log.error("Exception in setOption(): %s" % repr(e).replace(',)', ')'))

        # delete elements in the configuration
        for path in delElements:
            result = self.csAPI.delOption(path)
            if not result['OK']:
                log.warn("delOption() failed with message: %s" % result['Message'])

                result = self.csAPI.delSection(path)
                if not result['OK']:
                    log.warn("delSection() failed with message: %s" % result['Message'])

        if self.dryRun:
            log.info("Dry Run: CS won't be updated")
            self.csAPI.showDiff()
        else:
            # update configuration stored by CS
            result = self.csAPI.commit()
            if not result['OK']:
                log.error("commit() failed with message: %s" % result['Message'])
                return S_ERROR("Could not commit changes to CS.")
            else:
                log.info("Committed changes to CS")

        log.debug('End function.')
        return S_OK()

    # define mapping between an agent option in the configuration and a function call
    __functionMap = {'UpdatePerfSONARS': updatePerfSONARConfiguration,
                     }
class DowntimeCommand(Command):
    """Downtime "master" Command: queries GOCDB, stores the downtimes found and
    returns the one relevant for the requested element.
    """

    def __init__(self, args=None, clients=None):
        """Set up the GOCDB and ResourceManagement clients.

        Clients found in ``self.apis`` are reused, otherwise new ones are created.
        """
        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        """Store the results of doNew method on the database.

        :param result: list of downtime dictionaries
        :return: S_OK() / the first failing client result
        """
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(dt['DowntimeID'],
                                                              dt['Element'],
                                                              dt['Name'],
                                                              dt['StartDate'],
                                                              dt['EndDate'],
                                                              dt['Severity'],
                                                              dt['Description'],
                                                              dt['Link'])
            if not resQuery['OK']:
                return resQuery

        return S_OK()

    def _prepareCommand(self):
        """Read and translate the command arguments.

        DowntimeCommand requires three arguments and accepts an optional one:
          - name : <str>
          - element : Site / Resource
          - elementType: <str>
          - hours (optional)

        Site names are translated into their GOCDB names (an error is returned
        when the translation fails); storage element names are replaced by
        whatever ``CSHelpers.getSEHost`` returns.

        :return: S_OK( (element, elementName, hours) ) / S_ERROR
        """
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is not Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            if not gocSite['OK']:
                return gocSite
            elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # NOTE(review): the return value is used directly as the host name
            # here - confirm getSEHost does not return an S_OK structure.
            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        return S_OK((element, elementName, hours))

    def doNew(self, masterParams=None):
        """Query GOCDB, store the matching downtimes and return one of them.

        Gets the parameters to run, either from the master method (a tuple
        ``(element, elementNames)``) or from its own arguments. The server is
        not very stable, so in case of a URLError the query is tried a second
        time. Downtimes whose name is not among the requested ones are ignored.

        :return: S_OK( downtime dictionary or None ) / S_ERROR
        """
        if masterParams is not None:
            element, elementNames = masterParams
            hours = None
            elementName = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours = params['Value']
            elementNames = [elementName]

        startDate = datetime.utcnow() - timedelta(days=2)

        try:
            results = self.gClient.getStatus(element, elementName, startDate, 120)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, elementName, startDate, 120)
            except urllib2.URLError as e:  # 'as' form: valid on Python 2.6+ and 3
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:
            return S_OK(None)

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}
            if element == 'Resource':
                dt['Name'] = downDic['HOSTNAME']
            else:
                dt['Name'] = downDic['SITENAME']

            if dt['Name'] not in elementNames:
                continue

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        # We return only one downtime, if it is ongoing at the target date.
        # UTC is used here as for the query above; the previous local-time
        # datetime.now() shifted the window by the host's UTC offset.
        # NOTE(review): assumes the FORMATED_* dates are expressed in UTC - confirm.
        endDate = datetime.utcnow()
        startDate = endDate
        if hours:
            startDate = startDate + timedelta(hours=hours)

        # Dates are compared as strings, as before.
        startLimit = str(startDate)
        endLimit = str(endDate)

        result = None
        for dt in uniformResult:
            if (dt['StartDate'] < startLimit) and (dt['EndDate'] > endLimit):
                result = dt
                break

        return S_OK(result)
class DowntimeCommand(Command):
    """Downtime "master" Command: caches the downtimes published by GOCDB and
    removes the expired or withdrawn ones.
    """

    def __init__(self, args=None, clients=None):
        """Set up the GOCDB and ResourceManagement clients.

        Clients found in ``self.apis`` are reused, otherwise new ones are created.
        """
        super(DowntimeCommand, self).__init__(args, clients)

        if "GOCDBClient" in self.apis:
            self.gClient = self.apis["GOCDBClient"]
        else:
            self.gClient = GOCDBClient()

        if "ResourceManagementClient" in self.apis:
            self.rmClient = self.apis["ResourceManagementClient"]
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        """Store the downtimes produced by doNew in the DowntimeCache.

        Every downtime is attempted even if an earlier one fails.

        :param result: list of downtime dictionaries
        :return: the first failing client result if any write failed, otherwise
                 the result of the last write (S_OK() when the list is empty)
        """
        # Pre-set so that an empty list no longer raises UnboundLocalError.
        resQuery = S_OK()
        firstFailure = None

        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt["DowntimeID"],
                element=dt["Element"],
                name=dt["Name"],
                startDate=dt["StartDate"],
                endDate=dt["EndDate"],
                severity=dt["Severity"],
                description=dt["Description"],
                link=dt["Link"],
                gOCDBServiceType=dt["gOCDBServiceType"],
            )
            # Keep the first failure; previously only the last status survived.
            if firstFailure is None and not resQuery["OK"]:
                firstFailure = resQuery

        if firstFailure is not None:
            return firstFailure
        return resQuery

    def _cleanCommand(self, element, elementNames):
        """Clear from the cache the downtimes that expired or disappeared from GOCDB.

        :param element: element kind used to select cached downtimes
        :param elementNames: iterable of element names to clean
        :return: S_OK( list of deleteDowntimeCache results ) / S_ERROR
        """
        resQuery = []
        # Links of the downtimes currently known to GOCDB. Fetched lazily and
        # only once: the answer does not depend on the element name.
        currentLinks = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element, name=elementName)
            if not result["OK"]:
                return result

            uniformResult = [dict(zip(result["Columns"], res)) for res in result["Value"]]
            currentDate = datetime.utcnow()

            if not uniformResult:
                continue

            if currentLinks is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList["OK"]:
                    return gDTLinkList
                currentLinks = gDTLinkList["Value"]

            for dt in uniformResult:
                # if DT expired or DT not in the list of current DTs, then we remove it from the cache
                if dt["EndDate"] < currentDate or dt["Link"] not in currentLinks:
                    result = self.rmClient.deleteDowntimeCache(downtimeID=dt["DowntimeID"])
                    resQuery.append(result)

        return S_OK(resQuery)

    def _prepareCommand(self):
        """Read and translate the command arguments.

        DowntimeCommand requires three arguments and accepts an optional one:
          - name : <str>
          - element : Site / Resource
          - elementType: <str>
          - hours (optional)

        If the elements are Site(s), we need to get their GOCDB names. They may
        not have one, in which case the name is left unchanged.

        :return: S_OK( (element, elementName, hours, gOCDBServiceType) ) / S_ERROR;
                 for storage elements ``elementName`` is the value returned by
                 ``getSEHosts`` (there might be more than one host)
        """
        if "name" not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args["name"]

        if "element" not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args["element"]

        if "elementType" not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args["elementType"]

        if element not in ["Site", "Resource"]:
            return S_ERROR("element is neither Site nor Resource")

        hours = None
        if "hours" in self.args:
            hours = self.args["hours"]

        gOCDBServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == "Site":
            gocSite = getGOCSiteName(elementName)
            # On failure the site is most probably not a grid site - not an
            # issue: elementName simply remains unchanged.
            if gocSite["OK"]:
                elementName = gocSite["Value"]

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == "StorageElement":
            # for SRM and SRM only, we need to distinguish if it's tape or disk
            # if it's not SRM, then gOCDBServiceType will be None (and we'll use them all)
            try:
                se = StorageElement(elementName)
                seOptions = se.options
                seProtocols = set(se.localAccessProtocolList) | set(se.localWriteProtocolList)
            except AttributeError:  # Sometimes the SE can't be instantiated properly
                self.log.error("Failure instantiating StorageElement object", elementName)
                return S_ERROR("Failure instantiating StorageElement")

            if "SEType" in seOptions and "srm" in seProtocols:
                # Type should follow the convention TXDY
                seType = seOptions["SEType"]
                diskSE = re.search("D[1-9]", seType) is not None
                tapeSE = re.search("T[1-9]", seType) is not None
                if tapeSE:
                    gOCDBServiceType = "srm.nearline"
                elif diskSE:
                    gOCDBServiceType = "srm"

            res = getSEHosts(elementName)
            if not res["OK"]:
                return res
            seHosts = res["Value"]

            if not seHosts:
                return S_ERROR("No seHost(s) for %s" % elementName)
            # in this case it will return a list, because there might be more than one host only
            elementName = seHosts

        elif elementType in ["FTS", "FTS3"]:
            gOCDBServiceType = "FTS"
            # WARNING: this method presupposes that the server is an FTS3 type
            gocSite = getGOCFTSName(elementName)
            if not gocSite["OK"]:
                self.log.warn("FTS not in Resources/FTSEndpoints/FTS3 ?", elementName)
            else:
                elementName = gocSite["Value"]

        elif elementType == "ComputingElement":
            res = getCESiteMapping(elementName)
            if not res["OK"]:
                return res
            # Report an unknown CE as an error instead of raising KeyError.
            siteName = res["Value"].get(elementName)
            if not siteName:
                return S_ERROR("No site found for CE %s" % elementName)
            ceType = gConfig.getValue(
                cfgPath("Resources", "Sites", siteName.split(".")[0], siteName, "CEs", elementName, "CEType")
            )
            if ceType == "HTCondorCE":
                gOCDBServiceType = "org.opensciencegrid.htcondorce"
            elif ceType == "ARC":
                gOCDBServiceType = "ARC-CE"

        return S_OK((element, elementName, hours, gOCDBServiceType))

    def doNew(self, masterParams=None):
        """Query GOCDB for downtimes and record them in the cache.

        Gets the parameters to run, either from the master method (a tuple
        ``(element, elementNames)``) or from its own arguments. The server is
        not very stable, so in case of a URLError the query is tried a second
        time. Before storing, the cache is cleaned for the queried names.

        :return: S_OK() / S_OK(None) when GOCDB reports no downtime / S_ERROR
        """
        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            elementName = None
            gOCDBServiceType = None
        else:
            params = self._prepareCommand()
            if not params["OK"]:
                return params
            element, elementName, hours, gOCDBServiceType = params["Value"]
            # _prepareCommand may already hand back a list (storage elements)
            if not isinstance(elementName, list):
                elementNames = [elementName]
            else:
                elementNames = elementName

        # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
        try:
            results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
        except URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
            except URLError as e:
                return S_ERROR(e)

        if not results["OK"]:
            return results
        results = results["Value"]

        if results is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        if elementNames:
            cleanRes = self._cleanCommand(element, elementNames)
            if not cleanRes["OK"]:
                return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():

            dt = {}

            # preference order: URL, then HOSTNAME, then SITENAME
            dt["Name"] = downDic.get("URL", downDic.get("HOSTNAME", downDic.get("SITENAME")))
            if not dt["Name"]:
                return S_ERROR("URL, SITENAME and HOSTNAME are missing from downtime dictionary")

            dt["gOCDBServiceType"] = downDic.get("SERVICE_TYPE")

            if dt["gOCDBServiceType"] and gOCDBServiceType:
                # a mismatch is only reported, the downtime is stored anyway
                if gOCDBServiceType.lower() != downDic["SERVICE_TYPE"].lower():
                    self.log.warn(
                        "SERVICE_TYPE mismatch",
                        "between GOCDB (%s) and CS (%s) for %s"
                        % (downDic["SERVICE_TYPE"], gOCDBServiceType, dt["Name"]),
                    )

            dt["DowntimeID"] = downtime
            dt["Element"] = element
            dt["StartDate"] = downDic["FORMATED_START_DATE"]
            dt["EndDate"] = downDic["FORMATED_END_DATE"]
            dt["Severity"] = downDic["SEVERITY"]
            dt["Description"] = downDic["DESCRIPTION"].replace("'", "")
            dt["Link"] = downDic["GOCDB_PORTAL_URL"]

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes["OK"]:
            return storeRes

        return S_OK()

    def doCache(self):
        """Read the cache table and pick the single relevant downtime.

        :return: S_OK( downtime dictionary ), or S_OK() when the cache is empty
                 or no cached downtime covers the target date / S_ERROR
        """
        params = self._prepareCommand()
        if not params["OK"]:
            return params
        element, elementName, hours, gOCDBServiceType = params["Value"]

        result = self.rmClient.selectDowntimeCache(
            element=element, name=elementName, gOCDBServiceType=gOCDBServiceType
        )
        if not result["OK"]:
            return result
        if not result["Value"]:
            return S_OK()

        uniformResult = [dict(zip(result["Columns"], res)) for res in result["Value"]]

        # 'targetDate' can be either now or in some 'hours' from now
        targetDate = datetime.utcnow()

        # dtOverlapping is a buffer to assure only one dt is returned
        # when there are overlapping outage/warning dt for same element
        # on top of the buffer we put the most recent outages
        # while at the bottom the most recent warnings
        dtOverlapping = []

        if hours is not None:
            # IN THE FUTURE
            targetDate = targetDate + timedelta(hours=hours)
            # sorting by 'StartDate' b/c if we look for DTs in the future
            # then we are interested in the earliest DTs
            uniformResult.sort(key=itemgetter("Name", "Severity", "StartDate"))

            for dt in uniformResult:
                if (dt["StartDate"] < targetDate) and (dt["EndDate"] > targetDate):
                    # after sorting, outages come before warnings and the
                    # earliest come first within each severity, so the first
                    # match is the one we are looking for
                    dtOverlapping = [dt]
                    break
        else:
            # IN THE PRESENT
            # sorting by 'EndDate' b/c if we look for DTs in the present
            # then we are interested in those DTs that last longer
            uniformResult.sort(key=itemgetter("Name", "Severity", "EndDate"))

            for dt in uniformResult:
                if (dt["StartDate"] < targetDate) and (dt["EndDate"] > targetDate):
                    severity = dt["Severity"].upper()
                    # if outage, we put it on top of the overlapping buffer
                    # i.e. the latest ending outage is on top
                    if severity == "OUTAGE":
                        dtOverlapping = [dt] + dtOverlapping
                    # if warning, we put it at the bottom of the overlapping buffer
                    # i.e. the latest ending warning is at the bottom
                    elif severity == "WARNING":
                        dtOverlapping.append(dt)

        if not dtOverlapping:
            return S_OK()

        dtTop = dtOverlapping[0]
        if dtTop["Severity"].upper() == "OUTAGE":
            return S_OK(dtTop)
        else:
            return S_OK(dtOverlapping[-1])

    def doMaster(self):
        """Master method: refresh the cache for every known site and resource.

        - It gets all sites and transforms them into gocSites.
        - It gets all the storage elements and transforms them into their hosts
        - It gets the FTS3 servers and the CEs (file catalogs will come).

        Failures of the per-kind doNew calls are appended to
        ``self.metrics["failed"]``.

        :return: S_OK( self.metrics ) / S_ERROR
        """
        gocSites = getGOCSites()
        if not gocSites["OK"]:
            return gocSites
        gocSites = gocSites["Value"]

        sesHosts = getStorageElementsHosts()
        if not sesHosts["OK"]:
            return sesHosts
        sesHosts = sesHosts["Value"]

        # Work on a copy: extending the helper's own list in place would also
        # modify whatever object the helper handed out.
        resources = list(sesHosts) if sesHosts else []

        ftsServer = getFTS3Servers(hostOnly=True)
        if ftsServer["OK"] and ftsServer["Value"]:
            resources.extend(ftsServer["Value"])

        # TODO: file catalogs need also to use their hosts

        res = getCESiteMapping()
        if res["OK"] and res["Value"]:
            resources.extend(list(res["Value"]))

        self.log.verbose("Processing Sites", ", ".join(gocSites if gocSites else ["NONE"]))

        siteRes = self.doNew(("Site", gocSites))
        if not siteRes["OK"]:
            self.metrics["failed"].append(siteRes["Message"])

        self.log.verbose("Processing Resources", ", ".join(resources if resources else ["NONE"]))

        resourceRes = self.doNew(("Resource", resources))
        if not resourceRes["OK"]:
            self.metrics["failed"].append(resourceRes["Message"])

        return S_OK(self.metrics)
def initialize(self):
    """Register the commands to run and create the clients they will share.

    The ResourceStatusClient and ResourceManagementClient classes are resolved
    through ObjectLoader; if either cannot be loaded the error is logged and
    the failing result is returned as is.

    :return: S_OK() on success, otherwise the failing loadObject result
    """
    loadedClasses = []
    for modulePath, failureMessage in (
        (
            "DIRAC.ResourceStatusSystem.Client.ResourceStatusClient",
            "Failed to load ResourceStatusClient class: %s",
        ),
        (
            "DIRAC.ResourceStatusSystem.Client.ResourceManagementClient",
            "Failed to load ResourceManagementClient class: %s",
        ),
    ):
        loadResult = ObjectLoader().loadObject(modulePath)
        if not loadResult["OK"]:
            self.log.error(failureMessage % loadResult["Message"])
            return loadResult
        loadedClasses.append(loadResult["Value"])
    rsClass, rmClass = loadedClasses

    # Commands that take no arguments
    for commandName in ("Downtime", "GOCDBSync", "FreeDiskSpace"):
        self.commands[commandName] = [{commandName: {}}]

    # PilotsCommand: once per element kind, without restricting the site
    self.commands["Pilot"] = [
        {"Pilot": {"element": elementKind, "siteName": None}} for elementKind in ("Site", "Resource")
    ]

    # FIXME: do not forget about hourly vs Always ...etc
    # The AccountingCache and VOBOXAvailability commands are currently not registered.

    # Reuse clients for the commands (instantiated in this exact order)
    for clientName, clientFactory in (
        ("GOCDBClient", GOCDBClient),
        ("ReportsClient", ReportsClient),
        ("ResourceStatusClient", rsClass),
        ("ResourceManagementClient", rmClass),
        ("WMSAdministrator", WMSAdministratorClient),
        ("Pilots", PilotManagerClient),
    ):
        self.clients[clientName] = clientFactory()

    self.cCaller = CommandCaller

    return S_OK()
class DowntimeCommand(Command):
    '''
    Downtime "master" Command.

    doNew() queries GOCDB and records the downtimes in the downtime cache
    (after removing the expired / removed ones), doCache() answers from that
    cache and doMaster() runs doNew() for every known site and resource.
    '''

    def __init__(self, args=None, clients=None):
        '''
        Use the GOCDB and ResourceManagement clients found in self.apis when
        present, otherwise instantiate new ones.
        '''
        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries as built by doNew
        :return: S_OK() when everything was stored (also for an empty list),
                 otherwise the first failing result
        '''
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(downtimeID=dt['DowntimeID'],
                                                              element=dt['Element'],
                                                              name=dt['Name'],
                                                              startDate=dt['StartDate'],
                                                              endDate=dt['EndDate'],
                                                              severity=dt['Severity'],
                                                              description=dt['Description'],
                                                              link=dt['Link'],
                                                              gOCDBServiceType=dt['gOCDBServiceType'])
            # Report the failure right away instead of silently overwriting it
            # with the outcome of the next insertion.
            if not resQuery['OK']:
                return resQuery

        # Reached also when there was nothing to store.
        return S_OK()

    def _cleanCommand(self, element, elementNames):
        '''
        Clear Cache from expired DT.

        A cached downtime is deleted when its end date is in the past or when
        its link is not in the list of current downtimes known to GOCDB.

        :return: S_OK(list of the deleteDowntimeCache results), or the first
                 failing query result
        '''
        resQuery = []
        # Links of the ongoing DTs: does not depend on the element, so it is
        # requested from GOCDB only when first needed and then reused.
        currentDTLinks = None

        for elementName in elementNames:
            # get the list of all DTs stored in the cache
            result = self.rmClient.selectDowntimeCache(element=element,
                                                       name=elementName)
            if not result['OK']:
                return result

            uniformResult = [dict(zip(result['Columns'], res)) for res in result['Value']]
            currentDate = datetime.utcnow()

            if not uniformResult:
                continue

            if currentDTLinks is None:
                # get the list of all ongoing DTs from GocDB
                gDTLinkList = self.gClient.getCurrentDTLinkList()
                if not gDTLinkList['OK']:
                    return gDTLinkList
                currentDTLinks = gDTLinkList['Value']

            for dt in uniformResult:
                # if DT expired or DT not in the list of current DTs, then we remove it from the cache
                if dt['EndDate'] < currentDate or dt['Link'] not in currentDTLinks:
                    resQuery.append(self.rmClient.deleteDowntimeCache(downtimeID=dt['DowntimeID']))

        return S_OK(resQuery)

    def _prepareCommand(self):
        '''
        DowntimeCommand requires three arguments in self.args:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>
        and accepts an optional one:
        - hours : time window, None when not given

        If the elements are Site(s), we need to get their GOCDB names. They may
        not have, so we ignore them if they do not have.

        :return: S_OK((element, elementName, hours, gOCDBServiceType)) or S_ERROR
        '''
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is neither Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        gOCDBServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            # On failure the site is most probably not a grid site - not an
            # issue, of course: elementName simply remains unchanged.
            if gocSite['OK']:
                elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # We need to distinguish if it's tape or disk
            try:
                seOptions = StorageElement(elementName).options
            except AttributeError:  # Sometimes the SE can't be instantiated properly
                self.log.error("Failure instantiating StorageElement object for %s" % elementName)
                return S_ERROR("Failure instantiating StorageElement")

            if 'SEType' in seOptions:
                # Type should follow the convention TXDY
                seType = seOptions['SEType']
                diskSE = re.search('D[1-9]', seType) is not None
                tapeSE = re.search('T[1-9]', seType) is not None
                # Tape takes precedence when both patterns match
                if tapeSE:
                    gOCDBServiceType = "srm.nearline"
                elif diskSE:
                    gOCDBServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost['OK']:
                return seHost
            seHost = seHost['Value']
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        elif elementType in ['FTS', 'FTS3']:
            gOCDBServiceType = 'FTS'
            # WARNING: this method presupposes that the server is an FTS3 type
            gocSite = getGOCFTSName(elementName)
            if not gocSite['OK']:
                self.log.warn("%s not in Resources/FTSEndpoints/FTS3 ?" % elementName)
            else:
                elementName = gocSite['Value']

        return S_OK((element, elementName, hours, gOCDBServiceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method
        ((element, elementNames), with a 120 hours window) or from its own
        arguments, and contacts the gocdb client. The server is not very
        stable, so in case of failure tries a second time.

        If there are downtimes, they are recorded in the cache.

        :return: S_OK(None) when the client reports no downtimes, S_OK() once
                 the downtimes have been stored, otherwise an error result
        '''
        if masterParams is not None:
            element, elementNames = masterParams
            hours = 120
            elementName = None
            gOCDBServiceType = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gOCDBServiceType = params['Value']
            elementNames = [elementName]

        # WARNING: checking all the DT that are ongoing or starting in given <hours> from now
        try:
            results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, name=elementNames, startingInHours=hours)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:  # no downtimes found
            return S_OK(None)

        # cleaning the Cache
        cleanRes = self._cleanCommand(element, elementNames)
        if not cleanRes['OK']:
            return cleanRes

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        # (items() rather than the Python-2-only iteritems())
        for downtime, downDic in results.items():
            dt = {}

            dt['Name'] = downDic.get('HOSTNAME', downDic.get('SITENAME'))
            if not dt['Name']:
                return S_ERROR("SITENAME and HOSTNAME are missing from downtime dictionary")

            dt['gOCDBServiceType'] = downDic.get('SERVICE_TYPE')

            if dt['gOCDBServiceType'] and gOCDBServiceType:
                if gOCDBServiceType.lower() != downDic['SERVICE_TYPE'].lower():
                    # downDic comes from GOCDB, gOCDBServiceType was derived from
                    # the local configuration: report them under the right label.
                    return S_ERROR("SERVICE_TYPE mismatch between GOCDB (%s) and CS (%s) for %s" %
                                   (downDic['SERVICE_TYPE'], gOCDBServiceType, dt['Name']))

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        return S_OK()

    def doCache(self):
        '''
        Method that reads the cache table and tries to read from it.

        :return: S_OK(<dictionary describing one DT>) when a cached DT covers
                 the target date, S_OK(None) when there is none, otherwise the
                 failing result
        '''
        params = self._prepareCommand()
        if not params['OK']:
            return params
        element, elementName, hours, gOCDBServiceType = params['Value']

        result = self.rmClient.selectDowntimeCache(element=element, name=elementName,
                                                   gOCDBServiceType=gOCDBServiceType)
        if not result['OK']:
            return result

        uniformResult = [dict(zip(result['Columns'], res)) for res in result['Value']]

        # 'targetDate' can be either now or some 'hours' later in the future
        targetDate = datetime.utcnow()

        # dtOverlapping is a buffer to assure only one dt is returned
        # when there are overlapping outage/warning dt for same element
        # on top of the buffer we put the most recent outages
        # while at the bottom the most recent warnings,
        # assumption: uniformResult list is already ordered by resource/site name, severity, startdate
        dtOverlapping = []

        if hours is not None:
            # IN THE FUTURE
            targetDate = targetDate + timedelta(hours=hours)
            # sorting by 'StartDate' b/c if we look for DTs in the future
            # then we are interested in the earliest DTs
            uniformResult.sort(key=itemgetter('Name', 'Severity', 'StartDate'))

            for dt in uniformResult:
                if (dt['StartDate'] < targetDate) and (dt['EndDate'] > targetDate):
                    # the list is already ordered in a way that outages come first over warnings
                    # and the earliest outages are on top of other outages and warnings
                    # while the earliest warnings are on top of the other warnings
                    # so what ever comes first in the list is also what we are looking for
                    dtOverlapping = [dt]
                    break
        else:
            # IN THE PRESENT
            # sorting by 'EndDate' b/c if we look for DTs in the present
            # then we are interested in those DTs that last longer
            uniformResult.sort(key=itemgetter('Name', 'Severity', 'EndDate'))

            for dt in uniformResult:
                if (dt['StartDate'] < targetDate) and (dt['EndDate'] > targetDate):
                    # if outage, we put it on top of the overlapping buffer
                    # i.e. the latest ending outage is on top
                    if dt['Severity'].upper() == 'OUTAGE':
                        dtOverlapping = [dt] + dtOverlapping
                    # if warning, we put it at the bottom of the overlapping buffer
                    # i.e. the latest ending warning is at the bottom
                    elif dt['Severity'].upper() == 'WARNING':
                        dtOverlapping.append(dt)

        selected = None
        if dtOverlapping:
            dtTop = dtOverlapping[0]
            dtBottom = dtOverlapping[-1]
            # An outage on top wins, otherwise the bottom entry is returned
            if dtTop['Severity'].upper() == 'OUTAGE':
                selected = dtTop
            else:
                selected = dtBottom

        return S_OK(selected)

    def doMaster(self):
        '''
        Master method, which looks little bit spaghetti code, sorry !
        - It gets all sites and transforms them into gocSites.
        - It gets all the storage elements and transforms them into their hosts
        - It gets the FTS3 servers and the CEs (file catalogs will come).

        Failures of the two doNew calls are collected in self.metrics['failed'].

        :return: S_OK(self.metrics), or the failing result of the sites /
                 storage elements lookups
        '''
        gocSites = CSHelpers.getGOCSites()
        if not gocSites['OK']:
            return gocSites
        gocSites = gocSites['Value']

        sesHosts = CSHelpers.getStorageElementsHosts()
        if not sesHosts['OK']:
            return sesHosts
        sesHosts = sesHosts['Value']

        # Work on a copy: the list handed back by the helper is not ours to grow
        resources = list(sesHosts) if sesHosts else []

        ftsServer = getFTS3Servers()
        if ftsServer['OK'] and ftsServer['Value']:
            resources.extend(ftsServer['Value'])

        # TODO: file catalogs need also to use their hosts

        # fc = CSHelpers.getFileCatalogs()
        # if fc[ 'OK' ]:
        #  resources = resources + fc[ 'Value' ]

        ce = CSHelpers.getComputingElements()
        if ce['OK'] and ce['Value']:
            resources.extend(ce['Value'])

        self.log.verbose('Processing Sites: %s' % ', '.join(gocSites))

        siteRes = self.doNew(('Site', gocSites))
        if not siteRes['OK']:
            self.metrics['failed'].append(siteRes['Message'])

        self.log.verbose('Processing Resources: %s' % ', '.join(resources))

        resourceRes = self.doNew(('Resource', resources))
        if not resourceRes['OK']:
            self.metrics['failed'].append(resourceRes['Message'])

        return S_OK(self.metrics)
class DowntimeCommand(Command):
    '''
    Downtime "master" Command.

    doNew() queries GOCDB for the downtimes of a site or resource, stores
    them in the downtime cache and returns the one matching the target date.
    '''

    def __init__(self, args=None, clients=None):
        '''
        Use the GOCDB and ResourceManagement clients found in self.apis when
        present, otherwise instantiate new ones.
        '''
        super(DowntimeCommand, self).__init__(args, clients)

        if 'GOCDBClient' in self.apis:
            self.gClient = self.apis['GOCDBClient']
        else:
            self.gClient = GOCDBClient()

        if 'ResourceManagementClient' in self.apis:
            self.rmClient = self.apis['ResourceManagementClient']
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        '''
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries as built by doNew
        :return: S_OK() when everything was stored, otherwise the first
                 failing result
        '''
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                downtimeID=dt['DowntimeID'],
                element=dt['Element'],
                name=dt['Name'],
                startDate=dt['StartDate'],
                endDate=dt['EndDate'],
                severity=dt['Severity'],
                description=dt['Description'],
                link=dt['Link'],
                # SERVICE_TYPE is not always defined, so neither is this key
                gocdbServiceType=dt.get('GOCDBServiceType'))
            if not resQuery['OK']:
                return resQuery
        return S_OK()

    def _prepareCommand(self):
        '''
        DowntimeCommand requires three arguments in self.args:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>
        and accepts an optional one:
        - hours : time window, None when not given

        Site names are translated into their GOCDB names, storage element
        names into their hosts.

        :return: S_OK((element, elementName, hours, gocdbServiceType)) or an
                 error result
        '''
        if 'name' not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args['name']

        if 'element' not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args['element']

        if 'elementType' not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args['elementType']

        if element not in ['Site', 'Resource']:
            return S_ERROR('element is not Site nor Resource')

        hours = None
        if 'hours' in self.args:
            hours = self.args['hours']

        gocdbServiceType = None

        # Transform DIRAC site names into GOCDB topics
        if element == 'Site':
            gocSite = getGOCSiteName(elementName)
            if not gocSite['OK']:
                return gocSite
            elementName = gocSite['Value']

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == 'StorageElement':
            # We need to distinguish if it's tape or disk.
            # The options are looked up once and the outcome is checked, so a
            # failed lookup is reported instead of raising KeyError on 'Value'.
            seOptions = getStorageElementOptions(elementName)
            if not seOptions['OK']:
                return seOptions
            seOptions = seOptions['Value']

            if seOptions.get('TapeSE'):
                gocdbServiceType = "srm.nearline"
            elif seOptions.get('DiskSE'):
                gocdbServiceType = "srm"

            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR('No seHost for %s' % elementName)
            elementName = seHost

        return S_OK((element, elementName, hours, gocdbServiceType))

    def doNew(self, masterParams=None):
        '''
        Gets the parameters to run, either from the master method
        ((element, elementNames)) or from its own arguments, and contacts the
        gocdb client asking for the downtimes since 14 days ago. The server is
        not very stable, so in case of failure tries a second time.

        The downtimes found for the requested names are recorded, and the one
        covering the target date (now, or now + hours) is returned.

        :return: S_OK(<downtime dictionary> or None), or an error result
        '''
        if masterParams is not None:
            element, elementNames = masterParams
            hours = None
            elementName = None
            gocdbServiceType = None
        else:
            params = self._prepareCommand()
            if not params['OK']:
                return params
            element, elementName, hours, gocdbServiceType = params['Value']
            elementNames = [elementName]

        startDate = datetime.utcnow() - timedelta(days=14)

        try:
            results = self.gClient.getStatus(element, elementName, startDate, 120)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, elementName, startDate, 120)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results['OK']:
            return results
        results = results['Value']

        if results is None:
            return S_OK(None)

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():
            dt = {}

            # SERVICE_TYPE is not always defined
            serviceType = downDic.get('SERVICE_TYPE')

            # Skip the downtimes declared for another service type
            if gocdbServiceType and serviceType:
                if gocdbServiceType.lower() != serviceType.lower():
                    continue

            if element == 'Resource':
                dt['Name'] = downDic['HOSTNAME']
            else:
                dt['Name'] = downDic['SITENAME']

            # Keep only the elements we were asked about
            if dt['Name'] not in elementNames:
                continue

            dt['DowntimeID'] = downtime
            dt['Element'] = element
            dt['StartDate'] = downDic['FORMATED_START_DATE']
            dt['EndDate'] = downDic['FORMATED_END_DATE']
            dt['Severity'] = downDic['SEVERITY']
            dt['Description'] = downDic['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downDic['GOCDB_PORTAL_URL']
            if 'SERVICE_TYPE' in downDic:
                dt['GOCDBServiceType'] = serviceType

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes['OK']:
            return storeRes

        # We return only one downtime, if its ongoing at the target date
        targetDate = datetime.utcnow()
        if hours:
            targetDate = targetDate + timedelta(hours=hours)
        # NOTE(review): dates are compared as strings, which presumes the
        # FORMATED_* values sort lexicographically like str(datetime) - confirm
        target = str(targetDate)

        dtOutages = []
        dtWarnings = []

        for dt in uniformResult:
            if (dt['StartDate'] < target) and (dt['EndDate'] > target):
                # Case-insensitive, so that both 'Outage' and 'OUTAGE' are recognised
                if dt['Severity'].upper() == 'OUTAGE':
                    dtOutages.append(dt)
                else:
                    dtWarnings.append(dt)

        # In case many overlapping downtimes have been declared, the first one in
        # severity and then time order will be selected. We want to get the latest one
        # ( they are sorted by insertion time )
        result = None
        if dtOutages:
            result = dtOutages[-1]
        elif dtWarnings:
            result = dtWarnings[-1]

        return S_OK(result)
class DowntimeCommand(Command):
    """
    Downtime "master" Command.

    doNew() queries GOCDB for the downtimes of a site or resource, stores
    them in the downtime cache and returns one that is ongoing or starting
    within the requested number of hours.
    """

    def __init__(self, args=None, clients=None):
        """
        Use the GOCDB and ResourceManagement clients found in self.apis when
        present, otherwise instantiate new ones.
        """
        super(DowntimeCommand, self).__init__(args, clients)

        if "GOCDBClient" in self.apis:
            self.gClient = self.apis["GOCDBClient"]
        else:
            self.gClient = GOCDBClient()

        if "ResourceManagementClient" in self.apis:
            self.rmClient = self.apis["ResourceManagementClient"]
        else:
            self.rmClient = ResourceManagementClient()

    def _storeCommand(self, result):
        """
        Stores the results of doNew method on the database.

        :param result: list of downtime dictionaries as built by doNew
        :return: S_OK() when everything was stored, otherwise the first
                 failing result
        """
        for dt in result:
            resQuery = self.rmClient.addOrModifyDowntimeCache(
                dt["DowntimeID"],
                dt["Element"],
                dt["Name"],
                dt["StartDate"],
                dt["EndDate"],
                dt["Severity"],
                dt["Description"],
                dt["Link"],
            )
            if not resQuery["OK"]:
                return resQuery
        return S_OK()

    def _prepareCommand(self):
        """
        DowntimeCommand requires three arguments in self.args:
        - name : <str>
        - element : Site / Resource
        - elementType: <str>
        and accepts an optional one:
        - hours : time window, None when not given

        Site names are translated into their GOCDB names, storage element
        names into their hosts.

        :return: S_OK((element, elementName, hours)) or an error result
        """
        if "name" not in self.args:
            return S_ERROR('"name" not found in self.args')
        elementName = self.args["name"]

        if "element" not in self.args:
            return S_ERROR('"element" not found in self.args')
        element = self.args["element"]

        if "elementType" not in self.args:
            return S_ERROR('"elementType" not found in self.args')
        elementType = self.args["elementType"]

        if element not in ["Site", "Resource"]:
            return S_ERROR("element is not Site nor Resource")

        hours = None
        if "hours" in self.args:
            hours = self.args["hours"]

        # Transform DIRAC site names into GOCDB topics
        if element == "Site":
            gocSite = getGOCSiteName(elementName)
            if not gocSite["OK"]:
                return gocSite
            elementName = gocSite["Value"]

        # The DIRAC se names mean nothing on the grid, but their hosts do mean.
        elif elementType == "StorageElement":
            seHost = CSHelpers.getSEHost(elementName)
            if not seHost:
                return S_ERROR("No seHost for %s" % elementName)
            elementName = seHost

        return S_OK((element, elementName, hours))

    def doNew(self, masterParams=None):
        """
        Gets the parameters to run, either from the master method
        ((element, elementNames)) or from its own arguments, and contacts the
        gocdb client asking for the downtimes since 14 days ago. The server is
        not very stable, so in case of failure tries a second time.

        The downtimes found for the requested names are recorded, and the last
        one that is ongoing or starting within ``hours`` is returned.

        :return: S_OK(<downtime dictionary> or None), or an error result
        """
        if masterParams is not None:
            element, elementNames = masterParams
            hours = None
            elementName = None
        else:
            params = self._prepareCommand()
            if not params["OK"]:
                return params
            element, elementName, hours = params["Value"]
            elementNames = [elementName]

        startDate = datetime.utcnow() - timedelta(days=14)

        try:
            results = self.gClient.getStatus(element, elementName, startDate, 120)
        except urllib2.URLError:
            try:
                # Let's give it a second chance..
                results = self.gClient.getStatus(element, elementName, startDate, 120)
            except urllib2.URLError as e:
                return S_ERROR(e)

        if not results["OK"]:
            return results
        results = results["Value"]

        if results is None:
            return S_OK(None)

        uniformResult = []

        # Humanize the results into a dictionary, not the most optimal, but readable
        for downtime, downDic in results.items():
            dt = {}
            if element == "Resource":
                dt["Name"] = downDic["HOSTNAME"]
            else:
                dt["Name"] = downDic["SITENAME"]

            # Keep only the elements we were asked about
            if dt["Name"] not in elementNames:
                continue

            dt["DowntimeID"] = downtime
            dt["Element"] = element
            dt["StartDate"] = downDic["FORMATED_START_DATE"]
            dt["EndDate"] = downDic["FORMATED_END_DATE"]
            dt["Severity"] = downDic["SEVERITY"]
            dt["Description"] = downDic["DESCRIPTION"].replace("'", "")
            dt["Link"] = downDic["GOCDB_PORTAL_URL"]

            uniformResult.append(dt)

        storeRes = self._storeCommand(uniformResult)
        if not storeRes["OK"]:
            return storeRes

        # We return only one downtime: it must start before now (+ hours, if
        # given) and end after now.
        # NOTE(review): dates are compared as strings, which presumes the
        # FORMATED_* values sort lexicographically like str(datetime) - confirm
        now = datetime.utcnow()
        latestStart = now + timedelta(hours=hours) if hours else now

        result = None
        for dt in uniformResult:
            if (dt["StartDate"] < str(latestStart)) and (dt["EndDate"] > str(now)):
                # No break on purpose: we want to take the latest one
                # ( they are sorted by insertion time )
                result = dt

        return S_OK(result)
def setUp(self):
    """Build the fixtures used by the tests: a MagicMock and a GOCDBClient."""
    rssDouble = mock.MagicMock()
    self.mockRSS = rssDouble

    gocdbClient = GOCDBClient()
    self.GOCCli = gocdbClient
def doCommand(self, sites=None):
    """
    Returns downtimes information for all the sites in input.

    :params:
      :attr:`sites`: list of site names (when not given, take every site)

    :returns:
      {'<downtime id prefix> <DIRAC site name>': {'ID': ..., 'StartDate': 'aDate',
      'EndDate': 'aDate', 'Severity': ..., 'Description': ..., 'Link': ...} ... }
      or {} when the GOCDB client raises or reports no downtimes

    :raises: RSSException when the list of grid sites cannot be retrieved,
      when the GOCDB query is not OK, or when a DIRAC site name cannot be
      resolved
    """
    if self.client is None:
        from DIRAC.Core.LCG.GOCDBClient import GOCDBClient
        self.client = GOCDBClient()

    if sites is None:
        # from DIRAC.Core.DISET.RPCClient import RPCClient
        RPC = RPCClient("ResourceStatus/ResourceStatus")
        GOC_sites = RPC.getGridSitesList()
        if not GOC_sites['OK']:
            # The message lives in the RPC answer ('sites' is None on this path)
            raise RSSException(where(self, self.doCommand) + " " + GOC_sites['Message'])
        GOC_sites = GOC_sites['Value']
    else:
        # NOTE(review): a site without a GOCDB name makes this raise KeyError
        # on 'Value' - confirm whether such sites should be skipped instead
        GOC_sites = [getGOCSiteName(x)['Value'] for x in sites]

    try:
        res = self.client.getStatus('Site', GOC_sites, None, 120)
    except Exception:
        # Best effort: any failure of the client is logged and reported as
        # "no downtimes"
        gLogger.exception("Exception when calling GOCDBClient.")
        return {}

    if not res['OK']:
        raise RSSException(where(self, self.doCommand) + " " + res['Message'])
    res = res['Value']

    if res is None:
        return {}

    resToReturn = {}

    for dt_ID in res:
        try:
            downtime = res[dt_ID]

            dt = {}
            dt['ID'] = dt_ID
            dt['StartDate'] = downtime['FORMATED_START_DATE']
            dt['EndDate'] = downtime['FORMATED_END_DATE']
            dt['Severity'] = downtime['SEVERITY']
            dt['Description'] = downtime['DESCRIPTION'].replace('\'', '')
            dt['Link'] = downtime['GOCDB_PORTAL_URL']

            DIRACnames = getDIRACSiteName(downtime['SITENAME'])
            if not DIRACnames['OK']:
                raise RSSException(DIRACnames['Message'])
            DIRACnames = DIRACnames['Value']

            # One entry per DIRAC site, all sharing the same downtime dictionary
            for DIRACname in DIRACnames:
                resToReturn[dt_ID.split()[0] + ' ' + DIRACname] = dt
        except KeyError:
            # Incomplete downtime records are skipped
            continue

    return resToReturn