def _getSEList( self, SEType = 'ProductionOutputs', DataType = 'SimtelProd' ):
    """ Build the ordered list of Storage Elements to try for a data upload.

    The candidate SEs are read from the Operations option ``<SEType>/<DataType>``
    and passed through ``List.randomize``. Every candidate that is also returned
    by ``getSEsForSite`` for the current site is moved to the front of the list,
    so that local SEs are tried first.

    :param str SEType: first component of the Operations option path
    :param str DataType: second component of the Operations option path
    :return: DIRAC.S_OK( list of SE names ), or DIRAC.S_ERROR if the list is empty
    """
    candidates = List.randomize( Operations().getValue( os.path.join( SEType, DataType ), [] ) )
    DIRAC.gLogger.notice( 'List of %s SE: %s ' % ( SEType, candidates ) )

    # SEs attached to the site we are running on; a failed lookup yields none
    siteLookup = getSEsForSite( DIRAC.siteName() )
    siteSEs = siteLookup['Value'] if siteLookup['OK'] else []

    # Pull the local SEs out of the candidate list ...
    preferred = []
    for seName in siteSEs:
        if seName not in candidates:
            continue
        DIRAC.gLogger.notice( 'The local Storage Element is an available SE: ', seName )
        preferred.append( seName )
        candidates.remove( seName )

    # ... and put them back at the head
    ordered = preferred + candidates
    if not ordered:
        return DIRAC.S_ERROR( 'Error in building SEList' )
    return DIRAC.S_OK( ordered )
def _ByShare(self, shareType='CPU'):
    """ Group the input files into tasks following the configured site shares.

    The target shares come from ``self._getShares`` and the current usage from
    ``self.util.getExistingCounters``. For every replica group the next site is
    chosen with ``self._getNextSite`` among the sites hosting the replicas, and
    a task is created for each SE of that site that holds the files.

    :param str shareType: share type forwarded to ``self._getShares``
    :return: S_OK( [ ( SE name, lfns ), ... ] ), or the S_ERROR structure of the
             first failing lookup
    """
    res = self._getShares(shareType, normalise=True)
    if not res['OK']:
        return res
    cpuShares = res['Value']
    self.util.logInfo("Obtained the following target shares (%):")
    for site in sorted(cpuShares):
        self.util.logInfo("%s: %.1f" % (site.ljust(15), cpuShares[site]))

    # Get the existing destinations from the transformationDB.
    # A real list is passed (not a dict view) so the callee can index/serialise it.
    res = self.util.getExistingCounters(requestedSites=list(cpuShares))
    if not res['OK']:
        self.util.logError("Failed to get existing file share", res['Message'])
        return res
    existingCount = res['Value']
    if existingCount:
        self.util.logInfo("Existing site utilization (%):")
        # Normalise a copy: the raw counters are still needed below
        normalisedExistingCount = self.util._normaliseShares(existingCount.copy())
        for se in sorted(normalisedExistingCount):
            self.util.logInfo("%s: %.1f" % (se.ljust(15), normalisedExistingCount[se]))

    # Group the input files by their existing replicas
    res = self.util.groupByReplicas(self.data, self.params['Status'])
    if not res['OK']:
        return res
    replicaGroups = res['Value']

    tasks = []
    # For the replica groups
    for replicaSE, lfns in replicaGroups:
        possibleSEs = replicaSE.split(',')
        # Determine the next site based on requested shares, existing usage and candidate sites
        res = self._getNextSite(existingCount, cpuShares,
                                candidates=self._getSitesForSEs(possibleSEs))
        if not res['OK']:
            self.util.logError("Failed to get next destination SE", res['Message'])
            continue
        targetSite = res['Value']
        # Resolve the ses for the target site
        res = getSEsForSite(targetSite)
        if not res['OK']:
            continue
        ses = res['Value']
        # Determine the selected SE and create the task
        for chosenSE in ses:
            if chosenSE in possibleSEs:
                tasks.append((chosenSE, lfns))
                # dict.has_key() does not exist on Python 3; .get() works on both
                existingCount[targetSite] = existingCount.get(targetSite, 0) + len(lfns)
    return S_OK(tasks)
def determineSeFromSite():
    """Return the name of the SE to use for the current site.

    The static ``SeSiteMap`` is consulted first; when it has no (non-empty)
    entry for the site, the first SE returned by ``getSEsForSite`` is used.
    An empty string is returned when neither source provides a name.
    """
    site = DIRAC.siteName()
    mapped = SeSiteMap.get(site, '')
    if mapped:
        return mapped

    # No static mapping: fall back to the SEs reported for the site
    lookup = getSEsForSite(site)
    if lookup['OK'] and lookup['Value']:
        return lookup['Value'][0]
    return mapped
def determineSeFromSite():
    """Return the name of the SE to use for the current site.

    The static ``SeSiteMap`` is consulted first; when it has no (non-empty)
    entry for the site, the first SE returned by ``getSEsForSite`` is used.
    An empty string is returned when neither source provides a name.
    """
    site = DIRAC.siteName()
    mapped = SeSiteMap.get(site, "")
    if mapped:
        return mapped

    # No static mapping: fall back to the SEs reported for the site
    lookup = getSEsForSite(site)
    if lookup["OK"] and lookup["Value"]:
        return lookup["Value"][0]
    return mapped
def __updateSharedSESites( self, jobState, stageSite, stagedLFNs, opData ):
    """ Update the disk/tape counters of the other candidate sites after staging.

    For every site in ``opData['SiteCandidates']`` other than ``stageSite``,
    each staged LFN whose staging SE is one of the site's SEs, and which has no
    replica on a readable disk SE of that site, moves one unit from the site's
    'tape' counter to its 'disk' counter.

    :param jobState: object providing ``getManifest()`` (used to read the VO)
    :param stageSite: site where staging was requested; it is skipped
    :param stagedLFNs: mapping SE name -> LFNs staged at that SE
    :param opData: optimizer data holding 'SiteCandidates' and the replica info
    :return: S_OK(), or the S_ERROR from ``jobState.getManifest()``
    """
    candidates = opData[ 'SiteCandidates' ]

    # SE name -> status, shared across sites to avoid repeated queries
    statusCache = {}
    manifestResult = jobState.getManifest()
    if not manifestResult['OK']:
        return manifestResult
    vo = manifestResult['Value'].getOption( 'VirtualOrganization' )

    for siteName in candidates:
        if siteName == stageSite:
            continue
        self.jobLog.verbose( "Checking %s for shared SEs" % siteName )
        counters = candidates[ siteName ]
        seLookup = getSEsForSite( siteName )
        if not seLookup[ 'OK' ]:
            continue
        closeSEs = seLookup[ 'Value' ]

        # Readable disk SEs among the SEs of this site
        diskSEs = []
        for seName in closeSEs:
            if seName not in statusCache:
                statusResult = StorageElement( seName, vo = vo ).getStatus()
                if not statusResult['OK' ]:
                    self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, statusResult[ 'Message' ] ) )
                    continue
                statusCache[ seName ] = statusResult[ 'Value' ]
            seInfo = statusCache[ seName ]
            if seInfo['Read'] and seInfo['DiskSE']:
                diskSEs.append( seName )
        self.jobLog.verbose( "Disk SEs for %s are %s" % ( siteName, ", ".join( diskSEs ) ) )

        # LFN -> replicas, buried three levels deep in the optimizer data
        replicaInfo = opData['Value']['Value']['Successful']
        for seName in stagedLFNs:
            # Only SEs that belong to this site matter
            if seName not in closeSEs:
                continue
            for lfn in stagedLFNs[ seName ]:
                self.jobLog.verbose( "Checking %s for %s" % ( seName, lfn ) )
                # Defensive: a staged LFN without replica information is ignored
                if lfn not in replicaInfo:
                    continue
                # Was the file already available on a disk SE of the site?
                alreadyOnDisk = False
                for replicaSE in replicaInfo[ lfn ]:
                    if replicaSE in diskSEs:
                        self.jobLog.verbose( "%s on disk for %s" % ( lfn, replicaSE ) )
                        alreadyOnDisk = True
                if alreadyOnDisk:
                    continue
                # Staging made it available: count it as disk instead of tape
                self.jobLog.verbose( "Setting LFN to disk for %s" % ( seName ) )
                counters[ 'disk' ] += 1
                counters[ 'tape' ] -= 1

    return S_OK()
def isLocalSE( self ):
    """ Test if the Storage Element is local in the current context

    :return: S_OK( True ) when ``self.name`` is among the SEs returned by
             ``getSEsForSite`` for the current site, S_OK( False ) otherwise;
             the S_ERROR structure of ``getSEsForSite`` when that lookup fails
    """
    import DIRAC
    gLogger.verbose( "StorageElement.isLocalSE: Determining whether %s is a local SE." % self.name )
    res = getSEsForSite( DIRAC.siteName() )
    if not res['OK']:
        # A failed lookup has no usable 'Value': propagate the error instead of raising KeyError
        return res
    return S_OK( self.name in res['Value'] )
def isLocalSE( self ):
    """ Test if the Storage Element is local in the current context

    :return: S_OK( True ) when ``self.name`` is among the SEs returned by
             ``getSEsForSite`` for the current site, S_OK( False ) otherwise;
             the S_ERROR structure of ``getSEsForSite`` when that lookup fails
    """
    import DIRAC
    self.log.verbose( "isLocalSE: Determining whether %s is a local SE." % self.name )
    res = getSEsForSite( DIRAC.siteName() )
    if not res['OK']:
        # A failed lookup has no usable 'Value': propagate the error instead of raising KeyError
        return res
    return S_OK( self.name in res['Value'] )
def __isLocalSE( self ):
    """ Test if the Storage Element is local in the current context

    :return: S_OK( True ) when ``self.name`` is among the SEs returned by
             ``getSEsForSite`` for the current site, S_OK( False ) otherwise;
             the S_ERROR structure of ``getSEsForSite`` when that lookup fails
    """
    self.log.getSubLogger( 'LocalSE' ).verbose( "Determining whether %s is a local SE." % self.name )

    import DIRAC
    res = getSEsForSite( DIRAC.siteName() )
    if not res['OK']:
        # A failed lookup has no usable 'Value': propagate the error instead of raising KeyError
        return res
    return S_OK( self.name in res['Value'] )
def getSiteSE(SEname):
    """Return the first SE of the current site, or ``SEname`` as a fallback.

    :param SEname: value returned unchanged when the site lookup fails or
                   reports no SE
    :return: the first entry returned by ``getSEsForSite`` for the current
             site, otherwise ``SEname``
    """
    sitename = DIRAC.siteName()
    # NOTE(review): the site name is logged at error level although it is not an
    # error condition; kept as-is because log consumers may rely on it.
    DIRAC.gLogger.error('Sitename: %s' % (sitename))
    # Single-argument call form: valid on Python 2 and 3, same output as the
    # former Python-2-only ``print "sitename", sitename`` statement.
    print("sitename %s" % (sitename,))
    res = getSEsForSite(sitename)
    if not res['OK']:
        DIRAC.gLogger.error(res['Message'])
        return SEname
    if res['Value']:
        SEname = res['Value'][0]
    return SEname
def __updateSharedSESites(self, jobState, stageSite, stagedLFNs, opData):
    """Update the disk/tape counters of the other candidate sites after staging.

    For every site in ``opData["SiteCandidates"]`` other than ``stageSite``,
    each staged LFN whose staging SE is one of the site's SEs, and which has no
    replica on a readable disk SE of that site, moves one unit from the site's
    "tape" counter to its "disk" counter.

    :param jobState: not used by this implementation
    :param stageSite: site where staging was requested; it is skipped
    :param stagedLFNs: mapping SE name -> LFNs staged at that SE
    :param opData: optimizer data holding "SiteCandidates" and the replica info
    :return: S_OK()
    """
    candidates = opData["SiteCandidates"]

    # SE name -> status, shared across sites to avoid repeated queries
    statusCache = {}
    for siteName in candidates:
        if siteName == stageSite:
            continue
        self.jobLog.verbose("Checking %s for shared SEs" % siteName)
        counters = candidates[siteName]
        seLookup = getSEsForSite(siteName)
        if not seLookup["OK"]:
            continue
        closeSEs = seLookup["Value"]

        # Readable disk SEs among the SEs of this site
        diskSEs = []
        for seName in closeSEs:
            if seName not in statusCache:
                statusResult = StorageElement(seName).getStatus()
                if not statusResult["OK"]:
                    self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, statusResult["Message"]))
                    continue
                statusCache[seName] = statusResult["Value"]
            seInfo = statusCache[seName]
            if seInfo["Read"] and seInfo["DiskSE"]:
                diskSEs.append(seName)
        self.jobLog.verbose("Disk SEs for %s are %s" % (siteName, ", ".join(diskSEs)))

        # LFN -> replicas, buried three levels deep in the optimizer data
        replicaInfo = opData["Value"]["Value"]["Successful"]
        for seName in stagedLFNs:
            # Only SEs that belong to this site matter
            if seName not in closeSEs:
                continue
            for lfn in stagedLFNs[seName]:
                self.jobLog.verbose("Checking %s for %s" % (seName, lfn))
                # Defensive: a staged LFN without replica information is ignored
                if lfn not in replicaInfo:
                    continue
                # Was the file already available on a disk SE of the site?
                alreadyOnDisk = False
                for replicaSE in replicaInfo[lfn]:
                    if replicaSE in diskSEs:
                        self.jobLog.verbose("%s on disk for %s" % (lfn, replicaSE))
                        alreadyOnDisk = True
                if alreadyOnDisk:
                    continue
                # Staging made it available: count it as disk instead of tape
                self.jobLog.verbose("Setting LFN to disk for %s" % (seName))
                counters["disk"] += 1
                counters["tape"] -= 1

    return S_OK()
def __isLocalSE(self):
    """Test if the Storage Element is local in the current context

    :return: S_OK(True) when ``self.name`` is among the SEs returned by
             ``getSEsForSite`` for the current site, S_OK(False) otherwise;
             the S_ERROR structure of ``getSEsForSite`` when that lookup fails
    """
    self.log.getSubLogger("LocalSE").verbose("Determining whether %s is a local SE." % self.name)

    import DIRAC

    res = getSEsForSite(DIRAC.siteName())
    if not res["OK"]:
        # A failed lookup has no usable "Value": propagate the error instead of raising KeyError
        return res
    return S_OK(self.name in res["Value"])
def __getStagingSites(self,stagingSite,destinationSites):
    """ List the sites at which data staged at ``stagingSite`` will be available.

    A destination site qualifies when at least one of its SEs is also an SE of
    the staging site. The staging site itself is always included.

    :param stagingSite: site at which the data is staged
    :param destinationSites: iterable of candidate site names
    :return: S_OK( sorted list of site names ), or the S_ERROR from the SE
             lookup of the staging site
    """
    lookup = getSEsForSite(stagingSite)
    if not lookup['OK']:
        return lookup
    sesAtStagingSite = lookup['Value']

    sharing = [stagingSite]
    for candidate in destinationSites:
        if candidate == stagingSite:
            continue
        lookup = getSEsForSite(candidate)
        # Sites whose SEs cannot be resolved are silently left out
        if lookup['OK'] and any(se in sesAtStagingSite for se in lookup['Value']):
            sharing.append(candidate)
    return S_OK(sorted(sharing))
def __getStagingSites(self, stagingSite, destinationSites):
    """ List the sites at which data staged at ``stagingSite`` will be available.

    A destination site qualifies when at least one of its SEs is also an SE of
    the staging site. The staging site itself is always included.

    :param stagingSite: site at which the data is staged
    :param destinationSites: iterable of candidate site names
    :return: S_OK( sorted list of site names ), or the S_ERROR from the SE
             lookup of the staging site
    """
    lookup = getSEsForSite(stagingSite)
    if not lookup['OK']:
        return lookup
    sesAtStagingSite = lookup['Value']

    sharing = [stagingSite]
    for candidate in destinationSites:
        if candidate == stagingSite:
            continue
        lookup = getSEsForSite(candidate)
        # Sites whose SEs cannot be resolved are silently left out
        if lookup['OK'] and any(se in sesAtStagingSite for se in lookup['Value']):
            sharing.append(candidate)
    return S_OK(sorted(sharing))
def _ByShare( self, shareType = 'CPU' ):
    """ Group the input files into tasks following the configured site shares.

    The target shares come from ``self._getShares`` and the current usage from
    ``self.util.getExistingCounters``. For every replica group the next site is
    chosen with ``self._getNextSite`` among the sites hosting the replicas, and
    a task is created for each SE of that site that holds the files.

    :param str shareType: share type forwarded to ``self._getShares``
    :return: S_OK( [ ( SE name, lfns ), ... ] ), or the S_ERROR structure of the
             first failing lookup
    """
    res = self._getShares( shareType, normalise = True )
    if not res['OK']:
        return res
    cpuShares = res['Value']
    self.util.logInfo( "Obtained the following target shares (%):" )
    for site in sorted( cpuShares ):
        self.util.logInfo( "%s: %.1f" % ( site.ljust( 15 ), cpuShares[site] ) )

    # Get the existing destinations from the transformationDB.
    # A real list is passed (not a dict view) so the callee can index/serialise it.
    res = self.util.getExistingCounters( requestedSites = list( cpuShares ) )
    if not res['OK']:
        self.util.logError( "Failed to get existing file share", res['Message'] )
        return res
    existingCount = res['Value']
    if existingCount:
        self.util.logInfo( "Existing site utilization (%):" )
        # Normalise a copy: the raw counters are still needed below
        normalisedExistingCount = self.util._normaliseShares( existingCount.copy() )
        for se in sorted( normalisedExistingCount ):
            self.util.logInfo( "%s: %.1f" % ( se.ljust( 15 ), normalisedExistingCount[se] ) )

    # Group the input files by their existing replicas
    res = self.util.groupByReplicas( self.data, self.params['Status'] )
    if not res['OK']:
        return res
    replicaGroups = res['Value']

    tasks = []
    # For the replica groups
    for replicaSE, lfns in replicaGroups:
        possibleSEs = replicaSE.split( ',' )
        # Determine the next site based on requested shares, existing usage and candidate sites
        res = self._getNextSite( existingCount, cpuShares,
                                 candidates = self._getSitesForSEs( possibleSEs ) )
        if not res['OK']:
            self.util.logError( "Failed to get next destination SE", res['Message'] )
            continue
        targetSite = res['Value']
        # Resolve the ses for the target site
        res = getSEsForSite( targetSite )
        if not res['OK']:
            continue
        ses = res['Value']
        # Determine the selected SE and create the task
        for chosenSE in ses:
            if chosenSE in possibleSEs:
                tasks.append( ( chosenSE, lfns ) )
                # dict.has_key() does not exist on Python 3; .get() works on both
                existingCount[targetSite] = existingCount.get( targetSite, 0 ) + len( lfns )
    return S_OK( tasks )
def __updateSharedSESites(self, jobManifest, stageSite, stagedLFNs, opData):
    """Update the disk/tape counters of the other candidate sites after staging.

    For every site in ``opData["SiteCandidates"]`` other than ``stageSite``,
    each staged LFN whose staging SE is one of the site's SEs, and which has no
    replica on a readable disk SE of that site, moves one unit from the site's
    "tape" counter to its "disk" counter. Nothing is returned; ``opData`` is
    modified in place.

    :param jobManifest: object providing ``getOption`` (used to read the VO)
    :param stageSite: site where staging was requested; it is skipped
    :param stagedLFNs: mapping SE name -> LFNs staged at that SE
    :param opData: optimizer data holding "SiteCandidates" and the replica info
    """
    candidates = opData["SiteCandidates"]

    # SE name -> status, shared across sites to avoid repeated queries
    statusCache = {}
    vo = jobManifest.getOption("VirtualOrganization")
    for siteName in candidates:
        if siteName == stageSite:
            continue
        self.jobLog.debug("Checking %s for shared SEs" % siteName)
        counters = candidates[siteName]
        seLookup = getSEsForSite(siteName)
        if not seLookup["OK"]:
            continue
        closeSEs = seLookup["Value"]

        # Readable disk SEs among the SEs of this site
        diskSEs = []
        for seName in closeSEs:
            if seName not in statusCache:
                statusCache[seName] = StorageElement(seName, vo=vo).status()
            seInfo = statusCache[seName]
            if seInfo["Read"] and seInfo["DiskSE"]:
                diskSEs.append(seName)
        self.jobLog.debug("Disk SEs for %s are %s" % (siteName, ", ".join(diskSEs)))

        # LFN -> replicas, buried three levels deep in the optimizer data
        replicaInfo = opData["Value"]["Value"]["Successful"]
        for seName in stagedLFNs:
            # Only SEs that belong to this site matter
            if seName not in closeSEs:
                continue
            for lfn in stagedLFNs[seName]:
                self.jobLog.debug("Checking %s for %s" % (seName, lfn))
                # Defensive: a staged LFN without replica information is ignored
                if lfn not in replicaInfo:
                    continue
                # Was the file already available on a disk SE of the site?
                alreadyOnDisk = False
                for replicaSE in replicaInfo[lfn]:
                    if replicaSE in diskSEs:
                        self.jobLog.verbose("lfn on disk", ": %s at %s" % (lfn, replicaSE))
                        alreadyOnDisk = True
                if alreadyOnDisk:
                    continue
                # Staging made it available: count it as disk instead of tape
                self.jobLog.verbose("Setting LFN to disk", "for %s" % seName)
                counters["disk"] += 1
                counters["tape"] -= 1
def upload_to_seList(FileLFN, FileName):
    """Upload and register a file, trying the local SEs before the others.

    The file is first uploaded to every SE of the current site that is listed in
    the module-level ``seList``. If none of these uploads passes
    ``CheckCatalogCoherence``, the SEs of ``seList`` are tried in order until
    one succeeds.

    :param FileLFN: logical file name to register
    :param FileName: local path of the file to upload
    :return: ``DIRAC.S_OK`` on success, ``DIRAC.S_ERROR`` otherwise. These are
             the function objects themselves, not call results, which matches
             the ``res != DIRAC.S_OK`` comparison used below.
    """
    DIRAC.gLogger.notice('Put and register in LFC and DFC:', FileLFN)
    from DIRAC.Interfaces.API.Dirac import Dirac
    from DIRAC.Core.Utilities.SiteSEMapping import getSEsForSite

    result = getSEsForSite(DIRAC.siteName())
    # A failed site lookup means "no local SE"; previously localSEs was left
    # unbound in that case and the loop below raised UnboundLocalError.
    localSEs = result['Value'] if result['OK'] else []

    dirac = Dirac()
    upload_result = 'NOTOK'
    failing_se = []

    # First pass: SEs attached to the local site
    for se in localSEs:
        if se in seList:
            DIRAC.gLogger.notice('Local SE is in the list:', se)
            dirac.addFile(FileLFN, FileName, se)
            res = CheckCatalogCoherence(FileLFN)
            if res != DIRAC.S_OK:
                DIRAC.gLogger.error('Job failed: Catalog Coherence problem found')
                DIRAC.gLogger.notice('Failing SE:', se)
                failing_se.append(se)
                continue
            upload_result = 'OK'

    # Second pass: any SE of the list, stopping at the first success
    if upload_result != 'OK':
        for se in seList:
            DIRAC.gLogger.notice('Try upload to:', se)
            dirac.addFile(FileLFN, FileName, se)
            res = CheckCatalogCoherence(FileLFN)
            if res != DIRAC.S_OK:
                DIRAC.gLogger.error('Job failed: Catalog Coherence problem found')
                failing_se.append(se)
                DIRAC.gLogger.notice('Failing SE:', se)
                continue
            upload_result = 'OK'
            break

    DIRAC.gLogger.notice('Failing SE list:', failing_se)

    if upload_result != 'OK':
        return DIRAC.S_ERROR
    return DIRAC.S_OK
def __updateOtherSites(self, job, stagingSite, stagedLFNsPerSE, optInfo):
    """ Update the Optimizer Info of the sites that share an SE with the staging site.

    For every candidate site other than ``stagingSite``, each LFN staged at an
    SE of that site, and not already on one of its readable disk SEs, moves one
    unit from the site's 'tape' counter to its 'disk' counter. The modified
    ``optInfo`` is stored with ``setOptimizerJobInfo`` if anything changed.

    :param job: job identifier forwarded to ``setOptimizerJobInfo``
    :param stagingSite: site where staging was requested; it is skipped
    :param stagedLFNsPerSE: mapping SE name -> staged LFNs
    :param optInfo: optimizer info holding 'SiteCandidates' and the replica info
    """
    changed = False
    # SE name -> status, shared across sites to avoid repeated queries
    statusCache = {}
    for site, counters in optInfo['SiteCandidates'].items():
        if stagingSite == site:
            continue
        lookup = getSEsForSite(site)
        if not lookup['OK']:
            continue
        closeSEs = lookup['Value']

        # Readable disk SEs among the SEs of this site
        siteDiskSEs = []
        for se in closeSEs:
            if se not in statusCache:
                try:
                    seObject = StorageElement(se)
                    statusCache[se] = seObject.getStatus()['Value']
                except Exception:
                    self.log.exception('Failed to instantiate StorageElement( %s )' % se)
                    continue
            status = statusCache[se]
            if status['Read'] and status['DiskSE']:
                siteDiskSEs.append(se)

        for lfn, replicas in optInfo['Value']['Value']['Successful'].items():
            for stageSE, stageLFNs in stagedLFNsPerSE.items():
                if lfn not in stageLFNs or stageSE not in closeSEs:
                    continue
                # The LFN was staged at an SE of this site: count it as disk
                # unless it already had a replica on one of the site's disk SEs
                if not any(se in siteDiskSEs for se in replicas):
                    changed = True
                    counters['disk'] += 1
                    counters['tape'] -= 1
                # An LFN is counted at most once per site
                break

    if changed:
        self.log.verbose('Updating %s Optimizer Info for Job %s:' % (self.dataAgentName, job), optInfo)
        self.setOptimizerJobInfo(job, self.dataAgentName, optInfo)
def __updateOtherSites( self, job, stagingSite, stagedLFNsPerSE, optInfo ):
    """ Update the Optimizer Info of the sites that share an SE with the staging site.

    For every candidate site other than ``stagingSite``, each LFN staged at an
    SE of that site, and not already on one of its readable disk SEs, moves one
    unit from the site's 'tape' counter to its 'disk' counter. The modified
    ``optInfo`` is stored with ``setOptimizerJobInfo`` if anything changed.

    :param job: job identifier forwarded to ``setOptimizerJobInfo``
    :param stagingSite: site where staging was requested; it is skipped
    :param stagedLFNsPerSE: mapping SE name -> staged LFNs
    :param optInfo: optimizer info holding 'SiteCandidates' and the replica info
    """
    changed = False
    # SE name -> status, shared across sites to avoid repeated queries
    statusCache = {}
    for site, counters in optInfo['SiteCandidates'].items():
        if stagingSite == site:
            continue
        lookup = getSEsForSite( site )
        if not lookup['OK']:
            continue
        closeSEs = lookup['Value']

        # Readable disk SEs among the SEs of this site
        siteDiskSEs = []
        for se in closeSEs:
            if se not in statusCache:
                try:
                    seObject = StorageElement( se )
                    statusCache[se] = seObject.getStatus()['Value']
                except Exception:
                    self.log.exception( 'Failed to instantiate StorageElement( %s )' % se )
                    continue
            status = statusCache[se]
            if status['Read'] and status['DiskSE']:
                siteDiskSEs.append( se )

        for lfn, replicas in optInfo['Value']['Value']['Successful'].items():
            for stageSE, stageLFNs in stagedLFNsPerSE.items():
                if lfn not in stageLFNs or stageSE not in closeSEs:
                    continue
                # The LFN was staged at an SE of this site: count it as disk
                # unless it already had a replica on one of the site's disk SEs
                if not any( se in siteDiskSEs for se in replicas ):
                    changed = True
                    counters['disk'] += 1
                    counters['tape'] -= 1
                # An LFN is counted at most once per site
                break

    if changed:
        self.log.verbose( 'Updating %s Optimizer Info for Job %s:' % ( self.dataAgentName, job ), optInfo )
        self.setOptimizerJobInfo( job, self.dataAgentName, optInfo )
def __updateOtherSites(self, job, stagingSite, stagedLFNsPerSE, optInfo):
    """Update the Optimizer Info of the sites that share an SE with the staging site.

    For every candidate site other than ``stagingSite``, each LFN staged at an
    SE of that site, and not already on one of its readable disk SEs, moves one
    unit from the site's "tape" counter to its "disk" counter. The modified
    ``optInfo`` is stored with ``setOptimizerJobInfo`` if anything changed.

    An SE whose status cannot be retrieved is logged and ignored instead of
    aborting the whole update.

    :param job: job identifier forwarded to ``setOptimizerJobInfo``
    :param stagingSite: site where staging was requested; it is skipped
    :param stagedLFNsPerSE: mapping SE name -> staged LFNs
    :param optInfo: optimizer info holding "SiteCandidates" and the replica info
    """
    updated = False
    # SE name -> status; an SE shared by several sites is queried only once
    seStatusCache = {}
    for site, siteDict in optInfo["SiteCandidates"].items():
        if stagingSite == site:
            continue
        closeSEs = getSEsForSite(site)
        if not closeSEs["OK"]:
            continue
        closeSEs = closeSEs["Value"]

        siteDiskSEs = []
        for se in closeSEs:
            if se not in seStatusCache:
                res = StorageElement(se).getStatus()
                if not res["OK"]:
                    # Reading res["Value"] on a failed result would raise KeyError
                    self.log.error("Cannot retrieve SE %s status: %s" % (se, res["Message"]))
                    continue
                seStatusCache[se] = res["Value"]
            seStatus = seStatusCache[se]
            if seStatus["Read"] and seStatus["DiskSE"]:
                siteDiskSEs.append(se)

        for lfn, replicas in optInfo["Value"]["Value"]["Successful"].items():
            for stageSE, stageLFNs in stagedLFNsPerSE.items():
                if lfn in stageLFNs and stageSE in closeSEs:
                    # The LFN has been staged at an SE close to the site: count it
                    # as disk unless it was already on a disk SE of the site
                    if not any(se in siteDiskSEs for se in replicas):
                        # This is updating optInfo
                        updated = True
                        siteDict["disk"] += 1
                        siteDict["tape"] -= 1
                    # An LFN is counted at most once per site
                    break

    if updated:
        self.setOptimizerJobInfo(job, self.dataAgentName, optInfo)
def __preRequestStaging( self, jobState, stageSite, opData ):
    """ Work out which LFNs must be staged at ``stageSite`` and from which tape SE.

    LFNs with a replica on a readable disk SE of the site are skipped. Each
    remaining LFN is assigned to exactly one readable tape SE of the site: when
    it has replicas on several, the SE with the most LFNs to stage keeps it.

    :param jobState: not used by this implementation
    :param stageSite: site at which the staging will take place
    :param opData: optimizer data; replicas are read from
                   ``opData['Value']['Value']['Successful']``
    :return: on success the plain dict { SE name : [ lfns ] }; on failure an
             S_ERROR structure
    """
    # NOTE(review): the success path returns the bare dict while the failure paths
    # return S_ERROR; confirm what the callers expect before wrapping it in S_OK.
    result = getSEsForSite( stageSite )
    if not result['OK']:
        return S_ERROR( 'Could not determine SEs for site %s' % stageSite )
    siteSEs = result['Value']

    # Classify the readable SEs of the site
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        se = StorageElement( seName )
        result = se.getStatus()
        if not result[ 'OK' ]:
            self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
            return S_ERROR( "Cannot retrieve SE status" )
        seStatus = result[ 'Value' ]
        if seStatus[ 'Read' ] and seStatus[ 'TapeSE' ]:
            tapeSEs.append( seName )
        if seStatus[ 'Read' ] and seStatus[ 'DiskSE' ]:
            diskSEs.append( seName )

    if not tapeSEs:
        return S_ERROR( "No Local SEs for site %s" % stageSite )

    self.jobLog.verbose( "Tape SEs are %s" % ( ", ".join( tapeSEs ) ) )

    # The replica information sits three levels deep in the optimizer data
    inputData = opData['Value']['Value']['Successful']
    stageLFNs = {}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[ lfn ]
        # Collect the tape SEs of the site holding this LFN
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is on disk: nothing to stage for it
                seStage = []
                break
            if seName not in tapeSEs:
                # Replica is not on a tape SE of this site. Check next SE
                continue
            seStage.append( seName )
        for seName in seStage:
            if seName not in stageLFNs:
                stageLFNs[ seName ] = []
            stageLFNs[ seName ].append( lfn )
            if lfn not in lfnToStage:
                lfnToStage.append( lfn )

    if not stageLFNs:
        return S_ERROR( "Cannot find tape replicas" )

    # Check if any LFN is in more than one SE.
    # If so, stage it from the SE that has more LFNs to stage, to group the request.
    # 1.- Get the SEs ordered by descending number of LFNs. This must be a real
    #     list: a reversed() iterator is exhausted after the first LFN, leaving
    #     the duplicates of all the other LFNs in place.
    sortedSEs = sorted( ( ( len( stageLFNs[ seName ] ), seName ) for seName in stageLFNs ), reverse = True )
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn not in stageLFNs[ seName ]:
                continue
            # 3.- If first time found, just mark as found.
            #     Next time delete the replica from the request
            if found:
                stageLFNs[ seName ].remove( lfn )
            else:
                found = True
    # 4.- Remove the SEs left without LFNs. Done after the traversal so that
    #     sortedSEs never refers to an SE already dropped from stageLFNs.
    for emptySE in [ name for name in stageLFNs if not stageLFNs[ name ] ]:
        stageLFNs.pop( emptySE )

    return stageLFNs
def runDiracConfigure(params):
    """Configure a DIRAC installation and write the local configuration file.

    The function registers the command line switches and parses them, fills the
    parameters not given on the command line from the installation section of
    the configuration, optionally synchronises CAs/CRLs, resolves the site name
    and local SEs from the remote configuration, dumps the local cfg file, and
    finally creates the vomsdir/vomses files for all VOs.

    :param params: parameter holder exposing the ``set*`` switch callbacks and
                   the attributes they fill (``setup``, ``siteName``, ``ceName``,
                   ``localSE``, ``outputFile``, ``update``, ...)
    :return: 0 on success, 1 when no proxy is available for ``IncludeAllServers``,
             when the VOMS server info cannot be retrieved, or when a
             vomsdir/vomses file could not be generated
    """
    Script.registerSwitch("S:", "Setup=", "Set <setup> as DIRAC setup", params.setSetup)
    Script.registerSwitch("e:", "Extensions=", "Set <extensions> as DIRAC extensions", params.setExtensions)
    Script.registerSwitch("C:", "ConfigurationServer=", "Set <server> as DIRAC configuration server", params.setServer)
    Script.registerSwitch("I", "IncludeAllServers", "include all Configuration Servers", params.setAllServers)
    Script.registerSwitch("n:", "SiteName=", "Set <sitename> as DIRAC Site Name", params.setSiteName)
    Script.registerSwitch("N:", "CEName=", "Determiner <sitename> from <cename>", params.setCEName)
    Script.registerSwitch("V:", "VO=", "Set the VO name", params.setVO)

    Script.registerSwitch("W:", "gateway=", "Configure <gateway> as DIRAC Gateway for the site", params.setGateway)

    Script.registerSwitch("U", "UseServerCertificate", "Configure to use Server Certificate", params.setServerCert)
    Script.registerSwitch("H", "SkipCAChecks", "Configure to skip check of CAs", params.setSkipCAChecks)
    Script.registerSwitch("D", "SkipCADownload", "Configure to skip download of CAs", params.setSkipCADownload)
    Script.registerSwitch(
        "M", "SkipVOMSDownload", "Configure to skip download of VOMS info", params.setSkipVOMSDownload
    )

    Script.registerSwitch("v", "UseVersionsDir", "Use versions directory", params.setUseVersionsDir)

    Script.registerSwitch("A:", "Architecture=", "Configure /Architecture=<architecture>", params.setArchitecture)
    Script.registerSwitch("L:", "LocalSE=", "Configure LocalSite/LocalSE=<localse>", params.setLocalSE)

    Script.registerSwitch(
        "F",
        "ForceUpdate",
        "Force Update of cfg file (i.e. dirac.cfg) (otherwise nothing happens if dirac.cfg already exists)",
        params.forceUpdate,
    )

    Script.registerSwitch("O:", "output=", "output configuration file", params.setOutput)

    Script.parseCommandLine(ignoreErrors=True)

    # Anything not given on the command line is taken from the installation
    # section of the configuration (cfgInstallPath), when present there.
    if not params.logLevel:
        params.logLevel = DIRAC.gConfig.getValue(cfgInstallPath("LogLevel"), "")
        if params.logLevel:
            DIRAC.gLogger.setLevel(params.logLevel)
    else:
        DIRAC.gConfig.setOptionValue(cfgInstallPath("LogLevel"), params.logLevel)

    if not params.gatewayServer:
        newGatewayServer = DIRAC.gConfig.getValue(cfgInstallPath("Gateway"), "")
        if newGatewayServer:
            params.setGateway(newGatewayServer)

    if not params.configurationServer:
        newConfigurationServer = DIRAC.gConfig.getValue(cfgInstallPath("ConfigurationServer"), "")
        if newConfigurationServer:
            params.setServer(newConfigurationServer)

    if not params.includeAllServers:
        newIncludeAllServer = DIRAC.gConfig.getValue(cfgInstallPath("IncludeAllServers"), False)
        if newIncludeAllServer:
            params.setAllServers(True)

    if not params.setup:
        newSetup = DIRAC.gConfig.getValue(cfgInstallPath("Setup"), "")
        if newSetup:
            params.setSetup(newSetup)

    if not params.siteName:
        newSiteName = DIRAC.gConfig.getValue(cfgInstallPath("SiteName"), "")
        if newSiteName:
            params.setSiteName(newSiteName)

    if not params.ceName:
        newCEName = DIRAC.gConfig.getValue(cfgInstallPath("CEName"), "")
        if newCEName:
            params.setCEName(newCEName)

    if not params.useServerCert:
        newUserServerCert = DIRAC.gConfig.getValue(cfgInstallPath("UseServerCertificate"), False)
        if newUserServerCert:
            params.setServerCert(newUserServerCert)

    if not params.skipCAChecks:
        newSkipCAChecks = DIRAC.gConfig.getValue(cfgInstallPath("SkipCAChecks"), False)
        if newSkipCAChecks:
            params.setSkipCAChecks(newSkipCAChecks)

    if not params.skipCADownload:
        newSkipCADownload = DIRAC.gConfig.getValue(cfgInstallPath("SkipCADownload"), False)
        if newSkipCADownload:
            params.setSkipCADownload(newSkipCADownload)

    if not params.useVersionsDir:
        newUseVersionsDir = DIRAC.gConfig.getValue(cfgInstallPath("UseVersionsDir"), False)
        if newUseVersionsDir:
            params.setUseVersionsDir(newUseVersionsDir)
            # Set proper Defaults in configuration (even if they will be properly overwrite by gComponentInstaller
            instancePath = os.path.dirname(os.path.dirname(DIRAC.rootPath))
            rootPath = os.path.join(instancePath, "pro")
            DIRAC.gConfig.setOptionValue(cfgInstallPath("InstancePath"), instancePath)
            DIRAC.gConfig.setOptionValue(cfgInstallPath("RootPath"), rootPath)

    if not params.architecture:
        newArchitecture = DIRAC.gConfig.getValue(cfgInstallPath("Architecture"), "")
        if newArchitecture:
            params.setArchitecture(newArchitecture)

    if not params.vo:
        newVO = DIRAC.gConfig.getValue(cfgInstallPath("VirtualOrganization"), "")
        if newVO:
            params.setVO(newVO)

    if not params.extensions:
        newExtensions = DIRAC.gConfig.getValue(cfgInstallPath("Extensions"), "")
        if newExtensions:
            params.setExtensions(newExtensions)

    DIRAC.gLogger.notice("Executing: %s " % (" ".join(sys.argv)))
    DIRAC.gLogger.notice('Checking DIRAC installation at "%s"' % DIRAC.rootPath)

    if params.update:
        if params.outputFile:
            DIRAC.gLogger.notice("Will update the output file %s" % params.outputFile)
        else:
            DIRAC.gLogger.notice("Will update %s" % DIRAC.gConfig.diracConfigFilePath)

    if params.setup:
        DIRAC.gLogger.verbose("/DIRAC/Setup =", params.setup)
    if params.vo:
        DIRAC.gLogger.verbose("/DIRAC/VirtualOrganization =", params.vo)
    if params.configurationServer:
        DIRAC.gLogger.verbose("/DIRAC/Configuration/Servers =", params.configurationServer)

    if params.siteName:
        DIRAC.gLogger.verbose("/LocalSite/Site =", params.siteName)
    if params.architecture:
        DIRAC.gLogger.verbose("/LocalSite/Architecture =", params.architecture)
    if params.localSE:
        DIRAC.gLogger.verbose("/LocalSite/localSE =", params.localSE)

    if not params.useServerCert:
        DIRAC.gLogger.verbose("/DIRAC/Security/UseServerCertificate =", "no")
        # Being sure it was not there before
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "no")
    else:
        DIRAC.gLogger.verbose("/DIRAC/Security/UseServerCertificate =", "yes")
        # Being sure it was not there before
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "yes")

    host = DIRAC.gConfig.getValue(cfgInstallPath("Host"), "")
    if host:
        DIRAC.gConfig.setOptionValue(cfgPath("DIRAC", "Hostname"), host)

    if params.skipCAChecks:
        DIRAC.gLogger.verbose("/DIRAC/Security/SkipCAChecks =", "yes")
        # Being sure it was not there before
        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")
        Script.localCfg.addDefaultEntry("/DIRAC/Security/SkipCAChecks", "yes")
    else:
        # Necessary to allow initial download of CA's
        if not params.skipCADownload:
            DIRAC.gConfig.setOptionValue("/DIRAC/Security/SkipCAChecks", "yes")

    if not params.skipCADownload:
        Script.enableCS()
        try:
            dirName = os.path.join(DIRAC.rootPath, "etc", "grid-security", "certificates")
            mkDir(dirName)
        except Exception:
            DIRAC.gLogger.exception()
            DIRAC.gLogger.fatal("Fail to create directory:", dirName)
            DIRAC.exit(-1)
        try:
            bdc = BundleDeliveryClient()
            result = bdc.syncCAs()
            if result["OK"]:
                result = bdc.syncCRLs()
        except Exception as e:
            DIRAC.gLogger.error("Failed to sync CAs and CRLs: %s" % str(e))

        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")

    if params.ceName or params.siteName:
        # This is used in the pilot context, we should have a proxy, or a certificate, and access to CS
        if params.useServerCert:
            # Being sure it was not there before
            Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
            Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "yes")
        Script.enableCS()
        # Get the site resource section
        gridSections = DIRAC.gConfig.getSections("/Resources/Sites/")
        if not gridSections["OK"]:
            DIRAC.gLogger.warn("Could not get grid sections list")
            grids = []
        else:
            grids = gridSections["Value"]
        # try to get siteName from ceName or Local SE from siteName using Remote Configuration
        for grid in grids:
            siteSections = DIRAC.gConfig.getSections("/Resources/Sites/%s/" % grid)
            if not siteSections["OK"]:
                DIRAC.gLogger.warn("Could not get %s site list" % grid)
                sites = []
            else:
                sites = siteSections["Value"]

            if not params.siteName:
                if params.ceName:
                    for site in sites:
                        res = DIRAC.gConfig.getSections("/Resources/Sites/%s/%s/CEs/" % (grid, site), [])
                        if not res["OK"]:
                            DIRAC.gLogger.warn("Could not get %s CEs list" % site)
                            # Nothing to inspect for this site; reading res["Value"]
                            # on a failed result would raise KeyError
                            continue
                        if params.ceName in res["Value"]:
                            params.siteName = site
                            break
            if params.siteName:
                DIRAC.gLogger.notice("Setting /LocalSite/Site = %s" % params.siteName)
                Script.localCfg.addDefaultEntry("/LocalSite/Site", params.siteName)
                DIRAC.__siteName = False
                if params.ceName:
                    DIRAC.gLogger.notice("Setting /LocalSite/GridCE = %s" % params.ceName)
                    Script.localCfg.addDefaultEntry("/LocalSite/GridCE", params.ceName)

                if not params.localSE and params.siteName in sites:
                    # Keep the lookup result in a local: params.localSE must only
                    # ever hold the comma-separated SE string, never a result dict
                    seLookup = getSEsForSite(params.siteName)
                    if seLookup["OK"] and seLookup["Value"]:
                        params.localSE = ",".join(seLookup["Value"])
                        DIRAC.gLogger.notice("Setting /LocalSite/LocalSE =", params.localSE)
                        Script.localCfg.addDefaultEntry("/LocalSite/LocalSE", params.localSE)
                    break

    if params.gatewayServer:
        DIRAC.gLogger.verbose("/DIRAC/Gateways/%s =" % DIRAC.siteName(), params.gatewayServer)
        Script.localCfg.addDefaultEntry("/DIRAC/Gateways/%s" % DIRAC.siteName(), params.gatewayServer)

    # Create the local cfg if it is not yet there
    if not params.outputFile:
        params.outputFile = DIRAC.gConfig.diracConfigFilePath
    params.outputFile = os.path.abspath(params.outputFile)
    if not os.path.exists(params.outputFile):
        configDir = os.path.dirname(params.outputFile)
        mkDir(configDir)
        params.update = True
        DIRAC.gConfig.dumpLocalCFGToFile(params.outputFile)

    if params.includeAllServers:
        # We need user proxy or server certificate to continue in order to get all the CS URLs
        if not params.useServerCert:
            Script.enableCS()
            result = getProxyInfo()
            if not result["OK"]:
                DIRAC.gLogger.notice("Configuration is not completed because no user proxy is available")
                DIRAC.gLogger.notice("Create one using dirac-proxy-init and execute again with -F option")
                return 1
        else:
            Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
            # When using Server Certs CA's will be checked, the flag only disables initial download
            # this will be replaced by the use of SkipCADownload
            Script.localCfg.addDefaultEntry("/DIRAC/Security/UseServerCertificate", "yes")
            Script.enableCS()

        DIRAC.gConfig.setOptionValue("/DIRAC/Configuration/Servers", ",".join(DIRAC.gConfig.getServersList()))
        DIRAC.gLogger.verbose("/DIRAC/Configuration/Servers =", ",".join(DIRAC.gConfig.getServersList()))

    if params.useServerCert:
        # always removing before dumping
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")
        Script.localCfg.deleteOption("/DIRAC/Security/SkipVOMSDownload")

    if params.update:
        DIRAC.gConfig.dumpLocalCFGToFile(params.outputFile)

    # ## LAST PART: do the vomsdir/vomses magic

    # This has to be done for all VOs in the installation

    if params.skipVOMSDownload:
        return 0

    result = Registry.getVOMSServerInfo()
    if not result["OK"]:
        return 1

    # Last error message seen; a non-empty value turns the exit code into 1
    error = ""
    vomsDict = result["Value"]
    for vo in vomsDict:
        voName = vomsDict[vo]["VOMSName"]
        vomsDirPath = os.path.join(DIRAC.rootPath, "etc", "grid-security", "vomsdir", voName)
        vomsesDirPath = os.path.join(DIRAC.rootPath, "etc", "grid-security", "vomses")
        for path in (vomsDirPath, vomsesDirPath):
            mkDir(path)
        vomsesLines = []
        for vomsHost in vomsDict[vo].get("Servers", {}):
            hostFilePath = os.path.join(vomsDirPath, "%s.lsc" % vomsHost)
            try:
                DN = vomsDict[vo]["Servers"][vomsHost]["DN"]
                CA = vomsDict[vo]["Servers"][vomsHost]["CA"]
                port = vomsDict[vo]["Servers"][vomsHost]["Port"]
                if not DN or not CA or not port:
                    DIRAC.gLogger.error("DN = %s" % DN)
                    DIRAC.gLogger.error("CA = %s" % CA)
                    DIRAC.gLogger.error("Port = %s" % port)
                    DIRAC.gLogger.error("Missing Parameter for %s" % vomsHost)
                    continue
                # One .lsc file per VOMS host: host DN followed by its CA DN
                with open(hostFilePath, "wt") as fd:
                    fd.write("%s\n%s\n" % (DN, CA))
                vomsesLines.append('"%s" "%s" "%s" "%s" "%s" "24"' % (voName, vomsHost, port, DN, voName))
                DIRAC.gLogger.notice("Created vomsdir file %s" % hostFilePath)
            except Exception:
                DIRAC.gLogger.exception("Could not generate vomsdir file for host", vomsHost)
                error = "Could not generate vomsdir file for VO %s, host %s" % (voName, vomsHost)
        try:
            vomsesFilePath = os.path.join(vomsesDirPath, voName)
            with open(vomsesFilePath, "wt") as fd:
                fd.write("%s\n" % "\n".join(vomsesLines))
            DIRAC.gLogger.notice("Created vomses file %s" % vomsesFilePath)
        except Exception:
            DIRAC.gLogger.exception("Could not generate vomses file")
            error = "Could not generate vomses file for VO %s" % voName

    if params.useServerCert:
        Script.localCfg.deleteOption("/DIRAC/Security/UseServerCertificate")
        # When using Server Certs CA's will be checked, the flag only disables initial download
        # this will be replaced by the use of SkipCADownload
        Script.localCfg.deleteOption("/DIRAC/Security/SkipCAChecks")

    if error:
        return 1

    return 0
def __setStagingRequest(self, job, destination, inputDataDict):
    """Formulate a staging request and save it as a job optimizer parameter.

    :param job: job identifier (converted with ``int()`` when the request id is stored)
    :param destination: name of the site whose storage elements are considered
    :param inputDataDict: replica lookup result; replicas are read from
                          ``inputDataDict['Value']['Value']['Successful']`` as
                          ``{lfn: {seName: surl}}``
    :return: S_OK({seName: [lfn, ...]}) with the request that was sent, or S_ERROR
    """
    self.log.verbose('Destination site %s' % (destination))
    self.log.verbose('Input Data: %s' % (inputDataDict))

    destinationSEs = getSEsForSite(destination)
    if not destinationSEs['OK']:
        return S_ERROR('Could not determine SEs for site %s' % destination)
    destinationSEs = destinationSEs['Value']

    # Classify the readable SEs of the site as tape and/or disk
    siteTapeSEs = []
    siteDiskSEs = []
    for se in destinationSEs:
        statusResult = StorageElement(se).getStatus()
        if not statusResult['OK']:
            # An unchecked ['Value'] lookup would raise KeyError on a failed query
            self.log.error('Cannot retrieve SE %s status: %s' % (se, statusResult['Message']))
            return S_ERROR('Cannot retrieve SE status')
        seStatus = statusResult['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            siteTapeSEs.append(se)
        if seStatus['Read'] and seStatus['DiskSE']:
            siteDiskSEs.append(se)

    if not siteTapeSEs:
        return S_ERROR('No LocalSEs For Site')

    self.log.verbose('Site tape SEs: %s' % (', '.join(siteTapeSEs)))

    stageSURLs = {}  # OLD WAY: {lfn: {se: surl}}
    stageLfns = {}  # NEW WAY: {se: [lfn, ...]}
    inputData = inputDataDict['Value']['Value']['Successful']
    for lfn, reps in inputData.items():
        # Collect the tape replicas of this file first and only register them
        # once we know that no local disk replica exists.  Registering them
        # while iterating would leave the file in the request whenever a tape
        # replica happens to be visited before the disk one.
        tapeReplicas = {}
        for se, surl in reps.items():
            if se in siteDiskSEs:
                # this File is on Disk, we can ignore it
                tapeReplicas = {}
                break
            if se not in siteTapeSEs:
                # this File is not being staged
                continue
            tapeReplicas[se] = surl
        if not tapeReplicas:
            continue
        stageSURLs[lfn] = tapeReplicas
        for se in tapeReplicas:
            stageLfns.setdefault(se, []).append(lfn)

    # Now we need to check if any LFN is in more than one SE: keep it only in
    # the SE that has the most files to stage
    if len(stageLfns) > 1:
        stageSEs = sorted((len(lfns), se) for se, lfns in stageLfns.items())
        for lfn in stageSURLs:
            lfnFound = False
            for _numberOfLfns, se in reversed(stageSEs):
                if lfn not in stageLfns[se]:
                    continue
                if lfnFound:
                    stageLfns[se].remove(lfn)
                else:
                    lfnFound = True

    stagerClient = StorageManagerClient()
    request = stagerClient.setRequest(stageLfns, 'WorkloadManagement',
                                      'updateJobFromStager@WorkloadManagement/JobStateUpdate', job)
    if not request['OK']:
        self.log.error('Problem sending Staging request:')
        self.log.error(request)
        return S_ERROR('Error Sending Staging Request')

    self.jobDB.setJobParameter(int(job), 'StageRequest', str(request['Value']))
    self.log.info('Staging request successfully sent')

    result = self.updateJobStatus(job, self.stagingStatus, self.stagingMinorStatus, "Unknown")
    if not result['OK']:
        return result
    return S_OK(stageLfns)
def requestStage(self, jobState, candidates, lfnData):
    """Send a staging request for the tape replicas held at a randomly chosen candidate site.

    :param jobState: job state object; ``jid``, ``setParameter`` and ``setStatus`` are used
    :param candidates: set-like collection of candidate site names
                       (``intersection`` is called on it)
    :param lfnData: ``{lfn: {"Replicas": {seName: ...}}}``
    :return: S_OK(set of site names) built from the sites serving the SEs in the
             request plus the selected site, or S_ERROR
    """
    # Any site is as good as any so random time!
    # random.sample() no longer accepts a set from Python 3.11 on, hence the list
    stageSite = random.sample(list(candidates), 1)[0]
    self.jobLog.info("Site selected %s for staging" % stageSite)
    result = getSEsForSite(stageSite)
    if not result["OK"]:
        return S_ERROR("Could not determine SEs for site %s" % stageSite)
    siteSEs = result["Value"]

    # Classify the readable SEs of the site as tape and/or disk
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        result = self.__getSEStatus(seName)
        if not result["OK"]:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result["Message"]))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result["Value"]
        if seStatus["Read"] and seStatus["TapeSE"]:
            tapeSEs.append(seName)
        if seStatus["Read"] and seStatus["DiskSE"]:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    stageLFNs = {}  # {seName: [lfn, ...]}
    lfnToStage = []
    for lfn in lfnData:
        replicas = lfnData[lfn]["Replicas"]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        if not seStage:
            continue
        lfnToStage.append(lfn)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs.  This must be a
    #     list: a ``reversed()`` iterator would be exhausted after the first
    #     LFN and the remaining LFNs would never be de-duplicated.
    sortedSEs = sorted(((len(lfns), seName) for seName, lfns in stageLFNs.items()), reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs[seName]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
    # 4.- If empty SE, remove.  Done after the traversal so that the ranking
    #     above never refers to an SE that has already been dropped.
    for seName in [name for name, lfns in stageLFNs.items() if not lfns]:
        stageLFNs.pop(seName)

    self.jobLog.info(
        "Stage request will be \n\t%s" % "\n\t".join(["%s:%s" % (seName, stageLFNs[seName]) for seName in stageLFNs])
    )

    stagerClient = StorageManagerClient()
    result = stagerClient.setRequest(
        stageLFNs, "WorkloadManagement", "stageCallback@WorkloadManagement/OptimizationMind", int(jobState.jid)
    )
    if not result["OK"]:
        self.jobLog.error("Could not send stage request: %s" % result["Message"])
        return S_ERROR("Problem sending staging request")

    rid = str(result["Value"])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)
    result = jobState.setStatus(
        self.ex_getOption("StagingStatus", "Staging"),
        self.ex_getOption("StagingMinorStatus", "Request Sent"),
        appStatus="",
        source=self.ex_optimizerName(),
    )
    if not result["OK"]:
        return result

    # Sites serving every SE in the request; lookups that fail are ignored
    stageCandidates = []
    for seName in stageLFNs:
        result = self.__getSitesForSE(seName)
        if result["OK"]:
            stageCandidates.append(result["Value"])

    stageCandidates = candidates.intersection(*stageCandidates).union([stageSite])
    return S_OK(stageCandidates)
def __preRequestStaging(self, jobState, stageSite, opData):
    """Work out, per tape SE of ``stageSite``, which LFNs would have to be staged.

    :param jobState: job state object; its manifest supplies the ``VirtualOrganization`` option
    :param stageSite: name of the site whose storage elements are considered
    :param opData: replica lookup result; replicas are read from
                   ``opData['Value']['Value']['Successful']`` as ``{lfn: {seName: ...}}``
    :return: S_OK({seName: [lfn, ...]}) or S_ERROR
    """
    result = getSEsForSite(stageSite)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    result = jobState.getManifest()
    if not result['OK']:
        return result
    manifest = result['Value']
    vo = manifest.getOption('VirtualOrganization')

    # Classify the readable SEs of the site as tape and/or disk
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        result = StorageElement(seName, vo=vo).getStatus()
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    # I swear this is horrible DM code it's not mine.
    # Eternity of hell to the inventor of the Value of Value of Success of...
    inputData = opData['Value']['Value']['Successful']
    stageLFNs = {}  # {seName: [lfn, ...]}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        if not seStage:
            continue
        lfnToStage.append(lfn)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs.  This must be a
    #     list: a ``reversed()`` iterator would be exhausted after the first
    #     LFN and the remaining LFNs would never be de-duplicated.
    sortedSEs = sorted(((len(lfns), seName) for seName, lfns in stageLFNs.items()), reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs[seName]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
    # 4.- If empty SE, remove.  Done after the traversal so that the ranking
    #     above never refers to an SE that has already been dropped.
    for seName in [name for name, lfns in stageLFNs.items() if not lfns]:
        stageLFNs.pop(seName)

    return S_OK(stageLFNs)
def getDestinationSEList(outputSE, site, outputmode='Any'):
    """Evaluate the output SE description of a workflow and return the concrete
    list of SEs to upload output data to.

    Resolution order: a concrete SE of that name, a site-level ``AssociatedSEs``
    alias, then the storage element group of that name.  In ``local`` mode a
    single SE is picked (a local SE of the group, else a country-associated
    one); in any other mode the whole group is returned with local SEs first.

    :param outputSE: SE name, alias or storage element group name
    :param site: site name; the text before the first ``.`` is used as the grid
                 prefix and the text after the last ``.`` as the country
    :param outputmode: ``'local'`` (case-insensitive) or anything else for the
                       collective behaviour
    :return: S_OK(list of SE names) or S_ERROR
    """
    # Add output SE defined in the job description
    gLogger.info('Resolving workflow output SE description: %s' % outputSE)

    # Check if the SE is defined explicitly for the site
    prefix = site.split('.')[0]
    country = site.split('.')[-1]

    # Concrete SE name
    result = gConfig.getOptions('/Resources/StorageElements/' + outputSE)
    if result['OK']:
        gLogger.info('Found concrete SE %s' % outputSE)
        return S_OK([outputSE])

    # There is an alias defined for this Site
    alias_se = gConfig.getValue('/Resources/Sites/%s/%s/AssociatedSEs/%s' % (prefix, site, outputSE), [])
    if alias_se:
        gLogger.info('Found associated SE for site %s' % (alias_se))
        return S_OK(alias_se)

    result = getSEsForSite(site)
    if not result['OK']:
        # An unchecked ['Value'] lookup would raise KeyError on a failed query
        gLogger.error('Could not determine SEs for site %s' % site)
        return S_ERROR('Could not determine SEs for site %s' % site)
    localSEs = result['Value']
    gLogger.verbose('Local SE list is: %s' % (localSEs))

    groupSEs = gConfig.getValue('/Resources/StorageElementGroups/' + outputSE, [])
    gLogger.verbose('Group SE list is: %s' % (groupSEs))
    if not groupSEs:
        return S_ERROR('Failed to resolve SE ' + outputSE)

    if outputmode.lower() == "local":
        for se in localSEs:
            if se in groupSEs:
                gLogger.info('Found eligible local SE: %s' % (se))
                return S_OK([se])

        # check if country is already one with associated SEs
        associatedSE = gConfig.getValue('/Resources/Countries/%s/AssociatedSEs/%s' % (country, outputSE), '')
        if associatedSE:
            gLogger.info('Found associated SE %s in /Resources/Countries/%s/AssociatedSEs/%s' %
                         (associatedSE, country, outputSE))
            return S_OK([associatedSE])

        # Final check for country associated SE: follow the AssignedTo chain
        # (at most 10 hops) until a country with an AssociatedSEs option is found
        count = 0
        assignedCountry = country
        while count < 10:
            gLogger.verbose('Loop count = %s' % (count))
            gLogger.verbose("/Resources/Countries/%s/AssignedTo" % assignedCountry)
            opt = gConfig.getOption("/Resources/Countries/%s/AssignedTo" % assignedCountry)
            if opt['OK'] and opt['Value']:
                assignedCountry = opt['Value']
                gLogger.verbose('/Resources/Countries/%s/AssociatedSEs' % assignedCountry)
                assocCheck = gConfig.getOption('/Resources/Countries/%s/AssociatedSEs' % assignedCountry)
                if assocCheck['OK'] and assocCheck['Value']:
                    break
            count += 1

        if not assignedCountry:
            return S_ERROR('Could not determine associated SE list for %s' % country)

        alias_se = gConfig.getValue('/Resources/Countries/%s/AssociatedSEs/%s' % (assignedCountry, outputSE), [])
        if alias_se:
            gLogger.info('Found alias SE for site: %s' % alias_se)
            return S_OK(alias_se)

        gLogger.error('Could not establish alias SE for country %s from section: /Resources/Countries/%s/AssociatedSEs/%s' %
                      (country, assignedCountry, outputSE))
        return S_ERROR('Failed to resolve SE ' + outputSE)

    # For collective Any and All modes return the whole group
    # Make sure that local SEs are passing first
    newSEList = [se for se in groupSEs if se in localSEs]
    uniqueSEs = uniqueElements(newSEList + groupSEs)
    gLogger.verbose('Found unique SEs: %s' % (uniqueSEs))
    return S_OK(uniqueSEs)
def __requestStaging(self, jobState, stageSite, opData):
    """Send a staging request for the tape replicas held at ``stageSite``.

    The job status is set to 'Request To Be Sent' before the request is
    submitted and to 'Request Sent' afterwards.

    :param jobState: job state object; ``jid``, ``setParameter`` and ``setStatus`` are used
    :param stageSite: name of the site whose storage elements are considered
    :param opData: replica lookup result; replicas are read from
                   ``opData['Value']['Value']['Successful']`` as ``{lfn: {seName: ...}}``
    :return: S_OK({seName: [lfn, ...]}) with the request that was sent, or S_ERROR
    """
    result = getSEsForSite(stageSite)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    # Classify the readable SEs of the site as tape and/or disk
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        result = StorageElement(seName).getStatus()
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    # I swear this is horrible DM code it's not mine.
    # Eternity of hell to the inventor of the Value of Value of Success of...
    inputData = opData['Value']['Value']['Successful']
    stageLFNs = {}  # {seName: [lfn, ...]}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        if not seStage:
            continue
        lfnToStage.append(lfn)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs.  This must be a
    #     list: a ``reversed()`` iterator would be exhausted after the first
    #     LFN and the remaining LFNs would never be de-duplicated.
    sortedSEs = sorted(((len(lfns), seName) for seName, lfns in stageLFNs.items()), reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs[seName]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
    # 4.- If empty SE, remove.  Done after the traversal so that the ranking
    #     above never refers to an SE that has already been dropped.
    for seName in [name for name, lfns in stageLFNs.items() if not lfns]:
        stageLFNs.pop(seName)

    self.jobLog.verbose("Stage request will be \n\t%s" %
                        "\n\t".join(["%s:%s" % (seName, stageLFNs[seName]) for seName in stageLFNs]))

    stagerClient = StorageManagerClient()
    result = jobState.setStatus(self.ex_getOption('StagingStatus', 'Staging'),
                                self.ex_getOption('StagingMinorStatus', 'Request To Be Sent'),
                                appStatus="",
                                source=self.ex_optimizerName())
    if not result['OK']:
        return result

    result = stagerClient.setRequest(stageLFNs, 'WorkloadManagement',
                                     'updateJobFromStager@WorkloadManagement/JobStateUpdate',
                                     int(jobState.jid))
    if not result['OK']:
        self.jobLog.error("Could not send stage request: %s" % result['Message'])
        return S_ERROR("Problem sending staging request")

    rid = str(result['Value'])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)
    result = jobState.setStatus(self.ex_getOption('StagingStatus', 'Staging'),
                                self.ex_getOption('StagingMinorStatus', 'Request Sent'),
                                appStatus="",
                                source=self.ex_optimizerName())
    if not result['OK']:
        return result
    return S_OK(stageLFNs)
def getSEsForSite(site):
    """Look up the storage elements associated with ``site`` and hand the
    lookup result to ``output``.

    :param site: site name passed through to the SiteSEMapping utility
    """
    # Imported lazily and under an alias so that it is not confused with this
    # wrapper, which carries the same name.
    from DIRAC.Core.Utilities.SiteSEMapping import getSEsForSite as lookupSEsForSite
    output(lookupSEsForSite(site))
def getSEsForSite(site):
    """Look up the storage elements associated with ``site`` and hand the
    lookup result to ``output``.

    :param site: site name passed through to the SiteSEMapping utility
    """
    # Imported lazily and under an alias so that it is not confused with this
    # wrapper, which carries the same name.
    from DIRAC.Core.Utilities.SiteSEMapping import getSEsForSite as lookupSEsForSite
    output(lookupSEsForSite(site))
def __requestStaging(self, jobState, stageSite, opData):
    """Send a staging request for the tape replicas held at ``stageSite``.

    :param jobState: job state object; ``jid``, ``setParameter`` and ``setStatus`` are used
    :param stageSite: name of the site whose storage elements are considered
    :param opData: replica lookup result; replicas are read from
                   ``opData['Value']['Value']['Successful']`` as ``{lfn: {seName: ...}}``
    :return: S_OK({seName: [lfn, ...]}) with the request that was sent, or S_ERROR
    """
    result = getSEsForSite(stageSite)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    # Classify the readable SEs of the site as tape and/or disk
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        result = StorageElement(seName).getStatus()
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    # I swear this is horrible DM code it's not mine.
    # Eternity of hell to the inventor of the Value of Value of Success of...
    inputData = opData['Value']['Value']['Successful']
    stageLFNs = {}  # {seName: [lfn, ...]}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        if not seStage:
            continue
        lfnToStage.append(lfn)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs.  This must be a
    #     list: a ``reversed()`` iterator would be exhausted after the first
    #     LFN and the remaining LFNs would never be de-duplicated.
    sortedSEs = sorted(((len(lfns), seName) for seName, lfns in stageLFNs.items()), reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs[seName]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
    # 4.- If empty SE, remove.  Done after the traversal so that the ranking
    #     above never refers to an SE that has already been dropped.
    for seName in [name for name, lfns in stageLFNs.items() if not lfns]:
        stageLFNs.pop(seName)

    self.jobLog.verbose("Stage request will be \n\t%s" %
                        "\n\t".join(["%s:%s" % (seName, stageLFNs[seName]) for seName in stageLFNs]))

    stagerClient = StorageManagerClient()
    result = stagerClient.setRequest(stageLFNs, 'WorkloadManagement',
                                     'updateJobFromStager@WorkloadManagement/JobStateUpdate',
                                     int(jobState.jid))
    if not result['OK']:
        self.jobLog.error("Could not send stage request: %s" % result['Message'])
        return S_ERROR("Problem sending staging request")

    rid = str(result['Value'])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)
    result = jobState.setStatus(self.ex_getOption('StagingStatus', 'Staging'),
                                self.ex_getOption('StagingMinorStatus', 'Request Sent'),
                                appStatus="",
                                source=self.ex_optimizerName())
    if not result['OK']:
        return result
    return S_OK(stageLFNs)
if ceName: for site in sites: siteCEs = DIRAC.gConfig.getValue( '/Resources/Sites/%s/%s/CE' % ( grid, site ), [] ) if ceName in siteCEs: siteName = site break if siteName: DIRAC.gLogger.notice( 'Setting /LocalSite/Site = %s' % siteName ) Script.localCfg.addDefaultEntry( '/LocalSite/Site', siteName ) DIRAC.__siteName = False if ceName: DIRAC.gLogger.notice( 'Setting /LocalSite/GridCE = %s' % ceName ) Script.localCfg.addDefaultEntry( '/LocalSite/GridCE', ceName ) if not localSE and siteName in sites: localSE = getSEsForSite( siteName ) if localSE['OK'] and localSE['Value']: localSE = ','.join( localSE['Value'] ) DIRAC.gLogger.notice( 'Setting /LocalSite/LocalSE =', localSE ) Script.localCfg.addDefaultEntry( '/LocalSite/LocalSE', localSE ) break if gatewayServer: DIRAC.gLogger.verbose( '/DIRAC/Gateways/%s =' % DIRAC.siteName(), gatewayServer ) Script.localCfg.addDefaultEntry( '/DIRAC/Gateways/%s' % DIRAC.siteName(), gatewayServer ) # Create the local cfg if it is not yet there if not outputFile: outputFile = DIRAC.gConfig.diracConfigFilePath outputFile = os.path.abspath( outputFile ) if not os.path.exists( outputFile ):
def getSEsForSite(site):
    """Return the SiteSEMapping lookup result for the storage elements
    associated with ``site``.

    :param site: site name passed through to the SiteSEMapping utility
    :return: whatever the underlying ``getSEsForSite`` utility returns
    """
    # Imported lazily and under an alias so that it is not confused with this
    # wrapper, which carries the same name.
    from DIRAC.Core.Utilities.SiteSEMapping import getSEsForSite as lookupSEsForSite
    return lookupSEsForSite(site)
def __setStagingRequest(self, job, destination, inputDataDict):
    """Formulate a staging request and save it as a job optimizer parameter.

    :param job: job identifier (converted with ``int()`` when the request id is stored)
    :param destination: name of the site whose storage elements are considered
    :param inputDataDict: replica lookup result; replicas are read from
                          ``inputDataDict['Value']['Value']['Successful']`` as
                          ``{lfn: {seName: surl}}``
    :return: S_OK({seName: [lfn, ...]}) with the request that was sent, or S_ERROR
    """
    self.log.verbose('Destination site %s' % (destination))
    self.log.verbose('Input Data: %s' % (inputDataDict))

    destinationSEs = getSEsForSite(destination)
    if not destinationSEs['OK']:
        return S_ERROR('Could not determine SEs for site %s' % destination)
    destinationSEs = destinationSEs['Value']

    # Classify the readable SEs of the site as tape and/or disk
    siteTapeSEs = []
    siteDiskSEs = []
    for se in destinationSEs:
        statusResult = StorageElement(se).getStatus()
        if not statusResult['OK']:
            # An unchecked ['Value'] lookup would raise KeyError on a failed query
            self.log.error('Cannot retrieve SE %s status: %s' % (se, statusResult['Message']))
            return S_ERROR('Cannot retrieve SE status')
        seStatus = statusResult['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            siteTapeSEs.append(se)
        if seStatus['Read'] and seStatus['DiskSE']:
            siteDiskSEs.append(se)

    if not siteTapeSEs:
        return S_ERROR('No LocalSEs For Site')

    self.log.verbose('Site tape SEs: %s' % (', '.join(siteTapeSEs)))

    stageSURLs = {}  # OLD WAY: {lfn: {se: surl}}
    stageLfns = {}  # NEW WAY: {se: [lfn, ...]}
    inputData = inputDataDict['Value']['Value']['Successful']
    for lfn, reps in inputData.items():
        # Collect the tape replicas of this file first and only register them
        # once we know that no local disk replica exists.  Registering them
        # while iterating would leave the file in the request whenever a tape
        # replica happens to be visited before the disk one.
        tapeReplicas = {}
        for se, surl in reps.items():
            if se in siteDiskSEs:
                # this File is on Disk, we can ignore it
                tapeReplicas = {}
                break
            if se not in siteTapeSEs:
                # this File is not being staged
                continue
            tapeReplicas[se] = surl
        if not tapeReplicas:
            continue
        stageSURLs[lfn] = tapeReplicas
        for se in tapeReplicas:
            stageLfns.setdefault(se, []).append(lfn)

    # Now we need to check if any LFN is in more than one SE: keep it only in
    # the SE that has the most files to stage
    if len(stageLfns) > 1:
        stageSEs = sorted((len(lfns), se) for se, lfns in stageLfns.items())
        for lfn in stageSURLs:
            lfnFound = False
            for _numberOfLfns, se in reversed(stageSEs):
                if lfn not in stageLfns[se]:
                    continue
                if lfnFound:
                    stageLfns[se].remove(lfn)
                else:
                    lfnFound = True

    stagerClient = StorageManagerClient()
    request = stagerClient.setRequest(stageLfns, 'WorkloadManagement',
                                      'updateJobFromStager@WorkloadManagement/JobStateUpdate', job)
    if not request['OK']:
        self.log.error('Problem sending Staging request:')
        self.log.error(request)
        return S_ERROR('Error Sending Staging Request')

    self.jobDB.setJobParameter(int(job), 'StageRequest', str(request['Value']))
    self.log.info('Staging request successfully sent')

    result = self.updateJobStatus(job, self.stagingStatus, self.stagingMinorStatus, "Unknown")
    if not result['OK']:
        return result
    return S_OK(stageLfns)
gLogger.always('Site: %s' % site) for lfn in inputData: filesAtSite.setdefault(site, {}).setdefault(lfn, []).append(jobID) progressBar.endLoop() sep = '' for site in filesAtSite: seUsed = '' try: jobs = [] for lfn in filesAtSite[site]: for jobID in filesAtSite[site][lfn]: jobID = str(jobID) if jobID not in jobs: jobs.append(jobID) res = getSEsForSite(site) if not res['OK'] or not res['Value']: gLogger.always("Couldn't find SEs for site %s" % site) continue seList = res['Value'] inputData = sorted(filesAtSite[site]) if verbose: gLogger.always("%sSite: %s, jobs: %s, %d files" % (sep, site, ','.join(jobs), len(inputData))) else: gLogger.always("%sSite: %s, %d jobs, %d files" % (sep, site, len(jobs), len(inputData))) sep = '=====================================\n' if verbose: gLogger.always('For %s, SEs: %s' % (site, str(seList))) pbFound = False
def gridWeather(self, printOutput=False):
    """This method gives a snapshot of the current Grid weather from the perspective
    of the DIRAC site and SE masks.  Tier-1 sites are returned with more detailed
    information.

    Example usage:

    >>> print(dirac.gridWeather())
    {'OK': True, 'Value': {{'Sites':<siteInfo>,'SEs':<seInfo>,'Tier-1s':<tierInfo>}}

    @param printOutput: Optional flag to print result
    @type printOutput: boolean
    @return: S_OK,S_ERROR
    """
    lcgSites = gConfig.getSections('/Resources/Sites/LCG')
    if not lcgSites['OK']:
        return lcgSites

    # Sites with MoUTierLevel 0 or 1 are treated as Tier-1s.  The membership
    # test keeps self.tier1s free of duplicates when this method is called
    # more than once on the same object.
    for lcgSite in lcgSites['Value']:
        tier = gConfig.getValue('/Resources/Sites/LCG/%s/MoUTierLevel' % lcgSite, 2)
        if tier in (0, 1) and lcgSite not in self.tier1s:
            self.tier1s.append(lcgSite)

    siteInfo = self.checkSites()
    if not siteInfo['OK']:
        return siteInfo
    siteInfo = siteInfo['Value']

    seInfo = self.checkSEs()
    if not seInfo['OK']:
        return seInfo
    seInfo = seInfo['Value']

    tierSEs = {}
    for site in self.tier1s:
        result = getSEsForSite(site)
        if not result['OK']:
            # An unchecked ['Value'] lookup would raise KeyError on a failed query
            return result
        tierSEs[site] = result['Value']

    # Per Tier-1: the status of each of its known SEs plus its site mask status
    tierInfo = {}
    for site, seList in tierSEs.items():
        tierInfo[site] = {}
        for se in seList:
            if se in seInfo:
                tierInfo[site][se] = seInfo[se]
        if site in siteInfo['AllowedSites']:
            tierInfo[site]['MaskStatus'] = 'Allowed'
        else:
            tierInfo[site]['MaskStatus'] = 'Banned'

    if printOutput:
        self.log.notice('========> Tier-1 status in DIRAC site and SE masks')
        for site in sorted(self.tier1s):
            self.log.notice('\n====> %s is %s in site mask\n' % (site, tierInfo[site]['MaskStatus']))
            self.log.notice('%s %s %s' % ('Storage Element'.ljust(25),
                                          'Read Status'.rjust(15),
                                          'Write Status'.rjust(15)))
            for se in sorted(tierSEs[site]):
                if se in tierInfo[site]:
                    self.log.notice('%s %s %s' % (se.ljust(25),
                                                  tierInfo[site][se]['ReadStatus'].rjust(15),
                                                  tierInfo[site][se]['WriteStatus'].rjust(15)))

        self.log.notice('\n========> Tier-2 status in DIRAC site mask\n')
        # Count the non-Tier-1 sites on filtered copies: removing entries from
        # siteInfo's own lists would make the returned summary depend on
        # whether printing was requested.
        allowedSites = [site for site in siteInfo['AllowedSites'] if site not in self.tier1s]
        bannedSites = [site for site in siteInfo['BannedSites'] if site not in self.tier1s]
        self.log.notice(' %s sites are in the site mask, %s are banned.\n' % (len(allowedSites), len(bannedSites)))

    summary = {'Sites': siteInfo, 'SEs': seInfo, 'Tier-1s': tierInfo}
    return S_OK(summary)
for site in sites: siteCEs = DIRAC.gConfig.getValue( '/Resources/Sites/%s/%s/CE' % (grid, site), []) if ceName in siteCEs: siteName = site break if siteName: DIRAC.gLogger.notice('Setting /LocalSite/Site = %s' % siteName) Script.localCfg.addDefaultEntry('/LocalSite/Site', siteName) DIRAC.__siteName = False if ceName: DIRAC.gLogger.notice('Setting /LocalSite/GridCE = %s' % ceName) Script.localCfg.addDefaultEntry('/LocalSite/GridCE', ceName) if not localSE and siteName in sites: localSE = getSEsForSite(siteName) if localSE['OK'] and localSE['Value']: localSE = ','.join(localSE['Value']) DIRAC.gLogger.notice('Setting /LocalSite/LocalSE =', localSE) Script.localCfg.addDefaultEntry('/LocalSite/LocalSE', localSE) break if gatewayServer: DIRAC.gLogger.verbose('/DIRAC/Gateways/%s =' % DIRAC.siteName(), gatewayServer) Script.localCfg.addDefaultEntry('/DIRAC/Gateways/%s' % DIRAC.siteName(), gatewayServer) # Create the local cfg if it is not yet there
def requestStage(self, jobState, candidates, lfnData):
    """Send a staging request for the tape replicas held at a randomly chosen candidate site.

    :param jobState: job state object; ``jid``, ``setParameter`` and ``setStatus`` are used
    :param candidates: set-like collection of candidate site names
                       (``intersection`` is called on it)
    :param lfnData: ``{lfn: {'Replicas': {seName: ...}}}``
    :return: S_OK(set of site names) built from the sites serving the SEs in the
             request plus the selected site, or S_ERROR
    """
    # Any site is as good as any so random time!
    # random.sample() no longer accepts a set from Python 3.11 on, hence the list
    stageSite = random.sample(list(candidates), 1)[0]
    self.jobLog.info("Site selected %s for staging" % stageSite)
    result = getSEsForSite(stageSite)
    if not result['OK']:
        return S_ERROR('Could not determine SEs for site %s' % stageSite)
    siteSEs = result['Value']

    # Classify the readable SEs of the site as tape and/or disk
    tapeSEs = []
    diskSEs = []
    for seName in siteSEs:
        result = self.__getSEStatus(seName)
        if not result['OK']:
            self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result['Message']))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = result['Value']
        if seStatus['Read'] and seStatus['TapeSE']:
            tapeSEs.append(seName)
        if seStatus['Read'] and seStatus['DiskSE']:
            diskSEs.append(seName)

    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)

    self.jobLog.verbose("Tape SEs are %s" % (", ".join(tapeSEs)))

    stageLFNs = {}  # {seName: [lfn, ...]}
    lfnToStage = []
    for lfn in lfnData:
        replicas = lfnData[lfn]['Replicas']
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        if not seStage:
            continue
        lfnToStage.append(lfn)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of LFNs.  This must be a
    #     list: a ``reversed()`` iterator would be exhausted after the first
    #     LFN and the remaining LFNs would never be de-duplicated.
    sortedSEs = sorted(((len(lfns), seName) for seName, lfns in stageLFNs.items()), reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            if lfn in stageLFNs[seName]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
    # 4.- If empty SE, remove.  Done after the traversal so that the ranking
    #     above never refers to an SE that has already been dropped.
    for seName in [name for name, lfns in stageLFNs.items() if not lfns]:
        stageLFNs.pop(seName)

    self.jobLog.info("Stage request will be \n\t%s" %
                     "\n\t".join(["%s:%s" % (seName, stageLFNs[seName]) for seName in stageLFNs]))

    stagerClient = StorageManagerClient()
    result = stagerClient.setRequest(stageLFNs, 'WorkloadManagement',
                                     'stageCallback@WorkloadManagement/OptimizationMind',
                                     int(jobState.jid))
    if not result['OK']:
        self.jobLog.error("Could not send stage request: %s" % result['Message'])
        return S_ERROR("Problem sending staging request")

    rid = str(result['Value'])
    self.jobLog.info("Stage request %s sent" % rid)
    jobState.setParameter("StageRequest", rid)
    result = jobState.setStatus(self.ex_getOption('StagingStatus', 'Staging'),
                                self.ex_getOption('StagingMinorStatus', 'Request Sent'),
                                appStatus="",
                                source=self.ex_optimizerName())
    if not result['OK']:
        return result

    # Sites serving every SE in the request; lookups that fail are ignored
    stageCandidates = []
    for seName in stageLFNs:
        result = self.__getSitesForSE(seName)
        if result['OK']:
            stageCandidates.append(result['Value'])

    stageCandidates = candidates.intersection(*stageCandidates).union([stageSite])
    return S_OK(stageCandidates)
def __setStagingRequest(self, job, destination, inputDataDict):
    """Formulate a staging request and save it as a job optimizer parameter.

    :param job: job identifier (converted with ``int()`` when the request id is stored)
    :param destination: name of the site whose storage elements are considered
    :param inputDataDict: replica lookup result; replicas are read from
                          ``inputDataDict["Value"]["Value"]["Successful"]`` as
                          ``{lfn: {seName: surl}}``
    :return: S_OK({seName: [lfn, ...]}) with the request that was sent, or S_ERROR
    """
    self.log.verbose("Destination site %s" % (destination))
    self.log.verbose("Input Data: %s" % (inputDataDict))

    destinationSEs = getSEsForSite(destination)
    if not destinationSEs["OK"]:
        return S_ERROR("Could not determine SEs for site %s" % destination)
    destinationSEs = destinationSEs["Value"]

    # Classify the readable SEs of the site as tape and/or disk
    siteTapeSEs = []
    siteDiskSEs = []
    for se in destinationSEs:
        statusResult = StorageElement(se).getStatus()
        if not statusResult["OK"]:
            # An unchecked ["Value"] lookup would raise KeyError on a failed query
            self.log.error("Cannot retrieve SE %s status: %s" % (se, statusResult["Message"]))
            return S_ERROR("Cannot retrieve SE status")
        seStatus = statusResult["Value"]
        if seStatus["Read"] and seStatus["TapeSE"]:
            siteTapeSEs.append(se)
        if seStatus["Read"] and seStatus["DiskSE"]:
            siteDiskSEs.append(se)

    if not siteTapeSEs:
        return S_ERROR("No LocalSEs For Site")

    self.log.verbose("Site tape SEs: %s" % (", ".join(siteTapeSEs)))

    stageSURLs = {}  # OLD WAY: {lfn: {se: surl}}
    stageLfns = {}  # NEW WAY: {se: [lfn, ...]}
    inputData = inputDataDict["Value"]["Value"]["Successful"]
    for lfn, reps in inputData.items():
        # Collect the replicas of this file first and only register them once
        # we know that no local disk replica exists.  Registering them while
        # iterating would leave the file in the request whenever another
        # replica happens to be visited before the disk one.
        # NOTE(review): replicas are not restricted to siteTapeSEs here, so
        # every replica of a file without a local disk copy is a candidate -
        # confirm this is intended.
        replicasToStage = {}
        for se, surl in reps.items():
            if se in siteDiskSEs:
                # this File is on Disk, we can ignore it
                replicasToStage = {}
                break
            replicasToStage[se] = surl
        if not replicasToStage:
            continue
        stageSURLs[lfn] = replicasToStage
        for se in replicasToStage:
            stageLfns.setdefault(se, []).append(lfn)

    # Now we need to check if any LFN is in more than one SE: keep it only in
    # the SE that has the most files to stage
    if len(stageLfns) > 1:
        stageSEs = sorted((len(lfns), se) for se, lfns in stageLfns.items())
        for lfn in stageSURLs:
            lfnFound = False
            # The tuples are (count, se): unpacking them the other way round
            # would look up stageLfns[<count>] and raise KeyError
            for _numberOfLfns, se in reversed(stageSEs):
                if lfn not in stageLfns[se]:
                    continue
                if lfnFound:
                    stageLfns[se].remove(lfn)
                else:
                    lfnFound = True

    stagerClient = StorageManagerClient()
    request = stagerClient.setRequest(
        stageLfns, "WorkloadManagement", "updateJobFromStager@WorkloadManagement/JobStateUpdate", job
    )
    if not request["OK"]:
        self.log.error("Problem sending Staging request:")
        self.log.error(request)
        return S_ERROR("Error Sending Staging Request")

    self.jobDB.setJobParameter(int(job), "StageRequest", str(request["Value"]))
    self.log.info("Staging request successfully sent")

    result = self.updateJobStatus(job, self.stagingStatus, self.stagingMinorStatus)
    if not result["OK"]:
        return result
    return S_OK(stageLfns)
def main():
    """Run the configuration script.

    The steps, in order:

    * register the command line switches and parse the command line;
    * for every option not given on the command line, fall back to the value
      found under the install section (``cfgInstallPath``) of the configuration;
    * set the security related defaults in the local configuration;
    * unless CA download is skipped, synchronise CAs and CRLs;
    * when a CE name or a site name is known, resolve the site name and the
      local SE from the ``/Resources/Sites`` section;
    * dump the local configuration to the output file;
    * unless VOMS download is skipped, write the vomsdir/vomses files.

    The function reads and updates the module-level option globals declared
    below and always terminates through ``sys.exit`` (or ``DIRAC.exit``).
    """
    global logLevel
    global setup
    global configurationServer
    global includeAllServers
    global gatewayServer
    global siteName
    global useServerCert
    global skipCAChecks
    global skipCADownload
    global useVersionsDir
    global architecture
    global localSE
    global ceName
    global vo
    global update
    global outputFile
    global skipVOMSDownload
    global extensions

    Script.disableCS()

    Script.registerSwitch("S:", "Setup=", "Set <setup> as DIRAC setup", setSetup)
    Script.registerSwitch("e:", "Extensions=", "Set <extensions> as DIRAC extensions", setExtensions)
    Script.registerSwitch("C:", "ConfigurationServer=", "Set <server> as DIRAC configuration server", setServer)
    Script.registerSwitch("I", "IncludeAllServers", "include all Configuration Servers", setAllServers)
    Script.registerSwitch("n:", "SiteName=", "Set <sitename> as DIRAC Site Name", setSiteName)
    Script.registerSwitch("N:", "CEName=", "Determiner <sitename> from <cename>", setCEName)
    Script.registerSwitch("V:", "VO=", "Set the VO name", setVO)
    Script.registerSwitch("W:", "gateway=", "Configure <gateway> as DIRAC Gateway for the site", setGateway)
    Script.registerSwitch("U", "UseServerCertificate", "Configure to use Server Certificate", setServerCert)
    Script.registerSwitch("H", "SkipCAChecks", "Configure to skip check of CAs", setSkipCAChecks)
    Script.registerSwitch("D", "SkipCADownload", "Configure to skip download of CAs", setSkipCADownload)
    Script.registerSwitch("M", "SkipVOMSDownload", "Configure to skip download of VOMS info", setSkipVOMSDownload)
    Script.registerSwitch("v", "UseVersionsDir", "Use versions directory", setUseVersionsDir)
    Script.registerSwitch("A:", "Architecture=", "Configure /Architecture=<architecture>", setArchitecture)
    Script.registerSwitch("L:", "LocalSE=", "Configure LocalSite/LocalSE=<localse>", setLocalSE)
    Script.registerSwitch(
        "F",
        "ForceUpdate",
        "Force Update of cfg file (i.e. dirac.cfg) (otherwise nothing happens if dirac.cfg already exists)",
        forceUpdate)
    Script.registerSwitch("O:", "output=", "output configuration file", setOutput)

    # __doc__ here is the module docstring (looked up in the module globals)
    Script.setUsageMessage('\n'.join([
        __doc__.split('\n')[1],
        '\nUsage:',
        ' %s [options] ...\n' % Script.scriptName
    ]))

    Script.parseCommandLine(ignoreErrors=True)
    # The returned list is not used here; the call itself is kept
    Script.getExtraCLICFGFiles()

    # For every option not set on the command line, fall back to the value
    # stored under the install section of the configuration.
    if not logLevel:
        logLevel = DIRAC.gConfig.getValue(cfgInstallPath('LogLevel'), '')
        if logLevel:
            DIRAC.gLogger.setLevel(logLevel)
    else:
        DIRAC.gConfig.setOptionValue(cfgInstallPath('LogLevel'), logLevel)

    if not gatewayServer:
        newGatewayServer = DIRAC.gConfig.getValue(cfgInstallPath('Gateway'), '')
        if newGatewayServer:
            setGateway(newGatewayServer)

    if not configurationServer:
        newConfigurationServer = DIRAC.gConfig.getValue(
            cfgInstallPath('ConfigurationServer'), '')
        if newConfigurationServer:
            setServer(newConfigurationServer)

    if not includeAllServers:
        newIncludeAllServer = DIRAC.gConfig.getValue(
            cfgInstallPath('IncludeAllServers'), False)
        if newIncludeAllServer:
            setAllServers(True)

    if not setup:
        newSetup = DIRAC.gConfig.getValue(cfgInstallPath('Setup'), '')
        if newSetup:
            setSetup(newSetup)

    if not siteName:
        newSiteName = DIRAC.gConfig.getValue(cfgInstallPath('SiteName'), '')
        if newSiteName:
            setSiteName(newSiteName)

    if not ceName:
        newCEName = DIRAC.gConfig.getValue(cfgInstallPath('CEName'), '')
        if newCEName:
            setCEName(newCEName)

    if not useServerCert:
        newUserServerCert = DIRAC.gConfig.getValue(
            cfgInstallPath('UseServerCertificate'), False)
        if newUserServerCert:
            setServerCert(newUserServerCert)

    if not skipCAChecks:
        newSkipCAChecks = DIRAC.gConfig.getValue(
            cfgInstallPath('SkipCAChecks'), False)
        if newSkipCAChecks:
            setSkipCAChecks(newSkipCAChecks)

    if not skipCADownload:
        newSkipCADownload = DIRAC.gConfig.getValue(
            cfgInstallPath('SkipCADownload'), False)
        if newSkipCADownload:
            setSkipCADownload(newSkipCADownload)

    if not useVersionsDir:
        newUseVersionsDir = DIRAC.gConfig.getValue(
            cfgInstallPath('UseVersionsDir'), False)
        if newUseVersionsDir:
            setUseVersionsDir(newUseVersionsDir)
            # Set proper defaults in configuration (even if they will be
            # properly overwritten by gComponentInstaller)
            instancePath = os.path.dirname(os.path.dirname(DIRAC.rootPath))
            rootPath = os.path.join(instancePath, 'pro')
            DIRAC.gConfig.setOptionValue(cfgInstallPath('InstancePath'), instancePath)
            DIRAC.gConfig.setOptionValue(cfgInstallPath('RootPath'), rootPath)

    if not architecture:
        newArchitecture = DIRAC.gConfig.getValue(
            cfgInstallPath('Architecture'), '')
        if newArchitecture:
            setArchitecture(newArchitecture)

    if not vo:
        newVO = DIRAC.gConfig.getValue(cfgInstallPath('VirtualOrganization'), '')
        if newVO:
            setVO(newVO)

    if not extensions:
        newExtensions = DIRAC.gConfig.getValue(cfgInstallPath('Extensions'), '')
        if newExtensions:
            setExtensions(newExtensions)

    DIRAC.gLogger.notice('Executing: %s ' % (' '.join(sys.argv)))
    DIRAC.gLogger.notice('Checking DIRAC installation at "%s"' % DIRAC.rootPath)

    if update:
        if outputFile:
            DIRAC.gLogger.notice('Will update the output file %s' % outputFile)
        else:
            DIRAC.gLogger.notice('Will update %s' % DIRAC.gConfig.diracConfigFilePath)

    if setup:
        DIRAC.gLogger.verbose('/DIRAC/Setup =', setup)
    if vo:
        DIRAC.gLogger.verbose('/DIRAC/VirtualOrganization =', vo)
    if configurationServer:
        DIRAC.gLogger.verbose('/DIRAC/Configuration/Servers =', configurationServer)

    if siteName:
        DIRAC.gLogger.verbose('/LocalSite/Site =', siteName)
    if architecture:
        DIRAC.gLogger.verbose('/LocalSite/Architecture =', architecture)
    if localSE:
        DIRAC.gLogger.verbose('/LocalSite/localSE =', localSE)

    if not useServerCert:
        DIRAC.gLogger.verbose('/DIRAC/Security/UseServerCertificate =', 'no')
        # Being sure it was not there before
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'no')
    else:
        DIRAC.gLogger.verbose('/DIRAC/Security/UseServerCertificate =', 'yes')
        # Being sure it was not there before
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        Script.localCfg.addDefaultEntry('/DIRAC/Security/UseServerCertificate', 'yes')

    host = DIRAC.gConfig.getValue(cfgInstallPath("Host"), "")
    if host:
        DIRAC.gConfig.setOptionValue(cfgPath("DIRAC", "Hostname"), host)

    if skipCAChecks:
        DIRAC.gLogger.verbose('/DIRAC/Security/SkipCAChecks =', 'yes')
        # Being sure it was not there before
        Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')
        Script.localCfg.addDefaultEntry('/DIRAC/Security/SkipCAChecks', 'yes')
    else:
        # Necessary to allow initial download of CA's
        if not skipCADownload:
            DIRAC.gConfig.setOptionValue('/DIRAC/Security/SkipCAChecks', 'yes')

    if not skipCADownload:
        Script.enableCS()
        # Computed outside the try block so that the name is always bound
        # when the failure is reported below.
        dirName = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'certificates')
        try:
            mkDir(dirName)
        except BaseException:
            DIRAC.gLogger.exception()
            DIRAC.gLogger.fatal('Fail to create directory:', dirName)
            DIRAC.exit(-1)
        try:
            bdc = BundleDeliveryClient()
            result = bdc.syncCAs()
            if result['OK']:
                result = bdc.syncCRLs()
        except Exception as e:
            DIRAC.gLogger.error('Failed to sync CAs and CRLs: %s' % str(e))

        if not skipCAChecks:
            Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')

    if ceName or siteName:
        # This is used in the pilot context, we should have a proxy, or a
        # certificate, and access to CS
        if useServerCert:
            # Being sure it was not there before
            Script.localCfg.deleteOption(
                '/DIRAC/Security/UseServerCertificate')
            Script.localCfg.addDefaultEntry(
                '/DIRAC/Security/UseServerCertificate', 'yes')
        Script.enableCS()
        # Get the site resource section
        gridSections = DIRAC.gConfig.getSections('/Resources/Sites/')
        if not gridSections['OK']:
            DIRAC.gLogger.warn('Could not get grid sections list')
            grids = []
        else:
            grids = gridSections['Value']
        # try to get siteName from ceName or Local SE from siteName using
        # Remote Configuration
        for grid in grids:
            siteSections = DIRAC.gConfig.getSections('/Resources/Sites/%s/' % grid)
            if not siteSections['OK']:
                DIRAC.gLogger.warn('Could not get %s site list' % grid)
                sites = []
            else:
                sites = siteSections['Value']

            if not siteName:
                if ceName:
                    for site in sites:
                        res = DIRAC.gConfig.getSections(
                            '/Resources/Sites/%s/%s/CEs/' % (grid, site), [])
                        if not res['OK']:
                            DIRAC.gLogger.warn('Could not get %s CEs list' % site)
                            # A failed lookup has no 'Value' to search: go on
                            # with the next site instead of raising KeyError
                            continue
                        if ceName in res['Value']:
                            siteName = site
                            break

            if siteName:
                DIRAC.gLogger.notice('Setting /LocalSite/Site = %s' % siteName)
                Script.localCfg.addDefaultEntry('/LocalSite/Site', siteName)
                DIRAC.__siteName = False
                if ceName:
                    DIRAC.gLogger.notice('Setting /LocalSite/GridCE = %s' % ceName)
                    Script.localCfg.addDefaultEntry('/LocalSite/GridCE', ceName)

                if not localSE and siteName in sites:
                    # Keep the raw lookup result in a local name so that the
                    # global localSE only ever holds the option string
                    seResult = getSEsForSite(siteName)
                    if seResult['OK'] and seResult['Value']:
                        localSE = ','.join(seResult['Value'])
                        DIRAC.gLogger.notice('Setting /LocalSite/LocalSE =', localSE)
                        Script.localCfg.addDefaultEntry(
                            '/LocalSite/LocalSE', localSE)
                    break

    if gatewayServer:
        DIRAC.gLogger.verbose('/DIRAC/Gateways/%s =' % DIRAC.siteName(), gatewayServer)
        Script.localCfg.addDefaultEntry(
            '/DIRAC/Gateways/%s' % DIRAC.siteName(), gatewayServer)

    # Create the local cfg if it is not yet there
    if not outputFile:
        outputFile = DIRAC.gConfig.diracConfigFilePath
    outputFile = os.path.abspath(outputFile)
    if not os.path.exists(outputFile):
        configDir = os.path.dirname(outputFile)
        mkDir(configDir)
        update = True
        DIRAC.gConfig.dumpLocalCFGToFile(outputFile)

    if includeAllServers:
        # We need user proxy or server certificate to continue in order to get
        # all the CS URLs
        if not useServerCert:
            Script.enableCS()
            result = getProxyInfo()
            if not result['OK']:
                DIRAC.gLogger.notice(
                    'Configuration is not completed because no user proxy is available'
                )
                DIRAC.gLogger.notice(
                    'Create one using dirac-proxy-init and execute again with -F option'
                )
                sys.exit(1)
        else:
            Script.localCfg.deleteOption(
                '/DIRAC/Security/UseServerCertificate')
            # When using Server Certs CA's will be checked, the flag only
            # disables initial download
            # this will be replaced by the use of SkipCADownload
            Script.localCfg.addDefaultEntry(
                '/DIRAC/Security/UseServerCertificate', 'yes')
            Script.enableCS()

        DIRAC.gConfig.setOptionValue('/DIRAC/Configuration/Servers',
                                     ','.join(DIRAC.gConfig.getServersList()))
        DIRAC.gLogger.verbose('/DIRAC/Configuration/Servers =',
                              ','.join(DIRAC.gConfig.getServersList()))

    if useServerCert:
        # always removing before dumping
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')
        Script.localCfg.deleteOption('/DIRAC/Security/SkipVOMSDownload')

    if update:
        DIRAC.gConfig.dumpLocalCFGToFile(outputFile)

    # ## LAST PART: do the vomsdir/vomses magic

    # This has to be done for all VOs in the installation

    if skipVOMSDownload:
        # We stop here
        sys.exit(0)

    result = Registry.getVOMSServerInfo()
    if not result['OK']:
        sys.exit(1)

    error = ''
    vomsDict = result['Value']
    # NOTE: "vo" is declared global above, so this loop rebinds the module
    # level name; every path below ends in sys.exit.
    for vo in vomsDict:
        voName = vomsDict[vo]['VOMSName']
        vomsDirPath = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'vomsdir', voName)
        vomsesDirPath = os.path.join(DIRAC.rootPath, 'etc', 'grid-security', 'vomses')
        for path in (vomsDirPath, vomsesDirPath):
            mkDir(path)
        vomsesLines = []
        for vomsHost in vomsDict[vo].get('Servers', {}):
            hostFilePath = os.path.join(vomsDirPath, "%s.lsc" % vomsHost)
            try:
                DN = vomsDict[vo]['Servers'][vomsHost]['DN']
                CA = vomsDict[vo]['Servers'][vomsHost]['CA']
                port = vomsDict[vo]['Servers'][vomsHost]['Port']
                if not DN or not CA or not port:
                    DIRAC.gLogger.error('DN = %s' % DN)
                    DIRAC.gLogger.error('CA = %s' % CA)
                    DIRAC.gLogger.error('Port = %s' % port)
                    DIRAC.gLogger.error('Missing Parameter for %s' % vomsHost)
                    continue
                with open(hostFilePath, "wt") as fd:
                    fd.write("%s\n%s\n" % (DN, CA))
                vomsesLines.append('"%s" "%s" "%s" "%s" "%s" "24"' %
                                   (voName, vomsHost, port, DN, voName))
                DIRAC.gLogger.notice("Created vomsdir file %s" % hostFilePath)
            except Exception:
                # Remember the failure but keep processing the other hosts
                DIRAC.gLogger.exception(
                    "Could not generate vomsdir file for host", vomsHost)
                error = "Could not generate vomsdir file for VO %s, host %s" % (
                    voName, vomsHost)
        try:
            vomsesFilePath = os.path.join(vomsesDirPath, voName)
            with open(vomsesFilePath, "wt") as fd:
                fd.write("%s\n" % "\n".join(vomsesLines))
            DIRAC.gLogger.notice("Created vomses file %s" % vomsesFilePath)
        except Exception:
            DIRAC.gLogger.exception("Could not generate vomses file")
            error = "Could not generate vomses file for VO %s" % voName

    if useServerCert:
        Script.localCfg.deleteOption('/DIRAC/Security/UseServerCertificate')
        # When using Server Certs CA's will be checked, the flag only disables
        # initial download
        # this will be replaced by the use of SkipCADownload
        Script.localCfg.deleteOption('/DIRAC/Security/SkipCAChecks')

    # A non-empty error string means at least one VOMS file was not written
    if error:
        sys.exit(1)

    sys.exit(0)
def getDestinationSEList(outputSE, site, outputmode='Any', run=None):
    """Evaluate the output SE list from a workflow and return the concrete list
    of SEs to upload output data.

    :param outputSE: concrete SE name, site/country alias or SE group name
    :param site: site name; its first and last dot-separated fields are used as
        the configuration prefix and the country. Ignored in 'run' mode, where
        it is replaced by the destination returned for the run.
    :param outputmode: 'Any', 'Local' or 'Run' (case-insensitive)
    :param run: run number, required when outputmode is 'run'
    :return: list of SE names
    :raises RuntimeError: on an unexpected output mode, a missing or
        non-numeric run, a failed lookup, or when no SE can be established
    """
    if outputmode.lower() not in ('any', 'local', 'run'):
        raise RuntimeError("Unexpected outputmode")

    if outputmode.lower() == 'run':
        gLogger.verbose(
            "Output mode set to 'run', thus ignoring site parameter")
        if not run:
            raise RuntimeError("Expected runNumber")
        try:
            # int() is used rather than long(): it exists on both Python 2
            # (where it promotes to long when needed) and Python 3
            run = int(run)
        except ValueError as ve:
            raise RuntimeError("Expected runNumber as a number: %s" % ve)

        gLogger.debug("RunNumber = %d" % run)
        from LHCbDIRAC.TransformationSystem.Client.TransformationClient import TransformationClient
        runDestination = TransformationClient().getDestinationForRun(run)
        if not runDestination['OK'] or run not in runDestination['Value']:
            raise RuntimeError("Issue getting destinationForRun (%d): " % run +
                               runDestination.get('Message', 'unknown run'))
        site = runDestination['Value'][run]
        gLogger.verbose("Site set to %s for run %d" % (site, run))
        # From here on behave as in 'Local' mode for the resolved site
        outputmode = 'Local'

    # Add output SE defined in the job description
    gLogger.info('Resolving workflow output SE description: %s' % outputSE)

    # Check if the SE is defined explicitly for the site
    siteFields = site.split('.')
    prefix = siteFields[0]
    country = siteFields[-1]

    # Concrete SE name
    result = gConfig.getOptions('/Resources/StorageElements/' + outputSE)
    if result['OK']:
        gLogger.info('Found concrete SE %s' % outputSE)
        return [outputSE]

    # Get local SEs
    localSEs = getSEsForSite(site)
    if not localSEs['OK']:
        raise RuntimeError(localSEs['Message'])
    localSEs = localSEs['Value']
    gLogger.verbose("Local SE list is: %s" % (localSEs))

    # There is an alias defined for this Site
    associatedSEs = gConfig.getValue(
        '/Resources/Sites/%s/%s/AssociatedSEs/%s' % (prefix, site, outputSE), [])
    if associatedSEs:
        associatedSEs = _setLocalFirst(associatedSEs, localSEs)
        gLogger.info("Found associated SE %s for site %s" % (associatedSEs, site))
        return associatedSEs

    groupSEs = resolveSEGroup(outputSE)
    if not groupSEs:
        raise RuntimeError("Failed to resolve SE " + outputSE)
    gLogger.verbose("Group SE list is: %s" % (groupSEs))

    # Find a local SE or an SE considered as local because the country is
    # associated to it
    if outputmode.lower() == "local":
        # First, check if one SE in the group is local
        for se in localSEs:
            if se in groupSEs:
                gLogger.info("Found eligible local SE: %s" % (se))
                return [se]

        # Final check for country associated SE: follow the AssignedTo chain
        # until a country with associated SEs is found.
        assignedCountry = country
        visitedCountries = set()
        while True:
            # A country met twice means the AssignedTo options form a cycle;
            # give up instead of looping forever.
            if assignedCountry in visitedCountries:
                raise RuntimeError(
                    "Could not establish associated SE: circular AssignedTo definition for country %s"
                    % assignedCountry)
            visitedCountries.add(assignedCountry)

            # check if country is already one with associated SEs
            section = '/Resources/Countries/%s/AssociatedSEs/%s' % (
                assignedCountry, outputSE)
            associatedSEs = gConfig.getValue(section, [])
            if associatedSEs:
                associatedSEs = _setLocalFirst(associatedSEs, localSEs)
                gLogger.info('Found associated SEs %s in %s' % (associatedSEs, section))
                return associatedSEs

            gLogger.verbose("/Resources/Countries/%s/AssignedTo" % assignedCountry)
            opt = gConfig.getOption("/Resources/Countries/%s/AssignedTo" % assignedCountry)
            if opt['OK'] and opt['Value']:
                assignedCountry = opt['Value']
            else:
                # No associated SE and no assigned country, give up
                raise RuntimeError(
                    "Could not establish associated SE nor assigned country for country %s"
                    % assignedCountry)

    # For collective Any and All modes return the whole group
    # Make sure that local SEs are passing first
    orderedSEs = _setLocalFirst(groupSEs, localSEs)
    gLogger.info('Found SEs, local first: %s' % orderedSEs)
    return orderedSEs
def getDestinationSEList(outputSE, site, outputmode='Any'):
    """Evaluate the output SE list from a workflow and return the concrete list
    of SEs to upload output data.

    :param outputSE: concrete SE name, site/country alias or SE group name
    :param site: site name; its first and last dot-separated fields are used as
        the configuration prefix and the country
    :param outputmode: when equal to 'local' (case-insensitive) a single local
        or country-associated SE is looked for; any other value returns the
        whole group with the local SEs first
    :return: S_OK(list of SE names) or an S_ERROR structure
    """
    # Add output SE defined in the job description
    gLogger.info('Resolving workflow output SE description: %s' % outputSE)

    # Check if the SE is defined explicitly for the site
    siteFields = site.split('.')
    prefix = siteFields[0]
    country = siteFields[-1]

    # Concrete SE name
    result = gConfig.getOptions('/Resources/StorageElements/' + outputSE)
    if result['OK']:
        gLogger.info('Found concrete SE %s' % outputSE)
        return S_OK([outputSE])

    # There is an alias defined for this Site
    alias_se = gConfig.getValue(
        '/Resources/Sites/%s/%s/AssociatedSEs/%s' % (prefix, site, outputSE), [])
    if alias_se:
        gLogger.info('Found associated SE for site %s' % (alias_se))
        return S_OK(alias_se)

    # Propagate a failed lookup instead of raising KeyError on 'Value'
    localSEsResult = getSEsForSite(site)
    if not localSEsResult['OK']:
        return localSEsResult
    localSEs = localSEsResult['Value']
    gLogger.verbose('Local SE list is: %s' % (localSEs))

    groupSEs = gConfig.getValue('/Resources/StorageElementGroups/' + outputSE, [])
    gLogger.verbose('Group SE list is: %s' % (groupSEs))
    if not groupSEs:
        return S_ERROR('Failed to resolve SE ' + outputSE)

    if outputmode.lower() == "local":
        for se in localSEs:
            if se in groupSEs:
                gLogger.info('Found eligible local SE: %s' % (se))
                return S_OK([se])

        # check if country is already one with associated SEs
        associatedSE = gConfig.getValue(
            '/Resources/Countries/%s/AssociatedSEs/%s' % (country, outputSE), '')
        if associatedSE:
            gLogger.info(
                'Found associated SE %s in /Resources/Countries/%s/AssociatedSEs/%s'
                % (associatedSE, country, outputSE))
            return S_OK([associatedSE])

        # Final check for country associated SE: follow the AssignedTo chain,
        # at most 10 steps, until a country with an AssociatedSEs option is met
        count = 0
        assignedCountry = country
        while count < 10:
            gLogger.verbose('Loop count = %s' % (count))
            gLogger.verbose("/Resources/Countries/%s/AssignedTo" % assignedCountry)
            opt = gConfig.getOption("/Resources/Countries/%s/AssignedTo" % assignedCountry)
            if opt['OK'] and opt['Value']:
                assignedCountry = opt['Value']
                gLogger.verbose('/Resources/Countries/%s/AssociatedSEs' % assignedCountry)
                assocCheck = gConfig.getOption(
                    '/Resources/Countries/%s/AssociatedSEs' % assignedCountry)
                if assocCheck['OK'] and assocCheck['Value']:
                    break
            count += 1

        if not assignedCountry:
            return S_ERROR('Could not determine associated SE list for %s' % country)

        alias_se = gConfig.getValue(
            '/Resources/Countries/%s/AssociatedSEs/%s' % (assignedCountry, outputSE), [])
        if alias_se:
            gLogger.info('Found alias SE for site: %s' % alias_se)
            return S_OK(alias_se)

        gLogger.error(
            'Could not establish alias SE for country %s from section: /Resources/Countries/%s/AssociatedSEs/%s'
            % (country, assignedCountry, outputSE))
        return S_ERROR('Failed to resolve SE ' + outputSE)

    # For collective Any and All modes return the whole group

    # Make sure that local SEs are passing first
    localFirst = [se for se in groupSEs if se in localSEs]
    uniqueSEs = uniqueElements(localFirst + groupSEs)
    gLogger.verbose('Found unique SEs: %s' % (uniqueSEs))
    return S_OK(uniqueSEs)