def checkSourceSE( self, sourceSE, lfn, catalogMetadata ):
  """ Validate that :lfn: is actually present and healthy at :sourceSE:.

  :param self: self reference
  :param str sourceSE: candidate source SE name
  :param str lfn: logical file name
  :param dict catalogMetadata: catalog metadata record for :lfn:
  :return: S_OK() when the PFN exists and checksums agree, error structure otherwise
  """
  storageElement = self.seCache.get( sourceSE, None )
  if not storageElement:
    storageElement = StorageElement( sourceSE, "SRM2" )
    self.seCache[sourceSE] = storageElement

  pfn = storageElement.getPfnForLfn( lfn )
  if not pfn["OK"]:
    self.log.warn("checkSourceSE: unable to create pfn for %s lfn: %s" % ( lfn, pfn["Message"] ) )
    return pfn
  pfn = pfn["Value"]

  seMetadata = storageElement.getFileMetadata( pfn, singleFile = True )
  if not seMetadata["OK"]:
    self.log.warn("checkSourceSE: %s" % seMetadata["Message"] )
    return S_ERROR("checkSourceSE: failed to get metadata")
  seMetadata = seMetadata["Value"]

  # checksums are normalised before comparing: 'x' -> '0', zero-padded to 8 chars
  catalogChecksum = None
  if "Checksum" in catalogMetadata:
    catalogChecksum = catalogMetadata["Checksum"].replace( "x", "0" ).zfill( 8 )
  storageChecksum = None
  if "Checksum" in seMetadata:
    storageChecksum = seMetadata["Checksum"].replace( "x", "0" ).zfill( 8 )

  if catalogChecksum != storageChecksum:
    self.log.warn( "checkSourceSE: %s checksum mismatch catalogue:%s %s:%s" % ( lfn,
                                                                                catalogChecksum,
                                                                                sourceSE,
                                                                                storageChecksum ) )
    return S_ERROR("checkSourceSE: checksum mismatch")
  # everything checked out for this source
  return S_OK()
def finalize( self ):
  """ Register successfully transferred files in the catalogue.

  Only acts once the job is in a final state; every file in status
  "Finished" gets a replica registered at the target SE.

  Fix: removed two leftover debug ``print`` statements that wrote the
  per-file error to stdout.

  :return: S_OK, or the failed addCatalogReplica structure on bulk failure
  """
  if self.Status not in FTSJob.FINALSTATES:
    return S_OK()
  targetSE = StorageElement( self.TargetSE )
  toRegister = [ ftsFile for ftsFile in self if ftsFile.Status == "Finished" ]
  toRegisterDict = {}
  for ftsFile in toRegister:
    pfn = targetSE.getPfnForProtocol( ftsFile.TargetSURL, "SRM2", withPort = False )
    if not pfn["OK"]:
      # no PFN means this file cannot be registered - skip it
      continue
    toRegisterDict[ ftsFile.LFN ] = { "PFN": pfn["Value"], "SE": self.TargetSE }
  if toRegisterDict:
    register = self.replicaManager().addCatalogReplica( toRegisterDict )
    if not register["OK"]:
      # bulk registration failed: flag every candidate file
      for ftsFile in toRegister:
        ftsFile.Error = "AddCatalogReplicaFailed"
      return register
    failedFiles = register["Value"].get( "Failed", {} )
    for ftsFile in toRegister:
      if ftsFile.LFN in failedFiles:
        ftsFile.Error = "AddCatalogReplicaFailed"
  return S_OK()
def __getStoragePathExists( self, lfnPaths, storageElement ):
  """ Resolve :lfnPaths: to PFNs at :storageElement: and keep only those that exist.

  :return: S_OK( { lfn : pfn } ) for the existing paths
  """
  gLogger.info( 'Determining the existance of %d files at %s' % ( len( lfnPaths ), storageElement ) )
  se = StorageElement( storageElement )

  res = se.getPfnForLfn( lfnPaths )
  if not res['OK']:
    gLogger.error( "Failed to get PFNs for LFNs", res['Message'] )
    return res
  failed = res['Value']['Failed']
  for lfnPath, error in failed.items():
    gLogger.error( 'Failed to obtain PFN from LFN', '%s %s' % ( lfnPath, error ) )
  if failed:
    return S_ERROR( 'Failed to obtain PFNs from LFNs' )
  # invert the successful mapping: pfn -> lfn
  pfnLfns = dict( ( pfn, lfn ) for lfn, pfn in res['Value']['Successful'].items() )

  res = se.exists( pfnLfns )
  if not res['OK']:
    gLogger.error( "Failed to obtain existance of paths", res['Message'] )
    return res
  failed = res['Value']['Failed']
  for lfnPath, error in failed.items():
    gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) )
  if failed:
    return S_ERROR( 'Failed to determine existance of paths' )

  # keep only the paths that do exist, keyed back by LFN
  resDict = {}
  for pfn, exists in res['Value']['Successful'].items():
    if exists:
      resDict[pfnLfns[pfn]] = pfn
  return S_OK( resDict )
def __transferIfNotRegistered( self, file, transferDict ):
  """ Transfer :file: unless it is already registered in the output catalog.

  If already registered, the input copy is deleted instead: a plain unlink
  for a LocalDisk input, or replica + catalog removal for a file-catalog
  input.

  Fix: the catalog-deletion branch referenced an undefined name ``inFile``
  (guaranteed NameError, flagged by the original FIXME); the intended value
  is the full input path ``filePath``.

  :param str file: file name relative to the input path
  :param dict transferDict: transfer description ('InputPath', 'InputFC', ...)
  """
  result = self.isRegisteredInOutputCatalog( file, transferDict )
  if not result[ 'OK' ]:
    self.log.error( result[ 'Message' ] )
    return result
  #Already registered. Need to delete
  if result[ 'Value' ]:
    self.log.info( "Transfer file %s is already registered in the output catalog" % file )
    #Delete
    filePath = os.path.join( transferDict[ 'InputPath' ], file )
    if transferDict[ 'InputFC' ] == 'LocalDisk':
      os.unlink( filePath )
    else:
      inputFC = FileCatalog( [ transferDict['InputFC'] ] )
      replicaDict = inputFC.getReplicas( filePath )
      if not replicaDict['OK']:
        self.log.error( "Error deleting file", replicaDict['Message'] )
      elif filePath not in replicaDict['Value']['Successful']:
        self.log.error( "Error deleting file", replicaDict['Value']['Failed'][filePath] )
      else:
        # remove every physical replica, then the catalog entry
        seList = replicaDict['Value']['Successful'][filePath].keys()
        for se in seList:
          se = StorageElement( se )
          self.log.info( 'Removing from %s:' % se.name, filePath )
          se.removeFile( filePath )
        inputFC.removeFile( file )
    self.log.info( "File %s deleted from %s" % ( file, transferDict[ 'InputFC' ] ) )
    self.__processingFiles.discard( file )
    return S_OK( file )
  #Do the transfer
  return self.__retrieveAndUploadFile( file, transferDict )
def finalize( self ):
  """ Register successfully transferred files in the catalogue.

  Resolves an SRM PFN for each file in status "Finished" and bulk-adds the
  replicas, keeping the registration counters up to date.
  """
  if self.Status not in FTSJob.FINALSTATES:
    return S_OK()
  startTime = time.time()
  targetSE = StorageElement( self.TargetSE )
  finishedFiles = [ ftsFile for ftsFile in self if ftsFile.Status == "Finished" ]

  replicaTuples = {}
  for ftsFile in finishedFiles:
    pfn = returnSingleResult( targetSE.getPfnForProtocol( ftsFile.TargetSURL, protocol = "SRM2", withPort = False ) )
    if not pfn["OK"]:
      # without a PFN this file cannot be registered
      continue
    replicaTuples[ ftsFile.LFN ] = { "PFN": pfn["Value"], "SE": self.TargetSE }

  if replicaTuples:
    self._regTotal += len( replicaTuples )
    register = self._fc.addReplica( replicaTuples )
    self._regTime += time.time() - startTime
    if not register["OK"]:
      # bulk failure: mark every candidate
      for ftsFile in finishedFiles:
        ftsFile.Error = "AddCatalogReplicaFailed"
      return register
    register = register["Value"]
    self._regSuccess += len( register.get( 'Successful', {} ) )
    failedFiles = register.get( "Failed", {} )
    for ftsFile in finishedFiles:
      if ftsFile.LFN in failedFiles:
        ftsFile.Error = "AddCatalogReplicaFailed"
  return S_OK()
def getFile(lfn, se=""):
    """Download :lfn:, preferring storage element :se: when given.

    Tries the preferred SE up to 5 times, then falls back to any active
    replica (again up to 5 attempts, with a random back-off between tries).

    Fix: replaced the long-deprecated ``dict.has_key`` with ``in`` and
    parenthesized the print statements (identical output under Python 2).

    :param str lfn: logical file name
    :param str se: preferred storage element name (optional)
    :return: S_OK({lfn: {"DownloadOK": 1|2, "Retry": attempts}}) on success
    """
    dm = DataManager()
    download_ok = 0
    get_active_replicas_ok = False
    lfn_on_se = False
    error_msg = ""
    if se:
        for i in range(0, 5):
            result = dm.getActiveReplicas(lfn)
            if result["OK"] and result["Value"]["Successful"]:
                get_active_replicas_ok = True
                lfnReplicas = result["Value"]["Successful"]
                if se in lfnReplicas[lfn]:
                    lfn_on_se = True
                break
            time.sleep(3)
            print("- Get replicas for %s failed, try again" % lfn)
        if not get_active_replicas_ok:
            return S_ERROR("Get replicas error: %s" % lfn)
    if lfn_on_se:
        se = StorageElement(se)
        # try 5 times
        for j in range(0, 5):
            result = se.getFile(lfn)
            if result["OK"] and result["Value"]["Successful"] and lfn in result["Value"]["Successful"]:
                break
            time.sleep(random.randint(180, 600))
            print("- %s getStorageFile(%s) failed, try again" % (lfn, se))
        if result["OK"]:
            if result["Value"]["Successful"] and lfn in result["Value"]["Successful"]:
                download_ok = 1
            else:
                error_msg = "Downloading %s from SE %s error!" % (lfn, se)
        else:
            error_msg = result["Message"]
    else:
        if se:
            print('File %s not found on SE "%s" after %s tries, trying other SE' % (lfn, se, i + 1))
        # try 5 times
        for j in range(0, 5):
            result = dm.getFile(lfn)
            if result["OK"] and result["Value"]["Successful"] and lfn in result["Value"]["Successful"]:
                break
            time.sleep(random.randint(180, 600))
            print("- getFile(%s) failed, try again" % lfn)
        if result["OK"]:
            if result["Value"]["Successful"] and lfn in result["Value"]["Successful"]:
                download_ok = 2
            else:
                error_msg = "Downloading %s from random SE error!" % lfn
        else:
            error_msg = result["Message"]
    if download_ok:
        return S_OK({lfn: {"DownloadOK": download_ok, "Retry": j + 1}})
    return S_ERROR(error_msg)
def __insertRegisterOperation( self, request, operation, toRegister ):
  """ add RegisterReplica operation

  One 'RegisterReplica' operation is inserted before :operation: per target
  SE appearing in :toRegister:.

  Fix: the grouping loop tested membership and then called ``setdefault``
  anyway; ``setdefault`` alone suffices.

  :param Request request: request instance
  :param Operation operation: 'ReplicateAndRegister' operation for this FTSJob
  :param list toRegister: [ FTSDB.FTSFile, ... ] - files that failed to register
  """
  log = self.log.getSubLogger( "req_%s/%s/registerFiles" % ( request.RequestID, request.RequestName ) )

  # group the files by their target SE
  byTarget = {}
  for ftsFile in toRegister:
    byTarget.setdefault( ftsFile.TargetSE, [] ).append( ftsFile )
  log.info( "will create %s 'RegisterReplica' operations" % len( byTarget ) )

  for target, ftsFileList in byTarget.iteritems():
    log.info( "creating 'RegisterReplica' operation for targetSE %s with %s files..." % ( target,
                                                                                          len( ftsFileList ) ) )
    registerOperation = Operation()
    registerOperation.Type = "RegisterReplica"
    registerOperation.Status = "Waiting"
    registerOperation.TargetSE = target
    targetSE = StorageElement( target )
    for ftsFile in ftsFileList:
      opFile = File()
      opFile.LFN = ftsFile.LFN
      pfn = returnSingleResult( targetSE.getURL( ftsFile.LFN, protocol = self.registrationProtocols ) )
      if not pfn["OK"]:
        # cannot build a URL for this file - leave it out of the operation
        continue
      opFile.PFN = pfn["Value"]
      registerOperation.addFile( opFile )
    request.insertBefore( registerOperation, operation )
  return S_OK()
def __removeStorageDirectory( self, directory, storageElement ):
  """ Wipe out all contents of :directory: at :storageElement:.

  :param self: self reference
  :param str directory: path to remove
  :param str storageElement: SE name
  """
  self.log.info( 'Removing the contents of %s at %s' % ( directory, storageElement ) )
  se = StorageElement( storageElement )

  existsRes = returnSingleResult( se.exists( directory ) )
  if not existsRes['OK']:
    self.log.error( "Failed to obtain existance of directory", existsRes['Message'] )
    return existsRes
  if not existsRes['Value']:
    # nothing there: that is success for a removal
    self.log.info( "The directory %s does not exist at %s " % ( directory, storageElement ) )
    return S_OK()

  removalRes = returnSingleResult( se.removeDirectory( directory, recursive = True ) )
  if not removalRes['OK']:
    self.log.error( "Failed to remove storage directory", removalRes['Message'] )
    return removalRes
  self.log.info( "Successfully removed %d files from %s at %s" % ( removalRes['Value']['FilesRemoved'],
                                                                   directory,
                                                                   storageElement ) )
  return S_OK()
def __setRegistrationRequest( self, lfn, targetSE, fileDict, catalog ):
  """ Sets a registration request

  One 'RegisterFile' operation is added per catalog in :catalog:.

  Fix: the StorageElement construction and PFN resolution do not depend on
  the catalog, so they are now done once instead of on every loop iteration
  (the docstring also wrongly documented the second parameter as a list).

  :param str lfn: LFN
  :param str targetSE: target SE name
  :param dict fileDict: file metadata
  :param list catalog: list of catalogs to use
  """
  self.log.info( 'Setting registration request for %s at %s.' % ( lfn, targetSE ) )
  # the PFN does not depend on the catalog: resolve it once, up front
  se = StorageElement( targetSE )
  pfn = se.getPfnForLfn( lfn )
  if not pfn["OK"]:
    self.log.error( "unable to get PFN for LFN: %s" % pfn["Message"] )
    return pfn
  pfn = pfn["Value"]
  for cat in catalog:
    register = Operation()
    register.Type = "RegisterFile"
    register.Catalog = cat
    register.TargetSE = targetSE
    regFile = File()
    regFile.LFN = lfn
    regFile.Checksum = fileDict.get( "Checksum", "" )
    regFile.ChecksumType = fileDict.get( "ChecksumType", "" )
    regFile.Size = fileDict.get( "Size", 0 )
    regFile.GUID = fileDict.get( "GUID", "" )
    regFile.PFN = pfn
    register.addFile( regFile )
    self.request.addOperation( register )
  return S_OK()
def _filterReplicas( self, opFile ):
  """ filter out banned/invalid source SEs

  Classifies every replica of :opFile: as Valid, Banned (inaccessible) or
  Bad (checksum mismatch).

  Fixes:
    * return early when getActiveReplicas fails (the original fell through
      and raised KeyError on the missing "Value" key),
    * the not-exists regex read "not such file or directory" and could
      never match the real "no such file or directory" message,
    * the SE cache was keyed by the SE *object* instead of its name, so the
      cache lookup above it never hit.

  :param File opFile: file to inspect
  :return: S_OK( { "Valid": [...], "Banned": [...], "Bad": [...] } )
  """
  ret = { "Valid" : [], "Banned" : [], "Bad" : [] }

  replicas = self.replicaManager().getActiveReplicas( opFile.LFN )
  if not replicas["OK"]:
    self.log.error( replicas["Message"] )
    return replicas
  # fixed typo: the storage error reads "no such file or directory"
  reNotExists = re.compile( "no such file or directory" )
  replicas = replicas["Value"]
  failed = replicas["Failed"].get( opFile.LFN , "" )
  if reNotExists.match( failed.lower() ):
    opFile.Status = "Failed"
    opFile.Error = failed
    return S_ERROR( failed )

  replicas = replicas["Successful"][opFile.LFN] if opFile.LFN in replicas["Successful"] else {}
  for repSEName in replicas:
    seRead = self.rssSEStatus( repSEName, "ReadAccess" )
    if not seRead["OK"]:
      self.log.error( seRead["Message"] )
      ret["Banned"].append( repSEName )
      continue
    if not seRead["Value"]:
      # NOTE(review): this only logs - the SE is still considered below;
      # presumably it should also be Banned. Left as-is to preserve behaviour.
      self.log.error( "StorageElement '%s' is banned for reading" % ( repSEName ) )

    repSE = self.seCache.get( repSEName, None )
    if not repSE:
      repSE = StorageElement( repSEName, "SRM2" )
      # fixed: cache keyed by SE name, not by the SE object
      self.seCache[repSEName] = repSE

    pfn = repSE.getPfnForLfn( opFile.LFN )
    if not pfn["OK"]:
      self.log.warn( "unable to create pfn for %s lfn: %s" % ( opFile.LFN, pfn["Message"] ) )
      ret["Banned"].append( repSEName )
      continue
    pfn = pfn["Value"]

    repSEMetadata = repSE.getFileMetadata( pfn, singleFile = True )
    if not repSEMetadata["OK"]:
      self.log.warn( repSEMetadata["Message"] )
      ret["Banned"].append( repSEName )
      continue
    repSEMetadata = repSEMetadata["Value"]

    # normalised checksum: 'x' -> '0', zero-padded to 8 chars
    seChecksum = repSEMetadata["Checksum"].replace( "x", "0" ).zfill( 8 ) if "Checksum" in repSEMetadata else None
    if opFile.Checksum and opFile.Checksum != seChecksum:
      self.log.warn( " %s checksum mismatch: %s %s:%s" % ( opFile.LFN, opFile.Checksum, repSE, seChecksum ) )
      ret["Bad"].append( repSEName )
      continue
    # # if we're here repSE is OK
    ret["Valid"].append( repSEName )

  return S_OK( ret )
def __updateSharedSESites( self, jobState, stageSite, stagedLFNs, opData ):
  """ For every non-staging site candidate, credit a disk replica for each LFN
  that was staged onto an SE close to (shared with) that site.
  """
  siteCandidates = opData[ 'SiteCandidates' ]
  statusCache = {}
  result = jobState.getManifest()
  if not result['OK']:
    return result
  vo = result['Value'].getOption( 'VirtualOrganization' )

  for siteName in siteCandidates:
    if siteName == stageSite:
      continue
    self.jobLog.verbose( "Checking %s for shared SEs" % siteName )
    siteData = siteCandidates[ siteName ]
    result = getSEsForSite( siteName )
    if not result[ 'OK' ]:
      continue
    closeSEs = result[ 'Value' ]

    # collect the readable disk SEs among the close ones, caching SE status
    diskSEs = []
    for seName in closeSEs:
      if seName not in statusCache:
        result = StorageElement( seName, vo = vo ).getStatus()
        if not result['OK' ]:
          self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
          continue
        statusCache[ seName ] = result[ 'Value' ]
      status = statusCache[ seName ]
      if status['Read'] and status['DiskSE']:
        diskSEs.append( seName )
    self.jobLog.verbose( "Disk SEs for %s are %s" % ( siteName, ", ".join( diskSEs ) ) )

    # Hell again to the dev of this crappy value of value of successful of ...
    lfnData = opData['Value']['Value']['Successful']
    for seName in stagedLFNs:
      # skip SEs that are not close to this site
      if seName not in closeSEs:
        continue
      for lfn in stagedLFNs[ seName ]:
        self.jobLog.verbose( "Checking %s for %s" % ( seName, lfn ) )
        # I'm pretty sure that this cannot happen :P
        if lfn not in lfnData:
          continue
        # is the LFN already on a disk SE at the site?
        onDisk = False
        for siteSE in lfnData[ lfn ]:
          if siteSE in diskSEs:
            self.jobLog.verbose( "%s on disk for %s" % ( lfn, siteSE ) )
            onDisk = True
        # not on disk yet: move one unit from tape to disk accounting
        if not onDisk:
          self.jobLog.verbose( "Setting LFN to disk for %s" % ( seName ) )
          siteData[ 'disk' ] += 1
          siteData[ 'tape' ] -= 1
  return S_OK()
def __getSEStatus(self, seName):
    """Return the status of SE :seName:, caching successful answers for 600 s."""
    cached = self.__SEStatus.get(seName)
    # the cache returns the False sentinel on a miss
    if cached != False:
        return cached
    result = StorageElement(seName).getStatus()
    if not result["OK"]:
        return result
    self.__SEStatus.add(seName, 600, result)
    return result
def sendViaDiracStorageElement(zipFile):
    """Upload :zipFile: into the configured folder of the DIRAC storage element."""
    tailzipFile = os.path.split(zipFile)[1]
    # DIRAC must be initialized before the StorageElement import is usable
    from DIRAC.Core.Base.Script import parseCommandLine, initialize
    initialize(ignoreErrors = True, enableCommandLine = False)
    from DIRAC.Resources.Storage.StorageElement import StorageElement
    statSE = StorageElement(diracStorageElementName)
    remotePath = os.path.join(diracStorageElementFolder, tailzipFile)
    log = statSE.putFile({remotePath: zipFile})
    logger.info('{0}'.format(log))
def __updateSharedSESites(self, jobState, stageSite, stagedLFNs, opData):
    """For every non-staging site candidate, credit a disk replica for each LFN
    that was staged onto an SE close to (shared with) that site."""
    siteCandidates = opData["SiteCandidates"]
    statusCache = {}

    for siteName in siteCandidates:
        if siteName == stageSite:
            continue
        self.jobLog.verbose("Checking %s for shared SEs" % siteName)
        siteData = siteCandidates[siteName]
        result = getSEsForSite(siteName)
        if not result["OK"]:
            continue
        closeSEs = result["Value"]

        # collect readable disk SEs among the close ones, caching SE status
        diskSEs = []
        for seName in closeSEs:
            if seName not in statusCache:
                result = StorageElement(seName).getStatus()
                if not result["OK"]:
                    self.jobLog.error("Cannot retrieve SE %s status: %s" % (seName, result["Message"]))
                    continue
                statusCache[seName] = result["Value"]
            status = statusCache[seName]
            if status["Read"] and status["DiskSE"]:
                diskSEs.append(seName)
        self.jobLog.verbose("Disk SEs for %s are %s" % (siteName, ", ".join(diskSEs)))

        # Hell again to the dev of this crappy value of value of successful of ...
        lfnData = opData["Value"]["Value"]["Successful"]
        for seName in stagedLFNs:
            # skip SEs that are not close to this site
            if seName not in closeSEs:
                continue
            for lfn in stagedLFNs[seName]:
                self.jobLog.verbose("Checking %s for %s" % (seName, lfn))
                # I'm pretty sure that this cannot happen :P
                if lfn not in lfnData:
                    continue
                # is the LFN already on a disk SE at the site?
                onDisk = False
                for siteSE in lfnData[lfn]:
                    if siteSE in diskSEs:
                        self.jobLog.verbose("%s on disk for %s" % (lfn, siteSE))
                        onDisk = True
                # not on disk yet: move one unit from tape to disk accounting
                if not onDisk:
                    self.jobLog.verbose("Setting LFN to disk for %s" % (seName))
                    siteData["disk"] += 1
                    siteData["tape"] -= 1
    return S_OK()
def __call__( self ):
  """ reTransfer operation execution

  Brings every waiting file back online at the single allowed target SE.

  Fix: both failure log calls formatted a two-placeholder string with a
  single argument (``% opFile.Error``), which raises TypeError at runtime;
  the LFN is now supplied as the first argument.
  """
  # # list of targetSEs
  targetSEs = self.operation.targetSEList
  # # get waiting files
  waitingFiles = self.getWaitingFilesList()
  # # prepare waiting files
  toRetransfer = dict( [ ( opFile.PFN, opFile ) for opFile in waitingFiles ] )
  gMonitor.addMark( "FileReTransferAtt", len( toRetransfer ) )

  if len( targetSEs ) != 1:
    error = "only one TargetSE allowed, got %s" % len( targetSEs )
    for opFile in toRetransfer.values():
      opFile.Error = error
      opFile.Status = "Failed"
    self.operation.Error = error
    gMonitor.addMark( "FileReTransferFail", len( toRetransfer ) )
    return S_ERROR( error )

  # # check targetSEs for removal
  targetSE = targetSEs[0]
  bannedTargets = self.checkSEsRSS( targetSE )
  if not bannedTargets['OK']:
    gMonitor.addMark( "FileReTransferAtt" )
    gMonitor.addMark( "FileReTransferFail" )
    return bannedTargets
  if bannedTargets['Value']:
    return S_OK( "%s targets are banned for writing" % ",".join( bannedTargets['Value'] ) )

  se = StorageElement( targetSE )
  for opFile in toRetransfer.values():
    reTransfer = se.retransferOnlineFile( opFile.PFN )
    if not reTransfer["OK"]:
      opFile.Error = reTransfer["Message"]
      # fixed: pass both values expected by the format string
      self.log.error( "%s retransfer failed: %s" % ( opFile.LFN, opFile.Error ) )
      gMonitor.addMark( "FileReTransferFail", 1 )
      continue
    reTransfer = reTransfer["Value"]
    if opFile.PFN in reTransfer["Failed"]:
      opFile.Error = reTransfer["Failed"][opFile.PFN]
      self.log.error( "%s retransfer failed: %s" % ( opFile.LFN, opFile.Error ) )
      gMonitor.addMark( "FileReTransferFail", 1 )
      continue
    opFile.Status = "Done"
    self.log.info( "%s retransfer done" % opFile.LFN )
    gMonitor.addMark( "FileReTransferOK", 1 )
  return S_OK()
def doNew(self, masterParams=None):
    """
    Gets the total and the free disk space of a DIPS storage element that
    is found in the CS and inserts the results in the SpaceTokenOccupancyCache
    table of ResourceManagementDB database.

    Fix: ``getStorageParameters(protocol="dips")`` was issued twice; the
    result is now queried once and reused.
    """
    if masterParams is not None:
        elementName = masterParams
    else:
        elementName = self._prepareCommand()
        if not elementName["OK"]:
            return elementName

    se = StorageElement(elementName)
    params = se.getStorageParameters(protocol="dips")
    if not params["OK"]:
        gLogger.verbose("Not a DIPS storage element, skipping...")
        return S_OK()
    elementURL = params["Value"]["URLBase"]

    self.rpc = RPCClient(elementURL, timeout=120)

    free = self.rpc.getFreeDiskSpace("/")
    if not free["OK"]:
        return free
    free = free["Value"]

    total = self.rpc.getTotalDiskSpace("/")
    if not total["OK"]:
        return total
    total = total["Value"]

    # clamp to a minimum of 1 so downstream ratio computations stay sane
    if free and free < 1:
        free = 1
    if total and total < 1:
        total = 1

    result = self.rsClient.addOrModifySpaceTokenOccupancyCache(
        endpoint=elementURL, lastCheckTime=datetime.utcnow(), free=free, total=total, token=elementName
    )
    if not result["OK"]:
        return result

    return S_OK()
def finalize( self ):
  """ Register successfully transferred files

  Resolves an SRM URL for every file in status "Finished" and bulk-adds the
  replicas; registration failures are recorded per file and per reason.
  """
  if self.Status not in FTSJob.FINALSTATES:
    return S_OK()
  if not len( self ):
    return S_ERROR( "Empty job in finalize" )

  startTime = time.time()
  targetSE = StorageElement( self.TargetSE )
  finishedFiles = [ ftsFile for ftsFile in self if ftsFile.Status == "Finished" ]

  replicaTuples = {}
  for ftsFile in finishedFiles:
    pfn = returnSingleResult( targetSE.getURL( ftsFile.LFN, protocol = 'srm' ) )
    if pfn["OK"]:
      replicaTuples[ ftsFile.LFN ] = { "PFN": pfn["Value"], "SE": self.TargetSE }
    else:
      self._log.error( "Error getting SRM URL", pfn['Message'] )

  if not replicaTuples:
    statuses = set( [ftsFile.Status for ftsFile in self] )
    self._log.warn( "No replicas to register for FTSJob (%s) - Files status: '%s'" % \
                    ( self.Status, ','.join( sorted( statuses ) ) ) )
    return S_OK()

  self._regTotal += len( replicaTuples )
  register = self._fc.addReplica( replicaTuples )
  self._regTime += time.time() - startTime
  if not register["OK"]:
    self._log.error( 'Error registering replica', register['Message'] )
    for ftsFile in finishedFiles:
      ftsFile.Error = "AddCatalogReplicaFailed"
    return register

  register = register["Value"]
  self._regSuccess += len( register.get( 'Successful', {} ) )
  if self._regSuccess:
    self._log.info( 'Successfully registered %d replicas' % self._regSuccess )
  failedFiles = register.get( "Failed", {} )
  # group failures by reason for compact logging
  errorReason = {}
  for lfn, reason in failedFiles.items():
    errorReason.setdefault( str( reason ), [] ).append( lfn )
  for reason in errorReason:
    self._log.error( 'Error registering %d replicas' % len( errorReason[reason] ), reason )
  for ftsFile in finishedFiles:
    if ftsFile.LFN in failedFiles:
      ftsFile.Error = "AddCatalogReplicaFailed"
  return S_OK()
def __updateOtherSites( self, job, stagingSite, stagedLFNsPerSE, optInfo ):
  """ Update Optimizer Info for other sites for which the SE on which we have staged
      Files are declared local
  """
  updated = False
  seDict = {}
  for site, siteDict in optInfo['SiteCandidates'].items():
    if stagingSite == site:
      continue
    closeSEs = getSEsForSite( site )
    if not closeSEs['OK']:
      continue
    closeSEs = closeSEs['Value']

    # readable disk SEs close to this site, with the status cached per SE
    siteDiskSEs = []
    for se in closeSEs:
      if se not in seDict:
        try:
          seDict[se] = StorageElement( se ).getStatus()['Value']
        except Exception:
          self.log.exception( 'Failed to instantiate StorageElement( %s )' % se )
          continue
      seStatus = seDict[se]
      if seStatus['Read'] and seStatus['DiskSE']:
        siteDiskSEs.append( se )

    for lfn, replicas in optInfo['Value']['Value']['Successful'].items():
      for stageSE, stageLFNs in stagedLFNsPerSE.items():
        if lfn in stageLFNs and stageSE in closeSEs:
          # The LFN has been staged, we need to check now if this SE is close
          # to the Site and if the LFN was not already on a Disk SE at the Site
          isOnDisk = any( se in siteDiskSEs for se in replicas )
          if not isOnDisk:
            # This is updating optInfo
            updated = True
            siteDict['disk'] += 1
            siteDict['tape'] -= 1
          break
  if updated:
    self.log.verbose( 'Updating %s Optimizer Info for Job %s:' % ( self.dataAgentName, job ), optInfo )
    self.setOptimizerJobInfo( job, self.dataAgentName, optInfo )
class StorageElementTestCase( unittest.TestCase ):
  """ Base class for the StorageElement test cases """

  def setUp( self ):
    """ Build the SE under test and create a fresh destination directory. """
    self.numberOfFiles = 1
    self.storageElement = StorageElement( storageElementToTest )
    self.localSourceFile = "/etc/group"
    self.localFileSize = getSize( self.localSourceFile )
    self.destDirectory = "/lhcb/test/unit-test/TestStorageElement"
    destDir = self.storageElement.getPfnForLfn( self.destDirectory )['Value']
    res = self.storageElement.createDirectory( destDir, singleDirectory = True )
    self.assert_( res['OK'] )

  def tearDown( self ):
    """ Recursively remove the destination directory. """
    destDir = self.storageElement.getPfnForLfn( self.destDirectory )['Value']
    res = self.storageElement.removeDirectory( destDir, recursive = True, singleDirectory = True )
    self.assert_( res['OK'] )
def _getSEParameters( seName ):
  """ Return the storage parameters of SE :seName:, trying SRM2 first.

  Fix: with an empty plugin list the original returned an unbound
  ``seParameters`` (NameError); it now falls back to an explicit error.

  :param str seName: storage element name
  :return: S_OK with the parameters of the first working plugin, or an error
  """
  se = StorageElement( seName, hideExceptions = True )
  pluginsList = se.getPlugins()
  if not pluginsList['OK']:
    gLogger.warn( pluginsList['Message'], "SE: %s" % seName )
    return pluginsList
  pluginsList = pluginsList['Value']
  # make sure SRM2 is tried first
  if 'SRM2' in pluginsList:
    pluginsList.remove( 'SRM2' )
    pluginsList.insert( 0, 'SRM2' )
  # first plugin whose parameters resolve wins
  seParameters = S_ERROR( 'No storage parameters found for %s' % seName )
  for plugin in pluginsList:
    seParameters = se.getStorageParameters( plugin )
    if seParameters['OK']:
      break
  return seParameters
def setUp( self ):
  """ Build the SE under test and create a fresh destination directory. """
  self.numberOfFiles = 1
  self.storageElement = StorageElement( storageElementToTest )
  self.localSourceFile = "/etc/group"
  self.localFileSize = getSize( self.localSourceFile )
  self.destDirectory = "/lhcb/test/unit-test/TestStorageElement"
  targetDir = self.storageElement.getPfnForLfn( self.destDirectory )['Value']
  res = self.storageElement.createDirectory( targetDir, singleDirectory = True )
  self.assert_( res['OK'] )
class StorageElementTestCase( unittest.TestCase ):
  """ Base class for the StorageElement test cases """

  def setUp( self ):
    """ Build the SE under test and create the destination LFN directory. """
    self.numberOfFiles = 1
    self.storageElement = StorageElement( storageElementToTest )
    self.localSourceFile = fileToTest
    self.localFileSize = getSize( self.localSourceFile )
    self.destDirectory = lfnDirToTest
    # the SE accepts the LFN directly, no URL resolution needed
    res = self.storageElement.createDirectory( self.destDirectory )
    self.assert_( res['OK'] )

  def tearDown( self ):
    """ Recursively remove the destination directory. """
    res = self.storageElement.removeDirectory( self.destDirectory, recursive = True )
    self.assert_( res['OK'] )
def __fetchSpaceToken(seName):
    """ Fetch the space token of storage element

        :param seName name of the storageElement

        :returns space token
    """
    if not seName:
        # no SE given: no token to fetch
        return S_OK(None)
    res = StorageElement(seName).getStorageParameters(protocol='srm')
    if not res['OK']:
        return res
    return S_OK(res["Value"].get("SpaceToken"))
def doNew(self, masterParams=None):
    """
    Gets the parameters to run, either from the master method or from its
    own arguments.

    Gets the total and the free disk space of a storage element
    and inserts the results in the SpaceTokenOccupancyCache table
    of ResourceManagementDB database.

    The result is also returned to the caller, not only inserted.
    What is inserted in the DB will normally be in MB,
    what is returned will be in the specified unit.
    """
    if masterParams is not None:
        elementName, unit = masterParams
    else:
        params = self._prepareCommand()
        if not params['OK']:
            return params
        elementName, unit = params['Value']

    endpointResult = CSHelpers.getStorageElementEndpoint(elementName)
    if not endpointResult['OK']:
        return endpointResult

    occupancyResult = StorageElement(elementName).getOccupancy(unit=unit)
    if not occupancyResult['OK']:
        return occupancyResult
    occupancy = occupancyResult['Value']
    free = occupancy['Free']
    total = occupancy['Total']

    storeRes = self._storeCommand({'Endpoint': endpointResult['Value'],
                                   'Free': free,
                                   'Total': total,
                                   'ElementName': elementName})
    if not storeRes['OK']:
        return storeRes
    return S_OK({'Free': free, 'Total': total})
def __getStoragePathExists( self, lfnPaths, storageElement ):
  """ Return { lfn : True } for each of :lfnPaths: present at :storageElement:. """
  gLogger.info( 'Determining the existance of %d files at %s' % ( len( lfnPaths ), storageElement ) )
  res = StorageElement( storageElement ).exists( lfnPaths )
  if not res['OK']:
    gLogger.error( "Failed to obtain existance of paths", res['Message'] )
    return res
  failed = res['Value']['Failed']
  for lfnPath, error in failed.items():
    gLogger.error( 'Failed to determine existance of path', '%s %s' % ( lfnPath, error ) )
  if failed:
    return S_ERROR( 'Failed to determine existance of paths' )
  # keep only the paths that do exist
  resDict = dict( ( lfn, True ) for lfn, exists in res['Value']['Successful'].items() if exists )
  return S_OK( resDict )
def setUp( self ):
  """ Build the SE under test and create the destination LFN directory. """
  self.numberOfFiles = 1
  self.storageElement = StorageElement( storageElementToTest )
  self.localSourceFile = fileToTest
  self.localFileSize = getSize( self.localSourceFile )
  self.destDirectory = lfnDirToTest
  # the SE accepts the LFN directly, no URL resolution needed
  res = self.storageElement.createDirectory( self.destDirectory )
  self.assert_( res['OK'] )
def setTargetSE( self, se ):
  """ set target SE

  Refuses the current source SE as a target.

  :param self: self reference
  :param str se: target SE name
  """
  if se != self.sourceSE:
    self.targetSE = se
    self.oTargetSE = StorageElement( se )
    return self.__checkTargetSE()
  return S_ERROR( "TargetSE is SourceSE" )
def removeStorageDirectoryFromSE( directory, storageElement ):
  """ Delete directory on selected storage element """
  se = StorageElement( storageElement, False )

  res = returnSingleResult( se.exists( directory ) )
  if not res['OK']:
    return S_ERROR( "Failed to obtain existence of directory" + res['Message'] )
  if not res['Value']:
    # nothing to remove: report success
    return S_OK( "The directory %s does not exist at %s " % ( directory, storageElement ) )

  res = returnSingleResult( se.removeDirectory( directory, recursive = True ) )
  if res['OK']:
    return S_OK()
  return S_ERROR( "Failed to remove storage directory" + res['Message'] )
def __generateLocation( self, sbPath ):
  """ Generate the location string

  :param str sbPath: sandbox path
  :return: S_OK( ( SE name, path or PFN ) )
  """
  if self.__useLocalStorage:
    return S_OK( ( self.__localSEName, sbPath ) )
  #It's external storage
  storageElement = StorageElement( self.__externalSEName )
  res = storageElement.isValid()
  if not res['OK']:
    errStr = "Failed to instantiate destination StorageElement"
    gLogger.error( errStr, self.__externalSEName )
    return S_ERROR( errStr )
  result = storageElement.getPfnForLfn( sbPath )
  if not result['OK']:
    errStr = "Failed to generate PFN"
    gLogger.error( errStr, self.__externalSEName )
    return S_ERROR( errStr )
  return S_OK( ( self.__externalSEName, result['Value'] ) )
def _getSEParameters( seName ):
  """ Return the storage parameters of SE :seName:, preferring srm-capable plugins.

  Fix: with an empty plugin list the original returned an unbound
  ``seParameters`` (NameError); it now falls back to an explicit error.

  :param str seName: storage element name
  :return: S_OK with the parameters of the first working plugin, or an error
  """
  se = StorageElement( seName, hideExceptions = True )
  pluginsList = se.getPlugins()
  if not pluginsList['OK']:
    gLogger.warn( pluginsList['Message'], "SE: %s" % seName )
    return pluginsList
  pluginsList = pluginsList['Value']
  # Put the srm capable protocol first, but why doing that is a
  # mystery that will eventually need to be sorted out...
  for plugin in ( 'GFAL2_SRM2', 'SRM2' ):
    if plugin in pluginsList:
      pluginsList.remove( plugin )
      pluginsList.insert( 0, plugin )
  # first plugin whose parameters resolve wins
  seParameters = S_ERROR( 'No storage parameters found for %s' % seName )
  for plugin in pluginsList:
    seParameters = se.getStorageParameters( plugin )
    if seParameters['OK']:
      break
  return seParameters
def setUp(self):
    """Set up the base test fixture and restrict the SE to the GFAL2_XROOT plugin."""
    basicTest.setUp(self)
    # SE under test: only the GFAL2_XROOT protocol plugin is enabled
    self.tbt = StorageElement(self.storageName, protocols='GFAL2_XROOT')
    # start each test from an empty remote directory
    basicTest.clearDirectory(self)
def __checkIntegrity(self, storageElement, seReplicaIDs, allReplicaInfo):
    """Check the integrity of the files to ensure they are available
    Updates status of Offline Replicas for a later pass
    Return list of Online replicas to be Stage
    """
    if not seReplicaIDs:
        return S_OK({"Online": [], "Offline": []})

    # Since we are with a given SE, the LFN is a unique key
    lfnRepIDs = {}
    for replicaID in seReplicaIDs:
        lfnRepIDs[allReplicaInfo[replicaID]["LFN"]] = replicaID

    gLogger.info(
        "StageRequest.__checkIntegrity: Checking the integrity of %s replicas at %s."
        % (len(lfnRepIDs), storageElement))
    res = StorageElement(storageElement).getFileMetadata(lfnRepIDs)
    if not res["OK"]:
        gLogger.error(
            "StageRequest.__checkIntegrity: Completely failed to obtain metadata for replicas.",
            res["Message"])
        return res

    terminalReplicaIDs = {}
    onlineReplicaIDs = []
    offlineReplicaIDs = []
    for lfn, metadata in res["Value"]["Successful"].items():
        if metadata["Size"] != allReplicaInfo[lfnRepIDs[lfn]]["Size"]:
            gLogger.error(
                "StageRequest.__checkIntegrity: LFN StorageElement size does not match FileCatalog",
                lfn)
            terminalReplicaIDs[lfnRepIDs[lfn]] = "LFN StorageElement size does not match FileCatalog"
            lfnRepIDs.pop(lfn)
        elif metadata.get("Lost", False):
            gLogger.error(
                "StageRequest.__checkIntegrity: LFN has been Lost by the StorageElement", lfn)
            terminalReplicaIDs[lfnRepIDs[lfn]] = "LFN has been Lost by the StorageElement"
            lfnRepIDs.pop(lfn)
        elif metadata.get("Unavailable", False):
            gLogger.error(
                "StageRequest.__checkIntegrity: LFN is declared Unavailable by the StorageElement",
                lfn)
            terminalReplicaIDs[lfnRepIDs[lfn]] = "LFN is declared Unavailable by the StorageElement"
            lfnRepIDs.pop(lfn)
        elif metadata.get("Cached", metadata["Accessible"]):
            gLogger.verbose("StageRequest.__checkIntegrity: Cache hit for file.")
            onlineReplicaIDs.append(lfnRepIDs[lfn])
        else:
            offlineReplicaIDs.append(lfnRepIDs[lfn])

    for lfn, reason in res["Value"]["Failed"].items():
        # NOTE(review): in the mangled source the exact indentation of this
        # branch is ambiguous; the terminal marking and pop are kept inside
        # the "does not exist" case -- confirm against upstream history.
        if re.search("File does not exist", reason):
            gLogger.error(
                "StageRequest.__checkIntegrity: LFN does not exist in the StorageElement", lfn)
            terminalReplicaIDs[lfnRepIDs[lfn]] = "LFN does not exist in the StorageElement"
            lfnRepIDs.pop(lfn)

    # Update the states of the replicas in the database
    # TODO Sent status to integrity DB
    if terminalReplicaIDs:
        gLogger.info(
            "StageRequest.__checkIntegrity: %s replicas are terminally failed."
            % len(terminalReplicaIDs))
        res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
        if not res["OK"]:
            gLogger.error(
                "StageRequest.__checkIntegrity: Failed to update replica failures.",
                res["Message"])
    if onlineReplicaIDs:
        gLogger.info(
            "StageRequest.__checkIntegrity: %s replicas found Online." % len(onlineReplicaIDs))
    if offlineReplicaIDs:
        gLogger.info(
            "StageRequest.__checkIntegrity: %s replicas found Offline." % len(offlineReplicaIDs))
        res = self.stagerClient.updateReplicaStatus(offlineReplicaIDs, "Offline")
    return S_OK({"Online": onlineReplicaIDs, "Offline": offlineReplicaIDs})
def getFilesToStage(lfnList):
    """Split *lfnList* into files with at least one online (cached) replica
    and files that exist only offline.

    :param lfnList: list of LFNs to classify
    :return: S_OK({'onlineLFNs': [lfn], 'offlineLFNs': {seName: [lfn]}})
             or S_ERROR on replica/metadata lookup failure
    """
    onlineLFNs = set()
    offlineLFNsDict = {}
    if not lfnList:
        return S_OK({
            'onlineLFNs': list(onlineLFNs),
            'offlineLFNs': offlineLFNsDict
        })

    replicasRes = DataManager().getActiveReplicas(lfnList, getUrl=False)
    if not replicasRes['OK']:
        return replicasRes
    if replicasRes['Value']['Failed']:
        return S_ERROR("Failures in getting replicas")

    # Invert the LFN -> SEs mapping so each SE is queried only once
    seToLFNs = {}
    for lfn, seDict in replicasRes['Value']['Successful'].iteritems():
        for seName in seDict:
            seToLFNs.setdefault(seName, list()).append(lfn)

    failed = {}
    for seName, seLFNs in seToLFNs.iteritems():
        metaRes = StorageElement(seName).getFileMetadata(seLFNs)
        if not metaRes['OK']:
            failed[seName] = dict.fromkeys(seLFNs, metaRes['Message'])
            continue
        if metaRes['Value']['Failed']:
            failed[seName] = metaRes['Value']['Failed']
        # A single cached replica is enough for the LFN to count as online
        for lfn, mDict in metaRes['Value']['Successful'].iteritems():
            if 'Cached' not in mDict:
                failed.setdefault(
                    seName, {})[lfn] = 'No Cached item returned as metadata'
            elif mDict['Cached']:
                onlineLFNs.add(lfn)

    # If the file was found staged, ignore possible errors, but print out errors
    for seName, seFailed in failed.items():
        gLogger.error("Errors when getting files metadata", 'at %s' % seName)
        for lfn, reason in seFailed.items():
            gLogger.info('%s: %s' % (lfn, reason))
            if lfn in onlineLFNs:
                failed[seName].pop(lfn)
        if not failed[seName]:
            failed.pop(seName)

    if failed:
        badLFNs = set(lfn for seFailed in failed.values() for lfn in seFailed)
        gLogger.error("Could not get metadata", "for %d files" % len(badLFNs))
        return S_ERROR("Could not get metadata for files")

    # Pick one random hosting SE for every offline file
    for offlineLFN in set(lfnList) - onlineLFNs:
        candidateSEs = replicasRes['Value']['Successful'][offlineLFN].keys()
        random.shuffle(candidateSEs)
        offlineLFNsDict.setdefault(candidateSEs[0], list()).append(offlineLFN)

    return S_OK({
        'onlineLFNs': list(onlineLFNs),
        'offlineLFNs': offlineLFNsDict
    })
def __preRequestStaging(self, jobManifest, stageSite, opData):
    """Build the {SE: [LFN, ...]} map of tape replicas that must be staged.

    LFNs with a disk replica at the site are skipped; an LFN staged from
    several tape SEs is deduplicated, preferring the SE with most files so
    the staging requests are grouped.

    :param jobManifest: job manifest (provides VO and InputDataPolicy)
    :param str stageSite: site where the job will run
    :param dict opData: optimizer data holding the replica lookup result
    :return: S_OK({seName: [lfn, ...]}) or S_ERROR
    """
    tapeSEs = []
    diskSEs = []
    vo = jobManifest.getOption("VirtualOrganization")
    inputDataPolicy = jobManifest.getOption("InputDataPolicy", "Protocol")
    # Download policy requires direct access; otherwise protocol access is enough
    connectionLevel = "DOWNLOAD" if "download" in inputDataPolicy.lower(
    ) else "PROTOCOL"
    # Allow staging from SEs accessible by protocol
    result = DMSHelpers(vo=vo).getSEsForSite(
        stageSite, connectionLevel=connectionLevel)
    if not result["OK"]:
        return S_ERROR("Could not determine SEs for site %s" % stageSite)
    siteSEs = result["Value"]
    for seName in siteSEs:
        se = StorageElement(seName, vo=vo)
        seStatus = se.getStatus()
        if not seStatus["OK"]:
            return seStatus
        seStatus = seStatus["Value"]
        if seStatus["Read"] and seStatus["TapeSE"]:
            tapeSEs.append(seName)
        if seStatus["Read"] and seStatus["DiskSE"]:
            diskSEs.append(seName)
    if not tapeSEs:
        return S_ERROR("No Local SEs for site %s" % stageSite)
    self.jobLog.debug("Tape SEs are %s" % (", ".join(tapeSEs)))

    # I swear this is horrible DM code it's not mine.
    # Eternity of hell to the inventor of the Value of Value of Success of...
    inputData = opData["Value"]["Value"]["Successful"]
    stageLFNs = {}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[lfn]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append(seName)
        for seName in seStage:
            stageLFNs.setdefault(seName, []).append(lfn)
            if lfn not in lfnToStage:
                lfnToStage.append(lfn)

    if not stageLFNs:
        return S_ERROR("Cannot find tape replicas")

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of replicas to stage.
    # BUGFIX: reversed(sorted(...)) returned a one-shot iterator which was
    # exhausted after the first LFN of the loop below, so deduplication never
    # ran for the remaining LFNs. Materialize a real (reusable) list instead.
    sortedSEs = sorted(
        ((len(stageLFNs[seName]), seName) for seName in stageLFNs),
        reverse=True)
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            # SE may already have been emptied and removed in a previous pass
            if seName not in stageLFNs:
                continue
            if lfn in stageLFNs[seName]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[seName].remove(lfn)
                else:
                    found = True
                # 4.-If empty SE, remove
                if not stageLFNs[seName]:
                    stageLFNs.pop(seName)
    return S_OK(stageLFNs)
if registerOption in ['both', 'dark'] and deletionSwitch: gLogger.error( "Error: '-r dark/both' and '-e' cannot be used at the same time, please use '-r dark' when deleting" ) exit(1) if registerOption in ['both', 'dark'] and srmSwitch: gLogger.error( "Error: '-r dark/both' and '-m' cannot be used at the same time, please use '-r unlinked' when using srmmode" ) exit(1) from DIRAC.DataManagementSystem.Client.DataManager import DataManager dm = DataManager() from DIRAC.Resources.Storage.StorageElement import StorageElement se = StorageElement(SITE) # initial list protocols = ['root', 'http', 'gsiftp', 'srm'] fileList = [] fileDFCList = [] fileDarkList = [] fileUnlinkedList = [] inputFileList = [] if inputSwitch: inputFile = open(INPUT, 'r') tmpList = inputFile.readlines() for i in tmpList: inputFileList.append(i.rstrip('\n').rstrip('/')) else:
def _callback(self):
    """"
    After a Transfer operation, update the matching Request in the RMS and
    insert the registration operation just before the ReplicateAndRegister one.

    NOTE: we don't use ReqProxy when putting the request back
          to avoid operational hell
    """
    log = self._log.getSubLogger("callback", child=True)

    # In case there is no Request associated to the Transfer
    # we do not do the callback. Not really advised, but there is a feature
    # request to use the FTS3 system without RMS
    if self.rmsReqID == -1:
        return S_OK()

    # The callback is only performed on a Scheduled request
    statusRes = self.reqClient.getRequestStatus(self.rmsReqID)
    if not statusRes['OK']:
        log.error("Could not get request status", statusRes)
        return statusRes
    status = statusRes['Value']
    if status != 'Scheduled':
        return S_ERROR("Request with id %s is not Scheduled:%s" %
                       (self.rmsReqID, status))

    updateRes = self._updateRmsOperationStatus()
    if not updateRes['OK']:
        return updateRes

    ftsFilesByTarget = updateRes['Value']['ftsFilesByTarget']
    request = updateRes['Value']['request']
    operation = updateRes['Value']['operation']

    log.info("will create %s 'RegisterReplica' operations" %
             len(ftsFilesByTarget))

    for target, ftsFileList in ftsFilesByTarget.iteritems():
        log.info(
            "creating 'RegisterReplica' operation for targetSE %s with %s files..."
            % (target, len(ftsFileList)))

        regOp = rmsOperation()
        regOp.Type = "RegisterReplica"
        regOp.Status = "Waiting"
        regOp.TargetSE = target
        if operation.Catalog:
            regOp.Catalog = operation.Catalog

        targetSEObj = StorageElement(target, vo=self.vo)
        for ftsFile in ftsFileList:
            opFile = rmsFile()
            opFile.LFN = ftsFile.lfn
            opFile.Checksum = ftsFile.checksum
            # TODO: are we really ever going to change type... ?
            opFile.ChecksumType = 'ADLER32'
            opFile.Size = ftsFile.size
            urlRes = returnSingleResult(
                targetSEObj.getURL(ftsFile.lfn, protocol='srm'))
            # This should never happen !
            if not urlRes["OK"]:
                log.error("Could not get url", urlRes['Message'])
                continue
            opFile.PFN = urlRes["Value"]
            regOp.addFile(opFile)

        request.insertBefore(regOp, operation)

    return self.reqClient.putRequest(request,
                                     useFailoverProxy=False,
                                     retryMainService=3)
def resolveCatalogPFNSizeMismatch(self, problematicDict):
    """Resolve a CatalogPFNSizeMismatch prognosis from the integrity DB.

    Compares the file size registered in the catalog, on storage and in the
    bookkeeping, then either marks the replica checked, removes the bad
    replica, reclassifies the prognosis, or bumps the retry count.

    :param dict problematicDict: integrity-DB record with 'LFN', 'SE', 'FileID'
    """
    lfn = problematicDict['LFN']
    se = problematicDict['SE']
    fileID = problematicDict['FileID']

    # Gather the three independently-registered sizes
    sizeRes = returnSingleResult(self.fc.getFileSize(lfn))
    if not sizeRes['OK']:
        return self.__returnProblematicError(fileID, sizeRes)
    catalogSize = sizeRes['Value']

    sizeRes = returnSingleResult(StorageElement(se).getFileSize(lfn))
    if not sizeRes['OK']:
        return self.__returnProblematicError(fileID, sizeRes)
    storageSize = sizeRes['Value']

    bkKCatalog = FileCatalog(['BookkeepingDB'])
    sizeRes = returnSingleResult(bkKCatalog.getFileSize(lfn))
    if not sizeRes['OK']:
        return self.__returnProblematicError(fileID, sizeRes)
    bookkeepingSize = sizeRes['Value']

    if bookkeepingSize == catalogSize == storageSize:
        # Full agreement: nothing wrong with this replica
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) matched all registered sizes."
            % fileID)
        return self.__updateReplicaToChecked(problematicDict)

    if catalogSize == bookkeepingSize:
        # Storage copy disagrees with both catalogs: the replica itself is bad
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) found to mismatch the bookkeeping also"
            % fileID)
        replicasRes = returnSingleResult(self.fc.getReplicas(lfn))
        if not replicasRes['OK']:
            return self.__returnProblematicError(fileID, replicasRes)
        if len(replicasRes['Value']) <= 1:
            # Never drop the last remaining copy of a file
            gLogger.info(
                "CatalogPFNSizeMismatch replica (%d) has no other replicas."
                % fileID)
            return S_ERROR(
                "Not removing catalog file mismatch since the only replica"
            )
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) has other replicas. Removing..."
            % fileID)
        removeRes = self.dm.removeReplica(se, lfn)
        if not removeRes['OK']:
            return self.__returnProblematicError(fileID, removeRes)
        return self.__updateCompletedFiles('CatalogPFNSizeMismatch', fileID)

    if (catalogSize != bookkeepingSize) and (bookkeepingSize == storageSize):
        # Catalog is the odd one out: reclassify the problem
        gLogger.info(
            "CatalogPFNSizeMismatch replica (%d) found to match the bookkeeping size"
            % fileID)
        checkedRes = self.__updateReplicaToChecked(problematicDict)
        if not checkedRes['OK']:
            return self.__returnProblematicError(fileID, checkedRes)
        return self.changeProblematicPrognosis(fileID, 'BKCatalogSizeMismatch')

    # All three sizes disagree: retry later
    gLogger.info(
        "CatalogPFNSizeMismatch replica (%d) all sizes found mismatch. Updating retry count"
        % fileID)
    return self.incrementProblematicRetry(fileID)
def __preRequestStaging( self, jobState, stageSite, opData ):
    """Build the {SE: [LFN, ...]} map of tape replicas to stage for this job.

    LFNs with a disk replica at the site are skipped; an LFN found at several
    tape SEs is deduplicated, preferring the SE that stages the most files so
    the staging requests are grouped.

    :param jobState: job state object (provides the manifest)
    :param str stageSite: site where the job will run
    :param dict opData: optimizer data holding the replica lookup result
    :return: S_OK({seName: [lfn, ...]}) or S_ERROR
    """
    from DIRAC.DataManagementSystem.Utilities.DMSHelpers import DMSHelpers
    tapeSEs = []
    diskSEs = []
    result = jobState.getManifest()
    if not result['OK']:
        return result
    manifest = result['Value']
    vo = manifest.getOption( 'VirtualOrganization' )
    inputDataPolicy = manifest.getOption( 'InputDataPolicy', 'Protocol' )
    # Download policy requires direct access; otherwise protocol access is enough
    connectionLevel = 'DOWNLOAD' if 'download' in inputDataPolicy.lower() else 'PROTOCOL'
    # Allow staging from SEs accessible by protocol
    result = DMSHelpers( vo = vo ).getSEsForSite( stageSite, connectionLevel = connectionLevel )
    if not result['OK']:
        return S_ERROR( 'Could not determine SEs for site %s' % stageSite )
    siteSEs = result['Value']
    for seName in siteSEs:
        se = StorageElement( seName, vo = vo )
        result = se.getStatus()
        if not result[ 'OK' ]:
            self.jobLog.error( "Cannot retrieve SE %s status: %s" % ( seName, result[ 'Message' ] ) )
            return S_ERROR( "Cannot retrieve SE status" )
        seStatus = result[ 'Value' ]
        if seStatus[ 'Read' ] and seStatus[ 'TapeSE' ]:
            tapeSEs.append( seName )
        if seStatus[ 'Read' ] and seStatus[ 'DiskSE' ]:
            diskSEs.append( seName )
    if not tapeSEs:
        return S_ERROR( "No Local SEs for site %s" % stageSite )
    self.jobLog.verbose( "Tape SEs are %s" % ( ", ".join( tapeSEs ) ) )

    # I swear this is horrible DM code it's not mine.
    # Eternity of hell to the inventor of the Value of Value of Success of...
    inputData = opData['Value']['Value']['Successful']
    stageLFNs = {}
    lfnToStage = []
    for lfn in inputData:
        replicas = inputData[ lfn ]
        # Check SEs
        seStage = []
        for seName in replicas:
            if seName in diskSEs:
                # This lfn is in disk. Skip it
                seStage = []
                break
            if seName not in tapeSEs:
                # This lfn is not in this tape SE. Check next SE
                continue
            seStage.append( seName )
        for seName in seStage:
            if seName not in stageLFNs:
                stageLFNs[ seName ] = []
            stageLFNs[ seName ].append( lfn )
            if lfn not in lfnToStage:
                lfnToStage.append( lfn )

    if not stageLFNs:
        return S_ERROR( "Cannot find tape replicas" )

    # Check if any LFN is in more than one SE
    # If that's the case, try to stage from the SE that has more LFNs to stage to group the request
    # 1.- Get the SEs ordered by descending number of replicas to stage.
    # BUGFIX: reversed( sorted( ... ) ) produced a one-shot iterator that was
    # exhausted after the first LFN of the loop below, so deduplication never
    # ran for the remaining LFNs. Materialize a real (reusable) list instead.
    sortedSEs = sorted( [ ( len( stageLFNs[ seName ] ), seName ) for seName in stageLFNs ], reverse = True )
    for lfn in lfnToStage:
        found = False
        # 2.- Traverse the SEs
        for _stageCount, seName in sortedSEs:
            # SE may already have been emptied and removed in a previous pass
            if seName not in stageLFNs:
                continue
            if lfn in stageLFNs[ seName ]:
                # 3.- If first time found, just mark as found. Next time delete the replica from the request
                if found:
                    stageLFNs[ seName ].remove( lfn )
                else:
                    found = True
                # 4.-If empty SE, remove
                if len( stageLFNs[ seName ] ) == 0:
                    stageLFNs.pop( seName )
    return S_OK( stageLFNs )
def __getSiteCandidates(self, okReplicas, vo):
    """
    Return the sites able to serve all the job input data, together with a
    per-site count of how many files are resolvable from disk and from tape.

    :param dict okReplicas: lfn -> replica map (seName keys)
    :param str vo: virtual organization
    :return: S_OK({siteName: {'disk': int, 'tape': int}}) or S_ERROR
    """
    # Map every LFN to the set of sites hosting one of its replicas
    lfnSites = {}
    for lfn, replicas in okReplicas.items():
        siteSet = set()
        for seName in replicas:
            result = self.__getSitesForSE(seName)
            if not result['OK']:
                self.jobLog.warn("Could not get sites for SE %s: %s" %
                                 (seName, result['Message']))
                return result
            siteSet.update(result['Value'])
        lfnSites[lfn] = siteSet

    if not lfnSites:
        return S_ERROR("No candidate sites available")

    # A candidate site must host a replica of EVERY input file:
    # intersect all the per-LFN site sets
    siteCandidates = set.intersection(*lfnSites.values())
    if not siteCandidates:
        return S_ERROR('No candidate sites available')

    # In addition, check number of files on tape and disk for each site
    # for optimizations during scheduling
    sitesData = {}
    for siteName in siteCandidates:
        sitesData[siteName] = {'disk': set(), 'tape': set()}

    # Cache per-SE site list and status so each SE is queried only once
    seCache = {}
    for lfn, replicas in okReplicas.items():
        for seName in replicas:
            seInfo = seCache.get(seName)
            if seInfo is None:
                result = self.__getSitesForSE(seName)
                if not result['OK']:
                    self.jobLog.warn("Could not get sites for SE %s: %s" %
                                     (seName, result['Message']))
                    continue
                siteList = result['Value']
                seStatus = StorageElement(seName, vo=vo).getStatus()
                if not seStatus['OK']:
                    return seStatus
                seInfo = {'Sites': siteList, 'Status': seStatus['Value']}
                seCache[seName] = seInfo
            siteList = seInfo['Sites']
            seStatus = seInfo['Status']
            for siteName in siteList:
                # Only candidate sites are of interest
                if siteName not in siteCandidates:
                    continue
                diskLFNs = sitesData[siteName]['disk']
                tapeLFNs = sitesData[siteName]['tape']
                if seStatus['DiskSE']:
                    # Disk wins: drop any earlier tape record for this LFN
                    diskLFNs.add(lfn)
                    if lfn in tapeLFNs:
                        tapeLFNs.remove(lfn)
                if seStatus['TapeSE']:
                    if lfn not in diskLFNs:
                        tapeLFNs.add(lfn)

    # Reduce the per-site sets to their counts
    for siteName in sitesData:
        sitesData[siteName]['disk'] = len(sitesData[siteName]['disk'])
        sitesData[siteName]['tape'] = len(sitesData[siteName]['tape'])
    return S_OK(sitesData)
def execute(self, dataToResolve=None):
    """Download the requested input files to the local disk.

    Disk replicas at local SEs are preferred; tape replicas are only used
    when cached. A disk-space check is performed first, with a buffer left
    for any produced files.

    :param dataToResolve: optional list of LFNs overriding self.inputData
    :return: S_OK({'Successful': {lfn: downloadInfo}, 'Failed': [lfn]})
    """
    # Define local configuration options present at every site
    localSESet = set(self.configuration['LocalSEList'])
    self.jobID = self.configuration.get('JobID')

    if dataToResolve:
        self.log.verbose(
            'Data to resolve passed directly to DownloadInputData module')
        self.inputData = dataToResolve  # e.g. list supplied by another module

    self.inputData = sorted(
        lfn.replace('LFN:', '') for lfn in self.inputData)
    self.log.info('InputData to be downloaded is:\n%s' %
                  '\n'.join(self.inputData))
    replicas = self.fileCatalogResult['Value']['Successful']

    # Problematic files will be returned and can be handled by another module
    failedReplicas = set()
    # For the case that a file is found on two SEs at the same site
    # disk-based replicas are favoured.
    downloadReplicas = {}
    for lfn, reps in replicas.iteritems():
        if lfn not in self.inputData:
            # BUGFIX: the LFN was missing from the log call (no % argument)
            self.log.verbose(
                'LFN %s is not in requested input data to download' % lfn)
            failedReplicas.add(lfn)
            continue
        if not ('Size' in reps and 'GUID' in reps):
            self.log.error('Missing LFN metadata', "%s %s" % (lfn, str(reps)))
            failedReplicas.add(lfn)
            continue
        # Get and remove size and GUID
        size = reps.pop('Size')
        guid = reps.pop('GUID')
        # Remove all other items that are not SEs
        for item in reps.keys():
            if item not in self.availableSEs:
                reps.pop(item)
        downloadReplicas[lfn] = {'SE': [], 'Size': size, 'GUID': guid}
        # First get Disk replicas
        localReps = set(reps) & localSESet
        for seName in localReps:
            seStatus = StorageElement(seName).status()
            if seStatus['DiskSE'] and seStatus['Read']:
                downloadReplicas[lfn]['SE'].append(seName)
        # If no disk replicas, take tape replicas
        if not downloadReplicas[lfn]['SE']:
            for seName in localReps:
                seStatus = StorageElement(seName).status()
                if seStatus['TapeSE'] and seStatus['Read'] and _isCached(
                        lfn, seName):
                    # Only consider replicas that are cached
                    downloadReplicas[lfn]['SE'].append(seName)

    totalSize = 0
    # The logger returns whether the message was actually emitted; reuse it
    # as a flag to skip the per-replica verbose dump when not needed
    verbose = self.log.verbose('Replicas to download are:')
    for lfn, reps in downloadReplicas.iteritems():
        self.log.verbose(lfn)
        if not reps['SE']:
            self.log.info(
                'Failed to find data at local SEs, will try to download from anywhere',
                lfn)
            reps['SE'] = ''
        else:
            if len(reps['SE']) > 1:
                # if more than one SE is available randomly select one
                random.shuffle(reps['SE'])
            # get SE and pfn from tuple
            reps['SE'] = reps['SE'][0]
        totalSize += int(reps.get('Size', 0))
        if verbose:
            for item, value in sorted(reps.items()):
                if value:
                    self.log.verbose('\t%s %s' % (item, value))
    self.log.info('Total size of files to be downloaded is %s bytes' %
                  (totalSize))
    for lfn in failedReplicas:
        self.log.warn(
            'Not all file metadata (SE,PFN,Size,GUID) was available for LFN',
            lfn)

    # Now need to check that the list of replicas to download fits into
    # the available disk space. Initially this is a simple check and if there is not
    # space for all input data, no downloads are attempted.
    result = self.__checkDiskSpace(totalSize)
    if not result['OK']:
        self.log.warn('Problem checking available disk space:\n%s' % (result))
        return result
    if not result['Value']:
        report = 'Not enough disk space available for download: %s / %s bytes' % (
            result['Value'], totalSize)
        self.log.warn(report)
        self.__setJobParam(COMPONENT_NAME, report)
        return S_OK({'Failed': self.inputData, 'Successful': {}})

    resolvedData = {}
    localSECount = 0
    for lfn, info in downloadReplicas.iteritems():
        seName = info['SE']
        guid = info['GUID']
        reps = replicas.get(lfn, {})
        if seName:
            # Pre-flight metadata check before attempting the download
            result = StorageElement(seName).getFileMetadata(lfn)
            if not result['OK']:
                self.log.error("Error getting metadata", result['Message'])
                failedReplicas.add(lfn)
                continue
            if lfn in result['Value']['Failed']:
                self.log.error(
                    'Could not get Storage Metadata for %s at %s: %s' %
                    (lfn, seName, result['Value']['Failed'][lfn]))
                failedReplicas.add(lfn)
                continue
            metadata = result['Value']['Successful'][lfn]
            if metadata.get('Lost', False):
                error = "PFN has been Lost by the StorageElement"
            elif metadata.get('Unavailable', False):
                error = "PFN is declared Unavailable by the StorageElement"
            elif not metadata.get('Cached', metadata['Accessible']):
                error = "PFN is no longer in StorageElement Cache"
            else:
                error = ''
            if error:
                self.log.error(error, lfn)
                failedReplicas.add(lfn)
                continue
            self.log.info('Preliminary checks OK, download %s from %s:' %
                          (lfn, seName))
            result = self._downloadFromSE(lfn, seName, reps, guid)
            if not result['OK']:
                self.log.error(
                    "Download failed",
                    "Tried downloading from SE %s: %s" %
                    (seName, result['Message']))
        else:
            result = {'OK': False}

        if not result['OK']:
            # The selected SE failed (or none was selected): try any other SE
            reps.pop(seName, None)
            if reps:
                self.log.info('Trying to download from any SE')
                result = self._downloadFromBestSE(lfn, reps, guid)
                if not result['OK']:
                    self.log.error(
                        "Download from best SE failed",
                        "Tried downloading %s: %s" % (lfn, result['Message']))
                    failedReplicas.add(lfn)
            else:
                failedReplicas.add(lfn)
        else:
            localSECount += 1
        if result['OK']:
            # Rename file if downloaded FileName does not match the LFN... How can this happen?
            lfnName = os.path.basename(lfn)
            oldPath = result['Value']['path']
            fileName = os.path.basename(oldPath)
            if lfnName != fileName:
                newPath = os.path.join(os.path.dirname(oldPath), lfnName)
                os.rename(oldPath, newPath)
                result['Value']['path'] = newPath
            resolvedData[lfn] = result['Value']

    # Report datasets that could not be downloaded
    report = ''
    if resolvedData:
        report += 'Successfully downloaded %d LFN(s)' % len(resolvedData)
        if localSECount != len(resolvedData):
            report += ' (%d from local SEs):\n' % localSECount
        else:
            report += ' from local SEs:\n'
        report += '\n'.join(sorted(resolvedData))
    failedReplicas = sorted(failedReplicas.difference(resolvedData))
    if failedReplicas:
        # BUGFIX: the separator was the letter 'n' instead of '\n', mashing
        # all failed LFNs into one unreadable line
        self.log.warn(
            'The following LFN(s) could not be downloaded to the WN:\n%s' %
            '\n'.join(failedReplicas))
        report += '\nFailed to download %d LFN(s):\n' % len(failedReplicas)
        report += '\n'.join(failedReplicas)
    if report:
        self.__setJobParam(COMPONENT_NAME, report)
    return S_OK({'Successful': resolvedData, 'Failed': failedReplicas})
def main():
    """Submit RMS requests removing the given LFNs (or their replicas at one SE)."""
    # Registering arguments will automatically add their description to the help menu
    Script.registerArgument(" SE: StorageElement|All")
    Script.registerArgument(["LFN: LFN or file containing a List of LFNs"])
    # parseCommandLine show help when mandatory arguments are not specified or incorrect argument
    Script.parseCommandLine(ignoreErrors=False)

    args = Script.getPositionalArgs()
    targetSE = args.pop(0)

    # Every remaining argument is either a literal LFN or a file of LFNs
    lfns = []
    for arg in args:
        if os.path.exists(arg):
            with open(arg, "r") as inputFile:
                lfns.extend(line.strip() for line in inputFile.read().splitlines())
        else:
            lfns.append(arg)

    from DIRAC.Resources.Storage.StorageElement import StorageElement
    import DIRAC

    # Check is provided SE is OK
    if targetSE != "All":
        se = StorageElement(targetSE)
        if not se.valid:
            print(se.errorReason)
            print()
            Script.showHelp()

    from DIRAC.RequestManagementSystem.Client.Request import Request
    from DIRAC.RequestManagementSystem.Client.Operation import Operation
    from DIRAC.RequestManagementSystem.Client.File import File
    from DIRAC.RequestManagementSystem.Client.ReqClient import ReqClient
    from DIRAC.RequestManagementSystem.private.RequestValidator import RequestValidator
    from DIRAC.Resources.Catalog.FileCatalog import FileCatalog

    reqClient = ReqClient()
    fc = FileCatalog()
    requestOperation = "RemoveFile" if targetSE == "All" else "RemoveReplica"

    # One request per chunk of 100 LFNs
    for lfnChunk in breakListIntoChunks(lfns, 100):
        oRequest = Request()
        oRequest.RequestName = "%s_%s" % (
            md5(repr(time.time()).encode()).hexdigest()[:16],
            md5(repr(time.time()).encode()).hexdigest()[:16],
        )

        oOperation = Operation()
        oOperation.Type = requestOperation
        oOperation.TargetSE = targetSE

        res = fc.getFileMetadata(lfnChunk)
        if not res["OK"]:
            print("Can't get file metadata: %s" % res["Message"])
            DIRAC.exit(1)
        if res["Value"]["Failed"]:
            print(
                "Could not get the file metadata of the following, so skipping them:"
            )
            for fFile in res["Value"]["Failed"]:
                print(fFile)

        for lfn, metadata in res["Value"]["Successful"].items():
            rarFile = File()
            rarFile.LFN = lfn
            rarFile.Size = metadata["Size"]
            rarFile.Checksum = metadata["Checksum"]
            rarFile.GUID = metadata["GUID"]
            rarFile.ChecksumType = "ADLER32"
            oOperation.addFile(rarFile)

        oRequest.addOperation(oOperation)
        isValid = RequestValidator().validate(oRequest)
        if not isValid["OK"]:
            print("Request is not valid: ", isValid["Message"])
            DIRAC.exit(1)

        result = reqClient.putRequest(oRequest)
        if result["OK"]:
            print("Request %d Submitted" % result["Value"])
        else:
            print("Failed to submit Request: ", result["Message"])
def setuptest(request):
    """Pytest fixture: build a local directory tree under a temp dir and
    publish the upload/check mappings used by the storage-element tests
    as module-level globals.

    NOTE(review): 'getFile' is declared global but never assigned here —
    presumably set elsewhere; confirm before relying on it.

    :param request: pytest fixture request (used to register the finalizer)
    :return: (local_path, random int) fixture value
    """
    global local_path, putDir, createDir, putFile, isFile, listDir,\
        getDir, getFile, rmDir, removeFile, se, filesInFolderAandB, fileAdlers, fileSizes
    local_path = tempfile.mkdtemp()

    # create the local structure
    workPath = os.path.join(local_path, 'Workflow')
    os.mkdir(workPath)
    os.mkdir(os.path.join(workPath, 'FolderA'))
    with open(os.path.join(workPath, 'FolderA', 'FileA'), 'w') as f:
        f.write(_mul('FileA'))
    os.mkdir(os.path.join(workPath, 'FolderA', 'FolderAA'))
    with open(os.path.join(workPath, 'FolderA', 'FolderAA', 'FileAA'), 'w') as f:
        f.write(_mul('FileAA'))
    os.mkdir(os.path.join(workPath, 'FolderB'))
    with open(os.path.join(workPath, 'FolderB', 'FileB'), 'w') as f:
        f.write(_mul('FileB'))
    for fn in ["File1", "File2", "File3"]:
        with open(os.path.join(workPath, fn), 'w') as f:
            f.write(_mul(fn))

    se = StorageElement(STORAGE_NAME)

    putDir = {
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA'):
        os.path.join(local_path, 'Workflow/FolderA'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderB'):
        os.path.join(local_path, 'Workflow/FolderB')
    }
    createDir = [
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA/FolderAA'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA/FolderABA'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA/FolderAAB')
    ]
    putFile = {
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA/File1'):
        os.path.join(local_path, 'Workflow/File1'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderAA/File1'):
        os.path.join(local_path, 'Workflow/File1'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderBB/File2'):
        os.path.join(local_path, 'Workflow/File2'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderB/File2'):
        os.path.join(local_path, 'Workflow/File2'),
        os.path.join(DESTINATION_PATH, 'Workflow/File3'):
        os.path.join(local_path, 'Workflow/File3')
    }
    isFile = putFile.keys()
    listDir = [
        os.path.join(DESTINATION_PATH, 'Workflow'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderB')
    ]
    getDir = [
        os.path.join(DESTINATION_PATH, 'Workflow/FolderA'),
        os.path.join(DESTINATION_PATH, 'Workflow/FolderB')
    ]
    removeFile = [os.path.join(DESTINATION_PATH, 'Workflow/FolderA/File1')]
    # BUGFIX: this was assigned to a local 'rmdir' (lowercase d), leaving the
    # declared global 'rmDir' unset for the tests that consume it.
    rmDir = [os.path.join(DESTINATION_PATH, 'Workflow')]

    # This list is used to check for existence of files
    # after uploading the directory: they should NOT exist.
    # Uploading a directory does not work.
    filesInFolderAandB = []
    for dirName in ('Workflow/FolderA', 'Workflow/FolderB'):
        for root, _dirs, files in os.walk(os.path.join(local_path, dirName)):
            for fn in files:
                filesInFolderAandB.append(
                    os.path.join(DESTINATION_PATH,
                                 root.replace(local_path, '').strip('/'), fn))
    filesInFolderAandB = dict.fromkeys(filesInFolderAandB, False)

    fileAdlers = {}
    fileSizes = {}
    for lfn, localFn in putFile.iteritems():
        fileAdlers[lfn] = fileAdler(localFn)
        fileSizes[lfn] = getSize(localFn)

    clearDirectory(se, local_path, DESTINATION_PATH)

    def teardown():
        """Remove the local tree and wipe the remote test directory."""
        print("Cleaning local test")
        shutil.rmtree(local_path)
        clearDirectory(se, local_path, DESTINATION_PATH)

    request.addfinalizer(teardown)
    return local_path, random.randint(0, 100)  # provide the fixture value
def __resolveReplicas(self, seList, replicas, ignoreTape=False, requestedProtocol=""):
    """Resolve TURLs for the requested input data on the given local SEs.

    Classifies the SEs as disk/tape, keeps for each LFN its disk replicas
    (falling back to tape unless ignoreTape), verifies storage metadata,
    then obtains TURLs per SE.

    :param seList: list of local SE names to consider
    :param dict replicas: lfn -> replica info (SE keys plus 'Size'/'GUID')
    :param bool ignoreTape: if True, tape-only replicas are not considered
    :param str requestedProtocol: protocol passed to StorageElement.getURL
    :return: S_OK({'Successful': {lfn: [trackDict]}, 'Failed': [lfn]})
    """
    # Partition the candidate SEs into readable disk and readable tape SEs
    diskSEs = set()
    tapeSEs = set()
    if not seList:
        return S_OK({"Successful": {}, "Failed": []})
    for localSE in seList:
        seStatus = StorageElement(localSE).status()
        if seStatus["Read"] and seStatus["DiskSE"]:
            diskSEs.add(localSE)
        elif seStatus["Read"] and seStatus["TapeSE"]:
            tapeSEs.add(localSE)

    # For the case that a file is found on two SEs at the same site
    # disk-based replicas are favoured.
    # Problematic files will be returned and can be handled by another module
    failedReplicas = set()
    newReplicasDict = {}
    for lfn, reps in replicas.items():
        if lfn in self.inputData:
            # Check that all replicas are on a valid local SE
            if not [se for se in reps if se in diskSEs.union(tapeSEs)]:
                failedReplicas.add(lfn)
            else:
                sreps = set(reps)
                for seName in diskSEs & sreps:
                    newReplicasDict.setdefault(lfn, []).append(seName)
                # Tape replicas only if no disk replica was kept
                if not newReplicasDict.get(lfn) and not ignoreTape:
                    for seName in tapeSEs & sreps:
                        newReplicasDict.setdefault(lfn, []).append(seName)

    # Check that all LFNs have at least one replica and GUID
    if failedReplicas:
        # in principle this is not a failure but depends on the policy of the VO
        # datasets could be downloaded from another site
        self.log.info(
            "The following file(s) were found not to have replicas on any of %s:\n%s"
            % (str(seList), "\n".join(sorted(failedReplicas))))

    # Need to group files by SE in order to stage optimally
    # we know from above that all remaining files have a replica
    # (preferring disk if >1) in the local storage.
    # IMPORTANT, only add replicas for input data that is requested
    # since this module could have been executed after another.
    # NOTE(review): the loop variable below shadows the 'seList' parameter,
    # which is not used again afterwards — confirm before refactoring.
    seFilesDict = {}
    for lfn, seList in newReplicasDict.items():
        for seName in seList:
            seFilesDict.setdefault(seName, []).append(lfn)

    # Visit SEs with the most files first so an LFN's first track entry
    # comes from the "biggest" SE
    sortedSEs = sorted(
        ((len(lfns), seName) for seName, lfns in seFilesDict.items()),
        reverse=True)

    trackLFNs = {}
    for _len, seName in sortedSEs:
        for lfn in seFilesDict[seName]:
            if "Size" in replicas[lfn] and "GUID" in replicas[lfn]:
                trackLFNs.setdefault(lfn, []).append({
                    "pfn": replicas.get(lfn, {}).get(seName, lfn),
                    "se": seName,
                    "size": replicas[lfn]["Size"],
                    "guid": replicas[lfn]["GUID"],
                })

    self.log.debug("Files grouped by SEs are:\n%s" % str(seFilesDict))
    for seName, lfns in seFilesDict.items():
        self.log.info(" %s LFNs found from catalog at SE %s" %
                      (len(lfns), seName))
        self.log.verbose("\n".join(lfns))

    # Can now start to obtain TURLs for files grouped by localSE
    # for requested input data
    for seName, lfns in seFilesDict.items():
        if not lfns:
            continue
        failedReps = set()
        result = StorageElement(seName).getFileMetadata(lfns)
        if not result["OK"]:
            self.log.error("Error getting metadata.",
                           result["Message"] + ":\n%s" % "\n".join(lfns))
            # If we can not get MetaData, most likely there is a problem with the SE
            # declare the replicas failed and continue
            failedReps.update(lfns)
            continue
        failed = result["Value"]["Failed"]
        if failed:
            # If MetaData can not be retrieved for some PFNs
            # declared them failed and go on
            for lfn in failed:
                lfns.remove(lfn)
                if isinstance(failed, dict):
                    self.log.error(failed[lfn], lfn)
                failedReps.add(lfn)
        for lfn, metadata in result["Value"]["Successful"].items():
            # Translate the storage metadata flags into an error string
            if metadata.get("Lost", False):
                error = "File has been Lost by the StorageElement %s" % seName
            elif metadata.get("Unavailable", False):
                error = "File is declared Unavailable by the StorageElement %s" % seName
            elif seName in tapeSEs and not metadata.get(
                    "Cached", metadata["Accessible"]):
                error = "File is not online in StorageElement %s Cache" % seName
            elif not metadata.get("Accessible", True):
                error = "File is not accessible"
            else:
                error = ""
            if error:
                lfns.remove(lfn)
                self.log.error(error, lfn)
                # If PFN is not available
                # declared it failed and go on
                failedReps.add(lfn)
        if None in failedReps:
            failedReps.remove(None)
        if not failedReps:
            self.log.info("Preliminary checks OK, getting TURLS at %s for:\n%s"
                          % (seName, "\n".join(lfns)))
        else:
            self.log.warn("Errors during preliminary checks for %d files" %
                          len(failedReps))

        result = StorageElement(seName).getURL(lfns, protocol=requestedProtocol)
        if not result["OK"]:
            self.log.error("Error getting TURLs", result["Message"])
            return result

        badTURLCount = 0
        badTURLs = []
        seResult = result["Value"]
        for lfn, cause in seResult["Failed"].items():
            badTURLCount += 1
            badTURLs.append("Failed to obtain TURL for %s: %s" % (lfn, cause))
            failedReps.add(lfn)
        if badTURLCount:
            self.log.warn("Found %s problematic TURL(s) for job %s" %
                          (badTURLCount, self.jobID))
            param = "\n".join(badTURLs)
            self.log.info(param)
            result = self.__setJobParam("ProblematicTURLs", param)
            if not result["OK"]:
                self.log.warn("Error setting job param", result["Message"])
        failedReplicas.update(failedReps)

        # Attach the resolved TURL to the track entry of this SE
        for lfn, turl in seResult["Successful"].items():
            for track in trackLFNs[lfn]:
                if track["se"] == seName:
                    track["turl"] = turl
                    break
            self.log.info(
                "Resolved input data\n>>>> SE: %s\n>>>>LFN: %s\n>>>>TURL: %s"
                % (seName, lfn, turl))
    ##### End of loop on SE #######

    # Check if the files were actually resolved (i.e. have a TURL)
    # If so, remove them from failed list
    for lfn, mdataList in list(trackLFNs.items(
    )):  # There is a pop below, can't iterate trackLFNs
        for mdata in list(mdataList):
            if "turl" not in mdata:
                mdataList.remove(mdata)
                self.log.info("No TURL resolved for %s at %s" %
                              (lfn, mdata["se"]))
        if not mdataList:
            trackLFNs.pop(lfn, None)
            failedReplicas.add(lfn)
        elif lfn in failedReplicas:
            failedReplicas.remove(lfn)
    self.log.debug("All resolved data", sorted(trackLFNs))
    self.log.debug("All failed data", sorted(failedReplicas))

    return S_OK({
        "Successful": trackLFNs,
        "Failed": sorted(failedReplicas)
    })
def __writeSEInfo(xml_doc, xml_site, site, site_tier, site_subtier):
    """ Writes SE information in the XML Document

    :param xml_doc: XML document being populated
    :param xml_site: XML element for the site, under which SE services are appended
    :param str site: full site name; the middle dot-separated token is used as the real site name
    :param site_tier: site tier level, convertible to int
    :param str site_subtier: site sub-tier label (e.g. 'T2-D')
    :return: bool -- True if the SE information was written, False on any lookup error
    """

    def __write_SE_XML(site_se_opts):
        """Sub-function just to populate the XML with the SE values """
        site_se_name = site_se_opts.get('Host')
        site_se_flavour = site_se_opts.get('Protocol')
        site_se_path = site_se_opts.get('Path', 'UNDEFINED')
        site_se_endpoint = site_se_opts.get('URLBase')
        mappingSEFlavour = {'srm': 'SRMv2', 'root': 'XROOTD', 'http': 'HTTPS'}

        xml_se = xml_append(xml_doc, xml_site, 'service',
                            endpoint=site_se_endpoint,
                            flavour=mappingSEFlavour.get(site_se_flavour, 'UNDEFINED'),
                            hostname=site_se_name,
                            path=site_se_path)
        # ipv6 status of the SE
        i6Status = NagiosTopologyAgent.isHostIPV6(site_se_name)
        i6Comment = ""
        if i6Status == -1:
            i6Comment = "Maybe DIRAC Service, not a valid machine"
        xml_append(xml_doc, xml_se, 'queues',
                   ipv6_status=str(i6Status),
                   ipv6_comment=i6Comment)

    has_grid_elem = True
    real_site_name = site.split(".")[1]
    dmsHelper = DMSHelpers()

    # dst may legitimately stay unset if the site is neither Tier-0/1 nor T2-D;
    # initialise explicitly so we can fail gracefully instead of raising NameError.
    dst = None
    se_plugins_RAW = None

    if int(site_tier) in (0, 1):
        dst = dmsHelper.getSEInGroupAtSite('Tier1-DST', real_site_name)
        raw = dmsHelper.getSEInGroupAtSite('Tier1-RAW', real_site_name)
        if not raw['OK']:
            gLogger.error(raw['Message'])
            return False
        raw = raw['Value']
        se_RAW = StorageElement(raw)
        se_plugins_RAW = se_RAW.getPlugins()
        # Fix: the original code never checked this result and would fail with a
        # KeyError on 'Value' inside the protocol loop below if the call failed.
        if not se_plugins_RAW['OK']:
            gLogger.error(se_plugins_RAW['Message'])
            return False

    if site_subtier == 'T2-D':
        dst = dmsHelper.getSEInGroupAtSite('Tier2D-DST', real_site_name)

    # Fix: the original code raised NameError here when dst was never assigned
    # (site neither Tier-0/1 nor T2-D); report the problem and bail out instead.
    if dst is None:
        gLogger.error("No DST storage group resolved for site %s (tier %s, subtier %s)"
                      % (site, site_tier, site_subtier))
        return False

    if not dst['OK']:
        gLogger.error(dst['Message'])
        return False
    dst = dst['Value']
    se_DST = StorageElement(dst)
    se_plugins_DST = se_DST.getPlugins()
    if not se_plugins_DST['OK']:
        gLogger.error(se_plugins_DST['Message'])
        return False

    for protocol in se_plugins_DST['Value']:
        site_se_opts_DST = se_DST.getStorageParameters(protocol)
        if not site_se_opts_DST['OK']:
            gLogger.error(site_se_opts_DST['Message'])
            return False
        site_se_opts_DST = site_se_opts_DST['Value']
        __write_SE_XML(site_se_opts_DST)

        if int(site_tier) in (0, 1):
            if protocol in se_plugins_RAW['Value']:
                site_se_opts_RAW = se_RAW.getStorageParameters(protocol)
                if not site_se_opts_RAW['OK']:
                    gLogger.error(site_se_opts_RAW['Message'])
                    return has_grid_elem
                site_se_opts_RAW = site_se_opts_RAW['Value']
                # This tests if the DST and RAW StorageElements have the same endpoint.
                # If so it only uses the one already added.
                if site_se_opts_RAW['Host'] != site_se_opts_DST['Host']:
                    __write_SE_XML(site_se_opts_RAW)
    return has_grid_elem
def _checkFilesToStage(seToLFNs, onlineLFNs, offlineLFNs, absentLFNs,
                       checkOnlyTapeSEs=None, jobLog=None,
                       proxyUserName=None,
                       proxyUserGroup=None,
                       executionLock=None):
    """ Checks on SEs whether the file is NEARLINE or ONLINE
        onlineLFNs, offlineLFNs and absentLFNs are modified to contain the files found online
        If checkOnlyTapeSEs is True, disk replicas are not checked
        As soon as a replica is found Online for a file, no further check is made

    :param dict seToLFNs: mapping SE name -> list of LFNs to check at that SE
    :param dict onlineLFNs: OUT -- filled as LFN -> list of SEs where the replica is online
    :param dict offlineLFNs: OUT -- filled as LFN -> list of tape SEs where the replica is offline
    :param dict absentLFNs: OUT -- filled as LFN -> error message for files not found anywhere
    :param bool checkOnlyTapeSEs: if True (the default), replicas at disk SEs are assumed online
    :param jobLog: optional logger; gLogger is used when None
    :param str proxyUserName: user name to run SE queries with (via UserProxy)
    :param str proxyUserGroup: user group to run SE queries with (via UserProxy)
    :param executionLock: lock serialising the proxy switch
    :return: S_OK() on success; the failed SE-status result otherwise
    """
    # Only check on storage if it is a tape SE
    if jobLog is None:
        logger = gLogger
    else:
        logger = jobLog
    if checkOnlyTapeSEs is None:
        # Default value is True
        checkOnlyTapeSEs = True

    failed = {}  # SE -> {lfn: reason} for metadata lookups that went wrong
    for se, lfnsInSEList in seToLFNs.items():
        # If we have found already all files online at another SE, no need to check the others
        # but still we want to set the SE as Online if not a TapeSE
        vo = getVOForGroup(proxyUserGroup)
        seObj = StorageElement(se, vo=vo)
        status = seObj.getStatus()
        if not status['OK']:
            return status
        tapeSE = status['Value']['TapeSE']
        diskSE = status['Value']['DiskSE']
        # If requested to check only Tape SEs and the file is at a diskSE, we guess it is Online...
        filesToCheck = []
        for lfn in lfnsInSEList:
            # If the file had already been found accessible at an SE, only check that this one is on disk
            diskIsOK = checkOnlyTapeSEs or (lfn in onlineLFNs)
            if diskIsOK and diskSE:
                onlineLFNs.setdefault(lfn, []).append(se)
            elif not diskIsOK or (tapeSE and (lfn not in onlineLFNs)):
                filesToCheck.append(lfn)
        if not filesToCheck:
            continue

        # We have to use a new SE object because it caches the proxy!
        with UserProxy(proxyUserName=proxyUserName,
                       proxyUserGroup=proxyUserGroup,
                       executionLock=executionLock) as proxyResult:
            if proxyResult['OK']:
                fileMetadata = StorageElement(
                    se, vo=vo).getFileMetadata(filesToCheck)
            else:
                # Could not switch proxy: record the error result for all files of this SE
                fileMetadata = proxyResult

        if not fileMetadata['OK']:
            failed[se] = dict.fromkeys(filesToCheck, fileMetadata['Message'])
        else:
            if fileMetadata['Value']['Failed']:
                failed[se] = fileMetadata['Value']['Failed']
            # is there at least one replica online?
            for lfn, mDict in fileMetadata['Value']['Successful'].items():
                # SRM returns Cached, but others may only return Accessible
                # NOTE(review): mDict['Accessible'] is accessed unguarded here --
                # presumably every SE plugin returns it; confirm before relying on it.
                if mDict.get('Cached', mDict['Accessible']):
                    onlineLFNs.setdefault(lfn, []).append(se)
                elif tapeSE:
                    # A file can be staged only at Tape SE
                    offlineLFNs.setdefault(lfn, []).append(se)
                else:
                    # File not available at a diskSE... we shall retry later
                    pass

    # Doesn't matter if some files are Offline if they are also online
    for lfn in set(offlineLFNs) & set(onlineLFNs):
        offlineLFNs.pop(lfn)

    # If the file was found staged, ignore possible errors, but print out errors
    for se, failedLfns in list(failed.items()):
        logger.error("Errors when getting files metadata", 'at %s' % se)
        for lfn, reason in list(failedLfns.items()):
            if lfn in onlineLFNs:
                logger.warn(reason,
                            'for %s, but there is an online replica' % lfn)
                failed[se].pop(lfn)
            else:
                logger.error(reason, 'for %s, no online replicas' % lfn)
                if cmpError(reason, errno.ENOENT):
                    # File genuinely absent at this SE
                    absentLFNs.setdefault(lfn, []).append(se)
                    failed[se].pop(lfn)
        if not failed[se]:
            failed.pop(se)
    # Find the files that do not exist at SE
    if failed:
        logger.error(
            "Error getting metadata",
            "for %d files" %
            len(set(lfn for lfnList in failed.values() for lfn in lfnList)))

    for lfn in absentLFNs:
        seList = absentLFNs[lfn]
        # FIXME: it is not possible to return here an S_ERROR(), return the message only
        absentLFNs[lfn] = S_ERROR(errno.ENOENT,
                                  "File not at %s" %
                                  ','.join(sorted(seList)))['Message']
    # Format the error for absent files
    return S_OK()
def _callback(self):
    """ After a Transfer operation, we have to update the matching Request in the RMS,
    and add the registration operation just before the ReplicateAndRegister one

    NOTE: we don't use ReqProxy when putting the request back to avoid operational hell

    :return: result of reqClient.putRequest, or S_OK()/S_ERROR depending on the Request state
    """
    log = self._log.getSubLogger("callback", child=True)

    # In case there is no Request associated to the Transfer
    # we do not do the callback. Not really advised, but there is a feature
    # request to use the FTS3 system without RMS
    if self.rmsReqID == -1:
        return S_OK()

    # Now we check the status of the Request.
    # in principle, it should be scheduled
    res = self.reqClient.getRequestStatus(self.rmsReqID)
    if not res['OK']:
        log.error("Could not get request status", res)
        return res
    status = res['Value']

    # If it is not scheduled, something went wrong
    # and we will not modify it
    if status != 'Scheduled':
        # If the Request is in a final state, just leave it,
        # and we consider our job done.
        # (typically happens when the callback had already been done but not persisted to the FTS3DB)
        if status in rmsRequest.FINAL_STATES:
            log.warn(
                "Request with id %s is not Scheduled (%s), but okay it is in a Final State"
                % (self.rmsReqID, status))
            return S_OK()
        # If the Request is not in a final state, then something really wrong is going on,
        # and we do not do anything, keep ourselves pending
        else:
            return S_ERROR("Request with id %s is not Scheduled:%s" %
                           (self.rmsReqID, status))

    res = self._updateRmsOperationStatus()
    if not res['OK']:
        return res

    ftsFilesByTarget = res['Value']['ftsFilesByTarget']
    request = res['Value']['request']
    operation = res['Value']['operation']

    registrationProtocols = DMSHelpers(
        vo=self._vo).getRegistrationProtocols()

    log.info("will create %s 'RegisterReplica' operations" %
             len(ftsFilesByTarget))

    # One RegisterReplica operation per target SE, inserted before the
    # ReplicateAndRegister operation it belongs to
    for target, ftsFileList in ftsFilesByTarget.iteritems():
        log.info(
            "creating 'RegisterReplica' operation for targetSE %s with %s files..."
            % (target, len(ftsFileList)))
        registerOperation = rmsOperation()
        registerOperation.Type = "RegisterReplica"
        registerOperation.Status = "Waiting"
        registerOperation.TargetSE = target
        if operation.Catalog:
            registerOperation.Catalog = operation.Catalog

        # NOTE(review): 'self.vo' here vs 'self._vo' used for DMSHelpers above --
        # looks inconsistent; confirm both attributes exist and agree.
        targetSE = StorageElement(target, vo=self.vo)
        for ftsFile in ftsFileList:
            opFile = rmsFile()
            opFile.LFN = ftsFile.lfn
            opFile.Checksum = ftsFile.checksum
            # TODO: are we really ever going to change type... ?
            opFile.ChecksumType = 'ADLER32'
            opFile.Size = ftsFile.size
            res = returnSingleResult(
                targetSE.getURL(ftsFile.lfn, protocol=registrationProtocols))
            # This should never happen !
            if not res["OK"]:
                log.error("Could not get url", res['Message'])
                continue
            opFile.PFN = res["Value"]
            registerOperation.addFile(opFile)

        request.insertBefore(registerOperation, operation)

    return self.reqClient.putRequest(request,
                                     useFailoverProxy=False,
                                     retryMainService=3)
def __monitorStorageElementStageRequests(self, storageElement, seReplicaIDs, replicaIDs):
    """ Monitor the ongoing stage (prestage) requests at a single StorageElement:
    query file metadata, account the transfers, mark terminally-failed replicas,
    flag completed ones as Staged and retry requests still not cached.

    :param str storageElement: name of the SE to monitor
    :param list seReplicaIDs: replica IDs with a stage request at this SE
    :param dict replicaIDs: replicaID -> info dict (must contain 'LFN', may contain 'RequestID')
    :return: None (all outcomes are reported via stagerClient / accounting / logs)
    """
    terminalReplicaIDs = {}  # replicaID -> terminal failure reason
    oldRequests = []         # replicaIDs still not cached -> will be retried
    stagedReplicas = []      # replicaIDs found cached -> will be marked Staged

    # Since we are in a given SE, the LFN is a unique key
    lfnRepIDs = {}
    lfnReqIDs = {}
    for replicaID in seReplicaIDs:
        lfn = replicaIDs[replicaID]['LFN']
        lfnRepIDs[lfn] = replicaID
        requestID = replicaIDs[replicaID].get('RequestID', None)
        if requestID:
            lfnReqIDs[lfn] = replicaIDs[replicaID]['RequestID']

    gLogger.info(
        "StageMonitor.__monitorStorageElementStageRequests: Monitoring %s stage requests for %s."
        % (len(lfnRepIDs), storageElement))
    oAccounting = DataOperation()
    oAccounting.setStartTime()

    # NOTE(review): only LFNs that carry a RequestID (lfnReqIDs) are queried here,
    # while the log line above counts lfnRepIDs -- confirm this asymmetry is intended.
    res = StorageElement(storageElement).getFileMetadata(lfnReqIDs)
    if not res['OK']:
        gLogger.error(
            "StageMonitor.__monitorStorageElementStageRequests: Completely failed to monitor stage requests for replicas.",
            res['Message'])
        return
    prestageStatus = res['Value']

    accountingDict = self.__newAccountingDict(storageElement)

    for lfn, reason in prestageStatus['Failed'].items():
        accountingDict['TransferTotal'] += 1
        if re.search('File does not exist', reason):
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: LFN did not exist in the StorageElement",
                lfn)
            terminalReplicaIDs[
                lfnRepIDs[lfn]] = 'LFN did not exist in the StorageElement'
    for lfn, staged in prestageStatus['Successful'].items():
        if staged and 'Cached' in staged and staged['Cached']:
            # File is on the disk cache: stage request completed
            accountingDict['TransferTotal'] += 1
            accountingDict['TransferOK'] += 1
            accountingDict['TransferSize'] += staged['Size']
            stagedReplicas.append(lfnRepIDs[lfn])
        if staged and 'Cached' in staged and not staged['Cached']:
            oldRequests.append(lfnRepIDs[lfn])  # only ReplicaIDs

    oAccounting.setValuesFromDict(accountingDict)
    oAccounting.setEndTime()
    gDataStoreClient.addRegister(oAccounting)

    # Update the states of the replicas in the database
    if terminalReplicaIDs:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s replicas are terminally failed."
            % len(terminalReplicaIDs))
        res = self.stagerClient.updateReplicaFailure(terminalReplicaIDs)
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to update replica failures.",
                res['Message'])

    if stagedReplicas:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s staged replicas to be updated."
            % len(stagedReplicas))
        res = self.stagerClient.setStageComplete(stagedReplicas)
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to updated staged replicas.",
                res['Message'])
        res = self.stagerClient.updateReplicaStatus(
            stagedReplicas, 'Staged')
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to insert replica status.",
                res['Message'])

    if oldRequests:
        gLogger.info(
            "StageMonitor.__monitorStorageElementStageRequests: %s old requests will be retried."
            % len(oldRequests))
        res = self.__wakeupOldRequests(oldRequests)
        if not res['OK']:
            gLogger.error(
                "StageMonitor.__monitorStorageElementStageRequests: Failed to wakeup old requests.",
                res['Message'])
    return
class FTSRequest( object ):
  """
  .. class:: FTSRequest

  Helper class for FTS job submission and monitoring.
  """

  # # default checksum type
  __defaultCksmType = "ADLER32"
  # # flag to disable/enable checksum test, default: disabled
  __cksmTest = False

  def __init__( self ):
    """c'tor

    :param self: self reference
    """
    self.log = gLogger.getSubLogger( self.__class__.__name__, True )
    # # final states tuple
    self.finalStates = ( 'Canceled', 'Failed', 'Hold', 'Finished', 'FinishedDirty' )
    # # failed states tuple
    self.failedStates = ( 'Canceled', 'Failed', 'Hold', 'FinishedDirty' )
    # # successful states tuple
    self.successfulStates = ( 'Finished', 'Done' )
    # # all file states tuple
    self.fileStates = ( 'Done', 'Active', 'Pending', 'Ready', 'Canceled', 'Failed', 'Finishing', 'Finished', 'Submitted', 'Hold', 'Waiting' )
    # # per-status counters filled during monitoring
    self.statusSummary = {}
    # # request status
    self.requestStatus = 'Unknown'
    # # dict for FTS job files, keyed by LFN
    self.fileDict = {}
    # # dict for replicas information
    self.catalogReplicas = {}
    # # dict for metadata information
    self.catalogMetadata = {}
    # # dict for files that failed to register
    self.failedRegistrations = {}
    # # placeholder for FileCatalog reference
    self.oCatalog = None
    # # submit timestamp
    self.submitTime = ''
    # # placeholder FTS job GUID
    self.ftsGUID = ''
    # # placeholder for FTS server URL
    self.ftsServer = ''
    # # flag marking FTS job completeness
    self.isTerminal = False
    # # completeness percentage
    self.percentageComplete = 0.0
    # # source SE name
    self.sourceSE = ''
    # # flag marking source SE validity
    self.sourceValid = False
    # # source space token
    self.sourceToken = ''
    # # target SE name
    self.targetSE = ''
    # # flag marking target SE validity
    self.targetValid = False
    # # target space token
    self.targetToken = ''
    # # placeholder for target StorageElement
    self.oTargetSE = None
    # # placeholder for source StorageElement
    self.oSourceSE = None
    # # checksum type, set it to default
    self.__cksmType = self.__defaultCksmType
    # # disable checksum test by default
    self.__cksmTest = False
    # # statuses that prevent submitting to FTS
    self.noSubmitStatus = ( 'Failed', 'Done', 'Staging' )
    # # were sources resolved?
    self.sourceResolved = False
    # # Number of file transfers actually submitted
    self.submittedFiles = 0
    # # total transfer duration
    self.transferTime = 0
    # # CLI commands, configurable in Operations
    self.submitCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/SubmitCommand', 'glite-transfer-submit' )
    self.monitorCommand = Operations().getValue( 'DataManagement/FTSPlacement/FTS2/MonitorCommand', 'glite-transfer-status' )
    self.ftsVersion = Operations().getValue( 'DataManagement/FTSVersion', 'FTS2' )
    # # FTSJob being built/submitted and its files
    self.ftsJob = None
    self.ftsFiles = []

  ####################################################################
  #
  # Methods for setting/getting/checking the SEs
  #

  def setSourceSE( self, se ):
    """ set SE for source

    :param self: self reference
    :param str se: source SE name
    """
    if se == self.targetSE:
      return S_ERROR( "SourceSE is TargetSE" )
    self.sourceSE = se
    self.oSourceSE = StorageElement( self.sourceSE )
    return self.__checkSourceSE()

  def __checkSourceSE( self ):
    """ check source SE availability

    :param self: self reference
    """
    if not self.sourceSE:
      return S_ERROR( "SourceSE not set" )
    res = self.oSourceSE.isValid( 'Read' )
    if not res['OK']:
      return S_ERROR( "SourceSE not available for reading" )
    res = self.__getSESpaceToken( self.oSourceSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for SourceSE", res['Message'] )
      return S_ERROR( "SourceSE does not support FTS transfers" )
    # # checksum test is only kept when the SE advertises a compatible checksum type
    if self.__cksmTest:
      cksmType = self.oSourceSE.checksumType()
      if cksmType in ( "NONE", "NULL" ):
        self.log.warn( "Checksum type set to %s at SourceSE %s, disabling checksum test" % ( cksmType,
                                                                                            self.sourceSE ) )
        self.__cksmTest = False
      elif cksmType != self.__cksmType:
        self.log.warn( "Checksum type mismatch, disabling checksum test" )
        self.__cksmTest = False
    self.sourceToken = res['Value']
    self.sourceValid = True
    return S_OK()

  def setTargetSE( self, se ):
    """ set target SE

    :param self: self reference
    :param str se: target SE name
    """
    if se == self.sourceSE:
      return S_ERROR( "TargetSE is SourceSE" )
    self.targetSE = se
    self.oTargetSE = StorageElement( self.targetSE )
    return self.__checkTargetSE()

  def setTargetToken( self, token ):
    """ target space token setter

    :param self: self reference
    :param str token: target space token
    """
    self.targetToken = token
    return S_OK()

  def __checkTargetSE( self ):
    """ check target SE availability

    :param self: self reference
    """
    if not self.targetSE:
      return S_ERROR( "TargetSE not set" )
    res = self.oTargetSE.isValid( 'Write' )
    if not res['OK']:
      return S_ERROR( "TargetSE not available for writing" )
    res = self.__getSESpaceToken( self.oTargetSE )
    if not res['OK']:
      self.log.error( "FTSRequest failed to get SRM Space Token for TargetSE", res['Message'] )
      return S_ERROR( "TargetSE does not support FTS transfers" )
    # # check checksum types
    if self.__cksmTest:
      cksmType = self.oTargetSE.checksumType()
      if cksmType in ( "NONE", "NULL" ):
        self.log.warn( "Checksum type set to %s at TargetSE %s, disabling checksum test" % ( cksmType,
                                                                                            self.targetSE ) )
        self.__cksmTest = False
      elif cksmType != self.__cksmType:
        self.log.warn( "Checksum type mismatch, disabling checksum test" )
        self.__cksmTest = False
    self.targetToken = res['Value']
    self.targetValid = True
    return S_OK()

  @staticmethod
  def __getSESpaceToken( oSE ):
    """ get space token from StorageElement instance

    :param StorageElement oSE: StorageElement instance
    """
    res = oSE.getStorageParameters( protocol = 'srm' )
    if not res['OK']:
      return res
    return S_OK( res['Value'].get( 'SpaceToken' ) )

  ####################################################################
  #
  # Methods for setting/getting FTS request parameters
  #

  def setFTSGUID( self, guid ):
    """ FTS job GUID setter

    :param self: self reference
    :param str guid: string containing GUID
    """
    if not checkGuid( guid ):
      return S_ERROR( "Incorrect GUID format" )
    self.ftsGUID = guid
    return S_OK()

  def setFTSServer( self, server ):
    """ FTS server setter

    :param self: self reference
    :param str server: FTS server URL
    """
    self.ftsServer = server
    return S_OK()

  def isRequestTerminal( self ):
    """ check if FTS job has terminated

    :param self: self reference
    """
    if self.requestStatus in self.finalStates:
      self.isTerminal = True
    return S_OK( self.isTerminal )

  def setCksmTest( self, cksmTest = False ):
    """ set cksm test

    :param self: self reference
    :param bool cksmTest: flag to enable/disable checksum test
    """
    self.__cksmTest = bool( cksmTest )
    return S_OK( self.__cksmTest )

  ####################################################################
  #
  # Methods for setting/getting/checking files and their metadata
  #

  def setLFN( self, lfn ):
    """ add LFN :lfn: to :fileDict:

    :param self: self reference
    :param str lfn: LFN to add to
    """
    self.fileDict.setdefault( lfn, {'Status':'Waiting'} )
    return S_OK()

  def setSourceSURL( self, lfn, surl ):
    """ source SURL setter

    :param self: self reference
    :param str lfn: LFN
    :param str surl: source SURL
    """
    target = self.fileDict[lfn].get( 'Target' )
    if target == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Source', surl )

  def getSourceSURL( self, lfn ):
    """ get source SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Source' )

  def setTargetSURL( self, lfn, surl ):
    """ set target SURL for LFN :lfn:

    :param self: self reference
    :param str lfn: LFN
    :param str surl: target SURL
    """
    source = self.fileDict[lfn].get( 'Source' )
    if source == surl:
      return S_ERROR( "Source and target the same" )
    return self.__setFileParameter( lfn, 'Target', surl )

  def getFailReason( self, lfn ):
    """ get fail reason for file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Reason' )

  def getRetries( self, lfn ):
    """ get number of attempts made to transfer file :lfn:

    :param self: self reference
    :param str lfn: LFN
    """
    return self.__getFileParameter( lfn, 'Retries' )
def getTransferTime( self, lfn ): """ get duration of transfer for file :lfn: :param self: self reference :param str lfn: LFN """ return self.__getFileParameter( lfn, 'Duration' ) def getFailed( self ): """ get list of wrongly transferred LFNs :param self: self reference """ return S_OK( [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( 'Status', '' ) in self.failedStates ] ) def getStaging( self ): """ get files set for prestaging """ return S_OK( [lfn for lfn in self.fileDict if self.fileDict[lfn].get( 'Status', '' ) == 'Staging'] ) def getDone( self ): """ get list of succesfully transferred LFNs :param self: self reference """ return S_OK( [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( 'Status', '' ) in self.successfulStates ] ) def __setFileParameter( self, lfn, paramName, paramValue ): """ set :paramName: to :paramValue: for :lfn: file :param self: self reference :param str lfn: LFN :param str paramName: parameter name :param mixed paramValue: a new parameter value """ self.setLFN( lfn ) self.fileDict[lfn][paramName] = paramValue return S_OK() def __getFileParameter( self, lfn, paramName ): """ get value of :paramName: for file :lfn: :param self: self reference :param str lfn: LFN :param str paramName: parameter name """ if lfn not in self.fileDict: return S_ERROR( "Supplied file not set" ) if paramName not in self.fileDict[lfn]: return S_ERROR( "%s not set for file" % paramName ) return S_OK( self.fileDict[lfn][paramName] ) #################################################################### # # Methods for submission # def submit( self, monitor = False, printOutput = True ): """ submit FTS job :param self: self reference :param bool monitor: flag to monitor progress of FTS job :param bool printOutput: flag to print output of execution to stdout """ res = self.__prepareForSubmission() if not res['OK']: return res res = self.__submitFTSTransfer() if not res['OK']: return res resDict = { 'ftsGUID' : self.ftsGUID, 'ftsServer' : 
self.ftsServer, 'submittedFiles' : self.submittedFiles } if monitor or printOutput: gLogger.always( "Submitted %s@%s" % ( self.ftsGUID, self.ftsServer ) ) if monitor: self.monitor( untilTerminal = True, printOutput = printOutput, full = False ) return S_OK( resDict ) def __prepareForSubmission( self ): """ check validity of job before submission :param self: self reference """ if not self.fileDict: return S_ERROR( "No files set" ) if not self.sourceValid: return S_ERROR( "SourceSE not valid" ) if not self.targetValid: return S_ERROR( "TargetSE not valid" ) if not self.ftsServer: res = self.__resolveFTSServer() if not res['OK']: return S_ERROR( "FTSServer not valid" ) self.resolveSource() self.resolveTarget() res = self.__filesToSubmit() if not res['OK']: return S_ERROR( "No files to submit" ) return S_OK() def __getCatalogObject( self ): """ CatalogInterface instance facade :param self: self reference """ try: if not self.oCatalog: self.oCatalog = FileCatalog() return S_OK() except: return S_ERROR() def __updateReplicaCache( self, lfns = None, overwrite = False ): """ update replica cache for list of :lfns: :param self: self reference :param mixed lfns: list of LFNs :param bool overwrite: flag to trigger cache clearing and updating """ if not lfns: lfns = self.fileDict.keys() toUpdate = [ lfn for lfn in lfns if ( lfn not in self.catalogReplicas ) or overwrite ] if not toUpdate: return S_OK() res = self.__getCatalogObject() if not res['OK']: return res res = self.oCatalog.getReplicas( toUpdate ) if not res['OK']: return S_ERROR( "Failed to update replica cache: %s" % res['Message'] ) for lfn, error in res['Value']['Failed'].items(): self.__setFileParameter( lfn, 'Reason', error ) self.__setFileParameter( lfn, 'Status', 'Failed' ) for lfn, replicas in res['Value']['Successful'].items(): self.catalogReplicas[lfn] = replicas return S_OK() def __updateMetadataCache( self, lfns = None ): """ update metadata cache for list of LFNs :param self: self reference :param list 
lnfs: list of LFNs """ if not lfns: lfns = self.fileDict.keys() toUpdate = [ lfn for lfn in lfns if lfn not in self.catalogMetadata ] if not toUpdate: return S_OK() res = self.__getCatalogObject() if not res['OK']: return res res = self.oCatalog.getFileMetadata( toUpdate ) if not res['OK']: return S_ERROR( "Failed to get source catalog metadata: %s" % res['Message'] ) for lfn, error in res['Value']['Failed'].items(): self.__setFileParameter( lfn, 'Reason', error ) self.__setFileParameter( lfn, 'Status', 'Failed' ) for lfn, metadata in res['Value']['Successful'].items(): self.catalogMetadata[lfn] = metadata return S_OK() def resolveSource( self ): """ resolve source SE eligible for submission :param self: self reference """ # Avoid resolving sources twice if self.sourceResolved: return S_OK() # Only resolve files that need a transfer toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( "Status", "" ) != "Failed" ] if not toResolve: return S_OK() res = self.__updateMetadataCache( toResolve ) if not res['OK']: return res res = self.__updateReplicaCache( toResolve ) if not res['OK']: return res # Define the source URLs for lfn in toResolve: replicas = self.catalogReplicas.get( lfn, {} ) if self.sourceSE not in replicas: gLogger.warn( "resolveSource: skipping %s - not replicas at SourceSE %s" % ( lfn, self.sourceSE ) ) self.__setFileParameter( lfn, 'Reason', "No replica at SourceSE" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) continue res = returnSingleResult( self.oSourceSE.getURL( lfn, protocol = 'srm' ) ) if not res['OK']: gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) ) self.__setFileParameter( lfn, 'Reason', res['Message'] ) self.__setFileParameter( lfn, 'Status', 'Failed' ) continue res = self.setSourceSURL( lfn, res['Value'] ) if not res['OK']: gLogger.warn( "resolveSource: skipping %s - %s" % ( lfn, res["Message"] ) ) self.__setFileParameter( lfn, 'Reason', res['Message'] ) self.__setFileParameter( lfn, 
'Status', 'Failed' ) continue toResolve = [] for lfn in self.fileDict: if "Source" in self.fileDict[lfn]: toResolve.append( lfn ) if not toResolve: return S_ERROR( "No eligible Source files" ) # Get metadata of the sources, to check for existance, availability and caching res = self.oSourceSE.getFileMetadata( toResolve ) if not res['OK']: return S_ERROR( "Failed to check source file metadata" ) for lfn, error in res['Value']['Failed'].items(): if re.search( 'File does not exist', error ): gLogger.warn( "resolveSource: skipping %s - source file does not exists" % lfn ) self.__setFileParameter( lfn, 'Reason', "Source file does not exist" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) else: gLogger.warn( "resolveSource: skipping %s - failed to get source metadata" % lfn ) self.__setFileParameter( lfn, 'Reason', "Failed to get Source metadata" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) toStage = [] nbStagedFiles = 0 for lfn, metadata in res['Value']['Successful'].items(): lfnStatus = self.fileDict.get( lfn, {} ).get( 'Status' ) if metadata.get( 'Unavailable', False ): gLogger.warn( "resolveSource: skipping %s - source file unavailable" % lfn ) self.__setFileParameter( lfn, 'Reason', "Source file Unavailable" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) elif metadata.get( 'Lost', False ): gLogger.warn( "resolveSource: skipping %s - source file lost" % lfn ) self.__setFileParameter( lfn, 'Reason', "Source file Lost" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) elif not metadata.get( 'Cached', metadata['Accessible'] ): if lfnStatus != 'Staging': toStage.append( lfn ) elif metadata['Size'] != self.catalogMetadata[lfn]['Size']: gLogger.warn( "resolveSource: skipping %s - source file size mismatch" % lfn ) self.__setFileParameter( lfn, 'Reason', "Source size mismatch" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) elif self.catalogMetadata[lfn]['Checksum'] and metadata['Checksum'] and \ not compareAdler( metadata['Checksum'], 
self.catalogMetadata[lfn]['Checksum'] ): gLogger.warn( "resolveSource: skipping %s - source file checksum mismatch" % lfn ) self.__setFileParameter( lfn, 'Reason', "Source checksum mismatch" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) elif lfnStatus == 'Staging': # file that was staging is now cached self.__setFileParameter( lfn, 'Status', 'Waiting' ) nbStagedFiles += 1 # Some files were being staged if nbStagedFiles: self.log.info( 'resolveSource: %d files have been staged' % nbStagedFiles ) # Launching staging of files not in cache if toStage: gLogger.warn( "resolveSource: %s source files not cached, prestaging..." % len( toStage ) ) stage = self.oSourceSE.prestageFile( toStage ) if not stage["OK"]: gLogger.error( "resolveSource: error is prestaging", stage["Message"] ) for lfn in toStage: self.__setFileParameter( lfn, 'Reason', stage["Message"] ) self.__setFileParameter( lfn, 'Status', 'Failed' ) else: for lfn in toStage: if lfn in stage['Value']['Successful']: self.__setFileParameter( lfn, 'Status', 'Staging' ) elif lfn in stage['Value']['Failed']: self.__setFileParameter( lfn, 'Reason', stage['Value']['Failed'][lfn] ) self.__setFileParameter( lfn, 'Status', 'Failed' ) self.sourceResolved = True return S_OK() def resolveTarget( self ): """ find target SE eligible for submission :param self: self reference """ toResolve = [ lfn for lfn in self.fileDict if self.fileDict[lfn].get( 'Status' ) not in self.noSubmitStatus ] if not toResolve: return S_OK() res = self.__updateReplicaCache( toResolve ) if not res['OK']: return res for lfn in toResolve: res = returnSingleResult( self.oTargetSE.getURL( lfn, protocol = 'srm' ) ) if not res['OK']: reason = res.get( 'Message', res['Message'] ) gLogger.warn( "resolveTarget: skipping %s - %s" % ( lfn, reason ) ) self.__setFileParameter( lfn, 'Reason', reason ) self.__setFileParameter( lfn, 'Status', 'Failed' ) continue res = self.setTargetSURL( lfn, res['Value'] ) if not res['OK']: gLogger.warn( "resolveTarget: 
skipping %s - %s" % ( lfn, res["Message"] ) ) self.__setFileParameter( lfn, 'Reason', res['Message'] ) self.__setFileParameter( lfn, 'Status', 'Failed' ) continue toResolve = [] for lfn in self.fileDict: if "Target" in self.fileDict[lfn]: toResolve.append( lfn ) if not toResolve: return S_ERROR( "No eligible Target files" ) res = self.oTargetSE.exists( toResolve ) if not res['OK']: return S_ERROR( "Failed to check target existence" ) for lfn, error in res['Value']['Failed'].items(): self.__setFileParameter( lfn, 'Reason', error ) self.__setFileParameter( lfn, 'Status', 'Failed' ) toRemove = [] for lfn, exists in res['Value']['Successful'].items(): if exists: res = self.getSourceSURL( lfn ) if not res['OK']: gLogger.warn( "resolveTarget: skipping %s - target exists" % lfn ) self.__setFileParameter( lfn, 'Reason', "Target exists" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) elif res['Value'] == self.fileDict[lfn]['Target']: gLogger.warn( "resolveTarget: skipping %s - source and target pfns are the same" % lfn ) self.__setFileParameter( lfn, 'Reason', "Source and Target the same" ) self.__setFileParameter( lfn, 'Status', 'Failed' ) else: toRemove.append( lfn ) if toRemove: self.oTargetSE.removeFile( toRemove ) return S_OK() def __filesToSubmit( self ): """ check if there is at least one file to submit :return: S_OK if at least one file is present, S_ERROR otherwise """ for lfn in self.fileDict: lfnStatus = self.fileDict[lfn].get( 'Status' ) source = self.fileDict[lfn].get( 'Source' ) target = self.fileDict[lfn].get( 'Target' ) if lfnStatus not in self.noSubmitStatus and source and target: return S_OK() return S_ERROR() def __createFTSFiles( self ): """ create LFNs file for glite-transfer-submit command This file consists one line for each fiel to be transferred: sourceSURL targetSURL [CHECKSUMTYPE:CHECKSUM] :param self: self reference """ self.__updateMetadataCache() for lfn in self.fileDict: lfnStatus = self.fileDict[lfn].get( 'Status' ) if lfnStatus not in 
self.noSubmitStatus: cksmStr = "" # # add chsmType:cksm only if cksmType is specified, else let FTS decide by itself if self.__cksmTest and self.__cksmType: checkSum = self.catalogMetadata.get( lfn, {} ).get( 'Checksum' ) if checkSum: cksmStr = " %s:%s" % ( self.__cksmType, intAdlerToHex( hexAdlerToInt( checkSum ) ) ) ftsFile = FTSFile() ftsFile.LFN = lfn ftsFile.SourceSURL = self.fileDict[lfn].get( 'Source' ) ftsFile.TargetSURL = self.fileDict[lfn].get( 'Target' ) ftsFile.SourceSE = self.sourceSE ftsFile.TargetSE = self.targetSE ftsFile.Status = self.fileDict[lfn].get( 'Status' ) ftsFile.Checksum = cksmStr ftsFile.Size = self.catalogMetadata.get( lfn, {} ).get( 'Size' ) self.ftsFiles.append( ftsFile ) self.submittedFiles += 1 return S_OK() def __createFTSJob( self, guid = None ): self.__createFTSFiles() ftsJob = FTSJob() ftsJob.RequestID = 0 ftsJob.OperationID = 0 ftsJob.SourceSE = self.sourceSE ftsJob.TargetSE = self.targetSE ftsJob.SourceToken = self.sourceToken ftsJob.TargetToken = self.targetToken ftsJob.FTSServer = self.ftsServer if guid: ftsJob.FTSGUID = guid for ftsFile in self.ftsFiles: ftsFile.Attempt += 1 ftsFile.Error = "" ftsJob.addFile( ftsFile ) self.ftsJob = ftsJob def __submitFTSTransfer( self ): """ create and execute glite-transfer-submit CLI command :param self: self reference """ log = gLogger.getSubLogger( 'Submit' ) self.__createFTSJob() submit = self.ftsJob.submitFTS( self.ftsVersion, command = self.submitCommand ) if not submit["OK"]: log.error( "unable to submit FTSJob: %s" % submit["Message"] ) return submit log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) ) # # update statuses for job files for ftsFile in self.ftsJob: ftsFile.FTSGUID = self.ftsJob.FTSGUID ftsFile.Status = "Submitted" ftsFile.Attempt += 1 log.info( "FTSJob '%s'@'%s' has been submitted" % ( self.ftsJob.FTSGUID, self.ftsJob.FTSServer ) ) self.ftsGUID = self.ftsJob.FTSGUID return S_OK() def __resolveFTSServer( self ): """ 
resolve FTS server to use, it should be the closest one from target SE :param self: self reference """ if self.ftsVersion.upper() == 'FTS2': from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS2ServersForSites if not self.targetSE: return S_ERROR( "Target SE not set" ) res = getSitesForSE( self.targetSE ) if not res['OK'] or not res['Value']: return S_ERROR( "Could not determine target site" ) targetSites = res['Value'] targetSite = '' for targetSite in targetSites: targetFTS = getFTS2ServersForSites( [targetSite] ) if targetFTS['OK']: ftsTarget = targetFTS['Value'][targetSite] if ftsTarget: self.ftsServer = ftsTarget return S_OK( self.ftsServer ) else: return targetFTS elif self.ftsVersion.upper() == 'FTS3': from DIRAC.ConfigurationSystem.Client.Helpers.Resources import getFTS3Servers res = getFTS3Servers() if not res['OK']: return res ftsServerList = res['Value'] if ftsServerList: # Here we take the first one, regardless of the policy... # Unclean but all this will disapear after refactoring the fts code self.ftsServer = ftsServerList[0] return S_OK( self.ftsServer ) else: return S_ERROR( 'Unknown FTS version %s' % self.ftsVersion ) return S_ERROR( 'No FTS server found for %s' % targetSite ) #################################################################### # # Methods for monitoring # def summary( self, untilTerminal = False, printOutput = False ): """ summary of FTS job :param self: self reference :param bool untilTerminal: flag to monitor FTS job to its final state :param bool printOutput: flag to print out monitoring information to the stdout """ res = self.__isSummaryValid() if not res['OK']: return res while not self.isTerminal: res = self.__parseOutput( full = True ) if not res['OK']: return res if untilTerminal: self.__print() self.isRequestTerminal() if res['Value'] or ( not untilTerminal ): break time.sleep( 1 ) if untilTerminal: print "" if printOutput and ( not untilTerminal ): return self.dumpSummary( printOutput = printOutput ) 
return S_OK() def monitor( self, untilTerminal = False, printOutput = False, full = True ): """ monitor FTS job :param self: self reference :param bool untilTerminal: flag to monitor FTS job to its final state :param bool printOutput: flag to print out monitoring information to the stdout """ if not self.ftsJob: self.resolveSource() self.__createFTSJob( self.ftsGUID ) res = self.__isSummaryValid() if not res['OK']: return res if untilTerminal: res = self.summary( untilTerminal = untilTerminal, printOutput = printOutput ) if not res['OK']: return res res = self.__parseOutput( full = full ) if not res['OK']: return res if untilTerminal: self.finalize() if printOutput: self.dump() return res def dumpSummary( self, printOutput = False ): """ get FTS job summary as str :param self: self reference :param bool printOutput: print summary to stdout """ outStr = '' for status in sorted( self.statusSummary ): if self.statusSummary[status]: outStr = '%s\t%-10s : %-10s\n' % ( outStr, status, str( self.statusSummary[status] ) ) outStr = outStr.rstrip( '\n' ) if printOutput: print outStr return S_OK( outStr ) def __print( self ): """ print progress bar of FTS job completeness to stdout :param self: self reference """ width = 100 bits = int( ( width * self.percentageComplete ) / 100 ) outStr = "|%s>%s| %.1f%s %s %s" % ( "="*bits, " "*( width - bits ), self.percentageComplete, "%", self.requestStatus, " "*10 ) sys.stdout.write( "%s\r" % ( outStr ) ) sys.stdout.flush() def dump( self ): """ print FTS job parameters and files to stdout :param self: self reference """ print "%-10s : %-10s" % ( "Status", self.requestStatus ) print "%-10s : %-10s" % ( "Source", self.sourceSE ) print "%-10s : %-10s" % ( "Target", self.targetSE ) print "%-10s : %-128s" % ( "Server", self.ftsServer ) print "%-10s : %-128s" % ( "GUID", self.ftsGUID ) for lfn in sorted( self.fileDict ): print "\n %-15s : %-128s" % ( 'LFN', lfn ) for key in ['Source', 'Target', 'Status', 'Reason', 'Duration']: print " %-15s : 
%-128s" % ( key, str( self.fileDict[lfn].get( key ) ) ) return S_OK() def __isSummaryValid( self ): """ check validity of FTS job summary report :param self: self reference """ if not self.ftsServer: return S_ERROR( "FTSServer not set" ) if not self.ftsGUID: return S_ERROR( "FTSGUID not set" ) return S_OK() def __parseOutput( self, full = False ): """ execute glite-transfer-status command and parse its output :param self: self reference :param bool full: glite-transfer-status verbosity level, when set, collect information of files as well """ monitor = self.ftsJob.monitorFTS( self.ftsVersion, command = self.monitorCommand, full = full ) if not monitor['OK']: return monitor self.percentageComplete = self.ftsJob.Completeness self.requestStatus = self.ftsJob.Status self.submitTime = self.ftsJob.SubmitTime statusSummary = monitor['Value'] if statusSummary: for state in statusSummary: self.statusSummary[state] = statusSummary[state] self.transferTime = 0 for ftsFile in self.ftsJob: lfn = ftsFile.LFN self.__setFileParameter( lfn, 'Status', ftsFile.Status ) self.__setFileParameter( lfn, 'Reason', ftsFile.Error ) self.__setFileParameter( lfn, 'Duration', ftsFile._duration ) targetURL = self.__getFileParameter( lfn, 'Target' ) if not targetURL['OK']: self.__setFileParameter( lfn, 'Target', ftsFile.TargetSURL ) sourceURL = self.__getFileParameter( lfn, 'Source' ) if not sourceURL['OK']: self.__setFileParameter( lfn, 'Source', ftsFile.SourceSURL ) self.transferTime += int( ftsFile._duration ) return S_OK() #################################################################### # # Methods for finalization # def finalize( self ): """ finalize FTS job :param self: self reference """ self.__updateMetadataCache() transEndTime = dateTime() regStartTime = time.time() res = self.getTransferStatistics() transDict = res['Value'] res = self.__registerSuccessful( transDict['transLFNs'] ) regSuc, regTotal = res['Value'] regTime = time.time() - regStartTime if self.sourceSE and 
self.targetSE: self.__sendAccounting( regSuc, regTotal, regTime, transEndTime, transDict ) return S_OK() def getTransferStatistics( self ): """ collect information of Transfers that can be used by Accounting :param self: self reference """ transDict = { 'transTotal': len( self.fileDict ), 'transLFNs': [], 'transOK': 0, 'transSize': 0 } for lfn in self.fileDict: if self.fileDict[lfn].get( 'Status' ) in self.successfulStates: if self.fileDict[lfn].get( 'Duration', 0 ): transDict['transLFNs'].append( lfn ) transDict['transOK'] += 1 if lfn in self.catalogMetadata: transDict['transSize'] += self.catalogMetadata[lfn].get( 'Size', 0 ) return S_OK( transDict ) def getFailedRegistrations( self ): """ get failed registrations dict :param self: self reference """ return S_OK( self.failedRegistrations ) def __registerSuccessful( self, transLFNs ): """ register successfully transferred files to the catalogs, fill failedRegistrations dict for files that failed to register :param self: self reference :param list transLFNs: LFNs in FTS job """ self.failedRegistrations = {} toRegister = {} for lfn in transLFNs: res = returnSingleResult( self.oTargetSE.getURL( self.fileDict[lfn].get( 'Target' ), protocol = 'srm' ) ) if not res['OK']: self.__setFileParameter( lfn, 'Reason', res['Message'] ) self.__setFileParameter( lfn, 'Status', 'Failed' ) else: toRegister[lfn] = { 'PFN' : res['Value'], 'SE' : self.targetSE } if not toRegister: return S_OK( ( 0, 0 ) ) res = self.__getCatalogObject() if not res['OK']: for lfn in toRegister: self.failedRegistrations = toRegister self.log.error( 'Failed to get Catalog Object', res['Message'] ) return S_OK( ( 0, len( toRegister ) ) ) res = self.oCatalog.addReplica( toRegister ) if not res['OK']: self.failedRegistrations = toRegister self.log.error( 'Failed to get Catalog Object', res['Message'] ) return S_OK( ( 0, len( toRegister ) ) ) for lfn, error in res['Value']['Failed'].items(): self.failedRegistrations[lfn] = toRegister[lfn] self.log.error( 
'Registration of Replica failed', '%s : %s' % ( lfn, str( error ) ) ) return S_OK( ( len( res['Value']['Successful'] ), len( toRegister ) ) ) def __sendAccounting( self, regSuc, regTotal, regTime, transEndTime, transDict ): """ send accounting record :param self: self reference :param regSuc: number of files successfully registered :param regTotal: number of files attepted to register :param regTime: time stamp at the end of registration :param transEndTime: time stamp at the end of FTS job :param dict transDict: dict holding couters for files being transerred, their sizes and successfull transfers """ oAccounting = DataOperation() oAccounting.setEndTime( transEndTime ) oAccounting.setStartTime( self.submitTime ) accountingDict = {} accountingDict['OperationType'] = 'replicateAndRegister' result = getProxyInfo() if not result['OK']: userName = '******' else: userName = result['Value'].get( 'username', 'unknown' ) accountingDict['User'] = userName accountingDict['Protocol'] = 'FTS' if 'fts3' not in self.ftsServer else 'FTS3' accountingDict['RegistrationTime'] = regTime accountingDict['RegistrationOK'] = regSuc accountingDict['RegistrationTotal'] = regTotal accountingDict['TransferOK'] = transDict['transOK'] accountingDict['TransferTotal'] = transDict['transTotal'] accountingDict['TransferSize'] = transDict['transSize'] accountingDict['FinalStatus'] = self.requestStatus accountingDict['Source'] = self.sourceSE accountingDict['Destination'] = self.targetSE accountingDict['TransferTime'] = self.transferTime oAccounting.setValuesFromDict( accountingDict ) self.log.verbose( "Attempting to commit accounting message..." ) oAccounting.commit() self.log.verbose( "...committed." ) return S_OK()
def __getSiteCandidates(self, okReplicas, vo):
    """Return the possible site candidates based on the job input data requirement.

    A site is a candidate only if it can serve a replica of *every* LFN
    (intersection of the per-LFN site sets).  For each candidate site the
    number of files on disk and on tape is also resolved.

    :param dict okReplicas: mapping LFN -> iterable of SE names holding a replica
    :param vo: virtual organisation, forwarded to StorageElement()
    :return: S_OK({siteName: {"disk": <int>, "tape": <int>}}) or S_ERROR
    """
    # First pass: map each LFN to the set of sites hosting at least one replica.
    lfnSEs = {}
    for lfn in okReplicas:
        replicas = okReplicas[lfn]
        siteSet = set()
        for seName in replicas:
            result = self.__getSitesForSE(seName)
            if not result["OK"]:
                # A failed SE->site lookup here is fatal for candidate resolution.
                self.jobLog.warn("Could not get sites for SE", "%s: %s" % (seName, result["Message"]))
                return result
            siteSet.update(result["Value"])
        lfnSEs[lfn] = siteSet

    if not lfnSEs:
        return S_ERROR(JobMinorStatus.NO_CANDIDATE_SITE_FOUND)

    # This makes an intersection of all sets in the dictionary and returns a set with it
    siteCandidates = set.intersection(*[lfnSEs[lfn] for lfn in lfnSEs])

    if not siteCandidates:
        return S_ERROR(JobMinorStatus.NO_CANDIDATE_SITE_FOUND)

    # In addition, check number of files on tape and disk for each site
    # for optimizations during scheduling
    sitesData = {}
    for siteName in siteCandidates:
        sitesData[siteName] = {"disk": set(), "tape": set()}

    # Loop time!
    # Second pass: classify each LFN as disk- or tape-resident per site.
    # seDict caches SE -> (sites, status) so each SE is resolved only once.
    seDict = {}
    for lfn in okReplicas:
        replicas = okReplicas[lfn]
        # Check each SE in the replicas
        for seName in replicas:
            # If not already "loaded" the add it to the dict
            if seName not in seDict:
                result = self.__getSitesForSE(seName)
                if not result["OK"]:
                    # NOTE(review): unlike the first pass, a lookup failure here is
                    # non-fatal and the SE is simply skipped for this classification.
                    self.jobLog.warn("Could not get sites for SE", "%s: %s" % (seName, result["Message"]))
                    continue
                siteList = result["Value"]
                seObj = StorageElement(seName, vo=vo)
                result = seObj.getStatus()
                if not result["OK"]:
                    self.jobLog.error("Failed to get SE status", result["Message"])
                    return result
                seDict[seName] = {"Sites": siteList, "Status": result["Value"]}
            # Get SE info from the dict
            seData = seDict[seName]
            siteList = seData["Sites"]
            seStatus = seData["Status"]
            for siteName in siteList:
                # If not a candidate site then skip it
                if siteName not in siteCandidates:
                    continue
                # Add the LFNs to the disk/tape lists
                diskLFNs = sitesData[siteName]["disk"]
                tapeLFNs = sitesData[siteName]["tape"]
                if seStatus["DiskSE"]:
                    # Sets contain only unique elements, no need to check if it's there
                    # A disk copy supersedes a tape copy of the same LFN at this site.
                    diskLFNs.add(lfn)
                    if lfn in tapeLFNs:
                        tapeLFNs.remove(lfn)
                if seStatus["TapeSE"]:
                    if lfn not in diskLFNs:
                        tapeLFNs.add(lfn)

    # Collapse the per-site LFN sets into plain counts for the scheduler.
    for siteName in sitesData:
        sitesData[siteName]["disk"] = len(sitesData[siteName]["disk"])
        sitesData[siteName]["tape"] = len(sitesData[siteName]["tape"])

    return S_OK(sitesData)
if not res['OK']: return res # Clear the local cache getFileDir = "%s/getFile" % base_path if os.path.exists(getFileDir): try: shutil.rmtree(getFileDir) gLogger.debug("Cleared existing getFile cache") except Exception, x: gLogger.exception("Failed to remove destination directory.", getFileDir, x) # Get the file to the cache try: storageElement = StorageElement(se) except AttributeError, x: errStr = "prepareFile: Exception while instantiating the Storage Element." gLogger.exception(errStr, se, str(x)) return S_ERROR(errStr) res = storageElement.getFile(pfn, "%s/getFile" % base_path, True) if not res['OK']: gLogger.error("prepareFile: Failed to get local copy of file.", res['Message']) return res return S_OK() types_prepareFileForHTTP = [list(StringTypes) + [ListType]] def export_prepareFileForHTTP(self, lfn): """ This method simply gets the file to the local storage area using LFN
def _prepareCommand(self):
    """DowntimeCommand requires four arguments:
    - name        : <str>
    - element     : Site / Resource
    - elementType : <str>

    If the elements are Site(s), we need to get their GOCDB names. They may
    not have one, so we ignore them if they do not have it.

    :return: S_OK((element, elementName, hours, gOCDBServiceType)) or S_ERROR
    """
    if 'name' not in self.args:
        return S_ERROR('"name" not found in self.args')
    elementName = self.args['name']

    if 'element' not in self.args:
        return S_ERROR('"element" not found in self.args')
    element = self.args['element']

    if 'elementType' not in self.args:
        return S_ERROR('"elementType" not found in self.args')
    elementType = self.args['elementType']

    if element not in ['Site', 'Resource']:
        return S_ERROR('element is neither Site nor Resource')

    # Optional look-ahead window; None means "no restriction".
    hours = self.args.get('hours')

    gOCDBServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':
        gocSite = getGOCSiteName(elementName)
        if not gocSite['OK']:
            # The site is most probably is not a grid site - not an issue, of course
            pass  # so, elementName remains unchanged
        else:
            elementName = gocSite['Value']

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':
        # We need to distinguish if it's tape or disk
        try:
            seOptions = StorageElement(elementName).options
        except AttributeError:
            # Sometimes the SE can't be instantiated properly
            self.log.error("Failure instantiating StorageElement object for %s" % elementName)
            return S_ERROR("Failure instantiating StorageElement")
        if 'SEType' in seOptions:
            # Type should follow the convention TXDY
            seType = seOptions['SEType']
            # Compare to None with "is not", never "!=" (PEP 8 / E711).
            diskSE = re.search('D[1-9]', seType) is not None
            tapeSE = re.search('T[1-9]', seType) is not None
            if tapeSE:
                gOCDBServiceType = "srm.nearline"
            elif diskSE:
                gOCDBServiceType = "srm"

        seHost = CSHelpers.getSEHost(elementName)
        if not seHost['OK']:
            return seHost
        seHost = seHost['Value']

        if not seHost:
            return S_ERROR('No seHost for %s' % elementName)
        elementName = seHost

    elif elementType in ['FTS', 'FTS3']:
        gOCDBServiceType = 'FTS'
        # WARNING: this method presupposes that the server is an FTS3 type
        gocSite = getGOCFTSName(elementName)
        if not gocSite['OK']:
            self.log.warn("%s not in Resources/FTSEndpoints/FTS3 ?" % elementName)
        else:
            elementName = gocSite['Value']

    return S_OK((element, elementName, hours, gOCDBServiceType))
def export_getParameters(self, se): """ Get the storage element parameters """ se = StorageElement(se) return se.getParameters()
# Name of the storage element that has to be tested gLogger.setLevel('DEBUG') STORAGE_NAME = posArgs[0] # Size in bytes of the file we want to produce FILE_SIZE = 5 * 1024 # 5kB # base path on the storage where the test files/folders will be created DESTINATION_PATH = '' # plugins that will be used AVAILABLE_PLUGINS = [] if len(posArgs) > 1: AVAILABLE_PLUGINS = posArgs[1].split(',') else: res = StorageElement(STORAGE_NAME).getPlugins() if not res['OK']: gLogger.error("Failed fetching available plugins", res['Message']) sys.exit(2) AVAILABLE_PLUGINS = res['Value'] try: res = getProxyInfo() if not res['OK']: gLogger.error("Failed to get client proxy information.", res['Message']) sys.exit(2) proxyInfo = res['Value'] username = proxyInfo['username'] vo = '' if 'group' in proxyInfo:
def _prepareCommand(self):
    """DowntimeCommand requires four arguments:
    - name        : <str>
    - element     : Site / Resource
    - elementType : <str>

    If the elements are Site(s), we need to get their GOCDB names. They may
    not have one, so we ignore them if they do not have it.

    :return: S_OK((element, elementName, hours, gOCDBServiceType)) or S_ERROR;
             for StorageElements, elementName becomes the list of SE hosts
    """
    if 'name' not in self.args:
        return S_ERROR('"name" not found in self.args')
    elementName = self.args['name']

    if 'element' not in self.args:
        return S_ERROR('"element" not found in self.args')
    element = self.args['element']

    if 'elementType' not in self.args:
        return S_ERROR('"elementType" not found in self.args')
    elementType = self.args['elementType']

    if element not in ['Site', 'Resource']:
        return S_ERROR('element is neither Site nor Resource')

    # Optional look-ahead window; stays None when not supplied.
    hours = None
    if 'hours' in self.args:
        hours = self.args['hours']

    gOCDBServiceType = None

    # Transform DIRAC site names into GOCDB topics
    if element == 'Site':
        gocSite = getGOCSiteName(elementName)
        if not gocSite['OK']:  # The site is most probably is not a grid site - not an issue, of course
            pass  # so, elementName remains unchanged
        else:
            elementName = gocSite['Value']

    # The DIRAC se names mean nothing on the grid, but their hosts do mean.
    elif elementType == 'StorageElement':
        # for SRM and SRM only, we need to distinguish if it's tape or disk
        # if it's not SRM, then gOCDBServiceType will be None (and we'll use them all)
        try:
            se = StorageElement(elementName)
            seOptions = se.options
            seProtocols = set(se.localAccessProtocolList) | set(se.localWriteProtocolList)
        except AttributeError:  # Sometimes the SE can't be instantiated properly
            self.log.error("Failure instantiating StorageElement object", elementName)
            return S_ERROR("Failure instantiating StorageElement")

        if 'SEType' in seOptions and 'srm' in seProtocols:
            # Type should follow the convention TXDY
            seType = seOptions['SEType']
            diskSE = re.search('D[1-9]', seType) is not None
            tapeSE = re.search('T[1-9]', seType) is not None
            if tapeSE:
                gOCDBServiceType = "srm.nearline"
            elif diskSE:
                gOCDBServiceType = "srm"

        res = getSEHosts(elementName)
        if not res['OK']:
            return res
        seHosts = res['Value']

        if not seHosts:
            return S_ERROR('No seHost(s) for %s' % elementName)
        elementName = seHosts  # in this case it will return a list, because there might be more than one host only

    elif elementType in ['FTS', 'FTS3']:
        gOCDBServiceType = 'FTS'
        # WARNING: this method presupposes that the server is an FTS3 type
        gocSite = getGOCFTSName(elementName)
        if not gocSite['OK']:
            self.log.warn("FTS not in Resources/FTSEndpoints/FTS3 ?", elementName)
        else:
            elementName = gocSite['Value']

    return S_OK((element, elementName, hours, gOCDBServiceType))
def _getStorageElement( self, seName ): from DIRAC.Resources.Storage.StorageElement import StorageElement storageElement = StorageElement( seName ) if not storageElement.valid: return S_ERROR( storageElement.errorReason ) return S_OK( storageElement )
def filterReplicas(opFile, logger=None, dataManager=None):
    """Filter out banned/invalid source SEs.

    Classifies every replica SE of ``opFile.LFN`` and may update
    ``opFile.Checksum`` / ``opFile.ChecksumType`` in place along the way.

    :param opFile: operation file (project type); its LFN, Checksum, Status
                   and Error attributes are read and possibly mutated
    :param logger: optional logger, defaults to gLogger
    :param dataManager: optional DataManager, a fresh one is created if None
    :return: S_OK(dict) with SE-name lists under keys "Valid", "Bad",
             "NoMetadata", "NoReplicas", "NoActiveReplicas"; S_ERROR on failure
    """
    if logger is None:
        logger = gLogger
    if dataManager is None:
        dataManager = DataManager()

    log = logger.getSubLogger("filterReplicas")
    # defaultdict(list) so categories can be appended to without initialisation.
    result = defaultdict(list)

    replicas = dataManager.getActiveReplicas(opFile.LFN, getUrl=False)
    if not replicas["OK"]:
        log.error('Failed to get active replicas', replicas["Message"])
        return replicas
    reNotExists = re.compile(r".*such file.*")
    replicas = replicas["Value"]
    failed = replicas["Failed"].get(opFile.LFN, "")
    if reNotExists.match(failed.lower()):
        # Catalog says the file does not exist at all: mark the file failed.
        opFile.Status = "Failed"
        opFile.Error = failed
        return S_ERROR(failed)

    replicas = replicas["Successful"].get(opFile.LFN, {})
    noReplicas = False
    if not replicas:
        # No *active* replica; check whether any (inactive) replica exists.
        allReplicas = dataManager.getReplicas(opFile.LFN, getUrl=False)
        if allReplicas['OK']:
            allReplicas = allReplicas['Value']['Successful'].get(opFile.LFN, {})
            if not allReplicas:
                result['NoReplicas'].append(None)
                noReplicas = True
            else:
                # There are replicas but we cannot get metadata because the replica is not active
                result['NoActiveReplicas'] += list(allReplicas)
            log.verbose("File has no%s replica in File Catalog" % ('' if noReplicas else ' active'), opFile.LFN)
        else:
            return allReplicas

    if not opFile.Checksum or hexAdlerToInt(opFile.Checksum) is False:
        # Set Checksum to FC checksum if not set in the request
        fcMetadata = FileCatalog().getFileMetadata(opFile.LFN)
        fcChecksum = fcMetadata.get('Value', {}).get('Successful', {}).get(opFile.LFN, {}).get('Checksum')
        # Replace opFile.Checksum if it doesn't match a valid FC checksum
        if fcChecksum:
            if hexAdlerToInt(fcChecksum) is not False:
                opFile.Checksum = fcChecksum
                opFile.ChecksumType = fcMetadata['Value']['Successful'][opFile.LFN].get('ChecksumType', 'Adler32')
            else:
                opFile.Checksum = None

    # If no replica was found, return what we collected as information
    if not replicas:
        return S_OK(result)

    for repSEName in replicas:
        repSEMetadata = StorageElement(repSEName).getFileMetadata(opFile.LFN)
        # error is either the overall Message or the per-LFN Failed entry.
        error = repSEMetadata.get('Message', repSEMetadata.get('Value', {}).get('Failed', {}).get(opFile.LFN))
        if error:
            log.warn('unable to get metadata at %s for %s' % (repSEName, opFile.LFN), error.replace('\n', ''))
            if 'File does not exist' in error or 'No such file' in error:
                result['NoReplicas'].append(repSEName)
            else:
                result["NoMetadata"].append(repSEName)
        elif not noReplicas:
            repSEMetadata = repSEMetadata['Value']['Successful'][opFile.LFN]

            seChecksum = hexAdlerToInt(repSEMetadata.get("Checksum"))
            # As from here seChecksum is an integer or False, not a hex string!
            if seChecksum is False and opFile.Checksum:
                # SE returned an unparseable checksum while the request has one.
                result['NoMetadata'].append(repSEName)
            elif not seChecksum and opFile.Checksum:
                # SE has no checksum at all: drop the request checksum so the
                # comparison below cannot fail spuriously.
                opFile.Checksum = None
                opFile.ChecksumType = None
            elif seChecksum and (not opFile.Checksum or opFile.Checksum == 'False'):
                # Use the SE checksum (convert to hex) and force type to be Adler32
                opFile.Checksum = intAdlerToHex(seChecksum)
                opFile.ChecksumType = 'Adler32'

            if not opFile.Checksum or not seChecksum or compareAdler(intAdlerToHex(seChecksum), opFile.Checksum):
                # # All checksums are OK
                result["Valid"].append(repSEName)
            else:
                log.warn(" %s checksum mismatch, FC: '%s' @%s: '%s'" % (opFile.LFN,
                                                                        opFile.Checksum,
                                                                        repSEName,
                                                                        intAdlerToHex(seChecksum)))
                result["Bad"].append(repSEName)
        else:
            # If a replica was found somewhere, don't set the file as no replicas
            result['NoReplicas'] = []

    return S_OK(result)
# he specified a targetSE if not fromSE: fromSE = list(seForSeBases) elif not targetSE: targetSE = list(seForSeBases) fromSE = sorted(fromSE) targetSE = sorted(targetSE) gLogger.notice("Using sources: %s" % ','.join(fromSE)) gLogger.notice("Using target: %s" % ','.join(targetSE)) # Now we construct the SE object for each SE that we want to appear ses = {} for se in set(fromSE + targetSE): ses[se] = StorageElement(seForSeBases[se]) lfn = '/lhcb/toto.xml' # Create a matrix of protocol src/dest tpMatrix = defaultdict(dict) # For each source and destination, generate the url pair, and the compatible third party protocols for src, dst in ((x, y) for x in fromSE for y in targetSE): res = ses[dst].generateTransferURLsBetweenSEs(lfn, ses[src], thirdPartyProtocols) if not res['OK']: surls = 'Error' gLogger.notice( "Could not generate transfer URLS", "src:%s, dst:%s, error:%s" % (src, dst, res['Message']))
def getActiveSEs(seList, access='Write'): """ Utility function - uses the StorageElement cached status """ return [se for se in seList if StorageElement(se).status().get(access, False)]
return res # Clear the local cache getFileDir = "%s/getFile" % BASE_PATH if os.path.exists(getFileDir): try: shutil.rmtree(getFileDir) gLogger.debug("Cleared existing getFile cache") except Exception, x: gLogger.exception("Failed to remove destination directory.", getFileDir, x) os.mkdir(getFileDir) # Get the file to the cache try: storageElement = StorageElement(se) except AttributeError, x: errStr = "prepareFile: Exception while instantiating the Storage Element." gLogger.exception(errStr, se, str(x)) return S_ERROR(errStr) res = storageElement.getFile(pfn, "%s/getFile" % BASE_PATH, True) if not res['OK']: gLogger.error("prepareFile: Failed to get local copy of file.", res['Message']) return res return S_OK() types_prepareFileForHTTP = [list(StringTypes) + [ListType]] def export_prepareFileForHTTP(self, lfn): """ This method simply gets the file to the local storage area using LFN
def _constructStagingJob(self, pinTime, allLFNs, target_spacetoken):
    """Build a job for staging.

    Staging transfers use the same URL as source and destination.

    Some attributes of the object are expected to be set:
    * targetSE
    * activity (optional)
    * priority (optional)
    * filesToSubmit
    * operationID (optional, used as metadata for the job)

    :param pinTime: pinning time in case staging is needed
    :param allLFNs: list of LFNs to stage
    :param target_spacetoken: the space token of the target
    :return: S_OK((job object, set of ftsFileIDs in the job))
    """
    log = gLogger.getSubLogger(f"constructStagingJob/{self.operationID}/{self.targetSE}")

    transfers = []
    fileIDsInTheJob = set()

    # Set of LFNs for which we did not get an SRM URL
    failedLFNs = set()

    # getting all the target surls
    res = StorageElement(self.targetSE, vo=self.vo).getURL(allLFNs, protocol="srm")
    if not res["OK"]:
        return res

    for lfn, reason in res["Value"]["Failed"].items():
        failedLFNs.add(lfn)
        log.error("Could not get target SURL", "%s %s" % (lfn, reason))

    allTargetSURLs = res["Value"]["Successful"]

    for ftsFile in self.filesToSubmit:
        if ftsFile.lfn in failedLFNs:
            log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
            continue

        # Staging: source and target point at the same SURL.
        sourceSURL = targetSURL = allTargetSURLs[ftsFile.lfn]
        # Plain attribute access; getattr() with a constant name was redundant.
        ftsFileID = ftsFile.fileID
        trans_metadata = {"desc": "Stage %s" % ftsFileID, "fileID": ftsFileID}
        trans = fts3.new_transfer(
            sourceSURL,
            targetSURL,
            checksum="ADLER32:%s" % ftsFile.checksum,
            filesize=ftsFile.size,
            metadata=trans_metadata,
            activity=self.activity,
        )
        transfers.append(trans)
        fileIDsInTheJob.add(ftsFileID)

    # If the source is not a tape SE, we should set the
    # copy_pin_lifetime and bring_online params to None,
    # otherwise they will do an extra useless queue in FTS
    sourceIsTape = self.__isTapeSE(self.sourceSE, self.vo)
    copy_pin_lifetime = pinTime if sourceIsTape else None
    bring_online = 86400 if sourceIsTape else None

    # We add a few metadata to the fts job so that we can reuse them later on without
    # querying our DB.
    # source and target SE are just used for accounting purpose
    job_metadata = {"operationID": self.operationID, "sourceSE": self.sourceSE, "targetSE": self.targetSE}
    if self.activity:
        job_metadata["activity"] = self.activity

    job = fts3.new_job(
        transfers=transfers,
        overwrite=True,
        source_spacetoken=target_spacetoken,
        spacetoken=target_spacetoken,
        bring_online=bring_online,
        copy_pin_lifetime=copy_pin_lifetime,
        retry=3,
        metadata=job_metadata,
        priority=self.priority,
    )

    return S_OK((job, fileIDsInTheJob))