def _executeAction( self, trid, proposalTuple, clientInitArgs ):
  """ Execute an action proposed by a connected client.

  Receives the action payload from the client's transport, dispatches it by
  action type (file transfer, RPC, or new messaging connection), and sends
  the outcome back over the same transport.

  :param trid: transport id used to look the connection up in the transport pool
  :param proposalTuple: proposal structure; ( ( targetService, ... ), ( actionType, actionMethod ), ... )
  :param clientInitArgs: client connection initialization arguments, forwarded verbatim
  :return: result dict of the forwarded call, or None when the payload could not be received
  """
  clientTransport = self._transportPool.get( trid )
  credDict = clientTransport.getConnectingCredentials()
  targetService = proposalTuple[0][0]
  actionType = proposalTuple[1][0]
  actionMethod = proposalTuple[1][1]
  idString = self._createIdentityString( credDict, clientTransport )
  # Read the action payload sent by the client right after the proposal
  retVal = clientTransport.receiveData()
  if not retVal[ 'OK' ]:
    gLogger.error( "Error while receiving file description", retVal[ 'Message' ] )
    clientTransport.sendData( S_ERROR( "Error while receiving file description: %s" % retVal[ 'Message' ] ) )
    return
  if actionType == "FileTransfer":
    # File transfers are acknowledged first; the payload is then streamed
    # through the still-open client transport
    gLogger.warn( "Received a file transfer action from %s" % idString )
    clientTransport.sendData( S_OK( "Accepted" ) )
    retVal = self.__forwardFileTransferCall( targetService, clientInitArgs, actionMethod, retVal[ 'Value' ], clientTransport )
  elif actionType == "RPC":
    gLogger.info( "Forwarding %s/%s action to %s for %s" % ( actionType, actionMethod, targetService, idString ) )
    retVal = self.__forwardRPCCall( targetService, clientInitArgs, actionMethod, retVal[ 'Value' ] )
  elif actionType == "Connection" and actionMethod == "new":
    gLogger.info( "Initiating a messaging connection to %s for %s" % ( targetService, idString ) )
    retVal = self._msgForwarder.addClient( trid, targetService, clientInitArgs, retVal[ 'Value' ] )
  else:
    gLogger.warn( "Received an invalid %s/%s action from %s" % ( actionType, actionMethod, idString ) )
    retVal = S_ERROR( "Unknown type of action (%s)" % actionType )
  # TODO: Send back the data?
  # 'rpcStub' is internal bookkeeping of the forwarded call — presumably not
  # meant for the client, so it is stripped before replying (verify with callers)
  if 'rpcStub' in retVal:
    retVal.pop( 'rpcStub' )
  clientTransport.sendData( retVal )
  return retVal
def loadJDLAsCFG(jdl):
    """Load a JDL string into a CFG object.

    Parses a (possibly bracket-wrapped) JDL classad, converting each
    ``key = value;`` statement into a CFG option and each nested
    ``key = [ ... ];`` classad into a CFG sub-section (recursively).

    :param str jdl: JDL text to parse
    :return: S_OK( ( CFG, lastParsedPosition ) ) or S_ERROR( message )
    """

    def cleanValue(value):
        # Normalize a scalar JDL value: a quoted sequence like '"a" , "b"'
        # becomes 'a, b'; otherwise double quotes are simply stripped.
        value = value.strip()
        if value[0] == '"':
            entries = []
            iPos = 1
            current = ""
            state = "in"
            while iPos < len(value):
                if value[iPos] == '"':
                    if state == "in":
                        entries.append(current)
                        current = ""
                        state = "out"
                    elif state == "out":
                        # Between quoted chunks only a comma separator is allowed
                        current = current.strip()
                        if current not in (",", ):
                            return S_ERROR(
                                "value seems a list but is not separated in commas"
                            )
                        current = ""
                        state = "in"
                else:
                    current += value[iPos]
                iPos += 1
            if state == "in":
                return S_ERROR('value is opened with " but is not closed')
            return S_OK(", ".join(entries))
        else:
            return S_OK(value.replace('"', ''))

    def assignValue(key, value, cfg):
        # Validate a key/value pair and store it in cfg as an option.
        key = key.strip()
        if len(key) == 0:
            return S_ERROR("Invalid key name")
        value = value.strip()
        if not value:
            return S_ERROR("No value for key %s" % key)
        if value[0] == "{":
            # JDL list value: { "a", "b" } is flattened to "a, b"
            if value[-1] != "}":
                return S_ERROR(
                    "Value '%s' seems a list but does not end in '}'" % (value))
            valList = List.fromChar(value[1:-1])
            for i in range(len(valList)):
                result = cleanValue(valList[i])
                if not result['OK']:
                    return S_ERROR("Var %s : %s" % (key, result['Message']))
                valList[i] = result['Value']
                # PEP 8: identity check against the None singleton
                if valList[i] is None:
                    return S_ERROR(
                        "List value '%s' seems invalid for item %s" % (value, i))
            value = ", ".join(valList)
        else:
            result = cleanValue(value)
            if not result['OK']:
                return S_ERROR("Var %s : %s" % (key, result['Message']))
            nV = result['Value']
            if nV is None:
                return S_ERROR("Value '%s' seems invalid" % (value))
            value = nV
        cfg.setOption(key, value)
        return S_OK()

    # Skip the optional opening bracket of the classad
    if jdl[0] == "[":
        iPos = 1
    else:
        iPos = 0
    key = ""
    value = ""
    action = "key"
    insideLiteral = False
    cfg = CFG()
    while iPos < len(jdl):
        char = jdl[iPos]
        if char == ";" and not insideLiteral:
            # End of statement: commit the accumulated key/value pair
            if key.strip():
                result = assignValue(key, value, cfg)
                if not result['OK']:
                    return result
            key = ""
            value = ""
            action = "key"
        elif char == "[" and not insideLiteral:
            # Nested classad: parse it recursively into a sub-section
            key = key.strip()
            if not key:
                return S_ERROR("Invalid key in JDL")
            if value.strip():
                return S_ERROR(
                    "Key %s seems to have a value and open a sub JDL at the same time" % key)
            result = loadJDLAsCFG(jdl[iPos:])
            if not result['OK']:
                return result
            subCfg, subPos = result['Value']
            cfg.createNewSection(key, contents=subCfg)
            key = ""
            value = ""
            action = "key"
            insideLiteral = False
            # Jump over the characters consumed by the recursive parse
            iPos += subPos
        elif char == "=" and not insideLiteral:
            if action == "key":
                action = "value"
                insideLiteral = False
            else:
                # An '=' inside a value (e.g. an expression) is kept verbatim
                value += char
        elif char == "]" and not insideLiteral:
            # End of this classad: commit any pending assignment and return
            key = key.strip()
            if len(key) > 0:
                result = assignValue(key, value, cfg)
                if not result['OK']:
                    return result
            return S_OK((cfg, iPos))
        else:
            if action == "key":
                key += char
            else:
                value += char
            if char == '"':
                # Track string literals so separators inside them are ignored
                insideLiteral = not insideLiteral
        iPos += 1
    return S_OK((cfg, iPos))
class MySQL:
    """
    Basic multithreaded DIRAC MySQL Client Class
    """
    # Flipped to True at the end of __init__; __del__ checks it so that a
    # partially constructed instance does not try to drain the pool.
    __initialized = False

    def __init__(self, hostName, userName, passwd, dbName, port=3306, maxQueueSize=3):
        """
        set MySQL connection parameters and try to connect

        :param hostName: MySQL server host
        :param userName: MySQL user
        :param passwd: MySQL password
        :param dbName: database to use
        :param int port: MySQL server port (default 3306)
        :param int maxQueueSize: size of the connection pool / max open connections
        """
        global gInstancesCount
        gInstancesCount += 1
        self._connected = False
        # A derived class may have installed its own logger already
        if 'logger' not in dir(self):
            self.logger = gLogger.getSubLogger('MySQL')
        # let the derived class decide what to do with if is not 1
        self._threadsafe = MySQLdb.thread_safe()
        self.logger.debug('thread_safe = %s' % self._threadsafe)
        _checkQueueSize(maxQueueSize)
        self.__hostName = str(hostName)
        self.__userName = str(userName)
        self.__passwd = str(passwd)
        self.__dbName = str(dbName)
        self.__port = port
        # Create the connection Queue to reuse connections
        self.__connectionQueue = Queue.Queue(maxQueueSize)
        # Create the connection Semaphore to limit total number of open connection
        self.__connectionSemaphore = threading.Semaphore(maxQueueSize)
        self.__initialized = True
        self._connect()

    def __del__(self):
        # Drain and close every pooled connection; end the MySQL client
        # library only when the last instance disappears.
        global gInstancesCount
        try:
            while 1 and self.__initialized:
                self.__connectionSemaphore.release()
                try:
                    connection = self.__connectionQueue.get_nowait()
                    connection.close()
                except Queue.Empty:
                    self.logger.debug('No more connection in Queue')
                    break
            if gInstancesCount == 1:
                # only when the last instance of a MySQL object is deleted, the server
                # can be ended
                MySQLdb.server_end()
            gInstancesCount -= 1
        except Exception:
            # __del__ can run during interpreter shutdown when globals may
            # already be gone — it must never raise
            pass

    def _except(self, methodName, x, err):
        """
        print MySQL error or exception
        return S_ERROR with Exception

        :param methodName: name of the caller, used to tag the log line
        :param x: the exception instance to re-raise and classify
        :param err: human-readable error context
        """
        try:
            # Re-raise so the except clauses can discriminate MySQL errors
            # from any other exception type
            raise x
        except MySQLdb.Error, e:
            self.logger.debug('%s: %s' % (methodName, err),
                              '%d: %s' % (e.args[0], e.args[1]))
            return S_ERROR('%s: ( %d: %s )' % (err, e.args[0], e.args[1]))
        except Exception, e:
            self.logger.debug('%s: %s' % (methodName, err), str(e))
            return S_ERROR('%s: (%s)' % (err, str(e)))
def putAndRegister(self, index, requestObj, subAttrs, subFiles):
    """ putAndRegister operation processing

    Uploads each 'Waiting' file of the sub-request to the (single) target SE
    and registers it in the catalogue, recording marks, data-logging entries
    and per-file statuses for every outcome.

    :param self: self reference
    :param int index: execution order
    :param RequestContainer requestObj: request object
    :param dict subAttrs: sub-request attributes
    :param dict subFiles: sub-request files
    :return: S_OK( requestObj ) or S_ERROR
    """
    self.info("putAndRegister: processing subrequest %s" % index)
    if requestObj.isSubRequestEmpty(index, "transfer")["Value"]:
        self.info(
            "putAndRegister: subrequest %s is empty, setting its status to 'Done'"
            % index)
        requestObj.setSubRequestStatus(index, "transfer", "Done")
        return S_OK(requestObj)

    ## list of targetSEs
    targetSEs = list(
        set([
            targetSE.strip() for targetSE in subAttrs["TargetSE"].split(",")
            if targetSE.strip()
        ]))
    # putAndRegister only makes sense for exactly one target SE
    if len(targetSEs) != 1:
        self.error(
            "putAndRegister: wrong value for TargetSE list = %s, should contain one target!"
            % targetSEs)
        return S_ERROR(
            "putAndRegister: TargetSE should contain one target, got %s" %
            targetSEs)
    targetSE = targetSEs[0]

    ## dict for failed LFNs
    failed = {}
    catalog = ""
    if "Catalogue" in subAttrs and subAttrs["Catalogue"]:
        catalog = subAttrs["Catalogue"]

    for subRequestFile in subFiles:
        lfn = subRequestFile["LFN"]
        self.info("putAndRegister: processing file %s" % lfn)
        # Only 'Waiting' files are (re)processed
        if subRequestFile["Status"] != "Waiting":
            self.info("putAndRegister: skipping file %s, status is %s" %
                      (lfn, subRequestFile["Status"]))
            continue
        self.addMark("Put and register", 1)
        pfn = subRequestFile["PFN"] if subRequestFile["PFN"] else ""
        guid = subRequestFile["GUID"] if subRequestFile["GUID"] else ""
        addler = subRequestFile["Addler"] if subRequestFile["Addler"] else ""
        ## missing parameters
        if "" in [lfn, pfn, guid, addler]:
            self.error("putAndRegister: missing parameters %s" % (", ".join([
                k for k, v in {
                    "PFN": pfn,
                    "GUID": guid,
                    "Addler": addler,
                    "LFN": lfn
                }.items() if v in ("", None)
            ])))
            self.error(
                "putAndRegister: setting file status to 'Failed' and Error to 'WrongParams'"
            )
            requestObj.setSubRequestFileAttributeValue(
                index, "transfer", lfn, "Error", "WrongParams")
            requestObj.setSubRequestFileAttributeValue(
                index, "transfer", lfn, "Status", "Failed")
            continue

        ## hand the actual upload + registration to the DataManager
        putAndRegister = DataManager(catalogs=catalog).putAndRegister(
            lfn, pfn, targetSE, guid=guid, checksum=addler)
        if putAndRegister["OK"]:
            if lfn in putAndRegister["Value"]["Successful"]:
                if "put" not in putAndRegister["Value"]["Successful"][lfn]:
                    ## upload itself failed
                    self.addMark("Put failed", 1)
                    self.dataLoggingClient().addFileRecord(
                        lfn, "PutFail", targetSE, "", "TransferAgent")
                    self.info("putAndRegister: failed to put %s to %s." %
                              (lfn, targetSE))
                    failed[lfn] = "put failed at %s" % targetSE
                    self.info(
                        "putAndRegister: setting file Error to 'FailedToPut'"
                    )
                    requestObj.setSubRequestFileAttributeValue(
                        index, "transfer", lfn, "Error", "FailedToPut")
                elif "register" not in putAndRegister["Value"]["Successful"][lfn]:
                    ## upload OK but registration failed: schedule a
                    ## follow-up 'register' sub-request
                    self.addMark("Put successful", 1)
                    self.addMark("File registration failed", 1)
                    self.dataLoggingClient().addFileRecord(
                        lfn, "Put", targetSE, "", "TransferAgent")
                    self.dataLoggingClient().addFileRecord(
                        lfn, "RegisterFail", targetSE, "", "TransferAgent")
                    putTime = putAndRegister["Value"]["Successful"][lfn]["put"]
                    self.info(
                        "putAndRegister: successfully put %s to %s in %s seconds"
                        % (lfn, targetSE, putTime))
                    self.info("putAndRegister: failed to register %s at %s" %
                              (lfn, targetSE))
                    requestObj.setSubRequestFileAttributeValue(
                        index, "transfer", lfn, "Error", "FailedToRegister")
                    fileDict = putAndRegister["Value"]["Failed"][lfn]["register"]
                    registerRequestDict = {
                        "Attributes": {
                            "TargetSE": fileDict["TargetSE"],
                            "Operation": "registerFile"
                        },
                        "Files": [{
                            "LFN": fileDict["LFN"],
                            "PFN": fileDict["PFN"],
                            "Size": fileDict["Size"],
                            "Addler": fileDict["Addler"],
                            "GUID": fileDict["GUID"]
                        }]
                    }
                    self.info(
                        "putAndRegister: setting registration request for failed file"
                    )
                    requestObj.addSubRequest(registerRequestDict, "register")
                else:
                    ## both put and register succeeded
                    self.addMark("Put successful", 1)
                    self.addMark("File registration successful", 1)
                    self.dataLoggingClient().addFileRecord(
                        lfn, "Put", targetSE, "", "TransferAgent")
                    self.dataLoggingClient().addFileRecord(
                        lfn, "Register", targetSE, "", "TransferAgent")
                    putTime = putAndRegister["Value"]["Successful"][lfn]["put"]
                    self.info(
                        "putAndRegister: successfully put %s to %s in %s seconds"
                        % (lfn, targetSE, putTime))
                    registerTime = putAndRegister["Value"]["Successful"][lfn]["register"]
                    self.info(
                        "putAndRegister: successfully registered %s to %s in %s seconds"
                        % (lfn, targetSE, registerTime))
            else:
                ## DataManager reported the file as Failed
                self.addMark("Put failed", 1)
                self.dataLoggingClient().addFileRecord(
                    lfn, "PutFail", targetSE, "", "TransferAgent")
                reason = putAndRegister["Value"]["Failed"][lfn]
                self.error(
                    "putAndRegister: failed to put and register file %s at %s: %s"
                    % (lfn, targetSE, reason))
                # FIX: was str(reason)[255:], which dropped the first 255
                # characters instead of truncating to the first 255
                requestObj.setSubRequestFileAttributeValue(
                    index, "transfer", lfn, "Error", str(reason)[:255])
                failed[lfn] = reason
        else:
            ## the whole DataManager call failed
            self.addMark("Put failed", 1)
            self.dataLoggingClient().addFileRecord(lfn, "PutFail", targetSE,
                                                   "", "TransferAgent")
            self.error(
                "putAndRegister: completely failed to put and register file: %s"
                % putAndRegister["Message"])
            reason = putAndRegister["Message"]
            # FIX: truncate to 255 chars (was [255:], see above)
            requestObj.setSubRequestFileAttributeValue(
                index, "transfer", lfn, "Error", str(reason)[:255])
            failed[lfn] = reason

        if lfn not in failed:
            self.info(
                "putAndRegister: file %s processed successfully, setting its status to 'Done'"
                % lfn)
            requestObj.setSubRequestFileAttributeValue(
                index, "transfer", lfn, "Status", "Done")
        else:
            self.error("putAndRegister: processing of file %s failed" % lfn)
            self.error("putAndRegister: reason: %s" % failed[lfn])

    ## mark the whole sub-request 'Done' once every file has been processed
    if requestObj.isSubRequestDone(index, "transfer")["Value"]:
        self.info(
            "putAndRegister: all files processed, will set subrequest status to 'Done'"
        )
        requestObj.setSubRequestStatus(index, "transfer", "Done")
    return S_OK(requestObj)
def optimize(self):
    """Merges together the operations that can be merged. They need to have the following arguments equal:
    * Type
    * Arguments
    * SourceSE
    * TargetSE
    * Catalog

    It also makes sure that the maximum number of Files in an Operation is never overcome.

    CAUTION: this method is meant to be called before inserting into the DB.
             So if the RequestID is not 0, we don't touch

    :return: S_ERROR if the Request should not be optimized (because already in the DB)
             S_OK(True) if a optimization was carried out
             S_OK(False) if no optimization were carried out
    """
    # If the RequestID is not the default one (0), it probably means
    # the Request is already in the DB, so we don't touch anything
    if hasattr(self, "RequestID") and getattr(self, "RequestID"):
        return S_ERROR(
            "Cannot optimize because Request seems to be already in the DB (RequestID %s)"
            % getattr(self, "RequestID")
        )
    # Set to True if the request could be optimized
    optimized = False
    # Recognise Failover request series
    repAndRegList = []
    removeRepList = []
    i = 0
    while i < len(self.__operations__):
        insertNow = True
        if i < len(self.__operations__) - 1:
            op1 = self.__operations__[i]
            op2 = self.__operations__[i + 1]
            # A (ReplicateAndRegister, RemoveReplica) pair acting on exactly
            # the same file set is treated as a failover series: both are
            # pulled out here and re-inserted grouped by TargetSE below
            if getattr(op1, "Type") == "ReplicateAndRegister" and getattr(op2, "Type") == "RemoveReplica":
                fileSetA = set(list(f.LFN for f in op1))
                fileSetB = set(list(f.LFN for f in op2))
                if fileSetA == fileSetB:
                    # Source is useless if failover
                    if self.dmsHelper.isSEFailover(op1.SourceSE):
                        op1.SourceSE = ""
                    repAndRegList.append((op1.TargetSE, op1))
                    removeRepList.append((op2.TargetSE, op2))
                    # Remove both operations (second one shifts into slot i)
                    del self.__operations__[i]
                    del self.__operations__[i]
                    # If we are at the end of the request, we must insert the new operations
                    insertNow = i == len(self.__operations__)
        if insertNow:
            if repAndRegList:
                # We must insert the new operations there,
                # i.e. insert before operation i (if it exists)
                # Replication first, removeReplica next
                optimized = True
                insertBefore = self.__operations__[i] if i < len(self.__operations__) else None
                ops = [op for _, op in sorted(repAndRegList, key=lambda x: x[0])]
                ops += [op for _, op in sorted(removeRepList, key=lambda x: x[0])]
                for op in ops:
                    _res = self.insertBefore(op, insertBefore) if insertBefore else self.addOperation(op)
                    # Skip the newly inserted operation
                    i += 1
                repAndRegList = []
                removeRepList = []
            else:
                # Skip current operation
                i += 1
        else:
            # Just to show that in that case we don't increment i
            pass
    # List of attributes that must be equal for operations to be merged
    attrList = ["Type", "Arguments", "SourceSE", "TargetSE", "Catalog"]
    i = 0
    while i < len(self.__operations__):
        # Keep absorbing operation i+1 into operation i while they are mergeable
        while i < len(self.__operations__) - 1:
            # Some attributes need to be the same
            attrMismatch = False
            for attr in attrList:
                if getattr(self.__operations__[i], attr) != getattr(self.__operations__[i + 1], attr):
                    attrMismatch = True
                    break
            if attrMismatch:
                break
            # We do not do the merge if there are common files in the operations
            fileSetA = set(list(f.LFN for f in self.__operations__[i]))
            fileSetB = set(list(f.LFN for f in self.__operations__[i + 1]))
            if fileSetA & fileSetB:
                break
            # There is a maximum number of files one can add into an operation:
            # the += below raises RuntimeError when operation i is full
            try:
                while len(self.__operations__[i + 1]):
                    fileToMove = self.__operations__[i + 1][0]
                    self.__operations__[i] += fileToMove
                    # If the object is mapped to SQLAlchemy object with a relationship
                    # having the delete-orphan option, the fileToMove will have
                    # already disappeared from the original operation. Silly...
                    # If not, we have to remove it manually
                    if len(self.__operations__[i + 1]) and (self.__operations__[i + 1][0] == fileToMove):
                        del self.__operations__[i + 1][0]
                    optimized = True
                # Operation i+1 fully drained: drop it
                del self.__operations__[i + 1]
            except RuntimeError:
                # Operation i is full: leave the remainder in operation i+1
                # and move on
                i += 1
        i += 1
    return S_OK(optimized)
def getRequestSummaryWeb(self, selectDict, sortList, startItem, maxItems):
    """ Returns a list of Request for the web portal

    :param dict selectDict: parameter on which to restrain the query {key : Value}
                            key can be any of the Request columns, 'Type' (interpreted as Operation.Type)
                            and 'FromDate' and 'ToDate' are matched against the LastUpdate field
    :param sortList: [sorting column, ASC/DESC]
    :type sortList: python:list
    :param int startItem: start item (for pagination)
    :param int maxItems: max items (for pagination)
    """
    parameterList = [
        'RequestID', 'RequestName', 'JobID', 'OwnerDN', 'OwnerGroup',
        'Status', "Error", "CreationTime", "LastUpdate"
    ]
    resultDict = {}
    session = self.DBSession()
    try:
        # Base projection: one row per Request with the columns of parameterList
        summaryQuery = session.query(Request.RequestID, Request.RequestName,
                                     Request.JobID, Request.OwnerDN,
                                     Request.OwnerGroup, Request._Status,
                                     Request.Error, Request._CreationTime,
                                     Request._LastUpdate)
        for key, value in selectDict.items():
            if key == 'ToDate':
                summaryQuery = summaryQuery.filter(
                    Request._LastUpdate < value)
            elif key == 'FromDate':
                summaryQuery = summaryQuery.filter(
                    Request._LastUpdate > value)
            else:
                tableName = 'Request'
                if key == 'Type':
                    # 'Type' lives on Operation: join and group so each
                    # Request appears once per matching Operation.Type
                    summaryQuery = summaryQuery.join(Request.__operations__)\
                        .group_by(Request.RequestID, Operation.Type)
                    tableName = 'Operation'
                elif key == 'Status':
                    # The mapped attribute is the private column _Status
                    key = '_Status'
                # NOTE(review): filters are built with eval on strings derived
                # from selectDict keys/values — unsafe if the dict can carry
                # user-controlled text; getattr on the mapped class would be
                # preferable. Confirm where selectDict originates.
                if isinstance(value, list):
                    summaryQuery = summaryQuery.filter(
                        eval('%s.%s.in_(%s)' % (tableName, key, value)))
                else:
                    summaryQuery = summaryQuery.filter(
                        eval('%s.%s' % (tableName, key)) == value)
        if sortList:
            # e.g. sortList == [['RequestID', 'DESC']] -> Request.RequestID.desc()
            summaryQuery = summaryQuery.order_by(
                eval('Request.%s.%s()' %
                     (sortList[0][0], sortList[0][1].lower())))
        try:
            requestLists = summaryQuery.all()
        except NoResultFound, e:
            # No matching rows: return an empty record set
            resultDict['ParameterNames'] = parameterList
            resultDict['Records'] = []
            return S_OK(resultDict)
        except Exception as e:
            return S_ERROR('Error getting the webSummary %s' % e)
        nRequests = len(requestLists)
        # Pagination window [firstIndex, secondIndex)
        if startItem <= len(requestLists):
            firstIndex = startItem
        else:
            return S_ERROR(
                'getRequestSummaryWeb: Requested index out of range')
        if (startItem + maxItems) <= len(requestLists):
            secondIndex = startItem + maxItems
        else:
            secondIndex = len(requestLists)
        records = []
        for i in range(firstIndex, secondIndex):
            row = requestLists[i]
            # Stringify every column for the web portal
            records.append([str(x) for x in row])
        resultDict['ParameterNames'] = parameterList
        resultDict['Records'] = records
        resultDict['TotalRecords'] = nRequests
        return S_OK(resultDict)
if assigned: session.execute( update( Request )\ .where( Request.RequestID == requestID )\ .values( {Request._Status : 'Assigned', Request._LastUpdate : datetime.datetime.utcnow()\ .strftime( Request._datetimeFormat )} ) ) session.commit() session.expunge_all() return S_OK(request) except Exception as e: session.rollback() log.exception("getRequest: unexpected exception", lException=e) return S_ERROR("getRequest: unexpected exception : %s" % e) finally: session.close() def getBulkRequests(self, numberOfRequest=10, assigned=True): """ read as many requests as requested for execution :param int numberOfRequest: Number of Request we want (default 10) :param bool assigned: if True, the status of the selected requests are set to assign :returns: a dictionary of Request objects indexed on the RequestID """ # expire_on_commit is set to False so that we can still use the object after we close the session session = self.DBSession(expire_on_commit=False)
class ExecutorMindHandler( RequestHandler ): MSG_DEFINITIONS = { 'ProcessTask' : { 'taskId' : ( types.IntType, types.LongType ), 'taskStub' : types.StringType, 'eType' : types.StringType }, 'TaskDone' : { 'taskId' : ( types.IntType, types.LongType ), 'taskStub' : types.StringType }, 'TaskFreeze' : { 'taskId' : ( types.IntType, types.LongType ), 'taskStub' : types.StringType, 'freezeTime' : ( types.IntType, types.LongType ) }, 'TaskError' : { 'taskId': ( types.IntType, types.LongType ), 'errorMsg' : types.StringType, 'taskStub' : types.StringType, 'eType' : types.StringType}, 'ExecutorError' : { 'taskId': ( types.IntType, types.LongType ), 'errorMsg' : types.StringType, 'eType' : types.StringType } } class MindCallbacks( ExecutorDispatcherCallbacks ): def __init__( self, sendTaskCB, dispatchCB, disconnectCB, taskProcCB, taskFreezeCB, taskErrCB ): self.__sendTaskCB = sendTaskCB self.__dispatchCB = dispatchCB self.__disconnectCB = disconnectCB self.__taskProcDB = taskProcCB self.__taskFreezeCB = taskFreezeCB self.__taskErrCB = taskErrCB self.__allowedClients = [] def cbSendTask( self, taskId, taskObj, eId, eType ): return self.__sendTaskCB( taskId, taskObj, eId, eType ) def cbDispatch( self, taskId, taskObj, pathExecuted ): return self.__dispatchCB( taskId, taskObj, pathExecuted ) def cbDisconectExecutor( self, eId ): return self.__disconnectCB( eId ) def cbTaskError( self, taskId, taskObj, errorMsg ): return self.__taskErrCB( taskId, taskObj, errorMsg ) def cbTaskProcessed( self, taskId, taskObj, eType ): return self.__taskProcDB( taskId, taskObj, eType ) def cbTaskFreeze( self, taskId, taskObj, eType ): return self.__taskFreezeCB( taskId, taskObj, eType ) ### # End of callbacks ### @classmethod def initializeHandler( cls, serviceInfoDict ): gLogger.notice( "Initializing Executor dispatcher" ) cls.__eDispatch = ExecutorDispatcher( cls.srv_getMonitor() ) cls.__callbacks = ExecutorMindHandler.MindCallbacks( cls.__sendTask, cls.exec_dispatch, cls.__execDisconnected, 
cls.exec_taskProcessed, cls.exec_taskFreeze, cls.exec_taskError ) cls.__eDispatch.setCallbacks( cls.__callbacks ) cls.__allowedClients = [] if cls.log.shown( "VERBOSE" ): gThreadScheduler.setMinValidPeriod( 1 ) gThreadScheduler.addPeriodicTask( 10, lambda: cls.log.verbose( "== Internal state ==\n%s\n===========" % pprint.pformat( cls.__eDispatch._internals() ) ) ) return S_OK() @classmethod def setAllowedClients( cls, aClients ): if type( aClients ) not in ( types.ListType, types.TupleType ): aClients = ( aClients, ) cls.__allowedClients = aClients @classmethod def __sendTask( self, taskId, taskObj, eId, eType ): try: result = self.exec_prepareToSend( taskId, taskObj, eId ) if not result[ 'OK' ]: return result except Exception, excp: gLogger.exception( "Exception while executing prepareToSend: %s" % str( excp ) ) return S_ERROR( "Cannot presend task" ) try: result = self.exec_serializeTask( taskObj ) except Exception, excp: gLogger.exception( "Exception while serializing task %s" % taskId ) return S_ERROR( "Cannot serialize task %s: %s" % ( taskId, str( excp ) ) )
def initialize(self, loops=0):
    """ Watchdog initialization.

    Resolves the JobWrapper configuration section from the CS and reads all
    check flags and thresholds. Idempotent: a second call is a no-op.

    :param int loops: maximum number of monitoring cycles to run
    :return: S_OK() on success, S_ERROR() if the Setup or the
             WorkloadManagement instance cannot be resolved
    """
    # Guard clause: never initialize twice
    if self.initialized:
        self.log.info('Watchdog already initialized')
        return S_OK()
    self.initialized = True

    diracSetup = gConfig.getValue('/DIRAC/Setup', '')
    if not diracSetup:
        return S_ERROR('Can not get the DIRAC Setup value')
    wmsInstance = getSystemInstance("WorkloadManagement")
    if not wmsInstance:
        return S_ERROR('Can not get the WorkloadManagement system instance')
    self.section = '/Systems/WorkloadManagement/%s/JobWrapper' % wmsInstance

    self.maxcount = loops
    self.log.verbose('Watchdog initialization')
    self.log.info('Attempting to Initialize Watchdog for: %s' % (self.systemFlag))

    cfgPath = self.section
    readCfg = gConfig.getValue

    # On/off flags for the individual checks
    self.testWallClock = readCfg(cfgPath + '/CheckWallClockFlag', 1)
    self.testDiskSpace = readCfg(cfgPath + '/CheckDiskSpaceFlag', 1)
    self.testLoadAvg = readCfg(cfgPath + '/CheckLoadAvgFlag', 1)
    self.testCPUConsumed = readCfg(cfgPath + '/CheckCPUConsumedFlag', 1)
    self.testCPULimit = readCfg(cfgPath + '/CheckCPULimitFlag', 0)
    self.testMemoryLimit = readCfg(cfgPath + '/CheckMemoryLimitFlag', 0)
    self.testTimeLeft = readCfg(cfgPath + '/CheckTimeLeftFlag', 1)

    # Timing and threshold parameters
    self.pollingTime = readCfg(cfgPath + '/PollingTime', 10)  # seconds
    self.checkingTime = readCfg(cfgPath + '/CheckingTime', 30 * 60)  # 30 minute period
    self.minCheckingTime = readCfg(cfgPath + '/MinCheckingTime', 20 * 60)  # 20 mins
    self.maxWallClockTime = readCfg(cfgPath + '/MaxWallClockTime', 3 * 24 * 60 * 60)  # days
    self.jobPeekFlag = readCfg(cfgPath + '/JobPeekFlag', 1)  # on / off
    self.minDiskSpace = readCfg(cfgPath + '/MinDiskSpace', 10)  # MB
    self.loadAvgLimit = readCfg(cfgPath + '/LoadAverageLimit', 1000)  # > 1000 and jobs killed
    self.sampleCPUTime = readCfg(cfgPath + '/CPUSampleTime', 30 * 60)  # sample window
    self.jobCPUMargin = readCfg(cfgPath + '/JobCPULimitMargin', 20)  # %age buffer before killing job
    self.minCPUWallClockRatio = readCfg(cfgPath + '/MinCPUWallClockRatio', 5)  # ratio %age
    # After this many sample times of null CPU consumption the job is killed
    self.nullCPULimit = readCfg(cfgPath + '/NullCPUCountLimit', 5)

    # Enforce the lower bound on the checking period
    if self.checkingTime < self.minCheckingTime:
        self.log.info('Requested CheckingTime of %s setting to %s seconds (minimum)'
                      % (self.checkingTime, self.minCheckingTime))
        self.checkingTime = self.minCheckingTime

    # The time left is returned in seconds @ 250 SI00 = 1 HS06,
    # the self.checkingTime and self.pollingTime are in seconds,
    # thus they need to be multiplied by a large enough factor
    self.fineTimeLeftLimit = readCfg(cfgPath + '/TimeLeftLimit', 150 * self.pollingTime)
    self.scaleFactor = readCfg('/LocalSite/CPUScalingFactor', 1.0)
    return S_OK()
def getCompatibleMetadata( self, queryDict, path, credDict ):
  """ Get distinct metadata values compatible with the given already defined metadata

  :param queryDict: already selected metadata (name -> value) restricting the search
  :param path: directory path limiting the scope ('/' means no restriction)
  :param credDict: credentials of the requester, used for metadata field lookup
  :return: S_OK( dict of field -> distinct compatible values ) or S_ERROR
  """
  # Resolve the path to a directory ID (0 means "whole tree")
  pathDirID = 0
  if path != '/':
    result = self.db.dtree.findDir( path )
    if not result['OK']:
      return result
    if not result['Value']:
      return S_ERROR( 'Path not found: %s' % path )
    pathDirID = int( result['Value'] )
  # Collect the directory scope: the path itself, its subdirectories
  # and its parents up the tree
  pathDirs = []
  if pathDirID:
    result = self.db.dtree.getSubdirectoriesByID( pathDirID, includeParent = True )
    if not result['OK']:
      return result
    if result['Value']:
      # NOTE: relies on Python 2 dict.keys() returning a list (+= below)
      pathDirs = result['Value'].keys()
    result = self.db.dtree.getPathIDsByID( pathDirID )
    if not result['OK']:
      return result
    if result['Value']:
      pathDirs += result['Value']
  # Get the list of metadata fields to inspect
  result = self.getMetadataFields( credDict )
  if not result['OK']:
    return result
  metaFields = result['Value']
  comFields = metaFields.keys()
  # Commented out to return compatible data also for selection metadata
  #for m in metaDict:
  #  if m in comFields:
  #    del comFields[comFields.index( m )]
  # Expand meta-set names etc. into the concrete metadata dictionary
  result = self.__expandMetaDictionary( queryDict, credDict )
  if not result['OK']:
    return result
  metaDict = result['Value']
  # Narrow the directory list by each metadata condition in turn;
  # an empty intersection short-circuits the loop
  fromList = pathDirs
  anyMeta = True
  if metaDict:
    anyMeta = False
    for meta, value in metaDict.items():
      result = self.__findCompatibleDirectories( meta, value, fromList )
      if not result['OK']:
        return result
      cdirList = result['Value']
      if cdirList:
        fromList = cdirList
      else:
        fromList = []
        break
  # With no metadata restriction (anyMeta) or a non-empty directory list,
  # collect the distinct values; otherwise nothing is compatible
  if anyMeta or fromList:
    result = self.__findDistinctMetadata( comFields, fromList )
  else:
    result = S_OK( {} )
  return result
def getTimeLeft(self, cpuConsumed=0.0, processors=1):
    """Returns the CPU Time Left for supported batch systems.
    The CPUConsumed is the current raw total CPU.

    :param float cpuConsumed: CPU seconds already consumed by the payload
                              (non-zero when called by the JobAgent, 0 from the watchdog)
    :param int processors: number of processors of the slot, used to scale
                           wall-clock figures into CPU figures
    :return: S_OK( timeLeft in normalized CPU units ) or S_ERROR
    """
    # Quit if no scale factor available
    if not self.scaleFactor:
        return S_ERROR('/LocalSite/CPUScalingFactor not defined for site %s' % DIRAC.siteName())
    if not self.batchPlugin:
        return S_ERROR(self.batchError)
    # Ask the batch-system plugin for the current usage/limits
    resourceDict = self.batchPlugin.getResourceUsage()
    if not resourceDict['OK']:
        self.log.warn('Could not determine timeleft for batch system at site %s' % DIRAC.siteName())
        return resourceDict
    resources = resourceDict['Value']
    self.log.debug("self.batchPlugin.getResourceUsage(): %s" % str(resources))
    if not resources['CPULimit'] and not resources['WallClockLimit']:
        # This should never happen
        return S_ERROR('No CPU or WallClock limit obtained')
    # if one of CPULimit or WallClockLimit is missing, compute a reasonable value
    if not resources['CPULimit']:
        resources['CPULimit'] = resources['WallClockLimit'] * processors
    elif not resources['WallClockLimit']:
        resources['WallClockLimit'] = resources['CPULimit']
    # if one of CPU or WallClock is missing, compute a reasonable value
    if not resources['CPU']:
        resources['CPU'] = resources['WallClock'] * processors
    elif not resources['WallClock']:
        resources['WallClock'] = resources['CPU']
    timeLeft = 0.
    cpu = float(resources['CPU'])
    cpuLimit = float(resources['CPULimit'])
    wallClock = float(resources['WallClock'])
    wallClockLimit = float(resources['WallClockLimit'])
    # Project helper decides whether the remaining margin is worth using
    validTimeLeft = enoughTimeLeft(cpu, cpuLimit, wallClock, wallClockLimit,
                                   self.cpuMargin, self.wallClockMargin)
    if validTimeLeft:
        if cpu and cpuConsumed > 3600. and self.normFactor:
            # If there has been more than 1 hour of consumed CPU and
            # there is a Normalization set for the current CPU
            # use that value to renormalize the values returned by the batch system
            # NOTE: cpuConsumed is non-zero for call by the JobAgent and 0 for call by the watchdog
            # cpuLimit and cpu may be in the units of the batch system, not real seconds... (in this case the other case won't work)
            # therefore renormalise it using cpuConsumed (which is in real seconds)
            timeLeft = (cpuLimit - cpu) * self.normFactor * cpuConsumed / cpu
        elif self.normFactor:
            # FIXME: this is always used by the watchdog... Also used by the JobAgent
            # if consumed less than 1 hour of CPU
            # It was using self.scaleFactor but this is inconsistent: use the same as above
            # In case the returned cpu and cpuLimit are not in real seconds, this is however rubbish
            timeLeft = (cpuLimit - cpu) * self.normFactor
        else:
            # Last resort recovery...
            timeLeft = (cpuLimit - cpu) * self.scaleFactor
        self.log.verbose('Remaining CPU in normalized units is: %.02f' % timeLeft)
        return S_OK(timeLeft)
    else:
        return S_ERROR('No time left for slot')
def selectJob(self, resourceDescription):
    """ Main job selection function to find the highest priority job matching
        the resource capacity.

    Enforces credential-based ownership constraints, optionally checks the
    pilot DIRAC version/project, reports pilot info, validates the site mask,
    queries the task queue for a match, and marks the selected job as Matched.

    :param resourceDescription: description of the resource capacity
        (processed by __processResourceDescription)
    :return: S_OK( dict with JDL, JobID, DN, Group, PilotInfoReportedFlag, ... )
        or S_ERROR
    """
    startTime = time.time()
    resourceDict = self.__processResourceDescription(resourceDescription)
    credDict = self.getRemoteCredentials()
    #Check credentials if not generic pilot
    if Properties.GENERIC_PILOT in credDict['properties']:
        #You can only match groups in the same VO
        vo = Registry.getVOForGroup(credDict['group'])
        result = Registry.getGroupsForVO(vo)
        if result['OK']:
            resourceDict['OwnerGroup'] = result['Value']
    else:
        #If it's a private pilot, the DN has to be the same
        if Properties.PILOT in credDict['properties']:
            gLogger.notice("Setting the resource DN to the credentials DN")
            resourceDict['OwnerDN'] = credDict['DN']
        #If it's a job sharing. The group has to be the same and just check that the DN (if any)
        # belongs to the same group
        elif Properties.JOB_SHARING in credDict['properties']:
            resourceDict['OwnerGroup'] = credDict['group']
            gLogger.notice("Setting the resource group to the credentials group")
            if 'OwnerDN' in resourceDict and resourceDict['OwnerDN'] != credDict['DN']:
                ownerDN = resourceDict['OwnerDN']
                result = Registry.getGroupsForDN(resourceDict['OwnerDN'])
                if not result['OK'] or credDict['group'] not in result['Value']:
                    #DN is not in the same group! bad boy.
                    gLogger.notice(
                        "You cannot request jobs from DN %s. It does not belong to your group!" % ownerDN)
                    resourceDict['OwnerDN'] = credDict['DN']
        #Nothing special, group and DN have to be the same
        else:
            resourceDict['OwnerDN'] = credDict['DN']
            resourceDict['OwnerGroup'] = credDict['group']
    # Check the pilot DIRAC version
    if self.__opsHelper.getValue("Pilot/CheckVersion", True):
        if 'ReleaseVersion' not in resourceDict:
            # 'DIRACVersion' is the legacy key for the pilot version
            if not 'DIRACVersion' in resourceDict:
                return S_ERROR('Version check requested and not provided by Pilot')
            else:
                pilotVersion = resourceDict['DIRACVersion']
        else:
            pilotVersion = resourceDict['ReleaseVersion']
        validVersions = self.__opsHelper.getValue("Pilot/Version", [])
        if validVersions and pilotVersion not in validVersions:
            return S_ERROR(
                'Pilot version does not match the production version %s not in ( %s )' % \
                ( pilotVersion, ",".join( validVersions ) ) )
        #Check project if requested
        validProject = self.__opsHelper.getValue("Pilot/Project", "")
        if validProject:
            if 'ReleaseProject' not in resourceDict:
                return S_ERROR(
                    "Version check requested but expected project %s not received" % validProject)
            if resourceDict['ReleaseProject'] != validProject:
                return S_ERROR(
                    "Version check requested but expected project %s != received %s"
                    % (validProject, resourceDict['ReleaseProject']))
    # Update pilot information
    pilotInfoReported = False
    pilotReference = resourceDict.get('PilotReference', '')
    if pilotReference:
        if "PilotInfoReportedFlag" in resourceDict and not resourceDict['PilotInfoReportedFlag']:
            gridCE = resourceDict.get('GridCE', 'Unknown')
            # NOTE(review): 'destination' is assigned here but never used below
            # (setPilotStatus receives gridCE as destination) -- looks vestigial; confirm
            site = destination = resourceDict.get('Site', 'Unknown')
            # NOTE(review): double assignment 'benchmark = benchmark =' is redundant
            benchmark = benchmark = resourceDict.get('PilotBenchmark', 0.0)
            gLogger.verbose(
                'Reporting pilot info for %s: gridCE=%s, site=%s, benchmark=%f'
                % (pilotReference, gridCE, site, benchmark))
            result = gPilotAgentsDB.setPilotStatus(pilotReference, status='Running',
                                                   gridSite=site, destination=gridCE,
                                                   benchmark=benchmark)
            if result['OK']:
                pilotInfoReported = True
    #Check the site mask
    if not 'Site' in resourceDict:
        return S_ERROR('Missing Site Name in Resource JDL')
    # Get common site mask and check the agent site
    result = gJobDB.getSiteMask(siteState='Active')
    if not result['OK']:
        return S_ERROR('Internal error: can not get site mask')
    maskList = result['Value']
    siteName = resourceDict['Site']
    if siteName not in maskList:
        if 'GridCE' not in resourceDict:
            return S_ERROR('Site not in mask and GridCE not specified')
        #Even if the site is banned, if it defines a CE, it must be able to check it
        del resourceDict['Site']
    resourceDict['Setup'] = self.serviceInfoDict['clientSetup']
    gLogger.verbose("Resource description:")
    for key in resourceDict:
        gLogger.verbose("%s : %s" % (key.rjust(20), resourceDict[key]))
    negativeCond = self.__limiter.getNegativeCondForSite(siteName)
    result = gTaskQueueDB.matchAndGetJob(resourceDict, negativeCond=negativeCond)
    if DEBUG:
        print result
    if not result['OK']:
        return result
    result = result['Value']
    if not result['matchFound']:
        return S_ERROR('No match found')
    jobID = result['jobId']
    resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup', 'Status'])
    if not resAtt['OK']:
        return S_ERROR('Could not retrieve job attributes')
    if not resAtt['Value']:
        return S_ERROR('No attributes returned for job')
    if not resAtt['Value']['Status'] == 'Waiting':
        # Stale entry in the task queue: remove it and refuse the match
        gLogger.error('Job matched by the TQ is not in Waiting state', str(jobID))
        result = gTaskQueueDB.deleteJob(jobID)
        if not result['OK']:
            return result
        return S_ERROR("Job %s is not in Waiting state" % str(jobID))
    attNames = ['Status', 'MinorStatus', 'ApplicationStatus', 'Site']
    attValues = ['Matched', 'Assigned', 'Unknown', siteName]
    result = gJobDB.setJobAttributes(jobID, attNames, attValues)
    # result = gJobDB.setJobStatus( jobID, status = 'Matched', minor = 'Assigned' )
    result = gJobLoggingDB.addLoggingRecord(jobID, status='Matched',
                                            minor='Assigned', source='Matcher')
    result = gJobDB.getJobJDL(jobID)
    if not result['OK']:
        return S_ERROR('Failed to get the job JDL')
    resultDict = {}
    resultDict['JDL'] = result['Value']
    resultDict['JobID'] = jobID
    matchTime = time.time() - startTime
    gLogger.info("Match time: [%s]" % str(matchTime))
    gMonitor.addMark("matchTime", matchTime)
    # Get some extra stuff into the response returned
    resOpt = gJobDB.getJobOptParameters(jobID)
    if resOpt['OK']:
        for key, value in resOpt['Value'].items():
            resultDict[key] = value
    resAtt = gJobDB.getJobAttributes(jobID, ['OwnerDN', 'OwnerGroup'])
    if not resAtt['OK']:
        return S_ERROR('Could not retrieve job attributes')
    if not resAtt['Value']:
        return S_ERROR('No attributes returned for job')
    if self.__opsHelper.getValue("JobScheduling/CheckMatchingDelay", True):
        self.__limiter.updateDelayCounters(siteName, jobID)
    # Report pilot-job association
    if pilotReference:
        result = gPilotAgentsDB.setCurrentJobID(pilotReference, jobID)
        result = gPilotAgentsDB.setJobForPilot(jobID, pilotReference, updateStatus=False)
    resultDict['DN'] = resAtt['Value']['OwnerDN']
    resultDict['Group'] = resAtt['Value']['OwnerGroup']
    resultDict['PilotInfoReportedFlag'] = pilotInfoReported
    return S_OK(resultDict)
def addShifter(self, shifters=None):
    """
    Adds or modify one or more shifters. Also, adds the shifter section in case
    this is not present. Shifter identities are used in several places, mostly
    for running agents.

    :param dict shifters: has to be in the form
        {'ShifterRole':{'User':'aUserName', 'Group':'aDIRACGroup'}}
    :return: S_OK(True)/S_ERROR
    """

    def getOpsSection():
        """ Where is the shifters section?

        Probes the Operations tree, preferring VO+setup specific paths,
        then VO defaults, then setup-only, then global defaults.
        """
        vo = CSGlobals.getVO()
        setup = CSGlobals.getSetup()
        if vo:
            res = gConfig.getSections('/Operations/%s/%s/Shifter' % (vo, setup))
            if res['OK']:
                return S_OK('/Operations/%s/%s/Shifter' % (vo, setup))
            res = gConfig.getSections('/Operations/%s/Defaults/Shifter' % vo)
            if res['OK']:
                return S_OK('/Operations/%s/Defaults/Shifter' % vo)
        else:
            res = gConfig.getSections('/Operations/%s/Shifter' % setup)
            if res['OK']:
                return S_OK('/Operations/%s/Shifter' % setup)
            res = gConfig.getSections('/Operations/Defaults/Shifter')
            if res['OK']:
                return S_OK('/Operations/Defaults/Shifter')
        return S_ERROR("No shifter section")

    if shifters is None:
        shifters = {}
    if not self.__initialized['OK']:
        return self.__initialized
    # get current shifters
    opsH = Operations()
    currentShifterRoles = opsH.getSections('Shifter')
    if not currentShifterRoles['OK']:
        # we assume the shifter section is not present
        currentShifterRoles = []
    else:
        currentShifterRoles = currentShifterRoles['Value']
    currentShiftersDict = {}
    for currentShifterRole in currentShifterRoles:
        currentShifter = opsH.getOptionsDict('Shifter/%s' % currentShifterRole)
        if not currentShifter['OK']:
            return currentShifter
        currentShifter = currentShifter['Value']
        currentShiftersDict[currentShifterRole] = currentShifter
    # Removing from shifters what does not need to be changed
    # (note the pop below: entries identical to the current CS are dropped)
    for sRole in shifters.keys():
        if sRole in currentShiftersDict:
            if currentShiftersDict[sRole] == shifters[sRole]:
                shifters.pop(sRole)
    # get shifters section to modify
    section = getOpsSection()
    # Is this section present?
    if not section['OK']:
        if section['Message'] == "No shifter section":
            gLogger.warn(section['Message'])
            gLogger.info("Adding shifter section")
            vo = CSGlobals.getVO()
            if vo:
                section = '/Operations/%s/Defaults/Shifter' % vo
            else:
                section = '/Operations/Defaults/Shifter'
            res = self.__csMod.createSection(section)
            if not res:
                gLogger.error("Section %s not created" % section)
                return S_ERROR("Section %s not created" % section)
        else:
            gLogger.error(section['Message'])
            return section
    else:
        section = section['Value']
    # add or modify shifters: recreate each role section from scratch
    for shifter in shifters:
        self.__csMod.removeSection(section + '/' + shifter)
        self.__csMod.createSection(section + '/' + shifter)
        self.__csMod.createSection(section + '/' + shifter + '/' + 'User')
        self.__csMod.createSection(section + '/' + shifter + '/' + 'Group')
        self.__csMod.setOptionValue(section + '/' + shifter + '/' + 'User',
                                    shifters[shifter]['User'])
        self.__csMod.setOptionValue(section + '/' + shifter + '/' + 'Group',
                                    shifters[shifter]['Group'])
    self.csModified = True
    return S_OK(True)
def runIt(self):
    """
    Called by Agent.

    Execute the following:
      - define the platform
      - check for presence of ROOTSYS variable
      - generate a wrapper shell script and run the ROOT macro through it

    :return: S_OK/S_ERROR from finalStatusReport, or an early S_OK/S_ERROR
        when preconditions are not met
    """
    self.result = S_OK()
    if not self.platform:
        self.result = S_ERROR('No ILC platform selected')
    elif not self.applicationLog:
        self.result = S_ERROR('No Log file provided')
    if not self.result['OK']:
        self.log.error("Failed to resolve input parameters:", self.result['Message'])
        return self.result
    res = getEnvironmentScript(self.platform, "root", self.applicationVersion,
                               self.getRootEnvScript)
    self.log.notice("Got the environment script: %s" % res)
    if not res['OK']:
        self.log.error("Error getting the env script: ", res['Message'])
        return res
    envScriptPath = res['Value']
    # Do not run if a previous step already failed
    if not self.workflowStatus['OK'] or not self.stepStatus['OK']:
        self.log.verbose(
            'Workflow status = %s, step status = %s'
            % (self.workflowStatus['OK'], self.stepStatus['OK']))
        return S_OK('ROOT should not proceed as previous step did not end properly')
    if len(self.script) < 1:
        self.log.error('Macro file not defined, should not happen!')
        return S_ERROR("Macro file not defined")
    self.script = os.path.basename(self.script)
    # Build the wrapper shell script, removing any stale copy first
    scriptName = 'Root_%s_Run_%s.sh' % (self.applicationVersion, self.STEP_NUMBER)
    if os.path.exists(scriptName):
        os.remove(scriptName)
    script = open(scriptName, 'w')
    script.write('#!/bin/sh \n')
    script.write('#####################################################################\n')
    script.write('# Dynamically generated script to run a production or analysis job. #\n')
    script.write('#####################################################################\n')
    script.write('source %s\n' % envScriptPath)
    if os.path.exists("./lib"):
        script.write('declare -x LD_LIBRARY_PATH=./lib:$LD_LIBRARY_PATH\n')
    script.write('echo =============================\n')
    script.write('echo LD_LIBRARY_PATH is\n')
    script.write('echo $LD_LIBRARY_PATH | tr ":" "\n"\n')
    script.write('echo =============================\n')
    script.write('echo PATH is\n')
    script.write('echo $PATH | tr ":" "\n"\n')
    script.write('echo =============================\n')
    script.write('env | sort >> localEnv.log\n')
    script.write('echo =============================\n')
    comm = "root -b -q %s" % self.script
    if self.arguments:
        ## need rawstring for arguments so we don't lose escaped quotation marks for string arguments
        comm = comm + r'\(%s\)' % self.arguments
    comm = comm + "\n"
    self.log.info("Will run %s" % (comm))
    script.write(comm)
    script.write('declare -x appstatus=$?\n')
    #script.write('where\n')
    #script.write('quit\n')
    #script.write('EOF\n')
    script.write('exit $appstatus\n')
    script.close()
    if os.path.exists(self.applicationLog):
        os.remove(self.applicationLog)
    # 0755: Python 2 octal literal (this file is Python 2)
    os.chmod(scriptName, 0755)
    comm = 'sh -c "./%s"' % (scriptName)
    self.setApplicationStatus('ROOT %s step %s' % (self.applicationVersion, self.STEP_NUMBER))
    self.stdError = ''
    self.result = shellCall(0, comm, callbackFunction=self.redirectLogOutput,
                            bufferLimit=20971520)
    #self.result = {'OK':True,'Value':(0,'Disabled Execution','')}
    resultTuple = self.result['Value']
    if not os.path.exists(self.applicationLog):
        self.log.error("Something went terribly wrong, the log file is not present")
        self.setApplicationStatus('root failed terribly, you are doomed!')
        return S_ERROR('root did not produce the expected log')
    status = resultTuple[0]
    # stdOutput = resultTuple[1]
    # stdError = resultTuple[2]
    self.log.info("Status after the application execution is %s" % str(status))
    return self.finalStatusReport(status)
def __updateConfiguration(self, setElements=None, delElements=None): """ Update configuration stored by CS. """ if setElements is None: setElements = {} if delElements is None: delElements = [] log = self.log.getSubLogger("__updateConfiguration") log.debug("Begin function ...") # assure existence and proper value of a section or an option for path, value in setElements.items(): if value is None: section = path else: split = path.rsplit("/", 1) section = split[0] try: result = self.csAPI.createSection(section) if not result["OK"]: log.error("createSection() failed with message: %s" % result["Message"]) except Exception as e: log.error("Exception in createSection(): %s" % repr(e).replace(",)", ")")) if value is not None: try: result = self.csAPI.setOption(path, value) if not result["OK"]: log.error("setOption() failed with message: %s" % result["Message"]) except Exception as e: log.error("Exception in setOption(): %s" % repr(e).replace(",)", ")")) # delete elements in the configuration for path in delElements: result = self.csAPI.delOption(path) if not result["OK"]: log.warn("delOption() failed with message: %s" % result["Message"]) result = self.csAPI.delSection(path) if not result["OK"]: log.warn("delSection() failed with message: %s" % result["Message"]) if self.dryRun: log.info("Dry Run: CS won't be updated") self.csAPI.showDiff() else: # update configuration stored by CS result = self.csAPI.commit() if not result["OK"]: log.error("commit() failed with message: %s" % result["Message"]) return S_ERROR("Could not commit changes to CS.") else: log.info("Committed changes to CS") log.debug("End function.") return S_OK()
def getTransportURL(self, path, protocols=False):
    """ obtain the tURLs for the supplied path and protocols

    :param self: self reference
    :param str path: path on storage
    :param mixed protocols: protocols to use (False for plugin defaults,
        a single protocol string, or a list of protocol strings)
    :returns: Failed dict {path : error message}
              Successful dict {path : transport url}
              S_ERROR in case of argument problems
    """
    res = checkArgumentFormat(path)
    if not res['OK']:
        return res
    urls = res['Value']

    self.log.debug(
        'GFAL2_SRM2Storage.getTransportURL: Attempting to retrieve tURL for %s paths'
        % len(urls))

    failed = {}
    successful = {}

    # Normalise the protocols argument into a list
    if not protocols:
        protocols = self.__getProtocols()
        if not protocols['OK']:
            return protocols
        listProtocols = protocols['Value']
    elif isinstance(protocols, basestring):
        listProtocols = [protocols]
    elif isinstance(protocols, list):
        listProtocols = protocols
    else:
        return S_ERROR("getTransportURL: Must supply desired protocols to this plug-in.")

    # Compatibility because of castor returning a castor: url if you ask
    # for a root URL, and a root: url if you ask for a xroot url...
    # (this block was duplicated verbatim in the original; the second copy was
    # dead code -- after the first pass both protocols are present -- so it
    # has been removed)
    if 'root' in listProtocols and 'xroot' not in listProtocols:
        listProtocols.insert(listProtocols.index('root'), 'xroot')
    elif 'xroot' in listProtocols and 'root' not in listProtocols:
        listProtocols.insert(listProtocols.index('xroot') + 1, 'root')

    # I doubt this can happen... 'srm' is not in the listProtocols,
    # it is normally, gsiftp, root, etc
    if self.protocolParameters['Protocol'] in listProtocols:
        successful = {}
        failed = {}
        for url in urls:
            if self.isURL(url)['Value']:
                successful[url] = url
            else:
                failed[url] = 'getTransportURL: Failed to obtain turls.'
        return S_OK({'Successful': successful, 'Failed': failed})

    for url in urls:
        res = self.__getSingleTransportURL(url, listProtocols)
        self.log.debug('res = %s' % res)
        if not res['OK']:
            failed[url] = res['Message']
        else:
            successful[url] = res['Value']

    return S_OK({'Failed': failed, 'Successful': successful})
def __checkCPUConsumed(self):
    """ Checks whether the CPU consumed by application process is reasonable.
        This method will report stalled jobs to be killed.

    Compares the CPU consumed over the last sample window against the
    wallclock elapsed in the same window; a ratio below
    self.minCPUWallClockRatio flags the job as stalled.

    :return: S_OK() when the job looks healthy (or the check cannot run yet),
        S_ERROR when the job is identified as stalled
    """
    self.log.info("Checking CPU Consumed")
    if 'WallClockTime' not in self.parameters:
        return S_ERROR('Missing WallClockTime info')
    if 'CPUConsumed' not in self.parameters:
        return S_ERROR('Missing CPUConsumed info')
    wallClockTime = self.parameters['WallClockTime'][-1]
    # Not enough wallclock has elapsed yet to judge
    if wallClockTime < self.sampleCPUTime:
        self.log.info(
            "Stopping check, wallclock time (%s) is still smaller than sample time (%s)"
            % (wallClockTime, self.sampleCPUTime))
        return S_OK()
    # Number of monitoring snapshots covering one sample window
    intervals = max(1, int(self.sampleCPUTime / self.checkingTime))
    if len(self.parameters['CPUConsumed']) < intervals + 1:
        self.log.info(
            "Not enough snapshots to calculate, there are %s and we need %s"
            % (len(self.parameters['CPUConsumed']), intervals + 1))
        return S_OK()
    # Wallclock elapsed across the sample window
    wallClockTime = self.parameters['WallClockTime'][-1] - self.parameters['WallClockTime'][-1 - intervals]
    try:
        cpuTime = self.__convertCPUTime(self.parameters['CPUConsumed'][-1])['Value']
        # For some reason, some times the CPU consumed estimation returns 0
        # if cpuTime == 0:
        #   return S_OK()
        cpuTime -= self.__convertCPUTime(self.parameters['CPUConsumed'][-1 - intervals])['Value']
        if cpuTime < 0:
            self.log.warn('Consumed CPU time negative, something wrong may have happened, ignore')
            return S_OK()
        if wallClockTime <= 0:
            self.log.warn('Wallclock time should not be negative or zero, Ignore')
            return S_OK()
        ratio = (cpuTime / wallClockTime) * 100.
        self.log.info("CPU/Wallclock ratio is %.2f%%" % ratio)
        # in case of error cpuTime might be 0, exclude this
        if ratio < self.minCPUWallClockRatio:
            # The payload can opt out of this check via a marker file or env var
            if os.path.exists('DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK') or \
               'DISABLE_WATCHDOG_CPU_WALLCLOCK_CHECK' in os.environ:
                self.log.warn(
                    'N.B. job would be declared as stalled but CPU / WallClock check is disabled by payload')
                return S_OK()
            self.log.info("Job is stalled!")
            return S_ERROR('Watchdog identified this job as stalled')
    except Exception as e:
        self.log.error("Cannot convert CPU consumed from string to int", str(e))
    return S_OK()
def initialize(self, loops=0):
    """Sets default parameters and creates CE instance.

    :param int loops: maximum number of agent cycles (0 = unlimited)
    :return: S_OK() on success, S_ERROR on misconfiguration
    """
    self.maxcount = loops
    self.logLevel = gConfig.getValue('DIRAC/LogLevel', 'INFO')
    self.siteRoot = gConfig.getValue('LocalSite/Root', DIRAC.rootPath)
    self.siteName = gConfig.getValue('LocalSite/Site', 'Unknown')
    self.cpuFactor = gConfig.getValue('LocalSite/CPUScalingFactor', 'Unknown')
    self.maxPilots = gConfig.getValue('LocalSite/MaxPilots', 100)
    self.log.setLevel(self.logLevel)
    self.log.info("Log level set to", self.logLevel)
    #these options are temporary until the Matcher procedure for the DIRAC site exists
    #they determine for which jobs in the WMS pilots are submitted
    self.propertiesDict = {
        '/DIRAC/Setup': 'LHCb-Development',
        '/LocalSite/Properties/OwnerDN': '',
        '/LocalSite/Site': ''
    }
    for propLocation, propDefault in self.propertiesDict.items():
        try:
            # strip any quoting around the CS value
            prop = gConfig.getValue(propLocation, propDefault).replace('"', '')
            self.propertiesDict[propLocation] = str(prop)
        except Exception as e:
            print e
            return S_ERROR('Expected string for %s field' % propLocation)
    self.matchDict = {
        'Setup': self.propertiesDict['/DIRAC/Setup'],
        'Site': self.propertiesDict['/LocalSite/Site'],
        'CPUTime': 3000000,
        # 'GridMiddleware' : '',
        # 'Platform' : '',
        # 'PilotType' : '',
        # 'JobType' : '',
        # 'OwnerGroup' : '',
        # 'GridCE' : '',
        'OwnerDN': self.propertiesDict['/LocalSite/Properties/OwnerDN'],
    }
    #options to pass to the pilot
    self.pilotOptions = {
        '/LocalSite/Architecture': '',
        '/LocalSite/CPUScalingFactor': '',
        '/LocalSite/LocalCE': 'InProcess',
        '/LocalSite/Site': '',
        '/LocalSite/ConcurrentJobs': '',
        '/LocalSite/MaxCPUTime': ''
    }
    for optName, optDefault in self.pilotOptions.items():
        self.pilotOptions[optName] = gConfig.getValue(
            '%(optName)s' % {'optName': optName}, optDefault)
    self.log.debug('======= Pilot Options =======')
    self.log.debug(self.pilotOptions)
    self.log.debug('=============================')
    #create CE
    # NOTE(review): am_getOption normally returns the plain option value, not
    # an S_OK/S_ERROR dict -- indexing ceUniqueID['OK'] below looks suspicious;
    # confirm against the AgentModule.am_getOption contract
    ceUniqueID = self.am_getOption('CEUniqueID', 'Torque')
    if not ceUniqueID['OK']:
        self.log.warn(ceUniqueID['Message'])
        return ceUniqueID
    self.ceName = ceUniqueID['Value']
    ce = self.__createCE(self.ceName)
    if not ce['OK']:
        self.log.warn(ce['Message'])
        return ce
    self.computingElement = ce['Value']
    #path to dirac-pilot script
    self.diracPilotFileName = 'dirac-pilot'
    self.diracPilotPath = self.siteRoot + '/DIRAC/WorkloadManagementSystem/PilotAgent/' + self.diracPilotFileName
    #path to dirac-install script
    self.diracInstallFileName = 'dirac-install'
    self.diracInstallURL = 'http://lhcbproject.web.cern.ch/lhcbproject/dist/DIRAC3/scripts/dirac-install'
    self.diracInstallPath = self.siteRoot + '/' + self.diracInstallFileName
    if not os.path.exists(self.diracInstallPath):
        # fall back to the working directory and fetch the script from the web
        self.diracInstallPath = self.diracInstallFileName
        try:
            urllib.urlretrieve(self.diracInstallURL, self.diracInstallPath)
            os.chmod(
                self.diracInstallPath,
                stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH)
        except:
            # best-effort download: a failure here is logged but does not abort
            self.log.error(
                'Failed to retrieve %(diracInstallFileName)s from %(diracInstallUrl)s' % {
                    'diracInstallFileName': self.diracInstallFileName,
                    'diracInstallUrl': self.diracInstallURL
                })
    return S_OK()
def export_updateSoftware(self, version, rootPath="", gridVersion="2009-08-13"):
    """ Update the local DIRAC software installation to version.

    Runs dirac-install with the appropriate flags, then fixes up MySQL
    server scripts and optionally installs the Oracle client.

    :param str version: DIRAC release to install
    :param str rootPath: optional alternative installation root
    :param str gridVersion: grid middleware bindings version ('' to skip)
    :return: S_OK()/S_ERROR
    """
    # Check that we have a sane local configuration
    result = gConfig.getOptionsDict('/LocalInstallation')
    if not result['OK']:
        return S_ERROR(
            'Invalid installation - missing /LocalInstallation section in the configuration')
    elif not result['Value']:
        return S_ERROR(
            'Invalid installation - empty /LocalInstallation section in the configuration')
    if rootPath and not os.path.exists(rootPath):
        return S_ERROR('Path "%s" does not exists' % rootPath)
    # For LHCb we need to check Oracle client
    installOracleClient = False
    oracleFlag = gConfig.getValue('/LocalInstallation/InstallOracleClient', 'unknown')
    if oracleFlag.lower() in ['yes', 'true', '1']:
        installOracleClient = True
    elif oracleFlag.lower() == "unknown":
        # Flag not set: probe whether cx_Oracle is importable
        result = systemCall(0, ['python', '-c', 'import cx_Oracle'])
        if result['OK'] and result['Value'][0] == 0:
            installOracleClient = True
    cmdList = ['dirac-install', '-r', version, '-t', 'server']
    if rootPath:
        cmdList.extend(['-P', rootPath])
    # Check if there are extensions
    extensionList = getCSExtensions()
    webFlag = gConfig.getValue('/LocalInstallation/WebPortal', False)
    if webFlag:
        extensionList.append('Web')
    if extensionList:
        cmdList += ['-e', ','.join(extensionList)]
    # Are grid middleware bindings required ?
    if gridVersion:
        cmdList.extend(['-g', gridVersion])
    targetPath = gConfig.getValue(
        '/LocalInstallation/TargetPath',
        gConfig.getValue('/LocalInstallation/RootPath', ''))
    if targetPath and os.path.exists(targetPath + '/etc/dirac.cfg'):
        cmdList.append(targetPath + '/etc/dirac.cfg')
    else:
        return S_ERROR('Local configuration not found')
    result = systemCall(0, cmdList)
    if not result['OK']:
        return result
    status = result['Value'][0]
    if status != 0:
        # Get error messages
        error = []
        output = result['Value'][1].split('\n')
        for line in output:
            line = line.strip()
            if 'error' in line.lower():
                error.append(line)
        if error:
            message = '\n'.join(error)
        else:
            message = "Failed to update software to %s" % version
        return S_ERROR(message)
    # Check if there is a MySQL installation and fix the server scripts if necessary
    if os.path.exists(InstallTools.mysqlDir):
        startupScript = os.path.join(InstallTools.instancePath,
                                     'mysql', 'share', 'mysql', 'mysql.server')
        if not os.path.exists(startupScript):
            startupScript = os.path.join(InstallTools.instancePath, 'pro',
                                         'mysql', 'share', 'mysql', 'mysql.server')
        if os.path.exists(startupScript):
            InstallTools.fixMySQLScripts(startupScript)
    # For LHCb we need to check Oracle client
    if installOracleClient:
        result = systemCall(0, 'install_oracle-client.sh')
        if not result['OK']:
            return result
        status = result['Value'][0]
        if status != 0:
            # Get error messages
            error = result['Value'][1].split('\n')
            error.extend(result['Value'][2].split('\n'))
            error.append('Failed to install Oracle client module')
            return S_ERROR('\n'.join(error))
    return S_OK()
class RequestDB(object):
    """
    .. class:: RequestDB

    db holding requests
    """

    def __getDBConnectionInfo(self, fullname):
        """ Collect from the CS all the info needed to connect to the DB.
            This should be in a base class eventually

        :param str fullname: CS path of the database (e.g. 'RequestManagement/ReqDB')
        :raises Exception: when the DB parameters cannot be retrieved
        """
        result = getDBParameters(fullname)
        if not result['OK']:
            raise Exception('Cannot get database parameters: %s' % result['Message'])
        dbParameters = result['Value']
        self.dbHost = dbParameters['Host']
        self.dbPort = dbParameters['Port']
        self.dbUser = dbParameters['User']
        self.dbPass = dbParameters['Password']
        self.dbName = dbParameters['DBName']

    def __init__(self):
        """c'tor

        :param self: self reference
        """
        self.log = gLogger.getSubLogger('RequestDB')
        # Initialize the connection info
        self.__getDBConnectionInfo('RequestManagement/ReqDB')
        runDebug = (gLogger.getLevel() == 'DEBUG')
        # pool_recycle avoids MySQL's default 8h idle-connection timeout
        self.engine = create_engine(
            'mysql://%s:%s@%s:%s/%s'
            % (self.dbUser, self.dbPass, self.dbHost, self.dbPort, self.dbName),
            echo=runDebug, pool_recycle=3600)
        metadata.bind = self.engine
        self.DBSession = sessionmaker(bind=self.engine)

    def createTables(self):
        """ create tables

        :return: S_OK()/S_ERROR
        """
        try:
            metadata.create_all(self.engine)
        except Exception as e:
            return S_ERROR(e)
        return S_OK()

    def cancelRequest(self, requestID):
        """ Set the request status to 'Canceled'.

        :param requestID: ID of the request to cancel
        :return: S_OK() or S_ERROR when no such request exists or on DB error
        """
        session = self.DBSession()
        try:
            updateRet = session.execute( update( Request )\
                                         .where( Request.RequestID == requestID )\
                                         .values( {Request._Status : 'Canceled',
                                                   Request._LastUpdate : datetime.datetime.utcnow()\
                                                   .strftime( Request._datetimeFormat )} ) )
            session.commit()
            # No row was changed
            if not updateRet.rowcount:
                return S_ERROR("No such request %s" % requestID)
            return S_OK()
        except Exception as e:
            session.rollback()
            self.log.exception("cancelRequest: unexpected exception", lException=e)
            return S_ERROR("cancelRequest: unexpected exception %s" % e)
        finally:
            session.close()

    def putRequest(self, request):
        """ update or insert request into db

        :param ~Request.Request request: Request instance
        :return: S_OK( RequestID ) or S_ERROR
        """
        # NOTE(review): unlike cancelRequest, this session is never closed
        # (no finally clause); expire_on_commit=False suggests detached objects
        # are used after return, but confirm the session lifetime is intended
        session = self.DBSession(expire_on_commit=False)
        try:
            try:
                # Refuse to resurrect a request that was already canceled
                if hasattr(request, 'RequestID'):
                    status = session.query( Request._Status )\
                                    .filter( Request.RequestID == request.RequestID )\
                                    .one()
                    if status[0] == 'Canceled':
                        self.log.info(
                            "Request %s(%s) was canceled, don't put it back"
                            % (request.RequestID, request.RequestName))
                        return S_OK(request.RequestID)
            except NoResultFound, e:
                pass
            # Since the object request is not attached to the session, we merge it to have an update
            # instead of an insert with duplicate primary key
            request = session.merge(request)
            session.add(request)
            session.commit()
            session.expunge_all()
            return S_OK(request.RequestID)
        except Exception as e:
            session.rollback()
            self.log.exception("putRequest: unexpected exception", lException=e)
            return S_ERROR("putRequest: unexpected exception %s" % e)
def _sendSMS(self, text): #FIXME: implement it ! return S_ERROR('Not implemented yet')
def mock__ErrorFTSServerStatus(ftsServer): return S_ERROR(ftsServer)
def getDNForHost(host): dnList = gConfig.getValue("%s/Hosts/%s/DN" % (gBaseRegistrySection, host), []) if dnList: return S_OK(dnList) return S_ERROR("No DN found for host %s" % host)
def _insertReplicas( self, lfns, master = False, connection = False ):
    """ Insert new replicas. lfns is a dictionary with one entry for each file.
        The keys are lfns, and values are dict with mandatory attributes :
        FileID, SE (the name), PFN

    :param lfns: lfns and info to insert
    :param master: true if they are master replica, otherwise they will be just 'Replica'
    :param connection: optional existing DB connection to reuse
    :return: successful/failed convention, with successful[lfn] = true
    """
    chunkSize = 200
    connection = self._getConnection(connection)
    # Add the files
    failed = {}
    successful = {}
    # Get the status id of AprioriGood
    res = self._getStatusInt( 'AprioriGood', connection = connection )
    if not res['OK']:
        return res
    statusID = res['Value']
    lfnsToRetry = []
    repValues = {}
    repDesc = {}
    # treat each file after each other
    for lfn in lfns.keys():
        fileID = lfns[lfn]['FileID']
        seName = lfns[lfn]['SE']
        # SE may be given as a single name or a list of names
        if type(seName) in StringTypes:
            seList = [seName]
        elif type(seName) == ListType:
            seList = seName
        else:
            return S_ERROR('Illegal type of SE list: %s' % str( type( seName ) ) )
        replicaType = 'Master' if master else 'Replica'
        pfn = lfns[lfn]['PFN']
        # treat each replica of a file after the other
        # (THIS CANNOT WORK... WE ARE ONLY CAPABLE OF DOING ONE REPLICA PER FILE AT THE TIME)
        for seName in seList:
            # get the SE id
            res = self.db.seManager.findSE(seName)
            if not res['OK']:
                failed[lfn] = res['Message']
                continue
            seID = res['Value']
            # This is incompatible with adding multiple replica at the time for a given file
            # (repValues keyed on lfn only: a later SE overwrites an earlier one)
            repValues[lfn] = ( fileID, seID, statusID, replicaType, pfn )
            repDesc[( fileID, seID )] = lfn
    # First try bulk insertion chunk by chunk
    allChunks = list( self.__chunks( lfns.keys(), chunkSize ) )
    for lfnChunk in allChunks:
        result = self.__insertMultipleReplicas( repValues, lfnChunk )
        if result['OK']:
            allIds = result['Value']
            for fileId, seId, repId in allIds:
                lfn = repDesc[ ( fileId, seId ) ]
                successful[lfn] = True
                lfns[lfn]['RepID'] = repId
        else:
            # whole chunk failed: fall back to one-by-one insertion below
            lfnsToRetry.extend( lfnChunk )
    for lfn in lfnsToRetry:
        fileID, seID, statusID, replicaType, pfn = repValues[lfn]
        # insert the replica and its info
        result = self.db.executeStoredProcedureWithCursor(
            'ps_insert_replica', ( fileID, seID, statusID, replicaType, pfn ) )
        if not result['OK']:
            failed[lfn] = result['Message']
        else:
            replicaID = result['Value'][0][0]
            lfns[lfn]['RepID'] = replicaID
            successful[lfn] = True
    return S_OK({'Successful':successful,'Failed':failed})
def execute(self):
    """Execute the workflow: resolve parameter links, run every step instance in
    order, then propagate Output parameters back onto the workflow object.

    Returns the result of the first failed step (the stored workflowStatus), or
    S_OK with the last step's result value.
    """
    self.resolveGlobalVars()

    # wf_exec_attr: runtime values of the workflow's own parameters,
    # used to resolve links of the form "self.<attrname>"
    wf_exec_attr = { }
    for wf_parameter in self.parameters:
        # parameters must be processed in declaration order so that links
        # can refer to values resolved earlier in the same loop
        if wf_parameter.preExecute():  # skip pure-output parameters
            if wf_parameter.isLinked():
                if wf_parameter.getLinkedModule() == "self":
                    # a workflow parameter linked to another parameter of the
                    # same workflow -- not supposed to happen, warn and copy
                    print(
                        "Warning! Job attribute ",
                        wf_parameter.getName(),
                        "refers to the attribute of the same workflow",
                        wf_parameter.getLinkedParameter(),
                    )
                    wf_exec_attr[wf_parameter.getName()] = wf_exec_attr[
                        wf_parameter.getLinkedParameter()]
                else:
                    # linked to a parameter of another module
                    wf_exec_attr[wf_parameter.getName()] = wf_exec_attr[
                        wf_parameter.getLinkedModule()][
                            wf_parameter.getLinkedParameter()]
            else:
                wf_exec_attr[
                    wf_parameter.getName()] = wf_parameter.getValue()
        # Put all the workflow parameters into the workflow_commons dictionary
        self.workflow_commons[
            wf_parameter.getName()] = wf_parameter.getValue()

    self.module_definitions.loadCode(
    )  # loading Module classes into current python scope

    # wf_exec_steps: dictionary of dictionaries [step instance name][parameter name]
    # carrying the resolved input parameters of each step instance
    wf_exec_steps = {}

    error_message = ""
    step_result = ""
    for step_inst in self.step_instances:
        step_inst_name = step_inst.getName()
        step_inst_type = step_inst.getType()
        wf_exec_steps[step_inst_name] = {}

        for parameter in step_inst.parameters:
            if parameter.preExecute():
                if parameter.isLinked():
                    if parameter.getLinkedModule() == "self":
                        # take the value from the workflow-level attributes
                        wf_exec_steps[step_inst_name][parameter.getName(
                        )] = wf_exec_attr[parameter.getLinkedParameter()]
                    else:
                        # take the value from a previously executed step
                        wf_exec_steps[step_inst_name][parameter.getName(
                        )] = wf_exec_steps[parameter.getLinkedModule()][
                            parameter.getLinkedParameter()]
                else:
                    wf_exec_steps[step_inst_name][
                        parameter.getName()] = parameter.getValue()
            # In the step_commons all parameters are added, both Input and Output ones.
            step_inst.step_commons[
                parameter.getName()] = parameter.getValue()

        resolveVariables(wf_exec_steps[step_inst_name])
        # Set proper (resolved) values for all Input Parameters
        for key, value in wf_exec_steps[step_inst_name].items():
            step_inst.step_commons[key] = value

        step_inst.setParent(self)
        step_inst.setWorkflowCommons(self.workflow_commons)

        result = step_inst.execute(wf_exec_steps[step_inst_name],
                                   self.step_definitions)
        if not result["OK"]:
            # remember only the FIRST failure message; later failures overwrite
            # workflowStatus but not error_message
            if self.workflowStatus["OK"]:
                error_message = result["Message"]
            self.workflowStatus = S_ERROR(result["Message"])
            # NOTE(review): assumes every failing step result carries an "Errno"
            # key -- confirm against StepInstance.execute
            self.workflowStatus["Errno"] = result["Errno"]
        step_result = result.get("Value", step_result)

    # now we need to copy output values of the workflow from the steps
    for wf_parameter in self.parameters:
        if wf_parameter.isOutput():
            if wf_parameter.isLinked():
                if wf_parameter.getLinkedModule() == "self":
                    # output linked to the workflow itself -- not supposed to happen
                    print(
                        "Warning! Workflow OUTPUT attribute ",
                        wf_parameter.getName(),
                        "refer on the attribute of the same workflow",
                        wf_parameter.getLinkedParameter(),
                    )
                    wf_exec_attr[wf_parameter.getName()] = wf_exec_attr[
                        wf_parameter.getLinkedParameter()]
                else:
                    wf_exec_attr[wf_parameter.getName()] = wf_exec_steps[
                        wf_parameter.getLinkedModule()][
                            wf_parameter.getLinkedParameter()]
            else:
                # a constant output does not make much sense -- warn about it
                print(
                    "Warning! Workflow OUTPUT attribute",
                    wf_parameter.getName(),
                    "assigned constant",
                    wf_parameter.getValue(),
                )
                wf_exec_attr[
                    wf_parameter.getName()] = wf_parameter.getValue()
            # mirror the resolved output onto the workflow object itself
            setattr(self, wf_parameter.getName(),
                    wf_exec_attr[wf_parameter.getName()])

    # Return the result of the first failed step or S_OK
    if not self.workflowStatus["OK"]:
        return self.workflowStatus
    return S_OK(step_result)
def handleRequest( self ):
  """ read SubRequests and ExecutionOrder, fire registered handlers upon SubRequests operations

  Processes every 'Waiting' subrequest of self.__requestType whose execution order
  has been reached, saves the updated request back to the request server, and, when
  the whole request is Done and a jobID is attached, finalises the request.

  :param self: self reference
  :return: S_OK( { "monitor" : ... } ) on success, S_ERROR / failed sub-result otherwise
  """
  res = self.requestObj.getNumSubRequests( self.__requestType )
  if not res["OK"]:
    errMsg = "handleRequest: failed to obtain number of '%s' subrequests." % self.__requestType
    self.error( errMsg, res["Message"] )
    return S_ERROR( res["Message"] )

  ## for gMonitor
  self.addMark( "Execute", 1 )

  ## process sub requests
  for index in range( res["Value"] ):
    self.info( "handleRequest: processing subrequest %s." % str(index) )
    # NOTE(review): the 'Value' key is read without checking 'OK' first --
    # a failed getSubRequestAttributes call would raise KeyError here
    subRequestAttrs = self.requestObj.getSubRequestAttributes( index, self.__requestType )["Value"]
    # missing/empty ExecutionOrder means "run first"
    if subRequestAttrs["ExecutionOrder"]:
      subExecutionOrder = int( subRequestAttrs["ExecutionOrder"] )
    else:
      subExecutionOrder = 0
    subRequestStatus = subRequestAttrs["Status"]
    if subRequestStatus != "Waiting":
      self.info( "handleRequest: subrequest %s has status '%s' and is not to be executed." %
                 ( str(index), subRequestStatus ) )
      continue
    # only execute subrequests whose turn has come
    if subExecutionOrder <= self.executionOrder:
      operation = subRequestAttrs["Operation"]
      if operation not in self.operationDispatcher():
        self.error( "handleRequest: '%s' operation not supported" % operation )
      else:
        self.info( "handleRequest: will execute %s '%s' subrequest" % ( str(index), operation ) )
        ## get files
        subRequestFiles = self.requestObj.getSubRequestFiles( index, self.__requestType )["Value"]
        ## execute the registered handler for this operation
        ret = self.operationDispatcher()[operation].__call__( index,
                                                              self.requestObj,
                                                              subRequestAttrs,
                                                              subRequestFiles )
        ## error in operation action?
        if not ret["OK"]:
          self.error( "handleRequest: error when handling subrequest %s: %s" % ( str(index), ret["Message"] ) )
          self.requestObj.setSubRequestAttributeValue( index, self.__requestType, "Error", ret["Message"] )
        else:
          ## update ref to requestObj -- handlers return the (possibly modified) request
          self.requestObj = ret["Value"]
          ## check if subrequest status == Done; only log, finalisation is decided later
          subRequestDone = self.requestObj.isSubRequestDone( index, self.__requestType )
          if not subRequestDone["OK"]:
            self.error( "handleRequest: unable to determine subrequest status: %s" % subRequestDone["Message"] )
          else:
            if not subRequestDone["Value"]:
              self.warn("handleRequest: subrequest %s is not done yet" % str(index) )

  ## persist the request state modified by the operation handlers
  newRequestString = self.requestObj.toXML()['Value']
  update = self.putBackRequest( self.requestName, newRequestString, self.sourceServer )
  if not update["OK"]:
    self.error( "handleRequest: error when updating request: %s" % update["Message"] )
    return update

  ## get request status -- finalisation only matters when a job is attached
  if self.jobID:
    requestStatus = self.requestClient().getRequestStatus( self.requestName, self.sourceServer )
    if not requestStatus["OK"]:
      return requestStatus
    requestStatus = requestStatus["Value"]
    ## finalize request if jobID is present and request status = 'Done'
    self.info("handleRequest: request status is %s" % requestStatus )
    if ( requestStatus["RequestStatus"] == "Done" ) and \
       ( requestStatus["SubRequestStatus"] not in ( "Waiting", "Assigned" ) ):
      self.debug("handleRequest: request is going to be finalised")
      finalize = self.requestClient().finalizeRequest( self.requestName, self.jobID, self.sourceServer )
      if not finalize["OK"]:
        self.error("handleRequest: error in request finalization: %s" % finalize["Message"] )
        return finalize
      self.info("handleRequest: request is finalised")

  ## for gMonitor
  self.addMark( "Done", 1 )

  ## should return S_OK with monitor dict
  return S_OK( { "monitor" : self.monitor() } )
def getPilotMonitorWeb(self, selectDict, sortList, startItem, maxItems):
  """ Get summary of the pilot job information in a standard structure

  :param selectDict: selection criteria; 'Owner' is translated to 'OwnerDN',
                     'FromDate'/'ToDate' become time bounds, 'LastUpdateTime'
                     is accepted as a legacy alias of 'FromDate'
  :param sortList: list of ( attribute, direction ) pairs; only the first is used
  :param startItem: index of the first record to return
  :param maxItems: maximum number of records to return
  :return: S_OK with { 'TotalRecords', 'ParameterNames', 'Records' }
  """
  resultDict = {}
  # 'LastUpdateTime' is only a legacy alias of 'FromDate': remember its value for
  # the fallback below but remove it so it is never used as a selection attribute.
  # (The original code deleted it up front, which made the fallback dead code.)
  lastUpdateTime = selectDict.pop('LastUpdateTime', None)
  if 'Owner' in selectDict:
    userList = selectDict['Owner']
    if not isinstance(userList, list):
      userList = [userList]
    dnList = []
    for uName in userList:
      result = getDNForUsername(uName)
      # skip users without a registered DN instead of crashing on a failed lookup
      if result['OK']:
        dnList += result['Value']
    selectDict['OwnerDN'] = dnList
    del selectDict['Owner']
  startDate = selectDict.pop('FromDate', None)
  # For backward compatibility honour the old 'LastUpdateTime' key
  if startDate is None:
    startDate = lastUpdateTime
  endDate = selectDict.pop('ToDate', None)

  # Sorting instructions. Only one for the moment.
  if sortList:
    orderAttribute = sortList[0][0] + ":" + sortList[0][1]
  else:
    orderAttribute = None

  # Select pilots for the summary
  result = self.selectPilots(selectDict,
                             orderAttribute=orderAttribute,
                             newer=startDate,
                             older=endDate,
                             timeStamp='LastUpdateTime')
  if not result['OK']:
    return S_ERROR('Failed to select pilots: ' + result['Message'])
  pList = result['Value']
  nPilots = len(pList)
  resultDict['TotalRecords'] = nPilots
  if nPilots == 0:
    return S_OK(resultDict)

  # window of records requested by the web page
  ini = startItem
  last = ini + maxItems
  if ini >= nPilots:
    return S_ERROR('Item number out of range')
  if last > nPilots:
    last = nPilots
  pilotList = pList[ini:last]

  paramNames = ['PilotJobReference', 'OwnerDN', 'OwnerGroup', 'GridType',
                'Broker', 'Status', 'DestinationSite', 'BenchMark', 'ParentID',
                'SubmissionTime', 'PilotID', 'LastUpdateTime', 'CurrentJobID',
                'TaskQueueID', 'GridSite']

  result = self.getPilotInfo(pilotList, paramNames=paramNames)
  if not result['OK']:
    return S_ERROR('Failed to get pilot info: ' + result['Message'])
  pilotDict = result['Value']
  records = []
  for pilot in pilotList:
    parList = []
    for parameter in paramNames:
      # numeric values are kept as-is, everything else is stringified
      # ('long' is the Python 2 integer type, consistent with the rest of the file)
      if not isinstance(pilotDict[pilot][parameter], (int, long)):
        parList.append(str(pilotDict[pilot][parameter]))
      else:
        parList.append(pilotDict[pilot][parameter])
      if parameter == 'GridSite':
        gridSite = pilotDict[pilot][parameter]
        # If the Grid Site is unknown try to recover it in the last moment
        if gridSite == "Unknown":
          ce = pilotDict[pilot]['DestinationSite']
          result = getSiteForCE(ce)
          if result['OK']:
            gridSite = result['Value']
            del parList[-1]
            parList.append(gridSite)
    records.append(parList)

  resultDict['ParameterNames'] = paramNames
  resultDict['Records'] = records
  return S_OK(resultDict)
def _createTables(self, tableDict, force=False):
  """
  tableDict:
    tableName: { 'Fields' : { 'Field': 'Description' },
                 'ForeignKeys': {'Field': 'Table.key' },
                 'PrimaryKey': 'Id',
                 'Indexes': { 'Index': [] },
                 'UniqueIndexes': { 'Index': [] },
                 'Engine': 'InnoDB' }
  only 'Fields' is a mandatory key.

  Creates a new Table for each key in tableDict, "tableName" in the DB with
  the provided description.
  It allows to create:
    - flat tables if no "ForeignKeys" key defined.
    - tables with foreign keys to auxiliary tables holding the values
      of some of the fields
  Arguments:
    tableDict: dictionary of dictionary with description of tables to be created.
    Only "Fields" is a mandatory key in the table description.
      "Fields": Dictionary with Field names and description of the fields
      "ForeignKeys": Dictionary with Field names and name of auxiliary tables.
        The auxiliary tables must be defined in tableDict.
      "PrimaryKey": Name of PRIMARY KEY for the table (if exist).
      "Indexes": Dictionary with definition of indexes, the value for each
        index is the list of fields to be indexed.
      "UniqueIndexes": Dictionary with definition of indexes, the value for each
        index is the list of fields to be indexed. This indexes will declared
        unique.
      "Engine": use the given DB engine, InnoDB is the default if not present.
    force:
      if True, requested tables are DROP if they exist.
      if False, returned with S_ERROR if table exist.
  """
  # --- First check consistency of request ---
  if not isinstance(tableDict, dict):
    return S_ERROR('Argument is not a dictionary: %s( %s )' %
                   (type(tableDict), tableDict))

  # materialise the key view so tableList.remove() below is safe on both Py2/Py3
  tableList = list(tableDict)
  if not tableList:
    return S_OK(0)
  for table in tableList:
    thisTable = tableDict[table]
    # Check if Table is properly described with a dictionary
    if not isinstance(thisTable, dict):
      return S_ERROR('Table description is not a dictionary: %s( %s )' %
                     (type(thisTable), thisTable))
    if 'Fields' not in thisTable:
      return S_ERROR('Missing `Fields` key in `%s` table dictionary' % table)

  # --- Topologically sort tables so auxiliary (referenced) tables are created first ---
  tableCreationList = [[]]
  auxiliaryTableList = []
  i = 0
  extracted = True
  while tableList and extracted:
    # iterate extracting tables from list if they only depend on
    # already extracted tables.
    extracted = False
    auxiliaryTableList += tableCreationList[i]
    i += 1
    tableCreationList.append([])
    for table in list(tableList):
      toBeExtracted = True
      thisTable = tableDict[table]
      if 'ForeignKeys' in thisTable:
        thisKeys = thisTable['ForeignKeys']
        for key, auxTable in thisKeys.items():
          # 'Table.key' references an explicit column, plain 'Table' implies same key name
          forTable = auxTable.split('.')[0]
          forKey = key
          if forTable != auxTable:
            forKey = auxTable.split('.')[1]
          if forTable not in auxiliaryTableList:
            toBeExtracted = False
            break
          if key not in thisTable['Fields']:
            return S_ERROR(
                'ForeignKey `%s` -> `%s` not defined in Primary table `%s`.' %
                (key, forKey, table))
          if forKey not in tableDict[forTable]['Fields']:
            return S_ERROR(
                'ForeignKey `%s` -> `%s` not defined in Auxiliary table `%s`.' %
                (key, forKey, forTable))
      if toBeExtracted:
        self.logger.info('Table %s ready to be created' % table)
        extracted = True
        tableList.remove(table)
        tableCreationList[i].append(table)
  # anything left could not be ordered -> circular foreign keys
  if tableList:
    return S_ERROR('Recursive Foreign Keys in %s' % ', '.join(tableList))

  # --- Create the tables, dependency level by dependency level ---
  for tableGroup in tableCreationList:
    for table in tableGroup:
      # Check if Table exists (DROP it when force is requested)
      retDict = self.__checkTable(table, force=force)
      if not retDict['OK']:
        return retDict
      thisTable = tableDict[table]
      cmdList = []
      for field in thisTable['Fields'].keys():
        cmdList.append('`%s` %s' % (field, thisTable['Fields'][field]))
      if 'PrimaryKey' in thisTable:
        if isinstance(thisTable['PrimaryKey'], str):
          cmdList.append('PRIMARY KEY ( `%s` )' % thisTable['PrimaryKey'])
        else:
          # composite primary key given as a list of field names
          cmdList.append('PRIMARY KEY ( %s )' % ", ".join(
              ["`%s`" % str(f) for f in thisTable['PrimaryKey']]))
      if 'Indexes' in thisTable:
        indexDict = thisTable['Indexes']
        for index in indexDict:
          indexedFields = '`, `'.join(indexDict[index])
          cmdList.append('INDEX `%s` ( `%s` )' % (index, indexedFields))
      if 'UniqueIndexes' in thisTable:
        indexDict = thisTable['UniqueIndexes']
        for index in indexDict:
          indexedFields = '`, `'.join(indexDict[index])
          cmdList.append('UNIQUE INDEX `%s` ( `%s` )' % (index, indexedFields))
      if 'ForeignKeys' in thisTable:
        thisKeys = thisTable['ForeignKeys']
        for key, auxTable in thisKeys.items():
          forTable = auxTable.split('.')[0]
          forKey = key
          if forTable != auxTable:
            forKey = auxTable.split('.')[1]
          cmdList.append(
              'FOREIGN KEY ( `%s` ) REFERENCES `%s` ( `%s` )'
              ' ON DELETE RESTRICT' % (key, forTable, forKey))
      engine = thisTable.get('Engine', 'InnoDB')
      cmd = 'CREATE TABLE `%s` (\n%s\n) ENGINE=%s' % (
          table, ',\n'.join(cmdList), engine)
      retDict = self._update(cmd)
      if not retDict['OK']:
        return retDict
      self.logger.info('Table %s created' % table)
  return S_OK()
def getPilotInfo(self, pilotRef=False, parentId=False, conn=False, paramNames=[], pilotID=False): """ Get all the information for the pilot job reference or reference list """ parameters = [ 'PilotJobReference', 'OwnerDN', 'OwnerGroup', 'GridType', 'Broker', 'Status', 'DestinationSite', 'BenchMark', 'ParentID', 'OutputReady', 'AccountingSent', 'SubmissionTime', 'PilotID', 'LastUpdateTime', 'TaskQueueID', 'GridSite', 'PilotStamp', 'Queue' ] if paramNames: parameters = paramNames cmd = "SELECT %s FROM PilotAgents" % ", ".join(parameters) condSQL = [] if pilotRef: if isinstance(pilotRef, list): condSQL.append("PilotJobReference IN (%s)" % ",".join(['"%s"' % x for x in pilotRef])) else: condSQL.append("PilotJobReference = '%s'" % pilotRef) if pilotID: if isinstance(pilotID, list): condSQL.append("PilotID IN (%s)" % ",".join(['%s' % x for x in pilotID])) else: condSQL.append("PilotID = '%s'" % pilotID) if parentId: if isinstance(parentId, list): condSQL.append("ParentID IN (%s)" % ",".join(['%s' % x for x in parentId])) else: condSQL.append("ParentID = %s" % parentId) if condSQL: cmd = "%s WHERE %s" % (cmd, " AND ".join(condSQL)) result = self._query(cmd, conn=conn) if not result['OK']: return result if not result['Value']: msg = "No pilots found" if pilotRef: msg += " for PilotJobReference(s): %s" % pilotRef if parentId: msg += " with parent id: %s" % parentId return S_ERROR(msg) resDict = {} pilotIDs = [] for row in result['Value']: pilotDict = {} for i in range(len(parameters)): pilotDict[parameters[i]] = row[i] if parameters[i] == 'PilotID': pilotIDs.append(row[i]) resDict[row[0]] = pilotDict result = self.getJobsForPilot(pilotIDs) if not result['OK']: return S_OK(resDict) jobsDict = result['Value'] for pilotRef in resDict: pilotInfo = resDict[pilotRef] pilotID = pilotInfo['PilotID'] if pilotID in jobsDict: pilotInfo['Jobs'] = jobsDict[pilotID] return S_OK(resDict)
def makeDir(self,path):
  """ Create a new directory entry

      :param path: absolute directory path to create
      :return: S_OK( dirID ) with an extra 'NewDirectory' key telling whether the
               directory was actually created (True) or already existed (False)
  """
  # return the existing id if the directory is already there
  result = self.findDir(path)
  if not result['OK']:
    return result
  dirID = result['Value']
  if dirID:
    result = S_OK(dirID)
    result['NewDirectory'] = False
    return result

  dpath = path
  if path == '/':
    # root directory: no parent, level 0
    level = 0
    elements = []
    parentDirID = 0
  else:
    if path[0] == "/":
      dpath = path[1:]
    elements = dpath.split('/')
    level = len(elements)
    if level > MAX_LEVELS:
      return S_ERROR('Too many directory levels: %d' % level)
    result = self.getParent(path)
    if not result['OK']:
      return result
    parentDirID = result['Value']

  # numeric path (LPATH1..LPATHn-1) of the parent, used to fill this row's levels
  epathList = []
  if parentDirID:
    result = self.__getNumericPath(parentDirID)
    if not result['OK']:
      return result
    epathList = result['Value']

  names = ['DirName','Level','Parent']
  values = [path,level,parentDirID]
  if path != '/':
    # copy the parent's numeric path into the LPATH columns of the new row;
    # LPATH<level> itself is filled after the insert, below
    for i in range(1,level,1):
      names.append('LPATH%d' % i)
      values.append(epathList[i-1])

  result = self.db._getConnection()
  # NOTE(review): 'OK' is not checked before reading 'Value' -- a failed
  # connection would raise KeyError here
  conn = result['Value']
  result = self.db._insert('FC_DirectoryLevelTree',names,values,conn)
  if not result['OK']:
    if result['Message'].find('Duplicate') != -1:
      # The directory was added concurrently: look it up and report not-new
      resFind = self.findDir(path)
      if not resFind['OK']:
        return resFind
      dirID = resFind['Value']
      result = S_OK(dirID)
      result['NewDirectory'] = False
      return result
    else:
      return result
  dirID = result['lastRowId']

  # Update the path number: LPATH<level> = max over siblings + 1, done inside a
  # transaction with SELECT ... FOR UPDATE to serialise concurrent siblings
  # (this replaces an earlier LOCK/UNLOCK TABLES approach)
  if parentDirID:
    lPath = "LPATH%d" % (level)
    req = " SELECT @tmpvar:=max(%s)+1 FROM FC_DirectoryLevelTree WHERE Parent=%d FOR UPDATE; " % ( lPath, parentDirID )
    resultLock = self.db._query( "START TRANSACTION; ", conn )
    result = self.db._query(req,conn)
    req = "UPDATE FC_DirectoryLevelTree SET %s=@tmpvar WHERE DirID=%d; " % (lPath,dirID)
    result = self.db._update(req,conn)
    result = self.db._query( "COMMIT;", conn )
    if not result['OK']:
      return result
  else:
    # NOTE(review): no transaction was opened on this branch; this ROLLBACK looks
    # like a leftover safeguard from the old table-locking scheme -- confirm
    result = self.db._query( "ROLLBACK;", conn )

  result = S_OK(dirID)
  result['NewDirectory'] = True
  return result
def __preparePerfSONARConfiguration(self, endpointList):
    """Prepare a dictionary with a new CS configuration of perfSONAR endpoints.

    :return: Dictionary where keys are configuration paths (options and sections)
             and values are values of corresponding options
             or None in case of a path pointing to a section.
    """
    log = self.log.getSubLogger("__preparePerfSONARConfiguration")
    log.debug("Begin function ...")

    # fixed pieces of every endpoint path
    rootPath = "/Resources/Sites"
    extPath = "Network"
    baseOptionName = "Enabled"
    options = {baseOptionName: "True", "ServiceType": "perfSONAR"}

    # enable GOCDB endpoints in configuration
    newConfiguration = {}
    for endpoint in endpointList:
        siteName = endpoint["DIRACSITENAME"]
        if siteName is None:
            continue
        domain = siteName.split(".")[0]
        hostPath = cfgPath(rootPath, domain, siteName, extPath,
                           endpoint["HOSTNAME"])
        newConfiguration.update(
            {cfgPath(hostPath, optName): optValue
             for optName, optValue in options.items()})

    # get current configuration
    currentConfiguration = {}
    for optName in options:
        result = gConfig.getConfigurationTree(rootPath, extPath + "/",
                                              "/" + optName)
        if not result["OK"]:
            log.error("getConfigurationTree() failed with message: %s" %
                      result["Message"])
            return S_ERROR("Unable to fetch perfSONAR endpoints from CS.")
        currentConfiguration.update(result["Value"])

    # disable endpoints that disappeared in GOCDB
    removedElements = set(currentConfiguration) - set(newConfiguration)
    newElements = set(newConfiguration) - set(currentConfiguration)

    # every endpoint contributes len(options) entries
    addedEndpoints = int(len(newElements) / len(options))
    disabledEndpoints = 0
    for path in removedElements:
        if baseOptionName in path:
            newConfiguration[path] = "False"
            if currentConfiguration[path] != "False":
                disabledEndpoints += 1

    # inform what will be changed
    if addedEndpoints > 0:
        self.log.info(
            "%s new perfSONAR endpoints will be added to the configuration" %
            addedEndpoints)

    if disabledEndpoints > 0:
        self.log.info(
            "%s old perfSONAR endpoints will be disable in the configuration" %
            disabledEndpoints)

    if addedEndpoints == 0 and disabledEndpoints == 0:
        self.log.info("perfSONAR configuration is up-to-date")

    log.debug("End function.")
    return S_OK(newConfiguration)