def __refresh(self, fromMaster=False):
    """Refresh the configuration from a gateway, the master or the known servers.

    The candidate servers are contacted in random order and the result of the
    first successful update is returned.

    :param bool fromMaster: when no gateway is configured, contact only the
                            master server instead of the full server list
    :return: S_OK()/S_ERROR() -- S_OK() straight away when there is no server
             to contact, otherwise the first successful update result, or an
             S_ERROR listing the distinct failure reasons
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gateways = getGatewayURLs("Configuration/Server")
    failureReasons = []

    # Gateways win over the master, which wins over the plain server list
    if gateways:
        candidates = gateways
        gLogger.debug("Using configuration gateway", str(candidates[0]))
    elif fromMaster:
        master = gConfigurationData.getMasterServer()
        candidates = [master]
        gLogger.debug("Refreshing from master %s" % master)
    else:
        candidates = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(candidates))

    # If no servers in the initial list, we are supposed to use the local configuration only
    if not candidates:
        return S_OK()

    shuffled = List.randomize(candidates)
    gLogger.debug("Randomized server list is %s" % ", ".join(shuffled))

    for serverURL in shuffled:
        # Kept as a function-level import, exactly where the original performs it
        from DIRAC.Core.DISET.RPCClient import RPCClient
        rpcClient = RPCClient(serverURL,
                              useCertificates=gConfigurationData.useServerCertificate(),
                              skipCACheck=gConfigurationData.skipCACheck())
        outcome = _updateFromRemoteLocation(rpcClient)
        if outcome['OK']:
            return outcome

        reason = outcome['Message']
        failureReasons.append(reason)
        gLogger.warn("Can't update from server",
                     "Error while updating from %s: %s" % (serverURL, reason))
        # This particular failure ends the attempts on the remaining servers
        if "Insane environment" in reason:
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(failureReasons)))
def handshake_multipleSteps(self):
    """Perform SSL handshakes.

    This has to be called after the connection was accepted (acceptConnection_multipleSteps)

    The remote credentials are gathered here

    :return: S_OK() once the handshake and the post connection check succeeded,
             S_ERROR() on socket, SSL or verification errors
    """
    try:
        # M2Crypto does not provide public method to
        # accept and handshake in two steps.
        # So we have to do it manually
        # The following lines are basically a copy/paste
        # of the end of SSL.Connection.accept method
        self.oSocket.setup_ssl()
        self.oSocket.set_accept_state()
        self.oSocket.accept_ssl()
        check = getattr(self.oSocket, "postConnectionCheck", self.oSocket.serverPostConnectionCheck)
        if check is not None and not check(self.oSocket.get_peer_cert(), self.oSocket.addr[0]):
            raise SSL.Checker.SSLVerificationError("post connection check failed")

        self.peerCredentials = getM2PeerInfo(self.oSocket)

        # Now that the handshake has been performed on the server
        # we can set the timeout for the RPC operations.
        # In practice, since we are on the server side, the
        # timeout we set here represents the timeout for receiving
        # the arguments and sending back the response. This should
        # in principle be reasonably quick, but just to be sure
        # we can set it to the DEFAULT_RPC_TIMEOUT
        self.oSocket.settimeout(DEFAULT_RPC_TIMEOUT)

        return S_OK()
    # Catch the verification error under the same qualified name it is raised
    # with above, so the handler does not depend on a separate bare import.
    except (socket.error, SSL.SSLError, SSL.Checker.SSLVerificationError) as e:
        return S_ERROR("Error in handshake: %s %s" % (e, repr(e)))
def __discoverCredentialsToUse(self):
    """ Discovers which credentials to use for connection.

    * Server certificate:
      -> If KW_USE_CERTIFICATES in kwargs, sets it in self.__useCertificates
      -> If not, check gConfig.useServerCertificate(),
         and sets it in self.__useCertificates and kwargs[KW_USE_CERTIFICATES]
    * Certification Authorities check:
      -> if KW_SKIP_CA_CHECK is not in kwargs and we are using the certificates,
         set KW_SKIP_CA_CHECK to false in kwargs
      -> if KW_SKIP_CA_CHECK is not in kwargs and we are not using the certificate,
         check the CS.skipCACheck
    * Proxy Chain
      -> if KW_PROXY_CHAIN in kwargs, we remove it and dump its string form
         into kwargs[KW_PROXY_STRING]

    :return: S_OK()/S_ERROR() -- S_ERROR when the proxy chain cannot be dumped
    """
    # Use certificates?
    if self.KW_USE_CERTIFICATES in self.kwargs:
        self.__useCertificates = self.kwargs[self.KW_USE_CERTIFICATES]
    else:
        self.__useCertificates = gConfig.useServerCertificate()
        self.kwargs[self.KW_USE_CERTIFICATES] = self.__useCertificates

    # Skip the CA check?
    if self.KW_SKIP_CA_CHECK not in self.kwargs:
        if self.__useCertificates:
            self.kwargs[self.KW_SKIP_CA_CHECK] = False
        else:
            self.kwargs[self.KW_SKIP_CA_CHECK] = skipCACheck()

    # Replace the proxy chain object by its string dump
    if self.KW_PROXY_CHAIN in self.kwargs:
        try:
            self.kwargs[self.KW_PROXY_STRING] = self.kwargs[self.KW_PROXY_CHAIN].dumpAllToString()['Value']
            del self.kwargs[self.KW_PROXY_CHAIN]
        # Exception, not BaseException: a KeyboardInterrupt or SystemExit must
        # propagate instead of being reported as an invalid proxy chain.
        except Exception:
            return S_ERROR("Invalid proxy chain specified on instantiation")

    return S_OK()
def __readFromFD(self, fd, baseLength=0):
    """Read everything available from a file descriptor until EOF.

    :param fd: file descriptor, read with os.read in chunks of 8192 bytes
    :param int baseLength: length added to the amount read here before it is
                           compared with self.bufferLimit
    :return: S_OK(str) with the decoded data, or S_ERROR (also carrying the data
             read so far under "Value") once self.bufferLimit is exceeded
    """
    # Function-scope import so the block does not rely on a module-level one
    import codecs

    # An incremental decoder keeps the trailing bytes of a multi-byte character
    # that is split across two reads; decoding every chunk on its own would
    # raise UnicodeDecodeError in that case.
    decoder = codecs.getincrementaldecoder("utf-8")()
    chunks = []
    totalLength = 0
    while True:
        rawBuf = os.read(fd, 8192)
        # An empty read means EOF: flush the decoder so a truncated character
        # at the very end of the stream is still reported as an error
        redBuf = decoder.decode(rawBuf, final=not rawBuf)
        if redBuf:
            chunks.append(redBuf)
            totalLength += len(redBuf)
        if totalLength + baseLength > self.bufferLimit:
            dataString = "".join(chunks)
            self.log.error(
                "Maximum output buffer length reached",
                "First and last data in buffer: \n%s \n....\n %s " % (dataString[:100], dataString[-100:]),
            )
            retDict = S_ERROR("Reached maximum allowed length (%d bytes) "
                              "for called function return value" % self.bufferLimit)
            retDict["Value"] = dataString
            return retDict
        if not rawBuf:
            break
    return S_OK("".join(chunks))
def __getGlue2ExecutionEnvironmentInfo(host, executionEnvironments):
    """Look up every requested execution environment in the BDII (Glue2 schema).

    :param str host: BDII host to query
    :param list executionEnvironments: list of the execution environments to get some information from
    :returns: S_OK with the concatenated ldapsearch results of all chunks, the
              failed query result as soon as one query fails, or S_ERROR when
              nothing was found at all
    """
    collected = []
    # break up to avoid argument list too long, it started failing at about 1900 entries
    for chunk in breakListIntoChunks(executionEnvironments, 1000):
        resourceClauses = "".join("(GLUE2ResourceID=%s)" % envName for envName in chunk)
        ldapFilter = "(&(objectClass=GLUE2ExecutionEnvironment)(|%s))" % resourceClauses
        response = ldapsearchBDII(filt=ldapFilter, attr=None, host=host, base="o=glue", selectionString="GLUE2")
        if not response["OK"]:
            return response
        if response["Value"]:
            collected += response["Value"]
        else:
            sLog.error("No information found for %s" % executionEnvironments)

    if collected:
        return S_OK(collected)
    return S_ERROR("No information found for executionEnvironments")
def __findServiceURL(self):
    """Work out the URL to contact for self._destinationSrv.

    A gateway configured under /DIRAC/Gateways/<site name> is used unless the
    KW_IGNORE_GATEWAYS keyword argument is set to a true value.

    :return: self.__initStatus when the initialisation already failed;
             S_OK(url) when the destination is already a URL or a gateway is
             used; S_ERROR when the service URL lookup raises
    """
    if not self.__initStatus['OK']:
        return self.__initStatus

    # Pick one of the configured gateways at random and keep only "protocol://host:port"
    gatewayURL = False
    if not self.kwargs.get(self.KW_IGNORE_GATEWAYS):
        dRetVal = gConfig.getOption("/DIRAC/Gateways/%s" % DIRAC.siteName())
        if dRetVal['OK']:
            rawGatewayURL = List.randomize(List.fromChar(dRetVal['Value'], ","))[0]
            gatewayURL = "/".join(rawGatewayURL.split("/")[:3])

    # The destination may already be a full URL in one of the known protocols
    for protocol in gProtocolDict:
        if self._destinationSrv.startswith("%s://" % protocol):
            gLogger.debug("Already given a valid url", self._destinationSrv)
            if not gatewayURL:
                return S_OK(self._destinationSrv)
            gLogger.debug("Reconstructing given URL to pass through gateway")
            path = "/".join(self._destinationSrv.split("/")[3:])
            finalURL = "%s/%s" % (gatewayURL, path)
            gLogger.debug("Gateway URL conversion:\n %s -> %s" % (self._destinationSrv, finalURL))
            return S_OK(finalURL)

    if gatewayURL:
        gLogger.debug("Using gateway", gatewayURL)
        return S_OK("%s/%s" % (gatewayURL, self._destinationSrv))

    # "except ... as" is valid on Python 2.6+ and Python 3, unlike "except X, e"
    try:
        urls = getServiceURL(self._destinationSrv, setup=self.setup)
    except Exception as e:
        return S_ERROR("Cannot get URL for %s in setup %s: %s" % (self._destinationSrv, self.setup, str(e)))
    # NOTE(review): nothing is returned on this path in the code visible here
def stringToNetwork(self, stringVal):
    """ Send a given string to the DISET client over the network

    The string is sent in slices of self.packetSize, followed by an EOF marker.

    :param stringVal: the data to send
    :return: S_OK() when everything was sent or the peer aborted the transfer,
             the failed sendData result, or S_ERROR if an exception was raised
    """
    iPacketSize = self.packetSize
    ioffset = 0
    strlen = len(stringVal)
    try:
        while ioffset < strlen:
            # Slicing past the end is safe, so the last (short) packet needs no special case
            result = self.sendData(stringVal[ioffset:ioffset + iPacketSize])
            if not result['OK']:
                return result
            if result.get('AbortTransfer'):
                self.__log.verbose("Transfer aborted")
                return S_OK()
            ioffset += iPacketSize
        self.sendEOF()
    except Exception as e:
        return S_ERROR("Error while sending string: %s" % str(e))
    # Report success explicitly rather than falling off the end and returning None
    return S_OK()
def __readFromFile( self, fd, baseLength, doAll = True ):
  """ Read from the file object :fd: for as long as select() reports it readable.

      :param fd: object providing fileno() (and read() when fileno() is not an int)
      :param baseLength: not used in the portion of the method visible here
      :param doAll: not used in the portion of the method visible here
      :return: S_ERROR if anything raises while reading

      NOTE(review): the visible code has no return on the success path and never
      uses baseLength - confirm against the full method before relying on this.
  """
  try:
    dataString = ""
    fn = fd.fileno()
    # Raw os.read() is only used when fileno() gave a plain int
    rawRead = type( fn ) == types.IntType
    # Poll with a 1 second timeout; the loop ends when fd is no longer readable
    while fd in select.select( [ fd ], [], [], 1 )[0]:
      if rawRead:
        nB = os.read( fn, self.bufferLimit )
      else:
        nB = fd.read( 1 )
      # An empty read ends the loop
      if nB == "":
        break
      dataString += nB
  except Exception, x:
    self.log.exception( "SUBPROCESS: readFromFile exception" )
    # nB may not be bound yet if the failure happened before the first read,
    # hence the protective try around the diagnostic logging
    try:
      self.log.error( 'Error reading', 'type(nB) =%s' % type( nB ) )
      self.log.error( 'Error reading', 'nB =%s' % str( nB ) )
    except Exception:
      pass
    return S_ERROR( 'Can not read from output: %s' % str( x ) )
def __refresh(self):
    """Refresh the configuration from a gateway or from the known servers.

    The candidate servers are contacted in random order and the result of the
    first successful update is returned.

    :return: S_OK()/S_ERROR() -- S_OK() straight away when there is no server
             to contact, otherwise the first successful update result, or an
             S_ERROR listing the distinct failure reasons
    """
    self.__lastUpdateTime = time.time()
    gLogger.debug("Refreshing configuration...")
    gatewayList = getGatewayURLs("Configuration/Server")
    updatingErrorsList = []
    if gatewayList:
        lInitialListOfServers = gatewayList
        gLogger.debug("Using configuration gateway", str(lInitialListOfServers[0]))
    else:
        lInitialListOfServers = gConfigurationData.getServers()
        gLogger.debug("Refreshing from list %s" % str(lInitialListOfServers))

    # With no server to contact there is nothing to refresh from: return success
    # instead of an S_ERROR whose list of reasons is empty.
    if not lInitialListOfServers:
        return S_OK()

    lRandomListOfServers = List.randomize(lInitialListOfServers)
    gLogger.debug("Randomized server list is %s" % ", ".join(lRandomListOfServers))

    for sServer in lRandomListOfServers:
        # Function-level import kept where the original performs it
        from DIRAC.Core.DISET.RPCClient import RPCClient
        oClient = RPCClient(sServer,
                            useCertificates=gConfigurationData.useServerCertificate(),
                            skipCACheck=gConfigurationData.skipCACheck())
        dRetVal = _updateFromRemoteLocation(oClient)
        if dRetVal['OK']:
            return dRetVal

        updatingErrorsList.append(dRetVal['Message'])
        gLogger.warn("Can't update from server",
                     "Error while updating from %s: %s" % (sServer, dRetVal['Message']))
        # This particular failure ends the attempts on the remaining servers
        if "Insane environment" in dRetVal['Message']:
            break

    return S_ERROR("Reason(s):\n\t%s" % "\n\t".join(List.uniqueElements(updatingErrorsList)))
def dumpCFGAsLocalCache(self, fileName=None, raw=False):
    """ Serialise a copy of the merged CFG and optionally write it to a file

    Unless ``raw`` is set, the 'Servers' and 'MasterServer' options are removed
    from the /DIRAC/Configuration section of the copy first.

    :param str fileName: file to write the dump to; nothing is written when empty
    :param bool raw: keep the dump untouched

    :return: S_OK(str)/S_ERROR()
    """
    localCFG = gConfigurationData.mergedCFG.clone()
    try:
        if not raw and localCFG.isSection('DIRAC'):
            diracSection = localCFG['DIRAC']
            if diracSection.isSection('Configuration'):  # pylint: disable=no-member
                configSection = diracSection['Configuration']  # pylint: disable=unsubscriptable-object
                for optionName in ('Servers', 'MasterServer'):
                    if configSection.isOption(optionName):
                        configSection.deleteKey(optionName)
        serialized = str(localCFG)
        if fileName:
            with open(fileName, "w") as cacheFile:
                cacheFile.write(serialized)
    except Exception as excp:
        return S_ERROR("Can't write to file %s: %s" % (fileName, str(excp)))
    else:
        return S_OK(serialized)
def msg_TaskError(self, msgObj):
    """Handle a TaskError message.

    The task stub carried by the message is deserialized, the task is removed
    from the dispatcher and exec_taskError is invoked with the error message.

    :param msgObj: message providing taskId, taskStub and errorMsg
    :return: S_OK(), S_ERROR if deserialization raised, or the failed
             deserialization result
    :raises Exception: if exec_deserializeTask does not return a return structure
    """
    taskId = msgObj.taskId
    try:
        deserialized = self.exec_deserializeTask(msgObj.taskStub)
    except Exception as err:
        gLogger.exception("Exception while deserializing task %s" % taskId, lException=err)
        return S_ERROR("Cannot deserialize task %s: %s" % (taskId, str(err)))

    if not isReturnStructure(deserialized):
        raise Exception("exec_deserializeTask does not return a return structure")
    if not deserialized['OK']:
        return deserialized
    task = deserialized['Value']

    # TODO: Check the executor has privileges over the task
    self.__eDispatch.removeTask(msgObj.taskId)

    # A failing error callback is only logged, the message is still acknowledged
    try:
        self.exec_taskError(msgObj.taskId, task, msgObj.errorMsg)
    except Exception as err:
        gLogger.exception("Exception when processing task %s" % msgObj.taskId, lException=err)
    return S_OK()
def generateContext(ftsServer, ucert, lifetime=25200):
    """ Build an fts3 context and delegate the credentials to the server

    :param ftsServer: address of the fts3 server
    :param ucert: the path to the certificate to be used
    :param lifetime: duration (in sec) of the delegation to the FTS3 server
                     (default is 7h, like FTS3 default)

    :returns: S_OK(fts3 context), or S_ERROR on FTS3ClientException
    """
    try:
        ftsContext = fts3.Context(endpoint=ftsServer, ucert=ucert,
                                  request_class=ftsSSLRequest, verify=False)

        # Explicitely delegate to be sure we have the lifetime we want
        # Note: the delegation will re-happen only when the FTS server
        # decides that there is not enough timeleft.
        # At the moment, this is 1 hour, which effectively means that if you do
        # not submit a job for more than 1h, you have no valid proxy in FTS servers
        # anymore. In future release of FTS3, the delegation will be triggered when
        # one third of the lifetime will be left.
        # Also, the proxy given as parameter might have less than "lifetime" left
        # since it is cached, but it does not matter, because in the FTS3Agent
        # we make sure that we renew it often enough
        # Finally, FTS3 has an issue with handling the lifetime of the proxy,
        # because it does not check all the chain. This is under discussion
        # https://its.cern.ch/jira/browse/FTS-1575
        delegationLifetime = datetime.timedelta(seconds=lifetime)
        fts3.delegate(ftsContext, lifetime=delegationLifetime)
    except FTS3ClientException as excp:
        gLogger.exception("Error generating context", repr(excp))
        return S_ERROR(repr(excp))
    return S_OK(ftsContext)
gLogger.debug( "Reconstructing given URL to pass through gateway" ) path = "/".join( self._destinationSrv.split( "/" )[3:] ) finalURL = "%s/%s" % ( gatewayURL, path ) gLogger.debug( "Gateway URL conversion:\n %s -> %s" % ( self._destinationSrv, finalURL ) ) return S_OK( finalURL ) if gatewayURL: gLogger.debug( "Using gateway", gatewayURL ) return S_OK( "%s/%s" % ( gatewayURL, self._destinationSrv ) ) try: urls = getServiceURL( self._destinationSrv, setup = self.setup ) except Exception, e: return S_ERROR( "Cannot get URL for %s in setup %s: %s" % ( self._destinationSrv, self.setup, str( e ) ) ) if not urls: return S_ERROR( "URL for service %s not found" % self._destinationSrv ) urls = List.fromChar( urls, "," ) self.__nbOfUrls = len( urls ) self.__nbOfRetry = 2 if self.__nbOfUrls > 2 else 3 # we retry 2 times all services, if we run more than 2 services if len( urls ) == len( self.__bannedUrls ): self.__bannedUrls = [] # retry all urls gLogger.debug( "Retrying again all URLs" ) if len( self.__bannedUrls ) > 0 and len( urls ) > 1 : # we have host which is not accessible. We remove that host from the list. # We only remove if we have more than one instance for i in self.__bannedUrls: gLogger.debug( "Removing banned URL", "%s" % i ) urls.remove( i )
def systemCall(self, cmdSeq, callbackFunction=None, shell=False, env=None):
    """Execute :cmdSeq: in a child process and collect its output.

    :param cmdSeq: command handed unchanged to subprocess.Popen
    :param callbackFunction: stored in self.callback; not called in this method
    :param bool shell: forwarded to subprocess.Popen, also selects the log label
    :param env: environment forwarded to subprocess.Popen
    :return: S_OK((exitStatus, bufferList[0] content, bufferList[1] content)) on
             completion; an S_ERROR whose "Value" is (-1, "", error string) when
             the child cannot be started; the failed __readFromCommand result;
             or the result of __generateSystemCommandError on timeout
    """
    if shell:
        self.log.verbose("shellCall:", cmdSeq)
    else:
        self.log.verbose("systemCall:", cmdSeq)
    self.cmdSeq = cmdSeq
    self.callback = callbackFunction
    # True unless sys.platform starts with "win"
    closefd = sys.platform.find("win") != 0
    try:
        self.child = subprocess.Popen(
            self.cmdSeq,
            shell=shell,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            close_fds=closefd,
            env=env,
            universal_newlines=True,
        )
        self.childPID = self.child.pid
    except OSError as v:
        retDict = S_ERROR(repr(v))
        retDict["Value"] = (-1, "", str(v))
        return retDict
    except Exception as x:
        # Best-effort cleanup of the pipes; self.child may not exist at this point
        try:
            self.child.stdout.close()
            self.child.stderr.close()
        except Exception:
            pass
        retDict = S_ERROR(repr(x))
        retDict["Value"] = (-1, "", str(x))
        return retDict

    try:
        # One [content, counter] pair per output stream
        # NOTE(review): presumably index 0 is stdout and 1 is stderr - confirm in __readFromCommand
        self.bufferList = [["", 0], ["", 0]]
        initialTime = time.time()

        exitStatus = self.__poll(self.child.pid)

        # Keep draining the output while __poll reports (0, 0) or None
        while (0, 0) == exitStatus or exitStatus is None:
            retDict = self.__readFromCommand()
            if not retDict["OK"]:
                return retDict

            if self.timeout and time.time() - initialTime > self.timeout:
                exitStatus = self.killChild()
                # Collect what the child wrote before it was killed
                self.__readFromCommand()
                return self.__generateSystemCommandError(
                    exitStatus, "Timeout (%d seconds) for '%s' call" % (self.timeout, cmdSeq)
                )
            time.sleep(0.01)
            exitStatus = self.__poll(self.child.pid)

        # Final read to pick up anything written just before the child ended
        self.__readFromCommand()

        if exitStatus:
            exitStatus = exitStatus[1]

        # NOTE(review): looks like a wait()-style status word being reduced to
        # the exit code - confirm against what __poll returns
        if exitStatus >= 256:
            exitStatus = int(exitStatus / 256)
        return S_OK((exitStatus, self.bufferList[0][0], self.bufferList[1][0]))
    finally:
        # Always release the pipes, whichever way the method is left
        try:
            self.child.stdout.close()
            self.child.stderr.close()
        except Exception:
            pass
def monitor(self, context=None, ftsServer=None, ucert=None):
    """ Queries the fts server to monitor the job

    This method assumes that the attribute self.ftsGUID is set

    The status, error, lastUpdate, lastMonitor and completeness attributes of
    the object are updated from the answer of the server.

    :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
    :param ftsServer: the address of the fts server to submit to. Used only if context is
                      not given. if not given either, use the ftsServer object attribute
    :param ucert: path to the user certificate/proxy. Might be infered by the fts cli (see its doc)

    :returns: S_OK({FileID: { status, error } }), or S_ERROR when the GUID is
              not set or the server cannot be queried
    """
    if not self.ftsGUID:
        return S_ERROR("FTSGUID not set, FTS job not submitted?")

    if not context:
        if not ftsServer:
            ftsServer = self.ftsServer
        context = fts3.Context(endpoint=ftsServer, ucert=ucert,
                               request_class=ftsSSLRequest, verify=False)

    try:
        jobStatusDict = fts3.get_job_status(context, self.ftsGUID, list_files=True)
    except FTS3ClientException as e:
        return S_ERROR("Error getting the job status %s" % e)

    now = datetime.datetime.utcnow().replace(microsecond=0)
    self.lastMonitor = now

    newStatus = jobStatusDict['job_state'].capitalize()
    if newStatus != self.status:
        self.status = newStatus
        self.lastUpdate = now
        self.error = jobStatusDict['reason']

    if newStatus in self.FINAL_STATES:
        self._fillAccountingDict(jobStatusDict)

    filesInfoList = jobStatusDict['files']
    filesStatus = {}
    statusSummary = {}

    for fileDict in filesInfoList:
        file_state = fileDict['file_state'].capitalize()
        file_id = fileDict['file_metadata']
        file_error = fileDict['reason']
        filesStatus[file_id] = {'status': file_state, 'error': file_error}

        statusSummary[file_state] = statusSummary.get(file_state, 0) + 1

    total = len(filesInfoList)
    completed = sum(statusSummary.get(state, 0) for state in FTS3File.FTS_FINAL_STATES)
    # Integer percentage on both Python 2 and 3; a job reporting no file at all
    # counts as 0% instead of raising ZeroDivisionError
    self.completeness = int(100 * completed / total) if total else 0

    return S_OK(filesStatus)
def submit(self, context=None, ftsServer=None, ucert=None, pinTime=36000, protocols=None):
    """ submit the job to the FTS server

    Some attributes are expected to be defined for the submission to work:
      * type (set by FTS3Operation)
      * sourceSE (only for Transfer jobs)
      * targetSE
      * activity (optional)
      * priority (optional)
      * username
      * userGroup
      * filesToSubmit
      * operationID (optional, used as metadata for the job)

    We also expect the FTSFiles have an ID defined, as it is given as transfer metadata

    :param pinTime: Time the file should be pinned on disk (used for transfers and staging)
                    Used only if he source SE is a tape storage
    :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
    :param ftsServer: the address of the fts server to submit to. Used only if context is
                      not given. if not given either, use the ftsServer object attribute
    :param ucert: path to the user certificate/proxy. Might be inferred by the fts cli (see its doc)
    :param protocols: list of protocols from which we should choose the protocol to use

    :returns: S_OK([FTSFiles ids of files submitted]), or S_ERROR when the job
              cannot be built (including an unsupported job type) or submitted
    """
    log = gLogger.getSubLogger(
        "submit/%s/%s_%s" % (self.operationID, self.sourceSE, self.targetSE), True)

    if not context:
        if not ftsServer:
            ftsServer = self.ftsServer
        context = fts3.Context(endpoint=ftsServer, ucert=ucert,
                               request_class=ftsSSLRequest, verify=False)

    # Construct the target SURL
    res = self.__fetchSpaceToken(self.targetSE)
    if not res['OK']:
        return res
    target_spacetoken = res['Value']

    allLFNs = [ftsFile.lfn for ftsFile in self.filesToSubmit]

    if self.type == 'Transfer':
        res = self._constructTransferJob(pinTime, allLFNs, target_spacetoken, protocols=protocols)
    elif self.type == 'Staging':
        res = self._constructStagingJob(pinTime, allLFNs, target_spacetoken)
    # elif self.type == 'Removal':
    #   res = self._constructRemovalJob(context, allLFNs, failedLFNs, target_spacetoken)
    else:
        # Without this branch "res" would still hold the space token result and
        # be unpacked below as if it were a (job, fileIDs) pair
        return S_ERROR("Unknown FTS3 job type: %s" % self.type)

    if not res['OK']:
        return res

    job, fileIDsInTheJob = res['Value']
    setFileIdsInTheJob = set(fileIDsInTheJob)

    try:
        self.ftsGUID = fts3.submit(context, job)
        log.info("Got GUID %s" % self.ftsGUID)

        # Only increase the amount of attempt
        # if we succeeded in submitting -> no ! Why did I do that ??
        for ftsFile in self.filesToSubmit:
            ftsFile.attempt += 1
            if ftsFile.fileID in setFileIdsInTheJob:
                ftsFile.status = 'Submitted'

        now = datetime.datetime.utcnow().replace(microsecond=0)
        self.submitTime = now
        self.lastUpdate = now
        self.lastMonitor = now

    except FTS3ClientException as e:
        log.exception("Error at submission", repr(e))
        return S_ERROR("Error at submission: %s" % e)

    return S_OK(fileIDsInTheJob)
def _constructTransferJob(self, pinTime, allLFNs, target_spacetoken, protocols=None):
    """ Build a job for transfer

    Some attributes of the job are expected to be set
      * sourceSE
      * targetSE
      * activity (optional)
      * priority (optional)
      * filesToSubmit
      * operationID (optional, used as metadata for the job)

    Files whose source and target SURL are identical get the status 'Defunct'
    and are left out of the job.

    :param pinTime: pining time in case staging is needed
    :param allLFNs: list of LFNs to transfer
    :param target_spacetoken: the space token of the target
    :param protocols: list of protocols to restrict the protocol choice for the transfer

    :return: S_OK( (job object, list of ftsFileIDs in the job)), or S_ERROR
             when the SURLs cannot be generated or no transfer is possible
    """
    log = gLogger.getSubLogger(
        "constructTransferJob/%s/%s_%s" % (self.operationID, self.sourceSE, self.targetSE), True)

    res = self.__fetchSpaceToken(self.sourceSE)
    if not res['OK']:
        return res
    source_spacetoken = res['Value']

    # LFNs in filesToSubmit for which no SURL could be generated
    failedLFNs = set()
    dstSE = StorageElement(self.targetSE, vo=self.vo)
    srcSE = StorageElement(self.sourceSE, vo=self.vo)

    # getting all the (source, dest) surls
    res = dstSE.generateTransferURLsBetweenSEs(allLFNs, srcSE, protocols=protocols)
    if not res['OK']:
        return res

    # .items() works on both Python 2 and Python 3, unlike .iteritems()
    for lfn, reason in res['Value']['Failed'].items():
        failedLFNs.add(lfn)
        log.error("Could not get source SURL", "%s %s" % (lfn, reason))

    allSrcDstSURLs = res['Value']['Successful']

    transfers = []
    fileIDsInTheJob = []

    for ftsFile in self.filesToSubmit:
        if ftsFile.lfn in failedLFNs:
            log.debug("Not preparing transfer for file %s" % ftsFile.lfn)
            continue

        sourceSURL, targetSURL = allSrcDstSURLs[ftsFile.lfn]

        if sourceSURL == targetSURL:
            log.error("sourceSURL equals to targetSURL", "%s" % ftsFile.lfn)
            ftsFile.error = "sourceSURL equals to targetSURL"
            ftsFile.status = 'Defunct'
            continue

        trans = fts3.new_transfer(sourceSURL,
                                  targetSURL,
                                  checksum='ADLER32:%s' % ftsFile.checksum,
                                  filesize=ftsFile.size,
                                  metadata=ftsFile.fileID,
                                  activity=self.activity)

        transfers.append(trans)
        fileIDsInTheJob.append(ftsFile.fileID)

    # If the source is not an tape SE, we should set the
    # copy_pin_lifetime and bring_online params to None,
    # otherwise they will do an extra useless queue in FTS
    sourceIsTape = self.__isTapeSE(self.sourceSE)
    copy_pin_lifetime = pinTime if sourceIsTape else None
    bring_online = 86400 if sourceIsTape else None

    if not transfers:
        log.error("No transfer possible!")
        return S_ERROR("No transfer possible")

    # We add a few metadata to the fts job so that we can reuse them later on without
    # querying our DB.
    # source and target SE are just used for accounting purpose
    job_metadata = {
        'operationID': self.operationID,
        'sourceSE': self.sourceSE,
        'targetSE': self.targetSE,
    }

    job = fts3.new_job(transfers=transfers,
                       overwrite=True,
                       source_spacetoken=source_spacetoken,
                       spacetoken=target_spacetoken,
                       bring_online=bring_online,
                       copy_pin_lifetime=copy_pin_lifetime,
                       retry=3,
                       metadata=job_metadata,
                       priority=self.priority)

    return S_OK((job, fileIDsInTheJob))
def getConfigurationTree(self, root='', *filters):
    """ Create a dictionary with all sections, subsections and options
    starting from given root. Result can be filtered.

    When root itself is an option, only that option is returned and the
    filters are not applied.

    :param str root: Starting point in the configuration tree.
    :param filters: Select results that contain given substrings (check full path, i.e. with option name)
    :type filters: str or python:list[str]

    :return: S_OK(dict)/S_ERROR() -- dictionary where keys are paths taken from
             the configuration (e.g. /Systems/Configuration/...).
             Value is "None" when path points to a section
             or not "None" if path points to an option.
    """
    # check if root is an option (special case)
    option = self.getOption(root)
    if option['OK']:
        result = {root: option['Value']}
    else:
        # root is a section: keep it only if its path matches every filter
        result = {root: None}
        if any(substr not in root for substr in filters):
            result = {}

        # remove slashes at the end
        root = root.rstrip('/')

        # get options of current root
        options = self.getOptionsDict(root)
        if not options['OK']:
            return S_ERROR("getOptionsDict() failed with message: %s" % options['Message'])

        # .items() works on both Python 2 and Python 3, unlike .iteritems()
        for key, value in options['Value'].items():
            path = cfgPath(root, key)
            if all(substr in path for substr in filters):
                result[path] = value

        # get subsections of the root
        sections = self.getSections(root)
        if not sections['OK']:
            return S_ERROR("getSections() failed with message: %s" % sections['Message'])

        # recursively go through subsections and get their subsections
        for section in sections['Value']:
            subtree = self.getConfigurationTree("%s/%s" % (root, section), *filters)
            if not subtree['OK']:
                # Report the message of the failed call: "sections" is a
                # successful result here and carries no 'Message' key
                return S_ERROR("getConfigurationTree() failed with message: %s" % subtree['Message'])
            result.update(subtree['Value'])

    return S_OK(result)
def transfer_listBulk(self, bulkId, token, fileHelper):
    """Refuse a bulk listing request.

    This implementation ignores its arguments and always returns S_ERROR.

    :param bulkId: ignored
    :param token: ignored
    :param fileHelper: ignored
    :return: S_ERROR
    """
    return S_ERROR("This server does not allow bulk listing")
def transfer_bulkToClient(self, bulkId, token, fileHelper):
    """Refuse a bulk transfer towards the client.

    This implementation ignores its arguments and always returns S_ERROR.

    :param bulkId: ignored
    :param token: ignored
    :param fileHelper: ignored
    :return: S_ERROR
    """
    return S_ERROR("This server does not allow bulk sending")
def transfer_bulkFromClient(self, bulkId, token, bulkSize, fileHelper):
    """Refuse a bulk transfer coming from the client.

    This implementation ignores its arguments and always returns S_ERROR.

    :param bulkId: ignored
    :param token: ignored
    :param bulkSize: ignored
    :param fileHelper: ignored
    :return: S_ERROR
    """
    return S_ERROR("This server does not allow bulk receiving")
def transfer_fromClient(self, fileId, token, fileSize, fileHelper):
    """Refuse a file upload coming from the client.

    This implementation ignores its arguments and always returns S_ERROR.

    :param fileId: ignored
    :param token: ignored
    :param fileSize: ignored
    :param fileHelper: ignored
    :return: S_ERROR
    """
    return S_ERROR("This server does not allow receiving files")
def __doFileTransfer(self, sDirection):
    """
    Execute a file transfer action

    The file description is received from the transport pool, the matching
    transfer_<direction> method is called while holding the
    "FileTransfer/<direction>" lock, and its result is returned.

    @type sDirection: string
    @param sDirection: Direction of the transfer
    @return: S_OK/S_ERROR (None when the service has no method for the
             requested direction; the error is sent to the client in that case)
    @raise RequestHandler.ConnectionError: if the file description cannot be received
    """
    retVal = self.__trPool.receive(self.__trid)
    if not retVal['OK']:
        raise RequestHandler.ConnectionError(
            "Error while receiving file description %s %s" %
            (self.srv_getFormattedRemoteCredentials(), retVal['Message']))
    fileInfo = retVal['Value']

    # Method names start with a lower case letter: "FromClient" -> "fromClient"
    sDirection = "%s%s" % (sDirection[0].lower(), sDirection[1:])
    if "transfer_%s" % sDirection not in dir(self):
        self.__trPool.send(self.__trid, S_ERROR("Service can't transfer files %s" % sDirection))
        return

    retVal = self.__trPool.send(self.__trid, S_OK("Accepted"))
    if not retVal['OK']:
        return retVal

    self.__logRemoteQuery("FileTransfer/%s" % sDirection, fileInfo)

    self.__lockManager.lock("FileTransfer/%s" % sDirection)
    try:
        try:
            fileHelper = FileHelper(self.__trPool.get(self.__trid))
            if sDirection == "fromClient":
                fileHelper.setDirection("fromClient")
                uRetVal = self.transfer_fromClient(fileInfo[0], fileInfo[1], fileInfo[2], fileHelper)
            elif sDirection == "toClient":
                fileHelper.setDirection("toClient")
                uRetVal = self.transfer_toClient(fileInfo[0], fileInfo[1], fileHelper)
            elif sDirection == "bulkFromClient":
                fileHelper.setDirection("fromClient")
                uRetVal = self.transfer_bulkFromClient(fileInfo[0], fileInfo[1], fileInfo[2], fileHelper)
            elif sDirection == "bulkToClient":
                fileHelper.setDirection("toClient")
                uRetVal = self.transfer_bulkToClient(fileInfo[0], fileInfo[1], fileHelper)
            elif sDirection == "listBulk":
                fileHelper.setDirection("toClient")
                uRetVal = self.transfer_listBulk(fileInfo[0], fileInfo[1], fileHelper)
            else:
                return S_ERROR("Direction %s does not exist!!!" % sDirection)

            # A successful handler must have consumed/produced the whole stream
            if uRetVal['OK'] and not fileHelper.finishedTransmission():
                gLogger.error("You haven't finished receiving/sending the file", str(fileInfo))
                return S_ERROR("Incomplete transfer")
            return uRetVal
        finally:
            # The lock is released whichever way the inner block is left
            self.__lockManager.unlock("FileTransfer/%s" % sDirection)

    # "except ... as" is valid on Python 2.6+ and Python 3, unlike "except X, v"
    except Exception as v:
        gLogger.exception("Uncaught exception when serving Transfer", "%s" % sDirection)
        return S_ERROR("Server error while serving %s: %s" % (sDirection, str(v)))
def monitor(self, context=None, ftsServer=None, ucert=None):
    """ Queries the fts server to monitor the job.
    The internal state of the object is updated depending on the
    monitoring result.

    In case the job is not found on the server, the status is set to 'Failed'

    Within a job, only the transfers having a `fileID` metadata are considered.
    This is to allow for multihop jobs doing a staging

    This method assumes that the attribute self.ftsGUID is set

    :param context: fts3 context. If not given, it is created (see ftsServer & ucert param)
    :param ftsServer: the address of the fts server to submit to. Used only if context is
                      not given. if not given either, use the ftsServer object attribute
    :param ucert: path to the user certificate/proxy. Might be infered by the fts cli (see its doc)

    :returns: {FileID: { status, error } }

              Possible error numbers

              * errno.ESRCH: If the job does not exist on the server
              * errno.EDEADLK: In case the job and file status are inconsistent (see comments inside the code)
    """
    if not self.ftsGUID:
        return S_ERROR("FTSGUID not set, FTS job not submitted?")

    if not context:
        if not ftsServer:
            ftsServer = self.ftsServer
        context = fts3.Context(endpoint=ftsServer, ucert=ucert,
                               request_class=ftsSSLRequest, verify=False)

    try:
        jobStatusDict = fts3.get_job_status(context, self.ftsGUID, list_files=True)
    # The job is not found
    # Set its status to Failed and return
    except NotFound:
        self.status = 'Failed'
        return S_ERROR(errno.ESRCH,
                       "FTSGUID %s not found on %s" % (self.ftsGUID, self.ftsServer))
    except FTS3ClientException as e:
        return S_ERROR("Error getting the job status %s" % e)

    now = datetime.datetime.utcnow().replace(microsecond=0)
    self.lastMonitor = now

    newStatus = jobStatusDict['job_state'].capitalize()
    if newStatus != self.status:
        self.status = newStatus
        self.lastUpdate = now
        self.error = jobStatusDict['reason']

    if newStatus in self.FINAL_STATES:
        self._fillAccountingDict(jobStatusDict)

    filesStatus = {}
    statusSummary = {}
    # Transfers that do correspond to one of our files. Building a new list
    # avoids both mutating the list being walked and a linear-time
    # list.remove() per skipped transfer.
    trackedFilesInfo = []

    for fileDict in jobStatusDict['files']:
        file_state = fileDict['file_state'].capitalize()
        file_metadata = fileDict['file_metadata']

        # previous version of the code did not have dictionary as
        # file_metadata
        if isinstance(file_metadata, dict):
            file_id = file_metadata.get('fileID')
        else:
            file_id = file_metadata

        # The transfer does not have a fileID attached to it
        # so it does not correspond to a file in our DB: skip it
        # (typical of jobs with different staging protocol == CTA)
        # It is also left out of the list used for accounting
        if not file_id:
            continue
        trackedFilesInfo.append(fileDict)

        file_error = fileDict['reason']
        filesStatus[file_id] = {'status': file_state, 'error': file_error}

        # If the state of the file is final for FTS, set ftsGUID of the file to None,
        # such that it is "released" from this job and not updated anymore in future
        # monitoring calls
        if file_state in FTS3File.FTS_FINAL_STATES:
            filesStatus[file_id]['ftsGUID'] = None

        # If the file is not in a final state, but the job is, we return an error
        # FTS can have inconsistencies where the FTS Job is in a final state
        # but not all the files.
        # The inconsistencies are cleaned every hour on the FTS side.
        # https://its.cern.ch/jira/browse/FTS-1482
        elif self.status in self.FINAL_STATES:
            return S_ERROR(errno.EDEADLK,
                           "Job %s in a final state (%s) while File %s is not (%s)" %
                           (self.ftsGUID, self.status, file_id, file_state))

        statusSummary[file_state] = statusSummary.get(file_state, 0) + 1

    # We've removed all the intermediate transfers that we are not interested in
    # so we put this back into the monitoring data such that the accounting is done properly
    jobStatusDict['files'] = trackedFilesInfo
    if newStatus in self.FINAL_STATES:
        self._fillAccountingDict(jobStatusDict)

    total = len(trackedFilesInfo)
    completed = sum(statusSummary.get(state, 0) for state in FTS3File.FTS_FINAL_STATES)
    # Every transfer may have been skipped above: report 0% in that case
    # instead of raising ZeroDivisionError
    self.completeness = int(100 * completed / total) if total else 0

    return S_OK(filesStatus)
class BaseClient:
    """Base class holding the client-side settings needed to reach a service.

    The constructor stores the service name and the keyword options, then runs
    a fixed sequence of discovery steps (setup, VO, timeout, URL, credentials,
    transport sanity, keep-alive). The first step that fails is remembered in
    the private init status; the constructor itself does not raise for it.
    """

    VAL_EXTRA_CREDENTIALS_HOST = "hosts"

    # Names of the options looked up in ``self.kwargs``. Only KW_SETUP and
    # KW_VO are read in this part of the class; the others are presumably
    # consumed by the discovery steps defined elsewhere -- TODO confirm.
    KW_USE_CERTIFICATES = "useCertificates"
    KW_EXTRA_CREDENTIALS = "extraCredentials"
    KW_TIMEOUT = "timeout"
    KW_SETUP = "setup"
    KW_VO = "VO"
    KW_DELEGATED_DN = "delegatedDN"
    KW_DELEGATED_GROUP = "delegatedGroup"
    KW_IGNORE_GATEWAYS = "ignoreGateways"
    KW_PROXY_LOCATION = "proxyLocation"
    KW_PROXY_STRING = "proxyString"
    KW_PROXY_CHAIN = "proxyChain"
    KW_SKIP_CA_CHECK = "skipCACheck"
    KW_KEEP_ALIVE_LAPSE = "keepAliveLapse"

    # Shared by all instances; queried for the setup in __discoverSetup
    __threadConfig = ThreadConfig()

    def __init__(self, serviceName, **kwargs):
        """Store the target service and run every discovery step.

        :param serviceName: name (or URL) of the service to contact; must be a string
        :param kwargs: client options, see the ``KW_*`` class constants
        :raises TypeError: if ``serviceName`` is not a string
        """
        # isinstance (rather than an exact type() match) also accepts
        # subclasses of the string types
        if not isinstance(serviceName, types.StringTypes):
            raise TypeError("Service name expected to be a string. Received %s type %s" %
                            (str(serviceName), type(serviceName)))
        self._destinationSrv = serviceName
        self._serviceName = serviceName
        self.kwargs = kwargs
        self.__initStatus = S_OK()
        self.__idDict = {}
        self.__extraCredentials = ""
        self.__enableThreadCheck = False
        self.__retry = 0
        self.__retryDelay = 0
        # By default we always have 1 url, for example:
        # RPCClient('dips://volhcb38.cern.ch:9162/Framework/SystemAdministrator')
        self.__nbOfUrls = 1
        # By default we try three times
        self.__nbOfRetry = 3
        self.__bannedUrls = []
        for initFunc in (self.__discoverSetup,
                         self.__discoverVO,
                         self.__discoverTimeout,
                         self.__discoverURL,
                         self.__discoverCredentialsToUse,
                         self.__checkTransportSanity,
                         self.__setKeepAliveLapse):
            result = initFunc()
            # Every step is run, but only the first failure is kept
            if not result['OK'] and self.__initStatus['OK']:
                self.__initStatus = result
        self._initialize()
        # HACK for thread-safety:
        self.__allowedThreadID = False

    def _initialize(self):
        """Hook called at the end of the constructor; does nothing here."""
        pass

    def getDestinationService(self):
        """Return the service name exactly as it was given to the constructor."""
        return self._destinationSrv

    def getServiceName(self):
        """Return the service name.

        This starts as the constructor argument and is replaced by the last
        element of the split URL once __discoverURL succeeds.
        """
        return self._serviceName

    def __discoverSetup(self):
        """Set ``self.setup`` from kwargs, else the thread config, else the CS.

        :return: S_OK()
        """
        # Which setup to use?
        if self.KW_SETUP in self.kwargs and self.kwargs[self.KW_SETUP]:
            self.setup = str(self.kwargs[self.KW_SETUP])
        else:
            self.setup = self.__threadConfig.getSetup()
            if not self.setup:
                self.setup = gConfig.getValue("/DIRAC/Setup", "Test")
        return S_OK()

    def __discoverVO(self):
        """Set ``self.vo`` from kwargs, else from the CS (default "unknown").

        :return: S_OK()
        """
        # Which VO to use?
        if self.KW_VO in self.kwargs and self.kwargs[self.KW_VO]:
            self.vo = str(self.kwargs[self.KW_VO])
        else:
            self.vo = gConfig.getValue("/DIRAC/VirtualOrganization", "unknown")
        return S_OK()

    def __discoverURL(self):
        """Resolve and store the final service URL.

        Sets ``self.serviceURL``, the private URL tuple and
        ``self._serviceName`` (last element of the split URL). Options found
        under ``/DIRAC/ConnConf/<a>:<b>`` (elements 1 and 2 of the split URL,
        presumably host and port -- TODO confirm) are then copied into
        ``self.kwargs`` without overriding keys that are already present.

        :return: S_OK(), or S_ERROR if the URL cannot be found or is malformed
        """
        # Calculate final URL
        try:
            result = self.__findServiceURL()
        except Exception as e:
            return S_ERROR(str(e))
        if not result['OK']:
            return result
        self.serviceURL = result['Value']
        retVal = Network.splitURL(self.serviceURL)
        if not retVal['OK']:
            return S_ERROR("URL is malformed: %s" % retVal['Message'])
        self.__URLTuple = retVal['Value']
        self._serviceName = self.__URLTuple[-1]
        res = gConfig.getOptionsDict("/DIRAC/ConnConf/%s:%s" % self.__URLTuple[1:3])
        if res['OK']:
            opts = res['Value']
            for k in opts:
                # Explicit constructor options win over the CS ones
                if k not in self.kwargs:
                    self.kwargs[k] = opts[k]
        return S_OK()
class RequestHandler(object):
    """Base handler executing one client request on a given transport.

    Class-wide service data (service info, lock manager, message broker,
    transport pool, monitor) is installed once by ``_rh__initializeClass``;
    each instance then serves the transport identified by its ``trid``.
    """

    class ConnectionError(Exception):
        """Error raised by the action methods; caught in _rh_executeAction."""

        def __init__(self, msg):
            self.__msg = msg

        def __str__(self):
            return "ConnectionError: %s" % self.__msg

    def __init__(self, handlerInitDict, trid):
        """
        Constructor

        Note that ``handlerInitDict`` is updated in place with the class-wide
        service information and then kept as ``self.serviceInfoDict``.

        @type handlerInitDict: dictionary
        @param handlerInitDict: Information vars for the service
        @type trid: object
        @param trid: Transport to use
        """
        # Initially serviceInfoDict is the one base to the RequestHandler,
        # the one created in _rh__initializeClass
        # FSM help me for I have made a complex stuff that I will forget in 5 mins :P
        handlerInitDict.update(self.__srvInfoDict)
        self.serviceInfoDict = handlerInitDict
        self.__trid = trid

    def initialize(self):
        """Initialize this instance of the handler (to be overwritten)
        """
        pass

    @classmethod
    def _rh__initializeClass(cls, serviceInfoDict, lockManager, msgBroker, monitor):
        """
        Class initialization (not to be called by hand or overwritten!!)

        @type serviceInfoDict: dictionary
        @param serviceInfoDict: Information vars for the service
        @type lockManager: object
        @param lockManager: Lock manager to use
        @type msgBroker: object
        @param msgBroker: Message delivery
        @type monitor: object
        @param monitor: Monitor object, stored on the class
        """
        cls.__srvInfoDict = serviceInfoDict
        cls.__svcName = cls.__srvInfoDict['serviceName']
        cls.__lockManager = lockManager
        cls.__msgBroker = msgBroker
        # The transport pool is taken from the broker
        cls.__trPool = msgBroker.getTransportPool()
        cls.__monitor = monitor
        cls.log = gLogger

    def getRemoteAddress(self):
        """
        Get the address of the remote peer.

        @return : Address of remote peer.
        """
        return self.__trPool.get(self.__trid).getRemoteAddress()

    def getRemoteCredentials(self):
        """
        Get the credentials of the remote peer.

        @return : Credentials dictionary of remote peer.
        """
        return self.__trPool.get(self.__trid).getConnectingCredentials()

    @classmethod
    def getCSOption(cls, optionName, defaultValue=False):
        """
        Get an option from the CS section of the services

        @return : Value for serviceSection/optionName in the CS being defaultValue the default
        """
        return cls.srv_getCSOption(optionName, defaultValue)

    def _rh_executeAction(self, proposalTuple):
        """
        Execute an action.

        @type proposalTuple: tuple
        @param proposalTuple: Type of action to execute. First position of the tuple must be the type
                              of action to execute. The second position is the action itself.
        @return : the value of the transport pool ``send`` call for the action
                  result, or directly an S_ERROR for an unknown action type or
                  a ConnectionError (in these two cases nothing is sent).
        """
        actionTuple = proposalTuple[1]
        gLogger.debug("Executing %s:%s action" % actionTuple)
        startTime = time.time()
        actionType = actionTuple[0]
        self.serviceInfoDict['actionTuple'] = actionTuple
        try:
            if actionType == "RPC":
                retVal = self.__doRPC(actionTuple[1])
            elif actionType == "FileTransfer":
                retVal = self.__doFileTransfer(actionTuple[1])
            elif actionType == "Connection":
                retVal = self.__doConnection(actionTuple[1])
            else:
                return S_ERROR("Unknown action %s" % actionType)
        except RequestHandler.ConnectionError as excp:
            gLogger.error("ConnectionError", str(excp))
            return S_ERROR(excp)
        # Whatever the method returned, the client must get a return structure
        if not isReturnStructure(retVal):
            message = "Method %s for action %s does not return a S_OK/S_ERROR!" % (
                actionTuple[1], actionTuple[0])
            gLogger.error(message)
            retVal = S_ERROR(message)
        self.__logRemoteQueryResponse(retVal, time.time() - startTime)
        return self.__trPool.send(self.__trid, retVal)
def __discoverURL(self):
    """Look up the service URL through ``self.__findServiceURL()``.

    :return: S_ERROR carrying the exception text if the lookup raises.
             Nothing is returned explicitly when it succeeds.
    """
    # Calculate final URL
    try:
        # NOTE(review): ``result`` is not used anywhere in this definition
        # as visible here -- confirm whether the remaining logic is missing.
        result = self.__findServiceURL()
    except Exception as e:
        return S_ERROR(str(e))
def sendError(self, errorMsg):
    """Send an error structure built from ``errorMsg`` over the transport.

    The transmission is marked as finished only if the data could be sent.

    :param errorMsg: message wrapped into the S_ERROR that is sent
    :return: S_OK() on success, otherwise the failed result of ``sendData``
    """
    sendResult = self.oTransport.sendData(S_ERROR(errorMsg))
    if sendResult["OK"]:
        self.__finishedTransmission()
        return S_OK()
    # Sending failed: hand the transport error back untouched
    return sendResult
def transfer_toClient(self, fileId, token, fileHelper):
    """Refuse a request to send a file to the client.

    None of the arguments is used by this implementation.

    :param fileId: unused
    :param token: unused
    :param fileHelper: unused
    :return: S_ERROR, always
    """
    refusal = S_ERROR("This server does no allow sending files")
    return refusal
def _constructTransferJob(self, pinTime, allLFNs, target_spacetoken, protocols=None):
    """ Assemble the FTS3 job object for a transfer

        The following attributes of the job are read:

        * sourceSE
        * targetSE
        * vo
        * activity
        * priority
        * filesToSubmit
        * operationID and rmsReqID (stored as metadata of the job)

        Files for which no URL can be generated are left out of the job.
        Files whose source and target URLs are identical are left out too,
        and get their status set to 'Defunct'.

        :param pinTime: pinning time, only used when the source is a tape SE
        :param allLFNs: list of LFNs to transfer
        :param target_spacetoken: the space token of the target
        :param protocols: list of protocols to restrict the protocol choice for the transfer

        :return: S_OK( (job object, list of ftsFileIDs in the job)),
                 or S_ERROR if a lookup fails, if a multihop job would hold
                 more than one file, or if no transfer is left to submit
    """
    subLoggerName = "constructTransferJob/%s/%s_%s" % (self.operationID, self.sourceSE, self.targetSE)
    log = gLogger.getSubLogger(subLoggerName, True)

    tokenRes = self.__fetchSpaceToken(self.sourceSE, self.vo)
    if not tokenRes['OK']:
        return tokenRes
    source_spacetoken = tokenRes['Value']

    dstSE = StorageElement(self.targetSE, vo=self.vo)
    srcSE = StorageElement(self.sourceSE, vo=self.vo)

    # Pinning and bring-online only make sense for a tape source: for any
    # other source they are set to None, otherwise they would cause
    # an extra useless queue in FTS
    sourceIsTape = self.__isTapeSE(self.sourceSE, self.vo)
    if sourceIsTape:
        copy_pin_lifetime = pinTime
        bring_online = BRING_ONLINE_TIMEOUT
    else:
        copy_pin_lifetime = None
        bring_online = None

    # Get all the (source, dest) surls
    urlRes = dstSE.generateTransferURLsBetweenSEs(allLFNs, srcSE, protocols=protocols)
    if not urlRes['OK']:
        return urlRes
    urlInfo = urlRes['Value']

    failedLFNs = set()
    for lfn, reason in urlInfo['Failed'].items():
        log.error("Could not get source SURL", "%s %s" % (lfn, reason))
        failedLFNs.add(lfn)

    allSrcDstSURLs = urlInfo['Successful']

    # Staging URLs, filled only if they differ from the transfer URLs (CTA...)
    allStageURLs = {}

    # When reading from tape with a stage protocol different from the
    # transfer protocol, staging URLs are generated so that a multihop
    # transfer can be done (see the loop further down).
    if sourceIsTape:
        srcProto, _destProto = urlInfo['Protocols']
        stageProtocols = srcSE.localStageProtocolList
        if srcProto not in stageProtocols:

            # As of version 3.10, FTS can only handle one file per multihop
            # job, so make sure there is a single file to transfer (this
            # should have been checked at the job construction step in
            # FTS3Operation).
            # This test is important, because multiple files would result
            # in the source being deleted !
            nbOfLFNs = len(allLFNs)
            if nbOfLFNs != 1:
                log.debug("Multihop job has %s files while only 1 allowed" % nbOfLFNs)
                return S_ERROR(errno.E2BIG, "Trying multihop job with more than one file !")

            stageRes = srcSE.getURL(allSrcDstSURLs, protocol=stageProtocols)
            if not stageRes['OK']:
                return stageRes

            for lfn, reason in stageRes['Value']['Failed'].items():
                log.error("Could not get stage SURL", "%s %s" % (lfn, reason))
                failedLFNs.add(lfn)
                del allSrcDstSURLs[lfn]

            allStageURLs = stageRes['Value']['Successful']

    transfers = []
    fileIDsInTheJob = []

    for ftsFile in self.filesToSubmit:
        fileLFN = ftsFile.lfn

        if fileLFN in failedLFNs:
            log.debug("Not preparing transfer for file %s" % fileLFN)
            continue

        sourceSURL, targetSURL = allSrcDstSURLs[fileLFN]
        if sourceSURL == targetSURL:
            log.error("sourceSURL equals to targetSURL", "%s" % fileLFN)
            ftsFile.error = "sourceSURL equals to targetSURL"
            ftsFile.status = 'Defunct'
            continue

        ftsFileID = ftsFile.fileID

        # Arguments common to the staging and the real transfer
        sharedArgs = {'checksum': 'ADLER32:%s' % ftsFile.checksum,
                      'filesize': ftsFile.size,
                      'activity': self.activity}

        # Under normal circumstances a single fts transfer is submitted:
        #   * srcProto://myFile -> destProto://myFile
        #
        # That works fine even with a tape system as source.
        # However, if the staging and transfer protocols are different
        # (which might be the case for CTA), the multihop machinery is used
        # to submit two sequential fts transfers, one to stage and one
        # to transfer:
        #   * stageProto://myFile -> stageProto://myFile
        #   * srcProto://myFile -> destProto://myFile
        stageURL = allStageURLs.get(fileLFN)
        if stageURL:
            # No fileID in this metadata, such that the DB is not updated
            # for the staging step when monitoring
            transfers.append(fts3.new_transfer(stageURL,
                                               stageURL,
                                               metadata={'desc': 'PreStage %s' % ftsFileID},
                                               **sharedArgs))

        transfers.append(fts3.new_transfer(sourceSURL,
                                           targetSURL,
                                           metadata={'desc': 'Transfer %s' % ftsFileID,
                                                     'fileID': ftsFileID},
                                           **sharedArgs))
        fileIDsInTheJob.append(ftsFileID)

    if not transfers:
        log.error("No transfer possible!")
        return S_ERROR("No transfer possible")

    # A few metadata are attached to the fts job so that they can be reused
    # later on without querying our DB.
    # source and target SE are just used for accounting purpose
    job_metadata = {'operationID': self.operationID,
                    'rmsReqID': self.rmsReqID,
                    'sourceSE': self.sourceSE,
                    'targetSE': self.targetSE}

    job = fts3.new_job(transfers=transfers,
                       overwrite=True,
                       source_spacetoken=source_spacetoken,
                       spacetoken=target_spacetoken,
                       bring_online=bring_online,
                       copy_pin_lifetime=copy_pin_lifetime,
                       retry=3,
                       # Only check target vs specified, since the source is verified earlier
                       verify_checksum='target',
                       # Stage urls mean that multihop is needed
                       multihop=bool(allStageURLs),
                       metadata=job_metadata,
                       priority=self.priority)

    return S_OK((job, fileIDsInTheJob))