def completeDelegation(self, requestId, userDN, delegatedPem): """ Complete a delegation and store it in the db """ retVal = self.retrieveDelegationRequest(requestId, userDN) if not retVal['OK']: return retVal request = retVal['Value'] chain = X509Chain(keyObj=request.getPKey()) retVal = chain.loadChainFromString(delegatedPem) if not retVal['OK']: return retVal retVal = chain.isValidProxy(ignoreDefault=True) noGroupFlag = False if not retVal['OK']: if DErrno.cmpError(retVal, DErrno.ENOGROUP): noGroupFlag = True else: return retVal result = chain.isVOMS() if result['OK'] and result['Value']: return S_ERROR( "Proxies with VOMS extensions are not allowed to be uploaded") retVal = request.checkChain(chain) if not retVal['OK']: return retVal if not retVal['Value']: return S_ERROR("Received chain does not match request: %s" % retVal['Message']) retVal = chain.getDIRACGroup() if not retVal['OK']: return retVal userGroup = retVal['Value'] if not userGroup: userGroup = Registry.getDefaultUserGroup() retVal = Registry.getGroupsForDN(userDN) if not retVal['OK']: return retVal if not userGroup in retVal['Value']: return S_ERROR("%s group is not valid for %s" % (userGroup, userDN)) # For proxies without embedded DIRAC group only one default is allowed # Cleaning all the proxies for this DN if any before uploading the new one. if noGroupFlag: retVal = self.deleteProxy(userDN) if not retVal['OK']: return retVal retVal = self.storeProxy(userDN, userGroup, chain) if not retVal['OK']: return retVal retVal = self.deleteRequest(requestId) if not retVal['OK']: return retVal return S_OK()
def completeDelegation( self, requestId, userDN, delegatedPem ): """ Complete a delegation and store it in the db """ retVal = self.retrieveDelegationRequest( requestId, userDN ) if not retVal[ 'OK' ]: return retVal request = retVal[ 'Value' ] chain = X509Chain( keyObj = request.getPKey() ) retVal = chain.loadChainFromString( delegatedPem ) if not retVal[ 'OK' ]: return retVal retVal = chain.isValidProxy( ignoreDefault = True ) noGroupFlag = False if not retVal[ 'OK' ]: if DErrno.cmpError( retVal, DErrno.ENOGROUP ): noGroupFlag = True else: return retVal result = chain.isVOMS() if result[ 'OK' ] and result[ 'Value' ]: return S_ERROR( "Proxies with VOMS extensions are not allowed to be uploaded" ) retVal = request.checkChain( chain ) if not retVal[ 'OK' ]: return retVal if not retVal[ 'Value' ]: return S_ERROR( "Received chain does not match request: %s" % retVal[ 'Message' ] ) retVal = chain.getDIRACGroup() if not retVal[ 'OK' ]: return retVal userGroup = retVal[ 'Value' ] if not userGroup: userGroup = Registry.getDefaultUserGroup() retVal = Registry.getGroupsForDN( userDN ) if not retVal[ 'OK' ]: return retVal if not userGroup in retVal[ 'Value' ]: return S_ERROR( "%s group is not valid for %s" % ( userGroup, userDN ) ) # For proxies without embedded DIRAC group only one default is allowed # Cleaning all the proxies for this DN if any before uploading the new one. if noGroupFlag: retVal = self.deleteProxy( userDN ) if not retVal[ 'OK' ]: return retVal retVal = self.storeProxy( userDN, userGroup, chain ) if not retVal[ 'OK' ]: return retVal retVal = self.deleteRequest( requestId ) if not retVal[ 'OK' ]: return retVal return S_OK()
def _checkMatchingIssues(self, jobRequest): """Check the source of the matching issue :param dict jobRequest: S_ERROR returned by the matcher :return: S_OK """ if DErrno.cmpError(jobRequest, DErrno.EWMSNOMATCH): self.log.notice("Job request OK, but no match found", jobRequest["Message"]) elif jobRequest["Message"].find("seconds timeout") != -1: self.log.error("Timeout while requesting job", jobRequest["Message"]) else: self.log.notice("Failed to get jobs", jobRequest["Message"]) return S_OK()
def _checkMatchingIssues(self, jobRequest): """Check the source of the matching issue :param dict jobRequest: S_ERROR returned by the matcher :return: S_OK/S_ERROR """ if DErrno.cmpError(jobRequest, DErrno.EWMSPLTVER): self.log.error("Pilot version mismatch", jobRequest["Message"]) return jobRequest if DErrno.cmpError(jobRequest, DErrno.EWMSNOMATCH): self.log.notice("Job request OK, but no match found", jobRequest["Message"]) elif jobRequest["Message"].find("seconds timeout") != -1: self.log.error("Timeout while requesting job", jobRequest["Message"]) else: self.log.notice("Failed to get jobs", jobRequest["Message"]) self.matchFailedCount += 1 if self.matchFailedCount > self.stopAfterFailedMatches: return self._finish("Nothing to do for more than %d cycles" % self.stopAfterFailedMatches) return S_OK()
def __getJobPilotStatus(self, jobID): """ Get the job pilot status """ result = JobMonitoringClient().getJobParameter(jobID, 'Pilot_Reference') if not result['OK']: return result pilotReference = result['Value'].get('Pilot_Reference', 'Unknown') if pilotReference == 'Unknown': # There is no pilot reference, hence its status is unknown return S_OK('NoPilot') result = PilotManagerClient().getPilotInfo(pilotReference) if not result['OK']: if DErrno.cmpError(result, DErrno.EWMSNOPILOT): self.log.warn("No pilot found", "for job %d: %s" % (jobID, result['Message'])) return S_OK('NoPilot') self.log.error('Failed to get pilot information', 'for job %d: %s' % (jobID, result['Message'])) return result pilotStatus = result['Value'][pilotReference]['Status'] return S_OK(pilotStatus)
def _getJobPilotStatus(self, jobID): """Get the job pilot status""" result = JobMonitoringClient().getJobParameter(jobID, "Pilot_Reference") if not result["OK"]: return result pilotReference = result["Value"].get("Pilot_Reference", "Unknown") if pilotReference == "Unknown": # There is no pilot reference, hence its status is unknown return S_OK("NoPilot") result = PilotManagerClient().getPilotInfo(pilotReference) if not result["OK"]: if DErrno.cmpError(result, DErrno.EWMSNOPILOT): self.log.warn("No pilot found", "for job %d: %s" % (jobID, result["Message"])) return S_OK("NoPilot") self.log.error("Failed to get pilot information", "for job %d: %s" % (jobID, result["Message"])) return result pilotStatus = result["Value"][pilotReference]["Status"] return S_OK(pilotStatus)
def __call__(self): """request processing""" self.log.debug("about to execute request") if not self.rmsMonitoring: gMonitor.addMark("RequestAtt", 1) # # setup proxy for request owner setupProxy = self.setupProxy() if not setupProxy["OK"]: userSuspended = "User is currently suspended" self.request.Error = setupProxy["Message"] # In case the user does not have proxy if DErrno.cmpError(setupProxy, DErrno.EPROXYFIND): self.log.error("Error setting proxy. Request set to Failed:", setupProxy["Message"]) # If user is no longer registered, fail the request for operation in self.request: for opFile in operation: opFile.Status = "Failed" operation.Status = "Failed" elif userSuspended in setupProxy["Message"]: # If user is suspended, wait for a long time self.request.delayNextExecution(6 * 60) self.request.Error = userSuspended self.log.error("Error setting proxy: " + userSuspended, self.request.OwnerDN) else: self.log.error("Error setting proxy", setupProxy["Message"]) return S_OK(self.request) shifter = setupProxy["Value"]["Shifter"] error = None while self.request.Status == "Waiting": # # get waiting operation operation = self.request.getWaiting() if not operation["OK"]: self.log.error("Cannot get waiting operation", operation["Message"]) return operation operation = operation["Value"] self.log.info("executing operation", "%s" % operation.Type) # # and handler for it handler = self.getHandler(operation) if not handler["OK"]: self.log.error("Unable to process operation", "%s: %s" % (operation.Type, handler["Message"])) # gMonitor.addMark( "%s%s" % ( operation.Type, "Fail" ), 1 ) operation.Error = handler["Message"] break handler = handler["Value"] # # set shifters list in the handler handler.shifter = shifter # set rmsMonitoring flag for the RequestOperation handler.rmsMonitoring = self.rmsMonitoring # # and execute pluginName = self.getPluginName( self.handlersDict.get(operation.Type)) if self.standalone: useServerCertificate = gConfig.useServerCertificate() else: # Always use server certificates if executed within an agent useServerCertificate = True try: if pluginName: if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Operation", "operationType": pluginName, "objectID": operation.OperationID, "parentID": operation.RequestID, "status": "Attempted", "nbObject": 1, }) else: gMonitor.addMark("%s%s" % (pluginName, "Att"), 1) # Always use request owner proxy if useServerCertificate: gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "false") exe = handler() if useServerCertificate: gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "true") if not exe["OK"]: self.log.error("unable to process operation", "%s: %s" % (operation.Type, exe["Message"])) if pluginName: if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Operation", "operationType": pluginName, "objectID": operation.OperationID, "parentID": operation.RequestID, "status": "Failed", "nbObject": 1, }) else: gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1) if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Request", "objectID": operation.RequestID, "status": "Failed", "nbObject": 1, }) else: gMonitor.addMark("RequestFail", 1) if self.request.JobID: # Check if the job exists monitorServer = JobMonitoringClient( useCertificates=True) res = monitorServer.getJobSummary( int(self.request.JobID)) if not res["OK"]: self.log.error( "RequestTask: Failed to get job status", "%d" % self.request.JobID) elif not res["Value"]: self.log.warn( "RequestTask: job does not exist (anymore): failed request", "JobID: %d" % self.request.JobID, ) for opFile in operation: opFile.Status = "Failed" if operation.Status != "Failed": operation.Status = "Failed" self.request.Error = "Job no longer exists" except Exception as e: error = str(e) self.log.exception("hit by exception:", "%s" % error) if pluginName: if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Operation", "operationType": pluginName, "objectID": operation.OperationID, "parentID": operation.RequestID, "status": "Failed", "nbObject": 1, }) else: gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1) if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Request", "objectID": operation.RequestID, "status": "Failed", "nbObject": 1, }) else: gMonitor.addMark("RequestFail", 1) if useServerCertificate: gConfigurationData.setOptionInCFG( "/DIRAC/Security/UseServerCertificate", "true") break # # operation status check if operation.Status == "Done" and pluginName: if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Operation", "operationType": pluginName, "objectID": operation.OperationID, "parentID": operation.RequestID, "status": "Successful", "nbObject": 1, }) else: gMonitor.addMark("%s%s" % (pluginName, "OK"), 1) elif operation.Status == "Failed" and pluginName: if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Operation", "operationType": pluginName, "objectID": operation.OperationID, "parentID": operation.RequestID, "status": "Failed", "nbObject": 1, }) else: gMonitor.addMark("%s%s" % (pluginName, "Fail"), 1) elif operation.Status in ("Waiting", "Scheduled"): # # no update for waiting or all files scheduled break if not self.rmsMonitoring: gMonitor.flush() if error: return S_ERROR(error) # # request done? if self.request.Status == "Done": # # update request to the RequestDB self.log.info("Updating request status:", "%s" % self.request.Status) update = self.updateRequest() if not update["OK"]: self.log.error("Cannot update request status", update["Message"]) return update self.log.info("request is done", "%s" % self.request.RequestName) if self.rmsMonitoring: self.rmsMonitoringReporter.addRecord({ "timestamp": int(Time.toEpoch()), "host": Network.getFQDN(), "objectType": "Request", "objectID": getattr(self.request, "RequestID", 0), "status": "Successful", "nbObject": 1, }) else: gMonitor.addMark("RequestOK", 1) # # and there is a job waiting for it? finalize! if self.request.JobID: attempts = 0 while True: finalizeRequest = self.requestClient.finalizeRequest( self.request.RequestID, self.request.JobID # pylint: disable=no-member ) if not finalizeRequest["OK"]: if not attempts: self.log.error( "unable to finalize request, will retry", "ReqName %s:%s" % (self.request.RequestName, finalizeRequest["Message"]), ) self.log.debug("Waiting 10 seconds") attempts += 1 if attempts == 10: self.log.error("Giving up finalize request") return S_ERROR("Could not finalize request") time.sleep(10) else: self.log.info( "request is finalized", "ReqName %s %s" % (self.request.RequestName, (" after %d attempts" % attempts) if attempts else ""), ) break # Commit all the data to the ES Backend if self.rmsMonitoring: self.rmsMonitoringReporter.commit() # Request will be updated by the callBack method self.log.verbose("RequestTasks exiting", "request %s" % self.request.Status) return S_OK(self.request)