class DiracProduction(DiracLHCb): """ class for managing productions """ def __init__(self, tsClientIn=None): """Instantiates the Workflow object and some default parameters. """ super(DiracProduction, self).__init__() if tsClientIn is None: self.transformationClient = TransformationClient() else: self.transformationClient = tsClientIn self.prodHeaders = { 'AgentType': 'SubmissionMode', 'Status': 'Status', 'CreationDate': 'Created', 'TransformationName': 'Name', 'Type': 'Type' } self.prodAdj = 22 self.commands = { 'start': ['Active', 'Manual'], 'stop': ['Stopped', 'Manual'], 'automatic': ['Active', 'Automatic'], 'manual': ['Active', 'Manual'], 'mctestmode': ['Testing', 'Automatic'], 'completed': ['Completed', 'Manual'], 'completing': ['Completing', 'Automatic'], 'cleaning': ['Cleaning', 'Manual'], 'flush': ['Flush', 'Automatic'], 'deleted': ['Deleted', 'Manual'], 'cleaned': ['Cleaned', 'Manual'], 'archived': ['Archived', 'Manual'], 'valinput': ['ValidatingInput', 'Manual'], 'valoutput': ['ValidatingOutput', 'Manual'], 'remove': ['RemovingFiles', 'Manual'], 'validated': ['ValidatedOutput', 'Manual'], 'removed': ['RemovedFiles', 'Manual'] } def getProduction(self, productionID, printOutput=False): """Returns the metadata associated with a given production ID. Protects against LFN: being prepended and different types of production ID. 
""" if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') result = self.transformationClient.getTransformation(int(productionID)) if not result['OK']: return result # to fix TODO if printOutput: adj = self.prodAdj prodInfo = result['Value'] top = '' for i in self.prodHeaders.itervalues(): top += i.ljust(adj) message = ['ProductionID'.ljust(adj) + top + '\n'] # very painful to make this consistent, better improved first on the server side productionID = str(productionID) info = productionID.ljust(adj) + prodInfo['Status'].ljust(adj) + prodInfo['Type'].ljust(adj) +\ prodInfo['AgentType'].ljust(adj) + toString(prodInfo['CreationDate']).ljust(adj) +\ prodInfo['TransformationName'].ljust(adj) message.append(info) print '\n'.join(message) return S_OK(result['Value']) def getProductionLoggingInfo(self, productionID, printOutput=False): """The logging information for the given production is returned. This includes the operation performed, any messages associated with the operation and the DN of the production manager performing it. 
""" if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') result = self.transformationClient.getTransformationLogging( int(productionID)) if not result['OK']: self.log.warn( 'Could not get transformation logging information for productionID %s' % (productionID)) return result if not result['Value']: self.log.warn('No logging information found for productionID %s' % (productionID)) return result if not printOutput: return result infoM = 'ProdID'.ljust(int(0.5 * self.prodAdj)) + 'Message'.ljust(3 * self.prodAdj) +\ 'DateTime [UTC]'.ljust(self.prodAdj) + 'AuthorCN'.ljust(2 * self.prodAdj) message = [infoM] for line in result['Value']: infoL = str(line['TransformationID']).ljust(int(0.5 * self.prodAdj)) +\ line['Message'].ljust(3 * self.prodAdj) + toString(line['MessageDate']).ljust(self.prodAdj) +\ line['AuthorDN'].split('/')[-1].ljust(2 * self.prodAdj) message.append(infoL) print '\nLogging summary for productionID ' + str( productionID) + '\n\n' + '\n'.join(message) return result def getProductionSummary(self, productionID=None, printOutput=False): """Returns a detailed summary for the productions in the system. If production ID is specified, the result is restricted to this value. If printOutput is specified, the result is printed to the screen. 
""" if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') result = self.transformationClient.getTransformationSummary() if not result['OK']: return result if productionID: if long(productionID) in result['Value']: newResult = S_OK() newResult['Value'] = {} newResult['Value'][long(productionID)] = result['Value'][long( productionID)] result = newResult else: self.log.info('Specified productionID was not found, \ the list of active productions is:\n%s' % ', '.join(str(pID) for pID in result['Value'])) return S_ERROR('Production ID %s was not found' % (productionID)) if printOutput: self._prettyPrint(result['Value']) return result def getProductionApplicationSummary(self, productionID, status=None, minorStatus=None, printOutput=False): """Returns an application status summary for the productions in the system. If printOutput is specified, the result is printed to the screen. This queries the WMS for the given productionID and provides an up-to-date snapshot of the application status combinations and associated WMS JobIDs. """ if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') statusDict = self.getProdJobMetadata(productionID, status, minorStatus) if not statusDict['OK']: self.log.warn('Could not get production metadata information') return statusDict jobIDs = list(statusDict['Value']) if not jobIDs: return S_ERROR('No JobIDs with matching conditions found') self.log.verbose('Considering %s jobs with selected conditions' % (len(jobIDs))) # now need to get the application status information result = JobMonitoringClient().getJobsApplicationStatus(jobIDs) if not result['OK']: self.log.warn('Could not get application status for jobs list') return result appStatus = result['Value'] # self._prettyPrint(appStatus) # self._prettyPrint(statusDict['Value']) # Now format the result. 
summary = {} submittedJobs = 0 doneJobs = 0 for job, atts in statusDict['Value'].iteritems(): for key, val in atts.iteritems(): if key == 'Status': uniqueStatus = val.capitalize() if uniqueStatus not in summary: summary[uniqueStatus] = {} if atts['MinorStatus'] not in summary[uniqueStatus]: summary[uniqueStatus][atts['MinorStatus']] = {} if appStatus[job]['ApplicationStatus'] not in summary[ uniqueStatus][atts['MinorStatus']]: summary[uniqueStatus][atts['MinorStatus']][ appStatus[job]['ApplicationStatus']] = {} summary[uniqueStatus][atts['MinorStatus']][ appStatus[job]['ApplicationStatus']]['Total'] = 1 submittedJobs += 1 if uniqueStatus == 'Done': doneJobs += 1 summary[uniqueStatus][atts['MinorStatus']][ appStatus[job]['ApplicationStatus']]['JobList'] = [ job ] else: if appStatus[job]['ApplicationStatus'] not in summary[ uniqueStatus][atts['MinorStatus']]: summary[uniqueStatus][atts['MinorStatus']] = {} summary[uniqueStatus][atts['MinorStatus']][ appStatus[job]['ApplicationStatus']] = {} summary[uniqueStatus][atts['MinorStatus']][ appStatus[job] ['ApplicationStatus']]['Total'] = 1 submittedJobs += 1 if uniqueStatus == 'Done': doneJobs += 1 summary[uniqueStatus][atts['MinorStatus']][ appStatus[job] ['ApplicationStatus']]['JobList'] = [job] else: current = summary[uniqueStatus][ atts['MinorStatus']][appStatus[job][ 'ApplicationStatus']]['Total'] summary[uniqueStatus][atts['MinorStatus']][ appStatus[job] ['ApplicationStatus']]['Total'] = current + 1 submittedJobs += 1 if uniqueStatus == 'Done': doneJobs += 1 jobList = summary[uniqueStatus][ atts['MinorStatus']][appStatus[job][ 'ApplicationStatus']]['JobList'] jobList.append(job) summary[uniqueStatus][atts['MinorStatus']][ appStatus[job] ['ApplicationStatus']]['JobList'] = jobList if not printOutput: result = S_OK() if not status and not minorStatus: result['Totals'] = { 'Submitted': int(submittedJobs), 'Done': int(doneJobs) } result['Value'] = summary return result # If a printed summary is requested statAdj = 
int(0.5 * self.prodAdj) mStatAdj = int(2.0 * self.prodAdj) totalAdj = int(0.5 * self.prodAdj) exAdj = int(0.5 * self.prodAdj) message = '\nJob Summary for ProductionID %s considering status %s' % ( productionID, status) if minorStatus: message += 'and MinorStatus = %s' % (minorStatus) message += ':\n\n' message += 'Status'.ljust(statAdj) + 'MinorStatus'.ljust(mStatAdj) + 'ApplicationStatus'.ljust(mStatAdj) + \ 'Total'.ljust(totalAdj) + 'Example'.ljust(exAdj) + '\n' for stat, metadata in summary.iteritems(): message += '\n' for minor, appInfo in metadata.iteritems(): message += '\n' for appStat, jobInfo in appInfo.iteritems(): message += stat.ljust(statAdj) + minor.ljust(mStatAdj) + appStat.ljust(mStatAdj) + \ str(jobInfo['Total']).ljust(totalAdj) + str(jobInfo['JobList'][0]).ljust(exAdj) + '\n' # self._prettyPrint(summary) if status or minorStatus: return S_OK(summary) result = self.getProductionProgress(productionID) if not result['OK']: self.log.warn('Could not get production progress information') return result if 'Created' in result['Value']: createdJobs = int(result['Value']['Created']) + submittedJobs else: createdJobs = submittedJobs percSub = int(100 * submittedJobs / createdJobs) percDone = int(100 * doneJobs / createdJobs) print '\nCurrent status of production %s:\n' % productionID print 'Submitted'.ljust(12) + str(percSub).ljust(3) + '% ( ' + str(submittedJobs).ljust(7) + \ 'Submitted / '.ljust(15) + str(createdJobs).ljust(7) + ' Created jobs )' print 'Done'.ljust(12) + str(percDone).ljust(3) + '% ( ' + str(doneJobs).ljust(7) + \ 'Done / '.ljust(15) + str(createdJobs).ljust(7) + ' Created jobs )' result = S_OK() result['Totals'] = { 'Submitted': int(submittedJobs), 'Created': int(createdJobs), 'Done': int(doneJobs) } result['Value'] = summary # self.pPrint(result) return result def getProductionJobSummary(self, productionID, status=None, minorStatus=None, printOutput=False): """Returns a job summary for the productions in the system. 
If printOutput is specified, the result is printed to the screen. This queries the WMS for the given productionID and provides an up-to-date snapshot of the job status combinations and associated WMS JobIDs. """ if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') statusDict = self.getProdJobMetadata(productionID, status, minorStatus) if not statusDict['OK']: self.log.warn('Could not get production metadata information') return statusDict # Now format the result. summary = {} submittedJobs = 0 doneJobs = 0 for job, atts in statusDict['Value'].ietritems(): for key, val in atts.iteritems(): if key == 'Status': uniqueStatus = val.capitalize() if uniqueStatus not in summary: summary[uniqueStatus] = {} if atts['MinorStatus'] not in summary[uniqueStatus]: summary[uniqueStatus][atts['MinorStatus']] = {} summary[uniqueStatus][atts['MinorStatus']]['Total'] = 1 submittedJobs += 1 if uniqueStatus == 'Done': doneJobs += 1 summary[uniqueStatus][ atts['MinorStatus']]['JobList'] = [job] else: current = summary[uniqueStatus][ atts['MinorStatus']]['Total'] summary[uniqueStatus][ atts['MinorStatus']]['Total'] = current + 1 submittedJobs += 1 if uniqueStatus == 'Done': doneJobs += 1 jobList = summary[uniqueStatus][ atts['MinorStatus']]['JobList'] jobList.append(job) summary[uniqueStatus][ atts['MinorStatus']]['JobList'] = jobList if not printOutput: result = S_OK() if not status and not minorStatus: result['Totals'] = { 'Submitted': int(submittedJobs), 'Done': int(doneJobs) } result['Value'] = summary return result # If a printed summary is requested statAdj = int(0.5 * self.prodAdj) mStatAdj = int(2.0 * self.prodAdj) totalAdj = int(0.5 * self.prodAdj) exAdj = int(0.5 * self.prodAdj) message = '\nJob Summary for ProductionID %s considering' % ( productionID) if status: message += ' Status = %s' % (status) if minorStatus: message += ' MinorStatus = %s' % (minorStatus) if not status and not minorStatus: message 
+= ' all status combinations' message += ':\n\n' message += 'Status'.ljust(statAdj) + 'MinorStatus'.ljust(mStatAdj) + 'Total'.ljust(totalAdj) + \ 'Example'.ljust(exAdj) + '\n' for stat, metadata in summary.iteritems(): message += '\n' for minor, jobInfo in metadata.iteritems(): message += stat.ljust(statAdj) + minor.ljust(mStatAdj) + str(jobInfo['Total']).ljust(totalAdj) + \ str(jobInfo['JobList'][0]).ljust(exAdj) + '\n' print message # self._prettyPrint(summary) if status or minorStatus: return S_OK(summary) result = self.getProductionProgress(productionID) if not result['OK']: return result if 'Created' in result['Value']: createdJobs = int(result['Value']['Created']) + submittedJobs else: createdJobs = submittedJobs percSub = int(100 * submittedJobs / createdJobs) percDone = int(100 * doneJobs / createdJobs) print '\nCurrent status of production %s:\n' % productionID print 'Submitted'.ljust(12) + str(percSub).ljust(3) + '% ( ' + str(submittedJobs).ljust(7) + \ 'Submitted / '.ljust(15) + str(createdJobs).ljust(7) + ' Created jobs )' print 'Done'.ljust(12) + str(percDone).ljust(3) + '% ( ' + str(doneJobs).ljust(7) + \ 'Done / '.ljust(15) + str(createdJobs).ljust(7) + ' Created jobs )' result = S_OK() result['Totals'] = { 'Submitted': int(submittedJobs), 'Created': int(createdJobs), 'Done': int(doneJobs) } result['Value'] = summary return result def getProductionSiteSummary(self, productionID, site=None, printOutput=False): """Returns a site summary for the productions in the system. If printOutput is specified, the result is printed to the screen. This queries the WMS for the given productionID and provides an up-to-date snapshot of the sites that jobs were submitted to. 
""" if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') statusDict = self.getProdJobMetadata(productionID, None, None, site) if not statusDict['OK']: self.log.warn('Could not get production metadata information') return statusDict summary = {} submittedJobs = 0 doneJobs = 0 for job, atts in statusDict['Value'].iteritems(): for key, val in atts.iteritems(): if key == 'Site': uniqueSite = val currentStatus = atts['Status'].capitalize() if uniqueSite not in summary: summary[uniqueSite] = {} if currentStatus not in summary[uniqueSite]: summary[uniqueSite][currentStatus] = {} summary[uniqueSite][currentStatus]['Total'] = 1 submittedJobs += 1 if currentStatus == 'Done': doneJobs += 1 summary[uniqueSite][currentStatus]['JobList'] = [job] else: current = summary[uniqueSite][currentStatus]['Total'] summary[uniqueSite][currentStatus][ 'Total'] = current + 1 submittedJobs += 1 if currentStatus == 'Done': doneJobs += 1 jobList = summary[uniqueSite][currentStatus]['JobList'] jobList.append(job) summary[uniqueSite][currentStatus]['JobList'] = jobList if not printOutput: result = S_OK() if not site: result = self.getProductionProgress(productionID) if not result['OK']: return result if 'Created' in result['Value']: createdJobs = result['Value']['Created'] result['Totals'] = { 'Submitted': int(submittedJobs), 'Done': int(doneJobs) } result['Value'] = summary return result # If a printed summary is requested siteAdj = int(1.0 * self.prodAdj) statAdj = int(0.5 * self.prodAdj) totalAdj = int(0.5 * self.prodAdj) exAdj = int(0.5 * self.prodAdj) message = '\nSummary for ProductionID %s' % (productionID) if site: message += ' at Site %s' % (site) else: message += ' at all Sites' message += ':\n\n' message += 'Site'.ljust(siteAdj) + 'Status'.ljust(statAdj) + 'Total'.ljust(totalAdj) + \ 'Example'.ljust(exAdj) + '\n' for siteStr, metadata in summary.iteritems(): message += '\n' for stat, jobInfo in 
metadata.iteritems(): message += siteStr.ljust(siteAdj) + stat.ljust(statAdj) + str(jobInfo['Total']).ljust(totalAdj) + \ str(jobInfo['JobList'][0]).ljust(exAdj) + '\n' print message # self._prettyPrint(summary) result = self.getProductionProgress(productionID) if not result['OK']: return result if 'Created' in result['Value']: createdJobs = int(result['Value']['Created']) + submittedJobs else: createdJobs = submittedJobs percSub = int(100 * submittedJobs / createdJobs) percDone = int(100 * doneJobs / createdJobs) if not site: print '\nCurrent status of production %s:\n' % productionID print 'Submitted'.ljust(12) + str(percSub).ljust(3) + '% ( ' + str(submittedJobs).ljust(7) + \ 'Submitted / '.ljust(15) + str(createdJobs).ljust(7) + ' Created jobs )' print 'Done'.ljust(12) + str(percDone).ljust(3) + '% ( ' + str(doneJobs).ljust(7) + \ 'Done / '.ljust(15) + str(createdJobs).ljust(7) + ' Created jobs )' result = S_OK() result['Totals'] = { 'Submitted': int(submittedJobs), 'Created': int(createdJobs), 'Done': int(doneJobs) } result['Value'] = summary return result def getProductionProgress(self, productionID=None, printOutput=False): """Returns the status of jobs as seen by the production management infrastructure. 
""" if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') productionID = long(productionID) if not productionID: result = self._getActiveProductions() if not result['OK']: return result productionID = result['Value'] else: productionID = [productionID] productionID = [str(x) for x in productionID] self.log.verbose('Will check progress for production(s):\n%s' % (', '.join(productionID))) progress = {} for prod in productionID: # self._prettyPrint(result) result = self.transformationClient.getTransformationTaskStats( int(prod)) if not result['Value']: self.log.error(result) return result progress[int(prod)] = result['Value'] if not printOutput: return result idAdj = int(self.prodAdj) statAdj = int(self.prodAdj) countAdj = int(self.prodAdj) message = 'ProductionID'.ljust(idAdj) + 'Status'.ljust( statAdj) + 'Count'.ljust(countAdj) + '\n\n' for prod, info in progress.iteritems(): for status, count in info.iteritems(): message += str(prod).ljust(idAdj) + status.ljust( statAdj) + str(count).ljust(countAdj) + '\n' message += '\n' print message return result def _getActiveProductions(self, printOutput=False): """Returns a dictionary of active production IDs and their status, e.g. automatic, manual. """ result = self.transformationClient.getTransformations() if not result['OK']: return result prodList = result['Value'] currentProductions = {} for prodDict in prodList: self.log.debug(prodDict) if 'AgentType' in prodDict and 'TransformationID' in prodDict: prodID = prodDict['TransformationID'] status = prodDict['AgentType'] currentProductions[prodID] = status if status.lower() == 'automatic': self.log.verbose( 'Found active production %s eligible to submit jobs' % prodID) if printOutput: self._prettyPrint(currentProductions) return S_OK(currentProductions) def getProductionCommands(self): """ Returns the list of possible commands and their meaning. 
""" prodCommands = {} for keyword, statusSubMode in self.commands.iteritems(): prodCommands[keyword] = { 'Status': statusSubMode[0], 'SubmissionMode': statusSubMode[1] } return S_OK(prodCommands) def production(self, productionID, command, disableCheck=True): """Allows basic production management by supporting the following commands: - start : set production status to Active, job submission possible - stop : set production status to Stopped, no job submissions - automatic: set production submission mode to Automatic, e.g. submission via Agent - manual: set produciton submission mode to manual, e.g. dirac-production-submit """ commands = self.commands if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') productionID = long(productionID) if not isinstance(command, str): return self._errorReport('Expected string, for command') if not command.lower() in commands: return self._errorReport('Expected one of: %s for command string' % (', '.join(commands))) self.log.verbose( 'Requested to change production %s with command "%s"' % (productionID, command.lower().capitalize())) if not disableCheck: result = promptUser( 'Do you wish to change production %s with command "%s"? 
' % (productionID, command.lower().capitalize())) if not result['OK']: self.log.info('Action cancelled') return S_OK('Action cancelled') if result['Value'] != 'y': self.log.info('Doing nothing') return S_OK('Doing nothing') actions = commands[command] self.log.info( 'Setting production status to %s and submission mode to %s for productionID %s' % (actions[0], actions[1], productionID)) result = self.transformationClient.setTransformationParameter( long(productionID), "Status", actions[0]) if not result['OK']: self.log.warn( 'Problem updating transformation status with result:\n%s' % result) return result self.log.verbose('Setting transformation status to %s successful' % (actions[0])) result = self.transformationClient.setTransformationParameter( long(productionID), 'AgentType', actions[1]) if not result['OK']: self.log.warn( 'Problem updating transformation agent type with result:\n%s' % result) return result self.log.verbose('Setting transformation agent type to %s successful' % (actions[1])) return S_OK('Production %s status updated' % productionID) def productionFileSummary(self, productionID, selectStatus=None, outputFile=None, orderOutput=True, printSummary=False, printOutput=False): """ Allows to investigate the input files for a given production transformation and provides summaries / selections based on the file status if desired. 
""" adj = 18 ordering = 'TaskID' if not orderOutput: ordering = 'LFN' fileSummary = self.transformationClient.getTransformationFiles( condDict={'TransformationID': int(productionID)}, orderAttribute=ordering) if not fileSummary['OK']: return fileSummary toWrite = '' totalRecords = 0 summary = {} selected = 0 if fileSummary['OK']: for lfnDict in fileSummary['Value']: totalRecords += 1 record = '' recordStatus = '' for n, v in lfnDict.iteritems(): record += str(n) + ' = ' + str(v).ljust(adj) + ' ' if n == 'Status': recordStatus = v if selectStatus == recordStatus: selected += 1 if v in summary: new = summary[v] + 1 summary[v] = new else: summary[v] = 1 if outputFile and selectStatus: if selectStatus == recordStatus: toWrite += record + '\n' if printOutput: print record elif outputFile: toWrite += record + '\n' if printOutput: print record else: if printOutput: print record if printSummary: print '\nSummary for %s files in production %s\n' % (totalRecords, productionID) print 'Status'.ljust(adj) + ' ' + 'Total'.ljust( adj) + 'Percentage'.ljust(adj) + '\n' for n, v in summary.iteritems(): percentage = int(100 * int(v) / totalRecords) print str(n).ljust(adj) + ' ' + str(v).ljust(adj) + ' ' + str( percentage).ljust(2) + ' % ' print '\n' if selectStatus and not selected: return S_ERROR( 'No files were selected for production %s and status "%s"' % (productionID, selectStatus)) elif selectStatus and selected: print '%s / %s files (%s percent) were found for production %s in status "%s"' % ( selected, totalRecords, int( 100 * int(selected) / totalRecords), productionID, selectStatus) if outputFile: if os.path.exists(outputFile): print 'Requested output file %s already exists, please remove this file to continue' % outputFile return fileSummary fopen = open(outputFile, 'w') fopen.write(toWrite) fopen.close() if not selectStatus: print 'Wrote %s lines to file %s' % (totalRecords, outputFile) else: print 'Wrote %s lines to file %s for status "%s"' % ( selected, outputFile, 
selectStatus) return fileSummary def checkFilesStatus(self, lfns, productionID='', printOutput=False): """Checks the given LFN(s) status in the productionDB. All productions are considered by default but can restrict to productionID. """ if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') if isinstance(lfns, str): lfns = lfns.replace('LFN:', '') elif isinstance(lfns, list): try: lfns = [str(lfnName.replace('LFN:', '')) for lfnName in lfns] except Exception as x: return self._errorReport(str(x), 'Expected strings for LFN(s)') else: return self._errorReport( 'Expected single string or list of strings for LFN(s)') fileStatus = self.transformationClient.getFileSummary( lfns, long(productionID)) if printOutput: self._prettyPrint(fileStatus['Value']) return fileStatus def getWMSProdJobID(self, jobID, printOutput=False): """This method takes the DIRAC WMS JobID and returns the Production JobID information. """ result = self.attributes(jobID) if not result['OK']: return result if 'JobName' not in result['Value']: return S_ERROR( 'Could not establish ProductionID / ProductionJobID, missing JobName' ) wmsJobName = result['Value']['JobName'] prodID = wmsJobName.split('_')[0] prodJobID = wmsJobName.split('_')[1] info = { 'WMSJobID': jobID, 'JobName': wmsJobName, 'ProductionID': prodID, 'JobID': prodJobID } if printOutput: self._prettyPrint(info) return S_OK(info) def getProdJobInfo(self, productionID, jobID, printOutput=False): """Retrieve production job information from Production Manager service. 
""" res = self.transformationClient.getTransformationTasks( condDict={ 'TransformationID': productionID, 'TaskID': jobID }, inputVector=True) if not res['OK']: return res if not res['Value']: return S_ERROR("Job %s not found for production %s" % (jobID, productionID)) jobInfo = res['Value'][0] if printOutput: self._prettyPrint(jobInfo) return S_OK(jobInfo) def selectProductionJobs(self, productionID, status=None, minorStatus=None, applicationStatus=None, site=None, owner=None, date=None): """Wraps around DIRAC API selectJobs(). Arguments correspond to the web page selections. By default, the date is the creation date of the production. """ if not date: self.log.verbose( 'No Date supplied, setting old date for production %s' % productionID) date = '2001-01-01' return self.selectJobs(status, minorStatus, applicationStatus, site, owner, str(productionID).zfill(8), date) def extendProduction(self, productionID, numberOfJobs, printOutput=False): """ Extend Simulation type Production by number of jobs. Usage: extendProduction <ProductionNameOrID> nJobs """ if not isinstance(productionID, (int, long, str)): return self._errorReport( 'Expected string, long or int for production ID') if isinstance(numberOfJobs, str): try: numberOfJobs = int(numberOfJobs) except Exception as x: return self._errorReport( str(x), 'Expected integer or string for number of jobs to submit') result = self.transformationClient.extendTransformation( long(productionID), numberOfJobs) if not result['OK']: return self._errorReport( result, 'Could not extend production %s by %s jobs' % (productionID, numberOfJobs)) if printOutput: print 'Extended production %s by %s jobs' % (productionID, numberOfJobs) return result def getProdJobMetadata(self, productionID, status=None, minorStatus=None, site=None): """Function to get the WMS job metadata for selected fields. Given a production ID will return the current WMS status information for all jobs in that production starting from the creation date. 
""" result = self.transformationClient.getTransformationParameters( long(productionID), ['CreationDate']) if not result['OK']: self.log.warn( 'Problem getting production metadata for ID %s:\n%s' % (productionID, result)) return result creationDate = toString(result['Value']).split()[0] result = self.selectProductionJobs(productionID, status=status, minorStatus=minorStatus, site=site, date=creationDate) if not result['OK']: self.log.warn('Problem selecting production jobs for ID %s:\n%s' % (productionID, result)) return result jobsList = result['Value'] return self.status(jobsList) def launchProduction(self, prod, publishFlag, testFlag, requestID, extend=0, tracking=0, MCsimflag=False): """ Given a production object (prod), launch it It returns the productionID created """ if publishFlag is False and testFlag: gLogger.info('Test prod will be launched locally') result = prod.runLocal() if result['OK']: gLogger.info('Template finished successfully') return S_OK() else: gLogger.error( 'Launching production: something wrong with execution!') return S_ERROR('Something wrong with execution!') result = prod.create(publish=publishFlag, requestID=requestID, reqUsed=tracking) if not result['OK']: gLogger.error( 'Error during prod creation:\n%s\ncheck that the wkf name is unique.' % (result['Message'])) return result if publishFlag: prodID = result['Value'] msg = 'Production %s successfully created ' % (prodID) if extend: self.extendProduction(prodID, extend, printOutput=True) msg += ', extended by %s jobs' % extend if MCsimflag: self.production(prodID, 'mctestmode') msg = msg + ' and started in mctestmode.' elif testFlag: self.production(prodID, 'manual') msg = msg + ' and started in manual mode.' else: self.production(prodID, 'automatic') msg = msg + ' and started in automatic mode.' gLogger.notice(msg) else: prodID = 1 gLogger.notice( 'Production creation completed but not published (publishFlag was %s). 
\ Setting ID = %s (useless, just for the test)' % (publishFlag, prodID)) return S_OK(prodID)
class MCSimulationTestingAgent(AgentModule):
    """An agent to check for MCSimulation productions that have undergone the testing phase.

    Productions that have the status Idle and are also in the table StoredJobDescription have
    undergone testing. A report is created by the agent from the results of the test phase and
    emailed to the Production Manager.
    """

    def __init__(self, *args, **kwargs):
        """ c'tor: only declares the attributes, the clients are created in initialize() """
        AgentModule.__init__(self, *args, **kwargs)
        self.transClient = None
        self.bkClient = None
        self.notifyClient = None
        self.operations = None
        # recipient of the reports; filled from the "MailTo" option in initialize()
        self.email = ''
        # transformations whose tasks all failed: they are not looked at again by this agent instance
        self.failedTransIDs = []

    def initialize(self):
        """ Create the service clients and read the "MailTo" agent option.

        :return: S_OK()
        """
        self.transClient = TransformationClient()
        self.bkClient = BookkeepingClient()
        self.notifyClient = NotificationClient()
        self.operations = Operations()
        self.email = self.am_getOption("MailTo", '')
        return S_OK()

    def execute(self):
        """ One agent cycle.

        Selects the transformations that are both Idle (of an extendable type) and present in the
        stored job descriptions, then for each of them:

        * all tasks Done: the transformation is activated;
        * all tasks Failed: a report is sent and the transformation is remembered as failed;
        * otherwise: the transformation is activated using the Done tasks only and a report is sent.

        :return: S_OK(), or the failed result of the initial TransformationClient calls
        """
        # get all the idle transformations
        extendableTTypes = Operations().getValue('Transformations/ExtendableTransfTypes', ['MCSimulation'])
        res = self.transClient.getTransformations(condDict={"Status": "Idle", "Type": extendableTTypes})
        if not res['OK']:
            self.log.error("Call to Transformation Client service failed", res['Message'])
            return res
        idleTransformations = [d.get("TransformationID") for d in res['Value']]
        self.log.verbose("Found %d Idle MC transformations" % len(idleTransformations))
        self.log.debug("Idle transformations found: %s" % ','.join([str(it) for it in idleTransformations]))

        # get all the IDs of transformations undergoing a testing phase
        res = self.transClient.getStoredJobDescriptionIDs()
        if not res['OK']:
            self.log.error("Call to Transformation Client service failed", res['Message'])
            return res
        testingSimulations = [pair[0] for pair in res['Value']]
        self.log.verbose("Found %d MC transformations undergoing a testing phase" % len(testingSimulations))
        self.log.debug("MC transformations found undergoing a testing phase: %s" %
                       ','.join([str(ts) for ts in testingSimulations]))

        # get the IDs that occur in both idle transformations and testing phase
        idleSimulations = list(set(testingSimulations).intersection(idleTransformations))
        # remove those that we know failed
        idleSimulations = list(set(idleSimulations).difference(self.failedTransIDs))
        self.log.info("MC transformations under considerations: %s (will loop on them)" %
                      ','.join([str(idS) for idS in idleSimulations]))

        for transID in idleSimulations:
            self.log.info("Looking into %d" % transID)
            tasks = self.transClient.getTransformationTasks(condDict={"TransformationID": transID})
            if not tasks['OK']:
                self.log.error("Call to Transformation Client service failed", tasks['Message'])
                continue
            tasks = tasks['Value']
            if not tasks:
                # Without this guard 0 tasks == 0 done tasks would be treated as "all tasks passed"
                self.log.info("No tasks found for Transformation %d" % transID)
                continue

            numberOfTasks = len(tasks)
            numberOfDoneTasks = sum(1 for d in tasks if d.get("ExternalStatus") == "Done")
            self.log.verbose(
                "TransID = %d, numberOfTasks = %d, numberOfDoneTasks = %d" % (transID,
                                                                              numberOfTasks,
                                                                              numberOfDoneTasks))

            if numberOfTasks == numberOfDoneTasks:
                self.log.info("All tasks have passed so the request can be accepted and the transformation updated")
                res = self._activateTransformation(transID, tasks)
                if not res['OK']:
                    self.log.error("Error Activating Production", res['Message'])
                continue

            self.log.warn("There are failed tasks")
            report = self.__createReport(tasks)
            numberOfFailedTasks = sum(1 for d in tasks if d.get('ExternalStatus') == 'Failed')
            if numberOfFailedTasks == numberOfTasks:
                # all tasks have failed so the request can be rejected and an email report sent
                self._sendReport(report)
                self.log.warn("Transformation " + str(transID) + " failed the testing phase")
                self.failedTransIDs.append(transID)
                continue

            # only some tasks have failed so continue but send a warn email
            self.log.warn("Transformation " + str(transID) +
                          " failed partially the testing phase, continuing anyway")
            doneTasks = [d for d in tasks if d.get("ExternalStatus") == "Done"]
            if not doneTasks:
                self.log.info("No tasks done for Transformation %d" % transID)
                continue
            res = self._activateTransformation(transID, doneTasks)
            if not res['OK']:
                self.log.error("Error Activating Production", res['Message'])
                continue

            subject = "MCSimulation Test Failure Report. TransformationID: " + str(transID) + " - some tasks failed"
            report['subject'] = subject
            self._sendReport(report)

        return S_OK()

    def _activateTransformation(self, transID, tasks):
        """ Calculate parameters, update the workflow, then move the production to Active

        :param transID: ID of the transformation to activate
        :param tasks: task dictionaries used to compute the CPU parameters
        :return: S_OK() when every step succeeded, otherwise the result of the failing step
        """
        parameters = self._calculateParameters(tasks)
        if not parameters['OK']:
            self.log.error("Error calculating parameters", parameters['Message'])
            return parameters
        parameters = parameters['Value']
        self.log.verbose("TransID = %d, Calculated Parameters: %s" % (transID, str(parameters)))

        workflow = self._updateWorkflow(transID, int(round(float(parameters['CPUe']))), parameters['MCCpu'])
        if not workflow['OK']:
            # Already logged by _updateWorkflow; propagate instead of falling through
            return workflow

        res = self._updateTransformationsTable(transID, workflow['Value'])
        if not res['OK']:
            self.log.error("Error updating transformations table", res['Message'])
            return res

        self.log.info("Transformation " + str(transID) + " passed the testing phase and is now set to active")
        return S_OK()

    def __createReport(self, tasks):
        """ Creates a report from failed tasks to email to the production manager.

        :param tasks: non-empty list of task dictionaries of one transformation
        :return: dict with the keys 'subject' (string) and 'body' (list of lines)
        """
        dateformat = '%d/%m/%Y %H:%M'
        separator = "----------------------------------------------------------------------"
        transformationID = tasks[0]["TransformationID"]
        subject = "MCSimulation Test Failure Report. TransformationID: " + str(transformationID)
        body = [subject, "", "Transformation:", separator]

        res = self.transClient.getTransformations(condDict={"TransformationID": transformationID})
        if not res['OK'] or not res['Value']:
            # Do not let a failing lookup kill the cycle: the task details are still worth sending
            self.log.error("Could not get the transformation details for the report",
                           res.get('Message', 'TransformationID %s not found' % transformationID))
            body.append("Transformation details could not be retrieved")
        else:
            transformation = res['Value'][0]
            body.extend([
                "TransformationID: " + str(transformation["TransformationID"]),
                "TransformationName: " + transformation["TransformationName"],
                "LastUpdate: " + transformation["LastUpdate"].strftime(dateformat),
                "Status: " + transformation["Status"],
                "Description: " + transformation["Description"],
                "TransformationFamily: " + str(transformation["TransformationFamily"]),
                "Plugin: " + transformation["Plugin"],
                "Type: " + transformation["Type"],
                "AgentType: " + transformation["AgentType"],
                "GroupSize: " + str(transformation["GroupSize"]),
                "MaxNumberOfTasks: " + str(transformation["MaxNumberOfTasks"]),
                "AuthorDN: " + transformation["AuthorDN"],
                "TransformationGroup: " + transformation["TransformationGroup"],
                "InheritedFrom: " + str(transformation["InheritedFrom"]),
                "CreationDate: " + transformation["CreationDate"].strftime(dateformat),
                "FileMask: " + transformation["FileMask"],
                "EventsPerTask: " + str(transformation["EventsPerTask"]),
                "AuthorGroup: " + transformation["AuthorGroup"],
            ])

        body.append("")
        body.append("Number of Tasks: " + str(len(tasks)))
        body.append("Tasks:")
        body.append(separator)
        for task in tasks:
            body.extend([
                "TaskID: " + str(task['TaskID']),
                "TargetSE: " + task['TargetSE'],
                "LastUpdateTime: " + task['LastUpdateTime'].strftime(dateformat),
                "RunNumber: " + str(task['RunNumber']),
                "CreationTime: " + task['CreationTime'].strftime(dateformat),
                "ExternalID: " + str(task['ExternalID']),
                "ExternalStatus: " + task['ExternalStatus'],
                "",
            ])

        return {'subject': subject, 'body': body}

    def _sendReport(self, report):
        """ Sends a given report to the production manager.

        The recipient is the "MailTo" option when set, otherwise the 'Email' user option of the
        user configured under Shifter/ProductionManager/User. Failures are only logged.

        :param report: dict with the keys 'subject' and 'body' (list of lines)
        """
        if not self.email:
            self.email = getUserOption(self.operations.getValue("Shifter/ProductionManager/User"), 'Email')
        body = '\n'.join(report['body'])
        res = self.notifyClient.sendMail(
            self.email,
            report['subject'],
            body,
            self.email,
            localAttempt=False,
            avoidSpam=True)
        if not res['OK']:
            self.log.error("sendMail failed", res['Message'])
        else:
            self.log.info('Mail summary sent to production manager')

    def _calculateParameters(self, tasks):
        """ Calculates the CPU time per event for the production.

        :param tasks: task dictionaries; their 'ExternalID' is used as job ID for the bookkeeping query
        :return: S_OK({'CPUe': <average CPU per event>, 'MCCpu': <string, 25 * rounded CPUe>}),
                 or S_ERROR when the bookkeeping call fails, reports no successful job, or no
                 event count can be found
        """
        jobIds = [int(x['ExternalID']) for x in tasks]
        res = self.bkClient.bulkJobInfo({'jobId': jobIds})
        if not res['OK']:
            self.log.error("Error calling bkClient", res['Message'])
            return S_ERROR(res['Message'])
        successful = res['Value']['Successful']
        self.log.debug("Successful tasks: %s" % str(successful))
        if not successful:
            self.log.error("There are no successful tasks")
            return S_ERROR("There are no successful tasks")

        applications = ['Gauss', 'Boole', 'Moore', 'Brunel', 'DaVinci']
        # The event count is taken once, from the first matching step encountered
        events = 0
        CPUeJobTotal = 0.0
        for job in successful.itervalues():
            cpuJob = 0
            for bkJob in job:
                if bkJob['ApplicationName'] in applications:
                    if not events:
                        events = bkJob['NumberOfEvents']
                    timeInSeconds = bkJob['CPUTIME']
                    cpuJob += timeInSeconds * bkJob['WNCPUHS06']
            if not events:
                # No matching step (or zero events) so far: dividing would raise ZeroDivisionError
                self.log.error("Could not determine the number of events")
                return S_ERROR("Could not determine the number of events")
            CPUeJob = cpuJob / events
            self.log.debug("CPUeJob = %d" % CPUeJob)
            CPUeJobTotal += CPUeJob

        CPUe = CPUeJobTotal / len(successful)
        # We want to produce at least 25 events per job...
        MCCpu = str(25 * int(round(float(CPUe))))
        self.log.verbose("CPUe = %d, MCCpu = %s" % (CPUe, MCCpu))
        return S_OK({'CPUe': CPUe, 'MCCpu': MCCpu})

    def _updateWorkflow(self, transID, CPUe, MCCpu):
        """ Updates the workflow of a savedProductionDescription to reflect the calculated CPUe.

        :param transID: ID of the transformation whose stored job description is read
        :param CPUe: CPU time per event, stored as the 'CPUe' workflow parameter
        :param MCCpu: value passed to the job's setCPUTime()
        :return: S_OK(<workflow XML>), or the failed result of getStoredJobDescription
        """
        res = self.transClient.getStoredJobDescription(transID)
        if not res['OK']:
            self.log.error("Call to Transformation Client service failed", res['Message'])
            return res

        workflow = fromXMLString(res['Value'][0][1])
        prod = Production()
        prod.LHCbJob.workflow = workflow
        prod.setParameter('CPUe', 'string', str(CPUe), 'CPU time per event')
        prod.LHCbJob.setCPUTime(MCCpu)
        self.log.info("Transformation ", str(transID))
        self.log.info("Calculated CPUTime: ", str(CPUe))
        self.log.info("CpuTime: ", str(MCCpu))

        # maximum number of events to produce
        # try to get the CPU parameters from the configuration if possible
        cpuTimeAvg = Operations().getValue('Transformations/CPUTimeAvg')
        if cpuTimeAvg is None:
            self.log.info('Could not get CPUTimeAvg from config, defaulting to %d' % 200000)
            cpuTimeAvg = 200000

        try:
            CPUNormalizationFactorAvg = getCPUNormalizationFactorAvg()
        except RuntimeError:
            self.log.info('Could not get CPUNormalizationFactorAvg, defaulting to %f' % 1.0)
            CPUNormalizationFactorAvg = 1.0

        max_e = getEventsToProduce(CPUe, cpuTimeAvg, CPUNormalizationFactorAvg)
        prod.setParameter('maxNumberOfEvents', 'string', str(max_e), 'Maximum number of events to produce (Gauss)')
        return S_OK(prod.LHCbJob.workflow.toXML())

    def _updateTransformationsTable(self, transID, workflow):
        """ Puts the modified workflow from the savedProductionDescription table into the transformations table
            and removes it from the savedProductionDescription table.

        If setting the Body or the Status fails, both are set back to their previous values.

        :param transID: ID of the transformation to update
        :param workflow: new value for the "Body" parameter
        :return: S_OK() when the body and status were updated and the stored description removed,
                 otherwise a failed result (including when the update was rolled back)
        """
        transformation = self.transClient.getTransformations(condDict={"TransformationID": transID})
        if not transformation['OK']:
            self.log.error("Call to getTransformations failed", transformation['Message'])
            return transformation

        body = self.transClient.setTransformationParameter(transID, "Body", workflow)
        status = self.transClient.setTransformationParameter(transID, "Status", "Active")
        if body['OK'] and status['OK']:
            res = self.transClient.removeStoredJobDescription(transID)
            if not res['OK']:
                self.log.error("Call to removeStoredJobDescription failed", res['Message'])
                return res
            self.log.info("Transformation %s has an updated body and Status set to active" % transID)
            return S_OK()

        self.log.error("One of the updates has failed so set them both back to the previous value to ensure atomicity")
        previous = transformation['Value'][0]
        self.log.debug(str(previous['Body']))
        for parameter in ("Body", "Status"):
            res = self.transClient.setTransformationParameter(transID, parameter, previous[parameter])
            if not res['OK']:
                self.log.error("Failure calling setTransformationParameter", res['Message'])
                return res

        # The rollback worked but the activation did not: report the original failure to the
        # caller, which indexes the returned value with ['OK']
        failed = status if body['OK'] else body
        return S_ERROR("Failed to update transformation %s: %s" % (transID, failed['Message']))
class DataRecoveryAgent(AgentModule):
    """ Standard DIRAC agent class.

    For the selected transformations it looks at the files in given statuses, finds the
    corresponding WMS jobs in given statuses that have no pending request, checks through the
    consistency checks whether the files have descendants, and sets the files of jobs without
    descendants to a configurable status.
    """

    def __init__(self, *args, **kwargs):
        """ c'tor: only declares the attributes, the clients are created in initialize() """
        AgentModule.__init__(self, *args, **kwargs)
        self.transClient = None
        self.reqClient = None
        self.consChecks = None
        self.enableFlag = True
        self.transformationTypes = []
        # replaced in execute() by one sub-logger per transformation
        self.transLogger = self.log

    #############################################################################

    def initialize(self):
        """ Sets defaults: shifter proxy, clients and the transformation types to consider
            (the DataProcessing types minus the extendable ones).

        :return: S_OK()
        """
        self.am_setOption('shifterProxy', 'ProductionManager')

        self.transClient = TransformationClient()
        self.reqClient = ReqClient()
        self.consChecks = ConsistencyChecks(interactive=False, transClient=self.transClient)

        transformationTypes = Operations().getValue('Transformations/DataProcessing', [])
        extendableTTypes = Operations().getValue('Transformations/ExtendableTransfTypes', ['MCSimulation'])
        self.transformationTypes = list(set(transformationTypes) - set(extendableTTypes))

        return S_OK()

    #############################################################################

    def execute(self):
        """ The main execution method.

        :return: S_OK(), or the failed result of the transformation selection
        """
        # Configuration settings
        self.enableFlag = self.am_getOption('EnableFlag', True)
        self.log.verbose('Enable flag is %s' % self.enableFlag)
        if not self.transformationTypes:
            self.log.warn("No transformation types to look for... aborting")
            return S_OK()

        transformationStatus = self.am_getOption('TransformationStatus', ['Active', 'Completing'])
        fileSelectionStatus = self.am_getOption('FileSelectionStatus', ['Assigned', 'MaxReset'])
        unrecoverableStatus = self.am_getOption('UnrecoverableStatus', ['MaxReset'])
        updateStatus = self.am_getOption('FileUpdateStatus', 'Unused')
        wmsStatusList = self.am_getOption('WMSStatus', ['Failed'])
        # only worry about files whose task was last updated more than SelectionDelay hours ago (default: 1)
        selectDelay = self.am_getOption('SelectionDelay', 1)  # hours

        transformationDict = {}
        for transStatus in transformationStatus:
            result = self.__getEligibleTransformations(transStatus, self.transformationTypes)
            if not result['OK']:
                self.log.error("Could not obtain eligible transformations",
                               "Status '%s': %s" % (transStatus, result['Message']))
                return result
            if not result['Value']:
                self.log.info('No "%s" transformations of types %s to process.' %
                              (transStatus, ', '.join(self.transformationTypes)))
                continue
            transformationDict.update(result['Value'])

        self.log.info('Selected %d transformations of types %s' %
                      (len(transformationDict), ', '.join(self.transformationTypes)))
        self.log.verbose('Transformations selected:\n%s' % (', '.join(transformationDict)))

        for transformation, typeName in transformationDict.iteritems():
            self.transLogger = self.log.getSubLogger('Trans-%s' % transformation)

            result = self.__selectTransformationFiles(transformation, fileSelectionStatus)
            if not result['OK']:
                self.transLogger.error('Could not select files for transformation',
                                       '%s: %s' % (transformation, result['Message']))
                continue
            fileDict = result['Value']
            if not fileDict:
                self.transLogger.verbose('No files in status %s selected for transformation %s' %
                                         (', '.join(fileSelectionStatus), transformation))
                continue

            title = 'Looking at transformation %s, type %s ' % (transformation, typeName)
            self.transLogger.info('=' * len(title))
            self.transLogger.info(title)
            self.transLogger.info('Selected %d files with status %s' %
                                  (len(fileDict), ','.join(fileSelectionStatus)))

            result = self.__obtainWMSJobIDs(transformation, fileDict, selectDelay, wmsStatusList)
            if not result['OK']:
                self.transLogger.error("Could not obtain jobs for files of transformation", result['Message'])
                continue
            jobFileDict = result['Value']
            if not jobFileDict:
                self.transLogger.info('No %s jobs found for selected files' % ' or '.join(wmsStatusList))
                continue

            self.transLogger.verbose("Looking at WMS jobs %s" % ','.join(str(jobID) for jobID in jobFileDict))
            fileCount = sum(len(lfnList) for lfnList in jobFileDict.itervalues())
            self.transLogger.verbose('%s files are selected after examining WMS jobs' %
                                     (str(fileCount) if fileCount else 'No'))
            if not fileCount:
                continue

            result = self.__removePendingRequestsJobs(jobFileDict)
            if not result['OK']:
                self.transLogger.error("Error while removing jobs with pending requests", result['Message'])
                continue
            # This method modifies the input dictionary
            if not jobFileDict:
                self.transLogger.info('No WMS jobs without pending requests to process.')
                continue

            fileCount = sum(len(lfnList) for lfnList in jobFileDict.itervalues())
            self.transLogger.info('%s files are selected in %d jobs after removing any job with pending requests' %
                                  (str(fileCount) if fileCount else 'No', len(jobFileDict)))
            if not fileCount:
                continue

            jobsThatDidntProduceOutputs, jobsThatProducedOutputs = self.__checkdescendants(
                transformation, jobFileDict)
            title = '======== Transformation %s: results ========' % transformation
            self.transLogger.info(title)
            self.transLogger.info('\tTotal jobs that can be updated now: %d' % len(jobsThatDidntProduceOutputs))
            if jobsThatProducedOutputs:
                self.transLogger.info('\t%d jobs have descendants' % len(jobsThatProducedOutputs))
            else:
                self.transLogger.info('\tNo jobs have descendants')

            # sets give constant-time membership tests in the loop below
            withoutOutputs = set(jobsThatDidntProduceOutputs)
            withOutputs = set(jobsThatProducedOutputs)
            filesToUpdate = []
            filesMaxReset = []
            filesWithDescendants = []
            for job, fileList in jobFileDict.iteritems():
                if job in withoutOutputs:
                    # fileDict maps each LFN to (taskID, file status)
                    recoverableFiles = set(lfn for lfn in fileList if fileDict[lfn][1] not in unrecoverableStatus)
                    filesToUpdate += list(recoverableFiles)
                    filesMaxReset += list(set(fileList) - recoverableFiles)
                elif job in withOutputs:
                    filesWithDescendants += fileList

            if filesToUpdate:
                self.transLogger.info("\tUpdating %d files to '%s'" % (len(filesToUpdate), updateStatus))
                result = self.__updateFileStatus(transformation, filesToUpdate, updateStatus)
                if not result['OK']:
                    self.transLogger.error('\tRecoverable files were not updated', result['Message'])

            if filesMaxReset:
                self.transLogger.info('\t%d files are in %s status and have no descendants' %
                                      (len(filesMaxReset), ','.join(unrecoverableStatus)))

            if filesWithDescendants:
                # FIXME: we should mark these files with another status such that they are not considered again
                # and again. In addition a notification should be sent to the production managers
                self.transLogger.warn(
                    '\t!!!!!!!! Transformation has descendants for files that are not marked as processed !!!!!!!!'
                )
                self.transLogger.warn('\tFiles with descendants:', ','.join(filesWithDescendants))

        return S_OK()

    #############################################################################

    def __getEligibleTransformations(self, status, typeList):
        """ Select transformations of given status and type.

        :param status: transformation status to select
        :param typeList: transformation types to select
        :return: S_OK({<transformation ID as string>: <type>}) or the failed client result
        """
        res = self.transClient.getTransformations(condDict={'Status': status, 'Type': typeList})
        if not res['OK']:
            return res
        transformations = dict((str(prod['TransformationID']), prod['Type']) for prod in res['Value'])
        return S_OK(transformations)

    #############################################################################

    def __selectTransformationFiles(self, transformation, statusList):
        """ Select files, production jobIDs in specified file status for a given transformation.

        Files for which a mandatory key is missing are reported with a warning and skipped.

        :param transformation: transformation ID
        :param statusList: file statuses to select
        :return: S_OK({LFN: (TaskID, Status)}) or the failed client result
        """
        # Until a query for files with timestamp can be obtained must rely on the
        # WMS job last update
        res = self.transClient.getTransformationFiles(condDict={
            'TransformationID': transformation,
            'Status': statusList
        })
        if not res['OK']:
            return res

        resDict = {}
        # 'Status' is read below, so it has to be checked as well
        mandatoryKeys = {'LFN', 'TaskID', 'LastUpdate', 'Status'}
        for fileDict in res['Value']:
            missingKeys = mandatoryKeys - set(fileDict)
            if missingKeys:
                for key in missingKeys:
                    self.transLogger.warn('%s is mandatory, but missing for:\n\t%s' % (key, str(fileDict)))
            else:
                resDict[fileDict['LFN']] = (fileDict['TaskID'], fileDict['Status'])
        return S_OK(resDict)

    #############################################################################

    def __obtainWMSJobIDs(self, transformation, fileDict, selectDelay, wmsStatusList):
        """ Group files by the corresponding WMS jobIDs, check the corresponding
            jobs have not been updated for the delay time. Can't get into any
            mess because we start from files only in MaxReset / Assigned and check
            corresponding jobs. Mixtures of files for jobs in MaxReset and Assigned
            statuses only possibly include some files in Unused status (not Processed
            for example) that will not be touched.

        :param transformation: transformation ID
        :param fileDict: {LFN: (TaskID, Status)} as built by __selectTransformationFiles
        :param selectDelay: only tasks last updated more than this number of hours ago are considered
        :param wmsStatusList: external (WMS) statuses a task must be in to be considered
        :return: S_OK({<WMS job ID>: [LFN, ...]}) or the failed client result
        """
        taskIDList = sorted(set(taskID for taskID, _status in fileDict.values()))
        self.transLogger.verbose("The following %d task IDs correspond to the selected files:\n%s" %
                                 (len(taskIDList), ', '.join(str(taskID) for taskID in taskIDList)))

        jobFileDict = {}
        olderThan = dateTime() - datetime.timedelta(hours=selectDelay)

        res = self.transClient.getTransformationTasks(
            condDict={
                'TransformationID': transformation,
                'TaskID': taskIDList
            },
            older=olderThan,
            timeStamp='LastUpdateTime')
        if not res['OK']:
            self.transLogger.error("getTransformationTasks returned an error", '%s' % res['Message'])
            return res

        mandatoryKeys = {'TaskID', 'ExternalID', 'LastUpdateTime', 'ExternalStatus'}
        for taskDict in res['Value']:
            missingKey = mandatoryKeys - set(taskDict)
            if missingKey:
                for key in missingKey:
                    self.transLogger.warn('Missing key %s for job dictionary:\n\t%s' % (key, str(taskDict)))
                continue

            taskID = taskDict['TaskID']
            wmsID = taskDict['ExternalID']
            wmsStatus = taskDict['ExternalStatus']

            if not int(wmsID):
                self.transLogger.verbose('TaskID %s: status is %s (jobID = %s) so will not recheck with WMS' %
                                         (taskID, wmsStatus, wmsID))
                continue

            # Exclude jobs not having appropriate WMS status - have to trust that production management status
            # is correct
            if wmsStatus not in wmsStatusList:
                self.transLogger.verbose('Job %s is in status %s, not in %s so will be ignored' %
                                         (wmsID, wmsStatus, ', '.join(wmsStatusList)))
                continue

            # Must map unique files -> jobs in expected state
            jobFileDict[wmsID] = [
                lfn for lfn, (tID, _st) in fileDict.iteritems()
                if int(tID) == int(taskID)
            ]

            self.transLogger.info('Found %d files for taskID %s, jobID %s (%s), last update %s' %
                                  (len(jobFileDict[wmsID]), taskID, wmsID, wmsStatus, taskDict['LastUpdateTime']))

        return S_OK(jobFileDict)

    #############################################################################

    def __removePendingRequestsJobs(self, jobFileDict):
        """ Before doing anything check that no outstanding requests are pending
            for the set of WMS jobIDs.

        Jobs whose request is not Done, or whose request status cannot be obtained, are
        removed from jobFileDict, which is modified in place.

        :param jobFileDict: {<WMS job ID>: [LFN, ...]}
        :return: S_OK() or the failed result of getRequestIDsForJobs
        """
        jobs = list(jobFileDict)
        # Silence the request client during the bulk query; always restore its level afterwards
        level = self.reqClient.log.getLevel()
        self.reqClient.log.setLevel('ERROR')
        try:
            result = self.reqClient.getRequestIDsForJobs(jobs)
        finally:
            self.reqClient.log.setLevel(level)
        if not result['OK']:
            return result

        if not result['Value']['Successful']:
            self.transLogger.verbose('None of the jobs have pending requests')
            return S_OK()

        for jobID, requestID in result['Value']['Successful'].iteritems():
            res = self.reqClient.getRequestStatus(requestID)
            if not res['OK']:
                self.transLogger.error('Failed to get Status for Request', '%s:%s' % (requestID, res['Message']))
            elif res['Value'] == 'Done':
                continue
            # If we fail to get the Status or it is not Done, we must wait, so remove the job from the list.
            if jobFileDict.pop(str(jobID), None) is not None:
                self.transLogger.verbose('Removing jobID %s from consideration until requests are completed' %
                                         (jobID))

        return S_OK()

    #############################################################################

    def __checkdescendants(self, transformation, jobFileDict):
        """ Check BK descendants for input files, prepare list of actions to be
            taken for recovery.

        :param transformation: transformation ID, set as 'prod' on the consistency checks
        :param jobFileDict: {<WMS job ID>: [LFN, ...]}
        :return: tuple (jobs none of whose files have descendants, jobs with at least one file
                 having descendants), both as lists of job IDs
        """
        jobsThatDidntProduceOutputs = []
        jobsThatProducedOutputs = []

        self.consChecks.prod = transformation
        for job, fileList in jobFileDict.iteritems():
            result = self.consChecks.getDescendants(fileList)
            # items 0 and 2 of the result are used as "files with descendants" and
            # "files with multiple descendants"
            filesWithDesc = result[0]
            filesWithMultipleDesc = result[2]
            if filesWithDesc or filesWithMultipleDesc:
                jobsThatProducedOutputs.append(job)
            else:
                jobsThatDidntProduceOutputs.append(job)

        return jobsThatDidntProduceOutputs, jobsThatProducedOutputs

    ############################################################################

    def __updateFileStatus(self, transformation, fileList, fileStatus):
        """ Update file list to specified status.

        Nothing is changed when the enable flag is False: the would-be update is only logged.

        :param transformation: transformation ID
        :param fileList: LFNs to update
        :param fileStatus: status to set
        :return: S_OK() when disabled, otherwise the result of setFileStatusForTransformation
        """
        if not self.enableFlag:
            self.transLogger.info("\tEnable flag is False, would have updated %d files to '%s' status for %s" %
                                  (len(fileList), fileStatus, transformation))
            return S_OK()

        return self.transClient.setFileStatusForTransformation(
            int(transformation), fileStatus, fileList, force=False)