def getFileInformation(workflow, lfn, outModule):
    # Connect to the FWJR DB
    fwjrDB = Database('wmagent_jobdump/fwjrs', 'http://dummy.cern.ch:5984')
    result = fwjrDB.loadView('FWJRDump', 'jobsByOutputLFN',
                             {'include_docs': True}, [[workflow, lfn]])
    if result['rows']:
        fwjrDoc = result['rows'][0]['doc']
        fwjrInfo = fwjrDoc['fwjr']
        for step in fwjrInfo['steps']:
            if step == 'cmsRun1':
                if outModule not in fwjrInfo['steps'][step]['output']:
                    print "WARNING: No output module %s in this job" % outModule
                    return
                outModuleInfo = fwjrInfo['steps'][step]['output'][outModule]
                for fileInfo in outModuleInfo:
                    if fileInfo['lfn'] == lfn:
                        print "File information, %s" % fileInfo['lfn']
                        print "Run/Lumis:"
                        for run in fileInfo['runs']:
                            print 'Run: %s, Lumi range: %s-%s' % (run, fileInfo['runs'][run][0],
                                                                  fileInfo['runs'][run][1])
                        print "Number of Events: %s" % fileInfo['events']
                        print "Filesize (bytes): %.1f" % float(fileInfo['size'])
                        print "Adler32 Checksum: %s" % fileInfo['checksums']['adler32']
    else:
        print "WARNING: No file info in CouchDB"
    return
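# Hedged usage sketch for getFileInformation: the workflow name, LFN and output
# module below are hypothetical placeholders, and the couch URL hard-coded inside
# the function ('http://dummy.cern.ch:5984') would need to point at a real agent
# CouchDB for this to print anything.
if __name__ == '__main__':
    getFileInformation('pdmvserv_EXAMPLE_WORKFLOW_130101_000000_0000',
                       '/store/mc/Example/Sample/GEN-SIM/v1/00000/EXAMPLE.root',
                       'RAWSIMoutput')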
def checkWorkQueue(requestName):
    result = {'ActiveAgents': {},
              'ElementsRunning': 0,
              'ElementsAcquired': 0,
              'ElementsAvailable': 0,
              'ElementsDone': 0}
    x = Database('workqueue', 'https://cmsweb.cern.ch/couchdb')
    y = x.loadView('WorkQueue', 'elementsByParent', {'include_docs': True}, [requestName])
    for entry in y['rows']:
        doc = entry['doc']
        element = doc['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        status = element['Status']
        if status == 'Running':
            result['ElementsRunning'] += 1
        elif status == 'Acquired':
            result['ElementsAcquired'] += 1
        elif status == 'Available':
            result['ElementsAvailable'] += 1
        elif status == 'Done':
            result['ElementsDone'] += 1
        if status not in ['Done', 'Available']:
            agent = element['ChildQueueUrl']
            if agent not in result['ActiveAgents']:
                result['ActiveAgents'][agent] = 0
            result['ActiveAgents'][agent] += 1
    return result
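# Minimal usage sketch for checkWorkQueue, assuming WMCore is importable and the
# request name below is a hypothetical placeholder; it just pretty-prints the
# per-status element counts and the active-agent summary returned above.
from pprint import pprint

if __name__ == '__main__':
    summary = checkWorkQueue('pdmvserv_EXAMPLE_WORKFLOW_130101_000000_0000')
    pprint(summary)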
def main(): """ _main_ """ usage = "Usage: python %prog -w workflow" parser = OptionParser(usage=usage) parser.add_option('-w', '--workflow', help='Workflow name in ReqMgr', dest='wf') (options, args) = parser.parse_args() if not options.wf: parser.error('You must provide a workflow name') sys.exit(1) couchUrl = "https://cmsweb.cern.ch/couchdb" database = "acdcserver" failures = {} svc = Database(database, couchUrl) result = svc.loadView("ACDC", "byCollectionName", { 'key': options.wf, 'include_docs': True, 'reduce': False }) print "Found %i failures/rows in total." % len(result["rows"]) for entry in result["rows"]: if entry['doc']['fileset_name'] in failures: failures[entry['doc']['fileset_name']] += 1 else: failures[entry['doc']['fileset_name']] = 1 pprint(failures) print "\nDone!"
def main(): """ _main_ """ usage = "Usage: python %prog -w workflow" parser = OptionParser(usage = usage) parser.add_option('-w', '--workflow', help = 'Workflow name in ReqMgr', dest = 'wf') (options, args) = parser.parse_args() if not options.wf: parser.error('You must provide a workflow name') sys.exit(1) couchUrl = "https://cmsweb.cern.ch/couchdb" database = "acdcserver" failures = {} svc = Database(database, couchUrl) result = svc.loadView("ACDC", "byCollectionName", {'key' : options.wf, 'include_docs' : True, 'reduce' : False}) print "Found %i failures/rows in total." % len(result["rows"]) for entry in result["rows"]: if entry['doc']['fileset_name'] in failures: failures[entry['doc']['fileset_name']] += 1 else: failures[entry['doc']['fileset_name']] = 1 pprint(failures) print "\nDone!"
def main():
    config = loadConfigurationFile(os.environ['WMAGENT_CONFIG'])
    config.CoreDatabase.dialect = 'oracle'
    init = WMInit()
    init.setDatabaseConnection(config.CoreDatabase.connectUrl, config.CoreDatabase.dialect)

    couchDB = Database('wmagent_jobdump/fwjrs', '')
    couchDB2 = Database('wmagent_jobdump/jobs', '')

    myThread = threading.currentThread()
    daofactory = DAOFactory(package="WMCore.WMBS", logger=logging, dbinterface=myThread.dbi)
    getJobsDAO = daofactory(classname="Jobs.GetAllJobs")
    completedJobs = getJobsDAO.execute(state='complete')

    candidates = []
    while len(completedJobs):
        candidates = []
        chunk = completedJobs[:500]
        completedJobs = completedJobs[500:]
        result = couchDB.loadView('FWJRDump', 'outputByJobID', keys=chunk)
        rows = result['rows']
        for entry in rows:
            candidates.append(entry['key'])
        for jobId in candidates:
            doc = couchDB2.document(str(jobId))
            last = max(map(int, doc['states'].keys()))
            lastState = doc['states'][str(last)]['newstate']
            if lastState == 'success':
                print jobId
def main():
    wmstatsDB = Database('wmstats', 'https://alancc7-cloud3.cern.ch/couchdb')
    conflictDocs = wmstatsDB.loadView("WMStats3", "conflicts")
    print("Found {} conflicting documents".format(conflictDocs.get("total_rows")))
    print(" they are:\n{}".format(pformat(conflictDocs.get("rows", []))))
    for doc in conflictDocs.get("rows", []):
        resolveConflict(doc)
def checkForMissingFiles(options):
    # Initialize stuff
    phedexAPI = PhEDEx({'cachepath': options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)
    # Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce': False}, [array])
    documentsIDs = [x['id'] for x in result['rows']]
    badFiles = {}
    # Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)
        # Are we going to change this doc? Better back it up
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()
        # Go through the files
        files = doc["files"]
        for inputFile in files:
            # Use the PhEDEx API to get the site based on the SE
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)
            # Now get the PFN
            pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
            inputPfn = pfnDict[(siteLocation, inputFile)]
            # Run lcg-ls commands and see what we get
            command = 'lcg-ls -b -D srmv2 --srm-timeout 60 %s' % inputPfn
            commandList = shlex.split(command)
            try:
                (stdout, stderr, exitCode) = runCommand(commandList, False, 70)
            except Exception, ex:
                exitCode = 99999
                stdout = ''
                stderr = str(ex)
            if exitCode:
                # Something went wrong with the command
                # Mark the file as bad
                if docID not in badFiles:
                    badFiles[docID] = []
                badFiles[docID].append(inputFile)
                print 'File %s is thought to be bad' % inputFile
                print 'Command was %s' % command
                print 'Return code was %i' % exitCode
                print 'Stdout was %s' % stdout
                print 'Stderr was %s' % stderr
def findParentJobs(jobId):
    # Connect to the Job and FWJR DBs
    jobDB = Database('wmagent_jobdump/jobs', 'http://dummy.cern.ch:5984')
    fwjrDB = Database('wmagent_jobdump/fwjrs', 'http://dummy.cern.ch:5984')
    # Get the document of the child job
    childJobDoc = jobDB.document(id=jobId)
    # Get the workflow and input files, transform them into suitable keys [workflow, lfn]
    workflow = childJobDoc['workflow']
    inputLfns = [x['lfn'] for x in childJobDoc['inputfiles']]
    keys = [[workflow, x] for x in inputLfns]
    # Get the jobs that produced the input files for this job
    # Load the id and fwjr for these jobs since we have to re-run them
    result = fwjrDB.loadView('FWJRDump', 'jobsByOutputLFN', {}, keys)
    for entry in result['rows']:
        key = entry['key']
        jobId = entry['value']
        fwjrId = entry['id']
        result = fwjrDB.loadView('FWJRDump', 'logArchivesByJobID', {},
                                 [[int(x) for x in fwjrId.split('-')]])
        logArch = result['rows'][0]['value']['lfn']
        # Check whether the logArch is in some LogCollect
        logCollectTarball = ''
        result = jobDB.loadView('JobDump', 'jobsByInputLFN', {}, [[workflow, logArch]])
        if result['rows']:
            logCollectJobId = result['rows'][0]['id']
            result = fwjrDB.loadView('FWJRDump', 'outputByJobID', {}, [int(logCollectJobId)])
            if result['rows']:
                logCollectTarball = result['rows'][0]['value']['lfn']
            else:
                print "WARNING: The logArchive for job %s was in a LogCollect job but no tarball was produced" % jobId
        # Print out the information
        print "Job %s produced %s, the logArch for it is %s in %s" % (jobId, key[1], logArch, logCollectTarball)
    return
def main(): if "WMAGENT_CONFIG" not in os.environ: os.environ["WMAGENT_CONFIG"] = '/data/srv/wmagent/current/config/wmagent/config.py' myThread = threading.currentThread() connectToDB() formatter = DBFormatter(logging, myThread.dbi) limboFiles = formatter.formatDict(myThread.dbi.processData("""SELECT dbsbuffer_workflow.name, dbsbuffer_file.lfn FROM dbsbuffer_file INNER JOIN dbsbuffer_workflow ON dbsbuffer_file.workflow = dbsbuffer_workflow.id LEFT OUTER JOIN dbsbuffer_block ON dbsbuffer_file.block_id = dbsbuffer_block.id WHERE dbsbuffer_file.status = 'READY' AND dbsbuffer_block.id is NULL""")) if not limboFiles: print "There are no bad files to fix" return for entry in limboFiles: data = Database('wmagent_jobdump/fwjrs', 'http://%s:5984' % socket.gethostname()) result = data.loadView('FWJRDump', 'jobsByOutputLFN', {'include_docs' : True}, [[entry['name'], entry['lfn']]])['rows'] if result: result = result[0] fwjr = result['doc']['fwjr'] for step in fwjr['steps']: if step == 'cmsRun1': stepInfo = fwjr['steps'][step] site = stepInfo['site'] break else: print "Could not find location for %s" % entry['lfn'] continue se = myThread.dbi.processData("""SELECT wmbs_location_senames.se_name FROM wmbs_location_senames INNER JOIN wmbs_location ON wmbs_location.id = wmbs_location_senames.location WHERE wmbs_location.site_name = '%s'""" % site) se = formatter.formatDict(se)[0] insertQuery = """INSERT INTO dbsbuffer_location (se_name) SELECT '%s' AS se_name FROM DUAL WHERE NOT EXISTS (SELECT se_name FROM dbsbuffer_location WHERE se_name = '%s')""" % (se['se_name'], se['se_name']) myThread.dbi.processData(insertQuery) updateQuery = """INSERT INTO dbsbuffer_file_location (filename, location) SELECT df.id, dl.id FROM dbsbuffer_file df, dbsbuffer_location dl WHERE df.lfn = '%s' AND dl.se_name = '%s'""" % (entry['lfn'], se['se_name']) myThread.dbi.processData(updateQuery) updateQuery = """UPDATE dbsbuffer_file SET status = 'NOTUPLOADED' WHERE lfn = '%s'""" % entry['lfn'] myThread.dbi.processData(updateQuery)
def main():
    if len(sys.argv) != 2:
        print "Usage:"
        print "python CheckWorkQueueElements.py <workflowName>"
        sys.exit(0)
    workflow = sys.argv[1]
    x = Database('workqueue', 'https://cmsweb.cern.ch/couchdb')
    y = x.loadView('WorkQueue', 'elementsByParent', {'include_docs': True}, [workflow])
    for entry in y['rows']:
        doc = entry['doc']
        element = doc['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        if element['Status'] != 'Done':
            print 'Element: %s is %s in %s' % (doc['_id'], element['Status'], element['ChildQueueUrl'])
def main():
    sum = 0
    x = Database('workqueue', 'http://vocms201.cern.ch:5984')
    y = x.loadView('WorkQueue', 'availableByPriority', {'include_docs': True})
    loadDistribution = {}
    for entry in y['rows']:
        doc = entry['doc']
        element = doc['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        key = frozenset(element['SiteWhitelist'])
        if key not in loadDistribution:
            loadDistribution[key] = 0
        loadDistribution[key] += element['Jobs']
    for site, jobs in loadDistribution.items():
        print "Site list %s has %d jobs" % (str(site), jobs)
def main():
    db = Database('wmagent_jobdump/fwjrs', 'http://vocms237.cern.ch:5984')
    results = db.loadView('FWJRDump', 'fwjrsByWorkflowName',
                          {'startkey': ['pdmvserv_TOP-Summer12pLHE-00001_3_v0_STEP0ATCERN_130728_164313_3585'],
                           'endkey': ['pdmvserv_TOP-Summer12pLHE-00001_3_v0_STEP0ATCERN_130728_164313_3585', {}],
                           'include_docs': True})
    globalJobTime = 0.0
    globalEvents = 0.0
    globalCPUTime = 0.0
    globalCPUEventTime = 0.0
    count = 0
    rows = results['rows']
    for entry in rows:
        doc = entry['doc']
        fwjr = doc['fwjr']
        task = fwjr['task']
        if task == '/pdmvserv_TOP-Summer12pLHE-00001_3_v0_STEP0ATCERN_130728_164313_3585/Production':
            steps = fwjr['steps']
            breakLoop = False
            cmsRunStep = None
            for step in steps:
                if steps[step]['status'] != 0 and step != 'logArch1':
                    breakLoop = True
                    break
                if step == 'cmsRun1':
                    cmsRunStep = steps[step]
            if breakLoop:
                continue
            count += 1
            performance = cmsRunStep['performance']
            totalJobTime = float(performance['cpu']['TotalJobTime'])
            globalJobTime += totalJobTime
            cpuTime = float(performance['cpu']['TotalJobCPU'])
            globalCPUTime += cpuTime
            cpuEventTime = float(performance['cpu']['TotalEventCPU'])
            globalCPUEventTime += cpuEventTime
            events = 10000  # hard-coded number of events per job for this workflow
            globalEvents += events
    timePerJob = globalJobTime / count
    if timePerJob > 3600:
        timePerJob = timePerJob / 3600.0
        print 'Average job duration: %.2f hours' % timePerJob
    else:
        print 'Average job duration: %.0f seconds' % timePerJob
    print 'Job time per event: %.2f seconds' % (globalJobTime / globalEvents)
    print 'Average job CPU time: %.0f seconds' % (globalCPUTime / count)
    print 'Average event CPU time: %.8f seconds' % (globalCPUEventTime / globalEvents)
    print 'Events processed: %d' % globalEvents
    print 'Jobs processed: %d' % count
def getLogArchForJob(jobId, workflow):
    # Connect to the Job and FWJR DBs
    jobDB = Database('wmagent_jobdump/jobs', 'http://dummy.cern.ch:5984')
    fwjrDB = Database('wmagent_jobdump/fwjrs', 'http://dummy.cern.ch:5984')
    # Get the logArchives for the job
    result = fwjrDB.loadView('FWJRDump', 'logArchivesByJobID',
                             {'startkey': [int(jobId)], 'endkey': [int(jobId), {}]})
    lastLogArch = sorted(result['rows'], key=lambda x: x['value']['retrycount'])[-1]['value']['lfn']
    # Get the logCollect job for the logArch, if any
    logCollectTarball = ''
    result = jobDB.loadView('JobDump', 'jobsByInputLFN', {}, [[workflow, lastLogArch]])
    if result['rows']:
        logCollectJobId = result['rows'][0]['id']
        result = fwjrDB.loadView('FWJRDump', 'outputByJobID', {}, [int(logCollectJobId)])
        if result['rows']:
            logCollectTarball = result['rows'][0]['value']['lfn']
        else:
            print "WARNING: The logArchive for job %s was in a LogCollect job but no tarball was produced" % jobId
    # Print out the information
    print "The logArch for job %s is %s in %s" % (jobId, lastLogArch, logCollectTarball)
    return
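# Usage sketch (hypothetical job id and workflow name): the jobs/fwjrs couch URLs
# are hard-coded to dummy.cern.ch inside getLogArchForJob above, so they would
# have to be edited to point at the agent that actually ran the job.
if __name__ == '__main__':
    getLogArchForJob('12345', 'pdmvserv_EXAMPLE_WORKFLOW_130101_000000_0000')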
def inject(clipboardUrl, clipboardDb, *requests):
    """
    _inject_
    """
    couch = Database(clipboardDb, clipboardUrl)
    knownDocs = couch.loadView("OpsClipboard", "request_ids")
    knownReqs = [x[u"key"] for x in knownDocs["rows"]]
    for req in requests:
        if req[u"RequestName"] in knownReqs:
            continue
        doc = makeClipboardDoc(req)
        couch.queue(doc)
    couch.commit()
def retrieveResubmissionChildren(requestName, couchUrl, couchDBName):
    """
    _retrieveResubmissionChildren_

    Construct a list of request names which are the resubmission offspring
    from a request. This is a recursive call with a single requestName as input.
    The result only includes the children and not the original request.
    """
    childrenRequestNames = []
    reqmgrDb = Database(couchDBName, couchUrl)
    result = reqmgrDb.loadView('ReqMgr', 'childresubmissionrequests', keys=[requestName])['rows']
    for child in result:
        childrenRequestNames.append(child['id'])
        childrenRequestNames.extend(retrieveResubmissionChildren(child['id'], couchUrl, couchDBName))
    return childrenRequestNames
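# Usage sketch for retrieveResubmissionChildren: the couch URL is the standard
# cmsweb one, but the database name and the request name below are assumptions
# and would need to match the actual ReqMgr couch database and an existing request.
if __name__ == '__main__':
    children = retrieveResubmissionChildren('pdmvserv_EXAMPLE_WORKFLOW_130101_000000_0000',
                                            'https://cmsweb.cern.ch/couchdb',
                                            'reqmgr_workload_cache')
    for childName in children:
        print childName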
def inject(clipboardUrl, clipboardDb, *requests):
    """
    Query CouchDB to check for overlap between the requests already stored
    there and the requests in the input argument, and store the outstanding
    ones in CouchDB.
    """
    couch = Database(clipboardDb, clipboardUrl)
    knownDocs = couch.loadView("OpsClipboard", "request_ids")
    knownReqs = [x[u'key'] for x in knownDocs['rows']]
    for req in requests:
        if req[u'RequestName'] in knownReqs:
            continue
        doc = _makeClipboardDoc(req)
        couch.queue(doc)
    couch.commit()
def main():
    requestName = sys.argv[1]
    x = Database('workqueue', 'https://cmsweb.cern.ch/couchdb')
    y = x.loadView('WorkQueue', 'elementsByParent', {'include_docs': True}, [requestName])
    runningElements = []
    for entry in y['rows']:
        doc = entry['doc']
        element = doc['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        if element['Status'] == 'Running':
            runningElements.append(doc)
    print "Found %d elements running, fix them?" % len(runningElements)
    inputData = raw_input("Type y/n: ")
    if inputData != "y":
        print "Aborting operation..."
        return 0
    for doc in runningElements:
        doc['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']['Status'] = 'Done'
        x.queue(doc)
    x.commit()
    print "Operation complete!"
    return 0
def main(): print "A" db = Database('wmagent_jobdump/jobs', 'http://vocms202.cern.ch:5984') results = db.loadView('JobDump', 'jobsByWorkflowName', {'startkey': ['pdmvserv_PixelRecover53_537p4_130116_130722_4919'], 'endkey' : ['pdmvserv_PixelRecover53_537p4_130116_130722_4919', {}], 'include_docs' : True}) rows = results['rows'] fileInfo = {} for entry in rows: doc = entry['doc'] jobType = doc['jobType'] if jobType != 'Processing': continue mask = doc['mask'] inputFiles = doc['inputfiles'] rAndl = mask['runAndLumis'] for file in inputFiles: lfn = file['lfn'] if lfn not in fileInfo: fileInfo[lfn] = {} for run in file['runs']: runNumber = str(run['run_number']) if runNumber not in rAndl: continue lumis = run['lumis'] for lumi in lumis: if not lumiInMask(rAndl[runNumber], lumi): continue if runNumber not in fileInfo[lfn]: fileInfo[lfn][runNumber] = {} if lumi in fileInfo[lfn][runNumber]: print "ALERT: Lumi %s from run %s is processed twice for file %s" % (lumi, runNumber, lfn) fileInfo[lfn][runNumber][lumi].append(entry['id']) print "Jobs processing it so far: %s" % str(fileInfo[lfn][runNumber][lumi]) else: fileInfo[lfn][runNumber][lumi] = [entry['id']]
class database:
    logger = logfactory

    class DatabaseNotFoundException(Exception):
        def __init__(self, db=''):
            self.db = str(db)
            database.logger.error('Database "%s" was not found.' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Database "%s" was not found.' % (self.db)

    class DatabaseAccessError(Exception):
        def __init__(self, db=''):
            self.db = str(db)
            database.logger.error('Could not access database "%s".' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Could not access database "%s".' % (self.db)

    class DocumentNotFoundException(Exception):
        def __init__(self, name=''):
            self.name = name
            database.logger.error('Document "%s" was not found.' % (self.name))

        def __str__(self):
            return 'Error: Document "%s" was not found.' % (self.name)

    class MapReduceSyntaxError(Exception):
        def __init__(self, query=''):
            self.query = query
            database.logger.error('Invalid query <%s>' % (self.query))

        def __str__(self):
            return 'Error: Invalid query "' + self.query + '"'

    class InvalidOperatorError(Exception):
        def __init__(self, op=''):
            self.op = str(op)

        def __str__(self):
            return 'Error: Operator "' + self.op + '" is invalid.'

    class InvalidParameterError(Exception):
        def __init__(self, param=''):
            self.param = str(param)

        def __str__(self):
            return 'Error: Invalid Parameter: ' + self.param

    cache_dictionary = defaultdict(lambda: None)

    def __init__(self, db_name='', url=None, cache=False):
        host = os.environ['HOSTNAME']
        if url is None:
            url = locator().dbLocation()
        #self.logger.log('I chose the url %s' % (url))
        if not db_name:
            raise self.DatabaseNotFoundException(db_name)
        self.db_name = db_name
        self.cache = cache
        if self.db_name in ['campaigns', 'chained_campaigns']:
            ## force cache for those.
            self.cache = True
        try:
            self.db = Database(db_name, url=url)
            # self.db = Database(db_name, url='http://preptest.cern.ch:5984/')
            # self.db = Database(db_name)  # for using private DB @localhost:5984
        except ValueError as ex:
            raise self.DatabaseAccessError(db_name)
        self.allowed_operators = ['<=', '<', '>=', '>', '==', '~=']

    def __is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    def get(self, prepid=''):
        if self.cache:
            result = self.__get_from_cache(prepid)
            if result:
                return result
        self.logger.log('Looking for document "%s" in "%s"...' % (prepid, self.db_name))
        try:
            doc = self.db.document(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, doc)
            return doc
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return {}

    def __save_to_cache(self, key, value):
        from tools.locker import locker
        with locker.lock(key):
            self.cache_dictionary[key] = value

    def __get_from_cache(self, key):
        from tools.locker import locker
        with locker.lock(key):
            return self.cache_dictionary[key]

    def __document_exists(self, doc):
        if not doc:
            self.logger.error('Trying to locate empty string.', level='warning')
            return False
        # identify the document by its couch _id, falling back to prepid
        if '_id' in doc:
            id = doc['_id']
        elif 'prepid' in doc:
            id = doc['prepid']
        else:
            self.logger.error('Document does not have an "_id" parameter.', level='critical')
            return False
        return self.__id_exists(prepid=id)

    def document_exists(self, prepid=''):
        self.logger.log('Checking existence of document "%s" in "%s"...' % (prepid, self.db_name))
        return self.__id_exists(prepid)

    def __id_exists(self, prepid=''):
        try:
            if self.cache and self.__get_from_cache(prepid) or self.db.documentExists(id=prepid):
                return True
            self.logger.error('Document "%s" does not exist.' % (prepid))
            return False
        except CouchError as ex:
            self.logger.error('Document "%s" was not found on CouchError. Reason: %s; trying a second time with a time out' % (prepid, ex))
            time.sleep(0.5)
            return self.__id_exists(prepid)
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return False

    def delete(self, prepid=''):
        if not prepid:
            return False
        if not self.__id_exists(prepid):
            return False
        self.logger.log('Trying to delete document "%s"...' % (prepid))
        try:
            self.db.delete_doc(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, None)
            return True
        except Exception as ex:
            self.logger.error('Could not delete document: %s . Reason: %s ' % (prepid, ex))
            return False

    def update(self, doc={}):
        if '_id' in doc:
            self.logger.log('Updating document "%s" in "%s"' % (doc['_id'], self.db_name))
        if self.__document_exists(doc):
            if self.cache:
                ## JR: the revision in the cache is not the one in the DB at this point,
                # it will be retaken at the next get
                self.__save_to_cache(doc['_id'], None)
            return self.save(doc)
        self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        return False

    def update_all(self, docs=[]):
        if not docs:
            return False
        for doc in docs:
            if self.__document_exists(doc):
                self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def get_all(self, page_num=-1):
        try:
            limit, skip = self.__pagify(page_num)
            if limit >= 0 and skip >= 0:
                result = self.db.loadView(self.db_name, "all",
                                          options={'limit': limit, 'skip': skip, 'include_docs': True})['rows']
                res = map(lambda r: r['doc'], result)
                return res
            result = self.db.loadView(self.db_name, "all", options={'include_docs': True})['rows']
            res = map(lambda r: r['doc'], result)
            return res
        except Exception as ex:
            self.logger.error('Could not access view. Reason: %s' % (ex))
            return []

    def query(self, query='', page_num=0):
        if not query:
            result = self.get_all(page_num)
            #res = map(lambda r: r['doc'], result)
            return result
        try:
            result = self.__query(query, page=page_num)
            #res = map(lambda r: r['doc'], result)
            return result
        except Exception as ex:
            self.logger.error('Could not load view for query: <%s> . Reason: %s' % (query, ex))
            return []

    def unique_res(self, query_result):
        docids = map(lambda doc: doc['_id'], query_result)
        docids_s = list(set(docids))
        if len(docids) != len(docids_s):
            docids_s = []
            return_dict = copy.deepcopy(query_result)
            for doc in query_result:
                if not doc['_id'] in docids_s:
                    docids_s.append(doc['_id'])
                else:
                    return_dict.remove(doc)
            return return_dict
        return query_result

    def queries(self, query_list):
        ## page_num does not matter
        if not len(query_list):
            return self.get_all(page_num=-1)
        try:
            results_list = []
            ## make each query separately and retrieve only the docs present in every result
            for (i, query_item) in enumerate(query_list):
                res = self.query(query_item, page_num=-1)
                query_result = self.unique_res(res)
                if i != 0:
                    ## keep only the ones already in the intersection
                    id_list = map(lambda doc: doc['_id'], results_list)
                    results_list = filter(lambda doc: doc['_id'] in id_list, query_result)
                else:
                    results_list = query_result
            return results_list
        except Exception as ex:
            self.logger.error('Could not load view for queries: <%s> . Reason: %s' % ('<br>'.join(query_list), ex))
            return []

    def __extract_operators(self, query=''):
        if not query:
            self.logger.error('Empty query', level='warning')
            return ()
        clean = []
        tokens = []
        for op in self.allowed_operators:
            if op in query:
                tokens = query.rsplit(op)
                tokens.insert(1, op)
            else:
                continue
            for tok in tokens:
                if len(tok) < 1:
                    continue
                clean.append(tok.strip().strip('"'))
            if len(clean) != 3:
                raise self.MapReduceSyntaxError(query)
            #if clean[0] not in self.request and clean[1] not in self.campaign:
            #    raise self.IllegalParameterError(clean[0])
            return clean
        raise self.MapReduceSyntaxError(query)

    def __pagify(self, page_num=0, limit=20):
        if page_num < 0:
            return -1, 0
        skip = limit * page_num
        return limit, skip

    def __execute_query(self, tokenized_query='', page=-1, limit=20):
        tokens = []
        try:
            tokens = self.__extract_operators(tokenized_query)
        except Exception as ex:
            self.logger.error('Could not parse query. Reason: %s' % (ex))
            return []
        if tokens:
            view_name, view_opts = self.__build_query(tokens)
            if not view_name or not view_opts:
                return []
            if page > -1:
                view_opts['limit'] = limit
                view_opts['skip'] = page * limit
            view_opts['include_docs'] = True
            result = self.db.loadView(self.db_name, view_name, options=view_opts)['rows']
            res = map(lambda r: r['doc'], result)
            return res
        else:
            return []

    def raw_query(self, view_name, options={}):
        self.logger.error('Executing raw query to the database. Accessed view: %s' % (view_name), level='warning')
        return self.db.loadView(self.db_name, view_name, options)['rows']

    def __get_op(self, oper):
        if oper == '>':
            return lambda x, y: x > y
        elif oper == '>=':
            return lambda x, y: x >= y
        elif oper == '<':
            return lambda x, y: x < y
        elif oper == '<=':
            return lambda x, y: x <= y
        elif oper == '==':
            return lambda x, y: x == y
        else:
            return None

    def __filter(self, tokenized_query=[], view_results=[]):
        if len(tokenized_query) != 3:
            return view_results
        prn = tokenized_query[0]
        op = tokenized_query[1]
        if self.__is_number(tokenized_query[2]):
            val = float(tokenized_query[2])
        else:
            val = tokenized_query[2]
        f = self.__get_op(op)
        return filter(lambda x: f(x[prn], val), view_results)

    def __query(self, query='', page=0, limit=20):
        t_par = []
        results = []
        #what is that , split for ???
        #if ',' in query:
        #    t_par = query.rsplit(',')
        if not t_par:
            t_par = [query]
        if len(t_par) == 1:
            return self.__execute_query(t_par[0], page, limit)  #[page*limit:page*limit+limit]
        elif len(t_par) == 0:
            return []
        #temp = self.__execute_query(t_par[0])#[page*limit:page*limit+limit]
        res = self.__execute_query(t_par[0])
        #res = map(lambda x: x['value'], temp)
        if len(res) == 0:
            return []
        for i in range(1, len(t_par)):
            tq = self.__extract_operators(t_par[i])
            res = self.__filter(tq, res)
        #return map(lambda x: {'value': x}, res[page*limit:page*limit+20])
        return res[page*limit:page*limit+20]

    def __build_query(self, tokens=[]):
        if not tokens:
            return None, None
        if len(tokens) != 3:
            raise self.MapReduceSyntaxError(tokens)
        param = tokens[0]
        op = tokens[1]
        kval = tokens[2]
        try:
            view_opts = self.__build_options(op, kval)
        except Exception as ex:
            self.logger.error('Value types are not compatible with operator %s value %s Error: %s' % (op, kval, str(ex)))
            return None, None
        return param, view_opts

    def __build_options(self, op, val):
        def is_number(s):
            try:
                float(s)
                return True
            except ValueError:
                return False

        # options dictionary
        opts = {}
        # default to the composite key search
        #if '[' in val and ']' in val:
        if val.startswith('[') and val.endswith(']'):
            if op == '==':
                try:
                    e = ast.literal_eval(val)
                    opts['key'] = e
                except:
                    opts['key'] = val
            return opts
        # handle alphanumeric key ranges
        num_flag = False
        if is_number(val):
            num_flag = True
            kval = float(val)
        else:
            kval = val.decode('ascii')
        if '>' in op:
            if '=' in op:
                opts['startkey'] = kval
            else:
                if num_flag:
                    opts['startkey'] = kval + 1
                else:
                    opts['startkey'] = kval
            if num_flag:
                opts['endkey'] = 99999999  # assume it is numeric
            else:
                opts['endkey'] = kval + u'\u9999'
        elif '<' in op:
            if '=' in op:
                opts['endkey'] = kval
            else:
                if num_flag:
                    opts['endkey'] = kval - 1
                else:
                    opts['endkey'] = kval
            if num_flag:
                opts['startkey'] = -99999999
            else:
                opts['startkey'] = ''
        elif '==' == op:
            opts['key'] = kval
        elif '~=' == op:
            if kval[-1] == '*':
                opts['startkey'] = kval[:len(kval)-1]
                opts['endkey'] = kval[:len(kval)-1] + u'\u9999'  #'99999999'#'\u9999'
        return opts

    def save_all(self, docs=[]):
        if not docs:
            return False
        for doc in docs:
            self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def save(self, doc={}):
        if not doc:
            self.logger.error('Tried to save empty document.', level='warning')
            return False
        # TODO: Check if an object exists in the database and fail.
        #if '_id' in doc:
        #    self.logger.log('Using user-defined id: %s' % (doc['_id']))
        #if self.__document_exists(doc):
        #    self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        #    return False
        try:
            #self.logger.error('Document is %s %s' % (doc['_id'], doc))
            #self.logger.error(self.db.commitOne(doc))
            ## this is a change I just made (23/05/2013 13:31) because the return value of update should be True/False
            saved = self.db.commitOne(doc)
            if 'error' in saved[0]:
                self.logger.error('Commit One says : %s' % (saved))
                return False
            else:
                return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def count(self):
        try:
            return len(self.db.allDocs())
        except Exception as ex:
            self.logger.error('Could not count documents in database. Reason: %s' % (ex))
            return -1
def main(): """ _main_ """ usage = "Usage: python %prog -w workflow" parser = OptionParser(usage=usage) parser.add_option('-w', '--workflow', help='Workflow name in ReqMgr', dest='wf') parser.add_option('-v', '--verbose', help='Enable verbose mode', action="store_true") (options, args) = parser.parse_args() if not options.wf: parser.error('You must provide a workflow name') sys.exit(1) couchUrl = "https://cmsweb.cern.ch/couchdb" database = "acdcserver" failures = {} svc = Database(database, couchUrl) result = svc.loadView("ACDC", "byCollectionName", { 'key': options.wf, 'include_docs': True, 'reduce': False }) print("Found %i failures/rows in total." % len(result["rows"])) for entry in result["rows"]: fsetName = entry['doc']['fileset_name'] failures.setdefault( fsetName, { 'jobs': 0, 'files': 0, 'lumis': 0, 'uniqueLumis': 0, 'listLumis': set() }) failures[fsetName]['jobs'] += 1 failures[fsetName]['files'] += len(entry['doc']['files']) for fname in entry['doc']['files']: for runLumi in entry['doc']['files'][fname]['runs']: failures[fsetName]['lumis'] += len(runLumi['lumis']) failures[fsetName]['listLumis'] |= set(runLumi['lumis']) for fsetName in failures: failures[fsetName]['uniqueLumis'] = len( failures[fsetName]['listLumis']) failures[fsetName]['listLumis'] = str(failures[fsetName]['listLumis']) if not options.verbose: for fset in failures.keys(): failures[fset].pop('listLumis', None) print("Summary of failures is as follows:") pprint(failures) print("\nNow printing duplicate files + run + lumis per fileset") printDups(result["rows"]) print("\nDone!")
def swapLocations(options):
    # Initialize stuff
    phedexAPI = PhEDEx({'cachepath': options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)
    # Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce': False}, [array])
    documentsIDs = [x['id'] for x in result['rows']]
    # Load the map file saying how we want to change the locations
    mapFile = open(options.map, 'r')
    locationMap = json.load(mapFile)
    mapFile.close()
    # Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)
        # Are we going to change this doc? Better back it up
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()
        # Go through the files
        files = doc["files"]
        for inputFile in files:
            # Use the PhEDEx API to get the site based on the SE,
            # then map that to the desired target
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)
            targetLocation = locationMap.get(siteLocation, siteLocation)
            if siteLocation == targetLocation:
                # Nothing to do with this one, move on
                continue
            if not options.change:
                # No changes, then give the commands to move the files
                # Get the PFN for both the current location and the target location
                pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
                inputPfn = pfnDict[(siteLocation, inputFile)]
                pfnDict = phedexAPI.getPFN(targetLocation, inputFile)
                targetPfn = pfnDict[(targetLocation, inputFile)]
                # Print it to stdout
                print "lcg-cp -D srmv2 -b %s %s" % (inputPfn, targetPfn)
            else:
                # This is changes time, let's move the stuff
                targetSE = phedexAPI.getNodeSE(targetLocation)
                files[inputFile]["locations"][0] = targetSE
                print "Changing location of %s from %s to %s" % (inputFile, se, targetSE)
        # If specified, commit the changes
        if options.change:
            acdcCouch.commitOne(doc)
    return 0
def main():
    parser = OptionParser()
    parser.add_option("-f", "--input-acdc", dest="acdcList")
    parser.add_option("-m", "--input-mapfile", dest="mapFile")
    parser.add_option("-u", "--url", dest="url")
    parser.add_option("-d", "--dry-run", dest="dryRun", action="store_true", default=False)
    parser.add_option("-l", "--log-file", dest="logFile")
    (options, _) = parser.parse_args()

    handle = open(options.logFile, 'w')
    url = options.url
    database = 'wmagent_acdc'
    acdcDB = Database(database, url)
    handle.write('Opening ACDC database in %s/%s\n' % (url, database))
    inputACDC = readACDCInput(options.acdcList)
    usersMap = readUsersMap(options.mapFile)
    handle.write('Have %d workflows to fix\n' % len(inputACDC))
    handle.write('=================================================================\n')
    for workflow in inputACDC:
        collection_name = workflow['collection_name']
        fileset_name = workflow['fileset_name']
        original_dn = workflow['original_dn']
        handle.write('Original workflow: %s\n' % collection_name)
        handle.write('Original task: %s\n' % fileset_name)
        handle.write('Original owner DN: %s\n' % original_dn)
        if original_dn in usersMap:
            handle.write('This DN maps to %s-%s\n' % (usersMap[original_dn][1], usersMap[original_dn][0]))
        else:
            handle.write('The original DN can not be found in the map file, skipping the workflow\n')
            continue
        params = {'reduce': False,
                  'key': [usersMap[original_dn][1], usersMap[original_dn][0], collection_name, fileset_name]}
        result = acdcDB.loadView('ACDC', 'owner_coll_fileset_docs', params)
        rows = result['rows']
        docIds = map(lambda x: x['id'], rows)
        handle.write('Found %d documents to change\n' % len(rows))
        handle.write('Changing from %s-%s to %s-%s\n' % (usersMap[original_dn][1], usersMap[original_dn][0],
                                                         workflow['group'], workflow['owner']))
        for docId in docIds:
            doc = acdcDB.document(docId)
            doc['owner'] = {'group': workflow['group'], 'user': workflow['owner']}
            if not options.dryRun:
                acdcDB.queue(doc)
        if not options.dryRun:
            response = acdcDB.commit()
        else:
            response = 'This is a dry-run, no changes were made'
        handle.write('Response to write operation: %s\n' % str(response))
        handle.write('Response length: %d\n' % len(response))
        handle.write('=================================================================\n')
    handle.write('Finished script')
    handle.close()
class OpsClipboardTest(unittest.TestCase):
    def setUp(self):
        # For experiments with CouchDB content it's useful when the docs
        # remain in the database by commenting out the tearDownCouch statement.
        # If the database exists at this point, tearDownCouch was probably
        # commented out, so do not drop the database
        #self.testInit = TestInitCouchApp(__file__, dropExistingDb=False)
        self.testInit = TestInitCouchApp(__file__, dropExistingDb=True)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        dbName = "opsclipboard_t"
        self.testInit.setupCouch(dbName, "OpsClipboard")
        # the tests use httplib2 for accessing the OpsClipboard couchapp to
        # emulate web browser access rather than direct REST access;
        # the couch attribute is only used for back verification of written/modified data
        #couchServer = CouchServer(os.environ["COUCHURL"])
        #self.configDatabase = couchServer.connectDatabase(dbName)
        # used to verify written / modified data in CouchDB
        self.couch = Database(dbName, self.testInit.couchUrl)

    def tearDown(self):
        # comment out to see stuff remaining in the database
        self.testInit.tearDownCouch()  # self.testInit.couch gets None-ed here
        #pass

    def _inject(self, numRequests):
        # This creates numRequests documents using the test data above, representing
        # requests belonging to two campaigns that have just been placed
        # into the "ops-hold" state in the ReqMgr.
        # Whenever a request enters the "ops-hold" state, the dict containing the
        # request params should be added to the OpsClipboard using the
        # inject API call (see Assign.py):
        requests, campaignIds, requestIds = getTestRequests(numRequests)
        OpsClipboard.inject(self.testInit.couchUrl, self.testInit.couchDbName, *requests)
        return requests, campaignIds, requestIds

    def _getViewResults(self, viewName, options={}):
        """
        Query CouchDB viewName, return rows.
        """
        try:
            result = self.couch.loadView("OpsClipboard", viewName, options)
        except Exception as ex:
            msg = "Error loading OpsClipboard view: '%s', reason: %s\n" % (viewName, ex)
            self.fail(msg)
        return result[u"rows"]

    def testA_view_all(self):
        """
        Testing the 'all' view.
        """
        self._inject(10)  # creates 10 documents
        # Now read back data for the test requests and verify
        # that we have 10 requests in the OpsClipboard;
        # the 'all' view returns all requests in the OpsClipboard
        allRequests = self._getViewResults("all")
        self.assertEqual(len(allRequests), 10)  # expected 10 requests
        for request in allRequests:
            self.failUnless(request[u"key"] == u"NewlyHeld")

    def testB_view_campaign(self):
        """
        Testing the 'campaign' view.
        Returns requests with campaign_id as keys.
        """
        _, campaignIds, requestIds = self._inject(7)  # creates x docs/requests
        campView = self._getViewResults("campaign")
        self.assertEqual(len(campView), 7)
        for c in campView:
            self.failUnless(c[u"key"] in campaignIds)
            self.failUnless(c[u"value"][u"request_id"] in requestIds)
            # check that the result ('value') dictionary has all these keys
            map(self.failUnless, [c[u"value"].has_key(key) for key in ("doc_id", "state", "updated")])

    def testC_view_campaign_ids(self):
        """
        Testing the 'campaign_ids' view.
        Returns a list of campaign names (campaign_ids) with duplicates removed.
        """
        _, campaignIds, _ = self._inject(8)  # creates x docs/requests
        campList = self._getViewResults("campaign_ids", options={"group": True})
        expected = [campList[0]["key"], campList[1]["key"]]
        self.assertEqual(expected, campaignIds)

    def testD_view_reject_update_changestate(self):
        """
        Testing the 'reject' view.
        Calls the changeState function which also tests the 'changestate' update (CouchDB) function.
        Returns a list of requests in the 'ReadyToReject' state.
        """
        numRequests = 8
        self._inject(numRequests)  # creates x docs/requests
        # all currently injected requests are in the
        # "NewlyHeld" state, none in the "ReadyToReject" state
        rejectList = self._getViewResults("reject")
        self.assertEqual(len(rejectList), 0)
        # change state, need to get docIds from CouchDB first
        allList = self._getViewResults("all")
        for allItem in allList:
            docId = allItem[u"id"]
            try:
                changeState(self.testInit.couchUrl, self.testInit.couchDbName, docId, "ReadyToReject")
            except Exception as ex:
                self.fail(ex)
        rejectList = self._getViewResults("reject")
        self.assertEqual(len(rejectList), numRequests)

    def testE_view_release_update_changestate(self):
        """
        Testing the 'release' view.
        Calls the changeState function which also tests the 'changestate' update (CouchDB) function.
        Returns a list of requests in the 'ReadyToRelease' state.
        """
        numRequests = 18
        self._inject(numRequests)  # creates x docs/requests
        # all currently injected requests are in the
        # "NewlyHeld" state, none in the "ReadyToRelease" state
        rejectList = self._getViewResults("release")
        self.assertEqual(len(rejectList), 0)
        # change state, need to get docIds from CouchDB first
        allList = self._getViewResults("all")
        for allItem in allList:
            docId = allItem[u"id"]
            try:
                changeState(self.testInit.couchUrl, self.testInit.couchDbName, docId, "ReadyToRelease")
            except Exception as ex:
                self.fail(ex)
        rejectList = self._getViewResults("release")
        self.assertEqual(len(rejectList), numRequests)

    def testF_view_request(self):
        """
        Testing the 'request' view.
        This view allows for look up of some request details by id.
        """
        _, _, requestIds = self._inject(15)  # creates x docs/requests
        requestView = self._getViewResults("request")
        self.assertEqual(len(requestView), 15)
        for reqView in requestView:
            self.failUnless(reqView[u"key"] in requestIds)
            self.failUnless(reqView[u"value"][u"state"] == u"NewlyHeld")

    def testG_view_request_id(self):
        """
        Testing the 'request_ids' view.
        'request_ids' maps couch docs to request ids.
        """
        self._inject(11)  # creates x docs/requests
        viewResult = self._getViewResults("request_ids")
        requestIds = [x[u"key"] for x in viewResult]
        self.assertEqual(len(requestIds), 11)

    def testH_view_expunge(self):
        """
        Testing the 'expunge' view.
        """
        self._inject(4)  # creates x docs/requests
        requestView = self._getViewResults("all")
        # no "ReadyToReject" or "ReadyToRelease" request, everything is in "NewlyHeld"
        self.assertEqual(len(requestView), 4)
        c = 0
        for req in requestView:
            docId = req[u"value"]
            try:
                state = "ReadyToReject" if c % 2 == 0 else "ReadyToRelease"
                changeState(self.testInit.couchUrl, self.testInit.couchDbName, docId, state)
            except Exception as ex:
                self.fail(ex)
            c += 1
        expungeView = self._getViewResults("expunge")
        self.assertEqual(len(expungeView), 4)
        for req in expungeView:
            self.assertTrue(req[u"key"] in ("ReadyToReject", "ReadyToRelease"))

    def testI_requestStructure(self):
        """
        Pull documents for each request and check structure.
        """
        _, campaignIds, requestIds = self._inject(20)  # creates x documents / requests
        allRequests = self._getViewResults("all")
        for req in allRequests:
            docId = req[u"id"]
            state = req[u"key"]
            # all requests should be in the NewlyHeld state
            self.assertEqual(state, "NewlyHeld")
            # check that the doc is well formed and matches the data we inserted
            doc = self.couch.document(docId)
            self.failUnless(doc[u"state"] == "NewlyHeld")
            self.failUnless(doc.has_key(u"created"))
            self.failUnless(doc.has_key(u"timestamp"))
            # description is a list of dictionaries, the first one is the initial message
            self.failUnless("Initial injection by the RequestManager" in doc[u"description"][0].values())
            self.failUnless(doc[u"request"][u"campaign_id"] in campaignIds)
            self.failUnless(doc[u'request'][u'request_id'] in requestIds)

    def testJ_update_adddescription(self):
        """
        Create a document and use the 'adddescription' update function handler
        to add descriptions (Ops notes) to request documents.
        """
        request = {"RequestName": "testB_request", "CampaignName": "testB_campaign"}
        OpsClipboard.inject(self.testInit.couchUrl, self.testInit.couchDbName, *[request])
        allRequests = self._getViewResults("all")
        self.assertEqual(len(allRequests), 1)  # check only one request
        docId = allRequests[0][u"id"]
        # update the doc descriptions
        addDescription(self.testInit.couchUrl, self.testInit.couchDbName, docId, "NewDescription")
        doc = self.couch.document(docId)
        descriptions = doc["description"]
        # the description entry is a list of dictionaries; each newly created request
        # gets an initial description first, so the one just added is the second one, index 1
        self.failUnless("NewDescription" in doc[u"description"][1].values())