def getFileInformation(workflow, lfn, outModule):
    """
    _getFileInformation_

    Query the FWJR dump database for the job report that produced ``lfn``
    in ``workflow`` and print what the cmsRun1 step recorded for that file
    under the ``outModule`` output module: run/lumi information, number of
    events, size and adler32 checksum.

    Nothing is returned; warnings are printed when the view has no rows or
    when the output module is missing from the job report.
    """
    couch = Database('wmagent_jobdump/fwjrs', 'http://dummy.cern.ch:5984')
    rows = couch.loadView('FWJRDump', 'jobsByOutputLFN',
                          {'include_docs': True}, [[workflow, lfn]])['rows']
    if not rows:
        print("WARNING: No file info in CouchDB")
        return

    # Only the first matching job report is inspected
    steps = rows[0]['doc']['fwjr']['steps']
    if 'cmsRun1' not in steps:
        return

    outputs = steps['cmsRun1']['output']
    if outModule not in outputs:
        print("WARNING: No output module %s in this job" % outModule)
        return

    for fileInfo in outputs[outModule]:
        if fileInfo['lfn'] != lfn:
            continue
        print("File information, %s" % fileInfo['lfn'])
        print("Run/Lumis:")
        for run in fileInfo['runs']:
            lumis = fileInfo['runs'][run]
            print('Run: %s, Lumi range: %s-%s' % (run, lumis[0], lumis[1]))
        print("Number of Events: %s" % fileInfo['events'])
        print("Filesize (bytes): %.1f" % (float(fileInfo['size'])))
        print("Adler32 Checksum: %s" % fileInfo['checksums']['adler32'])
    return
def testSlashInDBName(self):
    """
    Slashes are a valid character in a database name, and are useful as it
    creates a directory structure for the couch data files.

    The database is created through the server and then reached in three
    ways (the handle returned by createDatabase, connectDatabase, and a
    directly constructed Database using the COUCHURL environment variable).
    Each handle must report the slash-containing name unchanged.
    """
    db_name = 'wmcore/unittests'
    try:
        self.server.deleteDatabase(db_name)
    except Exception:
        # Best effort - the database shouldn't already exist. Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt the run.
        pass

    db = self.server.createDatabase(db_name)
    try:
        info = db.info()
        assert info['db_name'] == db_name

        db = self.server.connectDatabase(db_name)
        info = db.info()
        assert info['db_name'] == db_name

        db = Database(db_name, url=os.environ["COUCHURL"])
        info = db.info()
        assert info['db_name'] == db_name
    finally:
        # Remove the database even when one of the checks above fails
        self.server.deleteDatabase(db_name)
def requestDetails(requestName):
    """
    Adds details from the Couch document as well as the database.

    The request name is validated, the request is read from the database
    and its workload is loaded. The workload schema is used as the base and
    overwritten with the database values, then completed with values taken
    from the workload helper and, when a workload database and couch URL are
    known, with the DbsUrl stored in the couch document.

    Returns the resulting schema dictionary.
    """
    WMCore.Lexicon.identifier(requestName)
    dbRequest = GetRequest.getRequestDetails(requestName)
    helper = loadWorkload(dbRequest)

    schema = helper.data.request.schema.dictionary_whole_tree_()
    # values from the DB win over what the spec carries
    schema.update(dbRequest)

    topTask = helper.getTopLevelTask()[0]
    schema['Site Whitelist'] = topTask.siteWhitelist()
    schema['Site Blacklist'] = topTask.siteBlacklist()
    schema['MergedLFNBase'] = str(helper.getMergedLFNBase())
    schema['UnmergedLFNBase'] = str(helper.getUnmergedLFNBase())
    schema['Campaign'] = str(helper.getCampaign())
    schema['AcquisitionEra'] = str(helper.getAcquisitionEra())
    if schema['SoftwareVersions'] == ['DEPRECATED']:
        schema['SoftwareVersions'] = helper.getCMSSWVersions()

    # fall back to the default workload cache name when none is recorded
    schema.setdefault("CouchWorkloadDBName", "reqmgr_workload_cache")

    # DbsUrl lives in the couch document of the request
    workloadDbName = schema.get("CouchWorkloadDBName")
    couchUrl = schema.get("CouchURL")
    if workloadDbName and couchUrl:
        couchDoc = Database(workloadDbName, couchUrl).document(requestName)
        schema["DbsUrl"] = couchDoc.get("DbsUrl", None)

    # https://github.com/dmwm/WMCore/issues/4588
    schema["SubscriptionInformation"] = helper.getSubscriptionInformation()
    return schema
def changePriority(requestName, priority, wmstatUrl = None):
    """
    Set the priority of a request everywhere it is stored.

    ``priority`` is converted to an int and written as the new priority
    (it replaces the stored value, it is not added to it) to, in order:
    Oracle, the workload spec, the CouchDB request document and, if the
    request already has a WorkQueue assigned, that WorkQueue.

    requestName -- name of the request to update
    priority    -- new priority, anything int() accepts
    wmstatUrl   -- passed through to saveWorkload
    """
    request = requestDetails(requestName)
    newPrior = int(priority)

    # change in Oracle
    ChangeState.changeRequestPriority(requestName, newPrior)

    # change in workload (spec)
    helper = loadWorkload(request)
    helper.data.request.priority = newPrior
    saveWorkload(helper, request['RequestWorkflow'], wmstatUrl)

    # change priority in CouchDB
    couchDb = Database(request["CouchWorkloadDBName"], request["CouchURL"])
    fields = {"RequestPriority": newPrior}
    couchDb.updateDocument(requestName, "ReqMgr", "updaterequest", fields=fields)

    # push the change to the WorkQueue
    response = ProdManagement.getProdMgr(requestName)
    if not response or not response[0]:
        # Request must not be assigned yet, we are safe here
        return
    workqueue = WorkQueue.WorkQueue(response[0])
    # Forward the normalised int so every store holds the same value type
    workqueue.updatePriority(requestName, newPrior)
    return
def main():
    """
    _main_

    Count the ACDC failure documents of a workflow per fileset.

    The workflow name comes from the -w/--workflow command line option.
    The ACDC/byCollectionName view of the acdcserver database is read with
    documents included, the rows are counted by the ``fileset_name`` of
    their document, and the resulting dictionary is pretty-printed.
    """
    usage = "Usage: python %prog -w workflow"
    parser = OptionParser(usage = usage)
    parser.add_option('-w', '--workflow', help = 'Workflow name in ReqMgr', dest = 'wf')
    options, _ = parser.parse_args()
    if not options.wf:
        # parser.error() prints the message and exits on its own
        parser.error('You must provide a workflow name')

    couchUrl = "https://cmsweb.cern.ch/couchdb"
    database = "acdcserver"
    svc = Database(database, couchUrl)

    result = svc.loadView("ACDC", "byCollectionName",
                          {'key' : options.wf, 'include_docs' : True, 'reduce' : False})
    print("Found %i failures/rows in total." % len(result["rows"]))

    failures = {}
    for entry in result["rows"]:
        filesetName = entry['doc']['fileset_name']
        failures[filesetName] = failures.get(filesetName, 0) + 1
    pprint(failures)
    print("\nDone!")
def testSlashInDBName(self):
    """
    Slashes are a valid character in a database name, and are useful as it
    creates a directory structure for the couch data files.

    The database is created through the server and then reached in three
    ways (the handle returned by createDatabase, connectDatabase, and a
    directly constructed Database using the COUCHURL environment variable).
    Each handle must report the slash-containing name unchanged.
    """
    db_name = 'wmcore/unittests'
    try:
        self.server.deleteDatabase(db_name)
    except Exception:
        # Best effort - the database shouldn't already exist. Catching
        # Exception (not a bare except) lets Ctrl-C still interrupt the run.
        pass

    db = self.server.createDatabase(db_name)
    try:
        info = db.info()
        assert info['db_name'] == db_name

        db = self.server.connectDatabase(db_name)
        info = db.info()
        assert info['db_name'] == db_name

        db = Database(db_name, url = os.environ["COUCHURL"])
        info = db.info()
        assert info['db_name'] == db_name
    finally:
        # Remove the database even when one of the checks above fails
        self.server.deleteDatabase(db_name)
def main():
    """
    Interactively repair problematic elements in the global WorkQueue inbox.

    The problematic requests are listed, then the operator is asked whether
    they may be closed for new data (``OpenForNewData`` set to False through
    the WorkQueue/in-place update handler). If the answer is not "Y" the
    operator is asked whether the elements may be deleted instead.

    Always returns 0.
    """
    print("Looking for problematic inbox elements...")
    problemRequests = getProblematicRequests()
    print("Found %d bad elements:" % len(problemRequests))
    if not problemRequests:
        print("Nothing to fix, contact a developer if the problem persists...")
        return 0
    for request in problemRequests:
        print(request["RequestName"])

    # NOTE(review): the requests are subscripted above (request["RequestName"])
    # but accessed by attribute below (request._id / request.rev) - confirm
    # what getProblematicRequests() returns supports both.
    var = raw_input("Can we close these for new data in inbox elements: Y/N\n")
    if var == "Y":
        print("Updating them in global inbox, you need a WMAgent proxy for this.")
        inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb')
        for request in problemRequests:
            inboxDB.document(request._id)
            # Python spells the boolean False; the lowercase JSON-style
            # "false" is an undefined name and raised NameError here.
            inboxDB.updateDocument(request._id, 'WorkQueue', 'in-place',
                                   fields={'OpenForNewData': False})
        print("Done with the updates, this should fix the problem.")
        return 0

    var = raw_input("Then can we delete these inbox elements: Y/N\n")
    if var == "Y":
        print("Deleting them from the global inbox, you need a WMAgent proxy for this.")
        inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb')
        for request in problemRequests:
            inboxDB.delete_doc(request._id, request.rev)
        print("Done with the deletions, this should fix the problem.")
        return 0

    print("Doing nothing as you commanded...")
    return 0
def main():
    """
    _main_

    Count the ACDC failure documents of a workflow per fileset.

    The workflow name comes from the -w/--workflow command line option.
    The ACDC/byCollectionName view of the acdcserver database is read with
    documents included, the rows are counted by the ``fileset_name`` of
    their document, and the resulting dictionary is pretty-printed.
    """
    usage = "Usage: python %prog -w workflow"
    parser = OptionParser(usage=usage)
    parser.add_option('-w', '--workflow', help='Workflow name in ReqMgr', dest='wf')
    options, _ = parser.parse_args()
    if not options.wf:
        # parser.error() prints the message and exits on its own
        parser.error('You must provide a workflow name')

    couchUrl = "https://cmsweb.cern.ch/couchdb"
    database = "acdcserver"
    svc = Database(database, couchUrl)

    result = svc.loadView("ACDC", "byCollectionName", {
        'key': options.wf,
        'include_docs': True,
        'reduce': False,
    })
    print("Found %i failures/rows in total." % len(result["rows"]))

    failures = {}
    for entry in result["rows"]:
        filesetName = entry['doc']['fileset_name']
        failures[filesetName] = failures.get(filesetName, 0) + 1
    pprint(failures)
    print("\nDone!")
def checkWorkQueue(requestName):
    """
    Summarise the WorkQueue elements of a request.

    Reads the WorkQueue/elementsByParent view of the central workqueue
    database for ``requestName`` and returns a dictionary with the number of
    elements in Running, Acquired, Available and Done status, plus
    ``ActiveAgents``: a count per ``ChildQueueUrl`` of the elements whose
    status is neither Done nor Available.
    """
    # status value -> counter it feeds in the summary
    counterForStatus = {'Running': 'ElementsRunning',
                        'Acquired': 'ElementsAcquired',
                        'Available': 'ElementsAvailable',
                        'Done': 'ElementsDone'}
    summary = {'ActiveAgents' : {},
               'ElementsRunning' : 0,
               'ElementsAcquired' : 0,
               'ElementsAvailable' : 0,
               'ElementsDone' : 0}

    workqueueDb = Database('workqueue', 'https://cmsweb.cern.ch/couchdb')
    view = workqueueDb.loadView('WorkQueue', 'elementsByParent',
                                {'include_docs' : True}, [requestName])
    activeAgents = summary['ActiveAgents']
    for row in view['rows']:
        element = row['doc']['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        status = element['Status']
        if status in counterForStatus:
            summary[counterForStatus[status]] += 1
        if status in ('Done', 'Available'):
            continue
        # the element is held by a child queue
        agent = element['ChildQueueUrl']
        activeAgents[agent] = activeAgents.get(agent, 0) + 1
    return summary
def __init__(self, config):
    """
    Connect to the database named by the last path component of config.uri.

    config.uri has the form http://servername:port/databaseName; for
    CouchDB the resource name is the database name. Only a CouchDB server
    is supported as the REST server. Unlike CouchSink, the database must
    already exist (with its couchapp installed); otherwise an Exception is
    raised.
    """
    self.config = config
    logging.info("Instantiating ...")

    # split the URI at its last slash: server URL before, database name after
    uri = self.config.uri
    lastSlash = uri.rfind('/')
    dbName = uri[lastSlash + 1:]
    url = uri[:lastSlash]

    # fail early if the database is not there; this check may need revising
    # should a general REST server be supported
    server = CouchServer(url)
    if dbName not in server.listDatabases():
        raise Exception("REST URI: %s (DB name: %s) does not exist." %
                        (uri, dbName))

    self._database = Database(dbName, url)
    logging.info("Initialized.")
def changePriority(requestName, priority, wmstatUrl=None):
    """
    Set the priority of a request everywhere it is stored.

    ``priority`` is converted to an int and written as the new priority
    (it replaces the stored value, it is not added to it) to, in order:
    Oracle, the workload spec, the CouchDB request document and the
    WorkQueue found at "<CouchURL>/workqueue".

    requestName -- name of the request to update
    priority    -- new priority, anything int() accepts
    wmstatUrl   -- passed through to saveWorkload
    """
    request = requestDetails(requestName)
    newPrior = int(priority)

    # change in Oracle
    ChangeState.changeRequestPriority(requestName, newPrior)

    # change in workload (spec)
    helper = loadWorkload(request)
    helper.data.request.priority = newPrior
    saveWorkload(helper, request['RequestWorkflow'], wmstatUrl)

    # change priority in CouchDB
    couchDb = Database(request["CouchWorkloadDBName"], request["CouchURL"])
    fields = {"RequestPriority": newPrior}
    couchDb.updateDocument(requestName, "ReqMgr", "updaterequest",
                           fields=fields, useBody=True)

    # push the change to the WorkQueue
    gqURL = "%s/workqueue" % request["CouchURL"]
    workqueue = WorkQueue.WorkQueue(gqURL)
    # Forward the normalised int so every store holds the same value type
    workqueue.updatePriority(requestName, newPrior)
    return
def main():
    """
    Print the ids of 'complete' WMBS jobs whose last recorded state
    transition in the job dump ended in 'success'.

    The agent configuration is loaded from $WMAGENT_CONFIG and used to open
    the database connection. Completed job ids are processed in chunks of
    500: the FWJRDump/outputByJobID view selects the candidates of a chunk,
    then each candidate's job document is read and its highest-numbered
    state entry is checked.
    """
    chunkSize = 500

    config = loadConfigurationFile(os.environ['WMAGENT_CONFIG'])
    config.CoreDatabase.dialect = 'oracle'
    init = WMInit()
    init.setDatabaseConnection(config.CoreDatabase.connectUrl,
                               config.CoreDatabase.dialect)

    fwjrDb = Database('wmagent_jobdump/fwjrs', '')
    jobsDb = Database('wmagent_jobdump/jobs', '')

    myThread = threading.currentThread()
    daofactory = DAOFactory(package = "WMCore.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)
    getJobsDAO = daofactory(classname = "Jobs.GetAllJobs")
    completedJobs = getJobsDAO.execute(state = 'complete')

    for offset in range(0, len(completedJobs), chunkSize):
        chunk = completedJobs[offset:offset + chunkSize]
        view = fwjrDb.loadView('FWJRDump', 'outputByJobID', keys = chunk)
        candidates = [row['key'] for row in view['rows']]
        for jobId in candidates:
            states = jobsDb.document(str(jobId))['states']
            # state entries are keyed by their index, stored as strings
            lastIndex = max(int(index) for index in states.keys())
            if states[str(lastIndex)]['newstate'] == 'success':
                print(jobId)
def setUp(self):
    """
    Set up the seeding_config_cache couch database with the GroupUser and
    ConfigCache couchapps and open a Database handle on it.
    """
    testInit = TestInitCouchApp("ReproducibleSeedingTest")
    self.testInit = testInit
    testInit.setupCouch("seeding_config_cache", "GroupUser", "ConfigCache")
    self.database = Database(testInit.couchDbName, testInit.couchUrl)
    self.documentId = None
def main():
    """
    Report the documents returned by the WMStats3/conflicts view of the
    wmstats database and pass each row to resolveConflict.
    """
    wmstatsDB = Database('wmstats', 'https://alancc7-cloud3.cern.ch/couchdb')
    conflictDocs = wmstatsDB.loadView("WMStats3", "conflicts")

    total = conflictDocs.get("total_rows")
    print("Found {} conflicting documents".format(total))

    rows = conflictDocs.get("rows", [])
    print(" they are:\n{}".format(pformat(rows)))
    for row in rows:
        resolveConflict(row)
def checkForMissingFiles(options):
    """
    Check with lcg-ls whether the files referenced by ACDC documents exist.

    The ACDC documents of options.group/user/request/task are read from the
    wmagent_acdc database at options.acdcUrl. For every file of every
    document the first listed location is resolved to a node and a PFN
    through PhEDEx, and ``lcg-ls`` is run on the PFN. A non-zero exit code
    (or an exception from runCommand, reported as exit code 99999) marks the
    file as bad and prints the command details.

    When options.change is set, each document is first dumped as JSON to
    "<options.backup>/<doc _id>.bkp".
    """
    #Initialize stuff
    phedexAPI = PhEDEx({'cachepath' : options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    #Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce' : False}, [array])
    documentsIDs = [x['id'] for x in result['rows']]

    badFiles = {}

    #Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        #Are we going to change this doc? Better back it up
        if options.change:
            # The builtin open() is required here: os.open() takes integer
            # flags and returns a bare file descriptor, which json.dump
            # cannot write to.
            backupPath = os.path.join(options.backup, "%s.bkp" % doc["_id"])
            with open(backupPath, 'w') as backupFile:
                json.dump(doc, backupFile)

        #Go through the files
        files = doc["files"]
        for inputFile in files:

            #Use PhEDEx API to get site based on the SE
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)

            #Now get the PFN
            pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
            inputPfn = pfnDict[(siteLocation, inputFile)]

            #Run lcg-ls commands and see what we get
            command = 'lcg-ls -b -D srmv2 --srm-timeout 60 %s' % inputPfn
            commandList = shlex.split(command)
            try:
                (stdout, stderr, exitCode) = runCommand(commandList, False, 70)
            except Exception as ex:
                exitCode = 99999
                stdout = ''
                stderr = str(ex)

            if exitCode:
                #Something went wrong with the command
                #Mark the file as bad
                badFiles.setdefault(docID, []).append(inputFile)
                print('File %s is thought to be bad' % inputFile)
                print('Command was %s' % command)
                print('Return code was %i' % exitCode)
                print('Stdout was %s' % stdout)
                print('Stderr was %s' % stderr)
def getSplitting(requestName):
    """
    Return the ``events_per_job`` splitting parameter of the top level task
    of a request.

    The request is looked up in ReqMgr to find its workload database, the
    pickled 'spec' attachment is loaded from there, and the job splitting
    parameters of the first top level task are read.

    Raises KeyError when the splitting parameters carry no
    ``events_per_job`` entry.
    """
    reqmgrUrl = 'https://cmsweb.cern.ch/reqmgr/reqMgr/'
    reqmgr = RequestManager(dict={'endpoint': reqmgrUrl})
    result = reqmgr.getRequest(requestName)

    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    # NOTE(review): pickle.loads runs whatever the attachment encodes; this
    # is only safe while the workload database is a trusted source.
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)

    topLevelTask = workload.getTopLevelTask()[0]
    params = topLevelTask.jobSplittingParameters()
    return params['events_per_job']
def getSplitting(requestName):
    """
    Return the ``events_per_job`` splitting parameter of the top level task
    of a request.

    The request is looked up in ReqMgr to find its workload database, the
    pickled 'spec' attachment is loaded from there, and the job splitting
    parameters of the first top level task are read.

    Raises KeyError when the splitting parameters carry no
    ``events_per_job`` entry.
    """
    reqmgrUrl = 'https://cmsweb.cern.ch/reqmgr/reqMgr/'
    reqmgr = RequestManager(dict = {'endpoint' : reqmgrUrl})
    result = reqmgr.getRequest(requestName)

    workloadDB = Database(result['CouchWorkloadDBName'], result['CouchURL'])
    workloadPickle = workloadDB.getAttachment(requestName, 'spec')
    # NOTE(review): pickle.loads runs whatever the attachment encodes; this
    # is only safe while the workload database is a trusted source.
    spec = pickle.loads(workloadPickle)
    workload = WMWorkloadHelper(spec)

    topLevelTask = workload.getTopLevelTask()[0]
    params = topLevelTask.jobSplittingParameters()
    return params['events_per_job']
def main():
    """
    Run the Block policy on the top level task of one hard-coded request and
    print the two values it returns.

    The request is looked up in ReqMgr and its pickled 'spec' attachment is
    loaded from the workload database named in the request.
    """
    requestName = 'pdmvserv_HIG-Summer12DR53X-01392_T1_ES_PIC_MSS_1_v0__130724_063344_7207'

    demPolicy = Block()
    reqmgr = RequestManager(dict = {'endpoint' : 'https://cmsweb.cern.ch/reqmgr/reqMgr'})
    requestInfo = reqmgr.getRequest(requestName)

    workloadDB = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    # the spec attachment is a pickled workload
    spec = pickle.loads(workloadDB.getAttachment(requestName, 'spec'))
    workload = WMWorkloadHelper(spec)

    topLevelTask = workload.getTopLevelTask()[0]
    first, second = demPolicy(wmspec = workload, task = topLevelTask)
    print(first)
    print(second)
def setUp(self):
    """
    _setUp_

    Prepare logging and the histogram_dump_t couch database used to test
    the produced JSON, and open a Database handle on it.
    """
    couchDbName = "histogram_dump_t"
    self.testInit = TestInit(__file__)
    self.testInit.setLogging()
    self.testInit.setupCouch(couchDbName)
    self.histogramDB = Database(dbname=couchDbName)
class RESTSink(object):
    """
    Alert sink for posting alerts to a REST server.

    The class acts as a REST client. Only a CouchDB server is supported as
    the REST server at the moment.
    """

    def __init__(self, config):
        """
        Connect to the database named by the last path component of
        config.uri (http://servername:port/databaseName).

        Unlike CouchSink, the database must already exist (with its couchapp
        installed); otherwise an Exception is raised.
        """
        self.config = config
        logging.info("Instantiating ...")

        # split the URI at its last slash: server URL before, db name after
        uri = self.config.uri
        lastSlash = uri.rfind('/')
        dbName = uri[lastSlash + 1:]
        url = uri[:lastSlash]

        # this existence check may need revising for a general REST server
        server = CouchServer(url)
        if dbName not in server.listDatabases():
            raise Exception("REST URI: %s (DB name: %s) does not exist." %
                            (uri, dbName))

        self._database = Database(dbName, url)
        logging.info("Initialized.")

    def send(self, alerts):
        """
        Send a list of alerts to a REST server.

        Every alert is queued as a new document and the queue is committed
        explicitly, rather than left to the Database queue to flush by
        itself once its size limit is exceeded. Returns what commit()
        returns.
        """
        for alert in alerts:
            self._database.queue(Document(None, alert))
        committed = self._database.commit()
        logging.debug("Stored %s alerts to REST resource, retVals: %s" %
                      (len(alerts), committed))
        return committed
def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL):
    """
    _getRequestInformationAndWorkload_

    Fetch the request record from the central request database and load the
    pickled workload ('spec' attachment) from the workload database that the
    record names.

    reqmgrUrl is accepted but not used by this function.

    Returns a (WMWorkloadHelper, request record) tuple.
    """
    reader = RequestDBReader(centralRequestDBURL, couchapp="ReqMgr")
    requestInfo = reader.getRequestByNames(requestName, True)[requestName]

    specDb = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    pickledSpec = specDb.getAttachment(requestName, 'spec')
    workload = WMWorkloadHelper(pickle.loads(pickledSpec))
    return workload, requestInfo
def main():
    """
    Give a location to DBSBuffer files that are READY but belong to no block.

    For every such file the job report that produced it is looked up in the
    local FWJR dump; the site of its cmsRun1 step is translated to an SE
    name, the SE is inserted into dbsbuffer_location if missing, the file is
    attached to it, and the file status is reset to NOTUPLOADED.

    Files whose job report, cmsRun1 step or SE name cannot be found are
    reported and skipped.
    """
    if "WMAGENT_CONFIG" not in os.environ:
        os.environ["WMAGENT_CONFIG"] = '/data/srv/wmagent/current/config/wmagent/config.py'
    myThread = threading.currentThread()
    connectToDB()
    formatter = DBFormatter(logging, myThread.dbi)
    limboFiles = formatter.formatDict(myThread.dbi.processData(
        """SELECT dbsbuffer_workflow.name, dbsbuffer_file.lfn
           FROM dbsbuffer_file
           INNER JOIN dbsbuffer_workflow ON
               dbsbuffer_file.workflow = dbsbuffer_workflow.id
           LEFT OUTER JOIN dbsbuffer_block ON
               dbsbuffer_file.block_id = dbsbuffer_block.id
           WHERE dbsbuffer_file.status = 'READY' AND
                 dbsbuffer_block.id is NULL"""))
    if not limboFiles:
        print("There are no bad files to fix")
        return

    # The connection does not depend on the entry, so build it once
    data = Database('wmagent_jobdump/fwjrs', 'http://%s:5984' % socket.gethostname())

    # NOTE(review): the statements below interpolate values straight into
    # the SQL text; bind variables would be safer if processData is given
    # them - confirm the binds format before changing.
    for entry in limboFiles:
        rows = data.loadView('FWJRDump', 'jobsByOutputLFN',
                             {'include_docs' : True},
                             [[entry['name'], entry['lfn']]])['rows']

        # Reset per entry so a site found for a previous file is never
        # reused for this one
        site = None
        if rows:
            steps = rows[0]['doc']['fwjr']['steps']
            if 'cmsRun1' in steps:
                site = steps['cmsRun1']['site']
        if site is None:
            print("Could not find location for %s" % entry['lfn'])
            continue

        seRows = formatter.formatDict(myThread.dbi.processData(
            """SELECT wmbs_location_senames.se_name
               FROM wmbs_location_senames
               INNER JOIN wmbs_location ON
                   wmbs_location.id = wmbs_location_senames.location
               WHERE wmbs_location.site_name = '%s'""" % site))
        if not seRows:
            print("Could not find an SE name for site %s (file %s)" % (site, entry['lfn']))
            continue
        se = seRows[0]

        insertQuery = """INSERT INTO dbsbuffer_location (se_name)
                         SELECT '%s' AS se_name FROM DUAL
                         WHERE NOT EXISTS
                             (SELECT se_name FROM dbsbuffer_location
                              WHERE se_name = '%s')""" % (se['se_name'], se['se_name'])
        myThread.dbi.processData(insertQuery)

        updateQuery = """INSERT INTO dbsbuffer_file_location (filename, location)
                         SELECT df.id, dl.id
                         FROM dbsbuffer_file df, dbsbuffer_location dl
                         WHERE df.lfn = '%s' AND
                               dl.se_name = '%s'""" % (entry['lfn'], se['se_name'])
        myThread.dbi.processData(updateQuery)

        updateQuery = """UPDATE dbsbuffer_file SET status = 'NOTUPLOADED'
                         WHERE lfn = '%s'""" % entry['lfn']
        myThread.dbi.processData(updateQuery)
def getRequestInformationAndWorkload(requestName, reqmgrUrl):
    """
    _getRequestInformationAndWorkload_

    Ask the ReqMgr at reqmgrUrl for the request and load the pickled
    workload ('spec' attachment) from the workload database that the request
    names.

    Returns a (WMWorkloadHelper, request information) tuple.
    """
    requestInfo = RequestManager(dict={'endpoint': reqmgrUrl}).getRequest(requestName)

    specDb = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    pickledSpec = specDb.getAttachment(requestName, 'spec')
    workload = WMWorkloadHelper(pickle.loads(pickledSpec))
    return workload, requestInfo
def main():
    """
    Print how many jobs are available per site whitelist.

    Reads the WorkQueue/availableByPriority view of the workqueue database
    with documents included, groups the elements by their SiteWhitelist
    (as a frozenset, so list order does not matter) and sums their Jobs.
    """
    workqueueDb = Database('workqueue', 'http://vocms201.cern.ch:5984')
    view = workqueueDb.loadView('WorkQueue', 'availableByPriority', {'include_docs' : True})

    loadDistribution = {}
    for entry in view['rows']:
        element = entry['doc']['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        key = frozenset(element['SiteWhitelist'])
        loadDistribution[key] = loadDistribution.get(key, 0) + element['Jobs']

    for site, jobs in loadDistribution.items():
        print("Site list %s has %d jobs" % (str(site), jobs))
def getRequestInformationAndWorkload(requestName, reqmgrUrl):
    """
    _getRequestInformationAndWorkload_

    Ask the ReqMgr at reqmgrUrl for the request and load the pickled
    workload ('spec' attachment) from the workload database that the request
    names.

    Returns a (WMWorkloadHelper, request information) tuple.
    """
    requestInfo = RequestManager(dict = {'endpoint' : reqmgrUrl}).getRequest(requestName)

    specDb = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    pickledSpec = specDb.getAttachment(requestName, 'spec')
    workload = WMWorkloadHelper(pickle.loads(pickledSpec))
    return workload, requestInfo
def testA(self):
    """
    make some documents and own them

    Ten documents are committed; ownership is then assigned to two owners
    through the "ownthis" update and checked after every step. Finally the
    "newgroup" and "newuser" updates must create their documents.
    """
    guInt = Interface(self.testInit.couchUrl, self.testInit.couchDbName)

    def ownedBy(owner):
        # documents currently owned by the given owner
        return guInt.documentsOwned(owner.group.name, owner.name)

    #create a couple of docs
    couch = Database(self.testInit.couchDbName, self.testInit.couchUrl)
    for index in range(10):
        couch.queue(Document("document%s" % index, {"Test Data": [1,2,3,4] }))
    couch.commit()

    # nobody owns anything yet
    self.assertEqual(len(ownedBy(self.owner1)), 0)
    self.assertEqual(len(ownedBy(self.owner2)), 0)

    guInt.callUpdate("ownthis","document1", group = self.owner1.group.name, user = self.owner1.name)
    self.assertTrue("document1" in ownedBy(self.owner1))
    self.assertEqual(len(ownedBy(self.owner1)), 1)
    self.assertEqual(len(ownedBy(self.owner2)), 0)

    guInt.callUpdate("ownthis","document2", group = self.owner2.group.name, user = self.owner2.name)
    self.assertTrue("document2" in ownedBy(self.owner2))
    self.assertEqual(len(ownedBy(self.owner1)), 1)
    self.assertEqual(len(ownedBy(self.owner2)), 1)

    guInt.callUpdate("newgroup", "group-DataOps", group = "DataOps")
    self.assertTrue(couch.documentExists("group-DataOps") )

    guInt.callUpdate("newuser", "user-damason", group = "DataOps", user = "******")
    self.assertTrue(couch.documentExists("user-damason") )
def main():
    """
    List the WorkQueue elements of a workflow that are not Done.

    The workflow name is the single command line argument. For every element
    of the WorkQueue/elementsByParent view whose status differs from 'Done',
    the document id, status and child queue URL are printed.
    """
    if len(sys.argv) != 2:
        print("Usage:")
        print("python CheckWorkQueueElements.py <workflowName>")
        sys.exit(0)

    workflow = sys.argv[1]
    workqueueDb = Database('workqueue', 'https://cmsweb.cern.ch/couchdb')
    view = workqueueDb.loadView('WorkQueue', 'elementsByParent',
                                {'include_docs' : True}, [workflow])
    for row in view['rows']:
        doc = row['doc']
        element = doc['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']
        if element['Status'] == 'Done':
            continue
        print('Element: %s is %s in %s' % (doc['_id'], element['Status'], element['ChildQueueUrl']))
def getRequestInformationAndWorkload(requestName, reqmgrUrl, centralRequestDBURL):
    """
    _getRequestInformationAndWorkload_

    Fetch the request record from the central request database and load the
    pickled workload ('spec' attachment) from the workload database that the
    record names.

    reqmgrUrl is accepted but not used by this function.

    Returns a (WMWorkloadHelper, request record) tuple.
    """
    reader = RequestDBReader(centralRequestDBURL, couchapp = "ReqMgr")
    requestInfo = reader.getRequestByNames(requestName,True)[requestName]

    specDb = Database(requestInfo['CouchWorkloadDBName'], requestInfo['CouchURL'])
    pickledSpec = specDb.getAttachment(requestName, 'spec')
    workload = WMWorkloadHelper(pickle.loads(pickledSpec))
    return workload, requestInfo
def main():
    """
    Print timing statistics for the Production task of one hard-coded
    workflow, based on the job reports in the FWJR dump.

    Only job reports of the Production task are used. A report is skipped
    when any step other than logArch1 has a non-zero status, or when it has
    no cmsRun1 step. Each counted job is assumed to have processed 10000
    events (hard-coded; the job report's own event count is not read).
    """
    workflow = 'pdmvserv_TOP-Summer12pLHE-00001_3_v0_STEP0ATCERN_130728_164313_3585'
    productionTask = '/%s/Production' % workflow
    eventsPerJob = 10000

    db = Database('wmagent_jobdump/fwjrs', 'http://vocms237.cern.ch:5984')
    results = db.loadView('FWJRDump', 'fwjrsByWorkflowName',
                          {'startkey': [workflow],
                           'endkey' : [workflow, {}],
                           'include_docs' : True})

    globalJobTime = 0.0
    globalEvents = 0.0
    globalCPUTime = 0.0
    globalCPUEventTime = 0.0
    count = 0

    for entry in results['rows']:
        fwjr = entry['doc']['fwjr']
        if fwjr['task'] != productionTask:
            continue

        steps = fwjr['steps']
        failed = any(steps[step]['status'] != 0 and step != 'logArch1'
                     for step in steps)
        if failed:
            continue
        cmsRunStep = steps.get('cmsRun1')
        if cmsRunStep is None:
            # nothing to measure without the cmsRun1 step
            continue

        count += 1
        cpu = cmsRunStep['performance']['cpu']
        globalJobTime += float(cpu['TotalJobTime'])
        globalCPUTime += float(cpu['TotalJobCPU'])
        globalCPUEventTime += float(cpu['TotalEventCPU'])
        globalEvents += eventsPerJob

    if count == 0:
        # avoid dividing by zero when no job qualifies
        print('No successful jobs found, nothing to report')
        return

    timePerJob = globalJobTime/count
    if timePerJob > 3600:
        timePerJob = timePerJob/3600.0
        print('Average job duration: %.2f hours' % timePerJob)
    else:
        print('Average job duration: %.0f seconds' % timePerJob)
    print('Job time per event: %.2f seconds' % (globalJobTime/globalEvents))
    print('Average job CPU time: %.0f seconds' % (globalCPUTime/count))
    # use the accumulated event CPU time, not the value of the last job
    print('Average event CPU time: %.8f seconds' % (globalCPUEventTime/globalEvents))
    print('Events processed: %d' % globalEvents)
    print('Jobs processed: %d' % count)
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.

    """

    def __init__(self, config):
        """
        Connect to config.database on the couch server at config.url,
        creating the database first when the server does not list it.
        """
        self.config = config
        logging.info("Instantiating ...")
        # test if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            # logging.warning is the supported name; logging.warn is a
            # deprecated alias
            logging.warning(
                "'%s' database does not exist on %s, creating it ..." %
                (self.config.database, self.config.url))
            server.createDatabase(self.config.database)
            logging.warning("Created.")
        logging.info("'%s' database exists on %s" %
                     (self.config.database, self.config.url))
        self.database = Database(self.config.database, self.config.url)
        logging.info("Initialized.")

    def send(self, alerts):
        """
        Handle list of alerts.

        Each alert is committed as its own new document. Returns the list
        of the commitOne() results, in the order of ``alerts``.
        """
        retVals = [self.database.commitOne(Document(None, a)) for a in alerts]
        logging.debug("Stored %s alerts to CouchDB, retVals: %s" %
                      (len(alerts), retVals))
        return retVals
def __init__(self, config):
    """
    Connect to config.database on the couch server at config.url, creating
    the database first when the server does not list it.
    """
    self.config = config
    logging.info("Instantiating ...")
    # test if the configured database does not exist, create it
    server = CouchServer(self.config.url)
    databases = server.listDatabases()
    if self.config.database not in databases:
        # logging.warning is the supported name; logging.warn is a
        # deprecated alias
        logging.warning(
            "'%s' database does not exist on %s, creating it ..." %
            (self.config.database, self.config.url))
        server.createDatabase(self.config.database)
        logging.warning("Created.")
    logging.info("'%s' database exists on %s" %
                 (self.config.database, self.config.url))
    self.database = Database(self.config.database, self.config.url)
    logging.info("Initialized.")
class ReproducibleSeedingTests(unittest.TestCase):
    """
    Tests for the ReproducibleSeeding generator against a couch database
    holding the configuration document.
    """

    def setUp(self):
        """
        Set up the seeding_config_cache couch database with the GroupUser
        and ConfigCache couchapps and open a Database handle on it.
        """
        self.testInit = TestInitCouchApp("ReproducibleSeedingTest")
        self.testInit.setupCouch("seeding_config_cache", "GroupUser", "ConfigCache")
        self.database = Database(self.testInit.couchDbName, self.testInit.couchUrl)
        self.documentId = None

    def tearDown(self):
        """Remove the couch database created in setUp."""
        self.testInit.tearDownCouch()
        return

    def testA(self):
        """
        instantiate

        Commit a configuration document declaring three seeds for the
        RandomNumberGeneratorService, run the seeder on a job and check that
        the job baggage received a value for seed1.
        """
        document = Document()
        document[u'pset_tweak_details'] = {
            u'process': {
                u'RandomNumberGeneratorService': {
                    u'seed1': {},
                    u'seed2': {},
                    u'seed3': {},
                },
            },
        }

        document = self.database.commitOne(document)[0]
        seeder = ReproducibleSeeding(CouchUrl = self.testInit.couchUrl,
                                     CouchDBName = self.testInit.couchDbName,
                                     ConfigCacheDoc = document[u'id'])

        job = Job("testjob")
        seeder(job)
        baggage = job.getBaggage()
        seed1 = getattr(baggage.process.RandomNumberGeneratorService, "seed1", None)
        # assertTrue replaces the deprecated failUnless alias; None is
        # checked by identity
        self.assertTrue(seed1 is not None)
def __init__(self, config):
    """
    Connect to the database named by the last path component of config.uri.

    config.uri has the form http://servername:port/databaseName; for
    CouchDB the resource name is the database name. Only a CouchDB server
    is supported as the REST server. Unlike CouchSink, the database must
    already exist; otherwise an Exception is raised.
    """
    self.config = config

    # split the URI at its last slash: server URL before, database name after
    uri = self.config.uri
    lastSlash = uri.rfind('/')
    dbName = uri[lastSlash + 1:]
    url = uri[:lastSlash]

    # this existence check may need revising for a general REST server
    server = CouchServer(url)
    if dbName not in server.listDatabases():
        raise Exception("%s: REST URI: %s, %s does not exist." %
                        (self.__class__.__name__, uri, dbName))

    self._database = Database(dbName, url)
    logging.debug("%s initialized." % self.__class__.__name__)
def retrieveResubmissionChildren(requestName, couchUrl, couchDBName):
    """
    _retrieveResubmissionChildren_

    Collect the names of all requests that descend from ``requestName``
    through resubmission, using the ReqMgr/childresubmissionrequests view.

    The function calls itself for every child, so the result lists each
    child followed by that child's own descendants. The original request
    is not part of the result.
    """
    reqmgrDb = Database(couchDBName, couchUrl)
    rows = reqmgrDb.loadView('ReqMgr', 'childresubmissionrequests',
                             keys = [requestName])['rows']

    descendants = []
    for row in rows:
        childName = row['id']
        descendants.append(childName)
        descendants += retrieveResubmissionChildren(childName, couchUrl, couchDBName)
    return descendants
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.

    """

    def __init__(self, config):
        """
        Connect to config.database on the couch server at config.url,
        creating the database first when the server does not list it.
        """
        self.config = config
        logging.info("Instantiating ...")
        # test if the configured database does not exist, create it
        server = CouchServer(self.config.url)
        databases = server.listDatabases()
        if self.config.database not in databases:
            # logging.warning is the supported name; logging.warn is a
            # deprecated alias
            logging.warning(
                "'%s' database does not exist on %s, creating it ..." %
                (self.config.database, self.config.url)
            )
            server.createDatabase(self.config.database)
            logging.warning("Created.")
        logging.info("'%s' database exists on %s" %
                     (self.config.database, self.config.url))
        self.database = Database(self.config.database, self.config.url)
        logging.info("Initialized.")

    def send(self, alerts):
        """
        Handle list of alerts.

        Each alert is committed as its own new document. Returns the list
        of the commitOne() results, in the order of ``alerts``.
        """
        retVals = [self.database.commitOne(Document(None, a)) for a in alerts]
        logging.debug("Stored %s alerts to CouchDB, retVals: %s" %
                      (len(alerts), retVals))
        return retVals
class CouchSink(object):
    """
    Alert sink for pushing alerts to a couch database.

    """

    def __init__(self, config):
        """
        Connect to config.database on the couch server at config.url; the
        database is created when the server does not list it yet.
        """
        self.config = config
        dbName = self.config.database
        couchUrl = self.config.url

        server = CouchServer(couchUrl)
        if dbName not in server.listDatabases():
            server.createDatabase(dbName)

        self.database = Database(dbName, couchUrl)
        logging.debug("%s initialized." % self.__class__.__name__)

    def send(self, alerts):
        """
        Handle list of alerts.

        Each alert is committed as its own new document; the commitOne()
        results are returned in the order of ``alerts``.
        """
        results = [self.database.commitOne(Document(None, alert))
                   for alert in alerts]
        logging.debug("%s stored alerts, retVals: %s" %
                      (self.__class__.__name__, results))
        return results
def main():
    """
    Compare the request names of an Oracle reqmgr_request dump with the
    document ids stored in CouchDB and print the differences both ways.

    The single command line argument is a Python file which is executed
    into the module namespace and has to define 'reqmgr_request', a
    sequence of dictionaries carrying a 'REQUEST_NAME' key.
    """
    if len(sys.argv) < 2:
        print("Takes 1 input argument - dump of Oracle reqmgr_request "
              "table in a Python dictionary.")
        sys.exit(1)

    print("Creating database connection ...")
    # couch_server = CouchServer(couch_url)
    db = Database(couch_db_name, couch_url)
    # the input file is run as Python code and populates the module globals
    execfile(sys.argv[1], globals())
    oracle_requests = reqmgr_request # read from the input file
    print("Oracle requests: %s" % len(oracle_requests))

    print("Retrieving data from CouchDB ...")
    all_docs = db.allDocs()
    # design documents are not requests, leave them out
    couch_only = [row["id"] for row in all_docs["rows"]
                  if not row["id"].startswith("_design")]
    print("CouchDB requests: %s" % len(couch_only))

    print("Comparing Oracle and CouchDB requests ...")
    oracle_only = []
    for oracle_row in oracle_requests:
        oracle_name = oracle_row["REQUEST_NAME"]
        # names known to both sides are struck off the CouchDB list
        # (first occurrence only), the others are Oracle-only
        if oracle_name in couch_only:
            couch_only.remove(oracle_name)
        else:
            oracle_only.append(oracle_name)

    print("CouchDB requests not present in Oracle:")
    print("%s requests" % len(couch_only))
    for name in couch_only:
        couch_doc = db.document(name)
        if not (name == couch_doc["RequestName"] and name == couch_doc["_id"]):
            print("\t Mismatch: CouchDB id: '%s' RequestName: '%s' name: '%s'" %
                  (couch_doc["_id"], couch_doc["RequestName"], name))
        print("%s  %s  %s" % (couch_doc["RequestName"],
                              couch_doc["RequestType"],
                              couch_doc["RequestStatus"]))
    print("\n\n")
    print("Oracle requests not present in CouchDB:")
    print("%s requests" % len(oracle_only))
    for name in oracle_only:
        print(name)
def main(): if len(sys.argv) < 2: print ("Takes 1 input argument - dump of Oracle reqmgr_request " "table in a Python dictionary.") sys.exit(1) print "Creating database connection ..." # couch_server = CouchServer(couch_url) db = Database(couch_db_name, couch_url) execfile(sys.argv[1], globals()) oracle_requests = reqmgr_request # read from the input file print "Oracle requests: %s" % len(oracle_requests) print "Retrieving data from CouchDB ..." couch_requests = db.allDocs() couch_request_names = [] for row in couch_requests["rows"]: if row["id"].startswith("_design"): continue couch_request_names.append(row["id"]) print "CouchDB requests: %s" % len(couch_request_names) print "Comparing Oracle and CouchDB requests ..." not_present_in_couch = [] for request in oracle_requests: oracle_request_name = request["REQUEST_NAME"] # remove first occurrence of value. Raises ValueError if not present try: couch_request_names.remove(oracle_request_name) except ValueError: not_present_in_couch.append(oracle_request_name) print "CouchDB requests not present in Oracle:" print "%s requests" % len(couch_request_names) for name in couch_request_names: request = db.document(name) if name != request["RequestName"] or name != request["_id"]: print ("\t Mismatch: CouchDB id: '%s' RequestName: '%s' name: '%s'" % (request["_id"], request["RequestName"], name)) print "%s %s %s" % (request["RequestName"], request["RequestType"], request["RequestStatus"]) print "\n\n" print "Oracle requests not present in CouchDB:" print "%s requests" % len(not_present_in_couch) for name in not_present_in_couch: print name
def main(): if len(sys.argv) != 2: print "Usage:" print "python CheckWorkQueueElements.py <workflowName>" sys.exit(0) workflow = sys.argv[1] x = Database('workqueue', 'https://cmsweb.cern.ch/couchdb') y = x.loadView('WorkQueue', 'elementsByParent', {'include_docs': True}, [workflow]) for entry in y['rows']: doc = entry['doc'] element = doc[ 'WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'] if element['Status'] != 'Done': print 'Element: %s is %s in %s' % (doc['_id'], element['Status'], element['ChildQueueUrl'])
def testA(self):
    """
    make some documents and own them

    Ten documents are committed to couch; 'document1' and 'document2'
    are then handed to two different owners through the 'ownthis'
    update and the ownership listings are checked after every step.
    Finally the 'newgroup' and 'newuser' updates are called and the
    existence of the resulting documents is verified.
    """
    guInt = Interface(self.testInit.couchUrl, self.testInit.couchDbName)

    # create a couple of docs
    couch = Database(self.testInit.couchDbName, self.testInit.couchUrl)
    for x in range(10):
        doc = Document("document%s" % x, {"Test Data": [1, 2, 3, 4]})
        couch.queue(doc)
    couch.commit()

    # nobody owns anything yet
    self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 0)
    self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 0)

    guInt.callUpdate("ownthis", "document1", group = self.owner1.group.name, user = self.owner1.name)

    # assertIn / assertTrue replace the deprecated failUnless alias
    self.assertIn("document1", guInt.documentsOwned(self.owner1.group.name, self.owner1.name))
    self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 1)
    self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 0)

    guInt.callUpdate("ownthis", "document2", group = self.owner2.group.name, user = self.owner2.name)

    self.assertIn("document2", guInt.documentsOwned(self.owner2.group.name, self.owner2.name))
    self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 1)
    self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 1)

    guInt.callUpdate("newgroup", "group-DataOps", group = "DataOps")
    self.assertTrue(couch.documentExists("group-DataOps"))

    guInt.callUpdate("newuser", "user-damason", group = "DataOps", user = "******")
    self.assertTrue(couch.documentExists("user-damason"))
def __init__(self, config):
    """
    Keep the configuration and open the configured database, creating
    it first when the server does not list it.

    """
    self.config = config
    couchServer = CouchServer(self.config.url)
    knownDatabases = couchServer.listDatabases()
    isMissing = self.config.database not in knownDatabases
    if isMissing:
        couchServer.createDatabase(self.config.database)
    self.database = Database(self.config.database, self.config.url)
    className = self.__class__.__name__
    logging.debug("%s initialized." % className)
def changeRequestStatus(requestName, newState, priority=None, wmstatUrl=None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority*    : optional integer priority
    - *wmstatUrl*   : optional URL handed to WMStatsWriter; when set,
                      the new status is written there as well

    Apparently when changing request state (on assignment page),
    it's possible to change priority at one go. Hence the argument is here.

    The status is changed in the relational database first and then in
    the request's Couch document.

    """
    # MySQL/Oracle
    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)

    # CouchDB
    # have to first get information where the request Couch document is,
    # extracting the information from reqmgr_request.workflow table field
    reqData = factory(classname="Request.Get").execute(reqId)
    # this would be something like this:
    # http://localhost:5984/reqmgr_workload_cache/maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    wfUrl = reqData['workflow']
    # cut off /maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    couchUrl = wfUrl.replace('/' + requestName + "/spec", '')
    # The database name is the last path component. Only that trailing
    # component is cut off to obtain the server URL: a str.replace() would
    # also strip an identical "/<dbname>" occurring earlier in the URL
    # (e.g. ".../couchdb/couchdb").
    lastSlash = couchUrl.rfind('/')
    couchDbName = couchUrl[lastSlash + 1:]
    url = couchUrl[:lastSlash] if lastSlash >= 0 else couchUrl
    couchDb = Database(couchDbName, url)
    fields = {"RequestStatus": newState}
    couchDb.updateDocument(requestName, "ReqMgr", "updaterequest", fields=fields, useBody=True)

    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)
def setUp(self):
    """
    _setUp_

    Prepare logging and the "histogram_dump_t" couch database, then
    keep a handle on that database for the tests.
    """
    testInit = TestInit(__file__)
    self.testInit = testInit
    testInit.setLogging()
    testInit.setupCouch("histogram_dump_t")
    histogramDB = Database(dbname = "histogram_dump_t")
    self.histogramDB = histogramDB
def changeRequestStatus(requestName, newState, priority=None, wmstatUrl=None):
    """
    _changeRequestStatus_

    Basic API to change a request to a new state, also
    includes optional priority change for the request

    - *requestName* : name of the request to be modified
    - *newState*    : name of the new status for the request
    - *priority*    : optional integer priority
    - *wmstatUrl*   : optional URL handed to WMStatsWriter; when set,
                      the new status is written there as well

    Apparently when changing request state (on assignment page),
    it's possible to change priority at one go. Hence the argument is here.

    The status is changed in the relational database first and then in
    the request's Couch document.

    """
    # MySQL/Oracle
    factory = DBConnect.getConnection()
    reqId = getRequestID(factory, requestName)
    changeRequestIDStatus(reqId, newState, priority)

    # CouchDB
    # have to first get information where the request Couch document is,
    # extracting the information from reqmgr_request.workflow table field
    reqData = factory(classname="Request.Get").execute(reqId)
    # this would be something like this:
    # http://localhost:5984/reqmgr_workload_cache/maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    wfUrl = reqData["workflow"]
    # cut off /maxa_RequestString-OVERRIDE-ME_130306_205649_8066/spec
    couchUrl = wfUrl.replace("/" + requestName + "/spec", "")
    # The database name is the last path component. Only that trailing
    # component is removed to get the server URL: str.replace() would
    # also drop an identical "/<dbname>" found earlier in the URL
    # (e.g. ".../couchdb/couchdb").
    lastSlash = couchUrl.rfind("/")
    couchDbName = couchUrl[lastSlash + 1 :]
    url = couchUrl[:lastSlash] if lastSlash >= 0 else couchUrl
    couchDb = Database(couchDbName, url)
    fields = {"RequestStatus": newState}
    couchDb.updateDocument(requestName, "ReqMgr", "updaterequest", fields=fields, useBody=True)

    # TODO: should we make this mandatory?
    if wmstatUrl:
        wmstatSvc = WMStatsWriter(wmstatUrl)
        wmstatSvc.updateRequestStatus(requestName, newState)
def test06UploadACDC(self):
    # Shared state lives on the test class, so work through it directly
    cls = self.__class__
    params = cls.requestParams

    # get previous request we can piggyback on (newest candidates first)
    for candidate in reversed(cls.reqmgr.getRequest()):
        candidateName = candidate['WMCore.RequestManager.DataStructs.Request.Request']['RequestName']
        if 'RequestCancellation_t' in candidateName:
            params['OriginalRequestName'] = candidateName
            break
    else:
        # the loop ended without finding a usable request
        raise nose.SkipTest("no suitable request in reqmgr to resubmit")

    originalName = params['OriginalRequestName']
    params['InitialTaskPath'] = params['InitialTaskPath'] % originalName
    params['ACDCServer'] = cls.endpoint + '/couchdb'

    # create and upload acdc
    service = DataCollectionService(url=cls.endpoint + '/couchdb', database = 'wmagent_acdc')
    service.createCollection(originalName, 'integration', 'DMWM')

    acdcFile = os.path.join(getTestBase(), '..', 'data', 'ACDC',
                            'linacre_ACDC_ReReco13JulCosmics_120809_130020_117_120823_200309_5735.json')
    with open(acdcFile) as infile:
        # the stored dump refers to its original request name; point it
        # to the request picked above before parsing
        rawJson = infile.read().replace('linacre_ACDC_ReReco13JulCosmics_120809_130020_117_120823_200309_5735',
                                        params['OriginalRequestName'])
        acdc_json = loads(rawJson)

    acdcDatabase = Database('wmagent_acdc', cls.endpoint + '/couchdb')
    acdcDatabase.commit(acdc_json)
def setUp(self):
    # Start every test from a fresh database: drop the old one when the
    # server still lists it, then connect and commit.
    server = CouchServer(dburl=self.DB_URL)
    existing = server.listDatabases()
    if self.DB_NAME in existing:
        server.deleteDatabase(self.DB_NAME)
    freshDb = server.connectDatabase(self.DB_NAME)
    #for dq_t in test_data.demo_data:
    #    cdb.queue(dq_t)
    freshDb.commit()
    self.db = Database(dbname=self.DB_NAME)
def _getDbConnection(couchUrl, dbName):
    """
    Check if the database exists, create if not.

    Returns a (couchServer, db) tuple: the CouchServer instance built
    for couchUrl and the database object for dbName, which is either the
    result of createDatabase() or a newly instantiated Database.

    """
    couchServer = CouchServer(couchUrl)
    if dbName not in couchServer.listDatabases():
        logging.info("Database '%s' does not exist, creating it.", dbName)
        db = couchServer.createDatabase(dbName) # returns Database
    else:
        logging.debug("Database '%s' exists.", dbName)
        db = Database(dbName, couchUrl)
    return couchServer, db
def test06UploadACDC(self):
    # All shared fixtures are class attributes of the test case
    testClass = self.__class__
    requestParams = testClass.requestParams
    requestKey = 'WMCore.RequestManager.DataStructs.Request.Request'

    # get previous request we can piggyback on
    for entry in reversed(testClass.reqmgr.getRequest()):
        name = entry[requestKey]['RequestName']
        if 'RequestCancellation_t' in name:
            requestParams['OriginalRequestName'] = name
            break
    else:
        # nothing matched, there is no request to resubmit
        raise nose.SkipTest("no suitable request in reqmgr to resubmit")

    requestParams['InitialTaskPath'] = (requestParams['InitialTaskPath'] %
                                        requestParams['OriginalRequestName'])
    requestParams['ACDCServer'] = testClass.endpoint + '/couchdb'

    # create and upload acdc
    service = DataCollectionService(url=testClass.endpoint + '/couchdb',
                                    database='wmagent_acdc')
    service.createCollection(requestParams['OriginalRequestName'],
                             'integration', 'DMWM')

    dumpPath = os.path.join(
        getTestBase(), '..', 'data', 'ACDC',
        'linacre_ACDC_ReReco13JulCosmics_120809_130020_117_120823_200309_5735.json')
    with open(dumpPath) as infile:
        content = infile.read()
        # replace the request name recorded in the dump by the one in use
        content = content.replace(
            'linacre_ACDC_ReReco13JulCosmics_120809_130020_117_120823_200309_5735',
            requestParams['OriginalRequestName'])
        acdc_json = loads(content)

    acdc_database = Database('wmagent_acdc', testClass.endpoint + '/couchdb')
    acdc_database.commit(acdc_json)
def dump(full_dump=False, fields=None): print "Querying fields: %s\n\n" % fields db = Database(couch_db_name, couch_url) couch_requests = db.allDocs() doc_counter = 0 for row in couch_requests["rows"]: if row["id"].startswith("_design"): continue doc = db.document(row["id"]) if fields: s = '' for f in fields: try: s += "%s:%s " % (f, doc[f]) except KeyError: s += "%s:n/a " % f print "%s %s\n" % (s, doc["RequestName"]) elif full_dump: print "%s\n%s\n%s\n" % (row["id"], doc, 70 * '-') else: print row["id"] doc_counter += 1 #if doc_counter > 100: # break print "Total documents: %s" % doc_counter
def _getDbConnection(couchUrl, dbName):
    """
    Make sure the database exists (creating it when missing), push the
    'stat' couchapp into it through the test harness and return the
    server and database objects.

    """
    server = CouchServer(couchUrl)
    alreadyThere = dbName in server.listDatabases()
    if alreadyThere:
        logging.debug("Database '%s' exists." % dbName)
        database = Database(dbName, couchUrl)
    else:
        logging.info("Database '%s' does not exits, creating it." % dbName)
        database = server.createDatabase(dbName)

    # location of the couchapps, relative to the working directory
    couchappsDir = "../../../src/couchapp"
    statCouchapp = "%s/stat" % couchappsDir

    testHarness = CouchAppTestHarness(dbName, couchUrl)
    testHarness.create()
    testHarness.pushCouchapps(statCouchapp)

    return server, database
def update_software(config_file):
    """
    Retrieve the CMSSW versions and scramarchs from the CMS tag collector
    and keep the "software" document of the CouchDB auxiliary database
    in sync with them.

    config_file is loaded with loadConfigurationFile(); the tag collector
    URL, the CouchDB host and the auxiliary database name are read from
    its views.data section.

    Nothing is done when the tag collector yields no data. When the
    "software" document does not exist yet it is created. Otherwise it
    is committed again only if the comparison below detects a change.

    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(
        tag_collector_url)
    if not all_archs_and_versions:
        # empty / no result from the tag collector, leave CouchDB untouched
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        # drop the CouchDB bookkeeping keys so that only the payload
        # takes part in the comparison below
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id software, does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # now compare recent data from tag collector and what we already have stored
    # sorting is necessary
    # NOTE(review): if both operands are dict-like, sorted() yields only
    # their keys, so a change confined to the values would go unnoticed -
    # confirm the type returned by _get_all_scramarchs_and_versions.
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug(
            "ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
    """
kwargs.get("BlockCloseMaxEvents", helper.getBlockCloseMaxEvents())) blockCloseMaxSize = int( kwargs.get("BlockCloseMaxSize", helper.getBlockCloseMaxSize())) helper.setBlockCloseSettings(blockCloseMaxWaitTime, blockCloseMaxFiles, blockCloseMaxEvents, blockCloseMaxSize) helper.setDashboardActivity(kwargs.get("dashboard", "")) Utilities.saveWorkload(helper, request['RequestWorkflow'], self.wmstatWriteURL) # update AcquisitionEra in the Couch document (#4380) # request object returned above from Oracle doesn't have information Couch # database reqDetails = Utilities.requestDetails(request["RequestName"]) couchDb = Database(reqDetails["CouchWorkloadDBName"], reqDetails["CouchURL"]) couchDb.updateDocument(request["RequestName"], "ReqMgr", "updaterequest", fields={ "AcquisitionEra": reqDetails["AcquisitionEra"], "Teams": JsonWrapper.JSONEncoder().encode( kwargs["Teams"]), "SiteWhitelist": JsonWrapper.JSONEncoder().encode(whiteList), "SiteBlacklist": JsonWrapper.JSONEncoder().encode(blackList) })
def main():
    """
    It will either delete docs in couchdb for the workflow you
    have provided or it will loop over the final (or almost final)
    states and ask for your permission to delete them.

    The workflow name is taken from the single command line argument;
    without it (or with more arguments) wfName is an empty list and the
    workflows are selected from the "bydate" couch view instead.
    The agent configuration is loaded from the file named by the
    WMAGENT_CONFIG environment variable, which is set to a default
    path when missing.
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'

    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Instantiating central services (couch stuff)
    # print "Central Couch URL  : %s" % config.WorkloadSummary.couchurl
    # print "Central ReqMgr URL  : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL

    wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL,
                                 couchapp=config.AnalyticsDataCollector.RequestCouchApp)

    # Central services
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)

    # Local services
    # NOTE(review): the backend is given db_name "workqueue_inbox" while the
    # handle called localWQInboxDB opens 'workqueue' - confirm the database
    # names are not swapped.
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    # statuses used below to filter the workflows returned by the view
    statusList = ["failed", "epic-FAILED", "completed", "closed-out",
                  "announced", "aborted", "aborted-completed", "rejected",
                  "normal-archived", "aborted-archived", "rejected-archived"]

    # NOTE(review): final_status is not assigned in this function, presumably
    # a module level name - verify. The loop variable is not used by the
    # active code in the body.
    for stat in final_status:
        # retrieve list of workflows in each status
        if not wfName:
            # options = {'include_docs': False}
            date_range = {'startkey': [2015, 5, 15, 0, 0, 0],
                          'endkey': [2015, 5, 26, 0, 0, 0]}
            # finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range)
            tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
            #print "Found %d wfs in status: %s" %(len(finalWfs), stat)
            # keep only the workflows whose status is listed in statusList
            finalWfs = []
            for wf, content in tempWfs.iteritems():
                if content['RequestStatus'] in statusList:
                    finalWfs.append(wf)
            print "Found %d wfs in not in active state" % len(finalWfs)
        else:
            finalWfs = [wfName]
            tempWfs = wfDBReader.getRequestByNames(wfName, True)
            print "Checking %s with status '%s'." % (wfName, tempWfs[wfName]['RequestStatus'])

        wqDocs, wqInboxDocs = [], []
        localWQDocs, localWQInboxDocs = [], []
        for counter, wf in enumerate(finalWfs):
            # progress report every 100 workflows
            if counter % 100 == 0:
                print "%d wfs queried ..." % counter
            # check whether there are workqueue docs
            wqDocIDs = wqBackend.getElements(WorkflowName=wf)
            if wqDocIDs:
                print "Found %d workqueue docs for %s, status %s" % (len(wqDocIDs), wf,
                                                                      tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                wqDocs.append(wqDocIDs)

            # check whether there are workqueue_inbox docs
            if wqInboxDB.documentExists(wf):
                print "Found workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus'])
                # then retrieve the document
                wqInboxDoc = wqInboxDB.document(wf)
                wqInboxDocs.append(wqInboxDoc)

            # check local queue
            # (wqDocIDs and wqInboxDoc are reassigned here, replacing the
            # values obtained from the central services above)
            wqDocIDs = localWQBackend.getElements(WorkflowName=wf)
            if wqDocIDs:
                print "Found %d local workqueue docs for %s, status %s" % (len(wqDocIDs), wf,
                                                                            tempWfs[wf]['RequestStatus'])
                print wqDocIDs
                localWQDocs.append(wqDocIDs)
            if localWQInboxDB.documentExists(wf):
                print "Found local workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus'])
                wqInboxDoc = localWQInboxDB.document(wf)
                print wqInboxDoc
                localWQInboxDocs.append(wqInboxDoc)

        # TODO TODO TODO for the moment only deletes for a specific workflow
        # NOTE(review): wqInboxDoc is only assigned when an inbox document was
        # found above, and both wqInboxDoc and wqDocIDs hold the last values
        # assigned in the loop (the local queue lookups come last) while the
        # deletion goes through the central wqInboxDB / wqBackend - confirm
        # this is intended.
        if wfName:
            var = raw_input("\nCan we delete all these documents (Y/N)? ")
            if var == "Y":
                # deletes workqueue_inbox doc
                if wqInboxDoc:
                    print "Deleting workqueue_inbox id %s and %s" % (wqInboxDoc['_id'], wqInboxDoc['_rev'])
                    wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev'])

                # deletes workqueue docs
                if wqDocIDs:
                    print "Deleting workqueue docs %s" % wqDocIDs
                    wqBackend.deleteElements(*[x for x in wqDocIDs if x['RequestName'] in wfName])
            else:
                print "You are the boss, aborting it ...\n"
class ContinuousSummaryHistogramTest(unittest.TestCase):
    """
    Unit tests of ContinuousSummaryHistogram: statistics and binning of
    the JSON representation, handling of extreme input and the compact
    (outlier-dropping / data-less) variants.
    """

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing of produced JSON
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution = "normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed
        numbers according to some given distribution

        - n            : amount of numbers to generate
        - distribution : name of the function of the random module used
                         to draw the numbers
        - kwargs       : keyword arguments for that function; without
                         any, mu = 0 and sigma = 1 are used
        """
        if not kwargs:
            kwargs = {"mu" : 0, "sigma" : 1}
        # the generator does not change between draws, look it up once
        generator = getattr(random, distribution)
        return [generator(**kwargs) for _ in range(n)]

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of normally
        distributed pseudorandom numbers. Check
        that the statistic properties
        in the histogram are accurate to some degree,
        that the histogram binning is done right and
        that this can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """
        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        # the two non-finite points are not counted
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histograms objects
        by chopping outliers and dropping the data all together
        """
        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers = True, sigmaLimit = 1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum(viewvalues(jsonHistogram["data"]))

        # With high probability we must have chopped at least one point
        self.assertLess(pointsInHistogram, 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places = 1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram = False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)