def main(): print "Looking for problematic inbox elements..." problemRequests = getProblematicRequests() print "Found %d bad elements:" % len(problemRequests) if not problemRequests: print "Nothing to fix, contact a developer if the problem persists..." return 0 for request in problemRequests: print request["RequestName"] var = raw_input("Can we close these for new data in inbox elements: Y/N\n") if var == "Y": print "Updating them in global inbox, you need a WMAgent proxy for this." inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb') for request in problemRequests: inboxDB.document(request._id) inboxDB.updateDocument(request._id, 'WorkQueue', 'in-place', fields={'OpenForNewData': false}) print "Done with the deletions, this should fix the problem." return 0 else: var = raw_input("Then can we delete these inbox elements: Y/N\n") if var == "Y": print "Deleting them from the global inbox, you need a WMAgent proxy for this." inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb') for request in problemRequests: inboxDB.delete_doc(request._id, request.rev) print "Done with the deletions, this should fix the problem." return 0 else: print "Doing nothing as you commanded..." return 0
def main(): print "Looking for problematic inbox elements..." problemRequests = getProblematicRequests() print "Found %d bad elements:" % len(problemRequests) if not problemRequests: print "Nothing to fix, contact a developer if the problem persists..." return 0 for request in problemRequests: print request["RequestName"] var = raw_input("Can we close these for new data in inbox elements: Y/N\n") if var == "Y": print "Updating them in global inbox, you need a WMAgent proxy for this." inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb') for request in problemRequests: inboxDB.document(request._id) inboxDB.updateDocument(request._id, 'WorkQueue', 'in-place', fields={'OpenForNewData': false}) print "Done with the deletions, this should fix the problem." return 0 else: var = raw_input("Then can we delete these inbox elements: Y/N\n") if var == "Y": print "Deleting them from the global inbox, you need a WMAgent proxy for this." inboxDB = Database('workqueue_inbox', 'https://cmsweb.cern.ch/couchdb') for request in problemRequests: inboxDB.delete_doc(request._id, request.rev) print "Done with the deletions, this should fix the problem." return 0 else: print "Doing nothing as you commanded..." return 0
def requestDetails(requestName):
    """ Adds details from the Couch document as well as the database """
    WMCore.Lexicon.identifier(requestName)
    request = GetRequest.getRequestDetails(requestName)
    helper = loadWorkload(request)
    schema = helper.data.request.schema.dictionary_whole_tree_()
    # take the stuff from the DB preferentially
    schema.update(request)
    task = helper.getTopLevelTask()[0]
    schema['Site Whitelist'] = task.siteWhitelist()
    schema['Site Blacklist'] = task.siteBlacklist()
    schema['MergedLFNBase'] = str(helper.getMergedLFNBase())
    schema['UnmergedLFNBase'] = str(helper.getUnmergedLFNBase())
    schema['Campaign'] = str(helper.getCampaign())
    schema['AcquisitionEra'] = str(helper.getAcquisitionEra())
    if schema['SoftwareVersions'] == ['DEPRECATED']:
        schema['SoftwareVersions'] = helper.getCMSSWVersions()
    # Check in the CouchWorkloadDBName if not present
    schema.setdefault("CouchWorkloadDBName", "reqmgr_workload_cache")
    # get DbsUrl from CouchDB
    if schema.get("CouchWorkloadDBName", None) and schema.get("CouchURL", None):
        couchDb = Database(schema["CouchWorkloadDBName"], schema["CouchURL"])
        couchReq = couchDb.document(requestName)
        schema["DbsUrl"] = couchReq.get("DbsUrl", None)
    # https://github.com/dmwm/WMCore/issues/4588
    schema["SubscriptionInformation"] = helper.getSubscriptionInformation()
    return schema
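# Hedged usage sketch (not part of the original source): how a caller might
# consume requestDetails(). The request name below is hypothetical.
if __name__ == '__main__':
    schema = requestDetails("user_TestWorkflow_v1_123456_7890")  # hypothetical name
    # DbsUrl is only filled in when both CouchWorkloadDBName and CouchURL are set
    print(schema.get("DbsUrl"))
    print(schema["Site Whitelist"])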
def main():
    config = loadConfigurationFile(os.environ['WMAGENT_CONFIG'])
    config.CoreDatabase.dialect = 'oracle'
    init = WMInit()
    init.setDatabaseConnection(config.CoreDatabase.connectUrl,
                               config.CoreDatabase.dialect)
    couchDB = Database('wmagent_jobdump/fwjrs', '')
    couchDB2 = Database('wmagent_jobdump/jobs', '')
    myThread = threading.currentThread()
    daofactory = DAOFactory(package = "WMCore.WMBS",
                            logger = logging,
                            dbinterface = myThread.dbi)
    getJobsDAO = daofactory(classname = "Jobs.GetAllJobs")
    completedJobs = getJobsDAO.execute(state = 'complete')
    candidates = []
    while len(completedJobs):
        candidates = []
        chunk = completedJobs[:500]
        completedJobs = completedJobs[500:]
        result = couchDB.loadView('FWJRDump', 'outputByJobID', keys = chunk)
        rows = result['rows']
        for entry in rows:
            candidates.append(entry['key'])
        for jobId in candidates:
            doc = couchDB2.document(str(jobId))
            last = max(map(int, doc['states'].keys()))
            lastState = doc['states'][str(last)]['newstate']
            if lastState == 'success':
                print jobId
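# The 500-key batching above guards against overly large CouchDB view queries.
# A hedged, generic sketch of the same pattern as a reusable helper (the name
# chunked() is ours, not WMCore's):
def chunked(items, size=500):
    """Yield successive fixed-size slices of a list."""
    for i in range(0, len(items), size):
        yield items[i:i + size]

# usage: for chunk in chunked(completedJobs): couchDB.loadView(..., keys=chunk)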
def checkForMissingFiles(options):
    # Initialize stuff
    phedexAPI = PhEDEx({'cachepath' : options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    # Let's get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs',
                                {'reduce' : False}, [array])
    documentsIDs = [x['id'] for x in result['rows']]

    badFiles = {}

    # Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        # Are we going to change this doc? Better back it up
        if options.change:
            # os.open() takes flags, not a mode string; the builtin open() is needed here
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()

        # Go through the files
        files = doc["files"]
        for inputFile in files:
            # Use PhEDEx API to get site based on the SE
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)

            # Now get the PFN
            pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
            inputPfn = pfnDict[(siteLocation, inputFile)]

            # Run lcg-ls commands and see what we get
            command = 'lcg-ls -b -D srmv2 --srm-timeout 60 %s' % inputPfn
            commandList = shlex.split(command)
            try:
                (stdout, stderr, exitCode) = runCommand(commandList, False, 70)
            except Exception as ex:
                exitCode = 99999
                stdout = ''
                stderr = str(ex)

            if exitCode:
                # Something went wrong with the command
                # Mark the file as bad
                if docID not in badFiles:
                    badFiles[docID] = []
                badFiles[docID].append(inputFile)
                print 'File %s is thought to be bad' % inputFile
                print 'Command was %s' % command
                print 'Return code was %i' % exitCode
                print 'Stdout was %s' % stdout
                print 'Stderr was %s' % stderr
def main():
    reader = WMStatsReader("http://dummy.cern.ch:5984", "wmagent_summary")
    wmstats = Database('wmagent_summary', 'http://dummy.cern.ch:5984')
    suspiciousWorkflows = reader.workflowsByStatus(["Processing Done"], stale = False)
    for entry in suspiciousWorkflows:
        requestDoc = wmstats.document(entry)
        statusList = requestDoc['request_status']
        if statusList[-2]['status'] == 'normal-archived':
            statusList = statusList[:-1]
            requestDoc['request_status'] = statusList
            wmstats.queue(requestDoc)
    wmstats.commit()
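# WMCore's Database batches writes: queue() stages documents locally and
# commit() performs a single bulk write, which is why commit() sits outside
# the loop above. A hedged toy illustration (doc ids and field are made up):
db = Database('wmagent_summary', 'http://dummy.cern.ch:5984')
for docId in ['wf_one', 'wf_two']:   # hypothetical ids
    doc = db.document(docId)
    doc['checked'] = True            # hypothetical field
    db.queue(doc)                    # staged, not yet written
db.commit()                          # one bulk write for all queued docs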
def main():
    if len(sys.argv) < 2:
        print("Takes 1 input argument - dump of Oracle reqmgr_request "
              "table in a Python dictionary.")
        sys.exit(1)
    print("Creating database connection ...")
    # couch_server = CouchServer(couch_url)
    db = Database(couch_db_name, couch_url)
    execfile(sys.argv[1], globals())
    oracle_requests = reqmgr_request  # read from the input file
    print("Oracle requests: %s" % len(oracle_requests))

    print("Retrieving data from CouchDB ...")
    couch_requests = db.allDocs()
    couch_request_names = []
    for row in couch_requests["rows"]:
        if row["id"].startswith("_design"):
            continue
        couch_request_names.append(row["id"])
    print("CouchDB requests: %s" % len(couch_request_names))

    print("Comparing Oracle and CouchDB requests ...")
    not_present_in_couch = []
    for request in oracle_requests:
        oracle_request_name = request["REQUEST_NAME"]
        # remove first occurrence of value. Raises ValueError if not present
        try:
            couch_request_names.remove(oracle_request_name)
        except ValueError:
            not_present_in_couch.append(oracle_request_name)

    print("CouchDB requests not present in Oracle:")
    print("%s requests" % len(couch_request_names))
    for name in couch_request_names:
        request = db.document(name)
        if name != request["RequestName"] or name != request["_id"]:
            print("\t Mismatch: CouchDB id: '%s' RequestName: '%s' name: '%s'" %
                  (request["_id"], request["RequestName"], name))
        print("%s %s %s" % (request["RequestName"], request["RequestType"],
                            request["RequestStatus"]))
    print("\n\n")
    print("Oracle requests not present in CouchDB:")
    print("%s requests" % len(not_present_in_couch))
    for name in not_present_in_couch:
        print(name)
def findParentJobs(jobId):
    # Connect to the Job and FWJR DBs
    jobDB = Database('wmagent_jobdump/jobs', 'http://dummy.cern.ch:5984')
    fwjrDB = Database('wmagent_jobdump/fwjrs', 'http://dummy.cern.ch:5984')

    # Get the document of the child job
    childJobDoc = jobDB.document(id = jobId)

    # Get the workflow and input files, transform them into suitable keys [workflow, lfn]
    workflow = childJobDoc['workflow']
    inputLfns = [x['lfn'] for x in childJobDoc['inputfiles']]
    keys = [[workflow, x] for x in inputLfns]

    # Get the jobs that produced the input files for this job
    # Load the id and fwjr for these jobs since we have to re-run them
    result = fwjrDB.loadView('FWJRDump', 'jobsByOutputLFN', {}, keys)
    for entry in result['rows']:
        key = entry['key']
        jobId = entry['value']
        fwjrId = entry['id']
        result = fwjrDB.loadView('FWJRDump', 'logArchivesByJobID', {},
                                 [[int(x) for x in fwjrId.split('-')]])
        logArch = result['rows'][0]['value']['lfn']

        # Check whether the logArch is in some LogCollect
        logCollectTarball = ''
        result = jobDB.loadView('JobDump', 'jobsByInputLFN', {}, [[workflow, logArch]])
        if result['rows']:
            logCollectJobId = result['rows'][0]['id']
            result = fwjrDB.loadView('FWJRDump', 'outputByJobID', {}, [int(logCollectJobId)])
            if result['rows']:
                logCollectTarball = result['rows'][0]['value']['lfn']
            else:
                print "WARNING: The logArchive for job %s was in a LogCollect job but no tarball was produced" % jobId

        # Print out the information
        print "Job %s produced %s, the logArch for it is %s in %s" % (jobId, key[1], logArch, logCollectTarball)
    return
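# Hedged usage sketch: the job id below is hypothetical. The function walks
# the FWJRDump/JobDump views to report which parent jobs produced this job's
# input files and where their log archives ended up.
if __name__ == '__main__':
    findParentJobs('12345')  # hypothetical WMAgent job id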
def update_software(config_file):
    """
    Function retrieves CMSSW versions and scramarchs from the CMS tag collector.
    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(tag_collector_url)
    if not all_archs_and_versions:
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id software, does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # now compare recent data from tag collector and what we already have stored
    # sorting is necessary
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug("ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
def dump(full_dump=False, fields=None):
    print("Querying fields: %s\n\n" % fields)
    db = Database(couch_db_name, couch_url)
    couch_requests = db.allDocs()
    doc_counter = 0
    for row in couch_requests["rows"]:
        if row["id"].startswith("_design"):
            continue
        doc = db.document(row["id"])
        if fields:
            s = ''
            for f in fields:
                try:
                    s += "%s:%s " % (f, doc[f])
                except KeyError:
                    s += "%s:n/a " % f
            print("%s %s\n" % (s, doc["RequestName"]))
        elif full_dump:
            print("%s\n%s\n%s\n" % (row["id"], doc, 70*'-'))
        else:
            print(row["id"])
        doc_counter += 1
        #if doc_counter > 100:
        #    break
    print("Total documents: %s" % doc_counter)
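# Hedged usage sketch: selecting a couple of fields per request document.
# couch_db_name / couch_url are module globals in the original script; the
# field names below are examples, not a fixed schema.
if __name__ == '__main__':
    dump(fields=["RequestType", "RequestStatus"])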
class OpsClipboardTest(unittest.TestCase):
    def setUp(self):
        # For experiments with CouchDB content it's useful when the docs
        # remain in the database by commenting out the tearDownCouch statement.
        # If the database exists at this point, tearDownCouch was probably
        # commented out, so do not drop the database
        #self.testInit = TestInitCouchApp(__file__, dropExistingDb=False)
        self.testInit = TestInitCouchApp(__file__, dropExistingDb=True)
        self.testInit.setLogging()
        self.testInit.setDatabaseConnection()
        dbName = "opsclipboard_t"
        self.testInit.setupCouch(dbName, "OpsClipboard")
        # the tests use httplib2 for accessing the OpsClipboard couchapp to
        # emulate web browser access rather than direct REST access
        # couch attribute is only used for back verification of written/modified data
        #couchServer = CouchServer(os.environ["COUCHURL"])
        #self.configDatabase = couchServer.connectDatabase(dbName)
        # used to verify written / modified data in CouchDB
        self.couch = Database(dbName, self.testInit.couchUrl)

    def tearDown(self):
        # comment out to see stuff remaining in the database
        self.testInit.tearDownCouch()  # self.testInit.couch gets None-ed here
        #pass

    def _inject(self, numRequests):
        # This creates numRequests documents using the test data above,
        # representing requests belonging to two campaigns that have just been
        # placed into the "ops-hold" state in the ReqMgr.
        # Whenever a request enters the "ops-hold" state, the dict containing the
        # request params should be added to the OpsClipboard using the
        # inject API call (see Assign.py):
        requests, campaignIds, requestIds = getTestRequests(numRequests)
        OpsClipboard.inject(self.testInit.couchUrl, self.testInit.couchDbName, *requests)
        return requests, campaignIds, requestIds

    def _getViewResults(self, viewName, options = {}):
        """ Query CouchDB viewName, return rows. """
        try:
            result = self.couch.loadView("OpsClipboard", viewName, options)
        except Exception as ex:
            msg = "Error loading OpsClipboard view: '%s', reason:%s\n" % (viewName, ex)
            self.fail(msg)
        return result[u"rows"]

    def testA_view_all(self):
        """ Testing the 'all' view. """
        self._inject(10)  # creates 10 documents
        # Now read back data for the test requests and verify
        # that we have 10 requests in the OpsClipboard.
        # The all view returns all requests in the OpsClipboard
        allRequests = self._getViewResults("all")
        self.assertEqual(len(allRequests), 10)  # expected 10 requests
        for request in allRequests:
            self.failUnless(request[u"key"] == u"NewlyHeld")

    def testB_view_campaign(self):
        """ Testing the 'campaign' view. Returns requests with campaign_id as keys. """
        _, campaignIds, requestIds = self._inject(7)  # creates x docs/requests
        campView = self._getViewResults("campaign")
        self.assertEqual(len(campView), 7)
        for c in campView:
            self.failUnless(c[u"key"] in campaignIds)
            self.failUnless(c[u"value"][u"request_id"] in requestIds)
            # check that result ('value') dictionary has all these keys
            map(self.failUnless,
                [c[u"value"].has_key(key) for key in ("doc_id", "state", "updated")])

    def testC_view_campaign_ids(self):
        """
        Testing the 'campaign_ids' view.
        Returns a list of campaign names (campaign_ids) with duplicates removed.
        """
        _, campaignIds, _ = self._inject(8)  # creates x docs/requests
        campList = self._getViewResults("campaign_ids", options = {"group": True})
        expected = [campList[0]["key"], campList[1]["key"]]
        self.assertEqual(expected, campaignIds)

    def testD_view_reject_update_changestate(self):
        """
        Testing the 'reject' view. Calls the changeState function which also
        tests the 'changestate' update (CouchDB) function.
        Returns a list of requests in the 'ReadyToReject' state.
        """
        numRequests = 8
        self._inject(numRequests)  # creates x docs/requests
        # all currently injected requests are in the
        # "NewlyHeld" state, none in the "ReadyToReject" state
        rejectList = self._getViewResults("reject")
        self.assertEqual(len(rejectList), 0)
        # change state, need to get docIds from CouchDB first
        allList = self._getViewResults("all")
        for allItem in allList:
            docId = allItem[u"id"]
            try:
                changeState(self.testInit.couchUrl, self.testInit.couchDbName,
                            docId, "ReadyToReject")
            except Exception as ex:
                self.fail(ex)
        rejectList = self._getViewResults("reject")
        self.assertEqual(len(rejectList), numRequests)

    def testE_view_release_update_changestate(self):
        """
        Testing the 'release' view. Calls the changeState function which also
        tests the 'changestate' update (CouchDB) function.
        Returns a list of requests in the 'ReadyToRelease' state.
        """
        numRequests = 18
        self._inject(numRequests)  # creates x docs/requests
        # all currently injected requests are in the
        # "NewlyHeld" state, none in the "ReadyToRelease" state
        rejectList = self._getViewResults("release")
        self.assertEqual(len(rejectList), 0)
        # change state, need to get docIds from CouchDB first
        allList = self._getViewResults("all")
        for allItem in allList:
            docId = allItem[u"id"]
            try:
                changeState(self.testInit.couchUrl, self.testInit.couchDbName,
                            docId, "ReadyToRelease")
            except Exception as ex:
                self.fail(ex)
        rejectList = self._getViewResults("release")
        self.assertEqual(len(rejectList), numRequests)

    def testF_view_request(self):
        """
        Testing the 'request' view.
        This view allows for look up of some request details by id.
        """
        _, _, requestIds = self._inject(15)  # creates x docs/requests
        requestView = self._getViewResults("request")
        self.assertEqual(len(requestView), 15)
        for reqView in requestView:
            self.failUnless(reqView[u"key"] in requestIds)
            self.failUnless(reqView[u"value"][u"state"] == u"NewlyHeld")

    def testG_view_request_id(self):
        """
        Testing the 'request_ids' view.
        'request_ids' maps couch docs to request ids.
        """
        self._inject(11)  # creates x docs/requests
        viewResult = self._getViewResults("request_ids")
        requestIds = [x[u"key"] for x in viewResult]
        self.assertEqual(len(requestIds), 11)

    def testH_view_expunge(self):
        """ Testing the 'expunge' view. """
        self._inject(4)  # creates x docs/requests
        requestView = self._getViewResults("all")
        # no "ReadyToReject" or "ReadyToRelease" request, everything is in "NewlyHeld"
        self.assertEqual(len(requestView), 4)
        c = 0
        for req in requestView:
            docId = req[u"value"]
            try:
                # alternate the two final states (the original had "ReadyToReject"
                # in both branches, which made the alternation a no-op)
                state = "ReadyToReject" if c % 2 == 0 else "ReadyToRelease"
                changeState(self.testInit.couchUrl, self.testInit.couchDbName,
                            docId, state)
            except Exception as ex:
                self.fail(ex)
            c += 1
        expungeView = self._getViewResults("expunge")
        self.assertEqual(len(expungeView), 4)
        for req in expungeView:
            self.assertTrue(req[u"key"] in ("ReadyToReject", "ReadyToRelease"))

    def testI_requestStructure(self):
        """ Pull documents for each request and check structure. """
        _, campaignIds, requestIds = self._inject(20)  # creates x documents / requests
        allRequests = self._getViewResults("all")
        for req in allRequests:
            docId = req[u"id"]
            state = req[u"key"]
            # all requests should be in the NewlyHeld state
            self.assertEqual(state, "NewlyHeld")
            # check that the doc is well formed and matches the data we inserted
            doc = self.couch.document(docId)
            self.failUnless(doc[u"state"] == "NewlyHeld")
            self.failUnless(doc.has_key(u"created"))
            self.failUnless(doc.has_key(u"timestamp"))
            # description is a list of dictionaries, the first one is the initial message
            self.failUnless("Initial injection by the RequestManager"
                            in doc[u"description"][0].values())
            self.failUnless(doc[u"request"][u"campaign_id"] in campaignIds)
            self.failUnless(doc[u'request'][u'request_id'] in requestIds)

    def testJ_update_adddescription(self):
        """
        Create a document and use the 'adddescription' update-function handler
        to add descriptions (Ops notes) to request documents.
        """
        request = {"RequestName" : "testB_request", "CampaignName" : "testB_campaign"}
        OpsClipboard.inject(self.testInit.couchUrl, self.testInit.couchDbName, *[request])
        allRequests = self._getViewResults("all")
        self.assertEqual(len(allRequests), 1)  # check only one request
        docId = allRequests[0][u"id"]
        # update the doc descriptions
        addDescription(self.testInit.couchUrl, self.testInit.couchDbName,
                       docId, "NewDescription")
        doc = self.couch.document(docId)
        descriptions = doc["description"]
        # description entry is a list of dictionaries; each newly created request
        # has an initial description first, so the one just added is at index 1
        self.failUnless("NewDescription" in doc[u"description"][1].values())
class ContinuousSummaryHistogramTest(unittest.TestCase):
    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing of produced JSON
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        random.seed()
        self.histogramDB = Database(dbname = "histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def buildRandomNumberList(self, n, distribution = "normalvariate", **kwargs):
        """
        _buildRandomNumberList_

        Builds a list with n pseudorandomly distributed numbers according
        to some given distribution
        """
        numberList = []
        if not kwargs:
            kwargs = {"mu" : 0, "sigma" : 1}
        for _ in range(n):
            generator = getattr(random, distribution)
            numberList.append(generator(**kwargs))
        return numberList

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of normally distributed pseudorandom
        numbers. Check that the statistic properties in the histogram are
        accurate to some degree, that the histogram binning is done right,
        and that this can become a document and be uploaded to couch
        """
        inputData = self.buildRandomNumberList(1000)

        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')

        # Populate the histogram
        for point in inputData:
            histogram.addPoint(point)

        # Get the JSON
        jsonHistogram = histogram.toJSON()

        # Check the histogram core data
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertTrue(jsonHistogram["continuous"])

        # Check the internal data
        self.assertEqual(jsonHistogram["internalData"]["yLabel"], "SomeoneElsesLabel")
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("TestHisto")
        self.assertEqual(len(storedJSON["data"]), 16)
        return

    def testB_extremeData(self):
        """
        _testB_extremeData_

        Put extreme points in the data and try to build a histogram.
        Check that it can process all this correctly
        """
        # First no data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["title"], "TestHisto")
        self.assertEqual(jsonHistogram["xLabel"], "MyLabel")
        self.assertEqual(jsonHistogram["average"], 0.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 0)

        # Data with NaNs and Infs
        inputData = self.buildRandomNumberList(100)
        inputData.append(float('NaN'))
        inputData.append(float('Inf'))
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)

        # One single point, P5
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        histogram.addPoint(5)
        jsonHistogram = histogram.toJSON()
        self.assertEqual(jsonHistogram["average"], 5.0)
        self.assertEqual(jsonHistogram["stdDev"], 0.0)
        self.assertEqual(len(jsonHistogram["data"]), 1)
        self.assertEqual(jsonHistogram["data"]["5.0,5.0"], 1)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1)

        # Test that toJSON is idempotent
        inputData = self.buildRandomNumberList(100)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel')
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        oldData = jsonHistogram["data"]
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 7)
        self.assertEqual(jsonHistogram["data"], oldData)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 100)
        return

    def testC_compactHistogram(self):
        """
        _testC_compactHistogram_

        Check that we can create smaller histogram objects by chopping
        outliers and dropping the data altogether
        """
        # Input normally distributed data and chop anything above 1 stdev (32% of data)
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               dropOutliers = True, sigmaLimit = 1)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 16)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        pointsInHistogram = sum([x for x in viewvalues(jsonHistogram["data"])])
        # With high probability we must have chopped at least one point
        self.assertTrue(pointsInHistogram < 1000)
        self.assertAlmostEqual(pointsInHistogram / 1000.0, 0.68, places = 1)

        # Create a histogram without histogram data
        histogram = ContinuousSummaryHistogram('TestHisto', 'MyLabel', 'SomeoneElsesLabel',
                                               storeHistogram = False)
        inputData = self.buildRandomNumberList(1000)
        for point in inputData:
            histogram.addPoint(point)
        jsonHistogram = histogram.toJSON()
        self.assertAlmostEqual(jsonHistogram["average"], 0.0, places = 0)
        self.assertAlmostEqual(jsonHistogram["stdDev"], 1.0, places = 0)
        self.assertEqual(len(jsonHistogram["data"]), 0)
        self.assertEqual(jsonHistogram["internalData"]["nPoints"], 1000)
        return
class database:
    logger = logfactory

    class DatabaseNotFoundException(Exception):
        def __init__(self, db=''):
            self.db = str(db)
            database.logger.error('Database "%s" was not found.' % (self.db), level='critical')

        def __str__(self):
            # the original returned a tuple here; format a single string instead
            return 'Error: Database "%s" was not found.' % (self.db)

    class DatabaseAccessError(Exception):
        def __init__(self, db=''):
            self.db = str(db)
            database.logger.error('Could not access database "%s".' % (self.db), level='critical')

        def __str__(self):
            return 'Error: Could not access database "%s".' % (self.db)

    class DocumentNotFoundException(Exception):
        def __init__(self, name=''):
            self.name = name
            database.logger.error('Document "%s" was not found.' % (self.name))

        def __str__(self):
            return 'Error: Document "%s" was not found.' % (self.name)

    class MapReduceSyntaxError(Exception):
        def __init__(self, query=''):
            self.query = query
            database.logger.error('Invalid query <%s>' % (self.query))

        def __str__(self):
            return 'Error: Invalid query "' + self.query + '"'

    class InvalidOperatorError(Exception):
        def __init__(self, op=''):
            self.op = str(op)

        def __str__(self):
            return 'Error: Operator "' + self.op + '" is invalid.'

    class InvalidParameterError(Exception):
        def __init__(self, param=''):
            self.param = str(param)

        def __str__(self):
            return 'Error: Invalid Parameter: ' + self.param

    cache_dictionary = defaultdict(lambda: None)

    def __init__(self, db_name='', url=None, cache=False):
        host = os.environ['HOSTNAME']
        if url == None:
            url = locator().dbLocation()
        #self.logger.log('I chose the url %s' % (url))
        if not db_name:
            raise self.DatabaseNotFoundException(db_name)
        self.db_name = db_name
        self.cache = cache
        if self.db_name in ['campaigns', 'chained_campaigns']:
            ## force cache for those.
            self.cache = True
        try:
            self.db = Database(db_name, url=url)
            # self.db = Database(db_name, url='http://preptest.cern.ch:5984/')
            # self.db = Database(db_name)  # for using private DB @localhost:5984
        except ValueError as ex:
            raise self.DatabaseAccessError(db_name)
        self.allowed_operators = ['<=', '<', '>=', '>', '==', '~=']

    def __is_number(self, s):
        try:
            float(s)
            return True
        except ValueError:
            return False

    def get(self, prepid=''):
        if self.cache:
            result = self.__get_from_cache(prepid)
            if result:
                return result
        self.logger.log('Looking for document "%s" in "%s"...' % (prepid, self.db_name))
        try:
            doc = self.db.document(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, doc)
            return doc
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return {}

    def __save_to_cache(self, key, value):
        from tools.locker import locker
        with locker.lock(key):
            self.cache_dictionary[key] = value

    def __get_from_cache(self, key):
        from tools.locker import locker
        with locker.lock(key):
            return self.cache_dictionary[key]

    def __document_exists(self, doc):
        if not doc:
            self.logger.error('Trying to locate empty string.', level='warning')
            return False
        # prefer the CouchDB _id and fall back to prepid (the original code
        # unconditionally re-read doc['_id'] at the end, which broke the
        # prepid fallback)
        if '_id' in doc:
            docid = doc['_id']
        elif 'prepid' in doc:
            docid = doc['prepid']
        else:
            self.logger.error('Document does not have an "_id" parameter.', level='critical')
            return False
        return self.__id_exists(prepid=docid)

    def document_exists(self, prepid=''):
        self.logger.log('Checking existence of document "%s" in "%s"...' % (prepid, self.db_name))
        return self.__id_exists(prepid)

    def __id_exists(self, prepid=''):
        try:
            if self.cache and self.__get_from_cache(prepid) or self.db.documentExists(id=prepid):
                return True
            self.logger.error('Document "%s" does not exist.' % (prepid))
            return False
        except CouchError as ex:
            self.logger.error('Document "%s" was not found on CouchError Reason: %s trying a second time with a time out' % (prepid, ex))
            time.sleep(0.5)
            return self.__id_exists(prepid)
        except Exception as ex:
            self.logger.error('Document "%s" was not found. Reason: %s' % (prepid, ex))
            return False

    def delete(self, prepid=''):
        if not prepid:
            return False
        if not self.__id_exists(prepid):
            return False
        self.logger.log('Trying to delete document "%s"...' % (prepid))
        try:
            self.db.delete_doc(id=prepid)
            if self.cache:
                self.__save_to_cache(prepid, None)
            return True
        except Exception as ex:
            self.logger.error('Could not delete document: %s . Reason: %s ' % (prepid, ex))
            return False

    def update(self, doc={}):
        if '_id' in doc:
            self.logger.log('Updating document "%s" in "%s"' % (doc['_id'], self.db_name))
        if self.__document_exists(doc):
            if self.cache:
                ## JR the revision in the cache is not the one in the DB at this point
                # will be retaken at next get
                self.__save_to_cache(doc['_id'], None)
            return self.save(doc)
        self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        return False

    def update_all(self, docs=[]):
        if not docs:
            return False
        for doc in docs:
            if self.__document_exists(doc):
                self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def get_all(self, page_num=-1):
        try:
            limit, skip = self.__pagify(page_num)
            if limit >= 0 and skip >= 0:
                result = self.db.loadView(self.db_name, "all",
                                          options={'limit': limit, 'skip': skip,
                                                   'include_docs': True})['rows']
                res = map(lambda r: r['doc'], result)
                return res
            result = self.db.loadView(self.db_name, "all",
                                      options={'include_docs': True})['rows']
            res = map(lambda r: r['doc'], result)
            return res
        except Exception as ex:
            self.logger.error('Could not access view. Reason: %s' % (ex))
            return []

    def query(self, query='', page_num=0):
        if not query:
            result = self.get_all(page_num)
            #res = map(lambda r: r['doc'], result)
            return result
        try:
            result = self.__query(query, page=page_num)
            #res = map(lambda r: r['doc'], result)
            return result
        except Exception as ex:
            self.logger.error('Could not load view for query: <%s> . Reason: %s' % (query, ex))
            return []

    def unique_res(self, query_result):
        docids = map(lambda doc: doc['_id'], query_result)
        docids_s = list(set(docids))
        if len(docids) != len(docids_s):
            docids_s = []
            return_dict = copy.deepcopy(query_result)
            for doc in query_result:
                if not doc['_id'] in docids_s:
                    docids_s.append(doc['_id'])
                else:
                    return_dict.remove(doc)
            return return_dict
        return query_result

    def queries(self, query_list):
        ## page_num does not matter
        if not len(query_list):
            return self.get_all(page_num=-1)
        try:
            results_list = []
            ## make each query separately and retrieve only the docs present in
            ## all of them (the running intersection)
            for (i, query_item) in enumerate(query_list):
                res = self.query(query_item, page_num=-1)
                query_result = self.unique_res(res)
                if i != 0:
                    ## keep only the ones already in the intersection
                    id_list = map(lambda doc: doc['_id'], results_list)
                    results_list = filter(lambda doc: doc['_id'] in id_list, query_result)
                else:
                    results_list = query_result
            return results_list
        except Exception as ex:
            self.logger.error('Could not load view for queries: <%s> . Reason: %s'
                              % ('<br>'.join(query_list), ex))
            return []

    def __extract_operators(self, query=''):
        if not query:
            self.logger.error('Empty query', level='warning')
            return ()
        clean = []
        tokens = []
        for op in self.allowed_operators:
            if op in query:
                tokens = query.rsplit(op)
                tokens.insert(1, op)
            else:
                continue
            for tok in tokens:
                if len(tok) < 1:
                    continue
                clean.append(tok.strip().strip('"'))
            if len(clean) != 3:
                raise self.MapReduceSyntaxError(query)
            #if clean[0] not in self.request and clean[1] not in self.campaign:
            #    raise self.IllegalParameterError(clean[0])
            return clean
        raise self.MapReduceSyntaxError(query)

    def __pagify(self, page_num=0, limit=20):
        if page_num < 0:
            return -1, 0
        skip = limit * page_num
        return limit, skip

    def __execute_query(self, tokenized_query='', page=-1, limit=20):
        tokens = []
        try:
            tokens = self.__extract_operators(tokenized_query)
        except Exception as ex:
            self.logger.error('Could not parse query. Reason: %s' % (ex))
            return []
        if tokens:
            view_name, view_opts = self.__build_query(tokens)
            if not view_name or not view_opts:
                return []
            if page > -1:
                view_opts['limit'] = limit
                view_opts['skip'] = page * limit
            view_opts['include_docs'] = True
            result = self.db.loadView(self.db_name, view_name, options=view_opts)['rows']
            res = map(lambda r: r['doc'], result)
            return res
        else:
            return []

    def raw_query(self, view_name, options={}):
        self.logger.error('Executing raw query to the database. Accessed view: %s' % (view_name),
                          level='warning')
        return self.db.loadView(self.db_name, view_name, options)['rows']

    def __get_op(self, oper):
        if oper == '>':
            return lambda x, y: x > y
        elif oper == '>=':
            return lambda x, y: x >= y
        elif oper == '<':
            return lambda x, y: x < y
        elif oper == '<=':
            return lambda x, y: x <= y
        elif oper == '==':
            return lambda x, y: x == y
        else:
            return None

    def __filter(self, tokenized_query=[], view_results=[]):
        if len(tokenized_query) != 3:
            return view_results
        prn = tokenized_query[0]
        op = tokenized_query[1]
        if self.__is_number(tokenized_query[2]):
            val = float(tokenized_query[2])
        else:
            val = tokenized_query[2]
        f = self.__get_op(op)
        return filter(lambda x: f(x[prn], val), view_results)

    def __query(self, query='', page=0, limit=20):
        t_par = []
        results = []
        # what is that , split for ???
        #if ',' in query:
        #    t_par = query.rsplit(',')
        if not t_par:
            t_par = [query]
        if len(t_par) == 1:
            return self.__execute_query(t_par[0], page, limit)  #[page*limit:page*limit+limit]
        elif len(t_par) == 0:
            return []
        #temp = self.__execute_query(t_par[0])  #[page*limit:page*limit+limit]
        res = self.__execute_query(t_par[0])
        #res = map(lambda x: x['value'], temp)
        if len(res) == 0:
            return []
        for i in range(1, len(t_par)):
            tq = self.__extract_operators(t_par[i])
            res = self.__filter(tq, res)
        #return map(lambda x: {'value': x}, res[page*limit:page*limit+20])
        return res[page*limit:page*limit+20]

    def __build_query(self, tokens=[]):
        if not tokens:
            return None, None
        if len(tokens) != 3:
            raise self.MapReduceSyntaxError(tokens)
        param = tokens[0]
        op = tokens[1]
        kval = tokens[2]
        try:
            view_opts = self.__build_options(op, kval)
        except Exception as ex:
            self.logger.error('Value types are not compatible with operator %s value %s Error: %s'
                              % (op, kval, str(ex)))
            return None, None
        return param, view_opts

    def __build_options(self, op, val):
        def is_number(s):
            try:
                float(s)
                return True
            except ValueError:
                return False

        # options dictionary
        opts = {}

        # default the composite key search
        #if '[' in val and ']' in val:
        if val.startswith('[') and val.endswith(']'):
            if op == '==':
                try:
                    e = ast.literal_eval(val)
                    opts['key'] = e
                except:
                    opts['key'] = val
            return opts

        # handle alphanumeric key ranges
        num_flag = False
        if is_number(val):
            num_flag = True
            kval = float(val)
        else:
            kval = val.decode('ascii')
        if '>' in op:
            if '=' in op:
                opts['startkey'] = kval
            else:
                if num_flag:
                    opts['startkey'] = kval + 1
                else:
                    opts['startkey'] = kval
            if num_flag:
                opts['endkey'] = 99999999  # assume it's numeric
            else:
                opts['endkey'] = kval + u'\u9999'
        elif '<' in op:
            if '=' in op:
                opts['endkey'] = kval
            else:
                if num_flag:
                    opts['endkey'] = kval - 1
                else:
                    opts['endkey'] = kval
            if num_flag:
                opts['startkey'] = -99999999
            else:
                opts['startkey'] = ''
        elif '==' == op:
            opts['key'] = kval
        elif '~=' == op:
            if kval[-1] == '*':
                opts['startkey'] = kval[:len(kval) - 1]
                opts['endkey'] = kval[:len(kval) - 1] + u'\u9999'  #'99999999'  #'\u9999'
        return opts

    def save_all(self, docs=[]):
        if not docs:
            return False
        for doc in docs:
            self.db.queue(doc)
        try:
            self.db.commit()
            return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def save(self, doc={}):
        if not doc:
            self.logger.error('Tried to save empty document.', level='warning')
            return False
        # TODO: Check if an object exists in the database and fail.
        #if '_id' in doc:
        #    self.logger.log('Using user-defined id: %s' % (doc['_id']))
        #if self.__document_exists(doc):
        #    self.logger.error('Failed to update document: %s' % (json.dumps(doc)))
        #    return False
        try:
            #self.logger.error('Document is %s %s' % (doc['_id'], doc))
            #self.logger.error(self.db.commitOne(doc))
            ## this is a change I just made (23/05/2013 13:31) because the
            ## return value of update should be True/False
            saved = self.db.commitOne(doc)
            if 'error' in saved[0]:
                self.logger.error('Commit One says : %s' % (saved))
                return False
            else:
                return True
        except Exception as ex:
            self.logger.error('Could not commit changes to database. Reason: %s' % (ex))
            return False

    def count(self):
        try:
            return len(self.db.allDocs())
        except Exception as ex:
            self.logger.error('Could not count documents in database. Reason: %s' % (ex))
            return -1
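# Hedged usage sketch of the query grammar this class parses: a query is a
# single "<field><op><value>" string using one of the allowed operators
# ('<=', '<', '>=', '>', '==', '~='). Database, view, and field names below
# are examples, not guaranteed to exist in a given McM instance.
db = database('requests')                       # hypothetical database name
exact = db.query('prepid==TOP-Summer12-00001')  # '==' becomes a key lookup
ranged = db.query('total_events>=10000')        # range ops become startkey/endkey
prefix = db.query('prepid~=TOP-*')              # trailing '*' becomes a key range
both = db.queries(['status==submitted', 'pwg==TOP'])  # intersection of queries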
def main(): """ It will either delete docs in couchdb for the workflow you have provided or it will loop over the final (or almost final) states and ask for your permission to delete them. """ wfName = sys.argv[1] if len(sys.argv) == 2 else [] if 'WMAGENT_CONFIG' not in os.environ: os.environ[ 'WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py' config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"]) # Instantiating central services (couch stuff) # print "Central Couch URL : %s" % config.WorkloadSummary.couchurl # print "Central ReqMgr URL : %s\n" % config.AnalyticsDataCollector.centralRequestDBURL wfDBReader = RequestDBReader( config.AnalyticsDataCollector.centralRequestDBURL, couchapp=config.AnalyticsDataCollector.RequestCouchApp) # Central services wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl) wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl) # Local services localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue_inbox") localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl) statusList = [ "failed", "epic-FAILED", "completed", "closed-out", "announced", "aborted", "aborted-completed", "rejected", "normal-archived", "aborted-archived", "rejected-archived" ] for stat in final_status: # retrieve list of workflows in each status if not wfName: # options = {'include_docs': False} date_range = { 'startkey': [2015, 5, 15, 0, 0, 0], 'endkey': [2015, 5, 26, 0, 0, 0] } # finalWfs = wfDBReader.getRequestByCouchView("bydate", options, date_range) tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range) #print "Found %d wfs in status: %s" %(len(finalWfs), stat) finalWfs = [] for wf, content in tempWfs.iteritems(): if content['RequestStatus'] in statusList: finalWfs.append(wf) print "Found %d wfs in not in active state" % len(finalWfs) else: finalWfs = [wfName] tempWfs = wfDBReader.getRequestByNames(wfName, True) print "Checking %s with status '%s'." % ( wfName, tempWfs[wfName]['RequestStatus']) wqDocs, wqInboxDocs = [], [] localWQDocs, localWQInboxDocs = [], [] for counter, wf in enumerate(finalWfs): if counter % 100 == 0: print "%d wfs queried ..." % counter # check whether there are workqueue docs wqDocIDs = wqBackend.getElements(WorkflowName=wf) if wqDocIDs: print "Found %d workqueue docs for %s, status %s" % ( len(wqDocIDs), wf, tempWfs[wf]['RequestStatus']) print wqDocIDs wqDocs.append(wqDocIDs) # check whether there are workqueue_inbox docs if wqInboxDB.documentExists(wf): print "Found workqueue_inbox doc for %s, status %s" % ( wf, tempWfs[wf]['RequestStatus']) # then retrieve the document wqInboxDoc = wqInboxDB.document(wf) wqInboxDocs.append(wqInboxDoc) # check local queue wqDocIDs = localWQBackend.getElements(WorkflowName=wf) if wqDocIDs: print "Found %d local workqueue docs for %s, status %s" % ( len(wqDocIDs), wf, tempWfs[wf]['RequestStatus']) print wqDocIDs localWQDocs.append(wqDocIDs) if localWQInboxDB.documentExists(wf): print "Found local workqueue_inbox doc for %s, status %s" % ( wf, tempWfs[wf]['RequestStatus']) wqInboxDoc = localWQInboxDB.document(wf) print wqInboxDoc localWQInboxDocs.append(wqInboxDoc) # TODO TODO TODO for the moment only deletes for a specific workflow if wfName: var = raw_input("\nCan we delete all these documents (Y/N)? 
") if var == "Y": # deletes workqueue_inbox doc if wqInboxDoc: print "Deleting workqueue_inbox id %s and %s" % ( wqInboxDoc['_id'], wqInboxDoc['_rev']) wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev']) # deletes workqueue docs if wqDocIDs: print "Deleting workqueue docs %s" % wqDocIDs wqBackend.deleteElements( *[x for x in wqDocIDs if x['RequestName'] in wfName]) else: print "You are the boss, aborting it ...\n"
class TestDQISResult(unittest.TestCase):

    DB_NAME = 'dqis_test'
    DB_URL = 'localhost:5984'

    def setUp(self):
        couch = CouchServer(dburl=self.DB_URL)
        if self.DB_NAME in couch.listDatabases():
            couch.deleteDatabase(self.DB_NAME)
        cdb = couch.connectDatabase(self.DB_NAME)
        #for dq_t in test_data.demo_data:
        #    cdb.queue(dq_t)
        cdb.commit()
        self.db = Database(dbname=self.DB_NAME)

    def test_init(self):
        #self.assertEqual(1, 2)
        pass

    def test_save_and_delete(self):
        # Should a document get a revision number after save?
        # A document can not be saved and then deleted, because save returns
        # something that is not a DQISResult object!
        # Tests document saving
        document = {"_id": "abc", "test": "data"}
        r = API.DQISResult(dqis_db=self.db, dict=document)
        all_docs_count_before = len(self.db.allDocs()['rows'])
        r.save()
        all_docs_count_after_insert = len(self.db.allDocs()['rows'])
        self.assertEqual(all_docs_count_before + 1, all_docs_count_after_insert)
        # Test delete
        doc = self.db.document("abc")
        r = API.DQISResult(dict=doc, dqis_db=self.db)
        self.assertEqual(doc["test"], "data")
        r.delete()
        self.db.commitOne(r)
        all_docs_count_after_deleting = len(self.db.allDocs()['rows'])
        self.assertEqual(all_docs_count_before, all_docs_count_after_deleting)

    def test_savable(self):
        # Does a missing _id have to raise an exception?
        rez = API.DQISResult(dict={'_id': "123"})._require_savable()
        self.assertEqual(rez, None)
        self.assertRaises(DQISResultNotSavable,
                          API.DQISResult(dict={'id': "123"})._require_savable)
        self.assertRaises(DQISResultNotSavable,
                          API.DQISResult(dict={'abc': "123"})._require_savable)

    def test_find_id(self):
        # similar to test_savable
        self.assertEqual(DQISResult()._find_id(), "")
        self.assertEqual(DQISResult(dict={'id': "123"})._find_id(), "123")
        self.assertEqual(DQISResult(dict={'_id': "123"})._find_id(), "123")

    # NOTE: this second test_find_id definition shadows the one above
    def test_find_id(self):
        id1 = API.DQISResult()._find_id()
        id2 = API.DQISResult(dict={'id': "123"})._find_id()
        id3 = API.DQISResult(dict={'_id': "abc"})._find_id()
        self.assertEqual(id1, "")
        self.assertEqual(id2, '123')
        self.assertEqual(id3, 'abc')

    def test_require_saveable(self):
        dr1 = API.DQISResult()._require_savable
        #dr2 = API.DQISResult(dict={'_id': "123"})._require_savable
        self.assertRaises(DQISResultNotSavable, dr1)
        #self.assertEqual(None, dr2())

    def test_save_to_queue(self):
        r = DQISResult(dqis_db=Database(), dict={"_id": "abc"})
        queue_size_before = len(r.dqis_db._queue)
        r.saveToQueue()
        queue_size_after = len(r.dqis_db._queue)
        self.assertEqual(queue_size_before, 0)
        self.assertEqual(queue_size_after, 1)
        r.dqis_db._reset_queue()

    def test_require_db(self):
        f = DQISResult()._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)
        f = DQISResult(dqis_db="dqis_db")._require_db_connection
        self.assertRaises(DatabaseNotSetException, f)
        f = DQISResult(dqis_db=Database())._require_db_connection
        self.assertEqual(None, f())

    def test_get_document(self):
        doc_id = '100215-0-38bc1d29bd22844103e86f9a000500e2'
        r = API.DQISResult(API.Database(dbname="dqis"))
        r['id'] = doc_id
        doc = r.get_document()
        self.assertEqual(doc.run, 100215)
        doc_id = ''
        r = DQISResult(Database(dbname="dqis"))
        r['id'] = doc_id
        fdoc = r.get_document
        fdoc()
        self.assertRaises(DQISResultNotSavable, fdoc)  # because get and s
def main():
    globalwq = Database('workloadsummary_testdisplay', 'https://dballesteros.iriscouch.com')
    x = globalwq.document("an_id")
    print x['performance']['/linacre_ACDC2_ReReco13JulHT_120723_102457_7693_120810_203338_8896/DataProcessing']['cmsRun1']
def main():
    myDB = Database('workqueue_inbox', 'https://vocms169.cern.ch/couchdb')
    document = myDB.document(sys.argv[1])
    inputs = document['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement']['Inputs']
    for block in inputs:
        print ' "%s",' % block
def main():
    parser = OptionParser()
    parser.add_option("-f", "--input-acdc", dest="acdcList")
    parser.add_option("-m", "--input-mapfile", dest="mapFile")
    parser.add_option("-u", "--url", dest="url")
    parser.add_option("-d", "--dry-run", dest="dryRun", action="store_true", default=False)
    parser.add_option("-l", "--log-file", dest="logFile")
    (options, _) = parser.parse_args()

    handle = open(options.logFile, 'w')

    url = options.url
    database = 'wmagent_acdc'
    acdcDB = Database(database, url)
    handle.write('Opening ACDC database in %s/%s\n' % (url, database))

    inputACDC = readACDCInput(options.acdcList)
    usersMap = readUsersMap(options.mapFile)

    handle.write('Have %d workflows to fix\n' % len(inputACDC))
    handle.write('=================================================================\n')
    for workflow in inputACDC:
        collection_name = workflow['collection_name']
        fileset_name = workflow['fileset_name']
        original_dn = workflow['original_dn']
        handle.write('Original workflow: %s\n' % collection_name)
        handle.write('Original task: %s\n' % fileset_name)
        handle.write('Original owner DN: %s\n' % original_dn)
        if original_dn in usersMap:
            handle.write('This DN maps to %s-%s\n' % (usersMap[original_dn][1],
                                                      usersMap[original_dn][0]))
        else:
            handle.write('The original DN can not be found in the map file, skipping the workflow\n')
            continue
        params = {'reduce' : False,
                  'key' : [usersMap[original_dn][1], usersMap[original_dn][0],
                           collection_name, fileset_name]}
        result = acdcDB.loadView('ACDC', 'owner_coll_fileset_docs', params)
        rows = result['rows']
        docIds = map(lambda x : x['id'], rows)
        handle.write('Found %d documents to change\n' % len(rows))
        handle.write('Changing from %s-%s to %s-%s\n' % (usersMap[original_dn][1],
                                                         usersMap[original_dn][0],
                                                         workflow['group'],
                                                         workflow['owner']))
        for docId in docIds:
            doc = acdcDB.document(docId)
            doc['owner'] = {'group' : workflow['group'],
                            'user' : workflow['owner']}
            if not options.dryRun:
                acdcDB.queue(doc)
        if not options.dryRun:
            response = acdcDB.commit()
        else:
            response = 'This is a dry-run no changes were made'
        handle.write('Response to write operation: %s\n' % str(response))
        handle.write('Response length: %d\n' % len(response))
        handle.write('=================================================================\n')
    handle.write('Finished script')
    handle.close()
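# readACDCInput() and readUsersMap() are referenced above but not shown in
# this snippet. A hedged sketch under the assumption that both inputs are
# JSON files: the ACDC list holds dicts with collection_name, fileset_name,
# original_dn, group and owner keys, and the map file maps an original DN to
# a [user, group] pair (index 0 is used as user, index 1 as group in the view
# key above). The real file formats may differ.
import json

def readACDCInput(path):
    with open(path) as fd:
        return json.load(fd)  # assumed: a list of workflow dicts

def readUsersMap(path):
    with open(path) as fd:
        return json.load(fd)  # assumed: {dn: [user, group], ...}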
def main():
    """
    Either delete couch docs for the workflow provided on the command line,
    or loop over workflows in the final (or almost final) states and ask for
    permission to delete them.
    """
    wfName = sys.argv[1] if len(sys.argv) == 2 else []

    if 'WMAGENT_CONFIG' not in os.environ:
        os.environ['WMAGENT_CONFIG'] = '/data/srv/wmagent/current/config/wmagent/config.py'
    config = loadConfigurationFile(os.environ["WMAGENT_CONFIG"])

    # Central services (couch stuff)
    wfDBReader = RequestDBReader(config.AnalyticsDataCollector.centralRequestDBURL,
                                 couchapp=config.AnalyticsDataCollector.RequestCouchApp)
    wqBackend = WorkQueueBackend(config.WorkloadSummary.couchurl)
    wqInboxDB = Database('workqueue_inbox', config.WorkloadSummary.couchurl)
    # Local services
    localWQBackend = WorkQueueBackend(config.WorkQueueManager.couchurl, db_name="workqueue_inbox")
    localWQInboxDB = Database('workqueue', config.WorkQueueManager.couchurl)

    statusList = ["failed", "epic-FAILED", "completed", "closed-out", "announced",
                  "aborted", "aborted-completed", "rejected", "normal-archived",
                  "aborted-archived", "rejected-archived"]

    # Retrieve the list of workflows to check: either the one given on the
    # command line, or every workflow in one of the final states above
    if not wfName:
        date_range = {'startkey': [2015, 5, 15, 0, 0, 0], 'endkey': [2015, 5, 26, 0, 0, 0]}
        tempWfs = wfDBReader.getRequestByCouchView("bydate", date_range)
        finalWfs = []
        for wf, content in tempWfs.iteritems():
            if content['RequestStatus'] in statusList:
                finalWfs.append(wf)
        print "Found %d wfs not in an active state" % len(finalWfs)
    else:
        finalWfs = [wfName]
        tempWfs = wfDBReader.getRequestByNames(wfName, True)
        print "Checking %s with status '%s'." % (wfName, tempWfs[wfName]['RequestStatus'])

    wqDocs, wqInboxDocs = [], []
    localWQDocs, localWQInboxDocs = [], []
    wqInboxDoc = None
    for counter, wf in enumerate(finalWfs):
        if counter % 100 == 0:
            print "%d wfs queried ..." % counter
        # check whether there are workqueue docs
        wqDocIDs = wqBackend.getElements(WorkflowName=wf)
        if wqDocIDs:
            print "Found %d workqueue docs for %s, status %s" % (len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
            print wqDocIDs
            wqDocs.append(wqDocIDs)
        # check whether there are workqueue_inbox docs
        if wqInboxDB.documentExists(wf):
            print "Found workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus'])
            # then retrieve the document
            wqInboxDoc = wqInboxDB.document(wf)
            wqInboxDocs.append(wqInboxDoc)
        # check the local queue
        wqDocIDs = localWQBackend.getElements(WorkflowName=wf)
        if wqDocIDs:
            print "Found %d local workqueue docs for %s, status %s" % (len(wqDocIDs), wf, tempWfs[wf]['RequestStatus'])
            print wqDocIDs
            localWQDocs.append(wqDocIDs)
        if localWQInboxDB.documentExists(wf):
            print "Found local workqueue_inbox doc for %s, status %s" % (wf, tempWfs[wf]['RequestStatus'])
            wqInboxDoc = localWQInboxDB.document(wf)
            print wqInboxDoc
            localWQInboxDocs.append(wqInboxDoc)

    # TODO: for the moment it only deletes docs for a specific workflow
    if wfName:
        var = raw_input("\nCan we delete all these documents (Y/N)? ")
        if var == "Y":
            # delete the workqueue_inbox doc
            if wqInboxDoc:
                print "Deleting workqueue_inbox id %s and %s" % (wqInboxDoc['_id'], wqInboxDoc['_rev'])
                wqInboxDB.delete_doc(wqInboxDoc['_id'], wqInboxDoc['_rev'])
            # delete the workqueue docs
            if wqDocIDs:
                print "Deleting workqueue docs %s" % wqDocIDs
                wqBackend.deleteElements(*[x for x in wqDocIDs if x['RequestName'] in wfName])
        else:
            print "You are the boss, aborting it ...\n"
def swapLocations(options):
    # Initialize stuff
    phedexAPI = PhEDEx({'cachepath': options.cachepath})
    acdcCouch = Database('wmagent_acdc', options.acdcUrl)

    # Get the IDs of the ACDC documents for the task/request/group/user
    array = [options.group, options.user, options.request, options.task]
    result = acdcCouch.loadView('ACDC', 'owner_coll_fileset_docs', {'reduce': False}, [array])
    documentsIDs = [x['id'] for x in result['rows']]

    # Load the map file saying which locations we want to change
    mapFile = open(options.map, 'r')
    locationMap = json.load(mapFile)
    mapFile.close()

    # Go through the documents
    for docID in documentsIDs:
        doc = acdcCouch.document(docID)

        # If we are going to change this doc, better back it up first
        if options.change:
            backupFile = open(os.path.join(options.backup, "%s.bkp" % doc["_id"]), 'w')
            json.dump(doc, backupFile)
            backupFile.close()

        # Go through the files
        files = doc["files"]
        for inputFile in files:
            # Use the PhEDEx API to get the site based on the SE,
            # then map that to the desired target
            se = files[inputFile]["locations"][0]
            siteLocation = phedexAPI.getBestNodeName(se)
            targetLocation = locationMap.get(siteLocation, siteLocation)

            if siteLocation == targetLocation:
                # Nothing to do with this one, move on
                continue

            if not options.change:
                # No changes, so just print the commands to move the files.
                # Get the PFN for both the current and the target location
                pfnDict = phedexAPI.getPFN(siteLocation, inputFile)
                inputPfn = pfnDict[(siteLocation, inputFile)]
                pfnDict = phedexAPI.getPFN(targetLocation, inputFile)
                targetPfn = pfnDict[(targetLocation, inputFile)]
                print "lcg-cp -D srmv2 -b %s %s" % (inputPfn, targetPfn)
            else:
                # Time to apply the changes, move the stuff
                targetSE = phedexAPI.getNodeSE(targetLocation)
                files[inputFile]["locations"][0] = targetSE
                print "Changing location of %s from %s to %s" % (inputFile, se, targetSE)

        # If specified, commit the changes
        if options.change:
            acdcCouch.commitOne(doc)

    return 0
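# A minimal example of the location map consumed by swapLocations() above:
# plain JSON mapping a current PhEDEx node name to the desired target node.
# The node names here are made up; unlisted sites fall through unchanged
# because of locationMap.get(siteLocation, siteLocation).
import json

exampleMap = {
    "T2_US_Nebraska": "T2_US_Wisconsin",
    "T2_DE_DESY": "T1_DE_KIT_Disk",
}
with open("location_map.json", "w") as mapFile:
    json.dump(exampleMap, mapFile)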
import unittest

# module paths assumed from WMCore conventions
from WMQuality.TestInit import TestInit
from WMCore.Database.CMSCouch import Database
from WMCore.DataStructs.MathStructs.DiscreteSummaryHistogram import DiscreteSummaryHistogram


class DiscreteSummaryHistogramTest(unittest.TestCase):

    def setUp(self):
        """
        _setUp_

        Setup a couch database for testing of produced JSON
        """
        self.testInit = TestInit(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("histogram_dump_t")
        self.histogramDB = Database(dbname="histogram_dump_t")

    def tearDown(self):
        """
        _tearDown_

        Clean the couch
        """
        self.testInit.tearDownCouch()

    def testA_BasicTest(self):
        """
        _testA_BasicTest_

        Build a histogram from a set of discrete data. Check that the
        statistical properties of the histogram are accurate, and that it
        can become a document and be uploaded to couch
        """
        # Try an empty one
        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')
        histogramJSON = histogram.toJSON()

        self.assertEqual(histogramJSON["title"], "SomeTitle")
        self.assertEqual(histogramJSON["xLabel"], "Categories")
        self.assertFalse(histogramJSON["continuous"])
        self.assertEqual(len(histogramJSON["data"]), 0)
        self.assertEqual(histogramJSON["average"], {})
        self.assertEqual(histogramJSON["stdDev"], {})

        histogram = DiscreteSummaryHistogram('SomeTitle', 'Categories')
        for _ in range(5):
            histogram.addPoint("CategoryA", "FeatureA")
            histogram.addPoint("CategoryB", "FeatureB")
        for _ in range(17):
            histogram.addPoint("CategoryA", "FeatureB")
            histogram.addPoint("CategoryC", "FeatureB")
        for _ in range(3):
            histogram.addPoint("CategoryC", "FeatureA")

        jsonHistogram = histogram.toJSON()

        # Average/stdDev per feature:
        # FeatureA: avg = 2.7  stdev = 2.05
        # FeatureB: avg = 13   stdev = 5.66
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureA"], 2.7, places=1)
        self.assertAlmostEqual(jsonHistogram["average"]["FeatureB"], 13, places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureA"], 2.05, places=1)
        self.assertAlmostEqual(jsonHistogram["stdDev"]["FeatureB"], 5.66, places=1)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureA"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryA"]["FeatureB"], 17)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureA"], 0)
        self.assertEqual(jsonHistogram["data"]["CategoryB"]["FeatureB"], 5)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureA"], 3)
        self.assertEqual(jsonHistogram["data"]["CategoryC"]["FeatureB"], 17)

        # Try to commit it to couch
        jsonHistogram["_id"] = jsonHistogram["title"]
        self.histogramDB.commitOne(jsonHistogram)

        storedJSON = self.histogramDB.document("SomeTitle")
        self.assertEqual(len(storedJSON["data"]), 3)
        return
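# A quick sanity check of the expected statistics asserted above; the numbers
# work out if the histogram uses the population standard deviation (dividing
# by N rather than N-1), which the 2.05 and 5.66 values confirm.
values_a = [5, 0, 3]    # FeatureA counts per category (A, B, C)
values_b = [17, 5, 17]  # FeatureB counts per category (A, B, C)
mean_a = sum(values_a) / float(len(values_a))  # 8/3  = 2.67, i.e. 2.7 to one place
mean_b = sum(values_b) / float(len(values_b))  # 39/3 = 13.0
std_a = (sum((x - mean_a) ** 2 for x in values_a) / len(values_a)) ** 0.5  # 2.05
std_b = (sum((x - mean_b) ** 2 for x in values_b) / len(values_b)) ** 0.5  # 5.66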
def get_couch_value(couch_req, mapping):
    try:
        c = couch_req[mapping["couch"]]
        couch_missing = False
    except KeyError:
        # the comparison will not happen due to the missing flag,
        # the value will be stored in couch
        c = "N/A"
        couch_missing = True
    return str(c), couch_missing


def check_oracle_workflow_value(oracle_value, mapping, req_name):
    # check the Oracle WORKFLOW value
    if mapping["oracle"] == "WORKFLOW":
        # e.g. https://cmsweb.cern.ch/couchdb/reqmgr_workload_cache/linacre_2011A_442p2_DataReprocessingMuOnia_111119_005717/spec
        from_wf_url_req_name = oracle_value.rsplit('/', 2)[-2]
        if req_name != from_wf_url_req_name:
            print("Workflow URL mismatch: %s" % oracle_value)
            sys.exit(1)


def main():
    if len(sys.argv) < 2:
        print("Missing the connect Oracle TNS argument (user/password@server).")
        sys.exit(1)
    tns = sys.argv[1]

    # couchdb_name and couch_url are defined elsewhere in the script
    print("Creating CouchDB database connection ...")
    couchdb = Database(couchdb_name, couch_url)
    print("Creating Oracle database connection ...")
    oradb = cx_Oracle.Connection(tns)

    num_couch_requests = get_couchdb_row_count(couchdb)
    print("Total CouchDB request documents in ReqMgr: %s" % num_couch_requests)
    num_oracle_requests = get_oracle_row_count(oradb, "reqmgr_request")
    print("Total Oracle requests entries in ReqMgr: %s" % num_oracle_requests)
    if num_couch_requests != num_oracle_requests:
        print("Numbers of requests in Oracle and CouchDB don't agree, fix that first.")
        sys.exit(1)
    else:
        print("Database cross-check (Oracle request names vs CouchDB): DONE, THE SAME.")

    counter = 0
    for oracle_req in get_oracle_data(oradb):
        req_name = oracle_req["REQUEST_NAME"]
        # FILTER
        # check only requests injected approx. after the last deployment (a
        # lot of stuff should have already been fixed in ReqMgr)
        # _13041._*$ (ending of request name with date/time)
        #if not re.match(".*_1304[0-3][0-9]_.*$", req_name):  # all April 2013
        #    continue
        counter += 1
        print("\n\n%s (%s)" % (req_name, counter))
        couch_req = couchdb.document(req_name)
        couch_fields_to_correct = {}
        for mapping in MAPPING:
            if mapping["couch"] in COUCH_TO_IGNORE:
                continue
            o = str(oracle_req[mapping["oracle"]])
            c, couch_missing = get_couch_value(couch_req, mapping)
            check_oracle_workflow_value(o, mapping, req_name)
            # compare the Oracle and Couch values; don't update a value in
            # couch if it exists and is non-empty
            if (couch_missing or o != c) and c in ('None', '0', '', "N/A"):
                print("%s %s != %s" % (mapping, o, c))
                # correct the couch request with the oracle value
                couch_fields_to_correct[mapping["couch"]] = o
        if couch_fields_to_correct:
            print("Couch corrected fields:")
            print(couch_fields_to_correct)
            if sys.argv[-1] == "-c":
                couchdb.updateDocument(req_name, "ReqMgr", "updaterequest",
                                       fields=couch_fields_to_correct, useBody=True)
                print("Couch updated")
        else:
            print("OK")

# fields that should be removed from couch
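# MAPPING and COUCH_TO_IGNORE are defined elsewhere in the script; judging by
# the usage above, each MAPPING entry pairs an Oracle column with the
# corresponding CouchDB field. The entries below are illustrative only, not
# the actual mapping.
MAPPING = [
    {"oracle": "REQUEST_NAME", "couch": "RequestName"},
    {"oracle": "WORKFLOW", "couch": "RequestWorkflow"},
]
COUCH_TO_IGNORE = ["RequestName"]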