def __init__(self, couchDB, id=None, elementParams=None):
    elementParams = elementParams or {}
    WorkQueueElement.__init__(self, **elementParams)
    if id:
        self._id = id
    self._document = Document(id=id)
    self._couch = couchDB
def __init__(self, workloadId, url, database, workload=None):
    Document.__init__(self, workloadId)
    self.database = database
    self.url = url
    self.server = None
    self.couchdb = None
    if workload != None:
        self.update(workload.generateWorkloadSummary())
def __init__(self, run, lumi, dataset, user, bfield=0, initial_map=None):
    """
    Instantiate the Couch document and set the appropriate values
    """
    CouchDocument.__init__(self, id=self._generate_id(run, lumi, dataset))
    initial_map = initial_map or {}  # avoid a shared mutable default argument
    self.setdefault('run', run)
    self.setdefault('lumi', lumi)
    self.setdefault('dataset', dataset.strip('/').split('/'))
    self.setdefault('bfield', bfield)
    self.setdefault('map_history', [])
    self.setdefault('map', initial_map)
    self['map']['_meta'] = {'user': user,
                            'timestamp': str(datetime.now())}
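# Usage sketch for the lumi-map document above.  A minimal sketch: the class
# name "LumiMapDocument" is an illustrative stand-in (the snippet only shows
# the __init__), and the id comes from the class's own _generate_id().
doc = LumiMapDocument(run=123456, lumi=42,
                      dataset='/PrimaryDS/ProcessedDS/TIER', user='jdoe')
# 'dataset' is stored split into its path components, and the map carries a
# '_meta' stamp recording who created it and when.
assert doc['dataset'] == ['PrimaryDS', 'ProcessedDS', 'TIER']
assert doc['map']['_meta']['user'] == 'jdoe'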
def __init__(self, dbURL, couchDBName=None, id=None, rev=None, usePYCurl=True,
             ckey=None, cert=None, capath=None, detail=True):
    super(ConfigCache, self).__init__()
    self.dbname = couchDBName
    self.dburl = dbURL
    self.detail = detail
    try:
        self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey,
                                   cert=cert, capath=capath)
        if self.dbname not in self.couchdb.listDatabases():
            self.createDatabase()
        self.database = self.couchdb.connectDatabase(self.dbname)
    except Exception as ex:
        msg = "Error connecting to couch: %s\n" % str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise ConfigCacheException(message=msg)

    # local cache
    self.docs_cache = DocumentCache(self.database, self.detail)

    # UserGroup variables
    self.group = None
    self.owner = None

    # Internal data structure
    self.document = Document()
    self.attachments = {}
    self.document['type'] = "config"
    self.document['description'] = {}
    self.document['description']['config_label'] = None
    self.document['description']['config_desc'] = None

    if id != None:
        self.document['_id'] = id
    self.document['pset_tweak_details'] = None
    self.document['info'] = None
    self.document['config'] = None
    return
def recordTaskActivity(self, taskname, comment=''):
    """Record a task for monitoring"""
    try:
        record = self.db.document('task_activity')
    except CouchNotFoundError:
        record = Document('task_activity')
    record.setdefault('tasks', {})
    record['tasks'].setdefault(taskname, {})
    record['tasks'][taskname]['timestamp'] = time.time()
    record['tasks'][taskname]['comment'] = comment
    try:
        self.db.commitOne(record)
    except Exception as ex:
        self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex)))
def testAllDocs(self):
    """
    Test AllDocs with options
    """
    self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
    self.db.queue(Document(id="2", inputDict={'foo': 123, 'bar': 456}))
    self.db.queue(Document(id="3", inputDict={'foo': 123, 'bar': 456}))
    self.db.commit()
    self.assertEqual(3, len(self.db.allDocs()['rows']))
    self.assertEqual(2, len(self.db.allDocs({'startkey': "2"})['rows']))
    self.assertEqual(2, len(self.db.allDocs(keys=["1", "3"])['rows']))
    self.assertEqual(1, len(self.db.allDocs({'limit': 1}, ["1", "3"])['rows']))
    self.assertTrue('error' in self.db.allDocs(keys=["1", "4"])['rows'][1])
def put(self, subName=None):
    """
    Update the document for the given self.name and subName.  It assumes
    the client has provided the entire entity, i.e., the old content gets
    completely replaced by the new one.

    Given that each couch document contains a revision number, these PUT
    calls are not going to be idempotent.
    """
    data = cherrypy.request.body.read()
    if not data:
        raise MissingPostData()
    else:
        propertyDict = json.loads(data)

    result = None
    if subName:
        docName = "%s_%s" % (self.name, subName)
    else:
        docName = self.name

    try:
        existDoc = self.reqmgr_aux_db.document(docName)
        # replace the original document
        newDoc = Document(existDoc['_id'],
                          inputDict={'_rev': existDoc['_rev'],
                                     'ConfigType': existDoc['ConfigType']})
        newDoc.update(propertyDict)
        result = self.reqmgr_aux_db.commitOne(newDoc)
    except CouchNotFoundError:
        cherrypy.log("Document %s not found. Creating one." % docName)
        doc = Document(docName, propertyDict)
        doc.update({'ConfigType': self.name})
        result = self.reqmgr_aux_db.commitOne(doc)
    return result
def recordTaskActivity(self, taskname, comment=""): """Record a task for monitoring""" try: record = self.db.document("task_activity") except CouchNotFoundError: record = Document("task_activity") record.setdefault("tasks", {}) record["tasks"].setdefault(taskname, {}) record["tasks"][taskname]["timestamp"] = time.time() record["tasks"][taskname]["comment"] = comment try: self.db.commitOne(record) except Exception as ex: self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex)))
def injectReDigiConfigs(self, combinedStepOne=False):
    """
    _injectReDigiConfigs_

    Create bogus config cache documents for the various steps of the
    ReDigi workflow.  Return the IDs of the documents.
    """
    stepOneConfig = Document()
    stepOneConfig["info"] = None
    stepOneConfig["config"] = None
    stepOneConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    stepOneConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    stepOneConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    if combinedStepOne:
        stepOneConfig["pset_tweak_details"] = {
            "process": {"outputModules_": ["RECODEBUGoutput", "DQMoutput"],
                        "RECODEBUGoutput": {"dataset": {"filterName": "",
                                                        "dataTier": "RECO-DEBUG-OUTPUT"}},
                        "DQMoutput": {"dataset": {"filterName": "",
                                                  "dataTier": "DQM"}}}}
    else:
        stepOneConfig["pset_tweak_details"] = {
            "process": {"outputModules_": ["RAWDEBUGoutput"],
                        "RAWDEBUGoutput": {"dataset": {"filterName": "",
                                                       "dataTier": "RAW-DEBUG-OUTPUT"}}}}

    stepTwoConfig = Document()
    stepTwoConfig["info"] = None
    stepTwoConfig["config"] = None
    stepTwoConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    stepTwoConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    stepTwoConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    stepTwoConfig["pset_tweak_details"] = {
        "process": {"outputModules_": ["RECODEBUGoutput", "DQMoutput"],
                    "RECODEBUGoutput": {"dataset": {"filterName": "",
                                                    "dataTier": "RECO-DEBUG-OUTPUT"}},
                    "DQMoutput": {"dataset": {"filterName": "",
                                              "dataTier": "DQM"}}}}

    stepThreeConfig = Document()
    stepThreeConfig["info"] = None
    stepThreeConfig["config"] = None
    stepThreeConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    stepThreeConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    stepThreeConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    stepThreeConfig["pset_tweak_details"] = {
        "process": {"outputModules_": ["aodOutputModule"],
                    "aodOutputModule": {"dataset": {"filterName": "",
                                                    "dataTier": "AODSIM"}}}}

    stepOne = self.configDatabase.commitOne(stepOneConfig)[0]["id"]
    stepTwo = self.configDatabase.commitOne(stepTwoConfig)[0]["id"]
    stepThree = self.configDatabase.commitOne(stepThreeConfig)[0]["id"]
    return (stepOne, stepTwo, stepThree)
def injectStepChainConfigSingle(couchDatabase):
    """
    _injectStepChainConfigSingle_

    Create a single config
    """
    miniConfig = Document()
    miniConfig["info"] = None
    miniConfig["config"] = None
    miniConfig["md5hash"] = "9bdc3d7b2fc90e0f4ca24e270a467ac3"
    miniConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7"
    miniConfig["owner"] = {"group": "DATAOPS", "user": "******"}
    miniConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["MINIAODSIMoutput"],
            "MINIAODSIMoutput": {
                "dataset": {
                    "filterName": "",
                    "dataTier": "MINIAODSIM"
                }
            }
        }
    }
    result = couchDatabase.commitOne(miniConfig)
    return result[0]["id"]
def injectSkimConfig(self):
    """
    _injectSkimConfig_

    Create a bogus config cache document for the skims and inject it into
    couch.  Return the ID of the document.
    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    newConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["SkimA", "SkimB"],
            "SkimA": {
                "dataset": {
                    "filterName": "SkimAFilter",
                    "dataTier": "RAW-RECO"
                }
            },
            "SkimB": {
                "dataset": {
                    "filterName": "SkimBFilter",
                    "dataTier": "USER"
                }
            }
        }
    }
    result = self.configDatabase.commitOne(newConfig)
    return result[0]["id"]
def testCommitOne(self):
    # Can I commit one dict
    doc = {'foo': 123, 'bar': 456}
    id = self.db.commitOne(doc, returndocs=True)[0]['id']
    # What about a Document
    doc = Document(inputDict=doc)
    id = self.db.commitOne(doc, returndocs=True)[0]['id']
def injectAnalysisConfig(self):
    """
    Create a bogus config cache document for the analysis workflow and
    inject it into couch.  Return the ID of the document.
    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["pset_hash"] = "21cb400c6ad63c3a97fa93f8e8785127"
    newConfig["owner"] = {"group": "Analysis", "user": "******"}
    newConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["OutputA", "OutputB"],
            "OutputA": {
                "dataset": {
                    "filterName": "OutputAFilter",
                    "dataTier": "RECO"
                }
            },
            "OutputB": {
                "dataset": {
                    "filterName": "OutputBFilter",
                    "dataTier": "USER"
                }
            }
        }
    }
    result = self.configDatabase.commitOne(newConfig)
    return result[0]["id"]
def testA(self):
    """make some documents and own them"""
    guInt = Interface(self.testInit.couchUrl, self.testInit.couchDbName)
    # create a couple of docs
    couch = Database(self.testInit.couchDbName, self.testInit.couchUrl)
    for x in range(10):
        doc = Document("document%s" % x, {"Test Data": [1, 2, 3, 4]})
        couch.queue(doc)
    couch.commit()

    self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 0)
    self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 0)

    guInt.callUpdate("ownthis", "document1",
                     group=self.owner1.group.name,
                     user=self.owner1.name)
    self.assertTrue("document1" in guInt.documentsOwned(self.owner1.group.name, self.owner1.name))
    self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 1)
    self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 0)

    guInt.callUpdate("ownthis", "document2",
                     group=self.owner2.group.name,
                     user=self.owner2.name)
    self.assertTrue("document2" in guInt.documentsOwned(self.owner2.group.name, self.owner2.name))
    self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 1)
    self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 1)

    guInt.callUpdate("newgroup", "group-DataOps", group="DataOps")
    self.assertTrue(couch.documentExists("group-DataOps"))

    guInt.callUpdate("newuser", "user-damason", group="DataOps", user="******")
    self.assertTrue(couch.documentExists("user-damason"))
def makeGeneratorConfig(couchDatabase):
    """
    _makeGeneratorConfig_

    Create a bogus config cache document for the montecarlo generation and
    inject it into couch.  Return the ID of the document.
    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    newConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["writeGENSIM"],
            "writeGENSIM": {
                "dataset": {
                    "filterName": "GenSimFilter",
                    "dataTier": "GEN-SIM"
                }
            }
        }
    }
    result = couchDatabase.commitOne(newConfig)
    return result[0]["id"]
def injectAnalysisConfig(self):
    """
    Create a bogus config cache document for the analysis workflow and
    inject it into couch.  Return the ID of the document.
    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["pset_hash"] = "21cb400c6ad63c3a97fa93f8e8785127"
    newConfig["owner"] = {"group": "Analysis", "user": "******"}
    newConfig["pset_tweak_details"] = {
        "process": {
            "maxEvents": {
                "parameters_": ["input"],
                "input": 10
            },
            "outputModules_": ["output"],
            "parameters_": ["outputModules_"],
            "source": {
                "parameters_": ["fileNames"],
                "fileNames": []
            },
            "output": {
                "parameters_": ["fileName"],
                "fileName": "outfile.root"
            },
            "options": {
                "parameters_": ["wantSummary"],
                "wantSummary": True
            }
        }
    }
    result = self.configDatabase.commitOne(newConfig)
    return result[0]["id"]
def testA(self):
    """instantiate"""
    document = Document()
    document[u'pset_tweak_details'] = {}
    document[u'pset_tweak_details'][u'process'] = {}
    document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'] = {}
    document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed1'] = {}
    document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed2'] = {}
    document[u'pset_tweak_details'][u'process'][u'RandomNumberGeneratorService'][u'seed3'] = {}
    document = self.database.commitOne(document)[0]
    seeder = ReproducibleSeeding(CouchUrl=self.testInit.couchUrl,
                                 CouchDBName=self.testInit.couchDbName,
                                 ConfigCacheDoc=document[u'id'])
    job = Job("testjob")
    seeder(job)
    baggage = job.getBaggage()
    seed1 = getattr(baggage.process.RandomNumberGeneratorService, "seed1", None)
    self.assertTrue(seed1 != None)
def injectReRecoConfig(self):
    """
    _injectReRecoConfig_

    Inject a ReReco config document that we can use to set the outputModules
    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    newConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ['RECOoutput', 'DQMoutput'],
            "RECOoutput": {
                'dataset': {
                    'filterName': 'RECOoutputFilter',
                    'dataTier': 'RECO'
                }
            },
            "DQMoutput": {
                'dataset': {
                    'filterName': 'DQMoutputFilter',
                    'dataTier': 'DQM'
                }
            }
        }
    }
    result = self.configDatabase.commitOne(newConfig)
    return result[0]["id"]
def put(self, team_name):
    """
    Adds team of team_name into the database.
    Creates teams document if it doesn't exist.
    """
    try:
        teams = self.reqmgr_aux_db.document("teams")
    except CouchNotFoundError as ex:
        msg = ("ERROR: Retrieving teams document failed, reason: %s"
               " Creating the document ..." % ex)
        cherrypy.log(msg)
        try:
            doc = Document(id="teams", inputDict={team_name: None})
            self.reqmgr_aux_db.commitOne(doc)
            return
        except CouchError as ex:
            msg = "ERROR: Creating document teams failed, reason: %s" % ex
            cherrypy.log(msg)
            raise cherrypy.HTTPError(400, msg)
    if team_name in teams:
        return rows(["Already exists."])
    else:
        teams[team_name] = None
        # TODO: this should ideally also wrap try-except
        self.reqmgr_aux_db.commitOne(teams)
        return rows(["OK"])
def testCommit(self):
    """
    Test queue and commit modes
    """
    # try to commit 2 random docs
    doc = {'foo': 123, 'bar': 456}
    self.db.queue(doc)
    self.db.queue(doc)
    self.assertEqual(2, len(self.db.commit()))

    # committing 2 docs with the same id will fail
    self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
    self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456}))
    answer = self.db.commit()
    self.assertEqual(2, len(answer))
    self.assertEqual(answer[0]['error'], 'conflict')
    self.assertEqual(answer[1]['error'], 'conflict')

    # all_or_nothing mode ignores conflicts
    self.db.queue(Document(id="2", inputDict=doc))
    self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456}))
    answer = self.db.commit(all_or_nothing=True)
    self.assertEqual(2, len(answer))
    self.assertEqual(answer[0].get('error'), None)
    self.assertEqual(answer[1].get('error'), None)  # check both results, not the first twice
    self.assertEqual(answer[0]['id'], '2')
    self.assertEqual(answer[1]['id'], '2')

    # callbacks can do stuff when conflicts arise
    # this particular one just overwrites the document
    def callback(db, data, result):
        for doc in data['docs']:
            if doc['_id'] == result['id']:
                doc['_rev'] = db.document(doc['_id'])['_rev']
                retval = db.commitOne(doc)
        return retval[0]

    self.db.queue(Document(id="2", inputDict={'foo': 5, 'bar': 6}))
    answer = self.db.commit(callback=callback)
    self.assertEqual(1, len(answer))
    self.assertEqual(answer[0].get('error'), None)
    updatedDoc = self.db.document('2')
    self.assertEqual(updatedDoc['foo'], 5)
    self.assertEqual(updatedDoc['bar'], 6)
    return
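# The conflict callback above can be generalised.  A minimal sketch, assuming
# only the Database.commit(callback=...) hook exercised in the test; the name
# "overwriteOnConflict" is illustrative, not an existing WMCore helper.
def overwriteOnConflict(db, data, result):
    """On a conflict, refresh the queued doc's _rev and commit it again."""
    retval = None
    for doc in data['docs']:
        if doc['_id'] == result['id']:
            doc['_rev'] = db.document(doc['_id'])['_rev']  # latest revision wins
            retval = db.commitOne(doc)
    return retval[0] if retval else result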
def testDocumentSerialisation(self):
    """
    A document should be writable into the couchdb with a timestamp.
    """
    d = Document()
    d['foo'] = 'bar'
    doc_info = self.db.commit(doc=d, timestamp=True)[0]
    d_from_db = self.db.document(doc_info['id'])
    self.assertEqual(d['foo'], d_from_db['foo'])
    self.assertEqual(d['timestamp'], d_from_db['timestamp'])
def testUpdateBulkDocuments(self):
    """
    Test bulk document updates with conflict handling
    """
    self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
    self.db.queue(Document(id="2", inputDict={'foo': 123, 'bar': 456}))
    self.db.queue(Document(id="3", inputDict={'foo': 123, 'bar': 456}))
    self.db.commit()

    self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': 333}, 2)
    result = self.db.allDocs({"include_docs": True})['rows']
    self.assertEqual(3, len(result))
    for item in result:
        self.assertEqual(333, item['doc']['foo'])

    self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': 222}, 10)
    result = self.db.allDocs({"include_docs": True})['rows']
    self.assertEqual(3, len(result))
    for item in result:
        self.assertEqual(222, item['doc']['foo'])
def post(self):
    """
    Post a software version document
    """
    data = cherrypy.request.body.read()
    if not data:
        raise MissingPostData()
    else:
        doc = json.loads(data)
    doc = Document(self.name, doc)
    result = self.reqmgr_aux_db.commitOne(doc)
    return result
def makePromptSkimConfigs(couchDatabase):
    """
    Fake a prompt skim config in ConfigCache for Tier0 test
    """
    skimsConfig = Document()
    skimsConfig["info"] = None
    skimsConfig["config"] = None
    skimsConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5cab2755"
    skimsConfig["pset_hash"] = "7c856ad35f9f544839d8524ca5372888"
    skimsConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    skimsConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["writeSkim1", "writeSkim2", "writeSkim3",
                               "writeSkim4", "writeSkim5"],
            "writeSkim1": {"dataset": {"dataTier": "RECO-AOD",
                                       "filterName": "skim1"}},
            "writeSkim2": {"dataset": {"dataTier": "RECO-AOD",
                                       "filterName": "skim2"}},
            "writeSkim3": {"dataset": {"dataTier": "RECO-AOD",
                                       "filterName": "skim3"}},
            "writeSkim4": {"dataset": {"dataTier": "RECO-AOD",
                                       "filterName": "skim4"}},
            "writeSkim5": {"dataset": {"dataTier": "RECO-AOD",
                                       "filterName": "skim5"}},
        }
    }
    couchDatabase.queue(skimsConfig)
    result = couchDatabase.commit()
    docMap = {"Skims": result[0][u'id']}
    return docMap
def send(self, alerts):
    """
    Handle a list of alerts.
    """
    retVals = []
    for a in alerts:
        doc = Document(None, a)
        retVal = self.database.commitOne(doc)
        retVals.append(retVal)
    logging.debug("Stored %s alerts to CouchDB, retVals: %s" % (len(alerts), retVals))
    return retVals
def testB(self):
    """test owning some sample documents"""
    u1 = User(name="evansde77")
    g1 = Group(name="DMWM", administrators=["evansde77", "drsm79"])
    g1.setCouch(self.url, self.database)
    g1.connect()
    u1.setGroup(g1)
    u1.create()

    doc1 = Document()
    doc1['test-data'] = {"key1": "value1"}
    doc2 = Document()
    doc2['test-data'] = {"key2": "value2"}
    id1 = g1.couch.commitOne(doc1)[0]
    id2 = g1.couch.commitOne(doc2)[0]
    doc1['_id'] = id1[u'id']
    doc1['_rev'] = id1[u'rev']
    doc2['_id'] = id2[u'id']
    doc2['_rev'] = id2[u'rev']

    u1.ownThis(doc1)
    u1.ownThis(doc2)

    self.failUnless("owner" in doc1)
    self.failUnless("owner" in doc2)
    self.failUnless('user' in doc1['owner'])
    self.failUnless('group' in doc1['owner'])
    self.failUnless(doc1['owner']['user'] == u1['name'])
    self.failUnless(doc1['owner']['group'] == u1['group'])
    self.failUnless('user' in doc2['owner'])
    self.failUnless('group' in doc2['owner'])
    self.failUnless(doc2['owner']['user'] == u1['name'])
    self.failUnless(doc2['owner']['group'] == u1['group'])

    g1.couch.delete_doc(id1[u'id'])
    g1.couch.delete_doc(id2[u'id'])
    u1.drop()
    g1.drop()
def update_software(config_file):
    """
    Retrieve CMSSW versions and ScramArchs from the CMS tag collector.
    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data in the CouchDB auxiliary database under the "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from the tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(tag_collector_url)
    if not all_archs_and_versions:
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id software does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # now compare the recent data from the tag collector with what we already
    # have stored; sorting is necessary
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug("ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
def testCommitOneWithQueue(self):
    """
    CommitOne bypasses the queue, but it should maintain the queue if
    present for a future call to commit.
    """
    # Queue up five docs
    doc = {'foo': 123, 'bar': 456}
    for i in range(1, 6):
        self.db.queue(doc)
    # Commit one Document
    doc = Document(inputDict=doc)
    id = self.db.commitOne(doc, returndocs=True)[0]['id']
    self.assertEqual(1, len(self.db.allDocs()['rows']))
    self.db.commit()
    self.assertEqual(6, len(self.db.allDocs()['rows']))
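# The queue/commitOne semantics exercised above, as a standalone sketch.
# Hedged: the URL and database name are placeholders, and the import path is
# assumed to be WMCore's CMSCouch module, where the CouchServer and Document
# classes used throughout these snippets live.
from WMCore.Database.CMSCouch import CouchServer, Document

db = CouchServer('http://localhost:5984').connectDatabase('scratch')
db.queue({'foo': 1})                          # buffered only; nothing written yet
db.commitOne(Document(inputDict={'bar': 2}))  # written immediately; queue untouched
db.commit()                                   # flushes the buffered document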
def send(self, alerts):
    """
    Send a list of alerts to a REST server.
    """
    for a in alerts:
        doc = Document(None, a)
        self._database.queue(doc)
    # two options here: either call commit on the couch myself, or leave the
    # alerts buffered in the Database queue, in which case .commit() is
    # called automatically once the queue size is exceeded
    # 1st option:
    retVal = self._database.commit()
    logging.debug("Stored %s alerts to REST resource, retVals: %s" % (len(alerts), retVal))
    return retVal
def injectDQMHarvestConfig(self):
    """
    _injectDQMHarvest_

    Create a bogus config cache document for DQMHarvest and inject it into
    couch.  Return the ID of the document.
    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e234f"
    newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7"
    newConfig["owner"] = {"group": "DATAOPS", "user": "******"}
    newConfig["pset_tweak_details"] = {"process": {"outputModules_": []}}
    result = self.configDatabase.commitOne(newConfig)
    return result[0]["id"]
def testUpdateHandlerAndBulkUpdateProfile(self):
    """
    Test that update function support works
    """
    # for an actual profiling run increase the size value:
    # for 10000 records, 96 sec (update handler) vs 4 sec (bulk update)
    size = 100
    for i in range(size):
        self.db.queue(Document(id="%s" % i,
                               inputDict={'name': 123, 'counter': 0}))
    update_doc = {
        '_id': '_design/foo',
        'language': 'javascript',
        'updates': {
            "change-counter": """function(doc, req) {
                if (doc) {
                    var data = JSON.parse(req.body);
                    for (var field in data) {doc[field] = data[field];}
                    return [doc, 'OK'];}}""",
        }
    }
    self.db.commit(update_doc)

    start = time.time()
    for id in range(size):
        doc_id = "%s" % id
        self.db.updateDocument(doc_id, 'foo', 'change-counter',
                               {'counter': 1}, useBody=True)
    end = time.time()
    print("update handler: %s sec" % (end - start))

    start = time.time()
    ids = []
    for id in range(size):
        doc_id = "%s" % id
        ids.append(doc_id)
    self.db.updateBulkDocumentsWithConflictHandle(ids, {'counter': 2}, 1000)
    end = time.time()
    print("bulk update: %s sec" % (end - start))
def post(self, subName=None):
    """
    Inserts a new document into the database
    """
    data = cherrypy.request.body.read()
    if not data:
        raise MissingPostData()
    else:
        doc = json.loads(data)
    if subName:
        docName = "%s_%s" % (self.name, subName)
    else:
        docName = self.name
    doc["ConfigType"] = self.name
    doc = Document(docName, doc)
    result = self.reqmgr_aux_db.commitOne(doc)
    return result
def post(self, subName=None):
    """
    If the document already exists, replace it with a new one.
    """
    data = cherrypy.request.body.read()
    if not data:
        raise MissingPostData()
    else:
        doc = json.loads(data)
    if subName:
        docName = "%s_%s" % (self.name, subName)
    else:
        docName = self.name
    doc["ConfigType"] = self.name
    doc = Document(docName, doc)
    result = self.reqmgr_aux_db.commitOne(doc)
    return result
def __init__(self, dbURL, couchDBName=None, id=None, rev=None, usePYCurl=False,
             ckey=None, cert=None, capath=None, detail=True):
    super(ConfigCache, self).__init__()
    self.dbname = couchDBName
    self.dburl = dbURL
    self.detail = detail
    try:
        self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey,
                                   cert=cert, capath=capath)
        if self.dbname not in self.couchdb.listDatabases():
            self.createDatabase()
        self.database = self.couchdb.connectDatabase(self.dbname)
    except Exception as ex:
        msg = "Error connecting to couch: %s\n" % str(ex)
        msg += str(traceback.format_exc())
        logging.error(msg)
        raise ConfigCacheException(message=msg)

    # local cache
    self.docs_cache = DocumentCache(self.database, self.detail)

    # UserGroup variables
    self.group = None
    self.owner = None

    # Internal data structure
    self.document = Document()
    self.attachments = {}
    self.document['type'] = "config"
    self.document['description'] = {}
    self.document['description']['config_label'] = None
    self.document['description']['config_desc'] = None

    if id != None:
        self.document['_id'] = id
    self.document['pset_tweak_details'] = None
    self.document['info'] = None
    self.document['config'] = None
    return
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """

    def __init__(self, dbURL, couchDBName=None, id=None, rev=None, usePYCurl=False,
                 ckey=None, cert=None, capath=None, detail=True):
        self.dbname = couchDBName
        self.dburl = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey,
                                       cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()
            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id != None:
            self.document['_id'] = id
        self.document['pset_tweak_details'] = None
        self.document['info'] = None
        self.document['config'] = None
        return

    def createDatabase(self):
        """
        _createDatabase_
        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_
        """
        self.group = Group(name=groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname, username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name=groupname)
        self.createUser(username=username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name=name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        self.owner = makeUser(self.group['name'], username,
                              couchUrl=self.dburl,
                              couchDatabase=self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself!  Save your internal document.
        """
        rawResults = self.database.commit(doc=self.document)
        # We should only be committing one document at a time;
        # if not, get the last one.
        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"] = commitResults.get('id')
        except KeyError as ex:
            msg = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name=attachName,
                                attachment=self.attachments[attachName])
        return

    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document
        """
        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"],
                                             attachment,
                                             name)
        if retval.get('ok', False) != True:
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"], self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"] = retval['id']
        self.attachments[name] = attachment
        return

    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id=configID)
            if 'owner' in self.document.keys():
                self.connectUserGroup(groupname=self.document['owner'].get('group', None),
                                      username=self.document['owner'].get('user', None))
            if '_attachments' in self.document.keys():
                # Then we need to load the attachments
                for key in self.document['_attachments'].keys():
                    self.loadAttachment(name=key)
        except CouchNotFoundError as ex:
            msg = "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        except Exception as ex:
            msg = "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        return

    def loadAttachment(self, name, overwrite=True):
        """
        _loadAttachment_

        Load an attachment from the database and put it somewhere useful
        """
        attach = self.database.getAttachment(self.document["_id"], name)
        if not overwrite:
            if name in self.attachments.keys():
                logging.info("Attachment already exists, so we're skipping")
                return
        self.attachments[name] = attach
        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views
        """
        viewRes = self.database.loadView('ConfigCache', view, {}, [value])
        if len(viewRes['rows']) == 0:
            # Then we have a problem
            logging.error("Unable to load using view %s and value %s" % (view, str(value)))
            return
        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Make sure we can save our config file to disk
        """
        config = self.getConfig()
        if not config:
            return
        # Write to a file
        f = open(targetFile, 'w')
        f.write(config)
        f.close()
        return

    def load(self):
        """
        _load_

        Figure out how to load
        """
        if self.document.get("_id", None) != None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return
        # Otherwise we have to load by view
        if not self.document.get('md5_hash', None) == None:
            # Then we have an md5_hash
            self.loadByView(view='config_by_md5hash',
                            value=self.document['md5_hash'])
        # TODO: Add more views as they become available.
        # elif not self.owner == None:
        #     # Then we have an owner
        #     self.loadByView(view='config_by_owner', value=self.owner['name'])

    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """
        self.document["_id"] = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')

    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the ConfigCache.
        """
        psetTweaks = self.getPSetTweaks()
        if not 'process' in psetTweaks.keys():
            raise ConfigCacheException("Could not find process field in PSet while getting output modules!")
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg = "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {"dataTier": outModule["dataset"]["dataTier"],
                                             "filterName": outModule["dataset"]["filterName"]}
            else:
                results[outputModuleName] = {"dataTier": None,
                                             "filterName": None}
        return results

    def addConfig(self, newConfig, psetHash=None):
        """
        _addConfig_
        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()
        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """
        return self.document["_id"]

    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """
        return self.document["_rev"]

    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid
        """
        if not self.document["_id"]:
            logging.error("Attempted to delete with no couch ID")

        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg = "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)
        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given its label.
        """
        results = self.database.loadView("ConfigCache", "config_by_label",
                                         {"startkey": label, "limit": 1})
        if results["rows"][0]["key"] == label:
            return results["rows"][0]["value"]
        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")
        for result in results["rows"]:
            configs[result["key"]] = result["value"]
        return configs

    def __str__(self):
        """
        Make something printable
        """
        return self.document.__str__()

    def validate(self, configID):
        try:
            # TODO: need to change to DataCache
            # self.loadDocument(configID=configID)
            self.loadByID(configID=configID)
        except Exception as ex:
            raise ConfigCacheException("Failure to load ConfigCache while validating workload: %s" % str(ex))

        if self.detail:
            duplicateCheck = {}
            try:
                outputModuleInfo = self.getOutputModuleInfo()
            except Exception as ex:
                # Something's gone wrong with trying to open the configCache
                msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
                raise ConfigCacheException("%s: %s" % (msg, str(ex)))
            for outputModule in outputModuleInfo.values():
                dataTier = outputModule.get('dataTier', None)
                filterName = outputModule.get('filterName', None)
                if not dataTier:
                    raise ConfigCacheException("No DataTier in output module.")

                # Add dataTier to duplicate dictionary
                if not dataTier in duplicateCheck.keys():
                    duplicateCheck[dataTier] = []
                if filterName in duplicateCheck[dataTier]:
                    # Then we've seen this combination before
                    raise ConfigCacheException("Duplicate dataTier/filterName combination.")
                else:
                    duplicateCheck[dataTier].append(filterName)
            return outputModuleInfo
        else:
            return True
def __init__(self, dqis_db=None, savable=False, *args, **kwargs):
    Document.__init__(self, *args, **kwargs)
    self.dqis_db = dqis_db
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert,
                            mcmKey, tmpDir, archived=False, log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." %
        (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ["normal-archived", "rejected-archived", "aborted-archived"]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True

    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)
    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():
            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if "filterEfficiency" not in oldCouchDoc or "runWhiteList" not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log("DEBUG: Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get("RunWhiteList", [])
                    filterEfficiency = rmDoc.get("FilterEfficiency", None)
                except:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({"filterEfficiency": filterEfficiency,
                                   "runWhiteList": runWhiteList})

            if "mcmTotalEvents" not in oldCouchDoc or "mcmApprovalTime" not in oldCouchDoc:
                prepID = oldCouchDoc.get("prepID", None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    nMCMCalls += 1
                    mcmHistory = mcm.getHistory(prepID=prepID)
                    mcmRequest = mcm.getRequest(prepID=prepID)
                    report[wf].update({"mcmTotalEvents": mcmRequest.get("total_events", "Unknown")})
                    if "mcmApprovalTime" not in oldCouchDoc:
                        report[wf].update({"mcmApprovalTime": "Unknown"})
                    for entry in mcmHistory:
                        if entry["action"] == "set status" and entry["step"] == "announced":
                            dateString = entry["updater"]["submission_date"]
                            dt = datetime.strptime(dateString, "%Y-%m-%d-%H-%M")
                            report[wf].update({"mcmApprovalTime": time.mktime(dt.timetuple())})

            # Basic parameters of the workflow
            priority = requests[wf]["priority"]
            requestType = requests[wf]["request_type"]
            targetLumis = requests[wf].get("input_lumis", 0)
            targetEvents = requests[wf].get("input_events", 0)
            campaign = requests[wf]["campaign"]
            prep_id = requests[wf].get("prep_id", None)
            outputdatasets = requests[wf].get("outputdatasets", [])

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf]["inputdataset"]
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ""

            outputTier = "Unknown"
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split("/")[-1])
                    else:
                        outputTiers.append(ds.split("/")[-1])
            except:
                # Sometimes it is a list of lists, not just a list.  Bail.
                log("ERROR: Could not decode outputdatasets: %s" % outputdatasets)

            if inputdataset:
                inputTier = inputdataset.split("/")[-1]
                if inputTier in ["GEN"]:
                    outputTier = "LHE"
                elif inputTier in ["RAW", "RECO"]:
                    outputTier = "AOD"
                elif inputTier in ["GEN-SIM"]:
                    outputTier = "AODSIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
            else:
                if len(outputTiers) == 1 and "GEN" in outputTiers:
                    if "STEP0ATCERN" in wf:
                        outputTier = "STEP0"
                    else:
                        outputTier = "FullGen"
                elif "GEN-SIM" in outputTiers and "AODSIM" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "RECO" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "GEN-SIM" in outputTiers:
                    outputTier = "GEN-SIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
                elif "RECO" in outputTiers:
                    outputTier = "AOD"
                elif "AOD" in outputTiers:
                    outputTier = "AOD"
                else:
                    outputTier = "GEN-SIM"

            # Calculate completion ratios for events and lumi sections,
            # take the minimum over all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get("events", 0)
                lumis = dsr.get("totalLumis", 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / targetEvents * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs["sucess"]  # key spelled 'sucess' in the summary data
                totalJobs += jobs["created"]
            try:
                if totalJobs and not report[wf].get("firstJobTime", None):
                    report[wf].update({"firstJobTime": int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get("lastJobTime", None):
                    report[wf].update({"lastJobTime": int(time.time())})
            except:
                pass

            # Figure out the current status of the workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None
            for status in requests[wf]["request_status"]:
                finalStatus = status["status"]
                if status["status"] == "new":
                    newTime = status["update_time"]
                if status["status"] == "assignment-approved":
                    approvedTime = status["update_time"]
                if status["status"] == "assigned":
                    assignedTime = status["update_time"]
                if status["status"] == "completed":
                    completedTime = status["update_time"]
                if status["status"] == "acquired":
                    acquireTime = status["update_time"]
                if status["status"] == "closed-out":
                    closeoutTime = status["update_time"]
                if status["status"] == "announced":
                    announcedTime = status["update_time"]
                if status["status"] == "normal-archived":
                    archivedTime = status["update_time"]

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})
            if approvedTime and not report[wf].get("approvedTime", None):
                report[wf].update({"approvedTime": approvedTime})
            if assignedTime and not report[wf].get("assignedTime", None):
                report[wf].update({"assignedTime": assignedTime})
            if acquireTime and not report[wf].get("acquireTime", None):
                report[wf].update({"acquireTime": acquireTime})
            if closeoutTime and not report[wf].get("closeoutTime", None):
                report[wf].update({"closeoutTime": closeoutTime})
            if announcedTime and not report[wf].get("announcedTime", None):
                report[wf].update({"announcedTime": announcedTime})
            if completedTime and not report[wf].get("completedTime", None):
                report[wf].update({"completedTime": completedTime})
            if newTime and not report[wf].get("newTime", None):
                report[wf].update({"newTime": newTime})
            if archivedTime and not report[wf].get("archivedTime", None):
                report[wf].update({"archivedTime": archivedTime})

            try:
                dt = requests[wf]["request_date"]
                requestDate = "%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d" % tuple(dt)
                report[wf].update({"requestDate": requestDate})
            except:
                pass

            report[wf].update({"priority": priority, "status": finalStatus, "type": requestType})
            report[wf].update({"totalLumis": targetLumis, "totalEvents": targetEvents})
            report[wf].update({"campaign": campaign, "prepID": prep_id, "outputTier": outputTier})
            report[wf].update({"outputDatasets": outputdatasets, "inputDataset": inputdataset})

            report[wf].setdefault("lumiPercents", {})
            report[wf].setdefault("eventPercents", {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]["lumiPercents"].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]["lumiPercents"][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage
                percentReported = report[wf]["eventPercents"].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]["eventPercents"][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage
            report[wf].update({"eventProgress": eventProgress, "lumiProgress": lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log("DEBUG: Workflow updated: %s" % wf)
                    pass
                else:
                    # log("DEBUG: Workflow created: %s" % wf)
                    pass
                try:
                    newCouchDoc["updateTime"] = int(time.time())
                    report[wf]["updateTime"] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log("ERROR: Failed to queue document: %s\n" % pprint.pformat(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl,
                            archived=False, log=logging.info):
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." % (
        funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True

    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)
    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    for wf in result.keys():
        # Store a copy of the CouchDB document so we can compare later before updating
        if couchdb.documentExists(wf):
            oldCouchDoc = couchdb.document(wf)
            wfExists = True
        else:
            oldCouchDoc = CouchDoc(id=wf)
            wfExists = False

        newCouchDoc = copy.deepcopy(oldCouchDoc)
        ancientCouchDoc = copy.deepcopy(oldCouchDoc)
        report[wf] = oldCouchDoc
        # FIXME: remove report, only have two instances of couchDoc

        if not oldCouchDoc.has_key('filterEfficiency') or not oldCouchDoc.has_key('runWhiteList'):
            runWhiteList = []
            filterEfficiency = None
            try:
                #log("DEBUG: Looking up %s in ReqMgr" % wf)
                rmDoc = reqMgr.document(wf)
                runWhiteList = rmDoc.get('RunWhiteList', [])
                filterEfficiency = rmDoc.get('FilterEfficiency', None)
            except:
                pass  # ReqMgr no longer has the workflow
            report[wf].update({'filterEfficiency': filterEfficiency,
                               'runWhiteList': runWhiteList})

        # Basic parameters of the workflow
        priority = requests[wf]['priority']
        requestType = requests[wf]['request_type']
        targetLumis = requests[wf].get('input_lumis', 0)
        targetEvents = requests[wf].get('input_events', 0)
        campaign = requests[wf]['campaign']
        prep_id = requests[wf].get('prep_id', None)
        outputdatasets = requests[wf].get('outputdatasets', [])

        # Can be an empty list, full list, empty string, or non-empty string!
        inputdataset = requests[wf]['inputdataset']
        if isinstance(inputdataset, (list,)):
            if inputdataset:
                inputdataset = inputdataset[0]
            else:
                inputdataset = ''

        outputTier = 'Unknown'
        try:
            outputTiers = []
            for ds in outputdatasets:
                if type(ds) == list:
                    outputTiers.append(ds[0].split('/')[-1])
                else:
                    outputTiers.append(ds.split('/')[-1])
        except:
            # Sometimes it is a list of lists, not just a list.  Bail.
            log("ERROR: Could not decode outputdatasets: %s" % outputdatasets)

        if inputdataset:
            inputTier = inputdataset.split('/')[-1]
            if inputTier in ['GEN']:
                outputTier = 'LHE'
            elif inputTier in ['RAW', 'RECO']:
                outputTier = 'AOD'
            elif inputTier in ['GEN-SIM']:
                outputTier = 'AODSIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'
        else:
            if len(outputTiers) == 1 and 'GEN' in outputTiers:
                if 'STEP0ATCERN' in wf:
                    outputTier = 'STEP0'
                else:
                    outputTier = 'FullGen'
            elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'RECO' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'GEN-SIM' in outputTiers:
                outputTier = 'GEN-SIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'
            elif 'RECO' in outputTiers:
                outputTier = 'AOD'
            elif 'AOD' in outputTiers:
                outputTier = 'AOD'
            else:
                outputTier = 'GEN-SIM'

        # Calculate completion ratios for events and lumi sections,
        # take the minimum over all datasets
        eventPercent = 200
        lumiPercent = 200
        datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
        for dataset in datasetReports:
            dsr = datasetReports[dataset].getReport()
            events = dsr.get('events', 0)
            lumis = dsr.get('totalLumis', 0)
            if targetLumis:
                lumiPercent = min(lumiPercent, lumis / targetLumis * 100)
            if targetEvents:
                eventPercent = min(eventPercent, events / targetEvents * 100)
        if eventPercent > 100:
            eventPercent = 0
        if lumiPercent > 100:
            lumiPercent = 0

        # Sum up all jobs across agents to see if we've run the first, last
        successJobs = 0
        totalJobs = 0
        for agent in result[wf]:
            jobs = result[wf][agent]
            successJobs += jobs['sucess']  # key spelled 'sucess' in the summary data
            totalJobs += jobs['created']
        try:
            if totalJobs and not report[wf].get('firstJobTime', None):
                report[wf].update({'firstJobTime': int(time.time())})
            if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                report[wf].update({'lastJobTime': int(time.time())})
        except:
            pass

        # Figure out the current status of the workflow and transition times
        finalStatus = None
        newTime = None
        approvedTime = None
        assignedTime = None
        acquireTime = None
        completedTime = None
        closeoutTime = None
        announcedTime = None
        archivedTime = None
        requestDate = None
        for status in requests[wf]['request_status']:
            finalStatus = status['status']
            if status['status'] == 'new':
                newTime = status['update_time']
            if status['status'] == 'assignment-approved':
                approvedTime = status['update_time']
            if status['status'] == 'assigned':
                assignedTime = status['update_time']
            if status['status'] == 'completed':
                completedTime = status['update_time']
            if status['status'] == 'acquired':
                acquireTime = status['update_time']
            if status['status'] == 'closed-out':
                closeoutTime = status['update_time']
            if status['status'] == 'announced':
                announcedTime = status['update_time']
            if status['status'] == 'normal-archived':
                archivedTime = status['update_time']

        # Build or modify the report dictionary for the WF
        report.setdefault(wf, {})
        if approvedTime and not report[wf].get('approvedTime', None):
            report[wf].update({'approvedTime': approvedTime})
        if assignedTime and not report[wf].get('assignedTime', None):
            report[wf].update({'assignedTime': assignedTime})
        if acquireTime and not report[wf].get('acquireTime', None):
            report[wf].update({'acquireTime': acquireTime})
        if closeoutTime and not report[wf].get('closeoutTime', None):
            report[wf].update({'closeoutTime': closeoutTime})
        if announcedTime and not report[wf].get('announcedTime', None):
            report[wf].update({'announcedTime': announcedTime})
        if completedTime and not report[wf].get('completedTime', None):
            report[wf].update({'completedTime': completedTime})
        if newTime and not report[wf].get('newTime', None):
            report[wf].update({'newTime': newTime})
        if archivedTime and not report[wf].get('archivedTime', None):
            report[wf].update({'archivedTime': archivedTime})

        try:
            dt = requests[wf]['request_date']
            requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
            report[wf].update({'requestDate': requestDate})
        except:
            pass

        report[wf].update({'priority': priority, 'status': finalStatus, 'type': requestType})
        report[wf].update({'totalLumis': targetLumis, 'totalEvents': targetEvents})
        report[wf].update({'campaign': campaign, 'prepID': prep_id, 'outputTier': outputTier})
        report[wf].update({'outputDatasets': outputdatasets, 'inputDataset': inputdataset})

        report[wf].setdefault('lumiPercents', {})
        report[wf].setdefault('eventPercents', {})
        lumiProgress = 0
        eventProgress = 0
        for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
            percent = str(percentage)
            percentReported = report[wf]['lumiPercents'].get(percent, None)
            if not percentReported and lumiPercent >= percentage:
                report[wf]['lumiPercents'][percent] = int(time.time())
            if lumiPercent >= percentage:
                lumiProgress = percentage
            percentReported = report[wf]['eventPercents'].get(percent, None)
            if not percentReported and eventPercent >= percentage:
                report[wf]['eventPercents'][percent] = int(time.time())
            if eventPercent >= percentage:
                eventProgress = percentage
        report[wf].update({'eventProgress': eventProgress, 'lumiProgress': lumiProgress})

        newCouchDoc.update(report[wf])

        # Queue the updated document for addition if it's changed.
        if ancientCouchDoc != newCouchDoc:
            if wfExists:
                #log("DEBUG: Workflow updated: %s" % wf)
                pass
            else:
                #log("DEBUG: Workflow created: %s" % wf)
                pass
            try:
                newCouchDoc['updateTime'] = int(time.time())
                report[wf]['updateTime'] = int(time.time())
                cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                couchdb.queue(newCouchDoc)
            except:
                log("ERROR: Failed to queue document: %s\n" % pprint.pformat(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
# Standard-library imports used below; cjson is the Python 2 JSON codec the
# original code relies on. WMCore/McM helpers (CouchServer, CouchDoc,
# WMStatsReader, RequestInfoCollection, McM, McMNoDataError,
# splitCouchServiceURL) are assumed to be imported at module scope.
import copy
import logging
import pprint
import sys
import time
from datetime import datetime

import cjson

# Caps the number of McM lookups per cycle; the original defines this at module
# scope -- the value here is illustrative.
maxMCMCalls = 100


def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey,
                            tmpDir, archived=False, log=logging):
    # The default log object must expose .info()/.error(); the logging module itself does.
    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqmgrUrl, reqdbCouchApp="ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." %
             (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True

    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag,
                                          legacyFormat=True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except Exception:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({'filterEfficiency': filterEfficiency,
                                   'runWhiteList': runWhiteList})

            if ('mcmTotalEvents' not in oldCouchDoc or
                    'mcmApprovalTime' not in oldCouchDoc or
                    oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or
                    oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown'):
                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID=prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime': time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime': 'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. "
                                  "May be transient and/or SSO problem." % (exc_type, prepID))
                    except Exception:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. "
                                  "Unknown error." % (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID=prepID)
                        report[wf].update({'mcmTotalEvents':
                                           mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. "
                                  "May be transient and/or SSO problem." % (exc_type, prepID))
                    except Exception:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. "
                                  "Unknown error." % (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf]['priority']
            requestType = requests[wf]['request_type']
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf]['campaign']
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                # Should not happen, but it does.
                log.error("Could not find any status from workflow: %s" % wf)

            # Remove a single task_ from the start of PREP ID if it exists
            # (replace() returns a new string, so the result must be assigned back)
            if prep_id and prep_id.startswith('task_'):
                prep_id = prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except Exception:
                # Sometimes it is a list of lists, not just a list. Bail.
                log.error("Could not decode outputdatasets: %s" % outputdatasets)

            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif ('GEN-SIM' in outputTiers and 'AODSIM' in outputTiers
                      and requestType == 'TaskChain'):
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections;
            # take the minimum over all output datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    # float() guards against Python 2 integer division
                    lumiPercent = min(lumiPercent, lumis / float(targetLumis) * 100)
                if targetEvents:
                    eventPercent = min(eventPercent, events / float(targetEvents) * 100)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']  # 'sucess' is the (misspelled) upstream key
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime': int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime': int(time.time())})
            except Exception:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None
            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime': approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime': assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime': acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime': closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime': announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime': completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime': newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime': archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate': requestDate})
            except Exception:
                pass

            report[wf].update({'priority': priority, 'status': finalStatus, 'type': requestType})
            report[wf].update({'totalLumis': targetLumis, 'totalEvents': targetEvents})
            report[wf].update({'campaign': campaign, 'prepID': prep_id, 'outputTier': outputTier})
            report[wf].update({'outputDatasets': outputdatasets, 'inputDataset': inputdataset})

            # Record the first time each completion milestone was crossed
            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress': eventProgress, 'lumiProgress': lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    # log.debug("Workflow created: %s" % wf)
                    pass
                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except Exception:
                    # pformat renders the document into the log message
                    log.error("Failed to queue document:\n%s" % pprint.pformat(newCouchDoc))

    log.info("%s: Finished getting jobs. Wait for the next cycle." % funcName)

    # Commit all changes to CouchDB
    couchdb.commit()
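# A minimal invocation sketch for gatherWMDataMiningStats(). All URLs,
# certificate paths, and the temporary directory below are hypothetical
# placeholders (not from the original source); in a real deployment they would
# point at the production WMStats/ReqMgr/McM services.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    gatherWMDataMiningStats(
        wmstatsUrl='https://cmsweb.example/couchdb/wmstats',                # hypothetical
        reqmgrUrl='https://cmsweb.example/couchdb/reqmgr_workload_cache',   # hypothetical
        wmMiningUrl='https://cmsweb.example/couchdb/wmdatamining',          # hypothetical
        mcmUrl='https://cms-pdmv.example/mcm',                              # hypothetical
        mcmCert='/path/to/usercert.pem',                                    # hypothetical
        mcmKey='/path/to/userkey.pem',                                      # hypothetical
        tmpDir='/tmp/mcm',                                                  # hypothetical
        archived=False,
        log=logging)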
class CouchWorkQueueElement(WorkQueueElement):
    """
    _CouchWorkQueueElement_
    """
    def __init__(self, couchDB, id=None, elementParams=None):
        elementParams = elementParams or {}
        WorkQueueElement.__init__(self, **elementParams)
        if id:
            self._id = id
        self._document = Document(id=id)
        self._couch = couchDB

    rev = property(
        lambda x: str(x._document[u'_rev']) if u'_rev' in x._document else x._document.__getitem__('_rev'),
        lambda x, newid: x._document.__setitem__('_rev', newid))
    timestamp = property(
        lambda x: str(x._document[u'timestamp']) if u'timestamp' in x._document else x._document.__getitem__('timestamp')
    )
    updatetime = property(
        lambda x: str(x._document[u'updatetime']) if u'updatetime' in x._document else 0
    )

    @classmethod
    def fromDocument(cls, couchDB, doc):
        """Create element from couch document"""
        element = CouchWorkQueueElement(couchDB=couchDB,
                                        id=doc['_id'],
                                        elementParams=doc.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement')
                                        )
        element._document['_rev'] = doc.pop('_rev')
        element._document['timestamp'] = doc.pop('timestamp')
        element._document['updatetime'] = doc.pop('updatetime')
        return element

    def save(self):
        """
        _save_
        """
        self.populateDocument()
        self._couch.queue(self._document)

    def load(self):
        """
        _load_

        Load the document representing this WQE
        """
        document = self._couch.document(self._document['_id'])
        self.update(document.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'))
        self._document['_rev'] = document.pop('_rev')
        self._document['timestamp'] = document.pop('timestamp', None)
        self._document['updatetime'] = document.pop('updatetime', None)
        return self

    def delete(self):
        """Delete element"""
        self.populateDocument()
        self._document.delete()
        self._couch.queue(self._document)

    def populateDocument(self):
        """Certain attributes shouldn't be stored"""
        self._document.update(self.__to_json__(None))
        now = time.time()
        self._document['updatetime'] = now
        self._document.setdefault('timestamp', now)
        if not self._document.get('_id') and self.id:
            self._document['_id'] = self.id
        attrs = ['WMSpec', 'Task']
        for attr in attrs:
            self._document['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'].pop(attr, None)
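# A minimal round-trip sketch for CouchWorkQueueElement, exercising save() and
# load() as defined above. The server URL, database name, element id, and
# 'RequestName' value are illustrative assumptions, not from the original
# source; the CouchServer import path follows WMCore conventions.
if __name__ == '__main__':
    from WMCore.Database.CMSCouch import CouchServer

    couch = CouchServer('http://localhost:5984')   # hypothetical CouchDB instance
    db = couch.connectDatabase('workqueue')        # hypothetical database name

    element = CouchWorkQueueElement(db, id='example-element',
                                    elementParams={'RequestName': 'test_workflow'})
    element.save()
    db.commit()  # save() only queues the document; commit() flushes it to CouchDB

    roundTrip = CouchWorkQueueElement(db, id='example-element').load()
    print(roundTrip['RequestName'])  # expected: 'test_workflow'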