def __init__(self, couchDB, id=None, elementParams=None):
     elementParams = elementParams or {}
     WorkQueueElement.__init__(self, **elementParams)
     if id:
         self._id = id
     self._document = Document(id=id)
     self._couch = couchDB
Example #2
 def __init__(self, workloadId, url, database, workload = None):
     Document.__init__(self, workloadId)
     self.database = database
     self.url = url
     self.server = None
     self.couchdb = None
     if workload is not None:
         self.update(workload.generateWorkloadSummary())
Example #3
 def __init__(self, run, lumi, dataset, user, bfield=0, initial_map=None):
     '''
     Instantiate the Couch document and set the appropriate values
     '''
     # Avoid a shared mutable default: each document gets its own map dict
     initial_map = initial_map if initial_map is not None else {}
     CouchDocument.__init__(self, id=self._generate_id(run, lumi, dataset))
     self.setdefault('run', run)
     self.setdefault('lumi', lumi)
     self.setdefault('dataset', dataset.strip('/').split('/'))
     self.setdefault('bfield', bfield)
     self.setdefault('map_history', [])
     self.setdefault('map', initial_map)
     self['map']['_meta'] = {'user': user, 
                    'timestamp': str(datetime.now())}
Example #4
    def __init__(self,
                 dbURL,
                 couchDBName=None,
                 id=None,
                 rev=None,
                 usePYCurl=True,
                 ckey=None,
                 cert=None,
                 capath=None,
                 detail=True):
        super(ConfigCache, self).__init__()
        self.dbname = couchDBName
        self.dburl = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl,
                                       usePYCurl=usePYCurl,
                                       ckey=ckey,
                                       cert=cert,
                                       capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message=msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id is not None:
            self.document['_id'] = id
        self.document['pset_tweak_details'] = None
        self.document['info'] = None
        self.document['config'] = None
        return
Example #5
 def recordTaskActivity(self, taskname, comment=''):
     """Record a task for monitoring"""
     try:
         record = self.db.document('task_activity')
     except CouchNotFoundError:
         record = Document('task_activity')
     record.setdefault('tasks', {})
     record['tasks'].setdefault(taskname, {})
     record['tasks'][taskname]['timestamp'] = time.time()
     record['tasks'][taskname]['comment'] = comment
     try:
         self.db.commitOne(record)
     except Exception as ex:
         self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex)))
Example #6
    def testAllDocs(self):
        """
        Test AllDocs with options
        """
        self.db.queue(Document(id = "1", inputDict = {'foo':123, 'bar':456}))
        self.db.queue(Document(id = "2", inputDict = {'foo':123, 'bar':456}))
        self.db.queue(Document(id = "3", inputDict = {'foo':123, 'bar':456}))

        self.db.commit()
        self.assertEqual(3, len(self.db.allDocs()['rows']))
        self.assertEqual(2, len(self.db.allDocs({'startkey': "2"})['rows']))
        self.assertEqual(2, len(self.db.allDocs(keys = ["1", "3"])['rows']))
        self.assertEqual(1, len(self.db.allDocs({'limit':1}, ["1", "3"])['rows']))
        self.assertTrue('error' in self.db.allDocs(keys = ["1", "4"])['rows'][1])
Example #7
    def put(self, subName=None):
        """
        Update document for the given self.name and subName.
        It assumes the client has provided the entire entity, i.e., the old
        content gets completely replaced by the new one.

        Given that each couch document contains a revision number, these PUT calls
        are not going to be idempotent.
        """
        data = cherrypy.request.body.read()
        if not data:
            raise MissingPostData()
        else:
            propertyDict = json.loads(data)

        result = None
        if subName:
            docName = "%s_%s" % (self.name, subName)
        else:
            docName = self.name

        try:
            existDoc = self.reqmgr_aux_db.document(docName)
            # replace original document
            newDoc = Document(existDoc['_id'], inputDict={'_rev': existDoc['_rev'],
                                                          'ConfigType': existDoc['ConfigType']})
            newDoc.update(propertyDict)
            result = self.reqmgr_aux_db.commitOne(newDoc)
        except CouchNotFoundError:
            cherrypy.log("Document %s not found. Creating one." % docName)
            doc = Document(docName, propertyDict)
            doc.update({'ConfigType': self.name})
            result = self.reqmgr_aux_db.commitOne(doc)

        return result
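This replace-the-whole-document pattern only succeeds because the current _rev is copied into the new document; CouchDB rejects any update whose revision does not match. A minimal standalone sketch of the same dance, assuming a CMSCouch Database handle db and an existing document name docName (both placeholders here):

    from WMCore.Database.CMSCouch import Document

    def replaceDocument(db, docName, newContent):
        """Replace docName wholesale, carrying the current _rev forward."""
        existing = db.document(docName)
        # Without the matching _rev, Couch answers with a 409 conflict
        newDoc = Document(existing['_id'], inputDict={'_rev': existing['_rev']})
        newDoc.update(newContent)
        return db.commitOne(newDoc)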
Example #9
 def recordTaskActivity(self, taskname, comment = ''):
     """Record a task for monitoring"""
     try:
         record = self.db.document('task_activity')
     except CouchNotFoundError:
         record = Document('task_activity')
     record.setdefault('tasks', {})
     record['tasks'].setdefault(taskname, {})
     record['tasks'][taskname]['timestamp'] = time.time()
     record['tasks'][taskname]['comment'] = comment
     try:
         self.db.commitOne(record)
     except Exception as ex:
         self.logger.error("Unable to update task %s freshness: %s" % (taskname, str(ex)))
Example #10
    def injectReDigiConfigs(self, combinedStepOne = False):
        """
        _injectReDigiConfigs_

        Create bogus config cache documents for the various steps of the
        ReDigi workflow.  Return the IDs of the documents.
        """
        stepOneConfig = Document()
        stepOneConfig["info"] = None
        stepOneConfig["config"] = None
        stepOneConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        stepOneConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        stepOneConfig["owner"] = {"group": "cmsdataops", "user": "******"}
        if combinedStepOne:
            stepOneConfig["pset_tweak_details"] = {
                "process": {
                    "outputModules_": ["RECODEBUGoutput", "DQMoutput"],
                    "RECODEBUGoutput": {"dataset": {"filterName": "",
                                                    "dataTier": "RECO-DEBUG-OUTPUT"}},
                    "DQMoutput": {"dataset": {"filterName": "",
                                              "dataTier": "DQM"}}
                }
            }
        else:
            stepOneConfig["pset_tweak_details"] = {
                "process": {
                    "outputModules_": ["RAWDEBUGoutput"],
                    "RAWDEBUGoutput": {"dataset": {"filterName": "",
                                                   "dataTier": "RAW-DEBUG-OUTPUT"}}
                }
            }

        stepTwoConfig = Document()
        stepTwoConfig["info"] = None
        stepTwoConfig["config"] = None
        stepTwoConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        stepTwoConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        stepTwoConfig["owner"] = {"group": "cmsdataops", "user": "******"}
        stepTwoConfig["pset_tweak_details"] = {
            "process": {
                "outputModules_": ["RECODEBUGoutput", "DQMoutput"],
                "RECODEBUGoutput": {"dataset": {"filterName": "",
                                                "dataTier": "RECO-DEBUG-OUTPUT"}},
                "DQMoutput": {"dataset": {"filterName": "",
                                          "dataTier": "DQM"}}
            }
        }

        stepThreeConfig = Document()
        stepThreeConfig["info"] = None
        stepThreeConfig["config"] = None
        stepThreeConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        stepThreeConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        stepThreeConfig["owner"] = {"group": "cmsdataops", "user": "******"}
        stepThreeConfig["pset_tweak_details"] = {
            "process": {
                "outputModules_": ["aodOutputModule"],
                "aodOutputModule": {"dataset": {"filterName": "",
                                                "dataTier": "AODSIM"}}
            }
        }
        stepOne = self.configDatabase.commitOne(stepOneConfig)[0]["id"]
        stepTwo = self.configDatabase.commitOne(stepTwoConfig)[0]["id"]
        stepThree = self.configDatabase.commitOne(stepThreeConfig)[0]["id"]
        return (stepOne, stepTwo, stepThree)
Example #11
def injectStepChainConfigSingle(couchDatabase):
    """
    _injectStepChainConfigSingle_

    Create a single config
    """
    miniConfig = Document()
    miniConfig["info"] = None
    miniConfig["config"] = None
    miniConfig["md5hash"] = "9bdc3d7b2fc90e0f4ca24e270a467ac3"
    miniConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7"
    miniConfig["owner"] = {"group": "DATAOPS", "user": "******"}
    miniConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["MINIAODSIMoutput"],
            "MINIAODSIMoutput": {
                "dataset": {
                    "filterName": "",
                    "dataTier": "MINIAODSIM"
                }
            }
        }
    }
    result = couchDatabase.commitOne(miniConfig)
    return result[0]["id"]
Example #12
    def injectSkimConfig(self):
        """
        _injectSkimConfig_

        Create a bogus config cache document for the skims and inject it into
        couch.  Return the ID of the document.
        """
        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
        newConfig["pset_tweak_details"] = {
            "process": {
                "outputModules_": ["SkimA", "SkimB"],
                "SkimA": {
                    "dataset": {
                        "filterName": "SkimAFilter",
                        "dataTier": "RAW-RECO"
                    }
                },
                "SkimB": {
                    "dataset": {
                        "filterName": "SkimBFilter",
                        "dataTier": "USER"
                    }
                }
            }
        }
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]
Example #13
 def testCommitOne(self):
     # Can I commit one dict
     doc = {'foo': 123, 'bar': 456}
     id = self.db.commitOne(doc, returndocs=True)[0]['id']
     # What about a Document
     doc = Document(inputDict=doc)
     id = self.db.commitOne(doc, returndocs=True)[0]['id']
Example #14
    def injectAnalysisConfig(self):
        """
        Create a bogus config cache document for the analysis workflow and
        inject it into couch.  Return the ID of the document.
        """

        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["pset_hash"] = "21cb400c6ad63c3a97fa93f8e8785127"
        newConfig["owner"] = {"group": "Analysis", "user": "******"}
        newConfig["pset_tweak_details"] = {
            "process": {
                "outputModules_": ["OutputA", "OutputB"],
                "OutputA": {
                    "dataset": {
                        "filterName": "OutputAFilter",
                        "dataTier": "RECO"
                    }
                },
                "OutputB": {
                    "dataset": {
                        "filterName": "OutputBFilter",
                        "dataTier": "USER"
                    }
                }
            }
        }

        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]
Example #15
    def testA(self):
        """ make some documents and own them"""
        guInt = Interface(self.testInit.couchUrl, self.testInit.couchDbName)


        #create a couple of docs
        couch = Database(self.testInit.couchDbName, self.testInit.couchUrl)
        for x in range(10):
            doc = Document("document%s" % x, {"Test Data": [1,2,3,4] })
            couch.queue(doc)
        couch.commit()

        self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 0)
        self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 0)

        guInt.callUpdate("ownthis","document1", group = self.owner1.group.name, user = self.owner1.name)

        self.assertTrue("document1" in guInt.documentsOwned(self.owner1.group.name, self.owner1.name))
        self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 1)
        self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 0)

        guInt.callUpdate("ownthis","document2", group = self.owner2.group.name, user = self.owner2.name)

        self.assertTrue("document2" in guInt.documentsOwned(self.owner2.group.name, self.owner2.name))
        self.assertEqual(len(guInt.documentsOwned(self.owner1.group.name, self.owner1.name)), 1)
        self.assertEqual(len(guInt.documentsOwned(self.owner2.group.name, self.owner2.name)), 1)


        guInt.callUpdate("newgroup", "group-DataOps", group = "DataOps")

        self.assertTrue(couch.documentExists("group-DataOps") )

        guInt.callUpdate("newuser", "user-damason", group = "DataOps", user = "******")

        self.assertTrue(couch.documentExists("user-damason") )
Example #16
def makeGeneratorConfig(couchDatabase):
    """
    _makeGeneratorConfig_

    Create a bogus config cache document for the montecarlo generation and
    inject it into couch.  Return the ID of the document.

    """
    newConfig = Document()
    newConfig["info"] = None
    newConfig["config"] = None
    newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
    newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
    newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    newConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": ["writeGENSIM"],
            "writeGENSIM": {
                "dataset": {
                    "filterName": "GenSimFilter",
                    "dataTier": "GEN-SIM"
                }
            }
        }
    }
    result = couchDatabase.commitOne(newConfig)
    return result[0]["id"]
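All of these bogus config documents share one shape: "outputModules_" lists the module names, and each named module carries a "dataset" stanza. That is exactly the structure ConfigCache.getOutputModuleInfo() (shown in full further down) traverses. A minimal standalone sketch of that traversal, applied to the document built above:

    def outputModulesFromTweaks(psetTweaks):
        """Map each output module name to its dataTier/filterName pair."""
        process = psetTweaks["process"]
        results = {}
        for name in process["outputModules_"]:
            dataset = process[name].get("dataset") or {}
            results[name] = {"dataTier": dataset.get("dataTier"),
                             "filterName": dataset.get("filterName")}
        return results

    # outputModulesFromTweaks(newConfig["pset_tweak_details"]) returns
    # {'writeGENSIM': {'dataTier': 'GEN-SIM', 'filterName': 'GenSimFilter'}}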
Example #17
    def injectAnalysisConfig(self):
        """
        Create a bogus config cache document for the analysis workflow and
        inject it into couch.  Return the ID of the document.
        """

        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["pset_hash"] = "21cb400c6ad63c3a97fa93f8e8785127"
        newConfig["owner"] = {"group": "Analysis", "user": "******"}
        newConfig["pset_tweak_details"] = {
            "process": {
                "maxEvents": {
                    "parameters_": ["input"],
                    "input": 10
                },
                "outputModules_": ["output"],
                "parameters_": ["outputModules_"],
                "source": {
                    "parameters_": ["fileNames"],
                    "fileNames": []
                },
                "output": {
                    "parameters_": ["fileName"],
                    "fileName": "outfile.root"
                },
                "options": {
                    "parameters_": ["wantSummary"],
                    "wantSummary": True
                }
            }
        }
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]
Example #18
    def testA(self):
        """instantiate"""

        document = Document()
        document[u'pset_tweak_details'] = {}
        document[u'pset_tweak_details'][u'process'] = {}
        document[u'pset_tweak_details'][u'process'][
            u'RandomNumberGeneratorService'] = {}
        document[u'pset_tweak_details'][u'process'][
            u'RandomNumberGeneratorService'][u'seed1'] = {}
        document[u'pset_tweak_details'][u'process'][
            u'RandomNumberGeneratorService'][u'seed2'] = {}
        document[u'pset_tweak_details'][u'process'][
            u'RandomNumberGeneratorService'][u'seed3'] = {}

        document = self.database.commitOne(document)[0]
        seeder = ReproducibleSeeding(CouchUrl=self.testInit.couchUrl,
                                     CouchDBName=self.testInit.couchDbName,
                                     ConfigCacheDoc=document[u'id'])

        job = Job("testjob")
        seeder(job)
        baggage = job.getBaggage()
        seed1 = getattr(baggage.process.RandomNumberGeneratorService, "seed1",
                        None)
        self.assertIsNotNone(seed1)
Example #19
    def injectReRecoConfig(self):
        """
        _injectReRecoConfig_

        Inject a ReReco config document that we can use to set the outputModules
        """

        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e580f"
        newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10259a7"
        newConfig["owner"] = {"group": "cmsdataops", "user": "******"}
        newConfig["pset_tweak_details"] = {
            "process": {
                "outputModules_": ['RECOoutput', 'DQMoutput'],
                "RECOoutput": {
                    'dataset': {
                        'filterName': 'RECOoutputFilter',
                        'dataTier': 'RECO'
                    }
                },
                "DQMoutput": {
                    'dataset': {
                        'filterName': 'DQMoutputFilter',
                        'dataTier': 'DQM'
                    }
                }
            }
        }
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]
Example #20
 def __init__(self, couchDB, id=None, elementParams=None):
     elementParams = elementParams or {}
     WorkQueueElement.__init__(self, **elementParams)
     if id:
         self._id = id
     self._document = Document(id=id)
     self._couch = couchDB
Example #21
    def put(self, team_name):
        """
        Adds a team named team_name to the database.
        Creates teams document if it doesn't exist.

        """
        try:
            teams = self.reqmgr_aux_db.document("teams")
        except CouchNotFoundError as ex:
            msg = ("ERROR: Retrieving teams document failed, reason: %s"
                   " Creating the document ..." % ex)
            cherrypy.log(msg)
            try:
                doc = Document(id="teams", inputDict={team_name: None})
                self.reqmgr_aux_db.commitOne(doc)
                return
            except CouchError as ex:
                msg = "ERROR: Creating document teams failed, reason: %s" % ex
                cherrypy.log(msg)
                raise cherrypy.HTTPError(400, msg)

        if team_name in teams:
            return rows(["Already exists."])
        else:
            teams[team_name] = None
            # TODO
            # this should ideally also wrap try-except
            self.reqmgr_aux_db.commitOne(teams)
            return rows(["OK"])
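The TODO above can be addressed by mirroring the error handling already used in the creation branch. A possible sketch for the update path, assuming the same CouchError and cherrypy names as the surrounding handler:

    teams[team_name] = None
    try:
        self.reqmgr_aux_db.commitOne(teams)
    except CouchError as ex:
        msg = "ERROR: Updating teams document failed, reason: %s" % ex
        cherrypy.log(msg)
        raise cherrypy.HTTPError(400, msg)
    return rows(["OK"])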
Example #22
    def testCommit(self):
        """
        Test queue and commit modes
        """
        # try to commit 2 random docs
        doc = {'foo': 123, 'bar': 456}
        self.db.queue(doc)
        self.db.queue(doc)
        self.assertEqual(2, len(self.db.commit()))

        # committing 2 docs with the same id will fail
        self.db.queue(Document(id="1", inputDict={'foo': 123, 'bar': 456}))
        self.db.queue(Document(id="1", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit()
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0]['error'], 'conflict')
        self.assertEqual(answer[1]['error'], 'conflict')

        # all_or_nothing mode ignores conflicts
        self.db.queue(Document(id="2", inputDict=doc))
        self.db.queue(Document(id="2", inputDict={'foo': 1234, 'bar': 456}))
        answer = self.db.commit(all_or_nothing=True)
        self.assertEqual(2, len(answer))
        self.assertEqual(answer[0].get('error'), None)
        self.assertEqual(answer[1].get('error'), None)
        self.assertEqual(answer[0]['id'], '2')
        self.assertEqual(answer[1]['id'], '2')

        # callbacks can do stuff when conflicts arise
        # this particular one just overwrites the document
        def callback(db, data, result):
            for doc in data['docs']:
                if doc['_id'] == result['id']:
                    doc['_rev'] = db.document(doc['_id'])['_rev']
                    retval = db.commitOne(doc)
            return retval[0]

        self.db.queue(Document(id="2", inputDict={'foo': 5, 'bar': 6}))
        answer = self.db.commit(callback=callback)
        self.assertEqual(1, len(answer))
        self.assertEqual(answer[0].get('error'), None)
        updatedDoc = self.db.document('2')
        self.assertEqual(updatedDoc['foo'], 5)
        self.assertEqual(updatedDoc['bar'], 6)

        return
Example #23
 def testDocumentSerialisation(self):
     """
     A document should be writable into the couchdb with a timestamp.
     """
     d = Document()
     d['foo'] = 'bar'
     doc_info = self.db.commit(doc=d, timestamp=True)[0]
     d_from_db = self.db.document(doc_info['id'])
     self.assertEqual(d['foo'], d_from_db['foo'])
     self.assertEqual(d['timestamp'], d_from_db['timestamp'])
Example #24
    def testUpdateBulkDocuments(self):
        """
        Test updating documents in bulk with conflict handling
        """
        self.db.queue(Document(id="1", inputDict={'foo':123, 'bar':456}))
        self.db.queue(Document(id="2", inputDict={'foo':123, 'bar':456}))
        self.db.queue(Document(id="3", inputDict={'foo':123, 'bar':456}))
        self.db.commit()

        self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': 333}, 2)
        result = self.db.allDocs({"include_docs": True})['rows']
        self.assertEqual(3, len(result))
        for item in result:
            self.assertEqual(333, item['doc']['foo'])

        self.db.updateBulkDocumentsWithConflictHandle(["1", "2", "3"], {'foo': 222}, 10)
        result = self.db.allDocs({"include_docs": True})['rows']
        self.assertEqual(3, len(result))
        for item in result:
            self.assertEqual(222, item['doc']['foo'])
Example #25
 def post(self):
     """
     Post software version document
     """
     data = cherrypy.request.body.read()
     if not data:
         raise MissingPostData()
     else:
         doc = json.loads(data)
     doc = Document(self.name, doc)
     result = self.reqmgr_aux_db.commitOne(doc)
     return result
Example #26
    def testB(self):
        """test owning some sample documents"""

        u1 = User(name = "evansde77")
        g1 = Group(name = "DMWM", administrators = ["evansde77", "drsm79"])
        g1.setCouch(self.url, self.database)
        g1.connect()
        u1.setGroup(g1)
        u1.create()

        doc1 = Document()
        doc1['test-data'] = {"key1" : "value1"}
        doc2 = Document()
        doc2['test-data'] = {"key2" : "value2"}
        id1 = g1.couch.commitOne(doc1)[0]
        id2 = g1.couch.commitOne(doc2)[0]
        doc1['_id'] = id1[u'id']
        doc1['_rev'] = id1[u'rev']
        doc2['_id'] = id2[u'id']
        doc2['_rev'] = id2[u'rev']

        u1.ownThis(doc1)
        u1.ownThis(doc2)

        self.failUnless("owner" in doc1)
        self.failUnless("owner" in doc2)
        self.failUnless('user' in doc1['owner'])
        self.failUnless('group' in doc1['owner'])
        self.failUnless(doc1['owner']['user'] == u1['name'])
        self.failUnless(doc1['owner']['group'] == u1['group'])
        self.failUnless('user' in doc2['owner'])
        self.failUnless('group' in doc2['owner'])
        self.failUnless(doc2['owner']['user'] == u1['name'])
        self.failUnless(doc2['owner']['group'] == u1['group'])


        g1.couch.delete_doc(id1[u'id'])
        g1.couch.delete_doc(id2[u'id'])
        u1.drop()
        g1.drop()
Example #27
def makePromptSkimConfigs(couchDatabase):
    """
    Fake a prompt skim config in ConfigCache for Tier0 test
    """
    skimsConfig = Document()
    skimsConfig["info"] = None
    skimsConfig["config"] = None
    skimsConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5cab2755"
    skimsConfig["pset_hash"] = "7c856ad35f9f544839d8524ca5372888"
    skimsConfig["owner"] = {"group": "cmsdataops", "user": "******"}
    skimsConfig["pset_tweak_details"] = {
        "process": {
            "outputModules_": [
                "writeSkim1", "writeSkim2", "writeSkim3", "writeSkim4",
                "writeSkim5"
            ],
            "writeSkim1": {
                "dataset": {
                    "dataTier": "RECO-AOD",
                    "filterName": "skim1"
                }
            },
            "writeSkim2": {
                "dataset": {
                    "dataTier": "RECO-AOD",
                    "filterName": "skim2"
                }
            },
            "writeSkim3": {
                "dataset": {
                    "dataTier": "RECO-AOD",
                    "filterName": "skim3"
                }
            },
            "writeSkim4": {
                "dataset": {
                    "dataTier": "RECO-AOD",
                    "filterName": "skim4"
                }
            },
            "writeSkim5": {
                "dataset": {
                    "dataTier": "RECO-AOD",
                    "filterName": "skim5"
                }
            },
        }
    }
    couchDatabase.queue(skimsConfig)
    result = couchDatabase.commit()
    docMap = {"Skims": result[0][u'id']}
    return docMap
Example #28
    def send(self, alerts):
        """
        Handle list of alerts.

        """
        retVals = []
        for a in alerts:
            doc = Document(None, a)
            retVal = self.database.commitOne(doc)
            retVals.append(retVal)
        logging.debug("Stored %s alerts to CouchDB, retVals: %s" %
                      (len(alerts), retVals))
        return retVals
Example #29
    def testB(self):
        """test owning some sample documents"""

        u1 = User(name="evansde77")
        g1 = Group(name="DMWM", administrators=["evansde77", "drsm79"])
        g1.setCouch(self.url, self.database)
        g1.connect()
        u1.setGroup(g1)
        u1.create()

        doc1 = Document()
        doc1['test-data'] = {"key1": "value1"}
        doc2 = Document()
        doc2['test-data'] = {"key2": "value2"}
        id1 = g1.couch.commitOne(doc1)[0]
        id2 = g1.couch.commitOne(doc2)[0]
        doc1['_id'] = id1[u'id']
        doc1['_rev'] = id1[u'rev']
        doc2['_id'] = id2[u'id']
        doc2['_rev'] = id2[u'rev']

        u1.ownThis(doc1)
        u1.ownThis(doc2)

        self.failUnless("owner" in doc1)
        self.failUnless("owner" in doc2)
        self.failUnless('user' in doc1['owner'])
        self.failUnless('group' in doc1['owner'])
        self.failUnless(doc1['owner']['user'] == u1['name'])
        self.failUnless(doc1['owner']['group'] == u1['group'])
        self.failUnless('user' in doc2['owner'])
        self.failUnless('group' in doc2['owner'])
        self.failUnless(doc2['owner']['user'] == u1['name'])
        self.failUnless(doc2['owner']['group'] == u1['group'])

        g1.couch.delete_doc(id1[u'id'])
        g1.couch.delete_doc(id2[u'id'])
        u1.drop()
        g1.drop()
Example #30
def update_software(config_file):
    """
    Function retrieves CMSSW versions and ScramArchs from the CMS tag collector.

    """
    config = loadConfigurationFile(config_file)
    # source of the data
    tag_collector_url = config.views.data.tag_collector_url
    # store the data into CouchDB auxiliary database under "software" document
    couch_host = config.views.data.couch_host
    reqmgr_aux_db = config.views.data.couch_reqmgr_aux_db

    # get data from tag collector
    all_archs_and_versions = _get_all_scramarchs_and_versions(
        tag_collector_url)
    if not all_archs_and_versions:
        return

    # get data already stored in CouchDB
    couchdb = Database(dbname=reqmgr_aux_db, url=couch_host)
    try:
        sw_already_stored = couchdb.document("software")
        del sw_already_stored["_id"]
        del sw_already_stored["_rev"]
    except CouchNotFoundError:
        logging.error("Document id 'software' does not exist, creating it ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
        return

    # now compare recent data from tag collector and what we already have stored
    # sorting is necessary
    if sorted(all_archs_and_versions) != sorted(sw_already_stored):
        logging.debug(
            "ScramArch/CMSSW releases changed, updating software document ...")
        doc = Document(id="software", inputDict=all_archs_and_versions)
        couchdb.commitOne(doc)
    """
Example #31
 def testCommitOneWithQueue(self):
     """
     CommitOne bypasses the queue, but it should maintain the queue if
     present for a future call to commit.
     """
     # Queue up five docs
     doc = {'foo': 123, 'bar': 456}
     for i in range(1, 6):
         self.db.queue(doc)
     # Commit one Document
     doc = Document(inputDict=doc)
     id = self.db.commitOne(doc, returndocs=True)[0]['id']
     self.assertEqual(1, len(self.db.allDocs()['rows']))
     self.db.commit()
     self.assertEqual(6, len(self.db.allDocs()['rows']))
Example #32
    def send(self, alerts):
        """
        Send a list of alerts to a REST server.

        """
        for a in alerts:
            doc = Document(None, a)
            self._database.queue(doc)
        # two options here: either to call commit on the couch myself
        # or leave the alerts buffered in the Database queue which means
        # the .commit() would be called automatically if size is exceeded
        # 1st option:
        retVal = self._database.commit()
        logging.debug("Stored %s alerts to REST resource, retVals: %s" %
                      (len(alerts), retVal))
        return retVal
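The comment spells out the trade-off: an explicit commit() flushes at once, while queued documents are written only when the Database object's internal buffer fills. A sketch of the buffered variant, assuming (as in CMSCouch) that the Database constructor's size argument sets the auto-commit threshold:

    from WMCore.Database.CMSCouch import Database, Document

    # placeholder URL and database name; size is the queue length
    # at which queue() triggers an automatic bulk commit
    db = Database('alerts', 'http://localhost:5984', size=100)
    for a in alerts:
        db.queue(Document(None, a))
    # no explicit commit(): a flush happens once 100 docs are queued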
Example #33
    def injectDQMHarvestConfig(self):
        """
        _injectDQMHarvest_

        Create a bogus config cache document for DQMHarvest and
        inject it into couch.  Return the ID of the document.
        """
        newConfig = Document()
        newConfig["info"] = None
        newConfig["config"] = None
        newConfig["md5hash"] = "eb1c38cf50e14cf9fc31278a5c8e234f"
        newConfig["pset_hash"] = "7c856ad35f9f544839d8525ca10876a7"
        newConfig["owner"] = {"group": "DATAOPS", "user": "******"}
        newConfig["pset_tweak_details"] = {"process": {"outputModules_": []}}
        result = self.configDatabase.commitOne(newConfig)
        return result[0]["id"]
Example #34
    def testUpdateHandlerAndBulkUpdateProfile(self):
        """
        Test that update function support works
        """
        # for actual test increase the size value: For 10000 records, 96 sec vs 4 sec
        size = 100
        for i in range(size):
            self.db.queue(
                Document(id="%s" % i, inputDict={
                    'name': 123,
                    'counter': 0
                }))

        update_doc = {
            '_id': '_design/foo',
            'language': 'javascript',
            'updates': {
                "change-counter":
                """function(doc, req) { if (doc) { var data = JSON.parse(req.body);
                                      for (var field in data) {doc[field] = data[field];} return [doc, 'OK'];}}""",
            }
        }

        self.db.commit(update_doc)
        start = time.time()
        for id in range(size):
            doc_id = "%s" % id
            self.db.updateDocument(doc_id,
                                   'foo',
                                   'change-counter', {'counter': 1},
                                   useBody=True)
        end = time.time()

        print("update handler: %s sec" % (end - start))

        start = time.time()
        ids = []
        for id in range(size):
            doc_id = "%s" % id
            ids.append(doc_id)
        self.db.updateBulkDocumentsWithConflictHandle(ids, {'counter': 2},
                                                      1000)
        end = time.time()

        print("bulk update: %s sec" % (end - start))
Example #35
    def post(self, subName=None):
        """
        Inserts a new document into the database
        """
        data = cherrypy.request.body.read()
        if not data:
            raise MissingPostData()
        else:
            doc = json.loads(data)
        if subName:
            docName = "%s_%s" % (self.name, subName)
        else:
            docName = self.name

        doc["ConfigType"] = self.name
        doc = Document(docName, doc)
        result = self.reqmgr_aux_db.commitOne(doc)
        return result
Example #36
    def post(self, subName=None):
        """
        If the document already exists, replace it with a new one.
        """
        data = cherrypy.request.body.read()
        if not data:
            raise MissingPostData()
        else:
            doc = json.loads(data)
        if subName:
            docName = "%s_%s" % (self.name, subName)
        else:
            docName = self.name

        doc["ConfigType"] = self.name
        doc = Document(docName, doc)
        result = self.reqmgr_aux_db.commitOne(doc)
        return result
Example #37
    def __init__(self, dbURL, couchDBName = None, id = None, rev = None, usePYCurl = False,
                 ckey = None, cert = None, capath = None, detail = True):
        super(ConfigCache, self).__init__()
        self.dbname = couchDBName
        self.dburl  = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey, cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document  = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id is not None:
            self.document['_id']                = id
        self.document['pset_tweak_details'] = None
        self.document['info']               = None
        self.document['config']             = None
        return
Example #38
class ConfigCache(WMObject):
    """
    _ConfigCache_

    The class that handles the upload and download of configCache
    artifacts from Couch
    """
    def __init__(self, dbURL, couchDBName = None, id = None, rev = None, usePYCurl = False, 
                 ckey = None, cert = None, capath = None, detail = True):
        self.dbname = couchDBName
        self.dburl  = dbURL
        self.detail = detail
        try:
            self.couchdb = CouchServer(self.dburl, usePYCurl=usePYCurl, ckey=ckey, cert=cert, capath=capath)
            if self.dbname not in self.couchdb.listDatabases():
                self.createDatabase()

            self.database = self.couchdb.connectDatabase(self.dbname)
        except Exception as ex:
            msg = "Error connecting to couch: %s\n" % str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)

        # local cache
        self.docs_cache = DocumentCache(self.database, self.detail)

        # UserGroup variables
        self.group = None
        self.owner = None

        # Internal data structure
        self.document  = Document()
        self.attachments = {}
        self.document['type'] = "config"
        self.document['description'] = {}
        self.document['description']['config_label'] = None
        self.document['description']['config_desc'] = None

        if id is not None:
            self.document['_id']                = id
        self.document['pset_tweak_details'] = None
        self.document['info']               = None
        self.document['config']             = None
        return

    def createDatabase(self):
        """
        _createDatabase_

        """
        database = self.couchdb.createDatabase(self.dbname)
        database.commit()
        return database

    def connectUserGroup(self, groupname, username):
        """
        _connectUserGroup_

        """
        self.group = Group(name = groupname)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.owner = makeUser(groupname, username,
                              couchUrl = self.dburl,
                              couchDatabase = self.dbname)
        return

    def createUserGroup(self, groupname, username):
        """
        _createUserGroup_

        Create all the userGroup information
        """
        self.createGroup(name = groupname)
        self.createUser(username = username)
        return

    def createGroup(self, name):
        """
        _createGroup_

        Create Group for GroupUser
        """
        self.group = Group(name = name)
        self.group.setCouch(self.dburl, self.dbname)
        self.group.connect()
        self.group.create()
        return

    def setLabel(self, label):
        """
        _setLabel_

        Util to add a descriptive label to the configuration doc
        """
        self.document['description']['config_label'] = label

    def setDescription(self, desc):
        """
        _setDescription_

        Util to add a verbose description string to a configuration doc
        """
        self.document['description']['config_desc'] = desc

    @Decorators.requireGroup
    def createUser(self, username):
        self.owner = makeUser(self.group['name'], username,
                              couchUrl = self.dburl,
                              couchDatabase = self.dbname)
        self.owner.create()
        self.owner.ownThis(self.document)
        return

    @Decorators.requireGroup
    @Decorators.requireUser
    def save(self):
        """
        _save_

        Save yourself!  Save your internal document.
        """
        rawResults = self.database.commit(doc = self.document)

        # We should only be committing one document at a time;
        # if not, get the last one.

        try:
            commitResults = rawResults[-1]
            self.document["_rev"] = commitResults.get('rev')
            self.document["_id"]  = commitResults.get('id')
        except KeyError as ex:
            msg  = "Document returned from couch without ID or Revision\n"
            msg += "Document probably bad\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(message = msg)


        # Now do the attachments
        for attachName in self.attachments:
            self.saveAttachment(name = attachName,
                                attachment = self.attachments[attachName])


        return


    def saveAttachment(self, name, attachment):
        """
        _saveAttachment_

        Save an attachment to the document
        """


        retval = self.database.addAttachment(self.document["_id"],
                                             self.document["_rev"],
                                             attachment,
                                             name)

        if not retval.get('ok', False):
            # Then we have a problem
            msg = "Adding an attachment to document failed\n"
            msg += str(retval)
            msg += "ID: %s, Rev: %s" % (self.document["_id"], self.document["_rev"])
            logging.error(msg)
            raise ConfigCacheException(msg)

        self.document["_rev"] = retval['rev']
        self.document["_id"]  = retval['id']
        self.attachments[name] = attachment

        return


    def loadDocument(self, configID):
        """
        _loadDocument_

        Load a document from the document cache given its couchID
        """
        self.document = self.docs_cache[configID]

    def loadByID(self, configID):
        """
        _loadByID_

        Load a document from the server given its couchID
        """
        try:
            self.document = self.database.document(id = configID)
            if 'owner' in self.document:
                self.connectUserGroup(groupname = self.document['owner'].get('group', None),
                                      username  = self.document['owner'].get('user', None))
            if '_attachments' in self.document:
                # Then we need to load the attachments
                for key in self.document['_attachments']:
                    self.loadAttachment(name = key)
        except CouchNotFoundError as ex:
            msg =  "Document with id %s not found in couch\n" % (configID)
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)
        except Exception as ex:
            msg =  "Error loading document from couch\n"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)

        return

    def loadAttachment(self, name, overwrite = True):
        """
        _loadAttachment_

        Load an attachment from the database and put it somewhere useful
        """


        attach = self.database.getAttachment(self.document["_id"], name)

        if not overwrite:
            if name in self.attachments:
                logging.info("Attachment already exists, so we're skipping")
                return

        self.attachments[name] = attach

        return

    def loadByView(self, view, value):
        """
        _loadByView_

        Underlying code to load views
        """

        viewRes = self.database.loadView( 'ConfigCache', view, {}, [value] )

        if len(viewRes['rows']) == 0:
            # Then we have a problem
            logging.error("Unable to load using view %s and value %s" % (view, str(value)))

        self.unwrapView(viewRes)
        self.loadByID(self.document["_id"])
        return

    def saveConfigToDisk(self, targetFile):
        """
        _saveConfigToDisk_

        Make sure we can save our config file to disk
        """
        config = self.getConfig()
        if not config:
            return

        # Write to a file
        f = open(targetFile, 'w')
        f.write(config)
        f.close()
        return


    def load(self):
        """
        _load_

        Figure out how to load
        """

        if self.document.get("_id", None) != None:
            # Then we should load by ID
            self.loadByID(self.document["_id"])
            return

        # Otherwise we have to load by view

        if self.document.get('md5_hash', None) is not None:
            # Then we have an md5_hash
            self.loadByView(view = 'config_by_md5hash', value = self.document['md5_hash'])
        # TODO: Add more views as they become available.


        #elif not self.owner == None:
            # Then we have an owner
            #self.loadByView(view = 'config_by_owner', value = self.owner['name'])





    def unwrapView(self, view):
        """
        _unwrapView_

        Move view information into the main document
        """

        self.document["_id"]  = view['rows'][0].get('id')
        self.document["_rev"] = view['rows'][0].get('value').get('_rev')




    def setPSetTweaks(self, PSetTweak):
        """
        _setPSetTweaks_

        Set the PSet tweak details for the config.
        """
        self.document['pset_tweak_details'] = PSetTweak
        return

    def getPSetTweaks(self):
        """
        _getPSetTweaks_

        Retrieve the PSet tweak details.
        """
        return self.document['pset_tweak_details']

    def getOutputModuleInfo(self):
        """
        _getOutputModuleInfo_

        Retrieve the dataset information for the config in the ConfigCache.
        """
        psetTweaks = self.getPSetTweaks()
        if 'process' not in psetTweaks:
            raise ConfigCacheException("Could not find process field in PSet while getting output modules!")
        try:
            outputModuleNames = psetTweaks["process"]["outputModules_"]
        except KeyError as ex:
            msg =  "Could not find outputModules_ in psetTweaks['process'] while getting output modules.\n"
            msg += str(ex)
            logging.error(msg)
            raise ConfigCacheException(msg)

        results = {}
        for outputModuleName in outputModuleNames:
            try:
                outModule = psetTweaks["process"][outputModuleName]
            except KeyError:
                msg = "Could not find outputModule %s in psetTweaks['process']" % outputModuleName
                logging.error(msg)
                raise ConfigCacheException(msg)
            dataset = outModule.get("dataset", None)
            if dataset:
                results[outputModuleName] = {"dataTier": outModule["dataset"]["dataTier"],
                                             "filterName": outModule["dataset"]["filterName"]}
            else:
                results[outputModuleName] = {"dataTier": None, "filterName": None}

        return results


    def addConfig(self, newConfig, psetHash = None):
        """
        _addConfig_


        """
        # The newConfig parameter is a URL suitable for passing to urlopen.
        configString = urllib.urlopen(newConfig).read(-1)
        configMD5 = hashlib.md5(configString).hexdigest()

        self.document['md5_hash'] = configMD5
        self.document['pset_hash'] = psetHash
        self.attachments['configFile'] = configString
        return

    def getConfig(self):
        """
        _getConfig_

        Get the currently active config
        """
        return self.attachments.get('configFile', None)

    def getCouchID(self):
        """
        _getCouchID_

        Return the document's couchID
        """

        return self.document["_id"]


    def getCouchRev(self):
        """
        _getCouchRev_

        Return the document's couchRevision
        """


        return self.document["_rev"]


    @Decorators.requireGroup
    @Decorators.requireUser
    def delete(self):
        """
        _delete_

        Deletes the document with the current docid
        """
        if not self.document["_id"]:
            logging.error("Attempted to delete with no couch ID")


        # TODO: Delete without loading first
        try:
            self.database.queueDelete(self.document)
            self.database.commit()
        except Exception as ex:
            msg =  "Error in deleting document from couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
            logging.error(msg)
            raise ConfigCacheException(message = msg)


        return

    def getIDFromLabel(self, label):
        """
        _getIDFromLabel_

        Retrieve the ID of a config given its label.
        """
        results = self.database.loadView("ConfigCache", "config_by_label",
                                         {"startkey": label,
                                          "limit": 1})

        if results["rows"][0]["key"] == label:
            return results["rows"][0]["value"]

        return None

    def listAllConfigsByLabel(self):
        """
        _listAllConfigsByLabel_

        Retrieve a list of all the configs in the config cache.  This is
        returned in the form of a dictionary that is keyed by label.
        """
        configs = {}
        results = self.database.loadView("ConfigCache", "config_by_label")

        for result in results["rows"]:
            configs[result["key"]] = result["value"]

        return configs


    def __str__(self):
        """
        Make something printable

        """

        return self.document.__str__()
    
    def validate(self, configID):
        
        try:
            #TODO: need to change to DataCache
            #self.loadDocument(configID = configID)
            self.loadByID(configID = configID)
        except Exception as ex:
            raise ConfigCacheException("Failure to load ConfigCache while validating workload: %s" % str(ex))
        
        if self.detail:
            duplicateCheck = {}
            try:
                outputModuleInfo = self.getOutputModuleInfo()
            except Exception as ex:
                # Something's gone wrong with trying to open the configCache
                msg = "Error in getting output modules from ConfigCache during workload validation.  Check ConfigCache formatting!"
                raise ConfigCacheException("%s: %s" % (msg, str(ex)))
            for outputModule in outputModuleInfo.values():
                dataTier   = outputModule.get('dataTier', None)
                filterName = outputModule.get('filterName', None)
                if not dataTier:
                    raise ConfigCacheException("No DataTier in output module.")
    
                # Add dataTier to duplicate dictionary
                if dataTier not in duplicateCheck:
                    duplicateCheck[dataTier] = []
                if filterName in duplicateCheck[dataTier]:
                    # Then we've seen this combination before
                    raise ConfigCacheException("Duplicate dataTier/filterName combination.")
                else:
                    duplicateCheck[dataTier].append(filterName)            
            return outputModuleInfo
        else:
            return True
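Taken together, a typical round trip through ConfigCache looks something like the sketch below (URL, database name, user and file are placeholders; error handling elided):

    cache = ConfigCache(dbURL='http://localhost:5984', couchDBName='config_cache')
    cache.createUserGroup('cmsdataops', 'someuser')
    cache.addConfig('file:///tmp/myconfig.py')  # stores md5_hash plus the attachment
    cache.setPSetTweaks({'process': {'outputModules_': []}})
    cache.setLabel('my-config')
    cache.setDescription('demo configuration')
    cache.save()
    couchID = cache.getCouchID()

    # later, possibly in another process
    other = ConfigCache(dbURL='http://localhost:5984', couchDBName='config_cache')
    other.loadByID(couchID)
    print(other.getOutputModuleInfo())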
Example #39
File: API.py Project: dmwm/DQIS
 def __init__(self, dqis_db=None, savable=False, *args, **kwargs): 
     Document.__init__(self, *args, **kwargs)
     self.dqis_db = dqis_db
Example #40
def gatherWMDataMiningStats(
    wmstatsUrl, reqmgrUrl, wmMiningUrl, mcmUrl, mcmCert, mcmKey, tmpDir, archived=False, log=logging.info
):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log("INFO: %s: Getting job information from %s and %s. Please wait." % (funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ["normal-archived", "rejected-archived", "aborted-archived"]
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag=jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if not oldCouchDoc.has_key("filterEfficiency") or not oldCouchDoc.has_key("runWhiteList"):
                runWhiteList = []
                filterEfficiency = None
                try:
                    # log("DEBUG: Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get("RunWhiteList", [])
                    filterEfficiency = rmDoc.get("FilterEfficiency", None)
                except Exception:
                    pass  # ReqMgr no longer has the workflow
                report[wf].update({"filterEfficiency": filterEfficiency, "runWhiteList": runWhiteList})

            if not oldCouchDoc.has_key("mcmTotalEvents") or not oldCouchDoc.has_key("mcmApprovalTime"):
                prepID = oldCouchDoc.get("prepID", None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    nMCMCalls += 1
                    mcmHistory = mcm.getHistory(prepID=prepID)
                    mcmRequest = mcm.getRequest(prepID=prepID)
                    report[wf].update({"mcmTotalEvents": mcmRequest.get("total_events", "Unknown")})

                    if not oldCouchDoc.has_key("mcmApprovalTime"):
                        report[wf].update({"mcmApprovalTime": "Unknown"})
                    for entry in mcmHistory:
                        if entry["action"] == "set status" and entry["step"] == "announced":
                            dateString = entry["updater"]["submission_date"]
                            dt = datetime.strptime(dateString, "%Y-%m-%d-%H-%M")
                            report[wf].update({"mcmApprovalTime": time.mktime(dt.timetuple())})

            # Basic parameters of the workflow
            priority = requests[wf]["priority"]
            requestType = requests[wf]["request_type"]
            targetLumis = requests[wf].get("input_lumis", 0)
            targetEvents = requests[wf].get("input_events", 0)
            campaign = requests[wf]["campaign"]
            prep_id = requests[wf].get("prep_id", None)
            outputdatasets = requests[wf].get("outputdatasets", [])

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf]["inputdataset"]
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ""

            outputTier = "Unknown"
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split("/")[-1])
                    else:
                        outputTiers.append(ds.split("/")[-1])
            except:
                log(
                    "ERROR: Could not decode outputdatasets: %s" % outputdatasets
                )  # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split("/")[-1]
                if inputTier in ["GEN"]:
                    outputTier = "LHE"
                elif inputTier in ["RAW", "RECO"]:
                    outputTier = "AOD"
                elif inputTier in ["GEN-SIM"]:
                    outputTier = "AODSIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"

            else:
                if len(outputTiers) == 1 and "GEN" in outputTiers:
                    if "STEP0ATCERN" in wf:
                        outputTier = "STEP0"
                    else:
                        outputTier = "FullGen"
                elif "GEN-SIM" in outputTiers and "AODSIM" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "RECO" in outputTiers and requestType == "TaskChain":
                    outputTier = "RelVal"
                elif "GEN-SIM" in outputTiers:
                    outputTier = "GEN-SIM"
                elif "AODSIM" in outputTiers:
                    outputTier = "AODSIM"
                elif "RECO" in outputTiers:
                    outputTier = "AOD"
                elif "AOD" in outputTiers:
                    outputTier = "AOD"
                else:
                    outputTier = "GEN-SIM"

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get("events", 0)
                lumis = dsr.get("totalLumis", 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, 100.0 * lumis / targetLumis)
                if targetEvents:
                    eventPercent = min(eventPercent, 100.0 * events / targetEvents)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs["sucess"]
                totalJobs += jobs["created"]
            try:
                if totalJobs and not report[wf].get("firstJobTime", None):
                    report[wf].update({"firstJobTime": int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get("lastJobTime", None):
                    report[wf].update({"lastJobTime": int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None
            for status in requests[wf]["request_status"]:
                finalStatus = status["status"]
                if status["status"] == "new":
                    newTime = status["update_time"]
                if status["status"] == "assignment-approved":
                    approvedTime = status["update_time"]
                if status["status"] == "assigned":
                    assignedTime = status["update_time"]
                if status["status"] == "completed":
                    completedTime = status["update_time"]
                if status["status"] == "acquired":
                    acquireTime = status["update_time"]
                if status["status"] == "closed-out":
                    closeoutTime = status["update_time"]
                if status["status"] == "announced":
                    announcedTime = status["update_time"]
                if status["status"] == "normal-archived":
                    archivedTime = status["update_time"]

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get("approvedTime", None):
                report[wf].update({"approvedTime": approvedTime})
            if assignedTime and not report[wf].get("assignedTime", None):
                report[wf].update({"assignedTime": assignedTime})
            if acquireTime and not report[wf].get("acquireTime", None):
                report[wf].update({"acquireTime": acquireTime})
            if closeoutTime and not report[wf].get("closeoutTime", None):
                report[wf].update({"closeoutTime": closeoutTime})
            if announcedTime and not report[wf].get("announcedTime", None):
                report[wf].update({"announcedTime": announcedTime})
            if completedTime and not report[wf].get("completedTime", None):
                report[wf].update({"completedTime": completedTime})
            if newTime and not report[wf].get("newTime", None):
                report[wf].update({"newTime": newTime})
            if archivedTime and not report[wf].get("archivedTime", None):
                report[wf].update({"archivedTime": archivedTime})

            try:
                dt = requests[wf]["request_date"]
                requestDate = "%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d" % tuple(dt)
                report[wf].update({"requestDate": requestDate})
            except:
                pass

            report[wf].update({"priority": priority, "status": finalStatus, "type": requestType})
            report[wf].update({"totalLumis": targetLumis, "totalEvents": targetEvents})
            report[wf].update({"campaign": campaign, "prepID": prep_id, "outputTier": outputTier})
            report[wf].update({"outputDatasets": outputdatasets, "inputDataset": inputdataset})

            report[wf].setdefault("lumiPercents", {})
            report[wf].setdefault("eventPercents", {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]["lumiPercents"].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]["lumiPercents"][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]["eventPercents"].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]["eventPercents"][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({"eventProgress": eventProgress, "lumiProgress": lumiProgress})

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    # log("DEBUG: Workflow updated: %s" % wf)
                    pass
                else:
                    # log("DEBUG Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc["updateTime"] = int(time.time())
                    report[wf]["updateTime"] = int(time.time())
                    cjson.encode(newCouchDoc)  # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
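
The queue/commit pair above is the batching idiom all of these examples rely on: changed documents are buffered client-side with queue() and written out in a single bulk request by commit(). A minimal sketch of that idiom, assuming WMCore's CMSCouch client; the server URL and database name below are placeholders:

from WMCore.Database.CMSCouch import CouchServer

def flushReports(docs, couchUrl='http://localhost:5984', dbName='wmdatamining'):
    """Buffer every changed document locally, then write them in one bulk commit."""
    db = CouchServer(couchUrl).connectDatabase(dbName)
    for doc in docs:
        db.queue(doc)  # buffered client-side; no HTTP request yet
    db.commit()        # one bulk write for the whole batch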
Beispiel #42
0
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl, archived = False, log = logging.info):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl)
    
    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)
    
    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)
    
    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"
    
    log("INFO: %s: Getting job information from %s and %s. Please wait." % (
                  funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag = jobInfoFlag)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log("INFO: %s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    for wf in result.keys():

        # Store a copy of the CouchDB document so we can compare later before updating
        if couchdb.documentExists(wf):
            oldCouchDoc = couchdb.document(wf)
            wfExists = True
        else:
            oldCouchDoc = CouchDoc(id=wf)
            wfExists = False

        newCouchDoc = copy.deepcopy(oldCouchDoc)
        ancientCouchDoc = copy.deepcopy(oldCouchDoc)
        report[wf] = oldCouchDoc
        # FIXME: remove report, only have two instances of couchDoc

        if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
            runWhiteList = []
            filterEfficiency = None
            try:
                #log("DEBUG: Looking up %s in ReqMgr" % wf)
                rmDoc = reqMgr.document(wf)
                runWhiteList = rmDoc.get('RunWhiteList', [])
                filterEfficiency = rmDoc.get('FilterEfficiency', None)
            except:
                pass # ReqMgr no longer has the workflow
            report[wf].update({'filterEfficiency':filterEfficiency, 'runWhiteList':runWhiteList})

        # Basic parameters of the workflow
        priority = requests[wf]['priority']
        requestType = requests[wf]['request_type']
        targetLumis = requests[wf].get('input_lumis', 0)
        targetEvents = requests[wf].get('input_events', 0)
        campaign = requests[wf]['campaign']
        prep_id = requests[wf].get('prep_id', None)
        outputdatasets = requests[wf].get('outputdatasets', [])

        # Can be an empty list, full list, empty string, or non-empty string!
        inputdataset = requests[wf]['inputdataset']
        if isinstance(inputdataset, list):
            if inputdataset:
                inputdataset = inputdataset[0]
            else:
                inputdataset = ''

        outputTier = 'Unknown'
        try:
            outputTiers = []
            for ds in outputdatasets:
                if isinstance(ds, list):
                    outputTiers.append(ds[0].split('/')[-1])
                else:
                    outputTiers.append(ds.split('/')[-1])
        except:
            log("ERROR: Could not decode outputdatasets: %s" % outputdatasets) # Sometimes is a list of lists, not just a list. Bail
        if inputdataset:
            inputTier = inputdataset.split('/')[-1]
            if inputTier in ['GEN']:
                outputTier = 'LHE'
            elif inputTier in ['RAW', 'RECO']:
                outputTier = 'AOD'
            elif inputTier in ['GEN-SIM']:
                outputTier = 'AODSIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'

        else:
            if len(outputTiers) == 1 and 'GEN' in outputTiers:
                if 'STEP0ATCERN' in wf:
                    outputTier = 'STEP0'
                else:
                    outputTier = 'FullGen'
            elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'RECO' in outputTiers and requestType == 'TaskChain':
                outputTier = 'RelVal'
            elif 'GEN-SIM' in outputTiers:
                outputTier = 'GEN-SIM'
            elif 'AODSIM' in outputTiers:
                outputTier = 'AODSIM'
            elif 'RECO' in outputTiers:
                outputTier = 'AOD'
            elif 'AOD' in outputTiers:
                outputTier = 'AOD'
            else:
                outputTier = 'GEN-SIM'

        # Calculate completion ratios for events and lumi sections, take minimum for all datasets
        eventPercent = 200
        lumiPercent = 200
        datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
        for dataset in datasetReports:
            dsr = datasetReports[dataset].getReport()
            events = dsr.get('events', 0)
            lumis = dsr.get('totalLumis', 0)
            if targetLumis:
                lumiPercent = min(lumiPercent, 100.0 * lumis / targetLumis)
            if targetEvents:
                eventPercent = min(eventPercent, 100.0 * events / targetEvents)
        if eventPercent > 100:
            eventPercent = 0
        if lumiPercent > 100:
            lumiPercent = 0

        # Sum up all jobs across agents to see if we've run the first, last
        successJobs = 0
        totalJobs = 0
        for agent in result[wf]:
            jobs = result[wf][agent]
            successJobs += jobs['sucess']  # note: 'sucess' (sic) is the key name used in the job summary data
            totalJobs += jobs['created']
        try:
            if totalJobs and not report[wf].get('firstJobTime', None):
                report[wf].update({'firstJobTime' : int(time.time())})
            if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                report[wf].update({'lastJobTime' : int(time.time())})
        except:
            pass

        # Figure out current status of workflow and transition times
        finalStatus = None
        newTime = None
        approvedTime = None
        assignedTime = None
        acquireTime = None
        completedTime = None
        closeoutTime = None
        announcedTime = None
        archivedTime = None
        requestDate = None
        for status in requests[wf]['request_status']:
            finalStatus = status['status']
            if status['status'] == 'new':
                newTime = status['update_time']
            if status['status'] == 'assignment-approved':
                approvedTime = status['update_time']
            if status['status'] == 'assigned':
                assignedTime = status['update_time']
            if status['status'] == 'completed':
                completedTime = status['update_time']
            if status['status'] == 'acquired':
                acquireTime = status['update_time']
            if status['status'] == 'closed-out':
                closeoutTime = status['update_time']
            if status['status'] == 'announced':
                announcedTime = status['update_time']
            if status['status'] == 'normal-archived':
                archivedTime = status['update_time']

        # Build or modify the report dictionary for the WF
        report.setdefault(wf, {})

        if approvedTime and not report[wf].get('approvedTime', None):
            report[wf].update({'approvedTime':approvedTime})
        if assignedTime and not report[wf].get('assignedTime', None):
            report[wf].update({'assignedTime':assignedTime})
        if acquireTime and not report[wf].get('acquireTime', None):
            report[wf].update({'acquireTime':acquireTime})
        if closeoutTime and not report[wf].get('closeoutTime', None):
            report[wf].update({'closeoutTime':closeoutTime})
        if announcedTime and not report[wf].get('announcedTime', None):
            report[wf].update({'announcedTime':announcedTime})
        if completedTime and not report[wf].get('completedTime', None):
            report[wf].update({'completedTime':completedTime})
        if newTime and not report[wf].get('newTime', None):
            report[wf].update({'newTime':newTime})
        if archivedTime and not report[wf].get('archivedTime', None):
            report[wf].update({'archivedTime':archivedTime})

        try:
            dt = requests[wf]['request_date']
            requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
            report[wf].update({'requestDate' : requestDate})
        except:
            pass

        report[wf].update({'priority':priority, 'status':finalStatus, 'type':requestType})
        report[wf].update({'totalLumis':targetLumis, 'totalEvents':targetEvents, })
        report[wf].update({'campaign' : campaign, 'prepID' : prep_id, 'outputTier' : outputTier, })
        report[wf].update({'outputDatasets' : outputdatasets, 'inputDataset' : inputdataset, })

        report[wf].setdefault('lumiPercents', {})
        report[wf].setdefault('eventPercents', {})
        lumiProgress = 0
        eventProgress = 0
        for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
            percent = str(percentage)
            percentReported = report[wf]['lumiPercents'].get(percent, None)
            if not percentReported and lumiPercent >= percentage:
                report[wf]['lumiPercents'][percent] = int(time.time())
            if lumiPercent >= percentage:
                lumiProgress = percentage

            percentReported = report[wf]['eventPercents'].get(percent, None)
            if not percentReported and eventPercent >= percentage:
                report[wf]['eventPercents'][percent] = int(time.time())
            if eventPercent >= percentage:
                eventProgress = percentage

        report[wf].update({'eventProgress' : eventProgress, 'lumiProgress' : lumiProgress,  })

        newCouchDoc.update(report[wf])

        # Queue the updated document for addition if it's changed.
        if ancientCouchDoc != newCouchDoc:
            if wfExists:
                #log("DEBUG: Workflow updated: %s" % wf)
                pass
            else:
                #log("DEBUG Workflow created: %s" % wf)
                pass

            try:
                newCouchDoc['updateTime'] = int(time.time())
                report[wf]['updateTime'] = int(time.time())
                cjson.encode(newCouchDoc) # Make sure it encodes before trying to queue
                couchdb.queue(newCouchDoc)
            except:
                log("ERROR: Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log("INFO: %s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
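
This variant (and the ones around it) uses splitCouchServiceURL to separate the CouchDB server URL from the database name. The real helper ships with WMCore; the function below is only an illustrative sketch of the behaviour assumed here:

def splitCouchServiceURL(serviceURL):
    """'http://host:5984/wmstats' -> ('http://host:5984', 'wmstats')."""
    server, database = serviceURL.rsplit('/', 1)
    return server, database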
Beispiel #43
0
def gatherWMDataMiningStats(wmstatsUrl, reqmgrUrl, wmMiningUrl,
                            mcmUrl, mcmCert, mcmKey, tmpDir,
                            archived = False, log = logging):

    server, database = splitCouchServiceURL(wmMiningUrl)
    analyticsServer = CouchServer(server)
    couchdb = analyticsServer.connectDatabase(database)

    WMStats = WMStatsReader(wmstatsUrl, reqmgrUrl, reqdbCouchApp = "ReqMgr")

    reqMgrServer, reqMgrDB = splitCouchServiceURL(reqmgrUrl)

    reqMgr = CouchServer(reqMgrServer).connectDatabase(reqMgrDB, False)

    if archived:
        funcName = "Archived Requests"
    else:
        funcName = "Active Requests"

    log.info("%s: Getting job information from %s and %s. Please wait." % (
                  funcName, wmstatsUrl, reqmgrUrl))

    if archived:
        checkStates = ['normal-archived', 'rejected-archived', 'aborted-archived']
        jobInfoFlag = False
    else:
        checkStates = WMStatsReader.ACTIVE_STATUS
        jobInfoFlag = True
    requests = WMStats.getRequestByStatus(checkStates, jobInfoFlag = jobInfoFlag, legacyFormat = True)

    requestCollection = RequestInfoCollection(requests)
    result = requestCollection.getJSONData()
    requestsDict = requestCollection.getData()
    log.info("%s: Total %s requests retrieved\n" % (funcName, len(result)))

    report = {}
    nMCMCalls = 0
    with McM(cert=mcmCert, key=mcmKey, url=mcmUrl, tmpDir=tmpDir) as mcm:
        for wf in result.keys():

            # Store a copy of the CouchDB document so we can compare later before updating
            if couchdb.documentExists(wf):
                oldCouchDoc = couchdb.document(wf)
                wfExists = True
            else:
                oldCouchDoc = CouchDoc(id=wf)
                wfExists = False

            newCouchDoc = copy.deepcopy(oldCouchDoc)
            ancientCouchDoc = copy.deepcopy(oldCouchDoc)
            report[wf] = oldCouchDoc
            # FIXME: remove report, only have two instances of couchDoc

            if 'filterEfficiency' not in oldCouchDoc or 'runWhiteList' not in oldCouchDoc:
                runWhiteList = []
                filterEfficiency = None
                try:
                    #log.debug("Looking up %s in ReqMgr" % wf)
                    rmDoc = reqMgr.document(wf)
                    runWhiteList = rmDoc.get('RunWhiteList', [])
                    filterEfficiency = rmDoc.get('FilterEfficiency', None)
                except:
                    pass # ReqMgr no longer has the workflow
                report[wf].update({'filterEfficiency':filterEfficiency, 'runWhiteList':runWhiteList})

            if ('mcmTotalEvents' not in oldCouchDoc or
                'mcmApprovalTime' not in oldCouchDoc or
                oldCouchDoc.get('mcmTotalEvents', 'Unknown') == 'Unknown' or
                oldCouchDoc.get('mcmApprovalTime', 'Unknown') == 'Unknown'):

                prepID = oldCouchDoc.get('prepID', None)
                if prepID and nMCMCalls <= maxMCMCalls:
                    log.info("Trying to update McM info for %s, PREPID %s" % (wf, prepID))
                    # Get information from McM. Don't call too many times, can take a long time
                    nMCMCalls += 1
                    try:
                        mcmHistory = mcm.getHistory(prepID = prepID)
                        if 'mcmApprovalTime' not in oldCouchDoc:
                            report[wf].update({'mcmApprovalTime':'NoMcMData'})
                        found = False
                        for entry in mcmHistory:
                            if entry['action'] == 'set status' and entry['step'] == 'announced':
                                dateString = entry['updater']['submission_date']
                                dt = datetime.strptime(dateString, '%Y-%m-%d-%H-%M')
                                report[wf].update({'mcmApprovalTime':time.mktime(dt.timetuple())})
                                found = True
                        if not found:
                            log.error("History found but no approval time for %s" % wf)
                    except McMNoDataError:
                        log.error("Setting NoMcMData for %s" % wf)
                        report[wf].update({'mcmApprovalTime':'NoMcMData'})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. May be transient and/or SSO problem." %
                            (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting history from McM for PREP ID %s. Unknown error." %
                            (exc_type, prepID))

                    try:
                        mcmRequest = mcm.getRequest(prepID = prepID)
                        report[wf].update({'mcmTotalEvents': mcmRequest.get('total_events', 'NoMcMData')})
                    except (RuntimeError, IOError):
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. May be transient and/or SSO problem." %
                            (exc_type, prepID))
                    except:
                        exc_type, exc_value, exc_traceback = sys.exc_info()
                        log.error("%s getting request from McM for PREP ID %s. Unknown error." %
                            (exc_type, prepID))

            # Basic parameters of the workflow
            priority = requests[wf]['priority']
            requestType = requests[wf]['request_type']
            targetLumis = requests[wf].get('input_lumis', 0)
            targetEvents = requests[wf].get('input_events', 0)
            campaign = requests[wf]['campaign']
            prep_id = requests[wf].get('prep_id', None)
            outputdatasets = requests[wf].get('outputdatasets', [])
            statuses = requests[wf].get('request_status', [])

            if not statuses:
                log.error("Could not find any status from workflow: %s" % wf) # Should not happen but it does.

            # Remove a single task_ from the start of PREP ID if it exists
            if prep_id and prep_id.startswith('task_'):
                prep_id = prep_id.replace('task_', '', 1)

            # Can be an empty list, full list, empty string, or non-empty string!
            inputdataset = requests[wf].get('inputdataset', "")
            if isinstance(inputdataset, list):
                if inputdataset:
                    inputdataset = inputdataset[0]
                else:
                    inputdataset = ''

            outputTier = 'Unknown'
            try:
                outputTiers = []
                for ds in outputdatasets:
                    if isinstance(ds, list):
                        outputTiers.append(ds[0].split('/')[-1])
                    else:
                        outputTiers.append(ds.split('/')[-1])
            except:
                log.error("Could not decode outputdatasets: %s" % outputdatasets) # Sometimes is a list of lists, not just a list. Bail
            if inputdataset:
                inputTier = inputdataset.split('/')[-1]
                if inputTier in ['GEN']:
                    outputTier = 'LHE'
                elif inputTier in ['RAW', 'RECO']:
                    outputTier = 'AOD'
                elif inputTier in ['GEN-SIM']:
                    outputTier = 'AODSIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'

            else:
                if len(outputTiers) == 1 and 'GEN' in outputTiers:
                    if 'STEP0ATCERN' in wf:
                        outputTier = 'STEP0'
                    else:
                        outputTier = 'FullGen'
                elif 'GEN-SIM' in outputTiers and 'AODSIM' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'RECO' in outputTiers and requestType == 'TaskChain':
                    outputTier = 'RelVal'
                elif 'GEN-SIM' in outputTiers:
                    outputTier = 'GEN-SIM'
                elif 'AODSIM' in outputTiers:
                    outputTier = 'AODSIM'
                elif 'RECO' in outputTiers:
                    outputTier = 'AOD'
                elif 'AOD' in outputTiers:
                    outputTier = 'AOD'
                else:
                    outputTier = 'GEN-SIM'

            # Calculate completion ratios for events and lumi sections, take minimum for all datasets
            eventPercent = 200
            lumiPercent = 200
            datasetReports = requestsDict[wf].getProgressSummaryByOutputDataset()
            for dataset in datasetReports:
                dsr = datasetReports[dataset].getReport()
                events = dsr.get('events', 0)
                lumis = dsr.get('totalLumis', 0)
                if targetLumis:
                    lumiPercent = min(lumiPercent, 100.0 * lumis / targetLumis)
                if targetEvents:
                    eventPercent = min(eventPercent, 100.0 * events / targetEvents)
            if eventPercent > 100:
                eventPercent = 0
            if lumiPercent > 100:
                lumiPercent = 0

            # Sum up all jobs across agents to see if we've run the first, last
            successJobs = 0
            totalJobs = 0
            for agent in result[wf]:
                jobs = result[wf][agent]
                successJobs += jobs['sucess']  # note: 'sucess' (sic) is the key name used in the job summary data
                totalJobs += jobs['created']
            try:
                if totalJobs and not report[wf].get('firstJobTime', None):
                    report[wf].update({'firstJobTime' : int(time.time())})
                if totalJobs and successJobs == totalJobs and not report[wf].get('lastJobTime', None):
                    report[wf].update({'lastJobTime' : int(time.time())})
            except:
                pass

            # Figure out current status of workflow and transition times
            finalStatus = None
            newTime = None
            approvedTime = None
            assignedTime = None
            acquireTime = None
            completedTime = None
            closeoutTime = None
            announcedTime = None
            archivedTime = None
            requestDate = None

            for status in statuses:
                finalStatus = status['status']
                if status['status'] == 'new':
                    newTime = status['update_time']
                if status['status'] == 'assignment-approved':
                    approvedTime = status['update_time']
                if status['status'] == 'assigned':
                    assignedTime = status['update_time']
                if status['status'] == 'completed':
                    completedTime = status['update_time']
                if status['status'] == 'acquired':
                    acquireTime = status['update_time']
                if status['status'] == 'closed-out':
                    closeoutTime = status['update_time']
                if status['status'] == 'announced':
                    announcedTime = status['update_time']
                if status['status'] == 'normal-archived':
                    archivedTime = status['update_time']

            # Build or modify the report dictionary for the WF
            report.setdefault(wf, {})

            if approvedTime and not report[wf].get('approvedTime', None):
                report[wf].update({'approvedTime':approvedTime})
            if assignedTime and not report[wf].get('assignedTime', None):
                report[wf].update({'assignedTime':assignedTime})
            if acquireTime and not report[wf].get('acquireTime', None):
                report[wf].update({'acquireTime':acquireTime})
            if closeoutTime and not report[wf].get('closeoutTime', None):
                report[wf].update({'closeoutTime':closeoutTime})
            if announcedTime and not report[wf].get('announcedTime', None):
                report[wf].update({'announcedTime':announcedTime})
            if completedTime and not report[wf].get('completedTime', None):
                report[wf].update({'completedTime':completedTime})
            if newTime and not report[wf].get('newTime', None):
                report[wf].update({'newTime':newTime})
            if archivedTime and not report[wf].get('archivedTime', None):
                report[wf].update({'archivedTime':archivedTime})

            try:
                dt = requests[wf]['request_date']
                requestDate = '%4.4d-%2.2d-%2.2d %2.2d:%2.2d:%2.2d' % tuple(dt)
                report[wf].update({'requestDate' : requestDate})
            except:
                pass

            report[wf].update({'priority':priority, 'status':finalStatus, 'type':requestType})
            report[wf].update({'totalLumis':targetLumis, 'totalEvents':targetEvents, })
            report[wf].update({'campaign' : campaign, 'prepID' : prep_id, 'outputTier' : outputTier, })
            report[wf].update({'outputDatasets' : outputdatasets, 'inputDataset' : inputdataset, })

            report[wf].setdefault('lumiPercents', {})
            report[wf].setdefault('eventPercents', {})
            lumiProgress = 0
            eventProgress = 0
            for percentage in [1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 98, 99, 100]:
                percent = str(percentage)
                percentReported = report[wf]['lumiPercents'].get(percent, None)
                if not percentReported and lumiPercent >= percentage:
                    report[wf]['lumiPercents'][percent] = int(time.time())
                if lumiPercent >= percentage:
                    lumiProgress = percentage

                percentReported = report[wf]['eventPercents'].get(percent, None)
                if not percentReported and eventPercent >= percentage:
                    report[wf]['eventPercents'][percent] = int(time.time())
                if eventPercent >= percentage:
                    eventProgress = percentage

            report[wf].update({'eventProgress' : eventProgress, 'lumiProgress' : lumiProgress,  })

            newCouchDoc.update(report[wf])

            # Queue the updated document for addition if it's changed.
            if ancientCouchDoc != newCouchDoc:
                if wfExists:
                    #log.debug("Workflow updated: %s" % wf)
                    pass
                else:
                    #log.debug("Workflow created: %s" % wf)
                    pass

                try:
                    newCouchDoc['updateTime'] = int(time.time())
                    report[wf]['updateTime'] = int(time.time())
                    cjson.encode(newCouchDoc) # Make sure it encodes before trying to queue
                    couchdb.queue(newCouchDoc)
                except:
                    log.error("Failed to queue document:%s \n" % pprint.pprint(newCouchDoc))

    log.info("%s: Finished getting job. wait for the next Cycle" % funcName)
    # Commit all changes to CouchDB
    couchdb.commit()
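
A hypothetical invocation of the McM-aware variant above. Every URL, path, and credential is a placeholder, and the logging module itself is passed because this version calls log.info()/log.error():

import logging

logging.basicConfig(level=logging.INFO)
gatherWMDataMiningStats(
    wmstatsUrl='https://example.cern.ch/couchdb/wmstats',
    reqmgrUrl='https://example.cern.ch/couchdb/reqmgr_workload_cache',
    wmMiningUrl='https://example.cern.ch/couchdb/wmdatamining',
    mcmUrl='https://example.cern.ch/mcm',
    mcmCert='/path/to/usercert.pem',
    mcmKey='/path/to/userkey.pem',
    tmpDir='/tmp/mcm',
    archived=False,
    log=logging,
)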
Beispiel #44
0
class CouchWorkQueueElement(WorkQueueElement):
    """
    _CouchWorkQueueElement_

    """
    def __init__(self, couchDB, id = None, elementParams = None):
        elementParams = elementParams or {}
        WorkQueueElement.__init__(self, **elementParams)
        if id:
            self._id = id
        self._document = Document(id = id)
        self._couch = couchDB

    rev = property(
        lambda x: str(x._document[u'_rev']) if u'_rev' in x._document else x._document.__getitem__('_rev'),
        lambda x, newid: x._document.__setitem__('_rev', newid))
    timestamp = property(
        lambda x: str(x._document[u'timestamp']) if u'timestamp' in x._document else x._document.__getitem__('timestamp')
        )
    updatetime = property(
        lambda x: str(x._document[u'updatetime']) if u'updatetime' in x._document else 0
        )


    @classmethod
    def fromDocument(cls, couchDB, doc):
        """Create element from couch document"""
        element = CouchWorkQueueElement(couchDB = couchDB,
                                        id = doc['_id'],
                                        elementParams = doc.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement')
                                        )
        element._document['_rev'] = doc.pop('_rev')
        element._document['timestamp'] = doc.pop('timestamp')
        element._document['updatetime'] = doc.pop('updatetime')
        return element

    def save(self):
        """
        _save_
        """
        self.populateDocument()
        self._couch.queue(self._document)

    def load(self):
        """
        _load_

        Load the document representing this WQE
        """
        document = self._couch.document(self._document['_id'])
        self.update(document.pop('WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'))
        self._document['_rev'] = document.pop('_rev')
        self._document['timestamp'] = document.pop('timestamp', None)
        self._document['updatetime'] = document.pop('updatetime', None)
        return self

    def delete(self):
        """Delete element"""
        self.populateDocument()
        self._document.delete()
        self._couch.queue(self._document)

    def populateDocument(self):
        """Certain attributed shouldn't be stored"""
        self._document.update(self.__to_json__(None))
        now = time.time()
        self._document['updatetime'] = now
        self._document.setdefault('timestamp', now)
        if not self._document.get('_id') and self.id:
            self._document['_id'] = self.id
        attrs = ['WMSpec', 'Task']
        for attr in attrs:
            self._document['WMCore.WorkQueue.DataStructs.WorkQueueElement.WorkQueueElement'].pop(attr, None)
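
A hedged round-trip sketch for CouchWorkQueueElement; the database name and element parameters are illustrative only, not values from the original code:

from WMCore.Database.CMSCouch import CouchServer

couchdb = CouchServer('http://localhost:5984').connectDatabase('workqueue')
element = CouchWorkQueueElement(couchdb, id='element_1',
                                elementParams={'RequestName': 'test_workflow'})
element.save()      # queues the backing document
couchdb.commit()    # flushes the queued write to CouchDB
restored = CouchWorkQueueElement(couchdb, id='element_1').load()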